THIS IS A TEST INSTANCE ONLY! REPOSITORIES CAN BE DELETED AT ANY TIME!

Git Source Code Mirror - This is a publish-only repository and all pull requests are ignored. Please follow Documentation/SubmittingPatches procedure for any of your improvements.
git
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

109 lines
3.6KB

  1. #ifndef GIT_UTF8_H
  2. #define GIT_UTF8_H
  /*
   * Forward declaration only: the declarations visible in this header use
   * struct strbuf solely through pointers, so the full definition is not
   * required here.
   */
  3. struct strbuf;
  /* Presumably holds one UCS code point; relies on int being 32 bits wide. */
  4. typedef unsigned int ucs_char_t; /* assuming 32bit int */
  /*
   * NOTE(review): only prototypes are visible here. The descriptions below
   * are inferred from names and signatures and should be confirmed against
   * the implementation.
   */
  /* Presumably the byte length of a display-mode escape sequence at 's' -- confirm. */
  5. size_t display_mode_esc_sequence_len(const char *s);
  /*
   * Takes the string position and the remaining length by pointer, so it
   * can presumably advance past the character it measured -- TODO confirm.
   */
  6. int utf8_width(const char **start, size_t *remainder_p);
  /*
   * Presumably the display width of the first 'len' bytes of 'string';
   * 'skip_ansi' looks like a flag for ignoring escape sequences -- confirm.
   */
  7. int utf8_strnwidth(const char *string, int len, int skip_ansi);
  /* Presumably the display width of a NUL-terminated string -- confirm. */
  8. int utf8_strwidth(const char *string);
  /* Predicates; the int result is presumably used as a boolean -- confirm. */
  9. int is_utf8(const char *text);
  10. int is_encoding_utf8(const char *name);
  /* Presumably compares two encoding names; the parameters are unnamed here. */
  11. int same_encoding(const char *, const char *);
  /*
   * printf-style output to a FILE stream. The format attribute asks the
   * compiler to check the variadic arguments (starting at parameter 3)
   * against the format string in parameter 2. How this differs from plain
   * fprintf() is not visible from the prototype.
   */
  12. __attribute__((format (printf, 2, 3)))
  13. int utf8_fprintf(FILE *, const char *, ...);
  /* Defined elsewhere; presumably the bytes of the UTF-8 byte order mark -- confirm. */
  14. extern const char utf8_bom[];
  /*
   * Takes the buffer pointer by address, so it can presumably advance it
   * past a leading BOM -- TODO confirm the return convention.
   */
  15. int skip_utf8_bom(char **, size_t);
  /*
   * NOTE(review): inferred from the names -- these appear to append 'text'
   * to 'buf' with line wrapping at 'width', using 'indent' and 'indent2' as
   * indentation amounts. Confirm which lines each indent applies to.
   */
  16. void strbuf_add_wrapped_text(struct strbuf *buf,
  17. const char *text, int indent, int indent2, int width);
  /* Same parameters, but the input is given as 'data' plus an explicit 'len'. */
  18. void strbuf_add_wrapped_bytes(struct strbuf *buf, const char *data, int len,
  19. int indent, int indent2, int width);
  /*
   * Presumably replaces 'width' display columns starting at column 'pos' in
   * 'sb' with 'subst' -- TODO confirm the units of 'pos' and 'width'.
   */
  20. void strbuf_utf8_replace(struct strbuf *sb, int pos, int width,
  21. const char *subst);
  22. #ifndef NO_ICONV
  /*
   * Only declared when NO_ICONV is not defined. Judging by the signature,
   * converts 'insz' bytes at 'in' using an existing iconv descriptor and
   * reports the output size through 'outsz' -- TODO confirm who owns the
   * returned buffer and what 'bom_len' means.
   */
  23. char *reencode_string_iconv(const char *in, size_t insz,
  24. iconv_t conv, size_t bom_len, size_t *outsz);
  /*
   * Variant that takes encoding names instead of a descriptor. The
   * NO_ICONV fallback for this function always returns NULL, so callers
   * presumably have to handle a NULL result -- confirm.
   */
  25. char *reencode_string_len(const char *in, size_t insz,
  26. const char *out_encoding,
  27. const char *in_encoding,
  28. size_t *outsz);
  29. #else
  /*
   * Fallback used when the build defines NO_ICONV: no conversion is
   * performed. If the caller supplied an output-size pointer it is set to
   * zero, and the function always returns NULL.
   */
  static inline char *reencode_string_len(const char *a, size_t b,
  					const char *c, const char *d,
  					size_t *e)
  {
  	if (e != NULL)
  		*e = 0;

  	return NULL;
  }
  33. #endif
  /*
   * Convenience wrapper for NUL-terminated input: measures 'in' with
   * strlen() and forwards to reencode_string_len(), passing NULL for the
   * output-size pointer. Returns whatever reencode_string_len() returns.
   */
  static inline char *reencode_string(const char *in,
  				    const char *out_encoding,
  				    const char *in_encoding)
  {
  	size_t in_len = strlen(in);

  	return reencode_string_len(in, in_len, out_encoding, in_encoding, NULL);
  }
  /*
   * Same in/out pointer parameters as utf8_width(), plus an encoding name.
   * Presumably returns the length of the next character of 'text' in that
   * encoding and advances the pointers -- TODO confirm.
   */
  42. int mbs_chrlen(const char **text, size_t *remainder_p, const char *encoding);
  43. /*
  44. * Returns true if the path would match ".git" after HFS case-folding.
  45. * The path should be NUL-terminated, but we will match variants of both ".git\0"
  46. * and ".git/..." (but _not_ ".../.git"). This makes it suitable for both fsck
  47. * and verify_path().
  48. *
  49. * Likewise, the is_hfs_dotgitfoo() variants look for ".gitfoo".
  50. */
  51. int is_hfs_dotgit(const char *path); /* matches ".git" */
  52. int is_hfs_dotgitmodules(const char *path); /* ".gitmodules" variant */
  53. int is_hfs_dotgitignore(const char *path); /* ".gitignore" variant */
  54. int is_hfs_dotgitattributes(const char *path); /* ".gitattributes" variant */
  /*
   * Alignment selector; used as the type of the 'position' argument of
   * strbuf_utf8_align(). The explicit values match the default numbering.
   */
  typedef enum {
  	ALIGN_LEFT = 0,
  	ALIGN_MIDDLE = 1,
  	ALIGN_RIGHT = 2
  } align_type;
  60. /*
  61. * Align the string given and store it into a strbuf as per the
  62. * 'position' and 'width'. If the given string length is larger than
  63. * 'width', then the input string is not truncated and no
  64. * alignment is done.
  65. */
  /*
   * 'buf' is the destination strbuf and 's' the string to align.
   * NOTE(review): whether 'width' counts display columns or bytes is not
   * visible from the prototype -- confirm.
   */
  66. void strbuf_utf8_align(struct strbuf *buf, align_type position, unsigned int width,
  67. const char *s);
  68. /*
  69. * If a data stream is declared as UTF-16BE or UTF-16LE, then a UTF-16
  70. * BOM must not be used [1]. The same applies for the UTF-32 equivalents.
  71. * The function returns true if this rule is violated.
  72. *
  73. * [1] http://unicode.org/faq/utf_bom.html#bom10
  74. */
  /*
   * NOTE(review): presumably 'enc' is the declared encoding name and
   * 'data'/'len' the bytes to inspect for a BOM -- confirm.
   */
  75. int has_prohibited_utf_bom(const char *enc, const char *data, size_t len);
  76. /*
  77. * If the endianness is not defined in the encoding name, then we
  78. * require a BOM. The function returns true if a required BOM is missing.
  79. *
  80. * The Unicode standard instructs to assume big-endian if there is no
  81. * BOM for UTF-16/32 [1][2]. However, the W3C/WHATWG encoding standard
  82. * used in HTML5 recommends to assume little-endian to "deal with
  83. * deployed content" [3].
  84. *
  85. * Therefore, strictly requiring a BOM seems to be the safest option for
  86. * content in Git.
  87. *
  88. * [1] http://unicode.org/faq/utf_bom.html#gen6
  89. * [2] http://www.unicode.org/versions/Unicode10.0.0/ch03.pdf
  90. * Section 3.10, D98, page 132
  91. * [3] https://encoding.spec.whatwg.org/#utf-16le
  92. */
  /*
   * NOTE(review): presumably 'enc' is the declared encoding name and
   * 'data'/'len' the bytes checked for a leading BOM -- confirm.
   */
  93. int is_missing_required_utf_bom(const char *enc, const char *data, size_t len);
  94. #endif