THIS IS A TEST INSTANCE ONLY! REPOSITORIES CAN BE DELETED AT ANY TIME!

Git Source Code Mirror - This is a publish-only repository and all pull requests are ignored. Please follow Documentation/SubmittingPatches procedure for any of your improvements.
git
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

1991 lines
48KB

  1. #include "cache.h"
  2. #include "config.h"
  3. #include "object-store.h"
  4. #include "attr.h"
  5. #include "run-command.h"
  6. #include "quote.h"
  7. #include "sigchain.h"
  8. #include "pkt-line.h"
  9. #include "sub-process.h"
  10. #include "utf8.h"
  11. #include "ll-merge.h"
  12. /*
  13. * convert.c - convert a file when checking it out and checking it in.
  14. *
  15. * This should use the pathname to decide on whether it wants to do some
  16. * more interesting conversions (automatic gzip/unzip, general format
  17. * conversions etc etc), but by default it just does automatic CRLF<->LF
  18. * translation when the "text" attribute or "auto_crlf" option is set.
  19. */
  /*
   * Bit flags summarising what gather_convert_stats() found in a buffer:
   * lone LF line endings, CRLF line endings, and content that looks
   * binary.  BIN may be reported together with the TXT bits; readers of
   * the flags test BIN first and then disregard the TXT bits.
   */
  #define CONVERT_STAT_BITS_TXT_LF 0x1
  #define CONVERT_STAT_BITS_TXT_CRLF 0x2
  #define CONVERT_STAT_BITS_BIN 0x4
  /*
   * The end-of-line conversion mode selected for a path.  The AUTO_*
   * values are the ones for which the content is inspected (binary
   * detection, existing CR/CRLF) before any conversion is applied.
   */
  enum crlf_action {
      CRLF_UNDEFINED,
      CRLF_BINARY,
      CRLF_TEXT,
      CRLF_TEXT_INPUT,
      CRLF_TEXT_CRLF,
      CRLF_AUTO,
      CRLF_AUTO_INPUT,
      CRLF_AUTO_CRLF
  };
  /* Byte statistics collected over a buffer by gather_stats(). */
  struct text_stat {
      /* counts of NUL bytes, lone CRs, lone LFs and CRLF pairs */
      unsigned nul, lonecr, lonelf, crlf;

      /* approximate counts only */
      unsigned printable, nonprintable;
  };
  40. static void gather_stats(const char *buf, unsigned long size, struct text_stat *stats)
  41. {
  42. unsigned long i;
  43. memset(stats, 0, sizeof(*stats));
  44. for (i = 0; i < size; i++) {
  45. unsigned char c = buf[i];
  46. if (c == '\r') {
  47. if (i+1 < size && buf[i+1] == '\n') {
  48. stats->crlf++;
  49. i++;
  50. } else
  51. stats->lonecr++;
  52. continue;
  53. }
  54. if (c == '\n') {
  55. stats->lonelf++;
  56. continue;
  57. }
  58. if (c == 127)
  59. /* DEL */
  60. stats->nonprintable++;
  61. else if (c < 32) {
  62. switch (c) {
  63. /* BS, HT, ESC and FF */
  64. case '\b': case '\t': case '\033': case '\014':
  65. stats->printable++;
  66. break;
  67. case 0:
  68. stats->nul++;
  69. /* fall through */
  70. default:
  71. stats->nonprintable++;
  72. }
  73. }
  74. else
  75. stats->printable++;
  76. }
  77. /* If file ends with EOF then don't count this EOF as non-printable. */
  78. if (size >= 1 && buf[size-1] == '\032')
  79. stats->nonprintable--;
  80. }
  81. /*
  82. * The same heuristics as diff.c::mmfile_is_binary()
  83. * We treat files with bare CR as binary
  84. */
  85. static int convert_is_binary(const struct text_stat *stats)
  86. {
  87. if (stats->lonecr)
  88. return 1;
  89. if (stats->nul)
  90. return 1;
  91. if ((stats->printable >> 7) < stats->nonprintable)
  92. return 1;
  93. return 0;
  94. }
  95. static unsigned int gather_convert_stats(const char *data, unsigned long size)
  96. {
  97. struct text_stat stats;
  98. int ret = 0;
  99. if (!data || !size)
  100. return 0;
  101. gather_stats(data, size, &stats);
  102. if (convert_is_binary(&stats))
  103. ret |= CONVERT_STAT_BITS_BIN;
  104. if (stats.crlf)
  105. ret |= CONVERT_STAT_BITS_TXT_CRLF;
  106. if (stats.lonelf)
  107. ret |= CONVERT_STAT_BITS_TXT_LF;
  108. return ret;
  109. }
  110. static const char *gather_convert_stats_ascii(const char *data, unsigned long size)
  111. {
  112. unsigned int convert_stats = gather_convert_stats(data, size);
  113. if (convert_stats & CONVERT_STAT_BITS_BIN)
  114. return "-text";
  115. switch (convert_stats) {
  116. case CONVERT_STAT_BITS_TXT_LF:
  117. return "lf";
  118. case CONVERT_STAT_BITS_TXT_CRLF:
  119. return "crlf";
  120. case CONVERT_STAT_BITS_TXT_LF | CONVERT_STAT_BITS_TXT_CRLF:
  121. return "mixed";
  122. default:
  123. return "none";
  124. }
  125. }
  /*
   * Line-ending summary (see gather_convert_stats_ascii()) of the blob
   * recorded for `path` in the given index.  The returned string is
   * static; the blob data read from the index is freed before returning.
   */
  const char *get_cached_convert_stats_ascii(const struct index_state *istate,
                                             const char *path)
  {
      unsigned long sz;
      void *blob = read_blob_data_from_index(istate, path, &sz);
      const char *summary = gather_convert_stats_ascii(blob, sz);

      free(blob);
      return summary;
  }
  136. const char *get_wt_convert_stats_ascii(const char *path)
  137. {
  138. const char *ret = "";
  139. struct strbuf sb = STRBUF_INIT;
  140. if (strbuf_read_file(&sb, path, 0) >= 0)
  141. ret = gather_convert_stats_ascii(sb.buf, sb.len);
  142. strbuf_release(&sb);
  143. return ret;
  144. }
  145. static int text_eol_is_crlf(void)
  146. {
  147. if (auto_crlf == AUTO_CRLF_TRUE)
  148. return 1;
  149. else if (auto_crlf == AUTO_CRLF_INPUT)
  150. return 0;
  151. if (core_eol == EOL_CRLF)
  152. return 1;
  153. if (core_eol == EOL_UNSET && EOL_NATIVE == EOL_CRLF)
  154. return 1;
  155. return 0;
  156. }
  157. static enum eol output_eol(enum crlf_action crlf_action)
  158. {
  159. switch (crlf_action) {
  160. case CRLF_BINARY:
  161. return EOL_UNSET;
  162. case CRLF_TEXT_CRLF:
  163. return EOL_CRLF;
  164. case CRLF_TEXT_INPUT:
  165. return EOL_LF;
  166. case CRLF_UNDEFINED:
  167. case CRLF_AUTO_CRLF:
  168. return EOL_CRLF;
  169. case CRLF_AUTO_INPUT:
  170. return EOL_LF;
  171. case CRLF_TEXT:
  172. case CRLF_AUTO:
  173. /* fall through */
  174. return text_eol_is_crlf() ? EOL_CRLF : EOL_LF;
  175. }
  176. warning(_("illegal crlf_action %d"), (int)crlf_action);
  177. return core_eol;
  178. }
  179. static void check_global_conv_flags_eol(const char *path, enum crlf_action crlf_action,
  180. struct text_stat *old_stats, struct text_stat *new_stats,
  181. int conv_flags)
  182. {
  183. if (old_stats->crlf && !new_stats->crlf ) {
  184. /*
  185. * CRLFs would not be restored by checkout
  186. */
  187. if (conv_flags & CONV_EOL_RNDTRP_DIE)
  188. die(_("CRLF would be replaced by LF in %s"), path);
  189. else if (conv_flags & CONV_EOL_RNDTRP_WARN)
  190. warning(_("CRLF will be replaced by LF in %s.\n"
  191. "The file will have its original line"
  192. " endings in your working directory"), path);
  193. } else if (old_stats->lonelf && !new_stats->lonelf ) {
  194. /*
  195. * CRLFs would be added by checkout
  196. */
  197. if (conv_flags & CONV_EOL_RNDTRP_DIE)
  198. die(_("LF would be replaced by CRLF in %s"), path);
  199. else if (conv_flags & CONV_EOL_RNDTRP_WARN)
  200. warning(_("LF will be replaced by CRLF in %s.\n"
  201. "The file will have its original line"
  202. " endings in your working directory"), path);
  203. }
  204. }
  205. static int has_crlf_in_index(const struct index_state *istate, const char *path)
  206. {
  207. unsigned long sz;
  208. void *data;
  209. const char *crp;
  210. int has_crlf = 0;
  211. data = read_blob_data_from_index(istate, path, &sz);
  212. if (!data)
  213. return 0;
  214. crp = memchr(data, '\r', sz);
  215. if (crp) {
  216. unsigned int ret_stats;
  217. ret_stats = gather_convert_stats(data, sz);
  218. if (!(ret_stats & CONVERT_STAT_BITS_BIN) &&
  219. (ret_stats & CONVERT_STAT_BITS_TXT_CRLF))
  220. has_crlf = 1;
  221. }
  222. free(data);
  223. return has_crlf;
  224. }
  225. static int will_convert_lf_to_crlf(struct text_stat *stats,
  226. enum crlf_action crlf_action)
  227. {
  228. if (output_eol(crlf_action) != EOL_CRLF)
  229. return 0;
  230. /* No "naked" LF? Nothing to convert, regardless. */
  231. if (!stats->lonelf)
  232. return 0;
  233. if (crlf_action == CRLF_AUTO || crlf_action == CRLF_AUTO_INPUT || crlf_action == CRLF_AUTO_CRLF) {
  234. /* If we have any CR or CRLF line endings, we do not touch it */
  235. /* This is the new safer autocrlf-handling */
  236. if (stats->lonecr || stats->crlf)
  237. return 0;
  238. if (convert_is_binary(stats))
  239. return 0;
  240. }
  241. return 1;
  242. }
  /*
   * Check `data` for detectable byte order mark problems given the
   * working-tree encoding `enc`.  Only encodings whose name starts with
   * "UTF" (case-insensitively) are checked.  On a problem, advice is
   * printed and then the function either dies (die_on_error) or returns
   * the result of error().  Returns 0 when nothing is wrong.
   */
  static int validate_encoding(const char *path, const char *enc,
                               const char *data, size_t len, int die_on_error)
  {
      const char *error_msg;
      const char *advise_msg;
      const char *stripped = NULL;
      char *upper;

      /* We only check for UTF here as UTF?? can be an alias for UTF-?? */
      if (!istarts_with(enc, "UTF"))
          return 0;

      if (has_prohibited_utf_bom(enc, data, len)) {
          error_msg = _(
              "BOM is prohibited in '%s' if encoded as %s");
          /*
           * This advice is shown for UTF-??BE and UTF-??LE encodings.
           * Dropping the last two characters of the encoding name
           * gives the encoding name suitable for BOMs.
           */
          advise_msg = _(
              "The file '%s' contains a byte order "
              "mark (BOM). Please use UTF-%s as "
              "working-tree-encoding.");
          upper = xstrdup_toupper(enc);
          upper[strlen(upper)-2] = '\0';
          if (skip_prefix(upper, "UTF", &stripped))
              skip_prefix(stripped, "-", &stripped);
          advise(advise_msg, path, stripped);
          free(upper);
      } else if (is_missing_required_utf_bom(enc, data, len)) {
          error_msg = _(
              "BOM is required in '%s' if encoded as %s");
          advise_msg = _(
              "The file '%s' is missing a byte order "
              "mark (BOM). Please use UTF-%sBE or UTF-%sLE "
              "(depending on the byte order) as "
              "working-tree-encoding.");
          upper = xstrdup_toupper(enc);
          if (skip_prefix(upper, "UTF", &stripped))
              skip_prefix(stripped, "-", &stripped);
          advise(advise_msg, path, stripped, stripped);
          free(upper);
      } else {
          return 0;
      }

      if (die_on_error)
          die(error_msg, path, enc);
      else
          return error(error_msg, path, enc);
      return 0;
  }
  298. static void trace_encoding(const char *context, const char *path,
  299. const char *encoding, const char *buf, size_t len)
  300. {
  301. static struct trace_key coe = TRACE_KEY_INIT(WORKING_TREE_ENCODING);
  302. struct strbuf trace = STRBUF_INIT;
  303. int i;
  304. strbuf_addf(&trace, "%s (%s, considered %s):\n", context, path, encoding);
  305. for (i = 0; i < len && buf; ++i) {
  306. strbuf_addf(
  307. &trace, "| \033[2m%2i:\033[0m %2x \033[2m%c\033[0m%c",
  308. i,
  309. (unsigned char) buf[i],
  310. (buf[i] > 32 && buf[i] < 127 ? buf[i] : ' '),
  311. ((i+1) % 8 && (i+1) < len ? ' ' : '\n')
  312. );
  313. }
  314. strbuf_addchars(&trace, '\n', 1);
  315. trace_strbuf(&coe, &trace);
  316. strbuf_release(&trace);
  317. }
  318. static int check_roundtrip(const char *enc_name)
  319. {
  320. /*
  321. * check_roundtrip_encoding contains a string of comma and/or
  322. * space separated encodings (eg. "UTF-16, ASCII, CP1125").
  323. * Search for the given encoding in that string.
  324. */
  325. const char *found = strcasestr(check_roundtrip_encoding, enc_name);
  326. const char *next;
  327. int len;
  328. if (!found)
  329. return 0;
  330. next = found + strlen(enc_name);
  331. len = strlen(check_roundtrip_encoding);
  332. return (found && (
  333. /*
  334. * check that the found encoding is at the
  335. * beginning of check_roundtrip_encoding or
  336. * that it is prefixed with a space or comma
  337. */
  338. found == check_roundtrip_encoding || (
  339. (isspace(found[-1]) || found[-1] == ',')
  340. )
  341. ) && (
  342. /*
  343. * check that the found encoding is at the
  344. * end of check_roundtrip_encoding or
  345. * that it is suffixed with a space or comma
  346. */
  347. next == check_roundtrip_encoding + len || (
  348. next < check_roundtrip_encoding + len &&
  349. (isspace(next[0]) || next[0] == ',')
  350. )
  351. ));
  352. }
  /*
   * The encoding on the Git side of the conversion: encode_to_git()
   * converts to it and encode_to_worktree() converts from it.
   */
  static const char *default_encoding = "UTF-8";
  354. static int encode_to_git(const char *path, const char *src, size_t src_len,
  355. struct strbuf *buf, const char *enc, int conv_flags)
  356. {
  357. char *dst;
  358. size_t dst_len;
  359. int die_on_error = conv_flags & CONV_WRITE_OBJECT;
  360. /*
  361. * No encoding is specified or there is nothing to encode.
  362. * Tell the caller that the content was not modified.
  363. */
  364. if (!enc || (src && !src_len))
  365. return 0;
  366. /*
  367. * Looks like we got called from "would_convert_to_git()".
  368. * This means Git wants to know if it would encode (= modify!)
  369. * the content. Let's answer with "yes", since an encoding was
  370. * specified.
  371. */
  372. if (!buf && !src)
  373. return 1;
  374. if (validate_encoding(path, enc, src, src_len, die_on_error))
  375. return 0;
  376. trace_encoding("source", path, enc, src, src_len);
  377. dst = reencode_string_len(src, src_len, default_encoding, enc,
  378. &dst_len);
  379. if (!dst) {
  380. /*
  381. * We could add the blob "as-is" to Git. However, on checkout
  382. * we would try to reencode to the original encoding. This
  383. * would fail and we would leave the user with a messed-up
  384. * working tree. Let's try to avoid this by screaming loud.
  385. */
  386. const char* msg = _("failed to encode '%s' from %s to %s");
  387. if (die_on_error)
  388. die(msg, path, enc, default_encoding);
  389. else {
  390. error(msg, path, enc, default_encoding);
  391. return 0;
  392. }
  393. }
  394. trace_encoding("destination", path, default_encoding, dst, dst_len);
  395. /*
  396. * UTF supports lossless conversion round tripping [1] and conversions
  397. * between UTF and other encodings are mostly round trip safe as
  398. * Unicode aims to be a superset of all other character encodings.
  399. * However, certain encodings (e.g. SHIFT-JIS) are known to have round
  400. * trip issues [2]. Check the round trip conversion for all encodings
  401. * listed in core.checkRoundtripEncoding.
  402. *
  403. * The round trip check is only performed if content is written to Git.
  404. * This ensures that no information is lost during conversion to/from
  405. * the internal UTF-8 representation.
  406. *
  407. * Please note, the code below is not tested because I was not able to
  408. * generate a faulty round trip without an iconv error. Iconv errors
  409. * are already caught above.
  410. *
  411. * [1] http://unicode.org/faq/utf_bom.html#gen2
  412. * [2] https://support.microsoft.com/en-us/help/170559/prb-conversion-problem-between-shift-jis-and-unicode
  413. */
  414. if (die_on_error && check_roundtrip(enc)) {
  415. char *re_src;
  416. size_t re_src_len;
  417. re_src = reencode_string_len(dst, dst_len,
  418. enc, default_encoding,
  419. &re_src_len);
  420. trace_printf("Checking roundtrip encoding for %s...\n", enc);
  421. trace_encoding("reencoded source", path, enc,
  422. re_src, re_src_len);
  423. if (!re_src || src_len != re_src_len ||
  424. memcmp(src, re_src, src_len)) {
  425. const char* msg = _("encoding '%s' from %s to %s and "
  426. "back is not the same");
  427. die(msg, path, enc, default_encoding);
  428. }
  429. free(re_src);
  430. }
  431. strbuf_attach(buf, dst, dst_len, dst_len + 1);
  432. return 1;
  433. }
  434. static int encode_to_worktree(const char *path, const char *src, size_t src_len,
  435. struct strbuf *buf, const char *enc)
  436. {
  437. char *dst;
  438. size_t dst_len;
  439. /*
  440. * No encoding is specified or there is nothing to encode.
  441. * Tell the caller that the content was not modified.
  442. */
  443. if (!enc || (src && !src_len))
  444. return 0;
  445. dst = reencode_string_len(src, src_len, enc, default_encoding,
  446. &dst_len);
  447. if (!dst) {
  448. error(_("failed to encode '%s' from %s to %s"),
  449. path, default_encoding, enc);
  450. return 0;
  451. }
  452. strbuf_attach(buf, dst, dst_len, dst_len + 1);
  453. return 1;
  454. }
  455. static int crlf_to_git(const struct index_state *istate,
  456. const char *path, const char *src, size_t len,
  457. struct strbuf *buf,
  458. enum crlf_action crlf_action, int conv_flags)
  459. {
  460. struct text_stat stats;
  461. char *dst;
  462. int convert_crlf_into_lf;
  463. if (crlf_action == CRLF_BINARY ||
  464. (src && !len))
  465. return 0;
  466. /*
  467. * If we are doing a dry-run and have no source buffer, there is
  468. * nothing to analyze; we must assume we would convert.
  469. */
  470. if (!buf && !src)
  471. return 1;
  472. gather_stats(src, len, &stats);
  473. /* Optimization: No CRLF? Nothing to convert, regardless. */
  474. convert_crlf_into_lf = !!stats.crlf;
  475. if (crlf_action == CRLF_AUTO || crlf_action == CRLF_AUTO_INPUT || crlf_action == CRLF_AUTO_CRLF) {
  476. if (convert_is_binary(&stats))
  477. return 0;
  478. /*
  479. * If the file in the index has any CR in it, do not
  480. * convert. This is the new safer autocrlf handling,
  481. * unless we want to renormalize in a merge or
  482. * cherry-pick.
  483. */
  484. if ((!(conv_flags & CONV_EOL_RENORMALIZE)) &&
  485. has_crlf_in_index(istate, path))
  486. convert_crlf_into_lf = 0;
  487. }
  488. if (((conv_flags & CONV_EOL_RNDTRP_WARN) ||
  489. ((conv_flags & CONV_EOL_RNDTRP_DIE) && len))) {
  490. struct text_stat new_stats;
  491. memcpy(&new_stats, &stats, sizeof(new_stats));
  492. /* simulate "git add" */
  493. if (convert_crlf_into_lf) {
  494. new_stats.lonelf += new_stats.crlf;
  495. new_stats.crlf = 0;
  496. }
  497. /* simulate "git checkout" */
  498. if (will_convert_lf_to_crlf(&new_stats, crlf_action)) {
  499. new_stats.crlf += new_stats.lonelf;
  500. new_stats.lonelf = 0;
  501. }
  502. check_global_conv_flags_eol(path, crlf_action, &stats, &new_stats, conv_flags);
  503. }
  504. if (!convert_crlf_into_lf)
  505. return 0;
  506. /*
  507. * At this point all of our source analysis is done, and we are sure we
  508. * would convert. If we are in dry-run mode, we can give an answer.
  509. */
  510. if (!buf)
  511. return 1;
  512. /* only grow if not in place */
  513. if (strbuf_avail(buf) + buf->len < len)
  514. strbuf_grow(buf, len - buf->len);
  515. dst = buf->buf;
  516. if (crlf_action == CRLF_AUTO || crlf_action == CRLF_AUTO_INPUT || crlf_action == CRLF_AUTO_CRLF) {
  517. /*
  518. * If we guessed, we already know we rejected a file with
  519. * lone CR, and we can strip a CR without looking at what
  520. * follow it.
  521. */
  522. do {
  523. unsigned char c = *src++;
  524. if (c != '\r')
  525. *dst++ = c;
  526. } while (--len);
  527. } else {
  528. do {
  529. unsigned char c = *src++;
  530. if (! (c == '\r' && (1 < len && *src == '\n')))
  531. *dst++ = c;
  532. } while (--len);
  533. }
  534. strbuf_setlen(buf, dst - buf->buf);
  535. return 1;
  536. }
  537. static int crlf_to_worktree(const char *src, size_t len,
  538. struct strbuf *buf, enum crlf_action crlf_action)
  539. {
  540. char *to_free = NULL;
  541. struct text_stat stats;
  542. if (!len || output_eol(crlf_action) != EOL_CRLF)
  543. return 0;
  544. gather_stats(src, len, &stats);
  545. if (!will_convert_lf_to_crlf(&stats, crlf_action))
  546. return 0;
  547. /* are we "faking" in place editing ? */
  548. if (src == buf->buf)
  549. to_free = strbuf_detach(buf, NULL);
  550. strbuf_grow(buf, len + stats.lonelf);
  551. for (;;) {
  552. const char *nl = memchr(src, '\n', len);
  553. if (!nl)
  554. break;
  555. if (nl > src && nl[-1] == '\r') {
  556. strbuf_add(buf, src, nl + 1 - src);
  557. } else {
  558. strbuf_add(buf, src, nl - src);
  559. strbuf_addstr(buf, "\r\n");
  560. }
  561. len -= nl + 1 - src;
  562. src = nl + 1;
  563. }
  564. strbuf_add(buf, src, len);
  565. free(to_free);
  566. return 1;
  567. }
  /* Arguments handed to filter_buffer_or_fd() through its void pointer. */
  struct filter_params {
      const char *src;    /* input buffer; when NULL the input comes from fd */
      unsigned long size; /* number of bytes in src */
      int fd;             /* input descriptor used when src is NULL */
      const char *cmd;    /* filter command line, before %f expansion */
      const char *path;   /* path substituted (quoted) for %f in cmd */
  };
  575. static int filter_buffer_or_fd(int in, int out, void *data)
  576. {
  577. /*
  578. * Spawn cmd and feed the buffer contents through its stdin.
  579. */
  580. struct child_process child_process = CHILD_PROCESS_INIT;
  581. struct filter_params *params = (struct filter_params *)data;
  582. int write_err, status;
  583. const char *argv[] = { NULL, NULL };
  584. /* apply % substitution to cmd */
  585. struct strbuf cmd = STRBUF_INIT;
  586. struct strbuf path = STRBUF_INIT;
  587. struct strbuf_expand_dict_entry dict[] = {
  588. { "f", NULL, },
  589. { NULL, NULL, },
  590. };
  591. /* quote the path to preserve spaces, etc. */
  592. sq_quote_buf(&path, params->path);
  593. dict[0].value = path.buf;
  594. /* expand all %f with the quoted path */
  595. strbuf_expand(&cmd, params->cmd, strbuf_expand_dict_cb, &dict);
  596. strbuf_release(&path);
  597. argv[0] = cmd.buf;
  598. child_process.argv = argv;
  599. child_process.use_shell = 1;
  600. child_process.in = -1;
  601. child_process.out = out;
  602. if (start_command(&child_process)) {
  603. strbuf_release(&cmd);
  604. return error(_("cannot fork to run external filter '%s'"),
  605. params->cmd);
  606. }
  607. sigchain_push(SIGPIPE, SIG_IGN);
  608. if (params->src) {
  609. write_err = (write_in_full(child_process.in,
  610. params->src, params->size) < 0);
  611. if (errno == EPIPE)
  612. write_err = 0;
  613. } else {
  614. write_err = copy_fd(params->fd, child_process.in);
  615. if (write_err == COPY_WRITE_ERROR && errno == EPIPE)
  616. write_err = 0;
  617. }
  618. if (close(child_process.in))
  619. write_err = 1;
  620. if (write_err)
  621. error(_("cannot feed the input to external filter '%s'"),
  622. params->cmd);
  623. sigchain_pop(SIGPIPE);
  624. status = finish_command(&child_process);
  625. if (status)
  626. error(_("external filter '%s' failed %d"), params->cmd, status);
  627. strbuf_release(&cmd);
  628. return (write_err || status);
  629. }
  630. static int apply_single_file_filter(const char *path, const char *src, size_t len, int fd,
  631. struct strbuf *dst, const char *cmd)
  632. {
  633. /*
  634. * Create a pipeline to have the command filter the buffer's
  635. * contents.
  636. *
  637. * (child --> cmd) --> us
  638. */
  639. int err = 0;
  640. struct strbuf nbuf = STRBUF_INIT;
  641. struct async async;
  642. struct filter_params params;
  643. memset(&async, 0, sizeof(async));
  644. async.proc = filter_buffer_or_fd;
  645. async.data = &params;
  646. async.out = -1;
  647. params.src = src;
  648. params.size = len;
  649. params.fd = fd;
  650. params.cmd = cmd;
  651. params.path = path;
  652. fflush(NULL);
  653. if (start_async(&async))
  654. return 0; /* error was already reported */
  655. if (strbuf_read(&nbuf, async.out, 0) < 0) {
  656. err = error(_("read from external filter '%s' failed"), cmd);
  657. }
  658. if (close(async.out)) {
  659. err = error(_("read from external filter '%s' failed"), cmd);
  660. }
  661. if (finish_async(&async)) {
  662. err = error(_("external filter '%s' failed"), cmd);
  663. }
  664. if (!err) {
  665. strbuf_swap(dst, &nbuf);
  666. }
  667. strbuf_release(&nbuf);
  668. return !err;
  669. }
  /*
   * Capability bits of a filter process, matched against the "clean",
   * "smudge" and "delay" capability names during the handshake.
   */
  #define CAP_CLEAN (1u<<0)
  #define CAP_SMUDGE (1u<<1)
  #define CAP_DELAY (1u<<2)
  673. struct cmd2process {
  674. struct subprocess_entry subprocess; /* must be the first member! */
  675. unsigned int supported_capabilities;
  676. };
  677. static int subprocess_map_initialized;
  678. static struct hashmap subprocess_map;
  679. static int start_multi_file_filter_fn(struct subprocess_entry *subprocess)
  680. {
  681. static int versions[] = {2, 0};
  682. static struct subprocess_capability capabilities[] = {
  683. { "clean", CAP_CLEAN },
  684. { "smudge", CAP_SMUDGE },
  685. { "delay", CAP_DELAY },
  686. { NULL, 0 }
  687. };
  688. struct cmd2process *entry = (struct cmd2process *)subprocess;
  689. return subprocess_handshake(subprocess, "git-filter", versions, NULL,
  690. capabilities,
  691. &entry->supported_capabilities);
  692. }
  693. static void handle_filter_error(const struct strbuf *filter_status,
  694. struct cmd2process *entry,
  695. const unsigned int wanted_capability)
  696. {
  697. if (!strcmp(filter_status->buf, "error"))
  698. ; /* The filter signaled a problem with the file. */
  699. else if (!strcmp(filter_status->buf, "abort") && wanted_capability) {
  700. /*
  701. * The filter signaled a permanent problem. Don't try to filter
  702. * files with the same command for the lifetime of the current
  703. * Git process.
  704. */
  705. entry->supported_capabilities &= ~wanted_capability;
  706. } else {
  707. /*
  708. * Something went wrong with the protocol filter.
  709. * Force shutdown and restart if another blob requires filtering.
  710. */
  711. error(_("external filter '%s' failed"), entry->subprocess.cmd);
  712. subprocess_stop(&subprocess_map, &entry->subprocess);
  713. free(entry);
  714. }
  715. }
  716. static int apply_multi_file_filter(const char *path, const char *src, size_t len,
  717. int fd, struct strbuf *dst, const char *cmd,
  718. const unsigned int wanted_capability,
  719. struct delayed_checkout *dco)
  720. {
  721. int err;
  722. int can_delay = 0;
  723. struct cmd2process *entry;
  724. struct child_process *process;
  725. struct strbuf nbuf = STRBUF_INIT;
  726. struct strbuf filter_status = STRBUF_INIT;
  727. const char *filter_type;
  728. if (!subprocess_map_initialized) {
  729. subprocess_map_initialized = 1;
  730. hashmap_init(&subprocess_map, cmd2process_cmp, NULL, 0);
  731. entry = NULL;
  732. } else {
  733. entry = (struct cmd2process *)subprocess_find_entry(&subprocess_map, cmd);
  734. }
  735. fflush(NULL);
  736. if (!entry) {
  737. entry = xmalloc(sizeof(*entry));
  738. entry->supported_capabilities = 0;
  739. if (subprocess_start(&subprocess_map, &entry->subprocess, cmd, start_multi_file_filter_fn)) {
  740. free(entry);
  741. return 0;
  742. }
  743. }
  744. process = &entry->subprocess.process;
  745. if (!(entry->supported_capabilities & wanted_capability))
  746. return 0;
  747. if (wanted_capability & CAP_CLEAN)
  748. filter_type = "clean";
  749. else if (wanted_capability & CAP_SMUDGE)
  750. filter_type = "smudge";
  751. else
  752. die(_("unexpected filter type"));
  753. sigchain_push(SIGPIPE, SIG_IGN);
  754. assert(strlen(filter_type) < LARGE_PACKET_DATA_MAX - strlen("command=\n"));
  755. err = packet_write_fmt_gently(process->in, "command=%s\n", filter_type);
  756. if (err)
  757. goto done;
  758. err = strlen(path) > LARGE_PACKET_DATA_MAX - strlen("pathname=\n");
  759. if (err) {
  760. error(_("path name too long for external filter"));
  761. goto done;
  762. }
  763. err = packet_write_fmt_gently(process->in, "pathname=%s\n", path);
  764. if (err)
  765. goto done;
  766. if ((entry->supported_capabilities & CAP_DELAY) &&
  767. dco && dco->state == CE_CAN_DELAY) {
  768. can_delay = 1;
  769. err = packet_write_fmt_gently(process->in, "can-delay=1\n");
  770. if (err)
  771. goto done;
  772. }
  773. err = packet_flush_gently(process->in);
  774. if (err)
  775. goto done;
  776. if (fd >= 0)
  777. err = write_packetized_from_fd(fd, process->in);
  778. else
  779. err = write_packetized_from_buf(src, len, process->in);
  780. if (err)
  781. goto done;
  782. err = subprocess_read_status(process->out, &filter_status);
  783. if (err)
  784. goto done;
  785. if (can_delay && !strcmp(filter_status.buf, "delayed")) {
  786. string_list_insert(&dco->filters, cmd);
  787. string_list_insert(&dco->paths, path);
  788. } else {
  789. /* The filter got the blob and wants to send us a response. */
  790. err = strcmp(filter_status.buf, "success");
  791. if (err)
  792. goto done;
  793. err = read_packetized_to_strbuf(process->out, &nbuf) < 0;
  794. if (err)
  795. goto done;
  796. err = subprocess_read_status(process->out, &filter_status);
  797. if (err)
  798. goto done;
  799. err = strcmp(filter_status.buf, "success");
  800. }
  801. done:
  802. sigchain_pop(SIGPIPE);
  803. if (err)
  804. handle_filter_error(&filter_status, entry, wanted_capability);
  805. else
  806. strbuf_swap(dst, &nbuf);
  807. strbuf_release(&nbuf);
  808. return !err;
  809. }
  810. int async_query_available_blobs(const char *cmd, struct string_list *available_paths)
  811. {
  812. int err;
  813. char *line;
  814. struct cmd2process *entry;
  815. struct child_process *process;
  816. struct strbuf filter_status = STRBUF_INIT;
  817. assert(subprocess_map_initialized);
  818. entry = (struct cmd2process *)subprocess_find_entry(&subprocess_map, cmd);
  819. if (!entry) {
  820. error(_("external filter '%s' is not available anymore although "
  821. "not all paths have been filtered"), cmd);
  822. return 0;
  823. }
  824. process = &entry->subprocess.process;
  825. sigchain_push(SIGPIPE, SIG_IGN);
  826. err = packet_write_fmt_gently(
  827. process->in, "command=list_available_blobs\n");
  828. if (err)
  829. goto done;
  830. err = packet_flush_gently(process->in);
  831. if (err)
  832. goto done;
  833. while ((line = packet_read_line(process->out, NULL))) {
  834. const char *path;
  835. if (skip_prefix(line, "pathname=", &path))
  836. string_list_insert(available_paths, xstrdup(path));
  837. else
  838. ; /* ignore unknown keys */
  839. }
  840. err = subprocess_read_status(process->out, &filter_status);
  841. if (err)
  842. goto done;
  843. err = strcmp(filter_status.buf, "success");
  844. done:
  845. sigchain_pop(SIGPIPE);
  846. if (err)
  847. handle_filter_error(&filter_status, entry, 0);
  848. return !err;
  849. }
  /*
   * One configured "filter.<name>.*" driver.  Drivers form a singly
   * linked list headed by user_convert; user_convert_tail points at the
   * link where the next new driver is appended.
   */
  static struct convert_driver {
      const char *name;            /* the <name> part of the config key */
      struct convert_driver *next; /* next driver in the list */
      const char *smudge;          /* filter.<name>.smudge */
      const char *clean;           /* filter.<name>.clean */
      const char *process;         /* filter.<name>.process */
      int required;                /* filter.<name>.required */
  } *user_convert, **user_convert_tail;
  858. static int apply_filter(const char *path, const char *src, size_t len,
  859. int fd, struct strbuf *dst, struct convert_driver *drv,
  860. const unsigned int wanted_capability,
  861. struct delayed_checkout *dco)
  862. {
  863. const char *cmd = NULL;
  864. if (!drv)
  865. return 0;
  866. if (!dst)
  867. return 1;
  868. if ((wanted_capability & CAP_CLEAN) && !drv->process && drv->clean)
  869. cmd = drv->clean;
  870. else if ((wanted_capability & CAP_SMUDGE) && !drv->process && drv->smudge)
  871. cmd = drv->smudge;
  872. if (cmd && *cmd)
  873. return apply_single_file_filter(path, src, len, fd, dst, cmd);
  874. else if (drv->process && *drv->process)
  875. return apply_multi_file_filter(path, src, len, fd, dst,
  876. drv->process, wanted_capability, dco);
  877. return 0;
  878. }
  879. static int read_convert_config(const char *var, const char *value, void *cb)
  880. {
  881. const char *key, *name;
  882. int namelen;
  883. struct convert_driver *drv;
  884. /*
  885. * External conversion drivers are configured using
  886. * "filter.<name>.variable".
  887. */
  888. if (parse_config_key(var, "filter", &name, &namelen, &key) < 0 || !name)
  889. return 0;
  890. for (drv = user_convert; drv; drv = drv->next)
  891. if (!strncmp(drv->name, name, namelen) && !drv->name[namelen])
  892. break;
  893. if (!drv) {
  894. drv = xcalloc(1, sizeof(struct convert_driver));
  895. drv->name = xmemdupz(name, namelen);
  896. *user_convert_tail = drv;
  897. user_convert_tail = &(drv->next);
  898. }
  899. /*
  900. * filter.<name>.smudge and filter.<name>.clean specifies
  901. * the command line:
  902. *
  903. * command-line
  904. *
  905. * The command-line will not be interpolated in any way.
  906. */
  907. if (!strcmp("smudge", key))
  908. return git_config_string(&drv->smudge, var, value);
  909. if (!strcmp("clean", key))
  910. return git_config_string(&drv->clean, var, value);
  911. if (!strcmp("process", key))
  912. return git_config_string(&drv->process, var, value);
  913. if (!strcmp("required", key)) {
  914. drv->required = git_config_bool(var, value);
  915. return 0;
  916. }
  917. return 0;
  918. }
  919. static int count_ident(const char *cp, unsigned long size)
  920. {
  921. /*
  922. * "$Id: 0000000000000000000000000000000000000000 $" <=> "$Id$"
  923. */
  924. int cnt = 0;
  925. char ch;
  926. while (size) {
  927. ch = *cp++;
  928. size--;
  929. if (ch != '$')
  930. continue;
  931. if (size < 3)
  932. break;
  933. if (memcmp("Id", cp, 2))
  934. continue;
  935. ch = cp[2];
  936. cp += 3;
  937. size -= 3;
  938. if (ch == '$')
  939. cnt++; /* $Id$ */
  940. if (ch != ':')
  941. continue;
  942. /*
  943. * "$Id: ... "; scan up to the closing dollar sign and discard.
  944. */
  945. while (size) {
  946. ch = *cp++;
  947. size--;
  948. if (ch == '$') {
  949. cnt++;
  950. break;
  951. }
  952. if (ch == '\n')
  953. break;
  954. }
  955. }
  956. return cnt;
  957. }
  958. static int ident_to_git(const char *src, size_t len,
  959. struct strbuf *buf, int ident)
  960. {
  961. char *dst, *dollar;
  962. if (!ident || (src && !count_ident(src, len)))
  963. return 0;
  964. if (!buf)
  965. return 1;
  966. /* only grow if not in place */
  967. if (strbuf_avail(buf) + buf->len < len)
  968. strbuf_grow(buf, len - buf->len);
  969. dst = buf->buf;
  970. for (;;) {
  971. dollar = memchr(src, '$', len);
  972. if (!dollar)
  973. break;
  974. memmove(dst, src, dollar + 1 - src);
  975. dst += dollar + 1 - src;
  976. len -= dollar + 1 - src;
  977. src = dollar + 1;
  978. if (len > 3 && !memcmp(src, "Id:", 3)) {
  979. dollar = memchr(src + 3, '$', len - 3);
  980. if (!dollar)
  981. break;
  982. if (memchr(src + 3, '\n', dollar - src - 3)) {
  983. /* Line break before the next dollar. */
  984. continue;
  985. }
  986. memcpy(dst, "Id$", 3);
  987. dst += 3;
  988. len -= dollar + 1 - src;
  989. src = dollar + 1;
  990. }
  991. }
  992. memmove(dst, src, len);
  993. strbuf_setlen(buf, dst + len - buf->buf);
  994. return 1;
  995. }
  996. static int ident_to_worktree(const char *src, size_t len,
  997. struct strbuf *buf, int ident)
  998. {
  999. struct object_id oid;
  1000. char *to_free = NULL, *dollar, *spc;
  1001. int cnt;
  1002. if (!ident)
  1003. return 0;
  1004. cnt = count_ident(src, len);
  1005. if (!cnt)
  1006. return 0;
  1007. /* are we "faking" in place editing ? */
  1008. if (src == buf->buf)
  1009. to_free = strbuf_detach(buf, NULL);
  1010. hash_object_file(src, len, "blob", &oid);
  1011. strbuf_grow(buf, len + cnt * (the_hash_algo->hexsz + 3));
  1012. for (;;) {
  1013. /* step 1: run to the next '$' */
  1014. dollar = memchr(src, '$', len);
  1015. if (!dollar)
  1016. break;
  1017. strbuf_add(buf, src, dollar + 1 - src);
  1018. len -= dollar + 1 - src;
  1019. src = dollar + 1;
  1020. /* step 2: does it looks like a bit like Id:xxx$ or Id$ ? */
  1021. if (len < 3 || memcmp("Id", src, 2))
  1022. continue;
  1023. /* step 3: skip over Id$ or Id:xxxxx$ */
  1024. if (src[2] == '$') {
  1025. src += 3;
  1026. len -= 3;
  1027. } else if (src[2] == ':') {
  1028. /*
  1029. * It's possible that an expanded Id has crept its way into the
  1030. * repository, we cope with that by stripping the expansion out.
  1031. * This is probably not a good idea, since it will cause changes
  1032. * on checkout, which won't go away by stash, but let's keep it
  1033. * for git-style ids.
  1034. */
  1035. dollar = memchr(src + 3, '$', len - 3);
  1036. if (!dollar) {
  1037. /* incomplete keyword, no more '$', so just quit the loop */
  1038. break;
  1039. }
  1040. if (memchr(src + 3, '\n', dollar - src - 3)) {
  1041. /* Line break before the next dollar. */
  1042. continue;
  1043. }
  1044. spc = memchr(src + 4, ' ', dollar - src - 4);
  1045. if (spc && spc < dollar-1) {
  1046. /* There are spaces in unexpected places.
  1047. * This is probably an id from some other
  1048. * versioning system. Keep it for now.
  1049. */
  1050. continue;
  1051. }
  1052. len -= dollar + 1 - src;
  1053. src = dollar + 1;
  1054. } else {
  1055. /* it wasn't a "Id$" or "Id:xxxx$" */
  1056. continue;
  1057. }
  1058. /* step 4: substitute */
  1059. strbuf_addstr(buf, "Id: ");
  1060. strbuf_addstr(buf, oid_to_hex(&oid));
  1061. strbuf_addstr(buf, " $");
  1062. }
  1063. strbuf_add(buf, src, len);
  1064. free(to_free);
  1065. return 1;
  1066. }
  1067. static const char *git_path_check_encoding(struct attr_check_item *check)
  1068. {
  1069. const char *value = check->value;
  1070. if (ATTR_UNSET(value) || !strlen(value))
  1071. return NULL;
  1072. if (ATTR_TRUE(value) || ATTR_FALSE(value)) {
  1073. die(_("true/false are no valid working-tree-encodings"));
  1074. }
  1075. /* Don't encode to the default encoding */
  1076. if (same_encoding(value, default_encoding))
  1077. return NULL;
  1078. return value;
  1079. }
  1080. static enum crlf_action git_path_check_crlf(struct attr_check_item *check)
  1081. {
  1082. const char *value = check->value;
  1083. if (ATTR_TRUE(value))
  1084. return CRLF_TEXT;
  1085. else if (ATTR_FALSE(value))
  1086. return CRLF_BINARY;
  1087. else if (ATTR_UNSET(value))
  1088. ;
  1089. else if (!strcmp(value, "input"))
  1090. return CRLF_TEXT_INPUT;
  1091. else if (!strcmp(value, "auto"))
  1092. return CRLF_AUTO;
  1093. return CRLF_UNDEFINED;
  1094. }
  1095. static enum eol git_path_check_eol(struct attr_check_item *check)
  1096. {
  1097. const char *value = check->value;
  1098. if (ATTR_UNSET(value))
  1099. ;
  1100. else if (!strcmp(value, "lf"))
  1101. return EOL_LF;
  1102. else if (!strcmp(value, "crlf"))
  1103. return EOL_CRLF;
  1104. return EOL_UNSET;
  1105. }
  1106. static struct convert_driver *git_path_check_convert(struct attr_check_item *check)
  1107. {
  1108. const char *value = check->value;
  1109. struct convert_driver *drv;
  1110. if (ATTR_TRUE(value) || ATTR_FALSE(value) || ATTR_UNSET(value))
  1111. return NULL;
  1112. for (drv = user_convert; drv; drv = drv->next)
  1113. if (!strcmp(value, drv->name))
  1114. return drv;
  1115. return NULL;
  1116. }
  1117. static int git_path_check_ident(struct attr_check_item *check)
  1118. {
  1119. const char *value = check->value;
  1120. return !!ATTR_TRUE(value);
  1121. }
/*
 * Conversion settings for one path, as filled in by convert_attrs()
 * from the path's attributes and the configuration.
 */
struct conv_attrs {
	struct convert_driver *drv; /* driver named by the "filter" attribute, or NULL */
	enum crlf_action attr_action; /* What attr says */
	enum crlf_action crlf_action; /* When no attr is set, use core.autocrlf */
	int ident; /* non-zero when the "ident" attribute is set to true */
	const char *working_tree_encoding; /* Supported encoding or default encoding if NULL */
};
/*
 * Attribute query used by convert_attrs(); created on first use there
 * and released (and reset to NULL) by reset_parsed_attributes().
 */
static struct attr_check *check;
  1130. static void convert_attrs(const struct index_state *istate,
  1131. struct conv_attrs *ca, const char *path)
  1132. {
  1133. struct attr_check_item *ccheck = NULL;
  1134. if (!check) {
  1135. check = attr_check_initl("crlf", "ident", "filter",
  1136. "eol", "text", "working-tree-encoding",
  1137. NULL);
  1138. user_convert_tail = &user_convert;
  1139. git_config(read_convert_config, NULL);
  1140. }
  1141. git_check_attr(istate, path, check);
  1142. ccheck = check->items;
  1143. ca->crlf_action = git_path_check_crlf(ccheck + 4);
  1144. if (ca->crlf_action == CRLF_UNDEFINED)
  1145. ca->crlf_action = git_path_check_crlf(ccheck + 0);
  1146. ca->ident = git_path_check_ident(ccheck + 1);
  1147. ca->drv = git_path_check_convert(ccheck + 2);
  1148. if (ca->crlf_action != CRLF_BINARY) {
  1149. enum eol eol_attr = git_path_check_eol(ccheck + 3);
  1150. if (ca->crlf_action == CRLF_AUTO && eol_attr == EOL_LF)
  1151. ca->crlf_action = CRLF_AUTO_INPUT;
  1152. else if (ca->crlf_action == CRLF_AUTO && eol_attr == EOL_CRLF)
  1153. ca->crlf_action = CRLF_AUTO_CRLF;
  1154. else if (eol_attr == EOL_LF)
  1155. ca->crlf_action = CRLF_TEXT_INPUT;
  1156. else if (eol_attr == EOL_CRLF)
  1157. ca->crlf_action = CRLF_TEXT_CRLF;
  1158. }
  1159. ca->working_tree_encoding = git_path_check_encoding(ccheck + 5);
  1160. /* Save attr and make a decision for action */
  1161. ca->attr_action = ca->crlf_action;
  1162. if (ca->crlf_action == CRLF_TEXT)
  1163. ca->crlf_action = text_eol_is_crlf() ? CRLF_TEXT_CRLF : CRLF_TEXT_INPUT;
  1164. if (ca->crlf_action == CRLF_UNDEFINED && auto_crlf == AUTO_CRLF_FALSE)
  1165. ca->crlf_action = CRLF_BINARY;
  1166. if (ca->crlf_action == CRLF_UNDEFINED && auto_crlf == AUTO_CRLF_TRUE)
  1167. ca->crlf_action = CRLF_AUTO_CRLF;
  1168. if (ca->crlf_action == CRLF_UNDEFINED && auto_crlf == AUTO_CRLF_INPUT)
  1169. ca->crlf_action = CRLF_AUTO_INPUT;
  1170. }
  1171. void reset_parsed_attributes(void)
  1172. {
  1173. struct convert_driver *drv, *next;
  1174. attr_check_free(check);
  1175. check = NULL;
  1176. reset_merge_attributes();
  1177. for (drv = user_convert; drv; drv = next) {
  1178. next = drv->next;
  1179. free((void *)drv->name);
  1180. free(drv);
  1181. }
  1182. user_convert = NULL;
  1183. user_convert_tail = NULL;
  1184. }
  1185. int would_convert_to_git_filter_fd(const struct index_state *istate, const char *path)
  1186. {
  1187. struct conv_attrs ca;
  1188. convert_attrs(istate, &ca, path);
  1189. if (!ca.drv)
  1190. return 0;
  1191. /*
  1192. * Apply a filter to an fd only if the filter is required to succeed.
  1193. * We must die if the filter fails, because the original data before
  1194. * filtering is not available.
  1195. */
  1196. if (!ca.drv->required)
  1197. return 0;
  1198. return apply_filter(path, NULL, 0, -1, NULL, ca.drv, CAP_CLEAN, NULL);
  1199. }
  1200. const char *get_convert_attr_ascii(const struct index_state *istate, const char *path)
  1201. {
  1202. struct conv_attrs ca;
  1203. convert_attrs(istate, &ca, path);
  1204. switch (ca.attr_action) {
  1205. case CRLF_UNDEFINED:
  1206. return "";
  1207. case CRLF_BINARY:
  1208. return "-text";
  1209. case CRLF_TEXT:
  1210. return "text";
  1211. case CRLF_TEXT_INPUT:
  1212. return "text eol=lf";
  1213. case CRLF_TEXT_CRLF:
  1214. return "text eol=crlf";
  1215. case CRLF_AUTO:
  1216. return "text=auto";
  1217. case CRLF_AUTO_CRLF:
  1218. return "text=auto eol=crlf";
  1219. case CRLF_AUTO_INPUT:
  1220. return "text=auto eol=lf";
  1221. }
  1222. return "";
  1223. }
  1224. int convert_to_git(const struct index_state *istate,
  1225. const char *path, const char *src, size_t len,
  1226. struct strbuf *dst, int conv_flags)
  1227. {
  1228. int ret = 0;
  1229. struct conv_attrs ca;
  1230. convert_attrs(istate, &ca, path);
  1231. ret |= apply_filter(path, src, len, -1, dst, ca.drv, CAP_CLEAN, NULL);
  1232. if (!ret && ca.drv && ca.drv->required)
  1233. die(_("%s: clean filter '%s' failed"), path, ca.drv->name);
  1234. if (ret && dst) {
  1235. src = dst->buf;
  1236. len = dst->len;
  1237. }
  1238. ret |= encode_to_git(path, src, len, dst, ca.working_tree_encoding, conv_flags);
  1239. if (ret && dst) {
  1240. src = dst->buf;
  1241. len = dst->len;
  1242. }
  1243. if (!(conv_flags & CONV_EOL_KEEP_CRLF)) {
  1244. ret |= crlf_to_git(istate, path, src, len, dst, ca.crlf_action, conv_flags);
  1245. if (ret && dst) {
  1246. src = dst->buf;
  1247. len = dst->len;
  1248. }
  1249. }
  1250. return ret | ident_to_git(src, len, dst, ca.ident);
  1251. }
  1252. void convert_to_git_filter_fd(const struct index_state *istate,
  1253. const char *path, int fd, struct strbuf *dst,
  1254. int conv_flags)
  1255. {
  1256. struct conv_attrs ca;
  1257. convert_attrs(istate, &ca, path);
  1258. assert(ca.drv);
  1259. assert(ca.drv->clean || ca.drv->process);
  1260. if (!apply_filter(path, NULL, 0, fd, dst, ca.drv, CAP_CLEAN, NULL))
  1261. die(_("%s: clean filter '%s' failed"), path, ca.drv->name);
  1262. encode_to_git(path, dst->buf, dst->len, dst, ca.working_tree_encoding, conv_flags);
  1263. crlf_to_git(istate, path, dst->buf, dst->len, dst, ca.crlf_action, conv_flags);
  1264. ident_to_git(dst->buf, dst->len, dst, ca.ident);
  1265. }
  1266. static int convert_to_working_tree_internal(const struct index_state *istate,
  1267. const char *path, const char *src,
  1268. size_t len, struct strbuf *dst,
  1269. int normalizing, struct delayed_checkout *dco)
  1270. {
  1271. int ret = 0, ret_filter = 0;
  1272. struct conv_attrs ca;
  1273. convert_attrs(istate, &ca, path);
  1274. ret |= ident_to_worktree(src, len, dst, ca.ident);
  1275. if (ret) {
  1276. src = dst->buf;
  1277. len = dst->len;
  1278. }
  1279. /*
  1280. * CRLF conversion can be skipped if normalizing, unless there
  1281. * is a smudge or process filter (even if the process filter doesn't
  1282. * support smudge). The filters might expect CRLFs.
  1283. */
  1284. if ((ca.drv && (ca.drv->smudge || ca.drv->process)) || !normalizing) {
  1285. ret |= crlf_to_worktree(src, len, dst, ca.crlf_action);
  1286. if (ret) {
  1287. src = dst->buf;
  1288. len = dst->len;
  1289. }
  1290. }
  1291. ret |= encode_to_worktree(path, src, len, dst, ca.working_tree_encoding);
  1292. if (ret) {
  1293. src = dst->buf;
  1294. len = dst->len;
  1295. }
  1296. ret_filter = apply_filter(
  1297. path, src, len, -1, dst, ca.drv, CAP_SMUDGE, dco);
  1298. if (!ret_filter && ca.drv && ca.drv->required)
  1299. die(_("%s: smudge filter %s failed"), path, ca.drv->name);
  1300. return ret | ret_filter;
  1301. }
  1302. int async_convert_to_working_tree(const struct index_state *istate,
  1303. const char *path, const char *src,
  1304. size_t len, struct strbuf *dst,
  1305. void *dco)
  1306. {
  1307. return convert_to_working_tree_internal(istate, path, src, len, dst, 0, dco);
  1308. }
  1309. int convert_to_working_tree(const struct index_state *istate,
  1310. const char *path, const char *src,
  1311. size_t len, struct strbuf *dst)
  1312. {
  1313. return convert_to_working_tree_internal(istate, path, src, len, dst, 0, NULL);
  1314. }
  1315. int renormalize_buffer(const struct index_state *istate, const char *path,
  1316. const char *src, size_t len, struct strbuf *dst)
  1317. {
  1318. int ret = convert_to_working_tree_internal(istate, path, src, len, dst, 1, NULL);
  1319. if (ret) {
  1320. src = dst->buf;
  1321. len = dst->len;
  1322. }
  1323. return ret | convert_to_git(istate, path, src, len, dst, CONV_EOL_RENORMALIZE);
  1324. }
  1325. /*****************************************************************
  1326. *
  1327. * Streaming conversion support
  1328. *
  1329. *****************************************************************/
/*
 * Conversion step of a stream filter.  The implementations in this file
 * read from input (*isize_p bytes available) and write to output
 * (*osize_p bytes of room), decrementing both counters by what was
 * consumed and produced.  A NULL input asks the filter to drain
 * whatever it is still holding.  Non-zero return signals an error.
 */
typedef int (*filter_fn)(struct stream_filter *,
			 const char *input, size_t *isize_p,
			 char *output, size_t *osize_p);
/* Releases a stream filter instance. */
typedef void (*free_fn)(struct stream_filter *);
/*
 * Operations of one kind of stream filter.  The tables in this file are
 * initialized positionally, so the member order matters.
 */
struct stream_filter_vtbl {
	filter_fn filter; /* called through stream_filter() */
	free_fn free; /* called through free_stream_filter() */
};
/*
 * Common header of every stream filter; concrete filters embed it as
 * their first member and are cast back from a pointer to it.
 */
struct stream_filter {
	struct stream_filter_vtbl *vtbl;
};
  1341. static int null_filter_fn(struct stream_filter *filter,
  1342. const char *input, size_t *isize_p,
  1343. char *output, size_t *osize_p)
  1344. {
  1345. size_t count;
  1346. if (!input)
  1347. return 0; /* we do not keep any states */
  1348. count = *isize_p;
  1349. if (*osize_p < count)
  1350. count = *osize_p;
  1351. if (count) {
  1352. memmove(output, input, count);
  1353. *isize_p -= count;
  1354. *osize_p -= count;
  1355. }
  1356. return 0;
  1357. }
  1358. static void null_free_fn(struct stream_filter *filter)
  1359. {
  1360. ; /* nothing -- null instances are shared */
  1361. }
/* Operations of the pass-through filter. */
static struct stream_filter_vtbl null_vtbl = {
	null_filter_fn,
	null_free_fn,
};

/* The one shared pass-through filter instance; see is_null_stream_filter(). */
static struct stream_filter null_filter_singleton = {
	&null_vtbl,
};
  1369. int is_null_stream_filter(struct stream_filter *filter)
  1370. {
  1371. return filter == &null_filter_singleton;
  1372. }
/*
 * LF-to-CRLF filter
 *
 * State kept between calls of lf_to_crlf_filter_fn(): at most one
 * input character that has been consumed but not yet written out.
 */
struct lf_to_crlf_filter {
	struct stream_filter filter;
	unsigned has_held:1; /* is "held" valid? */
	char held; /* the pending character */
};
/*
 * Filter step that writes a CR in front of every LF of the input; a CR
 * that already precedes a LF is not doubled.
 *
 * One character may be carried over to the next call in the filter
 * state: either a CR seen at the end of the input (we do not know yet
 * whether a LF follows), or the character that was being processed when
 * the output ran out of room.
 *
 * *isize_p and *osize_p are decremented by the number of bytes consumed
 * and produced.  A NULL input flushes the held character, if any.
 * Always returns 0.
 */
static int lf_to_crlf_filter_fn(struct stream_filter *filter,
				const char *input, size_t *isize_p,
				char *output, size_t *osize_p)
{
	size_t count, o = 0;
	struct lf_to_crlf_filter *lf_to_crlf = (struct lf_to_crlf_filter *)filter;

	/*
	 * We may be holding onto the CR to see if it is followed by a
	 * LF, in which case we would need to go to the main loop.
	 * Otherwise, just emit it to the output stream.
	 */
	if (lf_to_crlf->has_held && (lf_to_crlf->held != '\r' || !input)) {
		output[o++] = lf_to_crlf->held;
		lf_to_crlf->has_held = 0;
	}

	/* We are told to drain */
	if (!input) {
		*osize_p -= o;
		return 0;
	}

	count = *isize_p;
	if (count || lf_to_crlf->has_held) {
		size_t i;
		int was_cr = 0;

		/* anything still held at this point is a CR (see above) */
		if (lf_to_crlf->has_held) {
			was_cr = 1;
			lf_to_crlf->has_held = 0;
		}

		for (i = 0; o < *osize_p && i < count; i++) {
			char ch = input[i];

			if (ch == '\n') {
				/* the CR of the CRLF pair, whether it was in the input or not */
				output[o++] = '\r';
			} else if (was_cr) {
				/*
				 * Previous round saw CR and it is not followed
				 * by a LF; emit the CR before processing the
				 * current character.
				 */
				output[o++] = '\r';
			}

			/*
			 * We may have consumed the last output slot,
			 * in which case we need to break out of this
			 * loop; hold the current character before
			 * returning.
			 */
			if (*osize_p <= o) {
				lf_to_crlf->has_held = 1;
				lf_to_crlf->held = ch;
				continue; /* break but increment i */
			}

			/* defer a CR until we have seen what follows it */
			if (ch == '\r') {
				was_cr = 1;
				continue;
			}

			was_cr = 0;
			output[o++] = ch;
		}

		*osize_p -= o;
		*isize_p -= i;

		/* input ended on a CR: carry it over to the next call */
		if (!lf_to_crlf->has_held && was_cr) {
			lf_to_crlf->has_held = 1;
			lf_to_crlf->held = '\r';
		}
	}
	return 0;
}
  1448. static void lf_to_crlf_free_fn(struct stream_filter *filter)
  1449. {
  1450. free(filter);
  1451. }
/* Operations of the LF-to-CRLF filter. */
static struct stream_filter_vtbl lf_to_crlf_vtbl = {
	lf_to_crlf_filter_fn,
	lf_to_crlf_free_fn,
};
  1456. static struct stream_filter *lf_to_crlf_filter(void)
  1457. {
  1458. struct lf_to_crlf_filter *lf_to_crlf = xcalloc(1, sizeof(*lf_to_crlf));
  1459. lf_to_crlf->filter.vtbl = &lf_to_crlf_vtbl;
  1460. return (struct stream_filter *)lf_to_crlf;
  1461. }
/*
 * Cascade filter
 *
 * Chains two filters: the output of "one" is collected in buf and then
 * fed to "two".
 */
#define FILTER_BUFFER 1024
struct cascade_filter {
	struct stream_filter filter;
	struct stream_filter *one; /* first stage */
	struct stream_filter *two; /* second stage */
	char buf[FILTER_BUFFER]; /* output of one waiting to be fed to two */
	int end, ptr; /* buf[ptr..end) has not been consumed by two yet */
};
/*
 * Filter step of the cascade: run input through filter one into the
 * intermediate buffer, and the buffer through filter two into output.
 *
 * Loops until the output is full, the input is used up, or (when
 * draining, i.e. input is NULL) both stages have nothing left to give.
 * *isize_p is decremented by what filter one consumed and *osize_p by
 * what filter two produced.  Returns -1 as soon as either stage
 * reports an error, 0 otherwise.
 */
static int cascade_filter_fn(struct stream_filter *filter,
			     const char *input, size_t *isize_p,
			     char *output, size_t *osize_p)
{
	struct cascade_filter *cas = (struct cascade_filter *) filter;
	size_t filled = 0; /* bytes written to output so far */
	size_t sz = *osize_p;
	size_t to_feed, remaining;

	/*
	 * input -- (one) --> buf -- (two) --> output
	 */
	while (filled < sz) {
		remaining = sz - filled;

		/* do we already have something to feed two with? */
		if (cas->ptr < cas->end) {
			to_feed = cas->end - cas->ptr;
			if (stream_filter(cas->two,
					  cas->buf + cas->ptr, &to_feed,
					  output + filled, &remaining))
				return -1;
			/* to_feed now holds what two left unconsumed */
			cas->ptr += (cas->end - cas->ptr) - to_feed;
			filled = sz - remaining;
			continue;
		}

		/* feed one from upstream and have it emit into our buffer */
		to_feed = input ? *isize_p : 0;
		if (input && !to_feed)
			break;
		remaining = sizeof(cas->buf);
		if (stream_filter(cas->one,
				  input, &to_feed,
				  cas->buf, &remaining))
			return -1;
		cas->end = sizeof(cas->buf) - remaining;
		cas->ptr = 0;
		if (input) {
			size_t fed = *isize_p - to_feed;
			*isize_p -= fed;
			input += fed;
		}

		/* do we know that we drained one completely? */
		if (input || cas->end)
			continue;

		/* tell two to drain; we have nothing more to give it */
		to_feed = 0;
		remaining = sz - filled;
		if (stream_filter(cas->two,
				  NULL, &to_feed,
				  output + filled, &remaining))
			return -1;
		if (remaining == (sz - filled))
			break; /* completely drained two */
		filled = sz - remaining;
	}
	*osize_p -= filled;
	return 0;
}
  1530. static void cascade_free_fn(struct stream_filter *filter)
  1531. {
  1532. struct cascade_filter *cas = (struct cascade_filter *)filter;
  1533. free_stream_filter(cas->one);
  1534. free_stream_filter(cas->two);
  1535. free(filter);
  1536. }
/* Operations of the cascade filter. */
static struct stream_filter_vtbl cascade_vtbl = {
	cascade_filter_fn,
	cascade_free_fn,
};
  1541. static struct stream_filter *cascade_filter(struct stream_filter *one,
  1542. struct stream_filter *two)
  1543. {
  1544. struct cascade_filter *cascade;
  1545. if (!one || is_null_stream_filter(one))
  1546. return two;
  1547. if (!two || is_null_stream_filter(two))
  1548. return one;
  1549. cascade = xmalloc(sizeof(*cascade));
  1550. cascade->one = one;
  1551. cascade->two = two;
  1552. cascade->end = cascade->ptr = 0;
  1553. cascade->filter.vtbl = &cascade_vtbl;
  1554. return (struct stream_filter *)cascade;
  1555. }
/*
 * ident filter
 *
 * "state" is either the number of leading characters of "$Id" matched
 * so far (zero or more), or one of the two negative markers below.
 */
#define IDENT_DRAINING (-1) /* "left" must be written out before reading on */
#define IDENT_SKIPPING (-2) /* inside "$Id:", collecting up to '$' or LF */
struct ident_filter {
	struct stream_filter filter;
	struct strbuf left; /* output that has not been handed out yet */
	int state;
	char ident[GIT_MAX_HEXSZ + 5]; /* ": x40 $" */
};
  1567. static int is_foreign_ident(const char *str)
  1568. {
  1569. int i;
  1570. if (!skip_prefix(str, "$Id: ", &str))
  1571. return 0;
  1572. for (i = 0; str[i]; i++) {
  1573. if (isspace(str[i]) && str[i+1] != '$')
  1574. return 1;
  1575. }
  1576. return 0;
  1577. }
  1578. static void ident_drain(struct ident_filter *ident, char **output_p, size_t *osize_p)
  1579. {
  1580. size_t to_drain = ident->left.len;
  1581. if (*osize_p < to_drain)
  1582. to_drain = *osize_p;
  1583. if (to_drain) {
  1584. memcpy(*output_p, ident->left.buf, to_drain);
  1585. strbuf_remove(&ident->left, 0, to_drain);
  1586. *output_p += to_drain;
  1587. *osize_p -= to_drain;
  1588. }
  1589. if (!ident->left.len)
  1590. ident->state = 0;
  1591. }
/*
 * Filter step that expands "$Id$" (and re-expands "$Id: ... $") with
 * the string prepared in ident->ident.
 *
 * Output is never written directly; it is collected in ident->left and
 * handed out by ident_drain() while the state is IDENT_DRAINING.  A
 * non-negative state counts how many characters of "$Id" have been
 * matched, IDENT_SKIPPING means we are collecting the payload of an
 * expanded keyword.
 *
 * A NULL input flushes whatever is pending, including a partially
 * matched "$Id" prefix.  Always returns 0.
 */
static int ident_filter_fn(struct stream_filter *filter,
			   const char *input, size_t *isize_p,
			   char *output, size_t *osize_p)
{
	struct ident_filter *ident = (struct ident_filter *)filter;
	static const char head[] = "$Id";

	if (!input) {
		/* drain upon eof */
		switch (ident->state) {
		default:
			/* partially matched prefix: emit it verbatim */
			strbuf_add(&ident->left, head, ident->state);
			/* fallthrough */
		case IDENT_SKIPPING:
			/* fallthrough */
		case IDENT_DRAINING:
			ident_drain(ident, &output, osize_p);
		}
		return 0;
	}

	while (*isize_p || (ident->state == IDENT_DRAINING)) {
		int ch;

		if (ident->state == IDENT_DRAINING) {
			ident_drain(ident, &output, osize_p);
			if (!*osize_p)
				break;
			continue;
		}

		ch = *(input++);
		(*isize_p)--;

		if (ident->state == IDENT_SKIPPING) {
			/*
			 * Skipping until '$' or LF, but keeping them
			 * in case it is a foreign ident.
			 */
			strbuf_addch(&ident->left, ch);
			if (ch != '\n' && ch != '$')
				continue;
			if (ch == '$' && !is_foreign_ident(ident->left.buf)) {
				/* cut back to "$Id" and append our expansion */
				strbuf_setlen(&ident->left, sizeof(head) - 1);
				strbuf_addstr(&ident->left, ident->ident);
			}
			ident->state = IDENT_DRAINING;
			continue;
		}

		/* still matching the "$Id" prefix? */
		if (ident->state < sizeof(head) &&
		    head[ident->state] == ch) {
			ident->state++;
			continue;
		}

		/* mismatch: whatever part of the prefix matched goes out as is */
		if (ident->state)
			strbuf_add(&ident->left, head, ident->state);
		if (ident->state == sizeof(head) - 1) {
			/* all of "$Id" was seen; what follows decides */
			if (ch != ':' && ch != '$') {
				strbuf_addch(&ident->left, ch);
				ident->state = 0;
				continue;
			}

			if (ch == ':') {
				strbuf_addch(&ident->left, ch);
				ident->state = IDENT_SKIPPING;
			} else {
				strbuf_addstr(&ident->left, ident->ident);
				ident->state = IDENT_DRAINING;
			}
			continue;
		}

		strbuf_addch(&ident->left, ch);
		ident->state = IDENT_DRAINING;
	}
	return 0;
}
  1663. static void ident_free_fn(struct stream_filter *filter)
  1664. {
  1665. struct ident_filter *ident = (struct ident_filter *)filter;
  1666. strbuf_release(&ident->left);
  1667. free(filter);
  1668. }
/* Operations of the ident filter. */
static struct stream_filter_vtbl ident_vtbl = {
	ident_filter_fn,
	ident_free_fn,
};
  1673. static struct stream_filter *ident_filter(const struct object_id *oid)
  1674. {
  1675. struct ident_filter *ident = xmalloc(sizeof(*ident));
  1676. xsnprintf(ident->ident, sizeof(ident->ident),
  1677. ": %s $", oid_to_hex(oid));
  1678. strbuf_init(&ident->left, 0);
  1679. ident->filter.vtbl = &ident_vtbl;
  1680. ident->state = 0;
  1681. return (struct stream_filter *)ident;
  1682. }
  1683. /*
  1684. * Return an appropriately constructed filter for the path, or NULL if
  1685. * the contents cannot be filtered without reading the whole thing
  1686. * in-core.
  1687. *
  1688. * Note that you would be crazy to set CRLF, smuge/clean or ident to a
  1689. * large binary blob you would want us not to slurp into the memory!
  1690. */
  1691. struct stream_filter *get_stream_filter(const struct index_state *istate,
  1692. const char *path,
  1693. const struct object_id *oid)
  1694. {
  1695. struct conv_attrs ca;
  1696. struct stream_filter *filter = NULL;
  1697. convert_attrs(istate, &ca, path);
  1698. if (ca.drv && (ca.drv->process || ca.drv->smudge || ca.drv->clean))
  1699. return NULL;
  1700. if (ca.working_tree_encoding)
  1701. return NULL;
  1702. if (ca.crlf_action == CRLF_AUTO || ca.crlf_action == CRLF_AUTO_CRLF)
  1703. return NULL;
  1704. if (ca.ident)
  1705. filter = ident_filter(oid);
  1706. if (output_eol(ca.crlf_action) == EOL_CRLF)
  1707. filter = cascade_filter(filter, lf_to_crlf_filter());
  1708. else
  1709. filter = cascade_filter(filter, &null_filter_singleton);
  1710. return filter;
  1711. }
  1712. void free_stream_filter(struct stream_filter *filter)
  1713. {
  1714. filter->vtbl->free(filter);
  1715. }
  1716. int stream_filter(struct stream_filter *filter,
  1717. const char *input, size_t *isize_p,
  1718. char *output, size_t *osize_p)
  1719. {
  1720. return filter->vtbl->filter(filter, input, isize_p, output, osize_p);
  1721. }