THIS IS A TEST INSTANCE ONLY! REPOSITORIES CAN BE DELETED AT ANY TIME!

Git Source Code Mirror - This is a publish-only repository and all pull requests are ignored. Please follow Documentation/SubmittingPatches procedure for any of your improvements.
git
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

1246 lines
28KB

  1. #include "cache.h"
  2. #include "config.h"
  3. #include "utf8.h"
  4. #include "strbuf.h"
  5. #include "mailinfo.h"
  6. static void cleanup_space(struct strbuf *sb)
  7. {
  8. size_t pos, cnt;
  9. for (pos = 0; pos < sb->len; pos++) {
  10. if (isspace(sb->buf[pos])) {
  11. sb->buf[pos] = ' ';
  12. for (cnt = 0; isspace(sb->buf[pos + cnt + 1]); cnt++);
  13. strbuf_remove(sb, pos + 1, cnt);
  14. }
  15. }
  16. }
  17. static void get_sane_name(struct strbuf *out, struct strbuf *name, struct strbuf *email)
  18. {
  19. struct strbuf *src = name;
  20. if (name->len < 3 || 60 < name->len || strchr(name->buf, '@') ||
  21. strchr(name->buf, '<') || strchr(name->buf, '>'))
  22. src = email;
  23. else if (name == out)
  24. return;
  25. strbuf_reset(out);
  26. strbuf_addbuf(out, src);
  27. }
  28. static void parse_bogus_from(struct mailinfo *mi, const struct strbuf *line)
  29. {
  30. /* John Doe <johndoe> */
  31. char *bra, *ket;
  32. /* This is fallback, so do not bother if we already have an
  33. * e-mail address.
  34. */
  35. if (mi->email.len)
  36. return;
  37. bra = strchr(line->buf, '<');
  38. if (!bra)
  39. return;
  40. ket = strchr(bra, '>');
  41. if (!ket)
  42. return;
  43. strbuf_reset(&mi->email);
  44. strbuf_add(&mi->email, bra + 1, ket - bra - 1);
  45. strbuf_reset(&mi->name);
  46. strbuf_add(&mi->name, line->buf, bra - line->buf);
  47. strbuf_trim(&mi->name);
  48. get_sane_name(&mi->name, &mi->name, &mi->email);
  49. }
  50. static const char *unquote_comment(struct strbuf *outbuf, const char *in)
  51. {
  52. int c;
  53. int take_next_literally = 0;
  54. strbuf_addch(outbuf, '(');
  55. while ((c = *in++) != 0) {
  56. if (take_next_literally == 1) {
  57. take_next_literally = 0;
  58. } else {
  59. switch (c) {
  60. case '\\':
  61. take_next_literally = 1;
  62. continue;
  63. case '(':
  64. in = unquote_comment(outbuf, in);
  65. continue;
  66. case ')':
  67. strbuf_addch(outbuf, ')');
  68. return in;
  69. }
  70. }
  71. strbuf_addch(outbuf, c);
  72. }
  73. return in;
  74. }
  75. static const char *unquote_quoted_string(struct strbuf *outbuf, const char *in)
  76. {
  77. int c;
  78. int take_next_literally = 0;
  79. while ((c = *in++) != 0) {
  80. if (take_next_literally == 1) {
  81. take_next_literally = 0;
  82. } else {
  83. switch (c) {
  84. case '\\':
  85. take_next_literally = 1;
  86. continue;
  87. case '"':
  88. return in;
  89. }
  90. }
  91. strbuf_addch(outbuf, c);
  92. }
  93. return in;
  94. }
  95. static void unquote_quoted_pair(struct strbuf *line)
  96. {
  97. struct strbuf outbuf;
  98. const char *in = line->buf;
  99. int c;
  100. strbuf_init(&outbuf, line->len);
  101. while ((c = *in++) != 0) {
  102. switch (c) {
  103. case '"':
  104. in = unquote_quoted_string(&outbuf, in);
  105. continue;
  106. case '(':
  107. in = unquote_comment(&outbuf, in);
  108. continue;
  109. }
  110. strbuf_addch(&outbuf, c);
  111. }
  112. strbuf_swap(&outbuf, line);
  113. strbuf_release(&outbuf);
  114. }
  115. static void handle_from(struct mailinfo *mi, const struct strbuf *from)
  116. {
  117. char *at;
  118. size_t el;
  119. struct strbuf f;
  120. strbuf_init(&f, from->len);
  121. strbuf_addbuf(&f, from);
  122. unquote_quoted_pair(&f);
  123. at = strchr(f.buf, '@');
  124. if (!at) {
  125. parse_bogus_from(mi, from);
  126. goto out;
  127. }
  128. /*
  129. * If we already have one email, don't take any confusing lines
  130. */
  131. if (mi->email.len && strchr(at + 1, '@'))
  132. goto out;
  133. /* Pick up the string around '@', possibly delimited with <>
  134. * pair; that is the email part.
  135. */
  136. while (at > f.buf) {
  137. char c = at[-1];
  138. if (isspace(c))
  139. break;
  140. if (c == '<') {
  141. at[-1] = ' ';
  142. break;
  143. }
  144. at--;
  145. }
  146. el = strcspn(at, " \n\t\r\v\f>");
  147. strbuf_reset(&mi->email);
  148. strbuf_add(&mi->email, at, el);
  149. strbuf_remove(&f, at - f.buf, el + (at[el] ? 1 : 0));
  150. /* The remainder is name. It could be
  151. *
  152. * - "John Doe <john.doe@xz>" (a), or
  153. * - "john.doe@xz (John Doe)" (b), or
  154. * - "John (zzz) Doe <john.doe@xz> (Comment)" (c)
  155. *
  156. * but we have removed the email part, so
  157. *
  158. * - remove extra spaces which could stay after email (case 'c'), and
  159. * - trim from both ends, possibly removing the () pair at the end
  160. * (cases 'a' and 'b').
  161. */
  162. cleanup_space(&f);
  163. strbuf_trim(&f);
  164. if (f.buf[0] == '(' && f.len && f.buf[f.len - 1] == ')') {
  165. strbuf_remove(&f, 0, 1);
  166. strbuf_setlen(&f, f.len - 1);
  167. }
  168. get_sane_name(&mi->name, &f, &mi->email);
  169. out:
  170. strbuf_release(&f);
  171. }
  172. static void handle_header(struct strbuf **out, const struct strbuf *line)
  173. {
  174. if (!*out) {
  175. *out = xmalloc(sizeof(struct strbuf));
  176. strbuf_init(*out, line->len);
  177. } else
  178. strbuf_reset(*out);
  179. strbuf_addbuf(*out, line);
  180. }
  181. /* NOTE NOTE NOTE. We do not claim we do full MIME. We just attempt
  182. * to have enough heuristics to grok MIME encoded patches often found
  183. * on our mailing lists. For example, we do not even treat header lines
  184. * case insensitively.
  185. */
  186. static int slurp_attr(const char *line, const char *name, struct strbuf *attr)
  187. {
  188. const char *ends, *ap = strcasestr(line, name);
  189. size_t sz;
  190. strbuf_setlen(attr, 0);
  191. if (!ap)
  192. return 0;
  193. ap += strlen(name);
  194. if (*ap == '"') {
  195. ap++;
  196. ends = "\"";
  197. }
  198. else
  199. ends = "; \t";
  200. sz = strcspn(ap, ends);
  201. strbuf_add(attr, ap, sz);
  202. return 1;
  203. }
  204. static int has_attr_value(const char *line, const char *name, const char *value)
  205. {
  206. struct strbuf sb = STRBUF_INIT;
  207. int rc = slurp_attr(line, name, &sb) && !strcasecmp(sb.buf, value);
  208. strbuf_release(&sb);
  209. return rc;
  210. }
  211. static void handle_content_type(struct mailinfo *mi, struct strbuf *line)
  212. {
  213. struct strbuf *boundary = xmalloc(sizeof(struct strbuf));
  214. strbuf_init(boundary, line->len);
  215. mi->format_flowed = has_attr_value(line->buf, "format=", "flowed");
  216. mi->delsp = has_attr_value(line->buf, "delsp=", "yes");
  217. if (slurp_attr(line->buf, "boundary=", boundary)) {
  218. strbuf_insert(boundary, 0, "--", 2);
  219. if (++mi->content_top >= &mi->content[MAX_BOUNDARIES]) {
  220. error("Too many boundaries to handle");
  221. mi->input_error = -1;
  222. mi->content_top = &mi->content[MAX_BOUNDARIES] - 1;
  223. return;
  224. }
  225. *(mi->content_top) = boundary;
  226. boundary = NULL;
  227. }
  228. slurp_attr(line->buf, "charset=", &mi->charset);
  229. if (boundary) {
  230. strbuf_release(boundary);
  231. free(boundary);
  232. }
  233. }
  234. static void handle_content_transfer_encoding(struct mailinfo *mi,
  235. const struct strbuf *line)
  236. {
  237. if (strcasestr(line->buf, "base64"))
  238. mi->transfer_encoding = TE_BASE64;
  239. else if (strcasestr(line->buf, "quoted-printable"))
  240. mi->transfer_encoding = TE_QP;
  241. else
  242. mi->transfer_encoding = TE_DONTCARE;
  243. }
  244. static int is_multipart_boundary(struct mailinfo *mi, const struct strbuf *line)
  245. {
  246. struct strbuf *content_top = *(mi->content_top);
  247. return ((content_top->len <= line->len) &&
  248. !memcmp(line->buf, content_top->buf, content_top->len));
  249. }
  250. static void cleanup_subject(struct mailinfo *mi, struct strbuf *subject)
  251. {
  252. size_t at = 0;
  253. while (at < subject->len) {
  254. char *pos;
  255. size_t remove;
  256. switch (subject->buf[at]) {
  257. case 'r': case 'R':
  258. if (subject->len <= at + 3)
  259. break;
  260. if ((subject->buf[at + 1] == 'e' ||
  261. subject->buf[at + 1] == 'E') &&
  262. subject->buf[at + 2] == ':') {
  263. strbuf_remove(subject, at, 3);
  264. continue;
  265. }
  266. at++;
  267. break;
  268. case ' ': case '\t': case ':':
  269. strbuf_remove(subject, at, 1);
  270. continue;
  271. case '[':
  272. pos = strchr(subject->buf + at, ']');
  273. if (!pos)
  274. break;
  275. remove = pos - subject->buf + at + 1;
  276. if (!mi->keep_non_patch_brackets_in_subject ||
  277. (7 <= remove &&
  278. memmem(subject->buf + at, remove, "PATCH", 5)))
  279. strbuf_remove(subject, at, remove);
  280. else {
  281. at += remove;
  282. /*
  283. * If the input had a space after the ], keep
  284. * it. We don't bother with finding the end of
  285. * the space, since we later normalize it
  286. * anyway.
  287. */
  288. if (isspace(subject->buf[at]))
  289. at += 1;
  290. }
  291. continue;
  292. }
  293. break;
  294. }
  295. strbuf_trim(subject);
  296. }
  297. #define MAX_HDR_PARSED 10
  298. static const char *header[MAX_HDR_PARSED] = {
  299. "From","Subject","Date",
  300. };
  301. static inline int cmp_header(const struct strbuf *line, const char *hdr)
  302. {
  303. int len = strlen(hdr);
  304. return !strncasecmp(line->buf, hdr, len) && line->len > len &&
  305. line->buf[len] == ':' && isspace(line->buf[len + 1]);
  306. }
  307. static int is_format_patch_separator(const char *line, int len)
  308. {
  309. static const char SAMPLE[] =
  310. "From e6807f3efca28b30decfecb1732a56c7db1137ee Mon Sep 17 00:00:00 2001\n";
  311. const char *cp;
  312. if (len != strlen(SAMPLE))
  313. return 0;
  314. if (!skip_prefix(line, "From ", &cp))
  315. return 0;
  316. if (strspn(cp, "0123456789abcdef") != 40)
  317. return 0;
  318. cp += 40;
  319. return !memcmp(SAMPLE + (cp - line), cp, strlen(SAMPLE) - (cp - line));
  320. }
  321. static struct strbuf *decode_q_segment(const struct strbuf *q_seg, int rfc2047)
  322. {
  323. const char *in = q_seg->buf;
  324. int c;
  325. struct strbuf *out = xmalloc(sizeof(struct strbuf));
  326. strbuf_init(out, q_seg->len);
  327. while ((c = *in++) != 0) {
  328. if (c == '=') {
  329. int ch, d = *in;
  330. if (d == '\n' || !d)
  331. break; /* drop trailing newline */
  332. ch = hex2chr(in);
  333. if (ch >= 0) {
  334. strbuf_addch(out, ch);
  335. in += 2;
  336. continue;
  337. }
  338. /* garbage -- fall through */
  339. }
  340. if (rfc2047 && c == '_') /* rfc2047 4.2 (2) */
  341. c = 0x20;
  342. strbuf_addch(out, c);
  343. }
  344. return out;
  345. }
  346. static struct strbuf *decode_b_segment(const struct strbuf *b_seg)
  347. {
  348. /* Decode in..ep, possibly in-place to ot */
  349. int c, pos = 0, acc = 0;
  350. const char *in = b_seg->buf;
  351. struct strbuf *out = xmalloc(sizeof(struct strbuf));
  352. strbuf_init(out, b_seg->len);
  353. while ((c = *in++) != 0) {
  354. if (c == '+')
  355. c = 62;
  356. else if (c == '/')
  357. c = 63;
  358. else if ('A' <= c && c <= 'Z')
  359. c -= 'A';
  360. else if ('a' <= c && c <= 'z')
  361. c -= 'a' - 26;
  362. else if ('0' <= c && c <= '9')
  363. c -= '0' - 52;
  364. else
  365. continue; /* garbage */
  366. switch (pos++) {
  367. case 0:
  368. acc = (c << 2);
  369. break;
  370. case 1:
  371. strbuf_addch(out, (acc | (c >> 4)));
  372. acc = (c & 15) << 4;
  373. break;
  374. case 2:
  375. strbuf_addch(out, (acc | (c >> 2)));
  376. acc = (c & 3) << 6;
  377. break;
  378. case 3:
  379. strbuf_addch(out, (acc | c));
  380. acc = pos = 0;
  381. break;
  382. }
  383. }
  384. return out;
  385. }
  386. static int convert_to_utf8(struct mailinfo *mi,
  387. struct strbuf *line, const char *charset)
  388. {
  389. char *out;
  390. if (!mi->metainfo_charset || !charset || !*charset)
  391. return 0;
  392. if (same_encoding(mi->metainfo_charset, charset))
  393. return 0;
  394. out = reencode_string(line->buf, mi->metainfo_charset, charset);
  395. if (!out) {
  396. mi->input_error = -1;
  397. return error("cannot convert from %s to %s",
  398. charset, mi->metainfo_charset);
  399. }
  400. strbuf_attach(line, out, strlen(out), strlen(out));
  401. return 0;
  402. }
  403. static void decode_header(struct mailinfo *mi, struct strbuf *it)
  404. {
  405. char *in, *ep, *cp;
  406. struct strbuf outbuf = STRBUF_INIT, *dec;
  407. struct strbuf charset_q = STRBUF_INIT, piecebuf = STRBUF_INIT;
  408. int found_error = 1; /* pessimism */
  409. in = it->buf;
  410. while (in - it->buf <= it->len && (ep = strstr(in, "=?")) != NULL) {
  411. int encoding;
  412. strbuf_reset(&charset_q);
  413. strbuf_reset(&piecebuf);
  414. if (in != ep) {
  415. /*
  416. * We are about to process an encoded-word
  417. * that begins at ep, but there is something
  418. * before the encoded word.
  419. */
  420. char *scan;
  421. for (scan = in; scan < ep; scan++)
  422. if (!isspace(*scan))
  423. break;
  424. if (scan != ep || in == it->buf) {
  425. /*
  426. * We should not lose that "something",
  427. * unless we have just processed an
  428. * encoded-word, and there is only LWS
  429. * before the one we are about to process.
  430. */
  431. strbuf_add(&outbuf, in, ep - in);
  432. }
  433. }
  434. /* E.g.
  435. * ep : "=?iso-2022-jp?B?GyR...?= foo"
  436. * ep : "=?ISO-8859-1?Q?Foo=FCbar?= baz"
  437. */
  438. ep += 2;
  439. if (ep - it->buf >= it->len || !(cp = strchr(ep, '?')))
  440. goto release_return;
  441. if (cp + 3 - it->buf > it->len)
  442. goto release_return;
  443. strbuf_add(&charset_q, ep, cp - ep);
  444. encoding = cp[1];
  445. if (!encoding || cp[2] != '?')
  446. goto release_return;
  447. ep = strstr(cp + 3, "?=");
  448. if (!ep)
  449. goto release_return;
  450. strbuf_add(&piecebuf, cp + 3, ep - cp - 3);
  451. switch (tolower(encoding)) {
  452. default:
  453. goto release_return;
  454. case 'b':
  455. dec = decode_b_segment(&piecebuf);
  456. break;
  457. case 'q':
  458. dec = decode_q_segment(&piecebuf, 1);
  459. break;
  460. }
  461. if (convert_to_utf8(mi, dec, charset_q.buf))
  462. goto release_return;
  463. strbuf_addbuf(&outbuf, dec);
  464. strbuf_release(dec);
  465. free(dec);
  466. in = ep + 2;
  467. }
  468. strbuf_addstr(&outbuf, in);
  469. strbuf_reset(it);
  470. strbuf_addbuf(it, &outbuf);
  471. found_error = 0;
  472. release_return:
  473. strbuf_release(&outbuf);
  474. strbuf_release(&charset_q);
  475. strbuf_release(&piecebuf);
  476. if (found_error)
  477. mi->input_error = -1;
  478. }
  479. static int check_header(struct mailinfo *mi,
  480. const struct strbuf *line,
  481. struct strbuf *hdr_data[], int overwrite)
  482. {
  483. int i, ret = 0, len;
  484. struct strbuf sb = STRBUF_INIT;
  485. /* search for the interesting parts */
  486. for (i = 0; header[i]; i++) {
  487. int len = strlen(header[i]);
  488. if ((!hdr_data[i] || overwrite) && cmp_header(line, header[i])) {
  489. /* Unwrap inline B and Q encoding, and optionally
  490. * normalize the meta information to utf8.
  491. */
  492. strbuf_add(&sb, line->buf + len + 2, line->len - len - 2);
  493. decode_header(mi, &sb);
  494. handle_header(&hdr_data[i], &sb);
  495. ret = 1;
  496. goto check_header_out;
  497. }
  498. }
  499. /* Content stuff */
  500. if (cmp_header(line, "Content-Type")) {
  501. len = strlen("Content-Type: ");
  502. strbuf_add(&sb, line->buf + len, line->len - len);
  503. decode_header(mi, &sb);
  504. strbuf_insert(&sb, 0, "Content-Type: ", len);
  505. handle_content_type(mi, &sb);
  506. ret = 1;
  507. goto check_header_out;
  508. }
  509. if (cmp_header(line, "Content-Transfer-Encoding")) {
  510. len = strlen("Content-Transfer-Encoding: ");
  511. strbuf_add(&sb, line->buf + len, line->len - len);
  512. decode_header(mi, &sb);
  513. handle_content_transfer_encoding(mi, &sb);
  514. ret = 1;
  515. goto check_header_out;
  516. }
  517. if (cmp_header(line, "Message-Id")) {
  518. len = strlen("Message-Id: ");
  519. strbuf_add(&sb, line->buf + len, line->len - len);
  520. decode_header(mi, &sb);
  521. if (mi->add_message_id)
  522. mi->message_id = strbuf_detach(&sb, NULL);
  523. ret = 1;
  524. goto check_header_out;
  525. }
  526. check_header_out:
  527. strbuf_release(&sb);
  528. return ret;
  529. }
  530. /*
  531. * Returns 1 if the given line or any line beginning with the given line is an
  532. * in-body header (that is, check_header will succeed when passed
  533. * mi->s_hdr_data).
  534. */
  535. static int is_inbody_header(const struct mailinfo *mi,
  536. const struct strbuf *line)
  537. {
  538. int i;
  539. for (i = 0; header[i]; i++)
  540. if (!mi->s_hdr_data[i] && cmp_header(line, header[i]))
  541. return 1;
  542. return 0;
  543. }
  544. static void decode_transfer_encoding(struct mailinfo *mi, struct strbuf *line)
  545. {
  546. struct strbuf *ret;
  547. switch (mi->transfer_encoding) {
  548. case TE_QP:
  549. ret = decode_q_segment(line, 0);
  550. break;
  551. case TE_BASE64:
  552. ret = decode_b_segment(line);
  553. break;
  554. case TE_DONTCARE:
  555. default:
  556. return;
  557. }
  558. strbuf_reset(line);
  559. strbuf_addbuf(line, ret);
  560. strbuf_release(ret);
  561. free(ret);
  562. }
  563. static inline int patchbreak(const struct strbuf *line)
  564. {
  565. size_t i;
  566. /* Beginning of a "diff -" header? */
  567. if (starts_with(line->buf, "diff -"))
  568. return 1;
  569. /* CVS "Index: " line? */
  570. if (starts_with(line->buf, "Index: "))
  571. return 1;
  572. /*
  573. * "--- <filename>" starts patches without headers
  574. * "---<sp>*" is a manual separator
  575. */
  576. if (line->len < 4)
  577. return 0;
  578. if (starts_with(line->buf, "---")) {
  579. /* space followed by a filename? */
  580. if (line->buf[3] == ' ' && !isspace(line->buf[4]))
  581. return 1;
  582. /* Just whitespace? */
  583. for (i = 3; i < line->len; i++) {
  584. unsigned char c = line->buf[i];
  585. if (c == '\n')
  586. return 1;
  587. if (!isspace(c))
  588. break;
  589. }
  590. return 0;
  591. }
  592. return 0;
  593. }
  594. static int is_scissors_line(const char *line)
  595. {
  596. const char *c;
  597. int scissors = 0, gap = 0;
  598. const char *first_nonblank = NULL, *last_nonblank = NULL;
  599. int visible, perforation = 0, in_perforation = 0;
  600. for (c = line; *c; c++) {
  601. if (isspace(*c)) {
  602. if (in_perforation) {
  603. perforation++;
  604. gap++;
  605. }
  606. continue;
  607. }
  608. last_nonblank = c;
  609. if (first_nonblank == NULL)
  610. first_nonblank = c;
  611. if (*c == '-') {
  612. in_perforation = 1;
  613. perforation++;
  614. continue;
  615. }
  616. if ((!memcmp(c, ">8", 2) || !memcmp(c, "8<", 2) ||
  617. !memcmp(c, ">%", 2) || !memcmp(c, "%<", 2))) {
  618. in_perforation = 1;
  619. perforation += 2;
  620. scissors += 2;
  621. c++;
  622. continue;
  623. }
  624. in_perforation = 0;
  625. }
  626. /*
  627. * The mark must be at least 8 bytes long (e.g. "-- >8 --").
  628. * Even though there can be arbitrary cruft on the same line
  629. * (e.g. "cut here"), in order to avoid misidentification, the
  630. * perforation must occupy more than a third of the visible
  631. * width of the line, and dashes and scissors must occupy more
  632. * than half of the perforation.
  633. */
  634. if (first_nonblank && last_nonblank)
  635. visible = last_nonblank - first_nonblank + 1;
  636. else
  637. visible = 0;
  638. return (scissors && 8 <= visible &&
  639. visible < perforation * 3 &&
  640. gap * 2 < perforation);
  641. }
  642. static void flush_inbody_header_accum(struct mailinfo *mi)
  643. {
  644. if (!mi->inbody_header_accum.len)
  645. return;
  646. if (!check_header(mi, &mi->inbody_header_accum, mi->s_hdr_data, 0))
  647. BUG("inbody_header_accum, if not empty, must always contain a valid in-body header");
  648. strbuf_reset(&mi->inbody_header_accum);
  649. }
  650. static int check_inbody_header(struct mailinfo *mi, const struct strbuf *line)
  651. {
  652. if (mi->inbody_header_accum.len &&
  653. (line->buf[0] == ' ' || line->buf[0] == '\t')) {
  654. if (mi->use_scissors && is_scissors_line(line->buf)) {
  655. /*
  656. * This is a scissors line; do not consider this line
  657. * as a header continuation line.
  658. */
  659. flush_inbody_header_accum(mi);
  660. return 0;
  661. }
  662. strbuf_strip_suffix(&mi->inbody_header_accum, "\n");
  663. strbuf_addbuf(&mi->inbody_header_accum, line);
  664. return 1;
  665. }
  666. flush_inbody_header_accum(mi);
  667. if (starts_with(line->buf, ">From") && isspace(line->buf[5]))
  668. return is_format_patch_separator(line->buf + 1, line->len - 1);
  669. if (starts_with(line->buf, "[PATCH]") && isspace(line->buf[7])) {
  670. int i;
  671. for (i = 0; header[i]; i++)
  672. if (!strcmp("Subject", header[i])) {
  673. handle_header(&mi->s_hdr_data[i], line);
  674. return 1;
  675. }
  676. return 0;
  677. }
  678. if (is_inbody_header(mi, line)) {
  679. strbuf_addbuf(&mi->inbody_header_accum, line);
  680. return 1;
  681. }
  682. return 0;
  683. }
  684. static int handle_commit_msg(struct mailinfo *mi, struct strbuf *line)
  685. {
  686. assert(!mi->filter_stage);
  687. if (mi->header_stage) {
  688. if (!line->len || (line->len == 1 && line->buf[0] == '\n')) {
  689. if (mi->inbody_header_accum.len) {
  690. flush_inbody_header_accum(mi);
  691. mi->header_stage = 0;
  692. }
  693. return 0;
  694. }
  695. }
  696. if (mi->use_inbody_headers && mi->header_stage) {
  697. mi->header_stage = check_inbody_header(mi, line);
  698. if (mi->header_stage)
  699. return 0;
  700. } else
  701. /* Only trim the first (blank) line of the commit message
  702. * when ignoring in-body headers.
  703. */
  704. mi->header_stage = 0;
  705. /* normalize the log message to UTF-8. */
  706. if (convert_to_utf8(mi, line, mi->charset.buf))
  707. return 0; /* mi->input_error already set */
  708. if (mi->use_scissors && is_scissors_line(line->buf)) {
  709. int i;
  710. strbuf_setlen(&mi->log_message, 0);
  711. mi->header_stage = 1;
  712. /*
  713. * We may have already read "secondary headers"; purge
  714. * them to give ourselves a clean restart.
  715. */
  716. for (i = 0; header[i]; i++) {
  717. if (mi->s_hdr_data[i])
  718. strbuf_release(mi->s_hdr_data[i]);
  719. mi->s_hdr_data[i] = NULL;
  720. }
  721. return 0;
  722. }
  723. if (patchbreak(line)) {
  724. if (mi->message_id)
  725. strbuf_addf(&mi->log_message,
  726. "Message-Id: %s\n", mi->message_id);
  727. return 1;
  728. }
  729. strbuf_addbuf(&mi->log_message, line);
  730. return 0;
  731. }
  732. static void handle_patch(struct mailinfo *mi, const struct strbuf *line)
  733. {
  734. fwrite(line->buf, 1, line->len, mi->patchfile);
  735. mi->patch_lines++;
  736. }
  737. static void handle_filter(struct mailinfo *mi, struct strbuf *line)
  738. {
  739. switch (mi->filter_stage) {
  740. case 0:
  741. if (!handle_commit_msg(mi, line))
  742. break;
  743. mi->filter_stage++;
  744. /* fallthrough */
  745. case 1:
  746. handle_patch(mi, line);
  747. break;
  748. }
  749. }
  750. static int is_rfc2822_header(const struct strbuf *line)
  751. {
  752. /*
  753. * The section that defines the loosest possible
  754. * field name is "3.6.8 Optional fields".
  755. *
  756. * optional-field = field-name ":" unstructured CRLF
  757. * field-name = 1*ftext
  758. * ftext = %d33-57 / %59-126
  759. */
  760. int ch;
  761. char *cp = line->buf;
  762. /* Count mbox From headers as headers */
  763. if (starts_with(cp, "From ") || starts_with(cp, ">From "))
  764. return 1;
  765. while ((ch = *cp++)) {
  766. if (ch == ':')
  767. return 1;
  768. if ((33 <= ch && ch <= 57) ||
  769. (59 <= ch && ch <= 126))
  770. continue;
  771. break;
  772. }
  773. return 0;
  774. }
  775. static int read_one_header_line(struct strbuf *line, FILE *in)
  776. {
  777. struct strbuf continuation = STRBUF_INIT;
  778. /* Get the first part of the line. */
  779. if (strbuf_getline_lf(line, in))
  780. return 0;
  781. /*
  782. * Is it an empty line or not a valid rfc2822 header?
  783. * If so, stop here, and return false ("not a header")
  784. */
  785. strbuf_rtrim(line);
  786. if (!line->len || !is_rfc2822_header(line)) {
  787. /* Re-add the newline */
  788. strbuf_addch(line, '\n');
  789. return 0;
  790. }
  791. /*
  792. * Now we need to eat all the continuation lines..
  793. * Yuck, 2822 header "folding"
  794. */
  795. for (;;) {
  796. int peek;
  797. peek = fgetc(in);
  798. if (peek == EOF)
  799. break;
  800. ungetc(peek, in);
  801. if (peek != ' ' && peek != '\t')
  802. break;
  803. if (strbuf_getline_lf(&continuation, in))
  804. break;
  805. continuation.buf[0] = ' ';
  806. strbuf_rtrim(&continuation);
  807. strbuf_addbuf(line, &continuation);
  808. }
  809. strbuf_release(&continuation);
  810. return 1;
  811. }
  812. static int find_boundary(struct mailinfo *mi, struct strbuf *line)
  813. {
  814. while (!strbuf_getline_lf(line, mi->input)) {
  815. if (*(mi->content_top) && is_multipart_boundary(mi, line))
  816. return 1;
  817. }
  818. return 0;
  819. }
  820. static int handle_boundary(struct mailinfo *mi, struct strbuf *line)
  821. {
  822. struct strbuf newline = STRBUF_INIT;
  823. strbuf_addch(&newline, '\n');
  824. again:
  825. if (line->len >= (*(mi->content_top))->len + 2 &&
  826. !memcmp(line->buf + (*(mi->content_top))->len, "--", 2)) {
  827. /* we hit an end boundary */
  828. /* pop the current boundary off the stack */
  829. strbuf_release(*(mi->content_top));
  830. FREE_AND_NULL(*(mi->content_top));
  831. /* technically won't happen as is_multipart_boundary()
  832. will fail first. But just in case..
  833. */
  834. if (--mi->content_top < mi->content) {
  835. error("Detected mismatched boundaries, can't recover");
  836. mi->input_error = -1;
  837. mi->content_top = mi->content;
  838. strbuf_release(&newline);
  839. return 0;
  840. }
  841. handle_filter(mi, &newline);
  842. strbuf_release(&newline);
  843. if (mi->input_error)
  844. return 0;
  845. /* skip to the next boundary */
  846. if (!find_boundary(mi, line))
  847. return 0;
  848. goto again;
  849. }
  850. /* set some defaults */
  851. mi->transfer_encoding = TE_DONTCARE;
  852. strbuf_reset(&mi->charset);
  853. /* slurp in this section's info */
  854. while (read_one_header_line(line, mi->input))
  855. check_header(mi, line, mi->p_hdr_data, 0);
  856. strbuf_release(&newline);
  857. /* replenish line */
  858. if (strbuf_getline_lf(line, mi->input))
  859. return 0;
  860. strbuf_addch(line, '\n');
  861. return 1;
  862. }
  863. static void handle_filter_flowed(struct mailinfo *mi, struct strbuf *line,
  864. struct strbuf *prev)
  865. {
  866. size_t len = line->len;
  867. const char *rest;
  868. if (!mi->format_flowed) {
  869. handle_filter(mi, line);
  870. return;
  871. }
  872. if (line->buf[len - 1] == '\n') {
  873. len--;
  874. if (len && line->buf[len - 1] == '\r')
  875. len--;
  876. }
  877. /* Keep signature separator as-is. */
  878. if (skip_prefix(line->buf, "-- ", &rest) && rest - line->buf == len) {
  879. if (prev->len) {
  880. handle_filter(mi, prev);
  881. strbuf_reset(prev);
  882. }
  883. handle_filter(mi, line);
  884. return;
  885. }
  886. /* Unstuff space-stuffed line. */
  887. if (len && line->buf[0] == ' ') {
  888. strbuf_remove(line, 0, 1);
  889. len--;
  890. }
  891. /* Save flowed line for later, but without the soft line break. */
  892. if (len && line->buf[len - 1] == ' ') {
  893. strbuf_add(prev, line->buf, len - !!mi->delsp);
  894. return;
  895. }
  896. /* Prepend any previous partial lines */
  897. strbuf_insert(line, 0, prev->buf, prev->len);
  898. strbuf_reset(prev);
  899. handle_filter(mi, line);
  900. }
  901. static void handle_body(struct mailinfo *mi, struct strbuf *line)
  902. {
  903. struct strbuf prev = STRBUF_INIT;
  904. /* Skip up to the first boundary */
  905. if (*(mi->content_top)) {
  906. if (!find_boundary(mi, line))
  907. goto handle_body_out;
  908. }
  909. do {
  910. /* process any boundary lines */
  911. if (*(mi->content_top) && is_multipart_boundary(mi, line)) {
  912. /* flush any leftover */
  913. if (prev.len) {
  914. handle_filter(mi, &prev);
  915. strbuf_reset(&prev);
  916. }
  917. if (!handle_boundary(mi, line))
  918. goto handle_body_out;
  919. }
  920. /* Unwrap transfer encoding */
  921. decode_transfer_encoding(mi, line);
  922. switch (mi->transfer_encoding) {
  923. case TE_BASE64:
  924. case TE_QP:
  925. {
  926. struct strbuf **lines, **it, *sb;
  927. /* Prepend any previous partial lines */
  928. strbuf_insert(line, 0, prev.buf, prev.len);
  929. strbuf_reset(&prev);
  930. /*
  931. * This is a decoded line that may contain
  932. * multiple new lines. Pass only one chunk
  933. * at a time to handle_filter()
  934. */
  935. lines = strbuf_split(line, '\n');
  936. for (it = lines; (sb = *it); it++) {
  937. if (*(it + 1) == NULL) /* The last line */
  938. if (sb->buf[sb->len - 1] != '\n') {
  939. /* Partial line, save it for later. */
  940. strbuf_addbuf(&prev, sb);
  941. break;
  942. }
  943. handle_filter_flowed(mi, sb, &prev);
  944. }
  945. /*
  946. * The partial chunk is saved in "prev" and will be
  947. * appended by the next iteration of read_line_with_nul().
  948. */
  949. strbuf_list_free(lines);
  950. break;
  951. }
  952. default:
  953. handle_filter_flowed(mi, line, &prev);
  954. }
  955. if (mi->input_error)
  956. break;
  957. } while (!strbuf_getwholeline(line, mi->input, '\n'));
  958. if (prev.len)
  959. handle_filter(mi, &prev);
  960. flush_inbody_header_accum(mi);
  961. handle_body_out:
  962. strbuf_release(&prev);
  963. }
  964. static void output_header_lines(FILE *fout, const char *hdr, const struct strbuf *data)
  965. {
  966. const char *sp = data->buf;
  967. while (1) {
  968. char *ep = strchr(sp, '\n');
  969. int len;
  970. if (!ep)
  971. len = strlen(sp);
  972. else
  973. len = ep - sp;
  974. fprintf(fout, "%s: %.*s\n", hdr, len, sp);
  975. if (!ep)
  976. break;
  977. sp = ep + 1;
  978. }
  979. }
  980. static void handle_info(struct mailinfo *mi)
  981. {
  982. struct strbuf *hdr;
  983. int i;
  984. for (i = 0; header[i]; i++) {
  985. /* only print inbody headers if we output a patch file */
  986. if (mi->patch_lines && mi->s_hdr_data[i])
  987. hdr = mi->s_hdr_data[i];
  988. else if (mi->p_hdr_data[i])
  989. hdr = mi->p_hdr_data[i];
  990. else
  991. continue;
  992. if (!strcmp(header[i], "Subject")) {
  993. if (!mi->keep_subject) {
  994. cleanup_subject(mi, hdr);
  995. cleanup_space(hdr);
  996. }
  997. output_header_lines(mi->output, "Subject", hdr);
  998. } else if (!strcmp(header[i], "From")) {
  999. cleanup_space(hdr);
  1000. handle_from(mi, hdr);
  1001. fprintf(mi->output, "Author: %s\n", mi->name.buf);
  1002. fprintf(mi->output, "Email: %s\n", mi->email.buf);
  1003. } else {
  1004. cleanup_space(hdr);
  1005. fprintf(mi->output, "%s: %s\n", header[i], hdr->buf);
  1006. }
  1007. }
  1008. fprintf(mi->output, "\n");
  1009. }
  1010. int mailinfo(struct mailinfo *mi, const char *msg, const char *patch)
  1011. {
  1012. FILE *cmitmsg;
  1013. int peek;
  1014. struct strbuf line = STRBUF_INIT;
  1015. cmitmsg = fopen(msg, "w");
  1016. if (!cmitmsg) {
  1017. perror(msg);
  1018. return -1;
  1019. }
  1020. mi->patchfile = fopen(patch, "w");
  1021. if (!mi->patchfile) {
  1022. perror(patch);
  1023. fclose(cmitmsg);
  1024. return -1;
  1025. }
  1026. mi->p_hdr_data = xcalloc(MAX_HDR_PARSED, sizeof(*(mi->p_hdr_data)));
  1027. mi->s_hdr_data = xcalloc(MAX_HDR_PARSED, sizeof(*(mi->s_hdr_data)));
  1028. do {
  1029. peek = fgetc(mi->input);
  1030. if (peek == EOF) {
  1031. fclose(cmitmsg);
  1032. return error("empty patch: '%s'", patch);
  1033. }
  1034. } while (isspace(peek));
  1035. ungetc(peek, mi->input);
  1036. /* process the email header */
  1037. while (read_one_header_line(&line, mi->input))
  1038. check_header(mi, &line, mi->p_hdr_data, 1);
  1039. handle_body(mi, &line);
  1040. fwrite(mi->log_message.buf, 1, mi->log_message.len, cmitmsg);
  1041. fclose(cmitmsg);
  1042. fclose(mi->patchfile);
  1043. handle_info(mi);
  1044. strbuf_release(&line);
  1045. return mi->input_error;
  1046. }
  1047. static int git_mailinfo_config(const char *var, const char *value, void *mi_)
  1048. {
  1049. struct mailinfo *mi = mi_;
  1050. if (!starts_with(var, "mailinfo."))
  1051. return git_default_config(var, value, NULL);
  1052. if (!strcmp(var, "mailinfo.scissors")) {
  1053. mi->use_scissors = git_config_bool(var, value);
  1054. return 0;
  1055. }
  1056. /* perhaps others here */
  1057. return 0;
  1058. }
  1059. void setup_mailinfo(struct mailinfo *mi)
  1060. {
  1061. memset(mi, 0, sizeof(*mi));
  1062. strbuf_init(&mi->name, 0);
  1063. strbuf_init(&mi->email, 0);
  1064. strbuf_init(&mi->charset, 0);
  1065. strbuf_init(&mi->log_message, 0);
  1066. strbuf_init(&mi->inbody_header_accum, 0);
  1067. mi->header_stage = 1;
  1068. mi->use_inbody_headers = 1;
  1069. mi->content_top = mi->content;
  1070. git_config(git_mailinfo_config, mi);
  1071. }
  1072. void clear_mailinfo(struct mailinfo *mi)
  1073. {
  1074. int i;
  1075. strbuf_release(&mi->name);
  1076. strbuf_release(&mi->email);
  1077. strbuf_release(&mi->charset);
  1078. strbuf_release(&mi->inbody_header_accum);
  1079. free(mi->message_id);
  1080. if (mi->p_hdr_data)
  1081. for (i = 0; mi->p_hdr_data[i]; i++)
  1082. strbuf_release(mi->p_hdr_data[i]);
  1083. free(mi->p_hdr_data);
  1084. if (mi->s_hdr_data)
  1085. for (i = 0; mi->s_hdr_data[i]; i++)
  1086. strbuf_release(mi->s_hdr_data[i]);
  1087. free(mi->s_hdr_data);
  1088. while (mi->content < mi->content_top) {
  1089. free(*(mi->content_top));
  1090. mi->content_top--;
  1091. }
  1092. strbuf_release(&mi->log_message);
  1093. }