THIS IS A TEST INSTANCE ONLY! REPOSITORIES CAN BE DELETED AT ANY TIME!

Git Source Code Mirror - This is a publish-only repository and all pull requests are ignored. Please follow Documentation/SubmittingPatches procedure for any of your improvements.
git
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

1186 lines
26KB

  1. #include "cache.h"
  2. #include "config.h"
  3. #include "utf8.h"
  4. #include "strbuf.h"
  5. #include "mailinfo.h"
  6. static void cleanup_space(struct strbuf *sb)
  7. {
  8. size_t pos, cnt;
  9. for (pos = 0; pos < sb->len; pos++) {
  10. if (isspace(sb->buf[pos])) {
  11. sb->buf[pos] = ' ';
  12. for (cnt = 0; isspace(sb->buf[pos + cnt + 1]); cnt++);
  13. strbuf_remove(sb, pos + 1, cnt);
  14. }
  15. }
  16. }
  17. static void get_sane_name(struct strbuf *out, struct strbuf *name, struct strbuf *email)
  18. {
  19. struct strbuf *src = name;
  20. if (name->len < 3 || 60 < name->len || strchr(name->buf, '@') ||
  21. strchr(name->buf, '<') || strchr(name->buf, '>'))
  22. src = email;
  23. else if (name == out)
  24. return;
  25. strbuf_reset(out);
  26. strbuf_addbuf(out, src);
  27. }
  28. static void parse_bogus_from(struct mailinfo *mi, const struct strbuf *line)
  29. {
  30. /* John Doe <johndoe> */
  31. char *bra, *ket;
  32. /* This is fallback, so do not bother if we already have an
  33. * e-mail address.
  34. */
  35. if (mi->email.len)
  36. return;
  37. bra = strchr(line->buf, '<');
  38. if (!bra)
  39. return;
  40. ket = strchr(bra, '>');
  41. if (!ket)
  42. return;
  43. strbuf_reset(&mi->email);
  44. strbuf_add(&mi->email, bra + 1, ket - bra - 1);
  45. strbuf_reset(&mi->name);
  46. strbuf_add(&mi->name, line->buf, bra - line->buf);
  47. strbuf_trim(&mi->name);
  48. get_sane_name(&mi->name, &mi->name, &mi->email);
  49. }
  /*
   * Copy a parenthesized comment into outbuf, dropping the backslash of
   * each quoted-pair.
   *
   * in points just past the opening '('.  A '(' is emitted first, nested
   * comments are handled recursively, and the closing ')' is copied as
   * well.  Returns a pointer just past the closing ')', or a pointer to
   * the terminating NUL when the comment is not closed.  The returned
   * pointer never goes past the NUL, so the caller can keep scanning
   * safely.
   */
  static const char *unquote_comment(struct strbuf *outbuf, const char *in)
  {
      int escaped = 0;

      strbuf_addch(outbuf, '(');

      /* Test *in before advancing so that we never step over the NUL. */
      while (*in) {
          int c = *in++;

          if (escaped) {
              /* character after a backslash is taken literally */
              escaped = 0;
          } else if (c == '\\') {
              escaped = 1;
              continue;
          } else if (c == '(') {
              in = unquote_comment(outbuf, in);
              continue;
          } else if (c == ')') {
              strbuf_addch(outbuf, ')');
              return in;
          }

          strbuf_addch(outbuf, c);
      }

      return in;
  }
  /*
   * Copy the contents of a double-quoted string into outbuf, without the
   * surrounding quotes and without the backslash of each quoted-pair.
   *
   * in points just past the opening '"'.  Returns a pointer just past the
   * closing '"', or a pointer to the terminating NUL when the string is
   * not closed.  The returned pointer never goes past the NUL, so the
   * caller can keep scanning safely.
   */
  static const char *unquote_quoted_string(struct strbuf *outbuf, const char *in)
  {
      int escaped = 0;

      /* Test *in before advancing so that we never step over the NUL. */
      while (*in) {
          int c = *in++;

          if (escaped) {
              /* character after a backslash is taken literally */
              escaped = 0;
          } else if (c == '\\') {
              escaped = 1;
              continue;
          } else if (c == '"') {
              return in;
          }

          strbuf_addch(outbuf, c);
      }

      return in;
  }
  95. static void unquote_quoted_pair(struct strbuf *line)
  96. {
  97. struct strbuf outbuf;
  98. const char *in = line->buf;
  99. int c;
  100. strbuf_init(&outbuf, line->len);
  101. while ((c = *in++) != 0) {
  102. switch (c) {
  103. case '"':
  104. in = unquote_quoted_string(&outbuf, in);
  105. continue;
  106. case '(':
  107. in = unquote_comment(&outbuf, in);
  108. continue;
  109. }
  110. strbuf_addch(&outbuf, c);
  111. }
  112. strbuf_swap(&outbuf, line);
  113. strbuf_release(&outbuf);
  114. }
  115. static void handle_from(struct mailinfo *mi, const struct strbuf *from)
  116. {
  117. char *at;
  118. size_t el;
  119. struct strbuf f;
  120. strbuf_init(&f, from->len);
  121. strbuf_addbuf(&f, from);
  122. unquote_quoted_pair(&f);
  123. at = strchr(f.buf, '@');
  124. if (!at) {
  125. parse_bogus_from(mi, from);
  126. goto out;
  127. }
  128. /*
  129. * If we already have one email, don't take any confusing lines
  130. */
  131. if (mi->email.len && strchr(at + 1, '@'))
  132. goto out;
  133. /* Pick up the string around '@', possibly delimited with <>
  134. * pair; that is the email part.
  135. */
  136. while (at > f.buf) {
  137. char c = at[-1];
  138. if (isspace(c))
  139. break;
  140. if (c == '<') {
  141. at[-1] = ' ';
  142. break;
  143. }
  144. at--;
  145. }
  146. el = strcspn(at, " \n\t\r\v\f>");
  147. strbuf_reset(&mi->email);
  148. strbuf_add(&mi->email, at, el);
  149. strbuf_remove(&f, at - f.buf, el + (at[el] ? 1 : 0));
  150. /* The remainder is name. It could be
  151. *
  152. * - "John Doe <john.doe@xz>" (a), or
  153. * - "john.doe@xz (John Doe)" (b), or
  154. * - "John (zzz) Doe <john.doe@xz> (Comment)" (c)
  155. *
  156. * but we have removed the email part, so
  157. *
  158. * - remove extra spaces which could stay after email (case 'c'), and
  159. * - trim from both ends, possibly removing the () pair at the end
  160. * (cases 'a' and 'b').
  161. */
  162. cleanup_space(&f);
  163. strbuf_trim(&f);
  164. if (f.buf[0] == '(' && f.len && f.buf[f.len - 1] == ')') {
  165. strbuf_remove(&f, 0, 1);
  166. strbuf_setlen(&f, f.len - 1);
  167. }
  168. get_sane_name(&mi->name, &f, &mi->email);
  169. out:
  170. strbuf_release(&f);
  171. }
  172. static void handle_header(struct strbuf **out, const struct strbuf *line)
  173. {
  174. if (!*out) {
  175. *out = xmalloc(sizeof(struct strbuf));
  176. strbuf_init(*out, line->len);
  177. } else
  178. strbuf_reset(*out);
  179. strbuf_addbuf(*out, line);
  180. }
  181. /* NOTE NOTE NOTE. We do not claim we do full MIME. We just attempt
  182. * to have enough heuristics to grok MIME encoded patches often found
  183. * on our mailing lists. For example, we do not even treat header lines
  184. * case insensitively.
  185. */
  /*
   * Extract the value of attribute name (e.g. "charset=") from line.
   *
   * attr is emptied first.  The name is located case-insensitively; a
   * value starting with '"' runs up to the next '"', otherwise it runs up
   * to the next ';', space or tab.  Returns 1 when the attribute was
   * found (its value is then in attr), 0 otherwise.
   */
  static int slurp_attr(const char *line, const char *name, struct strbuf *attr)
  {
      const char *value = strcasestr(line, name);
      const char *terminators = "; \t";

      strbuf_setlen(attr, 0);
      if (!value)
          return 0;

      value += strlen(name);
      if (*value == '"') {
          value++;
          terminators = "\"";
      }

      strbuf_add(attr, value, strcspn(value, terminators));
      return 1;
  }
  204. static void handle_content_type(struct mailinfo *mi, struct strbuf *line)
  205. {
  206. struct strbuf *boundary = xmalloc(sizeof(struct strbuf));
  207. strbuf_init(boundary, line->len);
  208. if (slurp_attr(line->buf, "boundary=", boundary)) {
  209. strbuf_insert(boundary, 0, "--", 2);
  210. if (++mi->content_top >= &mi->content[MAX_BOUNDARIES]) {
  211. error("Too many boundaries to handle");
  212. mi->input_error = -1;
  213. mi->content_top = &mi->content[MAX_BOUNDARIES] - 1;
  214. return;
  215. }
  216. *(mi->content_top) = boundary;
  217. boundary = NULL;
  218. }
  219. slurp_attr(line->buf, "charset=", &mi->charset);
  220. if (boundary) {
  221. strbuf_release(boundary);
  222. free(boundary);
  223. }
  224. }
  225. static void handle_content_transfer_encoding(struct mailinfo *mi,
  226. const struct strbuf *line)
  227. {
  228. if (strcasestr(line->buf, "base64"))
  229. mi->transfer_encoding = TE_BASE64;
  230. else if (strcasestr(line->buf, "quoted-printable"))
  231. mi->transfer_encoding = TE_QP;
  232. else
  233. mi->transfer_encoding = TE_DONTCARE;
  234. }
  235. static int is_multipart_boundary(struct mailinfo *mi, const struct strbuf *line)
  236. {
  237. struct strbuf *content_top = *(mi->content_top);
  238. return ((content_top->len <= line->len) &&
  239. !memcmp(line->buf, content_top->buf, content_top->len));
  240. }
  241. static void cleanup_subject(struct mailinfo *mi, struct strbuf *subject)
  242. {
  243. size_t at = 0;
  244. while (at < subject->len) {
  245. char *pos;
  246. size_t remove;
  247. switch (subject->buf[at]) {
  248. case 'r': case 'R':
  249. if (subject->len <= at + 3)
  250. break;
  251. if ((subject->buf[at + 1] == 'e' ||
  252. subject->buf[at + 1] == 'E') &&
  253. subject->buf[at + 2] == ':') {
  254. strbuf_remove(subject, at, 3);
  255. continue;
  256. }
  257. at++;
  258. break;
  259. case ' ': case '\t': case ':':
  260. strbuf_remove(subject, at, 1);
  261. continue;
  262. case '[':
  263. pos = strchr(subject->buf + at, ']');
  264. if (!pos)
  265. break;
  266. remove = pos - subject->buf + at + 1;
  267. if (!mi->keep_non_patch_brackets_in_subject ||
  268. (7 <= remove &&
  269. memmem(subject->buf + at, remove, "PATCH", 5)))
  270. strbuf_remove(subject, at, remove);
  271. else {
  272. at += remove;
  273. /*
  274. * If the input had a space after the ], keep
  275. * it. We don't bother with finding the end of
  276. * the space, since we later normalize it
  277. * anyway.
  278. */
  279. if (isspace(subject->buf[at]))
  280. at += 1;
  281. }
  282. continue;
  283. }
  284. break;
  285. }
  286. strbuf_trim(subject);
  287. }
  /* Capacity of the header table and of the per-message header slot arrays. */
  #define MAX_HDR_PARSED 10

  /*
   * Names of the headers whose values are collected.  The unused tail of
   * the array is NULL, which terminates loops of the form
   * "for (i = 0; header[i]; i++)".
   */
  static const char *header[MAX_HDR_PARSED] = {
      "From",
      "Subject",
      "Date",
  };
  292. static inline int cmp_header(const struct strbuf *line, const char *hdr)
  293. {
  294. int len = strlen(hdr);
  295. return !strncasecmp(line->buf, hdr, len) && line->len > len &&
  296. line->buf[len] == ':' && isspace(line->buf[len + 1]);
  297. }
  /*
   * Return non-zero when line (of length len) looks like the mbox
   * separator written by format-patch: "From ", 40 lowercase hex digits,
   * and then exactly the same tail as SAMPLE.
   */
  static int is_format_patch_separator(const char *line, int len)
  {
      static const char SAMPLE[] =
          "From e6807f3efca28b30decfecb1732a56c7db1137ee Mon Sep 17 00:00:00 2001\n";
      const size_t sample_len = strlen(SAMPLE);
      const char *cp;
      size_t offset;

      if (len != sample_len)
          return 0;
      if (!skip_prefix(line, "From ", &cp))
          return 0;
      if (strspn(cp, "0123456789abcdef") != 40)
          return 0;

      /* compare everything after the object name with the sample */
      cp += 40;
      offset = cp - line;
      return !memcmp(SAMPLE + offset, cp, sample_len - offset);
  }
  312. static struct strbuf *decode_q_segment(const struct strbuf *q_seg, int rfc2047)
  313. {
  314. const char *in = q_seg->buf;
  315. int c;
  316. struct strbuf *out = xmalloc(sizeof(struct strbuf));
  317. strbuf_init(out, q_seg->len);
  318. while ((c = *in++) != 0) {
  319. if (c == '=') {
  320. int ch, d = *in;
  321. if (d == '\n' || !d)
  322. break; /* drop trailing newline */
  323. ch = hex2chr(in);
  324. if (ch >= 0) {
  325. strbuf_addch(out, ch);
  326. in += 2;
  327. continue;
  328. }
  329. /* garbage -- fall through */
  330. }
  331. if (rfc2047 && c == '_') /* rfc2047 4.2 (2) */
  332. c = 0x20;
  333. strbuf_addch(out, c);
  334. }
  335. return out;
  336. }
  337. static struct strbuf *decode_b_segment(const struct strbuf *b_seg)
  338. {
  339. /* Decode in..ep, possibly in-place to ot */
  340. int c, pos = 0, acc = 0;
  341. const char *in = b_seg->buf;
  342. struct strbuf *out = xmalloc(sizeof(struct strbuf));
  343. strbuf_init(out, b_seg->len);
  344. while ((c = *in++) != 0) {
  345. if (c == '+')
  346. c = 62;
  347. else if (c == '/')
  348. c = 63;
  349. else if ('A' <= c && c <= 'Z')
  350. c -= 'A';
  351. else if ('a' <= c && c <= 'z')
  352. c -= 'a' - 26;
  353. else if ('0' <= c && c <= '9')
  354. c -= '0' - 52;
  355. else
  356. continue; /* garbage */
  357. switch (pos++) {
  358. case 0:
  359. acc = (c << 2);
  360. break;
  361. case 1:
  362. strbuf_addch(out, (acc | (c >> 4)));
  363. acc = (c & 15) << 4;
  364. break;
  365. case 2:
  366. strbuf_addch(out, (acc | (c >> 2)));
  367. acc = (c & 3) << 6;
  368. break;
  369. case 3:
  370. strbuf_addch(out, (acc | c));
  371. acc = pos = 0;
  372. break;
  373. }
  374. }
  375. return out;
  376. }
  377. static int convert_to_utf8(struct mailinfo *mi,
  378. struct strbuf *line, const char *charset)
  379. {
  380. char *out;
  381. if (!mi->metainfo_charset || !charset || !*charset)
  382. return 0;
  383. if (same_encoding(mi->metainfo_charset, charset))
  384. return 0;
  385. out = reencode_string(line->buf, mi->metainfo_charset, charset);
  386. if (!out) {
  387. mi->input_error = -1;
  388. return error("cannot convert from %s to %s",
  389. charset, mi->metainfo_charset);
  390. }
  391. strbuf_attach(line, out, strlen(out), strlen(out));
  392. return 0;
  393. }
  394. static void decode_header(struct mailinfo *mi, struct strbuf *it)
  395. {
  396. char *in, *ep, *cp;
  397. struct strbuf outbuf = STRBUF_INIT, *dec;
  398. struct strbuf charset_q = STRBUF_INIT, piecebuf = STRBUF_INIT;
  399. int found_error = 1; /* pessimism */
  400. in = it->buf;
  401. while (in - it->buf <= it->len && (ep = strstr(in, "=?")) != NULL) {
  402. int encoding;
  403. strbuf_reset(&charset_q);
  404. strbuf_reset(&piecebuf);
  405. if (in != ep) {
  406. /*
  407. * We are about to process an encoded-word
  408. * that begins at ep, but there is something
  409. * before the encoded word.
  410. */
  411. char *scan;
  412. for (scan = in; scan < ep; scan++)
  413. if (!isspace(*scan))
  414. break;
  415. if (scan != ep || in == it->buf) {
  416. /*
  417. * We should not lose that "something",
  418. * unless we have just processed an
  419. * encoded-word, and there is only LWS
  420. * before the one we are about to process.
  421. */
  422. strbuf_add(&outbuf, in, ep - in);
  423. }
  424. }
  425. /* E.g.
  426. * ep : "=?iso-2022-jp?B?GyR...?= foo"
  427. * ep : "=?ISO-8859-1?Q?Foo=FCbar?= baz"
  428. */
  429. ep += 2;
  430. if (ep - it->buf >= it->len || !(cp = strchr(ep, '?')))
  431. goto release_return;
  432. if (cp + 3 - it->buf > it->len)
  433. goto release_return;
  434. strbuf_add(&charset_q, ep, cp - ep);
  435. encoding = cp[1];
  436. if (!encoding || cp[2] != '?')
  437. goto release_return;
  438. ep = strstr(cp + 3, "?=");
  439. if (!ep)
  440. goto release_return;
  441. strbuf_add(&piecebuf, cp + 3, ep - cp - 3);
  442. switch (tolower(encoding)) {
  443. default:
  444. goto release_return;
  445. case 'b':
  446. dec = decode_b_segment(&piecebuf);
  447. break;
  448. case 'q':
  449. dec = decode_q_segment(&piecebuf, 1);
  450. break;
  451. }
  452. if (convert_to_utf8(mi, dec, charset_q.buf))
  453. goto release_return;
  454. strbuf_addbuf(&outbuf, dec);
  455. strbuf_release(dec);
  456. free(dec);
  457. in = ep + 2;
  458. }
  459. strbuf_addstr(&outbuf, in);
  460. strbuf_reset(it);
  461. strbuf_addbuf(it, &outbuf);
  462. found_error = 0;
  463. release_return:
  464. strbuf_release(&outbuf);
  465. strbuf_release(&charset_q);
  466. strbuf_release(&piecebuf);
  467. if (found_error)
  468. mi->input_error = -1;
  469. }
  470. static int check_header(struct mailinfo *mi,
  471. const struct strbuf *line,
  472. struct strbuf *hdr_data[], int overwrite)
  473. {
  474. int i, ret = 0, len;
  475. struct strbuf sb = STRBUF_INIT;
  476. /* search for the interesting parts */
  477. for (i = 0; header[i]; i++) {
  478. int len = strlen(header[i]);
  479. if ((!hdr_data[i] || overwrite) && cmp_header(line, header[i])) {
  480. /* Unwrap inline B and Q encoding, and optionally
  481. * normalize the meta information to utf8.
  482. */
  483. strbuf_add(&sb, line->buf + len + 2, line->len - len - 2);
  484. decode_header(mi, &sb);
  485. handle_header(&hdr_data[i], &sb);
  486. ret = 1;
  487. goto check_header_out;
  488. }
  489. }
  490. /* Content stuff */
  491. if (cmp_header(line, "Content-Type")) {
  492. len = strlen("Content-Type: ");
  493. strbuf_add(&sb, line->buf + len, line->len - len);
  494. decode_header(mi, &sb);
  495. strbuf_insert(&sb, 0, "Content-Type: ", len);
  496. handle_content_type(mi, &sb);
  497. ret = 1;
  498. goto check_header_out;
  499. }
  500. if (cmp_header(line, "Content-Transfer-Encoding")) {
  501. len = strlen("Content-Transfer-Encoding: ");
  502. strbuf_add(&sb, line->buf + len, line->len - len);
  503. decode_header(mi, &sb);
  504. handle_content_transfer_encoding(mi, &sb);
  505. ret = 1;
  506. goto check_header_out;
  507. }
  508. if (cmp_header(line, "Message-Id")) {
  509. len = strlen("Message-Id: ");
  510. strbuf_add(&sb, line->buf + len, line->len - len);
  511. decode_header(mi, &sb);
  512. if (mi->add_message_id)
  513. mi->message_id = strbuf_detach(&sb, NULL);
  514. ret = 1;
  515. goto check_header_out;
  516. }
  517. check_header_out:
  518. strbuf_release(&sb);
  519. return ret;
  520. }
  521. /*
  522. * Returns 1 if the given line or any line beginning with the given line is an
  523. * in-body header (that is, check_header will succeed when passed
  524. * mi->s_hdr_data).
  525. */
  526. static int is_inbody_header(const struct mailinfo *mi,
  527. const struct strbuf *line)
  528. {
  529. int i;
  530. for (i = 0; header[i]; i++)
  531. if (!mi->s_hdr_data[i] && cmp_header(line, header[i]))
  532. return 1;
  533. return 0;
  534. }
  535. static void decode_transfer_encoding(struct mailinfo *mi, struct strbuf *line)
  536. {
  537. struct strbuf *ret;
  538. switch (mi->transfer_encoding) {
  539. case TE_QP:
  540. ret = decode_q_segment(line, 0);
  541. break;
  542. case TE_BASE64:
  543. ret = decode_b_segment(line);
  544. break;
  545. case TE_DONTCARE:
  546. default:
  547. return;
  548. }
  549. strbuf_reset(line);
  550. strbuf_addbuf(line, ret);
  551. strbuf_release(ret);
  552. free(ret);
  553. }
  554. static inline int patchbreak(const struct strbuf *line)
  555. {
  556. size_t i;
  557. /* Beginning of a "diff -" header? */
  558. if (starts_with(line->buf, "diff -"))
  559. return 1;
  560. /* CVS "Index: " line? */
  561. if (starts_with(line->buf, "Index: "))
  562. return 1;
  563. /*
  564. * "--- <filename>" starts patches without headers
  565. * "---<sp>*" is a manual separator
  566. */
  567. if (line->len < 4)
  568. return 0;
  569. if (starts_with(line->buf, "---")) {
  570. /* space followed by a filename? */
  571. if (line->buf[3] == ' ' && !isspace(line->buf[4]))
  572. return 1;
  573. /* Just whitespace? */
  574. for (i = 3; i < line->len; i++) {
  575. unsigned char c = line->buf[i];
  576. if (c == '\n')
  577. return 1;
  578. if (!isspace(c))
  579. break;
  580. }
  581. return 0;
  582. }
  583. return 0;
  584. }
  /*
   * Return 1 when line is a "scissors" line such as "-- >8 --".
   *
   * The perforation is made of dashes, scissors marks (">8", "8<", ">%",
   * "%<") and the whitespace that follows them.  The line qualifies when
   * it contains at least one scissors mark, its visible part is at least
   * 8 bytes wide, the perforation occupies more than a third of that
   * width, and whitespace makes up less than half of the perforation.
   */
  static int is_scissors_line(const char *line)
  {
      const char *p;
      const char *first = NULL, *last = NULL;
      int scissors = 0, gap = 0;
      int perforation = 0, in_perforation = 0;
      int visible;

      for (p = line; *p; p++) {
          if (isspace(*p)) {
              /* whitespace counts only while inside the perforation */
              if (in_perforation) {
                  perforation++;
                  gap++;
              }
              continue;
          }

          last = p;
          if (!first)
              first = p;

          if (*p == '-') {
              in_perforation = 1;
              perforation++;
          } else if (!memcmp(p, ">8", 2) || !memcmp(p, "8<", 2) ||
                     !memcmp(p, ">%", 2) || !memcmp(p, "%<", 2)) {
              in_perforation = 1;
              perforation += 2;
              scissors += 2;
              p++; /* the mark is two bytes wide */
          } else {
              in_perforation = 0;
          }
      }

      /* width between the first and the last non-blank position seen */
      if (first && last)
          visible = last - first + 1;
      else
          visible = 0;

      if (!scissors || visible < 8)
          return 0;
      if (!(visible < perforation * 3))
          return 0;
      return gap * 2 < perforation;
  }
  633. static void flush_inbody_header_accum(struct mailinfo *mi)
  634. {
  635. if (!mi->inbody_header_accum.len)
  636. return;
  637. if (!check_header(mi, &mi->inbody_header_accum, mi->s_hdr_data, 0))
  638. BUG("inbody_header_accum, if not empty, must always contain a valid in-body header");
  639. strbuf_reset(&mi->inbody_header_accum);
  640. }
  641. static int check_inbody_header(struct mailinfo *mi, const struct strbuf *line)
  642. {
  643. if (mi->inbody_header_accum.len &&
  644. (line->buf[0] == ' ' || line->buf[0] == '\t')) {
  645. if (mi->use_scissors && is_scissors_line(line->buf)) {
  646. /*
  647. * This is a scissors line; do not consider this line
  648. * as a header continuation line.
  649. */
  650. flush_inbody_header_accum(mi);
  651. return 0;
  652. }
  653. strbuf_strip_suffix(&mi->inbody_header_accum, "\n");
  654. strbuf_addbuf(&mi->inbody_header_accum, line);
  655. return 1;
  656. }
  657. flush_inbody_header_accum(mi);
  658. if (starts_with(line->buf, ">From") && isspace(line->buf[5]))
  659. return is_format_patch_separator(line->buf + 1, line->len - 1);
  660. if (starts_with(line->buf, "[PATCH]") && isspace(line->buf[7])) {
  661. int i;
  662. for (i = 0; header[i]; i++)
  663. if (!strcmp("Subject", header[i])) {
  664. handle_header(&mi->s_hdr_data[i], line);
  665. return 1;
  666. }
  667. return 0;
  668. }
  669. if (is_inbody_header(mi, line)) {
  670. strbuf_addbuf(&mi->inbody_header_accum, line);
  671. return 1;
  672. }
  673. return 0;
  674. }
  675. static int handle_commit_msg(struct mailinfo *mi, struct strbuf *line)
  676. {
  677. assert(!mi->filter_stage);
  678. if (mi->header_stage) {
  679. if (!line->len || (line->len == 1 && line->buf[0] == '\n')) {
  680. if (mi->inbody_header_accum.len) {
  681. flush_inbody_header_accum(mi);
  682. mi->header_stage = 0;
  683. }
  684. return 0;
  685. }
  686. }
  687. if (mi->use_inbody_headers && mi->header_stage) {
  688. mi->header_stage = check_inbody_header(mi, line);
  689. if (mi->header_stage)
  690. return 0;
  691. } else
  692. /* Only trim the first (blank) line of the commit message
  693. * when ignoring in-body headers.
  694. */
  695. mi->header_stage = 0;
  696. /* normalize the log message to UTF-8. */
  697. if (convert_to_utf8(mi, line, mi->charset.buf))
  698. return 0; /* mi->input_error already set */
  699. if (mi->use_scissors && is_scissors_line(line->buf)) {
  700. int i;
  701. strbuf_setlen(&mi->log_message, 0);
  702. mi->header_stage = 1;
  703. /*
  704. * We may have already read "secondary headers"; purge
  705. * them to give ourselves a clean restart.
  706. */
  707. for (i = 0; header[i]; i++) {
  708. if (mi->s_hdr_data[i])
  709. strbuf_release(mi->s_hdr_data[i]);
  710. mi->s_hdr_data[i] = NULL;
  711. }
  712. return 0;
  713. }
  714. if (patchbreak(line)) {
  715. if (mi->message_id)
  716. strbuf_addf(&mi->log_message,
  717. "Message-Id: %s\n", mi->message_id);
  718. return 1;
  719. }
  720. strbuf_addbuf(&mi->log_message, line);
  721. return 0;
  722. }
  723. static void handle_patch(struct mailinfo *mi, const struct strbuf *line)
  724. {
  725. fwrite(line->buf, 1, line->len, mi->patchfile);
  726. mi->patch_lines++;
  727. }
  728. static void handle_filter(struct mailinfo *mi, struct strbuf *line)
  729. {
  730. switch (mi->filter_stage) {
  731. case 0:
  732. if (!handle_commit_msg(mi, line))
  733. break;
  734. mi->filter_stage++;
  735. /* fallthrough */
  736. case 1:
  737. handle_patch(mi, line);
  738. break;
  739. }
  740. }
  741. static int is_rfc2822_header(const struct strbuf *line)
  742. {
  743. /*
  744. * The section that defines the loosest possible
  745. * field name is "3.6.8 Optional fields".
  746. *
  747. * optional-field = field-name ":" unstructured CRLF
  748. * field-name = 1*ftext
  749. * ftext = %d33-57 / %59-126
  750. */
  751. int ch;
  752. char *cp = line->buf;
  753. /* Count mbox From headers as headers */
  754. if (starts_with(cp, "From ") || starts_with(cp, ">From "))
  755. return 1;
  756. while ((ch = *cp++)) {
  757. if (ch == ':')
  758. return 1;
  759. if ((33 <= ch && ch <= 57) ||
  760. (59 <= ch && ch <= 126))
  761. continue;
  762. break;
  763. }
  764. return 0;
  765. }
  766. static int read_one_header_line(struct strbuf *line, FILE *in)
  767. {
  768. struct strbuf continuation = STRBUF_INIT;
  769. /* Get the first part of the line. */
  770. if (strbuf_getline_lf(line, in))
  771. return 0;
  772. /*
  773. * Is it an empty line or not a valid rfc2822 header?
  774. * If so, stop here, and return false ("not a header")
  775. */
  776. strbuf_rtrim(line);
  777. if (!line->len || !is_rfc2822_header(line)) {
  778. /* Re-add the newline */
  779. strbuf_addch(line, '\n');
  780. return 0;
  781. }
  782. /*
  783. * Now we need to eat all the continuation lines..
  784. * Yuck, 2822 header "folding"
  785. */
  786. for (;;) {
  787. int peek;
  788. peek = fgetc(in);
  789. if (peek == EOF)
  790. break;
  791. ungetc(peek, in);
  792. if (peek != ' ' && peek != '\t')
  793. break;
  794. if (strbuf_getline_lf(&continuation, in))
  795. break;
  796. continuation.buf[0] = ' ';
  797. strbuf_rtrim(&continuation);
  798. strbuf_addbuf(line, &continuation);
  799. }
  800. strbuf_release(&continuation);
  801. return 1;
  802. }
  803. static int find_boundary(struct mailinfo *mi, struct strbuf *line)
  804. {
  805. while (!strbuf_getline_lf(line, mi->input)) {
  806. if (*(mi->content_top) && is_multipart_boundary(mi, line))
  807. return 1;
  808. }
  809. return 0;
  810. }
  811. static int handle_boundary(struct mailinfo *mi, struct strbuf *line)
  812. {
  813. struct strbuf newline = STRBUF_INIT;
  814. strbuf_addch(&newline, '\n');
  815. again:
  816. if (line->len >= (*(mi->content_top))->len + 2 &&
  817. !memcmp(line->buf + (*(mi->content_top))->len, "--", 2)) {
  818. /* we hit an end boundary */
  819. /* pop the current boundary off the stack */
  820. strbuf_release(*(mi->content_top));
  821. FREE_AND_NULL(*(mi->content_top));
  822. /* technically won't happen as is_multipart_boundary()
  823. will fail first. But just in case..
  824. */
  825. if (--mi->content_top < mi->content) {
  826. error("Detected mismatched boundaries, can't recover");
  827. mi->input_error = -1;
  828. mi->content_top = mi->content;
  829. strbuf_release(&newline);
  830. return 0;
  831. }
  832. handle_filter(mi, &newline);
  833. strbuf_release(&newline);
  834. if (mi->input_error)
  835. return 0;
  836. /* skip to the next boundary */
  837. if (!find_boundary(mi, line))
  838. return 0;
  839. goto again;
  840. }
  841. /* set some defaults */
  842. mi->transfer_encoding = TE_DONTCARE;
  843. strbuf_reset(&mi->charset);
  844. /* slurp in this section's info */
  845. while (read_one_header_line(line, mi->input))
  846. check_header(mi, line, mi->p_hdr_data, 0);
  847. strbuf_release(&newline);
  848. /* replenish line */
  849. if (strbuf_getline_lf(line, mi->input))
  850. return 0;
  851. strbuf_addch(line, '\n');
  852. return 1;
  853. }
  854. static void handle_body(struct mailinfo *mi, struct strbuf *line)
  855. {
  856. struct strbuf prev = STRBUF_INIT;
  857. /* Skip up to the first boundary */
  858. if (*(mi->content_top)) {
  859. if (!find_boundary(mi, line))
  860. goto handle_body_out;
  861. }
  862. do {
  863. /* process any boundary lines */
  864. if (*(mi->content_top) && is_multipart_boundary(mi, line)) {
  865. /* flush any leftover */
  866. if (prev.len) {
  867. handle_filter(mi, &prev);
  868. strbuf_reset(&prev);
  869. }
  870. if (!handle_boundary(mi, line))
  871. goto handle_body_out;
  872. }
  873. /* Unwrap transfer encoding */
  874. decode_transfer_encoding(mi, line);
  875. switch (mi->transfer_encoding) {
  876. case TE_BASE64:
  877. case TE_QP:
  878. {
  879. struct strbuf **lines, **it, *sb;
  880. /* Prepend any previous partial lines */
  881. strbuf_insert(line, 0, prev.buf, prev.len);
  882. strbuf_reset(&prev);
  883. /*
  884. * This is a decoded line that may contain
  885. * multiple new lines. Pass only one chunk
  886. * at a time to handle_filter()
  887. */
  888. lines = strbuf_split(line, '\n');
  889. for (it = lines; (sb = *it); it++) {
  890. if (*(it + 1) == NULL) /* The last line */
  891. if (sb->buf[sb->len - 1] != '\n') {
  892. /* Partial line, save it for later. */
  893. strbuf_addbuf(&prev, sb);
  894. break;
  895. }
  896. handle_filter(mi, sb);
  897. }
  898. /*
  899. * The partial chunk is saved in "prev" and will be
  900. * appended by the next iteration of read_line_with_nul().
  901. */
  902. strbuf_list_free(lines);
  903. break;
  904. }
  905. default:
  906. handle_filter(mi, line);
  907. }
  908. if (mi->input_error)
  909. break;
  910. } while (!strbuf_getwholeline(line, mi->input, '\n'));
  911. flush_inbody_header_accum(mi);
  912. handle_body_out:
  913. strbuf_release(&prev);
  914. }
  915. static void output_header_lines(FILE *fout, const char *hdr, const struct strbuf *data)
  916. {
  917. const char *sp = data->buf;
  918. while (1) {
  919. char *ep = strchr(sp, '\n');
  920. int len;
  921. if (!ep)
  922. len = strlen(sp);
  923. else
  924. len = ep - sp;
  925. fprintf(fout, "%s: %.*s\n", hdr, len, sp);
  926. if (!ep)
  927. break;
  928. sp = ep + 1;
  929. }
  930. }
  931. static void handle_info(struct mailinfo *mi)
  932. {
  933. struct strbuf *hdr;
  934. int i;
  935. for (i = 0; header[i]; i++) {
  936. /* only print inbody headers if we output a patch file */
  937. if (mi->patch_lines && mi->s_hdr_data[i])
  938. hdr = mi->s_hdr_data[i];
  939. else if (mi->p_hdr_data[i])
  940. hdr = mi->p_hdr_data[i];
  941. else
  942. continue;
  943. if (!strcmp(header[i], "Subject")) {
  944. if (!mi->keep_subject) {
  945. cleanup_subject(mi, hdr);
  946. cleanup_space(hdr);
  947. }
  948. output_header_lines(mi->output, "Subject", hdr);
  949. } else if (!strcmp(header[i], "From")) {
  950. cleanup_space(hdr);
  951. handle_from(mi, hdr);
  952. fprintf(mi->output, "Author: %s\n", mi->name.buf);
  953. fprintf(mi->output, "Email: %s\n", mi->email.buf);
  954. } else {
  955. cleanup_space(hdr);
  956. fprintf(mi->output, "%s: %s\n", header[i], hdr->buf);
  957. }
  958. }
  959. fprintf(mi->output, "\n");
  960. }
  961. int mailinfo(struct mailinfo *mi, const char *msg, const char *patch)
  962. {
  963. FILE *cmitmsg;
  964. int peek;
  965. struct strbuf line = STRBUF_INIT;
  966. cmitmsg = fopen(msg, "w");
  967. if (!cmitmsg) {
  968. perror(msg);
  969. return -1;
  970. }
  971. mi->patchfile = fopen(patch, "w");
  972. if (!mi->patchfile) {
  973. perror(patch);
  974. fclose(cmitmsg);
  975. return -1;
  976. }
  977. mi->p_hdr_data = xcalloc(MAX_HDR_PARSED, sizeof(*(mi->p_hdr_data)));
  978. mi->s_hdr_data = xcalloc(MAX_HDR_PARSED, sizeof(*(mi->s_hdr_data)));
  979. do {
  980. peek = fgetc(mi->input);
  981. if (peek == EOF) {
  982. fclose(cmitmsg);
  983. return error("empty patch: '%s'", patch);
  984. }
  985. } while (isspace(peek));
  986. ungetc(peek, mi->input);
  987. /* process the email header */
  988. while (read_one_header_line(&line, mi->input))
  989. check_header(mi, &line, mi->p_hdr_data, 1);
  990. handle_body(mi, &line);
  991. fwrite(mi->log_message.buf, 1, mi->log_message.len, cmitmsg);
  992. fclose(cmitmsg);
  993. fclose(mi->patchfile);
  994. handle_info(mi);
  995. strbuf_release(&line);
  996. return mi->input_error;
  997. }
  998. static int git_mailinfo_config(const char *var, const char *value, void *mi_)
  999. {
  1000. struct mailinfo *mi = mi_;
  1001. if (!starts_with(var, "mailinfo."))
  1002. return git_default_config(var, value, NULL);
  1003. if (!strcmp(var, "mailinfo.scissors")) {
  1004. mi->use_scissors = git_config_bool(var, value);
  1005. return 0;
  1006. }
  1007. /* perhaps others here */
  1008. return 0;
  1009. }
  1010. void setup_mailinfo(struct mailinfo *mi)
  1011. {
  1012. memset(mi, 0, sizeof(*mi));
  1013. strbuf_init(&mi->name, 0);
  1014. strbuf_init(&mi->email, 0);
  1015. strbuf_init(&mi->charset, 0);
  1016. strbuf_init(&mi->log_message, 0);
  1017. strbuf_init(&mi->inbody_header_accum, 0);
  1018. mi->header_stage = 1;
  1019. mi->use_inbody_headers = 1;
  1020. mi->content_top = mi->content;
  1021. git_config(git_mailinfo_config, mi);
  1022. }
  1023. void clear_mailinfo(struct mailinfo *mi)
  1024. {
  1025. int i;
  1026. strbuf_release(&mi->name);
  1027. strbuf_release(&mi->email);
  1028. strbuf_release(&mi->charset);
  1029. strbuf_release(&mi->inbody_header_accum);
  1030. free(mi->message_id);
  1031. if (mi->p_hdr_data)
  1032. for (i = 0; mi->p_hdr_data[i]; i++)
  1033. strbuf_release(mi->p_hdr_data[i]);
  1034. free(mi->p_hdr_data);
  1035. if (mi->s_hdr_data)
  1036. for (i = 0; mi->s_hdr_data[i]; i++)
  1037. strbuf_release(mi->s_hdr_data[i]);
  1038. free(mi->s_hdr_data);
  1039. while (mi->content < mi->content_top) {
  1040. free(*(mi->content_top));
  1041. mi->content_top--;
  1042. }
  1043. strbuf_release(&mi->log_message);
  1044. }