#include "cache.h"
#include "refs.h"
#include "cache-tree.h"
#include "mergesort.h"
#include "diff.h"
#include "diffcore.h"
#include "tag.h"
#include "blame.h"

void blame_origin_decref(struct blame_origin *o)
{
	if (o && --o->refcnt <= 0) {
		struct blame_origin *p, *l = NULL;
		if (o->previous)
			blame_origin_decref(o->previous);
		free(o->file.ptr);
		/* Should be present exactly once in commit chain */
		for (p = o->commit->util; p; l = p, p = p->next) {
			if (p == o) {
				if (l)
					l->next = p->next;
				else
					o->commit->util = p->next;
				free(o);
				return;
			}
		}
		die("internal error in blame_origin_decref");
	}
}

/*
 * Given a commit and a path in it, create a new origin structure.
 * Callers that add blame to the scoreboard should use get_origin()
 * to obtain a shared, refcounted copy instead of calling this
 * function directly.
 */
static struct blame_origin *make_origin(struct commit *commit, const char *path)
{
	struct blame_origin *o;
	FLEX_ALLOC_STR(o, path, path);
	o->commit = commit;
	o->refcnt = 1;
	o->next = commit->util;
	commit->util = o;
	return o;
}

/*
 * Locate an existing origin or create a new one.
 * This moves the origin to front position in the commit util list.
 */
static struct blame_origin *get_origin(struct commit *commit, const char *path)
{
	struct blame_origin *o, *l;

	for (o = commit->util, l = NULL; o; l = o, o = o->next) {
		if (!strcmp(o->path, path)) {
			/* bump to front */
			if (l) {
				l->next = o->next;
				o->next = commit->util;
				commit->util = o;
			}
			return blame_origin_incref(o);
		}
	}
	return make_origin(commit, path);
}

static void verify_working_tree_path(struct commit *work_tree, const char *path)
{
	struct commit_list *parents;
	int pos;

	for (parents = work_tree->parents; parents; parents = parents->next) {
		const struct object_id *commit_oid = &parents->item->object.oid;
		struct object_id blob_oid;
		unsigned mode;

		if (!get_tree_entry(commit_oid, path, &blob_oid, &mode) &&
		    oid_object_info(&blob_oid, NULL) == OBJ_BLOB)
			return;
	}

	pos = cache_name_pos(path, strlen(path));
	if (pos >= 0)
		; /* path is in the index */
	else if (-1 - pos < active_nr &&
		 !strcmp(active_cache[-1 - pos]->name, path))
		; /* path is in the index, unmerged */
	else
		die("no such path '%s' in HEAD", path);
}

static struct commit_list **append_parent(struct commit_list **tail, const struct object_id *oid)
{
	struct commit *parent;

	parent = lookup_commit_reference(oid);
	if (!parent)
		die("no such commit %s", oid_to_hex(oid));
	return &commit_list_insert(parent, tail)->next;
}

static void append_merge_parents(struct commit_list **tail)
{
	int merge_head;
	struct strbuf line = STRBUF_INIT;

	merge_head = open(git_path_merge_head(), O_RDONLY);
	if (merge_head < 0) {
		if (errno == ENOENT)
			return;
		die("cannot open '%s' for reading", git_path_merge_head());
	}

	while (!strbuf_getwholeline_fd(&line, merge_head, '\n')) {
		struct object_id oid;
		if (line.len < GIT_SHA1_HEXSZ || get_oid_hex(line.buf, &oid))
			die("unknown line in '%s': %s", git_path_merge_head(), line.buf);
		tail = append_parent(tail, &oid);
	}
	close(merge_head);
	strbuf_release(&line);
}

/*
 * This isn't as simple as passing sb->buf and sb->len, because we
 * want to transfer ownership of the buffer to the commit (so we
 * must use detach).
 */
static void set_commit_buffer_from_strbuf(struct commit *c, struct strbuf *sb)
{
	size_t len;
	void *buf = strbuf_detach(sb, &len);
	set_commit_buffer(c, buf, len);
}

/*
 * Prepare a dummy commit that represents the work tree (or staged) item.
 * Note that annotating a work tree item never works in reverse.
 */
static struct commit *fake_working_tree_commit(struct diff_options *opt,
					       const char *path,
					       const char *contents_from)
{
	struct commit *commit;
	struct blame_origin *origin;
	struct commit_list **parent_tail, *parent;
	struct object_id head_oid;
	struct strbuf buf = STRBUF_INIT;
	const char *ident;
	time_t now;
	int size, len;
	struct cache_entry *ce;
	unsigned mode;
	struct strbuf msg = STRBUF_INIT;

	read_cache();
	time(&now);
	commit = alloc_commit_node();
	commit->object.parsed = 1;
	commit->date = now;
	parent_tail = &commit->parents;

	if (!resolve_ref_unsafe("HEAD", RESOLVE_REF_READING, &head_oid, NULL))
		die("no such ref: HEAD");

	parent_tail = append_parent(parent_tail, &head_oid);
	append_merge_parents(parent_tail);
	verify_working_tree_path(commit, path);

	origin = make_origin(commit, path);

	ident = fmt_ident("Not Committed Yet", "not.committed.yet", NULL, 0);
	strbuf_addstr(&msg, "tree 0000000000000000000000000000000000000000\n");
	for (parent = commit->parents; parent; parent = parent->next)
		strbuf_addf(&msg, "parent %s\n",
			    oid_to_hex(&parent->item->object.oid));
	strbuf_addf(&msg,
		    "author %s\n"
		    "committer %s\n\n"
		    "Version of %s from %s\n",
		    ident, ident, path,
		    (!contents_from ? path :
		     (!strcmp(contents_from, "-") ? "standard input" : contents_from)));
	set_commit_buffer_from_strbuf(commit, &msg);

	if (!contents_from || strcmp("-", contents_from)) {
		struct stat st;
		const char *read_from;
		char *buf_ptr;
		unsigned long buf_len;

		if (contents_from) {
			if (stat(contents_from, &st) < 0)
				die_errno("Cannot stat '%s'", contents_from);
			read_from = contents_from;
		}
		else {
			if (lstat(path, &st) < 0)
				die_errno("Cannot lstat '%s'", path);
			read_from = path;
		}
		mode = canon_mode(st.st_mode);

		switch (st.st_mode & S_IFMT) {
		case S_IFREG:
			if (opt->flags.allow_textconv &&
			    textconv_object(read_from, mode, &null_oid, 0, &buf_ptr, &buf_len))
				strbuf_attach(&buf, buf_ptr, buf_len, buf_len + 1);
			else if (strbuf_read_file(&buf, read_from, st.st_size) != st.st_size)
				die_errno("cannot open or read '%s'", read_from);
			break;
		case S_IFLNK:
			if (strbuf_readlink(&buf, read_from, st.st_size) < 0)
				die_errno("cannot readlink '%s'", read_from);
			break;
		default:
			die("unsupported file type %s", read_from);
		}
	}
	else {
		/* Reading from stdin */
		mode = 0;
		if (strbuf_read(&buf, 0, 0) < 0)
			die_errno("failed to read from stdin");
	}
	convert_to_git(&the_index, path, buf.buf, buf.len, &buf, 0);
	origin->file.ptr = buf.buf;
	origin->file.size = buf.len;
	pretend_object_file(buf.buf, buf.len, OBJ_BLOB, &origin->blob_oid);

	/*
	 * Read the current index, replace the path entry with
	 * origin->blob_oid without mucking with its mode or type
	 * bits; we are not going to write this index out -- we just
	 * want to run "diff-index --cached".
	 */
	discard_cache();
	read_cache();

	len = strlen(path);
	if (!mode) {
		int pos = cache_name_pos(path, len);
		if (0 <= pos)
			mode = active_cache[pos]->ce_mode;
		else
			/* Let's not bother reading from HEAD tree */
			mode = S_IFREG | 0644;
	}
	size = cache_entry_size(len);
	ce = xcalloc(1, size);
	oidcpy(&ce->oid, &origin->blob_oid);
	memcpy(ce->name, path, len);
	ce->ce_flags = create_ce_flags(0);
	ce->ce_namelen = len;
	ce->ce_mode = create_ce_mode(mode);
	add_cache_entry(ce, ADD_CACHE_OK_TO_ADD|ADD_CACHE_OK_TO_REPLACE);

	cache_tree_invalidate_path(&the_index, path);

	return commit;
}

static int diff_hunks(mmfile_t *file_a, mmfile_t *file_b,
		      xdl_emit_hunk_consume_func_t hunk_func, void *cb_data, int xdl_opts)
{
	xpparam_t xpp = {0};
	xdemitconf_t xecfg = {0};
	xdemitcb_t ecb = {NULL};

	xpp.flags = xdl_opts;
	xecfg.hunk_func = hunk_func;
	ecb.priv = cb_data;
	return xdi_diff(file_a, file_b, &xpp, &xecfg, &ecb);
}
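
/*
 * For illustration: a call such as
 *
 *	diff_hunks(&file_parent, &file_target, my_cb, &data, 0);
 *
 * (my_cb and data being hypothetical names) invokes my_cb once per
 * differing hunk, passing (start_a, count_a) for the hunk in the
 * first file and (start_b, count_b) for the hunk in the second,
 * as blame_chunk_cb() further below demonstrates.
 */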

/*
 * Given an origin, prepare mmfile_t structure to be used by the
 * diff machinery
 */
static void fill_origin_blob(struct diff_options *opt,
			     struct blame_origin *o, mmfile_t *file, int *num_read_blob)
{
	if (!o->file.ptr) {
		enum object_type type;
		unsigned long file_size;

		(*num_read_blob)++;
		if (opt->flags.allow_textconv &&
		    textconv_object(o->path, o->mode, &o->blob_oid, 1, &file->ptr, &file_size))
			;
		else
			file->ptr = read_object_file(&o->blob_oid, &type,
						     &file_size);
		file->size = file_size;

		if (!file->ptr)
			die("Cannot read blob %s for path %s",
			    oid_to_hex(&o->blob_oid),
			    o->path);
		o->file = *file;
	}
	else
		*file = o->file;
}

static void drop_origin_blob(struct blame_origin *o)
{
	if (o->file.ptr) {
		FREE_AND_NULL(o->file.ptr);
	}
}

/*
 * Any merge of blames happens on lists of blames that arrived via
 * different parents in a single suspect. In this case, we want to
 * sort according to the suspect line numbers as opposed to the final
 * image line numbers. The function body is somewhat longish because
 * it avoids unnecessary writes.
 */
static struct blame_entry *blame_merge(struct blame_entry *list1,
				       struct blame_entry *list2)
{
	struct blame_entry *p1 = list1, *p2 = list2,
		**tail = &list1;

	if (!p1)
		return p2;
	if (!p2)
		return p1;

	if (p1->s_lno <= p2->s_lno) {
		do {
			tail = &p1->next;
			if ((p1 = *tail) == NULL) {
				*tail = p2;
				return list1;
			}
		} while (p1->s_lno <= p2->s_lno);
	}
	for (;;) {
		*tail = p2;
		do {
			tail = &p2->next;
			if ((p2 = *tail) == NULL) {
				*tail = p1;
				return list1;
			}
		} while (p1->s_lno > p2->s_lno);
		*tail = p1;
		do {
			tail = &p1->next;
			if ((p1 = *tail) == NULL) {
				*tail = p2;
				return list1;
			}
		} while (p1->s_lno <= p2->s_lno);
	}
}
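
/*
 * For illustration: merging lists with s_lno sequences [1, 5, 9]
 * and [3, 7] yields [1, 3, 5, 7, 9]; no entries are allocated or
 * freed, and a link is only rewritten where the two lists actually
 * interleave.
 */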

static void *get_next_blame(const void *p)
{
	return ((struct blame_entry *)p)->next;
}

static void set_next_blame(void *p1, void *p2)
{
	((struct blame_entry *)p1)->next = p2;
}

/*
 * Final image line numbers are all different, so we don't need a
 * three-way comparison here.
 */
static int compare_blame_final(const void *p1, const void *p2)
{
	return ((struct blame_entry *)p1)->lno > ((struct blame_entry *)p2)->lno
		? 1 : -1;
}

static int compare_blame_suspect(const void *p1, const void *p2)
{
	const struct blame_entry *s1 = p1, *s2 = p2;
	/*
	 * to allow for collating suspects, we sort according to the
	 * respective pointer value as the primary sorting criterion.
	 * The actual relation is pretty unimportant as long as it
	 * establishes a total order.  Comparing as integers gives us
	 * that.
	 */
	if (s1->suspect != s2->suspect)
		return (intptr_t)s1->suspect > (intptr_t)s2->suspect ? 1 : -1;
	if (s1->s_lno == s2->s_lno)
		return 0;
	return s1->s_lno > s2->s_lno ? 1 : -1;
}

void blame_sort_final(struct blame_scoreboard *sb)
{
	sb->ent = llist_mergesort(sb->ent, get_next_blame, set_next_blame,
				  compare_blame_final);
}

static int compare_commits_by_reverse_commit_date(const void *a,
						  const void *b,
						  void *c)
{
	return -compare_commits_by_commit_date(a, b, c);
}

/*
 * For debugging -- origin is refcounted, and this asserts that
 * we do not underflow.
 */
static void sanity_check_refcnt(struct blame_scoreboard *sb)
{
	int baa = 0;
	struct blame_entry *ent;

	for (ent = sb->ent; ent; ent = ent->next) {
		/* Nobody should have zero or negative refcnt */
		if (ent->suspect->refcnt <= 0) {
			fprintf(stderr, "%s in %s has negative refcnt %d\n",
				ent->suspect->path,
				oid_to_hex(&ent->suspect->commit->object.oid),
				ent->suspect->refcnt);
			baa = 1;
		}
	}
	if (baa)
		sb->on_sanity_fail(sb, baa);
}

/*
 * If two blame entries that are next to each other came from
 * contiguous lines in the same origin (i.e. <commit, path> pair),
 * merge them together.
 */
void blame_coalesce(struct blame_scoreboard *sb)
{
	struct blame_entry *ent, *next;

	for (ent = sb->ent; ent && (next = ent->next); ent = next) {
		if (ent->suspect == next->suspect &&
		    ent->s_lno + ent->num_lines == next->s_lno) {
			ent->num_lines += next->num_lines;
			ent->next = next->next;
			blame_origin_decref(next->suspect);
			free(next);
			ent->score = 0;
			next = ent; /* again */
		}
	}

	if (sb->debug) /* sanity */
		sanity_check_refcnt(sb);
}

/*
 * Merge the given sorted list of blames into a preexisting origin.
 * If there were no previous blames to that commit, it is entered into
 * the commit priority queue of the scoreboard.
 */
static void queue_blames(struct blame_scoreboard *sb, struct blame_origin *porigin,
			 struct blame_entry *sorted)
{
	if (porigin->suspects)
		porigin->suspects = blame_merge(porigin->suspects, sorted);
	else {
		struct blame_origin *o;
		for (o = porigin->commit->util; o; o = o->next) {
			if (o->suspects) {
				porigin->suspects = sorted;
				return;
			}
		}
		porigin->suspects = sorted;
		prio_queue_put(&sb->commits, porigin->commit);
	}
}

/*
 * Fill the blob_oid field of an origin if it hasn't been filled yet,
 * so that a later call to fill_origin_blob() can use it to locate the
 * data.  The blob_oid of an origin is also used to pass blame for the
 * entire file to the parent, to detect the case where a child's blob
 * is identical to that of its parent.
 *
 * This also fills origin->mode for the corresponding tree path.
 */
static int fill_blob_sha1_and_mode(struct blame_origin *origin)
{
	if (!is_null_oid(&origin->blob_oid))
		return 0;
	if (get_tree_entry(&origin->commit->object.oid, origin->path, &origin->blob_oid, &origin->mode))
		goto error_out;
	if (oid_object_info(&origin->blob_oid, NULL) != OBJ_BLOB)
		goto error_out;
	return 0;
 error_out:
	oidclr(&origin->blob_oid);
	origin->mode = S_IFINVALID;
	return -1;
}

/*
 * We have an origin -- check if the same path exists in the
 * parent and return an origin structure to represent it.
 */
static struct blame_origin *find_origin(struct commit *parent,
					struct blame_origin *origin)
{
	struct blame_origin *porigin;
	struct diff_options diff_opts;
	const char *paths[2];

	/* First check any existing origins */
	for (porigin = parent->util; porigin; porigin = porigin->next)
		if (!strcmp(porigin->path, origin->path)) {
			/*
			 * The same path between origin and its parent
			 * without renaming -- the most common case.
			 */
			return blame_origin_incref(porigin);
		}

	/* See if the origin->path is different between parent
	 * and origin first.  Most of the time they are the
	 * same and diff-tree is fairly efficient about this.
	 */
	diff_setup(&diff_opts);
	diff_opts.flags.recursive = 1;
	diff_opts.detect_rename = 0;
	diff_opts.output_format = DIFF_FORMAT_NO_OUTPUT;
	paths[0] = origin->path;
	paths[1] = NULL;

	parse_pathspec(&diff_opts.pathspec,
		       PATHSPEC_ALL_MAGIC & ~PATHSPEC_LITERAL,
		       PATHSPEC_LITERAL_PATH, "", paths);
	diff_setup_done(&diff_opts);

	if (is_null_oid(&origin->commit->object.oid))
		do_diff_cache(&parent->tree->object.oid, &diff_opts);
	else
		diff_tree_oid(&parent->tree->object.oid,
			      &origin->commit->tree->object.oid,
			      "", &diff_opts);
	diffcore_std(&diff_opts);

	if (!diff_queued_diff.nr) {
		/* The path is the same as parent */
		porigin = get_origin(parent, origin->path);
		oidcpy(&porigin->blob_oid, &origin->blob_oid);
		porigin->mode = origin->mode;
	} else {
		/*
		 * Since origin->path is a pathspec, if the parent
		 * commit had it as a directory, we will see a whole
		 * bunch of deletion of files in the directory that we
		 * do not care about.
		 */
		int i;
		struct diff_filepair *p = NULL;
		for (i = 0; i < diff_queued_diff.nr; i++) {
			const char *name;
			p = diff_queued_diff.queue[i];
			name = p->one->path ? p->one->path : p->two->path;
			if (!strcmp(name, origin->path))
				break;
		}
		if (!p)
			die("internal error in blame::find_origin");
		switch (p->status) {
		default:
			die("internal error in blame::find_origin (%c)",
			    p->status);
		case 'M':
			porigin = get_origin(parent, origin->path);
			oidcpy(&porigin->blob_oid, &p->one->oid);
			porigin->mode = p->one->mode;
			break;
		case 'A':
		case 'T':
			/* Did not exist in parent, or type changed */
			break;
		}
	}
	diff_flush(&diff_opts);
	clear_pathspec(&diff_opts.pathspec);
	return porigin;
}

/*
 * We have an origin -- find the path that corresponds to it in its
 * parent and return an origin structure to represent it.
 */
static struct blame_origin *find_rename(struct commit *parent,
					struct blame_origin *origin)
{
	struct blame_origin *porigin = NULL;
	struct diff_options diff_opts;
	int i;

	diff_setup(&diff_opts);
	diff_opts.flags.recursive = 1;
	diff_opts.detect_rename = DIFF_DETECT_RENAME;
	diff_opts.output_format = DIFF_FORMAT_NO_OUTPUT;
	diff_opts.single_follow = origin->path;
	diff_setup_done(&diff_opts);

	if (is_null_oid(&origin->commit->object.oid))
		do_diff_cache(&parent->tree->object.oid, &diff_opts);
	else
		diff_tree_oid(&parent->tree->object.oid,
			      &origin->commit->tree->object.oid,
			      "", &diff_opts);
	diffcore_std(&diff_opts);

	for (i = 0; i < diff_queued_diff.nr; i++) {
		struct diff_filepair *p = diff_queued_diff.queue[i];
		if ((p->status == 'R' || p->status == 'C') &&
		    !strcmp(p->two->path, origin->path)) {
			porigin = get_origin(parent, p->one->path);
			oidcpy(&porigin->blob_oid, &p->one->oid);
			porigin->mode = p->one->mode;
			break;
		}
	}
	diff_flush(&diff_opts);
	clear_pathspec(&diff_opts.pathspec);
	return porigin;
}

/*
 * Append a new blame entry to a given output queue.
 */
static void add_blame_entry(struct blame_entry ***queue,
			    const struct blame_entry *src)
{
	struct blame_entry *e = xmalloc(sizeof(*e));
	memcpy(e, src, sizeof(*e));
	blame_origin_incref(e->suspect);

	e->next = **queue;
	**queue = e;
	*queue = &e->next;
}

/*
 * src typically is on-stack; we want to copy the information in it to
 * a malloced blame_entry that gets added to the given queue.  The
 * origin of dst loses a refcnt.
 */
static void dup_entry(struct blame_entry ***queue,
		      struct blame_entry *dst, struct blame_entry *src)
{
	blame_origin_incref(src->suspect);
	blame_origin_decref(dst->suspect);
	memcpy(dst, src, sizeof(*src));

	dst->next = **queue;
	**queue = dst;
	*queue = &dst->next;
}

const char *blame_nth_line(struct blame_scoreboard *sb, long lno)
{
	return sb->final_buf + sb->lineno[lno];
}

/*
 * Lines between tlno and same are known to have come from the parent,
 * and e has an overlap with that range.  It is also known that the
 * parent's line plno corresponds to e's line tlno.
 *
 *                <---- e ----->
 *                   <------>
 *                   <------------>
 *                <------------>
 *                <------------------>
 *
 * Split e into potentially three parts; before this chunk, the chunk
 * to be blamed for the parent, and after that portion.
 */
static void split_overlap(struct blame_entry *split,
			  struct blame_entry *e,
			  int tlno, int plno, int same,
			  struct blame_origin *parent)
{
	int chunk_end_lno;
	memset(split, 0, sizeof(struct blame_entry [3]));

	if (e->s_lno < tlno) {
		/* there is a pre-chunk part not blamed on parent */
		split[0].suspect = blame_origin_incref(e->suspect);
		split[0].lno = e->lno;
		split[0].s_lno = e->s_lno;
		split[0].num_lines = tlno - e->s_lno;
		split[1].lno = e->lno + tlno - e->s_lno;
		split[1].s_lno = plno;
	}
	else {
		split[1].lno = e->lno;
		split[1].s_lno = plno + (e->s_lno - tlno);
	}

	if (same < e->s_lno + e->num_lines) {
		/* there is a post-chunk part not blamed on parent */
		split[2].suspect = blame_origin_incref(e->suspect);
		split[2].lno = e->lno + (same - e->s_lno);
		split[2].s_lno = e->s_lno + (same - e->s_lno);
		split[2].num_lines = e->s_lno + e->num_lines - same;
		chunk_end_lno = split[2].lno;
	}
	else
		chunk_end_lno = e->lno + e->num_lines;
	split[1].num_lines = chunk_end_lno - split[1].lno;

	/*
	 * if it turns out there is nothing to blame the parent for,
	 * forget about the splitting.  !split[1].suspect signals this.
	 */
	if (split[1].num_lines < 1)
		return;
	split[1].suspect = blame_origin_incref(parent);
}

/*
 * split_overlap() divided an existing blame e into up to three parts
 * in split.  Any assigned blame is moved to queue to
 * reflect the split.
 */
static void split_blame(struct blame_entry ***blamed,
			struct blame_entry ***unblamed,
			struct blame_entry *split,
			struct blame_entry *e)
{
	if (split[0].suspect && split[2].suspect) {
		/* The first part (reuse storage for the existing entry e) */
		dup_entry(unblamed, e, &split[0]);

		/* The last part -- me */
		add_blame_entry(unblamed, &split[2]);

		/* ... and the middle part -- parent */
		add_blame_entry(blamed, &split[1]);
	}
	else if (!split[0].suspect && !split[2].suspect)
		/*
		 * The parent covers the entire area; reuse storage for
		 * e and replace it with the parent.
		 */
		dup_entry(blamed, e, &split[1]);
	else if (split[0].suspect) {
		/* me and then parent */
		dup_entry(unblamed, e, &split[0]);
		add_blame_entry(blamed, &split[1]);
	}
	else {
		/* parent and then me */
		dup_entry(blamed, e, &split[1]);
		add_blame_entry(unblamed, &split[2]);
	}
}

/*
 * After splitting the blame, the origins used by the
 * on-stack blame_entry should lose one refcnt each.
 */
static void decref_split(struct blame_entry *split)
{
	int i;

	for (i = 0; i < 3; i++)
		blame_origin_decref(split[i].suspect);
}

/*
 * reverse_blame reverses the list given in head, appending tail.
 * That allows us to build lists in reverse order, then reverse them
 * afterwards.  This can be faster than building the list in proper
 * order right away.  The reason is that building in proper order
 * requires writing a link in the _previous_ element, while building
 * in reverse order just requires placing the list head into the
 * _current_ element.
 */
static struct blame_entry *reverse_blame(struct blame_entry *head,
					 struct blame_entry *tail)
{
	while (head) {
		struct blame_entry *next = head->next;
		head->next = tail;
		tail = head;
		head = next;
	}
	return tail;
}
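
/*
 * For illustration: reverse_blame(a->b->c, tail) returns
 * c->b->a->tail, so a list built cheaply by prepending can be
 * restored to proper order in a single pass.
 */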

/*
 * Process one hunk from the patch between the current suspect for
 * blame_entry e and its parent.  This first blames any unfinished
 * entries before the chunk (which is where target and parent start
 * differing) on the parent, and then splits blame entries at the
 * start and at the end of the difference region.  Since use of -M and
 * -C options may lead to overlapping/duplicate source line number
 * ranges, all we can rely on from sorting/merging is the order of the
 * first suspect line number.
 */
static void blame_chunk(struct blame_entry ***dstq, struct blame_entry ***srcq,
			int tlno, int offset, int same,
			struct blame_origin *parent)
{
	struct blame_entry *e = **srcq;
	struct blame_entry *samep = NULL, *diffp = NULL;

	while (e && e->s_lno < tlno) {
		struct blame_entry *next = e->next;
		/*
		 * current record starts before differing portion.  If
		 * it reaches into it, we need to split it up and
		 * examine the second part separately.
		 */
		if (e->s_lno + e->num_lines > tlno) {
			/* Move second half to a new record */
			int len = tlno - e->s_lno;
			struct blame_entry *n = xcalloc(1, sizeof (struct blame_entry));
			n->suspect = e->suspect;
			n->lno = e->lno + len;
			n->s_lno = e->s_lno + len;
			n->num_lines = e->num_lines - len;
			e->num_lines = len;
			e->score = 0;
			/* Push new record to diffp */
			n->next = diffp;
			diffp = n;
		} else
			blame_origin_decref(e->suspect);
		/* Pass blame for everything before the differing
		 * chunk to the parent */
		e->suspect = blame_origin_incref(parent);
		e->s_lno += offset;
		e->next = samep;
		samep = e;
		e = next;
	}
	/*
	 * As we don't know how much of a common stretch after this
	 * diff will occur, the currently blamed parts are all that we
	 * can assign to the parent for now.
	 */

	if (samep) {
		**dstq = reverse_blame(samep, **dstq);
		*dstq = &samep->next;
	}
	/*
	 * Prepend the split off portions: everything after e starts
	 * after the blameable portion.
	 */
	e = reverse_blame(diffp, e);

	/*
	 * Now retain records on the target while parts are different
	 * from the parent.
	 */
	samep = NULL;
	diffp = NULL;
	while (e && e->s_lno < same) {
		struct blame_entry *next = e->next;

		/*
		 * If current record extends into sameness, need to split.
		 */
		if (e->s_lno + e->num_lines > same) {
			/*
			 * Move second half to a new record to be
			 * processed by later chunks
			 */
			int len = same - e->s_lno;
			struct blame_entry *n = xcalloc(1, sizeof (struct blame_entry));
			n->suspect = blame_origin_incref(e->suspect);
			n->lno = e->lno + len;
			n->s_lno = e->s_lno + len;
			n->num_lines = e->num_lines - len;
			e->num_lines = len;
			e->score = 0;
			/* Push new record to samep */
			n->next = samep;
			samep = n;
		}
		e->next = diffp;
		diffp = e;
		e = next;
	}
	**srcq = reverse_blame(diffp, reverse_blame(samep, e));
	/* Move across elements that are in the unblamable portion */
	if (diffp)
		*srcq = &diffp->next;
}

struct blame_chunk_cb_data {
	struct blame_origin *parent;
	long offset;
	struct blame_entry **dstq;
	struct blame_entry **srcq;
};

/* diff chunks are from parent to target */
static int blame_chunk_cb(long start_a, long count_a,
			  long start_b, long count_b, void *data)
{
	struct blame_chunk_cb_data *d = data;
	if (start_a - start_b != d->offset)
		die("internal error in blame::blame_chunk_cb");
	blame_chunk(&d->dstq, &d->srcq, start_b, start_a - start_b,
		    start_b + count_b, d->parent);
	d->offset = start_a + count_a - (start_b + count_b);
	return 0;
}
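
/*
 * For illustration: after a hunk covering parent lines
 * [start_a, start_a + count_a) and target lines
 * [start_b, start_b + count_b), the updated d->offset is what must
 * be added to a target line number to obtain the corresponding
 * parent line number in the common stretch that follows the hunk.
 */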

/*
 * We are looking at the origin 'target' and aiming to pass blame
 * for the lines it is suspected of to its parent.  Run diff to find
 * which lines came from the parent and pass blame for them.
 */
static void pass_blame_to_parent(struct blame_scoreboard *sb,
				 struct blame_origin *target,
				 struct blame_origin *parent)
{
	mmfile_t file_p, file_o;
	struct blame_chunk_cb_data d;
	struct blame_entry *newdest = NULL;

	if (!target->suspects)
		return; /* nothing remains for this target */

	d.parent = parent;
	d.offset = 0;
	d.dstq = &newdest; d.srcq = &target->suspects;

	fill_origin_blob(&sb->revs->diffopt, parent, &file_p, &sb->num_read_blob);
	fill_origin_blob(&sb->revs->diffopt, target, &file_o, &sb->num_read_blob);
	sb->num_get_patch++;

	if (diff_hunks(&file_p, &file_o, blame_chunk_cb, &d, sb->xdl_opts))
		die("unable to generate diff (%s -> %s)",
		    oid_to_hex(&parent->commit->object.oid),
		    oid_to_hex(&target->commit->object.oid));
	/* The rest are the same as the parent */
	blame_chunk(&d.dstq, &d.srcq, INT_MAX, d.offset, INT_MAX, parent);
	*d.dstq = NULL;
	queue_blames(sb, parent, newdest);

	return;
}

/*
 * After splitting blames many times, the lines in a blame_entry can
 * become very small and trivial, and at some point it becomes
 * pointless to blame the parents.  E.g. "\t\t}\n\t}\n\n" appears
 * everywhere in any ordinary C program, and it is not worthwhile to
 * say it was copied from a totally unrelated file in the parent.
 *
 * Compute how trivial the lines in the blame_entry are.
 */
unsigned blame_entry_score(struct blame_scoreboard *sb, struct blame_entry *e)
{
	unsigned score;
	const char *cp, *ep;

	if (e->score)
		return e->score;

	score = 1;
	cp = blame_nth_line(sb, e->lno);
	ep = blame_nth_line(sb, e->lno + e->num_lines);
	while (cp < ep) {
		unsigned ch = *((unsigned char *)cp);
		if (isalnum(ch))
			score++;
		cp++;
	}
	e->score = score;
	return score;
}
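
/*
 * For example, an entry whose lines contain ten alphanumeric
 * characters in total scores 11: the initial 1 plus one per
 * alphanumeric character.  Whitespace and punctuation contribute
 * nothing, so trivial lines like "\t\t}\n" stay cheap.
 */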

/*
 * best_so_far[] and potential[] are both a split of an existing blame_entry
 * that passes blame to the parent.  Maintain best_so_far as the best split
 * so far, by comparing potential and best_so_far and copying potential into
 * best_so_far as needed.
 */
static void copy_split_if_better(struct blame_scoreboard *sb,
				 struct blame_entry *best_so_far,
				 struct blame_entry *potential)
{
	int i;

	if (!potential[1].suspect)
		return;
	if (best_so_far[1].suspect) {
		if (blame_entry_score(sb, &potential[1]) <
		    blame_entry_score(sb, &best_so_far[1]))
			return;
	}

	for (i = 0; i < 3; i++)
		blame_origin_incref(potential[i].suspect);
	decref_split(best_so_far);
	memcpy(best_so_far, potential, sizeof(struct blame_entry[3]));
}

/*
 * We are looking at a part of the final image represented by
 * ent (tlno and same are offset by ent->s_lno).
 * tlno is where we are looking at in the final image.
 * Lines up to (but not including) same match the preimage.
 * plno is where we are looking at in the preimage.
 *
 * <-------------- final image ---------------------->
 *       <------ent------>
 *         ^tlno ^same
 *    <---------preimage----->
 *            ^plno
 *
 * All line numbers are 0-based.
 */
static void handle_split(struct blame_scoreboard *sb,
			 struct blame_entry *ent,
			 int tlno, int plno, int same,
			 struct blame_origin *parent,
			 struct blame_entry *split)
{
	if (ent->num_lines <= tlno)
		return;
	if (tlno < same) {
		struct blame_entry potential[3];
		tlno += ent->s_lno;
		same += ent->s_lno;
		split_overlap(potential, ent, tlno, plno, same, parent);
		copy_split_if_better(sb, split, potential);
		decref_split(potential);
	}
}

struct handle_split_cb_data {
	struct blame_scoreboard *sb;
	struct blame_entry *ent;
	struct blame_origin *parent;
	struct blame_entry *split;
	long plno;
	long tlno;
};

static int handle_split_cb(long start_a, long count_a,
			   long start_b, long count_b, void *data)
{
	struct handle_split_cb_data *d = data;
	handle_split(d->sb, d->ent, d->tlno, d->plno, start_b, d->parent,
		     d->split);
	d->plno = start_a + count_a;
	d->tlno = start_b + count_b;
	return 0;
}

/*
 * Find the lines from parent that are the same as ent so that
 * we can pass blames to it.  file_p has the blob contents for
 * the parent.
 */
static void find_copy_in_blob(struct blame_scoreboard *sb,
			      struct blame_entry *ent,
			      struct blame_origin *parent,
			      struct blame_entry *split,
			      mmfile_t *file_p)
{
	const char *cp;
	mmfile_t file_o;
	struct handle_split_cb_data d;

	memset(&d, 0, sizeof(d));
	d.sb = sb; d.ent = ent; d.parent = parent; d.split = split;
	/*
	 * Prepare mmfile that contains only the lines in ent.
	 */
	cp = blame_nth_line(sb, ent->lno);
	file_o.ptr = (char *) cp;
	file_o.size = blame_nth_line(sb, ent->lno + ent->num_lines) - cp;

	/*
	 * file_o is a part of final image we are annotating.
	 * file_p partially may match that image.
	 */
	memset(split, 0, sizeof(struct blame_entry [3]));
	if (diff_hunks(file_p, &file_o, handle_split_cb, &d, sb->xdl_opts))
		die("unable to generate diff (%s)",
		    oid_to_hex(&parent->commit->object.oid));
	/* remainder, if any, all match the preimage */
	handle_split(sb, ent, d.tlno, d.plno, ent->num_lines, parent, split);
}

/* Move all blame entries from list *source that have a score no greater
 * than score_min to the front of list *small.
 * Returns a pointer to the link pointing to the old head of the small list.
 */
static struct blame_entry **filter_small(struct blame_scoreboard *sb,
					 struct blame_entry **small,
					 struct blame_entry **source,
					 unsigned score_min)
{
	struct blame_entry *p = *source;
	struct blame_entry *oldsmall = *small;
	while (p) {
		if (blame_entry_score(sb, p) <= score_min) {
			*small = p;
			small = &p->next;
			p = *small;
		} else {
			*source = p;
			source = &p->next;
			p = *source;
		}
	}
	*small = oldsmall;
	*source = NULL;
	return small;
}
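
/*
 * For illustration: with score_min = 10, every entry scoring 10 or
 * less is unlinked from *source and collected in front of the old
 * contents of *small, preserving relative order; the remaining
 * entries stay on *source.
 */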

/*
 * See if the lines the target is currently suspected for can be
 * attributed to the parent.
 */
static void find_move_in_parent(struct blame_scoreboard *sb,
				struct blame_entry ***blamed,
				struct blame_entry **toosmall,
				struct blame_origin *target,
				struct blame_origin *parent)
{
	struct blame_entry *e, split[3];
	struct blame_entry *unblamed = target->suspects;
	struct blame_entry *leftover = NULL;
	mmfile_t file_p;

	if (!unblamed)
		return; /* nothing remains for this target */

	fill_origin_blob(&sb->revs->diffopt, parent, &file_p, &sb->num_read_blob);
	if (!file_p.ptr)
		return;

	/* At each iteration, unblamed has a NULL-terminated list of
	 * entries that have not yet been tested for blame.  leftover
	 * contains the reversed list of entries that have been tested
	 * without being assignable to the parent.
	 */
	do {
		struct blame_entry **unblamedtail = &unblamed;
		struct blame_entry *next;
		for (e = unblamed; e; e = next) {
			next = e->next;
			find_copy_in_blob(sb, e, parent, split, &file_p);
			if (split[1].suspect &&
			    sb->move_score < blame_entry_score(sb, &split[1])) {
				split_blame(blamed, &unblamedtail, split, e);
			} else {
				e->next = leftover;
				leftover = e;
			}
			decref_split(split);
		}
		*unblamedtail = NULL;
		toosmall = filter_small(sb, toosmall, &unblamed, sb->move_score);
	} while (unblamed);
	target->suspects = reverse_blame(leftover, NULL);
}

struct blame_list {
	struct blame_entry *ent;
	struct blame_entry split[3];
};

/*
 * Count the number of entries the target is suspected for,
 * and prepare a list of entry and the best split.
 */
static struct blame_list *setup_blame_list(struct blame_entry *unblamed,
					   int *num_ents_p)
{
	struct blame_entry *e;
	int num_ents, i;
	struct blame_list *blame_list = NULL;

	for (e = unblamed, num_ents = 0; e; e = e->next)
		num_ents++;
	if (num_ents) {
		blame_list = xcalloc(num_ents, sizeof(struct blame_list));
		for (e = unblamed, i = 0; e; e = e->next)
			blame_list[i++].ent = e;
	}
	*num_ents_p = num_ents;
	return blame_list;
}

/*
 * For lines target is suspected for, see if we can find code movement
 * across file boundary from the parent commit.  porigin is the path
 * in the parent we already tried.
 */
static void find_copy_in_parent(struct blame_scoreboard *sb,
				struct blame_entry ***blamed,
				struct blame_entry **toosmall,
				struct blame_origin *target,
				struct commit *parent,
				struct blame_origin *porigin,
				int opt)
{
	struct diff_options diff_opts;
	int i, j;
	struct blame_list *blame_list;
	int num_ents;
	struct blame_entry *unblamed = target->suspects;
	struct blame_entry *leftover = NULL;

	if (!unblamed)
		return; /* nothing remains for this target */

	diff_setup(&diff_opts);
	diff_opts.flags.recursive = 1;
	diff_opts.output_format = DIFF_FORMAT_NO_OUTPUT;

	diff_setup_done(&diff_opts);

	/* Try "find copies harder" on new path if requested;
	 * we do not want to use diffcore_rename() actually to
	 * match things up; find_copies_harder is set only to
	 * force diff_tree_oid() to feed all filepairs to diff_queue,
	 * and this code needs to be after diff_setup_done(), which
	 * usually makes find-copies-harder imply copy detection.
	 */
	if ((opt & PICKAXE_BLAME_COPY_HARDEST)
	    || ((opt & PICKAXE_BLAME_COPY_HARDER)
		&& (!porigin || strcmp(target->path, porigin->path))))
		diff_opts.flags.find_copies_harder = 1;

	if (is_null_oid(&target->commit->object.oid))
		do_diff_cache(&parent->tree->object.oid, &diff_opts);
	else
		diff_tree_oid(&parent->tree->object.oid,
			      &target->commit->tree->object.oid,
			      "", &diff_opts);

	if (!diff_opts.flags.find_copies_harder)
		diffcore_std(&diff_opts);

	do {
		struct blame_entry **unblamedtail = &unblamed;
		blame_list = setup_blame_list(unblamed, &num_ents);

		for (i = 0; i < diff_queued_diff.nr; i++) {
			struct diff_filepair *p = diff_queued_diff.queue[i];
			struct blame_origin *norigin;
			mmfile_t file_p;
			struct blame_entry potential[3];

			if (!DIFF_FILE_VALID(p->one))
				continue; /* does not exist in parent */
			if (S_ISGITLINK(p->one->mode))
				continue; /* ignore git links */
			if (porigin && !strcmp(p->one->path, porigin->path))
				/* find_move already dealt with this path */
				continue;

			norigin = get_origin(parent, p->one->path);
			oidcpy(&norigin->blob_oid, &p->one->oid);
			norigin->mode = p->one->mode;
			fill_origin_blob(&sb->revs->diffopt, norigin, &file_p, &sb->num_read_blob);
			if (!file_p.ptr)
				continue;

			for (j = 0; j < num_ents; j++) {
				find_copy_in_blob(sb, blame_list[j].ent,
						  norigin, potential, &file_p);
				copy_split_if_better(sb, blame_list[j].split,
						     potential);
				decref_split(potential);
			}
			blame_origin_decref(norigin);
		}

		for (j = 0; j < num_ents; j++) {
			struct blame_entry *split = blame_list[j].split;
			if (split[1].suspect &&
			    sb->copy_score < blame_entry_score(sb, &split[1])) {
				split_blame(blamed, &unblamedtail, split,
					    blame_list[j].ent);
			} else {
				blame_list[j].ent->next = leftover;
				leftover = blame_list[j].ent;
			}
			decref_split(split);
		}
		free(blame_list);
		*unblamedtail = NULL;
		toosmall = filter_small(sb, toosmall, &unblamed, sb->copy_score);
	} while (unblamed);
	target->suspects = reverse_blame(leftover, NULL);
	diff_flush(&diff_opts);
	clear_pathspec(&diff_opts.pathspec);
}

/*
 * The blobs of origin and porigin exactly match, so everything
 * origin is suspected for can be blamed on the parent.
 */
static void pass_whole_blame(struct blame_scoreboard *sb,
			     struct blame_origin *origin, struct blame_origin *porigin)
{
	struct blame_entry *e, *suspects;

	if (!porigin->file.ptr && origin->file.ptr) {
		/* Steal its file */
		porigin->file = origin->file;
		origin->file.ptr = NULL;
	}
	suspects = origin->suspects;
	origin->suspects = NULL;
	for (e = suspects; e; e = e->next) {
		blame_origin_incref(porigin);
		blame_origin_decref(e->suspect);
		e->suspect = porigin;
	}
	queue_blames(sb, porigin, suspects);
}

/*
 * We pass blame from the current commit to its parents.  We keep
 * saying "parent" (and "porigin"), but what we mean is to find a
 * scapegoat to exonerate ourselves.
 */
static struct commit_list *first_scapegoat(struct rev_info *revs, struct commit *commit,
					   int reverse)
{
	if (!reverse) {
		if (revs->first_parent_only &&
		    commit->parents &&
		    commit->parents->next) {
			free_commit_list(commit->parents->next);
			commit->parents->next = NULL;
		}
		return commit->parents;
	}
	return lookup_decoration(&revs->children, &commit->object);
}

static int num_scapegoats(struct rev_info *revs, struct commit *commit, int reverse)
{
	struct commit_list *l = first_scapegoat(revs, commit, reverse);
	return commit_list_count(l);
}

/* Distribute collected unsorted blames to the respective sorted lists
 * in the various origins.
 */
static void distribute_blame(struct blame_scoreboard *sb, struct blame_entry *blamed)
{
	blamed = llist_mergesort(blamed, get_next_blame, set_next_blame,
				 compare_blame_suspect);
	while (blamed)
	{
		struct blame_origin *porigin = blamed->suspect;
		struct blame_entry *suspects = NULL;
		do {
			struct blame_entry *next = blamed->next;
			blamed->next = suspects;
			suspects = blamed;
			blamed = next;
		} while (blamed && blamed->suspect == porigin);
		suspects = reverse_blame(suspects, NULL);
		queue_blames(sb, porigin, suspects);
	}
}
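
/*
 * For illustration: sorting by suspect pointer first makes all
 * entries sharing an origin adjacent, so each iteration of the
 * outer loop peels off exactly one origin's group, restores its
 * s_lno order, and hands it to queue_blames().
 */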

#define MAXSG 16

static void pass_blame(struct blame_scoreboard *sb, struct blame_origin *origin, int opt)
{
	struct rev_info *revs = sb->revs;
	int i, pass, num_sg;
	struct commit *commit = origin->commit;
	struct commit_list *sg;
	struct blame_origin *sg_buf[MAXSG];
	struct blame_origin *porigin, **sg_origin = sg_buf;
	struct blame_entry *toosmall = NULL;
	struct blame_entry *blames, **blametail = &blames;

	num_sg = num_scapegoats(revs, commit, sb->reverse);
	if (!num_sg)
		goto finish;
	else if (num_sg < ARRAY_SIZE(sg_buf))
		memset(sg_buf, 0, sizeof(sg_buf));
	else
		sg_origin = xcalloc(num_sg, sizeof(*sg_origin));

	/*
	 * The first pass looks for unrenamed path to optimize for
	 * common cases, then we look for renames in the second pass.
	 */
	for (pass = 0; pass < 2 - sb->no_whole_file_rename; pass++) {
		struct blame_origin *(*find)(struct commit *, struct blame_origin *);
		find = pass ? find_rename : find_origin;

		for (i = 0, sg = first_scapegoat(revs, commit, sb->reverse);
		     i < num_sg && sg;
		     sg = sg->next, i++) {
			struct commit *p = sg->item;
			int j, same;

			if (sg_origin[i])
				continue;
			if (parse_commit(p))
				continue;
			porigin = find(p, origin);
			if (!porigin)
				continue;
			if (!oidcmp(&porigin->blob_oid, &origin->blob_oid)) {
				pass_whole_blame(sb, origin, porigin);
				blame_origin_decref(porigin);
				goto finish;
			}
			for (j = same = 0; j < i; j++)
				if (sg_origin[j] &&
				    !oidcmp(&sg_origin[j]->blob_oid, &porigin->blob_oid)) {
					same = 1;
					break;
				}
			if (!same)
				sg_origin[i] = porigin;
			else
				blame_origin_decref(porigin);
		}
	}

	sb->num_commits++;
	for (i = 0, sg = first_scapegoat(revs, commit, sb->reverse);
	     i < num_sg && sg;
	     sg = sg->next, i++) {
		struct blame_origin *porigin = sg_origin[i];
		if (!porigin)
			continue;
		if (!origin->previous) {
			blame_origin_incref(porigin);
			origin->previous = porigin;
		}
		pass_blame_to_parent(sb, origin, porigin);
		if (!origin->suspects)
			goto finish;
	}

	/*
	 * Optionally find moves in parents' files.
	 */
	if (opt & PICKAXE_BLAME_MOVE) {
		filter_small(sb, &toosmall, &origin->suspects, sb->move_score);
		if (origin->suspects) {
			for (i = 0, sg = first_scapegoat(revs, commit, sb->reverse);
			     i < num_sg && sg;
			     sg = sg->next, i++) {
				struct blame_origin *porigin = sg_origin[i];
				if (!porigin)
					continue;
				find_move_in_parent(sb, &blametail, &toosmall, origin, porigin);
				if (!origin->suspects)
					break;
			}
		}
	}

	/*
	 * Optionally find copies from parents' files.
	 */
	if (opt & PICKAXE_BLAME_COPY) {
		if (sb->copy_score > sb->move_score)
			filter_small(sb, &toosmall, &origin->suspects, sb->copy_score);
		else if (sb->copy_score < sb->move_score) {
			origin->suspects = blame_merge(origin->suspects, toosmall);
			toosmall = NULL;
			filter_small(sb, &toosmall, &origin->suspects, sb->copy_score);
		}
		if (!origin->suspects)
			goto finish;

		for (i = 0, sg = first_scapegoat(revs, commit, sb->reverse);
		     i < num_sg && sg;
		     sg = sg->next, i++) {
			struct blame_origin *porigin = sg_origin[i];
			find_copy_in_parent(sb, &blametail, &toosmall,
					    origin, sg->item, porigin, opt);
			if (!origin->suspects)
				goto finish;
		}
	}

finish:
	*blametail = NULL;
	distribute_blame(sb, blames);
	/*
	 * prepend toosmall to origin->suspects
	 *
	 * There is no point in sorting: this ends up on a big
	 * unsorted list in the caller anyway.
	 */
	if (toosmall) {
		struct blame_entry **tail = &toosmall;
		while (*tail)
			tail = &(*tail)->next;
		*tail = origin->suspects;
		origin->suspects = toosmall;
	}
	for (i = 0; i < num_sg; i++) {
		if (sg_origin[i]) {
			drop_origin_blob(sg_origin[i]);
			blame_origin_decref(sg_origin[i]);
		}
	}
	drop_origin_blob(origin);
	if (sg_buf != sg_origin)
		free(sg_origin);
}

/*
 * The main loop -- while we have blobs with lines whose true origin
 * is still unknown, pick one blob, and allow its lines to pass blames
 * to its parents.
 */
void assign_blame(struct blame_scoreboard *sb, int opt)
{
	struct rev_info *revs = sb->revs;
	struct commit *commit = prio_queue_get(&sb->commits);

	while (commit) {
		struct blame_entry *ent;
		struct blame_origin *suspect = commit->util;

		/* find one suspect to break down */
		while (suspect && !suspect->suspects)
			suspect = suspect->next;

		if (!suspect) {
			commit = prio_queue_get(&sb->commits);
			continue;
		}

		assert(commit == suspect->commit);

		/*
		 * We will use this suspect later in the loop,
		 * so hold onto it in the meantime.
		 */
		blame_origin_incref(suspect);
		parse_commit(commit);
		if (sb->reverse ||
		    (!(commit->object.flags & UNINTERESTING) &&
		     !(revs->max_age != -1 && commit->date < revs->max_age)))
			pass_blame(sb, suspect, opt);
		else {
			commit->object.flags |= UNINTERESTING;
			if (commit->object.parsed)
				mark_parents_uninteresting(commit);
		}
		/* treat root commit as boundary */
		if (!commit->parents && !sb->show_root)
			commit->object.flags |= UNINTERESTING;

		/* Take responsibility for the remaining entries */
		ent = suspect->suspects;
		if (ent) {
			suspect->guilty = 1;
			for (;;) {
				struct blame_entry *next = ent->next;
				if (sb->found_guilty_entry)
					sb->found_guilty_entry(ent, sb->found_guilty_entry_data);
				if (next) {
					ent = next;
					continue;
				}
				ent->next = sb->ent;
				sb->ent = suspect->suspects;
				suspect->suspects = NULL;
				break;
			}
		}
		blame_origin_decref(suspect);

		if (sb->debug) /* sanity */
			sanity_check_refcnt(sb);
	}
}

static const char *get_next_line(const char *start, const char *end)
{
	const char *nl = memchr(start, '\n', end - start);
	return nl ? nl + 1 : end;
}

/*
 * To allow quick access to the contents of the nth line in the
 * final image, prepare an index in the scoreboard.
 */
static int prepare_lines(struct blame_scoreboard *sb)
{
	const char *buf = sb->final_buf;
	unsigned long len = sb->final_buf_size;
	const char *end = buf + len;
	const char *p;
	int *lineno;
	int num = 0;

	for (p = buf; p < end; p = get_next_line(p, end))
		num++;

	ALLOC_ARRAY(sb->lineno, num + 1);
	lineno = sb->lineno;

	for (p = buf; p < end; p = get_next_line(p, end))
		*lineno++ = p - buf;

	*lineno = len;

	sb->num_lines = num;
	return sb->num_lines;
}
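
/*
 * For example, with the five-byte final image "a\nbb\n" the index
 * becomes lineno[] = {0, 2, 5}: line n occupies the byte range
 * [lineno[n], lineno[n+1]), and blame_nth_line() reduces to a
 * single array lookup.
 */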

static struct commit *find_single_final(struct rev_info *revs,
					const char **name_p)
{
	int i;
	struct commit *found = NULL;
	const char *name = NULL;

	for (i = 0; i < revs->pending.nr; i++) {
		struct object *obj = revs->pending.objects[i].item;
		if (obj->flags & UNINTERESTING)
			continue;
		obj = deref_tag(obj, NULL, 0);
		if (obj->type != OBJ_COMMIT)
			die("Non commit %s?", revs->pending.objects[i].name);
		if (found)
			die("More than one commit to dig from %s and %s?",
			    revs->pending.objects[i].name, name);
		found = (struct commit *)obj;
		name = revs->pending.objects[i].name;
	}
	if (name_p)
		*name_p = xstrdup_or_null(name);
	return found;
}

static struct commit *dwim_reverse_initial(struct rev_info *revs,
					   const char **name_p)
{
	/*
	 * DWIM "git blame --reverse ONE -- PATH" as
	 * "git blame --reverse ONE..HEAD -- PATH" but only do so
	 * when it makes sense.
	 */
	struct object *obj;
	struct commit *head_commit;
	struct object_id head_oid;

	if (revs->pending.nr != 1)
		return NULL;

	/* Is that sole rev a committish? */
	obj = revs->pending.objects[0].item;
	obj = deref_tag(obj, NULL, 0);
	if (obj->type != OBJ_COMMIT)
		return NULL;

	/* Do we have HEAD? */
	if (!resolve_ref_unsafe("HEAD", RESOLVE_REF_READING, &head_oid, NULL))
		return NULL;
	head_commit = lookup_commit_reference_gently(&head_oid, 1);
	if (!head_commit)
		return NULL;

	/* Turn "ONE" into "ONE..HEAD" then */
	obj->flags |= UNINTERESTING;
	add_pending_object(revs, &head_commit->object, "HEAD");

	if (name_p)
		*name_p = revs->pending.objects[0].name;
	return (struct commit *)obj;
}

static struct commit *find_single_initial(struct rev_info *revs,
					  const char **name_p)
{
	int i;
	struct commit *found = NULL;
	const char *name = NULL;

	/*
	 * There must be one and only one negative commit, and it must be
	 * the boundary.
	 */
	for (i = 0; i < revs->pending.nr; i++) {
		struct object *obj = revs->pending.objects[i].item;
		if (!(obj->flags & UNINTERESTING))
			continue;
		obj = deref_tag(obj, NULL, 0);
		if (obj->type != OBJ_COMMIT)
			die("Non commit %s?", revs->pending.objects[i].name);
		if (found)
			die("More than one commit to dig up from, %s and %s?",
			    revs->pending.objects[i].name, name);
		found = (struct commit *) obj;
		name = revs->pending.objects[i].name;
	}

	if (!name)
		found = dwim_reverse_initial(revs, &name);
	if (!name)
		die("No commit to dig up from?");

	if (name_p)
		*name_p = xstrdup(name);
	return found;
}

void init_scoreboard(struct blame_scoreboard *sb)
{
	memset(sb, 0, sizeof(struct blame_scoreboard));
	sb->move_score = BLAME_DEFAULT_MOVE_SCORE;
	sb->copy_score = BLAME_DEFAULT_COPY_SCORE;
}

void setup_scoreboard(struct blame_scoreboard *sb, const char *path, struct blame_origin **orig)
{
	const char *final_commit_name = NULL;
	struct blame_origin *o;
	struct commit *final_commit = NULL;
	enum object_type type;

	if (sb->reverse && sb->contents_from)
		die(_("--contents and --reverse do not blend well."));

	if (!sb->reverse) {
		sb->final = find_single_final(sb->revs, &final_commit_name);
		sb->commits.compare = compare_commits_by_commit_date;
	} else {
		sb->final = find_single_initial(sb->revs, &final_commit_name);
		sb->commits.compare = compare_commits_by_reverse_commit_date;
	}

	if (sb->final && sb->contents_from)
		die(_("cannot use --contents with final commit object name"));

	if (sb->reverse && sb->revs->first_parent_only)
		sb->revs->children.name = NULL;

	if (!sb->final) {
		/*
		 * "--not A B -- path" without anything positive;
		 * do not default to HEAD, but use the working tree
		 * or "--contents".
		 */
		setup_work_tree();
		sb->final = fake_working_tree_commit(&sb->revs->diffopt,
						     path, sb->contents_from);
		add_pending_object(sb->revs, &(sb->final->object), ":");
	}

	if (sb->reverse && sb->revs->first_parent_only) {
		final_commit = find_single_final(sb->revs, NULL);
		if (!final_commit)
			die(_("--reverse and --first-parent together require specified latest commit"));
	}

	/*
	 * If we have bottom, this will mark the ancestors of the
	 * bottom commits we would reach while traversing as
	 * uninteresting.
	 */
	if (prepare_revision_walk(sb->revs))
		die(_("revision walk setup failed"));

	if (sb->reverse && sb->revs->first_parent_only) {
		struct commit *c = final_commit;

		sb->revs->children.name = "children";
		while (c->parents &&
		       oidcmp(&c->object.oid, &sb->final->object.oid)) {
			struct commit_list *l = xcalloc(1, sizeof(*l));

			l->item = c;
			if (add_decoration(&sb->revs->children,
					   &c->parents->item->object, l))
				BUG("not unique item in first-parent chain");
			c = c->parents->item;
		}

		if (oidcmp(&c->object.oid, &sb->final->object.oid))
			die(_("--reverse --first-parent together require range along first-parent chain"));
	}

	if (is_null_oid(&sb->final->object.oid)) {
		o = sb->final->util;
		sb->final_buf = xmemdupz(o->file.ptr, o->file.size);
		sb->final_buf_size = o->file.size;
	}
	else {
		o = get_origin(sb->final, path);
		if (fill_blob_sha1_and_mode(o))
			die(_("no such path %s in %s"), path, final_commit_name);

		if (sb->revs->diffopt.flags.allow_textconv &&
		    textconv_object(path, o->mode, &o->blob_oid, 1, (char **)&sb->final_buf,
				    &sb->final_buf_size))
			;
		else
			sb->final_buf = read_object_file(&o->blob_oid, &type,
							 &sb->final_buf_size);

		if (!sb->final_buf)
			die(_("cannot read blob %s for path %s"),
			    oid_to_hex(&o->blob_oid),
			    path);
	}
	sb->num_read_blob++;
	prepare_lines(sb);

	if (orig)
		*orig = o;

	free((char *)final_commit_name);
}

struct blame_entry *blame_entry_prepend(struct blame_entry *head,
					long start, long end,
					struct blame_origin *o)
{
	struct blame_entry *new_head = xcalloc(1, sizeof(struct blame_entry));
	new_head->lno = start;
	new_head->num_lines = end - start;
	new_head->suspect = o;
	new_head->s_lno = start;
	new_head->next = head;
	blame_origin_incref(o);
	return new_head;
}