THIS IS A TEST INSTANCE ONLY! REPOSITORIES CAN BE DELETED AT ANY TIME!

Git Source Code Mirror - This is a publish-only repository and all pull requests are ignored. Please follow Documentation/SubmittingPatches procedure for any of your improvements.
git
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

558 lines
13KB

  1. #include "cache.h"
  2. #include "object-store.h"
  3. #include "commit.h"
  4. #include "tag.h"
  5. #include "diff.h"
  6. #include "revision.h"
  7. #include "list-objects.h"
  8. #include "progress.h"
  9. #include "pack-revindex.h"
  10. #include "pack.h"
  11. #include "pack-bitmap.h"
  12. #include "sha1-lookup.h"
  13. #include "pack-objects.h"
  14. #include "commit-reach.h"
  15. struct bitmapped_commit {
  16. struct commit *commit;
  17. struct ewah_bitmap *bitmap;
  18. struct ewah_bitmap *write_as;
  19. int flags;
  20. int xor_offset;
  21. uint32_t commit_pos;
  22. };
  23. struct bitmap_writer {
  24. struct ewah_bitmap *commits;
  25. struct ewah_bitmap *trees;
  26. struct ewah_bitmap *blobs;
  27. struct ewah_bitmap *tags;
  28. kh_oid_map_t *bitmaps;
  29. kh_oid_map_t *reused;
  30. struct packing_data *to_pack;
  31. struct bitmapped_commit *selected;
  32. unsigned int selected_nr, selected_alloc;
  33. struct progress *progress;
  34. int show_progress;
  35. unsigned char pack_checksum[GIT_MAX_RAWSZ];
  36. };
  37. static struct bitmap_writer writer;
  38. void bitmap_writer_show_progress(int show)
  39. {
  40. writer.show_progress = show;
  41. }
  42. /**
  43. * Build the initial type index for the packfile
  44. */
  45. void bitmap_writer_build_type_index(struct packing_data *to_pack,
  46. struct pack_idx_entry **index,
  47. uint32_t index_nr)
  48. {
  49. uint32_t i;
  50. writer.commits = ewah_new();
  51. writer.trees = ewah_new();
  52. writer.blobs = ewah_new();
  53. writer.tags = ewah_new();
  54. ALLOC_ARRAY(to_pack->in_pack_pos, to_pack->nr_objects);
  55. for (i = 0; i < index_nr; ++i) {
  56. struct object_entry *entry = (struct object_entry *)index[i];
  57. enum object_type real_type;
  58. oe_set_in_pack_pos(to_pack, entry, i);
  59. switch (oe_type(entry)) {
  60. case OBJ_COMMIT:
  61. case OBJ_TREE:
  62. case OBJ_BLOB:
  63. case OBJ_TAG:
  64. real_type = oe_type(entry);
  65. break;
  66. default:
  67. real_type = oid_object_info(to_pack->repo,
  68. &entry->idx.oid, NULL);
  69. break;
  70. }
  71. switch (real_type) {
  72. case OBJ_COMMIT:
  73. ewah_set(writer.commits, i);
  74. break;
  75. case OBJ_TREE:
  76. ewah_set(writer.trees, i);
  77. break;
  78. case OBJ_BLOB:
  79. ewah_set(writer.blobs, i);
  80. break;
  81. case OBJ_TAG:
  82. ewah_set(writer.tags, i);
  83. break;
  84. default:
  85. die("Missing type information for %s (%d/%d)",
  86. oid_to_hex(&entry->idx.oid), real_type,
  87. oe_type(entry));
  88. }
  89. }
  90. }
  91. /**
  92. * Compute the actual bitmaps
  93. */
  94. static struct object **seen_objects;
  95. static unsigned int seen_objects_nr, seen_objects_alloc;
  96. static inline void push_bitmapped_commit(struct commit *commit, struct ewah_bitmap *reused)
  97. {
  98. if (writer.selected_nr >= writer.selected_alloc) {
  99. writer.selected_alloc = (writer.selected_alloc + 32) * 2;
  100. REALLOC_ARRAY(writer.selected, writer.selected_alloc);
  101. }
  102. writer.selected[writer.selected_nr].commit = commit;
  103. writer.selected[writer.selected_nr].bitmap = reused;
  104. writer.selected[writer.selected_nr].flags = 0;
  105. writer.selected_nr++;
  106. }
  107. static inline void mark_as_seen(struct object *object)
  108. {
  109. ALLOC_GROW(seen_objects, seen_objects_nr + 1, seen_objects_alloc);
  110. seen_objects[seen_objects_nr++] = object;
  111. }
  112. static inline void reset_all_seen(void)
  113. {
  114. unsigned int i;
  115. for (i = 0; i < seen_objects_nr; ++i) {
  116. seen_objects[i]->flags &= ~(SEEN | ADDED | SHOWN);
  117. }
  118. seen_objects_nr = 0;
  119. }
  120. static uint32_t find_object_pos(const struct object_id *oid)
  121. {
  122. struct object_entry *entry = packlist_find(writer.to_pack, oid);
  123. if (!entry) {
  124. die("Failed to write bitmap index. Packfile doesn't have full closure "
  125. "(object %s is missing)", oid_to_hex(oid));
  126. }
  127. return oe_in_pack_pos(writer.to_pack, entry);
  128. }
  129. static void show_object(struct object *object, const char *name, void *data)
  130. {
  131. struct bitmap *base = data;
  132. bitmap_set(base, find_object_pos(&object->oid));
  133. mark_as_seen(object);
  134. }
  135. static void show_commit(struct commit *commit, void *data)
  136. {
  137. mark_as_seen((struct object *)commit);
  138. }
  139. static int
  140. add_to_include_set(struct bitmap *base, struct commit *commit)
  141. {
  142. khiter_t hash_pos;
  143. uint32_t bitmap_pos = find_object_pos(&commit->object.oid);
  144. if (bitmap_get(base, bitmap_pos))
  145. return 0;
  146. hash_pos = kh_get_oid_map(writer.bitmaps, commit->object.oid);
  147. if (hash_pos < kh_end(writer.bitmaps)) {
  148. struct bitmapped_commit *bc = kh_value(writer.bitmaps, hash_pos);
  149. bitmap_or_ewah(base, bc->bitmap);
  150. return 0;
  151. }
  152. bitmap_set(base, bitmap_pos);
  153. return 1;
  154. }
  155. static int
  156. should_include(struct commit *commit, void *_data)
  157. {
  158. struct bitmap *base = _data;
  159. if (!add_to_include_set(base, commit)) {
  160. struct commit_list *parent = commit->parents;
  161. mark_as_seen((struct object *)commit);
  162. while (parent) {
  163. parent->item->object.flags |= SEEN;
  164. mark_as_seen((struct object *)parent->item);
  165. parent = parent->next;
  166. }
  167. return 0;
  168. }
  169. return 1;
  170. }
  171. static void compute_xor_offsets(void)
  172. {
  173. static const int MAX_XOR_OFFSET_SEARCH = 10;
  174. int i, next = 0;
  175. while (next < writer.selected_nr) {
  176. struct bitmapped_commit *stored = &writer.selected[next];
  177. int best_offset = 0;
  178. struct ewah_bitmap *best_bitmap = stored->bitmap;
  179. struct ewah_bitmap *test_xor;
  180. for (i = 1; i <= MAX_XOR_OFFSET_SEARCH; ++i) {
  181. int curr = next - i;
  182. if (curr < 0)
  183. break;
  184. test_xor = ewah_pool_new();
  185. ewah_xor(writer.selected[curr].bitmap, stored->bitmap, test_xor);
  186. if (test_xor->buffer_size < best_bitmap->buffer_size) {
  187. if (best_bitmap != stored->bitmap)
  188. ewah_pool_free(best_bitmap);
  189. best_bitmap = test_xor;
  190. best_offset = i;
  191. } else {
  192. ewah_pool_free(test_xor);
  193. }
  194. }
  195. stored->xor_offset = best_offset;
  196. stored->write_as = best_bitmap;
  197. next++;
  198. }
  199. }
  200. void bitmap_writer_build(struct packing_data *to_pack)
  201. {
  202. static const double REUSE_BITMAP_THRESHOLD = 0.2;
  203. int i, reuse_after, need_reset;
  204. struct bitmap *base = bitmap_new();
  205. struct rev_info revs;
  206. writer.bitmaps = kh_init_oid_map();
  207. writer.to_pack = to_pack;
  208. if (writer.show_progress)
  209. writer.progress = start_progress("Building bitmaps", writer.selected_nr);
  210. repo_init_revisions(to_pack->repo, &revs, NULL);
  211. revs.tag_objects = 1;
  212. revs.tree_objects = 1;
  213. revs.blob_objects = 1;
  214. revs.no_walk = 0;
  215. revs.include_check = should_include;
  216. reset_revision_walk();
  217. reuse_after = writer.selected_nr * REUSE_BITMAP_THRESHOLD;
  218. need_reset = 0;
  219. for (i = writer.selected_nr - 1; i >= 0; --i) {
  220. struct bitmapped_commit *stored;
  221. struct object *object;
  222. khiter_t hash_pos;
  223. int hash_ret;
  224. stored = &writer.selected[i];
  225. object = (struct object *)stored->commit;
  226. if (stored->bitmap == NULL) {
  227. if (i < writer.selected_nr - 1 &&
  228. (need_reset ||
  229. !in_merge_bases(writer.selected[i + 1].commit,
  230. stored->commit))) {
  231. bitmap_reset(base);
  232. reset_all_seen();
  233. }
  234. add_pending_object(&revs, object, "");
  235. revs.include_check_data = base;
  236. if (prepare_revision_walk(&revs))
  237. die("revision walk setup failed");
  238. traverse_commit_list(&revs, show_commit, show_object, base);
  239. object_array_clear(&revs.pending);
  240. stored->bitmap = bitmap_to_ewah(base);
  241. need_reset = 0;
  242. } else
  243. need_reset = 1;
  244. if (i >= reuse_after)
  245. stored->flags |= BITMAP_FLAG_REUSE;
  246. hash_pos = kh_put_oid_map(writer.bitmaps, object->oid, &hash_ret);
  247. if (hash_ret == 0)
  248. die("Duplicate entry when writing index: %s",
  249. oid_to_hex(&object->oid));
  250. kh_value(writer.bitmaps, hash_pos) = stored;
  251. display_progress(writer.progress, writer.selected_nr - i);
  252. }
  253. bitmap_free(base);
  254. stop_progress(&writer.progress);
  255. compute_xor_offsets();
  256. }
  257. /**
  258. * Select the commits that will be bitmapped
  259. */
  260. static inline unsigned int next_commit_index(unsigned int idx)
  261. {
  262. static const unsigned int MIN_COMMITS = 100;
  263. static const unsigned int MAX_COMMITS = 5000;
  264. static const unsigned int MUST_REGION = 100;
  265. static const unsigned int MIN_REGION = 20000;
  266. unsigned int offset, next;
  267. if (idx <= MUST_REGION)
  268. return 0;
  269. if (idx <= MIN_REGION) {
  270. offset = idx - MUST_REGION;
  271. return (offset < MIN_COMMITS) ? offset : MIN_COMMITS;
  272. }
  273. offset = idx - MIN_REGION;
  274. next = (offset < MAX_COMMITS) ? offset : MAX_COMMITS;
  275. return (next > MIN_COMMITS) ? next : MIN_COMMITS;
  276. }
  277. static int date_compare(const void *_a, const void *_b)
  278. {
  279. struct commit *a = *(struct commit **)_a;
  280. struct commit *b = *(struct commit **)_b;
  281. return (long)b->date - (long)a->date;
  282. }
  283. void bitmap_writer_reuse_bitmaps(struct packing_data *to_pack)
  284. {
  285. struct bitmap_index *bitmap_git;
  286. if (!(bitmap_git = prepare_bitmap_git(to_pack->repo)))
  287. return;
  288. writer.reused = kh_init_oid_map();
  289. rebuild_existing_bitmaps(bitmap_git, to_pack, writer.reused,
  290. writer.show_progress);
  291. /*
  292. * NEEDSWORK: rebuild_existing_bitmaps() makes writer.reused reference
  293. * some bitmaps in bitmap_git, so we can't free the latter.
  294. */
  295. }
  296. static struct ewah_bitmap *find_reused_bitmap(const struct object_id *oid)
  297. {
  298. khiter_t hash_pos;
  299. if (!writer.reused)
  300. return NULL;
  301. hash_pos = kh_get_oid_map(writer.reused, *oid);
  302. if (hash_pos >= kh_end(writer.reused))
  303. return NULL;
  304. return kh_value(writer.reused, hash_pos);
  305. }
  306. void bitmap_writer_select_commits(struct commit **indexed_commits,
  307. unsigned int indexed_commits_nr,
  308. int max_bitmaps)
  309. {
  310. unsigned int i = 0, j, next;
  311. QSORT(indexed_commits, indexed_commits_nr, date_compare);
  312. if (writer.show_progress)
  313. writer.progress = start_progress("Selecting bitmap commits", 0);
  314. if (indexed_commits_nr < 100) {
  315. for (i = 0; i < indexed_commits_nr; ++i)
  316. push_bitmapped_commit(indexed_commits[i], NULL);
  317. return;
  318. }
  319. for (;;) {
  320. struct ewah_bitmap *reused_bitmap = NULL;
  321. struct commit *chosen = NULL;
  322. next = next_commit_index(i);
  323. if (i + next >= indexed_commits_nr)
  324. break;
  325. if (max_bitmaps > 0 && writer.selected_nr >= max_bitmaps) {
  326. writer.selected_nr = max_bitmaps;
  327. break;
  328. }
  329. if (next == 0) {
  330. chosen = indexed_commits[i];
  331. reused_bitmap = find_reused_bitmap(&chosen->object.oid);
  332. } else {
  333. chosen = indexed_commits[i + next];
  334. for (j = 0; j <= next; ++j) {
  335. struct commit *cm = indexed_commits[i + j];
  336. reused_bitmap = find_reused_bitmap(&cm->object.oid);
  337. if (reused_bitmap || (cm->object.flags & NEEDS_BITMAP) != 0) {
  338. chosen = cm;
  339. break;
  340. }
  341. if (cm->parents && cm->parents->next)
  342. chosen = cm;
  343. }
  344. }
  345. push_bitmapped_commit(chosen, reused_bitmap);
  346. i += next + 1;
  347. display_progress(writer.progress, i);
  348. }
  349. stop_progress(&writer.progress);
  350. }
  351. static int hashwrite_ewah_helper(void *f, const void *buf, size_t len)
  352. {
  353. /* hashwrite will die on error */
  354. hashwrite(f, buf, len);
  355. return len;
  356. }
  357. /**
  358. * Write the bitmap index to disk
  359. */
  360. static inline void dump_bitmap(struct hashfile *f, struct ewah_bitmap *bitmap)
  361. {
  362. if (ewah_serialize_to(bitmap, hashwrite_ewah_helper, f) < 0)
  363. die("Failed to write bitmap index");
  364. }
  365. static const unsigned char *sha1_access(size_t pos, void *table)
  366. {
  367. struct pack_idx_entry **index = table;
  368. return index[pos]->oid.hash;
  369. }
  370. static void write_selected_commits_v1(struct hashfile *f,
  371. struct pack_idx_entry **index,
  372. uint32_t index_nr)
  373. {
  374. int i;
  375. for (i = 0; i < writer.selected_nr; ++i) {
  376. struct bitmapped_commit *stored = &writer.selected[i];
  377. int commit_pos =
  378. sha1_pos(stored->commit->object.oid.hash, index, index_nr, sha1_access);
  379. if (commit_pos < 0)
  380. BUG("trying to write commit not in index");
  381. hashwrite_be32(f, commit_pos);
  382. hashwrite_u8(f, stored->xor_offset);
  383. hashwrite_u8(f, stored->flags);
  384. dump_bitmap(f, stored->write_as);
  385. }
  386. }
  387. static void write_hash_cache(struct hashfile *f,
  388. struct pack_idx_entry **index,
  389. uint32_t index_nr)
  390. {
  391. uint32_t i;
  392. for (i = 0; i < index_nr; ++i) {
  393. struct object_entry *entry = (struct object_entry *)index[i];
  394. uint32_t hash_value = htonl(entry->hash);
  395. hashwrite(f, &hash_value, sizeof(hash_value));
  396. }
  397. }
  398. void bitmap_writer_set_checksum(unsigned char *sha1)
  399. {
  400. hashcpy(writer.pack_checksum, sha1);
  401. }
  402. void bitmap_writer_finish(struct pack_idx_entry **index,
  403. uint32_t index_nr,
  404. const char *filename,
  405. uint16_t options)
  406. {
  407. static uint16_t default_version = 1;
  408. static uint16_t flags = BITMAP_OPT_FULL_DAG;
  409. struct strbuf tmp_file = STRBUF_INIT;
  410. struct hashfile *f;
  411. struct bitmap_disk_header header;
  412. int fd = odb_mkstemp(&tmp_file, "pack/tmp_bitmap_XXXXXX");
  413. f = hashfd(fd, tmp_file.buf);
  414. memcpy(header.magic, BITMAP_IDX_SIGNATURE, sizeof(BITMAP_IDX_SIGNATURE));
  415. header.version = htons(default_version);
  416. header.options = htons(flags | options);
  417. header.entry_count = htonl(writer.selected_nr);
  418. hashcpy(header.checksum, writer.pack_checksum);
  419. hashwrite(f, &header, sizeof(header) - GIT_MAX_RAWSZ + the_hash_algo->rawsz);
  420. dump_bitmap(f, writer.commits);
  421. dump_bitmap(f, writer.trees);
  422. dump_bitmap(f, writer.blobs);
  423. dump_bitmap(f, writer.tags);
  424. write_selected_commits_v1(f, index, index_nr);
  425. if (options & BITMAP_OPT_HASH_CACHE)
  426. write_hash_cache(f, index, index_nr);
  427. finalize_hashfile(f, NULL, CSUM_HASH_IN_STREAM | CSUM_FSYNC | CSUM_CLOSE);
  428. if (adjust_shared_perm(tmp_file.buf))
  429. die_errno("unable to make temporary bitmap file readable");
  430. if (rename(tmp_file.buf, filename))
  431. die_errno("unable to rename temporary bitmap file to '%s'", filename);
  432. strbuf_release(&tmp_file);
  433. }