/*
 * GIT - The information manager from hell
 *
 * Copyright (C) Linus Torvalds, 2005
 */
#include "cache.h"
#include "config.h"
#include "diff.h"
#include "diffcore.h"
#include "tempfile.h"
#include "lockfile.h"
#include "cache-tree.h"
#include "refs.h"
#include "dir.h"
#include "object-store.h"
#include "tree.h"
#include "commit.h"
#include "blob.h"
#include "resolve-undo.h"
#include "run-command.h"
#include "strbuf.h"
#include "varint.h"
#include "split-index.h"
#include "utf8.h"
#include "fsmonitor.h"
#include "thread-utils.h"
#include "progress.h"

/* Mask for the name length in ce_flags in the on-disk index */
#define CE_NAMEMASK (0x0fff)

/* Index extensions.
 *
 * The first letter should be 'A'..'Z' for extensions that are not
 * necessary for a correct operation (i.e. optimization data).
 * When new extensions are added that _need_ to be understood in
 * order to correctly interpret the index file, pick a character that
 * is outside the range, to cause the reader to abort.
 */

#define CACHE_EXT(s) ( (s[0]<<24)|(s[1]<<16)|(s[2]<<8)|(s[3]) )
#define CACHE_EXT_TREE 0x54524545	/* "TREE" */
#define CACHE_EXT_RESOLVE_UNDO 0x52455543 /* "REUC" */
#define CACHE_EXT_LINK 0x6c696e6b	/* "link" */
#define CACHE_EXT_UNTRACKED 0x554E5452	/* "UNTR" */
#define CACHE_EXT_FSMONITOR 0x46534D4E	/* "FSMN" */
#define CACHE_EXT_ENDOFINDEXENTRIES 0x454F4945	/* "EOIE" */
#define CACHE_EXT_INDEXENTRYOFFSETTABLE 0x49454F54 /* "IEOT" */
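
/*
 * Worked example of the encoding above (for illustration): each tag is
 * four ASCII bytes packed big-endian, so
 *
 *	CACHE_EXT("TREE") == ('T' << 24) | ('R' << 16) | ('E' << 8) | 'E'
 *	                  == 0x54524545 == CACHE_EXT_TREE
 *
 * Note that "link" is deliberately lowercase: its first byte falls
 * outside 'A'..'Z', so a reader that does not understand the
 * split-index extension must abort rather than silently misread the
 * file.
 */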

/* changes that can be kept in $GIT_DIR/index (basically all extensions) */
#define EXTMASK (RESOLVE_UNDO_CHANGED | CACHE_TREE_CHANGED | \
		 CE_ENTRY_ADDED | CE_ENTRY_REMOVED | CE_ENTRY_CHANGED | \
		 SPLIT_INDEX_ORDERED | UNTRACKED_CHANGED | FSMONITOR_CHANGED)

/*
 * This is an estimate of the pathname length in the index.  We use
 * this for V4 index files to guess the un-deltafied size of the index
 * in memory because of pathname deltafication.  This is not required
 * for V2/V3 index formats because their pathnames are not compressed.
 * If the initial amount of memory set aside is not sufficient, the
 * mem pool will allocate extra memory.
 */
#define CACHE_ENTRY_PATH_LENGTH 80

static inline struct cache_entry *mem_pool__ce_alloc(struct mem_pool *mem_pool, size_t len)
{
	struct cache_entry *ce;
	ce = mem_pool_alloc(mem_pool, cache_entry_size(len));
	ce->mem_pool_allocated = 1;
	return ce;
}

static inline struct cache_entry *mem_pool__ce_calloc(struct mem_pool *mem_pool, size_t len)
{
	struct cache_entry *ce;
	ce = mem_pool_calloc(mem_pool, 1, cache_entry_size(len));
	ce->mem_pool_allocated = 1;
	return ce;
}

static struct mem_pool *find_mem_pool(struct index_state *istate)
{
	struct mem_pool **pool_ptr;

	if (istate->split_index && istate->split_index->base)
		pool_ptr = &istate->split_index->base->ce_mem_pool;
	else
		pool_ptr = &istate->ce_mem_pool;

	if (!*pool_ptr)
		mem_pool_init(pool_ptr, 0);

	return *pool_ptr;
}

static const char *alternate_index_output;

static void set_index_entry(struct index_state *istate, int nr, struct cache_entry *ce)
{
	istate->cache[nr] = ce;
	add_name_hash(istate, ce);
}

static void replace_index_entry(struct index_state *istate, int nr, struct cache_entry *ce)
{
	struct cache_entry *old = istate->cache[nr];

	replace_index_entry_in_base(istate, old, ce);
	remove_name_hash(istate, old);
	discard_cache_entry(old);
	ce->ce_flags &= ~CE_HASHED;
	set_index_entry(istate, nr, ce);
	ce->ce_flags |= CE_UPDATE_IN_BASE;
	mark_fsmonitor_invalid(istate, ce);
	istate->cache_changed |= CE_ENTRY_CHANGED;
}

void rename_index_entry_at(struct index_state *istate, int nr, const char *new_name)
{
	struct cache_entry *old_entry = istate->cache[nr], *new_entry;
	int namelen = strlen(new_name);

	new_entry = make_empty_cache_entry(istate, namelen);
	copy_cache_entry(new_entry, old_entry);
	new_entry->ce_flags &= ~CE_HASHED;
	new_entry->ce_namelen = namelen;
	new_entry->index = 0;
	memcpy(new_entry->name, new_name, namelen + 1);

	cache_tree_invalidate_path(istate, old_entry->name);
	untracked_cache_remove_from_index(istate, old_entry->name);
	remove_index_entry_at(istate, nr);
	add_index_entry(istate, new_entry, ADD_CACHE_OK_TO_ADD|ADD_CACHE_OK_TO_REPLACE);
}

void fill_stat_data(struct stat_data *sd, struct stat *st)
{
	sd->sd_ctime.sec = (unsigned int)st->st_ctime;
	sd->sd_mtime.sec = (unsigned int)st->st_mtime;
	sd->sd_ctime.nsec = ST_CTIME_NSEC(*st);
	sd->sd_mtime.nsec = ST_MTIME_NSEC(*st);
	sd->sd_dev = st->st_dev;
	sd->sd_ino = st->st_ino;
	sd->sd_uid = st->st_uid;
	sd->sd_gid = st->st_gid;
	sd->sd_size = st->st_size;
}

int match_stat_data(const struct stat_data *sd, struct stat *st)
{
	int changed = 0;

	if (sd->sd_mtime.sec != (unsigned int)st->st_mtime)
		changed |= MTIME_CHANGED;
	if (trust_ctime && check_stat &&
	    sd->sd_ctime.sec != (unsigned int)st->st_ctime)
		changed |= CTIME_CHANGED;

#ifdef USE_NSEC
	if (check_stat && sd->sd_mtime.nsec != ST_MTIME_NSEC(*st))
		changed |= MTIME_CHANGED;
	if (trust_ctime && check_stat &&
	    sd->sd_ctime.nsec != ST_CTIME_NSEC(*st))
		changed |= CTIME_CHANGED;
#endif

	if (check_stat) {
		if (sd->sd_uid != (unsigned int) st->st_uid ||
		    sd->sd_gid != (unsigned int) st->st_gid)
			changed |= OWNER_CHANGED;
		if (sd->sd_ino != (unsigned int) st->st_ino)
			changed |= INODE_CHANGED;
	}

#ifdef USE_STDEV
	/*
	 * st_dev breaks on network filesystems where different
	 * clients will have different views of what "device"
	 * the filesystem is on
	 */
	if (check_stat && sd->sd_dev != (unsigned int) st->st_dev)
		changed |= INODE_CHANGED;
#endif

	if (sd->sd_size != (unsigned int) st->st_size)
		changed |= DATA_CHANGED;

	return changed;
}
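
/*
 * A minimal caller sketch (illustration only; handle_size_change() is a
 * hypothetical stand-in for whatever the caller does): the return value
 * is a bitmask of the *_CHANGED flags, so callers test individual bits
 * rather than mere non-zero-ness:
 *
 *	struct stat st;
 *	int changed;
 *	if (!lstat(ce->name, &st)) {
 *		changed = match_stat_data(&ce->ce_stat_data, &st);
 *		if (changed & DATA_CHANGED)
 *			handle_size_change();
 *	}
 */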

/*
 * This only updates the "non-critical" parts of the directory
 * cache, i.e. the parts that aren't tracked by GIT, and are only used
 * to validate the cache.
 */
void fill_stat_cache_info(struct index_state *istate, struct cache_entry *ce, struct stat *st)
{
	fill_stat_data(&ce->ce_stat_data, st);

	if (assume_unchanged)
		ce->ce_flags |= CE_VALID;

	if (S_ISREG(st->st_mode)) {
		ce_mark_uptodate(ce);
		mark_fsmonitor_valid(istate, ce);
	}
}

static int ce_compare_data(struct index_state *istate,
			   const struct cache_entry *ce,
			   struct stat *st)
{
	int match = -1;
	int fd = git_open_cloexec(ce->name, O_RDONLY);

	if (fd >= 0) {
		struct object_id oid;
		if (!index_fd(istate, &oid, fd, st, OBJ_BLOB, ce->name, 0))
			match = !oideq(&oid, &ce->oid);
		/* index_fd() closed the file descriptor already */
	}
	return match;
}

static int ce_compare_link(const struct cache_entry *ce, size_t expected_size)
{
	int match = -1;
	void *buffer;
	unsigned long size;
	enum object_type type;
	struct strbuf sb = STRBUF_INIT;

	if (strbuf_readlink(&sb, ce->name, expected_size))
		return -1;

	buffer = read_object_file(&ce->oid, &type, &size);
	if (buffer) {
		if (size == sb.len)
			match = memcmp(buffer, sb.buf, size);
		free(buffer);
	}
	strbuf_release(&sb);
	return match;
}

static int ce_compare_gitlink(const struct cache_entry *ce)
{
	struct object_id oid;

	/*
	 * We don't actually require that the .git directory
	 * under GITLINK directory be a valid git directory. It
	 * might even be missing (in case nobody populated that
	 * sub-project).
	 *
	 * If so, we consider it always to match.
	 */
	if (resolve_gitlink_ref(ce->name, "HEAD", &oid) < 0)
		return 0;
	return !oideq(&oid, &ce->oid);
}

static int ce_modified_check_fs(struct index_state *istate,
				const struct cache_entry *ce,
				struct stat *st)
{
	switch (st->st_mode & S_IFMT) {
	case S_IFREG:
		if (ce_compare_data(istate, ce, st))
			return DATA_CHANGED;
		break;
	case S_IFLNK:
		if (ce_compare_link(ce, xsize_t(st->st_size)))
			return DATA_CHANGED;
		break;
	case S_IFDIR:
		if (S_ISGITLINK(ce->ce_mode))
			return ce_compare_gitlink(ce) ? DATA_CHANGED : 0;
		/* else fallthrough */
	default:
		return TYPE_CHANGED;
	}
	return 0;
}

static int ce_match_stat_basic(const struct cache_entry *ce, struct stat *st)
{
	unsigned int changed = 0;

	if (ce->ce_flags & CE_REMOVE)
		return MODE_CHANGED | DATA_CHANGED | TYPE_CHANGED;

	switch (ce->ce_mode & S_IFMT) {
	case S_IFREG:
		changed |= !S_ISREG(st->st_mode) ? TYPE_CHANGED : 0;
		/* We consider only the owner x bit to be relevant for
		 * "mode changes"
		 */
		if (trust_executable_bit &&
		    (0100 & (ce->ce_mode ^ st->st_mode)))
			changed |= MODE_CHANGED;
		break;
	case S_IFLNK:
		if (!S_ISLNK(st->st_mode) &&
		    (has_symlinks || !S_ISREG(st->st_mode)))
			changed |= TYPE_CHANGED;
		break;
	case S_IFGITLINK:
		/* We ignore most of the st_xxx fields for gitlinks */
		if (!S_ISDIR(st->st_mode))
			changed |= TYPE_CHANGED;
		else if (ce_compare_gitlink(ce))
			changed |= DATA_CHANGED;
		return changed;
	default:
		BUG("unsupported ce_mode: %o", ce->ce_mode);
	}

	changed |= match_stat_data(&ce->ce_stat_data, st);

	/* Racily smudged entry? */
	if (!ce->ce_stat_data.sd_size) {
		if (!is_empty_blob_sha1(ce->oid.hash))
			changed |= DATA_CHANGED;
	}

	return changed;
}

static int is_racy_stat(const struct index_state *istate,
			const struct stat_data *sd)
{
	return (istate->timestamp.sec &&
#ifdef USE_NSEC
		/* nanosecond timestamped files can also be racy! */
		(istate->timestamp.sec < sd->sd_mtime.sec ||
		 (istate->timestamp.sec == sd->sd_mtime.sec &&
		  istate->timestamp.nsec <= sd->sd_mtime.nsec))
#else
		istate->timestamp.sec <= sd->sd_mtime.sec
#endif
		);
}
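
/*
 * Illustration (hypothetical timeline): if the index file was written
 * at second 100 and a cached file's mtime is also 100, the file could
 * have been modified after the index recorded its stat data but within
 * the same clock tick.  Without USE_NSEC the test above is therefore
 * "timestamp.sec <= sd_mtime.sec", and such entries are treated as
 * racy so that callers re-verify their contents.
 */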

int is_racy_timestamp(const struct index_state *istate,
		      const struct cache_entry *ce)
{
	return (!S_ISGITLINK(ce->ce_mode) &&
		is_racy_stat(istate, &ce->ce_stat_data));
}

int match_stat_data_racy(const struct index_state *istate,
			 const struct stat_data *sd, struct stat *st)
{
	if (is_racy_stat(istate, sd))
		return MTIME_CHANGED;
	return match_stat_data(sd, st);
}

int ie_match_stat(struct index_state *istate,
		  const struct cache_entry *ce, struct stat *st,
		  unsigned int options)
{
	unsigned int changed;
	int ignore_valid = options & CE_MATCH_IGNORE_VALID;
	int ignore_skip_worktree = options & CE_MATCH_IGNORE_SKIP_WORKTREE;
	int assume_racy_is_modified = options & CE_MATCH_RACY_IS_DIRTY;
	int ignore_fsmonitor = options & CE_MATCH_IGNORE_FSMONITOR;

	if (!ignore_fsmonitor)
		refresh_fsmonitor(istate);
	/*
	 * If it's marked as always valid in the index, it's
	 * valid whatever the checked-out copy says.
	 *
	 * skip-worktree has the same effect with higher precedence
	 */
	if (!ignore_skip_worktree && ce_skip_worktree(ce))
		return 0;
	if (!ignore_valid && (ce->ce_flags & CE_VALID))
		return 0;
	if (!ignore_fsmonitor && (ce->ce_flags & CE_FSMONITOR_VALID))
		return 0;

	/*
	 * Intent-to-add entries have not been added, so the index entry
	 * by definition never matches what is in the work tree until it
	 * actually gets added.
	 */
	if (ce_intent_to_add(ce))
		return DATA_CHANGED | TYPE_CHANGED | MODE_CHANGED;

	changed = ce_match_stat_basic(ce, st);

	/*
	 * Within 1 second of this sequence:
	 * 	echo xyzzy >file && git-update-index --add file
	 * running this command:
	 * 	echo frotz >file
	 * would give a falsely clean cache entry.  The mtime and
	 * length match the cache, and other stat fields do not change.
	 *
	 * We could detect this at update-index time (the cache entry
	 * being registered/updated records the same time as "now")
	 * and delay the return from git-update-index, but that would
	 * effectively mean we can make at most one commit per second,
	 * which is not acceptable.  Instead, we check cache entries
	 * whose mtime are the same as the index file timestamp more
	 * carefully than others.
	 */
	if (!changed && is_racy_timestamp(istate, ce)) {
		if (assume_racy_is_modified)
			changed |= DATA_CHANGED;
		else
			changed |= ce_modified_check_fs(istate, ce, st);
	}

	return changed;
}

int ie_modified(struct index_state *istate,
		const struct cache_entry *ce,
		struct stat *st, unsigned int options)
{
	int changed, changed_fs;

	changed = ie_match_stat(istate, ce, st, options);
	if (!changed)
		return 0;
	/*
	 * If the mode or type has changed, there's no point in trying
	 * to refresh the entry - it's not going to match
	 */
	if (changed & (MODE_CHANGED | TYPE_CHANGED))
		return changed;

	/*
	 * Immediately after read-tree or update-index --cacheinfo,
	 * the length field is zero, as we have never even read the
	 * lstat(2) information once, and we cannot trust DATA_CHANGED
	 * returned by ie_match_stat() which in turn was returned by
	 * ce_match_stat_basic() to signal that the filesize of the
	 * blob changed.  We have to actually go to the filesystem to
	 * see if the contents match, and if so, should answer "unchanged".
	 *
	 * The logic does not apply to gitlinks, as ce_match_stat_basic()
	 * already has checked the actual HEAD from the filesystem in the
	 * subproject.  If ie_match_stat() already said it is different,
	 * then we know it is.
	 */
	if ((changed & DATA_CHANGED) &&
	    (S_ISGITLINK(ce->ce_mode) || ce->ce_stat_data.sd_size != 0))
		return changed;

	changed_fs = ce_modified_check_fs(istate, ce, st);
	if (changed_fs)
		return changed | changed_fs;
	return 0;
}

int base_name_compare(const char *name1, int len1, int mode1,
		      const char *name2, int len2, int mode2)
{
	unsigned char c1, c2;
	int len = len1 < len2 ? len1 : len2;
	int cmp;

	cmp = memcmp(name1, name2, len);
	if (cmp)
		return cmp;
	c1 = name1[len];
	c2 = name2[len];
	if (!c1 && S_ISDIR(mode1))
		c1 = '/';
	if (!c2 && S_ISDIR(mode2))
		c2 = '/';
	return (c1 < c2) ? -1 : (c1 > c2) ? 1 : 0;
}
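
/*
 * Example (illustration): a directory "foo" is compared as if it were
 * "foo/", so against a regular file "foo.c" the tiebreak characters
 * are '/' (0x2f) vs. '.' (0x2e), and
 *
 *	base_name_compare("foo", 3, S_IFDIR, "foo.c", 5, S_IFREG) > 0
 *
 * i.e. the directory sorts after the file, exactly as their entries
 * would sort in a tree object.
 */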

/*
 * df_name_compare() is identical to base_name_compare(), except it
 * compares conflicting directory/file entries as equal. Note that
 * while a directory name compares as equal to a regular file, they
 * then individually compare _differently_ to a filename that has
 * a dot after the basename (because '\0' < '.' < '/').
 *
 * This is used by routines that want to traverse the git namespace
 * but then handle conflicting entries together when possible.
 */
int df_name_compare(const char *name1, int len1, int mode1,
		    const char *name2, int len2, int mode2)
{
	int len = len1 < len2 ? len1 : len2, cmp;
	unsigned char c1, c2;

	cmp = memcmp(name1, name2, len);
	if (cmp)
		return cmp;
	/* Directories and files compare equal (same length, same name) */
	if (len1 == len2)
		return 0;
	c1 = name1[len];
	if (!c1 && S_ISDIR(mode1))
		c1 = '/';
	c2 = name2[len];
	if (!c2 && S_ISDIR(mode2))
		c2 = '/';
	if (c1 == '/' && !c2)
		return 0;
	if (c2 == '/' && !c1)
		return 0;
	return c1 - c2;
}

int name_compare(const char *name1, size_t len1, const char *name2, size_t len2)
{
	size_t min_len = (len1 < len2) ? len1 : len2;
	int cmp = memcmp(name1, name2, min_len);
	if (cmp)
		return cmp;
	if (len1 < len2)
		return -1;
	if (len1 > len2)
		return 1;
	return 0;
}

int cache_name_stage_compare(const char *name1, int len1, int stage1, const char *name2, int len2, int stage2)
{
	int cmp;

	cmp = name_compare(name1, len1, name2, len2);
	if (cmp)
		return cmp;

	if (stage1 < stage2)
		return -1;
	if (stage1 > stage2)
		return 1;
	return 0;
}

static int index_name_stage_pos(const struct index_state *istate, const char *name, int namelen, int stage)
{
	int first, last;

	first = 0;
	last = istate->cache_nr;
	while (last > first) {
		int next = first + ((last - first) >> 1);
		struct cache_entry *ce = istate->cache[next];
		int cmp = cache_name_stage_compare(name, namelen, stage, ce->name, ce_namelen(ce), ce_stage(ce));
		if (!cmp)
			return next;
		if (cmp < 0) {
			last = next;
			continue;
		}
		first = next+1;
	}
	return -first-1;
}
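
/*
 * Sketch of the return convention (illustration): a non-negative value
 * is the position of an exact match, while a negative value -n-1
 * encodes the position n at which the entry would have to be inserted,
 * so a caller recovers the insertion point with
 *
 *	int pos = index_name_pos(istate, path, strlen(path));
 *	if (pos < 0)
 *		pos = -pos - 1;
 *
 * remove_file_from_index() below uses exactly this decoding.
 */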

int index_name_pos(const struct index_state *istate, const char *name, int namelen)
{
	return index_name_stage_pos(istate, name, namelen, 0);
}

int remove_index_entry_at(struct index_state *istate, int pos)
{
	struct cache_entry *ce = istate->cache[pos];

	record_resolve_undo(istate, ce);
	remove_name_hash(istate, ce);
	save_or_free_index_entry(istate, ce);
	istate->cache_changed |= CE_ENTRY_REMOVED;
	istate->cache_nr--;
	if (pos >= istate->cache_nr)
		return 0;
	MOVE_ARRAY(istate->cache + pos, istate->cache + pos + 1,
		   istate->cache_nr - pos);
	return 1;
}

/*
 * Remove all cache entries marked for removal, that is where
 * CE_REMOVE is set in ce_flags.  This is much more effective than
 * calling remove_index_entry_at() for each entry to be removed.
 */
void remove_marked_cache_entries(struct index_state *istate, int invalidate)
{
	struct cache_entry **ce_array = istate->cache;
	unsigned int i, j;

	for (i = j = 0; i < istate->cache_nr; i++) {
		if (ce_array[i]->ce_flags & CE_REMOVE) {
			if (invalidate) {
				cache_tree_invalidate_path(istate,
							   ce_array[i]->name);
				untracked_cache_remove_from_index(istate,
								  ce_array[i]->name);
			}
			remove_name_hash(istate, ce_array[i]);
			save_or_free_index_entry(istate, ce_array[i]);
		}
		else
			ce_array[j++] = ce_array[i];
	}
	if (j == istate->cache_nr)
		return;
	istate->cache_changed |= CE_ENTRY_REMOVED;
	istate->cache_nr = j;
}

int remove_file_from_index(struct index_state *istate, const char *path)
{
	int pos = index_name_pos(istate, path, strlen(path));
	if (pos < 0)
		pos = -pos-1;
	cache_tree_invalidate_path(istate, path);
	untracked_cache_remove_from_index(istate, path);
	while (pos < istate->cache_nr && !strcmp(istate->cache[pos]->name, path))
		remove_index_entry_at(istate, pos);
	return 0;
}

static int compare_name(struct cache_entry *ce, const char *path, int namelen)
{
	return namelen != ce_namelen(ce) || memcmp(path, ce->name, namelen);
}

static int index_name_pos_also_unmerged(struct index_state *istate,
	const char *path, int namelen)
{
	int pos = index_name_pos(istate, path, namelen);
	struct cache_entry *ce;

	if (pos >= 0)
		return pos;

	/* maybe unmerged? */
	pos = -1 - pos;
	if (pos >= istate->cache_nr ||
	    compare_name((ce = istate->cache[pos]), path, namelen))
		return -1;

	/* order of preference: stage 2, 1, 3 */
	if (ce_stage(ce) == 1 && pos + 1 < istate->cache_nr &&
	    ce_stage((ce = istate->cache[pos + 1])) == 2 &&
	    !compare_name(ce, path, namelen))
		pos++;
	return pos;
}

static int different_name(struct cache_entry *ce, struct cache_entry *alias)
{
	int len = ce_namelen(ce);
	return ce_namelen(alias) != len || memcmp(ce->name, alias->name, len);
}

/*
 * If we add a filename that aliases in the cache, we will use the
 * name that we already have - but we don't want to update the same
 * alias twice, because that implies that there were actually two
 * different files with aliasing names!
 *
 * So we use the CE_ADDED flag to verify that the alias was an old
 * one before we accept it as a replacement.
 */
static struct cache_entry *create_alias_ce(struct index_state *istate,
					   struct cache_entry *ce,
					   struct cache_entry *alias)
{
	int len;
	struct cache_entry *new_entry;

	if (alias->ce_flags & CE_ADDED)
		die(_("will not add file alias '%s' ('%s' already exists in index)"),
		    ce->name, alias->name);

	/* Ok, create the new entry using the name of the existing alias */
	len = ce_namelen(alias);
	new_entry = make_empty_cache_entry(istate, len);
	memcpy(new_entry->name, alias->name, len);
	copy_cache_entry(new_entry, ce);
	save_or_free_index_entry(istate, ce);
	return new_entry;
}

void set_object_name_for_intent_to_add_entry(struct cache_entry *ce)
{
	struct object_id oid;
	if (write_object_file("", 0, blob_type, &oid))
		die(_("cannot create an empty blob in the object database"));
	oidcpy(&ce->oid, &oid);
}

int add_to_index(struct index_state *istate, const char *path, struct stat *st, int flags)
{
	int namelen, was_same;
	mode_t st_mode = st->st_mode;
	struct cache_entry *ce, *alias = NULL;
	unsigned ce_option = CE_MATCH_IGNORE_VALID|CE_MATCH_IGNORE_SKIP_WORKTREE|CE_MATCH_RACY_IS_DIRTY;
	int verbose = flags & (ADD_CACHE_VERBOSE | ADD_CACHE_PRETEND);
	int pretend = flags & ADD_CACHE_PRETEND;
	int intent_only = flags & ADD_CACHE_INTENT;
	int add_option = (ADD_CACHE_OK_TO_ADD|ADD_CACHE_OK_TO_REPLACE|
			  (intent_only ? ADD_CACHE_NEW_ONLY : 0));
	int hash_flags = HASH_WRITE_OBJECT;
	struct object_id oid;

	if (flags & ADD_CACHE_RENORMALIZE)
		hash_flags |= HASH_RENORMALIZE;

	if (!S_ISREG(st_mode) && !S_ISLNK(st_mode) && !S_ISDIR(st_mode))
		return error(_("%s: can only add regular files, symbolic links or git-directories"), path);

	namelen = strlen(path);
	if (S_ISDIR(st_mode)) {
		if (resolve_gitlink_ref(path, "HEAD", &oid) < 0)
			return error(_("'%s' does not have a commit checked out"), path);
		while (namelen && path[namelen-1] == '/')
			namelen--;
	}
	ce = make_empty_cache_entry(istate, namelen);
	memcpy(ce->name, path, namelen);
	ce->ce_namelen = namelen;
	if (!intent_only)
		fill_stat_cache_info(istate, ce, st);
	else
		ce->ce_flags |= CE_INTENT_TO_ADD;

	if (trust_executable_bit && has_symlinks) {
		ce->ce_mode = create_ce_mode(st_mode);
	} else {
		/* If there is an existing entry, pick the mode bits and type
		 * from it, otherwise assume unexecutable regular file.
		 */
		struct cache_entry *ent;
		int pos = index_name_pos_also_unmerged(istate, path, namelen);

		ent = (0 <= pos) ? istate->cache[pos] : NULL;
		ce->ce_mode = ce_mode_from_stat(ent, st_mode);
	}

	/* When core.ignorecase=true, determine if a directory of the same name but differing
	 * case already exists within the Git repository.  If it does, ensure the directory
	 * case of the file being added to the repository matches (is folded into) the existing
	 * entry's directory case.
	 */
	if (ignore_case) {
		adjust_dirname_case(istate, ce->name);
	}
	if (!(flags & ADD_CACHE_RENORMALIZE)) {
		alias = index_file_exists(istate, ce->name,
					  ce_namelen(ce), ignore_case);
		if (alias &&
		    !ce_stage(alias) &&
		    !ie_match_stat(istate, alias, st, ce_option)) {
			/* Nothing changed, really */
			if (!S_ISGITLINK(alias->ce_mode))
				ce_mark_uptodate(alias);
			alias->ce_flags |= CE_ADDED;

			discard_cache_entry(ce);
			return 0;
		}
	}
	if (!intent_only) {
		if (index_path(istate, &ce->oid, path, st, hash_flags)) {
			discard_cache_entry(ce);
			return error(_("unable to index file '%s'"), path);
		}
	} else
		set_object_name_for_intent_to_add_entry(ce);

	if (ignore_case && alias && different_name(ce, alias))
		ce = create_alias_ce(istate, ce, alias);
	ce->ce_flags |= CE_ADDED;

	/* It was suspected to be racily clean, but it turns out to be Ok */
	was_same = (alias &&
		    !ce_stage(alias) &&
		    oideq(&alias->oid, &ce->oid) &&
		    ce->ce_mode == alias->ce_mode);

	if (pretend)
		discard_cache_entry(ce);
	else if (add_index_entry(istate, ce, add_option)) {
		discard_cache_entry(ce);
		return error(_("unable to add '%s' to index"), path);
	}
	if (verbose && !was_same)
		printf("add '%s'\n", path);
	return 0;
}

int add_file_to_index(struct index_state *istate, const char *path, int flags)
{
	struct stat st;
	if (lstat(path, &st))
		die_errno(_("unable to stat '%s'"), path);
	return add_to_index(istate, path, &st, flags);
}

struct cache_entry *make_empty_cache_entry(struct index_state *istate, size_t len)
{
	return mem_pool__ce_calloc(find_mem_pool(istate), len);
}

struct cache_entry *make_empty_transient_cache_entry(size_t len)
{
	return xcalloc(1, cache_entry_size(len));
}

struct cache_entry *make_cache_entry(struct index_state *istate,
				     unsigned int mode,
				     const struct object_id *oid,
				     const char *path,
				     int stage,
				     unsigned int refresh_options)
{
	struct cache_entry *ce, *ret;
	int len;

	if (!verify_path(path, mode)) {
		error(_("invalid path '%s'"), path);
		return NULL;
	}

	len = strlen(path);
	ce = make_empty_cache_entry(istate, len);

	oidcpy(&ce->oid, oid);
	memcpy(ce->name, path, len);
	ce->ce_flags = create_ce_flags(stage);
	ce->ce_namelen = len;
	ce->ce_mode = create_ce_mode(mode);

	ret = refresh_cache_entry(istate, ce, refresh_options);
	if (ret != ce)
		discard_cache_entry(ce);
	return ret;
}

struct cache_entry *make_transient_cache_entry(unsigned int mode, const struct object_id *oid,
					       const char *path, int stage)
{
	struct cache_entry *ce;
	int len;

	if (!verify_path(path, mode)) {
		error(_("invalid path '%s'"), path);
		return NULL;
	}

	len = strlen(path);
	ce = make_empty_transient_cache_entry(len);

	oidcpy(&ce->oid, oid);
	memcpy(ce->name, path, len);
	ce->ce_flags = create_ce_flags(stage);
	ce->ce_namelen = len;
	ce->ce_mode = create_ce_mode(mode);

	return ce;
}

/*
 * Chmod an index entry with either +x or -x.
 *
 * Returns -1 if the chmod for the particular cache entry failed (if it's
 * not a regular file), -2 if an invalid flip argument is passed in, 0
 * otherwise.
 */
int chmod_index_entry(struct index_state *istate, struct cache_entry *ce,
		      char flip)
{
	if (!S_ISREG(ce->ce_mode))
		return -1;
	switch (flip) {
	case '+':
		ce->ce_mode |= 0111;
		break;
	case '-':
		ce->ce_mode &= ~0111;
		break;
	default:
		return -2;
	}
	cache_tree_invalidate_path(istate, ce->name);
	ce->ce_flags |= CE_UPDATE_IN_BASE;
	mark_fsmonitor_invalid(istate, ce);
	istate->cache_changed |= CE_ENTRY_CHANGED;

	return 0;
}

int ce_same_name(const struct cache_entry *a, const struct cache_entry *b)
{
	int len = ce_namelen(a);
	return ce_namelen(b) == len && !memcmp(a->name, b->name, len);
}

/*
 * We fundamentally don't like some paths: we don't want
 * dot or dot-dot anywhere, and for obvious reasons don't
 * want to recurse into ".git" either.
 *
 * Also, we don't want double slashes or slashes at the
 * end that can make pathnames ambiguous.
 */
static int verify_dotfile(const char *rest, unsigned mode)
{
	/*
	 * The first character was '.', but that
	 * has already been discarded, we now test
	 * the rest.
	 */

	/* "." is not allowed */
	if (*rest == '\0' || is_dir_sep(*rest))
		return 0;

	switch (*rest) {
	/*
	 * ".git" followed by NUL or slash is bad. Note that we match
	 * case-insensitively here, even if ignore_case is not set.
	 * This outlaws ".GIT" everywhere out of an abundance of caution,
	 * since there's really no good reason to allow it.
	 *
	 * Once we've seen ".git", we can also find ".gitmodules", etc (also
	 * case-insensitively).
	 */
	case 'g':
	case 'G':
		if (rest[1] != 'i' && rest[1] != 'I')
			break;
		if (rest[2] != 't' && rest[2] != 'T')
			break;
		if (rest[3] == '\0' || is_dir_sep(rest[3]))
			return 0;
		if (S_ISLNK(mode)) {
			rest += 3;
			if (skip_iprefix(rest, "modules", &rest) &&
			    (*rest == '\0' || is_dir_sep(*rest)))
				return 0;
		}
		break;
	case '.':
		if (rest[1] == '\0' || is_dir_sep(rest[1]))
			return 0;
	}
	return 1;
}

int verify_path(const char *path, unsigned mode)
{
	char c;

	if (has_dos_drive_prefix(path))
		return 0;

	if (!is_valid_path(path))
		return 0;

	goto inside;
	for (;;) {
		if (!c)
			return 1;
		if (is_dir_sep(c)) {
inside:
			if (protect_hfs) {
				if (is_hfs_dotgit(path))
					return 0;
				if (S_ISLNK(mode)) {
					if (is_hfs_dotgitmodules(path))
						return 0;
				}
			}
			if (protect_ntfs) {
				if (is_ntfs_dotgit(path))
					return 0;
				if (S_ISLNK(mode)) {
					if (is_ntfs_dotgitmodules(path))
						return 0;
				}
			}
			c = *path++;
			if ((c == '.' && !verify_dotfile(path, mode)) ||
			    is_dir_sep(c) || c == '\0')
				return 0;
		} else if (c == '\\' && protect_ntfs) {
			if (is_ntfs_dotgit(path))
				return 0;
			if (S_ISLNK(mode)) {
				if (is_ntfs_dotgitmodules(path))
					return 0;
			}
		}

		c = *path++;
	}
}

/*
 * Do we have another file that has the beginning components being a
 * proper superset of the name we're trying to add?
 */
static int has_file_name(struct index_state *istate,
			 const struct cache_entry *ce, int pos, int ok_to_replace)
{
	int retval = 0;
	int len = ce_namelen(ce);
	int stage = ce_stage(ce);
	const char *name = ce->name;

	while (pos < istate->cache_nr) {
		struct cache_entry *p = istate->cache[pos++];

		if (len >= ce_namelen(p))
			break;
		if (memcmp(name, p->name, len))
			break;
		if (ce_stage(p) != stage)
			continue;
		if (p->name[len] != '/')
			continue;
		if (p->ce_flags & CE_REMOVE)
			continue;
		retval = -1;
		if (!ok_to_replace)
			break;
		remove_index_entry_at(istate, --pos);
	}
	return retval;
}

/*
 * Like strcmp(), but also return the offset of the first change.
 * If strings are equal, return the length.
 */
int strcmp_offset(const char *s1, const char *s2, size_t *first_change)
{
	size_t k;

	if (!first_change)
		return strcmp(s1, s2);

	for (k = 0; s1[k] == s2[k]; k++)
		if (s1[k] == '\0')
			break;

	*first_change = k;
	return (unsigned char)s1[k] - (unsigned char)s2[k];
}
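
/*
 * Example (illustration):
 *
 *	size_t k;
 *	int cmp = strcmp_offset("xxx/file_A", "xxx/file_B", &k);
 *
 * yields cmp < 0 ('A' - 'B') and k == 9, the length of the common
 * prefix "xxx/file_".  has_dir_name() below relies on this offset to
 * skip per-component binary searches.
 */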

/*
 * Do we have another file with a pathname that is a proper
 * subset of the name we're trying to add?
 *
 * That is, is there another file in the index with a path
 * that matches a sub-directory in the given entry?
 */
static int has_dir_name(struct index_state *istate,
			const struct cache_entry *ce, int pos, int ok_to_replace)
{
	int retval = 0;
	int stage = ce_stage(ce);
	const char *name = ce->name;
	const char *slash = name + ce_namelen(ce);
	size_t len_eq_last;
	int cmp_last = 0;

	/*
	 * We are frequently called during an iteration on a sorted
	 * list of pathnames and while building a new index.  Therefore,
	 * there is a high probability that this entry will eventually
	 * be appended to the index, rather than inserted in the middle.
	 * If we can confirm that, we can avoid binary searches on the
	 * components of the pathname.
	 *
	 * Compare the entry's full path with the last path in the index.
	 */
	if (istate->cache_nr > 0) {
		cmp_last = strcmp_offset(name,
			istate->cache[istate->cache_nr - 1]->name,
			&len_eq_last);
		if (cmp_last > 0) {
			if (len_eq_last == 0) {
				/*
				 * The entry sorts AFTER the last one in the
				 * index and their paths have no common prefix,
				 * so there cannot be a F/D conflict.
				 */
				return retval;
			} else {
				/*
				 * The entry sorts AFTER the last one in the
				 * index, but has a common prefix.  Fall through
				 * to the loop below to dissect the entry's path
				 * and see where the difference is.
				 */
			}
		} else if (cmp_last == 0) {
			/*
			 * The entry exactly matches the last one in the
			 * index, but because of multiple stage and CE_REMOVE
			 * items, we fall through and let the regular search
			 * code handle it.
			 */
		}
	}

	for (;;) {
		size_t len;

		for (;;) {
			if (*--slash == '/')
				break;
			if (slash <= ce->name)
				return retval;
		}
		len = slash - name;

		if (cmp_last > 0) {
			/*
			 * (len + 1) is a directory boundary (including
			 * the trailing slash).  And since the loop is
			 * decrementing "slash", the first iteration is
			 * the longest directory prefix; subsequent
			 * iterations consider parent directories.
			 */

			if (len + 1 <= len_eq_last) {
				/*
				 * The directory prefix (including the trailing
				 * slash) also appears as a prefix in the last
				 * entry, so the remainder cannot collide (because
				 * strcmp said the whole path was greater).
				 *
				 * EQ: last: xxx/A
				 *     this: xxx/B
				 *
				 * LT: last: xxx/file_A
				 *     this: xxx/file_B
				 */
				return retval;
			}

			if (len > len_eq_last) {
				/*
				 * This part of the directory prefix (excluding
				 * the trailing slash) is longer than the known
				 * equal portions, so this sub-directory cannot
				 * collide with a file.
				 *
				 * GT: last: xxxA
				 *     this: xxxB/file
				 */
				return retval;
			}

			if (istate->cache_nr > 0 &&
			    ce_namelen(istate->cache[istate->cache_nr - 1]) > len) {
				/*
				 * The directory prefix lines up with part of
				 * a longer file or directory name, but sorts
				 * after it, so this sub-directory cannot
				 * collide with a file.
				 *
				 * last: xxx/yy-file (because '-' sorts before '/')
				 * this: xxx/yy/abc
				 */
				return retval;
			}

			/*
			 * This is a possible collision. Fall through and
			 * let the regular search code handle it.
			 *
			 * last: xxx
			 * this: xxx/file
			 */
		}

		pos = index_name_stage_pos(istate, name, len, stage);
		if (pos >= 0) {
			/*
			 * Found one, but not so fast.  This could
			 * be a marker that says "I was here, but
			 * I am being removed".  Such an entry is
			 * not a part of the resulting tree, and
			 * it is Ok to have a directory at the same
			 * path.
			 */
			if (!(istate->cache[pos]->ce_flags & CE_REMOVE)) {
				retval = -1;
				if (!ok_to_replace)
					break;
				remove_index_entry_at(istate, pos);
				continue;
			}
		}
		else
			pos = -pos-1;

		/*
		 * Trivial optimization: if we find an entry that
		 * already matches the sub-directory, then we know
		 * we're ok, and we can exit.
		 */
		while (pos < istate->cache_nr) {
			struct cache_entry *p = istate->cache[pos];
			if ((ce_namelen(p) <= len) ||
			    (p->name[len] != '/') ||
			    memcmp(p->name, name, len))
				break; /* not our subdirectory */
			if (ce_stage(p) == stage && !(p->ce_flags & CE_REMOVE))
				/*
				 * p is at the same stage as our entry, and
				 * is a subdirectory of what we are looking
				 * at, so we cannot have conflicts at our
				 * level or anything shorter.
				 */
				return retval;
			pos++;
		}
	}
	return retval;
}

/* We may be in a situation where we already have path/file and path
 * is being added, or we already have path and path/file is being
 * added.  Either one would result in a nonsense tree that has path
 * twice when git-write-tree tries to write it out.  Prevent it.
 *
 * If ok-to-replace is specified, we remove the conflicting entries
 * from the cache so the caller should recompute the insert position.
 * When this happens, we return non-zero.
 */
static int check_file_directory_conflict(struct index_state *istate,
					 const struct cache_entry *ce,
					 int pos, int ok_to_replace)
{
	int retval;

	/*
	 * When ce is an "I am going away" entry, we allow it to be added
	 */
	if (ce->ce_flags & CE_REMOVE)
		return 0;

	/*
	 * We check if the path is a sub-path of a subsequent pathname
	 * first, since removing those will not change the position
	 * in the array.
	 */
	retval = has_file_name(istate, ce, pos, ok_to_replace);

	/*
	 * Then check if the path might have a clashing sub-directory
	 * before it.
	 */
	return retval + has_dir_name(istate, ce, pos, ok_to_replace);
}

static int add_index_entry_with_check(struct index_state *istate, struct cache_entry *ce, int option)
{
	int pos;
	int ok_to_add = option & ADD_CACHE_OK_TO_ADD;
	int ok_to_replace = option & ADD_CACHE_OK_TO_REPLACE;
	int skip_df_check = option & ADD_CACHE_SKIP_DFCHECK;
	int new_only = option & ADD_CACHE_NEW_ONLY;

	if (!(option & ADD_CACHE_KEEP_CACHE_TREE))
		cache_tree_invalidate_path(istate, ce->name);

	/*
	 * If this entry's path sorts after the last entry in the index,
	 * we can avoid searching for it.
	 */
	if (istate->cache_nr > 0 &&
	    strcmp(ce->name, istate->cache[istate->cache_nr - 1]->name) > 0)
		pos = index_pos_to_insert_pos(istate->cache_nr);
	else
		pos = index_name_stage_pos(istate, ce->name, ce_namelen(ce), ce_stage(ce));

	/* existing match? Just replace it. */
	if (pos >= 0) {
		if (!new_only)
			replace_index_entry(istate, pos, ce);
		return 0;
	}
	pos = -pos-1;

	if (!(option & ADD_CACHE_KEEP_CACHE_TREE))
		untracked_cache_add_to_index(istate, ce->name);

	/*
	 * Inserting a merged entry ("stage 0") into the index
	 * will always replace all non-merged entries.
	 */
	if (pos < istate->cache_nr && ce_stage(ce) == 0) {
		while (ce_same_name(istate->cache[pos], ce)) {
			ok_to_add = 1;
			if (!remove_index_entry_at(istate, pos))
				break;
		}
	}

	if (!ok_to_add)
		return -1;
	if (!verify_path(ce->name, ce->ce_mode))
		return error(_("invalid path '%s'"), ce->name);

	if (!skip_df_check &&
	    check_file_directory_conflict(istate, ce, pos, ok_to_replace)) {
		if (!ok_to_replace)
			return error(_("'%s' appears as both a file and as a directory"),
				     ce->name);
		pos = index_name_stage_pos(istate, ce->name, ce_namelen(ce), ce_stage(ce));
		pos = -pos-1;
	}
	return pos + 1;
}
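
/*
 * Return convention of add_index_entry_with_check() (illustration):
 * 0 means an existing entry was replaced in place, a negative value
 * signals an error, and a positive value is the insertion position
 * plus one; add_index_entry() below undoes the offset with
 * "pos = ret - 1".
 */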

int add_index_entry(struct index_state *istate, struct cache_entry *ce, int option)
{
	int pos;

	if (option & ADD_CACHE_JUST_APPEND)
		pos = istate->cache_nr;
	else {
		int ret;
		ret = add_index_entry_with_check(istate, ce, option);
		if (ret <= 0)
			return ret;
		pos = ret - 1;
	}

	/* Make sure the array is big enough .. */
	ALLOC_GROW(istate->cache, istate->cache_nr + 1, istate->cache_alloc);

	/* Add it in.. */
	istate->cache_nr++;
	if (istate->cache_nr > pos + 1)
		MOVE_ARRAY(istate->cache + pos + 1, istate->cache + pos,
			   istate->cache_nr - pos - 1);
	set_index_entry(istate, pos, ce);
	istate->cache_changed |= CE_ENTRY_ADDED;
	return 0;
}

/*
 * "refresh" does not calculate a new sha1 file or bring the
 * cache up-to-date for mode/content changes. But what it
 * _does_ do is to "re-match" the stat information of a file
 * with the cache, so that you can refresh the cache for a
 * file that hasn't been changed but where the stat entry is
 * out of date.
 *
 * For example, you'd want to do this after doing a "git-read-tree",
 * to link up the stat cache details with the proper files.
 */
static struct cache_entry *refresh_cache_ent(struct index_state *istate,
					     struct cache_entry *ce,
					     unsigned int options, int *err,
					     int *changed_ret)
{
	struct stat st;
	struct cache_entry *updated;
	int changed;
	int refresh = options & CE_MATCH_REFRESH;
	int ignore_valid = options & CE_MATCH_IGNORE_VALID;
	int ignore_skip_worktree = options & CE_MATCH_IGNORE_SKIP_WORKTREE;
	int ignore_missing = options & CE_MATCH_IGNORE_MISSING;
	int ignore_fsmonitor = options & CE_MATCH_IGNORE_FSMONITOR;

	if (!refresh || ce_uptodate(ce))
		return ce;

	if (!ignore_fsmonitor)
		refresh_fsmonitor(istate);
	/*
	 * CE_VALID or CE_SKIP_WORKTREE means the user promised us
	 * that the change to the work tree does not matter and told
	 * us not to worry.
	 */
	if (!ignore_skip_worktree && ce_skip_worktree(ce)) {
		ce_mark_uptodate(ce);
		return ce;
	}
	if (!ignore_valid && (ce->ce_flags & CE_VALID)) {
		ce_mark_uptodate(ce);
		return ce;
	}
	if (!ignore_fsmonitor && (ce->ce_flags & CE_FSMONITOR_VALID)) {
		ce_mark_uptodate(ce);
		return ce;
	}

	if (has_symlink_leading_path(ce->name, ce_namelen(ce))) {
		if (ignore_missing)
			return ce;
		if (err)
			*err = ENOENT;
		return NULL;
	}

	if (lstat(ce->name, &st) < 0) {
		if (ignore_missing && errno == ENOENT)
			return ce;
		if (err)
			*err = errno;
		return NULL;
	}

	changed = ie_match_stat(istate, ce, &st, options);
	if (changed_ret)
		*changed_ret = changed;
	if (!changed) {
		/*
		 * The path is unchanged.  If we were told to ignore
		 * valid bit, then we did the actual stat check and
		 * found that the entry is unmodified.  If the entry
		 * is not marked VALID, this is the place to mark it
		 * valid again, under "assume unchanged" mode.
		 */
		if (ignore_valid && assume_unchanged &&
		    !(ce->ce_flags & CE_VALID))
			; /* mark this one VALID again */
		else {
			/*
			 * We do not mark the index itself "modified"
			 * because CE_UPTODATE flag is in-core only;
			 * we are not going to write this change out.
			 */
			if (!S_ISGITLINK(ce->ce_mode)) {
				ce_mark_uptodate(ce);
				mark_fsmonitor_valid(istate, ce);
			}
			return ce;
		}
	}

	if (ie_modified(istate, ce, &st, options)) {
		if (err)
			*err = EINVAL;
		return NULL;
	}

	updated = make_empty_cache_entry(istate, ce_namelen(ce));
	copy_cache_entry(updated, ce);
	memcpy(updated->name, ce->name, ce->ce_namelen + 1);
	fill_stat_cache_info(istate, updated, &st);
	/*
	 * If ignore_valid is not set, we should leave CE_VALID bit
	 * alone.  Otherwise, paths marked with --no-assume-unchanged
	 * (i.e. things to be edited) will reacquire CE_VALID bit
	 * automatically, which is not really what we want.
	 */
	if (!ignore_valid && assume_unchanged &&
	    !(ce->ce_flags & CE_VALID))
		updated->ce_flags &= ~CE_VALID;

	/* istate->cache_changed is updated in the caller */
	return updated;
}

static void show_file(const char *fmt, const char *name, int in_porcelain,
		      int *first, const char *header_msg)
{
	if (in_porcelain && *first && header_msg) {
		printf("%s\n", header_msg);
		*first = 0;
	}
	printf(fmt, name);
}

int repo_refresh_and_write_index(struct repository *repo,
				 unsigned int refresh_flags,
				 unsigned int write_flags,
				 int gentle,
				 const struct pathspec *pathspec,
				 char *seen, const char *header_msg)
{
	struct lock_file lock_file = LOCK_INIT;
	int fd, ret = 0;

	fd = repo_hold_locked_index(repo, &lock_file, 0);
	if (!gentle && fd < 0)
		return -1;
	if (refresh_index(repo->index, refresh_flags, pathspec, seen, header_msg))
		ret = 1;
	if (0 <= fd && write_locked_index(repo->index, &lock_file, COMMIT_LOCK | write_flags))
		ret = -1;
	return ret;
}

int refresh_index(struct index_state *istate, unsigned int flags,
		  const struct pathspec *pathspec,
		  char *seen, const char *header_msg)
{
	int i;
	int has_errors = 0;
	int really = (flags & REFRESH_REALLY) != 0;
	int allow_unmerged = (flags & REFRESH_UNMERGED) != 0;
	int quiet = (flags & REFRESH_QUIET) != 0;
	int not_new = (flags & REFRESH_IGNORE_MISSING) != 0;
	int ignore_submodules = (flags & REFRESH_IGNORE_SUBMODULES) != 0;
	int first = 1;
	int in_porcelain = (flags & REFRESH_IN_PORCELAIN);
	unsigned int options = (CE_MATCH_REFRESH |
				(really ? CE_MATCH_IGNORE_VALID : 0) |
				(not_new ? CE_MATCH_IGNORE_MISSING : 0));
	const char *modified_fmt;
	const char *deleted_fmt;
	const char *typechange_fmt;
	const char *added_fmt;
	const char *unmerged_fmt;
	struct progress *progress = NULL;

	if (flags & REFRESH_PROGRESS && isatty(2))
		progress = start_delayed_progress(_("Refresh index"),
						  istate->cache_nr);

	trace_performance_enter();
	modified_fmt = in_porcelain ? "M\t%s\n" : "%s: needs update\n";
	deleted_fmt = in_porcelain ? "D\t%s\n" : "%s: needs update\n";
	typechange_fmt = in_porcelain ? "T\t%s\n" : "%s: needs update\n";
	added_fmt = in_porcelain ? "A\t%s\n" : "%s: needs update\n";
	unmerged_fmt = in_porcelain ? "U\t%s\n" : "%s: needs merge\n";
	/*
	 * Use the multi-threaded preload_index() to refresh most of the
	 * cache entries quickly then in the single threaded loop below,
	 * we only have to do the special cases that are left.
	 */
	preload_index(istate, pathspec, 0);
	for (i = 0; i < istate->cache_nr; i++) {
		struct cache_entry *ce, *new_entry;
		int cache_errno = 0;
		int changed = 0;
		int filtered = 0;

		ce = istate->cache[i];
		if (ignore_submodules && S_ISGITLINK(ce->ce_mode))
			continue;

		if (pathspec && !ce_path_match(istate, ce, pathspec, seen))
			filtered = 1;

		if (ce_stage(ce)) {
			while ((i < istate->cache_nr) &&
			       !strcmp(istate->cache[i]->name, ce->name))
				i++;
			i--;
			if (allow_unmerged)
				continue;
			if (!filtered)
				show_file(unmerged_fmt, ce->name, in_porcelain,
					  &first, header_msg);
			has_errors = 1;
			continue;
		}

		if (filtered)
			continue;

		new_entry = refresh_cache_ent(istate, ce, options, &cache_errno, &changed);
		if (new_entry == ce)
			continue;
		if (progress)
			display_progress(progress, i);
		if (!new_entry) {
			const char *fmt;

			if (really && cache_errno == EINVAL) {
				/* If we are doing --really-refresh that
				 * means the index is not valid anymore.
				 */
				ce->ce_flags &= ~CE_VALID;
				ce->ce_flags |= CE_UPDATE_IN_BASE;
				mark_fsmonitor_invalid(istate, ce);
				istate->cache_changed |= CE_ENTRY_CHANGED;
			}
			if (quiet)
				continue;

			if (cache_errno == ENOENT)
				fmt = deleted_fmt;
			else if (ce_intent_to_add(ce))
				fmt = added_fmt; /* must be before other checks */
			else if (changed & TYPE_CHANGED)
				fmt = typechange_fmt;
			else
				fmt = modified_fmt;
			show_file(fmt,
				  ce->name, in_porcelain, &first, header_msg);
			has_errors = 1;
			continue;
		}

		replace_index_entry(istate, i, new_entry);
	}
	if (progress) {
		display_progress(progress, istate->cache_nr);
		stop_progress(&progress);
	}
	trace_performance_leave("refresh index");
	return has_errors;
}

struct cache_entry *refresh_cache_entry(struct index_state *istate,
					struct cache_entry *ce,
					unsigned int options)
{
	return refresh_cache_ent(istate, ce, options, NULL, NULL);
}


/*****************************************************************
 * Index File I/O
 *****************************************************************/

#define INDEX_FORMAT_DEFAULT 3

static unsigned int get_index_format_default(struct repository *r)
{
	char *envversion = getenv("GIT_INDEX_VERSION");
	char *endp;
	unsigned int version = INDEX_FORMAT_DEFAULT;

	if (!envversion) {
		prepare_repo_settings(r);

		if (r->settings.index_version >= 0)
			version = r->settings.index_version;
		if (version < INDEX_FORMAT_LB || INDEX_FORMAT_UB < version) {
			warning(_("index.version set, but the value is invalid.\n"
				  "Using version %i"), INDEX_FORMAT_DEFAULT);
			return INDEX_FORMAT_DEFAULT;
		}
		return version;
	}

	version = strtoul(envversion, &endp, 10);
	if (*endp ||
	    version < INDEX_FORMAT_LB || INDEX_FORMAT_UB < version) {
		warning(_("GIT_INDEX_VERSION set, but the value is invalid.\n"
			  "Using version %i"), INDEX_FORMAT_DEFAULT);
		version = INDEX_FORMAT_DEFAULT;
	}
	return version;
}
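
/*
 * Example (illustration, assuming INDEX_FORMAT_LB == 2 and
 * INDEX_FORMAT_UB == 4 as defined elsewhere in cache.h):
 * GIT_INDEX_VERSION=4 yields 4, while GIT_INDEX_VERSION=9 or a
 * non-numeric value such as GIT_INDEX_VERSION=abc triggers the
 * warning and falls back to INDEX_FORMAT_DEFAULT, i.e. 3.
 */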

/*
 * dev/ino/uid/gid/size are also just tracked to the low 32 bits
 * Again - this is just a (very strong in practice) heuristic that
 * the inode hasn't changed.
 *
 * We save the fields in big-endian order to allow using the
 * index file over NFS transparently.
 */
struct ondisk_cache_entry {
	struct cache_time ctime;
	struct cache_time mtime;
	uint32_t dev;
	uint32_t ino;
	uint32_t mode;
	uint32_t uid;
	uint32_t gid;
	uint32_t size;
	/*
	 * unsigned char hash[hashsz];
	 * uint16_t flags;
	 * if (flags & CE_EXTENDED)
	 *	uint16_t flags2;
	 */
	unsigned char data[GIT_MAX_RAWSZ + 2 * sizeof(uint16_t)];
	char name[FLEX_ARRAY];
};

/* These are only used for v3 or lower */
#define align_padding_size(size, len) ((size + (len) + 8) & ~7) - (size + len)
#define align_flex_name(STRUCT,len) ((offsetof(struct STRUCT,data) + (len) + 8) & ~7)
#define ondisk_cache_entry_size(len) align_flex_name(ondisk_cache_entry,len)
#define ondisk_data_size(flags, len) (the_hash_algo->rawsz + \
				      ((flags & CE_EXTENDED) ? 2 : 1) * sizeof(uint16_t) + len)
#define ondisk_data_size_max(len) (ondisk_data_size(CE_EXTENDED, len))
#define ondisk_ce_size(ce) (ondisk_cache_entry_size(ondisk_data_size((ce)->ce_flags, ce_namelen(ce))))
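
/*
 * Worked example (illustration, assuming SHA-1 hashes, i.e.
 * rawsz == 20, and no padding inside struct ondisk_cache_entry, whose
 * fixed part is then 40 bytes): a non-extended entry for the 9-byte
 * path "path/file" has
 *
 *	ondisk_data_size = 20 (hash) + 2 (flags) + 9 (name) = 31
 *
 * and ondisk_ce_size() rounds 40 + 31 = 71 up to 72, the next
 * multiple of 8, with NUL bytes padding the tail.
 */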
  1508. /* Allow fsck to force verification of the index checksum. */
  1509. int verify_index_checksum;
  1510. /* Allow fsck to force verification of the cache entry order. */
  1511. int verify_ce_order;
  1512. static int verify_hdr(const struct cache_header *hdr, unsigned long size)
  1513. {
  1514. git_hash_ctx c;
  1515. unsigned char hash[GIT_MAX_RAWSZ];
  1516. int hdr_version;
  1517. if (hdr->hdr_signature != htonl(CACHE_SIGNATURE))
  1518. return error(_("bad signature 0x%08x"), hdr->hdr_signature);
  1519. hdr_version = ntohl(hdr->hdr_version);
  1520. if (hdr_version < INDEX_FORMAT_LB || INDEX_FORMAT_UB < hdr_version)
  1521. return error(_("bad index version %d"), hdr_version);
  1522. if (!verify_index_checksum)
  1523. return 0;
  1524. the_hash_algo->init_fn(&c);
  1525. the_hash_algo->update_fn(&c, hdr, size - the_hash_algo->rawsz);
  1526. the_hash_algo->final_fn(hash, &c);
  1527. if (!hasheq(hash, (unsigned char *)hdr + size - the_hash_algo->rawsz))
  1528. return error(_("bad index file sha1 signature"));
  1529. return 0;
  1530. }
  1531. static int read_index_extension(struct index_state *istate,
  1532. const char *ext, const char *data, unsigned long sz)
  1533. {
  1534. switch (CACHE_EXT(ext)) {
  1535. case CACHE_EXT_TREE:
  1536. istate->cache_tree = cache_tree_read(data, sz);
  1537. break;
  1538. case CACHE_EXT_RESOLVE_UNDO:
  1539. istate->resolve_undo = resolve_undo_read(data, sz);
  1540. break;
  1541. case CACHE_EXT_LINK:
  1542. if (read_link_extension(istate, data, sz))
  1543. return -1;
  1544. break;
  1545. case CACHE_EXT_UNTRACKED:
  1546. istate->untracked = read_untracked_extension(data, sz);
  1547. break;
  1548. case CACHE_EXT_FSMONITOR:
  1549. read_fsmonitor_extension(istate, data, sz);
  1550. break;
  1551. case CACHE_EXT_ENDOFINDEXENTRIES:
  1552. case CACHE_EXT_INDEXENTRYOFFSETTABLE:
  1553. /* already handled in do_read_index() */
  1554. break;
  1555. default:
  1556. if (*ext < 'A' || 'Z' < *ext)
  1557. return error(_("index uses %.4s extension, which we do not understand"),
  1558. ext);
  1559. fprintf_ln(stderr, _("ignoring %.4s extension"), ext);
  1560. break;
  1561. }
  1562. return 0;
  1563. }
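/*
 * In other words: an unrecognized extension whose name starts with
 * 'A'..'Z' (say, a hypothetical "XTRA") is reported and skipped, while
 * a name starting outside that range (lowercase, like "link") marks an
 * extension that must be understood, so an unknown one makes the
 * reader error out.
 */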
  1564. static struct cache_entry *create_from_disk(struct mem_pool *ce_mem_pool,
  1565. unsigned int version,
  1566. struct ondisk_cache_entry *ondisk,
  1567. unsigned long *ent_size,
  1568. const struct cache_entry *previous_ce)
  1569. {
  1570. struct cache_entry *ce;
  1571. size_t len;
  1572. const char *name;
  1573. const unsigned hashsz = the_hash_algo->rawsz;
  1574. const uint16_t *flagsp = (const uint16_t *)(ondisk->data + hashsz);
  1575. unsigned int flags;
  1576. size_t copy_len = 0;
  1577. /*
  1578. * Adjacent cache entries tend to share the leading paths, so it makes
  1579. * sense to only store the differences in later entries. In the v4
  1580. * on-disk format of the index, each on-disk cache entry stores the
  1581. * number of bytes to be stripped from the end of the previous name,
  1582. * and the bytes to append to the result, to come up with its name.
  1583. */
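/*
 * A small worked example of the v4 encoding (paths are illustrative):
 * if the previous entry's name is "foo/bar.c" (9 bytes) and this entry
 * encodes varint(5) followed by "baz/x.c" and a NUL, we strip "bar.c"
 * (5 bytes) from the previous name, keep copy_len == 4 ("foo/"), and
 * append the suffix to arrive at "foo/baz/x.c".
 */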
  1584. int expand_name_field = version == 4;
  1585. /* On-disk flags are just 16 bits */
  1586. flags = get_be16(flagsp);
  1587. len = flags & CE_NAMEMASK;
  1588. if (flags & CE_EXTENDED) {
  1589. int extended_flags;
  1590. extended_flags = get_be16(flagsp + 1) << 16;
/* We do not yet understand any bits outside of CE_EXTENDED_FLAGS */
  1592. if (extended_flags & ~CE_EXTENDED_FLAGS)
  1593. die(_("unknown index entry format 0x%08x"), extended_flags);
  1594. flags |= extended_flags;
  1595. name = (const char *)(flagsp + 2);
  1596. }
  1597. else
  1598. name = (const char *)(flagsp + 1);
  1599. if (expand_name_field) {
  1600. const unsigned char *cp = (const unsigned char *)name;
  1601. size_t strip_len, previous_len;
  1602. /* If we're at the beginning of a block, ignore the previous name */
  1603. strip_len = decode_varint(&cp);
  1604. if (previous_ce) {
  1605. previous_len = previous_ce->ce_namelen;
  1606. if (previous_len < strip_len)
  1607. die(_("malformed name field in the index, near path '%s'"),
  1608. previous_ce->name);
  1609. copy_len = previous_len - strip_len;
  1610. }
  1611. name = (const char *)cp;
  1612. }
  1613. if (len == CE_NAMEMASK) {
  1614. len = strlen(name);
  1615. if (expand_name_field)
  1616. len += copy_len;
  1617. }
  1618. ce = mem_pool__ce_alloc(ce_mem_pool, len);
  1619. ce->ce_stat_data.sd_ctime.sec = get_be32(&ondisk->ctime.sec);
  1620. ce->ce_stat_data.sd_mtime.sec = get_be32(&ondisk->mtime.sec);
  1621. ce->ce_stat_data.sd_ctime.nsec = get_be32(&ondisk->ctime.nsec);
  1622. ce->ce_stat_data.sd_mtime.nsec = get_be32(&ondisk->mtime.nsec);
  1623. ce->ce_stat_data.sd_dev = get_be32(&ondisk->dev);
  1624. ce->ce_stat_data.sd_ino = get_be32(&ondisk->ino);
  1625. ce->ce_mode = get_be32(&ondisk->mode);
  1626. ce->ce_stat_data.sd_uid = get_be32(&ondisk->uid);
  1627. ce->ce_stat_data.sd_gid = get_be32(&ondisk->gid);
  1628. ce->ce_stat_data.sd_size = get_be32(&ondisk->size);
  1629. ce->ce_flags = flags & ~CE_NAMEMASK;
  1630. ce->ce_namelen = len;
  1631. ce->index = 0;
  1632. hashcpy(ce->oid.hash, ondisk->data);
  1635. if (expand_name_field) {
  1636. if (copy_len)
  1637. memcpy(ce->name, previous_ce->name, copy_len);
  1638. memcpy(ce->name + copy_len, name, len + 1 - copy_len);
  1639. *ent_size = (name - ((char *)ondisk)) + len + 1 - copy_len;
  1640. } else {
  1641. memcpy(ce->name, name, len + 1);
  1642. *ent_size = ondisk_ce_size(ce);
  1643. }
  1644. return ce;
  1645. }
  1646. static void check_ce_order(struct index_state *istate)
  1647. {
  1648. unsigned int i;
  1649. if (!verify_ce_order)
  1650. return;
  1651. for (i = 1; i < istate->cache_nr; i++) {
  1652. struct cache_entry *ce = istate->cache[i - 1];
  1653. struct cache_entry *next_ce = istate->cache[i];
  1654. int name_compare = strcmp(ce->name, next_ce->name);
  1655. if (0 < name_compare)
  1656. die(_("unordered stage entries in index"));
  1657. if (!name_compare) {
  1658. if (!ce_stage(ce))
  1659. die(_("multiple stage entries for merged file '%s'"),
  1660. ce->name);
  1661. if (ce_stage(ce) > ce_stage(next_ce))
  1662. die(_("unordered stage entries for '%s'"),
  1663. ce->name);
  1664. }
  1665. }
  1666. }
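/*
 * For example (illustrative), a conflicted path must appear as
 * consecutive entries in ascending stage order:
 *
 *     Makefile (stage 1, base)
 *     Makefile (stage 2, ours)
 *     Makefile (stage 3, theirs)
 *
 * A stage-0 entry followed by another entry for the same name would
 * trip the "multiple stage entries for merged file" check above.
 */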
  1667. static void tweak_untracked_cache(struct index_state *istate)
  1668. {
  1669. struct repository *r = the_repository;
  1670. prepare_repo_settings(r);
  1671. if (r->settings.core_untracked_cache == UNTRACKED_CACHE_REMOVE) {
  1672. remove_untracked_cache(istate);
  1673. return;
  1674. }
  1675. if (r->settings.core_untracked_cache == UNTRACKED_CACHE_WRITE)
  1676. add_untracked_cache(istate);
  1677. }
  1678. static void tweak_split_index(struct index_state *istate)
  1679. {
  1680. switch (git_config_get_split_index()) {
  1681. case -1: /* unset: do nothing */
  1682. break;
  1683. case 0: /* false */
  1684. remove_split_index(istate);
  1685. break;
  1686. case 1: /* true */
  1687. add_split_index(istate);
  1688. break;
  1689. default: /* unknown value: do nothing */
  1690. break;
  1691. }
  1692. }
  1693. static void post_read_index_from(struct index_state *istate)
  1694. {
  1695. check_ce_order(istate);
  1696. tweak_untracked_cache(istate);
  1697. tweak_split_index(istate);
  1698. tweak_fsmonitor(istate);
  1699. }
  1700. static size_t estimate_cache_size_from_compressed(unsigned int entries)
  1701. {
  1702. return entries * (sizeof(struct cache_entry) + CACHE_ENTRY_PATH_LENGTH);
  1703. }
  1704. static size_t estimate_cache_size(size_t ondisk_size, unsigned int entries)
  1705. {
  1706. long per_entry = sizeof(struct cache_entry) - sizeof(struct ondisk_cache_entry);
  1707. /*
  1708. * Account for potential alignment differences.
  1709. */
  1710. per_entry += align_padding_size(per_entry, 0);
  1711. return ondisk_size + entries * per_entry;
  1712. }
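/*
 * Back-of-the-envelope example (sizes are platform-dependent and
 * purely illustrative): for a v4 index of 10000 entries with, say,
 * sizeof(struct cache_entry) == 96, estimate_cache_size_from_compressed()
 * reserves 10000 * (96 + 80) bytes, roughly 1.7 MB, up front; for
 * v2/v3 the pool is instead sized from the on-disk size plus a fixed
 * per-entry overhead. Either way, the mem pool grows on demand if the
 * guess turns out to be low.
 */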
  1713. struct index_entry_offset
  1714. {
  1715. /* starting byte offset into index file, count of index entries in this block */
  1716. int offset, nr;
  1717. };
  1718. struct index_entry_offset_table
  1719. {
  1720. int nr;
  1721. struct index_entry_offset entries[FLEX_ARRAY];
  1722. };
  1723. static struct index_entry_offset_table *read_ieot_extension(const char *mmap, size_t mmap_size, size_t offset);
  1724. static void write_ieot_extension(struct strbuf *sb, struct index_entry_offset_table *ieot);
  1725. static size_t read_eoie_extension(const char *mmap, size_t mmap_size);
  1726. static void write_eoie_extension(struct strbuf *sb, git_hash_ctx *eoie_context, size_t offset);
  1727. struct load_index_extensions
  1728. {
  1729. pthread_t pthread;
  1730. struct index_state *istate;
  1731. const char *mmap;
  1732. size_t mmap_size;
  1733. unsigned long src_offset;
  1734. };
  1735. static void *load_index_extensions(void *_data)
  1736. {
  1737. struct load_index_extensions *p = _data;
  1738. unsigned long src_offset = p->src_offset;
  1739. while (src_offset <= p->mmap_size - the_hash_algo->rawsz - 8) {
/*
 * After an array of active_nr index entries, there can be an
 * arbitrary number of extended sections, each of which is prefixed
 * with an extension name (4 bytes) and the section length in 4-byte
 * network byte order.
 */
  1746. uint32_t extsize = get_be32(p->mmap + src_offset + 4);
  1747. if (read_index_extension(p->istate,
  1748. p->mmap + src_offset,
  1749. p->mmap + src_offset + 8,
  1750. extsize) < 0) {
  1751. munmap((void *)p->mmap, p->mmap_size);
  1752. die(_("index file corrupt"));
  1753. }
  1754. src_offset += 8;
  1755. src_offset += extsize;
  1756. }
  1757. return NULL;
  1758. }
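/*
 * The extension area scanned above is a plain sequence of blocks,
 * e.g. (sizes illustrative):
 *
 *     "TREE" <be32 size N> <N bytes of payload>
 *     "REUC" <be32 size M> <M bytes of payload>
 *     ...
 *
 * The loop stops once fewer than rawsz + 8 bytes remain, which is
 * where the trailing checksum lives.
 */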
  1759. /*
  1760. * A helper function that will load the specified range of cache entries
  1761. * from the memory mapped file and add them to the given index.
  1762. */
  1763. static unsigned long load_cache_entry_block(struct index_state *istate,
  1764. struct mem_pool *ce_mem_pool, int offset, int nr, const char *mmap,
  1765. unsigned long start_offset, const struct cache_entry *previous_ce)
  1766. {
  1767. int i;
  1768. unsigned long src_offset = start_offset;
  1769. for (i = offset; i < offset + nr; i++) {
  1770. struct ondisk_cache_entry *disk_ce;
  1771. struct cache_entry *ce;
  1772. unsigned long consumed;
  1773. disk_ce = (struct ondisk_cache_entry *)(mmap + src_offset);
  1774. ce = create_from_disk(ce_mem_pool, istate->version, disk_ce, &consumed, previous_ce);
  1775. set_index_entry(istate, i, ce);
  1776. src_offset += consumed;
  1777. previous_ce = ce;
  1778. }
  1779. return src_offset - start_offset;
  1780. }
  1781. static unsigned long load_all_cache_entries(struct index_state *istate,
  1782. const char *mmap, size_t mmap_size, unsigned long src_offset)
  1783. {
  1784. unsigned long consumed;
  1785. if (istate->version == 4) {
  1786. mem_pool_init(&istate->ce_mem_pool,
  1787. estimate_cache_size_from_compressed(istate->cache_nr));
  1788. } else {
  1789. mem_pool_init(&istate->ce_mem_pool,
  1790. estimate_cache_size(mmap_size, istate->cache_nr));
  1791. }
  1792. consumed = load_cache_entry_block(istate, istate->ce_mem_pool,
  1793. 0, istate->cache_nr, mmap, src_offset, NULL);
  1794. return consumed;
  1795. }
  1796. /*
  1797. * Mostly randomly chosen maximum thread counts: we
  1798. * cap the parallelism to online_cpus() threads, and we want
  1799. * to have at least 10000 cache entries per thread for it to
  1800. * be worth starting a thread.
  1801. */
  1802. #define THREAD_COST (10000)
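/*
 * E.g. an index with 45000 entries asks for 45000 / THREAD_COST == 4
 * threads, which is then capped to online_cpus(); anything under
 * 2 * THREAD_COST entries effectively stays single-threaded.
 */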
  1803. struct load_cache_entries_thread_data
  1804. {
  1805. pthread_t pthread;
  1806. struct index_state *istate;
  1807. struct mem_pool *ce_mem_pool;
  1808. int offset;
  1809. const char *mmap;
  1810. struct index_entry_offset_table *ieot;
  1811. int ieot_start; /* starting index into the ieot array */
  1812. int ieot_blocks; /* count of ieot entries to process */
  1813. unsigned long consumed; /* return # of bytes in index file processed */
  1814. };
  1815. /*
  1816. * A thread proc to run the load_cache_entries() computation
  1817. * across multiple background threads.
  1818. */
  1819. static void *load_cache_entries_thread(void *_data)
  1820. {
  1821. struct load_cache_entries_thread_data *p = _data;
  1822. int i;
  1823. /* iterate across all ieot blocks assigned to this thread */
  1824. for (i = p->ieot_start; i < p->ieot_start + p->ieot_blocks; i++) {
  1825. p->consumed += load_cache_entry_block(p->istate, p->ce_mem_pool,
  1826. p->offset, p->ieot->entries[i].nr, p->mmap, p->ieot->entries[i].offset, NULL);
  1827. p->offset += p->ieot->entries[i].nr;
  1828. }
  1829. return NULL;
  1830. }
  1831. static unsigned long load_cache_entries_threaded(struct index_state *istate, const char *mmap, size_t mmap_size,
  1832. int nr_threads, struct index_entry_offset_table *ieot)
  1833. {
  1834. int i, offset, ieot_blocks, ieot_start, err;
  1835. struct load_cache_entries_thread_data *data;
  1836. unsigned long consumed = 0;
  1837. /* a little sanity checking */
  1838. if (istate->name_hash_initialized)
  1839. BUG("the name hash isn't thread safe");
  1840. mem_pool_init(&istate->ce_mem_pool, 0);
  1841. /* ensure we have no more threads than we have blocks to process */
  1842. if (nr_threads > ieot->nr)
  1843. nr_threads = ieot->nr;
  1844. data = xcalloc(nr_threads, sizeof(*data));
  1845. offset = ieot_start = 0;
  1846. ieot_blocks = DIV_ROUND_UP(ieot->nr, nr_threads);
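/*
 * E.g. ieot->nr == 7 with nr_threads == 3 gives ieot_blocks == 3, so
 * the threads take 3, 3 and 1 blocks; the clamp below trims the final
 * thread's share.
 */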
  1847. for (i = 0; i < nr_threads; i++) {
  1848. struct load_cache_entries_thread_data *p = &data[i];
  1849. int nr, j;
  1850. if (ieot_start + ieot_blocks > ieot->nr)
  1851. ieot_blocks = ieot->nr - ieot_start;
  1852. p->istate = istate;
  1853. p->offset = offset;
  1854. p->mmap = mmap;
  1855. p->ieot = ieot;
  1856. p->ieot_start = ieot_start;
  1857. p->ieot_blocks = ieot_blocks;
  1858. /* create a mem_pool for each thread */
  1859. nr = 0;
  1860. for (j = p->ieot_start; j < p->ieot_start + p->ieot_blocks; j++)
  1861. nr += p->ieot->entries[j].nr;
  1862. if (istate->version == 4) {
  1863. mem_pool_init(&p->ce_mem_pool,
  1864. estimate_cache_size_from_compressed(nr));
  1865. } else {
  1866. mem_pool_init(&p->ce_mem_pool,
  1867. estimate_cache_size(mmap_size, nr));
  1868. }
  1869. err = pthread_create(&p->pthread, NULL, load_cache_entries_thread, p);
  1870. if (err)
  1871. die(_("unable to create load_cache_entries thread: %s"), strerror(err));
  1872. /* increment by the number of cache entries in the ieot block being processed */
  1873. for (j = 0; j < ieot_blocks; j++)
  1874. offset += ieot->entries[ieot_start + j].nr;
  1875. ieot_start += ieot_blocks;
  1876. }
  1877. for (i = 0; i < nr_threads; i++) {
  1878. struct load_cache_entries_thread_data *p = &data[i];
  1879. err = pthread_join(p->pthread, NULL);
  1880. if (err)
  1881. die(_("unable to join load_cache_entries thread: %s"), strerror(err));
  1882. mem_pool_combine(istate->ce_mem_pool, p->ce_mem_pool);
  1883. consumed += p->consumed;
  1884. }
  1885. free(data);
  1886. return consumed;
  1887. }
  1888. /* remember to discard_cache() before reading a different cache! */
  1889. int do_read_index(struct index_state *istate, const char *path, int must_exist)
  1890. {
  1891. int fd;
  1892. struct stat st;
  1893. unsigned long src_offset;
  1894. const struct cache_header *hdr;
  1895. const char *mmap;
  1896. size_t mmap_size;
  1897. struct load_index_extensions p;
  1898. size_t extension_offset = 0;
  1899. int nr_threads, cpus;
  1900. struct index_entry_offset_table *ieot = NULL;
  1901. if (istate->initialized)
  1902. return istate->cache_nr;
  1903. istate->timestamp.sec = 0;
  1904. istate->timestamp.nsec = 0;
  1905. fd = open(path, O_RDONLY);
  1906. if (fd < 0) {
  1907. if (!must_exist && errno == ENOENT)
  1908. return 0;
  1909. die_errno(_("%s: index file open failed"), path);
  1910. }
  1911. if (fstat(fd, &st))
  1912. die_errno(_("%s: cannot stat the open index"), path);
  1913. mmap_size = xsize_t(st.st_size);
  1914. if (mmap_size < sizeof(struct cache_header) + the_hash_algo->rawsz)
  1915. die(_("%s: index file smaller than expected"), path);
  1916. mmap = xmmap_gently(NULL, mmap_size, PROT_READ, MAP_PRIVATE, fd, 0);
  1917. if (mmap == MAP_FAILED)
  1918. die_errno(_("%s: unable to map index file"), path);
  1919. close(fd);
  1920. hdr = (const struct cache_header *)mmap;
  1921. if (verify_hdr(hdr, mmap_size) < 0)
  1922. goto unmap;
  1923. hashcpy(istate->oid.hash, (const unsigned char *)hdr + mmap_size - the_hash_algo->rawsz);
  1924. istate->version = ntohl(hdr->hdr_version);
  1925. istate->cache_nr = ntohl(hdr->hdr_entries);
  1926. istate->cache_alloc = alloc_nr(istate->cache_nr);
  1927. istate->cache = xcalloc(istate->cache_alloc, sizeof(*istate->cache));
  1928. istate->initialized = 1;
  1929. p.istate = istate;
  1930. p.mmap = mmap;
  1931. p.mmap_size = mmap_size;
  1932. src_offset = sizeof(*hdr);
  1933. if (git_config_get_index_threads(&nr_threads))
  1934. nr_threads = 1;
  1935. /* TODO: does creating more threads than cores help? */
  1936. if (!nr_threads) {
  1937. nr_threads = istate->cache_nr / THREAD_COST;
  1938. cpus = online_cpus();
  1939. if (nr_threads > cpus)
  1940. nr_threads = cpus;
  1941. }
  1942. if (!HAVE_THREADS)
  1943. nr_threads = 1;
  1944. if (nr_threads > 1) {
  1945. extension_offset = read_eoie_extension(mmap, mmap_size);
  1946. if (extension_offset) {
  1947. int err;
  1948. p.src_offset = extension_offset;
  1949. err = pthread_create(&p.pthread, NULL, load_index_extensions, &p);
  1950. if (err)
  1951. die(_("unable to create load_index_extensions thread: %s"), strerror(err));
  1952. nr_threads--;
  1953. }
  1954. }
  1955. /*
  1956. * Locate and read the index entry offset table so that we can use it
  1957. * to multi-thread the reading of the cache entries.
  1958. */
  1959. if (extension_offset && nr_threads > 1)
  1960. ieot = read_ieot_extension(mmap, mmap_size, extension_offset);
  1961. if (ieot) {
  1962. src_offset += load_cache_entries_threaded(istate, mmap, mmap_size, nr_threads, ieot);
  1963. free(ieot);
  1964. } else {
  1965. src_offset += load_all_cache_entries(istate, mmap, mmap_size, src_offset);
  1966. }
  1967. istate->timestamp.sec = st.st_mtime;
  1968. istate->timestamp.nsec = ST_MTIME_NSEC(st);
/* if we created a thread, join it; otherwise load the extensions on the primary thread */
  1970. if (extension_offset) {
  1971. int ret = pthread_join(p.pthread, NULL);
  1972. if (ret)
  1973. die(_("unable to join load_index_extensions thread: %s"), strerror(ret));
  1974. } else {
  1975. p.src_offset = src_offset;
  1976. load_index_extensions(&p);
  1977. }
  1978. munmap((void *)mmap, mmap_size);
  1979. /*
  1980. * TODO trace2: replace "the_repository" with the actual repo instance
  1981. * that is associated with the given "istate".
  1982. */
  1983. trace2_data_intmax("index", the_repository, "read/version",
  1984. istate->version);
  1985. trace2_data_intmax("index", the_repository, "read/cache_nr",
  1986. istate->cache_nr);
  1987. return istate->cache_nr;
  1988. unmap:
  1989. munmap((void *)mmap, mmap_size);
  1990. die(_("index file corrupt"));
  1991. }
  1992. /*
  1993. * Signal that the shared index is used by updating its mtime.
  1994. *
* This way, shared index files can be removed if they have not been
* used for some time.
  1997. */
  1998. static void freshen_shared_index(const char *shared_index, int warn)
  1999. {
  2000. if (!check_and_freshen_file(shared_index, 1) && warn)
  2001. warning(_("could not freshen shared index '%s'"), shared_index);
  2002. }
  2003. int read_index_from(struct index_state *istate, const char *path,
  2004. const char *gitdir)
  2005. {
  2006. struct split_index *split_index;
  2007. int ret;
  2008. char *base_oid_hex;
  2009. char *base_path;
  2010. /* istate->initialized covers both .git/index and .git/sharedindex.xxx */
  2011. if (istate->initialized)
  2012. return istate->cache_nr;
  2013. /*
  2014. * TODO trace2: replace "the_repository" with the actual repo instance
  2015. * that is associated with the given "istate".
  2016. */
  2017. trace2_region_enter_printf("index", "do_read_index", the_repository,
  2018. "%s", path);
  2019. trace_performance_enter();
  2020. ret = do_read_index(istate, path, 0);
  2021. trace_performance_leave("read cache %s", path);
  2022. trace2_region_leave_printf("index", "do_read_index", the_repository,
  2023. "%s", path);
  2024. split_index = istate->split_index;
  2025. if (!split_index || is_null_oid(&split_index->base_oid)) {
  2026. post_read_index_from(istate);
  2027. return ret;
  2028. }
  2029. trace_performance_enter();
  2030. if (split_index->base)
  2031. discard_index(split_index->base);
  2032. else
  2033. split_index->base = xcalloc(1, sizeof(*split_index->base));
  2034. base_oid_hex = oid_to_hex(&split_index->base_oid);
  2035. base_path = xstrfmt("%s/sharedindex.%s", gitdir, base_oid_hex);
  2036. trace2_region_enter_printf("index", "shared/do_read_index",
  2037. the_repository, "%s", base_path);
  2038. ret = do_read_index(split_index->base, base_path, 1);
  2039. trace2_region_leave_printf("index", "shared/do_read_index",
  2040. the_repository, "%s", base_path);
  2041. if (!oideq(&split_index->base_oid, &split_index->base->oid))
die(_("broken index, expected %s in %s, got %s"),
  2043. base_oid_hex, base_path,
  2044. oid_to_hex(&split_index->base->oid));
  2045. freshen_shared_index(base_path, 0);
  2046. merge_base_index(istate);
  2047. post_read_index_from(istate);
  2048. trace_performance_leave("read cache %s", base_path);
  2049. free(base_path);
  2050. return ret;
  2051. }
  2052. int is_index_unborn(struct index_state *istate)
  2053. {
  2054. return (!istate->cache_nr && !istate->timestamp.sec);
  2055. }
  2056. int discard_index(struct index_state *istate)
  2057. {
  2058. /*
  2059. * Cache entries in istate->cache[] should have been allocated
  2060. * from the memory pool associated with this index, or from an
  2061. * associated split_index. There is no need to free individual
  2062. * cache entries. validate_cache_entries can detect when this
  2063. * assertion does not hold.
  2064. */
  2065. validate_cache_entries(istate);
  2066. resolve_undo_clear_index(istate);
  2067. istate->cache_nr = 0;
  2068. istate->cache_changed = 0;
  2069. istate->timestamp.sec = 0;
  2070. istate->timestamp.nsec = 0;
  2071. free_name_hash(istate);
  2072. cache_tree_free(&(istate->cache_tree));
  2073. istate->initialized = 0;
  2074. istate->fsmonitor_has_run_once = 0;
  2075. FREE_AND_NULL(istate->cache);
  2076. istate->cache_alloc = 0;
  2077. discard_split_index(istate);
  2078. free_untracked_cache(istate->untracked);
  2079. istate->untracked = NULL;
  2080. if (istate->ce_mem_pool) {
  2081. mem_pool_discard(istate->ce_mem_pool, should_validate_cache_entries());
  2082. istate->ce_mem_pool = NULL;
  2083. }
  2084. return 0;
  2085. }
  2086. /*
  2087. * Validate the cache entries of this index.
  2088. * All cache entries associated with this index
  2089. * should have been allocated by the memory pool
  2090. * associated with this index, or by a referenced
  2091. * split index.
  2092. */
  2093. void validate_cache_entries(const struct index_state *istate)
  2094. {
  2095. int i;
if (!should_validate_cache_entries() || !istate || !istate->initialized)
  2097. return;
  2098. for (i = 0; i < istate->cache_nr; i++) {
  2099. if (!istate) {
  2100. BUG("cache entry is not allocated from expected memory pool");
  2101. } else if (!istate->ce_mem_pool ||
  2102. !mem_pool_contains(istate->ce_mem_pool, istate->cache[i])) {
  2103. if (!istate->split_index ||
  2104. !istate->split_index->base ||
  2105. !istate->split_index->base->ce_mem_pool ||
  2106. !mem_pool_contains(istate->split_index->base->ce_mem_pool, istate->cache[i])) {
  2107. BUG("cache entry is not allocated from expected memory pool");
  2108. }
  2109. }
  2110. }
  2111. if (istate->split_index)
  2112. validate_cache_entries(istate->split_index->base);
  2113. }
  2114. int unmerged_index(const struct index_state *istate)
  2115. {
  2116. int i;
  2117. for (i = 0; i < istate->cache_nr; i++) {
  2118. if (ce_stage(istate->cache[i]))
  2119. return 1;
  2120. }
  2121. return 0;
  2122. }
  2123. int repo_index_has_changes(struct repository *repo,
  2124. struct tree *tree,
  2125. struct strbuf *sb)
  2126. {
  2127. struct index_state *istate = repo->index;
  2128. struct object_id cmp;
  2129. int i;
  2130. if (tree)
  2131. cmp = tree->object.oid;
  2132. if (tree || !get_oid_tree("HEAD", &cmp)) {
  2133. struct diff_options opt;
  2134. repo_diff_setup(repo, &opt);
  2135. opt.flags.exit_with_status = 1;
  2136. if (!sb)
  2137. opt.flags.quick = 1;
  2138. do_diff_cache(&cmp, &opt);
  2139. diffcore_std(&opt);
  2140. for (i = 0; sb && i < diff_queued_diff.nr; i++) {
  2141. if (i)
  2142. strbuf_addch(sb, ' ');
  2143. strbuf_addstr(sb, diff_queued_diff.queue[i]->two->path);
  2144. }
  2145. diff_flush(&opt);
  2146. return opt.flags.has_changes != 0;
  2147. } else {
  2148. for (i = 0; sb && i < istate->cache_nr; i++) {
  2149. if (i)
  2150. strbuf_addch(sb, ' ');
  2151. strbuf_addstr(sb, istate->cache[i]->name);
  2152. }
  2153. return !!istate->cache_nr;
  2154. }
  2155. }
  2156. #define WRITE_BUFFER_SIZE 8192
  2157. static unsigned char write_buffer[WRITE_BUFFER_SIZE];
  2158. static unsigned long write_buffer_len;
  2159. static int ce_write_flush(git_hash_ctx *context, int fd)
  2160. {
  2161. unsigned int buffered = write_buffer_len;
  2162. if (buffered) {
  2163. the_hash_algo->update_fn(context, write_buffer, buffered);
  2164. if (write_in_full(fd, write_buffer, buffered) < 0)
  2165. return -1;
  2166. write_buffer_len = 0;
  2167. }
  2168. return 0;
  2169. }
  2170. static int ce_write(git_hash_ctx *context, int fd, void *data, unsigned int len)
  2171. {
  2172. while (len) {
  2173. unsigned int buffered = write_buffer_len;
  2174. unsigned int partial = WRITE_BUFFER_SIZE - buffered;
  2175. if (partial > len)
  2176. partial = len;
  2177. memcpy(write_buffer + buffered, data, partial);
  2178. buffered += partial;
  2179. if (buffered == WRITE_BUFFER_SIZE) {
  2180. write_buffer_len = buffered;
  2181. if (ce_write_flush(context, fd))
  2182. return -1;
  2183. buffered = 0;
  2184. }
  2185. write_buffer_len = buffered;
  2186. len -= partial;
  2187. data = (char *) data + partial;
  2188. }
  2189. return 0;
  2190. }
  2191. static int write_index_ext_header(git_hash_ctx *context, git_hash_ctx *eoie_context,
  2192. int fd, unsigned int ext, unsigned int sz)
  2193. {
  2194. ext = htonl(ext);
  2195. sz = htonl(sz);
  2196. if (eoie_context) {
  2197. the_hash_algo->update_fn(eoie_context, &ext, 4);
  2198. the_hash_algo->update_fn(eoie_context, &sz, 4);
  2199. }
  2200. return ((ce_write(context, fd, &ext, 4) < 0) ||
  2201. (ce_write(context, fd, &sz, 4) < 0)) ? -1 : 0;
  2202. }
  2203. static int ce_flush(git_hash_ctx *context, int fd, unsigned char *hash)
  2204. {
  2205. unsigned int left = write_buffer_len;
  2206. if (left) {
  2207. write_buffer_len = 0;
  2208. the_hash_algo->update_fn(context, write_buffer, left);
  2209. }
  2210. /* Flush first if not enough space for hash signature */
  2211. if (left + the_hash_algo->rawsz > WRITE_BUFFER_SIZE) {
  2212. if (write_in_full(fd, write_buffer, left) < 0)
  2213. return -1;
  2214. left = 0;
  2215. }
  2216. /* Append the hash signature at the end */
  2217. the_hash_algo->final_fn(write_buffer + left, context);
  2218. hashcpy(hash, write_buffer + left);
  2219. left += the_hash_algo->rawsz;
  2220. return (write_in_full(fd, write_buffer, left) < 0) ? -1 : 0;
  2221. }
  2222. static void ce_smudge_racily_clean_entry(struct index_state *istate,
  2223. struct cache_entry *ce)
  2224. {
  2225. /*
  2226. * The only thing we care about in this function is to smudge the
* falsely clean entry due to the touch-update-touch race, so we leave
* everything else as it is. We are called for entries whose
  2229. * ce_stat_data.sd_mtime match the index file mtime.
  2230. *
  2231. * Note that this actually does not do much for gitlinks, for
  2232. * which ce_match_stat_basic() always goes to the actual
  2233. * contents. The caller checks with is_racy_timestamp() which
  2234. * always says "no" for gitlinks, so we are not called for them ;-)
  2235. */
  2236. struct stat st;
  2237. if (lstat(ce->name, &st) < 0)
  2238. return;
  2239. if (ce_match_stat_basic(ce, &st))
  2240. return;
  2241. if (ce_modified_check_fs(istate, ce, &st)) {
  2242. /* This is "racily clean"; smudge it. Note that this
* is tricky code. At first glance, it may appear
  2244. * that it can break with this sequence:
  2245. *
  2246. * $ echo xyzzy >frotz
  2247. * $ git-update-index --add frotz
  2248. * $ : >frotz
  2249. * $ sleep 3
  2250. * $ echo filfre >nitfol
  2251. * $ git-update-index --add nitfol
  2252. *
  2253. * but it does not. When the second update-index runs,
  2254. * it notices that the entry "frotz" has the same timestamp
* as the index, and if we were to smudge it by resetting its
* size to zero here, then the object name recorded
* in the index is that of the 6-byte file, but the cached stat information
  2258. * becomes zero --- which would then match what we would
  2259. * obtain from the filesystem next time we stat("frotz").
  2260. *
  2261. * However, the second update-index, before calling
  2262. * this function, notices that the cached size is 6
  2263. * bytes and what is on the filesystem is an empty
  2264. * file, and never calls us, so the cached size information
  2265. * for "frotz" stays 6 which does not match the filesystem.
  2266. */
  2267. ce->ce_stat_data.sd_size = 0;
  2268. }
  2269. }
  2270. /* Copy miscellaneous fields but not the name */
  2271. static void copy_cache_entry_to_ondisk(struct ondisk_cache_entry *ondisk,
  2272. struct cache_entry *ce)
  2273. {
  2274. short flags;
  2275. const unsigned hashsz = the_hash_algo->rawsz;
  2276. uint16_t *flagsp = (uint16_t *)(ondisk->data + hashsz);
  2277. ondisk->ctime.sec = htonl(ce->ce_stat_data.sd_ctime.sec);
  2278. ondisk->mtime.sec = htonl(ce->ce_stat_data.sd_mtime.sec);
  2279. ondisk->ctime.nsec = htonl(ce->ce_stat_data.sd_ctime.nsec);
  2280. ondisk->mtime.nsec = htonl(ce->ce_stat_data.sd_mtime.nsec);
  2281. ondisk->dev = htonl(ce->ce_stat_data.sd_dev);
  2282. ondisk->ino = htonl(ce->ce_stat_data.sd_ino);
  2283. ondisk->mode = htonl(ce->ce_mode);
  2284. ondisk->uid = htonl(ce->ce_stat_data.sd_uid);
  2285. ondisk->gid = htonl(ce->ce_stat_data.sd_gid);
  2286. ondisk->size = htonl(ce->ce_stat_data.sd_size);
  2287. hashcpy(ondisk->data, ce->oid.hash);
  2288. flags = ce->ce_flags & ~CE_NAMEMASK;
  2289. flags |= (ce_namelen(ce) >= CE_NAMEMASK ? CE_NAMEMASK : ce_namelen(ce));
  2290. flagsp[0] = htons(flags);
  2291. if (ce->ce_flags & CE_EXTENDED) {
  2292. flagsp[1] = htons((ce->ce_flags & CE_EXTENDED_FLAGS) >> 16);
  2293. }
  2294. }
  2295. static int ce_write_entry(git_hash_ctx *c, int fd, struct cache_entry *ce,
  2296. struct strbuf *previous_name, struct ondisk_cache_entry *ondisk)
  2297. {
  2298. int size;
  2299. int result;
  2300. unsigned int saved_namelen;
  2301. int stripped_name = 0;
  2302. static unsigned char padding[8] = { 0x00 };
  2303. if (ce->ce_flags & CE_STRIP_NAME) {
  2304. saved_namelen = ce_namelen(ce);
  2305. ce->ce_namelen = 0;
  2306. stripped_name = 1;
  2307. }
  2308. size = offsetof(struct ondisk_cache_entry,data) + ondisk_data_size(ce->ce_flags, 0);
  2309. if (!previous_name) {
  2310. int len = ce_namelen(ce);
  2311. copy_cache_entry_to_ondisk(ondisk, ce);
  2312. result = ce_write(c, fd, ondisk, size);
  2313. if (!result)
  2314. result = ce_write(c, fd, ce->name, len);
  2315. if (!result)
  2316. result = ce_write(c, fd, padding, align_padding_size(size, len));
  2317. } else {
  2318. int common, to_remove, prefix_size;
  2319. unsigned char to_remove_vi[16];
  2320. for (common = 0;
  2321. (ce->name[common] &&
  2322. common < previous_name->len &&
  2323. ce->name[common] == previous_name->buf[common]);
  2324. common++)
  2325. ; /* still matching */
  2326. to_remove = previous_name->len - common;
  2327. prefix_size = encode_varint(to_remove, to_remove_vi);
  2328. copy_cache_entry_to_ondisk(ondisk, ce);
  2329. result = ce_write(c, fd, ondisk, size);
  2330. if (!result)
  2331. result = ce_write(c, fd, to_remove_vi, prefix_size);
  2332. if (!result)
  2333. result = ce_write(c, fd, ce->name + common, ce_namelen(ce) - common);
  2334. if (!result)
  2335. result = ce_write(c, fd, padding, 1);
  2336. strbuf_splice(previous_name, common, to_remove,
  2337. ce->name + common, ce_namelen(ce) - common);
  2338. }
  2339. if (stripped_name) {
  2340. ce->ce_namelen = saved_namelen;
  2341. ce->ce_flags &= ~CE_STRIP_NAME;
  2342. }
  2343. return result;
  2344. }
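/*
 * Worked example of the v4 write path above (paths illustrative):
 * with previous_name "foo/bar.c" and ce->name "foo/baz.c", the common
 * prefix is "foo/ba" (6 bytes), so to_remove == 3 and we emit
 * varint(3) followed by "z.c" plus a single NUL byte; the reader
 * strips 3 bytes from the previous name and appends "z.c" to
 * reconstruct "foo/baz.c".
 */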
  2345. /*
* This function verifies whether index_state records the correct checksum
* of the index file. It does not die on other failures; it just returns 0.
  2348. */
  2349. static int verify_index_from(const struct index_state *istate, const char *path)
  2350. {
  2351. int fd;
  2352. ssize_t n;
  2353. struct stat st;
  2354. unsigned char hash[GIT_MAX_RAWSZ];
  2355. if (!istate->initialized)
  2356. return 0;
  2357. fd = open(path, O_RDONLY);
  2358. if (fd < 0)
  2359. return 0;
  2360. if (fstat(fd, &st))
  2361. goto out;
  2362. if (st.st_size < sizeof(struct cache_header) + the_hash_algo->rawsz)
  2363. goto out;
  2364. n = pread_in_full(fd, hash, the_hash_algo->rawsz, st.st_size - the_hash_algo->rawsz);
  2365. if (n != the_hash_algo->rawsz)
  2366. goto out;
  2367. if (!hasheq(istate->oid.hash, hash))
  2368. goto out;
  2369. close(fd);
  2370. return 1;
  2371. out:
  2372. close(fd);
  2373. return 0;
  2374. }
  2375. static int repo_verify_index(struct repository *repo)
  2376. {
  2377. return verify_index_from(repo->index, repo->index_file);
  2378. }
  2379. static int has_racy_timestamp(struct index_state *istate)
  2380. {
  2381. int entries = istate->cache_nr;
  2382. int i;
  2383. for (i = 0; i < entries; i++) {
  2384. struct cache_entry *ce = istate->cache[i];
  2385. if (is_racy_timestamp(istate, ce))
  2386. return 1;
  2387. }
  2388. return 0;
  2389. }
  2390. void repo_update_index_if_able(struct repository *repo,
  2391. struct lock_file *lockfile)
  2392. {
  2393. if ((repo->index->cache_changed ||
  2394. has_racy_timestamp(repo->index)) &&
  2395. repo_verify_index(repo))
  2396. write_locked_index(repo->index, lockfile, COMMIT_LOCK);
  2397. else
  2398. rollback_lock_file(lockfile);
  2399. }
  2400. static int record_eoie(void)
  2401. {
  2402. int val;
  2403. if (!git_config_get_bool("index.recordendofindexentries", &val))
  2404. return val;
  2405. /*
  2406. * As a convenience, the end of index entries extension
  2407. * used for threading is written by default if the user
  2408. * explicitly requested threaded index reads.
  2409. */
  2410. return !git_config_get_index_threads(&val) && val != 1;
  2411. }
  2412. static int record_ieot(void)
  2413. {
  2414. int val;
  2415. if (!git_config_get_bool("index.recordoffsettable", &val))
  2416. return val;
  2417. /*
  2418. * As a convenience, the offset table used for threading is
  2419. * written by default if the user explicitly requested
  2420. * threaded index reads.
  2421. */
  2422. return !git_config_get_index_threads(&val) && val != 1;
  2423. }
  2424. /*
  2425. * On success, `tempfile` is closed. If it is the temporary file
  2426. * of a `struct lock_file`, we will therefore effectively perform
* a `close_lock_file_gently()`. Since that is an implementation
  2428. * detail of lockfiles, callers of `do_write_index()` should not
  2429. * rely on it.
  2430. */
  2431. static int do_write_index(struct index_state *istate, struct tempfile *tempfile,
  2432. int strip_extensions)
  2433. {
  2434. uint64_t start = getnanotime();
  2435. int newfd = tempfile->fd;
  2436. git_hash_ctx c, eoie_c;
  2437. struct cache_header hdr;
  2438. int i, err = 0, removed, extended, hdr_version;
  2439. struct cache_entry **cache = istate->cache;
  2440. int entries = istate->cache_nr;
  2441. struct stat st;
  2442. struct ondisk_cache_entry ondisk;
  2443. struct strbuf previous_name_buf = STRBUF_INIT, *previous_name;
  2444. int drop_cache_tree = istate->drop_cache_tree;
  2445. off_t offset;
  2446. int ieot_entries = 1;
  2447. struct index_entry_offset_table *ieot = NULL;
  2448. int nr, nr_threads;
  2449. for (i = removed = extended = 0; i < entries; i++) {
  2450. if (cache[i]->ce_flags & CE_REMOVE)
  2451. removed++;
  2452. /* reduce extended entries if possible */
  2453. cache[i]->ce_flags &= ~CE_EXTENDED;
  2454. if (cache[i]->ce_flags & CE_EXTENDED_FLAGS) {
  2455. extended++;
  2456. cache[i]->ce_flags |= CE_EXTENDED;
  2457. }
  2458. }
  2459. if (!istate->version) {
  2460. istate->version = get_index_format_default(the_repository);
  2461. if (git_env_bool("GIT_TEST_SPLIT_INDEX", 0))
  2462. init_split_index(istate);
  2463. }
  2464. /* demote version 3 to version 2 when the latter suffices */
  2465. if (istate->version == 3 || istate->version == 2)
  2466. istate->version = extended ? 3 : 2;
  2467. hdr_version = istate->version;
  2468. hdr.hdr_signature = htonl(CACHE_SIGNATURE);
  2469. hdr.hdr_version = htonl(hdr_version);
  2470. hdr.hdr_entries = htonl(entries - removed);
  2471. the_hash_algo->init_fn(&c);
  2472. if (ce_write(&c, newfd, &hdr, sizeof(hdr)) < 0)
  2473. return -1;
  2474. if (!HAVE_THREADS || git_config_get_index_threads(&nr_threads))
  2475. nr_threads = 1;
  2476. if (nr_threads != 1 && record_ieot()) {
  2477. int ieot_blocks, cpus;
  2478. /*
* ensure the default number of ieot blocks maps evenly to the
* default number of threads that will process them, leaving
* room for the thread that loads the index extensions.
  2482. */
  2483. if (!nr_threads) {
  2484. ieot_blocks = istate->cache_nr / THREAD_COST;
  2485. cpus = online_cpus();
  2486. if (ieot_blocks > cpus - 1)
  2487. ieot_blocks = cpus - 1;
  2488. } else {
  2489. ieot_blocks = nr_threads;
  2490. if (ieot_blocks > istate->cache_nr)
  2491. ieot_blocks = istate->cache_nr;
  2492. }
  2493. /*
  2494. * no reason to write out the IEOT extension if we don't
  2495. * have enough blocks to utilize multi-threading
  2496. */
  2497. if (ieot_blocks > 1) {
  2498. ieot = xcalloc(1, sizeof(struct index_entry_offset_table)
  2499. + (ieot_blocks * sizeof(struct index_entry_offset)));
  2500. ieot_entries = DIV_ROUND_UP(entries, ieot_blocks);
  2501. }
  2502. }
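/*
 * E.g. (illustrative): with index.threads = true (auto, i.e.
 * nr_threads == 0), 25000 entries and 8 online CPUs, ieot_blocks
 * becomes min(25000 / THREAD_COST, 8 - 1) == 2, so ieot_entries ==
 * DIV_ROUND_UP(25000, 2) == 12500 entries per IEOT block.
 */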
  2503. offset = lseek(newfd, 0, SEEK_CUR);
  2504. if (offset < 0) {
  2505. free(ieot);
  2506. return -1;
  2507. }
  2508. offset += write_buffer_len;
  2509. nr = 0;
  2510. previous_name = (hdr_version == 4) ? &previous_name_buf : NULL;
  2511. for (i = 0; i < entries; i++) {
  2512. struct cache_entry *ce = cache[i];
  2513. if (ce->ce_flags & CE_REMOVE)
  2514. continue;
  2515. if (!ce_uptodate(ce) && is_racy_timestamp(istate, ce))
  2516. ce_smudge_racily_clean_entry(istate, ce);
  2517. if (is_null_oid(&ce->oid)) {
  2518. static const char msg[] = "cache entry has null sha1: %s";
  2519. static int allow = -1;
  2520. if (allow < 0)
  2521. allow = git_env_bool("GIT_ALLOW_NULL_SHA1", 0);
  2522. if (allow)
  2523. warning(msg, ce->name);
  2524. else
  2525. err = error(msg, ce->name);
  2526. drop_cache_tree = 1;
  2527. }
  2528. if (ieot && i && (i % ieot_entries == 0)) {
  2529. ieot->entries[ieot->nr].nr = nr;
  2530. ieot->entries[ieot->nr].offset = offset;
  2531. ieot->nr++;
  2532. /*
  2533. * If we have a V4 index, set the first byte to an invalid
  2534. * character to ensure there is nothing common with the previous
  2535. * entry
  2536. */
  2537. if (previous_name)
  2538. previous_name->buf[0] = 0;
  2539. nr = 0;
  2540. offset = lseek(newfd, 0, SEEK_CUR);
  2541. if (offset < 0) {
  2542. free(ieot);
  2543. return -1;
  2544. }
  2545. offset += write_buffer_len;
  2546. }
  2547. if (ce_write_entry(&c, newfd, ce, previous_name, (struct ondisk_cache_entry *)&ondisk) < 0)
  2548. err = -1;
  2549. if (err)
  2550. break;
  2551. nr++;
  2552. }
  2553. if (ieot && nr) {
  2554. ieot->entries[ieot->nr].nr = nr;
  2555. ieot->entries[ieot->nr].offset = offset;
  2556. ieot->nr++;
  2557. }
  2558. strbuf_release(&previous_name_buf);
  2559. if (err) {
  2560. free(ieot);
  2561. return err;
  2562. }
  2563. /* Write extension data here */
  2564. offset = lseek(newfd, 0, SEEK_CUR);
  2565. if (offset < 0) {
  2566. free(ieot);
  2567. return -1;
  2568. }
  2569. offset += write_buffer_len;
  2570. the_hash_algo->init_fn(&eoie_c);
  2571. /*
* Let's write out CACHE_EXT_INDEXENTRYOFFSETTABLE first so that we
  2573. * can minimize the number of extensions we have to scan through to
  2574. * find it during load. Write it out regardless of the
  2575. * strip_extensions parameter as we need it when loading the shared
  2576. * index.
  2577. */
  2578. if (ieot) {
  2579. struct strbuf sb = STRBUF_INIT;
  2580. write_ieot_extension(&sb, ieot);
  2581. err = write_index_ext_header(&c, &eoie_c, newfd, CACHE_EXT_INDEXENTRYOFFSETTABLE, sb.len) < 0
  2582. || ce_write(&c, newfd, sb.buf, sb.len) < 0;
  2583. strbuf_release(&sb);
  2584. free(ieot);
  2585. if (err)
  2586. return -1;
  2587. }
  2588. if (!strip_extensions && istate->split_index &&
  2589. !is_null_oid(&istate->split_index->base_oid)) {
  2590. struct strbuf sb = STRBUF_INIT;
  2591. err = write_link_extension(&sb, istate) < 0 ||
  2592. write_index_ext_header(&c, &eoie_c, newfd, CACHE_EXT_LINK,
  2593. sb.len) < 0 ||
  2594. ce_write(&c, newfd, sb.buf, sb.len) < 0;
  2595. strbuf_release(&sb);
  2596. if (err)
  2597. return -1;
  2598. }
  2599. if (!strip_extensions && !drop_cache_tree && istate->cache_tree) {
  2600. struct strbuf sb = STRBUF_INIT;
  2601. cache_tree_write(&sb, istate->cache_tree);
  2602. err = write_index_ext_header(&c, &eoie_c, newfd, CACHE_EXT_TREE, sb.len) < 0
  2603. || ce_write(&c, newfd, sb.buf, sb.len) < 0;
  2604. strbuf_release(&sb);
  2605. if (err)
  2606. return -1;
  2607. }
  2608. if (!strip_extensions && istate->resolve_undo) {
  2609. struct strbuf sb = STRBUF_INIT;
  2610. resolve_undo_write(&sb, istate->resolve_undo);
  2611. err = write_index_ext_header(&c, &eoie_c, newfd, CACHE_EXT_RESOLVE_UNDO,
  2612. sb.len) < 0
  2613. || ce_write(&c, newfd, sb.buf, sb.len) < 0;
  2614. strbuf_release(&sb);
  2615. if (err)
  2616. return -1;
  2617. }
  2618. if (!strip_extensions && istate->untracked) {
  2619. struct strbuf sb = STRBUF_INIT;
  2620. write_untracked_extension(&sb, istate->untracked);
  2621. err = write_index_ext_header(&c, &eoie_c, newfd, CACHE_EXT_UNTRACKED,
  2622. sb.len) < 0 ||
  2623. ce_write(&c, newfd, sb.buf, sb.len) < 0;
  2624. strbuf_release(&sb);
  2625. if (err)
  2626. return -1;
  2627. }
  2628. if (!strip_extensions && istate->fsmonitor_last_update) {
  2629. struct strbuf sb = STRBUF_INIT;
  2630. write_fsmonitor_extension(&sb, istate);
  2631. err = write_index_ext_header(&c, &eoie_c, newfd, CACHE_EXT_FSMONITOR, sb.len) < 0
  2632. || ce_write(&c, newfd, sb.buf, sb.len) < 0;
  2633. strbuf_release(&sb);
  2634. if (err)
  2635. return -1;
  2636. }
  2637. /*
  2638. * CACHE_EXT_ENDOFINDEXENTRIES must be written as the last entry before the SHA1
  2639. * so that it can be found and processed before all the index entries are
  2640. * read. Write it out regardless of the strip_extensions parameter as we need it
  2641. * when loading the shared index.
  2642. */
  2643. if (offset && record_eoie()) {
  2644. struct strbuf sb = STRBUF_INIT;
  2645. write_eoie_extension(&sb, &eoie_c, offset);
  2646. err = write_index_ext_header(&c, NULL, newfd, CACHE_EXT_ENDOFINDEXENTRIES, sb.len) < 0
  2647. || ce_write(&c, newfd, sb.buf, sb.len) < 0;
  2648. strbuf_release(&sb);
  2649. if (err)
  2650. return -1;
  2651. }
  2652. if (ce_flush(&c, newfd, istate->oid.hash))
  2653. return -1;
  2654. if (close_tempfile_gently(tempfile)) {
  2655. error(_("could not close '%s'"), tempfile->filename.buf);
  2656. return -1;
  2657. }
  2658. if (stat(tempfile->filename.buf, &st))
  2659. return -1;
  2660. istate->timestamp.sec = (unsigned int)st.st_mtime;
  2661. istate->timestamp.nsec = ST_MTIME_NSEC(st);
  2662. trace_performance_since(start, "write index, changed mask = %x", istate->cache_changed);
  2663. /*
  2664. * TODO trace2: replace "the_repository" with the actual repo instance
  2665. * that is associated with the given "istate".
  2666. */
  2667. trace2_data_intmax("index", the_repository, "write/version",
  2668. istate->version);
  2669. trace2_data_intmax("index", the_repository, "write/cache_nr",
  2670. istate->cache_nr);
  2671. return 0;
  2672. }
  2673. void set_alternate_index_output(const char *name)
  2674. {
  2675. alternate_index_output = name;
  2676. }
  2677. static int commit_locked_index(struct lock_file *lk)
  2678. {
  2679. if (alternate_index_output)
  2680. return commit_lock_file_to(lk, alternate_index_output);
  2681. else
  2682. return commit_lock_file(lk);
  2683. }
  2684. static int do_write_locked_index(struct index_state *istate, struct lock_file *lock,
  2685. unsigned flags)
  2686. {
  2687. int ret;
  2688. /*
  2689. * TODO trace2: replace "the_repository" with the actual repo instance
  2690. * that is associated with the given "istate".
  2691. */
  2692. trace2_region_enter_printf("index", "do_write_index", the_repository,
  2693. "%s", lock->tempfile->filename.buf);
  2694. ret = do_write_index(istate, lock->tempfile, 0);
  2695. trace2_region_leave_printf("index", "do_write_index", the_repository,
  2696. "%s", lock->tempfile->filename.buf);
  2697. if (ret)
  2698. return ret;
  2699. if (flags & COMMIT_LOCK)
  2700. ret = commit_locked_index(lock);
  2701. else
  2702. ret = close_lock_file_gently(lock);
  2703. run_hook_le(NULL, "post-index-change",
  2704. istate->updated_workdir ? "1" : "0",
  2705. istate->updated_skipworktree ? "1" : "0", NULL);
  2706. istate->updated_workdir = 0;
  2707. istate->updated_skipworktree = 0;
  2708. return ret;
  2709. }
  2710. static int write_split_index(struct index_state *istate,
  2711. struct lock_file *lock,
  2712. unsigned flags)
  2713. {
  2714. int ret;
  2715. prepare_to_write_split_index(istate);
  2716. ret = do_write_locked_index(istate, lock, flags);
  2717. finish_writing_split_index(istate);
  2718. return ret;
  2719. }
  2720. static const char *shared_index_expire = "2.weeks.ago";
  2721. static unsigned long get_shared_index_expire_date(void)
  2722. {
  2723. static unsigned long shared_index_expire_date;
  2724. static int shared_index_expire_date_prepared;
  2725. if (!shared_index_expire_date_prepared) {
  2726. git_config_get_expiry("splitindex.sharedindexexpire",
  2727. &shared_index_expire);
  2728. shared_index_expire_date = approxidate(shared_index_expire);
  2729. shared_index_expire_date_prepared = 1;
  2730. }
  2731. return shared_index_expire_date;
  2732. }
  2733. static int should_delete_shared_index(const char *shared_index_path)
  2734. {
  2735. struct stat st;
  2736. unsigned long expiration;
  2737. /* Check timestamp */
  2738. expiration = get_shared_index_expire_date();
  2739. if (!expiration)
  2740. return 0;
  2741. if (stat(shared_index_path, &st))
  2742. return error_errno(_("could not stat '%s'"), shared_index_path);
  2743. if (st.st_mtime > expiration)
  2744. return 0;
  2745. return 1;
  2746. }
  2747. static int clean_shared_index_files(const char *current_hex)
  2748. {
  2749. struct dirent *de;
  2750. DIR *dir = opendir(get_git_dir());
  2751. if (!dir)
  2752. return error_errno(_("unable to open git dir: %s"), get_git_dir());
  2753. while ((de = readdir(dir)) != NULL) {
  2754. const char *sha1_hex;
  2755. const char *shared_index_path;
  2756. if (!skip_prefix(de->d_name, "sharedindex.", &sha1_hex))
  2757. continue;
  2758. if (!strcmp(sha1_hex, current_hex))
  2759. continue;
  2760. shared_index_path = git_path("%s", de->d_name);
  2761. if (should_delete_shared_index(shared_index_path) > 0 &&
  2762. unlink(shared_index_path))
  2763. warning_errno(_("unable to unlink: %s"), shared_index_path);
  2764. }
  2765. closedir(dir);
  2766. return 0;
  2767. }
  2768. static int write_shared_index(struct index_state *istate,
  2769. struct tempfile **temp)
  2770. {
  2771. struct split_index *si = istate->split_index;
  2772. int ret;
  2773. move_cache_to_base_index(istate);
  2774. trace2_region_enter_printf("index", "shared/do_write_index",
  2775. the_repository, "%s", (*temp)->filename.buf);
  2776. ret = do_write_index(si->base, *temp, 1);
  2777. trace2_region_leave_printf("index", "shared/do_write_index",
  2778. the_repository, "%s", (*temp)->filename.buf);
  2779. if (ret)
  2780. return ret;
  2781. ret = adjust_shared_perm(get_tempfile_path(*temp));
  2782. if (ret) {
  2783. error(_("cannot fix permission bits on '%s'"), get_tempfile_path(*temp));
  2784. return ret;
  2785. }
  2786. ret = rename_tempfile(temp,
  2787. git_path("sharedindex.%s", oid_to_hex(&si->base->oid)));
  2788. if (!ret) {
  2789. oidcpy(&si->base_oid, &si->base->oid);
  2790. clean_shared_index_files(oid_to_hex(&si->base->oid));
  2791. }
  2792. return ret;
  2793. }
  2794. static const int default_max_percent_split_change = 20;
  2795. static int too_many_not_shared_entries(struct index_state *istate)
  2796. {
  2797. int i, not_shared = 0;
  2798. int max_split = git_config_get_max_percent_split_change();
  2799. switch (max_split) {
  2800. case -1:
  2801. /* not or badly configured: use the default value */
  2802. max_split = default_max_percent_split_change;
  2803. break;
  2804. case 0:
  2805. return 1; /* 0% means always write a new shared index */
  2806. case 100:
  2807. return 0; /* 100% means never write a new shared index */
  2808. default:
  2809. break; /* just use the configured value */
  2810. }
  2811. /* Count not shared entries */
  2812. for (i = 0; i < istate->cache_nr; i++) {
  2813. struct cache_entry *ce = istate->cache[i];
  2814. if (!ce->index)
  2815. not_shared++;
  2816. }
  2817. return (int64_t)istate->cache_nr * max_split < (int64_t)not_shared * 100;
  2818. }
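/*
 * E.g. with the default max_split of 20% and cache_nr == 1000, this
 * returns true once more than 200 entries are not backed by the
 * shared index: 1000 * 20 < not_shared * 100 holds exactly when
 * not_shared > 200.
 */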
  2819. int write_locked_index(struct index_state *istate, struct lock_file *lock,
  2820. unsigned flags)
  2821. {
  2822. int new_shared_index, ret;
  2823. struct split_index *si = istate->split_index;
  2824. if (git_env_bool("GIT_TEST_CHECK_CACHE_TREE", 0))
  2825. cache_tree_verify(the_repository, istate);
  2826. if ((flags & SKIP_IF_UNCHANGED) && !istate->cache_changed) {
  2827. if (flags & COMMIT_LOCK)
  2828. rollback_lock_file(lock);
  2829. return 0;
  2830. }
  2831. if (istate->fsmonitor_last_update)
  2832. fill_fsmonitor_bitmap(istate);
  2833. if (!si || alternate_index_output ||
  2834. (istate->cache_changed & ~EXTMASK)) {
  2835. if (si)
  2836. oidclr(&si->base_oid);
  2837. ret = do_write_locked_index(istate, lock, flags);
  2838. goto out;
  2839. }
  2840. if (git_env_bool("GIT_TEST_SPLIT_INDEX", 0)) {
  2841. int v = si->base_oid.hash[0];
  2842. if ((v & 15) < 6)
  2843. istate->cache_changed |= SPLIT_INDEX_ORDERED;
  2844. }
  2845. if (too_many_not_shared_entries(istate))
  2846. istate->cache_changed |= SPLIT_INDEX_ORDERED;
  2847. new_shared_index = istate->cache_changed & SPLIT_INDEX_ORDERED;
  2848. if (new_shared_index) {
  2849. struct tempfile *temp;
  2850. int saved_errno;
  2851. /* Same initial permissions as the main .git/index file */
  2852. temp = mks_tempfile_sm(git_path("sharedindex_XXXXXX"), 0, 0666);
  2853. if (!temp) {
  2854. oidclr(&si->base_oid);
  2855. ret = do_write_locked_index(istate, lock, flags);
  2856. goto out;
  2857. }
  2858. ret = write_shared_index(istate, &temp);
  2859. saved_errno = errno;
  2860. if (is_tempfile_active(temp))
  2861. delete_tempfile(&temp);
  2862. errno = saved_errno;
  2863. if (ret)
  2864. goto out;
  2865. }
  2866. ret = write_split_index(istate, lock, flags);
  2867. /* Freshen the shared index only if the split-index was written */
  2868. if (!ret && !new_shared_index && !is_null_oid(&si->base_oid)) {
  2869. const char *shared_index = git_path("sharedindex.%s",
  2870. oid_to_hex(&si->base_oid));
  2871. freshen_shared_index(shared_index, 1);
  2872. }
  2873. out:
  2874. if (flags & COMMIT_LOCK)
  2875. rollback_lock_file(lock);
  2876. return ret;
  2877. }
  2878. /*
* Read the index file that is potentially unmerged into the given
  2880. * index_state, dropping any unmerged entries to stage #0 (potentially
  2881. * resulting in a path appearing as both a file and a directory in the
* index; the caller is responsible for clearing out the extra entries
  2883. * before writing the index to a tree). Returns true if the index is
  2884. * unmerged. Callers who want to refuse to work from an unmerged
  2885. * state can call this and check its return value, instead of calling
  2886. * read_cache().
  2887. */
  2888. int repo_read_index_unmerged(struct repository *repo)
  2889. {
  2890. struct index_state *istate;
  2891. int i;
  2892. int unmerged = 0;
  2893. repo_read_index(repo);
  2894. istate = repo->index;
  2895. for (i = 0; i < istate->cache_nr; i++) {
  2896. struct cache_entry *ce = istate->cache[i];
  2897. struct cache_entry *new_ce;
  2898. int len;
  2899. if (!ce_stage(ce))
  2900. continue;
  2901. unmerged = 1;
  2902. len = ce_namelen(ce);
  2903. new_ce = make_empty_cache_entry(istate, len);
  2904. memcpy(new_ce->name, ce->name, len);
  2905. new_ce->ce_flags = create_ce_flags(0) | CE_CONFLICTED;
  2906. new_ce->ce_namelen = len;
  2907. new_ce->ce_mode = ce->ce_mode;
  2908. if (add_index_entry(istate, new_ce, ADD_CACHE_SKIP_DFCHECK))
  2909. return error(_("%s: cannot drop to stage #0"),
  2910. new_ce->name);
  2911. }
  2912. return unmerged;
  2913. }
  2914. /*
  2915. * Returns 1 if the path is an "other" path with respect to
  2916. * the index; that is, the path is not mentioned in the index at all,
  2917. * either as a file, a directory with some files in the index,
  2918. * or as an unmerged entry.
  2919. *
  2920. * We helpfully remove a trailing "/" from directories so that
  2921. * the output of read_directory can be used as-is.
  2922. */
  2923. int index_name_is_other(const struct index_state *istate, const char *name,
  2924. int namelen)
  2925. {
  2926. int pos;
  2927. if (namelen && name[namelen - 1] == '/')
  2928. namelen--;
  2929. pos = index_name_pos(istate, name, namelen);
  2930. if (0 <= pos)
  2931. return 0; /* exact match */
  2932. pos = -pos - 1;
  2933. if (pos < istate->cache_nr) {
  2934. struct cache_entry *ce = istate->cache[pos];
  2935. if (ce_namelen(ce) == namelen &&
  2936. !memcmp(ce->name, name, namelen))
  2937. return 0; /* Yup, this one exists unmerged */
  2938. }
  2939. return 1;
  2940. }
  2941. void *read_blob_data_from_index(const struct index_state *istate,
  2942. const char *path, unsigned long *size)
  2943. {
  2944. int pos, len;
  2945. unsigned long sz;
  2946. enum object_type type;
  2947. void *data;
  2948. len = strlen(path);
  2949. pos = index_name_pos(istate, path, len);
  2950. if (pos < 0) {
  2951. /*
  2952. * We might be in the middle of a merge, in which
  2953. * case we would read stage #2 (ours).
  2954. */
  2955. int i;
  2956. for (i = -pos - 1;
  2957. (pos < 0 && i < istate->cache_nr &&
  2958. !strcmp(istate->cache[i]->name, path));
  2959. i++)
  2960. if (ce_stage(istate->cache[i]) == 2)
  2961. pos = i;
  2962. }
  2963. if (pos < 0)
  2964. return NULL;
  2965. data = read_object_file(&istate->cache[pos]->oid, &type, &sz);
  2966. if (!data || type != OBJ_BLOB) {
  2967. free(data);
  2968. return NULL;
  2969. }
  2970. if (size)
  2971. *size = sz;
  2972. return data;
  2973. }
  2974. void stat_validity_clear(struct stat_validity *sv)
  2975. {
  2976. FREE_AND_NULL(sv->sd);
  2977. }
  2978. int stat_validity_check(struct stat_validity *sv, const char *path)
  2979. {
  2980. struct stat st;
  2981. if (stat(path, &st) < 0)
  2982. return sv->sd == NULL;
  2983. if (!sv->sd)
  2984. return 0;
  2985. return S_ISREG(st.st_mode) && !match_stat_data(sv->sd, &st);
  2986. }
  2987. void stat_validity_update(struct stat_validity *sv, int fd)
  2988. {
  2989. struct stat st;
  2990. if (fstat(fd, &st) < 0 || !S_ISREG(st.st_mode))
  2991. stat_validity_clear(sv);
  2992. else {
  2993. if (!sv->sd)
  2994. sv->sd = xcalloc(1, sizeof(struct stat_data));
  2995. fill_stat_data(sv->sd, &st);
  2996. }
  2997. }
  2998. void move_index_extensions(struct index_state *dst, struct index_state *src)
  2999. {
  3000. dst->untracked = src->untracked;
  3001. src->untracked = NULL;
  3002. dst->cache_tree = src->cache_tree;
  3003. src->cache_tree = NULL;
  3004. }
  3005. struct cache_entry *dup_cache_entry(const struct cache_entry *ce,
  3006. struct index_state *istate)
  3007. {
  3008. unsigned int size = ce_size(ce);
  3009. int mem_pool_allocated;
  3010. struct cache_entry *new_entry = make_empty_cache_entry(istate, ce_namelen(ce));
  3011. mem_pool_allocated = new_entry->mem_pool_allocated;
  3012. memcpy(new_entry, ce, size);
  3013. new_entry->mem_pool_allocated = mem_pool_allocated;
  3014. return new_entry;
  3015. }
  3016. void discard_cache_entry(struct cache_entry *ce)
  3017. {
  3018. if (ce && should_validate_cache_entries())
  3019. memset(ce, 0xCD, cache_entry_size(ce->ce_namelen));
  3020. if (ce && ce->mem_pool_allocated)
  3021. return;
  3022. free(ce);
  3023. }
  3024. int should_validate_cache_entries(void)
  3025. {
  3026. static int validate_index_cache_entries = -1;
  3027. if (validate_index_cache_entries < 0) {
  3028. if (getenv("GIT_TEST_VALIDATE_INDEX_CACHE_ENTRIES"))
  3029. validate_index_cache_entries = 1;
  3030. else
  3031. validate_index_cache_entries = 0;
  3032. }
  3033. return validate_index_cache_entries;
  3034. }
  3035. #define EOIE_SIZE (4 + GIT_SHA1_RAWSZ) /* <4-byte offset> + <20-byte hash> */
  3036. #define EOIE_SIZE_WITH_HEADER (4 + 4 + EOIE_SIZE) /* <4-byte signature> + <4-byte length> + EOIE_SIZE */
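/*
 * E.g. with SHA-1 these work out to EOIE_SIZE == 4 + 20 == 24 and
 * EOIE_SIZE_WITH_HEADER == 4 + 4 + 24 == 32 bytes.
 */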
  3037. static size_t read_eoie_extension(const char *mmap, size_t mmap_size)
  3038. {
  3039. /*
  3040. * The end of index entries (EOIE) extension is guaranteed to be last
  3041. * so that it can be found by scanning backwards from the EOF.
  3042. *
  3043. * "EOIE"
  3044. * <4-byte length>
  3045. * <4-byte offset>
  3046. * <20-byte hash>
  3047. */
  3048. const char *index, *eoie;
  3049. uint32_t extsize;
  3050. size_t offset, src_offset;
  3051. unsigned char hash[GIT_MAX_RAWSZ];
  3052. git_hash_ctx c;
  3053. /* ensure we have an index big enough to contain an EOIE extension */
  3054. if (mmap_size < sizeof(struct cache_header) + EOIE_SIZE_WITH_HEADER + the_hash_algo->rawsz)
  3055. return 0;
  3056. /* validate the extension signature */
  3057. index = eoie = mmap + mmap_size - EOIE_SIZE_WITH_HEADER - the_hash_algo->rawsz;
  3058. if (CACHE_EXT(index) != CACHE_EXT_ENDOFINDEXENTRIES)
  3059. return 0;
  3060. index += sizeof(uint32_t);
  3061. /* validate the extension size */
  3062. extsize = get_be32(index);
  3063. if (extsize != EOIE_SIZE)
  3064. return 0;
  3065. index += sizeof(uint32_t);
  3066. /*
  3067. * Validate the offset we're going to look for the first extension
  3068. * signature is after the index header and before the eoie extension.
  3069. */
  3070. offset = get_be32(index);
  3071. if (mmap + offset < mmap + sizeof(struct cache_header))
  3072. return 0;
  3073. if (mmap + offset >= eoie)
  3074. return 0;
  3075. index += sizeof(uint32_t);
  3076. /*
  3077. * The hash is computed over extension types and their sizes (but not
  3078. * their contents). E.g. if we have "TREE" extension that is N-bytes
  3079. * long, "REUC" extension that is M-bytes long, followed by "EOIE",
  3080. * then the hash would be:
  3081. *
  3082. * SHA-1("TREE" + <binary representation of N> +
  3083. * "REUC" + <binary representation of M>)
  3084. */
  3085. src_offset = offset;
  3086. the_hash_algo->init_fn(&c);
  3087. while (src_offset < mmap_size - the_hash_algo->rawsz - EOIE_SIZE_WITH_HEADER) {
  3088. /* After an array of active_nr index entries,
  3089. * there can be arbitrary number of extended
  3090. * sections, each of which is prefixed with
  3091. * extension name (4-byte) and section length
  3092. * in 4-byte network byte order.
  3093. */
  3094. uint32_t extsize;
  3095. memcpy(&extsize, mmap + src_offset + 4, 4);
  3096. extsize = ntohl(extsize);
  3097. /* verify the extension size isn't so large it will wrap around */
  3098. if (src_offset + 8 + extsize < src_offset)
  3099. return 0;
  3100. the_hash_algo->update_fn(&c, mmap + src_offset, 8);
  3101. src_offset += 8;
  3102. src_offset += extsize;
  3103. }
  3104. the_hash_algo->final_fn(hash, &c);
  3105. if (!hasheq(hash, (const unsigned char *)index))
  3106. return 0;
  3107. /* Validate that the extension offsets returned us back to the eoie extension. */
  3108. if (src_offset != mmap_size - the_hash_algo->rawsz - EOIE_SIZE_WITH_HEADER)
  3109. return 0;
  3110. return offset;
  3111. }
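/*
 * Worked example (SHA-1, derived from the macros above): EOIE_SIZE is
 * 4 + 20 = 24 (0x18), so the EOIE_SIZE_WITH_HEADER = 32 bytes just
 * before the trailing 20-byte index checksum look like:
 *
 *	"EOIE"			4-byte signature
 *	00 00 00 18		extension length (EOIE_SIZE)
 *	<4-byte offset>		where the first extension starts
 *	<20-byte hash>		over the extension names and sizes
 */
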
static void write_eoie_extension(struct strbuf *sb, git_hash_ctx *eoie_context, size_t offset)
{
	uint32_t buffer;
	unsigned char hash[GIT_MAX_RAWSZ];

	/* offset */
	put_be32(&buffer, offset);
	strbuf_add(sb, &buffer, sizeof(uint32_t));

	/* hash */
	the_hash_algo->final_fn(hash, eoie_context);
	strbuf_add(sb, hash, the_hash_algo->rawsz);
}

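/*
 * Sketch only (assumptions: "name" is a 4-character extension
 * signature and "sb" its serialized payload): a writer feeds each
 * extension's name and big-endian size into the same hash context
 * before writing the extension out, which is exactly what
 * read_eoie_extension() recomputes and verifies above.
 */
#if 0
static void hash_extension_header(git_hash_ctx *eoie_context,
				  const char *name, struct strbuf *sb)
{
	uint32_t sz;

	the_hash_algo->update_fn(eoie_context, name, 4);
	put_be32(&sz, sb->len);
	the_hash_algo->update_fn(eoie_context, &sz, 4);
}
#endif
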
#define IEOT_VERSION	(1)

static struct index_entry_offset_table *read_ieot_extension(const char *mmap, size_t mmap_size, size_t offset)
{
	const char *index = NULL;
	uint32_t extsize, ext_version;
	struct index_entry_offset_table *ieot;
	int i, nr;

	/* find the IEOT extension */
	if (!offset)
		return NULL;
	while (offset <= mmap_size - the_hash_algo->rawsz - 8) {
		extsize = get_be32(mmap + offset + 4);
		if (CACHE_EXT((mmap + offset)) == CACHE_EXT_INDEXENTRYOFFSETTABLE) {
			index = mmap + offset + 4 + 4;
			break;
		}
		offset += 8;
		offset += extsize;
	}
	if (!index)
		return NULL;

	/* validate the version is IEOT_VERSION */
	ext_version = get_be32(index);
	if (ext_version != IEOT_VERSION) {
		error("invalid IEOT version %d", ext_version);
		return NULL;
	}
	index += sizeof(uint32_t);

	/* (extension size - 4-byte version field) / (8 bytes per entry: offset + count) */
	nr = (extsize - sizeof(uint32_t)) / (sizeof(uint32_t) + sizeof(uint32_t));
	if (!nr) {
		error("invalid number of IEOT entries %d", nr);
		return NULL;
	}
	ieot = xmalloc(sizeof(struct index_entry_offset_table)
		       + (nr * sizeof(struct index_entry_offset)));
	ieot->nr = nr;
	for (i = 0; i < nr; i++) {
		ieot->entries[i].offset = get_be32(index);
		index += sizeof(uint32_t);
		ieot->entries[i].nr = get_be32(index);
		index += sizeof(uint32_t);
	}

	return ieot;
}

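/*
 * Illustrative sketch only ("parse_one_block" is hypothetical): each
 * IEOT entry names a block of cache entries by file offset and count,
 * so the blocks can be handed to worker threads and parsed in
 * parallel.
 */
#if 0
static void parse_blocks_in_parallel(const char *mmap,
				     struct index_entry_offset_table *ieot)
{
	int i;

	for (i = 0; i < ieot->nr; i++)
		/* in practice, each call would run on its own thread */
		parse_one_block(mmap + ieot->entries[i].offset,
				ieot->entries[i].nr);
	free(ieot);
}
#endif
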
static void write_ieot_extension(struct strbuf *sb, struct index_entry_offset_table *ieot)
{
	uint32_t buffer;
	int i;

	/* version */
	put_be32(&buffer, IEOT_VERSION);
	strbuf_add(sb, &buffer, sizeof(uint32_t));

	/* ieot */
	for (i = 0; i < ieot->nr; i++) {
		/* offset */
		put_be32(&buffer, ieot->entries[i].offset);
		strbuf_add(sb, &buffer, sizeof(uint32_t));

		/* count */
		put_be32(&buffer, ieot->entries[i].nr);
		strbuf_add(sb, &buffer, sizeof(uint32_t));
	}
}
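
/*
 * Example serialized IEOT body (values are made up; all fields are
 * 32-bit big-endian): the version word followed by one
 * <offset, count> pair per block, e.g.
 *
 *	00 00 00 01	version (IEOT_VERSION)
 *	00 00 01 00	offset of the first entry block
 *	00 00 00 40	number of entries in that block
 */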