THIS IS A TEST INSTANCE ONLY! REPOSITORIES CAN BE DELETED AT ANY TIME!

Git Source Code Mirror - This is a publish-only repository and all pull requests are ignored. Please follow Documentation/SubmittingPatches procedure for any of your improvements.
git
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

281 lines
7.2KB

  1. /*
  2. * Copyright (c) 2011, Google Inc.
  3. */
  4. #include "cache.h"
  5. #include "bulk-checkin.h"
  6. #include "repository.h"
  7. #include "csum-file.h"
  8. #include "pack.h"
  9. #include "strbuf.h"
  10. #include "packfile.h"
  11. static struct bulk_checkin_state {
  12. unsigned plugged:1;
  13. char *pack_tmp_name;
  14. struct hashfile *f;
  15. off_t offset;
  16. struct pack_idx_option pack_idx_opts;
  17. struct pack_idx_entry **written;
  18. uint32_t alloc_written;
  19. uint32_t nr_written;
  20. } state;
  21. static void finish_bulk_checkin(struct bulk_checkin_state *state)
  22. {
  23. struct object_id oid;
  24. struct strbuf packname = STRBUF_INIT;
  25. int i;
  26. if (!state->f)
  27. return;
  28. if (state->nr_written == 0) {
  29. close(state->f->fd);
  30. unlink(state->pack_tmp_name);
  31. goto clear_exit;
  32. } else if (state->nr_written == 1) {
  33. hashclose(state->f, oid.hash, CSUM_FSYNC);
  34. } else {
  35. int fd = hashclose(state->f, oid.hash, 0);
  36. fixup_pack_header_footer(fd, oid.hash, state->pack_tmp_name,
  37. state->nr_written, oid.hash,
  38. state->offset);
  39. close(fd);
  40. }
  41. strbuf_addf(&packname, "%s/pack/pack-", get_object_directory());
  42. finish_tmp_packfile(&packname, state->pack_tmp_name,
  43. state->written, state->nr_written,
  44. &state->pack_idx_opts, oid.hash);
  45. for (i = 0; i < state->nr_written; i++)
  46. free(state->written[i]);
  47. clear_exit:
  48. free(state->written);
  49. memset(state, 0, sizeof(*state));
  50. strbuf_release(&packname);
  51. /* Make objects we just wrote available to ourselves */
  52. reprepare_packed_git(the_repository);
  53. }
  54. static int already_written(struct bulk_checkin_state *state, struct object_id *oid)
  55. {
  56. int i;
  57. /* The object may already exist in the repository */
  58. if (has_sha1_file(oid->hash))
  59. return 1;
  60. /* Might want to keep the list sorted */
  61. for (i = 0; i < state->nr_written; i++)
  62. if (!oidcmp(&state->written[i]->oid, oid))
  63. return 1;
  64. /* This is a new object we need to keep */
  65. return 0;
  66. }
  67. /*
  68. * Read the contents from fd for size bytes, streaming it to the
  69. * packfile in state while updating the hash in ctx. Signal a failure
  70. * by returning a negative value when the resulting pack would exceed
  71. * the pack size limit and this is not the first object in the pack,
  72. * so that the caller can discard what we wrote from the current pack
  73. * by truncating it and opening a new one. The caller will then call
  74. * us again after rewinding the input fd.
  75. *
  76. * The already_hashed_to pointer is kept untouched by the caller to
  77. * make sure we do not hash the same byte when we are called
  78. * again. This way, the caller does not have to checkpoint its hash
  79. * status before calling us just in case we ask it to call us again
  80. * with a new pack.
  81. */
  82. static int stream_to_pack(struct bulk_checkin_state *state,
  83. git_hash_ctx *ctx, off_t *already_hashed_to,
  84. int fd, size_t size, enum object_type type,
  85. const char *path, unsigned flags)
  86. {
  87. git_zstream s;
  88. unsigned char obuf[16384];
  89. unsigned hdrlen;
  90. int status = Z_OK;
  91. int write_object = (flags & HASH_WRITE_OBJECT);
  92. off_t offset = 0;
  93. git_deflate_init(&s, pack_compression_level);
  94. hdrlen = encode_in_pack_object_header(obuf, sizeof(obuf), type, size);
  95. s.next_out = obuf + hdrlen;
  96. s.avail_out = sizeof(obuf) - hdrlen;
  97. while (status != Z_STREAM_END) {
  98. unsigned char ibuf[16384];
  99. if (size && !s.avail_in) {
  100. ssize_t rsize = size < sizeof(ibuf) ? size : sizeof(ibuf);
  101. ssize_t read_result = read_in_full(fd, ibuf, rsize);
  102. if (read_result < 0)
  103. die_errno("failed to read from '%s'", path);
  104. if (read_result != rsize)
  105. die("failed to read %d bytes from '%s'",
  106. (int)rsize, path);
  107. offset += rsize;
  108. if (*already_hashed_to < offset) {
  109. size_t hsize = offset - *already_hashed_to;
  110. if (rsize < hsize)
  111. hsize = rsize;
  112. if (hsize)
  113. the_hash_algo->update_fn(ctx, ibuf, hsize);
  114. *already_hashed_to = offset;
  115. }
  116. s.next_in = ibuf;
  117. s.avail_in = rsize;
  118. size -= rsize;
  119. }
  120. status = git_deflate(&s, size ? 0 : Z_FINISH);
  121. if (!s.avail_out || status == Z_STREAM_END) {
  122. if (write_object) {
  123. size_t written = s.next_out - obuf;
  124. /* would we bust the size limit? */
  125. if (state->nr_written &&
  126. pack_size_limit_cfg &&
  127. pack_size_limit_cfg < state->offset + written) {
  128. git_deflate_abort(&s);
  129. return -1;
  130. }
  131. hashwrite(state->f, obuf, written);
  132. state->offset += written;
  133. }
  134. s.next_out = obuf;
  135. s.avail_out = sizeof(obuf);
  136. }
  137. switch (status) {
  138. case Z_OK:
  139. case Z_BUF_ERROR:
  140. case Z_STREAM_END:
  141. continue;
  142. default:
  143. die("unexpected deflate failure: %d", status);
  144. }
  145. }
  146. git_deflate_end(&s);
  147. return 0;
  148. }
  149. /* Lazily create backing packfile for the state */
  150. static void prepare_to_stream(struct bulk_checkin_state *state,
  151. unsigned flags)
  152. {
  153. if (!(flags & HASH_WRITE_OBJECT) || state->f)
  154. return;
  155. state->f = create_tmp_packfile(&state->pack_tmp_name);
  156. reset_pack_idx_option(&state->pack_idx_opts);
  157. /* Pretend we are going to write only one object */
  158. state->offset = write_pack_header(state->f, 1);
  159. if (!state->offset)
  160. die_errno("unable to write pack header");
  161. }
  162. static int deflate_to_pack(struct bulk_checkin_state *state,
  163. struct object_id *result_oid,
  164. int fd, size_t size,
  165. enum object_type type, const char *path,
  166. unsigned flags)
  167. {
  168. off_t seekback, already_hashed_to;
  169. git_hash_ctx ctx;
  170. unsigned char obuf[16384];
  171. unsigned header_len;
  172. struct hashfile_checkpoint checkpoint;
  173. struct pack_idx_entry *idx = NULL;
  174. seekback = lseek(fd, 0, SEEK_CUR);
  175. if (seekback == (off_t) -1)
  176. return error("cannot find the current offset");
  177. header_len = xsnprintf((char *)obuf, sizeof(obuf), "%s %" PRIuMAX,
  178. type_name(type), (uintmax_t)size) + 1;
  179. the_hash_algo->init_fn(&ctx);
  180. the_hash_algo->update_fn(&ctx, obuf, header_len);
  181. /* Note: idx is non-NULL when we are writing */
  182. if ((flags & HASH_WRITE_OBJECT) != 0)
  183. idx = xcalloc(1, sizeof(*idx));
  184. already_hashed_to = 0;
  185. while (1) {
  186. prepare_to_stream(state, flags);
  187. if (idx) {
  188. hashfile_checkpoint(state->f, &checkpoint);
  189. idx->offset = state->offset;
  190. crc32_begin(state->f);
  191. }
  192. if (!stream_to_pack(state, &ctx, &already_hashed_to,
  193. fd, size, type, path, flags))
  194. break;
  195. /*
  196. * Writing this object to the current pack will make
  197. * it too big; we need to truncate it, start a new
  198. * pack, and write into it.
  199. */
  200. if (!idx)
  201. BUG("should not happen");
  202. hashfile_truncate(state->f, &checkpoint);
  203. state->offset = checkpoint.offset;
  204. finish_bulk_checkin(state);
  205. if (lseek(fd, seekback, SEEK_SET) == (off_t) -1)
  206. return error("cannot seek back");
  207. }
  208. the_hash_algo->final_fn(result_oid->hash, &ctx);
  209. if (!idx)
  210. return 0;
  211. idx->crc32 = crc32_end(state->f);
  212. if (already_written(state, result_oid)) {
  213. hashfile_truncate(state->f, &checkpoint);
  214. state->offset = checkpoint.offset;
  215. free(idx);
  216. } else {
  217. oidcpy(&idx->oid, result_oid);
  218. ALLOC_GROW(state->written,
  219. state->nr_written + 1,
  220. state->alloc_written);
  221. state->written[state->nr_written++] = idx;
  222. }
  223. return 0;
  224. }
  225. int index_bulk_checkin(struct object_id *oid,
  226. int fd, size_t size, enum object_type type,
  227. const char *path, unsigned flags)
  228. {
  229. int status = deflate_to_pack(&state, oid, fd, size, type,
  230. path, flags);
  231. if (!state.plugged)
  232. finish_bulk_checkin(&state);
  233. return status;
  234. }
  235. void plug_bulk_checkin(void)
  236. {
  237. state.plugged = 1;
  238. }
  239. void unplug_bulk_checkin(void)
  240. {
  241. state.plugged = 0;
  242. if (state.f)
  243. finish_bulk_checkin(&state);
  244. }