git-bisect-lk2009: update nist report link
[git/debian.git] / builtin / unpack-objects.c
blob43789b8ef294d8aa3fc9517f65712ac90e1fa60c
1 #include "builtin.h"
2 #include "cache.h"
3 #include "bulk-checkin.h"
4 #include "config.h"
5 #include "object-store.h"
6 #include "object.h"
7 #include "delta.h"
8 #include "pack.h"
9 #include "blob.h"
10 #include "commit.h"
11 #include "tag.h"
12 #include "tree.h"
13 #include "tree-walk.h"
14 #include "progress.h"
15 #include "decorate.h"
16 #include "fsck.h"
18 static int dry_run, quiet, recover, has_errors, strict;
19 static const char unpack_usage[] = "git unpack-objects [-n] [-q] [-r] [--strict]";
21 /* We always read in 4kB chunks. */
22 static unsigned char buffer[4096];
23 static unsigned int offset, len;
24 static off_t consumed_bytes;
25 static off_t max_input_size;
26 static git_hash_ctx ctx;
27 static struct fsck_options fsck_options = FSCK_OPTIONS_STRICT;
28 static struct progress *progress;
31 * When running under --strict mode, objects whose reachability are
32 * suspect are kept in core without getting written in the object
33 * store.
35 struct obj_buffer {
36 char *buffer;
37 unsigned long size;
40 static struct decoration obj_decorate;
42 static struct obj_buffer *lookup_object_buffer(struct object *base)
44 return lookup_decoration(&obj_decorate, base);
47 static void add_object_buffer(struct object *object, char *buffer, unsigned long size)
49 struct obj_buffer *obj;
50 CALLOC_ARRAY(obj, 1);
51 obj->buffer = buffer;
52 obj->size = size;
53 if (add_decoration(&obj_decorate, object, obj))
54 die("object %s tried to add buffer twice!", oid_to_hex(&object->oid));
58 * Make sure at least "min" bytes are available in the buffer, and
59 * return the pointer to the buffer.
61 static void *fill(int min)
63 if (min <= len)
64 return buffer + offset;
65 if (min > sizeof(buffer))
66 die("cannot fill %d bytes", min);
67 if (offset) {
68 the_hash_algo->update_fn(&ctx, buffer, offset);
69 memmove(buffer, buffer + offset, len);
70 offset = 0;
72 do {
73 ssize_t ret = xread(0, buffer + len, sizeof(buffer) - len);
74 if (ret <= 0) {
75 if (!ret)
76 die("early EOF");
77 die_errno("read error on input");
79 len += ret;
80 } while (len < min);
81 return buffer;
84 static void use(int bytes)
86 if (bytes > len)
87 die("used more bytes than were available");
88 len -= bytes;
89 offset += bytes;
91 /* make sure off_t is sufficiently large not to wrap */
92 if (signed_add_overflows(consumed_bytes, bytes))
93 die("pack too large for current definition of off_t");
94 consumed_bytes += bytes;
95 if (max_input_size && consumed_bytes > max_input_size)
96 die(_("pack exceeds maximum allowed size"));
97 display_throughput(progress, consumed_bytes);
101 * Decompress zstream from the standard input into a newly
102 * allocated buffer of specified size and return the buffer.
103 * The caller is responsible to free the returned buffer.
105 * But for dry_run mode, "get_data()" is only used to check the
106 * integrity of data, and the returned buffer is not used at all.
107 * Therefore, in dry_run mode, "get_data()" will release the small
108 * allocated buffer which is reused to hold temporary zstream output
109 * and return NULL instead of returning garbage data.
111 static void *get_data(unsigned long size)
113 git_zstream stream;
114 unsigned long bufsize = dry_run && size > 8192 ? 8192 : size;
115 void *buf = xmallocz(bufsize);
117 memset(&stream, 0, sizeof(stream));
119 stream.next_out = buf;
120 stream.avail_out = bufsize;
121 stream.next_in = fill(1);
122 stream.avail_in = len;
123 git_inflate_init(&stream);
125 for (;;) {
126 int ret = git_inflate(&stream, 0);
127 use(len - stream.avail_in);
128 if (stream.total_out == size && ret == Z_STREAM_END)
129 break;
130 if (ret != Z_OK) {
131 error("inflate returned %d", ret);
132 FREE_AND_NULL(buf);
133 if (!recover)
134 exit(1);
135 has_errors = 1;
136 break;
138 stream.next_in = fill(1);
139 stream.avail_in = len;
140 if (dry_run) {
141 /* reuse the buffer in dry_run mode */
142 stream.next_out = buf;
143 stream.avail_out = bufsize > size - stream.total_out ?
144 size - stream.total_out :
145 bufsize;
148 git_inflate_end(&stream);
149 if (dry_run)
150 FREE_AND_NULL(buf);
151 return buf;
154 struct delta_info {
155 struct object_id base_oid;
156 unsigned nr;
157 off_t base_offset;
158 unsigned long size;
159 void *delta;
160 struct delta_info *next;
163 static struct delta_info *delta_list;
165 static void add_delta_to_list(unsigned nr, const struct object_id *base_oid,
166 off_t base_offset,
167 void *delta, unsigned long size)
169 struct delta_info *info = xmalloc(sizeof(*info));
171 oidcpy(&info->base_oid, base_oid);
172 info->base_offset = base_offset;
173 info->size = size;
174 info->delta = delta;
175 info->nr = nr;
176 info->next = delta_list;
177 delta_list = info;
180 struct obj_info {
181 off_t offset;
182 struct object_id oid;
183 struct object *obj;
186 /* Remember to update object flag allocation in object.h */
187 #define FLAG_OPEN (1u<<20)
188 #define FLAG_WRITTEN (1u<<21)
190 static struct obj_info *obj_list;
191 static unsigned nr_objects;
194 * Called only from check_object() after it verified this object
195 * is Ok.
197 static void write_cached_object(struct object *obj, struct obj_buffer *obj_buf)
199 struct object_id oid;
201 if (write_object_file(obj_buf->buffer, obj_buf->size,
202 obj->type, &oid) < 0)
203 die("failed to write object %s", oid_to_hex(&obj->oid));
204 obj->flags |= FLAG_WRITTEN;
208 * At the very end of the processing, write_rest() scans the objects
209 * that have reachability requirements and calls this function.
210 * Verify its reachability and validity recursively and write it out.
212 static int check_object(struct object *obj, enum object_type type,
213 void *data, struct fsck_options *options)
215 struct obj_buffer *obj_buf;
217 if (!obj)
218 return 1;
220 if (obj->flags & FLAG_WRITTEN)
221 return 0;
223 if (type != OBJ_ANY && obj->type != type)
224 die("object type mismatch");
226 if (!(obj->flags & FLAG_OPEN)) {
227 unsigned long size;
228 int type = oid_object_info(the_repository, &obj->oid, &size);
229 if (type != obj->type || type <= 0)
230 die("object of unexpected type");
231 obj->flags |= FLAG_WRITTEN;
232 return 0;
235 obj_buf = lookup_object_buffer(obj);
236 if (!obj_buf)
237 die("Whoops! Cannot find object '%s'", oid_to_hex(&obj->oid));
238 if (fsck_object(obj, obj_buf->buffer, obj_buf->size, &fsck_options))
239 die("fsck error in packed object");
240 fsck_options.walk = check_object;
241 if (fsck_walk(obj, NULL, &fsck_options))
242 die("Error on reachable objects of %s", oid_to_hex(&obj->oid));
243 write_cached_object(obj, obj_buf);
244 return 0;
247 static void write_rest(void)
249 unsigned i;
250 for (i = 0; i < nr_objects; i++) {
251 if (obj_list[i].obj)
252 check_object(obj_list[i].obj, OBJ_ANY, NULL, NULL);
256 static void added_object(unsigned nr, enum object_type type,
257 void *data, unsigned long size);
260 * Write out nr-th object from the list, now we know the contents
261 * of it. Under --strict, this buffers structured objects in-core,
262 * to be checked at the end.
264 static void write_object(unsigned nr, enum object_type type,
265 void *buf, unsigned long size)
267 if (!strict) {
268 if (write_object_file(buf, size, type,
269 &obj_list[nr].oid) < 0)
270 die("failed to write object");
271 added_object(nr, type, buf, size);
272 free(buf);
273 obj_list[nr].obj = NULL;
274 } else if (type == OBJ_BLOB) {
275 struct blob *blob;
276 if (write_object_file(buf, size, type,
277 &obj_list[nr].oid) < 0)
278 die("failed to write object");
279 added_object(nr, type, buf, size);
280 free(buf);
282 blob = lookup_blob(the_repository, &obj_list[nr].oid);
283 if (blob)
284 blob->object.flags |= FLAG_WRITTEN;
285 else
286 die("invalid blob object");
287 obj_list[nr].obj = NULL;
288 } else {
289 struct object *obj;
290 int eaten;
291 hash_object_file(the_hash_algo, buf, size, type,
292 &obj_list[nr].oid);
293 added_object(nr, type, buf, size);
294 obj = parse_object_buffer(the_repository, &obj_list[nr].oid,
295 type, size, buf,
296 &eaten);
297 if (!obj)
298 die("invalid %s", type_name(type));
299 add_object_buffer(obj, buf, size);
300 obj->flags |= FLAG_OPEN;
301 obj_list[nr].obj = obj;
305 static void resolve_delta(unsigned nr, enum object_type type,
306 void *base, unsigned long base_size,
307 void *delta, unsigned long delta_size)
309 void *result;
310 unsigned long result_size;
312 result = patch_delta(base, base_size,
313 delta, delta_size,
314 &result_size);
315 if (!result)
316 die("failed to apply delta");
317 free(delta);
318 write_object(nr, type, result, result_size);
322 * We now know the contents of an object (which is nr-th in the pack);
323 * resolve all the deltified objects that are based on it.
325 static void added_object(unsigned nr, enum object_type type,
326 void *data, unsigned long size)
328 struct delta_info **p = &delta_list;
329 struct delta_info *info;
331 while ((info = *p) != NULL) {
332 if (oideq(&info->base_oid, &obj_list[nr].oid) ||
333 info->base_offset == obj_list[nr].offset) {
334 *p = info->next;
335 p = &delta_list;
336 resolve_delta(info->nr, type, data, size,
337 info->delta, info->size);
338 free(info);
339 continue;
341 p = &info->next;
345 static void unpack_non_delta_entry(enum object_type type, unsigned long size,
346 unsigned nr)
348 void *buf = get_data(size);
350 if (buf)
351 write_object(nr, type, buf, size);
354 struct input_zstream_data {
355 git_zstream *zstream;
356 unsigned char buf[8192];
357 int status;
360 static const void *feed_input_zstream(struct input_stream *in_stream,
361 unsigned long *readlen)
363 struct input_zstream_data *data = in_stream->data;
364 git_zstream *zstream = data->zstream;
365 void *in = fill(1);
367 if (in_stream->is_finished) {
368 *readlen = 0;
369 return NULL;
372 zstream->next_out = data->buf;
373 zstream->avail_out = sizeof(data->buf);
374 zstream->next_in = in;
375 zstream->avail_in = len;
377 data->status = git_inflate(zstream, 0);
379 in_stream->is_finished = data->status != Z_OK;
380 use(len - zstream->avail_in);
381 *readlen = sizeof(data->buf) - zstream->avail_out;
383 return data->buf;
386 static void stream_blob(unsigned long size, unsigned nr)
388 git_zstream zstream = { 0 };
389 struct input_zstream_data data = { 0 };
390 struct input_stream in_stream = {
391 .read = feed_input_zstream,
392 .data = &data,
394 struct obj_info *info = &obj_list[nr];
396 data.zstream = &zstream;
397 git_inflate_init(&zstream);
399 if (stream_loose_object(&in_stream, size, &info->oid))
400 die(_("failed to write object in stream"));
402 if (data.status != Z_STREAM_END)
403 die(_("inflate returned (%d)"), data.status);
404 git_inflate_end(&zstream);
406 if (strict) {
407 struct blob *blob = lookup_blob(the_repository, &info->oid);
409 if (!blob)
410 die(_("invalid blob object from stream"));
411 blob->object.flags |= FLAG_WRITTEN;
413 info->obj = NULL;
416 static int resolve_against_held(unsigned nr, const struct object_id *base,
417 void *delta_data, unsigned long delta_size)
419 struct object *obj;
420 struct obj_buffer *obj_buffer;
421 obj = lookup_object(the_repository, base);
422 if (!obj)
423 return 0;
424 obj_buffer = lookup_object_buffer(obj);
425 if (!obj_buffer)
426 return 0;
427 resolve_delta(nr, obj->type, obj_buffer->buffer,
428 obj_buffer->size, delta_data, delta_size);
429 return 1;
432 static void unpack_delta_entry(enum object_type type, unsigned long delta_size,
433 unsigned nr)
435 void *delta_data, *base;
436 unsigned long base_size;
437 struct object_id base_oid;
439 if (type == OBJ_REF_DELTA) {
440 oidread(&base_oid, fill(the_hash_algo->rawsz));
441 use(the_hash_algo->rawsz);
442 delta_data = get_data(delta_size);
443 if (!delta_data)
444 return;
445 if (has_object_file(&base_oid))
446 ; /* Ok we have this one */
447 else if (resolve_against_held(nr, &base_oid,
448 delta_data, delta_size))
449 return; /* we are done */
450 else {
451 /* cannot resolve yet --- queue it */
452 oidclr(&obj_list[nr].oid);
453 add_delta_to_list(nr, &base_oid, 0, delta_data, delta_size);
454 return;
456 } else {
457 unsigned base_found = 0;
458 unsigned char *pack, c;
459 off_t base_offset;
460 unsigned lo, mid, hi;
462 pack = fill(1);
463 c = *pack;
464 use(1);
465 base_offset = c & 127;
466 while (c & 128) {
467 base_offset += 1;
468 if (!base_offset || MSB(base_offset, 7))
469 die("offset value overflow for delta base object");
470 pack = fill(1);
471 c = *pack;
472 use(1);
473 base_offset = (base_offset << 7) + (c & 127);
475 base_offset = obj_list[nr].offset - base_offset;
476 if (base_offset <= 0 || base_offset >= obj_list[nr].offset)
477 die("offset value out of bound for delta base object");
479 delta_data = get_data(delta_size);
480 if (!delta_data)
481 return;
482 lo = 0;
483 hi = nr;
484 while (lo < hi) {
485 mid = lo + (hi - lo) / 2;
486 if (base_offset < obj_list[mid].offset) {
487 hi = mid;
488 } else if (base_offset > obj_list[mid].offset) {
489 lo = mid + 1;
490 } else {
491 oidcpy(&base_oid, &obj_list[mid].oid);
492 base_found = !is_null_oid(&base_oid);
493 break;
496 if (!base_found) {
498 * The delta base object is itself a delta that
499 * has not been resolved yet.
501 oidclr(&obj_list[nr].oid);
502 add_delta_to_list(nr, null_oid(), base_offset,
503 delta_data, delta_size);
504 return;
508 if (resolve_against_held(nr, &base_oid, delta_data, delta_size))
509 return;
511 base = read_object_file(&base_oid, &type, &base_size);
512 if (!base) {
513 error("failed to read delta-pack base object %s",
514 oid_to_hex(&base_oid));
515 if (!recover)
516 exit(1);
517 has_errors = 1;
518 return;
520 resolve_delta(nr, type, base, base_size, delta_data, delta_size);
521 free(base);
524 static void unpack_one(unsigned nr)
526 unsigned shift;
527 unsigned char *pack;
528 unsigned long size, c;
529 enum object_type type;
531 obj_list[nr].offset = consumed_bytes;
533 pack = fill(1);
534 c = *pack;
535 use(1);
536 type = (c >> 4) & 7;
537 size = (c & 15);
538 shift = 4;
539 while (c & 0x80) {
540 pack = fill(1);
541 c = *pack;
542 use(1);
543 size += (c & 0x7f) << shift;
544 shift += 7;
547 switch (type) {
548 case OBJ_BLOB:
549 if (!dry_run && size > big_file_threshold) {
550 stream_blob(size, nr);
551 return;
553 /* fallthrough */
554 case OBJ_COMMIT:
555 case OBJ_TREE:
556 case OBJ_TAG:
557 unpack_non_delta_entry(type, size, nr);
558 return;
559 case OBJ_REF_DELTA:
560 case OBJ_OFS_DELTA:
561 unpack_delta_entry(type, size, nr);
562 return;
563 default:
564 error("bad object type %d", type);
565 has_errors = 1;
566 if (recover)
567 return;
568 exit(1);
572 static void unpack_all(void)
574 int i;
575 struct pack_header *hdr = fill(sizeof(struct pack_header));
577 nr_objects = ntohl(hdr->hdr_entries);
579 if (ntohl(hdr->hdr_signature) != PACK_SIGNATURE)
580 die("bad pack file");
581 if (!pack_version_ok(hdr->hdr_version))
582 die("unknown pack file version %"PRIu32,
583 ntohl(hdr->hdr_version));
584 use(sizeof(struct pack_header));
586 if (!quiet)
587 progress = start_progress(_("Unpacking objects"), nr_objects);
588 CALLOC_ARRAY(obj_list, nr_objects);
589 begin_odb_transaction();
590 for (i = 0; i < nr_objects; i++) {
591 unpack_one(i);
592 display_progress(progress, i + 1);
594 end_odb_transaction();
595 stop_progress(&progress);
597 if (delta_list)
598 die("unresolved deltas left after unpacking");
601 int cmd_unpack_objects(int argc, const char **argv, const char *prefix)
603 int i;
604 struct object_id oid;
606 read_replace_refs = 0;
608 git_config(git_default_config, NULL);
610 quiet = !isatty(2);
612 for (i = 1 ; i < argc; i++) {
613 const char *arg = argv[i];
615 if (*arg == '-') {
616 if (!strcmp(arg, "-n")) {
617 dry_run = 1;
618 continue;
620 if (!strcmp(arg, "-q")) {
621 quiet = 1;
622 continue;
624 if (!strcmp(arg, "-r")) {
625 recover = 1;
626 continue;
628 if (!strcmp(arg, "--strict")) {
629 strict = 1;
630 continue;
632 if (skip_prefix(arg, "--strict=", &arg)) {
633 strict = 1;
634 fsck_set_msg_types(&fsck_options, arg);
635 continue;
637 if (starts_with(arg, "--pack_header=")) {
638 struct pack_header *hdr;
639 char *c;
641 hdr = (struct pack_header *)buffer;
642 hdr->hdr_signature = htonl(PACK_SIGNATURE);
643 hdr->hdr_version = htonl(strtoul(arg + 14, &c, 10));
644 if (*c != ',')
645 die("bad %s", arg);
646 hdr->hdr_entries = htonl(strtoul(c + 1, &c, 10));
647 if (*c)
648 die("bad %s", arg);
649 len = sizeof(*hdr);
650 continue;
652 if (skip_prefix(arg, "--max-input-size=", &arg)) {
653 max_input_size = strtoumax(arg, NULL, 10);
654 continue;
656 usage(unpack_usage);
659 /* We don't take any non-flag arguments now.. Maybe some day */
660 usage(unpack_usage);
662 the_hash_algo->init_fn(&ctx);
663 unpack_all();
664 the_hash_algo->update_fn(&ctx, buffer, offset);
665 the_hash_algo->final_oid_fn(&oid, &ctx);
666 if (strict) {
667 write_rest();
668 if (fsck_finish(&fsck_options))
669 die(_("fsck error in pack objects"));
671 if (!hasheq(fill(the_hash_algo->rawsz), oid.hash))
672 die("final sha1 did not match");
673 use(the_hash_algo->rawsz);
675 /* Write the last part of the buffer to stdout */
676 while (len) {
677 int ret = xwrite(1, buffer + offset, len);
678 if (ret <= 0)
679 break;
680 len -= ret;
681 offset += ret;
684 /* All done */
685 return has_errors;