/*
 * Command-line synopsis handed to usage() when the arguments cannot be
 * parsed.  The keep option is spelled "--keep" (two dashes), matching the
 * strings the argument parser in main() actually compares against.
 */
static const char index_pack_usage[] =
"git-index-pack [-v] [-o <index-file>] [{ --keep | --keep=<msg> }] { <pack-file> | --stdin [--fix-thin] [<pack-file>] }";
16 struct pack_idx_entry idx
;
18 unsigned int hdr_size
;
19 enum object_type type
;
20 enum object_type real_type
;
24 unsigned char sha1
[20];
29 * Even if sizeof(union delta_base) == 24 on 64-bit archs, we really want
30 * to memcmp() only the first 20 bytes.
32 #define UNION_BASE_SZ 20
36 union delta_base base
;
40 static struct object_entry
*objects
;
41 static struct delta_entry
*deltas
;
42 static int nr_objects
;
44 static int nr_resolved_deltas
;
46 static int from_stdin
;
49 static struct progress
*progress
;
51 /* We always read in 4kB chunks. */
52 static unsigned char input_buffer
[4096];
53 static unsigned int input_offset
, input_len
;
54 static off_t consumed_bytes
;
55 static SHA_CTX input_ctx
;
56 static uint32_t input_crc32
;
57 static int input_fd
, output_fd
, pack_fd
;
59 /* Discard the already-consumed part of the input buffer. */
60 static void flush(void)
64 write_or_die(output_fd
, input_buffer
, input_offset
);
65 SHA1_Update(&input_ctx
, input_buffer
, input_offset
);
66 memmove(input_buffer
, input_buffer
+ input_offset
, input_len
);
72 * Make sure at least "min" bytes are available in the buffer, and
73 * return the pointer to the buffer.
75 static void *fill(int min
)
78 return input_buffer
+ input_offset
;
79 if (min
> sizeof(input_buffer
))
80 die("cannot fill %d bytes", min
);
83 ssize_t ret
= xread(input_fd
, input_buffer
+ input_len
,
84 sizeof(input_buffer
) - input_len
);
88 die("read error on input: %s", strerror(errno
));
91 display_throughput(progress
, ret
);
93 } while (input_len
< min
);
97 static void use(int bytes
)
99 if (bytes
> input_len
)
100 die("used more bytes than were available");
101 input_crc32
= crc32(input_crc32
, input_buffer
+ input_offset
, bytes
);
103 input_offset
+= bytes
;
105 /* make sure off_t is sufficiently large not to wrap */
106 if (consumed_bytes
> consumed_bytes
+ bytes
)
107 die("pack too large for current definition of off_t");
108 consumed_bytes
+= bytes
;
111 static char *open_pack_file(char *pack_name
)
116 static char tmpfile
[PATH_MAX
];
117 snprintf(tmpfile
, sizeof(tmpfile
),
118 "%s/tmp_pack_XXXXXX", get_object_directory());
119 output_fd
= xmkstemp(tmpfile
);
120 pack_name
= xstrdup(tmpfile
);
122 output_fd
= open(pack_name
, O_CREAT
|O_EXCL
|O_RDWR
, 0600);
124 die("unable to create %s: %s\n", pack_name
, strerror(errno
));
127 input_fd
= open(pack_name
, O_RDONLY
);
129 die("cannot open packfile '%s': %s",
130 pack_name
, strerror(errno
));
134 SHA1_Init(&input_ctx
);
138 static void parse_pack_header(void)
140 struct pack_header
*hdr
= fill(sizeof(struct pack_header
));
142 /* Header consistency check */
143 if (hdr
->hdr_signature
!= htonl(PACK_SIGNATURE
))
144 die("pack signature mismatch");
145 if (!pack_version_ok(hdr
->hdr_version
))
146 die("pack version %d unsupported", ntohl(hdr
->hdr_version
));
148 nr_objects
= ntohl(hdr
->hdr_entries
);
149 use(sizeof(struct pack_header
));
152 static void bad_object(unsigned long offset
, const char *format
,
153 ...) NORETURN
__attribute__((format (printf
, 2, 3)));
155 static void bad_object(unsigned long offset
, const char *format
, ...)
160 va_start(params
, format
);
161 vsnprintf(buf
, sizeof(buf
), format
, params
);
163 die("pack has bad object at offset %lu: %s", offset
, buf
);
166 static void *unpack_entry_data(unsigned long offset
, unsigned long size
)
169 void *buf
= xmalloc(size
);
171 memset(&stream
, 0, sizeof(stream
));
172 stream
.next_out
= buf
;
173 stream
.avail_out
= size
;
174 stream
.next_in
= fill(1);
175 stream
.avail_in
= input_len
;
176 inflateInit(&stream
);
179 int ret
= inflate(&stream
, 0);
180 use(input_len
- stream
.avail_in
);
181 if (stream
.total_out
== size
&& ret
== Z_STREAM_END
)
184 bad_object(offset
, "inflate returned %d", ret
);
185 stream
.next_in
= fill(1);
186 stream
.avail_in
= input_len
;
192 static void *unpack_raw_entry(struct object_entry
*obj
, union delta_base
*delta_base
)
200 obj
->idx
.offset
= consumed_bytes
;
201 input_crc32
= crc32(0, Z_NULL
, 0);
206 obj
->type
= (c
>> 4) & 7;
213 size
+= (c
& 0x7fUL
) << shift
;
220 hashcpy(delta_base
->sha1
, fill(20));
224 memset(delta_base
, 0, sizeof(*delta_base
));
228 base_offset
= c
& 127;
231 if (!base_offset
|| MSB(base_offset
, 7))
232 bad_object(obj
->idx
.offset
, "offset value overflow for delta base object");
236 base_offset
= (base_offset
<< 7) + (c
& 127);
238 delta_base
->offset
= obj
->idx
.offset
- base_offset
;
239 if (delta_base
->offset
>= obj
->idx
.offset
)
240 bad_object(obj
->idx
.offset
, "delta base offset is out of bound");
248 bad_object(obj
->idx
.offset
, "unknown object type %d", obj
->type
);
250 obj
->hdr_size
= consumed_bytes
- obj
->idx
.offset
;
252 data
= unpack_entry_data(obj
->idx
.offset
, obj
->size
);
253 obj
->idx
.crc32
= input_crc32
;
257 static void *get_data_from_pack(struct object_entry
*obj
)
259 unsigned long from
= obj
[0].idx
.offset
+ obj
[0].hdr_size
;
260 unsigned long len
= obj
[1].idx
.offset
- from
;
261 unsigned long rdy
= 0;
262 unsigned char *src
, *data
;
269 ssize_t n
= pread(pack_fd
, data
+ rdy
, len
- rdy
, from
+ rdy
);
271 die("cannot pread pack file: %s", strerror(errno
));
274 data
= xmalloc(obj
->size
);
275 memset(&stream
, 0, sizeof(stream
));
276 stream
.next_out
= data
;
277 stream
.avail_out
= obj
->size
;
278 stream
.next_in
= src
;
279 stream
.avail_in
= len
;
280 inflateInit(&stream
);
281 while ((st
= inflate(&stream
, Z_FINISH
)) == Z_OK
);
283 if (st
!= Z_STREAM_END
|| stream
.total_out
!= obj
->size
)
284 die("serious inflate inconsistency");
289 static int find_delta(const union delta_base
*base
)
291 int first
= 0, last
= nr_deltas
;
293 while (first
< last
) {
294 int next
= (first
+ last
) / 2;
295 struct delta_entry
*delta
= &deltas
[next
];
298 cmp
= memcmp(base
, &delta
->base
, UNION_BASE_SZ
);
310 static int find_delta_children(const union delta_base
*base
,
311 int *first_index
, int *last_index
)
313 int first
= find_delta(base
);
315 int end
= nr_deltas
- 1;
319 while (first
> 0 && !memcmp(&deltas
[first
- 1].base
, base
, UNION_BASE_SZ
))
321 while (last
< end
&& !memcmp(&deltas
[last
+ 1].base
, base
, UNION_BASE_SZ
))
323 *first_index
= first
;
328 static void sha1_object(const void *data
, unsigned long size
,
329 enum object_type type
, unsigned char *sha1
)
331 hash_sha1_file(data
, size
, typename(type
), sha1
);
332 if (has_sha1_file(sha1
)) {
334 enum object_type has_type
;
335 unsigned long has_size
;
336 has_data
= read_sha1_file(sha1
, &has_type
, &has_size
);
338 die("cannot read existing object %s", sha1_to_hex(sha1
));
339 if (size
!= has_size
|| type
!= has_type
||
340 memcmp(data
, has_data
, size
) != 0)
341 die("SHA1 COLLISION FOUND WITH %s !", sha1_to_hex(sha1
));
346 static void resolve_delta(struct object_entry
*delta_obj
, void *base_data
,
347 unsigned long base_size
, enum object_type type
)
350 unsigned long delta_size
;
352 unsigned long result_size
;
353 union delta_base delta_base
;
356 delta_obj
->real_type
= type
;
357 delta_data
= get_data_from_pack(delta_obj
);
358 delta_size
= delta_obj
->size
;
359 result
= patch_delta(base_data
, base_size
, delta_data
, delta_size
,
363 bad_object(delta_obj
->idx
.offset
, "failed to apply delta");
364 sha1_object(result
, result_size
, type
, delta_obj
->idx
.sha1
);
365 nr_resolved_deltas
++;
367 hashcpy(delta_base
.sha1
, delta_obj
->idx
.sha1
);
368 if (!find_delta_children(&delta_base
, &first
, &last
)) {
369 for (j
= first
; j
<= last
; j
++) {
370 struct object_entry
*child
= objects
+ deltas
[j
].obj_no
;
371 if (child
->real_type
== OBJ_REF_DELTA
)
372 resolve_delta(child
, result
, result_size
, type
);
376 memset(&delta_base
, 0, sizeof(delta_base
));
377 delta_base
.offset
= delta_obj
->idx
.offset
;
378 if (!find_delta_children(&delta_base
, &first
, &last
)) {
379 for (j
= first
; j
<= last
; j
++) {
380 struct object_entry
*child
= objects
+ deltas
[j
].obj_no
;
381 if (child
->real_type
== OBJ_OFS_DELTA
)
382 resolve_delta(child
, result
, result_size
, type
);
389 static int compare_delta_entry(const void *a
, const void *b
)
391 const struct delta_entry
*delta_a
= a
;
392 const struct delta_entry
*delta_b
= b
;
393 return memcmp(&delta_a
->base
, &delta_b
->base
, UNION_BASE_SZ
);
396 /* Parse all objects and return the pack content SHA1 hash */
397 static void parse_pack_objects(unsigned char *sha1
)
400 struct delta_entry
*delta
= deltas
;
406 * - find locations of all objects;
407 * - calculate SHA1 of all non-delta objects;
408 * - remember base (SHA1 or offset) for all deltas.
411 progress
= start_progress(
412 from_stdin
? "Receiving objects" : "Indexing objects",
414 for (i
= 0; i
< nr_objects
; i
++) {
415 struct object_entry
*obj
= &objects
[i
];
416 data
= unpack_raw_entry(obj
, &delta
->base
);
417 obj
->real_type
= obj
->type
;
418 if (obj
->type
== OBJ_REF_DELTA
|| obj
->type
== OBJ_OFS_DELTA
) {
423 sha1_object(data
, obj
->size
, obj
->type
, obj
->idx
.sha1
);
425 display_progress(progress
, i
+1);
427 objects
[i
].idx
.offset
= consumed_bytes
;
428 stop_progress(&progress
);
430 /* Check pack integrity */
432 SHA1_Final(sha1
, &input_ctx
);
433 if (hashcmp(fill(20), sha1
))
434 die("pack is corrupted (SHA1 mismatch)");
437 /* If input_fd is a file, we should have reached its end now. */
438 if (fstat(input_fd
, &st
))
439 die("cannot fstat packfile: %s", strerror(errno
));
440 if (S_ISREG(st
.st_mode
) &&
441 lseek(input_fd
, 0, SEEK_CUR
) - input_len
!= st
.st_size
)
442 die("pack has junk at the end");
447 /* Sort deltas by base SHA1/offset for fast searching */
448 qsort(deltas
, nr_deltas
, sizeof(struct delta_entry
),
449 compare_delta_entry
);
453 * - for all non-delta objects, look if it is used as a base for
455 * - if used as a base, uncompress the object and apply all deltas,
456 * recursively checking if the resulting object is used as a base
457 * for some more deltas.
460 progress
= start_progress("Resolving deltas", nr_deltas
);
461 for (i
= 0; i
< nr_objects
; i
++) {
462 struct object_entry
*obj
= &objects
[i
];
463 union delta_base base
;
464 int j
, ref
, ref_first
, ref_last
, ofs
, ofs_first
, ofs_last
;
466 if (obj
->type
== OBJ_REF_DELTA
|| obj
->type
== OBJ_OFS_DELTA
)
468 hashcpy(base
.sha1
, obj
->idx
.sha1
);
469 ref
= !find_delta_children(&base
, &ref_first
, &ref_last
);
470 memset(&base
, 0, sizeof(base
));
471 base
.offset
= obj
->idx
.offset
;
472 ofs
= !find_delta_children(&base
, &ofs_first
, &ofs_last
);
475 data
= get_data_from_pack(obj
);
477 for (j
= ref_first
; j
<= ref_last
; j
++) {
478 struct object_entry
*child
= objects
+ deltas
[j
].obj_no
;
479 if (child
->real_type
== OBJ_REF_DELTA
)
480 resolve_delta(child
, data
,
481 obj
->size
, obj
->type
);
484 for (j
= ofs_first
; j
<= ofs_last
; j
++) {
485 struct object_entry
*child
= objects
+ deltas
[j
].obj_no
;
486 if (child
->real_type
== OBJ_OFS_DELTA
)
487 resolve_delta(child
, data
,
488 obj
->size
, obj
->type
);
491 display_progress(progress
, nr_resolved_deltas
);
495 static int write_compressed(int fd
, void *in
, unsigned int size
, uint32_t *obj_crc
)
498 unsigned long maxsize
;
501 memset(&stream
, 0, sizeof(stream
));
502 deflateInit(&stream
, zlib_compression_level
);
503 maxsize
= deflateBound(&stream
, size
);
504 out
= xmalloc(maxsize
);
508 stream
.avail_in
= size
;
509 stream
.next_out
= out
;
510 stream
.avail_out
= maxsize
;
511 while (deflate(&stream
, Z_FINISH
) == Z_OK
);
514 size
= stream
.total_out
;
515 write_or_die(fd
, out
, size
);
516 *obj_crc
= crc32(*obj_crc
, out
, size
);
521 static void append_obj_to_pack(const unsigned char *sha1
, void *buf
,
522 unsigned long size
, enum object_type type
)
524 struct object_entry
*obj
= &objects
[nr_objects
++];
525 unsigned char header
[10];
526 unsigned long s
= size
;
528 unsigned char c
= (type
<< 4) | (s
& 15);
531 header
[n
++] = c
| 0x80;
536 write_or_die(output_fd
, header
, n
);
537 obj
[0].idx
.crc32
= crc32(0, Z_NULL
, 0);
538 obj
[0].idx
.crc32
= crc32(obj
[0].idx
.crc32
, header
, n
);
539 obj
[1].idx
.offset
= obj
[0].idx
.offset
+ n
;
540 obj
[1].idx
.offset
+= write_compressed(output_fd
, buf
, size
, &obj
[0].idx
.crc32
);
541 hashcpy(obj
->idx
.sha1
, sha1
);
544 static int delta_pos_compare(const void *_a
, const void *_b
)
546 struct delta_entry
*a
= *(struct delta_entry
**)_a
;
547 struct delta_entry
*b
= *(struct delta_entry
**)_b
;
548 return a
->obj_no
- b
->obj_no
;
551 static void fix_unresolved_deltas(int nr_unresolved
)
553 struct delta_entry
**sorted_by_pos
;
557 * Since many unresolved deltas may well be themselves base objects
558 * for more unresolved deltas, we really want to include the
559 * smallest number of base objects that would cover as much delta
560 * as possible by picking the
561 * trunk deltas first, allowing for other deltas to resolve without
562 * additional base objects. Since most base objects are to be found
563 * before deltas depending on them, a good heuristic is to start
564 * resolving deltas in the same order as their position in the pack.
566 sorted_by_pos
= xmalloc(nr_unresolved
* sizeof(*sorted_by_pos
));
567 for (i
= 0; i
< nr_deltas
; i
++) {
568 if (objects
[deltas
[i
].obj_no
].real_type
!= OBJ_REF_DELTA
)
570 sorted_by_pos
[n
++] = &deltas
[i
];
572 qsort(sorted_by_pos
, n
, sizeof(*sorted_by_pos
), delta_pos_compare
);
574 for (i
= 0; i
< n
; i
++) {
575 struct delta_entry
*d
= sorted_by_pos
[i
];
578 enum object_type type
;
581 if (objects
[d
->obj_no
].real_type
!= OBJ_REF_DELTA
)
583 data
= read_sha1_file(d
->base
.sha1
, &type
, &size
);
587 find_delta_children(&d
->base
, &first
, &last
);
588 for (j
= first
; j
<= last
; j
++) {
589 struct object_entry
*child
= objects
+ deltas
[j
].obj_no
;
590 if (child
->real_type
== OBJ_REF_DELTA
)
591 resolve_delta(child
, data
, size
, type
);
594 if (check_sha1_signature(d
->base
.sha1
, data
, size
, typename(type
)))
595 die("local object %s is corrupt", sha1_to_hex(d
->base
.sha1
));
596 append_obj_to_pack(d
->base
.sha1
, data
, size
, type
);
598 display_progress(progress
, nr_resolved_deltas
);
603 static void final(const char *final_pack_name
, const char *curr_pack_name
,
604 const char *final_index_name
, const char *curr_index_name
,
605 const char *keep_name
, const char *keep_msg
,
608 const char *report
= "pack";
615 err
= close(output_fd
);
617 die("error while closing pack file: %s", strerror(errno
));
618 chmod(curr_pack_name
, 0444);
622 int keep_fd
, keep_msg_len
= strlen(keep_msg
);
624 snprintf(name
, sizeof(name
), "%s/pack/pack-%s.keep",
625 get_object_directory(), sha1_to_hex(sha1
));
628 keep_fd
= open(keep_name
, O_RDWR
|O_CREAT
|O_EXCL
, 0600);
631 die("cannot write keep file");
633 if (keep_msg_len
> 0) {
634 write_or_die(keep_fd
, keep_msg
, keep_msg_len
);
635 write_or_die(keep_fd
, "\n", 1);
637 if (close(keep_fd
) != 0)
638 die("cannot write keep file");
643 if (final_pack_name
!= curr_pack_name
) {
644 if (!final_pack_name
) {
645 snprintf(name
, sizeof(name
), "%s/pack/pack-%s.pack",
646 get_object_directory(), sha1_to_hex(sha1
));
647 final_pack_name
= name
;
649 if (move_temp_to_file(curr_pack_name
, final_pack_name
))
650 die("cannot store pack file");
653 chmod(curr_index_name
, 0444);
654 if (final_index_name
!= curr_index_name
) {
655 if (!final_index_name
) {
656 snprintf(name
, sizeof(name
), "%s/pack/pack-%s.idx",
657 get_object_directory(), sha1_to_hex(sha1
));
658 final_index_name
= name
;
660 if (move_temp_to_file(curr_index_name
, final_index_name
))
661 die("cannot store index file");
665 printf("%s\n", sha1_to_hex(sha1
));
668 int len
= snprintf(buf
, sizeof(buf
), "%s\t%s\n",
669 report
, sha1_to_hex(sha1
));
670 write_or_die(1, buf
, len
);
673 * Let's just mimic git-unpack-objects here and write
674 * the last part of the input buffer to stdout.
677 err
= xwrite(1, input_buffer
+ input_offset
, input_len
);
686 int main(int argc
, char **argv
)
688 int i
, fix_thin_pack
= 0;
689 char *curr_pack
, *pack_name
= NULL
;
690 char *curr_index
, *index_name
= NULL
;
691 const char *keep_name
= NULL
, *keep_msg
= NULL
;
692 char *index_name_buf
= NULL
, *keep_name_buf
= NULL
;
693 struct pack_idx_entry
**idx_objects
;
694 unsigned char sha1
[20];
696 for (i
= 1; i
< argc
; i
++) {
700 if (!strcmp(arg
, "--stdin")) {
702 } else if (!strcmp(arg
, "--fix-thin")) {
704 } else if (!strcmp(arg
, "--keep")) {
706 } else if (!prefixcmp(arg
, "--keep=")) {
708 } else if (!prefixcmp(arg
, "--pack_header=")) {
709 struct pack_header
*hdr
;
712 hdr
= (struct pack_header
*)input_buffer
;
713 hdr
->hdr_signature
= htonl(PACK_SIGNATURE
);
714 hdr
->hdr_version
= htonl(strtoul(arg
+ 14, &c
, 10));
717 hdr
->hdr_entries
= htonl(strtoul(c
+ 1, &c
, 10));
720 input_len
= sizeof(*hdr
);
721 } else if (!strcmp(arg
, "-v")) {
723 } else if (!strcmp(arg
, "-o")) {
724 if (index_name
|| (i
+1) >= argc
)
725 usage(index_pack_usage
);
726 index_name
= argv
[++i
];
727 } else if (!prefixcmp(arg
, "--index-version=")) {
729 pack_idx_default_version
= strtoul(arg
+ 16, &c
, 10);
730 if (pack_idx_default_version
> 2)
733 pack_idx_off32_limit
= strtoul(c
+1, &c
, 0);
734 if (*c
|| pack_idx_off32_limit
& 0x80000000)
737 usage(index_pack_usage
);
742 usage(index_pack_usage
);
746 if (!pack_name
&& !from_stdin
)
747 usage(index_pack_usage
);
748 if (fix_thin_pack
&& !from_stdin
)
749 die("--fix-thin cannot be used without --stdin");
750 if (!index_name
&& pack_name
) {
751 int len
= strlen(pack_name
);
752 if (!has_extension(pack_name
, ".pack"))
753 die("packfile name '%s' does not end with '.pack'",
755 index_name_buf
= xmalloc(len
);
756 memcpy(index_name_buf
, pack_name
, len
- 5);
757 strcpy(index_name_buf
+ len
- 5, ".idx");
758 index_name
= index_name_buf
;
760 if (keep_msg
&& !keep_name
&& pack_name
) {
761 int len
= strlen(pack_name
);
762 if (!has_extension(pack_name
, ".pack"))
763 die("packfile name '%s' does not end with '.pack'",
765 keep_name_buf
= xmalloc(len
);
766 memcpy(keep_name_buf
, pack_name
, len
- 5);
767 strcpy(keep_name_buf
+ len
- 5, ".keep");
768 keep_name
= keep_name_buf
;
771 curr_pack
= open_pack_file(pack_name
);
773 objects
= xmalloc((nr_objects
+ 1) * sizeof(struct object_entry
));
774 deltas
= xmalloc(nr_objects
* sizeof(struct delta_entry
));
775 parse_pack_objects(sha1
);
776 if (nr_deltas
== nr_resolved_deltas
) {
777 stop_progress(&progress
);
778 /* Flush the pack's trailing 20-byte SHA1. */
782 int nr_unresolved
= nr_deltas
- nr_resolved_deltas
;
783 int nr_objects_initial
= nr_objects
;
784 if (nr_unresolved
<= 0)
785 die("confusion beyond insanity");
786 objects
= xrealloc(objects
,
787 (nr_objects
+ nr_unresolved
+ 1)
789 fix_unresolved_deltas(nr_unresolved
);
790 stop_progress(&progress
);
792 fprintf(stderr
, "%d objects were added to complete this thin pack.\n",
793 nr_objects
- nr_objects_initial
);
794 fixup_pack_header_footer(output_fd
, sha1
,
795 curr_pack
, nr_objects
);
797 if (nr_deltas
!= nr_resolved_deltas
)
798 die("pack has %d unresolved deltas",
799 nr_deltas
- nr_resolved_deltas
);
803 idx_objects
= xmalloc((nr_objects
) * sizeof(struct pack_idx_entry
*));
804 for (i
= 0; i
< nr_objects
; i
++)
805 idx_objects
[i
] = &objects
[i
].idx
;
806 curr_index
= write_idx_file(index_name
, idx_objects
, nr_objects
, sha1
);
809 final(pack_name
, curr_pack
,
810 index_name
, curr_index
,
814 free(index_name_buf
);
816 if (pack_name
== NULL
)
818 if (index_name
== NULL
)