/*
 * Usage synopsis passed to usage() when the command line is rejected.
 * The bare keep flag is spelled "--keep" (two dashes), which is the
 * spelling the argument loop in main() actually compares against.
 */
static const char index_pack_usage[] =
"git-index-pack [-v] [-o <index-file>] [{ --keep | --keep=<msg> }] { <pack-file> | --stdin [--fix-thin] [<pack-file>] }";
16 struct pack_idx_entry idx
;
18 unsigned int hdr_size
;
19 enum object_type type
;
20 enum object_type real_type
;
24 unsigned char sha1
[20];
29 * Even if sizeof(union delta_base) == 24 on 64-bit archs, we really want
30 * to memcmp() only the first 20 bytes.
32 #define UNION_BASE_SZ 20
36 union delta_base base
;
40 static struct object_entry
*objects
;
41 static struct delta_entry
*deltas
;
42 static int nr_objects
;
44 static int nr_resolved_deltas
;
46 static int from_stdin
;
49 static struct progress
*progress
;
51 /* We always read in 4kB chunks. */
52 static unsigned char input_buffer
[4096];
53 static unsigned int input_offset
, input_len
;
54 static off_t consumed_bytes
;
55 static SHA_CTX input_ctx
;
56 static uint32_t input_crc32
;
57 static int input_fd
, output_fd
, pack_fd
;
59 /* Discard current buffer used content. */
60 static void flush(void)
64 write_or_die(output_fd
, input_buffer
, input_offset
);
65 SHA1_Update(&input_ctx
, input_buffer
, input_offset
);
66 memmove(input_buffer
, input_buffer
+ input_offset
, input_len
);
72 * Make sure at least "min" bytes are available in the buffer, and
73 * return the pointer to the buffer.
75 static void *fill(int min
)
78 return input_buffer
+ input_offset
;
79 if (min
> sizeof(input_buffer
))
80 die("cannot fill %d bytes", min
);
83 ssize_t ret
= xread(input_fd
, input_buffer
+ input_len
,
84 sizeof(input_buffer
) - input_len
);
88 die("read error on input: %s", strerror(errno
));
91 } while (input_len
< min
);
95 static void use(int bytes
)
97 if (bytes
> input_len
)
98 die("used more bytes than were available");
99 input_crc32
= crc32(input_crc32
, input_buffer
+ input_offset
, bytes
);
101 input_offset
+= bytes
;
103 /* make sure off_t is sufficiently large not to wrap */
104 if (consumed_bytes
> consumed_bytes
+ bytes
)
105 die("pack too large for current definition of off_t");
106 consumed_bytes
+= bytes
;
109 static char *open_pack_file(char *pack_name
)
114 static char tmpfile
[PATH_MAX
];
115 snprintf(tmpfile
, sizeof(tmpfile
),
116 "%s/tmp_pack_XXXXXX", get_object_directory());
117 output_fd
= xmkstemp(tmpfile
);
118 pack_name
= xstrdup(tmpfile
);
120 output_fd
= open(pack_name
, O_CREAT
|O_EXCL
|O_RDWR
, 0600);
122 die("unable to create %s: %s\n", pack_name
, strerror(errno
));
125 input_fd
= open(pack_name
, O_RDONLY
);
127 die("cannot open packfile '%s': %s",
128 pack_name
, strerror(errno
));
132 SHA1_Init(&input_ctx
);
136 static void parse_pack_header(void)
138 struct pack_header
*hdr
= fill(sizeof(struct pack_header
));
140 /* Header consistency check */
141 if (hdr
->hdr_signature
!= htonl(PACK_SIGNATURE
))
142 die("pack signature mismatch");
143 if (!pack_version_ok(hdr
->hdr_version
))
144 die("pack version %d unsupported", ntohl(hdr
->hdr_version
));
146 nr_objects
= ntohl(hdr
->hdr_entries
);
147 use(sizeof(struct pack_header
));
150 static void bad_object(unsigned long offset
, const char *format
,
151 ...) NORETURN
__attribute__((format (printf
, 2, 3)));
153 static void bad_object(unsigned long offset
, const char *format
, ...)
158 va_start(params
, format
);
159 vsnprintf(buf
, sizeof(buf
), format
, params
);
161 die("pack has bad object at offset %lu: %s", offset
, buf
);
164 static void *unpack_entry_data(unsigned long offset
, unsigned long size
)
167 void *buf
= xmalloc(size
);
169 memset(&stream
, 0, sizeof(stream
));
170 stream
.next_out
= buf
;
171 stream
.avail_out
= size
;
172 stream
.next_in
= fill(1);
173 stream
.avail_in
= input_len
;
174 inflateInit(&stream
);
177 int ret
= inflate(&stream
, 0);
178 use(input_len
- stream
.avail_in
);
179 if (stream
.total_out
== size
&& ret
== Z_STREAM_END
)
182 bad_object(offset
, "inflate returned %d", ret
);
183 stream
.next_in
= fill(1);
184 stream
.avail_in
= input_len
;
190 static void *unpack_raw_entry(struct object_entry
*obj
, union delta_base
*delta_base
)
198 obj
->idx
.offset
= consumed_bytes
;
199 input_crc32
= crc32(0, Z_NULL
, 0);
204 obj
->type
= (c
>> 4) & 7;
211 size
+= (c
& 0x7fUL
) << shift
;
218 hashcpy(delta_base
->sha1
, fill(20));
222 memset(delta_base
, 0, sizeof(*delta_base
));
226 base_offset
= c
& 127;
229 if (!base_offset
|| MSB(base_offset
, 7))
230 bad_object(obj
->idx
.offset
, "offset value overflow for delta base object");
234 base_offset
= (base_offset
<< 7) + (c
& 127);
236 delta_base
->offset
= obj
->idx
.offset
- base_offset
;
237 if (delta_base
->offset
>= obj
->idx
.offset
)
238 bad_object(obj
->idx
.offset
, "delta base offset is out of bound");
246 bad_object(obj
->idx
.offset
, "unknown object type %d", obj
->type
);
248 obj
->hdr_size
= consumed_bytes
- obj
->idx
.offset
;
250 data
= unpack_entry_data(obj
->idx
.offset
, obj
->size
);
251 obj
->idx
.crc32
= input_crc32
;
255 static void *get_data_from_pack(struct object_entry
*obj
)
257 unsigned long from
= obj
[0].idx
.offset
+ obj
[0].hdr_size
;
258 unsigned long len
= obj
[1].idx
.offset
- from
;
259 unsigned long rdy
= 0;
260 unsigned char *src
, *data
;
267 ssize_t n
= pread(pack_fd
, data
+ rdy
, len
- rdy
, from
+ rdy
);
269 die("cannot pread pack file: %s", strerror(errno
));
272 data
= xmalloc(obj
->size
);
273 memset(&stream
, 0, sizeof(stream
));
274 stream
.next_out
= data
;
275 stream
.avail_out
= obj
->size
;
276 stream
.next_in
= src
;
277 stream
.avail_in
= len
;
278 inflateInit(&stream
);
279 while ((st
= inflate(&stream
, Z_FINISH
)) == Z_OK
);
281 if (st
!= Z_STREAM_END
|| stream
.total_out
!= obj
->size
)
282 die("serious inflate inconsistency");
287 static int find_delta(const union delta_base
*base
)
289 int first
= 0, last
= nr_deltas
;
291 while (first
< last
) {
292 int next
= (first
+ last
) / 2;
293 struct delta_entry
*delta
= &deltas
[next
];
296 cmp
= memcmp(base
, &delta
->base
, UNION_BASE_SZ
);
308 static int find_delta_children(const union delta_base
*base
,
309 int *first_index
, int *last_index
)
311 int first
= find_delta(base
);
313 int end
= nr_deltas
- 1;
317 while (first
> 0 && !memcmp(&deltas
[first
- 1].base
, base
, UNION_BASE_SZ
))
319 while (last
< end
&& !memcmp(&deltas
[last
+ 1].base
, base
, UNION_BASE_SZ
))
321 *first_index
= first
;
326 static void sha1_object(const void *data
, unsigned long size
,
327 enum object_type type
, unsigned char *sha1
)
329 hash_sha1_file(data
, size
, typename(type
), sha1
);
330 if (has_sha1_file(sha1
)) {
332 enum object_type has_type
;
333 unsigned long has_size
;
334 has_data
= read_sha1_file(sha1
, &has_type
, &has_size
);
336 die("cannot read existing object %s", sha1_to_hex(sha1
));
337 if (size
!= has_size
|| type
!= has_type
||
338 memcmp(data
, has_data
, size
) != 0)
339 die("SHA1 COLLISION FOUND WITH %s !", sha1_to_hex(sha1
));
344 static void resolve_delta(struct object_entry
*delta_obj
, void *base_data
,
345 unsigned long base_size
, enum object_type type
)
348 unsigned long delta_size
;
350 unsigned long result_size
;
351 union delta_base delta_base
;
354 delta_obj
->real_type
= type
;
355 delta_data
= get_data_from_pack(delta_obj
);
356 delta_size
= delta_obj
->size
;
357 result
= patch_delta(base_data
, base_size
, delta_data
, delta_size
,
361 bad_object(delta_obj
->idx
.offset
, "failed to apply delta");
362 sha1_object(result
, result_size
, type
, delta_obj
->idx
.sha1
);
363 nr_resolved_deltas
++;
365 hashcpy(delta_base
.sha1
, delta_obj
->idx
.sha1
);
366 if (!find_delta_children(&delta_base
, &first
, &last
)) {
367 for (j
= first
; j
<= last
; j
++) {
368 struct object_entry
*child
= objects
+ deltas
[j
].obj_no
;
369 if (child
->real_type
== OBJ_REF_DELTA
)
370 resolve_delta(child
, result
, result_size
, type
);
374 memset(&delta_base
, 0, sizeof(delta_base
));
375 delta_base
.offset
= delta_obj
->idx
.offset
;
376 if (!find_delta_children(&delta_base
, &first
, &last
)) {
377 for (j
= first
; j
<= last
; j
++) {
378 struct object_entry
*child
= objects
+ deltas
[j
].obj_no
;
379 if (child
->real_type
== OBJ_OFS_DELTA
)
380 resolve_delta(child
, result
, result_size
, type
);
387 static int compare_delta_entry(const void *a
, const void *b
)
389 const struct delta_entry
*delta_a
= a
;
390 const struct delta_entry
*delta_b
= b
;
391 return memcmp(&delta_a
->base
, &delta_b
->base
, UNION_BASE_SZ
);
394 /* Parse all objects and return the pack content SHA1 hash */
395 static void parse_pack_objects(unsigned char *sha1
)
398 struct delta_entry
*delta
= deltas
;
404 * - find locations of all objects;
405 * - calculate SHA1 of all non-delta objects;
406 * - remember base (SHA1 or offset) for all deltas.
409 progress
= start_progress("Indexing objects", nr_objects
);
410 for (i
= 0; i
< nr_objects
; i
++) {
411 struct object_entry
*obj
= &objects
[i
];
412 data
= unpack_raw_entry(obj
, &delta
->base
);
413 obj
->real_type
= obj
->type
;
414 if (obj
->type
== OBJ_REF_DELTA
|| obj
->type
== OBJ_OFS_DELTA
) {
419 sha1_object(data
, obj
->size
, obj
->type
, obj
->idx
.sha1
);
421 display_progress(progress
, i
+1);
423 objects
[i
].idx
.offset
= consumed_bytes
;
424 stop_progress(&progress
);
426 /* Check pack integrity */
428 SHA1_Final(sha1
, &input_ctx
);
429 if (hashcmp(fill(20), sha1
))
430 die("pack is corrupted (SHA1 mismatch)");
433 /* If input_fd is a file, we should have reached its end now. */
434 if (fstat(input_fd
, &st
))
435 die("cannot fstat packfile: %s", strerror(errno
));
436 if (S_ISREG(st
.st_mode
) &&
437 lseek(input_fd
, 0, SEEK_CUR
) - input_len
!= st
.st_size
)
438 die("pack has junk at the end");
443 /* Sort deltas by base SHA1/offset for fast searching */
444 qsort(deltas
, nr_deltas
, sizeof(struct delta_entry
),
445 compare_delta_entry
);
449 * - for all non-delta objects, look if it is used as a base for deltas;
451 * - if used as a base, uncompress the object and apply all deltas,
452 * recursively checking if the resulting object is used as a base
453 * for some more deltas.
456 progress
= start_progress("Resolving deltas", nr_deltas
);
457 for (i
= 0; i
< nr_objects
; i
++) {
458 struct object_entry
*obj
= &objects
[i
];
459 union delta_base base
;
460 int j
, ref
, ref_first
, ref_last
, ofs
, ofs_first
, ofs_last
;
462 if (obj
->type
== OBJ_REF_DELTA
|| obj
->type
== OBJ_OFS_DELTA
)
464 hashcpy(base
.sha1
, obj
->idx
.sha1
);
465 ref
= !find_delta_children(&base
, &ref_first
, &ref_last
);
466 memset(&base
, 0, sizeof(base
));
467 base
.offset
= obj
->idx
.offset
;
468 ofs
= !find_delta_children(&base
, &ofs_first
, &ofs_last
);
471 data
= get_data_from_pack(obj
);
473 for (j
= ref_first
; j
<= ref_last
; j
++) {
474 struct object_entry
*child
= objects
+ deltas
[j
].obj_no
;
475 if (child
->real_type
== OBJ_REF_DELTA
)
476 resolve_delta(child
, data
,
477 obj
->size
, obj
->type
);
480 for (j
= ofs_first
; j
<= ofs_last
; j
++) {
481 struct object_entry
*child
= objects
+ deltas
[j
].obj_no
;
482 if (child
->real_type
== OBJ_OFS_DELTA
)
483 resolve_delta(child
, data
,
484 obj
->size
, obj
->type
);
487 display_progress(progress
, nr_resolved_deltas
);
491 static int write_compressed(int fd
, void *in
, unsigned int size
, uint32_t *obj_crc
)
494 unsigned long maxsize
;
497 memset(&stream
, 0, sizeof(stream
));
498 deflateInit(&stream
, zlib_compression_level
);
499 maxsize
= deflateBound(&stream
, size
);
500 out
= xmalloc(maxsize
);
504 stream
.avail_in
= size
;
505 stream
.next_out
= out
;
506 stream
.avail_out
= maxsize
;
507 while (deflate(&stream
, Z_FINISH
) == Z_OK
);
510 size
= stream
.total_out
;
511 write_or_die(fd
, out
, size
);
512 *obj_crc
= crc32(*obj_crc
, out
, size
);
517 static void append_obj_to_pack(const unsigned char *sha1
, void *buf
,
518 unsigned long size
, enum object_type type
)
520 struct object_entry
*obj
= &objects
[nr_objects
++];
521 unsigned char header
[10];
522 unsigned long s
= size
;
524 unsigned char c
= (type
<< 4) | (s
& 15);
527 header
[n
++] = c
| 0x80;
532 write_or_die(output_fd
, header
, n
);
533 obj
[0].idx
.crc32
= crc32(0, Z_NULL
, 0);
534 obj
[0].idx
.crc32
= crc32(obj
[0].idx
.crc32
, header
, n
);
535 obj
[1].idx
.offset
= obj
[0].idx
.offset
+ n
;
536 obj
[1].idx
.offset
+= write_compressed(output_fd
, buf
, size
, &obj
[0].idx
.crc32
);
537 hashcpy(obj
->idx
.sha1
, sha1
);
540 static int delta_pos_compare(const void *_a
, const void *_b
)
542 struct delta_entry
*a
= *(struct delta_entry
**)_a
;
543 struct delta_entry
*b
= *(struct delta_entry
**)_b
;
544 return a
->obj_no
- b
->obj_no
;
547 static void fix_unresolved_deltas(int nr_unresolved
)
549 struct delta_entry
**sorted_by_pos
;
553 * Since many unresolved deltas may well be themselves base objects
554 * for more unresolved deltas, we really want to include the
555 * smallest number of base objects that would cover as much delta
556 * as possible by picking the
557 * trunk deltas first, allowing for other deltas to resolve without
558 * additional base objects. Since most base objects are to be found
559 * before deltas depending on them, a good heuristic is to start
560 * resolving deltas in the same order as their position in the pack.
562 sorted_by_pos
= xmalloc(nr_unresolved
* sizeof(*sorted_by_pos
));
563 for (i
= 0; i
< nr_deltas
; i
++) {
564 if (objects
[deltas
[i
].obj_no
].real_type
!= OBJ_REF_DELTA
)
566 sorted_by_pos
[n
++] = &deltas
[i
];
568 qsort(sorted_by_pos
, n
, sizeof(*sorted_by_pos
), delta_pos_compare
);
570 for (i
= 0; i
< n
; i
++) {
571 struct delta_entry
*d
= sorted_by_pos
[i
];
574 enum object_type type
;
577 if (objects
[d
->obj_no
].real_type
!= OBJ_REF_DELTA
)
579 data
= read_sha1_file(d
->base
.sha1
, &type
, &size
);
583 find_delta_children(&d
->base
, &first
, &last
);
584 for (j
= first
; j
<= last
; j
++) {
585 struct object_entry
*child
= objects
+ deltas
[j
].obj_no
;
586 if (child
->real_type
== OBJ_REF_DELTA
)
587 resolve_delta(child
, data
, size
, type
);
590 if (check_sha1_signature(d
->base
.sha1
, data
, size
, typename(type
)))
591 die("local object %s is corrupt", sha1_to_hex(d
->base
.sha1
));
592 append_obj_to_pack(d
->base
.sha1
, data
, size
, type
);
594 display_progress(progress
, nr_resolved_deltas
);
599 static void final(const char *final_pack_name
, const char *curr_pack_name
,
600 const char *final_index_name
, const char *curr_index_name
,
601 const char *keep_name
, const char *keep_msg
,
604 const char *report
= "pack";
611 err
= close(output_fd
);
613 die("error while closing pack file: %s", strerror(errno
));
614 chmod(curr_pack_name
, 0444);
618 int keep_fd
, keep_msg_len
= strlen(keep_msg
);
620 snprintf(name
, sizeof(name
), "%s/pack/pack-%s.keep",
621 get_object_directory(), sha1_to_hex(sha1
));
624 keep_fd
= open(keep_name
, O_RDWR
|O_CREAT
|O_EXCL
, 0600);
627 die("cannot write keep file");
629 if (keep_msg_len
> 0) {
630 write_or_die(keep_fd
, keep_msg
, keep_msg_len
);
631 write_or_die(keep_fd
, "\n", 1);
633 if (close(keep_fd
) != 0)
634 die("cannot write keep file");
639 if (final_pack_name
!= curr_pack_name
) {
640 if (!final_pack_name
) {
641 snprintf(name
, sizeof(name
), "%s/pack/pack-%s.pack",
642 get_object_directory(), sha1_to_hex(sha1
));
643 final_pack_name
= name
;
645 if (move_temp_to_file(curr_pack_name
, final_pack_name
))
646 die("cannot store pack file");
649 chmod(curr_index_name
, 0444);
650 if (final_index_name
!= curr_index_name
) {
651 if (!final_index_name
) {
652 snprintf(name
, sizeof(name
), "%s/pack/pack-%s.idx",
653 get_object_directory(), sha1_to_hex(sha1
));
654 final_index_name
= name
;
656 if (move_temp_to_file(curr_index_name
, final_index_name
))
657 die("cannot store index file");
661 printf("%s\n", sha1_to_hex(sha1
));
664 int len
= snprintf(buf
, sizeof(buf
), "%s\t%s\n",
665 report
, sha1_to_hex(sha1
));
666 write_or_die(1, buf
, len
);
669 * Let's just mimic git-unpack-objects here and write
670 * the last part of the input buffer to stdout.
673 err
= xwrite(1, input_buffer
+ input_offset
, input_len
);
682 int main(int argc
, char **argv
)
684 int i
, fix_thin_pack
= 0;
685 char *curr_pack
, *pack_name
= NULL
;
686 char *curr_index
, *index_name
= NULL
;
687 const char *keep_name
= NULL
, *keep_msg
= NULL
;
688 char *index_name_buf
= NULL
, *keep_name_buf
= NULL
;
689 struct pack_idx_entry
**idx_objects
;
690 unsigned char sha1
[20];
692 for (i
= 1; i
< argc
; i
++) {
696 if (!strcmp(arg
, "--stdin")) {
698 } else if (!strcmp(arg
, "--fix-thin")) {
700 } else if (!strcmp(arg
, "--keep")) {
702 } else if (!prefixcmp(arg
, "--keep=")) {
704 } else if (!prefixcmp(arg
, "--pack_header=")) {
705 struct pack_header
*hdr
;
708 hdr
= (struct pack_header
*)input_buffer
;
709 hdr
->hdr_signature
= htonl(PACK_SIGNATURE
);
710 hdr
->hdr_version
= htonl(strtoul(arg
+ 14, &c
, 10));
713 hdr
->hdr_entries
= htonl(strtoul(c
+ 1, &c
, 10));
716 input_len
= sizeof(*hdr
);
717 } else if (!strcmp(arg
, "-v")) {
719 } else if (!strcmp(arg
, "-o")) {
720 if (index_name
|| (i
+1) >= argc
)
721 usage(index_pack_usage
);
722 index_name
= argv
[++i
];
723 } else if (!prefixcmp(arg
, "--index-version=")) {
725 pack_idx_default_version
= strtoul(arg
+ 16, &c
, 10);
726 if (pack_idx_default_version
> 2)
729 pack_idx_off32_limit
= strtoul(c
+1, &c
, 0);
730 if (*c
|| pack_idx_off32_limit
& 0x80000000)
733 usage(index_pack_usage
);
738 usage(index_pack_usage
);
742 if (!pack_name
&& !from_stdin
)
743 usage(index_pack_usage
);
744 if (fix_thin_pack
&& !from_stdin
)
745 die("--fix-thin cannot be used without --stdin");
746 if (!index_name
&& pack_name
) {
747 int len
= strlen(pack_name
);
748 if (!has_extension(pack_name
, ".pack"))
749 die("packfile name '%s' does not end with '.pack'",
751 index_name_buf
= xmalloc(len
);
752 memcpy(index_name_buf
, pack_name
, len
- 5);
753 strcpy(index_name_buf
+ len
- 5, ".idx");
754 index_name
= index_name_buf
;
756 if (keep_msg
&& !keep_name
&& pack_name
) {
757 int len
= strlen(pack_name
);
758 if (!has_extension(pack_name
, ".pack"))
759 die("packfile name '%s' does not end with '.pack'",
761 keep_name_buf
= xmalloc(len
);
762 memcpy(keep_name_buf
, pack_name
, len
- 5);
763 strcpy(keep_name_buf
+ len
- 5, ".keep");
764 keep_name
= keep_name_buf
;
767 curr_pack
= open_pack_file(pack_name
);
769 objects
= xmalloc((nr_objects
+ 1) * sizeof(struct object_entry
));
770 deltas
= xmalloc(nr_objects
* sizeof(struct delta_entry
));
771 parse_pack_objects(sha1
);
772 if (nr_deltas
== nr_resolved_deltas
) {
773 stop_progress(&progress
);
774 /* Flush remaining pack final 20-byte SHA1. */
778 int nr_unresolved
= nr_deltas
- nr_resolved_deltas
;
779 int nr_objects_initial
= nr_objects
;
780 if (nr_unresolved
<= 0)
781 die("confusion beyond insanity");
782 objects
= xrealloc(objects
,
783 (nr_objects
+ nr_unresolved
+ 1)
785 fix_unresolved_deltas(nr_unresolved
);
786 stop_progress(&progress
);
788 fprintf(stderr
, "%d objects were added to complete this thin pack.\n",
789 nr_objects
- nr_objects_initial
);
790 fixup_pack_header_footer(output_fd
, sha1
,
791 curr_pack
, nr_objects
);
793 if (nr_deltas
!= nr_resolved_deltas
)
794 die("pack has %d unresolved deltas",
795 nr_deltas
- nr_resolved_deltas
);
799 idx_objects
= xmalloc((nr_objects
) * sizeof(struct pack_idx_entry
*));
800 for (i
= 0; i
< nr_objects
; i
++)
801 idx_objects
[i
] = &objects
[i
].idx
;
802 curr_index
= write_idx_file(index_name
, idx_objects
, nr_objects
, sha1
);
805 final(pack_name
, curr_pack
,
806 index_name
, curr_index
,
810 free(index_name_buf
);
812 if (pack_name
== NULL
)
814 if (index_name
== NULL
)