/*
 * Usage synopsis passed to usage() when the command line is invalid.
 * The keep option is spelled "--keep" (two dashes), matching the string the
 * option parser in main() compares against.
 */
static const char index_pack_usage[] =
"git-index-pack [-v] [-o <index-file>] [{ --keep | --keep=<msg> }] { <pack-file> | --stdin [--fix-thin] [<pack-file>] }";
16 struct pack_idx_entry idx
;
18 unsigned int hdr_size
;
19 enum object_type type
;
20 enum object_type real_type
;
24 unsigned char sha1
[20];
29 * Even if sizeof(union delta_base) == 24 on 64-bit archs, we really want
30 * to memcmp() only the first 20 bytes.
32 #define UNION_BASE_SZ 20
36 union delta_base base
;
/* Per-object table; main() allocates it with nr_objects + 1 entries. */
40 static struct object_entry
*objects
;
/*
 * Delta table; main() allocates nr_objects entries and
 * parse_pack_objects() sorts it with compare_delta_entry().
 */
41 static struct delta_entry
*deltas
;
/*
 * Object count: set from the pack header by parse_pack_header() and
 * incremented by append_obj_to_pack() for every appended object.
 */
42 static int nr_objects
;
/* Incremented by resolve_delta() each time a delta has been applied. */
44 static int nr_resolved_deltas
;
/* Tested by main() when validating the argument combination. */
46 static int from_stdin
;
/* State handed to start_progress()/display_progress()/stop_progress(). */
49 static struct progress progress
;
51 /* We always read in 4kB chunks. */
52 static unsigned char input_buffer
[4096];
/*
 * input_offset: position of the next unconsumed byte in input_buffer
 *               (advanced by use()).
 * input_len:    number of buffered bytes still available; use() refuses to
 *               consume more than this.
 */
53 static unsigned int input_offset
, input_len
;
/*
 * Running total of bytes passed to use(); unpack_raw_entry() records it as
 * the pack offset of the object it is about to parse.
 */
54 static off_t consumed_bytes
;
/* SHA-1 of the input: updated in flush(), finalized in parse_pack_objects(). */
55 static SHA_CTX input_ctx
;
/* CRC32 of the current object: reset in unpack_raw_entry(), updated in use(). */
56 static uint32_t input_crc32
;
/*
 * input_fd:  descriptor fill() reads from.
 * output_fd: descriptor flush() and append_obj_to_pack() write to.
 * pack_fd:   descriptor get_data_from_pack() pread()s from.
 */
57 static int input_fd
, output_fd
, pack_fd
;
59 /* Discard current buffer used content. */
/*
 * flush() - pass the consumed part of the input buffer on and compact it.
 *
 * Visible steps: write the first input_offset bytes of input_buffer to
 * output_fd, feed the same bytes to the running SHA-1 context, then
 * memmove() input_len bytes from input_buffer + input_offset down to the
 * start of the buffer.
 * NOTE(review): the embedded numbering jumps from 60 to 64, so any guard
 * conditions and the reset of input_offset are not visible in this listing -
 * confirm against the complete file.
 */
60 static void flush(void)
64 write_or_die(output_fd
, input_buffer
, input_offset
);
65 SHA1_Update(&input_ctx
, input_buffer
, input_offset
);
66 memmove(input_buffer
, input_buffer
+ input_offset
, input_len
);
72 * Make sure at least "min" bytes are available in the buffer, and
73 * return the pointer to the buffer.
/*
 * fill() - make buffered input available and return a pointer to it.
 * @min: number of bytes the caller needs.
 *
 * Returns input_buffer + input_offset.  The visible code dies when @min
 * exceeds sizeof(input_buffer) and otherwise calls xread() on input_fd,
 * storing into the space after the first input_len bytes, repeating while
 * input_len < min.  A read error is fatal.
 * NOTE(review): the conditions guarding the early return and the read-error
 * die() are not visible in this listing - confirm against the full file.
 */
75 static void *fill(int min
)
78 return input_buffer
+ input_offset
;
/*
 * int vs. size_t comparison: min is converted to an unsigned type here, so a
 * negative min would also compare as larger than the buffer.
 */
79 if (min
> sizeof(input_buffer
))
80 die("cannot fill %d bytes", min
);
83 ssize_t ret
= xread(input_fd
, input_buffer
+ input_len
,
84 sizeof(input_buffer
) - input_len
);
88 die("read error on input: %s", strerror(errno
));
91 } while (input_len
< min
);
/*
 * use() - mark @bytes bytes of buffered input as consumed.
 * @bytes: number of bytes, starting at input_buffer + input_offset.
 *
 * Dies if @bytes exceeds input_len.  Otherwise the consumed bytes are folded
 * into input_crc32, input_offset is advanced, and @bytes is added to
 * consumed_bytes after a wrap-around check.
 */
95 static void use(int bytes
)
97 if (bytes
> input_len
)
98 die("used more bytes than were available");
99 input_crc32
= crc32(input_crc32
, input_buffer
+ input_offset
, bytes
);
101 input_offset
+= bytes
;
103 /* make sure off_t is sufficiently large not to wrap */
/*
 * NOTE(review): off_t is a signed type under POSIX and signed overflow is
 * undefined in ISO C, so a compiler may assume this test is never true.
 * Comparing against the maximum off_t value before adding would be safer.
 */
104 if (consumed_bytes
> consumed_bytes
+ bytes
)
105 die("pack too large for current definition of off_t");
106 consumed_bytes
+= bytes
;
/*
 * open_pack_file() - set up the input/output descriptors for the pack.
 * @pack_name: pack file path; the caller may pass NULL (main() initializes
 *             its pack_name to NULL).
 *
 * Three setups are visible:
 *  - a temporary file "<object dir>/tmp_pack_XXXXXX" is created with
 *    xmkstemp() as output_fd and pack_name is replaced by a copy of that name;
 *  - pack_name is created exclusively (O_CREAT|O_EXCL|O_RDWR, mode 0600) as
 *    output_fd;
 *  - pack_name is opened read-only as input_fd.
 * Failure to open is fatal.  The SHA-1 context for the input is initialized
 * before returning.
 * NOTE(review): the conditions selecting between these setups and the return
 * statement are not visible in this listing.
 */
109 static const char *open_pack_file(const char *pack_name
)
/* static storage: the buffer outlives this call */
114 static char tmpfile
[PATH_MAX
];
115 snprintf(tmpfile
, sizeof(tmpfile
),
116 "%s/tmp_pack_XXXXXX", get_object_directory());
117 output_fd
= xmkstemp(tmpfile
);
118 pack_name
= xstrdup(tmpfile
);
120 output_fd
= open(pack_name
, O_CREAT
|O_EXCL
|O_RDWR
, 0600);
/* NOTE(review): unlike the other die() messages here, this one ends in "\n". */
122 die("unable to create %s: %s\n", pack_name
, strerror(errno
));
125 input_fd
= open(pack_name
, O_RDONLY
);
127 die("cannot open packfile '%s': %s",
128 pack_name
, strerror(errno
));
132 SHA1_Init(&input_ctx
);
136 static void parse_pack_header(void)
138 struct pack_header
*hdr
= fill(sizeof(struct pack_header
));
140 /* Header consistency check */
141 if (hdr
->hdr_signature
!= htonl(PACK_SIGNATURE
))
142 die("pack signature mismatch");
143 if (!pack_version_ok(hdr
->hdr_version
))
144 die("pack version %d unsupported", ntohl(hdr
->hdr_version
));
146 nr_objects
= ntohl(hdr
->hdr_entries
);
147 use(sizeof(struct pack_header
));
/*
 * bad_object() - report a malformed object and terminate.
 * @offset: pack offset of the offending object, printed with %lu.
 * @format: printf-style format for the detail message, followed by its
 *          arguments (checked by the compiler via the format attribute).
 *
 * The detail message is formatted into a local buffer with vsnprintf() and
 * passed to die().  Declared NORETURN.
 */
150 static void bad_object(unsigned long offset
, const char *format
,
151 ...) NORETURN
__attribute__((format (printf
, 2, 3)));
153 static void bad_object(unsigned long offset
, const char *format
, ...)
158 va_start(params
, format
);
159 vsnprintf(buf
, sizeof(buf
), format
, params
);
161 die("pack has bad object at offset %lu: %s", offset
, buf
);
/*
 * unpack_entry_data() - inflate one object's data from the input stream.
 * @offset: pack offset of the object, used only for error reporting.
 * @size:   expected inflated size; a buffer of this size is allocated.
 *
 * The zlib stream reads from the buffer returned by fill(1), and every
 * inflate() call is followed by use() for the bytes zlib consumed.  The
 * success test visible here is total_out == size together with
 * Z_STREAM_END; bad_object() reports the inflate return code on failure.
 * NOTE(review): the loop structure, the condition in front of bad_object()
 * and the return statement are not visible in this listing.
 */
164 static void *unpack_entry_data(unsigned long offset
, unsigned long size
)
167 void *buf
= xmalloc(size
);
169 memset(&stream
, 0, sizeof(stream
));
170 stream
.next_out
= buf
;
171 stream
.avail_out
= size
;
172 stream
.next_in
= fill(1);
173 stream
.avail_in
= input_len
;
/* NOTE(review): the inflateInit() return value is not checked. */
174 inflateInit(&stream
);
177 int ret
= inflate(&stream
, 0);
/* consume exactly what zlib read: available before minus available after */
178 use(input_len
- stream
.avail_in
);
179 if (stream
.total_out
== size
&& ret
== Z_STREAM_END
)
182 bad_object(offset
, "inflate returned %d", ret
);
/* refill and point zlib at the fresh input */
183 stream
.next_in
= fill(1);
184 stream
.avail_in
= input_len
;
/*
 * unpack_raw_entry() - parse one pack entry header and inflate its data.
 * @obj:        entry to fill in (offset, type, hdr_size, crc32 are set here).
 * @delta_base: receives the base reference of a delta: either a 20-byte
 *              SHA-1 or a pack offset.
 *
 * The entry's pack offset is the current consumed_bytes, and the per-object
 * CRC is restarted.  The type comes from bits 4-6 of a header byte, and the
 * size is accumulated seven bits at a time.  For an offset-based delta the
 * distance to the base is decoded in 7-bit groups and subtracted from this
 * entry's offset; overflow and out-of-bound results are rejected through
 * bad_object(), as is an unknown type.
 * NOTE(review): the byte-reading statements, the switch/case structure and
 * the return statement are not visible in this listing.
 */
190 static void *unpack_raw_entry(struct object_entry
*obj
, union delta_base
*delta_base
)
198 obj
->idx
.offset
= consumed_bytes
;
/* start a fresh CRC for this entry */
199 input_crc32
= crc32(0, Z_NULL
, 0);
204 obj
->type
= (c
>> 4) & 7;
211 size
+= (c
& 0x7fUL
) << shift
;
218 hashcpy(delta_base
->sha1
, fill(20));
/* clear the whole union before the offset member is stored below */
222 memset(delta_base
, 0, sizeof(*delta_base
));
226 base_offset
= c
& 127;
229 if (!base_offset
|| MSB(base_offset
, 7))
230 bad_object(obj
->idx
.offset
, "offset value overflow for delta base object");
234 base_offset
= (base_offset
<< 7) + (c
& 127);
236 delta_base
->offset
= obj
->idx
.offset
- base_offset
;
/* the base must lie strictly before this entry in the pack */
237 if (delta_base
->offset
>= obj
->idx
.offset
)
238 bad_object(obj
->idx
.offset
, "delta base offset is out of bound");
246 bad_object(obj
->idx
.offset
, "unknown object type %d", obj
->type
);
/* header length = bytes consumed since the entry started */
248 obj
->hdr_size
= consumed_bytes
- obj
->idx
.offset
;
250 data
= unpack_entry_data(obj
->idx
.offset
, obj
->size
);
251 obj
->idx
.crc32
= input_crc32
;
/*
 * get_data_from_pack() - re-read and inflate an object already in the pack.
 * @obj: entry to load; obj[1] (the following table slot) must hold a valid
 *       offset because it marks the end of this entry's compressed data.
 *
 * The compressed bytes span from obj[0].idx.offset + obj[0].hdr_size up to
 * obj[1].idx.offset.  They are read with pread() from pack_fd, then inflated
 * with Z_FINISH into a freshly allocated buffer of obj->size bytes.  A pread
 * failure, or a result that is not Z_STREAM_END with exactly obj->size
 * bytes, is fatal.
 * NOTE(review): the allocation of the source buffer, the read loop bounds
 * and the cleanup/return statements are not visible in this listing.
 */
255 static void *get_data_from_pack(struct object_entry
*obj
)
257 unsigned long from
= obj
[0].idx
.offset
+ obj
[0].hdr_size
;
258 unsigned long len
= obj
[1].idx
.offset
- from
;
/* bytes read so far */
259 unsigned long rdy
= 0;
260 unsigned char *src
, *data
;
267 ssize_t n
= pread(pack_fd
, data
+ rdy
, len
- rdy
, from
+ rdy
);
269 die("cannot pread pack file: %s", strerror(errno
));
272 data
= xmalloc(obj
->size
);
273 memset(&stream
, 0, sizeof(stream
));
274 stream
.next_out
= data
;
275 stream
.avail_out
= obj
->size
;
276 stream
.next_in
= src
;
277 stream
.avail_in
= len
;
/* NOTE(review): the inflateInit() return value is not checked. */
278 inflateInit(&stream
);
279 while ((st
= inflate(&stream
, Z_FINISH
)) == Z_OK
);
281 if (st
!= Z_STREAM_END
|| stream
.total_out
!= obj
->size
)
282 die("serious inflate inconsistency");
/*
 * find_delta() - look up @base in the deltas[] table.
 * @base: delta base (SHA-1 or offset) to search for.
 *
 * Probes the midpoint of the range [first, last) over nr_deltas entries and
 * compares the first UNION_BASE_SZ bytes of @base with the entry's base.
 * NOTE(review): presumably a binary search over the table sorted by
 * compare_delta_entry(); the range-narrowing branches and the return values
 * are not visible in this listing.
 */
287 static int find_delta(const union delta_base
*base
)
289 int first
= 0, last
= nr_deltas
;
291 while (first
< last
) {
292 int next
= (first
+ last
) / 2;
293 struct delta_entry
*delta
= &deltas
[next
];
296 cmp
= memcmp(base
, &delta
->base
, UNION_BASE_SZ
);
/*
 * find_delta_children() - find the run of deltas[] entries sharing @base.
 * @base:        delta base (SHA-1 or offset) to look for.
 * @first_index: out, index of the first matching entry.
 * @last_index:  out, index of the last matching entry.
 *
 * Starting from the index returned by find_delta(), the range is widened
 * downwards and upwards while the neighbouring entry's base equals @base in
 * its first UNION_BASE_SZ bytes.  Callers treat a zero return as "children
 * found" (they test !find_delta_children(...)).
 * NOTE(review): the handling of a failed lookup, the store to *last_index
 * and the return statements are not visible in this listing.
 */
308 static int find_delta_children(const union delta_base
*base
,
309 int *first_index
, int *last_index
)
311 int first
= find_delta(base
);
/* index of the last valid table entry */
313 int end
= nr_deltas
- 1;
317 while (first
> 0 && !memcmp(&deltas
[first
- 1].base
, base
, UNION_BASE_SZ
))
319 while (last
< end
&& !memcmp(&deltas
[last
+ 1].base
, base
, UNION_BASE_SZ
))
321 *first_index
= first
;
/*
 * sha1_object() - compute an object's name and guard against collisions.
 * @data: object contents.
 * @size: length of @data in bytes.
 * @type: object type, hashed by its name.
 * @sha1: out, receives the object name from hash_sha1_file().
 *
 * When has_sha1_file() reports that an object with this name already exists,
 * it is read back with read_sha1_file() and compared on size, type and
 * content.  An unreadable existing object or any difference is fatal.
 */
326 static void sha1_object(const void *data
, unsigned long size
,
327 enum object_type type
, unsigned char *sha1
)
329 hash_sha1_file(data
, size
, typename(type
), sha1
);
330 if (has_sha1_file(sha1
)) {
332 enum object_type has_type
;
333 unsigned long has_size
;
334 has_data
= read_sha1_file(sha1
, &has_type
, &has_size
);
336 die("cannot read existing object %s", sha1_to_hex(sha1
));
/* same name but different size, type or bytes: a genuine hash collision */
337 if (size
!= has_size
|| type
!= has_type
||
338 memcmp(data
, has_data
, size
) != 0)
339 die("SHA1 COLLISION FOUND WITH %s !", sha1_to_hex(sha1
));
/*
 * resolve_delta() - apply one delta and recurse into deltas based on it.
 * @delta_obj: the delta entry to resolve.
 * @base_data: inflated contents of the base object.
 * @base_size: length of @base_data.
 * @type:      real type of the base, which the result inherits.
 *
 * The delta is loaded with get_data_from_pack() and applied with
 * patch_delta(); failure is reported through bad_object().  The result is
 * hashed into delta_obj->idx.sha1 and nr_resolved_deltas is bumped.  The
 * function then recurses twice: into children that reference the result by
 * SHA-1, and into children that reference this entry by pack offset.
 * Since real_type is overwritten with the resolved type below, the
 * real_type tests select only children that still carry a delta type.
 * NOTE(review): buffer releases and some local declarations are not visible
 * in this listing.
 */
344 static void resolve_delta(struct object_entry
*delta_obj
, void *base_data
,
345 unsigned long base_size
, enum object_type type
)
348 unsigned long delta_size
;
350 unsigned long result_size
;
351 union delta_base delta_base
;
354 delta_obj
->real_type
= type
;
355 delta_data
= get_data_from_pack(delta_obj
);
356 delta_size
= delta_obj
->size
;
357 result
= patch_delta(base_data
, base_size
, delta_data
, delta_size
,
361 bad_object(delta_obj
->idx
.offset
, "failed to apply delta");
362 sha1_object(result
, result_size
, type
, delta_obj
->idx
.sha1
);
363 nr_resolved_deltas
++;
/* first pass: children keyed by the SHA-1 of the object just produced */
365 hashcpy(delta_base
.sha1
, delta_obj
->idx
.sha1
);
366 if (!find_delta_children(&delta_base
, &first
, &last
)) {
367 for (j
= first
; j
<= last
; j
++) {
368 struct object_entry
*child
= objects
+ deltas
[j
].obj_no
;
369 if (child
->real_type
== OBJ_REF_DELTA
)
370 resolve_delta(child
, result
, result_size
, type
);
/*
 * second pass: children keyed by this entry's pack offset; the union is
 * cleared first because lookups memcmp() UNION_BASE_SZ bytes of it
 */
374 memset(&delta_base
, 0, sizeof(delta_base
));
375 delta_base
.offset
= delta_obj
->idx
.offset
;
376 if (!find_delta_children(&delta_base
, &first
, &last
)) {
377 for (j
= first
; j
<= last
; j
++) {
378 struct object_entry
*child
= objects
+ deltas
[j
].obj_no
;
379 if (child
->real_type
== OBJ_OFS_DELTA
)
380 resolve_delta(child
, result
, result_size
, type
);
387 static int compare_delta_entry(const void *a
, const void *b
)
389 const struct delta_entry
*delta_a
= a
;
390 const struct delta_entry
*delta_b
= b
;
391 return memcmp(&delta_a
->base
, &delta_b
->base
, UNION_BASE_SZ
);
394 /* Parse all objects and return the pack content SHA1 hash */
/*
 * parse_pack_objects() - index every object in the pack and resolve deltas.
 * @sha1: out, receives the SHA-1 of the pack content.
 *
 * First pass: each entry is parsed with unpack_raw_entry(); non-delta
 * objects are hashed at once with sha1_object().  The pack checksum is then
 * finalized and compared with the 20 trailing bytes of the input, and a
 * regular input file is checked for trailing junk.
 * Second pass: deltas[] is sorted by base, then every non-delta object is
 * looked up as a base by SHA-1 and by offset, and its children are resolved
 * with resolve_delta().
 * NOTE(review): the bookkeeping inside the delta branch of the first pass,
 * several conditions and all buffer releases are not visible in this listing.
 */
395 static void parse_pack_objects(unsigned char *sha1
)
/* next free slot in deltas[]; its base is filled by unpack_raw_entry() */
398 struct delta_entry
*delta
= deltas
;
404 * - find locations of all objects;
405 * - calculate SHA1 of all non-delta objects;
406 * - remember base (SHA1 or offset) for all deltas.
409 start_progress(&progress
, "Indexing %u objects...", "", nr_objects
);
410 for (i
= 0; i
< nr_objects
; i
++) {
411 struct object_entry
*obj
= &objects
[i
];
412 data
= unpack_raw_entry(obj
, &delta
->base
);
413 obj
->real_type
= obj
->type
;
414 if (obj
->type
== OBJ_REF_DELTA
|| obj
->type
== OBJ_OFS_DELTA
) {
419 sha1_object(data
, obj
->size
, obj
->type
, obj
->idx
.sha1
);
422 display_progress(&progress
, i
+1);
/*
 * NOTE(review): presumably executed after the loop, storing the end offset
 * in the extra table slot that get_data_from_pack() reads as obj[1] - confirm.
 */
424 objects
[i
].idx
.offset
= consumed_bytes
;
426 stop_progress(&progress
);
428 /* Check pack integrity */
430 SHA1_Final(sha1
, &input_ctx
);
431 if (hashcmp(fill(20), sha1
))
432 die("pack is corrupted (SHA1 mismatch)");
435 /* If input_fd is a file, we should have reached its end now. */
436 if (fstat(input_fd
, &st
))
437 die("cannot fstat packfile: %s", strerror(errno
));
/* file position minus bytes still buffered must equal the file size */
438 if (S_ISREG(st
.st_mode
) &&
439 lseek(input_fd
, 0, SEEK_CUR
) - input_len
!= st
.st_size
)
440 die("pack has junk at the end");
445 /* Sort deltas by base SHA1/offset for fast searching */
446 qsort(deltas
, nr_deltas
, sizeof(struct delta_entry
),
447 compare_delta_entry
);
451 * - for all non-delta objects, look if it is used as a base for
453 * - if used as a base, uncompress the object and apply all deltas,
454 * recursively checking if the resulting object is used as a base
455 * for some more deltas.
458 start_progress(&progress
, "Resolving %u deltas...", "", nr_deltas
);
459 for (i
= 0; i
< nr_objects
; i
++) {
460 struct object_entry
*obj
= &objects
[i
];
461 union delta_base base
;
462 int j
, ref
, ref_first
, ref_last
, ofs
, ofs_first
, ofs_last
;
464 if (obj
->type
== OBJ_REF_DELTA
|| obj
->type
== OBJ_OFS_DELTA
)
/* ref: non-zero when some delta names this object by SHA-1 */
466 hashcpy(base
.sha1
, obj
->idx
.sha1
);
467 ref
= !find_delta_children(&base
, &ref_first
, &ref_last
);
/* ofs: non-zero when some delta names this object by pack offset */
468 memset(&base
, 0, sizeof(base
));
469 base
.offset
= obj
->idx
.offset
;
470 ofs
= !find_delta_children(&base
, &ofs_first
, &ofs_last
);
473 data
= get_data_from_pack(obj
);
475 for (j
= ref_first
; j
<= ref_last
; j
++) {
476 struct object_entry
*child
= objects
+ deltas
[j
].obj_no
;
477 if (child
->real_type
== OBJ_REF_DELTA
)
478 resolve_delta(child
, data
,
479 obj
->size
, obj
->type
);
482 for (j
= ofs_first
; j
<= ofs_last
; j
++) {
483 struct object_entry
*child
= objects
+ deltas
[j
].obj_no
;
484 if (child
->real_type
== OBJ_OFS_DELTA
)
485 resolve_delta(child
, data
,
486 obj
->size
, obj
->type
);
490 display_progress(&progress
, nr_resolved_deltas
);
/*
 * write_compressed() - deflate a buffer and write the result to a descriptor.
 * @fd:      destination descriptor.
 * @in:      data to compress.
 * @size:    number of input bytes.
 * @obj_crc: in/out, CRC32 updated with the compressed bytes written.
 *
 * The output buffer is sized with deflateBound(), the data is compressed at
 * zlib_compression_level with Z_FINISH, and stream.total_out bytes are
 * written with write_or_die().
 * NOTE(review): the return statement is not visible; the caller adds the
 * return value to a pack offset, so it is presumably the compressed length.
 * NOTE(review): @size is unsigned int while append_obj_to_pack() passes an
 * unsigned long, so sizes that do not fit in unsigned int are truncated on
 * platforms where long is wider.
 */
494 static int write_compressed(int fd
, void *in
, unsigned int size
, uint32_t *obj_crc
)
497 unsigned long maxsize
;
500 memset(&stream
, 0, sizeof(stream
));
/* NOTE(review): the deflateInit() return value is not checked. */
501 deflateInit(&stream
, zlib_compression_level
);
502 maxsize
= deflateBound(&stream
, size
);
503 out
= xmalloc(maxsize
);
507 stream
.avail_in
= size
;
508 stream
.next_out
= out
;
509 stream
.avail_out
= maxsize
;
/* empty loop body: keep calling deflate() while it reports Z_OK */
510 while (deflate(&stream
, Z_FINISH
) == Z_OK
);
/* from here on, size holds the compressed length */
513 size
= stream
.total_out
;
514 write_or_die(fd
, out
, size
);
515 *obj_crc
= crc32(*obj_crc
, out
, size
);
/*
 * append_obj_to_pack() - append a full object to the pack being written.
 * @sha1: name of the object, copied into the new entry.
 * @buf:  inflated object contents.
 * @size: length of @buf.
 * @type: object type, encoded in the entry header.
 *
 * Takes the next objects[] slot (incrementing nr_objects), builds the entry
 * header in a 10-byte buffer, writes it to output_fd, and then writes the
 * compressed data through write_compressed().  The entry's CRC32 covers
 * header and compressed data.  The following slot's offset is set to this
 * entry's offset plus the header length plus the write_compressed() result,
 * so the table must have room for obj[1].
 * NOTE(review): the header-encoding loop is only partly visible in this
 * listing.
 */
520 static void append_obj_to_pack(const unsigned char *sha1
, void *buf
,
521 unsigned long size
, enum object_type type
)
523 struct object_entry
*obj
= &objects
[nr_objects
++];
524 unsigned char header
[10];
525 unsigned long s
= size
;
/* first header byte: type in the high nibble, low 4 bits of the size */
527 unsigned char c
= (type
<< 4) | (s
& 15);
530 header
[n
++] = c
| 0x80;
535 write_or_die(output_fd
, header
, n
);
536 obj
[0].idx
.crc32
= crc32(0, Z_NULL
, 0);
537 obj
[0].idx
.crc32
= crc32(obj
[0].idx
.crc32
, header
, n
);
538 obj
[1].idx
.offset
= obj
[0].idx
.offset
+ n
;
539 obj
[1].idx
.offset
+= write_compressed(output_fd
, buf
, size
, &obj
[0].idx
.crc32
);
540 hashcpy(obj
->idx
.sha1
, sha1
);
543 static int delta_pos_compare(const void *_a
, const void *_b
)
545 struct delta_entry
*a
= *(struct delta_entry
**)_a
;
546 struct delta_entry
*b
= *(struct delta_entry
**)_b
;
547 return a
->obj_no
- b
->obj_no
;
/*
 * fix_unresolved_deltas() - complete a thin pack from local objects.
 * @nr_unresolved: number of deltas still unresolved; sizes the pointer array.
 *
 * Collects pointers to deltas[] entries, sorts them by position in the pack
 * (delta_pos_compare), and for each one reads the base object named by
 * d->base.sha1 from the local object store, resolves the children that
 * still carry OBJ_REF_DELTA as their real_type, verifies the base with
 * check_sha1_signature() and appends it to the pack.
 * NOTE(review): the statements controlled by the two real_type tests, the
 * handling of a missing base and the buffer releases are not visible in
 * this listing.
 */
550 static void fix_unresolved_deltas(int nr_unresolved
)
552 struct delta_entry
**sorted_by_pos
;
556 * Since many unresolved deltas may well be themselves base objects
557 * for more unresolved deltas, we really want to include the
558 * smallest number of base objects that would cover as much delta
559 * as possible by picking the
560 * trunc deltas first, allowing for other deltas to resolve without
561 * additional base objects. Since most base objects are to be found
562 * before deltas depending on them, a good heuristic is to start
563 * resolving deltas in the same order as their position in the pack.
565 sorted_by_pos
= xmalloc(nr_unresolved
* sizeof(*sorted_by_pos
));
566 for (i
= 0; i
< nr_deltas
; i
++) {
567 if (objects
[deltas
[i
].obj_no
].real_type
!= OBJ_REF_DELTA
)
569 sorted_by_pos
[n
++] = &deltas
[i
];
571 qsort(sorted_by_pos
, n
, sizeof(*sorted_by_pos
), delta_pos_compare
);
573 for (i
= 0; i
< n
; i
++) {
574 struct delta_entry
*d
= sorted_by_pos
[i
];
577 enum object_type type
;
/* re-tested per iteration: earlier iterations may have resolved this delta */
580 if (objects
[d
->obj_no
].real_type
!= OBJ_REF_DELTA
)
582 data
= read_sha1_file(d
->base
.sha1
, &type
, &size
);
586 find_delta_children(&d
->base
, &first
, &last
);
587 for (j
= first
; j
<= last
; j
++) {
588 struct object_entry
*child
= objects
+ deltas
[j
].obj_no
;
589 if (child
->real_type
== OBJ_REF_DELTA
)
590 resolve_delta(child
, data
, size
, type
);
593 if (check_sha1_signature(d
->base
.sha1
, data
, size
, typename(type
)))
594 die("local object %s is corrupt", sha1_to_hex(d
->base
.sha1
));
595 append_obj_to_pack(d
->base
.sha1
, data
, size
, type
);
598 display_progress(&progress
, nr_resolved_deltas
);
/*
 * final() - move the pack and index into place and report the result.
 * @final_pack_name:  destination pack path, or NULL for the default
 *                    "<object dir>/pack/pack-<sha1>.pack".
 * @curr_pack_name:   path the pack currently lives at.
 * @final_index_name: destination index path, or NULL for the default
 *                    "<object dir>/pack/pack-<sha1>.idx".
 * @curr_index_name:  path the index currently lives at.
 * @keep_name:        path of the .keep file to create.
 * @keep_msg:         message written to the .keep file, followed by a
 *                    newline when non-empty.
 *
 * Closes output_fd and makes the pack read-only, writes the .keep file,
 * moves pack and index with move_temp_to_file() when the current and final
 * names differ, and prints the pack SHA-1.  One visible path prints
 * "<report>\t<sha1>" and then writes the unconsumed remainder of the input
 * buffer to stdout.
 * NOTE(review): the remaining parameter(s) after keep_msg, the conditions
 * selecting these paths and the error checks are only partly visible in
 * this listing.
 */
603 static void final(const char *final_pack_name
, const char *curr_pack_name
,
604 const char *final_index_name
, const char *curr_index_name
,
605 const char *keep_name
, const char *keep_msg
,
608 const char *report
= "pack";
615 err
= close(output_fd
);
617 die("error while closing pack file: %s", strerror(errno
));
/* NOTE(review): the chmod() return value is not checked. */
618 chmod(curr_pack_name
, 0444);
622 int keep_fd
, keep_msg_len
= strlen(keep_msg
);
624 snprintf(name
, sizeof(name
), "%s/pack/pack-%s.keep",
625 get_object_directory(), sha1_to_hex(sha1
));
/* O_EXCL: creation fails if the keep file already exists */
628 keep_fd
= open(keep_name
, O_RDWR
|O_CREAT
|O_EXCL
, 0600);
631 die("cannot write keep file");
633 if (keep_msg_len
> 0) {
634 write_or_die(keep_fd
, keep_msg
, keep_msg_len
);
635 write_or_die(keep_fd
, "\n", 1);
637 if (close(keep_fd
) != 0)
638 die("cannot write keep file");
/* pointer comparison: identical pointers mean the pack is already in place */
643 if (final_pack_name
!= curr_pack_name
) {
644 if (!final_pack_name
) {
645 snprintf(name
, sizeof(name
), "%s/pack/pack-%s.pack",
646 get_object_directory(), sha1_to_hex(sha1
));
647 final_pack_name
= name
;
649 if (move_temp_to_file(curr_pack_name
, final_pack_name
))
650 die("cannot store pack file");
653 chmod(curr_index_name
, 0444);
654 if (final_index_name
!= curr_index_name
) {
655 if (!final_index_name
) {
656 snprintf(name
, sizeof(name
), "%s/pack/pack-%s.idx",
657 get_object_directory(), sha1_to_hex(sha1
));
658 final_index_name
= name
;
660 if (move_temp_to_file(curr_index_name
, final_index_name
))
661 die("cannot store index file");
665 printf("%s\n", sha1_to_hex(sha1
));
668 int len
= snprintf(buf
, sizeof(buf
), "%s\t%s\n",
669 report
, sha1_to_hex(sha1
));
670 write_or_die(1, buf
, len
);
673 * Let's just mimic git-unpack-objects here and write
674 * the last part of the input buffer to stdout.
677 err
= xwrite(1, input_buffer
+ input_offset
, input_len
);
/*
 * main() - entry point of git-index-pack.
 *
 * Parses the command line (--stdin, --fix-thin, --keep, --keep=<msg>,
 * --pack_header=, -v, -o <index-file>, --index-version=), derives the index
 * and keep file names from a pack name ending in ".pack" when they were not
 * given, opens the pack, parses all objects, completes a thin pack from
 * local objects when deltas remain unresolved, writes the index file and
 * hands the results to final().
 * NOTE(review): many branch bodies and the end of the function are not
 * visible in this listing.
 */
686 int main(int argc
, char **argv
)
688 int i
, fix_thin_pack
= 0;
689 const char *curr_pack
, *pack_name
= NULL
;
690 const char *curr_index
, *index_name
= NULL
;
691 const char *keep_name
= NULL
, *keep_msg
= NULL
;
692 char *index_name_buf
= NULL
, *keep_name_buf
= NULL
;
693 struct pack_idx_entry
**idx_objects
;
694 unsigned char sha1
[20];
696 for (i
= 1; i
< argc
; i
++) {
697 const char *arg
= argv
[i
];
700 if (!strcmp(arg
, "--stdin")) {
702 } else if (!strcmp(arg
, "--fix-thin")) {
704 } else if (!strcmp(arg
, "--keep")) {
706 } else if (!prefixcmp(arg
, "--keep=")) {
/*
 * --pack_header=: a pack header is synthesized at the start of input_buffer
 * from the numbers in the argument, and input_len is set to cover it.
 */
708 } else if (!prefixcmp(arg
, "--pack_header=")) {
709 struct pack_header
*hdr
;
712 hdr
= (struct pack_header
*)input_buffer
;
713 hdr
->hdr_signature
= htonl(PACK_SIGNATURE
);
714 hdr
->hdr_version
= htonl(strtoul(arg
+ 14, &c
, 10));
717 hdr
->hdr_entries
= htonl(strtoul(c
+ 1, &c
, 10));
720 input_len
= sizeof(*hdr
);
721 } else if (!strcmp(arg
, "-v")) {
722 } else if (!strcmp(arg
, "-o")) {
/* -o may be given only once and needs a following argument */
724 if (index_name
|| (i
+1) >= argc
)
725 usage(index_pack_usage
);
726 index_name
= argv
[++i
];
727 } else if (!prefixcmp(arg
, "--index-version=")) {
729 pack_idx_default_version
= strtoul(arg
+ 16, &c
, 10);
730 if (pack_idx_default_version
> 2)
733 pack_idx_off32_limit
= strtoul(c
+1, &c
, 0);
734 if (*c
|| pack_idx_off32_limit
& 0x80000000)
737 usage(index_pack_usage
);
742 usage(index_pack_usage
);
746 if (!pack_name
&& !from_stdin
)
747 usage(index_pack_usage
);
748 if (fix_thin_pack
&& !from_stdin
)
749 die("--fix-thin cannot be used without --stdin");
750 if (!index_name
&& pack_name
) {
751 int len
= strlen(pack_name
);
752 if (!has_extension(pack_name
, ".pack"))
753 die("packfile name '%s' does not end with '.pack'",
/* len bytes suffice: len - 5 prefix bytes + ".idx" (4) + NUL (1) */
755 index_name_buf
= xmalloc(len
);
756 memcpy(index_name_buf
, pack_name
, len
- 5);
757 strcpy(index_name_buf
+ len
- 5, ".idx");
758 index_name
= index_name_buf
;
760 if (keep_msg
&& !keep_name
&& pack_name
) {
761 int len
= strlen(pack_name
);
762 if (!has_extension(pack_name
, ".pack"))
763 die("packfile name '%s' does not end with '.pack'",
/*
 * NOTE(review): xmalloc(len) looks one byte short here: len - 5 prefix bytes
 * plus ".keep" (5) plus its NUL (1) need len + 1 bytes, so the strcpy()
 * below writes one byte past the allocation.
 */
765 keep_name_buf
= xmalloc(len
);
766 memcpy(keep_name_buf
, pack_name
, len
- 5);
767 strcpy(keep_name_buf
+ len
- 5, ".keep");
768 keep_name
= keep_name_buf
;
771 curr_pack
= open_pack_file(pack_name
);
/*
 * one extra objects[] entry: get_data_from_pack() reads obj[1].idx.offset
 * as the end of an entry's data
 * NOTE(review): nr_objects comes from the pack header; the size
 * multiplications are not checked for overflow.
 */
773 objects
= xmalloc((nr_objects
+ 1) * sizeof(struct object_entry
));
774 deltas
= xmalloc(nr_objects
* sizeof(struct delta_entry
));
775 parse_pack_objects(sha1
);
776 if (nr_deltas
== nr_resolved_deltas
) {
778 stop_progress(&progress
);
779 /* Flush remaining pack final 20-byte SHA1. */
783 int nr_unresolved
= nr_deltas
- nr_resolved_deltas
;
784 int nr_objects_initial
= nr_objects
;
785 if (nr_unresolved
<= 0)
786 die("confusion beyond insanity");
/* room for up to one appended base object per unresolved delta */
787 objects
= xrealloc(objects
,
788 (nr_objects
+ nr_unresolved
+ 1)
790 fix_unresolved_deltas(nr_unresolved
);
792 stop_progress(&progress
);
793 fprintf(stderr
, "%d objects were added to complete this thin pack.\n",
794 nr_objects
- nr_objects_initial
);
796 fixup_pack_header_footer(output_fd
, sha1
,
797 curr_pack
, nr_objects
);
799 if (nr_deltas
!= nr_resolved_deltas
)
800 die("pack has %d unresolved deltas",
801 nr_deltas
- nr_resolved_deltas
);
/* write_idx_file() takes an array of pointers to the idx part of each entry */
805 idx_objects
= xmalloc((nr_objects
) * sizeof(struct pack_idx_entry
*));
806 for (i
= 0; i
< nr_objects
; i
++)
807 idx_objects
[i
] = &objects
[i
].idx
;
808 curr_index
= write_idx_file(index_name
, idx_objects
, nr_objects
, sha1
);
811 final(pack_name
, curr_pack
,
812 index_name
, curr_index
,
816 free(index_name_buf
);