10 static const char index_pack_usage
[] =
11 "git-index-pack [-o <index-file>] { <pack-file> | --stdin [--fix-thin] [<pack-file>] }";
17 unsigned int hdr_size
;
18 enum object_type type
;
19 enum object_type real_type
;
20 unsigned char sha1
[20];
24 unsigned char sha1
[20];
29 * Even if sizeof(union delta_base) == 24 on 64-bit archs, we really want
30 * to memcmp() only the first 20 bytes.
32 #define UNION_BASE_SZ 20
36 union delta_base base
;
40 static struct object_entry
*objects
;
41 static struct delta_entry
*deltas
;
42 static int nr_objects
;
44 static int nr_resolved_deltas
;
46 static int from_stdin
;
48 /* We always read in 4kB chunks. */
49 static unsigned char input_buffer
[4096];
50 static unsigned long input_offset
, input_len
, consumed_bytes
;
51 static SHA_CTX input_ctx
;
52 static int input_fd
, output_fd
, mmap_fd
;
54 /* Discard current buffer used content. */
59 write_or_die(output_fd
, input_buffer
, input_offset
);
60 SHA1_Update(&input_ctx
, input_buffer
, input_offset
);
61 memcpy(input_buffer
, input_buffer
+ input_offset
, input_len
);
67 * Make sure at least "min" bytes are available in the buffer, and
68 * return the pointer to the buffer.
70 static void * fill(int min
)
73 return input_buffer
+ input_offset
;
74 if (min
> sizeof(input_buffer
))
75 die("cannot fill %d bytes", min
);
78 int ret
= xread(input_fd
, input_buffer
+ input_len
,
79 sizeof(input_buffer
) - input_len
);
83 die("read error on input: %s", strerror(errno
));
86 } while (input_len
< min
);
90 static void use(int bytes
)
92 if (bytes
> input_len
)
93 die("used more bytes than were available");
95 input_offset
+= bytes
;
96 consumed_bytes
+= bytes
;
99 static const char * open_pack_file(const char *pack_name
)
104 static char tmpfile
[PATH_MAX
];
105 snprintf(tmpfile
, sizeof(tmpfile
),
106 "%s/pack_XXXXXX", get_object_directory());
107 output_fd
= mkstemp(tmpfile
);
108 pack_name
= xstrdup(tmpfile
);
110 output_fd
= open(pack_name
, O_CREAT
|O_EXCL
|O_RDWR
, 0600);
112 die("unable to create %s: %s\n", pack_name
, strerror(errno
));
115 input_fd
= open(pack_name
, O_RDONLY
);
117 die("cannot open packfile '%s': %s",
118 pack_name
, strerror(errno
));
122 SHA1_Init(&input_ctx
);
126 static void parse_pack_header(void)
128 struct pack_header
*hdr
= fill(sizeof(struct pack_header
));
130 /* Header consistency check */
131 if (hdr
->hdr_signature
!= htonl(PACK_SIGNATURE
))
132 die("pack signature mismatch");
133 if (!pack_version_ok(hdr
->hdr_version
))
134 die("pack version %d unsupported", ntohl(hdr
->hdr_version
));
136 nr_objects
= ntohl(hdr
->hdr_entries
);
137 use(sizeof(struct pack_header
));
138 /*fprintf(stderr, "Indexing %d objects\n", nr_objects);*/
141 static void bad_object(unsigned long offset
, const char *format
,
142 ...) NORETURN
__attribute__((format (printf
, 2, 3)));
144 static void bad_object(unsigned long offset
, const char *format
, ...)
149 va_start(params
, format
);
150 vsnprintf(buf
, sizeof(buf
), format
, params
);
152 die("pack has bad object at offset %lu: %s", offset
, buf
);
155 static void *unpack_entry_data(unsigned long offset
, unsigned long size
)
158 void *buf
= xmalloc(size
);
160 memset(&stream
, 0, sizeof(stream
));
161 stream
.next_out
= buf
;
162 stream
.avail_out
= size
;
163 stream
.next_in
= fill(1);
164 stream
.avail_in
= input_len
;
165 inflateInit(&stream
);
168 int ret
= inflate(&stream
, 0);
169 use(input_len
- stream
.avail_in
);
170 if (stream
.total_out
== size
&& ret
== Z_STREAM_END
)
173 bad_object(offset
, "inflate returned %d", ret
);
174 stream
.next_in
= fill(1);
175 stream
.avail_in
= input_len
;
181 static void *unpack_raw_entry(struct object_entry
*obj
, union delta_base
*delta_base
)
184 unsigned long size
, base_offset
;
187 obj
->offset
= consumed_bytes
;
192 obj
->type
= (c
>> 4) & 7;
199 size
+= (c
& 0x7fUL
) << shift
;
206 hashcpy(delta_base
->sha1
, fill(20));
210 memset(delta_base
, 0, sizeof(*delta_base
));
214 base_offset
= c
& 127;
217 if (!base_offset
|| base_offset
& ~(~0UL >> 7))
218 bad_object(obj
->offset
, "offset value overflow for delta base object");
222 base_offset
= (base_offset
<< 7) + (c
& 127);
224 delta_base
->offset
= obj
->offset
- base_offset
;
225 if (delta_base
->offset
>= obj
->offset
)
226 bad_object(obj
->offset
, "delta base offset is out of bound");
234 bad_object(obj
->offset
, "bad object type %d", obj
->type
);
236 obj
->hdr_size
= consumed_bytes
- obj
->offset
;
238 return unpack_entry_data(obj
->offset
, obj
->size
);
241 static void * get_data_from_pack(struct object_entry
*obj
)
243 unsigned long from
= obj
[0].offset
+ obj
[0].hdr_size
;
244 unsigned long len
= obj
[1].offset
- from
;
245 unsigned pg_offset
= from
% getpagesize();
246 unsigned char *map
, *data
;
250 map
= mmap(NULL
, len
+ pg_offset
, PROT_READ
, MAP_PRIVATE
,
251 mmap_fd
, from
- pg_offset
);
252 if (map
== MAP_FAILED
)
253 die("cannot mmap pack file: %s", strerror(errno
));
254 data
= xmalloc(obj
->size
);
255 memset(&stream
, 0, sizeof(stream
));
256 stream
.next_out
= data
;
257 stream
.avail_out
= obj
->size
;
258 stream
.next_in
= map
+ pg_offset
;
259 stream
.avail_in
= len
;
260 inflateInit(&stream
);
261 while ((st
= inflate(&stream
, Z_FINISH
)) == Z_OK
);
263 if (st
!= Z_STREAM_END
|| stream
.total_out
!= obj
->size
)
264 die("serious inflate inconsistency");
265 munmap(map
, len
+ pg_offset
);
269 static int find_delta(const union delta_base
*base
)
271 int first
= 0, last
= nr_deltas
;
273 while (first
< last
) {
274 int next
= (first
+ last
) / 2;
275 struct delta_entry
*delta
= &deltas
[next
];
278 cmp
= memcmp(base
, &delta
->base
, UNION_BASE_SZ
);
290 static int find_delta_childs(const union delta_base
*base
,
291 int *first_index
, int *last_index
)
293 int first
= find_delta(base
);
295 int end
= nr_deltas
- 1;
299 while (first
> 0 && !memcmp(&deltas
[first
- 1].base
, base
, UNION_BASE_SZ
))
301 while (last
< end
&& !memcmp(&deltas
[last
+ 1].base
, base
, UNION_BASE_SZ
))
303 *first_index
= first
;
308 static void sha1_object(const void *data
, unsigned long size
,
309 enum object_type type
, unsigned char *sha1
)
314 const char *type_str
;
317 case OBJ_COMMIT
: type_str
= commit_type
; break;
318 case OBJ_TREE
: type_str
= tree_type
; break;
319 case OBJ_BLOB
: type_str
= blob_type
; break;
320 case OBJ_TAG
: type_str
= tag_type
; break;
322 die("bad type %d", type
);
325 header_size
= sprintf(header
, "%s %lu", type_str
, size
) + 1;
328 SHA1_Update(&ctx
, header
, header_size
);
329 SHA1_Update(&ctx
, data
, size
);
330 SHA1_Final(sha1
, &ctx
);
333 static void resolve_delta(struct object_entry
*delta_obj
, void *base_data
,
334 unsigned long base_size
, enum object_type type
)
337 unsigned long delta_size
;
339 unsigned long result_size
;
340 union delta_base delta_base
;
343 delta_obj
->real_type
= type
;
344 delta_data
= get_data_from_pack(delta_obj
);
345 delta_size
= delta_obj
->size
;
346 result
= patch_delta(base_data
, base_size
, delta_data
, delta_size
,
350 bad_object(delta_obj
->offset
, "failed to apply delta");
351 sha1_object(result
, result_size
, type
, delta_obj
->sha1
);
352 nr_resolved_deltas
++;
354 hashcpy(delta_base
.sha1
, delta_obj
->sha1
);
355 if (!find_delta_childs(&delta_base
, &first
, &last
)) {
356 for (j
= first
; j
<= last
; j
++) {
357 struct object_entry
*child
= objects
+ deltas
[j
].obj_no
;
358 if (child
->real_type
== OBJ_REF_DELTA
)
359 resolve_delta(child
, result
, result_size
, type
);
363 memset(&delta_base
, 0, sizeof(delta_base
));
364 delta_base
.offset
= delta_obj
->offset
;
365 if (!find_delta_childs(&delta_base
, &first
, &last
)) {
366 for (j
= first
; j
<= last
; j
++) {
367 struct object_entry
*child
= objects
+ deltas
[j
].obj_no
;
368 if (child
->real_type
== OBJ_OFS_DELTA
)
369 resolve_delta(child
, result
, result_size
, type
);
376 static int compare_delta_entry(const void *a
, const void *b
)
378 const struct delta_entry
*delta_a
= a
;
379 const struct delta_entry
*delta_b
= b
;
380 return memcmp(&delta_a
->base
, &delta_b
->base
, UNION_BASE_SZ
);
383 /* Parse all objects and return the pack content SHA1 hash */
384 static void parse_pack_objects(unsigned char *sha1
)
387 struct delta_entry
*delta
= deltas
;
393 * - find locations of all objects;
394 * - calculate SHA1 of all non-delta objects;
395 * - remember base SHA1 for all deltas.
397 for (i
= 0; i
< nr_objects
; i
++) {
398 struct object_entry
*obj
= &objects
[i
];
399 data
= unpack_raw_entry(obj
, &delta
->base
);
400 obj
->real_type
= obj
->type
;
401 if (obj
->type
== OBJ_REF_DELTA
|| obj
->type
== OBJ_OFS_DELTA
) {
406 sha1_object(data
, obj
->size
, obj
->type
, obj
->sha1
);
409 objects
[i
].offset
= consumed_bytes
;
411 /* Check pack integrity */
413 SHA1_Final(sha1
, &input_ctx
);
414 if (hashcmp(fill(20), sha1
))
415 die("pack is corrupted (SHA1 mismatch)");
417 /* If input_fd is a file, we should have reached its end now. */
418 if (fstat(input_fd
, &st
))
419 die("cannot fstat packfile: %s", strerror(errno
));
420 if (S_ISREG(st
.st_mode
) && st
.st_size
!= consumed_bytes
+ 20)
421 die("pack has junk at the end");
423 /* Sort deltas by base SHA1/offset for fast searching */
424 qsort(deltas
, nr_deltas
, sizeof(struct delta_entry
),
425 compare_delta_entry
);
429 * - for all non-delta objects, look if it is used as a base for
431 * - if used as a base, uncompress the object and apply all deltas,
432 * recursively checking if the resulting object is used as a base
433 * for some more deltas.
435 for (i
= 0; i
< nr_objects
; i
++) {
436 struct object_entry
*obj
= &objects
[i
];
437 union delta_base base
;
438 int j
, ref
, ref_first
, ref_last
, ofs
, ofs_first
, ofs_last
;
440 if (obj
->type
== OBJ_REF_DELTA
|| obj
->type
== OBJ_OFS_DELTA
)
442 hashcpy(base
.sha1
, obj
->sha1
);
443 ref
= !find_delta_childs(&base
, &ref_first
, &ref_last
);
444 memset(&base
, 0, sizeof(base
));
445 base
.offset
= obj
->offset
;
446 ofs
= !find_delta_childs(&base
, &ofs_first
, &ofs_last
);
449 data
= get_data_from_pack(obj
);
451 for (j
= ref_first
; j
<= ref_last
; j
++) {
452 struct object_entry
*child
= objects
+ deltas
[j
].obj_no
;
453 if (child
->real_type
== OBJ_REF_DELTA
)
454 resolve_delta(child
, data
,
455 obj
->size
, obj
->type
);
458 for (j
= ofs_first
; j
<= ofs_last
; j
++) {
459 struct object_entry
*child
= objects
+ deltas
[j
].obj_no
;
460 if (child
->real_type
== OBJ_OFS_DELTA
)
461 resolve_delta(child
, data
,
462 obj
->size
, obj
->type
);
468 static int write_compressed(int fd
, void *in
, unsigned int size
)
471 unsigned long maxsize
;
474 memset(&stream
, 0, sizeof(stream
));
475 deflateInit(&stream
, zlib_compression_level
);
476 maxsize
= deflateBound(&stream
, size
);
477 out
= xmalloc(maxsize
);
481 stream
.avail_in
= size
;
482 stream
.next_out
= out
;
483 stream
.avail_out
= maxsize
;
484 while (deflate(&stream
, Z_FINISH
) == Z_OK
);
487 size
= stream
.total_out
;
488 write_or_die(fd
, out
, size
);
493 static void append_obj_to_pack(void *buf
,
494 unsigned long size
, enum object_type type
)
496 struct object_entry
*obj
= &objects
[nr_objects
++];
497 unsigned char header
[10];
498 unsigned long s
= size
;
500 unsigned char c
= (type
<< 4) | (s
& 15);
503 header
[n
++] = c
| 0x80;
508 write_or_die(output_fd
, header
, n
);
509 obj
[1].offset
= obj
[0].offset
+ n
;
510 obj
[1].offset
+= write_compressed(output_fd
, buf
, size
);
511 sha1_object(buf
, size
, type
, obj
->sha1
);
514 static int delta_pos_compare(const void *_a
, const void *_b
)
516 struct delta_entry
*a
= *(struct delta_entry
**)_a
;
517 struct delta_entry
*b
= *(struct delta_entry
**)_b
;
518 return a
->obj_no
- b
->obj_no
;
521 static void fix_unresolved_deltas(int nr_unresolved
)
523 struct delta_entry
**sorted_by_pos
;
527 * Since many unresolved deltas may well be themselves base objects
528 * for more unresolved deltas, we really want to include the
529 * smallest number of base objects that would cover as much delta
530 * as possible by picking the
531 * trunc deltas first, allowing for other deltas to resolve without
532 * additional base objects. Since most base objects are to be found
533 * before deltas depending on them, a good heuristic is to start
534 * resolving deltas in the same order as their position in the pack.
536 sorted_by_pos
= xmalloc(nr_unresolved
* sizeof(*sorted_by_pos
));
537 for (i
= 0; i
< nr_deltas
; i
++) {
538 if (objects
[deltas
[i
].obj_no
].real_type
!= OBJ_REF_DELTA
)
540 sorted_by_pos
[n
++] = &deltas
[i
];
542 qsort(sorted_by_pos
, n
, sizeof(*sorted_by_pos
), delta_pos_compare
);
544 for (i
= 0; i
< n
; i
++) {
545 struct delta_entry
*d
= sorted_by_pos
[i
];
549 enum object_type obj_type
;
552 if (objects
[d
->obj_no
].real_type
!= OBJ_REF_DELTA
)
554 data
= read_sha1_file(d
->base
.sha1
, type
, &size
);
557 if (!strcmp(type
, blob_type
)) obj_type
= OBJ_BLOB
;
558 else if (!strcmp(type
, tree_type
)) obj_type
= OBJ_TREE
;
559 else if (!strcmp(type
, commit_type
)) obj_type
= OBJ_COMMIT
;
560 else if (!strcmp(type
, tag_type
)) obj_type
= OBJ_TAG
;
561 else die("base object %s is of type '%s'",
562 sha1_to_hex(d
->base
.sha1
), type
);
564 find_delta_childs(&d
->base
, &first
, &last
);
565 for (j
= first
; j
<= last
; j
++) {
566 struct object_entry
*child
= objects
+ deltas
[j
].obj_no
;
567 if (child
->real_type
== OBJ_REF_DELTA
)
568 resolve_delta(child
, data
, size
, obj_type
);
571 append_obj_to_pack(data
, size
, obj_type
);
577 static void readjust_pack_header_and_sha1(unsigned char *sha1
)
579 struct pack_header hdr
;
583 /* Rewrite pack header with updated object number */
584 if (lseek(output_fd
, 0, SEEK_SET
) != 0)
585 die("cannot seek back: %s", strerror(errno
));
586 if (xread(output_fd
, &hdr
, sizeof(hdr
)) != sizeof(hdr
))
587 die("cannot read pack header back: %s", strerror(errno
));
588 hdr
.hdr_entries
= htonl(nr_objects
);
589 if (lseek(output_fd
, 0, SEEK_SET
) != 0)
590 die("cannot seek back: %s", strerror(errno
));
591 write_or_die(output_fd
, &hdr
, sizeof(hdr
));
592 if (lseek(output_fd
, 0, SEEK_SET
) != 0)
593 die("cannot seek back: %s", strerror(errno
));
595 /* Recompute and store the new pack's SHA1 */
598 unsigned char *buf
[4096];
599 size
= xread(output_fd
, buf
, sizeof(buf
));
601 die("cannot read pack data back: %s", strerror(errno
));
602 SHA1_Update(&ctx
, buf
, size
);
604 SHA1_Final(sha1
, &ctx
);
605 write_or_die(output_fd
, sha1
, 20);
608 static int sha1_compare(const void *_a
, const void *_b
)
610 struct object_entry
*a
= *(struct object_entry
**)_a
;
611 struct object_entry
*b
= *(struct object_entry
**)_b
;
612 return hashcmp(a
->sha1
, b
->sha1
);
616 * On entry *sha1 contains the pack content SHA1 hash, on exit it is
617 * the SHA1 hash of sorted object names.
619 static const char * write_index_file(const char *index_name
, unsigned char *sha1
)
622 struct object_entry
**sorted_by_sha
, **list
, **last
;
623 unsigned int array
[256];
629 xcalloc(nr_objects
, sizeof(struct object_entry
*));
630 list
= sorted_by_sha
;
631 last
= sorted_by_sha
+ nr_objects
;
632 for (i
= 0; i
< nr_objects
; ++i
)
633 sorted_by_sha
[i
] = &objects
[i
];
634 qsort(sorted_by_sha
, nr_objects
, sizeof(sorted_by_sha
[0]),
639 sorted_by_sha
= list
= last
= NULL
;
642 static char tmpfile
[PATH_MAX
];
643 snprintf(tmpfile
, sizeof(tmpfile
),
644 "%s/index_XXXXXX", get_object_directory());
645 fd
= mkstemp(tmpfile
);
646 index_name
= xstrdup(tmpfile
);
649 fd
= open(index_name
, O_CREAT
|O_EXCL
|O_WRONLY
, 0600);
652 die("unable to create %s: %s", index_name
, strerror(errno
));
653 f
= sha1fd(fd
, index_name
);
656 * Write the first-level table (the list is sorted,
657 * but we use a 256-entry lookup to be able to avoid
658 * having to do eight extra binary search iterations).
660 for (i
= 0; i
< 256; i
++) {
661 struct object_entry
**next
= list
;
662 while (next
< last
) {
663 struct object_entry
*obj
= *next
;
664 if (obj
->sha1
[0] != i
)
668 array
[i
] = htonl(next
- sorted_by_sha
);
671 sha1write(f
, array
, 256 * sizeof(int));
673 /* recompute the SHA1 hash of sorted object names.
674 * currently pack-objects does not do this, but that
679 * Write the actual SHA1 entries..
681 list
= sorted_by_sha
;
682 for (i
= 0; i
< nr_objects
; i
++) {
683 struct object_entry
*obj
= *list
++;
684 unsigned int offset
= htonl(obj
->offset
);
685 sha1write(f
, &offset
, 4);
686 sha1write(f
, obj
->sha1
, 20);
687 SHA1_Update(&ctx
, obj
->sha1
, 20);
689 sha1write(f
, sha1
, 20);
690 sha1close(f
, NULL
, 1);
692 SHA1_Final(sha1
, &ctx
);
696 static void final(const char *final_pack_name
, const char *curr_pack_name
,
697 const char *final_index_name
, const char *curr_index_name
,
706 err
= close(output_fd
);
708 die("error while closing pack file: %s", strerror(errno
));
709 chmod(curr_pack_name
, 0444);
712 if (final_pack_name
!= curr_pack_name
) {
713 if (!final_pack_name
) {
714 snprintf(name
, sizeof(name
), "%s/pack/pack-%s.pack",
715 get_object_directory(), sha1_to_hex(sha1
));
716 final_pack_name
= name
;
718 if (move_temp_to_file(curr_pack_name
, final_pack_name
))
719 die("cannot store pack file");
722 chmod(curr_index_name
, 0444);
723 if (final_index_name
!= curr_index_name
) {
724 if (!final_index_name
) {
725 snprintf(name
, sizeof(name
), "%s/pack/pack-%s.idx",
726 get_object_directory(), sha1_to_hex(sha1
));
727 final_index_name
= name
;
729 if (move_temp_to_file(curr_index_name
, final_index_name
))
730 die("cannot store index file");
734 int main(int argc
, char **argv
)
736 int i
, fix_thin_pack
= 0;
737 const char *curr_pack
, *pack_name
= NULL
;
738 const char *curr_index
, *index_name
= NULL
;
739 char *index_name_buf
= NULL
;
740 unsigned char sha1
[20];
742 for (i
= 1; i
< argc
; i
++) {
743 const char *arg
= argv
[i
];
746 if (!strcmp(arg
, "--stdin")) {
748 } else if (!strcmp(arg
, "--fix-thin")) {
750 } else if (!strcmp(arg
, "-o")) {
751 if (index_name
|| (i
+1) >= argc
)
752 usage(index_pack_usage
);
753 index_name
= argv
[++i
];
755 usage(index_pack_usage
);
760 usage(index_pack_usage
);
764 if (!pack_name
&& !from_stdin
)
765 usage(index_pack_usage
);
766 if (fix_thin_pack
&& !from_stdin
)
767 die("--fix-thin cannot be used without --stdin");
768 if (!index_name
&& pack_name
) {
769 int len
= strlen(pack_name
);
770 if (!has_extension(pack_name
, ".pack"))
771 die("packfile name '%s' does not end with '.pack'",
773 index_name_buf
= xmalloc(len
);
774 memcpy(index_name_buf
, pack_name
, len
- 5);
775 strcpy(index_name_buf
+ len
- 5, ".idx");
776 index_name
= index_name_buf
;
779 curr_pack
= open_pack_file(pack_name
);
781 objects
= xmalloc((nr_objects
+ 1) * sizeof(struct object_entry
));
782 deltas
= xmalloc(nr_objects
* sizeof(struct delta_entry
));
783 parse_pack_objects(sha1
);
784 if (nr_deltas
!= nr_resolved_deltas
) {
786 int nr_unresolved
= nr_deltas
- nr_resolved_deltas
;
787 if (nr_unresolved
<= 0)
788 die("confusion beyond insanity");
789 objects
= xrealloc(objects
,
790 (nr_objects
+ nr_unresolved
+ 1)
792 fix_unresolved_deltas(nr_unresolved
);
793 readjust_pack_header_and_sha1(sha1
);
795 if (nr_deltas
!= nr_resolved_deltas
)
796 die("pack has %d unresolved deltas",
797 nr_deltas
- nr_resolved_deltas
);
799 /* Flush remaining pack final 20-byte SHA1. */
804 curr_index
= write_index_file(index_name
, sha1
);
805 final(pack_name
, curr_pack
, index_name
, curr_index
, sha1
);
807 free(index_name_buf
);
809 printf("%s\n", sha1_to_hex(sha1
));