10 static const char index_pack_usage
[] =
11 "git-index-pack [-o index-file] pack-file";
17 unsigned int hdr_size
;
18 enum object_type type
;
19 enum object_type real_type
;
20 unsigned char sha1
[20];
24 unsigned char sha1
[20];
29 * Even if sizeof(union delta_base) == 24 on 64-bit archs, we really want
30 * to memcmp() only the first 20 bytes.
32 #define UNION_BASE_SZ 20
36 struct object_entry
*obj
;
37 union delta_base base
;
40 static const char *pack_name
;
41 static struct object_entry
*objects
;
42 static struct delta_entry
*deltas
;
43 static int nr_objects
;
46 /* We always read in 4kB chunks. */
47 static unsigned char input_buffer
[4096];
48 static unsigned long input_offset
, input_len
, consumed_bytes
;
49 static SHA_CTX input_ctx
;
53 * Make sure at least "min" bytes are available in the buffer, and
54 * return the pointer to the buffer.
56 static void * fill(int min
)
59 return input_buffer
+ input_offset
;
60 if (min
> sizeof(input_buffer
))
61 die("cannot fill %d bytes", min
);
63 SHA1_Update(&input_ctx
, input_buffer
, input_offset
);
64 memcpy(input_buffer
, input_buffer
+ input_offset
, input_len
);
68 int ret
= xread(input_fd
, input_buffer
+ input_len
,
69 sizeof(input_buffer
) - input_len
);
73 die("read error on input: %s", strerror(errno
));
76 } while (input_len
< min
);
80 static void use(int bytes
)
82 if (bytes
> input_len
)
83 die("used more bytes than were available");
85 input_offset
+= bytes
;
86 consumed_bytes
+= bytes
;
89 static void open_pack_file(void)
91 input_fd
= open(pack_name
, O_RDONLY
);
93 die("cannot open packfile '%s': %s", pack_name
,
95 SHA1_Init(&input_ctx
);
98 static void parse_pack_header(void)
100 struct pack_header
*hdr
= fill(sizeof(struct pack_header
));
102 /* Header consistency check */
103 if (hdr
->hdr_signature
!= htonl(PACK_SIGNATURE
))
104 die("packfile '%s' signature mismatch", pack_name
);
105 if (!pack_version_ok(hdr
->hdr_version
))
106 die("packfile '%s' version %d unsupported",
107 pack_name
, ntohl(hdr
->hdr_version
));
109 nr_objects
= ntohl(hdr
->hdr_entries
);
110 use(sizeof(struct pack_header
));
111 /*fprintf(stderr, "Indexing %d objects\n", nr_objects);*/
114 static void bad_object(unsigned long offset
, const char *format
,
115 ...) NORETURN
__attribute__((format (printf
, 2, 3)));
117 static void bad_object(unsigned long offset
, const char *format
, ...)
122 va_start(params
, format
);
123 vsnprintf(buf
, sizeof(buf
), format
, params
);
125 die("packfile '%s': bad object at offset %lu: %s",
126 pack_name
, offset
, buf
);
129 static void *unpack_entry_data(unsigned long offset
, unsigned long size
)
132 void *buf
= xmalloc(size
);
134 memset(&stream
, 0, sizeof(stream
));
135 stream
.next_out
= buf
;
136 stream
.avail_out
= size
;
137 stream
.next_in
= fill(1);
138 stream
.avail_in
= input_len
;
139 inflateInit(&stream
);
142 int ret
= inflate(&stream
, 0);
143 use(input_len
- stream
.avail_in
);
144 if (stream
.total_out
== size
&& ret
== Z_STREAM_END
)
147 bad_object(offset
, "inflate returned %d", ret
);
148 stream
.next_in
= fill(1);
149 stream
.avail_in
= input_len
;
155 static void *unpack_raw_entry(struct object_entry
*obj
, union delta_base
*delta_base
)
158 unsigned long size
, base_offset
;
161 obj
->offset
= consumed_bytes
;
166 obj
->type
= (c
>> 4) & 7;
173 size
+= (c
& 0x7fUL
) << shift
;
180 hashcpy(delta_base
->sha1
, fill(20));
184 memset(delta_base
, 0, sizeof(*delta_base
));
188 base_offset
= c
& 127;
191 if (!base_offset
|| base_offset
& ~(~0UL >> 7))
192 bad_object(obj
->offset
, "offset value overflow for delta base object");
196 base_offset
= (base_offset
<< 7) + (c
& 127);
198 delta_base
->offset
= obj
->offset
- base_offset
;
199 if (delta_base
->offset
>= obj
->offset
)
200 bad_object(obj
->offset
, "delta base offset is out of bound");
208 bad_object(obj
->offset
, "bad object type %d", obj
->type
);
210 obj
->hdr_size
= consumed_bytes
- obj
->offset
;
212 return unpack_entry_data(obj
->offset
, obj
->size
);
215 static void * get_data_from_pack(struct object_entry
*obj
)
217 unsigned long from
= obj
[0].offset
+ obj
[0].hdr_size
;
218 unsigned long len
= obj
[1].offset
- from
;
219 unsigned pg_offset
= from
% getpagesize();
220 unsigned char *map
, *data
;
224 map
= mmap(NULL
, len
+ pg_offset
, PROT_READ
, MAP_PRIVATE
,
225 input_fd
, from
- pg_offset
);
226 if (map
== MAP_FAILED
)
227 die("cannot mmap packfile '%s': %s", pack_name
, strerror(errno
));
228 data
= xmalloc(obj
->size
);
229 memset(&stream
, 0, sizeof(stream
));
230 stream
.next_out
= data
;
231 stream
.avail_out
= obj
->size
;
232 stream
.next_in
= map
+ pg_offset
;
233 stream
.avail_in
= len
;
234 inflateInit(&stream
);
235 while ((st
= inflate(&stream
, Z_FINISH
)) == Z_OK
);
237 if (st
!= Z_STREAM_END
|| stream
.total_out
!= obj
->size
)
238 die("serious inflate inconsistency");
239 munmap(map
, len
+ pg_offset
);
243 static int find_delta(const union delta_base
*base
)
245 int first
= 0, last
= nr_deltas
;
247 while (first
< last
) {
248 int next
= (first
+ last
) / 2;
249 struct delta_entry
*delta
= &deltas
[next
];
252 cmp
= memcmp(base
, &delta
->base
, UNION_BASE_SZ
);
264 static int find_delta_childs(const union delta_base
*base
,
265 int *first_index
, int *last_index
)
267 int first
= find_delta(base
);
269 int end
= nr_deltas
- 1;
273 while (first
> 0 && !memcmp(&deltas
[first
- 1].base
, base
, UNION_BASE_SZ
))
275 while (last
< end
&& !memcmp(&deltas
[last
+ 1].base
, base
, UNION_BASE_SZ
))
277 *first_index
= first
;
282 static void sha1_object(const void *data
, unsigned long size
,
283 enum object_type type
, unsigned char *sha1
)
288 const char *type_str
;
291 case OBJ_COMMIT
: type_str
= commit_type
; break;
292 case OBJ_TREE
: type_str
= tree_type
; break;
293 case OBJ_BLOB
: type_str
= blob_type
; break;
294 case OBJ_TAG
: type_str
= tag_type
; break;
296 die("bad type %d", type
);
299 header_size
= sprintf(header
, "%s %lu", type_str
, size
) + 1;
302 SHA1_Update(&ctx
, header
, header_size
);
303 SHA1_Update(&ctx
, data
, size
);
304 SHA1_Final(sha1
, &ctx
);
307 static void resolve_delta(struct delta_entry
*delta
, void *base_data
,
308 unsigned long base_size
, enum object_type type
)
310 struct object_entry
*obj
= delta
->obj
;
312 unsigned long delta_size
;
314 unsigned long result_size
;
315 union delta_base delta_base
;
318 obj
->real_type
= type
;
319 delta_data
= get_data_from_pack(obj
);
320 delta_size
= obj
->size
;
321 result
= patch_delta(base_data
, base_size
, delta_data
, delta_size
,
325 bad_object(obj
->offset
, "failed to apply delta");
326 sha1_object(result
, result_size
, type
, obj
->sha1
);
328 hashcpy(delta_base
.sha1
, obj
->sha1
);
329 if (!find_delta_childs(&delta_base
, &first
, &last
)) {
330 for (j
= first
; j
<= last
; j
++)
331 if (deltas
[j
].obj
->type
== OBJ_REF_DELTA
)
332 resolve_delta(&deltas
[j
], result
, result_size
, type
);
335 memset(&delta_base
, 0, sizeof(delta_base
));
336 delta_base
.offset
= obj
->offset
;
337 if (!find_delta_childs(&delta_base
, &first
, &last
)) {
338 for (j
= first
; j
<= last
; j
++)
339 if (deltas
[j
].obj
->type
== OBJ_OFS_DELTA
)
340 resolve_delta(&deltas
[j
], result
, result_size
, type
);
346 static int compare_delta_entry(const void *a
, const void *b
)
348 const struct delta_entry
*delta_a
= a
;
349 const struct delta_entry
*delta_b
= b
;
350 return memcmp(&delta_a
->base
, &delta_b
->base
, UNION_BASE_SZ
);
353 /* Parse all objects and return the pack content SHA1 hash */
354 static void parse_pack_objects(unsigned char *sha1
)
357 struct delta_entry
*delta
= deltas
;
363 * - find locations of all objects;
364 * - calculate SHA1 of all non-delta objects;
365 * - remember base SHA1 for all deltas.
367 for (i
= 0; i
< nr_objects
; i
++) {
368 struct object_entry
*obj
= &objects
[i
];
369 data
= unpack_raw_entry(obj
, &delta
->base
);
370 obj
->real_type
= obj
->type
;
371 if (obj
->type
== OBJ_REF_DELTA
|| obj
->type
== OBJ_OFS_DELTA
) {
376 sha1_object(data
, obj
->size
, obj
->type
, obj
->sha1
);
379 objects
[i
].offset
= consumed_bytes
;
381 /* Check pack integrity */
382 SHA1_Update(&input_ctx
, input_buffer
, input_offset
);
383 SHA1_Final(sha1
, &input_ctx
);
384 if (hashcmp(fill(20), sha1
))
385 die("packfile '%s' SHA1 mismatch", pack_name
);
388 /* If input_fd is a file, we should have reached its end now. */
389 if (fstat(input_fd
, &st
))
390 die("cannot fstat packfile '%s': %s", pack_name
, strerror(errno
));
391 if (S_ISREG(st
.st_mode
) && st
.st_size
!= consumed_bytes
)
392 die("packfile '%s' has junk at the end", pack_name
);
394 /* Sort deltas by base SHA1/offset for fast searching */
395 qsort(deltas
, nr_deltas
, sizeof(struct delta_entry
),
396 compare_delta_entry
);
400 * - for all non-delta objects, look if it is used as a base for
402 * - if used as a base, uncompress the object and apply all deltas,
403 * recursively checking if the resulting object is used as a base
404 * for some more deltas.
406 for (i
= 0; i
< nr_objects
; i
++) {
407 struct object_entry
*obj
= &objects
[i
];
408 union delta_base base
;
409 int j
, ref
, ref_first
, ref_last
, ofs
, ofs_first
, ofs_last
;
411 if (obj
->type
== OBJ_REF_DELTA
|| obj
->type
== OBJ_OFS_DELTA
)
413 hashcpy(base
.sha1
, obj
->sha1
);
414 ref
= !find_delta_childs(&base
, &ref_first
, &ref_last
);
415 memset(&base
, 0, sizeof(base
));
416 base
.offset
= obj
->offset
;
417 ofs
= !find_delta_childs(&base
, &ofs_first
, &ofs_last
);
420 data
= get_data_from_pack(obj
);
422 for (j
= ref_first
; j
<= ref_last
; j
++)
423 if (deltas
[j
].obj
->type
== OBJ_REF_DELTA
)
424 resolve_delta(&deltas
[j
], data
,
425 obj
->size
, obj
->type
);
427 for (j
= ofs_first
; j
<= ofs_last
; j
++)
428 if (deltas
[j
].obj
->type
== OBJ_OFS_DELTA
)
429 resolve_delta(&deltas
[j
], data
,
430 obj
->size
, obj
->type
);
434 /* Check for unresolved deltas */
435 for (i
= 0; i
< nr_deltas
; i
++) {
436 if (deltas
[i
].obj
->real_type
== OBJ_REF_DELTA
||
437 deltas
[i
].obj
->real_type
== OBJ_OFS_DELTA
)
438 die("packfile '%s' has unresolved deltas", pack_name
);
442 static int sha1_compare(const void *_a
, const void *_b
)
444 struct object_entry
*a
= *(struct object_entry
**)_a
;
445 struct object_entry
*b
= *(struct object_entry
**)_b
;
446 return hashcmp(a
->sha1
, b
->sha1
);
450 * On entry *sha1 contains the pack content SHA1 hash, on exit it is
451 * the SHA1 hash of sorted object names.
453 static void write_index_file(const char *index_name
, unsigned char *sha1
)
456 struct object_entry
**sorted_by_sha
, **list
, **last
;
457 unsigned int array
[256];
463 xcalloc(nr_objects
, sizeof(struct object_entry
*));
464 list
= sorted_by_sha
;
465 last
= sorted_by_sha
+ nr_objects
;
466 for (i
= 0; i
< nr_objects
; ++i
)
467 sorted_by_sha
[i
] = &objects
[i
];
468 qsort(sorted_by_sha
, nr_objects
, sizeof(sorted_by_sha
[0]),
473 sorted_by_sha
= list
= last
= NULL
;
476 f
= sha1create("%s", index_name
);
479 * Write the first-level table (the list is sorted,
480 * but we use a 256-entry lookup to be able to avoid
481 * having to do eight extra binary search iterations).
483 for (i
= 0; i
< 256; i
++) {
484 struct object_entry
**next
= list
;
485 while (next
< last
) {
486 struct object_entry
*obj
= *next
;
487 if (obj
->sha1
[0] != i
)
491 array
[i
] = htonl(next
- sorted_by_sha
);
494 sha1write(f
, array
, 256 * sizeof(int));
496 /* recompute the SHA1 hash of sorted object names.
497 * currently pack-objects does not do this, but that
502 * Write the actual SHA1 entries..
504 list
= sorted_by_sha
;
505 for (i
= 0; i
< nr_objects
; i
++) {
506 struct object_entry
*obj
= *list
++;
507 unsigned int offset
= htonl(obj
->offset
);
508 sha1write(f
, &offset
, 4);
509 sha1write(f
, obj
->sha1
, 20);
510 SHA1_Update(&ctx
, obj
->sha1
, 20);
512 sha1write(f
, sha1
, 20);
513 sha1close(f
, NULL
, 1);
515 SHA1_Final(sha1
, &ctx
);
518 int main(int argc
, char **argv
)
521 char *index_name
= NULL
;
522 char *index_name_buf
= NULL
;
523 unsigned char sha1
[20];
525 for (i
= 1; i
< argc
; i
++) {
526 const char *arg
= argv
[i
];
529 if (!strcmp(arg
, "-o")) {
530 if (index_name
|| (i
+1) >= argc
)
531 usage(index_pack_usage
);
532 index_name
= argv
[++i
];
534 usage(index_pack_usage
);
539 usage(index_pack_usage
);
544 usage(index_pack_usage
);
546 int len
= strlen(pack_name
);
547 if (!has_extension(pack_name
, ".pack"))
548 die("packfile name '%s' does not end with '.pack'",
550 index_name_buf
= xmalloc(len
);
551 memcpy(index_name_buf
, pack_name
, len
- 5);
552 strcpy(index_name_buf
+ len
- 5, ".idx");
553 index_name
= index_name_buf
;
558 objects
= xcalloc(nr_objects
+ 1, sizeof(struct object_entry
));
559 deltas
= xcalloc(nr_objects
, sizeof(struct delta_entry
));
560 parse_pack_objects(sha1
);
562 write_index_file(index_name
, sha1
);
564 free(index_name_buf
);
566 printf("%s\n", sha1_to_hex(sha1
));