6 static const char index_pack_usage
[] =
7 "git-index-pack [-o index-file] pack-file";
12 enum object_type type
;
13 enum object_type real_type
;
14 unsigned char sha1
[20];
19 struct object_entry
*obj
;
20 unsigned char base_sha1
[20];
23 static const char *pack_name
;
24 static unsigned char *pack_base
;
25 static unsigned long pack_size
;
26 static struct object_entry
*objects
;
27 static struct delta_entry
*deltas
;
28 static int nr_objects
;
31 static void open_pack_file(void)
36 fd
= open(pack_name
, O_RDONLY
);
38 die("cannot open packfile '%s': %s", pack_name
,
43 die("cannot fstat packfile '%s': %s", pack_name
,
46 pack_size
= st
.st_size
;
47 pack_base
= mmap(NULL
, pack_size
, PROT_READ
, MAP_PRIVATE
, fd
, 0);
48 if (pack_base
== MAP_FAILED
) {
51 die("cannot mmap packfile '%s': %s", pack_name
,
57 static void parse_pack_header(void)
59 const struct pack_header
*hdr
;
60 unsigned char sha1
[20];
63 /* Ensure there are enough bytes for the header and final SHA1 */
64 if (pack_size
< sizeof(struct pack_header
) + 20)
65 die("packfile '%s' is too small", pack_name
);
67 /* Header consistency check */
68 hdr
= (void *)pack_base
;
69 if (hdr
->hdr_signature
!= htonl(PACK_SIGNATURE
))
70 die("packfile '%s' signature mismatch", pack_name
);
71 if (hdr
->hdr_version
!= htonl(PACK_VERSION
))
72 die("packfile '%s' version %d different from ours %d",
73 pack_name
, ntohl(hdr
->hdr_version
), PACK_VERSION
);
75 nr_objects
= ntohl(hdr
->hdr_entries
);
77 /* Check packfile integrity */
79 SHA1_Update(&ctx
, pack_base
, pack_size
- 20);
80 SHA1_Final(sha1
, &ctx
);
81 if (memcmp(sha1
, pack_base
+ pack_size
- 20, 20))
82 die("packfile '%s' SHA1 mismatch", pack_name
);
85 static void bad_object(unsigned long offset
, const char *format
,
86 ...) NORETURN
__attribute__((format (printf
, 2, 3)));
88 static void bad_object(unsigned long offset
, const char *format
, ...)
93 va_start(params
, format
);
94 vsnprintf(buf
, sizeof(buf
), format
, params
);
96 die("packfile '%s': bad object at offset %lu: %s",
97 pack_name
, offset
, buf
);
100 static void *unpack_entry_data(unsigned long offset
,
101 unsigned long *current_pos
, unsigned long size
)
103 unsigned long pack_limit
= pack_size
- 20;
104 unsigned long pos
= *current_pos
;
106 void *buf
= xmalloc(size
);
108 memset(&stream
, 0, sizeof(stream
));
109 stream
.next_out
= buf
;
110 stream
.avail_out
= size
;
111 stream
.next_in
= pack_base
+ pos
;
112 stream
.avail_in
= pack_limit
- pos
;
113 inflateInit(&stream
);
116 int ret
= inflate(&stream
, 0);
117 if (ret
== Z_STREAM_END
)
120 bad_object(offset
, "inflate returned %d", ret
);
123 if (stream
.total_out
!= size
)
124 bad_object(offset
, "size mismatch (expected %lu, got %lu)",
125 size
, stream
.total_out
);
126 *current_pos
= pack_limit
- stream
.avail_in
;
130 static void *unpack_raw_entry(unsigned long offset
,
131 enum object_type
*obj_type
,
132 unsigned long *obj_size
,
133 unsigned char *delta_base
,
134 unsigned long *next_obj_offset
)
136 unsigned long pack_limit
= pack_size
- 20;
137 unsigned long pos
= offset
;
141 enum object_type type
;
144 c
= pack_base
[pos
++];
149 if (pos
>= pack_limit
)
150 bad_object(offset
, "object extends past end of pack");
151 c
= pack_base
[pos
++];
152 size
+= (c
& 0x7fUL
) << shift
;
158 if (pos
+ 20 >= pack_limit
)
159 bad_object(offset
, "object extends past end of pack");
160 memcpy(delta_base
, pack_base
+ pos
, 20);
167 data
= unpack_entry_data(offset
, &pos
, size
);
170 bad_object(offset
, "bad object type %d", type
);
175 *next_obj_offset
= pos
;
179 static int find_delta(const unsigned char *base_sha1
)
181 int first
= 0, last
= nr_deltas
;
183 while (first
< last
) {
184 int next
= (first
+ last
) / 2;
185 struct delta_entry
*delta
= &deltas
[next
];
188 cmp
= memcmp(base_sha1
, delta
->base_sha1
, 20);
200 static int find_deltas_based_on_sha1(const unsigned char *base_sha1
,
201 int *first_index
, int *last_index
)
203 int first
= find_delta(base_sha1
);
205 int end
= nr_deltas
- 1;
209 while (first
> 0 && !memcmp(deltas
[first
-1].base_sha1
, base_sha1
, 20))
211 while (last
< end
&& !memcmp(deltas
[last
+1].base_sha1
, base_sha1
, 20))
213 *first_index
= first
;
218 static void sha1_object(const void *data
, unsigned long size
,
219 enum object_type type
, unsigned char *sha1
)
224 const char *type_str
;
227 case OBJ_COMMIT
: type_str
= "commit"; break;
228 case OBJ_TREE
: type_str
= "tree"; break;
229 case OBJ_BLOB
: type_str
= "blob"; break;
230 case OBJ_TAG
: type_str
= "tag"; break;
232 die("bad type %d", type
);
235 header_size
= sprintf(header
, "%s %lu", type_str
, size
) + 1;
238 SHA1_Update(&ctx
, header
, header_size
);
239 SHA1_Update(&ctx
, data
, size
);
240 SHA1_Final(sha1
, &ctx
);
243 static void resolve_delta(struct delta_entry
*delta
, void *base_data
,
244 unsigned long base_size
, enum object_type type
)
246 struct object_entry
*obj
= delta
->obj
;
248 unsigned long delta_size
;
250 unsigned long result_size
;
251 enum object_type delta_type
;
252 unsigned char base_sha1
[20];
253 unsigned long next_obj_offset
;
256 obj
->real_type
= type
;
257 delta_data
= unpack_raw_entry(obj
->offset
, &delta_type
,
258 &delta_size
, base_sha1
,
260 result
= patch_delta(base_data
, base_size
, delta_data
, delta_size
,
264 bad_object(obj
->offset
, "failed to apply delta");
265 sha1_object(result
, result_size
, type
, obj
->sha1
);
266 if (!find_deltas_based_on_sha1(obj
->sha1
, &first
, &last
)) {
267 for (j
= first
; j
<= last
; j
++)
268 resolve_delta(&deltas
[j
], result
, result_size
, type
);
273 static int compare_delta_entry(const void *a
, const void *b
)
275 const struct delta_entry
*delta_a
= a
;
276 const struct delta_entry
*delta_b
= b
;
277 return memcmp(delta_a
->base_sha1
, delta_b
->base_sha1
, 20);
280 static void parse_pack_objects(void)
283 unsigned long offset
= sizeof(struct pack_header
);
284 unsigned char base_sha1
[20];
286 unsigned long data_size
;
290 * - find locations of all objects;
291 * - calculate SHA1 of all non-delta objects;
292 * - remember base SHA1 for all deltas.
294 for (i
= 0; i
< nr_objects
; i
++) {
295 struct object_entry
*obj
= &objects
[i
];
296 obj
->offset
= offset
;
297 data
= unpack_raw_entry(offset
, &obj
->type
, &data_size
,
299 obj
->real_type
= obj
->type
;
300 if (obj
->type
== OBJ_DELTA
) {
301 struct delta_entry
*delta
= &deltas
[nr_deltas
++];
303 memcpy(delta
->base_sha1
, base_sha1
, 20);
305 sha1_object(data
, data_size
, obj
->type
, obj
->sha1
);
308 if (offset
!= pack_size
- 20)
309 die("packfile '%s' has junk at the end", pack_name
);
311 /* Sort deltas by base SHA1 for fast searching */
312 qsort(deltas
, nr_deltas
, sizeof(struct delta_entry
),
313 compare_delta_entry
);
317 * - for all non-delta objects, look if it is used as a base for
319 * - if used as a base, uncompress the object and apply all deltas,
320 * recursively checking if the resulting object is used as a base
321 * for some more deltas.
323 for (i
= 0; i
< nr_objects
; i
++) {
324 struct object_entry
*obj
= &objects
[i
];
327 if (obj
->type
== OBJ_DELTA
)
329 if (find_deltas_based_on_sha1(obj
->sha1
, &first
, &last
))
331 data
= unpack_raw_entry(obj
->offset
, &obj
->type
, &data_size
,
333 for (j
= first
; j
<= last
; j
++)
334 resolve_delta(&deltas
[j
], data
, data_size
, obj
->type
);
338 /* Check for unresolved deltas */
339 for (i
= 0; i
< nr_deltas
; i
++) {
340 if (deltas
[i
].obj
->real_type
== OBJ_DELTA
)
341 die("packfile '%s' has unresolved deltas", pack_name
);
345 static int sha1_compare(const void *_a
, const void *_b
)
347 struct object_entry
*a
= *(struct object_entry
**)_a
;
348 struct object_entry
*b
= *(struct object_entry
**)_b
;
349 return memcmp(a
->sha1
, b
->sha1
, 20);
352 static void write_index_file(const char *index_name
, unsigned char *sha1
)
355 struct object_entry
**sorted_by_sha
=
356 xcalloc(nr_objects
, sizeof(struct object_entry
*));
357 struct object_entry
**list
= sorted_by_sha
;
358 struct object_entry
**last
= sorted_by_sha
+ nr_objects
;
359 unsigned int array
[256];
363 for (i
= 0; i
< nr_objects
; ++i
)
364 sorted_by_sha
[i
] = &objects
[i
];
365 qsort(sorted_by_sha
, nr_objects
, sizeof(sorted_by_sha
[0]),
369 f
= sha1create("%s", index_name
);
372 * Write the first-level table (the list is sorted,
373 * but we use a 256-entry lookup to be able to avoid
374 * having to do eight extra binary search iterations).
376 for (i
= 0; i
< 256; i
++) {
377 struct object_entry
**next
= list
;
378 while (next
< last
) {
379 struct object_entry
*obj
= *next
;
380 if (obj
->sha1
[0] != i
)
384 array
[i
] = htonl(next
- sorted_by_sha
);
387 sha1write(f
, array
, 256 * sizeof(int));
389 /* recompute the SHA1 hash of sorted object names.
390 * currently pack-objects does not do this, but that
395 * Write the actual SHA1 entries..
397 list
= sorted_by_sha
;
398 for (i
= 0; i
< nr_objects
; i
++) {
399 struct object_entry
*obj
= *list
++;
400 unsigned int offset
= htonl(obj
->offset
);
401 sha1write(f
, &offset
, 4);
402 sha1write(f
, obj
->sha1
, 20);
403 SHA1_Update(&ctx
, obj
->sha1
, 20);
405 sha1write(f
, pack_base
+ pack_size
- 20, 20);
406 sha1close(f
, NULL
, 1);
408 SHA1_Final(sha1
, &ctx
);
411 int main(int argc
, char **argv
)
414 char *index_name
= NULL
;
415 char *index_name_buf
= NULL
;
416 unsigned char sha1
[20];
418 for (i
= 1; i
< argc
; i
++) {
419 const char *arg
= argv
[i
];
422 if (!strcmp(arg
, "-o")) {
423 if (index_name
|| (i
+1) >= argc
)
424 usage(index_pack_usage
);
425 index_name
= argv
[++i
];
427 usage(index_pack_usage
);
432 usage(index_pack_usage
);
437 usage(index_pack_usage
);
439 int len
= strlen(pack_name
);
440 if (len
< 5 || strcmp(pack_name
+ len
- 5, ".pack"))
441 die("packfile name '%s' does not end with '.pack'",
443 index_name_buf
= xmalloc(len
- 1);
444 memcpy(index_name_buf
, pack_name
, len
- 5);
445 strcpy(index_name_buf
+ len
- 5, ".idx");
446 index_name
= index_name_buf
;
451 objects
= xcalloc(nr_objects
, sizeof(struct object_entry
));
452 deltas
= xcalloc(nr_objects
, sizeof(struct delta_entry
));
453 parse_pack_objects();
455 write_index_file(index_name
, sha1
);
457 free(index_name_buf
);
459 printf("%s\n", sha1_to_hex(sha1
));