8 static const char pack_usage
[] = "git-pack-objects [--non-empty] [--local] [--incremental] [--window=N] [--depth=N] {--stdout | base-name} < object-list";
11 unsigned char sha1
[20];
16 enum object_type type
;
17 unsigned long delta_size
;
18 struct object_entry
*delta
;
21 static unsigned char object_list_sha1
[20];
22 static int non_empty
= 0;
24 static int incremental
= 0;
25 static struct object_entry
**sorted_by_sha
, **sorted_by_type
;
26 static struct object_entry
*objects
= NULL
;
27 static int nr_objects
= 0, nr_alloc
= 0;
28 static const char *base_name
;
29 static unsigned char pack_file_sha1
[20];
30 static int progress
= 0;
32 static void *delta_against(void *buf
, unsigned long size
, struct object_entry
*entry
)
34 unsigned long othersize
, delta_size
;
36 void *otherbuf
= read_sha1_file(entry
->delta
->sha1
, type
, &othersize
);
40 die("unable to read %s", sha1_to_hex(entry
->delta
->sha1
));
41 delta_buf
= diff_delta(otherbuf
, othersize
,
42 buf
, size
, &delta_size
, 0);
43 if (!delta_buf
|| delta_size
!= entry
->delta_size
)
44 die("delta size changed");
51 * The per-object header is a pretty dense thing, which is
52 * - first byte: low four bits are "size", then three bits of "type",
53 * and the high bit is "size continues".
54 * - each byte afterwards: low seven bits are size continuation,
55 * with the high bit being "size continues"
57 static int encode_header(enum object_type type
, unsigned long size
, unsigned char *hdr
)
62 if (type
< OBJ_COMMIT
|| type
> OBJ_DELTA
)
63 die("bad type %d", type
);
65 c
= (type
<< 4) | (size
& 15);
77 static unsigned long write_object(struct sha1file
*f
, struct object_entry
*entry
)
81 void *buf
= read_sha1_file(entry
->sha1
, type
, &size
);
82 unsigned char header
[10];
83 unsigned hdrlen
, datalen
;
84 enum object_type obj_type
;
87 die("unable to read %s", sha1_to_hex(entry
->sha1
));
88 if (size
!= entry
->size
)
89 die("object %s size inconsistency (%lu vs %lu)", sha1_to_hex(entry
->sha1
), size
, entry
->size
);
92 * The object header is a byte of 'type' followed by zero or
93 * more bytes of length. For deltas, the 20 bytes of delta sha1
96 obj_type
= entry
->type
;
98 buf
= delta_against(buf
, size
, entry
);
99 size
= entry
->delta_size
;
100 obj_type
= OBJ_DELTA
;
102 hdrlen
= encode_header(obj_type
, size
, header
);
103 sha1write(f
, header
, hdrlen
);
105 sha1write(f
, entry
->delta
, 20);
108 datalen
= sha1write_compressed(f
, buf
, size
);
110 return hdrlen
+ datalen
;
113 static unsigned long write_one(struct sha1file
*f
,
114 struct object_entry
*e
,
115 unsigned long offset
)
118 /* offset starts from header size and cannot be zero
119 * if it is written already.
123 offset
+= write_object(f
, e
);
124 /* if we are deltified, write out its base object. */
126 offset
= write_one(f
, e
->delta
, offset
);
130 static void write_pack_file(void)
134 unsigned long offset
;
136 struct pack_header hdr
;
139 f
= sha1fd(1, "<stdout>");
141 f
= sha1create("%s-%s.%s", base_name
, sha1_to_hex(object_list_sha1
), "pack");
142 hdr
.hdr_signature
= htonl(PACK_SIGNATURE
);
143 hdr
.hdr_version
= htonl(PACK_VERSION
);
144 hdr
.hdr_entries
= htonl(nr_objects
);
145 sha1write(f
, &hdr
, sizeof(hdr
));
146 offset
= sizeof(hdr
);
147 for (i
= 0; i
< nr_objects
; i
++)
148 offset
= write_one(f
, objects
+ i
, offset
);
150 sha1close(f
, pack_file_sha1
, 1);
155 static void write_index_file(void)
158 struct sha1file
*f
= sha1create("%s-%s.%s", base_name
, sha1_to_hex(object_list_sha1
), "idx");
159 struct object_entry
**list
= sorted_by_sha
;
160 struct object_entry
**last
= list
+ nr_objects
;
161 unsigned int array
[256];
164 * Write the first-level table (the list is sorted,
165 * but we use a 256-entry lookup to be able to avoid
166 * having to do eight extra binary search iterations).
168 for (i
= 0; i
< 256; i
++) {
169 struct object_entry
**next
= list
;
170 while (next
< last
) {
171 struct object_entry
*entry
= *next
;
172 if (entry
->sha1
[0] != i
)
176 array
[i
] = htonl(next
- sorted_by_sha
);
179 sha1write(f
, array
, 256 * sizeof(int));
182 * Write the actual SHA1 entries..
184 list
= sorted_by_sha
;
185 for (i
= 0; i
< nr_objects
; i
++) {
186 struct object_entry
*entry
= *list
++;
187 unsigned int offset
= htonl(entry
->offset
);
188 sha1write(f
, &offset
, 4);
189 sha1write(f
, entry
->sha1
, 20);
191 sha1write(f
, pack_file_sha1
, 20);
192 sha1close(f
, NULL
, 1);
195 static int add_object_entry(unsigned char *sha1
, unsigned int hash
)
197 unsigned int idx
= nr_objects
;
198 struct object_entry
*entry
;
200 if (incremental
|| local
) {
201 struct packed_git
*p
;
203 for (p
= packed_git
; p
; p
= p
->next
) {
206 if (find_pack_entry_one(sha1
, &e
, p
)) {
209 if (local
&& !p
->pack_local
)
215 if (idx
>= nr_alloc
) {
216 unsigned int needed
= (idx
+ 1024) * 3 / 2;
217 objects
= xrealloc(objects
, needed
* sizeof(*entry
));
220 entry
= objects
+ idx
;
221 memset(entry
, 0, sizeof(*entry
));
222 memcpy(entry
->sha1
, sha1
, 20);
228 static void check_object(struct object_entry
*entry
)
232 if (!sha1_object_info(entry
->sha1
, type
, &entry
->size
)) {
233 if (!strcmp(type
, "commit")) {
234 entry
->type
= OBJ_COMMIT
;
235 } else if (!strcmp(type
, "tree")) {
236 entry
->type
= OBJ_TREE
;
237 } else if (!strcmp(type
, "blob")) {
238 entry
->type
= OBJ_BLOB
;
239 } else if (!strcmp(type
, "tag")) {
240 entry
->type
= OBJ_TAG
;
242 die("unable to pack object %s of type %s",
243 sha1_to_hex(entry
->sha1
), type
);
246 die("unable to get type of object %s",
247 sha1_to_hex(entry
->sha1
));
250 static void get_object_details(void)
253 struct object_entry
*entry
= objects
;
255 for (i
= 0; i
< nr_objects
; i
++)
256 check_object(entry
++);
259 typedef int (*entry_sort_t
)(const struct object_entry
*, const struct object_entry
*);
261 static entry_sort_t current_sort
;
263 static int sort_comparator(const void *_a
, const void *_b
)
265 struct object_entry
*a
= *(struct object_entry
**)_a
;
266 struct object_entry
*b
= *(struct object_entry
**)_b
;
267 return current_sort(a
,b
);
270 static struct object_entry
**create_sorted_list(entry_sort_t sort
)
272 struct object_entry
**list
= xmalloc(nr_objects
* sizeof(struct object_entry
*));
275 for (i
= 0; i
< nr_objects
; i
++)
276 list
[i
] = objects
+ i
;
278 qsort(list
, nr_objects
, sizeof(struct object_entry
*), sort_comparator
);
282 static int sha1_sort(const struct object_entry
*a
, const struct object_entry
*b
)
284 return memcmp(a
->sha1
, b
->sha1
, 20);
287 static int type_size_sort(const struct object_entry
*a
, const struct object_entry
*b
)
289 if (a
->type
< b
->type
)
291 if (a
->type
> b
->type
)
293 if (a
->hash
< b
->hash
)
295 if (a
->hash
> b
->hash
)
297 if (a
->size
< b
->size
)
299 if (a
->size
> b
->size
)
301 return a
< b
? -1 : (a
> b
);
305 struct object_entry
*entry
;
310 * We search for deltas _backwards_ in a list sorted by type and
311 * by size, so that we see progressively smaller and smaller files.
312 * That's because we prefer deltas to be from the bigger file
313 * to the smaller - deletes are potentially cheaper, but perhaps
314 * more importantly, the bigger file is likely the more recent
317 static int try_delta(struct unpacked
*cur
, struct unpacked
*old
, unsigned max_depth
)
319 struct object_entry
*cur_entry
= cur
->entry
;
320 struct object_entry
*old_entry
= old
->entry
;
321 unsigned long size
, oldsize
, delta_size
, sizediff
;
325 /* Don't bother doing diffs between different types */
326 if (cur_entry
->type
!= old_entry
->type
)
329 size
= cur_entry
->size
;
332 oldsize
= old_entry
->size
;
333 sizediff
= oldsize
> size
? oldsize
- size
: size
- oldsize
;
334 if (sizediff
> size
/ 8)
336 if (old_entry
->depth
>= max_depth
)
342 * We always delta from the bigger to the smaller, since that's
343 * more space-efficient (deletes don't have to say _what_ they
346 max_size
= size
/ 2 - 20;
347 if (cur_entry
->delta
)
348 max_size
= cur_entry
->delta_size
-1;
349 if (sizediff
>= max_size
)
351 delta_buf
= diff_delta(old
->data
, oldsize
,
352 cur
->data
, size
, &delta_size
, max_size
);
355 cur_entry
->delta
= old_entry
;
356 cur_entry
->delta_size
= delta_size
;
357 cur_entry
->depth
= old_entry
->depth
+ 1;
362 static void find_deltas(struct object_entry
**list
, int window
, int depth
)
365 unsigned int array_size
= window
* sizeof(struct unpacked
);
366 struct unpacked
*array
= xmalloc(array_size
);
369 memset(array
, 0, array_size
);
372 eye_candy
= i
- (nr_objects
/ 20);
375 struct object_entry
*entry
= list
[i
];
376 struct unpacked
*n
= array
+ idx
;
381 if (progress
&& i
<= eye_candy
) {
382 eye_candy
-= nr_objects
/ 20;
387 n
->data
= read_sha1_file(entry
->sha1
, type
, &size
);
388 if (size
!= entry
->size
)
389 die("object %s inconsistent object length (%lu vs %lu)", sha1_to_hex(entry
->sha1
), size
, entry
->size
);
392 unsigned int other_idx
= idx
+ j
;
394 if (other_idx
>= window
)
396 m
= array
+ other_idx
;
399 if (try_delta(n
, m
, depth
) < 0)
407 for (i
= 0; i
< window
; ++i
)
412 static void prepare_pack(int window
, int depth
)
414 get_object_details();
417 fprintf(stderr
, "Packing %d objects", nr_objects
);
418 sorted_by_type
= create_sorted_list(type_size_sort
);
420 find_deltas(sorted_by_type
, window
+1, depth
);
426 static int reuse_cached_pack(unsigned char *sha1
, int pack_to_stdout
)
428 static const char cache
[] = "pack-cache/pack-%s.%s";
429 char *cached_pack
, *cached_idx
;
430 int ifd
, ofd
, ifd_ix
= -1;
432 cached_pack
= git_path(cache
, sha1_to_hex(sha1
), "pack");
433 ifd
= open(cached_pack
, O_RDONLY
);
437 if (!pack_to_stdout
) {
438 cached_idx
= git_path(cache
, sha1_to_hex(sha1
), "idx");
439 ifd_ix
= open(cached_idx
, O_RDONLY
);
446 fprintf(stderr
, "Reusing %d objects pack %s\n", nr_objects
,
449 if (pack_to_stdout
) {
456 snprintf(name
, sizeof(name
),
457 "%s-%s.%s", base_name
, sha1_to_hex(sha1
), "pack");
458 ofd
= open(name
, O_CREAT
| O_EXCL
| O_WRONLY
, 0666);
460 die("unable to open %s (%s)", name
, strerror(errno
));
461 if (copy_fd(ifd
, ofd
))
465 snprintf(name
, sizeof(name
),
466 "%s-%s.%s", base_name
, sha1_to_hex(sha1
), "idx");
467 ofd
= open(name
, O_CREAT
| O_EXCL
| O_WRONLY
, 0666);
469 die("unable to open %s (%s)", name
, strerror(errno
));
470 if (copy_fd(ifd_ix
, ofd
))
473 puts(sha1_to_hex(sha1
));
479 int main(int argc
, char **argv
)
482 char line
[PATH_MAX
+ 20];
483 int window
= 10, depth
= 10, pack_to_stdout
= 0;
484 struct object_entry
**list
;
486 struct timeval prev_tv
;
488 int eye_candy_incr
= 500;
491 setup_git_directory();
493 for (i
= 1; i
< argc
; i
++) {
494 const char *arg
= argv
[i
];
497 if (!strcmp("--non-empty", arg
)) {
501 if (!strcmp("--local", arg
)) {
505 if (!strcmp("--incremental", arg
)) {
509 if (!strncmp("--window=", arg
, 9)) {
511 window
= strtoul(arg
+9, &end
, 0);
516 if (!strncmp("--depth=", arg
, 8)) {
518 depth
= strtoul(arg
+8, &end
, 0);
523 if (!strcmp("--stdout", arg
)) {
534 if (pack_to_stdout
!= !base_name
)
537 progress
= isatty(2);
539 prepare_packed_git();
541 fprintf(stderr
, "Generating pack...\n");
542 gettimeofday(&prev_tv
, NULL
);
544 while (fgets(line
, sizeof(line
), stdin
) != NULL
) {
547 unsigned char sha1
[20];
549 if (progress
&& (eye_candy
<= nr_objects
)) {
550 fprintf(stderr
, "Counting objects...%d\r", nr_objects
);
551 if (eye_candy
&& (50 <= eye_candy_incr
)) {
554 gettimeofday(&tv
, NULL
);
555 time_diff
= (tv
.tv_sec
- prev_tv
.tv_sec
);
557 time_diff
+= (tv
.tv_usec
- prev_tv
.tv_usec
);
558 if ((1 << 9) < time_diff
)
559 eye_candy_incr
+= 50;
560 else if (50 < eye_candy_incr
)
561 eye_candy_incr
-= 50;
563 eye_candy
+= eye_candy_incr
;
565 if (get_sha1_hex(line
, sha1
))
566 die("expected sha1, got garbage:\n %s", line
);
570 unsigned char c
= *p
++;
573 hash
= hash
* 11 + c
;
575 add_object_entry(sha1
, hash
);
578 fprintf(stderr
, "Done counting %d objects.\n", nr_objects
);
579 if (non_empty
&& !nr_objects
)
582 sorted_by_sha
= create_sorted_list(sha1_sort
);
584 list
= sorted_by_sha
;
585 for (i
= 0; i
< nr_objects
; i
++) {
586 struct object_entry
*entry
= *list
++;
587 SHA1_Update(&ctx
, entry
->sha1
, 20);
589 SHA1_Final(object_list_sha1
, &ctx
);
591 if (reuse_cached_pack(object_list_sha1
, pack_to_stdout
))
594 prepare_pack(window
, depth
);
595 if (!pack_to_stdout
) {
597 puts(sha1_to_hex(object_list_sha1
));