7 static const char pack_usage
[] = "git-pack-objects [--window=N] [--depth=N] base-name < object-list";
18 unsigned char sha1
[20];
23 enum object_type type
;
24 unsigned long delta_size
;
25 struct object_entry
*delta
;
28 static struct object_entry
**sorted_by_sha
, **sorted_by_type
;
29 static struct object_entry
*objects
= NULL
;
30 static int nr_objects
= 0, nr_alloc
= 0;
31 static const char *base_name
;
32 static unsigned char pack_file_sha1
[20];
34 static void *delta_against(void *buf
, unsigned long size
, struct object_entry
*entry
)
36 unsigned long othersize
, delta_size
;
38 void *otherbuf
= read_sha1_file(entry
->delta
->sha1
, type
, &othersize
);
42 die("unable to read %s", sha1_to_hex(entry
->delta
->sha1
));
43 delta_buf
= diff_delta(otherbuf
, othersize
,
44 buf
, size
, &delta_size
, ~0UL);
45 if (!delta_buf
|| delta_size
!= entry
->delta_size
)
46 die("delta size changed");
52 static unsigned long write_object(struct sha1file
*f
, struct object_entry
*entry
)
56 void *buf
= read_sha1_file(entry
->sha1
, type
, &size
);
58 unsigned hdrlen
, datalen
;
61 die("unable to read %s", sha1_to_hex(entry
->sha1
));
62 if (size
!= entry
->size
)
63 die("object %s size inconsistency (%lu vs %lu)", sha1_to_hex(entry
->sha1
), size
, entry
->size
);
66 * The object header is a byte of 'type' followed by four bytes of
67 * length, except for deltas that has the 20 bytes of delta sha
70 header
[0] = ".CTB"[entry
->type
];
74 memcpy(header
+5, entry
->delta
, 20);
75 buf
= delta_against(buf
, size
, entry
);
76 size
= entry
->delta_size
;
79 datalen
= htonl(size
);
80 memcpy(header
+1, &datalen
, 4);
81 sha1write(f
, header
, hdrlen
);
82 datalen
= sha1write_compressed(f
, buf
, size
);
84 return hdrlen
+ datalen
;
87 static void write_pack_file(void)
90 struct sha1file
*f
= sha1create("%s.%s", base_name
, "pack");
91 unsigned long offset
= 0;
94 for (i
= 0; i
< nr_objects
; i
++) {
95 struct object_entry
*entry
= objects
+ i
;
96 entry
->offset
= offset
;
97 offset
+= write_object(f
, entry
);
99 sha1close(f
, pack_file_sha1
, 1);
104 static void write_index_file(void)
107 struct sha1file
*f
= sha1create("%s.%s", base_name
, "idx");
108 struct object_entry
**list
= sorted_by_sha
;
109 struct object_entry
**last
= list
+ nr_objects
;
110 unsigned int array
[256];
113 * Write the first-level table (the list is sorted,
114 * but we use a 256-entry lookup to be able to avoid
115 * having to do eight extra binary search iterations).
117 for (i
= 0; i
< 256; i
++) {
118 struct object_entry
**next
= list
;
119 while (next
< last
) {
120 struct object_entry
*entry
= *next
;
121 if (entry
->sha1
[0] != i
)
125 array
[i
] = htonl(next
- sorted_by_sha
);
128 sha1write(f
, array
, 256 * sizeof(int));
131 * Write the actual SHA1 entries..
133 list
= sorted_by_sha
;
134 for (i
= 0; i
< nr_objects
; i
++) {
135 struct object_entry
*entry
= *list
++;
136 unsigned int offset
= htonl(entry
->offset
);
137 sha1write(f
, &offset
, 4);
138 sha1write(f
, entry
->sha1
, 20);
140 sha1write(f
, pack_file_sha1
, 20);
141 sha1close(f
, NULL
, 1);
144 static void add_object_entry(unsigned char *sha1
, unsigned int hash
)
146 unsigned int idx
= nr_objects
;
147 struct object_entry
*entry
;
149 if (idx
>= nr_alloc
) {
150 unsigned int needed
= (idx
+ 1024) * 3 / 2;
151 objects
= xrealloc(objects
, needed
* sizeof(*entry
));
154 entry
= objects
+ idx
;
155 memset(entry
, 0, sizeof(*entry
));
156 memcpy(entry
->sha1
, sha1
, 20);
161 static void check_object(struct object_entry
*entry
)
165 if (!sha1_object_info(entry
->sha1
, type
, &entry
->size
)) {
166 if (!strcmp(type
, "commit")) {
167 entry
->type
= OBJ_COMMIT
;
168 } else if (!strcmp(type
, "tree")) {
169 entry
->type
= OBJ_TREE
;
170 } else if (!strcmp(type
, "blob")) {
171 entry
->type
= OBJ_BLOB
;
173 die("unable to pack object %s of type %s",
174 sha1_to_hex(entry
->sha1
), type
);
177 die("unable to get type of object %s",
178 sha1_to_hex(entry
->sha1
));
181 static void get_object_details(void)
184 struct object_entry
*entry
= objects
;
186 for (i
= 0; i
< nr_objects
; i
++)
187 check_object(entry
++);
190 typedef int (*entry_sort_t
)(const struct object_entry
*, const struct object_entry
*);
192 static entry_sort_t current_sort
;
194 static int sort_comparator(const void *_a
, const void *_b
)
196 struct object_entry
*a
= *(struct object_entry
**)_a
;
197 struct object_entry
*b
= *(struct object_entry
**)_b
;
198 return current_sort(a
,b
);
201 static struct object_entry
**create_sorted_list(entry_sort_t sort
)
203 struct object_entry
**list
= xmalloc(nr_objects
* sizeof(struct object_entry
*));
206 for (i
= 0; i
< nr_objects
; i
++)
207 list
[i
] = objects
+ i
;
209 qsort(list
, nr_objects
, sizeof(struct object_entry
*), sort_comparator
);
213 static int sha1_sort(const struct object_entry
*a
, const struct object_entry
*b
)
215 return memcmp(a
->sha1
, b
->sha1
, 20);
218 static int type_size_sort(const struct object_entry
*a
, const struct object_entry
*b
)
220 if (a
->type
< b
->type
)
222 if (a
->type
> b
->type
)
224 if (a
->hash
< b
->hash
)
226 if (a
->hash
> b
->hash
)
228 if (a
->size
< b
->size
)
230 if (a
->size
> b
->size
)
232 return a
< b
? -1 : (a
> b
);
236 struct object_entry
*entry
;
241 * We search for deltas _backwards_ in a list sorted by type and
242 * by size, so that we see progressively smaller and smaller files.
243 * That's because we prefer deltas to be from the bigger file
244 * to the smaller - deletes are potentially cheaper, but perhaps
245 * more importantly, the bigger file is likely the more recent
248 static int try_delta(struct unpacked
*cur
, struct unpacked
*old
, unsigned max_depth
)
250 struct object_entry
*cur_entry
= cur
->entry
;
251 struct object_entry
*old_entry
= old
->entry
;
252 unsigned long size
, oldsize
, delta_size
, sizediff
;
256 /* Don't bother doing diffs between different types */
257 if (cur_entry
->type
!= old_entry
->type
)
260 size
= cur_entry
->size
;
263 oldsize
= old_entry
->size
;
264 sizediff
= oldsize
> size
? oldsize
- size
: size
- oldsize
;
265 if (sizediff
> size
/ 8)
267 if (old_entry
->depth
>= max_depth
)
273 * We always delta from the bigger to the smaller, since that's
274 * more space-efficient (deletes don't have to say _what_ they
277 max_size
= size
/ 2 - 20;
278 if (cur_entry
->delta
)
279 max_size
= cur_entry
->delta_size
-1;
280 if (sizediff
>= max_size
)
282 delta_buf
= diff_delta(old
->data
, oldsize
,
283 cur
->data
, size
, &delta_size
, max_size
);
286 cur_entry
->delta
= old_entry
;
287 cur_entry
->delta_size
= delta_size
;
288 cur_entry
->depth
= old_entry
->depth
+ 1;
293 static void find_deltas(struct object_entry
**list
, int window
, int depth
)
296 unsigned int array_size
= window
* sizeof(struct unpacked
);
297 struct unpacked
*array
= xmalloc(array_size
);
299 memset(array
, 0, array_size
);
303 struct object_entry
*entry
= list
[i
];
304 struct unpacked
*n
= array
+ idx
;
311 n
->data
= read_sha1_file(entry
->sha1
, type
, &size
);
312 if (size
!= entry
->size
)
313 die("object %s inconsistent object length (%lu vs %lu)", sha1_to_hex(entry
->sha1
), size
, entry
->size
);
316 unsigned int other_idx
= idx
+ j
;
318 if (other_idx
>= window
)
320 m
= array
+ other_idx
;
323 if (try_delta(n
, m
, depth
) < 0)
332 int main(int argc
, char **argv
)
334 char line
[PATH_MAX
+ 20];
335 int window
= 10, depth
= 10;
338 for (i
= 1; i
< argc
; i
++) {
339 const char *arg
= argv
[i
];
342 if (!strncmp("--window=", arg
, 9)) {
344 window
= strtoul(arg
+9, &end
, 0);
349 if (!strncmp("--depth=", arg
, 8)) {
351 depth
= strtoul(arg
+8, &end
, 0);
366 while (fgets(line
, sizeof(line
), stdin
) != NULL
) {
369 unsigned char sha1
[20];
371 if (get_sha1_hex(line
, sha1
))
372 die("expected sha1, got garbage");
376 unsigned char c
= *p
++;
379 hash
= hash
* 11 + c
;
381 add_object_entry(sha1
, hash
);
383 get_object_details();
385 printf("Packing %d objects\n", nr_objects
);
387 sorted_by_sha
= create_sorted_list(sha1_sort
);
388 sorted_by_type
= create_sorted_list(type_size_sort
);
390 find_deltas(sorted_by_type
, window
+1, depth
);