Set up for better tree diff optimizations
[git/mingw/j6t.git] / builtin-unpack-objects.c
blob3956c5633448a5c29c60cad370ec7da6a8bfeb64
1 #include "builtin.h"
2 #include "cache.h"
3 #include "object.h"
4 #include "delta.h"
5 #include "pack.h"
6 #include "blob.h"
7 #include "commit.h"
8 #include "tag.h"
9 #include "tree.h"
11 static int dry_run, quiet, recover, has_errors;
12 static const char unpack_usage[] = "git-unpack-objects [-n] [-q] [-r] < pack-file";
14 /* We always read in 4kB chunks. */
15 static unsigned char buffer[4096];
16 static unsigned long offset, len, consumed_bytes;
17 static SHA_CTX ctx;
20 * Make sure at least "min" bytes are available in the buffer, and
21 * return the pointer to the buffer.
23 static void *fill(int min)
25 if (min <= len)
26 return buffer + offset;
27 if (min > sizeof(buffer))
28 die("cannot fill %d bytes", min);
29 if (offset) {
30 SHA1_Update(&ctx, buffer, offset);
31 memmove(buffer, buffer + offset, len);
32 offset = 0;
34 do {
35 int ret = xread(0, buffer + len, sizeof(buffer) - len);
36 if (ret <= 0) {
37 if (!ret)
38 die("early EOF");
39 die("read error on input: %s", strerror(errno));
41 len += ret;
42 } while (len < min);
43 return buffer;
46 static void use(int bytes)
48 if (bytes > len)
49 die("used more bytes than were available");
50 len -= bytes;
51 offset += bytes;
52 consumed_bytes += bytes;
55 static void *get_data(unsigned long size)
57 z_stream stream;
58 void *buf = xmalloc(size);
60 memset(&stream, 0, sizeof(stream));
62 stream.next_out = buf;
63 stream.avail_out = size;
64 stream.next_in = fill(1);
65 stream.avail_in = len;
66 inflateInit(&stream);
68 for (;;) {
69 int ret = inflate(&stream, 0);
70 use(len - stream.avail_in);
71 if (stream.total_out == size && ret == Z_STREAM_END)
72 break;
73 if (ret != Z_OK) {
74 error("inflate returned %d\n", ret);
75 free(buf);
76 buf = NULL;
77 if (!recover)
78 exit(1);
79 has_errors = 1;
80 break;
82 stream.next_in = fill(1);
83 stream.avail_in = len;
85 inflateEnd(&stream);
86 return buf;
/*
 * One delta that could not be applied when it was read because its
 * base was not available yet.  Entries are chained through "next".
 */
struct delta_info {
	unsigned char base_sha1[20];	/* base identified by name ... */
	unsigned long base_offset;	/* ... or by its offset in the pack */
	unsigned long size;		/* number of bytes in "delta" */
	void *delta;			/* delta data as returned by get_data() */
	unsigned nr;			/* obj_list index of the deltified object */
	struct delta_info *next;
};

/* Head of the list of deltas still waiting for their base. */
static struct delta_info *delta_list;
100 static void add_delta_to_list(unsigned nr, unsigned const char *base_sha1,
101 unsigned long base_offset,
102 void *delta, unsigned long size)
104 struct delta_info *info = xmalloc(sizeof(*info));
106 hashcpy(info->base_sha1, base_sha1);
107 info->base_offset = base_offset;
108 info->size = size;
109 info->delta = delta;
110 info->nr = nr;
111 info->next = delta_list;
112 delta_list = info;
/* Per-object bookkeeping, indexed by the object's position in the pack. */
struct obj_info {
	unsigned long offset;		/* consumed_bytes when the entry started */
	unsigned char sha1[20];		/* object name; null_sha1 while unresolved */
};

/* Array of obj_info, allocated in unpack_all(). */
static struct obj_info *obj_list;
122 static void added_object(unsigned nr, enum object_type type,
123 void *data, unsigned long size);
125 static void write_object(unsigned nr, enum object_type type,
126 void *buf, unsigned long size)
128 if (write_sha1_file(buf, size, typename(type), obj_list[nr].sha1) < 0)
129 die("failed to write object");
130 added_object(nr, type, buf, size);
133 static void resolve_delta(unsigned nr, enum object_type type,
134 void *base, unsigned long base_size,
135 void *delta, unsigned long delta_size)
137 void *result;
138 unsigned long result_size;
140 result = patch_delta(base, base_size,
141 delta, delta_size,
142 &result_size);
143 if (!result)
144 die("failed to apply delta");
145 free(delta);
146 write_object(nr, type, result, result_size);
147 free(result);
150 static void added_object(unsigned nr, enum object_type type,
151 void *data, unsigned long size)
153 struct delta_info **p = &delta_list;
154 struct delta_info *info;
156 while ((info = *p) != NULL) {
157 if (!hashcmp(info->base_sha1, obj_list[nr].sha1) ||
158 info->base_offset == obj_list[nr].offset) {
159 *p = info->next;
160 p = &delta_list;
161 resolve_delta(info->nr, type, data, size,
162 info->delta, info->size);
163 free(info);
164 continue;
166 p = &info->next;
170 static void unpack_non_delta_entry(enum object_type type, unsigned long size,
171 unsigned nr)
173 void *buf = get_data(size);
175 if (!dry_run && buf)
176 write_object(nr, type, buf, size);
177 free(buf);
180 static void unpack_delta_entry(enum object_type type, unsigned long delta_size,
181 unsigned nr)
183 void *delta_data, *base;
184 unsigned long base_size;
185 unsigned char base_sha1[20];
187 if (type == OBJ_REF_DELTA) {
188 hashcpy(base_sha1, fill(20));
189 use(20);
190 delta_data = get_data(delta_size);
191 if (dry_run || !delta_data) {
192 free(delta_data);
193 return;
195 if (!has_sha1_file(base_sha1)) {
196 hashcpy(obj_list[nr].sha1, null_sha1);
197 add_delta_to_list(nr, base_sha1, 0, delta_data, delta_size);
198 return;
200 } else {
201 unsigned base_found = 0;
202 unsigned char *pack, c;
203 unsigned long base_offset;
204 unsigned lo, mid, hi;
206 pack = fill(1);
207 c = *pack;
208 use(1);
209 base_offset = c & 127;
210 while (c & 128) {
211 base_offset += 1;
212 if (!base_offset || base_offset & ~(~0UL >> 7))
213 die("offset value overflow for delta base object");
214 pack = fill(1);
215 c = *pack;
216 use(1);
217 base_offset = (base_offset << 7) + (c & 127);
219 base_offset = obj_list[nr].offset - base_offset;
221 delta_data = get_data(delta_size);
222 if (dry_run || !delta_data) {
223 free(delta_data);
224 return;
226 lo = 0;
227 hi = nr;
228 while (lo < hi) {
229 mid = (lo + hi)/2;
230 if (base_offset < obj_list[mid].offset) {
231 hi = mid;
232 } else if (base_offset > obj_list[mid].offset) {
233 lo = mid + 1;
234 } else {
235 hashcpy(base_sha1, obj_list[mid].sha1);
236 base_found = !is_null_sha1(base_sha1);
237 break;
240 if (!base_found) {
241 /* The delta base object is itself a delta that
242 has not been resolved yet. */
243 hashcpy(obj_list[nr].sha1, null_sha1);
244 add_delta_to_list(nr, null_sha1, base_offset, delta_data, delta_size);
245 return;
249 base = read_sha1_file(base_sha1, &type, &base_size);
250 if (!base) {
251 error("failed to read delta-pack base object %s",
252 sha1_to_hex(base_sha1));
253 if (!recover)
254 exit(1);
255 has_errors = 1;
256 return;
258 resolve_delta(nr, type, base, base_size, delta_data, delta_size);
259 free(base);
262 static void unpack_one(unsigned nr, unsigned total)
264 unsigned shift;
265 unsigned char *pack, c;
266 unsigned long size;
267 enum object_type type;
269 obj_list[nr].offset = consumed_bytes;
271 pack = fill(1);
272 c = *pack;
273 use(1);
274 type = (c >> 4) & 7;
275 size = (c & 15);
276 shift = 4;
277 while (c & 0x80) {
278 pack = fill(1);
279 c = *pack;
280 use(1);
281 size += (c & 0x7f) << shift;
282 shift += 7;
284 if (!quiet) {
285 static unsigned long last_sec;
286 static unsigned last_percent;
287 struct timeval now;
288 unsigned percentage = ((nr+1) * 100) / total;
290 gettimeofday(&now, NULL);
291 if (percentage != last_percent || now.tv_sec != last_sec) {
292 last_sec = now.tv_sec;
293 last_percent = percentage;
294 fprintf(stderr, "%4u%% (%u/%u) done\r",
295 percentage, (nr+1), total);
298 switch (type) {
299 case OBJ_COMMIT:
300 case OBJ_TREE:
301 case OBJ_BLOB:
302 case OBJ_TAG:
303 unpack_non_delta_entry(type, size, nr);
304 return;
305 case OBJ_REF_DELTA:
306 case OBJ_OFS_DELTA:
307 unpack_delta_entry(type, size, nr);
308 return;
309 default:
310 error("bad object type %d", type);
311 has_errors = 1;
312 if (recover)
313 return;
314 exit(1);
318 static void unpack_all(void)
320 int i;
321 struct pack_header *hdr = fill(sizeof(struct pack_header));
322 unsigned nr_objects = ntohl(hdr->hdr_entries);
324 if (ntohl(hdr->hdr_signature) != PACK_SIGNATURE)
325 die("bad pack file");
326 if (!pack_version_ok(hdr->hdr_version))
327 die("unknown pack file version %d", ntohl(hdr->hdr_version));
328 fprintf(stderr, "Unpacking %d objects\n", nr_objects);
330 obj_list = xmalloc(nr_objects * sizeof(*obj_list));
331 use(sizeof(struct pack_header));
332 for (i = 0; i < nr_objects; i++)
333 unpack_one(i, nr_objects);
334 if (delta_list)
335 die("unresolved deltas left after unpacking");
338 int cmd_unpack_objects(int argc, const char **argv, const char *prefix)
340 int i;
341 unsigned char sha1[20];
343 git_config(git_default_config);
345 quiet = !isatty(2);
347 for (i = 1 ; i < argc; i++) {
348 const char *arg = argv[i];
350 if (*arg == '-') {
351 if (!strcmp(arg, "-n")) {
352 dry_run = 1;
353 continue;
355 if (!strcmp(arg, "-q")) {
356 quiet = 1;
357 continue;
359 if (!strcmp(arg, "-r")) {
360 recover = 1;
361 continue;
363 if (!prefixcmp(arg, "--pack_header=")) {
364 struct pack_header *hdr;
365 char *c;
367 hdr = (struct pack_header *)buffer;
368 hdr->hdr_signature = htonl(PACK_SIGNATURE);
369 hdr->hdr_version = htonl(strtoul(arg + 14, &c, 10));
370 if (*c != ',')
371 die("bad %s", arg);
372 hdr->hdr_entries = htonl(strtoul(c + 1, &c, 10));
373 if (*c)
374 die("bad %s", arg);
375 len = sizeof(*hdr);
376 continue;
378 usage(unpack_usage);
381 /* We don't take any non-flag arguments now.. Maybe some day */
382 usage(unpack_usage);
384 SHA1_Init(&ctx);
385 unpack_all();
386 SHA1_Update(&ctx, buffer, offset);
387 SHA1_Final(sha1, &ctx);
388 if (hashcmp(fill(20), sha1))
389 die("final sha1 did not match");
390 use(20);
392 /* Write the last part of the buffer to stdout */
393 while (len) {
394 int ret = xwrite(1, buffer + offset, len);
395 if (ret <= 0)
396 break;
397 len -= ret;
398 offset += ret;
401 /* All done */
402 if (!quiet)
403 fprintf(stderr, "\n");
404 return has_errors;