Provide more meaningful output from 'git init-db'.
[git.git] / builtin-unpack-objects.c
blob e6d75748444ef4fc263970d4fe87b4623d2790fc
1 #include "builtin.h"
2 #include "cache.h"
3 #include "object.h"
4 #include "delta.h"
5 #include "pack.h"
6 #include "blob.h"
7 #include "commit.h"
8 #include "tag.h"
9 #include "tree.h"
11 #include <sys/time.h>
/*
 * Option flags, set while parsing the command line in
 * cmd_unpack_objects(): "-n" sets dry_run, "-q" sets quiet (which also
 * defaults to on when stderr is not a terminal) and "-r" sets recover.
 */
static int dry_run;
static int quiet;
static int recover;

/* Set once any recoverable error was seen; becomes the command's return value. */
static int has_errors;

static const char unpack_usage[] = "git-unpack-objects [-n] [-q] [-r] < pack-file";
16 /* We always read in 4kB chunks. */
17 static unsigned char buffer[4096];
18 static unsigned long offset, len, consumed_bytes;
19 static SHA_CTX ctx;
22 * Make sure at least "min" bytes are available in the buffer, and
23 * return the pointer to the buffer.
25 static void *fill(int min)
27 if (min <= len)
28 return buffer + offset;
29 if (min > sizeof(buffer))
30 die("cannot fill %d bytes", min);
31 if (offset) {
32 SHA1_Update(&ctx, buffer, offset);
33 memmove(buffer, buffer + offset, len);
34 offset = 0;
36 do {
37 int ret = xread(0, buffer + len, sizeof(buffer) - len);
38 if (ret <= 0) {
39 if (!ret)
40 die("early EOF");
41 die("read error on input: %s", strerror(errno));
43 len += ret;
44 } while (len < min);
45 return buffer;
48 static void use(int bytes)
50 if (bytes > len)
51 die("used more bytes than were available");
52 len -= bytes;
53 offset += bytes;
54 consumed_bytes += bytes;
57 static void *get_data(unsigned long size)
59 z_stream stream;
60 void *buf = xmalloc(size);
62 memset(&stream, 0, sizeof(stream));
64 stream.next_out = buf;
65 stream.avail_out = size;
66 stream.next_in = fill(1);
67 stream.avail_in = len;
68 inflateInit(&stream);
70 for (;;) {
71 int ret = inflate(&stream, 0);
72 use(len - stream.avail_in);
73 if (stream.total_out == size && ret == Z_STREAM_END)
74 break;
75 if (ret != Z_OK) {
76 error("inflate returned %d\n", ret);
77 free(buf);
78 buf = NULL;
79 if (!recover)
80 exit(1);
81 has_errors = 1;
82 break;
84 stream.next_in = fill(1);
85 stream.avail_in = len;
87 inflateEnd(&stream);
88 return buf;
/*
 * A delta whose base object was not available when the delta was read.
 * The base is identified either by its SHA-1 or by its offset in the
 * pack; entries are kept on a singly linked list until the base shows up.
 */
struct delta_info {
	unsigned char base_sha1[20];	/* SHA-1 of the base object */
	unsigned long base_offset;	/* pack offset of the base object */
	unsigned long size;		/* size of the delta data in bytes */
	void *delta;			/* inflated delta data */
	unsigned nr;			/* index in obj_list of the deltified object */
	struct delta_info *next;	/* next pending delta */
};

/* Head of the list of deltas still waiting for their base. */
static struct delta_info *delta_list;
102 static void add_delta_to_list(unsigned nr, unsigned const char *base_sha1,
103 unsigned long base_offset,
104 void *delta, unsigned long size)
106 struct delta_info *info = xmalloc(sizeof(*info));
108 hashcpy(info->base_sha1, base_sha1);
109 info->base_offset = base_offset;
110 info->size = size;
111 info->delta = delta;
112 info->nr = nr;
113 info->next = delta_list;
114 delta_list = info;
/* Per-entry bookkeeping for the pack being unpacked. */
struct obj_info {
	unsigned long offset;	/* value of consumed_bytes when the entry started */
	unsigned char sha1[20];	/* object name; null while an unresolved delta */
};

/* One slot per pack entry, allocated in unpack_all(). */
static struct obj_info *obj_list;
124 static void added_object(unsigned nr, const char *type, void *data,
125 unsigned long size);
127 static void write_object(unsigned nr, void *buf, unsigned long size,
128 const char *type)
130 if (write_sha1_file(buf, size, type, obj_list[nr].sha1) < 0)
131 die("failed to write object");
132 added_object(nr, type, buf, size);
/*
 * Apply "delta" to "base" and write the result out as object number
 * "nr" of the given type.  The delta buffer is freed here; the base
 * buffer stays owned by the caller.  Dies if the delta does not apply.
 */
static void resolve_delta(unsigned nr, const char *type,
			  void *base, unsigned long base_size,
			  void *delta, unsigned long delta_size)
{
	unsigned long patched_size;
	void *patched = patch_delta(base, base_size,
				    delta, delta_size,
				    &patched_size);

	if (patched == NULL)
		die("failed to apply delta");

	free(delta);
	write_object(nr, patched, patched_size, type);
	free(patched);
}
152 static void added_object(unsigned nr, const char *type, void *data,
153 unsigned long size)
155 struct delta_info **p = &delta_list;
156 struct delta_info *info;
158 while ((info = *p) != NULL) {
159 if (!hashcmp(info->base_sha1, obj_list[nr].sha1) ||
160 info->base_offset == obj_list[nr].offset) {
161 *p = info->next;
162 p = &delta_list;
163 resolve_delta(info->nr, type, data, size,
164 info->delta, info->size);
165 free(info);
166 continue;
168 p = &info->next;
172 static void unpack_non_delta_entry(enum object_type kind, unsigned long size,
173 unsigned nr)
175 void *buf = get_data(size);
176 const char *type;
178 switch (kind) {
179 case OBJ_COMMIT: type = commit_type; break;
180 case OBJ_TREE: type = tree_type; break;
181 case OBJ_BLOB: type = blob_type; break;
182 case OBJ_TAG: type = tag_type; break;
183 default: die("bad type %d", kind);
185 if (!dry_run && buf)
186 write_object(nr, buf, size, type);
187 free(buf);
190 static void unpack_delta_entry(enum object_type kind, unsigned long delta_size,
191 unsigned nr)
193 void *delta_data, *base;
194 unsigned long base_size;
195 char type[20];
196 unsigned char base_sha1[20];
198 if (kind == OBJ_REF_DELTA) {
199 hashcpy(base_sha1, fill(20));
200 use(20);
201 delta_data = get_data(delta_size);
202 if (dry_run || !delta_data) {
203 free(delta_data);
204 return;
206 if (!has_sha1_file(base_sha1)) {
207 hashcpy(obj_list[nr].sha1, null_sha1);
208 add_delta_to_list(nr, base_sha1, 0, delta_data, delta_size);
209 return;
211 } else {
212 unsigned base_found = 0;
213 unsigned char *pack, c;
214 unsigned long base_offset;
215 unsigned lo, mid, hi;
217 pack = fill(1);
218 c = *pack;
219 use(1);
220 base_offset = c & 127;
221 while (c & 128) {
222 base_offset += 1;
223 if (!base_offset || base_offset & ~(~0UL >> 7))
224 die("offset value overflow for delta base object");
225 pack = fill(1);
226 c = *pack;
227 use(1);
228 base_offset = (base_offset << 7) + (c & 127);
230 base_offset = obj_list[nr].offset - base_offset;
232 delta_data = get_data(delta_size);
233 if (dry_run || !delta_data) {
234 free(delta_data);
235 return;
237 lo = 0;
238 hi = nr;
239 while (lo < hi) {
240 mid = (lo + hi)/2;
241 if (base_offset < obj_list[mid].offset) {
242 hi = mid;
243 } else if (base_offset > obj_list[mid].offset) {
244 lo = mid + 1;
245 } else {
246 hashcpy(base_sha1, obj_list[mid].sha1);
247 base_found = !is_null_sha1(base_sha1);
248 break;
251 if (!base_found) {
252 /* The delta base object is itself a delta that
253 has not been resolved yet. */
254 hashcpy(obj_list[nr].sha1, null_sha1);
255 add_delta_to_list(nr, null_sha1, base_offset, delta_data, delta_size);
256 return;
260 base = read_sha1_file(base_sha1, type, &base_size);
261 if (!base) {
262 error("failed to read delta-pack base object %s",
263 sha1_to_hex(base_sha1));
264 if (!recover)
265 exit(1);
266 has_errors = 1;
267 return;
269 resolve_delta(nr, type, base, base_size, delta_data, delta_size);
270 free(base);
273 static void unpack_one(unsigned nr, unsigned total)
275 unsigned shift;
276 unsigned char *pack, c;
277 unsigned long size;
278 enum object_type type;
280 obj_list[nr].offset = consumed_bytes;
282 pack = fill(1);
283 c = *pack;
284 use(1);
285 type = (c >> 4) & 7;
286 size = (c & 15);
287 shift = 4;
288 while (c & 0x80) {
289 pack = fill(1);
290 c = *pack;
291 use(1);
292 size += (c & 0x7f) << shift;
293 shift += 7;
295 if (!quiet) {
296 static unsigned long last_sec;
297 static unsigned last_percent;
298 struct timeval now;
299 unsigned percentage = ((nr+1) * 100) / total;
301 gettimeofday(&now, NULL);
302 if (percentage != last_percent || now.tv_sec != last_sec) {
303 last_sec = now.tv_sec;
304 last_percent = percentage;
305 fprintf(stderr, "%4u%% (%u/%u) done\r",
306 percentage, (nr+1), total);
309 switch (type) {
310 case OBJ_COMMIT:
311 case OBJ_TREE:
312 case OBJ_BLOB:
313 case OBJ_TAG:
314 unpack_non_delta_entry(type, size, nr);
315 return;
316 case OBJ_REF_DELTA:
317 case OBJ_OFS_DELTA:
318 unpack_delta_entry(type, size, nr);
319 return;
320 default:
321 error("bad object type %d", type);
322 has_errors = 1;
323 if (recover)
324 return;
325 exit(1);
329 static void unpack_all(void)
331 int i;
332 struct pack_header *hdr = fill(sizeof(struct pack_header));
333 unsigned nr_objects = ntohl(hdr->hdr_entries);
335 if (ntohl(hdr->hdr_signature) != PACK_SIGNATURE)
336 die("bad pack file");
337 if (!pack_version_ok(hdr->hdr_version))
338 die("unknown pack file version %d", ntohl(hdr->hdr_version));
339 fprintf(stderr, "Unpacking %d objects\n", nr_objects);
341 obj_list = xmalloc(nr_objects * sizeof(*obj_list));
342 use(sizeof(struct pack_header));
343 for (i = 0; i < nr_objects; i++)
344 unpack_one(i, nr_objects);
345 if (delta_list)
346 die("unresolved deltas left after unpacking");
349 int cmd_unpack_objects(int argc, const char **argv, const char *prefix)
351 int i;
352 unsigned char sha1[20];
354 git_config(git_default_config);
356 quiet = !isatty(2);
358 for (i = 1 ; i < argc; i++) {
359 const char *arg = argv[i];
361 if (*arg == '-') {
362 if (!strcmp(arg, "-n")) {
363 dry_run = 1;
364 continue;
366 if (!strcmp(arg, "-q")) {
367 quiet = 1;
368 continue;
370 if (!strcmp(arg, "-r")) {
371 recover = 1;
372 continue;
374 if (!strncmp(arg, "--pack_header=", 14)) {
375 struct pack_header *hdr;
376 char *c;
378 hdr = (struct pack_header *)buffer;
379 hdr->hdr_signature = htonl(PACK_SIGNATURE);
380 hdr->hdr_version = htonl(strtoul(arg + 14, &c, 10));
381 if (*c != ',')
382 die("bad %s", arg);
383 hdr->hdr_entries = htonl(strtoul(c + 1, &c, 10));
384 if (*c)
385 die("bad %s", arg);
386 len = sizeof(*hdr);
387 continue;
389 usage(unpack_usage);
392 /* We don't take any non-flag arguments now.. Maybe some day */
393 usage(unpack_usage);
395 SHA1_Init(&ctx);
396 unpack_all();
397 SHA1_Update(&ctx, buffer, offset);
398 SHA1_Final(sha1, &ctx);
399 if (hashcmp(fill(20), sha1))
400 die("final sha1 did not match");
401 use(20);
403 /* Write the last part of the buffer to stdout */
404 while (len) {
405 int ret = xwrite(1, buffer + offset, len);
406 if (ret <= 0)
407 break;
408 len -= ret;
409 offset += ret;
412 /* All done */
413 if (!quiet)
414 fprintf(stderr, "\n");
415 return has_errors;