git-pack-objects: write the pack files with a SHA1 csum
[git/dscho.git] / unpack-objects.c
blob91a71c55c6bb720c7fbe3d1d2bd393d5d22f764f
1 #include "cache.h"
2 #include "object.h"
3 #include "delta.h"
/* Set to 1 by the "-n" option; the unpack routines return early when it is set. */
static int dry_run;
/* Number of index entries, filled in by check_index(). */
static int nr_entries;
/* Pack base name from the command line; map_file() appends ".<suffix>" to it. */
static const char *base_name;
/* Message handed to usage() when the command line is not understood. */
static const char unpack_usage[] = "git-unpack-objects basename";
/*
 * One record of the index file: a 4-byte pack offset followed by the
 * 20-byte SHA1 of the object (24 bytes in total, which is the stride
 * check_index() and find_pack_entry() use when walking the table).
 */
struct pack_entry {
	unsigned int offset; /* network byte order */
	unsigned char sha1[20];
};
/* Start and length of the mapped "<base_name>.pack" file (set in main()). */
static void *pack_base;
static unsigned long pack_size;
/* Start and length of the mapped "<base_name>.idx" file (set in main()). */
static void *index_base;
static unsigned long index_size;

/* Pointers to every index entry, sorted by pack offset in check_index(). */
static struct pack_entry **pack_list;
22 static void *map_file(const char *suffix, unsigned long *sizep)
24 static char pathname[PATH_MAX];
25 unsigned long len;
26 int fd;
27 struct stat st;
28 void *map;
30 len = snprintf(pathname, PATH_MAX, "%s.%s", base_name, suffix);
31 if (len >= PATH_MAX)
32 die("bad pack base-name");
33 fd = open(pathname, O_RDONLY);
34 if (fd < 0 || fstat(fd, &st))
35 die("unable to open '%s'", pathname);
36 len = st.st_size;
37 if (!len)
38 die("bad pack file '%s'", pathname);
39 map = mmap(NULL, len, PROT_READ, MAP_PRIVATE, fd, 0);
40 if (-1 == (int)(long)map)
41 die("unable to mmap '%s'", pathname);
42 close(fd);
43 *sizep = len;
44 return map;
47 static int sort_by_offset(const void *_a, const void *_b)
49 struct pack_entry *a = *(struct pack_entry **)_a;
50 struct pack_entry *b = *(struct pack_entry **)_b;
51 unsigned int o1, o2;
53 o1 = ntohl(a->offset);
54 o2 = ntohl(b->offset);
55 return o1 < o2 ? -1 : 1;
58 static int check_index(void)
60 unsigned int *array = index_base;
61 unsigned int nr;
62 int i;
64 if (index_size < 4*256 + 20)
65 return error("index file too small");
66 nr = 0;
67 for (i = 0; i < 256; i++) {
68 unsigned int n = ntohl(array[i]);
69 if (n < nr)
70 return error("non-monotonic index");
71 nr = n;
74 * Total size:
75 * - 256 index entries 4 bytes each
76 * - 24-byte entries * nr (20-byte sha1 + 4-byte offset)
77 * - 20-byte SHA1 file checksum
79 if (index_size != 4*256 + nr * 24 + 20)
80 return error("wrong index file size");
82 nr_entries = nr;
83 pack_list = xmalloc(nr * sizeof(struct pack_entry *));
84 for (i = 0; i < nr; i++)
85 pack_list[i] = index_base + 4*256 + i*24;
87 qsort(pack_list, nr, sizeof(*pack_list), sort_by_offset);
89 printf("%d entries\n", nr);
90 return 0;
93 static int unpack_non_delta_entry(struct pack_entry *entry,
94 int kind,
95 unsigned char *data,
96 unsigned long size,
97 unsigned long left)
99 int st;
100 z_stream stream;
101 char *buffer;
102 unsigned char sha1[20];
103 char *type_s;
105 printf("%s %c %lu\n", sha1_to_hex(entry->sha1), kind, size);
106 if (dry_run)
107 return 0;
109 buffer = xmalloc(size + 1);
110 buffer[size] = 0;
111 memset(&stream, 0, sizeof(stream));
112 stream.next_in = data;
113 stream.avail_in = left;
114 stream.next_out = buffer;
115 stream.avail_out = size;
117 inflateInit(&stream);
118 st = inflate(&stream, Z_FINISH);
119 inflateEnd(&stream);
120 if ((st != Z_STREAM_END) || stream.total_out != size)
121 goto err_finish;
122 switch (kind) {
123 case 'C': type_s = "commit"; break;
124 case 'T': type_s = "tree"; break;
125 case 'B': type_s = "blob"; break;
126 default: goto err_finish;
128 if (write_sha1_file(buffer, size, type_s, sha1) < 0)
129 die("failed to write %s (%s)",
130 sha1_to_hex(entry->sha1), type_s);
131 printf("%s %s\n", sha1_to_hex(sha1), type_s);
132 if (memcmp(sha1, entry->sha1, 20))
133 die("resulting %s have wrong SHA1", type_s);
135 finish:
136 st = 0;
137 free(buffer);
138 return st;
139 err_finish:
140 st = -1;
141 goto finish;
144 static int find_pack_entry(unsigned char *sha1, struct pack_entry **ent)
146 int *level1_ofs = index_base;
147 int hi = ntohl(level1_ofs[*sha1]);
148 int lo = ((*sha1 == 0x0) ? 0 : ntohl(level1_ofs[*sha1 - 1]));
149 void *index = index_base + 4*256;
151 do {
152 int mi = (lo + hi) / 2;
153 int cmp = memcmp(index + 24 * mi + 4, sha1, 20);
154 if (!cmp) {
155 *ent = index + 24 * mi;
156 return 1;
158 if (cmp > 0)
159 hi = mi;
160 else
161 lo = mi+1;
162 } while (lo < hi);
163 return 0;
166 /* forward declaration for a mutually recursive function */
167 static void unpack_entry(struct pack_entry *);
169 static int unpack_delta_entry(struct pack_entry *entry,
170 unsigned char *base_sha1,
171 unsigned long delta_size,
172 unsigned long left)
174 void *data, *delta_data, *result, *base;
175 unsigned long data_size, result_size, base_size;
176 z_stream stream;
177 int st;
178 char type[20];
179 unsigned char sha1[20];
181 if (left < 20)
182 die("truncated pack file");
183 data = base_sha1 + 20;
184 data_size = left - 20;
185 printf("%s D %lu", sha1_to_hex(entry->sha1), delta_size);
186 printf(" %s\n", sha1_to_hex(base_sha1));
188 if (dry_run)
189 return 0;
191 /* pack+5 is the base sha1, unless we have it, we need to
192 * unpack it first.
194 if (!has_sha1_file(base_sha1)) {
195 struct pack_entry *base;
196 if (!find_pack_entry(base_sha1, &base))
197 die("cannot find delta-pack base object");
198 unpack_entry(base);
200 delta_data = xmalloc(delta_size);
202 memset(&stream, 0, sizeof(stream));
204 stream.next_in = data;
205 stream.avail_in = data_size;
206 stream.next_out = delta_data;
207 stream.avail_out = delta_size;
209 inflateInit(&stream);
210 st = inflate(&stream, Z_FINISH);
211 inflateEnd(&stream);
212 if ((st != Z_STREAM_END) || stream.total_out != delta_size)
213 die("delta data unpack failed");
215 base = read_sha1_file(base_sha1, type, &base_size);
216 if (!base)
217 die("failed to read delta-pack base object %s", sha1_to_hex(base_sha1));
218 result = patch_delta(base, base_size,
219 delta_data, delta_size,
220 &result_size);
221 if (!result)
222 die("failed to apply delta");
223 free(delta_data);
225 if (write_sha1_file(result, result_size, type, sha1) < 0)
226 die("failed to write %s (%s)",
227 sha1_to_hex(entry->sha1), type);
228 free(result);
229 printf("%s %s\n", sha1_to_hex(sha1), type);
230 if (memcmp(sha1, entry->sha1, 20))
231 die("resulting %s have wrong SHA1", type);
232 return 0;
235 static void unpack_entry(struct pack_entry *entry)
237 unsigned long offset, size, left;
238 unsigned char *pack;
240 /* Have we done this one already due to deltas based on it? */
241 if (lookup_object(entry->sha1))
242 return;
244 offset = ntohl(entry->offset);
245 if (offset > pack_size - 5)
246 die("object offset outside of pack file");
247 pack = pack_base + offset;
248 size = (pack[1] << 24) + (pack[2] << 16) + (pack[3] << 8) + pack[4];
249 left = pack_size - offset - 5;
250 switch (*pack) {
251 case 'C': case 'T': case 'B':
252 unpack_non_delta_entry(entry, *pack, pack+5, size, left);
253 break;
254 case 'D':
255 unpack_delta_entry(entry, pack+5, size, left);
256 break;
257 default:
258 die("corrupted pack file");
263 * We unpack from the end, older files first. Now, usually
264 * there are deltas etc, so we'll not actually write the
265 * objects in that order, but we might as well try..
267 static void unpack_all(void)
269 int i = nr_entries;
271 while (--i >= 0) {
272 struct pack_entry *entry = pack_list[i];
273 unpack_entry(entry);
277 int main(int argc, char **argv)
279 int i;
281 for (i = 1 ; i < argc; i++) {
282 const char *arg = argv[i];
284 if (*arg == '-') {
285 if (!strcmp(arg, "-n")) {
286 dry_run = 1;
287 continue;
289 usage(unpack_usage);
291 if (base_name)
292 usage(unpack_usage);
293 base_name = arg;
295 if (!base_name)
296 usage(unpack_usage);
297 index_base = map_file("idx", &index_size);
298 pack_base = map_file("pack", &pack_size);
299 if (check_index() < 0)
300 die("bad index file");
301 unpack_all();
302 return 0;