2 * Deltafication of a GIT database.
4 * (C) 2005 Nicolas Pitre <nico@cam.org>
6 * This code is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 as
8 * published by the Free Software Foundation.
/*
 * replace_object() - write `size` bytes of `buf` to a temporary file in the
 * object directory and rename it onto the object file for `sha1`.
 *
 * Visible failure paths report through error() or perror().
 *
 * NOTE(review): the numbers embedded in this listing skip values (17-18, 21,
 * 25-30, 33 onward), so the declaration of `fd`, the condition guarding the
 * first return, and any cleanup / final return are not shown here.  The
 * comments below cover only the statements that are visible.
 */
14 static int replace_object(char *buf
, unsigned long size
, unsigned char *sha1
)
16 char tmpfile
[PATH_MAX
];
/* Build the mkstemp() template "<object directory>/obj_XXXXXX". */
19 snprintf(tmpfile
, sizeof(tmpfile
), "%s/obj_XXXXXX", get_object_directory());
20 fd
= mkstemp(tmpfile
);
/* Reports the template name together with strerror(errno); presumably the
 * mkstemp() failure path - the guarding test is not visible, confirm. */
22 return error("%s: %s\n", tmpfile
, strerror(errno
));
/* A single write() call: anything other than a full write of `size` bytes
 * takes the error branch (a short write is not retried). */
23 if (write(fd
, buf
, size
) != size
) {
24 perror("unable to write file");
/* Move the temporary file onto the path returned by sha1_file_name(sha1). */
31 if (rename(tmpfile
, sha1_file_name(sha1
))) {
32 perror("unable to replace original object");
/*
 * create_object() - zlib-deflate a header of `hdrlen` bytes followed by `len`
 * bytes of data into one xmalloc()ed buffer, and store the compressed length
 * in *retsize.
 *
 * The output buffer is sized with deflateBound() for len+hdrlen input bytes.
 *
 * NOTE(review): the lines that set stream.next_in, the bodies of the two
 * deflate() loops, the deflateEnd() call (if any) and the return statement
 * are not visible in this listing.  Callers assign the result to a buffer
 * pointer, so it presumably returns `compressed` - confirm.
 */
39 static void *create_object(char *buf
, unsigned long len
, char *hdr
, int hdrlen
,
40 unsigned long *retsize
)
47 memset(&stream
, 0, sizeof(stream
));
/* NOTE(review): the return value of deflateInit() is not checked. */
48 deflateInit(&stream
, Z_BEST_COMPRESSION
);
49 size
= deflateBound(&stream
, len
+hdrlen
);
50 compressed
= xmalloc(size
);
/* All output goes to the single buffer allocated above. */
53 stream
.next_out
= compressed
;
54 stream
.avail_out
= size
;
/* First feed the header with flush argument 0 (no flush requested). */
58 stream
.avail_in
= hdrlen
;
59 while (deflate(&stream
, 0) == Z_OK
)
62 /* Then the data itself.. */
64 stream
.avail_in
= len
;
/* Z_FINISH ends the stream; the loop runs while deflate() keeps returning Z_OK. */
65 while (deflate(&stream
, Z_FINISH
) == Z_OK
)
/* Report how many compressed bytes were produced. */
68 *retsize
= stream
.total_out
;
/*
 * restore_original_object() - rebuild a plain (non-delta) object from `len`
 * bytes of expanded content `buf` of the given `type`, and write it over the
 * object file for `sha1` via replace_object().
 *
 * NOTE(review): the declarations of hdr/hdrlen/size/compressed/ret and the
 * tail of the function (freeing `compressed`, returning `ret`) are not visible
 * in this listing.  sprintf() into `hdr` is unbounded; whether `hdr` is large
 * enough for any `type` string cannot be checked from here.
 */
72 static int restore_original_object(char *buf
, unsigned long len
,
73 char *type
, unsigned char *sha1
)
/* Header is "<type> <len>"; the +1 counts the terminating NUL as part of it. */
80 hdrlen
= sprintf(hdr
, "%s %lu", type
, len
)+1;
81 compressed
= create_object(buf
, len
, hdr
, hdrlen
, &size
);
82 ret
= replace_object(compressed
, size
, sha1
);
/*
 * create_delta_object() - build the compressed form of a delta object: a
 * "delta <n>" header, the 20-byte sha1 of the reference object, then the
 * `len` bytes of delta data in `buf`.  The compressed length is stored in
 * *size by create_object(), whose result is returned.
 *
 * NOTE(review): line 96 of the original is not shown.  The sha1 is copied
 * right after the header, so `hdrlen` has to be advanced by 20 before the
 * create_object() call for those bytes to be compressed - confirm that the
 * hidden line does this.  The declaration (and size) of `hdr` is not visible.
 */
87 static void *create_delta_object(char *buf
, unsigned long len
,
88 unsigned char *sha1_ref
, unsigned long *size
)
93 /* Generate the header + sha1 of reference for delta */
/* The advertised size is len+20: the reference sha1 counts as payload.
 * The +1 keeps the header's terminating NUL. */
94 hdrlen
= sprintf(hdr
, "delta %lu", len
+20)+1;
95 memcpy(hdr
+ hdrlen
, sha1_ref
, 20);
98 return create_object(buf
, len
, hdr
, hdrlen
, size
);
/*
 * get_buffer() - map the object file for `sha1`, unpack it, and unmap the
 * file again.  unpack_sha1_file() fills in `type` and *size.
 *
 * An error naming the object (hex sha1) is reported on a failure path.
 *
 * NOTE(review): the checks around map/buffer, the use of `compsize`, and the
 * return statement are on lines not visible here.  Callers pass either NULL
 * or the address of an unsigned long for `compsize`, so the hidden code
 * presumably tolerates NULL - confirm.
 */
101 static void *get_buffer(unsigned char *sha1
, char *type
,
102 unsigned long *size
, unsigned long *compsize
)
104 unsigned long mapsize
;
105 void *map
= map_sha1_file(sha1
, &mapsize
);
107 void *buffer
= unpack_sha1_file(map
, mapsize
, type
, size
);
/* The mapping is released once unpack_sha1_file() has returned. */
108 munmap(map
, mapsize
);
114 error("unable to get object %s", sha1_to_hex(sha1
));
/*
 * expand_delta() - turn a delta object into the content it encodes.
 *
 * `delta` holds a 20-byte reference sha1 followed by *size-20 bytes of delta
 * data.  The reference is loaded with get_buffer(); if it is itself of type
 * "delta" it is expanded recursively.  patch_delta() then applies the delta
 * data to the reference and updates *size.
 *
 * *depth is incremented once per call; this call's pre-increment value is
 * its `level`.  *links is allocated as *depth * 20 bytes and this call's
 * reference sha1 is stored at offset level*20.
 *
 * NOTE(review): the conditions around these statements, the frees and the
 * return value are on lines not visible here.  `delta+20` is arithmetic on a
 * `void *`, which is a compiler extension rather than standard C.
 */
118 static void *expand_delta(void *delta
, unsigned long *size
, char *type
,
119 unsigned int *depth
, unsigned char **links
)
/* Remember this call's position in the chain before bumping the counter. */
122 unsigned int level
= (*depth
)++;
124 error("delta object is bad");
127 unsigned long ref_size
;
/* The first 20 bytes of the delta buffer are used as the reference's sha1. */
128 void *ref
= get_buffer(delta
, type
, &ref_size
, NULL
);
129 if (ref
&& !strcmp(type
, "delta"))
130 ref
= expand_delta(ref
, &ref_size
, type
, depth
, links
);
/* Sized from *depth, which by now includes every deeper call's increment. */
133 *links
= xmalloc(*depth
* 20);
136 buf
= patch_delta(ref
, ref_size
, delta
+20, *size
-20, size
);
139 memcpy(*links
+ level
*20, delta
, 20);
/*
 * Usage text handed to usage() by main().
 * NOTE(review): main() also parses "-v", "-q", "-d N" and "-b N", none of
 * which are mentioned in this string.
 */
148 static char *mkdelta_usage
=
149 "mkdelta [--max-depth=N] [--max-behind=N] <reference_sha1> <target_sha1> [<next_sha1> ...]";
/*
 * Per-object record; main() declares variables of type `struct delta` and
 * accesses exactly these members.
 * NOTE(review): the `struct ... {` opener and the closing brace are on lines
 * not visible in this listing.
 * `links` is allocated as depth*20 bytes wherever it is filled in, i.e. one
 * 20-byte sha1 per link.
 */
152 unsigned char sha1
[20]; /* object sha1 */
153 unsigned long size
; /* object size */
154 void *buf
; /* object content */
155 unsigned char *links
; /* delta reference links */
156 unsigned int depth
; /* delta depth */
/*
 * main() - command-line driver.
 *
 * After option parsing, each remaining argument is read as an object sha1
 * and loaded (expanding it first when it is stored as a delta).  Each object
 * is then diffed against the previously loaded objects kept in ref[]; the
 * smallest compressed delta, if acceptable, replaces the object on disk.
 * An object whose existing delta chain is deeper than max_depth is written
 * back as a plain object.  The current object then takes a slot in ref[].
 *
 * NOTE(review): many lines of the original (braces, `continue`s, several
 * conditions and assignments) are not visible in this listing; comments
 * below describe only what the visible statements show.
 */
159 int main(int argc
, char **argv
)
161 struct delta
*ref
, trg
;
162 char ref_type
[20], trg_type
[20], *skip_reason
;
164 unsigned long best_size
, orig_size
, orig_compsize
;
165 unsigned int r
, orig_ref
, best_ref
, nb_refs
, next_ref
, max_refs
= 0;
166 unsigned int i
, duplicate
, skip_lvl
, verbose
= 0, quiet
= 0;
/* -1 converts to UINT_MAX here, i.e. no practical depth limit by default. */
167 unsigned int max_depth
= -1;
/*
 * Option parsing.
 * NOTE(review): `i` is unsigned while argc is int, and atoi() results are
 * stored in unsigned variables, so a negative option value wraps around.
 */
169 for (i
= 1; i
< argc
; i
++) {
170 if (!strcmp(argv
[i
], "-v")) {
173 } else if (!strcmp(argv
[i
], "-q")) {
176 } else if (!strcmp(argv
[i
], "-d") && i
+1 < argc
) {
177 max_depth
= atoi(argv
[++i
]);
178 } else if (!strncmp(argv
[i
], "--max-depth=", 12)) {
179 max_depth
= atoi(argv
[i
]+12);
180 } else if (!strcmp(argv
[i
], "-b") && i
+1 < argc
) {
181 max_refs
= atoi(argv
[++i
]);
182 } else if (!strncmp(argv
[i
], "--max-behind=", 13)) {
183 max_refs
= atoi(argv
[i
]+13);
/* At least one object is required, or two when max_depth is non-zero. */
188 if (i
+ (max_depth
!= 0) >= argc
)
189 usage(mkdelta_usage
);
/* max_refs unset or larger than the remaining argument count; the statement
 * this guards is not visible (presumably a clamp - confirm). */
191 if (!max_refs
|| max_refs
> argc
- i
)
/* Window of previously seen objects, starting out with empty slots. */
193 ref
= xmalloc(max_refs
* sizeof(*ref
));
194 for (r
= 0; r
< max_refs
; r
++)
195 ref
[r
].buf
= ref
[r
].links
= NULL
;
196 next_ref
= nb_refs
= 0;
/* Load the current target object; orig_compsize is filled by get_buffer(). */
199 if (get_sha1(argv
[i
], trg
.sha1
))
200 die("bad sha1 %s", argv
[i
]);
201 trg
.buf
= get_buffer(trg
.sha1
, trg_type
, &trg
.size
, &orig_compsize
);
202 if (trg
.buf
&& !trg
.size
) {
204 printf("skip %s (object is empty)\n", argv
[i
]);
207 orig_size
= trg
.size
;
/*
 * Target is stored as a delta: its first 20 bytes name the reference.
 * Search the already loaded refs for that sha1 so the reference does not
 * have to be read again; otherwise expand_delta() is used.
 */
211 if (trg
.buf
&& !strcmp(trg_type
, "delta")) {
212 for (r
= 0; r
< nb_refs
; r
++)
213 if (!memcmp(trg
.buf
, ref
[r
].sha1
, 20))
216 /* no need to reload the reference object */
217 trg
.depth
= ref
[r
].depth
+ 1;
218 trg
.links
= xmalloc(trg
.depth
*20);
/* links = this object's reference followed by the reference's own chain. */
219 memcpy(trg
.links
, trg
.buf
, 20);
220 memcpy(trg
.links
+20, ref
[r
].links
, ref
[r
].depth
*20);
221 trg
.buf
= patch_delta(ref
[r
].buf
, ref
[r
].size
,
222 trg
.buf
+20, trg
.size
-20,
224 strcpy(trg_type
, ref_type
);
227 trg
.buf
= expand_delta(trg
.buf
, &trg
.size
, trg_type
,
228 &trg
.depth
, &trg
.links
);
232 die("unable to read target object %s", argv
[i
]);
/* ref_type takes the target's type in one branch (condition not visible);
 * otherwise, with deltas enabled, a differing type is fatal. */
235 strcpy(ref_type
, trg_type
);
236 } else if (max_depth
&& strcmp(ref_type
, trg_type
)) {
237 die("type mismatch for object %s", argv
[i
]);
/* Try every loaded reference as a delta base (skipped when max_depth is 0). */
246 for (r
= 0; max_depth
&& r
< nb_refs
; r
++) {
247 void *delta_buf
, *comp_buf
;
248 unsigned long delta_size
, comp_size
;
251 duplicate
= !memcmp(trg
.sha1
, ref
[r
].sha1
, 20);
253 skip_reason
= "already seen";
256 if (ref
[r
].depth
>= max_depth
) {
258 skip_reason
= "exceeding max link depth";
/* Scan the reference's chain for the target's own sha1; stopping before the
 * end of the chain means a match was found. */
263 for (l
= 0; l
< ref
[r
].depth
; l
++)
264 if (!memcmp(trg
.sha1
, ref
[r
].links
+ l
*20, 20))
266 if (l
!= ref
[r
].depth
) {
268 skip_reason
= "would create a loop";
273 if (trg
.depth
< max_depth
&& r
== orig_ref
) {
275 skip_reason
= "delta already in place";
280 delta_buf
= diff_delta(ref
[r
].buf
, ref
[r
].size
,
281 trg
.buf
, trg
.size
, &delta_size
);
283 die("out of memory");
284 if (trg
.depth
< max_depth
&&
285 delta_size
+20 >= orig_size
) {
286 /* no need to even try to compress if original
287 object is smaller than this delta */
290 skip_reason
= "no size reduction";
295 comp_buf
= create_delta_object(delta_buf
, delta_size
,
296 ref
[r
].sha1
, &comp_size
);
298 die("out of memory");
/* Compare compressed sizes: the new delta must beat the stored object. */
300 if (trg
.depth
< max_depth
&&
301 comp_size
>= orig_compsize
) {
304 skip_reason
= "no size reduction";
/* Keep the smallest candidate; on a tie prefer the shallower reference. */
309 if ((comp_size
< best_size
) ||
310 (comp_size
== best_size
&&
311 ref
[best_ref
].depth
)) {
314 best_size
= comp_size
;
/* Write the winning delta over the target and rebuild the target's chain. */
320 if (replace_object(best_buf
, best_size
, trg
.sha1
))
321 die("unable to write delta for %s", argv
[i
]);
324 trg
.depth
= ref
[best_ref
].depth
+ 1;
325 trg
.links
= xmalloc(trg
.depth
*20);
326 memcpy(trg
.links
, ref
[best_ref
].sha1
, 20);
327 memcpy(trg
.links
+20, ref
[best_ref
].links
, ref
[best_ref
].depth
*20);
/* NOTE(review): %ld and %d are used with unsigned long / unsigned int
 * arguments in this report. */
329 printf("delta %s (size=%ld.%02ld%% depth=%d dist=%d)\n",
330 argv
[i
], best_size
*100 / orig_compsize
,
331 (best_size
*10000 / orig_compsize
)%100,
333 (next_ref
- best_ref
+ max_refs
)
334 % (max_refs
+ 1) + 1);
/* Existing chain is deeper than allowed: store the object undeltified. */
335 } else if (trg
.depth
> max_depth
) {
336 if (restore_original_object(trg
.buf
, trg
.size
, trg_type
, trg
.sha1
))
337 die("unable to restore %s", argv
[i
]);
339 printf("undelta %s (depth was %d)\n",
344 } else if (skip_reason
&& verbose
) {
345 printf("skip %s (%s)\n", argv
[i
], skip_reason
);
/* Release whatever occupied slot next_ref before it is reused. */
349 free(ref
[next_ref
].buf
);
350 free(ref
[next_ref
].links
);
352 if (++next_ref
> nb_refs
)
354 if (next_ref
== max_refs
)
360 } while (++i
< argc
);