2 * diff-delta.c: generate a delta between two buffers
4 * Many parts of this file have been lifted from LibXDiff version 0.10.
5 * http://www.xmailserver.org/xdiff-lib.html
7 * LibXDiff was written by Davide Libenzi <davidel@xmailserver.org>
8 * Copyright (C) 2003 Davide Libenzi
10 * Many mods for GIT usage by Nicolas Pitre <nico@cam.org>, (C) 2005.
12 * This file is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU Lesser General Public
14 * License as published by the Free Software Foundation; either
15 * version 2.1 of the License, or (at your option) any later version.
17 * Use of this within git automatically means that the LGPL
18 * licensing gets turned into GPLv2 within this project.
27 /* block size: min = 16, max = 64k, power of 2 */
30 /* maximum hash entry list for the same hash bucket */
33 #define GR_PRIME 0x9e370001
34 #define HASH(v, shift) (((unsigned int)(v) * GR_PRIME) >> (shift))
37 const unsigned char *ptr
;
39 struct index_entry
*next
;
44 unsigned long src_size
;
45 unsigned int hash_shift
;
46 struct index_entry
*hash
[0];
49 struct delta_index
* create_delta_index(const void *buf
, unsigned long bufsize
)
51 unsigned int i
, hsize
, hshift
, entries
, *hash_count
;
52 const unsigned char *data
, *buffer
= buf
;
53 struct delta_index
*index
;
54 struct index_entry
*entry
, **hash
;
60 /* determine index hash size */
61 entries
= bufsize
/ BLK_SIZE
;
63 for (i
= 4; (1 << i
) < hsize
&& i
< 31; i
++);
67 /* allocate lookup index */
68 mem
= malloc(sizeof(*index
) +
69 sizeof(*hash
) * hsize
+
70 sizeof(*entry
) * entries
);
80 index
->src_size
= bufsize
;
81 index
->hash_shift
= hshift
;
82 memset(hash
, 0, hsize
* sizeof(*hash
));
84 /* allocate an array to count hash entries */
85 hash_count
= calloc(hsize
, sizeof(*hash_count
));
91 /* then populate the index */
92 data
= buffer
+ entries
* BLK_SIZE
- BLK_SIZE
;
93 while (data
>= buffer
) {
94 unsigned int val
= adler32(0, data
, BLK_SIZE
);
95 i
= HASH(val
, hshift
);
98 entry
->next
= hash
[i
];
105 * Determine a limit on the number of entries in the same hash
106 * bucket. This guard us against patological data sets causing
107 * really bad hash distribution with most entries in the same hash
108 * bucket that would bring us to O(m*n) computing costs (m and n
109 * corresponding to reference and target buffer sizes).
111 * Make sure none of the hash buckets has more entries than
112 * we're willing to test. Otherwise we cull the entry list
113 * uniformly to still preserve a good repartition across
114 * the reference buffer.
116 for (i
= 0; i
< hsize
; i
++) {
117 if (hash_count
[i
] < HASH_LIMIT
)
121 struct index_entry
*keep
= entry
;
122 int skip
= hash_count
[i
] / HASH_LIMIT
/ 2;
125 } while(--skip
&& entry
);
134 void free_delta_index(struct delta_index
*index
)
139 /* provide the size of the copy opcode given the block offset and size */
140 #define COPYOP_SIZE(o, s) \
141 (!!(o & 0xff) + !!(o & 0xff00) + !!(o & 0xff0000) + !!(o & 0xff000000) + \
142 !!(s & 0xff) + !!(s & 0xff00) + 1)
144 /* the maximum size for any opcode */
145 #define MAX_OP_SIZE COPYOP_SIZE(0xffffffff, 0xffffffff)
148 create_delta(const struct delta_index
*index
,
149 const void *trg_buf
, unsigned long trg_size
,
150 unsigned long *delta_size
, unsigned long max_size
)
152 unsigned int i
, outpos
, outsize
, hash_shift
;
154 const unsigned char *ref_data
, *ref_top
, *data
, *top
;
157 if (!trg_buf
|| !trg_size
)
162 if (max_size
&& outsize
>= max_size
)
163 outsize
= max_size
+ MAX_OP_SIZE
+ 1;
164 out
= malloc(outsize
);
168 /* store reference buffer size */
171 out
[outpos
++] = i
| 0x80;
176 /* store target buffer size */
179 out
[outpos
++] = i
| 0x80;
184 ref_data
= index
->src_buf
;
185 ref_top
= ref_data
+ index
->src_size
;
187 top
= trg_buf
+ trg_size
;
188 hash_shift
= index
->hash_shift
;
192 unsigned int moff
= 0, msize
= 0;
193 struct index_entry
*entry
;
194 unsigned int val
= adler32(0, data
, BLK_SIZE
);
195 i
= HASH(val
, hash_shift
);
196 for (entry
= index
->hash
[i
]; entry
; entry
= entry
->next
) {
197 const unsigned char *ref
= entry
->ptr
;
198 const unsigned char *src
= data
;
199 unsigned int ref_size
= ref_top
- ref
;
200 if (entry
->val
!= val
)
202 if (ref_size
> top
- src
)
203 ref_size
= top
- src
;
204 if (ref_size
> 0x10000)
206 if (ref_size
<= msize
)
208 while (ref_size
-- && *src
++ == *ref
)
210 if (msize
< ref
- entry
->ptr
) {
211 /* this is our best match so far */
212 msize
= ref
- entry
->ptr
;
213 moff
= entry
->ptr
- ref_data
;
217 if (!msize
|| msize
< COPYOP_SIZE(moff
, msize
)) {
220 out
[outpos
++] = *data
++;
222 if (inscnt
== 0x7f) {
223 out
[outpos
- inscnt
- 1] = inscnt
;
230 while (moff
&& ref_data
[moff
-1] == data
[-1]) {
231 if (msize
== 0x10000)
233 /* we can match one byte back */
240 outpos
--; /* remove count slot */
241 inscnt
--; /* make it -1 */
244 out
[outpos
- inscnt
- 1] = inscnt
;
252 if (moff
& 0xff) { out
[outpos
++] = moff
; i
|= 0x01; }
254 if (moff
& 0xff) { out
[outpos
++] = moff
; i
|= 0x02; }
256 if (moff
& 0xff) { out
[outpos
++] = moff
; i
|= 0x04; }
258 if (moff
& 0xff) { out
[outpos
++] = moff
; i
|= 0x08; }
260 if (msize
& 0xff) { out
[outpos
++] = msize
; i
|= 0x10; }
262 if (msize
& 0xff) { out
[outpos
++] = msize
; i
|= 0x20; }
267 if (outpos
>= outsize
- MAX_OP_SIZE
) {
269 outsize
= outsize
* 3 / 2;
270 if (max_size
&& outsize
>= max_size
)
271 outsize
= max_size
+ MAX_OP_SIZE
+ 1;
272 if (max_size
&& outpos
> max_size
)
275 out
= realloc(out
, outsize
);
284 out
[outpos
- inscnt
- 1] = inscnt
;
286 *delta_size
= outpos
;