2 * diff-delta.c: generate a delta between two buffers
4 * This code was greatly inspired by parts of LibXDiff from Davide Libenzi
5 * http://www.xmailserver.org/xdiff-lib.html
7 * Rewritten for GIT by Nicolas Pitre <nico@cam.org>, (C) 2005-2007
9 * This code is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License version 2 as
11 * published by the Free Software Foundation.
14 #include "git-compat-util.h"
17 /* maximum hash entry list for the same hash bucket */
20 #define RABIN_SHIFT 23
21 #define RABIN_WINDOW 16
23 static const unsigned int T
[256] = {
24 0x00000000, 0xab59b4d1, 0x56b369a2, 0xfdeadd73, 0x063f6795, 0xad66d344,
25 0x508c0e37, 0xfbd5bae6, 0x0c7ecf2a, 0xa7277bfb, 0x5acda688, 0xf1941259,
26 0x0a41a8bf, 0xa1181c6e, 0x5cf2c11d, 0xf7ab75cc, 0x18fd9e54, 0xb3a42a85,
27 0x4e4ef7f6, 0xe5174327, 0x1ec2f9c1, 0xb59b4d10, 0x48719063, 0xe32824b2,
28 0x1483517e, 0xbfdae5af, 0x423038dc, 0xe9698c0d, 0x12bc36eb, 0xb9e5823a,
29 0x440f5f49, 0xef56eb98, 0x31fb3ca8, 0x9aa28879, 0x6748550a, 0xcc11e1db,
30 0x37c45b3d, 0x9c9defec, 0x6177329f, 0xca2e864e, 0x3d85f382, 0x96dc4753,
31 0x6b369a20, 0xc06f2ef1, 0x3bba9417, 0x90e320c6, 0x6d09fdb5, 0xc6504964,
32 0x2906a2fc, 0x825f162d, 0x7fb5cb5e, 0xd4ec7f8f, 0x2f39c569, 0x846071b8,
33 0x798aaccb, 0xd2d3181a, 0x25786dd6, 0x8e21d907, 0x73cb0474, 0xd892b0a5,
34 0x23470a43, 0x881ebe92, 0x75f463e1, 0xdeadd730, 0x63f67950, 0xc8afcd81,
35 0x354510f2, 0x9e1ca423, 0x65c91ec5, 0xce90aa14, 0x337a7767, 0x9823c3b6,
36 0x6f88b67a, 0xc4d102ab, 0x393bdfd8, 0x92626b09, 0x69b7d1ef, 0xc2ee653e,
37 0x3f04b84d, 0x945d0c9c, 0x7b0be704, 0xd05253d5, 0x2db88ea6, 0x86e13a77,
38 0x7d348091, 0xd66d3440, 0x2b87e933, 0x80de5de2, 0x7775282e, 0xdc2c9cff,
39 0x21c6418c, 0x8a9ff55d, 0x714a4fbb, 0xda13fb6a, 0x27f92619, 0x8ca092c8,
40 0x520d45f8, 0xf954f129, 0x04be2c5a, 0xafe7988b, 0x5432226d, 0xff6b96bc,
41 0x02814bcf, 0xa9d8ff1e, 0x5e738ad2, 0xf52a3e03, 0x08c0e370, 0xa39957a1,
42 0x584ced47, 0xf3155996, 0x0eff84e5, 0xa5a63034, 0x4af0dbac, 0xe1a96f7d,
43 0x1c43b20e, 0xb71a06df, 0x4ccfbc39, 0xe79608e8, 0x1a7cd59b, 0xb125614a,
44 0x468e1486, 0xedd7a057, 0x103d7d24, 0xbb64c9f5, 0x40b17313, 0xebe8c7c2,
45 0x16021ab1, 0xbd5bae60, 0x6cb54671, 0xc7ecf2a0, 0x3a062fd3, 0x915f9b02,
46 0x6a8a21e4, 0xc1d39535, 0x3c394846, 0x9760fc97, 0x60cb895b, 0xcb923d8a,
47 0x3678e0f9, 0x9d215428, 0x66f4eece, 0xcdad5a1f, 0x3047876c, 0x9b1e33bd,
48 0x7448d825, 0xdf116cf4, 0x22fbb187, 0x89a20556, 0x7277bfb0, 0xd92e0b61,
49 0x24c4d612, 0x8f9d62c3, 0x7836170f, 0xd36fa3de, 0x2e857ead, 0x85dcca7c,
50 0x7e09709a, 0xd550c44b, 0x28ba1938, 0x83e3ade9, 0x5d4e7ad9, 0xf617ce08,
51 0x0bfd137b, 0xa0a4a7aa, 0x5b711d4c, 0xf028a99d, 0x0dc274ee, 0xa69bc03f,
52 0x5130b5f3, 0xfa690122, 0x0783dc51, 0xacda6880, 0x570fd266, 0xfc5666b7,
53 0x01bcbbc4, 0xaae50f15, 0x45b3e48d, 0xeeea505c, 0x13008d2f, 0xb85939fe,
54 0x438c8318, 0xe8d537c9, 0x153feaba, 0xbe665e6b, 0x49cd2ba7, 0xe2949f76,
55 0x1f7e4205, 0xb427f6d4, 0x4ff24c32, 0xe4abf8e3, 0x19412590, 0xb2189141,
56 0x0f433f21, 0xa41a8bf0, 0x59f05683, 0xf2a9e252, 0x097c58b4, 0xa225ec65,
57 0x5fcf3116, 0xf49685c7, 0x033df00b, 0xa86444da, 0x558e99a9, 0xfed72d78,
58 0x0502979e, 0xae5b234f, 0x53b1fe3c, 0xf8e84aed, 0x17bea175, 0xbce715a4,
59 0x410dc8d7, 0xea547c06, 0x1181c6e0, 0xbad87231, 0x4732af42, 0xec6b1b93,
60 0x1bc06e5f, 0xb099da8e, 0x4d7307fd, 0xe62ab32c, 0x1dff09ca, 0xb6a6bd1b,
61 0x4b4c6068, 0xe015d4b9, 0x3eb80389, 0x95e1b758, 0x680b6a2b, 0xc352defa,
62 0x3887641c, 0x93ded0cd, 0x6e340dbe, 0xc56db96f, 0x32c6cca3, 0x999f7872,
63 0x6475a501, 0xcf2c11d0, 0x34f9ab36, 0x9fa01fe7, 0x624ac294, 0xc9137645,
64 0x26459ddd, 0x8d1c290c, 0x70f6f47f, 0xdbaf40ae, 0x207afa48, 0x8b234e99,
65 0x76c993ea, 0xdd90273b, 0x2a3b52f7, 0x8162e626, 0x7c883b55, 0xd7d18f84,
66 0x2c043562, 0x875d81b3, 0x7ab75cc0, 0xd1eee811
69 static const unsigned int U
[256] = {
70 0x00000000, 0x7eb5200d, 0x5633f4cb, 0x2886d4c6, 0x073e5d47, 0x798b7d4a,
71 0x510da98c, 0x2fb88981, 0x0e7cba8e, 0x70c99a83, 0x584f4e45, 0x26fa6e48,
72 0x0942e7c9, 0x77f7c7c4, 0x5f711302, 0x21c4330f, 0x1cf9751c, 0x624c5511,
73 0x4aca81d7, 0x347fa1da, 0x1bc7285b, 0x65720856, 0x4df4dc90, 0x3341fc9d,
74 0x1285cf92, 0x6c30ef9f, 0x44b63b59, 0x3a031b54, 0x15bb92d5, 0x6b0eb2d8,
75 0x4388661e, 0x3d3d4613, 0x39f2ea38, 0x4747ca35, 0x6fc11ef3, 0x11743efe,
76 0x3eccb77f, 0x40799772, 0x68ff43b4, 0x164a63b9, 0x378e50b6, 0x493b70bb,
77 0x61bda47d, 0x1f088470, 0x30b00df1, 0x4e052dfc, 0x6683f93a, 0x1836d937,
78 0x250b9f24, 0x5bbebf29, 0x73386bef, 0x0d8d4be2, 0x2235c263, 0x5c80e26e,
79 0x740636a8, 0x0ab316a5, 0x2b7725aa, 0x55c205a7, 0x7d44d161, 0x03f1f16c,
80 0x2c4978ed, 0x52fc58e0, 0x7a7a8c26, 0x04cfac2b, 0x73e5d470, 0x0d50f47d,
81 0x25d620bb, 0x5b6300b6, 0x74db8937, 0x0a6ea93a, 0x22e87dfc, 0x5c5d5df1,
82 0x7d996efe, 0x032c4ef3, 0x2baa9a35, 0x551fba38, 0x7aa733b9, 0x041213b4,
83 0x2c94c772, 0x5221e77f, 0x6f1ca16c, 0x11a98161, 0x392f55a7, 0x479a75aa,
84 0x6822fc2b, 0x1697dc26, 0x3e1108e0, 0x40a428ed, 0x61601be2, 0x1fd53bef,
85 0x3753ef29, 0x49e6cf24, 0x665e46a5, 0x18eb66a8, 0x306db26e, 0x4ed89263,
86 0x4a173e48, 0x34a21e45, 0x1c24ca83, 0x6291ea8e, 0x4d29630f, 0x339c4302,
87 0x1b1a97c4, 0x65afb7c9, 0x446b84c6, 0x3adea4cb, 0x1258700d, 0x6ced5000,
88 0x4355d981, 0x3de0f98c, 0x15662d4a, 0x6bd30d47, 0x56ee4b54, 0x285b6b59,
89 0x00ddbf9f, 0x7e689f92, 0x51d01613, 0x2f65361e, 0x07e3e2d8, 0x7956c2d5,
90 0x5892f1da, 0x2627d1d7, 0x0ea10511, 0x7014251c, 0x5facac9d, 0x21198c90,
91 0x099f5856, 0x772a785b, 0x4c921c31, 0x32273c3c, 0x1aa1e8fa, 0x6414c8f7,
92 0x4bac4176, 0x3519617b, 0x1d9fb5bd, 0x632a95b0, 0x42eea6bf, 0x3c5b86b2,
93 0x14dd5274, 0x6a687279, 0x45d0fbf8, 0x3b65dbf5, 0x13e30f33, 0x6d562f3e,
94 0x506b692d, 0x2ede4920, 0x06589de6, 0x78edbdeb, 0x5755346a, 0x29e01467,
95 0x0166c0a1, 0x7fd3e0ac, 0x5e17d3a3, 0x20a2f3ae, 0x08242768, 0x76910765,
96 0x59298ee4, 0x279caee9, 0x0f1a7a2f, 0x71af5a22, 0x7560f609, 0x0bd5d604,
97 0x235302c2, 0x5de622cf, 0x725eab4e, 0x0ceb8b43, 0x246d5f85, 0x5ad87f88,
98 0x7b1c4c87, 0x05a96c8a, 0x2d2fb84c, 0x539a9841, 0x7c2211c0, 0x029731cd,
99 0x2a11e50b, 0x54a4c506, 0x69998315, 0x172ca318, 0x3faa77de, 0x411f57d3,
100 0x6ea7de52, 0x1012fe5f, 0x38942a99, 0x46210a94, 0x67e5399b, 0x19501996,
101 0x31d6cd50, 0x4f63ed5d, 0x60db64dc, 0x1e6e44d1, 0x36e89017, 0x485db01a,
102 0x3f77c841, 0x41c2e84c, 0x69443c8a, 0x17f11c87, 0x38499506, 0x46fcb50b,
103 0x6e7a61cd, 0x10cf41c0, 0x310b72cf, 0x4fbe52c2, 0x67388604, 0x198da609,
104 0x36352f88, 0x48800f85, 0x6006db43, 0x1eb3fb4e, 0x238ebd5d, 0x5d3b9d50,
105 0x75bd4996, 0x0b08699b, 0x24b0e01a, 0x5a05c017, 0x728314d1, 0x0c3634dc,
106 0x2df207d3, 0x534727de, 0x7bc1f318, 0x0574d315, 0x2acc5a94, 0x54797a99,
107 0x7cffae5f, 0x024a8e52, 0x06852279, 0x78300274, 0x50b6d6b2, 0x2e03f6bf,
108 0x01bb7f3e, 0x7f0e5f33, 0x57888bf5, 0x293dabf8, 0x08f998f7, 0x764cb8fa,
109 0x5eca6c3c, 0x207f4c31, 0x0fc7c5b0, 0x7172e5bd, 0x59f4317b, 0x27411176,
110 0x1a7c5765, 0x64c97768, 0x4c4fa3ae, 0x32fa83a3, 0x1d420a22, 0x63f72a2f,
111 0x4b71fee9, 0x35c4dee4, 0x1400edeb, 0x6ab5cde6, 0x42331920, 0x3c86392d,
112 0x133eb0ac, 0x6d8b90a1, 0x450d4467, 0x3bb8646a
116 const unsigned char *ptr
;
118 struct index_entry
*next
;
123 unsigned long src_size
;
124 unsigned int hash_mask
;
125 struct index_entry
*hash
[FLEX_ARRAY
];
128 struct delta_index
* create_delta_index(const void *buf
, unsigned long bufsize
)
130 unsigned int i
, hsize
, hmask
, entries
, prev_val
, *hash_count
;
131 const unsigned char *data
, *buffer
= buf
;
132 struct delta_index
*index
;
133 struct index_entry
*entry
, **hash
;
135 unsigned long memsize
;
137 if (!buf
|| !bufsize
)
140 /* Determine index hash size. Note that indexing skips the
141 first byte to allow for optimizing the Rabin's polynomial
142 initialization in create_delta(). */
143 entries
= (bufsize
- 1) / RABIN_WINDOW
;
145 for (i
= 4; (1u << i
) < hsize
&& i
< 31; i
++);
149 /* allocate lookup index */
150 memsize
= sizeof(*index
) +
151 sizeof(*hash
) * hsize
+
152 sizeof(*entry
) * entries
;
153 mem
= malloc(memsize
);
162 index
->src_buf
= buf
;
163 index
->src_size
= bufsize
;
164 index
->hash_mask
= hmask
;
165 memset(hash
, 0, hsize
* sizeof(*hash
));
167 /* allocate an array to count hash entries */
168 hash_count
= calloc(hsize
, sizeof(*hash_count
));
174 /* then populate the index */
176 for (data
= buffer
+ entries
* RABIN_WINDOW
- RABIN_WINDOW
;
178 data
-= RABIN_WINDOW
) {
179 unsigned int val
= 0;
180 for (i
= 1; i
<= RABIN_WINDOW
; i
++)
181 val
= ((val
<< 8) | data
[i
]) ^ T
[val
>> RABIN_SHIFT
];
182 if (val
== prev_val
) {
183 /* keep the lowest of consecutive identical blocks */
184 entry
[-1].ptr
= data
+ RABIN_WINDOW
;
188 entry
->ptr
= data
+ RABIN_WINDOW
;
190 entry
->next
= hash
[i
];
197 * Determine a limit on the number of entries in the same hash
198 * bucket. This guards us against pathological data sets causing
199 * really bad hash distribution with most entries in the same hash
200 * bucket that would bring us to O(m*n) computing costs (m and n
201 * corresponding to reference and target buffer sizes).
203 * Make sure none of the hash buckets has more entries than
204 * we're willing to test. Otherwise we cull the entry list
205 * uniformly to still preserve a good repartition across
206 * the reference buffer.
208 for (i
= 0; i
< hsize
; i
++) {
209 if (hash_count
[i
] < HASH_LIMIT
)
213 struct index_entry
*keep
= entry
;
214 int skip
= hash_count
[i
] / HASH_LIMIT
/ 2;
217 } while(--skip
&& entry
);
226 void free_delta_index(struct delta_index
*index
)
232 * The maximum size for any opcode sequence, including the initial header
233 * plus Rabin window plus biggest copy.
235 #define MAX_OP_SIZE (5 + 5 + 1 + RABIN_WINDOW + 7)
238 create_delta(const struct delta_index
*index
,
239 const void *trg_buf
, unsigned long trg_size
,
240 unsigned long *delta_size
, unsigned long max_size
)
242 unsigned int i
, outpos
, outsize
, moff
, msize
, val
;
244 const unsigned char *ref_data
, *ref_top
, *data
, *top
;
247 if (!trg_buf
|| !trg_size
)
252 if (max_size
&& outsize
>= max_size
)
253 outsize
= max_size
+ MAX_OP_SIZE
+ 1;
254 out
= malloc(outsize
);
258 /* store reference buffer size */
261 out
[outpos
++] = i
| 0x80;
266 /* store target buffer size */
269 out
[outpos
++] = i
| 0x80;
274 ref_data
= index
->src_buf
;
275 ref_top
= ref_data
+ index
->src_size
;
277 top
= (const unsigned char *) trg_buf
+ trg_size
;
281 for (i
= 0; i
< RABIN_WINDOW
&& data
< top
; i
++, data
++) {
282 out
[outpos
++] = *data
;
283 val
= ((val
<< 8) | *data
) ^ T
[val
>> RABIN_SHIFT
];
291 struct index_entry
*entry
;
292 val
^= U
[data
[-RABIN_WINDOW
]];
293 val
= ((val
<< 8) | *data
) ^ T
[val
>> RABIN_SHIFT
];
294 i
= val
& index
->hash_mask
;
295 for (entry
= index
->hash
[i
]; entry
; entry
= entry
->next
) {
296 const unsigned char *ref
= entry
->ptr
;
297 const unsigned char *src
= data
;
298 unsigned int ref_size
= ref_top
- ref
;
299 if (entry
->val
!= val
)
301 if (ref_size
> top
- src
)
302 ref_size
= top
- src
;
303 if (ref_size
<= msize
)
305 while (ref_size
-- && *src
++ == *ref
)
307 if (msize
< ref
- entry
->ptr
) {
308 /* this is our best match so far */
309 msize
= ref
- entry
->ptr
;
310 moff
= entry
->ptr
- ref_data
;
311 if (msize
>= 4096) /* good enough */
320 out
[outpos
++] = *data
++;
322 if (inscnt
== 0x7f) {
323 out
[outpos
- inscnt
- 1] = inscnt
;
332 while (moff
&& ref_data
[moff
-1] == data
[-1]) {
333 /* we can match one byte back */
340 outpos
--; /* remove count slot */
341 inscnt
--; /* make it -1 */
344 out
[outpos
- inscnt
- 1] = inscnt
;
348 /* A copy op is currently limited to 64KB (pack v2) */
349 left
= (msize
< 0x10000) ? 0 : (msize
- 0x10000);
355 if (moff
& 0x000000ff)
356 out
[outpos
++] = moff
>> 0, i
|= 0x01;
357 if (moff
& 0x0000ff00)
358 out
[outpos
++] = moff
>> 8, i
|= 0x02;
359 if (moff
& 0x00ff0000)
360 out
[outpos
++] = moff
>> 16, i
|= 0x04;
361 if (moff
& 0xff000000)
362 out
[outpos
++] = moff
>> 24, i
|= 0x08;
365 out
[outpos
++] = msize
>> 0, i
|= 0x10;
367 out
[outpos
++] = msize
>> 8, i
|= 0x20;
378 for (j
= -RABIN_WINDOW
; j
< 0; j
++)
379 val
= ((val
<< 8) | data
[j
])
380 ^ T
[val
>> RABIN_SHIFT
];
384 if (outpos
>= outsize
- MAX_OP_SIZE
) {
386 outsize
= outsize
* 3 / 2;
387 if (max_size
&& outsize
>= max_size
)
388 outsize
= max_size
+ MAX_OP_SIZE
+ 1;
389 if (max_size
&& outpos
> max_size
)
391 out
= realloc(out
, outsize
);
400 out
[outpos
- inscnt
- 1] = inscnt
;
402 if (max_size
&& outpos
> max_size
) {
407 *delta_size
= outpos
;