2 * diff-delta.c: generate a delta between two buffers
4 * Many parts of this file have been lifted from LibXDiff version 0.10.
5 * http://www.xmailserver.org/xdiff-lib.html
7 * LibXDiff was written by Davide Libenzi <davidel@xmailserver.org>
8 * Copyright (C) 2003 Davide Libenzi
10 * Many mods for GIT usage by Nicolas Pitre <nico@cam.org>, (C) 2005.
12 * This file is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU Lesser General Public
14 * License as published by the Free Software Foundation; either
15 * version 2.1 of the License, or (at your option) any later version.
17 * Use of this within git automatically means that the LGPL
18 * licensing gets turned into GPLv2 within this project.
25 #include "git-compat-util.h"
27 /* maximum hash entry list for the same hash bucket */
30 #define RABIN_SHIFT 23
31 #define RABIN_WINDOW 16
33 static const unsigned int T
[256] = {
34 0x00000000, 0xab59b4d1, 0x56b369a2, 0xfdeadd73, 0x063f6795, 0xad66d344,
35 0x508c0e37, 0xfbd5bae6, 0x0c7ecf2a, 0xa7277bfb, 0x5acda688, 0xf1941259,
36 0x0a41a8bf, 0xa1181c6e, 0x5cf2c11d, 0xf7ab75cc, 0x18fd9e54, 0xb3a42a85,
37 0x4e4ef7f6, 0xe5174327, 0x1ec2f9c1, 0xb59b4d10, 0x48719063, 0xe32824b2,
38 0x1483517e, 0xbfdae5af, 0x423038dc, 0xe9698c0d, 0x12bc36eb, 0xb9e5823a,
39 0x440f5f49, 0xef56eb98, 0x31fb3ca8, 0x9aa28879, 0x6748550a, 0xcc11e1db,
40 0x37c45b3d, 0x9c9defec, 0x6177329f, 0xca2e864e, 0x3d85f382, 0x96dc4753,
41 0x6b369a20, 0xc06f2ef1, 0x3bba9417, 0x90e320c6, 0x6d09fdb5, 0xc6504964,
42 0x2906a2fc, 0x825f162d, 0x7fb5cb5e, 0xd4ec7f8f, 0x2f39c569, 0x846071b8,
43 0x798aaccb, 0xd2d3181a, 0x25786dd6, 0x8e21d907, 0x73cb0474, 0xd892b0a5,
44 0x23470a43, 0x881ebe92, 0x75f463e1, 0xdeadd730, 0x63f67950, 0xc8afcd81,
45 0x354510f2, 0x9e1ca423, 0x65c91ec5, 0xce90aa14, 0x337a7767, 0x9823c3b6,
46 0x6f88b67a, 0xc4d102ab, 0x393bdfd8, 0x92626b09, 0x69b7d1ef, 0xc2ee653e,
47 0x3f04b84d, 0x945d0c9c, 0x7b0be704, 0xd05253d5, 0x2db88ea6, 0x86e13a77,
48 0x7d348091, 0xd66d3440, 0x2b87e933, 0x80de5de2, 0x7775282e, 0xdc2c9cff,
49 0x21c6418c, 0x8a9ff55d, 0x714a4fbb, 0xda13fb6a, 0x27f92619, 0x8ca092c8,
50 0x520d45f8, 0xf954f129, 0x04be2c5a, 0xafe7988b, 0x5432226d, 0xff6b96bc,
51 0x02814bcf, 0xa9d8ff1e, 0x5e738ad2, 0xf52a3e03, 0x08c0e370, 0xa39957a1,
52 0x584ced47, 0xf3155996, 0x0eff84e5, 0xa5a63034, 0x4af0dbac, 0xe1a96f7d,
53 0x1c43b20e, 0xb71a06df, 0x4ccfbc39, 0xe79608e8, 0x1a7cd59b, 0xb125614a,
54 0x468e1486, 0xedd7a057, 0x103d7d24, 0xbb64c9f5, 0x40b17313, 0xebe8c7c2,
55 0x16021ab1, 0xbd5bae60, 0x6cb54671, 0xc7ecf2a0, 0x3a062fd3, 0x915f9b02,
56 0x6a8a21e4, 0xc1d39535, 0x3c394846, 0x9760fc97, 0x60cb895b, 0xcb923d8a,
57 0x3678e0f9, 0x9d215428, 0x66f4eece, 0xcdad5a1f, 0x3047876c, 0x9b1e33bd,
58 0x7448d825, 0xdf116cf4, 0x22fbb187, 0x89a20556, 0x7277bfb0, 0xd92e0b61,
59 0x24c4d612, 0x8f9d62c3, 0x7836170f, 0xd36fa3de, 0x2e857ead, 0x85dcca7c,
60 0x7e09709a, 0xd550c44b, 0x28ba1938, 0x83e3ade9, 0x5d4e7ad9, 0xf617ce08,
61 0x0bfd137b, 0xa0a4a7aa, 0x5b711d4c, 0xf028a99d, 0x0dc274ee, 0xa69bc03f,
62 0x5130b5f3, 0xfa690122, 0x0783dc51, 0xacda6880, 0x570fd266, 0xfc5666b7,
63 0x01bcbbc4, 0xaae50f15, 0x45b3e48d, 0xeeea505c, 0x13008d2f, 0xb85939fe,
64 0x438c8318, 0xe8d537c9, 0x153feaba, 0xbe665e6b, 0x49cd2ba7, 0xe2949f76,
65 0x1f7e4205, 0xb427f6d4, 0x4ff24c32, 0xe4abf8e3, 0x19412590, 0xb2189141,
66 0x0f433f21, 0xa41a8bf0, 0x59f05683, 0xf2a9e252, 0x097c58b4, 0xa225ec65,
67 0x5fcf3116, 0xf49685c7, 0x033df00b, 0xa86444da, 0x558e99a9, 0xfed72d78,
68 0x0502979e, 0xae5b234f, 0x53b1fe3c, 0xf8e84aed, 0x17bea175, 0xbce715a4,
69 0x410dc8d7, 0xea547c06, 0x1181c6e0, 0xbad87231, 0x4732af42, 0xec6b1b93,
70 0x1bc06e5f, 0xb099da8e, 0x4d7307fd, 0xe62ab32c, 0x1dff09ca, 0xb6a6bd1b,
71 0x4b4c6068, 0xe015d4b9, 0x3eb80389, 0x95e1b758, 0x680b6a2b, 0xc352defa,
72 0x3887641c, 0x93ded0cd, 0x6e340dbe, 0xc56db96f, 0x32c6cca3, 0x999f7872,
73 0x6475a501, 0xcf2c11d0, 0x34f9ab36, 0x9fa01fe7, 0x624ac294, 0xc9137645,
74 0x26459ddd, 0x8d1c290c, 0x70f6f47f, 0xdbaf40ae, 0x207afa48, 0x8b234e99,
75 0x76c993ea, 0xdd90273b, 0x2a3b52f7, 0x8162e626, 0x7c883b55, 0xd7d18f84,
76 0x2c043562, 0x875d81b3, 0x7ab75cc0, 0xd1eee811
79 static const unsigned int U
[256] = {
80 0x00000000, 0x7eb5200d, 0x5633f4cb, 0x2886d4c6, 0x073e5d47, 0x798b7d4a,
81 0x510da98c, 0x2fb88981, 0x0e7cba8e, 0x70c99a83, 0x584f4e45, 0x26fa6e48,
82 0x0942e7c9, 0x77f7c7c4, 0x5f711302, 0x21c4330f, 0x1cf9751c, 0x624c5511,
83 0x4aca81d7, 0x347fa1da, 0x1bc7285b, 0x65720856, 0x4df4dc90, 0x3341fc9d,
84 0x1285cf92, 0x6c30ef9f, 0x44b63b59, 0x3a031b54, 0x15bb92d5, 0x6b0eb2d8,
85 0x4388661e, 0x3d3d4613, 0x39f2ea38, 0x4747ca35, 0x6fc11ef3, 0x11743efe,
86 0x3eccb77f, 0x40799772, 0x68ff43b4, 0x164a63b9, 0x378e50b6, 0x493b70bb,
87 0x61bda47d, 0x1f088470, 0x30b00df1, 0x4e052dfc, 0x6683f93a, 0x1836d937,
88 0x250b9f24, 0x5bbebf29, 0x73386bef, 0x0d8d4be2, 0x2235c263, 0x5c80e26e,
89 0x740636a8, 0x0ab316a5, 0x2b7725aa, 0x55c205a7, 0x7d44d161, 0x03f1f16c,
90 0x2c4978ed, 0x52fc58e0, 0x7a7a8c26, 0x04cfac2b, 0x73e5d470, 0x0d50f47d,
91 0x25d620bb, 0x5b6300b6, 0x74db8937, 0x0a6ea93a, 0x22e87dfc, 0x5c5d5df1,
92 0x7d996efe, 0x032c4ef3, 0x2baa9a35, 0x551fba38, 0x7aa733b9, 0x041213b4,
93 0x2c94c772, 0x5221e77f, 0x6f1ca16c, 0x11a98161, 0x392f55a7, 0x479a75aa,
94 0x6822fc2b, 0x1697dc26, 0x3e1108e0, 0x40a428ed, 0x61601be2, 0x1fd53bef,
95 0x3753ef29, 0x49e6cf24, 0x665e46a5, 0x18eb66a8, 0x306db26e, 0x4ed89263,
96 0x4a173e48, 0x34a21e45, 0x1c24ca83, 0x6291ea8e, 0x4d29630f, 0x339c4302,
97 0x1b1a97c4, 0x65afb7c9, 0x446b84c6, 0x3adea4cb, 0x1258700d, 0x6ced5000,
98 0x4355d981, 0x3de0f98c, 0x15662d4a, 0x6bd30d47, 0x56ee4b54, 0x285b6b59,
99 0x00ddbf9f, 0x7e689f92, 0x51d01613, 0x2f65361e, 0x07e3e2d8, 0x7956c2d5,
100 0x5892f1da, 0x2627d1d7, 0x0ea10511, 0x7014251c, 0x5facac9d, 0x21198c90,
101 0x099f5856, 0x772a785b, 0x4c921c31, 0x32273c3c, 0x1aa1e8fa, 0x6414c8f7,
102 0x4bac4176, 0x3519617b, 0x1d9fb5bd, 0x632a95b0, 0x42eea6bf, 0x3c5b86b2,
103 0x14dd5274, 0x6a687279, 0x45d0fbf8, 0x3b65dbf5, 0x13e30f33, 0x6d562f3e,
104 0x506b692d, 0x2ede4920, 0x06589de6, 0x78edbdeb, 0x5755346a, 0x29e01467,
105 0x0166c0a1, 0x7fd3e0ac, 0x5e17d3a3, 0x20a2f3ae, 0x08242768, 0x76910765,
106 0x59298ee4, 0x279caee9, 0x0f1a7a2f, 0x71af5a22, 0x7560f609, 0x0bd5d604,
107 0x235302c2, 0x5de622cf, 0x725eab4e, 0x0ceb8b43, 0x246d5f85, 0x5ad87f88,
108 0x7b1c4c87, 0x05a96c8a, 0x2d2fb84c, 0x539a9841, 0x7c2211c0, 0x029731cd,
109 0x2a11e50b, 0x54a4c506, 0x69998315, 0x172ca318, 0x3faa77de, 0x411f57d3,
110 0x6ea7de52, 0x1012fe5f, 0x38942a99, 0x46210a94, 0x67e5399b, 0x19501996,
111 0x31d6cd50, 0x4f63ed5d, 0x60db64dc, 0x1e6e44d1, 0x36e89017, 0x485db01a,
112 0x3f77c841, 0x41c2e84c, 0x69443c8a, 0x17f11c87, 0x38499506, 0x46fcb50b,
113 0x6e7a61cd, 0x10cf41c0, 0x310b72cf, 0x4fbe52c2, 0x67388604, 0x198da609,
114 0x36352f88, 0x48800f85, 0x6006db43, 0x1eb3fb4e, 0x238ebd5d, 0x5d3b9d50,
115 0x75bd4996, 0x0b08699b, 0x24b0e01a, 0x5a05c017, 0x728314d1, 0x0c3634dc,
116 0x2df207d3, 0x534727de, 0x7bc1f318, 0x0574d315, 0x2acc5a94, 0x54797a99,
117 0x7cffae5f, 0x024a8e52, 0x06852279, 0x78300274, 0x50b6d6b2, 0x2e03f6bf,
118 0x01bb7f3e, 0x7f0e5f33, 0x57888bf5, 0x293dabf8, 0x08f998f7, 0x764cb8fa,
119 0x5eca6c3c, 0x207f4c31, 0x0fc7c5b0, 0x7172e5bd, 0x59f4317b, 0x27411176,
120 0x1a7c5765, 0x64c97768, 0x4c4fa3ae, 0x32fa83a3, 0x1d420a22, 0x63f72a2f,
121 0x4b71fee9, 0x35c4dee4, 0x1400edeb, 0x6ab5cde6, 0x42331920, 0x3c86392d,
122 0x133eb0ac, 0x6d8b90a1, 0x450d4467, 0x3bb8646a
126 const unsigned char *ptr
;
128 struct index_entry
*next
;
133 unsigned long src_size
;
134 unsigned int hash_mask
;
135 struct index_entry
*hash
[FLEX_ARRAY
];
138 struct delta_index
* create_delta_index(const void *buf
, unsigned long bufsize
)
140 unsigned int i
, hsize
, hmask
, entries
, prev_val
, *hash_count
;
141 const unsigned char *data
, *buffer
= buf
;
142 struct delta_index
*index
;
143 struct index_entry
*entry
, **hash
;
145 unsigned long memsize
;
147 if (!buf
|| !bufsize
)
150 /* Determine index hash size. Note that indexing skips the
151 first byte to allow for optimizing the rabin polynomial
152 initialization in create_delta(). */
153 entries
= (bufsize
- 1) / RABIN_WINDOW
;
155 for (i
= 4; (1 << i
) < hsize
&& i
< 31; i
++);
159 /* allocate lookup index */
160 memsize
= sizeof(*index
) +
161 sizeof(*hash
) * hsize
+
162 sizeof(*entry
) * entries
;
163 mem
= malloc(memsize
);
172 index
->src_buf
= buf
;
173 index
->src_size
= bufsize
;
174 index
->hash_mask
= hmask
;
175 memset(hash
, 0, hsize
* sizeof(*hash
));
177 /* allocate an array to count hash entries */
178 hash_count
= calloc(hsize
, sizeof(*hash_count
));
184 /* then populate the index */
186 for (data
= buffer
+ entries
* RABIN_WINDOW
- RABIN_WINDOW
;
188 data
-= RABIN_WINDOW
) {
189 unsigned int val
= 0;
190 for (i
= 1; i
<= RABIN_WINDOW
; i
++)
191 val
= ((val
<< 8) | data
[i
]) ^ T
[val
>> RABIN_SHIFT
];
192 if (val
== prev_val
) {
193 /* keep the lowest of consecutive identical blocks */
194 entry
[-1].ptr
= data
+ RABIN_WINDOW
;
198 entry
->ptr
= data
+ RABIN_WINDOW
;
200 entry
->next
= hash
[i
];
207 * Determine a limit on the number of entries in the same hash
208 * bucket. This guard us against patological data sets causing
209 * really bad hash distribution with most entries in the same hash
210 * bucket that would bring us to O(m*n) computing costs (m and n
211 * corresponding to reference and target buffer sizes).
213 * Make sure none of the hash buckets has more entries than
214 * we're willing to test. Otherwise we cull the entry list
215 * uniformly to still preserve a good repartition across
216 * the reference buffer.
218 for (i
= 0; i
< hsize
; i
++) {
219 if (hash_count
[i
] < HASH_LIMIT
)
223 struct index_entry
*keep
= entry
;
224 int skip
= hash_count
[i
] / HASH_LIMIT
/ 2;
227 } while(--skip
&& entry
);
236 void free_delta_index(struct delta_index
*index
)
242 * The maximum size for any opcode sequence, including the initial header
243 * plus rabin window plus biggest copy.
245 #define MAX_OP_SIZE (5 + 5 + 1 + RABIN_WINDOW + 7)
248 create_delta(const struct delta_index
*index
,
249 const void *trg_buf
, unsigned long trg_size
,
250 unsigned long *delta_size
, unsigned long max_size
)
252 unsigned int i
, outpos
, outsize
, val
;
254 const unsigned char *ref_data
, *ref_top
, *data
, *top
;
257 if (!trg_buf
|| !trg_size
)
262 if (max_size
&& outsize
>= max_size
)
263 outsize
= max_size
+ MAX_OP_SIZE
+ 1;
264 out
= malloc(outsize
);
268 /* store reference buffer size */
271 out
[outpos
++] = i
| 0x80;
276 /* store target buffer size */
279 out
[outpos
++] = i
| 0x80;
284 ref_data
= index
->src_buf
;
285 ref_top
= ref_data
+ index
->src_size
;
287 top
= trg_buf
+ trg_size
;
291 for (i
= 0; i
< RABIN_WINDOW
&& data
< top
; i
++, data
++) {
292 out
[outpos
++] = *data
;
293 val
= ((val
<< 8) | *data
) ^ T
[val
>> RABIN_SHIFT
];
298 unsigned int moff
= 0, msize
= 0;
299 struct index_entry
*entry
;
300 val
^= U
[data
[-RABIN_WINDOW
]];
301 val
= ((val
<< 8) | *data
) ^ T
[val
>> RABIN_SHIFT
];
302 i
= val
& index
->hash_mask
;
303 for (entry
= index
->hash
[i
]; entry
; entry
= entry
->next
) {
304 const unsigned char *ref
= entry
->ptr
;
305 const unsigned char *src
= data
;
306 unsigned int ref_size
= ref_top
- ref
;
307 if (entry
->val
!= val
)
309 if (ref_size
> top
- src
)
310 ref_size
= top
- src
;
311 if (ref_size
> 0x10000)
313 if (ref_size
<= msize
)
315 while (ref_size
-- && *src
++ == *ref
)
317 if (msize
< ref
- entry
->ptr
) {
318 /* this is our best match so far */
319 msize
= ref
- entry
->ptr
;
320 moff
= entry
->ptr
- ref_data
;
327 out
[outpos
++] = *data
++;
329 if (inscnt
== 0x7f) {
330 out
[outpos
- inscnt
- 1] = inscnt
;
336 if (msize
>= RABIN_WINDOW
) {
337 const unsigned char *sk
;
338 sk
= data
+ msize
- RABIN_WINDOW
;
340 for (i
= 0; i
< RABIN_WINDOW
; i
++)
341 val
= ((val
<< 8) | *sk
++) ^ T
[val
>> RABIN_SHIFT
];
343 const unsigned char *sk
= data
+ 1;
344 for (i
= 1; i
< msize
; i
++) {
345 val
^= U
[sk
[-RABIN_WINDOW
]];
346 val
= ((val
<< 8) | *sk
++) ^ T
[val
>> RABIN_SHIFT
];
351 while (moff
&& ref_data
[moff
-1] == data
[-1]) {
352 if (msize
== 0x10000)
354 /* we can match one byte back */
361 outpos
--; /* remove count slot */
362 inscnt
--; /* make it -1 */
365 out
[outpos
- inscnt
- 1] = inscnt
;
373 if (moff
& 0xff) { out
[outpos
++] = moff
; i
|= 0x01; }
375 if (moff
& 0xff) { out
[outpos
++] = moff
; i
|= 0x02; }
377 if (moff
& 0xff) { out
[outpos
++] = moff
; i
|= 0x04; }
379 if (moff
& 0xff) { out
[outpos
++] = moff
; i
|= 0x08; }
381 if (msize
& 0xff) { out
[outpos
++] = msize
; i
|= 0x10; }
383 if (msize
& 0xff) { out
[outpos
++] = msize
; i
|= 0x20; }
388 if (outpos
>= outsize
- MAX_OP_SIZE
) {
390 outsize
= outsize
* 3 / 2;
391 if (max_size
&& outsize
>= max_size
)
392 outsize
= max_size
+ MAX_OP_SIZE
+ 1;
393 if (max_size
&& outpos
> max_size
)
395 out
= realloc(out
, outsize
);
404 out
[outpos
- inscnt
- 1] = inscnt
;
406 if (max_size
&& outpos
> max_size
) {
411 *delta_size
= outpos
;