More performance optimizations in crcsync
[httpd-crcsyncproxy.git] / ccan / crcsync / crcsync.c
blob6df7836cd3cd2818a636a8649765168a54f3a138
1 #include "crcsync.h"
2 #include <ccan/crc/crc.h>
3 #include <string.h>
4 #include <assert.h>
5 #include <stdbool.h>
7 /* FIXME: That 64-bit CRC takes a while to warm the lower bits. Do
8 * some quantitative tests and replace it? Meanwhile, use upper bits. */
9 static uint64_t mask_of(unsigned int crcbits)
11 return -1ULL << (64 - crcbits);
14 void crc_of_blocks(const void *data, size_t len, unsigned int normal_block_size,
15 unsigned int crcbits, bool merge_trailing_bytes_in_last_block, uint64_t crc[])
17 unsigned int nblocks = len/normal_block_size;
18 unsigned int i;
19 const uint8_t *buf = data;
20 uint64_t crcmask = mask_of(crcbits);
22 if (len%normal_block_size && !merge_trailing_bytes_in_last_block)
23 nblocks++;
25 for (i = 0; i < nblocks - 1; i++) {
26 crc[i] = (crc64_iso(0, buf, normal_block_size) & crcmask);
27 buf += normal_block_size;
28 len -= normal_block_size;
30 crc[i] = (crc64_iso(0, buf, len) & crcmask);
33 struct crc_hash_record {
34 uint64_t crc;
35 int value;
38 struct crc_hash_table {
39 unsigned mask;
40 struct crc_hash_record *records;
43 struct crc_context {
44 const uint64_t *crc64_iso_tab;
45 size_t normal_block_size;
46 size_t tail_block_size;
47 size_t max_block_size;
48 uint64_t crcmask;
50 /* Saved old buffer bytes (max_block_size bytes). */
51 void *buffer;
52 size_t buffer_size;
53 void *buffer_end; /* invariant to be enforced in code: buffer_end = buffer + buffer_size */
55 /* Progress so far. */
56 uint64_t running_normal_crc;
57 uint64_t running_tail_crc;;
58 size_t literal_bytes;
59 size_t total_bytes;
60 int have_match;
62 /* Uncrc tab. */
63 uint64_t normal_uncrc_tab[256];
64 uint64_t tail_uncrc_tab[256];
66 /* last CRC is special */
67 uint64_t tail_crc;
68 /* This doesn't count the last CRC. */
69 unsigned int num_crcs;
70 struct crc_hash_table crcs;
73 static uint64_t crc64_over_zeros(const uint64_t *crc64_iso_tab, uint64_t crc, int size)
75 while (size--)
77 crc = crc64_iso_tab[crc & 0xFFL] ^ (crc >> 8);
79 return crc;
82 /* Initialize one table that is used to calculate how the crc changes when we take a give
83 * char out of the crc'd area. This function is to be used when there is no tail block */
84 static void init_uncrc_tab(const uint64_t *crc64_iso_tab, uint64_t uncrc_tab[], unsigned int wsize)
86 unsigned int i;
87 uint64_t part_crc;
89 part_crc = crc64_over_zeros(crc64_iso_tab, 0, wsize-1);
90 for (i=0; i < 256; i++)
91 uncrc_tab[i] = crc64_over_zeros(crc64_iso_tab, crc64_iso_tab[i], wsize-1) ^ part_crc;
94 /* Initialize two tables that are used to calculate how the crc changes when we take a give
95 * char out of the crc'd area. This function is to be used when there is a tail block.
96 * The function initializes one table for the tail block and another one for the normal block.
97 * You must pass the params for the smalles block first followed by the params for the largest block */
98 static void init_uncrc_tabs(const uint64_t *crc64_iso_tab, uint64_t small_uncrc_tab[], unsigned int small_wsize, uint64_t large_uncrc_tab[], unsigned int large_wsize)
100 unsigned int i;
101 unsigned int delta_wsize = large_wsize - small_wsize;
102 uint64_t small_part_crc;
103 uint64_t large_part_crc;
104 uint64_t crc;
106 small_part_crc = crc64_over_zeros(crc64_iso_tab, 0, small_wsize-1);
107 large_part_crc = crc64_over_zeros(crc64_iso_tab, small_part_crc, delta_wsize);
108 for (i=0; i < 256; i++) {
109 crc = crc64_over_zeros(crc64_iso_tab, crc64_iso_tab[i], small_wsize-1);
110 small_uncrc_tab[i] = crc ^ small_part_crc;
111 crc = crc64_over_zeros(crc64_iso_tab, crc, delta_wsize);
112 large_uncrc_tab[i] = crc ^ large_part_crc;
116 static unsigned crc_hashtable_getpos(const struct crc_hash_table *crcs, uint64_t crc)
118 unsigned mask = crcs->mask;
119 struct crc_hash_record *records = crcs->records;
120 unsigned pos = (crc >> 32) & mask; // Use highest 32 bits of the checksum as start position
121 unsigned step = (1 + (crc & 0x1e)); // Step with an odd-number of steps, exact value depends on crc lowest 5 bits
123 while (records[pos].value != -1 && records[pos].crc != crc)
125 // This position is already taken by another crc record. Go to next position
126 pos = (pos + step) & mask;
128 return pos;
131 static void crc_hashtable_put(struct crc_hash_table *crcs, uint64_t crc, int value)
133 unsigned pos = crc_hashtable_getpos(crcs, crc);
134 crcs->records[pos].value = value;
135 crcs->records[pos].crc = crc;
138 static int crc_hashtable_get(const struct crc_hash_table *crcs, uint64_t crc)
140 unsigned pos = crc_hashtable_getpos(crcs, crc);
141 // Found an empty position (with value -1) or found the entry for the requested CRC
142 return crcs->records[pos].value;
145 struct crc_context *crc_context_new(size_t normal_block_size, unsigned crcbits,
146 const uint64_t crc[], unsigned num_crcs,
147 size_t tail_block_size)
149 struct crc_context *ctx;
151 assert(num_crcs > 0);
152 assert(normal_block_size > 0);
153 assert(tail_block_size >= 0);
155 ctx = malloc(sizeof(*ctx) + sizeof(crc[0])*num_crcs);
156 if (ctx) {
157 ctx->crc64_iso_tab = crc64_iso_table();
158 ctx->normal_block_size = normal_block_size;
159 if (tail_block_size == normal_block_size)
161 tail_block_size = 0; // treat a tail block with normal block size as a normal block
163 ctx->tail_block_size = tail_block_size;
164 ctx->max_block_size = (tail_block_size > normal_block_size) ?
165 tail_block_size : normal_block_size;
166 if (tail_block_size)
167 ctx->tail_crc = crc[--num_crcs];
169 ctx->crcmask = mask_of(crcbits);
170 ctx->num_crcs = num_crcs;
171 unsigned crc_hashtable_size = 4;
172 while (crc_hashtable_size < 2*num_crcs)
174 crc_hashtable_size <<= 1;
176 ctx->crcs.mask = crc_hashtable_size-1;
177 ctx->crcs.records = malloc(sizeof(struct crc_hash_record)*crc_hashtable_size);
178 unsigned cnt;
179 for (cnt=0; cnt != crc_hashtable_size; cnt++)
181 ctx->crcs.records[cnt].value = -1;
183 for (cnt=0; cnt != num_crcs; cnt++)
185 crc_hashtable_put(&ctx->crcs, crc[cnt], cnt);
187 // memcpy(ctx->crc, crc, sizeof(crc[0])*num_crcs);
188 ctx->running_normal_crc = 0;
189 ctx->literal_bytes = 0;
190 ctx->total_bytes = 0;
191 ctx->have_match = -1;
192 if (tail_block_size)
194 if (tail_block_size < normal_block_size)
195 init_uncrc_tabs(ctx->crc64_iso_tab, ctx->tail_uncrc_tab, tail_block_size, ctx->normal_uncrc_tab, normal_block_size);
196 else
197 init_uncrc_tabs(ctx->crc64_iso_tab, ctx->normal_uncrc_tab, normal_block_size, ctx->tail_uncrc_tab, tail_block_size);
199 else
201 init_uncrc_tab(ctx->crc64_iso_tab, ctx->normal_uncrc_tab, normal_block_size);
204 ctx->buffer = malloc(ctx->max_block_size);
205 if (!ctx->buffer) {
206 free(ctx);
207 ctx = NULL;
209 else {
210 ctx->buffer_size = 0;
211 ctx->buffer_end = ctx->buffer;
214 return ctx;
217 /* Return -1 or index into matching crc. */
218 /* Only invoke once you have read enough literal bytes! */
219 static int crc_matches(const struct crc_context *ctx)
221 return crc_hashtable_get(&ctx->crcs, ctx->running_normal_crc & ctx->crcmask);
224 /* Return -1 or index of tail crc */
225 /* Only invoke once you have read enough literal bytes! */
226 static int tail_matches(const struct crc_context *ctx)
228 return (ctx->running_tail_crc & ctx->crcmask) == ctx->tail_crc ? ctx->num_crcs : -1;
231 static uint64_t crc_add_byte(const uint64_t *crc64_iso_tab, uint64_t crc, uint8_t newbyte)
233 return crc64_iso_tab[(crc ^ newbyte) & 0xFFL] ^ (crc >> 8);
236 static uint64_t crc_remove_byte(uint64_t crc, uint8_t oldbyte,
237 const uint64_t uncrc_tab[])
239 return crc ^ uncrc_tab[oldbyte];
242 static uint64_t crc_roll(const uint64_t *crc64_iso_tab, uint64_t crc, uint8_t oldbyte, uint8_t newbyte,
243 const uint64_t uncrc_tab[])
245 return crc_add_byte(crc64_iso_tab, crc_remove_byte(crc, oldbyte, uncrc_tab), newbyte);
248 enum RB_PHASE { non_rolling, only_tail_rolling, only_normal_rolling, both_rolling };
249 #define MIN(X,Y) ((X) < (Y) ? (X) : (Y))
251 size_t crc_read_block(struct crc_context *ctx, long *result,
252 const void *buf, size_t buflen)
254 size_t consumed = 0, len;
255 int crcmatch = -1;
256 const uint8_t *normal_old, *tail_old, *get_pos = buf;
257 enum RB_PHASE phase;
259 /* Simple optimization, if we found a match last time. */
260 if (ctx->have_match >= 0) {
261 crcmatch = ctx->have_match;
262 goto have_match;
265 /* normal_old is the trailing edge of the normal checksum window. */
266 if (ctx->buffer_size >= ctx->normal_block_size)
267 normal_old = ctx->buffer_end - ctx->normal_block_size;
268 else
269 normal_old = NULL;
271 /* tail_old is the trailing edge of the tail checksum window. */
272 if (ctx->tail_block_size != 0 && ctx->buffer_size >= ctx->tail_block_size)
273 tail_old = ctx->buffer_end - ctx->tail_block_size;
274 else
275 tail_old = NULL;
277 if (normal_old == NULL && tail_old == NULL)
278 phase = non_rolling;
279 else
280 if (normal_old == NULL)
281 phase = only_tail_rolling;
282 else
283 if (tail_old == NULL)
284 phase = only_normal_rolling;
285 else
286 phase = both_rolling;
287 while (consumed != buflen && crcmatch == -1) {
288 size_t old_consumed = consumed;
289 switch (phase)
291 case non_rolling:
293 size_t nbytes = MIN(buflen - consumed, MIN(ctx->normal_block_size - ctx->literal_bytes, ctx->tail_block_size - ctx->literal_bytes));
294 ctx->running_tail_crc = ctx->running_normal_crc = crc64_iso(ctx->running_normal_crc, get_pos, nbytes);
295 consumed += nbytes;
296 ctx->literal_bytes += nbytes;
297 get_pos += nbytes;
298 if (ctx->literal_bytes == ctx->normal_block_size) {
299 /* Reached the end of a normal block. Check CRC
300 and start rolling the CRC at next iteration */
301 if ((crcmatch = crc_matches(ctx)) != -1)
302 break;
303 normal_old = (ctx->buffer_size != 0) ? ctx->buffer : buf;
304 phase = only_normal_rolling;
306 else if (ctx->literal_bytes == ctx->tail_block_size) {
307 /* Reached the end of a tail block. Check tail CRC
308 and start rolling the CRC at next iteration */
309 if ((crcmatch = tail_matches(ctx)) != -1)
310 break;
311 tail_old = (ctx->buffer_size != 0) ? ctx->buffer : buf;
312 phase = only_tail_rolling;
315 break;
316 case only_normal_rolling:
317 while (consumed != buflen)
319 consumed++;
320 ctx->literal_bytes++;
321 ctx->running_normal_crc = crc_roll(ctx->crc64_iso_tab,
322 ctx->running_normal_crc,
323 *normal_old, *get_pos,
324 ctx->normal_uncrc_tab);
325 if ((crcmatch = crc_matches(ctx)) != -1)
326 break;
327 /* Advance trailing pointer for normal CRC */
328 if (++normal_old == ctx->buffer_end)
329 normal_old = buf;
330 if (ctx->tail_block_size) {
331 ctx->running_tail_crc = crc_add_byte(ctx->crc64_iso_tab, ctx->running_tail_crc, *get_pos++);
332 if (ctx->literal_bytes == ctx->tail_block_size)
334 if ((crcmatch = tail_matches(ctx)) != -1)
335 break;
336 tail_old = (ctx->buffer_size != 0) ? ctx->buffer : buf;
337 phase = both_rolling;
338 break;
341 else
342 get_pos++;
344 break;
345 case only_tail_rolling:
346 while (consumed != buflen)
348 consumed++;
349 ctx->literal_bytes++;
350 ctx->running_tail_crc = crc_roll(ctx->crc64_iso_tab,
351 ctx->running_tail_crc,
352 *tail_old, *get_pos,
353 ctx->tail_uncrc_tab);
354 if ((crcmatch = tail_matches(ctx)) != -1)
355 break;
356 /* Advance trailing pointer for tail CRC */
357 if (++tail_old == ctx->buffer_end)
358 tail_old = buf;
359 ctx->running_normal_crc = crc_add_byte(ctx->crc64_iso_tab, ctx->running_normal_crc, *get_pos++);
360 if (ctx->literal_bytes == ctx->normal_block_size)
362 if ((crcmatch = crc_matches(ctx)) != -1)
363 break;
364 normal_old = (ctx->buffer_size != 0) ? ctx->buffer : buf;
365 phase = both_rolling;
366 break;
369 break;
370 case both_rolling:
371 while (consumed != buflen)
373 consumed++;
374 ctx->running_normal_crc = crc_roll(ctx->crc64_iso_tab,
375 ctx->running_normal_crc,
376 *normal_old, *get_pos,
377 ctx->normal_uncrc_tab);
378 if ((crcmatch = crc_matches(ctx)) != -1)
379 break;
380 /* Advance trailing pointer for normal CRC */
381 if (++normal_old == ctx->buffer_end)
382 normal_old = buf;
383 ctx->running_tail_crc = crc_roll(ctx->crc64_iso_tab,
384 ctx->running_tail_crc,
385 *tail_old, *get_pos,
386 ctx->tail_uncrc_tab);
387 if ((crcmatch = tail_matches(ctx)) != -1)
388 break;
389 /* Advance trailing pointer for tail CRC */
390 if (++tail_old == ctx->buffer_end)
391 tail_old = buf;
392 get_pos++;
394 ctx->literal_bytes += (consumed - old_consumed);
395 break;
397 ctx->total_bytes += (consumed - old_consumed);
400 if (crcmatch >= 0) {
401 /* We have a match! */
402 size_t matched_block_size = (crcmatch == ctx->num_crcs) ?
403 ctx->tail_block_size : ctx->normal_block_size;
404 if (ctx->literal_bytes > matched_block_size) {
405 /* Output literal first. */
406 *result = ctx->literal_bytes - matched_block_size;
407 ctx->literal_bytes = matched_block_size;
408 /* Remember for next time! */
409 ctx->have_match = crcmatch;
410 } else {
411 have_match:
412 *result = -crcmatch-1;
413 if (crcmatch == ctx->num_crcs)
414 assert(ctx->literal_bytes == ctx->tail_block_size);
415 else
416 assert(ctx->literal_bytes == ctx->normal_block_size);
417 ctx->literal_bytes = 0;
418 ctx->have_match = -1;
419 ctx->running_normal_crc = 0;
420 ctx->running_tail_crc = 0;
421 /* Nothing more in the buffer. */
422 ctx->buffer_size = 0;
423 ctx->buffer_end = ctx->buffer;
425 } else {
426 /* Output literal if it's more than 1 block ago and
427 keep exactly one block of data for future matching. */
428 if (ctx->literal_bytes > ctx->max_block_size) {
429 *result = ctx->literal_bytes - ctx->max_block_size;
430 ctx->literal_bytes = ctx->max_block_size;
431 /* Advance buffer. */
432 if (*result >= ctx->buffer_size) {
433 ctx->buffer_size = 0;
434 ctx->buffer_end = ctx->buffer;
436 else
438 memmove(ctx->buffer, ctx->buffer + *result,
439 ctx->buffer_size);
440 ctx->buffer_size -= *result;
441 ctx->buffer_end -= *result;
443 } else
444 *result = 0;
446 /* Now save any literal bytes we'll need in future. */
447 len = ctx->literal_bytes - ctx->buffer_size;
448 memcpy(ctx->buffer_end, buf + buflen - len, len);
449 ctx->buffer_size += len;
450 ctx->buffer_end += len;
451 assert(ctx->buffer_size <= ctx->max_block_size);
453 return consumed;
456 long crc_read_flush(struct crc_context *ctx)
458 long ret;
460 /* We might have ended right on a matched block. */
461 if (ctx->have_match != -1) {
462 size_t matched_block_size = (ctx->have_match == ctx->num_crcs) ?
463 ctx->tail_block_size : ctx->normal_block_size;
464 ctx->literal_bytes -= matched_block_size;
465 assert(ctx->literal_bytes == 0);
466 ret = -ctx->have_match-1;
467 ctx->have_match = -1;
468 ctx->running_normal_crc = 0;
469 ctx->running_tail_crc = 0;
470 /* Nothing more in the buffer. */
471 ctx->buffer_size = 0;
472 ctx->buffer_end = ctx->buffer;
473 return ret;
476 /* The rest is just a literal. */
477 ret = ctx->buffer_size;
478 assert(ctx->literal_bytes == ret);
479 ctx->buffer_size = 0;
480 ctx->buffer_end = ctx->buffer;
481 ctx->literal_bytes = 0;
482 return ret;
485 void crc_reset_running_crcs(struct crc_context *ctx)
487 ctx->running_normal_crc = 0;
488 ctx->running_tail_crc = 0;
492 * crc_context_free - free a context returned from crc_context_new.
493 * @ctx: the context returned from crc_context_new, or NULL.
495 void crc_context_free(struct crc_context *ctx)
497 free(ctx->buffer);
498 free(ctx);