#include <assert.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <ccan/crc/crc.h>
/* FIXME: That 64-bit CRC takes a while to warm the lower bits. Do
 * some quantitative tests and replace it? Meanwhile, use upper bits. */
/* Return a mask selecting the top @crcbits bits of a 64-bit CRC.
 * @crcbits must be in 1..64: a shift by 64 (crcbits == 0) would be UB. */
static uint64_t mask_of(unsigned int crcbits)
{
	return -1ULL << (64 - crcbits);
}
/*
 * crc_of_blocks - fill @crc[] with the masked CRC of each block of @data.
 * @data: bytes to checksum.
 * @len: number of bytes at @data.
 * @normal_block_size: size of every block except (possibly) the last.
 * @crcbits: number of significant (top) bits kept of each CRC.
 * @merge_trailing_bytes_in_last_block: if true, leftover bytes are folded
 *	into the final block; if false they form an extra, shorter block.
 * @crc: output array, one entry per block.
 */
void crc_of_blocks(const void *data, size_t len, unsigned int normal_block_size,
		   unsigned int crcbits, bool merge_trailing_bytes_in_last_block,
		   uint64_t crc[])
{
	unsigned int i;
	unsigned int nblocks = len / normal_block_size;
	const uint8_t *buf = data;
	uint64_t crcmask = mask_of(crcbits);

	/* Leftover bytes get a block of their own unless merged. */
	if (len % normal_block_size && !merge_trailing_bytes_in_last_block)
		nblocks++;
	/* Always emit at least one block, else nblocks - 1 underflows below. */
	if (nblocks == 0)
		nblocks = 1;
	for (i = 0; i < nblocks - 1; i++) {
		crc[i] = (crc64_iso(0, buf, normal_block_size) & crcmask);
		buf += normal_block_size;
		len -= normal_block_size;
	}
	/* Last block covers whatever length remains. */
	crc[i] = (crc64_iso(0, buf, len) & crcmask);
}
/* NOTE(review): several member declarations were lost from this copy of the
 * file; the fields below are reconstructed from their uses elsewhere in this
 * file — confirm against the original header/source. */

/* One slot of the open-addressed CRC hash table. */
struct crc_hash_record {
	uint64_t crc;	/* masked CRC stored in this slot */
	int value;	/* block index, or -1 when the slot is free */
};

/* Power-of-two sized, open-addressing hash table of block CRCs. */
struct crc_hash_table {
	struct crc_hash_record *records;
	unsigned mask;	/* table size - 1 */
};

/* State for the rolling-CRC block matcher. */
struct crc_context {
	const uint64_t *crc64_iso_tab;	/* CRC64-ISO lookup table */
	size_t normal_block_size;
	size_t tail_block_size;		/* 0 when there is no distinct tail */
	size_t max_block_size;

	/* Only the top bits of each CRC are significant. */
	uint64_t crcmask;

	/* Saved old buffer bytes (max_block_size bytes). */
	void *buffer;
	size_t buffer_size;
	void *buffer_end; /* invariant to be enforced in code: buffer_end = buffer + buffer_size */

	/* Progress so far. */
	uint64_t running_normal_crc;
	uint64_t running_tail_crc;
	size_t literal_bytes;	/* bytes seen but not yet matched/emitted */
	size_t total_bytes;
	int have_match;		/* match found on a previous call, or -1 */

	/* Tables undoing the oldest byte's contribution to a rolling CRC. */
	uint64_t normal_uncrc_tab[256];
	uint64_t tail_uncrc_tab[256];

	/* last CRC is special */
	uint64_t tail_crc;

	/* This doesn't count the last CRC. */
	unsigned int num_crcs;
	struct crc_hash_table crcs;
};
/* Extend @crc over @size zero bytes — each step is crc_add_byte() with a
 * zero byte. Negative @size is treated as zero (defensive: callers pass
 * wsize-1, which would loop wildly if wsize were 0). */
static uint64_t crc64_over_zeros(const uint64_t *crc64_iso_tab, uint64_t crc, int size)
{
	while (size-- > 0)
		crc = crc64_iso_tab[crc & 0xFFL] ^ (crc >> 8);
	return crc;
}
/* Initialize one table that is used to calculate how the crc changes when we
 * take a given char out of the crc'd area. This function is to be used when
 * there is no tail block. */
static void init_uncrc_tab(const uint64_t *crc64_iso_tab, uint64_t uncrc_tab[],
			   unsigned int wsize)
{
	unsigned int i;
	uint64_t part_crc;

	/* CRC contribution of wsize-1 zero bytes following the removed byte. */
	part_crc = crc64_over_zeros(crc64_iso_tab, 0, wsize - 1);
	for (i = 0; i < 256; i++)
		uncrc_tab[i] = crc64_over_zeros(crc64_iso_tab, crc64_iso_tab[i],
						wsize - 1) ^ part_crc;
}
/* Initialize two tables that are used to calculate how the crc changes when
 * we take a given char out of the crc'd area. This function is to be used
 * when there is a tail block: it fills one table for the tail block and
 * another for the normal block. You must pass the params for the smallest
 * block first, followed by the params for the largest block. */
static void init_uncrc_tabs(const uint64_t *crc64_iso_tab,
			    uint64_t small_uncrc_tab[], unsigned int small_wsize,
			    uint64_t large_uncrc_tab[], unsigned int large_wsize)
{
	unsigned int delta_wsize = large_wsize - small_wsize;
	uint64_t small_part_crc;
	uint64_t large_part_crc;
	uint64_t crc;
	unsigned int i;

	small_part_crc = crc64_over_zeros(crc64_iso_tab, 0, small_wsize - 1);
	/* The large window is the small one extended by delta_wsize zeros. */
	large_part_crc = crc64_over_zeros(crc64_iso_tab, small_part_crc, delta_wsize);
	for (i = 0; i < 256; i++) {
		crc = crc64_over_zeros(crc64_iso_tab, crc64_iso_tab[i], small_wsize - 1);
		small_uncrc_tab[i] = crc ^ small_part_crc;
		crc = crc64_over_zeros(crc64_iso_tab, crc, delta_wsize);
		large_uncrc_tab[i] = crc ^ large_part_crc;
	}
}
116 static unsigned crc_hashtable_getpos(const struct crc_hash_table
*crcs
, uint64_t crc
)
118 unsigned mask
= crcs
->mask
;
119 struct crc_hash_record
*records
= crcs
->records
;
120 unsigned pos
= (crc
>> 32) & mask
; // Use highest 32 bits of the checksum as start position
121 unsigned step
= (1 + (crc
& 0x1e)); // Step with an odd-number of steps, exact value depends on crc lowest 5 bits
123 while (records
[pos
].value
!= -1 && records
[pos
].crc
!= crc
)
125 // This position is already taken by another crc record. Go to next position
126 pos
= (pos
+ step
) & mask
;
131 static void crc_hashtable_put(struct crc_hash_table
*crcs
, uint64_t crc
, int value
)
133 unsigned pos
= crc_hashtable_getpos(crcs
, crc
);
134 crcs
->records
[pos
].value
= value
;
135 crcs
->records
[pos
].crc
= crc
;
138 static int crc_hashtable_get(const struct crc_hash_table
*crcs
, uint64_t crc
)
140 unsigned pos
= crc_hashtable_getpos(crcs
, crc
);
141 // Found an empty position (with value -1) or found the entry for the requested CRC
142 return crcs
->records
[pos
].value
;
145 struct crc_context
*crc_context_new(size_t normal_block_size
, unsigned crcbits
,
146 const uint64_t crc
[], unsigned num_crcs
,
147 size_t tail_block_size
)
149 struct crc_context
*ctx
;
151 assert(num_crcs
> 0);
152 assert(normal_block_size
> 0);
153 assert(tail_block_size
>= 0);
155 ctx
= malloc(sizeof(*ctx
) + sizeof(crc
[0])*num_crcs
);
157 ctx
->crc64_iso_tab
= crc64_iso_table();
158 ctx
->normal_block_size
= normal_block_size
;
159 if (tail_block_size
== normal_block_size
)
161 tail_block_size
= 0; // treat a tail block with normal block size as a normal block
163 ctx
->tail_block_size
= tail_block_size
;
164 ctx
->max_block_size
= (tail_block_size
> normal_block_size
) ?
165 tail_block_size
: normal_block_size
;
167 ctx
->tail_crc
= crc
[--num_crcs
];
169 ctx
->crcmask
= mask_of(crcbits
);
170 ctx
->num_crcs
= num_crcs
;
171 unsigned crc_hashtable_size
= 4;
172 while (crc_hashtable_size
< 2*num_crcs
)
174 crc_hashtable_size
<<= 1;
176 ctx
->crcs
.mask
= crc_hashtable_size
-1;
177 ctx
->crcs
.records
= malloc(sizeof(struct crc_hash_record
)*crc_hashtable_size
);
179 for (cnt
=0; cnt
!= crc_hashtable_size
; cnt
++)
181 ctx
->crcs
.records
[cnt
].value
= -1;
183 for (cnt
=0; cnt
!= num_crcs
; cnt
++)
185 crc_hashtable_put(&ctx
->crcs
, crc
[cnt
], cnt
);
187 // memcpy(ctx->crc, crc, sizeof(crc[0])*num_crcs);
188 ctx
->running_normal_crc
= 0;
189 ctx
->literal_bytes
= 0;
190 ctx
->total_bytes
= 0;
191 ctx
->have_match
= -1;
194 if (tail_block_size
< normal_block_size
)
195 init_uncrc_tabs(ctx
->crc64_iso_tab
, ctx
->tail_uncrc_tab
, tail_block_size
, ctx
->normal_uncrc_tab
, normal_block_size
);
197 init_uncrc_tabs(ctx
->crc64_iso_tab
, ctx
->normal_uncrc_tab
, normal_block_size
, ctx
->tail_uncrc_tab
, tail_block_size
);
201 init_uncrc_tab(ctx
->crc64_iso_tab
, ctx
->normal_uncrc_tab
, normal_block_size
);
204 ctx
->buffer
= malloc(ctx
->max_block_size
);
210 ctx
->buffer_size
= 0;
211 ctx
->buffer_end
= ctx
->buffer
;
217 /* Return -1 or index into matching crc. */
218 /* Only invoke once you have read enough literal bytes! */
219 static int crc_matches(const struct crc_context
*ctx
)
221 return crc_hashtable_get(&ctx
->crcs
, ctx
->running_normal_crc
& ctx
->crcmask
);
224 /* Return -1 or index of tail crc */
225 /* Only invoke once you have read enough literal bytes! */
226 static int tail_matches(const struct crc_context
*ctx
)
228 return (ctx
->running_tail_crc
& ctx
->crcmask
) == ctx
->tail_crc
? ctx
->num_crcs
: -1;
/* Extend @crc by one new byte (standard table-driven CRC64 step). */
static uint64_t crc_add_byte(const uint64_t *crc64_iso_tab, uint64_t crc,
			     uint8_t newbyte)
{
	return crc64_iso_tab[(crc ^ newbyte) & 0xFFL] ^ (crc >> 8);
}
/* Cancel the contribution of the oldest byte from a rolling @crc, using a
 * table built by init_uncrc_tab()/init_uncrc_tabs(). */
static uint64_t crc_remove_byte(uint64_t crc, uint8_t oldbyte,
				const uint64_t uncrc_tab[])
{
	return crc ^ uncrc_tab[oldbyte];
}
/* Roll the CRC window one byte forward: drop @oldbyte, add @newbyte. */
static uint64_t crc_roll(const uint64_t *crc64_iso_tab, uint64_t crc,
			 uint8_t oldbyte, uint8_t newbyte,
			 const uint64_t uncrc_tab[])
{
	return crc_add_byte(crc64_iso_tab,
			    crc_remove_byte(crc, oldbyte, uncrc_tab), newbyte);
}
/* Which rolling windows are active inside crc_read_block(): none, only the
 * tail window, only the normal window, or both. */
enum RB_PHASE { non_rolling, only_tail_rolling, only_normal_rolling, both_rolling };

/* NOTE: evaluates its arguments twice — use only with side-effect-free args. */
#define MIN(X,Y) ((X) < (Y) ? (X) : (Y))
/*
 * crc_read_block - feed @buflen bytes at @buf through the rolling-CRC
 * matcher, reporting a literal run or a matched block via *@result.
 *
 * NOTE(review): this function is syntactically incomplete in this copy of
 * the file — braces, the declarations of `crcmatch` and `phase`, the
 * `switch (phase)` header, several `else`/`case`/`break` lines, the
 * statements advancing `consumed`/`get_pos`, two crc_roll() argument lists
 * and a memmove() length argument have been lost. Each suspect spot is
 * flagged below; recover the full text from version control before
 * building. All surviving tokens are kept unchanged.
 */
size_t crc_read_block(struct crc_context *ctx, long *result,
		      const void *buf, size_t buflen)
	size_t consumed = 0, len;
	const uint8_t *normal_old, *tail_old, *get_pos = buf;

	/* Simple optimization, if we found a match last time. */
	if (ctx->have_match >= 0) {
		crcmatch = ctx->have_match;

	/* normal_old is the trailing edge of the normal checksum window. */
	if (ctx->buffer_size >= ctx->normal_block_size)
		normal_old = ctx->buffer_end - ctx->normal_block_size;

	/* tail_old is the trailing edge of the tail checksum window. */
	if (ctx->tail_block_size != 0 && ctx->buffer_size >= ctx->tail_block_size)
		tail_old = ctx->buffer_end - ctx->tail_block_size;

	/* Pick the starting phase from which windows are already warm.
	 * NOTE(review): branch bodies / `else` lines are missing here. */
	if (normal_old == NULL && tail_old == NULL)
	if (normal_old == NULL)
		phase = only_tail_rolling;
	if (tail_old == NULL)
		phase = only_normal_rolling;
		phase = both_rolling;

	while (consumed != buflen && crcmatch == -1) {
		size_t old_consumed = consumed;

		/* Absorb literal bytes until the smaller window is full. */
		size_t nbytes = MIN(buflen - consumed, MIN(ctx->normal_block_size - ctx->literal_bytes, ctx->tail_block_size - ctx->literal_bytes));
		ctx->running_tail_crc = ctx->running_normal_crc = crc64_iso(ctx->running_normal_crc, get_pos, nbytes);
		/* NOTE(review): advancement of consumed/get_pos by nbytes is missing. */
		ctx->literal_bytes += nbytes;
		if (ctx->literal_bytes == ctx->normal_block_size) {
			/* Reached the end of a normal block. Check CRC
			   and start rolling the CRC at next iteration */
			if ((crcmatch = crc_matches(ctx)) != -1)
			normal_old = (ctx->buffer_size != 0) ? ctx->buffer : buf;
			phase = only_normal_rolling;
		else if (ctx->literal_bytes == ctx->tail_block_size) {
			/* Reached the end of a tail block. Check tail CRC
			   and start rolling the CRC at next iteration */
			if ((crcmatch = tail_matches(ctx)) != -1)
			tail_old = (ctx->buffer_size != 0) ? ctx->buffer : buf;
			phase = only_tail_rolling;

		/* NOTE(review): the `switch (phase)` header is missing. */
		case only_normal_rolling:
			while (consumed != buflen)
				ctx->literal_bytes++;
				ctx->running_normal_crc = crc_roll(ctx->crc64_iso_tab,
								   ctx->running_normal_crc,
								   *normal_old, *get_pos,
								   ctx->normal_uncrc_tab);
				if ((crcmatch = crc_matches(ctx)) != -1)
				/* Advance trailing pointer for normal CRC */
				if (++normal_old == ctx->buffer_end)
				if (ctx->tail_block_size) {
					ctx->running_tail_crc = crc_add_byte(ctx->crc64_iso_tab, ctx->running_tail_crc, *get_pos++);
					if (ctx->literal_bytes == ctx->tail_block_size)
						if ((crcmatch = tail_matches(ctx)) != -1)
						tail_old = (ctx->buffer_size != 0) ? ctx->buffer : buf;
						phase = both_rolling;

		case only_tail_rolling:
			while (consumed != buflen)
				ctx->literal_bytes++;
				/* NOTE(review): the *tail_old, *get_pos arguments are missing. */
				ctx->running_tail_crc = crc_roll(ctx->crc64_iso_tab,
								 ctx->running_tail_crc,
								 ctx->tail_uncrc_tab);
				if ((crcmatch = tail_matches(ctx)) != -1)
				/* Advance trailing pointer for tail CRC */
				if (++tail_old == ctx->buffer_end)
				ctx->running_normal_crc = crc_add_byte(ctx->crc64_iso_tab, ctx->running_normal_crc, *get_pos++);
				if (ctx->literal_bytes == ctx->normal_block_size)
					if ((crcmatch = crc_matches(ctx)) != -1)
					normal_old = (ctx->buffer_size != 0) ? ctx->buffer : buf;
					phase = both_rolling;

		/* NOTE(review): presumably `case both_rolling:` belongs here. */
			while (consumed != buflen)
				ctx->running_normal_crc = crc_roll(ctx->crc64_iso_tab,
								   ctx->running_normal_crc,
								   *normal_old, *get_pos,
								   ctx->normal_uncrc_tab);
				if ((crcmatch = crc_matches(ctx)) != -1)
				/* Advance trailing pointer for normal CRC */
				if (++normal_old == ctx->buffer_end)
				/* NOTE(review): the *tail_old, *get_pos arguments are missing. */
				ctx->running_tail_crc = crc_roll(ctx->crc64_iso_tab,
								 ctx->running_tail_crc,
								 ctx->tail_uncrc_tab);
				if ((crcmatch = tail_matches(ctx)) != -1)
				/* Advance trailing pointer for tail CRC */
				if (++tail_old == ctx->buffer_end)

		ctx->literal_bytes += (consumed - old_consumed);
		ctx->total_bytes += (consumed - old_consumed);

	/* We have a match! */
	size_t matched_block_size = (crcmatch == ctx->num_crcs) ?
		ctx->tail_block_size : ctx->normal_block_size;
	if (ctx->literal_bytes > matched_block_size) {
		/* Output literal first. */
		*result = ctx->literal_bytes - matched_block_size;
		ctx->literal_bytes = matched_block_size;
		/* Remember for next time! */
		ctx->have_match = crcmatch;
	*result = -crcmatch-1;
	if (crcmatch == ctx->num_crcs)
		assert(ctx->literal_bytes == ctx->tail_block_size);
	assert(ctx->literal_bytes == ctx->normal_block_size);
	ctx->literal_bytes = 0;
	ctx->have_match = -1;
	ctx->running_normal_crc = 0;
	ctx->running_tail_crc = 0;
	/* Nothing more in the buffer. */
	ctx->buffer_size = 0;
	ctx->buffer_end = ctx->buffer;

	/* Output literal if it's more than 1 block ago and
	   keep exactly one block of data for future matching. */
	if (ctx->literal_bytes > ctx->max_block_size) {
		*result = ctx->literal_bytes - ctx->max_block_size;
		ctx->literal_bytes = ctx->max_block_size;
		/* Advance buffer. */
		if (*result >= ctx->buffer_size) {
			ctx->buffer_size = 0;
			ctx->buffer_end = ctx->buffer;
		/* NOTE(review): memmove's length argument is missing here. */
		memmove(ctx->buffer, ctx->buffer + *result,
		ctx->buffer_size -= *result;
		ctx->buffer_end -= *result;

	/* Now save any literal bytes we'll need in future. */
	len = ctx->literal_bytes - ctx->buffer_size;
	memcpy(ctx->buffer_end, buf + buflen - len, len);
	ctx->buffer_size += len;
	ctx->buffer_end += len;
	assert(ctx->buffer_size <= ctx->max_block_size);
456 long crc_read_flush(struct crc_context
*ctx
)
460 /* We might have ended right on a matched block. */
461 if (ctx
->have_match
!= -1) {
462 size_t matched_block_size
= (ctx
->have_match
== ctx
->num_crcs
) ?
463 ctx
->tail_block_size
: ctx
->normal_block_size
;
464 ctx
->literal_bytes
-= matched_block_size
;
465 assert(ctx
->literal_bytes
== 0);
466 ret
= -ctx
->have_match
-1;
467 ctx
->have_match
= -1;
468 ctx
->running_normal_crc
= 0;
469 ctx
->running_tail_crc
= 0;
470 /* Nothing more in the buffer. */
471 ctx
->buffer_size
= 0;
472 ctx
->buffer_end
= ctx
->buffer
;
476 /* The rest is just a literal. */
477 ret
= ctx
->buffer_size
;
478 assert(ctx
->literal_bytes
== ret
);
479 ctx
->buffer_size
= 0;
480 ctx
->buffer_end
= ctx
->buffer
;
481 ctx
->literal_bytes
= 0;
485 void crc_reset_running_crcs(struct crc_context
*ctx
)
487 ctx
->running_normal_crc
= 0;
488 ctx
->running_tail_crc
= 0;
492 * crc_context_free - free a context returned from crc_context_new.
493 * @ctx: the context returned from crc_context_new, or NULL.
495 void crc_context_free(struct crc_context
*ctx
)