reftable/block: reuse zstream when writing log blocks
[alt-git.git] / reftable / block.c
blob9129305515efe8c5296b4dc9e5fdcd64e2f451b5
1 /*
2 Copyright 2020 Google LLC
4 Use of this source code is governed by a BSD-style
5 license that can be found in the LICENSE file or at
6 https://developers.google.com/open-source/licenses/bsd
7 */
9 #include "block.h"
11 #include "blocksource.h"
12 #include "constants.h"
13 #include "record.h"
14 #include "reftable-error.h"
15 #include "system.h"
16 #include <zlib.h>
18 int header_size(int version)
20 switch (version) {
21 case 1:
22 return 24;
23 case 2:
24 return 28;
26 abort();
29 int footer_size(int version)
31 switch (version) {
32 case 1:
33 return 68;
34 case 2:
35 return 72;
37 abort();
40 static int block_writer_register_restart(struct block_writer *w, int n,
41 int is_restart, struct strbuf *key)
43 int rlen = w->restart_len;
44 if (rlen >= MAX_RESTARTS) {
45 is_restart = 0;
48 if (is_restart) {
49 rlen++;
51 if (2 + 3 * rlen + n > w->block_size - w->next)
52 return -1;
53 if (is_restart) {
54 REFTABLE_ALLOC_GROW(w->restarts, w->restart_len + 1, w->restart_cap);
55 w->restarts[w->restart_len++] = w->next;
58 w->next += n;
60 strbuf_reset(&w->last_key);
61 strbuf_addbuf(&w->last_key, key);
62 w->entries++;
63 return 0;
66 void block_writer_init(struct block_writer *bw, uint8_t typ, uint8_t *buf,
67 uint32_t block_size, uint32_t header_off, int hash_size)
69 bw->buf = buf;
70 bw->hash_size = hash_size;
71 bw->block_size = block_size;
72 bw->header_off = header_off;
73 bw->buf[header_off] = typ;
74 bw->next = header_off + 4;
75 bw->restart_interval = 16;
76 bw->entries = 0;
77 bw->restart_len = 0;
78 bw->last_key.len = 0;
79 if (!bw->zstream) {
80 REFTABLE_CALLOC_ARRAY(bw->zstream, 1);
81 deflateInit(bw->zstream, 9);
85 uint8_t block_writer_type(struct block_writer *bw)
87 return bw->buf[bw->header_off];
90 /* Adds the reftable_record to the block. Returns -1 if it does not fit, 0 on
91 success. Returns REFTABLE_API_ERROR if attempting to write a record with
92 empty key. */
93 int block_writer_add(struct block_writer *w, struct reftable_record *rec)
95 struct strbuf empty = STRBUF_INIT;
96 struct strbuf last =
97 w->entries % w->restart_interval == 0 ? empty : w->last_key;
98 struct string_view out = {
99 .buf = w->buf + w->next,
100 .len = w->block_size - w->next,
103 struct string_view start = out;
105 int is_restart = 0;
106 struct strbuf key = STRBUF_INIT;
107 int n = 0;
108 int err = -1;
110 reftable_record_key(rec, &key);
111 if (!key.len) {
112 err = REFTABLE_API_ERROR;
113 goto done;
116 n = reftable_encode_key(&is_restart, out, last, key,
117 reftable_record_val_type(rec));
118 if (n < 0)
119 goto done;
120 string_view_consume(&out, n);
122 n = reftable_record_encode(rec, out, w->hash_size);
123 if (n < 0)
124 goto done;
125 string_view_consume(&out, n);
127 err = block_writer_register_restart(w, start.len - out.len, is_restart,
128 &key);
129 done:
130 strbuf_release(&key);
131 return err;
134 int block_writer_finish(struct block_writer *w)
136 int i;
137 for (i = 0; i < w->restart_len; i++) {
138 put_be24(w->buf + w->next, w->restarts[i]);
139 w->next += 3;
142 put_be16(w->buf + w->next, w->restart_len);
143 w->next += 2;
144 put_be24(w->buf + 1 + w->header_off, w->next);
147 * Log records are stored zlib-compressed. Note that the compression
148 * also spans over the restart points we have just written.
150 if (block_writer_type(w) == BLOCK_TYPE_LOG) {
151 int block_header_skip = 4 + w->header_off;
152 uLongf src_len = w->next - block_header_skip, compressed_len;
153 unsigned char *compressed;
154 int ret;
156 ret = deflateReset(w->zstream);
157 if (ret != Z_OK)
158 return REFTABLE_ZLIB_ERROR;
161 * Precompute the upper bound of how many bytes the compressed
162 * data may end up with. Combined with `Z_FINISH`, `deflate()`
163 * is guaranteed to return `Z_STREAM_END`.
165 compressed_len = deflateBound(w->zstream, src_len);
166 REFTABLE_ALLOC_ARRAY(compressed, compressed_len);
168 w->zstream->next_out = compressed;
169 w->zstream->avail_out = compressed_len;
170 w->zstream->next_in = w->buf + block_header_skip;
171 w->zstream->avail_in = src_len;
174 * We want to perform all decompression in a single step, which
175 * is why we can pass Z_FINISH here. As we have precomputed the
176 * deflated buffer's size via `deflateBound()` this function is
177 * guaranteed to succeed according to the zlib documentation.
179 ret = deflate(w->zstream, Z_FINISH);
180 if (ret != Z_STREAM_END) {
181 reftable_free(compressed);
182 return REFTABLE_ZLIB_ERROR;
186 * Overwrite the uncompressed data we have already written and
187 * adjust the `next` pointer to point right after the
188 * compressed data.
190 memcpy(w->buf + block_header_skip, compressed,
191 w->zstream->total_out);
192 w->next = w->zstream->total_out + block_header_skip;
194 reftable_free(compressed);
197 return w->next;
200 uint8_t block_reader_type(struct block_reader *r)
202 return r->block.data[r->header_off];
205 int block_reader_init(struct block_reader *br, struct reftable_block *block,
206 uint32_t header_off, uint32_t table_block_size,
207 int hash_size)
209 uint32_t full_block_size = table_block_size;
210 uint8_t typ = block->data[header_off];
211 uint32_t sz = get_be24(block->data + header_off + 1);
212 int err = 0;
213 uint16_t restart_count = 0;
214 uint32_t restart_start = 0;
215 uint8_t *restart_bytes = NULL;
216 uint8_t *uncompressed = NULL;
218 if (!reftable_is_block_type(typ)) {
219 err = REFTABLE_FORMAT_ERROR;
220 goto done;
223 if (typ == BLOCK_TYPE_LOG) {
224 int block_header_skip = 4 + header_off;
225 uLongf dst_len = sz - block_header_skip; /* total size of dest
226 buffer. */
227 uLongf src_len = block->len - block_header_skip;
229 /* Log blocks specify the *uncompressed* size in their header. */
230 REFTABLE_ALLOC_ARRAY(uncompressed, sz);
232 /* Copy over the block header verbatim. It's not compressed. */
233 memcpy(uncompressed, block->data, block_header_skip);
235 /* Uncompress */
236 if (Z_OK !=
237 uncompress2(uncompressed + block_header_skip, &dst_len,
238 block->data + block_header_skip, &src_len)) {
239 err = REFTABLE_ZLIB_ERROR;
240 goto done;
243 if (dst_len + block_header_skip != sz) {
244 err = REFTABLE_FORMAT_ERROR;
245 goto done;
248 /* We're done with the input data. */
249 reftable_block_done(block);
250 block->data = uncompressed;
251 uncompressed = NULL;
252 block->len = sz;
253 block->source = malloc_block_source();
254 full_block_size = src_len + block_header_skip;
255 } else if (full_block_size == 0) {
256 full_block_size = sz;
257 } else if (sz < full_block_size && sz < block->len &&
258 block->data[sz] != 0) {
259 /* If the block is smaller than the full block size, it is
260 padded (data followed by '\0') or the next block is
261 unaligned. */
262 full_block_size = sz;
265 restart_count = get_be16(block->data + sz - 2);
266 restart_start = sz - 2 - 3 * restart_count;
267 restart_bytes = block->data + restart_start;
269 /* transfer ownership. */
270 br->block = *block;
271 block->data = NULL;
272 block->len = 0;
274 br->hash_size = hash_size;
275 br->block_len = restart_start;
276 br->full_block_size = full_block_size;
277 br->header_off = header_off;
278 br->restart_count = restart_count;
279 br->restart_bytes = restart_bytes;
281 done:
282 reftable_free(uncompressed);
283 return err;
286 static uint32_t block_reader_restart_offset(struct block_reader *br, int i)
288 return get_be24(br->restart_bytes + 3 * i);
291 void block_reader_start(struct block_reader *br, struct block_iter *it)
293 it->br = br;
294 strbuf_reset(&it->last_key);
295 it->next_off = br->header_off + 4;
298 struct restart_find_args {
299 int error;
300 struct strbuf key;
301 struct block_reader *r;
304 static int restart_key_less(size_t idx, void *args)
306 struct restart_find_args *a = args;
307 uint32_t off = block_reader_restart_offset(a->r, idx);
308 struct string_view in = {
309 .buf = a->r->block.data + off,
310 .len = a->r->block_len - off,
313 /* the restart key is verbatim in the block, so this could avoid the
314 alloc for decoding the key */
315 struct strbuf rkey = STRBUF_INIT;
316 uint8_t unused_extra;
317 int n = reftable_decode_key(&rkey, &unused_extra, in);
318 int result;
319 if (n < 0) {
320 a->error = 1;
321 return -1;
324 result = strbuf_cmp(&a->key, &rkey);
325 strbuf_release(&rkey);
326 return result < 0;
329 void block_iter_copy_from(struct block_iter *dest, struct block_iter *src)
331 dest->br = src->br;
332 dest->next_off = src->next_off;
333 strbuf_reset(&dest->last_key);
334 strbuf_addbuf(&dest->last_key, &src->last_key);
337 int block_iter_next(struct block_iter *it, struct reftable_record *rec)
339 struct string_view in = {
340 .buf = it->br->block.data + it->next_off,
341 .len = it->br->block_len - it->next_off,
343 struct string_view start = in;
344 uint8_t extra = 0;
345 int n = 0;
347 if (it->next_off >= it->br->block_len)
348 return 1;
350 n = reftable_decode_key(&it->last_key, &extra, in);
351 if (n < 0)
352 return -1;
353 if (!it->last_key.len)
354 return REFTABLE_FORMAT_ERROR;
356 string_view_consume(&in, n);
357 n = reftable_record_decode(rec, it->last_key, extra, in, it->br->hash_size,
358 &it->scratch);
359 if (n < 0)
360 return -1;
361 string_view_consume(&in, n);
363 it->next_off += start.len - in.len;
364 return 0;
367 int block_reader_first_key(struct block_reader *br, struct strbuf *key)
369 int off = br->header_off + 4, n;
370 struct string_view in = {
371 .buf = br->block.data + off,
372 .len = br->block_len - off,
374 uint8_t extra = 0;
376 strbuf_reset(key);
378 n = reftable_decode_key(key, &extra, in);
379 if (n < 0)
380 return n;
381 if (!key->len)
382 return REFTABLE_FORMAT_ERROR;
384 return 0;
387 int block_iter_seek(struct block_iter *it, struct strbuf *want)
389 return block_reader_seek(it->br, it, want);
392 void block_iter_close(struct block_iter *it)
394 strbuf_release(&it->last_key);
395 strbuf_release(&it->scratch);
398 int block_reader_seek(struct block_reader *br, struct block_iter *it,
399 struct strbuf *want)
401 struct restart_find_args args = {
402 .key = *want,
403 .r = br,
405 struct block_iter next = BLOCK_ITER_INIT;
406 struct reftable_record rec;
407 int err = 0, i;
409 if (args.error) {
410 err = REFTABLE_FORMAT_ERROR;
411 goto done;
414 i = binsearch(br->restart_count, &restart_key_less, &args);
415 if (i > 0)
416 it->next_off = block_reader_restart_offset(br, i - 1);
417 else
418 it->next_off = br->header_off + 4;
419 it->br = br;
421 reftable_record_init(&rec, block_reader_type(br));
423 /* We're looking for the last entry less/equal than the wanted key, so
424 we have to go one entry too far and then back up.
426 while (1) {
427 block_iter_copy_from(&next, it);
428 err = block_iter_next(&next, &rec);
429 if (err < 0)
430 goto done;
432 reftable_record_key(&rec, &it->last_key);
433 if (err > 0 || strbuf_cmp(&it->last_key, want) >= 0) {
434 err = 0;
435 goto done;
438 block_iter_copy_from(it, &next);
441 done:
442 block_iter_close(&next);
443 reftable_record_release(&rec);
445 return err;
448 void block_writer_release(struct block_writer *bw)
450 deflateEnd(bw->zstream);
451 FREE_AND_NULL(bw->zstream);
452 FREE_AND_NULL(bw->restarts);
453 strbuf_release(&bw->last_key);
454 /* the block is not owned. */
457 void reftable_block_done(struct reftable_block *blockp)
459 struct reftable_block_source source = blockp->source;
460 if (blockp && source.ops)
461 source.ops->return_block(source.arg, blockp);
462 blockp->data = NULL;
463 blockp->len = 0;
464 blockp->source.ops = NULL;
465 blockp->source.arg = NULL;