/*
Copyright 2020 Google LLC

Use of this source code is governed by a BSD-style
license that can be found in the LICENSE file or at
https://developers.google.com/open-source/licenses/bsd
*/
#include "block.h"

#include "blocksource.h"
#include "constants.h"
#include "record.h"
#include "reftable-error.h"
#include "system.h"
#include <zlib.h>
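/*
 * Header/footer sizes per reftable format version. Version 2 extends the
 * version 1 layout with a 4-byte hash ID field, hence both grow by four
 * bytes.
 */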
int header_size(int version)
{
	switch (version) {
	case 1:
		return 24;
	case 2:
		return 28;
	}
	abort();
}
int footer_size(int version)
{
	switch (version) {
	case 1:
		return 68;
	case 2:
		return 72;
	}
	abort();
}
static int block_writer_register_restart(struct block_writer *w, int n,
					 int is_restart, struct strbuf *key)
{
	int rlen = w->restart_len;
	if (rlen >= MAX_RESTARTS) {
		is_restart = 0;
	}

	if (is_restart) {
		rlen++;
	}
	if (2 + 3 * rlen + n > w->block_size - w->next)
		return -1;
	if (is_restart) {
		REFTABLE_ALLOC_GROW(w->restarts, w->restart_len + 1, w->restart_cap);
		w->restarts[w->restart_len++] = w->next;
	}

	w->next += n;

	strbuf_reset(&w->last_key);
	strbuf_addbuf(&w->last_key, key);
	w->entries++;
	return 0;
}
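/*
 * On-disk layout produced by the writer below: a 4-byte header (a 1-byte
 * block type plus a 24-bit length), the prefix-compressed records, then a
 * trailer of 3-byte restart offsets followed by a 2-byte restart count.
 * The space check in block_writer_register_restart() reserves exactly this
 * trailer (2 + 3 * rlen bytes) in addition to the record itself.
 */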
void block_writer_init(struct block_writer *bw, uint8_t typ, uint8_t *buf,
		       uint32_t block_size, uint32_t header_off, int hash_size)
{
	bw->buf = buf;
	bw->hash_size = hash_size;
	bw->block_size = block_size;
	bw->header_off = header_off;
	bw->buf[header_off] = typ;
	bw->next = header_off + 4;
	bw->restart_interval = 16;
	bw->entries = 0;
	bw->restart_len = 0;
	bw->last_key.len = 0;
}
uint8_t block_writer_type(struct block_writer *bw)
{
	return bw->buf[bw->header_off];
}
/* Adds the reftable_record to the block. Returns -1 if it does not fit, 0 on
   success. Returns REFTABLE_API_ERROR when attempting to write a record with
   an empty key. */
int block_writer_add(struct block_writer *w, struct reftable_record *rec)
{
	struct strbuf empty = STRBUF_INIT;
	struct strbuf last =
		w->entries % w->restart_interval == 0 ? empty : w->last_key;
	struct string_view out = {
		.buf = w->buf + w->next,
		.len = w->block_size - w->next,
	};
	struct string_view start = out;
	int is_restart = 0;
	struct strbuf key = STRBUF_INIT;
	int n = 0;
	int err = -1;

	reftable_record_key(rec, &key);
	if (!key.len) {
		err = REFTABLE_API_ERROR;
		goto done;
	}

	n = reftable_encode_key(&is_restart, out, last, key,
				reftable_record_val_type(rec));
	if (n < 0)
		goto done;
	string_view_consume(&out, n);

	n = reftable_record_encode(rec, out, w->hash_size);
	if (n < 0)
		goto done;
	string_view_consume(&out, n);

	err = block_writer_register_restart(w, start.len - out.len, is_restart,
					    &key);
done:
	strbuf_release(&key);
	return err;
}
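/*
 * A sketch of the intended calling pattern (illustrative only): callers
 * keep adding records until the block reports it is full, then finish the
 * block and start a new one before retrying the add.
 *
 *	if (block_writer_add(w, &rec) == -1) {
 *		block_writer_finish(w);
 *		... set up the next block, then retry adding the record ...
 *	}
 */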
int block_writer_finish(struct block_writer *w)
{
	int i;
	for (i = 0; i < w->restart_len; i++) {
		put_be24(w->buf + w->next, w->restarts[i]);
		w->next += 3;
	}

	put_be16(w->buf + w->next, w->restart_len);
	w->next += 2;
	put_be24(w->buf + 1 + w->header_off, w->next);

	if (block_writer_type(w) == BLOCK_TYPE_LOG) {
		int block_header_skip = 4 + w->header_off;
		uLongf src_len = w->next - block_header_skip;
		/* zlib guarantees output growth of at most 0.1% plus 12 bytes. */
		uLongf dest_cap = src_len * 1.001 + 12;
		uint8_t *compressed;

		REFTABLE_ALLOC_ARRAY(compressed, dest_cap);

		while (1) {
			uLongf out_dest_len = dest_cap;
			int zresult = compress2(compressed, &out_dest_len,
						w->buf + block_header_skip,
						src_len, 9);
			if (zresult == Z_BUF_ERROR && dest_cap < LONG_MAX) {
				/* The buffer was too small after all; double it and retry. */
				dest_cap *= 2;
				compressed =
					reftable_realloc(compressed, dest_cap);
				if (compressed)
					continue;
			}

			if (Z_OK != zresult) {
				reftable_free(compressed);
				return REFTABLE_ZLIB_ERROR;
			}

			memcpy(w->buf + block_header_skip, compressed,
			       out_dest_len);
			w->next = out_dest_len + block_header_skip;
			reftable_free(compressed);
			break;
		}
	}
	return w->next;
}
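/*
 * Initialize the reader for the block starting at `header_off`. Log blocks
 * are stored zlib-deflated, so their payload is inflated into a reusable
 * scratch buffer first; other block types are read in place. On success the
 * reader takes ownership of `block`.
 */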
int block_reader_init(struct block_reader *br, struct reftable_block *block,
		      uint32_t header_off, uint32_t table_block_size,
		      int hash_size)
{
	uint32_t full_block_size = table_block_size;
	uint8_t typ = block->data[header_off];
	uint32_t sz = get_be24(block->data + header_off + 1);
	int err = 0;
	uint16_t restart_count = 0;
	uint32_t restart_start = 0;
	uint8_t *restart_bytes = NULL;

	reftable_block_done(&br->block);

	if (!reftable_is_block_type(typ)) {
		err = REFTABLE_FORMAT_ERROR;
		goto done;
	}

	if (typ == BLOCK_TYPE_LOG) {
		uint32_t block_header_skip = 4 + header_off;
		uLong dst_len = sz - block_header_skip;
		uLong src_len = block->len - block_header_skip;

		/* Log blocks specify the *uncompressed* size in their header. */
		REFTABLE_ALLOC_GROW(br->uncompressed_data, sz,
				    br->uncompressed_cap);

		/* Copy over the block header verbatim. It's not compressed. */
		memcpy(br->uncompressed_data, block->data, block_header_skip);

		if (!br->zstream) {
			REFTABLE_CALLOC_ARRAY(br->zstream, 1);
			err = inflateInit(br->zstream);
		} else {
			err = inflateReset(br->zstream);
		}
		if (err != Z_OK) {
			err = REFTABLE_ZLIB_ERROR;
			goto done;
		}

		br->zstream->next_in = block->data + block_header_skip;
		br->zstream->avail_in = src_len;
		br->zstream->next_out = br->uncompressed_data + block_header_skip;
		br->zstream->avail_out = dst_len;

		/*
		 * We know both input as well as output size, and we know that
		 * the sizes should never be bigger than `uInt_MAX` because
		 * blocks can at most be 16MB large. We can thus use `Z_FINISH`
		 * here to instruct zlib to inflate the data in one go, which
		 * is more efficient than using `Z_NO_FLUSH`.
		 */
		err = inflate(br->zstream, Z_FINISH);
		if (err != Z_STREAM_END) {
			err = REFTABLE_ZLIB_ERROR;
			goto done;
		}
		err = 0;

		if (br->zstream->total_out + block_header_skip != sz) {
			err = REFTABLE_FORMAT_ERROR;
			goto done;
		}

		/* We're done with the input data. */
		reftable_block_done(block);
		block->data = br->uncompressed_data;
		block->len = sz;
		full_block_size = src_len + block_header_skip - br->zstream->avail_in;
	} else if (full_block_size == 0) {
		full_block_size = sz;
	} else if (sz < full_block_size && sz < block->len &&
		   block->data[sz] != 0) {
		/* If the block is smaller than the full block size, it is
		   padded (data followed by '\0') or the next block is
		   unaligned. */
		full_block_size = sz;
	}

	restart_count = get_be16(block->data + sz - 2);
	restart_start = sz - 2 - 3 * restart_count;
	restart_bytes = block->data + restart_start;

	/* transfer ownership. */
	br->block = *block;
	block->data = NULL;
	block->len = 0;

	br->hash_size = hash_size;
	br->block_len = restart_start;
	br->full_block_size = full_block_size;
	br->header_off = header_off;
	br->restart_count = restart_count;
	br->restart_bytes = restart_bytes;

done:
	return err;
}
void block_reader_release(struct block_reader *br)
{
	inflateEnd(br->zstream);
	reftable_free(br->zstream);
	reftable_free(br->uncompressed_data);
	reftable_block_done(&br->block);
}
uint8_t block_reader_type(const struct block_reader *r)
{
	return r->block.data[r->header_off];
}
int block_reader_first_key(const struct block_reader *br, struct strbuf *key)
{
	int off = br->header_off + 4, n;
	struct string_view in = {
		.buf = br->block.data + off,
		.len = br->block_len - off,
	};
	uint8_t extra = 0;

	strbuf_reset(key);

	n = reftable_decode_key(key, &extra, in);
	if (n < 0)
		return n;
	if (!key->len)
		return REFTABLE_FORMAT_ERROR;

	return 0;
}
static uint32_t block_reader_restart_offset(const struct block_reader *br, int i)
{
	return get_be24(br->restart_bytes + 3 * i);
}
void block_iter_seek_start(struct block_iter *it, const struct block_reader *br)
{
	it->block = br->block.data;
	it->block_len = br->block_len;
	it->hash_size = br->hash_size;
	strbuf_reset(&it->last_key);
	it->next_off = br->header_off + 4;
}
struct restart_needle_less_args {
	int error;
	struct strbuf needle;
	const struct block_reader *reader;
};
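/*
 * Comparison callback for the binary search in block_iter_seek_key()
 * below: returns nonzero iff the needle sorts strictly before the record
 * stored at restart point `idx`. Decoding failures are reported via
 * `args->error`.
 */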
static int restart_needle_less(size_t idx, void *_args)
{
	struct restart_needle_less_args *args = _args;
	uint32_t off = block_reader_restart_offset(args->reader, idx);
	struct string_view in = {
		.buf = args->reader->block.data + off,
		.len = args->reader->block_len - off,
	};
	uint64_t prefix_len, suffix_len;
	uint8_t extra;
	int n;

	/*
	 * Records at restart points are stored without prefix compression, so
	 * there is no need to fully decode the record key here. This removes
	 * the need for allocating memory.
	 */
	n = reftable_decode_keylen(in, &prefix_len, &suffix_len, &extra);
	if (n < 0 || prefix_len) {
		args->error = 1;
		return -1;
	}

	string_view_consume(&in, n);
	if (suffix_len > in.len) {
		args->error = 1;
		return -1;
	}

	n = memcmp(args->needle.buf, in.buf,
		   args->needle.len < suffix_len ? args->needle.len : suffix_len);
	if (n)
		return n < 0;
	return args->needle.len < suffix_len;
}
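/*
 * Decode the record at the iterator's current offset into `rec` and
 * advance the iterator. Returns 0 on success, 1 once the end of the block
 * is reached, and a negative value on error.
 */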
int block_iter_next(struct block_iter *it, struct reftable_record *rec)
{
	struct string_view in = {
		.buf = (unsigned char *) it->block + it->next_off,
		.len = it->block_len - it->next_off,
	};
	struct string_view start = in;
	uint8_t extra = 0;
	int n = 0;

	if (it->next_off >= it->block_len)
		return 1;

	n = reftable_decode_key(&it->last_key, &extra, in);
	if (n < 0)
		return -1;
	if (!it->last_key.len)
		return REFTABLE_FORMAT_ERROR;

	string_view_consume(&in, n);
	n = reftable_record_decode(rec, it->last_key, extra, in, it->hash_size,
				   &it->scratch);
	if (n < 0)
		return -1;
	string_view_consume(&in, n);

	it->next_off += start.len - in.len;
	return 0;
}
void block_iter_reset(struct block_iter *it)
{
	strbuf_reset(&it->last_key);
	it->next_off = 0;
	it->block = NULL;
	it->block_len = 0;
	it->hash_size = 0;
}
void block_iter_close(struct block_iter *it)
{
	strbuf_release(&it->last_key);
	strbuf_release(&it->scratch);
}
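/*
 * A sketch of the intended lookup pattern (illustrative, not code from
 * this file): seeking positions the iterator just before the wanted key,
 * so the record itself is produced by the following next call.
 *
 *	err = block_iter_seek_key(&it, br, &want);
 *	if (!err)
 *		err = block_iter_next(&it, &rec);
 */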
int block_iter_seek_key(struct block_iter *it, const struct block_reader *br,
			struct strbuf *want)
{
	struct restart_needle_less_args args = {
		.needle = *want,
		.reader = br,
	};
	struct reftable_record rec;
	int err = 0;
	size_t i;

	/*
	 * Perform a binary search over the block's restart points, which
	 * avoids doing a linear scan over the whole block. Like this, we
	 * identify the section of the block that should contain our key.
	 *
	 * Note that we explicitly search for the first restart point _greater_
	 * than the sought-after record, not _greater or equal_ to it. In case
	 * the sought-after record is located directly at the restart point we
	 * would otherwise start doing the linear search at the preceding
	 * restart point. While that works alright, we would end up scanning
	 * too many records.
	 */
	i = binsearch(br->restart_count, &restart_needle_less, &args);
	if (args.error) {
		err = REFTABLE_FORMAT_ERROR;
		goto done;
	}

	/*
	 * Now there are multiple cases:
	 *
	 * - `i == 0`: The wanted record is smaller than the record found at
	 *   the first restart point. As the first restart point is the first
	 *   record in the block, our wanted record cannot be located in this
	 *   block at all. We still need to position the iterator so that the
	 *   next call to `block_iter_next()` will yield an end-of-iterator
	 *   signal.
	 *
	 * - `i == restart_count`: The wanted record was not found at any of
	 *   the restart points. As there is no restart point at the end of
	 *   the section the record may thus be contained in the last section.
	 *
	 * - `i > 0`: The wanted record must be contained in the section
	 *   before the found restart point. We thus do a linear search
	 *   starting from the preceding restart point.
	 */
	if (i > 0)
		it->next_off = block_reader_restart_offset(br, i - 1);
	else
		it->next_off = br->header_off + 4;
	it->block = br->block.data;
	it->block_len = br->block_len;
	it->hash_size = br->hash_size;

	reftable_record_init(&rec, block_reader_type(br));

	/*
	 * We're looking for the last entry less than the wanted key so that
	 * the next call to `block_iter_next()` will yield the wanted record.
	 * We thus don't want to position our iterator at the sought-after
	 * record, but one before it. To do so, we have to go one entry too
	 * far and then back up.
	 */
	while (1) {
		size_t prev_off = it->next_off;

		err = block_iter_next(it, &rec);
		if (err < 0)
			goto done;
		if (err > 0) {
			it->next_off = prev_off;
			err = 0;
			goto done;
		}

		/*
		 * Check whether the current key is greater or equal to the
		 * sought-after key. In case it is greater we know that the
		 * record does not exist in the block and can thus abort early.
		 * In case it is equal to the sought-after key we have found
		 * the desired record.
		 *
		 * Note that we store the next record's key directly in
		 * `last_key` without restoring the key of the preceding record
		 * in case we need to go one record back. This is safe to do as
		 * `block_iter_next()` would return the record whose key is
		 * equal to `last_key` now, and naturally all keys share a
		 * prefix with themselves.
		 */
		reftable_record_key(&rec, &it->last_key);
		if (strbuf_cmp(&it->last_key, want) >= 0) {
			it->next_off = prev_off;
			goto done;
		}
	}

done:
	reftable_record_release(&rec);
	return err;
}
void block_writer_release(struct block_writer *bw)
{
	FREE_AND_NULL(bw->restarts);
	strbuf_release(&bw->last_key);
	/* the block is not owned. */
}
void reftable_block_done(struct reftable_block *blockp)
{
	/*
	 * Note that `blockp` is dereferenced unconditionally here, so a
	 * NULL check after reading `source` would be pointless; callers
	 * must pass a valid block.
	 */
	struct reftable_block_source source = blockp->source;
	if (source.ops)
		source.ops->return_block(source.arg, blockp);
	blockp->data = NULL;
	blockp->len = 0;
	blockp->source.ops = NULL;
	blockp->source.arg = NULL;
}