2 Copyright 2020 Google LLC
4 Use of this source code is governed by a BSD-style
5 license that can be found in the LICENSE file or at
6 https://developers.google.com/open-source/licenses/bsd
11 #include "blocksource.h"
12 #include "constants.h"
14 #include "reftable-error.h"
/*
 * Size in bytes of a reftable file header for the given format version.
 * NOTE(review): body reconstructed from the reftable format specification
 * (v1 header = 24 bytes, v2 = 28 bytes) — confirm against upstream.
 */
int header_size(int version)
{
	switch (version) {
	case 1:
		return 24;
	case 2:
		return 28;
	}
	abort();
}
/*
 * Size in bytes of a reftable file footer for the given format version.
 * NOTE(review): body reconstructed from the reftable format specification
 * (v1 footer = 68 bytes, v2 = 72 bytes) — confirm against upstream.
 */
int footer_size(int version)
{
	switch (version) {
	case 1:
		return 68;
	case 2:
		return 72;
	}
	abort();
}
40 static int block_writer_register_restart(struct block_writer
*w
, int n
,
41 int is_restart
, struct strbuf
*key
)
43 int rlen
= w
->restart_len
;
44 if (rlen
>= MAX_RESTARTS
) {
51 if (2 + 3 * rlen
+ n
> w
->block_size
- w
->next
)
54 REFTABLE_ALLOC_GROW(w
->restarts
, w
->restart_len
+ 1, w
->restart_cap
);
55 w
->restarts
[w
->restart_len
++] = w
->next
;
60 strbuf_reset(&w
->last_key
);
61 strbuf_addbuf(&w
->last_key
, key
);
/*
 * Prepare `bw` for writing a block of type `typ` into `buf`. The block's
 * payload starts 4 bytes past `header_off` (1 type byte + 3 length bytes).
 * The zlib stream is allocated lazily on first init and reused afterwards.
 */
void block_writer_init(struct block_writer *bw, uint8_t typ, uint8_t *buf,
		       uint32_t block_size, uint32_t header_off, int hash_size)
{
	bw->buf = buf;
	bw->hash_size = hash_size;
	bw->block_size = block_size;
	bw->header_off = header_off;
	/* First byte after the header offset is the block type tag. */
	bw->buf[header_off] = typ;
	bw->next = header_off + 4;
	bw->restart_interval = 16;
	bw->entries = 0;
	bw->restart_len = 0;
	bw->last_key.len = 0;

	if (!bw->zstream) {
		REFTABLE_CALLOC_ARRAY(bw->zstream, 1);
		deflateInit(bw->zstream, 9);
	}
}
85 uint8_t block_writer_type(struct block_writer
*bw
)
87 return bw
->buf
[bw
->header_off
];
90 /* Adds the reftable_record to the block. Returns -1 if it does not fit, 0 on
91 success. Returns REFTABLE_API_ERROR if attempting to write a record with
93 int block_writer_add(struct block_writer
*w
, struct reftable_record
*rec
)
95 struct strbuf empty
= STRBUF_INIT
;
97 w
->entries
% w
->restart_interval
== 0 ? empty
: w
->last_key
;
98 struct string_view out
= {
99 .buf
= w
->buf
+ w
->next
,
100 .len
= w
->block_size
- w
->next
,
103 struct string_view start
= out
;
106 struct strbuf key
= STRBUF_INIT
;
110 reftable_record_key(rec
, &key
);
112 err
= REFTABLE_API_ERROR
;
116 n
= reftable_encode_key(&is_restart
, out
, last
, key
,
117 reftable_record_val_type(rec
));
120 string_view_consume(&out
, n
);
122 n
= reftable_record_encode(rec
, out
, w
->hash_size
);
125 string_view_consume(&out
, n
);
127 err
= block_writer_register_restart(w
, start
.len
- out
.len
, is_restart
,
130 strbuf_release(&key
);
/*
 * Finalize the block: emit the restart table and its count, store the block
 * length in the header, and zlib-compress log blocks in place. Returns the
 * number of bytes used by the block, or REFTABLE_ZLIB_ERROR on a deflate
 * failure.
 */
int block_writer_finish(struct block_writer *w)
{
	int i;

	/* Trailing restart table: one 24-bit offset per restart point,
	 * followed by a 16-bit count. */
	for (i = 0; i < w->restart_len; i++) {
		put_be24(w->buf + w->next, w->restarts[i]);
		w->next += 3;
	}
	put_be16(w->buf + w->next, w->restart_len);
	w->next += 2;

	/* Record the block length right after the 1-byte type tag. */
	put_be24(w->buf + 1 + w->header_off, w->next);

	/*
	 * Log blocks are stored zlib-compressed. Note that the compression
	 * also spans over the restart points we have just written.
	 */
	if (block_writer_type(w) == BLOCK_TYPE_LOG) {
		int block_header_skip = 4 + w->header_off;
		uLongf src_len = w->next - block_header_skip, compressed_len;
		unsigned char *compressed;
		int ret;

		ret = deflateReset(w->zstream);
		if (ret != Z_OK)
			return REFTABLE_ZLIB_ERROR;

		/*
		 * Precompute the upper bound of how many bytes the compressed
		 * data may end up with. Combined with `Z_FINISH`, `deflate()`
		 * is guaranteed to return `Z_STREAM_END`.
		 */
		compressed_len = deflateBound(w->zstream, src_len);
		REFTABLE_ALLOC_ARRAY(compressed, compressed_len);

		w->zstream->next_out = compressed;
		w->zstream->avail_out = compressed_len;
		w->zstream->next_in = w->buf + block_header_skip;
		w->zstream->avail_in = src_len;

		/*
		 * Compress in a single step via Z_FINISH; with the buffer
		 * sized by `deflateBound()` this cannot run out of space, so
		 * anything other than Z_STREAM_END is a genuine error.
		 */
		ret = deflate(w->zstream, Z_FINISH);
		if (ret != Z_STREAM_END) {
			reftable_free(compressed);
			return REFTABLE_ZLIB_ERROR;
		}

		/*
		 * Overwrite the uncompressed data we have already written and
		 * adjust `next` to point right after the compressed payload.
		 */
		memcpy(w->buf + block_header_skip, compressed,
		       w->zstream->total_out);
		w->next = w->zstream->total_out + block_header_skip;

		reftable_free(compressed);
	}

	return w->next;
}
200 uint8_t block_reader_type(struct block_reader
*r
)
202 return r
->block
.data
[r
->header_off
];
/*
 * Initialize `br` from `block`, taking ownership of the block's data on
 * success. Log blocks are transparently uncompressed into a heap buffer.
 * Returns 0 on success, REFTABLE_FORMAT_ERROR for an unrecognized block type
 * or size mismatch, REFTABLE_ZLIB_ERROR when inflation fails.
 * NOTE(review): gapped lines reconstructed — verify against upstream.
 */
int block_reader_init(struct block_reader *br, struct reftable_block *block,
		      uint32_t header_off, uint32_t table_block_size,
		      int hash_size)
{
	uint32_t full_block_size = table_block_size;
	uint8_t typ = block->data[header_off];
	/* 24-bit block length stored right after the 1-byte type tag. */
	uint32_t sz = get_be24(block->data + header_off + 1);
	int err = 0;
	uint16_t restart_count = 0;
	uint32_t restart_start = 0;
	uint8_t *restart_bytes = NULL;
	uint8_t *uncompressed = NULL;

	if (!reftable_is_block_type(typ)) {
		err = REFTABLE_FORMAT_ERROR;
		goto done;
	}

	if (typ == BLOCK_TYPE_LOG) {
		int block_header_skip = 4 + header_off;
		uLongf dst_len = sz - block_header_skip;
		uLongf src_len = block->len - block_header_skip;

		/* Log blocks specify the *uncompressed* size in their header. */
		REFTABLE_ALLOC_ARRAY(uncompressed, sz);

		/* Copy over the block header verbatim. It's not compressed. */
		memcpy(uncompressed, block->data, block_header_skip);

		if (Z_OK != uncompress2(uncompressed + block_header_skip,
					&dst_len,
					block->data + block_header_skip,
					&src_len)) {
			err = REFTABLE_ZLIB_ERROR;
			goto done;
		}

		if (dst_len + block_header_skip != sz) {
			err = REFTABLE_FORMAT_ERROR;
			goto done;
		}

		/* We're done with the input data. */
		reftable_block_done(block);
		block->data = uncompressed;
		uncompressed = NULL;
		block->len = sz;
		block->source = malloc_block_source();
		full_block_size = src_len + block_header_skip;
	} else if (full_block_size == 0) {
		full_block_size = sz;
	} else if (sz < full_block_size && sz < block->len &&
		   block->data[sz] != 0) {
		/* If the block is smaller than the full block size, it is
		 * padded (data followed by '\0') or the next block is
		 * unaligned. */
		full_block_size = sz;
	}

	/* Restart table sits at the end: 16-bit count preceded by 24-bit
	 * offsets. */
	restart_count = get_be16(block->data + sz - 2);
	restart_start = sz - 2 - 3 * restart_count;
	restart_bytes = block->data + restart_start;

	/* transfer ownership. */
	br->block = *block;
	block->data = NULL;
	block->len = 0;

	br->hash_size = hash_size;
	br->block_len = restart_start;
	br->full_block_size = full_block_size;
	br->header_off = header_off;
	br->restart_count = restart_count;
	br->restart_bytes = restart_bytes;

done:
	reftable_free(uncompressed);
	return err;
}
286 static uint32_t block_reader_restart_offset(struct block_reader
*br
, int i
)
288 return get_be24(br
->restart_bytes
+ 3 * i
);
291 void block_reader_start(struct block_reader
*br
, struct block_iter
*it
)
294 strbuf_reset(&it
->last_key
);
295 it
->next_off
= br
->header_off
+ 4;
298 struct restart_find_args
{
301 struct block_reader
*r
;
304 static int restart_key_less(size_t idx
, void *args
)
306 struct restart_find_args
*a
= args
;
307 uint32_t off
= block_reader_restart_offset(a
->r
, idx
);
308 struct string_view in
= {
309 .buf
= a
->r
->block
.data
+ off
,
310 .len
= a
->r
->block_len
- off
,
313 /* the restart key is verbatim in the block, so this could avoid the
314 alloc for decoding the key */
315 struct strbuf rkey
= STRBUF_INIT
;
316 uint8_t unused_extra
;
317 int n
= reftable_decode_key(&rkey
, &unused_extra
, in
);
324 result
= strbuf_cmp(&a
->key
, &rkey
);
325 strbuf_release(&rkey
);
329 void block_iter_copy_from(struct block_iter
*dest
, struct block_iter
*src
)
332 dest
->next_off
= src
->next_off
;
333 strbuf_reset(&dest
->last_key
);
334 strbuf_addbuf(&dest
->last_key
, &src
->last_key
);
337 int block_iter_next(struct block_iter
*it
, struct reftable_record
*rec
)
339 struct string_view in
= {
340 .buf
= it
->br
->block
.data
+ it
->next_off
,
341 .len
= it
->br
->block_len
- it
->next_off
,
343 struct string_view start
= in
;
347 if (it
->next_off
>= it
->br
->block_len
)
350 n
= reftable_decode_key(&it
->last_key
, &extra
, in
);
353 if (!it
->last_key
.len
)
354 return REFTABLE_FORMAT_ERROR
;
356 string_view_consume(&in
, n
);
357 n
= reftable_record_decode(rec
, it
->last_key
, extra
, in
, it
->br
->hash_size
,
361 string_view_consume(&in
, n
);
363 it
->next_off
+= start
.len
- in
.len
;
367 int block_reader_first_key(struct block_reader
*br
, struct strbuf
*key
)
369 int off
= br
->header_off
+ 4, n
;
370 struct string_view in
= {
371 .buf
= br
->block
.data
+ off
,
372 .len
= br
->block_len
- off
,
378 n
= reftable_decode_key(key
, &extra
, in
);
382 return REFTABLE_FORMAT_ERROR
;
387 int block_iter_seek(struct block_iter
*it
, struct strbuf
*want
)
389 return block_reader_seek(it
->br
, it
, want
);
392 void block_iter_close(struct block_iter
*it
)
394 strbuf_release(&it
->last_key
);
395 strbuf_release(&it
->scratch
);
398 int block_reader_seek(struct block_reader
*br
, struct block_iter
*it
,
401 struct restart_find_args args
= {
405 struct block_iter next
= BLOCK_ITER_INIT
;
406 struct reftable_record rec
;
410 err
= REFTABLE_FORMAT_ERROR
;
414 i
= binsearch(br
->restart_count
, &restart_key_less
, &args
);
416 it
->next_off
= block_reader_restart_offset(br
, i
- 1);
418 it
->next_off
= br
->header_off
+ 4;
421 reftable_record_init(&rec
, block_reader_type(br
));
423 /* We're looking for the last entry less/equal than the wanted key, so
424 we have to go one entry too far and then back up.
427 block_iter_copy_from(&next
, it
);
428 err
= block_iter_next(&next
, &rec
);
432 reftable_record_key(&rec
, &it
->last_key
);
433 if (err
> 0 || strbuf_cmp(&it
->last_key
, want
) >= 0) {
438 block_iter_copy_from(it
, &next
);
442 block_iter_close(&next
);
443 reftable_record_release(&rec
);
/* Free the writer's owned resources (zlib stream, restart table, key
 * buffer). */
void block_writer_release(struct block_writer *bw)
{
	deflateEnd(bw->zstream);
	FREE_AND_NULL(bw->zstream);
	FREE_AND_NULL(bw->restarts);
	strbuf_release(&bw->last_key);
	/* the block is not owned. */
}
457 void reftable_block_done(struct reftable_block
*blockp
)
459 struct reftable_block_source source
= blockp
->source
;
460 if (blockp
&& source
.ops
)
461 source
.ops
->return_block(source
.arg
, blockp
);
464 blockp
->source
.ops
= NULL
;
465 blockp
->source
.arg
= NULL
;