reftable/block.c

   1 /*
   2 Copyright 2020 Google LLC
   3
   4 Use of this source code is governed by a BSD-style
   5 license that can be found in the LICENSE file or at
   6 https://developers.google.com/open-source/licenses/bsd
   7 */
   8
   9 #include "block.h"
  10
  11 #include "blocksource.h"
  12 #include "constants.h"
  13 #include "record.h"
  14 #include "reftable-error.h"
  15 #include "system.h"
  16 #include <zlib.h>
  17
  18 int header_size(int version)
  19 {
  20         switch (version) {
  21         case 1:
  22                 return 24;
  23         case 2:
  24                 return 28;
  25         }
  26         abort();
  27 }
  28
  29 int footer_size(int version)
  30 {
  31         switch (version) {
  32         case 1:
  33                 return 68;
  34         case 2:
  35                 return 72;
  36         }
  37         abort();
  38 }
  39
  40 static int block_writer_register_restart(struct block_writer *w, int n,
  41                                          int is_restart, struct strbuf *key)
  42 {
  43         int rlen = w->restart_len;
  44         if (rlen >= MAX_RESTARTS) {
  45                 is_restart = 0;
  46         }
  47
  48         if (is_restart) {
  49                 rlen++;
  50         }
  51         if (2 + 3 * rlen + n > w->block_size - w->next)
  52                 return -1;
  53         if (is_restart) {
  54                 REFTABLE_ALLOC_GROW(w->restarts, w->restart_len + 1, w->restart_cap);
  55                 w->restarts[w->restart_len++] = w->next;
  56         }
  57
  58         w->next += n;
  59
  60         strbuf_reset(&w->last_key);
  61         strbuf_addbuf(&w->last_key, key);
  62         w->entries++;
  63         return 0;
  64 }
  65
  66 void block_writer_init(struct block_writer *bw, uint8_t typ, uint8_t *buf,
  67                        uint32_t block_size, uint32_t header_off, int hash_size)
  68 {
  69         bw->buf = buf;
  70         bw->hash_size = hash_size;
  71         bw->block_size = block_size;
  72         bw->header_off = header_off;
  73         bw->buf[header_off] = typ;
  74         bw->next = header_off + 4;
  75         bw->restart_interval = 16;
  76         bw->entries = 0;
  77         bw->restart_len = 0;
  78         bw->last_key.len = 0;
  79 }
  80
  81 uint8_t block_writer_type(struct block_writer *bw)
  82 {
  83         return bw->buf[bw->header_off];
  84 }
  85
  86 /* Adds the reftable_record to the block. Returns -1 if it does not fit, 0 on
  87    success. Returns REFTABLE_API_ERROR if attempting to write a record with
  88    empty key. */
  89 int block_writer_add(struct block_writer *w, struct reftable_record *rec)
  90 {
  91         struct strbuf empty = STRBUF_INIT;
  92         struct strbuf last =
  93                 w->entries % w->restart_interval == 0 ? empty : w->last_key;
  94         struct string_view out = {
  95                 .buf = w->buf + w->next,
  96                 .len = w->block_size - w->next,
  97         };
  98
  99         struct string_view start = out;
 100
 101         int is_restart = 0;
 102         struct strbuf key = STRBUF_INIT;
 103         int n = 0;
 104         int err = -1;
 105
 106         reftable_record_key(rec, &key);
 107         if (!key.len) {
 108                 err = REFTABLE_API_ERROR;
 109                 goto done;
 110         }
 111
 112         n = reftable_encode_key(&is_restart, out, last, key,
 113                                 reftable_record_val_type(rec));
 114         if (n < 0)
 115                 goto done;
 116         string_view_consume(&out, n);
 117
 118         n = reftable_record_encode(rec, out, w->hash_size);
 119         if (n < 0)
 120                 goto done;
 121         string_view_consume(&out, n);
 122
 123         err = block_writer_register_restart(w, start.len - out.len, is_restart,
 124                                             &key);
 125 done:
 126         strbuf_release(&key);
 127         return err;
 128 }
 129
 130 int block_writer_finish(struct block_writer *w)
 131 {
 132         int i;
 133         for (i = 0; i < w->restart_len; i++) {
 134                 put_be24(w->buf + w->next, w->restarts[i]);
 135                 w->next += 3;
 136         }
 137
 138         put_be16(w->buf + w->next, w->restart_len);
 139         w->next += 2;
 140         put_be24(w->buf + 1 + w->header_off, w->next);
 141
 142         if (block_writer_type(w) == BLOCK_TYPE_LOG) {
 143                 int block_header_skip = 4 + w->header_off;
 144                 uLongf src_len = w->next - block_header_skip;
 145                 uLongf dest_cap = src_len * 1.001 + 12;
 146                 uint8_t *compressed;
 147
 148                 REFTABLE_ALLOC_ARRAY(compressed, dest_cap);
 149
 150                 while (1) {
 151                         uLongf out_dest_len = dest_cap;
 152                         int zresult = compress2(compressed, &out_dest_len,
 153                                                 w->buf + block_header_skip,
 154                                                 src_len, 9);
 155                         if (zresult == Z_BUF_ERROR && dest_cap < LONG_MAX) {
 156                                 dest_cap *= 2;
 157                                 compressed =
 158                                         reftable_realloc(compressed, dest_cap);
 159                                 if (compressed)
 160                                         continue;
 161                         }
 162
 163                         if (Z_OK != zresult) {
 164                                 reftable_free(compressed);
 165                                 return REFTABLE_ZLIB_ERROR;
 166                         }
 167
 168                         memcpy(w->buf + block_header_skip, compressed,
 169                                out_dest_len);
 170                         w->next = out_dest_len + block_header_skip;
 171                         reftable_free(compressed);
 172                         break;
 173                 }
 174         }
 175         return w->next;
 176 }
 177
 178 int block_reader_init(struct block_reader *br, struct reftable_block *block,
 179                       uint32_t header_off, uint32_t table_block_size,
 180                       int hash_size)
 181 {
 182         uint32_t full_block_size = table_block_size;
 183         uint8_t typ = block->data[header_off];
 184         uint32_t sz = get_be24(block->data + header_off + 1);
 185         int err = 0;
 186         uint16_t restart_count = 0;
 187         uint32_t restart_start = 0;
 188         uint8_t *restart_bytes = NULL;
 189
 190         reftable_block_done(&br->block);
 191
 192         if (!reftable_is_block_type(typ)) {
 193                 err =  REFTABLE_FORMAT_ERROR;
 194                 goto done;
 195         }
 196
 197         if (typ == BLOCK_TYPE_LOG) {
 198                 uint32_t block_header_skip = 4 + header_off;
 199                 uLong dst_len = sz - block_header_skip;
 200                 uLong src_len = block->len - block_header_skip;
 201
 202                 /* Log blocks specify the *uncompressed* size in their header. */
 203                 REFTABLE_ALLOC_GROW(br->uncompressed_data, sz,
 204                                     br->uncompressed_cap);
 205
 206                 /* Copy over the block header verbatim. It's not compressed. */
 207                 memcpy(br->uncompressed_data, block->data, block_header_skip);
 208
 209                 if (!br->zstream) {
 210                         REFTABLE_CALLOC_ARRAY(br->zstream, 1);
 211                         err = inflateInit(br->zstream);
 212                 } else {
 213                         err = inflateReset(br->zstream);
 214                 }
 215                 if (err != Z_OK) {
 216                         err = REFTABLE_ZLIB_ERROR;
 217                         goto done;
 218                 }
 219
 220                 br->zstream->next_in = block->data + block_header_skip;
 221                 br->zstream->avail_in = src_len;
 222                 br->zstream->next_out = br->uncompressed_data + block_header_skip;
 223                 br->zstream->avail_out = dst_len;
 224
 225                 /*
 226                  * We know both input as well as output size, and we know that
 227                  * the sizes should never be bigger than `uInt_MAX` because
 228                  * blocks can at most be 16MB large. We can thus use `Z_FINISH`
 229                  * here to instruct zlib to inflate the data in one go, which
 230                  * is more efficient than using `Z_NO_FLUSH`.
 231                  */
 232                 err = inflate(br->zstream, Z_FINISH);
 233                 if (err != Z_STREAM_END) {
 234                         err = REFTABLE_ZLIB_ERROR;
 235                         goto done;
 236                 }
 237                 err = 0;
 238
 239                 if (br->zstream->total_out + block_header_skip != sz) {
 240                         err = REFTABLE_FORMAT_ERROR;
 241                         goto done;
 242                 }
 243
 244                 /* We're done with the input data. */
 245                 reftable_block_done(block);
 246                 block->data = br->uncompressed_data;
 247                 block->len = sz;
 248                 full_block_size = src_len + block_header_skip - br->zstream->avail_in;
 249         } else if (full_block_size == 0) {
 250                 full_block_size = sz;
 251         } else if (sz < full_block_size && sz < block->len &&
 252                    block->data[sz] != 0) {
 253                 /* If the block is smaller than the full block size, it is
 254                    padded (data followed by '\0') or the next block is
 255                    unaligned. */
 256                 full_block_size = sz;
 257         }
 258
 259         restart_count = get_be16(block->data + sz - 2);
 260         restart_start = sz - 2 - 3 * restart_count;
 261         restart_bytes = block->data + restart_start;
 262
 263         /* transfer ownership. */
 264         br->block = *block;
 265         block->data = NULL;
 266         block->len = 0;
 267
 268         br->hash_size = hash_size;
 269         br->block_len = restart_start;
 270         br->full_block_size = full_block_size;
 271         br->header_off = header_off;
 272         br->restart_count = restart_count;
 273         br->restart_bytes = restart_bytes;
 274
 275 done:
 276         return err;
 277 }
 278
 279 void block_reader_release(struct block_reader *br)
 280 {
 281         inflateEnd(br->zstream);
 282         reftable_free(br->zstream);
 283         reftable_free(br->uncompressed_data);
 284         reftable_block_done(&br->block);
 285 }
 286
 287 uint8_t block_reader_type(const struct block_reader *r)
 288 {
 289         return r->block.data[r->header_off];
 290 }
 291
 292 int block_reader_first_key(const struct block_reader *br, struct strbuf *key)
 293 {
 294         int off = br->header_off + 4, n;
 295         struct string_view in = {
 296                 .buf = br->block.data + off,
 297                 .len = br->block_len - off,
 298         };
 299         uint8_t extra = 0;
 300
 301         strbuf_reset(key);
 302
 303         n = reftable_decode_key(key, &extra, in);
 304         if (n < 0)
 305                 return n;
 306         if (!key->len)
 307                 return REFTABLE_FORMAT_ERROR;
 308
 309         return 0;
 310 }
 311
 312 static uint32_t block_reader_restart_offset(const struct block_reader *br, int i)
 313 {
 314         return get_be24(br->restart_bytes + 3 * i);
 315 }
 316
 317 void block_iter_seek_start(struct block_iter *it, const struct block_reader *br)
 318 {
 319         it->block = br->block.data;
 320         it->block_len = br->block_len;
 321         it->hash_size = br->hash_size;
 322         strbuf_reset(&it->last_key);
 323         it->next_off = br->header_off + 4;
 324 }
 325
 326 struct restart_needle_less_args {
 327         int error;
 328         struct strbuf needle;
 329         const struct block_reader *reader;
 330 };
 331
 332 static int restart_needle_less(size_t idx, void *_args)
 333 {
 334         struct restart_needle_less_args *args = _args;
 335         uint32_t off = block_reader_restart_offset(args->reader, idx);
 336         struct string_view in = {
 337                 .buf = args->reader->block.data + off,
 338                 .len = args->reader->block_len - off,
 339         };
 340         uint64_t prefix_len, suffix_len;
 341         uint8_t extra;
 342         int n;
 343
 344         /*
 345          * Records at restart points are stored without prefix compression, so
 346          * there is no need to fully decode the record key here. This removes
 347          * the need for allocating memory.
 348          */
 349         n = reftable_decode_keylen(in, &prefix_len, &suffix_len, &extra);
 350         if (n < 0 || prefix_len) {
 351                 args->error = 1;
 352                 return -1;
 353         }
 354
 355         string_view_consume(&in, n);
 356         if (suffix_len > in.len) {
 357                 args->error = 1;
 358                 return -1;
 359         }
 360
 361         n = memcmp(args->needle.buf, in.buf,
 362                    args->needle.len < suffix_len ? args->needle.len : suffix_len);
 363         if (n)
 364                 return n < 0;
 365         return args->needle.len < suffix_len;
 366 }
 367
 368 int block_iter_next(struct block_iter *it, struct reftable_record *rec)
 369 {
 370         struct string_view in = {
 371                 .buf = (unsigned char *) it->block + it->next_off,
 372                 .len = it->block_len - it->next_off,
 373         };
 374         struct string_view start = in;
 375         uint8_t extra = 0;
 376         int n = 0;
 377
 378         if (it->next_off >= it->block_len)
 379                 return 1;
 380
 381         n = reftable_decode_key(&it->last_key, &extra, in);
 382         if (n < 0)
 383                 return -1;
 384         if (!it->last_key.len)
 385                 return REFTABLE_FORMAT_ERROR;
 386
 387         string_view_consume(&in, n);
 388         n = reftable_record_decode(rec, it->last_key, extra, in, it->hash_size,
 389                                    &it->scratch);
 390         if (n < 0)
 391                 return -1;
 392         string_view_consume(&in, n);
 393
 394         it->next_off += start.len - in.len;
 395         return 0;
 396 }
 397
 398 void block_iter_reset(struct block_iter *it)
 399 {
 400         strbuf_reset(&it->last_key);
 401         it->next_off = 0;
 402         it->block = NULL;
 403         it->block_len = 0;
 404         it->hash_size = 0;
 405 }
 406
 407 void block_iter_close(struct block_iter *it)
 408 {
 409         strbuf_release(&it->last_key);
 410         strbuf_release(&it->scratch);
 411 }
 412
 413 int block_iter_seek_key(struct block_iter *it, const struct block_reader *br,
 414                         struct strbuf *want)
 415 {
 416         struct restart_needle_less_args args = {
 417                 .needle = *want,
 418                 .reader = br,
 419         };
 420         struct reftable_record rec;
 421         int err = 0;
 422         size_t i;
 423
 424         /*
 425          * Perform a binary search over the block's restart points, which
 426          * avoids doing a linear scan over the whole block. Like this, we
 427          * identify the section of the block that should contain our key.
 428          *
 429          * Note that we explicitly search for the first restart point _greater_
 430          * than the sought-after record, not _greater or equal_ to it. In case
 431          * the sought-after record is located directly at the restart point we
 432          * would otherwise start doing the linear search at the preceding
 433          * restart point. While that works alright, we would end up scanning
 434          * too many record.
 435          */
 436         i = binsearch(br->restart_count, &restart_needle_less, &args);
 437         if (args.error) {
 438                 err = REFTABLE_FORMAT_ERROR;
 439                 goto done;
 440         }
 441
 442         /*
 443          * Now there are multiple cases:
 444          *
 445          *   - `i == 0`: The wanted record is smaller than the record found at
 446          *     the first restart point. As the first restart point is the first
 447          *     record in the block, our wanted record cannot be located in this
 448          *     block at all. We still need to position the iterator so that the
 449          *     next call to `block_iter_next()` will yield an end-of-iterator
 450          *     signal.
 451          *
 452          *   - `i == restart_count`: The wanted record was not found at any of
 453          *     the restart points. As there is no restart point at the end of
 454          *     the section the record may thus be contained in the last block.
 455          *
 456          *   - `i > 0`: The wanted record must be contained in the section
 457          *     before the found restart point. We thus do a linear search
 458          *     starting from the preceding restart point.
 459          */
 460         if (i > 0)
 461                 it->next_off = block_reader_restart_offset(br, i - 1);
 462         else
 463                 it->next_off = br->header_off + 4;
 464         it->block = br->block.data;
 465         it->block_len = br->block_len;
 466         it->hash_size = br->hash_size;
 467
 468         reftable_record_init(&rec, block_reader_type(br));
 469
 470         /*
 471          * We're looking for the last entry less than the wanted key so that
 472          * the next call to `block_reader_next()` would yield the wanted
 473          * record. We thus don't want to position our reader at the sought
 474          * after record, but one before. To do so, we have to go one entry too
 475          * far and then back up.
 476          */
 477         while (1) {
 478                 size_t prev_off = it->next_off;
 479
 480                 err = block_iter_next(it, &rec);
 481                 if (err < 0)
 482                         goto done;
 483                 if (err > 0) {
 484                         it->next_off = prev_off;
 485                         err = 0;
 486                         goto done;
 487                 }
 488
 489                 /*
 490                  * Check whether the current key is greater or equal to the
 491                  * sought-after key. In case it is greater we know that the
 492                  * record does not exist in the block and can thus abort early.
 493                  * In case it is equal to the sought-after key we have found
 494                  * the desired record.
 495                  *
 496                  * Note that we store the next record's key record directly in
 497                  * `last_key` without restoring the key of the preceding record
 498                  * in case we need to go one record back. This is safe to do as
 499                  * `block_iter_next()` would return the ref whose key is equal
 500                  * to `last_key` now, and naturally all keys share a prefix
 501                  * with themselves.
 502                  */
 503                 reftable_record_key(&rec, &it->last_key);
 504                 if (strbuf_cmp(&it->last_key, want) >= 0) {
 505                         it->next_off = prev_off;
 506                         goto done;
 507                 }
 508         }
 509
 510 done:
 511         reftable_record_release(&rec);
 512         return err;
 513 }
 514
 515 void block_writer_release(struct block_writer *bw)
 516 {
 517         FREE_AND_NULL(bw->restarts);
 518         strbuf_release(&bw->last_key);
 519         /* the block is not owned. */
 520 }
 521
 522 void reftable_block_done(struct reftable_block *blockp)
 523 {
 524         struct reftable_block_source source = blockp->source;
 525         if (blockp && source.ops)
 526                 source.ops->return_block(source.arg, blockp);
 527         blockp->data = NULL;
 528         blockp->len = 0;
 529         blockp->source.ops = NULL;
 530         blockp->source.arg = NULL;
 531 }