/*
Copyright 2020 Google LLC

Use of this source code is governed by a BSD-style
license that can be found in the LICENSE file or at
https://developers.google.com/open-source/licenses/bsd
*/

#include "stack.h"

#include "../write-or-die.h"
#include "system.h"
#include "merged.h"
#include "reader.h"
#include "reftable-error.h"
#include "reftable-generic.h"
#include "reftable-record.h"
#include "reftable-merged.h"
#include "writer.h"
#include "tempfile.h"

static int stack_try_add(struct reftable_stack *st,
			 int (*write_table)(struct reftable_writer *wr,
					    void *arg),
			 void *arg);
static int stack_write_compact(struct reftable_stack *st,
			       struct reftable_writer *wr,
			       size_t first, size_t last,
			       struct reftable_log_expiry_config *config);
static void reftable_addition_close(struct reftable_addition *add);
static int reftable_stack_reload_maybe_reuse(struct reftable_stack *st,
					     int reuse_open);

static void stack_filename(struct strbuf *dest, struct reftable_stack *st,
			   const char *name)
{
	strbuf_reset(dest);
	strbuf_addstr(dest, st->reftable_dir);
	strbuf_addstr(dest, "/");
	strbuf_addstr(dest, name);
}

static ssize_t reftable_fd_write(void *arg, const void *data, size_t sz)
{
	int *fdp = (int *)arg;
	return write_in_full(*fdp, data, sz);
}

static int reftable_fd_flush(void *arg)
{
	int *fdp = (int *)arg;

	return fsync_component(FSYNC_COMPONENT_REFERENCE, *fdp);
}

int reftable_new_stack(struct reftable_stack **dest, const char *dir,
		       struct reftable_write_options config)
{
	struct reftable_stack *p = reftable_calloc(1, sizeof(*p));
	struct strbuf list_file_name = STRBUF_INIT;
	int err = 0;

	if (config.hash_id == 0) {
		config.hash_id = GIT_SHA1_FORMAT_ID;
	}

	*dest = NULL;

	strbuf_reset(&list_file_name);
	strbuf_addstr(&list_file_name, dir);
	strbuf_addstr(&list_file_name, "/tables.list");

	p->list_file = strbuf_detach(&list_file_name, NULL);
	p->list_fd = -1;
	p->reftable_dir = xstrdup(dir);
	p->config = config;

	err = reftable_stack_reload_maybe_reuse(p, 1);
	if (err < 0) {
		reftable_stack_destroy(p);
	} else {
		*dest = p;
	}
	return err;
}

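/*
 * A minimal usage sketch for the constructor above (illustrative only and
 * not referenced by the library; the "example_" name, the directory and
 * the ref name are placeholder assumptions): open a stack, look up a
 * single ref, then release all resources.
 */
static int example_stack_read_one(const char *dir, const char *refname)
{
	struct reftable_write_options cfg = { 0 };
	struct reftable_stack *st = NULL;
	struct reftable_ref_record ref = { NULL };
	int err = reftable_new_stack(&st, dir, cfg);
	if (err < 0)
		return err;

	/* Per reftable conventions: 0 = found, > 0 = not found, < 0 = error. */
	err = reftable_stack_read_ref(st, refname, &ref);

	reftable_ref_record_release(&ref);
	reftable_stack_destroy(st);
	return err;
}
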
static int fd_read_lines(int fd, char ***namesp)
{
	off_t size = lseek(fd, 0, SEEK_END);
	char *buf = NULL;
	int err = 0;
	if (size < 0) {
		err = REFTABLE_IO_ERROR;
		goto done;
	}
	err = lseek(fd, 0, SEEK_SET);
	if (err < 0) {
		err = REFTABLE_IO_ERROR;
		goto done;
	}

	REFTABLE_ALLOC_ARRAY(buf, size + 1);
	if (read_in_full(fd, buf, size) != size) {
		err = REFTABLE_IO_ERROR;
		goto done;
	}
	buf[size] = 0;

	parse_names(buf, size, namesp);

done:
	reftable_free(buf);
	return err;
}

int read_lines(const char *filename, char ***namesp)
{
	int fd = open(filename, O_RDONLY);
	int err = 0;
	if (fd < 0) {
		if (errno == ENOENT) {
			REFTABLE_CALLOC_ARRAY(*namesp, 1);
			return 0;
		}

		return REFTABLE_IO_ERROR;
	}
	err = fd_read_lines(fd, namesp);
	close(fd);
	return err;
}

struct reftable_merged_table *
reftable_stack_merged_table(struct reftable_stack *st)
{
	return st->merged;
}

static int has_name(char **names, const char *name)
{
	while (*names) {
		if (!strcmp(*names, name))
			return 1;
		names++;
	}
	return 0;
}

/* Close and free the stack */
void reftable_stack_destroy(struct reftable_stack *st)
{
	char **names = NULL;
	int err = 0;
	if (st->merged) {
		reftable_merged_table_free(st->merged);
		st->merged = NULL;
	}

	err = read_lines(st->list_file, &names);
	if (err < 0) {
		FREE_AND_NULL(names);
	}

	if (st->readers) {
		int i = 0;
		struct strbuf filename = STRBUF_INIT;
		for (i = 0; i < st->readers_len; i++) {
			const char *name = reader_name(st->readers[i]);
			strbuf_reset(&filename);
			if (names && !has_name(names, name)) {
				stack_filename(&filename, st, name);
			}
			reftable_reader_free(st->readers[i]);

			if (filename.len) {
				/* On Windows, can only unlink after closing. */
				unlink(filename.buf);
			}
		}
		strbuf_release(&filename);
		st->readers_len = 0;
		FREE_AND_NULL(st->readers);
	}

	if (st->list_fd >= 0) {
		close(st->list_fd);
		st->list_fd = -1;
	}

	FREE_AND_NULL(st->list_file);
	FREE_AND_NULL(st->reftable_dir);
	reftable_free(st);
	free_names(names);
}

static struct reftable_reader **stack_copy_readers(struct reftable_stack *st,
						   int cur_len)
{
	struct reftable_reader **cur = reftable_calloc(cur_len, sizeof(*cur));
	int i = 0;
	for (i = 0; i < cur_len; i++) {
		cur[i] = st->readers[i];
	}
	return cur;
}

static int reftable_stack_reload_once(struct reftable_stack *st, char **names,
				      int reuse_open)
{
	size_t cur_len = !st->merged ? 0 : st->merged->stack_len;
	struct reftable_reader **cur = stack_copy_readers(st, cur_len);
	size_t names_len = names_length(names);
	struct reftable_reader **new_readers =
		reftable_calloc(names_len, sizeof(*new_readers));
	struct reftable_table *new_tables =
		reftable_calloc(names_len, sizeof(*new_tables));
	size_t new_readers_len = 0;
	struct reftable_merged_table *new_merged = NULL;
	struct strbuf table_path = STRBUF_INIT;
	int err = 0;
	size_t i;

	while (*names) {
		struct reftable_reader *rd = NULL;
		char *name = *names++;

		/* this is linear; we assume compaction keeps the number of
		   tables under control so this is not quadratic. */
		for (i = 0; reuse_open && i < cur_len; i++) {
			if (cur[i] && 0 == strcmp(cur[i]->name, name)) {
				rd = cur[i];
				cur[i] = NULL;
				break;
			}
		}

		if (!rd) {
			struct reftable_block_source src = { NULL };
			stack_filename(&table_path, st, name);

			err = reftable_block_source_from_file(&src,
							      table_path.buf);
			if (err < 0)
				goto done;

			err = reftable_new_reader(&rd, &src, name);
			if (err < 0)
				goto done;
		}

		new_readers[new_readers_len] = rd;
		reftable_table_from_reader(&new_tables[new_readers_len], rd);
		new_readers_len++;
	}

	/* success! */
	err = reftable_new_merged_table(&new_merged, new_tables,
					new_readers_len, st->config.hash_id);
	if (err < 0)
		goto done;

	new_tables = NULL;
	st->readers_len = new_readers_len;
	if (st->merged) {
		merged_table_release(st->merged);
		reftable_merged_table_free(st->merged);
	}
	if (st->readers) {
		reftable_free(st->readers);
	}
	st->readers = new_readers;
	new_readers = NULL;
	new_readers_len = 0;

	new_merged->suppress_deletions = 1;
	st->merged = new_merged;
	for (i = 0; i < cur_len; i++) {
		if (cur[i]) {
			const char *name = reader_name(cur[i]);
			stack_filename(&table_path, st, name);

			reader_close(cur[i]);
			reftable_reader_free(cur[i]);

			/* On Windows, can only unlink after closing. */
			unlink(table_path.buf);
		}
	}

done:
	for (i = 0; i < new_readers_len; i++) {
		reader_close(new_readers[i]);
		reftable_reader_free(new_readers[i]);
	}
	reftable_free(new_readers);
	reftable_free(new_tables);
	reftable_free(cur);
	strbuf_release(&table_path);
	return err;
}

/* return negative if a before b. */
static int tv_cmp(struct timeval *a, struct timeval *b)
{
	time_t diff = a->tv_sec - b->tv_sec;
	int udiff = a->tv_usec - b->tv_usec;

	if (diff != 0)
		return diff;

	return udiff;
}

static int reftable_stack_reload_maybe_reuse(struct reftable_stack *st,
					     int reuse_open)
{
	char **names = NULL, **names_after = NULL;
	struct timeval deadline;
	int64_t delay = 0;
	int tries = 0, err;
	int fd = -1;

	err = gettimeofday(&deadline, NULL);
	if (err < 0)
		goto out;
	deadline.tv_sec += 3;

	while (1) {
		struct timeval now;

		err = gettimeofday(&now, NULL);
		if (err < 0)
			goto out;

		/*
		 * Only look at deadlines after the first few times. This
		 * simplifies debugging in GDB.
		 */
		tries++;
		if (tries > 3 && tv_cmp(&now, &deadline) >= 0)
			goto out;

		fd = open(st->list_file, O_RDONLY);
		if (fd < 0) {
			if (errno != ENOENT) {
				err = REFTABLE_IO_ERROR;
				goto out;
			}

			REFTABLE_CALLOC_ARRAY(names, 1);
		} else {
			err = fd_read_lines(fd, &names);
			if (err < 0)
				goto out;
		}

		err = reftable_stack_reload_once(st, names, reuse_open);
		if (!err)
			break;
		if (err != REFTABLE_NOT_EXIST_ERROR)
			goto out;

		/*
		 * REFTABLE_NOT_EXIST_ERROR can be caused by a concurrent
		 * writer. Check if there was one by checking if the name list
		 * changed.
		 */
		err = read_lines(st->list_file, &names_after);
		if (err < 0)
			goto out;
		if (names_equal(names_after, names)) {
			err = REFTABLE_NOT_EXIST_ERROR;
			goto out;
		}

		free_names(names);
		names = NULL;
		free_names(names_after);
		names_after = NULL;
		close(fd);
		fd = -1;

		delay = delay + (delay * rand()) / RAND_MAX + 1;
		sleep_millisec(delay);
	}

out:
	/*
	 * Invalidate the stat cache. It is sufficient to only close the file
	 * descriptor and keep the cached stat info because we never use the
	 * latter when the former is negative.
	 */
	if (st->list_fd >= 0) {
		close(st->list_fd);
		st->list_fd = -1;
	}

	/*
	 * Cache stat information in case it provides a useful signal to us.
	 * According to POSIX, "The st_ino and st_dev fields taken together
	 * uniquely identify the file within the system." That being said,
	 * Windows is not POSIX compliant and we do not have these fields
	 * available. So the information we have there is insufficient to
	 * determine whether two file descriptors point to the same file.
	 *
	 * While we could fall back to using other signals like the file's
	 * mtime, those are not sufficient to avoid races. We thus refrain from
	 * using the stat cache on such systems and fall back to the secondary
	 * caching mechanism, which is to check whether contents of the file
	 * have changed.
	 *
	 * On other systems which are POSIX compliant we must keep the file
	 * descriptor open. This is to avoid a race condition where two
	 * processes access the reftable stack at the same point in time:
	 *
	 *   1. A reads the reftable stack and caches its stat info.
	 *
	 *   2. B updates the stack, appending a new table to "tables.list".
	 *      This will both use a new inode and result in a different file
	 *      size, thus invalidating A's cache in theory.
	 *
	 *   3. B decides to auto-compact the stack and merges two tables. The
	 *      file size now matches what A has cached again. Furthermore, the
	 *      filesystem may decide to recycle the inode number of the file
	 *      we have replaced in (2) because it is not in use anymore.
	 *
	 *   4. A reloads the reftable stack. Neither the inode number nor the
	 *      file size changed. If the timestamps did not change either then
	 *      we think the cached copy of our stack is up-to-date.
	 *
	 * By keeping the file descriptor open the inode number cannot be
	 * recycled, mitigating the race.
	 */
	if (!err && fd >= 0 && !fstat(fd, &st->list_st) &&
	    st->list_st.st_dev && st->list_st.st_ino) {
		st->list_fd = fd;
		fd = -1;
	}

	if (fd >= 0)
		close(fd);
	free_names(names);
	free_names(names_after);
	return err;
}

/* -1 = error
   0 = up to date
   1 = changed. */
static int stack_uptodate(struct reftable_stack *st)
{
	char **names = NULL;
	int err;
	int i = 0;

	/*
	 * When we have cached stat information available then we use it to
	 * verify whether the file has been rewritten.
	 *
	 * Note that we explicitly do not want to use `stat_validity_check()`
	 * and friends here because they may end up not comparing the `st_dev`
	 * and `st_ino` fields. These functions thus cannot guarantee that we
	 * indeed still have the same file.
	 */
	if (st->list_fd >= 0) {
		struct stat list_st;

		if (stat(st->list_file, &list_st) < 0) {
			/*
			 * It's fine for "tables.list" to not exist. In that
			 * case, we have to refresh when the loaded stack has
			 * any readers.
			 */
			if (errno == ENOENT)
				return !!st->readers_len;
			return REFTABLE_IO_ERROR;
		}

		/*
		 * When "tables.list" refers to the same file we can assume
		 * that it didn't change. This is because we always use
		 * rename(3P) to update the file and never write to it
		 * directly.
		 */
		if (st->list_st.st_dev == list_st.st_dev &&
		    st->list_st.st_ino == list_st.st_ino)
			return 0;
	}

	err = read_lines(st->list_file, &names);
	if (err < 0)
		return err;

	for (i = 0; i < st->readers_len; i++) {
		if (!names[i]) {
			err = 1;
			goto done;
		}

		if (strcmp(st->readers[i]->name, names[i])) {
			err = 1;
			goto done;
		}
	}

	if (names[st->merged->stack_len]) {
		err = 1;
		goto done;
	}

done:
	free_names(names);
	return err;
}

int reftable_stack_reload(struct reftable_stack *st)
{
	int err = stack_uptodate(st);
	if (err > 0)
		return reftable_stack_reload_maybe_reuse(st, 1);
	return err;
}

int reftable_stack_add(struct reftable_stack *st,
		       int (*write)(struct reftable_writer *wr, void *arg),
		       void *arg)
{
	int err = stack_try_add(st, write, arg);
	if (err < 0) {
		if (err == REFTABLE_OUTDATED_ERROR) {
			/* Ignore error return, we want to propagate
			   REFTABLE_OUTDATED_ERROR.
			 */
			reftable_stack_reload(st);
		}
		return err;
	}

	return 0;
}

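/*
 * Sketch of the write-callback contract used by reftable_stack_add() above
 * (illustrative only; the "example_" name is a placeholder). The callback
 * must set the writer's update-index limits before adding records, and the
 * minimum must not fall below the stack's next update index.
 */
static int example_write_ref(struct reftable_writer *wr, void *arg)
{
	struct reftable_ref_record *ref = arg;
	/* A single-record table: min and max update index coincide. */
	reftable_writer_set_limits(wr, ref->update_index, ref->update_index);
	return reftable_writer_add_ref(wr, ref);
}
/*
 * A typical call site would set ref.update_index from
 * reftable_stack_next_update_index(st) and then invoke
 * reftable_stack_add(st, &example_write_ref, &ref).
 */
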
static void format_name(struct strbuf *dest, uint64_t min, uint64_t max)
{
	char buf[100];
	uint32_t rnd = (uint32_t)git_rand();
	snprintf(buf, sizeof(buf), "0x%012" PRIx64 "-0x%012" PRIx64 "-%08x",
		 min, max, rnd);
	strbuf_reset(dest);
	strbuf_addstr(dest, buf);
}

struct reftable_addition {
	struct tempfile *lock_file;
	struct reftable_stack *stack;

	char **new_tables;
	size_t new_tables_len, new_tables_cap;
	uint64_t next_update_index;
};

#define REFTABLE_ADDITION_INIT {0}

static int reftable_stack_init_addition(struct reftable_addition *add,
					struct reftable_stack *st)
{
	struct strbuf lock_file_name = STRBUF_INIT;
	int err = 0;
	add->stack = st;

	strbuf_addf(&lock_file_name, "%s.lock", st->list_file);

	add->lock_file = create_tempfile(lock_file_name.buf);
	if (!add->lock_file) {
		if (errno == EEXIST) {
			err = REFTABLE_LOCK_ERROR;
		} else {
			err = REFTABLE_IO_ERROR;
		}
		goto done;
	}
	if (st->config.default_permissions) {
		if (chmod(add->lock_file->filename.buf, st->config.default_permissions) < 0) {
			err = REFTABLE_IO_ERROR;
			goto done;
		}
	}

	err = stack_uptodate(st);
	if (err < 0)
		goto done;
	if (err > 0) {
		err = REFTABLE_OUTDATED_ERROR;
		goto done;
	}

	add->next_update_index = reftable_stack_next_update_index(st);
done:
	if (err) {
		reftable_addition_close(add);
	}
	strbuf_release(&lock_file_name);
	return err;
}

static void reftable_addition_close(struct reftable_addition *add)
{
	struct strbuf nm = STRBUF_INIT;
	size_t i;

	for (i = 0; i < add->new_tables_len; i++) {
		stack_filename(&nm, add->stack, add->new_tables[i]);
		unlink(nm.buf);
		reftable_free(add->new_tables[i]);
		add->new_tables[i] = NULL;
	}
	reftable_free(add->new_tables);
	add->new_tables = NULL;
	add->new_tables_len = 0;
	add->new_tables_cap = 0;

	delete_tempfile(&add->lock_file);
	strbuf_release(&nm);
}

void reftable_addition_destroy(struct reftable_addition *add)
{
	if (!add) {
		return;
	}
	reftable_addition_close(add);
	reftable_free(add);
}

int reftable_addition_commit(struct reftable_addition *add)
{
	struct strbuf table_list = STRBUF_INIT;
	int lock_file_fd = get_tempfile_fd(add->lock_file);
	int err = 0;
	size_t i;

	if (add->new_tables_len == 0)
		goto done;

	for (i = 0; i < add->stack->merged->stack_len; i++) {
		strbuf_addstr(&table_list, add->stack->readers[i]->name);
		strbuf_addstr(&table_list, "\n");
	}
	for (i = 0; i < add->new_tables_len; i++) {
		strbuf_addstr(&table_list, add->new_tables[i]);
		strbuf_addstr(&table_list, "\n");
	}

	err = write_in_full(lock_file_fd, table_list.buf, table_list.len);
	strbuf_release(&table_list);
	if (err < 0) {
		err = REFTABLE_IO_ERROR;
		goto done;
	}

	fsync_component_or_die(FSYNC_COMPONENT_REFERENCE, lock_file_fd,
			       get_tempfile_path(add->lock_file));

	err = rename_tempfile(&add->lock_file, add->stack->list_file);
	if (err < 0) {
		err = REFTABLE_IO_ERROR;
		goto done;
	}

	/* success, no more state to clean up. */
	for (i = 0; i < add->new_tables_len; i++)
		reftable_free(add->new_tables[i]);
	reftable_free(add->new_tables);
	add->new_tables = NULL;
	add->new_tables_len = 0;
	add->new_tables_cap = 0;

	err = reftable_stack_reload_maybe_reuse(add->stack, 1);
	if (err)
		goto done;

	if (!add->stack->config.disable_auto_compact) {
		/*
		 * Auto-compact the stack to keep the number of tables in
		 * control. It is possible that a concurrent writer is already
		 * trying to compact parts of the stack, which would lead to a
		 * `REFTABLE_LOCK_ERROR` because parts of the stack are locked
		 * already. This is a benign error though, so we ignore it.
		 */
		err = reftable_stack_auto_compact(add->stack);
		if (err < 0 && err != REFTABLE_LOCK_ERROR)
			goto done;
		err = 0;
	}

done:
	reftable_addition_close(add);
	return err;
}

int reftable_stack_new_addition(struct reftable_addition **dest,
				struct reftable_stack *st)
{
	int err = 0;
	struct reftable_addition empty = REFTABLE_ADDITION_INIT;
	REFTABLE_CALLOC_ARRAY(*dest, 1);
	**dest = empty;
	err = reftable_stack_init_addition(*dest, st);
	if (err) {
		reftable_free(*dest);
		*dest = NULL;
	}
	return err;
}

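/*
 * Sketch of the two-phase interface above (illustrative only): batch
 * several table writes into one addition and commit them atomically. This
 * reuses the hypothetical example_write_ref() callback from further up.
 */
static int example_add_transactionally(struct reftable_stack *st,
				       struct reftable_ref_record *ref)
{
	struct reftable_addition *add = NULL;
	int err = reftable_stack_new_addition(&add, st);
	if (err < 0)
		return err;

	err = reftable_addition_add(add, &example_write_ref, ref);
	if (!err)
		err = reftable_addition_commit(add);

	/* Also unlinks any uncommitted tables and drops the lock. */
	reftable_addition_destroy(add);
	return err;
}
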
static int stack_try_add(struct reftable_stack *st,
			 int (*write_table)(struct reftable_writer *wr,
					    void *arg),
			 void *arg)
{
	struct reftable_addition add = REFTABLE_ADDITION_INIT;
	int err = reftable_stack_init_addition(&add, st);
	if (err < 0)
		goto done;

	err = reftable_addition_add(&add, write_table, arg);
	if (err < 0)
		goto done;

	err = reftable_addition_commit(&add);
done:
	reftable_addition_close(&add);
	return err;
}

int reftable_addition_add(struct reftable_addition *add,
			  int (*write_table)(struct reftable_writer *wr,
					     void *arg),
			  void *arg)
{
	struct strbuf temp_tab_file_name = STRBUF_INIT;
	struct strbuf tab_file_name = STRBUF_INIT;
	struct strbuf next_name = STRBUF_INIT;
	struct reftable_writer *wr = NULL;
	struct tempfile *tab_file = NULL;
	int err = 0;
	int tab_fd;

	strbuf_reset(&next_name);
	format_name(&next_name, add->next_update_index, add->next_update_index);

	stack_filename(&temp_tab_file_name, add->stack, next_name.buf);
	strbuf_addstr(&temp_tab_file_name, ".temp.XXXXXX");

	tab_file = mks_tempfile(temp_tab_file_name.buf);
	if (!tab_file) {
		err = REFTABLE_IO_ERROR;
		goto done;
	}
	if (add->stack->config.default_permissions) {
		if (chmod(get_tempfile_path(tab_file),
			  add->stack->config.default_permissions)) {
			err = REFTABLE_IO_ERROR;
			goto done;
		}
	}
	tab_fd = get_tempfile_fd(tab_file);

	wr = reftable_new_writer(reftable_fd_write, reftable_fd_flush, &tab_fd,
				 &add->stack->config);
	err = write_table(wr, arg);
	if (err < 0)
		goto done;

	err = reftable_writer_close(wr);
	if (err == REFTABLE_EMPTY_TABLE_ERROR) {
		err = 0;
		goto done;
	}
	if (err < 0)
		goto done;

	err = close_tempfile_gently(tab_file);
	if (err < 0) {
		err = REFTABLE_IO_ERROR;
		goto done;
	}

	if (wr->min_update_index < add->next_update_index) {
		err = REFTABLE_API_ERROR;
		goto done;
	}

	format_name(&next_name, wr->min_update_index, wr->max_update_index);
	strbuf_addstr(&next_name, ".ref");
	stack_filename(&tab_file_name, add->stack, next_name.buf);

	/*
	  On Windows, this relies on rand() picking a unique destination name.
	  Maybe we should do a retry loop as well?
	 */
	err = rename_tempfile(&tab_file, tab_file_name.buf);
	if (err < 0) {
		err = REFTABLE_IO_ERROR;
		goto done;
	}

	REFTABLE_ALLOC_GROW(add->new_tables, add->new_tables_len + 1,
			    add->new_tables_cap);
	add->new_tables[add->new_tables_len++] = strbuf_detach(&next_name, NULL);
done:
	delete_tempfile(&tab_file);
	strbuf_release(&temp_tab_file_name);
	strbuf_release(&tab_file_name);
	strbuf_release(&next_name);
	reftable_writer_free(wr);
	return err;
}

uint64_t reftable_stack_next_update_index(struct reftable_stack *st)
{
	int sz = st->merged->stack_len;
	if (sz > 0)
		return reftable_reader_max_update_index(st->readers[sz - 1]) +
		       1;
	return 1;
}

static int stack_compact_locked(struct reftable_stack *st,
				size_t first, size_t last,
				struct reftable_log_expiry_config *config,
				struct tempfile **tab_file_out)
{
	struct strbuf next_name = STRBUF_INIT;
	struct strbuf tab_file_path = STRBUF_INIT;
	struct reftable_writer *wr = NULL;
	struct tempfile *tab_file;
	int tab_fd, err = 0;

	format_name(&next_name,
		    reftable_reader_min_update_index(st->readers[first]),
		    reftable_reader_max_update_index(st->readers[last]));
	stack_filename(&tab_file_path, st, next_name.buf);
	strbuf_addstr(&tab_file_path, ".temp.XXXXXX");

	tab_file = mks_tempfile(tab_file_path.buf);
	if (!tab_file) {
		err = REFTABLE_IO_ERROR;
		goto done;
	}
	tab_fd = get_tempfile_fd(tab_file);

	if (st->config.default_permissions &&
	    chmod(get_tempfile_path(tab_file), st->config.default_permissions) < 0) {
		err = REFTABLE_IO_ERROR;
		goto done;
	}

	wr = reftable_new_writer(reftable_fd_write, reftable_fd_flush,
				 &tab_fd, &st->config);
	err = stack_write_compact(st, wr, first, last, config);
	if (err < 0)
		goto done;

	err = reftable_writer_close(wr);
	if (err < 0)
		goto done;

	err = close_tempfile_gently(tab_file);
	if (err < 0)
		goto done;

	*tab_file_out = tab_file;
	tab_file = NULL;

done:
	delete_tempfile(&tab_file);
	reftable_writer_free(wr);
	strbuf_release(&next_name);
	strbuf_release(&tab_file_path);
	return err;
}

static int stack_write_compact(struct reftable_stack *st,
			       struct reftable_writer *wr,
			       size_t first, size_t last,
			       struct reftable_log_expiry_config *config)
{
	size_t subtabs_len = last - first + 1;
	struct reftable_table *subtabs = reftable_calloc(
		last - first + 1, sizeof(*subtabs));
	struct reftable_merged_table *mt = NULL;
	struct reftable_iterator it = { NULL };
	struct reftable_ref_record ref = { NULL };
	struct reftable_log_record log = { NULL };
	uint64_t entries = 0;
	int err = 0;

	for (size_t i = first, j = 0; i <= last; i++) {
		struct reftable_reader *t = st->readers[i];
		reftable_table_from_reader(&subtabs[j++], t);
		st->stats.bytes += t->size;
	}
	reftable_writer_set_limits(wr, st->readers[first]->min_update_index,
				   st->readers[last]->max_update_index);

	err = reftable_new_merged_table(&mt, subtabs, subtabs_len,
					st->config.hash_id);
	if (err < 0) {
		reftable_free(subtabs);
		goto done;
	}

	err = reftable_merged_table_seek_ref(mt, &it, "");
	if (err < 0)
		goto done;

	while (1) {
		err = reftable_iterator_next_ref(&it, &ref);
		if (err > 0) {
			err = 0;
			break;
		}
		if (err < 0)
			goto done;

		if (first == 0 && reftable_ref_record_is_deletion(&ref)) {
			continue;
		}

		err = reftable_writer_add_ref(wr, &ref);
		if (err < 0)
			goto done;
		entries++;
	}
	reftable_iterator_destroy(&it);

	err = reftable_merged_table_seek_log(mt, &it, "");
	if (err < 0)
		goto done;

	while (1) {
		err = reftable_iterator_next_log(&it, &log);
		if (err > 0) {
			err = 0;
			break;
		}
		if (err < 0)
			goto done;
		if (first == 0 && reftable_log_record_is_deletion(&log)) {
			continue;
		}

		if (config && config->min_update_index > 0 &&
		    log.update_index < config->min_update_index) {
			continue;
		}

		if (config && config->time > 0 &&
		    log.value.update.time < config->time) {
			continue;
		}

		err = reftable_writer_add_log(wr, &log);
		if (err < 0)
			goto done;
		entries++;
	}

done:
	reftable_iterator_destroy(&it);
	if (mt) {
		merged_table_release(mt);
		reftable_merged_table_free(mt);
	}
	reftable_ref_record_release(&ref);
	reftable_log_record_release(&log);
	st->stats.entries_written += entries;
	return err;
}

/*
 * Compact all tables in the range `[first, last]` (inclusive, matching the
 * loops below) into a single new table.
 *
 * This function returns `0` on success or a code `< 0` on failure. When the
 * stack or any of the tables in the specified range are already locked then
 * this function returns `REFTABLE_LOCK_ERROR`. This is a benign error that
 * callers can either ignore, or they may choose to retry compaction after some
 * amount of time.
 */
static int stack_compact_range(struct reftable_stack *st,
			       size_t first, size_t last,
			       struct reftable_log_expiry_config *expiry)
{
	struct strbuf tables_list_buf = STRBUF_INIT;
	struct strbuf new_table_name = STRBUF_INIT;
	struct strbuf new_table_path = STRBUF_INIT;
	struct strbuf table_name = STRBUF_INIT;
	struct lock_file tables_list_lock = LOCK_INIT;
	struct lock_file *table_locks = NULL;
	struct tempfile *new_table = NULL;
	int is_empty_table = 0, err = 0;
	size_t i;

	if (first > last || (!expiry && first == last)) {
		err = 0;
		goto done;
	}

	st->stats.attempts++;

	/*
	 * Hold the lock so that we can read "tables.list" and lock all tables
	 * which are part of the user-specified range.
	 */
	err = hold_lock_file_for_update(&tables_list_lock, st->list_file,
					LOCK_NO_DEREF);
	if (err < 0) {
		if (errno == EEXIST)
			err = REFTABLE_LOCK_ERROR;
		else
			err = REFTABLE_IO_ERROR;
		goto done;
	}

	err = stack_uptodate(st);
	if (err)
		goto done;

	/*
	 * Lock all tables in the user-provided range. This is the slice of our
	 * stack which we'll compact.
	 */
	REFTABLE_CALLOC_ARRAY(table_locks, last - first + 1);
	for (i = first; i <= last; i++) {
		stack_filename(&table_name, st, reader_name(st->readers[i]));

		err = hold_lock_file_for_update(&table_locks[i - first],
						table_name.buf, LOCK_NO_DEREF);
		if (err < 0) {
			if (errno == EEXIST)
				err = REFTABLE_LOCK_ERROR;
			else
				err = REFTABLE_IO_ERROR;
			goto done;
		}

		/*
		 * We need to close the lockfiles as we might otherwise easily
		 * run into file descriptor exhaustion when we compress a lot
		 * of tables.
		 */
		err = close_lock_file_gently(&table_locks[i - first]);
		if (err < 0) {
			err = REFTABLE_IO_ERROR;
			goto done;
		}
	}

	/*
	 * We have locked all tables in our range and can thus release the
	 * "tables.list" lock while compacting the locked tables. This allows
	 * concurrent updates to the stack to proceed.
	 */
	err = rollback_lock_file(&tables_list_lock);
	if (err < 0) {
		err = REFTABLE_IO_ERROR;
		goto done;
	}

	/*
	 * Compact the now-locked tables into a new table. Note that compacting
	 * these tables may end up with an empty new table in case tombstones
	 * end up cancelling out all refs in that range.
	 */
	err = stack_compact_locked(st, first, last, expiry, &new_table);
	if (err < 0) {
		if (err != REFTABLE_EMPTY_TABLE_ERROR)
			goto done;
		is_empty_table = 1;
	}

	/*
	 * Now that we have written the new, compacted table we need to re-lock
	 * "tables.list". We'll then replace the compacted range of tables with
	 * the new table.
	 */
	err = hold_lock_file_for_update(&tables_list_lock, st->list_file,
					LOCK_NO_DEREF);
	if (err < 0) {
		if (errno == EEXIST)
			err = REFTABLE_LOCK_ERROR;
		else
			err = REFTABLE_IO_ERROR;
		goto done;
	}

	if (st->config.default_permissions) {
		if (chmod(get_lock_file_path(&tables_list_lock),
			  st->config.default_permissions) < 0) {
			err = REFTABLE_IO_ERROR;
			goto done;
		}
	}

	/*
	 * If the resulting compacted table is not empty, then we need to move
	 * it into place now.
	 */
	if (!is_empty_table) {
		format_name(&new_table_name, st->readers[first]->min_update_index,
			    st->readers[last]->max_update_index);
		strbuf_addstr(&new_table_name, ".ref");
		stack_filename(&new_table_path, st, new_table_name.buf);

		err = rename_tempfile(&new_table, new_table_path.buf);
		if (err < 0) {
			err = REFTABLE_IO_ERROR;
			goto done;
		}
	}

	/*
	 * Write the new "tables.list" contents with the compacted table we
	 * have just written. In case the compacted table became empty we
	 * simply skip writing it.
	 */
	for (i = 0; i < first; i++)
		strbuf_addf(&tables_list_buf, "%s\n", st->readers[i]->name);
	if (!is_empty_table)
		strbuf_addf(&tables_list_buf, "%s\n", new_table_name.buf);
	for (i = last + 1; i < st->merged->stack_len; i++)
		strbuf_addf(&tables_list_buf, "%s\n", st->readers[i]->name);

	err = write_in_full(get_lock_file_fd(&tables_list_lock),
			    tables_list_buf.buf, tables_list_buf.len);
	if (err < 0) {
		err = REFTABLE_IO_ERROR;
		unlink(new_table_path.buf);
		goto done;
	}

	err = fsync_component(FSYNC_COMPONENT_REFERENCE, get_lock_file_fd(&tables_list_lock));
	if (err < 0) {
		err = REFTABLE_IO_ERROR;
		unlink(new_table_path.buf);
		goto done;
	}

	err = commit_lock_file(&tables_list_lock);
	if (err < 0) {
		err = REFTABLE_IO_ERROR;
		unlink(new_table_path.buf);
		goto done;
	}

	/*
	 * Reload the stack before deleting the compacted tables. We can only
	 * delete the files after we closed them on Windows, so this needs to
	 * happen first.
	 */
	err = reftable_stack_reload_maybe_reuse(st, first < last);
	if (err < 0)
		goto done;

	/*
	 * Delete the old tables. They may still be in use by concurrent
	 * readers, so it is expected that unlinking tables may fail.
	 */
	for (i = first; i <= last; i++) {
		struct lock_file *table_lock = &table_locks[i - first];
		char *table_path = get_locked_file_path(table_lock);
		unlink(table_path);
		free(table_path);
	}

done:
	rollback_lock_file(&tables_list_lock);
	for (i = first; table_locks && i <= last; i++)
		rollback_lock_file(&table_locks[i - first]);
	reftable_free(table_locks);

	delete_tempfile(&new_table);
	strbuf_release(&new_table_name);
	strbuf_release(&new_table_path);

	strbuf_release(&tables_list_buf);
	strbuf_release(&table_name);
	return err;
}

int reftable_stack_compact_all(struct reftable_stack *st,
			       struct reftable_log_expiry_config *config)
{
	return stack_compact_range(st, 0, st->merged->stack_len ?
			st->merged->stack_len - 1 : 0, config);
}

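/*
 * Sketch of driving log expiry through a full compaction (illustrative
 * only; the cutoff is a placeholder). Per stack_write_compact() above, log
 * records older than `time` or below `min_update_index` are dropped while
 * the tables are rewritten.
 */
static int example_expire_reflogs(struct reftable_stack *st,
				  uint64_t cutoff_time)
{
	struct reftable_log_expiry_config expiry = { 0 };
	expiry.time = cutoff_time;
	return reftable_stack_compact_all(st, &expiry);
}
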
static int stack_compact_range_stats(struct reftable_stack *st,
				     size_t first, size_t last,
				     struct reftable_log_expiry_config *config)
{
	int err = stack_compact_range(st, first, last, config);
	if (err == REFTABLE_LOCK_ERROR)
		st->stats.failures++;
	return err;
}

static int segment_size(struct segment *s)
{
	return s->end - s->start;
}

struct segment suggest_compaction_segment(uint64_t *sizes, size_t n)
{
	struct segment seg = { 0 };
	uint64_t bytes;
	size_t i;

	/*
	 * If there are no tables or only a single one then we don't have to
	 * compact anything. The sequence is geometric by definition already.
	 */
	if (n <= 1)
		return seg;

	/*
	 * Find the ending table of the compaction segment needed to restore the
	 * geometric sequence. Note that the segment end is exclusive.
	 *
	 * To do so, we iterate backwards starting from the most recent table
	 * until a valid segment end is found. If the preceding table is smaller
	 * than the current table multiplied by the geometric factor (2), the
	 * compaction segment end has been identified.
	 *
	 * Tables after the ending point are not added to the byte count because
	 * they are already valid members of the geometric sequence. Due to the
	 * properties of a geometric sequence, it is not possible for the sum of
	 * these tables to exceed the value of the ending point table.
	 *
	 * Example table size sequence requiring no compaction:
	 * 	64, 32, 16, 8, 4, 2, 1
	 *
	 * Example table size sequence where compaction segment end is set to
	 * the last table. Since the segment end is exclusive, the last table is
	 * excluded during subsequent compaction and the table with size 3 is
	 * the final table included:
	 * 	64, 32, 16, 8, 4, 3, 1
	 */
	for (i = n - 1; i > 0; i--) {
		if (sizes[i - 1] < sizes[i] * 2) {
			seg.end = i + 1;
			bytes = sizes[i];
			break;
		}
	}

	/*
	 * Find the starting table of the compaction segment by iterating
	 * through the remaining tables and keeping track of the accumulated
	 * size of all tables seen from the segment end table. The previous
	 * table is compared to the accumulated size because the tables from the
	 * segment end are merged backwards recursively.
	 *
	 * Note that we keep iterating even after we have found the first
	 * starting point. This is because there may be tables in the stack
	 * preceding that first starting point which violate the geometric
	 * sequence.
	 *
	 * Example compaction segment start set to table with size 32:
	 * 	128, 32, 16, 8, 4, 3, 1
	 */
	for (; i > 0; i--) {
		uint64_t curr = bytes;
		bytes += sizes[i - 1];

		if (sizes[i - 1] < curr * 2) {
			seg.start = i - 1;
			seg.bytes = bytes;
		}
	}

	return seg;
}

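/*
 * A small illustration of the worked example from the comments above
 * (illustrative only, not referenced by the library): for sizes
 * { 128, 32, 16, 8, 4, 3, 1 }, the trailing 3 violates the factor-2
 * sequence, so the suggested segment spans indices [1, 6) and covers the
 * tables of sizes 32, 16, 8, 4 and 3.
 */
static void example_suggest_segment(void)
{
	uint64_t sizes[] = { 128, 32, 16, 8, 4, 3, 1 };
	struct segment seg = suggest_compaction_segment(sizes, 7);
	/* Here seg.start == 1, seg.end == 6 and seg.bytes == 63. */
	(void) seg;
}
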
static uint64_t *stack_table_sizes_for_compaction(struct reftable_stack *st)
{
	uint64_t *sizes =
		reftable_calloc(st->merged->stack_len, sizeof(*sizes));
	int version = (st->config.hash_id == GIT_SHA1_FORMAT_ID) ? 1 : 2;
	int overhead = header_size(version) - 1;
	int i = 0;
	for (i = 0; i < st->merged->stack_len; i++) {
		sizes[i] = st->readers[i]->size - overhead;
	}
	return sizes;
}

int reftable_stack_auto_compact(struct reftable_stack *st)
{
	uint64_t *sizes = stack_table_sizes_for_compaction(st);
	struct segment seg =
		suggest_compaction_segment(sizes, st->merged->stack_len);
	reftable_free(sizes);
	if (segment_size(&seg) > 0)
		return stack_compact_range_stats(st, seg.start, seg.end - 1,
						 NULL);

	return 0;
}

struct reftable_compaction_stats *
reftable_stack_compaction_stats(struct reftable_stack *st)
{
	return &st->stats;
}

int reftable_stack_read_ref(struct reftable_stack *st, const char *refname,
			    struct reftable_ref_record *ref)
{
	struct reftable_table tab = { NULL };
	reftable_table_from_merged_table(&tab, reftable_stack_merged_table(st));
	return reftable_table_read_ref(&tab, refname, ref);
}

int reftable_stack_read_log(struct reftable_stack *st, const char *refname,
			    struct reftable_log_record *log)
{
	struct reftable_iterator it = { NULL };
	struct reftable_merged_table *mt = reftable_stack_merged_table(st);
	int err = reftable_merged_table_seek_log(mt, &it, refname);
	if (err)
		goto done;

	err = reftable_iterator_next_log(&it, log);
	if (err)
		goto done;

	if (strcmp(log->refname, refname) ||
	    reftable_log_record_is_deletion(log)) {
		err = 1;
		goto done;
	}

done:
	if (err) {
		reftable_log_record_release(log);
	}
	reftable_iterator_destroy(&it);
	return err;
}

static int is_table_name(const char *s)
{
	const char *dot = strrchr(s, '.');
	return dot && !strcmp(dot, ".ref");
}

static void remove_maybe_stale_table(struct reftable_stack *st, uint64_t max,
				     const char *name)
{
	int err = 0;
	uint64_t update_idx = 0;
	struct reftable_block_source src = { NULL };
	struct reftable_reader *rd = NULL;
	struct strbuf table_path = STRBUF_INIT;
	stack_filename(&table_path, st, name);

	err = reftable_block_source_from_file(&src, table_path.buf);
	if (err < 0)
		goto done;

	err = reftable_new_reader(&rd, &src, name);
	if (err < 0)
		goto done;

	update_idx = reftable_reader_max_update_index(rd);
	reftable_reader_free(rd);

	if (update_idx <= max) {
		unlink(table_path.buf);
	}
done:
	strbuf_release(&table_path);
}

static int reftable_stack_clean_locked(struct reftable_stack *st)
{
	uint64_t max = reftable_merged_table_max_update_index(
		reftable_stack_merged_table(st));
	DIR *dir = opendir(st->reftable_dir);
	struct dirent *d = NULL;
	if (!dir) {
		return REFTABLE_IO_ERROR;
	}

	while ((d = readdir(dir))) {
		int i = 0;
		int found = 0;
		if (!is_table_name(d->d_name))
			continue;

		for (i = 0; !found && i < st->readers_len; i++) {
			found = !strcmp(reader_name(st->readers[i]), d->d_name);
		}
		if (found)
			continue;

		remove_maybe_stale_table(st, max, d->d_name);
	}

	closedir(dir);
	return 0;
}

int reftable_stack_clean(struct reftable_stack *st)
{
	struct reftable_addition *add = NULL;
	int err = reftable_stack_new_addition(&add, st);
	if (err < 0) {
		goto done;
	}

	err = reftable_stack_reload(st);
	if (err < 0) {
		goto done;
	}

	err = reftable_stack_clean_locked(st);

done:
	reftable_addition_destroy(add);
	return err;
}

int reftable_stack_print_directory(const char *stackdir, uint32_t hash_id)
{
	struct reftable_stack *stack = NULL;
	struct reftable_write_options cfg = { .hash_id = hash_id };
	struct reftable_merged_table *merged = NULL;
	struct reftable_table table = { NULL };

	int err = reftable_new_stack(&stack, stackdir, cfg);
	if (err < 0)
		goto done;

	merged = reftable_stack_merged_table(stack);
	reftable_table_from_merged_table(&table, merged);
	err = reftable_table_print(&table);
done:
	if (stack)
		reftable_stack_destroy(stack);
	return err;
}