/*
Copyright 2020 Google LLC

Use of this source code is governed by a BSD-style
license that can be found in the LICENSE file or at
https://developers.google.com/open-source/licenses/bsd
*/

#include "stack.h"

#include "../write-or-die.h"
#include "system.h"
#include "constants.h"
#include "merged.h"
#include "reader.h"
#include "reftable-error.h"
#include "reftable-generic.h"
#include "reftable-record.h"
#include "reftable-merged.h"
#include "writer.h"
#include "tempfile.h"

static int stack_try_add(struct reftable_stack *st,
			 int (*write_table)(struct reftable_writer *wr,
					    void *arg),
			 void *arg);
static int stack_write_compact(struct reftable_stack *st,
			       struct reftable_writer *wr,
			       size_t first, size_t last,
			       struct reftable_log_expiry_config *config);
static void reftable_addition_close(struct reftable_addition *add);
static int reftable_stack_reload_maybe_reuse(struct reftable_stack *st,
					     int reuse_open);

static void stack_filename(struct strbuf *dest, struct reftable_stack *st,
			   const char *name)
{
	strbuf_reset(dest);
	strbuf_addstr(dest, st->reftable_dir);
	strbuf_addstr(dest, "/");
	strbuf_addstr(dest, name);
}

static ssize_t reftable_fd_write(void *arg, const void *data, size_t sz)
{
	int *fdp = (int *)arg;
	return write_in_full(*fdp, data, sz);
}

static int reftable_fd_flush(void *arg)
{
	int *fdp = (int *)arg;

	return fsync_component(FSYNC_COMPONENT_REFERENCE, *fdp);
}

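/*
 * Allocate a new stack for the reftables under `dir`, defaulting the hash
 * function to SHA-1 when the options do not specify one, and load the tables
 * listed in "tables.list". On failure the partially initialized stack is
 * destroyed and `*dest` stays NULL.
 */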
int reftable_new_stack(struct reftable_stack **dest, const char *dir,
		       const struct reftable_write_options *_opts)
{
	struct reftable_stack *p = reftable_calloc(1, sizeof(*p));
	struct strbuf list_file_name = STRBUF_INIT;
	struct reftable_write_options opts = {0};
	int err = 0;

	if (_opts)
		opts = *_opts;
	if (opts.hash_id == 0)
		opts.hash_id = GIT_SHA1_FORMAT_ID;

	*dest = NULL;

	strbuf_reset(&list_file_name);
	strbuf_addstr(&list_file_name, dir);
	strbuf_addstr(&list_file_name, "/tables.list");

	p->list_file = strbuf_detach(&list_file_name, NULL);
	p->list_fd = -1;
	p->reftable_dir = xstrdup(dir);
	p->opts = opts;

	err = reftable_stack_reload_maybe_reuse(p, 1);
	if (err < 0) {
		reftable_stack_destroy(p);
	} else {
		*dest = p;
	}
	return err;
}

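/*
 * Read the whole file referenced by `fd` into memory and parse it as a
 * NUL-terminated buffer of newline-separated table names.
 */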
static int fd_read_lines(int fd, char ***namesp)
{
	off_t size = lseek(fd, 0, SEEK_END);
	char *buf = NULL;
	int err = 0;
	if (size < 0) {
		err = REFTABLE_IO_ERROR;
		goto done;
	}
	err = lseek(fd, 0, SEEK_SET);
	if (err < 0) {
		err = REFTABLE_IO_ERROR;
		goto done;
	}

	REFTABLE_ALLOC_ARRAY(buf, size + 1);
	if (read_in_full(fd, buf, size) != size) {
		err = REFTABLE_IO_ERROR;
		goto done;
	}
	buf[size] = 0;

	parse_names(buf, size, namesp);

done:
	reftable_free(buf);
	return err;
}

int read_lines(const char *filename, char ***namesp)
{
	int fd = open(filename, O_RDONLY);
	int err = 0;
	if (fd < 0) {
		if (errno == ENOENT) {
			REFTABLE_CALLOC_ARRAY(*namesp, 1);
			return 0;
		}

		return REFTABLE_IO_ERROR;
	}
	err = fd_read_lines(fd, namesp);
	close(fd);
	return err;
}

void reftable_stack_init_ref_iterator(struct reftable_stack *st,
				      struct reftable_iterator *it)
{
	merged_table_init_iter(reftable_stack_merged_table(st),
			       it, BLOCK_TYPE_REF);
}

void reftable_stack_init_log_iterator(struct reftable_stack *st,
				      struct reftable_iterator *it)
{
	merged_table_init_iter(reftable_stack_merged_table(st),
			       it, BLOCK_TYPE_LOG);
}

struct reftable_merged_table *
reftable_stack_merged_table(struct reftable_stack *st)
{
	return st->merged;
}

static int has_name(char **names, const char *name)
{
	while (*names) {
		if (!strcmp(*names, name))
			return 1;
		names++;
	}
	return 0;
}

/* Close and free the stack */
void reftable_stack_destroy(struct reftable_stack *st)
{
	char **names = NULL;
	int err = 0;
	if (st->merged) {
		reftable_merged_table_free(st->merged);
		st->merged = NULL;
	}

	err = read_lines(st->list_file, &names);
	if (err < 0) {
		FREE_AND_NULL(names);
	}

	if (st->readers) {
		int i = 0;
		struct strbuf filename = STRBUF_INIT;
		for (i = 0; i < st->readers_len; i++) {
			const char *name = reader_name(st->readers[i]);
			strbuf_reset(&filename);
			if (names && !has_name(names, name)) {
				stack_filename(&filename, st, name);
			}
			reftable_reader_free(st->readers[i]);

			if (filename.len) {
				/* On Windows, can only unlink after closing. */
				unlink(filename.buf);
			}
		}
		strbuf_release(&filename);
		st->readers_len = 0;
		FREE_AND_NULL(st->readers);
	}

	if (st->list_fd >= 0) {
		close(st->list_fd);
		st->list_fd = -1;
	}

	FREE_AND_NULL(st->list_file);
	FREE_AND_NULL(st->reftable_dir);
	reftable_free(st);
	free_names(names);
}

static struct reftable_reader **stack_copy_readers(struct reftable_stack *st,
						   int cur_len)
{
	struct reftable_reader **cur = reftable_calloc(cur_len, sizeof(*cur));
	int i = 0;
	for (i = 0; i < cur_len; i++) {
		cur[i] = st->readers[i];
	}
	return cur;
}

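/*
 * Replace the stack's readers with the tables listed in `names`. When
 * `reuse_open` is set, readers for tables which are already open are reused
 * rather than reopened from disk; tables which dropped out of the list are
 * closed and unlinked.
 */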
static int reftable_stack_reload_once(struct reftable_stack *st, char **names,
				      int reuse_open)
{
	size_t cur_len = !st->merged ? 0 : st->merged->stack_len;
	struct reftable_reader **cur = stack_copy_readers(st, cur_len);
	size_t names_len = names_length(names);
	struct reftable_reader **new_readers =
		reftable_calloc(names_len, sizeof(*new_readers));
	struct reftable_table *new_tables =
		reftable_calloc(names_len, sizeof(*new_tables));
	size_t new_readers_len = 0;
	struct reftable_merged_table *new_merged = NULL;
	struct strbuf table_path = STRBUF_INIT;
	int err = 0;
	size_t i;

	while (*names) {
		struct reftable_reader *rd = NULL;
		char *name = *names++;

		/* this is linear; we assume compaction keeps the number of
		   tables under control so this is not quadratic. */
		for (i = 0; reuse_open && i < cur_len; i++) {
			if (cur[i] && 0 == strcmp(cur[i]->name, name)) {
				rd = cur[i];
				cur[i] = NULL;
				break;
			}
		}

		if (!rd) {
			struct reftable_block_source src = { NULL };
			stack_filename(&table_path, st, name);

			err = reftable_block_source_from_file(&src,
							      table_path.buf);
			if (err < 0)
				goto done;

			err = reftable_new_reader(&rd, &src, name);
			if (err < 0)
				goto done;
		}

		new_readers[new_readers_len] = rd;
		reftable_table_from_reader(&new_tables[new_readers_len], rd);
		new_readers_len++;
	}

	/* success! */
	err = reftable_new_merged_table(&new_merged, new_tables,
					new_readers_len, st->opts.hash_id);
	if (err < 0)
		goto done;

	new_tables = NULL;
	st->readers_len = new_readers_len;
	if (st->merged)
		reftable_merged_table_free(st->merged);
	if (st->readers) {
		reftable_free(st->readers);
	}
	st->readers = new_readers;
	new_readers = NULL;
	new_readers_len = 0;

	new_merged->suppress_deletions = 1;
	st->merged = new_merged;
	for (i = 0; i < cur_len; i++) {
		if (cur[i]) {
			const char *name = reader_name(cur[i]);
			stack_filename(&table_path, st, name);

			reader_close(cur[i]);
			reftable_reader_free(cur[i]);

			/* On Windows, can only unlink after closing. */
			unlink(table_path.buf);
		}
	}

done:
	for (i = 0; i < new_readers_len; i++) {
		reader_close(new_readers[i]);
		reftable_reader_free(new_readers[i]);
	}
	reftable_free(new_readers);
	reftable_free(new_tables);
	reftable_free(cur);
	strbuf_release(&table_path);
	return err;
}

/* return negative if a before b. */
static int tv_cmp(struct timeval *a, struct timeval *b)
{
	time_t diff = a->tv_sec - b->tv_sec;
	int udiff = a->tv_usec - b->tv_usec;

	if (diff != 0)
		return diff;

	return udiff;
}

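/*
 * Reload the stack from "tables.list", retrying with randomized, growing
 * backoff for roughly three seconds when a concurrent writer makes a table
 * disappear from under us (REFTABLE_NOT_EXIST_ERROR with a changed name
 * list).
 */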
static int reftable_stack_reload_maybe_reuse(struct reftable_stack *st,
					     int reuse_open)
{
	char **names = NULL, **names_after = NULL;
	struct timeval deadline;
	int64_t delay = 0;
	int tries = 0, err;
	int fd = -1;

	err = gettimeofday(&deadline, NULL);
	if (err < 0)
		goto out;
	deadline.tv_sec += 3;

	while (1) {
		struct timeval now;

		err = gettimeofday(&now, NULL);
		if (err < 0)
			goto out;

		/*
		 * Only look at deadlines after the first few times. This
		 * simplifies debugging in GDB.
		 */
		tries++;
		if (tries > 3 && tv_cmp(&now, &deadline) >= 0)
			goto out;

		fd = open(st->list_file, O_RDONLY);
		if (fd < 0) {
			if (errno != ENOENT) {
				err = REFTABLE_IO_ERROR;
				goto out;
			}

			REFTABLE_CALLOC_ARRAY(names, 1);
		} else {
			err = fd_read_lines(fd, &names);
			if (err < 0)
				goto out;
		}

		err = reftable_stack_reload_once(st, names, reuse_open);
		if (!err)
			break;
		if (err != REFTABLE_NOT_EXIST_ERROR)
			goto out;

		/*
		 * REFTABLE_NOT_EXIST_ERROR can be caused by a concurrent
		 * writer. Check if there was one by checking if the name list
		 * changed.
		 */
		err = read_lines(st->list_file, &names_after);
		if (err < 0)
			goto out;
		if (names_equal(names_after, names)) {
			err = REFTABLE_NOT_EXIST_ERROR;
			goto out;
		}

		free_names(names);
		names = NULL;
		free_names(names_after);
		names_after = NULL;
		close(fd);
		fd = -1;

		delay = delay + (delay * rand()) / RAND_MAX + 1;
		sleep_millisec(delay);
	}

out:
	/*
	 * Invalidate the stat cache. It is sufficient to only close the file
	 * descriptor and keep the cached stat info because we never use the
	 * latter when the former is negative.
	 */
	if (st->list_fd >= 0) {
		close(st->list_fd);
		st->list_fd = -1;
	}

	/*
	 * Cache stat information in case it provides a useful signal to us.
	 * According to POSIX, "The st_ino and st_dev fields taken together
	 * uniquely identify the file within the system." That being said,
	 * Windows is not POSIX compliant and we do not have these fields
	 * available. So the information we have there is insufficient to
	 * determine whether two file descriptors point to the same file.
	 *
	 * While we could fall back to using other signals like the file's
	 * mtime, those are not sufficient to avoid races. We thus refrain from
	 * using the stat cache on such systems and fall back to the secondary
	 * caching mechanism, which is to check whether contents of the file
	 * have changed.
	 *
	 * On other systems which are POSIX compliant we must keep the file
	 * descriptor open. This is to avoid a race condition where two
	 * processes access the reftable stack at the same point in time:
	 *
	 *   1. A reads the reftable stack and caches its stat info.
	 *
	 *   2. B updates the stack, appending a new table to "tables.list".
	 *      This will both use a new inode and result in a different file
	 *      size, thus invalidating A's cache in theory.
	 *
	 *   3. B decides to auto-compact the stack and merges two tables. The
	 *      file size now matches what A has cached again. Furthermore, the
	 *      filesystem may decide to recycle the inode number of the file
	 *      we have replaced in (2) because it is not in use anymore.
	 *
	 *   4. A reloads the reftable stack. Neither the inode number nor the
	 *      file size changed. If the timestamps did not change either then
	 *      we think the cached copy of our stack is up-to-date.
	 *
	 * By keeping the file descriptor open the inode number cannot be
	 * recycled, mitigating the race.
	 */
	if (!err && fd >= 0 && !fstat(fd, &st->list_st) &&
	    st->list_st.st_dev && st->list_st.st_ino) {
		st->list_fd = fd;
		fd = -1;
	}

	if (fd >= 0)
		close(fd);
	free_names(names);
	free_names(names_after);
	return err;
}

/* -1 = error
   0 = up to date
   1 = changed. */
static int stack_uptodate(struct reftable_stack *st)
{
	char **names = NULL;
	int err;
	int i = 0;

	/*
	 * When we have cached stat information available then we use it to
	 * verify whether the file has been rewritten.
	 *
	 * Note that we explicitly do not want to use `stat_validity_check()`
	 * and friends here because they may end up not comparing the `st_dev`
	 * and `st_ino` fields. These functions thus cannot guarantee that we
	 * indeed still have the same file.
	 */
	if (st->list_fd >= 0) {
		struct stat list_st;

		if (stat(st->list_file, &list_st) < 0) {
			/*
			 * It's fine for "tables.list" to not exist. In that
			 * case, we have to refresh when the loaded stack has
			 * any readers.
			 */
			if (errno == ENOENT)
				return !!st->readers_len;
			return REFTABLE_IO_ERROR;
		}

		/*
		 * When "tables.list" refers to the same file we can assume
		 * that it didn't change. This is because we always use
		 * rename(3P) to update the file and never write to it
		 * directly.
		 */
		if (st->list_st.st_dev == list_st.st_dev &&
		    st->list_st.st_ino == list_st.st_ino)
			return 0;
	}

	err = read_lines(st->list_file, &names);
	if (err < 0)
		return err;

	for (i = 0; i < st->readers_len; i++) {
		if (!names[i]) {
			err = 1;
			goto done;
		}

		if (strcmp(st->readers[i]->name, names[i])) {
			err = 1;
			goto done;
		}
	}

	if (names[st->merged->stack_len]) {
		err = 1;
		goto done;
	}

done:
	free_names(names);
	return err;
}

int reftable_stack_reload(struct reftable_stack *st)
{
	int err = stack_uptodate(st);
	if (err > 0)
		return reftable_stack_reload_maybe_reuse(st, 1);
	return err;
}

int reftable_stack_add(struct reftable_stack *st,
		       int (*write)(struct reftable_writer *wr, void *arg),
		       void *arg)
{
	int err = stack_try_add(st, write, arg);
	if (err < 0) {
		if (err == REFTABLE_OUTDATED_ERROR) {
			/* Ignore error return, we want to propagate
			   REFTABLE_OUTDATED_ERROR.
			*/
			reftable_stack_reload(st);
		}
		return err;
	}

	return 0;
}

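/*
 * Format a table name that encodes the update-index range the table covers,
 * plus a random suffix to avoid collisions. For example, format_name(&buf, 1, 2)
 * yields "0x000000000001-0x000000000002-" followed by eight random hex digits.
 */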
static void format_name(struct strbuf *dest, uint64_t min, uint64_t max)
{
	char buf[100];
	uint32_t rnd = (uint32_t)git_rand();
	snprintf(buf, sizeof(buf), "0x%012" PRIx64 "-0x%012" PRIx64 "-%08x",
		 min, max, rnd);
	strbuf_reset(dest);
	strbuf_addstr(dest, buf);
}

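/*
 * An addition is a transaction that stacks one or more new tables on top of
 * the existing ones while holding "tables.list.lock". A minimal usage sketch,
 * with error handling omitted and `write_my_table`/`arg` standing in for a
 * caller-provided writer callback:
 *
 *	struct reftable_addition *add = NULL;
 *	reftable_stack_new_addition(&add, st);
 *	reftable_addition_add(add, write_my_table, arg);
 *	reftable_addition_commit(add);
 *	reftable_addition_destroy(add);
 */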
struct reftable_addition {
	struct tempfile *lock_file;
	struct reftable_stack *stack;

	char **new_tables;
	size_t new_tables_len, new_tables_cap;
	uint64_t next_update_index;
};

#define REFTABLE_ADDITION_INIT {0}

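/*
 * Take the "tables.list.lock" lockfile for the stack and verify that the
 * in-memory stack is still up to date. Returns REFTABLE_OUTDATED_ERROR when
 * it is not, in which case the caller must reload before retrying.
 */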
static int reftable_stack_init_addition(struct reftable_addition *add,
					struct reftable_stack *st)
{
	struct strbuf lock_file_name = STRBUF_INIT;
	int err = 0;
	add->stack = st;

	strbuf_addf(&lock_file_name, "%s.lock", st->list_file);

	add->lock_file = create_tempfile(lock_file_name.buf);
	if (!add->lock_file) {
		if (errno == EEXIST) {
			err = REFTABLE_LOCK_ERROR;
		} else {
			err = REFTABLE_IO_ERROR;
		}
		goto done;
	}
	if (st->opts.default_permissions) {
		if (chmod(add->lock_file->filename.buf, st->opts.default_permissions) < 0) {
			err = REFTABLE_IO_ERROR;
			goto done;
		}
	}

	err = stack_uptodate(st);
	if (err < 0)
		goto done;
	if (err > 0) {
		err = REFTABLE_OUTDATED_ERROR;
		goto done;
	}

	add->next_update_index = reftable_stack_next_update_index(st);
done:
	if (err) {
		reftable_addition_close(add);
	}
	strbuf_release(&lock_file_name);
	return err;
}

static void reftable_addition_close(struct reftable_addition *add)
{
	struct strbuf nm = STRBUF_INIT;
	size_t i;

	for (i = 0; i < add->new_tables_len; i++) {
		stack_filename(&nm, add->stack, add->new_tables[i]);
		unlink(nm.buf);
		reftable_free(add->new_tables[i]);
		add->new_tables[i] = NULL;
	}
	reftable_free(add->new_tables);
	add->new_tables = NULL;
	add->new_tables_len = 0;
	add->new_tables_cap = 0;

	delete_tempfile(&add->lock_file);
	strbuf_release(&nm);
}

void reftable_addition_destroy(struct reftable_addition *add)
{
	if (!add) {
		return;
	}
	reftable_addition_close(add);
	reftable_free(add);
}

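/*
 * Commit the addition: write the old table list plus the newly added tables
 * into the lockfile, fsync it, rename it into place as "tables.list", then
 * reload the stack and opportunistically auto-compact it.
 */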
int reftable_addition_commit(struct reftable_addition *add)
{
	struct strbuf table_list = STRBUF_INIT;
	int lock_file_fd = get_tempfile_fd(add->lock_file);
	int err = 0;
	size_t i;

	if (add->new_tables_len == 0)
		goto done;

	for (i = 0; i < add->stack->merged->stack_len; i++) {
		strbuf_addstr(&table_list, add->stack->readers[i]->name);
		strbuf_addstr(&table_list, "\n");
	}
	for (i = 0; i < add->new_tables_len; i++) {
		strbuf_addstr(&table_list, add->new_tables[i]);
		strbuf_addstr(&table_list, "\n");
	}

	err = write_in_full(lock_file_fd, table_list.buf, table_list.len);
	strbuf_release(&table_list);
	if (err < 0) {
		err = REFTABLE_IO_ERROR;
		goto done;
	}

	fsync_component_or_die(FSYNC_COMPONENT_REFERENCE, lock_file_fd,
			       get_tempfile_path(add->lock_file));

	err = rename_tempfile(&add->lock_file, add->stack->list_file);
	if (err < 0) {
		err = REFTABLE_IO_ERROR;
		goto done;
	}

	/* success, no more state to clean up. */
	for (i = 0; i < add->new_tables_len; i++)
		reftable_free(add->new_tables[i]);
	reftable_free(add->new_tables);
	add->new_tables = NULL;
	add->new_tables_len = 0;
	add->new_tables_cap = 0;

	err = reftable_stack_reload_maybe_reuse(add->stack, 1);
	if (err)
		goto done;

	if (!add->stack->opts.disable_auto_compact) {
		/*
		 * Auto-compact the stack to keep the number of tables under
		 * control. It is possible that a concurrent writer is already
		 * trying to compact parts of the stack, which would lead to a
		 * `REFTABLE_LOCK_ERROR` because parts of the stack are locked
		 * already. This is a benign error though, so we ignore it.
		 */
		err = reftable_stack_auto_compact(add->stack);
		if (err < 0 && err != REFTABLE_LOCK_ERROR)
			goto done;
		err = 0;
	}

done:
	reftable_addition_close(add);
	return err;
}

int reftable_stack_new_addition(struct reftable_addition **dest,
				struct reftable_stack *st)
{
	int err = 0;
	struct reftable_addition empty = REFTABLE_ADDITION_INIT;
	REFTABLE_CALLOC_ARRAY(*dest, 1);
	**dest = empty;
	err = reftable_stack_init_addition(*dest, st);
	if (err) {
		reftable_free(*dest);
		*dest = NULL;
	}
	return err;
}

static int stack_try_add(struct reftable_stack *st,
			 int (*write_table)(struct reftable_writer *wr,
					    void *arg),
			 void *arg)
{
	struct reftable_addition add = REFTABLE_ADDITION_INIT;
	int err = reftable_stack_init_addition(&add, st);
	if (err < 0)
		goto done;

	err = reftable_addition_add(&add, write_table, arg);
	if (err < 0)
		goto done;

	err = reftable_addition_commit(&add);
done:
	reftable_addition_close(&add);
	return err;
}

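/*
 * Write a single new table via the `write_table` callback. The table is
 * written to a temporary file first and only renamed to its final
 * "<min>-<max>-<rand>.ref" name once the writer was closed successfully;
 * empty tables are silently dropped.
 */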
int reftable_addition_add(struct reftable_addition *add,
			  int (*write_table)(struct reftable_writer *wr,
					     void *arg),
			  void *arg)
{
	struct strbuf temp_tab_file_name = STRBUF_INIT;
	struct strbuf tab_file_name = STRBUF_INIT;
	struct strbuf next_name = STRBUF_INIT;
	struct reftable_writer *wr = NULL;
	struct tempfile *tab_file = NULL;
	int err = 0;
	int tab_fd;

	strbuf_reset(&next_name);
	format_name(&next_name, add->next_update_index, add->next_update_index);

	stack_filename(&temp_tab_file_name, add->stack, next_name.buf);
	strbuf_addstr(&temp_tab_file_name, ".temp.XXXXXX");

	tab_file = mks_tempfile(temp_tab_file_name.buf);
	if (!tab_file) {
		err = REFTABLE_IO_ERROR;
		goto done;
	}
	if (add->stack->opts.default_permissions) {
		if (chmod(get_tempfile_path(tab_file),
			  add->stack->opts.default_permissions)) {
			err = REFTABLE_IO_ERROR;
			goto done;
		}
	}
	tab_fd = get_tempfile_fd(tab_file);

	wr = reftable_new_writer(reftable_fd_write, reftable_fd_flush, &tab_fd,
				 &add->stack->opts);
	err = write_table(wr, arg);
	if (err < 0)
		goto done;

	err = reftable_writer_close(wr);
	if (err == REFTABLE_EMPTY_TABLE_ERROR) {
		err = 0;
		goto done;
	}
	if (err < 0)
		goto done;

	err = close_tempfile_gently(tab_file);
	if (err < 0) {
		err = REFTABLE_IO_ERROR;
		goto done;
	}

	if (wr->min_update_index < add->next_update_index) {
		err = REFTABLE_API_ERROR;
		goto done;
	}

	format_name(&next_name, wr->min_update_index, wr->max_update_index);
	strbuf_addstr(&next_name, ".ref");
	stack_filename(&tab_file_name, add->stack, next_name.buf);

	/*
	  On Windows, this relies on rand() picking a unique destination name.
	  Maybe we should do a retry loop as well?
	 */
	err = rename_tempfile(&tab_file, tab_file_name.buf);
	if (err < 0) {
		err = REFTABLE_IO_ERROR;
		goto done;
	}

	REFTABLE_ALLOC_GROW(add->new_tables, add->new_tables_len + 1,
			    add->new_tables_cap);
	add->new_tables[add->new_tables_len++] = strbuf_detach(&next_name, NULL);
done:
	delete_tempfile(&tab_file);
	strbuf_release(&temp_tab_file_name);
	strbuf_release(&tab_file_name);
	strbuf_release(&next_name);
	reftable_writer_free(wr);
	return err;
}

uint64_t reftable_stack_next_update_index(struct reftable_stack *st)
{
	int sz = st->merged->stack_len;
	if (sz > 0)
		return reftable_reader_max_update_index(st->readers[sz - 1]) +
		       1;
	return 1;
}

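/*
 * Write the compacted contents of tables [first, last] into a temporary file
 * and hand the tempfile back to the caller, who is responsible for renaming
 * it into place. The caller is expected to already hold the per-table locks.
 */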
static int stack_compact_locked(struct reftable_stack *st,
				size_t first, size_t last,
				struct reftable_log_expiry_config *config,
				struct tempfile **tab_file_out)
{
	struct strbuf next_name = STRBUF_INIT;
	struct strbuf tab_file_path = STRBUF_INIT;
	struct reftable_writer *wr = NULL;
	struct tempfile *tab_file;
	int tab_fd, err = 0;

	format_name(&next_name,
		    reftable_reader_min_update_index(st->readers[first]),
		    reftable_reader_max_update_index(st->readers[last]));
	stack_filename(&tab_file_path, st, next_name.buf);
	strbuf_addstr(&tab_file_path, ".temp.XXXXXX");

	tab_file = mks_tempfile(tab_file_path.buf);
	if (!tab_file) {
		err = REFTABLE_IO_ERROR;
		goto done;
	}
	tab_fd = get_tempfile_fd(tab_file);

	if (st->opts.default_permissions &&
	    chmod(get_tempfile_path(tab_file), st->opts.default_permissions) < 0) {
		err = REFTABLE_IO_ERROR;
		goto done;
	}

	wr = reftable_new_writer(reftable_fd_write, reftable_fd_flush,
				 &tab_fd, &st->opts);
	err = stack_write_compact(st, wr, first, last, config);
	if (err < 0)
		goto done;

	err = reftable_writer_close(wr);
	if (err < 0)
		goto done;

	err = close_tempfile_gently(tab_file);
	if (err < 0)
		goto done;

	*tab_file_out = tab_file;
	tab_file = NULL;

done:
	delete_tempfile(&tab_file);
	reftable_writer_free(wr);
	strbuf_release(&next_name);
	strbuf_release(&tab_file_path);
	return err;
}

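/*
 * Merge the ref and log records of tables [first, last] into `wr`. Deletion
 * records ("tombstones") are only dropped when compacting down to the base of
 * the stack (first == 0), since deeper in the stack they may still shadow
 * entries in older tables.
 */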
static int stack_write_compact(struct reftable_stack *st,
			       struct reftable_writer *wr,
			       size_t first, size_t last,
			       struct reftable_log_expiry_config *config)
{
	size_t subtabs_len = last - first + 1;
	struct reftable_table *subtabs = reftable_calloc(
		last - first + 1, sizeof(*subtabs));
	struct reftable_merged_table *mt = NULL;
	struct reftable_iterator it = { NULL };
	struct reftable_ref_record ref = { NULL };
	struct reftable_log_record log = { NULL };
	uint64_t entries = 0;
	int err = 0;

	for (size_t i = first, j = 0; i <= last; i++) {
		struct reftable_reader *t = st->readers[i];
		reftable_table_from_reader(&subtabs[j++], t);
		st->stats.bytes += t->size;
	}
	reftable_writer_set_limits(wr, st->readers[first]->min_update_index,
				   st->readers[last]->max_update_index);

	err = reftable_new_merged_table(&mt, subtabs, subtabs_len,
					st->opts.hash_id);
	if (err < 0) {
		reftable_free(subtabs);
		goto done;
	}

	merged_table_init_iter(mt, &it, BLOCK_TYPE_REF);
	err = reftable_iterator_seek_ref(&it, "");
	if (err < 0)
		goto done;

	while (1) {
		err = reftable_iterator_next_ref(&it, &ref);
		if (err > 0) {
			err = 0;
			break;
		}
		if (err < 0)
			goto done;

		if (first == 0 && reftable_ref_record_is_deletion(&ref)) {
			continue;
		}

		err = reftable_writer_add_ref(wr, &ref);
		if (err < 0)
			goto done;
		entries++;
	}
	reftable_iterator_destroy(&it);

	merged_table_init_iter(mt, &it, BLOCK_TYPE_LOG);
	err = reftable_iterator_seek_log(&it, "");
	if (err < 0)
		goto done;

	while (1) {
		err = reftable_iterator_next_log(&it, &log);
		if (err > 0) {
			err = 0;
			break;
		}
		if (err < 0)
			goto done;
		if (first == 0 && reftable_log_record_is_deletion(&log)) {
			continue;
		}

		if (config && config->min_update_index > 0 &&
		    log.update_index < config->min_update_index) {
			continue;
		}

		if (config && config->time > 0 &&
		    log.value.update.time < config->time) {
			continue;
		}

		err = reftable_writer_add_log(wr, &log);
		if (err < 0)
			goto done;
		entries++;
	}

done:
	reftable_iterator_destroy(&it);
	if (mt)
		reftable_merged_table_free(mt);
	reftable_ref_record_release(&ref);
	reftable_log_record_release(&log);
	st->stats.entries_written += entries;
	return err;
}

/*
 * Compact all tables in the inclusive range `[first, last]` into a single new
 * table.
 *
 * This function returns `0` on success or a code `< 0` on failure. When the
 * stack or any of the tables in the specified range are already locked then
 * this function returns `REFTABLE_LOCK_ERROR`. This is a benign error that
 * callers can either ignore, or they may choose to retry compaction after some
 * amount of time.
 */
static int stack_compact_range(struct reftable_stack *st,
			       size_t first, size_t last,
			       struct reftable_log_expiry_config *expiry)
{
	struct strbuf tables_list_buf = STRBUF_INIT;
	struct strbuf new_table_name = STRBUF_INIT;
	struct strbuf new_table_path = STRBUF_INIT;
	struct strbuf table_name = STRBUF_INIT;
	struct lock_file tables_list_lock = LOCK_INIT;
	struct lock_file *table_locks = NULL;
	struct tempfile *new_table = NULL;
	int is_empty_table = 0, err = 0;
	size_t i;

	if (first > last || (!expiry && first == last)) {
		err = 0;
		goto done;
	}

	st->stats.attempts++;

	/*
	 * Hold the lock so that we can read "tables.list" and lock all tables
	 * which are part of the user-specified range.
	 */
	err = hold_lock_file_for_update(&tables_list_lock, st->list_file,
					LOCK_NO_DEREF);
	if (err < 0) {
		if (errno == EEXIST)
			err = REFTABLE_LOCK_ERROR;
		else
			err = REFTABLE_IO_ERROR;
		goto done;
	}

	err = stack_uptodate(st);
	if (err)
		goto done;

	/*
	 * Lock all tables in the user-provided range. This is the slice of our
	 * stack which we'll compact.
	 */
	REFTABLE_CALLOC_ARRAY(table_locks, last - first + 1);
	for (i = first; i <= last; i++) {
		stack_filename(&table_name, st, reader_name(st->readers[i]));

		err = hold_lock_file_for_update(&table_locks[i - first],
						table_name.buf, LOCK_NO_DEREF);
		if (err < 0) {
			if (errno == EEXIST)
				err = REFTABLE_LOCK_ERROR;
			else
				err = REFTABLE_IO_ERROR;
			goto done;
		}

		/*
		 * We need to close the lockfiles as we might otherwise easily
		 * run into file descriptor exhaustion when we compress a lot
		 * of tables.
		 */
		err = close_lock_file_gently(&table_locks[i - first]);
		if (err < 0) {
			err = REFTABLE_IO_ERROR;
			goto done;
		}
	}

	/*
	 * We have locked all tables in our range and can thus release the
	 * "tables.list" lock while compacting the locked tables. This allows
	 * concurrent updates to the stack to proceed.
	 */
	err = rollback_lock_file(&tables_list_lock);
	if (err < 0) {
		err = REFTABLE_IO_ERROR;
		goto done;
	}

	/*
	 * Compact the now-locked tables into a new table. Note that compacting
	 * these tables may end up with an empty new table in case tombstones
	 * end up cancelling out all refs in that range.
	 */
	err = stack_compact_locked(st, first, last, expiry, &new_table);
	if (err < 0) {
		if (err != REFTABLE_EMPTY_TABLE_ERROR)
			goto done;
		is_empty_table = 1;
	}

	/*
	 * Now that we have written the new, compacted table we need to re-lock
	 * "tables.list". We'll then replace the compacted range of tables with
	 * the new table.
	 */
	err = hold_lock_file_for_update(&tables_list_lock, st->list_file,
					LOCK_NO_DEREF);
	if (err < 0) {
		if (errno == EEXIST)
			err = REFTABLE_LOCK_ERROR;
		else
			err = REFTABLE_IO_ERROR;
		goto done;
	}

	if (st->opts.default_permissions) {
		if (chmod(get_lock_file_path(&tables_list_lock),
			  st->opts.default_permissions) < 0) {
			err = REFTABLE_IO_ERROR;
			goto done;
		}
	}

	/*
	 * If the resulting compacted table is not empty, then we need to move
	 * it into place now.
	 */
	if (!is_empty_table) {
		format_name(&new_table_name, st->readers[first]->min_update_index,
			    st->readers[last]->max_update_index);
		strbuf_addstr(&new_table_name, ".ref");
		stack_filename(&new_table_path, st, new_table_name.buf);

		err = rename_tempfile(&new_table, new_table_path.buf);
		if (err < 0) {
			err = REFTABLE_IO_ERROR;
			goto done;
		}
	}

	/*
	 * Write the new "tables.list" contents with the compacted table we
	 * have just written. In case the compacted table became empty we
	 * simply skip writing it.
	 */
	for (i = 0; i < first; i++)
		strbuf_addf(&tables_list_buf, "%s\n", st->readers[i]->name);
	if (!is_empty_table)
		strbuf_addf(&tables_list_buf, "%s\n", new_table_name.buf);
	for (i = last + 1; i < st->merged->stack_len; i++)
		strbuf_addf(&tables_list_buf, "%s\n", st->readers[i]->name);

	err = write_in_full(get_lock_file_fd(&tables_list_lock),
			    tables_list_buf.buf, tables_list_buf.len);
	if (err < 0) {
		err = REFTABLE_IO_ERROR;
		unlink(new_table_path.buf);
		goto done;
	}

	err = fsync_component(FSYNC_COMPONENT_REFERENCE, get_lock_file_fd(&tables_list_lock));
	if (err < 0) {
		err = REFTABLE_IO_ERROR;
		unlink(new_table_path.buf);
		goto done;
	}

	err = commit_lock_file(&tables_list_lock);
	if (err < 0) {
		err = REFTABLE_IO_ERROR;
		unlink(new_table_path.buf);
		goto done;
	}

	/*
	 * Reload the stack before deleting the compacted tables. We can only
	 * delete the files after we closed them on Windows, so this needs to
	 * happen first.
	 */
	err = reftable_stack_reload_maybe_reuse(st, first < last);
	if (err < 0)
		goto done;

	/*
	 * Delete the old tables. They may still be in use by concurrent
	 * readers, so it is expected that unlinking tables may fail.
	 */
	for (i = first; i <= last; i++) {
		struct lock_file *table_lock = &table_locks[i - first];
		char *table_path = get_locked_file_path(table_lock);
		unlink(table_path);
		free(table_path);
	}

done:
	rollback_lock_file(&tables_list_lock);
	for (i = first; table_locks && i <= last; i++)
		rollback_lock_file(&table_locks[i - first]);
	reftable_free(table_locks);

	delete_tempfile(&new_table);
	strbuf_release(&new_table_name);
	strbuf_release(&new_table_path);

	strbuf_release(&tables_list_buf);
	strbuf_release(&table_name);
	return err;
}

int reftable_stack_compact_all(struct reftable_stack *st,
			       struct reftable_log_expiry_config *config)
{
	return stack_compact_range(st, 0, st->merged->stack_len ?
			st->merged->stack_len - 1 : 0, config);
}

static int stack_compact_range_stats(struct reftable_stack *st,
				     size_t first, size_t last,
				     struct reftable_log_expiry_config *config)
{
	int err = stack_compact_range(st, first, last, config);
	if (err == REFTABLE_LOCK_ERROR)
		st->stats.failures++;
	return err;
}

static int segment_size(struct segment *s)
{
	return s->end - s->start;
}

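/*
 * Suggest the slice of tables whose compaction would restore a geometric
 * sequence of table sizes with the given factor. Returns an empty segment
 * (start == end) when the stack is already geometric.
 */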
struct segment suggest_compaction_segment(uint64_t *sizes, size_t n,
					  uint8_t factor)
{
	struct segment seg = { 0 };
	uint64_t bytes;
	size_t i;

	if (!factor)
		factor = DEFAULT_GEOMETRIC_FACTOR;

	/*
	 * If there are no tables or only a single one then we don't have to
	 * compact anything. The sequence is geometric by definition already.
	 */
	if (n <= 1)
		return seg;

	/*
	 * Find the ending table of the compaction segment needed to restore the
	 * geometric sequence. Note that the segment end is exclusive.
	 *
	 * To do so, we iterate backwards starting from the most recent table
	 * until a valid segment end is found. If the preceding table is smaller
	 * than the current table multiplied by the geometric factor (default 2),
	 * the compaction segment end has been identified.
	 *
	 * Tables after the ending point are not added to the byte count because
	 * they are already valid members of the geometric sequence. Due to the
	 * properties of a geometric sequence, it is not possible for the sum of
	 * these tables to exceed the value of the ending point table.
	 *
	 * Example table size sequence requiring no compaction:
	 * 	64, 32, 16, 8, 4, 2, 1
	 *
	 * Example table size sequence where compaction segment end is set to
	 * the last table. Since the segment end is exclusive, the last table is
	 * excluded during subsequent compaction and the table with size 3 is
	 * the final table included:
	 * 	64, 32, 16, 8, 4, 3, 1
	 */
	for (i = n - 1; i > 0; i--) {
		if (sizes[i - 1] < sizes[i] * factor) {
			seg.end = i + 1;
			bytes = sizes[i];
			break;
		}
	}

	/*
	 * Find the starting table of the compaction segment by iterating
	 * through the remaining tables and keeping track of the accumulated
	 * size of all tables seen from the segment end table. The previous
	 * table is compared to the accumulated size because the tables from the
	 * segment end are merged backwards recursively.
	 *
	 * Note that we keep iterating even after we have found the first
	 * starting point. This is because there may be tables in the stack
	 * preceding that first starting point which violate the geometric
	 * sequence.
	 *
	 * Example compaction segment start set to table with size 32:
	 * 	128, 32, 16, 8, 4, 3, 1
	 */
	for (; i > 0; i--) {
		uint64_t curr = bytes;
		bytes += sizes[i - 1];

		if (sizes[i - 1] < curr * factor) {
			seg.start = i - 1;
			seg.bytes = bytes;
		}
	}

	return seg;
}

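/*
 * Gather the size of each table in the stack, with the table header overhead
 * subtracted so that the geometric comparison is based on record payload
 * rather than on fixed per-file costs.
 */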
static uint64_t *stack_table_sizes_for_compaction(struct reftable_stack *st)
{
	uint64_t *sizes =
		reftable_calloc(st->merged->stack_len, sizeof(*sizes));
	int version = (st->opts.hash_id == GIT_SHA1_FORMAT_ID) ? 1 : 2;
	int overhead = header_size(version) - 1;
	int i = 0;
	for (i = 0; i < st->merged->stack_len; i++) {
		sizes[i] = st->readers[i]->size - overhead;
	}
	return sizes;
}

int reftable_stack_auto_compact(struct reftable_stack *st)
{
	uint64_t *sizes = stack_table_sizes_for_compaction(st);
	struct segment seg =
		suggest_compaction_segment(sizes, st->merged->stack_len,
					   st->opts.auto_compaction_factor);
	reftable_free(sizes);
	if (segment_size(&seg) > 0)
		return stack_compact_range_stats(st, seg.start, seg.end - 1,
						 NULL);

	return 0;
}

struct reftable_compaction_stats *
reftable_stack_compaction_stats(struct reftable_stack *st)
{
	return &st->stats;
}

int reftable_stack_read_ref(struct reftable_stack *st, const char *refname,
			    struct reftable_ref_record *ref)
{
	struct reftable_table tab = { NULL };
	reftable_table_from_merged_table(&tab, reftable_stack_merged_table(st));
	return reftable_table_read_ref(&tab, refname, ref);
}

int reftable_stack_read_log(struct reftable_stack *st, const char *refname,
			    struct reftable_log_record *log)
{
	struct reftable_iterator it = {0};
	int err;

	reftable_stack_init_log_iterator(st, &it);
	err = reftable_iterator_seek_log(&it, refname);
	if (err)
		goto done;

	err = reftable_iterator_next_log(&it, log);
	if (err)
		goto done;

	if (strcmp(log->refname, refname) ||
	    reftable_log_record_is_deletion(log)) {
		err = 1;
		goto done;
	}

done:
	if (err) {
		reftable_log_record_release(log);
	}
	reftable_iterator_destroy(&it);
	return err;
}

static int is_table_name(const char *s)
{
	const char *dot = strrchr(s, '.');
	return dot && !strcmp(dot, ".ref");
}

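/*
 * Unlink the named table, but only after verifying that its maximum update
 * index does not exceed `max`, i.e. that it cannot contain data newer than
 * what the current merged stack already covers.
 */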
static void remove_maybe_stale_table(struct reftable_stack *st, uint64_t max,
				     const char *name)
{
	int err = 0;
	uint64_t update_idx = 0;
	struct reftable_block_source src = { NULL };
	struct reftable_reader *rd = NULL;
	struct strbuf table_path = STRBUF_INIT;
	stack_filename(&table_path, st, name);

	err = reftable_block_source_from_file(&src, table_path.buf);
	if (err < 0)
		goto done;

	err = reftable_new_reader(&rd, &src, name);
	if (err < 0)
		goto done;

	update_idx = reftable_reader_max_update_index(rd);
	reftable_reader_free(rd);

	if (update_idx <= max) {
		unlink(table_path.buf);
	}
done:
	strbuf_release(&table_path);
}

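/*
 * Scan the reftable directory for "*.ref" files which are not referenced by
 * the current stack and remove those which are provably stale.
 */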
static int reftable_stack_clean_locked(struct reftable_stack *st)
{
	uint64_t max = reftable_merged_table_max_update_index(
		reftable_stack_merged_table(st));
	DIR *dir = opendir(st->reftable_dir);
	struct dirent *d = NULL;
	if (!dir) {
		return REFTABLE_IO_ERROR;
	}

	while ((d = readdir(dir))) {
		int i = 0;
		int found = 0;
		if (!is_table_name(d->d_name))
			continue;

		for (i = 0; !found && i < st->readers_len; i++) {
			found = !strcmp(reader_name(st->readers[i]), d->d_name);
		}
		if (found)
			continue;

		remove_maybe_stale_table(st, max, d->d_name);
	}

	closedir(dir);
	return 0;
}

int reftable_stack_clean(struct reftable_stack *st)
{
	struct reftable_addition *add = NULL;
	int err = reftable_stack_new_addition(&add, st);
	if (err < 0) {
		goto done;
	}

	err = reftable_stack_reload(st);
	if (err < 0) {
		goto done;
	}

	err = reftable_stack_clean_locked(st);

done:
	reftable_addition_destroy(add);
	return err;
}

int reftable_stack_print_directory(const char *stackdir, uint32_t hash_id)
{
	struct reftable_stack *stack = NULL;
	struct reftable_write_options opts = { .hash_id = hash_id };
	struct reftable_merged_table *merged = NULL;
	struct reftable_table table = { NULL };

	int err = reftable_new_stack(&stack, stackdir, &opts);
	if (err < 0)
		goto done;

	merged = reftable_stack_merged_table(stack);
	reftable_table_from_merged_table(&table, merged);
	err = reftable_table_print(&table);
done:
	if (stack)
		reftable_stack_destroy(stack);
	return err;
}