/*
Copyright 2020 Google LLC

Use of this source code is governed by a BSD-style
license that can be found in the LICENSE file or at
https://developers.google.com/open-source/licenses/bsd
*/

#include "stack.h"

#include "../write-or-die.h"
#include "system.h"
#include "constants.h"
#include "merged.h"
#include "reader.h"
#include "reftable-error.h"
#include "reftable-generic.h"
#include "reftable-record.h"
#include "reftable-merged.h"
#include "writer.h"
#include "tempfile.h"
static int stack_try_add(struct reftable_stack *st,
			 int (*write_table)(struct reftable_writer *wr,
					    void *arg),
			 void *arg);
static int stack_write_compact(struct reftable_stack *st,
			       struct reftable_writer *wr,
			       size_t first, size_t last,
			       struct reftable_log_expiry_config *config);
static void reftable_addition_close(struct reftable_addition *add);
static int reftable_stack_reload_maybe_reuse(struct reftable_stack *st,
					     int reuse_open);
static void stack_filename(struct strbuf *dest, struct reftable_stack *st,
			   const char *name)
{
	strbuf_reset(dest);
	strbuf_addstr(dest, st->reftable_dir);
	strbuf_addstr(dest, "/");
	strbuf_addstr(dest, name);
}
static ssize_t reftable_fd_write(void *arg, const void *data, size_t sz)
{
	int *fdp = (int *)arg;
	return write_in_full(*fdp, data, sz);
}

static int reftable_fd_flush(void *arg)
{
	int *fdp = (int *)arg;

	return fsync_component(FSYNC_COMPONENT_REFERENCE, *fdp);
}
int reftable_new_stack(struct reftable_stack **dest, const char *dir,
		       const struct reftable_write_options *_opts)
{
	struct reftable_stack *p = reftable_calloc(1, sizeof(*p));
	struct strbuf list_file_name = STRBUF_INIT;
	struct reftable_write_options opts = {0};
	int err = 0;

	if (_opts)
		opts = *_opts;
	if (opts.hash_id == 0)
		opts.hash_id = GIT_SHA1_FORMAT_ID;

	*dest = NULL;

	strbuf_reset(&list_file_name);
	strbuf_addstr(&list_file_name, dir);
	strbuf_addstr(&list_file_name, "/tables.list");

	p->list_file = strbuf_detach(&list_file_name, NULL);
	p->list_fd = -1;
	p->reftable_dir = xstrdup(dir);
	p->opts = opts;

	err = reftable_stack_reload_maybe_reuse(p, 1);
	if (err < 0) {
		reftable_stack_destroy(p);
	} else {
		*dest = p;
	}
	return err;
}
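/*
 * Illustrative sketch (not part of this file): opening and closing a stack.
 * The directory path is an example; any directory whose "tables.list" is
 * managed by this code works.
 *
 *	struct reftable_write_options opts = { 0 };
 *	struct reftable_stack *st = NULL;
 *	int err = reftable_new_stack(&st, ".git/reftable", &opts);
 *	if (err < 0)
 *		return err;
 *	...
 *	reftable_stack_destroy(st);
 */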
static int fd_read_lines(int fd, char ***namesp)
{
	off_t size = lseek(fd, 0, SEEK_END);
	char *buf = NULL;
	int err = 0;
	if (size < 0) {
		err = REFTABLE_IO_ERROR;
		goto done;
	}
	err = lseek(fd, 0, SEEK_SET);
	if (err < 0) {
		err = REFTABLE_IO_ERROR;
		goto done;
	}

	REFTABLE_ALLOC_ARRAY(buf, size + 1);
	if (read_in_full(fd, buf, size) != size) {
		err = REFTABLE_IO_ERROR;
		goto done;
	}
	buf[size] = 0;

	parse_names(buf, size, namesp);

done:
	reftable_free(buf);
	return err;
}
int read_lines(const char *filename, char ***namesp)
{
	int fd = open(filename, O_RDONLY);
	int err = 0;
	if (fd < 0) {
		if (errno == ENOENT) {
			REFTABLE_CALLOC_ARRAY(*namesp, 1);
			return 0;
		}

		return REFTABLE_IO_ERROR;
	}
	err = fd_read_lines(fd, namesp);
	close(fd);
	return err;
}
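/*
 * "tables.list" simply lists the table files that make up the stack, one
 * name per line, oldest table first. For example (the names follow the
 * format produced by format_name() below and are made up):
 *
 *	0x000000000001-0x000000000003-1c2d3e4f.ref
 *	0x000000000004-0x000000000004-5a6b7c8d.ref
 *	0x000000000005-0x000000000006-9e0f1a2b.ref
 */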
void reftable_stack_init_ref_iterator(struct reftable_stack *st,
				      struct reftable_iterator *it)
{
	merged_table_init_iter(reftable_stack_merged_table(st),
			       it, BLOCK_TYPE_REF);
}

void reftable_stack_init_log_iterator(struct reftable_stack *st,
				      struct reftable_iterator *it)
{
	merged_table_init_iter(reftable_stack_merged_table(st),
			       it, BLOCK_TYPE_LOG);
}

struct reftable_merged_table *
reftable_stack_merged_table(struct reftable_stack *st)
{
	return st->merged;
}
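/*
 * Illustrative sketch (not part of this file): listing every ref visible
 * through the merged view of a stack `st`.
 *
 *	struct reftable_iterator it = { 0 };
 *	struct reftable_ref_record ref = { 0 };
 *	int err;
 *
 *	reftable_stack_init_ref_iterator(st, &it);
 *	err = reftable_iterator_seek_ref(&it, "");
 *	while (!err) {
 *		err = reftable_iterator_next_ref(&it, &ref);
 *		if (err)
 *			break;
 *		puts(ref.refname);
 *	}
 *	reftable_ref_record_release(&ref);
 *	reftable_iterator_destroy(&it);
 *
 * reftable_iterator_next_ref() returns a positive value at the end of
 * iteration, so err > 0 after the loop means success.
 */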
static int has_name(char **names, const char *name)
{
	while (*names) {
		if (!strcmp(*names, name))
			return 1;
		names++;
	}
	return 0;
}
/* Close and free the stack */
void reftable_stack_destroy(struct reftable_stack *st)
{
	char **names = NULL;
	int err = 0;
	if (st->merged) {
		reftable_merged_table_free(st->merged);
		st->merged = NULL;
	}

	err = read_lines(st->list_file, &names);
	if (err < 0) {
		FREE_AND_NULL(names);
	}

	if (st->readers) {
		int i = 0;
		struct strbuf filename = STRBUF_INIT;
		for (i = 0; i < st->readers_len; i++) {
			const char *name = reader_name(st->readers[i]);
			strbuf_reset(&filename);
			if (names && !has_name(names, name)) {
				stack_filename(&filename, st, name);
			}
			reftable_reader_free(st->readers[i]);

			if (filename.len) {
				/* On Windows, can only unlink after closing. */
				unlink(filename.buf);
			}
		}
		strbuf_release(&filename);
		st->readers_len = 0;
		FREE_AND_NULL(st->readers);
	}

	if (st->list_fd >= 0) {
		close(st->list_fd);
		st->list_fd = -1;
	}

	FREE_AND_NULL(st->list_file);
	FREE_AND_NULL(st->reftable_dir);
	reftable_free(st);
	free_names(names);
}
static struct reftable_reader **stack_copy_readers(struct reftable_stack *st,
						   int cur_len)
{
	struct reftable_reader **cur = reftable_calloc(cur_len, sizeof(*cur));
	int i = 0;
	for (i = 0; i < cur_len; i++) {
		cur[i] = st->readers[i];
	}
	return cur;
}
static int reftable_stack_reload_once(struct reftable_stack *st,
				      const char **names,
				      int reuse_open)
{
	size_t cur_len = !st->merged ? 0 : st->merged->stack_len;
	struct reftable_reader **cur = stack_copy_readers(st, cur_len);
	size_t names_len = names_length(names);
	struct reftable_reader **new_readers =
		reftable_calloc(names_len, sizeof(*new_readers));
	struct reftable_table *new_tables =
		reftable_calloc(names_len, sizeof(*new_tables));
	size_t new_readers_len = 0;
	struct reftable_merged_table *new_merged = NULL;
	struct strbuf table_path = STRBUF_INIT;
	int err = 0;
	size_t i;

	while (*names) {
		struct reftable_reader *rd = NULL;
		const char *name = *names++;

		/* this is linear; we assume compaction keeps the number of
		   tables under control so this is not quadratic. */
		for (i = 0; reuse_open && i < cur_len; i++) {
			if (cur[i] && 0 == strcmp(cur[i]->name, name)) {
				rd = cur[i];
				cur[i] = NULL;
				break;
			}
		}

		if (!rd) {
			struct reftable_block_source src = { NULL };
			stack_filename(&table_path, st, name);

			err = reftable_block_source_from_file(&src,
							      table_path.buf);
			if (err < 0)
				goto done;

			err = reftable_new_reader(&rd, &src, name);
			if (err < 0)
				goto done;
		}

		new_readers[new_readers_len] = rd;
		reftable_table_from_reader(&new_tables[new_readers_len], rd);
		new_readers_len++;
	}

	/* success! */
	err = reftable_new_merged_table(&new_merged, new_tables,
					new_readers_len, st->opts.hash_id);
	if (err < 0)
		goto done;

	new_tables = NULL;
	st->readers_len = new_readers_len;
	if (st->merged)
		reftable_merged_table_free(st->merged);
	if (st->readers) {
		reftable_free(st->readers);
	}
	st->readers = new_readers;
	new_readers = NULL;
	new_readers_len = 0;

	new_merged->suppress_deletions = 1;
	st->merged = new_merged;
	for (i = 0; i < cur_len; i++) {
		if (cur[i]) {
			const char *name = reader_name(cur[i]);
			stack_filename(&table_path, st, name);

			reader_close(cur[i]);
			reftable_reader_free(cur[i]);

			/* On Windows, can only unlink after closing. */
			unlink(table_path.buf);
		}
	}

done:
	for (i = 0; i < new_readers_len; i++) {
		reader_close(new_readers[i]);
		reftable_reader_free(new_readers[i]);
	}
	reftable_free(new_readers);
	reftable_free(new_tables);
	reftable_free(cur);
	strbuf_release(&table_path);
	return err;
}
/* return negative if a before b. */
static int tv_cmp(struct timeval *a, struct timeval *b)
{
	time_t diff = a->tv_sec - b->tv_sec;
	int udiff = a->tv_usec - b->tv_usec;

	if (diff != 0)
		return diff;

	return udiff;
}
static int reftable_stack_reload_maybe_reuse(struct reftable_stack *st,
					     int reuse_open)
{
	char **names = NULL, **names_after = NULL;
	struct timeval deadline;
	int64_t delay = 0;
	int tries = 0, err;
	int fd = -1;

	err = gettimeofday(&deadline, NULL);
	if (err < 0)
		goto out;
	deadline.tv_sec += 3;

	while (1) {
		struct timeval now;

		err = gettimeofday(&now, NULL);
		if (err < 0)
			goto out;

		/*
		 * Only look at deadlines after the first few times. This
		 * simplifies debugging in GDB.
		 */
		tries++;
		if (tries > 3 && tv_cmp(&now, &deadline) >= 0)
			goto out;

		fd = open(st->list_file, O_RDONLY);
		if (fd < 0) {
			if (errno != ENOENT) {
				err = REFTABLE_IO_ERROR;
				goto out;
			}

			REFTABLE_CALLOC_ARRAY(names, 1);
		} else {
			err = fd_read_lines(fd, &names);
			if (err < 0)
				goto out;
		}

		err = reftable_stack_reload_once(st, (const char **) names, reuse_open);
		if (!err)
			break;
		if (err != REFTABLE_NOT_EXIST_ERROR)
			goto out;

		/*
		 * REFTABLE_NOT_EXIST_ERROR can be caused by a concurrent
		 * writer. Check if there was one by checking if the name list
		 * changed.
		 */
		err = read_lines(st->list_file, &names_after);
		if (err < 0)
			goto out;
		if (names_equal((const char **) names_after,
				(const char **) names)) {
			err = REFTABLE_NOT_EXIST_ERROR;
			goto out;
		}

		free_names(names);
		names = NULL;
		free_names(names_after);
		names_after = NULL;
		close(fd);
		fd = -1;

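		/*
		 * The table list changed underneath us, so a concurrent
		 * writer is active. Back off before retrying: the delay
		 * grows roughly geometrically, with a random component so
		 * that competing processes do not retry in lockstep.
		 */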
		delay = delay + (delay * rand()) / RAND_MAX + 1;
		sleep_millisec(delay);
	}

out:
	/*
	 * Invalidate the stat cache. It is sufficient to only close the file
	 * descriptor and keep the cached stat info because we never use the
	 * latter when the former is negative.
	 */
	if (st->list_fd >= 0) {
		close(st->list_fd);
		st->list_fd = -1;
	}

	/*
	 * Cache stat information in case it provides a useful signal to us.
	 * According to POSIX, "The st_ino and st_dev fields taken together
	 * uniquely identify the file within the system." That being said,
	 * Windows is not POSIX compliant and we do not have these fields
	 * available. So the information we have there is insufficient to
	 * determine whether two file descriptors point to the same file.
	 *
	 * While we could fall back to using other signals like the file's
	 * mtime, those are not sufficient to avoid races. We thus refrain from
	 * using the stat cache on such systems and fall back to the secondary
	 * caching mechanism, which is to check whether contents of the file
	 * have changed.
	 *
	 * On other systems which are POSIX compliant we must keep the file
	 * descriptor open. This is to avoid a race condition where two
	 * processes access the reftable stack at the same point in time:
	 *
	 *   1. A reads the reftable stack and caches its stat info.
	 *
	 *   2. B updates the stack, appending a new table to "tables.list".
	 *      This will both use a new inode and result in a different file
	 *      size, thus invalidating A's cache in theory.
	 *
	 *   3. B decides to auto-compact the stack and merges two tables. The
	 *      file size now matches what A has cached again. Furthermore, the
	 *      filesystem may decide to recycle the inode number of the file
	 *      we have replaced in (2) because it is not in use anymore.
	 *
	 *   4. A reloads the reftable stack. Neither the inode number nor the
	 *      file size changed. If the timestamps did not change either then
	 *      we think the cached copy of our stack is up-to-date.
	 *
	 * By keeping the file descriptor open the inode number cannot be
	 * recycled, mitigating the race.
	 */
	if (!err && fd >= 0 && !fstat(fd, &st->list_st) &&
	    st->list_st.st_dev && st->list_st.st_ino) {
		st->list_fd = fd;
		fd = -1;
	}

	if (fd >= 0)
		close(fd);
	free_names(names);
	free_names(names_after);
	return err;
}
/* -1 = error
   0 = up to date
   1 = changed. */
static int stack_uptodate(struct reftable_stack *st)
{
	char **names = NULL;
	int err;
	int i = 0;

	/*
	 * When we have cached stat information available then we use it to
	 * verify whether the file has been rewritten.
	 *
	 * Note that we explicitly do not want to use `stat_validity_check()`
	 * and friends here because they may end up not comparing the `st_dev`
	 * and `st_ino` fields. These functions thus cannot guarantee that we
	 * indeed still have the same file.
	 */
	if (st->list_fd >= 0) {
		struct stat list_st;

		if (stat(st->list_file, &list_st) < 0) {
			/*
			 * It's fine for "tables.list" to not exist. In that
			 * case, we have to refresh when the loaded stack has
			 * any readers.
			 */
			if (errno == ENOENT)
				return !!st->readers_len;
			return REFTABLE_IO_ERROR;
		}

		/*
		 * When "tables.list" refers to the same file we can assume
		 * that it didn't change. This is because we always use
		 * rename(3P) to update the file and never write to it
		 * directly.
		 */
		if (st->list_st.st_dev == list_st.st_dev &&
		    st->list_st.st_ino == list_st.st_ino)
			return 0;
	}

	err = read_lines(st->list_file, &names);
	if (err < 0)
		return err;

	for (i = 0; i < st->readers_len; i++) {
		if (!names[i]) {
			err = 1;
			goto done;
		}

		if (strcmp(st->readers[i]->name, names[i])) {
			err = 1;
			goto done;
		}
	}

	if (names[st->merged->stack_len]) {
		err = 1;
		goto done;
	}

done:
	free_names(names);
	return err;
}
int reftable_stack_reload(struct reftable_stack *st)
{
	int err = stack_uptodate(st);
	if (err > 0)
		return reftable_stack_reload_maybe_reuse(st, 1);
	return err;
}
int reftable_stack_add(struct reftable_stack *st,
		       int (*write)(struct reftable_writer *wr, void *arg),
		       void *arg)
{
	int err = stack_try_add(st, write, arg);
	if (err < 0) {
		if (err == REFTABLE_OUTDATED_ERROR) {
			/* Ignore error return, we want to propagate
			   REFTABLE_OUTDATED_ERROR.
			 */
			reftable_stack_reload(st);
		}
		return err;
	}

	return 0;
}
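/*
 * Illustrative sketch (not part of this file): a minimal write callback for
 * reftable_stack_add(). The helper name and record are hypothetical.
 *
 *	static int write_one_ref(struct reftable_writer *wr, void *arg)
 *	{
 *		struct reftable_ref_record *rec = arg;
 *		reftable_writer_set_limits(wr, rec->update_index,
 *					   rec->update_index);
 *		return reftable_writer_add_ref(wr, rec);
 *	}
 *
 * The callback must set limits that are not below
 * reftable_stack_next_update_index(st); otherwise the addition fails with
 * REFTABLE_API_ERROR (see reftable_addition_add() below).
 */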
static void format_name(struct strbuf *dest, uint64_t min, uint64_t max)
{
	char buf[100];
	uint32_t rnd = (uint32_t)git_rand();
	snprintf(buf, sizeof(buf), "0x%012" PRIx64 "-0x%012" PRIx64 "-%08x",
		 min, max, rnd);
	strbuf_reset(dest);
	strbuf_addstr(dest, buf);
}
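/*
 * For example, format_name(&dest, 4, 7) yields something like
 * "0x000000000004-0x000000000007-8f2a91bc": the minimum and maximum update
 * index padded to twelve hex digits, plus a random suffix to avoid name
 * collisions (the suffix shown here is arbitrary).
 */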
struct reftable_addition {
	struct tempfile *lock_file;
	struct reftable_stack *stack;

	char **new_tables;
	size_t new_tables_len, new_tables_cap;
	uint64_t next_update_index;
};

#define REFTABLE_ADDITION_INIT {0}
static int reftable_stack_init_addition(struct reftable_addition *add,
					struct reftable_stack *st)
{
	struct strbuf lock_file_name = STRBUF_INIT;
	int err = 0;
	add->stack = st;

	strbuf_addf(&lock_file_name, "%s.lock", st->list_file);

	add->lock_file = create_tempfile(lock_file_name.buf);
	if (!add->lock_file) {
		if (errno == EEXIST) {
			err = REFTABLE_LOCK_ERROR;
		} else {
			err = REFTABLE_IO_ERROR;
		}
		goto done;
	}
	if (st->opts.default_permissions) {
		if (chmod(add->lock_file->filename.buf, st->opts.default_permissions) < 0) {
			err = REFTABLE_IO_ERROR;
			goto done;
		}
	}

	err = stack_uptodate(st);
	if (err < 0)
		goto done;
	if (err > 0) {
		err = REFTABLE_OUTDATED_ERROR;
		goto done;
	}

	add->next_update_index = reftable_stack_next_update_index(st);
done:
	if (err) {
		reftable_addition_close(add);
	}
	strbuf_release(&lock_file_name);
	return err;
}
static void reftable_addition_close(struct reftable_addition *add)
{
	struct strbuf nm = STRBUF_INIT;
	size_t i;

	for (i = 0; i < add->new_tables_len; i++) {
		stack_filename(&nm, add->stack, add->new_tables[i]);
		unlink(nm.buf);
		reftable_free(add->new_tables[i]);
		add->new_tables[i] = NULL;
	}
	reftable_free(add->new_tables);
	add->new_tables = NULL;
	add->new_tables_len = 0;
	add->new_tables_cap = 0;

	delete_tempfile(&add->lock_file);
	strbuf_release(&nm);
}
void reftable_addition_destroy(struct reftable_addition *add)
{
	if (!add) {
		return;
	}
	reftable_addition_close(add);
	reftable_free(add);
}
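/*
 * Commit the addition: the current table names plus the newly written ones
 * are written to the ".lock" tempfile, flushed to disk, and then atomically
 * renamed onto "tables.list". Readers therefore observe either the old or
 * the new list, never a partially written one.
 */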
int reftable_addition_commit(struct reftable_addition *add)
{
	struct strbuf table_list = STRBUF_INIT;
	int lock_file_fd = get_tempfile_fd(add->lock_file);
	int err = 0;
	size_t i;

	if (add->new_tables_len == 0)
		goto done;

	for (i = 0; i < add->stack->merged->stack_len; i++) {
		strbuf_addstr(&table_list, add->stack->readers[i]->name);
		strbuf_addstr(&table_list, "\n");
	}
	for (i = 0; i < add->new_tables_len; i++) {
		strbuf_addstr(&table_list, add->new_tables[i]);
		strbuf_addstr(&table_list, "\n");
	}

	err = write_in_full(lock_file_fd, table_list.buf, table_list.len);
	strbuf_release(&table_list);
	if (err < 0) {
		err = REFTABLE_IO_ERROR;
		goto done;
	}

	fsync_component_or_die(FSYNC_COMPONENT_REFERENCE, lock_file_fd,
			       get_tempfile_path(add->lock_file));

	err = rename_tempfile(&add->lock_file, add->stack->list_file);
	if (err < 0) {
		err = REFTABLE_IO_ERROR;
		goto done;
	}

	/* success, no more state to clean up. */
	for (i = 0; i < add->new_tables_len; i++)
		reftable_free(add->new_tables[i]);
	reftable_free(add->new_tables);
	add->new_tables = NULL;
	add->new_tables_len = 0;
	add->new_tables_cap = 0;

	err = reftable_stack_reload_maybe_reuse(add->stack, 1);
	if (err)
		goto done;

	if (!add->stack->opts.disable_auto_compact) {
		/*
		 * Auto-compact the stack to keep the number of tables in
		 * control. It is possible that a concurrent writer is already
		 * trying to compact parts of the stack, which would lead to a
		 * `REFTABLE_LOCK_ERROR` because parts of the stack are locked
		 * already. This is a benign error though, so we ignore it.
		 */
		err = reftable_stack_auto_compact(add->stack);
		if (err < 0 && err != REFTABLE_LOCK_ERROR)
			goto done;
		err = 0;
	}

done:
	reftable_addition_close(add);
	return err;
}
int reftable_stack_new_addition(struct reftable_addition **dest,
				struct reftable_stack *st)
{
	int err = 0;
	struct reftable_addition empty = REFTABLE_ADDITION_INIT;
	REFTABLE_CALLOC_ARRAY(*dest, 1);
	**dest = empty;
	err = reftable_stack_init_addition(*dest, st);
	if (err) {
		reftable_free(*dest);
		*dest = NULL;
	}
	return err;
}
static int stack_try_add(struct reftable_stack *st,
			 int (*write_table)(struct reftable_writer *wr,
					    void *arg),
			 void *arg)
{
	struct reftable_addition add = REFTABLE_ADDITION_INIT;
	int err = reftable_stack_init_addition(&add, st);
	if (err < 0)
		goto done;

	err = reftable_addition_add(&add, write_table, arg);
	if (err < 0)
		goto done;

	err = reftable_addition_commit(&add);
done:
	reftable_addition_close(&add);
	return err;
}
int reftable_addition_add(struct reftable_addition *add,
			  int (*write_table)(struct reftable_writer *wr,
					     void *arg),
			  void *arg)
{
	struct strbuf temp_tab_file_name = STRBUF_INIT;
	struct strbuf tab_file_name = STRBUF_INIT;
	struct strbuf next_name = STRBUF_INIT;
	struct reftable_writer *wr = NULL;
	struct tempfile *tab_file = NULL;
	int err = 0;
	int tab_fd;

	strbuf_reset(&next_name);
	format_name(&next_name, add->next_update_index, add->next_update_index);

	stack_filename(&temp_tab_file_name, add->stack, next_name.buf);
	strbuf_addstr(&temp_tab_file_name, ".temp.XXXXXX");

	tab_file = mks_tempfile(temp_tab_file_name.buf);
	if (!tab_file) {
		err = REFTABLE_IO_ERROR;
		goto done;
	}
	if (add->stack->opts.default_permissions) {
		if (chmod(get_tempfile_path(tab_file),
			  add->stack->opts.default_permissions)) {
			err = REFTABLE_IO_ERROR;
			goto done;
		}
	}
	tab_fd = get_tempfile_fd(tab_file);

	wr = reftable_new_writer(reftable_fd_write, reftable_fd_flush, &tab_fd,
				 &add->stack->opts);
	err = write_table(wr, arg);
	if (err < 0)
		goto done;

	err = reftable_writer_close(wr);
	if (err == REFTABLE_EMPTY_TABLE_ERROR) {
		err = 0;
		goto done;
	}
	if (err < 0)
		goto done;

	err = close_tempfile_gently(tab_file);
	if (err < 0) {
		err = REFTABLE_IO_ERROR;
		goto done;
	}

	if (wr->min_update_index < add->next_update_index) {
		err = REFTABLE_API_ERROR;
		goto done;
	}

	format_name(&next_name, wr->min_update_index, wr->max_update_index);
	strbuf_addstr(&next_name, ".ref");
	stack_filename(&tab_file_name, add->stack, next_name.buf);

	/*
	  On Windows, this relies on rand() picking a unique destination name.
	  Maybe we should do a retry loop as well?
	 */
	err = rename_tempfile(&tab_file, tab_file_name.buf);
	if (err < 0) {
		err = REFTABLE_IO_ERROR;
		goto done;
	}

	REFTABLE_ALLOC_GROW(add->new_tables, add->new_tables_len + 1,
			    add->new_tables_cap);
	add->new_tables[add->new_tables_len++] = strbuf_detach(&next_name, NULL);
done:
	delete_tempfile(&tab_file);
	strbuf_release(&temp_tab_file_name);
	strbuf_release(&tab_file_name);
	strbuf_release(&next_name);
	reftable_writer_free(wr);
	return err;
}
uint64_t reftable_stack_next_update_index(struct reftable_stack *st)
{
	int sz = st->merged->stack_len;
	if (sz > 0)
		return reftable_reader_max_update_index(st->readers[sz - 1]) +
		       1;
	return 1;
}
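/*
 * E.g. if the newest table covers update indices 4..7, the next addition is
 * written with update index 8; an empty stack starts at 1.
 */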
static int stack_compact_locked(struct reftable_stack *st,
				size_t first, size_t last,
				struct reftable_log_expiry_config *config,
				struct tempfile **tab_file_out)
{
	struct strbuf next_name = STRBUF_INIT;
	struct strbuf tab_file_path = STRBUF_INIT;
	struct reftable_writer *wr = NULL;
	struct tempfile *tab_file;
	int tab_fd, err = 0;

	format_name(&next_name,
		    reftable_reader_min_update_index(st->readers[first]),
		    reftable_reader_max_update_index(st->readers[last]));
	stack_filename(&tab_file_path, st, next_name.buf);
	strbuf_addstr(&tab_file_path, ".temp.XXXXXX");

	tab_file = mks_tempfile(tab_file_path.buf);
	if (!tab_file) {
		err = REFTABLE_IO_ERROR;
		goto done;
	}
	tab_fd = get_tempfile_fd(tab_file);

	if (st->opts.default_permissions &&
	    chmod(get_tempfile_path(tab_file), st->opts.default_permissions) < 0) {
		err = REFTABLE_IO_ERROR;
		goto done;
	}

	wr = reftable_new_writer(reftable_fd_write, reftable_fd_flush,
				 &tab_fd, &st->opts);
	err = stack_write_compact(st, wr, first, last, config);
	if (err < 0)
		goto done;

	err = reftable_writer_close(wr);
	if (err < 0)
		goto done;

	err = close_tempfile_gently(tab_file);
	if (err < 0)
		goto done;

	*tab_file_out = tab_file;
	tab_file = NULL;

done:
	delete_tempfile(&tab_file);
	reftable_writer_free(wr);
	strbuf_release(&next_name);
	strbuf_release(&tab_file_path);
	return err;
}
static int stack_write_compact(struct reftable_stack *st,
			       struct reftable_writer *wr,
			       size_t first, size_t last,
			       struct reftable_log_expiry_config *config)
{
	size_t subtabs_len = last - first + 1;
	struct reftable_table *subtabs = reftable_calloc(
		last - first + 1, sizeof(*subtabs));
	struct reftable_merged_table *mt = NULL;
	struct reftable_iterator it = { NULL };
	struct reftable_ref_record ref = { NULL };
	struct reftable_log_record log = { NULL };
	uint64_t entries = 0;
	int err = 0;

	for (size_t i = first, j = 0; i <= last; i++) {
		struct reftable_reader *t = st->readers[i];
		reftable_table_from_reader(&subtabs[j++], t);
		st->stats.bytes += t->size;
	}
	reftable_writer_set_limits(wr, st->readers[first]->min_update_index,
				   st->readers[last]->max_update_index);

	err = reftable_new_merged_table(&mt, subtabs, subtabs_len,
					st->opts.hash_id);
	if (err < 0) {
		reftable_free(subtabs);
		goto done;
	}

	merged_table_init_iter(mt, &it, BLOCK_TYPE_REF);
	err = reftable_iterator_seek_ref(&it, "");
	if (err < 0)
		goto done;

	while (1) {
		err = reftable_iterator_next_ref(&it, &ref);
		if (err > 0) {
			err = 0;
			break;
		}
		if (err < 0)
			goto done;

		/*
		 * When compacting the bottom of the stack there is no older
		 * table left for a deletion record to shadow, so tombstones
		 * can be dropped.
		 */
		if (first == 0 && reftable_ref_record_is_deletion(&ref)) {
			continue;
		}

		err = reftable_writer_add_ref(wr, &ref);
		if (err < 0)
			goto done;
		entries++;
	}
	reftable_iterator_destroy(&it);

	merged_table_init_iter(mt, &it, BLOCK_TYPE_LOG);
	err = reftable_iterator_seek_log(&it, "");
	if (err < 0)
		goto done;

	while (1) {
		err = reftable_iterator_next_log(&it, &log);
		if (err > 0) {
			err = 0;
			break;
		}
		if (err < 0)
			goto done;
		if (first == 0 && reftable_log_record_is_deletion(&log)) {
			continue;
		}

		/* Expire log records below the configured thresholds. */
		if (config && config->min_update_index > 0 &&
		    log.update_index < config->min_update_index) {
			continue;
		}

		if (config && config->time > 0 &&
		    log.value.update.time < config->time) {
			continue;
		}

		err = reftable_writer_add_log(wr, &log);
		if (err < 0)
			goto done;
		entries++;
	}

done:
	reftable_iterator_destroy(&it);
	if (mt)
		reftable_merged_table_free(mt);
	reftable_ref_record_release(&ref);
	reftable_log_record_release(&log);
	st->stats.entries_written += entries;
	return err;
}
/*
 * Compact all tables in the range `[first, last]` (inclusive on both ends)
 * into a single new table.
 *
 * This function returns `0` on success or a code `< 0` on failure. When the
 * stack or any of the tables in the specified range are already locked then
 * this function returns `REFTABLE_LOCK_ERROR`. This is a benign error that
 * callers can either ignore, or they may choose to retry compaction after some
 * amount of time.
 */
static int stack_compact_range(struct reftable_stack *st,
			       size_t first, size_t last,
			       struct reftable_log_expiry_config *expiry)
{
	struct strbuf tables_list_buf = STRBUF_INIT;
	struct strbuf new_table_name = STRBUF_INIT;
	struct strbuf new_table_path = STRBUF_INIT;
	struct strbuf table_name = STRBUF_INIT;
	struct lock_file tables_list_lock = LOCK_INIT;
	struct lock_file *table_locks = NULL;
	struct tempfile *new_table = NULL;
	int is_empty_table = 0, err = 0;
	size_t i;

	if (first > last || (!expiry && first == last)) {
		err = 0;
		goto done;
	}

	st->stats.attempts++;

	/*
	 * Hold the lock so that we can read "tables.list" and lock all tables
	 * which are part of the user-specified range.
	 */
	err = hold_lock_file_for_update(&tables_list_lock, st->list_file,
					LOCK_NO_DEREF);
	if (err < 0) {
		if (errno == EEXIST)
			err = REFTABLE_LOCK_ERROR;
		else
			err = REFTABLE_IO_ERROR;
		goto done;
	}

	err = stack_uptodate(st);
	if (err)
		goto done;

	/*
	 * Lock all tables in the user-provided range. This is the slice of our
	 * stack which we'll compact.
	 */
	REFTABLE_CALLOC_ARRAY(table_locks, last - first + 1);
	for (i = first; i <= last; i++) {
		stack_filename(&table_name, st, reader_name(st->readers[i]));

		err = hold_lock_file_for_update(&table_locks[i - first],
						table_name.buf, LOCK_NO_DEREF);
		if (err < 0) {
			if (errno == EEXIST)
				err = REFTABLE_LOCK_ERROR;
			else
				err = REFTABLE_IO_ERROR;
			goto done;
		}

		/*
		 * We need to close the lockfiles as we might otherwise easily
		 * run into file descriptor exhaustion when we compress a lot
		 * of tables.
		 */
		err = close_lock_file_gently(&table_locks[i - first]);
		if (err < 0) {
			err = REFTABLE_IO_ERROR;
			goto done;
		}
	}

	/*
	 * We have locked all tables in our range and can thus release the
	 * "tables.list" lock while compacting the locked tables. This allows
	 * concurrent updates to the stack to proceed.
	 */
	err = rollback_lock_file(&tables_list_lock);
	if (err < 0) {
		err = REFTABLE_IO_ERROR;
		goto done;
	}

	/*
	 * Compact the now-locked tables into a new table. Note that compacting
	 * these tables may end up with an empty new table in case tombstones
	 * end up cancelling out all refs in that range.
	 */
	err = stack_compact_locked(st, first, last, expiry, &new_table);
	if (err < 0) {
		if (err != REFTABLE_EMPTY_TABLE_ERROR)
			goto done;
		is_empty_table = 1;
	}

	/*
	 * Now that we have written the new, compacted table we need to re-lock
	 * "tables.list". We'll then replace the compacted range of tables with
	 * the new table.
	 */
	err = hold_lock_file_for_update(&tables_list_lock, st->list_file,
					LOCK_NO_DEREF);
	if (err < 0) {
		if (errno == EEXIST)
			err = REFTABLE_LOCK_ERROR;
		else
			err = REFTABLE_IO_ERROR;
		goto done;
	}

	if (st->opts.default_permissions) {
		if (chmod(get_lock_file_path(&tables_list_lock),
			  st->opts.default_permissions) < 0) {
			err = REFTABLE_IO_ERROR;
			goto done;
		}
	}

	/*
	 * If the resulting compacted table is not empty, then we need to move
	 * it into place now.
	 */
	if (!is_empty_table) {
		format_name(&new_table_name, st->readers[first]->min_update_index,
			    st->readers[last]->max_update_index);
		strbuf_addstr(&new_table_name, ".ref");
		stack_filename(&new_table_path, st, new_table_name.buf);

		err = rename_tempfile(&new_table, new_table_path.buf);
		if (err < 0) {
			err = REFTABLE_IO_ERROR;
			goto done;
		}
	}

	/*
	 * Write the new "tables.list" contents with the compacted table we
	 * have just written. In case the compacted table became empty we
	 * simply skip writing it.
	 */
	for (i = 0; i < first; i++)
		strbuf_addf(&tables_list_buf, "%s\n", st->readers[i]->name);
	if (!is_empty_table)
		strbuf_addf(&tables_list_buf, "%s\n", new_table_name.buf);
	for (i = last + 1; i < st->merged->stack_len; i++)
		strbuf_addf(&tables_list_buf, "%s\n", st->readers[i]->name);

	err = write_in_full(get_lock_file_fd(&tables_list_lock),
			    tables_list_buf.buf, tables_list_buf.len);
	if (err < 0) {
		err = REFTABLE_IO_ERROR;
		unlink(new_table_path.buf);
		goto done;
	}

	err = fsync_component(FSYNC_COMPONENT_REFERENCE, get_lock_file_fd(&tables_list_lock));
	if (err < 0) {
		err = REFTABLE_IO_ERROR;
		unlink(new_table_path.buf);
		goto done;
	}

	err = commit_lock_file(&tables_list_lock);
	if (err < 0) {
		err = REFTABLE_IO_ERROR;
		unlink(new_table_path.buf);
		goto done;
	}

	/*
	 * Reload the stack before deleting the compacted tables. We can only
	 * delete the files after we closed them on Windows, so this needs to
	 * happen first.
	 */
	err = reftable_stack_reload_maybe_reuse(st, first < last);
	if (err < 0)
		goto done;

	/*
	 * Delete the old tables. They may still be in use by concurrent
	 * readers, so it is expected that unlinking tables may fail.
	 */
	for (i = first; i <= last; i++) {
		struct lock_file *table_lock = &table_locks[i - first];
		char *table_path = get_locked_file_path(table_lock);
		unlink(table_path);
		free(table_path);
	}

done:
	rollback_lock_file(&tables_list_lock);
	for (i = first; table_locks && i <= last; i++)
		rollback_lock_file(&table_locks[i - first]);
	reftable_free(table_locks);

	delete_tempfile(&new_table);
	strbuf_release(&new_table_name);
	strbuf_release(&new_table_path);

	strbuf_release(&tables_list_buf);
	strbuf_release(&table_name);
	return err;
}
int reftable_stack_compact_all(struct reftable_stack *st,
			       struct reftable_log_expiry_config *config)
{
	return stack_compact_range(st, 0, st->merged->stack_len ?
			st->merged->stack_len - 1 : 0, config);
}

static int stack_compact_range_stats(struct reftable_stack *st,
				     size_t first, size_t last,
				     struct reftable_log_expiry_config *config)
{
	int err = stack_compact_range(st, first, last, config);
	if (err == REFTABLE_LOCK_ERROR)
		st->stats.failures++;
	return err;
}
static int segment_size(struct segment *s)
{
	return s->end - s->start;
}
struct segment suggest_compaction_segment(uint64_t *sizes, size_t n,
					  uint8_t factor)
{
	struct segment seg = { 0 };
	uint64_t bytes;
	size_t i;

	if (!factor)
		factor = DEFAULT_GEOMETRIC_FACTOR;

	/*
	 * If there are no tables or only a single one then we don't have to
	 * compact anything. The sequence is geometric by definition already.
	 */
	if (n <= 1)
		return seg;

	/*
	 * Find the ending table of the compaction segment needed to restore the
	 * geometric sequence. Note that the segment end is exclusive.
	 *
	 * To do so, we iterate backwards starting from the most recent table
	 * until a valid segment end is found. If the preceding table is smaller
	 * than the current table multiplied by the geometric factor (by default
	 * 2), the compaction segment end has been identified.
	 *
	 * Tables after the ending point are not added to the byte count because
	 * they are already valid members of the geometric sequence. Due to the
	 * properties of a geometric sequence, it is not possible for the sum of
	 * these tables to exceed the value of the ending point table.
	 *
	 * Example table size sequence requiring no compaction:
	 * 	64, 32, 16, 8, 4, 2, 1
	 *
	 * Example table size sequence where the compaction segment end is set
	 * to the last table. Since the segment end is exclusive, the last table
	 * is excluded during subsequent compaction and the table with size 3 is
	 * the final table included:
	 * 	64, 32, 16, 8, 4, 3, 1
	 */
	for (i = n - 1; i > 0; i--) {
		if (sizes[i - 1] < sizes[i] * factor) {
			seg.end = i + 1;
			bytes = sizes[i];
			break;
		}
	}

	/*
	 * Find the starting table of the compaction segment by iterating
	 * through the remaining tables and keeping track of the accumulated
	 * size of all tables seen from the segment end table. The previous
	 * table is compared to the accumulated size because the tables from the
	 * segment end are merged backwards recursively.
	 *
	 * Note that we keep iterating even after we have found the first
	 * starting point. This is because there may be tables in the stack
	 * preceding that first starting point which violate the geometric
	 * sequence.
	 *
	 * Example compaction segment start set to table with size 32:
	 * 	128, 32, 16, 8, 4, 3, 1
	 */
	for (; i > 0; i--) {
		uint64_t curr = bytes;
		bytes += sizes[i - 1];

		if (sizes[i - 1] < curr * factor) {
			seg.start = i - 1;
			seg.bytes = bytes;
		}
	}

	return seg;
}
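/*
 * Worked example for the sequence 128, 32, 16, 8, 4, 3, 1 with factor 2 (as
 * in the comment above): the backwards scan stops between the tables of size
 * 4 and 3 because 4 < 3 * 2, so the exclusive segment end is set just past
 * the table of size 3 and the byte count starts at 3. Accumulating from
 * there, every preceding table up to and including the one of size 32 is
 * smaller than twice the bytes accumulated so far (4 < 6, 8 < 14, 16 < 30,
 * 32 < 62), while 128 < 126 fails. The suggestion is thus start = 1, end = 6,
 * bytes = 63: compact the tables sized 32 through 3 into one of roughly 63,
 * restoring the geometric sequence 128, 63, 1.
 */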
static uint64_t *stack_table_sizes_for_compaction(struct reftable_stack *st)
{
	uint64_t *sizes =
		reftable_calloc(st->merged->stack_len, sizeof(*sizes));
	int version = (st->opts.hash_id == GIT_SHA1_FORMAT_ID) ? 1 : 2;
	int overhead = header_size(version) - 1;
	int i = 0;
	for (i = 0; i < st->merged->stack_len; i++) {
		sizes[i] = st->readers[i]->size - overhead;
	}
	return sizes;
}
int reftable_stack_auto_compact(struct reftable_stack *st)
{
	uint64_t *sizes = stack_table_sizes_for_compaction(st);
	struct segment seg =
		suggest_compaction_segment(sizes, st->merged->stack_len,
					   st->opts.auto_compaction_factor);
	reftable_free(sizes);
	if (segment_size(&seg) > 0)
		return stack_compact_range_stats(st, seg.start, seg.end - 1,
						 NULL);

	return 0;
}
struct reftable_compaction_stats *
reftable_stack_compaction_stats(struct reftable_stack *st)
{
	return &st->stats;
}
int reftable_stack_read_ref(struct reftable_stack *st, const char *refname,
			    struct reftable_ref_record *ref)
{
	struct reftable_table tab = { NULL };
	reftable_table_from_merged_table(&tab, reftable_stack_merged_table(st));
	return reftable_table_read_ref(&tab, refname, ref);
}
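/*
 * Illustrative sketch (not part of this file): looking up a single ref. The
 * ref name is an example. A return value of 0 means the ref was found, a
 * positive value means it does not exist, and a negative value is an error.
 *
 *	struct reftable_ref_record ref = { 0 };
 *	int err = reftable_stack_read_ref(st, "refs/heads/main", &ref);
 *	if (!err)
 *		...inspect ref.value_type and ref.value...
 *	reftable_ref_record_release(&ref);
 */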
int reftable_stack_read_log(struct reftable_stack *st, const char *refname,
			    struct reftable_log_record *log)
{
	struct reftable_iterator it = {0};
	int err;

	reftable_stack_init_log_iterator(st, &it);
	err = reftable_iterator_seek_log(&it, refname);
	if (err)
		goto done;

	err = reftable_iterator_next_log(&it, log);
	if (err)
		goto done;

	if (strcmp(log->refname, refname) ||
	    reftable_log_record_is_deletion(log)) {
		err = 1;
		goto done;
	}

done:
	if (err) {
		reftable_log_record_release(log);
	}
	reftable_iterator_destroy(&it);
	return err;
}
static int is_table_name(const char *s)
{
	const char *dot = strrchr(s, '.');
	return dot && !strcmp(dot, ".ref");
}
static void remove_maybe_stale_table(struct reftable_stack *st, uint64_t max,
				     const char *name)
{
	int err = 0;
	uint64_t update_idx = 0;
	struct reftable_block_source src = { NULL };
	struct reftable_reader *rd = NULL;
	struct strbuf table_path = STRBUF_INIT;
	stack_filename(&table_path, st, name);

	err = reftable_block_source_from_file(&src, table_path.buf);
	if (err < 0)
		goto done;

	err = reftable_new_reader(&rd, &src, name);
	if (err < 0)
		goto done;

	update_idx = reftable_reader_max_update_index(rd);
	reftable_reader_free(rd);

	if (update_idx <= max) {
		unlink(table_path.buf);
	}
done:
	strbuf_release(&table_path);
}
static int reftable_stack_clean_locked(struct reftable_stack *st)
{
	uint64_t max = reftable_merged_table_max_update_index(
		reftable_stack_merged_table(st));
	DIR *dir = opendir(st->reftable_dir);
	struct dirent *d = NULL;
	if (!dir) {
		return REFTABLE_IO_ERROR;
	}

	while ((d = readdir(dir))) {
		int i = 0;
		int found = 0;
		if (!is_table_name(d->d_name))
			continue;

		for (i = 0; !found && i < st->readers_len; i++) {
			found = !strcmp(reader_name(st->readers[i]), d->d_name);
		}
		if (found)
			continue;

		remove_maybe_stale_table(st, max, d->d_name);
	}

	closedir(dir);
	return 0;
}
int reftable_stack_clean(struct reftable_stack *st)
{
	struct reftable_addition *add = NULL;
	int err = reftable_stack_new_addition(&add, st);
	if (err < 0) {
		goto done;
	}

	err = reftable_stack_reload(st);
	if (err < 0) {
		goto done;
	}

	err = reftable_stack_clean_locked(st);

done:
	reftable_addition_destroy(add);
	return err;
}
int reftable_stack_print_directory(const char *stackdir, uint32_t hash_id)
{
	struct reftable_stack *stack = NULL;
	struct reftable_write_options opts = { .hash_id = hash_id };
	struct reftable_merged_table *merged = NULL;
	struct reftable_table table = { NULL };

	int err = reftable_new_stack(&stack, stackdir, &opts);
	if (err < 0)
		goto done;

	merged = reftable_stack_merged_table(stack);
	reftable_table_from_merged_table(&table, merged);
	err = reftable_table_print(&table);
done:
	if (stack)
		reftable_stack_destroy(stack);
	return err;
}
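/*
 * Illustrative end-to-end sketch (not part of this file): a typical write
 * through the stack, using the hypothetical write_one_ref() callback sketched
 * near reftable_stack_add() above. Error handling is omitted; the path is an
 * example.
 *
 *	struct reftable_stack *st = NULL;
 *	reftable_new_stack(&st, ".git/reftable", NULL);
 *	reftable_stack_add(st, &write_one_ref, &rec);
 *	reftable_stack_destroy(st);
 *
 * On REFTABLE_OUTDATED_ERROR the stack has already been reloaded by
 * reftable_stack_add(), so the caller may simply retry the call.
 */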