/*
 * reftable/stack.c (from alt-git.git, blob 77a387a8)
 * Related change: "reftable/stack: fix race in up-to-date check"
 */
1 /*
2 Copyright 2020 Google LLC
4 Use of this source code is governed by a BSD-style
5 license that can be found in the LICENSE file or at
6 https://developers.google.com/open-source/licenses/bsd
7 */
9 #include "stack.h"
11 #include "system.h"
12 #include "merged.h"
13 #include "reader.h"
14 #include "refname.h"
15 #include "reftable-error.h"
16 #include "reftable-record.h"
17 #include "reftable-merged.h"
18 #include "writer.h"
20 #include "tempfile.h"
/* Forward declarations for static helpers defined later in this file. */
static int stack_try_add(struct reftable_stack *st,
			 int (*write_table)(struct reftable_writer *wr,
					    void *arg),
			 void *arg);
static int stack_write_compact(struct reftable_stack *st,
			       struct reftable_writer *wr, int first, int last,
			       struct reftable_log_expiry_config *config);
static int stack_check_addition(struct reftable_stack *st,
				const char *new_tab_name);
static void reftable_addition_close(struct reftable_addition *add);
static int reftable_stack_reload_maybe_reuse(struct reftable_stack *st,
					     int reuse_open);
/* Replaces the contents of dest with the path "<reftable_dir>/<name>". */
static void stack_filename(struct strbuf *dest, struct reftable_stack *st,
			   const char *name)
{
	strbuf_reset(dest);
	strbuf_addstr(dest, st->reftable_dir);
	strbuf_addstr(dest, "/");
	strbuf_addstr(dest, name);
}
/*
 * Writer callback: writes sz bytes of data to the file descriptor passed
 * via arg (a pointer to an int fd). Returns the result of write_in_full().
 */
static ssize_t reftable_fd_write(void *arg, const void *data, size_t sz)
{
	int *fdp = (int *)arg;
	return write_in_full(*fdp, data, sz);
}
/*
 * Allocates a new stack for the tables in `dir` and loads it from
 * "<dir>/tables.list". On success stores the stack in *dest and returns 0;
 * on error *dest is left NULL, the partially initialized stack is destroyed
 * and a negative reftable error code is returned.
 */
int reftable_new_stack(struct reftable_stack **dest, const char *dir,
		       struct reftable_write_options config)
{
	struct reftable_stack *p =
		reftable_calloc(sizeof(struct reftable_stack));
	struct strbuf list_file_name = STRBUF_INIT;
	int err = 0;

	/* Default to SHA-1 when the caller did not pick a hash. */
	if (config.hash_id == 0) {
		config.hash_id = GIT_SHA1_FORMAT_ID;
	}

	*dest = NULL;

	strbuf_reset(&list_file_name);
	strbuf_addstr(&list_file_name, dir);
	strbuf_addstr(&list_file_name, "/tables.list");

	p->list_file = strbuf_detach(&list_file_name, NULL);
	p->list_fd = -1; /* no cached fd for "tables.list" yet */
	p->reftable_dir = xstrdup(dir);
	p->config = config;

	err = reftable_stack_reload_maybe_reuse(p, 1);
	if (err < 0) {
		reftable_stack_destroy(p);
	} else {
		*dest = p;
	}
	return err;
}
/*
 * Reads the whole contents of fd into memory, NUL-terminates it and parses
 * it into a NULL-terminated array of names via parse_names(), stored in
 * *namesp. Returns 0 on success, REFTABLE_IO_ERROR on seek/read failure.
 * The caller owns *namesp (free with free_names()).
 */
static int fd_read_lines(int fd, char ***namesp)
{
	off_t size = lseek(fd, 0, SEEK_END);
	char *buf = NULL;
	int err = 0;
	if (size < 0) {
		err = REFTABLE_IO_ERROR;
		goto done;
	}
	/* NOTE(review): off_t is narrowed to int here; only the sign is
	 * inspected, so this is harmless in practice. */
	err = lseek(fd, 0, SEEK_SET);
	if (err < 0) {
		err = REFTABLE_IO_ERROR;
		goto done;
	}

	buf = reftable_malloc(size + 1);
	if (read_in_full(fd, buf, size) != size) {
		err = REFTABLE_IO_ERROR;
		goto done;
	}
	buf[size] = 0;

	parse_names(buf, size, namesp);

done:
	reftable_free(buf);
	return err;
}
/*
 * Reads the lines of `filename` into a NULL-terminated name array in
 * *namesp. A missing file is not an error: *namesp is set to an empty
 * (single NULL entry) list. Returns 0 on success or REFTABLE_IO_ERROR.
 */
int read_lines(const char *filename, char ***namesp)
{
	int fd = open(filename, O_RDONLY);
	int err = 0;
	if (fd < 0) {
		if (errno == ENOENT) {
			*namesp = reftable_calloc(sizeof(char *));
			return 0;
		}

		return REFTABLE_IO_ERROR;
	}
	err = fd_read_lines(fd, namesp);
	close(fd);
	return err;
}
/* Returns the merged view over all tables currently loaded in the stack. */
struct reftable_merged_table *
reftable_stack_merged_table(struct reftable_stack *st)
{
	return st->merged;
}
/* Returns 1 if `name` occurs in the NULL-terminated list `names`, else 0. */
static int has_name(char **names, const char *name)
{
	for (; *names; names++) {
		if (!strcmp(*names, name))
			return 1;
	}
	return 0;
}
/* Close and free the stack */
void reftable_stack_destroy(struct reftable_stack *st)
{
	char **names = NULL;
	int err = 0;
	if (st->merged) {
		reftable_merged_table_free(st->merged);
		st->merged = NULL;
	}

	/* Re-read "tables.list" so we only unlink table files that are no
	 * longer referenced by it; best-effort, a read error just skips the
	 * unlink step. */
	err = read_lines(st->list_file, &names);
	if (err < 0) {
		FREE_AND_NULL(names);
	}

	if (st->readers) {
		int i = 0;
		struct strbuf filename = STRBUF_INIT;
		for (i = 0; i < st->readers_len; i++) {
			const char *name = reader_name(st->readers[i]);
			strbuf_reset(&filename);
			if (names && !has_name(names, name)) {
				stack_filename(&filename, st, name);
			}
			reftable_reader_free(st->readers[i]);

			if (filename.len) {
				/* On Windows, can only unlink after closing. */
				unlink(filename.buf);
			}
		}
		strbuf_release(&filename);
		st->readers_len = 0;
		FREE_AND_NULL(st->readers);
	}

	if (st->list_fd >= 0) {
		close(st->list_fd);
		st->list_fd = -1;
	}

	FREE_AND_NULL(st->list_file);
	FREE_AND_NULL(st->reftable_dir);
	reftable_free(st);
	free_names(names);
}
191 static struct reftable_reader **stack_copy_readers(struct reftable_stack *st,
192 int cur_len)
194 struct reftable_reader **cur =
195 reftable_calloc(sizeof(struct reftable_reader *) * cur_len);
196 int i = 0;
197 for (i = 0; i < cur_len; i++) {
198 cur[i] = st->readers[i];
200 return cur;
/*
 * Replaces the stack's readers and merged table with the tables listed in
 * `names`. When reuse_open is set, readers for tables that are still listed
 * are carried over instead of being reopened. Old readers whose tables are
 * no longer listed are closed and their files unlinked. Returns 0 on
 * success or a negative reftable error code.
 */
static int reftable_stack_reload_once(struct reftable_stack *st, char **names,
				      int reuse_open)
{
	int cur_len = !st->merged ? 0 : st->merged->stack_len;
	struct reftable_reader **cur = stack_copy_readers(st, cur_len);
	int err = 0;
	int names_len = names_length(names);
	struct reftable_reader **new_readers =
		reftable_calloc(sizeof(struct reftable_reader *) * names_len);
	struct reftable_table *new_tables =
		reftable_calloc(sizeof(struct reftable_table) * names_len);
	int new_readers_len = 0;
	struct reftable_merged_table *new_merged = NULL;
	struct strbuf table_path = STRBUF_INIT;
	int i;

	while (*names) {
		struct reftable_reader *rd = NULL;
		char *name = *names++;

		/* this is linear; we assume compaction keeps the number of
		   tables under control so this is not quadratic. */
		int j = 0;
		for (j = 0; reuse_open && j < cur_len; j++) {
			if (cur[j] && 0 == strcmp(cur[j]->name, name)) {
				/* Transfer ownership out of cur[] so the
				 * cleanup loop below does not close it. */
				rd = cur[j];
				cur[j] = NULL;
				break;
			}
		}

		if (!rd) {
			struct reftable_block_source src = { NULL };
			stack_filename(&table_path, st, name);

			err = reftable_block_source_from_file(&src,
							      table_path.buf);
			if (err < 0)
				goto done;

			err = reftable_new_reader(&rd, &src, name);
			if (err < 0)
				goto done;
		}

		new_readers[new_readers_len] = rd;
		reftable_table_from_reader(&new_tables[new_readers_len], rd);
		new_readers_len++;
	}

	/* success! */
	err = reftable_new_merged_table(&new_merged, new_tables,
					new_readers_len, st->config.hash_id);
	if (err < 0)
		goto done;

	/* The merged table now owns new_tables; clear our reference so the
	 * cleanup path does not free it. */
	new_tables = NULL;
	st->readers_len = new_readers_len;
	if (st->merged) {
		merged_table_release(st->merged);
		reftable_merged_table_free(st->merged);
	}
	if (st->readers) {
		reftable_free(st->readers);
	}
	st->readers = new_readers;
	/* Zeroing these prevents the error path below from closing the
	 * readers we just installed. */
	new_readers = NULL;
	new_readers_len = 0;

	new_merged->suppress_deletions = 1;
	st->merged = new_merged;

	/* Close and unlink readers that were not carried over. */
	for (i = 0; i < cur_len; i++) {
		if (cur[i]) {
			const char *name = reader_name(cur[i]);
			stack_filename(&table_path, st, name);

			reader_close(cur[i]);
			reftable_reader_free(cur[i]);

			/* On Windows, can only unlink after closing. */
			unlink(table_path.buf);
		}
	}

done:
	/* Only non-NULL entries remain here on the error path. */
	for (i = 0; i < new_readers_len; i++) {
		reader_close(new_readers[i]);
		reftable_reader_free(new_readers[i]);
	}
	reftable_free(new_readers);
	reftable_free(new_tables);
	reftable_free(cur);
	strbuf_release(&table_path);
	return err;
}
/*
 * Three-way comparison of timevals: returns a negative value if a is before
 * b, 0 if they are equal, and a positive value if a is after b.
 */
static int tv_cmp(struct timeval *a, struct timeval *b)
{
	/*
	 * Compare field by field and return a normalized sign. The previous
	 * code returned the raw time_t difference truncated to int, which
	 * could yield 0 or the wrong sign for deltas outside int range
	 * (e.g. a multiple of 2^32 seconds).
	 */
	if (a->tv_sec != b->tv_sec)
		return a->tv_sec < b->tv_sec ? -1 : 1;
	if (a->tv_usec != b->tv_usec)
		return a->tv_usec < b->tv_usec ? -1 : 1;
	return 0;
}
/*
 * Reloads the stack from "tables.list", retrying with exponential backoff
 * for up to ~3 seconds when a concurrent writer races with us (detected via
 * REFTABLE_NOT_EXIST_ERROR plus a changed name list). On success the open
 * fd of "tables.list" is cached in st->list_fd to serve as a stat-cache
 * anchor (see the long comment below). Returns 0 or a negative error code.
 */
static int reftable_stack_reload_maybe_reuse(struct reftable_stack *st,
					     int reuse_open)
{
	char **names = NULL, **names_after = NULL;
	struct timeval deadline;
	int64_t delay = 0;
	int tries = 0, err;
	int fd = -1;

	err = gettimeofday(&deadline, NULL);
	if (err < 0)
		goto out;
	deadline.tv_sec += 3;

	while (1) {
		struct timeval now;

		err = gettimeofday(&now, NULL);
		if (err < 0)
			goto out;

		/*
		 * Only look at deadlines after the first few times. This
		 * simplifies debugging in GDB.
		 */
		tries++;
		if (tries > 3 && tv_cmp(&now, &deadline) >= 0)
			goto out;

		fd = open(st->list_file, O_RDONLY);
		if (fd < 0) {
			if (errno != ENOENT) {
				err = REFTABLE_IO_ERROR;
				goto out;
			}

			/* Missing list file: treat as an empty stack. */
			names = reftable_calloc(sizeof(char *));
		} else {
			err = fd_read_lines(fd, &names);
			if (err < 0)
				goto out;
		}

		err = reftable_stack_reload_once(st, names, reuse_open);
		if (!err)
			break;
		if (err != REFTABLE_NOT_EXIST_ERROR)
			goto out;

		/*
		 * REFTABLE_NOT_EXIST_ERROR can be caused by a concurrent
		 * writer. Check if there was one by checking if the name list
		 * changed.
		 */
		err = read_lines(st->list_file, &names_after);
		if (err < 0)
			goto out;
		if (names_equal(names_after, names)) {
			/* No concurrent writer: the table is really gone. */
			err = REFTABLE_NOT_EXIST_ERROR;
			goto out;
		}

		free_names(names);
		names = NULL;
		free_names(names_after);
		names_after = NULL;
		close(fd);
		fd = -1;

		/* Randomized exponential backoff before retrying. */
		delay = delay + (delay * rand()) / RAND_MAX + 1;
		sleep_millisec(delay);
	}

out:
	/*
	 * Invalidate the stat cache. It is sufficient to only close the file
	 * descriptor and keep the cached stat info because we never use the
	 * latter when the former is negative.
	 */
	if (st->list_fd >= 0) {
		close(st->list_fd);
		st->list_fd = -1;
	}

	/*
	 * Cache stat information in case it provides a useful signal to us.
	 * According to POSIX, "The st_ino and st_dev fields taken together
	 * uniquely identify the file within the system." That being said,
	 * Windows is not POSIX compliant and we do not have these fields
	 * available. So the information we have there is insufficient to
	 * determine whether two file descriptors point to the same file.
	 *
	 * While we could fall back to using other signals like the file's
	 * mtime, those are not sufficient to avoid races. We thus refrain from
	 * using the stat cache on such systems and fall back to the secondary
	 * caching mechanism, which is to check whether contents of the file
	 * have changed.
	 *
	 * On other systems which are POSIX compliant we must keep the file
	 * descriptor open. This is to avoid a race condition where two
	 * processes access the reftable stack at the same point in time:
	 *
	 *   1. A reads the reftable stack and caches its stat info.
	 *
	 *   2. B updates the stack, appending a new table to "tables.list".
	 *      This will both use a new inode and result in a different file
	 *      size, thus invalidating A's cache in theory.
	 *
	 *   3. B decides to auto-compact the stack and merges two tables. The
	 *      file size now matches what A has cached again. Furthermore, the
	 *      filesystem may decide to recycle the inode number of the file
	 *      we have replaced in (2) because it is not in use anymore.
	 *
	 *   4. A reloads the reftable stack. Neither the inode number nor the
	 *      file size changed. If the timestamps did not change either then
	 *      we think the cached copy of our stack is up-to-date.
	 *
	 * By keeping the file descriptor open the inode number cannot be
	 * recycled, mitigating the race.
	 */
	if (!err && fd >= 0 && !fstat(fd, &st->list_st) &&
	    st->list_st.st_dev && st->list_st.st_ino) {
		st->list_fd = fd;
		fd = -1;
	}

	if (fd >= 0)
		close(fd);
	free_names(names);
	free_names(names_after);
	return err;
}
/* -1 = error
 0 = up to date
 1 = changed. */
static int stack_uptodate(struct reftable_stack *st)
{
	char **names = NULL;
	int err;
	int i = 0;

	/*
	 * When we have cached stat information available then we use it to
	 * verify whether the file has been rewritten.
	 *
	 * Note that we explicitly do not want to use `stat_validity_check()`
	 * and friends here because they may end up not comparing the `st_dev`
	 * and `st_ino` fields. These functions thus cannot guarantee that we
	 * indeed still have the same file.
	 */
	if (st->list_fd >= 0) {
		struct stat list_st;

		if (stat(st->list_file, &list_st) < 0) {
			/*
			 * It's fine for "tables.list" to not exist. In that
			 * case, we have to refresh when the loaded stack has
			 * any readers.
			 */
			if (errno == ENOENT)
				return !!st->readers_len;
			return REFTABLE_IO_ERROR;
		}

		/*
		 * When "tables.list" refers to the same file we can assume
		 * that it didn't change. This is because we always use
		 * rename(3P) to update the file and never write to it
		 * directly.
		 */
		if (st->list_st.st_dev == list_st.st_dev &&
		    st->list_st.st_ino == list_st.st_ino)
			return 0;
	}

	/* Fall back to comparing the loaded readers against the on-disk
	 * list entry by entry; any mismatch in names or length means the
	 * stack changed. */
	err = read_lines(st->list_file, &names);
	if (err < 0)
		return err;

	for (i = 0; i < st->readers_len; i++) {
		if (!names[i]) {
			err = 1;
			goto done;
		}

		if (strcmp(st->readers[i]->name, names[i])) {
			err = 1;
			goto done;
		}
	}

	/* The on-disk list has more entries than we have readers. */
	if (names[st->merged->stack_len]) {
		err = 1;
		goto done;
	}

done:
	free_names(names);
	return err;
}
/*
 * Refreshes the in-memory stack from disk if "tables.list" changed.
 * Returns 0 when already up to date or a negative error code.
 */
int reftable_stack_reload(struct reftable_stack *st)
{
	int uptodate = stack_uptodate(st);

	if (uptodate <= 0)
		return uptodate; /* 0: nothing to do; < 0: error */
	return reftable_stack_reload_maybe_reuse(st, 1);
}
/*
 * Adds a new table to the stack, produced by the `write` callback. On
 * REFTABLE_LOCK_ERROR the stack is reloaded (so the caller can retry on
 * fresh state) but the error is still propagated. On success, triggers
 * auto-compaction unless disabled.
 */
int reftable_stack_add(struct reftable_stack *st,
		       int (*write)(struct reftable_writer *wr, void *arg),
		       void *arg)
{
	int err = stack_try_add(st, write, arg);
	if (err < 0) {
		if (err == REFTABLE_LOCK_ERROR) {
			/* Ignore error return, we want to propagate
			   REFTABLE_LOCK_ERROR.
			*/
			reftable_stack_reload(st);
		}
		return err;
	}

	if (!st->disable_auto_compact)
		return reftable_stack_auto_compact(st);

	return 0;
}
/*
 * Formats a table name "0x<min>-0x<max>-<random>" covering the update-index
 * range [min, max]; the random suffix disambiguates tables with the same
 * range. Replaces the contents of dest.
 */
static void format_name(struct strbuf *dest, uint64_t min, uint64_t max)
{
	char buf[100];
	uint32_t rnd = (uint32_t)git_rand();
	snprintf(buf, sizeof(buf), "0x%012" PRIx64 "-0x%012" PRIx64 "-%08x",
		 min, max, rnd);
	strbuf_reset(dest);
	strbuf_addstr(dest, buf);
}
/* In-progress transaction that appends tables to a stack. */
struct reftable_addition {
	struct tempfile *lock_file; /* "tables.list.lock", held for the txn */
	struct reftable_stack *stack;

	char **new_tables;	/* basenames of tables written so far */
	int new_tables_len;
	uint64_t next_update_index; /* min update index for new tables */
};

#define REFTABLE_ADDITION_INIT {0}
563 static int reftable_stack_init_addition(struct reftable_addition *add,
564 struct reftable_stack *st)
566 struct strbuf lock_file_name = STRBUF_INIT;
567 int err = 0;
568 add->stack = st;
570 strbuf_addf(&lock_file_name, "%s.lock", st->list_file);
572 add->lock_file = create_tempfile(lock_file_name.buf);
573 if (!add->lock_file) {
574 if (errno == EEXIST) {
575 err = REFTABLE_LOCK_ERROR;
576 } else {
577 err = REFTABLE_IO_ERROR;
579 goto done;
581 if (st->config.default_permissions) {
582 if (chmod(add->lock_file->filename.buf, st->config.default_permissions) < 0) {
583 err = REFTABLE_IO_ERROR;
584 goto done;
588 err = stack_uptodate(st);
589 if (err < 0)
590 goto done;
592 if (err > 1) {
593 err = REFTABLE_LOCK_ERROR;
594 goto done;
597 add->next_update_index = reftable_stack_next_update_index(st);
598 done:
599 if (err) {
600 reftable_addition_close(add);
602 strbuf_release(&lock_file_name);
603 return err;
606 static void reftable_addition_close(struct reftable_addition *add)
608 int i = 0;
609 struct strbuf nm = STRBUF_INIT;
610 for (i = 0; i < add->new_tables_len; i++) {
611 stack_filename(&nm, add->stack, add->new_tables[i]);
612 unlink(nm.buf);
613 reftable_free(add->new_tables[i]);
614 add->new_tables[i] = NULL;
616 reftable_free(add->new_tables);
617 add->new_tables = NULL;
618 add->new_tables_len = 0;
620 delete_tempfile(&add->lock_file);
621 strbuf_release(&nm);
/* Aborts and frees a heap-allocated addition; a NULL add is a no-op. */
void reftable_addition_destroy(struct reftable_addition *add)
{
	if (!add)
		return;
	reftable_addition_close(add);
	reftable_free(add);
}
/*
 * Commits the addition: writes the combined table list (existing tables
 * plus the newly written ones) into the lock file and atomically renames it
 * over "tables.list", then reloads the stack and possibly auto-compacts.
 * An addition with no new tables is a successful no-op. The addition is
 * always closed before returning.
 */
int reftable_addition_commit(struct reftable_addition *add)
{
	struct strbuf table_list = STRBUF_INIT;
	int lock_file_fd = get_tempfile_fd(add->lock_file);
	int i = 0;
	int err = 0;

	if (add->new_tables_len == 0)
		goto done;

	for (i = 0; i < add->stack->merged->stack_len; i++) {
		strbuf_addstr(&table_list, add->stack->readers[i]->name);
		strbuf_addstr(&table_list, "\n");
	}
	for (i = 0; i < add->new_tables_len; i++) {
		strbuf_addstr(&table_list, add->new_tables[i]);
		strbuf_addstr(&table_list, "\n");
	}

	err = write_in_full(lock_file_fd, table_list.buf, table_list.len);
	strbuf_release(&table_list);
	if (err < 0) {
		err = REFTABLE_IO_ERROR;
		goto done;
	}

	/* Atomically publish the new list; readers see either the old or
	 * the new "tables.list", never a partial write. */
	err = rename_tempfile(&add->lock_file, add->stack->list_file);
	if (err < 0) {
		err = REFTABLE_IO_ERROR;
		goto done;
	}

	/* success, no more state to clean up. */
	for (i = 0; i < add->new_tables_len; i++) {
		reftable_free(add->new_tables[i]);
	}
	reftable_free(add->new_tables);
	add->new_tables = NULL;
	add->new_tables_len = 0;

	err = reftable_stack_reload_maybe_reuse(add->stack, 1);
	if (err)
		goto done;

	if (!add->stack->disable_auto_compact)
		err = reftable_stack_auto_compact(add->stack);

done:
	reftable_addition_close(add);
	return err;
}
685 int reftable_stack_new_addition(struct reftable_addition **dest,
686 struct reftable_stack *st)
688 int err = 0;
689 struct reftable_addition empty = REFTABLE_ADDITION_INIT;
690 *dest = reftable_calloc(sizeof(**dest));
691 **dest = empty;
692 err = reftable_stack_init_addition(*dest, st);
693 if (err) {
694 reftable_free(*dest);
695 *dest = NULL;
697 return err;
/*
 * One attempt at adding a table: opens an addition on the stack, runs the
 * caller's write_table callback and commits. A positive result from
 * initialization (stale stack) is mapped to REFTABLE_LOCK_ERROR so the
 * caller can reload and retry. The addition is always closed.
 */
static int stack_try_add(struct reftable_stack *st,
			 int (*write_table)(struct reftable_writer *wr,
					    void *arg),
			 void *arg)
{
	struct reftable_addition add = REFTABLE_ADDITION_INIT;
	int err = reftable_stack_init_addition(&add, st);
	if (err < 0)
		goto done;
	if (err > 0) {
		err = REFTABLE_LOCK_ERROR;
		goto done;
	}

	err = reftable_addition_add(&add, write_table, arg);
	if (err < 0)
		goto done;

	err = reftable_addition_commit(&add);
done:
	reftable_addition_close(&add);
	return err;
}
/*
 * Writes one new table into the transaction: creates a unique temp file,
 * lets the write_table callback fill it through a reftable writer, checks
 * the result for name conflicts and update-index validity, then renames it
 * to its final "<min>-<max>-<rnd>.ref" name and records it in the
 * addition. An empty table is discarded and treated as success. On any
 * error the temp file is unlinked.
 */
int reftable_addition_add(struct reftable_addition *add,
			  int (*write_table)(struct reftable_writer *wr,
					     void *arg),
			  void *arg)
{
	struct strbuf temp_tab_file_name = STRBUF_INIT;
	struct strbuf tab_file_name = STRBUF_INIT;
	struct strbuf next_name = STRBUF_INIT;
	struct reftable_writer *wr = NULL;
	int err = 0;
	int tab_fd = 0;

	strbuf_reset(&next_name);
	format_name(&next_name, add->next_update_index, add->next_update_index);

	stack_filename(&temp_tab_file_name, add->stack, next_name.buf);
	strbuf_addstr(&temp_tab_file_name, ".temp.XXXXXX");

	tab_fd = mkstemp(temp_tab_file_name.buf);
	if (tab_fd < 0) {
		err = REFTABLE_IO_ERROR;
		goto done;
	}
	if (add->stack->config.default_permissions) {
		if (chmod(temp_tab_file_name.buf, add->stack->config.default_permissions)) {
			err = REFTABLE_IO_ERROR;
			goto done;
		}
	}
	wr = reftable_new_writer(reftable_fd_write, &tab_fd,
				 &add->stack->config);
	err = write_table(wr, arg);
	if (err < 0)
		goto done;

	err = reftable_writer_close(wr);
	if (err == REFTABLE_EMPTY_TABLE_ERROR) {
		/* Nothing was written; drop the temp file, not an error. */
		err = 0;
		goto done;
	}
	if (err < 0)
		goto done;

	err = close(tab_fd);
	tab_fd = 0;
	if (err < 0) {
		err = REFTABLE_IO_ERROR;
		goto done;
	}

	/* Reject additions that would create duplicate ref names. */
	err = stack_check_addition(add->stack, temp_tab_file_name.buf);
	if (err < 0)
		goto done;

	/* The callback must not write updates older than the transaction's
	 * base index. */
	if (wr->min_update_index < add->next_update_index) {
		err = REFTABLE_API_ERROR;
		goto done;
	}

	format_name(&next_name, wr->min_update_index, wr->max_update_index);
	strbuf_addstr(&next_name, ".ref");

	stack_filename(&tab_file_name, add->stack, next_name.buf);

	/*
	  On windows, this relies on rand() picking a unique destination name.
	  Maybe we should do retry loop as well?
	 */
	err = rename(temp_tab_file_name.buf, tab_file_name.buf);
	if (err < 0) {
		err = REFTABLE_IO_ERROR;
		goto done;
	}

	add->new_tables = reftable_realloc(add->new_tables,
					   sizeof(*add->new_tables) *
						   (add->new_tables_len + 1));
	add->new_tables[add->new_tables_len] = strbuf_detach(&next_name, NULL);
	add->new_tables_len++;
done:
	if (tab_fd > 0) {
		close(tab_fd);
		tab_fd = 0;
	}
	/* After a successful rename() this unlink targets the old (now
	 * nonexistent) temp name and is a harmless no-op. */
	if (temp_tab_file_name.len > 0) {
		unlink(temp_tab_file_name.buf);
	}

	strbuf_release(&temp_tab_file_name);
	strbuf_release(&tab_file_name);
	strbuf_release(&next_name);
	reftable_writer_free(wr);
	return err;
}
/*
 * Returns the update index the next added table must start at: one past
 * the newest table's max update index, or 1 for an empty stack.
 */
uint64_t reftable_stack_next_update_index(struct reftable_stack *st)
{
	int sz = st->merged->stack_len;
	if (sz > 0)
		return reftable_reader_max_update_index(st->readers[sz - 1]) +
		       1;
	return 1;
}
/*
 * Writes the compacted contents of tables [first, last] into a freshly
 * created temp file whose path is returned via temp_tab. On error the temp
 * file is unlinked and temp_tab released. Returns 0 or a negative error
 * (including REFTABLE_EMPTY_TABLE_ERROR when compaction yields nothing).
 */
static int stack_compact_locked(struct reftable_stack *st, int first, int last,
				struct strbuf *temp_tab,
				struct reftable_log_expiry_config *config)
{
	struct strbuf next_name = STRBUF_INIT;
	int tab_fd = -1;
	struct reftable_writer *wr = NULL;
	int err = 0;

	format_name(&next_name,
		    reftable_reader_min_update_index(st->readers[first]),
		    reftable_reader_max_update_index(st->readers[last]));

	stack_filename(temp_tab, st, next_name.buf);
	strbuf_addstr(temp_tab, ".temp.XXXXXX");

	/* NOTE(review): mkstemp() failure is not checked here; a negative
	 * fd would surface as a write error from the writer callback. */
	tab_fd = mkstemp(temp_tab->buf);
	wr = reftable_new_writer(reftable_fd_write, &tab_fd, &st->config);

	err = stack_write_compact(st, wr, first, last, config);
	if (err < 0)
		goto done;
	err = reftable_writer_close(wr);
	if (err < 0)
		goto done;

	err = close(tab_fd);
	tab_fd = 0;

done:
	reftable_writer_free(wr);
	if (tab_fd > 0) {
		close(tab_fd);
		tab_fd = 0;
	}
	if (err != 0 && temp_tab->len > 0) {
		unlink(temp_tab->buf);
		strbuf_release(temp_tab);
	}
	strbuf_release(&next_name);
	return err;
}
/*
 * Streams the merged contents of tables [first, last] into writer wr.
 * Deletion tombstones are dropped when the range starts at table 0 (there
 * is nothing below to shadow). Log records older than the expiry config's
 * min_update_index or time are skipped. Updates st->stats. Returns 0 or a
 * negative error code.
 */
static int stack_write_compact(struct reftable_stack *st,
			       struct reftable_writer *wr, int first, int last,
			       struct reftable_log_expiry_config *config)
{
	int subtabs_len = last - first + 1;
	struct reftable_table *subtabs = reftable_calloc(
		sizeof(struct reftable_table) * (last - first + 1));
	struct reftable_merged_table *mt = NULL;
	int err = 0;
	struct reftable_iterator it = { NULL };
	struct reftable_ref_record ref = { NULL };
	struct reftable_log_record log = { NULL };

	uint64_t entries = 0;

	int i = 0, j = 0;
	for (i = first, j = 0; i <= last; i++) {
		struct reftable_reader *t = st->readers[i];
		reftable_table_from_reader(&subtabs[j++], t);
		st->stats.bytes += t->size;
	}
	reftable_writer_set_limits(wr, st->readers[first]->min_update_index,
				   st->readers[last]->max_update_index);

	err = reftable_new_merged_table(&mt, subtabs, subtabs_len,
					st->config.hash_id);
	if (err < 0) {
		/* mt did not take ownership of subtabs on failure. */
		reftable_free(subtabs);
		goto done;
	}

	/* First pass: refs. */
	err = reftable_merged_table_seek_ref(mt, &it, "");
	if (err < 0)
		goto done;

	while (1) {
		err = reftable_iterator_next_ref(&it, &ref);
		if (err > 0) {
			err = 0;
			break;
		}
		if (err < 0) {
			break;
		}

		if (first == 0 && reftable_ref_record_is_deletion(&ref)) {
			continue;
		}

		err = reftable_writer_add_ref(wr, &ref);
		if (err < 0) {
			break;
		}
		entries++;
	}
	reftable_iterator_destroy(&it);

	/* Second pass: reflog entries, with optional expiry. */
	err = reftable_merged_table_seek_log(mt, &it, "");
	if (err < 0)
		goto done;

	while (1) {
		err = reftable_iterator_next_log(&it, &log);
		if (err > 0) {
			err = 0;
			break;
		}
		if (err < 0) {
			break;
		}
		if (first == 0 && reftable_log_record_is_deletion(&log)) {
			continue;
		}

		if (config && config->min_update_index > 0 &&
		    log.update_index < config->min_update_index) {
			continue;
		}

		if (config && config->time > 0 &&
		    log.value.update.time < config->time) {
			continue;
		}

		err = reftable_writer_add_log(wr, &log);
		if (err < 0) {
			break;
		}
		entries++;
	}

done:
	reftable_iterator_destroy(&it);
	if (mt) {
		merged_table_release(mt);
		reftable_merged_table_free(mt);
	}
	reftable_ref_record_release(&ref);
	reftable_log_record_release(&log);
	st->stats.entries_written += entries;
	return err;
}
/* < 0: error. 0 == OK, > 0 attempt failed; could retry. */
static int stack_compact_range(struct reftable_stack *st, int first, int last,
			       struct reftable_log_expiry_config *expiry)
{
	struct strbuf temp_tab_file_name = STRBUF_INIT;
	struct strbuf new_table_name = STRBUF_INIT;
	struct strbuf lock_file_name = STRBUF_INIT;
	struct strbuf ref_list_contents = STRBUF_INIT;
	struct strbuf new_table_path = STRBUF_INIT;
	int err = 0;
	int have_lock = 0;
	int lock_file_fd = -1;
	int compact_count = last - first + 1;
	char **listp = NULL;
	char **delete_on_success =
		reftable_calloc(sizeof(char *) * (compact_count + 1));
	char **subtable_locks =
		reftable_calloc(sizeof(char *) * (compact_count + 1));
	int i = 0;
	int j = 0;
	int is_empty_table = 0;

	/* Compacting a single table is only useful with an expiry config. */
	if (first > last || (!expiry && first == last)) {
		err = 0;
		goto done;
	}

	st->stats.attempts++;

	/*
	 * Phase 1: briefly take "tables.list.lock" to verify we are up to
	 * date, and lock each table in the range with a per-table ".lock"
	 * file so no concurrent compaction touches them.
	 */
	strbuf_reset(&lock_file_name);
	strbuf_addstr(&lock_file_name, st->list_file);
	strbuf_addstr(&lock_file_name, ".lock");

	lock_file_fd =
		open(lock_file_name.buf, O_EXCL | O_CREAT | O_WRONLY, 0666);
	if (lock_file_fd < 0) {
		if (errno == EEXIST) {
			err = 1; /* held by someone else: retryable */
		} else {
			err = REFTABLE_IO_ERROR;
		}
		goto done;
	}
	/* Don't want to write to the lock for now. */
	close(lock_file_fd);
	lock_file_fd = -1;

	have_lock = 1;
	err = stack_uptodate(st);
	if (err != 0)
		goto done;

	for (i = first, j = 0; i <= last; i++) {
		struct strbuf subtab_file_name = STRBUF_INIT;
		struct strbuf subtab_lock = STRBUF_INIT;
		int sublock_file_fd = -1;

		stack_filename(&subtab_file_name, st,
			       reader_name(st->readers[i]));

		strbuf_reset(&subtab_lock);
		strbuf_addbuf(&subtab_lock, &subtab_file_name);
		strbuf_addstr(&subtab_lock, ".lock");

		sublock_file_fd = open(subtab_lock.buf,
				       O_EXCL | O_CREAT | O_WRONLY, 0666);
		if (sublock_file_fd >= 0) {
			close(sublock_file_fd);
		} else if (sublock_file_fd < 0) {
			if (errno == EEXIST) {
				err = 1;
			} else {
				err = REFTABLE_IO_ERROR;
			}
		}

		/* Record the names even on failure so cleanup below removes
		 * whatever we managed to create. */
		subtable_locks[j] = subtab_lock.buf;
		delete_on_success[j] = subtab_file_name.buf;
		j++;

		if (err != 0)
			goto done;
	}

	/* Phase 2: drop the list lock while writing the (possibly slow)
	 * compacted table; the per-table locks still protect the range. */
	err = unlink(lock_file_name.buf);
	if (err < 0)
		goto done;
	have_lock = 0;

	err = stack_compact_locked(st, first, last, &temp_tab_file_name,
				   expiry);
	/* Compaction + tombstones can create an empty table out of non-empty
	 * tables. */
	is_empty_table = (err == REFTABLE_EMPTY_TABLE_ERROR);
	if (is_empty_table) {
		err = 0;
	}
	if (err < 0)
		goto done;

	/*
	 * Phase 3: retake "tables.list.lock", write the updated table list
	 * into it and rename it over "tables.list" for an atomic switch.
	 */
	lock_file_fd =
		open(lock_file_name.buf, O_EXCL | O_CREAT | O_WRONLY, 0666);
	if (lock_file_fd < 0) {
		if (errno == EEXIST) {
			err = 1;
		} else {
			err = REFTABLE_IO_ERROR;
		}
		goto done;
	}
	have_lock = 1;
	if (st->config.default_permissions) {
		if (chmod(lock_file_name.buf, st->config.default_permissions) < 0) {
			err = REFTABLE_IO_ERROR;
			goto done;
		}
	}

	format_name(&new_table_name, st->readers[first]->min_update_index,
		    st->readers[last]->max_update_index);
	strbuf_addstr(&new_table_name, ".ref");

	stack_filename(&new_table_path, st, new_table_name.buf);

	if (!is_empty_table) {
		/* retry? */
		err = rename(temp_tab_file_name.buf, new_table_path.buf);
		if (err < 0) {
			err = REFTABLE_IO_ERROR;
			goto done;
		}
	}

	/* Tables below the range, then the compacted table (unless empty),
	 * then the tables above the range. */
	for (i = 0; i < first; i++) {
		strbuf_addstr(&ref_list_contents, st->readers[i]->name);
		strbuf_addstr(&ref_list_contents, "\n");
	}
	if (!is_empty_table) {
		strbuf_addbuf(&ref_list_contents, &new_table_name);
		strbuf_addstr(&ref_list_contents, "\n");
	}
	for (i = last + 1; i < st->merged->stack_len; i++) {
		strbuf_addstr(&ref_list_contents, st->readers[i]->name);
		strbuf_addstr(&ref_list_contents, "\n");
	}

	err = write_in_full(lock_file_fd, ref_list_contents.buf, ref_list_contents.len);
	if (err < 0) {
		err = REFTABLE_IO_ERROR;
		unlink(new_table_path.buf);
		goto done;
	}
	err = close(lock_file_fd);
	lock_file_fd = -1;
	if (err < 0) {
		err = REFTABLE_IO_ERROR;
		unlink(new_table_path.buf);
		goto done;
	}

	err = rename(lock_file_name.buf, st->list_file);
	if (err < 0) {
		err = REFTABLE_IO_ERROR;
		unlink(new_table_path.buf);
		goto done;
	}
	have_lock = 0;

	/* Reload the stack before deleting. On windows, we can only delete the
	   files after we closed them.
	*/
	err = reftable_stack_reload_maybe_reuse(st, first < last);

	listp = delete_on_success;
	while (*listp) {
		if (strcmp(*listp, new_table_path.buf)) {
			unlink(*listp);
		}
		listp++;
	}

done:
	free_names(delete_on_success);

	/* Always remove the per-table lock files we created. */
	listp = subtable_locks;
	while (*listp) {
		unlink(*listp);
		listp++;
	}
	free_names(subtable_locks);
	if (lock_file_fd >= 0) {
		close(lock_file_fd);
		lock_file_fd = -1;
	}
	if (have_lock) {
		unlink(lock_file_name.buf);
	}
	strbuf_release(&new_table_name);
	strbuf_release(&new_table_path);
	strbuf_release(&ref_list_contents);
	strbuf_release(&temp_tab_file_name);
	strbuf_release(&lock_file_name);
	return err;
}
1179 int reftable_stack_compact_all(struct reftable_stack *st,
1180 struct reftable_log_expiry_config *config)
1182 return stack_compact_range(st, 0, st->merged->stack_len - 1, config);
1185 static int stack_compact_range_stats(struct reftable_stack *st, int first,
1186 int last,
1187 struct reftable_log_expiry_config *config)
1189 int err = stack_compact_range(st, first, last, config);
1190 if (err > 0) {
1191 st->stats.failures++;
1193 return err;
/* Number of tables covered by the segment. */
static int segment_size(struct segment *s)
{
	return s->end - s->start;
}
/* Returns floor(log2(sz)); defined as 0 for sz == 0. */
int fastlog2(uint64_t sz)
{
	int msb = -1;

	if (!sz)
		return 0;
	while (sz) {
		msb++;
		sz >>= 1;
	}
	return msb;
}
/*
 * Groups the n table sizes into maximal runs of tables whose sizes share
 * the same floor(log2). Returns a freshly allocated array of segments and
 * stores its length in *seglen. The caller owns the returned array.
 */
struct segment *sizes_to_segments(int *seglen, uint64_t *sizes, int n)
{
	struct segment *segs = reftable_calloc(sizeof(struct segment) * n);
	int next = 0;
	struct segment cur = { 0 };
	int i = 0;

	if (n == 0) {
		*seglen = 0;
		return segs;
	}
	for (i = 0; i < n; i++) {
		int log = fastlog2(sizes[i]);
		/* A change in magnitude closes the current segment; the
		 * cur.bytes > 0 guard skips the initial empty segment. */
		if (cur.log != log && cur.bytes > 0) {
			struct segment fresh = {
				.start = i,
			};

			segs[next++] = cur;
			cur = fresh;
		}

		cur.log = log;
		cur.end = i + 1;
		cur.bytes += sizes[i];
	}
	segs[next++] = cur;
	*seglen = next;
	return segs;
}
/*
 * Picks which tables to compact: the multi-table segment with the smallest
 * magnitude, then extends it downward while the combined size stays within
 * the magnitude of each preceding table. Returns a zero-width segment
 * (log = 64) when no multi-table segment exists.
 */
struct segment suggest_compaction_segment(uint64_t *sizes, int n)
{
	int seglen = 0;
	struct segment *segs = sizes_to_segments(&seglen, sizes, n);
	struct segment min_seg = {
		.log = 64,
	};
	int i = 0;
	for (i = 0; i < seglen; i++) {
		/* Single-table segments gain nothing from compaction. */
		if (segment_size(&segs[i]) == 1) {
			continue;
		}

		if (segs[i].log < min_seg.log) {
			min_seg = segs[i];
		}
	}

	while (min_seg.start > 0) {
		int prev = min_seg.start - 1;
		if (fastlog2(min_seg.bytes) < fastlog2(sizes[prev])) {
			break;
		}

		min_seg.start = prev;
		min_seg.bytes += sizes[prev];
	}

	reftable_free(segs);
	return min_seg;
}
/*
 * Returns a freshly allocated array of the stack's table sizes with the
 * fixed per-table header overhead subtracted, so compaction heuristics see
 * payload sizes. The caller owns the array.
 */
static uint64_t *stack_table_sizes_for_compaction(struct reftable_stack *st)
{
	uint64_t *sizes =
		reftable_calloc(sizeof(uint64_t) * st->merged->stack_len);
	/* Version 1 headers are used for SHA-1, version 2 otherwise. */
	int version = (st->config.hash_id == GIT_SHA1_FORMAT_ID) ? 1 : 2;
	int overhead = header_size(version) - 1;
	int i = 0;
	for (i = 0; i < st->merged->stack_len; i++) {
		sizes[i] = st->readers[i]->size - overhead;
	}
	return sizes;
}
1288 int reftable_stack_auto_compact(struct reftable_stack *st)
1290 uint64_t *sizes = stack_table_sizes_for_compaction(st);
1291 struct segment seg =
1292 suggest_compaction_segment(sizes, st->merged->stack_len);
1293 reftable_free(sizes);
1294 if (segment_size(&seg) > 0)
1295 return stack_compact_range_stats(st, seg.start, seg.end - 1,
1296 NULL);
1298 return 0;
/* Returns a pointer to the stack's accumulated compaction statistics. */
struct reftable_compaction_stats *
reftable_stack_compaction_stats(struct reftable_stack *st)
{
	return &st->stats;
}
/* Looks up a single ref by name through the stack's merged table. */
int reftable_stack_read_ref(struct reftable_stack *st, const char *refname,
			    struct reftable_ref_record *ref)
{
	struct reftable_table tab = { NULL };
	reftable_table_from_merged_table(&tab, reftable_stack_merged_table(st));
	return reftable_table_read_ref(&tab, refname, ref);
}
/*
 * Reads the newest log record for refname into *log. Returns 0 on success,
 * 1 when no matching (non-deletion) record exists, or a negative error.
 * On any nonzero return the log record is released.
 */
int reftable_stack_read_log(struct reftable_stack *st, const char *refname,
			    struct reftable_log_record *log)
{
	struct reftable_iterator it = { NULL };
	struct reftable_merged_table *mt = reftable_stack_merged_table(st);
	int err = reftable_merged_table_seek_log(mt, &it, refname);
	if (err)
		goto done;

	err = reftable_iterator_next_log(&it, log);
	if (err)
		goto done;

	/* The seek may land on a different ref or a tombstone. */
	if (strcmp(log->refname, refname) ||
	    reftable_log_record_is_deletion(log)) {
		err = 1;
		goto done;
	}

done:
	if (err) {
		reftable_log_record_release(log);
	}
	reftable_iterator_destroy(&it);
	return err;
}
/*
 * Verifies that the refs in the candidate table `new_tab_name` may be
 * added on top of the current stack: all of the new table's ref
 * records are collected and handed to validate_ref_record_addition()
 * against the stack's merged view. Returns 0 on success (or when the
 * check is disabled via config.skip_name_check), a negative error
 * code otherwise.
 */
static int stack_check_addition(struct reftable_stack *st,
				const char *new_tab_name)
{
	int err = 0;
	struct reftable_block_source src = { NULL };
	struct reftable_reader *rd = NULL;
	struct reftable_table tab = { NULL };
	struct reftable_ref_record *refs = NULL;
	struct reftable_iterator it = { NULL };
	int cap = 0;
	int len = 0;
	int i = 0;

	if (st->config.skip_name_check)
		return 0;

	err = reftable_block_source_from_file(&src, new_tab_name);
	if (err < 0)
		goto done;

	/* NOTE(review): presumably the reader takes ownership of `src`
	   here, so only `rd` needs freeing below — confirm against
	   reftable_new_reader's contract. */
	err = reftable_new_reader(&rd, &src, new_tab_name);
	if (err < 0)
		goto done;

	err = reftable_reader_seek_ref(rd, &it, "");
	if (err > 0) {
		/* Positive result from seek: the table has no ref
		   records at all, so there is nothing to validate. */
		err = 0;
		goto done;
	}
	if (err < 0)
		goto done;

	/* Drain the iterator, accumulating every ref record of the new
	   table into `refs` (geometric growth of the backing array). */
	while (1) {
		struct reftable_ref_record ref = { NULL };
		err = reftable_iterator_next_ref(&it, &ref);
		if (err > 0) {
			break;
		}
		if (err < 0)
			goto done;

		if (len >= cap) {
			cap = 2 * cap + 1;
			refs = reftable_realloc(refs, cap * sizeof(refs[0]));
		}

		/* Shallow copy; ownership of the record's allocations
		   moves into refs[] and is released in the cleanup loop. */
		refs[len++] = ref;
	}

	reftable_table_from_merged_table(&tab, reftable_stack_merged_table(st));

	err = validate_ref_record_addition(tab, refs, len);

done:
	for (i = 0; i < len; i++) {
		reftable_ref_record_release(&refs[i]);
	}

	free(refs);
	reftable_iterator_destroy(&it);
	reftable_reader_free(rd);
	return err;
}
/* Returns nonzero iff `s` looks like a reftable table file name,
   i.e. it ends in the ".ref" suffix. */
static int is_table_name(const char *s)
{
	const char *suffix = strrchr(s, '.');

	if (!suffix)
		return 0;
	return strcmp(suffix, ".ref") == 0;
}
/*
 * Best-effort removal of the table file `name` in the stack's
 * directory: the file is unlinked only if its maximum update index is
 * <= `max`, i.e. its contents are already covered by the current
 * stack. All errors (unreadable file, failed unlink) are silently
 * ignored, since this is opportunistic garbage collection.
 */
static void remove_maybe_stale_table(struct reftable_stack *st, uint64_t max,
				     const char *name)
{
	int err = 0;
	uint64_t update_idx = 0;
	struct reftable_block_source src = { NULL };
	struct reftable_reader *rd = NULL;
	struct strbuf table_path = STRBUF_INIT;
	stack_filename(&table_path, st, name);

	err = reftable_block_source_from_file(&src, table_path.buf);
	if (err < 0)
		goto done;

	/* NOTE(review): presumably `rd` takes ownership of `src` on
	   success, so freeing the reader suffices — confirm against
	   reftable_new_reader's contract. */
	err = reftable_new_reader(&rd, &src, name);
	if (err < 0)
		goto done;

	update_idx = reftable_reader_max_update_index(rd);
	reftable_reader_free(rd);

	if (update_idx <= max) {
		/* unlink(3) failure is deliberately ignored; a stale
		   file left behind is harmless. */
		unlink(table_path.buf);
	}
done:
	strbuf_release(&table_path);
}
1440 static int reftable_stack_clean_locked(struct reftable_stack *st)
1442 uint64_t max = reftable_merged_table_max_update_index(
1443 reftable_stack_merged_table(st));
1444 DIR *dir = opendir(st->reftable_dir);
1445 struct dirent *d = NULL;
1446 if (!dir) {
1447 return REFTABLE_IO_ERROR;
1450 while ((d = readdir(dir))) {
1451 int i = 0;
1452 int found = 0;
1453 if (!is_table_name(d->d_name))
1454 continue;
1456 for (i = 0; !found && i < st->readers_len; i++) {
1457 found = !strcmp(reader_name(st->readers[i]), d->d_name);
1459 if (found)
1460 continue;
1462 remove_maybe_stale_table(st, max, d->d_name);
1465 closedir(dir);
1466 return 0;
1469 int reftable_stack_clean(struct reftable_stack *st)
1471 struct reftable_addition *add = NULL;
1472 int err = reftable_stack_new_addition(&add, st);
1473 if (err < 0) {
1474 goto done;
1477 err = reftable_stack_reload(st);
1478 if (err < 0) {
1479 goto done;
1482 err = reftable_stack_clean_locked(st);
1484 done:
1485 reftable_addition_destroy(add);
1486 return err;
1489 int reftable_stack_print_directory(const char *stackdir, uint32_t hash_id)
1491 struct reftable_stack *stack = NULL;
1492 struct reftable_write_options cfg = { .hash_id = hash_id };
1493 struct reftable_merged_table *merged = NULL;
1494 struct reftable_table table = { NULL };
1496 int err = reftable_new_stack(&stack, stackdir, cfg);
1497 if (err < 0)
1498 goto done;
1500 merged = reftable_stack_merged_table(stack);
1501 reftable_table_from_merged_table(&table, merged);
1502 err = reftable_table_print(&table);
1503 done:
1504 if (stack)
1505 reftable_stack_destroy(stack);
1506 return err;