move pack cache entries to front upon any access, not just when cache is full
[got-portable.git] / lib / fileindex.c
blobfc53af5c7c224d102e314eefbb6c3378fec7b4bc
/*
 * Copyright (c) 2018, 2019 Stefan Sperling <stsp@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */
17 #include "got_compat.h"
19 #include <sys/queue.h>
20 #include <sys/stat.h>
22 #include <errno.h>
23 #include <dirent.h>
24 #include <fcntl.h>
25 #include <stdio.h>
26 #include <stdlib.h>
27 #include <stdint.h>
28 #include <string.h>
29 #include <limits.h>
30 #include <unistd.h>
32 #include "got_error.h"
33 #include "got_object.h"
34 #include "got_path.h"
36 #include "got_lib_hash.h"
37 #include "got_lib_fileindex.h"
38 #include "got_lib_worktree.h"
/*
 * Bit assignments within the flags field of a got_fileindex_entry.
 * The low 12 bits carry the (capped) path length, the next 4 bits carry
 * the staging state, and the bits above that are independent booleans.
 */
#define GOT_FILEIDX_F_PATH_LEN		0x00000fff
#define GOT_FILEIDX_F_STAGE		0x0000f000
#define GOT_FILEIDX_F_STAGE_SHIFT	12
#define GOT_FILEIDX_F_NOT_FLUSHED	0x00010000
#define GOT_FILEIDX_F_NO_BLOB		0x00020000
#define GOT_FILEIDX_F_NO_COMMIT		0x00040000
#define GOT_FILEIDX_F_NO_FILE_ON_DISK	0x00080000
#define GOT_FILEIDX_F_REMOVE_ON_FLUSH	0x00100000
#define GOT_FILEIDX_F_SKIPPED		0x00200000
51 struct got_fileindex {
52 struct got_fileindex_tree entries;
53 uint32_t version;
54 int nentries; /* Does not include entries marked for removal. */
55 #define GOT_FILEIDX_MAX_ENTRIES INT32_MAX
56 enum got_hash_algorithm algo;
59 mode_t
60 got_fileindex_entry_perms_get(struct got_fileindex_entry *ie)
62 return ((ie->mode & GOT_FILEIDX_MODE_PERMS) >>
63 GOT_FILEIDX_MODE_PERMS_SHIFT);
66 static void
67 fileindex_entry_perms_set(struct got_fileindex_entry *ie, mode_t mode)
69 ie->mode &= ~GOT_FILEIDX_MODE_PERMS;
70 ie->mode |= ((mode << GOT_FILEIDX_MODE_PERMS_SHIFT) &
71 GOT_FILEIDX_MODE_PERMS);
74 mode_t
75 got_fileindex_perms_to_st(struct got_fileindex_entry *ie)
77 mode_t perms = got_fileindex_entry_perms_get(ie);
78 int type = got_fileindex_entry_filetype_get(ie);
79 uint32_t ftype;
81 if (type == GOT_FILEIDX_MODE_REGULAR_FILE ||
82 type == GOT_FILEIDX_MODE_BAD_SYMLINK)
83 ftype = S_IFREG;
84 else
85 ftype = S_IFLNK;
87 return (ftype | (perms & (S_IRWXU | S_IRWXG | S_IRWXO)));
90 const struct got_error *
91 got_fileindex_entry_update(struct got_fileindex_entry *ie,
92 int wt_fd, const char *ondisk_path, struct got_object_id *blob,
93 struct got_object_id *commit, int update_timestamps)
95 struct stat sb;
97 if (fstatat(wt_fd, ondisk_path, &sb, AT_SYMLINK_NOFOLLOW) != 0) {
98 if (!((ie->flags & GOT_FILEIDX_F_NO_FILE_ON_DISK) &&
99 errno == ENOENT))
100 return got_error_from_errno2("fstatat", ondisk_path);
101 sb.st_mode = GOT_DEFAULT_FILE_MODE;
102 } else {
103 if (sb.st_mode & S_IFDIR)
104 return got_error_set_errno(EISDIR, ondisk_path);
105 ie->flags &= ~GOT_FILEIDX_F_NO_FILE_ON_DISK;
108 if ((ie->flags & GOT_FILEIDX_F_NO_FILE_ON_DISK) == 0) {
109 if (update_timestamps) {
110 ie->ctime_sec = sb.st_ctim.tv_sec;
111 ie->ctime_nsec = sb.st_ctim.tv_nsec;
112 ie->mtime_sec = sb.st_mtim.tv_sec;
113 ie->mtime_nsec = sb.st_mtim.tv_nsec;
115 ie->uid = sb.st_uid;
116 ie->gid = sb.st_gid;
117 ie->size = (sb.st_size & 0xffffffff);
118 if (S_ISLNK(sb.st_mode)) {
119 got_fileindex_entry_filetype_set(ie,
120 GOT_FILEIDX_MODE_SYMLINK);
121 fileindex_entry_perms_set(ie, 0);
122 } else {
123 got_fileindex_entry_filetype_set(ie,
124 GOT_FILEIDX_MODE_REGULAR_FILE);
125 fileindex_entry_perms_set(ie,
126 sb.st_mode & (S_IRWXU | S_IRWXG | S_IRWXO));
130 if (blob) {
131 memmove(&ie->blob, blob, sizeof(ie->blob));
132 ie->flags &= ~GOT_FILEIDX_F_NO_BLOB;
133 } else
134 ie->flags |= GOT_FILEIDX_F_NO_BLOB;
136 if (commit) {
137 memmove(&ie->commit, commit, sizeof(ie->commit));
138 ie->flags &= ~GOT_FILEIDX_F_NO_COMMIT;
139 } else
140 ie->flags |= GOT_FILEIDX_F_NO_COMMIT;
142 return NULL;
145 void
146 got_fileindex_entry_mark_deleted_from_disk(struct got_fileindex_entry *ie)
148 ie->flags |= GOT_FILEIDX_F_NO_FILE_ON_DISK;
151 void
152 got_fileindex_entry_mark_skipped(struct got_fileindex_entry *ie)
154 ie->flags |= GOT_FILEIDX_F_SKIPPED;
157 const struct got_error *
158 got_fileindex_entry_alloc(struct got_fileindex_entry **ie,
159 const char *relpath)
161 size_t len;
163 *ie = calloc(1, sizeof(**ie));
164 if (*ie == NULL)
165 return got_error_from_errno("calloc");
167 (*ie)->path = strdup(relpath);
168 if ((*ie)->path == NULL) {
169 const struct got_error *err = got_error_from_errno("strdup");
170 free(*ie);
171 *ie = NULL;
172 return err;
175 len = strlen(relpath);
176 if (len > GOT_FILEIDX_F_PATH_LEN)
177 len = GOT_FILEIDX_F_PATH_LEN;
178 (*ie)->flags |= len;
180 return NULL;
183 void
184 got_fileindex_entry_free(struct got_fileindex_entry *ie)
186 free(ie->path);
187 free(ie);
190 size_t
191 got_fileindex_entry_path_len(const struct got_fileindex_entry *ie)
193 return (size_t)(ie->flags & GOT_FILEIDX_F_PATH_LEN);
196 uint32_t
197 got_fileindex_entry_stage_get(const struct got_fileindex_entry *ie)
199 return ((ie->flags & GOT_FILEIDX_F_STAGE) >> GOT_FILEIDX_F_STAGE_SHIFT);
202 void
203 got_fileindex_entry_stage_set(struct got_fileindex_entry *ie, uint32_t stage)
205 ie->flags &= ~GOT_FILEIDX_F_STAGE;
206 ie->flags |= ((stage << GOT_FILEIDX_F_STAGE_SHIFT) &
207 GOT_FILEIDX_F_STAGE);
211 got_fileindex_entry_filetype_get(struct got_fileindex_entry *ie)
213 return (ie->mode & GOT_FILEIDX_MODE_FILE_TYPE_ONDISK);
216 void
217 got_fileindex_entry_filetype_set(struct got_fileindex_entry *ie, int type)
219 ie->mode &= ~GOT_FILEIDX_MODE_FILE_TYPE_ONDISK;
220 ie->mode |= (type & GOT_FILEIDX_MODE_FILE_TYPE_ONDISK);
223 void
224 got_fileindex_entry_staged_filetype_set(struct got_fileindex_entry *ie,
225 int type)
227 ie->mode &= ~GOT_FILEIDX_MODE_FILE_TYPE_STAGED;
228 ie->mode |= ((type << GOT_FILEIDX_MODE_FILE_TYPE_STAGED_SHIFT) &
229 GOT_FILEIDX_MODE_FILE_TYPE_STAGED);
233 got_fileindex_entry_staged_filetype_get(struct got_fileindex_entry *ie)
235 return (ie->mode & GOT_FILEIDX_MODE_FILE_TYPE_STAGED) >>
236 GOT_FILEIDX_MODE_FILE_TYPE_STAGED_SHIFT;
240 got_fileindex_entry_has_blob(struct got_fileindex_entry *ie)
242 return (ie->flags & GOT_FILEIDX_F_NO_BLOB) == 0;
246 got_fileindex_entry_has_commit(struct got_fileindex_entry *ie)
248 return (ie->flags & GOT_FILEIDX_F_NO_COMMIT) == 0;
252 got_fileindex_entry_has_file_on_disk(struct got_fileindex_entry *ie)
254 return (ie->flags & GOT_FILEIDX_F_NO_FILE_ON_DISK) == 0;
258 got_fileindex_entry_was_skipped(struct got_fileindex_entry *ie)
260 return (ie->flags & GOT_FILEIDX_F_SKIPPED) != 0;
263 static const struct got_error *
264 add_entry(struct got_fileindex *fileindex, struct got_fileindex_entry *ie)
266 if (fileindex->nentries >= GOT_FILEIDX_MAX_ENTRIES)
267 return got_error(GOT_ERR_NO_SPACE);
269 if (RB_INSERT(got_fileindex_tree, &fileindex->entries, ie) != NULL)
270 return got_error_path(ie->path, GOT_ERR_FILEIDX_DUP_ENTRY);
272 fileindex->nentries++;
273 return NULL;
276 const struct got_error *
277 got_fileindex_entry_add(struct got_fileindex *fileindex,
278 struct got_fileindex_entry *ie)
280 /* Flag this entry until it gets written out to disk. */
281 ie->flags |= GOT_FILEIDX_F_NOT_FLUSHED;
283 return add_entry(fileindex, ie);
286 void
287 got_fileindex_entry_remove(struct got_fileindex *fileindex,
288 struct got_fileindex_entry *ie)
291 * Removing an entry from the RB tree immediately breaks
292 * in-progress iterations over file index entries.
293 * So flag this entry for removal and remove it once the index
294 * is written out to disk. Meanwhile, pretend this entry no longer
295 * exists if we get queried for it again before then.
297 ie->flags |= GOT_FILEIDX_F_REMOVE_ON_FLUSH;
298 fileindex->nentries--;
301 struct got_fileindex_entry *
302 got_fileindex_entry_get(struct got_fileindex *fileindex, const char *path,
303 size_t path_len)
305 struct got_fileindex_entry *ie;
306 struct got_fileindex_entry key;
307 memset(&key, 0, sizeof(key));
308 key.path = (char *)path;
309 key.flags = (path_len & GOT_FILEIDX_F_PATH_LEN);
310 ie = RB_FIND(got_fileindex_tree, &fileindex->entries, &key);
311 if (ie && (ie->flags & GOT_FILEIDX_F_REMOVE_ON_FLUSH))
312 return NULL;
313 return ie;
316 const struct got_error *
317 got_fileindex_for_each_entry_safe(struct got_fileindex *fileindex,
318 got_fileindex_cb cb, void *cb_arg)
320 const struct got_error *err;
321 struct got_fileindex_entry *ie, *tmp;
323 RB_FOREACH_SAFE(ie, got_fileindex_tree, &fileindex->entries, tmp) {
324 if (ie->flags & GOT_FILEIDX_F_REMOVE_ON_FLUSH)
325 continue;
326 err = (*cb)(cb_arg, ie);
327 if (err)
328 return err;
330 return NULL;
333 struct got_fileindex *
334 got_fileindex_alloc(enum got_hash_algorithm algo)
336 struct got_fileindex *fileindex;
338 fileindex = calloc(1, sizeof(*fileindex));
339 if (fileindex == NULL)
340 return NULL;
342 fileindex->version = GOT_FILE_INDEX_VERSION;
343 fileindex->algo = algo;
344 RB_INIT(&fileindex->entries);
345 return fileindex;
348 void
349 got_fileindex_free(struct got_fileindex *fileindex)
351 struct got_fileindex_entry *ie;
353 while ((ie = RB_MIN(got_fileindex_tree, &fileindex->entries))) {
354 RB_REMOVE(got_fileindex_tree, &fileindex->entries, ie);
355 got_fileindex_entry_free(ie);
357 free(fileindex);
360 static const struct got_error *
361 write_fileindex_val64(struct got_hash *ctx, uint64_t val, FILE *outfile)
363 size_t n;
365 val = htobe64(val);
366 got_hash_update(ctx, &val, sizeof(val));
367 n = fwrite(&val, 1, sizeof(val), outfile);
368 if (n != sizeof(val))
369 return got_ferror(outfile, GOT_ERR_IO);
370 return NULL;
373 static const struct got_error *
374 write_fileindex_val32(struct got_hash *ctx, uint32_t val, FILE *outfile)
376 size_t n;
378 val = htobe32(val);
379 got_hash_update(ctx, &val, sizeof(val));
380 n = fwrite(&val, 1, sizeof(val), outfile);
381 if (n != sizeof(val))
382 return got_ferror(outfile, GOT_ERR_IO);
383 return NULL;
386 static const struct got_error *
387 write_fileindex_val16(struct got_hash *ctx, uint16_t val, FILE *outfile)
389 size_t n;
391 val = htobe16(val);
392 got_hash_update(ctx, &val, sizeof(val));
393 n = fwrite(&val, 1, sizeof(val), outfile);
394 if (n != sizeof(val))
395 return got_ferror(outfile, GOT_ERR_IO);
396 return NULL;
399 static const struct got_error *
400 write_fileindex_path(struct got_hash *ctx, const char *path, FILE *outfile)
402 size_t n, len, pad = 0;
403 static const uint8_t zero[8] = { 0 };
405 len = strlen(path);
406 while ((len + pad) % 8 != 0)
407 pad++;
408 if (pad == 0)
409 pad = 8; /* NUL-terminate */
411 got_hash_update(ctx, path, len);
412 n = fwrite(path, 1, len, outfile);
413 if (n != len)
414 return got_ferror(outfile, GOT_ERR_IO);
415 got_hash_update(ctx, zero, pad);
416 n = fwrite(zero, 1, pad, outfile);
417 if (n != pad)
418 return got_ferror(outfile, GOT_ERR_IO);
419 return NULL;
422 static const struct got_error *
423 write_fileindex_entry(struct got_hash *ctx, struct got_fileindex_entry *ie,
424 FILE *outfile)
426 const struct got_error *err;
427 size_t n;
428 uint32_t stage;
429 size_t digest_len = got_hash_digest_length(ctx->algo);
431 err = write_fileindex_val64(ctx, ie->ctime_sec, outfile);
432 if (err)
433 return err;
434 err = write_fileindex_val64(ctx, ie->ctime_nsec, outfile);
435 if (err)
436 return err;
437 err = write_fileindex_val64(ctx, ie->mtime_sec, outfile);
438 if (err)
439 return err;
440 err = write_fileindex_val64(ctx, ie->mtime_nsec, outfile);
441 if (err)
442 return err;
444 err = write_fileindex_val32(ctx, ie->uid, outfile);
445 if (err)
446 return err;
447 err = write_fileindex_val32(ctx, ie->gid, outfile);
448 if (err)
449 return err;
450 err = write_fileindex_val32(ctx, ie->size, outfile);
451 if (err)
452 return err;
454 err = write_fileindex_val16(ctx, ie->mode, outfile);
455 if (err)
456 return err;
458 got_hash_update(ctx, ie->blob.hash, digest_len);
459 n = fwrite(ie->blob.hash, 1, digest_len, outfile);
460 if (n != digest_len)
461 return got_ferror(outfile, GOT_ERR_IO);
463 got_hash_update(ctx, ie->commit.hash, digest_len);
464 n = fwrite(ie->commit.hash, 1, digest_len, outfile);
465 if (n != digest_len)
466 return got_ferror(outfile, GOT_ERR_IO);
468 err = write_fileindex_val32(ctx, ie->flags, outfile);
469 if (err)
470 return err;
472 err = write_fileindex_path(ctx, ie->path, outfile);
473 if (err)
474 return err;
476 stage = got_fileindex_entry_stage_get(ie);
477 if (stage == GOT_FILEIDX_STAGE_MODIFY ||
478 stage == GOT_FILEIDX_STAGE_ADD) {
479 got_hash_update(ctx, ie->staged_blob.hash, digest_len);
480 n = fwrite(ie->staged_blob.hash, 1, digest_len,
481 outfile);
482 if (n != digest_len)
483 return got_ferror(outfile, GOT_ERR_IO);
486 return NULL;
489 const struct got_error *
490 got_fileindex_write(struct got_fileindex *fileindex, FILE *outfile)
492 const struct got_error *err = NULL;
493 struct got_fileindex_hdr hdr;
494 struct got_hash ctx;
495 uint8_t hash[GOT_HASH_DIGEST_MAXLEN];
496 size_t n, digest_len = got_hash_digest_length(fileindex->algo);
497 struct got_fileindex_entry *ie, *tmp;
499 memset(hash, 0, sizeof(hash));
501 got_hash_init(&ctx, fileindex->algo);
503 hdr.signature = htobe32(GOT_FILE_INDEX_SIGNATURE);
504 hdr.version = htobe32(GOT_FILE_INDEX_VERSION);
505 hdr.nentries = htobe32(fileindex->nentries);
506 hdr.algo = htobe32(fileindex->algo);
508 got_hash_update(&ctx, &hdr.signature, sizeof(hdr.signature));
509 got_hash_update(&ctx, &hdr.version, sizeof(hdr.version));
510 got_hash_update(&ctx, &hdr.nentries, sizeof(hdr.nentries));
511 got_hash_update(&ctx, &hdr.algo, sizeof(hdr.algo));
512 n = fwrite(&hdr.signature, 1, sizeof(hdr.signature), outfile);
513 if (n != sizeof(hdr.signature))
514 return got_ferror(outfile, GOT_ERR_IO);
515 n = fwrite(&hdr.version, 1, sizeof(hdr.version), outfile);
516 if (n != sizeof(hdr.version))
517 return got_ferror(outfile, GOT_ERR_IO);
518 n = fwrite(&hdr.nentries, 1, sizeof(hdr.nentries), outfile);
519 if (n != sizeof(hdr.nentries))
520 return got_ferror(outfile, GOT_ERR_IO);
521 n = fwrite(&hdr.algo, 1, sizeof(hdr.algo), outfile);
522 if (n != sizeof(hdr.nentries))
523 return got_ferror(outfile, GOT_ERR_IO);
525 RB_FOREACH_SAFE(ie, got_fileindex_tree, &fileindex->entries, tmp) {
526 ie->flags &= ~GOT_FILEIDX_F_NOT_FLUSHED;
527 ie->flags &= ~GOT_FILEIDX_F_SKIPPED;
528 if (ie->flags & GOT_FILEIDX_F_REMOVE_ON_FLUSH) {
529 RB_REMOVE(got_fileindex_tree, &fileindex->entries, ie);
530 got_fileindex_entry_free(ie);
531 continue;
533 err = write_fileindex_entry(&ctx, ie, outfile);
534 if (err)
535 return err;
538 got_hash_final(&ctx, hash);
539 n = fwrite(hash, 1, digest_len, outfile);
540 if (n != digest_len)
541 return got_ferror(outfile, GOT_ERR_IO);
543 if (fflush(outfile) != 0)
544 return got_error_from_errno("fflush");
546 return NULL;
549 static const struct got_error *
550 read_fileindex_val64(uint64_t *val, struct got_hash *ctx, FILE *infile)
552 size_t n;
554 n = fread(val, 1, sizeof(*val), infile);
555 if (n != sizeof(*val))
556 return got_ferror(infile, GOT_ERR_FILEIDX_BAD);
557 got_hash_update(ctx, val, sizeof(*val));
558 *val = be64toh(*val);
559 return NULL;
562 static const struct got_error *
563 read_fileindex_val32(uint32_t *val, struct got_hash *ctx, FILE *infile)
565 size_t n;
567 n = fread(val, 1, sizeof(*val), infile);
568 if (n != sizeof(*val))
569 return got_ferror(infile, GOT_ERR_FILEIDX_BAD);
570 got_hash_update(ctx, val, sizeof(*val));
571 *val = be32toh(*val);
572 return NULL;
575 static const struct got_error *
576 read_fileindex_val16(uint16_t *val, struct got_hash *ctx, FILE *infile)
578 size_t n;
580 n = fread(val, 1, sizeof(*val), infile);
581 if (n != sizeof(*val))
582 return got_ferror(infile, GOT_ERR_FILEIDX_BAD);
583 got_hash_update(ctx, val, sizeof(*val));
584 *val = be16toh(*val);
585 return NULL;
588 static const struct got_error *
589 read_fileindex_path(char **path, struct got_hash *ctx, FILE *infile)
591 const size_t chunk_size = 8;
592 char p[PATH_MAX];
593 size_t n, len = 0;
595 do {
596 if (len + chunk_size > sizeof(p))
597 return got_error(GOT_ERR_FILEIDX_BAD);
599 n = fread(&p[len], 1, chunk_size, infile);
600 if (n != chunk_size)
601 return got_ferror(infile, GOT_ERR_FILEIDX_BAD);
603 got_hash_update(ctx, &p[len], chunk_size);
604 len += chunk_size;
605 } while (memchr(&p[len - chunk_size], '\0', chunk_size) == NULL);
607 *path = strdup(p);
608 if (*path == NULL)
609 return got_error_from_errno("strdup");
610 return NULL;
613 static const struct got_error *
614 read_fileindex_entry(struct got_fileindex_entry **iep, struct got_hash *ctx,
615 FILE *infile, uint32_t version, enum got_hash_algorithm algo)
617 const struct got_error *err;
618 struct got_fileindex_entry *ie;
619 size_t n, digest_len = got_hash_digest_length(algo);
621 *iep = NULL;
623 ie = calloc(1, sizeof(*ie));
624 if (ie == NULL)
625 return got_error_from_errno("calloc");
627 err = read_fileindex_val64(&ie->ctime_sec, ctx, infile);
628 if (err)
629 goto done;
630 err = read_fileindex_val64(&ie->ctime_nsec, ctx, infile);
631 if (err)
632 goto done;
633 err = read_fileindex_val64(&ie->mtime_sec, ctx, infile);
634 if (err)
635 goto done;
636 err = read_fileindex_val64(&ie->mtime_nsec, ctx, infile);
637 if (err)
638 goto done;
640 err = read_fileindex_val32(&ie->uid, ctx, infile);
641 if (err)
642 goto done;
643 err = read_fileindex_val32(&ie->gid, ctx, infile);
644 if (err)
645 goto done;
646 err = read_fileindex_val32(&ie->size, ctx, infile);
647 if (err)
648 goto done;
650 err = read_fileindex_val16(&ie->mode, ctx, infile);
651 if (err)
652 goto done;
654 ie->blob.algo = algo;
655 n = fread(ie->blob.hash, 1, digest_len, infile);
656 if (n != digest_len) {
657 err = got_ferror(infile, GOT_ERR_FILEIDX_BAD);
658 goto done;
660 got_hash_update(ctx, ie->blob.hash, digest_len);
662 ie->commit.algo = algo;
663 n = fread(ie->commit.hash, 1, digest_len, infile);
664 if (n != digest_len) {
665 err = got_ferror(infile, GOT_ERR_FILEIDX_BAD);
666 goto done;
668 got_hash_update(ctx, ie->commit.hash, digest_len);
670 err = read_fileindex_val32(&ie->flags, ctx, infile);
671 if (err)
672 goto done;
674 err = read_fileindex_path(&ie->path, ctx, infile);
675 if (err)
676 goto done;
678 if (version >= 2) {
679 uint32_t stage = got_fileindex_entry_stage_get(ie);
680 if (stage == GOT_FILEIDX_STAGE_MODIFY ||
681 stage == GOT_FILEIDX_STAGE_ADD) {
682 ie->staged_blob.algo = algo;
683 n = fread(ie->staged_blob.hash, 1, digest_len,
684 infile);
685 if (n != digest_len) {
686 err = got_ferror(infile, GOT_ERR_FILEIDX_BAD);
687 goto done;
689 got_hash_update(ctx, ie->staged_blob.hash,
690 digest_len);
692 } else {
693 /* GOT_FILE_INDEX_VERSION 1 does not support staging. */
694 ie->flags &= ~GOT_FILEIDX_F_STAGE;
697 done:
698 if (err)
699 got_fileindex_entry_free(ie);
700 else
701 *iep = ie;
702 return err;
705 const struct got_error *
706 got_fileindex_read(struct got_fileindex *fileindex, FILE *infile,
707 enum got_hash_algorithm repo_algo)
709 const struct got_error *err = NULL;
710 struct got_fileindex_hdr hdr;
711 struct got_hash ctx;
712 struct got_fileindex_entry *ie;
713 enum got_hash_algorithm algo = repo_algo;
714 uint8_t hash_expected[GOT_HASH_DIGEST_MAXLEN];
715 uint8_t hash[GOT_HASH_DIGEST_MAXLEN];
716 size_t n, digest_len;
717 uint32_t version;
718 int i;
720 n = fread(&hdr.signature, 1, sizeof(hdr.signature), infile);
721 if (n != sizeof(hdr.signature)) {
722 if (n == 0) /* EOF */
723 return NULL;
724 return got_ferror(infile, GOT_ERR_FILEIDX_BAD);
726 n = fread(&hdr.version, 1, sizeof(hdr.version), infile);
727 if (n != sizeof(hdr.version)) {
728 if (n == 0) /* EOF */
729 return NULL;
730 return got_ferror(infile, GOT_ERR_FILEIDX_BAD);
732 n = fread(&hdr.nentries, 1, sizeof(hdr.nentries), infile);
733 if (n != sizeof(hdr.nentries)) {
734 if (n == 0) /* EOF */
735 return NULL;
736 return got_ferror(infile, GOT_ERR_FILEIDX_BAD);
738 version = be32toh(hdr.version);
740 if (version >= 3) {
741 n = fread(&hdr.algo, 1, sizeof(hdr.algo), infile);
742 if (n != sizeof(hdr.algo)) {
743 if (n == 0) /* EOF */
744 return NULL;
745 return got_ferror(infile, GOT_ERR_FILEIDX_BAD);
747 algo = be32toh(hdr.algo);
748 if (algo != repo_algo) {
749 const char *fmt = "unknown";
751 if (repo_algo == GOT_HASH_SHA1)
752 fmt = "sha1";
753 else if (repo_algo == GOT_HASH_SHA256)
754 fmt = "sha256";
756 return got_error_path(fmt, GOT_ERR_OBJECT_FORMAT);
760 digest_len = got_hash_digest_length(algo);
762 got_hash_init(&ctx, algo);
763 got_hash_update(&ctx, &hdr.signature, sizeof(hdr.signature));
764 got_hash_update(&ctx, &hdr.version, sizeof(hdr.version));
765 got_hash_update(&ctx, &hdr.nentries, sizeof(hdr.nentries));
766 if (version >= 3)
767 got_hash_update(&ctx, &hdr.algo, sizeof(hdr.algo));
769 hdr.signature = be32toh(hdr.signature);
770 hdr.version = be32toh(hdr.version);
771 hdr.nentries = be32toh(hdr.nentries);
773 if (hdr.signature != GOT_FILE_INDEX_SIGNATURE)
774 return got_error(GOT_ERR_FILEIDX_SIG);
775 if (hdr.version > GOT_FILE_INDEX_VERSION)
776 return got_error(GOT_ERR_FILEIDX_VER);
778 fileindex->version = version;
779 fileindex->algo = algo;
780 for (i = 0; i < hdr.nentries; i++) {
781 err = read_fileindex_entry(&ie, &ctx, infile,
782 hdr.version, algo);
783 if (err)
784 return err;
785 err = add_entry(fileindex, ie);
786 if (err) {
787 got_fileindex_entry_free(ie);
788 return err;
792 n = fread(hash_expected, 1, digest_len, infile);
793 if (n != digest_len)
794 return got_ferror(infile, GOT_ERR_FILEIDX_BAD);
795 got_hash_final(&ctx, hash);
796 if (got_hash_cmp(algo, hash, hash_expected) != 0)
797 return got_error(GOT_ERR_FILEIDX_CSUM);
799 return NULL;
802 uint32_t
803 got_fileindex_get_version(struct got_fileindex *fileindex)
805 return fileindex->version;
808 static struct got_fileindex_entry *
809 walk_fileindex(struct got_fileindex *fileindex, struct got_fileindex_entry *ie)
811 struct got_fileindex_entry *next;
813 next = RB_NEXT(got_fileindex_tree, &fileindex->entries, ie);
815 /* Skip entries which were added or removed by diff callbacks. */
816 while (next && (next->flags & (GOT_FILEIDX_F_NOT_FLUSHED |
817 GOT_FILEIDX_F_REMOVE_ON_FLUSH)))
818 next = RB_NEXT(got_fileindex_tree, &fileindex->entries, next);
820 return next;
/* Forward declaration; walk_tree() and diff_fileindex_tree() recurse. */
static const struct got_error *
diff_fileindex_tree(struct got_fileindex *fileindex,
    struct got_fileindex_entry **ie, struct got_tree_object *tree,
    const char *path, const char *entry_name, struct got_repository *repo,
    struct got_fileindex_diff_tree_cb *cb, void *cb_arg);
/*
 * Step past the tree entry at index *tidx.
 * If that entry is a directory (and not a submodule), its subtree is
 * opened and diffed against the file index first. Afterwards *tidx is
 * incremented and *next is set to the tree entry at the new index.
 */
static const struct got_error *
walk_tree(struct got_tree_entry **next, struct got_fileindex *fileindex,
    struct got_fileindex_entry **ie, struct got_tree_object *tree, int *tidx,
    const char *path, const char *entry_name, struct got_repository *repo,
    struct got_fileindex_diff_tree_cb *cb, void *cb_arg)
{
	const struct got_error *err = NULL;
	struct got_tree_entry *te = got_object_tree_get_entry(tree, *tidx);
	int descend;

	descend = !got_object_tree_entry_is_submodule(te) &&
	    S_ISDIR(got_tree_entry_get_mode(te));

	if (descend) {
		struct got_tree_object *subtree;
		char *subpath;

		if (asprintf(&subpath, "%s%s%s", path,
		    path[0] == '\0' ? "" : "/",
		    got_tree_entry_get_name(te)) == -1)
			return got_error_from_errno("asprintf");

		err = got_object_open_as_tree(&subtree, repo,
		    got_tree_entry_get_id(te));
		if (err == NULL) {
			err = diff_fileindex_tree(fileindex, ie, subtree,
			    subpath, entry_name, repo, cb, cb_arg);
			got_object_tree_close(subtree);
		}
		free(subpath);
		if (err)
			return err;
	}

	(*tidx)++;
	*next = got_object_tree_get_entry(tree, *tidx);
	return NULL;
}
867 static const struct got_error *
868 diff_fileindex_tree(struct got_fileindex *fileindex,
869 struct got_fileindex_entry **ie, struct got_tree_object *tree,
870 const char *path, const char *entry_name, struct got_repository *repo,
871 struct got_fileindex_diff_tree_cb *cb, void *cb_arg)
873 const struct got_error *err = NULL;
874 struct got_tree_entry *te = NULL;
875 size_t path_len = strlen(path);
876 struct got_fileindex_entry *next;
877 int tidx = 0;
879 te = got_object_tree_get_entry(tree, tidx);
880 while ((*ie && got_path_is_child((*ie)->path, path, path_len)) || te) {
881 if (te && *ie) {
882 char *te_path;
883 const char *te_name = got_tree_entry_get_name(te);
884 int cmp;
885 if (asprintf(&te_path, "%s/%s", path, te_name) == -1) {
886 err = got_error_from_errno("asprintf");
887 break;
889 cmp = got_path_cmp((*ie)->path, te_path,
890 got_fileindex_entry_path_len(*ie), strlen(te_path));
891 free(te_path);
892 if (cmp == 0) {
893 if (got_path_is_child((*ie)->path, path,
894 path_len) &&
895 !got_object_tree_entry_is_submodule(te) &&
896 (entry_name == NULL ||
897 strcmp(te_name, entry_name) == 0)) {
898 err = cb->diff_old_new(cb_arg, *ie, te,
899 path);
900 if (err || entry_name)
901 break;
903 *ie = walk_fileindex(fileindex, *ie);
904 err = walk_tree(&te, fileindex, ie, tree, &tidx,
905 path, entry_name, repo, cb, cb_arg);
906 } else if (cmp < 0) {
907 next = walk_fileindex(fileindex, *ie);
908 if (got_path_is_child((*ie)->path, path,
909 path_len) && entry_name == NULL) {
910 err = cb->diff_old(cb_arg, *ie, path);
911 if (err || entry_name)
912 break;
914 *ie = next;
915 } else {
916 if ((entry_name == NULL ||
917 strcmp(te_name, entry_name) == 0)) {
918 err = cb->diff_new(cb_arg, te, path);
919 if (err || entry_name)
920 break;
922 err = walk_tree(&te, fileindex, ie, tree, &tidx,
923 path, entry_name, repo, cb, cb_arg);
925 if (err)
926 break;
927 } else if (*ie) {
928 next = walk_fileindex(fileindex, *ie);
929 if (got_path_is_child((*ie)->path, path, path_len) &&
930 (entry_name == NULL ||
931 (te && strcmp(got_tree_entry_get_name(te),
932 entry_name) == 0))) {
933 err = cb->diff_old(cb_arg, *ie, path);
934 if (err || entry_name)
935 break;
937 *ie = next;
938 } else if (te) {
939 if (!got_object_tree_entry_is_submodule(te) &&
940 (entry_name == NULL ||
941 strcmp(got_tree_entry_get_name(te), entry_name)
942 == 0)) {
943 err = cb->diff_new(cb_arg, te, path);
944 if (err || entry_name)
945 break;
947 err = walk_tree(&te, fileindex, ie, tree, &tidx, path,
948 entry_name, repo, cb, cb_arg);
949 if (err)
950 break;
954 return err;
957 const struct got_error *
958 got_fileindex_diff_tree(struct got_fileindex *fileindex,
959 struct got_tree_object *tree, const char *path, const char *entry_name,
960 struct got_repository *repo,
961 struct got_fileindex_diff_tree_cb *cb, void *cb_arg)
963 struct got_fileindex_entry *ie;
964 ie = RB_MIN(got_fileindex_tree, &fileindex->entries);
965 while (ie && !got_path_is_child(ie->path, path, strlen(path)))
966 ie = walk_fileindex(fileindex, ie);
967 return diff_fileindex_tree(fileindex, &ie, tree, path, entry_name, repo,
968 cb, cb_arg);
/* Forward declaration; walk_dir() and diff_fileindex_dir() recurse. */
static const struct got_error *
diff_fileindex_dir(struct got_fileindex *fileindex,
    struct got_fileindex_entry **ie, struct got_pathlist_head *dirlist,
    int dirfd, const char *rootpath, const char *path,
    struct got_repository *repo,
    struct got_fileindex_diff_dir_cb *cb, void *cb_arg);
/*
 * Return a malloc'd duplicate of a directory entry, copying d_reclen
 * bytes. Returns NULL if allocation fails. The caller frees the copy.
 */
static struct dirent *
copy_dirent(const struct dirent *de)
{
	const size_t reclen = de->d_reclen;
	struct dirent *dup = malloc(reclen);

	if (dup == NULL)
		return NULL;

	memcpy(dup, de, reclen);
	return dup;
}
989 static const struct got_error *
990 read_dirlist(struct got_pathlist_head *dirlist, DIR *dir, const char *path)
992 const struct got_error *err = NULL;
993 struct got_pathlist_entry *new = NULL;
994 struct dirent *de = NULL;
996 for (;;) {
997 errno = 0;
998 if ((de = readdir(dir)) == NULL) {
999 if (errno != 0) {
1000 err = got_error_from_errno("readdir");
1002 break;
1005 if (strcmp(de->d_name, ".") == 0 ||
1006 strcmp(de->d_name, "..") == 0 ||
1007 (path[0] == '\0' &&
1008 strcmp(de->d_name, GOT_WORKTREE_GOT_DIR) == 0) ||
1009 (path[0] == '\0' &&
1010 strcmp(de->d_name, GOT_WORKTREE_CVG_DIR) == 0)) {
1011 continue;
1014 de = copy_dirent(de);
1015 if (de == NULL) {
1016 err = got_error_from_errno("malloc");
1017 break;
1019 err = got_pathlist_insert(&new, dirlist, de->d_name, de);
1020 if (err) {
1021 free(de);
1022 break;
1024 if (new == NULL) {
1025 err = got_error(GOT_ERR_DIR_DUP_ENTRY);
1026 free(de);
1027 break;
1031 return err;
1034 static int
1035 have_tracked_file_in_dir(struct got_fileindex *fileindex, const char *path)
1037 struct got_fileindex_entry *ie;
1038 size_t path_len = strlen(path);
1039 int cmp;
1041 ie = RB_ROOT(&fileindex->entries);
1042 while (ie) {
1043 if (got_path_is_child(ie->path, path, path_len))
1044 return 1;
1045 cmp = got_path_cmp(path, ie->path, path_len,
1046 got_fileindex_entry_path_len(ie));
1047 if (cmp < 0)
1048 ie = RB_LEFT(ie, entry);
1049 else if (cmp > 0)
1050 ie = RB_RIGHT(ie, entry);
1051 else
1052 break;
1055 return 0;
1058 static const struct got_error *
1059 walk_dir(struct got_pathlist_entry **next, struct got_fileindex *fileindex,
1060 struct got_fileindex_entry **ie, struct got_pathlist_entry *dle, int fd,
1061 const char *path, const char *rootpath, struct got_repository *repo,
1062 int ignore, struct got_fileindex_diff_dir_cb *cb, void *cb_arg)
1064 const struct got_error *err = NULL;
1065 struct dirent *de = dle->data;
1066 DIR *subdir = NULL;
1067 int subdirfd = -1;
1069 *next = NULL;
1071 /* Must traverse ignored directories if they contain tracked files. */
1072 if (de->d_type == DT_DIR && ignore &&
1073 have_tracked_file_in_dir(fileindex, path))
1074 ignore = 0;
1076 if (de->d_type == DT_DIR && !ignore) {
1077 char *subpath;
1078 char *subdirpath;
1079 struct got_pathlist_head subdirlist;
1081 TAILQ_INIT(&subdirlist);
1083 if (asprintf(&subpath, "%s%s%s", path,
1084 path[0] == '\0' ? "" : "/", de->d_name) == -1)
1085 return got_error_from_errno("asprintf");
1087 if (asprintf(&subdirpath, "%s/%s", rootpath, subpath) == -1) {
1088 free(subpath);
1089 return got_error_from_errno("asprintf");
1092 subdirfd = openat(fd, de->d_name,
1093 O_RDONLY | O_NOFOLLOW | O_DIRECTORY | O_CLOEXEC);
1094 if (subdirfd == -1) {
1095 if (errno == EACCES) {
1096 *next = TAILQ_NEXT(dle, entry);
1097 return NULL;
1099 err = got_error_from_errno2("openat", subdirpath);
1100 free(subpath);
1101 free(subdirpath);
1102 return err;
1105 subdir = fdopendir(subdirfd);
1106 if (subdir == NULL) {
1107 err = got_error_from_errno2("fdopendir", path);
1108 close(subdirfd);
1109 free(subpath);
1110 free(subdirpath);
1111 return err;
1113 subdirfd = -1;
1114 err = read_dirlist(&subdirlist, subdir, subdirpath);
1115 if (err) {
1116 free(subpath);
1117 free(subdirpath);
1118 closedir(subdir);
1119 return err;
1121 err = diff_fileindex_dir(fileindex, ie, &subdirlist,
1122 dirfd(subdir), rootpath, subpath, repo, cb, cb_arg);
1123 if (subdir && closedir(subdir) == -1 && err == NULL)
1124 err = got_error_from_errno2("closedir", subdirpath);
1125 free(subpath);
1126 free(subdirpath);
1127 got_pathlist_free(&subdirlist, GOT_PATHLIST_FREE_DATA);
1128 if (err)
1129 return err;
1132 *next = TAILQ_NEXT(dle, entry);
1133 return NULL;
1136 static const struct got_error *
1137 dirent_type_fixup(struct dirent *de, const char *rootpath, const char *path)
1139 const struct got_error *err;
1140 char *dir_path;
1141 int type;
1143 if (de->d_type != DT_UNKNOWN)
1144 return NULL;
1146 /* DT_UNKNOWN occurs on NFS mounts without "readdir plus" RPC. */
1147 if (asprintf(&dir_path, "%s/%s", rootpath, path) == -1)
1148 return got_error_from_errno("asprintf");
1149 err = got_path_dirent_type(&type, dir_path, de);
1150 free(dir_path);
1151 if (err)
1152 return err;
1154 de->d_type = type;
1155 return NULL;
1158 static const struct got_error *
1159 diff_fileindex_dir(struct got_fileindex *fileindex,
1160 struct got_fileindex_entry **ie, struct got_pathlist_head *dirlist,
1161 int dirfd, const char *rootpath, const char *path,
1162 struct got_repository *repo,
1163 struct got_fileindex_diff_dir_cb *cb, void *cb_arg)
1165 const struct got_error *err = NULL;
1166 struct dirent *de = NULL;
1167 size_t path_len = strlen(path);
1168 struct got_pathlist_entry *dle;
1169 int ignore;
1171 if (cb->diff_traverse) {
1172 err = cb->diff_traverse(cb_arg, path, dirfd);
1173 if (err)
1174 return err;
1177 dle = TAILQ_FIRST(dirlist);
1178 while ((*ie && got_path_is_child((*ie)->path, path, path_len)) || dle) {
1179 if (dle && *ie) {
1180 char *de_path;
1181 int cmp;
1182 de = dle->data;
1183 err = dirent_type_fixup(de, rootpath, path);
1184 if (err)
1185 break;
1186 if (asprintf(&de_path, "%s/%s", path,
1187 de->d_name) == -1) {
1188 err = got_error_from_errno("asprintf");
1189 break;
1191 cmp = got_path_cmp((*ie)->path, de_path,
1192 got_fileindex_entry_path_len(*ie),
1193 strlen(path) + 1 + strlen(de->d_name));
1194 free(de_path);
1195 if (cmp == 0) {
1196 err = cb->diff_old_new(cb_arg, *ie, de, path,
1197 dirfd);
1198 if (err)
1199 break;
1200 *ie = walk_fileindex(fileindex, *ie);
1201 err = walk_dir(&dle, fileindex, ie, dle, dirfd,
1202 path, rootpath, repo, 0, cb, cb_arg);
1203 } else if (cmp < 0 ) {
1204 err = cb->diff_old(cb_arg, *ie, path);
1205 if (err)
1206 break;
1207 *ie = walk_fileindex(fileindex, *ie);
1208 } else {
1209 err = cb->diff_new(&ignore, cb_arg, de, path,
1210 dirfd);
1211 if (err)
1212 break;
1213 err = walk_dir(&dle, fileindex, ie, dle, dirfd,
1214 path, rootpath, repo, ignore, cb, cb_arg);
1216 if (err)
1217 break;
1218 } else if (*ie) {
1219 err = cb->diff_old(cb_arg, *ie, path);
1220 if (err)
1221 break;
1222 *ie = walk_fileindex(fileindex, *ie);
1223 } else if (dle) {
1224 de = dle->data;
1225 err = dirent_type_fixup(de, rootpath, path);
1226 if (err)
1227 break;
1228 err = cb->diff_new(&ignore, cb_arg, de, path, dirfd);
1229 if (err)
1230 break;
1231 err = walk_dir(&dle, fileindex, ie, dle, dirfd, path,
1232 rootpath, repo, ignore, cb, cb_arg);
1233 if (err)
1234 break;
1238 return err;
1241 const struct got_error *
1242 got_fileindex_diff_dir(struct got_fileindex *fileindex, int fd,
1243 const char *rootpath, const char *path, struct got_repository *repo,
1244 struct got_fileindex_diff_dir_cb *cb, void *cb_arg)
1246 const struct got_error *err;
1247 struct got_fileindex_entry *ie;
1248 struct got_pathlist_head dirlist;
1249 int fd2;
1250 DIR *dir;
1252 TAILQ_INIT(&dirlist);
1255 * Duplicate the file descriptor so we can call closedir() below
1256 * without closing the file descriptor passed in by our caller.
1258 fd2 = dup(fd);
1259 if (fd2 == -1)
1260 return got_error_from_errno2("dup", path);
1261 if (lseek(fd2, 0, SEEK_SET) == -1) {
1262 err = got_error_from_errno2("lseek", path);
1263 close(fd2);
1264 return err;
1266 dir = fdopendir(fd2);
1267 if (dir == NULL) {
1268 err = got_error_from_errno2("fdopendir", path);
1269 close(fd2);
1270 return err;
1272 err = read_dirlist(&dirlist, dir, path);
1273 if (err) {
1274 closedir(dir);
1275 return err;
1278 ie = RB_MIN(got_fileindex_tree, &fileindex->entries);
1279 while (ie && !got_path_is_child(ie->path, path, strlen(path)))
1280 ie = walk_fileindex(fileindex, ie);
1281 err = diff_fileindex_dir(fileindex, &ie, &dirlist, dirfd(dir),
1282 rootpath, path, repo, cb, cb_arg);
1284 if (closedir(dir) == -1 && err == NULL)
1285 err = got_error_from_errno2("closedir", path);
1286 got_pathlist_free(&dirlist, GOT_PATHLIST_FREE_DATA);
1287 return err;
1290 struct got_object_id *
1291 got_fileindex_entry_get_staged_blob_id(struct got_object_id *id,
1292 struct got_fileindex_entry *ie)
1294 return memcpy(id, &ie->staged_blob, sizeof(*id));
1297 struct got_object_id *
1298 got_fileindex_entry_get_blob_id(struct got_object_id *id,
1299 struct got_fileindex_entry *ie)
1301 return memcpy(id, &ie->blob, sizeof(*id));
1304 struct got_object_id *
1305 got_fileindex_entry_get_commit_id(struct got_object_id *id,
1306 struct got_fileindex_entry *ie)
1308 return memcpy(id, &ie->commit, sizeof(*id));
/*
 * Instantiate the tree functions for got_fileindex_tree (used above via
 * RB_MIN): nodes are struct got_fileindex_entry, linked through their
 * 'entry' member and compared with got_fileindex_cmp().
 * NOTE(review): presumably the red-black tree macros from <sys/tree.h>,
 * pulled in indirectly -- confirm which header provides RB_GENERATE.
 */
1311 RB_GENERATE(got_fileindex_tree, got_fileindex_entry, entry, got_fileindex_cmp);