full integration of rev-cache into git, completed test suite
[git/spearce.git] / rev-cache.c
blob6becd4b86cc3a660edaea504d0e00cad88e2b979
1 #include "cache.h"
2 #include "object.h"
3 #include "commit.h"
4 #include "tree.h"
5 #include "tree-walk.h"
6 #include "blob.h"
7 #include "tag.h"
8 #include "diff.h"
9 #include "revision.h"
10 #include "rev-cache.h"
11 #include "run-command.h"
12 #include "string-list.h"
/* a slice that failed traversal; blacklisted for the rest of this process
 * (see mark_bad_slice() / is_bad_slice()) */
struct bad_slice {
	unsigned char sha1[20];
	struct bad_slice *next;
};
/* on-disk redirect: a tiny file in rev-cache/ that points at the real
 * slice file stored at `path` (see open_cache_slice()) */
struct cache_slice_pointer {
	char signature[8]; /* REVCOPTR */
	char version;
	char path[PATH_MAX + 1];
};
/* list resembles pack index format */
static uint32_t fanout[0xff + 2];

static unsigned char *idx_map;		/* mmap of rev-cache/index, or 0 */
static int idx_size;			/* byte length of idx_map */
static struct rc_index_header idx_head;
static char no_idx, add_to_pending;	/* no_idx: index open failed once */
static struct bad_slice *bad_slices;	/* slices that failed traversal */
static unsigned char *idx_caches;	/* slice sha1 table copied from index */

static struct strbuf *acc_buffer;	/* accumulator used by generation code */

/* iterations of "nothing interesting, dates past" tolerated before stopping */
#define SLOP 5
40 /* initialization */
42 struct rc_index_entry *from_disked_rc_index_entry(struct rc_index_entry_ondisk *src, struct rc_index_entry *dst)
44 static struct rc_index_entry entry[4];
45 static int cur;
47 if (!dst)
48 dst = &entry[cur++ & 0x3];
50 dst->sha1 = src->sha1;
51 dst->is_start = !!(src->flags & 0x80);
52 dst->cache_index = src->flags & 0x7f;
53 dst->pos = ntohl(src->pos);
55 return dst;
58 struct rc_index_entry_ondisk *to_disked_rc_index_entry(struct rc_index_entry *src, struct rc_index_entry_ondisk *dst)
60 static struct rc_index_entry_ondisk entry[4];
61 static int cur;
63 if (!dst)
64 dst = &entry[cur++ & 0x3];
66 if (dst->sha1 != src->sha1)
67 hashcpy(dst->sha1, src->sha1);
68 dst->flags = (unsigned char)src->is_start << 7 | (unsigned char)src->cache_index;
69 dst->pos = htonl(src->pos);
71 return dst;
74 struct rc_object_entry *from_disked_rc_object_entry(struct rc_object_entry_ondisk *src, struct rc_object_entry *dst)
76 static struct rc_object_entry entry[4];
77 static int cur;
79 if (!dst)
80 dst = &entry[cur++ & 0x3];
82 dst->type = src->flags >> 5;
83 dst->is_end = !!(src->flags & 0x10);
84 dst->is_start = !!(src->flags & 0x08);
85 dst->uninteresting = !!(src->flags & 0x04);
86 dst->include = !!(src->flags & 0x02);
87 dst->flag = !!(src->flags & 0x01);
89 dst->sha1 = src->sha1;
90 dst->merge_nr = src->merge_nr;
91 dst->split_nr = src->split_nr;
93 dst->size_size = src->sizes >> 5;
94 dst->padding = src->sizes & 0x1f;
96 dst->date = ntohl(src->date);
97 dst->path = ntohs(src->path);
99 return dst;
102 struct rc_object_entry_ondisk *to_disked_rc_object_entry(struct rc_object_entry *src, struct rc_object_entry_ondisk *dst)
104 static struct rc_object_entry_ondisk entry[4];
105 static int cur;
107 if (!dst)
108 dst = &entry[cur++ & 0x3];
110 dst->flags = (unsigned char)src->type << 5;
111 dst->flags |= (unsigned char)src->is_end << 4;
112 dst->flags |= (unsigned char)src->is_start << 3;
113 dst->flags |= (unsigned char)src->uninteresting << 2;
114 dst->flags |= (unsigned char)src->include << 1;
115 dst->flags |= (unsigned char)src->flag;
117 if (dst->sha1 != src->sha1)
118 hashcpy(dst->sha1, src->sha1);
119 dst->merge_nr = src->merge_nr;
120 dst->split_nr = src->split_nr;
122 dst->sizes = (unsigned char)src->size_size << 5;
123 dst->sizes |= (unsigned char)src->padding;
125 dst->date = htonl(src->date);
126 dst->path = htons(src->path);
128 return dst;
131 static void mark_bad_slice(unsigned char *sha1)
133 struct bad_slice *bad;
135 bad = xcalloc(sizeof(struct bad_slice), 1);
136 hashcpy(bad->sha1, sha1);
138 bad->next = bad_slices;
139 bad_slices = bad;
142 static int is_bad_slice(unsigned char *sha1)
144 struct bad_slice *bad = bad_slices;
146 while (bad) {
147 if (!hashcmp(bad->sha1, sha1))
148 return 1;
149 bad = bad->next;
152 return 0;
155 static int get_index_head(unsigned char *map, int len, struct rc_index_header *head, uint32_t *fanout, unsigned char **caches)
157 struct rc_index_header whead;
158 int i, index = sizeof(struct rc_index_header);
160 memcpy(&whead, map, sizeof(struct rc_index_header));
161 if (memcmp(whead.signature, "REVINDEX", 8) || whead.version != SUPPORTED_REVINDEX_VERSION)
162 return -1;
164 memcpy(head->signature, "REVINDEX", 8);
165 head->version = whead.version;
166 head->ofs_objects = ntohl(whead.ofs_objects);
167 head->object_nr = ntohl(whead.object_nr);
168 head->cache_nr = whead.cache_nr;
169 head->max_date = ntohl(whead.max_date);
171 if (len < index + head->cache_nr * 20 + 0x100 * sizeof(uint32_t))
172 return -2;
174 *caches = xmalloc(head->cache_nr * 20);
175 memcpy(*caches, map + index, head->cache_nr * 20);
176 index += head->cache_nr * 20;
178 memcpy(fanout, map + index, 0x100 * sizeof(uint32_t));
179 for (i = 0; i <= 0xff; i++)
180 fanout[i] = ntohl(fanout[i]);
181 fanout[0x100] = len;
183 return 0;
186 /* added in init_index */
187 static void cleanup_cache_slices(void)
189 if (idx_map) {
190 free(idx_caches);
191 munmap(idx_map, idx_size);
192 idx_map = 0;
197 static int init_index(void)
199 int fd;
200 struct stat fi;
202 fd = open(git_path("rev-cache/index"), O_RDONLY);
203 if (fd == -1 || fstat(fd, &fi))
204 goto end;
205 if (fi.st_size < sizeof(struct rc_index_header))
206 goto end;
208 idx_size = fi.st_size;
209 idx_map = xmmap(0, idx_size, PROT_READ, MAP_PRIVATE, fd, 0);
210 close(fd);
211 if (idx_map == MAP_FAILED)
212 goto end;
213 if (get_index_head(idx_map, fi.st_size, &idx_head, fanout, &idx_caches))
214 goto end;
216 atexit(cleanup_cache_slices);
218 return 0;
220 end:
221 idx_map = 0;
222 no_idx = 1;
223 return -1;
/* this assumes index is already loaded */
static struct rc_index_entry_ondisk *search_index_1(unsigned char *sha1)
{
	int start, end, starti, endi, i, len, r;
	struct rc_index_entry_ondisk *ie;

	if (!idx_map)
		return 0;

	/* binary search within the fanout bucket selected by the first byte;
	 * the bucket must be a whole number of entries or the index is bogus */
	start = fanout[(int)sha1[0]];
	end = fanout[(int)sha1[0] + 1];
	len = (end - start) / sizeof(struct rc_index_entry_ondisk);
	if (!len || len * sizeof(struct rc_index_entry_ondisk) != end - start)
		return 0;

	starti = 0;
	endi = len - 1;
	for (;;) {
		i = (endi + starti) / 2;
		ie = (struct rc_index_entry_ondisk *)(idx_map + start + i * sizeof(struct rc_index_entry_ondisk));
		r = hashcmp(sha1, ie->sha1);

		if (r) {
			/* non-standard convergence: when the window shrinks to two
			 * adjacent entries, bump starti so endi gets probed once,
			 * then starti == endi terminates the search */
			if (starti + 1 == endi) {
				starti++;
				continue;
			} else if (starti == endi)
				break;

			if (r > 0)
				starti = i;
			else /* r < 0 */
				endi = i;
		} else
			return ie;
	}

	return 0;
}
/* Look up sha1 in the index; returns a host-form entry in a rotating
 * static slot, or 0 when absent. */
static struct rc_index_entry *search_index(unsigned char *sha1)
{
	struct rc_index_entry_ondisk *found = search_index_1(sha1);

	return found ? from_disked_rc_index_entry(found, 0) : 0;
}
277 unsigned char *get_cache_slice(struct commit *commit)
279 struct rc_index_entry *ie;
280 unsigned char *sha1;
282 if (!idx_map) {
283 if (no_idx)
284 return 0;
285 init_index();
288 if (commit->date > idx_head.max_date)
289 return 0;
291 ie = search_index(commit->object.sha1);
292 if (ie && ie->cache_index < idx_head.cache_nr) {
293 sha1 = idx_caches + ie->cache_index * 20;
295 if (is_bad_slice(sha1))
296 return 0;
297 return sha1;
300 return 0;
304 /* traversal */
306 static unsigned long decode_size(unsigned char *str, int len);
/* on failure: undo the cache traversal's bookkeeping on this commit */
static void restore_commit(struct commit *commit)
{
	commit->object.flags &= ~(ADDED | SEEN | FACE_VALUE);

	if (!commit->object.parsed) {
		/* drop parents fabricated from cache data, then parse for real */
		while (pop_commit(&commit->parents))
			;
		parse_commit(commit);
	}
}
/* Register a tree/blob entry found in the slice against the current commit:
 * look it up, stash the decoded size, and optionally add it to pending. */
static void handle_noncommit(struct rev_info *revs, struct commit *commit, unsigned char *ptr, struct rc_object_entry *entry)
{
	struct blob *blob;
	struct tree *tree;
	struct object *obj;
	unsigned long size;

	/* the object size is stored after the entry, in size_size bytes */
	size = decode_size(ptr + RC_ENTRY_SIZE_OFFSET(entry), entry->size_size);
	switch (entry->type) {
	case OBJ_TREE :
		if (!revs->tree_objects)
			return;

		tree = lookup_tree(entry->sha1);
		if (!tree)
			return;

		tree->size = size;
		commit->tree = tree;
		obj = (struct object *)tree;
		break;

	case OBJ_BLOB :
		if (!revs->blob_objects)
			return;

		blob = lookup_blob(entry->sha1);
		if (!blob)
			return;

		obj = (struct object *)blob;
		break;

	default :
		/* tag objects aren't really supposed to be here */
		return;
	}

	/* FACE_VALUE: we trust cached metadata instead of reading the object */
	obj->flags |= FACE_VALUE;
	if (add_to_pending)
		add_pending_object(revs, obj, "");
}
/*
 * Prime the slice for traversal: mark the starting commit (and any other
 * work-list commits that live in this slice) as "include" in the mapped
 * entries, move the absorbed commits onto `unwork` so they can be restored
 * on failure, and return the smallest slice offset to start scanning from
 * (or a negative value on error via iep->pos semantics).
 */
static int setup_traversal(struct rc_slice_header *head, unsigned char *map, struct commit *commit, struct commit_list **work,
		struct commit_list **unwork, int *ipath_nr, int *upath_nr, char *ioutside)
{
	struct rc_index_entry *iep;
	struct rc_object_entry *oep;
	struct commit_list *prev, *wp, **wpp;
	int retval;

	iep = search_index(commit->object.sha1);
	oep = RC_OBTAIN_OBJECT_ENTRY(map + iep->pos);
	if (commit->object.flags & UNINTERESTING) {
		++*upath_nr;
		oep->uninteresting = 1;
	} else
		++*ipath_nr;

	oep->include = 1;
	/* write the flags back into the (MAP_PRIVATE) mapping */
	to_disked_rc_object_entry(oep, (struct rc_object_entry_ondisk *)(map + iep->pos));
	retval = iep->pos;

	/* include any others in the work array */
	prev = 0;
	wpp = work;
	wp = *work;
	while (wp) {
		struct object *obj = &wp->item->object;
		struct commit *co;

		/* is this in our cache slice? */
		iep = search_index(obj->sha1);
		if (!iep || hashcmp(idx_caches + iep->cache_index * 20, head->sha1)) {
			/* there are interesing objects outside the slice */
			if (!(obj->flags & UNINTERESTING))
				*ioutside = 1;

			prev = wp;
			wp = wp->next;
			/* NOTE(review): wpp now aims at the local `wp`; removal below
			 * still works because prev->next is patched explicitly */
			wpp = &wp;
			continue;
		}

		/* start scanning from the earliest included entry */
		if (iep->pos < retval)
			retval = iep->pos;

		oep = RC_OBTAIN_OBJECT_ENTRY(map + iep->pos);

		/* mark this for later */
		oep->include = 1;
		oep->uninteresting = !!(obj->flags & UNINTERESTING);
		to_disked_rc_object_entry(oep, (struct rc_object_entry_ondisk *)(map + iep->pos));

		/* count even if not in slice so we can stop enumerating if possible */
		if (obj->flags & UNINTERESTING)
			++*upath_nr;
		else
			++*ipath_nr;

		/* remove from work list */
		co = pop_commit(wpp);
		wp = *wpp;
		if (prev)
			prev->next = wp;

		/* ...and store in temp list so we can restore work on failure */
		commit_list_insert(co, unwork);
	}

	return retval;
}
/* per-path state bits kept in the traversal's paths[] byte array */
#define IPATH 0x40	/* path carries an interesting lineage */
#define UPATH 0x80	/* path carries an uninteresting lineage */

/* low 6 bits of a paths[] byte count outstanding merge parents */
#define GET_COUNT(x) ((x) & 0x3f)
#define SET_COUNT(x, s) ((x) = ((x) & ~0x3f) | ((s) & 0x3f))
/*
 * Walk the commits recorded in a mapped cache slice, reconstructing the
 * interesting/uninteresting state per "path" (lineage) without parsing
 * most commits.  Results are appended to *queue (ordinary commits) and
 * *work (end/boundary commits).  Returns 0 on success; on failure the
 * caller's lists are restored from `unwork` and -1 is returned.
 */
static int traverse_cache_slice_1(struct rc_slice_header *head, unsigned char *map,
		struct rev_info *revs, struct commit *commit,
		unsigned long *date_so_far, int *slop_so_far,
		struct commit_list ***queue, struct commit_list **work)
{
	struct commit_list *insert_cache = 0, *myq = 0, **myqp = &myq, *mywork = 0, **myworkp = &mywork, *unwork = 0;
	struct commit **last_objects, *co;
	unsigned long date = date_so_far ? *date_so_far : ~0ul;
	int i, ipath_nr = 0, upath_nr = 0, orig_obj_nr = 0,
		total_path_nr = head->path_nr, retval = -1, slop = slop_so_far ? *slop_so_far : SLOP;
	char consume_children = 0, ioutside = 0;
	unsigned char *paths;

	/* take note in case we need to regress */
	orig_obj_nr = revs->pending.nr;

	i = setup_traversal(head, map, commit, work, &unwork, &ipath_nr, &upath_nr, &ioutside);
	if (i < 0)
		return -1;

	paths = xcalloc(total_path_nr, sizeof(uint16_t));
	last_objects = xcalloc(total_path_nr, sizeof(struct commit *));

	/* i already set */
	while (i < head->size) {
		struct rc_object_entry *entry = RC_OBTAIN_OBJECT_ENTRY(map + i);
		int path = entry->path;
		struct object *obj;
		int index = i;

		i += RC_ACTUAL_OBJECT_ENTRY_SIZE(entry);

		/* add extra objects if necessary; `co` is the commit they belong to */
		if (entry->type != OBJ_COMMIT) {
			if (consume_children)
				handle_noncommit(revs, co, map + index, entry);

			continue;
		} else
			consume_children = 0;

		if (path >= total_path_nr)
			goto end;

		/* in one of our branches?
		 * uninteresting trumps interesting */
		if (entry->include)
			paths[path] |= entry->uninteresting ? UPATH : IPATH;
		else if (!paths[path])
			continue;

		/* date stuff */
		if (revs->max_age != -1 && entry->date < revs->max_age)
			paths[path] |= UPATH;

		/* lookup object */
		co = lookup_commit(entry->sha1);
		obj = &co->object;

		if (obj->flags & UNINTERESTING)
			paths[path] |= UPATH;

		/* path flipped from interesting to uninteresting: boundary */
		if ((paths[path] & IPATH) && (paths[path] & UPATH)) {
			paths[path] = UPATH;
			ipath_nr--;

			/* mark edge */
			if (last_objects[path]) {
				parse_commit(last_objects[path]);

				/* we needn't worry about the unique field; that will be valid as
				 * long as we're not a end entry */
				last_objects[path]->object.flags &= ~FACE_VALUE;
				last_objects[path] = 0;
			}
			obj->flags |= BOUNDARY;
		}

		/* now we gotta re-assess the whole interesting thing... */
		entry->uninteresting = !!(paths[path] & UPATH);

		/* first close paths */
		if (entry->split_nr) {
			int j, off = index + sizeof(struct rc_object_entry_ondisk) + RC_PATH_SIZE(entry->merge_nr);

			for (j = 0; j < entry->split_nr; j++) {
				unsigned short p = ntohs(*(uint16_t *)(map + off + RC_PATH_SIZE(j)));

				if (p >= total_path_nr)
					goto end;

				/* boundary commit? */
				if ((paths[p] & IPATH) && entry->uninteresting) {
					if (last_objects[p]) {
						parse_commit(last_objects[p]);

						last_objects[p]->object.flags &= ~FACE_VALUE;
						last_objects[p] = 0;
					}
					obj->flags |= BOUNDARY;
				} else if (last_objects[p] && !last_objects[p]->object.parsed) {
					commit_list_insert(co, &last_objects[p]->parents);
				}

				/* can't close a merge path until all are parents have been encountered */
				if (GET_COUNT(paths[p])) {
					SET_COUNT(paths[p], GET_COUNT(paths[p]) - 1);

					if (GET_COUNT(paths[p]))
						continue;
				}

				if (paths[p] & IPATH)
					ipath_nr--;
				else
					upath_nr--;

				paths[p] = 0;
				last_objects[p] = 0;
			}
		}

		/* make topo relations */
		if (last_objects[path] && !last_objects[path]->object.parsed) {
			commit_list_insert(co, &last_objects[path]->parents);
		}

		/* we've been here already */
		if (obj->flags & ADDED) {
			if (entry->uninteresting && !(obj->flags & UNINTERESTING)) {
				obj->flags |= UNINTERESTING;
				mark_parents_uninteresting(co);
				upath_nr--;
			} else if (!entry->uninteresting)
				ipath_nr--;

			paths[path] = 0;
			continue;
		}

		/* initialize commit */
		if (!entry->is_end) {
			co->date = entry->date;
			obj->flags |= ADDED | FACE_VALUE;
		} else
			parse_commit(co);

		obj->flags |= SEEN;

		if (entry->uninteresting)
			obj->flags |= UNINTERESTING;
		else if (co->date < date)
			date = co->date;

		/* we need to know what the edges are */
		last_objects[path] = co;

		/* add to list */
		if (slop && !(revs->min_age != -1 && co->date > revs->min_age)) {

			if (!(obj->flags & UNINTERESTING) || revs->show_all) {
				if (entry->is_end)
					myworkp = &commit_list_insert(co, myworkp)->next;
				else
					myqp = &commit_list_insert(co, myqp)->next;

				/* add children to list as well */
				if (obj->flags & UNINTERESTING)
					consume_children = 0;
				else
					consume_children = 1;
			}
		}

		/* should we continue? */
		if (!slop) {
			if (!upath_nr) {
				break;
			} else if (ioutside || revs->show_all) {
				/* pass it back to rev-list
				 * we purposely ignore everything outside this cache, so we don't needlessly traverse the whole
				 * thing on uninteresting, but that does mean that we may need to bounce back
				 * and forth a few times with rev-list */
				myworkp = &commit_list_insert(co, myworkp)->next;

				paths[path] = 0;
				upath_nr--;
			} else {
				break;
			}
		} else if (!ipath_nr && co->date <= date)
			slop--;
		else
			slop = SLOP;

		/* open parents */
		if (entry->merge_nr) {
			int j, off = index + sizeof(struct rc_object_entry_ondisk);
			char flag = entry->uninteresting ? UPATH : IPATH;

			for (j = 0; j < entry->merge_nr; j++) {
				unsigned short p = ntohs(*(uint16_t *)(map + off + RC_PATH_SIZE(j)));

				if (p >= total_path_nr)
					goto end;

				if (paths[p] & flag)
					continue;

				if (flag == IPATH)
					ipath_nr++;
				else
					upath_nr++;

				paths[p] |= flag;
			}

			/* make sure we don't use this path before all our parents have had their say */
			SET_COUNT(paths[path], entry->merge_nr);
		}
	}

	if (date_so_far)
		*date_so_far = date;
	if (slop_so_far)
		*slop_so_far = slop;
	retval = 0;

	/* success: attach to given lists */
	if (myqp != &myq) {
		**queue = myq;
		*queue = myqp;
	}

	while ((co = pop_commit(&mywork)) != 0) {
		insert_by_date_cached(co, work, insert_cache, &insert_cache);
	}

	/* free backup */
	while (pop_commit(&unwork))
		;

end:
	free(paths);
	free(last_objects);

	/* failure: restore work to previous condition
	 * (cache corruption should *not* be fatal) */
	if (retval) {
		while ((co = pop_commit(&unwork)) != 0) {
			restore_commit(co);
			co->object.flags |= SEEN;
			insert_by_date(co, work);
		}

		/* free lists */
		while ((co = pop_commit(&myq)) != 0)
			restore_commit(co);

		while ((co = pop_commit(&mywork)) != 0)
			restore_commit(co);

		/* truncate object array */
		for (i = orig_obj_nr; i < revs->pending.nr; i++) {
			struct object *obj = revs->pending.objects[i].item;

			obj->flags &= ~FACE_VALUE;
		}
		revs->pending.nr = orig_obj_nr;
	}

	return retval;
}
717 static int get_cache_slice_header(unsigned char *cache_sha1, unsigned char *map, int len, struct rc_slice_header *head)
719 int t;
721 memcpy(head, map, sizeof(struct rc_slice_header));
722 head->ofs_objects = ntohl(head->ofs_objects);
723 head->object_nr = ntohl(head->object_nr);
724 head->size = ntohl(head->size);
725 head->path_nr = ntohs(head->path_nr);
727 if (memcmp(head->signature, "REVCACHE", 8))
728 return -1;
729 if (head->version != SUPPORTED_REVCACHE_VERSION)
730 return -2;
731 if (hashcmp(head->sha1, cache_sha1))
732 return -3;
733 t = sizeof(struct rc_slice_header);
734 if (t != head->ofs_objects || t >= len)
735 return -4;
737 head->size = len;
739 return 0;
742 int open_cache_slice(unsigned char *sha1, int flags)
744 int fd;
745 char signature[8];
747 fd = open(git_path("rev-cache/%s", sha1_to_hex(sha1)), flags);
748 if (fd <= 0)
749 goto end;
751 if (read(fd, signature, 8) != 8)
752 goto end;
754 /* a normal revision slice */
755 if (!memcmp(signature, "REVCACHE", 8)) {
756 lseek(fd, 0, SEEK_SET);
757 return fd;
760 /* slice pointer */
761 if (!memcmp(signature, "REVCOPTR", 8)) {
762 struct cache_slice_pointer ptr;
764 lseek(fd, 0, SEEK_SET);
765 if (read_in_full(fd, &ptr, sizeof(ptr)) != sizeof(ptr))
766 goto end;
768 if (ptr.version != SUPPORTED_REVCOPTR_VERSION)
769 goto end;
771 close(fd);
772 fd = open(ptr.path, flags);
774 return fd;
777 end:
778 if (fd > 0)
779 close(fd);
781 return -1;
/*
 * Entry point for cached traversal: open and map the slice named by
 * cache_sha1, validate its header, and hand off to
 * traverse_cache_slice_1().  Any failure blacklists the slice so it is
 * not consulted again this process.  Returns 0 on success, negative on
 * failure.
 */
int traverse_cache_slice(struct rev_info *revs,
		unsigned char *cache_sha1, struct commit *commit,
		unsigned long *date_so_far, int *slop_so_far,
		struct commit_list ***queue, struct commit_list **work)
{
	int fd = -1, retval = -3;
	struct stat fi;
	struct rc_slice_header head;
	struct rev_cache_info *rci;
	unsigned char *map = MAP_FAILED;

	/* the index should've been loaded already to find cache_sha1, but it's good
	 * to be absolutely sure... */
	if (!idx_map)
		init_index();
	if (!idx_map)
		return -1;

	/* load options */
	rci = &revs->rev_cache_info;
	add_to_pending = rci->add_to_pending;

	memset(&head, 0, sizeof(struct rc_slice_header));

	fd = open_cache_slice(cache_sha1, O_RDONLY);
	if (fd == -1)
		goto end;
	if (fstat(fd, &fi) || fi.st_size < sizeof(struct rc_slice_header))
		goto end;

	/* MAP_PRIVATE + PROT_WRITE: traversal scribbles flags into the map
	 * without touching the file on disk */
	map = xmmap(0, fi.st_size, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
	if (map == MAP_FAILED)
		goto end;
	if (get_cache_slice_header(cache_sha1, map, fi.st_size, &head))
		goto end;

	retval = traverse_cache_slice_1(&head, map, revs, commit, date_so_far, slop_so_far, queue, work);

end:
	if (map != MAP_FAILED)
		munmap(map, fi.st_size);
	if (fd != -1)
		close(fd);

	/* remember this! */
	if (retval)
		mark_bad_slice(cache_sha1);

	return retval;
}
837 /* generation */
839 static int is_endpoint(struct commit *commit)
841 struct commit_list *list = commit->parents;
843 while (list) {
844 if (!(list->item->object.flags & UNINTERESTING))
845 return 0;
847 list = list->next;
850 return 1;
/* ensures branch is self-contained: parents are either all interesting or all uninteresting */
static void make_legs(struct rev_info *revs)
{
	struct commit_list *list, **plist;
	int total = 0;

	/* attach plist to end of commits list */
	list = revs->commits;
	while (list && list->next)
		list = list->next;

	if (list)
		plist = &list->next;
	else
		return;

	/* duplicates don't matter, as get_revision() ignores them */
	for (list = revs->commits; list; list = list->next) {
		struct commit *item = list->item;
		struct commit_list *parents = item->parents;

		if (item->object.flags & UNINTERESTING)
			continue;
		if (is_endpoint(item))
			continue;

		/* promote any uninteresting parents to interesting, and append
		 * them to the commit list so they get traversed too */
		while (parents) {
			struct commit *p = parents->item;
			parents = parents->next;

			if (!(p->object.flags & UNINTERESTING))
				continue;

			p->object.flags &= ~UNINTERESTING;
			parse_commit(p);
			plist = &commit_list_insert(p, plist)->next;

			if (!(p->object.flags & SEEN))
				total++;
		}
	}

	/* re-sort only if we actually added something */
	if (total)
		sort_in_topological_order(&revs->commits, 1);
}
/* records a pending child relationship: when `commit` is reached during
 * generation, the path numbered `path` can be closed/split */
struct path_track {
	struct commit *commit;
	int path; /* for keeping track of children */

	struct path_track *next, *prev;
};
static unsigned char *paths;		/* per-path state byte (see PATH_IN_USE) */
static int path_nr = 1, path_sz;	/* next fresh path index; allocated size */

static struct path_track *path_track;		/* active child-tracking list */
static struct path_track *path_track_alloc;	/* free-list of recycled nodes */

#define PATH_IN_USE 0x80 /* biggest bit we can get as a char */
916 static int get_new_path(void)
918 int i;
920 for (i = 1; i < path_nr; i++)
921 if (!paths[i])
922 break;
924 if (i == path_nr) {
925 if (path_nr >= path_sz) {
926 path_sz += 50;
927 paths = xrealloc(paths, path_sz);
928 memset(paths + path_sz - 50, 0, 50);
930 path_nr++;
933 paths[i] = PATH_IN_USE;
934 return i;
937 static void remove_path_track(struct path_track **ppt, char total_free)
939 struct path_track *t = *ppt;
941 if (t->next)
942 t->next->prev = t->prev;
943 if (t->prev)
944 t->prev->next = t->next;
946 t = t->next;
948 if (total_free)
949 free(*ppt);
950 else {
951 (*ppt)->next = path_track_alloc;
952 path_track_alloc = *ppt;
955 *ppt = t;
958 static struct path_track *make_path_track(struct path_track **head, struct commit *commit)
960 struct path_track *pt;
962 if (path_track_alloc) {
963 pt = path_track_alloc;
964 path_track_alloc = pt->next;
965 } else
966 pt = xmalloc(sizeof(struct path_track));
968 memset(pt, 0, sizeof(struct path_track));
969 pt->commit = commit;
971 pt->next = *head;
972 if (*head)
973 (*head)->prev = pt;
974 *head = pt;
976 return pt;
979 static void add_path_to_track(struct commit *commit, int path)
981 make_path_track(&path_track, commit);
982 path_track->path = path;
/*
 * During slice generation, assign/maintain the path number for `commit`:
 * close paths of children waiting on it (filling split_str), claim or
 * inherit a path for itself, and open paths for its merge parents
 * (filling merge_str).  Updates the counts in `object` accordingly.
 */
static void handle_paths(struct commit *commit, struct rc_object_entry *object, struct strbuf *merge_str, struct strbuf *split_str)
{
	int child_nr, parent_nr, open_parent_nr, this_path;
	struct commit_list *list;
	struct commit *first_parent;
	struct path_track **ppt, *pt;

	/* we can only re-use a closed path once all it's children have been encountered,
	 * as we need to keep track of commit boundaries */
	ppt = &path_track;
	pt = *ppt;
	child_nr = 0;
	while (pt) {
		if (pt->commit == commit) {
			uint16_t write_path;

			/* decrement the pending-children refcount for this path */
			if (paths[pt->path] != PATH_IN_USE)
				paths[pt->path]--;

			/* make sure we can handle this */
			child_nr++;
			if (child_nr > 0x7f)
				die("%s: too many branches! rev-cache can only handle %d parents/children per commit",
					sha1_to_hex(object->sha1), 0x7f);

			/* add to split list */
			object->split_nr++;
			write_path = htons((uint16_t)pt->path);
			strbuf_add(split_str, &write_path, sizeof(uint16_t));

			remove_path_track(ppt, 0);
			pt = *ppt;
		} else {
			pt = pt->next;
			/* NOTE(review): ppt aims at the local `pt` here; removal still
			 * works because remove_path_track relinks via prev/next */
			ppt = &pt;
		}
	}

	/* initialize our self! */
	if (!commit->indegree) {
		commit->indegree = get_new_path();
		object->is_start = 1;
	}

	this_path = commit->indegree;
	paths[this_path] = PATH_IN_USE;
	object->path = this_path;

	/* count interesting parents */
	parent_nr = open_parent_nr = 0;
	first_parent = 0;
	for (list = commit->parents; list; list = list->next) {
		if (list->item->object.flags & UNINTERESTING) {
			object->is_end = 1;
			continue;
		}

		parent_nr++;
		if (!list->item->indegree)
			open_parent_nr++;
		if (!first_parent)
			first_parent = list->item;
	}

	if (!parent_nr)
		return;

	/* single unassigned parent: it simply inherits our path */
	if (parent_nr == 1 && open_parent_nr == 1) {
		first_parent->indegree = this_path;
		return;
	}

	/* bail out on obscene parent/child #s */
	if (parent_nr > 0x7f)
		die("%s: too many parents in merge! rev-cache can only handle %d parents/children per commit",
			sha1_to_hex(object->sha1), 0x7f);

	/* make merge list */
	object->merge_nr = parent_nr;
	/* path byte now counts outstanding parents instead of PATH_IN_USE */
	paths[this_path] = parent_nr;

	for (list = commit->parents; list; list = list->next) {
		struct commit *p = list->item;
		uint16_t write_path;

		if (p->object.flags & UNINTERESTING)
			continue;

		/* unfortunately due to boundary tracking we can't re-use merge paths
		 * (unable to guarantee last parent path = this -> last won't always be able to
		 * set this as a boundary object */
		if (!p->indegree)
			p->indegree = get_new_path();

		write_path = htons((uint16_t)p->indegree);
		strbuf_add(merge_str, &write_path, sizeof(uint16_t));

		/* make sure path is properly ended */
		add_path_to_track(p, this_path);
	}
}
/* Write `size` into `out` as minimal-width little-endian bytes and return
 * the byte count; zero encodes to zero bytes. */
static int encode_size(unsigned long size, unsigned char *out)
{
	int len;

	for (len = 0; size; len++) {
		out[len] = (unsigned char)(size & 0xff);
		size >>= 8;
	}

	return len;
}
/* Inverse of encode_size(): accumulate `len` little-endian bytes from
 * `str` into an unsigned long. */
static unsigned long decode_size(unsigned char *str, int len)
{
	unsigned long size = 0;
	int i;

	for (i = 0; i < len; i++)
		size |= (unsigned long)str[i] << (8 * i);

	return size;
}
1116 static void add_object_entry(const unsigned char *sha1, struct rc_object_entry *entryp,
1117 struct strbuf *merge_str, struct strbuf *split_str)
1119 struct rc_object_entry entry;
1120 unsigned char size_str[7];
1121 unsigned long size;
1122 enum object_type type;
1123 void *data;
1125 if (entryp)
1126 sha1 = entryp->sha1;
1128 /* retrieve size data */
1129 data = read_sha1_file(sha1, &type, &size);
1131 if (data)
1132 free(data);
1134 /* initialize! */
1135 if (!entryp) {
1136 memset(&entry, 0, sizeof(entry));
1137 entry.sha1 = (unsigned char *)sha1;
1138 entry.type = type;
1140 if (merge_str)
1141 entry.merge_nr = merge_str->len / sizeof(uint16_t);
1142 if (split_str)
1143 entry.split_nr = split_str->len / sizeof(uint16_t);
1145 entryp = &entry;
1148 entryp->size_size = encode_size(size, size_str);
1150 /* write the muvabitch */
1151 strbuf_add(acc_buffer, to_disked_rc_object_entry(entryp, 0), sizeof(struct rc_object_entry_ondisk));
1153 if (merge_str)
1154 strbuf_add(acc_buffer, merge_str->buf, merge_str->len);
1155 if (split_str)
1156 strbuf_add(acc_buffer, split_str->buf, split_str->len);
1158 strbuf_add(acc_buffer, size_str, entryp->size_size);
1161 /* returns non-zero to continue parsing, 0 to skip */
1162 typedef int (*dump_tree_fn)(const unsigned char *, const char *, unsigned int); /* sha1, path, mode */
1164 /* we need to walk the trees by hash, so unfortunately we can't use traverse_trees in tree-walk.c */
1165 static int dump_tree(struct tree *tree, dump_tree_fn fn)
1167 struct tree_desc desc;
1168 struct name_entry entry;
1169 struct tree *subtree;
1170 int r;
1172 if (parse_tree(tree))
1173 return -1;
1175 init_tree_desc(&desc, tree->buffer, tree->size);
1176 while (tree_entry(&desc, &entry)) {
1177 switch (fn(entry.sha1, entry.path, entry.mode)) {
1178 case 0 :
1179 goto continue_loop;
1180 default :
1181 break;
1184 if (S_ISDIR(entry.mode)) {
1185 subtree = lookup_tree(entry.sha1);
1186 if (!subtree)
1187 return -2;
1189 if ((r = dump_tree(subtree, fn)) < 0)
1190 return r;
1193 continue_loop:
1194 continue;
1197 return 0;
1200 static int dump_tree_callback(const unsigned char *sha1, const char *path, unsigned int mode)
1202 strbuf_add(acc_buffer, sha1, 20);
1204 return 1;
1207 static void tree_addremove(struct diff_options *options,
1208 int whatnow, unsigned mode,
1209 const unsigned char *sha1,
1210 const char *concatpath)
1212 if (whatnow != '+')
1213 return;
1215 strbuf_add(acc_buffer, sha1, 20);
1218 static void tree_change(struct diff_options *options,
1219 unsigned old_mode, unsigned new_mode,
1220 const unsigned char *old_sha1,
1221 const unsigned char *new_sha1,
1222 const char *concatpath)
1224 if (!hashcmp(old_sha1, new_sha1))
1225 return;
1227 strbuf_add(acc_buffer, new_sha1, 20);
1230 static int add_unique_objects(struct commit *commit)
1232 struct commit_list *list;
1233 struct strbuf os, ost, *orig_buf;
1234 struct diff_options opts;
1235 int i, j, next;
1236 char is_first = 1;
1238 /* ...no, calculate unique objects */
1239 strbuf_init(&os, 0);
1240 strbuf_init(&ost, 0);
1241 orig_buf = acc_buffer;
1243 diff_setup(&opts);
1244 DIFF_OPT_SET(&opts, RECURSIVE);
1245 DIFF_OPT_SET(&opts, TREE_IN_RECURSIVE);
1246 opts.change = tree_change;
1247 opts.add_remove = tree_addremove;
1249 /* this is only called for non-ends (ie. all parents interesting) */
1250 for (list = commit->parents; list; list = list->next) {
1251 if (is_first)
1252 acc_buffer = &os;
1253 else
1254 acc_buffer = &ost;
1256 strbuf_setlen(acc_buffer, 0);
1257 diff_tree_sha1(list->item->tree->object.sha1, commit->tree->object.sha1, "", &opts);
1258 qsort(acc_buffer->buf, acc_buffer->len / 20, 20, (int (*)(const void *, const void *))hashcmp);
1260 /* take intersection */
1261 if (!is_first) {
1262 for (next = i = j = 0; i < os.len; i += 20) {
1263 while (j < ost.len && hashcmp((unsigned char *)(ost.buf + j), (unsigned char *)(os.buf + i)) < 0)
1264 j += 20;
1266 if (j >= ost.len || hashcmp((unsigned char *)(ost.buf + j), (unsigned char *)(os.buf + i)))
1267 continue;
1269 if (next != i)
1270 memcpy(os.buf + next, os.buf + i, 20);
1271 next += 20;
1274 if (next != i)
1275 strbuf_setlen(&os, next);
1276 } else
1277 is_first = 0;
1280 /* no parents (!) */
1281 if (is_first) {
1282 acc_buffer = &os;
1283 dump_tree(commit->tree, dump_tree_callback);
1286 /* the ordering of non-commit objects dosn't really matter, so we're not gonna bother */
1287 acc_buffer = orig_buf;
1288 for (i = 0; i < os.len; i += 20)
1289 add_object_entry((unsigned char *)(os.buf + i), 0, 0, 0);
1291 /* last but not least, the main tree */
1292 add_object_entry(commit->tree->object.sha1, 0, 0, 0);
1294 return i / 20 + 1;
/* Copy the non-commit entries that follow the commit at *index straight
 * into acc_buffer.  On hitting the next commit, *index is set to its
 * offset; at end-of-slice *index becomes 0.  Returns entries copied. */
static int add_objects_verbatim_1(struct rev_cache_slice_map *mapping, int *index)
{
	unsigned char *map = mapping->map;
	int i = *index, object_nr = 0;
	struct rc_object_entry *entry = RC_OBTAIN_OBJECT_ENTRY(map + i);

	/* skip over the commit entry itself */
	i += RC_ACTUAL_OBJECT_ENTRY_SIZE(entry);
	while (i < mapping->size) {
		int pos = i;

		entry = RC_OBTAIN_OBJECT_ENTRY(map + i);
		i += RC_ACTUAL_OBJECT_ENTRY_SIZE(entry);

		if (entry->type == OBJ_COMMIT) {
			*index = pos;
			return object_nr;
		}

		/* raw byte copy: the on-disk form is already what we want */
		strbuf_add(acc_buffer, map + pos, i - pos);
		object_nr++;
	}

	*index = 0;
	return object_nr;
}
/* Re-use object data from an existing slice for `commit`, copying its
 * trailing non-commit entries verbatim.  Remembers the position so a
 * subsequent call for the following commit resumes without a lookup.
 * Returns entries copied, or a negative code when the slice can't help. */
static int add_objects_verbatim(struct rev_cache_info *rci, struct commit *commit)
{
	struct rev_cache_slice_map *map;
	char found = 0;
	struct rc_index_entry *ie;
	struct rc_object_entry *entry;
	int object_nr, i;

	if (!rci->maps)
		return -1;

	/* check if we can continue where we left off */
	map = rci->last_map;
	if (!map)
		goto search_me;

	i = map->last_index;
	entry = RC_OBTAIN_OBJECT_ENTRY(map->map + i);
	if (hashcmp(entry->sha1, commit->object.sha1))
		goto search_me;

	found = 1;

search_me:
	if (!found) {
		/* cold lookup through the index */
		ie = search_index(commit->object.sha1);
		if (!ie || ie->cache_index >= idx_head.cache_nr)
			return -2;

		map = rci->maps + ie->cache_index;
		if (!map->size)
			return -3;

		i = ie->pos;
		entry = RC_OBTAIN_OBJECT_ENTRY(map->map + i);
		if (entry->type != OBJ_COMMIT || hashcmp(entry->sha1, commit->object.sha1))
			return -4;
	}

	/* can't handle end commits */
	if (entry->is_end)
		return -5;

	object_nr = add_objects_verbatim_1(map, &i);

	/* remember this */
	if (i) {
		rci->last_map = map;
		map->last_index = i;
	} else
		rci->last_map = 0;

	return object_nr;
}
/* Ensure $GIT_DIR/rev-cache exists, creating it if necessary.
 * Dies only if the directory genuinely cannot be provided. */
static void init_revcache_directory(void)
{
	struct stat fi;

	/* already present and a directory: nothing to do */
	if (!stat(git_path("rev-cache"), &fi) && S_ISDIR(fi.st_mode))
		return;

	/* The original stat-then-mkdir had a race: a concurrent creator made
	 * mkdir() fail with EEXIST and we died spuriously.  Tolerate EEXIST,
	 * but re-verify that what exists now really is a directory. */
	if (mkdir(git_path("rev-cache"), 0777)) {
		if (errno != EEXIST ||
		    stat(git_path("rev-cache"), &fi) || !S_ISDIR(fi.st_mode))
			die("can't make rev-cache directory");
	}
}
1388 void init_rev_cache_info(struct rev_cache_info *rci)
1390 memset(rci, 0, sizeof(struct rev_cache_info));
1392 rci->objects = 1;
1393 rci->legs = 0;
1394 rci->make_index = 1;
1395 rci->fuse_me = 0;
1397 rci->overwrite_all = 0;
1399 rci->add_to_pending = 1;
1401 rci->ignore_size = 0;
/* Substitute a default-initialized rev_cache_info when the caller passed
 * NULL; a non-NULL rci is left untouched. */
void maybe_fill_with_defaults(struct rev_cache_info *rci)
{
	static struct rev_cache_info def_rci;

	if (rci)
		return;

	init_rev_cache_info(&def_rci);
	/* NOTE(review): this assignment only updates the local copy of the
	 * pointer -- a caller that passed NULL still holds NULL and will
	 * crash when it dereferences rci afterwards (e.g. make_cache_slice
	 * reads rci->legs right after calling us).  To actually work this
	 * needs a 'struct rev_cache_info **' parameter or a returned
	 * pointer; confirm intent before relying on the NULL path. */
	rci = &def_rci;
}
/* Generate a new cache slice for the commits selected by revs -- or, when
 * revs is NULL, by the starts/ends commit lists -- writing it to a temp
 * file in $GIT_DIR/rev-cache and renaming it to its content hash.
 * Start/end commits encountered during the walk are appended to *starts /
 * *ends when those are non-NULL, and the slice's name is copied to
 * cache_sha1 when it is non-NULL.  Returns 0 (fatal errors die()). */
int make_cache_slice(struct rev_cache_info *rci,
	struct rev_info *revs, struct commit_list **starts, struct commit_list **ends,
	unsigned char *cache_sha1)
{
	struct rev_info therevs;
	struct strbuf buffer, startlist, endlist;
	struct rc_slice_header head;
	struct commit *commit;
	unsigned char sha1[20];
	struct strbuf merge_paths, split_paths;
	int object_nr, total_sz, fd;
	char file[PATH_MAX], *newfile;
	struct rev_cache_info *trci;
	git_SHA_CTX ctx;

	maybe_fill_with_defaults(rci);

	/* build in a temp file; it is renamed to its sha1 name at the end */
	init_revcache_directory();
	strcpy(file, git_path("rev-cache/XXXXXX"));
	fd = xmkstemp(file);

	strbuf_init(&buffer, 0);
	strbuf_init(&startlist, 0);
	strbuf_init(&endlist, 0);
	strbuf_init(&merge_paths, 0);
	strbuf_init(&split_paths, 0);
	/* object entries are accumulated through the file-global acc_buffer */
	acc_buffer = &buffer;

	if (!revs) {
		/* no walk supplied: build one from the starts/ends lists */
		revs = &therevs;
		init_revisions(revs, 0);

		/* we're gonna assume no one else has already traversed this... */
		while ((commit = pop_commit(starts)))
			add_pending_object(revs, &commit->object, 0);

		while ((commit = pop_commit(ends))) {
			commit->object.flags |= UNINTERESTING;
			add_pending_object(revs, &commit->object, 0);
		}
	}

	/* write head placeholder; the real header is rewritten at the end */
	memset(&head, 0, sizeof(head));
	head.ofs_objects = htonl(sizeof(head));
	xwrite(fd, &head, sizeof(head));

	/* init revisions! */
	revs->tree_objects = 1;
	revs->blob_objects = 1;
	revs->topo_order = 1;
	revs->lifo = 1;

	/* re-use info from other caches if possible */
	trci = &revs->rev_cache_info;
	init_rev_cache_info(trci);
	trci->add_to_pending = 0;

	setup_revisions(0, 0, revs, 0);
	if (prepare_revision_walk(revs))
		die("died preparing revision walk");

	if (rci->legs)
		make_legs(revs);

	object_nr = total_sz = 0;
	while ((commit = get_revision(revs)) != 0) {
		struct rc_object_entry object;
		int t;

		strbuf_setlen(&merge_paths, 0);
		strbuf_setlen(&split_paths, 0);

		memset(&object, 0, sizeof(object));
		object.type = OBJ_COMMIT;
		object.date = commit->date;
		object.sha1 = commit->object.sha1;

		/* sets object.is_start / object.is_end as a side effect */
		handle_paths(commit, &object, &merge_paths, &split_paths);

		if (object.is_end) {
			strbuf_add(&endlist, object.sha1, 20);
			if (ends)
				commit_list_insert(commit, ends);
		}
		/* the two *aren't* mutually exclusive */
		if (object.is_start) {
			strbuf_add(&startlist, object.sha1, 20);
			if (starts)
				commit_list_insert(commit, starts);
		}

		commit->indegree = 0;

		add_object_entry(0, &object, &merge_paths, &split_paths);
		object_nr++;

		if (rci->objects && !object.is_end) {
			/* prefer copying the object list out of an existing
			 * slice (fuse); fall back to a fresh tree walk */
			if (rci->fuse_me && (t = add_objects_verbatim(rci, commit)) >= 0)
				/* yay! we did it! */
				object_nr += t;
			else
				/* add all unique children for this commit */
				object_nr += add_unique_objects(commit);
		}

		/* print every ~1MB or so */
		if (buffer.len > 1000000) {
			write_in_full(fd, buffer.buf, buffer.len);
			total_sz += buffer.len;

			strbuf_setlen(&buffer, 0);
		}
	}

	if (buffer.len) {
		write_in_full(fd, buffer.buf, buffer.len);
		total_sz += buffer.len;
	}

	/* go ahead a free some stuff... */
	strbuf_release(&buffer);
	strbuf_release(&merge_paths);
	strbuf_release(&split_paths);
	if (path_sz)
		free(paths);
	while (path_track_alloc)
		remove_path_track(&path_track_alloc, 1);

	/* the meaning of the hash name is more or less irrelevant, it's the uniqueness that matters */
	strbuf_add(&endlist, startlist.buf, startlist.len);
	git_SHA1_Init(&ctx);
	git_SHA1_Update(&ctx, endlist.buf, endlist.len);
	git_SHA1_Final(sha1, &ctx);

	/* now actually initialize header */
	strcpy(head.signature, "REVCACHE");
	head.version = SUPPORTED_REVCACHE_VERSION;

	head.object_nr = htonl(object_nr);
	head.size = htonl(ntohl(head.ofs_objects) + total_sz);
	head.path_nr = htons(path_nr);
	hashcpy(head.sha1, sha1);

	/* some info! */
	fprintf(stderr, "objects: %d\n", object_nr);
	fprintf(stderr, "paths: %d\n", path_nr);

	lseek(fd, 0, SEEK_SET);
	xwrite(fd, &head, sizeof(head));

	if (rci->make_index && make_cache_index(rci, sha1, fd, ntohl(head.size)) < 0)
		die("can't update index");

	close(fd);

	newfile = git_path("rev-cache/%s", sha1_to_hex(sha1));
	if (rename(file, newfile))
		die("can't move temp file");

	/* let our caller know what we've just made */
	if (cache_sha1)
		hashcpy(cache_sha1, sha1);

	strbuf_release(&endlist);
	strbuf_release(&startlist);

	return 0;
}
1586 static int index_sort_hash(const void *a, const void *b)
1588 return hashcmp(((struct rc_index_entry_ondisk *)a)->sha1, ((struct rc_index_entry_ondisk *)b)->sha1);
1591 static int write_cache_index(struct strbuf *body)
1593 struct rc_index_header whead;
1594 struct lock_file *lk;
1595 int fd, i;
1597 /* clear index map if loaded */
1598 if (idx_map) {
1599 munmap(idx_map, idx_size);
1600 idx_map = 0;
1603 lk = xcalloc(sizeof(struct lock_file), 1);
1604 fd = hold_lock_file_for_update(lk, git_path("rev-cache/index"), 0);
1605 if (fd < 0) {
1606 free(lk);
1607 return -1;
1610 /* endianness yay! */
1611 memset(&whead, 0, sizeof(whead));
1612 memcpy(whead.signature, "REVINDEX", 8);
1613 whead.version = idx_head.version;
1614 whead.ofs_objects = htonl(idx_head.ofs_objects);
1615 whead.object_nr = htonl(idx_head.object_nr);
1616 whead.cache_nr = idx_head.cache_nr;
1617 whead.max_date = htonl(idx_head.max_date);
1619 write(fd, &whead, sizeof(struct rc_index_header));
1620 write_in_full(fd, idx_caches, idx_head.cache_nr * 20);
1622 for (i = 0; i <= 0xff; i++)
1623 fanout[i] = htonl(fanout[i]);
1624 write_in_full(fd, fanout, 0x100 * sizeof(uint32_t));
1626 write_in_full(fd, body->buf, body->len);
1628 if (commit_lock_file(lk) < 0)
1629 return -2;
1631 /* lk freed by lockfile.c */
1633 return 0;
/* Add (or refresh) the index entries for one cache slice: scan the slice
 * mapped from fd, merge its commit entries into the existing index body,
 * rebuild the fanout table and rewrite the index file.
 * Returns 0 on success, -1 on mmap or write failure. */
int make_cache_index(struct rev_cache_info *rci, unsigned char *cache_sha1,
	int fd, unsigned int size)
{
	struct strbuf buffer;
	int i, cache_index, cur;
	unsigned char *map;
	unsigned long max_date;

	maybe_fill_with_defaults(rci);

	if (!idx_map)
		init_index();

	lseek(fd, 0, SEEK_SET);
	/* MAP_PRIVATE + PROT_WRITE: entries are byte-swapped in place by the
	 * RC_OBTAIN macros without touching the file (read access on fd is
	 * all MAP_PRIVATE requires) */
	map = xmmap(0, size, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
	if (map == MAP_FAILED)
		return -1;

	strbuf_init(&buffer, 0);
	if (idx_map) {
		/* updating: seed the body with all existing entries */
		strbuf_add(&buffer, idx_map + fanout[0], fanout[0x100] - fanout[0]);
	} else {
		/* not an update */
		memset(&idx_head, 0, sizeof(struct rc_index_header));
		idx_caches = 0;

		strcpy(idx_head.signature, "REVINDEX");
		idx_head.version = SUPPORTED_REVINDEX_VERSION;
		idx_head.ofs_objects = sizeof(struct rc_index_header) + 0x100 * sizeof(uint32_t);
	}

	/* are we remaking a slice? */
	for (i = 0; i < idx_head.cache_nr; i++)
		if (!hashcmp(idx_caches + i * 20, cache_sha1))
			break;

	if (i == idx_head.cache_nr) {
		/* new slice: grow the slice-sha1 table and shift the body */
		cache_index = idx_head.cache_nr++;
		idx_head.ofs_objects += 20;

		idx_caches = xrealloc(idx_caches, idx_head.cache_nr * 20);
		hashcpy(idx_caches + cache_index * 20, cache_sha1);
	} else
		cache_index = i;

	i = sizeof(struct rc_slice_header); /* offset */
	max_date = idx_head.max_date;
	while (i < size) {
		struct rc_index_entry index_entry, *entry;
		struct rc_index_entry_ondisk *disked_entry;
		struct rc_object_entry *object_entry = RC_OBTAIN_OBJECT_ENTRY(map + i);
		unsigned long date;
		int off, pos = i;

		i += RC_ACTUAL_OBJECT_ENTRY_SIZE(object_entry);

		/* only commits are indexed */
		if (object_entry->type != OBJ_COMMIT)
			continue;

		/* don't include ends; otherwise we'll find ourselves in loops */
		if (object_entry->is_end)
			continue;

		/* handle index duplication
		 * -> keep old copy unless new one is a start -- based on expected usage, older ones will be more
		 * likely to lead to greater slice traversals than new ones */
		date = object_entry->date;
		if (date > idx_head.max_date) {
			/* newer than anything indexed: cannot be a duplicate */
			disked_entry = 0;
			if (date > max_date)
				max_date = date;
		} else
			disked_entry = search_index_1(object_entry->sha1);

		if (disked_entry && !object_entry->is_start && !rci->overwrite_all)
			continue;
		else if (disked_entry) {
			/* mmm, pointer arithmetic... tasty */ /* (entry - idx_map = offset, so cast is valid) */
			/* redirect to the same entry's copy inside buffer, so
			 * the in-place update below lands in the new body */
			off = (unsigned int)((unsigned char *)disked_entry - idx_map) - fanout[0];
			disked_entry = (struct rc_index_entry_ondisk *)(buffer.buf + off);
			entry = from_disked_rc_index_entry(disked_entry, 0);
		} else
			entry = &index_entry;

		memset(entry, 0, sizeof(index_entry));
		entry->sha1 = object_entry->sha1;
		entry->is_start = object_entry->is_start;
		entry->cache_index = cache_index;
		entry->pos = pos;

		if (entry == &index_entry) {
			/* brand-new entry: append */
			strbuf_add(&buffer, to_disked_rc_index_entry(entry, 0), sizeof(struct rc_index_entry_ondisk));
			idx_head.object_nr++;
		} else
			/* overwrite the existing copy in buffer */
			to_disked_rc_index_entry(entry, disked_entry);
	}

	idx_head.max_date = max_date;
	qsort(buffer.buf, buffer.len / sizeof(struct rc_index_entry_ondisk), sizeof(struct rc_index_entry_ondisk), index_sort_hash);

	/* generate fanout */
	cur = 0x00;
	for (i = 0; i < buffer.len; i += sizeof(struct rc_index_entry_ondisk)) {
		struct rc_index_entry_ondisk *entry = (struct rc_index_entry_ondisk *)(buffer.buf + i);

		while (cur <= entry->sha1[0])
			fanout[cur++] = i + idx_head.ofs_objects;
	}

	while (cur <= 0xff)
		fanout[cur++] = idx_head.ofs_objects + buffer.len;

	/* BOOM! */
	if (write_cache_index(&buffer))
		return -1;

	munmap(map, size);
	strbuf_release(&buffer);

	/* idx_map is unloaded without cleanup_cache_slices(), so regardless of previous index existence
	 * we can still free this up */
	free(idx_caches);
	/* NOTE(review): idx_caches is left dangling (not set to NULL) after
	 * this free -- confirm every caller reloads the index before the
	 * pointer is read again, else this is a use-after-free waiting to
	 * happen. */

	return 0;
}
1764 void starts_from_slices(struct rev_info *revs, unsigned int flags, unsigned char *which, int n)
1766 struct commit *commit;
1767 int i;
1769 if (!idx_map)
1770 init_index();
1771 if (!idx_map)
1772 return;
1774 for (i = idx_head.ofs_objects; i < idx_size; i += sizeof(struct rc_index_entry_ondisk)) {
1775 struct rc_index_entry *entry = RC_OBTAIN_INDEX_ENTRY(idx_map + i);
1777 if (!entry->is_start)
1778 continue;
1780 /* only include entries in 'which' slices */
1781 if (n) {
1782 int j;
1784 for (j = 0; j < n; j++)
1785 if (!hashcmp(idx_caches + entry->cache_index * 20, which + j * 20))
1786 break;
1788 if (j == n)
1789 continue;
1792 commit = lookup_commit(entry->sha1);
1793 if (!commit)
1794 continue;
1796 commit->object.flags |= flags;
1797 add_pending_object(revs, &commit->object, 0);
/* A cache slice queued for re-indexing: its name (sha1), an open file
 * descriptor, and stat info so slices can be sorted by creation time. */
struct slice_fd_time {
	unsigned char sha1[20];
	int fd;
	struct stat fi;
};
1809 int slice_time_sort(const void *a, const void *b)
1811 unsigned long at, bt;
1813 at = ((struct slice_fd_time *)a)->fi.st_ctime;
1814 bt = ((struct slice_fd_time *)b)->fi.st_ctime;
1816 if (at == bt)
1817 return 0;
1819 return at > bt ? 1 : -1;
1822 int regenerate_cache_index(struct rev_cache_info *rci)
1824 DIR *dirh;
1825 int i;
1826 struct slice_fd_time info;
1827 struct strbuf slices;
1829 /* first remove old index if it exists */
1830 unlink_or_warn(git_path("rev-cache/index"));
1832 strbuf_init(&slices, 0);
1834 dirh = opendir(git_path("rev-cache"));
1835 if (dirh) {
1836 struct dirent *de;
1837 struct stat fi;
1838 int fd;
1839 unsigned char sha1[20];
1841 while ((de = readdir(dirh))) {
1842 if (de->d_name[0] == '.')
1843 continue;
1845 if (get_sha1_hex(de->d_name, sha1))
1846 continue;
1848 /* open with RDWR because of mmap call in make_cache_index() */
1849 fd = open_cache_slice(sha1, O_RDONLY);
1850 if (fd < 0 || fstat(fd, &fi)) {
1851 warning("bad cache found [%s]; fuse recommended", de->d_name);
1852 if (fd > 0)
1853 close(fd);
1854 continue;
1857 hashcpy(info.sha1, sha1);
1858 info.fd = fd;
1859 memcpy(&info.fi, &fi, sizeof(struct stat));
1861 strbuf_add(&slices, &info, sizeof(info));
1864 closedir(dirh);
1867 /* we want oldest first -> upon overlap, older slices are more likely to have a larger section,
1868 * as of the overlapped commit */
1869 qsort(slices.buf, slices.len / sizeof(info), sizeof(info), slice_time_sort);
1871 for (i = 0; i < slices.len; i += sizeof(info)) {
1872 struct slice_fd_time *infop = (struct slice_fd_time *)(slices.buf + i);
1873 struct stat *fip = &infop->fi;
1874 int fd = infop->fd;
1876 if (make_cache_index(rci, infop->sha1, fd, fip->st_size) < 0)
1877 die("error writing cache");
1879 close(fd);
1882 strbuf_release(&slices);
1884 return 0;
1887 static int add_slices_for_fuse(struct rev_cache_info *rci, struct string_list *files, struct strbuf *ignore)
1889 unsigned char sha1[20];
1890 char base[PATH_MAX];
1891 int baselen, i, slice_nr = 0;
1892 struct stat fi;
1893 DIR *dirh;
1894 struct dirent *de;
1896 strncpy(base, git_path("rev-cache"), sizeof(base));
1897 baselen = strlen(base);
1899 dirh = opendir(base);
1900 if (!dirh)
1901 return 0;
1903 while ((de = readdir(dirh))) {
1904 if (de->d_name[0] == '.')
1905 continue;
1907 base[baselen] = '/';
1908 strncpy(base + baselen + 1, de->d_name, sizeof(base) - baselen - 1);
1910 if (get_sha1_hex(de->d_name, sha1)) {
1911 /* whatever it is, we don't need it... */
1912 string_list_insert(base, files);
1913 continue;
1916 /* _theoretically_ it is possible a slice < ignore_size to map objects not covered by, yet reachable from,
1917 * a slice >= ignore_size, meaning that we could potentially delete an 'unfused' slice; but if that
1918 * ever *did* happen their cache structure'd be so fucked up they might as well refuse the entire thing.
1919 * and at any rate the worst it'd do is make rev-list revert to standard walking in that (small) bit.
1921 if (rci->ignore_size) {
1922 if (stat(base, &fi))
1923 warning("can't query file %s\n", base);
1924 else if (fi.st_size >= rci->ignore_size) {
1925 strbuf_add(ignore, sha1, 20);
1926 continue;
1928 } else {
1929 /* check if a pointer */
1930 struct cache_slice_pointer ptr;
1931 int fd = open(base, O_RDONLY);
1933 if (fd < 0)
1934 goto dont_save;
1935 if (sizeof(ptr) != read_in_full(fd, &ptr, sizeof(ptr)))
1936 goto dont_save;
1938 close(fd);
1939 if (!strcmp(ptr.signature, "REVCOPTR")) {
1940 strbuf_add(ignore, sha1, 20);
1941 continue;
1945 dont_save:
1946 for (i = idx_head.cache_nr - 1; i >= 0; i--) {
1947 if (!hashcmp(idx_caches + i * 20, sha1))
1948 break;
1951 if (i >= 0)
1952 rci->maps[i].size = 1;
1954 string_list_insert(base, files);
1955 slice_nr++;
1958 closedir(dirh);
1960 return slice_nr;
1963 /* the most work-intensive attributes in the cache are the unique objects and size, both
1964 * of which can be re-used. although path structures will be isomorphic, path generation is
1965 * not particularly expensive, and at any rate we need to re-sort the commits */
1966 int fuse_cache_slices(struct rev_cache_info *rci, struct rev_info *revs)
1968 unsigned char cache_sha1[20];
1969 struct string_list files = {0, 0, 0, 1}; /* dup */
1970 struct strbuf ignore;
1971 int i;
1973 maybe_fill_with_defaults(rci);
1975 if (!idx_map)
1976 init_index();
1977 if (!idx_map)
1978 return -1;
1980 strbuf_init(&ignore, 0);
1981 rci->maps = xcalloc(idx_head.cache_nr, sizeof(struct rev_cache_slice_map));
1982 if (add_slices_for_fuse(rci, &files, &ignore) <= 1) {
1983 printf("nothing to fuse\n");
1984 return 1;
1987 if (ignore.len) {
1988 starts_from_slices(revs, UNINTERESTING, (unsigned char *)ignore.buf, ignore.len / 20);
1989 strbuf_release(&ignore);
1992 /* initialize mappings */
1993 for (i = idx_head.cache_nr - 1; i >= 0; i--) {
1994 struct rev_cache_slice_map *map = rci->maps + i;
1995 struct stat fi;
1996 int fd;
1998 if (!map->size)
1999 continue;
2000 map->size = 0;
2002 /* pointers are never fused, so we can use open directly */
2003 fd = open(git_path("rev-cache/%s", sha1_to_hex(idx_caches + i * 20)), O_RDONLY);
2004 if (fd <= 0 || fstat(fd, &fi))
2005 continue;
2006 if (fi.st_size < sizeof(struct rc_slice_header))
2007 continue;
2009 map->map = xmmap(0, fi.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
2010 if (map->map == MAP_FAILED)
2011 continue;
2013 close(fd);
2014 map->size = fi.st_size;
2017 rci->make_index = 0;
2018 rci->fuse_me = 1;
2019 if (make_cache_slice(rci, revs, 0, 0, cache_sha1) < 0)
2020 die("can't make cache slice");
2022 printf("%s\n", sha1_to_hex(cache_sha1));
2024 /* clean up time! */
2025 for (i = idx_head.cache_nr - 1; i >= 0; i--) {
2026 struct rev_cache_slice_map *map = rci->maps + i;
2028 if (!map->size)
2029 continue;
2031 munmap(map->map, map->size);
2033 free(rci->maps);
2034 cleanup_cache_slices();
2036 for (i = 0; i < files.nr; i++) {
2037 char *name = files.items[i].string;
2039 fprintf(stderr, "removing %s\n", name);
2040 unlink_or_warn(name);
2043 string_list_clear(&files, 0);
2045 return regenerate_cache_index(rci);
2048 static int verify_cache_slice(const char *slice_path, unsigned char *sha1)
2050 struct rc_slice_header head;
2051 int fd, len, retval = -1;
2052 unsigned char *map = MAP_FAILED;
2053 struct stat fi;
2055 len = strlen(slice_path);
2056 if (len < 40)
2057 return -2;
2058 if (get_sha1_hex(slice_path + len - 40, sha1))
2059 return -3;
2061 fd = open(slice_path, O_RDONLY);
2062 if (fd == -1)
2063 goto end;
2064 if (fstat(fd, &fi) || fi.st_size < sizeof(head))
2065 goto end;
2067 map = xmmap(0, sizeof(head), PROT_READ, MAP_PRIVATE, fd, 0);
2068 if (map == MAP_FAILED)
2069 goto end;
2070 if (get_cache_slice_header(sha1, map, fi.st_size, &head))
2071 goto end;
2073 retval = 0;
2075 end:
2076 if (map != MAP_FAILED)
2077 munmap(map, sizeof(head));
2078 if (fd > 0)
2079 close(fd);
2081 return retval;
2084 int make_cache_slice_pointer(struct rev_cache_info *rci, const char *slice_path)
2086 struct cache_slice_pointer ptr;
2087 int fd;
2088 unsigned char sha1[20];
2090 maybe_fill_with_defaults(rci);
2091 rci->overwrite_all = 1;
2093 if (verify_cache_slice(slice_path, sha1) < 0)
2094 return -1;
2096 strcpy(ptr.signature, "REVCOPTR");
2097 ptr.version = SUPPORTED_REVCOPTR_VERSION;
2098 strcpy(ptr.path, make_nonrelative_path(slice_path));
2100 fd = open(git_path("rev-cache/%s", sha1_to_hex(sha1)), O_RDWR | O_CREAT | O_TRUNC, 0666);
2101 if (fd < 0)
2102 return -2;
2104 write_in_full(fd, &ptr, sizeof(ptr));
2105 make_cache_index(rci, sha1, fd, sizeof(ptr));
2107 close(fd);
2109 return 0;