#include "cache.h"
#include "object.h"
#include "commit.h"
#include "tree.h"
#include "tree-walk.h"
#include "blob.h"
#include "tag.h"
#include "diff.h"
#include "revision.h"
#include "rev-cache.h"
#include "run-command.h"
#include "string-list.h"
struct bad_slice {
	unsigned char sha1[20];
	struct bad_slice *next;
};

struct name_list {
	unsigned char sha1[20];
	unsigned int len;
	struct name_list *next;

	char buf[FLEX_ARRAY];
};

struct cache_slice_pointer {
	char signature[8]; /* REVCOPTR */
	char version;
	char path[PATH_MAX + 1];
};
/* list resembles pack index format */
static uint32_t fanout[0xff + 2];

static unsigned char *idx_map;
static int idx_size;
static struct rc_index_header idx_head;
static char no_idx, add_to_pending, add_names;
static unsigned char *idx_caches;

static struct bad_slice *bad_slices;
static struct name_list *name_lists, *cur_name_list;

static struct strbuf *acc_name_buffer;
static struct strbuf *acc_buffer;

#define SLOP 5
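/*
 * SLOP works much like the like-named heuristic in revision.c's
 * limit_list(): once no interesting paths remain we keep walking a few
 * more rounds of older commits before concluding that the uninteresting
 * side really is exhausted.
 */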
/* initialization */
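/*
 * The from_disked/to_disked helpers below convert between the in-memory
 * and on-disk (network byte order, packed flag bits) entry formats.  When
 * no destination is supplied they hand out slots from a small static ring
 * of four entries, so a caller may hold up to four results at once before
 * the oldest slot is recycled.
 */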
struct rc_index_entry *from_disked_rc_index_entry(struct rc_index_entry_ondisk *src, struct rc_index_entry *dst)
{
	static struct rc_index_entry entry[4];
	static int cur;

	if (!dst)
		dst = &entry[cur++ & 0x3];

	dst->sha1 = src->sha1;
	dst->is_start = !!(src->flags & 0x80);
	dst->cache_index = src->flags & 0x7f;
	dst->pos = ntohl(src->pos);

	return dst;
}
struct rc_index_entry_ondisk *to_disked_rc_index_entry(struct rc_index_entry *src, struct rc_index_entry_ondisk *dst)
{
	static struct rc_index_entry_ondisk entry[4];
	static int cur;

	if (!dst)
		dst = &entry[cur++ & 0x3];

	if (dst->sha1 != src->sha1)
		hashcpy(dst->sha1, src->sha1);
	dst->flags = (unsigned char)src->is_start << 7 | (unsigned char)src->cache_index;
	dst->pos = htonl(src->pos);

	return dst;
}
struct rc_object_entry *from_disked_rc_object_entry(struct rc_object_entry_ondisk *src, struct rc_object_entry *dst)
{
	static struct rc_object_entry entry[4];
	static int cur;

	if (!dst)
		dst = &entry[cur++ & 0x3];

	/* flags: type takes the top 3 bits (mirroring to_disked_rc_object_entry) */
	dst->type = src->flags >> 5 & 0x07;
	dst->is_end = !!(src->flags & 0x10);
	dst->is_start = !!(src->flags & 0x08);
	dst->uninteresting = !!(src->flags & 0x04);
	dst->include = !!(src->flags & 0x02);
	dst->flag = !!(src->flags & 0x01);

	dst->sha1 = src->sha1;
	dst->merge_nr = src->merge_nr;
	dst->split_nr = src->split_nr;

	/* sizes: 3 bits each for size_size and name_size, 2 for padding */
	dst->size_size = src->sizes >> 5 & 0x07;
	dst->name_size = src->sizes >> 2 & 0x07;
	dst->padding = src->sizes & 0x03;

	dst->date = ntohl(src->date);
	dst->path = ntohs(src->path);

	return dst;
}
struct rc_object_entry_ondisk *to_disked_rc_object_entry(struct rc_object_entry *src, struct rc_object_entry_ondisk *dst)
{
	static struct rc_object_entry_ondisk entry[4];
	static int cur;

	if (!dst)
		dst = &entry[cur++ & 0x3];

	dst->flags = (unsigned char)src->type << 5;
	dst->flags |= (unsigned char)src->is_end << 4;
	dst->flags |= (unsigned char)src->is_start << 3;
	dst->flags |= (unsigned char)src->uninteresting << 2;
	dst->flags |= (unsigned char)src->include << 1;
	dst->flags |= (unsigned char)src->flag;

	if (dst->sha1 != src->sha1)
		hashcpy(dst->sha1, src->sha1);
	dst->merge_nr = src->merge_nr;
	dst->split_nr = src->split_nr;

	dst->sizes = (unsigned char)src->size_size << 5;
	dst->sizes |= (unsigned char)src->name_size << 2;
	dst->sizes |= (unsigned char)src->padding;

	dst->date = htonl(src->date);
	dst->path = htons(src->path);

	return dst;
}
static void mark_bad_slice(unsigned char *sha1)
{
	struct bad_slice *bad;

	bad = xcalloc(1, sizeof(struct bad_slice));
	hashcpy(bad->sha1, sha1);

	bad->next = bad_slices;
	bad_slices = bad;
}
static int is_bad_slice(unsigned char *sha1)
{
	struct bad_slice *bad = bad_slices;

	while (bad) {
		if (!hashcmp(bad->sha1, sha1))
			return 1;
		bad = bad->next;
	}

	return 0;
}
static int get_index_head(unsigned char *map, int len, struct rc_index_header *head, uint32_t *fanout, unsigned char **caches)
{
	struct rc_index_header whead;
	int i, index = sizeof(struct rc_index_header);

	memcpy(&whead, map, sizeof(struct rc_index_header));
	if (memcmp(whead.signature, "REVINDEX", 8) || whead.version != SUPPORTED_REVINDEX_VERSION)
		return -1;

	memcpy(head->signature, "REVINDEX", 8);
	head->version = whead.version;
	head->ofs_objects = ntohl(whead.ofs_objects);
	head->object_nr = ntohl(whead.object_nr);
	head->cache_nr = whead.cache_nr;
	head->max_date = ntohl(whead.max_date);

	if (len < index + head->cache_nr * 20 + 0x100 * sizeof(uint32_t))
		return -2;

	*caches = xmalloc(head->cache_nr * 20);
	memcpy(*caches, map + index, head->cache_nr * 20);
	index += head->cache_nr * 20;

	memcpy(fanout, map + index, 0x100 * sizeof(uint32_t));
	for (i = 0; i <= 0xff; i++)
		fanout[i] = ntohl(fanout[i]);
	fanout[0x100] = len;

	return 0;
}
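/*
 * On-disk index layout, as read above:
 *
 *	rc_index_header
 *	cache_nr * 20-byte slice sha1s
 *	0x100 * uint32_t fanout table
 *	rc_index_entry_ondisk records, sorted by sha1
 *
 * fanout[b] holds the file offset of the first entry whose sha1 starts
 * with byte b; fanout[0x100] is pinned to the file length so every bucket
 * has an end.
 */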
/* added in init_index */
static void cleanup_cache_slices(void)
{
	if (idx_map) {
		free(idx_caches);
		munmap(idx_map, idx_size);
		idx_map = 0;
	}

	while (name_lists) {
		struct name_list *nl = name_lists->next;
		free(name_lists);
		name_lists = nl;
	}
}
static int init_index(void)
{
	int fd;
	struct stat fi;

	fd = open(git_path("rev-cache/index"), O_RDONLY);
	if (fd == -1 || fstat(fd, &fi))
		goto end;
	if (fi.st_size < sizeof(struct rc_index_header))
		goto end;

	idx_size = fi.st_size;
	idx_map = xmmap(0, idx_size, PROT_READ, MAP_PRIVATE, fd, 0);
	close(fd);
	if (idx_map == MAP_FAILED)
		goto end;
	if (get_index_head(idx_map, fi.st_size, &idx_head, fanout, &idx_caches))
		goto end;

	atexit(cleanup_cache_slices);

	return 0;

end:
	idx_map = 0;
	no_idx = 1;
	return -1;
}
/* this assumes index is already loaded */
static struct rc_index_entry_ondisk *search_index_1(unsigned char *sha1)
{
	int start, end, starti, endi, i, len, r;
	struct rc_index_entry_ondisk *ie;

	if (!idx_map)
		return 0;

	/* binary search */
	start = fanout[(int)sha1[0]];
	end = fanout[(int)sha1[0] + 1];
	len = (end - start) / sizeof(struct rc_index_entry_ondisk);
	if (!len || len * sizeof(struct rc_index_entry_ondisk) != end - start)
		return 0;

	starti = 0;
	endi = len - 1;
	for (;;) {
		i = (endi + starti) / 2;
		ie = (struct rc_index_entry_ondisk *)(idx_map + start + i * sizeof(struct rc_index_entry_ondisk));
		r = hashcmp(sha1, ie->sha1);

		if (r) {
			if (starti + 1 == endi) {
				starti++;
				continue;
			} else if (starti == endi)
				break;

			if (r > 0)
				starti = i;
			else /* r < 0 */
				endi = i;
		} else
			return ie;
	}

	return 0;
}
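/*
 * Example: for a sha1 beginning 0xab, the search above only has to bisect
 * the entries between fanout[0xab] and fanout[0xac], i.e. the records
 * whose first byte is 0xab -- the same trick pack .idx files use.
 */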
static struct rc_index_entry *search_index(unsigned char *sha1)
{
	struct rc_index_entry_ondisk *ied = search_index_1(sha1);

	if (ied)
		return from_disked_rc_index_entry(ied, 0);

	return 0;
}
unsigned char *get_cache_slice(struct commit *commit)
{
	struct rc_index_entry *ie;
	unsigned char *sha1;

	if (!idx_map) {
		if (no_idx)
			return 0;
		init_index();
	}

	if (commit->date > idx_head.max_date)
		return 0;

	ie = search_index(commit->object.sha1);
	if (ie && ie->cache_index < idx_head.cache_nr) {
		sha1 = idx_caches + ie->cache_index * 20;

		if (is_bad_slice(sha1))
			return 0;
		return sha1;
	}

	return 0;
}
/* traversal */

static unsigned long decode_size(unsigned char *str, int len);

/* on failure */
static void restore_commit(struct commit *commit)
{
	commit->object.flags &= ~(ADDED | SEEN | FACE_VALUE);

	if (!commit->object.parsed) {
		while (pop_commit(&commit->parents))
			;

		parse_commit(commit);
	}
}
static void handle_noncommit(struct rev_info *revs, struct commit *commit, unsigned char *ptr, struct rc_object_entry *entry)
{
	struct blob *blob;
	struct tree *tree;
	struct object *obj;
	unsigned long size, name_index;

	size = decode_size(ptr + RC_ENTRY_SIZE_OFFSET(entry), entry->size_size);
	switch (entry->type) {
	case OBJ_TREE:
		if (!revs->tree_objects)
			return;

		tree = lookup_tree(entry->sha1);
		if (!tree)
			return;

		tree->size = size;
		commit->tree = tree;
		obj = (struct object *)tree;
		break;

	case OBJ_BLOB:
		if (!revs->blob_objects)
			return;

		blob = lookup_blob(entry->sha1);
		if (!blob)
			return;

		obj = (struct object *)blob;
		break;

	default:
		/* tag objects aren't really supposed to be here */
		return;
	}

	if (add_names && cur_name_list) {
		name_index = decode_size(ptr + RC_ENTRY_NAME_OFFSET(entry), entry->name_size);

		if (name_index >= cur_name_list->len)
			name_index = 0;
	} else
		name_index = 0;

	obj->flags |= FACE_VALUE;
	if (add_to_pending) {
		char *name = "";

		if (name_index)
			name = cur_name_list->buf + name_index;

		add_pending_object(revs, obj, name);
	}
}
static int setup_traversal(struct rc_slice_header *head, unsigned char *map, struct commit *commit, struct commit_list **work,
		struct commit_list **unwork, int *ipath_nr, int *upath_nr, char *ioutside)
{
	struct rc_index_entry *iep;
	struct rc_object_entry *oep;
	struct commit_list *prev, *wp, **wpp;
	int retval;

	iep = search_index(commit->object.sha1);
	oep = RC_OBTAIN_OBJECT_ENTRY(map + iep->pos);
	if (commit->object.flags & UNINTERESTING) {
		++*upath_nr;
		oep->uninteresting = 1;
	} else
		++*ipath_nr;

	oep->include = 1;
	to_disked_rc_object_entry(oep, (struct rc_object_entry_ondisk *)(map + iep->pos));
	retval = iep->pos;

	/* include any others in the work array */
	prev = 0;
	wpp = work;
	wp = *work;
	while (wp) {
		struct object *obj = &wp->item->object;
		struct commit *co;

		/* is this in our cache slice? */
		iep = search_index(obj->sha1);
		if (!iep || hashcmp(idx_caches + iep->cache_index * 20, head->sha1)) {
			/* there are interesting objects outside the slice */
			if (!(obj->flags & UNINTERESTING))
				*ioutside = 1;

			prev = wp;
			wp = wp->next;
			wpp = &wp;
			continue;
		}

		if (iep->pos < retval)
			retval = iep->pos;

		oep = RC_OBTAIN_OBJECT_ENTRY(map + iep->pos);

		/* mark this for later */
		oep->include = 1;
		oep->uninteresting = !!(obj->flags & UNINTERESTING);
		to_disked_rc_object_entry(oep, (struct rc_object_entry_ondisk *)(map + iep->pos));

		/* count even if not in slice so we can stop enumerating if possible */
		if (obj->flags & UNINTERESTING)
			++*upath_nr;
		else
			++*ipath_nr;

		/* remove from work list */
		co = pop_commit(wpp);
		wp = *wpp;
		if (prev)
			prev->next = wp;

		/* ...and store in temp list so we can restore work on failure */
		commit_list_insert(co, unwork);
	}

	return retval;
}
#define IPATH	0x40
#define UPATH	0x80

#define GET_COUNT(x)	((x) & 0x3f)
#define SET_COUNT(x, s)	((x) = ((x) & ~0x3f) | ((s) & 0x3f))
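/*
 * During traversal each walked path gets one byte of state in paths[]:
 * bit 6 (IPATH) and bit 7 (UPATH) record whether the path is interesting
 * or uninteresting, and the low six bits count how many merge parents
 * still have to be seen before the path may be closed (GET_COUNT and
 * SET_COUNT above).
 */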
static int traverse_cache_slice_1(struct rc_slice_header *head, unsigned char *map,
		struct rev_info *revs, struct commit *commit,
		unsigned long *date_so_far, int *slop_so_far,
		struct commit_list ***queue, struct commit_list **work)
{
	struct commit_list *insert_cache = 0, *myq = 0, **myqp = &myq, *mywork = 0, **myworkp = &mywork, *unwork = 0;
	struct commit **last_objects, *co;
	unsigned long date = date_so_far ? *date_so_far : ~0ul;
	int i, ipath_nr = 0, upath_nr = 0, orig_obj_nr = 0,
		total_path_nr = head->path_nr, retval = -1, slop = slop_so_far ? *slop_so_far : SLOP;
	char consume_children = 0, ioutside = 0;
	unsigned char *paths;

	/* take note in case we need to regress */
	orig_obj_nr = revs->pending.nr;

	i = setup_traversal(head, map, commit, work, &unwork, &ipath_nr, &upath_nr, &ioutside);
	if (i < 0)
		return -1;

	paths = xcalloc(total_path_nr, sizeof(uint16_t));
	last_objects = xcalloc(total_path_nr, sizeof(struct commit *));

	/* i already set */
	while (i < head->size) {
		struct rc_object_entry *entry = RC_OBTAIN_OBJECT_ENTRY(map + i);
		int path = entry->path;
		struct object *obj;
		int index = i;

		i += RC_ACTUAL_OBJECT_ENTRY_SIZE(entry);

		/* add extra objects if necessary */
		if (entry->type != OBJ_COMMIT) {
			if (consume_children)
				handle_noncommit(revs, co, map + index, entry);

			continue;
		} else
			consume_children = 0;

		if (path >= total_path_nr)
			goto end;

		/* in one of our branches?
		 * uninteresting trumps interesting */
		if (entry->include)
			paths[path] |= entry->uninteresting ? UPATH : IPATH;
		else if (!paths[path])
			continue;

		/* date stuff */
		if (revs->max_age != -1 && entry->date < revs->max_age)
			paths[path] |= UPATH;

		/* lookup object */
		co = lookup_commit(entry->sha1);
		obj = &co->object;

		if (obj->flags & UNINTERESTING)
			paths[path] |= UPATH;

		if ((paths[path] & IPATH) && (paths[path] & UPATH)) {
			paths[path] = UPATH;
			ipath_nr--;

			/* mark edge */
			if (last_objects[path]) {
				parse_commit(last_objects[path]);

				/* we needn't worry about the unique field; that will be valid as
				 * long as we're not an end entry */
				last_objects[path]->object.flags &= ~FACE_VALUE;
				last_objects[path] = 0;
			}
			obj->flags |= BOUNDARY;
		}

		/* now we gotta re-assess the whole interesting thing... */
		entry->uninteresting = !!(paths[path] & UPATH);

		/* first close paths */
		if (entry->split_nr) {
			int j, off = index + sizeof(struct rc_object_entry_ondisk) + RC_PATH_SIZE(entry->merge_nr);

			for (j = 0; j < entry->split_nr; j++) {
				unsigned short p = ntohs(*(uint16_t *)(map + off + RC_PATH_SIZE(j)));

				if (p >= total_path_nr)
					goto end;

				/* boundary commit? */
				if ((paths[p] & IPATH) && entry->uninteresting) {
					if (last_objects[p]) {
						parse_commit(last_objects[p]);

						last_objects[p]->object.flags &= ~FACE_VALUE;
						last_objects[p] = 0;
					}
					obj->flags |= BOUNDARY;
				} else if (last_objects[p] && !last_objects[p]->object.parsed) {
					commit_list_insert(co, &last_objects[p]->parents);
				}

				/* can't close a merge path until all our parents have been encountered */
				if (GET_COUNT(paths[p])) {
					SET_COUNT(paths[p], GET_COUNT(paths[p]) - 1);

					if (GET_COUNT(paths[p]))
						continue;
				}

				if (paths[p] & IPATH)
					ipath_nr--;
				else
					upath_nr--;

				paths[p] = 0;
				last_objects[p] = 0;
			}
		}

		/* make topo relations */
		if (last_objects[path] && !last_objects[path]->object.parsed) {
			commit_list_insert(co, &last_objects[path]->parents);
		}

		/* we've been here already */
		if (obj->flags & ADDED) {
			if (entry->uninteresting && !(obj->flags & UNINTERESTING)) {
				obj->flags |= UNINTERESTING;
				mark_parents_uninteresting(co);
				upath_nr--;
			} else if (!entry->uninteresting)
				ipath_nr--;

			paths[path] = 0;
			continue;
		}

		/* initialize commit */
		if (!entry->is_end) {
			co->date = entry->date;
			obj->flags |= ADDED | FACE_VALUE;
		} else
			parse_commit(co);

		obj->flags |= SEEN;

		if (entry->uninteresting)
			obj->flags |= UNINTERESTING;
		else if (co->date < date)
			date = co->date;

		/* we need to know what the edges are */
		last_objects[path] = co;

		/* add to list */
		if (slop && !(revs->min_age != -1 && co->date > revs->min_age)) {

			if (!(obj->flags & UNINTERESTING) || revs->show_all) {
				if (entry->is_end)
					myworkp = &commit_list_insert(co, myworkp)->next;
				else
					myqp = &commit_list_insert(co, myqp)->next;

				/* add children to list as well */
				if (obj->flags & UNINTERESTING)
					consume_children = 0;
				else
					consume_children = 1;
			}
		}

		/* should we continue? */
		if (!slop) {
			if (!upath_nr) {
				break;
			} else if (ioutside || revs->show_all) {
				/* pass it back to rev-list
				 * we purposely ignore everything outside this cache, so we don't needlessly traverse the whole
				 * thing on uninteresting, but that does mean that we may need to bounce back
				 * and forth a few times with rev-list */
				myworkp = &commit_list_insert(co, myworkp)->next;

				paths[path] = 0;
				upath_nr--;
			} else {
				break;
			}
		} else if (!ipath_nr && co->date <= date)
			slop--;
		else
			slop = SLOP;

		/* open parents */
		if (entry->merge_nr) {
			int j, off = index + sizeof(struct rc_object_entry_ondisk);
			char flag = entry->uninteresting ? UPATH : IPATH;

			for (j = 0; j < entry->merge_nr; j++) {
				unsigned short p = ntohs(*(uint16_t *)(map + off + RC_PATH_SIZE(j)));

				if (p >= total_path_nr)
					goto end;

				if (paths[p] & flag)
					continue;

				if (flag == IPATH)
					ipath_nr++;
				else
					upath_nr++;

				paths[p] |= flag;
			}

			/* make sure we don't use this path before all our parents have had their say */
			SET_COUNT(paths[path], entry->merge_nr);
		}
	}

	if (date_so_far)
		*date_so_far = date;
	if (slop_so_far)
		*slop_so_far = slop;
	retval = 0;

	/* success: attach to given lists */
	if (myqp != &myq) {
		**queue = myq;
		*queue = myqp;
	}

	while ((co = pop_commit(&mywork)) != 0) {
		insert_by_date_cached(co, work, insert_cache, &insert_cache);
	}

	/* free backup */
	while (pop_commit(&unwork))
		;

end:
	free(paths);
	free(last_objects);

	/* failure: restore work to previous condition
	 * (cache corruption should *not* be fatal) */
	if (retval) {
		while ((co = pop_commit(&unwork)) != 0) {
			restore_commit(co);
			co->object.flags |= SEEN;
			insert_by_date(co, work);
		}

		/* free lists */
		while ((co = pop_commit(&myq)) != 0)
			restore_commit(co);

		while ((co = pop_commit(&mywork)) != 0)
			restore_commit(co);

		/* truncate object array */
		for (i = orig_obj_nr; i < revs->pending.nr; i++) {
			struct object *obj = revs->pending.objects[i].item;

			obj->flags &= ~FACE_VALUE;
		}
		revs->pending.nr = orig_obj_nr;
	}

	return retval;
}
static struct name_list *get_cache_slice_name_list(struct rc_slice_header *head, int fd)
{
	struct name_list *nl = name_lists;

	while (nl) {
		if (!hashcmp(nl->sha1, head->sha1))
			break;
		nl = nl->next;
	}

	if (nl)
		return nl;

	nl = xcalloc(1, sizeof(struct name_list) + head->name_size);
	nl->len = head->name_size;
	hashcpy(nl->sha1, head->sha1);

	lseek(fd, head->size, SEEK_SET);
	read_in_full(fd, nl->buf, head->name_size);

	nl->next = name_lists;
	name_lists = nl;

	return nl;
}
static int get_cache_slice_header(int fd, unsigned char *cache_sha1, int len, struct rc_slice_header *head)
{
	int t;

	if (xread(fd, head, sizeof(struct rc_slice_header)) != sizeof(struct rc_slice_header))
		return -1;

	head->ofs_objects = ntohl(head->ofs_objects);
	head->object_nr = ntohl(head->object_nr);
	head->size = ntohl(head->size);
	head->path_nr = ntohs(head->path_nr);
	head->name_size = ntohl(head->name_size);

	if (memcmp(head->signature, "REVCACHE", 8))
		return -1;
	if (head->version != SUPPORTED_REVCACHE_VERSION)
		return -2;
	if (hashcmp(head->sha1, cache_sha1))
		return -3;
	t = sizeof(struct rc_slice_header);
	if (t != head->ofs_objects)
		return -4;
	if (head->size + head->name_size != len)
		return -5;

	return 0;
}
int open_cache_slice(unsigned char *sha1, int flags)
{
	int fd;
	char signature[8];

	fd = open(git_path("rev-cache/%s", sha1_to_hex(sha1)), flags);
	if (fd <= 0)
		goto end;

	if (read(fd, signature, 8) != 8)
		goto end;

	/* a normal revision slice */
	if (!memcmp(signature, "REVCACHE", 8)) {
		lseek(fd, 0, SEEK_SET);
		return fd;
	}

	/* slice pointer */
	if (!memcmp(signature, "REVCOPTR", 8)) {
		struct cache_slice_pointer ptr;

		lseek(fd, 0, SEEK_SET);
		if (read_in_full(fd, &ptr, sizeof(ptr)) != sizeof(ptr))
			goto end;

		if (ptr.version != SUPPORTED_REVCOPTR_VERSION)
			goto end;

		close(fd);
		fd = open(ptr.path, flags);

		return fd;
	}

end:
	if (fd > 0)
		close(fd);

	return -1;
}
int traverse_cache_slice(struct rev_info *revs,
		unsigned char *cache_sha1, struct commit *commit,
		unsigned long *date_so_far, int *slop_so_far,
		struct commit_list ***queue, struct commit_list **work)
{
	int fd = -1, t, retval;
	struct stat fi;
	struct rc_slice_header head;
	struct rev_cache_info *rci;
	unsigned char *map = MAP_FAILED;

	/* the index should've been loaded already to find cache_sha1, but it's good
	 * to be absolutely sure... */
	if (!idx_map)
		init_index();
	if (!idx_map)
		return -1;

	/* load options */
	rci = &revs->rev_cache_info;
	add_to_pending = rci->add_to_pending;
	add_names = rci->add_names;

	memset(&head, 0, sizeof(struct rc_slice_header));
#define ERROR(x) do { retval = (x); goto end; } while (0)

	fd = open_cache_slice(cache_sha1, O_RDONLY);
	if (fd == -1)
		ERROR(-1);
	if (fstat(fd, &fi) || fi.st_size < sizeof(struct rc_slice_header))
		ERROR(-2);

	if ((t = get_cache_slice_header(fd, cache_sha1, fi.st_size, &head)) < 0)
		ERROR(-t);
	if (add_names)
		cur_name_list = get_cache_slice_name_list(&head, fd);

	map = xmmap(0, head.size, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
	if (map == MAP_FAILED)
		ERROR(-3);

	retval = traverse_cache_slice_1(&head, map, revs, commit, date_so_far, slop_so_far, queue, work);

end:
	if (map != MAP_FAILED)
		munmap(map, head.size);
	if (fd != -1)
		close(fd);

	/* remember this! */
	if (retval)
		mark_bad_slice(cache_sha1);

#undef ERROR
	return retval;
}
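/*
 * A minimal sketch of how a revision walker is expected to drive this
 * (the real hook-up lives elsewhere in this series, in revision.c):
 *
 *	unsigned char *slice = get_cache_slice(commit);
 *	if (slice)
 *		traverse_cache_slice(revs, slice, commit,
 *				     &date, &slop, &queue_tail, &work);
 *
 * On failure the slice is marked bad and the walker falls back to a
 * normal traversal.
 */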
/* generation */

static int is_endpoint(struct commit *commit)
{
	struct commit_list *list = commit->parents;

	while (list) {
		if (!(list->item->object.flags & UNINTERESTING))
			return 0;

		list = list->next;
	}

	return 1;
}
/* ensures branch is self-contained: parents are either all interesting or all uninteresting */
static void make_legs(struct rev_info *revs)
{
	struct commit_list *list, **plist;
	int total = 0;

	/* attach plist to end of commits list */
	list = revs->commits;
	while (list && list->next)
		list = list->next;

	if (list)
		plist = &list->next;
	else
		return;

	/* duplicates don't matter, as get_revision() ignores them */
	for (list = revs->commits; list; list = list->next) {
		struct commit *item = list->item;
		struct commit_list *parents = item->parents;

		if (item->object.flags & UNINTERESTING)
			continue;
		if (is_endpoint(item))
			continue;

		while (parents) {
			struct commit *p = parents->item;
			parents = parents->next;

			if (!(p->object.flags & UNINTERESTING))
				continue;

			p->object.flags &= ~UNINTERESTING;
			parse_commit(p);
			plist = &commit_list_insert(p, plist)->next;

			if (!(p->object.flags & SEEN))
				total++;
		}
	}

	if (total)
		sort_in_topological_order(&revs->commits, 1);
}
struct path_track {
	struct commit *commit;
	int path; /* for keeping track of children */

	struct path_track *next, *prev;
};

static unsigned char *paths;
static int path_nr = 1, path_sz;

static struct path_track *path_track;
static struct path_track *path_track_alloc;

#define PATH_IN_USE 0x80 /* biggest bit we can get as a char */
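/*
 * On the generation side paths[] is reused with different semantics:
 * a slot holds PATH_IN_USE while its branch is active, or (for a merge)
 * the number of parents that still have to close it -- see handle_paths()
 * below, which decrements the count as children are encountered.
 */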
static int get_new_path(void)
{
	int i;

	for (i = 1; i < path_nr; i++)
		if (!paths[i])
			break;

	if (i == path_nr) {
		if (path_nr >= path_sz) {
			path_sz += 50;
			paths = xrealloc(paths, path_sz);
			memset(paths + path_sz - 50, 0, 50);
		}
		path_nr++;
	}

	paths[i] = PATH_IN_USE;
	return i;
}
static void remove_path_track(struct path_track **ppt, char total_free)
{
	struct path_track *t = *ppt;

	if (t->next)
		t->next->prev = t->prev;
	if (t->prev)
		t->prev->next = t->next;

	t = t->next;

	if (total_free)
		free(*ppt);
	else {
		(*ppt)->next = path_track_alloc;
		path_track_alloc = *ppt;
	}

	*ppt = t;
}
static struct path_track *make_path_track(struct path_track **head, struct commit *commit)
{
	struct path_track *pt;

	if (path_track_alloc) {
		pt = path_track_alloc;
		path_track_alloc = pt->next;
	} else
		pt = xmalloc(sizeof(struct path_track));

	memset(pt, 0, sizeof(struct path_track));
	pt->commit = commit;

	pt->next = *head;
	if (*head)
		(*head)->prev = pt;
	*head = pt;

	return pt;
}

static void add_path_to_track(struct commit *commit, int path)
{
	make_path_track(&path_track, commit);
	path_track->path = path;
}
static void handle_paths(struct commit *commit, struct rc_object_entry *object, struct strbuf *merge_str, struct strbuf *split_str)
{
	int child_nr, parent_nr, open_parent_nr, this_path;
	struct commit_list *list;
	struct commit *first_parent;
	struct path_track **ppt, *pt;

	/* we can only re-use a closed path once all its children have been encountered,
	 * as we need to keep track of commit boundaries */
	ppt = &path_track;
	pt = *ppt;
	child_nr = 0;
	while (pt) {
		if (pt->commit == commit) {
			uint16_t write_path;

			if (paths[pt->path] != PATH_IN_USE)
				paths[pt->path]--;

			/* make sure we can handle this */
			child_nr++;
			if (child_nr > 0x7f)
				die("%s: too many branches! rev-cache can only handle %d parents/children per commit",
					sha1_to_hex(object->sha1), 0x7f);

			/* add to split list */
			object->split_nr++;
			write_path = htons((uint16_t)pt->path);
			strbuf_add(split_str, &write_path, sizeof(uint16_t));

			remove_path_track(ppt, 0);
			pt = *ppt;
		} else {
			pt = pt->next;
			ppt = &pt;
		}
	}

	/* initialize ourselves */
	if (!commit->indegree) {
		commit->indegree = get_new_path();
		object->is_start = 1;
	}

	this_path = commit->indegree;
	paths[this_path] = PATH_IN_USE;
	object->path = this_path;

	/* count interesting parents */
	parent_nr = open_parent_nr = 0;
	first_parent = 0;
	for (list = commit->parents; list; list = list->next) {
		if (list->item->object.flags & UNINTERESTING) {
			object->is_end = 1;
			continue;
		}

		parent_nr++;
		if (!list->item->indegree)
			open_parent_nr++;
		if (!first_parent)
			first_parent = list->item;
	}

	if (!parent_nr)
		return;

	if (parent_nr == 1 && open_parent_nr == 1) {
		first_parent->indegree = this_path;
		return;
	}

	/* bail out on obscene parent/child #s */
	if (parent_nr > 0x7f)
		die("%s: too many parents in merge! rev-cache can only handle %d parents/children per commit",
			sha1_to_hex(object->sha1), 0x7f);

	/* make merge list */
	object->merge_nr = parent_nr;
	paths[this_path] = parent_nr;

	for (list = commit->parents; list; list = list->next) {
		struct commit *p = list->item;
		uint16_t write_path;

		if (p->object.flags & UNINTERESTING)
			continue;

		/* unfortunately due to boundary tracking we can't re-use merge paths
		 * (we can't guarantee the last parent gets this path, so the last parent
		 * won't always be able to set this as a boundary object) */
		if (!p->indegree)
			p->indegree = get_new_path();

		write_path = htons((uint16_t)p->indegree);
		strbuf_add(merge_str, &write_path, sizeof(uint16_t));

		/* make sure path is properly ended */
		add_path_to_track(p, this_path);
	}
}
static int encode_size(unsigned long size, unsigned char *out)
{
	int len = 0;

	while (size) {
		*out++ = (unsigned char)(size & 0xff);
		size >>= 8;
		len++;
	}

	return len;
}

static unsigned long decode_size(unsigned char *str, int len)
{
	unsigned long size = 0;
	int shift = 0;

	while (len--) {
		size |= (unsigned long)*str << shift;
		shift += 8;
		str++;
	}

	return size;
}
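/*
 * Sizes are stored little-endian with all trailing zero bytes dropped,
 * e.g. encode_size(0x1234, out) writes { 0x34, 0x12 } and returns 2,
 * and decode_size(out, 2) recovers 0x1234; zero encodes to zero bytes.
 */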
#define NL_HASH_TABLE_SIZE	(0xffff + 1)
#define NL_HASH_NUMBER		(NL_HASH_TABLE_SIZE >> 3)

struct name_list_hash {
	int ind;

	struct name_list_hash *next;
};

static struct name_list_hash **nl_hash_table;
static unsigned char *nl_hashes;
/* FNV-1a hash */
static unsigned int hash_name(const char *name)
{
	unsigned int hash = 2166136261ul;
	const char *p = name;

	while (*p) {
		hash ^= *p++;
		hash *= 16777619ul;
	}

	return hash & 0xffff;
}
static int name_in_list(const char *name)
{
	unsigned int h = hash_name(name);
	struct name_list_hash *entry = nl_hash_table[h];

	while (entry && strcmp(acc_name_buffer->buf + entry->ind, name))
		entry = entry->next;

	if (entry)
		return entry->ind;

	/* add name to buffer and create hash reference */
	entry = xcalloc(1, sizeof(struct name_list_hash));
	entry->ind = acc_name_buffer->len;
	strbuf_add(acc_name_buffer, name, strlen(name) + 1);

	entry->next = nl_hash_table[h];
	nl_hash_table[h] = entry;

	/* set this bucket's bit so cleanup_name_list_hash() can find it */
	nl_hashes[h / 8] |= 1 << (h % 8);

	return entry->ind;
}
static void init_name_list_hash(void)
{
	nl_hash_table = xcalloc(NL_HASH_TABLE_SIZE, sizeof(struct name_list_hash *));
	nl_hashes = xcalloc(NL_HASH_NUMBER, 1);
}
static void cleanup_name_list_hash(void)
{
	int i;

	for (i = 0; i < NL_HASH_NUMBER; i++) {
		int j, ind = nl_hashes[i];

		if (!ind)
			continue;

		for (j = 0; j < 8; j++) {
			struct name_list_hash **entryp;

			if (!(ind & 1 << j))
				continue;

			entryp = &nl_hash_table[i * 8 + j];
			while (*entryp) {
				struct name_list_hash *t = (*entryp)->next;

				free(*entryp);
				*entryp = t;
			}
		}
	} /* code overhang! */

	free(nl_hashes);
	free(nl_hash_table);
}
static void add_object_entry(const unsigned char *sha1, struct rc_object_entry *entryp,
	struct strbuf *merge_str, struct strbuf *split_str, char *name, unsigned long size)
{
	struct rc_object_entry entry;
	unsigned char size_str[7], name_str[7];
	enum object_type type;
	void *data;

	if (entryp)
		sha1 = entryp->sha1;

	if (!size) {
		/* retrieve size data */
		data = read_sha1_file(sha1, &type, &size);

		if (data)
			free(data);
	}

	/* initialize! */
	if (!entryp) {
		memset(&entry, 0, sizeof(entry));
		entry.sha1 = (unsigned char *)sha1;
		entry.type = type;

		if (merge_str)
			entry.merge_nr = merge_str->len / sizeof(uint16_t);
		if (split_str)
			entry.split_nr = split_str->len / sizeof(uint16_t);

		entryp = &entry;
	}

	entryp->size_size = encode_size(size, size_str);

	if (name)
		entryp->name_size = encode_size(name_in_list(name), name_str);

	/* write the muvabitch */
	strbuf_add(acc_buffer, to_disked_rc_object_entry(entryp, 0), sizeof(struct rc_object_entry_ondisk));

	if (merge_str)
		strbuf_add(acc_buffer, merge_str->buf, merge_str->len);
	if (split_str)
		strbuf_add(acc_buffer, split_str->buf, split_str->len);

	strbuf_add(acc_buffer, size_str, entryp->size_size);

	if (name)
		strbuf_add(acc_buffer, name_str, entryp->name_size);
}
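/*
 * An entry therefore hits the disk as:
 *
 *	rc_object_entry_ondisk
 *	merge_nr  * uint16_t merge path ids
 *	split_nr  * uint16_t split path ids
 *	size_size bytes of encoded object size
 *	name_size bytes of encoded name-buffer index
 *
 * which is what RC_ACTUAL_OBJECT_ENTRY_SIZE and the RC_ENTRY_*_OFFSET
 * macros (from rev-cache.h) walk over on the read side.
 */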
/* returns non-zero to continue parsing, 0 to skip */
typedef int (*dump_tree_fn)(const unsigned char *, const char *, unsigned int); /* sha1, path, mode */

/* we need to walk the trees by hash, so unfortunately we can't use traverse_trees in tree-walk.c */
static int dump_tree(struct tree *tree, dump_tree_fn fn, char *base)
{
	struct tree_desc desc;
	struct name_entry entry;
	struct tree *subtree;
	char concatpath[PATH_MAX];
	int r, baselen;

	if (parse_tree(tree))
		return -1;

	baselen = strlen(base);
	strcpy(concatpath, base);

	init_tree_desc(&desc, tree->buffer, tree->size);
	while (tree_entry(&desc, &entry)) {
		if (baselen + strlen(entry.path) + 1 >= PATH_MAX)
			die("we have a problem: %s%s is too big for me to handle", base, entry.path);
		strcpy(concatpath + baselen, entry.path);

		switch (fn(entry.sha1, concatpath, entry.mode)) {
		case 0:
			goto continue_loop;
		default:
			break;
		}

		if (S_ISDIR(entry.mode)) {
			subtree = lookup_tree(entry.sha1);
			if (!subtree)
				return -2;

			strcat(concatpath, "/");
			if ((r = dump_tree(subtree, fn, concatpath)) < 0)
				return r;
		}

continue_loop:
		continue;
	}

	return 0;
}
static int dump_tree_callback(const unsigned char *sha1, const char *path, unsigned int mode)
{
	strbuf_add(acc_buffer, sha1, 20);
	strbuf_add(acc_buffer, (char *)&acc_name_buffer->len, sizeof(size_t));

	strbuf_add(acc_name_buffer, path, strlen(path) + 1);

	return 1;
}
static void tree_addremove(struct diff_options *options,
	int whatnow, unsigned mode,
	const unsigned char *sha1,
	const char *concatpath)
{
	if (whatnow != '+')
		return;

	strbuf_add(acc_buffer, sha1, 20);
	strbuf_add(acc_buffer, (char *)&acc_name_buffer->len, sizeof(size_t));

	strbuf_add(acc_name_buffer, concatpath, strlen(concatpath) + 1);
}

static void tree_change(struct diff_options *options,
	unsigned old_mode, unsigned new_mode,
	const unsigned char *old_sha1,
	const unsigned char *new_sha1,
	const char *concatpath)
{
	if (!hashcmp(old_sha1, new_sha1))
		return;

	strbuf_add(acc_buffer, new_sha1, 20);
	strbuf_add(acc_buffer, (char *)&acc_name_buffer->len, sizeof(size_t));

	strbuf_add(acc_name_buffer, concatpath, strlen(concatpath) + 1);
}
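/*
 * add_unique_objects() below diffs the commit against each parent in turn
 * and intersects the resulting (sorted) object lists: an object counts as
 * unique to the commit only if it shows up in the diff against every
 * parent.  For root commits the whole tree is dumped instead.
 */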
static int add_unique_objects(struct commit *commit)
{
	struct commit_list *list;
	struct strbuf os, ost, names, *orig_name_buf, *orig_buf;
	struct diff_options opts;
	int i, j, next;
	char is_first = 1;

	/* ...no, calculate unique objects */
	strbuf_init(&os, 0);
	strbuf_init(&ost, 0);
	strbuf_init(&names, 0);
	orig_buf = acc_buffer;
	orig_name_buf = acc_name_buffer;
	acc_name_buffer = &names;

	diff_setup(&opts);
	DIFF_OPT_SET(&opts, RECURSIVE);
	DIFF_OPT_SET(&opts, TREE_IN_RECURSIVE);
	opts.change = tree_change;
	opts.add_remove = tree_addremove;
#define ENTRY_SIZE (20 + sizeof(size_t))

	/* this is only called for non-ends (ie. all parents interesting) */
	for (list = commit->parents; list; list = list->next) {
		if (is_first)
			acc_buffer = &os;
		else
			acc_buffer = &ost;

		strbuf_setlen(acc_buffer, 0);
		diff_tree_sha1(list->item->tree->object.sha1, commit->tree->object.sha1, "", &opts);
		qsort(acc_buffer->buf, acc_buffer->len / ENTRY_SIZE, ENTRY_SIZE, (int (*)(const void *, const void *))hashcmp);

		/* take intersection */
		if (!is_first) {
			for (next = i = j = 0; i < os.len; i += ENTRY_SIZE) {
				while (j < ost.len && hashcmp((unsigned char *)(ost.buf + j), (unsigned char *)(os.buf + i)) < 0)
					j += ENTRY_SIZE;

				if (j >= ost.len || hashcmp((unsigned char *)(ost.buf + j), (unsigned char *)(os.buf + i)))
					continue;

				if (next != i)
					memcpy(os.buf + next, os.buf + i, ENTRY_SIZE);
				next += ENTRY_SIZE;
			}

			if (next != i)
				strbuf_setlen(&os, next);
		} else
			is_first = 0;
	}

	/* no parents (!) */
	if (is_first) {
		acc_buffer = &os;
		dump_tree(commit->tree, dump_tree_callback, "");
	}

	/* the ordering of non-commit objects doesn't really matter, so we're not gonna bother */
	acc_buffer = orig_buf;
	acc_name_buffer = orig_name_buf;
	for (i = 0; i < os.len; i += ENTRY_SIZE)
		add_object_entry((unsigned char *)(os.buf + i), 0, 0, 0, names.buf + *(size_t *)(os.buf + i + 20), 0);

	/* last but not least, the main tree */
	add_object_entry(commit->tree->object.sha1, 0, 0, 0, 0, 0);

	strbuf_release(&ost);
	strbuf_release(&os);
	strbuf_release(&names);

	return i / ENTRY_SIZE + 1;
#undef ENTRY_SIZE
}
static int add_objects_verbatim_1(struct rev_cache_slice_map *mapping, int *index)
{
	int i = *index, object_nr = 0;
	unsigned char *map = mapping->map;
	struct rc_object_entry *entry = RC_OBTAIN_OBJECT_ENTRY(map + i);
	unsigned long size;

	i += RC_ACTUAL_OBJECT_ENTRY_SIZE(entry);
	while (i < mapping->size) {
		char *name;
		int name_index, pos = i;

		entry = RC_OBTAIN_OBJECT_ENTRY(map + i);
		i += RC_ACTUAL_OBJECT_ENTRY_SIZE(entry);

		if (entry->type == OBJ_COMMIT) {
			*index = pos;
			return object_nr;
		}

		name_index = decode_size(map + pos + RC_ENTRY_NAME_OFFSET(entry), entry->name_size);
		if (name_index && name_index < mapping->name_size)
			name = mapping->names + name_index;
		else
			name = 0;

		size = decode_size(map + pos + RC_ENTRY_SIZE_OFFSET(entry), entry->size_size);

		add_object_entry(0, entry, 0, 0, name, size);
		object_nr++;
	}

	*index = 0;
	return object_nr;
}
static int add_objects_verbatim(struct rev_cache_info *rci, struct commit *commit)
{
	struct rev_cache_slice_map *map;
	char found = 0;
	struct rc_index_entry *ie;
	struct rc_object_entry *entry;
	int object_nr, i;

	if (!rci->maps)
		return -1;

	/* check if we can continue where we left off */
	map = rci->last_map;
	if (!map)
		goto search_me;

	i = map->last_index;
	entry = RC_OBTAIN_OBJECT_ENTRY(map->map + i);
	if (hashcmp(entry->sha1, commit->object.sha1))
		goto search_me;

	found = 1;

search_me:
	if (!found) {
		ie = search_index(commit->object.sha1);
		if (!ie || ie->cache_index >= idx_head.cache_nr)
			return -2;

		map = rci->maps + ie->cache_index;
		if (!map->size)
			return -3;

		i = ie->pos;
		entry = RC_OBTAIN_OBJECT_ENTRY(map->map + i);
		if (entry->type != OBJ_COMMIT || hashcmp(entry->sha1, commit->object.sha1))
			return -4;
	}

	/* can't handle end commits */
	if (entry->is_end)
		return -5;

	object_nr = add_objects_verbatim_1(map, &i);

	/* remember this */
	if (i) {
		rci->last_map = map;
		map->last_index = i;
	} else
		rci->last_map = 0;

	return object_nr;
}
static void init_revcache_directory(void)
{
	struct stat fi;

	if (stat(git_path("rev-cache"), &fi) || !S_ISDIR(fi.st_mode))
		if (mkdir(git_path("rev-cache"), 0777))
			die("can't make rev-cache directory");
}
void init_rev_cache_info(struct rev_cache_info *rci)
{
	memset(rci, 0, sizeof(struct rev_cache_info));

	rci->objects = 1;
	rci->legs = 0;
	rci->make_index = 1;
	rci->fuse_me = 0;

	rci->overwrite_all = 0;

	rci->add_to_pending = 1;
	rci->add_names = 1;

	rci->ignore_size = 0;
}
/*
 * Returns the passed-in rci, or a static default-initialized one when the
 * caller passed 0.  (Merely assigning to the parameter would be lost on
 * return, so callers must use the return value.)
 */
struct rev_cache_info *maybe_fill_with_defaults(struct rev_cache_info *rci)
{
	static struct rev_cache_info def_rci;

	if (rci)
		return rci;

	init_rev_cache_info(&def_rci);
	return &def_rci;
}
int make_cache_slice(struct rev_cache_info *rci,
	struct rev_info *revs, struct commit_list **starts, struct commit_list **ends,
	unsigned char *cache_sha1)
{
	struct rev_info therevs;
	struct strbuf buffer, startlist, endlist;
	struct rc_slice_header head;
	struct commit *commit;
	unsigned char sha1[20];
	struct strbuf merge_paths, split_paths, namelist;
	int object_nr, total_sz, fd;
	char file[PATH_MAX], null, *newfile;
	struct rev_cache_info *trci;
	git_SHA_CTX ctx;

	rci = maybe_fill_with_defaults(rci);

	init_revcache_directory();
	strcpy(file, git_path("rev-cache/XXXXXX"));
	fd = xmkstemp(file);

	strbuf_init(&buffer, 0);
	strbuf_init(&startlist, 0);
	strbuf_init(&endlist, 0);
	strbuf_init(&merge_paths, 0);
	strbuf_init(&split_paths, 0);
	strbuf_init(&namelist, 0);
	acc_buffer = &buffer;
	acc_name_buffer = &namelist;

	null = 0;
	strbuf_add(&namelist, &null, 1); /* index 0 is reserved for "no name" */
	init_name_list_hash();

	if (!revs) {
		revs = &therevs;
		init_revisions(revs, 0);

		/* we're gonna assume no one else has already traversed this... */
		while ((commit = pop_commit(starts)))
			add_pending_object(revs, &commit->object, 0);

		while ((commit = pop_commit(ends))) {
			commit->object.flags |= UNINTERESTING;
			add_pending_object(revs, &commit->object, 0);
		}
	}

	/* write head placeholder */
	memset(&head, 0, sizeof(head));
	head.ofs_objects = htonl(sizeof(head));
	xwrite(fd, &head, sizeof(head));

	/* init revisions! */
	revs->tree_objects = 1;
	revs->blob_objects = 1;
	revs->topo_order = 1;
	revs->lifo = 1;

	/* re-use info from other caches if possible */
	trci = &revs->rev_cache_info;
	init_rev_cache_info(trci);
	trci->add_to_pending = 0;
	trci->add_names = 0;

	setup_revisions(0, 0, revs, 0);
	if (prepare_revision_walk(revs))
		die("died preparing revision walk");

	if (rci->legs)
		make_legs(revs);

	object_nr = total_sz = 0;
	while ((commit = get_revision(revs)) != 0) {
		struct rc_object_entry object;
		int t;

		strbuf_setlen(&merge_paths, 0);
		strbuf_setlen(&split_paths, 0);

		memset(&object, 0, sizeof(object));
		object.type = OBJ_COMMIT;
		object.date = commit->date;
		object.sha1 = commit->object.sha1;

		handle_paths(commit, &object, &merge_paths, &split_paths);

		if (object.is_end) {
			strbuf_add(&endlist, object.sha1, 20);
			if (ends)
				commit_list_insert(commit, ends);
		}
		/* the two *aren't* mutually exclusive */
		if (object.is_start) {
			strbuf_add(&startlist, object.sha1, 20);
			if (starts)
				commit_list_insert(commit, starts);
		}

		commit->indegree = 0;

		add_object_entry(0, &object, &merge_paths, &split_paths, 0, 0);
		object_nr++;

		if (rci->objects && !object.is_end) {
			if (rci->fuse_me && (t = add_objects_verbatim(rci, commit)) >= 0)
				/* yay! we did it! */
				object_nr += t;
			else
				/* add all unique children for this commit */
				object_nr += add_unique_objects(commit);
		}

		/* print every ~1MB or so */
		if (buffer.len > 1000000) {
			write_in_full(fd, buffer.buf, buffer.len);
			total_sz += buffer.len;

			strbuf_setlen(&buffer, 0);
		}
	}

	if (buffer.len) {
		write_in_full(fd, buffer.buf, buffer.len);
		total_sz += buffer.len;
	}

	/* write path name lookup list */
	head.name_size = htonl(namelist.len);
	write_in_full(fd, namelist.buf, namelist.len);

	/* go ahead and free some stuff... */
	strbuf_release(&buffer);
	strbuf_release(&merge_paths);
	strbuf_release(&split_paths);
	strbuf_release(&namelist);
	cleanup_name_list_hash();
	if (path_sz)
		free(paths);
	while (path_track_alloc)
		remove_path_track(&path_track_alloc, 1);

	/* the meaning of the hash name is more or less irrelevant, it's the uniqueness that matters */
	strbuf_add(&endlist, startlist.buf, startlist.len);
	git_SHA1_Init(&ctx);
	git_SHA1_Update(&ctx, endlist.buf, endlist.len);
	git_SHA1_Final(sha1, &ctx);

	/* now actually initialize header */
	memcpy(head.signature, "REVCACHE", 8);
	head.version = SUPPORTED_REVCACHE_VERSION;

	head.object_nr = htonl(object_nr);
	head.size = htonl(ntohl(head.ofs_objects) + total_sz);
	head.path_nr = htons(path_nr);
	hashcpy(head.sha1, sha1);

	/* some info! */
	fprintf(stderr, "objects: %d\n", object_nr);
	fprintf(stderr, "paths: %d\n", path_nr);

	lseek(fd, 0, SEEK_SET);
	xwrite(fd, &head, sizeof(head));

	if (rci->make_index && make_cache_index(rci, sha1, fd, ntohl(head.size)) < 0)
		die("can't update index");

	close(fd);

	newfile = git_path("rev-cache/%s", sha1_to_hex(sha1));
	if (rename(file, newfile))
		die("can't move temp file");

	/* let our caller know what we've just made */
	if (cache_sha1)
		hashcpy(cache_sha1, sha1);

	strbuf_release(&endlist);
	strbuf_release(&startlist);

	return 0;
}
static int index_sort_hash(const void *a, const void *b)
{
	return hashcmp(((struct rc_index_entry_ondisk *)a)->sha1, ((struct rc_index_entry_ondisk *)b)->sha1);
}
static int write_cache_index(struct strbuf *body)
{
	struct rc_index_header whead;
	struct lock_file *lk;
	int fd, i;

	/* clear index map if loaded */
	if (idx_map) {
		munmap(idx_map, idx_size);
		idx_map = 0;
	}

	lk = xcalloc(1, sizeof(struct lock_file));
	fd = hold_lock_file_for_update(lk, git_path("rev-cache/index"), 0);
	if (fd < 0) {
		free(lk);
		return -1;
	}

	/* endianness yay! */
	memset(&whead, 0, sizeof(whead));
	memcpy(whead.signature, "REVINDEX", 8);
	whead.version = idx_head.version;
	whead.ofs_objects = htonl(idx_head.ofs_objects);
	whead.object_nr = htonl(idx_head.object_nr);
	whead.cache_nr = idx_head.cache_nr;
	whead.max_date = htonl(idx_head.max_date);

	write_in_full(fd, &whead, sizeof(struct rc_index_header));
	write_in_full(fd, idx_caches, idx_head.cache_nr * 20);

	for (i = 0; i <= 0xff; i++)
		fanout[i] = htonl(fanout[i]);
	write_in_full(fd, fanout, 0x100 * sizeof(uint32_t));

	write_in_full(fd, body->buf, body->len);

	if (commit_lock_file(lk) < 0)
		return -2;

	/* lk freed by lockfile.c */

	return 0;
}
int make_cache_index(struct rev_cache_info *rci, unsigned char *cache_sha1,
	int fd, unsigned int size)
{
	struct strbuf buffer;
	int i, cache_index, cur;
	unsigned char *map;
	unsigned long max_date;

	rci = maybe_fill_with_defaults(rci);

	if (!idx_map)
		init_index();

	lseek(fd, 0, SEEK_SET);
	map = xmmap(0, size, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
	if (map == MAP_FAILED)
		return -1;

	strbuf_init(&buffer, 0);
	if (idx_map) {
		strbuf_add(&buffer, idx_map + fanout[0], fanout[0x100] - fanout[0]);
	} else {
		/* not an update */
		memset(&idx_head, 0, sizeof(struct rc_index_header));
		idx_caches = 0;

		memcpy(idx_head.signature, "REVINDEX", 8);
		idx_head.version = SUPPORTED_REVINDEX_VERSION;
		idx_head.ofs_objects = sizeof(struct rc_index_header) + 0x100 * sizeof(uint32_t);
	}

	/* are we remaking a slice? */
	for (i = 0; i < idx_head.cache_nr; i++)
		if (!hashcmp(idx_caches + i * 20, cache_sha1))
			break;

	if (i == idx_head.cache_nr) {
		cache_index = idx_head.cache_nr++;
		idx_head.ofs_objects += 20;

		idx_caches = xrealloc(idx_caches, idx_head.cache_nr * 20);
		hashcpy(idx_caches + cache_index * 20, cache_sha1);
	} else
		cache_index = i;

	i = sizeof(struct rc_slice_header); /* offset */
	max_date = idx_head.max_date;
	while (i < size) {
		struct rc_index_entry index_entry, *entry;
		struct rc_index_entry_ondisk *disked_entry;
		struct rc_object_entry *object_entry = RC_OBTAIN_OBJECT_ENTRY(map + i);
		unsigned long date;
		int off, pos = i;

		i += RC_ACTUAL_OBJECT_ENTRY_SIZE(object_entry);

		if (object_entry->type != OBJ_COMMIT)
			continue;

		/* don't include ends; otherwise we'll find ourselves in loops */
		if (object_entry->is_end)
			continue;

		/* handle index duplication
		 * -> keep old copy unless new one is a start -- based on expected usage, older ones will be more
		 * likely to lead to greater slice traversals than new ones */
		date = object_entry->date;
		if (date > idx_head.max_date) {
			disked_entry = 0;
			if (date > max_date)
				max_date = date;
		} else
			disked_entry = search_index_1(object_entry->sha1);

		if (disked_entry && !object_entry->is_start && !rci->overwrite_all)
			continue;
		else if (disked_entry) {
			/* mmm, pointer arithmetic... tasty */ /* (entry - idx_map = offset, so cast is valid) */
			off = (unsigned int)((unsigned char *)disked_entry - idx_map) - fanout[0];
			disked_entry = (struct rc_index_entry_ondisk *)(buffer.buf + off);
			entry = from_disked_rc_index_entry(disked_entry, 0);
		} else
			entry = &index_entry;

		memset(entry, 0, sizeof(index_entry));
		entry->sha1 = object_entry->sha1;
		entry->is_start = object_entry->is_start;
		entry->cache_index = cache_index;
		entry->pos = pos;

		if (entry == &index_entry) {
			strbuf_add(&buffer, to_disked_rc_index_entry(entry, 0), sizeof(struct rc_index_entry_ondisk));
			idx_head.object_nr++;
		} else
			to_disked_rc_index_entry(entry, disked_entry);
	}

	idx_head.max_date = max_date;
	qsort(buffer.buf, buffer.len / sizeof(struct rc_index_entry_ondisk), sizeof(struct rc_index_entry_ondisk), index_sort_hash);

	/* generate fanout */
	cur = 0x00;
	for (i = 0; i < buffer.len; i += sizeof(struct rc_index_entry_ondisk)) {
		struct rc_index_entry_ondisk *entry = (struct rc_index_entry_ondisk *)(buffer.buf + i);

		while (cur <= entry->sha1[0])
			fanout[cur++] = i + idx_head.ofs_objects;
	}

	while (cur <= 0xff)
		fanout[cur++] = idx_head.ofs_objects + buffer.len;

	/* BOOM! */
	if (write_cache_index(&buffer))
		return -1;

	munmap(map, size);
	strbuf_release(&buffer);

	/* idx_map is unloaded without cleanup_cache_slices(), so regardless of previous index existence
	 * we can still free this up */
	free(idx_caches);

	return 0;
}
void starts_from_slices(struct rev_info *revs, unsigned int flags, unsigned char *which, int n)
{
	struct commit *commit;
	int i;

	if (!idx_map)
		init_index();
	if (!idx_map)
		return;

	for (i = idx_head.ofs_objects; i < idx_size; i += sizeof(struct rc_index_entry_ondisk)) {
		struct rc_index_entry *entry = RC_OBTAIN_INDEX_ENTRY(idx_map + i);

		if (!entry->is_start)
			continue;

		/* only include entries in 'which' slices */
		if (n) {
			int j;

			for (j = 0; j < n; j++)
				if (!hashcmp(idx_caches + entry->cache_index * 20, which + j * 20))
					break;

			if (j == n)
				continue;
		}

		commit = lookup_commit(entry->sha1);
		if (!commit)
			continue;

		commit->object.flags |= flags;
		add_pending_object(revs, &commit->object, 0);
	}
}
struct slice_fd_time {
	unsigned char sha1[20];
	int fd;
	struct stat fi;
};

int slice_time_sort(const void *a, const void *b)
{
	unsigned long at, bt;

	at = ((struct slice_fd_time *)a)->fi.st_ctime;
	bt = ((struct slice_fd_time *)b)->fi.st_ctime;

	if (at == bt)
		return 0;

	return at > bt ? 1 : -1;
}
int regenerate_cache_index(struct rev_cache_info *rci)
{
	DIR *dirh;
	int i;
	struct slice_fd_time info;
	struct strbuf slices;

	/* first remove old index if it exists */
	unlink_or_warn(git_path("rev-cache/index"));

	strbuf_init(&slices, 0);

	dirh = opendir(git_path("rev-cache"));
	if (dirh) {
		struct dirent *de;
		struct stat fi;
		int fd;
		unsigned char sha1[20];

		while ((de = readdir(dirh))) {
			if (de->d_name[0] == '.')
				continue;

			if (get_sha1_hex(de->d_name, sha1))
				continue;

			/* read-only is enough: the mmap in make_cache_index() is
			 * MAP_PRIVATE, which needs only read access to the fd */
			fd = open_cache_slice(sha1, O_RDONLY);
			if (fd < 0 || fstat(fd, &fi)) {
				warning("bad cache found [%s]; fuse recommended", de->d_name);
				if (fd > 0)
					close(fd);
				continue;
			}

			hashcpy(info.sha1, sha1);
			info.fd = fd;
			memcpy(&info.fi, &fi, sizeof(struct stat));

			strbuf_add(&slices, &info, sizeof(info));
		}

		closedir(dirh);
	}

	/* we want oldest first -> upon overlap, older slices are more likely to have a larger section,
	 * as of the overlapped commit */
	qsort(slices.buf, slices.len / sizeof(info), sizeof(info), slice_time_sort);

	for (i = 0; i < slices.len; i += sizeof(info)) {
		struct slice_fd_time *infop = (struct slice_fd_time *)(slices.buf + i);
		struct stat *fip = &infop->fi;
		int fd = infop->fd;

		if (make_cache_index(rci, infop->sha1, fd, fip->st_size) < 0)
			die("error writing cache");

		close(fd);
	}

	strbuf_release(&slices);

	return 0;
}
static int add_slices_for_fuse(struct rev_cache_info *rci, struct string_list *files, struct strbuf *ignore)
{
	unsigned char sha1[20];
	char base[PATH_MAX];
	int baselen, i, slice_nr = 0;
	struct stat fi;
	DIR *dirh;
	struct dirent *de;

	strncpy(base, git_path("rev-cache"), sizeof(base));
	baselen = strlen(base);

	dirh = opendir(base);
	if (!dirh)
		return 0;

	while ((de = readdir(dirh))) {
		if (de->d_name[0] == '.')
			continue;

		base[baselen] = '/';
		strncpy(base + baselen + 1, de->d_name, sizeof(base) - baselen - 1);

		if (get_sha1_hex(de->d_name, sha1)) {
			/* whatever it is, we don't need it... */
			string_list_insert(base, files);
			continue;
		}

		/* _theoretically_ it is possible a slice < ignore_size to map objects not covered by, yet reachable from,
		 * a slice >= ignore_size, meaning that we could potentially delete an 'unfused' slice; but if that
		 * ever *did* happen their cache structure'd be so fucked up they might as well refuse the entire thing.
		 * and at any rate the worst it'd do is make rev-list revert to standard walking in that (small) bit.
		 */
		if (rci->ignore_size) {
			if (stat(base, &fi))
				warning("can't query file %s\n", base);
			else if (fi.st_size >= rci->ignore_size) {
				strbuf_add(ignore, sha1, 20);
				continue;
			}
		} else {
			/* check if a pointer */
			struct cache_slice_pointer ptr;
			int fd = open(base, O_RDONLY);

			if (fd < 0)
				goto dont_save;
			if (sizeof(ptr) != read_in_full(fd, &ptr, sizeof(ptr))) {
				close(fd);
				goto dont_save;
			}

			close(fd);
			/* the on-disk signature is not NUL-terminated, so compare the raw 8 bytes */
			if (!memcmp(ptr.signature, "REVCOPTR", 8)) {
				strbuf_add(ignore, sha1, 20);
				continue;
			}
		}

dont_save:
		for (i = idx_head.cache_nr - 1; i >= 0; i--) {
			if (!hashcmp(idx_caches + i * 20, sha1))
				break;
		}

		if (i >= 0)
			rci->maps[i].size = 1;

		string_list_insert(base, files);
		slice_nr++;
	}

	closedir(dirh);

	return slice_nr;
}
/* the most work-intensive attributes in the cache are the unique objects and size, both
 * of which can be re-used. although path structures will be isomorphic, path generation is
 * not particularly expensive, and at any rate we need to re-sort the commits */
int fuse_cache_slices(struct rev_cache_info *rci, struct rev_info *revs)
{
	unsigned char cache_sha1[20];
	struct string_list files = {0, 0, 0, 1}; /* dup */
	struct strbuf ignore;
	int i;

	rci = maybe_fill_with_defaults(rci);

	if (!idx_map)
		init_index();
	if (!idx_map)
		return -1;

	strbuf_init(&ignore, 0);
	rci->maps = xcalloc(idx_head.cache_nr, sizeof(struct rev_cache_slice_map));
	if (add_slices_for_fuse(rci, &files, &ignore) <= 1) {
		printf("nothing to fuse\n");
		return 1;
	}

	if (ignore.len) {
		starts_from_slices(revs, UNINTERESTING, (unsigned char *)ignore.buf, ignore.len / 20);
		strbuf_release(&ignore);
	}

	/* initialize mappings */
	for (i = idx_head.cache_nr - 1; i >= 0; i--) {
		struct rev_cache_slice_map *map = rci->maps + i;
		struct stat fi;
		struct rc_slice_header head;
		int fd;

		if (!map->size)
			continue;
		map->size = 0;

		/* pointers are never fused, so we can use open directly */
		fd = open(git_path("rev-cache/%s", sha1_to_hex(idx_caches + i * 20)), O_RDONLY);
		if (fd <= 0 || fstat(fd, &fi))
			continue;
		if (fi.st_size < sizeof(struct rc_slice_header))
			continue;
		if (get_cache_slice_header(fd, idx_caches + i * 20, fi.st_size, &head))
			continue;

		map->map = xmmap(0, head.size, PROT_READ, MAP_PRIVATE, fd, 0);
		if (map->map == MAP_FAILED)
			continue;

		lseek(fd, head.size, SEEK_SET);
		map->names = xcalloc(head.name_size, 1);
		read_in_full(fd, map->names, head.name_size);

		close(fd);
		map->size = head.size;
		map->name_size = head.name_size;
	}

	rci->make_index = 0;
	rci->fuse_me = 1;
	if (make_cache_slice(rci, revs, 0, 0, cache_sha1) < 0)
		die("can't make cache slice");

	printf("%s\n", sha1_to_hex(cache_sha1));

	/* clean up time! */
	for (i = idx_head.cache_nr - 1; i >= 0; i--) {
		struct rev_cache_slice_map *map = rci->maps + i;

		if (!map->size)
			continue;

		free(map->names);
		munmap(map->map, map->size);
	}
	free(rci->maps);
	cleanup_cache_slices();

	for (i = 0; i < files.nr; i++) {
		char *name = files.items[i].string;

		fprintf(stderr, "removing %s\n", name);
		unlink_or_warn(name);
	}

	string_list_clear(&files, 0);

	return regenerate_cache_index(rci);
}
static int verify_cache_slice(const char *slice_path, unsigned char *sha1)
{
	struct rc_slice_header head;
	int fd, len, retval = -1;
	struct stat fi;

	len = strlen(slice_path);
	if (len < 40)
		return -2;
	if (get_sha1_hex(slice_path + len - 40, sha1))
		return -3;

	fd = open(slice_path, O_RDONLY);
	if (fd == -1)
		goto end;
	if (fstat(fd, &fi) || fi.st_size < sizeof(head))
		goto end;

	if (get_cache_slice_header(fd, sha1, fi.st_size, &head))
		goto end;

	retval = 0;

end:
	if (fd > 0)
		close(fd);

	return retval;
}
int make_cache_slice_pointer(struct rev_cache_info *rci, const char *slice_path)
{
	struct cache_slice_pointer ptr;
	int fd;
	unsigned char sha1[20];

	rci = maybe_fill_with_defaults(rci);
	rci->overwrite_all = 1;

	if (verify_cache_slice(slice_path, sha1) < 0)
		return -1;

	memcpy(ptr.signature, "REVCOPTR", 8);
	ptr.version = SUPPORTED_REVCOPTR_VERSION;
	strcpy(ptr.path, make_nonrelative_path(slice_path));

	fd = open(git_path("rev-cache/%s", sha1_to_hex(sha1)), O_RDWR | O_CREAT | O_TRUNC, 0666);
	if (fd < 0)
		return -2;

	write_in_full(fd, &ptr, sizeof(ptr));
	make_cache_index(rci, sha1, fd, sizeof(ptr));

	close(fd);

	return 0;
}