/*
 * rev-cache.c: administrative functions for the revision cache and the
 * start of its integration into git.
 */
#include "cache.h"
#include "object.h"
#include "commit.h"
#include "tree.h"
#include "tree-walk.h"
#include "blob.h"
#include "tag.h"
#include "diff.h"
#include "revision.h"
#include "rev-cache.h"
#include "run-command.h"
#include "string-list.h"
14 struct cache_slice_pointer {
15 char signature[8]; /* REVCOPTR */
16 char version;
17 char path[PATH_MAX + 1];
20 /* list resembles pack index format */
21 static uint32_t fanout[0xff + 2];
23 static unsigned char *idx_map;
24 static int idx_size;
25 static struct rc_index_header idx_head;
26 static unsigned char *idx_caches;
27 static char no_idx;
29 static struct strbuf *acc_buffer;
31 #define SLOP 5
33 /* initialization */
35 struct rc_index_entry *from_disked_rc_index_entry(struct rc_index_entry_ondisk *src, struct rc_index_entry *dst)
37 static struct rc_index_entry entry[4];
38 static int cur;
40 if (!dst)
41 dst = &entry[cur++ & 0x3];
43 dst->sha1 = src->sha1;
44 dst->is_start = !!(src->flags & 0x80);
45 dst->cache_index = src->flags & 0x7f;
46 dst->pos = ntohl(src->pos);
48 return dst;
51 struct rc_index_entry_ondisk *to_disked_rc_index_entry(struct rc_index_entry *src, struct rc_index_entry_ondisk *dst)
53 static struct rc_index_entry_ondisk entry[4];
54 static int cur;
56 if (!dst)
57 dst = &entry[cur++ & 0x3];
59 if (dst->sha1 != src->sha1)
60 hashcpy(dst->sha1, src->sha1);
61 dst->flags = (unsigned char)src->is_start << 7 | (unsigned char)src->cache_index;
62 dst->pos = htonl(src->pos);
64 return dst;
67 struct rc_object_entry *from_disked_rc_object_entry(struct rc_object_entry_ondisk *src, struct rc_object_entry *dst)
69 static struct rc_object_entry entry[4];
70 static int cur;
72 if (!dst)
73 dst = &entry[cur++ & 0x3];
75 dst->type = src->flags >> 5;
76 dst->is_end = !!(src->flags & 0x10);
77 dst->is_start = !!(src->flags & 0x08);
78 dst->uninteresting = !!(src->flags & 0x04);
79 dst->include = !!(src->flags & 0x02);
80 dst->flag = !!(src->flags & 0x01);
82 dst->sha1 = src->sha1;
83 dst->merge_nr = src->merge_nr;
84 dst->split_nr = src->split_nr;
86 dst->size_size = src->sizes >> 5;
87 dst->padding = src->sizes & 0x1f;
89 dst->date = ntohl(src->date);
90 dst->path = ntohs(src->path);
92 return dst;
95 struct rc_object_entry_ondisk *to_disked_rc_object_entry(struct rc_object_entry *src, struct rc_object_entry_ondisk *dst)
97 static struct rc_object_entry_ondisk entry[4];
98 static int cur;
100 if (!dst)
101 dst = &entry[cur++ & 0x3];
103 dst->flags = (unsigned char)src->type << 5;
104 dst->flags |= (unsigned char)src->is_end << 4;
105 dst->flags |= (unsigned char)src->is_start << 3;
106 dst->flags |= (unsigned char)src->uninteresting << 2;
107 dst->flags |= (unsigned char)src->include << 1;
108 dst->flags |= (unsigned char)src->flag;
110 if (dst->sha1 != src->sha1)
111 hashcpy(dst->sha1, src->sha1);
112 dst->merge_nr = src->merge_nr;
113 dst->split_nr = src->split_nr;
115 dst->sizes = (unsigned char)src->size_size << 5;
116 dst->sizes |= (unsigned char)src->padding;
118 dst->date = htonl(src->date);
119 dst->path = htons(src->path);
121 return dst;
124 static int get_index_head(unsigned char *map, int len, struct rc_index_header *head, uint32_t *fanout, unsigned char **caches)
126 struct rc_index_header whead;
127 int i, index = sizeof(struct rc_index_header);
129 memcpy(&whead, map, sizeof(struct rc_index_header));
130 if (memcmp(whead.signature, "REVINDEX", 8) || whead.version != SUPPORTED_REVINDEX_VERSION)
131 return -1;
133 memcpy(head->signature, "REVINDEX", 8);
134 head->version = whead.version;
135 head->ofs_objects = ntohl(whead.ofs_objects);
136 head->object_nr = ntohl(whead.object_nr);
137 head->cache_nr = whead.cache_nr;
138 head->max_date = ntohl(whead.max_date);
140 if (len < index + head->cache_nr * 20 + 0x100 * sizeof(uint32_t))
141 return -2;
143 *caches = xmalloc(head->cache_nr * 20);
144 memcpy(*caches, map + index, head->cache_nr * 20);
145 index += head->cache_nr * 20;
147 memcpy(fanout, map + index, 0x100 * sizeof(uint32_t));
148 for (i = 0; i <= 0xff; i++)
149 fanout[i] = ntohl(fanout[i]);
150 fanout[0x100] = len;
152 return 0;
155 /* added in init_index */
156 static void cleanup_cache_slices(void)
158 if (idx_map) {
159 free(idx_caches);
160 munmap(idx_map, idx_size);
161 idx_map = 0;
166 static int init_index(void)
168 int fd;
169 struct stat fi;
171 fd = open(git_path("rev-cache/index"), O_RDONLY);
172 if (fd == -1 || fstat(fd, &fi))
173 goto end;
174 if (fi.st_size < sizeof(struct rc_index_header))
175 goto end;
177 idx_size = fi.st_size;
178 idx_map = xmmap(0, idx_size, PROT_READ, MAP_PRIVATE, fd, 0);
179 close(fd);
180 if (idx_map == MAP_FAILED)
181 goto end;
182 if (get_index_head(idx_map, fi.st_size, &idx_head, fanout, &idx_caches))
183 goto end;
185 atexit(cleanup_cache_slices);
187 return 0;
189 end:
190 idx_map = 0;
191 no_idx = 1;
192 return -1;
195 /* this assumes index is already loaded */
196 static struct rc_index_entry_ondisk *search_index_1(unsigned char *sha1)
198 int start, end, starti, endi, i, len, r;
199 struct rc_index_entry_ondisk *ie;
201 if (!idx_map)
202 return 0;
204 /* binary search */
205 start = fanout[(int)sha1[0]];
206 end = fanout[(int)sha1[0] + 1];
207 len = (end - start) / sizeof(struct rc_index_entry_ondisk);
208 if (!len || len * sizeof(struct rc_index_entry_ondisk) != end - start)
209 return 0;
211 starti = 0;
212 endi = len - 1;
213 for (;;) {
214 i = (endi + starti) / 2;
215 ie = (struct rc_index_entry_ondisk *)(idx_map + start + i * sizeof(struct rc_index_entry_ondisk));
216 r = hashcmp(sha1, ie->sha1);
218 if (r) {
219 if (starti + 1 == endi) {
220 starti++;
221 continue;
222 } else if (starti == endi)
223 break;
225 if (r > 0)
226 starti = i;
227 else /* r < 0 */
228 endi = i;
229 } else
230 return ie;
233 return 0;
/* look up sha1 in the index, unpacked into a rotating static buffer */
static struct rc_index_entry *search_index(unsigned char *sha1)
{
	struct rc_index_entry_ondisk *ied = search_index_1(sha1);

	return ied ? from_disked_rc_index_entry(ied, 0) : 0;
}
246 unsigned char *get_cache_slice(struct commit *commit)
248 struct rc_index_entry *ie;
250 if (!idx_map) {
251 if (no_idx)
252 return 0;
253 init_index();
256 if (commit->date > idx_head.max_date)
257 return 0;
259 ie = search_index(commit->object.sha1);
260 if (ie && ie->cache_index < idx_head.cache_nr)
261 return idx_caches + ie->cache_index * 20;
263 return 0;
/* traversal */

static unsigned long decode_size(unsigned char *str, int len);
271 static void handle_noncommit(struct rev_info *revs, struct commit *commit, unsigned char *ptr, struct rc_object_entry *entry)
273 struct blob *blob;
274 struct tree *tree;
275 struct object *obj;
276 unsigned long size;
278 size = decode_size(ptr + RC_ENTRY_SIZE_OFFSET(entry), entry->size_size);
279 switch (entry->type) {
280 case OBJ_TREE :
281 if (!revs->tree_objects)
282 return;
284 tree = lookup_tree(entry->sha1);
285 if (!tree)
286 return;
288 tree->size = size;
289 commit->tree = tree;
290 obj = (struct object *)tree;
291 break;
293 case OBJ_BLOB :
294 if (!revs->blob_objects)
295 return;
297 blob = lookup_blob(entry->sha1);
298 if (!blob)
299 return;
301 obj = (struct object *)blob;
302 break;
304 default :
305 /* tag objects aren't really supposed to be here */
306 return;
309 obj->flags |= FACE_VALUE;
310 add_pending_object(revs, obj, "");
313 static int setup_traversal(struct rc_slice_header *head, unsigned char *map, struct commit *commit, struct commit_list **work)
315 struct rc_index_entry *iep;
316 struct rc_object_entry *oep;
317 struct commit_list *prev, *wp, **wpp;
318 int retval;
320 iep = search_index(commit->object.sha1), 0;
321 oep = RC_OBTAIN_OBJECT_ENTRY(map + iep->pos);
323 /* the .uniniteresting bit isn't strictly necessary, as we check the object during traversal as well,
324 * but we might as well initialize it while we're at it */
325 oep->include = 1;
326 oep->uninteresting = !!(commit->object.flags & UNINTERESTING);
327 to_disked_rc_object_entry(oep, (struct rc_object_entry_ondisk *)(map + iep->pos));
328 retval = iep->pos;
330 /* include any others in the work array */
331 prev = 0;
332 wpp = work;
333 wp = *work;
334 while (wp) {
335 struct object *obj = &wp->item->object;
336 struct commit *co;
338 /* is this in our cache slice? */
339 iep = search_index(obj->sha1);
340 if (!iep || hashcmp(idx_caches + iep->cache_index * 20, head->sha1)) {
341 prev = wp;
342 wp = wp->next;
343 wpp = &wp;
344 continue;
347 if (iep->pos < retval)
348 retval = iep->pos;
350 oep = RC_OBTAIN_OBJECT_ENTRY(map + iep->pos);
352 /* mark this for later */
353 oep->include = 1;
354 oep->uninteresting = !!(obj->flags & UNINTERESTING);
355 to_disked_rc_object_entry(oep, (struct rc_object_entry_ondisk *)(map + iep->pos));
357 /* remove from work list */
358 co = pop_commit(wpp);
359 wp = *wpp;
360 if (prev)
361 prev->next = wp;
364 return retval;
/* per-path state bits: a path may carry interesting and/or uninteresting
 * ancestry; the low 6 bits count merge parents still outstanding */
#define IPATH	0x40
#define UPATH	0x80

#define GET_COUNT(x)	((x) & 0x3f)
#define SET_COUNT(x, s)	((x) = ((x) & ~0x3f) | ((s) & 0x3f))
373 static int traverse_cache_slice_1(struct rc_slice_header *head, unsigned char *map,
374 struct rev_info *revs, struct commit *commit,
375 unsigned long *date_so_far, int *slop_so_far,
376 struct commit_list ***queue, struct commit_list **work)
378 struct commit_list *insert_cache = 0;
379 struct commit **last_objects, *co;
380 int i, total_path_nr = head->path_nr, retval = -1;
381 char consume_children = 0;
382 unsigned char *paths;
384 i = setup_traversal(head, map, commit, work);
385 if (i < 0)
386 return -1;
388 paths = xcalloc(total_path_nr, sizeof(uint16_t));
389 last_objects = xcalloc(total_path_nr, sizeof(struct commit *));
391 /* i already set */
392 while (i < head->size) {
393 struct rc_object_entry *entry = RC_OBTAIN_OBJECT_ENTRY(map + i);
394 int path = entry->path;
395 struct object *obj;
396 int index = i;
398 i += RC_ACTUAL_OBJECT_ENTRY_SIZE(entry);
400 /* add extra objects if necessary */
401 if (entry->type != OBJ_COMMIT) {
402 if (consume_children)
403 handle_noncommit(revs, co, map + index, entry);
405 continue;
406 } else
407 consume_children = 0;
409 if (path >= total_path_nr)
410 goto end;
412 /* in one of our branches?
413 * uninteresting trumps interesting */
414 if (entry->include)
415 paths[path] |= entry->uninteresting ? UPATH : IPATH;
416 else if (!paths[path])
417 continue;
419 /* date stuff */
420 if (revs->max_age != -1 && entry->date < revs->max_age)
421 paths[path] |= UPATH;
423 /* lookup object */
424 co = lookup_commit(entry->sha1);
425 obj = &co->object;
427 if (obj->flags & UNINTERESTING)
428 paths[path] |= UPATH;
430 if ((paths[path] & IPATH) && (paths[path] & UPATH)) {
431 paths[path] = UPATH;
433 /* mark edge */
434 if (last_objects[path]) {
435 parse_commit(last_objects[path]);
437 /* we needn't worry about the unique field; that will be valid as
438 * long as we're not a end entry */
439 last_objects[path]->object.flags &= ~FACE_VALUE;
440 last_objects[path] = 0;
444 /* now we gotta re-assess the whole interesting thing... */
445 entry->uninteresting = !!(paths[path] & UPATH);
447 /* first close paths */
448 if (entry->split_nr) {
449 int j, off = index + sizeof(struct rc_object_entry_ondisk) + RC_PATH_SIZE(entry->merge_nr);
451 for (j = 0; j < entry->split_nr; j++) {
452 unsigned short p = ntohs(*(uint16_t *)(map + off + RC_PATH_SIZE(j)));
454 if (p >= total_path_nr)
455 goto end;
457 /* boundary commit? */
458 if ((paths[p] & IPATH) && entry->uninteresting) {
459 if (last_objects[p]) {
460 parse_commit(last_objects[p]);
462 last_objects[p]->object.flags &= ~FACE_VALUE;
463 last_objects[p] = 0;
465 } else if (last_objects[p] && !last_objects[p]->object.parsed)
466 commit_list_insert(co, &last_objects[p]->parents);
468 /* can't close a merge path until all are parents have been encountered */
469 if (GET_COUNT(paths[p])) {
470 SET_COUNT(paths[p], GET_COUNT(paths[p]) - 1);
472 if (GET_COUNT(paths[p]))
473 continue;
476 paths[p] = 0;
477 last_objects[p] = 0;
481 /* make topo relations */
482 if (last_objects[path] && !last_objects[path]->object.parsed)
483 commit_list_insert(co, &last_objects[path]->parents);
485 /* initialize commit */
486 if (!entry->is_end) {
487 co->date = entry->date;
488 obj->flags |= ADDED | FACE_VALUE;
489 } else
490 parse_commit(co);
492 obj->flags |= SEEN;
494 if (entry->uninteresting)
495 obj->flags |= UNINTERESTING;
497 /* we need to know what the edges are */
498 last_objects[path] = co;
500 /* add to list */
501 if (!(obj->flags & UNINTERESTING) || revs->show_all) {
502 if (entry->is_end)
503 insert_by_date_cached(co, work, insert_cache, &insert_cache);
504 else
505 *queue = &commit_list_insert(co, *queue)->next;
507 /* add children to list as well */
508 if (obj->flags & UNINTERESTING)
509 consume_children = 0;
510 else
511 consume_children = 1;
514 /* open parents */
515 if (entry->merge_nr) {
516 int j, off = index + sizeof(struct rc_object_entry_ondisk);
517 char flag = entry->uninteresting ? UPATH : IPATH;
519 for (j = 0; j < entry->merge_nr; j++) {
520 unsigned short p = ntohs(*(uint16_t *)(map + off + RC_PATH_SIZE(j)));
522 if (p >= total_path_nr)
523 goto end;
525 if (paths[p] & flag)
526 continue;
528 paths[p] |= flag;
531 /* make sure we don't use this path before all our parents have had their say */
532 SET_COUNT(paths[path], entry->merge_nr);
537 retval = 0;
539 end:
540 free(paths);
541 free(last_objects);
543 return retval;
546 static int get_cache_slice_header(unsigned char *cache_sha1, unsigned char *map, int len, struct rc_slice_header *head)
548 int t;
550 memcpy(head, map, sizeof(struct rc_slice_header));
551 head->ofs_objects = ntohl(head->ofs_objects);
552 head->object_nr = ntohl(head->object_nr);
553 head->size = ntohl(head->size);
554 head->path_nr = ntohs(head->path_nr);
556 if (memcmp(head->signature, "REVCACHE", 8))
557 return -1;
558 if (head->version != SUPPORTED_REVCACHE_VERSION)
559 return -2;
560 if (hashcmp(head->sha1, cache_sha1))
561 return -3;
562 t = sizeof(struct rc_slice_header);
563 if (t != head->ofs_objects || t >= len)
564 return -4;
566 head->size = len;
568 return 0;
571 int open_cache_slice(unsigned char *sha1, int flags)
573 int fd;
574 char signature[8];
576 fd = open(git_path("rev-cache/%s", sha1_to_hex(sha1)), flags);
577 if (fd <= 0)
578 goto end;
580 if (read(fd, signature, 8) != 8)
581 goto end;
583 /* a normal revision slice */
584 if (!memcmp(signature, "REVCACHE", 8)) {
585 lseek(fd, 0, SEEK_SET);
586 return fd;
589 /* slice pointer */
590 if (!memcmp(signature, "REVCOPTR", 8)) {
591 struct cache_slice_pointer ptr;
593 lseek(fd, 0, SEEK_SET);
594 if (read_in_full(fd, &ptr, sizeof(ptr)) != sizeof(ptr))
595 goto end;
597 if (ptr.version != SUPPORTED_REVCOPTR_VERSION)
598 goto end;
600 close(fd);
601 fd = open(ptr.path, flags);
603 return fd;
606 end:
607 if (fd > 0)
608 close(fd);
610 return -1;
613 int traverse_cache_slice(struct rev_info *revs,
614 unsigned char *cache_sha1, struct commit *commit,
615 unsigned long *date_so_far, int *slop_so_far,
616 struct commit_list ***queue, struct commit_list **work)
618 int fd = -1, retval = -3;
619 struct stat fi;
620 struct rc_slice_header head;
621 struct rev_cache_info *rci;
622 unsigned char *map = MAP_FAILED;
624 /* the index should've been loaded already to find cache_sha1, but it's good
625 * to be absolutely sure... */
626 if (!idx_map)
627 init_index();
628 if (!idx_map)
629 return -1;
631 /* load options */
632 rci = &revs->rev_cache_info;
634 memset(&head, 0, sizeof(struct rc_slice_header));
636 fd = open_cache_slice(cache_sha1, O_RDONLY);
637 if (fd == -1)
638 goto end;
639 if (fstat(fd, &fi) || fi.st_size < sizeof(struct rc_slice_header))
640 goto end;
642 map = xmmap(0, fi.st_size, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
643 if (map == MAP_FAILED)
644 goto end;
645 if (get_cache_slice_header(cache_sha1, map, fi.st_size, &head))
646 goto end;
648 retval = traverse_cache_slice_1(&head, map, revs, commit, date_so_far, slop_so_far, queue, work);
650 end:
651 if (map != MAP_FAILED)
652 munmap(map, fi.st_size);
653 if (fd != -1)
654 close(fd);
656 return retval;
661 /* generation */
663 static int is_endpoint(struct commit *commit)
665 struct commit_list *list = commit->parents;
667 while (list) {
668 if (!(list->item->object.flags & UNINTERESTING))
669 return 0;
671 list = list->next;
674 return 1;
677 /* ensures branch is self-contained: parents are either all interesting or all uninteresting */
678 static void make_legs(struct rev_info *revs)
680 struct commit_list *list, **plist;
681 int total = 0;
683 /* attach plist to end of commits list */
684 list = revs->commits;
685 while (list && list->next)
686 list = list->next;
688 if (list)
689 plist = &list->next;
690 else
691 return;
693 /* duplicates don't matter, as get_revision() ignores them */
694 for (list = revs->commits; list; list = list->next) {
695 struct commit *item = list->item;
696 struct commit_list *parents = item->parents;
698 if (item->object.flags & UNINTERESTING)
699 continue;
700 if (is_endpoint(item))
701 continue;
703 while (parents) {
704 struct commit *p = parents->item;
705 parents = parents->next;
707 if (!(p->object.flags & UNINTERESTING))
708 continue;
710 p->object.flags &= ~UNINTERESTING;
711 parse_commit(p);
712 plist = &commit_list_insert(p, plist)->next;
714 if (!(p->object.flags & SEEN))
715 total++;
719 if (total)
720 sort_in_topological_order(&revs->commits, 1);
/* node on the doubly-linked list of paths awaiting their child commits */
struct path_track {
	struct commit *commit;
	int path; /* for keeping track of children */
	struct path_track *next, *prev;
};
static unsigned char *paths;		/* per-path state; grown on demand */
static int path_nr = 1, path_sz;	/* path 0 is reserved */

static struct path_track *path_track;		/* paths awaiting children */
static struct path_track *path_track_alloc;	/* free list of recycled nodes */

#define PATH_IN_USE 0x80 /* biggest bit we can get as a char */
740 static int get_new_path(void)
742 int i;
744 for (i = 1; i < path_nr; i++)
745 if (!paths[i])
746 break;
748 if (i == path_nr) {
749 if (path_nr >= path_sz) {
750 path_sz += 50;
751 paths = xrealloc(paths, path_sz);
752 memset(paths + path_sz - 50, 0, 50);
754 path_nr++;
757 paths[i] = PATH_IN_USE;
758 return i;
761 static void remove_path_track(struct path_track **ppt, char total_free)
763 struct path_track *t = *ppt;
765 if (t->next)
766 t->next->prev = t->prev;
767 if (t->prev)
768 t->prev->next = t->next;
770 t = t->next;
772 if (total_free)
773 free(*ppt);
774 else {
775 (*ppt)->next = path_track_alloc;
776 path_track_alloc = *ppt;
779 *ppt = t;
782 static struct path_track *make_path_track(struct path_track **head, struct commit *commit)
784 struct path_track *pt;
786 if (path_track_alloc) {
787 pt = path_track_alloc;
788 path_track_alloc = pt->next;
789 } else
790 pt = xmalloc(sizeof(struct path_track));
792 memset(pt, 0, sizeof(struct path_track));
793 pt->commit = commit;
795 pt->next = *head;
796 if (*head)
797 (*head)->prev = pt;
798 *head = pt;
800 return pt;
803 static void add_path_to_track(struct commit *commit, int path)
805 make_path_track(&path_track, commit);
806 path_track->path = path;
809 static void handle_paths(struct commit *commit, struct rc_object_entry *object, struct strbuf *merge_str, struct strbuf *split_str)
811 int child_nr, parent_nr, open_parent_nr, this_path;
812 struct commit_list *list;
813 struct commit *first_parent;
814 struct path_track **ppt, *pt;
816 /* we can only re-use a closed path once all it's children have been encountered,
817 * as we need to keep track of commit boundaries */
818 ppt = &path_track;
819 pt = *ppt;
820 child_nr = 0;
821 while (pt) {
822 if (pt->commit == commit) {
823 uint16_t write_path;
825 if (paths[pt->path] != PATH_IN_USE)
826 paths[pt->path]--;
828 /* make sure we can handle this */
829 child_nr++;
830 if (child_nr > 0x7f)
831 die("%s: too many branches! rev-cache can only handle %d parents/children per commit",
832 sha1_to_hex(object->sha1), 0x7f);
834 /* add to split list */
835 object->split_nr++;
836 write_path = htons((uint16_t)pt->path);
837 strbuf_add(split_str, &write_path, sizeof(uint16_t));
839 remove_path_track(ppt, 0);
840 pt = *ppt;
841 } else {
842 pt = pt->next;
843 ppt = &pt;
847 /* initialize our self! */
848 if (!commit->indegree) {
849 commit->indegree = get_new_path();
850 object->is_start = 1;
853 this_path = commit->indegree;
854 paths[this_path] = PATH_IN_USE;
855 object->path = this_path;
857 /* count interesting parents */
858 parent_nr = open_parent_nr = 0;
859 first_parent = 0;
860 for (list = commit->parents; list; list = list->next) {
861 if (list->item->object.flags & UNINTERESTING) {
862 object->is_end = 1;
863 continue;
866 parent_nr++;
867 if (!list->item->indegree)
868 open_parent_nr++;
869 if (!first_parent)
870 first_parent = list->item;
873 if (!parent_nr)
874 return;
876 if (parent_nr == 1 && open_parent_nr == 1) {
877 first_parent->indegree = this_path;
878 return;
881 /* bail out on obscene parent/child #s */
882 if (parent_nr > 0x7f)
883 die("%s: too many parents in merge! rev-cache can only handle %d parents/children per commit",
884 sha1_to_hex(object->sha1), 0x7f);
886 /* make merge list */
887 object->merge_nr = parent_nr;
888 paths[this_path] = parent_nr;
890 for (list = commit->parents; list; list = list->next) {
891 struct commit *p = list->item;
892 uint16_t write_path;
894 if (p->object.flags & UNINTERESTING)
895 continue;
897 /* unfortunately due to boundary tracking we can't re-use merge paths
898 * (unable to guarantee last parent path = this -> last won't always be able to
899 * set this as a boundary object */
900 if (!p->indegree)
901 p->indegree = get_new_path();
903 write_path = htons((uint16_t)p->indegree);
904 strbuf_add(merge_str, &write_path, sizeof(uint16_t));
906 /* make sure path is properly ended */
907 add_path_to_track(p, this_path);
/* write size as little-endian bytes, dropping leading zeros; returns the
 * number of bytes emitted (0 for size == 0) */
static int encode_size(unsigned long size, unsigned char *out)
{
	int len;

	for (len = 0; size; size >>= 8, len++)
		*out++ = (unsigned char)(size & 0xff);

	return len;
}
/* inverse of encode_size(): assemble len little-endian bytes into a value */
static unsigned long decode_size(unsigned char *str, int len)
{
	unsigned long size = 0;
	int shift;

	for (shift = 0; len > 0; len--, shift += 8)
		size |= (unsigned long)*str++ << shift;

	return size;
}
940 static void add_object_entry(const unsigned char *sha1, struct rc_object_entry *entryp,
941 struct strbuf *merge_str, struct strbuf *split_str)
943 struct rc_object_entry entry;
944 unsigned char size_str[7];
945 unsigned long size;
946 enum object_type type;
947 void *data;
949 if (entryp)
950 sha1 = entryp->sha1;
952 /* retrieve size data */
953 data = read_sha1_file(sha1, &type, &size);
955 if (data)
956 free(data);
958 /* initialize! */
959 if (!entryp) {
960 memset(&entry, 0, sizeof(entry));
961 entry.sha1 = (unsigned char *)sha1;
962 entry.type = type;
964 if (merge_str)
965 entry.merge_nr = merge_str->len / sizeof(uint16_t);
966 if (split_str)
967 entry.split_nr = split_str->len / sizeof(uint16_t);
969 entryp = &entry;
972 entryp->size_size = encode_size(size, size_str);
974 /* write the muvabitch */
975 strbuf_add(acc_buffer, to_disked_rc_object_entry(entryp, 0), sizeof(struct rc_object_entry_ondisk));
977 if (merge_str)
978 strbuf_add(acc_buffer, merge_str->buf, merge_str->len);
979 if (split_str)
980 strbuf_add(acc_buffer, split_str->buf, split_str->len);
982 strbuf_add(acc_buffer, size_str, entryp->size_size);
985 /* returns non-zero to continue parsing, 0 to skip */
986 typedef int (*dump_tree_fn)(const unsigned char *, const char *, unsigned int); /* sha1, path, mode */
988 /* we need to walk the trees by hash, so unfortunately we can't use traverse_trees in tree-walk.c */
989 static int dump_tree(struct tree *tree, dump_tree_fn fn)
991 struct tree_desc desc;
992 struct name_entry entry;
993 struct tree *subtree;
994 int r;
996 if (parse_tree(tree))
997 return -1;
999 init_tree_desc(&desc, tree->buffer, tree->size);
1000 while (tree_entry(&desc, &entry)) {
1001 switch (fn(entry.sha1, entry.path, entry.mode)) {
1002 case 0 :
1003 goto continue_loop;
1004 default :
1005 break;
1008 if (S_ISDIR(entry.mode)) {
1009 subtree = lookup_tree(entry.sha1);
1010 if (!subtree)
1011 return -2;
1013 if ((r = dump_tree(subtree, fn)) < 0)
1014 return r;
1017 continue_loop:
1018 continue;
1021 return 0;
1024 static int dump_tree_callback(const unsigned char *sha1, const char *path, unsigned int mode)
1026 strbuf_add(acc_buffer, sha1, 20);
1028 return 1;
1031 static void tree_addremove(struct diff_options *options,
1032 int whatnow, unsigned mode,
1033 const unsigned char *sha1,
1034 const char *concatpath)
1036 if (whatnow != '+')
1037 return;
1039 strbuf_add(acc_buffer, sha1, 20);
1042 static void tree_change(struct diff_options *options,
1043 unsigned old_mode, unsigned new_mode,
1044 const unsigned char *old_sha1,
1045 const unsigned char *new_sha1,
1046 const char *concatpath)
1048 if (!hashcmp(old_sha1, new_sha1))
1049 return;
1051 strbuf_add(acc_buffer, new_sha1, 20);
1054 static int add_unique_objects(struct commit *commit)
1056 struct commit_list *list;
1057 struct strbuf os, ost, *orig_buf;
1058 struct diff_options opts;
1059 int i, j, next;
1060 char is_first = 1;
1062 /* ...no, calculate unique objects */
1063 strbuf_init(&os, 0);
1064 strbuf_init(&ost, 0);
1065 orig_buf = acc_buffer;
1067 diff_setup(&opts);
1068 DIFF_OPT_SET(&opts, RECURSIVE);
1069 DIFF_OPT_SET(&opts, TREE_IN_RECURSIVE);
1070 opts.change = tree_change;
1071 opts.add_remove = tree_addremove;
1073 /* this is only called for non-ends (ie. all parents interesting) */
1074 for (list = commit->parents; list; list = list->next) {
1075 if (is_first)
1076 acc_buffer = &os;
1077 else
1078 acc_buffer = &ost;
1080 strbuf_setlen(acc_buffer, 0);
1081 diff_tree_sha1(list->item->tree->object.sha1, commit->tree->object.sha1, "", &opts);
1082 qsort(acc_buffer->buf, acc_buffer->len / 20, 20, (int (*)(const void *, const void *))hashcmp);
1084 /* take intersection */
1085 if (!is_first) {
1086 for (next = i = j = 0; i < os.len; i += 20) {
1087 while (j < ost.len && hashcmp((unsigned char *)(ost.buf + j), (unsigned char *)(os.buf + i)) < 0)
1088 j += 20;
1090 if (j >= ost.len || hashcmp((unsigned char *)(ost.buf + j), (unsigned char *)(os.buf + i)))
1091 continue;
1093 if (next != i)
1094 memcpy(os.buf + next, os.buf + i, 20);
1095 next += 20;
1098 if (next != i)
1099 strbuf_setlen(&os, next);
1100 } else
1101 is_first = 0;
1104 /* no parents (!) */
1105 if (is_first) {
1106 acc_buffer = &os;
1107 dump_tree(commit->tree, dump_tree_callback);
1110 /* the ordering of non-commit objects dosn't really matter, so we're not gonna bother */
1111 acc_buffer = orig_buf;
1112 for (i = 0; i < os.len; i += 20)
1113 add_object_entry((unsigned char *)(os.buf + i), 0, 0, 0);
1115 /* last but not least, the main tree */
1116 add_object_entry(commit->tree->object.sha1, 0, 0, 0);
1118 return i / 20 + 1;
1121 static int add_objects_verbatim_1(struct rev_cache_slice_map *mapping, int *index)
1123 unsigned char *map = mapping->map;
1124 int i = *index, object_nr = 0;
1125 struct rc_object_entry *entry = RC_OBTAIN_OBJECT_ENTRY(map + i);
1127 i += RC_ACTUAL_OBJECT_ENTRY_SIZE(entry);
1128 while (i < mapping->size) {
1129 int pos = i;
1131 entry = RC_OBTAIN_OBJECT_ENTRY(map + i;
1132 i += RC_ACTUAL_OBJECT_ENTRY_SIZE(entry);
1134 if (entry->type == OBJ_COMMIT) {
1135 *index = pos;
1136 return object_nr;
1139 strbuf_add(acc_buffer, map + pos, i - pos);
1140 object_nr++;
1143 *index = 0;
1144 return object_nr;
1147 static int add_objects_verbatim(struct rev_cache_info *rci, struct commit *commit)
1149 struct rev_cache_slice_map *map;
1150 char found = 0;
1151 struct rc_index_entry *ie;
1152 struct rc_object_entry *entry;
1153 int object_nr, i;
1155 if (!rci->maps)
1156 return -1;
1158 /* check if we can continue where we left off */
1159 map = rci->last_map;
1160 if (!map)
1161 goto search_me;
1163 i = map->last_index;
1164 entry = RC_OBTAIN_OBJECT_ENTRY(map->map + i);
1165 if (hashcmp(entry->sha1, commit->object.sha1))
1166 goto search_me;
1168 found = 1;
1170 search_me:
1171 if (!found) {
1172 ie = search_index(commit->object.sha1);
1173 if (!ie || ie->cache_index >= idx_head.cache_nr)
1174 return -2;
1176 map = rci->maps + ie->cache_index;
1177 if (!map->size)
1178 return -3;
1180 i = ie->pos;
1181 entry = RC_OBTAIN_OBJECT_ENTRY(map->map + i);
1182 if (entry->type != OBJ_COMMIT || hashcmp(entry->sha1, commit->object.sha1))
1183 return -4;
1186 /* can't handle end commits */
1187 if (entry->is_end)
1188 return -5;
1190 object_nr = add_objects_verbatim_1(map, &i);
1192 /* remember this */
1193 if (i) {
1194 rci->last_map = map;
1195 map->last_index = i;
1196 } else
1197 rci->last_map = 0;
1199 return object_nr;
1202 static void init_revcache_directory(void)
1204 struct stat fi;
1206 if (stat(git_path("rev-cache"), &fi) || !S_ISDIR(fi.st_mode))
1207 if (mkdir(git_path("rev-cache"), 0777))
1208 die("can't make rev-cache directory");
1212 void init_rev_cache_info(struct rev_cache_info *rci)
1214 memset(rci, 0, sizeof(struct rev_cache_info));
1216 rci->objects = 1;
1217 rci->legs = 0;
1218 rci->make_index = 1;
1219 rci->fuse_me = 0;
1221 rci->overwrite_all = 0;
1223 rci->add_to_pending = 1;
1225 rci->ignore_size = 0;
1228 void maybe_fill_with_defaults(struct rev_cache_info *rci)
1230 static struct rev_cache_info def_rci;
1232 if (rci)
1233 return;
1235 init_rev_cache_info(&def_rci);
1236 rci = &def_rci;
1239 int make_cache_slice(struct rev_cache_info *rci,
1240 struct rev_info *revs, struct commit_list **starts, struct commit_list **ends,
1241 unsigned char *cache_sha1)
1243 struct rev_info therevs;
1244 struct strbuf buffer, startlist, endlist;
1245 struct rc_slice_header head;
1246 struct commit *commit;
1247 unsigned char sha1[20];
1248 struct strbuf merge_paths, split_paths;
1249 int object_nr, total_sz, fd;
1250 char file[PATH_MAX], *newfile;
1251 struct rev_cache_info *trci;
1252 git_SHA_CTX ctx;
1254 maybe_fill_with_defaults(rci);
1256 init_revcache_directory();
1257 strcpy(file, git_path("rev-cache/XXXXXX"));
1258 fd = xmkstemp(file);
1260 strbuf_init(&buffer, 0);
1261 strbuf_init(&startlist, 0);
1262 strbuf_init(&endlist, 0);
1263 strbuf_init(&merge_paths, 0);
1264 strbuf_init(&split_paths, 0);
1265 acc_buffer = &buffer;
1267 if (!revs) {
1268 revs = &therevs;
1269 init_revisions(revs, 0);
1271 /* we're gonna assume no one else has already traversed this... */
1272 while ((commit = pop_commit(starts)))
1273 add_pending_object(revs, &commit->object, 0);
1275 while ((commit = pop_commit(ends))) {
1276 commit->object.flags |= UNINTERESTING;
1277 add_pending_object(revs, &commit->object, 0);
1281 /* write head placeholder */
1282 memset(&head, 0, sizeof(head));
1283 head.ofs_objects = htonl(sizeof(head));
1284 xwrite(fd, &head, sizeof(head));
1286 /* init revisions! */
1287 revs->tree_objects = 1;
1288 revs->blob_objects = 1;
1289 revs->topo_order = 1;
1290 revs->lifo = 1;
1292 /* re-use info from other caches if possible */
1293 trci = &revs->rev_cache_info;
1294 init_rev_cache_info(trci);
1295 trci->add_to_pending = 0;
1297 setup_revisions(0, 0, revs, 0);
1298 if (prepare_revision_walk(revs))
1299 die("died preparing revision walk");
1301 if (rci->legs)
1302 make_legs(revs);
1304 object_nr = total_sz = 0;
1305 while ((commit = get_revision(revs)) != 0) {
1306 struct rc_object_entry object;
1307 int t;
1309 strbuf_setlen(&merge_paths, 0);
1310 strbuf_setlen(&split_paths, 0);
1312 memset(&object, 0, sizeof(object));
1313 object.type = OBJ_COMMIT;
1314 object.date = commit->date;
1315 object.sha1 = commit->object.sha1;
1317 handle_paths(commit, &object, &merge_paths, &split_paths);
1319 if (object.is_end) {
1320 strbuf_add(&endlist, object.sha1, 20);
1321 if (ends)
1322 commit_list_insert(commit, ends);
1324 /* the two *aren't* mutually exclusive */
1325 if (object.is_start) {
1326 strbuf_add(&startlist, object.sha1, 20);
1327 if (starts)
1328 commit_list_insert(commit, starts);
1331 commit->indegree = 0;
1333 add_object_entry(0, &object, &merge_paths, &split_paths);
1334 object_nr++;
1336 if (rci->objects && !object.is_end) {
1337 if (rci->fuse_me && (t = add_objects_verbatim(rci, commit)) >= 0)
1338 /* yay! we did it! */
1339 object_nr += t;
1340 else
1341 /* add all unique children for this commit */
1342 object_nr += add_unique_objects(commit);
1345 /* print every ~1MB or so */
1346 if (buffer.len > 1000000) {
1347 write_in_full(fd, buffer.buf, buffer.len);
1348 total_sz += buffer.len;
1350 strbuf_setlen(&buffer, 0);
1354 if (buffer.len) {
1355 write_in_full(fd, buffer.buf, buffer.len);
1356 total_sz += buffer.len;
1359 /* go ahead a free some stuff... */
1360 strbuf_release(&buffer);
1361 strbuf_release(&merge_paths);
1362 strbuf_release(&split_paths);
1363 if (path_sz)
1364 free(paths);
1365 while (path_track_alloc)
1366 remove_path_track(&path_track_alloc, 1);
1368 /* the meaning of the hash name is more or less irrelevant, it's the uniqueness that matters */
1369 strbuf_add(&endlist, startlist.buf, startlist.len);
1370 git_SHA1_Init(&ctx);
1371 git_SHA1_Update(&ctx, endlist.buf, endlist.len);
1372 git_SHA1_Final(sha1, &ctx);
1374 /* now actually initialize header */
1375 strcpy(head.signature, "REVCACHE");
1376 head.version = SUPPORTED_REVCACHE_VERSION;
1378 head.object_nr = htonl(object_nr);
1379 head.size = htonl(ntohl(head.ofs_objects) + total_sz);
1380 head.path_nr = htons(path_nr);
1381 hashcpy(head.sha1, sha1);
1383 /* some info! */
1384 fprintf(stderr, "objects: %d\n", object_nr);
1385 fprintf(stderr, "paths: %d\n", path_nr);
1387 lseek(fd, 0, SEEK_SET);
1388 xwrite(fd, &head, sizeof(head));
1390 if (rci->make_index && make_cache_index(rci, sha1, fd, ntohl(head.size)) < 0)
1391 die("can't update index");
1393 close(fd);
1395 newfile = git_path("rev-cache/%s", sha1_to_hex(sha1));
1396 if (rename(file, newfile))
1397 die("can't move temp file");
1399 /* let our caller know what we've just made */
1400 if (cache_sha1)
1401 hashcpy(cache_sha1, sha1);
1403 strbuf_release(&endlist);
1404 strbuf_release(&startlist);
1406 return 0;
1410 static int index_sort_hash(const void *a, const void *b)
1412 return hashcmp(((struct rc_index_entry_ondisk *)a)->sha1, ((struct rc_index_entry_ondisk *)b)->sha1);
1415 static int write_cache_index(struct strbuf *body)
1417 struct rc_index_header whead;
1418 struct lock_file *lk;
1419 int fd, i;
1421 /* clear index map if loaded */
1422 if (idx_map) {
1423 munmap(idx_map, idx_size);
1424 idx_map = 0;
1427 lk = xcalloc(sizeof(struct lock_file), 1);
1428 fd = hold_lock_file_for_update(lk, git_path("rev-cache/index"), 0);
1429 if (fd < 0) {
1430 free(lk);
1431 return -1;
1434 /* endianness yay! */
1435 memset(&whead, 0, sizeof(whead));
1436 memcpy(whead.signature, "REVINDEX", 8);
1437 whead.version = idx_head.version;
1438 whead.ofs_objects = htonl(idx_head.ofs_objects);
1439 whead.object_nr = htonl(idx_head.object_nr);
1440 whead.cache_nr = idx_head.cache_nr;
1441 whead.max_date = htonl(idx_head.max_date);
1443 write(fd, &whead, sizeof(struct rc_index_header));
1444 write_in_full(fd, idx_caches, idx_head.cache_nr * 20);
1446 for (i = 0; i <= 0xff; i++)
1447 fanout[i] = htonl(fanout[i]);
1448 write_in_full(fd, fanout, 0x100 * sizeof(uint32_t));
1450 write_in_full(fd, body->buf, body->len);
1452 if (commit_lock_file(lk) < 0)
1453 return -2;
1455 /* lk freed by lockfile.c */
1457 return 0;
1460 int make_cache_index(struct rev_cache_info *rci, unsigned char *cache_sha1,
1461 int fd, unsigned int size)
1463 struct strbuf buffer;
1464 int i, cache_index, cur;
1465 unsigned char *map;
1466 unsigned long max_date;
1468 maybe_fill_with_defaults(rci);
1470 if (!idx_map)
1471 init_index();
1473 lseek(fd, 0, SEEK_SET);
1474 map = xmmap(0, size, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
1475 if (map == MAP_FAILED)
1476 return -1;
1478 strbuf_init(&buffer, 0);
1479 if (idx_map) {
1480 strbuf_add(&buffer, idx_map + fanout[0], fanout[0x100] - fanout[0]);
1481 } else {
1482 /* not an update */
1483 memset(&idx_head, 0, sizeof(struct rc_index_header));
1484 idx_caches = 0;
1486 strcpy(idx_head.signature, "REVINDEX");
1487 idx_head.version = SUPPORTED_REVINDEX_VERSION;
1488 idx_head.ofs_objects = sizeof(struct rc_index_header) + 0x100 * sizeof(uint32_t);
1491 /* are we remaking a slice? */
1492 for (i = 0; i < idx_head.cache_nr; i++)
1493 if (!hashcmp(idx_caches + i * 20, cache_sha1))
1494 break;
1496 if (i == idx_head.cache_nr) {
1497 cache_index = idx_head.cache_nr++;
1498 idx_head.ofs_objects += 20;
1500 idx_caches = xrealloc(idx_caches, idx_head.cache_nr * 20);
1501 hashcpy(idx_caches + cache_index * 20, cache_sha1);
1502 } else
1503 cache_index = i;
1505 i = sizeof(struct rc_slice_header); /* offset */
1506 max_date = idx_head.max_date;
1507 while (i < size) {
1508 struct rc_index_entry index_entry, *entry;
1509 struct rc_index_entry_ondisk *disked_entry;
1510 struct rc_object_entry *object_entry = RC_OBTAIN_OBJECT_ENTRY(map + i);
1511 unsigned long date;
1512 int off, pos = i;
1514 i += RC_ACTUAL_OBJECT_ENTRY_SIZE(object_entry);
1516 if (object_entry->type != OBJ_COMMIT)
1517 continue;
1519 /* don't include ends; otherwise we'll find ourselves in loops */
1520 if (object_entry->is_end)
1521 continue;
1523 /* handle index duplication
1524 * -> keep old copy unless new one is a start -- based on expected usage, older ones will be more
1525 * likely to lead to greater slice traversals than new ones */
1526 date = object_entry->date;
1527 if (date > idx_head.max_date) {
1528 disked_entry = 0;
1529 if (date > max_date)
1530 max_date = date;
1531 } else
1532 disked_entry = search_index_1(object_entry->sha1);
1534 if (disked_entry && !object_entry->is_start && !rci->overwrite_all)
1535 continue;
1536 else if (disked_entry) {
1537 /* mmm, pointer arithmetic... tasty */ /* (entry - idx_map = offset, so cast is valid) */
1538 off = (unsigned int)((unsigned char *)disked_entry - idx_map) - fanout[0];
1539 disked_entry = (struct rc_index_entry_ondisk *)(buffer.buf + off);
1540 entry = from_disked_rc_index_entry(disked_entry, 0);
1541 } else
1542 entry = &index_entry;
1544 memset(entry, 0, sizeof(index_entry));
1545 entry->sha1 = object_entry->sha1;
1546 entry->is_start = object_entry->is_start;
1547 entry->cache_index = cache_index;
1548 entry->pos = pos;
1550 if (entry == &index_entry) {
1551 strbuf_add(&buffer, to_disked_rc_index_entry(entry, 0), sizeof(struct rc_index_entry_ondisk));
1552 idx_head.object_nr++;
1553 } else
1554 to_disked_rc_index_entry(entry, disked_entry);
1558 idx_head.max_date = max_date;
1559 qsort(buffer.buf, buffer.len / sizeof(struct rc_index_entry_ondisk), sizeof(struct rc_index_entry_ondisk), index_sort_hash);
1561 /* generate fanout */
1562 cur = 0x00;
1563 for (i = 0; i < buffer.len; i += sizeof(struct rc_index_entry_ondisk)) {
1564 struct rc_index_entry_ondisk *entry = (struct rc_index_entry_ondisk *)(buffer.buf + i);
1566 while (cur <= entry->sha1[0])
1567 fanout[cur++] = i + idx_head.ofs_objects;
1570 while (cur <= 0xff)
1571 fanout[cur++] = idx_head.ofs_objects + buffer.len;
1573 /* BOOM! */
1574 if (write_cache_index(&buffer))
1575 return -1;
1577 munmap(map, size);
1578 strbuf_release(&buffer);
1580 /* idx_map is unloaded without cleanup_cache_slices(), so regardless of previous index existence
1581 * we can still free this up */
1582 free(idx_caches);
1584 return 0;
1588 void starts_from_slices(struct rev_info *revs, unsigned int flags, unsigned char *which, int n)
1590 struct commit *commit;
1591 int i;
1593 if (!idx_map)
1594 init_index();
1595 if (!idx_map)
1596 return;
1598 for (i = idx_head.ofs_objects; i < idx_size; i += sizeof(struct rc_index_entry_ondisk)) {
1599 struct rc_index_entry *entry = RC_OBTAIN_INDEX_ENTRY(idx_map + i);
1601 if (!entry->is_start)
1602 continue;
1604 /* only include entries in 'which' slices */
1605 if (n) {
1606 int j;
1608 for (j = 0; j < n; j++)
1609 if (!hashcmp(idx_caches + entry->cache_index * 20, which + j * 20))
1610 break;
1612 if (j == n)
1613 continue;
1616 commit = lookup_commit(entry->sha1);
1617 if (!commit)
1618 continue;
1620 commit->object.flags |= flags;
1621 add_pending_object(revs, &commit->object, 0);
/* associates a slice's sha1 with its open descriptor and stat data */
struct slice_fd_time {
	unsigned char sha1[20];
	int fd;
	struct stat fi;
};

/* qsort comparator: order slices by ascending ctime (oldest first) */
int slice_time_sort(const void *a, const void *b)
{
	unsigned long ta = ((const struct slice_fd_time *)a)->fi.st_ctime;
	unsigned long tb = ((const struct slice_fd_time *)b)->fi.st_ctime;

	if (ta < tb)
		return -1;

	return ta > tb ? 1 : 0;
}
1646 int regenerate_cache_index(struct rev_cache_info *rci)
1648 DIR *dirh;
1649 int i;
1650 struct slice_fd_time info;
1651 struct strbuf slices;
1653 /* first remove old index if it exists */
1654 unlink_or_warn(git_path("rev-cache/index"));
1656 strbuf_init(&slices, 0);
1658 dirh = opendir(git_path("rev-cache"));
1659 if (dirh) {
1660 struct dirent *de;
1661 struct stat fi;
1662 int fd;
1663 unsigned char sha1[20];
1665 while ((de = readdir(dirh))) {
1666 if (de->d_name[0] == '.')
1667 continue;
1669 if (get_sha1_hex(de->d_name, sha1))
1670 continue;
1672 /* open with RDWR because of mmap call in make_cache_index() */
1673 fd = open_cache_slice(sha1, O_RDONLY);
1674 if (fd < 0 || fstat(fd, &fi)) {
1675 warning("bad cache found [%s]; fuse recommended", de->d_name);
1676 if (fd > 0)
1677 close(fd);
1678 continue;
1681 hashcpy(info.sha1, sha1);
1682 info.fd = fd;
1683 memcpy(&info.fi, &fi, sizeof(struct stat));
1685 strbuf_add(&slices, &info, sizeof(info));
1688 closedir(dirh);
1691 /* we want oldest first -> upon overlap, older slices are more likely to have a larger section,
1692 * as of the overlapped commit */
1693 qsort(slices.buf, slices.len / sizeof(info), sizeof(info), slice_time_sort);
1695 for (i = 0; i < slices.len; i += sizeof(info)) {
1696 struct slice_fd_time *infop = (struct slice_fd_time *)(slices.buf + i);
1697 struct stat *fip = &infop->fi;
1698 int fd = infop->fd;
1700 if (make_cache_index(rci, infop->sha1, fd, fip->st_size) < 0)
1701 die("error writing cache");
1703 close(fd);
1706 strbuf_release(&slices);
1708 return 0;
1711 static int add_slices_for_fuse(struct rev_cache_info *rci, struct string_list *files, struct strbuf *ignore)
1713 unsigned char sha1[20];
1714 char base[PATH_MAX];
1715 int baselen, i, slice_nr = 0;
1716 struct stat fi;
1717 DIR *dirh;
1718 struct dirent *de;
1720 strncpy(base, git_path("rev-cache"), sizeof(base));
1721 baselen = strlen(base);
1723 dirh = opendir(base);
1724 if (!dirh)
1725 return 0;
1727 while ((de = readdir(dirh))) {
1728 if (de->d_name[0] == '.')
1729 continue;
1731 base[baselen] = '/';
1732 strncpy(base + baselen + 1, de->d_name, sizeof(base) - baselen - 1);
1734 if (get_sha1_hex(de->d_name, sha1)) {
1735 /* whatever it is, we don't need it... */
1736 string_list_insert(base, files);
1737 continue;
1740 /* _theoretically_ it is possible a slice < ignore_size to map objects not covered by, yet reachable from,
1741 * a slice >= ignore_size, meaning that we could potentially delete an 'unfused' slice; but if that
1742 * ever *did* happen their cache structure'd be so fucked up they might as well refuse the entire thing.
1743 * and at any rate the worst it'd do is make rev-list revert to standard walking in that (small) bit.
1745 if (rci->ignore_size) {
1746 if (stat(base, &fi))
1747 warning("can't query file %s\n", base);
1748 else if (fi.st_size >= rci->ignore_size) {
1749 strbuf_add(ignore, sha1, 20);
1750 continue;
1752 } else {
1753 /* check if a pointer */
1754 struct cache_slice_pointer ptr;
1755 int fd = open(base, O_RDONLY);
1757 if (fd < 0)
1758 goto dont_save;
1759 if (sizeof(ptr) != read_in_full(fd, &ptr, sizeof(ptr)))
1760 goto dont_save;
1762 close(fd);
1763 if (!strcmp(ptr.signature, "REVCOPTR")) {
1764 strbuf_add(ignore, sha1, 20);
1765 continue;
1769 dont_save:
1770 for (i = idx_head.cache_nr - 1; i >= 0; i--) {
1771 if (!hashcmp(idx_caches + i * 20, sha1))
1772 break;
1775 if (i >= 0)
1776 rci->maps[i].size = 1;
1778 string_list_insert(base, files);
1779 slice_nr++;
1782 closedir(dirh);
1784 return slice_nr;
1787 /* the most work-intensive attributes in the cache are the unique objects and size, both
1788 * of which can be re-used. although path structures will be isomorphic, path generation is
1789 * not particularly expensive, and at any rate we need to re-sort the commits */
1790 int fuse_cache_slices(struct rev_cache_info *rci, struct rev_info *revs)
1792 unsigned char cache_sha1[20];
1793 struct string_list files = {0, 0, 0, 1}; /* dup */
1794 struct strbuf ignore;
1795 int i;
1797 maybe_fill_with_defaults(rci);
1799 if (!idx_map)
1800 init_index();
1801 if (!idx_map)
1802 return -1;
1804 strbuf_init(&ignore, 0);
1805 rci->maps = xcalloc(idx_head.cache_nr, sizeof(struct rev_cache_slice_map));
1806 if (add_slices_for_fuse(rci, &files, &ignore) <= 1) {
1807 printf("nothing to fuse\n");
1808 return 1;
1811 if (ignore.len) {
1812 starts_from_slices(revs, UNINTERESTING, (unsigned char *)ignore.buf, ignore.len / 20);
1813 strbuf_release(&ignore);
1816 /* initialize mappings */
1817 for (i = idx_head.cache_nr - 1; i >= 0; i--) {
1818 struct rev_cache_slice_map *map = rci->maps + i;
1819 struct stat fi;
1820 int fd;
1822 if (!map->size)
1823 continue;
1824 map->size = 0;
1826 /* pointers are never fused, so we can use open directly */
1827 fd = open(git_path("rev-cache/%s", sha1_to_hex(idx_caches + i * 20)), O_RDONLY);
1828 if (fd <= 0 || fstat(fd, &fi))
1829 continue;
1830 if (fi.st_size < sizeof(struct rc_slice_header))
1831 continue;
1833 map->map = xmmap(0, fi.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
1834 if (map->map == MAP_FAILED)
1835 continue;
1837 close(fd);
1838 map->size = fi.st_size;
1841 rci->make_index = 0;
1842 rci->fuse_me = 1;
1843 if (make_cache_slice(rci, revs, 0, 0, cache_sha1) < 0)
1844 die("can't make cache slice");
1846 printf("%s\n", sha1_to_hex(cache_sha1));
1848 /* clean up time! */
1849 for (i = idx_head.cache_nr - 1; i >= 0; i--) {
1850 struct rev_cache_slice_map *map = rci->maps + i;
1852 if (!map->size)
1853 continue;
1855 munmap(map->map, map->size);
1857 free(rci->maps);
1858 cleanup_cache_slices();
1860 for (i = 0; i < files.nr; i++) {
1861 char *name = files.items[i].string;
1863 fprintf(stderr, "removing %s\n", name);
1864 unlink_or_warn(name);
1867 string_list_clear(&files, 0);
1869 return regenerate_cache_index(rci);
1872 static int verify_cache_slice(const char *slice_path, unsigned char *sha1)
1874 struct rc_slice_header head;
1875 int fd, len, retval = -1;
1876 unsigned char *map = MAP_FAILED;
1877 struct stat fi;
1879 len = strlen(slice_path);
1880 if (len < 40)
1881 return -2;
1882 if (get_sha1_hex(slice_path + len - 40, sha1))
1883 return -3;
1885 fd = open(slice_path, O_RDONLY);
1886 if (fd == -1)
1887 goto end;
1888 if (fstat(fd, &fi) || fi.st_size < sizeof(head))
1889 goto end;
1891 map = xmmap(0, sizeof(head), PROT_READ, MAP_PRIVATE, fd, 0);
1892 if (map == MAP_FAILED)
1893 goto end;
1894 if (get_cache_slice_header(sha1, map, fi.st_size, &head))
1895 goto end;
1897 retval = 0;
1899 end:
1900 if (map != MAP_FAILED)
1901 munmap(map, sizeof(head));
1902 if (fd > 0)
1903 close(fd);
1905 return retval;
1908 int make_cache_slice_pointer(struct rev_cache_info *rci, const char *slice_path)
1910 struct cache_slice_pointer ptr;
1911 int fd;
1912 unsigned char sha1[20];
1914 maybe_fill_with_defaults(rci);
1915 rci->overwrite_all = 1;
1917 if (verify_cache_slice(slice_path, sha1) < 0)
1918 return -1;
1920 strcpy(ptr.signature, "REVCOPTR");
1921 ptr.version = SUPPORTED_REVCOPTR_VERSION;
1922 strcpy(ptr.path, make_nonrelative_path(slice_path));
1924 fd = open(git_path("rev-cache/%s", sha1_to_hex(sha1)), O_RDWR | O_CREAT | O_TRUNC, 0666);
1925 if (fd < 0)
1926 return -2;
1928 write_in_full(fd, &ptr, sizeof(ptr));
1929 make_cache_index(rci, sha1, fd, sizeof(ptr));
1931 close(fd);
1933 return 0;