reftable: document reading and writing indices
[alt-git.git] / pack-bitmap.c
blob229a11fb00fc20257385aca4cd653c26f8ce6b31
1 #include "git-compat-util.h"
2 #include "commit.h"
3 #include "gettext.h"
4 #include "hex.h"
5 #include "strbuf.h"
6 #include "tag.h"
7 #include "diff.h"
8 #include "revision.h"
9 #include "progress.h"
10 #include "list-objects.h"
11 #include "pack.h"
12 #include "pack-bitmap.h"
13 #include "pack-revindex.h"
14 #include "pack-objects.h"
15 #include "packfile.h"
16 #include "repository.h"
17 #include "trace2.h"
18 #include "object-file.h"
19 #include "object-store-ll.h"
20 #include "list-objects-filter-options.h"
21 #include "midx.h"
22 #include "config.h"
25 * An entry on the bitmap index, representing the bitmap for a given
26 * commit.
28 struct stored_bitmap {
29 struct object_id oid;
30 struct ewah_bitmap *root;
31 struct stored_bitmap *xor;
32 int flags;
36 * The active bitmap index for a repository. By design, repositories only have
37 * a single bitmap index available (the index for the biggest packfile in
38 * the repository), since bitmap indexes need full closure.
40 * If there is more than one bitmap index available (e.g. because of alternates),
41 * the active bitmap index is the largest one.
43 struct bitmap_index {
45 * The pack or multi-pack index (MIDX) that this bitmap index belongs
46 * to.
48 * Exactly one of these must be non-NULL; this specifies the object
49 * order used to interpret this bitmap.
51 struct packed_git *pack;
52 struct multi_pack_index *midx;
55 * Mark the first `reuse_objects` in the packfile as reused:
56 * they will be sent as-is without using them for repacking
57 * calculations
59 uint32_t reuse_objects;
61 /* mmapped buffer of the whole bitmap index */
62 unsigned char *map;
63 size_t map_size; /* size of the mmaped buffer */
64 size_t map_pos; /* current position when loading the index */
67 * Type indexes.
69 * Each bitmap marks which objects in the packfile are of the given
70 * type. This provides type information when yielding the objects from
71 * the packfile during a walk, which allows for better delta bases.
73 struct ewah_bitmap *commits;
74 struct ewah_bitmap *trees;
75 struct ewah_bitmap *blobs;
76 struct ewah_bitmap *tags;
78 /* Map from object ID -> `stored_bitmap` for all the bitmapped commits */
79 kh_oid_map_t *bitmaps;
81 /* Number of bitmapped commits */
82 uint32_t entry_count;
84 /* If not NULL, this is a name-hash cache pointing into map. */
85 uint32_t *hashes;
87 /* The checksum of the packfile or MIDX; points into map. */
88 const unsigned char *checksum;
91 * If not NULL, this point into the commit table extension
92 * (within the memory mapped region `map`).
94 unsigned char *table_lookup;
97 * Extended index.
99 * When trying to perform bitmap operations with objects that are not
100 * packed in `pack`, these objects are added to this "fake index" and
101 * are assumed to appear at the end of the packfile for all operations
103 struct eindex {
104 struct object **objects;
105 uint32_t *hashes;
106 uint32_t count, alloc;
107 kh_oid_pos_t *positions;
108 } ext_index;
110 /* Bitmap result of the last performed walk */
111 struct bitmap *result;
113 /* "have" bitmap from the last performed walk */
114 struct bitmap *haves;
116 /* Version of the bitmap index */
117 unsigned int version;
120 static struct ewah_bitmap *lookup_stored_bitmap(struct stored_bitmap *st)
122 struct ewah_bitmap *parent;
123 struct ewah_bitmap *composed;
125 if (!st->xor)
126 return st->root;
128 composed = ewah_pool_new();
129 parent = lookup_stored_bitmap(st->xor);
130 ewah_xor(st->root, parent, composed);
132 ewah_pool_free(st->root);
133 st->root = composed;
134 st->xor = NULL;
136 return composed;
140 * Read a bitmap from the current read position on the mmaped
141 * index, and increase the read position accordingly
143 static struct ewah_bitmap *read_bitmap_1(struct bitmap_index *index)
145 struct ewah_bitmap *b = ewah_pool_new();
147 ssize_t bitmap_size = ewah_read_mmap(b,
148 index->map + index->map_pos,
149 index->map_size - index->map_pos);
151 if (bitmap_size < 0) {
152 error(_("failed to load bitmap index (corrupted?)"));
153 ewah_pool_free(b);
154 return NULL;
157 index->map_pos += bitmap_size;
158 return b;
161 static uint32_t bitmap_num_objects(struct bitmap_index *index)
163 if (index->midx)
164 return index->midx->num_objects;
165 return index->pack->num_objects;
168 static int load_bitmap_header(struct bitmap_index *index)
170 struct bitmap_disk_header *header = (void *)index->map;
171 size_t header_size = sizeof(*header) - GIT_MAX_RAWSZ + the_hash_algo->rawsz;
173 if (index->map_size < header_size + the_hash_algo->rawsz)
174 return error(_("corrupted bitmap index (too small)"));
176 if (memcmp(header->magic, BITMAP_IDX_SIGNATURE, sizeof(BITMAP_IDX_SIGNATURE)) != 0)
177 return error(_("corrupted bitmap index file (wrong header)"));
179 index->version = ntohs(header->version);
180 if (index->version != 1)
181 return error(_("unsupported version '%d' for bitmap index file"), index->version);
183 /* Parse known bitmap format options */
185 uint32_t flags = ntohs(header->options);
186 size_t cache_size = st_mult(bitmap_num_objects(index), sizeof(uint32_t));
187 unsigned char *index_end = index->map + index->map_size - the_hash_algo->rawsz;
189 if ((flags & BITMAP_OPT_FULL_DAG) == 0)
190 BUG("unsupported options for bitmap index file "
191 "(Git requires BITMAP_OPT_FULL_DAG)");
193 if (flags & BITMAP_OPT_HASH_CACHE) {
194 if (cache_size > index_end - index->map - header_size)
195 return error(_("corrupted bitmap index file (too short to fit hash cache)"));
196 index->hashes = (void *)(index_end - cache_size);
197 index_end -= cache_size;
200 if (flags & BITMAP_OPT_LOOKUP_TABLE) {
201 size_t table_size = st_mult(ntohl(header->entry_count),
202 BITMAP_LOOKUP_TABLE_TRIPLET_WIDTH);
203 if (table_size > index_end - index->map - header_size)
204 return error(_("corrupted bitmap index file (too short to fit lookup table)"));
205 if (git_env_bool("GIT_TEST_READ_COMMIT_TABLE", 1))
206 index->table_lookup = (void *)(index_end - table_size);
207 index_end -= table_size;
211 index->entry_count = ntohl(header->entry_count);
212 index->checksum = header->checksum;
213 index->map_pos += header_size;
214 return 0;
217 static struct stored_bitmap *store_bitmap(struct bitmap_index *index,
218 struct ewah_bitmap *root,
219 const struct object_id *oid,
220 struct stored_bitmap *xor_with,
221 int flags)
223 struct stored_bitmap *stored;
224 khiter_t hash_pos;
225 int ret;
227 stored = xmalloc(sizeof(struct stored_bitmap));
228 stored->root = root;
229 stored->xor = xor_with;
230 stored->flags = flags;
231 oidcpy(&stored->oid, oid);
233 hash_pos = kh_put_oid_map(index->bitmaps, stored->oid, &ret);
236 * A 0 return code means the insertion succeeded with no changes,
237 * because the SHA1 already existed on the map. This is bad, there
238 * shouldn't be duplicated commits in the index.
240 if (ret == 0) {
241 error(_("duplicate entry in bitmap index: '%s'"), oid_to_hex(oid));
242 return NULL;
245 kh_value(index->bitmaps, hash_pos) = stored;
246 return stored;
249 static inline uint32_t read_be32(const unsigned char *buffer, size_t *pos)
251 uint32_t result = get_be32(buffer + *pos);
252 (*pos) += sizeof(result);
253 return result;
256 static inline uint8_t read_u8(const unsigned char *buffer, size_t *pos)
258 return buffer[(*pos)++];
261 #define MAX_XOR_OFFSET 160
263 static int nth_bitmap_object_oid(struct bitmap_index *index,
264 struct object_id *oid,
265 uint32_t n)
267 if (index->midx)
268 return nth_midxed_object_oid(oid, index->midx, n) ? 0 : -1;
269 return nth_packed_object_id(oid, index->pack, n);
272 static int load_bitmap_entries_v1(struct bitmap_index *index)
274 uint32_t i;
275 struct stored_bitmap *recent_bitmaps[MAX_XOR_OFFSET] = { NULL };
277 for (i = 0; i < index->entry_count; ++i) {
278 int xor_offset, flags;
279 struct ewah_bitmap *bitmap = NULL;
280 struct stored_bitmap *xor_bitmap = NULL;
281 uint32_t commit_idx_pos;
282 struct object_id oid;
284 if (index->map_size - index->map_pos < 6)
285 return error(_("corrupt ewah bitmap: truncated header for entry %d"), i);
287 commit_idx_pos = read_be32(index->map, &index->map_pos);
288 xor_offset = read_u8(index->map, &index->map_pos);
289 flags = read_u8(index->map, &index->map_pos);
291 if (nth_bitmap_object_oid(index, &oid, commit_idx_pos) < 0)
292 return error(_("corrupt ewah bitmap: commit index %u out of range"),
293 (unsigned)commit_idx_pos);
295 bitmap = read_bitmap_1(index);
296 if (!bitmap)
297 return -1;
299 if (xor_offset > MAX_XOR_OFFSET || xor_offset > i)
300 return error(_("corrupted bitmap pack index"));
302 if (xor_offset > 0) {
303 xor_bitmap = recent_bitmaps[(i - xor_offset) % MAX_XOR_OFFSET];
305 if (!xor_bitmap)
306 return error(_("invalid XOR offset in bitmap pack index"));
309 recent_bitmaps[i % MAX_XOR_OFFSET] = store_bitmap(
310 index, bitmap, &oid, xor_bitmap, flags);
313 return 0;
316 char *midx_bitmap_filename(struct multi_pack_index *midx)
318 struct strbuf buf = STRBUF_INIT;
320 get_midx_filename(&buf, midx->object_dir);
321 strbuf_addf(&buf, "-%s.bitmap", hash_to_hex(get_midx_checksum(midx)));
323 return strbuf_detach(&buf, NULL);
326 char *pack_bitmap_filename(struct packed_git *p)
328 size_t len;
330 if (!strip_suffix(p->pack_name, ".pack", &len))
331 BUG("pack_name does not end in .pack");
332 return xstrfmt("%.*s.bitmap", (int)len, p->pack_name);
335 static int open_midx_bitmap_1(struct bitmap_index *bitmap_git,
336 struct multi_pack_index *midx)
338 struct stat st;
339 char *bitmap_name = midx_bitmap_filename(midx);
340 int fd = git_open(bitmap_name);
341 uint32_t i, preferred_pack;
342 struct packed_git *preferred;
344 if (fd < 0) {
345 if (errno != ENOENT)
346 warning_errno("cannot open '%s'", bitmap_name);
347 free(bitmap_name);
348 return -1;
350 free(bitmap_name);
352 if (fstat(fd, &st)) {
353 error_errno(_("cannot fstat bitmap file"));
354 close(fd);
355 return -1;
358 if (bitmap_git->pack || bitmap_git->midx) {
359 struct strbuf buf = STRBUF_INIT;
360 get_midx_filename(&buf, midx->object_dir);
361 trace2_data_string("bitmap", the_repository,
362 "ignoring extra midx bitmap file", buf.buf);
363 close(fd);
364 strbuf_release(&buf);
365 return -1;
368 bitmap_git->midx = midx;
369 bitmap_git->map_size = xsize_t(st.st_size);
370 bitmap_git->map_pos = 0;
371 bitmap_git->map = xmmap(NULL, bitmap_git->map_size, PROT_READ,
372 MAP_PRIVATE, fd, 0);
373 close(fd);
375 if (load_bitmap_header(bitmap_git) < 0)
376 goto cleanup;
378 if (!hasheq(get_midx_checksum(bitmap_git->midx), bitmap_git->checksum)) {
379 error(_("checksum doesn't match in MIDX and bitmap"));
380 goto cleanup;
383 if (load_midx_revindex(bitmap_git->midx)) {
384 warning(_("multi-pack bitmap is missing required reverse index"));
385 goto cleanup;
388 for (i = 0; i < bitmap_git->midx->num_packs; i++) {
389 if (prepare_midx_pack(the_repository, bitmap_git->midx, i)) {
390 warning(_("could not open pack %s"),
391 bitmap_git->midx->pack_names[i]);
392 goto cleanup;
396 if (midx_preferred_pack(bitmap_git->midx, &preferred_pack) < 0) {
397 warning(_("could not determine MIDX preferred pack"));
398 goto cleanup;
401 preferred = bitmap_git->midx->packs[preferred_pack];
402 if (!is_pack_valid(preferred)) {
403 warning(_("preferred pack (%s) is invalid"),
404 preferred->pack_name);
405 goto cleanup;
408 return 0;
410 cleanup:
411 munmap(bitmap_git->map, bitmap_git->map_size);
412 bitmap_git->map_size = 0;
413 bitmap_git->map_pos = 0;
414 bitmap_git->map = NULL;
415 bitmap_git->midx = NULL;
416 return -1;
419 static int open_pack_bitmap_1(struct bitmap_index *bitmap_git, struct packed_git *packfile)
421 int fd;
422 struct stat st;
423 char *bitmap_name;
425 bitmap_name = pack_bitmap_filename(packfile);
426 fd = git_open(bitmap_name);
428 if (fd < 0) {
429 if (errno != ENOENT)
430 warning_errno("cannot open '%s'", bitmap_name);
431 free(bitmap_name);
432 return -1;
434 free(bitmap_name);
436 if (fstat(fd, &st)) {
437 error_errno(_("cannot fstat bitmap file"));
438 close(fd);
439 return -1;
442 if (bitmap_git->pack || bitmap_git->midx) {
443 trace2_data_string("bitmap", the_repository,
444 "ignoring extra bitmap file", packfile->pack_name);
445 close(fd);
446 return -1;
449 if (!is_pack_valid(packfile)) {
450 close(fd);
451 return -1;
454 bitmap_git->pack = packfile;
455 bitmap_git->map_size = xsize_t(st.st_size);
456 bitmap_git->map = xmmap(NULL, bitmap_git->map_size, PROT_READ, MAP_PRIVATE, fd, 0);
457 bitmap_git->map_pos = 0;
458 close(fd);
460 if (load_bitmap_header(bitmap_git) < 0) {
461 munmap(bitmap_git->map, bitmap_git->map_size);
462 bitmap_git->map = NULL;
463 bitmap_git->map_size = 0;
464 bitmap_git->map_pos = 0;
465 bitmap_git->pack = NULL;
466 return -1;
469 trace2_data_string("bitmap", the_repository, "opened bitmap file",
470 packfile->pack_name);
471 return 0;
474 static int load_reverse_index(struct repository *r, struct bitmap_index *bitmap_git)
476 if (bitmap_is_midx(bitmap_git)) {
477 uint32_t i;
478 int ret;
481 * The multi-pack-index's .rev file is already loaded via
482 * open_pack_bitmap_1().
484 * But we still need to open the individual pack .rev files,
485 * since we will need to make use of them in pack-objects.
487 for (i = 0; i < bitmap_git->midx->num_packs; i++) {
488 ret = load_pack_revindex(r, bitmap_git->midx->packs[i]);
489 if (ret)
490 return ret;
492 return 0;
494 return load_pack_revindex(r, bitmap_git->pack);
497 static int load_bitmap(struct repository *r, struct bitmap_index *bitmap_git)
499 assert(bitmap_git->map);
501 bitmap_git->bitmaps = kh_init_oid_map();
502 bitmap_git->ext_index.positions = kh_init_oid_pos();
504 if (load_reverse_index(r, bitmap_git))
505 goto failed;
507 if (!(bitmap_git->commits = read_bitmap_1(bitmap_git)) ||
508 !(bitmap_git->trees = read_bitmap_1(bitmap_git)) ||
509 !(bitmap_git->blobs = read_bitmap_1(bitmap_git)) ||
510 !(bitmap_git->tags = read_bitmap_1(bitmap_git)))
511 goto failed;
513 if (!bitmap_git->table_lookup && load_bitmap_entries_v1(bitmap_git) < 0)
514 goto failed;
516 return 0;
518 failed:
519 munmap(bitmap_git->map, bitmap_git->map_size);
520 bitmap_git->map = NULL;
521 bitmap_git->map_size = 0;
523 kh_destroy_oid_map(bitmap_git->bitmaps);
524 bitmap_git->bitmaps = NULL;
526 kh_destroy_oid_pos(bitmap_git->ext_index.positions);
527 bitmap_git->ext_index.positions = NULL;
529 return -1;
532 static int open_pack_bitmap(struct repository *r,
533 struct bitmap_index *bitmap_git)
535 struct packed_git *p;
536 int ret = -1;
538 for (p = get_all_packs(r); p; p = p->next) {
539 if (open_pack_bitmap_1(bitmap_git, p) == 0) {
540 ret = 0;
542 * The only reason to keep looking is to report
543 * duplicates.
545 if (!trace2_is_enabled())
546 break;
550 return ret;
553 static int open_midx_bitmap(struct repository *r,
554 struct bitmap_index *bitmap_git)
556 int ret = -1;
557 struct multi_pack_index *midx;
559 assert(!bitmap_git->map);
561 for (midx = get_multi_pack_index(r); midx; midx = midx->next) {
562 if (!open_midx_bitmap_1(bitmap_git, midx))
563 ret = 0;
565 return ret;
568 static int open_bitmap(struct repository *r,
569 struct bitmap_index *bitmap_git)
571 int found;
573 assert(!bitmap_git->map);
575 found = !open_midx_bitmap(r, bitmap_git);
578 * these will all be skipped if we opened a midx bitmap; but run it
579 * anyway if tracing is enabled to report the duplicates
581 if (!found || trace2_is_enabled())
582 found |= !open_pack_bitmap(r, bitmap_git);
584 return found ? 0 : -1;
587 struct bitmap_index *prepare_bitmap_git(struct repository *r)
589 struct bitmap_index *bitmap_git = xcalloc(1, sizeof(*bitmap_git));
591 if (!open_bitmap(r, bitmap_git) && !load_bitmap(r, bitmap_git))
592 return bitmap_git;
594 free_bitmap_index(bitmap_git);
595 return NULL;
598 struct bitmap_index *prepare_midx_bitmap_git(struct multi_pack_index *midx)
600 struct repository *r = the_repository;
601 struct bitmap_index *bitmap_git = xcalloc(1, sizeof(*bitmap_git));
603 if (!open_midx_bitmap_1(bitmap_git, midx) && !load_bitmap(r, bitmap_git))
604 return bitmap_git;
606 free_bitmap_index(bitmap_git);
607 return NULL;
610 struct include_data {
611 struct bitmap_index *bitmap_git;
612 struct bitmap *base;
613 struct bitmap *seen;
616 struct bitmap_lookup_table_triplet {
617 uint32_t commit_pos;
618 uint64_t offset;
619 uint32_t xor_row;
622 struct bitmap_lookup_table_xor_item {
623 struct object_id oid;
624 uint64_t offset;
628 * Given a `triplet` struct pointer and pointer `p`, this
629 * function reads the triplet beginning at `p` into the struct.
630 * Note that this function assumes that there is enough memory
631 * left for filling the `triplet` struct from `p`.
633 static int bitmap_lookup_table_get_triplet_by_pointer(struct bitmap_lookup_table_triplet *triplet,
634 const unsigned char *p)
636 if (!triplet)
637 return -1;
639 triplet->commit_pos = get_be32(p);
640 p += sizeof(uint32_t);
641 triplet->offset = get_be64(p);
642 p += sizeof(uint64_t);
643 triplet->xor_row = get_be32(p);
644 return 0;
648 * This function gets the raw triplet from `row`'th row in the
649 * lookup table and fills that data to the `triplet`.
651 static int bitmap_lookup_table_get_triplet(struct bitmap_index *bitmap_git,
652 uint32_t pos,
653 struct bitmap_lookup_table_triplet *triplet)
655 unsigned char *p = NULL;
656 if (pos >= bitmap_git->entry_count)
657 return error(_("corrupt bitmap lookup table: triplet position out of index"));
659 p = bitmap_git->table_lookup + st_mult(pos, BITMAP_LOOKUP_TABLE_TRIPLET_WIDTH);
661 return bitmap_lookup_table_get_triplet_by_pointer(triplet, p);
665 * Searches for a matching triplet. `commit_pos` is a pointer
666 * to the wanted commit position value. `table_entry` points to
667 * a triplet in lookup table. The first 4 bytes of each
668 * triplet (pointed by `table_entry`) are compared with `*commit_pos`.
670 static int triplet_cmp(const void *commit_pos, const void *table_entry)
673 uint32_t a = *(uint32_t *)commit_pos;
674 uint32_t b = get_be32(table_entry);
675 if (a > b)
676 return 1;
677 else if (a < b)
678 return -1;
680 return 0;
683 static uint32_t bitmap_bsearch_pos(struct bitmap_index *bitmap_git,
684 struct object_id *oid,
685 uint32_t *result)
687 int found;
689 if (bitmap_is_midx(bitmap_git))
690 found = bsearch_midx(oid, bitmap_git->midx, result);
691 else
692 found = bsearch_pack(oid, bitmap_git->pack, result);
694 return found;
698 * `bsearch_triplet_by_pos` function searches for the raw triplet
699 * having commit position same as `commit_pos` and fills `triplet`
700 * object from the raw triplet. Returns 1 on success and 0 on
701 * failure.
703 static int bitmap_bsearch_triplet_by_pos(uint32_t commit_pos,
704 struct bitmap_index *bitmap_git,
705 struct bitmap_lookup_table_triplet *triplet)
707 unsigned char *p = bsearch(&commit_pos, bitmap_git->table_lookup, bitmap_git->entry_count,
708 BITMAP_LOOKUP_TABLE_TRIPLET_WIDTH, triplet_cmp);
710 if (!p)
711 return -1;
713 return bitmap_lookup_table_get_triplet_by_pointer(triplet, p);
716 static struct stored_bitmap *lazy_bitmap_for_commit(struct bitmap_index *bitmap_git,
717 struct commit *commit)
719 uint32_t commit_pos, xor_row;
720 uint64_t offset;
721 int flags;
722 struct bitmap_lookup_table_triplet triplet;
723 struct object_id *oid = &commit->object.oid;
724 struct ewah_bitmap *bitmap;
725 struct stored_bitmap *xor_bitmap = NULL;
726 const int bitmap_header_size = 6;
727 static struct bitmap_lookup_table_xor_item *xor_items = NULL;
728 static size_t xor_items_nr = 0, xor_items_alloc = 0;
729 static int is_corrupt = 0;
730 int xor_flags;
731 khiter_t hash_pos;
732 struct bitmap_lookup_table_xor_item *xor_item;
734 if (is_corrupt)
735 return NULL;
737 if (!bitmap_bsearch_pos(bitmap_git, oid, &commit_pos))
738 return NULL;
740 if (bitmap_bsearch_triplet_by_pos(commit_pos, bitmap_git, &triplet) < 0)
741 return NULL;
743 xor_items_nr = 0;
744 offset = triplet.offset;
745 xor_row = triplet.xor_row;
747 while (xor_row != 0xffffffff) {
748 ALLOC_GROW(xor_items, xor_items_nr + 1, xor_items_alloc);
750 if (xor_items_nr + 1 >= bitmap_git->entry_count) {
751 error(_("corrupt bitmap lookup table: xor chain exceeds entry count"));
752 goto corrupt;
755 if (bitmap_lookup_table_get_triplet(bitmap_git, xor_row, &triplet) < 0)
756 goto corrupt;
758 xor_item = &xor_items[xor_items_nr];
759 xor_item->offset = triplet.offset;
761 if (nth_bitmap_object_oid(bitmap_git, &xor_item->oid, triplet.commit_pos) < 0) {
762 error(_("corrupt bitmap lookup table: commit index %u out of range"),
763 triplet.commit_pos);
764 goto corrupt;
767 hash_pos = kh_get_oid_map(bitmap_git->bitmaps, xor_item->oid);
770 * If desired bitmap is already stored, we don't need
771 * to iterate further. Because we know that bitmaps
772 * that are needed to be parsed to parse this bitmap
773 * has already been stored. So, assign this stored bitmap
774 * to the xor_bitmap.
776 if (hash_pos < kh_end(bitmap_git->bitmaps) &&
777 (xor_bitmap = kh_value(bitmap_git->bitmaps, hash_pos)))
778 break;
779 xor_items_nr++;
780 xor_row = triplet.xor_row;
783 while (xor_items_nr) {
784 xor_item = &xor_items[xor_items_nr - 1];
785 bitmap_git->map_pos = xor_item->offset;
786 if (bitmap_git->map_size - bitmap_git->map_pos < bitmap_header_size) {
787 error(_("corrupt ewah bitmap: truncated header for bitmap of commit \"%s\""),
788 oid_to_hex(&xor_item->oid));
789 goto corrupt;
792 bitmap_git->map_pos += sizeof(uint32_t) + sizeof(uint8_t);
793 xor_flags = read_u8(bitmap_git->map, &bitmap_git->map_pos);
794 bitmap = read_bitmap_1(bitmap_git);
796 if (!bitmap)
797 goto corrupt;
799 xor_bitmap = store_bitmap(bitmap_git, bitmap, &xor_item->oid, xor_bitmap, xor_flags);
800 xor_items_nr--;
803 bitmap_git->map_pos = offset;
804 if (bitmap_git->map_size - bitmap_git->map_pos < bitmap_header_size) {
805 error(_("corrupt ewah bitmap: truncated header for bitmap of commit \"%s\""),
806 oid_to_hex(oid));
807 goto corrupt;
811 * Don't bother reading the commit's index position or its xor
812 * offset:
814 * - The commit's index position is irrelevant to us, since
815 * load_bitmap_entries_v1 only uses it to learn the object
816 * id which is used to compute the hashmap's key. We already
817 * have an object id, so no need to look it up again.
819 * - The xor_offset is unusable for us, since it specifies how
820 * many entries previous to ours we should look at. This
821 * makes sense when reading the bitmaps sequentially (as in
822 * load_bitmap_entries_v1()), since we can keep track of
823 * each bitmap as we read them.
825 * But it can't work for us, since the bitmap's don't have a
826 * fixed size. So we learn the position of the xor'd bitmap
827 * from the commit table (and resolve it to a bitmap in the
828 * above if-statement).
830 * Instead, we can skip ahead and immediately read the flags and
831 * ewah bitmap.
833 bitmap_git->map_pos += sizeof(uint32_t) + sizeof(uint8_t);
834 flags = read_u8(bitmap_git->map, &bitmap_git->map_pos);
835 bitmap = read_bitmap_1(bitmap_git);
837 if (!bitmap)
838 goto corrupt;
840 return store_bitmap(bitmap_git, bitmap, oid, xor_bitmap, flags);
842 corrupt:
843 free(xor_items);
844 is_corrupt = 1;
845 return NULL;
848 struct ewah_bitmap *bitmap_for_commit(struct bitmap_index *bitmap_git,
849 struct commit *commit)
851 khiter_t hash_pos = kh_get_oid_map(bitmap_git->bitmaps,
852 commit->object.oid);
853 if (hash_pos >= kh_end(bitmap_git->bitmaps)) {
854 struct stored_bitmap *bitmap = NULL;
855 if (!bitmap_git->table_lookup)
856 return NULL;
858 /* this is a fairly hot codepath - no trace2_region please */
859 /* NEEDSWORK: cache misses aren't recorded */
860 bitmap = lazy_bitmap_for_commit(bitmap_git, commit);
861 if (!bitmap)
862 return NULL;
863 return lookup_stored_bitmap(bitmap);
865 return lookup_stored_bitmap(kh_value(bitmap_git->bitmaps, hash_pos));
868 static inline int bitmap_position_extended(struct bitmap_index *bitmap_git,
869 const struct object_id *oid)
871 kh_oid_pos_t *positions = bitmap_git->ext_index.positions;
872 khiter_t pos = kh_get_oid_pos(positions, *oid);
874 if (pos < kh_end(positions)) {
875 int bitmap_pos = kh_value(positions, pos);
876 return bitmap_pos + bitmap_num_objects(bitmap_git);
879 return -1;
882 static inline int bitmap_position_packfile(struct bitmap_index *bitmap_git,
883 const struct object_id *oid)
885 uint32_t pos;
886 off_t offset = find_pack_entry_one(oid->hash, bitmap_git->pack);
887 if (!offset)
888 return -1;
890 if (offset_to_pack_pos(bitmap_git->pack, offset, &pos) < 0)
891 return -1;
892 return pos;
895 static int bitmap_position_midx(struct bitmap_index *bitmap_git,
896 const struct object_id *oid)
898 uint32_t want, got;
899 if (!bsearch_midx(oid, bitmap_git->midx, &want))
900 return -1;
902 if (midx_to_pack_pos(bitmap_git->midx, want, &got) < 0)
903 return -1;
904 return got;
907 static int bitmap_position(struct bitmap_index *bitmap_git,
908 const struct object_id *oid)
910 int pos;
911 if (bitmap_is_midx(bitmap_git))
912 pos = bitmap_position_midx(bitmap_git, oid);
913 else
914 pos = bitmap_position_packfile(bitmap_git, oid);
915 return (pos >= 0) ? pos : bitmap_position_extended(bitmap_git, oid);
918 static int ext_index_add_object(struct bitmap_index *bitmap_git,
919 struct object *object, const char *name)
921 struct eindex *eindex = &bitmap_git->ext_index;
923 khiter_t hash_pos;
924 int hash_ret;
925 int bitmap_pos;
927 hash_pos = kh_put_oid_pos(eindex->positions, object->oid, &hash_ret);
928 if (hash_ret > 0) {
929 if (eindex->count >= eindex->alloc) {
930 eindex->alloc = (eindex->alloc + 16) * 3 / 2;
931 REALLOC_ARRAY(eindex->objects, eindex->alloc);
932 REALLOC_ARRAY(eindex->hashes, eindex->alloc);
935 bitmap_pos = eindex->count;
936 eindex->objects[eindex->count] = object;
937 eindex->hashes[eindex->count] = pack_name_hash(name);
938 kh_value(eindex->positions, hash_pos) = bitmap_pos;
939 eindex->count++;
940 } else {
941 bitmap_pos = kh_value(eindex->positions, hash_pos);
944 return bitmap_pos + bitmap_num_objects(bitmap_git);
947 struct bitmap_show_data {
948 struct bitmap_index *bitmap_git;
949 struct bitmap *base;
952 static void show_object(struct object *object, const char *name, void *data_)
954 struct bitmap_show_data *data = data_;
955 int bitmap_pos;
957 bitmap_pos = bitmap_position(data->bitmap_git, &object->oid);
959 if (bitmap_pos < 0)
960 bitmap_pos = ext_index_add_object(data->bitmap_git, object,
961 name);
963 bitmap_set(data->base, bitmap_pos);
966 static void show_commit(struct commit *commit UNUSED,
967 void *data UNUSED)
971 static int add_to_include_set(struct bitmap_index *bitmap_git,
972 struct include_data *data,
973 struct commit *commit,
974 int bitmap_pos)
976 struct ewah_bitmap *partial;
978 if (data->seen && bitmap_get(data->seen, bitmap_pos))
979 return 0;
981 if (bitmap_get(data->base, bitmap_pos))
982 return 0;
984 partial = bitmap_for_commit(bitmap_git, commit);
985 if (partial) {
986 bitmap_or_ewah(data->base, partial);
987 return 0;
990 bitmap_set(data->base, bitmap_pos);
991 return 1;
994 static int should_include(struct commit *commit, void *_data)
996 struct include_data *data = _data;
997 int bitmap_pos;
999 bitmap_pos = bitmap_position(data->bitmap_git, &commit->object.oid);
1000 if (bitmap_pos < 0)
1001 bitmap_pos = ext_index_add_object(data->bitmap_git,
1002 (struct object *)commit,
1003 NULL);
1005 if (!add_to_include_set(data->bitmap_git, data, commit, bitmap_pos)) {
1006 struct commit_list *parent = commit->parents;
1008 while (parent) {
1009 parent->item->object.flags |= SEEN;
1010 parent = parent->next;
1013 return 0;
1016 return 1;
1019 static int should_include_obj(struct object *obj, void *_data)
1021 struct include_data *data = _data;
1022 int bitmap_pos;
1024 bitmap_pos = bitmap_position(data->bitmap_git, &obj->oid);
1025 if (bitmap_pos < 0)
1026 return 1;
1027 if ((data->seen && bitmap_get(data->seen, bitmap_pos)) ||
1028 bitmap_get(data->base, bitmap_pos)) {
1029 obj->flags |= SEEN;
1030 return 0;
1032 return 1;
1035 static int add_commit_to_bitmap(struct bitmap_index *bitmap_git,
1036 struct bitmap **base,
1037 struct commit *commit)
1039 struct ewah_bitmap *or_with = bitmap_for_commit(bitmap_git, commit);
1041 if (!or_with)
1042 return 0;
1044 if (!*base)
1045 *base = ewah_to_bitmap(or_with);
1046 else
1047 bitmap_or_ewah(*base, or_with);
1049 return 1;
1052 static struct bitmap *fill_in_bitmap(struct bitmap_index *bitmap_git,
1053 struct rev_info *revs,
1054 struct bitmap *base,
1055 struct bitmap *seen)
1057 struct include_data incdata;
1058 struct bitmap_show_data show_data;
1060 if (!base)
1061 base = bitmap_new();
1063 incdata.bitmap_git = bitmap_git;
1064 incdata.base = base;
1065 incdata.seen = seen;
1067 revs->include_check = should_include;
1068 revs->include_check_obj = should_include_obj;
1069 revs->include_check_data = &incdata;
1071 if (prepare_revision_walk(revs))
1072 die(_("revision walk setup failed"));
1074 show_data.bitmap_git = bitmap_git;
1075 show_data.base = base;
1077 traverse_commit_list(revs, show_commit, show_object, &show_data);
1079 revs->include_check = NULL;
1080 revs->include_check_obj = NULL;
1081 revs->include_check_data = NULL;
1083 return base;
1086 struct bitmap_boundary_cb {
1087 struct bitmap_index *bitmap_git;
1088 struct bitmap *base;
1090 struct object_array boundary;
1093 static void show_boundary_commit(struct commit *commit, void *_data)
1095 struct bitmap_boundary_cb *data = _data;
1097 if (commit->object.flags & BOUNDARY)
1098 add_object_array(&commit->object, "", &data->boundary);
1100 if (commit->object.flags & UNINTERESTING) {
1101 if (bitmap_walk_contains(data->bitmap_git, data->base,
1102 &commit->object.oid))
1103 return;
1105 add_commit_to_bitmap(data->bitmap_git, &data->base, commit);
1109 static void show_boundary_object(struct object *object UNUSED,
1110 const char *name UNUSED,
1111 void *data UNUSED)
1113 BUG("should not be called");
1116 static struct bitmap *find_boundary_objects(struct bitmap_index *bitmap_git,
1117 struct rev_info *revs,
1118 struct object_list *roots)
1120 struct bitmap_boundary_cb cb;
1121 struct object_list *root;
1122 unsigned int i;
1123 unsigned int tmp_blobs, tmp_trees, tmp_tags;
1124 int any_missing = 0;
1126 cb.bitmap_git = bitmap_git;
1127 cb.base = bitmap_new();
1128 object_array_init(&cb.boundary);
1130 revs->ignore_missing_links = 1;
1133 * OR in any existing reachability bitmaps among `roots` into
1134 * `cb.base`.
1136 for (root = roots; root; root = root->next) {
1137 struct object *object = root->item;
1138 if (object->type != OBJ_COMMIT ||
1139 bitmap_walk_contains(bitmap_git, cb.base, &object->oid))
1140 continue;
1142 if (add_commit_to_bitmap(bitmap_git, &cb.base,
1143 (struct commit *)object))
1144 continue;
1146 any_missing = 1;
1149 if (!any_missing)
1150 goto cleanup;
1152 tmp_blobs = revs->blob_objects;
1153 tmp_trees = revs->tree_objects;
1154 tmp_tags = revs->blob_objects;
1155 revs->blob_objects = 0;
1156 revs->tree_objects = 0;
1157 revs->tag_objects = 0;
1160 * We didn't have complete coverage of the roots. First setup a
1161 * revision walk to (a) OR in any bitmaps that are UNINTERESTING
1162 * between the tips and boundary, and (b) record the boundary.
1164 trace2_region_enter("pack-bitmap", "boundary-prepare", the_repository);
1165 if (prepare_revision_walk(revs))
1166 die("revision walk setup failed");
1167 trace2_region_leave("pack-bitmap", "boundary-prepare", the_repository);
1169 trace2_region_enter("pack-bitmap", "boundary-traverse", the_repository);
1170 revs->boundary = 1;
1171 traverse_commit_list_filtered(revs,
1172 show_boundary_commit,
1173 show_boundary_object,
1174 &cb, NULL);
1175 revs->boundary = 0;
1176 trace2_region_leave("pack-bitmap", "boundary-traverse", the_repository);
1178 revs->blob_objects = tmp_blobs;
1179 revs->tree_objects = tmp_trees;
1180 revs->tag_objects = tmp_tags;
1182 reset_revision_walk();
1183 clear_object_flags(UNINTERESTING);
1186 * Then add the boundary commit(s) as fill-in traversal tips.
1188 trace2_region_enter("pack-bitmap", "boundary-fill-in", the_repository);
1189 for (i = 0; i < cb.boundary.nr; i++) {
1190 struct object *obj = cb.boundary.objects[i].item;
1191 if (bitmap_walk_contains(bitmap_git, cb.base, &obj->oid))
1192 obj->flags |= SEEN;
1193 else
1194 add_pending_object(revs, obj, "");
1196 if (revs->pending.nr)
1197 cb.base = fill_in_bitmap(bitmap_git, revs, cb.base, NULL);
1198 trace2_region_leave("pack-bitmap", "boundary-fill-in", the_repository);
1200 cleanup:
1201 object_array_clear(&cb.boundary);
1202 revs->ignore_missing_links = 0;
1204 return cb.base;
1207 static struct bitmap *find_objects(struct bitmap_index *bitmap_git,
1208 struct rev_info *revs,
1209 struct object_list *roots,
1210 struct bitmap *seen)
1212 struct bitmap *base = NULL;
1213 int needs_walk = 0;
1215 struct object_list *not_mapped = NULL;
1218 * Go through all the roots for the walk. The ones that have bitmaps
1219 * on the bitmap index will be `or`ed together to form an initial
1220 * global reachability analysis.
1222 * The ones without bitmaps in the index will be stored in the
1223 * `not_mapped_list` for further processing.
1225 while (roots) {
1226 struct object *object = roots->item;
1227 roots = roots->next;
1229 if (object->type == OBJ_COMMIT &&
1230 add_commit_to_bitmap(bitmap_git, &base, (struct commit *)object)) {
1231 object->flags |= SEEN;
1232 continue;
1235 object_list_insert(object, &not_mapped);
1239 * Best case scenario: We found bitmaps for all the roots,
1240 * so the resulting `or` bitmap has the full reachability analysis
1242 if (!not_mapped)
1243 return base;
1245 roots = not_mapped;
1248 * Let's iterate through all the roots that don't have bitmaps to
1249 * check if we can determine them to be reachable from the existing
1250 * global bitmap.
1252 * If we cannot find them in the existing global bitmap, we'll need
1253 * to push them to an actual walk and run it until we can confirm
1254 * they are reachable
1256 while (roots) {
1257 struct object *object = roots->item;
1258 int pos;
1260 roots = roots->next;
1261 pos = bitmap_position(bitmap_git, &object->oid);
1263 if (pos < 0 || base == NULL || !bitmap_get(base, pos)) {
1264 object->flags &= ~UNINTERESTING;
1265 add_pending_object(revs, object, "");
1266 needs_walk = 1;
1267 } else {
1268 object->flags |= SEEN;
1272 if (needs_walk) {
1274 * This fill-in traversal may walk over some objects
1275 * again, since we have already traversed in order to
1276 * find the boundary.
1278 * But this extra walk should be extremely cheap, since
1279 * all commit objects are loaded into memory, and
1280 * because we skip walking to parents that are
1281 * UNINTERESTING, since it will be marked in the haves
1282 * bitmap already (or it has an on-disk bitmap, since
1283 * OR-ing it in covers all of its ancestors).
1285 base = fill_in_bitmap(bitmap_git, revs, base, seen);
1288 object_list_free(&not_mapped);
1290 return base;
1293 static void show_extended_objects(struct bitmap_index *bitmap_git,
1294 struct rev_info *revs,
1295 show_reachable_fn show_reach)
1297 struct bitmap *objects = bitmap_git->result;
1298 struct eindex *eindex = &bitmap_git->ext_index;
1299 uint32_t i;
1301 for (i = 0; i < eindex->count; ++i) {
1302 struct object *obj;
1304 if (!bitmap_get(objects, st_add(bitmap_num_objects(bitmap_git), i)))
1305 continue;
1307 obj = eindex->objects[i];
1308 if ((obj->type == OBJ_BLOB && !revs->blob_objects) ||
1309 (obj->type == OBJ_TREE && !revs->tree_objects) ||
1310 (obj->type == OBJ_TAG && !revs->tag_objects))
1311 continue;
1313 show_reach(&obj->oid, obj->type, 0, eindex->hashes[i], NULL, 0);
1317 static void init_type_iterator(struct ewah_iterator *it,
1318 struct bitmap_index *bitmap_git,
1319 enum object_type type)
1321 switch (type) {
1322 case OBJ_COMMIT:
1323 ewah_iterator_init(it, bitmap_git->commits);
1324 break;
1326 case OBJ_TREE:
1327 ewah_iterator_init(it, bitmap_git->trees);
1328 break;
1330 case OBJ_BLOB:
1331 ewah_iterator_init(it, bitmap_git->blobs);
1332 break;
1334 case OBJ_TAG:
1335 ewah_iterator_init(it, bitmap_git->tags);
1336 break;
1338 default:
1339 BUG("object type %d not stored by bitmap type index", type);
1340 break;
1344 static void show_objects_for_type(
1345 struct bitmap_index *bitmap_git,
1346 enum object_type object_type,
1347 show_reachable_fn show_reach)
1349 size_t i = 0;
1350 uint32_t offset;
1352 struct ewah_iterator it;
1353 eword_t filter;
1355 struct bitmap *objects = bitmap_git->result;
1357 init_type_iterator(&it, bitmap_git, object_type);
1359 for (i = 0; i < objects->word_alloc &&
1360 ewah_iterator_next(&filter, &it); i++) {
1361 eword_t word = objects->words[i] & filter;
1362 size_t pos = (i * BITS_IN_EWORD);
1364 if (!word)
1365 continue;
1367 for (offset = 0; offset < BITS_IN_EWORD; ++offset) {
1368 struct packed_git *pack;
1369 struct object_id oid;
1370 uint32_t hash = 0, index_pos;
1371 off_t ofs;
1373 if ((word >> offset) == 0)
1374 break;
1376 offset += ewah_bit_ctz64(word >> offset);
1378 if (bitmap_is_midx(bitmap_git)) {
1379 struct multi_pack_index *m = bitmap_git->midx;
1380 uint32_t pack_id;
1382 index_pos = pack_pos_to_midx(m, pos + offset);
1383 ofs = nth_midxed_offset(m, index_pos);
1384 nth_midxed_object_oid(&oid, m, index_pos);
1386 pack_id = nth_midxed_pack_int_id(m, index_pos);
1387 pack = bitmap_git->midx->packs[pack_id];
1388 } else {
1389 index_pos = pack_pos_to_index(bitmap_git->pack, pos + offset);
1390 ofs = pack_pos_to_offset(bitmap_git->pack, pos + offset);
1391 nth_bitmap_object_oid(bitmap_git, &oid, index_pos);
1393 pack = bitmap_git->pack;
1396 if (bitmap_git->hashes)
1397 hash = get_be32(bitmap_git->hashes + index_pos);
1399 show_reach(&oid, object_type, 0, hash, pack, ofs);
1404 static int in_bitmapped_pack(struct bitmap_index *bitmap_git,
1405 struct object_list *roots)
1407 while (roots) {
1408 struct object *object = roots->item;
1409 roots = roots->next;
1411 if (bitmap_is_midx(bitmap_git)) {
1412 if (bsearch_midx(&object->oid, bitmap_git->midx, NULL))
1413 return 1;
1414 } else {
1415 if (find_pack_entry_one(object->oid.hash, bitmap_git->pack) > 0)
1416 return 1;
1420 return 0;
1423 static struct bitmap *find_tip_objects(struct bitmap_index *bitmap_git,
1424 struct object_list *tip_objects,
1425 enum object_type type)
1427 struct bitmap *result = bitmap_new();
1428 struct object_list *p;
1430 for (p = tip_objects; p; p = p->next) {
1431 int pos;
1433 if (p->item->type != type)
1434 continue;
1436 pos = bitmap_position(bitmap_git, &p->item->oid);
1437 if (pos < 0)
1438 continue;
1440 bitmap_set(result, pos);
1443 return result;
1446 static void filter_bitmap_exclude_type(struct bitmap_index *bitmap_git,
1447 struct object_list *tip_objects,
1448 struct bitmap *to_filter,
1449 enum object_type type)
1451 struct eindex *eindex = &bitmap_git->ext_index;
1452 struct bitmap *tips;
1453 struct ewah_iterator it;
1454 eword_t mask;
1455 uint32_t i;
1458 * The non-bitmap version of this filter never removes
1459 * objects which the other side specifically asked for,
1460 * so we must match that behavior.
1462 tips = find_tip_objects(bitmap_git, tip_objects, type);
1465 * We can use the type-level bitmap for 'type' to work in whole
1466 * words for the objects that are actually in the bitmapped
1467 * packfile.
1469 for (i = 0, init_type_iterator(&it, bitmap_git, type);
1470 i < to_filter->word_alloc && ewah_iterator_next(&mask, &it);
1471 i++) {
1472 if (i < tips->word_alloc)
1473 mask &= ~tips->words[i];
1474 to_filter->words[i] &= ~mask;
1478 * Clear any objects that weren't in the packfile (and so would
1479 * not have been caught by the loop above. We'll have to check
1480 * them individually.
1482 for (i = 0; i < eindex->count; i++) {
1483 size_t pos = st_add(i, bitmap_num_objects(bitmap_git));
1484 if (eindex->objects[i]->type == type &&
1485 bitmap_get(to_filter, pos) &&
1486 !bitmap_get(tips, pos))
1487 bitmap_unset(to_filter, pos);
1490 bitmap_free(tips);
1493 static void filter_bitmap_blob_none(struct bitmap_index *bitmap_git,
1494 struct object_list *tip_objects,
1495 struct bitmap *to_filter)
1497 filter_bitmap_exclude_type(bitmap_git, tip_objects, to_filter,
1498 OBJ_BLOB);
1501 static unsigned long get_size_by_pos(struct bitmap_index *bitmap_git,
1502 uint32_t pos)
1504 unsigned long size;
1505 struct object_info oi = OBJECT_INFO_INIT;
1507 oi.sizep = &size;
1509 if (pos < bitmap_num_objects(bitmap_git)) {
1510 struct packed_git *pack;
1511 off_t ofs;
1513 if (bitmap_is_midx(bitmap_git)) {
1514 uint32_t midx_pos = pack_pos_to_midx(bitmap_git->midx, pos);
1515 uint32_t pack_id = nth_midxed_pack_int_id(bitmap_git->midx, midx_pos);
1517 pack = bitmap_git->midx->packs[pack_id];
1518 ofs = nth_midxed_offset(bitmap_git->midx, midx_pos);
1519 } else {
1520 pack = bitmap_git->pack;
1521 ofs = pack_pos_to_offset(pack, pos);
1524 if (packed_object_info(the_repository, pack, ofs, &oi) < 0) {
1525 struct object_id oid;
1526 nth_bitmap_object_oid(bitmap_git, &oid,
1527 pack_pos_to_index(pack, pos));
1528 die(_("unable to get size of %s"), oid_to_hex(&oid));
1530 } else {
1531 struct eindex *eindex = &bitmap_git->ext_index;
1532 struct object *obj = eindex->objects[pos - bitmap_num_objects(bitmap_git)];
1533 if (oid_object_info_extended(the_repository, &obj->oid, &oi, 0) < 0)
1534 die(_("unable to get size of %s"), oid_to_hex(&obj->oid));
1537 return size;
1540 static void filter_bitmap_blob_limit(struct bitmap_index *bitmap_git,
1541 struct object_list *tip_objects,
1542 struct bitmap *to_filter,
1543 unsigned long limit)
1545 struct eindex *eindex = &bitmap_git->ext_index;
1546 struct bitmap *tips;
1547 struct ewah_iterator it;
1548 eword_t mask;
1549 uint32_t i;
1551 tips = find_tip_objects(bitmap_git, tip_objects, OBJ_BLOB);
1553 for (i = 0, init_type_iterator(&it, bitmap_git, OBJ_BLOB);
1554 i < to_filter->word_alloc && ewah_iterator_next(&mask, &it);
1555 i++) {
1556 eword_t word = to_filter->words[i] & mask;
1557 unsigned offset;
1559 for (offset = 0; offset < BITS_IN_EWORD; offset++) {
1560 uint32_t pos;
1562 if ((word >> offset) == 0)
1563 break;
1564 offset += ewah_bit_ctz64(word >> offset);
1565 pos = i * BITS_IN_EWORD + offset;
1567 if (!bitmap_get(tips, pos) &&
1568 get_size_by_pos(bitmap_git, pos) >= limit)
1569 bitmap_unset(to_filter, pos);
1573 for (i = 0; i < eindex->count; i++) {
1574 size_t pos = st_add(i, bitmap_num_objects(bitmap_git));
1575 if (eindex->objects[i]->type == OBJ_BLOB &&
1576 bitmap_get(to_filter, pos) &&
1577 !bitmap_get(tips, pos) &&
1578 get_size_by_pos(bitmap_git, pos) >= limit)
1579 bitmap_unset(to_filter, pos);
1582 bitmap_free(tips);
1585 static void filter_bitmap_tree_depth(struct bitmap_index *bitmap_git,
1586 struct object_list *tip_objects,
1587 struct bitmap *to_filter,
1588 unsigned long limit)
1590 if (limit)
1591 BUG("filter_bitmap_tree_depth given non-zero limit");
1593 filter_bitmap_exclude_type(bitmap_git, tip_objects, to_filter,
1594 OBJ_TREE);
1595 filter_bitmap_exclude_type(bitmap_git, tip_objects, to_filter,
1596 OBJ_BLOB);
1599 static void filter_bitmap_object_type(struct bitmap_index *bitmap_git,
1600 struct object_list *tip_objects,
1601 struct bitmap *to_filter,
1602 enum object_type object_type)
1604 if (object_type < OBJ_COMMIT || object_type > OBJ_TAG)
1605 BUG("filter_bitmap_object_type given invalid object");
1607 if (object_type != OBJ_TAG)
1608 filter_bitmap_exclude_type(bitmap_git, tip_objects, to_filter, OBJ_TAG);
1609 if (object_type != OBJ_COMMIT)
1610 filter_bitmap_exclude_type(bitmap_git, tip_objects, to_filter, OBJ_COMMIT);
1611 if (object_type != OBJ_TREE)
1612 filter_bitmap_exclude_type(bitmap_git, tip_objects, to_filter, OBJ_TREE);
1613 if (object_type != OBJ_BLOB)
1614 filter_bitmap_exclude_type(bitmap_git, tip_objects, to_filter, OBJ_BLOB);
1617 static int filter_bitmap(struct bitmap_index *bitmap_git,
1618 struct object_list *tip_objects,
1619 struct bitmap *to_filter,
1620 struct list_objects_filter_options *filter)
1622 if (!filter || filter->choice == LOFC_DISABLED)
1623 return 0;
1625 if (filter->choice == LOFC_BLOB_NONE) {
1626 if (bitmap_git)
1627 filter_bitmap_blob_none(bitmap_git, tip_objects,
1628 to_filter);
1629 return 0;
1632 if (filter->choice == LOFC_BLOB_LIMIT) {
1633 if (bitmap_git)
1634 filter_bitmap_blob_limit(bitmap_git, tip_objects,
1635 to_filter,
1636 filter->blob_limit_value);
1637 return 0;
1640 if (filter->choice == LOFC_TREE_DEPTH &&
1641 filter->tree_exclude_depth == 0) {
1642 if (bitmap_git)
1643 filter_bitmap_tree_depth(bitmap_git, tip_objects,
1644 to_filter,
1645 filter->tree_exclude_depth);
1646 return 0;
1649 if (filter->choice == LOFC_OBJECT_TYPE) {
1650 if (bitmap_git)
1651 filter_bitmap_object_type(bitmap_git, tip_objects,
1652 to_filter,
1653 filter->object_type);
1654 return 0;
1657 if (filter->choice == LOFC_COMBINE) {
1658 int i;
1659 for (i = 0; i < filter->sub_nr; i++) {
1660 if (filter_bitmap(bitmap_git, tip_objects, to_filter,
1661 &filter->sub[i]) < 0)
1662 return -1;
1664 return 0;
1667 /* filter choice not handled */
1668 return -1;
1671 static int can_filter_bitmap(struct list_objects_filter_options *filter)
1673 return !filter_bitmap(NULL, NULL, NULL, filter);
1677 static void filter_packed_objects_from_bitmap(struct bitmap_index *bitmap_git,
1678 struct bitmap *result)
1680 struct eindex *eindex = &bitmap_git->ext_index;
1681 uint32_t objects_nr;
1682 size_t i, pos;
1684 objects_nr = bitmap_num_objects(bitmap_git);
1685 pos = objects_nr / BITS_IN_EWORD;
1687 if (pos > result->word_alloc)
1688 pos = result->word_alloc;
1690 memset(result->words, 0x00, sizeof(eword_t) * pos);
1691 for (i = pos * BITS_IN_EWORD; i < objects_nr; i++)
1692 bitmap_unset(result, i);
1694 for (i = 0; i < eindex->count; ++i) {
1695 if (has_object_pack(&eindex->objects[i]->oid))
1696 bitmap_unset(result, objects_nr + i);
1700 struct bitmap_index *prepare_bitmap_walk(struct rev_info *revs,
1701 int filter_provided_objects)
1703 unsigned int i;
1704 int use_boundary_traversal;
1706 struct object_list *wants = NULL;
1707 struct object_list *haves = NULL;
1709 struct bitmap *wants_bitmap = NULL;
1710 struct bitmap *haves_bitmap = NULL;
1712 struct bitmap_index *bitmap_git;
1715 * We can't do pathspec limiting with bitmaps, because we don't know
1716 * which commits are associated with which object changes (let alone
1717 * even which objects are associated with which paths).
1719 if (revs->prune)
1720 return NULL;
1722 if (!can_filter_bitmap(&revs->filter))
1723 return NULL;
1725 /* try to open a bitmapped pack, but don't parse it yet
1726 * because we may not need to use it */
1727 CALLOC_ARRAY(bitmap_git, 1);
1728 if (open_bitmap(revs->repo, bitmap_git) < 0)
1729 goto cleanup;
1731 for (i = 0; i < revs->pending.nr; ++i) {
1732 struct object *object = revs->pending.objects[i].item;
1734 if (object->type == OBJ_NONE)
1735 parse_object_or_die(&object->oid, NULL);
1737 while (object->type == OBJ_TAG) {
1738 struct tag *tag = (struct tag *) object;
1740 if (object->flags & UNINTERESTING)
1741 object_list_insert(object, &haves);
1742 else
1743 object_list_insert(object, &wants);
1745 object = parse_object_or_die(get_tagged_oid(tag), NULL);
1746 object->flags |= (tag->object.flags & UNINTERESTING);
1749 if (object->flags & UNINTERESTING)
1750 object_list_insert(object, &haves);
1751 else
1752 object_list_insert(object, &wants);
1755 use_boundary_traversal = git_env_bool(GIT_TEST_PACK_USE_BITMAP_BOUNDARY_TRAVERSAL, -1);
1756 if (use_boundary_traversal < 0) {
1757 prepare_repo_settings(revs->repo);
1758 use_boundary_traversal = revs->repo->settings.pack_use_bitmap_boundary_traversal;
1761 if (!use_boundary_traversal) {
1763 * if we have a HAVES list, but none of those haves is contained
1764 * in the packfile that has a bitmap, we don't have anything to
1765 * optimize here
1767 if (haves && !in_bitmapped_pack(bitmap_git, haves))
1768 goto cleanup;
1771 /* if we don't want anything, we're done here */
1772 if (!wants)
1773 goto cleanup;
1776 * now we're going to use bitmaps, so load the actual bitmap entries
1777 * from disk. this is the point of no return; after this the rev_list
1778 * becomes invalidated and we must perform the revwalk through bitmaps
1780 if (load_bitmap(revs->repo, bitmap_git) < 0)
1781 goto cleanup;
1783 if (!use_boundary_traversal)
1784 object_array_clear(&revs->pending);
1786 if (haves) {
1787 if (use_boundary_traversal) {
1788 trace2_region_enter("pack-bitmap", "haves/boundary", the_repository);
1789 haves_bitmap = find_boundary_objects(bitmap_git, revs, haves);
1790 trace2_region_leave("pack-bitmap", "haves/boundary", the_repository);
1791 } else {
1792 trace2_region_enter("pack-bitmap", "haves/classic", the_repository);
1793 revs->ignore_missing_links = 1;
1794 haves_bitmap = find_objects(bitmap_git, revs, haves, NULL);
1795 reset_revision_walk();
1796 revs->ignore_missing_links = 0;
1797 trace2_region_leave("pack-bitmap", "haves/classic", the_repository);
1800 if (!haves_bitmap)
1801 BUG("failed to perform bitmap walk");
1804 if (use_boundary_traversal) {
1805 object_array_clear(&revs->pending);
1806 reset_revision_walk();
1809 wants_bitmap = find_objects(bitmap_git, revs, wants, haves_bitmap);
1811 if (!wants_bitmap)
1812 BUG("failed to perform bitmap walk");
1814 if (haves_bitmap)
1815 bitmap_and_not(wants_bitmap, haves_bitmap);
1817 filter_bitmap(bitmap_git,
1818 (revs->filter.choice && filter_provided_objects) ? NULL : wants,
1819 wants_bitmap,
1820 &revs->filter);
1822 if (revs->unpacked)
1823 filter_packed_objects_from_bitmap(bitmap_git, wants_bitmap);
1825 bitmap_git->result = wants_bitmap;
1826 bitmap_git->haves = haves_bitmap;
1828 object_list_free(&wants);
1829 object_list_free(&haves);
1831 return bitmap_git;
1833 cleanup:
1834 free_bitmap_index(bitmap_git);
1835 object_list_free(&wants);
1836 object_list_free(&haves);
1837 return NULL;
1841 * -1 means "stop trying further objects"; 0 means we may or may not have
1842 * reused, but you can keep feeding bits.
1844 static int try_partial_reuse(struct bitmap_index *bitmap_git,
1845 struct bitmapped_pack *pack,
1846 size_t bitmap_pos,
1847 uint32_t pack_pos,
1848 struct bitmap *reuse,
1849 struct pack_window **w_curs)
1851 off_t offset, delta_obj_offset;
1852 enum object_type type;
1853 unsigned long size;
1855 if (pack_pos >= pack->p->num_objects)
1856 return -1; /* not actually in the pack */
1858 offset = delta_obj_offset = pack_pos_to_offset(pack->p, pack_pos);
1859 type = unpack_object_header(pack->p, w_curs, &offset, &size);
1860 if (type < 0)
1861 return -1; /* broken packfile, punt */
1863 if (type == OBJ_REF_DELTA || type == OBJ_OFS_DELTA) {
1864 off_t base_offset;
1865 uint32_t base_pos;
1866 uint32_t base_bitmap_pos;
1869 * Find the position of the base object so we can look it up
1870 * in our bitmaps. If we can't come up with an offset, or if
1871 * that offset is not in the revidx, the pack is corrupt.
1872 * There's nothing we can do, so just punt on this object,
1873 * and the normal slow path will complain about it in
1874 * more detail.
1876 base_offset = get_delta_base(pack->p, w_curs, &offset, type,
1877 delta_obj_offset);
1878 if (!base_offset)
1879 return 0;
1881 offset_to_pack_pos(pack->p, base_offset, &base_pos);
1883 if (bitmap_is_midx(bitmap_git)) {
1885 * Cross-pack deltas are rejected for now, but could
1886 * theoretically be supported in the future.
1888 * We would need to ensure that we're sending both
1889 * halves of the delta/base pair, regardless of whether
1890 * or not the two cross a pack boundary. If they do,
1891 * then we must convert the delta to an REF_DELTA to
1892 * refer back to the base in the other pack.
1893 * */
1894 if (midx_pair_to_pack_pos(bitmap_git->midx,
1895 pack->pack_int_id,
1896 base_offset,
1897 &base_bitmap_pos) < 0) {
1898 return 0;
1900 } else {
1901 if (offset_to_pack_pos(pack->p, base_offset,
1902 &base_pos) < 0)
1903 return 0;
1905 * We assume delta dependencies always point backwards.
1906 * This lets us do a single pass, and is basically
1907 * always true due to the way OFS_DELTAs work. You would
1908 * not typically find REF_DELTA in a bitmapped pack,
1909 * since we only bitmap packs we write fresh, and
1910 * OFS_DELTA is the default). But let's double check to
1911 * make sure the pack wasn't written with odd
1912 * parameters.
1914 if (base_pos >= pack_pos)
1915 return 0;
1916 base_bitmap_pos = pack->bitmap_pos + base_pos;
1920 * And finally, if we're not sending the base as part of our
1921 * reuse chunk, then don't send this object either. The base
1922 * would come after us, along with other objects not
1923 * necessarily in the pack, which means we'd need to convert
1924 * to REF_DELTA on the fly. Better to just let the normal
1925 * object_entry code path handle it.
1927 if (!bitmap_get(reuse, base_bitmap_pos))
1928 return 0;
1932 * If we got here, then the object is OK to reuse. Mark it.
1934 bitmap_set(reuse, bitmap_pos);
1935 return 0;
1938 static void reuse_partial_packfile_from_bitmap_1(struct bitmap_index *bitmap_git,
1939 struct bitmapped_pack *pack,
1940 struct bitmap *reuse)
1942 struct bitmap *result = bitmap_git->result;
1943 struct pack_window *w_curs = NULL;
1944 size_t pos = pack->bitmap_pos / BITS_IN_EWORD;
1946 if (!pack->bitmap_pos) {
1948 * If we're processing the first (in the case of a MIDX, the
1949 * preferred pack) or the only (in the case of single-pack
1950 * bitmaps) pack, then we can reuse whole words at a time.
1952 * This is because we know that any deltas in this range *must*
1953 * have their bases chosen from the same pack, since:
1955 * - In the single pack case, there is no other pack to choose
1956 * them from.
1958 * - In the MIDX case, the first pack is the preferred pack, so
1959 * all ties are broken in favor of that pack (i.e. the one
1960 * we're currently processing). So any duplicate bases will be
1961 * resolved in favor of the pack we're processing.
1963 while (pos < result->word_alloc &&
1964 pos < pack->bitmap_nr / BITS_IN_EWORD &&
1965 result->words[pos] == (eword_t)~0)
1966 pos++;
1967 memset(reuse->words, 0xFF, pos * sizeof(eword_t));
1970 for (; pos < result->word_alloc; pos++) {
1971 eword_t word = result->words[pos];
1972 size_t offset;
1974 for (offset = 0; offset < BITS_IN_EWORD; offset++) {
1975 size_t bit_pos;
1976 uint32_t pack_pos;
1978 if (word >> offset == 0)
1979 break;
1981 offset += ewah_bit_ctz64(word >> offset);
1983 bit_pos = pos * BITS_IN_EWORD + offset;
1984 if (bit_pos < pack->bitmap_pos)
1985 continue;
1986 if (bit_pos >= pack->bitmap_pos + pack->bitmap_nr)
1987 goto done;
1989 if (bitmap_is_midx(bitmap_git)) {
1990 uint32_t midx_pos;
1991 off_t ofs;
1993 midx_pos = pack_pos_to_midx(bitmap_git->midx, bit_pos);
1994 ofs = nth_midxed_offset(bitmap_git->midx, midx_pos);
1996 if (offset_to_pack_pos(pack->p, ofs, &pack_pos) < 0)
1997 BUG("could not find object in pack %s "
1998 "at offset %"PRIuMAX" in MIDX",
1999 pack_basename(pack->p), (uintmax_t)ofs);
2000 } else {
2001 pack_pos = cast_size_t_to_uint32_t(st_sub(bit_pos, pack->bitmap_pos));
2002 if (pack_pos >= pack->p->num_objects)
2003 BUG("advanced beyond the end of pack %s (%"PRIuMAX" > %"PRIu32")",
2004 pack_basename(pack->p), (uintmax_t)pack_pos,
2005 pack->p->num_objects);
2008 if (try_partial_reuse(bitmap_git, pack, bit_pos,
2009 pack_pos, reuse, &w_curs) < 0) {
2011 * try_partial_reuse indicated we couldn't reuse
2012 * any bits, so there is no point in trying more
2013 * bits in the current word, or any other words
2014 * in result.
2016 * Jump out of both loops to avoid future
2017 * unnecessary calls to try_partial_reuse.
2019 goto done;
2024 done:
2025 unuse_pack(&w_curs);
2028 static int bitmapped_pack_cmp(const void *va, const void *vb)
2030 const struct bitmapped_pack *a = va;
2031 const struct bitmapped_pack *b = vb;
2033 if (a->bitmap_pos < b->bitmap_pos)
2034 return -1;
2035 if (a->bitmap_pos > b->bitmap_pos)
2036 return 1;
2037 return 0;
2040 void reuse_partial_packfile_from_bitmap(struct bitmap_index *bitmap_git,
2041 struct bitmapped_pack **packs_out,
2042 size_t *packs_nr_out,
2043 struct bitmap **reuse_out,
2044 int multi_pack_reuse)
2046 struct repository *r = the_repository;
2047 struct bitmapped_pack *packs = NULL;
2048 struct bitmap *result = bitmap_git->result;
2049 struct bitmap *reuse;
2050 size_t i;
2051 size_t packs_nr = 0, packs_alloc = 0;
2052 size_t word_alloc;
2053 uint32_t objects_nr = 0;
2055 assert(result);
2057 load_reverse_index(r, bitmap_git);
2059 if (bitmap_is_midx(bitmap_git)) {
2060 for (i = 0; i < bitmap_git->midx->num_packs; i++) {
2061 struct bitmapped_pack pack;
2062 if (nth_bitmapped_pack(r, bitmap_git->midx, &pack, i) < 0) {
2063 warning(_("unable to load pack: '%s', disabling pack-reuse"),
2064 bitmap_git->midx->pack_names[i]);
2065 free(packs);
2066 return;
2069 if (!pack.bitmap_nr)
2070 continue;
2072 if (!multi_pack_reuse && pack.bitmap_pos) {
2074 * If we're only reusing a single pack, skip
2075 * over any packs which are not positioned at
2076 * the beginning of the MIDX bitmap.
2078 * This is consistent with the existing
2079 * single-pack reuse behavior, which only reuses
2080 * parts of the MIDX's preferred pack.
2082 continue;
2085 ALLOC_GROW(packs, packs_nr + 1, packs_alloc);
2086 memcpy(&packs[packs_nr++], &pack, sizeof(pack));
2088 objects_nr += pack.p->num_objects;
2090 if (!multi_pack_reuse)
2091 break;
2094 QSORT(packs, packs_nr, bitmapped_pack_cmp);
2095 } else {
2096 ALLOC_GROW(packs, packs_nr + 1, packs_alloc);
2098 packs[packs_nr].p = bitmap_git->pack;
2099 packs[packs_nr].bitmap_nr = bitmap_git->pack->num_objects;
2100 packs[packs_nr].bitmap_pos = 0;
2102 objects_nr = packs[packs_nr++].bitmap_nr;
2105 word_alloc = objects_nr / BITS_IN_EWORD;
2106 if (objects_nr % BITS_IN_EWORD)
2107 word_alloc++;
2108 reuse = bitmap_word_alloc(word_alloc);
2110 for (i = 0; i < packs_nr; i++)
2111 reuse_partial_packfile_from_bitmap_1(bitmap_git, &packs[i], reuse);
2113 if (bitmap_is_empty(reuse)) {
2114 free(packs);
2115 bitmap_free(reuse);
2116 return;
2120 * Drop any reused objects from the result, since they will not
2121 * need to be handled separately.
2123 bitmap_and_not(result, reuse);
2124 *packs_out = packs;
2125 *packs_nr_out = packs_nr;
2126 *reuse_out = reuse;
2129 int bitmap_walk_contains(struct bitmap_index *bitmap_git,
2130 struct bitmap *bitmap, const struct object_id *oid)
2132 int idx;
2134 if (!bitmap)
2135 return 0;
2137 idx = bitmap_position(bitmap_git, oid);
2138 return idx >= 0 && bitmap_get(bitmap, idx);
2141 void traverse_bitmap_commit_list(struct bitmap_index *bitmap_git,
2142 struct rev_info *revs,
2143 show_reachable_fn show_reachable)
2145 assert(bitmap_git->result);
2147 show_objects_for_type(bitmap_git, OBJ_COMMIT, show_reachable);
2148 if (revs->tree_objects)
2149 show_objects_for_type(bitmap_git, OBJ_TREE, show_reachable);
2150 if (revs->blob_objects)
2151 show_objects_for_type(bitmap_git, OBJ_BLOB, show_reachable);
2152 if (revs->tag_objects)
2153 show_objects_for_type(bitmap_git, OBJ_TAG, show_reachable);
2155 show_extended_objects(bitmap_git, revs, show_reachable);
2158 static uint32_t count_object_type(struct bitmap_index *bitmap_git,
2159 enum object_type type)
2161 struct bitmap *objects = bitmap_git->result;
2162 struct eindex *eindex = &bitmap_git->ext_index;
2164 uint32_t i = 0, count = 0;
2165 struct ewah_iterator it;
2166 eword_t filter;
2168 init_type_iterator(&it, bitmap_git, type);
2170 while (i < objects->word_alloc && ewah_iterator_next(&filter, &it)) {
2171 eword_t word = objects->words[i++] & filter;
2172 count += ewah_bit_popcount64(word);
2175 for (i = 0; i < eindex->count; ++i) {
2176 if (eindex->objects[i]->type == type &&
2177 bitmap_get(objects,
2178 st_add(bitmap_num_objects(bitmap_git), i)))
2179 count++;
2182 return count;
2185 void count_bitmap_commit_list(struct bitmap_index *bitmap_git,
2186 uint32_t *commits, uint32_t *trees,
2187 uint32_t *blobs, uint32_t *tags)
2189 assert(bitmap_git->result);
2191 if (commits)
2192 *commits = count_object_type(bitmap_git, OBJ_COMMIT);
2194 if (trees)
2195 *trees = count_object_type(bitmap_git, OBJ_TREE);
2197 if (blobs)
2198 *blobs = count_object_type(bitmap_git, OBJ_BLOB);
2200 if (tags)
2201 *tags = count_object_type(bitmap_git, OBJ_TAG);
2204 struct bitmap_test_data {
2205 struct bitmap_index *bitmap_git;
2206 struct bitmap *base;
2207 struct bitmap *commits;
2208 struct bitmap *trees;
2209 struct bitmap *blobs;
2210 struct bitmap *tags;
2211 struct progress *prg;
2212 size_t seen;
2215 static void test_bitmap_type(struct bitmap_test_data *tdata,
2216 struct object *obj, int pos)
2218 enum object_type bitmap_type = OBJ_NONE;
2219 int bitmaps_nr = 0;
2221 if (bitmap_get(tdata->commits, pos)) {
2222 bitmap_type = OBJ_COMMIT;
2223 bitmaps_nr++;
2225 if (bitmap_get(tdata->trees, pos)) {
2226 bitmap_type = OBJ_TREE;
2227 bitmaps_nr++;
2229 if (bitmap_get(tdata->blobs, pos)) {
2230 bitmap_type = OBJ_BLOB;
2231 bitmaps_nr++;
2233 if (bitmap_get(tdata->tags, pos)) {
2234 bitmap_type = OBJ_TAG;
2235 bitmaps_nr++;
2238 if (bitmap_type == OBJ_NONE)
2239 die(_("object '%s' not found in type bitmaps"),
2240 oid_to_hex(&obj->oid));
2242 if (bitmaps_nr > 1)
2243 die(_("object '%s' does not have a unique type"),
2244 oid_to_hex(&obj->oid));
2246 if (bitmap_type != obj->type)
2247 die(_("object '%s': real type '%s', expected: '%s'"),
2248 oid_to_hex(&obj->oid),
2249 type_name(obj->type),
2250 type_name(bitmap_type));
2253 static void test_show_object(struct object *object,
2254 const char *name UNUSED,
2255 void *data)
2257 struct bitmap_test_data *tdata = data;
2258 int bitmap_pos;
2260 bitmap_pos = bitmap_position(tdata->bitmap_git, &object->oid);
2261 if (bitmap_pos < 0)
2262 die(_("object not in bitmap: '%s'"), oid_to_hex(&object->oid));
2263 test_bitmap_type(tdata, object, bitmap_pos);
2265 bitmap_set(tdata->base, bitmap_pos);
2266 display_progress(tdata->prg, ++tdata->seen);
2269 static void test_show_commit(struct commit *commit, void *data)
2271 struct bitmap_test_data *tdata = data;
2272 int bitmap_pos;
2274 bitmap_pos = bitmap_position(tdata->bitmap_git,
2275 &commit->object.oid);
2276 if (bitmap_pos < 0)
2277 die(_("object not in bitmap: '%s'"), oid_to_hex(&commit->object.oid));
2278 test_bitmap_type(tdata, &commit->object, bitmap_pos);
2280 bitmap_set(tdata->base, bitmap_pos);
2281 display_progress(tdata->prg, ++tdata->seen);
2284 void test_bitmap_walk(struct rev_info *revs)
2286 struct object *root;
2287 struct bitmap *result = NULL;
2288 size_t result_popcnt;
2289 struct bitmap_test_data tdata;
2290 struct bitmap_index *bitmap_git;
2291 struct ewah_bitmap *bm;
2293 if (!(bitmap_git = prepare_bitmap_git(revs->repo)))
2294 die(_("failed to load bitmap indexes"));
2296 if (revs->pending.nr != 1)
2297 die(_("you must specify exactly one commit to test"));
2299 fprintf_ln(stderr, "Bitmap v%d test (%d entries%s)",
2300 bitmap_git->version,
2301 bitmap_git->entry_count,
2302 bitmap_git->table_lookup ? "" : " loaded");
2304 root = revs->pending.objects[0].item;
2305 bm = bitmap_for_commit(bitmap_git, (struct commit *)root);
2307 if (bm) {
2308 fprintf_ln(stderr, "Found bitmap for '%s'. %d bits / %08x checksum",
2309 oid_to_hex(&root->oid), (int)bm->bit_size, ewah_checksum(bm));
2311 result = ewah_to_bitmap(bm);
2314 if (!result)
2315 die(_("commit '%s' doesn't have an indexed bitmap"), oid_to_hex(&root->oid));
2317 revs->tag_objects = 1;
2318 revs->tree_objects = 1;
2319 revs->blob_objects = 1;
2321 result_popcnt = bitmap_popcount(result);
2323 if (prepare_revision_walk(revs))
2324 die(_("revision walk setup failed"));
2326 tdata.bitmap_git = bitmap_git;
2327 tdata.base = bitmap_new();
2328 tdata.commits = ewah_to_bitmap(bitmap_git->commits);
2329 tdata.trees = ewah_to_bitmap(bitmap_git->trees);
2330 tdata.blobs = ewah_to_bitmap(bitmap_git->blobs);
2331 tdata.tags = ewah_to_bitmap(bitmap_git->tags);
2332 tdata.prg = start_progress("Verifying bitmap entries", result_popcnt);
2333 tdata.seen = 0;
2335 traverse_commit_list(revs, &test_show_commit, &test_show_object, &tdata);
2337 stop_progress(&tdata.prg);
2339 if (bitmap_equals(result, tdata.base))
2340 fprintf_ln(stderr, "OK!");
2341 else
2342 die(_("mismatch in bitmap results"));
2344 bitmap_free(result);
2345 bitmap_free(tdata.base);
2346 bitmap_free(tdata.commits);
2347 bitmap_free(tdata.trees);
2348 bitmap_free(tdata.blobs);
2349 bitmap_free(tdata.tags);
2350 free_bitmap_index(bitmap_git);
2353 int test_bitmap_commits(struct repository *r)
2355 struct object_id oid;
2356 MAYBE_UNUSED void *value;
2357 struct bitmap_index *bitmap_git = prepare_bitmap_git(r);
2359 if (!bitmap_git)
2360 die(_("failed to load bitmap indexes"));
2363 * As this function is only used to print bitmap selected
2364 * commits, we don't have to read the commit table.
2366 if (bitmap_git->table_lookup) {
2367 if (load_bitmap_entries_v1(bitmap_git) < 0)
2368 die(_("failed to load bitmap indexes"));
2371 kh_foreach(bitmap_git->bitmaps, oid, value, {
2372 printf_ln("%s", oid_to_hex(&oid));
2375 free_bitmap_index(bitmap_git);
2377 return 0;
2380 int test_bitmap_hashes(struct repository *r)
2382 struct bitmap_index *bitmap_git = prepare_bitmap_git(r);
2383 struct object_id oid;
2384 uint32_t i, index_pos;
2386 if (!bitmap_git || !bitmap_git->hashes)
2387 goto cleanup;
2389 for (i = 0; i < bitmap_num_objects(bitmap_git); i++) {
2390 if (bitmap_is_midx(bitmap_git))
2391 index_pos = pack_pos_to_midx(bitmap_git->midx, i);
2392 else
2393 index_pos = pack_pos_to_index(bitmap_git->pack, i);
2395 nth_bitmap_object_oid(bitmap_git, &oid, index_pos);
2397 printf_ln("%s %"PRIu32"",
2398 oid_to_hex(&oid), get_be32(bitmap_git->hashes + index_pos));
2401 cleanup:
2402 free_bitmap_index(bitmap_git);
2404 return 0;
2407 int rebuild_bitmap(const uint32_t *reposition,
2408 struct ewah_bitmap *source,
2409 struct bitmap *dest)
2411 uint32_t pos = 0;
2412 struct ewah_iterator it;
2413 eword_t word;
2415 ewah_iterator_init(&it, source);
2417 while (ewah_iterator_next(&word, &it)) {
2418 uint32_t offset, bit_pos;
2420 for (offset = 0; offset < BITS_IN_EWORD; ++offset) {
2421 if ((word >> offset) == 0)
2422 break;
2424 offset += ewah_bit_ctz64(word >> offset);
2426 bit_pos = reposition[pos + offset];
2427 if (bit_pos > 0)
2428 bitmap_set(dest, bit_pos - 1);
2429 else /* can't reuse, we don't have the object */
2430 return -1;
2433 pos += BITS_IN_EWORD;
2435 return 0;
2438 uint32_t *create_bitmap_mapping(struct bitmap_index *bitmap_git,
2439 struct packing_data *mapping)
2441 struct repository *r = the_repository;
2442 uint32_t i, num_objects;
2443 uint32_t *reposition;
2445 if (!bitmap_is_midx(bitmap_git))
2446 load_reverse_index(r, bitmap_git);
2447 else if (load_midx_revindex(bitmap_git->midx))
2448 BUG("rebuild_existing_bitmaps: missing required rev-cache "
2449 "extension");
2451 num_objects = bitmap_num_objects(bitmap_git);
2452 CALLOC_ARRAY(reposition, num_objects);
2454 for (i = 0; i < num_objects; ++i) {
2455 struct object_id oid;
2456 struct object_entry *oe;
2457 uint32_t index_pos;
2459 if (bitmap_is_midx(bitmap_git))
2460 index_pos = pack_pos_to_midx(bitmap_git->midx, i);
2461 else
2462 index_pos = pack_pos_to_index(bitmap_git->pack, i);
2463 nth_bitmap_object_oid(bitmap_git, &oid, index_pos);
2464 oe = packlist_find(mapping, &oid);
2466 if (oe) {
2467 reposition[i] = oe_in_pack_pos(mapping, oe) + 1;
2468 if (bitmap_git->hashes && !oe->hash)
2469 oe->hash = get_be32(bitmap_git->hashes + index_pos);
2473 return reposition;
2476 void free_bitmap_index(struct bitmap_index *b)
2478 if (!b)
2479 return;
2481 if (b->map)
2482 munmap(b->map, b->map_size);
2483 ewah_pool_free(b->commits);
2484 ewah_pool_free(b->trees);
2485 ewah_pool_free(b->blobs);
2486 ewah_pool_free(b->tags);
2487 if (b->bitmaps) {
2488 struct stored_bitmap *sb;
2489 kh_foreach_value(b->bitmaps, sb, {
2490 ewah_pool_free(sb->root);
2491 free(sb);
2494 kh_destroy_oid_map(b->bitmaps);
2495 free(b->ext_index.objects);
2496 free(b->ext_index.hashes);
2497 kh_destroy_oid_pos(b->ext_index.positions);
2498 bitmap_free(b->result);
2499 bitmap_free(b->haves);
2500 if (bitmap_is_midx(b)) {
2502 * Multi-pack bitmaps need to have resources associated with
2503 * their on-disk reverse indexes unmapped so that stale .rev and
2504 * .bitmap files can be removed.
2506 * Unlike pack-based bitmaps, multi-pack bitmaps can be read and
2507 * written in the same 'git multi-pack-index write --bitmap'
2508 * process. Close resources so they can be removed safely on
2509 * platforms like Windows.
2511 close_midx_revindex(b->midx);
2513 free(b);
2516 int bitmap_has_oid_in_uninteresting(struct bitmap_index *bitmap_git,
2517 const struct object_id *oid)
2519 return bitmap_git &&
2520 bitmap_walk_contains(bitmap_git, bitmap_git->haves, oid);
2523 static off_t get_disk_usage_for_type(struct bitmap_index *bitmap_git,
2524 enum object_type object_type)
2526 struct bitmap *result = bitmap_git->result;
2527 off_t total = 0;
2528 struct ewah_iterator it;
2529 eword_t filter;
2530 size_t i;
2532 init_type_iterator(&it, bitmap_git, object_type);
2533 for (i = 0; i < result->word_alloc &&
2534 ewah_iterator_next(&filter, &it); i++) {
2535 eword_t word = result->words[i] & filter;
2536 size_t base = (i * BITS_IN_EWORD);
2537 unsigned offset;
2539 if (!word)
2540 continue;
2542 for (offset = 0; offset < BITS_IN_EWORD; offset++) {
2543 if ((word >> offset) == 0)
2544 break;
2546 offset += ewah_bit_ctz64(word >> offset);
2548 if (bitmap_is_midx(bitmap_git)) {
2549 uint32_t pack_pos;
2550 uint32_t midx_pos = pack_pos_to_midx(bitmap_git->midx, base + offset);
2551 off_t offset = nth_midxed_offset(bitmap_git->midx, midx_pos);
2553 uint32_t pack_id = nth_midxed_pack_int_id(bitmap_git->midx, midx_pos);
2554 struct packed_git *pack = bitmap_git->midx->packs[pack_id];
2556 if (offset_to_pack_pos(pack, offset, &pack_pos) < 0) {
2557 struct object_id oid;
2558 nth_midxed_object_oid(&oid, bitmap_git->midx, midx_pos);
2560 die(_("could not find '%s' in pack '%s' at offset %"PRIuMAX),
2561 oid_to_hex(&oid),
2562 pack->pack_name,
2563 (uintmax_t)offset);
2566 total += pack_pos_to_offset(pack, pack_pos + 1) - offset;
2567 } else {
2568 size_t pos = base + offset;
2569 total += pack_pos_to_offset(bitmap_git->pack, pos + 1) -
2570 pack_pos_to_offset(bitmap_git->pack, pos);
2575 return total;
2578 static off_t get_disk_usage_for_extended(struct bitmap_index *bitmap_git)
2580 struct bitmap *result = bitmap_git->result;
2581 struct eindex *eindex = &bitmap_git->ext_index;
2582 off_t total = 0;
2583 struct object_info oi = OBJECT_INFO_INIT;
2584 off_t object_size;
2585 size_t i;
2587 oi.disk_sizep = &object_size;
2589 for (i = 0; i < eindex->count; i++) {
2590 struct object *obj = eindex->objects[i];
2592 if (!bitmap_get(result,
2593 st_add(bitmap_num_objects(bitmap_git), i)))
2594 continue;
2596 if (oid_object_info_extended(the_repository, &obj->oid, &oi, 0) < 0)
2597 die(_("unable to get disk usage of '%s'"),
2598 oid_to_hex(&obj->oid));
2600 total += object_size;
2602 return total;
2605 off_t get_disk_usage_from_bitmap(struct bitmap_index *bitmap_git,
2606 struct rev_info *revs)
2608 off_t total = 0;
2610 total += get_disk_usage_for_type(bitmap_git, OBJ_COMMIT);
2611 if (revs->tree_objects)
2612 total += get_disk_usage_for_type(bitmap_git, OBJ_TREE);
2613 if (revs->blob_objects)
2614 total += get_disk_usage_for_type(bitmap_git, OBJ_BLOB);
2615 if (revs->tag_objects)
2616 total += get_disk_usage_for_type(bitmap_git, OBJ_TAG);
2618 total += get_disk_usage_for_extended(bitmap_git);
2620 return total;
2623 int bitmap_is_midx(struct bitmap_index *bitmap_git)
2625 return !!bitmap_git->midx;
2628 const struct string_list *bitmap_preferred_tips(struct repository *r)
2630 const struct string_list *dest;
2632 if (!repo_config_get_string_multi(r, "pack.preferbitmaptips", &dest))
2633 return dest;
2634 return NULL;
2637 int bitmap_is_preferred_refname(struct repository *r, const char *refname)
2639 const struct string_list *preferred_tips = bitmap_preferred_tips(r);
2640 struct string_list_item *item;
2642 if (!preferred_tips)
2643 return 0;
2645 for_each_string_list_item(item, preferred_tips) {
2646 if (starts_with(refname, item->string))
2647 return 1;
2650 return 0;
2653 static int verify_bitmap_file(const char *name)
2655 struct stat st;
2656 unsigned char *data;
2657 int fd = git_open(name);
2658 int res = 0;
2660 /* It is OK to not have the file. */
2661 if (fd < 0 || fstat(fd, &st)) {
2662 if (fd >= 0)
2663 close(fd);
2664 return 0;
2667 data = xmmap(NULL, st.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
2668 close(fd);
2669 if (!hashfile_checksum_valid(data, st.st_size))
2670 res = error(_("bitmap file '%s' has invalid checksum"),
2671 name);
2673 munmap(data, st.st_size);
2674 return res;
2677 int verify_bitmap_files(struct repository *r)
2679 int res = 0;
2681 for (struct multi_pack_index *m = get_multi_pack_index(r);
2682 m; m = m->next) {
2683 char *midx_bitmap_name = midx_bitmap_filename(m);
2684 res |= verify_bitmap_file(midx_bitmap_name);
2685 free(midx_bitmap_name);
2688 for (struct packed_git *p = get_all_packs(r);
2689 p; p = p->next) {
2690 char *pack_bitmap_name = pack_bitmap_filename(p);
2691 res |= verify_bitmap_file(pack_bitmap_name);
2692 free(pack_bitmap_name);
2695 return res;