1 #include "git-compat-util.h"
2 #include "environment.h"
5 #include "object-store-ll.h"
11 #include "pack-bitmap.h"
12 #include "hash-lookup.h"
13 #include "pack-objects.h"
15 #include "commit-reach.h"
16 #include "prio-queue.h"
19 #include "tree-walk.h"
21 struct bitmapped_commit
{
22 struct commit
*commit
;
23 struct ewah_bitmap
*bitmap
;
24 struct ewah_bitmap
*write_as
;
30 struct bitmap_writer
{
31 struct ewah_bitmap
*commits
;
32 struct ewah_bitmap
*trees
;
33 struct ewah_bitmap
*blobs
;
34 struct ewah_bitmap
*tags
;
36 kh_oid_map_t
*bitmaps
;
37 struct packing_data
*to_pack
;
39 struct bitmapped_commit
*selected
;
40 unsigned int selected_nr
, selected_alloc
;
42 struct progress
*progress
;
44 unsigned char pack_checksum
[GIT_MAX_RAWSZ
];
47 static struct bitmap_writer writer
;
49 void bitmap_writer_show_progress(int show
)
51 writer
.show_progress
= show
;
55 * Build the initial type index for the packfile or multi-pack-index
57 void bitmap_writer_build_type_index(struct packing_data
*to_pack
,
58 struct pack_idx_entry
**index
,
63 writer
.commits
= ewah_new();
64 writer
.trees
= ewah_new();
65 writer
.blobs
= ewah_new();
66 writer
.tags
= ewah_new();
67 ALLOC_ARRAY(to_pack
->in_pack_pos
, to_pack
->nr_objects
);
69 for (i
= 0; i
< index_nr
; ++i
) {
70 struct object_entry
*entry
= (struct object_entry
*)index
[i
];
71 enum object_type real_type
;
73 oe_set_in_pack_pos(to_pack
, entry
, i
);
75 switch (oe_type(entry
)) {
80 real_type
= oe_type(entry
);
84 real_type
= oid_object_info(to_pack
->repo
,
85 &entry
->idx
.oid
, NULL
);
91 ewah_set(writer
.commits
, i
);
95 ewah_set(writer
.trees
, i
);
99 ewah_set(writer
.blobs
, i
);
103 ewah_set(writer
.tags
, i
);
107 die("Missing type information for %s (%d/%d)",
108 oid_to_hex(&entry
->idx
.oid
), real_type
,
115 * Compute the actual bitmaps
118 static inline void push_bitmapped_commit(struct commit
*commit
)
120 if (writer
.selected_nr
>= writer
.selected_alloc
) {
121 writer
.selected_alloc
= (writer
.selected_alloc
+ 32) * 2;
122 REALLOC_ARRAY(writer
.selected
, writer
.selected_alloc
);
125 writer
.selected
[writer
.selected_nr
].commit
= commit
;
126 writer
.selected
[writer
.selected_nr
].bitmap
= NULL
;
127 writer
.selected
[writer
.selected_nr
].flags
= 0;
129 writer
.selected_nr
++;
132 static uint32_t find_object_pos(const struct object_id
*oid
, int *found
)
134 struct object_entry
*entry
= packlist_find(writer
.to_pack
, oid
);
139 warning("Failed to write bitmap index. Packfile doesn't have full closure "
140 "(object %s is missing)", oid_to_hex(oid
));
146 return oe_in_pack_pos(writer
.to_pack
, entry
);
149 static void compute_xor_offsets(void)
151 static const int MAX_XOR_OFFSET_SEARCH
= 10;
155 while (next
< writer
.selected_nr
) {
156 struct bitmapped_commit
*stored
= &writer
.selected
[next
];
159 struct ewah_bitmap
*best_bitmap
= stored
->bitmap
;
160 struct ewah_bitmap
*test_xor
;
162 for (i
= 1; i
<= MAX_XOR_OFFSET_SEARCH
; ++i
) {
168 test_xor
= ewah_pool_new();
169 ewah_xor(writer
.selected
[curr
].bitmap
, stored
->bitmap
, test_xor
);
171 if (test_xor
->buffer_size
< best_bitmap
->buffer_size
) {
172 if (best_bitmap
!= stored
->bitmap
)
173 ewah_pool_free(best_bitmap
);
175 best_bitmap
= test_xor
;
178 ewah_pool_free(test_xor
);
182 stored
->xor_offset
= best_offset
;
183 stored
->write_as
= best_bitmap
;
190 struct commit_list
*reverse_edges
;
191 struct bitmap
*commit_mask
;
192 struct bitmap
*bitmap
;
195 unsigned idx
; /* within selected array */
198 static void clear_bb_commit(struct bb_commit
*commit
)
200 free_commit_list(commit
->reverse_edges
);
201 bitmap_free(commit
->commit_mask
);
202 bitmap_free(commit
->bitmap
);
205 define_commit_slab(bb_data
, struct bb_commit
);
207 struct bitmap_builder
{
209 struct commit
**commits
;
210 size_t commits_nr
, commits_alloc
;
213 static void bitmap_builder_init(struct bitmap_builder
*bb
,
214 struct bitmap_writer
*writer
,
215 struct bitmap_index
*old_bitmap
)
217 struct rev_info revs
;
218 struct commit
*commit
;
219 struct commit_list
*reusable
= NULL
;
220 struct commit_list
*r
;
221 unsigned int i
, num_maximal
= 0;
223 memset(bb
, 0, sizeof(*bb
));
224 init_bb_data(&bb
->data
);
226 reset_revision_walk();
227 repo_init_revisions(writer
->to_pack
->repo
, &revs
, NULL
);
229 revs
.first_parent_only
= 1;
231 for (i
= 0; i
< writer
->selected_nr
; i
++) {
232 struct commit
*c
= writer
->selected
[i
].commit
;
233 struct bb_commit
*ent
= bb_data_at(&bb
->data
, c
);
239 ent
->commit_mask
= bitmap_new();
240 bitmap_set(ent
->commit_mask
, i
);
242 add_pending_object(&revs
, &c
->object
, "");
245 if (prepare_revision_walk(&revs
))
246 die("revision walk setup failed");
248 while ((commit
= get_revision(&revs
))) {
249 struct commit_list
*p
= commit
->parents
;
250 struct bb_commit
*c_ent
;
252 parse_commit_or_die(commit
);
254 c_ent
= bb_data_at(&bb
->data
, commit
);
257 * If there is no commit_mask, there is no reason to iterate
258 * over this commit; it is not selected (if it were, it would
259 * not have a blank commit mask) and all its children have
260 * existing bitmaps (see the comment starting with "This commit
261 * has an existing bitmap" below), so it does not contribute
262 * anything to the final bitmap file or its descendants.
264 if (!c_ent
->commit_mask
)
267 if (old_bitmap
&& bitmap_for_commit(old_bitmap
, commit
)) {
269 * This commit has an existing bitmap, so we can
270 * get its bits immediately without an object
271 * walk. That is, it is reusable as-is and there is no
272 * need to continue walking beyond it.
274 * Mark it as such and add it to bb->commits separately
275 * to avoid allocating a position in the commit mask.
277 commit_list_insert(commit
, &reusable
);
281 if (c_ent
->maximal
) {
283 ALLOC_GROW(bb
->commits
, bb
->commits_nr
+ 1, bb
->commits_alloc
);
284 bb
->commits
[bb
->commits_nr
++] = commit
;
288 struct bb_commit
*p_ent
= bb_data_at(&bb
->data
, p
->item
);
289 int c_not_p
, p_not_c
;
291 if (!p_ent
->commit_mask
) {
292 p_ent
->commit_mask
= bitmap_new();
296 c_not_p
= bitmap_is_subset(c_ent
->commit_mask
, p_ent
->commit_mask
);
297 p_not_c
= bitmap_is_subset(p_ent
->commit_mask
, c_ent
->commit_mask
);
303 bitmap_or(p_ent
->commit_mask
, c_ent
->commit_mask
);
309 free_commit_list(p_ent
->reverse_edges
);
310 p_ent
->reverse_edges
= NULL
;
313 if (c_ent
->maximal
) {
314 commit_list_insert(commit
, &p_ent
->reverse_edges
);
316 struct commit_list
*cc
= c_ent
->reverse_edges
;
318 for (; cc
; cc
= cc
->next
) {
319 if (!commit_list_contains(cc
->item
, p_ent
->reverse_edges
))
320 commit_list_insert(cc
->item
, &p_ent
->reverse_edges
);
326 bitmap_free(c_ent
->commit_mask
);
327 c_ent
->commit_mask
= NULL
;
330 for (r
= reusable
; r
; r
= r
->next
) {
331 ALLOC_GROW(bb
->commits
, bb
->commits_nr
+ 1, bb
->commits_alloc
);
332 bb
->commits
[bb
->commits_nr
++] = r
->item
;
335 trace2_data_intmax("pack-bitmap-write", the_repository
,
336 "num_selected_commits", writer
->selected_nr
);
337 trace2_data_intmax("pack-bitmap-write", the_repository
,
338 "num_maximal_commits", num_maximal
);
340 release_revisions(&revs
);
341 free_commit_list(reusable
);
344 static void bitmap_builder_clear(struct bitmap_builder
*bb
)
346 deep_clear_bb_data(&bb
->data
, clear_bb_commit
);
348 bb
->commits_nr
= bb
->commits_alloc
= 0;
351 static int fill_bitmap_tree(struct bitmap
*bitmap
,
356 struct tree_desc desc
;
357 struct name_entry entry
;
360 * If our bit is already set, then there is nothing to do. Both this
361 * tree and all of its children will be set.
363 pos
= find_object_pos(&tree
->object
.oid
, &found
);
366 if (bitmap_get(bitmap
, pos
))
368 bitmap_set(bitmap
, pos
);
370 if (parse_tree(tree
) < 0)
371 die("unable to load tree object %s",
372 oid_to_hex(&tree
->object
.oid
));
373 init_tree_desc(&desc
, &tree
->object
.oid
, tree
->buffer
, tree
->size
);
375 while (tree_entry(&desc
, &entry
)) {
376 switch (object_type(entry
.mode
)) {
378 if (fill_bitmap_tree(bitmap
,
379 lookup_tree(the_repository
, &entry
.oid
)) < 0)
383 pos
= find_object_pos(&entry
.oid
, &found
);
386 bitmap_set(bitmap
, pos
);
389 /* Gitlink, etc; not reachable */
394 free_tree_buffer(tree
);
398 static int reused_bitmaps_nr
;
400 static int fill_bitmap_commit(struct bb_commit
*ent
,
401 struct commit
*commit
,
402 struct prio_queue
*queue
,
403 struct prio_queue
*tree_queue
,
404 struct bitmap_index
*old_bitmap
,
405 const uint32_t *mapping
)
410 ent
->bitmap
= bitmap_new();
412 prio_queue_put(queue
, commit
);
415 struct commit_list
*p
;
416 struct commit
*c
= prio_queue_get(queue
);
418 if (old_bitmap
&& mapping
) {
419 struct ewah_bitmap
*old
= bitmap_for_commit(old_bitmap
, c
);
420 struct bitmap
*remapped
= bitmap_new();
422 * If this commit has an old bitmap, then translate that
423 * bitmap and add its bits to this one. No need to walk
424 * parents or the tree for this commit.
426 if (old
&& !rebuild_bitmap(mapping
, old
, remapped
)) {
427 bitmap_or(ent
->bitmap
, remapped
);
428 bitmap_free(remapped
);
432 bitmap_free(remapped
);
436 * Mark ourselves and queue our tree. The commit
437 * walk ensures we cover all parents.
439 pos
= find_object_pos(&c
->object
.oid
, &found
);
442 bitmap_set(ent
->bitmap
, pos
);
443 prio_queue_put(tree_queue
,
444 repo_get_commit_tree(the_repository
, c
));
446 for (p
= c
->parents
; p
; p
= p
->next
) {
447 pos
= find_object_pos(&p
->item
->object
.oid
, &found
);
450 if (!bitmap_get(ent
->bitmap
, pos
)) {
451 bitmap_set(ent
->bitmap
, pos
);
452 prio_queue_put(queue
, p
->item
);
457 while (tree_queue
->nr
) {
458 if (fill_bitmap_tree(ent
->bitmap
,
459 prio_queue_get(tree_queue
)) < 0)
465 static void store_selected(struct bb_commit
*ent
, struct commit
*commit
)
467 struct bitmapped_commit
*stored
= &writer
.selected
[ent
->idx
];
471 stored
->bitmap
= bitmap_to_ewah(ent
->bitmap
);
473 hash_pos
= kh_put_oid_map(writer
.bitmaps
, commit
->object
.oid
, &hash_ret
);
475 die("Duplicate entry when writing index: %s",
476 oid_to_hex(&commit
->object
.oid
));
477 kh_value(writer
.bitmaps
, hash_pos
) = stored
;
480 int bitmap_writer_build(struct packing_data
*to_pack
)
482 struct bitmap_builder bb
;
484 int nr_stored
= 0; /* for progress */
485 struct prio_queue queue
= { compare_commits_by_gen_then_commit_date
};
486 struct prio_queue tree_queue
= { NULL
};
487 struct bitmap_index
*old_bitmap
;
489 int closed
= 1; /* until proven otherwise */
491 writer
.bitmaps
= kh_init_oid_map();
492 writer
.to_pack
= to_pack
;
494 if (writer
.show_progress
)
495 writer
.progress
= start_progress("Building bitmaps", writer
.selected_nr
);
496 trace2_region_enter("pack-bitmap-write", "building_bitmaps_total",
499 old_bitmap
= prepare_bitmap_git(to_pack
->repo
);
501 mapping
= create_bitmap_mapping(old_bitmap
, to_pack
);
505 bitmap_builder_init(&bb
, &writer
, old_bitmap
);
506 for (i
= bb
.commits_nr
; i
> 0; i
--) {
507 struct commit
*commit
= bb
.commits
[i
-1];
508 struct bb_commit
*ent
= bb_data_at(&bb
.data
, commit
);
509 struct commit
*child
;
512 if (fill_bitmap_commit(ent
, commit
, &queue
, &tree_queue
,
513 old_bitmap
, mapping
) < 0) {
519 store_selected(ent
, commit
);
521 display_progress(writer
.progress
, nr_stored
);
524 while ((child
= pop_commit(&ent
->reverse_edges
))) {
525 struct bb_commit
*child_ent
=
526 bb_data_at(&bb
.data
, child
);
528 if (child_ent
->bitmap
)
529 bitmap_or(child_ent
->bitmap
, ent
->bitmap
);
531 child_ent
->bitmap
= bitmap_dup(ent
->bitmap
);
533 child_ent
->bitmap
= ent
->bitmap
;
538 bitmap_free(ent
->bitmap
);
541 clear_prio_queue(&queue
);
542 clear_prio_queue(&tree_queue
);
543 bitmap_builder_clear(&bb
);
544 free_bitmap_index(old_bitmap
);
547 trace2_region_leave("pack-bitmap-write", "building_bitmaps_total",
549 trace2_data_intmax("pack-bitmap-write", the_repository
,
550 "building_bitmaps_reused", reused_bitmaps_nr
);
552 stop_progress(&writer
.progress
);
555 compute_xor_offsets();
556 return closed
? 0 : -1;
560 * Select the commits that will be bitmapped
562 static inline unsigned int next_commit_index(unsigned int idx
)
564 static const unsigned int MIN_COMMITS
= 100;
565 static const unsigned int MAX_COMMITS
= 5000;
567 static const unsigned int MUST_REGION
= 100;
568 static const unsigned int MIN_REGION
= 20000;
570 unsigned int offset
, next
;
572 if (idx
<= MUST_REGION
)
575 if (idx
<= MIN_REGION
) {
576 offset
= idx
- MUST_REGION
;
577 return (offset
< MIN_COMMITS
) ? offset
: MIN_COMMITS
;
580 offset
= idx
- MIN_REGION
;
581 next
= (offset
< MAX_COMMITS
) ? offset
: MAX_COMMITS
;
583 return (next
> MIN_COMMITS
) ? next
: MIN_COMMITS
;
586 static int date_compare(const void *_a
, const void *_b
)
588 struct commit
*a
= *(struct commit
**)_a
;
589 struct commit
*b
= *(struct commit
**)_b
;
590 return (long)b
->date
- (long)a
->date
;
593 void bitmap_writer_select_commits(struct commit
**indexed_commits
,
594 unsigned int indexed_commits_nr
,
597 unsigned int i
= 0, j
, next
;
599 QSORT(indexed_commits
, indexed_commits_nr
, date_compare
);
601 if (indexed_commits_nr
< 100) {
602 for (i
= 0; i
< indexed_commits_nr
; ++i
)
603 push_bitmapped_commit(indexed_commits
[i
]);
607 if (writer
.show_progress
)
608 writer
.progress
= start_progress("Selecting bitmap commits", 0);
611 struct commit
*chosen
= NULL
;
613 next
= next_commit_index(i
);
615 if (i
+ next
>= indexed_commits_nr
)
618 if (max_bitmaps
> 0 && writer
.selected_nr
>= max_bitmaps
) {
619 writer
.selected_nr
= max_bitmaps
;
624 chosen
= indexed_commits
[i
];
626 chosen
= indexed_commits
[i
+ next
];
628 for (j
= 0; j
<= next
; ++j
) {
629 struct commit
*cm
= indexed_commits
[i
+ j
];
631 if ((cm
->object
.flags
& NEEDS_BITMAP
) != 0) {
636 if (cm
->parents
&& cm
->parents
->next
)
641 push_bitmapped_commit(chosen
);
644 display_progress(writer
.progress
, i
);
647 stop_progress(&writer
.progress
);
651 static int hashwrite_ewah_helper(void *f
, const void *buf
, size_t len
)
653 /* hashwrite will die on error */
654 hashwrite(f
, buf
, len
);
659 * Write the bitmap index to disk
661 static inline void dump_bitmap(struct hashfile
*f
, struct ewah_bitmap
*bitmap
)
663 if (ewah_serialize_to(bitmap
, hashwrite_ewah_helper
, f
) < 0)
664 die("Failed to write bitmap index");
667 static const struct object_id
*oid_access(size_t pos
, const void *table
)
669 const struct pack_idx_entry
* const *index
= table
;
670 return &index
[pos
]->oid
;
673 static void write_selected_commits_v1(struct hashfile
*f
,
674 uint32_t *commit_positions
,
679 for (i
= 0; i
< writer
.selected_nr
; ++i
) {
680 struct bitmapped_commit
*stored
= &writer
.selected
[i
];
683 offsets
[i
] = hashfile_total(f
);
685 hashwrite_be32(f
, commit_positions
[i
]);
686 hashwrite_u8(f
, stored
->xor_offset
);
687 hashwrite_u8(f
, stored
->flags
);
689 dump_bitmap(f
, stored
->write_as
);
693 static int table_cmp(const void *_va
, const void *_vb
, void *_data
)
695 uint32_t *commit_positions
= _data
;
696 uint32_t a
= commit_positions
[*(uint32_t *)_va
];
697 uint32_t b
= commit_positions
[*(uint32_t *)_vb
];
707 static void write_lookup_table(struct hashfile
*f
,
708 uint32_t *commit_positions
,
712 uint32_t *table
, *table_inv
;
714 ALLOC_ARRAY(table
, writer
.selected_nr
);
715 ALLOC_ARRAY(table_inv
, writer
.selected_nr
);
717 for (i
= 0; i
< writer
.selected_nr
; i
++)
721 * At the end of this sort table[j] = i means that the i'th
722 * bitmap corresponds to j'th bitmapped commit (among the selected
723 * commits) in lex order of OIDs.
725 QSORT_S(table
, writer
.selected_nr
, table_cmp
, commit_positions
);
727 /* table_inv helps us discover that relationship (i'th bitmap
728 * to j'th commit by j = table_inv[i])
730 for (i
= 0; i
< writer
.selected_nr
; i
++)
731 table_inv
[table
[i
]] = i
;
733 trace2_region_enter("pack-bitmap-write", "writing_lookup_table", the_repository
);
734 for (i
= 0; i
< writer
.selected_nr
; i
++) {
735 struct bitmapped_commit
*selected
= &writer
.selected
[table
[i
]];
736 uint32_t xor_offset
= selected
->xor_offset
;
741 * xor_index stores the index (in the bitmap entries)
742 * of the corresponding xor bitmap. But we need to convert
743 * this index into lookup table's index. So, table_inv[xor_index]
744 * gives us the index position w.r.t. the lookup table.
746 * If "k = table[i] - xor_offset" then the xor base is the k'th
747 * bitmap. `table_inv[k]` gives us the position of that bitmap
748 * in the lookup table.
750 uint32_t xor_index
= table
[i
] - xor_offset
;
751 xor_row
= table_inv
[xor_index
];
753 xor_row
= 0xffffffff;
756 hashwrite_be32(f
, commit_positions
[table
[i
]]);
757 hashwrite_be64(f
, (uint64_t)offsets
[table
[i
]]);
758 hashwrite_be32(f
, xor_row
);
760 trace2_region_leave("pack-bitmap-write", "writing_lookup_table", the_repository
);
766 static void write_hash_cache(struct hashfile
*f
,
767 struct pack_idx_entry
**index
,
772 for (i
= 0; i
< index_nr
; ++i
) {
773 struct object_entry
*entry
= (struct object_entry
*)index
[i
];
774 hashwrite_be32(f
, entry
->hash
);
778 void bitmap_writer_set_checksum(const unsigned char *sha1
)
780 hashcpy(writer
.pack_checksum
, sha1
);
783 void bitmap_writer_finish(struct pack_idx_entry
**index
,
785 const char *filename
,
788 static uint16_t default_version
= 1;
789 static uint16_t flags
= BITMAP_OPT_FULL_DAG
;
790 struct strbuf tmp_file
= STRBUF_INIT
;
792 uint32_t *commit_positions
= NULL
;
793 off_t
*offsets
= NULL
;
796 struct bitmap_disk_header header
;
798 int fd
= odb_mkstemp(&tmp_file
, "pack/tmp_bitmap_XXXXXX");
800 f
= hashfd(fd
, tmp_file
.buf
);
802 memcpy(header
.magic
, BITMAP_IDX_SIGNATURE
, sizeof(BITMAP_IDX_SIGNATURE
));
803 header
.version
= htons(default_version
);
804 header
.options
= htons(flags
| options
);
805 header
.entry_count
= htonl(writer
.selected_nr
);
806 hashcpy(header
.checksum
, writer
.pack_checksum
);
808 hashwrite(f
, &header
, sizeof(header
) - GIT_MAX_RAWSZ
+ the_hash_algo
->rawsz
);
809 dump_bitmap(f
, writer
.commits
);
810 dump_bitmap(f
, writer
.trees
);
811 dump_bitmap(f
, writer
.blobs
);
812 dump_bitmap(f
, writer
.tags
);
814 if (options
& BITMAP_OPT_LOOKUP_TABLE
)
815 CALLOC_ARRAY(offsets
, index_nr
);
817 ALLOC_ARRAY(commit_positions
, writer
.selected_nr
);
819 for (i
= 0; i
< writer
.selected_nr
; i
++) {
820 struct bitmapped_commit
*stored
= &writer
.selected
[i
];
821 int commit_pos
= oid_pos(&stored
->commit
->object
.oid
, index
, index_nr
, oid_access
);
824 BUG(_("trying to write commit not in index"));
826 commit_positions
[i
] = commit_pos
;
829 write_selected_commits_v1(f
, commit_positions
, offsets
);
831 if (options
& BITMAP_OPT_LOOKUP_TABLE
)
832 write_lookup_table(f
, commit_positions
, offsets
);
834 if (options
& BITMAP_OPT_HASH_CACHE
)
835 write_hash_cache(f
, index
, index_nr
);
837 finalize_hashfile(f
, NULL
, FSYNC_COMPONENT_PACK_METADATA
,
838 CSUM_HASH_IN_STREAM
| CSUM_FSYNC
| CSUM_CLOSE
);
840 if (adjust_shared_perm(tmp_file
.buf
))
841 die_errno("unable to make temporary bitmap file readable");
843 if (rename(tmp_file
.buf
, filename
))
844 die_errno("unable to rename temporary bitmap file to '%s'", filename
);
846 strbuf_release(&tmp_file
);
847 free(commit_positions
);