2 #include "object-store.h"
7 #include "list-objects.h"
9 #include "pack-revindex.h"
11 #include "pack-bitmap.h"
12 #include "sha1-lookup.h"
13 #include "pack-objects.h"
14 #include "commit-reach.h"
/*
 * One commit that has been selected to receive a bitmap.
 * NOTE(review): this listing ends at write_as. Code elsewhere in the file
 * also accesses flags and xor_offset members, which are not visible here.
 */
16 struct bitmapped_commit
{
/* the selected commit */
17 struct commit
*commit
;
/* bitmap of the commit: a reused one or the result of bitmap_to_ewah() */
18 struct ewah_bitmap
*bitmap
;
/* bitmap that is actually serialized; assigned in compute_xor_offsets() */
19 struct ewah_bitmap
*write_as
;
/*
 * State shared by all bitmap writing functions in this file.
 * NOTE(review): reused and show_progress members are accessed elsewhere in
 * the file but their declarations are not visible in this listing.
 */
25 struct bitmap_writer
{
/* type index: one bitmap per object type, filled with ewah_set() */
26 struct ewah_bitmap
*commits
;
27 struct ewah_bitmap
*trees
;
28 struct ewah_bitmap
*blobs
;
29 struct ewah_bitmap
*tags
;
/* oid keyed map; values are pointers to entries of the selected array */
31 kh_oid_map_t
*bitmaps
;
/* packing data recorded by bitmap_writer_build() */
33 struct packing_data
*to_pack
;
/* growable array of selected commits: used count and allocated count */
35 struct bitmapped_commit
*selected
;
36 unsigned int selected_nr
, selected_alloc
;
/* progress meter handle; started and stopped by the build/select steps */
38 struct progress
*progress
;
/* checksum copied in by bitmap_writer_set_checksum() and written to the header */
40 unsigned char pack_checksum
[GIT_MAX_RAWSZ
];
/* Single file-scope writer instance read and updated by every function here. */
43 static struct bitmap_writer writer
;
/*
 * Record whether progress output is wanted.
 * The value is stored in writer.show_progress, which later steps test
 * before calling start_progress().
 */
45 void bitmap_writer_show_progress(int show
)
47 writer
.show_progress
= show
;
51 * Build the initial type index for the packfile
/*
 * Populate the per-type bitmaps of the writer, using the position of each
 * entry in index as its bit position.
 * NOTE(review): the rest of the parameter list, the local declarations and
 * the case labels that select between the statements below are not visible
 * in this listing.
 */
53 void bitmap_writer_build_type_index(struct packing_data
*to_pack
,
54 struct pack_idx_entry
**index
,
/* start from four empty bitmaps */
59 writer
.commits
= ewah_new();
60 writer
.trees
= ewah_new();
61 writer
.blobs
= ewah_new();
62 writer
.tags
= ewah_new();
/* one in_pack_pos slot per object in to_pack */
63 ALLOC_ARRAY(to_pack
->in_pack_pos
, to_pack
->nr_objects
);
65 for (i
= 0; i
< index_nr
; ++i
) {
/* the index entries are treated as struct object_entry via a cast */
66 struct object_entry
*entry
= (struct object_entry
*)index
[i
];
67 enum object_type real_type
;
/* remember that this entry sits at position i */
69 oe_set_in_pack_pos(to_pack
, entry
, i
);
/* real_type comes either from the entry itself or from an object lookup */
71 switch (oe_type(entry
)) {
76 real_type
= oe_type(entry
);
80 real_type
= oid_object_info(to_pack
->repo
,
81 &entry
->idx
.oid
, NULL
);
/* set bit i in exactly one type bitmap; presumably chosen by real_type */
87 ewah_set(writer
.commits
, i
);
91 ewah_set(writer
.trees
, i
);
95 ewah_set(writer
.blobs
, i
);
99 ewah_set(writer
.tags
, i
);
/* abort when no type could be determined for the entry */
103 die("Missing type information for %s (%d/%d)",
104 oid_to_hex(&entry
->idx
.oid
), real_type
,
111 * Compute the actual bitmaps
/*
 * Growable list of objects recorded by mark_as_seen(); reset_all_seen()
 * walks it to clear traversal flags. Element count and allocated size
 * are kept alongside the array.
 */
113 static struct object
**seen_objects
;
114 static unsigned int seen_objects_nr
, seen_objects_alloc
;
/*
 * Append commit to writer.selected.
 * reused is stored as the initial bitmap of the new entry; callers in this
 * file pass NULL or the result of find_reused_bitmap().
 */
116 static inline void push_bitmapped_commit(struct commit
*commit
, struct ewah_bitmap
*reused
)
/* grow the array when it is full: new size is (old + 32) * 2 */
118 if (writer
.selected_nr
>= writer
.selected_alloc
) {
119 writer
.selected_alloc
= (writer
.selected_alloc
+ 32) * 2;
120 REALLOC_ARRAY(writer
.selected
, writer
.selected_alloc
);
/* fill the next free slot; write_as is not assigned here */
123 writer
.selected
[writer
.selected_nr
].commit
= commit
;
124 writer
.selected
[writer
.selected_nr
].bitmap
= reused
;
125 writer
.selected
[writer
.selected_nr
].flags
= 0;
127 writer
.selected_nr
++;
/* Append object to the seen_objects list, growing the array as needed. */
130 static inline void mark_as_seen(struct object
*object
)
132 ALLOC_GROW(seen_objects
, seen_objects_nr
+ 1, seen_objects_alloc
);
133 seen_objects
[seen_objects_nr
++] = object
;
/*
 * Clear the SEEN, ADDED and SHOWN flags on every object in seen_objects.
 * NOTE(review): lines following the loop are missing from this listing, so
 * whether the list itself is emptied here cannot be confirmed.
 */
136 static inline void reset_all_seen(void)
139 for (i
= 0; i
< seen_objects_nr
; ++i
) {
140 seen_objects
[i
]->flags
&= ~(SEEN
| ADDED
| SHOWN
);
/*
 * Return the in-pack position recorded for oid in writer.to_pack.
 * Dies with a message naming the object when the pack lacks it.
 * NOTE(review): the condition guarding the die() call is not visible;
 * presumably it tests for a failed packlist_find().
 */
145 static uint32_t find_object_pos(const struct object_id
*oid
)
147 struct object_entry
*entry
= packlist_find(writer
.to_pack
, oid
);
150 die("Failed to write bitmap index. Packfile doesn't have full closure "
151 "(object %s is missing)", oid_to_hex(oid
));
154 return oe_in_pack_pos(writer
.to_pack
, entry
);
/*
 * Object callback handed to traverse_commit_list().
 * data is the struct bitmap being built: the bit at the position of the
 * object is set and the object is recorded as seen. name is unused in the
 * visible lines.
 */
157 static void show_object(struct object
*object
, const char *name
, void *data
)
159 struct bitmap
*base
= data
;
160 bitmap_set(base
, find_object_pos(&object
->oid
));
161 mark_as_seen(object
);
/*
 * Commit callback handed to traverse_commit_list().
 * Only records the commit as seen; data is unused in the visible lines.
 */
164 static void show_commit(struct commit
*commit
, void *data
)
166 mark_as_seen((struct object
*)commit
);
/*
 * Fold commit into the bitmap base.
 * NOTE(review): the return type line and every return statement are missing
 * from this listing. should_include() treats a zero result as a signal to
 * stop at this commit; confirm the exact values against the full source.
 */
170 add_to_include_set(struct bitmap
*base
, struct commit
*commit
)
173 uint32_t bitmap_pos
= find_object_pos(&commit
->object
.oid
);
/* is the bit for this commit already set in base? */
175 if (bitmap_get(base
, bitmap_pos
))
/* look for a bitmap already stored for this commit */
178 hash_pos
= kh_get_oid_map(writer
.bitmaps
, commit
->object
.oid
);
179 if (hash_pos
< kh_end(writer
.bitmaps
)) {
/* found: OR the stored bitmap into base */
180 struct bitmapped_commit
*bc
= kh_value(writer
.bitmaps
, hash_pos
);
181 bitmap_or_ewah(base
, bc
->bitmap
);
/* set the bit of the commit itself */
185 bitmap_set(base
, bitmap_pos
);
/*
 * Installed as revs.include_check by bitmap_writer_build(); _data is the
 * struct bitmap being built.
 * NOTE(review): the return type, the loop header around the parent walk and
 * the return statements are not visible in this listing.
 */
190 should_include(struct commit
*commit
, void *_data
)
192 struct bitmap
*base
= _data
;
/* when add_to_include_set() yields zero, flag the commit and its parents */
194 if (!add_to_include_set(base
, commit
)) {
195 struct commit_list
*parent
= commit
->parents
;
197 mark_as_seen((struct object
*)commit
);
/* mark each parent SEEN and remember it so the flag can be cleared later */
200 parent
->item
->object
.flags
|= SEEN
;
201 mark_as_seen((struct object
*)parent
->item
);
202 parent
= parent
->next
;
/*
 * For every selected commit choose the bitmap to serialize (write_as) and
 * the xor_offset recorded with it, by trying XORs against other selected
 * bitmaps and keeping the candidate with the smallest buffer_size.
 * NOTE(review): the declarations of next, i, curr and best_offset, the
 * computation of curr, and the else and loop-exit lines are not visible in
 * this listing.
 */
211 static void compute_xor_offsets(void)
/* upper bound on how many candidate offsets are tried per commit */
213 static const int MAX_XOR_OFFSET_SEARCH
= 10;
217 while (next
< writer
.selected_nr
) {
218 struct bitmapped_commit
*stored
= &writer
.selected
[next
];
/* the unmodified bitmap is the baseline to beat */
221 struct ewah_bitmap
*best_bitmap
= stored
->bitmap
;
222 struct ewah_bitmap
*test_xor
;
224 for (i
= 1; i
<= MAX_XOR_OFFSET_SEARCH
; ++i
) {
/* candidate: XOR of the bitmap at curr with the bitmap of this commit */
230 test_xor
= ewah_pool_new();
231 ewah_xor(writer
.selected
[curr
].bitmap
, stored
->bitmap
, test_xor
);
/* keep the candidate when smaller; free a previous candidate, never the original */
233 if (test_xor
->buffer_size
< best_bitmap
->buffer_size
) {
234 if (best_bitmap
!= stored
->bitmap
)
235 ewah_pool_free(best_bitmap
);
237 best_bitmap
= test_xor
;
/* presumably the branch for a candidate that is not smaller: discard it */
240 ewah_pool_free(test_xor
);
/* record the winning offset and the bitmap to write */
244 stored
->xor_offset
= best_offset
;
245 stored
->write_as
= best_bitmap
;
/*
 * Compute a bitmap for every selected commit that does not have one yet,
 * register all selected commits in writer.bitmaps, then call
 * compute_xor_offsets().
 * NOTE(review): several lines are missing from this listing, including
 * declarations of hash_pos and hash_ret, the remainder of the
 * in_merge_bases() condition and its body, and the test before the
 * duplicate-entry die().
 */
251 void bitmap_writer_build(struct packing_data
*to_pack
)
/* fraction of writer.selected_nr used to compute reuse_after */
253 static const double REUSE_BITMAP_THRESHOLD
= 0.2;
255 int i
, reuse_after
, need_reset
;
/* working bitmap fed to the traversal callbacks */
256 struct bitmap
*base
= bitmap_new();
257 struct rev_info revs
;
259 writer
.bitmaps
= kh_init_oid_map();
260 writer
.to_pack
= to_pack
;
262 if (writer
.show_progress
)
263 writer
.progress
= start_progress("Building bitmaps", writer
.selected_nr
);
/* walk commits, tags, trees and blobs; should_include() filters commits */
265 repo_init_revisions(to_pack
->repo
, &revs
, NULL
);
266 revs
.tag_objects
= 1;
267 revs
.tree_objects
= 1;
268 revs
.blob_objects
= 1;
271 revs
.include_check
= should_include
;
272 reset_revision_walk();
/* entries with index at or above this value get BITMAP_FLAG_REUSE */
274 reuse_after
= writer
.selected_nr
* REUSE_BITMAP_THRESHOLD
;
/* iterate over the selected commits from the last entry to the first */
277 for (i
= writer
.selected_nr
- 1; i
>= 0; --i
) {
278 struct bitmapped_commit
*stored
;
279 struct object
*object
;
284 stored
= &writer
.selected
[i
];
285 object
= (struct object
*)stored
->commit
;
/* only commits without a bitmap need a traversal */
287 if (stored
->bitmap
== NULL
) {
/* compares against the entry handled just before this one (index i + 1) */
288 if (i
< writer
.selected_nr
- 1 &&
290 !in_merge_bases(writer
.selected
[i
+ 1].commit
,
/* traverse from this commit, accumulating reachable objects in base */
296 add_pending_object(&revs
, object
, "");
297 revs
.include_check_data
= base
;
299 if (prepare_revision_walk(&revs
))
300 die("revision walk setup failed");
302 traverse_commit_list(&revs
, show_commit
, show_object
, base
);
304 object_array_clear(&revs
.pending
);
/* store the accumulated bitmap in compressed form */
306 stored
->bitmap
= bitmap_to_ewah(base
);
311 if (i
>= reuse_after
)
312 stored
->flags
|= BITMAP_FLAG_REUSE
;
/* register the entry by oid so add_to_include_set() can find it */
314 hash_pos
= kh_put_oid_map(writer
.bitmaps
, object
->oid
, &hash_ret
);
316 die("Duplicate entry when writing index: %s",
317 oid_to_hex(&object
->oid
));
319 kh_value(writer
.bitmaps
, hash_pos
) = stored
;
320 display_progress(writer
.progress
, writer
.selected_nr
- i
);
324 stop_progress(&writer
.progress
);
326 compute_xor_offsets();
330 * Select the commits that will be bitmapped
/*
 * Return the step used by bitmap_writer_select_commits() to pick the next
 * candidate window, as a function of the current index idx.
 * NOTE(review): the statement executed when idx is at most MUST_REGION is
 * not visible in this listing.
 */
332 static inline unsigned int next_commit_index(unsigned int idx
)
/* bounds applied to the returned step */
334 static const unsigned int MIN_COMMITS
= 100;
335 static const unsigned int MAX_COMMITS
= 5000;
/* index thresholds separating the three regions */
337 static const unsigned int MUST_REGION
= 100;
338 static const unsigned int MIN_REGION
= 20000;
340 unsigned int offset
, next
;
342 if (idx
<= MUST_REGION
)
/* middle region: distance past MUST_REGION, capped at MIN_COMMITS */
345 if (idx
<= MIN_REGION
) {
346 offset
= idx
- MUST_REGION
;
347 return (offset
< MIN_COMMITS
) ? offset
: MIN_COMMITS
;
/* last region: distance past MIN_REGION, clamped to MIN_COMMITS..MAX_COMMITS */
350 offset
= idx
- MIN_REGION
;
351 next
= (offset
< MAX_COMMITS
) ? offset
: MAX_COMMITS
;
353 return (next
> MIN_COMMITS
) ? next
: MIN_COMMITS
;
356 static int date_compare(const void *_a
, const void *_b
)
358 struct commit
*a
= *(struct commit
**)_a
;
359 struct commit
*b
= *(struct commit
**)_b
;
360 return (long)b
->date
- (long)a
->date
;
/*
 * Load the existing bitmap index of the repository, if any, and rebuild its
 * bitmaps into the writer.reused map for the objects of to_pack.
 * NOTE(review): the statement taken when prepare_bitmap_git() fails is not
 * visible in this listing; presumably an early return.
 */
363 void bitmap_writer_reuse_bitmaps(struct packing_data
*to_pack
)
365 struct bitmap_index
*bitmap_git
;
366 if (!(bitmap_git
= prepare_bitmap_git(to_pack
->repo
)))
/* fresh map, filled by rebuild_existing_bitmaps() */
369 writer
.reused
= kh_init_oid_map();
370 rebuild_existing_bitmaps(bitmap_git
, to_pack
, writer
.reused
,
371 writer
.show_progress
);
373 * NEEDSWORK: rebuild_existing_bitmaps() makes writer.reused reference
374 * some bitmaps in bitmap_git, so we can't free the latter.
/*
 * Look up oid in writer.reused and return the bitmap stored for it.
 * NOTE(review): the lines between the signature and the lookup, and the
 * statement taken when the key is absent, are not visible; presumably the
 * function returns NULL in that case.
 */
378 static struct ewah_bitmap
*find_reused_bitmap(const struct object_id
*oid
)
385 hash_pos
= kh_get_oid_map(writer
.reused
, *oid
);
/* a position at or past kh_end() means the key was not found */
386 if (hash_pos
>= kh_end(writer
.reused
))
389 return kh_value(writer
.reused
, hash_pos
);
/*
 * Choose which of the indexed commits get a bitmap and push them onto
 * writer.selected.
 * NOTE(review): the last parameter (max_bitmaps is used below), the outer
 * loop header, several branch bodies and the update of i are not visible in
 * this listing.
 */
392 void bitmap_writer_select_commits(struct commit
**indexed_commits
,
393 unsigned int indexed_commits_nr
,
396 unsigned int i
= 0, j
, next
;
/* sort the candidates with date_compare() */
398 QSORT(indexed_commits
, indexed_commits_nr
, date_compare
);
400 if (writer
.show_progress
)
401 writer
.progress
= start_progress("Selecting bitmap commits", 0);
/* fewer than 100 commits: select every one of them, none reused */
403 if (indexed_commits_nr
< 100) {
404 for (i
= 0; i
< indexed_commits_nr
; ++i
)
405 push_bitmapped_commit(indexed_commits
[i
], NULL
);
410 struct ewah_bitmap
*reused_bitmap
= NULL
;
411 struct commit
*chosen
= NULL
;
/* size of the window starting at i */
413 next
= next_commit_index(i
);
/* the window would run past the end of the array */
415 if (i
+ next
>= indexed_commits_nr
)
/* enforce the cap on the number of selected commits when one is given */
418 if (max_bitmaps
> 0 && writer
.selected_nr
>= max_bitmaps
) {
419 writer
.selected_nr
= max_bitmaps
;
/* one path takes the commit at i directly ... */
424 chosen
= indexed_commits
[i
];
425 reused_bitmap
= find_reused_bitmap(&chosen
->object
.oid
);
/* ... the other starts from the end of the window and scans it */
427 chosen
= indexed_commits
[i
+ next
];
429 for (j
= 0; j
<= next
; ++j
) {
430 struct commit
*cm
= indexed_commits
[i
+ j
];
/* commits with a reusable bitmap or the NEEDS_BITMAP flag are tested first */
432 reused_bitmap
= find_reused_bitmap(&cm
->object
.oid
);
433 if (reused_bitmap
|| (cm
->object
.flags
& NEEDS_BITMAP
) != 0) {
/* true for commits with at least two parents */
438 if (cm
->parents
&& cm
->parents
->next
)
443 push_bitmapped_commit(chosen
, reused_bitmap
);
446 display_progress(writer
.progress
, i
);
449 stop_progress(&writer
.progress
);
/*
 * Write callback passed to ewah_serialize_to() by dump_bitmap(); f is the
 * hashfile handle and buf/len the bytes to append.
 * NOTE(review): the return statement is not visible in this listing.
 */
453 static int hashwrite_ewah_helper(void *f
, const void *buf
, size_t len
)
455 /* hashwrite will die on error */
456 hashwrite(f
, buf
, len
);
461 * Write the bitmap index to disk
/*
 * Serialize bitmap into the hashfile f through hashwrite_ewah_helper().
 * Dies when ewah_serialize_to() reports a negative result.
 */
463 static inline void dump_bitmap(struct hashfile
*f
, struct ewah_bitmap
*bitmap
)
465 if (ewah_serialize_to(bitmap
, hashwrite_ewah_helper
, f
) < 0)
466 die("Failed to write bitmap index");
/*
 * Accessor passed to sha1_pos(): table is an array of pack_idx_entry
 * pointers, and the raw hash bytes of the entry at pos are returned.
 */
469 static const unsigned char *sha1_access(size_t pos
, void *table
)
471 struct pack_idx_entry
**index
= table
;
472 return index
[pos
]->oid
.hash
;
/*
 * Write one record per selected commit to f: the position of the commit in
 * index as a 32-bit big-endian value, then xor_offset and flags as single
 * bytes, then the write_as bitmap.
 * NOTE(review): the last parameter, the declaration and assignment of
 * commit_pos, and the test before BUG() are not visible in this listing.
 */
475 static void write_selected_commits_v1(struct hashfile
*f
,
476 struct pack_idx_entry
**index
,
481 for (i
= 0; i
< writer
.selected_nr
; ++i
) {
482 struct bitmapped_commit
*stored
= &writer
.selected
[i
];
/* locate the commit in the index by its hash */
485 sha1_pos(stored
->commit
->object
.oid
.hash
, index
, index_nr
, sha1_access
);
488 BUG("trying to write commit not in index");
490 hashwrite_be32(f
, commit_pos
);
491 hashwrite_u8(f
, stored
->xor_offset
);
492 hashwrite_u8(f
, stored
->flags
);
494 dump_bitmap(f
, stored
->write_as
);
/*
 * Write the hash member of every index entry to f, in index order, as a
 * 32-bit value converted with htonl().
 * NOTE(review): the last parameter and the declaration of i are not
 * visible in this listing.
 */
498 static void write_hash_cache(struct hashfile
*f
,
499 struct pack_idx_entry
**index
,
504 for (i
= 0; i
< index_nr
; ++i
) {
/* the index entries are treated as struct object_entry via a cast */
505 struct object_entry
*entry
= (struct object_entry
*)index
[i
];
506 uint32_t hash_value
= htonl(entry
->hash
);
507 hashwrite(f
, &hash_value
, sizeof(hash_value
));
/*
 * Copy sha1 into writer.pack_checksum with hashcpy(); bitmap_writer_finish()
 * later copies that value into the header it writes.
 */
511 void bitmap_writer_set_checksum(unsigned char *sha1
)
513 hashcpy(writer
.pack_checksum
, sha1
);
/*
 * Write the bitmap index to a temporary file and rename it to filename.
 * Output order: header, the four type bitmaps, the selected commit records
 * and, when BITMAP_OPT_HASH_CACHE is set in options, the hash cache.
 * NOTE(review): the index_nr and options parameters and the declaration of
 * f are not visible in this listing.
 */
516 void bitmap_writer_finish(struct pack_idx_entry
**index
,
518 const char *filename
,
521 static uint16_t default_version
= 1;
522 static uint16_t flags
= BITMAP_OPT_FULL_DAG
;
523 struct strbuf tmp_file
= STRBUF_INIT
;
526 struct bitmap_disk_header header
;
/* create the temporary file; its path ends up in tmp_file */
528 int fd
= odb_mkstemp(&tmp_file
, "pack/tmp_bitmap_XXXXXX");
530 f
= hashfd(fd
, tmp_file
.buf
);
/* fill the header; multi-byte fields are converted with htons() and htonl() */
532 memcpy(header
.magic
, BITMAP_IDX_SIGNATURE
, sizeof(BITMAP_IDX_SIGNATURE
));
533 header
.version
= htons(default_version
);
534 header
.options
= htons(flags
| options
);
535 header
.entry_count
= htonl(writer
.selected_nr
);
536 hashcpy(header
.checksum
, writer
.pack_checksum
);
/* the written length swaps GIT_MAX_RAWSZ for the rawsz of the active hash */
538 hashwrite(f
, &header
, sizeof(header
) - GIT_MAX_RAWSZ
+ the_hash_algo
->rawsz
);
539 dump_bitmap(f
, writer
.commits
);
540 dump_bitmap(f
, writer
.trees
);
541 dump_bitmap(f
, writer
.blobs
);
542 dump_bitmap(f
, writer
.tags
);
543 write_selected_commits_v1(f
, index
, index_nr
);
545 if (options
& BITMAP_OPT_HASH_CACHE
)
546 write_hash_cache(f
, index
, index_nr
);
/* finalize the hashfile with the in-stream hash, fsync and close flags */
548 finalize_hashfile(f
, NULL
, CSUM_HASH_IN_STREAM
| CSUM_FSYNC
| CSUM_CLOSE
);
550 if (adjust_shared_perm(tmp_file
.buf
))
551 die_errno("unable to make temporary bitmap file readable");
/* move the finished file into place */
553 if (rename(tmp_file
.buf
, filename
))
554 die_errno("unable to rename temporary bitmap file to '%s'", filename
);
556 strbuf_release(&tmp_file
);