1 #define USE_THE_REPOSITORY_VARIABLE
3 #include "git-compat-util.h"
4 #include "pseudo-merge.h"
9 #include "string-list.h"
11 #include "pack-bitmap.h"
17 #define DEFAULT_PSEUDO_MERGE_DECAY 1.0
18 #define DEFAULT_PSEUDO_MERGE_MAX_MERGES 64
19 #define DEFAULT_PSEUDO_MERGE_SAMPLE_RATE 1
20 #define DEFAULT_PSEUDO_MERGE_THRESHOLD approxidate("1.week.ago")
21 #define DEFAULT_PSEUDO_MERGE_STABLE_THRESHOLD approxidate("1.month.ago")
22 #define DEFAULT_PSEUDO_MERGE_STABLE_SIZE 512
24 static double gitexp(double base
, int exp
)
38 static uint32_t pseudo_merge_group_size(const struct pseudo_merge_group
*group
,
39 const struct pseudo_merge_matches
*matches
,
46 * The size of pseudo-merge groups decays according to a power series,
51 * , where 'n' is the n-th pseudo-merge group, 'f(n)' is its size, 'k'
52 * is the decay rate, and 'C' is a scaling value.
54 * The value of C depends on the number of groups, decay rate, and total
55 * number of commits. It is computed such that if there are M and N
56 * total groups and commits, respectively, that:
58 * N = f(0) + f(1) + ... f(M-1)
60 * Rearranging to isolate C, we get:
62 * N = \sum_{n=1}^M C / n^k
64 * N / C = \sum_{n=1}^M n^-k
66 * C = N / \sum_{n=1}^M n^-k
68 * For example, if we have a decay rate of 'k' being equal to 1.5, 'N'
69 * total commits equal to 10,000, and 'M' being equal to 6 groups, then
70 * the (rounded) group sizes are:
72 * { 5469, 1934, 1053, 684, 489, 372 }
74 * increasing the number of total groups, say to 10, scales the group
75 * sizes appropriately:
77 * { 5012, 1772, 964, 626, 448, 341, 271, 221, 186, 158 }
79 for (n
= 0; n
< group
->max_merges
; n
++)
80 C
+= 1.0 / gitexp(n
+ 1, group
->decay
);
81 C
= matches
->unstable_nr
/ C
;
83 return (uint32_t)((C
/ gitexp(i
+ 1, group
->decay
)) + 0.5);
86 static void pseudo_merge_group_init(struct pseudo_merge_group
*group
)
88 memset(group
, 0, sizeof(struct pseudo_merge_group
));
90 strmap_init_with_options(&group
->matches
, NULL
, 0);
92 group
->decay
= DEFAULT_PSEUDO_MERGE_DECAY
;
93 group
->max_merges
= DEFAULT_PSEUDO_MERGE_MAX_MERGES
;
94 group
->sample_rate
= DEFAULT_PSEUDO_MERGE_SAMPLE_RATE
;
95 group
->threshold
= DEFAULT_PSEUDO_MERGE_THRESHOLD
;
96 group
->stable_threshold
= DEFAULT_PSEUDO_MERGE_STABLE_THRESHOLD
;
97 group
->stable_size
= DEFAULT_PSEUDO_MERGE_STABLE_SIZE
;
100 static int pseudo_merge_config(const char *var
, const char *value
,
101 const struct config_context
*ctx
,
104 struct string_list
*list
= cb_data
;
105 struct string_list_item
*item
;
106 struct pseudo_merge_group
*group
;
107 struct strbuf buf
= STRBUF_INIT
;
108 const char *sub
, *key
;
112 if (parse_config_key(var
, "bitmappseudomerge", &sub
, &sub_len
, &key
))
118 strbuf_add(&buf
, sub
, sub_len
);
120 item
= string_list_lookup(list
, buf
.buf
);
122 item
= string_list_insert(list
, buf
.buf
);
124 item
->util
= xmalloc(sizeof(struct pseudo_merge_group
));
125 pseudo_merge_group_init(item
->util
);
130 if (!strcmp(key
, "pattern")) {
131 struct strbuf re
= STRBUF_INIT
;
133 free(group
->pattern
);
135 strbuf_addch(&re
, '^');
136 strbuf_addstr(&re
, value
);
138 group
->pattern
= xcalloc(1, sizeof(regex_t
));
139 if (regcomp(group
->pattern
, re
.buf
, REG_EXTENDED
))
140 die(_("failed to load pseudo-merge regex for %s: '%s'"),
144 } else if (!strcmp(key
, "decay")) {
145 group
->decay
= git_config_double(var
, value
, ctx
->kvi
);
146 if (group
->decay
< 0) {
147 warning(_("%s must be non-negative, using default"), var
);
148 group
->decay
= DEFAULT_PSEUDO_MERGE_DECAY
;
150 } else if (!strcmp(key
, "samplerate")) {
151 group
->sample_rate
= git_config_double(var
, value
, ctx
->kvi
);
152 if (!(0 <= group
->sample_rate
&& group
->sample_rate
<= 1)) {
153 warning(_("%s must be between 0 and 1, using default"), var
);
154 group
->sample_rate
= DEFAULT_PSEUDO_MERGE_SAMPLE_RATE
;
156 } else if (!strcmp(key
, "threshold")) {
157 if (git_config_expiry_date(&group
->threshold
, var
, value
)) {
161 } else if (!strcmp(key
, "maxmerges")) {
162 group
->max_merges
= git_config_int(var
, value
, ctx
->kvi
);
163 if (group
->max_merges
< 0) {
164 warning(_("%s must be non-negative, using default"), var
);
165 group
->max_merges
= DEFAULT_PSEUDO_MERGE_MAX_MERGES
;
167 } else if (!strcmp(key
, "stablethreshold")) {
168 if (git_config_expiry_date(&group
->stable_threshold
, var
, value
)) {
172 } else if (!strcmp(key
, "stablesize")) {
173 group
->stable_size
= git_config_int(var
, value
, ctx
->kvi
);
174 if (group
->stable_size
<= 0) {
175 warning(_("%s must be positive, using default"), var
);
176 group
->stable_size
= DEFAULT_PSEUDO_MERGE_STABLE_SIZE
;
181 strbuf_release(&buf
);
186 void load_pseudo_merges_from_config(struct string_list
*list
)
188 struct string_list_item
*item
;
190 git_config(pseudo_merge_config
, list
);
192 for_each_string_list_item(item
, list
) {
193 struct pseudo_merge_group
*group
= item
->util
;
195 die(_("pseudo-merge group '%s' missing required pattern"),
197 if (group
->threshold
< group
->stable_threshold
)
198 die(_("pseudo-merge group '%s' has unstable threshold "
199 "before stable one"), item
->string
);
203 static int find_pseudo_merge_group_for_ref(const char *refname
,
204 const struct object_id
*oid
,
208 struct bitmap_writer
*writer
= _data
;
209 struct object_id peeled
;
214 if (!peel_iterated_oid(the_repository
, oid
, &peeled
))
217 c
= lookup_commit(the_repository
, oid
);
221 has_bitmap
= bitmap_writer_has_bitmapped_object_id(writer
, oid
);
223 for (i
= 0; i
< writer
->pseudo_merge_groups
.nr
; i
++) {
224 struct pseudo_merge_group
*group
;
225 struct pseudo_merge_matches
*matches
;
226 struct strbuf group_name
= STRBUF_INIT
;
227 regmatch_t captures
[16];
230 group
= writer
->pseudo_merge_groups
.items
[i
].util
;
231 if (regexec(group
->pattern
, refname
, ARRAY_SIZE(captures
),
235 if (captures
[ARRAY_SIZE(captures
) - 1].rm_so
!= -1)
236 warning(_("pseudo-merge regex from config has too many capture "
237 "groups (max=%"PRIuMAX
")"),
238 (uintmax_t)ARRAY_SIZE(captures
) - 2);
240 for (j
= !!group
->pattern
->re_nsub
; j
< ARRAY_SIZE(captures
); j
++) {
241 regmatch_t
*match
= &captures
[j
];
242 if (match
->rm_so
== -1)
246 strbuf_addch(&group_name
, '-');
248 strbuf_add(&group_name
, refname
+ match
->rm_so
,
249 match
->rm_eo
- match
->rm_so
);
252 matches
= strmap_get(&group
->matches
, group_name
.buf
);
254 matches
= xcalloc(1, sizeof(*matches
));
255 strmap_put(&group
->matches
, strbuf_detach(&group_name
, NULL
),
259 if (c
->date
<= group
->stable_threshold
) {
260 ALLOC_GROW(matches
->stable
, matches
->stable_nr
+ 1,
261 matches
->stable_alloc
);
262 matches
->stable
[matches
->stable_nr
++] = c
;
263 } else if (c
->date
<= group
->threshold
&& !has_bitmap
) {
264 ALLOC_GROW(matches
->unstable
, matches
->unstable_nr
+ 1,
265 matches
->unstable_alloc
);
266 matches
->unstable
[matches
->unstable_nr
++] = c
;
269 strbuf_release(&group_name
);
275 static struct commit
*push_pseudo_merge(struct pseudo_merge_group
*group
)
277 struct commit
*merge
;
279 ALLOC_GROW(group
->merges
, group
->merges_nr
+ 1, group
->merges_alloc
);
281 merge
= alloc_commit_node(the_repository
);
282 merge
->object
.parsed
= 1;
283 merge
->object
.flags
|= BITMAP_PSEUDO_MERGE
;
285 group
->merges
[group
->merges_nr
++] = merge
;
290 static struct pseudo_merge_commit_idx
*pseudo_merge_idx(kh_oid_map_t
*pseudo_merge_commits
,
291 const struct object_id
*oid
)
294 struct pseudo_merge_commit_idx
*pmc
;
296 khiter_t hash_pos
= kh_put_oid_map(pseudo_merge_commits
, *oid
,
300 CALLOC_ARRAY(pmc
, 1);
301 kh_value(pseudo_merge_commits
, hash_pos
) = pmc
;
303 pmc
= kh_value(pseudo_merge_commits
, hash_pos
);
309 #define MIN_PSEUDO_MERGE_SIZE 8
311 static void select_pseudo_merges_1(struct bitmap_writer
*writer
,
312 struct pseudo_merge_group
*group
,
313 struct pseudo_merge_matches
*matches
)
316 uint32_t stable_merges_nr
;
318 if (!matches
->stable_nr
&& !matches
->unstable_nr
)
319 return; /* all tips in this group already have bitmaps */
321 stable_merges_nr
= matches
->stable_nr
/ group
->stable_size
;
322 if (matches
->stable_nr
% group
->stable_size
)
325 /* make stable_merges_nr pseudo merges for stable commits */
326 for (i
= 0, j
= 0; i
< stable_merges_nr
; i
++) {
327 struct commit
*merge
;
328 struct commit_list
**p
;
330 merge
= push_pseudo_merge(group
);
334 * For each pseudo-merge created above, add parents to the
335 * allocated commit node from the stable set of commits
336 * (un-bitmapped, newer than the stable threshold).
340 struct pseudo_merge_commit_idx
*pmc
;
342 if (j
>= matches
->stable_nr
)
345 c
= matches
->stable
[j
++];
347 * Here and below, make sure that we keep our mapping of
348 * commits -> pseudo-merge(s) which include the key'd
351 pmc
= pseudo_merge_idx(writer
->pseudo_merge_commits
,
354 ALLOC_GROW(pmc
->pseudo_merge
, pmc
->nr
+ 1, pmc
->alloc
);
356 pmc
->pseudo_merge
[pmc
->nr
++] = writer
->pseudo_merges_nr
;
357 p
= commit_list_append(c
, p
);
358 } while (j
% group
->stable_size
);
360 bitmap_writer_push_commit(writer
, merge
, 1);
361 writer
->pseudo_merges_nr
++;
364 /* make up to group->max_merges pseudo merges for unstable commits */
365 for (i
= 0, j
= 0; i
< group
->max_merges
; i
++) {
366 struct commit
*merge
;
367 struct commit_list
**p
;
370 merge
= push_pseudo_merge(group
);
373 size
= pseudo_merge_group_size(group
, matches
, i
);
374 end
= size
< MIN_PSEUDO_MERGE_SIZE
? matches
->unstable_nr
: j
+ size
;
377 * For each pseudo-merge commit created above, add parents to
378 * the allocated commit node from the unstable set of commits
379 * (newer than the stable threshold).
381 * Account for the sample rate, since not every candidate from
382 * the set of stable commits will be included as a pseudo-merge
385 for (; j
< end
&& j
< matches
->unstable_nr
; j
++) {
386 struct commit
*c
= matches
->unstable
[j
];
387 struct pseudo_merge_commit_idx
*pmc
;
389 if (j
% (uint32_t)(1.0 / group
->sample_rate
))
392 pmc
= pseudo_merge_idx(writer
->pseudo_merge_commits
,
395 ALLOC_GROW(pmc
->pseudo_merge
, pmc
->nr
+ 1, pmc
->alloc
);
397 pmc
->pseudo_merge
[pmc
->nr
++] = writer
->pseudo_merges_nr
;
398 p
= commit_list_append(c
, p
);
401 bitmap_writer_push_commit(writer
, merge
, 1);
402 writer
->pseudo_merges_nr
++;
403 if (end
>= matches
->unstable_nr
)
408 static int commit_date_cmp(const void *va
, const void *vb
)
410 timestamp_t a
= (*(const struct commit
**)va
)->date
;
411 timestamp_t b
= (*(const struct commit
**)vb
)->date
;
420 static void sort_pseudo_merge_matches(struct pseudo_merge_matches
*matches
)
422 QSORT(matches
->stable
, matches
->stable_nr
, commit_date_cmp
);
423 QSORT(matches
->unstable
, matches
->unstable_nr
, commit_date_cmp
);
426 void select_pseudo_merges(struct bitmap_writer
*writer
,
427 struct commit
**commits
, size_t commits_nr
)
429 struct progress
*progress
= NULL
;
432 if (!writer
->pseudo_merge_groups
.nr
)
435 if (writer
->show_progress
)
436 progress
= start_progress("Selecting pseudo-merge commits",
437 writer
->pseudo_merge_groups
.nr
);
439 refs_for_each_ref(get_main_ref_store(the_repository
),
440 find_pseudo_merge_group_for_ref
, writer
);
442 for (i
= 0; i
< writer
->pseudo_merge_groups
.nr
; i
++) {
443 struct pseudo_merge_group
*group
;
444 struct hashmap_iter iter
;
445 struct strmap_entry
*e
;
447 group
= writer
->pseudo_merge_groups
.items
[i
].util
;
448 strmap_for_each_entry(&group
->matches
, &iter
, e
) {
449 struct pseudo_merge_matches
*matches
= e
->value
;
451 sort_pseudo_merge_matches(matches
);
453 select_pseudo_merges_1(writer
, group
, matches
);
456 display_progress(progress
, i
+ 1);
459 stop_progress(&progress
);
462 void free_pseudo_merge_map(struct pseudo_merge_map
*pm
)
465 for (i
= 0; i
< pm
->nr
; i
++) {
466 ewah_pool_free(pm
->v
[i
].commits
);
467 ewah_pool_free(pm
->v
[i
].bitmap
);
472 struct pseudo_merge_commit_ext
{
474 const unsigned char *ptr
;
477 static int pseudo_merge_ext_at(const struct pseudo_merge_map
*pm
,
478 struct pseudo_merge_commit_ext
*ext
, size_t at
)
480 if (at
>= pm
->map_size
)
481 return error(_("extended pseudo-merge read out-of-bounds "
482 "(%"PRIuMAX
" >= %"PRIuMAX
")"),
483 (uintmax_t)at
, (uintmax_t)pm
->map_size
);
484 if (at
+ 4 >= pm
->map_size
)
485 return error(_("extended pseudo-merge entry is too short "
486 "(%"PRIuMAX
" >= %"PRIuMAX
")"),
487 (uintmax_t)(at
+ 4), (uintmax_t)pm
->map_size
);
489 ext
->nr
= get_be32(pm
->map
+ at
);
490 ext
->ptr
= pm
->map
+ at
+ sizeof(uint32_t);
495 struct ewah_bitmap
*pseudo_merge_bitmap(const struct pseudo_merge_map
*pm
,
496 struct pseudo_merge
*merge
)
498 if (!merge
->loaded_commits
)
499 BUG("cannot use unloaded pseudo-merge bitmap");
501 if (!merge
->loaded_bitmap
) {
502 size_t at
= merge
->bitmap_at
;
504 merge
->bitmap
= read_bitmap(pm
->map
, pm
->map_size
, &at
);
505 merge
->loaded_bitmap
= 1;
508 return merge
->bitmap
;
511 struct pseudo_merge
*use_pseudo_merge(const struct pseudo_merge_map
*pm
,
512 struct pseudo_merge
*merge
)
514 if (!merge
->loaded_commits
) {
515 size_t pos
= merge
->at
;
517 merge
->commits
= read_bitmap(pm
->map
, pm
->map_size
, &pos
);
518 merge
->bitmap_at
= pos
;
519 merge
->loaded_commits
= 1;
524 static struct pseudo_merge
*pseudo_merge_at(const struct pseudo_merge_map
*pm
,
525 struct object_id
*oid
,
532 size_t mi
= lo
+ (hi
- lo
) / 2;
533 size_t got
= pm
->v
[mi
].at
;
536 return use_pseudo_merge(pm
, &pm
->v
[mi
]);
543 warning(_("could not find pseudo-merge for commit %s at offset %"PRIuMAX
),
544 oid_to_hex(oid
), (uintmax_t)want
);
549 struct pseudo_merge_commit
{
551 uint64_t pseudo_merge_ofs
;
554 #define PSEUDO_MERGE_COMMIT_RAWSZ (sizeof(uint32_t)+sizeof(uint64_t))
556 static void read_pseudo_merge_commit_at(struct pseudo_merge_commit
*merge
,
557 const unsigned char *at
)
559 merge
->commit_pos
= get_be32(at
);
560 merge
->pseudo_merge_ofs
= get_be64(at
+ sizeof(uint32_t));
563 static int nth_pseudo_merge_ext(const struct pseudo_merge_map
*pm
,
564 struct pseudo_merge_commit_ext
*ext
,
565 struct pseudo_merge_commit
*merge
,
571 return error(_("extended pseudo-merge lookup out-of-bounds "
572 "(%"PRIu32
" >= %"PRIu32
")"), n
, ext
->nr
);
574 ofs
= get_be64(ext
->ptr
+ st_mult(n
, sizeof(uint64_t)));
575 if (ofs
>= pm
->map_size
)
576 return error(_("out-of-bounds read: (%"PRIuMAX
" >= %"PRIuMAX
")"),
577 (uintmax_t)ofs
, (uintmax_t)pm
->map_size
);
579 read_pseudo_merge_commit_at(merge
, pm
->map
+ ofs
);
584 static unsigned apply_pseudo_merge(const struct pseudo_merge_map
*pm
,
585 struct pseudo_merge
*merge
,
586 struct bitmap
*result
,
587 struct bitmap
*roots
)
589 if (merge
->satisfied
)
592 if (!ewah_bitmap_is_subset(merge
->commits
, roots
? roots
: result
))
595 bitmap_or_ewah(result
, pseudo_merge_bitmap(pm
, merge
));
597 bitmap_or_ewah(roots
, pseudo_merge_bitmap(pm
, merge
));
598 merge
->satisfied
= 1;
603 static int pseudo_merge_commit_cmp(const void *va
, const void *vb
)
605 struct pseudo_merge_commit merge
;
606 uint32_t key
= *(uint32_t*)va
;
608 read_pseudo_merge_commit_at(&merge
, vb
);
610 if (key
< merge
.commit_pos
)
612 if (key
> merge
.commit_pos
)
617 static struct pseudo_merge_commit
*find_pseudo_merge(const struct pseudo_merge_map
*pm
,
623 return bsearch(&pos
, pm
->commits
, pm
->commits_nr
,
624 PSEUDO_MERGE_COMMIT_RAWSZ
, pseudo_merge_commit_cmp
);
627 int apply_pseudo_merges_for_commit(const struct pseudo_merge_map
*pm
,
628 struct bitmap
*result
,
629 struct commit
*commit
, uint32_t commit_pos
)
631 struct pseudo_merge
*merge
;
632 struct pseudo_merge_commit
*merge_commit
;
635 merge_commit
= find_pseudo_merge(pm
, commit_pos
);
639 if (merge_commit
->pseudo_merge_ofs
& ((uint64_t)1<<63)) {
640 struct pseudo_merge_commit_ext ext
= { 0 };
641 off_t ofs
= merge_commit
->pseudo_merge_ofs
& ~((uint64_t)1<<63);
644 if (pseudo_merge_ext_at(pm
, &ext
, ofs
) < -1) {
645 warning(_("could not read extended pseudo-merge table "
647 oid_to_hex(&commit
->object
.oid
));
651 for (i
= 0; i
< ext
.nr
; i
++) {
652 if (nth_pseudo_merge_ext(pm
, &ext
, merge_commit
, i
) < 0)
655 merge
= pseudo_merge_at(pm
, &commit
->object
.oid
,
656 merge_commit
->pseudo_merge_ofs
);
661 if (apply_pseudo_merge(pm
, merge
, result
, NULL
))
665 merge
= pseudo_merge_at(pm
, &commit
->object
.oid
,
666 merge_commit
->pseudo_merge_ofs
);
671 if (apply_pseudo_merge(pm
, merge
, result
, NULL
))
676 cascade_pseudo_merges(pm
, result
, NULL
);
681 int cascade_pseudo_merges(const struct pseudo_merge_map
*pm
,
682 struct bitmap
*result
,
683 struct bitmap
*roots
)
685 unsigned any_satisfied
;
689 struct pseudo_merge
*merge
;
694 for (i
= 0; i
< pm
->nr
; i
++) {
695 merge
= use_pseudo_merge(pm
, &pm
->v
[i
]);
696 if (apply_pseudo_merge(pm
, merge
, result
, roots
)) {
701 } while (any_satisfied
);
706 struct pseudo_merge
*pseudo_merge_for_parents(const struct pseudo_merge_map
*pm
,
707 struct bitmap
*parents
)
709 struct pseudo_merge
*match
= NULL
;
716 * NOTE: this loop is quadratic in the worst-case (where no
717 * matching pseudo-merge bitmaps are found), but in practice
718 * this is OK for a few reasons:
720 * - Rejecting pseudo-merge bitmaps that do not match the
721 * given commit is done quickly (i.e. `bitmap_equals_ewah()`
722 * returns early when we know the two bitmaps aren't equal.
724 * - Already matched pseudo-merge bitmaps (which we track with
725 * the `->satisfied` bit here) are skipped as potential
728 * - The number of pseudo-merges should be small (in the
729 * hundreds for most repositories).
731 * If in the future this semi-quadratic behavior does become a
732 * problem, another approach would be to keep track of which
733 * pseudo-merges are still "viable" after enumerating the
734 * pseudo-merge commit's parents:
736 * - A pseudo-merge bitmap becomes non-viable when the bit(s)
737 * corresponding to one or more parent(s) of the given
738 * commit are not set in a candidate pseudo-merge's commits
741 * - After processing all bits, enumerate the remaining set of
742 * viable pseudo-merge bitmaps, and check that their
743 * popcount() matches the number of parents in the given
746 for (i
= 0; i
< pm
->nr
; i
++) {
747 struct pseudo_merge
*candidate
= use_pseudo_merge(pm
, &pm
->v
[i
]);
748 if (!candidate
|| candidate
->satisfied
)
750 if (!bitmap_equals_ewah(parents
, candidate
->commits
))
754 match
->satisfied
= 1;