Post 2.46-rc0 batch #1
[git.git] / pseudo-merge.c
blobf0fde13c47133d7a1761aa6c9b300a947ff51c79
1 #define USE_THE_REPOSITORY_VARIABLE
3 #include "git-compat-util.h"
4 #include "pseudo-merge.h"
5 #include "date.h"
6 #include "oid-array.h"
7 #include "strbuf.h"
8 #include "config.h"
9 #include "string-list.h"
10 #include "refs.h"
11 #include "pack-bitmap.h"
12 #include "commit.h"
13 #include "alloc.h"
14 #include "progress.h"
15 #include "hex.h"
17 #define DEFAULT_PSEUDO_MERGE_DECAY 1.0
18 #define DEFAULT_PSEUDO_MERGE_MAX_MERGES 64
19 #define DEFAULT_PSEUDO_MERGE_SAMPLE_RATE 1
20 #define DEFAULT_PSEUDO_MERGE_THRESHOLD approxidate("1.week.ago")
21 #define DEFAULT_PSEUDO_MERGE_STABLE_THRESHOLD approxidate("1.month.ago")
22 #define DEFAULT_PSEUDO_MERGE_STABLE_SIZE 512
/*
 * Raise `base` to the integer power `exp` using exponentiation by
 * squaring.
 *
 * A negative `exp` yields 1 / base^|exp|. Without that handling a
 * negative exponent would never reach zero when `>>` is an arithmetic
 * shift, so the loop would not terminate.
 */
static double gitexp(double base, int exp)
{
	/* Take the magnitude as unsigned so that INT_MIN negates safely. */
	unsigned int remaining = exp < 0 ? 0u - (unsigned int)exp
					 : (unsigned int)exp;
	double result = 1;

	while (remaining) {
		if (remaining & 1)
			result *= base;
		remaining >>= 1;
		/* Only square when another bit is left to consume. */
		if (remaining)
			base *= base;
	}

	return exp < 0 ? 1.0 / result : result;
}
38 static uint32_t pseudo_merge_group_size(const struct pseudo_merge_group *group,
39 const struct pseudo_merge_matches *matches,
40 uint32_t i)
42 double C = 0.0f;
43 uint32_t n;
46 * The size of pseudo-merge groups decays according to a power series,
47 * which looks like:
49 * f(n) = C * n^-k
51 * , where 'n' is the n-th pseudo-merge group, 'f(n)' is its size, 'k'
52 * is the decay rate, and 'C' is a scaling value.
54 * The value of C depends on the number of groups, decay rate, and total
55 * number of commits. It is computed such that if there are M and N
56 * total groups and commits, respectively, that:
58 * N = f(0) + f(1) + ... f(M-1)
60 * Rearranging to isolate C, we get:
62 * N = \sum_{n=1}^M C / n^k
64 * N / C = \sum_{n=1}^M n^-k
66 * C = N / \sum_{n=1}^M n^-k
68 * For example, if we have a decay rate of 'k' being equal to 1.5, 'N'
69 * total commits equal to 10,000, and 'M' being equal to 6 groups, then
70 * the (rounded) group sizes are:
72 * { 5469, 1934, 1053, 684, 489, 372 }
74 * increasing the number of total groups, say to 10, scales the group
75 * sizes appropriately:
77 * { 5012, 1772, 964, 626, 448, 341, 271, 221, 186, 158 }
79 for (n = 0; n < group->max_merges; n++)
80 C += 1.0 / gitexp(n + 1, group->decay);
81 C = matches->unstable_nr / C;
83 return (uint32_t)((C / gitexp(i + 1, group->decay)) + 0.5);
86 static void pseudo_merge_group_init(struct pseudo_merge_group *group)
88 memset(group, 0, sizeof(struct pseudo_merge_group));
90 strmap_init_with_options(&group->matches, NULL, 0);
92 group->decay = DEFAULT_PSEUDO_MERGE_DECAY;
93 group->max_merges = DEFAULT_PSEUDO_MERGE_MAX_MERGES;
94 group->sample_rate = DEFAULT_PSEUDO_MERGE_SAMPLE_RATE;
95 group->threshold = DEFAULT_PSEUDO_MERGE_THRESHOLD;
96 group->stable_threshold = DEFAULT_PSEUDO_MERGE_STABLE_THRESHOLD;
97 group->stable_size = DEFAULT_PSEUDO_MERGE_STABLE_SIZE;
100 static int pseudo_merge_config(const char *var, const char *value,
101 const struct config_context *ctx,
102 void *cb_data)
104 struct string_list *list = cb_data;
105 struct string_list_item *item;
106 struct pseudo_merge_group *group;
107 struct strbuf buf = STRBUF_INIT;
108 const char *sub, *key;
109 size_t sub_len;
110 int ret = 0;
112 if (parse_config_key(var, "bitmappseudomerge", &sub, &sub_len, &key))
113 goto done;
115 if (!sub_len)
116 goto done;
118 strbuf_add(&buf, sub, sub_len);
120 item = string_list_lookup(list, buf.buf);
121 if (!item) {
122 item = string_list_insert(list, buf.buf);
124 item->util = xmalloc(sizeof(struct pseudo_merge_group));
125 pseudo_merge_group_init(item->util);
128 group = item->util;
130 if (!strcmp(key, "pattern")) {
131 struct strbuf re = STRBUF_INIT;
133 free(group->pattern);
134 if (*value != '^')
135 strbuf_addch(&re, '^');
136 strbuf_addstr(&re, value);
138 group->pattern = xcalloc(1, sizeof(regex_t));
139 if (regcomp(group->pattern, re.buf, REG_EXTENDED))
140 die(_("failed to load pseudo-merge regex for %s: '%s'"),
141 sub, re.buf);
143 strbuf_release(&re);
144 } else if (!strcmp(key, "decay")) {
145 group->decay = git_config_double(var, value, ctx->kvi);
146 if (group->decay < 0) {
147 warning(_("%s must be non-negative, using default"), var);
148 group->decay = DEFAULT_PSEUDO_MERGE_DECAY;
150 } else if (!strcmp(key, "samplerate")) {
151 group->sample_rate = git_config_double(var, value, ctx->kvi);
152 if (!(0 <= group->sample_rate && group->sample_rate <= 1)) {
153 warning(_("%s must be between 0 and 1, using default"), var);
154 group->sample_rate = DEFAULT_PSEUDO_MERGE_SAMPLE_RATE;
156 } else if (!strcmp(key, "threshold")) {
157 if (git_config_expiry_date(&group->threshold, var, value)) {
158 ret = -1;
159 goto done;
161 } else if (!strcmp(key, "maxmerges")) {
162 group->max_merges = git_config_int(var, value, ctx->kvi);
163 if (group->max_merges < 0) {
164 warning(_("%s must be non-negative, using default"), var);
165 group->max_merges = DEFAULT_PSEUDO_MERGE_MAX_MERGES;
167 } else if (!strcmp(key, "stablethreshold")) {
168 if (git_config_expiry_date(&group->stable_threshold, var, value)) {
169 ret = -1;
170 goto done;
172 } else if (!strcmp(key, "stablesize")) {
173 group->stable_size = git_config_int(var, value, ctx->kvi);
174 if (group->stable_size <= 0) {
175 warning(_("%s must be positive, using default"), var);
176 group->stable_size = DEFAULT_PSEUDO_MERGE_STABLE_SIZE;
180 done:
181 strbuf_release(&buf);
183 return ret;
186 void load_pseudo_merges_from_config(struct string_list *list)
188 struct string_list_item *item;
190 git_config(pseudo_merge_config, list);
192 for_each_string_list_item(item, list) {
193 struct pseudo_merge_group *group = item->util;
194 if (!group->pattern)
195 die(_("pseudo-merge group '%s' missing required pattern"),
196 item->string);
197 if (group->threshold < group->stable_threshold)
198 die(_("pseudo-merge group '%s' has unstable threshold "
199 "before stable one"), item->string);
203 static int find_pseudo_merge_group_for_ref(const char *refname,
204 const struct object_id *oid,
205 int flags UNUSED,
206 void *_data)
208 struct bitmap_writer *writer = _data;
209 struct object_id peeled;
210 struct commit *c;
211 uint32_t i;
212 int has_bitmap;
214 if (!peel_iterated_oid(the_repository, oid, &peeled))
215 oid = &peeled;
217 c = lookup_commit(the_repository, oid);
218 if (!c)
219 return 0;
221 has_bitmap = bitmap_writer_has_bitmapped_object_id(writer, oid);
223 for (i = 0; i < writer->pseudo_merge_groups.nr; i++) {
224 struct pseudo_merge_group *group;
225 struct pseudo_merge_matches *matches;
226 struct strbuf group_name = STRBUF_INIT;
227 regmatch_t captures[16];
228 size_t j;
230 group = writer->pseudo_merge_groups.items[i].util;
231 if (regexec(group->pattern, refname, ARRAY_SIZE(captures),
232 captures, 0))
233 continue;
235 if (captures[ARRAY_SIZE(captures) - 1].rm_so != -1)
236 warning(_("pseudo-merge regex from config has too many capture "
237 "groups (max=%"PRIuMAX")"),
238 (uintmax_t)ARRAY_SIZE(captures) - 2);
240 for (j = !!group->pattern->re_nsub; j < ARRAY_SIZE(captures); j++) {
241 regmatch_t *match = &captures[j];
242 if (match->rm_so == -1)
243 continue;
245 if (group_name.len)
246 strbuf_addch(&group_name, '-');
248 strbuf_add(&group_name, refname + match->rm_so,
249 match->rm_eo - match->rm_so);
252 matches = strmap_get(&group->matches, group_name.buf);
253 if (!matches) {
254 matches = xcalloc(1, sizeof(*matches));
255 strmap_put(&group->matches, strbuf_detach(&group_name, NULL),
256 matches);
259 if (c->date <= group->stable_threshold) {
260 ALLOC_GROW(matches->stable, matches->stable_nr + 1,
261 matches->stable_alloc);
262 matches->stable[matches->stable_nr++] = c;
263 } else if (c->date <= group->threshold && !has_bitmap) {
264 ALLOC_GROW(matches->unstable, matches->unstable_nr + 1,
265 matches->unstable_alloc);
266 matches->unstable[matches->unstable_nr++] = c;
269 strbuf_release(&group_name);
272 return 0;
275 static struct commit *push_pseudo_merge(struct pseudo_merge_group *group)
277 struct commit *merge;
279 ALLOC_GROW(group->merges, group->merges_nr + 1, group->merges_alloc);
281 merge = alloc_commit_node(the_repository);
282 merge->object.parsed = 1;
283 merge->object.flags |= BITMAP_PSEUDO_MERGE;
285 group->merges[group->merges_nr++] = merge;
287 return merge;
290 static struct pseudo_merge_commit_idx *pseudo_merge_idx(kh_oid_map_t *pseudo_merge_commits,
291 const struct object_id *oid)
294 struct pseudo_merge_commit_idx *pmc;
295 int hash_ret;
296 khiter_t hash_pos = kh_put_oid_map(pseudo_merge_commits, *oid,
297 &hash_ret);
299 if (hash_ret) {
300 CALLOC_ARRAY(pmc, 1);
301 kh_value(pseudo_merge_commits, hash_pos) = pmc;
302 } else {
303 pmc = kh_value(pseudo_merge_commits, hash_pos);
306 return pmc;
309 #define MIN_PSEUDO_MERGE_SIZE 8
311 static void select_pseudo_merges_1(struct bitmap_writer *writer,
312 struct pseudo_merge_group *group,
313 struct pseudo_merge_matches *matches)
315 uint32_t i, j;
316 uint32_t stable_merges_nr;
318 if (!matches->stable_nr && !matches->unstable_nr)
319 return; /* all tips in this group already have bitmaps */
321 stable_merges_nr = matches->stable_nr / group->stable_size;
322 if (matches->stable_nr % group->stable_size)
323 stable_merges_nr++;
325 /* make stable_merges_nr pseudo merges for stable commits */
326 for (i = 0, j = 0; i < stable_merges_nr; i++) {
327 struct commit *merge;
328 struct commit_list **p;
330 merge = push_pseudo_merge(group);
331 p = &merge->parents;
334 * For each pseudo-merge created above, add parents to the
335 * allocated commit node from the stable set of commits
336 * (un-bitmapped, newer than the stable threshold).
338 do {
339 struct commit *c;
340 struct pseudo_merge_commit_idx *pmc;
342 if (j >= matches->stable_nr)
343 break;
345 c = matches->stable[j++];
347 * Here and below, make sure that we keep our mapping of
348 * commits -> pseudo-merge(s) which include the key'd
349 * commit up-to-date.
351 pmc = pseudo_merge_idx(writer->pseudo_merge_commits,
352 &c->object.oid);
354 ALLOC_GROW(pmc->pseudo_merge, pmc->nr + 1, pmc->alloc);
356 pmc->pseudo_merge[pmc->nr++] = writer->pseudo_merges_nr;
357 p = commit_list_append(c, p);
358 } while (j % group->stable_size);
360 bitmap_writer_push_commit(writer, merge, 1);
361 writer->pseudo_merges_nr++;
364 /* make up to group->max_merges pseudo merges for unstable commits */
365 for (i = 0, j = 0; i < group->max_merges; i++) {
366 struct commit *merge;
367 struct commit_list **p;
368 uint32_t size, end;
370 merge = push_pseudo_merge(group);
371 p = &merge->parents;
373 size = pseudo_merge_group_size(group, matches, i);
374 end = size < MIN_PSEUDO_MERGE_SIZE ? matches->unstable_nr : j + size;
377 * For each pseudo-merge commit created above, add parents to
378 * the allocated commit node from the unstable set of commits
379 * (newer than the stable threshold).
381 * Account for the sample rate, since not every candidate from
382 * the set of stable commits will be included as a pseudo-merge
383 * parent.
385 for (; j < end && j < matches->unstable_nr; j++) {
386 struct commit *c = matches->unstable[j];
387 struct pseudo_merge_commit_idx *pmc;
389 if (j % (uint32_t)(1.0 / group->sample_rate))
390 continue;
392 pmc = pseudo_merge_idx(writer->pseudo_merge_commits,
393 &c->object.oid);
395 ALLOC_GROW(pmc->pseudo_merge, pmc->nr + 1, pmc->alloc);
397 pmc->pseudo_merge[pmc->nr++] = writer->pseudo_merges_nr;
398 p = commit_list_append(c, p);
401 bitmap_writer_push_commit(writer, merge, 1);
402 writer->pseudo_merges_nr++;
403 if (end >= matches->unstable_nr)
404 break;
408 static int commit_date_cmp(const void *va, const void *vb)
410 timestamp_t a = (*(const struct commit **)va)->date;
411 timestamp_t b = (*(const struct commit **)vb)->date;
413 if (a < b)
414 return -1;
415 else if (a > b)
416 return 1;
417 return 0;
420 static void sort_pseudo_merge_matches(struct pseudo_merge_matches *matches)
422 QSORT(matches->stable, matches->stable_nr, commit_date_cmp);
423 QSORT(matches->unstable, matches->unstable_nr, commit_date_cmp);
426 void select_pseudo_merges(struct bitmap_writer *writer,
427 struct commit **commits, size_t commits_nr)
429 struct progress *progress = NULL;
430 uint32_t i;
432 if (!writer->pseudo_merge_groups.nr)
433 return;
435 if (writer->show_progress)
436 progress = start_progress("Selecting pseudo-merge commits",
437 writer->pseudo_merge_groups.nr);
439 refs_for_each_ref(get_main_ref_store(the_repository),
440 find_pseudo_merge_group_for_ref, writer);
442 for (i = 0; i < writer->pseudo_merge_groups.nr; i++) {
443 struct pseudo_merge_group *group;
444 struct hashmap_iter iter;
445 struct strmap_entry *e;
447 group = writer->pseudo_merge_groups.items[i].util;
448 strmap_for_each_entry(&group->matches, &iter, e) {
449 struct pseudo_merge_matches *matches = e->value;
451 sort_pseudo_merge_matches(matches);
453 select_pseudo_merges_1(writer, group, matches);
456 display_progress(progress, i + 1);
459 stop_progress(&progress);
462 void free_pseudo_merge_map(struct pseudo_merge_map *pm)
464 uint32_t i;
465 for (i = 0; i < pm->nr; i++) {
466 ewah_pool_free(pm->v[i].commits);
467 ewah_pool_free(pm->v[i].bitmap);
469 free(pm->v);
/*
 * In-memory view of one extended lookup entry, as filled in by
 * pseudo_merge_ext_at().
 */
struct pseudo_merge_commit_ext {
	/* Entry count, decoded from the 4 bytes at the start of the entry. */
	uint32_t nr;
	/* Points just past that count, at the 8-byte values read by index. */
	const unsigned char *ptr;
};
477 static int pseudo_merge_ext_at(const struct pseudo_merge_map *pm,
478 struct pseudo_merge_commit_ext *ext, size_t at)
480 if (at >= pm->map_size)
481 return error(_("extended pseudo-merge read out-of-bounds "
482 "(%"PRIuMAX" >= %"PRIuMAX")"),
483 (uintmax_t)at, (uintmax_t)pm->map_size);
484 if (at + 4 >= pm->map_size)
485 return error(_("extended pseudo-merge entry is too short "
486 "(%"PRIuMAX" >= %"PRIuMAX")"),
487 (uintmax_t)(at + 4), (uintmax_t)pm->map_size);
489 ext->nr = get_be32(pm->map + at);
490 ext->ptr = pm->map + at + sizeof(uint32_t);
492 return 0;
495 struct ewah_bitmap *pseudo_merge_bitmap(const struct pseudo_merge_map *pm,
496 struct pseudo_merge *merge)
498 if (!merge->loaded_commits)
499 BUG("cannot use unloaded pseudo-merge bitmap");
501 if (!merge->loaded_bitmap) {
502 size_t at = merge->bitmap_at;
504 merge->bitmap = read_bitmap(pm->map, pm->map_size, &at);
505 merge->loaded_bitmap = 1;
508 return merge->bitmap;
511 struct pseudo_merge *use_pseudo_merge(const struct pseudo_merge_map *pm,
512 struct pseudo_merge *merge)
514 if (!merge->loaded_commits) {
515 size_t pos = merge->at;
517 merge->commits = read_bitmap(pm->map, pm->map_size, &pos);
518 merge->bitmap_at = pos;
519 merge->loaded_commits = 1;
521 return merge;
524 static struct pseudo_merge *pseudo_merge_at(const struct pseudo_merge_map *pm,
525 struct object_id *oid,
526 size_t want)
528 size_t lo = 0;
529 size_t hi = pm->nr;
531 while (lo < hi) {
532 size_t mi = lo + (hi - lo) / 2;
533 size_t got = pm->v[mi].at;
535 if (got == want)
536 return use_pseudo_merge(pm, &pm->v[mi]);
537 else if (got < want)
538 hi = mi;
539 else
540 lo = mi + 1;
543 warning(_("could not find pseudo-merge for commit %s at offset %"PRIuMAX),
544 oid_to_hex(oid), (uintmax_t)want);
546 return NULL;
/*
 * Decoded form of one commit lookup record (see
 * read_pseudo_merge_commit_at()). The on-disk record is packed and in
 * network byte order, so it must not be accessed through this struct
 * directly.
 */
struct pseudo_merge_commit {
	/* Bitmap position of the commit; the lookup key. */
	uint32_t commit_pos;
	/*
	 * Offset associated with the commit. When the top bit is set, the
	 * remaining bits locate an extended lookup entry instead.
	 */
	uint64_t pseudo_merge_ofs;
};
554 #define PSEUDO_MERGE_COMMIT_RAWSZ (sizeof(uint32_t)+sizeof(uint64_t))
556 static void read_pseudo_merge_commit_at(struct pseudo_merge_commit *merge,
557 const unsigned char *at)
559 merge->commit_pos = get_be32(at);
560 merge->pseudo_merge_ofs = get_be64(at + sizeof(uint32_t));
563 static int nth_pseudo_merge_ext(const struct pseudo_merge_map *pm,
564 struct pseudo_merge_commit_ext *ext,
565 struct pseudo_merge_commit *merge,
566 uint32_t n)
568 size_t ofs;
570 if (n >= ext->nr)
571 return error(_("extended pseudo-merge lookup out-of-bounds "
572 "(%"PRIu32" >= %"PRIu32")"), n, ext->nr);
574 ofs = get_be64(ext->ptr + st_mult(n, sizeof(uint64_t)));
575 if (ofs >= pm->map_size)
576 return error(_("out-of-bounds read: (%"PRIuMAX" >= %"PRIuMAX")"),
577 (uintmax_t)ofs, (uintmax_t)pm->map_size);
579 read_pseudo_merge_commit_at(merge, pm->map + ofs);
581 return 0;
584 static unsigned apply_pseudo_merge(const struct pseudo_merge_map *pm,
585 struct pseudo_merge *merge,
586 struct bitmap *result,
587 struct bitmap *roots)
589 if (merge->satisfied)
590 return 0;
592 if (!ewah_bitmap_is_subset(merge->commits, roots ? roots : result))
593 return 0;
595 bitmap_or_ewah(result, pseudo_merge_bitmap(pm, merge));
596 if (roots)
597 bitmap_or_ewah(roots, pseudo_merge_bitmap(pm, merge));
598 merge->satisfied = 1;
600 return 1;
603 static int pseudo_merge_commit_cmp(const void *va, const void *vb)
605 struct pseudo_merge_commit merge;
606 uint32_t key = *(uint32_t*)va;
608 read_pseudo_merge_commit_at(&merge, vb);
610 if (key < merge.commit_pos)
611 return -1;
612 if (key > merge.commit_pos)
613 return 1;
614 return 0;
617 static struct pseudo_merge_commit *find_pseudo_merge(const struct pseudo_merge_map *pm,
618 uint32_t pos)
620 if (!pm->commits_nr)
621 return NULL;
623 return bsearch(&pos, pm->commits, pm->commits_nr,
624 PSEUDO_MERGE_COMMIT_RAWSZ, pseudo_merge_commit_cmp);
627 int apply_pseudo_merges_for_commit(const struct pseudo_merge_map *pm,
628 struct bitmap *result,
629 struct commit *commit, uint32_t commit_pos)
631 struct pseudo_merge *merge;
632 struct pseudo_merge_commit *merge_commit;
633 int ret = 0;
635 merge_commit = find_pseudo_merge(pm, commit_pos);
636 if (!merge_commit)
637 return 0;
639 if (merge_commit->pseudo_merge_ofs & ((uint64_t)1<<63)) {
640 struct pseudo_merge_commit_ext ext = { 0 };
641 off_t ofs = merge_commit->pseudo_merge_ofs & ~((uint64_t)1<<63);
642 uint32_t i;
644 if (pseudo_merge_ext_at(pm, &ext, ofs) < -1) {
645 warning(_("could not read extended pseudo-merge table "
646 "for commit %s"),
647 oid_to_hex(&commit->object.oid));
648 return ret;
651 for (i = 0; i < ext.nr; i++) {
652 if (nth_pseudo_merge_ext(pm, &ext, merge_commit, i) < 0)
653 return ret;
655 merge = pseudo_merge_at(pm, &commit->object.oid,
656 merge_commit->pseudo_merge_ofs);
658 if (!merge)
659 return ret;
661 if (apply_pseudo_merge(pm, merge, result, NULL))
662 ret++;
664 } else {
665 merge = pseudo_merge_at(pm, &commit->object.oid,
666 merge_commit->pseudo_merge_ofs);
668 if (!merge)
669 return ret;
671 if (apply_pseudo_merge(pm, merge, result, NULL))
672 ret++;
675 if (ret)
676 cascade_pseudo_merges(pm, result, NULL);
678 return ret;
681 int cascade_pseudo_merges(const struct pseudo_merge_map *pm,
682 struct bitmap *result,
683 struct bitmap *roots)
685 unsigned any_satisfied;
686 int ret = 0;
688 do {
689 struct pseudo_merge *merge;
690 uint32_t i;
692 any_satisfied = 0;
694 for (i = 0; i < pm->nr; i++) {
695 merge = use_pseudo_merge(pm, &pm->v[i]);
696 if (apply_pseudo_merge(pm, merge, result, roots)) {
697 any_satisfied |= 1;
698 ret++;
701 } while (any_satisfied);
703 return ret;
706 struct pseudo_merge *pseudo_merge_for_parents(const struct pseudo_merge_map *pm,
707 struct bitmap *parents)
709 struct pseudo_merge *match = NULL;
710 size_t i;
712 if (!pm->nr)
713 return NULL;
716 * NOTE: this loop is quadratic in the worst-case (where no
717 * matching pseudo-merge bitmaps are found), but in practice
718 * this is OK for a few reasons:
720 * - Rejecting pseudo-merge bitmaps that do not match the
721 * given commit is done quickly (i.e. `bitmap_equals_ewah()`
722 * returns early when we know the two bitmaps aren't equal.
724 * - Already matched pseudo-merge bitmaps (which we track with
725 * the `->satisfied` bit here) are skipped as potential
726 * candidates.
728 * - The number of pseudo-merges should be small (in the
729 * hundreds for most repositories).
731 * If in the future this semi-quadratic behavior does become a
732 * problem, another approach would be to keep track of which
733 * pseudo-merges are still "viable" after enumerating the
734 * pseudo-merge commit's parents:
736 * - A pseudo-merge bitmap becomes non-viable when the bit(s)
737 * corresponding to one or more parent(s) of the given
738 * commit are not set in a candidate pseudo-merge's commits
739 * bitmap.
741 * - After processing all bits, enumerate the remaining set of
742 * viable pseudo-merge bitmaps, and check that their
743 * popcount() matches the number of parents in the given
744 * commit.
746 for (i = 0; i < pm->nr; i++) {
747 struct pseudo_merge *candidate = use_pseudo_merge(pm, &pm->v[i]);
748 if (!candidate || candidate->satisfied)
749 continue;
750 if (!bitmap_equals_ewah(parents, candidate->commits))
751 continue;
753 match = candidate;
754 match->satisfied = 1;
755 break;
758 return match;