From a91aca44bf4b3746c3ce03583a1b3418d1610ef7 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Thu, 9 Mar 2017 08:29:49 -0500 Subject: [PATCH] ref-filter: use separate cache for contains_tag_algo The algorithm which powers "tag --contains" uses the TMP_MARK and UNINTERESTING bits, but never cleans up after itself. As a result, stale UNINTERESTING bits may impact later traversals (like "--merged"). We could fix this by clearing the bits after we're done with the --contains traversal. That would be enough to fix the existing problem, but it leaves future developers in a bad spot: they cannot add other traversals that operate simultaneously with --contains (e.g., if you wanted to add "--no-contains" and use both filters at the same time). Instead, we can use a commit slab to store our cached results, which will store the bits outside of the commit structs entirely. This adds an extra level of indirection, but in my tests (running "git tag --contains HEAD" on linux.git), there was no measurable slowdown. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- ref-filter.c | 55 +++++++++++++++++++++++++++++++++++-------------------- 1 file changed, 35 insertions(+), 20 deletions(-) diff --git a/ref-filter.c b/ref-filter.c index 5cb49b7c24..7eeecc608f 100644 --- a/ref-filter.c +++ b/ref-filter.c @@ -15,6 +15,7 @@ #include "version.h" #include "trailer.h" #include "wt-status.h" +#include "commit-slab.h" static struct ref_msg { const char *gone; @@ -1470,15 +1471,22 @@ static void get_ref_atom_value(struct ref_array_item *ref, int atom, struct atom *v = &ref->value[atom]; } +/* + * Unknown has to be "0" here, because that's the default value for + * contains_cache slab entries that have not yet been assigned. + */ enum contains_result { - CONTAINS_UNKNOWN = -1, - CONTAINS_NO = 0, - CONTAINS_YES = 1 + CONTAINS_UNKNOWN = 0, + CONTAINS_NO, + CONTAINS_YES }; +define_commit_slab(contains_cache, enum contains_result); + struct ref_filter_cbdata { struct ref_array *array; struct ref_filter *filter; + struct contains_cache contains_cache; }; /* @@ -1509,20 +1517,22 @@ static int in_commit_list(const struct commit_list *want, struct commit *c) * Do not recurse to find out, though, but return -1 if inconclusive. */ static enum contains_result contains_test(struct commit *candidate, - const struct commit_list *want) + const struct commit_list *want, + struct contains_cache *cache) { - /* was it previously marked as containing a want commit? */ - if (candidate->object.flags & TMP_MARK) - return CONTAINS_YES; - /* or marked as not possibly containing a want commit? */ - if (candidate->object.flags & UNINTERESTING) - return CONTAINS_NO; + enum contains_result *cached = contains_cache_at(cache, candidate); + + /* If we already have the answer cached, return that. */ + if (*cached) + return *cached; + /* or are we it? */ if (in_commit_list(want, candidate)) { - candidate->object.flags |= TMP_MARK; + *cached = CONTAINS_YES; return CONTAINS_YES; } + /* Otherwise, we don't know; prepare to recurse */ parse_commit_or_die(candidate); return CONTAINS_UNKNOWN; } @@ -1535,10 +1545,11 @@ static void push_to_contains_stack(struct commit *candidate, struct contains_sta } static enum contains_result contains_tag_algo(struct commit *candidate, - const struct commit_list *want) + const struct commit_list *want, + struct contains_cache *cache) { struct contains_stack contains_stack = { 0, 0, NULL }; - enum contains_result result = contains_test(candidate, want); + enum contains_result result = contains_test(candidate, want, cache); if (result != CONTAINS_UNKNOWN) return result; @@ -1550,16 +1561,16 @@ static enum contains_result contains_tag_algo(struct commit *candidate, struct commit_list *parents = entry->parents; if (!parents) { - commit->object.flags |= UNINTERESTING; + *contains_cache_at(cache, commit) = CONTAINS_NO; contains_stack.nr--; } /* * If we just popped the stack, parents->item has been marked, * therefore contains_test will return a meaningful yes/no. */ - else switch (contains_test(parents->item, want)) { + else switch (contains_test(parents->item, want, cache)) { case CONTAINS_YES: - commit->object.flags |= TMP_MARK; + *contains_cache_at(cache, commit) = CONTAINS_YES; contains_stack.nr--; break; case CONTAINS_NO: @@ -1571,13 +1582,14 @@ static enum contains_result contains_tag_algo(struct commit *candidate, } } free(contains_stack.contains_stack); - return contains_test(candidate, want); + return contains_test(candidate, want, cache); } -static int commit_contains(struct ref_filter *filter, struct commit *commit) +static int commit_contains(struct ref_filter *filter, struct commit *commit, + struct contains_cache *cache) { if (filter->with_commit_tag_algo) - return contains_tag_algo(commit, filter->with_commit) == CONTAINS_YES; + return contains_tag_algo(commit, filter->with_commit, cache) == CONTAINS_YES; return is_descendant_of(commit, filter->with_commit); } @@ -1774,7 +1786,7 @@ static int ref_filter_handler(const char *refname, const struct object_id *oid, return 0; /* We perform the filtering for the '--contains' option */ if (filter->with_commit && - !commit_contains(filter, commit)) + !commit_contains(filter, commit, &ref_cbdata->contains_cache)) return 0; } @@ -1874,6 +1886,8 @@ int filter_refs(struct ref_array *array, struct ref_filter *filter, unsigned int broken = 1; filter->kind = type & FILTER_REFS_KIND_MASK; + init_contains_cache(&ref_cbdata.contains_cache); + /* Simple per-ref filtering */ if (!filter->kind) die("filter_refs: invalid type"); @@ -1896,6 +1910,7 @@ int filter_refs(struct ref_array *array, struct ref_filter *filter, unsigned int head_ref(ref_filter_handler, &ref_cbdata); } + clear_contains_cache(&ref_cbdata.contains_cache); /* Filters that need revision walking */ if (filter->merge_commit) -- 2.11.4.GIT