From cfe4112187cdecbd49844c80bb337c1671a2d7ab Mon Sep 17 00:00:00 2001 From: Stefan Sperling Date: Wed, 13 Oct 2021 11:09:15 +0000 Subject: [PATCH] use RB_TREE instead of STAILQ to manage packindex bloom filters; much faster --- lib/fetch.c | 2 ++ lib/got_lib_repository.h | 17 ++++++++++----- lib/object.c | 2 ++ lib/object_parse.c | 2 ++ lib/pack_create.c | 2 ++ lib/repository.c | 54 ++++++++++++++++++++++++++++-------------------- lib/repository_admin.c | 2 ++ lib/send.c | 2 ++ 8 files changed, 56 insertions(+), 27 deletions(-) diff --git a/lib/fetch.c b/lib/fetch.c index 14f895c1..2b173017 100644 --- a/lib/fetch.c +++ b/lib/fetch.c @@ -16,6 +16,8 @@ #include #include +#include +#include #include #include #include diff --git a/lib/got_lib_repository.h b/lib/got_lib_repository.h index 7005ceff..831cb967 100644 --- a/lib/got_lib_repository.h +++ b/lib/got_lib_repository.h @@ -31,13 +31,20 @@ #define GOT_PACK_CACHE_SIZE 64 struct got_packidx_bloom_filter { - char path_packidx[PATH_MAX]; /* on-disk path */ - size_t path_packidx_len; + RB_ENTRY(got_packidx_bloom_filter) entry; + char path[PATH_MAX]; /* on-disk path */ + size_t path_len; struct bloom *bloom; - STAILQ_ENTRY(got_packidx_bloom_filter) entry; }; -STAILQ_HEAD(got_packidx_bloom_filter_head, got_packidx_bloom_filter); +RB_HEAD(got_packidx_bloom_filter_tree, got_packidx_bloom_filter); + +static inline int +got_packidx_bloom_filter_cmp(const struct got_packidx_bloom_filter *f1, + const struct got_packidx_bloom_filter *f2) +{ + return got_path_cmp(f1->path, f2->path, f1->path_len, f2->path_len); +} struct got_repository { char *path; @@ -52,7 +59,7 @@ struct got_repository { * Used to avoid opening a pack index in search of an * object ID which is not contained in this pack index. */ - struct got_packidx_bloom_filter_head packidx_bloom_filters; + struct got_packidx_bloom_filter_tree packidx_bloom_filters; /* Open file handles for pack files. */ struct got_pack packs[GOT_PACK_CACHE_SIZE]; diff --git a/lib/object.c b/lib/object.c index f997c6e8..29ea30e1 100644 --- a/lib/object.c +++ b/lib/object.c @@ -16,6 +16,8 @@ #include #include +#include +#include #include #include #include diff --git a/lib/object_parse.c b/lib/object_parse.c index 7c603828..b78b7ba3 100644 --- a/lib/object_parse.c +++ b/lib/object_parse.c @@ -16,6 +16,8 @@ #include #include +#include +#include #include #include #include diff --git a/lib/pack_create.c b/lib/pack_create.c index b181dec7..6ad71896 100644 --- a/lib/pack_create.c +++ b/lib/pack_create.c @@ -16,6 +16,8 @@ */ #include +#include +#include #include #include diff --git a/lib/repository.c b/lib/repository.c index c6f5465a..39a26086 100644 --- a/lib/repository.c +++ b/lib/repository.c @@ -15,6 +15,8 @@ */ #include +#include +#include #include #include #include @@ -663,7 +665,7 @@ got_repo_open(struct got_repository **repop, const char *path, goto done; } - STAILQ_INIT(&repo->packidx_bloom_filters); + RB_INIT(&repo->packidx_bloom_filters); for (i = 0; i < nitems(repo->privsep_children); i++) { memset(&repo->privsep_children[i], 0, @@ -766,10 +768,10 @@ got_repo_close(struct got_repository *repo) got_packidx_close(repo->packidx_cache[i]); } - while (!STAILQ_EMPTY(&repo->packidx_bloom_filters)) { - struct got_packidx_bloom_filter *bf; - bf = STAILQ_FIRST(&repo->packidx_bloom_filters); - STAILQ_REMOVE_HEAD(&repo->packidx_bloom_filters, entry); + while ((bf = RB_MIN(got_packidx_bloom_filter_tree, + &repo->packidx_bloom_filters))) { + RB_REMOVE(got_packidx_bloom_filter_tree, + &repo->packidx_bloom_filters, bf); free(bf->bloom); free(bf); } @@ -999,19 +1001,29 @@ got_repo_is_packidx_filename(const char *name, size_t len) return 1; } +static struct got_packidx_bloom_filter * +get_packidx_bloom_filter(struct got_repository *repo, + const char *path, size_t path_len) +{ + struct got_packidx_bloom_filter key; + + if (strlcpy(key.path, path, sizeof(key.path)) >= sizeof(key.path)) + return NULL; /* XXX */ + key.path_len = path_len; + + return RB_FIND(got_packidx_bloom_filter_tree, + &repo->packidx_bloom_filters, &key); +} + static int check_packidx_bloom_filter(struct got_repository *repo, const char *path_packidx, struct got_object_id *id) { struct got_packidx_bloom_filter *bf; - STAILQ_FOREACH(bf, &repo->packidx_bloom_filters, entry) { - if (got_path_cmp(bf->path_packidx, path_packidx, - bf->path_packidx_len, strlen(path_packidx)) == 0) { - return bloom_check(bf->bloom, id->sha1, - sizeof(id->sha1)); - } - } + bf = get_packidx_bloom_filter(repo, path_packidx, strlen(path_packidx)); + if (bf) + return bloom_check(bf->bloom, id->sha1, sizeof(id->sha1)); /* No bloom filter means this pack index must be searched. */ return 1; @@ -1037,11 +1049,9 @@ add_packidx_bloom_filter(struct got_repository *repo, return NULL; /* Do we already have a filter for this pack index? */ - STAILQ_FOREACH(bf, &repo->packidx_bloom_filters, entry) { - if (got_path_cmp(bf->path_packidx, path_packidx, - bf->path_packidx_len, strlen(path_packidx)) == 0) - return NULL; - } + if (get_packidx_bloom_filter(repo, path_packidx, + strlen(path_packidx)) != NULL) + return NULL; bf = calloc(1, sizeof(*bf)); if (bf == NULL) @@ -1052,14 +1062,13 @@ add_packidx_bloom_filter(struct got_repository *repo, return got_error_from_errno("calloc"); } - - len = strlcpy(bf->path_packidx, path_packidx, sizeof(bf->path_packidx)); - if (len >= sizeof(bf->path_packidx)) { + len = strlcpy(bf->path, path_packidx, sizeof(bf->path)); + if (len >= sizeof(bf->path)) { free(bf->bloom); free(bf); return got_error(GOT_ERR_NO_SPACE); } - bf->path_packidx_len = len; + bf->path_len = len; /* Minimum size supported by our bloom filter is 1000 entries. */ bloom_init(bf->bloom, nobjects < 1000 ? 1000 : nobjects, 0.1); @@ -1069,7 +1078,8 @@ add_packidx_bloom_filter(struct got_repository *repo, bloom_add(bf->bloom, id->sha1, sizeof(id->sha1)); } - STAILQ_INSERT_TAIL(&repo->packidx_bloom_filters, bf, entry); + RB_INSERT(got_packidx_bloom_filter_tree, + &repo->packidx_bloom_filters, bf); return NULL; } diff --git a/lib/repository_admin.c b/lib/repository_admin.c index f73c3838..12174bd2 100644 --- a/lib/repository_admin.c +++ b/lib/repository_admin.c @@ -15,6 +15,8 @@ */ #include +#include +#include #include #include #include diff --git a/lib/send.c b/lib/send.c index 9082a982..d983e298 100644 --- a/lib/send.c +++ b/lib/send.c @@ -17,6 +17,8 @@ #include #include +#include +#include #include #include #include -- 2.11.4.GIT