From fc65b00da7ebfc96d3b93a59ee99f820deb5c93e Mon Sep 17 00:00:00 2001 From: Kevin Willford Date: Thu, 7 Sep 2017 10:25:56 -0600 Subject: [PATCH] merge-recursive: change current file dir string_lists to hashmap The code was using two string_lists, one for the directories and one for the files. The code never checks the lists independently so we should be able to only use one list. The string_list also is a O(log n) for lookup and insertion. Switching this to use a hashmap will give O(1) which will save some time when there are millions of paths that will be checked. Signed-off-by: Kevin Willford Signed-off-by: Junio C Hamano --- merge-recursive.c | 56 ++++++++++++++++++++++++++++++++++++++++++++----------- merge-recursive.h | 3 +-- 2 files changed, 46 insertions(+), 13 deletions(-) diff --git a/merge-recursive.c b/merge-recursive.c index d47f40ea81..35af3761ba 100644 --- a/merge-recursive.c +++ b/merge-recursive.c @@ -24,6 +24,31 @@ #include "dir.h" #include "submodule.h" +struct path_hashmap_entry { + struct hashmap_entry e; + char path[FLEX_ARRAY]; +}; + +static int path_hashmap_cmp(const void *cmp_data, + const void *entry, + const void *entry_or_key, + const void *keydata) +{ + const struct path_hashmap_entry *a = entry; + const struct path_hashmap_entry *b = entry_or_key; + const char *key = keydata; + + if (ignore_case) + return strcasecmp(a->path, key ? key : b->path); + else + return strcmp(a->path, key ? key : b->path); +} + +static unsigned int path_hash(const char *path) +{ + return ignore_case ? strihash(path) : strhash(path); +} + static void flush_output(struct merge_options *o) { if (o->buffer_output < 2 && o->obuf.len) { @@ -314,15 +339,15 @@ static int save_files_dirs(const unsigned char *sha1, struct strbuf *base, const char *path, unsigned int mode, int stage, void *context) { + struct path_hashmap_entry *entry; int baselen = base->len; struct merge_options *o = context; strbuf_addstr(base, path); - if (S_ISDIR(mode)) - string_list_insert(&o->current_directory_set, base->buf); - else - string_list_insert(&o->current_file_set, base->buf); + FLEX_ALLOC_MEM(entry, path, base->buf, base->len); + hashmap_entry_init(entry, path_hash(entry->path)); + hashmap_add(&o->current_file_dir_set, entry); strbuf_setlen(base, baselen); return (S_ISDIR(mode) ? READ_TREE_RECURSIVE : 0); @@ -642,6 +667,7 @@ static void add_flattened_path(struct strbuf *out, const char *s) static char *unique_path(struct merge_options *o, const char *path, const char *branch) { + struct path_hashmap_entry *entry; struct strbuf newpath = STRBUF_INIT; int suffix = 0; size_t base_len; @@ -650,14 +676,16 @@ static char *unique_path(struct merge_options *o, const char *path, const char * add_flattened_path(&newpath, branch); base_len = newpath.len; - while (string_list_has_string(&o->current_file_set, newpath.buf) || - string_list_has_string(&o->current_directory_set, newpath.buf) || + while (hashmap_get_from_hash(&o->current_file_dir_set, + path_hash(newpath.buf), newpath.buf) || (!o->call_depth && file_exists(newpath.buf))) { strbuf_setlen(&newpath, base_len); strbuf_addf(&newpath, "_%d", suffix++); } - string_list_insert(&o->current_file_set, newpath.buf); + FLEX_ALLOC_MEM(entry, path, newpath.buf, newpath.len); + hashmap_entry_init(entry, path_hash(entry->path)); + hashmap_add(&o->current_file_dir_set, entry); return strbuf_detach(&newpath, NULL); } @@ -1941,8 +1969,14 @@ int merge_trees(struct merge_options *o, if (unmerged_cache()) { struct string_list *entries, *re_head, *re_merge; int i; - string_list_clear(&o->current_file_set, 1); - string_list_clear(&o->current_directory_set, 1); + /* + * Only need the hashmap while processing entries, so + * initialize it here and free it when we are done running + * through the entries. Keeping it in the merge_options as + * opposed to decaring a local hashmap is for convenience + * so that we don't have to pass it to around. + */ + hashmap_init(&o->current_file_dir_set, path_hashmap_cmp, NULL, 512); get_files_dirs(o, head); get_files_dirs(o, merge); @@ -1978,6 +2012,8 @@ cleanup: string_list_clear(re_head, 0); string_list_clear(entries, 1); + hashmap_free(&o->current_file_dir_set, 1); + free(re_merge); free(re_head); free(entries); @@ -2179,8 +2215,6 @@ void init_merge_options(struct merge_options *o) if (o->verbosity >= 5) o->buffer_output = 0; strbuf_init(&o->obuf, 0); - string_list_init(&o->current_file_set, 1); - string_list_init(&o->current_directory_set, 1); string_list_init(&o->df_conflict_file_set, 1); } diff --git a/merge-recursive.h b/merge-recursive.h index 735343b413..80d69d1401 100644 --- a/merge-recursive.h +++ b/merge-recursive.h @@ -25,8 +25,7 @@ struct merge_options { int show_rename_progress; int call_depth; struct strbuf obuf; - struct string_list current_file_set; - struct string_list current_directory_set; + struct hashmap current_file_dir_set; struct string_list df_conflict_file_set; }; -- 2.11.4.GIT