From c2e525d97f81bc178567cdf4dd7056ce6224eb58 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Sun, 5 Nov 2006 11:51:41 -0800 Subject: [PATCH] git-pickaxe: optimize by avoiding repeated read_sha1_file(). It turns out that pickaxe reads the same blob repeatedly, even though it could reuse the blob already read for the parent while handling a child commit when it is the parent's turn to pass its blame to the grandparent. Have a cache in the origin structure to keep the blob there, which will be garbage collected when the origin loses the last reference to it. Signed-off-by: Junio C Hamano --- builtin-pickaxe.c | 100 +++++++++++++++++++++++++++++++++--------------------- 1 file changed, 62 insertions(+), 38 deletions(-) diff --git a/builtin-pickaxe.c b/builtin-pickaxe.c index 332e6a2e3c..f12b2d4544 100644 --- a/builtin-pickaxe.c +++ b/builtin-pickaxe.c @@ -40,6 +40,11 @@ static int max_score_digits; #define DEBUG 0 #endif +/* stats */ +static int num_read_blob; +static int num_get_patch; +static int num_commits; + #define PICKAXE_BLAME_MOVE 01 #define PICKAXE_BLAME_COPY 02 #define PICKAXE_BLAME_COPY_HARDER 04 @@ -63,10 +68,25 @@ static unsigned blame_copy_score; struct origin { int refcnt; struct commit *commit; + mmfile_t file; unsigned char blob_sha1[20]; char path[FLEX_ARRAY]; }; +static char *fill_origin_blob(struct origin *o, mmfile_t *file) +{ + if (!o->file.ptr) { + char type[10]; + num_read_blob++; + file->ptr = read_sha1_file(o->blob_sha1, type, + (unsigned long *)(&(file->size))); + o->file = *file; + } + else + *file = o->file; + return file->ptr; +} + static inline struct origin *origin_incref(struct origin *o) { if (o) @@ -77,6 +97,8 @@ static inline struct origin *origin_incref(struct origin *o) static void origin_decref(struct origin *o) { if (o && --o->refcnt <= 0) { + if (o->file.ptr) + free(o->file.ptr); memset(o, 0, sizeof(*o)); free(o); } @@ -431,25 +453,14 @@ static struct patch *compare_buffer(mmfile_t *file_p, mmfile_t *file_o, static struct patch
*get_patch(struct origin *parent, struct origin *origin) { mmfile_t file_p, file_o; - char type[10]; - char *blob_p, *blob_o; struct patch *patch; - blob_p = read_sha1_file(parent->blob_sha1, type, - (unsigned long *) &file_p.size); - blob_o = read_sha1_file(origin->blob_sha1, type, - (unsigned long *) &file_o.size); - file_p.ptr = blob_p; - file_o.ptr = blob_o; - if (!file_p.ptr || !file_o.ptr) { - free(blob_p); - free(blob_o); + fill_origin_blob(parent, &file_p); + fill_origin_blob(origin, &file_o); + if (!file_p.ptr || !file_o.ptr) return NULL; - } - patch = compare_buffer(&file_p, &file_o, 0); - free(blob_p); - free(blob_o); + num_get_patch++; return patch; } @@ -784,20 +795,14 @@ static int find_move_in_parent(struct scoreboard *sb, int last_in_target, made_progress; struct blame_entry *e, split[3]; mmfile_t file_p; - char type[10]; - char *blob_p; last_in_target = find_last_in_target(sb, target); if (last_in_target < 0) return 1; /* nothing remains for this target */ - blob_p = read_sha1_file(parent->blob_sha1, type, - (unsigned long *) &file_p.size); - file_p.ptr = blob_p; - if (!file_p.ptr) { - free(blob_p); + fill_origin_blob(parent, &file_p); + if (!file_p.ptr) return 0; - } made_progress = 1; while (made_progress) { @@ -814,7 +819,6 @@ static int find_move_in_parent(struct scoreboard *sb, decref_split(split); } } - free(blob_p); return 0; } @@ -900,8 +904,6 @@ static int find_copy_in_parent(struct scoreboard *sb, struct diff_filepair *p = diff_queued_diff.queue[i]; struct origin *norigin; mmfile_t file_p; - char type[10]; - char *blob; struct blame_entry this[3]; if (!DIFF_FILE_VALID(p->one)) @@ -912,9 +914,7 @@ static int find_copy_in_parent(struct scoreboard *sb, norigin = get_origin(sb, parent, p->one->path); hashcpy(norigin->blob_sha1, p->one->sha1); - blob = read_sha1_file(norigin->blob_sha1, type, - (unsigned long *) &file_p.size); - file_p.ptr = blob; + fill_origin_blob(norigin, &file_p); if (!file_p.ptr) continue; @@ -925,7 +925,6 @@ static int 
find_copy_in_parent(struct scoreboard *sb, this); decref_split(this); } - free(blob); origin_decref(norigin); } @@ -953,6 +952,28 @@ static int find_copy_in_parent(struct scoreboard *sb, return retval; } +/* The blobs of origin and porigin exactly match, so everything + * origin is suspected for can be blamed on the parent. + */ +static void pass_whole_blame(struct scoreboard *sb, + struct origin *origin, struct origin *porigin) +{ + struct blame_entry *e; + + if (!porigin->file.ptr && origin->file.ptr) { + /* Steal its file */ + porigin->file = origin->file; + origin->file.ptr = NULL; + } + for (e = sb->ent; e; e = e->next) { + if (cmp_suspect(e->suspect, origin)) + continue; + origin_incref(porigin); + origin_decref(e->suspect); + e->suspect = porigin; + } +} + #define MAXPARENT 16 static void pass_blame(struct scoreboard *sb, struct origin *origin, int opt) @@ -986,13 +1007,7 @@ static void pass_blame(struct scoreboard *sb, struct origin *origin, int opt) if (!porigin) continue; if (!hashcmp(porigin->blob_sha1, origin->blob_sha1)) { - struct blame_entry *e; - for (e = sb->ent; e; e = e->next) - if (e->suspect == origin) { - origin_incref(porigin); - origin_decref(e->suspect); - e->suspect = porigin; - } + pass_whole_blame(sb, origin, porigin); origin_decref(porigin); goto finish; } @@ -1010,6 +1025,7 @@ static void pass_blame(struct scoreboard *sb, struct origin *origin, int opt) } } + num_commits++; for (i = 0, parent = commit->parents; i < MAXPARENT && parent; parent = parent->next, i++) { @@ -1068,7 +1084,8 @@ static void assign_blame(struct scoreboard *sb, struct rev_info *revs, int opt) origin_incref(suspect); commit = suspect->commit; - parse_commit(commit); + if (!commit->object.parsed) + parse_commit(commit); if (!(commit->object.flags & UNINTERESTING) && !(revs->max_age != -1 && commit->date < revs->max_age)) pass_blame(sb, suspect, opt); @@ -1735,6 +1752,7 @@ int cmd_pickaxe(int argc, const char **argv, const char *prefix) die("no such path %s in %s", 
path, final_commit_name); sb.final_buf = read_sha1_file(o->blob_sha1, type, &sb.final_buf_size); + num_read_blob++; lno = prepare_lines(&sb); if (bottom < 1) @@ -1772,5 +1790,11 @@ int cmd_pickaxe(int argc, const char **argv, const char *prefix) free(ent); ent = e; } + + if (DEBUG) { + printf("num read blob: %d\n", num_read_blob); + printf("num get patch: %d\n", num_get_patch); + printf("num commits: %d\n", num_commits); + } return 0; } -- 2.11.4.GIT