/*
 * Source: [git/fastimport.git] / blame.c
 * Commit: Merge branch 'master' into jc/combine
 * Blob:   6730b10b115250e088887879d075559a36ddc7b1
 */
/*
 * Copyright (C) 2006, Fredrik Kuivinen <freku045@student.liu.se>
 */
#include <assert.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/time.h>
#include <time.h>

#include "cache.h"
#include "refs.h"
#include "tag.h"
#include "commit.h"
#include "tree.h"
#include "blob.h"
#include "diff.h"
#include "diffcore.h"
#include "revision.h"
#include "xdiff-interface.h"
/* Set to a non-zero value to enable the diagnostic printf()s in this file. */
#define DEBUG 0

/*
 * Usage text handed to usage().  It must list every option that the
 * argument parser in main() accepts, including -S.
 */
static const char blame_usage[] = "[-c] [-l] [-S <revs-file>] [--] file [commit]\n"
	" -c, --compability Use the same output mode as git-annotate (Default: off)\n"
	" -l, --long Show long commit SHA1 (Default: off)\n"
	" -S <revs-file> Read commit ancestry from <revs-file> (implies -c)\n"
	" -h, --help This message";

/* blame_lines[i] is the commit blamed for line i, or NULL while undecided. */
static struct commit **blame_lines;
static int num_blame_lines;
/* Contents and byte length of the blob being blamed (set by process_commits()). */
static char *blame_contents;
static int blame_len;
/* Per-commit bookkeeping, stored in commit->object.util. */
struct util_info {
	int *line_map;		/* one entry per line of this commit's blob; -1 = unmapped */
	unsigned char sha1[20]; /* blob sha, not commit! */
	char *buf;		/* blob contents, filled in lazily by get_blob() */
	unsigned long size;	/* number of bytes in buf */
	int num_lines;		/* -1 until alloc_line_map() has counted the lines */
	const char *pathname;	/* path of the blamed file as of this commit */

	void *topo_data;	/* opaque slot for topo_setter()/topo_getter() */
};
/*
 * One hunk of a zero-context diff.  process_u0_diff() stores the
 * offsets zero-based.
 */
struct chunk {
	int off1, len1;	/* --- side */
	int off2, len2;	/* +++ side */
};
/* A diff between two blobs: a growable array of num hunks. */
struct patch {
	struct chunk *chunks;
	int num;
};
static void get_blob(struct commit *commit);

/* Only used for statistics */
static int num_get_patch = 0;	/* number of get_patch() calls */
static int num_commits = 0;	/* commits examined by process_commits() */
static int patch_time = 0;	/* microseconds spent inside xdl_diff() */
61 struct blame_diff_state {
62 struct xdiff_emit_state xm;
63 struct patch *ret;
66 static void process_u0_diff(void *state_, char *line, unsigned long len)
68 struct blame_diff_state *state = state_;
69 struct chunk *chunk;
71 if (len < 4 || line[0] != '@' || line[1] != '@')
72 return;
74 if (DEBUG)
75 printf("chunk line: %.*s", (int)len, line);
76 state->ret->num++;
77 state->ret->chunks = xrealloc(state->ret->chunks,
78 sizeof(struct chunk) * state->ret->num);
79 chunk = &state->ret->chunks[state->ret->num - 1];
81 assert(!strncmp(line, "@@ -", 4));
83 if (parse_hunk_header(line, len,
84 &chunk->off1, &chunk->len1,
85 &chunk->off2, &chunk->len2)) {
86 state->ret->num--;
87 return;
90 if (chunk->len1 == 0)
91 chunk->off1++;
92 if (chunk->len2 == 0)
93 chunk->off2++;
95 if (chunk->off1 > 0)
96 chunk->off1--;
97 if (chunk->off2 > 0)
98 chunk->off2--;
100 assert(chunk->off1 >= 0);
101 assert(chunk->off2 >= 0);
104 static struct patch *get_patch(struct commit *commit, struct commit *other)
106 struct blame_diff_state state;
107 xpparam_t xpp;
108 xdemitconf_t xecfg;
109 mmfile_t file_c, file_o;
110 xdemitcb_t ecb;
111 struct util_info *info_c = (struct util_info *)commit->object.util;
112 struct util_info *info_o = (struct util_info *)other->object.util;
113 struct timeval tv_start, tv_end;
115 get_blob(commit);
116 file_c.ptr = info_c->buf;
117 file_c.size = info_c->size;
119 get_blob(other);
120 file_o.ptr = info_o->buf;
121 file_o.size = info_o->size;
123 gettimeofday(&tv_start, NULL);
125 xpp.flags = XDF_NEED_MINIMAL;
126 xecfg.ctxlen = 0;
127 xecfg.flags = 0;
128 ecb.outf = xdiff_outf;
129 ecb.priv = &state;
130 memset(&state, 0, sizeof(state));
131 state.xm.consume = process_u0_diff;
132 state.ret = xmalloc(sizeof(struct patch));
133 state.ret->chunks = NULL;
134 state.ret->num = 0;
136 xdl_diff(&file_c, &file_o, &xpp, &xecfg, &ecb);
138 gettimeofday(&tv_end, NULL);
139 patch_time += 1000000 * (tv_end.tv_sec - tv_start.tv_sec) +
140 tv_end.tv_usec - tv_start.tv_usec;
142 num_get_patch++;
143 return state.ret;
146 static void free_patch(struct patch *p)
148 free(p->chunks);
149 free(p);
static int get_blob_sha1_internal(unsigned char *sha1, const char *base,
				  int baselen, const char *pathname,
				  unsigned mode, int stage);

/* Scratch state shared between get_blob_sha1() and its tree callback. */
static unsigned char blob_sha1[20];
static const char *blame_file;

/*
 * Look up pathname in tree t and copy the blob id into sha1.
 * Returns 0 on success, or -1 when the tree walk left blob_sha1 all
 * zeroes (i.e. no matching entry was recorded).
 */
static int get_blob_sha1(struct tree *t, const char *pathname,
			 unsigned char *sha1)
{
	int i;
	const char *pathspec[2];

	blame_file = pathname;
	pathspec[0] = pathname;
	pathspec[1] = NULL;
	memset(blob_sha1, 0, sizeof(blob_sha1));
	read_tree_recursive(t, "", 0, 0, pathspec, get_blob_sha1_internal);

	/* Find the first non-zero byte; none means the callback never matched. */
	for (i = 0; i < 20; i++) {
		if (blob_sha1[i] != 0)
			break;
	}

	if (i == 20)
		return -1;

	memcpy(sha1, blob_sha1, 20);
	return 0;
}
181 static int get_blob_sha1_internal(unsigned char *sha1, const char *base,
182 int baselen, const char *pathname,
183 unsigned mode, int stage)
185 if (S_ISDIR(mode))
186 return READ_TREE_RECURSIVE;
188 if (strncmp(blame_file, base, baselen) ||
189 strcmp(blame_file + baselen, pathname))
190 return -1;
192 memcpy(blob_sha1, sha1, 20);
193 return -1;
196 static void get_blob(struct commit *commit)
198 struct util_info *info = commit->object.util;
199 char type[20];
201 if (info->buf)
202 return;
204 info->buf = read_sha1_file(info->sha1, type, &info->size);
206 assert(!strcmp(type, blob_type));
209 /* For debugging only */
210 static void print_patch(struct patch *p)
212 int i;
213 printf("Num chunks: %d\n", p->num);
214 for (i = 0; i < p->num; i++) {
215 printf("%d,%d %d,%d\n", p->chunks[i].off1, p->chunks[i].len1,
216 p->chunks[i].off2, p->chunks[i].len2);
#if DEBUG
/*
 * For debugging only: print the line maps of cmit and other side by
 * side, flagging rows where both have an entry and the entries differ.
 */
static void print_map(struct commit *cmit, struct commit *other)
{
	struct util_info *util = cmit->object.util;
	struct util_info *util2 = other->object.util;

	int i;
	int max =
		util->num_lines >
		util2->num_lines ? util->num_lines : util2->num_lines;
	int num;

	for (i = 0; i < max; i++) {
		printf("i: %d ", i);
		num = -1;

		if (i < util->num_lines) {
			num = util->line_map[i];
			printf("%d\t", num);
		} else
			printf("\t");

		if (i < util2->num_lines) {
			int num2 = util2->line_map[i];
			printf("%d\t", num2);
			if (num != -1 && num2 != num)
				printf("---");
		} else
			printf("\t");

		printf("\n");
	}
}
#endif
256 // p is a patch from commit to other.
257 static void fill_line_map(struct commit *commit, struct commit *other,
258 struct patch *p)
260 struct util_info *util = commit->object.util;
261 struct util_info *util2 = other->object.util;
262 int *map = util->line_map;
263 int *map2 = util2->line_map;
264 int cur_chunk = 0;
265 int i1, i2;
267 if (p->num && DEBUG)
268 print_patch(p);
270 if (DEBUG)
271 printf("num lines 1: %d num lines 2: %d\n", util->num_lines,
272 util2->num_lines);
274 for (i1 = 0, i2 = 0; i1 < util->num_lines; i1++, i2++) {
275 struct chunk *chunk = NULL;
276 if (cur_chunk < p->num)
277 chunk = &p->chunks[cur_chunk];
279 if (chunk && chunk->off1 == i1) {
280 if (DEBUG && i2 != chunk->off2)
281 printf("i2: %d off2: %d\n", i2, chunk->off2);
283 assert(i2 == chunk->off2);
285 i1--;
286 i2--;
287 if (chunk->len1 > 0)
288 i1 += chunk->len1;
290 if (chunk->len2 > 0)
291 i2 += chunk->len2;
293 cur_chunk++;
294 } else {
295 if (i2 >= util2->num_lines)
296 break;
298 if (map[i1] != map2[i2] && map[i1] != -1) {
299 if (DEBUG)
300 printf("map: i1: %d %d %p i2: %d %d %p\n",
301 i1, map[i1],
302 i1 != -1 ? blame_lines[map[i1]] : NULL,
303 i2, map2[i2],
304 i2 != -1 ? blame_lines[map2[i2]] : NULL);
305 if (map2[i2] != -1 &&
306 blame_lines[map[i1]] &&
307 !blame_lines[map2[i2]])
308 map[i1] = map2[i2];
311 if (map[i1] == -1 && map2[i2] != -1)
312 map[i1] = map2[i2];
315 if (DEBUG > 1)
316 printf("l1: %d l2: %d i1: %d i2: %d\n",
317 map[i1], map2[i2], i1, i2);
321 static int map_line(struct commit *commit, int line)
323 struct util_info *info = commit->object.util;
324 assert(line >= 0 && line < info->num_lines);
325 return info->line_map[line];
328 static struct util_info* get_util(struct commit *commit)
330 struct util_info *util = commit->object.util;
332 if (util)
333 return util;
335 util = xmalloc(sizeof(struct util_info));
336 util->buf = NULL;
337 util->size = 0;
338 util->line_map = NULL;
339 util->num_lines = -1;
340 util->pathname = NULL;
341 commit->object.util = util;
342 return util;
345 static int fill_util_info(struct commit *commit)
347 struct util_info *util = commit->object.util;
349 assert(util);
350 assert(util->pathname);
352 if (get_blob_sha1(commit->tree, util->pathname, util->sha1))
353 return 1;
354 else
355 return 0;
358 static void alloc_line_map(struct commit *commit)
360 struct util_info *util = commit->object.util;
361 int i;
363 if (util->line_map)
364 return;
366 get_blob(commit);
368 util->num_lines = 0;
369 for (i = 0; i < util->size; i++) {
370 if (util->buf[i] == '\n')
371 util->num_lines++;
373 if(util->buf[util->size - 1] != '\n')
374 util->num_lines++;
376 util->line_map = xmalloc(sizeof(int) * util->num_lines);
378 for (i = 0; i < util->num_lines; i++)
379 util->line_map[i] = -1;
382 static void init_first_commit(struct commit* commit, const char* filename)
384 struct util_info* util = commit->object.util;
385 int i;
387 util->pathname = filename;
388 if (fill_util_info(commit))
389 die("fill_util_info failed");
391 alloc_line_map(commit);
393 util = commit->object.util;
395 for (i = 0; i < util->num_lines; i++)
396 util->line_map[i] = i;
400 static void process_commits(struct rev_info *rev, const char *path,
401 struct commit** initial)
403 int i;
404 struct util_info* util;
405 int lines_left;
406 int *blame_p;
407 int *new_lines;
408 int new_lines_len;
410 struct commit* commit = get_revision(rev);
411 assert(commit);
412 init_first_commit(commit, path);
414 util = commit->object.util;
415 num_blame_lines = util->num_lines;
416 blame_lines = xmalloc(sizeof(struct commit *) * num_blame_lines);
417 blame_contents = util->buf;
418 blame_len = util->size;
420 for (i = 0; i < num_blame_lines; i++)
421 blame_lines[i] = NULL;
423 lines_left = num_blame_lines;
424 blame_p = xmalloc(sizeof(int) * num_blame_lines);
425 new_lines = xmalloc(sizeof(int) * num_blame_lines);
426 do {
427 struct commit_list *parents;
428 int num_parents;
429 struct util_info *util;
431 if (DEBUG)
432 printf("\nProcessing commit: %d %s\n", num_commits,
433 sha1_to_hex(commit->object.sha1));
435 if (lines_left == 0)
436 return;
438 num_commits++;
439 memset(blame_p, 0, sizeof(int) * num_blame_lines);
440 new_lines_len = 0;
441 num_parents = 0;
442 for (parents = commit->parents;
443 parents != NULL; parents = parents->next)
444 num_parents++;
446 if(num_parents == 0)
447 *initial = commit;
449 if (fill_util_info(commit))
450 continue;
452 alloc_line_map(commit);
453 util = commit->object.util;
455 for (parents = commit->parents;
456 parents != NULL; parents = parents->next) {
457 struct commit *parent = parents->item;
458 struct patch *patch;
460 if (parse_commit(parent) < 0)
461 die("parse_commit error");
463 if (DEBUG)
464 printf("parent: %s\n",
465 sha1_to_hex(parent->object.sha1));
467 if (fill_util_info(parent)) {
468 num_parents--;
469 continue;
472 patch = get_patch(parent, commit);
473 alloc_line_map(parent);
474 fill_line_map(parent, commit, patch);
476 for (i = 0; i < patch->num; i++) {
477 int l;
478 for (l = 0; l < patch->chunks[i].len2; l++) {
479 int mapped_line =
480 map_line(commit, patch->chunks[i].off2 + l);
481 if (mapped_line != -1) {
482 blame_p[mapped_line]++;
483 if (blame_p[mapped_line] == num_parents)
484 new_lines[new_lines_len++] = mapped_line;
488 free_patch(patch);
491 if (DEBUG)
492 printf("parents: %d\n", num_parents);
494 for (i = 0; i < new_lines_len; i++) {
495 int mapped_line = new_lines[i];
496 if (blame_lines[mapped_line] == NULL) {
497 blame_lines[mapped_line] = commit;
498 lines_left--;
499 if (DEBUG)
500 printf("blame: mapped: %d i: %d\n",
501 mapped_line, i);
504 } while ((commit = get_revision(rev)) != NULL);
508 static int compare_tree_path(struct rev_info* revs,
509 struct commit* c1, struct commit* c2)
511 const char* paths[2];
512 struct util_info* util = c2->object.util;
513 paths[0] = util->pathname;
514 paths[1] = NULL;
516 diff_tree_setup_paths(get_pathspec(revs->prefix, paths));
517 return rev_compare_tree(c1->tree, c2->tree);
521 static int same_tree_as_empty_path(struct rev_info *revs, struct tree* t1,
522 const char* path)
524 const char* paths[2];
525 paths[0] = path;
526 paths[1] = NULL;
528 diff_tree_setup_paths(get_pathspec(revs->prefix, paths));
529 return rev_same_tree_as_empty(t1);
532 static const char* find_rename(struct commit* commit, struct commit* parent)
534 struct util_info* cutil = commit->object.util;
535 struct diff_options diff_opts;
536 const char *paths[1];
537 int i;
539 if (DEBUG) {
540 printf("find_rename commit: %s ",
541 sha1_to_hex(commit->object.sha1));
542 puts(sha1_to_hex(parent->object.sha1));
545 diff_setup(&diff_opts);
546 diff_opts.recursive = 1;
547 diff_opts.detect_rename = DIFF_DETECT_RENAME;
548 paths[0] = NULL;
549 diff_tree_setup_paths(paths);
550 if (diff_setup_done(&diff_opts) < 0)
551 die("diff_setup_done failed");
553 diff_tree_sha1(commit->tree->object.sha1, parent->tree->object.sha1,
554 "", &diff_opts);
555 diffcore_std(&diff_opts);
557 for (i = 0; i < diff_queued_diff.nr; i++) {
558 struct diff_filepair *p = diff_queued_diff.queue[i];
560 if (p->status == 'R' && !strcmp(p->one->path, cutil->pathname)) {
561 if (DEBUG)
562 printf("rename %s -> %s\n", p->one->path, p->two->path);
563 return p->two->path;
567 return 0;
570 static void simplify_commit(struct rev_info *revs, struct commit *commit)
572 struct commit_list **pp, *parent;
574 if (!commit->tree)
575 return;
577 if (!commit->parents) {
578 struct util_info* util = commit->object.util;
579 if (!same_tree_as_empty_path(revs, commit->tree,
580 util->pathname))
581 commit->object.flags |= TREECHANGE;
582 return;
585 pp = &commit->parents;
586 while ((parent = *pp) != NULL) {
587 struct commit *p = parent->item;
589 if (p->object.flags & UNINTERESTING) {
590 pp = &parent->next;
591 continue;
594 parse_commit(p);
595 switch (compare_tree_path(revs, p, commit)) {
596 case REV_TREE_SAME:
597 parent->next = NULL;
598 commit->parents = parent;
599 get_util(p)->pathname = get_util(commit)->pathname;
600 return;
602 case REV_TREE_NEW:
605 struct util_info* util = commit->object.util;
606 if (revs->remove_empty_trees &&
607 same_tree_as_empty_path(revs, p->tree,
608 util->pathname)) {
609 const char* new_name = find_rename(commit, p);
610 if (new_name) {
611 struct util_info* putil = get_util(p);
612 if (!putil->pathname)
613 putil->pathname = strdup(new_name);
614 } else {
615 *pp = parent->next;
616 continue;
621 /* fallthrough */
622 case REV_TREE_DIFFERENT:
623 pp = &parent->next;
624 if (!get_util(p)->pathname)
625 get_util(p)->pathname =
626 get_util(commit)->pathname;
627 continue;
629 die("bad tree compare for commit %s",
630 sha1_to_hex(commit->object.sha1));
632 commit->object.flags |= TREECHANGE;
/*
 * Author fields of one commit, as filled in by get_commit_info().  The
 * string members point into a static buffer owned by that function.
 */
struct commit_info
{
	char *author;
	char *author_mail;
	unsigned long author_time;
	char *author_tz;
};
644 static void get_commit_info(struct commit* commit, struct commit_info* ret)
646 int len;
647 char* tmp;
648 static char author_buf[1024];
650 tmp = strstr(commit->buffer, "\nauthor ") + 8;
651 len = strchr(tmp, '\n') - tmp;
652 ret->author = author_buf;
653 memcpy(ret->author, tmp, len);
655 tmp = ret->author;
656 tmp += len;
657 *tmp = 0;
658 while(*tmp != ' ')
659 tmp--;
660 ret->author_tz = tmp+1;
662 *tmp = 0;
663 while(*tmp != ' ')
664 tmp--;
665 ret->author_time = strtoul(tmp, NULL, 10);
667 *tmp = 0;
668 while(*tmp != ' ')
669 tmp--;
670 ret->author_mail = tmp + 1;
672 *tmp = 0;
/*
 * Format time (seconds since the epoch) as "YYYY-MM-DD HH:MM:SS <tz>",
 * shifted by the offset encoded in tz_str ("+hhmm" / "-hhmm").
 *
 * Returns a pointer to a static buffer that is overwritten by the next
 * call.  The timezone text is appended with a bound, so an oversized
 * tz_str is truncated instead of overflowing the buffer.
 */
static const char* format_time(unsigned long time, const char* tz_str)
{
	static char time_buf[128];
	time_t t;
	int minutes, tz;
	struct tm *tm;
	size_t used;

	/* Turn the decimal hhmm value into a signed number of minutes. */
	tz = atoi(tz_str);
	minutes = tz < 0 ? -tz : tz;
	minutes = (minutes / 100) * 60 + (minutes % 100);
	minutes = tz < 0 ? -minutes : minutes;
	t = (time_t)time + minutes * 60;
	tm = gmtime(&t);
	if (!tm) {
		/* Unrepresentable time: fall back to the raw values. */
		snprintf(time_buf, sizeof(time_buf), "%lu %s", time, tz_str);
		return time_buf;
	}

	used = strftime(time_buf, sizeof(time_buf), "%Y-%m-%d %H:%M:%S ", tm);
	snprintf(time_buf + used, sizeof(time_buf) - used, "%s", tz_str);
	return time_buf;
}
694 static void topo_setter(struct commit* c, void* data)
696 struct util_info* util = c->object.util;
697 util->topo_data = data;
700 static void* topo_getter(struct commit* c)
702 struct util_info* util = c->object.util;
703 return util->topo_data;
706 static int read_ancestry(const char *graft_file,
707 unsigned char **start_sha1)
709 FILE *fp = fopen(graft_file, "r");
710 char buf[1024];
711 if (!fp)
712 return -1;
713 while (fgets(buf, sizeof(buf), fp)) {
714 /* The format is just "Commit Parent1 Parent2 ...\n" */
715 int len = strlen(buf);
716 struct commit_graft *graft = read_graft_line(buf, len);
717 register_commit_graft(graft, 0);
718 if (!*start_sha1)
719 *start_sha1 = graft->sha1;
721 fclose(fp);
722 return 0;
725 int main(int argc, const char **argv)
727 int i;
728 struct commit *initial = NULL;
729 unsigned char sha1[20], *sha1_p = NULL;
731 const char *filename = NULL, *commit = NULL;
732 char filename_buf[256];
733 int sha1_len = 8;
734 int compability = 0;
735 int options = 1;
736 struct commit* start_commit;
738 const char* args[10];
739 struct rev_info rev;
741 struct commit_info ci;
742 const char *buf;
743 int max_digits;
744 int longest_file, longest_author;
745 int found_rename;
747 const char* prefix = setup_git_directory();
748 git_config(git_default_config);
750 for(i = 1; i < argc; i++) {
751 if(options) {
752 if(!strcmp(argv[i], "-h") ||
753 !strcmp(argv[i], "--help"))
754 usage(blame_usage);
755 else if(!strcmp(argv[i], "-l") ||
756 !strcmp(argv[i], "--long")) {
757 sha1_len = 40;
758 continue;
759 } else if(!strcmp(argv[i], "-c") ||
760 !strcmp(argv[i], "--compability")) {
761 compability = 1;
762 continue;
763 } else if(!strcmp(argv[i], "-S")) {
764 if (i + 1 < argc &&
765 !read_ancestry(argv[i + 1], &sha1_p)) {
766 compability = 1;
767 i++;
768 continue;
770 usage(blame_usage);
771 } else if(!strcmp(argv[i], "--")) {
772 options = 0;
773 continue;
774 } else if(argv[i][0] == '-')
775 usage(blame_usage);
776 else
777 options = 0;
780 if(!options) {
781 if(!filename)
782 filename = argv[i];
783 else if(!commit)
784 commit = argv[i];
785 else
786 usage(blame_usage);
790 if(!filename)
791 usage(blame_usage);
792 if (commit && sha1_p)
793 usage(blame_usage);
794 else if(!commit)
795 commit = "HEAD";
797 if(prefix)
798 sprintf(filename_buf, "%s%s", prefix, filename);
799 else
800 strcpy(filename_buf, filename);
801 filename = filename_buf;
803 if (!sha1_p) {
804 if (get_sha1(commit, sha1))
805 die("get_sha1 failed, commit '%s' not found", commit);
806 sha1_p = sha1;
808 start_commit = lookup_commit_reference(sha1_p);
809 get_util(start_commit)->pathname = filename;
810 if (fill_util_info(start_commit)) {
811 printf("%s not found in %s\n", filename, commit);
812 return 1;
816 init_revisions(&rev);
817 rev.remove_empty_trees = 1;
818 rev.topo_order = 1;
819 rev.prune_fn = simplify_commit;
820 rev.topo_setter = topo_setter;
821 rev.topo_getter = topo_getter;
822 rev.parents = 1;
823 rev.limited = 1;
825 commit_list_insert(start_commit, &rev.commits);
827 args[0] = filename;
828 args[1] = NULL;
829 diff_tree_setup_paths(args);
830 prepare_revision_walk(&rev);
831 process_commits(&rev, filename, &initial);
833 buf = blame_contents;
834 for (max_digits = 1, i = 10; i <= num_blame_lines + 1; max_digits++)
835 i *= 10;
837 longest_file = 0;
838 longest_author = 0;
839 found_rename = 0;
840 for (i = 0; i < num_blame_lines; i++) {
841 struct commit *c = blame_lines[i];
842 struct util_info* u;
843 if (!c)
844 c = initial;
845 u = c->object.util;
847 if (!found_rename && strcmp(filename, u->pathname))
848 found_rename = 1;
849 if (longest_file < strlen(u->pathname))
850 longest_file = strlen(u->pathname);
851 get_commit_info(c, &ci);
852 if (longest_author < strlen(ci.author))
853 longest_author = strlen(ci.author);
856 for (i = 0; i < num_blame_lines; i++) {
857 struct commit *c = blame_lines[i];
858 struct util_info* u;
860 if (!c)
861 c = initial;
863 u = c->object.util;
864 get_commit_info(c, &ci);
865 fwrite(sha1_to_hex(c->object.sha1), sha1_len, 1, stdout);
866 if(compability) {
867 printf("\t(%10s\t%10s\t%d)", ci.author,
868 format_time(ci.author_time, ci.author_tz), i+1);
869 } else {
870 if (found_rename)
871 printf(" %-*.*s", longest_file, longest_file,
872 u->pathname);
873 printf(" (%-*.*s %10s %*d) ",
874 longest_author, longest_author, ci.author,
875 format_time(ci.author_time, ci.author_tz),
876 max_digits, i+1);
879 if(i == num_blame_lines - 1) {
880 fwrite(buf, blame_len - (buf - blame_contents),
881 1, stdout);
882 if(blame_contents[blame_len-1] != '\n')
883 putc('\n', stdout);
884 } else {
885 char* next_buf = strchr(buf, '\n') + 1;
886 fwrite(buf, next_buf - buf, 1, stdout);
887 buf = next_buf;
891 if (DEBUG) {
892 printf("num get patch: %d\n", num_get_patch);
893 printf("num commits: %d\n", num_commits);
894 printf("patch time: %f\n", patch_time / 1000000.0);
895 printf("initial: %s\n", sha1_to_hex(initial->object.sha1));
898 return 0;