git-svnimport: if a limit is specified, respect it
[git/dscho.git] / blame.c
blob7e88833a37f9a2b7380b96510a331cf376c99264
1 /*
2 * Copyright (C) 2006, Fredrik Kuivinen <freku045@student.liu.se>
3 */
5 #include <assert.h>
6 #include <time.h>
7 #include <sys/time.h>
8 #include <math.h>
10 #include "cache.h"
11 #include "refs.h"
12 #include "tag.h"
13 #include "commit.h"
14 #include "tree.h"
15 #include "blob.h"
16 #include "diff.h"
17 #include "diffcore.h"
18 #include "revision.h"
/* Set to a non-zero value to compile in the diagnostic traces below. */
#define DEBUG 0

/* Option summary handed to usage() by main(). */
static const char blame_usage[] =
	"[-c] [-l] [--] file [commit]\n"
	" -c, --compability Use the same output mode as git-annotate (Default: off)\n"
	" -l, --long Show long commit SHA1 (Default: off)\n"
	" -h, --help This message";

/* blame_lines[i] is the commit blamed for line i; NULL while undecided. */
static struct commit **blame_lines;
static int num_blame_lines;

/* Contents and byte length of the blob that is being blamed. */
static char *blame_contents;
static int blame_len;
/* Per-commit bookkeeping, hung off commit->object.util. */
struct util_info {
	/* line_map[i]: index into the blamed file for line i, or -1 */
	int *line_map;
	/* blob sha, not commit! */
	unsigned char sha1[20];
	/* blob contents, loaded on demand by get_blob(); NULL until then */
	char *buf;
	/* number of bytes in buf */
	unsigned long size;
	/* number of lines in buf; -1 until alloc_line_map() has counted them */
	int num_lines;
	/* path of the blamed file as it is named in this commit */
	const char *pathname;

	/* opaque slot accessed through topo_setter()/topo_getter() */
	void *topo_data;
};
/*
 * One hunk of a diff.  get_patch() stores the offsets zero-based.
 */
struct chunk {
	/* range taken from the "-" side of the hunk header */
	int off1, len1;
	/* range taken from the "+" side of the hunk header */
	int off2, len2;
};
/* The hunks of one diff, in the order diff printed them. */
struct patch {
	/* array of num hunks, or NULL when there are none */
	struct chunk *chunks;
	/* number of entries in chunks */
	int num;
};
/* Defined further down; get_patch() needs it earlier. */
static void get_blob(struct commit *commit);

/*
 * Only used for statistics.  patch_time accumulates microseconds spent
 * inside get_patch().
 */
static int num_get_patch;
static int num_commits;
static int patch_time;
60 #define TEMPFILE_PATH_LEN 60
61 static struct patch *get_patch(struct commit *commit, struct commit *other)
63 struct patch *ret;
64 struct util_info *info_c = (struct util_info *)commit->object.util;
65 struct util_info *info_o = (struct util_info *)other->object.util;
66 char tmp_path1[TEMPFILE_PATH_LEN], tmp_path2[TEMPFILE_PATH_LEN];
67 char diff_cmd[TEMPFILE_PATH_LEN*2 + 20];
68 struct timeval tv_start, tv_end;
69 int fd;
70 FILE *fin;
71 char buf[1024];
73 ret = xmalloc(sizeof(struct patch));
74 ret->chunks = NULL;
75 ret->num = 0;
77 get_blob(commit);
78 get_blob(other);
80 gettimeofday(&tv_start, NULL);
82 fd = git_mkstemp(tmp_path1, TEMPFILE_PATH_LEN, "git-blame-XXXXXX");
83 if (fd < 0)
84 die("unable to create temp-file: %s", strerror(errno));
86 if (xwrite(fd, info_c->buf, info_c->size) != info_c->size)
87 die("write failed: %s", strerror(errno));
88 close(fd);
90 fd = git_mkstemp(tmp_path2, TEMPFILE_PATH_LEN, "git-blame-XXXXXX");
91 if (fd < 0)
92 die("unable to create temp-file: %s", strerror(errno));
94 if (xwrite(fd, info_o->buf, info_o->size) != info_o->size)
95 die("write failed: %s", strerror(errno));
96 close(fd);
98 sprintf(diff_cmd, "diff -U 0 %s %s", tmp_path1, tmp_path2);
99 fin = popen(diff_cmd, "r");
100 if (!fin)
101 die("popen failed: %s", strerror(errno));
103 while (fgets(buf, sizeof(buf), fin)) {
104 struct chunk *chunk;
105 char *start, *sp;
107 if (buf[0] != '@' || buf[1] != '@')
108 continue;
110 if (DEBUG)
111 printf("chunk line: %s", buf);
112 ret->num++;
113 ret->chunks = xrealloc(ret->chunks,
114 sizeof(struct chunk) * ret->num);
115 chunk = &ret->chunks[ret->num - 1];
117 assert(!strncmp(buf, "@@ -", 4));
119 start = buf + 4;
120 sp = index(start, ' ');
121 *sp = '\0';
122 if (index(start, ',')) {
123 int ret =
124 sscanf(start, "%d,%d", &chunk->off1, &chunk->len1);
125 assert(ret == 2);
126 } else {
127 int ret = sscanf(start, "%d", &chunk->off1);
128 assert(ret == 1);
129 chunk->len1 = 1;
131 *sp = ' ';
133 start = sp + 1;
134 sp = index(start, ' ');
135 *sp = '\0';
136 if (index(start, ',')) {
137 int ret =
138 sscanf(start, "%d,%d", &chunk->off2, &chunk->len2);
139 assert(ret == 2);
140 } else {
141 int ret = sscanf(start, "%d", &chunk->off2);
142 assert(ret == 1);
143 chunk->len2 = 1;
145 *sp = ' ';
147 if (chunk->len1 == 0)
148 chunk->off1++;
149 if (chunk->len2 == 0)
150 chunk->off2++;
152 if (chunk->off1 > 0)
153 chunk->off1--;
154 if (chunk->off2 > 0)
155 chunk->off2--;
157 assert(chunk->off1 >= 0);
158 assert(chunk->off2 >= 0);
160 pclose(fin);
161 unlink(tmp_path1);
162 unlink(tmp_path2);
164 gettimeofday(&tv_end, NULL);
165 patch_time += 1000000 * (tv_end.tv_sec - tv_start.tv_sec) +
166 tv_end.tv_usec - tv_start.tv_usec;
168 num_get_patch++;
169 return ret;
172 static void free_patch(struct patch *p)
174 free(p->chunks);
175 free(p);
static int get_blob_sha1_internal(unsigned char *sha1, const char *base,
				  int baselen, const char *pathname,
				  unsigned mode, int stage);

/* Scratch state shared between get_blob_sha1() and its tree callback. */
static unsigned char blob_sha1[20];
static const char *blame_file;

/*
 * Look up `pathname` in tree `t` and copy the sha1 of the blob found there
 * into `sha1`.  Returns 0 on success, or -1 when the callback left
 * blob_sha1 all zeroes (in which case `sha1` is not written).
 */
static int get_blob_sha1(struct tree *t, const char *pathname,
			 unsigned char *sha1)
{
	static const unsigned char zero_sha1[20];
	const char *pathspec[2];

	blame_file = pathname;
	pathspec[0] = pathname;
	pathspec[1] = NULL;
	memset(blob_sha1, 0, sizeof(blob_sha1));
	read_tree_recursive(t, "", 0, 0, pathspec, get_blob_sha1_internal);

	/* Still all zeroes: the callback never saw the wanted path. */
	if (!memcmp(blob_sha1, zero_sha1, sizeof(blob_sha1)))
		return -1;

	memcpy(sha1, blob_sha1, 20);
	return 0;
}
207 static int get_blob_sha1_internal(unsigned char *sha1, const char *base,
208 int baselen, const char *pathname,
209 unsigned mode, int stage)
211 if (S_ISDIR(mode))
212 return READ_TREE_RECURSIVE;
214 if (strncmp(blame_file, base, baselen) ||
215 strcmp(blame_file + baselen, pathname))
216 return -1;
218 memcpy(blob_sha1, sha1, 20);
219 return -1;
222 static void get_blob(struct commit *commit)
224 struct util_info *info = commit->object.util;
225 char type[20];
227 if (info->buf)
228 return;
230 info->buf = read_sha1_file(info->sha1, type, &info->size);
232 assert(!strcmp(type, "blob"));
235 /* For debugging only */
236 static void print_patch(struct patch *p)
238 int i;
239 printf("Num chunks: %d\n", p->num);
240 for (i = 0; i < p->num; i++) {
241 printf("%d,%d %d,%d\n", p->chunks[i].off1, p->chunks[i].len1,
242 p->chunks[i].off2, p->chunks[i].len2);
#if DEBUG
/*
 * For debugging only: print the line maps of two commits side by side,
 * one row per line index, with "---" where both have an entry, the first
 * is not -1, and the two differ.
 */
static void print_map(struct commit *cmit, struct commit *other)
{
	struct util_info *left = cmit->object.util;
	struct util_info *right = other->object.util;
	int rows = left->num_lines;
	int row;

	if (right->num_lines >= rows)
		rows = right->num_lines;

	for (row = 0; row < rows; row++) {
		int lhs = -1;

		printf("i: %d ", row);

		if (row < left->num_lines) {
			lhs = left->line_map[row];
			printf("%d\t", lhs);
		} else {
			printf("\t");
		}

		if (row < right->num_lines) {
			int rhs = right->line_map[row];

			printf("%d\t", rhs);
			if (lhs != -1 && rhs != lhs)
				printf("---");
		} else {
			printf("\t");
		}

		printf("\n");
	}
}
#endif
282 // p is a patch from commit to other.
283 static void fill_line_map(struct commit *commit, struct commit *other,
284 struct patch *p)
286 struct util_info *util = commit->object.util;
287 struct util_info *util2 = other->object.util;
288 int *map = util->line_map;
289 int *map2 = util2->line_map;
290 int cur_chunk = 0;
291 int i1, i2;
293 if (p->num && DEBUG)
294 print_patch(p);
296 if (DEBUG)
297 printf("num lines 1: %d num lines 2: %d\n", util->num_lines,
298 util2->num_lines);
300 for (i1 = 0, i2 = 0; i1 < util->num_lines; i1++, i2++) {
301 struct chunk *chunk = NULL;
302 if (cur_chunk < p->num)
303 chunk = &p->chunks[cur_chunk];
305 if (chunk && chunk->off1 == i1) {
306 if (DEBUG && i2 != chunk->off2)
307 printf("i2: %d off2: %d\n", i2, chunk->off2);
309 assert(i2 == chunk->off2);
311 i1--;
312 i2--;
313 if (chunk->len1 > 0)
314 i1 += chunk->len1;
316 if (chunk->len2 > 0)
317 i2 += chunk->len2;
319 cur_chunk++;
320 } else {
321 if (i2 >= util2->num_lines)
322 break;
324 if (map[i1] != map2[i2] && map[i1] != -1) {
325 if (DEBUG)
326 printf("map: i1: %d %d %p i2: %d %d %p\n",
327 i1, map[i1],
328 i1 != -1 ? blame_lines[map[i1]] : NULL,
329 i2, map2[i2],
330 i2 != -1 ? blame_lines[map2[i2]] : NULL);
331 if (map2[i2] != -1 &&
332 blame_lines[map[i1]] &&
333 !blame_lines[map2[i2]])
334 map[i1] = map2[i2];
337 if (map[i1] == -1 && map2[i2] != -1)
338 map[i1] = map2[i2];
341 if (DEBUG > 1)
342 printf("l1: %d l2: %d i1: %d i2: %d\n",
343 map[i1], map2[i2], i1, i2);
347 static int map_line(struct commit *commit, int line)
349 struct util_info *info = commit->object.util;
350 assert(line >= 0 && line < info->num_lines);
351 return info->line_map[line];
354 static struct util_info* get_util(struct commit *commit)
356 struct util_info *util = commit->object.util;
358 if (util)
359 return util;
361 util = xmalloc(sizeof(struct util_info));
362 util->buf = NULL;
363 util->size = 0;
364 util->line_map = NULL;
365 util->num_lines = -1;
366 util->pathname = NULL;
367 commit->object.util = util;
368 return util;
371 static int fill_util_info(struct commit *commit)
373 struct util_info *util = commit->object.util;
375 assert(util);
376 assert(util->pathname);
378 if (get_blob_sha1(commit->tree, util->pathname, util->sha1))
379 return 1;
380 else
381 return 0;
384 static void alloc_line_map(struct commit *commit)
386 struct util_info *util = commit->object.util;
387 int i;
389 if (util->line_map)
390 return;
392 get_blob(commit);
394 util->num_lines = 0;
395 for (i = 0; i < util->size; i++) {
396 if (util->buf[i] == '\n')
397 util->num_lines++;
399 if(util->buf[util->size - 1] != '\n')
400 util->num_lines++;
402 util->line_map = xmalloc(sizeof(int) * util->num_lines);
404 for (i = 0; i < util->num_lines; i++)
405 util->line_map[i] = -1;
408 static void init_first_commit(struct commit* commit, const char* filename)
410 struct util_info* util = commit->object.util;
411 int i;
413 util->pathname = filename;
414 if (fill_util_info(commit))
415 die("fill_util_info failed");
417 alloc_line_map(commit);
419 util = commit->object.util;
421 for (i = 0; i < util->num_lines; i++)
422 util->line_map[i] = i;
426 static void process_commits(struct rev_info *rev, const char *path,
427 struct commit** initial)
429 int i;
430 struct util_info* util;
431 int lines_left;
432 int *blame_p;
433 int *new_lines;
434 int new_lines_len;
436 struct commit* commit = get_revision(rev);
437 assert(commit);
438 init_first_commit(commit, path);
440 util = commit->object.util;
441 num_blame_lines = util->num_lines;
442 blame_lines = xmalloc(sizeof(struct commit *) * num_blame_lines);
443 blame_contents = util->buf;
444 blame_len = util->size;
446 for (i = 0; i < num_blame_lines; i++)
447 blame_lines[i] = NULL;
449 lines_left = num_blame_lines;
450 blame_p = xmalloc(sizeof(int) * num_blame_lines);
451 new_lines = xmalloc(sizeof(int) * num_blame_lines);
452 do {
453 struct commit_list *parents;
454 int num_parents;
455 struct util_info *util;
457 if (DEBUG)
458 printf("\nProcessing commit: %d %s\n", num_commits,
459 sha1_to_hex(commit->object.sha1));
461 if (lines_left == 0)
462 return;
464 num_commits++;
465 memset(blame_p, 0, sizeof(int) * num_blame_lines);
466 new_lines_len = 0;
467 num_parents = 0;
468 for (parents = commit->parents;
469 parents != NULL; parents = parents->next)
470 num_parents++;
472 if(num_parents == 0)
473 *initial = commit;
475 if (fill_util_info(commit))
476 continue;
478 alloc_line_map(commit);
479 util = commit->object.util;
481 for (parents = commit->parents;
482 parents != NULL; parents = parents->next) {
483 struct commit *parent = parents->item;
484 struct patch *patch;
486 if (parse_commit(parent) < 0)
487 die("parse_commit error");
489 if (DEBUG)
490 printf("parent: %s\n",
491 sha1_to_hex(parent->object.sha1));
493 if (fill_util_info(parent)) {
494 num_parents--;
495 continue;
498 patch = get_patch(parent, commit);
499 alloc_line_map(parent);
500 fill_line_map(parent, commit, patch);
502 for (i = 0; i < patch->num; i++) {
503 int l;
504 for (l = 0; l < patch->chunks[i].len2; l++) {
505 int mapped_line =
506 map_line(commit, patch->chunks[i].off2 + l);
507 if (mapped_line != -1) {
508 blame_p[mapped_line]++;
509 if (blame_p[mapped_line] == num_parents)
510 new_lines[new_lines_len++] = mapped_line;
514 free_patch(patch);
517 if (DEBUG)
518 printf("parents: %d\n", num_parents);
520 for (i = 0; i < new_lines_len; i++) {
521 int mapped_line = new_lines[i];
522 if (blame_lines[mapped_line] == NULL) {
523 blame_lines[mapped_line] = commit;
524 lines_left--;
525 if (DEBUG)
526 printf("blame: mapped: %d i: %d\n",
527 mapped_line, i);
530 } while ((commit = get_revision(rev)) != NULL);
534 static int compare_tree_path(struct rev_info* revs,
535 struct commit* c1, struct commit* c2)
537 const char* paths[2];
538 struct util_info* util = c2->object.util;
539 paths[0] = util->pathname;
540 paths[1] = NULL;
542 diff_tree_setup_paths(get_pathspec(revs->prefix, paths));
543 return rev_compare_tree(c1->tree, c2->tree);
547 static int same_tree_as_empty_path(struct rev_info *revs, struct tree* t1,
548 const char* path)
550 const char* paths[2];
551 paths[0] = path;
552 paths[1] = NULL;
554 diff_tree_setup_paths(get_pathspec(revs->prefix, paths));
555 return rev_same_tree_as_empty(t1);
558 static const char* find_rename(struct commit* commit, struct commit* parent)
560 struct util_info* cutil = commit->object.util;
561 struct diff_options diff_opts;
562 const char *paths[1];
563 int i;
565 if (DEBUG) {
566 printf("find_rename commit: %s ",
567 sha1_to_hex(commit->object.sha1));
568 puts(sha1_to_hex(parent->object.sha1));
571 diff_setup(&diff_opts);
572 diff_opts.recursive = 1;
573 diff_opts.detect_rename = DIFF_DETECT_RENAME;
574 paths[0] = NULL;
575 diff_tree_setup_paths(paths);
576 if (diff_setup_done(&diff_opts) < 0)
577 die("diff_setup_done failed");
579 diff_tree_sha1(commit->tree->object.sha1, parent->tree->object.sha1,
580 "", &diff_opts);
581 diffcore_std(&diff_opts);
583 for (i = 0; i < diff_queued_diff.nr; i++) {
584 struct diff_filepair *p = diff_queued_diff.queue[i];
586 if (p->status == 'R' && !strcmp(p->one->path, cutil->pathname)) {
587 if (DEBUG)
588 printf("rename %s -> %s\n", p->one->path, p->two->path);
589 return p->two->path;
593 return 0;
596 static void simplify_commit(struct rev_info *revs, struct commit *commit)
598 struct commit_list **pp, *parent;
600 if (!commit->tree)
601 return;
603 if (!commit->parents) {
604 struct util_info* util = commit->object.util;
605 if (!same_tree_as_empty_path(revs, commit->tree,
606 util->pathname))
607 commit->object.flags |= TREECHANGE;
608 return;
611 pp = &commit->parents;
612 while ((parent = *pp) != NULL) {
613 struct commit *p = parent->item;
615 if (p->object.flags & UNINTERESTING) {
616 pp = &parent->next;
617 continue;
620 parse_commit(p);
621 switch (compare_tree_path(revs, p, commit)) {
622 case REV_TREE_SAME:
623 parent->next = NULL;
624 commit->parents = parent;
625 get_util(p)->pathname = get_util(commit)->pathname;
626 return;
628 case REV_TREE_NEW:
631 struct util_info* util = commit->object.util;
632 if (revs->remove_empty_trees &&
633 same_tree_as_empty_path(revs, p->tree,
634 util->pathname)) {
635 const char* new_name = find_rename(commit, p);
636 if (new_name) {
637 struct util_info* putil = get_util(p);
638 if (!putil->pathname)
639 putil->pathname = strdup(new_name);
640 } else {
641 *pp = parent->next;
642 continue;
647 /* fallthrough */
648 case REV_TREE_DIFFERENT:
649 pp = &parent->next;
650 if (!get_util(p)->pathname)
651 get_util(p)->pathname =
652 get_util(commit)->pathname;
653 continue;
655 die("bad tree compare for commit %s",
656 sha1_to_hex(commit->object.sha1));
658 commit->object.flags |= TREECHANGE;
/* The fields of a commit's "author" line, as split by get_commit_info(). */
struct commit_info
{
	/* text before the mail field */
	char *author;
	/* third field from the end of the line */
	char *author_mail;
	/* second field from the end, parsed as a decimal number */
	unsigned long author_time;
	/* last field of the line, kept as text */
	char *author_tz;
};
670 static void get_commit_info(struct commit* commit, struct commit_info* ret)
672 int len;
673 char* tmp;
674 static char author_buf[1024];
676 tmp = strstr(commit->buffer, "\nauthor ") + 8;
677 len = index(tmp, '\n') - tmp;
678 ret->author = author_buf;
679 memcpy(ret->author, tmp, len);
681 tmp = ret->author;
682 tmp += len;
683 *tmp = 0;
684 while(*tmp != ' ')
685 tmp--;
686 ret->author_tz = tmp+1;
688 *tmp = 0;
689 while(*tmp != ' ')
690 tmp--;
691 ret->author_time = strtoul(tmp, NULL, 10);
693 *tmp = 0;
694 while(*tmp != ' ')
695 tmp--;
696 ret->author_mail = tmp + 1;
698 *tmp = 0;
/*
 * Format `time` (seconds since the epoch) as "YYYY-MM-DD HH:MM:SS <tz>",
 * shifted by the offset in `tz_str`.
 *
 * `tz_str` is read as a decimal number of the form [+-]HHMM and is also
 * appended verbatim to the output, truncated if it does not fit.
 * Returns a pointer to a static buffer that the next call overwrites.
 * If the time cannot be broken down, only `tz_str` is returned.
 */
static const char* format_time(unsigned long time, const char* tz_str)
{
	static char time_buf[128];
	time_t t;
	int minutes, tz;
	struct tm *tm;
	size_t used = 0;

	/* Turn [+-]HHMM into a signed number of minutes. */
	tz = atoi(tz_str);
	minutes = tz < 0 ? -tz : tz;
	minutes = (minutes / 100)*60 + (minutes % 100);
	minutes = tz < 0 ? -minutes : minutes;
	t = (time_t)time + minutes * 60;

	/* gmtime() on the shifted value yields the author's local time. */
	tm = gmtime(&t);
	if (tm)
		used = strftime(time_buf, sizeof(time_buf),
				"%Y-%m-%d %H:%M:%S ", tm);

	/* Bounded append: tz_str comes from commit data. */
	snprintf(time_buf + used, sizeof(time_buf) - used, "%s", tz_str);
	return time_buf;
}
720 static void topo_setter(struct commit* c, void* data)
722 struct util_info* util = c->object.util;
723 util->topo_data = data;
726 static void* topo_getter(struct commit* c)
728 struct util_info* util = c->object.util;
729 return util->topo_data;
732 int main(int argc, const char **argv)
734 int i;
735 struct commit *initial = NULL;
736 unsigned char sha1[20];
738 const char *filename = NULL, *commit = NULL;
739 char filename_buf[256];
740 int sha1_len = 8;
741 int compability = 0;
742 int options = 1;
743 struct commit* start_commit;
745 const char* args[10];
746 struct rev_info rev;
748 struct commit_info ci;
749 const char *buf;
750 int max_digits;
751 int longest_file, longest_author;
752 int found_rename;
754 const char* prefix = setup_git_directory();
756 for(i = 1; i < argc; i++) {
757 if(options) {
758 if(!strcmp(argv[i], "-h") ||
759 !strcmp(argv[i], "--help"))
760 usage(blame_usage);
761 else if(!strcmp(argv[i], "-l") ||
762 !strcmp(argv[i], "--long")) {
763 sha1_len = 40;
764 continue;
765 } else if(!strcmp(argv[i], "-c") ||
766 !strcmp(argv[i], "--compability")) {
767 compability = 1;
768 continue;
769 } else if(!strcmp(argv[i], "--")) {
770 options = 0;
771 continue;
772 } else if(argv[i][0] == '-')
773 usage(blame_usage);
774 else
775 options = 0;
778 if(!options) {
779 if(!filename)
780 filename = argv[i];
781 else if(!commit)
782 commit = argv[i];
783 else
784 usage(blame_usage);
788 if(!filename)
789 usage(blame_usage);
790 if(!commit)
791 commit = "HEAD";
793 if(prefix)
794 sprintf(filename_buf, "%s%s", prefix, filename);
795 else
796 strcpy(filename_buf, filename);
797 filename = filename_buf;
799 if (get_sha1(commit, sha1))
800 die("get_sha1 failed, commit '%s' not found", commit);
801 start_commit = lookup_commit_reference(sha1);
802 get_util(start_commit)->pathname = filename;
803 if (fill_util_info(start_commit)) {
804 printf("%s not found in %s\n", filename, commit);
805 return 1;
809 init_revisions(&rev);
810 rev.remove_empty_trees = 1;
811 rev.topo_order = 1;
812 rev.prune_fn = simplify_commit;
813 rev.topo_setter = topo_setter;
814 rev.topo_getter = topo_getter;
815 rev.limited = 1;
817 commit_list_insert(start_commit, &rev.commits);
819 args[0] = filename;
820 args[1] = NULL;
821 diff_tree_setup_paths(args);
822 prepare_revision_walk(&rev);
823 process_commits(&rev, filename, &initial);
825 buf = blame_contents;
826 for (max_digits = 1, i = 10; i <= num_blame_lines + 1; max_digits++)
827 i *= 10;
829 longest_file = 0;
830 longest_author = 0;
831 found_rename = 0;
832 for (i = 0; i < num_blame_lines; i++) {
833 struct commit *c = blame_lines[i];
834 struct util_info* u;
835 if (!c)
836 c = initial;
837 u = c->object.util;
839 if (!found_rename && strcmp(filename, u->pathname))
840 found_rename = 1;
841 if (longest_file < strlen(u->pathname))
842 longest_file = strlen(u->pathname);
843 get_commit_info(c, &ci);
844 if (longest_author < strlen(ci.author))
845 longest_author = strlen(ci.author);
848 for (i = 0; i < num_blame_lines; i++) {
849 struct commit *c = blame_lines[i];
850 struct util_info* u;
852 if (!c)
853 c = initial;
855 u = c->object.util;
856 get_commit_info(c, &ci);
857 fwrite(sha1_to_hex(c->object.sha1), sha1_len, 1, stdout);
858 if(compability) {
859 printf("\t(%10s\t%10s\t%d)", ci.author,
860 format_time(ci.author_time, ci.author_tz), i+1);
861 } else {
862 if (found_rename)
863 printf(" %-*.*s", longest_file, longest_file,
864 u->pathname);
865 printf(" (%-*.*s %10s %*d) ",
866 longest_author, longest_author, ci.author,
867 format_time(ci.author_time, ci.author_tz),
868 max_digits, i+1);
871 if(i == num_blame_lines - 1) {
872 fwrite(buf, blame_len - (buf - blame_contents),
873 1, stdout);
874 if(blame_contents[blame_len-1] != '\n')
875 putc('\n', stdout);
876 } else {
877 char* next_buf = index(buf, '\n') + 1;
878 fwrite(buf, next_buf - buf, 1, stdout);
879 buf = next_buf;
883 if (DEBUG) {
884 printf("num get patch: %d\n", num_get_patch);
885 printf("num commits: %d\n", num_commits);
886 printf("patch time: %f\n", patch_time / 1000000.0);
887 printf("initial: %s\n", sha1_to_hex(initial->object.sha1));
890 return 0;