git-pickaxe: get rid of wasteful find_origin().
[git.git] / builtin-pickaxe.c
blobcf474b0d11e1508b7f885f3886abfdeaeee30b0c
1 /*
2 * Pickaxe
4 * Copyright (c) 2006, Junio C Hamano
5 */
7 #include "cache.h"
8 #include "builtin.h"
9 #include "blob.h"
10 #include "commit.h"
11 #include "tag.h"
12 #include "tree-walk.h"
13 #include "diff.h"
14 #include "diffcore.h"
15 #include "revision.h"
16 #include "xdiff-interface.h"
18 #include <time.h>
19 #include <sys/time.h>
21 static char pickaxe_usage[] =
22 "git-pickaxe [-c] [-l] [-t] [-f] [-n] [-p] [-L n,m] [-S <revs-file>] [-M] [-C] [-C] [commit] [--] file\n"
23 " -c, --compatibility Use the same output mode as git-annotate (Default: off)\n"
24 " -l, --long Show long commit SHA1 (Default: off)\n"
25 " -t, --time Show raw timestamp (Default: off)\n"
26 " -f, --show-name Show original filename (Default: auto)\n"
27 " -n, --show-number Show original linenumber (Default: off)\n"
28 " -p, --porcelain Show in a format designed for machine consumption\n"
29 " -L n,m Process only line range n,m, counting from 1\n"
30 " -M, -C Find line movements within and across files\n"
31 " -S revs-file Use revisions from revs-file instead of calling git-rev-list\n";
33 static int longest_file;
34 static int longest_author;
35 static int max_orig_digits;
36 static int max_digits;
37 static int max_score_digits;
39 #define PICKAXE_BLAME_MOVE 01
40 #define PICKAXE_BLAME_COPY 02
41 #define PICKAXE_BLAME_COPY_HARDER 04
44 * blame for a blame_entry with score lower than these thresholds
45 * is not passed to the parent using move/copy logic.
47 static unsigned blame_move_score;
48 static unsigned blame_copy_score;
49 #define BLAME_DEFAULT_MOVE_SCORE 20
50 #define BLAME_DEFAULT_COPY_SCORE 40
52 /* bits #0..7 in revision.h, #8..11 used for merge_bases() in commit.c */
53 #define METAINFO_SHOWN (1u<<12)
54 #define MORE_THAN_ONE_PATH (1u<<13)
57 * One blob in a commit
59 struct origin {
60 struct commit *commit;
61 unsigned char blob_sha1[20];
62 char path[FLEX_ARRAY];
65 struct blame_entry {
66 struct blame_entry *prev;
67 struct blame_entry *next;
69 /* the first line of this group in the final image;
70 * internally all line numbers are 0 based.
72 int lno;
74 /* how many lines this group has */
75 int num_lines;
77 /* the commit that introduced this group into the final image */
78 struct origin *suspect;
80 /* true if the suspect is truly guilty; false while we have not
81 * checked if the group came from one of its parents.
83 char guilty;
85 /* the line number of the first line of this group in the
86 * suspect's file; internally all line numbers are 0 based.
88 int s_lno;
90 /* how significant this entry is -- cached to avoid
91 * scanning the lines over and over
93 unsigned score;
96 struct scoreboard {
97 /* the final commit (i.e. where we started digging from) */
98 struct commit *final;
100 const char *path;
102 /* the contents in the final; pointed into by buf pointers of
103 * blame_entries
105 const char *final_buf;
106 unsigned long final_buf_size;
108 /* linked list of blames */
109 struct blame_entry *ent;
111 /* look-up a line in the final buffer */
112 int num_lines;
113 int *lineno;
116 static int cmp_suspect(struct origin *a, struct origin *b)
118 int cmp = hashcmp(a->commit->object.sha1, b->commit->object.sha1);
119 if (cmp)
120 return cmp;
121 return strcmp(a->path, b->path);
124 static void coalesce(struct scoreboard *sb)
126 struct blame_entry *ent, *next;
128 for (ent = sb->ent; ent && (next = ent->next); ent = next) {
129 if (!cmp_suspect(ent->suspect, next->suspect) &&
130 ent->guilty == next->guilty &&
131 ent->s_lno + ent->num_lines == next->s_lno) {
132 ent->num_lines += next->num_lines;
133 ent->next = next->next;
134 if (ent->next)
135 ent->next->prev = ent;
136 free(next);
137 ent->score = 0;
138 next = ent; /* again */
143 static struct origin *get_origin(struct scoreboard *sb,
144 struct commit *commit,
145 const char *path)
147 struct blame_entry *e;
148 struct origin *o;
150 for (e = sb->ent; e; e = e->next) {
151 if (e->suspect->commit == commit &&
152 !strcmp(e->suspect->path, path))
153 return e->suspect;
155 o = xcalloc(1, sizeof(*o) + strlen(path) + 1);
156 o->commit = commit;
157 strcpy(o->path, path);
158 return o;
161 static int fill_blob_sha1(struct origin *origin)
163 unsigned mode;
164 char type[10];
166 if (!is_null_sha1(origin->blob_sha1))
167 return 0;
168 if (get_tree_entry(origin->commit->object.sha1,
169 origin->path,
170 origin->blob_sha1, &mode))
171 goto error_out;
172 if (sha1_object_info(origin->blob_sha1, type, NULL) ||
173 strcmp(type, blob_type))
174 goto error_out;
175 return 0;
176 error_out:
177 hashclr(origin->blob_sha1);
178 return -1;
181 static struct origin *find_origin(struct scoreboard *sb,
182 struct commit *parent,
183 struct origin *origin)
185 struct origin *porigin = NULL;
186 struct diff_options diff_opts;
187 int i;
188 const char *paths[2];
190 /* See if the origin->path is different between parent
191 * and origin first. Most of the time they are the
192 * same and diff-tree is fairly efficient about this.
194 diff_setup(&diff_opts);
195 diff_opts.recursive = 1;
196 diff_opts.detect_rename = 0;
197 diff_opts.output_format = DIFF_FORMAT_NO_OUTPUT;
198 paths[0] = origin->path;
199 paths[1] = NULL;
201 diff_tree_setup_paths(paths, &diff_opts);
202 if (diff_setup_done(&diff_opts) < 0)
203 die("diff-setup");
204 diff_tree_sha1(parent->tree->object.sha1,
205 origin->commit->tree->object.sha1,
206 "", &diff_opts);
207 diffcore_std(&diff_opts);
209 /* It is either one entry that says "modified", or "created",
210 * or nothing.
212 if (!diff_queued_diff.nr) {
213 /* The path is the same as parent */
214 porigin = get_origin(sb, parent, origin->path);
215 hashcpy(porigin->blob_sha1, origin->blob_sha1);
217 else if (diff_queued_diff.nr != 1)
218 die("internal error in pickaxe::find_origin");
219 else {
220 struct diff_filepair *p = diff_queued_diff.queue[0];
221 switch (p->status) {
222 default:
223 die("internal error in pickaxe::find_origin (%c)",
224 p->status);
225 case 'M':
226 porigin = get_origin(sb, parent, origin->path);
227 hashcpy(porigin->blob_sha1, p->one->sha1);
228 break;
229 case 'A':
230 case 'T':
231 /* Did not exist in parent, or type changed */
232 break;
235 diff_flush(&diff_opts);
236 if (porigin)
237 return porigin;
239 /* Otherwise we would look for a rename */
241 diff_setup(&diff_opts);
242 diff_opts.recursive = 1;
243 diff_opts.detect_rename = DIFF_DETECT_RENAME;
244 diff_opts.output_format = DIFF_FORMAT_NO_OUTPUT;
245 paths[0] = NULL;
246 diff_tree_setup_paths(paths, &diff_opts);
247 if (diff_setup_done(&diff_opts) < 0)
248 die("diff-setup");
249 diff_tree_sha1(parent->tree->object.sha1,
250 origin->commit->tree->object.sha1,
251 "", &diff_opts);
252 diffcore_std(&diff_opts);
254 for (i = 0; i < diff_queued_diff.nr; i++) {
255 struct diff_filepair *p = diff_queued_diff.queue[i];
256 if ((p->status == 'R' || p->status == 'C') &&
257 !strcmp(p->two->path, origin->path)) {
258 porigin = get_origin(sb, parent, p->one->path);
259 hashcpy(porigin->blob_sha1, p->one->sha1);
260 break;
263 diff_flush(&diff_opts);
264 return porigin;
267 struct chunk {
268 /* line number in postimage; up to but not including this
269 * line is the same as preimage
271 int same;
273 /* preimage line number after this chunk */
274 int p_next;
276 /* postimage line number after this chunk */
277 int t_next;
280 struct patch {
281 struct chunk *chunks;
282 int num;
285 struct blame_diff_state {
286 struct xdiff_emit_state xm;
287 struct patch *ret;
288 unsigned hunk_post_context;
289 unsigned hunk_in_pre_context : 1;
292 static void process_u_diff(void *state_, char *line, unsigned long len)
294 struct blame_diff_state *state = state_;
295 struct chunk *chunk;
296 int off1, off2, len1, len2, num;
298 num = state->ret->num;
299 if (len < 4 || line[0] != '@' || line[1] != '@') {
300 if (state->hunk_in_pre_context && line[0] == ' ')
301 state->ret->chunks[num - 1].same++;
302 else {
303 state->hunk_in_pre_context = 0;
304 if (line[0] == ' ')
305 state->hunk_post_context++;
306 else
307 state->hunk_post_context = 0;
309 return;
312 if (num && state->hunk_post_context) {
313 chunk = &state->ret->chunks[num - 1];
314 chunk->p_next -= state->hunk_post_context;
315 chunk->t_next -= state->hunk_post_context;
317 state->ret->num = ++num;
318 state->ret->chunks = xrealloc(state->ret->chunks,
319 sizeof(struct chunk) * num);
320 chunk = &state->ret->chunks[num - 1];
321 if (parse_hunk_header(line, len, &off1, &len1, &off2, &len2)) {
322 state->ret->num--;
323 return;
326 /* Line numbers in patch output are one based. */
327 off1--;
328 off2--;
330 chunk->same = len2 ? off2 : (off2 + 1);
332 chunk->p_next = off1 + (len1 ? len1 : 1);
333 chunk->t_next = chunk->same + len2;
334 state->hunk_in_pre_context = 1;
335 state->hunk_post_context = 0;
338 static struct patch *compare_buffer(mmfile_t *file_p, mmfile_t *file_o,
339 int context)
341 struct blame_diff_state state;
342 xpparam_t xpp;
343 xdemitconf_t xecfg;
344 xdemitcb_t ecb;
346 xpp.flags = XDF_NEED_MINIMAL;
347 xecfg.ctxlen = context;
348 xecfg.flags = 0;
349 ecb.outf = xdiff_outf;
350 ecb.priv = &state;
351 memset(&state, 0, sizeof(state));
352 state.xm.consume = process_u_diff;
353 state.ret = xmalloc(sizeof(struct patch));
354 state.ret->chunks = NULL;
355 state.ret->num = 0;
357 xdl_diff(file_p, file_o, &xpp, &xecfg, &ecb);
359 if (state.ret->num) {
360 struct chunk *chunk;
361 chunk = &state.ret->chunks[state.ret->num - 1];
362 chunk->p_next -= state.hunk_post_context;
363 chunk->t_next -= state.hunk_post_context;
365 return state.ret;
368 static struct patch *get_patch(struct origin *parent, struct origin *origin)
370 mmfile_t file_p, file_o;
371 char type[10];
372 char *blob_p, *blob_o;
373 struct patch *patch;
375 blob_p = read_sha1_file(parent->blob_sha1, type,
376 (unsigned long *) &file_p.size);
377 blob_o = read_sha1_file(origin->blob_sha1, type,
378 (unsigned long *) &file_o.size);
379 file_p.ptr = blob_p;
380 file_o.ptr = blob_o;
381 if (!file_p.ptr || !file_o.ptr) {
382 free(blob_p);
383 free(blob_o);
384 return NULL;
387 patch = compare_buffer(&file_p, &file_o, 0);
388 free(blob_p);
389 free(blob_o);
390 return patch;
393 static void free_patch(struct patch *p)
395 free(p->chunks);
396 free(p);
399 static void add_blame_entry(struct scoreboard *sb, struct blame_entry *e)
401 struct blame_entry *ent, *prev = NULL;
403 for (ent = sb->ent; ent && ent->lno < e->lno; ent = ent->next)
404 prev = ent;
406 /* prev, if not NULL, is the last one that is below e */
407 e->prev = prev;
408 if (prev) {
409 e->next = prev->next;
410 prev->next = e;
412 else {
413 e->next = sb->ent;
414 sb->ent = e;
416 if (e->next)
417 e->next->prev = e;
420 static void dup_entry(struct blame_entry *dst, struct blame_entry *src)
422 struct blame_entry *p, *n;
423 p = dst->prev;
424 n = dst->next;
425 memcpy(dst, src, sizeof(*src));
426 dst->prev = p;
427 dst->next = n;
428 dst->score = 0;
431 static const char *nth_line(struct scoreboard *sb, int lno)
433 return sb->final_buf + sb->lineno[lno];
436 static void split_overlap(struct blame_entry split[3],
437 struct blame_entry *e,
438 int tlno, int plno, int same,
439 struct origin *parent)
441 /* it is known that lines between tlno to same came from
442 * parent, and e has an overlap with that range. it also is
443 * known that parent's line plno corresponds to e's line tlno.
445 * <---- e ----->
446 * <------>
447 * <------------>
448 * <------------>
449 * <------------------>
451 * Potentially we need to split e into three parts; before
452 * this chunk, the chunk to be blamed for parent, and after
453 * that portion.
455 int chunk_end_lno;
456 memset(split, 0, sizeof(struct blame_entry [3]));
458 if (e->s_lno < tlno) {
459 /* there is a pre-chunk part not blamed on parent */
460 split[0].suspect = e->suspect;
461 split[0].lno = e->lno;
462 split[0].s_lno = e->s_lno;
463 split[0].num_lines = tlno - e->s_lno;
464 split[1].lno = e->lno + tlno - e->s_lno;
465 split[1].s_lno = plno;
467 else {
468 split[1].lno = e->lno;
469 split[1].s_lno = plno + (e->s_lno - tlno);
472 if (same < e->s_lno + e->num_lines) {
473 /* there is a post-chunk part not blamed on parent */
474 split[2].suspect = e->suspect;
475 split[2].lno = e->lno + (same - e->s_lno);
476 split[2].s_lno = e->s_lno + (same - e->s_lno);
477 split[2].num_lines = e->s_lno + e->num_lines - same;
478 chunk_end_lno = split[2].lno;
480 else
481 chunk_end_lno = e->lno + e->num_lines;
482 split[1].num_lines = chunk_end_lno - split[1].lno;
484 if (split[1].num_lines < 1)
485 return;
486 split[1].suspect = parent;
489 static void split_blame(struct scoreboard *sb,
490 struct blame_entry split[3],
491 struct blame_entry *e)
493 struct blame_entry *new_entry;
495 if (split[0].suspect && split[2].suspect) {
496 /* we need to split e into two and add another for parent */
497 dup_entry(e, &split[0]);
499 new_entry = xmalloc(sizeof(*new_entry));
500 memcpy(new_entry, &(split[2]), sizeof(struct blame_entry));
501 add_blame_entry(sb, new_entry);
503 new_entry = xmalloc(sizeof(*new_entry));
504 memcpy(new_entry, &(split[1]), sizeof(struct blame_entry));
505 add_blame_entry(sb, new_entry);
507 else if (!split[0].suspect && !split[2].suspect)
508 /* parent covers the entire area */
509 dup_entry(e, &split[1]);
510 else if (split[0].suspect) {
511 dup_entry(e, &split[0]);
513 new_entry = xmalloc(sizeof(*new_entry));
514 memcpy(new_entry, &(split[1]), sizeof(struct blame_entry));
515 add_blame_entry(sb, new_entry);
517 else {
518 dup_entry(e, &split[1]);
520 new_entry = xmalloc(sizeof(*new_entry));
521 memcpy(new_entry, &(split[2]), sizeof(struct blame_entry));
522 add_blame_entry(sb, new_entry);
525 if (1) { /* sanity */
526 struct blame_entry *ent;
527 int lno = sb->ent->lno, corrupt = 0;
529 for (ent = sb->ent; ent; ent = ent->next) {
530 if (lno != ent->lno)
531 corrupt = 1;
532 if (ent->s_lno < 0)
533 corrupt = 1;
534 lno += ent->num_lines;
536 if (corrupt) {
537 lno = sb->ent->lno;
538 for (ent = sb->ent; ent; ent = ent->next) {
539 printf("L %8d l %8d n %8d\n",
540 lno, ent->lno, ent->num_lines);
541 lno = ent->lno + ent->num_lines;
543 die("oops");
548 static void blame_overlap(struct scoreboard *sb, struct blame_entry *e,
549 int tlno, int plno, int same,
550 struct origin *parent)
552 struct blame_entry split[3];
554 split_overlap(split, e, tlno, plno, same, parent);
555 if (!split[1].suspect)
556 return;
557 split_blame(sb, split, e);
560 static int find_last_in_target(struct scoreboard *sb, struct origin *target)
562 struct blame_entry *e;
563 int last_in_target = -1;
565 for (e = sb->ent; e; e = e->next) {
566 if (e->guilty || cmp_suspect(e->suspect, target))
567 continue;
568 if (last_in_target < e->s_lno + e->num_lines)
569 last_in_target = e->s_lno + e->num_lines;
571 return last_in_target;
574 static void blame_chunk(struct scoreboard *sb,
575 int tlno, int plno, int same,
576 struct origin *target, struct origin *parent)
578 struct blame_entry *e;
580 for (e = sb->ent; e; e = e->next) {
581 if (e->guilty || cmp_suspect(e->suspect, target))
582 continue;
583 if (same <= e->s_lno)
584 continue;
585 if (tlno < e->s_lno + e->num_lines)
586 blame_overlap(sb, e, tlno, plno, same, parent);
590 static int pass_blame_to_parent(struct scoreboard *sb,
591 struct origin *target,
592 struct origin *parent)
594 int i, last_in_target, plno, tlno;
595 struct patch *patch;
597 last_in_target = find_last_in_target(sb, target);
598 if (last_in_target < 0)
599 return 1; /* nothing remains for this target */
601 patch = get_patch(parent, target);
602 plno = tlno = 0;
603 for (i = 0; i < patch->num; i++) {
604 struct chunk *chunk = &patch->chunks[i];
606 blame_chunk(sb, tlno, plno, chunk->same, target, parent);
607 plno = chunk->p_next;
608 tlno = chunk->t_next;
610 /* rest (i.e. anything above tlno) are the same as parent */
611 blame_chunk(sb, tlno, plno, last_in_target, target, parent);
613 free_patch(patch);
614 return 0;
617 static unsigned ent_score(struct scoreboard *sb, struct blame_entry *e)
619 unsigned score;
620 const char *cp, *ep;
622 if (e->score)
623 return e->score;
625 score = 1;
626 cp = nth_line(sb, e->lno);
627 ep = nth_line(sb, e->lno + e->num_lines);
628 while (cp < ep) {
629 unsigned ch = *((unsigned char *)cp);
630 if (isalnum(ch))
631 score++;
632 cp++;
634 e->score = score;
635 return score;
638 static void copy_split_if_better(struct scoreboard *sb,
639 struct blame_entry best_so_far[3],
640 struct blame_entry this[3])
642 if (!this[1].suspect)
643 return;
644 if (best_so_far[1].suspect) {
645 if (ent_score(sb, &this[1]) < ent_score(sb, &best_so_far[1]))
646 return;
648 memcpy(best_so_far, this, sizeof(struct blame_entry [3]));
651 static void find_copy_in_blob(struct scoreboard *sb,
652 struct blame_entry *ent,
653 struct origin *parent,
654 struct blame_entry split[3],
655 mmfile_t *file_p)
657 const char *cp;
658 int cnt;
659 mmfile_t file_o;
660 struct patch *patch;
661 int i, plno, tlno;
663 cp = nth_line(sb, ent->lno);
664 file_o.ptr = (char*) cp;
665 cnt = ent->num_lines;
667 while (cnt && cp < sb->final_buf + sb->final_buf_size) {
668 if (*cp++ == '\n')
669 cnt--;
671 file_o.size = cp - file_o.ptr;
673 patch = compare_buffer(file_p, &file_o, 1);
675 memset(split, 0, sizeof(struct blame_entry [3]));
676 plno = tlno = 0;
677 for (i = 0; i < patch->num; i++) {
678 struct chunk *chunk = &patch->chunks[i];
680 /* tlno to chunk->same are the same as ent */
681 if (ent->num_lines <= tlno)
682 break;
683 if (tlno < chunk->same) {
684 struct blame_entry this[3];
685 split_overlap(this, ent,
686 tlno + ent->s_lno, plno,
687 chunk->same + ent->s_lno,
688 parent);
689 copy_split_if_better(sb, split, this);
691 plno = chunk->p_next;
692 tlno = chunk->t_next;
694 free_patch(patch);
697 static int find_move_in_parent(struct scoreboard *sb,
698 struct origin *target,
699 struct origin *parent)
701 int last_in_target;
702 struct blame_entry *e, split[3];
703 mmfile_t file_p;
704 char type[10];
705 char *blob_p;
707 last_in_target = find_last_in_target(sb, target);
708 if (last_in_target < 0)
709 return 1; /* nothing remains for this target */
711 blob_p = read_sha1_file(parent->blob_sha1, type,
712 (unsigned long *) &file_p.size);
713 file_p.ptr = blob_p;
714 if (!file_p.ptr) {
715 free(blob_p);
716 return 0;
719 for (e = sb->ent; e; e = e->next) {
720 if (e->guilty || cmp_suspect(e->suspect, target))
721 continue;
722 find_copy_in_blob(sb, e, parent, split, &file_p);
723 if (split[1].suspect &&
724 blame_move_score < ent_score(sb, &split[1]))
725 split_blame(sb, split, e);
727 free(blob_p);
728 return 0;
731 static int find_copy_in_parent(struct scoreboard *sb,
732 struct origin *target,
733 struct commit *parent,
734 struct origin *porigin,
735 int opt)
737 struct diff_options diff_opts;
738 const char *paths[1];
739 struct blame_entry *e;
740 int i;
742 if (find_last_in_target(sb, target) < 0)
743 return 1; /* nothing remains for this target */
745 diff_setup(&diff_opts);
746 diff_opts.recursive = 1;
747 diff_opts.output_format = DIFF_FORMAT_NO_OUTPUT;
749 /* Try "find copies harder" on new path */
750 if ((opt & PICKAXE_BLAME_COPY_HARDER) &&
751 (!porigin || strcmp(target->path, porigin->path))) {
752 diff_opts.detect_rename = DIFF_DETECT_COPY;
753 diff_opts.find_copies_harder = 1;
755 paths[0] = NULL;
756 diff_tree_setup_paths(paths, &diff_opts);
757 if (diff_setup_done(&diff_opts) < 0)
758 die("diff-setup");
759 diff_tree_sha1(parent->tree->object.sha1,
760 target->commit->tree->object.sha1,
761 "", &diff_opts);
762 diffcore_std(&diff_opts);
766 * NEEDSWORK: This loop is wasteful in that it opens the same
767 * blob in the parent number of times. We should swap the
768 * loop inside out, which would require keeping track of
769 * "best blame so far" for blame entries that the current
770 * "target" is being suspected.
773 for (e = sb->ent; e; e = e->next) {
774 struct blame_entry split[3];
775 if (e->guilty || cmp_suspect(e->suspect, target))
776 continue;
778 memset(split, 0, sizeof(split));
779 for (i = 0; i < diff_queued_diff.nr; i++) {
780 struct diff_filepair *p = diff_queued_diff.queue[i];
781 struct origin *norigin;
782 mmfile_t file_p;
783 char type[10];
784 char *blob;
785 struct blame_entry this[3];
787 if (!DIFF_FILE_VALID(p->one))
788 continue; /* does not exist in parent */
789 if (porigin && !strcmp(p->one->path, porigin->path))
790 /* find_move already dealt with this path */
791 continue;
792 norigin = get_origin(sb, parent, p->one->path);
793 hashcpy(norigin->blob_sha1, p->one->sha1);
794 blob = read_sha1_file(norigin->blob_sha1, type,
795 (unsigned long *) &file_p.size);
796 file_p.ptr = blob;
797 if (!file_p.ptr) {
798 free(blob);
799 continue;
801 find_copy_in_blob(sb, e, norigin, this, &file_p);
802 copy_split_if_better(sb, split, this);
803 free(blob);
805 if (split[1].suspect &&
806 blame_copy_score < ent_score(sb, &split[1]))
807 split_blame(sb, split, e);
809 diff_flush(&diff_opts);
811 return 0;
814 #define MAXPARENT 16
816 static void pass_blame(struct scoreboard *sb, struct origin *origin, int opt)
818 int i;
819 struct commit *commit = origin->commit;
820 struct commit_list *parent;
821 struct origin *parent_origin[MAXPARENT], *porigin;
823 memset(parent_origin, 0, sizeof(parent_origin));
824 for (i = 0, parent = commit->parents;
825 i < MAXPARENT && parent;
826 parent = parent->next, i++) {
827 struct commit *p = parent->item;
829 if (parse_commit(p))
830 continue;
831 porigin = find_origin(sb, parent->item, origin);
832 if (!porigin)
833 continue;
834 if (!hashcmp(porigin->blob_sha1, origin->blob_sha1)) {
835 struct blame_entry *e;
836 for (e = sb->ent; e; e = e->next)
837 if (e->suspect == origin)
838 e->suspect = porigin;
839 return;
841 parent_origin[i] = porigin;
844 for (i = 0, parent = commit->parents;
845 i < MAXPARENT && parent;
846 parent = parent->next, i++) {
847 struct origin *porigin = parent_origin[i];
848 if (!porigin)
849 continue;
850 if (pass_blame_to_parent(sb, origin, porigin))
851 return;
855 * Optionally run "miff" to find moves in parents' files here.
857 if (opt & PICKAXE_BLAME_MOVE)
858 for (i = 0, parent = commit->parents;
859 i < MAXPARENT && parent;
860 parent = parent->next, i++) {
861 struct origin *porigin = parent_origin[i];
862 if (!porigin)
863 continue;
864 if (find_move_in_parent(sb, origin, porigin))
865 return;
869 * Optionally run "ciff" to find copies from parents' files here.
871 if (opt & PICKAXE_BLAME_COPY)
872 for (i = 0, parent = commit->parents;
873 i < MAXPARENT && parent;
874 parent = parent->next, i++) {
875 struct origin *porigin = parent_origin[i];
876 if (find_copy_in_parent(sb, origin, parent->item,
877 porigin, opt))
878 return;
882 static void assign_blame(struct scoreboard *sb, struct rev_info *revs, int opt)
884 while (1) {
885 struct blame_entry *ent;
886 struct commit *commit;
887 struct origin *suspect = NULL;
889 /* find one suspect to break down */
890 for (ent = sb->ent; !suspect && ent; ent = ent->next)
891 if (!ent->guilty)
892 suspect = ent->suspect;
893 if (!suspect)
894 return; /* all done */
896 commit = suspect->commit;
897 parse_commit(commit);
898 if (!(commit->object.flags & UNINTERESTING) &&
899 !(revs->max_age != -1 && commit->date < revs->max_age))
900 pass_blame(sb, suspect, opt);
902 /* Take responsibility for the remaining entries */
903 for (ent = sb->ent; ent; ent = ent->next)
904 if (!cmp_suspect(ent->suspect, suspect))
905 ent->guilty = 1;
907 coalesce(sb);
911 static const char *format_time(unsigned long time, const char *tz_str,
912 int show_raw_time)
914 static char time_buf[128];
915 time_t t = time;
916 int minutes, tz;
917 struct tm *tm;
919 if (show_raw_time) {
920 sprintf(time_buf, "%lu %s", time, tz_str);
921 return time_buf;
924 tz = atoi(tz_str);
925 minutes = tz < 0 ? -tz : tz;
926 minutes = (minutes / 100)*60 + (minutes % 100);
927 minutes = tz < 0 ? -minutes : minutes;
928 t = time + minutes * 60;
929 tm = gmtime(&t);
931 strftime(time_buf, sizeof(time_buf), "%Y-%m-%d %H:%M:%S ", tm);
932 strcat(time_buf, tz_str);
933 return time_buf;
936 struct commit_info
938 char *author;
939 char *author_mail;
940 unsigned long author_time;
941 char *author_tz;
943 /* filled only when asked for details */
944 char *committer;
945 char *committer_mail;
946 unsigned long committer_time;
947 char *committer_tz;
949 char *summary;
952 static void get_ac_line(const char *inbuf, const char *what,
953 int bufsz, char *person, char **mail,
954 unsigned long *time, char **tz)
956 int len;
957 char *tmp, *endp;
959 tmp = strstr(inbuf, what);
960 if (!tmp)
961 goto error_out;
962 tmp += strlen(what);
963 endp = strchr(tmp, '\n');
964 if (!endp)
965 len = strlen(tmp);
966 else
967 len = endp - tmp;
968 if (bufsz <= len) {
969 error_out:
970 /* Ugh */
971 person = *mail = *tz = "(unknown)";
972 *time = 0;
973 return;
975 memcpy(person, tmp, len);
977 tmp = person;
978 tmp += len;
979 *tmp = 0;
980 while (*tmp != ' ')
981 tmp--;
982 *tz = tmp+1;
984 *tmp = 0;
985 while (*tmp != ' ')
986 tmp--;
987 *time = strtoul(tmp, NULL, 10);
989 *tmp = 0;
990 while (*tmp != ' ')
991 tmp--;
992 *mail = tmp + 1;
993 *tmp = 0;
996 static void get_commit_info(struct commit *commit,
997 struct commit_info *ret,
998 int detailed)
1000 int len;
1001 char *tmp, *endp;
1002 static char author_buf[1024];
1003 static char committer_buf[1024];
1004 static char summary_buf[1024];
1006 /* We've operated without save_commit_buffer, so
1007 * we now need to populate them for output.
1009 if (!commit->buffer) {
1010 char type[20];
1011 unsigned long size;
1012 commit->buffer =
1013 read_sha1_file(commit->object.sha1, type, &size);
1015 ret->author = author_buf;
1016 get_ac_line(commit->buffer, "\nauthor ",
1017 sizeof(author_buf), author_buf, &ret->author_mail,
1018 &ret->author_time, &ret->author_tz);
1020 if (!detailed)
1021 return;
1023 ret->committer = committer_buf;
1024 get_ac_line(commit->buffer, "\ncommitter ",
1025 sizeof(committer_buf), committer_buf, &ret->committer_mail,
1026 &ret->committer_time, &ret->committer_tz);
1028 ret->summary = summary_buf;
1029 tmp = strstr(commit->buffer, "\n\n");
1030 if (!tmp) {
1031 error_out:
1032 sprintf(summary_buf, "(%s)", sha1_to_hex(commit->object.sha1));
1033 return;
1035 tmp += 2;
1036 endp = strchr(tmp, '\n');
1037 if (!endp)
1038 goto error_out;
1039 len = endp - tmp;
1040 if (len >= sizeof(summary_buf))
1041 goto error_out;
1042 memcpy(summary_buf, tmp, len);
1043 summary_buf[len] = 0;
1046 #define OUTPUT_ANNOTATE_COMPAT 001
1047 #define OUTPUT_LONG_OBJECT_NAME 002
1048 #define OUTPUT_RAW_TIMESTAMP 004
1049 #define OUTPUT_PORCELAIN 010
1050 #define OUTPUT_SHOW_NAME 020
1051 #define OUTPUT_SHOW_NUMBER 040
1052 #define OUTPUT_SHOW_SCORE 0100
1054 static void emit_porcelain(struct scoreboard *sb, struct blame_entry *ent)
1056 int cnt;
1057 const char *cp;
1058 struct origin *suspect = ent->suspect;
1059 char hex[41];
1061 strcpy(hex, sha1_to_hex(suspect->commit->object.sha1));
1062 printf("%s%c%d %d %d\n",
1063 hex,
1064 ent->guilty ? ' ' : '*', // purely for debugging
1065 ent->s_lno + 1,
1066 ent->lno + 1,
1067 ent->num_lines);
1068 if (!(suspect->commit->object.flags & METAINFO_SHOWN)) {
1069 struct commit_info ci;
1070 suspect->commit->object.flags |= METAINFO_SHOWN;
1071 get_commit_info(suspect->commit, &ci, 1);
1072 printf("author %s\n", ci.author);
1073 printf("author-mail %s\n", ci.author_mail);
1074 printf("author-time %lu\n", ci.author_time);
1075 printf("author-tz %s\n", ci.author_tz);
1076 printf("committer %s\n", ci.committer);
1077 printf("committer-mail %s\n", ci.committer_mail);
1078 printf("committer-time %lu\n", ci.committer_time);
1079 printf("committer-tz %s\n", ci.committer_tz);
1080 printf("filename %s\n", suspect->path);
1081 printf("summary %s\n", ci.summary);
1083 else if (suspect->commit->object.flags & MORE_THAN_ONE_PATH)
1084 printf("filename %s\n", suspect->path);
1086 cp = nth_line(sb, ent->lno);
1087 for (cnt = 0; cnt < ent->num_lines; cnt++) {
1088 char ch;
1089 if (cnt)
1090 printf("%s %d %d\n", hex,
1091 ent->s_lno + 1 + cnt,
1092 ent->lno + 1 + cnt);
1093 putchar('\t');
1094 do {
1095 ch = *cp++;
1096 putchar(ch);
1097 } while (ch != '\n' &&
1098 cp < sb->final_buf + sb->final_buf_size);
1102 static void emit_other(struct scoreboard *sb, struct blame_entry *ent, int opt)
1104 int cnt;
1105 const char *cp;
1106 struct origin *suspect = ent->suspect;
1107 struct commit_info ci;
1108 char hex[41];
1109 int show_raw_time = !!(opt & OUTPUT_RAW_TIMESTAMP);
1111 get_commit_info(suspect->commit, &ci, 1);
1112 strcpy(hex, sha1_to_hex(suspect->commit->object.sha1));
1114 cp = nth_line(sb, ent->lno);
1115 for (cnt = 0; cnt < ent->num_lines; cnt++) {
1116 char ch;
1118 printf("%.*s", (opt & OUTPUT_LONG_OBJECT_NAME) ? 40 : 8, hex);
1119 if (opt & OUTPUT_ANNOTATE_COMPAT)
1120 printf("\t(%10s\t%10s\t%d)", ci.author,
1121 format_time(ci.author_time, ci.author_tz,
1122 show_raw_time),
1123 ent->lno + 1 + cnt);
1124 else {
1125 if (opt & OUTPUT_SHOW_SCORE)
1126 printf(" %*d", max_score_digits, ent->score);
1127 if (opt & OUTPUT_SHOW_NAME)
1128 printf(" %-*.*s", longest_file, longest_file,
1129 suspect->path);
1130 if (opt & OUTPUT_SHOW_NUMBER)
1131 printf(" %*d", max_orig_digits,
1132 ent->s_lno + 1 + cnt);
1133 printf(" (%-*.*s %10s %*d) ",
1134 longest_author, longest_author, ci.author,
1135 format_time(ci.author_time, ci.author_tz,
1136 show_raw_time),
1137 max_digits, ent->lno + 1 + cnt);
1139 do {
1140 ch = *cp++;
1141 putchar(ch);
1142 } while (ch != '\n' &&
1143 cp < sb->final_buf + sb->final_buf_size);
1147 static void output(struct scoreboard *sb, int option)
1149 struct blame_entry *ent;
1151 if (option & OUTPUT_PORCELAIN) {
1152 for (ent = sb->ent; ent; ent = ent->next) {
1153 struct blame_entry *oth;
1154 struct origin *suspect = ent->suspect;
1155 struct commit *commit = suspect->commit;
1156 if (commit->object.flags & MORE_THAN_ONE_PATH)
1157 continue;
1158 for (oth = ent->next; oth; oth = oth->next) {
1159 if ((oth->suspect->commit != commit) ||
1160 !strcmp(oth->suspect->path, suspect->path))
1161 continue;
1162 commit->object.flags |= MORE_THAN_ONE_PATH;
1163 break;
1168 for (ent = sb->ent; ent; ent = ent->next) {
1169 if (option & OUTPUT_PORCELAIN)
1170 emit_porcelain(sb, ent);
1171 else {
1172 emit_other(sb, ent, option);
1177 static int prepare_lines(struct scoreboard *sb)
1179 const char *buf = sb->final_buf;
1180 unsigned long len = sb->final_buf_size;
1181 int num = 0, incomplete = 0, bol = 1;
1183 if (len && buf[len-1] != '\n')
1184 incomplete++; /* incomplete line at the end */
1185 while (len--) {
1186 if (bol) {
1187 sb->lineno = xrealloc(sb->lineno,
1188 sizeof(int* ) * (num + 1));
1189 sb->lineno[num] = buf - sb->final_buf;
1190 bol = 0;
1192 if (*buf++ == '\n') {
1193 num++;
1194 bol = 1;
1197 sb->lineno = xrealloc(sb->lineno,
1198 sizeof(int* ) * (num + incomplete + 1));
1199 sb->lineno[num + incomplete] = buf - sb->final_buf;
1200 sb->num_lines = num + incomplete;
1201 return sb->num_lines;
1204 static int read_ancestry(const char *graft_file)
1206 FILE *fp = fopen(graft_file, "r");
1207 char buf[1024];
1208 if (!fp)
1209 return -1;
1210 while (fgets(buf, sizeof(buf), fp)) {
1211 /* The format is just "Commit Parent1 Parent2 ...\n" */
1212 int len = strlen(buf);
1213 struct commit_graft *graft = read_graft_line(buf, len);
1214 register_commit_graft(graft, 0);
1216 fclose(fp);
1217 return 0;
1220 static int lineno_width(int lines)
1222 int i, width;
1224 for (width = 1, i = 10; i <= lines + 1; width++)
1225 i *= 10;
1226 return width;
1229 static void find_alignment(struct scoreboard *sb, int *option)
1231 int longest_src_lines = 0;
1232 int longest_dst_lines = 0;
1233 unsigned largest_score = 0;
1234 struct blame_entry *e;
1236 for (e = sb->ent; e; e = e->next) {
1237 struct origin *suspect = e->suspect;
1238 struct commit_info ci;
1239 int num;
1241 if (!(suspect->commit->object.flags & METAINFO_SHOWN)) {
1242 suspect->commit->object.flags |= METAINFO_SHOWN;
1243 get_commit_info(suspect->commit, &ci, 1);
1244 if (strcmp(suspect->path, sb->path))
1245 *option |= OUTPUT_SHOW_NAME;
1246 num = strlen(suspect->path);
1247 if (longest_file < num)
1248 longest_file = num;
1249 num = strlen(ci.author);
1250 if (longest_author < num)
1251 longest_author = num;
1253 num = e->s_lno + e->num_lines;
1254 if (longest_src_lines < num)
1255 longest_src_lines = num;
1256 num = e->lno + e->num_lines;
1257 if (longest_dst_lines < num)
1258 longest_dst_lines = num;
1259 if (largest_score < ent_score(sb, e))
1260 largest_score = ent_score(sb, e);
1262 max_orig_digits = lineno_width(longest_src_lines);
1263 max_digits = lineno_width(longest_dst_lines);
1264 max_score_digits = lineno_width(largest_score);
1267 static int has_path_in_work_tree(const char *path)
1269 struct stat st;
1270 return !lstat(path, &st);
1273 static unsigned parse_score(const char *arg)
1275 char *end;
1276 unsigned long score = strtoul(arg, &end, 10);
1277 if (*end)
1278 return 0;
1279 return score;
1282 int cmd_pickaxe(int argc, const char **argv, const char *prefix)
1284 struct rev_info revs;
1285 const char *path;
1286 struct scoreboard sb;
1287 struct origin *o;
1288 struct blame_entry *ent;
1289 int i, seen_dashdash, unk, opt;
1290 long bottom, top, lno;
1291 int output_option = 0;
1292 const char *revs_file = NULL;
1293 const char *final_commit_name = NULL;
1294 char type[10];
1296 save_commit_buffer = 0;
1298 opt = 0;
1299 bottom = top = 0;
1300 seen_dashdash = 0;
1301 for (unk = i = 1; i < argc; i++) {
1302 const char *arg = argv[i];
1303 if (*arg != '-')
1304 break;
1305 else if (!strcmp("-c", arg))
1306 output_option |= OUTPUT_ANNOTATE_COMPAT;
1307 else if (!strcmp("-t", arg))
1308 output_option |= OUTPUT_RAW_TIMESTAMP;
1309 else if (!strcmp("-l", arg))
1310 output_option |= OUTPUT_LONG_OBJECT_NAME;
1311 else if (!strcmp("-S", arg) && ++i < argc)
1312 revs_file = argv[i];
1313 else if (!strncmp("-M", arg, 2)) {
1314 opt |= PICKAXE_BLAME_MOVE;
1315 blame_move_score = parse_score(arg+2);
1317 else if (!strncmp("-C", arg, 2)) {
1318 if (opt & PICKAXE_BLAME_COPY)
1319 opt |= PICKAXE_BLAME_COPY_HARDER;
1320 opt |= PICKAXE_BLAME_COPY | PICKAXE_BLAME_MOVE;
1321 blame_copy_score = parse_score(arg+2);
1323 else if (!strcmp("-L", arg) && ++i < argc) {
1324 char *term;
1325 arg = argv[i];
1326 if (bottom || top)
1327 die("More than one '-L n,m' option given");
1328 bottom = strtol(arg, &term, 10);
1329 if (*term == ',') {
1330 top = strtol(term + 1, &term, 10);
1331 if (*term)
1332 usage(pickaxe_usage);
1334 if (bottom && top && top < bottom) {
1335 unsigned long tmp;
1336 tmp = top; top = bottom; bottom = tmp;
1339 else if (!strcmp("--score-debug", arg))
1340 output_option |= OUTPUT_SHOW_SCORE;
1341 else if (!strcmp("-f", arg) ||
1342 !strcmp("--show-name", arg))
1343 output_option |= OUTPUT_SHOW_NAME;
1344 else if (!strcmp("-n", arg) ||
1345 !strcmp("--show-number", arg))
1346 output_option |= OUTPUT_SHOW_NUMBER;
1347 else if (!strcmp("-p", arg) ||
1348 !strcmp("--porcelain", arg))
1349 output_option |= OUTPUT_PORCELAIN;
1350 else if (!strcmp("--", arg)) {
1351 seen_dashdash = 1;
1352 i++;
1353 break;
1355 else
1356 argv[unk++] = arg;
1359 if (!blame_move_score)
1360 blame_move_score = BLAME_DEFAULT_MOVE_SCORE;
1361 if (!blame_copy_score)
1362 blame_copy_score = BLAME_DEFAULT_COPY_SCORE;
1364 /* We have collected options unknown to us in argv[1..unk]
1365 * which are to be passed to revision machinery if we are
1366 * going to do the "bottom" procesing.
1368 * The remaining are:
1370 * (1) if seen_dashdash, its either
1371 * "-options -- <path>" or
1372 * "-options -- <path> <rev>".
1373 * but the latter is allowed only if there is no
1374 * options that we passed to revision machinery.
1376 * (2) otherwise, we may have "--" somewhere later and
1377 * might be looking at the first one of multiple 'rev'
1378 * parameters (e.g. " master ^next ^maint -- path").
1379 * See if there is a dashdash first, and give the
1380 * arguments before that to revision machinery.
1381 * After that there must be one 'path'.
1383 * (3) otherwise, its one of the three:
1384 * "-options <path> <rev>"
1385 * "-options <rev> <path>"
1386 * "-options <path>"
1387 * but again the first one is allowed only if
1388 * there is no options that we passed to revision
1389 * machinery.
1392 if (seen_dashdash) {
1393 /* (1) */
1394 if (argc <= i)
1395 usage(pickaxe_usage);
1396 path = argv[i];
1397 if (i + 1 == argc - 1) {
1398 if (unk != 1)
1399 usage(pickaxe_usage);
1400 argv[unk++] = argv[i + 1];
1402 else if (i + 1 != argc)
1403 /* garbage at end */
1404 usage(pickaxe_usage);
1406 else {
1407 int j;
1408 for (j = i; !seen_dashdash && j < argc; j++)
1409 if (!strcmp(argv[j], "--"))
1410 seen_dashdash = j;
1411 if (seen_dashdash) {
1412 if (seen_dashdash + 1 != argc - 1)
1413 usage(pickaxe_usage);
1414 path = argv[seen_dashdash + 1];
1415 for (j = i; j < seen_dashdash; j++)
1416 argv[unk++] = argv[j];
1418 else {
1419 /* (3) */
1420 path = argv[i];
1421 if (i + 1 == argc - 1) {
1422 final_commit_name = argv[i + 1];
1424 /* if (unk == 1) we could be getting
1425 * old-style
1427 if (unk == 1 && !has_path_in_work_tree(path)) {
1428 path = argv[i + 1];
1429 final_commit_name = argv[i];
1432 else if (i != argc - 1)
1433 usage(pickaxe_usage); /* garbage at end */
1435 if (!has_path_in_work_tree(path))
1436 die("cannot stat path %s: %s",
1437 path, strerror(errno));
1441 if (final_commit_name)
1442 argv[unk++] = final_commit_name;
1444 /* Now we got rev and path. We do not want the path pruning
1445 * but we may want "bottom" processing.
1447 argv[unk] = NULL;
1449 init_revisions(&revs, NULL);
1450 setup_revisions(unk, argv, &revs, "HEAD");
1451 memset(&sb, 0, sizeof(sb));
1453 /* There must be one and only one positive commit in the
1454 * revs->pending array.
1456 for (i = 0; i < revs.pending.nr; i++) {
1457 struct object *obj = revs.pending.objects[i].item;
1458 if (obj->flags & UNINTERESTING)
1459 continue;
1460 while (obj->type == OBJ_TAG)
1461 obj = deref_tag(obj, NULL, 0);
1462 if (obj->type != OBJ_COMMIT)
1463 die("Non commit %s?",
1464 revs.pending.objects[i].name);
1465 if (sb.final)
1466 die("More than one commit to dig from %s and %s?",
1467 revs.pending.objects[i].name,
1468 final_commit_name);
1469 sb.final = (struct commit *) obj;
1470 final_commit_name = revs.pending.objects[i].name;
1473 if (!sb.final) {
1474 /* "--not A B -- path" without anything positive */
1475 unsigned char head_sha1[20];
1477 final_commit_name = "HEAD";
1478 if (get_sha1(final_commit_name, head_sha1))
1479 die("No such ref: HEAD");
1480 sb.final = lookup_commit_reference(head_sha1);
1481 add_pending_object(&revs, &(sb.final->object), "HEAD");
1484 /* If we have bottom, this will mark the ancestors of the
1485 * bottom commits we would reach while traversing as
1486 * uninteresting.
1488 prepare_revision_walk(&revs);
1490 o = get_origin(&sb, sb.final, path);
1491 if (fill_blob_sha1(o))
1492 die("no such path %s in %s", path, final_commit_name);
1494 sb.final_buf = read_sha1_file(o->blob_sha1, type, &sb.final_buf_size);
1495 lno = prepare_lines(&sb);
1497 if (bottom < 1)
1498 bottom = 1;
1499 if (top < 1)
1500 top = lno;
1501 bottom--;
1502 if (lno < top)
1503 die("file %s has only %lu lines", path, lno);
1505 ent = xcalloc(1, sizeof(*ent));
1506 ent->lno = bottom;
1507 ent->num_lines = top - bottom;
1508 ent->suspect = o;
1509 ent->s_lno = bottom;
1511 sb.ent = ent;
1512 sb.path = path;
1514 if (revs_file && read_ancestry(revs_file))
1515 die("reading graft file %s failed: %s",
1516 revs_file, strerror(errno));
1518 assign_blame(&sb, &revs, opt);
1520 coalesce(&sb);
1522 if (!(output_option & OUTPUT_PORCELAIN))
1523 find_alignment(&sb, &output_option);
1525 output(&sb, output_option);
1526 free((void *)sb.final_buf);
1527 for (ent = sb.ent; ent; ) {
1528 struct blame_entry *e = ent->next;
1529 free(ent);
1530 ent = e;
1532 return 0;