Document git-diff-tree --always
[git/dscho.git] / combine-diff.c
blob6a9f3683c51879ab147f9a3f37c38fc1e8a46b47
1 #include "cache.h"
2 #include "commit.h"
3 #include "diff.h"
4 #include "diffcore.h"
5 #include "quote.h"
/*
 * Tell whether a queued filepair should be skipped by the combined
 * diff machinery.
 *
 * Returns 1 when diff_unmodified_pair() reports a non-zero value for
 * the pair, and 0 otherwise.
 */
static int uninteresting(struct diff_filepair *p)
{
	return diff_unmodified_pair(p) ? 1 : 0;
}
14 static struct combine_diff_path *intersect_paths(struct combine_diff_path *curr, int n, int num_parent)
16 struct diff_queue_struct *q = &diff_queued_diff;
17 struct combine_diff_path *p;
18 int i;
20 if (!n) {
21 struct combine_diff_path *list = NULL, **tail = &list;
22 for (i = 0; i < q->nr; i++) {
23 int len;
24 const char *path;
25 if (uninteresting(q->queue[i]))
26 continue;
27 path = q->queue[i]->two->path;
28 len = strlen(path);
29 p = xmalloc(combine_diff_path_size(num_parent, len));
30 p->path = (char*) &(p->parent[num_parent]);
31 memcpy(p->path, path, len);
32 p->path[len] = 0;
33 p->len = len;
34 p->next = NULL;
35 memset(p->parent, 0,
36 sizeof(p->parent[0]) * num_parent);
38 memcpy(p->sha1, q->queue[i]->two->sha1, 20);
39 p->mode = q->queue[i]->two->mode;
40 memcpy(p->parent[n].sha1, q->queue[i]->one->sha1, 20);
41 p->parent[n].mode = q->queue[i]->one->mode;
42 *tail = p;
43 tail = &p->next;
45 return list;
48 for (p = curr; p; p = p->next) {
49 int found = 0;
50 if (!p->len)
51 continue;
52 for (i = 0; i < q->nr; i++) {
53 const char *path;
54 int len;
56 if (uninteresting(q->queue[i]))
57 continue;
58 path = q->queue[i]->two->path;
59 len = strlen(path);
60 if (len == p->len && !memcmp(path, p->path, len)) {
61 found = 1;
62 memcpy(p->parent[n].sha1,
63 q->queue[i]->one->sha1, 20);
64 p->parent[n].mode = q->queue[i]->one->mode;
65 break;
68 if (!found)
69 p->len = 0;
71 return curr;
74 /* Lines lost from parent */
75 struct lline {
76 struct lline *next;
77 int len;
78 unsigned long parent_map;
79 char line[FLEX_ARRAY];
/* Lines surviving in the merge result */
struct sline {
	/* Lines the parents had just before this line but the result
	 * lost; lost_tail points at the link where the next lost line
	 * is to be appended.
	 */
	struct lline *lost_head, **lost_tail;
	char *bol;	/* beginning of this line inside the result buffer */
	int len;	/* length of the line, not counting the newline */
	/* bit 0 up to (N-1): bit n is on if the diff against parent n
	 * shows this line as added ('+'), i.e. parent n does not have
	 * it; it is off if the parent has this line unchanged.
	 * bit N is used for "interesting" lines, including context.
	 */
	unsigned long flag;
	/* p_lno[n] is the line number (counting from 1) in parent n
	 * that corresponds to this line.
	 */
	unsigned long *p_lno;
};
95 static char *grab_blob(const unsigned char *sha1, unsigned long *size)
97 char *blob;
98 char type[20];
99 if (!memcmp(sha1, null_sha1, 20)) {
100 /* deleted blob */
101 *size = 0;
102 return xcalloc(1, 1);
104 blob = read_sha1_file(sha1, type, size);
105 if (strcmp(type, "blob"))
106 die("object '%s' is not a blob!", sha1_to_hex(sha1));
107 return blob;
/* Size of the buffers that receive temporary file names. */
#define TMPPATHLEN 50
/* Size of the buffer used to read one line of diff output. */
#define MAXLINELEN 10240
113 static void write_to_temp_file(char *tmpfile, void *blob, unsigned long size)
115 int fd = git_mkstemp(tmpfile, TMPPATHLEN, ".diff_XXXXXX");
116 if (fd < 0)
117 die("unable to create temp-file");
118 if (write(fd, blob, size) != size)
119 die("unable to write temp-file");
120 close(fd);
/*
 * Write the contents of the blob named by sha1 to a new temporary
 * file whose name is stored in tmpfile (a buffer of at least
 * TMPPATHLEN bytes).
 */
static void write_temp_blob(char *tmpfile, const unsigned char *sha1)
{
	unsigned long size;
	void *blob;

	blob = grab_blob(sha1, &size);
	write_to_temp_file(tmpfile, blob, size);
	free(blob);
}
/*
 * Parse an unsigned decimal number at *cp_p.
 *
 * On success stores the value in *num_p, advances *cp_p past the
 * digits and returns 0.  Returns -1, leaving *cp_p and *num_p
 * untouched, when there is no digit at *cp_p or when the value does
 * not fit in an unsigned int.
 */
static int parse_num(char **cp_p, unsigned int *num_p)
{
	const unsigned int max = ~0U;
	char *cp = *cp_p;
	unsigned int num = 0;

	while ('0' <= *cp && *cp <= '9') {
		unsigned int digit = *cp - '0';
		/* would num * 10 + digit exceed the maximum? */
		if (num > (max - digit) / 10)
			return -1;
		num = num * 10 + digit;
		cp++;
	}
	if (cp == *cp_p)
		return -1; /* no digits read */
	*cp_p = cp;
	*num_p = num;
	return 0;
}
/*
 * Parse a unified diff hunk header of the form
 *
 *	@@ -OB[,ON] +NB[,NN] @@
 *
 * line - NUL-terminated header; parsing starts at line + 4, so the
 *        caller must already have checked the leading "@@ -".
 * len  - length of the line (currently unused).
 * ob, on, nb, nn - receive the old begin/count and new begin/count;
 *        an omitted count is stored as 1.
 *
 * Returns 0 on success and non-zero on failure; a malformed number or
 * separator is also reported through error().
 */
static int parse_hunk_header(char *line, int len,
			     unsigned int *ob, unsigned int *on,
			     unsigned int *nb, unsigned int *nn)
{
	char *cp = line + 4;

	if (parse_num(&cp, ob))
		goto bad_line;
	if (*cp == ',') {
		cp++;
		if (parse_num(&cp, on))
			goto bad_line;
	}
	else
		*on = 1;
	if (*cp++ != ' ' || *cp++ != '+')
		goto bad_line;
	if (parse_num(&cp, nb))
		goto bad_line;
	if (*cp == ',') {
		cp++;
		if (parse_num(&cp, nn))
			goto bad_line;
	}
	else
		*nn = 1;
	/* strncmp() stops at the NUL, so a truncated line is not overread */
	return -!!strncmp(cp, " @@", 3);

bad_line:
	return error("malformed diff output: %s", line);
}
178 static void append_lost(struct sline *sline, int n, const char *line)
180 struct lline *lline;
181 int len = strlen(line);
182 unsigned long this_mask = (1UL<<n);
183 if (line[len-1] == '\n')
184 len--;
186 /* Check to see if we can squash things */
187 if (sline->lost_head) {
188 struct lline *last_one = NULL;
189 /* We cannot squash it with earlier one */
190 for (lline = sline->lost_head;
191 lline;
192 lline = lline->next)
193 if (lline->parent_map & this_mask)
194 last_one = lline;
195 lline = last_one ? last_one->next : sline->lost_head;
196 while (lline) {
197 if (lline->len == len &&
198 !memcmp(lline->line, line, len)) {
199 lline->parent_map |= this_mask;
200 return;
202 lline = lline->next;
206 lline = xmalloc(sizeof(*lline) + len + 1);
207 lline->len = len;
208 lline->next = NULL;
209 lline->parent_map = this_mask;
210 memcpy(lline->line, line, len);
211 lline->line[len] = 0;
212 *sline->lost_tail = lline;
213 sline->lost_tail = &lline->next;
216 static void combine_diff(const unsigned char *parent, const char *ourtmp,
217 struct sline *sline, int cnt, int n, int num_parent)
219 FILE *in;
220 char parent_tmp[TMPPATHLEN];
221 char cmd[TMPPATHLEN * 2 + 1024];
222 char line[MAXLINELEN];
223 unsigned int lno, ob, on, nb, nn, p_lno;
224 unsigned long nmask = (1UL << n);
225 struct sline *lost_bucket = NULL;
227 if (!cnt)
228 return; /* result deleted */
230 write_temp_blob(parent_tmp, parent);
231 sprintf(cmd, "diff --unified=0 -La/x -Lb/x '%s' '%s'",
232 parent_tmp, ourtmp);
233 in = popen(cmd, "r");
234 if (!in)
235 die("cannot spawn %s", cmd);
237 lno = 1;
238 while (fgets(line, sizeof(line), in) != NULL) {
239 int len = strlen(line);
240 if (5 < len && !memcmp("@@ -", line, 4)) {
241 if (parse_hunk_header(line, len,
242 &ob, &on, &nb, &nn))
243 break;
244 lno = nb;
245 if (!nb)
246 /* @@ -1,2 +0,0 @@ to remove the
247 * first two lines...
249 nb = 1;
250 if (nn == 0)
251 /* @@ -X,Y +N,0 @@ removed Y lines
252 * that would have come *after* line N
253 * in the result. Our lost buckets hang
254 * to the line after the removed lines,
256 lost_bucket = &sline[nb];
257 else
258 lost_bucket = &sline[nb-1];
259 if (!sline[nb-1].p_lno)
260 sline[nb-1].p_lno =
261 xcalloc(num_parent,
262 sizeof(unsigned long));
263 sline[nb-1].p_lno[n] = ob;
264 continue;
266 if (!lost_bucket)
267 continue; /* not in any hunk yet */
268 switch (line[0]) {
269 case '-':
270 append_lost(lost_bucket, n, line+1);
271 break;
272 case '+':
273 sline[lno-1].flag |= nmask;
274 lno++;
275 break;
278 fclose(in);
279 unlink(parent_tmp);
281 /* Assign line numbers for this parent.
283 * sline[lno].p_lno[n] records the first line number
284 * (counting from 1) for parent N if the final hunk display
285 * started by showing sline[lno] (possibly showing the lost
286 * lines attached to it first).
288 for (lno = 0, p_lno = 1; lno < cnt; lno++) {
289 struct lline *ll;
290 sline[lno].p_lno[n] = p_lno;
292 /* How many lines would this sline advance the p_lno? */
293 ll = sline[lno].lost_head;
294 while (ll) {
295 if (ll->parent_map & nmask)
296 p_lno++; /* '-' means parent had it */
297 ll = ll->next;
299 if (!(sline[lno].flag & nmask))
300 p_lno++; /* no '+' means parent had it */
302 sline[lno].p_lno[n] = p_lno; /* trailer */
/* Number of context lines painted around interesting lines. */
static unsigned long context = 3;
/* Character repeated to form the marker at both ends of a hunk header. */
static char combine_marker = '@';
308 static int interesting(struct sline *sline, unsigned long all_mask)
310 /* If some parents lost lines here, or if we have added to
311 * some parent, it is interesting.
313 return ((sline->flag & all_mask) || sline->lost_head);
316 static unsigned long adjust_hunk_tail(struct sline *sline,
317 unsigned long all_mask,
318 unsigned long hunk_begin,
319 unsigned long i)
321 /* i points at the first uninteresting line. If the last line
322 * of the hunk was interesting only because it has some
323 * deletion, then it is not all that interesting for the
324 * purpose of giving trailing context lines. This is because
325 * we output '-' line and then unmodified sline[i-1] itself in
326 * that case which gives us one extra context line.
328 if ((hunk_begin + 1 <= i) && !(sline[i-1].flag & all_mask))
329 i--;
330 return i;
333 static unsigned long find_next(struct sline *sline,
334 unsigned long mark,
335 unsigned long i,
336 unsigned long cnt,
337 int uninteresting)
339 /* We have examined up to i-1 and are about to look at i.
340 * Find next interesting or uninteresting line. Here,
341 * "interesting" does not mean interesting(), but marked by
342 * the give_context() function below (i.e. it includes context
343 * lines that are not interesting to interesting() function
344 * that are surrounded by interesting() ones.
346 while (i < cnt)
347 if (uninteresting
348 ? !(sline[i].flag & mark)
349 : (sline[i].flag & mark))
350 return i;
351 else
352 i++;
353 return cnt;
356 static int give_context(struct sline *sline, unsigned long cnt, int num_parent)
358 unsigned long all_mask = (1UL<<num_parent) - 1;
359 unsigned long mark = (1UL<<num_parent);
360 unsigned long i;
362 /* Two groups of interesting lines may have a short gap of
363 * unintersting lines. Connect such groups to give them a
364 * bit of context.
366 * We first start from what the interesting() function says,
367 * and mark them with "mark", and paint context lines with the
368 * mark. So interesting() would still say false for such context
369 * lines but they are treated as "interesting" in the end.
371 i = find_next(sline, mark, 0, cnt, 0);
372 if (cnt <= i)
373 return 0;
375 while (i < cnt) {
376 unsigned long j = (context < i) ? (i - context) : 0;
377 unsigned long k;
379 /* Paint a few lines before the first interesting line. */
380 while (j < i)
381 sline[j++].flag |= mark;
383 again:
384 /* we know up to i is to be included. where does the
385 * next uninteresting one start?
387 j = find_next(sline, mark, i, cnt, 1);
388 if (cnt <= j)
389 break; /* the rest are all interesting */
391 /* lookahead context lines */
392 k = find_next(sline, mark, j, cnt, 0);
393 j = adjust_hunk_tail(sline, all_mask, i, j);
395 if (k < j + context) {
396 /* k is interesting and [j,k) are not, but
397 * paint them interesting because the gap is small.
399 while (j < k)
400 sline[j++].flag |= mark;
401 i = k;
402 goto again;
405 /* j is the first uninteresting line and there is
406 * no overlap beyond it within context lines. Paint
407 * the trailing edge a bit.
409 i = k;
410 k = (j + context < cnt) ? j + context : cnt;
411 while (j < k)
412 sline[j++].flag |= mark;
414 return 1;
417 static int make_hunks(struct sline *sline, unsigned long cnt,
418 int num_parent, int dense)
420 unsigned long all_mask = (1UL<<num_parent) - 1;
421 unsigned long mark = (1UL<<num_parent);
422 unsigned long i;
423 int has_interesting = 0;
425 for (i = 0; i < cnt; i++) {
426 if (interesting(&sline[i], all_mask))
427 sline[i].flag |= mark;
428 else
429 sline[i].flag &= ~mark;
431 if (!dense)
432 return give_context(sline, cnt, num_parent);
434 /* Look at each hunk, and if we have changes from only one
435 * parent, or the changes are the same from all but one
436 * parent, mark that uninteresting.
438 i = 0;
439 while (i < cnt) {
440 unsigned long j, hunk_begin, hunk_end;
441 unsigned long same_diff;
442 while (i < cnt && !(sline[i].flag & mark))
443 i++;
444 if (cnt <= i)
445 break; /* No more interesting hunks */
446 hunk_begin = i;
447 for (j = i + 1; j < cnt; j++) {
448 if (!(sline[j].flag & mark)) {
449 /* Look beyond the end to see if there
450 * is an interesting line after this
451 * hunk within context span.
453 unsigned long la; /* lookahead */
454 int contin = 0;
455 la = adjust_hunk_tail(sline, all_mask,
456 hunk_begin, j);
457 la = (la + context < cnt) ?
458 (la + context) : cnt;
459 while (j <= --la) {
460 if (sline[la].flag & mark) {
461 contin = 1;
462 break;
465 if (!contin)
466 break;
467 j = la;
470 hunk_end = j;
472 /* [i..hunk_end) are interesting. Now is it really
473 * interesting? We check if there are only two versions
474 * and the result matches one of them. That is, we look
475 * at:
476 * (+) line, which records lines added to which parents;
477 * this line appears in the result.
478 * (-) line, which records from what parents the line
479 * was removed; this line does not appear in the result.
480 * then check the set of parents the result has difference
481 * from, from all lines. If there are lines that has
482 * different set of parents that the result has differences
483 * from, that means we have more than two versions.
485 * Even when we have only two versions, if the result does
486 * not match any of the parents, the it should be considered
487 * interesting. In such a case, we would have all '+' line.
488 * After passing the above "two versions" test, that would
489 * appear as "the same set of parents" to be "all parents".
491 same_diff = 0;
492 has_interesting = 0;
493 for (j = i; j < hunk_end && !has_interesting; j++) {
494 unsigned long this_diff = sline[j].flag & all_mask;
495 struct lline *ll = sline[j].lost_head;
496 if (this_diff) {
497 /* This has some changes. Is it the
498 * same as others?
500 if (!same_diff)
501 same_diff = this_diff;
502 else if (same_diff != this_diff) {
503 has_interesting = 1;
504 break;
507 while (ll && !has_interesting) {
508 /* Lost this line from these parents;
509 * who are they? Are they the same?
511 this_diff = ll->parent_map;
512 if (!same_diff)
513 same_diff = this_diff;
514 else if (same_diff != this_diff) {
515 has_interesting = 1;
517 ll = ll->next;
521 if (!has_interesting && same_diff != all_mask) {
522 /* This hunk is not that interesting after all */
523 for (j = hunk_begin; j < hunk_end; j++)
524 sline[j].flag &= ~mark;
526 i = hunk_end;
529 has_interesting = give_context(sline, cnt, num_parent);
530 return has_interesting;
533 static void show_parent_lno(struct sline *sline, unsigned long l0, unsigned long l1, unsigned long cnt, int n)
535 l0 = sline[l0].p_lno[n];
536 l1 = sline[l1].p_lno[n];
537 printf(" -%lu,%lu", l0, l1-l0);
540 static void dump_sline(struct sline *sline, unsigned long cnt, int num_parent)
542 unsigned long mark = (1UL<<num_parent);
543 int i;
544 unsigned long lno = 0;
546 if (!cnt)
547 return; /* result deleted */
549 while (1) {
550 struct sline *sl = &sline[lno];
551 int hunk_end;
552 while (lno < cnt && !(sline[lno].flag & mark))
553 lno++;
554 if (cnt <= lno)
555 break;
556 for (hunk_end = lno + 1; hunk_end < cnt; hunk_end++)
557 if (!(sline[hunk_end].flag & mark))
558 break;
559 for (i = 0; i <= num_parent; i++) putchar(combine_marker);
560 for (i = 0; i < num_parent; i++)
561 show_parent_lno(sline, lno, hunk_end, cnt, i);
562 printf(" +%lu,%lu ", lno+1, hunk_end-lno);
563 for (i = 0; i <= num_parent; i++) putchar(combine_marker);
564 putchar('\n');
565 while (lno < hunk_end) {
566 struct lline *ll;
567 int j;
568 unsigned long p_mask;
569 sl = &sline[lno++];
570 ll = sl->lost_head;
571 while (ll) {
572 for (j = 0; j < num_parent; j++) {
573 if (ll->parent_map & (1UL<<j))
574 putchar('-');
575 else
576 putchar(' ');
578 puts(ll->line);
579 ll = ll->next;
581 p_mask = 1;
582 for (j = 0; j < num_parent; j++) {
583 if (p_mask & sl->flag)
584 putchar('+');
585 else
586 putchar(' ');
587 p_mask <<= 1;
589 printf("%.*s\n", sl->len, sl->bol);
594 static void reuse_combine_diff(struct sline *sline, unsigned long cnt,
595 int i, int j)
597 /* We have already examined parent j and we know parent i
598 * and parent j are the same, so reuse the combined result
599 * of parent j for parent i.
601 unsigned long lno, imask, jmask;
602 imask = (1UL<<i);
603 jmask = (1UL<<j);
605 for (lno = 0; lno < cnt; lno++) {
606 struct lline *ll = sline->lost_head;
607 sline->p_lno[i] = sline->p_lno[j];
608 while (ll) {
609 if (ll->parent_map & jmask)
610 ll->parent_map |= imask;
611 ll = ll->next;
613 if (sline->flag & jmask)
614 sline->flag |= imask;
615 sline++;
617 /* the overall size of the file (sline[cnt]) */
618 sline->p_lno[i] = sline->p_lno[j];
621 int show_combined_diff(struct combine_diff_path *elem, int num_parent,
622 int dense, const char *header)
624 unsigned long size, cnt, lno;
625 char *result, *cp, *ep;
626 struct sline *sline; /* survived lines */
627 int mode_differs = 0;
628 int i, show_hunks, shown_header = 0;
629 char ourtmp_buf[TMPPATHLEN];
630 char *ourtmp = ourtmp_buf;
632 /* Read the result of merge first */
633 if (memcmp(elem->sha1, null_sha1, 20)) {
634 result = grab_blob(elem->sha1, &size);
635 write_to_temp_file(ourtmp, result, size);
637 else {
638 /* Used by diff-tree to read from the working tree */
639 struct stat st;
640 int fd;
641 ourtmp = elem->path;
642 if (0 <= (fd = open(ourtmp, O_RDONLY)) &&
643 !fstat(fd, &st)) {
644 int len = st.st_size;
645 int cnt = 0;
647 size = len;
648 result = xmalloc(len + 1);
649 while (cnt < len) {
650 int done = xread(fd, result+cnt, len-cnt);
651 if (done == 0)
652 break;
653 if (done < 0)
654 die("read error '%s'", ourtmp);
655 cnt += done;
657 result[len] = 0;
659 else {
660 /* deleted file */
661 size = 0;
662 result = xmalloc(1);
663 result[0] = 0;
664 ourtmp = "/dev/null";
666 if (0 <= fd)
667 close(fd);
670 for (cnt = 0, cp = result; cp - result < size; cp++) {
671 if (*cp == '\n')
672 cnt++;
674 if (size && result[size-1] != '\n')
675 cnt++; /* incomplete line */
677 sline = xcalloc(cnt+1, sizeof(*sline));
678 ep = result;
679 sline[0].bol = result;
680 for (lno = 0; lno <= cnt; lno++) {
681 sline[lno].lost_tail = &sline[lno].lost_head;
682 sline[lno].flag = 0;
684 for (lno = 0, cp = result; cp - result < size; cp++) {
685 if (*cp == '\n') {
686 sline[lno].len = cp - sline[lno].bol;
687 lno++;
688 if (lno < cnt)
689 sline[lno].bol = cp + 1;
692 if (size && result[size-1] != '\n')
693 sline[cnt-1].len = size - (sline[cnt-1].bol - result);
695 sline[0].p_lno = xcalloc((cnt+1) * num_parent, sizeof(unsigned long));
696 for (lno = 0; lno < cnt; lno++)
697 sline[lno+1].p_lno = sline[lno].p_lno + num_parent;
699 for (i = 0; i < num_parent; i++) {
700 int j;
701 for (j = 0; j < i; j++) {
702 if (!memcmp(elem->parent[i].sha1,
703 elem->parent[j].sha1, 20)) {
704 reuse_combine_diff(sline, cnt, i, j);
705 break;
708 if (i <= j)
709 combine_diff(elem->parent[i].sha1, ourtmp, sline,
710 cnt, i, num_parent);
711 if (elem->parent[i].mode != elem->mode)
712 mode_differs = 1;
715 show_hunks = make_hunks(sline, cnt, num_parent, dense);
717 if (show_hunks || mode_differs) {
718 const char *abb;
719 char null_abb[DEFAULT_ABBREV + 1];
721 memset(null_abb, '0', DEFAULT_ABBREV);
722 null_abb[DEFAULT_ABBREV] = 0;
723 if (header) {
724 shown_header++;
725 puts(header);
727 printf("diff --%s ", dense ? "cc" : "combined");
728 if (quote_c_style(elem->path, NULL, NULL, 0))
729 quote_c_style(elem->path, NULL, stdout, 0);
730 else
731 printf("%s", elem->path);
732 putchar('\n');
733 printf("index ");
734 for (i = 0; i < num_parent; i++) {
735 if (elem->parent[i].mode != elem->mode)
736 mode_differs = 1;
737 if (memcmp(elem->parent[i].sha1, null_sha1, 20))
738 abb = find_unique_abbrev(elem->parent[i].sha1,
739 DEFAULT_ABBREV);
740 else
741 abb = null_abb;
742 printf("%s%s", i ? "," : "", abb);
744 if (memcmp(elem->sha1, null_sha1, 20))
745 abb = find_unique_abbrev(elem->sha1, DEFAULT_ABBREV);
746 else
747 abb = null_abb;
748 printf("..%s\n", abb);
750 if (mode_differs) {
751 printf("mode ");
752 for (i = 0; i < num_parent; i++) {
753 printf("%s%06o", i ? "," : "",
754 elem->parent[i].mode);
756 printf("..%06o\n", elem->mode);
758 dump_sline(sline, cnt, num_parent);
760 if (ourtmp == ourtmp_buf)
761 unlink(ourtmp);
762 free(result);
764 for (i = 0; i < cnt; i++) {
765 if (sline[i].lost_head) {
766 struct lline *ll = sline[i].lost_head;
767 while (ll) {
768 struct lline *tmp = ll;
769 ll = ll->next;
770 free(tmp);
774 free(sline[0].p_lno);
775 free(sline);
776 return shown_header;
779 int diff_tree_combined_merge(const unsigned char *sha1,
780 const char *header, int dense)
782 struct commit *commit = lookup_commit(sha1);
783 struct diff_options diffopts;
784 struct commit_list *parents;
785 struct combine_diff_path *p, *paths = NULL;
786 int num_parent, i, num_paths;
788 diff_setup(&diffopts);
789 diffopts.output_format = DIFF_FORMAT_NO_OUTPUT;
790 diffopts.recursive = 1;
792 /* count parents */
793 for (parents = commit->parents, num_parent = 0;
794 parents;
795 parents = parents->next, num_parent++)
796 ; /* nothing */
798 /* find set of paths that everybody touches */
799 for (parents = commit->parents, i = 0;
800 parents;
801 parents = parents->next, i++) {
802 struct commit *parent = parents->item;
803 diff_tree_sha1(parent->object.sha1, commit->object.sha1, "",
804 &diffopts);
805 paths = intersect_paths(paths, i, num_parent);
806 diff_flush(&diffopts);
809 /* find out surviving paths */
810 for (num_paths = 0, p = paths; p; p = p->next) {
811 if (p->len)
812 num_paths++;
814 if (num_paths) {
815 for (p = paths; p; p = p->next) {
816 if (!p->len)
817 continue;
818 if (show_combined_diff(p, num_parent, dense, header))
819 header = NULL;
823 /* Clean things up */
824 while (paths) {
825 struct combine_diff_path *tmp = paths;
826 paths = paths->next;
827 free(tmp);
829 return 0;