diff-tree --always flag
[git/jrn.git] / combine-diff.c
blob210ffcbce38bf1155c6e9b054e0950ee07c979fe
1 #include "cache.h"
2 #include "commit.h"
3 #include "diff.h"
4 #include "diffcore.h"
5 #include "quote.h"
7 static int uninteresting(struct diff_filepair *p)
9 if (diff_unmodified_pair(p))
10 return 1;
11 if (!S_ISREG(p->one->mode) || !S_ISREG(p->two->mode))
12 return 1;
13 return 0;
16 static struct combine_diff_path *intersect_paths(struct combine_diff_path *curr, int n, int num_parent)
18 struct diff_queue_struct *q = &diff_queued_diff;
19 struct combine_diff_path *p;
20 int i;
22 if (!n) {
23 struct combine_diff_path *list = NULL, **tail = &list;
24 for (i = 0; i < q->nr; i++) {
25 int len;
26 const char *path;
27 if (uninteresting(q->queue[i]))
28 continue;
29 path = q->queue[i]->two->path;
30 len = strlen(path);
32 p = xmalloc(sizeof(*p) + len + 1 + num_parent * 20);
33 p->path = (char*) &(p->parent_sha1[num_parent][0]);
34 memcpy(p->path, path, len);
35 p->path[len] = 0;
36 p->len = len;
37 p->next = NULL;
38 memcpy(p->sha1, q->queue[i]->two->sha1, 20);
39 memcpy(p->parent_sha1[n], q->queue[i]->one->sha1, 20);
40 *tail = p;
41 tail = &p->next;
43 return list;
46 for (p = curr; p; p = p->next) {
47 int found = 0;
48 if (!p->len)
49 continue;
50 for (i = 0; i < q->nr; i++) {
51 const char *path;
52 int len;
54 if (uninteresting(q->queue[i]))
55 continue;
56 path = q->queue[i]->two->path;
57 len = strlen(path);
58 if (len == p->len && !memcmp(path, p->path, len)) {
59 found = 1;
60 memcpy(p->parent_sha1[n],
61 q->queue[i]->one->sha1, 20);
62 break;
65 if (!found)
66 p->len = 0;
68 return curr;
71 /* Lines lost from parent */
72 struct lline {
73 struct lline *next;
74 int len;
75 unsigned long parent_map;
76 char line[FLEX_ARRAY];
/* A line surviving in the merge result. */
struct sline {
	/*
	 * Lost lines attached to this line, with lost_tail pointing at
	 * the link where the next one is to be appended.
	 */
	struct lline *lost_head, **lost_tail;
	char *bol;	/* beginning of the line inside the result buffer */
	int len;	/* length of the line, excluding the newline */
	/*
	 * Bit n (0 <= n < N, N being the number of parents) is set when
	 * the diff against parent n shows this line as a '+' line.
	 * Bit N is used for "interesting" lines, including context.
	 */
	unsigned long flag;
	/* Per-parent line numbers; p_lno[n] belongs to parent n. */
	unsigned long *p_lno;
};
92 static char *grab_blob(const unsigned char *sha1, unsigned long *size)
94 char *blob;
95 char type[20];
96 if (!memcmp(sha1, null_sha1, 20)) {
97 /* deleted blob */
98 *size = 0;
99 return xcalloc(1, 1);
101 blob = read_sha1_file(sha1, type, size);
102 if (strcmp(type, "blob"))
103 die("object '%s' is not a blob!", sha1_to_hex(sha1));
104 return blob;
/* Size of the buffers that receive a temporary file name. */
#define TMPPATHLEN 50
/* Size of the buffer used to read one line of diff output. */
#define MAXLINELEN 10240
110 static void write_to_temp_file(char *tmpfile, void *blob, unsigned long size)
112 int fd = git_mkstemp(tmpfile, TMPPATHLEN, ".diff_XXXXXX");
113 if (fd < 0)
114 die("unable to create temp-file");
115 if (write(fd, blob, size) != size)
116 die("unable to write temp-file");
117 close(fd);
/*
 * Dump the blob named by sha1 into a new temporary file whose name is
 * stored in tmpfile (see write_to_temp_file() for the buffer size).
 */
static void write_temp_blob(char *tmpfile, const unsigned char *sha1)
{
	unsigned long len;
	void *contents = grab_blob(sha1, &len);

	write_to_temp_file(tmpfile, contents, len);
	free(contents);
}
/*
 * Parse an unsigned decimal number at *cp_p.
 *
 * On success stores the value in *num_p, advances *cp_p past the
 * digits and returns 0.  Returns -1, leaving *cp_p and *num_p alone,
 * when there is no digit at *cp_p or when the value does not fit in
 * an unsigned int.
 */
static int parse_num(char **cp_p, unsigned int *num_p)
{
	const unsigned int max = ~0U;
	char *cp = *cp_p;
	unsigned int num = 0;

	while ('0' <= *cp && *cp <= '9') {
		unsigned int digit = (unsigned int)(*cp - '0');

		/* num * 10 + digit would exceed max: refuse to wrap */
		if (num > (max - digit) / 10)
			return -1;
		num = num * 10 + digit;
		cp++;
	}
	if (cp == *cp_p)
		return -1;	/* no digits at all */
	*cp_p = cp;
	*num_p = num;
	return 0;
}
/*
 * Parse a unified diff hunk header of the form
 *
 *	@@ -<ob>[,<on>] +<nb>[,<nn>] @@
 *
 * "line" is the NUL-terminated header and "len" its length as passed
 * by the caller; the leading "@@ -" is assumed to have been checked
 * already and is skipped.  A missing count defaults to 1.
 *
 * Returns 0 on success.  Returns the value of error() when a number or
 * the " +" separator is malformed, and -1 (without a message) when the
 * closing " @@" is missing.
 */
static int parse_hunk_header(char *line, int len,
			     unsigned int *ob, unsigned int *on,
			     unsigned int *nb, unsigned int *nn)
{
	char *cp;

	/* Too short to even hold the "@@ -" we are about to skip. */
	if (len < 4)
		goto bad_line;
	cp = line + 4;
	if (parse_num(&cp, ob))
		goto bad_line;
	if (*cp == ',') {
		cp++;
		if (parse_num(&cp, on))
			goto bad_line;
	}
	else
		*on = 1;
	if (*cp++ != ' ' || *cp++ != '+')
		goto bad_line;
	if (parse_num(&cp, nb))
		goto bad_line;
	if (*cp == ',') {
		cp++;
		if (parse_num(&cp, nn))
			goto bad_line;
	}
	else
		*nn = 1;
	/*
	 * strncmp() stops at the terminating NUL, so a header that ends
	 * early is never read past its end.
	 */
	return -!!strncmp(cp, " @@", 3);

bad_line:
	return error("malformed diff output: %s", line);
}
175 static void append_lost(struct sline *sline, int n, const char *line)
177 struct lline *lline;
178 int len = strlen(line);
179 unsigned long this_mask = (1UL<<n);
180 if (line[len-1] == '\n')
181 len--;
183 /* Check to see if we can squash things */
184 if (sline->lost_head) {
185 struct lline *last_one = NULL;
186 /* We cannot squash it with earlier one */
187 for (lline = sline->lost_head;
188 lline;
189 lline = lline->next)
190 if (lline->parent_map & this_mask)
191 last_one = lline;
192 lline = last_one ? last_one->next : sline->lost_head;
193 while (lline) {
194 if (lline->len == len &&
195 !memcmp(lline->line, line, len)) {
196 lline->parent_map |= this_mask;
197 return;
199 lline = lline->next;
203 lline = xmalloc(sizeof(*lline) + len + 1);
204 lline->len = len;
205 lline->next = NULL;
206 lline->parent_map = this_mask;
207 memcpy(lline->line, line, len);
208 lline->line[len] = 0;
209 *sline->lost_tail = lline;
210 sline->lost_tail = &lline->next;
213 static void combine_diff(const unsigned char *parent, const char *ourtmp,
214 struct sline *sline, int cnt, int n, int num_parent)
216 FILE *in;
217 char parent_tmp[TMPPATHLEN];
218 char cmd[TMPPATHLEN * 2 + 1024];
219 char line[MAXLINELEN];
220 unsigned int lno, ob, on, nb, nn, p_lno;
221 unsigned long nmask = (1UL << n);
222 struct sline *lost_bucket = NULL;
224 write_temp_blob(parent_tmp, parent);
225 sprintf(cmd, "diff --unified=0 -La/x -Lb/x '%s' '%s'",
226 parent_tmp, ourtmp);
227 in = popen(cmd, "r");
228 if (!in)
229 die("cannot spawn %s", cmd);
231 lno = 1;
232 while (fgets(line, sizeof(line), in) != NULL) {
233 int len = strlen(line);
234 if (5 < len && !memcmp("@@ -", line, 4)) {
235 if (parse_hunk_header(line, len,
236 &ob, &on, &nb, &nn))
237 break;
238 lno = nb;
239 if (!nb)
240 /* @@ -1,2 +0,0 @@ to remove the
241 * first two lines...
243 nb = 1;
244 if (nn == 0)
245 /* @@ -X,Y +N,0 @@ removed Y lines
246 * that would have come *after* line N
247 * in the result. Our lost buckets hang
248 * to the line after the removed lines,
250 lost_bucket = &sline[nb];
251 else
252 lost_bucket = &sline[nb-1];
253 if (!sline[nb-1].p_lno)
254 sline[nb-1].p_lno =
255 xcalloc(num_parent,
256 sizeof(unsigned long));
257 sline[nb-1].p_lno[n] = ob;
258 continue;
260 if (!lost_bucket)
261 continue; /* not in any hunk yet */
262 switch (line[0]) {
263 case '-':
264 append_lost(lost_bucket, n, line+1);
265 break;
266 case '+':
267 sline[lno-1].flag |= nmask;
268 lno++;
269 break;
272 fclose(in);
273 unlink(parent_tmp);
275 /* Assign line numbers for this parent.
277 * sline[lno].p_lno[n] records the first line number
278 * (counting from 1) for parent N if the final hunk display
279 * started by showing sline[lno] (possibly showing the lost
280 * lines attached to it first).
282 for (lno = 0, p_lno = 1; lno < cnt; lno++) {
283 struct lline *ll;
284 sline[lno].p_lno[n] = p_lno;
286 /* How many lines would this sline advance the p_lno? */
287 ll = sline[lno].lost_head;
288 while (ll) {
289 if (ll->parent_map & nmask)
290 p_lno++; /* '-' means parent had it */
291 ll = ll->next;
293 if (!(sline[lno].flag & nmask))
294 p_lno++; /* no '+' means parent had it */
296 sline[lno].p_lno[n] = p_lno; /* trailer */
/* Number of context lines painted around interesting lines. */
static unsigned long context = 3;
/* Character repeated to build the fence around a hunk header. */
static char combine_marker = '@';
302 static int interesting(struct sline *sline, unsigned long all_mask)
304 /* If some parents lost lines here, or if we have added to
305 * some parent, it is interesting.
307 return ((sline->flag & all_mask) || sline->lost_head);
310 static unsigned long adjust_hunk_tail(struct sline *sline,
311 unsigned long all_mask,
312 unsigned long hunk_begin,
313 unsigned long i)
315 /* i points at the first uninteresting line. If the last line
316 * of the hunk was interesting only because it has some
317 * deletion, then it is not all that interesting for the
318 * purpose of giving trailing context lines. This is because
319 * we output '-' line and then unmodified sline[i-1] itself in
320 * that case which gives us one extra context line.
322 if ((hunk_begin + 1 <= i) && !(sline[i-1].flag & all_mask))
323 i--;
324 return i;
327 static unsigned long find_next(struct sline *sline,
328 unsigned long mark,
329 unsigned long i,
330 unsigned long cnt,
331 int uninteresting)
333 /* We have examined up to i-1 and are about to look at i.
334 * Find next interesting or uninteresting line. Here,
335 * "interesting" does not mean interesting(), but marked by
336 * the give_context() function below (i.e. it includes context
337 * lines that are not interesting to interesting() function
338 * that are surrounded by interesting() ones.
340 while (i < cnt)
341 if (uninteresting
342 ? !(sline[i].flag & mark)
343 : (sline[i].flag & mark))
344 return i;
345 else
346 i++;
347 return cnt;
350 static int give_context(struct sline *sline, unsigned long cnt, int num_parent)
352 unsigned long all_mask = (1UL<<num_parent) - 1;
353 unsigned long mark = (1UL<<num_parent);
354 unsigned long i;
356 /* Two groups of interesting lines may have a short gap of
357 * unintersting lines. Connect such groups to give them a
358 * bit of context.
360 * We first start from what the interesting() function says,
361 * and mark them with "mark", and paint context lines with the
362 * mark. So interesting() would still say false for such context
363 * lines but they are treated as "interesting" in the end.
365 i = find_next(sline, mark, 0, cnt, 0);
366 if (cnt <= i)
367 return 0;
369 while (i < cnt) {
370 unsigned long j = (context < i) ? (i - context) : 0;
371 unsigned long k;
373 /* Paint a few lines before the first interesting line. */
374 while (j < i)
375 sline[j++].flag |= mark;
377 again:
378 /* we know up to i is to be included. where does the
379 * next uninteresting one start?
381 j = find_next(sline, mark, i, cnt, 1);
382 if (cnt <= j)
383 break; /* the rest are all interesting */
385 /* lookahead context lines */
386 k = find_next(sline, mark, j, cnt, 0);
387 j = adjust_hunk_tail(sline, all_mask, i, j);
389 if (k < j + context) {
390 /* k is interesting and [j,k) are not, but
391 * paint them interesting because the gap is small.
393 while (j < k)
394 sline[j++].flag |= mark;
395 i = k;
396 goto again;
399 /* j is the first uninteresting line and there is
400 * no overlap beyond it within context lines. Paint
401 * the trailing edge a bit.
403 i = k;
404 k = (j + context < cnt) ? j + context : cnt;
405 while (j < k)
406 sline[j++].flag |= mark;
408 return 1;
411 static int make_hunks(struct sline *sline, unsigned long cnt,
412 int num_parent, int dense)
414 unsigned long all_mask = (1UL<<num_parent) - 1;
415 unsigned long mark = (1UL<<num_parent);
416 unsigned long i;
417 int has_interesting = 0;
419 for (i = 0; i < cnt; i++) {
420 if (interesting(&sline[i], all_mask))
421 sline[i].flag |= mark;
422 else
423 sline[i].flag &= ~mark;
425 if (!dense)
426 return give_context(sline, cnt, num_parent);
428 /* Look at each hunk, and if we have changes from only one
429 * parent, or the changes are the same from all but one
430 * parent, mark that uninteresting.
432 i = 0;
433 while (i < cnt) {
434 unsigned long j, hunk_begin, hunk_end;
435 unsigned long same_diff;
436 while (i < cnt && !(sline[i].flag & mark))
437 i++;
438 if (cnt <= i)
439 break; /* No more interesting hunks */
440 hunk_begin = i;
441 for (j = i + 1; j < cnt; j++) {
442 if (!(sline[j].flag & mark)) {
443 /* Look beyond the end to see if there
444 * is an interesting line after this
445 * hunk within context span.
447 unsigned long la; /* lookahead */
448 int contin = 0;
449 la = adjust_hunk_tail(sline, all_mask,
450 hunk_begin, j);
451 la = (la + context < cnt) ?
452 (la + context) : cnt;
453 while (j <= --la) {
454 if (sline[la].flag & mark) {
455 contin = 1;
456 break;
459 if (!contin)
460 break;
461 j = la;
464 hunk_end = j;
466 /* [i..hunk_end) are interesting. Now is it really
467 * interesting? We check if there are only two versions
468 * and the result matches one of them. That is, we look
469 * at:
470 * (+) line, which records lines added to which parents;
471 * this line appears in the result.
472 * (-) line, which records from what parents the line
473 * was removed; this line does not appear in the result.
474 * then check the set of parents the result has difference
475 * from, from all lines. If there are lines that has
476 * different set of parents that the result has differences
477 * from, that means we have more than two versions.
479 * Even when we have only two versions, if the result does
480 * not match any of the parents, the it should be considered
481 * interesting. In such a case, we would have all '+' line.
482 * After passing the above "two versions" test, that would
483 * appear as "the same set of parents" to be "all parents".
485 same_diff = 0;
486 has_interesting = 0;
487 for (j = i; j < hunk_end && !has_interesting; j++) {
488 unsigned long this_diff = sline[j].flag & all_mask;
489 struct lline *ll = sline[j].lost_head;
490 if (this_diff) {
491 /* This has some changes. Is it the
492 * same as others?
494 if (!same_diff)
495 same_diff = this_diff;
496 else if (same_diff != this_diff) {
497 has_interesting = 1;
498 break;
501 while (ll && !has_interesting) {
502 /* Lost this line from these parents;
503 * who are they? Are they the same?
505 this_diff = ll->parent_map;
506 if (!same_diff)
507 same_diff = this_diff;
508 else if (same_diff != this_diff) {
509 has_interesting = 1;
511 ll = ll->next;
515 if (!has_interesting && same_diff != all_mask) {
516 /* This hunk is not that interesting after all */
517 for (j = hunk_begin; j < hunk_end; j++)
518 sline[j].flag &= ~mark;
520 i = hunk_end;
523 has_interesting = give_context(sline, cnt, num_parent);
524 return has_interesting;
527 static void show_parent_lno(struct sline *sline, unsigned long l0, unsigned long l1, unsigned long cnt, int n)
529 l0 = sline[l0].p_lno[n];
530 l1 = sline[l1].p_lno[n];
531 printf(" -%lu,%lu", l0, l1-l0);
534 static void dump_sline(struct sline *sline, unsigned long cnt, int num_parent)
536 unsigned long mark = (1UL<<num_parent);
537 int i;
538 unsigned long lno = 0;
540 while (1) {
541 struct sline *sl = &sline[lno];
542 int hunk_end;
543 while (lno < cnt && !(sline[lno].flag & mark))
544 lno++;
545 if (cnt <= lno)
546 break;
547 for (hunk_end = lno + 1; hunk_end < cnt; hunk_end++)
548 if (!(sline[hunk_end].flag & mark))
549 break;
550 for (i = 0; i <= num_parent; i++) putchar(combine_marker);
551 for (i = 0; i < num_parent; i++)
552 show_parent_lno(sline, lno, hunk_end, cnt, i);
553 printf(" +%lu,%lu ", lno+1, hunk_end-lno);
554 for (i = 0; i <= num_parent; i++) putchar(combine_marker);
555 putchar('\n');
556 while (lno < hunk_end) {
557 struct lline *ll;
558 int j;
559 unsigned long p_mask;
560 sl = &sline[lno++];
561 ll = sl->lost_head;
562 while (ll) {
563 for (j = 0; j < num_parent; j++) {
564 if (ll->parent_map & (1UL<<j))
565 putchar('-');
566 else
567 putchar(' ');
569 puts(ll->line);
570 ll = ll->next;
572 p_mask = 1;
573 for (j = 0; j < num_parent; j++) {
574 if (p_mask & sl->flag)
575 putchar('+');
576 else
577 putchar(' ');
578 p_mask <<= 1;
580 printf("%.*s\n", sl->len, sl->bol);
585 static void reuse_combine_diff(struct sline *sline, unsigned long cnt,
586 int i, int j)
588 /* We have already examined parent j and we know parent i
589 * and parent j are the same, so reuse the combined result
590 * of parent j for parent i.
592 unsigned long lno, imask, jmask;
593 imask = (1UL<<i);
594 jmask = (1UL<<j);
596 for (lno = 0; lno < cnt; lno++) {
597 struct lline *ll = sline->lost_head;
598 sline->p_lno[i] = sline->p_lno[j];
599 while (ll) {
600 if (ll->parent_map & jmask)
601 ll->parent_map |= imask;
602 ll = ll->next;
604 if (sline->flag & jmask)
605 sline->flag |= imask;
606 sline++;
610 int show_combined_diff(struct combine_diff_path *elem, int num_parent,
611 int dense, const char *header, int show_empty)
613 unsigned long size, cnt, lno;
614 char *result, *cp, *ep;
615 struct sline *sline; /* survived lines */
616 int i, show_hunks, shown_header = 0;
617 char ourtmp_buf[TMPPATHLEN];
618 char *ourtmp = ourtmp_buf;
620 /* Read the result of merge first */
621 if (memcmp(elem->sha1, null_sha1, 20)) {
622 result = grab_blob(elem->sha1, &size);
623 write_to_temp_file(ourtmp, result, size);
625 else {
626 struct stat st;
627 int fd;
628 ourtmp = elem->path;
629 if (0 <= (fd = open(ourtmp, O_RDONLY)) &&
630 !fstat(fd, &st)) {
631 int len = st.st_size;
632 int cnt = 0;
634 size = len;
635 result = xmalloc(len + 1);
636 while (cnt < len) {
637 int done = xread(fd, result+cnt, len-cnt);
638 if (done == 0)
639 break;
640 if (done < 0)
641 die("read error '%s'", ourtmp);
642 cnt += done;
644 result[len] = 0;
646 else {
647 /* deleted file */
648 size = 0;
649 result = xmalloc(1);
650 result[0] = 0;
651 ourtmp = "/dev/null";
653 if (0 <= fd)
654 close(fd);
657 for (cnt = 0, cp = result; cp - result < size; cp++) {
658 if (*cp == '\n')
659 cnt++;
661 if (result[size-1] != '\n')
662 cnt++; /* incomplete line */
664 sline = xcalloc(cnt+1, sizeof(*sline));
665 ep = result;
666 sline[0].bol = result;
667 for (lno = 0, cp = result; cp - result < size; cp++) {
668 if (*cp == '\n') {
669 sline[lno].lost_tail = &sline[lno].lost_head;
670 sline[lno].len = cp - sline[lno].bol;
671 sline[lno].flag = 0;
672 lno++;
673 if (lno < cnt)
674 sline[lno].bol = cp + 1;
677 if (result[size-1] != '\n') {
678 sline[cnt-1].lost_tail = &sline[cnt-1].lost_head;
679 sline[cnt-1].len = size - (sline[cnt-1].bol - result);
680 sline[cnt-1].flag = 0;
683 sline[0].p_lno = xcalloc((cnt+1) * num_parent, sizeof(unsigned long));
684 for (lno = 0; lno < cnt; lno++)
685 sline[lno+1].p_lno = sline[lno].p_lno + num_parent;
687 for (i = 0; i < num_parent; i++) {
688 int j;
689 for (j = 0; j < i; j++) {
690 if (!memcmp(elem->parent_sha1[i],
691 elem->parent_sha1[j], 20)) {
692 reuse_combine_diff(sline, cnt, i, j);
693 break;
696 if (i <= j)
697 combine_diff(elem->parent_sha1[i], ourtmp, sline,
698 cnt, i, num_parent);
701 show_hunks = make_hunks(sline, cnt, num_parent, dense);
703 if (show_hunks) {
704 if (header) {
705 shown_header++;
706 puts(header);
708 printf("diff --%s ", dense ? "cc" : "combined");
709 if (quote_c_style(elem->path, NULL, NULL, 0))
710 quote_c_style(elem->path, NULL, stdout, 0);
711 else
712 printf("%s", elem->path);
713 putchar('\n');
714 printf("index ");
715 for (i = 0; i < num_parent; i++) {
716 printf("%s%s",
717 i ? "," : "",
718 find_unique_abbrev(elem->parent_sha1[i],
719 DEFAULT_ABBREV));
721 printf("..%s\n",
722 find_unique_abbrev(elem->sha1, DEFAULT_ABBREV));
723 dump_sline(sline, cnt, num_parent);
725 if (ourtmp == ourtmp_buf)
726 unlink(ourtmp);
727 free(result);
729 for (i = 0; i < cnt; i++) {
730 if (sline[i].lost_head) {
731 struct lline *ll = sline[i].lost_head;
732 while (ll) {
733 struct lline *tmp = ll;
734 ll = ll->next;
735 free(tmp);
739 free(sline[0].p_lno);
740 free(sline);
741 return shown_header;
744 int diff_tree_combined_merge(const unsigned char *sha1,
745 const char *header,
746 int show_empty_merge, int dense)
748 struct commit *commit = lookup_commit(sha1);
749 struct diff_options diffopts;
750 struct commit_list *parents;
751 struct combine_diff_path *p, *paths = NULL;
752 int num_parent, i, num_paths;
754 diff_setup(&diffopts);
755 diffopts.output_format = DIFF_FORMAT_NO_OUTPUT;
756 diffopts.recursive = 1;
758 /* count parents */
759 for (parents = commit->parents, num_parent = 0;
760 parents;
761 parents = parents->next, num_parent++)
762 ; /* nothing */
764 /* find set of paths that everybody touches */
765 for (parents = commit->parents, i = 0;
766 parents;
767 parents = parents->next, i++) {
768 struct commit *parent = parents->item;
769 diff_tree_sha1(parent->object.sha1, commit->object.sha1, "",
770 &diffopts);
771 paths = intersect_paths(paths, i, num_parent);
772 diff_flush(&diffopts);
775 /* find out surviving paths */
776 for (num_paths = 0, p = paths; p; p = p->next) {
777 if (p->len)
778 num_paths++;
780 if (num_paths || show_empty_merge) {
781 for (p = paths; p; p = p->next) {
782 if (!p->len)
783 continue;
784 if (show_combined_diff(p, num_parent, dense, header,
785 show_empty_merge))
786 header = NULL;
790 /* Clean things up */
791 while (paths) {
792 struct combine_diff_path *tmp = paths;
793 paths = paths->next;
794 free(tmp);
796 return 0;