combine-diff: better hunk splitting.
[git/dscho.git] / combine-diff.c
blob df52fa20ecb3ef1b3dfe5a57d15a91e69e266022
1 #include "cache.h"
2 #include "commit.h"
3 #include "diff.h"
4 #include "diffcore.h"
5 #include "quote.h"
7 struct path_list {
8 struct path_list *next;
9 int len;
10 char *path;
11 unsigned char sha1[20];
12 unsigned char parent_sha1[FLEX_ARRAY][20];
15 static int uninteresting(struct diff_filepair *p)
17 if (diff_unmodified_pair(p))
18 return 1;
19 if (!S_ISREG(p->one->mode) || !S_ISREG(p->two->mode))
20 return 1;
21 return 0;
24 static struct path_list *intersect_paths(struct path_list *curr,
25 int n, int num_parent)
27 struct diff_queue_struct *q = &diff_queued_diff;
28 struct path_list *p;
29 int i;
31 if (!n) {
32 struct path_list *list = NULL, **tail = &list;
33 for (i = 0; i < q->nr; i++) {
34 int len;
35 const char *path;
36 if (uninteresting(q->queue[i]))
37 continue;
38 path = q->queue[i]->two->path;
39 len = strlen(path);
41 p = xmalloc(sizeof(*p) + len + 1 + num_parent * 20);
42 p->path = (char*) &(p->parent_sha1[num_parent][0]);
43 memcpy(p->path, path, len);
44 p->path[len] = 0;
45 p->len = len;
46 p->next = NULL;
47 memcpy(p->sha1, q->queue[i]->two->sha1, 20);
48 memcpy(p->parent_sha1[n], q->queue[i]->one->sha1, 20);
49 *tail = p;
50 tail = &p->next;
52 return list;
55 for (p = curr; p; p = p->next) {
56 int found = 0;
57 if (!p->len)
58 continue;
59 for (i = 0; i < q->nr; i++) {
60 const char *path;
61 int len;
63 if (uninteresting(q->queue[i]))
64 continue;
65 path = q->queue[i]->two->path;
66 len = strlen(path);
67 if (len == p->len && !memcmp(path, p->path, len)) {
68 found = 1;
69 memcpy(p->parent_sha1[n],
70 q->queue[i]->one->sha1, 20);
71 break;
74 if (!found)
75 p->len = 0;
77 return curr;
80 struct lline {
81 struct lline *next;
82 int len;
83 unsigned long parent_map;
84 char line[FLEX_ARRAY];
/*
 * A "surviving" line, i.e. one line of the merge result, together with
 * the list of lost lines hanging off it.
 */
struct sline {
	/* head of the lost-line list, and the link where the next one goes */
	struct lline *lost_head;
	struct lline **lost_tail;

	/* start of the line inside the result buffer (not NUL-terminated) */
	char *bol;

	/* number of bytes in the line, excluding the newline */
	int len;

	/*
	 * Lower bits: bit N is set while parent N is considered to have
	 * this line.  Bit num_parent is used as the "show this line" mark.
	 */
	unsigned long flag;
};
94 static char *grab_blob(const unsigned char *sha1, unsigned long *size)
96 char *blob;
97 char type[20];
98 if (!memcmp(sha1, null_sha1, 20)) {
99 /* deleted blob */
100 *size = 0;
101 return xcalloc(1, 1);
103 blob = read_sha1_file(sha1, type, size);
104 if (strcmp(type, "blob"))
105 die("object '%s' is not a blob!", sha1_to_hex(sha1));
106 return blob;
/* Size of the buffers that receive temporary file names. */
#define TMPPATHLEN 50

/* Size of the buffer used to read one line of diff output. */
#define MAXLINELEN 10240
112 static void write_to_temp_file(char *tmpfile, void *blob, unsigned long size)
114 int fd = git_mkstemp(tmpfile, TMPPATHLEN, ".diff_XXXXXX");
115 if (fd < 0)
116 die("unable to create temp-file");
117 if (write(fd, blob, size) != size)
118 die("unable to write temp-file");
119 close(fd);
/*
 * Dump the blob named by 'sha1' into a new temporary file whose name
 * is stored in 'tmpfile' (see write_to_temp_file() for the buffer
 * requirement).
 */
static void write_temp_blob(char *tmpfile, const unsigned char *sha1)
{
	unsigned long size;
	void *blob = grab_blob(sha1, &size);

	write_to_temp_file(tmpfile, blob, size);
	free(blob);
}
/*
 * Parse an unsigned decimal number starting at '*cp_p'.
 *
 * On success stores the value in '*num_p', advances '*cp_p' past the
 * digits and returns 0.  Returns -1, leaving both outputs untouched,
 * when there is no digit at '*cp_p' or when the value does not fit in
 * an unsigned int.
 */
static int parse_num(char **cp_p, unsigned int *num_p)
{
	char *cp = *cp_p;
	unsigned int num = 0;

	while ('0' <= *cp && *cp <= '9') {
		unsigned int digit = (unsigned int)(*cp - '0');

		/* would num * 10 + digit wrap around? */
		if (num > (~0U - digit) / 10)
			return -1;
		num = num * 10 + digit;
		cp++;
	}
	if (cp == *cp_p)
		return -1;	/* no digits at all */
	*cp_p = cp;
	*num_p = num;
	return 0;
}
/*
 * Parse a unified-diff hunk header of the form
 *
 *	@@ -<ob>[,<on>] +<nb>[,<nn>] @@
 *
 * 'line' must start with the four characters "@@ -"; the caller
 * checks that.  A count that is left out defaults to 1.  'len' is not
 * used.
 *
 * Returns 0 on success.  Returns the value of error() after reporting
 * the line when a number or separator is malformed, and -1 without a
 * message when the closing " @@" is missing.
 */
static int parse_hunk_header(char *line, int len,
			     unsigned int *ob, unsigned int *on,
			     unsigned int *nb, unsigned int *nn)
{
	char *cp = line + 4;	/* step over "@@ -" */

	if (parse_num(&cp, ob))
		goto bad_line;
	if (*cp == ',') {
		cp++;
		if (parse_num(&cp, on))
			goto bad_line;
	} else {
		*on = 1;
	}

	if (*cp++ != ' ' || *cp++ != '+')
		goto bad_line;

	if (parse_num(&cp, nb))
		goto bad_line;
	if (*cp == ',') {
		cp++;
		if (parse_num(&cp, nn))
			goto bad_line;
	} else {
		*nn = 1;
	}

	/* strncmp() stops at the NUL, so a truncated line is not overread */
	return strncmp(cp, " @@", 3) ? -1 : 0;

bad_line:
	return error("malformed diff output: %s", line);
}
177 static void append_lost(struct sline *sline, int n, const char *line)
179 struct lline *lline;
180 int len = strlen(line);
181 unsigned long this_mask = (1UL<<n);
182 if (line[len-1] == '\n')
183 len--;
185 /* Check to see if we can squash things */
186 if (sline->lost_head) {
187 struct lline *last_one = NULL;
188 /* We cannot squash it with earlier one */
189 for (lline = sline->lost_head;
190 lline;
191 lline = lline->next)
192 if (lline->parent_map & this_mask)
193 last_one = lline;
194 lline = last_one ? last_one->next : sline->lost_head;
195 while (lline) {
196 if (lline->len == len &&
197 !memcmp(lline->line, line, len)) {
198 lline->parent_map |= this_mask;
199 return;
201 lline = lline->next;
205 lline = xmalloc(sizeof(*lline) + len + 1);
206 lline->len = len;
207 lline->next = NULL;
208 lline->parent_map = this_mask;
209 memcpy(lline->line, line, len);
210 lline->line[len] = 0;
211 *sline->lost_tail = lline;
212 sline->lost_tail = &lline->next;
215 static void combine_diff(const unsigned char *parent, const char *ourtmp,
216 struct sline *sline, int cnt, int n)
218 FILE *in;
219 char parent_tmp[TMPPATHLEN];
220 char cmd[TMPPATHLEN * 2 + 1024];
221 char line[MAXLINELEN];
222 unsigned int lno, ob, on, nb, nn;
223 unsigned long pmask = ~(1UL << n);
224 struct sline *lost_bucket = NULL;
226 write_temp_blob(parent_tmp, parent);
227 sprintf(cmd, "diff --unified=0 -La/x -Lb/x '%s' '%s'",
228 parent_tmp, ourtmp);
229 in = popen(cmd, "r");
230 if (!in)
231 return;
233 lno = 1;
234 while (fgets(line, sizeof(line), in) != NULL) {
235 int len = strlen(line);
236 if (5 < len && !memcmp("@@ -", line, 4)) {
237 if (parse_hunk_header(line, len,
238 &ob, &on, &nb, &nn))
239 break;
240 lno = nb;
241 if (!nb) {
242 /* @@ -1,2 +0,0 @@ to remove the
243 * first two lines...
245 nb = 1;
247 lost_bucket = &sline[nb-1]; /* sline is 0 based */
248 continue;
250 if (!lost_bucket)
251 continue;
252 switch (line[0]) {
253 case '-':
254 append_lost(lost_bucket, n, line+1);
255 break;
256 case '+':
257 sline[lno-1].flag &= pmask;
258 lno++;
259 break;
262 fclose(in);
263 unlink(parent_tmp);
/* Number of context lines painted around each interesting line. */
static unsigned long context = 3;

/* Character repeated to build the hunk header markers. */
static char combine_marker = '@';
269 static int interesting(struct sline *sline, unsigned long all_mask)
271 return ((sline->flag & all_mask) != all_mask || sline->lost_head);
274 static unsigned long line_common_diff(struct sline *sline, unsigned long all_mask)
277 * Look at the line and see from which parents we have the
278 * same difference.
281 /* Lower bits of sline->flag records if the parent had this
282 * line, so XOR with all_mask gives us on-bits for parents we
283 * have differences with.
285 unsigned long common_adds = (sline->flag ^ all_mask) & all_mask;
286 unsigned long common_removes = all_mask;
288 /* If all the parents have this line, that also counts as
289 * having the same difference.
291 if (!common_adds)
292 common_adds = all_mask;
294 if (sline->lost_head) {
295 /* Lost head list records the lines removed from
296 * the parents, and parent_map records from which
297 * parent the line was removed.
299 struct lline *ll;
300 for (ll = sline->lost_head; ll; ll = ll->next) {
301 common_removes &= ll->parent_map;
304 return common_adds & common_removes;
307 static unsigned long line_all_diff(struct sline *sline, unsigned long all_mask)
310 * Look at the line and see from which parents we have some difference.
312 unsigned long different = (sline->flag ^ all_mask) & all_mask;
313 if (sline->lost_head) {
314 /* Lost head list records the lines removed from
315 * the parents, and parent_map records from which
316 * parent the line was removed.
318 struct lline *ll;
319 for (ll = sline->lost_head; ll; ll = ll->next) {
320 different |= ll->parent_map;
323 return different;
326 static unsigned long adjust_hunk_tail(struct sline *sline,
327 unsigned long all_mask,
328 unsigned long hunk_begin,
329 unsigned long i)
331 /* i points at the first uninteresting line.
332 * If the last line of the hunk was interesting
333 * only because it has some deletion, then
334 * it is not all that interesting for the
335 * purpose of giving trailing context lines.
337 if ((hunk_begin + 1 <= i) &&
338 ((sline[i-1].flag & all_mask) == all_mask))
339 i--;
340 return i;
343 static unsigned long next_interesting(struct sline *sline,
344 unsigned long mark,
345 unsigned long i,
346 unsigned long cnt,
347 int uninteresting)
349 while (i < cnt)
350 if (uninteresting ?
351 !(sline[i].flag & mark) :
352 (sline[i].flag & mark))
353 return i;
354 else
355 i++;
356 return cnt;
359 static int give_context(struct sline *sline, unsigned long cnt, int num_parent)
361 unsigned long all_mask = (1UL<<num_parent) - 1;
362 unsigned long mark = (1UL<<num_parent);
363 unsigned long i;
365 i = next_interesting(sline, mark, 0, cnt, 0);
366 if (cnt <= i)
367 return 0;
369 while (i < cnt) {
370 unsigned long j = (context < i) ? (i - context) : 0;
371 unsigned long k;
372 while (j < i)
373 sline[j++].flag |= mark;
375 again:
376 j = next_interesting(sline, mark, i, cnt, 1);
377 if (cnt <= j)
378 break; /* the rest are all interesting */
380 /* lookahead context lines */
381 k = next_interesting(sline, mark, j, cnt, 0);
382 j = adjust_hunk_tail(sline, all_mask, i, j);
384 if (k < j + context) {
385 /* k is interesting and [j,k) are not, but
386 * paint them interesting because the gap is small.
388 while (j < k)
389 sline[j++].flag |= mark;
390 i = k;
391 goto again;
394 /* j is the first uninteresting line and there is
395 * no overlap beyond it within context lines.
397 i = k;
398 k = (j + context < cnt) ? j + context : cnt;
399 while (j < k)
400 sline[j++].flag |= mark;
402 return 1;
405 static int make_hunks(struct sline *sline, unsigned long cnt,
406 int num_parent, int dense)
408 unsigned long all_mask = (1UL<<num_parent) - 1;
409 unsigned long mark = (1UL<<num_parent);
410 unsigned long i;
411 int has_interesting = 0;
413 for (i = 0; i < cnt; i++) {
414 if (interesting(&sline[i], all_mask))
415 sline[i].flag |= mark;
416 else
417 sline[i].flag &= ~mark;
419 if (!dense)
420 return give_context(sline, cnt, num_parent);
422 /* Look at each hunk, and if we have changes from only one
423 * parent, or the changes are the same from all but one
424 * parent, mark that uninteresting.
426 i = 0;
427 while (i < cnt) {
428 unsigned long j, hunk_begin, hunk_end;
429 int same, diff;
430 unsigned long same_diff, all_diff;
431 while (i < cnt && !(sline[i].flag & mark))
432 i++;
433 if (cnt <= i)
434 break; /* No more interesting hunks */
435 hunk_begin = i;
436 for (j = i + 1; j < cnt; j++) {
437 if (!(sline[j].flag & mark)) {
438 /* Look beyond the end to see if there
439 * is an interesting line after this
440 * hunk within context span.
442 unsigned long la; /* lookahead */
443 int contin = 0;
444 la = adjust_hunk_tail(sline, all_mask,
445 hunk_begin, j);
446 la = (la + context < cnt) ?
447 (la + context) : cnt;
448 while (j <= --la) {
449 if (sline[la].flag & mark) {
450 contin = 1;
451 break;
454 if (!contin)
455 break;
456 j = la;
459 hunk_end = j;
461 /* [i..hunk_end) are interesting. Now does it have
462 * the same change with all but one parent?
464 same_diff = all_mask;
465 all_diff = 0;
466 for (j = i; j < hunk_end; j++) {
467 same_diff &= line_common_diff(sline + j, all_mask);
468 all_diff |= line_all_diff(sline + j, all_mask);
470 diff = same = 0;
471 for (j = 0; j < num_parent; j++) {
472 if (same_diff & (1UL<<j))
473 same++;
474 if (all_diff & (1UL<<j))
475 diff++;
477 if ((num_parent - 1 <= same) || (diff == 1)) {
478 /* This hunk is not that interesting after all */
479 for (j = hunk_begin; j < hunk_end; j++)
480 sline[j].flag &= ~mark;
482 i = hunk_end;
485 has_interesting = give_context(sline, cnt, num_parent);
486 return has_interesting;
489 static void dump_sline(struct sline *sline, int cnt, int num_parent)
491 unsigned long mark = (1UL<<num_parent);
492 int i;
493 int lno = 0;
495 while (1) {
496 struct sline *sl = &sline[lno];
497 int hunk_end;
498 while (lno < cnt && !(sline[lno].flag & mark))
499 lno++;
500 if (cnt <= lno)
501 break;
502 for (hunk_end = lno + 1; hunk_end < cnt; hunk_end++)
503 if (!(sline[hunk_end].flag & mark))
504 break;
505 for (i = 0; i <= num_parent; i++) putchar(combine_marker);
506 printf(" +%d,%d ", lno+1, hunk_end-lno);
507 for (i = 0; i <= num_parent; i++) putchar(combine_marker);
508 putchar('\n');
509 while (lno < hunk_end) {
510 struct lline *ll;
511 int j;
512 sl = &sline[lno++];
513 ll = sl->lost_head;
514 while (ll) {
515 for (j = 0; j < num_parent; j++) {
516 if (ll->parent_map & (1UL<<j))
517 putchar('-');
518 else
519 putchar(' ');
521 puts(ll->line);
522 ll = ll->next;
524 for (j = 0; j < num_parent; j++) {
525 if ((1UL<<j) & sl->flag)
526 putchar(' ');
527 else
528 putchar('+');
530 printf("%.*s\n", sl->len, sl->bol);
535 static int show_combined_diff(struct path_list *elem, int num_parent,
536 int dense, const char *header, int show_empty)
538 unsigned long size, cnt, lno;
539 char *result, *cp, *ep;
540 struct sline *sline; /* survived lines */
541 int i, show_hunks, shown_header = 0;
542 char ourtmp[TMPPATHLEN];
544 /* Read the result of merge first */
545 result = grab_blob(elem->sha1, &size);
546 write_to_temp_file(ourtmp, result, size);
548 for (cnt = 0, cp = result; cp - result < size; cp++) {
549 if (*cp == '\n')
550 cnt++;
552 if (result[size-1] != '\n')
553 cnt++; /* incomplete line */
555 sline = xcalloc(cnt, sizeof(*sline));
556 ep = result;
557 sline[0].bol = result;
558 for (lno = 0, cp = result; cp - result < size; cp++) {
559 if (*cp == '\n') {
560 sline[lno].lost_tail = &sline[lno].lost_head;
561 sline[lno].len = cp - sline[lno].bol;
562 sline[lno].flag = (1UL<<num_parent) - 1;
563 lno++;
564 if (lno < cnt)
565 sline[lno].bol = cp + 1;
568 if (result[size-1] != '\n') {
569 sline[cnt-1].lost_tail = &sline[cnt-1].lost_head;
570 sline[cnt-1].len = size - (sline[cnt-1].bol - result);
571 sline[cnt-1].flag = (1UL<<num_parent) - 1;
574 for (i = 0; i < num_parent; i++)
575 combine_diff(elem->parent_sha1[i], ourtmp, sline, cnt, i);
577 show_hunks = make_hunks(sline, cnt, num_parent, dense);
579 if (header && (show_hunks || show_empty)) {
580 shown_header++;
581 puts(header);
583 if (show_hunks) {
584 printf("diff --%s ", dense ? "cc" : "combined");
585 if (quote_c_style(elem->path, NULL, NULL, 0))
586 quote_c_style(elem->path, NULL, stdout, 0);
587 else
588 printf("%s", elem->path);
589 putchar('\n');
590 dump_sline(sline, cnt, num_parent);
592 unlink(ourtmp);
593 free(result);
595 for (i = 0; i < cnt; i++) {
596 if (sline[i].lost_head) {
597 struct lline *ll = sline[i].lost_head;
598 while (ll) {
599 struct lline *tmp = ll;
600 ll = ll->next;
601 free(tmp);
605 free(sline);
606 return shown_header;
609 int diff_tree_combined_merge(const unsigned char *sha1,
610 const char *header,
611 int show_empty_merge, int dense)
613 struct commit *commit = lookup_commit(sha1);
614 struct diff_options diffopts;
615 struct commit_list *parents;
616 struct path_list *p, *paths = NULL;
617 int num_parent, i, num_paths;
619 diff_setup(&diffopts);
620 diffopts.output_format = DIFF_FORMAT_NO_OUTPUT;
621 diffopts.recursive = 1;
623 /* count parents */
624 for (parents = commit->parents, num_parent = 0;
625 parents;
626 parents = parents->next, num_parent++)
627 ; /* nothing */
629 /* find set of paths that everybody touches */
630 for (parents = commit->parents, i = 0;
631 parents;
632 parents = parents->next, i++) {
633 struct commit *parent = parents->item;
634 diff_tree_sha1(parent->object.sha1, commit->object.sha1, "",
635 &diffopts);
636 paths = intersect_paths(paths, i, num_parent);
637 diff_flush(&diffopts);
640 /* find out surviving paths */
641 for (num_paths = 0, p = paths; p; p = p->next) {
642 if (p->len)
643 num_paths++;
645 if (num_paths || show_empty_merge) {
646 for (p = paths; p; p = p->next) {
647 if (!p->len)
648 continue;
649 if (show_combined_diff(p, num_parent, dense, header,
650 show_empty_merge))
651 header = NULL;
655 /* Clean things up */
656 while (paths) {
657 struct path_list *tmp = paths;
658 paths = paths->next;
659 free(tmp);
661 return 0;