Don't care about st_dev in the index file
[git.git] / diff.c
blob4b2d1054bae0a6f4b87f6da4a862216d241ff42c
1 /*
2 * Copyright (C) 2005 Junio C Hamano
3 */
4 #include <sys/types.h>
5 #include <sys/wait.h>
6 #include <signal.h>
7 #include "cache.h"
8 #include "diff.h"
9 #include "diffcore.h"
/* Options handed to the built-in diff; external_diff() may replace them. */
static const char *diff_opts = "-pu";

/* The all-zero object name. */
static unsigned char null_sha1[20] = { 0 };

/* Output state: reverse_diff is set by diff_setup(), the rest by diff_flush(). */
static int reverse_diff;
static int generate_patch;
static int line_termination = '\n';
static int inter_name_termination = '\t';
19 static const char *external_diff(void)
21 static const char *external_diff_cmd = NULL;
22 static int done_preparing = 0;
24 if (done_preparing)
25 return external_diff_cmd;
28 * Default values above are meant to match the
29 * Linux kernel development style. Examples of
30 * alternative styles you can specify via environment
31 * variables are:
33 * GIT_DIFF_OPTS="-c";
35 if (gitenv("GIT_EXTERNAL_DIFF"))
36 external_diff_cmd = gitenv("GIT_EXTERNAL_DIFF");
38 /* In case external diff fails... */
39 diff_opts = gitenv("GIT_DIFF_OPTS") ? : diff_opts;
41 done_preparing = 1;
42 return external_diff_cmd;
/*
 * Help to copy the thing properly quoted for the shell safety.
 * Any single quote is replaced with '\'', and the caller is
 * expected to enclose the result within a single quote pair.
 *
 * E.g.
 *  original     sq_expand     result
 *  name     ==> name      ==> 'name'
 *  a b      ==> a b       ==> 'a b'
 *  a'b      ==> a'\''b    ==> 'a'\''b'
 *
 * Every call returns a buffer freshly obtained from xmalloc().
 */
static char *sq_expand(const char *src)
{
	static const char quoted_sq[] = "'\\''";
	size_t need = 1;	/* room for the terminating NUL */
	const char *in;
	char *result, *out;

	/* Each single quote expands to four bytes, anything else to one. */
	for (in = src; *in; in++)
		need += (*in == '\'') ? 4 : 1;

	result = xmalloc(need);
	out = result;
	for (in = src; *in; in++) {
		if (*in == '\'') {
			memcpy(out, quoted_sq, 4);
			out += 4;
		} else {
			*out++ = *in;
		}
	}
	*out = '\0';
	return result;
}
/*
 * Description of one side of a comparison as handed to the diff
 * program; diff_temp[0] is the first file, diff_temp[1] the second.
 */
static struct diff_tempfile {
	const char *name;	/* filename external diff should read from */
	char hex[41];		/* object name in hex, or "." for a missing side */
	char mode[10];		/* mode formatted with "%06o", or "." for a missing side */
	char tmp_path[50];	/* mkstemp() path; name points here for temporaries */
} diff_temp[2];
88 static void builtin_diff(const char *name_a,
89 const char *name_b,
90 struct diff_tempfile *temp,
91 const char *xfrm_msg)
93 int i, next_at, cmd_size;
94 const char *diff_cmd = "diff -L'%s%s' -L'%s%s'";
95 const char *diff_arg = "'%s' '%s'||:"; /* "||:" is to return 0 */
96 const char *input_name_sq[2];
97 const char *path0[2];
98 const char *path1[2];
99 const char *name_sq[2];
100 char *cmd;
102 name_sq[0] = sq_expand(name_a);
103 name_sq[1] = sq_expand(name_b);
105 /* diff_cmd and diff_arg have 6 %s in total which makes
106 * the sum of these strings 12 bytes larger than required.
107 * we use 2 spaces around diff-opts, and we need to count
108 * terminating NUL, so we subtract 9 here.
110 cmd_size = (strlen(diff_cmd) + strlen(diff_opts) +
111 strlen(diff_arg) - 9);
112 for (i = 0; i < 2; i++) {
113 input_name_sq[i] = sq_expand(temp[i].name);
114 if (!strcmp(temp[i].name, "/dev/null")) {
115 path0[i] = "/dev/null";
116 path1[i] = "";
117 } else {
118 path0[i] = i ? "b/" : "a/";
119 path1[i] = name_sq[i];
121 cmd_size += (strlen(path0[i]) + strlen(path1[i]) +
122 strlen(input_name_sq[i]));
125 cmd = xmalloc(cmd_size);
127 next_at = 0;
128 next_at += snprintf(cmd+next_at, cmd_size-next_at,
129 diff_cmd,
130 path0[0], path1[0], path0[1], path1[1]);
131 next_at += snprintf(cmd+next_at, cmd_size-next_at,
132 " %s ", diff_opts);
133 next_at += snprintf(cmd+next_at, cmd_size-next_at,
134 diff_arg, input_name_sq[0], input_name_sq[1]);
136 printf("diff --git a/%s b/%s\n", name_a, name_b);
137 if (!path1[0][0])
138 printf("new file mode %s\n", temp[1].mode);
139 else if (!path1[1][0])
140 printf("deleted file mode %s\n", temp[0].mode);
141 else {
142 if (strcmp(temp[0].mode, temp[1].mode)) {
143 printf("old mode %s\n", temp[0].mode);
144 printf("new mode %s\n", temp[1].mode);
146 if (xfrm_msg && xfrm_msg[0])
147 fputs(xfrm_msg, stdout);
149 if (strncmp(temp[0].mode, temp[1].mode, 3))
150 /* we do not run diff between different kind
151 * of objects.
153 exit(0);
155 fflush(NULL);
156 execlp("/bin/sh","sh", "-c", cmd, NULL);
159 struct diff_filespec *alloc_filespec(const char *path)
161 int namelen = strlen(path);
162 struct diff_filespec *spec = xmalloc(sizeof(*spec) + namelen + 1);
163 spec->path = (char *)(spec + 1);
164 strcpy(spec->path, path);
165 spec->should_free = spec->should_munmap = 0;
166 spec->xfrm_flags = 0;
167 spec->size = 0;
168 spec->data = NULL;
169 spec->mode = 0;
170 memset(spec->sha1, 0, 20);
171 return spec;
174 void fill_filespec(struct diff_filespec *spec, const unsigned char *sha1,
175 unsigned short mode)
177 if (mode) { /* just playing defensive */
178 spec->mode = mode;
179 memcpy(spec->sha1, sha1, 20);
180 spec->sha1_valid = !!memcmp(sha1, null_sha1, 20);
185 * Given a name and sha1 pair, if the dircache tells us the file in
186 * the work tree has that object contents, return true, so that
187 * prepare_temp_file() does not have to inflate and extract.
189 static int work_tree_matches(const char *name, const unsigned char *sha1)
191 struct cache_entry *ce;
192 struct stat st;
193 int pos, len;
195 /* We do not read the cache ourselves here, because the
196 * benchmark with my previous version that always reads cache
197 * shows that it makes things worse for diff-tree comparing
198 * two linux-2.6 kernel trees in an already checked out work
199 * tree. This is because most diff-tree comparisons deal with
200 * only a small number of files, while reading the cache is
201 * expensive for a large project, and its cost outweighs the
202 * savings we get by not inflating the object to a temporary
203 * file. Practically, this code only helps when we are used
204 * by diff-cache --cached, which does read the cache before
205 * calling us.
207 if (!active_cache)
208 return 0;
210 len = strlen(name);
211 pos = cache_name_pos(name, len);
212 if (pos < 0)
213 return 0;
214 ce = active_cache[pos];
215 if ((lstat(name, &st) < 0) ||
216 !S_ISREG(st.st_mode) || /* careful! */
217 ce_match_stat(ce, &st) ||
218 memcmp(sha1, ce->sha1, 20))
219 return 0;
220 /* we return 1 only when we can stat, it is a regular file,
221 * stat information matches, and sha1 recorded in the cache
222 * matches. I.e. we know the file in the work tree really is
223 * the same as the <name, sha1> pair.
225 return 1;
229 * While doing rename detection and pickaxe operation, we may need to
230 * grab the data for the blob (or file) for our own in-core comparison.
231 * diff_filespec has data and size fields for this purpose.
233 int diff_populate_filespec(struct diff_filespec *s)
235 int err = 0;
236 if (!DIFF_FILE_VALID(s))
237 die("internal error: asking to populate invalid file.");
238 if (S_ISDIR(s->mode))
239 return -1;
241 if (s->data)
242 return err;
243 if (!s->sha1_valid ||
244 work_tree_matches(s->path, s->sha1)) {
245 struct stat st;
246 int fd;
247 if (lstat(s->path, &st) < 0) {
248 if (errno == ENOENT) {
249 err_empty:
250 err = -1;
251 empty:
252 s->data = "";
253 s->size = 0;
254 return err;
257 s->size = st.st_size;
258 if (!s->size)
259 goto empty;
260 if (S_ISLNK(st.st_mode)) {
261 int ret;
262 s->data = xmalloc(s->size);
263 s->should_free = 1;
264 ret = readlink(s->path, s->data, s->size);
265 if (ret < 0) {
266 free(s->data);
267 goto err_empty;
269 return 0;
271 fd = open(s->path, O_RDONLY);
272 if (fd < 0)
273 goto err_empty;
274 s->data = mmap(NULL, s->size, PROT_READ, MAP_PRIVATE, fd, 0);
275 s->should_munmap = 1;
276 close(fd);
278 else {
279 char type[20];
280 s->data = read_sha1_file(s->sha1, type, &s->size);
281 s->should_free = 1;
283 return 0;
286 void diff_free_filepair(struct diff_filepair *p)
288 free(p->xfrm_msg);
289 free(p);
292 void diff_free_filespec_data(struct diff_filespec *s)
294 if (s->should_free)
295 free(s->data);
296 else if (s->should_munmap)
297 munmap(s->data, s->size);
298 s->should_free = s->should_munmap = 0;
299 s->data = NULL;
302 static void prep_temp_blob(struct diff_tempfile *temp,
303 void *blob,
304 unsigned long size,
305 unsigned char *sha1,
306 int mode)
308 int fd;
310 strcpy(temp->tmp_path, ".diff_XXXXXX");
311 fd = mkstemp(temp->tmp_path);
312 if (fd < 0)
313 die("unable to create temp-file");
314 if (write(fd, blob, size) != size)
315 die("unable to write temp-file");
316 close(fd);
317 temp->name = temp->tmp_path;
318 strcpy(temp->hex, sha1_to_hex(sha1));
319 temp->hex[40] = 0;
320 sprintf(temp->mode, "%06o", mode);
323 static void prepare_temp_file(const char *name,
324 struct diff_tempfile *temp,
325 struct diff_filespec *one)
327 if (!DIFF_FILE_VALID(one)) {
328 not_a_valid_file:
329 /* A '-' entry produces this for file-2, and
330 * a '+' entry produces this for file-1.
332 temp->name = "/dev/null";
333 strcpy(temp->hex, ".");
334 strcpy(temp->mode, ".");
335 return;
338 if (!one->sha1_valid ||
339 work_tree_matches(name, one->sha1)) {
340 struct stat st;
341 if (lstat(name, &st) < 0) {
342 if (errno == ENOENT)
343 goto not_a_valid_file;
344 die("stat(%s): %s", name, strerror(errno));
346 if (S_ISLNK(st.st_mode)) {
347 int ret;
348 char *buf, buf_[1024];
349 buf = ((sizeof(buf_) < st.st_size) ?
350 xmalloc(st.st_size) : buf_);
351 ret = readlink(name, buf, st.st_size);
352 if (ret < 0)
353 die("readlink(%s)", name);
354 prep_temp_blob(temp, buf, st.st_size,
355 (one->sha1_valid ?
356 one->sha1 : null_sha1),
357 (one->sha1_valid ?
358 one->mode : S_IFLNK));
360 else {
361 /* we can borrow from the file in the work tree */
362 temp->name = name;
363 if (!one->sha1_valid)
364 strcpy(temp->hex, sha1_to_hex(null_sha1));
365 else
366 strcpy(temp->hex, sha1_to_hex(one->sha1));
367 sprintf(temp->mode, "%06o",
368 S_IFREG |ce_permissions(st.st_mode));
370 return;
372 else {
373 if (diff_populate_filespec(one))
374 die("cannot read data blob for %s", one->path);
375 prep_temp_blob(temp, one->data, one->size,
376 one->sha1, one->mode);
380 static void remove_tempfile(void)
382 int i;
384 for (i = 0; i < 2; i++)
385 if (diff_temp[i].name == diff_temp[i].tmp_path) {
386 unlink(diff_temp[i].name);
387 diff_temp[i].name = NULL;
/*
 * Signal handler: remove our temporary files, then restore the default
 * disposition and re-raise the signal so that it still takes its
 * default action instead of being swallowed by this handler.
 */
static void remove_tempfile_on_signal(int signo)
{
	remove_tempfile();
	signal(signo, SIG_DFL);
	raise(signo);
}
396 /* An external diff command takes:
398 * diff-cmd name infile1 infile1-sha1 infile1-mode \
399 * infile2 infile2-sha1 infile2-mode [ rename-to ]
402 static void run_external_diff(const char *name,
403 const char *other,
404 struct diff_filespec *one,
405 struct diff_filespec *two,
406 const char *xfrm_msg)
408 struct diff_tempfile *temp = diff_temp;
409 pid_t pid;
410 int status;
411 static int atexit_asked = 0;
413 if (one && two) {
414 prepare_temp_file(name, &temp[0], one);
415 prepare_temp_file(other ? : name, &temp[1], two);
416 if (! atexit_asked &&
417 (temp[0].name == temp[0].tmp_path ||
418 temp[1].name == temp[1].tmp_path)) {
419 atexit_asked = 1;
420 atexit(remove_tempfile);
422 signal(SIGINT, remove_tempfile_on_signal);
425 fflush(NULL);
426 pid = fork();
427 if (pid < 0)
428 die("unable to fork");
429 if (!pid) {
430 const char *pgm = external_diff();
431 if (pgm) {
432 if (one && two) {
433 const char *exec_arg[10];
434 const char **arg = &exec_arg[0];
435 *arg++ = pgm;
436 *arg++ = name;
437 *arg++ = temp[0].name;
438 *arg++ = temp[0].hex;
439 *arg++ = temp[0].mode;
440 *arg++ = temp[1].name;
441 *arg++ = temp[1].hex;
442 *arg++ = temp[1].mode;
443 if (other) {
444 *arg++ = other;
445 *arg++ = xfrm_msg;
447 *arg = NULL;
448 execvp(pgm, (char *const*) exec_arg);
450 else
451 execlp(pgm, pgm, name, NULL);
454 * otherwise we use the built-in one.
456 if (one && two)
457 builtin_diff(name, other ? : name, temp, xfrm_msg);
458 else
459 printf("* Unmerged path %s\n", name);
460 exit(0);
462 if (waitpid(pid, &status, 0) < 0 ||
463 !WIFEXITED(status) || WEXITSTATUS(status)) {
464 /* Earlier we did not check the exit status because
465 * diff exits non-zero if files are different, and
466 * we are not interested in knowing that. It was a
467 * mistake which made it harder to quit a diff-*
468 * session that uses the git-apply-patch-script as
469 * the GIT_EXTERNAL_DIFF. A custom GIT_EXTERNAL_DIFF
470 * should also exit non-zero only when it wants to
471 * abort the entire diff-* session.
473 remove_tempfile();
474 fprintf(stderr, "external diff died, stopping at %s.\n", name);
475 exit(1);
477 remove_tempfile();
480 void diff_setup(int reverse_diff_)
482 reverse_diff = reverse_diff_;
485 struct diff_queue_struct diff_queued_diff;
487 void diff_q(struct diff_queue_struct *queue, struct diff_filepair *dp)
489 if (queue->alloc <= queue->nr) {
490 queue->alloc = alloc_nr(queue->alloc);
491 queue->queue = xrealloc(queue->queue,
492 sizeof(dp) * queue->alloc);
494 queue->queue[queue->nr++] = dp;
497 struct diff_filepair *diff_queue(struct diff_queue_struct *queue,
498 struct diff_filespec *one,
499 struct diff_filespec *two)
501 struct diff_filepair *dp = xmalloc(sizeof(*dp));
502 dp->one = one;
503 dp->two = two;
504 dp->xfrm_msg = NULL;
505 dp->orig_order = queue->nr;
506 dp->xfrm_work = 0;
507 diff_q(queue, dp);
508 return dp;
511 static void diff_flush_raw(struct diff_filepair *p)
513 if (DIFF_PAIR_UNMERGED(p)) {
514 printf("U %s%c", p->one->path, line_termination);
515 return;
517 printf(":%06o %06o %s ",
518 p->one->mode, p->two->mode, sha1_to_hex(p->one->sha1));
519 printf("%s%c%s%c%s%c",
520 sha1_to_hex(p->two->sha1), inter_name_termination,
521 p->one->path, inter_name_termination,
522 p->two->path, line_termination);
525 static void diff_flush_patch(struct diff_filepair *p)
527 const char *name, *other;
529 name = p->one->path;
530 other = (strcmp(name, p->two->path) ? p->two->path : NULL);
531 if ((DIFF_FILE_VALID(p->one) && S_ISDIR(p->one->mode)) ||
532 (DIFF_FILE_VALID(p->two) && S_ISDIR(p->two->mode)))
533 return; /* no tree diffs in patch format */
535 if (DIFF_PAIR_UNMERGED(p))
536 run_external_diff(name, NULL, NULL, NULL, NULL);
537 else
538 run_external_diff(name, other, p->one, p->two, p->xfrm_msg);
541 static int uninteresting(struct diff_filepair *p)
543 /* This function is written stricter than necessary to support
544 * the currently implemented transformers, but the idea is to
545 * let transformers to produce diff_filepairs any way they want,
546 * and filter and clean them up here before producing the output.
548 struct diff_filespec *one, *two;
550 if (DIFF_PAIR_UNMERGED(p))
551 return 0; /* unmerged is interesting */
553 one = p->one;
554 two = p->two;
556 /* deletion, addition, mode change and renames are all interesting. */
557 if (DIFF_FILE_VALID(one) != DIFF_FILE_VALID(two) ||
558 (one->mode != two->mode) ||
559 strcmp(one->path, two->path))
560 return 0;
562 /* both are valid and point at the same path. that is, we are
563 * dealing with a change.
565 if (one->sha1_valid && two->sha1_valid &&
566 !memcmp(one->sha1, two->sha1, sizeof(one->sha1)))
567 return 1; /* no change */
568 if (!one->sha1_valid && !two->sha1_valid)
569 return 1; /* both look at the same file on the filesystem. */
570 return 0;
573 void diffcore_prune(void)
576 * Although rename/copy detection wants to have "no-change"
577 * entries fed into them, the downstream do not need to see
578 * them. This function removes such entries.
580 * The applications that use rename/copy should:
582 * (1) feed change and "no-change" entries via diff_queue().
583 * (2) call diffcore_rename, and any other future diffcore_xxx
584 * that would benefit by still having "no-change" entries.
585 * (3) call diffcore_prune
586 * (4) call other diffcore_xxx that do not need to see
587 * "no-change" entries.
589 struct diff_queue_struct *q = &diff_queued_diff;
590 struct diff_queue_struct outq;
591 int i;
593 outq.queue = NULL;
594 outq.nr = outq.alloc = 0;
596 for (i = 0; i < q->nr; i++) {
597 struct diff_filepair *p = q->queue[i];
598 if (!uninteresting(p))
599 diff_q(&outq, p);
600 else
601 diff_free_filepair(p);
603 free(q->queue);
604 *q = outq;
605 return;
608 static void diff_flush_one(struct diff_filepair *p)
610 if (uninteresting(p))
611 return;
612 if (generate_patch)
613 diff_flush_patch(p);
614 else
615 diff_flush_raw(p);
618 int diff_queue_is_empty(void)
620 struct diff_queue_struct *q = &diff_queued_diff;
621 int i;
623 for (i = 0; i < q->nr; i++) {
624 struct diff_filepair *p = q->queue[i];
625 if (!uninteresting(p))
626 return 0;
628 return 1;
631 void diff_flush(int diff_output_style)
633 struct diff_queue_struct *q = &diff_queued_diff;
634 int i;
636 generate_patch = 0;
637 switch (diff_output_style) {
638 case DIFF_FORMAT_HUMAN:
639 line_termination = '\n';
640 inter_name_termination = '\t';
641 break;
642 case DIFF_FORMAT_MACHINE:
643 line_termination = inter_name_termination = 0;
644 break;
645 case DIFF_FORMAT_PATCH:
646 generate_patch = 1;
647 break;
649 for (i = 0; i < q->nr; i++)
650 diff_flush_one(q->queue[i]);
651 for (i = 0; i < q->nr; i++) {
652 struct diff_filepair *p = q->queue[i];
653 diff_free_filespec_data(p->one);
654 diff_free_filespec_data(p->two);
655 free(p->xfrm_msg);
656 free(p);
658 free(q->queue);
659 q->queue = NULL;
660 q->nr = q->alloc = 0;
663 void diff_addremove(int addremove, unsigned mode,
664 const unsigned char *sha1,
665 const char *base, const char *path)
667 char concatpath[PATH_MAX];
668 struct diff_filespec *one, *two;
670 /* This may look odd, but it is a preparation for
671 * feeding "there are unchanged files which should
672 * not produce diffs, but when you are doing copy
673 * detection you would need them, so here they are"
674 * entries to the diff-core. They will be prefixed
675 * with something like '=' or '*' (I haven't decided
676 * which but should not make any difference).
677 * Feeding the same new and old to diff_change() should
678 * also have the same effect. diff_flush() should
679 * filter uninteresting ones out at the final output
680 * stage.
682 if (reverse_diff)
683 addremove = (addremove == '+' ? '-' :
684 addremove == '-' ? '+' : addremove);
686 if (!path) path = "";
687 sprintf(concatpath, "%s%s", base, path);
688 one = alloc_filespec(concatpath);
689 two = alloc_filespec(concatpath);
691 if (addremove != '+')
692 fill_filespec(one, sha1, mode);
693 if (addremove != '-')
694 fill_filespec(two, sha1, mode);
696 diff_queue(&diff_queued_diff, one, two);
699 void diff_guif(unsigned old_mode,
700 unsigned new_mode,
701 const unsigned char *old_sha1,
702 const unsigned char *new_sha1,
703 const char *old_path,
704 const char *new_path)
706 struct diff_filespec *one, *two;
708 if (reverse_diff) {
709 unsigned tmp;
710 const unsigned char *tmp_c;
711 tmp = old_mode; old_mode = new_mode; new_mode = tmp;
712 tmp_c = old_sha1; old_sha1 = new_sha1; new_sha1 = tmp_c;
714 one = alloc_filespec(old_path);
715 two = alloc_filespec(new_path);
716 if (old_mode)
717 fill_filespec(one, old_sha1, old_mode);
718 if (new_mode)
719 fill_filespec(two, new_sha1, new_mode);
720 diff_queue(&diff_queued_diff, one, two);
723 void diff_change(unsigned old_mode, unsigned new_mode,
724 const unsigned char *old_sha1,
725 const unsigned char *new_sha1,
726 const char *base, const char *path)
728 char concatpath[PATH_MAX];
729 struct diff_filespec *one, *two;
731 if (reverse_diff) {
732 unsigned tmp;
733 const unsigned char *tmp_c;
734 tmp = old_mode; old_mode = new_mode; new_mode = tmp;
735 tmp_c = old_sha1; old_sha1 = new_sha1; new_sha1 = tmp_c;
737 if (!path) path = "";
738 sprintf(concatpath, "%s%s", base, path);
739 one = alloc_filespec(concatpath);
740 two = alloc_filespec(concatpath);
741 fill_filespec(one, old_sha1, old_mode);
742 fill_filespec(two, new_sha1, new_mode);
744 diff_queue(&diff_queued_diff, one, two);
747 void diff_unmerge(const char *path)
749 struct diff_filespec *one, *two;
750 one = alloc_filespec(path);
751 two = alloc_filespec(path);
752 diff_queue(&diff_queued_diff, one, two);