2 * Copyright (C) 2005 Junio C Hamano
/* Option string placed between the diff command and its file arguments
 * when the fallback command line is built (its length is counted there).
 * It may be replaced at run time by the GIT_DIFF_OPTS environment value.
 */
static const char *diff_opts = "-pu";

/* Twenty zero bytes.  An object name equal to this is treated as
 * "SHA-1 not known" by the memcmp() checks elsewhere in this file.
 */
static unsigned char null_sha1[20] = { 0, };
15 static const char *external_diff(void)
17 static const char *external_diff_cmd
= NULL
;
18 static int done_preparing
= 0;
21 return external_diff_cmd
;
24 * Default values above are meant to match the
25 * Linux kernel development style. Examples of
26 * alternative styles you can specify via environment
31 if (gitenv("GIT_EXTERNAL_DIFF"))
32 external_diff_cmd
= gitenv("GIT_EXTERNAL_DIFF");
34 /* In case external diff fails... */
35 diff_opts
= gitenv("GIT_DIFF_OPTS") ? : diff_opts
;
38 return external_diff_cmd
;
41 /* Help to copy the thing properly quoted for the shell safety.
42 * any single quote is replaced with '\'', and the caller is
43 * expected to enclose the result within a single quote pair.
46 * original sq_expand result
47 * name ==> name ==> 'name'
48 * a b ==> a b ==> 'a b'
49 * a'b ==> a'\''b ==> 'a'\''b'
51 static char *sq_expand(const char *src
)
53 static char *buf
= NULL
;
58 /* count bytes needed to store the quoted string. */
59 for (cnt
= 1, cp
= src
; *cp
; cnt
++, cp
++)
65 while ((c
= *src
++)) {
69 bp
= strcpy(bp
, "'\\''");
77 static struct diff_tempfile
{
85 unsigned char blob_sha1
[20];
86 unsigned short mode
; /* file mode */
87 unsigned sha1_valid
: 1; /* if true, use blob_sha1 and trust mode;
88 * however with a NULL SHA1, read them
89 * from the file system.
90 * if false, use the name and read mode from
93 unsigned file_valid
: 1; /* if false the file does not even exist */
96 static void builtin_diff(const char *name_a
,
98 struct diff_tempfile
*temp
)
101 const char *diff_cmd
= "diff -L'%s%s' -L'%s%s'";
102 const char *diff_arg
= "'%s' '%s'||:"; /* "||:" is to return 0 */
103 const char *input_name_sq
[2];
104 const char *path0
[2];
105 const char *path1
[2];
106 const char *name_sq
[2];
109 name_sq
[0] = sq_expand(name_a
);
110 name_sq
[1] = sq_expand(name_b
);
112 /* diff_cmd and diff_arg have 6 %s in total which makes
113 * the sum of these strings 12 bytes larger than required.
114 * we use 2 spaces around diff-opts, and we need to count
115 * terminating NUL, so we subtract 9 here.
117 int cmd_size
= (strlen(diff_cmd
) + strlen(diff_opts
) +
118 strlen(diff_arg
) - 9);
119 for (i
= 0; i
< 2; i
++) {
120 input_name_sq
[i
] = sq_expand(temp
[i
].name
);
121 if (!strcmp(temp
[i
].name
, "/dev/null")) {
122 path0
[i
] = "/dev/null";
125 path0
[i
] = i
? "b/" : "a/";
126 path1
[i
] = name_sq
[i
];
128 cmd_size
+= (strlen(path0
[i
]) + strlen(path1
[i
]) +
129 strlen(input_name_sq
[i
]));
132 cmd
= xmalloc(cmd_size
);
135 next_at
+= snprintf(cmd
+next_at
, cmd_size
-next_at
,
137 path0
[0], path1
[0], path0
[1], path1
[1]);
138 next_at
+= snprintf(cmd
+next_at
, cmd_size
-next_at
,
140 next_at
+= snprintf(cmd
+next_at
, cmd_size
-next_at
,
141 diff_arg
, input_name_sq
[0], input_name_sq
[1]);
143 printf("diff --git a/%s b/%s\n", name_a
, name_b
);
145 printf("new file mode %s\n", temp
[1].mode
);
146 else if (!path1
[1][0])
147 printf("deleted file mode %s\n", temp
[0].mode
);
149 if (strcmp(temp
[0].mode
, temp
[1].mode
)) {
150 printf("old mode %s\n", temp
[0].mode
);
151 printf("new mode %s\n", temp
[1].mode
);
153 if (strcmp(name_a
, name_b
)) {
154 printf("rename old %s\n", name_a
);
155 printf("rename new %s\n", name_b
);
157 if (strncmp(temp
[0].mode
, temp
[1].mode
, 3))
158 /* we do not run diff between different kind
164 execlp("/bin/sh","sh", "-c", cmd
, NULL
);
168 * Given a name and sha1 pair, if the dircache tells us the file in
169 * the work tree has that object contents, return true, so that
170 * prepare_temp_file() does not have to inflate and extract.
172 static int work_tree_matches(const char *name
, const unsigned char *sha1
)
174 struct cache_entry
*ce
;
178 /* We do not read the cache ourselves here, because the
179 * benchmark with my previous version that always reads cache
180 * shows that it makes things worse for diff-tree comparing
181 * two linux-2.6 kernel trees in an already checked out work
182 * tree. This is because most diff-tree comparisons deal with
183 * only a small number of files, while reading the cache is
184 * expensive for a large project, and its cost outweighs the
185 * savings we get by not inflating the object to a temporary
186 * file. Practically, this code only helps when we are used
187 * by diff-cache --cached, which does read the cache before
194 pos
= cache_name_pos(name
, len
);
197 ce
= active_cache
[pos
];
198 if ((lstat(name
, &st
) < 0) ||
199 !S_ISREG(st
.st_mode
) ||
200 ce_match_stat(ce
, &st
) ||
201 memcmp(sha1
, ce
->sha1
, 20))
206 static void prep_temp_blob(struct diff_tempfile
*temp
,
214 strcpy(temp
->tmp_path
, ".diff_XXXXXX");
215 fd
= mkstemp(temp
->tmp_path
);
217 die("unable to create temp-file");
218 if (write(fd
, blob
, size
) != size
)
219 die("unable to write temp-file");
221 temp
->name
= temp
->tmp_path
;
222 strcpy(temp
->hex
, sha1_to_hex(sha1
));
224 sprintf(temp
->mode
, "%06o", mode
);
227 static void prepare_temp_file(const char *name
,
228 struct diff_tempfile
*temp
,
229 struct diff_spec
*one
)
231 if (!one
->file_valid
) {
233 /* A '-' entry produces this for file-2, and
234 * a '+' entry produces this for file-1.
236 temp
->name
= "/dev/null";
237 strcpy(temp
->hex
, ".");
238 strcpy(temp
->mode
, ".");
242 if (!one
->sha1_valid
||
243 work_tree_matches(name
, one
->blob_sha1
)) {
246 if (lstat(temp
->name
, &st
) < 0) {
248 goto not_a_valid_file
;
249 die("stat(%s): %s", temp
->name
, strerror(errno
));
251 if (S_ISLNK(st
.st_mode
)) {
253 char *buf
, buf_
[1024];
254 buf
= ((sizeof(buf_
) < st
.st_size
) ?
255 xmalloc(st
.st_size
) : buf_
);
256 ret
= readlink(name
, buf
, st
.st_size
);
258 die("readlink(%s)", name
);
259 prep_temp_blob(temp
, buf
, st
.st_size
,
261 one
->blob_sha1
: null_sha1
),
263 one
->mode
: S_IFLNK
));
266 if (!one
->sha1_valid
)
267 strcpy(temp
->hex
, sha1_to_hex(null_sha1
));
269 strcpy(temp
->hex
, sha1_to_hex(one
->blob_sha1
));
270 sprintf(temp
->mode
, "%06o",
271 S_IFREG
|ce_permissions(st
.st_mode
));
280 blob
= read_sha1_file(one
->blob_sha1
, type
, &size
);
281 if (!blob
|| strcmp(type
, "blob"))
282 die("unable to read blob object for %s (%s)",
283 name
, sha1_to_hex(one
->blob_sha1
));
284 prep_temp_blob(temp
, blob
, size
, one
->blob_sha1
, one
->mode
);
289 static void remove_tempfile(void)
293 for (i
= 0; i
< 2; i
++)
294 if (diff_temp
[i
].name
== diff_temp
[i
].tmp_path
) {
295 unlink(diff_temp
[i
].name
);
296 diff_temp
[i
].name
= NULL
;
300 static void remove_tempfile_on_signal(int signo
)
305 static int detect_rename
;
306 static int reverse_diff
;
307 static const char **pathspec
;
309 static int diff_rename_minimum_score
;
311 static int matches_pathspec(const char *name
)
319 namelen
= strlen(name
);
320 for (i
= 0; i
< speccnt
; i
++) {
321 int speclen
= strlen(pathspec
[i
]);
322 if (! strncmp(pathspec
[i
], name
, speclen
) &&
323 speclen
<= namelen
&&
324 (name
[speclen
] == 0 || name
[speclen
] == '/'))
330 /* An external diff command takes:
332 * diff-cmd name infile1 infile1-sha1 infile1-mode \
333 * infile2 infile2-sha1 infile2-mode [ rename-to ]
336 static void run_external_diff(const char *name
,
338 struct diff_spec
*one
,
339 struct diff_spec
*two
)
341 struct diff_tempfile
*temp
= diff_temp
;
344 static int atexit_asked
= 0;
347 struct diff_spec
*tmp_spec
;
348 tmp_spec
= one
; one
= two
; two
= tmp_spec
;
351 tmp
= name
; name
= other
; other
= tmp
;
355 if (!matches_pathspec(name
) && (!other
|| !matches_pathspec(other
)))
359 prepare_temp_file(name
, &temp
[0], one
);
360 prepare_temp_file(other
? : name
, &temp
[1], two
);
361 if (! atexit_asked
&&
362 (temp
[0].name
== temp
[0].tmp_path
||
363 temp
[1].name
== temp
[1].tmp_path
)) {
365 atexit(remove_tempfile
);
367 signal(SIGINT
, remove_tempfile_on_signal
);
373 die("unable to fork");
375 const char *pgm
= external_diff();
378 const char *exec_arg
[9];
379 const char **arg
= &exec_arg
[0];
382 *arg
++ = temp
[0].name
;
383 *arg
++ = temp
[0].hex
;
384 *arg
++ = temp
[0].mode
;
385 *arg
++ = temp
[1].name
;
386 *arg
++ = temp
[1].hex
;
387 *arg
++ = temp
[1].mode
;
391 execvp(pgm
, (char *const*) exec_arg
);
394 execlp(pgm
, pgm
, name
, NULL
);
397 * otherwise we use the built-in one.
400 builtin_diff(name
, other
? : name
, temp
);
402 printf("* Unmerged path %s\n", name
);
405 if (waitpid(pid
, &status
, 0) < 0 ||
406 !WIFEXITED(status
) || WEXITSTATUS(status
)) {
407 /* Earlier we did not check the exit status because
408 * diff exits non-zero if files are different, and
409 * we are not interested in knowing that. It was a
410 * mistake which made it harder to quit a diff-*
411 * session that uses the git-apply-patch-script as
412 * the GIT_EXTERNAL_DIFF. A custom GIT_EXTERNAL_DIFF
413 * should also exit non-zero only when it wants to
414 * abort the entire diff-* session.
417 fprintf(stderr
, "external diff died, stopping at %s.\n", name
);
424 * We do not detect circular renames. Just hold created and deleted
425 * entries and later attempt to match them up. If they do not match,
426 * then spit them out as deletes or creates as original.
429 static struct diff_spec_hold
{
430 struct diff_spec_hold
*next
;
435 #define SHOULD_FREE 2
436 #define SHOULD_MUNMAP 4
439 } *createdfile
, *deletedfile
;
441 static void hold_diff(const char *name
,
442 struct diff_spec
*one
,
443 struct diff_spec
*two
)
445 struct diff_spec_hold
**list
, *elem
;
447 if (one
->file_valid
&& two
->file_valid
)
448 die("internal error");
450 if (!detect_rename
) {
451 run_external_diff(name
, NULL
, one
, two
);
454 elem
= xmalloc(sizeof(*elem
) + strlen(name
));
455 strcpy(elem
->path
, name
);
459 if (one
->file_valid
) {
471 static int populate_data(struct diff_spec_hold
*s
)
477 if (s
->it
.sha1_valid
) {
478 s
->data
= read_sha1_file(s
->it
.blob_sha1
, type
, &s
->size
);
479 s
->flags
|= SHOULD_FREE
;
484 fd
= open(s
->path
, O_RDONLY
);
487 if (fstat(fd
, &st
)) {
491 s
->size
= st
.st_size
;
492 s
->data
= mmap(NULL
, s
->size
, PROT_READ
, MAP_PRIVATE
, fd
, 0);
497 s
->flags
|= SHOULD_MUNMAP
;
502 static void free_data(struct diff_spec_hold
*s
)
504 if (s
->flags
& SHOULD_FREE
)
506 else if (s
->flags
& SHOULD_MUNMAP
)
507 munmap(s
->data
, s
->size
);
508 s
->flags
&= ~(SHOULD_FREE
|SHOULD_MUNMAP
);
511 static void flush_remaining_diff(struct diff_spec_hold
*elem
,
514 static struct diff_spec null_file_spec
;
516 null_file_spec
.file_valid
= 0;
517 for ( ; elem
; elem
= elem
->next
) {
519 if (elem
->flags
& MATCHED
)
522 run_external_diff(elem
->path
, NULL
,
523 &null_file_spec
, &elem
->it
);
525 run_external_diff(elem
->path
, NULL
,
526 &elem
->it
, &null_file_spec
);
530 static int is_exact_match(struct diff_spec_hold
*src
,
531 struct diff_spec_hold
*dst
)
533 if (src
->it
.sha1_valid
&& dst
->it
.sha1_valid
&&
534 !memcmp(src
->it
.blob_sha1
, dst
->it
.blob_sha1
, 20))
536 if (populate_data(src
) || populate_data(dst
))
537 /* this is an error but will be caught downstream */
539 if (src
->size
== dst
->size
&&
540 !memcmp(src
->data
, dst
->data
, src
->size
))
545 #define MINIMUM_SCORE 5000
546 int estimate_similarity(struct diff_spec_hold
*src
, struct diff_spec_hold
*dst
)
548 /* src points at a deleted file and dst points at a created
549 * file. They may be quite similar, in which case we want to
550 * say src is renamed to dst.
552 * Compare them and return how similar they are, representing
553 * the score as an integer between 0 and 10000. 10000 is
554 * reserved for the case where they match exactly.
557 unsigned long delta_size
;
559 delta_size
= ((src
->size
< dst
->size
) ?
560 (dst
->size
- src
->size
) : (src
->size
- dst
->size
));
562 /* We would not consider rename followed by more than
563 * 20% edits; that is, delta_size must be smaller than
564 * (src->size + dst->size)/2 * 0.2, which means...
566 if ((src
->size
+ dst
->size
) < delta_size
* 10)
569 delta
= diff_delta(src
->data
, src
->size
,
570 dst
->data
, dst
->size
,
574 /* This "delta" is really xdiff with adler32 and all the
575 * overheads but it is a quick and dirty approximation.
577 * Now we will give some score to it. Let's say 20% edit gets
578 * 5000 points and 0% edit gets 9000 points. That is, every
579 * 1/20000 edit gets 1 point penalty. The amount of penalty is:
581 * (delta_size * 2 / (src->size + dst->size)) * 20000
584 return 9000 - (40000 * delta_size
/ (src
->size
+dst
->size
));
/*
 * One entry of the rename-candidate matrix: a (src, dst) pair together
 * with the similarity score computed for it.
 * NOTE(review): src presumably refers to a deleted file and dst to a
 * created one, as in estimate_similarity() -- confirm against the code
 * that fills the matrix.
 */
struct diff_score {
	struct diff_spec_hold *src;
	struct diff_spec_hold *dst;
	int score;
};

/*
 * qsort() comparator that orders struct diff_score entries by
 * descending score (highest score first).
 *
 * Returns a negative value when *a_ should sort before *b_, a positive
 * value when it should sort after, and 0 when the scores are equal.
 */
static int score_compare(const void *a_, const void *b_)
{
	const struct diff_score *a = a_, *b = b_;

	/* Compare rather than subtract: "b->score - a->score" is
	 * undefined behaviour when the difference overflows int.
	 */
	return (b->score > a->score) - (b->score < a->score);
}
599 static void flush_rename_pair(struct diff_spec_hold
*src
,
600 struct diff_spec_hold
*dst
)
602 src
->flags
|= MATCHED
;
603 dst
->flags
|= MATCHED
;
606 run_external_diff(src
->path
, dst
->path
,
610 static void free_held_diff(struct diff_spec_hold
*list
)
612 struct diff_spec_hold
*h
;
613 for (h
= list
; list
; list
= h
) {
620 void diff_flush(void)
622 int num_create
, num_delete
, c
, d
;
623 struct diff_spec_hold
*elem
, *src
, *dst
;
624 struct diff_score
*mx
;
626 /* We really want to cull the candidates list early
627 * with cheap tests in order to avoid doing deltas.
629 for (dst
= createdfile
; dst
; dst
= dst
->next
) {
630 for (src
= deletedfile
; src
; src
= src
->next
) {
631 if (! is_exact_match(src
, dst
))
633 flush_rename_pair(src
, dst
);
638 /* Count surviving candidates */
639 for (num_create
= 0, elem
= createdfile
; elem
; elem
= elem
->next
)
640 if (!(elem
->flags
& MATCHED
))
643 for (num_delete
= 0, elem
= deletedfile
; elem
; elem
= elem
->next
)
644 if (!(elem
->flags
& MATCHED
))
647 if (num_create
== 0 || num_delete
== 0)
650 mx
= xmalloc(sizeof(*mx
) * num_create
* num_delete
);
651 for (c
= 0, dst
= createdfile
; dst
; dst
= dst
->next
) {
652 int base
= c
* num_delete
;
653 if (dst
->flags
& MATCHED
)
655 for (d
= 0, src
= deletedfile
; src
; src
= src
->next
) {
656 struct diff_score
*m
= &mx
[base
+d
];
657 if (src
->flags
& MATCHED
)
661 m
->score
= estimate_similarity(src
, dst
);
666 qsort(mx
, num_create
*num_delete
, sizeof(*mx
), score_compare
);
668 for (c
= 0; c
< num_create
* num_delete
; c
++) {
671 if ((src
->flags
& MATCHED
) || (dst
->flags
& MATCHED
))
674 "**score ** %d %s %s\n",
675 mx
[c
].score
, src
->path
, dst
->path
);
678 for (c
= 0; c
< num_create
* num_delete
; c
++) {
681 if ((src
->flags
& MATCHED
) || (dst
->flags
& MATCHED
))
683 if (mx
[c
].score
< diff_rename_minimum_score
)
685 flush_rename_pair(src
, dst
);
689 flush_remaining_diff(createdfile
, 1);
690 flush_remaining_diff(deletedfile
, 0);
691 free_held_diff(createdfile
);
692 free_held_diff(deletedfile
);
693 createdfile
= deletedfile
= NULL
;
696 void diff_setup(int detect_rename_
, int minimum_score_
, int reverse_diff_
,
697 const char **pathspec_
, int speccnt_
)
699 free_held_diff(createdfile
);
700 free_held_diff(deletedfile
);
701 createdfile
= deletedfile
= NULL
;
703 detect_rename
= detect_rename_
;
704 reverse_diff
= reverse_diff_
;
705 pathspec
= pathspec_
;
707 diff_rename_minimum_score
= minimum_score_
? : MINIMUM_SCORE
;
710 void diff_addremove(int addremove
, unsigned mode
,
711 const unsigned char *sha1
,
712 const char *base
, const char *path
)
714 char concatpath
[PATH_MAX
];
715 struct diff_spec spec
[2], *one
, *two
;
717 memcpy(spec
[0].blob_sha1
, sha1
, 20);
719 spec
[0].sha1_valid
= !!memcmp(sha1
, null_sha1
, 20);
720 spec
[0].file_valid
= 1;
721 spec
[1].file_valid
= 0;
723 if (addremove
== '+') {
724 one
= spec
+ 1; two
= spec
;
726 one
= spec
; two
= one
+ 1;
730 strcpy(concatpath
, base
);
731 strcat(concatpath
, path
);
733 hold_diff(path
? concatpath
: base
, one
, two
);
736 void diff_change(unsigned old_mode
, unsigned new_mode
,
737 const unsigned char *old_sha1
,
738 const unsigned char *new_sha1
,
739 const char *base
, const char *path
) {
740 char concatpath
[PATH_MAX
];
741 struct diff_spec spec
[2];
744 strcpy(concatpath
, base
);
745 strcat(concatpath
, path
);
748 memcpy(spec
[0].blob_sha1
, old_sha1
, 20);
749 spec
[0].mode
= old_mode
;
750 memcpy(spec
[1].blob_sha1
, new_sha1
, 20);
751 spec
[1].mode
= new_mode
;
752 spec
[0].sha1_valid
= !!memcmp(old_sha1
, null_sha1
, 20);
753 spec
[1].sha1_valid
= !!memcmp(new_sha1
, null_sha1
, 20);
754 spec
[1].file_valid
= spec
[0].file_valid
= 1;
756 /* We do not look at changed files as candidate for
757 * rename detection ever.
759 run_external_diff(path
? concatpath
: base
, NULL
, &spec
[0], &spec
[1]);
/*
 * Report `path` as unmerged.
 *
 * Hands the name to run_external_diff() with no rename target and no
 * file specs (all three remaining arguments are NULL).
 * NOTE(review): run_external_diff() appears to print
 * "* Unmerged path <name>" for this case -- confirm the condition there.
 */
void diff_unmerge(const char *path)
{
	run_external_diff(path, NULL, NULL, NULL);
}