2 * Copyright (C) 2005 Junio C Hamano
12 static const char *diff_opts
= "-pu";
13 static unsigned char null_sha1
[20] = { 0, };
14 #define MAX_SCORE 10000
15 #define DEFAULT_MINIMUM_SCORE 5000
17 static const char *external_diff(void)
19 static const char *external_diff_cmd
= NULL
;
20 static int done_preparing
= 0;
23 return external_diff_cmd
;
26 * Default values above are meant to match the
27 * Linux kernel development style. Examples of
28 * alternative styles you can specify via environment
33 if (gitenv("GIT_EXTERNAL_DIFF"))
34 external_diff_cmd
= gitenv("GIT_EXTERNAL_DIFF");
36 /* In case external diff fails... */
37 diff_opts
= gitenv("GIT_DIFF_OPTS") ? : diff_opts
;
40 return external_diff_cmd
;
43 /* Help to copy the thing properly quoted for the shell safety.
44 * any single quote is replaced with '\'', and the caller is
45 * expected to enclose the result within a single quote pair.
48 * original sq_expand result
49 * name ==> name ==> 'name'
50 * a b ==> a b ==> 'a b'
51 * a'b ==> a'\''b ==> 'a'\''b'
53 static char *sq_expand(const char *src
)
55 static char *buf
= NULL
;
60 /* count bytes needed to store the quoted string. */
61 for (cnt
= 1, cp
= src
; *cp
; cnt
++, cp
++)
67 while ((c
= *src
++)) {
71 bp
= strcpy(bp
, "'\\''");
79 static struct diff_tempfile
{
87 unsigned char blob_sha1
[20];
88 unsigned short mode
; /* file mode */
89 unsigned sha1_valid
: 1; /* if true, use blob_sha1 and trust mode;
90 * if false, use the name and read from
93 unsigned file_valid
: 1; /* if false the file does not exist */
96 static void builtin_diff(const char *name_a
,
98 struct diff_tempfile
*temp
,
101 int i
, next_at
, cmd_size
;
102 const char *diff_cmd
= "diff -L'%s%s' -L'%s%s'";
103 const char *diff_arg
= "'%s' '%s'||:"; /* "||:" is to return 0 */
104 const char *input_name_sq
[2];
105 const char *path0
[2];
106 const char *path1
[2];
107 const char *name_sq
[2];
110 name_sq
[0] = sq_expand(name_a
);
111 name_sq
[1] = sq_expand(name_b
);
113 /* diff_cmd and diff_arg have 6 %s in total which makes
114 * the sum of these strings 12 bytes larger than required.
115 * we use 2 spaces around diff-opts, and we need to count
116 * terminating NUL, so we subtract 9 here.
118 cmd_size
= (strlen(diff_cmd
) + strlen(diff_opts
) +
119 strlen(diff_arg
) - 9);
120 for (i
= 0; i
< 2; i
++) {
121 input_name_sq
[i
] = sq_expand(temp
[i
].name
);
122 if (!strcmp(temp
[i
].name
, "/dev/null")) {
123 path0
[i
] = "/dev/null";
126 path0
[i
] = i
? "b/" : "a/";
127 path1
[i
] = name_sq
[i
];
129 cmd_size
+= (strlen(path0
[i
]) + strlen(path1
[i
]) +
130 strlen(input_name_sq
[i
]));
133 cmd
= xmalloc(cmd_size
);
136 next_at
+= snprintf(cmd
+next_at
, cmd_size
-next_at
,
138 path0
[0], path1
[0], path0
[1], path1
[1]);
139 next_at
+= snprintf(cmd
+next_at
, cmd_size
-next_at
,
141 next_at
+= snprintf(cmd
+next_at
, cmd_size
-next_at
,
142 diff_arg
, input_name_sq
[0], input_name_sq
[1]);
144 printf("diff --git a/%s b/%s\n", name_a
, name_b
);
146 printf("new file mode %s\n", temp
[1].mode
);
147 else if (!path1
[1][0])
148 printf("deleted file mode %s\n", temp
[0].mode
);
150 if (strcmp(temp
[0].mode
, temp
[1].mode
)) {
151 printf("old mode %s\n", temp
[0].mode
);
152 printf("new mode %s\n", temp
[1].mode
);
154 if (strcmp(name_a
, name_b
)) {
155 if (0 < rename_score
)
156 printf("rename similarity index %d%%\n",
158 rename_score
*100.0/MAX_SCORE
));
159 printf("rename old %s\n", name_a
);
160 printf("rename new %s\n", name_b
);
162 if (strncmp(temp
[0].mode
, temp
[1].mode
, 3))
163 /* we do not run diff between different kind
169 execlp("/bin/sh","sh", "-c", cmd
, NULL
);
173 * Given a name and sha1 pair, if the dircache tells us the file in
174 * the work tree has that object contents, return true, so that
175 * prepare_temp_file() does not have to inflate and extract.
177 static int work_tree_matches(const char *name
, const unsigned char *sha1
)
179 struct cache_entry
*ce
;
183 /* We do not read the cache ourselves here, because the
184 * benchmark with my previous version that always reads cache
185 * shows that it makes things worse for diff-tree comparing
186 * two linux-2.6 kernel trees in an already checked out work
187 * tree. This is because most diff-tree comparisons deal with
188 * only a small number of files, while reading the cache is
189 * expensive for a large project, and its cost outweighs the
190 * savings we get by not inflating the object to a temporary
191 * file. Practically, this code only helps when we are used
192 * by diff-cache --cached, which does read the cache before
199 pos
= cache_name_pos(name
, len
);
202 ce
= active_cache
[pos
];
203 if ((lstat(name
, &st
) < 0) ||
204 !S_ISREG(st
.st_mode
) ||
205 ce_match_stat(ce
, &st
) ||
206 memcmp(sha1
, ce
->sha1
, 20))
211 static void prep_temp_blob(struct diff_tempfile
*temp
,
219 strcpy(temp
->tmp_path
, ".diff_XXXXXX");
220 fd
= mkstemp(temp
->tmp_path
);
222 die("unable to create temp-file");
223 if (write(fd
, blob
, size
) != size
)
224 die("unable to write temp-file");
226 temp
->name
= temp
->tmp_path
;
227 strcpy(temp
->hex
, sha1_to_hex(sha1
));
229 sprintf(temp
->mode
, "%06o", mode
);
232 static void prepare_temp_file(const char *name
,
233 struct diff_tempfile
*temp
,
234 struct diff_spec
*one
)
236 if (!one
->file_valid
) {
238 /* A '-' entry produces this for file-2, and
239 * a '+' entry produces this for file-1.
241 temp
->name
= "/dev/null";
242 strcpy(temp
->hex
, ".");
243 strcpy(temp
->mode
, ".");
247 if (!one
->sha1_valid
||
248 work_tree_matches(name
, one
->blob_sha1
)) {
251 if (lstat(temp
->name
, &st
) < 0) {
253 goto not_a_valid_file
;
254 die("stat(%s): %s", temp
->name
, strerror(errno
));
256 if (S_ISLNK(st
.st_mode
)) {
258 char *buf
, buf_
[1024];
259 buf
= ((sizeof(buf_
) < st
.st_size
) ?
260 xmalloc(st
.st_size
) : buf_
);
261 ret
= readlink(name
, buf
, st
.st_size
);
263 die("readlink(%s)", name
);
264 prep_temp_blob(temp
, buf
, st
.st_size
,
266 one
->blob_sha1
: null_sha1
),
268 one
->mode
: S_IFLNK
));
271 if (!one
->sha1_valid
)
272 strcpy(temp
->hex
, sha1_to_hex(null_sha1
));
274 strcpy(temp
->hex
, sha1_to_hex(one
->blob_sha1
));
275 sprintf(temp
->mode
, "%06o",
276 S_IFREG
|ce_permissions(st
.st_mode
));
285 blob
= read_sha1_file(one
->blob_sha1
, type
, &size
);
286 if (!blob
|| strcmp(type
, "blob"))
287 die("unable to read blob object for %s (%s)",
288 name
, sha1_to_hex(one
->blob_sha1
));
289 prep_temp_blob(temp
, blob
, size
, one
->blob_sha1
, one
->mode
);
294 static void remove_tempfile(void)
298 for (i
= 0; i
< 2; i
++)
299 if (diff_temp
[i
].name
== diff_temp
[i
].tmp_path
) {
300 unlink(diff_temp
[i
].name
);
301 diff_temp
[i
].name
= NULL
;
305 static void remove_tempfile_on_signal(int signo
)
310 static int detect_rename
;
311 static int reverse_diff
;
312 static int diff_raw_output
= -1;
313 static const char **pathspec
;
315 static int minimum_score
;
317 static int matches_pathspec(const char *name
)
325 namelen
= strlen(name
);
326 for (i
= 0; i
< speccnt
; i
++) {
327 int speclen
= strlen(pathspec
[i
]);
328 if (! strncmp(pathspec
[i
], name
, speclen
) &&
329 speclen
<= namelen
&&
330 (name
[speclen
] == 0 || name
[speclen
] == '/'))
336 /* An external diff command takes:
338 * diff-cmd name infile1 infile1-sha1 infile1-mode \
339 * infile2 infile2-sha1 infile2-mode [ rename-to ]
342 static void run_external_diff(const char *name
,
344 struct diff_spec
*one
,
345 struct diff_spec
*two
,
348 struct diff_tempfile
*temp
= diff_temp
;
351 static int atexit_asked
= 0;
354 struct diff_spec
*tmp_spec
;
355 tmp_spec
= one
; one
= two
; two
= tmp_spec
;
358 tmp
= name
; name
= other
; other
= tmp
;
362 if (!matches_pathspec(name
) && (!other
|| !matches_pathspec(other
)))
366 prepare_temp_file(name
, &temp
[0], one
);
367 prepare_temp_file(other
? : name
, &temp
[1], two
);
368 if (! atexit_asked
&&
369 (temp
[0].name
== temp
[0].tmp_path
||
370 temp
[1].name
== temp
[1].tmp_path
)) {
372 atexit(remove_tempfile
);
374 signal(SIGINT
, remove_tempfile_on_signal
);
380 die("unable to fork");
382 const char *pgm
= external_diff();
385 const char *exec_arg
[9];
386 const char **arg
= &exec_arg
[0];
389 *arg
++ = temp
[0].name
;
390 *arg
++ = temp
[0].hex
;
391 *arg
++ = temp
[0].mode
;
392 *arg
++ = temp
[1].name
;
393 *arg
++ = temp
[1].hex
;
394 *arg
++ = temp
[1].mode
;
398 execvp(pgm
, (char *const*) exec_arg
);
401 execlp(pgm
, pgm
, name
, NULL
);
404 * otherwise we use the built-in one.
407 builtin_diff(name
, other
? : name
, temp
, rename_score
);
409 printf("* Unmerged path %s\n", name
);
412 if (waitpid(pid
, &status
, 0) < 0 ||
413 !WIFEXITED(status
) || WEXITSTATUS(status
)) {
414 /* Earlier we did not check the exit status because
415 * diff exits non-zero if files are different, and
416 * we are not interested in knowing that. It was a
417 * mistake which made it harder to quit a diff-*
418 * session that uses the git-apply-patch-script as
419 * the GIT_EXTERNAL_DIFF. A custom GIT_EXTERNAL_DIFF
420 * should also exit non-zero only when it wants to
421 * abort the entire diff-* session.
424 fprintf(stderr
, "external diff died, stopping at %s.\n", name
);
431 * We do not detect circular renames. Just hold created and deleted
432 * entries and later attempt to match them up. If they do not match,
433 * then spit them out as deletes or creates as original.
436 static struct diff_spec_hold
{
437 struct diff_spec_hold
*next
;
442 #define SHOULD_FREE 2
443 #define SHOULD_MUNMAP 4
446 } *createdfile
, *deletedfile
;
448 static void hold_diff(const char *name
,
449 struct diff_spec
*one
,
450 struct diff_spec
*two
)
452 struct diff_spec_hold
**list
, *elem
;
454 if (one
->file_valid
&& two
->file_valid
)
455 die("internal error");
457 if (!detect_rename
) {
458 run_external_diff(name
, NULL
, one
, two
, -1);
461 elem
= xmalloc(sizeof(*elem
) + strlen(name
));
462 strcpy(elem
->path
, name
);
466 if (one
->file_valid
) {
478 static int populate_data(struct diff_spec_hold
*s
)
484 if (s
->it
.sha1_valid
) {
485 s
->data
= read_sha1_file(s
->it
.blob_sha1
, type
, &s
->size
);
486 s
->flags
|= SHOULD_FREE
;
491 fd
= open(s
->path
, O_RDONLY
);
494 if (fstat(fd
, &st
)) {
498 s
->size
= st
.st_size
;
499 s
->data
= mmap(NULL
, s
->size
, PROT_READ
, MAP_PRIVATE
, fd
, 0);
504 s
->flags
|= SHOULD_MUNMAP
;
509 static void free_data(struct diff_spec_hold
*s
)
511 if (s
->flags
& SHOULD_FREE
)
513 else if (s
->flags
& SHOULD_MUNMAP
)
514 munmap(s
->data
, s
->size
);
515 s
->flags
&= ~(SHOULD_FREE
|SHOULD_MUNMAP
);
519 static void flush_remaining_diff(struct diff_spec_hold
*elem
,
522 static struct diff_spec null_file_spec
;
524 null_file_spec
.file_valid
= 0;
525 for ( ; elem
; elem
= elem
->next
) {
527 if (elem
->flags
& MATCHED
)
530 run_external_diff(elem
->path
, NULL
,
531 &null_file_spec
, &elem
->it
, -1);
533 run_external_diff(elem
->path
, NULL
,
534 &elem
->it
, &null_file_spec
, -1);
538 static int is_exact_match(struct diff_spec_hold
*src
,
539 struct diff_spec_hold
*dst
)
541 if (src
->it
.sha1_valid
&& dst
->it
.sha1_valid
&&
542 !memcmp(src
->it
.blob_sha1
, dst
->it
.blob_sha1
, 20))
544 if (populate_data(src
) || populate_data(dst
))
545 /* this is an error but will be caught downstream */
547 if (src
->size
== dst
->size
&&
548 !memcmp(src
->data
, dst
->data
, src
->size
))
553 int estimate_similarity(struct diff_spec_hold
*src
, struct diff_spec_hold
*dst
)
555 /* src points at a deleted file and dst points at a created
556 * file. They may be quite similar, in which case we want to
557 * say src is renamed to dst.
559 * Compare them and return how similar they are, representing
560 * the score as an integer between 0 and 10000, except
561 * where they match exactly it is considered better than anything
565 unsigned long delta_size
;
568 delta_size
= ((src
->size
< dst
->size
) ?
569 (dst
->size
- src
->size
) : (src
->size
- dst
->size
));
571 /* We would not consider rename followed by more than
572 * minimum_score/MAX_SCORE edits; that is, delta_size must be smaller
573 * than (src->size + dst->size)/2 * minimum_score/MAX_SCORE,
577 if ((src
->size
+dst
->size
)*minimum_score
< delta_size
*MAX_SCORE
*2)
580 delta
= diff_delta(src
->data
, src
->size
,
581 dst
->data
, dst
->size
,
585 /* This "delta" is really xdiff with adler32 and all the
586 * overheads but it is a quick and dirty approximation.
588 * Now we will give some score to it. 100% edit gets
589 * 0 points and 0% edit gets MAX_SCORE points. That is, every
590 * 1/MAX_SCORE edit gets 1 point penalty. The amount of penalty is:
592 * (delta_size * 2 / (src->size + dst->size)) * MAX_SCORE
595 score
= MAX_SCORE
-(MAX_SCORE
*2*delta_size
/(src
->size
+dst
->size
));
596 if (score
< 0) return 0;
597 if (MAX_SCORE
< score
) return MAX_SCORE
;
602 struct diff_spec_hold
*src
;
603 struct diff_spec_hold
*dst
;
607 static int score_compare(const void *a_
, const void *b_
)
609 const struct diff_score
*a
= a_
, *b
= b_
;
610 return b
->score
- a
->score
;
613 static void flush_rename_pair(struct diff_spec_hold
*src
,
614 struct diff_spec_hold
*dst
,
617 src
->flags
|= MATCHED
;
618 dst
->flags
|= MATCHED
;
621 run_external_diff(src
->path
, dst
->path
,
622 &src
->it
, &dst
->it
, rename_score
);
625 static void free_held_diff(struct diff_spec_hold
*list
)
627 struct diff_spec_hold
*h
;
628 for (h
= list
; list
; list
= h
) {
635 void diff_flush(void)
637 int num_create
, num_delete
, c
, d
;
638 struct diff_spec_hold
*elem
, *src
, *dst
;
639 struct diff_score
*mx
;
641 /* We really want to cull the candidates list early
642 * with cheap tests in order to avoid doing deltas.
644 * With the current callers, we should not have already
645 * matched entries at this point, but it is nonetheless
646 * checked for sanity.
648 for (dst
= createdfile
; dst
; dst
= dst
->next
) {
649 if (dst
->flags
& MATCHED
)
651 for (src
= deletedfile
; src
; src
= src
->next
) {
652 if (src
->flags
& MATCHED
)
654 if (! is_exact_match(src
, dst
))
656 flush_rename_pair(src
, dst
, MAX_SCORE
);
661 /* Count surviving candidates */
662 for (num_create
= 0, elem
= createdfile
; elem
; elem
= elem
->next
)
663 if (!(elem
->flags
& MATCHED
))
666 for (num_delete
= 0, elem
= deletedfile
; elem
; elem
= elem
->next
)
667 if (!(elem
->flags
& MATCHED
))
670 if (num_create
== 0 || num_delete
== 0)
673 mx
= xmalloc(sizeof(*mx
) * num_create
* num_delete
);
674 for (c
= 0, dst
= createdfile
; dst
; dst
= dst
->next
) {
675 int base
= c
* num_delete
;
676 if (dst
->flags
& MATCHED
)
678 for (d
= 0, src
= deletedfile
; src
; src
= src
->next
) {
679 struct diff_score
*m
= &mx
[base
+d
];
680 if (src
->flags
& MATCHED
)
684 m
->score
= estimate_similarity(src
, dst
);
689 qsort(mx
, num_create
*num_delete
, sizeof(*mx
), score_compare
);
692 for (c
= 0; c
< num_create
* num_delete
; c
++) {
695 if ((src
->flags
& MATCHED
) || (dst
->flags
& MATCHED
))
698 "**score ** %d %s %s\n",
699 mx
[c
].score
, src
->path
, dst
->path
);
703 for (c
= 0; c
< num_create
* num_delete
; c
++) {
706 if ((src
->flags
& MATCHED
) || (dst
->flags
& MATCHED
))
708 if (mx
[c
].score
< minimum_score
)
710 flush_rename_pair(src
, dst
, mx
[c
].score
);
715 flush_remaining_diff(createdfile
, 1);
716 flush_remaining_diff(deletedfile
, 0);
717 free_held_diff(createdfile
);
718 free_held_diff(deletedfile
);
719 createdfile
= deletedfile
= NULL
;
722 int diff_scoreopt_parse(const char *opt
)
724 int diglen
, num
, scale
, i
;
725 if (opt
[0] != '-' || opt
[1] != 'M')
726 return -1; /* that is not -M option */
727 diglen
= strspn(opt
+2, "0123456789");
728 if (diglen
== 0 || strlen(opt
+2) != diglen
)
729 return 0; /* use default */
730 sscanf(opt
+2, "%d", &num
);
731 for (i
= 0, scale
= 1; i
< diglen
; i
++)
734 /* user says num divided by scale and we say internally that
735 * is MAX_SCORE * num / scale.
737 return MAX_SCORE
* num
/ scale
;
740 void diff_setup(int detect_rename_
, int minimum_score_
, int reverse_diff_
,
741 int diff_raw_output_
,
742 const char **pathspec_
, int speccnt_
)
744 free_held_diff(createdfile
);
745 free_held_diff(deletedfile
);
746 createdfile
= deletedfile
= NULL
;
748 detect_rename
= detect_rename_
;
749 reverse_diff
= reverse_diff_
;
750 pathspec
= pathspec_
;
751 diff_raw_output
= diff_raw_output_
;
753 minimum_score
= minimum_score_
? : DEFAULT_MINIMUM_SCORE
;
756 static const char *git_object_type(unsigned mode
)
758 return S_ISDIR(mode
) ? "tree" : "blob";
761 void diff_addremove(int addremove
, unsigned mode
,
762 const unsigned char *sha1
,
763 const char *base
, const char *path
)
765 char concatpath
[PATH_MAX
];
766 struct diff_spec spec
[2], *one
, *two
;
768 if (0 <= diff_raw_output
) {
772 addremove
= (addremove
== '+' ? '-' : '+');
773 printf("%c%06o %s %s %s%s%c",
776 git_object_type(mode
), sha1_to_hex(sha1
),
777 base
, path
, diff_raw_output
);
783 memcpy(spec
[0].blob_sha1
, sha1
, 20);
785 spec
[0].sha1_valid
= !!memcmp(sha1
, null_sha1
, 20);
786 spec
[0].file_valid
= 1;
787 spec
[1].file_valid
= 0;
789 if (addremove
== '+') {
790 one
= spec
+ 1; two
= spec
;
792 one
= spec
; two
= one
+ 1;
796 strcpy(concatpath
, base
);
797 strcat(concatpath
, path
);
799 hold_diff(path
? concatpath
: base
, one
, two
);
802 void diff_change(unsigned old_mode
, unsigned new_mode
,
803 const unsigned char *old_sha1
,
804 const unsigned char *new_sha1
,
805 const char *base
, const char *path
) {
806 char concatpath
[PATH_MAX
];
807 struct diff_spec spec
[2];
809 if (0 <= diff_raw_output
) {
811 strcpy(old_hex
, sha1_to_hex(old_sha1
));
816 printf("*%06o->%06o %s %s->%s %s%s%c",
818 git_object_type(new_mode
),
819 sha1_to_hex(new_sha1
), old_hex
,
820 base
, path
, diff_raw_output
);
822 printf("*%06o->%06o %s %s->%s %s%s%c",
824 git_object_type(new_mode
),
825 old_hex
, sha1_to_hex(new_sha1
),
826 base
, path
, diff_raw_output
);
829 if (S_ISDIR(new_mode
))
833 strcpy(concatpath
, base
);
834 strcat(concatpath
, path
);
837 memcpy(spec
[0].blob_sha1
, old_sha1
, 20);
838 spec
[0].mode
= old_mode
;
839 memcpy(spec
[1].blob_sha1
, new_sha1
, 20);
840 spec
[1].mode
= new_mode
;
841 spec
[0].sha1_valid
= !!memcmp(old_sha1
, null_sha1
, 20);
842 spec
[1].sha1_valid
= !!memcmp(new_sha1
, null_sha1
, 20);
843 spec
[1].file_valid
= spec
[0].file_valid
= 1;
845 /* We do not look at changed files as candidate for
846 * rename detection ever.
848 run_external_diff(path
? concatpath
: base
, NULL
,
849 &spec
[0], &spec
[1], -1);
852 void diff_unmerge(const char *path
)
854 if (0 <= diff_raw_output
) {
855 printf("U %s%c", path
, diff_raw_output
);
858 run_external_diff(path
, NULL
, NULL
, NULL
, -1);