max-count in terms of intersection
[git/fastimport.git] / diff.c
blobffe8a5523406badc415b11f468a2aafb7c9611a9
1 /*
2 * Copyright (C) 2005 Junio C Hamano
3 */
4 #include <sys/types.h>
5 #include <sys/wait.h>
6 #include <signal.h>
7 #include "cache.h"
8 #include "quote.h"
9 #include "diff.h"
10 #include "diffcore.h"
/*
 * Options placed between the labels and the file names on the "diff"
 * command line that builtin_diff() builds.  external_diff() replaces
 * the default with the value of $GIT_DIFF_OPTS when that is set.
 */
12 static const char *diff_opts = "-pu";
/*
 * Set to 1 by diff_setup_done() when DIFF_SETUP_USE_SIZE_CACHE is
 * requested; while it is 0, diff_populate_filespec() ignores its
 * size_only argument.
 */
14 static int use_size_cache;
/*
 * Value diff_setup_done() stores in options->rename_limit when rename
 * detection is enabled and the limit is still negative.
 */
16 int diff_rename_limit_default = -1;
/*
 * Return a newly allocated copy of str suitable for output, or NULL
 * when str itself is NULL.
 *
 * quote_c_style() is first called without an output buffer.  A zero
 * result makes us return a plain strdup() of str; any other result is
 * used as the length of the buffer the second call fills in.  The
 * caller owns (and frees) the returned string.
 */
static char *quote_one(const char *str)
{
	char *quoted;
	int quoted_len;

	if (str == NULL)
		return NULL;

	quoted_len = quote_c_style(str, NULL, NULL, 0);
	if (quoted_len == 0)
		return strdup(str);

	/* one extra byte for the terminating NUL */
	quoted = xmalloc(quoted_len + 1);
	quote_c_style(str, quoted, NULL, 0);
	return quoted;
}
/*
 * Return a newly allocated string made of "one" immediately followed
 * by "two" (the callers in this file pass an "a/" or "b/" prefix and
 * a path name).
 *
 * Both parts are first sized with quote_c_style() in its no_dq mode.
 * If either call reports a non-zero length, the two parts are written
 * back to back inside a single pair of double quotes; otherwise the
 * parts are joined verbatim.
 */
static char *quote_two(const char *one, const char *two)
{
	int need_one = quote_c_style(one, NULL, NULL, 1);
	int need_two = quote_c_style(two, NULL, NULL, 1);
	char *xp;

	if (need_one + need_two) {
		/*
		 * A part that reported zero still takes up its plain
		 * length in the output.  Each length must go to its own
		 * variable: storing strlen(two) in need_one (as this code
		 * used to) corrupts both the allocation size and the
		 * offset at which "two" is written.
		 */
		if (!need_one)
			need_one = strlen(one);
		if (!need_two)
			need_two = strlen(two);

		/* opening quote + both parts + closing quote + NUL */
		xp = xmalloc(need_one + need_two + 3);
		xp[0] = '"';
		quote_c_style(one, xp + 1, NULL, 1);
		quote_c_style(two, xp + need_one + 1, NULL, 1);
		strcpy(xp + need_one + need_two + 1, "\"");
		return xp;
	}

	/* neither part needs quoting: plain concatenation */
	need_one = strlen(one);
	need_two = strlen(two);
	xp = xmalloc(need_one + need_two + 1);
	strcpy(xp, one);
	strcpy(xp + need_one, two);
	return xp;
}
58 static const char *external_diff(void)
60 static const char *external_diff_cmd = NULL;
61 static int done_preparing = 0;
62 const char *env_diff_opts;
64 if (done_preparing)
65 return external_diff_cmd;
68 * Default values above are meant to match the
69 * Linux kernel development style. Examples of
70 * alternative styles you can specify via environment
71 * variables are:
73 * GIT_DIFF_OPTS="-c";
75 external_diff_cmd = getenv("GIT_EXTERNAL_DIFF");
77 /* In case external diff fails... */
78 env_diff_opts = getenv("GIT_DIFF_OPTS");
79 if (env_diff_opts) diff_opts = env_diff_opts;
81 done_preparing = 1;
82 return external_diff_cmd;
/* Size of the tmp_path buffer below; also passed to git_mkstemp(). */
85 #define TEMPFILE_PATH_LEN 50
/*
 * Description of one side of a comparison as handed to the diff
 * program.  diff_temp[0] and diff_temp[1] are the two sides;
 * remove_tempfile() unlinks a side only when name points at tmp_path.
 */
87 static struct diff_tempfile {
88 const char *name; /* filename external diff should read from */
89 char hex[41]; /* sha1_to_hex() text, or "." for a /dev/null side */
90 char mode[10]; /* mode printed with "%06o", or "." for a /dev/null side */
91 char tmp_path[TEMPFILE_PATH_LEN]; /* filled in by git_mkstemp() in prep_temp_blob() */
92 } diff_temp[2];
/*
 * Count the lines in the named file.
 *
 * Returns 0 for a completely empty file.  A final line that lacks a
 * trailing newline still counts as a line.
 *
 * If the file cannot be opened, the error is reported on stderr and
 * the process exits with status 1; previously the NULL stream was
 * passed straight to fgetc().
 */
static int count_lines(const char *filename)
{
	FILE *in;
	int count = 0, ch, completely_empty = 1, nl_just_seen = 0;

	in = fopen(filename, "r");
	if (!in) {
		perror(filename);
		exit(1);
	}
	while ((ch = fgetc(in)) != EOF) {
		completely_empty = 0;
		if (ch == '\n') {
			count++;
			nl_just_seen = 1;
		}
		else
			nl_just_seen = 0;
	}
	fclose(in);
	if (completely_empty)
		return 0;
	if (!nl_just_seen)
		count++; /* no trailing newline */
	return count;
}
/*
 * Print the line-range part of a hunk header for a side that has
 * "count" lines and starts at line 1: "0,0" for no lines, "1" for a
 * single line, "1,<count>" otherwise.
 */
static void print_line_count(int count)
{
	if (count == 0)
		printf("0,0");
	else if (count == 1)
		printf("1");
	else
		printf("1,%d", count);
}
/*
 * Copy the named file to stdout, writing the character "prefix" at the
 * start of every line.  When the file does not end with a newline, a
 * newline and the "\ No newline at end of file" marker are appended.
 *
 * If the file cannot be opened, the error is reported on stderr and
 * the process exits with status 1; previously the NULL stream was
 * passed straight to fgetc().
 */
static void copy_file(int prefix, const char *filename)
{
	FILE *in;
	int ch, nl_just_seen = 1;

	in = fopen(filename, "r");
	if (!in) {
		perror(filename);
		exit(1);
	}
	while ((ch = fgetc(in)) != EOF) {
		if (nl_just_seen)
			putchar(prefix);
		putchar(ch);
		nl_just_seen = (ch == '\n');
	}
	fclose(in);
	if (!nl_just_seen)
		printf("\n\\ No newline at end of file\n");
}
152 static void emit_rewrite_diff(const char *name_a,
153 const char *name_b,
154 struct diff_tempfile *temp)
156 /* Use temp[i].name as input, name_a and name_b as labels */
157 int lc_a, lc_b;
158 lc_a = count_lines(temp[0].name);
159 lc_b = count_lines(temp[1].name);
160 printf("--- %s\n+++ %s\n@@ -", name_a, name_b);
161 print_line_count(lc_a);
162 printf(" +");
163 print_line_count(lc_b);
164 printf(" @@\n");
165 if (lc_a)
166 copy_file('-', temp[0].name);
167 if (lc_b)
168 copy_file('+', temp[1].name);
171 static void builtin_diff(const char *name_a,
172 const char *name_b,
173 struct diff_tempfile *temp,
174 const char *xfrm_msg,
175 int complete_rewrite)
177 int i, next_at, cmd_size;
178 const char *const diff_cmd = "diff -L%s -L%s";
179 const char *const diff_arg = "-- %s %s||:"; /* "||:" is to return 0 */
180 const char *input_name_sq[2];
181 const char *label_path[2];
182 char *cmd;
184 /* diff_cmd and diff_arg have 4 %s in total which makes
185 * the sum of these strings 8 bytes larger than required.
186 * we use 2 spaces around diff-opts, and we need to count
187 * terminating NUL; we used to subtract 5 here, but we do not
188 * care about small leaks in this subprocess that is about
189 * to exec "diff" anymore.
191 cmd_size = (strlen(diff_cmd) + strlen(diff_opts) + strlen(diff_arg)
192 + 128);
194 for (i = 0; i < 2; i++) {
195 input_name_sq[i] = sq_quote(temp[i].name);
196 if (!strcmp(temp[i].name, "/dev/null"))
197 label_path[i] = "/dev/null";
198 else if (!i)
199 label_path[i] = sq_quote(quote_two("a/", name_a));
200 else
201 label_path[i] = sq_quote(quote_two("b/", name_b));
202 cmd_size += (strlen(label_path[i]) + strlen(input_name_sq[i]));
205 cmd = xmalloc(cmd_size);
207 next_at = 0;
208 next_at += snprintf(cmd+next_at, cmd_size-next_at,
209 diff_cmd, label_path[0], label_path[1]);
210 next_at += snprintf(cmd+next_at, cmd_size-next_at,
211 " %s ", diff_opts);
212 next_at += snprintf(cmd+next_at, cmd_size-next_at,
213 diff_arg, input_name_sq[0], input_name_sq[1]);
215 printf("diff --git %s %s\n",
216 quote_two("a/", name_a), quote_two("b/", name_b));
217 if (label_path[0][0] == '/') {
218 /* dev/null */
219 printf("new file mode %s\n", temp[1].mode);
220 if (xfrm_msg && xfrm_msg[0])
221 puts(xfrm_msg);
223 else if (label_path[1][0] == '/') {
224 printf("deleted file mode %s\n", temp[0].mode);
225 if (xfrm_msg && xfrm_msg[0])
226 puts(xfrm_msg);
228 else {
229 if (strcmp(temp[0].mode, temp[1].mode)) {
230 printf("old mode %s\n", temp[0].mode);
231 printf("new mode %s\n", temp[1].mode);
233 if (xfrm_msg && xfrm_msg[0])
234 puts(xfrm_msg);
235 if (strncmp(temp[0].mode, temp[1].mode, 3))
236 /* we do not run diff between different kind
237 * of objects.
239 exit(0);
240 if (complete_rewrite) {
241 fflush(NULL);
242 emit_rewrite_diff(name_a, name_b, temp);
243 exit(0);
246 fflush(NULL);
247 execlp("/bin/sh","sh", "-c", cmd, NULL);
250 struct diff_filespec *alloc_filespec(const char *path)
252 int namelen = strlen(path);
253 struct diff_filespec *spec = xmalloc(sizeof(*spec) + namelen + 1);
255 memset(spec, 0, sizeof(*spec));
256 spec->path = (char *)(spec + 1);
257 memcpy(spec->path, path, namelen+1);
258 return spec;
261 void fill_filespec(struct diff_filespec *spec, const unsigned char *sha1,
262 unsigned short mode)
264 if (mode) {
265 spec->mode = DIFF_FILE_CANON_MODE(mode);
266 memcpy(spec->sha1, sha1, 20);
267 spec->sha1_valid = !!memcmp(sha1, null_sha1, 20);
272 * Given a name and sha1 pair, if the dircache tells us the file in
273 * the work tree has that object contents, return true, so that
274 * prepare_temp_file() does not have to inflate and extract.
276 static int work_tree_matches(const char *name, const unsigned char *sha1)
278 struct cache_entry *ce;
279 struct stat st;
280 int pos, len;
282 /* We do not read the cache ourselves here, because the
283 * benchmark with my previous version that always reads cache
284 * shows that it makes things worse for diff-tree comparing
285 * two linux-2.6 kernel trees in an already checked out work
286 * tree. This is because most diff-tree comparisons deal with
287 * only a small number of files, while reading the cache is
288 * expensive for a large project, and its cost outweighs the
289 * savings we get by not inflating the object to a temporary
290 * file. Practically, this code only helps when we are used
291 * by diff-cache --cached, which does read the cache before
292 * calling us.
294 if (!active_cache)
295 return 0;
297 len = strlen(name);
298 pos = cache_name_pos(name, len);
299 if (pos < 0)
300 return 0;
301 ce = active_cache[pos];
302 if ((lstat(name, &st) < 0) ||
303 !S_ISREG(st.st_mode) || /* careful! */
304 ce_match_stat(ce, &st) ||
305 memcmp(sha1, ce->sha1, 20))
306 return 0;
307 /* we return 1 only when we can stat, it is a regular file,
308 * stat information matches, and sha1 recorded in the cache
309 * matches. I.e. we know the file in the work tree really is
310 * the same as the <name, sha1> pair.
312 return 1;
/*
 * Table of object sizes keyed by sha1, maintained by
 * locate_size_cache().  sha1_size_cache is an array of pointers to
 * individually allocated entries; sha1_size_cache_nr is the number of
 * entries in use and sha1_size_cache_alloc the number of slots
 * allocated.
 */
315 static struct sha1_size_cache {
316 unsigned char sha1[20];
317 unsigned long size;
318 } **sha1_size_cache;
319 static int sha1_size_cache_nr, sha1_size_cache_alloc;
321 static struct sha1_size_cache *locate_size_cache(unsigned char *sha1,
322 int find_only,
323 unsigned long size)
325 int first, last;
326 struct sha1_size_cache *e;
328 first = 0;
329 last = sha1_size_cache_nr;
330 while (last > first) {
331 int cmp, next = (last + first) >> 1;
332 e = sha1_size_cache[next];
333 cmp = memcmp(e->sha1, sha1, 20);
334 if (!cmp)
335 return e;
336 if (cmp < 0) {
337 last = next;
338 continue;
340 first = next+1;
342 /* not found */
343 if (find_only)
344 return NULL;
345 /* insert to make it at "first" */
346 if (sha1_size_cache_alloc <= sha1_size_cache_nr) {
347 sha1_size_cache_alloc = alloc_nr(sha1_size_cache_alloc);
348 sha1_size_cache = xrealloc(sha1_size_cache,
349 sha1_size_cache_alloc *
350 sizeof(*sha1_size_cache));
352 sha1_size_cache_nr++;
353 if (first < sha1_size_cache_nr)
354 memmove(sha1_size_cache + first + 1, sha1_size_cache + first,
355 (sha1_size_cache_nr - first - 1) *
356 sizeof(*sha1_size_cache));
357 e = xmalloc(sizeof(struct sha1_size_cache));
358 sha1_size_cache[first] = e;
359 memcpy(e->sha1, sha1, 20);
360 e->size = size;
361 return e;
365 * While doing rename detection and pickaxe operation, we may need to
366 * grab the data for the blob (or file) for our own in-core comparison.
367 * diff_filespec has data and size fields for this purpose.
369 int diff_populate_filespec(struct diff_filespec *s, int size_only)
371 int err = 0;
372 if (!DIFF_FILE_VALID(s))
373 die("internal error: asking to populate invalid file.");
374 if (S_ISDIR(s->mode))
375 return -1;
377 if (!use_size_cache)
378 size_only = 0;
380 if (s->data)
381 return err;
382 if (!s->sha1_valid ||
383 work_tree_matches(s->path, s->sha1)) {
384 struct stat st;
385 int fd;
386 if (lstat(s->path, &st) < 0) {
387 if (errno == ENOENT) {
388 err_empty:
389 err = -1;
390 empty:
391 s->data = "";
392 s->size = 0;
393 return err;
396 s->size = st.st_size;
397 if (!s->size)
398 goto empty;
399 if (size_only)
400 return 0;
401 if (S_ISLNK(st.st_mode)) {
402 int ret;
403 s->data = xmalloc(s->size);
404 s->should_free = 1;
405 ret = readlink(s->path, s->data, s->size);
406 if (ret < 0) {
407 free(s->data);
408 goto err_empty;
410 return 0;
412 fd = open(s->path, O_RDONLY);
413 if (fd < 0)
414 goto err_empty;
415 s->data = mmap(NULL, s->size, PROT_READ, MAP_PRIVATE, fd, 0);
416 close(fd);
417 if (s->data == MAP_FAILED)
418 goto err_empty;
419 s->should_munmap = 1;
421 else {
422 char type[20];
423 struct sha1_size_cache *e;
425 if (size_only) {
426 e = locate_size_cache(s->sha1, 1, 0);
427 if (e) {
428 s->size = e->size;
429 return 0;
431 if (!sha1_object_info(s->sha1, type, &s->size))
432 locate_size_cache(s->sha1, 0, s->size);
434 else {
435 s->data = read_sha1_file(s->sha1, type, &s->size);
436 s->should_free = 1;
439 return 0;
442 void diff_free_filespec_data(struct diff_filespec *s)
444 if (s->should_free)
445 free(s->data);
446 else if (s->should_munmap)
447 munmap(s->data, s->size);
448 s->should_free = s->should_munmap = 0;
449 s->data = NULL;
452 static void prep_temp_blob(struct diff_tempfile *temp,
453 void *blob,
454 unsigned long size,
455 const unsigned char *sha1,
456 int mode)
458 int fd;
460 fd = git_mkstemp(temp->tmp_path, TEMPFILE_PATH_LEN, ".diff_XXXXXX");
461 if (fd < 0)
462 die("unable to create temp-file");
463 if (write(fd, blob, size) != size)
464 die("unable to write temp-file");
465 close(fd);
466 temp->name = temp->tmp_path;
467 strcpy(temp->hex, sha1_to_hex(sha1));
468 temp->hex[40] = 0;
469 sprintf(temp->mode, "%06o", mode);
472 static void prepare_temp_file(const char *name,
473 struct diff_tempfile *temp,
474 struct diff_filespec *one)
476 if (!DIFF_FILE_VALID(one)) {
477 not_a_valid_file:
478 /* A '-' entry produces this for file-2, and
479 * a '+' entry produces this for file-1.
481 temp->name = "/dev/null";
482 strcpy(temp->hex, ".");
483 strcpy(temp->mode, ".");
484 return;
487 if (!one->sha1_valid ||
488 work_tree_matches(name, one->sha1)) {
489 struct stat st;
490 if (lstat(name, &st) < 0) {
491 if (errno == ENOENT)
492 goto not_a_valid_file;
493 die("stat(%s): %s", name, strerror(errno));
495 if (S_ISLNK(st.st_mode)) {
496 int ret;
497 char *buf, buf_[1024];
498 buf = ((sizeof(buf_) < st.st_size) ?
499 xmalloc(st.st_size) : buf_);
500 ret = readlink(name, buf, st.st_size);
501 if (ret < 0)
502 die("readlink(%s)", name);
503 prep_temp_blob(temp, buf, st.st_size,
504 (one->sha1_valid ?
505 one->sha1 : null_sha1),
506 (one->sha1_valid ?
507 one->mode : S_IFLNK));
509 else {
510 /* we can borrow from the file in the work tree */
511 temp->name = name;
512 if (!one->sha1_valid)
513 strcpy(temp->hex, sha1_to_hex(null_sha1));
514 else
515 strcpy(temp->hex, sha1_to_hex(one->sha1));
516 /* Even though we may sometimes borrow the
517 * contents from the work tree, we always want
518 * one->mode. mode is trustworthy even when
519 * !(one->sha1_valid), as long as
520 * DIFF_FILE_VALID(one).
522 sprintf(temp->mode, "%06o", one->mode);
524 return;
526 else {
527 if (diff_populate_filespec(one, 0))
528 die("cannot read data blob for %s", one->path);
529 prep_temp_blob(temp, one->data, one->size,
530 one->sha1, one->mode);
534 static void remove_tempfile(void)
536 int i;
538 for (i = 0; i < 2; i++)
539 if (diff_temp[i].name == diff_temp[i].tmp_path) {
540 unlink(diff_temp[i].name);
541 diff_temp[i].name = NULL;
/*
 * Signal handler installed by run_external_diff(): remove our
 * temporary files, then let the signal take its default action.
 *
 * Returning from the handler after the cleanup (as this used to do)
 * swallowed the signal and let the process carry on; restoring the
 * default disposition and re-raising terminates it as the sender
 * intended.
 */
static void remove_tempfile_on_signal(int signo)
{
	remove_tempfile();
	signal(signo, SIG_DFL);
	raise(signo);
}
550 /* An external diff command takes:
552 * diff-cmd name infile1 infile1-sha1 infile1-mode \
553 * infile2 infile2-sha1 infile2-mode [ rename-to ]
556 static void run_external_diff(const char *pgm,
557 const char *name,
558 const char *other,
559 struct diff_filespec *one,
560 struct diff_filespec *two,
561 const char *xfrm_msg,
562 int complete_rewrite)
564 struct diff_tempfile *temp = diff_temp;
565 pid_t pid;
566 int status;
567 static int atexit_asked = 0;
568 const char *othername;
570 othername = (other? other : name);
571 if (one && two) {
572 prepare_temp_file(name, &temp[0], one);
573 prepare_temp_file(othername, &temp[1], two);
574 if (! atexit_asked &&
575 (temp[0].name == temp[0].tmp_path ||
576 temp[1].name == temp[1].tmp_path)) {
577 atexit_asked = 1;
578 atexit(remove_tempfile);
580 signal(SIGINT, remove_tempfile_on_signal);
583 fflush(NULL);
584 pid = fork();
585 if (pid < 0)
586 die("unable to fork");
587 if (!pid) {
588 if (pgm) {
589 if (one && two) {
590 const char *exec_arg[10];
591 const char **arg = &exec_arg[0];
592 *arg++ = pgm;
593 *arg++ = name;
594 *arg++ = temp[0].name;
595 *arg++ = temp[0].hex;
596 *arg++ = temp[0].mode;
597 *arg++ = temp[1].name;
598 *arg++ = temp[1].hex;
599 *arg++ = temp[1].mode;
600 if (other) {
601 *arg++ = other;
602 *arg++ = xfrm_msg;
604 *arg = NULL;
605 execvp(pgm, (char *const*) exec_arg);
607 else
608 execlp(pgm, pgm, name, NULL);
611 * otherwise we use the built-in one.
613 if (one && two)
614 builtin_diff(name, othername, temp, xfrm_msg,
615 complete_rewrite);
616 else
617 printf("* Unmerged path %s\n", name);
618 exit(0);
620 if (waitpid(pid, &status, 0) < 0 ||
621 !WIFEXITED(status) || WEXITSTATUS(status)) {
622 /* Earlier we did not check the exit status because
623 * diff exits non-zero if files are different, and
624 * we are not interested in knowing that. It was a
625 * mistake which made it harder to quit a diff-*
626 * session that uses the git-apply-patch-script as
627 * the GIT_EXTERNAL_DIFF. A custom GIT_EXTERNAL_DIFF
628 * should also exit non-zero only when it wants to
629 * abort the entire diff-* session.
631 remove_tempfile();
632 fprintf(stderr, "external diff died, stopping at %s.\n", name);
633 exit(1);
635 remove_tempfile();
638 static void diff_fill_sha1_info(struct diff_filespec *one)
640 if (DIFF_FILE_VALID(one)) {
641 if (!one->sha1_valid) {
642 struct stat st;
643 if (stat(one->path, &st) < 0)
644 die("stat %s", one->path);
645 if (index_path(one->sha1, one->path, &st, 0))
646 die("cannot hash %s\n", one->path);
649 else
650 memset(one->sha1, 0, 20);
653 static void run_diff(struct diff_filepair *p, struct diff_options *o)
655 const char *pgm = external_diff();
656 char msg[PATH_MAX*2+300], *xfrm_msg;
657 struct diff_filespec *one;
658 struct diff_filespec *two;
659 const char *name;
660 const char *other;
661 char *name_munged, *other_munged;
662 int complete_rewrite = 0;
663 int len;
665 if (DIFF_PAIR_UNMERGED(p)) {
666 /* unmerged */
667 run_external_diff(pgm, p->one->path, NULL, NULL, NULL, NULL,
669 return;
672 name = p->one->path;
673 other = (strcmp(name, p->two->path) ? p->two->path : NULL);
674 name_munged = quote_one(name);
675 other_munged = quote_one(other);
676 one = p->one; two = p->two;
678 diff_fill_sha1_info(one);
679 diff_fill_sha1_info(two);
681 len = 0;
682 switch (p->status) {
683 case DIFF_STATUS_COPIED:
684 len += snprintf(msg + len, sizeof(msg) - len,
685 "similarity index %d%%\n"
686 "copy from %s\n"
687 "copy to %s\n",
688 (int)(0.5 + p->score * 100.0/MAX_SCORE),
689 name_munged, other_munged);
690 break;
691 case DIFF_STATUS_RENAMED:
692 len += snprintf(msg + len, sizeof(msg) - len,
693 "similarity index %d%%\n"
694 "rename from %s\n"
695 "rename to %s\n",
696 (int)(0.5 + p->score * 100.0/MAX_SCORE),
697 name_munged, other_munged);
698 break;
699 case DIFF_STATUS_MODIFIED:
700 if (p->score) {
701 len += snprintf(msg + len, sizeof(msg) - len,
702 "dissimilarity index %d%%\n",
703 (int)(0.5 + p->score *
704 100.0/MAX_SCORE));
705 complete_rewrite = 1;
706 break;
708 /* fallthru */
709 default:
710 /* nothing */
714 if (memcmp(one->sha1, two->sha1, 20)) {
715 char one_sha1[41];
716 const char *index_fmt = o->full_index ? "index %s..%s" : "index %.7s..%.7s";
717 memcpy(one_sha1, sha1_to_hex(one->sha1), 41);
719 len += snprintf(msg + len, sizeof(msg) - len,
720 index_fmt, one_sha1, sha1_to_hex(two->sha1));
721 if (one->mode == two->mode)
722 len += snprintf(msg + len, sizeof(msg) - len,
723 " %06o", one->mode);
724 len += snprintf(msg + len, sizeof(msg) - len, "\n");
727 if (len)
728 msg[--len] = 0;
729 xfrm_msg = len ? msg : NULL;
731 if (!pgm &&
732 DIFF_FILE_VALID(one) && DIFF_FILE_VALID(two) &&
733 (S_IFMT & one->mode) != (S_IFMT & two->mode)) {
734 /* a filepair that changes between file and symlink
735 * needs to be split into deletion and creation.
737 struct diff_filespec *null = alloc_filespec(two->path);
738 run_external_diff(NULL, name, other, one, null, xfrm_msg, 0);
739 free(null);
740 null = alloc_filespec(one->path);
741 run_external_diff(NULL, name, other, null, two, xfrm_msg, 0);
742 free(null);
744 else
745 run_external_diff(pgm, name, other, one, two, xfrm_msg,
746 complete_rewrite);
748 free(name_munged);
749 free(other_munged);
752 void diff_setup(struct diff_options *options)
754 memset(options, 0, sizeof(*options));
755 options->output_format = DIFF_FORMAT_RAW;
756 options->line_termination = '\n';
757 options->break_opt = -1;
758 options->rename_limit = -1;
760 options->change = diff_change;
761 options->add_remove = diff_addremove;
764 int diff_setup_done(struct diff_options *options)
766 if ((options->find_copies_harder &&
767 options->detect_rename != DIFF_DETECT_COPY) ||
768 (0 <= options->rename_limit && !options->detect_rename))
769 return -1;
770 if (options->detect_rename && options->rename_limit < 0)
771 options->rename_limit = diff_rename_limit_default;
772 if (options->setup & DIFF_SETUP_USE_CACHE) {
773 if (!active_cache)
774 /* read-cache does not die even when it fails
775 * so it is safe for us to do this here. Also
776 * it does not smudge active_cache or active_nr
777 * when it fails, so we do not have to worry about
778 * cleaning it up oufselves either.
780 read_cache();
782 if (options->setup & DIFF_SETUP_USE_SIZE_CACHE)
783 use_size_cache = 1;
785 return 0;
788 int diff_opt_parse(struct diff_options *options, const char **av, int ac)
790 const char *arg = av[0];
791 if (!strcmp(arg, "-p") || !strcmp(arg, "-u"))
792 options->output_format = DIFF_FORMAT_PATCH;
793 else if (!strcmp(arg, "-z"))
794 options->line_termination = 0;
795 else if (!strncmp(arg, "-l", 2))
796 options->rename_limit = strtoul(arg+2, NULL, 10);
797 else if (!strcmp(arg, "--full-index"))
798 options->full_index = 1;
799 else if (!strcmp(arg, "--name-only"))
800 options->output_format = DIFF_FORMAT_NAME;
801 else if (!strcmp(arg, "--name-status"))
802 options->output_format = DIFF_FORMAT_NAME_STATUS;
803 else if (!strcmp(arg, "-R"))
804 options->reverse_diff = 1;
805 else if (!strncmp(arg, "-S", 2))
806 options->pickaxe = arg + 2;
807 else if (!strcmp(arg, "-s"))
808 options->output_format = DIFF_FORMAT_NO_OUTPUT;
809 else if (!strncmp(arg, "-O", 2))
810 options->orderfile = arg + 2;
811 else if (!strncmp(arg, "--diff-filter=", 14))
812 options->filter = arg + 14;
813 else if (!strcmp(arg, "--pickaxe-all"))
814 options->pickaxe_opts = DIFF_PICKAXE_ALL;
815 else if (!strncmp(arg, "-B", 2)) {
816 if ((options->break_opt =
817 diff_scoreopt_parse(arg)) == -1)
818 return -1;
820 else if (!strncmp(arg, "-M", 2)) {
821 if ((options->rename_score =
822 diff_scoreopt_parse(arg)) == -1)
823 return -1;
824 options->detect_rename = DIFF_DETECT_RENAME;
826 else if (!strncmp(arg, "-C", 2)) {
827 if ((options->rename_score =
828 diff_scoreopt_parse(arg)) == -1)
829 return -1;
830 options->detect_rename = DIFF_DETECT_COPY;
832 else if (!strcmp(arg, "--find-copies-harder"))
833 options->find_copies_harder = 1;
834 else
835 return 0;
836 return 1;
839 static int parse_num(const char **cp_p)
841 unsigned long num, scale;
842 int ch, dot;
843 const char *cp = *cp_p;
845 num = 0;
846 scale = 1;
847 dot = 0;
848 for(;;) {
849 ch = *cp;
850 if ( !dot && ch == '.' ) {
851 scale = 1;
852 dot = 1;
853 } else if ( ch == '%' ) {
854 scale = dot ? scale*100 : 100;
855 cp++; /* % is always at the end */
856 break;
857 } else if ( ch >= '0' && ch <= '9' ) {
858 if ( scale < 100000 ) {
859 scale *= 10;
860 num = (num*10) + (ch-'0');
862 } else {
863 break;
865 cp++;
867 *cp_p = cp;
869 /* user says num divided by scale and we say internally that
870 * is MAX_SCORE * num / scale.
872 return (num >= scale) ? MAX_SCORE : (MAX_SCORE * num / scale);
/*
 * Parse a -M<n>, -C<n> or -B<n>[/<m>] option string.
 *
 * Returns the first score in the low 16 bits and, for -B only, the
 * optional second score shifted left by 16.  Returns -1 when opt is
 * not one of these options or has trailing garbage.
 */
int diff_scoreopt_parse(const char *opt)
{
	int first, second = 0, kind;

	if (opt[0] != '-')
		return -1;
	kind = opt[1];
	if (kind != 'M' && kind != 'C' && kind != 'B')
		return -1; /* that is not a -M, -C nor -B option */
	opt += 2;

	first = parse_num(&opt);
	if (kind == 'B' && *opt != 0) {
		if (*opt != '/')
			return -1; /* we expect -B80/99 or -B80 */
		opt++;
		second = parse_num(&opt);
	}
	if (*opt != 0)
		return -1;
	return first | (second << 16);
}
/*
 * The queue that diff_addremove(), diff_change() and diff_unmerge()
 * append file pairs to, and that diff_flush() outputs and empties.
 */
903 struct diff_queue_struct diff_queued_diff;
905 void diff_q(struct diff_queue_struct *queue, struct diff_filepair *dp)
907 if (queue->alloc <= queue->nr) {
908 queue->alloc = alloc_nr(queue->alloc);
909 queue->queue = xrealloc(queue->queue,
910 sizeof(dp) * queue->alloc);
912 queue->queue[queue->nr++] = dp;
915 struct diff_filepair *diff_queue(struct diff_queue_struct *queue,
916 struct diff_filespec *one,
917 struct diff_filespec *two)
919 struct diff_filepair *dp = xmalloc(sizeof(*dp));
920 dp->one = one;
921 dp->two = two;
922 dp->score = 0;
923 dp->status = 0;
924 dp->source_stays = 0;
925 dp->broken_pair = 0;
926 if (queue)
927 diff_q(queue, dp);
928 return dp;
931 void diff_free_filepair(struct diff_filepair *p)
933 diff_free_filespec_data(p->one);
934 diff_free_filespec_data(p->two);
935 free(p->one);
936 free(p->two);
937 free(p);
940 static void diff_flush_raw(struct diff_filepair *p,
941 int line_termination,
942 int inter_name_termination,
943 int output_format)
945 int two_paths;
946 char status[10];
947 const char *path_one, *path_two;
949 path_one = p->one->path;
950 path_two = p->two->path;
951 if (line_termination) {
952 path_one = quote_one(path_one);
953 path_two = quote_one(path_two);
956 if (p->score)
957 sprintf(status, "%c%03d", p->status,
958 (int)(0.5 + p->score * 100.0/MAX_SCORE));
959 else {
960 status[0] = p->status;
961 status[1] = 0;
963 switch (p->status) {
964 case DIFF_STATUS_COPIED:
965 case DIFF_STATUS_RENAMED:
966 two_paths = 1;
967 break;
968 case DIFF_STATUS_ADDED:
969 case DIFF_STATUS_DELETED:
970 two_paths = 0;
971 break;
972 default:
973 two_paths = 0;
974 break;
976 if (output_format != DIFF_FORMAT_NAME_STATUS) {
977 printf(":%06o %06o %s ",
978 p->one->mode, p->two->mode, sha1_to_hex(p->one->sha1));
979 printf("%s ", sha1_to_hex(p->two->sha1));
981 printf("%s%c%s", status, inter_name_termination, path_one);
982 if (two_paths)
983 printf("%c%s", inter_name_termination, path_two);
984 putchar(line_termination);
985 if (path_one != p->one->path)
986 free((void*)path_one);
987 if (path_two != p->two->path)
988 free((void*)path_two);
991 static void diff_flush_name(struct diff_filepair *p,
992 int inter_name_termination,
993 int line_termination)
995 char *path = p->two->path;
997 if (line_termination)
998 path = quote_one(p->two->path);
999 else
1000 path = p->two->path;
1001 printf("%s%c", path, line_termination);
1002 if (p->two->path != path)
1003 free(path);
1006 int diff_unmodified_pair(struct diff_filepair *p)
1008 /* This function is written stricter than necessary to support
1009 * the currently implemented transformers, but the idea is to
1010 * let transformers to produce diff_filepairs any way they want,
1011 * and filter and clean them up here before producing the output.
1013 struct diff_filespec *one, *two;
1015 if (DIFF_PAIR_UNMERGED(p))
1016 return 0; /* unmerged is interesting */
1018 one = p->one;
1019 two = p->two;
1021 /* deletion, addition, mode or type change
1022 * and rename are all interesting.
1024 if (DIFF_FILE_VALID(one) != DIFF_FILE_VALID(two) ||
1025 DIFF_PAIR_MODE_CHANGED(p) ||
1026 strcmp(one->path, two->path))
1027 return 0;
1029 /* both are valid and point at the same path. that is, we are
1030 * dealing with a change.
1032 if (one->sha1_valid && two->sha1_valid &&
1033 !memcmp(one->sha1, two->sha1, sizeof(one->sha1)))
1034 return 1; /* no change */
1035 if (!one->sha1_valid && !two->sha1_valid)
1036 return 1; /* both look at the same file on the filesystem. */
1037 return 0;
1040 static void diff_flush_patch(struct diff_filepair *p, struct diff_options *o)
1042 if (diff_unmodified_pair(p))
1043 return;
1045 if ((DIFF_FILE_VALID(p->one) && S_ISDIR(p->one->mode)) ||
1046 (DIFF_FILE_VALID(p->two) && S_ISDIR(p->two->mode)))
1047 return; /* no tree diffs in patch format */
1049 run_diff(p, o);
1052 int diff_queue_is_empty(void)
1054 struct diff_queue_struct *q = &diff_queued_diff;
1055 int i;
1056 for (i = 0; i < q->nr; i++)
1057 if (!diff_unmodified_pair(q->queue[i]))
1058 return 0;
1059 return 1;
1062 #if DIFF_DEBUG
1063 void diff_debug_filespec(struct diff_filespec *s, int x, const char *one)
1065 fprintf(stderr, "queue[%d] %s (%s) %s %06o %s\n",
1066 x, one ? one : "",
1067 s->path,
1068 DIFF_FILE_VALID(s) ? "valid" : "invalid",
1069 s->mode,
1070 s->sha1_valid ? sha1_to_hex(s->sha1) : "");
1071 fprintf(stderr, "queue[%d] %s size %lu flags %d\n",
1072 x, one ? one : "",
1073 s->size, s->xfrm_flags);
1076 void diff_debug_filepair(const struct diff_filepair *p, int i)
1078 diff_debug_filespec(p->one, i, "one");
1079 diff_debug_filespec(p->two, i, "two");
1080 fprintf(stderr, "score %d, status %c stays %d broken %d\n",
1081 p->score, p->status ? p->status : '?',
1082 p->source_stays, p->broken_pair);
1085 void diff_debug_queue(const char *msg, struct diff_queue_struct *q)
1087 int i;
1088 if (msg)
1089 fprintf(stderr, "%s\n", msg);
1090 fprintf(stderr, "q->nr = %d\n", q->nr);
1091 for (i = 0; i < q->nr; i++) {
1092 struct diff_filepair *p = q->queue[i];
1093 diff_debug_filepair(p, i);
1096 #endif
1098 static void diff_resolve_rename_copy(void)
1100 int i, j;
1101 struct diff_filepair *p, *pp;
1102 struct diff_queue_struct *q = &diff_queued_diff;
1104 diff_debug_queue("resolve-rename-copy", q);
1106 for (i = 0; i < q->nr; i++) {
1107 p = q->queue[i];
1108 p->status = 0; /* undecided */
1109 if (DIFF_PAIR_UNMERGED(p))
1110 p->status = DIFF_STATUS_UNMERGED;
1111 else if (!DIFF_FILE_VALID(p->one))
1112 p->status = DIFF_STATUS_ADDED;
1113 else if (!DIFF_FILE_VALID(p->two))
1114 p->status = DIFF_STATUS_DELETED;
1115 else if (DIFF_PAIR_TYPE_CHANGED(p))
1116 p->status = DIFF_STATUS_TYPE_CHANGED;
1118 /* from this point on, we are dealing with a pair
1119 * whose both sides are valid and of the same type, i.e.
1120 * either in-place edit or rename/copy edit.
1122 else if (DIFF_PAIR_RENAME(p)) {
1123 if (p->source_stays) {
1124 p->status = DIFF_STATUS_COPIED;
1125 continue;
1127 /* See if there is some other filepair that
1128 * copies from the same source as us. If so
1129 * we are a copy. Otherwise we are either a
1130 * copy if the path stays, or a rename if it
1131 * does not, but we already handled "stays" case.
1133 for (j = i + 1; j < q->nr; j++) {
1134 pp = q->queue[j];
1135 if (strcmp(pp->one->path, p->one->path))
1136 continue; /* not us */
1137 if (!DIFF_PAIR_RENAME(pp))
1138 continue; /* not a rename/copy */
1139 /* pp is a rename/copy from the same source */
1140 p->status = DIFF_STATUS_COPIED;
1141 break;
1143 if (!p->status)
1144 p->status = DIFF_STATUS_RENAMED;
1146 else if (memcmp(p->one->sha1, p->two->sha1, 20) ||
1147 p->one->mode != p->two->mode)
1148 p->status = DIFF_STATUS_MODIFIED;
1149 else {
1150 /* This is a "no-change" entry and should not
1151 * happen anymore, but prepare for broken callers.
1153 error("feeding unmodified %s to diffcore",
1154 p->one->path);
1155 p->status = DIFF_STATUS_UNKNOWN;
1158 diff_debug_queue("resolve-rename-copy done", q);
1161 void diff_flush(struct diff_options *options)
1163 struct diff_queue_struct *q = &diff_queued_diff;
1164 int i;
1165 int inter_name_termination = '\t';
1166 int diff_output_format = options->output_format;
1167 int line_termination = options->line_termination;
1169 if (!line_termination)
1170 inter_name_termination = 0;
1172 for (i = 0; i < q->nr; i++) {
1173 struct diff_filepair *p = q->queue[i];
1174 if ((diff_output_format == DIFF_FORMAT_NO_OUTPUT) ||
1175 (p->status == DIFF_STATUS_UNKNOWN))
1176 continue;
1177 if (p->status == 0)
1178 die("internal error in diff-resolve-rename-copy");
1179 switch (diff_output_format) {
1180 case DIFF_FORMAT_PATCH:
1181 diff_flush_patch(p, options);
1182 break;
1183 case DIFF_FORMAT_RAW:
1184 case DIFF_FORMAT_NAME_STATUS:
1185 diff_flush_raw(p, line_termination,
1186 inter_name_termination,
1187 diff_output_format);
1188 break;
1189 case DIFF_FORMAT_NAME:
1190 diff_flush_name(p,
1191 inter_name_termination,
1192 line_termination);
1193 break;
1195 diff_free_filepair(q->queue[i]);
1197 free(q->queue);
1198 q->queue = NULL;
1199 q->nr = q->alloc = 0;
1202 static void diffcore_apply_filter(const char *filter)
1204 int i;
1205 struct diff_queue_struct *q = &diff_queued_diff;
1206 struct diff_queue_struct outq;
1207 outq.queue = NULL;
1208 outq.nr = outq.alloc = 0;
1210 if (!filter)
1211 return;
1213 if (strchr(filter, DIFF_STATUS_FILTER_AON)) {
1214 int found;
1215 for (i = found = 0; !found && i < q->nr; i++) {
1216 struct diff_filepair *p = q->queue[i];
1217 if (((p->status == DIFF_STATUS_MODIFIED) &&
1218 ((p->score &&
1219 strchr(filter, DIFF_STATUS_FILTER_BROKEN)) ||
1220 (!p->score &&
1221 strchr(filter, DIFF_STATUS_MODIFIED)))) ||
1222 ((p->status != DIFF_STATUS_MODIFIED) &&
1223 strchr(filter, p->status)))
1224 found++;
1226 if (found)
1227 return;
1229 /* otherwise we will clear the whole queue
1230 * by copying the empty outq at the end of this
1231 * function, but first clear the current entries
1232 * in the queue.
1234 for (i = 0; i < q->nr; i++)
1235 diff_free_filepair(q->queue[i]);
1237 else {
1238 /* Only the matching ones */
1239 for (i = 0; i < q->nr; i++) {
1240 struct diff_filepair *p = q->queue[i];
1242 if (((p->status == DIFF_STATUS_MODIFIED) &&
1243 ((p->score &&
1244 strchr(filter, DIFF_STATUS_FILTER_BROKEN)) ||
1245 (!p->score &&
1246 strchr(filter, DIFF_STATUS_MODIFIED)))) ||
1247 ((p->status != DIFF_STATUS_MODIFIED) &&
1248 strchr(filter, p->status)))
1249 diff_q(&outq, p);
1250 else
1251 diff_free_filepair(p);
1254 free(q->queue);
1255 *q = outq;
1258 void diffcore_std(struct diff_options *options)
1260 if (options->paths && options->paths[0])
1261 diffcore_pathspec(options->paths);
1262 if (options->break_opt != -1)
1263 diffcore_break(options->break_opt);
1264 if (options->detect_rename)
1265 diffcore_rename(options);
1266 if (options->break_opt != -1)
1267 diffcore_merge_broken();
1268 if (options->pickaxe)
1269 diffcore_pickaxe(options->pickaxe, options->pickaxe_opts);
1270 if (options->orderfile)
1271 diffcore_order(options->orderfile);
1272 diff_resolve_rename_copy();
1273 diffcore_apply_filter(options->filter);
1277 void diffcore_std_no_resolve(struct diff_options *options)
1279 if (options->pickaxe)
1280 diffcore_pickaxe(options->pickaxe, options->pickaxe_opts);
1281 if (options->orderfile)
1282 diffcore_order(options->orderfile);
1283 diffcore_apply_filter(options->filter);
1286 void diff_addremove(struct diff_options *options,
1287 int addremove, unsigned mode,
1288 const unsigned char *sha1,
1289 const char *base, const char *path)
1291 char concatpath[PATH_MAX];
1292 struct diff_filespec *one, *two;
1294 /* This may look odd, but it is a preparation for
1295 * feeding "there are unchanged files which should
1296 * not produce diffs, but when you are doing copy
1297 * detection you would need them, so here they are"
1298 * entries to the diff-core. They will be prefixed
1299 * with something like '=' or '*' (I haven't decided
1300 * which but should not make any difference).
1301 * Feeding the same new and old to diff_change()
1302 * also has the same effect.
1303 * Before the final output happens, they are pruned after
1304 * merged into rename/copy pairs as appropriate.
1306 if (options->reverse_diff)
1307 addremove = (addremove == '+' ? '-' :
1308 addremove == '-' ? '+' : addremove);
1310 if (!path) path = "";
1311 sprintf(concatpath, "%s%s", base, path);
1312 one = alloc_filespec(concatpath);
1313 two = alloc_filespec(concatpath);
1315 if (addremove != '+')
1316 fill_filespec(one, sha1, mode);
1317 if (addremove != '-')
1318 fill_filespec(two, sha1, mode);
1320 diff_queue(&diff_queued_diff, one, two);
1323 void diff_change(struct diff_options *options,
1324 unsigned old_mode, unsigned new_mode,
1325 const unsigned char *old_sha1,
1326 const unsigned char *new_sha1,
1327 const char *base, const char *path)
1329 char concatpath[PATH_MAX];
1330 struct diff_filespec *one, *two;
1332 if (options->reverse_diff) {
1333 unsigned tmp;
1334 const unsigned char *tmp_c;
1335 tmp = old_mode; old_mode = new_mode; new_mode = tmp;
1336 tmp_c = old_sha1; old_sha1 = new_sha1; new_sha1 = tmp_c;
1338 if (!path) path = "";
1339 sprintf(concatpath, "%s%s", base, path);
1340 one = alloc_filespec(concatpath);
1341 two = alloc_filespec(concatpath);
1342 fill_filespec(one, old_sha1, old_mode);
1343 fill_filespec(two, new_sha1, new_mode);
1345 diff_queue(&diff_queued_diff, one, two);
1348 void diff_unmerge(struct diff_options *options,
1349 const char *path)
1351 struct diff_filespec *one, *two;
1352 one = alloc_filespec(path);
1353 two = alloc_filespec(path);
1354 diff_queue(&diff_queued_diff, one, two);