1 /* vi: set sw=4 ts=4: */
3 * Mini diff implementation for busybox, adapted from OpenBSD diff.
5 * Copyright (C) 2010 by Matheus Izvekov <mizvekov@gmail.com>
6 * Copyright (C) 2006 by Robert Sullivan <cogito.ergo.cogito@hotmail.com>
7 * Copyright (c) 2003 Todd C. Miller <Todd.Miller@courtesan.com>
9 * Sponsored in part by the Defense Advanced Research Projects
10 * Agency (DARPA) and Air Force Research Laboratory, Air Force
11 * Materiel Command, USAF, under agreement number F39502-99-1-0512.
13 * Licensed under GPLv2 or later, see file LICENSE in this source tree.
17 * The following code uses an algorithm due to Harold Stone,
18 * which finds a pair of longest identical subsequences in
21 * The major goal is to generate the match vector J.
22 * J[i] is the index of the line in file1 corresponding
23 * to line i in file0. J[i] = 0 if there is no
26 * Lines are hashed so as to work in core. All potential
27 * matches are located by sorting the lines of each file
28 * on the hash (called "value"). In particular, this
29 * collects the equivalence classes in file1 together.
30 * Subroutine equiv replaces the value of each line in
31 * file0 by the index of the first element of its
32 * matching equivalence in (the reordered) file1.
33 * To save space equiv squeezes file1 into a single
34 * array member in which the equivalence classes
35 * are simply concatenated, except that their first
36 * members are flagged by changing sign.
38 * Next the indices that point into member are unsorted into
39 * array class according to the original order of file0.
41 * The cleverness lies in routine stone. This marches
42 * through the lines of file0, developing a vector klist
43 * of "k-candidates". At step i a k-candidate is a matched
44 * pair of lines x,y (x in file0, y in file1) such that
45 * there is a common subsequence of length k
46 * between the first i lines of file0 and the first y
47 * lines of file1, but there is no such subsequence for
48 * any smaller y. x is the earliest possible mate to y
49 * that occurs in such a subsequence.
51 * Whenever any of the members of the equivalence class of
52 * lines in file1 matable to a line in file0 has serial number
53 * less than the y of some k-candidate, that k-candidate
54 * with the smallest such y is replaced. The new
55 * k-candidate is chained (via pred) to the current
56 * k-1 candidate so that the actual subsequence can
57 * be recovered. When a member has serial number greater
58 * that the y of all k-candidates, the klist is extended.
59 * At the end, the longest subsequence is pulled out
60 * and placed in the array J by unravel
62 * With J in hand, the matches there recorded are
63 * checked against reality to assure that no spurious
64 * matches have crept in due to hashing. If they have,
65 * they are broken, and "jackpot" is recorded--a harmless
66 * matter except that a true match for a spuriously
67 * mated line may now be unnecessarily reported as a change.
69 * Much of the complexity of the program comes simply
70 * from trying to minimize core utilization and
71 * maximize the range of doable problems by dynamically
72 * allocating what is needed and reusing what is not.
73 * The core requirements for problems larger than somewhat
74 * are (in words) 2*length(file0) + length(file1) +
75 * 3*(number of k-candidates installed), typically about
76 * 6n words for files of length n.
83 //config: diff compares two files or directories and outputs the
84 //config: differences between them in a form that can be given to
85 //config: the patch command.
87 //config:config FEATURE_DIFF_LONG_OPTIONS
88 //config: bool "Enable long options"
90 //config: depends on DIFF && LONG_OPTS
92 //config: Enable use of long options.
94 //config:config FEATURE_DIFF_DIR
95 //config: bool "Enable directory support"
97 //config: depends on DIFF
99 //config: This option enables support for directory and subdirectory
100 //config: comparison.
102 //kbuild:lib-$(CONFIG_DIFF) += diff.o
104 //applet:IF_DIFF(APPLET(diff, BB_DIR_USR_BIN, BB_SUID_DROP))
106 //usage:#define diff_trivial_usage
107 //usage: "[-abBdiNqrTstw] [-L LABEL] [-S FILE] [-U LINES] FILE1 FILE2"
108 //usage:#define diff_full_usage "\n\n"
109 //usage: "Compare files line by line and output the differences between them.\n"
110 //usage: "This implementation supports unified diffs only.\n"
111 //usage: "\n -a Treat all files as text"
112 //usage: "\n -b Ignore changes in the amount of whitespace"
113 //usage: "\n -B Ignore changes whose lines are all blank"
114 //usage: "\n -d Try hard to find a smaller set of changes"
115 //usage: "\n -i Ignore case differences"
116 //usage: "\n -L Use LABEL instead of the filename in the unified header"
117 //usage: "\n -N Treat absent files as empty"
118 //usage: "\n -q Output only whether files differ"
119 //usage: "\n -r Recurse"
120 //usage: "\n -S Start with FILE when comparing directories"
121 //usage: "\n -T Make tabs line up by prefixing a tab when necessary"
122 //usage: "\n -s Report when two files are the same"
123 //usage: "\n -t Expand tabs to spaces in output"
124 //usage: "\n -U Output LINES lines of context"
125 //usage: "\n -w Ignore all whitespace"
130 # define dbg_error_msg(...) bb_error_msg(__VA_ARGS__)
132 # define dbg_error_msg(...) ((void)0)
135 enum { /* print_status() and diffreg() return values */
136 STATUS_SAME
, /* files are the same */
137 STATUS_DIFFER
, /* files differ */
138 STATUS_BINARY
, /* binary files differ */
141 enum { /* Commandline flags */
146 FLAG_L
, /* never used, handled by getopt32 */
151 FLAG_S
, /* never used, handled by getopt32 */
154 FLAG_U
, /* never used, handled by getopt32 */
156 FLAG_u
, /* ignored, this is the default */
157 FLAG_p
, /* not implemented */
159 FLAG_E
, /* not implemented */
161 #define FLAG(x) (1 << FLAG_##x)
163 /* We cache file position to avoid excessive seeking */
164 typedef struct FILE_and_pos_t
{
170 smallint exit_status
;
172 const char *other_dir
;
176 #define G (*ptr_to_globals)
177 #define exit_status (G.exit_status )
178 #define opt_U_context (G.opt_U_context )
179 #define label (G.label )
181 #define INIT_G() do { \
182 SET_PTR_TO_GLOBALS(xzalloc(sizeof(G))); \
190 TOK_EMPTY
= 1 << 9, /* Line fully processed, you can proceed to the next */
191 TOK_EOF
= 1 << 10, /* File ended */
192 /* Private (Only to be used by read_token() */
193 TOK_EOL
= 1 << 11, /* we saw EOL (sticky) */
194 TOK_SPACE
= 1 << 12, /* used -b code, means we are skipping spaces */
195 SHIFT_EOF
= (sizeof(token_t
)*8 - 8) - 1,
196 CHAR_MASK
= 0x1ff, /* 8th bit is used to distinguish EOF from 0xff */
199 /* Restores full EOF from one 8th bit: */
200 //#define TOK2CHAR(t) (((t) << SHIFT_EOF) >> SHIFT_EOF)
201 /* We don't really need the above, we only need to have EOF != any_real_char: */
202 #define TOK2CHAR(t) ((t) & CHAR_MASK)
204 static void seek_ft(FILE_and_pos_t
*ft
, off_t pos
)
206 if (ft
->ft_pos
!= pos
) {
208 fseeko(ft
->ft_fp
, pos
, SEEK_SET
);
212 /* Reads tokens from given fp, handling -b and -w flags
213 * The user must reset tok every line start
215 static int read_token(FILE_and_pos_t
*ft
, token_t tok
)
218 while (!(tok
& TOK_EOL
)) {
222 t
= fgetc(ft
->ft_fp
);
225 is_space
= (t
== EOF
|| isspace(t
));
227 /* If t == EOF (-1), set both TOK_EOF and TOK_EOL */
228 tok
|= (t
& (TOK_EOF
+ TOK_EOL
));
233 if (option_mask32
& FLAG(i
)) /* Handcoded tolower() */
234 t
= (t
>= 'A' && t
<= 'Z') ? t
- ('A' - 'a') : t
;
236 if ((option_mask32
& FLAG(w
)) && is_space
)
239 /* Trim char value to low 9 bits */
242 if (option_mask32
& FLAG(b
)) {
243 /* Was prev char whitespace? */
244 if (tok
& TOK_SPACE
) { /* yes */
245 if (is_space
) /* this one too, ignore it */
248 } else if (is_space
) {
249 /* 1st whitespace char.
250 * Set TOK_SPACE and replace char by ' ' */
255 tok
&= ~(TOK_EMPTY
+ CHAR_MASK
);
256 /* Assign char value (low 9 bits) and maybe set TOK_SPACE */
261 bb_error_msg("fp:%p tok:%x '%c'%s%s%s%s", fp
, tok
, tok
& 0xff
262 , tok
& TOK_EOF
? " EOF" : ""
263 , tok
& TOK_EOL
? " EOL" : ""
264 , tok
& TOK_EMPTY
? " EMPTY" : ""
265 , tok
& TOK_SPACE
? " SPACE" : ""
277 static int search(const int *c
, int k
, int y
, const struct cand
*list
)
281 if (list
[c
[k
]].y
< y
) /* quick look for typical case */
284 for (i
= 0, j
= k
+ 1;;) {
285 const int l
= (i
+ j
) >> 1;
287 const int t
= list
[c
[l
]].y
;
299 static unsigned isqrt(unsigned n
)
303 const unsigned y
= x
;
304 x
= ((n
/ x
) + x
) >> 1;
305 if (x
<= (y
+ 1) && x
>= (y
- 1))
310 static void stone(const int *a
, int n
, const int *b
, int *J
, int pref
)
312 const unsigned isq
= isqrt(n
);
313 const unsigned bound
=
314 (option_mask32
& FLAG(d
)) ? UINT_MAX
: MAX(256, isq
);
318 struct cand
*clist
= xzalloc(clistlen
* sizeof(clist
[0]));
321 int *klist
= xzalloc((n
+ 2) * sizeof(klist
[0]));
322 /*clist[0] = (struct cand){0}; - xzalloc did it */
325 for (cand
.x
= 1; cand
.x
<= n
; cand
.x
++) {
326 int j
= a
[cand
.x
], oldl
= 0;
327 unsigned numtries
= 0;
331 cand
.pred
= klist
[0];
334 if (cand
.y
<= clist
[cand
.pred
].y
)
336 l
= search(klist
, k
, cand
.y
, clist
);
338 cand
.pred
= klist
[l
- 1];
339 if (l
<= k
&& clist
[klist
[l
]].y
<= cand
.y
)
341 if (clen
== clistlen
) {
342 clistlen
= clistlen
* 11 / 10;
343 clist
= xrealloc(clist
, clistlen
* sizeof(clist
[0]));
356 } while ((cand
.y
= b
[++j
]) > 0 && numtries
< bound
);
359 for (q
= clist
+ klist
[k
]; q
->y
; q
= clist
+ q
->pred
)
360 J
[q
->x
+ pref
] = q
->y
+ pref
;
366 /* 'serial' is not used in the begining, so we reuse it
367 * to store line offsets, thus reducing memory pressure
376 static void equiv(struct line
*a
, int n
, struct line
*b
, int m
, int *c
)
380 while (i
<= n
&& j
<= m
) {
381 if (a
[i
].value
< b
[j
].value
)
383 else if (a
[i
].value
== b
[j
].value
)
394 while (b
[j
+ 1].value
== b
[j
].value
) {
402 static void unsort(const struct line
*f
, int l
, int *b
)
405 int *a
= xmalloc((l
+ 1) * sizeof(a
[0]));
406 for (i
= 1; i
<= l
; i
++)
407 a
[f
[i
].serial
] = f
[i
].value
;
408 for (i
= 1; i
<= l
; i
++)
413 static int line_compar(const void *a
, const void *b
)
415 #define l0 ((const struct line*)a)
416 #define l1 ((const struct line*)b)
417 int r
= l0
->value
- l1
->value
;
420 return l0
->serial
- l1
->serial
;
425 static void fetch(FILE_and_pos_t
*ft
, const off_t
*ix
, int a
, int b
, int ch
)
428 for (i
= a
; i
<= b
; i
++) {
429 seek_ft(ft
, ix
[i
- 1]);
431 if (option_mask32
& FLAG(T
))
433 for (j
= 0, col
= 0; j
< ix
[i
] - ix
[i
- 1]; j
++) {
434 int c
= fgetc(ft
->ft_fp
);
436 printf("\n\\ No newline at end of file\n");
440 if (c
== '\t' && (option_mask32
& FLAG(t
)))
441 do putchar(' '); while (++col
& 7);
450 /* Creates the match vector J, where J[i] is the index
451 * of the line in the new file corresponding to the line i
452 * in the old file. Lines start at 1 instead of 0, that value
453 * being used instead to denote no corresponding line.
454 * This vector is dynamically allocated and must be freed by the caller.
456 * * fp is an input parameter, where fp[0] and fp[1] are the open
457 * old file and new file respectively.
458 * * nlen is an output variable, where nlen[0] and nlen[1]
459 * gets the number of lines in the old and new file respectively.
460 * * ix is an output variable, where ix[0] and ix[1] gets
461 * assigned dynamically allocated vectors of the offsets of the lines
462 * of the old and new file respectively. These must be freed by the caller.
464 static NOINLINE
int *create_J(FILE_and_pos_t ft
[2], int nlen
[2], off_t
*ix
[2])
466 int *J
, slen
[2], *class, *member
;
467 struct line
*nfile
[2], *sfile
[2];
468 int pref
= 0, suff
= 0, i
, j
, delta
;
470 /* Lines of both files are hashed, and in the process
471 * their offsets are stored in the array ix[fileno]
472 * where fileno == 0 points to the old file, and
473 * fileno == 1 points to the new one.
475 for (i
= 0; i
< 2; i
++) {
479 nfile
[i
] = xmalloc((sz
+ 3) * sizeof(nfile
[i
][0]));
480 /* ft gets here without the correct position, cant use seek_ft */
482 fseeko(ft
[i
].ft_fp
, 0, SEEK_SET
);
485 /* We could zalloc nfile, but then zalloc starts showing in gprof at ~1% */
486 nfile
[i
][0].offset
= 0;
487 goto start
; /* saves code */
489 tok
= read_token(&ft
[i
], tok
);
490 if (!(tok
& TOK_EMPTY
)) {
491 /* Hash algorithm taken from Robert Sedgewick, Algorithms in C, 3d ed., p 578. */
492 /*hash = hash * 128 - hash + TOK2CHAR(tok);
493 * gcc insists on optimizing above to "hash * 127 + ...", thus... */
494 unsigned o
= hash
- TOK2CHAR(tok
);
495 hash
= hash
* 128 - o
; /* we want SPEED here */
498 if (nlen
[i
]++ == sz
) {
500 nfile
[i
] = xrealloc(nfile
[i
], (sz
+ 3) * sizeof(nfile
[i
][0]));
502 /* line_compar needs hashes fit into positive int */
503 nfile
[i
][nlen
[i
]].value
= hash
& INT_MAX
;
504 /* like ftello(ft[i].ft_fp) but faster (avoids lseek syscall) */
505 nfile
[i
][nlen
[i
]].offset
= ft
[i
].ft_pos
;
507 /* EOF counts as a token, so we have to adjust it here */
508 nfile
[i
][nlen
[i
]].offset
++;
514 /* Exclude lone EOF line from the end of the file, to make fetch()'s job easier */
515 if (nfile
[i
][nlen
[i
]].offset
- nfile
[i
][nlen
[i
] - 1].offset
== 1)
517 /* Now we copy the line offsets into ix */
518 ix
[i
] = xmalloc((nlen
[i
] + 2) * sizeof(ix
[i
][0]));
519 for (j
= 0; j
< nlen
[i
] + 1; j
++)
520 ix
[i
][j
] = nfile
[i
][j
].offset
;
523 /* length of prefix and suffix is calculated */
524 for (; pref
< nlen
[0] && pref
< nlen
[1] &&
525 nfile
[0][pref
+ 1].value
== nfile
[1][pref
+ 1].value
;
527 for (; suff
< nlen
[0] - pref
&& suff
< nlen
[1] - pref
&&
528 nfile
[0][nlen
[0] - suff
].value
== nfile
[1][nlen
[1] - suff
].value
;
530 /* Arrays are pruned by the suffix and prefix length,
531 * the result being sorted and stored in sfile[fileno],
532 * and their sizes are stored in slen[fileno]
534 for (j
= 0; j
< 2; j
++) {
535 sfile
[j
] = nfile
[j
] + pref
;
536 slen
[j
] = nlen
[j
] - pref
- suff
;
537 for (i
= 0; i
<= slen
[j
]; i
++)
538 sfile
[j
][i
].serial
= i
;
539 qsort(sfile
[j
] + 1, slen
[j
], sizeof(*sfile
[j
]), line_compar
);
541 /* nfile arrays are reused to reduce memory pressure
542 * The #if zeroed out section performs the same task as the
543 * one in the #else section.
544 * Peak memory usage is higher, but one array copy is avoided
545 * by not using unsort()
548 member
= xmalloc((slen
[1] + 2) * sizeof(member
[0]));
549 equiv(sfile
[0], slen
[0], sfile
[1], slen
[1], member
);
552 class = xmalloc((slen
[0] + 1) * sizeof(class[0]));
553 for (i
= 1; i
<= slen
[0]; i
++) /* Unsorting */
554 class[sfile
[0][i
].serial
] = sfile
[0][i
].value
;
557 member
= (int *)nfile
[1];
558 equiv(sfile
[0], slen
[0], sfile
[1], slen
[1], member
);
559 member
= xrealloc(member
, (slen
[1] + 2) * sizeof(member
[0]));
561 class = (int *)nfile
[0];
562 unsort(sfile
[0], slen
[0], (int *)nfile
[0]);
563 class = xrealloc(class, (slen
[0] + 2) * sizeof(class[0]));
565 J
= xmalloc((nlen
[0] + 2) * sizeof(J
[0]));
566 /* The elements of J which fall inside the prefix and suffix regions
567 * are marked as unchanged, while the ones which fall outside
568 * are initialized with 0 (no matches), so that function stone can
569 * then assign them their right values
571 for (i
= 0, delta
= nlen
[1] - nlen
[0]; i
<= nlen
[0]; i
++)
572 J
[i
] = i
<= pref
? i
:
573 i
> (nlen
[0] - suff
) ? (i
+ delta
) : 0;
574 /* Here the magic is performed */
575 stone(class, slen
[0], member
, J
, pref
);
576 J
[nlen
[0] + 1] = nlen
[1] + 1;
581 /* Both files are rescanned, in an effort to find any lines
582 * which, due to limitations intrinsic to any hashing algorithm,
583 * are different but ended up confounded as the same
585 for (i
= 1; i
<= nlen
[0]; i
++) {
589 seek_ft(&ft
[0], ix
[0][i
- 1]);
590 seek_ft(&ft
[1], ix
[1][J
[i
] - 1]);
592 for (j
= J
[i
]; i
<= nlen
[0] && J
[i
] == j
; i
++, j
++) {
593 token_t tok0
= 0, tok1
= 0;
595 tok0
= read_token(&ft
[0], tok0
);
596 tok1
= read_token(&ft
[1], tok1
);
598 if (((tok0
^ tok1
) & TOK_EMPTY
) != 0 /* one is empty (not both) */
599 || (!(tok0
& TOK_EMPTY
) && TOK2CHAR(tok0
) != TOK2CHAR(tok1
))
601 J
[i
] = 0; /* Break the correspondence */
603 } while (!(tok0
& tok1
& TOK_EMPTY
));
610 static bool diff(FILE* fp
[2], char *file
[2])
614 FILE_and_pos_t ft
[2];
615 typedef struct { int a
, b
; } vec_t
[2];
617 int i
= 1, j
, k
, idx
= -1;
618 bool anychange
= false;
623 /* note that ft[i].ft_pos is unintitalized, create_J()
624 * must not assume otherwise */
625 J
= create_J(ft
, nlen
, ix
);
628 bool nonempty
= false;
633 for (v
[0].a
= i
; v
[0].a
<= nlen
[0] && J
[v
[0].a
] == J
[v
[0].a
- 1] + 1; v
[0].a
++)
635 v
[1].a
= J
[v
[0].a
- 1] + 1;
637 for (v
[0].b
= v
[0].a
- 1; v
[0].b
< nlen
[0] && !J
[v
[0].b
+ 1]; v
[0].b
++)
639 v
[1].b
= J
[v
[0].b
+ 1] - 1;
641 * Indicate that there is a difference between lines a and b of the 'from' file
642 * to get to lines c to d of the 'to' file. If a is greater than b then there
643 * are no lines in the 'from' file involved and this means that there were
644 * lines appended (beginning at b). If c is greater than d then there are
645 * lines missing from the 'to' file.
647 if (v
[0].a
<= v
[0].b
|| v
[1].a
<= v
[1].b
) {
649 * If this change is more than 'context' lines from the
650 * previous change, dump the record and reset it.
652 int ct
= (2 * opt_U_context
) + 1;
654 && v
[0].a
> vec
[idx
][0].b
+ ct
655 && v
[1].a
> vec
[idx
][1].b
+ ct
660 for (j
= 0; j
< 2; j
++)
661 for (k
= v
[j
].a
; k
< v
[j
].b
; k
++)
662 nonempty
|= (ix
[j
][k
+1] - ix
[j
][k
] != 1);
664 vec
= xrealloc_vector(vec
, 6, ++idx
);
665 memcpy(vec
[idx
], v
, sizeof(v
));
673 if (idx
< 0 || ((option_mask32
& FLAG(B
)) && !nonempty
))
675 if (!(option_mask32
& FLAG(q
))) {
677 vec_t span
, *cvp
= vec
;
680 /* Print the context/unidiff header first time through */
681 printf("--- %s\n", label
[0] ? label
[0] : file
[0]);
682 printf("+++ %s\n", label
[1] ? label
[1] : file
[1]);
686 for (j
= 0; j
< 2; j
++) {
687 int a
= span
[j
].a
= MAX(1, (*cvp
)[j
].a
- opt_U_context
);
688 int b
= span
[j
].b
= MIN(nlen
[j
], vec
[idx
][j
].b
+ opt_U_context
);
690 printf(" %c%d", j
? '+' : '-', MIN(a
, b
));
693 printf(",%d", (a
< b
) ? b
- a
+ 1 : 0);
697 * Output changes in "unified" diff format--the old and new lines
698 * are printed together.
700 for (lowa
= span
[0].a
; ; lowa
= (*cvp
++)[0].b
+ 1) {
701 bool end
= cvp
> &vec
[idx
];
702 fetch(&ft
[0], ix
[0], lowa
, end
? span
[0].b
: (*cvp
)[0].a
- 1, ' ');
705 for (j
= 0; j
< 2; j
++)
706 fetch(&ft
[j
], ix
[j
], (*cvp
)[j
].a
, (*cvp
)[j
].b
, j
? '+' : '-');
712 } while (i
<= nlen
[0]);
721 static int diffreg(char *file
[2])
724 bool binary
= false, differ
= false;
725 int status
= STATUS_SAME
, i
;
729 for (i
= 0; i
< 2; i
++) {
730 int fd
= open_or_warn_stdin(file
[i
]);
733 /* Our diff implementation is using seek.
734 * When we meet non-seekable file, we must make a temp copy.
736 if (lseek(fd
, 0, SEEK_SET
) == -1 && errno
== ESPIPE
) {
737 char name
[] = "/tmp/difXXXXXX";
738 int fd_tmp
= xmkstemp(name
);
741 if (bb_copyfd_eof(fd
, fd_tmp
) < 0)
743 if (fd
) /* Prevents closing of stdin */
747 fp
[i
] = fdopen(fd
, "r");
751 const size_t sz
= COMMON_BUFSIZE
/ 2;
752 char *const buf0
= bb_common_bufsiz1
;
753 char *const buf1
= buf0
+ sz
;
755 i
= fread(buf0
, 1, sz
, fp
[0]);
756 j
= fread(buf1
, 1, sz
, fp
[1]);
763 for (k
= 0; k
< i
; k
++) {
764 if (!buf0
[k
] || !buf1
[k
])
766 if (buf0
[k
] != buf1
[k
])
771 if (binary
&& !(option_mask32
& FLAG(a
)))
772 status
= STATUS_BINARY
;
773 else if (diff(fp
, file
))
774 status
= STATUS_DIFFER
;
776 if (status
!= STATUS_SAME
)
779 fclose_if_not_stdin(fp
[0]);
780 fclose_if_not_stdin(fp
[1]);
785 static void print_status(int status
, char *path
[2])
790 if ((option_mask32
& FLAG(q
)) || status
== STATUS_BINARY
)
791 printf("Files %s and %s differ\n", path
[0], path
[1]);
794 if (option_mask32
& FLAG(s
))
795 printf("Files %s and %s are identical\n", path
[0], path
[1]);
800 #if ENABLE_FEATURE_DIFF_DIR
807 /* This function adds a filename to dl, the directory listing. */
808 static int FAST_FUNC
add_to_dirlist(const char *filename
,
809 struct stat
*sb UNUSED_PARAM
,
810 void *userdata
, int depth UNUSED_PARAM
)
812 struct dlist
*const l
= userdata
;
813 const char *file
= filename
+ l
->len
;
816 l
->dl
= xrealloc_vector(l
->dl
, 6, l
->e
);
817 l
->dl
[l
->e
] = xstrdup(file
);
822 /* If recursion is not set, this function adds the directory
823 * to the list and prevents recursive_action from recursing into it.
825 static int FAST_FUNC
skip_dir(const char *filename
,
826 struct stat
*sb
, void *userdata
,
829 if (!(option_mask32
& FLAG(r
)) && depth
) {
830 add_to_dirlist(filename
, sb
, userdata
, depth
);
833 if (!(option_mask32
& FLAG(N
))) {
834 /* -r without -N: no need to recurse into dirs
835 * which do not exist on the "other side".
836 * Testcase: diff -r /tmp /
837 * (it would recurse deep into /proc without this code) */
838 struct dlist
*const l
= userdata
;
842 char *othername
= concat_path_file(G
.other_dir
, filename
);
843 int r
= stat(othername
, &osb
);
845 if (r
!= 0 || !S_ISDIR(osb
.st_mode
)) {
846 /* other dir doesn't have similarly named
847 * directory, don't recurse; return 1 upon
848 * exit, just like diffutils' diff */
857 static void diffdir(char *p
[2], const char *s_start
)
859 struct dlist list
[2];
862 memset(&list
, 0, sizeof(list
));
863 for (i
= 0; i
< 2; i
++) {
864 /*list[i].s = list[i].e = 0; - memset did it */
865 /*list[i].dl = NULL; */
867 G
.other_dir
= p
[1 - i
];
868 /* We need to trim root directory prefix.
869 * Using list.len to specify its length,
870 * add_to_dirlist will remove it. */
871 list
[i
].len
= strlen(p
[i
]);
872 recursive_action(p
[i
], ACTION_RECURSE
| ACTION_FOLLOWLINKS
,
873 add_to_dirlist
, skip_dir
, &list
[i
], 0);
874 /* Sort dl alphabetically.
875 * GNU diff does this ignoring any number of trailing dots.
876 * We don't, so for us dotted files almost always are
879 qsort_string_vector(list
[i
].dl
, list
[i
].e
);
880 /* If -S was set, find the starting point. */
883 while (list
[i
].s
< list
[i
].e
&& strcmp(list
[i
].dl
[list
[i
].s
], s_start
) < 0)
886 /* Now that both dirlist1 and dirlist2 contain sorted directory
887 * listings, we can start to go through dirlist1. If both listings
888 * contain the same file, then do a normal diff. Otherwise, behaviour
889 * is determined by whether the -N flag is set. */
895 dp
[0] = list
[0].s
< list
[0].e
? list
[0].dl
[list
[0].s
] : NULL
;
896 dp
[1] = list
[1].s
< list
[1].e
? list
[1].dl
[list
[1].s
] : NULL
;
897 if (!dp
[0] && !dp
[1])
899 pos
= !dp
[0] ? 1 : (!dp
[1] ? -1 : strcmp(dp
[0], dp
[1]));
901 if (pos
&& !(option_mask32
& FLAG(N
))) {
902 printf("Only in %s: %s\n", p
[k
], dp
[k
]);
905 char *fullpath
[2], *path
[2]; /* if -N */
907 for (i
= 0; i
< 2; i
++) {
908 if (pos
== 0 || i
== k
) {
909 path
[i
] = fullpath
[i
] = concat_path_file(p
[i
], dp
[i
]);
910 stat(fullpath
[i
], &stb
[i
]);
912 fullpath
[i
] = concat_path_file(p
[i
], dp
[1 - i
]);
913 path
[i
] = (char *)bb_dev_null
;
917 stat(fullpath
[k
], &stb
[1 - k
]);
919 if (S_ISDIR(stb
[0].st_mode
) && S_ISDIR(stb
[1].st_mode
))
920 printf("Common subdirectories: %s and %s\n", fullpath
[0], fullpath
[1]);
921 else if (!S_ISREG(stb
[0].st_mode
) && !S_ISDIR(stb
[0].st_mode
))
922 printf("File %s is not a regular file or directory and was skipped\n", fullpath
[0]);
923 else if (!S_ISREG(stb
[1].st_mode
) && !S_ISDIR(stb
[1].st_mode
))
924 printf("File %s is not a regular file or directory and was skipped\n", fullpath
[1]);
925 else if (S_ISDIR(stb
[0].st_mode
) != S_ISDIR(stb
[1].st_mode
)) {
926 if (S_ISDIR(stb
[0].st_mode
))
927 printf("File %s is a %s while file %s is a %s\n", fullpath
[0], "directory", fullpath
[1], "regular file");
929 printf("File %s is a %s while file %s is a %s\n", fullpath
[0], "regular file", fullpath
[1], "directory");
931 print_status(diffreg(path
), fullpath
);
943 if (ENABLE_FEATURE_CLEAN_UP
) {
950 #if ENABLE_FEATURE_DIFF_LONG_OPTIONS
951 static const char diff_longopts
[] ALIGN1
=
952 "ignore-case\0" No_argument
"i"
953 "ignore-tab-expansion\0" No_argument
"E"
954 "ignore-space-change\0" No_argument
"b"
955 "ignore-all-space\0" No_argument
"w"
956 "ignore-blank-lines\0" No_argument
"B"
957 "text\0" No_argument
"a"
958 "unified\0" Required_argument
"U"
959 "label\0" Required_argument
"L"
960 "show-c-function\0" No_argument
"p"
961 "brief\0" No_argument
"q"
962 "expand-tabs\0" No_argument
"t"
963 "initial-tab\0" No_argument
"T"
964 "recursive\0" No_argument
"r"
965 "new-file\0" No_argument
"N"
966 "report-identical-files\0" No_argument
"s"
967 "starting-file\0" Required_argument
"S"
968 "minimal\0" No_argument
"d"
972 int diff_main(int argc
, char **argv
) MAIN_EXTERNALLY_VISIBLE
;
973 int diff_main(int argc UNUSED_PARAM
, char **argv
)
976 char *file
[2], *s_start
= NULL
;
977 llist_t
*L_arg
= NULL
;
981 /* exactly 2 params; collect multiple -L <label>; -U N */
982 opt_complementary
= "=2:L::U+";
983 #if ENABLE_FEATURE_DIFF_LONG_OPTIONS
984 applet_long_options
= diff_longopts
;
986 getopt32(argv
, "abdiL:NqrsS:tTU:wupBE",
987 &L_arg
, &s_start
, &opt_U_context
);
990 label
[!!label
[0]] = llist_pop(&L_arg
);
991 xfunc_error_retval
= 2;
992 for (i
= 0; i
< 2; i
++) {
994 /* Compat: "diff file name_which_doesnt_exist" exits with 2 */
995 if (LONE_DASH(file
[i
])) {
996 fstat(STDIN_FILENO
, &stb
[i
]);
999 xstat(file
[i
], &stb
[i
]);
1001 xfunc_error_retval
= 1;
1002 if (gotstdin
&& (S_ISDIR(stb
[0].st_mode
) || S_ISDIR(stb
[1].st_mode
)))
1003 bb_error_msg_and_die("can't compare stdin to a directory");
1005 /* Compare metadata to check if the files are the same physical file.
1007 * Comment from diffutils source says:
1008 * POSIX says that two files are identical if st_ino and st_dev are
1009 * the same, but many file systems incorrectly assign the same (device,
1010 * inode) pair to two distinct files, including:
1011 * GNU/Linux NFS servers that export all local file systems as a
1012 * single NFS file system, if a local device number (st_dev) exceeds
1013 * 255, or if a local inode number (st_ino) exceeds 16777215.
1016 && stb
[0].st_ino
== stb
[1].st_ino
1017 && stb
[0].st_dev
== stb
[1].st_dev
1018 && stb
[0].st_size
== stb
[1].st_size
1019 && stb
[0].st_mtime
== stb
[1].st_mtime
1020 && stb
[0].st_ctime
== stb
[1].st_ctime
1021 && stb
[0].st_mode
== stb
[1].st_mode
1022 && stb
[0].st_nlink
== stb
[1].st_nlink
1023 && stb
[0].st_uid
== stb
[1].st_uid
1024 && stb
[0].st_gid
== stb
[1].st_gid
1026 /* files are physically the same; no need to compare them */
1030 if (S_ISDIR(stb
[0].st_mode
) && S_ISDIR(stb
[1].st_mode
)) {
1031 #if ENABLE_FEATURE_DIFF_DIR
1032 diffdir(file
, s_start
);
1034 bb_error_msg_and_die("no support for directory comparison");
1037 bool dirfile
= S_ISDIR(stb
[0].st_mode
) || S_ISDIR(stb
[1].st_mode
);
1038 bool dir
= S_ISDIR(stb
[1].st_mode
);
1040 const char *slash
= strrchr(file
[!dir
], '/');
1041 file
[dir
] = concat_path_file(file
[dir
], slash
? slash
+ 1 : file
[!dir
]);
1042 xstat(file
[dir
], &stb
[dir
]);
1044 /* diffreg can get non-regular files here */
1045 print_status(gotstdin
> 1 ? STATUS_SAME
: diffreg(file
), file
);