1 /* vi: set sw=4 ts=4: */
3 * Mini diff implementation for busybox, adapted from OpenBSD diff.
5 * Copyright (C) 2010 by Matheus Izvekov <mizvekov@gmail.com>
6 * Copyright (C) 2006 by Robert Sullivan <cogito.ergo.cogito@hotmail.com>
7 * Copyright (c) 2003 Todd C. Miller <Todd.Miller@courtesan.com>
9 * Sponsored in part by the Defense Advanced Research Projects
10 * Agency (DARPA) and Air Force Research Laboratory, Air Force
11 * Materiel Command, USAF, under agreement number F39502-99-1-0512.
13 * Licensed under GPLv2 or later, see file LICENSE in this source tree.
17 * The following code uses an algorithm due to Harold Stone,
18 * which finds a pair of longest identical subsequences in
21 * The major goal is to generate the match vector J.
22 * J[i] is the index of the line in file1 corresponding
23 * to line i in file0. J[i] = 0 if there is no
26 * Lines are hashed so as to work in core. All potential
27 * matches are located by sorting the lines of each file
28 * on the hash (called "value"). In particular, this
29 * collects the equivalence classes in file1 together.
30 * Subroutine equiv replaces the value of each line in
31 * file0 by the index of the first element of its
32 * matching equivalence in (the reordered) file1.
33 * To save space equiv squeezes file1 into a single
34 * array member in which the equivalence classes
35 * are simply concatenated, except that their first
36 * members are flagged by changing sign.
38 * Next the indices that point into member are unsorted into
39 * array class according to the original order of file0.
41 * The cleverness lies in routine stone. This marches
42 * through the lines of file0, developing a vector klist
43 * of "k-candidates". At step i a k-candidate is a matched
44 * pair of lines x,y (x in file0, y in file1) such that
45 * there is a common subsequence of length k
46 * between the first i lines of file0 and the first y
47 * lines of file1, but there is no such subsequence for
48 * any smaller y. x is the earliest possible mate to y
49 * that occurs in such a subsequence.
51 * Whenever any of the members of the equivalence class of
52 * lines in file1 matable to a line in file0 has serial number
53 * less than the y of some k-candidate, that k-candidate
54 * with the smallest such y is replaced. The new
55 * k-candidate is chained (via pred) to the current
56 * k-1 candidate so that the actual subsequence can
57 * be recovered. When a member has serial number greater
58 * that the y of all k-candidates, the klist is extended.
59 * At the end, the longest subsequence is pulled out
60 * and placed in the array J by unravel
62 * With J in hand, the matches there recorded are
63 * checked against reality to assure that no spurious
64 * matches have crept in due to hashing. If they have,
65 * they are broken, and "jackpot" is recorded--a harmless
66 * matter except that a true match for a spuriously
67 * mated line may now be unnecessarily reported as a change.
69 * Much of the complexity of the program comes simply
70 * from trying to minimize core utilization and
71 * maximize the range of doable problems by dynamically
72 * allocating what is needed and reusing what is not.
73 * The core requirements for problems larger than somewhat
74 * are (in words) 2*length(file0) + length(file1) +
75 * 3*(number of k-candidates installed), typically about
76 * 6n words for files of length n.
79 //usage:#define diff_trivial_usage
80 //usage: "[-abBdiNqrTstw] [-L LABEL] [-S FILE] [-U LINES] FILE1 FILE2"
81 //usage:#define diff_full_usage "\n\n"
82 //usage: "Compare files line by line and output the differences between them.\n"
83 //usage: "This implementation supports unified diffs only.\n"
84 //usage: "\n -a Treat all files as text"
85 //usage: "\n -b Ignore changes in the amount of whitespace"
86 //usage: "\n -B Ignore changes whose lines are all blank"
87 //usage: "\n -d Try hard to find a smaller set of changes"
88 //usage: "\n -i Ignore case differences"
89 //usage: "\n -L Use LABEL instead of the filename in the unified header"
90 //usage: "\n -N Treat absent files as empty"
91 //usage: "\n -q Output only whether files differ"
92 //usage: "\n -r Recurse"
93 //usage: "\n -S Start with FILE when comparing directories"
94 //usage: "\n -T Make tabs line up by prefixing a tab when necessary"
95 //usage: "\n -s Report when two files are the same"
96 //usage: "\n -t Expand tabs to spaces in output"
97 //usage: "\n -U Output LINES lines of context"
98 //usage: "\n -w Ignore all whitespace"
103 # define dbg_error_msg(...) bb_error_msg(__VA_ARGS__)
105 # define dbg_error_msg(...) ((void)0)
108 enum { /* print_status() and diffreg() return values */
109 STATUS_SAME
, /* files are the same */
110 STATUS_DIFFER
, /* files differ */
111 STATUS_BINARY
, /* binary files differ */
114 enum { /* Commandline flags */
119 FLAG_L
, /* never used, handled by getopt32 */
124 FLAG_S
, /* never used, handled by getopt32 */
127 FLAG_U
, /* never used, handled by getopt32 */
129 FLAG_u
, /* ignored, this is the default */
130 FLAG_p
, /* not implemented */
132 FLAG_E
, /* not implemented */
134 #define FLAG(x) (1 << FLAG_##x)
136 /* We cache file position to avoid excessive seeking */
137 typedef struct FILE_and_pos_t
{
143 smallint exit_status
;
145 const char *other_dir
;
149 #define G (*ptr_to_globals)
150 #define exit_status (G.exit_status )
151 #define opt_U_context (G.opt_U_context )
152 #define label (G.label )
154 #define INIT_G() do { \
155 SET_PTR_TO_GLOBALS(xzalloc(sizeof(G))); \
163 TOK_EMPTY
= 1 << 9, /* Line fully processed, you can proceed to the next */
164 TOK_EOF
= 1 << 10, /* File ended */
165 /* Private (Only to be used by read_token() */
166 TOK_EOL
= 1 << 11, /* we saw EOL (sticky) */
167 TOK_SPACE
= 1 << 12, /* used -b code, means we are skipping spaces */
168 SHIFT_EOF
= (sizeof(token_t
)*8 - 8) - 1,
169 CHAR_MASK
= 0x1ff, /* 8th bit is used to distinguish EOF from 0xff */
172 /* Restores full EOF from one 8th bit: */
173 //#define TOK2CHAR(t) (((t) << SHIFT_EOF) >> SHIFT_EOF)
174 /* We don't really need the above, we only need to have EOF != any_real_char: */
175 #define TOK2CHAR(t) ((t) & CHAR_MASK)
177 static void seek_ft(FILE_and_pos_t
*ft
, off_t pos
)
179 if (ft
->ft_pos
!= pos
) {
181 fseeko(ft
->ft_fp
, pos
, SEEK_SET
);
185 /* Reads tokens from given fp, handling -b and -w flags
186 * The user must reset tok every line start
188 static int read_token(FILE_and_pos_t
*ft
, token_t tok
)
191 while (!(tok
& TOK_EOL
)) {
195 t
= fgetc(ft
->ft_fp
);
198 is_space
= (t
== EOF
|| isspace(t
));
200 /* If t == EOF (-1), set both TOK_EOF and TOK_EOL */
201 tok
|= (t
& (TOK_EOF
+ TOK_EOL
));
206 if (option_mask32
& FLAG(i
)) /* Handcoded tolower() */
207 t
= (t
>= 'A' && t
<= 'Z') ? t
- ('A' - 'a') : t
;
209 if ((option_mask32
& FLAG(w
)) && is_space
)
212 /* Trim char value to low 9 bits */
215 if (option_mask32
& FLAG(b
)) {
216 /* Was prev char whitespace? */
217 if (tok
& TOK_SPACE
) { /* yes */
218 if (is_space
) /* this one too, ignore it */
221 } else if (is_space
) {
222 /* 1st whitespace char.
223 * Set TOK_SPACE and replace char by ' ' */
228 tok
&= ~(TOK_EMPTY
+ CHAR_MASK
);
229 /* Assign char value (low 9 bits) and maybe set TOK_SPACE */
234 bb_error_msg("fp:%p tok:%x '%c'%s%s%s%s", fp
, tok
, tok
& 0xff
235 , tok
& TOK_EOF
? " EOF" : ""
236 , tok
& TOK_EOL
? " EOL" : ""
237 , tok
& TOK_EMPTY
? " EMPTY" : ""
238 , tok
& TOK_SPACE
? " SPACE" : ""
250 static int search(const int *c
, int k
, int y
, const struct cand
*list
)
254 if (list
[c
[k
]].y
< y
) /* quick look for typical case */
257 for (i
= 0, j
= k
+ 1;;) {
258 const int l
= (i
+ j
) >> 1;
260 const int t
= list
[c
[l
]].y
;
272 static unsigned isqrt(unsigned n
)
276 const unsigned y
= x
;
277 x
= ((n
/ x
) + x
) >> 1;
278 if (x
<= (y
+ 1) && x
>= (y
- 1))
283 static void stone(const int *a
, int n
, const int *b
, int *J
, int pref
)
285 const unsigned isq
= isqrt(n
);
286 const unsigned bound
=
287 (option_mask32
& FLAG(d
)) ? UINT_MAX
: MAX(256, isq
);
291 struct cand
*clist
= xzalloc(clistlen
* sizeof(clist
[0]));
294 int *klist
= xzalloc((n
+ 2) * sizeof(klist
[0]));
295 /*clist[0] = (struct cand){0}; - xzalloc did it */
298 for (cand
.x
= 1; cand
.x
<= n
; cand
.x
++) {
299 int j
= a
[cand
.x
], oldl
= 0;
300 unsigned numtries
= 0;
304 cand
.pred
= klist
[0];
307 if (cand
.y
<= clist
[cand
.pred
].y
)
309 l
= search(klist
, k
, cand
.y
, clist
);
311 cand
.pred
= klist
[l
- 1];
312 if (l
<= k
&& clist
[klist
[l
]].y
<= cand
.y
)
314 if (clen
== clistlen
) {
315 clistlen
= clistlen
* 11 / 10;
316 clist
= xrealloc(clist
, clistlen
* sizeof(clist
[0]));
329 } while ((cand
.y
= b
[++j
]) > 0 && numtries
< bound
);
332 for (q
= clist
+ klist
[k
]; q
->y
; q
= clist
+ q
->pred
)
333 J
[q
->x
+ pref
] = q
->y
+ pref
;
339 /* 'serial' is not used in the begining, so we reuse it
340 * to store line offsets, thus reducing memory pressure
349 static void equiv(struct line
*a
, int n
, struct line
*b
, int m
, int *c
)
353 while (i
<= n
&& j
<= m
) {
354 if (a
[i
].value
< b
[j
].value
)
356 else if (a
[i
].value
== b
[j
].value
)
367 while (b
[j
+ 1].value
== b
[j
].value
) {
375 static void unsort(const struct line
*f
, int l
, int *b
)
378 int *a
= xmalloc((l
+ 1) * sizeof(a
[0]));
379 for (i
= 1; i
<= l
; i
++)
380 a
[f
[i
].serial
] = f
[i
].value
;
381 for (i
= 1; i
<= l
; i
++)
386 static int line_compar(const void *a
, const void *b
)
388 #define l0 ((const struct line*)a)
389 #define l1 ((const struct line*)b)
390 int r
= l0
->value
- l1
->value
;
393 return l0
->serial
- l1
->serial
;
398 static void fetch(FILE_and_pos_t
*ft
, const off_t
*ix
, int a
, int b
, int ch
)
401 for (i
= a
; i
<= b
; i
++) {
402 seek_ft(ft
, ix
[i
- 1]);
404 if (option_mask32
& FLAG(T
))
406 for (j
= 0, col
= 0; j
< ix
[i
] - ix
[i
- 1]; j
++) {
407 int c
= fgetc(ft
->ft_fp
);
409 printf("\n\\ No newline at end of file\n");
413 if (c
== '\t' && (option_mask32
& FLAG(t
)))
414 do putchar(' '); while (++col
& 7);
423 /* Creates the match vector J, where J[i] is the index
424 * of the line in the new file corresponding to the line i
425 * in the old file. Lines start at 1 instead of 0, that value
426 * being used instead to denote no corresponding line.
427 * This vector is dynamically allocated and must be freed by the caller.
429 * * fp is an input parameter, where fp[0] and fp[1] are the open
430 * old file and new file respectively.
431 * * nlen is an output variable, where nlen[0] and nlen[1]
432 * gets the number of lines in the old and new file respectively.
433 * * ix is an output variable, where ix[0] and ix[1] gets
434 * assigned dynamically allocated vectors of the offsets of the lines
435 * of the old and new file respectively. These must be freed by the caller.
437 static NOINLINE
int *create_J(FILE_and_pos_t ft
[2], int nlen
[2], off_t
*ix
[2])
439 int *J
, slen
[2], *class, *member
;
440 struct line
*nfile
[2], *sfile
[2];
441 int pref
= 0, suff
= 0, i
, j
, delta
;
443 /* Lines of both files are hashed, and in the process
444 * their offsets are stored in the array ix[fileno]
445 * where fileno == 0 points to the old file, and
446 * fileno == 1 points to the new one.
448 for (i
= 0; i
< 2; i
++) {
452 nfile
[i
] = xmalloc((sz
+ 3) * sizeof(nfile
[i
][0]));
453 /* ft gets here without the correct position, cant use seek_ft */
455 fseeko(ft
[i
].ft_fp
, 0, SEEK_SET
);
458 /* We could zalloc nfile, but then zalloc starts showing in gprof at ~1% */
459 nfile
[i
][0].offset
= 0;
460 goto start
; /* saves code */
462 tok
= read_token(&ft
[i
], tok
);
463 if (!(tok
& TOK_EMPTY
)) {
464 /* Hash algorithm taken from Robert Sedgewick, Algorithms in C, 3d ed., p 578. */
465 /*hash = hash * 128 - hash + TOK2CHAR(tok);
466 * gcc insists on optimizing above to "hash * 127 + ...", thus... */
467 unsigned o
= hash
- TOK2CHAR(tok
);
468 hash
= hash
* 128 - o
; /* we want SPEED here */
471 if (nlen
[i
]++ == sz
) {
473 nfile
[i
] = xrealloc(nfile
[i
], (sz
+ 3) * sizeof(nfile
[i
][0]));
475 /* line_compar needs hashes fit into positive int */
476 nfile
[i
][nlen
[i
]].value
= hash
& INT_MAX
;
477 /* like ftello(ft[i].ft_fp) but faster (avoids lseek syscall) */
478 nfile
[i
][nlen
[i
]].offset
= ft
[i
].ft_pos
;
480 /* EOF counts as a token, so we have to adjust it here */
481 nfile
[i
][nlen
[i
]].offset
++;
487 /* Exclude lone EOF line from the end of the file, to make fetch()'s job easier */
488 if (nfile
[i
][nlen
[i
]].offset
- nfile
[i
][nlen
[i
] - 1].offset
== 1)
490 /* Now we copy the line offsets into ix */
491 ix
[i
] = xmalloc((nlen
[i
] + 2) * sizeof(ix
[i
][0]));
492 for (j
= 0; j
< nlen
[i
] + 1; j
++)
493 ix
[i
][j
] = nfile
[i
][j
].offset
;
496 /* length of prefix and suffix is calculated */
497 for (; pref
< nlen
[0] && pref
< nlen
[1] &&
498 nfile
[0][pref
+ 1].value
== nfile
[1][pref
+ 1].value
;
500 for (; suff
< nlen
[0] - pref
&& suff
< nlen
[1] - pref
&&
501 nfile
[0][nlen
[0] - suff
].value
== nfile
[1][nlen
[1] - suff
].value
;
503 /* Arrays are pruned by the suffix and prefix length,
504 * the result being sorted and stored in sfile[fileno],
505 * and their sizes are stored in slen[fileno]
507 for (j
= 0; j
< 2; j
++) {
508 sfile
[j
] = nfile
[j
] + pref
;
509 slen
[j
] = nlen
[j
] - pref
- suff
;
510 for (i
= 0; i
<= slen
[j
]; i
++)
511 sfile
[j
][i
].serial
= i
;
512 qsort(sfile
[j
] + 1, slen
[j
], sizeof(*sfile
[j
]), line_compar
);
514 /* nfile arrays are reused to reduce memory pressure
515 * The #if zeroed out section performs the same task as the
516 * one in the #else section.
517 * Peak memory usage is higher, but one array copy is avoided
518 * by not using unsort()
521 member
= xmalloc((slen
[1] + 2) * sizeof(member
[0]));
522 equiv(sfile
[0], slen
[0], sfile
[1], slen
[1], member
);
525 class = xmalloc((slen
[0] + 1) * sizeof(class[0]));
526 for (i
= 1; i
<= slen
[0]; i
++) /* Unsorting */
527 class[sfile
[0][i
].serial
] = sfile
[0][i
].value
;
530 member
= (int *)nfile
[1];
531 equiv(sfile
[0], slen
[0], sfile
[1], slen
[1], member
);
532 member
= xrealloc(member
, (slen
[1] + 2) * sizeof(member
[0]));
534 class = (int *)nfile
[0];
535 unsort(sfile
[0], slen
[0], (int *)nfile
[0]);
536 class = xrealloc(class, (slen
[0] + 2) * sizeof(class[0]));
538 J
= xmalloc((nlen
[0] + 2) * sizeof(J
[0]));
539 /* The elements of J which fall inside the prefix and suffix regions
540 * are marked as unchanged, while the ones which fall outside
541 * are initialized with 0 (no matches), so that function stone can
542 * then assign them their right values
544 for (i
= 0, delta
= nlen
[1] - nlen
[0]; i
<= nlen
[0]; i
++)
545 J
[i
] = i
<= pref
? i
:
546 i
> (nlen
[0] - suff
) ? (i
+ delta
) : 0;
547 /* Here the magic is performed */
548 stone(class, slen
[0], member
, J
, pref
);
549 J
[nlen
[0] + 1] = nlen
[1] + 1;
554 /* Both files are rescanned, in an effort to find any lines
555 * which, due to limitations intrinsic to any hashing algorithm,
556 * are different but ended up confounded as the same
558 for (i
= 1; i
<= nlen
[0]; i
++) {
562 seek_ft(&ft
[0], ix
[0][i
- 1]);
563 seek_ft(&ft
[1], ix
[1][J
[i
] - 1]);
565 for (j
= J
[i
]; i
<= nlen
[0] && J
[i
] == j
; i
++, j
++) {
566 token_t tok0
= 0, tok1
= 0;
568 tok0
= read_token(&ft
[0], tok0
);
569 tok1
= read_token(&ft
[1], tok1
);
571 if (((tok0
^ tok1
) & TOK_EMPTY
) != 0 /* one is empty (not both) */
572 || (!(tok0
& TOK_EMPTY
) && TOK2CHAR(tok0
) != TOK2CHAR(tok1
))
574 J
[i
] = 0; /* Break the correspondence */
576 } while (!(tok0
& tok1
& TOK_EMPTY
));
583 static bool diff(FILE* fp
[2], char *file
[2])
587 FILE_and_pos_t ft
[2];
588 typedef struct { int a
, b
; } vec_t
[2];
590 int i
= 1, j
, k
, idx
= -1;
591 bool anychange
= false;
596 /* note that ft[i].ft_pos is unintitalized, create_J()
597 * must not assume otherwise */
598 J
= create_J(ft
, nlen
, ix
);
601 bool nonempty
= false;
606 for (v
[0].a
= i
; v
[0].a
<= nlen
[0] && J
[v
[0].a
] == J
[v
[0].a
- 1] + 1; v
[0].a
++)
608 v
[1].a
= J
[v
[0].a
- 1] + 1;
610 for (v
[0].b
= v
[0].a
- 1; v
[0].b
< nlen
[0] && !J
[v
[0].b
+ 1]; v
[0].b
++)
612 v
[1].b
= J
[v
[0].b
+ 1] - 1;
614 * Indicate that there is a difference between lines a and b of the 'from' file
615 * to get to lines c to d of the 'to' file. If a is greater than b then there
616 * are no lines in the 'from' file involved and this means that there were
617 * lines appended (beginning at b). If c is greater than d then there are
618 * lines missing from the 'to' file.
620 if (v
[0].a
<= v
[0].b
|| v
[1].a
<= v
[1].b
) {
622 * If this change is more than 'context' lines from the
623 * previous change, dump the record and reset it.
625 int ct
= (2 * opt_U_context
) + 1;
627 && v
[0].a
> vec
[idx
][0].b
+ ct
628 && v
[1].a
> vec
[idx
][1].b
+ ct
633 for (j
= 0; j
< 2; j
++)
634 for (k
= v
[j
].a
; k
< v
[j
].b
; k
++)
635 nonempty
|= (ix
[j
][k
+1] - ix
[j
][k
] != 1);
637 vec
= xrealloc_vector(vec
, 6, ++idx
);
638 memcpy(vec
[idx
], v
, sizeof(v
));
646 if (idx
< 0 || ((option_mask32
& FLAG(B
)) && !nonempty
))
648 if (!(option_mask32
& FLAG(q
))) {
650 vec_t span
, *cvp
= vec
;
653 /* Print the context/unidiff header first time through */
654 printf("--- %s\n", label
[0] ? label
[0] : file
[0]);
655 printf("+++ %s\n", label
[1] ? label
[1] : file
[1]);
659 for (j
= 0; j
< 2; j
++) {
660 int a
= span
[j
].a
= MAX(1, (*cvp
)[j
].a
- opt_U_context
);
661 int b
= span
[j
].b
= MIN(nlen
[j
], vec
[idx
][j
].b
+ opt_U_context
);
663 printf(" %c%d", j
? '+' : '-', MIN(a
, b
));
666 printf(",%d", (a
< b
) ? b
- a
+ 1 : 0);
670 * Output changes in "unified" diff format--the old and new lines
671 * are printed together.
673 for (lowa
= span
[0].a
; ; lowa
= (*cvp
++)[0].b
+ 1) {
674 bool end
= cvp
> &vec
[idx
];
675 fetch(&ft
[0], ix
[0], lowa
, end
? span
[0].b
: (*cvp
)[0].a
- 1, ' ');
678 for (j
= 0; j
< 2; j
++)
679 fetch(&ft
[j
], ix
[j
], (*cvp
)[j
].a
, (*cvp
)[j
].b
, j
? '+' : '-');
685 } while (i
<= nlen
[0]);
694 static int diffreg(char *file
[2])
697 bool binary
= false, differ
= false;
698 int status
= STATUS_SAME
, i
;
702 for (i
= 0; i
< 2; i
++) {
703 int fd
= open_or_warn_stdin(file
[i
]);
706 /* Our diff implementation is using seek.
707 * When we meet non-seekable file, we must make a temp copy.
709 if (lseek(fd
, 0, SEEK_SET
) == -1 && errno
== ESPIPE
) {
710 char name
[] = "/tmp/difXXXXXX";
711 int fd_tmp
= xmkstemp(name
);
714 if (bb_copyfd_eof(fd
, fd_tmp
) < 0)
716 if (fd
) /* Prevents closing of stdin */
720 fp
[i
] = fdopen(fd
, "r");
724 const size_t sz
= COMMON_BUFSIZE
/ 2;
725 char *const buf0
= bb_common_bufsiz1
;
726 char *const buf1
= buf0
+ sz
;
728 i
= fread(buf0
, 1, sz
, fp
[0]);
729 j
= fread(buf1
, 1, sz
, fp
[1]);
736 for (k
= 0; k
< i
; k
++) {
737 if (!buf0
[k
] || !buf1
[k
])
739 if (buf0
[k
] != buf1
[k
])
744 if (binary
&& !(option_mask32
& FLAG(a
)))
745 status
= STATUS_BINARY
;
746 else if (diff(fp
, file
))
747 status
= STATUS_DIFFER
;
749 if (status
!= STATUS_SAME
)
752 fclose_if_not_stdin(fp
[0]);
753 fclose_if_not_stdin(fp
[1]);
758 static void print_status(int status
, char *path
[2])
763 if ((option_mask32
& FLAG(q
)) || status
== STATUS_BINARY
)
764 printf("Files %s and %s differ\n", path
[0], path
[1]);
767 if (option_mask32
& FLAG(s
))
768 printf("Files %s and %s are identical\n", path
[0], path
[1]);
773 #if ENABLE_FEATURE_DIFF_DIR
780 /* This function adds a filename to dl, the directory listing. */
781 static int FAST_FUNC
add_to_dirlist(const char *filename
,
782 struct stat
*sb UNUSED_PARAM
,
783 void *userdata
, int depth UNUSED_PARAM
)
785 struct dlist
*const l
= userdata
;
786 const char *file
= filename
+ l
->len
;
789 l
->dl
= xrealloc_vector(l
->dl
, 6, l
->e
);
790 l
->dl
[l
->e
] = xstrdup(file
);
795 /* If recursion is not set, this function adds the directory
796 * to the list and prevents recursive_action from recursing into it.
798 static int FAST_FUNC
skip_dir(const char *filename
,
799 struct stat
*sb
, void *userdata
,
802 if (!(option_mask32
& FLAG(r
)) && depth
) {
803 add_to_dirlist(filename
, sb
, userdata
, depth
);
806 if (!(option_mask32
& FLAG(N
))) {
807 /* -r without -N: no need to recurse into dirs
808 * which do not exist on the "other side".
809 * Testcase: diff -r /tmp /
810 * (it would recurse deep into /proc without this code) */
811 struct dlist
*const l
= userdata
;
815 char *othername
= concat_path_file(G
.other_dir
, filename
);
816 int r
= stat(othername
, &osb
);
818 if (r
!= 0 || !S_ISDIR(osb
.st_mode
)) {
819 /* other dir doesn't have similarly named
820 * directory, don't recurse; return 1 upon
821 * exit, just like diffutils' diff */
830 static void diffdir(char *p
[2], const char *s_start
)
832 struct dlist list
[2];
835 memset(&list
, 0, sizeof(list
));
836 for (i
= 0; i
< 2; i
++) {
837 /*list[i].s = list[i].e = 0; - memset did it */
838 /*list[i].dl = NULL; */
840 G
.other_dir
= p
[1 - i
];
841 /* We need to trim root directory prefix.
842 * Using list.len to specify its length,
843 * add_to_dirlist will remove it. */
844 list
[i
].len
= strlen(p
[i
]);
845 recursive_action(p
[i
], ACTION_RECURSE
| ACTION_FOLLOWLINKS
,
846 add_to_dirlist
, skip_dir
, &list
[i
], 0);
847 /* Sort dl alphabetically.
848 * GNU diff does this ignoring any number of trailing dots.
849 * We don't, so for us dotted files almost always are
852 qsort_string_vector(list
[i
].dl
, list
[i
].e
);
853 /* If -S was set, find the starting point. */
856 while (list
[i
].s
< list
[i
].e
&& strcmp(list
[i
].dl
[list
[i
].s
], s_start
) < 0)
859 /* Now that both dirlist1 and dirlist2 contain sorted directory
860 * listings, we can start to go through dirlist1. If both listings
861 * contain the same file, then do a normal diff. Otherwise, behaviour
862 * is determined by whether the -N flag is set. */
868 dp
[0] = list
[0].s
< list
[0].e
? list
[0].dl
[list
[0].s
] : NULL
;
869 dp
[1] = list
[1].s
< list
[1].e
? list
[1].dl
[list
[1].s
] : NULL
;
870 if (!dp
[0] && !dp
[1])
872 pos
= !dp
[0] ? 1 : (!dp
[1] ? -1 : strcmp(dp
[0], dp
[1]));
874 if (pos
&& !(option_mask32
& FLAG(N
))) {
875 printf("Only in %s: %s\n", p
[k
], dp
[k
]);
878 char *fullpath
[2], *path
[2]; /* if -N */
880 for (i
= 0; i
< 2; i
++) {
881 if (pos
== 0 || i
== k
) {
882 path
[i
] = fullpath
[i
] = concat_path_file(p
[i
], dp
[i
]);
883 stat(fullpath
[i
], &stb
[i
]);
885 fullpath
[i
] = concat_path_file(p
[i
], dp
[1 - i
]);
886 path
[i
] = (char *)bb_dev_null
;
890 stat(fullpath
[k
], &stb
[1 - k
]);
892 if (S_ISDIR(stb
[0].st_mode
) && S_ISDIR(stb
[1].st_mode
))
893 printf("Common subdirectories: %s and %s\n", fullpath
[0], fullpath
[1]);
894 else if (!S_ISREG(stb
[0].st_mode
) && !S_ISDIR(stb
[0].st_mode
))
895 printf("File %s is not a regular file or directory and was skipped\n", fullpath
[0]);
896 else if (!S_ISREG(stb
[1].st_mode
) && !S_ISDIR(stb
[1].st_mode
))
897 printf("File %s is not a regular file or directory and was skipped\n", fullpath
[1]);
898 else if (S_ISDIR(stb
[0].st_mode
) != S_ISDIR(stb
[1].st_mode
)) {
899 if (S_ISDIR(stb
[0].st_mode
))
900 printf("File %s is a %s while file %s is a %s\n", fullpath
[0], "directory", fullpath
[1], "regular file");
902 printf("File %s is a %s while file %s is a %s\n", fullpath
[0], "regular file", fullpath
[1], "directory");
904 print_status(diffreg(path
), fullpath
);
916 if (ENABLE_FEATURE_CLEAN_UP
) {
923 #if ENABLE_FEATURE_DIFF_LONG_OPTIONS
924 static const char diff_longopts
[] ALIGN1
=
925 "ignore-case\0" No_argument
"i"
926 "ignore-tab-expansion\0" No_argument
"E"
927 "ignore-space-change\0" No_argument
"b"
928 "ignore-all-space\0" No_argument
"w"
929 "ignore-blank-lines\0" No_argument
"B"
930 "text\0" No_argument
"a"
931 "unified\0" Required_argument
"U"
932 "label\0" Required_argument
"L"
933 "show-c-function\0" No_argument
"p"
934 "brief\0" No_argument
"q"
935 "expand-tabs\0" No_argument
"t"
936 "initial-tab\0" No_argument
"T"
937 "recursive\0" No_argument
"r"
938 "new-file\0" No_argument
"N"
939 "report-identical-files\0" No_argument
"s"
940 "starting-file\0" Required_argument
"S"
941 "minimal\0" No_argument
"d"
945 int diff_main(int argc
, char **argv
) MAIN_EXTERNALLY_VISIBLE
;
946 int diff_main(int argc UNUSED_PARAM
, char **argv
)
949 char *file
[2], *s_start
= NULL
;
950 llist_t
*L_arg
= NULL
;
954 /* exactly 2 params; collect multiple -L <label>; -U N */
955 opt_complementary
= "=2:L::U+";
956 #if ENABLE_FEATURE_DIFF_LONG_OPTIONS
957 applet_long_options
= diff_longopts
;
959 getopt32(argv
, "abdiL:NqrsS:tTU:wupBE",
960 &L_arg
, &s_start
, &opt_U_context
);
963 label
[!!label
[0]] = llist_pop(&L_arg
);
964 xfunc_error_retval
= 2;
965 for (i
= 0; i
< 2; i
++) {
967 /* Compat: "diff file name_which_doesnt_exist" exits with 2 */
968 if (LONE_DASH(file
[i
])) {
969 fstat(STDIN_FILENO
, &stb
[i
]);
972 xstat(file
[i
], &stb
[i
]);
974 xfunc_error_retval
= 1;
975 if (gotstdin
&& (S_ISDIR(stb
[0].st_mode
) || S_ISDIR(stb
[1].st_mode
)))
976 bb_error_msg_and_die("can't compare stdin to a directory");
978 /* Compare metadata to check if the files are the same physical file.
980 * Comment from diffutils source says:
981 * POSIX says that two files are identical if st_ino and st_dev are
982 * the same, but many file systems incorrectly assign the same (device,
983 * inode) pair to two distinct files, including:
984 * GNU/Linux NFS servers that export all local file systems as a
985 * single NFS file system, if a local device number (st_dev) exceeds
986 * 255, or if a local inode number (st_ino) exceeds 16777215.
989 && stb
[0].st_ino
== stb
[1].st_ino
990 && stb
[0].st_dev
== stb
[1].st_dev
991 && stb
[0].st_size
== stb
[1].st_size
992 && stb
[0].st_mtime
== stb
[1].st_mtime
993 && stb
[0].st_ctime
== stb
[1].st_ctime
994 && stb
[0].st_mode
== stb
[1].st_mode
995 && stb
[0].st_nlink
== stb
[1].st_nlink
996 && stb
[0].st_uid
== stb
[1].st_uid
997 && stb
[0].st_gid
== stb
[1].st_gid
999 /* files are physically the same; no need to compare them */
1003 if (S_ISDIR(stb
[0].st_mode
) && S_ISDIR(stb
[1].st_mode
)) {
1004 #if ENABLE_FEATURE_DIFF_DIR
1005 diffdir(file
, s_start
);
1007 bb_error_msg_and_die("no support for directory comparison");
1010 bool dirfile
= S_ISDIR(stb
[0].st_mode
) || S_ISDIR(stb
[1].st_mode
);
1011 bool dir
= S_ISDIR(stb
[1].st_mode
);
1013 const char *slash
= strrchr(file
[!dir
], '/');
1014 file
[dir
] = concat_path_file(file
[dir
], slash
? slash
+ 1 : file
[!dir
]);
1015 xstat(file
[dir
], &stb
[dir
]);
1017 /* diffreg can get non-regular files here */
1018 print_status(gotstdin
> 1 ? STATUS_SAME
: diffreg(file
), file
);