2 * LibXDiff by Davide Libenzi ( File Differential Library )
3 * Copyright (C) 2003-2016 Davide Libenzi, Johannes E. Schindelin
5 * This library is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU Lesser General Public
7 * License as published by the Free Software Foundation; either
8 * version 2.1 of the License, or (at your option) any later version.
10 * This library is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * Lesser General Public License for more details.
15 * You should have received a copy of the GNU Lesser General Public
16 * License along with this library; if not, see
17 * <http://www.gnu.org/licenses/>.
19 * Davide Libenzi <davidel@xmailserver.org>
25 * The basic idea of patience diff is to find lines that are unique in
26 * both files. These are intuitively the ones that we want to see as
29 * The maximal ordered sequence of such line pairs (where ordered means
30 * that the order in the sequence agrees with the order of the lines in
31 * both files) naturally defines an initial set of common lines.
33 * Now, the algorithm tries to extend the set of common lines by growing
34 * the line ranges where the files have identical lines.
36 * Between those common lines, the patience diff algorithm is applied
37 * recursively, until no unique line pairs can be found; these line ranges
38 * are handled by the well-known Myers algorithm.
41 #define NON_UNIQUE ULONG_MAX
44 * This is a hash mapping from line hash to line numbers in the first and
52 * 0 = unused entry, 1 = first line, 2 = second, etc.
53 * line2 is NON_UNIQUE if the line is not unique
54 * in either the first or the second file.
56 unsigned long line1
, line2
;
58 * "next" & "previous" are used for the longest common
60 * initially, "next" reflects only the order in file1.
62 struct entry
*next
, *previous
;
65 * If 1, this entry can serve as an anchor. See
66 * Documentation/diff-options.txt for more information.
69 } *entries
, *first
, *last
;
70 /* were common records found? */
71 unsigned long has_matches
;
72 mmfile_t
*file1
, *file2
;
77 static int is_anchor(xpparam_t
const *xpp
, const char *line
)
80 for (i
= 0; i
< xpp
->anchors_nr
; i
++) {
81 if (!strncmp(line
, xpp
->anchors
[i
], strlen(xpp
->anchors
[i
])))
87 /* The argument "pass" is 1 for the first file, 2 for the second. */
88 static void insert_record(xpparam_t
const *xpp
, int line
, struct hashmap
*map
,
91 xrecord_t
**records
= pass
== 1 ?
92 map
->env
->xdf1
.recs
: map
->env
->xdf2
.recs
;
93 xrecord_t
*record
= records
[line
- 1];
95 * After xdl_prepare_env() (or more precisely, due to
96 * xdl_classify_record()), the "ha" member of the records (AKA lines)
97 * is _not_ the hash anymore, but a linearized version of it. In
98 * other words, the "ha" member is guaranteed to start with 0 and
99 * the second record's ha can only be 0 or 1, etc.
101 * So we multiply ha by 2 in the hope that the hashing was
104 int index
= (int)((record
->ha
<< 1) % map
->alloc
);
106 while (map
->entries
[index
].line1
) {
107 if (map
->entries
[index
].hash
!= record
->ha
) {
108 if (++index
>= map
->alloc
)
113 map
->has_matches
= 1;
114 if (pass
== 1 || map
->entries
[index
].line2
)
115 map
->entries
[index
].line2
= NON_UNIQUE
;
117 map
->entries
[index
].line2
= line
;
122 map
->entries
[index
].line1
= line
;
123 map
->entries
[index
].hash
= record
->ha
;
124 map
->entries
[index
].anchor
= is_anchor(xpp
, map
->env
->xdf1
.recs
[line
- 1]->ptr
);
126 map
->first
= map
->entries
+ index
;
128 map
->last
->next
= map
->entries
+ index
;
129 map
->entries
[index
].previous
= map
->last
;
131 map
->last
= map
->entries
+ index
;
136 * This function has to be called for each recursion into the inter-hunk
137 * parts, as previously non-unique lines can become unique when being
138 * restricted to a smaller part of the files.
140 * It is assumed that env has been prepared using xdl_prepare().
142 static int fill_hashmap(mmfile_t
*file1
, mmfile_t
*file2
,
143 xpparam_t
const *xpp
, xdfenv_t
*env
,
144 struct hashmap
*result
,
145 int line1
, int count1
, int line2
, int count2
)
147 result
->file1
= file1
;
148 result
->file2
= file2
;
152 /* We know exactly how large we want the hash map */
153 result
->alloc
= count1
* 2;
154 result
->entries
= (struct entry
*)
155 xdl_malloc(result
->alloc
* sizeof(struct entry
));
156 if (!result
->entries
)
158 memset(result
->entries
, 0, result
->alloc
* sizeof(struct entry
));
160 /* First, fill with entries from the first file */
162 insert_record(xpp
, line1
++, result
, 1);
164 /* Then search for matches in the second file */
166 insert_record(xpp
, line2
++, result
, 2);
172 * Find the longest sequence with a smaller last element (meaning a smaller
173 * line2, as we construct the sequence with entries ordered by line1).
175 static int binary_search(struct entry
**sequence
, int longest
,
178 int left
= -1, right
= longest
;
180 while (left
+ 1 < right
) {
181 int middle
= left
+ (right
- left
) / 2;
182 /* by construction, no two entries can be equal */
183 if (sequence
[middle
]->line2
> entry
->line2
)
188 /* return the index in "sequence", _not_ the sequence length */
193 * The idea is to start with the list of common unique lines sorted by
194 * the order in file1. For each of these pairs, the longest (partial)
195 * sequence whose last element's line2 is smaller is determined.
197 * For efficiency, the sequences are kept in a list containing exactly one
198 * item per sequence length: the sequence with the smallest last
199 * element (in terms of line2).
201 static struct entry
*find_longest_common_sequence(struct hashmap
*map
)
203 struct entry
**sequence
= xdl_malloc(map
->nr
* sizeof(struct entry
*));
208 * If not -1, this entry in sequence must never be overridden.
209 * Therefore, overriding entries before this has no effect, so
210 * do not do that either.
214 for (entry
= map
->first
; entry
; entry
= entry
->next
) {
215 if (!entry
->line2
|| entry
->line2
== NON_UNIQUE
)
217 i
= binary_search(sequence
, longest
, entry
);
218 entry
->previous
= i
< 0 ? NULL
: sequence
[i
];
225 longest
= anchor_i
+ 1;
226 } else if (i
== longest
) {
231 /* No common unique lines were found */
237 /* Iterate starting at the last element, adjusting the "next" members */
238 entry
= sequence
[longest
- 1];
240 while (entry
->previous
) {
241 entry
->previous
->next
= entry
;
242 entry
= entry
->previous
;
248 static int match(struct hashmap
*map
, int line1
, int line2
)
250 xrecord_t
*record1
= map
->env
->xdf1
.recs
[line1
- 1];
251 xrecord_t
*record2
= map
->env
->xdf2
.recs
[line2
- 1];
252 return record1
->ha
== record2
->ha
;
255 static int patience_diff(mmfile_t
*file1
, mmfile_t
*file2
,
256 xpparam_t
const *xpp
, xdfenv_t
*env
,
257 int line1
, int count1
, int line2
, int count2
);
259 static int walk_common_sequence(struct hashmap
*map
, struct entry
*first
,
260 int line1
, int count1
, int line2
, int count2
)
262 int end1
= line1
+ count1
, end2
= line2
+ count2
;
266 /* Try to grow the line ranges of common lines */
268 next1
= first
->line1
;
269 next2
= first
->line2
;
270 while (next1
> line1
&& next2
> line2
&&
271 match(map
, next1
- 1, next2
- 1)) {
279 while (line1
< next1
&& line2
< next2
&&
280 match(map
, line1
, line2
)) {
286 if (next1
> line1
|| next2
> line2
) {
287 if (patience_diff(map
->file1
, map
->file2
,
289 line1
, next1
- line1
,
290 line2
, next2
- line2
))
297 while (first
->next
&&
298 first
->next
->line1
== first
->line1
+ 1 &&
299 first
->next
->line2
== first
->line2
+ 1)
302 line1
= first
->line1
+ 1;
303 line2
= first
->line2
+ 1;
309 static int fall_back_to_classic_diff(struct hashmap
*map
,
310 int line1
, int count1
, int line2
, int count2
)
314 memset(&xpp
, 0, sizeof(xpp
));
315 xpp
.flags
= map
->xpp
->flags
& ~XDF_DIFF_ALGORITHM_MASK
;
317 return xdl_fall_back_diff(map
->env
, &xpp
,
318 line1
, count1
, line2
, count2
);
322 * Recursively find the longest common sequence of unique lines,
323 * and if none was found, ask xdl_do_diff() to do the job.
325 * This function assumes that env was prepared with xdl_prepare_env().
327 static int patience_diff(mmfile_t
*file1
, mmfile_t
*file2
,
328 xpparam_t
const *xpp
, xdfenv_t
*env
,
329 int line1
, int count1
, int line2
, int count2
)
335 /* trivial case: one side is empty */
338 env
->xdf2
.rchg
[line2
++ - 1] = 1;
340 } else if (!count2
) {
342 env
->xdf1
.rchg
[line1
++ - 1] = 1;
346 memset(&map
, 0, sizeof(map
));
347 if (fill_hashmap(file1
, file2
, xpp
, env
, &map
,
348 line1
, count1
, line2
, count2
))
351 /* are there any matching lines at all? */
352 if (!map
.has_matches
) {
354 env
->xdf1
.rchg
[line1
++ - 1] = 1;
356 env
->xdf2
.rchg
[line2
++ - 1] = 1;
357 xdl_free(map
.entries
);
361 first
= find_longest_common_sequence(&map
);
363 result
= walk_common_sequence(&map
, first
,
364 line1
, count1
, line2
, count2
);
366 result
= fall_back_to_classic_diff(&map
,
367 line1
, count1
, line2
, count2
);
369 xdl_free(map
.entries
);
373 int xdl_do_patience_diff(mmfile_t
*file1
, mmfile_t
*file2
,
374 xpparam_t
const *xpp
, xdfenv_t
*env
)
376 if (xdl_prepare_env(file1
, file2
, xpp
, env
) < 0)
379 /* environment is cleaned up in xdl_diff() */
380 return patience_diff(file1
, file2
, xpp
, env
,
381 1, env
->xdf1
.nrec
, 1, env
->xdf2
.nrec
);