Success build TortoiseMerge.
[TortoiseGit.git] / src / TortoiseMerge / libsvn_diff / util.c
blob8d9ccc41229cc1bece4eb8322c131c6739231f0f
1 /*
2 * util.c : routines for doing diffs
4 * ====================================================================
5 * Copyright (c) 2000-2004 CollabNet. All rights reserved.
7 * This software is licensed as described in the file COPYING, which
8 * you should have received as part of this distribution. The terms
9 * are also available at http://subversion.tigris.org/license-1.html.
10 * If newer versions of this license are posted there, you may use a
11 * newer version instead, at your option.
13 * This software consists of voluntary contributions made by many
14 * individuals. For exact contribution history, see the revision
15 * history and logs, available at http://subversion.tigris.org/.
16 * ====================================================================
20 #include <apr.h>
21 #include <apr_general.h>
23 #include "svn_error.h"
24 #include "svn_version.h"
25 #include "svn_io.h"
26 #include "svn_types.h"
27 #include "svn_ctype.h"
29 #include "diff.h"
30 #include "svn_diff.h"
31 /**
32 * An Adler-32 implementation per RFC1950.
34 * "The Adler-32 algorithm is much faster than the CRC32 algorithm yet
35 * still provides an extremely low probability of undetected errors"
39 * 65521 is the largest prime less than 65536.
40 * "That 65521 is prime is important to avoid a possible large class of
41 * two-byte errors that leave the check unchanged."
43 #define ADLER_MOD_BASE 65521
46 * "The modulo on unsigned long accumulators can be delayed for 5552 bytes,
47 * so the modulo operation time is negligible."
49 #define ADLER_MOD_BLOCK_SIZE 5552
53 * Start with CHECKSUM and update the checksum by processing a chunk
54 * of DATA sized LEN.
56 apr_uint32_t
57 svn_diff__adler32(apr_uint32_t checksum, const char *data, apr_size_t len)
59 const unsigned char *input = (const unsigned char *)data;
60 apr_uint32_t s1 = checksum & 0xFFFF;
61 apr_uint32_t s2 = checksum >> 16;
62 apr_uint32_t b;
63 apr_size_t blocks = len / ADLER_MOD_BLOCK_SIZE;
65 len %= ADLER_MOD_BLOCK_SIZE;
67 while (blocks--)
69 int count = ADLER_MOD_BLOCK_SIZE;
70 while (count--)
72 b = *input++;
73 s1 += b;
74 s2 += s1;
77 s1 %= ADLER_MOD_BASE;
78 s2 %= ADLER_MOD_BASE;
81 while (len--)
83 b = *input++;
84 s1 += b;
85 s2 += s1;
88 return ((s2 % ADLER_MOD_BASE) << 16) | (s1 % ADLER_MOD_BASE);
92 svn_boolean_t
93 svn_diff_contains_conflicts(svn_diff_t *diff)
95 while (diff != NULL)
97 if (diff->type == svn_diff__type_conflict)
99 return TRUE;
102 diff = diff->next;
105 return FALSE;
108 svn_boolean_t
109 svn_diff_contains_diffs(svn_diff_t *diff)
111 while (diff != NULL)
113 if (diff->type != svn_diff__type_common)
115 return TRUE;
118 diff = diff->next;
121 return FALSE;
124 svn_error_t *
125 svn_diff_output(svn_diff_t *diff,
126 void *output_baton,
127 const svn_diff_output_fns_t *vtable)
129 svn_error_t *(*output_fn)(void *,
130 apr_off_t, apr_off_t,
131 apr_off_t, apr_off_t,
132 apr_off_t, apr_off_t);
134 while (diff != NULL)
136 switch (diff->type)
138 case svn_diff__type_common:
139 output_fn = vtable->output_common;
140 break;
142 case svn_diff__type_diff_common:
143 output_fn = vtable->output_diff_common;
144 break;
146 case svn_diff__type_diff_modified:
147 output_fn = vtable->output_diff_modified;
148 break;
150 case svn_diff__type_diff_latest:
151 output_fn = vtable->output_diff_latest;
152 break;
154 case svn_diff__type_conflict:
155 output_fn = NULL;
156 if (vtable->output_conflict != NULL)
158 SVN_ERR(vtable->output_conflict(output_baton,
159 diff->original_start, diff->original_length,
160 diff->modified_start, diff->modified_length,
161 diff->latest_start, diff->latest_length,
162 diff->resolved_diff));
164 break;
166 default:
167 output_fn = NULL;
168 break;
171 if (output_fn != NULL)
173 SVN_ERR(output_fn(output_baton,
174 diff->original_start, diff->original_length,
175 diff->modified_start, diff->modified_length,
176 diff->latest_start, diff->latest_length));
179 diff = diff->next;
182 return SVN_NO_ERROR;
186 void
187 svn_diff__normalize_buffer(char **tgt,
188 apr_off_t *lengthp,
189 svn_diff__normalize_state_t *statep,
190 const char *buf,
191 const svn_diff_file_options_t *opts)
193 /* Variables for looping through BUF */
194 const char *curp, *endp;
196 /* Variable to record normalizing state */
197 svn_diff__normalize_state_t state = *statep;
199 /* Variables to track what needs copying into the target buffer */
200 const char *start = buf;
201 apr_size_t include_len = 0;
202 svn_boolean_t last_skipped = FALSE; /* makes sure we set 'start' */
204 /* Variable to record the state of the target buffer */
205 char *tgt_newend = *tgt;
207 /* If this is a noop, then just get out of here. */
208 if (! opts->ignore_space && ! opts->ignore_eol_style)
210 *tgt = (char *)buf;
211 return;
215 /* It only took me forever to get this routine right,
216 so here my thoughts go:
218 Below, we loop through the data, doing 2 things:
220 - Normalizing
221 - Copying other data
223 The routine tries its hardest *not* to copy data, but instead
224 returning a pointer into already normalized existing data.
226 To this end, a block 'other data' shouldn't be copied when found,
227 but only as soon as it can't be returned in-place.
229 On a character level, there are 3 possible operations:
231 - Skip the character (don't include in the normalized data)
232 - Include the character (do include in the normalizad data)
233 - Include as another character
234 This is essentially the same as skipping the current character
235 and inserting a given character in the output data.
237 The macros below (SKIP, INCLUDE and INCLUDE_AS) are defined to
238 handle the character based operations. The macros themselves
239 collect character level data into blocks.
241 At all times designate the START, INCLUDED_LEN and CURP pointers
242 an included and and skipped block like this:
244 [ start, start + included_len ) [ start + included_len, curp )
245 INCLUDED EXCLUDED
247 When the routine flips from skipping to including, the last
248 included block has to be flushed to the output buffer.
251 /* Going from including to skipping; only schedules the current
252 included section for flushing.
253 Also, simply chop off the character if it's the first in the buffer,
254 so we can possibly just return the remainder of the buffer */
255 #define SKIP \
256 do { \
257 if (start == curp) \
258 ++start; \
259 last_skipped = TRUE; \
260 } while (0)
262 #define INCLUDE \
263 do { \
264 if (last_skipped) \
265 COPY_INCLUDED_SECTION; \
266 ++include_len; \
267 last_skipped = FALSE; \
268 } while (0)
270 #define COPY_INCLUDED_SECTION \
271 do { \
272 if (include_len > 0) \
274 memmove(tgt_newend, start, include_len); \
275 tgt_newend += include_len; \
276 include_len = 0; \
278 start = curp; \
279 } while (0)
281 /* Include the current character as character X.
282 If the current character already *is* X, add it to the
283 currently included region, increasing chances for consecutive
284 fully normalized blocks. */
285 #define INCLUDE_AS(x) \
286 do { \
287 if (*curp == (x)) \
288 INCLUDE; \
289 else \
291 INSERT((x)); \
292 SKIP; \
294 } while (0)
296 /* Insert character X in the output buffer */
297 #define INSERT(x) \
298 do { \
299 COPY_INCLUDED_SECTION; \
300 *tgt_newend++ = (x); \
301 } while (0)
303 for (curp = buf, endp = buf + *lengthp; curp != endp; ++curp)
305 switch (*curp)
307 case '\r':
308 if (opts->ignore_eol_style)
309 INCLUDE_AS('\n');
310 else
311 INCLUDE;
312 state = svn_diff__normalize_state_cr;
313 break;
315 case '\n':
316 if (state == svn_diff__normalize_state_cr
317 && opts->ignore_eol_style)
318 SKIP;
319 else
320 INCLUDE;
321 state = svn_diff__normalize_state_normal;
322 break;
324 default:
325 if (svn_ctype_isspace(*curp)
326 && opts->ignore_space != svn_diff_file_ignore_space_none)
328 /* Whitespace but not '\r' or '\n' */
329 if (state != svn_diff__normalize_state_whitespace
330 && opts->ignore_space
331 == svn_diff_file_ignore_space_change)
332 /*### If we can postpone insertion of the space
333 until the next non-whitespace character,
334 we have a potential of reducing the number of copies:
335 If this space is followed by more spaces,
336 this will cause a block-copy.
337 If the next non-space block is considered normalized
338 *and* preceded by a space, we can take advantage of that. */
339 /* Note, the above optimization applies to 90% of the source
340 lines in our own code, since it (generally) doesn't use
341 more than one space per blank section, except for the
342 beginning of a line. */
343 INCLUDE_AS(' ');
344 else
345 SKIP;
346 state = svn_diff__normalize_state_whitespace;
348 else
350 /* Non-whitespace character, or whitespace character in
351 svn_diff_file_ignore_space_none mode. */
352 INCLUDE;
353 state = svn_diff__normalize_state_normal;
358 /* If we're not in whitespace, flush the last chunk of data.
359 * Note that this will work correctly when this is the last chunk of the
360 * file:
361 * * If there is an eol, it will either have been output when we entered
362 * the state_cr, or it will be output now.
363 * * If there is no eol and we're not in whitespace, then we just output
364 * everything below.
365 * * If there's no eol and we are in whitespace, we want to ignore
366 * whitespace unconditionally. */
368 if (*tgt == tgt_newend)
370 /* we haven't copied any data in to *tgt and our chunk consists
371 only of one block of (already normalized) data.
372 Just return the block. */
373 *tgt = (char *)start;
374 *lengthp = include_len;
376 else
378 COPY_INCLUDED_SECTION;
379 *lengthp = tgt_newend - *tgt;
382 *statep = state;
384 #undef SKIP
385 #undef INCLUDE
386 #undef INCLUDE_AS
387 #undef INSERT
388 #undef COPY_INCLUDED_SECTION
392 /* Return the library version number. */
393 const svn_version_t *
394 svn_diff_version(void)
396 SVN_VERSION_BODY;