src/TortoiseMerge/libsvn_diff/util.c

   1 /*
   2  * util.c :  routines for doing diffs
   3  *
   4  * ====================================================================
   5  * Copyright (c) 2000-2004 CollabNet.  All rights reserved.
   6  *
   7  * This software is licensed as described in the file COPYING, which
   8  * you should have received as part of this distribution.  The terms
   9  * are also available at http://subversion.tigris.org/license-1.html.
  10  * If newer versions of this license are posted there, you may use a
  11  * newer version instead, at your option.
  12  *
  13  * This software consists of voluntary contributions made by many
  14  * individuals.  For exact contribution history, see the revision
  15  * history and logs, available at http://subversion.tigris.org/.
  16  * ====================================================================
  17  */
  18
  19
  20 #include <apr.h>
  21 #include <apr_general.h>
  22
  23 #include "svn_error.h"
  24 #include "svn_version.h"
  25 #include "svn_io.h"
  26 #include "svn_types.h"
  27 #include "svn_ctype.h"
  28
  29 #include "diff.h"
  30 #include "svn_diff.h"
  31 /**
  32  * An Adler-32 implementation per RFC1950.
  33  *
  34  * "The Adler-32 algorithm is much faster than the CRC32 algorithm yet
  35  * still provides an extremely low probability of undetected errors"
  36  */
  37
  38 /*
  39  * 65521 is the largest prime less than 65536.
  40  * "That 65521 is prime is important to avoid a possible large class of
  41  *  two-byte errors that leave the check unchanged."
  42  */
  43 #define ADLER_MOD_BASE 65521
  44
  45 /*
  46  * "The modulo on unsigned long accumulators can be delayed for 5552 bytes,
  47  *  so the modulo operation time is negligible."
  48  */
  49 #define ADLER_MOD_BLOCK_SIZE 5552
  50
  51
  52 /*
  53  * Start with CHECKSUM and update the checksum by processing a chunk
  54  * of DATA sized LEN.
  55  */
  56 apr_uint32_t
  57 svn_diff__adler32(apr_uint32_t checksum, const char *data, apr_size_t len)
  58 {
  59   const unsigned char *input = (const unsigned char *)data;
  60   apr_uint32_t s1 = checksum & 0xFFFF;
  61   apr_uint32_t s2 = checksum >> 16;
  62   apr_uint32_t b;
  63   apr_size_t blocks = len / ADLER_MOD_BLOCK_SIZE;
  64
  65   len %= ADLER_MOD_BLOCK_SIZE;
  66
  67   while (blocks--)
  68     {
  69       int count = ADLER_MOD_BLOCK_SIZE;
  70       while (count--)
  71         {
  72           b = *input++;
  73           s1 += b;
  74           s2 += s1;
  75         }
  76
  77       s1 %= ADLER_MOD_BASE;
  78       s2 %= ADLER_MOD_BASE;
  79     }
  80
  81   while (len--)
  82     {
  83       b = *input++;
  84       s1 += b;
  85       s2 += s1;
  86     }
  87
  88   return ((s2 % ADLER_MOD_BASE) << 16) | (s1 % ADLER_MOD_BASE);
  89 }
  90
  91
  92 svn_boolean_t
  93 svn_diff_contains_conflicts(svn_diff_t *diff)
  94 {
  95   while (diff != NULL)
  96     {
  97       if (diff->type == svn_diff__type_conflict)
  98         {
  99           return TRUE;
 100         }
 101
 102       diff = diff->next;
 103     }
 104
 105   return FALSE;
 106 }
 107
 108 svn_boolean_t
 109 svn_diff_contains_diffs(svn_diff_t *diff)
 110 {
 111   while (diff != NULL)
 112     {
 113       if (diff->type != svn_diff__type_common)
 114         {
 115           return TRUE;
 116         }
 117
 118       diff = diff->next;
 119     }
 120
 121   return FALSE;
 122 }
 123
 124 svn_error_t *
 125 svn_diff_output(svn_diff_t *diff,
 126                 void *output_baton,
 127                 const svn_diff_output_fns_t *vtable)
 128 {
 129   svn_error_t *(*output_fn)(void *,
 130                             apr_off_t, apr_off_t,
 131                             apr_off_t, apr_off_t,
 132                             apr_off_t, apr_off_t);
 133
 134   while (diff != NULL)
 135     {
 136       switch (diff->type)
 137         {
 138         case svn_diff__type_common:
 139           output_fn = vtable->output_common;
 140           break;
 141
 142         case svn_diff__type_diff_common:
 143           output_fn = vtable->output_diff_common;
 144           break;
 145
 146         case svn_diff__type_diff_modified:
 147           output_fn = vtable->output_diff_modified;
 148           break;
 149
 150         case svn_diff__type_diff_latest:
 151           output_fn = vtable->output_diff_latest;
 152           break;
 153
 154         case svn_diff__type_conflict:
 155           output_fn = NULL;
 156           if (vtable->output_conflict != NULL)
 157             {
 158               SVN_ERR(vtable->output_conflict(output_baton,
 159                                diff->original_start, diff->original_length,
 160                                diff->modified_start, diff->modified_length,
 161                                diff->latest_start, diff->latest_length,
 162                                diff->resolved_diff));
 163             }
 164           break;
 165
 166         default:
 167           output_fn = NULL;
 168           break;
 169         }
 170
 171       if (output_fn != NULL)
 172         {
 173           SVN_ERR(output_fn(output_baton,
 174                             diff->original_start, diff->original_length,
 175                             diff->modified_start, diff->modified_length,
 176                             diff->latest_start, diff->latest_length));
 177         }
 178
 179       diff = diff->next;
 180     }
 181
 182   return SVN_NO_ERROR;
 183 }
 184
 185
 186 void
 187 svn_diff__normalize_buffer(char **tgt,
 188                            apr_off_t *lengthp,
 189                            svn_diff__normalize_state_t *statep,
 190                            const char *buf,
 191                            const svn_diff_file_options_t *opts)
 192 {
 193   /* Variables for looping through BUF */
 194   const char *curp, *endp;
 195
 196   /* Variable to record normalizing state */
 197   svn_diff__normalize_state_t state = *statep;
 198
 199   /* Variables to track what needs copying into the target buffer */
 200   const char *start = buf;
 201   apr_size_t include_len = 0;
 202   svn_boolean_t last_skipped = FALSE; /* makes sure we set 'start' */
 203
 204   /* Variable to record the state of the target buffer */
 205   char *tgt_newend = *tgt;
 206
 207   /* If this is a noop, then just get out of here. */
 208   if (! opts->ignore_space && ! opts->ignore_eol_style)
 209     {
 210       *tgt = (char *)buf;
 211       return;
 212     }
 213
 214
 215   /* It only took me forever to get this routine right,
 216      so here my thoughts go:
 217
 218     Below, we loop through the data, doing 2 things:
 219
 220      - Normalizing
 221      - Copying other data
 222
 223      The routine tries its hardest *not* to copy data, but instead
 224      returning a pointer into already normalized existing data.
 225
 226      To this end, a block 'other data' shouldn't be copied when found,
 227      but only as soon as it can't be returned in-place.
 228
 229      On a character level, there are 3 possible operations:
 230
 231      - Skip the character (don't include in the normalized data)
 232      - Include the character (do include in the normalizad data)
 233      - Include as another character
 234        This is essentially the same as skipping the current character
 235        and inserting a given character in the output data.
 236
 237     The macros below (SKIP, INCLUDE and INCLUDE_AS) are defined to
 238     handle the character based operations.  The macros themselves
 239     collect character level data into blocks.
 240
 241     At all times designate the START, INCLUDED_LEN and CURP pointers
 242     an included and and skipped block like this:
 243
 244       [ start, start + included_len ) [ start + included_len, curp )
 245              INCLUDED                        EXCLUDED
 246
 247     When the routine flips from skipping to including, the last
 248     included block has to be flushed to the output buffer.
 249   */
 250
 251   /* Going from including to skipping; only schedules the current
 252      included section for flushing.
 253      Also, simply chop off the character if it's the first in the buffer,
 254      so we can possibly just return the remainder of the buffer */
 255 #define SKIP             \
 256   do {                   \
 257     if (start == curp)   \
 258        ++start;          \
 259     last_skipped = TRUE; \
 260   } while (0)
 261
 262 #define INCLUDE                \
 263   do {                         \
 264     if (last_skipped)          \
 265       COPY_INCLUDED_SECTION;   \
 266     ++include_len;             \
 267     last_skipped = FALSE;      \
 268   } while (0)
 269
 270 #define COPY_INCLUDED_SECTION                     \
 271   do {                                            \
 272     if (include_len > 0)                          \
 273       {                                           \
 274          memmove(tgt_newend, start, include_len); \
 275          tgt_newend += include_len;               \
 276          include_len = 0;                         \
 277       }                                           \
 278     start = curp;                                 \
 279   } while (0)
 280
 281   /* Include the current character as character X.
 282      If the current character already *is* X, add it to the
 283      currently included region, increasing chances for consecutive
 284      fully normalized blocks. */
 285 #define INCLUDE_AS(x)          \
 286   do {                         \
 287     if (*curp == (x))          \
 288       INCLUDE;                 \
 289     else                       \
 290       {                        \
 291         INSERT((x));           \
 292         SKIP;                  \
 293       }                        \
 294   } while (0)
 295
 296   /* Insert character X in the output buffer */
 297 #define INSERT(x)              \
 298   do {                         \
 299     COPY_INCLUDED_SECTION;     \
 300     *tgt_newend++ = (x);       \
 301   } while (0)
 302
 303   for (curp = buf, endp = buf + *lengthp; curp != endp; ++curp)
 304     {
 305       switch (*curp)
 306         {
 307         case '\r':
 308           if (opts->ignore_eol_style)
 309             INCLUDE_AS('\n');
 310           else
 311             INCLUDE;
 312           state = svn_diff__normalize_state_cr;
 313           break;
 314
 315         case '\n':
 316           if (state == svn_diff__normalize_state_cr
 317               && opts->ignore_eol_style)
 318             SKIP;
 319           else
 320             INCLUDE;
 321           state = svn_diff__normalize_state_normal;
 322           break;
 323
 324         default:
 325           if (svn_ctype_isspace(*curp)
 326               && opts->ignore_space != svn_diff_file_ignore_space_none)
 327             {
 328               /* Whitespace but not '\r' or '\n' */
 329               if (state != svn_diff__normalize_state_whitespace
 330                   && opts->ignore_space
 331                      == svn_diff_file_ignore_space_change)
 332                 /*### If we can postpone insertion of the space
 333                   until the next non-whitespace character,
 334                   we have a potential of reducing the number of copies:
 335                   If this space is followed by more spaces,
 336                   this will cause a block-copy.
 337                   If the next non-space block is considered normalized
 338                   *and* preceded by a space, we can take advantage of that. */
 339                 /* Note, the above optimization applies to 90% of the source
 340                    lines in our own code, since it (generally) doesn't use
 341                    more than one space per blank section, except for the
 342                    beginning of a line. */
 343                 INCLUDE_AS(' ');
 344               else
 345                 SKIP;
 346               state = svn_diff__normalize_state_whitespace;
 347             }
 348           else
 349             {
 350               /* Non-whitespace character, or whitespace character in
 351                  svn_diff_file_ignore_space_none mode. */
 352               INCLUDE;
 353               state = svn_diff__normalize_state_normal;
 354             }
 355         }
 356     }
 357
 358   /* If we're not in whitespace, flush the last chunk of data.
 359    * Note that this will work correctly when this is the last chunk of the
 360    * file:
 361    * * If there is an eol, it will either have been output when we entered
 362    *   the state_cr, or it will be output now.
 363    * * If there is no eol and we're not in whitespace, then we just output
 364    *   everything below.
 365    * * If there's no eol and we are in whitespace, we want to ignore
 366    *   whitespace unconditionally. */
 367
 368   if (*tgt == tgt_newend)
 369     {
 370       /* we haven't copied any data in to *tgt and our chunk consists
 371          only of one block of (already normalized) data.
 372          Just return the block. */
 373       *tgt = (char *)start;
 374       *lengthp = include_len;
 375     }
 376   else
 377     {
 378       COPY_INCLUDED_SECTION;
 379       *lengthp = tgt_newend - *tgt;
 380     }
 381
 382   *statep = state;
 383
 384 #undef SKIP
 385 #undef INCLUDE
 386 #undef INCLUDE_AS
 387 #undef INSERT
 388 #undef COPY_INCLUDED_SECTION
 389 }
 390
 391
 392 /* Return the library version number. */
 393 const svn_version_t *
 394 svn_diff_version(void)
 395 {
 396   SVN_VERSION_BODY;
 397 }