src/TortoiseMerge/libsvn_diff/util.c

   1 /*
   2  * util.c :  routines for doing diffs
   3  *
   4  * ====================================================================
   5  *    Licensed to the Apache Software Foundation (ASF) under one
   6  *    or more contributor license agreements.  See the NOTICE file
   7  *    distributed with this work for additional information
   8  *    regarding copyright ownership.  The ASF licenses this file
   9  *    to you under the Apache License, Version 2.0 (the
  10  *    "License"); you may not use this file except in compliance
  11  *    with the License.  You may obtain a copy of the License at
  12  *
  13  *      http://www.apache.org/licenses/LICENSE-2.0
  14  *
  15  *    Unless required by applicable law or agreed to in writing,
  16  *    software distributed under the License is distributed on an
  17  *    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
  18  *    KIND, either express or implied.  See the License for the
  19  *    specific language governing permissions and limitations
  20  *    under the License.
  21  * ====================================================================
  22  */
  23
  24
  25 #include <apr.h>
  26 #include <apr_general.h>
  27
  28 #include "svn_hash.h"
  29 #include "svn_pools.h"
  30 #include "svn_dirent_uri.h"
  31 #include "svn_props.h"
  32 #include "svn_mergeinfo.h"
  33 #include "svn_error.h"
  34 #include "svn_diff.h"
  35 #include "svn_types.h"
  36 #include "svn_ctype.h"
  37 #include "svn_utf.h"
  38 #include "svn_version.h"
  39
  40 #include "private/svn_diff_private.h"
  41 #include "diff.h"
  42
  43 #include "svn_private_config.h"
  44
  45
  46 svn_boolean_t
  47 svn_diff_contains_conflicts(svn_diff_t *diff)
  48 {
  49   while (diff != NULL)
  50     {
  51       if (diff->type == svn_diff__type_conflict)
  52         {
  53           return TRUE;
  54         }
  55
  56       diff = diff->next;
  57     }
  58
  59   return FALSE;
  60 }
  61
  62 svn_boolean_t
  63 svn_diff_contains_diffs(svn_diff_t *diff)
  64 {
  65   while (diff != NULL)
  66     {
  67       if (diff->type != svn_diff__type_common)
  68         {
  69           return TRUE;
  70         }
  71
  72       diff = diff->next;
  73     }
  74
  75   return FALSE;
  76 }
  77
  78 svn_error_t *
  79 svn_diff_output(svn_diff_t *diff,
  80                 void *output_baton,
  81                 const svn_diff_output_fns_t *vtable)
  82 {
  83   svn_error_t *(*output_fn)(void *,
  84                             apr_off_t, apr_off_t,
  85                             apr_off_t, apr_off_t,
  86                             apr_off_t, apr_off_t);
  87
  88   while (diff != NULL)
  89     {
  90       switch (diff->type)
  91         {
  92         case svn_diff__type_common:
  93           output_fn = vtable->output_common;
  94           break;
  95
  96         case svn_diff__type_diff_common:
  97           output_fn = vtable->output_diff_common;
  98           break;
  99
 100         case svn_diff__type_diff_modified:
 101           output_fn = vtable->output_diff_modified;
 102           break;
 103
 104         case svn_diff__type_diff_latest:
 105           output_fn = vtable->output_diff_latest;
 106           break;
 107
 108         case svn_diff__type_conflict:
 109           output_fn = NULL;
 110           if (vtable->output_conflict != NULL)
 111             {
 112               SVN_ERR(vtable->output_conflict(output_baton,
 113                                diff->original_start, diff->original_length,
 114                                diff->modified_start, diff->modified_length,
 115                                diff->latest_start, diff->latest_length,
 116                                diff->resolved_diff));
 117             }
 118           break;
 119
 120         default:
 121           output_fn = NULL;
 122           break;
 123         }
 124
 125       if (output_fn != NULL)
 126         {
 127           SVN_ERR(output_fn(output_baton,
 128                             diff->original_start, diff->original_length,
 129                             diff->modified_start, diff->modified_length,
 130                             diff->latest_start, diff->latest_length));
 131         }
 132
 133       diff = diff->next;
 134     }
 135
 136   return SVN_NO_ERROR;
 137 }
 138
 139
/* Normalize the *LENGTHP bytes starting at BUF according to OPTS,
   continuing from the normalization state in *STATEP.

   If neither OPTS->ignore_space nor OPTS->ignore_eol_style is set, this
   is a no-op: *TGT is pointed at BUF and the function returns without
   touching *LENGTHP or *STATEP.

   Otherwise, on return:
     - *TGT points at the normalized data.  That is either a position
       inside BUF (when nothing had to be copied) or the buffer *TGT
       pointed to on entry (when data was copied into it);
     - *LENGTHP holds the length of the normalized data;
     - *STATEP holds the state after the last byte of BUF, so that the
       next chunk can be normalized consistently.

   Normalization performed:
     - with OPTS->ignore_eol_style, a '\r' is emitted as '\n' and a '\n'
       seen while in the "cr" state is dropped;
     - with OPTS->ignore_space == svn_diff_file_ignore_space_change, the
       first character of a run of whitespace (other than '\r' / '\n')
       is emitted as ' ' and the rest of the run is dropped;
     - with any other OPTS->ignore_space value that is not
       svn_diff_file_ignore_space_none, all such whitespace is dropped.

   NOTE(review): each input byte produces at most one output byte, so a
   target buffer of *LENGTHP bytes looks sufficient -- confirm against
   the callers.  memmove() is used for the copies, presumably because
   *TGT and BUF may overlap -- confirm. */
void
svn_diff__normalize_buffer(char **tgt,
                           apr_off_t *lengthp,
                           svn_diff__normalize_state_t *statep,
                           const char *buf,
                           const svn_diff_file_options_t *opts)
{
  /* Variables for looping through BUF */
  const char *curp, *endp;

  /* Variable to record normalizing state */
  svn_diff__normalize_state_t state = *statep;

  /* Variables to track what needs copying into the target buffer */
  const char *start = buf;
  apr_size_t include_len = 0;
  svn_boolean_t last_skipped = FALSE; /* makes sure we set 'start' */

  /* Variable to record the state of the target buffer */
  char *tgt_newend = *tgt;

  /* If this is a noop, then just get out of here. */
  if (! opts->ignore_space && ! opts->ignore_eol_style)
    {
      *tgt = (char *)buf;
      return;
    }


  /* It only took me forever to get this routine right,
     so here my thoughts go:

    Below, we loop through the data, doing 2 things:

     - Normalizing
     - Copying other data

     The routine tries its hardest *not* to copy data, but instead
     returning a pointer into already normalized existing data.

     To this end, a block of 'other data' shouldn't be copied when found,
     but only as soon as it can't be returned in-place.

     On a character level, there are 3 possible operations:

     - Skip the character (don't include in the normalized data)
     - Include the character (do include in the normalized data)
     - Include as another character
       This is essentially the same as skipping the current character
       and inserting a given character in the output data.

    The macros below (SKIP, INCLUDE and INCLUDE_AS) are defined to
    handle the character based operations.  The macros themselves
    collect character level data into blocks.

    At all times the START, INCLUDE_LEN and CURP variables designate
    an included and a skipped block like this:

      [ start, start + include_len ) [ start + include_len, curp )
             INCLUDED                        EXCLUDED

    When the routine flips from skipping to including, the last
    included block has to be flushed to the output buffer.
  */

  /* Going from including to skipping; only schedules the current
     included section for flushing.
     Also, simply chop off the character if it's the first in the buffer,
     so we can possibly just return the remainder of the buffer */
#define SKIP             \
  do {                   \
    if (start == curp)   \
       ++start;          \
    last_skipped = TRUE; \
  } while (0)

  /* Add the current character to the included block.  If the previous
     character was skipped, the pending included block is flushed first
     and a new block is started at CURP. */
#define INCLUDE                \
  do {                         \
    if (last_skipped)          \
      COPY_INCLUDED_SECTION;   \
    ++include_len;             \
    last_skipped = FALSE;      \
  } while (0)

  /* Flush the pending included block (if any) to the target buffer and
     restart block tracking at CURP. */
#define COPY_INCLUDED_SECTION                     \
  do {                                            \
    if (include_len > 0)                          \
      {                                           \
         memmove(tgt_newend, start, include_len); \
         tgt_newend += include_len;               \
         include_len = 0;                         \
      }                                           \
    start = curp;                                 \
  } while (0)

  /* Include the current character as character X.
     If the current character already *is* X, add it to the
     currently included region, increasing chances for consecutive
     fully normalized blocks. */
#define INCLUDE_AS(x)          \
  do {                         \
    if (*curp == (x))          \
      INCLUDE;                 \
    else                       \
      {                        \
        INSERT((x));           \
        SKIP;                  \
      }                        \
  } while (0)

  /* Insert character X in the output buffer */
#define INSERT(x)              \
  do {                         \
    COPY_INCLUDED_SECTION;     \
    *tgt_newend++ = (x);       \
  } while (0)

  for (curp = buf, endp = buf + *lengthp; curp != endp; ++curp)
    {
      switch (*curp)
        {
        case '\r':
          if (opts->ignore_eol_style)
            INCLUDE_AS('\n');
          else
            INCLUDE;
          state = svn_diff__normalize_state_cr;
          break;

        case '\n':
          /* A '\n' right after a '\r' was already emitted as '\n' when
             the '\r' was seen, so drop it when ignoring eol style. */
          if (state == svn_diff__normalize_state_cr
              && opts->ignore_eol_style)
            SKIP;
          else
            INCLUDE;
          state = svn_diff__normalize_state_normal;
          break;

        default:
          if (svn_ctype_isspace(*curp)
              && opts->ignore_space != svn_diff_file_ignore_space_none)
            {
              /* Whitespace but not '\r' or '\n' */
              if (state != svn_diff__normalize_state_whitespace
                  && opts->ignore_space
                     == svn_diff_file_ignore_space_change)
                /*### If we can postpone insertion of the space
                  until the next non-whitespace character,
                  we have a potential of reducing the number of copies:
                  If this space is followed by more spaces,
                  this will cause a block-copy.
                  If the next non-space block is considered normalized
                  *and* preceded by a space, we can take advantage of that. */
                /* Note, the above optimization applies to 90% of the source
                   lines in our own code, since it (generally) doesn't use
                   more than one space per blank section, except for the
                   beginning of a line. */
                INCLUDE_AS(' ');
              else
                SKIP;
              state = svn_diff__normalize_state_whitespace;
            }
          else
            {
              /* Non-whitespace character, or whitespace character in
                 svn_diff_file_ignore_space_none mode. */
              INCLUDE;
              state = svn_diff__normalize_state_normal;
            }
        }
    }

  /* If we're not in whitespace, flush the last chunk of data.
   * Note that this will work correctly when this is the last chunk of the
   * file:
   * * If there is an eol, it will either have been output when we entered
   *   the state_cr, or it will be output now.
   * * If there is no eol and we're not in whitespace, then we just output
   *   everything below.
   * * If there's no eol and we are in whitespace, we want to ignore
   *   whitespace unconditionally. */

  if (*tgt == tgt_newend)
    {
      /* we haven't copied any data in to *tgt and our chunk consists
         only of one block of (already normalized) data.
         Just return the block. */
      *tgt = (char *)start;
      *lengthp = include_len;
    }
  else
    {
      COPY_INCLUDED_SECTION;
      *lengthp = tgt_newend - *tgt;
    }

  *statep = state;

#undef SKIP
#undef INCLUDE
#undef INCLUDE_AS
#undef INSERT
#undef COPY_INCLUDED_SECTION
}
 344
 345 svn_error_t *
 346 svn_diff__unified_append_no_newline_msg(svn_stringbuf_t *stringbuf,
 347                                         const char *header_encoding,
 348                                         apr_pool_t *scratch_pool)
 349 {
 350   const char *out_str;
 351
 352   SVN_ERR(svn_utf_cstring_from_utf8_ex2(
 353             &out_str,
 354             APR_EOL_STR
 355             SVN_DIFF__NO_NEWLINE_AT_END_OF_FILE APR_EOL_STR,
 356             header_encoding, scratch_pool));
 357   svn_stringbuf_appendcstr(stringbuf, out_str);
 358   return SVN_NO_ERROR;
 359 }
 360
 361 svn_error_t *
 362 svn_diff__unified_write_hunk_header(svn_stream_t *output_stream,
 363                                     const char *header_encoding,
 364                                     const char *hunk_delimiter,
 365                                     apr_off_t old_start,
 366                                     apr_off_t old_length,
 367                                     apr_off_t new_start,
 368                                     apr_off_t new_length,
 369                                     const char *hunk_extra_context,
 370                                     apr_pool_t *scratch_pool)
 371 {
 372   SVN_ERR(svn_stream_printf_from_utf8(output_stream, header_encoding,
 373                                       scratch_pool,
 374                                       "%s -%" APR_OFF_T_FMT,
 375                                       hunk_delimiter, old_start));
 376   /* If the hunk length is 1, suppress the number of lines in the hunk
 377    * (it is 1 implicitly) */
 378   if (old_length != 1)
 379     {
 380       SVN_ERR(svn_stream_printf_from_utf8(output_stream, header_encoding,
 381                                           scratch_pool,
 382                                           ",%" APR_OFF_T_FMT, old_length));
 383     }
 384
 385   SVN_ERR(svn_stream_printf_from_utf8(output_stream, header_encoding,
 386                                       scratch_pool,
 387                                       " +%" APR_OFF_T_FMT, new_start));
 388   if (new_length != 1)
 389     {
 390       SVN_ERR(svn_stream_printf_from_utf8(output_stream, header_encoding,
 391                                           scratch_pool,
 392                                           ",%" APR_OFF_T_FMT, new_length));
 393     }
 394
 395   if (hunk_extra_context == NULL)
 396       hunk_extra_context = "";
 397   SVN_ERR(svn_stream_printf_from_utf8(output_stream, header_encoding,
 398                                       scratch_pool,
 399                                       " %s%s%s" APR_EOL_STR,
 400                                       hunk_delimiter,
 401                                       hunk_extra_context[0] ? " " : "",
 402                                       hunk_extra_context));
 403   return SVN_NO_ERROR;
 404 }
 405
 406 svn_error_t *
 407 svn_diff__unidiff_write_header(svn_stream_t *output_stream,
 408                                const char *header_encoding,
 409                                const char *old_header,
 410                                const char *new_header,
 411                                apr_pool_t *scratch_pool)
 412 {
 413   SVN_ERR(svn_stream_printf_from_utf8(output_stream, header_encoding,
 414                                       scratch_pool,
 415                                       "--- %s" APR_EOL_STR
 416                                       "+++ %s" APR_EOL_STR,
 417                                       old_header,
 418                                       new_header));
 419   return SVN_NO_ERROR;
 420 }
 421
 422 /* A helper function for display_prop_diffs.  Output the differences between
 423    the mergeinfo stored in ORIG_MERGEINFO_VAL and NEW_MERGEINFO_VAL in a
 424    human-readable form to OUTSTREAM, using ENCODING.  Use POOL for temporary
 425    allocations. */
 426 static svn_error_t *
 427 display_mergeinfo_diff(const char *old_mergeinfo_val,
 428                        const char *new_mergeinfo_val,
 429                        const char *encoding,
 430                        svn_stream_t *outstream,
 431                        apr_pool_t *pool)
 432 {
 433   apr_hash_t *old_mergeinfo_hash, *new_mergeinfo_hash, *added, *deleted;
 434   apr_pool_t *iterpool = svn_pool_create(pool);
 435   apr_hash_index_t *hi;
 436
 437   if (old_mergeinfo_val)
 438     SVN_ERR(svn_mergeinfo_parse(&old_mergeinfo_hash, old_mergeinfo_val, pool));
 439   else
 440     old_mergeinfo_hash = NULL;
 441
 442   if (new_mergeinfo_val)
 443     SVN_ERR(svn_mergeinfo_parse(&new_mergeinfo_hash, new_mergeinfo_val, pool));
 444   else
 445     new_mergeinfo_hash = NULL;
 446
 447   SVN_ERR(svn_mergeinfo_diff2(&deleted, &added, old_mergeinfo_hash,
 448                               new_mergeinfo_hash,
 449                               TRUE, pool, pool));
 450
 451   for (hi = apr_hash_first(pool, deleted);
 452        hi; hi = apr_hash_next(hi))
 453     {
 454       const char *from_path = svn__apr_hash_index_key(hi);
 455       svn_rangelist_t *merge_revarray = svn__apr_hash_index_val(hi);
 456       svn_string_t *merge_revstr;
 457
 458       svn_pool_clear(iterpool);
 459       SVN_ERR(svn_rangelist_to_string(&merge_revstr, merge_revarray,
 460                                       iterpool));
 461
 462       SVN_ERR(svn_stream_printf_from_utf8(outstream, encoding, iterpool,
 463                                           _("   Reverse-merged %s:r%s%s"),
 464                                           from_path, merge_revstr->data,
 465                                           APR_EOL_STR));
 466     }
 467
 468   for (hi = apr_hash_first(pool, added);
 469        hi; hi = apr_hash_next(hi))
 470     {
 471       const char *from_path = svn__apr_hash_index_key(hi);
 472       svn_rangelist_t *merge_revarray = svn__apr_hash_index_val(hi);
 473       svn_string_t *merge_revstr;
 474
 475       svn_pool_clear(iterpool);
 476       SVN_ERR(svn_rangelist_to_string(&merge_revstr, merge_revarray,
 477                                       iterpool));
 478
 479       SVN_ERR(svn_stream_printf_from_utf8(outstream, encoding, iterpool,
 480                                           _("   Merged %s:r%s%s"),
 481                                           from_path, merge_revstr->data,
 482                                           APR_EOL_STR));
 483     }
 484
 485   svn_pool_destroy(iterpool);
 486   return SVN_NO_ERROR;
 487 }
 488
 489 svn_error_t *
 490 svn_diff__display_prop_diffs(svn_stream_t *outstream,
 491                              const char *encoding,
 492                              const apr_array_header_t *propchanges,
 493                              apr_hash_t *original_props,
 494                              svn_boolean_t pretty_print_mergeinfo,
 495                              apr_pool_t *pool)
 496 {
 497   apr_pool_t *iterpool = svn_pool_create(pool);
 498   int i;
 499
 500   for (i = 0; i < propchanges->nelts; i++)
 501     {
 502       const char *action;
 503       const svn_string_t *original_value;
 504       const svn_prop_t *propchange
 505         = &APR_ARRAY_IDX(propchanges, i, svn_prop_t);
 506
 507       if (original_props)
 508         original_value = svn_hash_gets(original_props, propchange->name);
 509       else
 510         original_value = NULL;
 511
 512       /* If the property doesn't exist on either side, or if it exists
 513          with the same value, skip it.  This can happen if the client is
 514          hitting an old mod_dav_svn server that doesn't understand the
 515          "send-all" REPORT style. */
 516       if ((! (original_value || propchange->value))
 517           || (original_value && propchange->value
 518               && svn_string_compare(original_value, propchange->value)))
 519         continue;
 520
 521       svn_pool_clear(iterpool);
 522
 523       if (! original_value)
 524         action = "Added";
 525       else if (! propchange->value)
 526         action = "Deleted";
 527       else
 528         action = "Modified";
 529       SVN_ERR(svn_stream_printf_from_utf8(outstream, encoding, iterpool,
 530                                           "%s: %s%s", action,
 531                                           propchange->name, APR_EOL_STR));
 532
 533       if (pretty_print_mergeinfo
 534           && strcmp(propchange->name, SVN_PROP_MERGEINFO) == 0)
 535         {
 536           const char *orig = original_value ? original_value->data : NULL;
 537           const char *val = propchange->value ? propchange->value->data : NULL;
 538           svn_error_t *err = display_mergeinfo_diff(orig, val, encoding,
 539                                                     outstream, iterpool);
 540
 541           /* Issue #3896: If we can't pretty-print mergeinfo differences
 542              because invalid mergeinfo is present, then don't let the diff
 543              fail, just print the diff as any other property. */
 544           if (err && err->apr_err == SVN_ERR_MERGEINFO_PARSE_ERROR)
 545             {
 546               svn_error_clear(err);
 547             }
 548           else
 549             {
 550               SVN_ERR(err);
 551               continue;
 552             }
 553         }
 554
 555       {
 556         svn_diff_t *diff;
 557         svn_diff_file_options_t options = { 0 };
 558         const svn_string_t *orig
 559           = original_value ? original_value
 560                            : svn_string_create_empty(iterpool);
 561         const svn_string_t *val
 562           = propchange->value ? propchange->value
 563                               : svn_string_create_empty(iterpool);
 564
 565         SVN_ERR(svn_diff_mem_string_diff(&diff, orig, val, &options,
 566                                          iterpool));
 567
 568         /* UNIX patch will try to apply a diff even if the diff header
 569          * is missing. It tries to be helpful by asking the user for a
 570          * target filename when it can't determine the target filename
 571          * from the diff header. But there usually are no files which
 572          * UNIX patch could apply the property diff to, so we use "##"
 573          * instead of "@@" as the default hunk delimiter for property diffs.
 574          * We also supress the diff header. */
 575         SVN_ERR(svn_diff_mem_string_output_unified2(
 576                   outstream, diff, FALSE /* no header */, "##", NULL, NULL,
 577                   encoding, orig, val, iterpool));
 578       }
 579     }
 580   svn_pool_destroy(iterpool);
 581
 582   return SVN_NO_ERROR;
 583 }
 584
 585
/* Return the library version number.

   The whole function body, including the return statement, comes from
   the SVN_VERSION_BODY macro, which is defined outside this file
   (presumably in svn_version.h, included above -- confirm). */
const svn_version_t *
svn_diff_version(void)
{
  SVN_VERSION_BODY;
}