org.spearce.jgit/src/org/spearce/jgit/patch/FileHeader.java

   1 /*
   2  * Copyright (C) 2008, Google Inc.
   3  *
   4  * All rights reserved.
   5  *
   6  * Redistribution and use in source and binary forms, with or
   7  * without modification, are permitted provided that the following
   8  * conditions are met:
   9  *
  10  * - Redistributions of source code must retain the above copyright
  11  *   notice, this list of conditions and the following disclaimer.
  12  *
  13  * - Redistributions in binary form must reproduce the above
  14  *   copyright notice, this list of conditions and the following
  15  *   disclaimer in the documentation and/or other materials provided
  16  *   with the distribution.
  17  *
  18  * - Neither the name of the Git Development Community nor the
  19  *   names of its contributors may be used to endorse or promote
  20  *   products derived from this software without specific prior
  21  *   written permission.
  22  *
  23  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
  24  * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
  25  * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  26  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  27  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
  28  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  29  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  30  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  31  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
  32  * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
  33  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  34  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
  35  * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  36  */
  37
  38 package org.spearce.jgit.patch;
  39
  40 import static org.spearce.jgit.lib.Constants.encodeASCII;
  41 import static org.spearce.jgit.util.RawParseUtils.decode;
  42 import static org.spearce.jgit.util.RawParseUtils.match;
  43 import static org.spearce.jgit.util.RawParseUtils.nextLF;
  44 import static org.spearce.jgit.util.RawParseUtils.parseBase10;
  45
  46 import java.util.ArrayList;
  47 import java.util.Collections;
  48 import java.util.List;
  49
  50 import org.spearce.jgit.lib.AbbreviatedObjectId;
  51 import org.spearce.jgit.lib.Constants;
  52 import org.spearce.jgit.lib.FileMode;
  53 import org.spearce.jgit.util.QuotedString;
  54
  55 /** Patch header describing an action for a single file path. */
  56 public class FileHeader {
  57         /** Magical file name used for file adds or deletes. */
  58         public static final String DEV_NULL = "/dev/null";
  59
  60         private static final byte[] OLD_MODE = encodeASCII("old mode ");
  61
  62         private static final byte[] NEW_MODE = encodeASCII("new mode ");
  63
  64         protected static final byte[] DELETED_FILE_MODE = encodeASCII("deleted file mode ");
  65
  66         protected static final byte[] NEW_FILE_MODE = encodeASCII("new file mode ");
  67
  68         private static final byte[] COPY_FROM = encodeASCII("copy from ");
  69
  70         private static final byte[] COPY_TO = encodeASCII("copy to ");
  71
  72         private static final byte[] RENAME_OLD = encodeASCII("rename old ");
  73
  74         private static final byte[] RENAME_NEW = encodeASCII("rename new ");
  75
  76         private static final byte[] RENAME_FROM = encodeASCII("rename from ");
  77
  78         private static final byte[] RENAME_TO = encodeASCII("rename to ");
  79
  80         private static final byte[] SIMILARITY_INDEX = encodeASCII("similarity index ");
  81
  82         private static final byte[] DISSIMILARITY_INDEX = encodeASCII("dissimilarity index ");
  83
  84         protected static final byte[] INDEX = encodeASCII("index ");
  85
  86         static final byte[] OLD_NAME = encodeASCII("--- ");
  87
  88         static final byte[] NEW_NAME = encodeASCII("+++ ");
  89
  90         /** General type of change a single file-level patch describes. */
  91         public static enum ChangeType {
  92                 /** Add a new file to the project */
  93                 ADD,
  94
  95                 /** Modify an existing file in the project (content and/or mode) */
  96                 MODIFY,
  97
  98                 /** Delete an existing file from the project */
  99                 DELETE,
 100
 101                 /** Rename an existing file to a new location */
 102                 RENAME,
 103
 104                 /** Copy an existing file to a new location, keeping the original */
 105                 COPY;
 106         }
 107
 108         /** Type of patch used by this file. */
 109         public static enum PatchType {
 110                 /** A traditional unified diff style patch of a text file. */
 111                 UNIFIED,
 112
 113                 /** An empty patch with a message "Binary files ... differ" */
 114                 BINARY,
 115
 116                 /** A Git binary patch, holding pre and post image deltas */
 117                 GIT_BINARY;
 118         }
 119
 120         /** Buffer holding the patch data for this file. */
 121         final byte[] buf;
 122
 123         /** Offset within {@link #buf} to the "diff ..." line. */
 124         final int startOffset;
 125
 126         /** Position 1 past the end of this file within {@link #buf}. */
 127         int endOffset;
 128
 129         /** File name of the old (pre-image). */
 130         private String oldName;
 131
 132         /** File name of the new (post-image). */
 133         private String newName;
 134
 135         /** Old mode of the file, if described by the patch, else null. */
 136         private FileMode oldMode;
 137
 138         /** New mode of the file, if described by the patch, else null. */
 139         protected FileMode newMode;
 140
 141         /** General type of change indicated by the patch. */
 142         protected ChangeType changeType;
 143
 144         /** Similarity score if {@link #changeType} is a copy or rename. */
 145         private int score;
 146
 147         /** ObjectId listed on the index line for the old (pre-image) */
 148         private AbbreviatedObjectId oldId;
 149
 150         /** ObjectId listed on the index line for the new (post-image) */
 151         protected AbbreviatedObjectId newId;
 152
 153         /** Type of patch used to modify this file */
 154         PatchType patchType;
 155
 156         /** The hunks of this file */
 157         private List<HunkHeader> hunks;
 158
 159         /** If {@link #patchType} is {@link PatchType#GIT_BINARY}, the new image */
 160         BinaryHunk forwardBinaryHunk;
 161
 162         /** If {@link #patchType} is {@link PatchType#GIT_BINARY}, the old image */
 163         BinaryHunk reverseBinaryHunk;
 164
 165         FileHeader(final byte[] b, final int offset) {
 166                 buf = b;
 167                 startOffset = offset;
 168                 changeType = ChangeType.MODIFY; // unless otherwise designated
 169                 patchType = PatchType.UNIFIED;
 170         }
 171
 172         int getParentCount() {
 173                 return 1;
 174         }
 175
 176         /** @return the byte array holding this file's patch script. */
 177         public byte[] getBuffer() {
 178                 return buf;
 179         }
 180
 181         /** @return offset the start of this file's script in {@link #getBuffer()}. */
 182         public int getStartOffset() {
 183                 return startOffset;
 184         }
 185
 186         /** @return offset one past the end of the file script. */
 187         public int getEndOffset() {
 188                 return endOffset;
 189         }
 190
 191         /**
 192          * Get the old name associated with this file.
 193          * <p>
 194          * The meaning of the old name can differ depending on the semantic meaning
 195          * of this patch:
 196          * <ul>
 197          * <li><i>file add</i>: always <code>/dev/null</code></li>
 198          * <li><i>file modify</i>: always {@link #getNewName()}</li>
 199          * <li><i>file delete</i>: always the file being deleted</li>
 200          * <li><i>file copy</i>: source file the copy originates from</li>
 201          * <li><i>file rename</i>: source file the rename originates from</li>
 202          * </ul>
 203          *
 204          * @return old name for this file.
 205          */
 206         public String getOldName() {
 207                 return oldName;
 208         }
 209
 210         /**
 211          * Get the new name associated with this file.
 212          * <p>
 213          * The meaning of the new name can differ depending on the semantic meaning
 214          * of this patch:
 215          * <ul>
 216          * <li><i>file add</i>: always the file being created</li>
 217          * <li><i>file modify</i>: always {@link #getOldName()}</li>
 218          * <li><i>file delete</i>: always <code>/dev/null</code></li>
 219          * <li><i>file copy</i>: destination file the copy ends up at</li>
 220          * <li><i>file rename</i>: destination file the rename ends up at/li>
 221          * </ul>
 222          *
 223          * @return new name for this file.
 224          */
 225         public String getNewName() {
 226                 return newName;
 227         }
 228
 229         /** @return the old file mode, if described in the patch */
 230         public FileMode getOldMode() {
 231                 return oldMode;
 232         }
 233
 234         /** @return the new file mode, if described in the patch */
 235         public FileMode getNewMode() {
 236                 return newMode;
 237         }
 238
 239         /** @return the type of change this patch makes on {@link #getNewName()} */
 240         public ChangeType getChangeType() {
 241                 return changeType;
 242         }
 243
 244         /**
 245          * @return similarity score between {@link #getOldName()} and
 246          *         {@link #getNewName()} if {@link #getChangeType()} is
 247          *         {@link ChangeType#COPY} or {@link ChangeType#RENAME}.
 248          */
 249         public int getScore() {
 250                 return score;
 251         }
 252
 253         /**
 254          * Get the old object id from the <code>index</code>.
 255          *
 256          * @return the object id; null if there is no index line
 257          */
 258         public AbbreviatedObjectId getOldId() {
 259                 return oldId;
 260         }
 261
 262         /**
 263          * Get the new object id from the <code>index</code>.
 264          *
 265          * @return the object id; null if there is no index line
 266          */
 267         public AbbreviatedObjectId getNewId() {
 268                 return newId;
 269         }
 270
 271         /** @return style of patch used to modify this file */
 272         public PatchType getPatchType() {
 273                 return patchType;
 274         }
 275
 276         /** @return true if this patch modifies metadata about a file */
 277         public boolean hasMetaDataChanges() {
 278                 return changeType != ChangeType.MODIFY || newMode != oldMode;
 279         }
 280
 281         /** @return hunks altering this file; in order of appearance in patch */
 282         public List<? extends HunkHeader> getHunks() {
 283                 if (hunks == null)
 284                         return Collections.emptyList();
 285                 return hunks;
 286         }
 287
 288         void addHunk(final HunkHeader h) {
 289                 if (h.getFileHeader() != this)
 290                         throw new IllegalArgumentException("Hunk belongs to another file");
 291                 if (hunks == null)
 292                         hunks = new ArrayList<HunkHeader>();
 293                 hunks.add(h);
 294         }
 295
 296         HunkHeader newHunkHeader(final int offset) {
 297                 return new HunkHeader(this, offset);
 298         }
 299
 300         /** @return if a {@link PatchType#GIT_BINARY}, the new-image delta/literal */
 301         public BinaryHunk getForwardBinaryHunk() {
 302                 return forwardBinaryHunk;
 303         }
 304
 305         /** @return if a {@link PatchType#GIT_BINARY}, the old-image delta/literal */
 306         public BinaryHunk getReverseBinaryHunk() {
 307                 return reverseBinaryHunk;
 308         }
 309
 310         /**
 311          * Parse a "diff --git" or "diff --cc" line.
 312          *
 313          * @param ptr
 314          *            first character after the "diff --git " or "diff --cc " part.
 315          * @param end
 316          *            one past the last position to parse.
 317          * @return first character after the LF at the end of the line; -1 on error.
 318          */
 319         int parseGitFileName(int ptr, final int end) {
 320                 final int eol = nextLF(buf, ptr);
 321                 final int bol = ptr;
 322                 if (eol >= end) {
 323                         return -1;
 324                 }
 325
 326                 // buffer[ptr..eol] looks like "a/foo b/foo\n". After the first
 327                 // A regex to match this is "^[^/]+/(.*?) [^/+]+/\1\n$". There
 328                 // is only one way to split the line such that text to the left
 329                 // of the space matches the text to the right, excluding the part
 330                 // before the first slash.
 331                 //
 332
 333                 final int aStart = nextLF(buf, ptr, '/');
 334                 if (aStart >= eol)
 335                         return eol;
 336
 337                 while (ptr < eol) {
 338                         final int sp = nextLF(buf, ptr, ' ');
 339                         if (sp >= eol) {
 340                                 // We can't split the header, it isn't valid.
 341                                 // This may be OK if this is a rename patch.
 342                                 //
 343                                 return eol;
 344                         }
 345                         final int bStart = nextLF(buf, sp, '/');
 346                         if (bStart >= eol)
 347                                 return eol;
 348
 349                         // If buffer[aStart..sp - 1] = buffer[bStart..eol - 1]
 350                         // we have a valid split.
 351                         //
 352                         if (eq(aStart, sp - 1, bStart, eol - 1)) {
 353                                 if (buf[bol] == '"') {
 354                                         // We're a double quoted name. The region better end
 355                                         // in a double quote too, and we need to decode the
 356                                         // characters before reading the name.
 357                                         //
 358                                         if (buf[sp - 2] != '"') {
 359                                                 return eol;
 360                                         }
 361                                         oldName = QuotedString.GIT_PATH.dequote(buf, bol, sp - 1);
 362                                         oldName = p1(oldName);
 363                                 } else {
 364                                         oldName = decode(Constants.CHARSET, buf, aStart, sp - 1);
 365                                 }
 366                                 newName = oldName;
 367                                 return eol;
 368                         }
 369
 370                         // This split wasn't correct. Move past the space and try
 371                         // another split as the space must be part of the file name.
 372                         //
 373                         ptr = sp;
 374                 }
 375
 376                 return eol;
 377         }
 378
 379         int parseGitHeaders(int ptr, final int end) {
 380                 while (ptr < end) {
 381                         final int eol = nextLF(buf, ptr);
 382                         if (isHunkHdr(buf, ptr, eol) >= 1) {
 383                                 // First hunk header; break out and parse them later.
 384                                 break;
 385
 386                         } else if (match(buf, ptr, OLD_NAME) >= 0) {
 387                                 parseOldName(ptr, eol);
 388
 389                         } else if (match(buf, ptr, NEW_NAME) >= 0) {
 390                                 parseNewName(ptr, eol);
 391
 392                         } else if (match(buf, ptr, OLD_MODE) >= 0) {
 393                                 oldMode = parseFileMode(ptr + OLD_MODE.length, eol);
 394
 395                         } else if (match(buf, ptr, NEW_MODE) >= 0) {
 396                                 newMode = parseFileMode(ptr + NEW_MODE.length, eol);
 397
 398                         } else if (match(buf, ptr, DELETED_FILE_MODE) >= 0) {
 399                                 oldMode = parseFileMode(ptr + DELETED_FILE_MODE.length, eol);
 400                                 newMode = FileMode.MISSING;
 401                                 changeType = ChangeType.DELETE;
 402
 403                         } else if (match(buf, ptr, NEW_FILE_MODE) >= 0) {
 404                                 parseNewFileMode(ptr, eol);
 405
 406                         } else if (match(buf, ptr, COPY_FROM) >= 0) {
 407                                 oldName = parseName(oldName, ptr + COPY_FROM.length, eol);
 408                                 changeType = ChangeType.COPY;
 409
 410                         } else if (match(buf, ptr, COPY_TO) >= 0) {
 411                                 newName = parseName(newName, ptr + COPY_TO.length, eol);
 412                                 changeType = ChangeType.COPY;
 413
 414                         } else if (match(buf, ptr, RENAME_OLD) >= 0) {
 415                                 oldName = parseName(oldName, ptr + RENAME_OLD.length, eol);
 416                                 changeType = ChangeType.RENAME;
 417
 418                         } else if (match(buf, ptr, RENAME_NEW) >= 0) {
 419                                 newName = parseName(newName, ptr + RENAME_NEW.length, eol);
 420                                 changeType = ChangeType.RENAME;
 421
 422                         } else if (match(buf, ptr, RENAME_FROM) >= 0) {
 423                                 oldName = parseName(oldName, ptr + RENAME_FROM.length, eol);
 424                                 changeType = ChangeType.RENAME;
 425
 426                         } else if (match(buf, ptr, RENAME_TO) >= 0) {
 427                                 newName = parseName(newName, ptr + RENAME_TO.length, eol);
 428                                 changeType = ChangeType.RENAME;
 429
 430                         } else if (match(buf, ptr, SIMILARITY_INDEX) >= 0) {
 431                                 score = parseBase10(buf, ptr + SIMILARITY_INDEX.length, null);
 432
 433                         } else if (match(buf, ptr, DISSIMILARITY_INDEX) >= 0) {
 434                                 score = parseBase10(buf, ptr + DISSIMILARITY_INDEX.length, null);
 435
 436                         } else if (match(buf, ptr, INDEX) >= 0) {
 437                                 parseIndexLine(ptr + INDEX.length, eol);
 438
 439                         } else {
 440                                 // Probably an empty patch (stat dirty).
 441                                 break;
 442                         }
 443
 444                         ptr = eol;
 445                 }
 446                 return ptr;
 447         }
 448
 449         protected void parseOldName(int ptr, final int eol) {
 450                 oldName = p1(parseName(oldName, ptr + OLD_NAME.length, eol));
 451                 if (oldName == DEV_NULL)
 452                         changeType = ChangeType.ADD;
 453         }
 454
 455         protected void parseNewName(int ptr, final int eol) {
 456                 newName = p1(parseName(newName, ptr + NEW_NAME.length, eol));
 457                 if (newName == DEV_NULL)
 458                         changeType = ChangeType.DELETE;
 459         }
 460
 461         protected void parseNewFileMode(int ptr, final int eol) {
 462                 oldMode = FileMode.MISSING;
 463                 newMode = parseFileMode(ptr + NEW_FILE_MODE.length, eol);
 464                 changeType = ChangeType.ADD;
 465         }
 466
 467         int parseTraditionalHeaders(int ptr, final int end) {
 468                 while (ptr < end) {
 469                         final int eol = nextLF(buf, ptr);
 470                         if (isHunkHdr(buf, ptr, eol) >= 1) {
 471                                 // First hunk header; break out and parse them later.
 472                                 break;
 473
 474                         } else if (match(buf, ptr, OLD_NAME) >= 0) {
 475                                 parseOldName(ptr, eol);
 476
 477                         } else if (match(buf, ptr, NEW_NAME) >= 0) {
 478                                 parseNewName(ptr, eol);
 479
 480                         } else {
 481                                 // Possibly an empty patch.
 482                                 break;
 483                         }
 484
 485                         ptr = eol;
 486                 }
 487                 return ptr;
 488         }
 489
 490         private String parseName(final String expect, int ptr, final int end) {
 491                 if (ptr == end)
 492                         return expect;
 493
 494                 String r;
 495                 if (buf[ptr] == '"') {
 496                         // New style GNU diff format
 497                         //
 498                         r = QuotedString.GIT_PATH.dequote(buf, ptr, end - 1);
 499                 } else {
 500                         // Older style GNU diff format, an optional tab ends the name.
 501                         //
 502                         int tab = end;
 503                         while (ptr < tab && buf[tab - 1] != '\t')
 504                                 tab--;
 505                         if (ptr == tab)
 506                                 tab = end;
 507                         r = decode(Constants.CHARSET, buf, ptr, tab - 1);
 508                 }
 509
 510                 if (r.equals(DEV_NULL))
 511                         r = DEV_NULL;
 512                 return r;
 513         }
 514
 515         private static String p1(final String r) {
 516                 final int s = r.indexOf('/');
 517                 return s > 0 ? r.substring(s + 1) : r;
 518         }
 519
 520         protected FileMode parseFileMode(int ptr, final int end) {
 521                 int tmp = 0;
 522                 while (ptr < end - 1) {
 523                         tmp <<= 3;
 524                         tmp += buf[ptr++] - '0';
 525                 }
 526                 return FileMode.fromBits(tmp);
 527         }
 528
 529         protected void parseIndexLine(int ptr, final int end) {
 530                 // "index $asha1..$bsha1[ $mode]" where $asha1 and $bsha1
 531                 // can be unique abbreviations
 532                 //
 533                 final int dot2 = nextLF(buf, ptr, '.');
 534                 final int mode = nextLF(buf, dot2, ' ');
 535
 536                 oldId = AbbreviatedObjectId.fromString(buf, ptr, dot2 - 1);
 537                 newId = AbbreviatedObjectId.fromString(buf, dot2 + 1, mode - 1);
 538
 539                 if (mode < end)
 540                         newMode = oldMode = parseFileMode(mode, end);
 541         }
 542
 543         private boolean eq(int aPtr, int aEnd, int bPtr, int bEnd) {
 544                 if (aEnd - aPtr != bEnd - bPtr) {
 545                         return false;
 546                 }
 547                 while (aPtr < aEnd) {
 548                         if (buf[aPtr++] != buf[bPtr++])
 549                                 return false;
 550                 }
 551                 return true;
 552         }
 553
 554         /**
 555          * Determine if this is a patch hunk header.
 556          *
 557          * @param buf
 558          *            the buffer to scan
 559          * @param start
 560          *            first position in the buffer to evaluate
 561          * @param end
 562          *            last position to consider; usually the end of the buffer (
 563          *            <code>buf.length</code>) or the first position on the next
 564          *            line. This is only used to avoid very long runs of '@' from
 565          *            killing the scan loop.
 566          * @return the number of "ancestor revisions" in the hunk header. A
 567          *         traditional two-way diff ("@@ -...") returns 1; a combined diff
 568          *         for a 3 way-merge returns 3. If this is not a hunk header, 0 is
 569          *         returned instead.
 570          */
 571         static int isHunkHdr(final byte[] buf, final int start, final int end) {
 572                 int ptr = start;
 573                 while (ptr < end && buf[ptr] == '@')
 574                         ptr++;
 575                 if (ptr - start < 2)
 576                         return 0;
 577                 if (ptr == end || buf[ptr++] != ' ')
 578                         return 0;
 579                 if (ptr == end || buf[ptr++] != '-')
 580                         return 0;
 581                 return (ptr - 3) - start;
 582         }
 583 }