org.spearce.jgit/src/org/spearce/jgit/patch/FileHeader.java

   1 /*
   2  * Copyright (C) 2008, Google Inc.
   3  *
   4  * All rights reserved.
   5  *
   6  * Redistribution and use in source and binary forms, with or
   7  * without modification, are permitted provided that the following
   8  * conditions are met:
   9  *
  10  * - Redistributions of source code must retain the above copyright
  11  *   notice, this list of conditions and the following disclaimer.
  12  *
  13  * - Redistributions in binary form must reproduce the above
  14  *   copyright notice, this list of conditions and the following
  15  *   disclaimer in the documentation and/or other materials provided
  16  *   with the distribution.
  17  *
  18  * - Neither the name of the Git Development Community nor the
  19  *   names of its contributors may be used to endorse or promote
  20  *   products derived from this software without specific prior
  21  *   written permission.
  22  *
  23  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
  24  * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
  25  * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  26  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  27  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
  28  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  29  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  30  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  31  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
  32  * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
  33  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  34  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
  35  * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  36  */
  37
  38 package org.spearce.jgit.patch;
  39
  40 import static org.spearce.jgit.lib.Constants.encodeASCII;
  41 import static org.spearce.jgit.util.RawParseUtils.decode;
  42 import static org.spearce.jgit.util.RawParseUtils.match;
  43 import static org.spearce.jgit.util.RawParseUtils.nextLF;
  44 import static org.spearce.jgit.util.RawParseUtils.parseBase10;
  45
  46 import java.util.ArrayList;
  47 import java.util.Collections;
  48 import java.util.List;
  49
  50 import org.spearce.jgit.lib.AbbreviatedObjectId;
  51 import org.spearce.jgit.lib.Constants;
  52 import org.spearce.jgit.lib.FileMode;
  53 import org.spearce.jgit.util.QuotedString;
  54
  55 /** Patch header describing an action for a single file path. */
  56 public class FileHeader {
        /** Magical file name used for file adds or deletes. */
        public static final String DEV_NULL = "/dev/null";

        // ASCII-encoded prefixes of the header lines recognized by
        // parseGitHeaders(). Each one includes its trailing space, so the
        // value of the line starts immediately after the prefix.

        /** Prefix of the line carrying the pre-image file mode. */
        private static final byte[] OLD_MODE = encodeASCII("old mode ");

        /** Prefix of the line carrying the post-image file mode. */
        private static final byte[] NEW_MODE = encodeASCII("new mode ");

        /** Prefix of the line marking a deletion; the value is the old mode. */
        private static final byte[] DELETED_FILE_MODE = encodeASCII("deleted file mode ");

        /** Prefix of the line marking an addition; the value is the new mode. */
        private static final byte[] NEW_FILE_MODE = encodeASCII("new file mode ");

        /** Prefix of the line naming the source of a copy. */
        private static final byte[] COPY_FROM = encodeASCII("copy from ");

        /** Prefix of the line naming the destination of a copy. */
        private static final byte[] COPY_TO = encodeASCII("copy to ");

        /** Prefix of the line naming the source of a rename ("old" spelling). */
        private static final byte[] RENAME_OLD = encodeASCII("rename old ");

        /** Prefix of the line naming the destination of a rename ("new" spelling). */
        private static final byte[] RENAME_NEW = encodeASCII("rename new ");

        /** Prefix of the line naming the source of a rename. */
        private static final byte[] RENAME_FROM = encodeASCII("rename from ");

        /** Prefix of the line naming the destination of a rename. */
        private static final byte[] RENAME_TO = encodeASCII("rename to ");

        /** Prefix of the line whose numeric value is stored as the score. */
        private static final byte[] SIMILARITY_INDEX = encodeASCII("similarity index ");

        /** Prefix of the dissimilarity line; its value is also stored as the score. */
        private static final byte[] DISSIMILARITY_INDEX = encodeASCII("dissimilarity index ");

        /** Prefix of the line listing the old and new object ids. */
        private static final byte[] INDEX = encodeASCII("index ");

        /** Prefix of the line naming the old (pre-image) file. */
        static final byte[] OLD_NAME = encodeASCII("--- ");

        /** Prefix of the line naming the new (post-image) file. */
        static final byte[] NEW_NAME = encodeASCII("+++ ");
  89
  90         /** General type of change a single file-level patch describes. */
  91         public static enum ChangeType {
  92                 /** Add a new file to the project */
  93                 ADD,
  94
  95                 /** Modify an existing file in the project (content and/or mode) */
  96                 MODIFY,
  97
  98                 /** Delete an existing file from the project */
  99                 DELETE,
 100
 101                 /** Rename an existing file to a new location */
 102                 RENAME,
 103
 104                 /** Copy an existing file to a new location, keeping the original */
 105                 COPY;
 106         }
 107
 108         /** Type of patch used by this file. */
 109         public static enum PatchType {
 110                 /** A traditional unified diff style patch of a text file. */
 111                 UNIFIED,
 112
 113                 /** An empty patch with a message "Binary files ... differ" */
 114                 BINARY,
 115
 116                 /** A Git binary patch, holding pre and post image deltas */
 117                 GIT_BINARY;
 118         }
 119
        /** Buffer holding the patch data for this file. */
        final byte[] buf;

        /** Offset within {@link #buf} to the "diff ..." line. */
        final int startOffset;

        /**
         * Position 1 past the end of this file within {@link #buf}.
         * <p>
         * Not assigned anywhere in this class; presumably set by the code
         * driving the parse (to be confirmed against the caller).
         */
        int endOffset;

        /** File name of the old (pre-image); null until a header supplies it. */
        private String oldName;

        /** File name of the new (post-image); null until a header supplies it. */
        private String newName;

        /** Old mode of the file, if described by the patch, else null. */
        private FileMode oldMode;

        /** New mode of the file, if described by the patch, else null. */
        private FileMode newMode;

        /** General type of change indicated by the patch. */
        private ChangeType changeType;

        /**
         * Similarity score if {@link #changeType} is a copy or rename.
         * <p>
         * Both the "similarity index" and the "dissimilarity index" header
         * lines store their value here.
         */
        private int score;

        /** ObjectId listed on the index line for the old (pre-image) */
        private AbbreviatedObjectId oldId;

        /** ObjectId listed on the index line for the new (post-image) */
        private AbbreviatedObjectId newId;

        /** Type of patch used to modify this file */
        PatchType patchType;

        /** The hunks of this file; null until the first hunk is added. */
        private List<HunkHeader> hunks;

        /** If {@link #patchType} is {@link PatchType#GIT_BINARY}, the new image */
        BinaryHunk forwardBinaryHunk;

        /** If {@link #patchType} is {@link PatchType#GIT_BINARY}, the old image */
        BinaryHunk reverseBinaryHunk;
 164
 165         FileHeader(final byte[] b, final int offset) {
 166                 buf = b;
 167                 startOffset = offset;
 168                 changeType = ChangeType.MODIFY; // unless otherwise designated
 169                 patchType = PatchType.UNIFIED;
 170         }
 171
 172         int getParentCount() {
 173                 return 1;
 174         }
 175
 176         /**
 177          * Get the old name associated with this file.
 178          * <p>
 179          * The meaning of the old name can differ depending on the semantic meaning
 180          * of this patch:
 181          * <ul>
 182          * <li><i>file add</i>: always <code>/dev/null</code></li>
 183          * <li><i>file modify</i>: always {@link #getNewName()}</li>
 184          * <li><i>file delete</i>: always the file being deleted</li>
 185          * <li><i>file copy</i>: source file the copy originates from</li>
 186          * <li><i>file rename</i>: source file the rename originates from</li>
 187          * </ul>
 188          *
 189          * @return old name for this file.
 190          */
 191         public String getOldName() {
 192                 return oldName;
 193         }
 194
 195         /**
 196          * Get the new name associated with this file.
 197          * <p>
 198          * The meaning of the new name can differ depending on the semantic meaning
 199          * of this patch:
 200          * <ul>
 201          * <li><i>file add</i>: always the file being created</li>
 202          * <li><i>file modify</i>: always {@link #getOldName()}</li>
 203          * <li><i>file delete</i>: always <code>/dev/null</code></li>
 204          * <li><i>file copy</i>: destination file the copy ends up at</li>
 205          * <li><i>file rename</i>: destination file the rename ends up at/li>
 206          * </ul>
 207          *
 208          * @return new name for this file.
 209          */
 210         public String getNewName() {
 211                 return newName;
 212         }
 213
 214         /** @return the old file mode, if described in the patch */
 215         public FileMode getOldMode() {
 216                 return oldMode;
 217         }
 218
 219         /** @return the new file mode, if described in the patch */
 220         public FileMode getNewMode() {
 221                 return newMode;
 222         }
 223
 224         /** @return the type of change this patch makes on {@link #getNewName()} */
 225         public ChangeType getChangeType() {
 226                 return changeType;
 227         }
 228
 229         /**
 230          * @return similarity score between {@link #getOldName()} and
 231          *         {@link #getNewName()} if {@link #getChangeType()} is
 232          *         {@link ChangeType#COPY} or {@link ChangeType#RENAME}.
 233          */
 234         public int getScore() {
 235                 return score;
 236         }
 237
 238         /**
 239          * Get the old object id from the <code>index</code>.
 240          *
 241          * @return the object id; null if there is no index line
 242          */
 243         public AbbreviatedObjectId getOldId() {
 244                 return oldId;
 245         }
 246
 247         /**
 248          * Get the new object id from the <code>index</code>.
 249          *
 250          * @return the object id; null if there is no index line
 251          */
 252         public AbbreviatedObjectId getNewId() {
 253                 return newId;
 254         }
 255
 256         /** @return style of patch used to modify this file */
 257         public PatchType getPatchType() {
 258                 return patchType;
 259         }
 260
 261         /** @return true if this patch modifies metadata about a file */
 262         public boolean hasMetaDataChanges() {
 263                 return changeType != ChangeType.MODIFY || newMode != oldMode;
 264         }
 265
 266         /** @return hunks altering this file; in order of appearance in patch */
 267         public List<HunkHeader> getHunks() {
 268                 if (hunks == null)
 269                         return Collections.emptyList();
 270                 return hunks;
 271         }
 272
 273         void addHunk(final HunkHeader h) {
 274                 if (h.getFileHeader() != this)
 275                         throw new IllegalArgumentException("Hunk belongs to another file");
 276                 if (hunks == null)
 277                         hunks = new ArrayList<HunkHeader>();
 278                 hunks.add(h);
 279         }
 280
 281         HunkHeader newHunkHeader(final int offset) {
 282                 return new HunkHeader(this, offset);
 283         }
 284
 285         /** @return if a {@link PatchType#GIT_BINARY}, the new-image delta/literal */
 286         public BinaryHunk getForwardBinaryHunk() {
 287                 return forwardBinaryHunk;
 288         }
 289
 290         /** @return if a {@link PatchType#GIT_BINARY}, the old-image delta/literal */
 291         public BinaryHunk getReverseBinaryHunk() {
 292                 return reverseBinaryHunk;
 293         }
 294
 295         /**
 296          * Parse a "diff --git" or "diff --cc" line.
 297          *
 298          * @param ptr
 299          *            first character after the "diff --git " or "diff --cc " part.
 300          * @param end
 301          *            one past the last position to parse.
 302          * @return first character after the LF at the end of the line; -1 on error.
 303          */
 304         int parseGitFileName(int ptr, final int end) {
 305                 final int eol = nextLF(buf, ptr);
 306                 final int bol = ptr;
 307                 if (eol >= end) {
 308                         return -1;
 309                 }
 310
 311                 // buffer[ptr..eol] looks like "a/foo b/foo\n". After the first
 312                 // A regex to match this is "^[^/]+/(.*?) [^/+]+/\1\n$". There
 313                 // is only one way to split the line such that text to the left
 314                 // of the space matches the text to the right, excluding the part
 315                 // before the first slash.
 316                 //
 317
 318                 final int aStart = nextLF(buf, ptr, '/');
 319                 if (aStart >= eol)
 320                         return eol;
 321
 322                 while (ptr < eol) {
 323                         final int sp = nextLF(buf, ptr, ' ');
 324                         if (sp >= eol) {
 325                                 // We can't split the header, it isn't valid.
 326                                 // This may be OK if this is a rename patch.
 327                                 //
 328                                 return eol;
 329                         }
 330                         final int bStart = nextLF(buf, sp, '/');
 331                         if (bStart >= eol)
 332                                 return eol;
 333
 334                         // If buffer[aStart..sp - 1] = buffer[bStart..eol - 1]
 335                         // we have a valid split.
 336                         //
 337                         if (eq(aStart, sp - 1, bStart, eol - 1)) {
 338                                 if (buf[bol] == '"') {
 339                                         // We're a double quoted name. The region better end
 340                                         // in a double quote too, and we need to decode the
 341                                         // characters before reading the name.
 342                                         //
 343                                         if (buf[sp - 2] != '"') {
 344                                                 return eol;
 345                                         }
 346                                         oldName = QuotedString.GIT_PATH.dequote(buf, bol, sp - 1);
 347                                         oldName = p1(oldName);
 348                                 } else {
 349                                         oldName = decode(Constants.CHARSET, buf, aStart, sp - 1);
 350                                 }
 351                                 newName = oldName;
 352                                 return eol;
 353                         }
 354
 355                         // This split wasn't correct. Move past the space and try
 356                         // another split as the space must be part of the file name.
 357                         //
 358                         ptr = sp;
 359                 }
 360
 361                 return eol;
 362         }
 363
 364         int parseGitHeaders(int ptr, final int end) {
 365                 while (ptr < end) {
 366                         final int eol = nextLF(buf, ptr);
 367                         if (isHunkHdr(buf, ptr, eol) >= 1) {
 368                                 // First hunk header; break out and parse them later.
 369                                 break;
 370
 371                         } else if (match(buf, ptr, OLD_NAME) >= 0) {
 372                                 oldName = p1(parseName(oldName, ptr + OLD_NAME.length, eol));
 373                                 if (oldName == DEV_NULL)
 374                                         changeType = ChangeType.ADD;
 375
 376                         } else if (match(buf, ptr, NEW_NAME) >= 0) {
 377                                 newName = p1(parseName(newName, ptr + NEW_NAME.length, eol));
 378                                 if (newName == DEV_NULL)
 379                                         changeType = ChangeType.DELETE;
 380
 381                         } else if (match(buf, ptr, OLD_MODE) >= 0) {
 382                                 oldMode = parseFileMode(ptr + OLD_MODE.length, eol);
 383
 384                         } else if (match(buf, ptr, NEW_MODE) >= 0) {
 385                                 newMode = parseFileMode(ptr + NEW_MODE.length, eol);
 386
 387                         } else if (match(buf, ptr, DELETED_FILE_MODE) >= 0) {
 388                                 oldMode = parseFileMode(ptr + DELETED_FILE_MODE.length, eol);
 389                                 changeType = ChangeType.DELETE;
 390
 391                         } else if (match(buf, ptr, NEW_FILE_MODE) >= 0) {
 392                                 newMode = parseFileMode(ptr + NEW_FILE_MODE.length, eol);
 393                                 changeType = ChangeType.ADD;
 394
 395                         } else if (match(buf, ptr, COPY_FROM) >= 0) {
 396                                 oldName = parseName(oldName, ptr + COPY_FROM.length, eol);
 397                                 changeType = ChangeType.COPY;
 398
 399                         } else if (match(buf, ptr, COPY_TO) >= 0) {
 400                                 newName = parseName(newName, ptr + COPY_TO.length, eol);
 401                                 changeType = ChangeType.COPY;
 402
 403                         } else if (match(buf, ptr, RENAME_OLD) >= 0) {
 404                                 oldName = parseName(oldName, ptr + RENAME_OLD.length, eol);
 405                                 changeType = ChangeType.RENAME;
 406
 407                         } else if (match(buf, ptr, RENAME_NEW) >= 0) {
 408                                 newName = parseName(newName, ptr + RENAME_NEW.length, eol);
 409                                 changeType = ChangeType.RENAME;
 410
 411                         } else if (match(buf, ptr, RENAME_FROM) >= 0) {
 412                                 oldName = parseName(oldName, ptr + RENAME_FROM.length, eol);
 413                                 changeType = ChangeType.RENAME;
 414
 415                         } else if (match(buf, ptr, RENAME_TO) >= 0) {
 416                                 newName = parseName(newName, ptr + RENAME_TO.length, eol);
 417                                 changeType = ChangeType.RENAME;
 418
 419                         } else if (match(buf, ptr, SIMILARITY_INDEX) >= 0) {
 420                                 score = parseBase10(buf, ptr + SIMILARITY_INDEX.length, null);
 421
 422                         } else if (match(buf, ptr, DISSIMILARITY_INDEX) >= 0) {
 423                                 score = parseBase10(buf, ptr + DISSIMILARITY_INDEX.length, null);
 424
 425                         } else if (match(buf, ptr, INDEX) >= 0) {
 426                                 parseIndexLine(ptr + INDEX.length, eol);
 427
 428                         } else {
 429                                 // Probably an empty patch (stat dirty).
 430                                 break;
 431                         }
 432
 433                         ptr = eol;
 434                 }
 435                 return ptr;
 436         }
 437
 438         int parseTraditionalHeaders(int ptr, final int end) {
 439                 while (ptr < end) {
 440                         final int eol = nextLF(buf, ptr);
 441                         if (isHunkHdr(buf, ptr, eol) >= 1) {
 442                                 // First hunk header; break out and parse them later.
 443                                 break;
 444
 445                         } else if (match(buf, ptr, OLD_NAME) >= 0) {
 446                                 oldName = p1(parseName(oldName, ptr + OLD_NAME.length, eol));
 447                                 if (oldName == DEV_NULL)
 448                                         changeType = ChangeType.ADD;
 449
 450                         } else if (match(buf, ptr, NEW_NAME) >= 0) {
 451                                 newName = p1(parseName(newName, ptr + NEW_NAME.length, eol));
 452                                 if (newName == DEV_NULL)
 453                                         changeType = ChangeType.DELETE;
 454
 455                         } else {
 456                                 // Possibly an empty patch.
 457                                 break;
 458                         }
 459
 460                         ptr = eol;
 461                 }
 462                 return ptr;
 463         }
 464
 465         private String parseName(final String expect, int ptr, final int end) {
 466                 if (ptr == end)
 467                         return expect;
 468
 469                 String r;
 470                 if (buf[ptr] == '"') {
 471                         // New style GNU diff format
 472                         //
 473                         r = QuotedString.GIT_PATH.dequote(buf, ptr, end - 1);
 474                 } else {
 475                         // Older style GNU diff format, an optional tab ends the name.
 476                         //
 477                         int tab = end;
 478                         while (ptr < tab && buf[tab - 1] != '\t')
 479                                 tab--;
 480                         if (ptr == tab)
 481                                 tab = end;
 482                         r = decode(Constants.CHARSET, buf, ptr, tab - 1);
 483                 }
 484
 485                 if (r.equals(DEV_NULL))
 486                         r = DEV_NULL;
 487                 return r;
 488         }
 489
 490         private static String p1(final String r) {
 491                 final int s = r.indexOf('/');
 492                 return s > 0 ? r.substring(s + 1) : r;
 493         }
 494
 495         private FileMode parseFileMode(int ptr, final int end) {
 496                 int tmp = 0;
 497                 while (ptr < end - 1) {
 498                         tmp <<= 3;
 499                         tmp += buf[ptr++] - '0';
 500                 }
 501                 return FileMode.fromBits(tmp);
 502         }
 503
 504         private void parseIndexLine(int ptr, final int end) {
 505                 // "index $asha1..$bsha1[ $mode]" where $asha1 and $bsha1
 506                 // can be unique abbreviations
 507                 //
 508                 final int dot2 = nextLF(buf, ptr, '.');
 509                 final int mode = nextLF(buf, dot2, ' ');
 510
 511                 oldId = AbbreviatedObjectId.fromString(buf, ptr, dot2 - 1);
 512                 newId = AbbreviatedObjectId.fromString(buf, dot2 + 1, mode - 1);
 513
 514                 if (mode < end)
 515                         newMode = oldMode = parseFileMode(mode, end);
 516         }
 517
 518         private boolean eq(int aPtr, int aEnd, int bPtr, int bEnd) {
 519                 if (aEnd - aPtr != bEnd - bPtr) {
 520                         return false;
 521                 }
 522                 while (aPtr < aEnd) {
 523                         if (buf[aPtr++] != buf[bPtr++])
 524                                 return false;
 525                 }
 526                 return true;
 527         }
 528
 529         /**
 530          * Determine if this is a patch hunk header.
 531          *
 532          * @param buf
 533          *            the buffer to scan
 534          * @param start
 535          *            first position in the buffer to evaluate
 536          * @param end
 537          *            last position to consider; usually the end of the buffer (
 538          *            <code>buf.length</code>) or the first position on the next
 539          *            line. This is only used to avoid very long runs of '@' from
 540          *            killing the scan loop.
 541          * @return the number of "ancestor revisions" in the hunk header. A
 542          *         traditional two-way diff ("@@ -...") returns 1; a combined diff
 543          *         for a 3 way-merge returns 3. If this is not a hunk header, 0 is
 544          *         returned instead.
 545          */
 546         static int isHunkHdr(final byte[] buf, final int start, final int end) {
 547                 int ptr = start;
 548                 while (ptr < end && buf[ptr] == '@')
 549                         ptr++;
 550                 if (ptr - start < 2)
 551                         return 0;
 552                 if (ptr == end || buf[ptr++] != ' ')
 553                         return 0;
 554                 if (ptr == end || buf[ptr++] != '-')
 555                         return 0;
 556                 return (ptr - 3) - start;
 557         }
 558 }