org.spearce.jgit/src/org/spearce/jgit/patch/Patch.java

   1 /*
   2  * Copyright (C) 2008, Google Inc.
   3  *
   4  * All rights reserved.
   5  *
   6  * Redistribution and use in source and binary forms, with or
   7  * without modification, are permitted provided that the following
   8  * conditions are met:
   9  *
  10  * - Redistributions of source code must retain the above copyright
  11  *   notice, this list of conditions and the following disclaimer.
  12  *
  13  * - Redistributions in binary form must reproduce the above
  14  *   copyright notice, this list of conditions and the following
  15  *   disclaimer in the documentation and/or other materials provided
  16  *   with the distribution.
  17  *
  18  * - Neither the name of the Git Development Community nor the
  19  *   names of its contributors may be used to endorse or promote
  20  *   products derived from this software without specific prior
  21  *   written permission.
  22  *
  23  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
  24  * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
  25  * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  26  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  27  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
  28  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  29  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  30  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  31  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
  32  * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
  33  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  34  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
  35  * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  36  */
  37
  38 package org.spearce.jgit.patch;
  39
  40 import static org.spearce.jgit.lib.Constants.encodeASCII;
  41 import static org.spearce.jgit.patch.FileHeader.isHunkHdr;
  42 import static org.spearce.jgit.patch.FileHeader.NEW_NAME;
  43 import static org.spearce.jgit.patch.FileHeader.OLD_NAME;
  44 import static org.spearce.jgit.util.RawParseUtils.match;
  45 import static org.spearce.jgit.util.RawParseUtils.nextLF;
  46
  47 import java.io.IOException;
  48 import java.io.InputStream;
  49 import java.util.ArrayList;
  50 import java.util.List;
  51
  52 import org.spearce.jgit.util.TemporaryBuffer;
  53
  54 /** A parsed collection of {@link FileHeader}s from a unified diff patch file */
  55 public class Patch {
  56         private static final byte[] DIFF_GIT = encodeASCII("diff --git ");
  57
  58         private static final byte[] DIFF_CC = encodeASCII("diff --cc ");
  59
  60         private static final byte[] DIFF_COMBINED = encodeASCII("diff --combined ");
  61
  62         private static final byte[][] BIN_HEADERS = new byte[][] {
  63                         encodeASCII("Binary files "), encodeASCII("Files "), };
  64
  65         private static final byte[] BIN_TRAILER = encodeASCII(" differ\n");
  66
  67         private static final byte[] GIT_BINARY = encodeASCII("GIT binary patch\n");
  68
  69         static final byte[] SIG_FOOTER = encodeASCII("-- \n");
  70
  71         /** The files, in the order they were parsed out of the input. */
  72         private final List<FileHeader> files;
  73
  74         /** Formatting errors, if any were identified. */
  75         private final List<FormatError> errors;
  76
  77         /** Create an empty patch. */
  78         public Patch() {
  79                 files = new ArrayList<FileHeader>();
  80                 errors = new ArrayList<FormatError>(0);
  81         }
  82
  83         /**
  84          * Add a single file to this patch.
  85          * <p>
  86          * Typically files should be added by parsing the text through one of this
  87          * class's parse methods.
  88          *
  89          * @param fh
  90          *            the header of the file.
  91          */
  92         public void addFile(final FileHeader fh) {
  93                 files.add(fh);
  94         }
  95
  96         /** @return list of files described in the patch, in occurrence order. */
  97         public List<? extends FileHeader> getFiles() {
  98                 return files;
  99         }
 100
 101         /**
 102          * Add a formatting error to this patch script.
 103          *
 104          * @param err
 105          *            the error description.
 106          */
 107         public void addError(final FormatError err) {
 108                 errors.add(err);
 109         }
 110
 111         /** @return collection of formatting errors, if any. */
 112         public List<FormatError> getErrors() {
 113                 return errors;
 114         }
 115
 116         /**
 117          * Parse a patch received from an InputStream.
 118          * <p>
 119          * Multiple parse calls on the same instance will concatenate the patch
 120          * data, but each parse input must start with a valid file header (don't
 121          * split a single file across parse calls).
 122          *
 123          * @param is
 124          *            the stream to read the patch data from. The stream is read
 125          *            until EOF is reached.
 126          * @throws IOException
 127          *             there was an error reading from the input stream.
 128          */
 129         public void parse(final InputStream is) throws IOException {
 130                 final byte[] buf = readFully(is);
 131                 parse(buf, 0, buf.length);
 132         }
 133
 134         private static byte[] readFully(final InputStream is) throws IOException {
 135                 final TemporaryBuffer b = new TemporaryBuffer();
 136                 try {
 137                         b.copy(is);
 138                         b.close();
 139                         return b.toByteArray();
 140                 } finally {
 141                         b.destroy();
 142                 }
 143         }
 144
 145         /**
 146          * Parse a patch stored in a byte[].
 147          * <p>
 148          * Multiple parse calls on the same instance will concatenate the patch
 149          * data, but each parse input must start with a valid file header (don't
 150          * split a single file across parse calls).
 151          *
 152          * @param buf
 153          *            the buffer to parse.
 154          * @param ptr
 155          *            starting position to parse from.
 156          * @param end
 157          *            1 past the last position to end parsing. The total length to
 158          *            be parsed is <code>end - ptr</code>.
 159          */
 160         public void parse(final byte[] buf, int ptr, final int end) {
 161                 while (ptr < end)
 162                         ptr = parseFile(buf, ptr, end);
 163         }
 164
 165         private int parseFile(final byte[] buf, int c, final int end) {
 166                 while (c < end) {
 167                         if (isHunkHdr(buf, c, end) >= 1) {
 168                                 // If we find a disconnected hunk header we might
 169                                 // have missed a file header previously. The hunk
 170                                 // isn't valid without knowing where it comes from.
 171                                 //
 172                                 error(buf, c, "Hunk disconnected from file");
 173                                 c = nextLF(buf, c);
 174                                 continue;
 175                         }
 176
 177                         // Valid git style patch?
 178                         //
 179                         if (match(buf, c, DIFF_GIT) >= 0)
 180                                 return parseDiffGit(buf, c, end);
 181                         if (match(buf, c, DIFF_CC) >= 0)
 182                                 return parseDiffCombined(DIFF_CC, buf, c, end);
 183                         if (match(buf, c, DIFF_COMBINED) >= 0)
 184                                 return parseDiffCombined(DIFF_COMBINED, buf, c, end);
 185
 186                         // Junk between files? Leading junk? Traditional
 187                         // (non-git generated) patch?
 188                         //
 189                         final int n = nextLF(buf, c);
 190                         if (n >= end) {
 191                                 // Patches cannot be only one line long. This must be
 192                                 // trailing junk that we should ignore.
 193                                 //
 194                                 return end;
 195                         }
 196
 197                         if (n - c < 6) {
 198                                 // A valid header must be at least 6 bytes on the
 199                                 // first line, e.g. "--- a/b\n".
 200                                 //
 201                                 c = n;
 202                                 continue;
 203                         }
 204
 205                         if (match(buf, c, OLD_NAME) >= 0 && match(buf, n, NEW_NAME) >= 0) {
 206                                 // Probably a traditional patch. Ensure we have at least
 207                                 // a "@@ -0,0" smelling line next. We only check the "@@ -".
 208                                 //
 209                                 final int f = nextLF(buf, n);
 210                                 if (f >= end)
 211                                         return end;
 212                                 if (isHunkHdr(buf, f, end) == 1)
 213                                         return parseTraditionalPatch(buf, c, end);
 214                         }
 215
 216                         c = n;
 217                 }
 218                 return c;
 219         }
 220
 221         private int parseDiffGit(final byte[] buf, final int start, final int end) {
 222                 final FileHeader fh = new FileHeader(buf, start);
 223                 int ptr = fh.parseGitFileName(start + DIFF_GIT.length, end);
 224                 if (ptr < 0)
 225                         return skipFile(buf, start, end);
 226
 227                 ptr = fh.parseGitHeaders(ptr, end);
 228                 ptr = parseHunks(fh, ptr, end);
 229                 fh.endOffset = ptr;
 230                 addFile(fh);
 231                 return ptr;
 232         }
 233
 234         private int parseDiffCombined(final byte[] hdr, final byte[] buf,
 235                         final int start, final int end) {
 236                 final CombinedFileHeader fh = new CombinedFileHeader(buf, start);
 237                 int ptr = fh.parseGitFileName(start + hdr.length, end);
 238                 if (ptr < 0)
 239                         return skipFile(buf, start, end);
 240
 241                 ptr = fh.parseGitHeaders(ptr, end);
 242                 ptr = parseHunks(fh, ptr, end);
 243                 fh.endOffset = ptr;
 244                 addFile(fh);
 245                 return ptr;
 246         }
 247
 248         private int parseTraditionalPatch(final byte[] buf, final int start,
 249                         final int end) {
 250                 final FileHeader fh = new FileHeader(buf, start);
 251                 int ptr = fh.parseTraditionalHeaders(start, end);
 252                 ptr = parseHunks(fh, ptr, end);
 253                 fh.endOffset = ptr;
 254                 addFile(fh);
 255                 return ptr;
 256         }
 257
 258         private static int skipFile(final byte[] buf, int ptr, final int end) {
 259                 ptr = nextLF(buf, ptr);
 260                 if (match(buf, ptr, OLD_NAME) >= 0)
 261                         ptr = nextLF(buf, ptr);
 262                 return ptr;
 263         }
 264
 265         private int parseHunks(final FileHeader fh, int c, final int end) {
 266                 final byte[] buf = fh.buf;
 267                 while (c < end) {
 268                         // If we see a file header at this point, we have all of the
 269                         // hunks for our current file. We should stop and report back
 270                         // with this position so it can be parsed again later.
 271                         //
 272                         if (match(buf, c, DIFF_GIT) >= 0)
 273                                 break;
 274                         if (match(buf, c, DIFF_CC) >= 0)
 275                                 break;
 276                         if (match(buf, c, DIFF_COMBINED) >= 0)
 277                                 break;
 278                         if (match(buf, c, OLD_NAME) >= 0)
 279                                 break;
 280                         if (match(buf, c, NEW_NAME) >= 0)
 281                                 break;
 282
 283                         if (isHunkHdr(buf, c, end) == fh.getParentCount()) {
 284                                 final HunkHeader h = fh.newHunkHeader(c);
 285                                 h.parseHeader(end);
 286                                 c = h.parseBody(this, end);
 287                                 h.endOffset = c;
 288                                 fh.addHunk(h);
 289                                 if (c < end) {
 290                                         switch (buf[c]) {
 291                                         case '@':
 292                                         case 'd':
 293                                         case '\n':
 294                                                 break;
 295                                         default:
 296                                                 if (match(buf, c, SIG_FOOTER) < 0)
 297                                                         warn(buf, c, "Unexpected hunk trailer");
 298                                         }
 299                                 }
 300                                 continue;
 301                         }
 302
 303                         final int eol = nextLF(buf, c);
 304                         if (fh.getHunks().isEmpty() && match(buf, c, GIT_BINARY) >= 0) {
 305                                 fh.patchType = FileHeader.PatchType.GIT_BINARY;
 306                                 return parseGitBinary(fh, eol, end);
 307                         }
 308
 309                         if (fh.getHunks().isEmpty() && BIN_TRAILER.length < eol - c
 310                                         && match(buf, eol - BIN_TRAILER.length, BIN_TRAILER) >= 0
 311                                         && matchAny(buf, c, BIN_HEADERS)) {
 312                                 // The patch is a binary file diff, with no deltas.
 313                                 //
 314                                 fh.patchType = FileHeader.PatchType.BINARY;
 315                                 return eol;
 316                         }
 317
 318                         // Skip this line and move to the next. Its probably garbage
 319                         // after the last hunk of a file.
 320                         //
 321                         c = eol;
 322                 }
 323
 324                 if (fh.getHunks().isEmpty()
 325                                 && fh.getPatchType() == FileHeader.PatchType.UNIFIED
 326                                 && !fh.hasMetaDataChanges()) {
 327                         // Hmm, an empty patch? If there is no metadata here we
 328                         // really have a binary patch that we didn't notice above.
 329                         //
 330                         fh.patchType = FileHeader.PatchType.BINARY;
 331                 }
 332
 333                 return c;
 334         }
 335
 336         private int parseGitBinary(final FileHeader fh, int c, final int end) {
 337                 final BinaryHunk postImage = new BinaryHunk(fh, c);
 338                 final int nEnd = postImage.parseHunk(c, end);
 339                 if (nEnd < 0) {
 340                         // Not a binary hunk.
 341                         //
 342                         error(fh.buf, c, "Missing forward-image in GIT binary patch");
 343                         return c;
 344                 }
 345                 c = nEnd;
 346                 postImage.endOffset = c;
 347                 fh.forwardBinaryHunk = postImage;
 348
 349                 final BinaryHunk preImage = new BinaryHunk(fh, c);
 350                 final int oEnd = preImage.parseHunk(c, end);
 351                 if (oEnd >= 0) {
 352                         c = oEnd;
 353                         preImage.endOffset = c;
 354                         fh.reverseBinaryHunk = preImage;
 355                 }
 356
 357                 return c;
 358         }
 359
 360         void warn(final byte[] buf, final int ptr, final String msg) {
 361                 addError(new FormatError(buf, ptr, FormatError.Severity.WARNING, msg));
 362         }
 363
 364         void error(final byte[] buf, final int ptr, final String msg) {
 365                 addError(new FormatError(buf, ptr, FormatError.Severity.ERROR, msg));
 366         }
 367
 368         private static boolean matchAny(final byte[] buf, final int c,
 369                         final byte[][] srcs) {
 370                 for (final byte[] s : srcs) {
 371                         if (match(buf, c, s) >= 0)
 372                                 return true;
 373                 }
 374                 return false;
 375         }
 376 }