org.spearce.jgit/src/org/spearce/jgit/patch/Patch.java

   1 /*
   2  * Copyright (C) 2008, Google Inc.
   3  *
   4  * All rights reserved.
   5  *
   6  * Redistribution and use in source and binary forms, with or
   7  * without modification, are permitted provided that the following
   8  * conditions are met:
   9  *
  10  * - Redistributions of source code must retain the above copyright
  11  *   notice, this list of conditions and the following disclaimer.
  12  *
  13  * - Redistributions in binary form must reproduce the above
  14  *   copyright notice, this list of conditions and the following
  15  *   disclaimer in the documentation and/or other materials provided
  16  *   with the distribution.
  17  *
  18  * - Neither the name of the Git Development Community nor the
  19  *   names of its contributors may be used to endorse or promote
  20  *   products derived from this software without specific prior
  21  *   written permission.
  22  *
  23  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
  24  * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
  25  * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  26  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  27  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
  28  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  29  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  30  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  31  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
  32  * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
  33  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  34  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
  35  * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  36  */
  37
  38 package org.spearce.jgit.patch;
  39
  40 import static org.spearce.jgit.lib.Constants.encodeASCII;
  41 import static org.spearce.jgit.patch.FileHeader.isHunkHdr;
  42 import static org.spearce.jgit.patch.FileHeader.NEW_NAME;
  43 import static org.spearce.jgit.patch.FileHeader.OLD_NAME;
  44 import static org.spearce.jgit.util.RawParseUtils.match;
  45 import static org.spearce.jgit.util.RawParseUtils.nextLF;
  46
  47 import java.io.IOException;
  48 import java.io.InputStream;
  49 import java.util.ArrayList;
  50 import java.util.List;
  51
  52 import org.spearce.jgit.util.TemporaryBuffer;
  53
  54 /** A parsed collection of {@link FileHeader}s from a unified diff patch file */
  55 public class Patch {
  56         private static final byte[] DIFF_GIT = encodeASCII("diff --git ");
  57
  58         private static final byte[] DIFF_CC = encodeASCII("diff --cc ");
  59
  60         private static final byte[][] BIN_HEADERS = new byte[][] {
  61                         encodeASCII("Binary files "), encodeASCII("Files "), };
  62
  63         private static final byte[] BIN_TRAILER = encodeASCII(" differ\n");
  64
  65         private static final byte[] GIT_BINARY = encodeASCII("GIT binary patch\n");
  66
  67         static final byte[] SIG_FOOTER = encodeASCII("-- \n");
  68
  69         /** The files, in the order they were parsed out of the input. */
  70         private final List<FileHeader> files;
  71
  72         /** Formatting errors, if any were identified. */
  73         private final List<FormatError> errors;
  74
  75         /** Create an empty patch. */
  76         public Patch() {
  77                 files = new ArrayList<FileHeader>();
  78                 errors = new ArrayList<FormatError>(0);
  79         }
  80
  81         /**
  82          * Add a single file to this patch.
  83          * <p>
  84          * Typically files should be added by parsing the text through one of this
  85          * class's parse methods.
  86          *
  87          * @param fh
  88          *            the header of the file.
  89          */
  90         public void addFile(final FileHeader fh) {
  91                 files.add(fh);
  92         }
  93
  94         /** @return list of files described in the patch, in occurrence order. */
  95         public List<FileHeader> getFiles() {
  96                 return files;
  97         }
  98
  99         /**
 100          * Add a formatting error to this patch script.
 101          *
 102          * @param err
 103          *            the error description.
 104          */
 105         public void addError(final FormatError err) {
 106                 errors.add(err);
 107         }
 108
 109         /** @return collection of formatting errors, if any. */
 110         public List<FormatError> getErrors() {
 111                 return errors;
 112         }
 113
 114         /**
 115          * Parse a patch received from an InputStream.
 116          * <p>
 117          * Multiple parse calls on the same instance will concatenate the patch
 118          * data, but each parse input must start with a valid file header (don't
 119          * split a single file across parse calls).
 120          *
 121          * @param is
 122          *            the stream to read the patch data from. The stream is read
 123          *            until EOF is reached.
 124          * @throws IOException
 125          *             there was an error reading from the input stream.
 126          */
 127         public void parse(final InputStream is) throws IOException {
 128                 final byte[] buf = readFully(is);
 129                 parse(buf, 0, buf.length);
 130         }
 131
 132         private static byte[] readFully(final InputStream is) throws IOException {
 133                 final TemporaryBuffer b = new TemporaryBuffer();
 134                 try {
 135                         b.copy(is);
 136                         b.close();
 137                         return b.toByteArray();
 138                 } finally {
 139                         b.destroy();
 140                 }
 141         }
 142
 143         /**
 144          * Parse a patch stored in a byte[].
 145          * <p>
 146          * Multiple parse calls on the same instance will concatenate the patch
 147          * data, but each parse input must start with a valid file header (don't
 148          * split a single file across parse calls).
 149          *
 150          * @param buf
 151          *            the buffer to parse.
 152          * @param ptr
 153          *            starting position to parse from.
 154          * @param end
 155          *            1 past the last position to end parsing. The total length to
 156          *            be parsed is <code>end - ptr</code>.
 157          */
 158         public void parse(final byte[] buf, int ptr, final int end) {
 159                 while (ptr < end)
 160                         ptr = parseFile(buf, ptr, end);
 161         }
 162
 163         private int parseFile(final byte[] buf, int c, final int end) {
 164                 while (c < end) {
 165                         if (isHunkHdr(buf, c, end) >= 1) {
 166                                 // If we find a disconnected hunk header we might
 167                                 // have missed a file header previously. The hunk
 168                                 // isn't valid without knowing where it comes from.
 169                                 //
 170                                 error(buf, c, "Hunk disconnected from file");
 171                                 c = nextLF(buf, c);
 172                                 continue;
 173                         }
 174
 175                         // Valid git style patch?
 176                         //
 177                         if (match(buf, c, DIFF_GIT) >= 0)
 178                                 return parseDiffGit(buf, c, end);
 179                         if (match(buf, c, DIFF_CC) >= 0)
 180                                 return parseDiffCC(buf, c, end);
 181
 182                         // Junk between files? Leading junk? Traditional
 183                         // (non-git generated) patch?
 184                         //
 185                         final int n = nextLF(buf, c);
 186                         if (n >= end) {
 187                                 // Patches cannot be only one line long. This must be
 188                                 // trailing junk that we should ignore.
 189                                 //
 190                                 return end;
 191                         }
 192
 193                         if (n - c < 6) {
 194                                 // A valid header must be at least 6 bytes on the
 195                                 // first line, e.g. "--- a/b\n".
 196                                 //
 197                                 c = n;
 198                                 continue;
 199                         }
 200
 201                         if (match(buf, c, OLD_NAME) >= 0 && match(buf, n, NEW_NAME) >= 0) {
 202                                 // Probably a traditional patch. Ensure we have at least
 203                                 // a "@@ -0,0" smelling line next. We only check the "@@ -".
 204                                 //
 205                                 final int f = nextLF(buf, n);
 206                                 if (f >= end)
 207                                         return end;
 208                                 if (isHunkHdr(buf, f, end) == 1)
 209                                         return parseTraditionalPatch(buf, c, end);
 210                         }
 211
 212                         c = n;
 213                 }
 214                 return c;
 215         }
 216
 217         private int parseDiffGit(final byte[] buf, final int start, final int end) {
 218                 final FileHeader fh = new FileHeader(buf, start);
 219                 int ptr = fh.parseGitFileName(start + DIFF_GIT.length, end);
 220                 if (ptr < 0)
 221                         return skipFile(buf, start, end);
 222
 223                 ptr = fh.parseGitHeaders(ptr, end);
 224                 ptr = parseHunks(fh, ptr, end);
 225                 fh.endOffset = ptr;
 226                 addFile(fh);
 227                 return ptr;
 228         }
 229
 230         private int parseDiffCC(final byte[] buf, final int start, final int end) {
 231                 final FileHeader fh = new FileHeader(buf, start);
 232                 int ptr = fh.parseGitFileName(start + DIFF_CC.length, end);
 233                 if (ptr < 0)
 234                         return skipFile(buf, start, end);
 235
 236                 // TODO Support parsing diff --cc headers
 237                 // TODO parse diff --cc hunks
 238                 warn(buf, start, "diff --cc format not supported");
 239                 fh.endOffset = ptr;
 240                 addFile(fh);
 241                 return ptr;
 242         }
 243
 244         private int parseTraditionalPatch(final byte[] buf, final int start,
 245                         final int end) {
 246                 final FileHeader fh = new FileHeader(buf, start);
 247                 int ptr = fh.parseTraditionalHeaders(start, end);
 248                 ptr = parseHunks(fh, ptr, end);
 249                 fh.endOffset = ptr;
 250                 addFile(fh);
 251                 return ptr;
 252         }
 253
 254         private static int skipFile(final byte[] buf, int ptr, final int end) {
 255                 ptr = nextLF(buf, ptr);
 256                 if (match(buf, ptr, OLD_NAME) >= 0)
 257                         ptr = nextLF(buf, ptr);
 258                 return ptr;
 259         }
 260
 261         private int parseHunks(final FileHeader fh, int c, final int end) {
 262                 final byte[] buf = fh.buf;
 263                 while (c < end) {
 264                         // If we see a file header at this point, we have all of the
 265                         // hunks for our current file. We should stop and report back
 266                         // with this position so it can be parsed again later.
 267                         //
 268                         if (match(buf, c, DIFF_GIT) >= 0)
 269                                 break;
 270                         if (match(buf, c, DIFF_CC) >= 0)
 271                                 break;
 272                         if (match(buf, c, OLD_NAME) >= 0)
 273                                 break;
 274                         if (match(buf, c, NEW_NAME) >= 0)
 275                                 break;
 276
 277                         if (isHunkHdr(buf, c, end) == 1) {
 278                                 final HunkHeader h = new HunkHeader(fh, c);
 279                                 h.parseHeader(end);
 280                                 c = h.parseBody(this, end);
 281                                 h.endOffset = c;
 282                                 fh.addHunk(h);
 283                                 if (c < end && buf[c] != '@' && buf[c] != 'd'
 284                                                 && match(buf, c, SIG_FOOTER) < 0) {
 285                                         warn(buf, c, "Unexpected hunk trailer");
 286                                 }
 287                                 continue;
 288                         }
 289
 290                         final int eol = nextLF(buf, c);
 291                         if (fh.getHunks().isEmpty() && match(buf, c, GIT_BINARY) >= 0) {
 292                                 fh.patchType = FileHeader.PatchType.GIT_BINARY;
 293                                 return parseGitBinary(fh, eol, end);
 294                         }
 295
 296                         if (fh.getHunks().isEmpty() && BIN_TRAILER.length < eol - c
 297                                         && match(buf, eol - BIN_TRAILER.length, BIN_TRAILER) >= 0
 298                                         && matchAny(buf, c, BIN_HEADERS)) {
 299                                 // The patch is a binary file diff, with no deltas.
 300                                 //
 301                                 fh.patchType = FileHeader.PatchType.BINARY;
 302                                 return eol;
 303                         }
 304
 305                         // Skip this line and move to the next. Its probably garbage
 306                         // after the last hunk of a file.
 307                         //
 308                         c = eol;
 309                 }
 310
 311                 if (fh.getHunks().isEmpty()
 312                                 && fh.getPatchType() == FileHeader.PatchType.UNIFIED
 313                                 && !fh.hasMetaDataChanges()) {
 314                         // Hmm, an empty patch? If there is no metadata here we
 315                         // really have a binary patch that we didn't notice above.
 316                         //
 317                         fh.patchType = FileHeader.PatchType.BINARY;
 318                 }
 319
 320                 return c;
 321         }
 322
 323         private int parseGitBinary(final FileHeader fh, int c, final int end) {
 324                 final BinaryHunk postImage = new BinaryHunk(fh, c);
 325                 final int nEnd = postImage.parseHunk(c, end);
 326                 if (nEnd < 0) {
 327                         // Not a binary hunk.
 328                         //
 329                         error(fh.buf, c, "Missing forward-image in GIT binary patch");
 330                         return c;
 331                 }
 332                 c = nEnd;
 333                 postImage.endOffset = c;
 334                 fh.forwardBinaryHunk = postImage;
 335
 336                 final BinaryHunk preImage = new BinaryHunk(fh, c);
 337                 final int oEnd = preImage.parseHunk(c, end);
 338                 if (oEnd >= 0) {
 339                         c = oEnd;
 340                         preImage.endOffset = c;
 341                         fh.reverseBinaryHunk = preImage;
 342                 }
 343
 344                 return c;
 345         }
 346
 347         void warn(final byte[] buf, final int ptr, final String msg) {
 348                 addError(new FormatError(buf, ptr, FormatError.Severity.WARNING, msg));
 349         }
 350
 351         void error(final byte[] buf, final int ptr, final String msg) {
 352                 addError(new FormatError(buf, ptr, FormatError.Severity.ERROR, msg));
 353         }
 354
 355         private static boolean matchAny(final byte[] buf, final int c,
 356                         final byte[][] srcs) {
 357                 for (final byte[] s : srcs) {
 358                         if (match(buf, c, s) >= 0)
 359                                 return true;
 360                 }
 361                 return false;
 362         }
 363 }