org.spearce.jgit/src/org/spearce/jgit/patch/Patch.java

   1 /*
   2  * Copyright (C) 2008, Google Inc.
   3  *
   4  * All rights reserved.
   5  *
   6  * Redistribution and use in source and binary forms, with or
   7  * without modification, are permitted provided that the following
   8  * conditions are met:
   9  *
  10  * - Redistributions of source code must retain the above copyright
  11  *   notice, this list of conditions and the following disclaimer.
  12  *
  13  * - Redistributions in binary form must reproduce the above
  14  *   copyright notice, this list of conditions and the following
  15  *   disclaimer in the documentation and/or other materials provided
  16  *   with the distribution.
  17  *
  18  * - Neither the name of the Git Development Community nor the
  19  *   names of its contributors may be used to endorse or promote
  20  *   products derived from this software without specific prior
  21  *   written permission.
  22  *
  23  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
  24  * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
  25  * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  26  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  27  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
  28  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  29  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  30  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  31  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
  32  * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
  33  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  34  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
  35  * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  36  */
  37
  38 package org.spearce.jgit.patch;
  39
  40 import static org.spearce.jgit.lib.Constants.encodeASCII;
  41 import static org.spearce.jgit.patch.FileHeader.HUNK_HDR;
  42 import static org.spearce.jgit.patch.FileHeader.NEW_NAME;
  43 import static org.spearce.jgit.patch.FileHeader.OLD_NAME;
  44 import static org.spearce.jgit.util.RawParseUtils.match;
  45 import static org.spearce.jgit.util.RawParseUtils.nextLF;
  46
  47 import java.io.IOException;
  48 import java.io.InputStream;
  49 import java.util.ArrayList;
  50 import java.util.List;
  51
  52 import org.spearce.jgit.util.TemporaryBuffer;
  53
  54 /** A parsed collection of {@link FileHeader}s from a unified diff patch file */
  55 public class Patch {
  56         private static final byte[] DIFF_GIT = encodeASCII("diff --git ");
  57
  58         private static final byte[] DIFF_CC = encodeASCII("diff --cc ");
  59
  60         private static final byte[][] BIN_HEADERS = new byte[][] {
  61                         encodeASCII("Binary files "), encodeASCII("Files "), };
  62
  63         private static final byte[] BIN_TRAILER = encodeASCII(" differ\n");
  64
  65         static final byte[] SIG_FOOTER = encodeASCII("-- \n");
  66
  67         /** The files, in the order they were parsed out of the input. */
  68         private final List<FileHeader> files;
  69
  70         /** Create an empty patch. */
  71         public Patch() {
  72                 files = new ArrayList<FileHeader>();
  73         }
  74
  75         /**
  76          * Add a single file to this patch.
  77          * <p>
  78          * Typically files should be added by parsing the text through one of this
  79          * class's parse methods.
  80          *
  81          * @param fh
  82          *            the header of the file.
  83          */
  84         public void addFile(final FileHeader fh) {
  85                 files.add(fh);
  86         }
  87
  88         /** @return list of files described in the patch, in occurrence order. */
  89         public List<FileHeader> getFiles() {
  90                 return files;
  91         }
  92
  93         /**
  94          * Parse a patch received from an InputStream.
  95          * <p>
  96          * Multiple parse calls on the same instance will concatenate the patch
  97          * data, but each parse input must start with a valid file header (don't
  98          * split a single file across parse calls).
  99          *
 100          * @param is
 101          *            the stream to read the patch data from. The stream is read
 102          *            until EOF is reached.
 103          * @throws IOException
 104          *             there was an error reading from the input stream.
 105          */
 106         public void parse(final InputStream is) throws IOException {
 107                 final byte[] buf = readFully(is);
 108                 parse(buf, 0, buf.length);
 109         }
 110
 111         private static byte[] readFully(final InputStream is) throws IOException {
 112                 final TemporaryBuffer b = new TemporaryBuffer();
 113                 try {
 114                         b.copy(is);
 115                         b.close();
 116                         return b.toByteArray();
 117                 } finally {
 118                         b.destroy();
 119                 }
 120         }
 121
 122         /**
 123          * Parse a patch stored in a byte[].
 124          * <p>
 125          * Multiple parse calls on the same instance will concatenate the patch
 126          * data, but each parse input must start with a valid file header (don't
 127          * split a single file across parse calls).
 128          *
 129          * @param buf
 130          *            the buffer to parse.
 131          * @param ptr
 132          *            starting position to parse from.
 133          * @param end
 134          *            1 past the last position to end parsing. The total length to
 135          *            be parsed is <code>end - ptr</code>.
 136          */
 137         public void parse(final byte[] buf, int ptr, final int end) {
 138                 while (ptr < end)
 139                         ptr = parseFile(buf, ptr);
 140         }
 141
 142         private int parseFile(final byte[] buf, int c) {
 143                 final int sz = buf.length;
 144                 while (c < sz) {
 145                         if (match(buf, c, HUNK_HDR) >= 0) {
 146                                 // If we find a disconnected hunk header we might
 147                                 // have missed a file header previously. The hunk
 148                                 // isn't valid without knowing where it comes from.
 149                                 //
 150
 151                                 // TODO handle a disconnected hunk fragment
 152                                 c = nextLF(buf, c);
 153                                 continue;
 154                         }
 155
 156                         // Valid git style patch?
 157                         //
 158                         if (match(buf, c, DIFF_GIT) >= 0)
 159                                 return parseDiffGit(buf, c);
 160                         if (match(buf, c, DIFF_CC) >= 0)
 161                                 return parseDiffCC(buf, c);
 162
 163                         // Junk between files? Leading junk? Traditional
 164                         // (non-git generated) patch?
 165                         //
 166                         final int n = nextLF(buf, c);
 167                         if (n >= sz) {
 168                                 // Patches cannot be only one line long. This must be
 169                                 // trailing junk that we should ignore.
 170                                 //
 171                                 return sz;
 172                         }
 173
 174                         if (n - c < 6) {
 175                                 // A valid header must be at least 6 bytes on the
 176                                 // first line, e.g. "--- a/b\n".
 177                                 //
 178                                 c = n;
 179                                 continue;
 180                         }
 181
 182                         if (match(buf, c, OLD_NAME) >= 0 && match(buf, n, NEW_NAME) >= 0) {
 183                                 // Probably a traditional patch. Ensure we have at least
 184                                 // a "@@ -0,0" smelling line next. We only check the "@@ -".
 185                                 //
 186                                 final int f = nextLF(buf, n);
 187                                 if (f >= sz)
 188                                         return sz;
 189                                 if (match(buf, f, HUNK_HDR) >= 0)
 190                                         return parseTraditionalPatch(buf, c);
 191                         }
 192
 193                         c = n;
 194                 }
 195                 return c;
 196         }
 197
 198         private int parseDiffGit(final byte[] buf, final int startOffset) {
 199                 final FileHeader fh = new FileHeader(buf, startOffset);
 200                 int ptr = fh.parseGitFileName(startOffset + DIFF_GIT.length);
 201                 if (ptr < 0)
 202                         return skipFile(buf, startOffset);
 203
 204                 ptr = fh.parseGitHeaders(ptr);
 205                 ptr = parseHunks(fh, buf, ptr);
 206                 fh.endOffset = ptr;
 207                 addFile(fh);
 208                 return ptr;
 209         }
 210
 211         private int parseDiffCC(final byte[] buf, final int startOffset) {
 212                 final FileHeader fh = new FileHeader(buf, startOffset);
 213                 int ptr = fh.parseGitFileName(startOffset + DIFF_CC.length);
 214                 if (ptr < 0)
 215                         return skipFile(buf, startOffset);
 216
 217                 // TODO Support parsing diff --cc headers
 218                 // TODO parse diff --cc hunks
 219                 fh.endOffset = ptr;
 220                 addFile(fh);
 221                 return ptr;
 222         }
 223
 224         private int parseTraditionalPatch(final byte[] buf, final int startOffset) {
 225                 final FileHeader fh = new FileHeader(buf, startOffset);
 226                 int ptr = fh.parseTraditionalHeaders(startOffset);
 227                 ptr = parseHunks(fh, buf, ptr);
 228                 fh.endOffset = ptr;
 229                 addFile(fh);
 230                 return ptr;
 231         }
 232
 233         private static int skipFile(final byte[] buf, int ptr) {
 234                 ptr = nextLF(buf, ptr);
 235                 if (match(buf, ptr, OLD_NAME) >= 0)
 236                         ptr = nextLF(buf, ptr);
 237                 return ptr;
 238         }
 239
 240         private int parseHunks(final FileHeader fh, final byte[] buf, int c) {
 241                 final int sz = buf.length;
 242                 while (c < sz) {
 243                         // If we see a file header at this point, we have all of the
 244                         // hunks for our current file. We should stop and report back
 245                         // with this position so it can be parsed again later.
 246                         //
 247                         if (match(buf, c, DIFF_GIT) >= 0)
 248                                 break;
 249                         if (match(buf, c, DIFF_CC) >= 0)
 250                                 break;
 251                         if (match(buf, c, OLD_NAME) >= 0)
 252                                 break;
 253                         if (match(buf, c, NEW_NAME) >= 0)
 254                                 break;
 255
 256                         if (match(buf, c, HUNK_HDR) >= 0) {
 257                                 final HunkHeader h = new HunkHeader(fh, c);
 258                                 h.parseHeader();
 259                                 c = h.parseBody();
 260                                 h.endOffset = c;
 261                                 fh.addHunk(h);
 262                                 if (c < sz && buf[c] != '@' && buf[c] != 'd'
 263                                                 && match(buf, c, SIG_FOOTER) < 0) {
 264                                         // TODO report on noise between hunks, might be an error
 265                                 }
 266                                 continue;
 267                         }
 268
 269                         final int eol = nextLF(buf, c);
 270                         if (fh.getHunks().isEmpty() && BIN_TRAILER.length < eol - c
 271                                         && match(buf, eol - BIN_TRAILER.length, BIN_TRAILER) >= 0
 272                                         && matchAny(buf, c, BIN_HEADERS)) {
 273                                 // The patch is a binary file diff, with no deltas.
 274                                 //
 275                                 fh.patchType = FileHeader.PatchType.BINARY;
 276                                 return eol;
 277                         }
 278
 279                         // Skip this line and move to the next. Its probably garbage
 280                         // after the last hunk of a file.
 281                         //
 282                         c = eol;
 283                 }
 284
 285                 if (fh.getHunks().isEmpty()
 286                                 && fh.getPatchType() == FileHeader.PatchType.UNIFIED
 287                                 && !fh.hasMetaDataChanges()) {
 288                         // Hmm, an empty patch? If there is no metadata here we
 289                         // really have a binary patch that we didn't notice above.
 290                         //
 291                         fh.patchType = FileHeader.PatchType.BINARY;
 292                 }
 293
 294                 return c;
 295         }
 296
 297         private static boolean matchAny(final byte[] buf, final int c,
 298                         final byte[][] srcs) {
 299                 for (final byte[] s : srcs) {
 300                         if (match(buf, c, s) >= 0)
 301                                 return true;
 302                 }
 303                 return false;
 304         }
 305 }