org.spearce.jgit/src/org/spearce/jgit/patch/Patch.java

   1 /*
   2  * Copyright (C) 2008, Google Inc.
   3  *
   4  * All rights reserved.
   5  *
   6  * Redistribution and use in source and binary forms, with or
   7  * without modification, are permitted provided that the following
   8  * conditions are met:
   9  *
  10  * - Redistributions of source code must retain the above copyright
  11  *   notice, this list of conditions and the following disclaimer.
  12  *
  13  * - Redistributions in binary form must reproduce the above
  14  *   copyright notice, this list of conditions and the following
  15  *   disclaimer in the documentation and/or other materials provided
  16  *   with the distribution.
  17  *
  18  * - Neither the name of the Git Development Community nor the
  19  *   names of its contributors may be used to endorse or promote
  20  *   products derived from this software without specific prior
  21  *   written permission.
  22  *
  23  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
  24  * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
  25  * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  26  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  27  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
  28  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  29  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  30  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  31  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
  32  * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
  33  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  34  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
  35  * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  36  */
  37
  38 package org.spearce.jgit.patch;
  39
  40 import static org.spearce.jgit.lib.Constants.encodeASCII;
  41 import static org.spearce.jgit.patch.FileHeader.HUNK_HDR;
  42 import static org.spearce.jgit.patch.FileHeader.NEW_NAME;
  43 import static org.spearce.jgit.patch.FileHeader.OLD_NAME;
  44 import static org.spearce.jgit.util.RawParseUtils.match;
  45 import static org.spearce.jgit.util.RawParseUtils.nextLF;
  46
  47 import java.io.IOException;
  48 import java.io.InputStream;
  49 import java.util.ArrayList;
  50 import java.util.List;
  51
  52 import org.spearce.jgit.util.TemporaryBuffer;
  53
  54 /** A parsed collection of {@link FileHeader}s from a unified diff patch file */
  55 public class Patch {
  56         private static final byte[] DIFF_GIT = encodeASCII("diff --git ");
  57
  58         private static final byte[] DIFF_CC = encodeASCII("diff --cc ");
  59
  60         static final byte[] SIG_FOOTER = encodeASCII("-- \n");
  61
  62         /** The files, in the order they were parsed out of the input. */
  63         private final List<FileHeader> files;
  64
  65         /** Create an empty patch. */
  66         public Patch() {
  67                 files = new ArrayList<FileHeader>();
  68         }
  69
  70         /**
  71          * Add a single file to this patch.
  72          * <p>
  73          * Typically files should be added by parsing the text through one of this
  74          * class's parse methods.
  75          *
  76          * @param fh
  77          *            the header of the file.
  78          */
  79         public void addFile(final FileHeader fh) {
  80                 files.add(fh);
  81         }
  82
  83         /** @return list of files described in the patch, in occurrence order. */
  84         public List<FileHeader> getFiles() {
  85                 return files;
  86         }
  87
  88         /**
  89          * Parse a patch received from an InputStream.
  90          * <p>
  91          * Multiple parse calls on the same instance will concatenate the patch
  92          * data, but each parse input must start with a valid file header (don't
  93          * split a single file across parse calls).
  94          *
  95          * @param is
  96          *            the stream to read the patch data from. The stream is read
  97          *            until EOF is reached.
  98          * @throws IOException
  99          *             there was an error reading from the input stream.
 100          */
 101         public void parse(final InputStream is) throws IOException {
 102                 final byte[] buf = readFully(is);
 103                 parse(buf, 0, buf.length);
 104         }
 105
 106         private static byte[] readFully(final InputStream is) throws IOException {
 107                 final TemporaryBuffer b = new TemporaryBuffer();
 108                 b.copy(is);
 109                 final byte[] buf = b.toByteArray();
 110                 return buf;
 111         }
 112
 113         /**
 114          * Parse a patch stored in a byte[].
 115          * <p>
 116          * Multiple parse calls on the same instance will concatenate the patch
 117          * data, but each parse input must start with a valid file header (don't
 118          * split a single file across parse calls).
 119          *
 120          * @param buf
 121          *            the buffer to parse.
 122          * @param ptr
 123          *            starting position to parse from.
 124          * @param end
 125          *            1 past the last position to end parsing. The total length to
 126          *            be parsed is <code>end - ptr</code>.
 127          */
 128         public void parse(final byte[] buf, int ptr, final int end) {
 129                 while (ptr < end)
 130                         ptr = parseFile(buf, ptr);
 131         }
 132
 133         private int parseFile(final byte[] buf, int c) {
 134                 final int sz = buf.length;
 135                 while (c < sz) {
 136                         if (match(buf, c, HUNK_HDR) >= 0) {
 137                                 // If we find a disconnected hunk header we might
 138                                 // have missed a file header previously. The hunk
 139                                 // isn't valid without knowing where it comes from.
 140                                 //
 141
 142                                 // TODO handle a disconnected hunk fragment
 143                                 c = nextLF(buf, c);
 144                                 continue;
 145                         }
 146
 147                         // Valid git style patch?
 148                         //
 149                         if (match(buf, c, DIFF_GIT) >= 0)
 150                                 return parseDiffGit(buf, c);
 151                         if (match(buf, c, DIFF_CC) >= 0)
 152                                 return parseDiffCC(buf, c);
 153
 154                         // Junk between files? Leading junk? Traditional
 155                         // (non-git generated) patch?
 156                         //
 157                         final int n = nextLF(buf, c);
 158                         if (n >= sz) {
 159                                 // Patches cannot be only one line long. This must be
 160                                 // trailing junk that we should ignore.
 161                                 //
 162                                 return sz;
 163                         }
 164
 165                         if (n - c < 6) {
 166                                 // A valid header must be at least 6 bytes on the
 167                                 // first line, e.g. "--- a/b\n".
 168                                 //
 169                                 c = n;
 170                                 continue;
 171                         }
 172
 173                         if (match(buf, c, OLD_NAME) >= 0 && match(buf, n, NEW_NAME) >= 0) {
 174                                 // Probably a traditional patch. Ensure we have at least
 175                                 // a "@@ -0,0" smelling line next. We only check the "@@ -".
 176                                 //
 177                                 final int f = nextLF(buf, n);
 178                                 if (f >= sz)
 179                                         return sz;
 180                                 if (match(buf, f, HUNK_HDR) >= 0)
 181                                         return parseTraditionalPatch(buf, c);
 182                         }
 183
 184                         c = n;
 185                 }
 186                 return c;
 187         }
 188
 189         private int parseDiffGit(final byte[] buf, final int startOffset) {
 190                 final FileHeader fh = new FileHeader(buf, startOffset);
 191                 int ptr = fh.parseGitFileName(startOffset + DIFF_GIT.length);
 192                 if (ptr < 0)
 193                         return skipFile(buf, startOffset);
 194
 195                 ptr = fh.parseGitHeaders(ptr);
 196                 ptr = parseHunks(fh, buf, ptr);
 197                 fh.endOffset = ptr;
 198                 addFile(fh);
 199                 return ptr;
 200         }
 201
 202         private int parseDiffCC(final byte[] buf, final int startOffset) {
 203                 final FileHeader fh = new FileHeader(buf, startOffset);
 204                 int ptr = fh.parseGitFileName(startOffset + DIFF_CC.length);
 205                 if (ptr < 0)
 206                         return skipFile(buf, startOffset);
 207
 208                 // TODO Support parsing diff --cc headers
 209                 // TODO parse diff --cc hunks
 210                 fh.endOffset = ptr;
 211                 addFile(fh);
 212                 return ptr;
 213         }
 214
 215         private int parseTraditionalPatch(final byte[] buf, final int startOffset) {
 216                 final FileHeader fh = new FileHeader(buf, startOffset);
 217                 int ptr = fh.parseTraditionalHeaders(startOffset);
 218                 ptr = parseHunks(fh, buf, ptr);
 219                 fh.endOffset = ptr;
 220                 addFile(fh);
 221                 return ptr;
 222         }
 223
 224         private static int skipFile(final byte[] buf, int ptr) {
 225                 ptr = nextLF(buf, ptr);
 226                 if (match(buf, ptr, OLD_NAME) >= 0)
 227                         ptr = nextLF(buf, ptr);
 228                 return ptr;
 229         }
 230
 231         private int parseHunks(final FileHeader fh, final byte[] buf, int c) {
 232                 final int sz = buf.length;
 233                 while (c < sz) {
 234                         // If we see a file header at this point, we have all of the
 235                         // hunks for our current file. We should stop and report back
 236                         // with this position so it can be parsed again later.
 237                         //
 238                         if (match(buf, c, DIFF_GIT) >= 0)
 239                                 return c;
 240                         if (match(buf, c, DIFF_CC) >= 0)
 241                                 return c;
 242                         if (match(buf, c, OLD_NAME) >= 0)
 243                                 return c;
 244                         if (match(buf, c, NEW_NAME) >= 0)
 245                                 return c;
 246
 247                         if (match(buf, c, HUNK_HDR) >= 0) {
 248                                 final HunkHeader h = new HunkHeader(fh, c);
 249                                 h.parseHeader();
 250                                 c = h.parseBody();
 251                                 h.endOffset = c;
 252                                 fh.addHunk(h);
 253                                 if (c < sz && buf[c] != '@' && buf[c] != 'd'
 254                                                 && match(buf, c, SIG_FOOTER) < 0) {
 255                                         // TODO report on noise between hunks, might be an error
 256                                 }
 257                                 continue;
 258                         }
 259
 260                         // Skip this line and move to the next. Its probably garbage
 261                         // after the last hunk of a file.
 262                         //
 263                         c = nextLF(buf, c);
 264                 }
 265                 return c;
 266         }
 267 }