2 * Copyright (C) 2008, Google Inc.
6 * Redistribution and use in source and binary forms, with or
7 * without modification, are permitted provided that the following
10 * - Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
13 * - Redistributions in binary form must reproduce the above
14 * copyright notice, this list of conditions and the following
15 * disclaimer in the documentation and/or other materials provided
16 * with the distribution.
18 * - Neither the name of the Git Development Community nor the
19 * names of its contributors may be used to endorse or promote
20 * products derived from this software without specific prior
23 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
24 * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
25 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
26 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
28 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
29 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
30 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
31 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
32 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
33 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
35 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
38 package org
.spearce
.jgit
.patch
;
40 import static org
.spearce
.jgit
.lib
.Constants
.encodeASCII
;
41 import static org
.spearce
.jgit
.patch
.FileHeader
.HUNK_HDR
;
42 import static org
.spearce
.jgit
.patch
.FileHeader
.NEW_NAME
;
43 import static org
.spearce
.jgit
.patch
.FileHeader
.OLD_NAME
;
44 import static org
.spearce
.jgit
.util
.RawParseUtils
.match
;
45 import static org
.spearce
.jgit
.util
.RawParseUtils
.nextLF
;
47 import java
.io
.IOException
;
48 import java
.io
.InputStream
;
49 import java
.util
.ArrayList
;
50 import java
.util
.List
;
52 import org
.spearce
.jgit
.util
.TemporaryBuffer
;
54 /** A parsed collection of {@link FileHeader}s from a unified diff patch file */
// ASCII-encoded line prefixes that the parser compares against the input
// buffer to decide what kind of line starts at a given offset.
// "diff --git " prefix: parseFile hands such lines to parseDiffGit.
56 private static final byte[] DIFF_GIT
= encodeASCII("diff --git ");
// "diff --cc " prefix: parseFile hands such lines to parseDiffCC.
58 private static final byte[] DIFF_CC
= encodeASCII("diff --cc ");
// A line matching this footer is excluded from the noise-between-hunks
// check in parseHunks. Presumably the e-mail signature separator - confirm.
// Declared without "private", so it is visible within this package.
60 static final byte[] SIG_FOOTER
= encodeASCII("-- \n");
62 /** The files, in the order they were parsed out of the input. */
63 private final List
<FileHeader
> files
;
65 /** Create an empty patch. */
// The list starts out empty. NOTE(review): the constructor signature that
// encloses this assignment is not visible in this excerpt.
67 files
= new ArrayList
<FileHeader
>();
71 * Add a single file to this patch.
73 * Typically files should be added by parsing the text through one of this
74 * class's parse methods.
77 * the header of the file.
// Adds one FileHeader to this patch. NOTE(review): the method body is not
// visible in this excerpt; presumably it appends fh to files - confirm.
79 public void addFile(final FileHeader fh
) {
83 /** @return list of files described in the patch, in occurrence order. */
// Accessor for the FileHeaders of this patch. NOTE(review): the body is not
// visible in this excerpt, so whether the internal list itself or a copy is
// returned is unconfirmed.
84 public List
<FileHeader
> getFiles() {
89 * Parse a patch received from an InputStream.
91 * Multiple parse calls on the same instance will concatenate the patch
92 * data, but each parse input must start with a valid file header (don't
93 * split a single file across parse calls).
96 * the stream to read the patch data from. The stream is read
97 * until EOF is reached.
99 * there was an error reading from the input stream.
// Stream entry point: reads the stream into a byte array with readFully and
// then delegates to parse(byte[], int, int) over the whole array
// (offset 0 up to buf.length). An IOException from reading propagates to the
// caller.
101 public void parse(final InputStream is
) throws IOException
{
102 final byte[] buf
= readFully(is
);
103 parse(buf
, 0, buf
.length
);
// Helper for parse(InputStream): goes through a TemporaryBuffer and takes the
// buffer's contents as a byte array.
// NOTE(review): the statement that fills the buffer from "is", any cleanup of
// the buffer, and the return statement are not visible in this excerpt.
106 private static byte[] readFully(final InputStream is
) throws IOException
{
107 final TemporaryBuffer b
= new TemporaryBuffer();
109 final byte[] buf
= b
.toByteArray();
114 * Parse a patch stored in a byte[].
116 * Multiple parse calls on the same instance will concatenate the patch
117 * data, but each parse input must start with a valid file header (don't
118 * split a single file across parse calls).
121 * the buffer to parse.
123 * starting position to parse from.
125 * 1 past the last position to end parsing. The total length to
126 * be parsed is <code>end - ptr</code>.
// Byte-array entry point: hands the current position to parseFile and
// continues from the position it returns.
// NOTE(review): the statement that bounds this by "end" (presumably a loop
// while ptr < end) is not visible in this excerpt - confirm.
128 public void parse(final byte[] buf
, int ptr
, final int end
) {
130 ptr
= parseFile(buf
, ptr
);
// Inspects the line that starts at offset c and dispatches on its prefix:
//   DIFF_GIT                                   -> parseDiffGit
//   DIFF_CC                                    -> parseDiffCC
//   OLD_NAME, then NEW_NAME on the next line,
//   then HUNK_HDR on the line after that       -> parseTraditionalPatch
// The delegate's int result is returned unchanged; parse(byte[], int, int)
// assigns it back to its own position variable.
// match(...) >= 0 is used throughout as "this prefix is present at that
// offset".
// NOTE(review): several lines of this method are not visible in this excerpt
// (among them any enclosing loop, the bodies of some branches and the final
// return), so the control flow between the visible statements is incomplete.
133 private int parseFile(final byte[] buf
, int c
) {
134 final int sz
= buf
.length
;
136 if (match(buf
, c
, HUNK_HDR
) >= 0) {
137 // If we find a disconnected hunk header we might
138 // have missed a file header previously. The hunk
139 // isn't valid without knowing where it comes from.
142 // TODO handle a disconnected hunk fragment
147 // Valid git style patch?
149 if (match(buf
, c
, DIFF_GIT
) >= 0)
150 return parseDiffGit(buf
, c
);
151 if (match(buf
, c
, DIFF_CC
) >= 0)
152 return parseDiffCC(buf
, c
);
154 // Junk between files? Leading junk? Traditional
155 // (non-git generated) patch?
// n is used below as the offset of the line following the one at c
// (assumes nextLF returns the offset just past the next line feed - confirm).
157 final int n
= nextLF(buf
, c
);
159 // Patches cannot be only one line long. This must be
160 // trailing junk that we should ignore.
166 // A valid header must be at least 6 bytes on the
167 // first line, e.g. "--- a/b\n".
173 if (match(buf
, c
, OLD_NAME
) >= 0 && match(buf
, n
, NEW_NAME
) >= 0) {
174 // Probably a traditional patch. Ensure we have at least
175 // a "@@ -0,0" smelling line next. We only check the "@@ -".
// f: offset of the line after the NEW_NAME line, where the hunk header is
// looked for.
177 final int f
= nextLF(buf
, n
);
180 if (match(buf
, f
, HUNK_HDR
) >= 0)
181 return parseTraditionalPatch(buf
, c
);
// Handles a file whose header line at startOffset begins with DIFF_GIT.
// Visible steps: create a FileHeader over buf at startOffset, parse the file
// name starting right after the DIFF_GIT prefix, then let
// fh.parseGitHeaders and parseHunks each advance ptr in turn.
// NOTE(review): the condition guarding the skipFile return (presumably a
// failed file-name parse) and the end of the method are not visible in this
// excerpt - confirm.
189 private int parseDiffGit(final byte[] buf
, final int startOffset
) {
190 final FileHeader fh
= new FileHeader(buf
, startOffset
);
// Name parsing begins at the first byte after the "diff --git " prefix.
191 int ptr
= fh
.parseGitFileName(startOffset
+ DIFF_GIT
.length
);
193 return skipFile(buf
, startOffset
);
195 ptr
= fh
.parseGitHeaders(ptr
);
196 ptr
= parseHunks(fh
, buf
, ptr
);
// Handles a file whose header line at startOffset begins with DIFF_CC.
// Visible steps: create a FileHeader over buf at startOffset and parse the
// file name starting right after the DIFF_CC prefix. Header and hunk parsing
// for this format are still open TODOs (see the comments at the end).
// NOTE(review): the condition guarding the skipFile return and the end of the
// method are not visible in this excerpt - confirm.
202 private int parseDiffCC(final byte[] buf
, final int startOffset
) {
203 final FileHeader fh
= new FileHeader(buf
, startOffset
);
// Name parsing begins at the first byte after the "diff --cc " prefix.
204 int ptr
= fh
.parseGitFileName(startOffset
+ DIFF_CC
.length
);
206 return skipFile(buf
, startOffset
);
208 // TODO Support parsing diff --cc headers
209 // TODO parse diff --cc hunks
// Handles a traditional (non-git) patch whose header starts at startOffset.
// parseFile calls this after seeing OLD_NAME, NEW_NAME and HUNK_HDR on three
// consecutive lines. Visible steps: create a FileHeader over buf, let
// fh.parseTraditionalHeaders advance from startOffset, then pass the resulting
// position to parseHunks.
// NOTE(review): the end of the method (including its return) is not visible
// in this excerpt.
215 private int parseTraditionalPatch(final byte[] buf
, final int startOffset
) {
216 final FileHeader fh
= new FileHeader(buf
, startOffset
);
217 int ptr
= fh
.parseTraditionalHeaders(startOffset
);
218 ptr
= parseHunks(fh
, buf
, ptr
);
// Used by parseDiffGit and parseDiffCC to step over a file header they do not
// process further. Moves ptr past the line it points at; if the following
// line starts with OLD_NAME, moves past that line as well.
// (Assumes nextLF returns the offset just past the next line feed - confirm.)
// NOTE(review): the rest of the method, including its return, is not visible
// in this excerpt.
224 private static int skipFile(final byte[] buf
, int ptr
) {
225 ptr
= nextLF(buf
, ptr
);
226 if (match(buf
, ptr
, OLD_NAME
) >= 0)
227 ptr
= nextLF(buf
, ptr
);
231 private int parseHunks(final FileHeader fh
, final byte[] buf
, int c
) {
232 final int sz
= buf
.length
;
234 // If we see a file header at this point, we have all of the
235 // hunks for our current file. We should stop and report back
236 // with this position so it can be parsed again later.
238 if (match(buf
, c
, DIFF_GIT
) >= 0)
240 if (match(buf
, c
, DIFF_CC
) >= 0)
242 if (match(buf
, c
, OLD_NAME
) >= 0)
244 if (match(buf
, c
, NEW_NAME
) >= 0)
247 if (match(buf
, c
, HUNK_HDR
) >= 0) {
248 final HunkHeader h
= new HunkHeader(fh
, c
);
253 if (c
< sz
&& buf
[c
] != '@' && buf
[c
] != 'd'
254 && match(buf
, c
, SIG_FOOTER
) < 0) {
255 // TODO report on noise between hunks, might be an error
260 // Skip this line and move to the next. It's probably garbage
261 // after the last hunk of a file.