Add HunkHeader to represent a single hunk of a file within a patch
[egit/charleso.git] / org.spearce.jgit / src / org / spearce / jgit / patch / Patch.java
blobadc21d9abec82fe8d9f7dcb8a650962966037659
1 /*
2 * Copyright (C) 2008, Google Inc.
4 * All rights reserved.
6 * Redistribution and use in source and binary forms, with or
7 * without modification, are permitted provided that the following
8 * conditions are met:
10 * - Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
13 * - Redistributions in binary form must reproduce the above
14 * copyright notice, this list of conditions and the following
15 * disclaimer in the documentation and/or other materials provided
16 * with the distribution.
18 * - Neither the name of the Git Development Community nor the
19 * names of its contributors may be used to endorse or promote
20 * products derived from this software without specific prior
21 * written permission.
23 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
24 * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
25 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
26 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
28 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
29 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
30 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
31 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
32 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
33 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
35 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
38 package org.spearce.jgit.patch;
40 import static org.spearce.jgit.lib.Constants.encodeASCII;
41 import static org.spearce.jgit.patch.FileHeader.HUNK_HDR;
42 import static org.spearce.jgit.patch.FileHeader.NEW_NAME;
43 import static org.spearce.jgit.patch.FileHeader.OLD_NAME;
44 import static org.spearce.jgit.util.RawParseUtils.match;
45 import static org.spearce.jgit.util.RawParseUtils.nextLF;
47 import java.io.IOException;
48 import java.io.InputStream;
49 import java.util.ArrayList;
50 import java.util.List;
52 import org.spearce.jgit.util.TemporaryBuffer;
54 /** A parsed collection of {@link FileHeader}s from a unified diff patch file */
55 public class Patch {
56 private static final byte[] DIFF_GIT = encodeASCII("diff --git ");
58 private static final byte[] DIFF_CC = encodeASCII("diff --cc ");
60 static final byte[] SIG_FOOTER = encodeASCII("-- \n");
62 /** The files, in the order they were parsed out of the input. */
63 private final List<FileHeader> files;
/** Construct a patch that initially describes no files. */
public Patch() {
    this.files = new ArrayList<FileHeader>();
}
70 /**
71 * Add a single file to this patch.
72 * <p>
73 * Typically files should be added by parsing the text through one of this
74 * class's parse methods.
76 * @param fh
77 * the header of the file.
79 public void addFile(final FileHeader fh) {
80 files.add(fh);
83 /** @return list of files described in the patch, in occurrence order. */
84 public List<FileHeader> getFiles() {
85 return files;
88 /**
89 * Parse a patch received from an InputStream.
90 * <p>
91 * Multiple parse calls on the same instance will concatenate the patch
92 * data, but each parse input must start with a valid file header (don't
93 * split a single file across parse calls).
95 * @param is
96 * the stream to read the patch data from. The stream is read
97 * until EOF is reached.
98 * @throws IOException
99 * there was an error reading from the input stream.
101 public void parse(final InputStream is) throws IOException {
102 final byte[] buf = readFully(is);
103 parse(buf, 0, buf.length);
106 private static byte[] readFully(final InputStream is) throws IOException {
107 final TemporaryBuffer b = new TemporaryBuffer();
108 b.copy(is);
109 final byte[] buf = b.toByteArray();
110 return buf;
114 * Parse a patch stored in a byte[].
115 * <p>
116 * Multiple parse calls on the same instance will concatenate the patch
117 * data, but each parse input must start with a valid file header (don't
118 * split a single file across parse calls).
120 * @param buf
121 * the buffer to parse.
122 * @param ptr
123 * starting position to parse from.
124 * @param end
125 * 1 past the last position to end parsing. The total length to
126 * be parsed is <code>end - ptr</code>.
128 public void parse(final byte[] buf, int ptr, final int end) {
129 while (ptr < end)
130 ptr = parseFile(buf, ptr);
133 private int parseFile(final byte[] buf, int c) {
134 final int sz = buf.length;
135 while (c < sz) {
136 if (match(buf, c, HUNK_HDR) >= 0) {
137 // If we find a disconnected hunk header we might
138 // have missed a file header previously. The hunk
139 // isn't valid without knowing where it comes from.
142 // TODO handle a disconnected hunk fragment
143 c = nextLF(buf, c);
144 continue;
147 // Valid git style patch?
149 if (match(buf, c, DIFF_GIT) >= 0)
150 return parseDiffGit(buf, c);
151 if (match(buf, c, DIFF_CC) >= 0)
152 return parseDiffCC(buf, c);
154 // Junk between files? Leading junk? Traditional
155 // (non-git generated) patch?
157 final int n = nextLF(buf, c);
158 if (n >= sz) {
159 // Patches cannot be only one line long. This must be
160 // trailing junk that we should ignore.
162 return sz;
165 if (n - c < 6) {
166 // A valid header must be at least 6 bytes on the
167 // first line, e.g. "--- a/b\n".
169 c = n;
170 continue;
173 if (match(buf, c, OLD_NAME) >= 0 && match(buf, n, NEW_NAME) >= 0) {
174 // Probably a traditional patch. Ensure we have at least
175 // a "@@ -0,0" smelling line next. We only check the "@@ -".
177 final int f = nextLF(buf, n);
178 if (f >= sz)
179 return sz;
180 if (match(buf, f, HUNK_HDR) >= 0)
181 return parseTraditionalPatch(buf, c);
184 c = n;
186 return c;
189 private int parseDiffGit(final byte[] buf, final int startOffset) {
190 final FileHeader fh = new FileHeader(buf, startOffset);
191 int ptr = fh.parseGitFileName(startOffset + DIFF_GIT.length);
192 if (ptr < 0)
193 return skipFile(buf, startOffset);
195 ptr = fh.parseGitHeaders(ptr);
196 ptr = parseHunks(fh, buf, ptr);
197 fh.endOffset = ptr;
198 addFile(fh);
199 return ptr;
202 private int parseDiffCC(final byte[] buf, final int startOffset) {
203 final FileHeader fh = new FileHeader(buf, startOffset);
204 int ptr = fh.parseGitFileName(startOffset + DIFF_CC.length);
205 if (ptr < 0)
206 return skipFile(buf, startOffset);
208 // TODO Support parsing diff --cc headers
209 // TODO parse diff --cc hunks
210 fh.endOffset = ptr;
211 addFile(fh);
212 return ptr;
215 private int parseTraditionalPatch(final byte[] buf, final int startOffset) {
216 final FileHeader fh = new FileHeader(buf, startOffset);
217 int ptr = fh.parseTraditionalHeaders(startOffset);
218 ptr = parseHunks(fh, buf, ptr);
219 fh.endOffset = ptr;
220 addFile(fh);
221 return ptr;
224 private static int skipFile(final byte[] buf, int ptr) {
225 ptr = nextLF(buf, ptr);
226 if (match(buf, ptr, OLD_NAME) >= 0)
227 ptr = nextLF(buf, ptr);
228 return ptr;
231 private int parseHunks(final FileHeader fh, final byte[] buf, int c) {
232 final int sz = buf.length;
233 while (c < sz) {
234 // If we see a file header at this point, we have all of the
235 // hunks for our current file. We should stop and report back
236 // with this position so it can be parsed again later.
238 if (match(buf, c, DIFF_GIT) >= 0)
239 return c;
240 if (match(buf, c, DIFF_CC) >= 0)
241 return c;
242 if (match(buf, c, OLD_NAME) >= 0)
243 return c;
244 if (match(buf, c, NEW_NAME) >= 0)
245 return c;
247 if (match(buf, c, HUNK_HDR) >= 0) {
248 final HunkHeader h = new HunkHeader(fh, c);
249 h.parseHeader();
250 c = h.parseBody();
251 h.endOffset = c;
252 fh.addHunk(h);
253 if (c < sz && buf[c] != '@' && buf[c] != 'd'
254 && match(buf, c, SIG_FOOTER) < 0) {
255 // TODO report on noise between hunks, might be an error
257 continue;
260 // Skip this line and move to the next. Its probably garbage
261 // after the last hunk of a file.
263 c = nextLF(buf, c);
265 return c;