Test for non-git binary files and mark them as PatchType.BINARY
[egit/qmx.git] / org.spearce.jgit / src / org / spearce / jgit / patch / FileHeader.java
blobddf52a3160389233e64e30b16273577ff56cd58c
1 /*
2 * Copyright (C) 2008, Google Inc.
4 * All rights reserved.
6 * Redistribution and use in source and binary forms, with or
7 * without modification, are permitted provided that the following
8 * conditions are met:
10 * - Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
13 * - Redistributions in binary form must reproduce the above
14 * copyright notice, this list of conditions and the following
15 * disclaimer in the documentation and/or other materials provided
16 * with the distribution.
18 * - Neither the name of the Git Development Community nor the
19 * names of its contributors may be used to endorse or promote
20 * products derived from this software without specific prior
21 * written permission.
23 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
24 * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
25 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
26 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
28 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
29 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
30 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
31 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
32 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
33 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
35 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
38 package org.spearce.jgit.patch;
40 import static org.spearce.jgit.lib.Constants.encodeASCII;
41 import static org.spearce.jgit.util.RawParseUtils.decode;
42 import static org.spearce.jgit.util.RawParseUtils.match;
43 import static org.spearce.jgit.util.RawParseUtils.nextLF;
44 import static org.spearce.jgit.util.RawParseUtils.parseBase10;
46 import java.util.ArrayList;
47 import java.util.Collections;
48 import java.util.List;
50 import org.spearce.jgit.lib.AbbreviatedObjectId;
51 import org.spearce.jgit.lib.Constants;
52 import org.spearce.jgit.lib.FileMode;
53 import org.spearce.jgit.util.QuotedString;
55 /** Patch header describing an action for a single file path. */
56 public class FileHeader {
57 /** Magical file name used for file adds or deletes. */
58 public static final String DEV_NULL = "/dev/null";
60 private static final byte[] OLD_MODE = encodeASCII("old mode ");
62 private static final byte[] NEW_MODE = encodeASCII("new mode ");
64 private static final byte[] DELETED_FILE_MODE = encodeASCII("deleted file mode ");
66 private static final byte[] NEW_FILE_MODE = encodeASCII("new file mode ");
68 private static final byte[] COPY_FROM = encodeASCII("copy from ");
70 private static final byte[] COPY_TO = encodeASCII("copy to ");
72 private static final byte[] RENAME_OLD = encodeASCII("rename old ");
74 private static final byte[] RENAME_NEW = encodeASCII("rename new ");
76 private static final byte[] RENAME_FROM = encodeASCII("rename from ");
78 private static final byte[] RENAME_TO = encodeASCII("rename to ");
80 private static final byte[] SIMILARITY_INDEX = encodeASCII("similarity index ");
82 private static final byte[] DISSIMILARITY_INDEX = encodeASCII("dissimilarity index ");
84 private static final byte[] INDEX = encodeASCII("index ");
86 static final byte[] OLD_NAME = encodeASCII("--- ");
88 static final byte[] NEW_NAME = encodeASCII("+++ ");
90 static final byte[] HUNK_HDR = encodeASCII("@@ -");
92 /** General type of change a single file-level patch describes. */
93 public static enum ChangeType {
94 /** Add a new file to the project */
95 ADD,
97 /** Modify an existing file in the project (content and/or mode) */
98 MODIFY,
100 /** Delete an existing file from the project */
101 DELETE,
103 /** Rename an existing file to a new location */
104 RENAME,
106 /** Copy an existing file to a new location, keeping the original */
107 COPY;
110 /** Type of patch used by this file. */
111 public static enum PatchType {
112 /** A traditional unified diff style patch of a text file. */
113 UNIFIED,
115 /** An empty patch with a message "Binary files ... differ" */
116 BINARY,
118 /** A Git binary patch, holding pre and post image deltas */
119 GIT_BINARY;
122 /** Buffer holding the patch data for this file. */
123 final byte[] buf;
125 /** Offset within {@link #buf} to the "diff ..." line. */
126 final int startOffset;
128 /** Position 1 past the end of this file within {@link #buf}. */
129 int endOffset;
131 /** File name of the old (pre-image). */
132 private String oldName;
134 /** File name of the new (post-image). */
135 private String newName;
137 /** Old mode of the file, if described by the patch, else null. */
138 private FileMode oldMode;
140 /** New mode of the file, if described by the patch, else null. */
141 private FileMode newMode;
143 /** General type of change indicated by the patch. */
144 private ChangeType changeType;
146 /** Similarity score if {@link #changeType} is a copy or rename. */
147 private int score;
149 /** ObjectId listed on the index line for the old (pre-image) */
150 private AbbreviatedObjectId oldId;
152 /** ObjectId listed on the index line for the new (post-image) */
153 private AbbreviatedObjectId newId;
155 /** Type of patch used to modify this file */
156 PatchType patchType;
158 /** The hunks of this file */
159 private List<HunkHeader> hunks;
/**
 * Create a header for the file patch beginning at the given position.
 * <p>
 * The header starts out as a {@link ChangeType#MODIFY} described by a
 * {@link PatchType#UNIFIED} patch; both are defaults that may be replaced
 * later.
 *
 * @param b
 *            buffer holding the patch data; stored by reference, not copied.
 * @param offset
 *            offset within <code>b</code> of the "diff ..." line.
 */
FileHeader(final byte[] b, final int offset) {
    this.buf = b;
    this.startOffset = offset;
    this.patchType = PatchType.UNIFIED;
    this.changeType = ChangeType.MODIFY; // unless otherwise designated
}
169 * Get the old name associated with this file.
170 * <p>
171 * The meaning of the old name can differ depending on the semantic meaning
172 * of this patch:
173 * <ul>
174 * <li><i>file add</i>: always <code>/dev/null</code></li>
175 * <li><i>file modify</i>: always {@link #getNewName()}</li>
176 * <li><i>file delete</i>: always the file being deleted</li>
177 * <li><i>file copy</i>: source file the copy originates from</li>
178 * <li><i>file rename</i>: source file the rename originates from</li>
179 * </ul>
181 * @return old name for this file.
183 public String getOldName() {
184 return oldName;
188 * Get the new name associated with this file.
189 * <p>
190 * The meaning of the new name can differ depending on the semantic meaning
191 * of this patch:
192 * <ul>
193 * <li><i>file add</i>: always the file being created</li>
194 * <li><i>file modify</i>: always {@link #getOldName()}</li>
195 * <li><i>file delete</i>: always <code>/dev/null</code></li>
196 * <li><i>file copy</i>: destination file the copy ends up at</li>
197 * <li><i>file rename</i>: destination file the rename ends up at/li>
198 * </ul>
200 * @return new name for this file.
202 public String getNewName() {
203 return newName;
206 /** @return the old file mode, if described in the patch */
207 public FileMode getOldMode() {
208 return oldMode;
211 /** @return the new file mode, if described in the patch */
212 public FileMode getNewMode() {
213 return newMode;
216 /** @return the type of change this patch makes on {@link #getNewName()} */
217 public ChangeType getChangeType() {
218 return changeType;
222 * @return similarity score between {@link #getOldName()} and
223 * {@link #getNewName()} if {@link #getChangeType()} is
224 * {@link ChangeType#COPY} or {@link ChangeType#RENAME}.
226 public int getScore() {
227 return score;
231 * Get the old object id from the <code>index</code>.
233 * @return the object id; null if there is no index line
235 public AbbreviatedObjectId getOldId() {
236 return oldId;
240 * Get the new object id from the <code>index</code>.
242 * @return the object id; null if there is no index line
244 public AbbreviatedObjectId getNewId() {
245 return newId;
248 /** @return style of patch used to modify this file */
249 public PatchType getPatchType() {
250 return patchType;
253 /** @return hunks altering this file; in order of appearance in patch */
254 public List<HunkHeader> getHunks() {
255 if (hunks == null)
256 return Collections.emptyList();
257 return hunks;
260 void addHunk(final HunkHeader h) {
261 if (h.getFileHeader() != this)
262 throw new IllegalArgumentException("Hunk belongs to another file");
263 if (hunks == null)
264 hunks = new ArrayList<HunkHeader>();
265 hunks.add(h);
269 * Parse a "diff --git" or "diff --cc" line.
271 * @param ptr
272 * first character after the "diff --git " or "diff --cc " part.
273 * @return first character after the LF at the end of the line; -1 on error.
275 int parseGitFileName(int ptr) {
276 final int eol = nextLF(buf, ptr);
277 final int bol = ptr;
278 if (eol >= buf.length) {
279 return -1;
282 // buffer[ptr..eol] looks like "a/foo b/foo\n". After the first
283 // A regex to match this is "^[^/]+/(.*?) [^/+]+/\1\n$". There
284 // is only one way to split the line such that text to the left
285 // of the space matches the text to the right, excluding the part
286 // before the first slash.
289 final int aStart = nextLF(buf, ptr, '/');
290 if (aStart >= eol)
291 return eol;
293 while (ptr < eol) {
294 final int sp = nextLF(buf, ptr, ' ');
295 if (sp >= eol) {
296 // We can't split the header, it isn't valid.
297 // This may be OK if this is a rename patch.
299 return eol;
301 final int bStart = nextLF(buf, sp, '/');
302 if (bStart >= eol)
303 return eol;
305 // If buffer[aStart..sp - 1] = buffer[bStart..eol - 1]
306 // we have a valid split.
308 if (eq(aStart, sp - 1, bStart, eol - 1)) {
309 if (buf[bol] == '"') {
310 // We're a double quoted name. The region better end
311 // in a double quote too, and we need to decode the
312 // characters before reading the name.
314 if (buf[sp - 2] != '"') {
315 return eol;
317 oldName = QuotedString.GIT_PATH.dequote(buf, bol, sp - 1);
318 oldName = p1(oldName);
319 } else {
320 oldName = decode(Constants.CHARSET, buf, aStart, sp - 1);
322 newName = oldName;
323 return eol;
326 // This split wasn't correct. Move past the space and try
327 // another split as the space must be part of the file name.
329 ptr = sp;
332 return eol;
335 int parseGitHeaders(int ptr) {
336 final int sz = buf.length;
337 while (ptr < sz) {
338 final int eol = nextLF(buf, ptr);
339 if (match(buf, ptr, HUNK_HDR) >= 0) {
340 // First hunk header; break out and parse them later.
341 break;
343 } else if (match(buf, ptr, OLD_NAME) >= 0) {
344 oldName = p1(parseName(oldName, ptr + OLD_NAME.length, eol));
345 if (oldName == DEV_NULL)
346 changeType = ChangeType.ADD;
348 } else if (match(buf, ptr, NEW_NAME) >= 0) {
349 newName = p1(parseName(newName, ptr + NEW_NAME.length, eol));
350 if (newName == DEV_NULL)
351 changeType = ChangeType.DELETE;
353 } else if (match(buf, ptr, OLD_MODE) >= 0) {
354 oldMode = parseFileMode(ptr + OLD_MODE.length, eol);
356 } else if (match(buf, ptr, NEW_MODE) >= 0) {
357 newMode = parseFileMode(ptr + NEW_MODE.length, eol);
359 } else if (match(buf, ptr, DELETED_FILE_MODE) >= 0) {
360 oldMode = parseFileMode(ptr + DELETED_FILE_MODE.length, eol);
361 changeType = ChangeType.DELETE;
363 } else if (match(buf, ptr, NEW_FILE_MODE) >= 0) {
364 newMode = parseFileMode(ptr + NEW_FILE_MODE.length, eol);
365 changeType = ChangeType.ADD;
367 } else if (match(buf, ptr, COPY_FROM) >= 0) {
368 oldName = parseName(oldName, ptr + COPY_FROM.length, eol);
369 changeType = ChangeType.COPY;
371 } else if (match(buf, ptr, COPY_TO) >= 0) {
372 newName = parseName(newName, ptr + COPY_TO.length, eol);
373 changeType = ChangeType.COPY;
375 } else if (match(buf, ptr, RENAME_OLD) >= 0) {
376 oldName = parseName(oldName, ptr + RENAME_OLD.length, eol);
377 changeType = ChangeType.RENAME;
379 } else if (match(buf, ptr, RENAME_NEW) >= 0) {
380 newName = parseName(newName, ptr + RENAME_NEW.length, eol);
381 changeType = ChangeType.RENAME;
383 } else if (match(buf, ptr, RENAME_FROM) >= 0) {
384 oldName = parseName(oldName, ptr + RENAME_FROM.length, eol);
385 changeType = ChangeType.RENAME;
387 } else if (match(buf, ptr, RENAME_TO) >= 0) {
388 newName = parseName(newName, ptr + RENAME_TO.length, eol);
389 changeType = ChangeType.RENAME;
391 } else if (match(buf, ptr, SIMILARITY_INDEX) >= 0) {
392 score = parseBase10(buf, ptr + SIMILARITY_INDEX.length, null);
394 } else if (match(buf, ptr, DISSIMILARITY_INDEX) >= 0) {
395 score = parseBase10(buf, ptr + DISSIMILARITY_INDEX.length, null);
397 } else if (match(buf, ptr, INDEX) >= 0) {
398 parseIndexLine(ptr + INDEX.length, eol);
400 } else {
401 // Probably an empty patch (stat dirty).
402 break;
405 ptr = eol;
407 return ptr;
410 int parseTraditionalHeaders(int ptr) {
411 final int sz = buf.length;
412 while (ptr < sz) {
413 final int eol = nextLF(buf, ptr);
414 if (match(buf, ptr, HUNK_HDR) >= 0) {
415 // First hunk header; break out and parse them later.
416 break;
418 } else if (match(buf, ptr, OLD_NAME) >= 0) {
419 oldName = p1(parseName(oldName, ptr + OLD_NAME.length, eol));
420 if (oldName == DEV_NULL)
421 changeType = ChangeType.ADD;
423 } else if (match(buf, ptr, NEW_NAME) >= 0) {
424 newName = p1(parseName(newName, ptr + NEW_NAME.length, eol));
425 if (newName == DEV_NULL)
426 changeType = ChangeType.DELETE;
428 } else {
429 // Possibly an empty patch.
430 break;
433 ptr = eol;
435 return ptr;
438 private String parseName(final String expect, int ptr, final int end) {
439 if (ptr == end)
440 return expect;
442 String r;
443 if (buf[ptr] == '"') {
444 // New style GNU diff format
446 r = QuotedString.GIT_PATH.dequote(buf, ptr, end - 1);
447 } else {
448 // Older style GNU diff format, an optional tab ends the name.
450 int tab = end;
451 while (ptr < tab && buf[tab - 1] != '\t')
452 tab--;
453 if (ptr == tab)
454 tab = end;
455 r = decode(Constants.CHARSET, buf, ptr, tab - 1);
458 if (r.equals(DEV_NULL))
459 r = DEV_NULL;
460 return r;
463 private static String p1(final String r) {
464 final int s = r.indexOf('/');
465 return s > 0 ? r.substring(s + 1) : r;
468 private FileMode parseFileMode(int ptr, final int end) {
469 int tmp = 0;
470 while (ptr < end - 1) {
471 tmp <<= 3;
472 tmp += buf[ptr++] - '0';
474 return FileMode.fromBits(tmp);
477 private void parseIndexLine(int ptr, final int end) {
478 // "index $asha1..$bsha1[ $mode]" where $asha1 and $bsha1
479 // can be unique abbreviations
481 final int dot2 = nextLF(buf, ptr, '.');
482 final int mode = nextLF(buf, dot2, ' ');
484 oldId = AbbreviatedObjectId.fromString(buf, ptr, dot2 - 1);
485 newId = AbbreviatedObjectId.fromString(buf, dot2 + 1, mode - 1);
487 if (mode < end)
488 newMode = oldMode = parseFileMode(mode, end);
491 private boolean eq(int aPtr, int aEnd, int bPtr, int bEnd) {
492 if (aEnd - aPtr != bEnd - bPtr) {
493 return false;
495 while (aPtr < aEnd) {
496 if (buf[aPtr++] != buf[bPtr++])
497 return false;
499 return true;