Add raw buffer fetch methods to FileHeader, HunkHeader
[egit/qmx.git] / org.spearce.jgit / src / org / spearce / jgit / patch / FileHeader.java
blobc91f80e731b24993964115afa52a13148fb17cf5
/*
 * Copyright (C) 2008, Google Inc.
 *
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or
 * without modification, are permitted provided that the following
 * conditions are met:
 *
 * - Redistributions of source code must retain the above copyright
 *   notice, this list of conditions and the following disclaimer.
 *
 * - Redistributions in binary form must reproduce the above
 *   copyright notice, this list of conditions and the following
 *   disclaimer in the documentation and/or other materials provided
 *   with the distribution.
 *
 * - Neither the name of the Git Development Community nor the
 *   names of its contributors may be used to endorse or promote
 *   products derived from this software without specific prior
 *   written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
 * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
38 package org.spearce.jgit.patch;
40 import static org.spearce.jgit.lib.Constants.encodeASCII;
41 import static org.spearce.jgit.util.RawParseUtils.decode;
42 import static org.spearce.jgit.util.RawParseUtils.match;
43 import static org.spearce.jgit.util.RawParseUtils.nextLF;
44 import static org.spearce.jgit.util.RawParseUtils.parseBase10;
46 import java.util.ArrayList;
47 import java.util.Collections;
48 import java.util.List;
50 import org.spearce.jgit.lib.AbbreviatedObjectId;
51 import org.spearce.jgit.lib.Constants;
52 import org.spearce.jgit.lib.FileMode;
53 import org.spearce.jgit.util.QuotedString;
55 /** Patch header describing an action for a single file path. */
56 public class FileHeader {
57 /** Magical file name used for file adds or deletes. */
58 public static final String DEV_NULL = "/dev/null";
60 private static final byte[] OLD_MODE = encodeASCII("old mode ");
62 private static final byte[] NEW_MODE = encodeASCII("new mode ");
64 protected static final byte[] DELETED_FILE_MODE = encodeASCII("deleted file mode ");
66 protected static final byte[] NEW_FILE_MODE = encodeASCII("new file mode ");
68 private static final byte[] COPY_FROM = encodeASCII("copy from ");
70 private static final byte[] COPY_TO = encodeASCII("copy to ");
72 private static final byte[] RENAME_OLD = encodeASCII("rename old ");
74 private static final byte[] RENAME_NEW = encodeASCII("rename new ");
76 private static final byte[] RENAME_FROM = encodeASCII("rename from ");
78 private static final byte[] RENAME_TO = encodeASCII("rename to ");
80 private static final byte[] SIMILARITY_INDEX = encodeASCII("similarity index ");
82 private static final byte[] DISSIMILARITY_INDEX = encodeASCII("dissimilarity index ");
84 protected static final byte[] INDEX = encodeASCII("index ");
86 static final byte[] OLD_NAME = encodeASCII("--- ");
88 static final byte[] NEW_NAME = encodeASCII("+++ ");
90 /** General type of change a single file-level patch describes. */
91 public static enum ChangeType {
92 /** Add a new file to the project */
93 ADD,
95 /** Modify an existing file in the project (content and/or mode) */
96 MODIFY,
98 /** Delete an existing file from the project */
99 DELETE,
101 /** Rename an existing file to a new location */
102 RENAME,
104 /** Copy an existing file to a new location, keeping the original */
105 COPY;
108 /** Type of patch used by this file. */
109 public static enum PatchType {
110 /** A traditional unified diff style patch of a text file. */
111 UNIFIED,
113 /** An empty patch with a message "Binary files ... differ" */
114 BINARY,
116 /** A Git binary patch, holding pre and post image deltas */
117 GIT_BINARY;
120 /** Buffer holding the patch data for this file. */
121 final byte[] buf;
123 /** Offset within {@link #buf} to the "diff ..." line. */
124 final int startOffset;
126 /** Position 1 past the end of this file within {@link #buf}. */
127 int endOffset;
129 /** File name of the old (pre-image). */
130 private String oldName;
132 /** File name of the new (post-image). */
133 private String newName;
135 /** Old mode of the file, if described by the patch, else null. */
136 private FileMode oldMode;
138 /** New mode of the file, if described by the patch, else null. */
139 protected FileMode newMode;
141 /** General type of change indicated by the patch. */
142 protected ChangeType changeType;
144 /** Similarity score if {@link #changeType} is a copy or rename. */
145 private int score;
147 /** ObjectId listed on the index line for the old (pre-image) */
148 private AbbreviatedObjectId oldId;
150 /** ObjectId listed on the index line for the new (post-image) */
151 protected AbbreviatedObjectId newId;
153 /** Type of patch used to modify this file */
154 PatchType patchType;
156 /** The hunks of this file */
157 private List<HunkHeader> hunks;
159 /** If {@link #patchType} is {@link PatchType#GIT_BINARY}, the new image */
160 BinaryHunk forwardBinaryHunk;
162 /** If {@link #patchType} is {@link PatchType#GIT_BINARY}, the old image */
163 BinaryHunk reverseBinaryHunk;
/**
 * Create a header for the file patch starting at the given position.
 * <p>
 * The change type starts out as {@link ChangeType#MODIFY} and the patch
 * type as {@link PatchType#UNIFIED}; the parse methods overwrite these
 * when the header says otherwise.
 *
 * @param b
 *            buffer holding the patch data; kept by reference, not copied.
 * @param offset
 *            position within <code>b</code> where this file's patch begins.
 */
FileHeader(final byte[] b, final int offset) {
	this.buf = b;
	this.startOffset = offset;
	this.changeType = ChangeType.MODIFY; // unless otherwise designated
	this.patchType = PatchType.UNIFIED;
}
172 int getParentCount() {
173 return 1;
176 /** @return the byte array holding this file's patch script. */
177 public byte[] getBuffer() {
178 return buf;
181 /** @return offset the start of this file's script in {@link #getBuffer()}. */
182 public int getStartOffset() {
183 return startOffset;
186 /** @return offset one past the end of the file script. */
187 public int getEndOffset() {
188 return endOffset;
192 * Get the old name associated with this file.
193 * <p>
194 * The meaning of the old name can differ depending on the semantic meaning
195 * of this patch:
196 * <ul>
197 * <li><i>file add</i>: always <code>/dev/null</code></li>
198 * <li><i>file modify</i>: always {@link #getNewName()}</li>
199 * <li><i>file delete</i>: always the file being deleted</li>
200 * <li><i>file copy</i>: source file the copy originates from</li>
201 * <li><i>file rename</i>: source file the rename originates from</li>
202 * </ul>
204 * @return old name for this file.
206 public String getOldName() {
207 return oldName;
211 * Get the new name associated with this file.
212 * <p>
213 * The meaning of the new name can differ depending on the semantic meaning
214 * of this patch:
215 * <ul>
216 * <li><i>file add</i>: always the file being created</li>
217 * <li><i>file modify</i>: always {@link #getOldName()}</li>
218 * <li><i>file delete</i>: always <code>/dev/null</code></li>
219 * <li><i>file copy</i>: destination file the copy ends up at</li>
220 * <li><i>file rename</i>: destination file the rename ends up at/li>
221 * </ul>
223 * @return new name for this file.
225 public String getNewName() {
226 return newName;
229 /** @return the old file mode, if described in the patch */
230 public FileMode getOldMode() {
231 return oldMode;
234 /** @return the new file mode, if described in the patch */
235 public FileMode getNewMode() {
236 return newMode;
239 /** @return the type of change this patch makes on {@link #getNewName()} */
240 public ChangeType getChangeType() {
241 return changeType;
245 * @return similarity score between {@link #getOldName()} and
246 * {@link #getNewName()} if {@link #getChangeType()} is
247 * {@link ChangeType#COPY} or {@link ChangeType#RENAME}.
249 public int getScore() {
250 return score;
254 * Get the old object id from the <code>index</code>.
256 * @return the object id; null if there is no index line
258 public AbbreviatedObjectId getOldId() {
259 return oldId;
263 * Get the new object id from the <code>index</code>.
265 * @return the object id; null if there is no index line
267 public AbbreviatedObjectId getNewId() {
268 return newId;
271 /** @return style of patch used to modify this file */
272 public PatchType getPatchType() {
273 return patchType;
276 /** @return true if this patch modifies metadata about a file */
277 public boolean hasMetaDataChanges() {
278 return changeType != ChangeType.MODIFY || newMode != oldMode;
281 /** @return hunks altering this file; in order of appearance in patch */
282 public List<? extends HunkHeader> getHunks() {
283 if (hunks == null)
284 return Collections.emptyList();
285 return hunks;
288 void addHunk(final HunkHeader h) {
289 if (h.getFileHeader() != this)
290 throw new IllegalArgumentException("Hunk belongs to another file");
291 if (hunks == null)
292 hunks = new ArrayList<HunkHeader>();
293 hunks.add(h);
296 HunkHeader newHunkHeader(final int offset) {
297 return new HunkHeader(this, offset);
300 /** @return if a {@link PatchType#GIT_BINARY}, the new-image delta/literal */
301 public BinaryHunk getForwardBinaryHunk() {
302 return forwardBinaryHunk;
305 /** @return if a {@link PatchType#GIT_BINARY}, the old-image delta/literal */
306 public BinaryHunk getReverseBinaryHunk() {
307 return reverseBinaryHunk;
311 * Parse a "diff --git" or "diff --cc" line.
313 * @param ptr
314 * first character after the "diff --git " or "diff --cc " part.
315 * @param end
316 * one past the last position to parse.
317 * @return first character after the LF at the end of the line; -1 on error.
319 int parseGitFileName(int ptr, final int end) {
320 final int eol = nextLF(buf, ptr);
321 final int bol = ptr;
322 if (eol >= end) {
323 return -1;
326 // buffer[ptr..eol] looks like "a/foo b/foo\n". After the first
327 // A regex to match this is "^[^/]+/(.*?) [^/+]+/\1\n$". There
328 // is only one way to split the line such that text to the left
329 // of the space matches the text to the right, excluding the part
330 // before the first slash.
333 final int aStart = nextLF(buf, ptr, '/');
334 if (aStart >= eol)
335 return eol;
337 while (ptr < eol) {
338 final int sp = nextLF(buf, ptr, ' ');
339 if (sp >= eol) {
340 // We can't split the header, it isn't valid.
341 // This may be OK if this is a rename patch.
343 return eol;
345 final int bStart = nextLF(buf, sp, '/');
346 if (bStart >= eol)
347 return eol;
349 // If buffer[aStart..sp - 1] = buffer[bStart..eol - 1]
350 // we have a valid split.
352 if (eq(aStart, sp - 1, bStart, eol - 1)) {
353 if (buf[bol] == '"') {
354 // We're a double quoted name. The region better end
355 // in a double quote too, and we need to decode the
356 // characters before reading the name.
358 if (buf[sp - 2] != '"') {
359 return eol;
361 oldName = QuotedString.GIT_PATH.dequote(buf, bol, sp - 1);
362 oldName = p1(oldName);
363 } else {
364 oldName = decode(Constants.CHARSET, buf, aStart, sp - 1);
366 newName = oldName;
367 return eol;
370 // This split wasn't correct. Move past the space and try
371 // another split as the space must be part of the file name.
373 ptr = sp;
376 return eol;
379 int parseGitHeaders(int ptr, final int end) {
380 while (ptr < end) {
381 final int eol = nextLF(buf, ptr);
382 if (isHunkHdr(buf, ptr, eol) >= 1) {
383 // First hunk header; break out and parse them later.
384 break;
386 } else if (match(buf, ptr, OLD_NAME) >= 0) {
387 parseOldName(ptr, eol);
389 } else if (match(buf, ptr, NEW_NAME) >= 0) {
390 parseNewName(ptr, eol);
392 } else if (match(buf, ptr, OLD_MODE) >= 0) {
393 oldMode = parseFileMode(ptr + OLD_MODE.length, eol);
395 } else if (match(buf, ptr, NEW_MODE) >= 0) {
396 newMode = parseFileMode(ptr + NEW_MODE.length, eol);
398 } else if (match(buf, ptr, DELETED_FILE_MODE) >= 0) {
399 oldMode = parseFileMode(ptr + DELETED_FILE_MODE.length, eol);
400 newMode = FileMode.MISSING;
401 changeType = ChangeType.DELETE;
403 } else if (match(buf, ptr, NEW_FILE_MODE) >= 0) {
404 parseNewFileMode(ptr, eol);
406 } else if (match(buf, ptr, COPY_FROM) >= 0) {
407 oldName = parseName(oldName, ptr + COPY_FROM.length, eol);
408 changeType = ChangeType.COPY;
410 } else if (match(buf, ptr, COPY_TO) >= 0) {
411 newName = parseName(newName, ptr + COPY_TO.length, eol);
412 changeType = ChangeType.COPY;
414 } else if (match(buf, ptr, RENAME_OLD) >= 0) {
415 oldName = parseName(oldName, ptr + RENAME_OLD.length, eol);
416 changeType = ChangeType.RENAME;
418 } else if (match(buf, ptr, RENAME_NEW) >= 0) {
419 newName = parseName(newName, ptr + RENAME_NEW.length, eol);
420 changeType = ChangeType.RENAME;
422 } else if (match(buf, ptr, RENAME_FROM) >= 0) {
423 oldName = parseName(oldName, ptr + RENAME_FROM.length, eol);
424 changeType = ChangeType.RENAME;
426 } else if (match(buf, ptr, RENAME_TO) >= 0) {
427 newName = parseName(newName, ptr + RENAME_TO.length, eol);
428 changeType = ChangeType.RENAME;
430 } else if (match(buf, ptr, SIMILARITY_INDEX) >= 0) {
431 score = parseBase10(buf, ptr + SIMILARITY_INDEX.length, null);
433 } else if (match(buf, ptr, DISSIMILARITY_INDEX) >= 0) {
434 score = parseBase10(buf, ptr + DISSIMILARITY_INDEX.length, null);
436 } else if (match(buf, ptr, INDEX) >= 0) {
437 parseIndexLine(ptr + INDEX.length, eol);
439 } else {
440 // Probably an empty patch (stat dirty).
441 break;
444 ptr = eol;
446 return ptr;
449 protected void parseOldName(int ptr, final int eol) {
450 oldName = p1(parseName(oldName, ptr + OLD_NAME.length, eol));
451 if (oldName == DEV_NULL)
452 changeType = ChangeType.ADD;
455 protected void parseNewName(int ptr, final int eol) {
456 newName = p1(parseName(newName, ptr + NEW_NAME.length, eol));
457 if (newName == DEV_NULL)
458 changeType = ChangeType.DELETE;
461 protected void parseNewFileMode(int ptr, final int eol) {
462 oldMode = FileMode.MISSING;
463 newMode = parseFileMode(ptr + NEW_FILE_MODE.length, eol);
464 changeType = ChangeType.ADD;
467 int parseTraditionalHeaders(int ptr, final int end) {
468 while (ptr < end) {
469 final int eol = nextLF(buf, ptr);
470 if (isHunkHdr(buf, ptr, eol) >= 1) {
471 // First hunk header; break out and parse them later.
472 break;
474 } else if (match(buf, ptr, OLD_NAME) >= 0) {
475 parseOldName(ptr, eol);
477 } else if (match(buf, ptr, NEW_NAME) >= 0) {
478 parseNewName(ptr, eol);
480 } else {
481 // Possibly an empty patch.
482 break;
485 ptr = eol;
487 return ptr;
490 private String parseName(final String expect, int ptr, final int end) {
491 if (ptr == end)
492 return expect;
494 String r;
495 if (buf[ptr] == '"') {
496 // New style GNU diff format
498 r = QuotedString.GIT_PATH.dequote(buf, ptr, end - 1);
499 } else {
500 // Older style GNU diff format, an optional tab ends the name.
502 int tab = end;
503 while (ptr < tab && buf[tab - 1] != '\t')
504 tab--;
505 if (ptr == tab)
506 tab = end;
507 r = decode(Constants.CHARSET, buf, ptr, tab - 1);
510 if (r.equals(DEV_NULL))
511 r = DEV_NULL;
512 return r;
515 private static String p1(final String r) {
516 final int s = r.indexOf('/');
517 return s > 0 ? r.substring(s + 1) : r;
520 protected FileMode parseFileMode(int ptr, final int end) {
521 int tmp = 0;
522 while (ptr < end - 1) {
523 tmp <<= 3;
524 tmp += buf[ptr++] - '0';
526 return FileMode.fromBits(tmp);
529 protected void parseIndexLine(int ptr, final int end) {
530 // "index $asha1..$bsha1[ $mode]" where $asha1 and $bsha1
531 // can be unique abbreviations
533 final int dot2 = nextLF(buf, ptr, '.');
534 final int mode = nextLF(buf, dot2, ' ');
536 oldId = AbbreviatedObjectId.fromString(buf, ptr, dot2 - 1);
537 newId = AbbreviatedObjectId.fromString(buf, dot2 + 1, mode - 1);
539 if (mode < end)
540 newMode = oldMode = parseFileMode(mode, end);
543 private boolean eq(int aPtr, int aEnd, int bPtr, int bEnd) {
544 if (aEnd - aPtr != bEnd - bPtr) {
545 return false;
547 while (aPtr < aEnd) {
548 if (buf[aPtr++] != buf[bPtr++])
549 return false;
551 return true;
555 * Determine if this is a patch hunk header.
557 * @param buf
558 * the buffer to scan
559 * @param start
560 * first position in the buffer to evaluate
561 * @param end
562 * last position to consider; usually the end of the buffer (
563 * <code>buf.length</code>) or the first position on the next
564 * line. This is only used to avoid very long runs of '@' from
565 * killing the scan loop.
566 * @return the number of "ancestor revisions" in the hunk header. A
567 * traditional two-way diff ("@@ -...") returns 1; a combined diff
568 * for a 3 way-merge returns 3. If this is not a hunk header, 0 is
569 * returned instead.
571 static int isHunkHdr(final byte[] buf, final int start, final int end) {
572 int ptr = start;
573 while (ptr < end && buf[ptr] == '@')
574 ptr++;
575 if (ptr - start < 2)
576 return 0;
577 if (ptr == end || buf[ptr++] != ' ')
578 return 0;
579 if (ptr == end || buf[ptr++] != '-')
580 return 0;
581 return (ptr - 3) - start;