Reduce garbage allocation when using TreeWalk
[egit/qmx.git] / org.spearce.jgit / src / org / spearce / jgit / treewalk / AbstractTreeIterator.java
blobc9aae0fc188dc347c5c0f296ee2eba35407137dd
1 /*
2 * Copyright (C) 2007, Robin Rosenberg <robin.rosenberg@dewire.com>
3 * Copyright (C) 2008, Shawn O. Pearce <spearce@spearce.org>
5 * All rights reserved.
7 * Redistribution and use in source and binary forms, with or
8 * without modification, are permitted provided that the following
9 * conditions are met:
11 * - Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
14 * - Redistributions in binary form must reproduce the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer in the documentation and/or other materials provided
17 * with the distribution.
19 * - Neither the name of the Git Development Community nor the
20 * names of its contributors may be used to endorse or promote
21 * products derived from this software without specific prior
22 * written permission.
24 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
25 * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
26 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
27 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
29 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
30 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
31 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
32 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
33 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
34 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
36 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
39 package org.spearce.jgit.treewalk;
41 import java.io.IOException;
42 import java.nio.ByteBuffer;
43 import java.nio.CharBuffer;
45 import org.spearce.jgit.errors.CorruptObjectException;
46 import org.spearce.jgit.errors.IncorrectObjectTypeException;
47 import org.spearce.jgit.lib.Constants;
48 import org.spearce.jgit.lib.FileMode;
49 import org.spearce.jgit.lib.MutableObjectId;
50 import org.spearce.jgit.lib.ObjectId;
51 import org.spearce.jgit.lib.Repository;
52 import org.spearce.jgit.lib.WindowCursor;
53 import org.spearce.jgit.treewalk.filter.TreeFilter;
55 /**
56 * Walks a Git tree (directory) in Git sort order.
57 * <p>
58 * A new iterator instance should be positioned on the first entry, or at eof.
59 * Data for the first entry (if not at eof) should be available immediately.
60 * <p>
61 * Implementors must walk a tree in the Git sort order, which has the following
62 * odd sorting:
63 * <ol>
64 * <li>A.c</li>
65 * <li>A/c</li>
66 * <li>A0c</li>
67 * </ol>
68 * <p>
69 * In the second item, <code>A</code> is the name of a subtree and
70 * <code>c</code> is a file within that subtree. The other two items are files
71 * in the root level tree.
73 * @see CanonicalTreeParser
75 public abstract class AbstractTreeIterator {
76 static final int DEFAULT_PATH_SIZE = 128;
78 /** A dummy object id buffer that matches the zero ObjectId. */
79 protected static final byte[] zeroid = new byte[Constants.OBJECT_ID_LENGTH];
81 /** Iterator for the parent tree; null if we are the root iterator. */
82 final AbstractTreeIterator parent;
84 /** The iterator this current entry is path equal to. */
85 AbstractTreeIterator matches;
87 /**
88 * Number of entries we moved forward to force a D/F conflict match.
90 * @see NameConflictTreeWalk
92 int matchShift;
94 /**
95 * Mode bits for the current entry.
96 * <p>
97 * A numerical value from FileMode is usually faster for an iterator to
98 * obtain from its data source so this is the preferred representation.
100 * @see org.spearce.jgit.lib.FileMode
102 protected int mode;
105 * Path buffer for the current entry.
106 * <p>
107 * This buffer is pre-allocated at the start of walking and is shared from
108 * parent iterators down into their subtree iterators. The sharing allows
109 * the current entry to always be a full path from the root, while each
110 * subtree only needs to populate the part that is under their control.
112 protected byte[] path;
115 * Position within {@link #path} this iterator starts writing at.
116 * <p>
117 * This is the first offset in {@link #path} that this iterator must
118 * populate during {@link #next}. At the root level (when {@link #parent}
119 * is null) this is 0. For a subtree iterator the index before this position
120 * should have the value '/'.
122 protected final int pathOffset;
125 * Total length of the current entry's complete path from the root.
126 * <p>
127 * This is the number of bytes within {@link #path} that pertain to the
128 * current entry. Values at this index through the end of the array are
129 * garbage and may be randomly populated from prior entries.
131 protected int pathLen;
133 /** Create a new iterator with no parent. */
134 protected AbstractTreeIterator() {
135 parent = null;
136 path = new byte[DEFAULT_PATH_SIZE];
137 pathOffset = 0;
141 * Create a new iterator with no parent and a prefix.
142 * <p>
143 * The prefix path supplied is inserted in front of all paths generated by
144 * this iterator. It is intended to be used when an iterator is being
145 * created for a subsection of an overall repository and needs to be
146 * combined with other iterators that are created to run over the entire
147 * repository namespace.
149 * @param prefix
150 * position of this iterator in the repository tree. The value
151 * may be null or the empty string to indicate the prefix is the
152 * root of the repository. A trailing slash ('/') is
153 * automatically appended if the prefix does not end in '/'.
155 protected AbstractTreeIterator(final String prefix) {
156 parent = null;
158 if (prefix != null && prefix.length() > 0) {
159 final ByteBuffer b;
161 b = Constants.CHARSET.encode(CharBuffer.wrap(prefix));
162 pathLen = b.limit();
163 path = new byte[Math.max(DEFAULT_PATH_SIZE, pathLen + 1)];
164 b.get(path, 0, pathLen);
165 if (path[pathLen - 1] != '/')
166 path[pathLen++] = '/';
167 pathOffset = pathLen;
168 } else {
169 path = new byte[DEFAULT_PATH_SIZE];
170 pathOffset = 0;
175 * Create an iterator for a subtree of an existing iterator.
177 * @param p
178 * parent tree iterator.
180 protected AbstractTreeIterator(final AbstractTreeIterator p) {
181 parent = p;
182 path = p.path;
183 pathOffset = p.pathLen + 1;
184 try {
185 path[pathOffset - 1] = '/';
186 } catch (ArrayIndexOutOfBoundsException e) {
187 growPath(p.pathLen);
188 path[pathOffset - 1] = '/';
193 * Create an iterator for a subtree of an existing iterator.
194 * <p>
195 * The caller is responsible for setting up the path of the child iterator.
197 * @param p
198 * parent tree iterator.
199 * @param childPath
200 * path array to be used by the child iterator. This path must
201 * contain the path from the top of the walk to the first child
202 * and must end with a '/'.
203 * @param childPathOffset
204 * position within <code>childPath</code> where the child can
205 * insert its data. The value at
206 * <code>childPath[childPathOffset-1]</code> must be '/'.
208 protected AbstractTreeIterator(final AbstractTreeIterator p,
209 final byte[] childPath, final int childPathOffset) {
210 parent = p;
211 path = childPath;
212 pathOffset = childPathOffset;
216 * Grow the path buffer larger.
218 * @param len
219 * number of live bytes in the path buffer. This many bytes will
220 * be moved into the larger buffer.
222 protected void growPath(final int len) {
223 final byte[] n = new byte[path.length << 1];
224 System.arraycopy(path, 0, n, 0, len);
225 for (AbstractTreeIterator p = this; p != null; p = p.parent)
226 p.path = n;
230 * Compare the path of this current entry to another iterator's entry.
232 * @param p
233 * the other iterator to compare the path against.
234 * @return -1 if this entry sorts first; 0 if the entries are equal; 1 if
235 * p's entry sorts first.
237 public int pathCompare(final AbstractTreeIterator p) {
238 return pathCompare(p, p.mode);
241 int pathCompare(final AbstractTreeIterator p, final int pMode) {
242 final byte[] a = path;
243 final byte[] b = p.path;
244 final int aLen = pathLen;
245 final int bLen = p.pathLen;
246 int cPos;
248 // Its common when we are a subtree for both parents to match;
249 // when this happens everything in path[0..cPos] is known to
250 // be equal and does not require evaluation again.
252 cPos = alreadyMatch(this, p);
254 for (; cPos < aLen && cPos < bLen; cPos++) {
255 final int cmp = (a[cPos] & 0xff) - (b[cPos] & 0xff);
256 if (cmp != 0)
257 return cmp;
260 if (cPos < aLen)
261 return (a[cPos] & 0xff) - lastPathChar(pMode);
262 if (cPos < bLen)
263 return lastPathChar(mode) - (b[cPos] & 0xff);
264 return lastPathChar(mode) - lastPathChar(pMode);
267 private static int alreadyMatch(AbstractTreeIterator a,
268 AbstractTreeIterator b) {
269 for (;;) {
270 final AbstractTreeIterator ap = a.parent;
271 final AbstractTreeIterator bp = b.parent;
272 if (ap == null || bp == null)
273 return 0;
274 if (ap.matches == bp.matches)
275 return a.pathOffset;
276 a = ap;
277 b = bp;
281 private static int lastPathChar(final int mode) {
282 return FileMode.TREE.equals(mode) ? '/' : '\0';
286 * Check if the current entry of both iterators has the same id.
287 * <p>
288 * This method is faster than {@link #getEntryObjectId()} as it does not
289 * require copying the bytes out of the buffers. A direct {@link #idBuffer}
290 * compare operation is performed.
292 * @param otherIterator
293 * the other iterator to test against.
294 * @return true if both iterators have the same object id; false otherwise.
296 public boolean idEqual(final AbstractTreeIterator otherIterator) {
297 return ObjectId.equals(idBuffer(), idOffset(),
298 otherIterator.idBuffer(), otherIterator.idOffset());
302 * Get the object id of the current entry.
304 * @return an object id for the current entry.
306 public ObjectId getEntryObjectId() {
307 return ObjectId.fromRaw(idBuffer(), idOffset());
311 * Get the byte array buffer object IDs must be copied out of.
312 * <p>
313 * The id buffer contains the bytes necessary to construct an ObjectId for
314 * the current entry of this iterator. The buffer can be the same buffer for
315 * all entries, or it can be a unique buffer per-entry. Implementations are
316 * encouraged to expose their private buffer whenever possible to reduce
317 * garbage generation and copying costs.
319 * @return byte array the implementation stores object IDs within.
320 * @see #getEntryObjectId()
322 public abstract byte[] idBuffer();
325 * Get the position within {@link #idBuffer()} of this entry's ObjectId.
327 * @return offset into the array returned by {@link #idBuffer()} where the
328 * ObjectId must be copied out of.
330 public abstract int idOffset();
333 * Create a new iterator for the current entry's subtree.
334 * <p>
335 * The parent reference of the iterator must be <code>this</code>,
336 * otherwise the caller would not be able to exit out of the subtree
337 * iterator correctly and return to continue walking <code>this</code>.
339 * @param repo
340 * repository to load the tree data from.
341 * @return a new parser that walks over the current subtree.
342 * @throws IncorrectObjectTypeException
343 * the current entry is not actually a tree and cannot be parsed
344 * as though it were a tree.
345 * @throws IOException
346 * a loose object or pack file could not be read.
348 public abstract AbstractTreeIterator createSubtreeIterator(Repository repo)
349 throws IncorrectObjectTypeException, IOException;
352 * Create a new iterator for the current entry's subtree.
353 * <p>
354 * The parent reference of the iterator must be <code>this</code>, otherwise
355 * the caller would not be able to exit out of the subtree iterator
356 * correctly and return to continue walking <code>this</code>.
358 * @param repo
359 * repository to load the tree data from.
360 * @param idBuffer
361 * temporary ObjectId buffer for use by this method.
362 * @param curs
363 * window cursor to use during repository access.
364 * @return a new parser that walks over the current subtree.
365 * @throws IncorrectObjectTypeException
366 * the current entry is not actually a tree and cannot be parsed
367 * as though it were a tree.
368 * @throws IOException
369 * a loose object or pack file could not be read.
371 public AbstractTreeIterator createSubtreeIterator(final Repository repo,
372 final MutableObjectId idBuffer, final WindowCursor curs)
373 throws IncorrectObjectTypeException, IOException {
374 return createSubtreeIterator(repo);
378 * Is this tree iterator positioned on its first entry?
379 * <p>
380 * An iterator is positioned on the first entry if <code>back(1)</code>
381 * would be an invalid request as there is no entry before the current one.
382 * <p>
383 * An empty iterator (one with no entries) will be
384 * <code>first() &amp;&amp; eof()</code>.
386 * @return true if the iterator is positioned on the first entry.
388 public abstract boolean first();
391 * Is this tree iterator at its EOF point (no more entries)?
392 * <p>
393 * An iterator is at EOF if there is no current entry.
395 * @return true if we have walked all entries and have none left.
397 public abstract boolean eof();
400 * Move to next entry, populating this iterator with the entry data.
401 * <p>
402 * The delta indicates how many moves forward should occur. The most common
403 * delta is 1 to move to the next entry.
404 * <p>
405 * Implementations must populate the following members:
406 * <ul>
407 * <li>{@link #mode}</li>
408 * <li>{@link #path} (from {@link #pathOffset} to {@link #pathLen})</li>
409 * <li>{@link #pathLen}</li>
410 * </ul>
411 * as well as any implementation dependent information necessary to
412 * accurately return data from {@link #idBuffer()} and {@link #idOffset()}
413 * when demanded.
415 * @param delta
416 * number of entries to move the iterator by. Must be a positive,
417 * non-zero integer.
418 * @throws CorruptObjectException
419 * the tree is invalid.
421 public abstract void next(int delta) throws CorruptObjectException;
424 * Move to prior entry, populating this iterator with the entry data.
425 * <p>
426 * The delta indicates how many moves backward should occur.The most common
427 * delta is 1 to move to the prior entry.
428 * <p>
429 * Implementations must populate the following members:
430 * <ul>
431 * <li>{@link #mode}</li>
432 * <li>{@link #path} (from {@link #pathOffset} to {@link #pathLen})</li>
433 * <li>{@link #pathLen}</li>
434 * </ul>
435 * as well as any implementation dependent information necessary to
436 * accurately return data from {@link #idBuffer()} and {@link #idOffset()}
437 * when demanded.
439 * @param delta
440 * number of entries to move the iterator by. Must be a positive,
441 * non-zero integer.
442 * @throws CorruptObjectException
443 * the tree is invalid.
445 public abstract void back(int delta) throws CorruptObjectException;
448 * Advance to the next tree entry, populating this iterator with its data.
449 * <p>
450 * This method behaves like <code>seek(1)</code> but is called by
451 * {@link TreeWalk} only if a {@link TreeFilter} was used and ruled out the
452 * current entry from the results. In such cases this tree iterator may
453 * perform special behavior.
455 * @throws CorruptObjectException
456 * the tree is invalid.
458 public void skip() throws CorruptObjectException {
459 next(1);
463 * Indicates to the iterator that no more entries will be read.
464 * <p>
465 * This is only invoked by TreeWalk when the iteration is aborted early due
466 * to a {@link org.spearce.jgit.errors.StopWalkException} being thrown from
467 * within a TreeFilter.
469 public void stopWalk() {
470 // Do nothing by default. Most iterators do not care.