Micro-optimize TreeWalk's exitSubtree implementation
[egit.git] / org.spearce.jgit / src / org / spearce / jgit / treewalk / TreeWalk.java
blobe4d6f3129a7e7382122222779118b92cf93950f2
1 /*
2 * Copyright (C) 2008, Shawn O. Pearce <spearce@spearce.org>
4 * All rights reserved.
6 * Redistribution and use in source and binary forms, with or
7 * without modification, are permitted provided that the following
8 * conditions are met:
10 * - Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
13 * - Redistributions in binary form must reproduce the above
14 * copyright notice, this list of conditions and the following
15 * disclaimer in the documentation and/or other materials provided
16 * with the distribution.
18 * - Neither the name of the Git Development Community nor the
19 * names of its contributors may be used to endorse or promote
20 * products derived from this software without specific prior
21 * written permission.
23 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
24 * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
25 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
26 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
28 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
29 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
30 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
31 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
32 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
33 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
35 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
38 package org.spearce.jgit.treewalk;
40 import java.io.IOException;
41 import java.util.Collections;
43 import org.spearce.jgit.errors.CorruptObjectException;
44 import org.spearce.jgit.errors.IncorrectObjectTypeException;
45 import org.spearce.jgit.errors.MissingObjectException;
46 import org.spearce.jgit.errors.StopWalkException;
47 import org.spearce.jgit.lib.Constants;
48 import org.spearce.jgit.lib.FileMode;
49 import org.spearce.jgit.lib.ObjectId;
50 import org.spearce.jgit.lib.Repository;
51 import org.spearce.jgit.revwalk.RevTree;
52 import org.spearce.jgit.treewalk.filter.PathFilterGroup;
53 import org.spearce.jgit.treewalk.filter.TreeFilter;
54 import org.spearce.jgit.util.RawParseUtils;
56 /**
57 * Walks one or more {@link AbstractTreeIterator}s in parallel.
58 * <p>
59 * This class can perform n-way differences across as many trees as necessary.
60 * <p>
61 * A TreeWalk instance can only be used once to generate results. Running a
62 * second time requires creating a new TreeWalk instance, or invoking
63 * {@link #reset()} and adding new trees before starting again. Resetting an
64 * existing instance may be faster for some applications as some internal
65 * buffers may be recycled.
66 * <p>
67 * TreeWalk instances are not thread-safe. Applications must either restrict
68 * usage of a TreeWalk instance to a single thread, or implement their own
69 * synchronization at a higher level.
70 * <p>
71 * Multiple simultaneous TreeWalk instances per {@link Repository} are
72 * permitted, even from concurrent threads.
74 public class TreeWalk {
75 /**
76 * Open a tree walk and filter to exactly one path.
77 * <p>
78 * The returned tree walk is already positioned on the requested path, so
79 * the caller should not need to invoke {@link #next()} unless they are
80 * looking for a possible directory/file name conflict.
82 * @param db
83 * repository to read tree object data from.
84 * @param path
85 * single path to advance the tree walk instance into.
86 * @param trees
87 * one or more trees to walk through.
88 * @return a new tree walk configured for exactly this one path; null if no
89 * path was found in any of the trees.
90 * @throws IOException
91 * reading a pack file or loose object failed.
92 * @throws CorruptObjectException
93 * an tree object could not be read as its data stream did not
94 * appear to be a tree, or could not be inflated.
95 * @throws IncorrectObjectTypeException
96 * an object we expected to be a tree was not a tree.
97 * @throws MissingObjectException
98 * a tree object was not found.
100 public static TreeWalk forPath(final Repository db, final String path,
101 final ObjectId[] trees) throws MissingObjectException,
102 IncorrectObjectTypeException, CorruptObjectException, IOException {
103 final TreeWalk r = new TreeWalk(db);
104 r.setFilter(PathFilterGroup.createFromStrings(Collections
105 .singleton(path)));
106 r.setRecursive(r.getFilter().shouldBeRecursive());
107 r.reset(trees);
108 return r.next() ? r : null;
112 * Open a tree walk and filter to exactly one path.
113 * <p>
114 * The returned tree walk is already positioned on the requested path, so
115 * the caller should not need to invoke {@link #next()} unless they are
116 * looking for a possible directory/file name conflict.
118 * @param db
119 * repository to read tree object data from.
120 * @param path
121 * single path to advance the tree walk instance into.
122 * @param tree
123 * the single tree to walk through.
124 * @return a new tree walk configured for exactly this one path; null if no
125 * path was found in any of the trees.
126 * @throws IOException
127 * reading a pack file or loose object failed.
128 * @throws CorruptObjectException
129 * an tree object could not be read as its data stream did not
130 * appear to be a tree, or could not be inflated.
131 * @throws IncorrectObjectTypeException
132 * an object we expected to be a tree was not a tree.
133 * @throws MissingObjectException
134 * a tree object was not found.
136 public static TreeWalk forPath(final Repository db, final String path,
137 final RevTree tree) throws MissingObjectException,
138 IncorrectObjectTypeException, CorruptObjectException, IOException {
139 return forPath(db, path, new ObjectId[] { tree });
142 private final Repository db;
144 private TreeFilter filter;
146 AbstractTreeIterator[] trees;
148 private boolean recursive;
150 private boolean postOrderTraversal;
152 private int depth;
154 private boolean advance;
156 private boolean postChildren;
158 AbstractTreeIterator currentHead;
161 * Create a new tree walker for a given repository.
163 * @param repo
164 * the repository the walker will obtain data from.
166 public TreeWalk(final Repository repo) {
167 db = repo;
168 filter = TreeFilter.ALL;
169 trees = new AbstractTreeIterator[] { new EmptyTreeIterator() };
173 * Get the repository this tree walker is reading from.
175 * @return the repository configured when the walker was created.
177 public Repository getRepository() {
178 return db;
182 * Get the currently configured filter.
184 * @return the current filter. Never null as a filter is always needed.
186 public TreeFilter getFilter() {
187 return filter;
191 * Set the tree entry filter for this walker.
192 * <p>
193 * Multiple filters may be combined by constructing an arbitrary tree of
194 * <code>AndTreeFilter</code> or <code>OrTreeFilter</code> instances to
195 * describe the boolean expression required by the application. Custom
196 * filter implementations may also be constructed by applications.
197 * <p>
198 * Note that filters are not thread-safe and may not be shared by concurrent
199 * TreeWalk instances. Every TreeWalk must be supplied its own unique
200 * filter, unless the filter implementation specifically states it is (and
201 * always will be) thread-safe. Callers may use {@link TreeFilter#clone()}
202 * to create a unique filter tree for this TreeWalk instance.
204 * @param newFilter
205 * the new filter. If null the special {@link TreeFilter#ALL}
206 * filter will be used instead, as it matches every entry.
207 * @see org.spearce.jgit.treewalk.filter.AndTreeFilter
208 * @see org.spearce.jgit.treewalk.filter.OrTreeFilter
210 public void setFilter(final TreeFilter newFilter) {
211 filter = newFilter != null ? newFilter : TreeFilter.ALL;
215 * Is this walker automatically entering into subtrees?
216 * <p>
217 * If the walker is recursive then the caller will not see a subtree node
218 * and instead will only receive file nodes in all relevant subtrees.
220 * @return true if automatically entering subtrees is enabled.
222 public boolean isRecursive() {
223 return recursive;
227 * Set the walker to enter (or not enter) subtrees automatically.
228 * <p>
229 * If recursive mode is enabled the walker will hide subtree nodes from the
230 * calling application and will produce only file level nodes. If a tree
231 * (directory) is deleted then all of the file level nodes will appear to be
232 * deleted, recursively, through as many levels as necessary to account for
233 * all entries.
235 * @param b
236 * true to skip subtree nodes and only obtain files nodes.
238 public void setRecursive(final boolean b) {
239 recursive = b;
243 * Does this walker return a tree entry after it exits the subtree?
244 * <p>
245 * If post order traversal is enabled then the walker will return a subtree
246 * after it has returned the last entry within that subtree. This may cause
247 * a subtree to be seen by the application twice if {@link #isRecursive()}
248 * is false, as the application will see it once, call
249 * {@link #enterSubtree()}, and then see it again as it leaves the subtree.
250 * <p>
251 * If an application does not enable {@link #isRecursive()} and it does not
252 * call {@link #enterSubtree()} then the tree is returned only once as none
253 * of the children were processed.
255 * @return true if subtrees are returned after entries within the subtree.
257 public boolean isPostOrderTraversal() {
258 return postOrderTraversal;
262 * Set the walker to return trees after their children.
264 * @param b
265 * true to get trees after their children.
266 * @see #isPostOrderTraversal()
268 public void setPostOrderTraversal(final boolean b) {
269 postOrderTraversal = b;
272 /** Reset this walker so new tree iterators can be added to it. */
273 public void reset() {
274 trees = new AbstractTreeIterator[0];
275 advance = false;
276 depth = 0;
280 * Reset this walker to run over a set of existing trees.
282 * @param ids
283 * the trees we need to parse. The walker will execute over this
284 * many parallel trees if the reset is successful.
285 * @throws MissingObjectException
286 * the given tree object does not exist in this repository.
287 * @throws IncorrectObjectTypeException
288 * the given object id does not denote a tree, but instead names
289 * some other non-tree type of object. Note that commits are not
290 * trees, even if they are sometimes called a "tree-ish".
291 * @throws CorruptObjectException
292 * the object claimed to be a tree, but its contents did not
293 * appear to be a tree. The repository may have data corruption.
294 * @throws IOException
295 * a loose object or pack file could not be read.
297 public void reset(final ObjectId[] ids) throws MissingObjectException,
298 IncorrectObjectTypeException, CorruptObjectException, IOException {
299 final int oldLen = trees.length;
300 final int newLen = ids.length;
301 final AbstractTreeIterator[] r = newLen == oldLen ? trees
302 : new AbstractTreeIterator[newLen];
303 for (int i = 0; i < newLen; i++) {
304 AbstractTreeIterator o;
306 if (i < oldLen) {
307 o = trees[i];
308 while (o.parent != null)
309 o = o.parent;
310 if (o instanceof CanonicalTreeParser) {
311 o.matches = null;
312 o.matchShift = 0;
313 ((CanonicalTreeParser) o).reset(db, ids[i]);
314 r[i] = o;
315 continue;
319 o = parserFor(ids[i]);
320 r[i] = o;
323 trees = r;
324 advance = false;
325 depth = 0;
329 * Add an already existing tree object for walking.
330 * <p>
331 * The position of this tree is returned to the caller, in case the caller
332 * has lost track of the order they added the trees into the walker.
334 * @param id
335 * identity of the tree object the caller wants walked.
336 * @return position of this tree within the walker.
337 * @throws MissingObjectException
338 * the given tree object does not exist in this repository.
339 * @throws IncorrectObjectTypeException
340 * the given object id does not denote a tree, but instead names
341 * some other non-tree type of object. Note that commits are not
342 * trees, even if they are sometimes called a "tree-ish".
343 * @throws CorruptObjectException
344 * the object claimed to be a tree, but its contents did not
345 * appear to be a tree. The repository may have data corruption.
346 * @throws IOException
347 * a loose object or pack file could not be read.
349 public int addTree(final ObjectId id) throws MissingObjectException,
350 IncorrectObjectTypeException, CorruptObjectException, IOException {
351 return addTree(parserFor(id));
355 * Add an already created tree iterator for walking.
356 * <p>
357 * The position of this tree is returned to the caller, in case the caller
358 * has lost track of the order they added the trees into the walker.
360 * @param p
361 * an iterator to walk over. The iterator should be new, with no
362 * parent, and should still be positioned before the first entry.
363 * @return position of this tree within the walker.
364 * @throws CorruptObjectException
365 * the iterator was unable to obtain its first entry, due to
366 * possible data corruption within the backing data store.
368 public int addTree(final AbstractTreeIterator p)
369 throws CorruptObjectException {
370 final int n = trees.length;
371 final AbstractTreeIterator[] newTrees = new AbstractTreeIterator[n + 1];
373 System.arraycopy(trees, 0, newTrees, 0, n);
374 newTrees[n] = p;
375 p.matches = null;
376 p.matchShift = 0;
378 trees = newTrees;
379 return n;
383 * Get the number of trees known to this walker.
385 * @return the total number of trees this walker is iterating over.
387 public int getTreeCount() {
388 return trees.length;
392 * Advance this walker to the next relevant entry.
394 * @return true if there is an entry available; false if all entries have
395 * been walked and the walk of this set of tree iterators is over.
396 * @throws MissingObjectException
397 * {@link #isRecursive()} was enabled, a subtree was found, but
398 * the subtree object does not exist in this repository. The
399 * repository may be missing objects.
400 * @throws IncorrectObjectTypeException
401 * {@link #isRecursive()} was enabled, a subtree was found, and
402 * the subtree id does not denote a tree, but instead names some
403 * other non-tree type of object. The repository may have data
404 * corruption.
405 * @throws CorruptObjectException
406 * the contents of a tree did not appear to be a tree. The
407 * repository may have data corruption.
408 * @throws IOException
409 * a loose object or pack file could not be read.
411 public boolean next() throws MissingObjectException,
412 IncorrectObjectTypeException, CorruptObjectException, IOException {
413 try {
414 if (advance) {
415 advance = false;
416 postChildren = false;
417 popEntriesEqual();
420 for (;;) {
421 final AbstractTreeIterator t = min();
422 if (t.eof()) {
423 if (depth > 0) {
424 exitSubtree();
425 if (postOrderTraversal) {
426 advance = true;
427 postChildren = true;
428 return true;
430 popEntriesEqual();
431 continue;
433 return false;
436 currentHead = t;
437 if (!filter.include(this)) {
438 skipEntriesEqual();
439 continue;
442 if (recursive && FileMode.TREE.equals(t.mode)) {
443 enterSubtree();
444 continue;
447 advance = true;
448 return true;
450 } catch (StopWalkException stop) {
451 for (final AbstractTreeIterator t : trees)
452 t.stopWalk();
453 return false;
458 * Obtain the tree iterator for the current entry.
459 * <p>
460 * Entering into (or exiting out of) a subtree causes the current tree
461 * iterator instance to be changed for the nth tree. This allows the tree
462 * iterators to manage only one list of items, with the diving handled by
463 * recursive trees.
465 * @param <T>
466 * type of the tree iterator expected by the caller.
467 * @param nth
468 * tree to obtain the current iterator of.
469 * @param clazz
470 * type of the tree iterator expected by the caller.
471 * @return r the current iterator of the requested type; null if the tree
472 * has no entry to match the current path.
474 public <T extends AbstractTreeIterator> T getTree(final int nth,
475 final Class<T> clazz) {
476 final AbstractTreeIterator t = trees[nth];
477 return t.matches == currentHead ? (T) t : null;
481 * Obtain the raw {@link FileMode} bits for the current entry.
482 * <p>
483 * Every added tree supplies mode bits, even if the tree does not contain
484 * the current entry. In the latter case {@link FileMode#MISSING}'s mode
485 * bits (0) are returned.
487 * @param nth
488 * tree to obtain the mode bits from.
489 * @return mode bits for the current entry of the nth tree.
490 * @see FileMode#fromBits(int)
492 public int getRawMode(final int nth) {
493 final AbstractTreeIterator t = trees[nth];
494 return t.matches == currentHead ? t.mode : 0;
498 * Obtain the {@link FileMode} for the current entry.
499 * <p>
500 * Every added tree supplies a mode, even if the tree does not contain the
501 * current entry. In the latter case {@link FileMode#MISSING} is returned.
503 * @param nth
504 * tree to obtain the mode from.
505 * @return mode for the current entry of the nth tree.
507 public FileMode getFileMode(final int nth) {
508 return FileMode.fromBits(getRawMode(nth));
512 * Obtain the ObjectId for the current entry.
513 * <p>
514 * Using this method to compare ObjectId values between trees of this walker
515 * is very inefficient. Applications should try to use
516 * {@link #idEqual(int, int)} whenever possible.
517 * <p>
518 * Every tree supplies an object id, even if the tree does not contain the
519 * current entry. In the latter case {@link ObjectId#zeroId()} is returned.
521 * @param nth
522 * tree to obtain the object identifier from.
523 * @return object identifier for the current tree entry.
524 * @see #idEqual(int, int)
526 public ObjectId getObjectId(final int nth) {
527 final AbstractTreeIterator t = trees[nth];
528 return t.matches == currentHead ? t.getEntryObjectId() : ObjectId
529 .zeroId();
533 * Compare two tree's current ObjectId values for equality.
535 * @param nthA
536 * first tree to compare the object id from.
537 * @param nthB
538 * second tree to compare the object id from.
539 * @return result of
540 * <code>getObjectId(nthA).equals(getObjectId(nthB))</code>.
541 * @see #getObjectId(int)
543 public boolean idEqual(final int nthA, final int nthB) {
544 final AbstractTreeIterator ch = currentHead;
545 final AbstractTreeIterator a = trees[nthA];
546 final AbstractTreeIterator b = trees[nthB];
547 return a.matches == ch && b.matches == ch && a.idEqual(b);
551 * Get the current entry's complete path.
552 * <p>
553 * This method is not very efficient and is primarily meant for debugging
554 * and final output generation. Applications should try to avoid calling it,
555 * and if invoked do so only once per interesting entry, where the name is
556 * absolutely required for correct function.
558 * @return complete path of the current entry, from the root of the
559 * repository. If the current entry is in a subtree there will be at
560 * least one '/' in the returned string.
562 public String getPathString() {
563 return pathOf(currentHead);
567 * Test if the supplied path matches the current entry's path.
568 * <p>
569 * This method tests that the supplied path is exactly equal to the current
570 * entry, or is one of its parent directories. It is faster to use this
571 * method then to use {@link #getPathString()} to first create a String
572 * object, then test <code>startsWith</code> or some other type of string
573 * match function.
575 * @param p
576 * path buffer to test. Callers should ensure the path does not
577 * end with '/' prior to invocation.
578 * @param pLen
579 * number of bytes from <code>buf</code> to test.
580 * @return < 0 if p is before the current path; 0 if p matches the current
581 * path; 1 if the current path is past p and p will never match
582 * again on this tree walk.
584 public int isPathPrefix(final byte[] p, final int pLen) {
585 final AbstractTreeIterator t = currentHead;
586 final byte[] c = t.path;
587 final int cLen = t.pathLen;
588 int ci;
590 for (ci = 0; ci < cLen && ci < pLen; ci++) {
591 final int c_value = (c[ci] & 0xff) - (p[ci] & 0xff);
592 if (c_value != 0)
593 return c_value;
596 if (ci < cLen) {
597 // Ran out of pattern but we still had current data.
598 // If c[ci] == '/' then pattern matches the subtree.
599 // Otherwise we cannot be certain so we return -1.
601 return c[ci] == '/' ? 0 : -1;
604 if (ci < pLen) {
605 // Ran out of current, but we still have pattern data.
606 // If p[ci] == '/' then pattern matches this subtree,
607 // otherwise we cannot be certain so we return -1.
609 return p[ci] == '/' ? 0 : -1;
612 // Both strings are identical.
614 return 0;
618 * Is the current entry a subtree?
619 * <p>
620 * This method is faster then testing the raw mode bits of all trees to see
621 * if any of them are a subtree. If at least one is a subtree then this
622 * method will return true.
624 * @return true if {@link #enterSubtree()} will work on the current node.
626 public boolean isSubtree() {
627 return FileMode.TREE.equals(currentHead.mode);
631 * Is the current entry a subtree returned after its children?
633 * @return true if the current node is a tree that has been returned after
634 * its children were already processed.
635 * @see #isPostOrderTraversal()
637 public boolean isPostChildren() {
638 return postChildren && isSubtree();
642 * Enter into the current subtree.
643 * <p>
644 * If the current entry is a subtree this method arranges for its children
645 * to be returned before the next sibling following the subtree is returned.
647 * @throws MissingObjectException
648 * a subtree was found, but the subtree object does not exist in
649 * this repository. The repository may be missing objects.
650 * @throws IncorrectObjectTypeException
651 * a subtree was found, and the subtree id does not denote a
652 * tree, but instead names some other non-tree type of object.
653 * The repository may have data corruption.
654 * @throws CorruptObjectException
655 * the contents of a tree did not appear to be a tree. The
656 * repository may have data corruption.
657 * @throws IOException
658 * a loose object or pack file could not be read.
660 public void enterSubtree() throws MissingObjectException,
661 IncorrectObjectTypeException, CorruptObjectException, IOException {
662 final AbstractTreeIterator ch = currentHead;
663 final AbstractTreeIterator[] tmp = new AbstractTreeIterator[trees.length];
664 for (int i = 0; i < trees.length; i++) {
665 final AbstractTreeIterator t = trees[i];
666 final AbstractTreeIterator n;
667 if (t.matches == ch && !t.eof() && FileMode.TREE.equals(t.mode))
668 n = t.createSubtreeIterator(db);
669 else
670 n = new EmptyTreeIterator(t);
671 tmp[i] = n;
673 depth++;
674 advance = false;
675 System.arraycopy(tmp, 0, trees, 0, trees.length);
678 AbstractTreeIterator min() throws CorruptObjectException {
679 int i = 0;
680 AbstractTreeIterator minRef = trees[i];
681 while (minRef.eof() && ++i < trees.length)
682 minRef = trees[i];
683 if (minRef.eof())
684 return minRef;
686 minRef.matches = minRef;
687 while (++i < trees.length) {
688 final AbstractTreeIterator t = trees[i];
689 if (t.eof())
690 continue;
691 final int cmp = t.pathCompare(minRef);
692 if (cmp < 0) {
693 t.matches = t;
694 minRef = t;
695 } else if (cmp == 0) {
696 t.matches = minRef;
700 return minRef;
703 void popEntriesEqual() throws CorruptObjectException {
704 final AbstractTreeIterator ch = currentHead;
705 for (int i = 0; i < trees.length; i++) {
706 final AbstractTreeIterator t = trees[i];
707 if (t.matches == ch) {
708 t.next(1);
709 t.matches = null;
714 void skipEntriesEqual() throws CorruptObjectException {
715 final AbstractTreeIterator ch = currentHead;
716 for (int i = 0; i < trees.length; i++) {
717 final AbstractTreeIterator t = trees[i];
718 if (t.matches == ch) {
719 t.skip();
720 t.matches = null;
725 private void exitSubtree() {
726 depth--;
727 for (int i = 0; i < trees.length; i++)
728 trees[i] = trees[i].parent;
730 AbstractTreeIterator minRef = null;
731 for (final AbstractTreeIterator t : trees) {
732 if (t.matches != t)
733 continue;
734 if (minRef == null || t.pathCompare(minRef) < 0)
735 minRef = t;
737 currentHead = minRef;
740 private CanonicalTreeParser parserFor(final ObjectId id)
741 throws IncorrectObjectTypeException, IOException {
742 final CanonicalTreeParser p = new CanonicalTreeParser();
743 p.reset(db, id);
744 return p;
747 private static String pathOf(final AbstractTreeIterator t) {
748 return RawParseUtils.decode(Constants.CHARSET, t.path, 0, t.pathLen);