Switch jgit library to the EDL (3-clause BSD)
[jgit.git] / org.spearce.jgit / src / org / spearce / jgit / treewalk / WorkingTreeIterator.java
blob73c4b8b2b38c1de0a6828c98df02d1431cdf17e1
1 /*
2 * Copyright (C) 2008, Shawn O. Pearce <spearce@spearce.org>
4 * All rights reserved.
6 * Redistribution and use in source and binary forms, with or
7 * without modification, are permitted provided that the following
8 * conditions are met:
10 * - Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
13 * - Redistributions in binary form must reproduce the above
14 * copyright notice, this list of conditions and the following
15 * disclaimer in the documentation and/or other materials provided
16 * with the distribution.
18 * - Neither the name of the Git Development Community nor the
19 * names of its contributors may be used to endorse or promote
20 * products derived from this software without specific prior
21 * written permission.
23 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
24 * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
25 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
26 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
28 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
29 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
30 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
31 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
32 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
33 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
35 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
38 package org.spearce.jgit.treewalk;
40 import java.io.IOException;
41 import java.io.InputStream;
42 import java.nio.ByteBuffer;
43 import java.nio.CharBuffer;
44 import java.nio.charset.CharacterCodingException;
45 import java.nio.charset.CharsetEncoder;
46 import java.security.MessageDigest;
47 import java.util.Arrays;
48 import java.util.Comparator;
50 import org.spearce.jgit.errors.CorruptObjectException;
51 import org.spearce.jgit.lib.Constants;
52 import org.spearce.jgit.lib.FileMode;
54 /**
55 * Walks a working directory tree as part of a {@link TreeWalk}.
56 * <p>
57 * Most applications will want to use the standard implementation of this
58 * iterator, {@link FileTreeIterator}, as that does all IO through the standard
59 * <code>java.io</code> package. Plugins for a Java based IDE may however wish
60 * to create their own implementations of this class to allow traversal of the
61 * IDE's project space, as well as benefit from any caching the IDE may have.
63 * @see FileTreeIterator
65 public abstract class WorkingTreeIterator extends AbstractTreeIterator {
66 /** An empty entry array, suitable for return from {@link #getEntries()}. */
67 protected static final Entry[] EOF = {};
69 /** Size we perform file IO in if we have to read and hash a file. */
70 private static final int BUFFER_SIZE = 2048;
72 /** The {@link #idBuffer()} for the current entry. */
73 private byte[] contentId;
75 /** Value of {@link #ptr} when {@link #contentId} was last populated. */
76 private int contentIdFromPtr;
78 /** Buffer used to perform {@link #contentId} computations. */
79 private byte[] contentReadBuffer;
81 /** Digest computer for {@link #contentId} computations. */
82 private MessageDigest contentDigest;
84 /** File name character encoder. */
85 private final CharsetEncoder nameEncoder;
87 /** List of entries obtained from the subclass. */
88 private Entry[] entries;
90 /** Total number of entries in {@link #entries} that are valid. */
91 private int entryCnt;
93 /** Current position within {@link #entries}. */
94 private int ptr;
96 /** Create a new iterator with no parent. */
97 protected WorkingTreeIterator() {
98 super();
99 nameEncoder = Constants.CHARSET.newEncoder();
103 * Create an iterator for a subtree of an existing iterator.
105 * @param p
106 * parent tree iterator.
108 protected WorkingTreeIterator(final WorkingTreeIterator p) {
109 super(p);
110 nameEncoder = p.nameEncoder;
113 @Override
114 protected byte[] idBuffer() {
115 if (contentIdFromPtr == ptr - 1)
116 return contentId;
117 if (entries == EOF)
118 return zeroid;
120 switch (mode & 0170000) {
121 case 0100000: /* normal files */
122 contentIdFromPtr = ptr - 1;
123 return contentId = idBufferBlob(entries[contentIdFromPtr]);
124 case 0120000: /* symbolic links */
125 // Java does not support symbolic links, so we should not
126 // have reached this particular part of the walk code.
128 return zeroid;
129 case 0160000: /* gitlink */
130 // TODO: Support obtaining current HEAD SHA-1 from nested repository
132 return zeroid;
134 return zeroid;
137 private void initializeDigest() {
138 if (contentDigest != null)
139 return;
141 if (parent == null) {
142 contentReadBuffer = new byte[BUFFER_SIZE];
143 contentDigest = Constants.newMessageDigest();
144 } else {
145 final WorkingTreeIterator p = (WorkingTreeIterator) parent;
146 p.initializeDigest();
147 contentReadBuffer = p.contentReadBuffer;
148 contentDigest = p.contentDigest;
152 private static final byte[] digits = { '0', '1', '2', '3', '4', '5', '6',
153 '7', '8', '9' };
155 private static final byte[] hblob = Constants
156 .encodedTypeString(Constants.OBJ_BLOB);
158 private byte[] idBufferBlob(final Entry e) {
159 try {
160 final InputStream is = e.openInputStream();
161 if (is == null)
162 return zeroid;
163 try {
164 initializeDigest();
166 contentDigest.reset();
167 contentDigest.update(hblob);
168 contentDigest.update((byte) ' ');
170 final long blobLength = e.getLength();
171 long sz = blobLength;
172 if (sz == 0) {
173 contentDigest.update((byte) '0');
174 } else {
175 final int bufn = contentReadBuffer.length;
176 int p = bufn;
177 do {
178 contentReadBuffer[--p] = digits[(int) (sz % 10)];
179 sz /= 10;
180 } while (sz > 0);
181 contentDigest.update(contentReadBuffer, p, bufn - p);
183 contentDigest.update((byte) 0);
185 for (;;) {
186 final int r = is.read(contentReadBuffer);
187 if (r <= 0)
188 break;
189 contentDigest.update(contentReadBuffer, 0, r);
190 sz += r;
192 if (sz != blobLength)
193 return zeroid;
194 return contentDigest.digest();
195 } finally {
196 try {
197 is.close();
198 } catch (IOException err2) {
199 // Suppress any error related to closing an input
200 // stream. We don't care, we should not have any
201 // outstanding data to flush or anything like that.
204 } catch (IOException err) {
205 // Can't read the file? Don't report the failure either.
207 return zeroid;
211 @Override
212 protected int idOffset() {
213 return 0;
216 @Override
217 public boolean eof() {
218 return entries == EOF;
221 @Override
222 public void next() throws CorruptObjectException {
223 if (entries == null)
224 loadEntries();
225 if (ptr == entryCnt) {
226 entries = EOF;
227 return;
229 if (entries == EOF)
230 return;
232 final Entry e = entries[ptr++];
233 mode = e.getMode().getBits();
235 final int nameLen = e.encodedNameLen;
236 while (pathOffset + nameLen > path.length)
237 growPath(pathOffset);
238 System.arraycopy(e.encodedName, 0, path, pathOffset, nameLen);
239 pathLen = pathOffset + nameLen;
242 private static final Comparator<Entry> ENTRY_CMP = new Comparator<Entry>() {
243 public int compare(final Entry o1, final Entry o2) {
244 final byte[] a = o1.encodedName;
245 final byte[] b = o2.encodedName;
246 final int aLen = o1.encodedNameLen;
247 final int bLen = o2.encodedNameLen;
248 int cPos;
250 for (cPos = 0; cPos < aLen && cPos < bLen; cPos++) {
251 final int cmp = (a[cPos] & 0xff) - (b[cPos] & 0xff);
252 if (cmp != 0)
253 return cmp;
256 if (cPos < aLen) {
257 final int aj = a[cPos] & 0xff;
258 final int lastb = lastPathChar(o2);
259 if (aj < lastb)
260 return -1;
261 else if (aj > lastb)
262 return 1;
263 else if (cPos == aLen - 1)
264 return 0;
265 else
266 return -1;
269 if (cPos < bLen) {
270 final int bk = b[cPos] & 0xff;
271 final int lasta = lastPathChar(o1);
272 if (lasta < bk)
273 return -1;
274 else if (lasta > bk)
275 return 1;
276 else if (cPos == bLen - 1)
277 return 0;
278 else
279 return 1;
282 final int lasta = lastPathChar(o1);
283 final int lastb = lastPathChar(o2);
284 if (lasta < lastb)
285 return -1;
286 else if (lasta > lastb)
287 return 1;
289 if (aLen == bLen)
290 return 0;
291 else if (aLen < bLen)
292 return -1;
293 else
294 return 1;
298 static int lastPathChar(final Entry e) {
299 return e.getMode() == FileMode.TREE ? '/' : '\0';
302 private void loadEntries() throws CorruptObjectException {
303 // Filter out nulls, . and .. as these are not valid tree entries,
304 // also cache the encoded forms of the path names for efficient use
305 // later on during sorting and iteration.
307 try {
308 entries = getEntries();
309 int i, o;
311 for (i = 0, o = 0; i < entries.length; i++) {
312 final Entry e = entries[i];
313 if (e == null)
314 continue;
315 final String name = e.getName();
316 if (".".equals(name) || "..".equals(name))
317 continue;
318 if (parent == null && ".git".equals(name))
319 continue;
320 if (i != o)
321 entries[o] = e;
322 e.encodeName(nameEncoder);
323 o++;
325 entryCnt = o;
326 contentIdFromPtr = -1;
327 Arrays.sort(entries, 0, entryCnt, ENTRY_CMP);
328 } catch (CharacterCodingException e) {
329 final CorruptObjectException why;
330 why = new CorruptObjectException("Invalid file name encoding");
331 why.initCause(e);
332 throw why;
333 } catch (IOException e) {
334 final CorruptObjectException why;
335 why = new CorruptObjectException("Error reading directory");
336 why.initCause(e);
337 throw why;
342 * Obtain the current entry from this iterator.
344 * @return the currently selected entry.
346 protected Entry current() {
347 return entries[ptr - 1];
351 * Obtain an unsorted list of this iterator's contents.
352 * <p>
353 * Implementations only need to provide the unsorted contents of their lower
354 * level directory. The caller will automatically prune out ".", "..",
355 * ".git", as well as null entries as necessary, and then sort the array
356 * for iteration within a TreeWalk instance.
357 * <p>
358 * The returned array will be modified by the caller.
359 * <p>
360 * This method is only invoked once per iterator instance.
362 * @return unsorted list of the immediate children. Never null, but may be
363 * {@link #EOF} if no items can be obtained.
364 * @throws IOException
365 * reading the contents failed due to IO errors.
367 protected abstract Entry[] getEntries() throws IOException;
369 /** A single entry within a working directory tree. */
370 protected static abstract class Entry {
371 byte[] encodedName;
373 int encodedNameLen;
375 void encodeName(final CharsetEncoder enc)
376 throws CharacterCodingException {
377 final ByteBuffer b = enc.encode(CharBuffer.wrap(getName()));
378 encodedNameLen = b.limit();
379 if (b.hasArray())
380 encodedName = b.array();
381 else
382 b.get(encodedName = new byte[encodedNameLen]);
385 public String toString() {
386 return getMode().toString() + " " + getName();
390 * Get the type of this entry.
391 * <p>
392 * <b>Note: Efficient implementation required.</b>
393 * <p>
394 * The implementation of this method must be efficient. If a subclass
395 * needs to compute the value they should cache the reference within an
396 * instance member instead.
398 * @return a file mode constant from {@link FileMode}.
400 public abstract FileMode getMode();
403 * Get the byte length of this entry.
404 * <p>
405 * <b>Note: Efficient implementation required.</b>
406 * <p>
407 * The implementation of this method must be efficient. If a subclass
408 * needs to compute the value they should cache the reference within an
409 * instance member instead.
411 * @return size of this file, in bytes.
413 public abstract long getLength();
416 * Get the name of this entry within its directory.
417 * <p>
418 * Efficient implementations are not required. The caller will obtain
419 * the name only once and cache it once obtained.
421 * @return name of the entry.
423 public abstract String getName();
426 * Obtain an input stream to read the file content.
427 * <p>
428 * Efficient implementations are not required. The caller will usually
429 * obtain the stream only once per entry, if at all.
430 * <p>
431 * The input stream should not use buffering if the implementation can
432 * avoid it. The caller will buffer as necessary to perform efficient
433 * block IO operations.
434 * <p>
435 * The caller will close the stream once complete.
437 * @return a stream to read from the file.
438 * @throws IOException
439 * the file could not be opened for reading.
441 public abstract InputStream openInputStream() throws IOException;