From 8367e9e90af47624d349dc91e30a677610c0655e Mon Sep 17 00:00:00 2001 From: "Shawn O. Pearce" Date: Tue, 19 Aug 2008 15:51:54 -0700 Subject: [PATCH] Support path names longer than 4095 bytes in DirCache C Git supports long paths within the index file by setting the length portion of the flags field to 4095 (0xfff) and then it does a simple C strlen() on the remaining part of the string to locate the first null byte. The trick works because every index record must have between 1 and 8 null bytes trailing the entry as padding. C Git had this rule to make it easy to pass the mmap'd name portion of an entry directly to system calls, and it helped to ensure all members were aligned properly in memory. We now support infinite length paths (well, up to 2GB) by using the same encoding strategy, however paths under the 4095 limit still perform better as they do not require a single byte read loop and a growing/shrinking byte buffer. Signed-off-by: Shawn O. Pearce Signed-off-by: Robin Rosenberg --- .../jgit/dircache/DirCacheLargePathTest.java | 106 +++++++++++++++++++++ .../org/spearce/jgit/dircache/DirCacheEntry.java | 39 ++++++-- 2 files changed, 136 insertions(+), 9 deletions(-) create mode 100644 org.spearce.jgit.test/tst/org/spearce/jgit/dircache/DirCacheLargePathTest.java diff --git a/org.spearce.jgit.test/tst/org/spearce/jgit/dircache/DirCacheLargePathTest.java b/org.spearce.jgit.test/tst/org/spearce/jgit/dircache/DirCacheLargePathTest.java new file mode 100644 index 00000000..4ea286cb --- /dev/null +++ b/org.spearce.jgit.test/tst/org/spearce/jgit/dircache/DirCacheLargePathTest.java @@ -0,0 +1,106 @@ +/* + * Copyright (C) 2008, Google Inc. + * + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * + * - Neither the name of the Git Development Community nor the + * names of its contributors may be used to endorse or promote + * products derived from this software without specific prior + * written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND + * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +package org.spearce.jgit.dircache; + +import java.io.IOException; + +import org.spearce.jgit.errors.CorruptObjectException; +import org.spearce.jgit.lib.RepositoryTestCase; + +public class DirCacheLargePathTest extends RepositoryTestCase { + public void testPath_4090() throws Exception { + testLongPath(4090); + } + + public void testPath_4094() throws Exception { + testLongPath(4094); + } + + public void testPath_4095() throws Exception { + testLongPath(4095); + } + + public void testPath_4096() throws Exception { + testLongPath(4096); + } + + public void testPath_16384() throws Exception { + testLongPath(16384); + } + + private void testLongPath(final int len) throws CorruptObjectException, + IOException { + final String longPath = makeLongPath(len); + final String shortPath = "~~~ shorter-path"; + + final DirCacheEntry longEnt = new DirCacheEntry(longPath); + final DirCacheEntry shortEnt = new DirCacheEntry(shortPath); + assertEquals(longPath, longEnt.getPathString()); + assertEquals(shortPath, shortEnt.getPathString()); + + { + final DirCache dc1 = DirCache.lock(db); + { + final DirCacheBuilder b = dc1.builder(); + b.add(longEnt); + b.add(shortEnt); + assertTrue(b.commit()); + } + assertEquals(2, dc1.getEntryCount()); + assertSame(longEnt, dc1.getEntry(0)); + assertSame(shortEnt, dc1.getEntry(1)); + } + { + final DirCache dc2 = DirCache.read(db); + assertEquals(2, dc2.getEntryCount()); + + assertNotSame(longEnt, dc2.getEntry(0)); + assertEquals(longPath, dc2.getEntry(0).getPathString()); + + assertNotSame(shortEnt, dc2.getEntry(1)); + assertEquals(shortPath, dc2.getEntry(1).getPathString()); + } + } + + private static String makeLongPath(final int len) { + final StringBuilder r = new StringBuilder(len); + for (int i = 0; i < len; i++) + r.append('a' + (i % 26)); + return r.toString(); + } +} diff --git a/org.spearce.jgit/src/org/spearce/jgit/dircache/DirCacheEntry.java b/org.spearce.jgit/src/org/spearce/jgit/dircache/DirCacheEntry.java index 011bc16c..dbd74b50 100644 --- a/org.spearce.jgit/src/org/spearce/jgit/dircache/DirCacheEntry.java +++ b/org.spearce.jgit/src/org/spearce/jgit/dircache/DirCacheEntry.java @@ -37,6 +37,8 @@ package org.spearce.jgit.dircache; +import java.io.ByteArrayOutputStream; +import java.io.EOFException; import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; @@ -105,18 +107,36 @@ public class DirCacheEntry { NB.readFully(in, info, infoOffset, INFO_LEN); int pathLen = NB.decodeUInt16(info, infoOffset + P_FLAGS) & NAME_MASK; - if (pathLen == NAME_MASK) - throw new IOException("Path name too long for jgit"); - path = new byte[pathLen]; - NB.readFully(in, path, 0, pathLen); + int skipped = 0; + if (pathLen < NAME_MASK) { + path = new byte[pathLen]; + NB.readFully(in, path, 0, pathLen); + } else { + final ByteArrayOutputStream tmp = new ByteArrayOutputStream(); + { + final byte[] buf = new byte[NAME_MASK]; + NB.readFully(in, buf, 0, NAME_MASK); + tmp.write(buf); + } + for (;;) { + final int c = in.read(); + if (c < 0) + throw new EOFException("Short read of block."); + if (c == 0) + break; + tmp.write(c); + } + path = tmp.toByteArray(); + pathLen = path.length; + skipped = 1; // we already skipped 1 '\0' above to break the loop. + } // Index records are padded out to the next 8 byte alignment // for historical reasons related to how C Git read the files. // final int actLen = INFO_LEN + pathLen; final int expLen = (actLen + 8) & ~7; - if (actLen != expLen) - NB.skipFully(in, expLen - actLen); + NB.skipFully(in, expLen - actLen - skipped); } /** @@ -140,9 +160,10 @@ public class DirCacheEntry { infoOffset = 0; path = newPath; - if (path.length >= NAME_MASK) - throw new IllegalArgumentException("Path name too long for jgit"); - NB.encodeInt16(info, infoOffset + P_FLAGS, path.length); + if (path.length < NAME_MASK) + NB.encodeInt16(info, infoOffset + P_FLAGS, path.length); + else + NB.encodeInt16(info, infoOffset + P_FLAGS, NAME_MASK); } void write(final OutputStream os) throws IOException { -- 2.11.4.GIT