From 3eaa9f1229c45fb51ddfc8cc7c1aa97f8ccac7c0 Mon Sep 17 00:00:00 2001 From: "Shawn O. Pearce" Date: Thu, 11 Dec 2008 18:46:10 -0800 Subject: [PATCH] Add lineMap computer to RawParseUtils to index locations of line starts Some algorithms like a diff or patch require efficient lookup of lines within a source file, using a 1 based line number. lineMap produces a list of offsets within the file where each line starts, providing O(1) lookup time to locate those positions. Signed-off-by: Shawn O. Pearce Signed-off-by: Robin Rosenberg --- .../tst/org/spearce/jgit/util/IntListTest.java | 24 ++++++ .../jgit/util/RawParseUtils_LineMapTest.java | 97 ++++++++-------------- .../src/org/spearce/jgit/util/IntList.java | 15 ++++ .../src/org/spearce/jgit/util/RawParseUtils.java | 31 +++++++ 4 files changed, 106 insertions(+), 61 deletions(-) copy org.spearce.jgit/src/org/spearce/jgit/util/IntList.java => org.spearce.jgit.test/tst/org/spearce/jgit/util/RawParseUtils_LineMapTest.java (50%) diff --git a/org.spearce.jgit.test/tst/org/spearce/jgit/util/IntListTest.java b/org.spearce.jgit.test/tst/org/spearce/jgit/util/IntListTest.java index f943075b..c470d55f 100644 --- a/org.spearce.jgit.test/tst/org/spearce/jgit/util/IntListTest.java +++ b/org.spearce.jgit.test/tst/org/spearce/jgit/util/IntListTest.java @@ -103,6 +103,30 @@ public class IntListTest extends TestCase { } } + public void testFillTo0() { + final IntList i = new IntList(); + i.fillTo(0, Integer.MIN_VALUE); + assertEquals(0, i.size()); + } + + public void testFillTo1() { + final IntList i = new IntList(); + i.fillTo(1, Integer.MIN_VALUE); + assertEquals(1, i.size()); + i.add(0); + assertEquals(Integer.MIN_VALUE, i.get(0)); + assertEquals(0, i.get(1)); + } + + public void testFillTo100() { + final IntList i = new IntList(); + i.fillTo(100, Integer.MIN_VALUE); + assertEquals(100, i.size()); + i.add(3); + assertEquals(Integer.MIN_VALUE, i.get(99)); + assertEquals(3, i.get(100)); + } + public void testClear() { final IntList i = new IntList(); final int n = 5; diff --git a/org.spearce.jgit/src/org/spearce/jgit/util/IntList.java b/org.spearce.jgit.test/tst/org/spearce/jgit/util/RawParseUtils_LineMapTest.java similarity index 50% copy from org.spearce.jgit/src/org/spearce/jgit/util/IntList.java copy to org.spearce.jgit.test/tst/org/spearce/jgit/util/RawParseUtils_LineMapTest.java index a4b82a49..3f562a4b 100644 --- a/org.spearce.jgit/src/org/spearce/jgit/util/IntList.java +++ b/org.spearce.jgit.test/tst/org/spearce/jgit/util/RawParseUtils_LineMapTest.java @@ -37,77 +37,52 @@ package org.spearce.jgit.util; -/** A more efficient List using a primitive integer array. */ -public class IntList { - private int[] entries; +import java.io.UnsupportedEncodingException; - private int count; +import junit.framework.TestCase; - /** Create an empty list with a default capacity. */ - public IntList() { - this(10); +public class RawParseUtils_LineMapTest extends TestCase { + public void testEmpty() { + final IntList map = RawParseUtils.lineMap(new byte[] {}, 0, 0); + assertNotNull(map); + assertEquals(1, map.size()); + assertEquals(Integer.MIN_VALUE, map.get(0)); } - /** - * Create an empty list with the specified capacity. - * - * @param capacity - * number of entries the list can initially hold. - */ - public IntList(final int capacity) { - entries = new int[capacity]; + public void testOneBlankLine() { + final IntList map = RawParseUtils.lineMap(new byte[] { '\n' }, 0, 1); + assertEquals(2, map.size()); + assertEquals(Integer.MIN_VALUE, map.get(0)); + assertEquals(0, map.get(1)); } - /** @return number of entries in this list */ - public int size() { - return count; + public void testTwoLineFooBar() throws UnsupportedEncodingException { + final byte[] buf = "foo\nbar\n".getBytes("ISO-8859-1"); + final IntList map = RawParseUtils.lineMap(buf, 0, buf.length); + assertEquals(3, map.size()); + assertEquals(Integer.MIN_VALUE, map.get(0)); + assertEquals(0, map.get(1)); + assertEquals(4, map.get(2)); } - /** - * @param i - * index to read, must be in the range [0, {@link #size()}). - * @return the number at the specified index - * @throws ArrayIndexOutOfBoundsException - * the index outside the valid range - */ - public int get(final int i) { - if (count <= i) - throw new ArrayIndexOutOfBoundsException(i); - return entries[i]; + public void testTwoLineNoLF() throws UnsupportedEncodingException { + final byte[] buf = "foo\nbar".getBytes("ISO-8859-1"); + final IntList map = RawParseUtils.lineMap(buf, 0, buf.length); + assertEquals(3, map.size()); + assertEquals(Integer.MIN_VALUE, map.get(0)); + assertEquals(0, map.get(1)); + assertEquals(4, map.get(2)); } - /** Empty this list */ - public void clear() { - count = 0; + public void testFourLineBlanks() throws UnsupportedEncodingException { + final byte[] buf = "foo\n\n\nbar\n".getBytes("ISO-8859-1"); + final IntList map = RawParseUtils.lineMap(buf, 0, buf.length); + assertEquals(5, map.size()); + assertEquals(Integer.MIN_VALUE, map.get(0)); + assertEquals(0, map.get(1)); + assertEquals(4, map.get(2)); + assertEquals(5, map.get(3)); + assertEquals(6, map.get(4)); } - /** - * Add an entry to the end of the list. - * - * @param n - * the number to add. - */ - public void add(final int n) { - if (count == entries.length) - grow(); - entries[count++] = n; - } - - private void grow() { - final int[] n = new int[(entries.length + 16) * 3 / 2]; - System.arraycopy(entries, 0, n, 0, count); - entries = n; - } - - public String toString() { - final StringBuilder r = new StringBuilder(); - r.append('['); - for (int i = 0; i < count; i++) { - if (i > 0) - r.append(", "); - r.append(entries[i]); - } - r.append(']'); - return r.toString(); - } } diff --git a/org.spearce.jgit/src/org/spearce/jgit/util/IntList.java b/org.spearce.jgit/src/org/spearce/jgit/util/IntList.java index a4b82a49..0a847934 100644 --- a/org.spearce.jgit/src/org/spearce/jgit/util/IntList.java +++ b/org.spearce.jgit/src/org/spearce/jgit/util/IntList.java @@ -93,6 +93,21 @@ public class IntList { entries[count++] = n; } + /** + * Pad the list with entries. + * + * @param toIndex + * index position to stop filling at. 0 inserts no filler. 1 + * ensures the list has a size of 1, adding val if + * the list is currently empty. + * @param val + * value to insert into padded positions. + */ + public void fillTo(int toIndex, final int val) { + while (count < toIndex) + add(val); + } + private void grow() { final int[] n = new int[(entries.length + 16) * 3 / 2]; System.arraycopy(entries, 0, n, 0, count); diff --git a/org.spearce.jgit/src/org/spearce/jgit/util/RawParseUtils.java b/org.spearce.jgit/src/org/spearce/jgit/util/RawParseUtils.java index 3b1e5100..55a3001d 100644 --- a/org.spearce.jgit/src/org/spearce/jgit/util/RawParseUtils.java +++ b/org.spearce.jgit/src/org/spearce/jgit/util/RawParseUtils.java @@ -266,6 +266,37 @@ public final class RawParseUtils { } /** + * Index the region between [ptr, end) to find line starts. + *

+ * The returned list is 1 indexed. Index 0 contains + * {@link Integer#MIN_VALUE} to pad the list out. + *

+ * Using a 1 indexed list means that line numbers can be directly accessed + * from the list, so list.get(1) (aka get line 1) returns + * ptr. + * + * @param buf + * buffer to scan. + * @param ptr + * position within the buffer corresponding to the first byte of + * line 1. + * @param end + * 1 past the end of the content within buf. + * @return a line map indexing the start position of each line. + */ + public static final IntList lineMap(final byte[] buf, int ptr, int end) { + // Experimentally derived from multiple source repositories + // the average number of bytes/line is 36. Its a rough guess + // to initially size our map close to the target. + // + final IntList map = new IntList((end - ptr) / 36); + map.fillTo(1, Integer.MIN_VALUE); + for (; ptr < end; ptr = nextLF(buf, ptr)) + map.add(ptr); + return map; + } + + /** * Locate the "author " header line data. * * @param b -- 2.11.4.GIT