Add support for writing pack index v2 files
[egit/zawir.git] / org.spearce.jgit / src / org / spearce / jgit / lib / PackIndexWriter.java
blob3d0050d5c120896ee45503cf385aebd1ce8ff1f5
1 /*
2 * Copyright (C) 2008, Robin Rosenberg <robin.rosenberg@dewire.com>
3 * Copyright (C) 2008, Shawn O. Pearce <spearce@spearce.org>
5 * All rights reserved.
7 * Redistribution and use in source and binary forms, with or
8 * without modification, are permitted provided that the following
9 * conditions are met:
11 * - Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
14 * - Redistributions in binary form must reproduce the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer in the documentation and/or other materials provided
17 * with the distribution.
19 * - Neither the name of the Git Development Community nor the
20 * names of its contributors may be used to endorse or promote
21 * products derived from this software without specific prior
22 * written permission.
24 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
25 * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
26 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
27 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
29 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
30 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
31 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
32 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
33 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
34 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
36 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
39 package org.spearce.jgit.lib;
41 import java.io.BufferedOutputStream;
42 import java.io.IOException;
43 import java.io.OutputStream;
44 import java.security.DigestOutputStream;
45 import java.util.List;
47 import org.spearce.jgit.transport.PackedObjectInfo;
48 import org.spearce.jgit.util.NB;
50 /**
51 * Creates a table of contents to support random access by {@link PackFile}.
52 * <p>
53 * Pack index files (the <code>.idx</code> suffix in a pack file pair)
54 * provide random access to any object in the pack by associating an ObjectId
55 * to the byte offset within the pack where the object's data can be read.
57 public abstract class PackIndexWriter {
58 /** Magic constant indicating post-version 1 format. */
59 protected static final byte[] TOC = { -1, 't', 'O', 'c' };
61 /**
62 * Create a new writer for the oldest (most widely understood) format.
63 * <p>
64 * This method selects an index format that can accurate describe the
65 * supplied objects and that will be the most compatible format with older
66 * Git implementations.
67 * <p>
68 * Index version 1 is widely recognized by all Git implementations, but
69 * index version 2 (and later) is not as well recognized as it was
70 * introduced more than a year later. Index version 1 can only be used if
71 * the resulting pack file is under 4 gigabytes in size; packs larger than
72 * that limit must use index version 2.
74 * @param dst
75 * the stream the index data will be written to. If not already
76 * buffered it will be automatically wrapped in a buffered
77 * stream. Callers are always responsible for closing the stream.
78 * @param objs
79 * the objects the caller needs to store in the index. Entries
80 * will be examined until a format can be conclusively selected.
81 * @return a new writer to output an index file of the requested format to
82 * the supplied stream.
83 * @throws IllegalArgumentException
84 * no recognized pack index version can support the supplied
85 * objects. This is likely a bug in the implementation.
87 @SuppressWarnings("fallthrough")
88 public static PackIndexWriter createOldestPossible(final OutputStream dst,
89 final List<PackedObjectInfo> objs) {
90 int version = 1;
91 LOOP: for (final PackedObjectInfo oe : objs) {
92 switch (version) {
93 case 1:
94 if (PackIndexWriterV1.canStore(oe))
95 continue;
96 version = 2;
97 case 2:
98 break LOOP;
101 return createVersion(dst, version);
105 * Create a new writer instance for a specific index format version.
107 * @param dst
108 * the stream the index data will be written to. If not already
109 * buffered it will be automatically wrapped in a buffered
110 * stream. Callers are always responsible for closing the stream.
111 * @param version
112 * index format version number required by the caller. Exactly
113 * this formatted version will be written.
114 * @return a new writer to output an index file of the requested format to
115 * the supplied stream.
116 * @throws IllegalArgumentException
117 * the version requested is not supported by this
118 * implementation.
120 public static PackIndexWriter createVersion(final OutputStream dst,
121 final int version) {
122 switch (version) {
123 case 1:
124 return new PackIndexWriterV1(dst);
125 case 2:
126 return new PackIndexWriterV2(dst);
127 default:
128 throw new IllegalArgumentException(
129 "Unsupported pack index version " + version);
133 /** The index data stream we are responsible for creating. */
134 protected final DigestOutputStream out;
136 /** A temporary buffer for use during IO to {@link #out}. */
137 protected final byte[] tmp;
139 /** The entries this writer must pack. */
140 protected List<PackedObjectInfo> entries;
142 /** SHA-1 checksum for the entire pack data. */
143 protected byte[] packChecksum;
146 * Create a new writer instance.
148 * @param dst
149 * the stream this instance outputs to. If not already buffered
150 * it will be automatically wrapped in a buffered stream.
152 protected PackIndexWriter(final OutputStream dst) {
153 out = new DigestOutputStream(dst instanceof BufferedOutputStream ? dst
154 : new BufferedOutputStream(dst), Constants.newMessageDigest());
155 tmp = new byte[4 + Constants.OBJECT_ID_LENGTH];
159 * Write all object entries to the index stream.
160 * <p>
161 * After writing the stream passed to the factory is flushed but remains
162 * open. Callers are always responsible for closing the output stream.
164 * @param toStore
165 * sorted list of objects to store in the index. The caller must
166 * have previously sorted the list using {@link PackedObjectInfo}'s
167 * native {@link Comparable} implementation.
168 * @param packDataChecksum
169 * checksum signature of the entire pack data content. This is
170 * traditionally the last 20 bytes of the pack file's own stream.
171 * @throws IOException
172 * an error occurred while writing to the output stream, or this
173 * index format cannot store the object data supplied.
175 public void write(final List<PackedObjectInfo> toStore,
176 final byte[] packDataChecksum) throws IOException {
177 entries = toStore;
178 packChecksum = packDataChecksum;
179 writeImpl();
180 out.flush();
184 * Writes the index file to {@link #out}.
185 * <p>
186 * Implementations should go something like:
188 * <pre>
189 * writeFanOutTable();
190 * for (final PackedObjectInfo po : entries)
191 * writeOneEntry(po);
192 * writeChecksumFooter();
193 * </pre>
195 * <p>
196 * Where the logic for <code>writeOneEntry</code> is specific to the index
197 * format in use. Additional headers/footers may be used if necessary and
198 * the {@link #entries} collection may be iterated over more than once if
199 * necessary. Implementors therefore have complete control over the data.
201 * @throws IOException
202 * an error occurred while writing to the output stream, or this
203 * index format cannot store the object data supplied.
205 protected abstract void writeImpl() throws IOException;
208 * Output the version 2 (and later) TOC header, with version number.
209 * <p>
210 * Post version 1 all index files start with a TOC header that makes the
211 * file an invalid version 1 file, and then includes the version number.
212 * This header is necessary to recognize a version 1 from a version 2
213 * formatted index.
215 * @param version
216 * version number of this index format being written.
217 * @throws IOException
218 * an error occurred while writing to the output stream.
220 protected void writeTOC(final int version) throws IOException {
221 out.write(TOC);
222 NB.encodeInt32(tmp, 0, version);
223 out.write(tmp, 0, 4);
227 * Output the standard 256 entry first-level fan-out table.
228 * <p>
229 * The fan-out table is 4 KB in size, holding 256 32-bit unsigned integer
230 * counts. Each count represents the number of objects within this index
231 * whose {@link ObjectId#getFirstByte()} matches the count's position in the
232 * fan-out table.
234 * @throws IOException
235 * an error occurred while writing to the output stream.
237 protected void writeFanOutTable() throws IOException {
238 final int[] fanout = new int[256];
239 for (final PackedObjectInfo po : entries)
240 fanout[po.getFirstByte() & 0xff]++;
241 for (int i = 1; i < 256; i++)
242 fanout[i] += fanout[i - 1];
243 for (final int n : fanout) {
244 NB.encodeInt32(tmp, 0, n);
245 out.write(tmp, 0, 4);
250 * Output the standard two-checksum index footer.
251 * <p>
252 * The standard footer contains two checksums (20 byte SHA-1 values):
253 * <ol>
254 * <li>Pack data checksum - taken from the last 20 bytes of the pack file.</li>
255 * <li>Index data checksum - checksum of all index bytes written, including
256 * the pack data checksum above.</li>
257 * </ol>
259 * @throws IOException
260 * an error occurred while writing to the output stream.
262 protected void writeChecksumFooter() throws IOException {
263 out.write(packChecksum);
264 out.on(false);
265 out.write(out.getMessageDigest().digest());