Create IndexPack task for jgit
[egit/zawir.git] / org.spearce.jgit / src / org / spearce / jgit / fetch / IndexPack.java
blobaec14d6bc939cb567f50a1a5445086812f56abf5
1 /*
2 * Copyright (C) 2007 Shawn Pearce <spearce@spearce.org>
4 * This library is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License, version 2, as published by the Free Software Foundation.
8 * This library is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
13 * You should have received a copy of the GNU General Public
14 * License along with this library; if not, write to the Free Software
15 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301
17 package org.spearce.jgit.fetch;
19 import java.io.BufferedOutputStream;
20 import java.io.EOFException;
21 import java.io.File;
22 import java.io.FileOutputStream;
23 import java.io.IOException;
24 import java.io.InputStream;
25 import java.io.RandomAccessFile;
26 import java.security.MessageDigest;
27 import java.util.ArrayList;
28 import java.util.Arrays;
29 import java.util.HashMap;
30 import java.util.zip.DataFormatException;
31 import java.util.zip.Inflater;
33 import org.spearce.jgit.errors.CorruptObjectException;
34 import org.spearce.jgit.lib.BinaryDelta;
35 import org.spearce.jgit.lib.Constants;
36 import org.spearce.jgit.lib.ObjectId;
37 import org.spearce.jgit.lib.ObjectIdMap;
39 /** Indexes Git pack files for local use. */
40 public class IndexPack {
41 private static final byte[] SIGNATURE = { 'P', 'A', 'C', 'K' };
43 private static final int BUFFER_SIZE = 2048;
45 private static final byte[] COMMIT;
47 private static final byte[] TREE;
49 private static final byte[] TAG;
51 private static final byte[] BLOB;
53 static {
54 COMMIT = Constants.encodeASCII(Constants.TYPE_COMMIT);
55 TREE = Constants.encodeASCII(Constants.TYPE_TREE);
56 TAG = Constants.encodeASCII(Constants.TYPE_TAG);
57 BLOB = Constants.encodeASCII(Constants.TYPE_BLOB);
60 private final Inflater inflater;
62 private final MessageDigest objectDigest;
64 private InputStream in;
66 private byte[] buf;
68 private long bBase;
70 private int bOffset;
72 private int bAvail;
74 private final File dstPack;
76 private final File dstIdx;
78 private long objectCount;
80 private ObjectEntry[] entries;
82 private int deltaCount;
84 private int entryCount;
86 private ObjectIdMap<ArrayList<UnresolvedDelta>> baseById;
88 private HashMap<Long, ArrayList<UnresolvedDelta>> baseByPos;
90 private byte[] objectData;
92 private MessageDigest packDigest;
94 private RandomAccessFile packOut;
96 private byte[] packcsum;
98 /**
99 * Create a new pack indexer utility.
101 * @param src
102 * @param dstBase
103 * @throws IOException
104 * the output packfile could not be created.
106 public IndexPack(final InputStream src, final File dstBase)
107 throws IOException {
108 in = src;
109 inflater = new Inflater(false);
110 buf = new byte[BUFFER_SIZE];
111 objectData = new byte[BUFFER_SIZE];
112 objectDigest = Constants.newMessageDigest();
113 packDigest = Constants.newMessageDigest();
115 if (dstBase != null) {
116 final File dir = dstBase.getParentFile();
117 final String nam = dstBase.getName();
118 dstPack = new File(dir, nam + ".pack");
119 dstIdx = new File(dir, nam + ".idx");
120 packOut = new RandomAccessFile(dstPack, "rw");
121 packOut.setLength(0);
122 } else {
123 dstPack = null;
124 dstIdx = null;
129 * Consume data from the input stream until the packfile is indexed.
131 * @throws IOException
133 public void index() throws IOException {
134 try {
135 try {
136 readPackHeader();
138 entries = new ObjectEntry[(int) objectCount];
139 baseById = new ObjectIdMap<ArrayList<UnresolvedDelta>>();
140 baseByPos = new HashMap<Long, ArrayList<UnresolvedDelta>>();
142 for (int done = 0; done < objectCount; done++)
143 indexOneObject();
144 readPackFooter();
145 endInput();
147 if (deltaCount > 0) {
148 if (packOut == null)
149 throw new IOException("need packOut");
150 resolveDeltas();
151 if (entryCount < objectCount)
152 throw new IOException("thin packs aren't supported");
154 baseById = null;
155 baseByPos = null;
157 if (dstIdx != null)
158 writeIdx();
160 } finally {
161 if (packOut != null)
162 packOut.close();
163 inflater.end();
165 } catch (IOException err) {
166 if (dstPack != null)
167 dstPack.delete();
168 if (dstIdx != null)
169 dstIdx.delete();
170 throw err;
174 private void resolveDeltas() throws IOException {
175 final int last = entryCount;
176 for (int i = 0; i < last; i++)
177 resolveDeltas(entries[i]);
180 private void resolveDeltas(final ObjectEntry oe) throws IOException {
181 if (baseById.containsKey(oe) || baseByPos.containsKey(new Long(oe.pos)))
182 resolveDeltas(oe.pos, null, null, oe);
185 private void resolveDeltas(final long pos, byte[] type, byte[] data,
186 ObjectEntry oe) throws IOException {
187 position(pos);
188 int c = readFromFile();
189 final int typeCode = (c >> 4) & 7;
190 long sz = c & 15;
191 int shift = 4;
192 while ((c & 0x80) != 0) {
193 c = readFromFile();
194 sz += (c & 0x7f) << shift;
195 shift += 7;
198 switch (typeCode) {
199 case Constants.OBJ_COMMIT:
200 type = COMMIT;
201 data = inflateFromFile((int) sz);
202 break;
203 case Constants.OBJ_TREE:
204 type = TREE;
205 data = inflateFromFile((int) sz);
206 break;
207 case Constants.OBJ_BLOB:
208 type = BLOB;
209 data = inflateFromFile((int) sz);
210 break;
211 case Constants.OBJ_TAG:
212 type = TAG;
213 data = inflateFromFile((int) sz);
214 break;
215 case Constants.OBJ_OFS_DELTA: {
216 c = readFromInput() & 0xff;
217 while ((c & 128) != 0)
218 c = readFromInput() & 0xff;
219 data = BinaryDelta.apply(data, inflateFromFile((int) sz));
220 break;
222 case Constants.OBJ_REF_DELTA: {
223 fillFromInput(20);
224 use(20);
225 data = BinaryDelta.apply(data, inflateFromFile((int) sz));
226 break;
228 default:
229 throw new IOException("Unknown object type " + typeCode + ".");
232 if (oe == null) {
233 objectDigest.update(type);
234 objectDigest.update((byte) ' ');
235 objectDigest.update(Constants.encodeASCII(data.length));
236 objectDigest.update((byte) 0);
237 objectDigest.update(data);
238 oe = new ObjectEntry(pos, objectDigest.digest());
239 entries[entryCount++] = oe;
242 final ArrayList<UnresolvedDelta> a = baseById.remove(oe);
243 final ArrayList<UnresolvedDelta> b = baseByPos.remove(new Long(pos));
244 int ai = 0, bi = 0;
245 if (a != null && b != null) {
246 while (ai < a.size() && bi < b.size()) {
247 final UnresolvedDelta ad = a.get(ai);
248 final UnresolvedDelta bd = b.get(bi);
249 if (ad.position < bd.position) {
250 resolveDeltas(ad.position, type, data, null);
251 ai++;
252 } else {
253 resolveDeltas(bd.position, type, data, null);
254 bi++;
258 if (a != null)
259 while (ai < a.size())
260 resolveDeltas(a.get(ai++).position, type, data, null);
261 if (b != null)
262 while (bi < b.size())
263 resolveDeltas(b.get(bi++).position, type, data, null);
266 private void writeIdx() throws IOException {
267 Arrays.sort(entries);
268 final int[] fanout = new int[256];
269 for (int i = 0; i < entryCount; i++)
270 fanout[entries[i].getFirstByte() & 0xff]++;
271 for (int i = 1; i < 256; i++)
272 fanout[i] += fanout[i - 1];
274 final BufferedOutputStream os = new BufferedOutputStream(
275 new FileOutputStream(dstIdx), BUFFER_SIZE);
276 try {
277 final MessageDigest d = Constants.newMessageDigest();
278 for (int i = 0; i < 256; i++)
279 writeUInt32(d, os, fanout[i]);
280 for (int i = 0; i < entryCount; i++) {
281 final ObjectEntry oe = entries[i];
282 writeUInt32(d, os, oe.pos);
283 // System.out.println(oe + " " + oe.pos);
284 os.write(oe.getBytes());
285 d.update(oe.getBytes());
287 os.write(packcsum);
288 d.update(packcsum);
289 os.write(d.digest());
290 } finally {
291 os.close();
295 private void readPackHeader() throws IOException {
296 final int hdrln = SIGNATURE.length + 4 + 4;
297 final int p = fillFromInput(hdrln);
298 for (int k = 0; k < SIGNATURE.length; k++)
299 if (buf[p + k] != SIGNATURE[k])
300 throw new IOException("Not a PACK file.");
302 final long vers = readUInt32(buf, p + 4);
303 if (vers != 2 && vers != 3)
304 throw new IOException("Unsupported pack version " + vers + ".");
305 objectCount = readUInt32(buf, p + 8);
306 use(hdrln);
309 private void readPackFooter() throws IOException {
310 sync();
311 final byte[] cmpcsum = packDigest.digest();
312 final int c = fillFromInput(20);
313 packcsum = new byte[20];
314 System.arraycopy(buf, c, packcsum, 0, 20);
315 use(20);
316 if (packOut != null)
317 packOut.write(packcsum);
319 if (!Arrays.equals(cmpcsum, packcsum))
320 throw new CorruptObjectException("Packfile checksum incorrect.");
323 // Cleanup all resources associated with our input parsing.
324 private void endInput() {
325 in = null;
326 packDigest = null;
327 objectData = null;
330 // Read one entire object or delta from the input.
331 private void indexOneObject() throws IOException {
332 final long pos = position();
334 int c = readFromInput();
335 final int typeCode = (c >> 4) & 7;
336 long sz = c & 15;
337 int shift = 4;
338 while ((c & 0x80) != 0) {
339 c = readFromInput();
340 sz += (c & 0x7f) << shift;
341 shift += 7;
344 switch (typeCode) {
345 case Constants.OBJ_COMMIT:
346 whole(COMMIT, pos, sz);
347 break;
348 case Constants.OBJ_TREE:
349 whole(TREE, pos, sz);
350 break;
351 case Constants.OBJ_BLOB:
352 whole(BLOB, pos, sz);
353 break;
354 case Constants.OBJ_TAG:
355 whole(TAG, pos, sz);
356 break;
357 case Constants.OBJ_OFS_DELTA: {
358 c = readFromInput() & 0xff;
359 long ofs = c & 127;
360 while ((c & 128) != 0) {
361 ofs += 1;
362 c = readFromInput() & 0xff;
363 ofs <<= 7;
364 ofs += (c & 127);
366 final Long base = new Long(pos - ofs);
367 ArrayList<UnresolvedDelta> r = baseByPos.get(base);
368 if (r == null) {
369 r = new ArrayList<UnresolvedDelta>(8);
370 baseByPos.put(base, r);
372 r.add(new UnresolvedDelta(pos));
373 deltaCount++;
374 inflateFromInput(false);
375 break;
377 case Constants.OBJ_REF_DELTA: {
378 c = fillFromInput(20);
379 final byte[] ref = new byte[20];
380 System.arraycopy(buf, c, ref, 0, 20);
381 use(20);
382 final ObjectId base = new ObjectId(ref);
383 ArrayList<UnresolvedDelta> r = baseById.get(base);
384 if (r == null) {
385 r = new ArrayList<UnresolvedDelta>(8);
386 baseById.put(base, r);
388 r.add(new UnresolvedDelta(pos));
389 deltaCount++;
390 inflateFromInput(false);
391 break;
393 default:
394 throw new IOException("Unknown object type " + typeCode + ".");
398 private void whole(final byte[] type, final long pos, final long sz)
399 throws IOException {
400 objectDigest.update(type);
401 objectDigest.update((byte) ' ');
402 objectDigest.update(Constants.encodeASCII(sz));
403 objectDigest.update((byte) 0);
404 inflateFromInput(true);
405 entries[entryCount++] = new ObjectEntry(pos, objectDigest.digest());
408 // Current position of {@link #bOffset} within the entire file.
409 private long position() {
410 return bBase + bOffset;
413 private void position(final long pos) throws IOException {
414 packOut.seek(pos);
415 bBase = pos;
416 bOffset = 0;
417 bAvail = 0;
420 // Consume exactly one byte from the buffer and return it.
421 private int readFromInput() throws IOException {
422 if (bAvail == 0)
423 fillFromInput(1);
424 bAvail--;
425 return buf[bOffset++] & 0xff;
428 // Consume exactly one byte from the buffer and return it.
429 private int readFromFile() throws IOException {
430 if (bAvail == 0)
431 fillFromFile();
432 bAvail--;
433 return buf[bOffset++] & 0xff;
436 // Consume cnt bytes from the buffer.
437 private void use(final int cnt) {
438 bOffset += cnt;
439 bAvail -= cnt;
442 // Ensure at least need bytes are available in in {@link #buf}.
443 private int fillFromInput(final int need) throws IOException {
444 while (bAvail < need) {
445 int next = bOffset + bAvail;
446 int free = buf.length - next;
447 if (free + bAvail < need) {
448 sync();
449 next = bAvail;
450 free = buf.length - next;
452 next = in.read(buf, next, free);
453 if (next <= 0)
454 throw new EOFException("Packfile is truncated.");
455 bAvail += next;
457 return bOffset;
460 // Ensure at least need bytes are available in in {@link #buf}.
461 private int fillFromFile() throws IOException {
462 if (bAvail == 0) {
463 final int next = packOut.read(buf, 0, buf.length);
464 if (next <= 0)
465 throw new EOFException("Packfile is truncated.");
466 bAvail = next;
467 bOffset = 0;
469 return bOffset;
472 // Store consumed bytes in {@link #buf} up to {@link #bOffset}.
473 private void sync() throws IOException {
474 packDigest.update(buf, 0, bOffset);
475 if (packOut != null)
476 packOut.write(buf, 0, bOffset);
477 if (bAvail > 0)
478 System.arraycopy(buf, bOffset, buf, 0, bAvail);
479 bBase += bOffset;
480 bOffset = 0;
483 private void inflateFromInput(final boolean digest) throws IOException {
484 final Inflater inf = inflater;
485 try {
486 final byte[] dst = objectData;
487 int n = 0;
488 while (!inf.finished()) {
489 if (inf.needsInput()) {
490 final int p = fillFromInput(1);
491 inf.setInput(buf, p, bAvail);
492 use(bAvail);
495 int free = dst.length - n;
496 if (free < 8) {
497 if (digest)
498 objectDigest.update(dst, 0, n);
499 n = 0;
500 free = dst.length;
503 n += inf.inflate(dst, n, free);
505 if (digest)
506 objectDigest.update(dst, 0, n);
507 use(-inf.getRemaining());
508 } catch (DataFormatException dfe) {
509 throw corrupt(dfe);
510 } finally {
511 inf.reset();
515 private byte[] inflateFromFile(final int sz) throws IOException {
516 final Inflater inf = inflater;
517 try {
518 final byte[] dst = new byte[sz];
519 int n = 0;
520 while (!inf.finished()) {
521 if (inf.needsInput()) {
522 final int p = fillFromFile();
523 inf.setInput(buf, p, bAvail);
524 use(bAvail);
526 n += inf.inflate(dst, n, sz - n);
528 use(-inf.getRemaining());
529 return dst;
530 } catch (DataFormatException dfe) {
531 throw corrupt(dfe);
532 } finally {
533 inf.reset();
537 private static CorruptObjectException corrupt(final DataFormatException dfe) {
538 return new CorruptObjectException("Packfile corruption detected: "
539 + dfe.getMessage());
542 private static long readUInt32(final byte[] b, final int i) {
543 return (b[i + 0] & 0xff) << 24 | (b[i + 1] & 0xff) << 16
544 | (b[i + 2] & 0xff) << 8 | (b[i + 3] & 0xff);
547 private static void writeUInt32(final MessageDigest m,
548 final BufferedOutputStream o, final long i) throws IOException {
549 final int a = ((int) (i >> 24)) & 0xff;
550 final int b = ((int) (i >> 16)) & 0xff;
551 final int c = ((int) (i >> 8)) & 0xff;
552 final int d = ((int) i) & 0xff;
554 o.write(a);
555 o.write(b);
556 o.write(c);
557 o.write(d);
559 m.update((byte) a);
560 m.update((byte) b);
561 m.update((byte) c);
562 m.update((byte) d);
565 private static class ObjectEntry extends ObjectId {
566 final long pos;
568 ObjectEntry(final long headerOffset, final byte[] raw) {
569 super(raw);
570 pos = headerOffset;
574 private static class UnresolvedDelta {
575 final long position;
577 UnresolvedDelta(final long headerOffset) {
578 position = headerOffset;