Fixed memory leaks in test suite
[libgit2.git] / src / odb.c
blob627513e78ae0c8dbcdb62e371ea674495c841aca
1 /*
2 * This file is free software; you can redistribute it and/or modify
3 * it under the terms of the GNU General Public License, version 2,
4 * as published by the Free Software Foundation.
6 * In addition to the permissions in the GNU General Public License,
7 * the authors give you unlimited permission to link the compiled
8 * version of this file into combinations with other programs,
9 * and to distribute those combinations without any restriction
10 * coming from the use of this file. (The General Public License
11 * restrictions do apply in other respects; for example, they cover
12 * modification of the file, and distribution when not linked into
13 * a combined executable.)
15 * This file is distributed in the hope that it will be useful, but
16 * WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * General Public License for more details.
20 * You should have received a copy of the GNU General Public License
21 * along with this program; see the file COPYING. If not, write to
22 * the Free Software Foundation, 51 Franklin Street, Fifth Floor,
23 * Boston, MA 02110-1301, USA.
26 #include "common.h"
27 #include "git/odb.h"
28 #include "git/zlib.h"
29 #include "fileops.h"
30 #include "hash.h"
31 #include "odb.h"
33 #define GIT_PACK_NAME_MAX (5 + 40 + 1)
/** A single pack index entry, as filled in by the idx_get accessors. */
typedef struct {
	uint32_t n;          /* position of the entry in the index */
	unsigned char *oid;  /* points at the raw object id inside the mapped index */
	off_t offset;        /* offset recorded for the object */
	off_t size;          /* distance from this offset to the next one */
} index_entry;
42 struct git_pack {
43 git_odb *db;
44 git_lck lock;
46 /** Functions to access idx_map. */
47 int (*idx_search)(
48 uint32_t *,
49 struct git_pack *,
50 const git_oid *);
51 int (*idx_search_offset)(
52 uint32_t *,
53 struct git_pack *,
54 off_t);
55 int (*idx_get)(
56 index_entry *,
57 struct git_pack *,
58 uint32_t n);
60 /** The .idx file, mapped into memory. */
61 git_file idx_fd;
62 git_map idx_map;
63 uint32_t *im_fanout;
64 unsigned char *im_oid;
65 uint32_t *im_crc;
66 uint32_t *im_offset32;
67 uint32_t *im_offset64;
68 uint32_t *im_off_idx;
69 uint32_t *im_off_next;
71 /** Number of objects in this pack. */
72 uint32_t obj_cnt;
74 /** File descriptor for the .pack file. */
75 git_file pack_fd;
77 /** The size of the .pack file. */
78 off_t pack_size;
80 /** The mtime of the .pack file. */
81 time_t pack_mtime;
83 /** Number of git_packlist we appear in. */
84 unsigned int refcnt;
86 /** Number of active users of the idx_map data. */
87 unsigned int idxcnt;
88 unsigned
89 invalid:1 /* the pack is unable to be read by libgit2 */
92 /** Name of the pack file(s), without extension ("pack-abc"). */
93 char pack_name[GIT_PACK_NAME_MAX];
95 typedef struct git_pack git_pack;
97 typedef struct {
98 size_t n_packs;
99 unsigned int refcnt;
100 git_pack *packs[GIT_FLEX_ARRAY];
101 } git_packlist;
103 struct git_odb {
104 git_lck lock;
106 /** Path to the "objects" directory. */
107 char *objects_dir;
109 /** Known pack files from ${objects_dir}/packs. */
110 git_packlist *packlist;
112 /** Alternate databases to search. */
113 git_odb **alternates;
114 size_t n_alternates;
116 /** loose object zlib compression level. */
117 int object_zlib_level;
118 /** loose object file fsync flag. */
119 int fsync_object_files;
122 typedef struct { /* object header data */
123 git_otype type; /* object type */
124 size_t size; /* object size */
125 } obj_hdr;
/** The three 32-bit words at the start of a '.pack' file. */
typedef struct {
	uint32_t sig; /* PACK_SIG */
	uint32_t ver; /* pack version */
	uint32_t cnt; /* object count */
} pack_hdr;
/**
 * Object type table, indexed by the numeric git_otype value.
 * `str` is the type's name; `loose` is non-zero for types that are
 * valid as loose objects.
 */
static struct {
	const char *str; /* type name string */
	int loose;       /* valid loose object type flag */
} obj_type_table[] = {
	{ "",          0 }, /* 0 = GIT_OBJ__EXT1     */
	{ "commit",    1 }, /* 1 = GIT_OBJ_COMMIT    */
	{ "tree",      1 }, /* 2 = GIT_OBJ_TREE      */
	{ "blob",      1 }, /* 3 = GIT_OBJ_BLOB      */
	{ "tag",       1 }, /* 4 = GIT_OBJ_TAG       */
	{ "",          0 }, /* 5 = GIT_OBJ__EXT2     */
	{ "OFS_DELTA", 0 }, /* 6 = GIT_OBJ_OFS_DELTA */
	{ "REF_DELTA", 0 }  /* 7 = GIT_OBJ_REF_DELTA */
};
147 GIT_INLINE(uint32_t) decode32(void *b)
149 return ntohl(*((uint32_t *)b));
152 GIT_INLINE(uint64_t) decode64(void *b)
154 uint32_t *p = b;
155 return (((uint64_t)ntohl(p[0])) << 32) | ntohl(p[1]);
158 const char *git_obj_type_to_string(git_otype type)
160 if (type < 0 || ((size_t) type) >= ARRAY_SIZE(obj_type_table))
161 return "";
162 return obj_type_table[type].str;
165 git_otype git_obj_string_to_type(const char *str)
167 size_t i;
169 if (!str || !*str)
170 return GIT_OBJ_BAD;
172 for (i = 0; i < ARRAY_SIZE(obj_type_table); i++)
173 if (!strcmp(str, obj_type_table[i].str))
174 return (git_otype) i;
176 return GIT_OBJ_BAD;
179 int git_obj__loose_object_type(git_otype type)
181 if (type < 0 || ((size_t) type) >= ARRAY_SIZE(obj_type_table))
182 return 0;
183 return obj_type_table[type].loose;
186 static int format_object_header(char *hdr, size_t n, git_obj *obj)
188 const char *type_str = git_obj_type_to_string(obj->type);
189 int len = snprintf(hdr, n, "%s %"PRIuZ, type_str, obj->len);
191 assert(len > 0); /* otherwise snprintf() is broken */
192 assert(((size_t) len) < n); /* otherwise the caller is broken! */
194 if (len < 0 || ((size_t) len) >= n)
195 return GIT_ERROR;
196 return len+1;
199 static int hash_obj(git_oid *id, char *hdr, size_t n, int *len, git_obj *obj)
201 git_buf_vec vec[2];
202 int hdrlen;
204 assert(id && hdr && len && obj);
206 if (!git_obj__loose_object_type(obj->type))
207 return GIT_ERROR;
209 if (!obj->data && obj->len != 0)
210 return GIT_ERROR;
212 if ((hdrlen = format_object_header(hdr, n, obj)) < 0)
213 return GIT_ERROR;
215 *len = hdrlen;
217 vec[0].data = hdr;
218 vec[0].len = hdrlen;
219 vec[1].data = obj->data;
220 vec[1].len = obj->len;
222 git_hash_vec(id, vec, 2);
224 return GIT_SUCCESS;
227 int git_obj_hash(git_oid *id, git_obj *obj)
229 char hdr[64];
230 int hdrlen;
232 assert(id && obj);
234 return hash_obj(id, hdr, sizeof(hdr), &hdrlen, obj);
237 static size_t object_file_name(char *name, size_t n, char *dir, const git_oid *id)
239 size_t len = strlen(dir);
241 /* check length: 43 = 40 hex sha1 chars + 2 * '/' + '\0' */
242 if (len+43 > n)
243 return len+43;
245 /* the object dir: eg $GIT_DIR/objects */
246 strcpy(name, dir);
247 if (name[len-1] != '/')
248 name[len++] = '/';
250 /* loose object filename: aa/aaa... (41 bytes) */
251 git_oid_pathfmt(&name[len], id);
252 name[len+41] = '\0';
254 return 0;
/**
 * Check whether the first two bytes of `data` look like a zlib
 * header: the first byte must be 0x78 and the two bytes, read as a
 * big-endian 16-bit number, must be a multiple of 31.
 *
 * @return 1 when both conditions hold, 0 otherwise.
 */
static int is_zlib_compressed_data(unsigned char *data)
{
	unsigned int first = data[0];
	unsigned int second = data[1];

	if (first != 0x78)
		return 0;

	return ((first << 8) + second) % 31 == 0;
}
265 static size_t get_binary_object_header(obj_hdr *hdr, gitfo_buf *obj)
267 unsigned char c;
268 unsigned char *data = obj->data;
269 size_t shift, size, used = 0;
271 if (obj->len == 0)
272 return 0;
274 c = data[used++];
275 hdr->type = (c >> 4) & 7;
277 size = c & 15;
278 shift = 4;
279 while (c & 0x80) {
280 if (obj->len <= used)
281 return 0;
282 if (sizeof(size_t) * 8 <= shift)
283 return 0;
284 c = data[used++];
285 size += (c & 0x7f) << shift;
286 shift += 7;
288 hdr->size = size;
290 return used;
293 static size_t get_object_header(obj_hdr *hdr, unsigned char *data)
295 char c, typename[10];
296 size_t size, used = 0;
299 * type name string followed by space.
301 while ((c = data[used]) != ' ') {
302 typename[used++] = c;
303 if (used >= sizeof(typename))
304 return 0;
306 typename[used] = 0;
307 if (used == 0)
308 return 0;
309 hdr->type = git_obj_string_to_type(typename);
310 used++; /* consume the space */
313 * length follows immediately in decimal (without
314 * leading zeros).
316 size = data[used++] - '0';
317 if (size > 9)
318 return 0;
319 if (size) {
320 while ((c = data[used]) != '\0') {
321 size_t d = c - '0';
322 if (d > 9)
323 break;
324 used++;
325 size = size * 10 + d;
328 hdr->size = size;
331 * the length must be followed by a zero byte
333 if (data[used++] != '\0')
334 return 0;
336 return used;
339 static void init_stream(z_stream *s, void *out, size_t len)
341 memset(s, 0, sizeof(*s));
342 s->next_out = out;
343 s->avail_out = len;
346 static void set_stream_input(z_stream *s, void *in, size_t len)
348 s->next_in = in;
349 s->avail_in = len;
352 static void set_stream_output(z_stream *s, void *out, size_t len)
354 s->next_out = out;
355 s->avail_out = len;
358 static int start_inflate(z_stream *s, gitfo_buf *obj, void *out, size_t len)
360 int status;
362 init_stream(s, out, len);
363 set_stream_input(s, obj->data, obj->len);
365 if ((status = inflateInit(s)) < Z_OK)
366 return status;
368 return inflate(s, 0);
371 static int finish_inflate(z_stream *s)
373 int status = Z_OK;
375 while (status == Z_OK)
376 status = inflate(s, Z_FINISH);
378 inflateEnd(s);
380 if ((status != Z_STREAM_END) || (s->avail_in != 0))
381 return GIT_ERROR;
383 return GIT_SUCCESS;
386 static void *inflate_tail(z_stream *s, void *hb, size_t used, obj_hdr *hdr)
388 unsigned char *buf, *head = hb;
389 size_t tail;
392 * allocate a buffer to hold the inflated data and copy the
393 * initial sequence of inflated data from the tail of the
394 * head buffer, if any.
396 if ((buf = git__malloc(hdr->size + 1)) == NULL) {
397 inflateEnd(s);
398 return NULL;
400 tail = s->total_out - used;
401 if (used > 0 && tail > 0) {
402 if (tail > hdr->size)
403 tail = hdr->size;
404 memcpy(buf, head + used, tail);
406 used = tail;
409 * inflate the remainder of the object data, if any
411 if (hdr->size < used)
412 inflateEnd(s);
413 else {
414 set_stream_output(s, buf + used, hdr->size - used);
415 if (finish_inflate(s)) {
416 free(buf);
417 return NULL;
421 return buf;
424 static int inflate_buffer(void *in, size_t inlen, void *out, size_t outlen)
426 z_stream zs;
427 int status = Z_OK;
429 init_stream(&zs, out, outlen);
430 set_stream_input(&zs, in, inlen);
432 if (inflateInit(&zs) < Z_OK)
433 return GIT_ERROR;
435 while (status == Z_OK)
436 status = inflate(&zs, Z_FINISH);
438 inflateEnd(&zs);
440 if ((status != Z_STREAM_END) || (zs.avail_in != 0))
441 return GIT_ERROR;
443 if (zs.total_out != outlen)
444 return GIT_ERROR;
446 return GIT_SUCCESS;
450 * At one point, there was a loose object format that was intended to
451 * mimic the format used in pack-files. This was to allow easy copying
452 * of loose object data into packs. This format is no longer used, but
453 * we must still read it.
455 static int inflate_packlike_loose_disk_obj(git_obj *out, gitfo_buf *obj)
457 unsigned char *in, *buf;
458 obj_hdr hdr;
459 size_t len, used;
462 * read the object header, which is an (uncompressed)
463 * binary encoding of the object type and size.
465 if ((used = get_binary_object_header(&hdr, obj)) == 0)
466 return GIT_ERROR;
468 if (!git_obj__loose_object_type(hdr.type))
469 return GIT_ERROR;
472 * allocate a buffer and inflate the data into it
474 buf = git__malloc(hdr.size + 1);
475 if (!buf)
476 return GIT_ERROR;
478 in = ((unsigned char *)obj->data) + used;
479 len = obj->len - used;
480 if (inflate_buffer(in, len, buf, hdr.size)) {
481 free(buf);
482 return GIT_ERROR;
484 buf[hdr.size] = '\0';
486 out->data = buf;
487 out->len = hdr.size;
488 out->type = hdr.type;
490 return GIT_SUCCESS;
493 static int inflate_disk_obj(git_obj *out, gitfo_buf *obj)
495 unsigned char head[64], *buf;
496 z_stream zs;
497 int z_status;
498 obj_hdr hdr;
499 size_t used;
502 * check for a pack-like loose object
504 if (!is_zlib_compressed_data(obj->data))
505 return inflate_packlike_loose_disk_obj(out, obj);
508 * inflate the initial part of the io buffer in order
509 * to parse the object header (type and size).
511 if ((z_status = start_inflate(&zs, obj, head, sizeof(head))) < Z_OK)
512 return GIT_ERROR;
514 if ((used = get_object_header(&hdr, head)) == 0)
515 return GIT_ERROR;
517 if (!git_obj__loose_object_type(hdr.type))
518 return GIT_ERROR;
521 * allocate a buffer and inflate the object data into it
522 * (including the initial sequence in the head buffer).
524 if ((buf = inflate_tail(&zs, head, used, &hdr)) == NULL)
525 return GIT_ERROR;
526 buf[hdr.size] = '\0';
528 out->data = buf;
529 out->len = hdr.size;
530 out->type = hdr.type;
532 return GIT_SUCCESS;
535 static int make_temp_file(git_file *fd, char *tmp, size_t n, char *file)
537 char *template = "/tmp_obj_XXXXXX";
538 size_t tmplen = strlen(template);
539 int dirlen;
541 if ((dirlen = git__dirname(tmp, n, file)) < 0)
542 return GIT_ERROR;
544 if ((dirlen + tmplen) >= n)
545 return GIT_ERROR;
547 strcpy(tmp + dirlen, (dirlen) ? template : template + 1);
549 *fd = gitfo_mkstemp(tmp);
550 if (*fd < 0 && dirlen) {
551 /* create directory if it doesn't exist */
552 tmp[dirlen] = '\0';
553 if ((gitfo_exists(tmp) < 0) && gitfo_mkdir(tmp, 0755))
554 return GIT_ERROR;
555 /* try again */
556 strcpy(tmp + dirlen, template);
557 *fd = gitfo_mkstemp(tmp);
559 if (*fd < 0)
560 return GIT_ERROR;
562 return GIT_SUCCESS;
565 static int deflate_buf(z_stream *s, void *in, size_t len, int flush)
567 int status = Z_OK;
569 set_stream_input(s, in, len);
570 while (status == Z_OK) {
571 status = deflate(s, flush);
572 if (s->avail_in == 0)
573 break;
575 return status;
578 static int deflate_obj(gitfo_buf *buf, char *hdr, int hdrlen, git_obj *obj, int level)
580 z_stream zs;
581 int status;
582 size_t size;
584 assert(buf && !buf->data && hdr && obj);
585 assert(level == Z_DEFAULT_COMPRESSION || (level >= 0 && level <= 9));
587 buf->data = NULL;
588 buf->len = 0;
589 init_stream(&zs, NULL, 0);
591 if (deflateInit(&zs, level) < Z_OK)
592 return GIT_ERROR;
594 size = deflateBound(&zs, hdrlen + obj->len);
596 if ((buf->data = git__malloc(size)) == NULL) {
597 deflateEnd(&zs);
598 return GIT_ERROR;
601 set_stream_output(&zs, buf->data, size);
603 /* compress the header */
604 status = deflate_buf(&zs, hdr, hdrlen, Z_NO_FLUSH);
606 /* if header compressed OK, compress the object */
607 if (status == Z_OK)
608 status = deflate_buf(&zs, obj->data, obj->len, Z_FINISH);
610 if (status != Z_STREAM_END) {
611 deflateEnd(&zs);
612 free(buf->data);
613 buf->data = NULL;
614 return GIT_ERROR;
617 buf->len = zs.total_out;
618 deflateEnd(&zs);
620 return GIT_SUCCESS;
623 static int write_obj(gitfo_buf *buf, git_oid *id, git_odb *db)
625 char file[GIT_PATH_MAX];
626 char temp[GIT_PATH_MAX];
627 git_file fd;
629 if (object_file_name(file, sizeof(file), db->objects_dir, id))
630 return GIT_ERROR;
632 if (make_temp_file(&fd, temp, sizeof(temp), file) < 0)
633 return GIT_ERROR;
635 if (gitfo_write(fd, buf->data, buf->len) < 0) {
636 gitfo_close(fd);
637 gitfo_unlink(temp);
638 return GIT_ERROR;
641 if (db->fsync_object_files)
642 gitfo_fsync(fd);
643 gitfo_close(fd);
644 gitfo_chmod(temp, 0444);
646 if (gitfo_move_file(temp, file) < 0) {
647 gitfo_unlink(temp);
648 return GIT_ERROR;
651 return GIT_SUCCESS;
654 static int open_alternates(git_odb *db)
656 unsigned n = 0;
658 gitlck_lock(&db->lock);
659 if (db->alternates) {
660 gitlck_unlock(&db->lock);
661 return 1;
664 db->alternates = git__malloc(sizeof(*db->alternates) * (n + 1));
665 if (!db->alternates) {
666 gitlck_unlock(&db->lock);
667 return -1;
670 db->alternates[n] = NULL;
671 db->n_alternates = n;
672 gitlck_unlock(&db->lock);
673 return 0;
676 static int pack_openidx_map(git_pack *p)
678 char pb[GIT_PATH_MAX];
679 off_t len;
681 if (git__fmt(pb, sizeof(pb), "%s/pack/%s.idx",
682 p->db->objects_dir,
683 p->pack_name) < 0)
684 return GIT_ERROR;
686 if ((p->idx_fd = gitfo_open(pb, O_RDONLY)) < 0)
687 return GIT_ERROR;
689 if ((len = gitfo_size(p->idx_fd)) < 0
690 || !git__is_sizet(len)
691 || gitfo_map_ro(&p->idx_map, p->idx_fd, 0, (size_t)len)) {
692 gitfo_close(p->idx_fd);
693 return GIT_ERROR;
696 return GIT_SUCCESS;
/** Pairs an offset with an index position, used for sorting by offset. */
typedef struct {
	off_t offset; /* offset of the entry */
	uint32_t n;   /* position of the entry in the index */
} offset_idx_info;
704 static int cmp_offset_idx_info(const void *lhs, const void *rhs)
706 const offset_idx_info *a = lhs;
707 const offset_idx_info *b = rhs;
708 return (a->offset < b->offset) ? -1 : (a->offset > b->offset) ? 1 : 0;
711 static int make_offset_index(git_pack *p, offset_idx_info *data)
713 off_t min_off = 3 * 4, max_off = p->pack_size - GIT_OID_RAWSZ;
714 uint32_t *idx, *next;
715 uint32_t j;
717 qsort(data, p->obj_cnt, sizeof(*data), cmp_offset_idx_info);
719 if (data[0].offset < min_off || data[p->obj_cnt].offset > max_off)
720 return GIT_ERROR;
722 if ((idx = git__malloc(sizeof(*idx) * (p->obj_cnt+1))) == NULL)
723 return GIT_ERROR;
724 if ((next = git__malloc(sizeof(*next) * p->obj_cnt)) == NULL) {
725 free(idx);
726 return GIT_ERROR;
729 for (j = 0; j < p->obj_cnt+1; j++)
730 idx[j] = data[j].n;
732 for (j = 0; j < p->obj_cnt; j++) {
733 assert(idx[j] < p->obj_cnt);
734 assert(idx[j+1] < p->obj_cnt+1);
736 next[idx[j]] = idx[j+1];
739 p->im_off_idx = idx;
740 p->im_off_next = next;
741 return GIT_SUCCESS;
744 static int idxv1_search(uint32_t *out, git_pack *p, const git_oid *id)
746 unsigned char *data = p->im_oid;
747 uint32_t lo = id->id[0] ? p->im_fanout[id->id[0] - 1] : 0;
748 uint32_t hi = p->im_fanout[id->id[0]];
750 do {
751 uint32_t mid = (lo + hi) >> 1;
752 uint32_t pos = 24 * mid;
753 int cmp = memcmp(id->id, data + pos + 4, 20);
754 if (cmp < 0)
755 hi = mid;
756 else if (!cmp) {
757 *out = mid;
758 return GIT_SUCCESS;
759 } else
760 lo = mid + 1;
761 } while (lo < hi);
762 return GIT_ENOTFOUND;
765 static int idxv1_search_offset(uint32_t *out, git_pack *p, off_t offset)
767 if (offset > 0 && offset < (p->pack_size - GIT_OID_RAWSZ)) {
768 uint32_t lo = 0, hi = p->obj_cnt+1;
769 unsigned char *data = p->im_oid;
770 uint32_t *idx = p->im_off_idx;
771 do {
772 uint32_t mid = (lo + hi) >> 1;
773 uint32_t n = idx[mid];
774 uint32_t pos = n * (GIT_OID_RAWSZ + 4);
775 off_t here = decode32(data + pos);
776 if (offset < here)
777 hi = mid;
778 else if (offset == here) {
779 *out = n;
780 return GIT_SUCCESS;
781 } else
782 lo = mid + 1;
783 } while (lo < hi);
785 return GIT_ENOTFOUND;
788 static int idxv1_get(index_entry *e, git_pack *p, uint32_t n)
790 unsigned char *data = p->im_oid;
791 uint32_t *next = p->im_off_next;
793 if (n < p->obj_cnt) {
794 uint32_t pos = n * (GIT_OID_RAWSZ + 4);
795 off_t next_off = p->pack_size - GIT_OID_RAWSZ;
796 e->n = n;
797 e->oid = data + pos + 4;
798 e->offset = decode32(data + pos);
799 if (next[n] < p->obj_cnt) {
800 pos = next[n] * (GIT_OID_RAWSZ + 4);
801 next_off = decode32(data + pos);
803 e->size = next_off - e->offset;
804 return GIT_SUCCESS;
806 return GIT_ENOTFOUND;
809 static int pack_openidx_v1(git_pack *p)
811 uint32_t *src_fanout = p->idx_map.data;
812 uint32_t *im_fanout;
813 offset_idx_info *info;
814 size_t expsz;
815 uint32_t j;
818 if ((im_fanout = git__malloc(sizeof(*im_fanout) * 256)) == NULL)
819 return GIT_ERROR;
821 im_fanout[0] = decode32(&src_fanout[0]);
822 for (j = 1; j < 256; j++) {
823 im_fanout[j] = decode32(&src_fanout[j]);
824 if (im_fanout[j] < im_fanout[j - 1]) {
825 free(im_fanout);
826 return GIT_ERROR;
829 p->obj_cnt = im_fanout[255];
831 expsz = 4 * 256 + 24 * p->obj_cnt + 2 * 20;
832 if (expsz != p->idx_map.len) {
833 free(im_fanout);
834 return GIT_ERROR;
837 p->idx_search = idxv1_search;
838 p->idx_search_offset = idxv1_search_offset;
839 p->idx_get = idxv1_get;
840 p->im_fanout = im_fanout;
841 p->im_oid = (unsigned char *)(src_fanout + 256);
843 if ((info = git__malloc(sizeof(*info) * (p->obj_cnt+1))) == NULL) {
844 free(im_fanout);
845 return GIT_ERROR;
848 for (j = 0; j < p->obj_cnt; j++) {
849 uint32_t pos = j * (GIT_OID_RAWSZ + 4);
850 info[j].offset = decode32(p->im_oid + pos);
851 info[j].n = j;
853 info[p->obj_cnt].offset = p->pack_size - GIT_OID_RAWSZ;
854 info[p->obj_cnt].n = p->obj_cnt;
856 if (make_offset_index(p, info)) {
857 free(im_fanout);
858 free(info);
859 return GIT_ERROR;
861 free(info);
863 return GIT_SUCCESS;
866 static int idxv2_search(uint32_t *out, git_pack *p, const git_oid *id)
868 unsigned char *data = p->im_oid;
869 uint32_t lo = id->id[0] ? p->im_fanout[id->id[0] - 1] : 0;
870 uint32_t hi = p->im_fanout[id->id[0]];
872 do {
873 uint32_t mid = (lo + hi) >> 1;
874 uint32_t pos = 20 * mid;
875 int cmp = memcmp(id->id, data + pos, 20);
876 if (cmp < 0)
877 hi = mid;
878 else if (!cmp) {
879 *out = mid;
880 return GIT_SUCCESS;
881 } else
882 lo = mid + 1;
883 } while (lo < hi);
884 return GIT_ENOTFOUND;
887 static int idxv2_search_offset(uint32_t *out, git_pack *p, off_t offset)
889 if (offset > 0 && offset < (p->pack_size - GIT_OID_RAWSZ)) {
890 uint32_t lo = 0, hi = p->obj_cnt+1;
891 uint32_t *idx = p->im_off_idx;
892 do {
893 uint32_t mid = (lo + hi) >> 1;
894 uint32_t n = idx[mid];
895 uint32_t o32 = decode32(p->im_offset32 + n);
896 off_t here = o32;
898 if (o32 & 0x80000000) {
899 uint32_t o64_idx = (o32 & ~0x80000000);
900 here = decode64(p->im_offset64 + 2*o64_idx);
903 if (offset < here)
904 hi = mid;
905 else if (offset == here) {
906 *out = n;
907 return GIT_SUCCESS;
908 } else
909 lo = mid + 1;
910 } while (lo < hi);
912 return GIT_ENOTFOUND;
915 static int idxv2_get(index_entry *e, git_pack *p, uint32_t n)
917 unsigned char *data = p->im_oid;
918 uint32_t *next = p->im_off_next;
920 if (n < p->obj_cnt) {
921 uint32_t o32 = decode32(p->im_offset32 + n);
922 off_t next_off = p->pack_size - GIT_OID_RAWSZ;
923 e->n = n;
924 e->oid = data + n * GIT_OID_RAWSZ;
925 e->offset = o32;
926 if (o32 & 0x80000000) {
927 uint32_t o64_idx = (o32 & ~0x80000000);
928 e->offset = decode64(p->im_offset64 + 2*o64_idx);
930 if (next[n] < p->obj_cnt) {
931 o32 = decode32(p->im_offset32 + next[n]);
932 next_off = o32;
933 if (o32 & 0x80000000) {
934 uint32_t o64_idx = (o32 & ~0x80000000);
935 next_off = decode64(p->im_offset64 + 2*o64_idx);
938 e->size = next_off - e->offset;
939 return GIT_SUCCESS;
941 return GIT_ENOTFOUND;
944 static int pack_openidx_v2(git_pack *p)
946 unsigned char *data = p->idx_map.data;
947 uint32_t *src_fanout = (uint32_t *)(data + 8);
948 uint32_t *im_fanout;
949 offset_idx_info *info;
950 size_t sz, o64_sz, o64_len;
951 uint32_t j;
953 if ((im_fanout = git__malloc(sizeof(*im_fanout) * 256)) == NULL)
954 return GIT_ERROR;
956 im_fanout[0] = decode32(&src_fanout[0]);
957 for (j = 1; j < 256; j++) {
958 im_fanout[j] = decode32(&src_fanout[j]);
959 if (im_fanout[j] < im_fanout[j - 1]) {
960 free(im_fanout);
961 return GIT_ERROR;
964 p->obj_cnt = im_fanout[255];
966 /* minimum size of .idx file (with empty 64-bit offsets table): */
967 sz = 4 + 4 + 256 * 4 + p->obj_cnt * (20 + 4 + 4) + 2 * 20;
968 if (p->idx_map.len < sz) {
969 free(im_fanout);
970 return GIT_ERROR;
973 p->idx_search = idxv2_search;
974 p->idx_search_offset = idxv2_search_offset;
975 p->idx_get = idxv2_get;
976 p->im_fanout = im_fanout;
977 p->im_oid = (unsigned char *)(src_fanout + 256);
978 p->im_crc = (uint32_t *)(p->im_oid + 20 * p->obj_cnt);
979 p->im_offset32 = p->im_crc + p->obj_cnt;
980 p->im_offset64 = p->im_offset32 + p->obj_cnt;
982 if ((info = git__malloc(sizeof(*info) * (p->obj_cnt+1))) == NULL) {
983 free(im_fanout);
984 return GIT_ERROR;
987 /* check 64-bit offset table index values are within bounds */
988 o64_sz = p->idx_map.len - sz;
989 o64_len = o64_sz / 8;
990 for (j = 0; j < p->obj_cnt; j++) {
991 uint32_t o32 = decode32(p->im_offset32 + j);
992 off_t offset = o32;
993 if (o32 & 0x80000000) {
994 uint32_t o64_idx = (o32 & ~0x80000000);
995 if (o64_idx >= o64_len) {
996 free(im_fanout);
997 free(info);
998 return GIT_ERROR;
1000 offset = decode64(p->im_offset64 + 2*o64_idx);
1002 info[j].offset = offset;
1003 info[j].n = j;
1005 info[p->obj_cnt].offset = p->pack_size - GIT_OID_RAWSZ;
1006 info[p->obj_cnt].n = p->obj_cnt;
1008 if (make_offset_index(p, info)) {
1009 free(im_fanout);
1010 free(info);
1011 return GIT_ERROR;
1013 free(info);
1015 return GIT_SUCCESS;
1018 static int pack_stat(git_pack *p)
1020 char pb[GIT_PATH_MAX];
1021 struct stat sb;
1023 if (git__fmt(pb, sizeof(pb), "%s/pack/%s.pack",
1024 p->db->objects_dir,
1025 p->pack_name) < 0)
1026 return GIT_ERROR;
1028 if (gitfo_stat(pb, &sb) || !S_ISREG(sb.st_mode))
1029 return GIT_ERROR;
1031 if (sb.st_size < (3 * 4 + GIT_OID_RAWSZ))
1032 return GIT_ERROR;
1034 p->pack_size = sb.st_size;
1035 p->pack_mtime = sb.st_mtime;
1037 return GIT_SUCCESS;
1040 static int pack_openidx(git_pack *p)
1042 gitlck_lock(&p->lock);
1044 if (p->invalid) {
1045 gitlck_unlock(&p->lock);
1046 return GIT_ERROR;
1049 if (++p->idxcnt == 1 && !p->idx_search) {
1050 int status, version;
1051 uint32_t *data;
1053 if (pack_stat(p) || pack_openidx_map(p)) {
1054 p->invalid = 1;
1055 p->idxcnt--;
1056 gitlck_unlock(&p->lock);
1057 return GIT_ERROR;
1059 data = p->idx_map.data;
1060 status = GIT_SUCCESS;
1061 version = 1;
1063 if (decode32(&data[0]) == PACK_TOC)
1064 version = decode32(&data[1]);
1066 switch (version) {
1067 case 1:
1068 status = pack_openidx_v1(p);
1069 break;
1070 case 2:
1071 status = pack_openidx_v2(p);
1072 break;
1073 default:
1074 status = GIT_ERROR;
1077 if (status != GIT_SUCCESS) {
1078 gitfo_free_map(&p->idx_map);
1079 p->invalid = 1;
1080 p->idxcnt--;
1081 gitlck_unlock(&p->lock);
1082 return status;
1086 gitlck_unlock(&p->lock);
1087 return GIT_SUCCESS;
1090 static void pack_decidx(git_pack *p)
1092 gitlck_lock(&p->lock);
1093 p->idxcnt--;
1094 gitlck_unlock(&p->lock);
1097 static int read_pack_hdr(pack_hdr *out, git_file fd)
1099 pack_hdr hdr;
1101 if (gitfo_read(fd, &hdr, sizeof(hdr)))
1102 return GIT_ERROR;
1104 out->sig = decode32(&hdr.sig);
1105 out->ver = decode32(&hdr.ver);
1106 out->cnt = decode32(&hdr.cnt);
1108 return GIT_SUCCESS;
1111 static int check_pack_hdr(git_pack *p)
1113 pack_hdr hdr;
1115 if (read_pack_hdr(&hdr, p->pack_fd))
1116 return GIT_ERROR;
1118 if (hdr.sig != PACK_SIG
1119 || (hdr.ver != 2 && hdr.ver != 3)
1120 || hdr.cnt != p->obj_cnt)
1121 return GIT_ERROR;
1123 return GIT_SUCCESS;
1126 static int check_pack_sha1(git_pack *p)
1128 unsigned char *data = p->idx_map.data;
1129 off_t pack_sha1_off = p->pack_size - GIT_OID_RAWSZ;
1130 size_t idx_pack_sha1_off = p->idx_map.len - 2 * GIT_OID_RAWSZ;
1131 git_oid pack_id, idx_pack_id;
1133 if (gitfo_lseek(p->pack_fd, pack_sha1_off, SEEK_SET) == -1)
1134 return GIT_ERROR;
1136 if (gitfo_read(p->pack_fd, pack_id.id, sizeof(pack_id.id)))
1137 return GIT_ERROR;
1139 git_oid_mkraw(&idx_pack_id, data + idx_pack_sha1_off);
1141 if (git_oid_cmp(&pack_id, &idx_pack_id))
1142 return GIT_ERROR;
1144 return GIT_SUCCESS;
1147 static int open_pack(git_pack *p)
1149 char pb[GIT_PATH_MAX];
1150 struct stat sb;
1152 if (p->pack_fd != -1)
1153 return GIT_SUCCESS;
1155 if (git__fmt(pb, sizeof(pb), "%s/pack/%s.pack",
1156 p->db->objects_dir,
1157 p->pack_name) < 0)
1158 return GIT_ERROR;
1160 if (pack_openidx(p))
1161 return GIT_ERROR;
1163 if ((p->pack_fd = gitfo_open(pb, O_RDONLY)) < 0) {
1164 p->pack_fd = -1;
1165 pack_decidx(p);
1166 return GIT_ERROR;
1169 if (gitfo_fstat(p->pack_fd, &sb)
1170 || !S_ISREG(sb.st_mode) || p->pack_size != sb.st_size
1171 || check_pack_hdr(p) || check_pack_sha1(p)) {
1172 gitfo_close(p->pack_fd);
1173 p->pack_fd = -1;
1174 pack_decidx(p);
1175 return GIT_ERROR;
1178 pack_decidx(p);
1179 return GIT_SUCCESS;
1182 static void pack_dec(git_pack *p)
1184 int need_free;
1186 gitlck_lock(&p->lock);
1187 need_free = !--p->refcnt;
1188 gitlck_unlock(&p->lock);
1190 if (need_free) {
1191 if (p->idx_search) {
1192 gitfo_free_map(&p->idx_map);
1193 gitfo_close(p->idx_fd);
1194 free(p->im_fanout);
1195 free(p->im_off_idx);
1196 free(p->im_off_next);
1197 if (p->pack_fd != -1)
1198 gitfo_close(p->pack_fd);
1201 gitlck_free(&p->lock);
1202 free(p);
1206 static void packlist_dec(git_odb *db, git_packlist *pl)
1208 int need_free;
1210 assert(db && pl);
1212 gitlck_lock(&db->lock);
1213 need_free = !--pl->refcnt;
1214 gitlck_unlock(&db->lock);
1216 if (need_free) {
1217 size_t j;
1218 for (j = 0; j < pl->n_packs; j++)
1219 pack_dec(pl->packs[j]);
1220 free(pl);
1224 static git_pack *alloc_pack(const char *pack_name)
1226 git_pack *p = git__calloc(1, sizeof(*p));
1227 if (!p)
1228 return NULL;
1230 gitlck_init(&p->lock);
1231 strcpy(p->pack_name, pack_name);
1232 p->refcnt = 1;
1233 p->pack_fd = -1;
1234 return p;
1237 struct scanned_pack {
1238 struct scanned_pack *next;
1239 git_pack *pack;
1242 static int scan_one_pack(void *state, char *name)
1244 struct scanned_pack **ret = state, *r;
1245 char *s = strrchr(name, '/'), *d;
1247 if (git__prefixcmp(s + 1, "pack-")
1248 || git__suffixcmp(s, ".pack")
1249 || strlen(s + 1) != GIT_PACK_NAME_MAX + 4)
1250 return 0;
1252 d = strrchr(s + 1, '.');
1253 strcpy(d + 1, "idx"); /* "pack-abc.pack" -> "pack-abc.idx" */
1254 if (gitfo_exists(name))
1255 return 0;
1257 if ((r = git__malloc(sizeof(*r))) == NULL)
1258 return GIT_ERROR;
1260 *d = '\0'; /* "pack-abc.pack" -_> "pack-abc" */
1261 if ((r->pack = alloc_pack(s + 1)) == NULL) {
1262 free(r);
1263 return GIT_ERROR;
1266 r->next = *ret;
1267 *ret = r;
1268 return 0;
1271 static git_packlist *scan_packs(git_odb *db)
1273 char pb[GIT_PATH_MAX];
1274 struct scanned_pack *state = NULL, *c;
1275 size_t cnt;
1276 git_packlist *new_list;
1278 if (git__fmt(pb, sizeof(pb), "%s/pack", db->objects_dir) < 0)
1279 return NULL;
1280 gitfo_dirent(pb, sizeof(pb), scan_one_pack, &state);
1282 /* TODO - merge old entries into the new array */
1283 for (cnt = 0, c = state; c; c = c->next)
1284 cnt++;
1285 new_list = git__malloc(sizeof(*new_list)
1286 + (sizeof(new_list->packs[0]) * cnt));
1287 if (!new_list)
1288 goto fail;
1290 for (cnt = 0, c = state; c; ) {
1291 struct scanned_pack *n = c->next;
1292 c->pack->db = db;
1293 new_list->packs[cnt++] = c->pack;
1294 free(c);
1295 c = n;
1297 new_list->n_packs = cnt;
1298 new_list->refcnt = 2;
1299 db->packlist = new_list;
1300 return new_list;
1302 fail:
1303 while (state) {
1304 struct scanned_pack *n = state->next;
1305 pack_dec(state->pack);
1306 free(state);
1307 state = n;
1309 return NULL;
1312 static git_packlist *packlist_get(git_odb *db)
1314 git_packlist *pl;
1316 gitlck_lock(&db->lock);
1317 if ((pl = db->packlist) != NULL)
1318 pl->refcnt++;
1319 else
1320 pl = scan_packs(db);
1321 gitlck_unlock(&db->lock);
1322 return pl;
1325 static int search_packs(git_pack **p, uint32_t *n, git_odb *db, const git_oid *id)
1327 git_packlist *pl = packlist_get(db);
1328 size_t j;
1330 if (!pl)
1331 return GIT_ENOTFOUND;
1333 for (j = 0; j < pl->n_packs; j++) {
1335 git_pack *pack = pl->packs[j];
1336 uint32_t pos;
1337 int res;
1339 if (pack_openidx(pack))
1340 continue;
1341 res = pack->idx_search(&pos, pack, id);
1342 pack_decidx(pack);
1344 if (!res) {
1345 packlist_dec(db, pl);
1346 if (p)
1347 *p = pack;
1348 if (n)
1349 *n = pos;
1350 return GIT_SUCCESS;
1355 packlist_dec(db, pl);
1356 return GIT_ENOTFOUND;
1359 static int exists_packed(git_odb *db, const git_oid *id)
1361 return !search_packs(NULL, NULL, db, id);
1364 static int exists_loose(git_odb *db, const git_oid *id)
1366 char file[GIT_PATH_MAX];
1368 if (object_file_name(file, sizeof(file), db->objects_dir, id))
1369 return 0;
1371 if (gitfo_exists(file) < 0)
1372 return 0;
1374 return 1;
1377 int git_odb_exists(git_odb *db, const git_oid *id)
1379 /* TODO: extend to search alternate db's */
1380 if (exists_packed(db, id))
1381 return 1;
1382 return exists_loose(db, id);
1385 int git_odb_open(git_odb **out, const char *objects_dir)
1387 git_odb *db = git__calloc(1, sizeof(*db));
1388 if (!db)
1389 return GIT_ERROR;
1391 db->objects_dir = git__strdup(objects_dir);
1392 if (!db->objects_dir) {
1393 free(db);
1394 return GIT_ERROR;
1397 gitlck_init(&db->lock);
1399 db->object_zlib_level = Z_BEST_SPEED;
1400 db->fsync_object_files = 0;
1402 *out = db;
1403 return GIT_SUCCESS;
1406 void git_odb_close(git_odb *db)
1408 git_packlist *pl;
1410 if (!db)
1411 return;
1413 gitlck_lock(&db->lock);
1415 pl = db->packlist;
1416 db->packlist = NULL;
1418 if (db->alternates) {
1419 git_odb **alt;
1420 for (alt = db->alternates; *alt; alt++)
1421 git_odb_close(*alt);
1422 free(db->alternates);
1425 free(db->objects_dir);
1427 gitlck_unlock(&db->lock);
1428 if (pl)
1429 packlist_dec(db, pl);
1430 gitlck_free(&db->lock);
1431 free(db);
1434 int git_odb_read(
1435 git_obj *out,
1436 git_odb *db,
1437 const git_oid *id)
1439 attempt:
1440 if (!git_odb__read_packed(out, db, id))
1441 return GIT_SUCCESS;
1442 if (!git_odb__read_loose(out, db, id))
1443 return GIT_SUCCESS;
1444 if (!open_alternates(db))
1445 goto attempt;
1447 out->data = NULL;
1448 return GIT_ENOTFOUND;
1451 int git_odb__read_loose(git_obj *out, git_odb *db, const git_oid *id)
1453 char file[GIT_PATH_MAX];
1454 gitfo_buf obj = GITFO_BUF_INIT;
1456 assert(out && db && id);
1458 out->data = NULL;
1459 out->len = 0;
1460 out->type = GIT_OBJ_BAD;
1462 if (object_file_name(file, sizeof(file), db->objects_dir, id))
1463 return GIT_ENOTFOUND; /* TODO: error handling */
1465 if (gitfo_read_file(&obj, file))
1466 return GIT_ENOTFOUND; /* TODO: error handling */
1468 if (inflate_disk_obj(out, &obj)) {
1469 gitfo_free_buf(&obj);
1470 return GIT_ENOTFOUND; /* TODO: error handling */
1473 gitfo_free_buf(&obj);
1475 return GIT_SUCCESS;
1478 static int unpack_object(git_obj *out, git_pack *p, index_entry *e)
1480 assert(out && p && e);
1482 if (open_pack(p))
1483 return GIT_ERROR;
1485 /* TODO - actually unpack the data! */
1487 return GIT_SUCCESS;
1490 static int read_packed(git_obj *out, git_pack *p, const git_oid *id)
1492 uint32_t n;
1493 index_entry e;
1494 int res;
1496 assert(out && p && id);
1498 if (pack_openidx(p))
1499 return GIT_ERROR;
1500 res = p->idx_search(&n, p, id);
1501 if (!res)
1502 res = p->idx_get(&e, p, n);
1503 pack_decidx(p);
1505 if (!res) {
1506 /* TODO unpack object */
1507 res = unpack_object(out, p, &e);
1510 return res;
1513 int git_odb__read_packed(git_obj *out, git_odb *db, const git_oid *id)
1515 git_packlist *pl = packlist_get(db);
1516 size_t j;
1518 assert(out && db && id);
1520 out->data = NULL;
1521 out->len = 0;
1522 out->type = GIT_OBJ_BAD;
1524 if (!pl)
1525 return GIT_ENOTFOUND;
1527 for (j = 0; j < pl->n_packs; j++) {
1528 if (!read_packed(out, pl->packs[j], id)) {
1529 packlist_dec(db, pl);
1530 return GIT_SUCCESS;
1534 packlist_dec(db, pl);
1535 return GIT_ENOTFOUND;
1538 int git_odb_write(git_oid *id, git_odb *db, git_obj *obj)
1540 char hdr[64];
1541 int hdrlen;
1542 gitfo_buf buf = GITFO_BUF_INIT;
1544 assert(id && db && obj);
1546 if (hash_obj(id, hdr, sizeof(hdr), &hdrlen, obj) < 0)
1547 return GIT_ERROR;
1549 if (git_odb_exists(db, id))
1550 return GIT_SUCCESS;
1552 if (deflate_obj(&buf, hdr, hdrlen, obj, db->object_zlib_level) < 0)
1553 return GIT_ERROR;
1555 if (write_obj(&buf, id, db) < 0) {
1556 gitfo_free_buf(&buf);
1557 return GIT_ERROR;
1560 gitfo_free_buf(&buf);
1562 return GIT_SUCCESS;