1 #include "git-compat-util.h"
6 #include "object-file.h"
7 #include "hash-lookup.h"
11 #include "chunk-format.h"
12 #include "pack-bitmap.h"
13 #include "pack-revindex.h"
/*
 * Prototypes for functions that are defined later in this file.
 */

/* Validates the checksum over the mapped multi-pack-index data. */
int midx_checksum_valid(struct multi_pack_index *m);

/*
 * Removes "multi-pack-index-*" files ending in `ext` from the pack
 * directory of `object_dir`, optionally sparing the one that matches
 * `keep_hash`.
 */
void clear_midx_files_ext(const char *object_dir, const char *ext,
			  unsigned char *keep_hash);

/* Match "foo.idx" against either "foo.pack" _or_ "foo.idx". */
int cmp_idx_or_pack_name(const char *idx_or_pack_name,
			 const char *idx_name);
21 const unsigned char *get_midx_checksum(struct multi_pack_index
*m
)
23 return m
->data
+ m
->data_len
- the_hash_algo
->rawsz
;
/*
 * Append the path of the multi-pack-index file that belongs to
 * `object_dir` ("<object_dir>/pack/multi-pack-index") to `out`.
 *
 * `out` is appended to, not reset.
 */
void get_midx_filename(struct strbuf *out, const char *object_dir)
{
	strbuf_addf(out, "%s/pack/multi-pack-index", object_dir);
}
31 void get_midx_rev_filename(struct strbuf
*out
, struct multi_pack_index
*m
)
33 get_midx_filename(out
, m
->object_dir
);
34 strbuf_addf(out
, "-%s.rev", hash_to_hex(get_midx_checksum(m
)));
/*
 * Chunk reader for the OID fanout chunk (handed to read_chunk() by
 * load_multi_pack_index()).
 *
 * `data` is the struct multi_pack_index being populated.  The chunk start is
 * recorded as m->chunk_oid_fanout, the chunk is required to be exactly
 * 4 * 256 bytes, consecutive fanout entries must be non-decreasing, and
 * m->num_objects is taken from the last fanout entry.
 *
 * NOTE(review): this view does not show the declaration of `i` or the
 * statements that follow each error() call and end the function -- confirm
 * the return values against the full file.
 */
37 static int midx_read_oid_fanout(const unsigned char *chunk_start
,
38 size_t chunk_size
, void *data
)
41 struct multi_pack_index
*m
= data
;
42 m
->chunk_oid_fanout
= (uint32_t *)chunk_start
;
/* The fanout table holds 256 entries of 4 bytes each. */
44 if (chunk_size
!= 4 * 256) {
45 error(_("multi-pack-index OID fanout is of the wrong size"));
/* Entries are converted with ntohl(); each one must not exceed its successor. */
48 for (i
= 0; i
< 255; i
++) {
49 uint32_t oid_fanout1
= ntohl(m
->chunk_oid_fanout
[i
]);
50 uint32_t oid_fanout2
= ntohl(m
->chunk_oid_fanout
[i
+1]);
52 if (oid_fanout1
> oid_fanout2
) {
53 error(_("oid fanout out of order: fanout[%d] = %"PRIx32
" > %"PRIx32
" = fanout[%d]"),
54 i
, oid_fanout1
, oid_fanout2
, i
+ 1);
/* The last fanout entry is used as the total object count. */
58 m
->num_objects
= ntohl(m
->chunk_oid_fanout
[255]);
/*
 * Chunk reader for the OID lookup chunk (handed to read_chunk() by
 * load_multi_pack_index()).
 *
 * Records the chunk start as m->chunk_oid_lookup and reports an error when
 * the chunk size differs from hash_len * num_objects.  The check reads
 * m->num_objects, which midx_read_oid_fanout() sets.
 *
 * NOTE(review): the return statements are not visible in this view.
 */
62 static int midx_read_oid_lookup(const unsigned char *chunk_start
,
63 size_t chunk_size
, void *data
)
65 struct multi_pack_index
*m
= data
;
66 m
->chunk_oid_lookup
= chunk_start
;
/* st_mult() is used for the product; presumably overflow-checked -- TODO confirm. */
68 if (chunk_size
!= st_mult(m
->hash_len
, m
->num_objects
)) {
69 error(_("multi-pack-index OID lookup chunk is the wrong size"));
/*
 * Chunk reader for the object offsets chunk (handed to read_chunk() by
 * load_multi_pack_index()).
 *
 * Records the chunk start as m->chunk_object_offsets and reports an error
 * when the chunk size differs from num_objects * MIDX_CHUNK_OFFSET_WIDTH.
 *
 * NOTE(review): the return statements are not visible in this view.
 */
75 static int midx_read_object_offsets(const unsigned char *chunk_start
,
76 size_t chunk_size
, void *data
)
78 struct multi_pack_index
*m
= data
;
79 m
->chunk_object_offsets
= chunk_start
;
81 if (chunk_size
!= st_mult(m
->num_objects
, MIDX_CHUNK_OFFSET_WIDTH
)) {
82 error(_("multi-pack-index object offset chunk is the wrong size"));
/* Smallest acceptable file: the fixed header plus one trailing hash. */
88 #define MIDX_MIN_SIZE (MIDX_HEADER_SIZE + the_hash_algo->rawsz)
/*
 * Open, map and parse the multi-pack-index file of `object_dir`.
 *
 * Visible steps: build the file name, open and fstat() it, reject files
 * smaller than MIDX_MIN_SIZE, map the file read-only, allocate the
 * struct multi_pack_index, validate the header (signature, version, hash
 * version), read the table of contents, bind the required and optional
 * chunks, and build the pack-name array while checking that names are
 * terminated and strictly ascending.
 *
 * NOTE(review): many lines of this function are not visible in this view
 * (declarations of fd/st/midx_size/i/end, the handling of a failed
 * git_open(), the assignments of m->data and m->local, the error-path
 * jumps and the return statements).  The comments below describe only what
 * the visible lines do.
 */
90 struct multi_pack_index
*load_multi_pack_index(const char *object_dir
, int local
)
92 struct multi_pack_index
*m
= NULL
;
96 void *midx_map
= NULL
;
97 uint32_t hash_version
;
98 struct strbuf midx_name
= STRBUF_INIT
;
100 const char *cur_pack_name
;
101 struct chunkfile
*cf
= NULL
;
103 get_midx_filename(&midx_name
, object_dir
);
105 fd
= git_open(midx_name
.buf
);
109 if (fstat(fd
, &st
)) {
110 error_errno(_("failed to read %s"), midx_name
.buf
);
114 midx_size
= xsize_t(st
.st_size
);
/* A file shorter than header + checksum cannot be a valid MIDX. */
116 if (midx_size
< MIDX_MIN_SIZE
) {
117 error(_("multi-pack-index file %s is too small"), midx_name
.buf
);
121 strbuf_release(&midx_name
);
/* Map the whole file read-only and private. */
123 midx_map
= xmmap(NULL
, midx_size
, PROT_READ
, MAP_PRIVATE
, fd
, 0);
/* Allocate m with a copy of object_dir stored in its flexible member. */
126 FLEX_ALLOC_STR(m
, object_dir
, object_dir
);
128 m
->data_len
= midx_size
;
/* Header validation: a signature or version mismatch is fatal. */
131 m
->signature
= get_be32(m
->data
);
132 if (m
->signature
!= MIDX_SIGNATURE
)
133 die(_("multi-pack-index signature 0x%08x does not match signature 0x%08x"),
134 m
->signature
, MIDX_SIGNATURE
);
136 m
->version
= m
->data
[MIDX_BYTE_FILE_VERSION
];
137 if (m
->version
!= MIDX_VERSION
)
138 die(_("multi-pack-index version %d not recognized"),
/* A hash-version mismatch is reported with error() rather than die(). */
141 hash_version
= m
->data
[MIDX_BYTE_HASH_VERSION
];
142 if (hash_version
!= oid_version(the_hash_algo
)) {
143 error(_("multi-pack-index hash version %u does not match version %u"),
144 hash_version
, oid_version(the_hash_algo
));
147 m
->hash_len
= the_hash_algo
->rawsz
;
149 m
->num_chunks
= m
->data
[MIDX_BYTE_NUM_CHUNKS
];
151 m
->num_packs
= get_be32(m
->data
+ MIDX_BYTE_NUM_PACKS
);
/* -1 marks the preferred pack as not yet computed (see midx_preferred_pack()). */
153 m
->preferred_pack_idx
= -1;
155 cf
= init_chunkfile(NULL
);
157 if (read_table_of_contents(cf
, m
->data
, midx_size
,
158 MIDX_HEADER_SIZE
, m
->num_chunks
,
159 MIDX_CHUNK_ALIGNMENT
))
/* Required chunks: a missing or malformed one is fatal. */
162 if (pair_chunk(cf
, MIDX_CHUNKID_PACKNAMES
, &m
->chunk_pack_names
, &m
->chunk_pack_names_len
))
163 die(_("multi-pack-index required pack-name chunk missing or corrupted"));
164 if (read_chunk(cf
, MIDX_CHUNKID_OIDFANOUT
, midx_read_oid_fanout
, m
))
165 die(_("multi-pack-index required OID fanout chunk missing or corrupted"));
166 if (read_chunk(cf
, MIDX_CHUNKID_OIDLOOKUP
, midx_read_oid_lookup
, m
))
167 die(_("multi-pack-index required OID lookup chunk missing or corrupted"));
168 if (read_chunk(cf
, MIDX_CHUNKID_OBJECTOFFSETS
, midx_read_object_offsets
, m
))
169 die(_("multi-pack-index required object offsets chunk missing or corrupted"));
/* Optional chunks: the result of pair_chunk() is deliberately not checked. */
171 pair_chunk(cf
, MIDX_CHUNKID_LARGEOFFSETS
, &m
->chunk_large_offsets
,
172 &m
->chunk_large_offsets_len
);
/* The BTMP and RIDX chunks are only bound when their test knobs are true (default 1). */
173 if (git_env_bool("GIT_TEST_MIDX_READ_BTMP", 1))
174 pair_chunk(cf
, MIDX_CHUNKID_BITMAPPEDPACKS
,
175 (const unsigned char **)&m
->chunk_bitmapped_packs
,
176 &m
->chunk_bitmapped_packs_len
);
178 if (git_env_bool("GIT_TEST_MIDX_READ_RIDX", 1))
179 pair_chunk(cf
, MIDX_CHUNKID_REVINDEX
, &m
->chunk_revindex
,
180 &m
->chunk_revindex_len
);
/* One zero-initialised slot per pack for its name and its packed_git. */
182 CALLOC_ARRAY(m
->pack_names
, m
->num_packs
);
183 CALLOC_ARRAY(m
->packs
, m
->num_packs
);
/*
 * Walk the pack-name chunk: each name is a NUL-terminated string, and
 * m->pack_names[i] points directly into the chunk.
 */
185 cur_pack_name
= (const char *)m
->chunk_pack_names
;
186 for (i
= 0; i
< m
->num_packs
; i
++) {
/* Bytes remaining in the chunk from the current name onward. */
188 size_t avail
= m
->chunk_pack_names_len
-
189 (cur_pack_name
- (const char *)m
->chunk_pack_names
);
191 m
->pack_names
[i
] = cur_pack_name
;
/* The search for the terminator is bounded by `avail`, so it stays inside the chunk. */
193 end
= memchr(cur_pack_name
, '\0', avail
);
195 die(_("multi-pack-index pack-name chunk is too short"));
196 cur_pack_name
= end
+ 1;
/* Names must be strictly ascending under strcmp(). */
198 if (i
&& strcmp(m
->pack_names
[i
], m
->pack_names
[i
- 1]) <= 0)
199 die(_("multi-pack-index pack names out of order: '%s' before '%s'"),
200 m
->pack_names
[i
- 1],
204 trace2_data_intmax("midx", the_repository
, "load/num_packs", m
->num_packs
);
205 trace2_data_intmax("midx", the_repository
, "load/num_objects", m
->num_objects
);
/* NOTE(review): the lines below look like the failure cleanup path -- confirm. */
212 strbuf_release(&midx_name
);
215 munmap(midx_map
, midx_size
);
/*
 * Release the resources held by `m`: unmap the file data, clear the
 * multi_pack_index flag on the packs referenced from m->packs, and free the
 * m->packs and m->pack_names arrays (resetting both pointers to NULL).
 *
 * NOTE(review): several lines are not visible in this view (the declaration
 * of `i`, any NULL guards on `m` and on m->packs[i], handling of a chained
 * m->next, and the release of `m` itself) -- confirm against the full file.
 */
221 void close_midx(struct multi_pack_index
*m
)
/* The cast drops const from m->data for munmap(). */
230 munmap((unsigned char *)m
->data
, m
->data_len
);
232 for (i
= 0; i
< m
->num_packs
; i
++) {
234 m
->packs
[i
]->multi_pack_index
= 0;
236 FREE_AND_NULL(m
->packs
);
237 FREE_AND_NULL(m
->pack_names
);
/*
 * Make sure the pack with id `pack_int_id` is loaded into
 * m->packs[pack_int_id].
 *
 * An out-of-range id is fatal.  If the slot is empty, the pack path is
 * built as "<object_dir>/pack/<pack name>", the pack is added with
 * add_packed_git(), flagged as belonging to a multi-pack-index, stored in
 * the slot, installed into repository `r` and appended to the MRU list.
 *
 * NOTE(review): the return statements (already-loaded case, failed
 * add_packed_git(), success) are not visible in this view.  Callers in this
 * file treat a non-zero result as failure.
 */
241 int prepare_midx_pack(struct repository
*r
, struct multi_pack_index
*m
, uint32_t pack_int_id
)
243 struct strbuf pack_name
= STRBUF_INIT
;
244 struct packed_git
*p
;
246 if (pack_int_id
>= m
->num_packs
)
247 die(_("bad pack-int-id: %u (%u total packs)"),
248 pack_int_id
, m
->num_packs
);
/* A non-NULL slot means the pack was loaded earlier. */
250 if (m
->packs
[pack_int_id
])
253 strbuf_addf(&pack_name
, "%s/pack/%s", m
->object_dir
,
254 m
->pack_names
[pack_int_id
]);
256 p
= add_packed_git(pack_name
.buf
, pack_name
.len
, m
->local
);
257 strbuf_release(&pack_name
);
262 p
->multi_pack_index
= 1;
263 m
->packs
[pack_int_id
] = p
;
264 install_packed_git(r
, p
);
265 list_add_tail(&p
->mru
, &r
->objects
->packed_git_mru
);
/* Each BTMP entry consists of two uint32_t values. */
270 #define MIDX_CHUNK_BITMAPPED_PACKS_WIDTH (2 * sizeof(uint32_t))
/*
 * Fill `bp` with the bitmapped-pack information for `pack_int_id`.
 *
 * Returns the result of error() when the MIDX has no BTMP chunk or when the
 * pack cannot be prepared.  Otherwise it stores the packed_git, the two
 * values read from the pack's BTMP entry, and the pack id in `bp`.
 *
 * NOTE(review): the tail of the bitmap_nr offset expression and the final
 * return are not visible in this view.
 */
272 int nth_bitmapped_pack(struct repository
*r
, struct multi_pack_index
*m
,
273 struct bitmapped_pack
*bp
, uint32_t pack_int_id
)
275 if (!m
->chunk_bitmapped_packs
)
276 return error(_("MIDX does not contain the BTMP chunk"));
278 if (prepare_midx_pack(r
, m
, pack_int_id
))
279 return error(_("could not load bitmapped pack %"PRIu32
), pack_int_id
);
281 bp
->p
= m
->packs
[pack_int_id
];
/* bitmap_pos is read from the start of this pack's entry. */
282 bp
->bitmap_pos
= get_be32((char *)m
->chunk_bitmapped_packs
+
283 MIDX_CHUNK_BITMAPPED_PACKS_WIDTH
* pack_int_id
);
/* bitmap_nr is read at an additional offset within the same entry (offset term not visible here). */
284 bp
->bitmap_nr
= get_be32((char *)m
->chunk_bitmapped_packs
+
285 MIDX_CHUNK_BITMAPPED_PACKS_WIDTH
* pack_int_id
+
287 bp
->pack_int_id
= pack_int_id
;
292 int bsearch_midx(const struct object_id
*oid
, struct multi_pack_index
*m
, uint32_t *result
)
294 return bsearch_hash(oid
->hash
, m
->chunk_oid_fanout
, m
->chunk_oid_lookup
,
295 the_hash_algo
->rawsz
, result
);
/*
 * Read the n-th object id from the OID lookup chunk of `m` into `oid`.
 *
 * The position is bounds-checked against m->num_objects before the read;
 * the entry lives at byte offset hash_len * n inside m->chunk_oid_lookup.
 *
 * NOTE(review): the declaration of the parameter `n` and both return
 * statements are not visible in this view.
 */
298 struct object_id
*nth_midxed_object_oid(struct object_id
*oid
,
299 struct multi_pack_index
*m
,
302 if (n
>= m
->num_objects
)
305 oidread(oid
, m
->chunk_oid_lookup
+ st_mult(m
->hash_len
, n
));
/*
 * Return the pack offset recorded for the object at position `pos`.
 *
 * Each object-offsets entry is MIDX_CHUNK_OFFSET_WIDTH bytes wide; the
 * 32-bit offset field is read sizeof(uint32_t) bytes into the entry.  When
 * a large-offsets chunk exists and the MIDX_LARGE_OFFSET_NEEDED bit is set,
 * the remaining bits index into that chunk (bounds-checked) and the 64-bit
 * value stored there is returned.
 *
 * NOTE(review): the declaration of `offset32` and the return for the
 * small-offset case are not visible in this view.
 */
309 off_t
nth_midxed_offset(struct multi_pack_index
*m
, uint32_t pos
)
311 const unsigned char *offset_data
;
/* pos is widened to off_t before the multiplication. */
314 offset_data
= m
->chunk_object_offsets
+ (off_t
)pos
* MIDX_CHUNK_OFFSET_WIDTH
;
315 offset32
= get_be32(offset_data
+ sizeof(uint32_t));
/* '&' binds tighter than '&&': this tests the chunk pointer AND the flag bit. */
317 if (m
->chunk_large_offsets
&& offset32
& MIDX_LARGE_OFFSET_NEEDED
) {
318 if (sizeof(off_t
) < sizeof(uint64_t))
319 die(_("multi-pack-index stores a 64-bit offset, but off_t is too small"));
/* Clear the flag bit; what remains is the index into the large-offsets chunk. */
321 offset32
^= MIDX_LARGE_OFFSET_NEEDED
;
322 if (offset32
>= m
->chunk_large_offsets_len
/ sizeof(uint64_t))
323 die(_("multi-pack-index large offset out of bounds"));
324 return get_be64(m
->chunk_large_offsets
+ sizeof(uint64_t) * offset32
);
330 uint32_t nth_midxed_pack_int_id(struct multi_pack_index
*m
, uint32_t pos
)
332 return get_be32(m
->chunk_object_offsets
+
333 (off_t
)pos
* MIDX_CHUNK_OFFSET_WIDTH
);
/*
 * Locate `oid` in `m` and describe its location in `e`.
 *
 * Visible steps: binary-search the MIDX for the oid, bounds-check the
 * resulting position, resolve and prepare the owning pack, verify that the
 * pack is still valid, check the pack's bad_objects set for this oid, and
 * store the object's offset in e->offset.
 *
 * NOTE(review): the declaration of `pos`, the statement taken on each failed
 * check, the assignment of e->p and the final return are not visible in this
 * view.  verify_midx_file() treats a zero result as failure.
 */
336 int fill_midx_entry(struct repository
*r
,
337 const struct object_id
*oid
,
338 struct pack_entry
*e
,
339 struct multi_pack_index
*m
)
342 uint32_t pack_int_id
;
343 struct packed_git
*p
;
345 if (!bsearch_midx(oid
, m
, &pos
))
348 if (pos
>= m
->num_objects
)
351 pack_int_id
= nth_midxed_pack_int_id(m
, pos
);
353 if (prepare_midx_pack(r
, m
, pack_int_id
))
355 p
= m
->packs
[pack_int_id
];
358 * We are about to tell the caller where they can locate the
359 * requested object. We better make sure the packfile is
360 * still here and can be accessed before supplying that
361 * answer, as it may have been deleted since the MIDX was
364 if (!is_pack_valid(p
))
/* The set is only searched when it is non-empty. */
367 if (oidset_size(&p
->bad_objects
) &&
368 oidset_contains(&p
->bad_objects
, oid
))
371 e
->offset
= nth_midxed_offset(m
, pos
);
/*
 * Compare a ".idx" or ".pack" file name against a ".idx" name, treating
 * "<base>.pack" and "<base>.idx" as equal; any other pair is ordered as
 * strcmp() would order it.
 *
 * NOTE(review): the body of the prefix-skipping loop and the statement
 * executed when the "idx"/"pack" remainder test succeeds are not visible in
 * this view.
 */
377 /* Match "foo.idx" against either "foo.pack" _or_ "foo.idx". */
378 int cmp_idx_or_pack_name(const char *idx_or_pack_name
,
379 const char *idx_name
)
381 /* Skip past any initial matching prefix. */
382 while (*idx_name
&& *idx_name
== *idx_or_pack_name
) {
388 * If we didn't match completely, we may have matched "pack-1234." and
389 * be left with "idx" and "pack" respectively, which is also OK. We do
390 * not have to check for "idx" and "idx", because that would have been
391 * a complete match (and in that case these strcmps will be false, but
392 * we'll correctly return 0 from the final strcmp() below.
394 * Technically this matches "fooidx" and "foopack", but we'd never have
395 * such names in the first place.
397 if (!strcmp(idx_name
, "idx") && !strcmp(idx_or_pack_name
, "pack"))
401 * This not only checks for a complete match, but also orders based on
402 * the first non-identical character, which means our ordering will
403 * match a raw strcmp(). That makes it OK to use this to binary search
404 * a naively-sorted list.
406 return strcmp(idx_or_pack_name
, idx_name
);
/*
 * Binary-search m->pack_names for `idx_or_pack_name`, comparing with
 * cmp_idx_or_pack_name() so that a ".pack" name matches the stored ".idx"
 * name.  The search covers the half-open range [first, last).
 *
 * NOTE(review): the remaining parameter (midx_contains_pack() passes NULL
 * for it), the declarations of `current` and `cmp`, the handling of the
 * comparison result and the return statements are not visible in this view.
 */
409 int midx_locate_pack(struct multi_pack_index
*m
, const char *idx_or_pack_name
,
412 uint32_t first
= 0, last
= m
->num_packs
;
414 while (first
< last
) {
/* Midpoint computed as first + (last - first) / 2, which avoids overflowing first + last. */
415 uint32_t mid
= first
+ (last
- first
) / 2;
419 current
= m
->pack_names
[mid
];
420 cmp
= cmp_idx_or_pack_name(idx_or_pack_name
, current
);
436 int midx_contains_pack(struct multi_pack_index
*m
, const char *idx_or_pack_name
)
438 return midx_locate_pack(m
, idx_or_pack_name
, NULL
);
/*
 * Store the id of the preferred pack of `m` in `*pack_int_id`.
 *
 * m->preferred_pack_idx caches the answer: -1 means "not computed yet" and
 * -2 means "no revindex".  On first use the reverse index is loaded and the
 * preferred pack is taken to be the pack holding the object at pack
 * position 0.
 *
 * NOTE(review): the return after the failed load_midx_revindex() and the
 * final return are not visible in this view.
 */
441 int midx_preferred_pack(struct multi_pack_index
*m
, uint32_t *pack_int_id
)
443 if (m
->preferred_pack_idx
== -1) {
444 if (load_midx_revindex(m
) < 0) {
/* Remember the failure so the load is not retried. */
445 m
->preferred_pack_idx
= -2;
449 m
->preferred_pack_idx
=
450 nth_midxed_pack_int_id(m
, pack_pos_to_midx(m
, 0));
451 } else if (m
->preferred_pack_idx
== -2)
452 return -1; /* no revindex */
454 *pack_int_id
= m
->preferred_pack_idx
;
/*
 * Load the multi-pack-index of `object_dir` and attach it to repository `r`.
 *
 * Visible steps: make sure the repository settings are populated, check
 * core_multi_pack_index, scan the MIDXs already attached to `r` for one with
 * the same object_dir, load the file, and store the result in
 * r->objects->multi_pack_index.
 *
 * NOTE(review): the statements taken when the setting is off or when a
 * matching MIDX is already attached, the handling of a failed load, the use
 * of `mp`, and the return values are not visible in this view.
 */
458 int prepare_multi_pack_index_one(struct repository
*r
, const char *object_dir
, int local
)
460 struct multi_pack_index
*m
;
461 struct multi_pack_index
*m_search
;
463 prepare_repo_settings(r
);
464 if (!r
->settings
.core_multi_pack_index
)
/* Walk the chain of MIDXs already attached to r. */
467 for (m_search
= r
->objects
->multi_pack_index
; m_search
; m_search
= m_search
->next
)
468 if (!strcmp(object_dir
, m_search
->object_dir
))
471 m
= load_multi_pack_index(object_dir
, local
);
474 struct multi_pack_index
*mp
= r
->objects
->multi_pack_index
;
479 r
->objects
->multi_pack_index
= m
;
486 int midx_checksum_valid(struct multi_pack_index
*m
)
488 return hashfile_checksum_valid(m
->data
, m
->data_len
);
/*
 * State shared with clear_midx_file_ext().
 *
 * NOTE(review): the member declarations are not visible in this view; the
 * code below uses `keep` (a file name to spare, may be NULL) and `ext`
 * (the required file-name suffix).
 */
491 struct clear_midx_data
{
/*
 * Per-file callback: unlink `full_path` when `file_name` starts with
 * "multi-pack-index-", ends with data->ext, and is not data->keep.
 * A failing unlink() is fatal.
 *
 * NOTE(review): the statements taken when a file is skipped are not visible
 * in this view.
 */
496 static void clear_midx_file_ext(const char *full_path
, size_t full_path_len UNUSED
,
497 const char *file_name
, void *_data
)
499 struct clear_midx_data
*data
= _data
;
/* Skip files that do not match the prefix/suffix pattern. */
501 if (!(starts_with(file_name
, "multi-pack-index-") &&
502 ends_with(file_name
, data
->ext
)))
/* Skip the one file the caller asked to keep. */
504 if (data
->keep
&& !strcmp(data
->keep
, file_name
))
507 if (unlink(full_path
))
508 die_errno(_("failed to remove %s"), full_path
);
/*
 * Walk the pack directory of `object_dir` to remove the
 * "multi-pack-index-*<ext>" files.  The file named
 * "multi-pack-index-<hex keep_hash><ext>" is recorded in data.keep so that
 * the per-file callback can spare it.
 *
 * NOTE(review): the condition guarding the data.keep assignment (callers in
 * this file pass NULL for keep_hash), the assignment of data.ext, the
 * remaining arguments of for_each_file_in_pack_dir() and the release of
 * data.keep (allocated by xstrfmt()) are not visible in this view.
 */
511 void clear_midx_files_ext(const char *object_dir
, const char *ext
,
512 unsigned char *keep_hash
)
514 struct clear_midx_data data
;
515 memset(&data
, 0, sizeof(struct clear_midx_data
));
518 data
.keep
= xstrfmt("multi-pack-index-%s%s",
519 hash_to_hex(keep_hash
), ext
);
522 for_each_file_in_pack_dir(object_dir
,
529 void clear_midx_file(struct repository
*r
)
531 struct strbuf midx
= STRBUF_INIT
;
533 get_midx_filename(&midx
, r
->objects
->odb
->path
);
535 if (r
->objects
&& r
->objects
->multi_pack_index
) {
536 close_midx(r
->objects
->multi_pack_index
);
537 r
->objects
->multi_pack_index
= NULL
;
540 if (remove_path(midx
.buf
))
541 die(_("failed to clear multi-pack-index at %s"), midx
.buf
);
543 clear_midx_files_ext(r
->objects
->odb
->path
, ".bitmap", NULL
);
544 clear_midx_files_ext(r
->objects
->odb
->path
, ".rev", NULL
);
546 strbuf_release(&midx
);
/* Set to 1 by midx_report(); verify_midx_file() resets and returns it. */
static int verify_midx_error;

/*
 * Record that verification found a problem and print the printf-style
 * message `fmt` to stderr, followed by a newline.
 */
__attribute__((format (printf, 1, 2)))
static void midx_report(const char *fmt, ...)
{
	va_list args;

	verify_midx_error = 1;

	va_start(args, fmt);
	vfprintf(stderr, fmt, args);
	va_end(args);

	fprintf(stderr, "\n");
}
/*
 * Associates an object's position in the MIDX (`pos`) with the id of the
 * pack that holds it (`pack_int_id`).
 */
struct pair_pos_vs_id
{
	uint32_t pos;
	uint32_t pack_int_id;
};

/*
 * qsort()-style comparator that orders pairs by descending pack_int_id:
 * the result is positive when `_b` has the larger id, negative when `_a`
 * has the larger id, and zero when the ids are equal.
 *
 * The ids are unsigned 32-bit values, so they are compared explicitly.
 * Returning their difference as an int gives the wrong sign as soon as the
 * two ids are 2^31 or more apart.
 */
static int compare_pair_pos_vs_id(const void *_a, const void *_b)
{
	const struct pair_pos_vs_id *a = _a;
	const struct pair_pos_vs_id *b = _b;

	return (b->pack_int_id > a->pack_int_id) -
	       (b->pack_int_id < a->pack_int_id);
}
/*
 * midx_display_sparse_progress(progress, n) forwards to display_progress()
 * only when the low 12 bits of the count are zero, i.e. once every
 * SPARSE_PROGRESS_INTERVAL (4096) counts.
 *
 * NOTE(review): the lines that introduce `_n` and that open and close the
 * macro body are not visible in this view.
 */
577 * Limit calls to display_progress() for performance reasons.
578 * The interval here was arbitrarily chosen.
580 #define SPARSE_PROGRESS_INTERVAL (1 << 12)
581 #define midx_display_sparse_progress(progress, n) \
584 if ((_n & (SPARSE_PROGRESS_INTERVAL - 1)) == 0) \
585 display_progress(progress, _n); \
/*
 * Verify the multi-pack-index of `object_dir`.
 *
 * Visible checks, in order: the file loads, its checksum is valid, every
 * referenced pack can be prepared, the MIDX is not empty, the object ids
 * are strictly ascending, and each object's offset recorded in the MIDX
 * equals the offset found through the pack's own index.  Problems are
 * reported with midx_report(), which sets verify_midx_error; that flag is
 * the return value.  Progress meters are shown when `flags` contains
 * MIDX_PROGRESS.
 *
 * NOTE(review): many lines are not visible in this view (declarations of
 * `i`, `sb`, `e`; the condition around the failed-load branch; the early
 * exits; the assignments of pairs[i].pos and m_offset; and the cleanup
 * before the final return).  The comments below describe only what the
 * visible lines do.
 */
588 int verify_midx_file(struct repository
*r
, const char *object_dir
, unsigned flags
)
590 struct pair_pos_vs_id
*pairs
= NULL
;
592 struct progress
*progress
= NULL
;
593 struct multi_pack_index
*m
= load_multi_pack_index(object_dir
, 1);
594 verify_midx_error
= 0;
599 struct strbuf filename
= STRBUF_INIT
;
601 get_midx_filename(&filename
, object_dir
);
/* stat() succeeding means the file exists, so the load failure was a parse failure. */
603 if (!stat(filename
.buf
, &sb
)) {
604 error(_("multi-pack-index file exists, but failed to parse"));
607 strbuf_release(&filename
);
611 if (!midx_checksum_valid(m
))
612 midx_report(_("incorrect checksum"));
614 if (flags
& MIDX_PROGRESS
)
615 progress
= start_delayed_progress(_("Looking for referenced packfiles"),
/* Every pack named by the MIDX must be loadable. */
617 for (i
= 0; i
< m
->num_packs
; i
++) {
618 if (prepare_midx_pack(r
, m
, i
))
619 midx_report("failed to load pack in position %d", i
);
621 display_progress(progress
, i
+ 1);
623 stop_progress(&progress
);
625 if (m
->num_objects
== 0) {
626 midx_report(_("the midx contains no oid"));
628 * Remaining tests assume that we have objects, so we can
634 if (flags
& MIDX_PROGRESS
)
635 progress
= start_sparse_progress(_("Verifying OID order in multi-pack-index"),
/*
 * Adjacent object ids must be strictly ascending.  The loop bound
 * m->num_objects - 1 relies on num_objects being non-zero here.
 */
637 for (i
= 0; i
< m
->num_objects
- 1; i
++) {
638 struct object_id oid1
, oid2
;
640 nth_midxed_object_oid(&oid1
, m
, i
);
641 nth_midxed_object_oid(&oid2
, m
, i
+ 1);
643 if (oidcmp(&oid1
, &oid2
) >= 0)
644 midx_report(_("oid lookup out of order: oid[%d] = %s >= %s = oid[%d]"),
645 i
, oid_to_hex(&oid1
), oid_to_hex(&oid2
), i
+ 1);
647 midx_display_sparse_progress(progress
, i
+ 1);
649 stop_progress(&progress
);
652 * Create an array mapping each object to its packfile id. Sort it
653 * to group the objects by packfile. Use this permutation to visit
654 * each of the objects and only require 1 packfile to be open at a
657 ALLOC_ARRAY(pairs
, m
->num_objects
);
658 for (i
= 0; i
< m
->num_objects
; i
++) {
660 pairs
[i
].pack_int_id
= nth_midxed_pack_int_id(m
, i
);
663 if (flags
& MIDX_PROGRESS
)
664 progress
= start_sparse_progress(_("Sorting objects by packfile"),
666 display_progress(progress
, 0); /* TODO: Measure QSORT() progress */
667 QSORT(pairs
, m
->num_objects
, compare_pair_pos_vs_id
);
668 stop_progress(&progress
);
670 if (flags
& MIDX_PROGRESS
)
671 progress
= start_sparse_progress(_("Verifying object offsets"), m
->num_objects
);
672 for (i
= 0; i
< m
->num_objects
; i
++) {
673 struct object_id oid
;
675 off_t m_offset
, p_offset
;
/* When the sorted walk moves on to a different pack, close the previous one. */
677 if (i
> 0 && pairs
[i
-1].pack_int_id
!= pairs
[i
].pack_int_id
&&
678 m
->packs
[pairs
[i
-1].pack_int_id
])
680 close_pack_fd(m
->packs
[pairs
[i
-1].pack_int_id
]);
681 close_pack_index(m
->packs
[pairs
[i
-1].pack_int_id
]);
684 nth_midxed_object_oid(&oid
, m
, pairs
[i
].pos
);
686 if (!fill_midx_entry(r
, &oid
, &e
, m
)) {
687 midx_report(_("failed to load pack entry for oid[%d] = %s"),
688 pairs
[i
].pos
, oid_to_hex(&oid
));
692 if (open_pack_index(e
.p
)) {
693 midx_report(_("failed to load pack-index for packfile %s"),
/* p_offset is the offset according to the pack's own index. */
699 p_offset
= find_pack_entry_one(oid
.hash
, e
.p
);
/*
 * NOTE(review): m_offset and p_offset are off_t but are printed with
 * PRIx64 -- confirm off_t is 64 bits wide on every target, or cast.
 */
701 if (m_offset
!= p_offset
)
702 midx_report(_("incorrect object offset for oid[%d] = %s: %"PRIx64
" != %"PRIx64
),
703 pairs
[i
].pos
, oid_to_hex(&oid
), m_offset
, p_offset
);
705 midx_display_sparse_progress(progress
, i
+ 1);
707 stop_progress(&progress
);
713 return verify_midx_error
;