1 #include "git-compat-util.h"
10 #include "object-file.h"
11 #include "object-store-ll.h"
12 #include "hash-lookup.h"
16 #include "run-command.h"
17 #include "repository.h"
18 #include "chunk-format.h"
20 #include "pack-bitmap.h"
23 #include "list-objects.h"
25 #define MIDX_SIGNATURE 0x4d494458 /* "MIDX" */
26 #define MIDX_VERSION 1
27 #define MIDX_BYTE_FILE_VERSION 4
28 #define MIDX_BYTE_HASH_VERSION 5
29 #define MIDX_BYTE_NUM_CHUNKS 6
30 #define MIDX_BYTE_NUM_PACKS 8
31 #define MIDX_HEADER_SIZE 12
32 #define MIDX_MIN_SIZE (MIDX_HEADER_SIZE + the_hash_algo->rawsz)
34 #define MIDX_CHUNK_ALIGNMENT 4
35 #define MIDX_CHUNKID_PACKNAMES 0x504e414d /* "PNAM" */
36 #define MIDX_CHUNKID_OIDFANOUT 0x4f494446 /* "OIDF" */
37 #define MIDX_CHUNKID_OIDLOOKUP 0x4f49444c /* "OIDL" */
38 #define MIDX_CHUNKID_OBJECTOFFSETS 0x4f4f4646 /* "OOFF" */
39 #define MIDX_CHUNKID_LARGEOFFSETS 0x4c4f4646 /* "LOFF" */
40 #define MIDX_CHUNKID_REVINDEX 0x52494458 /* "RIDX" */
41 #define MIDX_CHUNK_FANOUT_SIZE (sizeof(uint32_t) * 256)
42 #define MIDX_CHUNK_OFFSET_WIDTH (2 * sizeof(uint32_t))
43 #define MIDX_CHUNK_LARGE_OFFSET_WIDTH (sizeof(uint64_t))
44 #define MIDX_LARGE_OFFSET_NEEDED 0x80000000
46 #define PACK_EXPIRED UINT_MAX
48 const unsigned char *get_midx_checksum(struct multi_pack_index
*m
)
50 return m
->data
+ m
->data_len
- the_hash_algo
->rawsz
;
/*
 * Format the path "<object_dir>/pack/multi-pack-index" into the strbuf
 * `out` via strbuf_addf().
 */
void get_midx_filename(struct strbuf *out, const char *object_dir)
{
	strbuf_addf(out, "%s/pack/multi-pack-index", object_dir);
}
58 void get_midx_rev_filename(struct strbuf
*out
, struct multi_pack_index
*m
)
60 get_midx_filename(out
, m
->object_dir
);
61 strbuf_addf(out
, "-%s.rev", hash_to_hex(get_midx_checksum(m
)));
64 static int midx_read_oid_fanout(const unsigned char *chunk_start
,
65 size_t chunk_size
, void *data
)
67 struct multi_pack_index
*m
= data
;
68 m
->chunk_oid_fanout
= (uint32_t *)chunk_start
;
70 if (chunk_size
!= 4 * 256) {
71 error(_("multi-pack-index OID fanout is of the wrong size"));
74 m
->num_objects
= ntohl(m
->chunk_oid_fanout
[255]);
78 static int midx_read_oid_lookup(const unsigned char *chunk_start
,
79 size_t chunk_size
, void *data
)
81 struct multi_pack_index
*m
= data
;
82 m
->chunk_oid_lookup
= chunk_start
;
84 if (chunk_size
!= st_mult(m
->hash_len
, m
->num_objects
)) {
85 error(_("multi-pack-index OID lookup chunk is the wrong size"));
91 static int midx_read_object_offsets(const unsigned char *chunk_start
,
92 size_t chunk_size
, void *data
)
94 struct multi_pack_index
*m
= data
;
95 m
->chunk_object_offsets
= chunk_start
;
97 if (chunk_size
!= st_mult(m
->num_objects
, MIDX_CHUNK_OFFSET_WIDTH
)) {
98 error(_("multi-pack-index object offset chunk is the wrong size"));
104 struct multi_pack_index
*load_multi_pack_index(const char *object_dir
, int local
)
106 struct multi_pack_index
*m
= NULL
;
110 void *midx_map
= NULL
;
111 uint32_t hash_version
;
112 struct strbuf midx_name
= STRBUF_INIT
;
114 const char *cur_pack_name
;
115 struct chunkfile
*cf
= NULL
;
117 get_midx_filename(&midx_name
, object_dir
);
119 fd
= git_open(midx_name
.buf
);
123 if (fstat(fd
, &st
)) {
124 error_errno(_("failed to read %s"), midx_name
.buf
);
128 midx_size
= xsize_t(st
.st_size
);
130 if (midx_size
< MIDX_MIN_SIZE
) {
131 error(_("multi-pack-index file %s is too small"), midx_name
.buf
);
135 strbuf_release(&midx_name
);
137 midx_map
= xmmap(NULL
, midx_size
, PROT_READ
, MAP_PRIVATE
, fd
, 0);
140 FLEX_ALLOC_STR(m
, object_dir
, object_dir
);
142 m
->data_len
= midx_size
;
145 m
->signature
= get_be32(m
->data
);
146 if (m
->signature
!= MIDX_SIGNATURE
)
147 die(_("multi-pack-index signature 0x%08x does not match signature 0x%08x"),
148 m
->signature
, MIDX_SIGNATURE
);
150 m
->version
= m
->data
[MIDX_BYTE_FILE_VERSION
];
151 if (m
->version
!= MIDX_VERSION
)
152 die(_("multi-pack-index version %d not recognized"),
155 hash_version
= m
->data
[MIDX_BYTE_HASH_VERSION
];
156 if (hash_version
!= oid_version(the_hash_algo
)) {
157 error(_("multi-pack-index hash version %u does not match version %u"),
158 hash_version
, oid_version(the_hash_algo
));
161 m
->hash_len
= the_hash_algo
->rawsz
;
163 m
->num_chunks
= m
->data
[MIDX_BYTE_NUM_CHUNKS
];
165 m
->num_packs
= get_be32(m
->data
+ MIDX_BYTE_NUM_PACKS
);
167 cf
= init_chunkfile(NULL
);
169 if (read_table_of_contents(cf
, m
->data
, midx_size
,
170 MIDX_HEADER_SIZE
, m
->num_chunks
,
171 MIDX_CHUNK_ALIGNMENT
))
174 if (pair_chunk(cf
, MIDX_CHUNKID_PACKNAMES
, &m
->chunk_pack_names
, &m
->chunk_pack_names_len
))
175 die(_("multi-pack-index required pack-name chunk missing or corrupted"));
176 if (read_chunk(cf
, MIDX_CHUNKID_OIDFANOUT
, midx_read_oid_fanout
, m
))
177 die(_("multi-pack-index required OID fanout chunk missing or corrupted"));
178 if (read_chunk(cf
, MIDX_CHUNKID_OIDLOOKUP
, midx_read_oid_lookup
, m
))
179 die(_("multi-pack-index required OID lookup chunk missing or corrupted"));
180 if (read_chunk(cf
, MIDX_CHUNKID_OBJECTOFFSETS
, midx_read_object_offsets
, m
))
181 die(_("multi-pack-index required object offsets chunk missing or corrupted"));
183 pair_chunk(cf
, MIDX_CHUNKID_LARGEOFFSETS
, &m
->chunk_large_offsets
,
184 &m
->chunk_large_offsets_len
);
186 if (git_env_bool("GIT_TEST_MIDX_READ_RIDX", 1))
187 pair_chunk(cf
, MIDX_CHUNKID_REVINDEX
, &m
->chunk_revindex
,
188 &m
->chunk_revindex_len
);
190 CALLOC_ARRAY(m
->pack_names
, m
->num_packs
);
191 CALLOC_ARRAY(m
->packs
, m
->num_packs
);
193 cur_pack_name
= (const char *)m
->chunk_pack_names
;
194 for (i
= 0; i
< m
->num_packs
; i
++) {
196 size_t avail
= m
->chunk_pack_names_len
-
197 (cur_pack_name
- (const char *)m
->chunk_pack_names
);
199 m
->pack_names
[i
] = cur_pack_name
;
201 end
= memchr(cur_pack_name
, '\0', avail
);
203 die(_("multi-pack-index pack-name chunk is too short"));
204 cur_pack_name
= end
+ 1;
206 if (i
&& strcmp(m
->pack_names
[i
], m
->pack_names
[i
- 1]) <= 0)
207 die(_("multi-pack-index pack names out of order: '%s' before '%s'"),
208 m
->pack_names
[i
- 1],
212 trace2_data_intmax("midx", the_repository
, "load/num_packs", m
->num_packs
);
213 trace2_data_intmax("midx", the_repository
, "load/num_objects", m
->num_objects
);
220 strbuf_release(&midx_name
);
223 munmap(midx_map
, midx_size
);
229 void close_midx(struct multi_pack_index
*m
)
238 munmap((unsigned char *)m
->data
, m
->data_len
);
240 for (i
= 0; i
< m
->num_packs
; i
++) {
242 m
->packs
[i
]->multi_pack_index
= 0;
244 FREE_AND_NULL(m
->packs
);
245 FREE_AND_NULL(m
->pack_names
);
249 int prepare_midx_pack(struct repository
*r
, struct multi_pack_index
*m
, uint32_t pack_int_id
)
251 struct strbuf pack_name
= STRBUF_INIT
;
252 struct packed_git
*p
;
254 if (pack_int_id
>= m
->num_packs
)
255 die(_("bad pack-int-id: %u (%u total packs)"),
256 pack_int_id
, m
->num_packs
);
258 if (m
->packs
[pack_int_id
])
261 strbuf_addf(&pack_name
, "%s/pack/%s", m
->object_dir
,
262 m
->pack_names
[pack_int_id
]);
264 p
= add_packed_git(pack_name
.buf
, pack_name
.len
, m
->local
);
265 strbuf_release(&pack_name
);
270 p
->multi_pack_index
= 1;
271 m
->packs
[pack_int_id
] = p
;
272 install_packed_git(r
, p
);
273 list_add_tail(&p
->mru
, &r
->objects
->packed_git_mru
);
278 int bsearch_midx(const struct object_id
*oid
, struct multi_pack_index
*m
, uint32_t *result
)
280 return bsearch_hash(oid
->hash
, m
->chunk_oid_fanout
, m
->chunk_oid_lookup
,
281 the_hash_algo
->rawsz
, result
);
284 struct object_id
*nth_midxed_object_oid(struct object_id
*oid
,
285 struct multi_pack_index
*m
,
288 if (n
>= m
->num_objects
)
291 oidread(oid
, m
->chunk_oid_lookup
+ st_mult(m
->hash_len
, n
));
295 off_t
nth_midxed_offset(struct multi_pack_index
*m
, uint32_t pos
)
297 const unsigned char *offset_data
;
300 offset_data
= m
->chunk_object_offsets
+ (off_t
)pos
* MIDX_CHUNK_OFFSET_WIDTH
;
301 offset32
= get_be32(offset_data
+ sizeof(uint32_t));
303 if (m
->chunk_large_offsets
&& offset32
& MIDX_LARGE_OFFSET_NEEDED
) {
304 if (sizeof(off_t
) < sizeof(uint64_t))
305 die(_("multi-pack-index stores a 64-bit offset, but off_t is too small"));
307 offset32
^= MIDX_LARGE_OFFSET_NEEDED
;
308 if (offset32
>= m
->chunk_large_offsets_len
/ sizeof(uint64_t))
309 die(_("multi-pack-index large offset out of bounds"));
310 return get_be64(m
->chunk_large_offsets
+ sizeof(uint64_t) * offset32
);
316 uint32_t nth_midxed_pack_int_id(struct multi_pack_index
*m
, uint32_t pos
)
318 return get_be32(m
->chunk_object_offsets
+
319 (off_t
)pos
* MIDX_CHUNK_OFFSET_WIDTH
);
322 int fill_midx_entry(struct repository
*r
,
323 const struct object_id
*oid
,
324 struct pack_entry
*e
,
325 struct multi_pack_index
*m
)
328 uint32_t pack_int_id
;
329 struct packed_git
*p
;
331 if (!bsearch_midx(oid
, m
, &pos
))
334 if (pos
>= m
->num_objects
)
337 pack_int_id
= nth_midxed_pack_int_id(m
, pos
);
339 if (prepare_midx_pack(r
, m
, pack_int_id
))
341 p
= m
->packs
[pack_int_id
];
344 * We are about to tell the caller where they can locate the
345 * requested object. We better make sure the packfile is
346 * still here and can be accessed before supplying that
347 * answer, as it may have been deleted since the MIDX was
350 if (!is_pack_valid(p
))
353 if (oidset_size(&p
->bad_objects
) &&
354 oidset_contains(&p
->bad_objects
, oid
))
357 e
->offset
= nth_midxed_offset(m
, pos
);
363 /* Match "foo.idx" against either "foo.pack" _or_ "foo.idx". */
364 static int cmp_idx_or_pack_name(const char *idx_or_pack_name
,
365 const char *idx_name
)
367 /* Skip past any initial matching prefix. */
368 while (*idx_name
&& *idx_name
== *idx_or_pack_name
) {
374 * If we didn't match completely, we may have matched "pack-1234." and
375 * be left with "idx" and "pack" respectively, which is also OK. We do
376 * not have to check for "idx" and "idx", because that would have been
377 * a complete match (and in that case these strcmps will be false, but
378 * we'll correctly return 0 from the final strcmp() below.
380 * Technically this matches "fooidx" and "foopack", but we'd never have
381 * such names in the first place.
383 if (!strcmp(idx_name
, "idx") && !strcmp(idx_or_pack_name
, "pack"))
387 * This not only checks for a complete match, but also orders based on
388 * the first non-identical character, which means our ordering will
389 * match a raw strcmp(). That makes it OK to use this to binary search
390 * a naively-sorted list.
392 return strcmp(idx_or_pack_name
, idx_name
);
395 int midx_contains_pack(struct multi_pack_index
*m
, const char *idx_or_pack_name
)
397 uint32_t first
= 0, last
= m
->num_packs
;
399 while (first
< last
) {
400 uint32_t mid
= first
+ (last
- first
) / 2;
404 current
= m
->pack_names
[mid
];
405 cmp
= cmp_idx_or_pack_name(idx_or_pack_name
, current
);
418 int prepare_multi_pack_index_one(struct repository
*r
, const char *object_dir
, int local
)
420 struct multi_pack_index
*m
;
421 struct multi_pack_index
*m_search
;
423 prepare_repo_settings(r
);
424 if (!r
->settings
.core_multi_pack_index
)
427 for (m_search
= r
->objects
->multi_pack_index
; m_search
; m_search
= m_search
->next
)
428 if (!strcmp(object_dir
, m_search
->object_dir
))
431 m
= load_multi_pack_index(object_dir
, local
);
434 struct multi_pack_index
*mp
= r
->objects
->multi_pack_index
;
439 r
->objects
->multi_pack_index
= m
;
446 static size_t write_midx_header(struct hashfile
*f
,
447 unsigned char num_chunks
,
450 hashwrite_be32(f
, MIDX_SIGNATURE
);
451 hashwrite_u8(f
, MIDX_VERSION
);
452 hashwrite_u8(f
, oid_version(the_hash_algo
));
453 hashwrite_u8(f
, num_chunks
);
454 hashwrite_u8(f
, 0); /* unused */
455 hashwrite_be32(f
, num_packs
);
457 return MIDX_HEADER_SIZE
;
461 uint32_t orig_pack_int_id
;
463 struct packed_git
*p
;
464 unsigned expired
: 1;
467 static int pack_info_compare(const void *_a
, const void *_b
)
469 struct pack_info
*a
= (struct pack_info
*)_a
;
470 struct pack_info
*b
= (struct pack_info
*)_b
;
471 return strcmp(a
->pack_name
, b
->pack_name
);
474 static int idx_or_pack_name_cmp(const void *_va
, const void *_vb
)
476 const char *pack_name
= _va
;
477 const struct pack_info
*compar
= _vb
;
479 return cmp_idx_or_pack_name(pack_name
, compar
->pack_name
);
482 struct write_midx_context
{
483 struct pack_info
*info
;
486 struct multi_pack_index
*m
;
487 struct progress
*progress
;
488 unsigned pack_paths_checked
;
490 struct pack_midx_entry
*entries
;
494 uint32_t *pack_order
;
495 unsigned large_offsets_needed
:1;
496 uint32_t num_large_offsets
;
498 int preferred_pack_idx
;
500 struct string_list
*to_include
;
503 static void add_pack_to_midx(const char *full_path
, size_t full_path_len
,
504 const char *file_name
, void *data
)
506 struct write_midx_context
*ctx
= data
;
508 if (ends_with(file_name
, ".idx")) {
509 display_progress(ctx
->progress
, ++ctx
->pack_paths_checked
);
511 * Note that at most one of ctx->m and ctx->to_include are set,
512 * so we are testing midx_contains_pack() and
513 * string_list_has_string() independently (guarded by the
514 * appropriate NULL checks).
516 * We could support passing to_include while reusing an existing
517 * MIDX, but don't currently since the reuse process drags
518 * forward all packs from an existing MIDX (without checking
519 * whether or not they appear in the to_include list).
521 * If we added support for that, these next two conditional
522 * should be performed independently (likely checking
523 * to_include before the existing MIDX).
525 if (ctx
->m
&& midx_contains_pack(ctx
->m
, file_name
))
527 else if (ctx
->to_include
&&
528 !string_list_has_string(ctx
->to_include
, file_name
))
531 ALLOC_GROW(ctx
->info
, ctx
->nr
+ 1, ctx
->alloc
);
533 ctx
->info
[ctx
->nr
].p
= add_packed_git(full_path
,
537 if (!ctx
->info
[ctx
->nr
].p
) {
538 warning(_("failed to add packfile '%s'"),
543 if (open_pack_index(ctx
->info
[ctx
->nr
].p
)) {
544 warning(_("failed to open pack-index '%s'"),
546 close_pack(ctx
->info
[ctx
->nr
].p
);
547 FREE_AND_NULL(ctx
->info
[ctx
->nr
].p
);
551 ctx
->info
[ctx
->nr
].pack_name
= xstrdup(file_name
);
552 ctx
->info
[ctx
->nr
].orig_pack_int_id
= ctx
->nr
;
553 ctx
->info
[ctx
->nr
].expired
= 0;
558 struct pack_midx_entry
{
559 struct object_id oid
;
560 uint32_t pack_int_id
;
563 unsigned preferred
: 1;
566 static int midx_oid_compare(const void *_a
, const void *_b
)
568 const struct pack_midx_entry
*a
= (const struct pack_midx_entry
*)_a
;
569 const struct pack_midx_entry
*b
= (const struct pack_midx_entry
*)_b
;
570 int cmp
= oidcmp(&a
->oid
, &b
->oid
);
575 /* Sort objects in a preferred pack first when multiple copies exist. */
576 if (a
->preferred
> b
->preferred
)
578 if (a
->preferred
< b
->preferred
)
581 if (a
->pack_mtime
> b
->pack_mtime
)
583 else if (a
->pack_mtime
< b
->pack_mtime
)
586 return a
->pack_int_id
- b
->pack_int_id
;
589 static int nth_midxed_pack_midx_entry(struct multi_pack_index
*m
,
590 struct pack_midx_entry
*e
,
593 if (pos
>= m
->num_objects
)
596 nth_midxed_object_oid(&e
->oid
, m
, pos
);
597 e
->pack_int_id
= nth_midxed_pack_int_id(m
, pos
);
598 e
->offset
= nth_midxed_offset(m
, pos
);
600 /* consider objects in midx to be from "old" packs */
605 static void fill_pack_entry(uint32_t pack_int_id
,
606 struct packed_git
*p
,
608 struct pack_midx_entry
*entry
,
611 if (nth_packed_object_id(&entry
->oid
, p
, cur_object
) < 0)
612 die(_("failed to locate object %d in packfile"), cur_object
);
614 entry
->pack_int_id
= pack_int_id
;
615 entry
->pack_mtime
= p
->mtime
;
617 entry
->offset
= nth_packed_object_offset(p
, cur_object
);
618 entry
->preferred
= !!preferred
;
622 struct pack_midx_entry
*entries
;
626 static void midx_fanout_grow(struct midx_fanout
*fanout
, size_t nr
)
629 BUG("negative growth in midx_fanout_grow() (%"PRIuMAX
" < %"PRIuMAX
")",
630 (uintmax_t)nr
, (uintmax_t)fanout
->nr
);
631 ALLOC_GROW(fanout
->entries
, nr
, fanout
->alloc
);
634 static void midx_fanout_sort(struct midx_fanout
*fanout
)
636 QSORT(fanout
->entries
, fanout
->nr
, midx_oid_compare
);
639 static void midx_fanout_add_midx_fanout(struct midx_fanout
*fanout
,
640 struct multi_pack_index
*m
,
644 uint32_t start
= 0, end
;
648 start
= ntohl(m
->chunk_oid_fanout
[cur_fanout
- 1]);
649 end
= ntohl(m
->chunk_oid_fanout
[cur_fanout
]);
651 for (cur_object
= start
; cur_object
< end
; cur_object
++) {
652 if ((preferred_pack
> -1) &&
653 (preferred_pack
== nth_midxed_pack_int_id(m
, cur_object
))) {
655 * Objects from preferred packs are added
661 midx_fanout_grow(fanout
, fanout
->nr
+ 1);
662 nth_midxed_pack_midx_entry(m
,
663 &fanout
->entries
[fanout
->nr
],
665 fanout
->entries
[fanout
->nr
].preferred
= 0;
670 static void midx_fanout_add_pack_fanout(struct midx_fanout
*fanout
,
671 struct pack_info
*info
,
676 struct packed_git
*pack
= info
[cur_pack
].p
;
677 uint32_t start
= 0, end
;
681 start
= get_pack_fanout(pack
, cur_fanout
- 1);
682 end
= get_pack_fanout(pack
, cur_fanout
);
684 for (cur_object
= start
; cur_object
< end
; cur_object
++) {
685 midx_fanout_grow(fanout
, fanout
->nr
+ 1);
686 fill_pack_entry(cur_pack
,
689 &fanout
->entries
[fanout
->nr
],
696 * It is possible to artificially get into a state where there are many
697 * duplicate copies of objects. That can create high memory pressure if
698 * we are to create a list of all objects before de-duplication. To reduce
699 * this memory pressure without a significant performance drop, automatically
700 * group objects by the first byte of their object id. Use the IDX fanout
701 * tables to group the data, copy to a local array, then sort.
703 * Copy only the de-duplicated entries (selected by most-recent modified time
704 * of a packfile containing the object).
706 static struct pack_midx_entry
*get_sorted_entries(struct multi_pack_index
*m
,
707 struct pack_info
*info
,
712 uint32_t cur_fanout
, cur_pack
, cur_object
;
713 size_t alloc_objects
, total_objects
= 0;
714 struct midx_fanout fanout
= { 0 };
715 struct pack_midx_entry
*deduplicated_entries
= NULL
;
716 uint32_t start_pack
= m
? m
->num_packs
: 0;
718 for (cur_pack
= start_pack
; cur_pack
< nr_packs
; cur_pack
++)
719 total_objects
= st_add(total_objects
,
720 info
[cur_pack
].p
->num_objects
);
723 * As we de-duplicate by fanout value, we expect the fanout
724 * slices to be evenly distributed, with some noise. Hence,
725 * allocate slightly more than one 256th.
727 alloc_objects
= fanout
.alloc
= total_objects
> 3200 ? total_objects
/ 200 : 16;
729 ALLOC_ARRAY(fanout
.entries
, fanout
.alloc
);
730 ALLOC_ARRAY(deduplicated_entries
, alloc_objects
);
733 for (cur_fanout
= 0; cur_fanout
< 256; cur_fanout
++) {
737 midx_fanout_add_midx_fanout(&fanout
, m
, cur_fanout
,
740 for (cur_pack
= start_pack
; cur_pack
< nr_packs
; cur_pack
++) {
741 int preferred
= cur_pack
== preferred_pack
;
742 midx_fanout_add_pack_fanout(&fanout
,
744 preferred
, cur_fanout
);
747 if (-1 < preferred_pack
&& preferred_pack
< start_pack
)
748 midx_fanout_add_pack_fanout(&fanout
, info
,
752 midx_fanout_sort(&fanout
);
755 * The batch is now sorted by OID and then mtime (descending).
756 * Take only the first duplicate.
758 for (cur_object
= 0; cur_object
< fanout
.nr
; cur_object
++) {
759 if (cur_object
&& oideq(&fanout
.entries
[cur_object
- 1].oid
,
760 &fanout
.entries
[cur_object
].oid
))
763 ALLOC_GROW(deduplicated_entries
, st_add(*nr_objects
, 1),
765 memcpy(&deduplicated_entries
[*nr_objects
],
766 &fanout
.entries
[cur_object
],
767 sizeof(struct pack_midx_entry
));
772 free(fanout
.entries
);
773 return deduplicated_entries
;
776 static int write_midx_pack_names(struct hashfile
*f
, void *data
)
778 struct write_midx_context
*ctx
= data
;
780 unsigned char padding
[MIDX_CHUNK_ALIGNMENT
];
783 for (i
= 0; i
< ctx
->nr
; i
++) {
786 if (ctx
->info
[i
].expired
)
789 if (i
&& strcmp(ctx
->info
[i
].pack_name
, ctx
->info
[i
- 1].pack_name
) <= 0)
790 BUG("incorrect pack-file order: %s before %s",
791 ctx
->info
[i
- 1].pack_name
,
792 ctx
->info
[i
].pack_name
);
794 writelen
= strlen(ctx
->info
[i
].pack_name
) + 1;
795 hashwrite(f
, ctx
->info
[i
].pack_name
, writelen
);
799 /* add padding to be aligned */
800 i
= MIDX_CHUNK_ALIGNMENT
- (written
% MIDX_CHUNK_ALIGNMENT
);
801 if (i
< MIDX_CHUNK_ALIGNMENT
) {
802 memset(padding
, 0, sizeof(padding
));
803 hashwrite(f
, padding
, i
);
809 static int write_midx_oid_fanout(struct hashfile
*f
,
812 struct write_midx_context
*ctx
= data
;
813 struct pack_midx_entry
*list
= ctx
->entries
;
814 struct pack_midx_entry
*last
= ctx
->entries
+ ctx
->entries_nr
;
819 * Write the first-level table (the list is sorted,
820 * but we use a 256-entry lookup to be able to avoid
821 * having to do eight extra binary search iterations).
823 for (i
= 0; i
< 256; i
++) {
824 struct pack_midx_entry
*next
= list
;
826 while (next
< last
&& next
->oid
.hash
[0] == i
) {
831 hashwrite_be32(f
, count
);
838 static int write_midx_oid_lookup(struct hashfile
*f
,
841 struct write_midx_context
*ctx
= data
;
842 unsigned char hash_len
= the_hash_algo
->rawsz
;
843 struct pack_midx_entry
*list
= ctx
->entries
;
846 for (i
= 0; i
< ctx
->entries_nr
; i
++) {
847 struct pack_midx_entry
*obj
= list
++;
849 if (i
< ctx
->entries_nr
- 1) {
850 struct pack_midx_entry
*next
= list
;
851 if (oidcmp(&obj
->oid
, &next
->oid
) >= 0)
852 BUG("OIDs not in order: %s >= %s",
853 oid_to_hex(&obj
->oid
),
854 oid_to_hex(&next
->oid
));
857 hashwrite(f
, obj
->oid
.hash
, (int)hash_len
);
863 static int write_midx_object_offsets(struct hashfile
*f
,
866 struct write_midx_context
*ctx
= data
;
867 struct pack_midx_entry
*list
= ctx
->entries
;
868 uint32_t i
, nr_large_offset
= 0;
870 for (i
= 0; i
< ctx
->entries_nr
; i
++) {
871 struct pack_midx_entry
*obj
= list
++;
873 if (ctx
->pack_perm
[obj
->pack_int_id
] == PACK_EXPIRED
)
874 BUG("object %s is in an expired pack with int-id %d",
875 oid_to_hex(&obj
->oid
),
878 hashwrite_be32(f
, ctx
->pack_perm
[obj
->pack_int_id
]);
880 if (ctx
->large_offsets_needed
&& obj
->offset
>> 31)
881 hashwrite_be32(f
, MIDX_LARGE_OFFSET_NEEDED
| nr_large_offset
++);
882 else if (!ctx
->large_offsets_needed
&& obj
->offset
>> 32)
883 BUG("object %s requires a large offset (%"PRIx64
") but the MIDX is not writing large offsets!",
884 oid_to_hex(&obj
->oid
),
887 hashwrite_be32(f
, (uint32_t)obj
->offset
);
893 static int write_midx_large_offsets(struct hashfile
*f
,
896 struct write_midx_context
*ctx
= data
;
897 struct pack_midx_entry
*list
= ctx
->entries
;
898 struct pack_midx_entry
*end
= ctx
->entries
+ ctx
->entries_nr
;
899 uint32_t nr_large_offset
= ctx
->num_large_offsets
;
901 while (nr_large_offset
) {
902 struct pack_midx_entry
*obj
;
906 BUG("too many large-offset objects");
909 offset
= obj
->offset
;
914 hashwrite_be64(f
, offset
);
922 static int write_midx_revindex(struct hashfile
*f
,
925 struct write_midx_context
*ctx
= data
;
928 for (i
= 0; i
< ctx
->entries_nr
; i
++)
929 hashwrite_be32(f
, ctx
->pack_order
[i
]);
934 struct midx_pack_order_data
{
940 static int midx_pack_order_cmp(const void *va
, const void *vb
)
942 const struct midx_pack_order_data
*a
= va
, *b
= vb
;
943 if (a
->pack
< b
->pack
)
945 else if (a
->pack
> b
->pack
)
947 else if (a
->offset
< b
->offset
)
949 else if (a
->offset
> b
->offset
)
955 static uint32_t *midx_pack_order(struct write_midx_context
*ctx
)
957 struct midx_pack_order_data
*data
;
958 uint32_t *pack_order
;
961 trace2_region_enter("midx", "midx_pack_order", the_repository
);
963 ALLOC_ARRAY(data
, ctx
->entries_nr
);
964 for (i
= 0; i
< ctx
->entries_nr
; i
++) {
965 struct pack_midx_entry
*e
= &ctx
->entries
[i
];
967 data
[i
].pack
= ctx
->pack_perm
[e
->pack_int_id
];
969 data
[i
].pack
|= (1U << 31);
970 data
[i
].offset
= e
->offset
;
973 QSORT(data
, ctx
->entries_nr
, midx_pack_order_cmp
);
975 ALLOC_ARRAY(pack_order
, ctx
->entries_nr
);
976 for (i
= 0; i
< ctx
->entries_nr
; i
++)
977 pack_order
[i
] = data
[i
].nr
;
980 trace2_region_leave("midx", "midx_pack_order", the_repository
);
985 static void write_midx_reverse_index(char *midx_name
, unsigned char *midx_hash
,
986 struct write_midx_context
*ctx
)
988 struct strbuf buf
= STRBUF_INIT
;
989 const char *tmp_file
;
991 trace2_region_enter("midx", "write_midx_reverse_index", the_repository
);
993 strbuf_addf(&buf
, "%s-%s.rev", midx_name
, hash_to_hex(midx_hash
));
995 tmp_file
= write_rev_file_order(NULL
, ctx
->pack_order
, ctx
->entries_nr
,
996 midx_hash
, WRITE_REV
);
998 if (finalize_object_file(tmp_file
, buf
.buf
))
999 die(_("cannot store reverse index file"));
1001 strbuf_release(&buf
);
1003 trace2_region_leave("midx", "write_midx_reverse_index", the_repository
);
1006 static void clear_midx_files_ext(const char *object_dir
, const char *ext
,
1007 unsigned char *keep_hash
);
1009 static int midx_checksum_valid(struct multi_pack_index
*m
)
1011 return hashfile_checksum_valid(m
->data
, m
->data_len
);
1014 static void prepare_midx_packing_data(struct packing_data
*pdata
,
1015 struct write_midx_context
*ctx
)
1019 trace2_region_enter("midx", "prepare_midx_packing_data", the_repository
);
1021 memset(pdata
, 0, sizeof(struct packing_data
));
1022 prepare_packing_data(the_repository
, pdata
);
1024 for (i
= 0; i
< ctx
->entries_nr
; i
++) {
1025 struct pack_midx_entry
*from
= &ctx
->entries
[ctx
->pack_order
[i
]];
1026 struct object_entry
*to
= packlist_alloc(pdata
, &from
->oid
);
1028 oe_set_in_pack(pdata
, to
,
1029 ctx
->info
[ctx
->pack_perm
[from
->pack_int_id
]].p
);
1032 trace2_region_leave("midx", "prepare_midx_packing_data", the_repository
);
1035 static int add_ref_to_pending(const char *refname
,
1036 const struct object_id
*oid
,
1037 int flag
, void *cb_data
)
1039 struct rev_info
*revs
= (struct rev_info
*)cb_data
;
1040 struct object_id peeled
;
1041 struct object
*object
;
1043 if ((flag
& REF_ISSYMREF
) && (flag
& REF_ISBROKEN
)) {
1044 warning("symbolic ref is dangling: %s", refname
);
1048 if (!peel_iterated_oid(oid
, &peeled
))
1051 object
= parse_object_or_die(oid
, refname
);
1052 if (object
->type
!= OBJ_COMMIT
)
1055 add_pending_object(revs
, object
, "");
1056 if (bitmap_is_preferred_refname(revs
->repo
, refname
))
1057 object
->flags
|= NEEDS_BITMAP
;
1061 struct bitmap_commit_cb
{
1062 struct commit
**commits
;
1063 size_t commits_nr
, commits_alloc
;
1065 struct write_midx_context
*ctx
;
1068 static const struct object_id
*bitmap_oid_access(size_t index
,
1069 const void *_entries
)
1071 const struct pack_midx_entry
*entries
= _entries
;
1072 return &entries
[index
].oid
;
1075 static void bitmap_show_commit(struct commit
*commit
, void *_data
)
1077 struct bitmap_commit_cb
*data
= _data
;
1078 int pos
= oid_pos(&commit
->object
.oid
, data
->ctx
->entries
,
1079 data
->ctx
->entries_nr
,
1084 ALLOC_GROW(data
->commits
, data
->commits_nr
+ 1, data
->commits_alloc
);
1085 data
->commits
[data
->commits_nr
++] = commit
;
1088 static int read_refs_snapshot(const char *refs_snapshot
,
1089 struct rev_info
*revs
)
1091 struct strbuf buf
= STRBUF_INIT
;
1092 struct object_id oid
;
1093 FILE *f
= xfopen(refs_snapshot
, "r");
1095 while (strbuf_getline(&buf
, f
) != EOF
) {
1096 struct object
*object
;
1098 char *hex
= buf
.buf
;
1099 const char *end
= NULL
;
1101 if (buf
.len
&& *buf
.buf
== '+') {
1106 if (parse_oid_hex(hex
, &oid
, &end
) < 0)
1107 die(_("could not parse line: %s"), buf
.buf
);
1109 die(_("malformed line: %s"), buf
.buf
);
1111 object
= parse_object_or_die(&oid
, NULL
);
1113 object
->flags
|= NEEDS_BITMAP
;
1115 add_pending_object(revs
, object
, "");
1119 strbuf_release(&buf
);
1123 static struct commit
**find_commits_for_midx_bitmap(uint32_t *indexed_commits_nr_p
,
1124 const char *refs_snapshot
,
1125 struct write_midx_context
*ctx
)
1127 struct rev_info revs
;
1128 struct bitmap_commit_cb cb
= {0};
1130 trace2_region_enter("midx", "find_commits_for_midx_bitmap",
1135 repo_init_revisions(the_repository
, &revs
, NULL
);
1136 if (refs_snapshot
) {
1137 read_refs_snapshot(refs_snapshot
, &revs
);
1139 setup_revisions(0, NULL
, &revs
, NULL
);
1140 for_each_ref(add_ref_to_pending
, &revs
);
1144 * Skipping promisor objects here is intentional, since it only excludes
1145 * them from the list of reachable commits that we want to select from
1146 * when computing the selection of MIDX'd commits to receive bitmaps.
1148 * Reachability bitmaps do require that their objects be closed under
1149 * reachability, but fetching any objects missing from promisors at this
1150 * point is too late. But, if one of those objects can be reached from
1151 * an another object that is included in the bitmap, then we will
1152 * complain later that we don't have reachability closure (and fail
1155 fetch_if_missing
= 0;
1156 revs
.exclude_promisor_objects
= 1;
1158 if (prepare_revision_walk(&revs
))
1159 die(_("revision walk setup failed"));
1161 traverse_commit_list(&revs
, bitmap_show_commit
, NULL
, &cb
);
1162 if (indexed_commits_nr_p
)
1163 *indexed_commits_nr_p
= cb
.commits_nr
;
1165 release_revisions(&revs
);
1167 trace2_region_leave("midx", "find_commits_for_midx_bitmap",
1173 static int write_midx_bitmap(const char *midx_name
,
1174 const unsigned char *midx_hash
,
1175 struct packing_data
*pdata
,
1176 struct commit
**commits
,
1177 uint32_t commits_nr
,
1178 uint32_t *pack_order
,
1182 uint16_t options
= 0;
1183 struct pack_idx_entry
**index
;
1184 char *bitmap_name
= xstrfmt("%s-%s.bitmap", midx_name
,
1185 hash_to_hex(midx_hash
));
1187 trace2_region_enter("midx", "write_midx_bitmap", the_repository
);
1189 if (flags
& MIDX_WRITE_BITMAP_HASH_CACHE
)
1190 options
|= BITMAP_OPT_HASH_CACHE
;
1192 if (flags
& MIDX_WRITE_BITMAP_LOOKUP_TABLE
)
1193 options
|= BITMAP_OPT_LOOKUP_TABLE
;
1196 * Build the MIDX-order index based on pdata.objects (which is already
1197 * in MIDX order; c.f., 'midx_pack_order_cmp()' for the definition of
1200 ALLOC_ARRAY(index
, pdata
->nr_objects
);
1201 for (i
= 0; i
< pdata
->nr_objects
; i
++)
1202 index
[i
] = &pdata
->objects
[i
].idx
;
1204 bitmap_writer_show_progress(flags
& MIDX_PROGRESS
);
1205 bitmap_writer_build_type_index(pdata
, index
, pdata
->nr_objects
);
1208 * bitmap_writer_finish expects objects in lex order, but pack_order
1209 * gives us exactly that. use it directly instead of re-sorting the
1212 * This changes the order of objects in 'index' between
1213 * bitmap_writer_build_type_index and bitmap_writer_finish.
1215 * The same re-ordering takes place in the single-pack bitmap code via
1216 * write_idx_file(), which is called by finish_tmp_packfile(), which
1217 * happens between bitmap_writer_build_type_index() and
1218 * bitmap_writer_finish().
1220 for (i
= 0; i
< pdata
->nr_objects
; i
++)
1221 index
[pack_order
[i
]] = &pdata
->objects
[i
].idx
;
1223 bitmap_writer_select_commits(commits
, commits_nr
, -1);
1224 ret
= bitmap_writer_build(pdata
);
1228 bitmap_writer_set_checksum(midx_hash
);
1229 bitmap_writer_finish(index
, pdata
->nr_objects
, bitmap_name
, options
);
1235 trace2_region_leave("midx", "write_midx_bitmap", the_repository
);
1240 static struct multi_pack_index
*lookup_multi_pack_index(struct repository
*r
,
1241 const char *object_dir
)
1243 struct multi_pack_index
*result
= NULL
;
1244 struct multi_pack_index
*cur
;
1245 char *obj_dir_real
= real_pathdup(object_dir
, 1);
1246 struct strbuf cur_path_real
= STRBUF_INIT
;
1248 /* Ensure the given object_dir is local, or a known alternate. */
1249 find_odb(r
, obj_dir_real
);
1251 for (cur
= get_multi_pack_index(r
); cur
; cur
= cur
->next
) {
1252 strbuf_realpath(&cur_path_real
, cur
->object_dir
, 1);
1253 if (!strcmp(obj_dir_real
, cur_path_real
.buf
)) {
1261 strbuf_release(&cur_path_real
);
/*
 * Write a multi-pack-index (MIDX) for the pack directory of 'object_dir'.
 *
 * Visible steps, in order:
 *  - compute the MIDX path and create its leading directories (dies on
 *    failure);
 *  - unless 'packs_to_include' is given, look up an existing MIDX for
 *    object_dir; an existing MIDX with an invalid checksum triggers the
 *    "ignoring existing multi-pack-index" warning;
 *  - seed ctx.info from the existing MIDX's packs, then scan the pack
 *    directory with add_pack_to_midx();
 *  - choose ctx.preferred_pack_idx: by name when 'preferred_pack_name' is
 *    given, by pack mtime when a reverse index or bitmap is requested, and
 *    -1 otherwise;
 *  - collect the sorted object entries and count large offsets;
 *  - sort ctx.info, mark packs listed in 'packs_to_drop' as expired and
 *    build ctx.pack_perm (old pack-int-id -> new id, or PACK_EXPIRED);
 *  - take the lock file, register and write the chunks, finalize the hash;
 *  - optionally write the reverse index file and the bitmap;
 *  - commit the lock file, remove ".bitmap"/".rev" files other than the
 *    ones named after the new checksum, and release the context.
 *
 * Parameters:
 *   object_dir          - object directory whose pack/ subdirectory is indexed.
 *   packs_to_include    - if non-NULL, stored in ctx.to_include for the
 *                         pack-directory scan; no existing MIDX is looked up.
 *   packs_to_drop       - if non-NULL and non-empty, pack names to expire.
 *   preferred_pack_name - optional name matched against ctx.info[].pack_name
 *                         with cmp_idx_or_pack_name().
 *   refs_snapshot       - forwarded to find_commits_for_midx_bitmap().
 *   flags               - tested for MIDX_PROGRESS, MIDX_WRITE_REV_INDEX and
 *                         MIDX_WRITE_BITMAP.
 *
 * NOTE(review): the return statement and several branch bodies are not
 * visible in this excerpt; callers in this file store the return value as
 * their own result.
 */
1265 static int write_midx_internal(const char *object_dir
,
1266 struct string_list
*packs_to_include
,
1267 struct string_list
*packs_to_drop
,
1268 const char *preferred_pack_name
,
1269 const char *refs_snapshot
,
1272 struct strbuf midx_name
= STRBUF_INIT
;
1273 unsigned char midx_hash
[GIT_MAX_RAWSZ
];
1275 struct hashfile
*f
= NULL
;
1276 struct lock_file lk
;
1277 struct write_midx_context ctx
= { 0 };
1278 int pack_name_concat_len
= 0;
1279 int dropped_packs
= 0;
1281 struct chunkfile
*cf
;
1283 trace2_region_enter("midx", "write_midx_internal", the_repository
);
1285 get_midx_filename(&midx_name
, object_dir
);
1286 if (safe_create_leading_directories(midx_name
.buf
))
1287 die_errno(_("unable to create leading directories of %s"),
1290 if (!packs_to_include
) {
1292 * Only reference an existing MIDX when not filtering which
1293 * packs to include, since all packs and objects are copied
1294 * blindly from an existing MIDX if one is present.
1296 ctx
.m
= lookup_multi_pack_index(the_repository
, object_dir
);
1299 if (ctx
.m
&& !midx_checksum_valid(ctx
.m
)) {
1300 warning(_("ignoring existing multi-pack-index; checksum mismatch"));
1305 ctx
.alloc
= ctx
.m
? ctx
.m
->num_packs
: 16;
1307 ALLOC_ARRAY(ctx
.info
, ctx
.alloc
);
1310 for (i
= 0; i
< ctx
.m
->num_packs
; i
++) {
1311 ALLOC_GROW(ctx
.info
, ctx
.nr
+ 1, ctx
.alloc
);
1313 ctx
.info
[ctx
.nr
].orig_pack_int_id
= i
;
1314 ctx
.info
[ctx
.nr
].pack_name
= xstrdup(ctx
.m
->pack_names
[i
]);
1315 ctx
.info
[ctx
.nr
].p
= ctx
.m
->packs
[i
];
1316 ctx
.info
[ctx
.nr
].expired
= 0;
1318 if (flags
& MIDX_WRITE_REV_INDEX
) {
1320 * If generating a reverse index, need to have
1321 * packed_git's loaded to compare their
1322 * mtimes and object count.
1324 if (prepare_midx_pack(the_repository
, ctx
.m
, i
)) {
1325 error(_("could not load pack"));
1330 if (open_pack_index(ctx
.m
->packs
[i
]))
1331 die(_("could not open index for %s"),
1332 ctx
.m
->packs
[i
]->pack_name
);
1333 ctx
.info
[ctx
.nr
].p
= ctx
.m
->packs
[i
];
1340 ctx
.pack_paths_checked
= 0;
1341 if (flags
& MIDX_PROGRESS
)
1342 ctx
.progress
= start_delayed_progress(_("Adding packfiles to multi-pack-index"), 0);
1344 ctx
.progress
= NULL
;
1346 ctx
.to_include
= packs_to_include
;
1348 for_each_file_in_pack_dir(object_dir
, add_pack_to_midx
, &ctx
);
1349 stop_progress(&ctx
.progress
);
1351 if ((ctx
.m
&& ctx
.nr
== ctx
.m
->num_packs
) &&
1352 !(packs_to_include
|| packs_to_drop
)) {
1353 struct bitmap_index
*bitmap_git
;
1355 int want_bitmap
= flags
& MIDX_WRITE_BITMAP
;
1357 bitmap_git
= prepare_midx_bitmap_git(ctx
.m
);
1358 bitmap_exists
= bitmap_git
&& bitmap_is_midx(bitmap_git
);
1359 free_bitmap_index(bitmap_git
);
1361 if (bitmap_exists
|| !want_bitmap
) {
1363 * The correct MIDX already exists, and so does a
1364 * corresponding bitmap (or one wasn't requested).
1367 clear_midx_files_ext(object_dir
, ".bitmap",
1373 if (preferred_pack_name
) {
1374 ctx
.preferred_pack_idx
= -1;
1376 for (i
= 0; i
< ctx
.nr
; i
++) {
1377 if (!cmp_idx_or_pack_name(preferred_pack_name
,
1378 ctx
.info
[i
].pack_name
)) {
1379 ctx
.preferred_pack_idx
= i
;
1384 if (ctx
.preferred_pack_idx
== -1)
1385 warning(_("unknown preferred pack: '%s'"),
1386 preferred_pack_name
);
1387 } else if (ctx
.nr
&&
1388 (flags
& (MIDX_WRITE_REV_INDEX
| MIDX_WRITE_BITMAP
))) {
/*
 * NOTE(review): 'oldest' is read through ctx.preferred_pack_idx before that
 * field is assigned in this branch; presumably it is still 0 from the
 * "= { 0 }" initialization of ctx, so the scan starts at the first pack.
 */
1389 struct packed_git
*oldest
= ctx
.info
[ctx
.preferred_pack_idx
].p
;
1390 ctx
.preferred_pack_idx
= 0;
1392 if (packs_to_drop
&& packs_to_drop
->nr
)
1393 BUG("cannot write a MIDX bitmap during expiration");
1396 * set a preferred pack when writing a bitmap to ensure that
1397 * the pack from which the first object is selected in pseudo
1398 * pack-order has all of its objects selected from that pack
1399 * (and not another pack containing a duplicate)
1401 for (i
= 1; i
< ctx
.nr
; i
++) {
1402 struct packed_git
*p
= ctx
.info
[i
].p
;
1404 if (!oldest
->num_objects
|| p
->mtime
< oldest
->mtime
) {
1406 ctx
.preferred_pack_idx
= i
;
1410 if (!oldest
->num_objects
) {
1412 * If all packs are empty; unset the preferred index.
1413 * This is acceptable since there will be no duplicate
1414 * objects to resolve, so the preferred value doesn't
1417 ctx
.preferred_pack_idx
= -1;
1421 * otherwise don't mark any pack as preferred to avoid
1422 * interfering with expiration logic below
1424 ctx
.preferred_pack_idx
= -1;
1427 if (ctx
.preferred_pack_idx
> -1) {
1428 struct packed_git
*preferred
= ctx
.info
[ctx
.preferred_pack_idx
].p
;
1429 if (!preferred
->num_objects
) {
1430 error(_("cannot select preferred pack %s with no objects"),
1431 preferred
->pack_name
);
1437 ctx
.entries
= get_sorted_entries(ctx
.m
, ctx
.info
, ctx
.nr
, &ctx
.entries_nr
,
1438 ctx
.preferred_pack_idx
);
1440 ctx
.large_offsets_needed
= 0;
/*
 * Count every offset above 0x7fffffff; the large-offset chunk is registered
 * further down only when some offset also exceeds 0xffffffff.
 */
1441 for (i
= 0; i
< ctx
.entries_nr
; i
++) {
1442 if (ctx
.entries
[i
].offset
> 0x7fffffff)
1443 ctx
.num_large_offsets
++;
1444 if (ctx
.entries
[i
].offset
> 0xffffffff)
1445 ctx
.large_offsets_needed
= 1;
1448 QSORT(ctx
.info
, ctx
.nr
, pack_info_compare
);
1450 if (packs_to_drop
&& packs_to_drop
->nr
) {
1452 int missing_drops
= 0;
/*
 * Walk ctx.info and packs_to_drop side by side, comparing names with
 * strcmp(). NOTE(review): this presumably relies on both lists being in the
 * same name order -- confirm against pack_info_compare and the caller that
 * builds packs_to_drop.
 */
1454 for (i
= 0; i
< ctx
.nr
&& drop_index
< packs_to_drop
->nr
; i
++) {
1455 int cmp
= strcmp(ctx
.info
[i
].pack_name
,
1456 packs_to_drop
->items
[drop_index
].string
);
1460 ctx
.info
[i
].expired
= 1;
1461 } else if (cmp
> 0) {
1462 error(_("did not see pack-file %s to drop"),
1463 packs_to_drop
->items
[drop_index
].string
);
1468 ctx
.info
[i
].expired
= 0;
1472 if (missing_drops
) {
1479 * pack_perm stores a permutation between pack-int-ids from the
1480 * previous multi-pack-index to the new one we are writing:
1482 * pack_perm[old_id] = new_id
1484 ALLOC_ARRAY(ctx
.pack_perm
, ctx
.nr
);
1485 for (i
= 0; i
< ctx
.nr
; i
++) {
1486 if (ctx
.info
[i
].expired
) {
1488 ctx
.pack_perm
[ctx
.info
[i
].orig_pack_int_id
] = PACK_EXPIRED
;
1490 ctx
.pack_perm
[ctx
.info
[i
].orig_pack_int_id
] = i
- dropped_packs
;
1494 for (i
= 0; i
< ctx
.nr
; i
++) {
1495 if (!ctx
.info
[i
].expired
)
1496 pack_name_concat_len
+= strlen(ctx
.info
[i
].pack_name
) + 1;
1499 /* Check that the preferred pack wasn't expired (if given). */
1500 if (preferred_pack_name
) {
1501 struct pack_info
*preferred
= bsearch(preferred_pack_name
,
1504 idx_or_pack_name_cmp
);
1506 uint32_t perm
= ctx
.pack_perm
[preferred
->orig_pack_int_id
];
1507 if (perm
== PACK_EXPIRED
)
1508 warning(_("preferred pack '%s' is expired"),
1509 preferred_pack_name
);
/* Round the pack-name chunk length up to a multiple of MIDX_CHUNK_ALIGNMENT. */
1513 if (pack_name_concat_len
% MIDX_CHUNK_ALIGNMENT
)
1514 pack_name_concat_len
+= MIDX_CHUNK_ALIGNMENT
-
1515 (pack_name_concat_len
% MIDX_CHUNK_ALIGNMENT
);
1517 hold_lock_file_for_update(&lk
, midx_name
.buf
, LOCK_DIE_ON_ERROR
);
1518 f
= hashfd(get_lock_file_fd(&lk
), get_lock_file_path(&lk
));
1520 if (ctx
.nr
- dropped_packs
== 0) {
1521 error(_("no pack files to index."));
1526 if (!ctx
.entries_nr
) {
1527 if (flags
& MIDX_WRITE_BITMAP
)
1528 warning(_("refusing to write multi-pack .bitmap without any objects"));
1529 flags
&= ~(MIDX_WRITE_REV_INDEX
| MIDX_WRITE_BITMAP
);
1532 cf
= init_chunkfile(f
);
1534 add_chunk(cf
, MIDX_CHUNKID_PACKNAMES
, pack_name_concat_len
,
1535 write_midx_pack_names
);
1536 add_chunk(cf
, MIDX_CHUNKID_OIDFANOUT
, MIDX_CHUNK_FANOUT_SIZE
,
1537 write_midx_oid_fanout
);
1538 add_chunk(cf
, MIDX_CHUNKID_OIDLOOKUP
,
1539 st_mult(ctx
.entries_nr
, the_hash_algo
->rawsz
),
1540 write_midx_oid_lookup
);
1541 add_chunk(cf
, MIDX_CHUNKID_OBJECTOFFSETS
,
1542 st_mult(ctx
.entries_nr
, MIDX_CHUNK_OFFSET_WIDTH
),
1543 write_midx_object_offsets
);
1545 if (ctx
.large_offsets_needed
)
1546 add_chunk(cf
, MIDX_CHUNKID_LARGEOFFSETS
,
1547 st_mult(ctx
.num_large_offsets
,
1548 MIDX_CHUNK_LARGE_OFFSET_WIDTH
),
1549 write_midx_large_offsets
);
1551 if (flags
& (MIDX_WRITE_REV_INDEX
| MIDX_WRITE_BITMAP
)) {
1552 ctx
.pack_order
= midx_pack_order(&ctx
);
1553 add_chunk(cf
, MIDX_CHUNKID_REVINDEX
,
1554 st_mult(ctx
.entries_nr
, sizeof(uint32_t)),
1555 write_midx_revindex
);
1558 write_midx_header(f
, get_num_chunks(cf
), ctx
.nr
- dropped_packs
);
1559 write_chunkfile(cf
, &ctx
);
1561 finalize_hashfile(f
, midx_hash
, FSYNC_COMPONENT_PACK_METADATA
,
1562 CSUM_FSYNC
| CSUM_HASH_IN_STREAM
);
1565 if (flags
& MIDX_WRITE_REV_INDEX
&&
1566 git_env_bool("GIT_TEST_MIDX_WRITE_REV", 0))
1567 write_midx_reverse_index(midx_name
.buf
, midx_hash
, &ctx
);
1569 if (flags
& MIDX_WRITE_BITMAP
) {
1570 struct packing_data pdata
;
1571 struct commit
**commits
;
1572 uint32_t commits_nr
;
1574 if (!ctx
.entries_nr
)
1575 BUG("cannot write a bitmap without any objects");
1577 prepare_midx_packing_data(&pdata
, &ctx
);
1579 commits
= find_commits_for_midx_bitmap(&commits_nr
, refs_snapshot
, &ctx
);
1582 * The previous steps translated the information from
1583 * 'entries' into information suitable for constructing
1584 * bitmaps. We no longer need that array, so clear it to
1585 * reduce memory pressure.
1587 FREE_AND_NULL(ctx
.entries
);
1590 if (write_midx_bitmap(midx_name
.buf
, midx_hash
, &pdata
,
1591 commits
, commits_nr
, ctx
.pack_order
,
1593 error(_("could not write multi-pack bitmap"));
1599 * NOTE: Do not use ctx.entries beyond this point, since it might
1600 * have been freed in the previous if block.
1604 close_object_store(the_repository
->objects
);
1606 if (commit_lock_file(&lk
) < 0)
1607 die_errno(_("could not write multi-pack-index"));
1609 clear_midx_files_ext(object_dir
, ".bitmap", midx_hash
);
1610 clear_midx_files_ext(object_dir
, ".rev", midx_hash
);
1613 for (i
= 0; i
< ctx
.nr
; i
++) {
1614 if (ctx
.info
[i
].p
) {
1615 close_pack(ctx
.info
[i
].p
);
1616 free(ctx
.info
[i
].p
);
1618 free(ctx
.info
[i
].pack_name
);
1623 free(ctx
.pack_perm
);
1624 free(ctx
.pack_order
);
1625 strbuf_release(&midx_name
);
1627 trace2_region_leave("midx", "write_midx_internal", the_repository
);
/*
 * Write the multi-pack-index for 'object_dir'.
 *
 * Forwards to write_midx_internal() with NULL for both the include list and
 * the drop list; preferred_pack_name, refs_snapshot and flags are passed
 * through unchanged and the internal function's result is returned.
 */
1632 int write_midx_file(const char *object_dir
,
1633 const char *preferred_pack_name
,
1634 const char *refs_snapshot
,
1637 return write_midx_internal(object_dir
, NULL
, NULL
, preferred_pack_name
,
1638 refs_snapshot
, flags
);
/*
 * Write a multi-pack-index for 'object_dir' with an explicit include list.
 *
 * Forwards to write_midx_internal() with 'packs_to_include' as the include
 * list and NULL as the drop list; the remaining arguments are passed through
 * unchanged and the internal function's result is returned.
 */
1641 int write_midx_file_only(const char *object_dir
,
1642 struct string_list
*packs_to_include
,
1643 const char *preferred_pack_name
,
1644 const char *refs_snapshot
,
1647 return write_midx_internal(object_dir
, packs_to_include
, NULL
,
1648 preferred_pack_name
, refs_snapshot
, flags
);
/*
 * Callback state handed to clear_midx_file_ext(). That callback reads two
 * members: 'ext' (the suffix a file name must end with) and 'keep' (a file
 * name that is compared with strcmp(); may be unset).
 * NOTE(review): the member declarations are not visible in this excerpt.
 */
1651 struct clear_midx_data
{
/*
 * Per-file callback (used with for_each_file_in_pack_dir()) that unlinks
 * auxiliary MIDX files.
 *
 * A file is a candidate when 'file_name' starts with "multi-pack-index-" and
 * ends with data->ext. When data->keep is set, the name is also compared
 * against it. unlink() is applied to 'full_path'; a failure is fatal
 * (die_errno).
 *
 * NOTE(review): the bodies of the two "if" tests are not visible in this
 * excerpt; presumably non-candidates and the 'keep' file return early.
 *
 * '_data' must point to a struct clear_midx_data.
 */
1656 static void clear_midx_file_ext(const char *full_path
, size_t full_path_len UNUSED
,
1657 const char *file_name
, void *_data
)
1659 struct clear_midx_data
*data
= _data
;
1661 if (!(starts_with(file_name
, "multi-pack-index-") &&
1662 ends_with(file_name
, data
->ext
)))
1664 if (data
->keep
&& !strcmp(data
->keep
, file_name
))
1667 if (unlink(full_path
))
1668 die_errno(_("failed to remove %s"), full_path
);
/*
 * Run clear_midx_file_ext() over the pack directory of 'object_dir' for
 * files with suffix 'ext'.
 *
 * The callback state is zeroed first. data.keep is formatted as
 * "multi-pack-index-<hex of keep_hash><ext>" with xstrfmt(), i.e. it is a
 * heap-allocated string.
 *
 * NOTE(review): callers in this file also pass keep_hash == NULL; the guard
 * around building data.keep, the assignment of data.ext and the release of
 * data.keep are not visible in this excerpt -- confirm they exist.
 */
1671 static void clear_midx_files_ext(const char *object_dir
, const char *ext
,
1672 unsigned char *keep_hash
)
1674 struct clear_midx_data data
;
1675 memset(&data
, 0, sizeof(struct clear_midx_data
));
1678 data
.keep
= xstrfmt("multi-pack-index-%s%s",
1679 hash_to_hex(keep_hash
), ext
);
1682 for_each_file_in_pack_dir(object_dir
,
1683 clear_midx_file_ext
,
1689 void clear_midx_file(struct repository
*r
)
1691 struct strbuf midx
= STRBUF_INIT
;
1693 get_midx_filename(&midx
, r
->objects
->odb
->path
);
1695 if (r
->objects
&& r
->objects
->multi_pack_index
) {
1696 close_midx(r
->objects
->multi_pack_index
);
1697 r
->objects
->multi_pack_index
= NULL
;
1700 if (remove_path(midx
.buf
))
1701 die(_("failed to clear multi-pack-index at %s"), midx
.buf
);
1703 clear_midx_files_ext(r
->objects
->odb
->path
, ".bitmap", NULL
);
1704 clear_midx_files_ext(r
->objects
->odb
->path
, ".rev", NULL
);
1706 strbuf_release(&midx
);
/*
 * Sticky failure flag for MIDX verification: midx_report() raises it,
 * verify_midx_file() clears it on entry and returns it to its caller.
 */
static int verify_midx_error;

/*
 * Record a verification failure and print the printf-style message 'fmt',
 * followed by a newline, on stderr.
 */
__attribute__((format (printf, 1, 2)))
static void midx_report(const char *fmt, ...)
{
	va_list args;

	va_start(args, fmt);
	vfprintf(stderr, fmt, args);
	va_end(args);
	fputs("\n", stderr);

	verify_midx_error = 1;
}
/*
 * Associates an object's position in the MIDX ('pos') with the id of the
 * pack that stores it ('pack_int_id'). verify_midx_file() fills an array of
 * these and sorts it so that objects from the same pack are visited together.
 * NOTE(review): 'pos' is assumed to be 32 bits wide like pack_int_id --
 * confirm against the code that fills it.
 */
struct pair_pos_vs_id
{
	uint32_t pos;
	uint32_t pack_int_id;
};

/*
 * QSORT() comparator: orders pairs by descending pack_int_id.
 *
 * Returns a positive value when '_b' has the larger pack id, a negative value
 * when '_a' has the larger one, and 0 when they are equal.
 *
 * The ids are compared explicitly rather than subtracted: the difference of
 * two uint32_t values converted to int has the wrong sign once the ids are
 * more than INT_MAX apart.
 */
static int compare_pair_pos_vs_id(const void *_a, const void *_b)
{
	const struct pair_pos_vs_id *a = _a;
	const struct pair_pos_vs_id *b = _b;

	if (b->pack_int_id > a->pack_int_id)
		return 1;
	if (b->pack_int_id < a->pack_int_id)
		return -1;
	return 0;
}
/*
 * SPARSE_PROGRESS_INTERVAL is a power of two (1 << 12), so the mask test in
 * midx_display_sparse_progress() forwards a value to display_progress() only
 * when its low 12 bits are zero, i.e. for every 4096th value of 'n'.
 * 'n' is evaluated once, into the 64-bit temporary _n.
 */
1737 * Limit calls to display_progress() for performance reasons.
1738 * The interval here was arbitrarily chosen.
1740 #define SPARSE_PROGRESS_INTERVAL (1 << 12)
1741 #define midx_display_sparse_progress(progress, n) \
1743 uint64_t _n = (n); \
1744 if ((_n & (SPARSE_PROGRESS_INTERVAL - 1)) == 0) \
1745 display_progress(progress, _n); \
/*
 * Verify the multi-pack-index of 'object_dir' and return verify_midx_error
 * (reset to 0 on entry, set to 1 by every midx_report() call).
 *
 * Visible checks, in order:
 *  - the MIDX loads; if it does not but the file can be stat()ed, an error
 *    about a failed parse is printed;
 *  - the stored checksum is valid;
 *  - every pack referenced by the MIDX can be prepared;
 *  - the OID fanout table is non-decreasing;
 *  - the MIDX contains at least one object;
 *  - consecutive OIDs in the lookup table are strictly increasing;
 *  - for every object, the offset recorded in the MIDX equals the offset
 *    found in the owning pack's own index.
 *
 * With MIDX_PROGRESS set in 'flags', progress meters are shown for the
 * individual phases.
 */
1748 int verify_midx_file(struct repository
*r
, const char *object_dir
, unsigned flags
)
1750 struct pair_pos_vs_id
*pairs
= NULL
;
1752 struct progress
*progress
= NULL
;
1753 struct multi_pack_index
*m
= load_multi_pack_index(object_dir
, 1);
1754 verify_midx_error
= 0;
1759 struct strbuf filename
= STRBUF_INIT
;
1761 get_midx_filename(&filename
, object_dir
);
1763 if (!stat(filename
.buf
, &sb
)) {
1764 error(_("multi-pack-index file exists, but failed to parse"));
1767 strbuf_release(&filename
);
1771 if (!midx_checksum_valid(m
))
1772 midx_report(_("incorrect checksum"));
1774 if (flags
& MIDX_PROGRESS
)
1775 progress
= start_delayed_progress(_("Looking for referenced packfiles"),
1777 for (i
= 0; i
< m
->num_packs
; i
++) {
1778 if (prepare_midx_pack(r
, m
, i
))
1779 midx_report("failed to load pack in position %d", i
);
1781 display_progress(progress
, i
+ 1);
1783 stop_progress(&progress
);
/* Compare each fanout entry with its successor (indexes 0..254 vs 1..255). */
1785 for (i
= 0; i
< 255; i
++) {
1786 uint32_t oid_fanout1
= ntohl(m
->chunk_oid_fanout
[i
]);
1787 uint32_t oid_fanout2
= ntohl(m
->chunk_oid_fanout
[i
+ 1]);
1789 if (oid_fanout1
> oid_fanout2
)
1790 midx_report(_("oid fanout out of order: fanout[%d] = %"PRIx32
" > %"PRIx32
" = fanout[%d]"),
1791 i
, oid_fanout1
, oid_fanout2
, i
+ 1);
1794 if (m
->num_objects
== 0) {
1795 midx_report(_("the midx contains no oid"));
1797 * Remaining tests assume that we have objects, so we can
1803 if (flags
& MIDX_PROGRESS
)
1804 progress
= start_sparse_progress(_("Verifying OID order in multi-pack-index"),
1805 m
->num_objects
- 1);
1806 for (i
= 0; i
< m
->num_objects
- 1; i
++) {
1807 struct object_id oid1
, oid2
;
1809 nth_midxed_object_oid(&oid1
, m
, i
);
1810 nth_midxed_object_oid(&oid2
, m
, i
+ 1);
1812 if (oidcmp(&oid1
, &oid2
) >= 0)
1813 midx_report(_("oid lookup out of order: oid[%d] = %s >= %s = oid[%d]"),
1814 i
, oid_to_hex(&oid1
), oid_to_hex(&oid2
), i
+ 1);
1816 midx_display_sparse_progress(progress
, i
+ 1);
1818 stop_progress(&progress
);
1821 * Create an array mapping each object to its packfile id. Sort it
1822 * to group the objects by packfile. Use this permutation to visit
1823 * each of the objects and only require 1 packfile to be open at a
1826 ALLOC_ARRAY(pairs
, m
->num_objects
);
1827 for (i
= 0; i
< m
->num_objects
; i
++) {
1829 pairs
[i
].pack_int_id
= nth_midxed_pack_int_id(m
, i
);
1832 if (flags
& MIDX_PROGRESS
)
1833 progress
= start_sparse_progress(_("Sorting objects by packfile"),
1835 display_progress(progress
, 0); /* TODO: Measure QSORT() progress */
1836 QSORT(pairs
, m
->num_objects
, compare_pair_pos_vs_id
);
1837 stop_progress(&progress
);
1839 if (flags
& MIDX_PROGRESS
)
1840 progress
= start_sparse_progress(_("Verifying object offsets"), m
->num_objects
);
1841 for (i
= 0; i
< m
->num_objects
; i
++) {
1842 struct object_id oid
;
1843 struct pack_entry e
;
1844 off_t m_offset
, p_offset
;
/*
 * When the pack id changes between consecutive sorted pairs, release the
 * previous pack's file descriptor and index (if that pack is loaded).
 */
1846 if (i
> 0 && pairs
[i
-1].pack_int_id
!= pairs
[i
].pack_int_id
&&
1847 m
->packs
[pairs
[i
-1].pack_int_id
])
1849 close_pack_fd(m
->packs
[pairs
[i
-1].pack_int_id
]);
1850 close_pack_index(m
->packs
[pairs
[i
-1].pack_int_id
]);
1853 nth_midxed_object_oid(&oid
, m
, pairs
[i
].pos
);
1855 if (!fill_midx_entry(r
, &oid
, &e
, m
)) {
1856 midx_report(_("failed to load pack entry for oid[%d] = %s"),
1857 pairs
[i
].pos
, oid_to_hex(&oid
));
1861 if (open_pack_index(e
.p
)) {
1862 midx_report(_("failed to load pack-index for packfile %s"),
1867 m_offset
= e
.offset
;
1868 p_offset
= find_pack_entry_one(oid
.hash
, e
.p
);
1870 if (m_offset
!= p_offset
)
1871 midx_report(_("incorrect object offset for oid[%d] = %s: %"PRIx64
" != %"PRIx64
),
1872 pairs
[i
].pos
, oid_to_hex(&oid
), m_offset
, p_offset
);
1874 midx_display_sparse_progress(progress
, i
+ 1);
1876 stop_progress(&progress
);
1882 return verify_midx_error
;
/*
 * Delete packs tracked by the multi-pack-index of 'object_dir' and rewrite
 * the MIDX without them.
 *
 * Phase 1 fills count[] (one zero-initialized slot per pack) by walking every
 * object in the MIDX and incrementing the slot of the pack that holds it.
 *
 * Phase 2 (progress title: "Finding and deleting unreferenced packfiles")
 * walks the packs; for a pack that gets deleted, its path is copied, the pack
 * is closed, its MIDX name is inserted into packs_to_drop and the pack files
 * are unlinked via unlink_pack_path().
 *
 * If packs_to_drop is non-empty, write_midx_internal() is called with it as
 * the drop list and its return value becomes 'result'.
 *
 * NOTE(review): the bodies of the per-pack tests (count[i], a failing
 * prepare_midx_pack(), pack_keep / is_cruft) and the return statement are not
 * visible in this excerpt; presumably such packs are skipped and 'result' is
 * returned.
 */
1885 int expire_midx_packs(struct repository
*r
, const char *object_dir
, unsigned flags
)
1887 uint32_t i
, *count
, result
= 0;
1888 struct string_list packs_to_drop
= STRING_LIST_INIT_DUP
;
1889 struct multi_pack_index
*m
= lookup_multi_pack_index(r
, object_dir
);
1890 struct progress
*progress
= NULL
;
1895 CALLOC_ARRAY(count
, m
->num_packs
);
1897 if (flags
& MIDX_PROGRESS
)
1898 progress
= start_delayed_progress(_("Counting referenced objects"),
1900 for (i
= 0; i
< m
->num_objects
; i
++) {
1901 int pack_int_id
= nth_midxed_pack_int_id(m
, i
);
1902 count
[pack_int_id
]++;
1903 display_progress(progress
, i
+ 1);
1905 stop_progress(&progress
);
1907 if (flags
& MIDX_PROGRESS
)
1908 progress
= start_delayed_progress(_("Finding and deleting unreferenced packfiles"),
1910 for (i
= 0; i
< m
->num_packs
; i
++) {
1912 display_progress(progress
, i
+ 1);
1917 if (prepare_midx_pack(r
, m
, i
))
1920 if (m
->packs
[i
]->pack_keep
|| m
->packs
[i
]->is_cruft
)
1923 pack_name
= xstrdup(m
->packs
[i
]->pack_name
);
1924 close_pack(m
->packs
[i
]);
1926 string_list_insert(&packs_to_drop
, m
->pack_names
[i
]);
1927 unlink_pack_path(pack_name
, 0);
1930 stop_progress(&progress
);
1934 if (packs_to_drop
.nr
)
1935 result
= write_midx_internal(object_dir
, NULL
, &packs_to_drop
, NULL
, NULL
, flags
);
1937 string_list_clear(&packs_to_drop
, 0);
/*
 * Per-pack bookkeeping used by fill_included_packs_batch():
 * 'referenced_objects' counts the MIDX objects stored in the pack and
 * 'pack_int_id' is the pack's index in the MIDX. The code in this file also
 * reads and writes an 'mtime' member (see compare_by_mtime()).
 * NOTE(review): the 'mtime' declaration is not visible in this excerpt.
 */
1942 struct repack_info
{
1944 uint32_t referenced_objects
;
1945 uint32_t pack_int_id
;
/*
 * QSORT() comparator over struct repack_info that compares the 'mtime'
 * members of its two arguments.
 * NOTE(review): the values returned for "less", "greater" and "equal" are not
 * visible in this excerpt -- confirm the resulting sort direction.
 */
1948 static int compare_by_mtime(const void *a_
, const void *b_
)
1950 const struct repack_info
*a
, *b
;
1952 a
= (const struct repack_info
*)a_
;
1953 b
= (const struct repack_info
*)b_
;
1955 if (a
->mtime
< b
->mtime
)
1957 if (a
->mtime
> b
->mtime
)
/*
 * Mark packs of MIDX 'm' for repacking by setting include_pack[i] = 1.
 *
 * 'include_pack' is indexed by pack-int-id. The "repack.packkeptobjects"
 * config value is read into pack_kept_objects (0 if unset) and consulted
 * together with each pack's pack_keep flag; the is_cruft flag is tested as
 * well.
 *
 * NOTE(review): the bodies of the three per-pack tests, the update of
 * 'count' and the return statement are not visible in this excerpt;
 * presumably packs that fail to load, kept packs (unless configured
 * otherwise) and cruft packs are skipped.
 */
1962 static int fill_included_packs_all(struct repository
*r
,
1963 struct multi_pack_index
*m
,
1964 unsigned char *include_pack
)
1966 uint32_t i
, count
= 0;
1967 int pack_kept_objects
= 0;
1969 repo_config_get_bool(r
, "repack.packkeptobjects", &pack_kept_objects
);
1971 for (i
= 0; i
< m
->num_packs
; i
++) {
1972 if (prepare_midx_pack(r
, m
, i
))
1974 if (!pack_kept_objects
&& m
->packs
[i
]->pack_keep
)
1976 if (m
->packs
[i
]->is_cruft
)
1979 include_pack
[i
] = 1;
/*
 * Select a batch of packs from MIDX 'm' for repacking, bounded by a size
 * budget ('batch_size', compared against the running 'total_size').
 *
 * Steps visible here:
 *  - build one repack_info per pack (its pack-int-id and mtime);
 *  - count, per pack, how many MIDX objects it holds;
 *  - sort the array with compare_by_mtime();
 *  - walk the sorted packs while total_size < batch_size, estimate each
 *    pack's contribution, add it to total_size and set
 *    include_pack[pack_int_id] = 1.
 *
 * "repack.packkeptobjects" is read into pack_kept_objects (0 if unset) and
 * consulted together with each pack's pack_keep flag.
 *
 * NOTE(review): the bodies of the per-pack tests (kept pack, unopenable or
 * empty pack index, expected_size >= batch_size), the update of
 * packs_to_repack and the return statements are not visible in this excerpt;
 * the final visible test is "packs_to_repack < 2".
 */
1986 static int fill_included_packs_batch(struct repository
*r
,
1987 struct multi_pack_index
*m
,
1988 unsigned char *include_pack
,
1991 uint32_t i
, packs_to_repack
;
1993 struct repack_info
*pack_info
;
1994 int pack_kept_objects
= 0;
1996 CALLOC_ARRAY(pack_info
, m
->num_packs
);
1998 repo_config_get_bool(r
, "repack.packkeptobjects", &pack_kept_objects
);
2000 for (i
= 0; i
< m
->num_packs
; i
++) {
2001 pack_info
[i
].pack_int_id
= i
;
2003 if (prepare_midx_pack(r
, m
, i
))
2006 pack_info
[i
].mtime
= m
->packs
[i
]->mtime
;
2009 for (i
= 0; i
< m
->num_objects
; i
++) {
2010 uint32_t pack_int_id
= nth_midxed_pack_int_id(m
, i
);
2011 pack_info
[pack_int_id
].referenced_objects
++;
2014 QSORT(pack_info
, m
->num_packs
, compare_by_mtime
);
2017 packs_to_repack
= 0;
2018 for (i
= 0; total_size
< batch_size
&& i
< m
->num_packs
; i
++) {
2019 int pack_int_id
= pack_info
[i
].pack_int_id
;
2020 struct packed_git
*p
= m
->packs
[pack_int_id
];
2021 size_t expected_size
;
2025 if (!pack_kept_objects
&& p
->pack_keep
)
2029 if (open_pack_index(p
) || !p
->num_objects
)
/*
 * Estimate this pack's contribution: its on-disk size scaled by the share of
 * its objects that the MIDX references (multiply first, then divide).
 */
2032 expected_size
= st_mult(p
->pack_size
,
2033 pack_info
[i
].referenced_objects
);
2034 expected_size
/= p
->num_objects
;
2036 if (expected_size
>= batch_size
)
2040 total_size
+= expected_size
;
2041 include_pack
[pack_int_id
] = 1;
2046 if (packs_to_repack
< 2)
2052 int midx_repack(struct repository
*r
, const char *object_dir
, size_t batch_size
, unsigned flags
)
2056 unsigned char *include_pack
;
2057 struct child_process cmd
= CHILD_PROCESS_INIT
;
2059 struct strbuf base_name
= STRBUF_INIT
;
2060 struct multi_pack_index
*m
= lookup_multi_pack_index(r
, object_dir
);
2063 * When updating the default for these configuration
2064 * variables in builtin/repack.c, these must be adjusted
2067 int delta_base_offset
= 1;
2068 int use_delta_islands
= 0;
2073 CALLOC_ARRAY(include_pack
, m
->num_packs
);
2076 if (fill_included_packs_batch(r
, m
, include_pack
, batch_size
))
2078 } else if (fill_included_packs_all(r
, m
, include_pack
))
2081 repo_config_get_bool(r
, "repack.usedeltabaseoffset", &delta_base_offset
);
2082 repo_config_get_bool(r
, "repack.usedeltaislands", &use_delta_islands
);
2084 strvec_push(&cmd
.args
, "pack-objects");
2086 strbuf_addstr(&base_name
, object_dir
);
2087 strbuf_addstr(&base_name
, "/pack/pack");
2088 strvec_push(&cmd
.args
, base_name
.buf
);
2090 if (delta_base_offset
)
2091 strvec_push(&cmd
.args
, "--delta-base-offset");
2092 if (use_delta_islands
)
2093 strvec_push(&cmd
.args
, "--delta-islands");
2095 if (flags
& MIDX_PROGRESS
)
2096 strvec_push(&cmd
.args
, "--progress");
2098 strvec_push(&cmd
.args
, "-q");
2100 strbuf_release(&base_name
);
2103 cmd
.in
= cmd
.out
= -1;
2105 if (start_command(&cmd
)) {
2106 error(_("could not start pack-objects"));
2111 cmd_in
= xfdopen(cmd
.in
, "w");
2113 for (i
= 0; i
< m
->num_objects
; i
++) {
2114 struct object_id oid
;
2115 uint32_t pack_int_id
= nth_midxed_pack_int_id(m
, i
);
2117 if (!include_pack
[pack_int_id
])
2120 nth_midxed_object_oid(&oid
, m
, i
);
2121 fprintf(cmd_in
, "%s\n", oid_to_hex(&oid
));
2125 if (finish_command(&cmd
)) {
2126 error(_("could not finish pack-objects"));
2131 result
= write_midx_internal(object_dir
, NULL
, NULL
, NULL
, NULL
, flags
);