Merge branch 'jc/gpg-lazy-init'
[git/debian.git] / midx.c
blob47989f7ea70931f53c00c7b385e66282d9f5e1b4
1 #include "git-compat-util.h"
2 #include "alloc.h"
3 #include "config.h"
4 #include "csum-file.h"
5 #include "dir.h"
6 #include "hex.h"
7 #include "lockfile.h"
8 #include "packfile.h"
9 #include "object-store.h"
10 #include "hash-lookup.h"
11 #include "midx.h"
12 #include "progress.h"
13 #include "trace2.h"
14 #include "run-command.h"
15 #include "repository.h"
16 #include "chunk-format.h"
17 #include "pack.h"
18 #include "pack-bitmap.h"
19 #include "refs.h"
20 #include "revision.h"
21 #include "list-objects.h"
23 #define MIDX_SIGNATURE 0x4d494458 /* "MIDX" */
24 #define MIDX_VERSION 1
25 #define MIDX_BYTE_FILE_VERSION 4
26 #define MIDX_BYTE_HASH_VERSION 5
27 #define MIDX_BYTE_NUM_CHUNKS 6
28 #define MIDX_BYTE_NUM_PACKS 8
29 #define MIDX_HEADER_SIZE 12
30 #define MIDX_MIN_SIZE (MIDX_HEADER_SIZE + the_hash_algo->rawsz)
32 #define MIDX_CHUNK_ALIGNMENT 4
33 #define MIDX_CHUNKID_PACKNAMES 0x504e414d /* "PNAM" */
34 #define MIDX_CHUNKID_OIDFANOUT 0x4f494446 /* "OIDF" */
35 #define MIDX_CHUNKID_OIDLOOKUP 0x4f49444c /* "OIDL" */
36 #define MIDX_CHUNKID_OBJECTOFFSETS 0x4f4f4646 /* "OOFF" */
37 #define MIDX_CHUNKID_LARGEOFFSETS 0x4c4f4646 /* "LOFF" */
38 #define MIDX_CHUNKID_REVINDEX 0x52494458 /* "RIDX" */
39 #define MIDX_CHUNK_FANOUT_SIZE (sizeof(uint32_t) * 256)
40 #define MIDX_CHUNK_OFFSET_WIDTH (2 * sizeof(uint32_t))
41 #define MIDX_CHUNK_LARGE_OFFSET_WIDTH (sizeof(uint64_t))
42 #define MIDX_LARGE_OFFSET_NEEDED 0x80000000
44 #define PACK_EXPIRED UINT_MAX
46 const unsigned char *get_midx_checksum(struct multi_pack_index *m)
48 return m->data + m->data_len - the_hash_algo->rawsz;
51 void get_midx_filename(struct strbuf *out, const char *object_dir)
53 strbuf_addf(out, "%s/pack/multi-pack-index", object_dir);
56 void get_midx_rev_filename(struct strbuf *out, struct multi_pack_index *m)
58 get_midx_filename(out, m->object_dir);
59 strbuf_addf(out, "-%s.rev", hash_to_hex(get_midx_checksum(m)));
62 static int midx_read_oid_fanout(const unsigned char *chunk_start,
63 size_t chunk_size, void *data)
65 struct multi_pack_index *m = data;
66 m->chunk_oid_fanout = (uint32_t *)chunk_start;
68 if (chunk_size != 4 * 256) {
69 error(_("multi-pack-index OID fanout is of the wrong size"));
70 return 1;
72 return 0;
75 struct multi_pack_index *load_multi_pack_index(const char *object_dir, int local)
77 struct multi_pack_index *m = NULL;
78 int fd;
79 struct stat st;
80 size_t midx_size;
81 void *midx_map = NULL;
82 uint32_t hash_version;
83 struct strbuf midx_name = STRBUF_INIT;
84 uint32_t i;
85 const char *cur_pack_name;
86 struct chunkfile *cf = NULL;
88 get_midx_filename(&midx_name, object_dir);
90 fd = git_open(midx_name.buf);
92 if (fd < 0)
93 goto cleanup_fail;
94 if (fstat(fd, &st)) {
95 error_errno(_("failed to read %s"), midx_name.buf);
96 goto cleanup_fail;
99 midx_size = xsize_t(st.st_size);
101 if (midx_size < MIDX_MIN_SIZE) {
102 error(_("multi-pack-index file %s is too small"), midx_name.buf);
103 goto cleanup_fail;
106 strbuf_release(&midx_name);
108 midx_map = xmmap(NULL, midx_size, PROT_READ, MAP_PRIVATE, fd, 0);
109 close(fd);
111 FLEX_ALLOC_STR(m, object_dir, object_dir);
112 m->data = midx_map;
113 m->data_len = midx_size;
114 m->local = local;
116 m->signature = get_be32(m->data);
117 if (m->signature != MIDX_SIGNATURE)
118 die(_("multi-pack-index signature 0x%08x does not match signature 0x%08x"),
119 m->signature, MIDX_SIGNATURE);
121 m->version = m->data[MIDX_BYTE_FILE_VERSION];
122 if (m->version != MIDX_VERSION)
123 die(_("multi-pack-index version %d not recognized"),
124 m->version);
126 hash_version = m->data[MIDX_BYTE_HASH_VERSION];
127 if (hash_version != oid_version(the_hash_algo)) {
128 error(_("multi-pack-index hash version %u does not match version %u"),
129 hash_version, oid_version(the_hash_algo));
130 goto cleanup_fail;
132 m->hash_len = the_hash_algo->rawsz;
134 m->num_chunks = m->data[MIDX_BYTE_NUM_CHUNKS];
136 m->num_packs = get_be32(m->data + MIDX_BYTE_NUM_PACKS);
138 cf = init_chunkfile(NULL);
140 if (read_table_of_contents(cf, m->data, midx_size,
141 MIDX_HEADER_SIZE, m->num_chunks))
142 goto cleanup_fail;
144 if (pair_chunk(cf, MIDX_CHUNKID_PACKNAMES, &m->chunk_pack_names) == CHUNK_NOT_FOUND)
145 die(_("multi-pack-index missing required pack-name chunk"));
146 if (read_chunk(cf, MIDX_CHUNKID_OIDFANOUT, midx_read_oid_fanout, m) == CHUNK_NOT_FOUND)
147 die(_("multi-pack-index missing required OID fanout chunk"));
148 if (pair_chunk(cf, MIDX_CHUNKID_OIDLOOKUP, &m->chunk_oid_lookup) == CHUNK_NOT_FOUND)
149 die(_("multi-pack-index missing required OID lookup chunk"));
150 if (pair_chunk(cf, MIDX_CHUNKID_OBJECTOFFSETS, &m->chunk_object_offsets) == CHUNK_NOT_FOUND)
151 die(_("multi-pack-index missing required object offsets chunk"));
153 pair_chunk(cf, MIDX_CHUNKID_LARGEOFFSETS, &m->chunk_large_offsets);
155 if (git_env_bool("GIT_TEST_MIDX_READ_RIDX", 1))
156 pair_chunk(cf, MIDX_CHUNKID_REVINDEX, &m->chunk_revindex);
158 m->num_objects = ntohl(m->chunk_oid_fanout[255]);
160 CALLOC_ARRAY(m->pack_names, m->num_packs);
161 CALLOC_ARRAY(m->packs, m->num_packs);
163 cur_pack_name = (const char *)m->chunk_pack_names;
164 for (i = 0; i < m->num_packs; i++) {
165 m->pack_names[i] = cur_pack_name;
167 cur_pack_name += strlen(cur_pack_name) + 1;
169 if (i && strcmp(m->pack_names[i], m->pack_names[i - 1]) <= 0)
170 die(_("multi-pack-index pack names out of order: '%s' before '%s'"),
171 m->pack_names[i - 1],
172 m->pack_names[i]);
175 trace2_data_intmax("midx", the_repository, "load/num_packs", m->num_packs);
176 trace2_data_intmax("midx", the_repository, "load/num_objects", m->num_objects);
178 free_chunkfile(cf);
179 return m;
181 cleanup_fail:
182 free(m);
183 strbuf_release(&midx_name);
184 free_chunkfile(cf);
185 if (midx_map)
186 munmap(midx_map, midx_size);
187 if (0 <= fd)
188 close(fd);
189 return NULL;
192 void close_midx(struct multi_pack_index *m)
194 uint32_t i;
196 if (!m)
197 return;
199 close_midx(m->next);
201 munmap((unsigned char *)m->data, m->data_len);
203 for (i = 0; i < m->num_packs; i++) {
204 if (m->packs[i])
205 m->packs[i]->multi_pack_index = 0;
207 FREE_AND_NULL(m->packs);
208 FREE_AND_NULL(m->pack_names);
209 free(m);
212 int prepare_midx_pack(struct repository *r, struct multi_pack_index *m, uint32_t pack_int_id)
214 struct strbuf pack_name = STRBUF_INIT;
215 struct packed_git *p;
217 if (pack_int_id >= m->num_packs)
218 die(_("bad pack-int-id: %u (%u total packs)"),
219 pack_int_id, m->num_packs);
221 if (m->packs[pack_int_id])
222 return 0;
224 strbuf_addf(&pack_name, "%s/pack/%s", m->object_dir,
225 m->pack_names[pack_int_id]);
227 p = add_packed_git(pack_name.buf, pack_name.len, m->local);
228 strbuf_release(&pack_name);
230 if (!p)
231 return 1;
233 p->multi_pack_index = 1;
234 m->packs[pack_int_id] = p;
235 install_packed_git(r, p);
236 list_add_tail(&p->mru, &r->objects->packed_git_mru);
238 return 0;
241 int bsearch_midx(const struct object_id *oid, struct multi_pack_index *m, uint32_t *result)
243 return bsearch_hash(oid->hash, m->chunk_oid_fanout, m->chunk_oid_lookup,
244 the_hash_algo->rawsz, result);
247 struct object_id *nth_midxed_object_oid(struct object_id *oid,
248 struct multi_pack_index *m,
249 uint32_t n)
251 if (n >= m->num_objects)
252 return NULL;
254 oidread(oid, m->chunk_oid_lookup + m->hash_len * n);
255 return oid;
258 off_t nth_midxed_offset(struct multi_pack_index *m, uint32_t pos)
260 const unsigned char *offset_data;
261 uint32_t offset32;
263 offset_data = m->chunk_object_offsets + (off_t)pos * MIDX_CHUNK_OFFSET_WIDTH;
264 offset32 = get_be32(offset_data + sizeof(uint32_t));
266 if (m->chunk_large_offsets && offset32 & MIDX_LARGE_OFFSET_NEEDED) {
267 if (sizeof(off_t) < sizeof(uint64_t))
268 die(_("multi-pack-index stores a 64-bit offset, but off_t is too small"));
270 offset32 ^= MIDX_LARGE_OFFSET_NEEDED;
271 return get_be64(m->chunk_large_offsets + sizeof(uint64_t) * offset32);
274 return offset32;
277 uint32_t nth_midxed_pack_int_id(struct multi_pack_index *m, uint32_t pos)
279 return get_be32(m->chunk_object_offsets +
280 (off_t)pos * MIDX_CHUNK_OFFSET_WIDTH);
283 int fill_midx_entry(struct repository *r,
284 const struct object_id *oid,
285 struct pack_entry *e,
286 struct multi_pack_index *m)
288 uint32_t pos;
289 uint32_t pack_int_id;
290 struct packed_git *p;
292 if (!bsearch_midx(oid, m, &pos))
293 return 0;
295 if (pos >= m->num_objects)
296 return 0;
298 pack_int_id = nth_midxed_pack_int_id(m, pos);
300 if (prepare_midx_pack(r, m, pack_int_id))
301 return 0;
302 p = m->packs[pack_int_id];
305 * We are about to tell the caller where they can locate the
306 * requested object. We better make sure the packfile is
307 * still here and can be accessed before supplying that
308 * answer, as it may have been deleted since the MIDX was
309 * loaded!
311 if (!is_pack_valid(p))
312 return 0;
314 if (oidset_size(&p->bad_objects) &&
315 oidset_contains(&p->bad_objects, oid))
316 return 0;
318 e->offset = nth_midxed_offset(m, pos);
319 e->p = p;
321 return 1;
324 /* Match "foo.idx" against either "foo.pack" _or_ "foo.idx". */
325 static int cmp_idx_or_pack_name(const char *idx_or_pack_name,
326 const char *idx_name)
328 /* Skip past any initial matching prefix. */
329 while (*idx_name && *idx_name == *idx_or_pack_name) {
330 idx_name++;
331 idx_or_pack_name++;
335 * If we didn't match completely, we may have matched "pack-1234." and
336 * be left with "idx" and "pack" respectively, which is also OK. We do
337 * not have to check for "idx" and "idx", because that would have been
338 * a complete match (and in that case these strcmps will be false, but
339 * we'll correctly return 0 from the final strcmp() below.
341 * Technically this matches "fooidx" and "foopack", but we'd never have
342 * such names in the first place.
344 if (!strcmp(idx_name, "idx") && !strcmp(idx_or_pack_name, "pack"))
345 return 0;
348 * This not only checks for a complete match, but also orders based on
349 * the first non-identical character, which means our ordering will
350 * match a raw strcmp(). That makes it OK to use this to binary search
351 * a naively-sorted list.
353 return strcmp(idx_or_pack_name, idx_name);
356 int midx_contains_pack(struct multi_pack_index *m, const char *idx_or_pack_name)
358 uint32_t first = 0, last = m->num_packs;
360 while (first < last) {
361 uint32_t mid = first + (last - first) / 2;
362 const char *current;
363 int cmp;
365 current = m->pack_names[mid];
366 cmp = cmp_idx_or_pack_name(idx_or_pack_name, current);
367 if (!cmp)
368 return 1;
369 if (cmp > 0) {
370 first = mid + 1;
371 continue;
373 last = mid;
376 return 0;
379 int prepare_multi_pack_index_one(struct repository *r, const char *object_dir, int local)
381 struct multi_pack_index *m;
382 struct multi_pack_index *m_search;
384 prepare_repo_settings(r);
385 if (!r->settings.core_multi_pack_index)
386 return 0;
388 for (m_search = r->objects->multi_pack_index; m_search; m_search = m_search->next)
389 if (!strcmp(object_dir, m_search->object_dir))
390 return 1;
392 m = load_multi_pack_index(object_dir, local);
394 if (m) {
395 struct multi_pack_index *mp = r->objects->multi_pack_index;
396 if (mp) {
397 m->next = mp->next;
398 mp->next = m;
399 } else
400 r->objects->multi_pack_index = m;
401 return 1;
404 return 0;
407 static size_t write_midx_header(struct hashfile *f,
408 unsigned char num_chunks,
409 uint32_t num_packs)
411 hashwrite_be32(f, MIDX_SIGNATURE);
412 hashwrite_u8(f, MIDX_VERSION);
413 hashwrite_u8(f, oid_version(the_hash_algo));
414 hashwrite_u8(f, num_chunks);
415 hashwrite_u8(f, 0); /* unused */
416 hashwrite_be32(f, num_packs);
418 return MIDX_HEADER_SIZE;
421 struct pack_info {
422 uint32_t orig_pack_int_id;
423 char *pack_name;
424 struct packed_git *p;
425 unsigned expired : 1;
428 static int pack_info_compare(const void *_a, const void *_b)
430 struct pack_info *a = (struct pack_info *)_a;
431 struct pack_info *b = (struct pack_info *)_b;
432 return strcmp(a->pack_name, b->pack_name);
435 static int idx_or_pack_name_cmp(const void *_va, const void *_vb)
437 const char *pack_name = _va;
438 const struct pack_info *compar = _vb;
440 return cmp_idx_or_pack_name(pack_name, compar->pack_name);
443 struct write_midx_context {
444 struct pack_info *info;
445 uint32_t nr;
446 uint32_t alloc;
447 struct multi_pack_index *m;
448 struct progress *progress;
449 unsigned pack_paths_checked;
451 struct pack_midx_entry *entries;
452 uint32_t entries_nr;
454 uint32_t *pack_perm;
455 uint32_t *pack_order;
456 unsigned large_offsets_needed:1;
457 uint32_t num_large_offsets;
459 int preferred_pack_idx;
461 struct string_list *to_include;
464 static void add_pack_to_midx(const char *full_path, size_t full_path_len,
465 const char *file_name, void *data)
467 struct write_midx_context *ctx = data;
469 if (ends_with(file_name, ".idx")) {
470 display_progress(ctx->progress, ++ctx->pack_paths_checked);
472 * Note that at most one of ctx->m and ctx->to_include are set,
473 * so we are testing midx_contains_pack() and
474 * string_list_has_string() independently (guarded by the
475 * appropriate NULL checks).
477 * We could support passing to_include while reusing an existing
478 * MIDX, but don't currently since the reuse process drags
479 * forward all packs from an existing MIDX (without checking
480 * whether or not they appear in the to_include list).
482 * If we added support for that, these next two conditional
483 * should be performed independently (likely checking
484 * to_include before the existing MIDX).
486 if (ctx->m && midx_contains_pack(ctx->m, file_name))
487 return;
488 else if (ctx->to_include &&
489 !string_list_has_string(ctx->to_include, file_name))
490 return;
492 ALLOC_GROW(ctx->info, ctx->nr + 1, ctx->alloc);
494 ctx->info[ctx->nr].p = add_packed_git(full_path,
495 full_path_len,
498 if (!ctx->info[ctx->nr].p) {
499 warning(_("failed to add packfile '%s'"),
500 full_path);
501 return;
504 if (open_pack_index(ctx->info[ctx->nr].p)) {
505 warning(_("failed to open pack-index '%s'"),
506 full_path);
507 close_pack(ctx->info[ctx->nr].p);
508 FREE_AND_NULL(ctx->info[ctx->nr].p);
509 return;
512 ctx->info[ctx->nr].pack_name = xstrdup(file_name);
513 ctx->info[ctx->nr].orig_pack_int_id = ctx->nr;
514 ctx->info[ctx->nr].expired = 0;
515 ctx->nr++;
519 struct pack_midx_entry {
520 struct object_id oid;
521 uint32_t pack_int_id;
522 time_t pack_mtime;
523 uint64_t offset;
524 unsigned preferred : 1;
527 static int midx_oid_compare(const void *_a, const void *_b)
529 const struct pack_midx_entry *a = (const struct pack_midx_entry *)_a;
530 const struct pack_midx_entry *b = (const struct pack_midx_entry *)_b;
531 int cmp = oidcmp(&a->oid, &b->oid);
533 if (cmp)
534 return cmp;
536 /* Sort objects in a preferred pack first when multiple copies exist. */
537 if (a->preferred > b->preferred)
538 return -1;
539 if (a->preferred < b->preferred)
540 return 1;
542 if (a->pack_mtime > b->pack_mtime)
543 return -1;
544 else if (a->pack_mtime < b->pack_mtime)
545 return 1;
547 return a->pack_int_id - b->pack_int_id;
550 static int nth_midxed_pack_midx_entry(struct multi_pack_index *m,
551 struct pack_midx_entry *e,
552 uint32_t pos)
554 if (pos >= m->num_objects)
555 return 1;
557 nth_midxed_object_oid(&e->oid, m, pos);
558 e->pack_int_id = nth_midxed_pack_int_id(m, pos);
559 e->offset = nth_midxed_offset(m, pos);
561 /* consider objects in midx to be from "old" packs */
562 e->pack_mtime = 0;
563 return 0;
566 static void fill_pack_entry(uint32_t pack_int_id,
567 struct packed_git *p,
568 uint32_t cur_object,
569 struct pack_midx_entry *entry,
570 int preferred)
572 if (nth_packed_object_id(&entry->oid, p, cur_object) < 0)
573 die(_("failed to locate object %d in packfile"), cur_object);
575 entry->pack_int_id = pack_int_id;
576 entry->pack_mtime = p->mtime;
578 entry->offset = nth_packed_object_offset(p, cur_object);
579 entry->preferred = !!preferred;
582 struct midx_fanout {
583 struct pack_midx_entry *entries;
584 uint32_t nr;
585 uint32_t alloc;
588 static void midx_fanout_grow(struct midx_fanout *fanout, uint32_t nr)
590 ALLOC_GROW(fanout->entries, nr, fanout->alloc);
593 static void midx_fanout_sort(struct midx_fanout *fanout)
595 QSORT(fanout->entries, fanout->nr, midx_oid_compare);
598 static void midx_fanout_add_midx_fanout(struct midx_fanout *fanout,
599 struct multi_pack_index *m,
600 uint32_t cur_fanout,
601 int preferred_pack)
603 uint32_t start = 0, end;
604 uint32_t cur_object;
606 if (cur_fanout)
607 start = ntohl(m->chunk_oid_fanout[cur_fanout - 1]);
608 end = ntohl(m->chunk_oid_fanout[cur_fanout]);
610 for (cur_object = start; cur_object < end; cur_object++) {
611 if ((preferred_pack > -1) &&
612 (preferred_pack == nth_midxed_pack_int_id(m, cur_object))) {
614 * Objects from preferred packs are added
615 * separately.
617 continue;
620 midx_fanout_grow(fanout, fanout->nr + 1);
621 nth_midxed_pack_midx_entry(m,
622 &fanout->entries[fanout->nr],
623 cur_object);
624 fanout->entries[fanout->nr].preferred = 0;
625 fanout->nr++;
629 static void midx_fanout_add_pack_fanout(struct midx_fanout *fanout,
630 struct pack_info *info,
631 uint32_t cur_pack,
632 int preferred,
633 uint32_t cur_fanout)
635 struct packed_git *pack = info[cur_pack].p;
636 uint32_t start = 0, end;
637 uint32_t cur_object;
639 if (cur_fanout)
640 start = get_pack_fanout(pack, cur_fanout - 1);
641 end = get_pack_fanout(pack, cur_fanout);
643 for (cur_object = start; cur_object < end; cur_object++) {
644 midx_fanout_grow(fanout, fanout->nr + 1);
645 fill_pack_entry(cur_pack,
646 info[cur_pack].p,
647 cur_object,
648 &fanout->entries[fanout->nr],
649 preferred);
650 fanout->nr++;
655 * It is possible to artificially get into a state where there are many
656 * duplicate copies of objects. That can create high memory pressure if
657 * we are to create a list of all objects before de-duplication. To reduce
658 * this memory pressure without a significant performance drop, automatically
659 * group objects by the first byte of their object id. Use the IDX fanout
660 * tables to group the data, copy to a local array, then sort.
662 * Copy only the de-duplicated entries (selected by most-recent modified time
663 * of a packfile containing the object).
665 static struct pack_midx_entry *get_sorted_entries(struct multi_pack_index *m,
666 struct pack_info *info,
667 uint32_t nr_packs,
668 uint32_t *nr_objects,
669 int preferred_pack)
671 uint32_t cur_fanout, cur_pack, cur_object;
672 uint32_t alloc_objects, total_objects = 0;
673 struct midx_fanout fanout = { 0 };
674 struct pack_midx_entry *deduplicated_entries = NULL;
675 uint32_t start_pack = m ? m->num_packs : 0;
677 for (cur_pack = start_pack; cur_pack < nr_packs; cur_pack++)
678 total_objects += info[cur_pack].p->num_objects;
681 * As we de-duplicate by fanout value, we expect the fanout
682 * slices to be evenly distributed, with some noise. Hence,
683 * allocate slightly more than one 256th.
685 alloc_objects = fanout.alloc = total_objects > 3200 ? total_objects / 200 : 16;
687 ALLOC_ARRAY(fanout.entries, fanout.alloc);
688 ALLOC_ARRAY(deduplicated_entries, alloc_objects);
689 *nr_objects = 0;
691 for (cur_fanout = 0; cur_fanout < 256; cur_fanout++) {
692 fanout.nr = 0;
694 if (m)
695 midx_fanout_add_midx_fanout(&fanout, m, cur_fanout,
696 preferred_pack);
698 for (cur_pack = start_pack; cur_pack < nr_packs; cur_pack++) {
699 int preferred = cur_pack == preferred_pack;
700 midx_fanout_add_pack_fanout(&fanout,
701 info, cur_pack,
702 preferred, cur_fanout);
705 if (-1 < preferred_pack && preferred_pack < start_pack)
706 midx_fanout_add_pack_fanout(&fanout, info,
707 preferred_pack, 1,
708 cur_fanout);
710 midx_fanout_sort(&fanout);
713 * The batch is now sorted by OID and then mtime (descending).
714 * Take only the first duplicate.
716 for (cur_object = 0; cur_object < fanout.nr; cur_object++) {
717 if (cur_object && oideq(&fanout.entries[cur_object - 1].oid,
718 &fanout.entries[cur_object].oid))
719 continue;
721 ALLOC_GROW(deduplicated_entries, *nr_objects + 1, alloc_objects);
722 memcpy(&deduplicated_entries[*nr_objects],
723 &fanout.entries[cur_object],
724 sizeof(struct pack_midx_entry));
725 (*nr_objects)++;
729 free(fanout.entries);
730 return deduplicated_entries;
733 static int write_midx_pack_names(struct hashfile *f, void *data)
735 struct write_midx_context *ctx = data;
736 uint32_t i;
737 unsigned char padding[MIDX_CHUNK_ALIGNMENT];
738 size_t written = 0;
740 for (i = 0; i < ctx->nr; i++) {
741 size_t writelen;
743 if (ctx->info[i].expired)
744 continue;
746 if (i && strcmp(ctx->info[i].pack_name, ctx->info[i - 1].pack_name) <= 0)
747 BUG("incorrect pack-file order: %s before %s",
748 ctx->info[i - 1].pack_name,
749 ctx->info[i].pack_name);
751 writelen = strlen(ctx->info[i].pack_name) + 1;
752 hashwrite(f, ctx->info[i].pack_name, writelen);
753 written += writelen;
756 /* add padding to be aligned */
757 i = MIDX_CHUNK_ALIGNMENT - (written % MIDX_CHUNK_ALIGNMENT);
758 if (i < MIDX_CHUNK_ALIGNMENT) {
759 memset(padding, 0, sizeof(padding));
760 hashwrite(f, padding, i);
763 return 0;
766 static int write_midx_oid_fanout(struct hashfile *f,
767 void *data)
769 struct write_midx_context *ctx = data;
770 struct pack_midx_entry *list = ctx->entries;
771 struct pack_midx_entry *last = ctx->entries + ctx->entries_nr;
772 uint32_t count = 0;
773 uint32_t i;
776 * Write the first-level table (the list is sorted,
777 * but we use a 256-entry lookup to be able to avoid
778 * having to do eight extra binary search iterations).
780 for (i = 0; i < 256; i++) {
781 struct pack_midx_entry *next = list;
783 while (next < last && next->oid.hash[0] == i) {
784 count++;
785 next++;
788 hashwrite_be32(f, count);
789 list = next;
792 return 0;
795 static int write_midx_oid_lookup(struct hashfile *f,
796 void *data)
798 struct write_midx_context *ctx = data;
799 unsigned char hash_len = the_hash_algo->rawsz;
800 struct pack_midx_entry *list = ctx->entries;
801 uint32_t i;
803 for (i = 0; i < ctx->entries_nr; i++) {
804 struct pack_midx_entry *obj = list++;
806 if (i < ctx->entries_nr - 1) {
807 struct pack_midx_entry *next = list;
808 if (oidcmp(&obj->oid, &next->oid) >= 0)
809 BUG("OIDs not in order: %s >= %s",
810 oid_to_hex(&obj->oid),
811 oid_to_hex(&next->oid));
814 hashwrite(f, obj->oid.hash, (int)hash_len);
817 return 0;
820 static int write_midx_object_offsets(struct hashfile *f,
821 void *data)
823 struct write_midx_context *ctx = data;
824 struct pack_midx_entry *list = ctx->entries;
825 uint32_t i, nr_large_offset = 0;
827 for (i = 0; i < ctx->entries_nr; i++) {
828 struct pack_midx_entry *obj = list++;
830 if (ctx->pack_perm[obj->pack_int_id] == PACK_EXPIRED)
831 BUG("object %s is in an expired pack with int-id %d",
832 oid_to_hex(&obj->oid),
833 obj->pack_int_id);
835 hashwrite_be32(f, ctx->pack_perm[obj->pack_int_id]);
837 if (ctx->large_offsets_needed && obj->offset >> 31)
838 hashwrite_be32(f, MIDX_LARGE_OFFSET_NEEDED | nr_large_offset++);
839 else if (!ctx->large_offsets_needed && obj->offset >> 32)
840 BUG("object %s requires a large offset (%"PRIx64") but the MIDX is not writing large offsets!",
841 oid_to_hex(&obj->oid),
842 obj->offset);
843 else
844 hashwrite_be32(f, (uint32_t)obj->offset);
847 return 0;
850 static int write_midx_large_offsets(struct hashfile *f,
851 void *data)
853 struct write_midx_context *ctx = data;
854 struct pack_midx_entry *list = ctx->entries;
855 struct pack_midx_entry *end = ctx->entries + ctx->entries_nr;
856 uint32_t nr_large_offset = ctx->num_large_offsets;
858 while (nr_large_offset) {
859 struct pack_midx_entry *obj;
860 uint64_t offset;
862 if (list >= end)
863 BUG("too many large-offset objects");
865 obj = list++;
866 offset = obj->offset;
868 if (!(offset >> 31))
869 continue;
871 hashwrite_be64(f, offset);
873 nr_large_offset--;
876 return 0;
879 static int write_midx_revindex(struct hashfile *f,
880 void *data)
882 struct write_midx_context *ctx = data;
883 uint32_t i;
885 for (i = 0; i < ctx->entries_nr; i++)
886 hashwrite_be32(f, ctx->pack_order[i]);
888 return 0;
891 struct midx_pack_order_data {
892 uint32_t nr;
893 uint32_t pack;
894 off_t offset;
897 static int midx_pack_order_cmp(const void *va, const void *vb)
899 const struct midx_pack_order_data *a = va, *b = vb;
900 if (a->pack < b->pack)
901 return -1;
902 else if (a->pack > b->pack)
903 return 1;
904 else if (a->offset < b->offset)
905 return -1;
906 else if (a->offset > b->offset)
907 return 1;
908 else
909 return 0;
912 static uint32_t *midx_pack_order(struct write_midx_context *ctx)
914 struct midx_pack_order_data *data;
915 uint32_t *pack_order;
916 uint32_t i;
918 trace2_region_enter("midx", "midx_pack_order", the_repository);
920 ALLOC_ARRAY(data, ctx->entries_nr);
921 for (i = 0; i < ctx->entries_nr; i++) {
922 struct pack_midx_entry *e = &ctx->entries[i];
923 data[i].nr = i;
924 data[i].pack = ctx->pack_perm[e->pack_int_id];
925 if (!e->preferred)
926 data[i].pack |= (1U << 31);
927 data[i].offset = e->offset;
930 QSORT(data, ctx->entries_nr, midx_pack_order_cmp);
932 ALLOC_ARRAY(pack_order, ctx->entries_nr);
933 for (i = 0; i < ctx->entries_nr; i++)
934 pack_order[i] = data[i].nr;
935 free(data);
937 trace2_region_leave("midx", "midx_pack_order", the_repository);
939 return pack_order;
942 static void write_midx_reverse_index(char *midx_name, unsigned char *midx_hash,
943 struct write_midx_context *ctx)
945 struct strbuf buf = STRBUF_INIT;
946 const char *tmp_file;
948 trace2_region_enter("midx", "write_midx_reverse_index", the_repository);
950 strbuf_addf(&buf, "%s-%s.rev", midx_name, hash_to_hex(midx_hash));
952 tmp_file = write_rev_file_order(NULL, ctx->pack_order, ctx->entries_nr,
953 midx_hash, WRITE_REV);
955 if (finalize_object_file(tmp_file, buf.buf))
956 die(_("cannot store reverse index file"));
958 strbuf_release(&buf);
960 trace2_region_leave("midx", "write_midx_reverse_index", the_repository);
963 static void clear_midx_files_ext(const char *object_dir, const char *ext,
964 unsigned char *keep_hash);
966 static int midx_checksum_valid(struct multi_pack_index *m)
968 return hashfile_checksum_valid(m->data, m->data_len);
971 static void prepare_midx_packing_data(struct packing_data *pdata,
972 struct write_midx_context *ctx)
974 uint32_t i;
976 trace2_region_enter("midx", "prepare_midx_packing_data", the_repository);
978 memset(pdata, 0, sizeof(struct packing_data));
979 prepare_packing_data(the_repository, pdata);
981 for (i = 0; i < ctx->entries_nr; i++) {
982 struct pack_midx_entry *from = &ctx->entries[ctx->pack_order[i]];
983 struct object_entry *to = packlist_alloc(pdata, &from->oid);
985 oe_set_in_pack(pdata, to,
986 ctx->info[ctx->pack_perm[from->pack_int_id]].p);
989 trace2_region_leave("midx", "prepare_midx_packing_data", the_repository);
992 static int add_ref_to_pending(const char *refname,
993 const struct object_id *oid,
994 int flag, void *cb_data)
996 struct rev_info *revs = (struct rev_info*)cb_data;
997 struct object_id peeled;
998 struct object *object;
1000 if ((flag & REF_ISSYMREF) && (flag & REF_ISBROKEN)) {
1001 warning("symbolic ref is dangling: %s", refname);
1002 return 0;
1005 if (!peel_iterated_oid(oid, &peeled))
1006 oid = &peeled;
1008 object = parse_object_or_die(oid, refname);
1009 if (object->type != OBJ_COMMIT)
1010 return 0;
1012 add_pending_object(revs, object, "");
1013 if (bitmap_is_preferred_refname(revs->repo, refname))
1014 object->flags |= NEEDS_BITMAP;
1015 return 0;
1018 struct bitmap_commit_cb {
1019 struct commit **commits;
1020 size_t commits_nr, commits_alloc;
1022 struct write_midx_context *ctx;
1025 static const struct object_id *bitmap_oid_access(size_t index,
1026 const void *_entries)
1028 const struct pack_midx_entry *entries = _entries;
1029 return &entries[index].oid;
1032 static void bitmap_show_commit(struct commit *commit, void *_data)
1034 struct bitmap_commit_cb *data = _data;
1035 int pos = oid_pos(&commit->object.oid, data->ctx->entries,
1036 data->ctx->entries_nr,
1037 bitmap_oid_access);
1038 if (pos < 0)
1039 return;
1041 ALLOC_GROW(data->commits, data->commits_nr + 1, data->commits_alloc);
1042 data->commits[data->commits_nr++] = commit;
1045 static int read_refs_snapshot(const char *refs_snapshot,
1046 struct rev_info *revs)
1048 struct strbuf buf = STRBUF_INIT;
1049 struct object_id oid;
1050 FILE *f = xfopen(refs_snapshot, "r");
1052 while (strbuf_getline(&buf, f) != EOF) {
1053 struct object *object;
1054 int preferred = 0;
1055 char *hex = buf.buf;
1056 const char *end = NULL;
1058 if (buf.len && *buf.buf == '+') {
1059 preferred = 1;
1060 hex = &buf.buf[1];
1063 if (parse_oid_hex(hex, &oid, &end) < 0)
1064 die(_("could not parse line: %s"), buf.buf);
1065 if (*end)
1066 die(_("malformed line: %s"), buf.buf);
1068 object = parse_object_or_die(&oid, NULL);
1069 if (preferred)
1070 object->flags |= NEEDS_BITMAP;
1072 add_pending_object(revs, object, "");
1075 fclose(f);
1076 strbuf_release(&buf);
1077 return 0;
1080 static struct commit **find_commits_for_midx_bitmap(uint32_t *indexed_commits_nr_p,
1081 const char *refs_snapshot,
1082 struct write_midx_context *ctx)
1084 struct rev_info revs;
1085 struct bitmap_commit_cb cb = {0};
1087 trace2_region_enter("midx", "find_commits_for_midx_bitmap",
1088 the_repository);
1090 cb.ctx = ctx;
1092 repo_init_revisions(the_repository, &revs, NULL);
1093 if (refs_snapshot) {
1094 read_refs_snapshot(refs_snapshot, &revs);
1095 } else {
1096 setup_revisions(0, NULL, &revs, NULL);
1097 for_each_ref(add_ref_to_pending, &revs);
1101 * Skipping promisor objects here is intentional, since it only excludes
1102 * them from the list of reachable commits that we want to select from
1103 * when computing the selection of MIDX'd commits to receive bitmaps.
1105 * Reachability bitmaps do require that their objects be closed under
1106 * reachability, but fetching any objects missing from promisors at this
1107 * point is too late. But, if one of those objects can be reached from
1108 * an another object that is included in the bitmap, then we will
1109 * complain later that we don't have reachability closure (and fail
1110 * appropriately).
1112 fetch_if_missing = 0;
1113 revs.exclude_promisor_objects = 1;
1115 if (prepare_revision_walk(&revs))
1116 die(_("revision walk setup failed"));
1118 traverse_commit_list(&revs, bitmap_show_commit, NULL, &cb);
1119 if (indexed_commits_nr_p)
1120 *indexed_commits_nr_p = cb.commits_nr;
1122 release_revisions(&revs);
1124 trace2_region_leave("midx", "find_commits_for_midx_bitmap",
1125 the_repository);
1127 return cb.commits;
1130 static int write_midx_bitmap(const char *midx_name,
1131 const unsigned char *midx_hash,
1132 struct packing_data *pdata,
1133 struct commit **commits,
1134 uint32_t commits_nr,
1135 uint32_t *pack_order,
1136 unsigned flags)
1138 int ret, i;
1139 uint16_t options = 0;
1140 struct pack_idx_entry **index;
1141 char *bitmap_name = xstrfmt("%s-%s.bitmap", midx_name,
1142 hash_to_hex(midx_hash));
1144 trace2_region_enter("midx", "write_midx_bitmap", the_repository);
1146 if (flags & MIDX_WRITE_BITMAP_HASH_CACHE)
1147 options |= BITMAP_OPT_HASH_CACHE;
1149 if (flags & MIDX_WRITE_BITMAP_LOOKUP_TABLE)
1150 options |= BITMAP_OPT_LOOKUP_TABLE;
1153 * Build the MIDX-order index based on pdata.objects (which is already
1154 * in MIDX order; c.f., 'midx_pack_order_cmp()' for the definition of
1155 * this order).
1157 ALLOC_ARRAY(index, pdata->nr_objects);
1158 for (i = 0; i < pdata->nr_objects; i++)
1159 index[i] = &pdata->objects[i].idx;
1161 bitmap_writer_show_progress(flags & MIDX_PROGRESS);
1162 bitmap_writer_build_type_index(pdata, index, pdata->nr_objects);
1165 * bitmap_writer_finish expects objects in lex order, but pack_order
1166 * gives us exactly that. use it directly instead of re-sorting the
1167 * array.
1169 * This changes the order of objects in 'index' between
1170 * bitmap_writer_build_type_index and bitmap_writer_finish.
1172 * The same re-ordering takes place in the single-pack bitmap code via
1173 * write_idx_file(), which is called by finish_tmp_packfile(), which
1174 * happens between bitmap_writer_build_type_index() and
1175 * bitmap_writer_finish().
1177 for (i = 0; i < pdata->nr_objects; i++)
1178 index[pack_order[i]] = &pdata->objects[i].idx;
1180 bitmap_writer_select_commits(commits, commits_nr, -1);
1181 ret = bitmap_writer_build(pdata);
1182 if (ret < 0)
1183 goto cleanup;
1185 bitmap_writer_set_checksum(midx_hash);
1186 bitmap_writer_finish(index, pdata->nr_objects, bitmap_name, options);
1188 cleanup:
1189 free(index);
1190 free(bitmap_name);
1192 trace2_region_leave("midx", "write_midx_bitmap", the_repository);
1194 return ret;
1197 static struct multi_pack_index *lookup_multi_pack_index(struct repository *r,
1198 const char *object_dir)
1200 struct multi_pack_index *result = NULL;
1201 struct multi_pack_index *cur;
1202 char *obj_dir_real = real_pathdup(object_dir, 1);
1203 struct strbuf cur_path_real = STRBUF_INIT;
1205 /* Ensure the given object_dir is local, or a known alternate. */
1206 find_odb(r, obj_dir_real);
1208 for (cur = get_multi_pack_index(r); cur; cur = cur->next) {
1209 strbuf_realpath(&cur_path_real, cur->object_dir, 1);
1210 if (!strcmp(obj_dir_real, cur_path_real.buf)) {
1211 result = cur;
1212 goto cleanup;
1216 cleanup:
1217 free(obj_dir_real);
1218 strbuf_release(&cur_path_real);
1219 return result;
1222 static int write_midx_internal(const char *object_dir,
1223 struct string_list *packs_to_include,
1224 struct string_list *packs_to_drop,
1225 const char *preferred_pack_name,
1226 const char *refs_snapshot,
1227 unsigned flags)
1229 struct strbuf midx_name = STRBUF_INIT;
1230 unsigned char midx_hash[GIT_MAX_RAWSZ];
1231 uint32_t i;
1232 struct hashfile *f = NULL;
1233 struct lock_file lk;
1234 struct write_midx_context ctx = { 0 };
1235 int pack_name_concat_len = 0;
1236 int dropped_packs = 0;
1237 int result = 0;
1238 struct chunkfile *cf;
1240 trace2_region_enter("midx", "write_midx_internal", the_repository);
1242 get_midx_filename(&midx_name, object_dir);
1243 if (safe_create_leading_directories(midx_name.buf))
1244 die_errno(_("unable to create leading directories of %s"),
1245 midx_name.buf);
1247 if (!packs_to_include) {
1249 * Only reference an existing MIDX when not filtering which
1250 * packs to include, since all packs and objects are copied
1251 * blindly from an existing MIDX if one is present.
1253 ctx.m = lookup_multi_pack_index(the_repository, object_dir);
1256 if (ctx.m && !midx_checksum_valid(ctx.m)) {
1257 warning(_("ignoring existing multi-pack-index; checksum mismatch"));
1258 ctx.m = NULL;
1261 ctx.nr = 0;
1262 ctx.alloc = ctx.m ? ctx.m->num_packs : 16;
1263 ctx.info = NULL;
1264 ALLOC_ARRAY(ctx.info, ctx.alloc);
1266 if (ctx.m) {
1267 for (i = 0; i < ctx.m->num_packs; i++) {
1268 ALLOC_GROW(ctx.info, ctx.nr + 1, ctx.alloc);
1270 ctx.info[ctx.nr].orig_pack_int_id = i;
1271 ctx.info[ctx.nr].pack_name = xstrdup(ctx.m->pack_names[i]);
1272 ctx.info[ctx.nr].p = ctx.m->packs[i];
1273 ctx.info[ctx.nr].expired = 0;
1275 if (flags & MIDX_WRITE_REV_INDEX) {
1277 * If generating a reverse index, need to have
1278 * packed_git's loaded to compare their
1279 * mtimes and object count.
1281 if (prepare_midx_pack(the_repository, ctx.m, i)) {
1282 error(_("could not load pack"));
1283 result = 1;
1284 goto cleanup;
1287 if (open_pack_index(ctx.m->packs[i]))
1288 die(_("could not open index for %s"),
1289 ctx.m->packs[i]->pack_name);
1290 ctx.info[ctx.nr].p = ctx.m->packs[i];
1293 ctx.nr++;
1297 ctx.pack_paths_checked = 0;
1298 if (flags & MIDX_PROGRESS)
1299 ctx.progress = start_delayed_progress(_("Adding packfiles to multi-pack-index"), 0);
1300 else
1301 ctx.progress = NULL;
1303 ctx.to_include = packs_to_include;
1305 for_each_file_in_pack_dir(object_dir, add_pack_to_midx, &ctx);
1306 stop_progress(&ctx.progress);
1308 if ((ctx.m && ctx.nr == ctx.m->num_packs) &&
1309 !(packs_to_include || packs_to_drop)) {
1310 struct bitmap_index *bitmap_git;
1311 int bitmap_exists;
1312 int want_bitmap = flags & MIDX_WRITE_BITMAP;
1314 bitmap_git = prepare_midx_bitmap_git(ctx.m);
1315 bitmap_exists = bitmap_git && bitmap_is_midx(bitmap_git);
1316 free_bitmap_index(bitmap_git);
1318 if (bitmap_exists || !want_bitmap) {
1320 * The correct MIDX already exists, and so does a
1321 * corresponding bitmap (or one wasn't requested).
1323 if (!want_bitmap)
1324 clear_midx_files_ext(object_dir, ".bitmap",
1325 NULL);
1326 goto cleanup;
1330 if (preferred_pack_name) {
1331 int found = 0;
1332 for (i = 0; i < ctx.nr; i++) {
1333 if (!cmp_idx_or_pack_name(preferred_pack_name,
1334 ctx.info[i].pack_name)) {
1335 ctx.preferred_pack_idx = i;
1336 found = 1;
1337 break;
1341 if (!found)
1342 warning(_("unknown preferred pack: '%s'"),
1343 preferred_pack_name);
1344 } else if (ctx.nr &&
1345 (flags & (MIDX_WRITE_REV_INDEX | MIDX_WRITE_BITMAP))) {
1346 struct packed_git *oldest = ctx.info[ctx.preferred_pack_idx].p;
1347 ctx.preferred_pack_idx = 0;
1349 if (packs_to_drop && packs_to_drop->nr)
1350 BUG("cannot write a MIDX bitmap during expiration");
1353 * set a preferred pack when writing a bitmap to ensure that
1354 * the pack from which the first object is selected in pseudo
1355 * pack-order has all of its objects selected from that pack
1356 * (and not another pack containing a duplicate)
1358 for (i = 1; i < ctx.nr; i++) {
1359 struct packed_git *p = ctx.info[i].p;
1361 if (!oldest->num_objects || p->mtime < oldest->mtime) {
1362 oldest = p;
1363 ctx.preferred_pack_idx = i;
1367 if (!oldest->num_objects) {
1369 * If all packs are empty; unset the preferred index.
1370 * This is acceptable since there will be no duplicate
1371 * objects to resolve, so the preferred value doesn't
1372 * matter.
1374 ctx.preferred_pack_idx = -1;
1376 } else {
1378 * otherwise don't mark any pack as preferred to avoid
1379 * interfering with expiration logic below
1381 ctx.preferred_pack_idx = -1;
1384 if (ctx.preferred_pack_idx > -1) {
1385 struct packed_git *preferred = ctx.info[ctx.preferred_pack_idx].p;
1386 if (!preferred->num_objects) {
1387 error(_("cannot select preferred pack %s with no objects"),
1388 preferred->pack_name);
1389 result = 1;
1390 goto cleanup;
1394 ctx.entries = get_sorted_entries(ctx.m, ctx.info, ctx.nr, &ctx.entries_nr,
1395 ctx.preferred_pack_idx);
1397 ctx.large_offsets_needed = 0;
1398 for (i = 0; i < ctx.entries_nr; i++) {
1399 if (ctx.entries[i].offset > 0x7fffffff)
1400 ctx.num_large_offsets++;
1401 if (ctx.entries[i].offset > 0xffffffff)
1402 ctx.large_offsets_needed = 1;
1405 QSORT(ctx.info, ctx.nr, pack_info_compare);
1407 if (packs_to_drop && packs_to_drop->nr) {
1408 int drop_index = 0;
1409 int missing_drops = 0;
1411 for (i = 0; i < ctx.nr && drop_index < packs_to_drop->nr; i++) {
1412 int cmp = strcmp(ctx.info[i].pack_name,
1413 packs_to_drop->items[drop_index].string);
1415 if (!cmp) {
1416 drop_index++;
1417 ctx.info[i].expired = 1;
1418 } else if (cmp > 0) {
1419 error(_("did not see pack-file %s to drop"),
1420 packs_to_drop->items[drop_index].string);
1421 drop_index++;
1422 missing_drops++;
1423 i--;
1424 } else {
1425 ctx.info[i].expired = 0;
1429 if (missing_drops) {
1430 result = 1;
1431 goto cleanup;
1436 * pack_perm stores a permutation between pack-int-ids from the
1437 * previous multi-pack-index to the new one we are writing:
1439 * pack_perm[old_id] = new_id
1441 ALLOC_ARRAY(ctx.pack_perm, ctx.nr);
1442 for (i = 0; i < ctx.nr; i++) {
1443 if (ctx.info[i].expired) {
1444 dropped_packs++;
1445 ctx.pack_perm[ctx.info[i].orig_pack_int_id] = PACK_EXPIRED;
1446 } else {
1447 ctx.pack_perm[ctx.info[i].orig_pack_int_id] = i - dropped_packs;
1451 for (i = 0; i < ctx.nr; i++) {
1452 if (!ctx.info[i].expired)
1453 pack_name_concat_len += strlen(ctx.info[i].pack_name) + 1;
1456 /* Check that the preferred pack wasn't expired (if given). */
1457 if (preferred_pack_name) {
1458 struct pack_info *preferred = bsearch(preferred_pack_name,
1459 ctx.info, ctx.nr,
1460 sizeof(*ctx.info),
1461 idx_or_pack_name_cmp);
1462 if (preferred) {
1463 uint32_t perm = ctx.pack_perm[preferred->orig_pack_int_id];
1464 if (perm == PACK_EXPIRED)
1465 warning(_("preferred pack '%s' is expired"),
1466 preferred_pack_name);
1470 if (pack_name_concat_len % MIDX_CHUNK_ALIGNMENT)
1471 pack_name_concat_len += MIDX_CHUNK_ALIGNMENT -
1472 (pack_name_concat_len % MIDX_CHUNK_ALIGNMENT);
1474 hold_lock_file_for_update(&lk, midx_name.buf, LOCK_DIE_ON_ERROR);
1475 f = hashfd(get_lock_file_fd(&lk), get_lock_file_path(&lk));
1477 if (ctx.nr - dropped_packs == 0) {
1478 error(_("no pack files to index."));
1479 result = 1;
1480 goto cleanup;
1483 if (!ctx.entries_nr) {
1484 if (flags & MIDX_WRITE_BITMAP)
1485 warning(_("refusing to write multi-pack .bitmap without any objects"));
1486 flags &= ~(MIDX_WRITE_REV_INDEX | MIDX_WRITE_BITMAP);
1489 cf = init_chunkfile(f);
1491 add_chunk(cf, MIDX_CHUNKID_PACKNAMES, pack_name_concat_len,
1492 write_midx_pack_names);
1493 add_chunk(cf, MIDX_CHUNKID_OIDFANOUT, MIDX_CHUNK_FANOUT_SIZE,
1494 write_midx_oid_fanout);
1495 add_chunk(cf, MIDX_CHUNKID_OIDLOOKUP,
1496 (size_t)ctx.entries_nr * the_hash_algo->rawsz,
1497 write_midx_oid_lookup);
1498 add_chunk(cf, MIDX_CHUNKID_OBJECTOFFSETS,
1499 (size_t)ctx.entries_nr * MIDX_CHUNK_OFFSET_WIDTH,
1500 write_midx_object_offsets);
1502 if (ctx.large_offsets_needed)
1503 add_chunk(cf, MIDX_CHUNKID_LARGEOFFSETS,
1504 (size_t)ctx.num_large_offsets * MIDX_CHUNK_LARGE_OFFSET_WIDTH,
1505 write_midx_large_offsets);
1507 if (flags & (MIDX_WRITE_REV_INDEX | MIDX_WRITE_BITMAP)) {
1508 ctx.pack_order = midx_pack_order(&ctx);
1509 add_chunk(cf, MIDX_CHUNKID_REVINDEX,
1510 ctx.entries_nr * sizeof(uint32_t),
1511 write_midx_revindex);
1514 write_midx_header(f, get_num_chunks(cf), ctx.nr - dropped_packs);
1515 write_chunkfile(cf, &ctx);
1517 finalize_hashfile(f, midx_hash, FSYNC_COMPONENT_PACK_METADATA,
1518 CSUM_FSYNC | CSUM_HASH_IN_STREAM);
1519 free_chunkfile(cf);
1521 if (flags & MIDX_WRITE_REV_INDEX &&
1522 git_env_bool("GIT_TEST_MIDX_WRITE_REV", 0))
1523 write_midx_reverse_index(midx_name.buf, midx_hash, &ctx);
1525 if (flags & MIDX_WRITE_BITMAP) {
1526 struct packing_data pdata;
1527 struct commit **commits;
1528 uint32_t commits_nr;
1530 if (!ctx.entries_nr)
1531 BUG("cannot write a bitmap without any objects");
1533 prepare_midx_packing_data(&pdata, &ctx);
1535 commits = find_commits_for_midx_bitmap(&commits_nr, refs_snapshot, &ctx);
1538 * The previous steps translated the information from
1539 * 'entries' into information suitable for constructing
1540 * bitmaps. We no longer need that array, so clear it to
1541 * reduce memory pressure.
1543 FREE_AND_NULL(ctx.entries);
1544 ctx.entries_nr = 0;
1546 if (write_midx_bitmap(midx_name.buf, midx_hash, &pdata,
1547 commits, commits_nr, ctx.pack_order,
1548 flags) < 0) {
1549 error(_("could not write multi-pack bitmap"));
1550 result = 1;
1551 goto cleanup;
1555 * NOTE: Do not use ctx.entries beyond this point, since it might
1556 * have been freed in the previous if block.
1559 if (ctx.m)
1560 close_object_store(the_repository->objects);
1562 if (commit_lock_file(&lk) < 0)
1563 die_errno(_("could not write multi-pack-index"));
1565 clear_midx_files_ext(object_dir, ".bitmap", midx_hash);
1566 clear_midx_files_ext(object_dir, ".rev", midx_hash);
1568 cleanup:
1569 for (i = 0; i < ctx.nr; i++) {
1570 if (ctx.info[i].p) {
1571 close_pack(ctx.info[i].p);
1572 free(ctx.info[i].p);
1574 free(ctx.info[i].pack_name);
1577 free(ctx.info);
1578 free(ctx.entries);
1579 free(ctx.pack_perm);
1580 free(ctx.pack_order);
1581 strbuf_release(&midx_name);
1583 trace2_region_leave("midx", "write_midx_internal", the_repository);
1585 return result;
1588 int write_midx_file(const char *object_dir,
1589 const char *preferred_pack_name,
1590 const char *refs_snapshot,
1591 unsigned flags)
1593 return write_midx_internal(object_dir, NULL, NULL, preferred_pack_name,
1594 refs_snapshot, flags);
1597 int write_midx_file_only(const char *object_dir,
1598 struct string_list *packs_to_include,
1599 const char *preferred_pack_name,
1600 const char *refs_snapshot,
1601 unsigned flags)
1603 return write_midx_internal(object_dir, packs_to_include, NULL,
1604 preferred_pack_name, refs_snapshot, flags);
1607 struct clear_midx_data {
1608 char *keep;
1609 const char *ext;
1612 static void clear_midx_file_ext(const char *full_path, size_t full_path_len UNUSED,
1613 const char *file_name, void *_data)
1615 struct clear_midx_data *data = _data;
1617 if (!(starts_with(file_name, "multi-pack-index-") &&
1618 ends_with(file_name, data->ext)))
1619 return;
1620 if (data->keep && !strcmp(data->keep, file_name))
1621 return;
1623 if (unlink(full_path))
1624 die_errno(_("failed to remove %s"), full_path);
1627 static void clear_midx_files_ext(const char *object_dir, const char *ext,
1628 unsigned char *keep_hash)
1630 struct clear_midx_data data;
1631 memset(&data, 0, sizeof(struct clear_midx_data));
1633 if (keep_hash)
1634 data.keep = xstrfmt("multi-pack-index-%s%s",
1635 hash_to_hex(keep_hash), ext);
1636 data.ext = ext;
1638 for_each_file_in_pack_dir(object_dir,
1639 clear_midx_file_ext,
1640 &data);
1642 free(data.keep);
1645 void clear_midx_file(struct repository *r)
1647 struct strbuf midx = STRBUF_INIT;
1649 get_midx_filename(&midx, r->objects->odb->path);
1651 if (r->objects && r->objects->multi_pack_index) {
1652 close_midx(r->objects->multi_pack_index);
1653 r->objects->multi_pack_index = NULL;
1656 if (remove_path(midx.buf))
1657 die(_("failed to clear multi-pack-index at %s"), midx.buf);
1659 clear_midx_files_ext(r->objects->odb->path, ".bitmap", NULL);
1660 clear_midx_files_ext(r->objects->odb->path, ".rev", NULL);
1662 strbuf_release(&midx);
1665 static int verify_midx_error;
1667 __attribute__((format (printf, 1, 2)))
1668 static void midx_report(const char *fmt, ...)
1670 va_list ap;
1671 verify_midx_error = 1;
1672 va_start(ap, fmt);
1673 vfprintf(stderr, fmt, ap);
1674 fprintf(stderr, "\n");
1675 va_end(ap);
1678 struct pair_pos_vs_id
1680 uint32_t pos;
1681 uint32_t pack_int_id;
1684 static int compare_pair_pos_vs_id(const void *_a, const void *_b)
1686 struct pair_pos_vs_id *a = (struct pair_pos_vs_id *)_a;
1687 struct pair_pos_vs_id *b = (struct pair_pos_vs_id *)_b;
1689 return b->pack_int_id - a->pack_int_id;
1693 * Limit calls to display_progress() for performance reasons.
1694 * The interval here was arbitrarily chosen.
1696 #define SPARSE_PROGRESS_INTERVAL (1 << 12)
1697 #define midx_display_sparse_progress(progress, n) \
1698 do { \
1699 uint64_t _n = (n); \
1700 if ((_n & (SPARSE_PROGRESS_INTERVAL - 1)) == 0) \
1701 display_progress(progress, _n); \
1702 } while (0)
1704 int verify_midx_file(struct repository *r, const char *object_dir, unsigned flags)
1706 struct pair_pos_vs_id *pairs = NULL;
1707 uint32_t i;
1708 struct progress *progress = NULL;
1709 struct multi_pack_index *m = load_multi_pack_index(object_dir, 1);
1710 verify_midx_error = 0;
1712 if (!m) {
1713 int result = 0;
1714 struct stat sb;
1715 struct strbuf filename = STRBUF_INIT;
1717 get_midx_filename(&filename, object_dir);
1719 if (!stat(filename.buf, &sb)) {
1720 error(_("multi-pack-index file exists, but failed to parse"));
1721 result = 1;
1723 strbuf_release(&filename);
1724 return result;
1727 if (!midx_checksum_valid(m))
1728 midx_report(_("incorrect checksum"));
1730 if (flags & MIDX_PROGRESS)
1731 progress = start_delayed_progress(_("Looking for referenced packfiles"),
1732 m->num_packs);
1733 for (i = 0; i < m->num_packs; i++) {
1734 if (prepare_midx_pack(r, m, i))
1735 midx_report("failed to load pack in position %d", i);
1737 display_progress(progress, i + 1);
1739 stop_progress(&progress);
1741 for (i = 0; i < 255; i++) {
1742 uint32_t oid_fanout1 = ntohl(m->chunk_oid_fanout[i]);
1743 uint32_t oid_fanout2 = ntohl(m->chunk_oid_fanout[i + 1]);
1745 if (oid_fanout1 > oid_fanout2)
1746 midx_report(_("oid fanout out of order: fanout[%d] = %"PRIx32" > %"PRIx32" = fanout[%d]"),
1747 i, oid_fanout1, oid_fanout2, i + 1);
1750 if (m->num_objects == 0) {
1751 midx_report(_("the midx contains no oid"));
1753 * Remaining tests assume that we have objects, so we can
1754 * return here.
1756 goto cleanup;
1759 if (flags & MIDX_PROGRESS)
1760 progress = start_sparse_progress(_("Verifying OID order in multi-pack-index"),
1761 m->num_objects - 1);
1762 for (i = 0; i < m->num_objects - 1; i++) {
1763 struct object_id oid1, oid2;
1765 nth_midxed_object_oid(&oid1, m, i);
1766 nth_midxed_object_oid(&oid2, m, i + 1);
1768 if (oidcmp(&oid1, &oid2) >= 0)
1769 midx_report(_("oid lookup out of order: oid[%d] = %s >= %s = oid[%d]"),
1770 i, oid_to_hex(&oid1), oid_to_hex(&oid2), i + 1);
1772 midx_display_sparse_progress(progress, i + 1);
1774 stop_progress(&progress);
1777 * Create an array mapping each object to its packfile id. Sort it
1778 * to group the objects by packfile. Use this permutation to visit
1779 * each of the objects and only require 1 packfile to be open at a
1780 * time.
1782 ALLOC_ARRAY(pairs, m->num_objects);
1783 for (i = 0; i < m->num_objects; i++) {
1784 pairs[i].pos = i;
1785 pairs[i].pack_int_id = nth_midxed_pack_int_id(m, i);
1788 if (flags & MIDX_PROGRESS)
1789 progress = start_sparse_progress(_("Sorting objects by packfile"),
1790 m->num_objects);
1791 display_progress(progress, 0); /* TODO: Measure QSORT() progress */
1792 QSORT(pairs, m->num_objects, compare_pair_pos_vs_id);
1793 stop_progress(&progress);
1795 if (flags & MIDX_PROGRESS)
1796 progress = start_sparse_progress(_("Verifying object offsets"), m->num_objects);
1797 for (i = 0; i < m->num_objects; i++) {
1798 struct object_id oid;
1799 struct pack_entry e;
1800 off_t m_offset, p_offset;
1802 if (i > 0 && pairs[i-1].pack_int_id != pairs[i].pack_int_id &&
1803 m->packs[pairs[i-1].pack_int_id])
1805 close_pack_fd(m->packs[pairs[i-1].pack_int_id]);
1806 close_pack_index(m->packs[pairs[i-1].pack_int_id]);
1809 nth_midxed_object_oid(&oid, m, pairs[i].pos);
1811 if (!fill_midx_entry(r, &oid, &e, m)) {
1812 midx_report(_("failed to load pack entry for oid[%d] = %s"),
1813 pairs[i].pos, oid_to_hex(&oid));
1814 continue;
1817 if (open_pack_index(e.p)) {
1818 midx_report(_("failed to load pack-index for packfile %s"),
1819 e.p->pack_name);
1820 break;
1823 m_offset = e.offset;
1824 p_offset = find_pack_entry_one(oid.hash, e.p);
1826 if (m_offset != p_offset)
1827 midx_report(_("incorrect object offset for oid[%d] = %s: %"PRIx64" != %"PRIx64),
1828 pairs[i].pos, oid_to_hex(&oid), m_offset, p_offset);
1830 midx_display_sparse_progress(progress, i + 1);
1832 stop_progress(&progress);
1834 cleanup:
1835 free(pairs);
1836 close_midx(m);
1838 return verify_midx_error;
1841 int expire_midx_packs(struct repository *r, const char *object_dir, unsigned flags)
1843 uint32_t i, *count, result = 0;
1844 struct string_list packs_to_drop = STRING_LIST_INIT_DUP;
1845 struct multi_pack_index *m = lookup_multi_pack_index(r, object_dir);
1846 struct progress *progress = NULL;
1848 if (!m)
1849 return 0;
1851 CALLOC_ARRAY(count, m->num_packs);
1853 if (flags & MIDX_PROGRESS)
1854 progress = start_delayed_progress(_("Counting referenced objects"),
1855 m->num_objects);
1856 for (i = 0; i < m->num_objects; i++) {
1857 int pack_int_id = nth_midxed_pack_int_id(m, i);
1858 count[pack_int_id]++;
1859 display_progress(progress, i + 1);
1861 stop_progress(&progress);
1863 if (flags & MIDX_PROGRESS)
1864 progress = start_delayed_progress(_("Finding and deleting unreferenced packfiles"),
1865 m->num_packs);
1866 for (i = 0; i < m->num_packs; i++) {
1867 char *pack_name;
1868 display_progress(progress, i + 1);
1870 if (count[i])
1871 continue;
1873 if (prepare_midx_pack(r, m, i))
1874 continue;
1876 if (m->packs[i]->pack_keep || m->packs[i]->is_cruft)
1877 continue;
1879 pack_name = xstrdup(m->packs[i]->pack_name);
1880 close_pack(m->packs[i]);
1882 string_list_insert(&packs_to_drop, m->pack_names[i]);
1883 unlink_pack_path(pack_name, 0);
1884 free(pack_name);
1886 stop_progress(&progress);
1888 free(count);
1890 if (packs_to_drop.nr)
1891 result = write_midx_internal(object_dir, NULL, &packs_to_drop, NULL, NULL, flags);
1893 string_list_clear(&packs_to_drop, 0);
1895 return result;
1898 struct repack_info {
1899 timestamp_t mtime;
1900 uint32_t referenced_objects;
1901 uint32_t pack_int_id;
1904 static int compare_by_mtime(const void *a_, const void *b_)
1906 const struct repack_info *a, *b;
1908 a = (const struct repack_info *)a_;
1909 b = (const struct repack_info *)b_;
1911 if (a->mtime < b->mtime)
1912 return -1;
1913 if (a->mtime > b->mtime)
1914 return 1;
1915 return 0;
1918 static int fill_included_packs_all(struct repository *r,
1919 struct multi_pack_index *m,
1920 unsigned char *include_pack)
1922 uint32_t i, count = 0;
1923 int pack_kept_objects = 0;
1925 repo_config_get_bool(r, "repack.packkeptobjects", &pack_kept_objects);
1927 for (i = 0; i < m->num_packs; i++) {
1928 if (prepare_midx_pack(r, m, i))
1929 continue;
1930 if (!pack_kept_objects && m->packs[i]->pack_keep)
1931 continue;
1932 if (m->packs[i]->is_cruft)
1933 continue;
1935 include_pack[i] = 1;
1936 count++;
1939 return count < 2;
1942 static int fill_included_packs_batch(struct repository *r,
1943 struct multi_pack_index *m,
1944 unsigned char *include_pack,
1945 size_t batch_size)
1947 uint32_t i, packs_to_repack;
1948 size_t total_size;
1949 struct repack_info *pack_info;
1950 int pack_kept_objects = 0;
1952 CALLOC_ARRAY(pack_info, m->num_packs);
1954 repo_config_get_bool(r, "repack.packkeptobjects", &pack_kept_objects);
1956 for (i = 0; i < m->num_packs; i++) {
1957 pack_info[i].pack_int_id = i;
1959 if (prepare_midx_pack(r, m, i))
1960 continue;
1962 pack_info[i].mtime = m->packs[i]->mtime;
1965 for (i = 0; i < m->num_objects; i++) {
1966 uint32_t pack_int_id = nth_midxed_pack_int_id(m, i);
1967 pack_info[pack_int_id].referenced_objects++;
1970 QSORT(pack_info, m->num_packs, compare_by_mtime);
1972 total_size = 0;
1973 packs_to_repack = 0;
1974 for (i = 0; total_size < batch_size && i < m->num_packs; i++) {
1975 int pack_int_id = pack_info[i].pack_int_id;
1976 struct packed_git *p = m->packs[pack_int_id];
1977 size_t expected_size;
1979 if (!p)
1980 continue;
1981 if (!pack_kept_objects && p->pack_keep)
1982 continue;
1983 if (p->is_cruft)
1984 continue;
1985 if (open_pack_index(p) || !p->num_objects)
1986 continue;
1988 expected_size = (size_t)(p->pack_size
1989 * pack_info[i].referenced_objects);
1990 expected_size /= p->num_objects;
1992 if (expected_size >= batch_size)
1993 continue;
1995 packs_to_repack++;
1996 total_size += expected_size;
1997 include_pack[pack_int_id] = 1;
2000 free(pack_info);
2002 if (packs_to_repack < 2)
2003 return 1;
2005 return 0;
2008 int midx_repack(struct repository *r, const char *object_dir, size_t batch_size, unsigned flags)
2010 int result = 0;
2011 uint32_t i;
2012 unsigned char *include_pack;
2013 struct child_process cmd = CHILD_PROCESS_INIT;
2014 FILE *cmd_in;
2015 struct strbuf base_name = STRBUF_INIT;
2016 struct multi_pack_index *m = lookup_multi_pack_index(r, object_dir);
2019 * When updating the default for these configuration
2020 * variables in builtin/repack.c, these must be adjusted
2021 * to match.
2023 int delta_base_offset = 1;
2024 int use_delta_islands = 0;
2026 if (!m)
2027 return 0;
2029 CALLOC_ARRAY(include_pack, m->num_packs);
2031 if (batch_size) {
2032 if (fill_included_packs_batch(r, m, include_pack, batch_size))
2033 goto cleanup;
2034 } else if (fill_included_packs_all(r, m, include_pack))
2035 goto cleanup;
2037 repo_config_get_bool(r, "repack.usedeltabaseoffset", &delta_base_offset);
2038 repo_config_get_bool(r, "repack.usedeltaislands", &use_delta_islands);
2040 strvec_push(&cmd.args, "pack-objects");
2042 strbuf_addstr(&base_name, object_dir);
2043 strbuf_addstr(&base_name, "/pack/pack");
2044 strvec_push(&cmd.args, base_name.buf);
2046 if (delta_base_offset)
2047 strvec_push(&cmd.args, "--delta-base-offset");
2048 if (use_delta_islands)
2049 strvec_push(&cmd.args, "--delta-islands");
2051 if (flags & MIDX_PROGRESS)
2052 strvec_push(&cmd.args, "--progress");
2053 else
2054 strvec_push(&cmd.args, "-q");
2056 strbuf_release(&base_name);
2058 cmd.git_cmd = 1;
2059 cmd.in = cmd.out = -1;
2061 if (start_command(&cmd)) {
2062 error(_("could not start pack-objects"));
2063 result = 1;
2064 goto cleanup;
2067 cmd_in = xfdopen(cmd.in, "w");
2069 for (i = 0; i < m->num_objects; i++) {
2070 struct object_id oid;
2071 uint32_t pack_int_id = nth_midxed_pack_int_id(m, i);
2073 if (!include_pack[pack_int_id])
2074 continue;
2076 nth_midxed_object_oid(&oid, m, i);
2077 fprintf(cmd_in, "%s\n", oid_to_hex(&oid));
2079 fclose(cmd_in);
2081 if (finish_command(&cmd)) {
2082 error(_("could not finish pack-objects"));
2083 result = 1;
2084 goto cleanup;
2087 result = write_midx_internal(object_dir, NULL, NULL, NULL, NULL, flags);
2089 cleanup:
2090 free(include_pack);
2091 return result;