11 char *odb_pack_name(struct strbuf
*buf
,
12 const unsigned char *sha1
,
16 strbuf_addf(buf
, "%s/pack/pack-%s.%s", get_object_directory(),
17 sha1_to_hex(sha1
), ext
);
21 char *sha1_pack_name(const unsigned char *sha1
)
23 static struct strbuf buf
= STRBUF_INIT
;
24 return odb_pack_name(&buf
, sha1
, "pack");
27 char *sha1_pack_index_name(const unsigned char *sha1
)
29 static struct strbuf buf
= STRBUF_INIT
;
30 return odb_pack_name(&buf
, sha1
, "idx");
33 static unsigned int pack_used_ctr
;
34 static unsigned int pack_mmap_calls
;
35 static unsigned int peak_pack_open_windows
;
36 static unsigned int pack_open_windows
;
37 static unsigned int pack_open_fds
;
38 static unsigned int pack_max_fds
;
39 static size_t peak_pack_mapped
;
40 static size_t pack_mapped
;
41 struct packed_git
*packed_git
;
43 static struct mru packed_git_mru_storage
;
44 struct mru
*packed_git_mru
= &packed_git_mru_storage
;
46 #define SZ_FMT PRIuMAX
47 static inline uintmax_t sz_fmt(size_t s
) { return s
; }
49 void pack_report(void)
52 "pack_report: getpagesize() = %10" SZ_FMT
"\n"
53 "pack_report: core.packedGitWindowSize = %10" SZ_FMT
"\n"
54 "pack_report: core.packedGitLimit = %10" SZ_FMT
"\n",
55 sz_fmt(getpagesize()),
56 sz_fmt(packed_git_window_size
),
57 sz_fmt(packed_git_limit
));
59 "pack_report: pack_used_ctr = %10u\n"
60 "pack_report: pack_mmap_calls = %10u\n"
61 "pack_report: pack_open_windows = %10u / %10u\n"
62 "pack_report: pack_mapped = "
63 "%10" SZ_FMT
" / %10" SZ_FMT
"\n",
66 pack_open_windows
, peak_pack_open_windows
,
67 sz_fmt(pack_mapped
), sz_fmt(peak_pack_mapped
));
71 * Open and mmap the index file at path, perform a couple of
72 * consistency checks, then record its information to p. Return 0 on
75 static int check_packed_git_idx(const char *path
, struct packed_git
*p
)
78 struct pack_idx_header
*hdr
;
80 uint32_t version
, nr
, i
, *index
;
81 int fd
= git_open(path
);
90 idx_size
= xsize_t(st
.st_size
);
91 if (idx_size
< 4 * 256 + 20 + 20) {
93 return error("index file %s is too small", path
);
95 idx_map
= xmmap(NULL
, idx_size
, PROT_READ
, MAP_PRIVATE
, fd
, 0);
99 if (hdr
->idx_signature
== htonl(PACK_IDX_SIGNATURE
)) {
100 version
= ntohl(hdr
->idx_version
);
101 if (version
< 2 || version
> 2) {
102 munmap(idx_map
, idx_size
);
103 return error("index file %s is version %"PRIu32
104 " and is not supported by this binary"
105 " (try upgrading GIT to a newer version)",
114 index
+= 2; /* skip index header */
115 for (i
= 0; i
< 256; i
++) {
116 uint32_t n
= ntohl(index
[i
]);
118 munmap(idx_map
, idx_size
);
119 return error("non-monotonic index %s", path
);
127 * - 256 index entries 4 bytes each
128 * - 24-byte entries * nr (20-byte sha1 + 4-byte offset)
129 * - 20-byte SHA1 of the packfile
130 * - 20-byte SHA1 file checksum
132 if (idx_size
!= 4*256 + nr
* 24 + 20 + 20) {
133 munmap(idx_map
, idx_size
);
134 return error("wrong index v1 file size in %s", path
);
136 } else if (version
== 2) {
139 * - 8 bytes of header
140 * - 256 index entries 4 bytes each
141 * - 20-byte sha1 entry * nr
142 * - 4-byte crc entry * nr
143 * - 4-byte offset entry * nr
144 * - 20-byte SHA1 of the packfile
145 * - 20-byte SHA1 file checksum
146 * And after the 4-byte offset table might be a
147 * variable sized table containing 8-byte entries
148 * for offsets larger than 2^31.
150 unsigned long min_size
= 8 + 4*256 + nr
*(20 + 4 + 4) + 20 + 20;
151 unsigned long max_size
= min_size
;
153 max_size
+= (nr
- 1)*8;
154 if (idx_size
< min_size
|| idx_size
> max_size
) {
155 munmap(idx_map
, idx_size
);
156 return error("wrong index v2 file size in %s", path
);
158 if (idx_size
!= min_size
&&
160 * make sure we can deal with large pack offsets.
161 * A 31-bit signed offset won't be enough, and neither
162 * will a 32-bit unsigned one.
164 (sizeof(off_t
) <= 4)) {
165 munmap(idx_map
, idx_size
);
166 return error("pack too large for current definition of off_t in %s", path
);
170 p
->index_version
= version
;
171 p
->index_data
= idx_map
;
172 p
->index_size
= idx_size
;
177 int open_pack_index(struct packed_git
*p
)
186 if (!strip_suffix(p
->pack_name
, ".pack", &len
))
187 die("BUG: pack_name does not end in .pack");
188 idx_name
= xstrfmt("%.*s.idx", (int)len
, p
->pack_name
);
189 ret
= check_packed_git_idx(idx_name
, p
);
194 static struct packed_git
*alloc_packed_git(int extra
)
196 struct packed_git
*p
= xmalloc(st_add(sizeof(*p
), extra
));
197 memset(p
, 0, sizeof(*p
));
202 struct packed_git
*parse_pack_index(unsigned char *sha1
, const char *idx_path
)
204 const char *path
= sha1_pack_name(sha1
);
205 size_t alloc
= st_add(strlen(path
), 1);
206 struct packed_git
*p
= alloc_packed_git(alloc
);
208 memcpy(p
->pack_name
, path
, alloc
); /* includes NUL */
209 hashcpy(p
->sha1
, sha1
);
210 if (check_packed_git_idx(idx_path
, p
)) {
218 static void scan_windows(struct packed_git
*p
,
219 struct packed_git
**lru_p
,
220 struct pack_window
**lru_w
,
221 struct pack_window
**lru_l
)
223 struct pack_window
*w
, *w_l
;
225 for (w_l
= NULL
, w
= p
->windows
; w
; w
= w
->next
) {
227 if (!*lru_w
|| w
->last_used
< (*lru_w
)->last_used
) {
237 static int unuse_one_window(struct packed_git
*current
)
239 struct packed_git
*p
, *lru_p
= NULL
;
240 struct pack_window
*lru_w
= NULL
, *lru_l
= NULL
;
243 scan_windows(current
, &lru_p
, &lru_w
, &lru_l
);
244 for (p
= packed_git
; p
; p
= p
->next
)
245 scan_windows(p
, &lru_p
, &lru_w
, &lru_l
);
247 munmap(lru_w
->base
, lru_w
->len
);
248 pack_mapped
-= lru_w
->len
;
250 lru_l
->next
= lru_w
->next
;
252 lru_p
->windows
= lru_w
->next
;
260 void release_pack_memory(size_t need
)
262 size_t cur
= pack_mapped
;
263 while (need
>= (cur
- pack_mapped
) && unuse_one_window(NULL
))
267 void close_pack_windows(struct packed_git
*p
)
270 struct pack_window
*w
= p
->windows
;
273 die("pack '%s' still has open windows to it",
275 munmap(w
->base
, w
->len
);
276 pack_mapped
-= w
->len
;
278 p
->windows
= w
->next
;
283 static int close_pack_fd(struct packed_git
*p
)
295 void close_pack_index(struct packed_git
*p
)
298 munmap((void *)p
->index_data
, p
->index_size
);
299 p
->index_data
= NULL
;
303 static void close_pack(struct packed_git
*p
)
305 close_pack_windows(p
);
310 void close_all_packs(void)
312 struct packed_git
*p
;
314 for (p
= packed_git
; p
; p
= p
->next
)
316 die("BUG: want to close pack marked 'do-not-close'");
322 * The LRU pack is the one with the oldest MRU window, preferring packs
323 * with no used windows, or the oldest mtime if it has no windows allocated.
325 static void find_lru_pack(struct packed_git
*p
, struct packed_git
**lru_p
, struct pack_window
**mru_w
, int *accept_windows_inuse
)
327 struct pack_window
*w
, *this_mru_w
;
328 int has_windows_inuse
= 0;
331 * Reject this pack if it has windows and the previously selected
332 * one does not. If this pack does not have windows, reject
333 * it if the pack file is newer than the previously selected one.
335 if (*lru_p
&& !*mru_w
&& (p
->windows
|| p
->mtime
> (*lru_p
)->mtime
))
338 for (w
= this_mru_w
= p
->windows
; w
; w
= w
->next
) {
340 * Reject this pack if any of its windows are in use,
341 * but the previously selected pack did not have any
342 * inuse windows. Otherwise, record that this pack
343 * has windows in use.
346 if (*accept_windows_inuse
)
347 has_windows_inuse
= 1;
352 if (w
->last_used
> this_mru_w
->last_used
)
356 * Reject this pack if it has windows that have been
357 * used more recently than the previously selected pack.
358 * If the previously selected pack had windows inuse and
359 * we have not encountered a window in this pack that is
360 * inuse, skip this check since we prefer a pack with no
361 * inuse windows to one that has inuse windows.
363 if (*mru_w
&& *accept_windows_inuse
== has_windows_inuse
&&
364 this_mru_w
->last_used
> (*mru_w
)->last_used
)
373 *accept_windows_inuse
= has_windows_inuse
;
376 static int close_one_pack(void)
378 struct packed_git
*p
, *lru_p
= NULL
;
379 struct pack_window
*mru_w
= NULL
;
380 int accept_windows_inuse
= 1;
382 for (p
= packed_git
; p
; p
= p
->next
) {
383 if (p
->pack_fd
== -1)
385 find_lru_pack(p
, &lru_p
, &mru_w
, &accept_windows_inuse
);
389 return close_pack_fd(lru_p
);
394 static unsigned int get_max_fd_limit(void)
400 if (!getrlimit(RLIMIT_NOFILE
, &lim
))
407 long open_max
= sysconf(_SC_OPEN_MAX
);
411 * Otherwise, we got -1 for one of the two
414 * (1) sysconf() did not understand _SC_OPEN_MAX
415 * and signaled an error with -1; or
416 * (2) sysconf() said there is no limit.
418 * We _could_ clear errno before calling sysconf() to
419 * tell these two cases apart and return a huge number
420 * in the latter case to let the caller cap it to a
421 * value that is not so selfish, but letting the
422 * fallback OPEN_MAX codepath take care of these cases
431 return 1; /* see the caller ;-) */
436 * Do not call this directly as this leaks p->pack_fd on error return;
437 * call open_packed_git() instead.
439 static int open_packed_git_1(struct packed_git
*p
)
442 struct pack_header hdr
;
443 unsigned char sha1
[20];
444 unsigned char *idx_sha1
;
447 if (!p
->index_data
&& open_pack_index(p
))
448 return error("packfile %s index unavailable", p
->pack_name
);
451 unsigned int max_fds
= get_max_fd_limit();
453 /* Save 3 for stdin/stdout/stderr, 22 for work */
455 pack_max_fds
= max_fds
- 25;
460 while (pack_max_fds
<= pack_open_fds
&& close_one_pack())
463 p
->pack_fd
= git_open(p
->pack_name
);
464 if (p
->pack_fd
< 0 || fstat(p
->pack_fd
, &st
))
468 /* If we created the struct before we had the pack we lack size. */
470 if (!S_ISREG(st
.st_mode
))
471 return error("packfile %s not a regular file", p
->pack_name
);
472 p
->pack_size
= st
.st_size
;
473 } else if (p
->pack_size
!= st
.st_size
)
474 return error("packfile %s size changed", p
->pack_name
);
476 /* We leave these file descriptors open with sliding mmap;
477 * there is no point keeping them open across exec(), though.
479 fd_flag
= fcntl(p
->pack_fd
, F_GETFD
, 0);
481 return error("cannot determine file descriptor flags");
482 fd_flag
|= FD_CLOEXEC
;
483 if (fcntl(p
->pack_fd
, F_SETFD
, fd_flag
) == -1)
484 return error("cannot set FD_CLOEXEC");
486 /* Verify we recognize this pack file format. */
487 if (read_in_full(p
->pack_fd
, &hdr
, sizeof(hdr
)) != sizeof(hdr
))
488 return error("file %s is far too short to be a packfile", p
->pack_name
);
489 if (hdr
.hdr_signature
!= htonl(PACK_SIGNATURE
))
490 return error("file %s is not a GIT packfile", p
->pack_name
);
491 if (!pack_version_ok(hdr
.hdr_version
))
492 return error("packfile %s is version %"PRIu32
" and not"
493 " supported (try upgrading GIT to a newer version)",
494 p
->pack_name
, ntohl(hdr
.hdr_version
));
496 /* Verify the pack matches its index. */
497 if (p
->num_objects
!= ntohl(hdr
.hdr_entries
))
498 return error("packfile %s claims to have %"PRIu32
" objects"
499 " while index indicates %"PRIu32
" objects",
500 p
->pack_name
, ntohl(hdr
.hdr_entries
),
502 if (lseek(p
->pack_fd
, p
->pack_size
- sizeof(sha1
), SEEK_SET
) == -1)
503 return error("end of packfile %s is unavailable", p
->pack_name
);
504 if (read_in_full(p
->pack_fd
, sha1
, sizeof(sha1
)) != sizeof(sha1
))
505 return error("packfile %s signature is unavailable", p
->pack_name
);
506 idx_sha1
= ((unsigned char *)p
->index_data
) + p
->index_size
- 40;
507 if (hashcmp(sha1
, idx_sha1
))
508 return error("packfile %s does not match index", p
->pack_name
);
512 int open_packed_git(struct packed_git
*p
)
514 if (!open_packed_git_1(p
))
520 static int in_window(struct pack_window
*win
, off_t offset
)
522 /* We must promise at least 20 bytes (one hash) after the
523 * offset is available from this window, otherwise the offset
524 * is not actually in this window and a different window (which
525 * has that one hash excess) must be used. This is to support
526 * the object header and delta base parsing routines below.
528 off_t win_off
= win
->offset
;
529 return win_off
<= offset
530 && (offset
+ 20) <= (win_off
+ win
->len
);
533 unsigned char *use_pack(struct packed_git
*p
,
534 struct pack_window
**w_cursor
,
538 struct pack_window
*win
= *w_cursor
;
540 /* Since packfiles end in a hash of their content and it's
541 * pointless to ask for an offset into the middle of that
542 * hash, and the in_window function above wouldn't match
543 * don't allow an offset too close to the end of the file.
545 if (!p
->pack_size
&& p
->pack_fd
== -1 && open_packed_git(p
))
546 die("packfile %s cannot be accessed", p
->pack_name
);
547 if (offset
> (p
->pack_size
- 20))
548 die("offset beyond end of packfile (truncated pack?)");
550 die(_("offset before end of packfile (broken .idx?)"));
552 if (!win
|| !in_window(win
, offset
)) {
555 for (win
= p
->windows
; win
; win
= win
->next
) {
556 if (in_window(win
, offset
))
560 size_t window_align
= packed_git_window_size
/ 2;
563 if (p
->pack_fd
== -1 && open_packed_git(p
))
564 die("packfile %s cannot be accessed", p
->pack_name
);
566 win
= xcalloc(1, sizeof(*win
));
567 win
->offset
= (offset
/ window_align
) * window_align
;
568 len
= p
->pack_size
- win
->offset
;
569 if (len
> packed_git_window_size
)
570 len
= packed_git_window_size
;
571 win
->len
= (size_t)len
;
572 pack_mapped
+= win
->len
;
573 while (packed_git_limit
< pack_mapped
574 && unuse_one_window(p
))
576 win
->base
= xmmap(NULL
, win
->len
,
577 PROT_READ
, MAP_PRIVATE
,
578 p
->pack_fd
, win
->offset
);
579 if (win
->base
== MAP_FAILED
)
580 die_errno("packfile %s cannot be mapped",
582 if (!win
->offset
&& win
->len
== p
->pack_size
587 if (pack_mapped
> peak_pack_mapped
)
588 peak_pack_mapped
= pack_mapped
;
589 if (pack_open_windows
> peak_pack_open_windows
)
590 peak_pack_open_windows
= pack_open_windows
;
591 win
->next
= p
->windows
;
595 if (win
!= *w_cursor
) {
596 win
->last_used
= pack_used_ctr
++;
600 offset
-= win
->offset
;
602 *left
= win
->len
- xsize_t(offset
);
603 return win
->base
+ offset
;
606 void unuse_pack(struct pack_window
**w_cursor
)
608 struct pack_window
*w
= *w_cursor
;
615 static void try_to_free_pack_memory(size_t size
)
617 release_pack_memory(size
);
620 struct packed_git
*add_packed_git(const char *path
, size_t path_len
, int local
)
622 static int have_set_try_to_free_routine
;
625 struct packed_git
*p
;
627 if (!have_set_try_to_free_routine
) {
628 have_set_try_to_free_routine
= 1;
629 set_try_to_free_routine(try_to_free_pack_memory
);
633 * Make sure a corresponding .pack file exists and that
634 * the index looks sane.
636 if (!strip_suffix_mem(path
, &path_len
, ".idx"))
640 * ".pack" is long enough to hold any suffix we're adding (and
641 * the use of xsnprintf double-checks that)
643 alloc
= st_add3(path_len
, strlen(".pack"), 1);
644 p
= alloc_packed_git(alloc
);
645 memcpy(p
->pack_name
, path
, path_len
);
647 xsnprintf(p
->pack_name
+ path_len
, alloc
- path_len
, ".keep");
648 if (!access(p
->pack_name
, F_OK
))
651 xsnprintf(p
->pack_name
+ path_len
, alloc
- path_len
, ".pack");
652 if (stat(p
->pack_name
, &st
) || !S_ISREG(st
.st_mode
)) {
657 /* ok, it looks sane as far as we can check without
658 * actually mapping the pack file.
660 p
->pack_size
= st
.st_size
;
661 p
->pack_local
= local
;
662 p
->mtime
= st
.st_mtime
;
663 if (path_len
< 40 || get_sha1_hex(path
+ path_len
- 40, p
->sha1
))
668 void install_packed_git(struct packed_git
*pack
)
670 if (pack
->pack_fd
!= -1)
673 pack
->next
= packed_git
;
677 void (*report_garbage
)(unsigned seen_bits
, const char *path
);
679 static void report_helper(const struct string_list
*list
,
680 int seen_bits
, int first
, int last
)
682 if (seen_bits
== (PACKDIR_FILE_PACK
|PACKDIR_FILE_IDX
))
685 for (; first
< last
; first
++)
686 report_garbage(seen_bits
, list
->items
[first
].string
);
689 static void report_pack_garbage(struct string_list
*list
)
691 int i
, baselen
= -1, first
= 0, seen_bits
= 0;
696 string_list_sort(list
);
698 for (i
= 0; i
< list
->nr
; i
++) {
699 const char *path
= list
->items
[i
].string
;
701 strncmp(path
, list
->items
[first
].string
, baselen
)) {
702 report_helper(list
, seen_bits
, first
, i
);
707 const char *dot
= strrchr(path
, '.');
709 report_garbage(PACKDIR_FILE_GARBAGE
, path
);
712 baselen
= dot
- path
+ 1;
715 if (!strcmp(path
+ baselen
, "pack"))
717 else if (!strcmp(path
+ baselen
, "idx"))
720 report_helper(list
, seen_bits
, first
, list
->nr
);
723 static void prepare_packed_git_one(char *objdir
, int local
)
725 struct strbuf path
= STRBUF_INIT
;
729 struct string_list garbage
= STRING_LIST_INIT_DUP
;
731 strbuf_addstr(&path
, objdir
);
732 strbuf_addstr(&path
, "/pack");
733 dir
= opendir(path
.buf
);
736 error_errno("unable to open object pack directory: %s",
738 strbuf_release(&path
);
741 strbuf_addch(&path
, '/');
742 dirnamelen
= path
.len
;
743 while ((de
= readdir(dir
)) != NULL
) {
744 struct packed_git
*p
;
747 if (is_dot_or_dotdot(de
->d_name
))
750 strbuf_setlen(&path
, dirnamelen
);
751 strbuf_addstr(&path
, de
->d_name
);
754 if (strip_suffix_mem(path
.buf
, &base_len
, ".idx")) {
755 /* Don't reopen a pack we already have. */
756 for (p
= packed_git
; p
; p
= p
->next
) {
758 if (strip_suffix(p
->pack_name
, ".pack", &len
) &&
760 !memcmp(p
->pack_name
, path
.buf
, len
))
765 * See if it really is a valid .idx file with
766 * corresponding .pack file that we can map.
768 (p
= add_packed_git(path
.buf
, path
.len
, local
)) != NULL
)
769 install_packed_git(p
);
775 if (ends_with(de
->d_name
, ".idx") ||
776 ends_with(de
->d_name
, ".pack") ||
777 ends_with(de
->d_name
, ".bitmap") ||
778 ends_with(de
->d_name
, ".keep"))
779 string_list_append(&garbage
, path
.buf
);
781 report_garbage(PACKDIR_FILE_GARBAGE
, path
.buf
);
784 report_pack_garbage(&garbage
);
785 string_list_clear(&garbage
, 0);
786 strbuf_release(&path
);
789 static int approximate_object_count_valid
;
792 * Give a fast, rough count of the number of objects in the repository. This
793 * ignores loose objects completely. If you have a lot of them, then either
794 * you should repack because your performance will be awful, or they are
795 * all unreachable objects about to be pruned, in which case they're not really
796 * interesting as a measure of repo size in the first place.
798 unsigned long approximate_object_count(void)
800 static unsigned long count
;
801 if (!approximate_object_count_valid
) {
802 struct packed_git
*p
;
804 prepare_packed_git();
806 for (p
= packed_git
; p
; p
= p
->next
) {
807 if (open_pack_index(p
))
809 count
+= p
->num_objects
;
815 static void *get_next_packed_git(const void *p
)
817 return ((const struct packed_git
*)p
)->next
;
820 static void set_next_packed_git(void *p
, void *next
)
822 ((struct packed_git
*)p
)->next
= next
;
825 static int sort_pack(const void *a_
, const void *b_
)
827 const struct packed_git
*a
= a_
;
828 const struct packed_git
*b
= b_
;
832 * Local packs tend to contain objects more specific to our
833 * variant of the project than remote ones. In addition,
834 * remote ones could be on a network mounted filesystem.
835 * Favor local ones for these reasons.
837 st
= a
->pack_local
- b
->pack_local
;
842 * Younger packs tend to contain more recent objects,
843 * and more recent objects tend to get accessed more
846 if (a
->mtime
< b
->mtime
)
848 else if (a
->mtime
== b
->mtime
)
853 static void rearrange_packed_git(void)
855 packed_git
= llist_mergesort(packed_git
, get_next_packed_git
,
856 set_next_packed_git
, sort_pack
);
859 static void prepare_packed_git_mru(void)
861 struct packed_git
*p
;
863 mru_clear(packed_git_mru
);
864 for (p
= packed_git
; p
; p
= p
->next
)
865 mru_append(packed_git_mru
, p
);
868 static int prepare_packed_git_run_once
= 0;
869 void prepare_packed_git(void)
871 struct alternate_object_database
*alt
;
873 if (prepare_packed_git_run_once
)
875 prepare_packed_git_one(get_object_directory(), 1);
877 for (alt
= alt_odb_list
; alt
; alt
= alt
->next
)
878 prepare_packed_git_one(alt
->path
, 0);
879 rearrange_packed_git();
880 prepare_packed_git_mru();
881 prepare_packed_git_run_once
= 1;
884 void reprepare_packed_git(void)
886 approximate_object_count_valid
= 0;
887 prepare_packed_git_run_once
= 0;
888 prepare_packed_git();
891 unsigned long unpack_object_header_buffer(const unsigned char *buf
,
892 unsigned long len
, enum object_type
*type
, unsigned long *sizep
)
895 unsigned long size
, c
;
896 unsigned long used
= 0;
899 *type
= (c
>> 4) & 7;
903 if (len
<= used
|| bitsizeof(long) <= shift
) {
904 error("bad object header");
909 size
+= (c
& 0x7f) << shift
;
916 unsigned long get_size_from_delta(struct packed_git
*p
,
917 struct pack_window
**w_curs
,
920 const unsigned char *data
;
921 unsigned char delta_head
[20], *in
;
925 memset(&stream
, 0, sizeof(stream
));
926 stream
.next_out
= delta_head
;
927 stream
.avail_out
= sizeof(delta_head
);
929 git_inflate_init(&stream
);
931 in
= use_pack(p
, w_curs
, curpos
, &stream
.avail_in
);
933 st
= git_inflate(&stream
, Z_FINISH
);
934 curpos
+= stream
.next_in
- in
;
935 } while ((st
== Z_OK
|| st
== Z_BUF_ERROR
) &&
936 stream
.total_out
< sizeof(delta_head
));
937 git_inflate_end(&stream
);
938 if ((st
!= Z_STREAM_END
) && stream
.total_out
!= sizeof(delta_head
)) {
939 error("delta data unpack-initial failed");
943 /* Examine the initial part of the delta to figure out
948 /* ignore base size */
949 get_delta_hdr_size(&data
, delta_head
+sizeof(delta_head
));
951 /* Read the result size */
952 return get_delta_hdr_size(&data
, delta_head
+sizeof(delta_head
));
955 int unpack_object_header(struct packed_git
*p
,
956 struct pack_window
**w_curs
,
958 unsigned long *sizep
)
963 enum object_type type
;
965 /* use_pack() assures us we have [base, base + 20) available
966 * as a range that we can look at. (It's actually the hash
967 * size that is assured.) With our object header encoding
968 * the maximum deflated object size is 2^137, which is just
969 * insane, so we know we won't exceed what we have been given.
971 base
= use_pack(p
, w_curs
, *curpos
, &left
);
972 used
= unpack_object_header_buffer(base
, left
, &type
, sizep
);
981 void mark_bad_packed_object(struct packed_git
*p
, const unsigned char *sha1
)
984 for (i
= 0; i
< p
->num_bad_objects
; i
++)
985 if (!hashcmp(sha1
, p
->bad_object_sha1
+ GIT_SHA1_RAWSZ
* i
))
987 p
->bad_object_sha1
= xrealloc(p
->bad_object_sha1
,
988 st_mult(GIT_MAX_RAWSZ
,
989 st_add(p
->num_bad_objects
, 1)));
990 hashcpy(p
->bad_object_sha1
+ GIT_SHA1_RAWSZ
* p
->num_bad_objects
, sha1
);
991 p
->num_bad_objects
++;
994 const struct packed_git
*has_packed_and_bad(const unsigned char *sha1
)
996 struct packed_git
*p
;
999 for (p
= packed_git
; p
; p
= p
->next
)
1000 for (i
= 0; i
< p
->num_bad_objects
; i
++)
1001 if (!hashcmp(sha1
, p
->bad_object_sha1
+ 20 * i
))
1006 static off_t
get_delta_base(struct packed_git
*p
,
1007 struct pack_window
**w_curs
,
1009 enum object_type type
,
1010 off_t delta_obj_offset
)
1012 unsigned char *base_info
= use_pack(p
, w_curs
, *curpos
, NULL
);
1015 /* use_pack() assured us we have [base_info, base_info + 20)
1016 * as a range that we can look at without walking off the
1017 * end of the mapped window. It's actually the hash size
1018 * that is assured. An OFS_DELTA longer than the hash size
1019 * is stupid, as then a REF_DELTA would be smaller to store.
1021 if (type
== OBJ_OFS_DELTA
) {
1023 unsigned char c
= base_info
[used
++];
1024 base_offset
= c
& 127;
1027 if (!base_offset
|| MSB(base_offset
, 7))
1028 return 0; /* overflow */
1029 c
= base_info
[used
++];
1030 base_offset
= (base_offset
<< 7) + (c
& 127);
1032 base_offset
= delta_obj_offset
- base_offset
;
1033 if (base_offset
<= 0 || base_offset
>= delta_obj_offset
)
1034 return 0; /* out of bound */
1036 } else if (type
== OBJ_REF_DELTA
) {
1037 /* The base entry _must_ be in the same pack */
1038 base_offset
= find_pack_entry_one(base_info
, p
);
1041 die("I am totally screwed");
1046 * Like get_delta_base above, but we return the sha1 instead of the pack
1047 * offset. This means it is cheaper for REF deltas (we do not have to do
1048 * the final object lookup), but more expensive for OFS deltas (we
1049 * have to load the revidx to convert the offset back into a sha1).
1051 static const unsigned char *get_delta_base_sha1(struct packed_git
*p
,
1052 struct pack_window
**w_curs
,
1054 enum object_type type
,
1055 off_t delta_obj_offset
)
1057 if (type
== OBJ_REF_DELTA
) {
1058 unsigned char *base
= use_pack(p
, w_curs
, curpos
, NULL
);
1060 } else if (type
== OBJ_OFS_DELTA
) {
1061 struct revindex_entry
*revidx
;
1062 off_t base_offset
= get_delta_base(p
, w_curs
, &curpos
,
1063 type
, delta_obj_offset
);
1068 revidx
= find_pack_revindex(p
, base_offset
);
1072 return nth_packed_object_sha1(p
, revidx
->nr
);
1077 static int retry_bad_packed_offset(struct packed_git
*p
, off_t obj_offset
)
1080 struct revindex_entry
*revidx
;
1081 const unsigned char *sha1
;
1082 revidx
= find_pack_revindex(p
, obj_offset
);
1085 sha1
= nth_packed_object_sha1(p
, revidx
->nr
);
1086 mark_bad_packed_object(p
, sha1
);
1087 type
= sha1_object_info(sha1
, NULL
);
1088 if (type
<= OBJ_NONE
)
1093 #define POI_STACK_PREALLOC 64
1095 static enum object_type
packed_to_object_type(struct packed_git
*p
,
1097 enum object_type type
,
1098 struct pack_window
**w_curs
,
1101 off_t small_poi_stack
[POI_STACK_PREALLOC
];
1102 off_t
*poi_stack
= small_poi_stack
;
1103 int poi_stack_nr
= 0, poi_stack_alloc
= POI_STACK_PREALLOC
;
1105 while (type
== OBJ_OFS_DELTA
|| type
== OBJ_REF_DELTA
) {
1108 /* Push the object we're going to leave behind */
1109 if (poi_stack_nr
>= poi_stack_alloc
&& poi_stack
== small_poi_stack
) {
1110 poi_stack_alloc
= alloc_nr(poi_stack_nr
);
1111 ALLOC_ARRAY(poi_stack
, poi_stack_alloc
);
1112 memcpy(poi_stack
, small_poi_stack
, sizeof(off_t
)*poi_stack_nr
);
1114 ALLOC_GROW(poi_stack
, poi_stack_nr
+1, poi_stack_alloc
);
1116 poi_stack
[poi_stack_nr
++] = obj_offset
;
1117 /* If parsing the base offset fails, just unwind */
1118 base_offset
= get_delta_base(p
, w_curs
, &curpos
, type
, obj_offset
);
1121 curpos
= obj_offset
= base_offset
;
1122 type
= unpack_object_header(p
, w_curs
, &curpos
, &size
);
1123 if (type
<= OBJ_NONE
) {
1124 /* If getting the base itself fails, we first
1125 * retry the base, otherwise unwind */
1126 type
= retry_bad_packed_offset(p
, base_offset
);
1127 if (type
> OBJ_NONE
)
1141 error("unknown object type %i at offset %"PRIuMAX
" in %s",
1142 type
, (uintmax_t)obj_offset
, p
->pack_name
);
1147 if (poi_stack
!= small_poi_stack
)
1152 while (poi_stack_nr
) {
1153 obj_offset
= poi_stack
[--poi_stack_nr
];
1154 type
= retry_bad_packed_offset(p
, obj_offset
);
1155 if (type
> OBJ_NONE
)
1162 static struct hashmap delta_base_cache
;
1163 static size_t delta_base_cached
;
1165 static LIST_HEAD(delta_base_cache_lru
);
1167 struct delta_base_cache_key
{
1168 struct packed_git
*p
;
1172 struct delta_base_cache_entry
{
1173 struct hashmap hash
;
1174 struct delta_base_cache_key key
;
1175 struct list_head lru
;
1178 enum object_type type
;
1181 static unsigned int pack_entry_hash(struct packed_git
*p
, off_t base_offset
)
1185 hash
= (unsigned int)(intptr_t)p
+ (unsigned int)base_offset
;
1186 hash
+= (hash
>> 8) + (hash
>> 16);
1190 static struct delta_base_cache_entry
*
1191 get_delta_base_cache_entry(struct packed_git
*p
, off_t base_offset
)
1193 struct hashmap_entry entry
;
1194 struct delta_base_cache_key key
;
1196 if (!delta_base_cache
.cmpfn
)
1199 hashmap_entry_init(&entry
, pack_entry_hash(p
, base_offset
));
1201 key
.base_offset
= base_offset
;
1202 return hashmap_get(&delta_base_cache
, &entry
, &key
);
1205 static int delta_base_cache_key_eq(const struct delta_base_cache_key
*a
,
1206 const struct delta_base_cache_key
*b
)
1208 return a
->p
== b
->p
&& a
->base_offset
== b
->base_offset
;
1211 static int delta_base_cache_hash_cmp(const void *unused_cmp_data
,
1212 const void *va
, const void *vb
,
1215 const struct delta_base_cache_entry
*a
= va
, *b
= vb
;
1216 const struct delta_base_cache_key
*key
= vkey
;
1218 return !delta_base_cache_key_eq(&a
->key
, key
);
1220 return !delta_base_cache_key_eq(&a
->key
, &b
->key
);
1223 static int in_delta_base_cache(struct packed_git
*p
, off_t base_offset
)
1225 return !!get_delta_base_cache_entry(p
, base_offset
);
1229 * Remove the entry from the cache, but do _not_ free the associated
1230 * entry data. The caller takes ownership of the "data" buffer, and
1231 * should copy out any fields it wants before detaching.
1233 static void detach_delta_base_cache_entry(struct delta_base_cache_entry
*ent
)
1235 hashmap_remove(&delta_base_cache
, ent
, &ent
->key
);
1236 list_del(&ent
->lru
);
1237 delta_base_cached
-= ent
->size
;
1241 static void *cache_or_unpack_entry(struct packed_git
*p
, off_t base_offset
,
1242 unsigned long *base_size
, enum object_type
*type
)
1244 struct delta_base_cache_entry
*ent
;
1246 ent
= get_delta_base_cache_entry(p
, base_offset
);
1248 return unpack_entry(p
, base_offset
, type
, base_size
);
1253 *base_size
= ent
->size
;
1254 return xmemdupz(ent
->data
, ent
->size
);
1257 static inline void release_delta_base_cache(struct delta_base_cache_entry
*ent
)
1260 detach_delta_base_cache_entry(ent
);
1263 void clear_delta_base_cache(void)
1265 struct list_head
*lru
, *tmp
;
1266 list_for_each_safe(lru
, tmp
, &delta_base_cache_lru
) {
1267 struct delta_base_cache_entry
*entry
=
1268 list_entry(lru
, struct delta_base_cache_entry
, lru
);
1269 release_delta_base_cache(entry
);
1273 static void add_delta_base_cache(struct packed_git
*p
, off_t base_offset
,
1274 void *base
, unsigned long base_size
, enum object_type type
)
1276 struct delta_base_cache_entry
*ent
= xmalloc(sizeof(*ent
));
1277 struct list_head
*lru
, *tmp
;
1279 delta_base_cached
+= base_size
;
1281 list_for_each_safe(lru
, tmp
, &delta_base_cache_lru
) {
1282 struct delta_base_cache_entry
*f
=
1283 list_entry(lru
, struct delta_base_cache_entry
, lru
);
1284 if (delta_base_cached
<= delta_base_cache_limit
)
1286 release_delta_base_cache(f
);
1290 ent
->key
.base_offset
= base_offset
;
1293 ent
->size
= base_size
;
1294 list_add_tail(&ent
->lru
, &delta_base_cache_lru
);
1296 if (!delta_base_cache
.cmpfn
)
1297 hashmap_init(&delta_base_cache
, delta_base_cache_hash_cmp
, NULL
, 0);
1298 hashmap_entry_init(ent
, pack_entry_hash(p
, base_offset
));
1299 hashmap_add(&delta_base_cache
, ent
);
1302 int packed_object_info(struct packed_git
*p
, off_t obj_offset
,
1303 struct object_info
*oi
)
1305 struct pack_window
*w_curs
= NULL
;
1307 off_t curpos
= obj_offset
;
1308 enum object_type type
;
1311 * We always get the representation type, but only convert it to
1312 * a "real" type later if the caller is interested.
1315 *oi
->contentp
= cache_or_unpack_entry(p
, obj_offset
, oi
->sizep
,
1320 type
= unpack_object_header(p
, &w_curs
, &curpos
, &size
);
1323 if (!oi
->contentp
&& oi
->sizep
) {
1324 if (type
== OBJ_OFS_DELTA
|| type
== OBJ_REF_DELTA
) {
1325 off_t tmp_pos
= curpos
;
1326 off_t base_offset
= get_delta_base(p
, &w_curs
, &tmp_pos
,
1332 *oi
->sizep
= get_size_from_delta(p
, &w_curs
, tmp_pos
);
1333 if (*oi
->sizep
== 0) {
1342 if (oi
->disk_sizep
) {
1343 struct revindex_entry
*revidx
= find_pack_revindex(p
, obj_offset
);
1344 *oi
->disk_sizep
= revidx
[1].offset
- obj_offset
;
1347 if (oi
->typep
|| oi
->typename
) {
1348 enum object_type ptot
;
1349 ptot
= packed_to_object_type(p
, obj_offset
, type
, &w_curs
,
1354 const char *tn
= typename(ptot
);
1356 strbuf_addstr(oi
->typename
, tn
);
1364 if (oi
->delta_base_sha1
) {
1365 if (type
== OBJ_OFS_DELTA
|| type
== OBJ_REF_DELTA
) {
1366 const unsigned char *base
;
1368 base
= get_delta_base_sha1(p
, &w_curs
, curpos
,
1375 hashcpy(oi
->delta_base_sha1
, base
);
1377 hashclr(oi
->delta_base_sha1
);
1380 oi
->whence
= in_delta_base_cache(p
, obj_offset
) ? OI_DBCACHED
:
1384 unuse_pack(&w_curs
);
/*
 * unpack_compressed_entry: inflate the zlib stream that starts at curpos
 * in pack p into a newly allocated buffer expected to hold size bytes.
 *
 * The buffer comes from xmallocz_gently(size). Input is obtained
 * through use_pack() with the caller-supplied window cursor w_curs.
 *
 * NOTE(review): the remaining parameters, the stream and st
 * declarations, the allocation-failure check, the loop opener and the
 * return statements are not visible in this listing - TODO confirm.
 */
1388 static void *unpack_compressed_entry(struct packed_git
*p
,
1389 struct pack_window
**w_curs
,
1395 unsigned char *buffer
, *in
;
1397 buffer
= xmallocz_gently(size
);
1400 memset(&stream
, 0, sizeof(stream
));
1401 stream
.next_out
= buffer
;
/*
 * One byte more than size is offered as output room, so a stream that
 * produces more than size bytes uses it up and trips the avail_out
 * check inside the loop below.
 */
1402 stream
.avail_out
= size
+ 1;
1404 git_inflate_init(&stream
);
/*
 * Each round maps more input with use_pack(), inflates with Z_FINISH,
 * and advances curpos by the number of input bytes consumed. The loop
 * repeats while git_inflate() reports Z_OK or Z_BUF_ERROR.
 */
1406 in
= use_pack(p
, w_curs
, curpos
, &stream
.avail_in
);
1407 stream
.next_in
= in
;
1408 st
= git_inflate(&stream
, Z_FINISH
);
1409 if (!stream
.avail_out
)
1410 break; /* the payload is larger than it should be */
1411 curpos
+= stream
.next_in
- in
;
1412 } while (st
== Z_OK
|| st
== Z_BUF_ERROR
);
1413 git_inflate_end(&stream
);
/*
 * Anything other than a clean end of stream with exactly size bytes of
 * output enters this branch; its body is not visible in this listing.
 */
1414 if ((st
!= Z_STREAM_END
) || stream
.total_out
!= size
) {
/*
 * write_pack_access_log: emit one trace line for an access to pack p,
 * consisting of the pack name and obj_offset printed as an unsigned
 * integer. Output goes through trace_printf_key() on a function-local
 * static key initialised with TRACE_KEY_INIT(PACK_ACCESS).
 */
1422 static void write_pack_access_log(struct packed_git
*p
, off_t obj_offset
)
1424 static struct trace_key pack_access
= TRACE_KEY_INIT(PACK_ACCESS
);
1425 trace_printf_key(&pack_access
, "%s %"PRIuMAX
"\n",
1426 p
->pack_name
, (uintmax_t)obj_offset
);
/*
 * Global switch read by unpack_entry(): when non-zero, and the pack
 * index version is above 1, entries are passed to check_pack_crc().
 */
1429 int do_check_packed_object_crc
;
/* Number of entries in the on-stack delta stack that unpack_entry() starts with. */
1431 #define UNPACK_ENTRY_STACK_PREALLOC 64
/*
 * One entry of the delta stack used by unpack_entry().
 * NOTE(review): the member declarations are not visible in this listing;
 * unpack_entry() stores obj_offset, curpos and size in each entry.
 */
1432 struct unpack_entry_stack_ent
{
/*
 * read_object: look up the object named by sha1 through
 * sha1_object_info_extended(), with an object_info request that asks
 * for the content (oi.contentp points at a local content variable).
 *
 * NOTE(review): the assignments that route type and size into the
 * request, the handling of a negative result and the return statement
 * are not visible in this listing - TODO confirm against the full file.
 */
1438 static void *read_object(const unsigned char *sha1
, enum object_type
*type
,
1439 unsigned long *size
)
1441 struct object_info oi
= OBJECT_INFO_INIT
;
1445 oi
.contentp
= &content
;
1447 if (sha1_object_info_extended(sha1
, &oi
, 0) < 0)
/*
 * unpack_entry: reconstruct the object stored at obj_offset in pack p.
 *
 * The work is split into the three phases marked in the body:
 *   1. walk from the requested entry towards its innermost base,
 *      pushing every delta entry met on the way onto delta_stack;
 *   2. obtain the data of that base (from the delta base cache or by
 *      inflating it with unpack_compressed_entry());
 *   3. pop the recorded deltas one by one and apply each with
 *      patch_delta().
 *
 * The access is logged first with write_pack_access_log().
 *
 * final_type and final_size are output parameters.
 * NOTE(review): this listing omits many lines of the function (see the
 * gaps in the embedded line numbers), among them the loop openers, the
 * phase 2 switch, the stores to final_type and final_size and the
 * return statement - TODO confirm those against the full file.
 */
1452 void *unpack_entry(struct packed_git
*p
, off_t obj_offset
,
1453 enum object_type
*final_type
, unsigned long *final_size
)
1455 struct pack_window
*w_curs
= NULL
;
1456 off_t curpos
= obj_offset
;
1459 enum object_type type
;
1460 struct unpack_entry_stack_ent small_delta_stack
[UNPACK_ENTRY_STACK_PREALLOC
];
1461 struct unpack_entry_stack_ent
*delta_stack
= small_delta_stack
;
1462 int delta_stack_nr
= 0, delta_stack_alloc
= UNPACK_ENTRY_STACK_PREALLOC
;
1463 int base_from_cache
= 0;
1465 write_pack_access_log(p
, obj_offset
);
1467 /* PHASE 1: drill down to the innermost base object */
1471 struct delta_base_cache_entry
*ent
;
1473 ent
= get_delta_base_cache_entry(p
, curpos
);
1478 detach_delta_base_cache_entry(ent
);
1479 base_from_cache
= 1;
/*
 * Optional CRC verification, enabled by do_check_packed_object_crc and
 * only for index versions above 1. The entry length is the gap to the
 * offset in the following reverse-index element. On a mismatch the
 * object is reported and marked bad via mark_bad_packed_object().
 */
1483 if (do_check_packed_object_crc
&& p
->index_version
> 1) {
1484 struct revindex_entry
*revidx
= find_pack_revindex(p
, obj_offset
);
1485 off_t len
= revidx
[1].offset
- obj_offset
;
1486 if (check_pack_crc(p
, &w_curs
, obj_offset
, len
, revidx
->nr
)) {
1487 const unsigned char *sha1
=
1488 nth_packed_object_sha1(p
, revidx
->nr
);
1489 error("bad packed object CRC for %s",
1491 mark_bad_packed_object(p
, sha1
);
1497 type
= unpack_object_header(p
, &w_curs
, &curpos
, &size
);
1498 if (type
!= OBJ_OFS_DELTA
&& type
!= OBJ_REF_DELTA
)
1501 base_offset
= get_delta_base(p
, &w_curs
, &curpos
, type
, obj_offset
);
1503 error("failed to validate delta base reference "
1504 "at offset %"PRIuMAX
" from %s",
1505 (uintmax_t)curpos
, p
->pack_name
);
1506 /* bail to phase 2, in hopes of recovery */
/*
 * The stack starts out in the on-stack array small_delta_stack. When
 * that array is full its contents are copied into a freshly allocated
 * heap array; ALLOC_GROW is the other growth path visible below.
 */
1511 /* push object, proceed to base */
1512 if (delta_stack_nr
>= delta_stack_alloc
1513 && delta_stack
== small_delta_stack
) {
1514 delta_stack_alloc
= alloc_nr(delta_stack_nr
);
1515 ALLOC_ARRAY(delta_stack
, delta_stack_alloc
);
1516 memcpy(delta_stack
, small_delta_stack
,
1517 sizeof(*delta_stack
)*delta_stack_nr
);
1519 ALLOC_GROW(delta_stack
, delta_stack_nr
+1, delta_stack_alloc
);
1521 i
= delta_stack_nr
++;
1522 delta_stack
[i
].obj_offset
= obj_offset
;
1523 delta_stack
[i
].curpos
= curpos
;
1524 delta_stack
[i
].size
= size
;
1526 curpos
= obj_offset
= base_offset
;
1529 /* PHASE 2: handle the base */
1534 die("BUG: unpack_entry: left loop at a valid delta");
1540 if (!base_from_cache
)
1541 data
= unpack_compressed_entry(p
, &w_curs
, curpos
, size
);
1545 error("unknown object type %i at offset %"PRIuMAX
" in %s",
1546 type
, (uintmax_t)obj_offset
, p
->pack_name
);
1549 /* PHASE 3: apply deltas in order */
1552 * 'data' holds the base data, or NULL if there was corruption
1554 while (delta_stack_nr
) {
1557 void *external_base
= NULL
;
1558 unsigned long delta_size
, base_size
= size
;
1564 add_delta_base_cache(p
, obj_offset
, base
, base_size
, type
);
1568 * We're probably in deep shit, but let's try to fetch
1569 * the required base anyway from another pack or loose.
1570 * This is costly but should happen only in the presence
1571 * of a corrupted pack, and is better than failing outright.
1573 struct revindex_entry
*revidx
;
1574 const unsigned char *base_sha1
;
1575 revidx
= find_pack_revindex(p
, obj_offset
);
1577 base_sha1
= nth_packed_object_sha1(p
, revidx
->nr
);
1578 error("failed to read delta base object %s"
1579 " at offset %"PRIuMAX
" from %s",
1580 sha1_to_hex(base_sha1
), (uintmax_t)obj_offset
,
1582 mark_bad_packed_object(p
, base_sha1
);
1583 base
= read_object(base_sha1
, &type
, &base_size
);
1584 external_base
= base
;
/*
 * Pop the most recently pushed delta and restore the position and size
 * that were saved for it in phase 1, then inflate the delta data.
 */
1588 i
= --delta_stack_nr
;
1589 obj_offset
= delta_stack
[i
].obj_offset
;
1590 curpos
= delta_stack
[i
].curpos
;
1591 delta_size
= delta_stack
[i
].size
;
1596 delta_data
= unpack_compressed_entry(p
, &w_curs
, curpos
, delta_size
);
1599 error("failed to unpack compressed delta "
1600 "at offset %"PRIuMAX
" from %s",
1601 (uintmax_t)curpos
, p
->pack_name
);
1603 free(external_base
);
1607 data
= patch_delta(base
, base_size
,
1608 delta_data
, delta_size
,
1612 * We could not apply the delta; warn the user, but keep going.
1613 * Our failure will be noticed either in the next iteration of
1614 * the loop, or if this is the final delta, in the caller when
1615 * we return NULL. Those code paths will take care of making
1616 * a more explicit warning and retrying with another copy of
1620 error("failed to apply delta");
1623 free(external_base
);
/*
 * Cleanup: release the pack window. The check that follows detects a
 * heap-allocated delta stack; its body is not visible in this listing
 * (presumably it frees the array - TODO confirm).
 */
1632 unuse_pack(&w_curs
);
1634 if (delta_stack
!= small_delta_stack
)
/*
 * nth_packed_object_sha1: return a pointer into the index data of pack
 * p at the object name stored for entry n.
 *
 * The index is opened with open_pack_index() on one path and
 * p->index_data is then re-read. n is compared against p->num_objects
 * before any address is computed. For index_version 1 the visible
 * arithmetic uses a 24-byte stride with the name 4 bytes into each
 * entry; otherwise the stride is 20 bytes.
 *
 * NOTE(review): the declaration of n, the condition guarding the
 * open_pack_index() call, the failure returns and any adjustment of
 * index between the visible lines are not shown in this listing -
 * TODO confirm against the full file.
 */
1640 const unsigned char *nth_packed_object_sha1(struct packed_git
*p
,
1643 const unsigned char *index
= p
->index_data
;
1645 if (open_pack_index(p
))
1647 index
= p
->index_data
;
1649 if (n
>= p
->num_objects
)
1652 if (p
->index_version
== 1) {
1653 return index
+ 24 * n
+ 4;
1656 return index
+ 20 * n
;
/*
 * nth_packed_object_oid: object_id flavoured wrapper around
 * nth_packed_object_sha1(). The hash found for entry n of pack p is
 * copied into oid->hash with hashcpy().
 *
 * NOTE(review): the declaration of n, any check of the lookup result
 * and the return statement are not visible in this listing.
 */
1660 const struct object_id
*nth_packed_object_oid(struct object_id
*oid
,
1661 struct packed_git
*p
,
1664 const unsigned char *hash
= nth_packed_object_sha1(p
, n
);
1667 hashcpy(oid
->hash
, hash
);
/*
 * check_pack_index_ptr: validate that vptr points inside the index data
 * of pack p, whose bounds are p->index_data and that address plus
 * p->index_size. Two fatal errors are visible, one for a pointer before
 * the start of the index and one for a pointer beyond its end; both
 * call die().
 *
 * NOTE(review): the two comparisons that trigger the die() calls and
 * the remaining die() arguments are not visible in this listing.
 */
1671 void check_pack_index_ptr(const struct packed_git
*p
, const void *vptr
)
1673 const unsigned char *ptr
= vptr
;
1674 const unsigned char *start
= p
->index_data
;
1675 const unsigned char *end
= start
+ p
->index_size
;
1677 die(_("offset before start of pack index for %s (corrupt index?)"),
1679 /* No need to check for underflow; .idx files must be at least 8 bytes */
1681 die(_("offset beyond end of pack index for %s (truncated index?)"),
/*
 * nth_packed_object_offset: return the pack offset recorded in the
 * index of pack p for entry n.
 *
 * index_version 1: the offset is the 32-bit value at the start of the
 * 24-byte entry, converted with ntohl().
 *
 * Other versions: index is advanced past 8 bytes plus num_objects
 * entries of 20 + 4 bytes, and a 32-bit value is read for entry n. If
 * its top bit (0x80000000) is set, the low 31 bits select an 8-byte
 * slot located after a further num_objects * 4 bytes; that address is
 * checked with check_pack_index_ptr() and the result is built from two
 * 32-bit halves, high half first.
 *
 * NOTE(review): the declaration of off, the body of the branch taken
 * when the top bit is clear (presumably returning off directly) and
 * any adjustment of index before the version test are not visible in
 * this listing - TODO confirm against the full file.
 */
1685 off_t
nth_packed_object_offset(const struct packed_git
*p
, uint32_t n
)
1687 const unsigned char *index
= p
->index_data
;
1689 if (p
->index_version
== 1) {
1690 return ntohl(*((uint32_t *)(index
+ 24 * n
)));
1693 index
+= 8 + p
->num_objects
* (20 + 4);
1694 off
= ntohl(*((uint32_t *)(index
+ 4 * n
)));
1695 if (!(off
& 0x80000000))
1697 index
+= p
->num_objects
* 4 + (off
& 0x7fffffff) * 8;
1698 check_pack_index_ptr(p
, index
);
1699 return (((uint64_t)ntohl(*((uint32_t *)(index
+ 0)))) << 32) |
1700 ntohl(*((uint32_t *)(index
+ 4)));