From c9ced051c3afa6f3da7f59b0dcb92787b2b5c702 Mon Sep 17 00:00:00 2001 From: "Shawn O. Pearce" Date: Thu, 17 Jan 2008 22:57:00 -0500 Subject: [PATCH] Fix random fast-import errors when compiled with NO_MMAP fast-import was relying on the fact that on most systems mmap() and write() are synchronized by the filesystem's buffer cache. We were relying on the ability to mmap() 20 bytes beyond the current end of the file, then later fill in those bytes with a future write() call, then read them through the previously obtained mmap() address. This isn't always true with some implementations of NFS, but it is especially not true with our NO_MMAP=YesPlease build time option used on some platforms. If fast-import was built with NO_MMAP=YesPlease we used the malloc()+pread() emulation and the subsequent write() call does not update the trailing 20 bytes of a previously obtained "mmap()" (aka malloc'd) address. Under NO_MMAP that behavior causes unpack_entry() in sha1_file.c to be unable to read an object header (or data) that has been unlucky enough to be written to the packfile at a location such that it is in the trailing 20 bytes of a window previously opened on that same packfile. This bug has gone unnoticed for a very long time as it is highly data dependent. Not only does the object have to be placed at the right position, but it also needs to be positioned behind some other object that has been accessed due to a branch cache invalidation. In other words the stars had to align just right, and if you did run into this bug you probably should also have purchased a lottery ticket. Fortunately the workaround is a lot easier than the bug explanation. Before we allow unpack_entry() to read data from a pack window that has also (possibly) been modified through write() we force all existing windows on that packfile to be closed. By closing the windows we ensure that any new access via the emulated mmap() will reread the packfile, updating to the current file content. This comes at a slight performance degredation as we cannot reuse previously cached windows when we update the packfile. But it is a fairly minor difference as the window closes happen at only two points: - When the packfile is finalized and its .idx is generated: At this stage we are getting ready to update the refs and any data access into the packfile is going to be random, and is going after only the branch tips (to ensure they are valid). Our existing windows (if any) are not likely to be positioned at useful locations to access those final tip commits so we probably were closing them before anyway. - When the branch cache missed and we need to reload: At this point fast-import is getting change commands for the next commit and it needs to go re-read a tree object it previously had written out to the packfile. What windows we had (if any) are not likely to cover the tree in question so we probably were closing them before anyway. We do try to avoid unnecessarily closing windows in the second case by checking to see if the packfile size has increased since the last time we called unpack_entry() on that packfile. If the size has not changed then we have not written additional data, and any existing window is still vaild. This nicely handles the cases where fast-import is going through a branch cache reload and needs to read many trees at once. During such an event we are not likely to be updating the packfile so we do not cycle the windows between reads. With this change in place t9301-fast-export.sh (which was broken by c3b0dec509fe136c5417422f31898b5a4e2d5e02) finally works again. Signed-off-by: Shawn O. Pearce Signed-off-by: Junio C Hamano --- cache.h | 1 + fast-import.c | 6 ++++-- sha1_file.c | 16 ++++++++++++++++ 3 files changed, 21 insertions(+), 2 deletions(-) diff --git a/cache.h b/cache.h index 24735bdfee..549f4bbac7 100644 --- a/cache.h +++ b/cache.h @@ -561,6 +561,7 @@ extern struct packed_git *find_sha1_pack(const unsigned char *sha1, extern void pack_report(void); extern int open_pack_index(struct packed_git *); extern unsigned char* use_pack(struct packed_git *, struct pack_window **, off_t, unsigned int *); +extern void close_pack_windows(struct packed_git *); extern void unuse_pack(struct pack_window **); extern struct packed_git *add_packed_git(const char *, int, int); extern const unsigned char *nth_packed_object_sha1(struct packed_git *, uint32_t); diff --git a/fast-import.c b/fast-import.c index 4cf5092976..5e89eef1aa 100644 --- a/fast-import.c +++ b/fast-import.c @@ -917,6 +917,7 @@ static void end_packfile(void) struct branch *b; struct tag *t; + close_pack_windows(pack_data); fixup_pack_header_footer(pack_data->pack_fd, pack_data->sha1, pack_data->pack_name, object_count); close(pack_data->pack_fd); @@ -926,7 +927,6 @@ static void end_packfile(void) new_p = add_packed_git(idx_name, strlen(idx_name), 1); if (!new_p) die("core git rejected index %s", idx_name); - new_p->windows = old_p->windows; all_packs[pack_id] = new_p; install_packed_git(new_p); @@ -1129,8 +1129,10 @@ static void *gfi_unpack_entry( { enum object_type type; struct packed_git *p = all_packs[oe->pack_id]; - if (p == pack_data) + if (p == pack_data && p->pack_size < (pack_size + 20)) { + close_pack_windows(p); p->pack_size = pack_size + 20; + } return unpack_entry(p, oe->offset, &type, sizep); } diff --git a/sha1_file.c b/sha1_file.c index 6583797ce5..66a4e00fa8 100644 --- a/sha1_file.c +++ b/sha1_file.c @@ -611,6 +611,22 @@ void release_pack_memory(size_t need, int fd) ; /* nothing */ } +void close_pack_windows(struct packed_git *p) +{ + while (p->windows) { + struct pack_window *w = p->windows; + + if (w->inuse_cnt) + die("pack '%s' still has open windows to it", + p->pack_name); + munmap(w->base, w->len); + pack_mapped -= w->len; + pack_open_windows--; + p->windows = w->next; + free(w); + } +} + void unuse_pack(struct pack_window **w_cursor) { struct pack_window *w = *w_cursor; -- 2.11.4.GIT