2 * Support for RAM backed by mmaped host memory.
4 * Copyright (c) 2015 Red Hat, Inc.
7 * Michael S. Tsirkin <mst@redhat.com>
9 * This work is licensed under the terms of the GNU GPL, version 2 or
10 * later. See the COPYING file in the top-level directory.
14 #include <linux/mman.h>
15 #else /* !CONFIG_LINUX */
17 #define MAP_SHARED_VALIDATE 0x0
18 #endif /* CONFIG_LINUX */
20 #include "qemu/osdep.h"
21 #include "qemu/mmap-alloc.h"
22 #include "qemu/host-utils.h"
24 #define HUGETLBFS_MAGIC 0x958458f6
/*
 * qemu_fd_getpagesize: report the page size to use for memory backed by @fd.
 *
 * Visible logic: fstatfs() is repeated for as long as it fails with EINTR.
 * The result is then tested for success together with a filesystem type of
 * HUGETLBFS_MAGIC. The remaining visible exits return QEMU_VMALLOC_ALIGN
 * (annotated below as a SPARC Linux requirement) or
 * qemu_real_host_page_size.
 *
 * NOTE(review): this listing is missing lines (braces, local declarations,
 * preprocessor guards and the statement inside the hugetlbfs branch), so the
 * value returned for hugetlbfs and the conditions selecting between the two
 * visible returns cannot be confirmed from here.
 */
30 size_t qemu_fd_getpagesize(int fd
)
38 ret
= fstatfs(fd
, &fs
);
39 } while (ret
!= 0 && errno
== EINTR
);
41 if (ret
== 0 && fs
.f_type
== HUGETLBFS_MAGIC
) {
46 /* SPARC Linux needs greater alignment than the pagesize */
47 return QEMU_VMALLOC_ALIGN
;
51 return qemu_real_host_page_size
;
/*
 * qemu_mempath_getpagesize: report the page size to use for memory backed by
 * the filesystem that holds @mem_path.
 *
 * Visible logic: statfs() is repeated for as long as it fails with EINTR.
 * A diagnostic is written to stderr on one path (presumably when statfs()
 * still failed; the guarding condition and what follows the message are not
 * visible). The filesystem type is then compared with HUGETLBFS_MAGIC to
 * detect huge pages. The remaining visible exits return QEMU_VMALLOC_ALIGN
 * (annotated below as a SPARC Linux requirement) or
 * qemu_real_host_page_size.
 *
 * NOTE(review): this listing is missing lines (braces, local declarations,
 * preprocessor guards, the tail of the fprintf call and the hugetlbfs return
 * statement), so those parts cannot be documented from here.
 */
54 size_t qemu_mempath_getpagesize(const char *mem_path
)
62 ret
= statfs(mem_path
, &fs
);
63 } while (ret
!= 0 && errno
== EINTR
);
66 fprintf(stderr
, "Couldn't statfs() memory path: %s\n",
71 if (fs
.f_type
== HUGETLBFS_MAGIC
) {
72 /* It's hugepage, return the huge page size */
77 /* SPARC Linux needs greater alignment than the pagesize */
78 return QEMU_VMALLOC_ALIGN
;
82 return qemu_real_host_page_size
;
/*
 * mmap_guard_pagesize: size of the guard page used around a RAM mapping of
 * @fd.
 *
 * When both __powerpc64__ and __linux__ are defined the value comes from
 * qemu_fd_getpagesize(fd). The other visible return yields
 * qemu_real_host_page_size.
 *
 * NOTE(review): the preprocessor lines that separate the two returns are
 * not part of this listing; presumably the second return is the fallback
 * for every other host. Confirm against the full file.
 */
85 static inline size_t mmap_guard_pagesize(int fd
)
87 #if defined(__powerpc64__) && defined(__linux__)
88 /* Mappings in the same segment must share the same page size */
89 return qemu_fd_getpagesize(fd
);
91 return qemu_real_host_page_size
;
/*
 * qemu_ram_mmap: map a block of RAM inside a larger PROT_NONE reservation.
 *
 * Steps visible in this listing:
 *   1. Reserve total = size + align bytes with a PROT_NONE mmap (guardptr).
 *   2. Assert that align is a power of two and is at least the guard page
 *      size.
 *   3. Build the mapping flags: MAP_ANONYMOUS when fd is -1, then MAP_SHARED
 *      or MAP_PRIVATE depending on shared. When shared and is_pmem are both
 *      set, MAP_SYNC and MAP_SHARED_VALIDATE are requested as well.
 *   4. mmap size bytes at guardptr + offset, where offset moves guardptr up
 *      to the next multiple of align. Protection is PROT_READ, plus
 *      PROT_WRITE unless readonly is set.
 *   5. If that fails while the sync flags were requested, warn on ENOTSUP
 *      (naming the file through /proc/self/fd) and repeat the mmap without
 *      the sync flags.
 *   6. On failure unmap the whole reservation. Otherwise unmap the offset
 *      bytes in front of the block and everything beyond
 *      size + guard_pagesize, leaving one PROT_NONE guard page after it.
 *
 * @fd: file descriptor to map; -1 selects an anonymous mapping.
 *
 * NOTE(review): only @fd is visible in the signature. size, align, readonly,
 * shared, is_pmem and map_offset are used below and are presumably the
 * remaining parameters; their declarations, several local declarations, the
 * initial value of flags and the return statements are on lines missing
 * from this listing. Confirm against the full file.
 */
95 void *qemu_ram_mmap(int fd
,
103 const size_t guard_pagesize
= mmap_guard_pagesize(fd
);
106 int map_sync_flags
= 0;
114 * Note: this always allocates at least one extra page of virtual address
115 * space, even if size is already aligned.
117 total
= size
+ align
;
119 #if defined(__powerpc64__) && defined(__linux__)
120 /* On ppc64 mappings in the same segment (aka slice) must share the same
121 * page size. Since we will be re-allocating part of this segment
122 * from the supplied fd, we should make sure to use the same page size, to
123 * this end we mmap the supplied fd. In this case, set MAP_NORESERVE to
124 * avoid allocating backing store memory.
125 * We do this unless we are using the system page size, in which case
126 * anonymous memory is OK.
129 if (fd
== -1 || guard_pagesize
== qemu_real_host_page_size
) {
131 flags
|= MAP_ANONYMOUS
;
134 flags
|= MAP_NORESERVE
;
138 flags
= MAP_PRIVATE
| MAP_ANONYMOUS
;
141 guardptr
= mmap(0, total
, PROT_NONE
, flags
, guardfd
, 0);
143 if (guardptr
== MAP_FAILED
) {
147 assert(is_power_of_2(align
));
148 /* Always align to host page size */
149 assert(align
>= guard_pagesize
);
152 flags
|= fd
== -1 ? MAP_ANONYMOUS
: 0;
153 flags
|= shared
? MAP_SHARED
: MAP_PRIVATE
;
/* Shared persistent-memory mappings first request MAP_SYNC semantics. */
154 if (shared
&& is_pmem
) {
155 map_sync_flags
= MAP_SYNC
| MAP_SHARED_VALIDATE
;
/* Bytes between guardptr and the next address that is a multiple of align. */
158 offset
= QEMU_ALIGN_UP((uintptr_t)guardptr
, align
) - (uintptr_t)guardptr
;
160 prot
= PROT_READ
| (readonly
? 0 : PROT_WRITE
);
/* First attempt: request the aligned address and include any sync flags. */
162 ptr
= mmap(guardptr
+ offset
, size
, prot
,
163 flags
| map_sync_flags
, fd
, map_offset
);
/* Sync-flag attempt failed: warn on ENOTSUP, then retry without them. */
165 if (ptr
== MAP_FAILED
&& map_sync_flags
) {
166 if (errno
== ENOTSUP
) {
167 char *proc_link
, *file_name
;
169 proc_link
= g_strdup_printf("/proc/self/fd/%d", fd
);
170 file_name
= g_malloc0(PATH_MAX
);
171 len
= readlink(proc_link
, file_name
, PATH_MAX
- 1);
175 file_name
[len
] = '\0';
176 fprintf(stderr
, "Warning: requesting persistence across crashes "
177 "for backend file %s failed. Proceeding without "
178 "persistence, data might become corrupted in case of host "
179 "crash.\n", file_name
);
184 * if map failed with MAP_SHARED_VALIDATE | MAP_SYNC,
185 * we will remove these flags to handle compatibility.
187 ptr
= mmap(guardptr
+ offset
, size
, prot
, flags
, fd
, map_offset
);
/* Mapping failed: release the entire reservation. */
190 if (ptr
== MAP_FAILED
) {
191 munmap(guardptr
, total
);
/* Release the unused bytes in front of the aligned block. */
196 munmap(guardptr
, offset
);
200 * Leave a single PROT_NONE page allocated after the RAM block, to serve as
201 * a guard page guarding against potential buffer overflows.
204 if (total
> size
+ guard_pagesize
) {
205 munmap(ptr
+ size
+ guard_pagesize
, total
- size
- guard_pagesize
);
/*
 * qemu_ram_munmap: release a RAM block together with its trailing guard
 * page.
 *
 * @fd:   file descriptor passed to mmap_guard_pagesize() to obtain the
 *        guard page size
 * @ptr:  start address of the RAM block
 * @size: size of the RAM block in bytes, excluding the guard page
 *
 * A single munmap() call covers size plus the guard page size.
 *
 * NOTE(review): lines are missing from this listing (at least the function
 * braces), so any additional checks around the munmap() call cannot be
 * confirmed from here.
 */
211 void qemu_ram_munmap(int fd
, void *ptr
, size_t size
)
214 /* Unmap both the RAM block and the guard page */
215 munmap(ptr
, size
+ mmap_guard_pagesize(fd
));