qcow2: Factor next_refcount_table_size out
[qemu.git] / linux-user / mmap.c
blob25fc0b2959ef28a2b5b701e04c9849d4732fb34b
/*
 *  mmap support for qemu
 *
 *  Copyright (c) 2003 Fabrice Bellard
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, see <http://www.gnu.org/licenses/>.
 */
#include <stdlib.h>
#include <stdio.h>
#include <stdarg.h>
#include <stdint.h>
#include <string.h>
#include <unistd.h>
#include <errno.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/mman.h>
#include <linux/mman.h>
#include <linux/unistd.h>

#include "qemu.h"
#include "qemu-common.h"
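/* Uncomment to make target_mprotect(), target_mmap() and target_munmap()
   print a trace of their arguments to stdout. */
//#define DEBUG_MMAP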
#if defined(CONFIG_USE_NPTL)
/* Mutex taken by the outermost mmap_lock() of each thread. */
static pthread_mutex_t mmap_mutex = PTHREAD_MUTEX_INITIALIZER;
/* Per-thread nesting depth of mmap_lock(); lets the lock be re-entered. */
static __thread int mmap_lock_count;

/* Enter the mmap critical section; only the outermost call takes the
   mutex, nested calls just bump the per-thread counter. */
void mmap_lock(void)
{
    if (mmap_lock_count++ == 0) {
        pthread_mutex_lock(&mmap_mutex);
    }
}

/* Leave the mmap critical section; the mutex is released when the
   per-thread counter drops back to zero. */
void mmap_unlock(void)
{
    if (--mmap_lock_count == 0) {
        pthread_mutex_unlock(&mmap_mutex);
    }
}

/* Grab lock to make sure things are in a consistent state after fork().
   Aborts if the calling thread is already inside mmap_lock(). */
void mmap_fork_start(void)
{
    if (mmap_lock_count)
        abort();
    pthread_mutex_lock(&mmap_mutex);
}

/* Counterpart of mmap_fork_start(): a non-zero 'child' re-initialises the
   mutex, otherwise the mutex taken by mmap_fork_start() is released. */
void mmap_fork_end(int child)
{
    if (child)
        pthread_mutex_init(&mmap_mutex, NULL);
    else
        pthread_mutex_unlock(&mmap_mutex);
}
#else
/* We aren't threadsafe to start with, so no need to worry about locking.  */
void mmap_lock(void)
{
}

void mmap_unlock(void)
{
}
#endif
80 void *qemu_vmalloc(size_t size)
82 void *p;
83 unsigned long addr;
84 mmap_lock();
85 /* Use map and mark the pages as used. */
86 p = mmap(NULL, size, PROT_READ | PROT_WRITE,
87 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
89 addr = (unsigned long)p;
90 if (addr == (target_ulong) addr) {
91 /* Allocated region overlaps guest address space.
92 This may recurse. */
93 page_set_flags(addr & TARGET_PAGE_MASK, TARGET_PAGE_ALIGN(addr + size),
94 PAGE_RESERVED);
97 mmap_unlock();
98 return p;
/*
 * Allocate 'size' bytes through qemu_vmalloc().
 *
 * The mapping is 16 bytes larger than requested; its first size_t holds
 * the total mapping length (request + 16), and the pointer handed to the
 * caller is 16 bytes past the start of the mapping.
 *
 * Returns NULL when the request would overflow or the mapping fails.
 */
void *qemu_malloc(size_t size)
{
    char * p;

    /* Adding the header must not wrap around.  */
    if (size > SIZE_MAX - 16)
        return NULL;
    size += 16;
    p = qemu_vmalloc(size);
    /* qemu_vmalloc() hands back the raw mmap() result.  */
    if ((void *)p == MAP_FAILED)
        return NULL;
    *(size_t *)p = size;
    return p + 16;
}
/* Zero-initialised allocation.  We use map, which is always zero
   initialized, so this simply forwards to qemu_malloc().  */
void * qemu_mallocz(size_t size)
{
    return qemu_malloc(size);
}
/*
 * Release a block whose total mapping length is stored in the size_t
 * located 16 bytes before 'ptr'.  A NULL 'ptr' is ignored.
 */
void qemu_free(void *ptr)
{
    /* FIXME: We should unmark the reserved pages here.  However this gets
       complicated when one target page spans multiple host pages, so we
       don't bother.  */
    size_t *p;

    /* Without this guard a NULL pointer would be dereferenced at -16.  */
    if (!ptr)
        return;
    p = (size_t *)((char *)ptr - 16);
    munmap(p, *p);
}
/*
 * Resize a qemu_malloc() block by allocating a new one, copying the
 * smaller of the old and new payload sizes, and freeing the old block.
 *
 * A NULL 'ptr' behaves like qemu_malloc(size).  Returns NULL, leaving the
 * old block untouched, if the new allocation yields NULL.
 */
void *qemu_realloc(void *ptr, size_t size)
{
    size_t old_size, copy;
    void *new_ptr;

    if (!ptr)
        return qemu_malloc(size);
    /* The header stores the whole mapping length, i.e. payload plus the
       16-byte header; only the payload may be copied, otherwise memcpy()
       reads 16 bytes past the end of the old mapping.  */
    old_size = *(size_t *)((char *)ptr - 16) - 16;
    copy = old_size < size ? old_size : size;
    new_ptr = qemu_malloc(size);
    if (!new_ptr)
        return NULL;
    memcpy(new_ptr, ptr, copy);
    qemu_free(ptr);
    return new_ptr;
}
141 /* NOTE: all the constants are the HOST ones, but addresses are target. */
142 int target_mprotect(abi_ulong start, abi_ulong len, int prot)
144 abi_ulong end, host_start, host_end, addr;
145 int prot1, ret;
147 #ifdef DEBUG_MMAP
148 printf("mprotect: start=0x" TARGET_ABI_FMT_lx
149 "len=0x" TARGET_ABI_FMT_lx " prot=%c%c%c\n", start, len,
150 prot & PROT_READ ? 'r' : '-',
151 prot & PROT_WRITE ? 'w' : '-',
152 prot & PROT_EXEC ? 'x' : '-');
153 #endif
155 if ((start & ~TARGET_PAGE_MASK) != 0)
156 return -EINVAL;
157 len = TARGET_PAGE_ALIGN(len);
158 end = start + len;
159 if (end < start)
160 return -EINVAL;
161 prot &= PROT_READ | PROT_WRITE | PROT_EXEC;
162 if (len == 0)
163 return 0;
165 mmap_lock();
166 host_start = start & qemu_host_page_mask;
167 host_end = HOST_PAGE_ALIGN(end);
168 if (start > host_start) {
169 /* handle host page containing start */
170 prot1 = prot;
171 for(addr = host_start; addr < start; addr += TARGET_PAGE_SIZE) {
172 prot1 |= page_get_flags(addr);
174 if (host_end == host_start + qemu_host_page_size) {
175 for(addr = end; addr < host_end; addr += TARGET_PAGE_SIZE) {
176 prot1 |= page_get_flags(addr);
178 end = host_end;
180 ret = mprotect(g2h(host_start), qemu_host_page_size, prot1 & PAGE_BITS);
181 if (ret != 0)
182 goto error;
183 host_start += qemu_host_page_size;
185 if (end < host_end) {
186 prot1 = prot;
187 for(addr = end; addr < host_end; addr += TARGET_PAGE_SIZE) {
188 prot1 |= page_get_flags(addr);
190 ret = mprotect(g2h(host_end - qemu_host_page_size), qemu_host_page_size,
191 prot1 & PAGE_BITS);
192 if (ret != 0)
193 goto error;
194 host_end -= qemu_host_page_size;
197 /* handle the pages in the middle */
198 if (host_start < host_end) {
199 ret = mprotect(g2h(host_start), host_end - host_start, prot);
200 if (ret != 0)
201 goto error;
203 page_set_flags(start, start + len, prot | PAGE_VALID);
204 mmap_unlock();
205 return 0;
206 error:
207 mmap_unlock();
208 return ret;
211 /* map an incomplete host page */
212 static int mmap_frag(abi_ulong real_start,
213 abi_ulong start, abi_ulong end,
214 int prot, int flags, int fd, abi_ulong offset)
216 abi_ulong real_end, addr;
217 void *host_start;
218 int prot1, prot_new;
220 real_end = real_start + qemu_host_page_size;
221 host_start = g2h(real_start);
223 /* get the protection of the target pages outside the mapping */
224 prot1 = 0;
225 for(addr = real_start; addr < real_end; addr++) {
226 if (addr < start || addr >= end)
227 prot1 |= page_get_flags(addr);
230 if (prot1 == 0) {
231 /* no page was there, so we allocate one */
232 void *p = mmap(host_start, qemu_host_page_size, prot,
233 flags | MAP_ANONYMOUS, -1, 0);
234 if (p == MAP_FAILED)
235 return -1;
236 prot1 = prot;
238 prot1 &= PAGE_BITS;
240 prot_new = prot | prot1;
241 if (!(flags & MAP_ANONYMOUS)) {
242 /* msync() won't work here, so we return an error if write is
243 possible while it is a shared mapping */
244 if ((flags & MAP_TYPE) == MAP_SHARED &&
245 (prot & PROT_WRITE))
246 return -1;
248 /* adjust protection to be able to read */
249 if (!(prot1 & PROT_WRITE))
250 mprotect(host_start, qemu_host_page_size, prot1 | PROT_WRITE);
252 /* read the corresponding file data */
253 if (pread(fd, g2h(start), end - start, offset) == -1)
254 return -1;
256 /* put final protection */
257 if (prot_new != (prot1 | PROT_WRITE))
258 mprotect(host_start, qemu_host_page_size, prot_new);
259 } else {
260 /* just update the protection */
261 if (prot_new != prot1) {
262 mprotect(host_start, qemu_host_page_size, prot_new);
265 return 0;
268 #if defined(__CYGWIN__)
269 /* Cygwin doesn't have a whole lot of address space. */
270 static abi_ulong mmap_next_start = 0x18000000;
271 #else
272 static abi_ulong mmap_next_start = 0x40000000;
273 #endif
275 unsigned long last_brk;
278 * Find and reserve a free memory area of size 'size'. The search
279 * starts at 'start'.
280 * It must be called with mmap_lock() held.
281 * Return -1 if error.
283 abi_ulong mmap_find_vma(abi_ulong start, abi_ulong size)
285 void *ptr;
286 abi_ulong addr;
288 size = HOST_PAGE_ALIGN(size);
289 start &= qemu_host_page_mask;
291 /* If 'start' == 0, then a default start address is used. */
292 if (start == 0)
293 start = mmap_next_start;
295 addr = start;
297 for(;;) {
299 * Reserve needed memory area to avoid a race.
300 * It should be discarded using:
301 * - mmap() with MAP_FIXED flag
302 * - mremap() with MREMAP_FIXED flag
303 * - shmat() with SHM_REMAP flag
305 ptr = mmap((void *)(unsigned long)addr, size, PROT_NONE,
306 MAP_ANONYMOUS|MAP_PRIVATE|MAP_NORESERVE, -1, 0);
308 /* ENOMEM, if host address space has no memory */
309 if (ptr == MAP_FAILED)
310 return (abi_ulong)-1;
312 /* If address fits target address space we've found what we need */
313 if ((unsigned long)ptr + size - 1 <= (abi_ulong)-1)
314 break;
316 /* Unmap and try again with new page */
317 munmap(ptr, size);
318 addr += qemu_host_page_size;
320 /* ENOMEM if we check whole of target address space */
321 if (addr == start)
322 return (abi_ulong)-1;
325 /* Update default start address */
326 if (start == mmap_next_start)
327 mmap_next_start = (unsigned long)ptr + size;
329 return h2g(ptr);
332 /* NOTE: all the constants are the HOST ones */
333 abi_long target_mmap(abi_ulong start, abi_ulong len, int prot,
334 int flags, int fd, abi_ulong offset)
336 abi_ulong ret, end, real_start, real_end, retaddr, host_offset, host_len;
337 unsigned long host_start;
339 mmap_lock();
340 #ifdef DEBUG_MMAP
342 printf("mmap: start=0x" TARGET_ABI_FMT_lx
343 " len=0x" TARGET_ABI_FMT_lx " prot=%c%c%c flags=",
344 start, len,
345 prot & PROT_READ ? 'r' : '-',
346 prot & PROT_WRITE ? 'w' : '-',
347 prot & PROT_EXEC ? 'x' : '-');
348 if (flags & MAP_FIXED)
349 printf("MAP_FIXED ");
350 if (flags & MAP_ANONYMOUS)
351 printf("MAP_ANON ");
352 switch(flags & MAP_TYPE) {
353 case MAP_PRIVATE:
354 printf("MAP_PRIVATE ");
355 break;
356 case MAP_SHARED:
357 printf("MAP_SHARED ");
358 break;
359 default:
360 printf("[MAP_TYPE=0x%x] ", flags & MAP_TYPE);
361 break;
363 printf("fd=%d offset=" TARGET_ABI_FMT_lx "\n", fd, offset);
365 #endif
367 if (offset & ~TARGET_PAGE_MASK) {
368 errno = EINVAL;
369 goto fail;
372 len = TARGET_PAGE_ALIGN(len);
373 if (len == 0)
374 goto the_end;
375 real_start = start & qemu_host_page_mask;
377 /* When mapping files into a memory area larger than the file, accesses
378 to pages beyond the file size will cause a SIGBUS.
380 For example, if mmaping a file of 100 bytes on a host with 4K pages
381 emulating a target with 8K pages, the target expects to be able to
382 access the first 8K. But the host will trap us on any access beyond
383 4K.
385 When emulating a target with a larger page-size than the hosts, we
386 may need to truncate file maps at EOF and add extra anonymous pages
387 up to the targets page boundary. */
389 if ((qemu_real_host_page_size < TARGET_PAGE_SIZE)
390 && !(flags & MAP_ANONYMOUS)) {
391 struct stat sb;
393 if (fstat (fd, &sb) == -1)
394 goto fail;
396 /* Are we trying to create a map beyond EOF?. */
397 if (offset + len > sb.st_size) {
398 /* If so, truncate the file map at eof aligned with
399 the hosts real pagesize. Additional anonymous maps
400 will be created beyond EOF. */
401 len = (sb.st_size - offset);
402 len += qemu_real_host_page_size - 1;
403 len &= ~(qemu_real_host_page_size - 1);
407 if (!(flags & MAP_FIXED)) {
408 abi_ulong mmap_start;
409 void *p;
410 host_offset = offset & qemu_host_page_mask;
411 host_len = len + offset - host_offset;
412 host_len = HOST_PAGE_ALIGN(host_len);
413 mmap_start = mmap_find_vma(real_start, host_len);
414 if (mmap_start == (abi_ulong)-1) {
415 errno = ENOMEM;
416 goto fail;
418 /* Note: we prefer to control the mapping address. It is
419 especially important if qemu_host_page_size >
420 qemu_real_host_page_size */
421 p = mmap(g2h(mmap_start),
422 host_len, prot, flags | MAP_FIXED | MAP_ANONYMOUS, -1, 0);
423 if (p == MAP_FAILED)
424 goto fail;
425 /* update start so that it points to the file position at 'offset' */
426 host_start = (unsigned long)p;
427 if (!(flags & MAP_ANONYMOUS)) {
428 p = mmap(g2h(mmap_start), len, prot,
429 flags | MAP_FIXED, fd, host_offset);
430 host_start += offset - host_offset;
432 start = h2g(host_start);
433 } else {
434 int flg;
435 target_ulong addr;
437 if (start & ~TARGET_PAGE_MASK) {
438 errno = EINVAL;
439 goto fail;
441 end = start + len;
442 real_end = HOST_PAGE_ALIGN(end);
445 * Test if requested memory area fits target address space
446 * It can fail only on 64-bit host with 32-bit target.
447 * On any other target/host host mmap() handles this error correctly.
449 if ((unsigned long)start + len - 1 > (abi_ulong) -1) {
450 errno = EINVAL;
451 goto fail;
454 for(addr = real_start; addr < real_end; addr += TARGET_PAGE_SIZE) {
455 flg = page_get_flags(addr);
456 if (flg & PAGE_RESERVED) {
457 errno = ENXIO;
458 goto fail;
462 /* worst case: we cannot map the file because the offset is not
463 aligned, so we read it */
464 if (!(flags & MAP_ANONYMOUS) &&
465 (offset & ~qemu_host_page_mask) != (start & ~qemu_host_page_mask)) {
466 /* msync() won't work here, so we return an error if write is
467 possible while it is a shared mapping */
468 if ((flags & MAP_TYPE) == MAP_SHARED &&
469 (prot & PROT_WRITE)) {
470 errno = EINVAL;
471 goto fail;
473 retaddr = target_mmap(start, len, prot | PROT_WRITE,
474 MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS,
475 -1, 0);
476 if (retaddr == -1)
477 goto fail;
478 if (pread(fd, g2h(start), len, offset) == -1)
479 goto fail;
480 if (!(prot & PROT_WRITE)) {
481 ret = target_mprotect(start, len, prot);
482 if (ret != 0) {
483 start = ret;
484 goto the_end;
487 goto the_end;
490 /* handle the start of the mapping */
491 if (start > real_start) {
492 if (real_end == real_start + qemu_host_page_size) {
493 /* one single host page */
494 ret = mmap_frag(real_start, start, end,
495 prot, flags, fd, offset);
496 if (ret == -1)
497 goto fail;
498 goto the_end1;
500 ret = mmap_frag(real_start, start, real_start + qemu_host_page_size,
501 prot, flags, fd, offset);
502 if (ret == -1)
503 goto fail;
504 real_start += qemu_host_page_size;
506 /* handle the end of the mapping */
507 if (end < real_end) {
508 ret = mmap_frag(real_end - qemu_host_page_size,
509 real_end - qemu_host_page_size, real_end,
510 prot, flags, fd,
511 offset + real_end - qemu_host_page_size - start);
512 if (ret == -1)
513 goto fail;
514 real_end -= qemu_host_page_size;
517 /* map the middle (easier) */
518 if (real_start < real_end) {
519 void *p;
520 unsigned long offset1;
521 if (flags & MAP_ANONYMOUS)
522 offset1 = 0;
523 else
524 offset1 = offset + real_start - start;
525 p = mmap(g2h(real_start), real_end - real_start,
526 prot, flags, fd, offset1);
527 if (p == MAP_FAILED)
528 goto fail;
531 the_end1:
532 page_set_flags(start, start + len, prot | PAGE_VALID);
533 the_end:
534 #ifdef DEBUG_MMAP
535 printf("ret=0x" TARGET_ABI_FMT_lx "\n", start);
536 page_dump(stdout);
537 printf("\n");
538 #endif
539 mmap_unlock();
540 return start;
541 fail:
542 mmap_unlock();
543 return -1;
546 int target_munmap(abi_ulong start, abi_ulong len)
548 abi_ulong end, real_start, real_end, addr;
549 int prot, ret;
551 #ifdef DEBUG_MMAP
552 printf("munmap: start=0x" TARGET_ABI_FMT_lx " len=0x"
553 TARGET_ABI_FMT_lx "\n",
554 start, len);
555 #endif
556 if (start & ~TARGET_PAGE_MASK)
557 return -EINVAL;
558 len = TARGET_PAGE_ALIGN(len);
559 if (len == 0)
560 return -EINVAL;
561 mmap_lock();
562 end = start + len;
563 real_start = start & qemu_host_page_mask;
564 real_end = HOST_PAGE_ALIGN(end);
566 if (start > real_start) {
567 /* handle host page containing start */
568 prot = 0;
569 for(addr = real_start; addr < start; addr += TARGET_PAGE_SIZE) {
570 prot |= page_get_flags(addr);
572 if (real_end == real_start + qemu_host_page_size) {
573 for(addr = end; addr < real_end; addr += TARGET_PAGE_SIZE) {
574 prot |= page_get_flags(addr);
576 end = real_end;
578 if (prot != 0)
579 real_start += qemu_host_page_size;
581 if (end < real_end) {
582 prot = 0;
583 for(addr = end; addr < real_end; addr += TARGET_PAGE_SIZE) {
584 prot |= page_get_flags(addr);
586 if (prot != 0)
587 real_end -= qemu_host_page_size;
590 ret = 0;
591 /* unmap what we can */
592 if (real_start < real_end) {
593 ret = munmap(g2h(real_start), real_end - real_start);
596 if (ret == 0)
597 page_set_flags(start, start + len, 0);
598 mmap_unlock();
599 return ret;
602 abi_long target_mremap(abi_ulong old_addr, abi_ulong old_size,
603 abi_ulong new_size, unsigned long flags,
604 abi_ulong new_addr)
606 int prot;
607 void *host_addr;
609 mmap_lock();
611 if (flags & MREMAP_FIXED)
612 host_addr = (void *) syscall(__NR_mremap, g2h(old_addr),
613 old_size, new_size,
614 flags,
615 new_addr);
616 else if (flags & MREMAP_MAYMOVE) {
617 abi_ulong mmap_start;
619 mmap_start = mmap_find_vma(0, new_size);
621 if (mmap_start == -1) {
622 errno = ENOMEM;
623 host_addr = MAP_FAILED;
624 } else
625 host_addr = (void *) syscall(__NR_mremap, g2h(old_addr),
626 old_size, new_size,
627 flags | MREMAP_FIXED,
628 g2h(mmap_start));
629 } else {
630 host_addr = mremap(g2h(old_addr), old_size, new_size, flags);
631 /* Check if address fits target address space */
632 if ((unsigned long)host_addr + new_size > (abi_ulong)-1) {
633 /* Revert mremap() changes */
634 host_addr = mremap(g2h(old_addr), new_size, old_size, flags);
635 errno = ENOMEM;
636 host_addr = MAP_FAILED;
640 if (host_addr == MAP_FAILED) {
641 new_addr = -1;
642 } else {
643 new_addr = h2g(host_addr);
644 prot = page_get_flags(old_addr);
645 page_set_flags(old_addr, old_addr + old_size, 0);
646 page_set_flags(new_addr, new_addr + new_size, prot | PAGE_VALID);
648 mmap_unlock();
649 return new_addr;
652 int target_msync(abi_ulong start, abi_ulong len, int flags)
654 abi_ulong end;
656 if (start & ~TARGET_PAGE_MASK)
657 return -EINVAL;
658 len = TARGET_PAGE_ALIGN(len);
659 end = start + len;
660 if (end < start)
661 return -EINVAL;
662 if (end == start)
663 return 0;
665 start &= qemu_host_page_mask;
666 return msync(g2h(start), end - start, flags);