block: check bdrv_in_use() before blockdev operations
[qemu-kvm.git] / linux-user / mmap.c
blob994c02bb77f3fdc1b9969e73736f2c0ad2e100c8
1 /*
2 * mmap support for qemu
4 * Copyright (c) 2003 Fabrice Bellard
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, see <http://www.gnu.org/licenses/>.
19 #include <stdlib.h>
20 #include <stdio.h>
21 #include <stdarg.h>
22 #include <string.h>
23 #include <unistd.h>
24 #include <errno.h>
25 #include <sys/types.h>
26 #include <sys/stat.h>
27 #include <sys/mman.h>
28 #include <linux/mman.h>
29 #include <linux/unistd.h>
31 #include "qemu.h"
32 #include "qemu-common.h"
34 //#define DEBUG_MMAP
36 #if defined(CONFIG_USE_NPTL)
/* Mutex serialising every change to the guest memory map. */
static pthread_mutex_t mmap_mutex = PTHREAD_MUTEX_INITIALIZER;
/* Per-thread nesting depth; lets one thread call mmap_lock() recursively. */
static __thread int mmap_lock_count;

/*
 * Acquire the mmap lock.  Only the outermost call made by a thread
 * actually takes the mutex; nested calls just bump the depth counter.
 */
void mmap_lock(void)
{
    int depth = mmap_lock_count;

    mmap_lock_count = depth + 1;
    if (depth == 0) {
        pthread_mutex_lock(&mmap_mutex);
    }
}

/*
 * Release one level of the mmap lock.  The mutex itself is released
 * once the calling thread's depth counter drops back to zero.
 */
void mmap_unlock(void)
{
    mmap_lock_count -= 1;
    if (mmap_lock_count == 0) {
        pthread_mutex_unlock(&mmap_mutex);
    }
}

/*
 * Take the mutex before fork() so the map is in a consistent state in
 * the child.  Aborts if the calling thread already holds the lock.
 */
void mmap_fork_start(void)
{
    if (mmap_lock_count != 0) {
        abort();
    }
    pthread_mutex_lock(&mmap_mutex);
}

/*
 * Undo mmap_fork_start() after fork(): the parent (child == 0) unlocks
 * the mutex, the child re-initialises it instead.
 */
void mmap_fork_end(int child)
{
    if (!child) {
        pthread_mutex_unlock(&mmap_mutex);
        return;
    }
    pthread_mutex_init(&mmap_mutex, NULL);
}
69 #else
/* This build is not thread-safe to begin with, so the lock hooks are
   deliberately empty. */
void mmap_lock(void)
{
}

void mmap_unlock(void)
{
}
78 #endif
80 /* NOTE: all the constants are the HOST ones, but addresses are target. */
81 int target_mprotect(abi_ulong start, abi_ulong len, int prot)
83 abi_ulong end, host_start, host_end, addr;
84 int prot1, ret;
86 #ifdef DEBUG_MMAP
87 printf("mprotect: start=0x" TARGET_ABI_FMT_lx
88 "len=0x" TARGET_ABI_FMT_lx " prot=%c%c%c\n", start, len,
89 prot & PROT_READ ? 'r' : '-',
90 prot & PROT_WRITE ? 'w' : '-',
91 prot & PROT_EXEC ? 'x' : '-');
92 #endif
94 if ((start & ~TARGET_PAGE_MASK) != 0)
95 return -EINVAL;
96 len = TARGET_PAGE_ALIGN(len);
97 end = start + len;
98 if (end < start)
99 return -EINVAL;
100 prot &= PROT_READ | PROT_WRITE | PROT_EXEC;
101 if (len == 0)
102 return 0;
104 mmap_lock();
105 host_start = start & qemu_host_page_mask;
106 host_end = HOST_PAGE_ALIGN(end);
107 if (start > host_start) {
108 /* handle host page containing start */
109 prot1 = prot;
110 for(addr = host_start; addr < start; addr += TARGET_PAGE_SIZE) {
111 prot1 |= page_get_flags(addr);
113 if (host_end == host_start + qemu_host_page_size) {
114 for(addr = end; addr < host_end; addr += TARGET_PAGE_SIZE) {
115 prot1 |= page_get_flags(addr);
117 end = host_end;
119 ret = mprotect(g2h(host_start), qemu_host_page_size, prot1 & PAGE_BITS);
120 if (ret != 0)
121 goto error;
122 host_start += qemu_host_page_size;
124 if (end < host_end) {
125 prot1 = prot;
126 for(addr = end; addr < host_end; addr += TARGET_PAGE_SIZE) {
127 prot1 |= page_get_flags(addr);
129 ret = mprotect(g2h(host_end - qemu_host_page_size), qemu_host_page_size,
130 prot1 & PAGE_BITS);
131 if (ret != 0)
132 goto error;
133 host_end -= qemu_host_page_size;
136 /* handle the pages in the middle */
137 if (host_start < host_end) {
138 ret = mprotect(g2h(host_start), host_end - host_start, prot);
139 if (ret != 0)
140 goto error;
142 page_set_flags(start, start + len, prot | PAGE_VALID);
143 mmap_unlock();
144 return 0;
145 error:
146 mmap_unlock();
147 return ret;
150 /* map an incomplete host page */
151 static int mmap_frag(abi_ulong real_start,
152 abi_ulong start, abi_ulong end,
153 int prot, int flags, int fd, abi_ulong offset)
155 abi_ulong real_end, addr;
156 void *host_start;
157 int prot1, prot_new;
159 real_end = real_start + qemu_host_page_size;
160 host_start = g2h(real_start);
162 /* get the protection of the target pages outside the mapping */
163 prot1 = 0;
164 for(addr = real_start; addr < real_end; addr++) {
165 if (addr < start || addr >= end)
166 prot1 |= page_get_flags(addr);
169 if (prot1 == 0) {
170 /* no page was there, so we allocate one */
171 void *p = mmap(host_start, qemu_host_page_size, prot,
172 flags | MAP_ANONYMOUS, -1, 0);
173 if (p == MAP_FAILED)
174 return -1;
175 prot1 = prot;
177 prot1 &= PAGE_BITS;
179 prot_new = prot | prot1;
180 if (!(flags & MAP_ANONYMOUS)) {
181 /* msync() won't work here, so we return an error if write is
182 possible while it is a shared mapping */
183 if ((flags & MAP_TYPE) == MAP_SHARED &&
184 (prot & PROT_WRITE))
185 return -1;
187 /* adjust protection to be able to read */
188 if (!(prot1 & PROT_WRITE))
189 mprotect(host_start, qemu_host_page_size, prot1 | PROT_WRITE);
191 /* read the corresponding file data */
192 if (pread(fd, g2h(start), end - start, offset) == -1)
193 return -1;
195 /* put final protection */
196 if (prot_new != (prot1 | PROT_WRITE))
197 mprotect(host_start, qemu_host_page_size, prot_new);
198 } else {
199 /* just update the protection */
200 if (prot_new != prot1) {
201 mprotect(host_start, qemu_host_page_size, prot_new);
204 return 0;
207 #if HOST_LONG_BITS == 64 && TARGET_ABI_BITS == 64
208 # define TASK_UNMAPPED_BASE (1ul << 38)
209 #elif defined(__CYGWIN__)
210 /* Cygwin doesn't have a whole lot of address space. */
211 # define TASK_UNMAPPED_BASE 0x18000000
212 #else
213 # define TASK_UNMAPPED_BASE 0x40000000
214 #endif
215 static abi_ulong mmap_next_start = TASK_UNMAPPED_BASE;
217 unsigned long last_brk;
219 #ifdef CONFIG_USE_GUEST_BASE
220 /* Subroutine of mmap_find_vma, used when we have pre-allocated a chunk
221 of guest address space. */
222 static abi_ulong mmap_find_vma_reserved(abi_ulong start, abi_ulong size)
224 abi_ulong addr;
225 abi_ulong last_addr;
226 int prot;
227 int looped = 0;
229 if (size > RESERVED_VA) {
230 return (abi_ulong)-1;
233 last_addr = start;
234 for (addr = start; last_addr + size != addr; addr += qemu_host_page_size) {
235 if (last_addr + size >= RESERVED_VA
236 || (abi_ulong)(last_addr + size) < last_addr) {
237 if (looped) {
238 return (abi_ulong)-1;
240 last_addr = qemu_host_page_size;
241 addr = 0;
242 looped = 1;
243 continue;
245 prot = page_get_flags(addr);
246 if (prot) {
247 last_addr = addr + qemu_host_page_size;
250 mmap_next_start = addr;
251 return last_addr;
253 #endif
256 * Find and reserve a free memory area of size 'size'. The search
257 * starts at 'start'.
258 * It must be called with mmap_lock() held.
259 * Return -1 if error.
261 abi_ulong mmap_find_vma(abi_ulong start, abi_ulong size)
263 void *ptr, *prev;
264 abi_ulong addr;
265 int wrapped, repeat;
267 /* If 'start' == 0, then a default start address is used. */
268 if (start == 0) {
269 start = mmap_next_start;
270 } else {
271 start &= qemu_host_page_mask;
274 size = HOST_PAGE_ALIGN(size);
276 #ifdef CONFIG_USE_GUEST_BASE
277 if (RESERVED_VA) {
278 return mmap_find_vma_reserved(start, size);
280 #endif
282 addr = start;
283 wrapped = repeat = 0;
284 prev = 0;
286 for (;; prev = ptr) {
288 * Reserve needed memory area to avoid a race.
289 * It should be discarded using:
290 * - mmap() with MAP_FIXED flag
291 * - mremap() with MREMAP_FIXED flag
292 * - shmat() with SHM_REMAP flag
294 ptr = mmap(g2h(addr), size, PROT_NONE,
295 MAP_ANONYMOUS|MAP_PRIVATE|MAP_NORESERVE, -1, 0);
297 /* ENOMEM, if host address space has no memory */
298 if (ptr == MAP_FAILED) {
299 return (abi_ulong)-1;
302 /* Count the number of sequential returns of the same address.
303 This is used to modify the search algorithm below. */
304 repeat = (ptr == prev ? repeat + 1 : 0);
306 if (h2g_valid(ptr + size - 1)) {
307 addr = h2g(ptr);
309 if ((addr & ~TARGET_PAGE_MASK) == 0) {
310 /* Success. */
311 if (start == mmap_next_start && addr >= TASK_UNMAPPED_BASE) {
312 mmap_next_start = addr + size;
314 return addr;
317 /* The address is not properly aligned for the target. */
318 switch (repeat) {
319 case 0:
320 /* Assume the result that the kernel gave us is the
321 first with enough free space, so start again at the
322 next higher target page. */
323 addr = TARGET_PAGE_ALIGN(addr);
324 break;
325 case 1:
326 /* Sometimes the kernel decides to perform the allocation
327 at the top end of memory instead. */
328 addr &= TARGET_PAGE_MASK;
329 break;
330 case 2:
331 /* Start over at low memory. */
332 addr = 0;
333 break;
334 default:
335 /* Fail. This unaligned block must the last. */
336 addr = -1;
337 break;
339 } else {
340 /* Since the result the kernel gave didn't fit, start
341 again at low memory. If any repetition, fail. */
342 addr = (repeat ? -1 : 0);
345 /* Unmap and try again. */
346 munmap(ptr, size);
348 /* ENOMEM if we checked the whole of the target address space. */
349 if (addr == (abi_ulong)-1) {
350 return (abi_ulong)-1;
351 } else if (addr == 0) {
352 if (wrapped) {
353 return (abi_ulong)-1;
355 wrapped = 1;
356 /* Don't actually use 0 when wrapping, instead indicate
357 that we'd truly like an allocation in low memory. */
358 addr = (mmap_min_addr > TARGET_PAGE_SIZE
359 ? TARGET_PAGE_ALIGN(mmap_min_addr)
360 : TARGET_PAGE_SIZE);
361 } else if (wrapped && addr >= start) {
362 return (abi_ulong)-1;
367 /* NOTE: all the constants are the HOST ones */
368 abi_long target_mmap(abi_ulong start, abi_ulong len, int prot,
369 int flags, int fd, abi_ulong offset)
371 abi_ulong ret, end, real_start, real_end, retaddr, host_offset, host_len;
372 unsigned long host_start;
374 mmap_lock();
375 #ifdef DEBUG_MMAP
377 printf("mmap: start=0x" TARGET_ABI_FMT_lx
378 " len=0x" TARGET_ABI_FMT_lx " prot=%c%c%c flags=",
379 start, len,
380 prot & PROT_READ ? 'r' : '-',
381 prot & PROT_WRITE ? 'w' : '-',
382 prot & PROT_EXEC ? 'x' : '-');
383 if (flags & MAP_FIXED)
384 printf("MAP_FIXED ");
385 if (flags & MAP_ANONYMOUS)
386 printf("MAP_ANON ");
387 switch(flags & MAP_TYPE) {
388 case MAP_PRIVATE:
389 printf("MAP_PRIVATE ");
390 break;
391 case MAP_SHARED:
392 printf("MAP_SHARED ");
393 break;
394 default:
395 printf("[MAP_TYPE=0x%x] ", flags & MAP_TYPE);
396 break;
398 printf("fd=%d offset=" TARGET_ABI_FMT_lx "\n", fd, offset);
400 #endif
402 if (offset & ~TARGET_PAGE_MASK) {
403 errno = EINVAL;
404 goto fail;
407 len = TARGET_PAGE_ALIGN(len);
408 if (len == 0)
409 goto the_end;
410 real_start = start & qemu_host_page_mask;
412 /* When mapping files into a memory area larger than the file, accesses
413 to pages beyond the file size will cause a SIGBUS.
415 For example, if mmaping a file of 100 bytes on a host with 4K pages
416 emulating a target with 8K pages, the target expects to be able to
417 access the first 8K. But the host will trap us on any access beyond
418 4K.
420 When emulating a target with a larger page-size than the hosts, we
421 may need to truncate file maps at EOF and add extra anonymous pages
422 up to the targets page boundary. */
424 if ((qemu_real_host_page_size < TARGET_PAGE_SIZE)
425 && !(flags & MAP_ANONYMOUS)) {
426 struct stat sb;
428 if (fstat (fd, &sb) == -1)
429 goto fail;
431 /* Are we trying to create a map beyond EOF?. */
432 if (offset + len > sb.st_size) {
433 /* If so, truncate the file map at eof aligned with
434 the hosts real pagesize. Additional anonymous maps
435 will be created beyond EOF. */
436 len = (sb.st_size - offset);
437 len += qemu_real_host_page_size - 1;
438 len &= ~(qemu_real_host_page_size - 1);
442 if (!(flags & MAP_FIXED)) {
443 abi_ulong mmap_start;
444 void *p;
445 host_offset = offset & qemu_host_page_mask;
446 host_len = len + offset - host_offset;
447 host_len = HOST_PAGE_ALIGN(host_len);
448 mmap_start = mmap_find_vma(real_start, host_len);
449 if (mmap_start == (abi_ulong)-1) {
450 errno = ENOMEM;
451 goto fail;
453 /* Note: we prefer to control the mapping address. It is
454 especially important if qemu_host_page_size >
455 qemu_real_host_page_size */
456 p = mmap(g2h(mmap_start),
457 host_len, prot, flags | MAP_FIXED | MAP_ANONYMOUS, -1, 0);
458 if (p == MAP_FAILED)
459 goto fail;
460 /* update start so that it points to the file position at 'offset' */
461 host_start = (unsigned long)p;
462 if (!(flags & MAP_ANONYMOUS)) {
463 p = mmap(g2h(mmap_start), len, prot,
464 flags | MAP_FIXED, fd, host_offset);
465 host_start += offset - host_offset;
467 start = h2g(host_start);
468 } else {
469 if (start & ~TARGET_PAGE_MASK) {
470 errno = EINVAL;
471 goto fail;
473 end = start + len;
474 real_end = HOST_PAGE_ALIGN(end);
477 * Test if requested memory area fits target address space
478 * It can fail only on 64-bit host with 32-bit target.
479 * On any other target/host host mmap() handles this error correctly.
481 if ((unsigned long)start + len - 1 > (abi_ulong) -1) {
482 errno = EINVAL;
483 goto fail;
486 /* worst case: we cannot map the file because the offset is not
487 aligned, so we read it */
488 if (!(flags & MAP_ANONYMOUS) &&
489 (offset & ~qemu_host_page_mask) != (start & ~qemu_host_page_mask)) {
490 /* msync() won't work here, so we return an error if write is
491 possible while it is a shared mapping */
492 if ((flags & MAP_TYPE) == MAP_SHARED &&
493 (prot & PROT_WRITE)) {
494 errno = EINVAL;
495 goto fail;
497 retaddr = target_mmap(start, len, prot | PROT_WRITE,
498 MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS,
499 -1, 0);
500 if (retaddr == -1)
501 goto fail;
502 if (pread(fd, g2h(start), len, offset) == -1)
503 goto fail;
504 if (!(prot & PROT_WRITE)) {
505 ret = target_mprotect(start, len, prot);
506 if (ret != 0) {
507 start = ret;
508 goto the_end;
511 goto the_end;
514 /* handle the start of the mapping */
515 if (start > real_start) {
516 if (real_end == real_start + qemu_host_page_size) {
517 /* one single host page */
518 ret = mmap_frag(real_start, start, end,
519 prot, flags, fd, offset);
520 if (ret == -1)
521 goto fail;
522 goto the_end1;
524 ret = mmap_frag(real_start, start, real_start + qemu_host_page_size,
525 prot, flags, fd, offset);
526 if (ret == -1)
527 goto fail;
528 real_start += qemu_host_page_size;
530 /* handle the end of the mapping */
531 if (end < real_end) {
532 ret = mmap_frag(real_end - qemu_host_page_size,
533 real_end - qemu_host_page_size, real_end,
534 prot, flags, fd,
535 offset + real_end - qemu_host_page_size - start);
536 if (ret == -1)
537 goto fail;
538 real_end -= qemu_host_page_size;
541 /* map the middle (easier) */
542 if (real_start < real_end) {
543 void *p;
544 unsigned long offset1;
545 if (flags & MAP_ANONYMOUS)
546 offset1 = 0;
547 else
548 offset1 = offset + real_start - start;
549 p = mmap(g2h(real_start), real_end - real_start,
550 prot, flags, fd, offset1);
551 if (p == MAP_FAILED)
552 goto fail;
555 the_end1:
556 page_set_flags(start, start + len, prot | PAGE_VALID);
557 the_end:
558 #ifdef DEBUG_MMAP
559 printf("ret=0x" TARGET_ABI_FMT_lx "\n", start);
560 page_dump(stdout);
561 printf("\n");
562 #endif
563 mmap_unlock();
564 return start;
565 fail:
566 mmap_unlock();
567 return -1;
570 static void mmap_reserve(abi_ulong start, abi_ulong size)
572 abi_ulong real_start;
573 abi_ulong real_end;
574 abi_ulong addr;
575 abi_ulong end;
576 int prot;
578 real_start = start & qemu_host_page_mask;
579 real_end = HOST_PAGE_ALIGN(start + size);
580 end = start + size;
581 if (start > real_start) {
582 /* handle host page containing start */
583 prot = 0;
584 for (addr = real_start; addr < start; addr += TARGET_PAGE_SIZE) {
585 prot |= page_get_flags(addr);
587 if (real_end == real_start + qemu_host_page_size) {
588 for (addr = end; addr < real_end; addr += TARGET_PAGE_SIZE) {
589 prot |= page_get_flags(addr);
591 end = real_end;
593 if (prot != 0)
594 real_start += qemu_host_page_size;
596 if (end < real_end) {
597 prot = 0;
598 for (addr = end; addr < real_end; addr += TARGET_PAGE_SIZE) {
599 prot |= page_get_flags(addr);
601 if (prot != 0)
602 real_end -= qemu_host_page_size;
604 if (real_start != real_end) {
605 mmap(g2h(real_start), real_end - real_start, PROT_NONE,
606 MAP_FIXED | MAP_ANONYMOUS | MAP_PRIVATE | MAP_NORESERVE,
607 -1, 0);
611 int target_munmap(abi_ulong start, abi_ulong len)
613 abi_ulong end, real_start, real_end, addr;
614 int prot, ret;
616 #ifdef DEBUG_MMAP
617 printf("munmap: start=0x" TARGET_ABI_FMT_lx " len=0x"
618 TARGET_ABI_FMT_lx "\n",
619 start, len);
620 #endif
621 if (start & ~TARGET_PAGE_MASK)
622 return -EINVAL;
623 len = TARGET_PAGE_ALIGN(len);
624 if (len == 0)
625 return -EINVAL;
626 mmap_lock();
627 end = start + len;
628 real_start = start & qemu_host_page_mask;
629 real_end = HOST_PAGE_ALIGN(end);
631 if (start > real_start) {
632 /* handle host page containing start */
633 prot = 0;
634 for(addr = real_start; addr < start; addr += TARGET_PAGE_SIZE) {
635 prot |= page_get_flags(addr);
637 if (real_end == real_start + qemu_host_page_size) {
638 for(addr = end; addr < real_end; addr += TARGET_PAGE_SIZE) {
639 prot |= page_get_flags(addr);
641 end = real_end;
643 if (prot != 0)
644 real_start += qemu_host_page_size;
646 if (end < real_end) {
647 prot = 0;
648 for(addr = end; addr < real_end; addr += TARGET_PAGE_SIZE) {
649 prot |= page_get_flags(addr);
651 if (prot != 0)
652 real_end -= qemu_host_page_size;
655 ret = 0;
656 /* unmap what we can */
657 if (real_start < real_end) {
658 if (RESERVED_VA) {
659 mmap_reserve(real_start, real_end - real_start);
660 } else {
661 ret = munmap(g2h(real_start), real_end - real_start);
665 if (ret == 0)
666 page_set_flags(start, start + len, 0);
667 mmap_unlock();
668 return ret;
671 abi_long target_mremap(abi_ulong old_addr, abi_ulong old_size,
672 abi_ulong new_size, unsigned long flags,
673 abi_ulong new_addr)
675 int prot;
676 void *host_addr;
678 mmap_lock();
680 if (flags & MREMAP_FIXED) {
681 host_addr = (void *) syscall(__NR_mremap, g2h(old_addr),
682 old_size, new_size,
683 flags,
684 g2h(new_addr));
686 if (RESERVED_VA && host_addr != MAP_FAILED) {
687 /* If new and old addresses overlap then the above mremap will
688 already have failed with EINVAL. */
689 mmap_reserve(old_addr, old_size);
691 } else if (flags & MREMAP_MAYMOVE) {
692 abi_ulong mmap_start;
694 mmap_start = mmap_find_vma(0, new_size);
696 if (mmap_start == -1) {
697 errno = ENOMEM;
698 host_addr = MAP_FAILED;
699 } else {
700 host_addr = (void *) syscall(__NR_mremap, g2h(old_addr),
701 old_size, new_size,
702 flags | MREMAP_FIXED,
703 g2h(mmap_start));
704 if ( RESERVED_VA ) {
705 mmap_reserve(old_addr, old_size);
708 } else {
709 int prot = 0;
710 if (RESERVED_VA && old_size < new_size) {
711 abi_ulong addr;
712 for (addr = old_addr + old_size;
713 addr < old_addr + new_size;
714 addr++) {
715 prot |= page_get_flags(addr);
718 if (prot == 0) {
719 host_addr = mremap(g2h(old_addr), old_size, new_size, flags);
720 if (host_addr != MAP_FAILED && RESERVED_VA && old_size > new_size) {
721 mmap_reserve(old_addr + old_size, new_size - old_size);
723 } else {
724 errno = ENOMEM;
725 host_addr = MAP_FAILED;
727 /* Check if address fits target address space */
728 if ((unsigned long)host_addr + new_size > (abi_ulong)-1) {
729 /* Revert mremap() changes */
730 host_addr = mremap(g2h(old_addr), new_size, old_size, flags);
731 errno = ENOMEM;
732 host_addr = MAP_FAILED;
736 if (host_addr == MAP_FAILED) {
737 new_addr = -1;
738 } else {
739 new_addr = h2g(host_addr);
740 prot = page_get_flags(old_addr);
741 page_set_flags(old_addr, old_addr + old_size, 0);
742 page_set_flags(new_addr, new_addr + new_size, prot | PAGE_VALID);
744 mmap_unlock();
745 return new_addr;
748 int target_msync(abi_ulong start, abi_ulong len, int flags)
750 abi_ulong end;
752 if (start & ~TARGET_PAGE_MASK)
753 return -EINVAL;
754 len = TARGET_PAGE_ALIGN(len);
755 end = start + len;
756 if (end < start)
757 return -EINVAL;
758 if (end == start)
759 return 0;
761 start &= qemu_host_page_mask;
762 return msync(g2h(start), end - start, flags);