tracetool: Rewrite infrastructure as python modules
[qemu/qmp-unstable.git] / linux-user / mmap.c
blob7125d1cd4bc2d1c8b262caf4f4b009b961e2a376
1 /*
2 * mmap support for qemu
4 * Copyright (c) 2003 Fabrice Bellard
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, see <http://www.gnu.org/licenses/>.
19 #include <stdlib.h>
20 #include <stdio.h>
21 #include <stdarg.h>
22 #include <string.h>
23 #include <unistd.h>
24 #include <errno.h>
25 #include <sys/types.h>
26 #include <sys/stat.h>
27 #include <sys/mman.h>
28 #include <linux/mman.h>
29 #include <linux/unistd.h>
31 #include "qemu.h"
32 #include "qemu-common.h"
34 //#define DEBUG_MMAP
#if defined(CONFIG_USE_NPTL)
/* Mutex guarding every change made to the guest memory map. */
static pthread_mutex_t mmap_mutex = PTHREAD_MUTEX_INITIALIZER;
/* Per-thread nesting depth of mmap_lock() calls. */
static __thread int mmap_lock_count;

/*
 * Take the mmap lock.  Calls nest: only the outermost call made by a
 * thread actually locks the mutex, inner calls just bump the counter.
 */
void mmap_lock(void)
{
    int depth = mmap_lock_count;

    mmap_lock_count = depth + 1;
    if (depth == 0) {
        pthread_mutex_lock(&mmap_mutex);
    }
}

/*
 * Drop one nesting level of the mmap lock; the mutex itself is released
 * when the per-thread counter returns to zero.
 */
void mmap_unlock(void)
{
    mmap_lock_count -= 1;
    if (mmap_lock_count == 0) {
        pthread_mutex_unlock(&mmap_mutex);
    }
}

/*
 * Grab the mutex so the map is in a consistent state across fork().
 * The calling thread must not already hold the mmap lock; if it does
 * the process is aborted.
 */
void mmap_fork_start(void)
{
    if (mmap_lock_count != 0) {
        abort();
    }
    pthread_mutex_lock(&mmap_mutex);
}

/*
 * Counterpart of mmap_fork_start().  The child (child != 0)
 * re-initialises the mutex, the parent simply unlocks it.
 */
void mmap_fork_end(int child)
{
    if (!child) {
        pthread_mutex_unlock(&mmap_mutex);
    } else {
        pthread_mutex_init(&mmap_mutex, NULL);
    }
}
#else
/* We aren't threadsafe to start with, so no need to worry about locking. */
void mmap_lock(void)
{
}

void mmap_unlock(void)
{
}
#endif
80 /* NOTE: all the constants are the HOST ones, but addresses are target. */
81 int target_mprotect(abi_ulong start, abi_ulong len, int prot)
83 abi_ulong end, host_start, host_end, addr;
84 int prot1, ret;
86 #ifdef DEBUG_MMAP
87 printf("mprotect: start=0x" TARGET_ABI_FMT_lx
88 "len=0x" TARGET_ABI_FMT_lx " prot=%c%c%c\n", start, len,
89 prot & PROT_READ ? 'r' : '-',
90 prot & PROT_WRITE ? 'w' : '-',
91 prot & PROT_EXEC ? 'x' : '-');
92 #endif
94 if ((start & ~TARGET_PAGE_MASK) != 0)
95 return -EINVAL;
96 len = TARGET_PAGE_ALIGN(len);
97 end = start + len;
98 if (end < start)
99 return -EINVAL;
100 prot &= PROT_READ | PROT_WRITE | PROT_EXEC;
101 if (len == 0)
102 return 0;
104 mmap_lock();
105 host_start = start & qemu_host_page_mask;
106 host_end = HOST_PAGE_ALIGN(end);
107 if (start > host_start) {
108 /* handle host page containing start */
109 prot1 = prot;
110 for(addr = host_start; addr < start; addr += TARGET_PAGE_SIZE) {
111 prot1 |= page_get_flags(addr);
113 if (host_end == host_start + qemu_host_page_size) {
114 for(addr = end; addr < host_end; addr += TARGET_PAGE_SIZE) {
115 prot1 |= page_get_flags(addr);
117 end = host_end;
119 ret = mprotect(g2h(host_start), qemu_host_page_size, prot1 & PAGE_BITS);
120 if (ret != 0)
121 goto error;
122 host_start += qemu_host_page_size;
124 if (end < host_end) {
125 prot1 = prot;
126 for(addr = end; addr < host_end; addr += TARGET_PAGE_SIZE) {
127 prot1 |= page_get_flags(addr);
129 ret = mprotect(g2h(host_end - qemu_host_page_size), qemu_host_page_size,
130 prot1 & PAGE_BITS);
131 if (ret != 0)
132 goto error;
133 host_end -= qemu_host_page_size;
136 /* handle the pages in the middle */
137 if (host_start < host_end) {
138 ret = mprotect(g2h(host_start), host_end - host_start, prot);
139 if (ret != 0)
140 goto error;
142 page_set_flags(start, start + len, prot | PAGE_VALID);
143 mmap_unlock();
144 return 0;
145 error:
146 mmap_unlock();
147 return ret;
150 /* map an incomplete host page */
151 static int mmap_frag(abi_ulong real_start,
152 abi_ulong start, abi_ulong end,
153 int prot, int flags, int fd, abi_ulong offset)
155 abi_ulong real_end, addr;
156 void *host_start;
157 int prot1, prot_new;
159 real_end = real_start + qemu_host_page_size;
160 host_start = g2h(real_start);
162 /* get the protection of the target pages outside the mapping */
163 prot1 = 0;
164 for(addr = real_start; addr < real_end; addr++) {
165 if (addr < start || addr >= end)
166 prot1 |= page_get_flags(addr);
169 if (prot1 == 0) {
170 /* no page was there, so we allocate one */
171 void *p = mmap(host_start, qemu_host_page_size, prot,
172 flags | MAP_ANONYMOUS, -1, 0);
173 if (p == MAP_FAILED)
174 return -1;
175 prot1 = prot;
177 prot1 &= PAGE_BITS;
179 prot_new = prot | prot1;
180 if (!(flags & MAP_ANONYMOUS)) {
181 /* msync() won't work here, so we return an error if write is
182 possible while it is a shared mapping */
183 if ((flags & MAP_TYPE) == MAP_SHARED &&
184 (prot & PROT_WRITE))
185 return -1;
187 /* adjust protection to be able to read */
188 if (!(prot1 & PROT_WRITE))
189 mprotect(host_start, qemu_host_page_size, prot1 | PROT_WRITE);
191 /* read the corresponding file data */
192 if (pread(fd, g2h(start), end - start, offset) == -1)
193 return -1;
195 /* put final protection */
196 if (prot_new != (prot1 | PROT_WRITE))
197 mprotect(host_start, qemu_host_page_size, prot_new);
198 } else {
199 /* just update the protection */
200 if (prot_new != prot1) {
201 mprotect(host_start, qemu_host_page_size, prot_new);
204 return 0;
207 #if HOST_LONG_BITS == 64 && TARGET_ABI_BITS == 64
208 # define TASK_UNMAPPED_BASE (1ul << 38)
209 #elif defined(__CYGWIN__)
210 /* Cygwin doesn't have a whole lot of address space. */
211 # define TASK_UNMAPPED_BASE 0x18000000
212 #else
213 # define TASK_UNMAPPED_BASE 0x40000000
214 #endif
215 abi_ulong mmap_next_start = TASK_UNMAPPED_BASE;
217 unsigned long last_brk;
#ifdef CONFIG_USE_GUEST_BASE
/*
 * Subroutine of mmap_find_vma, used when we have pre-allocated a chunk
 * of guest address space.
 *
 * Walks downwards one host page at a time, looking for 'size' bytes
 * (rounded up to host pages) for which page_get_flags() reports
 * nothing.  The walk first covers the area ending at 'start' + size
 * (capped at RESERVED_VA); if it runs off the bottom it restarts once
 * from RESERVED_VA.  Returns the address found, or (abi_ulong)-1.
 */
static abi_ulong mmap_find_vma_reserved(abi_ulong start, abi_ulong size)
{
    abi_ulong candidate;
    abi_ulong limit;
    int restarted = 0;

    if (size > RESERVED_VA) {
        return (abi_ulong)-1;
    }

    size = HOST_PAGE_ALIGN(size);
    limit = start + size;
    if (limit > RESERVED_VA) {
        limit = RESERVED_VA;
    }
    candidate = limit - qemu_host_page_size;

    for (;;) {
        if (candidate > limit) {
            /* The candidate moved above the limit (the downward walk
               wrapped around): retry once from the top. */
            if (restarted) {
                return (abi_ulong)-1;
            }
            limit = RESERVED_VA;
            candidate = limit - qemu_host_page_size;
            restarted = 1;
            continue;
        }
        if (page_get_flags(candidate)) {
            /* Page in use: the free run has to end below it. */
            limit = candidate;
        }
        if (candidate + size == limit) {
            break;
        }
        candidate -= qemu_host_page_size;
    }

    if (start == mmap_next_start) {
        mmap_next_start = candidate;
    }

    return candidate;
}
#endif
269 * Find and reserve a free memory area of size 'size'. The search
270 * starts at 'start'.
271 * It must be called with mmap_lock() held.
272 * Return -1 if error.
274 abi_ulong mmap_find_vma(abi_ulong start, abi_ulong size)
276 void *ptr, *prev;
277 abi_ulong addr;
278 int wrapped, repeat;
280 /* If 'start' == 0, then a default start address is used. */
281 if (start == 0) {
282 start = mmap_next_start;
283 } else {
284 start &= qemu_host_page_mask;
287 size = HOST_PAGE_ALIGN(size);
289 #ifdef CONFIG_USE_GUEST_BASE
290 if (RESERVED_VA) {
291 return mmap_find_vma_reserved(start, size);
293 #endif
295 addr = start;
296 wrapped = repeat = 0;
297 prev = 0;
299 for (;; prev = ptr) {
301 * Reserve needed memory area to avoid a race.
302 * It should be discarded using:
303 * - mmap() with MAP_FIXED flag
304 * - mremap() with MREMAP_FIXED flag
305 * - shmat() with SHM_REMAP flag
307 ptr = mmap(g2h(addr), size, PROT_NONE,
308 MAP_ANONYMOUS|MAP_PRIVATE|MAP_NORESERVE, -1, 0);
310 /* ENOMEM, if host address space has no memory */
311 if (ptr == MAP_FAILED) {
312 return (abi_ulong)-1;
315 /* Count the number of sequential returns of the same address.
316 This is used to modify the search algorithm below. */
317 repeat = (ptr == prev ? repeat + 1 : 0);
319 if (h2g_valid(ptr + size - 1)) {
320 addr = h2g(ptr);
322 if ((addr & ~TARGET_PAGE_MASK) == 0) {
323 /* Success. */
324 if (start == mmap_next_start && addr >= TASK_UNMAPPED_BASE) {
325 mmap_next_start = addr + size;
327 return addr;
330 /* The address is not properly aligned for the target. */
331 switch (repeat) {
332 case 0:
333 /* Assume the result that the kernel gave us is the
334 first with enough free space, so start again at the
335 next higher target page. */
336 addr = TARGET_PAGE_ALIGN(addr);
337 break;
338 case 1:
339 /* Sometimes the kernel decides to perform the allocation
340 at the top end of memory instead. */
341 addr &= TARGET_PAGE_MASK;
342 break;
343 case 2:
344 /* Start over at low memory. */
345 addr = 0;
346 break;
347 default:
348 /* Fail. This unaligned block must the last. */
349 addr = -1;
350 break;
352 } else {
353 /* Since the result the kernel gave didn't fit, start
354 again at low memory. If any repetition, fail. */
355 addr = (repeat ? -1 : 0);
358 /* Unmap and try again. */
359 munmap(ptr, size);
361 /* ENOMEM if we checked the whole of the target address space. */
362 if (addr == (abi_ulong)-1) {
363 return (abi_ulong)-1;
364 } else if (addr == 0) {
365 if (wrapped) {
366 return (abi_ulong)-1;
368 wrapped = 1;
369 /* Don't actually use 0 when wrapping, instead indicate
370 that we'd truly like an allocation in low memory. */
371 addr = (mmap_min_addr > TARGET_PAGE_SIZE
372 ? TARGET_PAGE_ALIGN(mmap_min_addr)
373 : TARGET_PAGE_SIZE);
374 } else if (wrapped && addr >= start) {
375 return (abi_ulong)-1;
380 /* NOTE: all the constants are the HOST ones */
381 abi_long target_mmap(abi_ulong start, abi_ulong len, int prot,
382 int flags, int fd, abi_ulong offset)
384 abi_ulong ret, end, real_start, real_end, retaddr, host_offset, host_len;
385 unsigned long host_start;
387 mmap_lock();
388 #ifdef DEBUG_MMAP
390 printf("mmap: start=0x" TARGET_ABI_FMT_lx
391 " len=0x" TARGET_ABI_FMT_lx " prot=%c%c%c flags=",
392 start, len,
393 prot & PROT_READ ? 'r' : '-',
394 prot & PROT_WRITE ? 'w' : '-',
395 prot & PROT_EXEC ? 'x' : '-');
396 if (flags & MAP_FIXED)
397 printf("MAP_FIXED ");
398 if (flags & MAP_ANONYMOUS)
399 printf("MAP_ANON ");
400 switch(flags & MAP_TYPE) {
401 case MAP_PRIVATE:
402 printf("MAP_PRIVATE ");
403 break;
404 case MAP_SHARED:
405 printf("MAP_SHARED ");
406 break;
407 default:
408 printf("[MAP_TYPE=0x%x] ", flags & MAP_TYPE);
409 break;
411 printf("fd=%d offset=" TARGET_ABI_FMT_lx "\n", fd, offset);
413 #endif
415 if (offset & ~TARGET_PAGE_MASK) {
416 errno = EINVAL;
417 goto fail;
420 len = TARGET_PAGE_ALIGN(len);
421 if (len == 0)
422 goto the_end;
423 real_start = start & qemu_host_page_mask;
425 /* When mapping files into a memory area larger than the file, accesses
426 to pages beyond the file size will cause a SIGBUS.
428 For example, if mmaping a file of 100 bytes on a host with 4K pages
429 emulating a target with 8K pages, the target expects to be able to
430 access the first 8K. But the host will trap us on any access beyond
431 4K.
433 When emulating a target with a larger page-size than the hosts, we
434 may need to truncate file maps at EOF and add extra anonymous pages
435 up to the targets page boundary. */
437 if ((qemu_real_host_page_size < TARGET_PAGE_SIZE)
438 && !(flags & MAP_ANONYMOUS)) {
439 struct stat sb;
441 if (fstat (fd, &sb) == -1)
442 goto fail;
444 /* Are we trying to create a map beyond EOF?. */
445 if (offset + len > sb.st_size) {
446 /* If so, truncate the file map at eof aligned with
447 the hosts real pagesize. Additional anonymous maps
448 will be created beyond EOF. */
449 len = (sb.st_size - offset);
450 len += qemu_real_host_page_size - 1;
451 len &= ~(qemu_real_host_page_size - 1);
455 if (!(flags & MAP_FIXED)) {
456 abi_ulong mmap_start;
457 void *p;
458 host_offset = offset & qemu_host_page_mask;
459 host_len = len + offset - host_offset;
460 host_len = HOST_PAGE_ALIGN(host_len);
461 mmap_start = mmap_find_vma(real_start, host_len);
462 if (mmap_start == (abi_ulong)-1) {
463 errno = ENOMEM;
464 goto fail;
466 /* Note: we prefer to control the mapping address. It is
467 especially important if qemu_host_page_size >
468 qemu_real_host_page_size */
469 p = mmap(g2h(mmap_start),
470 host_len, prot, flags | MAP_FIXED | MAP_ANONYMOUS, -1, 0);
471 if (p == MAP_FAILED)
472 goto fail;
473 /* update start so that it points to the file position at 'offset' */
474 host_start = (unsigned long)p;
475 if (!(flags & MAP_ANONYMOUS)) {
476 p = mmap(g2h(mmap_start), len, prot,
477 flags | MAP_FIXED, fd, host_offset);
478 host_start += offset - host_offset;
480 start = h2g(host_start);
481 } else {
482 if (start & ~TARGET_PAGE_MASK) {
483 errno = EINVAL;
484 goto fail;
486 end = start + len;
487 real_end = HOST_PAGE_ALIGN(end);
490 * Test if requested memory area fits target address space
491 * It can fail only on 64-bit host with 32-bit target.
492 * On any other target/host host mmap() handles this error correctly.
494 if ((unsigned long)start + len - 1 > (abi_ulong) -1) {
495 errno = EINVAL;
496 goto fail;
499 /* worst case: we cannot map the file because the offset is not
500 aligned, so we read it */
501 if (!(flags & MAP_ANONYMOUS) &&
502 (offset & ~qemu_host_page_mask) != (start & ~qemu_host_page_mask)) {
503 /* msync() won't work here, so we return an error if write is
504 possible while it is a shared mapping */
505 if ((flags & MAP_TYPE) == MAP_SHARED &&
506 (prot & PROT_WRITE)) {
507 errno = EINVAL;
508 goto fail;
510 retaddr = target_mmap(start, len, prot | PROT_WRITE,
511 MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS,
512 -1, 0);
513 if (retaddr == -1)
514 goto fail;
515 if (pread(fd, g2h(start), len, offset) == -1)
516 goto fail;
517 if (!(prot & PROT_WRITE)) {
518 ret = target_mprotect(start, len, prot);
519 if (ret != 0) {
520 start = ret;
521 goto the_end;
524 goto the_end;
527 /* handle the start of the mapping */
528 if (start > real_start) {
529 if (real_end == real_start + qemu_host_page_size) {
530 /* one single host page */
531 ret = mmap_frag(real_start, start, end,
532 prot, flags, fd, offset);
533 if (ret == -1)
534 goto fail;
535 goto the_end1;
537 ret = mmap_frag(real_start, start, real_start + qemu_host_page_size,
538 prot, flags, fd, offset);
539 if (ret == -1)
540 goto fail;
541 real_start += qemu_host_page_size;
543 /* handle the end of the mapping */
544 if (end < real_end) {
545 ret = mmap_frag(real_end - qemu_host_page_size,
546 real_end - qemu_host_page_size, real_end,
547 prot, flags, fd,
548 offset + real_end - qemu_host_page_size - start);
549 if (ret == -1)
550 goto fail;
551 real_end -= qemu_host_page_size;
554 /* map the middle (easier) */
555 if (real_start < real_end) {
556 void *p;
557 unsigned long offset1;
558 if (flags & MAP_ANONYMOUS)
559 offset1 = 0;
560 else
561 offset1 = offset + real_start - start;
562 p = mmap(g2h(real_start), real_end - real_start,
563 prot, flags, fd, offset1);
564 if (p == MAP_FAILED)
565 goto fail;
568 the_end1:
569 page_set_flags(start, start + len, prot | PAGE_VALID);
570 the_end:
571 #ifdef DEBUG_MMAP
572 printf("ret=0x" TARGET_ABI_FMT_lx "\n", start);
573 page_dump(stdout);
574 printf("\n");
575 #endif
576 mmap_unlock();
577 return start;
578 fail:
579 mmap_unlock();
580 return -1;
583 static void mmap_reserve(abi_ulong start, abi_ulong size)
585 abi_ulong real_start;
586 abi_ulong real_end;
587 abi_ulong addr;
588 abi_ulong end;
589 int prot;
591 real_start = start & qemu_host_page_mask;
592 real_end = HOST_PAGE_ALIGN(start + size);
593 end = start + size;
594 if (start > real_start) {
595 /* handle host page containing start */
596 prot = 0;
597 for (addr = real_start; addr < start; addr += TARGET_PAGE_SIZE) {
598 prot |= page_get_flags(addr);
600 if (real_end == real_start + qemu_host_page_size) {
601 for (addr = end; addr < real_end; addr += TARGET_PAGE_SIZE) {
602 prot |= page_get_flags(addr);
604 end = real_end;
606 if (prot != 0)
607 real_start += qemu_host_page_size;
609 if (end < real_end) {
610 prot = 0;
611 for (addr = end; addr < real_end; addr += TARGET_PAGE_SIZE) {
612 prot |= page_get_flags(addr);
614 if (prot != 0)
615 real_end -= qemu_host_page_size;
617 if (real_start != real_end) {
618 mmap(g2h(real_start), real_end - real_start, PROT_NONE,
619 MAP_FIXED | MAP_ANONYMOUS | MAP_PRIVATE | MAP_NORESERVE,
620 -1, 0);
624 int target_munmap(abi_ulong start, abi_ulong len)
626 abi_ulong end, real_start, real_end, addr;
627 int prot, ret;
629 #ifdef DEBUG_MMAP
630 printf("munmap: start=0x" TARGET_ABI_FMT_lx " len=0x"
631 TARGET_ABI_FMT_lx "\n",
632 start, len);
633 #endif
634 if (start & ~TARGET_PAGE_MASK)
635 return -EINVAL;
636 len = TARGET_PAGE_ALIGN(len);
637 if (len == 0)
638 return -EINVAL;
639 mmap_lock();
640 end = start + len;
641 real_start = start & qemu_host_page_mask;
642 real_end = HOST_PAGE_ALIGN(end);
644 if (start > real_start) {
645 /* handle host page containing start */
646 prot = 0;
647 for(addr = real_start; addr < start; addr += TARGET_PAGE_SIZE) {
648 prot |= page_get_flags(addr);
650 if (real_end == real_start + qemu_host_page_size) {
651 for(addr = end; addr < real_end; addr += TARGET_PAGE_SIZE) {
652 prot |= page_get_flags(addr);
654 end = real_end;
656 if (prot != 0)
657 real_start += qemu_host_page_size;
659 if (end < real_end) {
660 prot = 0;
661 for(addr = end; addr < real_end; addr += TARGET_PAGE_SIZE) {
662 prot |= page_get_flags(addr);
664 if (prot != 0)
665 real_end -= qemu_host_page_size;
668 ret = 0;
669 /* unmap what we can */
670 if (real_start < real_end) {
671 if (RESERVED_VA) {
672 mmap_reserve(real_start, real_end - real_start);
673 } else {
674 ret = munmap(g2h(real_start), real_end - real_start);
678 if (ret == 0)
679 page_set_flags(start, start + len, 0);
680 mmap_unlock();
681 return ret;
684 abi_long target_mremap(abi_ulong old_addr, abi_ulong old_size,
685 abi_ulong new_size, unsigned long flags,
686 abi_ulong new_addr)
688 int prot;
689 void *host_addr;
691 mmap_lock();
693 if (flags & MREMAP_FIXED) {
694 host_addr = (void *) syscall(__NR_mremap, g2h(old_addr),
695 old_size, new_size,
696 flags,
697 g2h(new_addr));
699 if (RESERVED_VA && host_addr != MAP_FAILED) {
700 /* If new and old addresses overlap then the above mremap will
701 already have failed with EINVAL. */
702 mmap_reserve(old_addr, old_size);
704 } else if (flags & MREMAP_MAYMOVE) {
705 abi_ulong mmap_start;
707 mmap_start = mmap_find_vma(0, new_size);
709 if (mmap_start == -1) {
710 errno = ENOMEM;
711 host_addr = MAP_FAILED;
712 } else {
713 host_addr = (void *) syscall(__NR_mremap, g2h(old_addr),
714 old_size, new_size,
715 flags | MREMAP_FIXED,
716 g2h(mmap_start));
717 if ( RESERVED_VA ) {
718 mmap_reserve(old_addr, old_size);
721 } else {
722 int prot = 0;
723 if (RESERVED_VA && old_size < new_size) {
724 abi_ulong addr;
725 for (addr = old_addr + old_size;
726 addr < old_addr + new_size;
727 addr++) {
728 prot |= page_get_flags(addr);
731 if (prot == 0) {
732 host_addr = mremap(g2h(old_addr), old_size, new_size, flags);
733 if (host_addr != MAP_FAILED && RESERVED_VA && old_size > new_size) {
734 mmap_reserve(old_addr + old_size, new_size - old_size);
736 } else {
737 errno = ENOMEM;
738 host_addr = MAP_FAILED;
740 /* Check if address fits target address space */
741 if ((unsigned long)host_addr + new_size > (abi_ulong)-1) {
742 /* Revert mremap() changes */
743 host_addr = mremap(g2h(old_addr), new_size, old_size, flags);
744 errno = ENOMEM;
745 host_addr = MAP_FAILED;
749 if (host_addr == MAP_FAILED) {
750 new_addr = -1;
751 } else {
752 new_addr = h2g(host_addr);
753 prot = page_get_flags(old_addr);
754 page_set_flags(old_addr, old_addr + old_size, 0);
755 page_set_flags(new_addr, new_addr + new_size, prot | PAGE_VALID);
757 mmap_unlock();
758 return new_addr;
761 int target_msync(abi_ulong start, abi_ulong len, int flags)
763 abi_ulong end;
765 if (start & ~TARGET_PAGE_MASK)
766 return -EINVAL;
767 len = TARGET_PAGE_ALIGN(len);
768 end = start + len;
769 if (end < start)
770 return -EINVAL;
771 if (end == start)
772 return 0;
774 start &= qemu_host_page_mask;
775 return msync(g2h(start), end - start, flags);