MAINTAINERS: Add David Gibson as ppc maintainer
[qemu/ar7.git] / linux-user / mmap.c
blob3519147bce91219e84181b61725cce1126a0492f
1 /*
2 * mmap support for qemu
4 * Copyright (c) 2003 Fabrice Bellard
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, see <http://www.gnu.org/licenses/>.
19 #include "qemu/osdep.h"
20 #include <sys/mman.h>
21 #include <linux/mman.h>
22 #include <linux/unistd.h>
24 #include "qemu.h"
25 #include "qemu-common.h"
26 #include "translate-all.h"
28 //#define DEBUG_MMAP
/* Process-wide mutex behind mmap_lock()/mmap_unlock(). */
static pthread_mutex_t mmap_mutex = PTHREAD_MUTEX_INITIALIZER;

/* Per-thread nesting depth of mmap_lock(); the mutex is only taken on the
   0 -> 1 transition and only released on the 1 -> 0 transition. */
static __thread int mmap_lock_count;
33 void mmap_lock(void)
35 if (mmap_lock_count++ == 0) {
36 pthread_mutex_lock(&mmap_mutex);
40 void mmap_unlock(void)
42 if (--mmap_lock_count == 0) {
43 pthread_mutex_unlock(&mmap_mutex);
47 /* Grab lock to make sure things are in a consistent state after fork(). */
48 void mmap_fork_start(void)
50 if (mmap_lock_count)
51 abort();
52 pthread_mutex_lock(&mmap_mutex);
55 void mmap_fork_end(int child)
57 if (child)
58 pthread_mutex_init(&mmap_mutex, NULL);
59 else
60 pthread_mutex_unlock(&mmap_mutex);
63 /* NOTE: all the constants are the HOST ones, but addresses are target. */
64 int target_mprotect(abi_ulong start, abi_ulong len, int prot)
66 abi_ulong end, host_start, host_end, addr;
67 int prot1, ret;
69 #ifdef DEBUG_MMAP
70 printf("mprotect: start=0x" TARGET_ABI_FMT_lx
71 "len=0x" TARGET_ABI_FMT_lx " prot=%c%c%c\n", start, len,
72 prot & PROT_READ ? 'r' : '-',
73 prot & PROT_WRITE ? 'w' : '-',
74 prot & PROT_EXEC ? 'x' : '-');
75 #endif
77 if ((start & ~TARGET_PAGE_MASK) != 0)
78 return -EINVAL;
79 len = TARGET_PAGE_ALIGN(len);
80 end = start + len;
81 if (end < start)
82 return -EINVAL;
83 prot &= PROT_READ | PROT_WRITE | PROT_EXEC;
84 if (len == 0)
85 return 0;
87 mmap_lock();
88 host_start = start & qemu_host_page_mask;
89 host_end = HOST_PAGE_ALIGN(end);
90 if (start > host_start) {
91 /* handle host page containing start */
92 prot1 = prot;
93 for(addr = host_start; addr < start; addr += TARGET_PAGE_SIZE) {
94 prot1 |= page_get_flags(addr);
96 if (host_end == host_start + qemu_host_page_size) {
97 for(addr = end; addr < host_end; addr += TARGET_PAGE_SIZE) {
98 prot1 |= page_get_flags(addr);
100 end = host_end;
102 ret = mprotect(g2h(host_start), qemu_host_page_size, prot1 & PAGE_BITS);
103 if (ret != 0)
104 goto error;
105 host_start += qemu_host_page_size;
107 if (end < host_end) {
108 prot1 = prot;
109 for(addr = end; addr < host_end; addr += TARGET_PAGE_SIZE) {
110 prot1 |= page_get_flags(addr);
112 ret = mprotect(g2h(host_end - qemu_host_page_size), qemu_host_page_size,
113 prot1 & PAGE_BITS);
114 if (ret != 0)
115 goto error;
116 host_end -= qemu_host_page_size;
119 /* handle the pages in the middle */
120 if (host_start < host_end) {
121 ret = mprotect(g2h(host_start), host_end - host_start, prot);
122 if (ret != 0)
123 goto error;
125 page_set_flags(start, start + len, prot | PAGE_VALID);
126 mmap_unlock();
127 return 0;
128 error:
129 mmap_unlock();
130 return ret;
133 /* map an incomplete host page */
134 static int mmap_frag(abi_ulong real_start,
135 abi_ulong start, abi_ulong end,
136 int prot, int flags, int fd, abi_ulong offset)
138 abi_ulong real_end, addr;
139 void *host_start;
140 int prot1, prot_new;
142 real_end = real_start + qemu_host_page_size;
143 host_start = g2h(real_start);
145 /* get the protection of the target pages outside the mapping */
146 prot1 = 0;
147 for(addr = real_start; addr < real_end; addr++) {
148 if (addr < start || addr >= end)
149 prot1 |= page_get_flags(addr);
152 if (prot1 == 0) {
153 /* no page was there, so we allocate one */
154 void *p = mmap(host_start, qemu_host_page_size, prot,
155 flags | MAP_ANONYMOUS, -1, 0);
156 if (p == MAP_FAILED)
157 return -1;
158 prot1 = prot;
160 prot1 &= PAGE_BITS;
162 prot_new = prot | prot1;
163 if (!(flags & MAP_ANONYMOUS)) {
164 /* msync() won't work here, so we return an error if write is
165 possible while it is a shared mapping */
166 if ((flags & MAP_TYPE) == MAP_SHARED &&
167 (prot & PROT_WRITE))
168 return -1;
170 /* adjust protection to be able to read */
171 if (!(prot1 & PROT_WRITE))
172 mprotect(host_start, qemu_host_page_size, prot1 | PROT_WRITE);
174 /* read the corresponding file data */
175 if (pread(fd, g2h(start), end - start, offset) == -1)
176 return -1;
178 /* put final protection */
179 if (prot_new != (prot1 | PROT_WRITE))
180 mprotect(host_start, qemu_host_page_size, prot_new);
181 } else {
182 if (prot_new != prot1) {
183 mprotect(host_start, qemu_host_page_size, prot_new);
185 if (prot_new & PROT_WRITE) {
186 memset(g2h(start), 0, end - start);
189 return 0;
192 #if HOST_LONG_BITS == 64 && TARGET_ABI_BITS == 64
193 # define TASK_UNMAPPED_BASE (1ul << 38)
194 #elif defined(__CYGWIN__)
195 /* Cygwin doesn't have a whole lot of address space. */
196 # define TASK_UNMAPPED_BASE 0x18000000
197 #else
198 # define TASK_UNMAPPED_BASE 0x40000000
199 #endif
200 abi_ulong mmap_next_start = TASK_UNMAPPED_BASE;
202 unsigned long last_brk;
204 /* Subroutine of mmap_find_vma, used when we have pre-allocated a chunk
205 of guest address space. */
206 static abi_ulong mmap_find_vma_reserved(abi_ulong start, abi_ulong size)
208 abi_ulong addr;
209 abi_ulong end_addr;
210 int prot;
211 int looped = 0;
213 if (size > reserved_va) {
214 return (abi_ulong)-1;
217 size = HOST_PAGE_ALIGN(size);
218 end_addr = start + size;
219 if (end_addr > reserved_va) {
220 end_addr = reserved_va;
222 addr = end_addr - qemu_host_page_size;
224 while (1) {
225 if (addr > end_addr) {
226 if (looped) {
227 return (abi_ulong)-1;
229 end_addr = reserved_va;
230 addr = end_addr - qemu_host_page_size;
231 looped = 1;
232 continue;
234 prot = page_get_flags(addr);
235 if (prot) {
236 end_addr = addr;
238 if (addr + size == end_addr) {
239 break;
241 addr -= qemu_host_page_size;
244 if (start == mmap_next_start) {
245 mmap_next_start = addr;
248 return addr;
252 * Find and reserve a free memory area of size 'size'. The search
253 * starts at 'start'.
254 * It must be called with mmap_lock() held.
255 * Return -1 if error.
257 abi_ulong mmap_find_vma(abi_ulong start, abi_ulong size)
259 void *ptr, *prev;
260 abi_ulong addr;
261 int wrapped, repeat;
263 /* If 'start' == 0, then a default start address is used. */
264 if (start == 0) {
265 start = mmap_next_start;
266 } else {
267 start &= qemu_host_page_mask;
270 size = HOST_PAGE_ALIGN(size);
272 if (reserved_va) {
273 return mmap_find_vma_reserved(start, size);
276 addr = start;
277 wrapped = repeat = 0;
278 prev = 0;
280 for (;; prev = ptr) {
282 * Reserve needed memory area to avoid a race.
283 * It should be discarded using:
284 * - mmap() with MAP_FIXED flag
285 * - mremap() with MREMAP_FIXED flag
286 * - shmat() with SHM_REMAP flag
288 ptr = mmap(g2h(addr), size, PROT_NONE,
289 MAP_ANONYMOUS|MAP_PRIVATE|MAP_NORESERVE, -1, 0);
291 /* ENOMEM, if host address space has no memory */
292 if (ptr == MAP_FAILED) {
293 return (abi_ulong)-1;
296 /* Count the number of sequential returns of the same address.
297 This is used to modify the search algorithm below. */
298 repeat = (ptr == prev ? repeat + 1 : 0);
300 if (h2g_valid(ptr + size - 1)) {
301 addr = h2g(ptr);
303 if ((addr & ~TARGET_PAGE_MASK) == 0) {
304 /* Success. */
305 if (start == mmap_next_start && addr >= TASK_UNMAPPED_BASE) {
306 mmap_next_start = addr + size;
308 return addr;
311 /* The address is not properly aligned for the target. */
312 switch (repeat) {
313 case 0:
314 /* Assume the result that the kernel gave us is the
315 first with enough free space, so start again at the
316 next higher target page. */
317 addr = TARGET_PAGE_ALIGN(addr);
318 break;
319 case 1:
320 /* Sometimes the kernel decides to perform the allocation
321 at the top end of memory instead. */
322 addr &= TARGET_PAGE_MASK;
323 break;
324 case 2:
325 /* Start over at low memory. */
326 addr = 0;
327 break;
328 default:
329 /* Fail. This unaligned block must the last. */
330 addr = -1;
331 break;
333 } else {
334 /* Since the result the kernel gave didn't fit, start
335 again at low memory. If any repetition, fail. */
336 addr = (repeat ? -1 : 0);
339 /* Unmap and try again. */
340 munmap(ptr, size);
342 /* ENOMEM if we checked the whole of the target address space. */
343 if (addr == (abi_ulong)-1) {
344 return (abi_ulong)-1;
345 } else if (addr == 0) {
346 if (wrapped) {
347 return (abi_ulong)-1;
349 wrapped = 1;
350 /* Don't actually use 0 when wrapping, instead indicate
351 that we'd truly like an allocation in low memory. */
352 addr = (mmap_min_addr > TARGET_PAGE_SIZE
353 ? TARGET_PAGE_ALIGN(mmap_min_addr)
354 : TARGET_PAGE_SIZE);
355 } else if (wrapped && addr >= start) {
356 return (abi_ulong)-1;
361 /* NOTE: all the constants are the HOST ones */
362 abi_long target_mmap(abi_ulong start, abi_ulong len, int prot,
363 int flags, int fd, abi_ulong offset)
365 abi_ulong ret, end, real_start, real_end, retaddr, host_offset, host_len;
367 mmap_lock();
368 #ifdef DEBUG_MMAP
370 printf("mmap: start=0x" TARGET_ABI_FMT_lx
371 " len=0x" TARGET_ABI_FMT_lx " prot=%c%c%c flags=",
372 start, len,
373 prot & PROT_READ ? 'r' : '-',
374 prot & PROT_WRITE ? 'w' : '-',
375 prot & PROT_EXEC ? 'x' : '-');
376 if (flags & MAP_FIXED)
377 printf("MAP_FIXED ");
378 if (flags & MAP_ANONYMOUS)
379 printf("MAP_ANON ");
380 switch(flags & MAP_TYPE) {
381 case MAP_PRIVATE:
382 printf("MAP_PRIVATE ");
383 break;
384 case MAP_SHARED:
385 printf("MAP_SHARED ");
386 break;
387 default:
388 printf("[MAP_TYPE=0x%x] ", flags & MAP_TYPE);
389 break;
391 printf("fd=%d offset=" TARGET_ABI_FMT_lx "\n", fd, offset);
393 #endif
395 if (offset & ~TARGET_PAGE_MASK) {
396 errno = EINVAL;
397 goto fail;
400 len = TARGET_PAGE_ALIGN(len);
401 if (len == 0)
402 goto the_end;
403 real_start = start & qemu_host_page_mask;
404 host_offset = offset & qemu_host_page_mask;
406 /* If the user is asking for the kernel to find a location, do that
407 before we truncate the length for mapping files below. */
408 if (!(flags & MAP_FIXED)) {
409 host_len = len + offset - host_offset;
410 host_len = HOST_PAGE_ALIGN(host_len);
411 start = mmap_find_vma(real_start, host_len);
412 if (start == (abi_ulong)-1) {
413 errno = ENOMEM;
414 goto fail;
418 /* When mapping files into a memory area larger than the file, accesses
419 to pages beyond the file size will cause a SIGBUS.
421 For example, if mmaping a file of 100 bytes on a host with 4K pages
422 emulating a target with 8K pages, the target expects to be able to
423 access the first 8K. But the host will trap us on any access beyond
424 4K.
426 When emulating a target with a larger page-size than the hosts, we
427 may need to truncate file maps at EOF and add extra anonymous pages
428 up to the targets page boundary. */
430 if ((qemu_real_host_page_size < TARGET_PAGE_SIZE)
431 && !(flags & MAP_ANONYMOUS)) {
432 struct stat sb;
434 if (fstat (fd, &sb) == -1)
435 goto fail;
437 /* Are we trying to create a map beyond EOF?. */
438 if (offset + len > sb.st_size) {
439 /* If so, truncate the file map at eof aligned with
440 the hosts real pagesize. Additional anonymous maps
441 will be created beyond EOF. */
442 len = REAL_HOST_PAGE_ALIGN(sb.st_size - offset);
446 if (!(flags & MAP_FIXED)) {
447 unsigned long host_start;
448 void *p;
450 host_len = len + offset - host_offset;
451 host_len = HOST_PAGE_ALIGN(host_len);
453 /* Note: we prefer to control the mapping address. It is
454 especially important if qemu_host_page_size >
455 qemu_real_host_page_size */
456 p = mmap(g2h(start), host_len, prot,
457 flags | MAP_FIXED | MAP_ANONYMOUS, -1, 0);
458 if (p == MAP_FAILED)
459 goto fail;
460 /* update start so that it points to the file position at 'offset' */
461 host_start = (unsigned long)p;
462 if (!(flags & MAP_ANONYMOUS)) {
463 p = mmap(g2h(start), len, prot,
464 flags | MAP_FIXED, fd, host_offset);
465 if (p == MAP_FAILED) {
466 munmap(g2h(start), host_len);
467 goto fail;
469 host_start += offset - host_offset;
471 start = h2g(host_start);
472 } else {
473 if (start & ~TARGET_PAGE_MASK) {
474 errno = EINVAL;
475 goto fail;
477 end = start + len;
478 real_end = HOST_PAGE_ALIGN(end);
481 * Test if requested memory area fits target address space
482 * It can fail only on 64-bit host with 32-bit target.
483 * On any other target/host host mmap() handles this error correctly.
485 if ((unsigned long)start + len - 1 > (abi_ulong) -1) {
486 errno = EINVAL;
487 goto fail;
490 /* worst case: we cannot map the file because the offset is not
491 aligned, so we read it */
492 if (!(flags & MAP_ANONYMOUS) &&
493 (offset & ~qemu_host_page_mask) != (start & ~qemu_host_page_mask)) {
494 /* msync() won't work here, so we return an error if write is
495 possible while it is a shared mapping */
496 if ((flags & MAP_TYPE) == MAP_SHARED &&
497 (prot & PROT_WRITE)) {
498 errno = EINVAL;
499 goto fail;
501 retaddr = target_mmap(start, len, prot | PROT_WRITE,
502 MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS,
503 -1, 0);
504 if (retaddr == -1)
505 goto fail;
506 if (pread(fd, g2h(start), len, offset) == -1)
507 goto fail;
508 if (!(prot & PROT_WRITE)) {
509 ret = target_mprotect(start, len, prot);
510 assert(ret == 0);
512 goto the_end;
515 /* handle the start of the mapping */
516 if (start > real_start) {
517 if (real_end == real_start + qemu_host_page_size) {
518 /* one single host page */
519 ret = mmap_frag(real_start, start, end,
520 prot, flags, fd, offset);
521 if (ret == -1)
522 goto fail;
523 goto the_end1;
525 ret = mmap_frag(real_start, start, real_start + qemu_host_page_size,
526 prot, flags, fd, offset);
527 if (ret == -1)
528 goto fail;
529 real_start += qemu_host_page_size;
531 /* handle the end of the mapping */
532 if (end < real_end) {
533 ret = mmap_frag(real_end - qemu_host_page_size,
534 real_end - qemu_host_page_size, end,
535 prot, flags, fd,
536 offset + real_end - qemu_host_page_size - start);
537 if (ret == -1)
538 goto fail;
539 real_end -= qemu_host_page_size;
542 /* map the middle (easier) */
543 if (real_start < real_end) {
544 void *p;
545 unsigned long offset1;
546 if (flags & MAP_ANONYMOUS)
547 offset1 = 0;
548 else
549 offset1 = offset + real_start - start;
550 p = mmap(g2h(real_start), real_end - real_start,
551 prot, flags, fd, offset1);
552 if (p == MAP_FAILED)
553 goto fail;
556 the_end1:
557 page_set_flags(start, start + len, prot | PAGE_VALID);
558 the_end:
559 #ifdef DEBUG_MMAP
560 printf("ret=0x" TARGET_ABI_FMT_lx "\n", start);
561 page_dump(stdout);
562 printf("\n");
563 #endif
564 tb_invalidate_phys_range(start, start + len);
565 mmap_unlock();
566 return start;
567 fail:
568 mmap_unlock();
569 return -1;
572 static void mmap_reserve(abi_ulong start, abi_ulong size)
574 abi_ulong real_start;
575 abi_ulong real_end;
576 abi_ulong addr;
577 abi_ulong end;
578 int prot;
580 real_start = start & qemu_host_page_mask;
581 real_end = HOST_PAGE_ALIGN(start + size);
582 end = start + size;
583 if (start > real_start) {
584 /* handle host page containing start */
585 prot = 0;
586 for (addr = real_start; addr < start; addr += TARGET_PAGE_SIZE) {
587 prot |= page_get_flags(addr);
589 if (real_end == real_start + qemu_host_page_size) {
590 for (addr = end; addr < real_end; addr += TARGET_PAGE_SIZE) {
591 prot |= page_get_flags(addr);
593 end = real_end;
595 if (prot != 0)
596 real_start += qemu_host_page_size;
598 if (end < real_end) {
599 prot = 0;
600 for (addr = end; addr < real_end; addr += TARGET_PAGE_SIZE) {
601 prot |= page_get_flags(addr);
603 if (prot != 0)
604 real_end -= qemu_host_page_size;
606 if (real_start != real_end) {
607 mmap(g2h(real_start), real_end - real_start, PROT_NONE,
608 MAP_FIXED | MAP_ANONYMOUS | MAP_PRIVATE | MAP_NORESERVE,
609 -1, 0);
613 int target_munmap(abi_ulong start, abi_ulong len)
615 abi_ulong end, real_start, real_end, addr;
616 int prot, ret;
618 #ifdef DEBUG_MMAP
619 printf("munmap: start=0x" TARGET_ABI_FMT_lx " len=0x"
620 TARGET_ABI_FMT_lx "\n",
621 start, len);
622 #endif
623 if (start & ~TARGET_PAGE_MASK)
624 return -EINVAL;
625 len = TARGET_PAGE_ALIGN(len);
626 if (len == 0)
627 return -EINVAL;
628 mmap_lock();
629 end = start + len;
630 real_start = start & qemu_host_page_mask;
631 real_end = HOST_PAGE_ALIGN(end);
633 if (start > real_start) {
634 /* handle host page containing start */
635 prot = 0;
636 for(addr = real_start; addr < start; addr += TARGET_PAGE_SIZE) {
637 prot |= page_get_flags(addr);
639 if (real_end == real_start + qemu_host_page_size) {
640 for(addr = end; addr < real_end; addr += TARGET_PAGE_SIZE) {
641 prot |= page_get_flags(addr);
643 end = real_end;
645 if (prot != 0)
646 real_start += qemu_host_page_size;
648 if (end < real_end) {
649 prot = 0;
650 for(addr = end; addr < real_end; addr += TARGET_PAGE_SIZE) {
651 prot |= page_get_flags(addr);
653 if (prot != 0)
654 real_end -= qemu_host_page_size;
657 ret = 0;
658 /* unmap what we can */
659 if (real_start < real_end) {
660 if (reserved_va) {
661 mmap_reserve(real_start, real_end - real_start);
662 } else {
663 ret = munmap(g2h(real_start), real_end - real_start);
667 if (ret == 0) {
668 page_set_flags(start, start + len, 0);
669 tb_invalidate_phys_range(start, start + len);
671 mmap_unlock();
672 return ret;
675 abi_long target_mremap(abi_ulong old_addr, abi_ulong old_size,
676 abi_ulong new_size, unsigned long flags,
677 abi_ulong new_addr)
679 int prot;
680 void *host_addr;
682 mmap_lock();
684 if (flags & MREMAP_FIXED) {
685 host_addr = (void *) syscall(__NR_mremap, g2h(old_addr),
686 old_size, new_size,
687 flags,
688 g2h(new_addr));
690 if (reserved_va && host_addr != MAP_FAILED) {
691 /* If new and old addresses overlap then the above mremap will
692 already have failed with EINVAL. */
693 mmap_reserve(old_addr, old_size);
695 } else if (flags & MREMAP_MAYMOVE) {
696 abi_ulong mmap_start;
698 mmap_start = mmap_find_vma(0, new_size);
700 if (mmap_start == -1) {
701 errno = ENOMEM;
702 host_addr = MAP_FAILED;
703 } else {
704 host_addr = (void *) syscall(__NR_mremap, g2h(old_addr),
705 old_size, new_size,
706 flags | MREMAP_FIXED,
707 g2h(mmap_start));
708 if (reserved_va) {
709 mmap_reserve(old_addr, old_size);
712 } else {
713 int prot = 0;
714 if (reserved_va && old_size < new_size) {
715 abi_ulong addr;
716 for (addr = old_addr + old_size;
717 addr < old_addr + new_size;
718 addr++) {
719 prot |= page_get_flags(addr);
722 if (prot == 0) {
723 host_addr = mremap(g2h(old_addr), old_size, new_size, flags);
724 if (host_addr != MAP_FAILED && reserved_va && old_size > new_size) {
725 mmap_reserve(old_addr + old_size, new_size - old_size);
727 } else {
728 errno = ENOMEM;
729 host_addr = MAP_FAILED;
731 /* Check if address fits target address space */
732 if ((unsigned long)host_addr + new_size > (abi_ulong)-1) {
733 /* Revert mremap() changes */
734 host_addr = mremap(g2h(old_addr), new_size, old_size, flags);
735 errno = ENOMEM;
736 host_addr = MAP_FAILED;
740 if (host_addr == MAP_FAILED) {
741 new_addr = -1;
742 } else {
743 new_addr = h2g(host_addr);
744 prot = page_get_flags(old_addr);
745 page_set_flags(old_addr, old_addr + old_size, 0);
746 page_set_flags(new_addr, new_addr + new_size, prot | PAGE_VALID);
748 tb_invalidate_phys_range(new_addr, new_addr + new_size);
749 mmap_unlock();
750 return new_addr;
753 int target_msync(abi_ulong start, abi_ulong len, int flags)
755 abi_ulong end;
757 if (start & ~TARGET_PAGE_MASK)
758 return -EINVAL;
759 len = TARGET_PAGE_ALIGN(len);
760 end = start + len;
761 if (end < start)
762 return -EINVAL;
763 if (end == start)
764 return 0;
766 start &= qemu_host_page_mask;
767 return msync(g2h(start), end - start, flags);