Merge remote-tracking branch 'qemu/master'
[qemu/ar7.git] / linux-user / mmap.c
blobe9c4171d48e5fa6f78ffb3f237acf8407463f8ed
1 /*
2 * mmap support for qemu
4 * Copyright (c) 2003 Fabrice Bellard
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, see <http://www.gnu.org/licenses/>.
20 #include <sys/types.h>
21 #include <sys/stat.h>
22 #include <sys/mman.h>
23 #include <linux/mman.h>
24 #include <linux/unistd.h>
26 #include "qemu-common.h"
27 #include "qemu.h"
28 #include "translate-all.h"
30 //#define DEBUG_MMAP
/* Mutex guarding every change made to the guest memory layout. */
static pthread_mutex_t mmap_mutex = PTHREAD_MUTEX_INITIALIZER;
/* Per-thread nesting depth of mmap_lock(); lets one thread re-enter. */
static __thread int mmap_lock_count;

/*
 * Take the mmap lock.  Only the outermost call on a thread acquires the
 * mutex; nested calls merely bump the per-thread depth counter.
 */
void mmap_lock(void)
{
    int depth = mmap_lock_count;

    mmap_lock_count = depth + 1;
    if (depth == 0) {
        pthread_mutex_lock(&mmap_mutex);
    }
}
42 void mmap_unlock(void)
44 if (--mmap_lock_count == 0) {
45 pthread_mutex_unlock(&mmap_mutex);
49 /* Grab lock to make sure things are in a consistent state after fork(). */
50 void mmap_fork_start(void)
52 if (mmap_lock_count)
53 abort();
54 pthread_mutex_lock(&mmap_mutex);
57 void mmap_fork_end(int child)
59 if (child)
60 pthread_mutex_init(&mmap_mutex, NULL);
61 else
62 pthread_mutex_unlock(&mmap_mutex);
65 /* NOTE: all the constants are the HOST ones, but addresses are target. */
66 int target_mprotect(abi_ulong start, abi_ulong len, int prot)
68 abi_ulong end, host_start, host_end, addr;
69 int prot1, ret;
71 #ifdef DEBUG_MMAP
72 printf("mprotect: start=0x" TARGET_ABI_FMT_lx
73 "len=0x" TARGET_ABI_FMT_lx " prot=%c%c%c\n", start, len,
74 prot & PROT_READ ? 'r' : '-',
75 prot & PROT_WRITE ? 'w' : '-',
76 prot & PROT_EXEC ? 'x' : '-');
77 #endif
79 if ((start & ~TARGET_PAGE_MASK) != 0)
80 return -EINVAL;
81 len = TARGET_PAGE_ALIGN(len);
82 end = start + len;
83 if (end < start)
84 return -EINVAL;
85 prot &= PROT_READ | PROT_WRITE | PROT_EXEC;
86 if (len == 0)
87 return 0;
89 mmap_lock();
90 host_start = start & qemu_host_page_mask;
91 host_end = HOST_PAGE_ALIGN(end);
92 if (start > host_start) {
93 /* handle host page containing start */
94 prot1 = prot;
95 for(addr = host_start; addr < start; addr += TARGET_PAGE_SIZE) {
96 prot1 |= page_get_flags(addr);
98 if (host_end == host_start + qemu_host_page_size) {
99 for(addr = end; addr < host_end; addr += TARGET_PAGE_SIZE) {
100 prot1 |= page_get_flags(addr);
102 end = host_end;
104 ret = mprotect(g2h(host_start), qemu_host_page_size, prot1 & PAGE_BITS);
105 if (ret != 0)
106 goto error;
107 host_start += qemu_host_page_size;
109 if (end < host_end) {
110 prot1 = prot;
111 for(addr = end; addr < host_end; addr += TARGET_PAGE_SIZE) {
112 prot1 |= page_get_flags(addr);
114 ret = mprotect(g2h(host_end - qemu_host_page_size), qemu_host_page_size,
115 prot1 & PAGE_BITS);
116 if (ret != 0)
117 goto error;
118 host_end -= qemu_host_page_size;
121 /* handle the pages in the middle */
122 if (host_start < host_end) {
123 ret = mprotect(g2h(host_start), host_end - host_start, prot);
124 if (ret != 0)
125 goto error;
127 page_set_flags(start, start + len, prot | PAGE_VALID);
128 mmap_unlock();
129 return 0;
130 error:
131 mmap_unlock();
132 return ret;
135 /* map an incomplete host page */
136 static int mmap_frag(abi_ulong real_start,
137 abi_ulong start, abi_ulong end,
138 int prot, int flags, int fd, abi_ulong offset)
140 abi_ulong real_end, addr;
141 void *host_start;
142 int prot1, prot_new;
144 real_end = real_start + qemu_host_page_size;
145 host_start = g2h(real_start);
147 /* get the protection of the target pages outside the mapping */
148 prot1 = 0;
149 for(addr = real_start; addr < real_end; addr++) {
150 if (addr < start || addr >= end)
151 prot1 |= page_get_flags(addr);
154 if (prot1 == 0) {
155 /* no page was there, so we allocate one */
156 void *p = mmap(host_start, qemu_host_page_size, prot,
157 flags | MAP_ANONYMOUS, -1, 0);
158 if (p == MAP_FAILED)
159 return -1;
160 prot1 = prot;
162 prot1 &= PAGE_BITS;
164 prot_new = prot | prot1;
165 if (!(flags & MAP_ANONYMOUS)) {
166 /* msync() won't work here, so we return an error if write is
167 possible while it is a shared mapping */
168 if ((flags & MAP_TYPE) == MAP_SHARED &&
169 (prot & PROT_WRITE))
170 return -1;
172 /* adjust protection to be able to read */
173 if (!(prot1 & PROT_WRITE))
174 mprotect(host_start, qemu_host_page_size, prot1 | PROT_WRITE);
176 /* read the corresponding file data */
177 if (pread(fd, g2h(start), end - start, offset) == -1)
178 return -1;
180 /* put final protection */
181 if (prot_new != (prot1 | PROT_WRITE))
182 mprotect(host_start, qemu_host_page_size, prot_new);
183 } else {
184 /* just update the protection */
185 if (prot_new != prot1) {
186 mprotect(host_start, qemu_host_page_size, prot_new);
189 return 0;
192 #if HOST_LONG_BITS == 64 && TARGET_ABI_BITS == 64
193 # define TASK_UNMAPPED_BASE (1ul << 38)
194 #elif defined(__CYGWIN__)
195 /* Cygwin doesn't have a whole lot of address space. */
196 # define TASK_UNMAPPED_BASE 0x18000000
197 #else
198 # define TASK_UNMAPPED_BASE 0x40000000
199 #endif
200 abi_ulong mmap_next_start = TASK_UNMAPPED_BASE;
202 unsigned long last_brk;
204 /* Subroutine of mmap_find_vma, used when we have pre-allocated a chunk
205 of guest address space. */
206 static abi_ulong mmap_find_vma_reserved(abi_ulong start, abi_ulong size)
208 abi_ulong addr;
209 abi_ulong end_addr;
210 int prot;
211 int looped = 0;
213 if (size > reserved_va) {
214 return (abi_ulong)-1;
217 size = HOST_PAGE_ALIGN(size);
218 end_addr = start + size;
219 if (end_addr > reserved_va) {
220 end_addr = reserved_va;
222 addr = end_addr - qemu_host_page_size;
224 while (1) {
225 if (addr > end_addr) {
226 if (looped) {
227 return (abi_ulong)-1;
229 end_addr = reserved_va;
230 addr = end_addr - qemu_host_page_size;
231 looped = 1;
232 continue;
234 prot = page_get_flags(addr);
235 if (prot) {
236 end_addr = addr;
238 if (addr + size == end_addr) {
239 break;
241 addr -= qemu_host_page_size;
244 if (start == mmap_next_start) {
245 mmap_next_start = addr;
248 return addr;
252 * Find and reserve a free memory area of size 'size'. The search
253 * starts at 'start'.
254 * It must be called with mmap_lock() held.
255 * Return -1 if error.
257 abi_ulong mmap_find_vma(abi_ulong start, abi_ulong size)
259 void *ptr, *prev;
260 abi_ulong addr;
261 int wrapped, repeat;
263 /* If 'start' == 0, then a default start address is used. */
264 if (start == 0) {
265 start = mmap_next_start;
266 } else {
267 start &= qemu_host_page_mask;
270 size = HOST_PAGE_ALIGN(size);
272 if (reserved_va) {
273 return mmap_find_vma_reserved(start, size);
276 addr = start;
277 wrapped = repeat = 0;
278 prev = 0;
280 for (;; prev = ptr) {
282 * Reserve needed memory area to avoid a race.
283 * It should be discarded using:
284 * - mmap() with MAP_FIXED flag
285 * - mremap() with MREMAP_FIXED flag
286 * - shmat() with SHM_REMAP flag
288 ptr = mmap(g2h(addr), size, PROT_NONE,
289 MAP_ANONYMOUS|MAP_PRIVATE|MAP_NORESERVE, -1, 0);
291 /* ENOMEM, if host address space has no memory */
292 if (ptr == MAP_FAILED) {
293 return (abi_ulong)-1;
296 /* Count the number of sequential returns of the same address.
297 This is used to modify the search algorithm below. */
298 repeat = (ptr == prev ? repeat + 1 : 0);
300 if (h2g_valid(ptr + size - 1)) {
301 addr = h2g(ptr);
303 if ((addr & ~TARGET_PAGE_MASK) == 0) {
304 /* Success. */
305 if (start == mmap_next_start && addr >= TASK_UNMAPPED_BASE) {
306 mmap_next_start = addr + size;
308 return addr;
311 /* The address is not properly aligned for the target. */
312 switch (repeat) {
313 case 0:
314 /* Assume the result that the kernel gave us is the
315 first with enough free space, so start again at the
316 next higher target page. */
317 addr = TARGET_PAGE_ALIGN(addr);
318 break;
319 case 1:
320 /* Sometimes the kernel decides to perform the allocation
321 at the top end of memory instead. */
322 addr &= TARGET_PAGE_MASK;
323 break;
324 case 2:
325 /* Start over at low memory. */
326 addr = 0;
327 break;
328 default:
329 /* Fail. This unaligned block must the last. */
330 addr = -1;
331 break;
333 } else {
334 /* Since the result the kernel gave didn't fit, start
335 again at low memory. If any repetition, fail. */
336 addr = (repeat ? -1 : 0);
339 /* Unmap and try again. */
340 munmap(ptr, size);
342 /* ENOMEM if we checked the whole of the target address space. */
343 if (addr == (abi_ulong)-1) {
344 return (abi_ulong)-1;
345 } else if (addr == 0) {
346 if (wrapped) {
347 return (abi_ulong)-1;
349 wrapped = 1;
350 /* Don't actually use 0 when wrapping, instead indicate
351 that we'd truly like an allocation in low memory. */
352 addr = (mmap_min_addr > TARGET_PAGE_SIZE
353 ? TARGET_PAGE_ALIGN(mmap_min_addr)
354 : TARGET_PAGE_SIZE);
355 } else if (wrapped && addr >= start) {
356 return (abi_ulong)-1;
361 /* NOTE: all the constants are the HOST ones */
362 abi_long target_mmap(abi_ulong start, abi_ulong len, int prot,
363 int flags, int fd, abi_ulong offset)
365 abi_ulong ret, end, real_start, real_end, retaddr, host_offset, host_len;
367 mmap_lock();
368 #ifdef DEBUG_MMAP
370 printf("mmap: start=0x" TARGET_ABI_FMT_lx
371 " len=0x" TARGET_ABI_FMT_lx " prot=%c%c%c flags=",
372 start, len,
373 prot & PROT_READ ? 'r' : '-',
374 prot & PROT_WRITE ? 'w' : '-',
375 prot & PROT_EXEC ? 'x' : '-');
376 if (flags & MAP_FIXED)
377 printf("MAP_FIXED ");
378 if (flags & MAP_ANONYMOUS)
379 printf("MAP_ANON ");
380 switch(flags & MAP_TYPE) {
381 case MAP_PRIVATE:
382 printf("MAP_PRIVATE ");
383 break;
384 case MAP_SHARED:
385 printf("MAP_SHARED ");
386 break;
387 default:
388 printf("[MAP_TYPE=0x%x] ", flags & MAP_TYPE);
389 break;
391 printf("fd=%d offset=" TARGET_ABI_FMT_lx "\n", fd, offset);
393 #endif
395 if (offset & ~TARGET_PAGE_MASK) {
396 errno = EINVAL;
397 goto fail;
400 len = TARGET_PAGE_ALIGN(len);
401 if (len == 0)
402 goto the_end;
403 real_start = start & qemu_host_page_mask;
404 host_offset = offset & qemu_host_page_mask;
406 /* If the user is asking for the kernel to find a location, do that
407 before we truncate the length for mapping files below. */
408 if (!(flags & MAP_FIXED)) {
409 host_len = len + offset - host_offset;
410 host_len = HOST_PAGE_ALIGN(host_len);
411 start = mmap_find_vma(real_start, host_len);
412 if (start == (abi_ulong)-1) {
413 errno = ENOMEM;
414 goto fail;
418 /* When mapping files into a memory area larger than the file, accesses
419 to pages beyond the file size will cause a SIGBUS.
421 For example, if mmaping a file of 100 bytes on a host with 4K pages
422 emulating a target with 8K pages, the target expects to be able to
423 access the first 8K. But the host will trap us on any access beyond
424 4K.
426 When emulating a target with a larger page-size than the hosts, we
427 may need to truncate file maps at EOF and add extra anonymous pages
428 up to the targets page boundary. */
430 if ((qemu_real_host_page_size < TARGET_PAGE_SIZE)
431 && !(flags & MAP_ANONYMOUS)) {
432 struct stat sb;
434 if (fstat (fd, &sb) == -1)
435 goto fail;
437 /* Are we trying to create a map beyond EOF?. */
438 if (offset + len > sb.st_size) {
439 /* If so, truncate the file map at eof aligned with
440 the hosts real pagesize. Additional anonymous maps
441 will be created beyond EOF. */
442 len = (sb.st_size - offset);
443 len += qemu_real_host_page_size - 1;
444 len &= ~(qemu_real_host_page_size - 1);
448 if (!(flags & MAP_FIXED)) {
449 unsigned long host_start;
450 void *p;
452 host_len = len + offset - host_offset;
453 host_len = HOST_PAGE_ALIGN(host_len);
455 /* Note: we prefer to control the mapping address. It is
456 especially important if qemu_host_page_size >
457 qemu_real_host_page_size */
458 p = mmap(g2h(start), host_len, prot,
459 flags | MAP_FIXED | MAP_ANONYMOUS, -1, 0);
460 if (p == MAP_FAILED)
461 goto fail;
462 /* update start so that it points to the file position at 'offset' */
463 host_start = (unsigned long)p;
464 if (!(flags & MAP_ANONYMOUS)) {
465 p = mmap(g2h(start), len, prot,
466 flags | MAP_FIXED, fd, host_offset);
467 if (p == MAP_FAILED) {
468 munmap(g2h(start), host_len);
469 goto fail;
471 host_start += offset - host_offset;
473 start = h2g(host_start);
474 } else {
475 if (start & ~TARGET_PAGE_MASK) {
476 errno = EINVAL;
477 goto fail;
479 end = start + len;
480 real_end = HOST_PAGE_ALIGN(end);
483 * Test if requested memory area fits target address space
484 * It can fail only on 64-bit host with 32-bit target.
485 * On any other target/host host mmap() handles this error correctly.
487 if ((unsigned long)start + len - 1 > (abi_ulong) -1) {
488 errno = EINVAL;
489 goto fail;
492 /* worst case: we cannot map the file because the offset is not
493 aligned, so we read it */
494 if (!(flags & MAP_ANONYMOUS) &&
495 (offset & ~qemu_host_page_mask) != (start & ~qemu_host_page_mask)) {
496 /* msync() won't work here, so we return an error if write is
497 possible while it is a shared mapping */
498 if ((flags & MAP_TYPE) == MAP_SHARED &&
499 (prot & PROT_WRITE)) {
500 errno = EINVAL;
501 goto fail;
503 retaddr = target_mmap(start, len, prot | PROT_WRITE,
504 MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS,
505 -1, 0);
506 if (retaddr == -1)
507 goto fail;
508 if (pread(fd, g2h(start), len, offset) == -1)
509 goto fail;
510 if (!(prot & PROT_WRITE)) {
511 ret = target_mprotect(start, len, prot);
512 if (ret != 0) {
513 start = ret;
514 goto the_end;
517 goto the_end;
520 /* handle the start of the mapping */
521 if (start > real_start) {
522 if (real_end == real_start + qemu_host_page_size) {
523 /* one single host page */
524 ret = mmap_frag(real_start, start, end,
525 prot, flags, fd, offset);
526 if (ret == -1)
527 goto fail;
528 goto the_end1;
530 ret = mmap_frag(real_start, start, real_start + qemu_host_page_size,
531 prot, flags, fd, offset);
532 if (ret == -1)
533 goto fail;
534 real_start += qemu_host_page_size;
536 /* handle the end of the mapping */
537 if (end < real_end) {
538 ret = mmap_frag(real_end - qemu_host_page_size,
539 real_end - qemu_host_page_size, real_end,
540 prot, flags, fd,
541 offset + real_end - qemu_host_page_size - start);
542 if (ret == -1)
543 goto fail;
544 real_end -= qemu_host_page_size;
547 /* map the middle (easier) */
548 if (real_start < real_end) {
549 void *p;
550 unsigned long offset1;
551 if (flags & MAP_ANONYMOUS)
552 offset1 = 0;
553 else
554 offset1 = offset + real_start - start;
555 p = mmap(g2h(real_start), real_end - real_start,
556 prot, flags, fd, offset1);
557 if (p == MAP_FAILED)
558 goto fail;
561 the_end1:
562 page_set_flags(start, start + len, prot | PAGE_VALID);
563 the_end:
564 #ifdef DEBUG_MMAP
565 printf("ret=0x" TARGET_ABI_FMT_lx "\n", start);
566 page_dump(stdout);
567 printf("\n");
568 #endif
569 tb_invalidate_phys_range(start, start + len);
570 mmap_unlock();
571 return start;
572 fail:
573 mmap_unlock();
574 return -1;
577 static void mmap_reserve(abi_ulong start, abi_ulong size)
579 abi_ulong real_start;
580 abi_ulong real_end;
581 abi_ulong addr;
582 abi_ulong end;
583 int prot;
585 real_start = start & qemu_host_page_mask;
586 real_end = HOST_PAGE_ALIGN(start + size);
587 end = start + size;
588 if (start > real_start) {
589 /* handle host page containing start */
590 prot = 0;
591 for (addr = real_start; addr < start; addr += TARGET_PAGE_SIZE) {
592 prot |= page_get_flags(addr);
594 if (real_end == real_start + qemu_host_page_size) {
595 for (addr = end; addr < real_end; addr += TARGET_PAGE_SIZE) {
596 prot |= page_get_flags(addr);
598 end = real_end;
600 if (prot != 0)
601 real_start += qemu_host_page_size;
603 if (end < real_end) {
604 prot = 0;
605 for (addr = end; addr < real_end; addr += TARGET_PAGE_SIZE) {
606 prot |= page_get_flags(addr);
608 if (prot != 0)
609 real_end -= qemu_host_page_size;
611 if (real_start != real_end) {
612 mmap(g2h(real_start), real_end - real_start, PROT_NONE,
613 MAP_FIXED | MAP_ANONYMOUS | MAP_PRIVATE | MAP_NORESERVE,
614 -1, 0);
618 int target_munmap(abi_ulong start, abi_ulong len)
620 abi_ulong end, real_start, real_end, addr;
621 int prot, ret;
623 #ifdef DEBUG_MMAP
624 printf("munmap: start=0x" TARGET_ABI_FMT_lx " len=0x"
625 TARGET_ABI_FMT_lx "\n",
626 start, len);
627 #endif
628 if (start & ~TARGET_PAGE_MASK)
629 return -EINVAL;
630 len = TARGET_PAGE_ALIGN(len);
631 if (len == 0)
632 return -EINVAL;
633 mmap_lock();
634 end = start + len;
635 real_start = start & qemu_host_page_mask;
636 real_end = HOST_PAGE_ALIGN(end);
638 if (start > real_start) {
639 /* handle host page containing start */
640 prot = 0;
641 for(addr = real_start; addr < start; addr += TARGET_PAGE_SIZE) {
642 prot |= page_get_flags(addr);
644 if (real_end == real_start + qemu_host_page_size) {
645 for(addr = end; addr < real_end; addr += TARGET_PAGE_SIZE) {
646 prot |= page_get_flags(addr);
648 end = real_end;
650 if (prot != 0)
651 real_start += qemu_host_page_size;
653 if (end < real_end) {
654 prot = 0;
655 for(addr = end; addr < real_end; addr += TARGET_PAGE_SIZE) {
656 prot |= page_get_flags(addr);
658 if (prot != 0)
659 real_end -= qemu_host_page_size;
662 ret = 0;
663 /* unmap what we can */
664 if (real_start < real_end) {
665 if (reserved_va) {
666 mmap_reserve(real_start, real_end - real_start);
667 } else {
668 ret = munmap(g2h(real_start), real_end - real_start);
672 if (ret == 0) {
673 page_set_flags(start, start + len, 0);
674 tb_invalidate_phys_range(start, start + len);
676 mmap_unlock();
677 return ret;
680 abi_long target_mremap(abi_ulong old_addr, abi_ulong old_size,
681 abi_ulong new_size, unsigned long flags,
682 abi_ulong new_addr)
684 int prot;
685 void *host_addr;
687 mmap_lock();
689 if (flags & MREMAP_FIXED) {
690 host_addr = (void *) syscall(__NR_mremap, g2h(old_addr),
691 old_size, new_size,
692 flags,
693 g2h(new_addr));
695 if (reserved_va && host_addr != MAP_FAILED) {
696 /* If new and old addresses overlap then the above mremap will
697 already have failed with EINVAL. */
698 mmap_reserve(old_addr, old_size);
700 } else if (flags & MREMAP_MAYMOVE) {
701 abi_ulong mmap_start;
703 mmap_start = mmap_find_vma(0, new_size);
705 if (mmap_start == -1) {
706 errno = ENOMEM;
707 host_addr = MAP_FAILED;
708 } else {
709 host_addr = (void *) syscall(__NR_mremap, g2h(old_addr),
710 old_size, new_size,
711 flags | MREMAP_FIXED,
712 g2h(mmap_start));
713 if (reserved_va) {
714 mmap_reserve(old_addr, old_size);
717 } else {
718 int prot = 0;
719 if (reserved_va && old_size < new_size) {
720 abi_ulong addr;
721 for (addr = old_addr + old_size;
722 addr < old_addr + new_size;
723 addr++) {
724 prot |= page_get_flags(addr);
727 if (prot == 0) {
728 host_addr = mremap(g2h(old_addr), old_size, new_size, flags);
729 if (host_addr != MAP_FAILED && reserved_va && old_size > new_size) {
730 mmap_reserve(old_addr + old_size, new_size - old_size);
732 } else {
733 errno = ENOMEM;
734 host_addr = MAP_FAILED;
736 /* Check if address fits target address space */
737 if ((unsigned long)host_addr + new_size > (abi_ulong)-1) {
738 /* Revert mremap() changes */
739 host_addr = mremap(g2h(old_addr), new_size, old_size, flags);
740 errno = ENOMEM;
741 host_addr = MAP_FAILED;
745 if (host_addr == MAP_FAILED) {
746 new_addr = -1;
747 } else {
748 new_addr = h2g(host_addr);
749 prot = page_get_flags(old_addr);
750 page_set_flags(old_addr, old_addr + old_size, 0);
751 page_set_flags(new_addr, new_addr + new_size, prot | PAGE_VALID);
753 tb_invalidate_phys_range(new_addr, new_addr + new_size);
754 mmap_unlock();
755 return new_addr;
758 int target_msync(abi_ulong start, abi_ulong len, int flags)
760 abi_ulong end;
762 if (start & ~TARGET_PAGE_MASK)
763 return -EINVAL;
764 len = TARGET_PAGE_ALIGN(len);
765 end = start + len;
766 if (end < start)
767 return -EINVAL;
768 if (end == start)
769 return 0;
771 start &= qemu_host_page_mask;
772 return msync(g2h(start), end - start, flags);