Merge commit '9605111958173938ac08298f515d55e937d0211c' into upstream-merge
[qemu-kvm/amd-iommu.git] / linux-user / mmap.c
blob39da6dfb402307f6efc6c8f48425e7e120557b92
1 /*
2 * mmap support for qemu
4 * Copyright (c) 2003 Fabrice Bellard
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, see <http://www.gnu.org/licenses/>.
19 #include <stdlib.h>
20 #include <stdio.h>
21 #include <stdarg.h>
22 #include <string.h>
23 #include <unistd.h>
24 #include <errno.h>
25 #include <sys/types.h>
26 #include <sys/stat.h>
27 #include <sys/mman.h>
28 #include <linux/mman.h>
29 #include <linux/unistd.h>
31 #include "qemu.h"
32 #include "qemu-common.h"
34 //#define DEBUG_MMAP
36 #if defined(CONFIG_USE_NPTL)
37 static pthread_mutex_t mmap_mutex = PTHREAD_MUTEX_INITIALIZER;
38 static __thread int mmap_lock_count;
40 void mmap_lock(void)
42 if (mmap_lock_count++ == 0) {
43 pthread_mutex_lock(&mmap_mutex);
47 void mmap_unlock(void)
49 if (--mmap_lock_count == 0) {
50 pthread_mutex_unlock(&mmap_mutex);
54 /* Grab lock to make sure things are in a consistent state after fork(). */
55 void mmap_fork_start(void)
57 if (mmap_lock_count)
58 abort();
59 pthread_mutex_lock(&mmap_mutex);
62 void mmap_fork_end(int child)
64 if (child)
65 pthread_mutex_init(&mmap_mutex, NULL);
66 else
67 pthread_mutex_unlock(&mmap_mutex);
69 #else
/*
 * Non-NPTL build: the emulator is not thread-safe to begin with, so
 * taking the mmap lock is a no-op.
 */
void mmap_lock(void)
{
    /* intentionally empty */
}
/* Non-NPTL build: releasing the mmap lock is a no-op. */
void mmap_unlock(void)
{
    /* intentionally empty */
}
78 #endif
80 /* NOTE: all the constants are the HOST ones, but addresses are target. */
81 int target_mprotect(abi_ulong start, abi_ulong len, int prot)
83 abi_ulong end, host_start, host_end, addr;
84 int prot1, ret;
86 #ifdef DEBUG_MMAP
87 printf("mprotect: start=0x" TARGET_ABI_FMT_lx
88 "len=0x" TARGET_ABI_FMT_lx " prot=%c%c%c\n", start, len,
89 prot & PROT_READ ? 'r' : '-',
90 prot & PROT_WRITE ? 'w' : '-',
91 prot & PROT_EXEC ? 'x' : '-');
92 #endif
94 if ((start & ~TARGET_PAGE_MASK) != 0)
95 return -EINVAL;
96 len = TARGET_PAGE_ALIGN(len);
97 end = start + len;
98 if (end < start)
99 return -EINVAL;
100 prot &= PROT_READ | PROT_WRITE | PROT_EXEC;
101 if (len == 0)
102 return 0;
104 mmap_lock();
105 host_start = start & qemu_host_page_mask;
106 host_end = HOST_PAGE_ALIGN(end);
107 if (start > host_start) {
108 /* handle host page containing start */
109 prot1 = prot;
110 for(addr = host_start; addr < start; addr += TARGET_PAGE_SIZE) {
111 prot1 |= page_get_flags(addr);
113 if (host_end == host_start + qemu_host_page_size) {
114 for(addr = end; addr < host_end; addr += TARGET_PAGE_SIZE) {
115 prot1 |= page_get_flags(addr);
117 end = host_end;
119 ret = mprotect(g2h(host_start), qemu_host_page_size, prot1 & PAGE_BITS);
120 if (ret != 0)
121 goto error;
122 host_start += qemu_host_page_size;
124 if (end < host_end) {
125 prot1 = prot;
126 for(addr = end; addr < host_end; addr += TARGET_PAGE_SIZE) {
127 prot1 |= page_get_flags(addr);
129 ret = mprotect(g2h(host_end - qemu_host_page_size), qemu_host_page_size,
130 prot1 & PAGE_BITS);
131 if (ret != 0)
132 goto error;
133 host_end -= qemu_host_page_size;
136 /* handle the pages in the middle */
137 if (host_start < host_end) {
138 ret = mprotect(g2h(host_start), host_end - host_start, prot);
139 if (ret != 0)
140 goto error;
142 page_set_flags(start, start + len, prot | PAGE_VALID);
143 mmap_unlock();
144 return 0;
145 error:
146 mmap_unlock();
147 return ret;
150 /* map an incomplete host page */
151 static int mmap_frag(abi_ulong real_start,
152 abi_ulong start, abi_ulong end,
153 int prot, int flags, int fd, abi_ulong offset)
155 abi_ulong real_end, addr;
156 void *host_start;
157 int prot1, prot_new;
159 real_end = real_start + qemu_host_page_size;
160 host_start = g2h(real_start);
162 /* get the protection of the target pages outside the mapping */
163 prot1 = 0;
164 for(addr = real_start; addr < real_end; addr++) {
165 if (addr < start || addr >= end)
166 prot1 |= page_get_flags(addr);
169 if (prot1 == 0) {
170 /* no page was there, so we allocate one */
171 void *p = mmap(host_start, qemu_host_page_size, prot,
172 flags | MAP_ANONYMOUS, -1, 0);
173 if (p == MAP_FAILED)
174 return -1;
175 prot1 = prot;
177 prot1 &= PAGE_BITS;
179 prot_new = prot | prot1;
180 if (!(flags & MAP_ANONYMOUS)) {
181 /* msync() won't work here, so we return an error if write is
182 possible while it is a shared mapping */
183 if ((flags & MAP_TYPE) == MAP_SHARED &&
184 (prot & PROT_WRITE))
185 return -1;
187 /* adjust protection to be able to read */
188 if (!(prot1 & PROT_WRITE))
189 mprotect(host_start, qemu_host_page_size, prot1 | PROT_WRITE);
191 /* read the corresponding file data */
192 if (pread(fd, g2h(start), end - start, offset) == -1)
193 return -1;
195 /* put final protection */
196 if (prot_new != (prot1 | PROT_WRITE))
197 mprotect(host_start, qemu_host_page_size, prot_new);
198 } else {
199 /* just update the protection */
200 if (prot_new != prot1) {
201 mprotect(host_start, qemu_host_page_size, prot_new);
204 return 0;
207 #if HOST_LONG_BITS == 64 && TARGET_ABI_BITS == 64
208 # define TASK_UNMAPPED_BASE (1ul << 38)
209 #elif defined(__CYGWIN__)
210 /* Cygwin doesn't have a whole lot of address space. */
211 # define TASK_UNMAPPED_BASE 0x18000000
212 #else
213 # define TASK_UNMAPPED_BASE 0x40000000
214 #endif
215 static abi_ulong mmap_next_start = TASK_UNMAPPED_BASE;
217 unsigned long last_brk;
219 /* Subroutine of mmap_find_vma, used when we have pre-allocated a chunk
220 of guest address space. */
221 static abi_ulong mmap_find_vma_reserved(abi_ulong start, abi_ulong size)
223 abi_ulong addr;
224 abi_ulong last_addr;
225 int prot;
226 int looped = 0;
228 if (size > reserved_va) {
229 return (abi_ulong)-1;
232 last_addr = start;
233 for (addr = start; last_addr + size != addr; addr += qemu_host_page_size) {
234 if (last_addr + size >= reserved_va
235 || (abi_ulong)(last_addr + size) < last_addr) {
236 if (looped) {
237 return (abi_ulong)-1;
239 last_addr = qemu_host_page_size;
240 addr = 0;
241 looped = 1;
242 continue;
244 prot = page_get_flags(addr);
245 if (prot) {
246 last_addr = addr + qemu_host_page_size;
249 mmap_next_start = addr;
250 return last_addr;
254 * Find and reserve a free memory area of size 'size'. The search
255 * starts at 'start'.
256 * It must be called with mmap_lock() held.
257 * Return -1 if error.
259 abi_ulong mmap_find_vma(abi_ulong start, abi_ulong size)
261 void *ptr, *prev;
262 abi_ulong addr;
263 int wrapped, repeat;
265 /* If 'start' == 0, then a default start address is used. */
266 if (start == 0) {
267 start = mmap_next_start;
268 } else {
269 start &= qemu_host_page_mask;
272 size = HOST_PAGE_ALIGN(size);
274 if (reserved_va) {
275 return mmap_find_vma_reserved(start, size);
278 addr = start;
279 wrapped = repeat = 0;
280 prev = 0;
282 for (;; prev = ptr) {
284 * Reserve needed memory area to avoid a race.
285 * It should be discarded using:
286 * - mmap() with MAP_FIXED flag
287 * - mremap() with MREMAP_FIXED flag
288 * - shmat() with SHM_REMAP flag
290 ptr = mmap(g2h(addr), size, PROT_NONE,
291 MAP_ANONYMOUS|MAP_PRIVATE|MAP_NORESERVE, -1, 0);
293 /* ENOMEM, if host address space has no memory */
294 if (ptr == MAP_FAILED) {
295 return (abi_ulong)-1;
298 /* Count the number of sequential returns of the same address.
299 This is used to modify the search algorithm below. */
300 repeat = (ptr == prev ? repeat + 1 : 0);
302 if (h2g_valid(ptr + size - 1)) {
303 addr = h2g(ptr);
305 if ((addr & ~TARGET_PAGE_MASK) == 0) {
306 /* Success. */
307 if (start == mmap_next_start && addr >= TASK_UNMAPPED_BASE) {
308 mmap_next_start = addr + size;
310 return addr;
313 /* The address is not properly aligned for the target. */
314 switch (repeat) {
315 case 0:
316 /* Assume the result that the kernel gave us is the
317 first with enough free space, so start again at the
318 next higher target page. */
319 addr = TARGET_PAGE_ALIGN(addr);
320 break;
321 case 1:
322 /* Sometimes the kernel decides to perform the allocation
323 at the top end of memory instead. */
324 addr &= TARGET_PAGE_MASK;
325 break;
326 case 2:
327 /* Start over at low memory. */
328 addr = 0;
329 break;
330 default:
331 /* Fail. This unaligned block must the last. */
332 addr = -1;
333 break;
335 } else {
336 /* Since the result the kernel gave didn't fit, start
337 again at low memory. If any repetition, fail. */
338 addr = (repeat ? -1 : 0);
341 /* Unmap and try again. */
342 munmap(ptr, size);
344 /* ENOMEM if we checked the whole of the target address space. */
345 if (addr == -1ul) {
346 return (abi_ulong)-1;
347 } else if (addr == 0) {
348 if (wrapped) {
349 return (abi_ulong)-1;
351 wrapped = 1;
352 /* Don't actually use 0 when wrapping, instead indicate
353 that we'd truely like an allocation in low memory. */
354 addr = (mmap_min_addr > TARGET_PAGE_SIZE
355 ? TARGET_PAGE_ALIGN(mmap_min_addr)
356 : TARGET_PAGE_SIZE);
357 } else if (wrapped && addr >= start) {
358 return (abi_ulong)-1;
363 /* NOTE: all the constants are the HOST ones */
364 abi_long target_mmap(abi_ulong start, abi_ulong len, int prot,
365 int flags, int fd, abi_ulong offset)
367 abi_ulong ret, end, real_start, real_end, retaddr, host_offset, host_len;
368 unsigned long host_start;
370 mmap_lock();
371 #ifdef DEBUG_MMAP
373 printf("mmap: start=0x" TARGET_ABI_FMT_lx
374 " len=0x" TARGET_ABI_FMT_lx " prot=%c%c%c flags=",
375 start, len,
376 prot & PROT_READ ? 'r' : '-',
377 prot & PROT_WRITE ? 'w' : '-',
378 prot & PROT_EXEC ? 'x' : '-');
379 if (flags & MAP_FIXED)
380 printf("MAP_FIXED ");
381 if (flags & MAP_ANONYMOUS)
382 printf("MAP_ANON ");
383 switch(flags & MAP_TYPE) {
384 case MAP_PRIVATE:
385 printf("MAP_PRIVATE ");
386 break;
387 case MAP_SHARED:
388 printf("MAP_SHARED ");
389 break;
390 default:
391 printf("[MAP_TYPE=0x%x] ", flags & MAP_TYPE);
392 break;
394 printf("fd=%d offset=" TARGET_ABI_FMT_lx "\n", fd, offset);
396 #endif
398 if (offset & ~TARGET_PAGE_MASK) {
399 errno = EINVAL;
400 goto fail;
403 len = TARGET_PAGE_ALIGN(len);
404 if (len == 0)
405 goto the_end;
406 real_start = start & qemu_host_page_mask;
408 /* When mapping files into a memory area larger than the file, accesses
409 to pages beyond the file size will cause a SIGBUS.
411 For example, if mmaping a file of 100 bytes on a host with 4K pages
412 emulating a target with 8K pages, the target expects to be able to
413 access the first 8K. But the host will trap us on any access beyond
414 4K.
416 When emulating a target with a larger page-size than the hosts, we
417 may need to truncate file maps at EOF and add extra anonymous pages
418 up to the targets page boundary. */
420 if ((qemu_real_host_page_size < TARGET_PAGE_SIZE)
421 && !(flags & MAP_ANONYMOUS)) {
422 struct stat sb;
424 if (fstat (fd, &sb) == -1)
425 goto fail;
427 /* Are we trying to create a map beyond EOF?. */
428 if (offset + len > sb.st_size) {
429 /* If so, truncate the file map at eof aligned with
430 the hosts real pagesize. Additional anonymous maps
431 will be created beyond EOF. */
432 len = (sb.st_size - offset);
433 len += qemu_real_host_page_size - 1;
434 len &= ~(qemu_real_host_page_size - 1);
438 if (!(flags & MAP_FIXED)) {
439 abi_ulong mmap_start;
440 void *p;
441 host_offset = offset & qemu_host_page_mask;
442 host_len = len + offset - host_offset;
443 host_len = HOST_PAGE_ALIGN(host_len);
444 mmap_start = mmap_find_vma(real_start, host_len);
445 if (mmap_start == (abi_ulong)-1) {
446 errno = ENOMEM;
447 goto fail;
449 /* Note: we prefer to control the mapping address. It is
450 especially important if qemu_host_page_size >
451 qemu_real_host_page_size */
452 p = mmap(g2h(mmap_start),
453 host_len, prot, flags | MAP_FIXED | MAP_ANONYMOUS, -1, 0);
454 if (p == MAP_FAILED)
455 goto fail;
456 /* update start so that it points to the file position at 'offset' */
457 host_start = (unsigned long)p;
458 if (!(flags & MAP_ANONYMOUS)) {
459 p = mmap(g2h(mmap_start), len, prot,
460 flags | MAP_FIXED, fd, host_offset);
461 host_start += offset - host_offset;
463 start = h2g(host_start);
464 } else {
465 if (start & ~TARGET_PAGE_MASK) {
466 errno = EINVAL;
467 goto fail;
469 end = start + len;
470 real_end = HOST_PAGE_ALIGN(end);
473 * Test if requested memory area fits target address space
474 * It can fail only on 64-bit host with 32-bit target.
475 * On any other target/host host mmap() handles this error correctly.
477 if ((unsigned long)start + len - 1 > (abi_ulong) -1) {
478 errno = EINVAL;
479 goto fail;
482 /* worst case: we cannot map the file because the offset is not
483 aligned, so we read it */
484 if (!(flags & MAP_ANONYMOUS) &&
485 (offset & ~qemu_host_page_mask) != (start & ~qemu_host_page_mask)) {
486 /* msync() won't work here, so we return an error if write is
487 possible while it is a shared mapping */
488 if ((flags & MAP_TYPE) == MAP_SHARED &&
489 (prot & PROT_WRITE)) {
490 errno = EINVAL;
491 goto fail;
493 retaddr = target_mmap(start, len, prot | PROT_WRITE,
494 MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS,
495 -1, 0);
496 if (retaddr == -1)
497 goto fail;
498 if (pread(fd, g2h(start), len, offset) == -1)
499 goto fail;
500 if (!(prot & PROT_WRITE)) {
501 ret = target_mprotect(start, len, prot);
502 if (ret != 0) {
503 start = ret;
504 goto the_end;
507 goto the_end;
510 /* handle the start of the mapping */
511 if (start > real_start) {
512 if (real_end == real_start + qemu_host_page_size) {
513 /* one single host page */
514 ret = mmap_frag(real_start, start, end,
515 prot, flags, fd, offset);
516 if (ret == -1)
517 goto fail;
518 goto the_end1;
520 ret = mmap_frag(real_start, start, real_start + qemu_host_page_size,
521 prot, flags, fd, offset);
522 if (ret == -1)
523 goto fail;
524 real_start += qemu_host_page_size;
526 /* handle the end of the mapping */
527 if (end < real_end) {
528 ret = mmap_frag(real_end - qemu_host_page_size,
529 real_end - qemu_host_page_size, real_end,
530 prot, flags, fd,
531 offset + real_end - qemu_host_page_size - start);
532 if (ret == -1)
533 goto fail;
534 real_end -= qemu_host_page_size;
537 /* map the middle (easier) */
538 if (real_start < real_end) {
539 void *p;
540 unsigned long offset1;
541 if (flags & MAP_ANONYMOUS)
542 offset1 = 0;
543 else
544 offset1 = offset + real_start - start;
545 p = mmap(g2h(real_start), real_end - real_start,
546 prot, flags, fd, offset1);
547 if (p == MAP_FAILED)
548 goto fail;
551 the_end1:
552 page_set_flags(start, start + len, prot | PAGE_VALID);
553 the_end:
554 #ifdef DEBUG_MMAP
555 printf("ret=0x" TARGET_ABI_FMT_lx "\n", start);
556 page_dump(stdout);
557 printf("\n");
558 #endif
559 mmap_unlock();
560 return start;
561 fail:
562 mmap_unlock();
563 return -1;
566 static void mmap_reserve(abi_ulong start, abi_ulong size)
568 abi_ulong real_start;
569 abi_ulong real_end;
570 abi_ulong addr;
571 abi_ulong end;
572 int prot;
574 real_start = start & qemu_host_page_mask;
575 real_end = HOST_PAGE_ALIGN(start + size);
576 end = start + size;
577 if (start > real_start) {
578 /* handle host page containing start */
579 prot = 0;
580 for (addr = real_start; addr < start; addr += TARGET_PAGE_SIZE) {
581 prot |= page_get_flags(addr);
583 if (real_end == real_start + qemu_host_page_size) {
584 for (addr = end; addr < real_end; addr += TARGET_PAGE_SIZE) {
585 prot |= page_get_flags(addr);
587 end = real_end;
589 if (prot != 0)
590 real_start += qemu_host_page_size;
592 if (end < real_end) {
593 prot = 0;
594 for (addr = end; addr < real_end; addr += TARGET_PAGE_SIZE) {
595 prot |= page_get_flags(addr);
597 if (prot != 0)
598 real_end -= qemu_host_page_size;
600 if (real_start != real_end) {
601 mmap(g2h(real_start), real_end - real_start, PROT_NONE,
602 MAP_FIXED | MAP_ANONYMOUS | MAP_PRIVATE | MAP_NORESERVE,
603 -1, 0);
607 int target_munmap(abi_ulong start, abi_ulong len)
609 abi_ulong end, real_start, real_end, addr;
610 int prot, ret;
612 #ifdef DEBUG_MMAP
613 printf("munmap: start=0x" TARGET_ABI_FMT_lx " len=0x"
614 TARGET_ABI_FMT_lx "\n",
615 start, len);
616 #endif
617 if (start & ~TARGET_PAGE_MASK)
618 return -EINVAL;
619 len = TARGET_PAGE_ALIGN(len);
620 if (len == 0)
621 return -EINVAL;
622 mmap_lock();
623 end = start + len;
624 real_start = start & qemu_host_page_mask;
625 real_end = HOST_PAGE_ALIGN(end);
627 if (start > real_start) {
628 /* handle host page containing start */
629 prot = 0;
630 for(addr = real_start; addr < start; addr += TARGET_PAGE_SIZE) {
631 prot |= page_get_flags(addr);
633 if (real_end == real_start + qemu_host_page_size) {
634 for(addr = end; addr < real_end; addr += TARGET_PAGE_SIZE) {
635 prot |= page_get_flags(addr);
637 end = real_end;
639 if (prot != 0)
640 real_start += qemu_host_page_size;
642 if (end < real_end) {
643 prot = 0;
644 for(addr = end; addr < real_end; addr += TARGET_PAGE_SIZE) {
645 prot |= page_get_flags(addr);
647 if (prot != 0)
648 real_end -= qemu_host_page_size;
651 ret = 0;
652 /* unmap what we can */
653 if (real_start < real_end) {
654 if (reserved_va) {
655 mmap_reserve(real_start, real_end - real_start);
656 } else {
657 ret = munmap(g2h(real_start), real_end - real_start);
661 if (ret == 0)
662 page_set_flags(start, start + len, 0);
663 mmap_unlock();
664 return ret;
667 abi_long target_mremap(abi_ulong old_addr, abi_ulong old_size,
668 abi_ulong new_size, unsigned long flags,
669 abi_ulong new_addr)
671 int prot;
672 void *host_addr;
674 mmap_lock();
676 if (flags & MREMAP_FIXED) {
677 host_addr = (void *) syscall(__NR_mremap, g2h(old_addr),
678 old_size, new_size,
679 flags,
680 g2h(new_addr));
682 if (reserved_va && host_addr != MAP_FAILED) {
683 /* If new and old addresses overlap then the above mremap will
684 already have failed with EINVAL. */
685 mmap_reserve(old_addr, old_size);
687 } else if (flags & MREMAP_MAYMOVE) {
688 abi_ulong mmap_start;
690 mmap_start = mmap_find_vma(0, new_size);
692 if (mmap_start == -1) {
693 errno = ENOMEM;
694 host_addr = MAP_FAILED;
695 } else {
696 host_addr = (void *) syscall(__NR_mremap, g2h(old_addr),
697 old_size, new_size,
698 flags | MREMAP_FIXED,
699 g2h(mmap_start));
700 mmap_reserve(old_addr, old_size);
702 } else {
703 int prot = 0;
704 if (reserved_va && old_size < new_size) {
705 abi_ulong addr;
706 for (addr = old_addr + old_size;
707 addr < old_addr + new_size;
708 addr++) {
709 prot |= page_get_flags(addr);
712 if (prot == 0) {
713 host_addr = mremap(g2h(old_addr), old_size, new_size, flags);
714 if (host_addr != MAP_FAILED && reserved_va && old_size > new_size) {
715 mmap_reserve(old_addr + old_size, new_size - old_size);
717 } else {
718 errno = ENOMEM;
719 host_addr = MAP_FAILED;
721 /* Check if address fits target address space */
722 if ((unsigned long)host_addr + new_size > (abi_ulong)-1) {
723 /* Revert mremap() changes */
724 host_addr = mremap(g2h(old_addr), new_size, old_size, flags);
725 errno = ENOMEM;
726 host_addr = MAP_FAILED;
730 if (host_addr == MAP_FAILED) {
731 new_addr = -1;
732 } else {
733 new_addr = h2g(host_addr);
734 prot = page_get_flags(old_addr);
735 page_set_flags(old_addr, old_addr + old_size, 0);
736 page_set_flags(new_addr, new_addr + new_size, prot | PAGE_VALID);
738 mmap_unlock();
739 return new_addr;
742 int target_msync(abi_ulong start, abi_ulong len, int flags)
744 abi_ulong end;
746 if (start & ~TARGET_PAGE_MASK)
747 return -EINVAL;
748 len = TARGET_PAGE_ALIGN(len);
749 end = start + len;
750 if (end < start)
751 return -EINVAL;
752 if (end == start)
753 return 0;
755 start &= qemu_host_page_mask;
756 return msync(g2h(start), end - start, flags);