xen: Rename xen_be_evtchn_event
[qemu.git] / linux-user / mmap.c
blobffd099dfe73190cb759ddcf63bb42b2968741181
1 /*
2 * mmap support for qemu
4 * Copyright (c) 2003 Fabrice Bellard
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, see <http://www.gnu.org/licenses/>.
19 #include "qemu/osdep.h"
21 #include "qemu.h"
22 #include "qemu-common.h"
23 #include "translate-all.h"
25 //#define DEBUG_MMAP
/* Process-wide mutex guarding the mmap bookkeeping in this file. */
static pthread_mutex_t mmap_mutex = PTHREAD_MUTEX_INITIALIZER;

/* Per-thread nesting depth of mmap_lock(); zero means "not held". */
static __thread int mmap_lock_count = 0;
30 void mmap_lock(void)
32 if (mmap_lock_count++ == 0) {
33 pthread_mutex_lock(&mmap_mutex);
37 void mmap_unlock(void)
39 if (--mmap_lock_count == 0) {
40 pthread_mutex_unlock(&mmap_mutex);
44 /* Grab lock to make sure things are in a consistent state after fork(). */
45 void mmap_fork_start(void)
47 if (mmap_lock_count)
48 abort();
49 pthread_mutex_lock(&mmap_mutex);
52 void mmap_fork_end(int child)
54 if (child)
55 pthread_mutex_init(&mmap_mutex, NULL);
56 else
57 pthread_mutex_unlock(&mmap_mutex);
60 /* NOTE: all the constants are the HOST ones, but addresses are target. */
61 int target_mprotect(abi_ulong start, abi_ulong len, int prot)
63 abi_ulong end, host_start, host_end, addr;
64 int prot1, ret;
66 #ifdef DEBUG_MMAP
67 printf("mprotect: start=0x" TARGET_ABI_FMT_lx
68 "len=0x" TARGET_ABI_FMT_lx " prot=%c%c%c\n", start, len,
69 prot & PROT_READ ? 'r' : '-',
70 prot & PROT_WRITE ? 'w' : '-',
71 prot & PROT_EXEC ? 'x' : '-');
72 #endif
74 if ((start & ~TARGET_PAGE_MASK) != 0)
75 return -EINVAL;
76 len = TARGET_PAGE_ALIGN(len);
77 end = start + len;
78 if (end < start)
79 return -EINVAL;
80 prot &= PROT_READ | PROT_WRITE | PROT_EXEC;
81 if (len == 0)
82 return 0;
84 mmap_lock();
85 host_start = start & qemu_host_page_mask;
86 host_end = HOST_PAGE_ALIGN(end);
87 if (start > host_start) {
88 /* handle host page containing start */
89 prot1 = prot;
90 for(addr = host_start; addr < start; addr += TARGET_PAGE_SIZE) {
91 prot1 |= page_get_flags(addr);
93 if (host_end == host_start + qemu_host_page_size) {
94 for(addr = end; addr < host_end; addr += TARGET_PAGE_SIZE) {
95 prot1 |= page_get_flags(addr);
97 end = host_end;
99 ret = mprotect(g2h(host_start), qemu_host_page_size, prot1 & PAGE_BITS);
100 if (ret != 0)
101 goto error;
102 host_start += qemu_host_page_size;
104 if (end < host_end) {
105 prot1 = prot;
106 for(addr = end; addr < host_end; addr += TARGET_PAGE_SIZE) {
107 prot1 |= page_get_flags(addr);
109 ret = mprotect(g2h(host_end - qemu_host_page_size), qemu_host_page_size,
110 prot1 & PAGE_BITS);
111 if (ret != 0)
112 goto error;
113 host_end -= qemu_host_page_size;
116 /* handle the pages in the middle */
117 if (host_start < host_end) {
118 ret = mprotect(g2h(host_start), host_end - host_start, prot);
119 if (ret != 0)
120 goto error;
122 page_set_flags(start, start + len, prot | PAGE_VALID);
123 mmap_unlock();
124 return 0;
125 error:
126 mmap_unlock();
127 return ret;
130 /* map an incomplete host page */
131 static int mmap_frag(abi_ulong real_start,
132 abi_ulong start, abi_ulong end,
133 int prot, int flags, int fd, abi_ulong offset)
135 abi_ulong real_end, addr;
136 void *host_start;
137 int prot1, prot_new;
139 real_end = real_start + qemu_host_page_size;
140 host_start = g2h(real_start);
142 /* get the protection of the target pages outside the mapping */
143 prot1 = 0;
144 for(addr = real_start; addr < real_end; addr++) {
145 if (addr < start || addr >= end)
146 prot1 |= page_get_flags(addr);
149 if (prot1 == 0) {
150 /* no page was there, so we allocate one */
151 void *p = mmap(host_start, qemu_host_page_size, prot,
152 flags | MAP_ANONYMOUS, -1, 0);
153 if (p == MAP_FAILED)
154 return -1;
155 prot1 = prot;
157 prot1 &= PAGE_BITS;
159 prot_new = prot | prot1;
160 if (!(flags & MAP_ANONYMOUS)) {
161 /* msync() won't work here, so we return an error if write is
162 possible while it is a shared mapping */
163 if ((flags & MAP_TYPE) == MAP_SHARED &&
164 (prot & PROT_WRITE))
165 return -1;
167 /* adjust protection to be able to read */
168 if (!(prot1 & PROT_WRITE))
169 mprotect(host_start, qemu_host_page_size, prot1 | PROT_WRITE);
171 /* read the corresponding file data */
172 if (pread(fd, g2h(start), end - start, offset) == -1)
173 return -1;
175 /* put final protection */
176 if (prot_new != (prot1 | PROT_WRITE))
177 mprotect(host_start, qemu_host_page_size, prot_new);
178 } else {
179 if (prot_new != prot1) {
180 mprotect(host_start, qemu_host_page_size, prot_new);
182 if (prot_new & PROT_WRITE) {
183 memset(g2h(start), 0, end - start);
186 return 0;
189 #if HOST_LONG_BITS == 64 && TARGET_ABI_BITS == 64
190 # define TASK_UNMAPPED_BASE (1ul << 38)
191 #elif defined(__CYGWIN__)
192 /* Cygwin doesn't have a whole lot of address space. */
193 # define TASK_UNMAPPED_BASE 0x18000000
194 #else
195 # define TASK_UNMAPPED_BASE 0x40000000
196 #endif
197 abi_ulong mmap_next_start = TASK_UNMAPPED_BASE;
199 unsigned long last_brk;
201 /* Subroutine of mmap_find_vma, used when we have pre-allocated a chunk
202 of guest address space. */
203 static abi_ulong mmap_find_vma_reserved(abi_ulong start, abi_ulong size)
205 abi_ulong addr;
206 abi_ulong end_addr;
207 int prot;
208 int looped = 0;
210 if (size > reserved_va) {
211 return (abi_ulong)-1;
214 size = HOST_PAGE_ALIGN(size);
215 end_addr = start + size;
216 if (end_addr > reserved_va) {
217 end_addr = reserved_va;
219 addr = end_addr - qemu_host_page_size;
221 while (1) {
222 if (addr > end_addr) {
223 if (looped) {
224 return (abi_ulong)-1;
226 end_addr = reserved_va;
227 addr = end_addr - qemu_host_page_size;
228 looped = 1;
229 continue;
231 prot = page_get_flags(addr);
232 if (prot) {
233 end_addr = addr;
235 if (addr + size == end_addr) {
236 break;
238 addr -= qemu_host_page_size;
241 if (start == mmap_next_start) {
242 mmap_next_start = addr;
245 return addr;
249 * Find and reserve a free memory area of size 'size'. The search
250 * starts at 'start'.
251 * It must be called with mmap_lock() held.
252 * Return -1 if error.
254 abi_ulong mmap_find_vma(abi_ulong start, abi_ulong size)
256 void *ptr, *prev;
257 abi_ulong addr;
258 int wrapped, repeat;
260 /* If 'start' == 0, then a default start address is used. */
261 if (start == 0) {
262 start = mmap_next_start;
263 } else {
264 start &= qemu_host_page_mask;
267 size = HOST_PAGE_ALIGN(size);
269 if (reserved_va) {
270 return mmap_find_vma_reserved(start, size);
273 addr = start;
274 wrapped = repeat = 0;
275 prev = 0;
277 for (;; prev = ptr) {
279 * Reserve needed memory area to avoid a race.
280 * It should be discarded using:
281 * - mmap() with MAP_FIXED flag
282 * - mremap() with MREMAP_FIXED flag
283 * - shmat() with SHM_REMAP flag
285 ptr = mmap(g2h(addr), size, PROT_NONE,
286 MAP_ANONYMOUS|MAP_PRIVATE|MAP_NORESERVE, -1, 0);
288 /* ENOMEM, if host address space has no memory */
289 if (ptr == MAP_FAILED) {
290 return (abi_ulong)-1;
293 /* Count the number of sequential returns of the same address.
294 This is used to modify the search algorithm below. */
295 repeat = (ptr == prev ? repeat + 1 : 0);
297 if (h2g_valid(ptr + size - 1)) {
298 addr = h2g(ptr);
300 if ((addr & ~TARGET_PAGE_MASK) == 0) {
301 /* Success. */
302 if (start == mmap_next_start && addr >= TASK_UNMAPPED_BASE) {
303 mmap_next_start = addr + size;
305 return addr;
308 /* The address is not properly aligned for the target. */
309 switch (repeat) {
310 case 0:
311 /* Assume the result that the kernel gave us is the
312 first with enough free space, so start again at the
313 next higher target page. */
314 addr = TARGET_PAGE_ALIGN(addr);
315 break;
316 case 1:
317 /* Sometimes the kernel decides to perform the allocation
318 at the top end of memory instead. */
319 addr &= TARGET_PAGE_MASK;
320 break;
321 case 2:
322 /* Start over at low memory. */
323 addr = 0;
324 break;
325 default:
326 /* Fail. This unaligned block must the last. */
327 addr = -1;
328 break;
330 } else {
331 /* Since the result the kernel gave didn't fit, start
332 again at low memory. If any repetition, fail. */
333 addr = (repeat ? -1 : 0);
336 /* Unmap and try again. */
337 munmap(ptr, size);
339 /* ENOMEM if we checked the whole of the target address space. */
340 if (addr == (abi_ulong)-1) {
341 return (abi_ulong)-1;
342 } else if (addr == 0) {
343 if (wrapped) {
344 return (abi_ulong)-1;
346 wrapped = 1;
347 /* Don't actually use 0 when wrapping, instead indicate
348 that we'd truly like an allocation in low memory. */
349 addr = (mmap_min_addr > TARGET_PAGE_SIZE
350 ? TARGET_PAGE_ALIGN(mmap_min_addr)
351 : TARGET_PAGE_SIZE);
352 } else if (wrapped && addr >= start) {
353 return (abi_ulong)-1;
358 /* NOTE: all the constants are the HOST ones */
359 abi_long target_mmap(abi_ulong start, abi_ulong len, int prot,
360 int flags, int fd, abi_ulong offset)
362 abi_ulong ret, end, real_start, real_end, retaddr, host_offset, host_len;
364 mmap_lock();
365 #ifdef DEBUG_MMAP
367 printf("mmap: start=0x" TARGET_ABI_FMT_lx
368 " len=0x" TARGET_ABI_FMT_lx " prot=%c%c%c flags=",
369 start, len,
370 prot & PROT_READ ? 'r' : '-',
371 prot & PROT_WRITE ? 'w' : '-',
372 prot & PROT_EXEC ? 'x' : '-');
373 if (flags & MAP_FIXED)
374 printf("MAP_FIXED ");
375 if (flags & MAP_ANONYMOUS)
376 printf("MAP_ANON ");
377 switch(flags & MAP_TYPE) {
378 case MAP_PRIVATE:
379 printf("MAP_PRIVATE ");
380 break;
381 case MAP_SHARED:
382 printf("MAP_SHARED ");
383 break;
384 default:
385 printf("[MAP_TYPE=0x%x] ", flags & MAP_TYPE);
386 break;
388 printf("fd=%d offset=" TARGET_ABI_FMT_lx "\n", fd, offset);
390 #endif
392 if (offset & ~TARGET_PAGE_MASK) {
393 errno = EINVAL;
394 goto fail;
397 len = TARGET_PAGE_ALIGN(len);
398 if (len == 0)
399 goto the_end;
400 real_start = start & qemu_host_page_mask;
401 host_offset = offset & qemu_host_page_mask;
403 /* If the user is asking for the kernel to find a location, do that
404 before we truncate the length for mapping files below. */
405 if (!(flags & MAP_FIXED)) {
406 host_len = len + offset - host_offset;
407 host_len = HOST_PAGE_ALIGN(host_len);
408 start = mmap_find_vma(real_start, host_len);
409 if (start == (abi_ulong)-1) {
410 errno = ENOMEM;
411 goto fail;
415 /* When mapping files into a memory area larger than the file, accesses
416 to pages beyond the file size will cause a SIGBUS.
418 For example, if mmaping a file of 100 bytes on a host with 4K pages
419 emulating a target with 8K pages, the target expects to be able to
420 access the first 8K. But the host will trap us on any access beyond
421 4K.
423 When emulating a target with a larger page-size than the hosts, we
424 may need to truncate file maps at EOF and add extra anonymous pages
425 up to the targets page boundary. */
427 if ((qemu_real_host_page_size < TARGET_PAGE_SIZE)
428 && !(flags & MAP_ANONYMOUS)) {
429 struct stat sb;
431 if (fstat (fd, &sb) == -1)
432 goto fail;
434 /* Are we trying to create a map beyond EOF?. */
435 if (offset + len > sb.st_size) {
436 /* If so, truncate the file map at eof aligned with
437 the hosts real pagesize. Additional anonymous maps
438 will be created beyond EOF. */
439 len = REAL_HOST_PAGE_ALIGN(sb.st_size - offset);
443 if (!(flags & MAP_FIXED)) {
444 unsigned long host_start;
445 void *p;
447 host_len = len + offset - host_offset;
448 host_len = HOST_PAGE_ALIGN(host_len);
450 /* Note: we prefer to control the mapping address. It is
451 especially important if qemu_host_page_size >
452 qemu_real_host_page_size */
453 p = mmap(g2h(start), host_len, prot,
454 flags | MAP_FIXED | MAP_ANONYMOUS, -1, 0);
455 if (p == MAP_FAILED)
456 goto fail;
457 /* update start so that it points to the file position at 'offset' */
458 host_start = (unsigned long)p;
459 if (!(flags & MAP_ANONYMOUS)) {
460 p = mmap(g2h(start), len, prot,
461 flags | MAP_FIXED, fd, host_offset);
462 if (p == MAP_FAILED) {
463 munmap(g2h(start), host_len);
464 goto fail;
466 host_start += offset - host_offset;
468 start = h2g(host_start);
469 } else {
470 if (start & ~TARGET_PAGE_MASK) {
471 errno = EINVAL;
472 goto fail;
474 end = start + len;
475 real_end = HOST_PAGE_ALIGN(end);
478 * Test if requested memory area fits target address space
479 * It can fail only on 64-bit host with 32-bit target.
480 * On any other target/host host mmap() handles this error correctly.
482 if ((unsigned long)start + len - 1 > (abi_ulong) -1) {
483 errno = EINVAL;
484 goto fail;
487 /* worst case: we cannot map the file because the offset is not
488 aligned, so we read it */
489 if (!(flags & MAP_ANONYMOUS) &&
490 (offset & ~qemu_host_page_mask) != (start & ~qemu_host_page_mask)) {
491 /* msync() won't work here, so we return an error if write is
492 possible while it is a shared mapping */
493 if ((flags & MAP_TYPE) == MAP_SHARED &&
494 (prot & PROT_WRITE)) {
495 errno = EINVAL;
496 goto fail;
498 retaddr = target_mmap(start, len, prot | PROT_WRITE,
499 MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS,
500 -1, 0);
501 if (retaddr == -1)
502 goto fail;
503 if (pread(fd, g2h(start), len, offset) == -1)
504 goto fail;
505 if (!(prot & PROT_WRITE)) {
506 ret = target_mprotect(start, len, prot);
507 assert(ret == 0);
509 goto the_end;
512 /* handle the start of the mapping */
513 if (start > real_start) {
514 if (real_end == real_start + qemu_host_page_size) {
515 /* one single host page */
516 ret = mmap_frag(real_start, start, end,
517 prot, flags, fd, offset);
518 if (ret == -1)
519 goto fail;
520 goto the_end1;
522 ret = mmap_frag(real_start, start, real_start + qemu_host_page_size,
523 prot, flags, fd, offset);
524 if (ret == -1)
525 goto fail;
526 real_start += qemu_host_page_size;
528 /* handle the end of the mapping */
529 if (end < real_end) {
530 ret = mmap_frag(real_end - qemu_host_page_size,
531 real_end - qemu_host_page_size, end,
532 prot, flags, fd,
533 offset + real_end - qemu_host_page_size - start);
534 if (ret == -1)
535 goto fail;
536 real_end -= qemu_host_page_size;
539 /* map the middle (easier) */
540 if (real_start < real_end) {
541 void *p;
542 unsigned long offset1;
543 if (flags & MAP_ANONYMOUS)
544 offset1 = 0;
545 else
546 offset1 = offset + real_start - start;
547 p = mmap(g2h(real_start), real_end - real_start,
548 prot, flags, fd, offset1);
549 if (p == MAP_FAILED)
550 goto fail;
553 the_end1:
554 page_set_flags(start, start + len, prot | PAGE_VALID);
555 the_end:
556 #ifdef DEBUG_MMAP
557 printf("ret=0x" TARGET_ABI_FMT_lx "\n", start);
558 page_dump(stdout);
559 printf("\n");
560 #endif
561 tb_invalidate_phys_range(start, start + len);
562 mmap_unlock();
563 return start;
564 fail:
565 mmap_unlock();
566 return -1;
569 static void mmap_reserve(abi_ulong start, abi_ulong size)
571 abi_ulong real_start;
572 abi_ulong real_end;
573 abi_ulong addr;
574 abi_ulong end;
575 int prot;
577 real_start = start & qemu_host_page_mask;
578 real_end = HOST_PAGE_ALIGN(start + size);
579 end = start + size;
580 if (start > real_start) {
581 /* handle host page containing start */
582 prot = 0;
583 for (addr = real_start; addr < start; addr += TARGET_PAGE_SIZE) {
584 prot |= page_get_flags(addr);
586 if (real_end == real_start + qemu_host_page_size) {
587 for (addr = end; addr < real_end; addr += TARGET_PAGE_SIZE) {
588 prot |= page_get_flags(addr);
590 end = real_end;
592 if (prot != 0)
593 real_start += qemu_host_page_size;
595 if (end < real_end) {
596 prot = 0;
597 for (addr = end; addr < real_end; addr += TARGET_PAGE_SIZE) {
598 prot |= page_get_flags(addr);
600 if (prot != 0)
601 real_end -= qemu_host_page_size;
603 if (real_start != real_end) {
604 mmap(g2h(real_start), real_end - real_start, PROT_NONE,
605 MAP_FIXED | MAP_ANONYMOUS | MAP_PRIVATE | MAP_NORESERVE,
606 -1, 0);
610 int target_munmap(abi_ulong start, abi_ulong len)
612 abi_ulong end, real_start, real_end, addr;
613 int prot, ret;
615 #ifdef DEBUG_MMAP
616 printf("munmap: start=0x" TARGET_ABI_FMT_lx " len=0x"
617 TARGET_ABI_FMT_lx "\n",
618 start, len);
619 #endif
620 if (start & ~TARGET_PAGE_MASK)
621 return -EINVAL;
622 len = TARGET_PAGE_ALIGN(len);
623 if (len == 0)
624 return -EINVAL;
625 mmap_lock();
626 end = start + len;
627 real_start = start & qemu_host_page_mask;
628 real_end = HOST_PAGE_ALIGN(end);
630 if (start > real_start) {
631 /* handle host page containing start */
632 prot = 0;
633 for(addr = real_start; addr < start; addr += TARGET_PAGE_SIZE) {
634 prot |= page_get_flags(addr);
636 if (real_end == real_start + qemu_host_page_size) {
637 for(addr = end; addr < real_end; addr += TARGET_PAGE_SIZE) {
638 prot |= page_get_flags(addr);
640 end = real_end;
642 if (prot != 0)
643 real_start += qemu_host_page_size;
645 if (end < real_end) {
646 prot = 0;
647 for(addr = end; addr < real_end; addr += TARGET_PAGE_SIZE) {
648 prot |= page_get_flags(addr);
650 if (prot != 0)
651 real_end -= qemu_host_page_size;
654 ret = 0;
655 /* unmap what we can */
656 if (real_start < real_end) {
657 if (reserved_va) {
658 mmap_reserve(real_start, real_end - real_start);
659 } else {
660 ret = munmap(g2h(real_start), real_end - real_start);
664 if (ret == 0) {
665 page_set_flags(start, start + len, 0);
666 tb_invalidate_phys_range(start, start + len);
668 mmap_unlock();
669 return ret;
672 abi_long target_mremap(abi_ulong old_addr, abi_ulong old_size,
673 abi_ulong new_size, unsigned long flags,
674 abi_ulong new_addr)
676 int prot;
677 void *host_addr;
679 mmap_lock();
681 if (flags & MREMAP_FIXED) {
682 host_addr = mremap(g2h(old_addr), old_size, new_size,
683 flags, g2h(new_addr));
685 if (reserved_va && host_addr != MAP_FAILED) {
686 /* If new and old addresses overlap then the above mremap will
687 already have failed with EINVAL. */
688 mmap_reserve(old_addr, old_size);
690 } else if (flags & MREMAP_MAYMOVE) {
691 abi_ulong mmap_start;
693 mmap_start = mmap_find_vma(0, new_size);
695 if (mmap_start == -1) {
696 errno = ENOMEM;
697 host_addr = MAP_FAILED;
698 } else {
699 host_addr = mremap(g2h(old_addr), old_size, new_size,
700 flags | MREMAP_FIXED, g2h(mmap_start));
701 if (reserved_va) {
702 mmap_reserve(old_addr, old_size);
705 } else {
706 int prot = 0;
707 if (reserved_va && old_size < new_size) {
708 abi_ulong addr;
709 for (addr = old_addr + old_size;
710 addr < old_addr + new_size;
711 addr++) {
712 prot |= page_get_flags(addr);
715 if (prot == 0) {
716 host_addr = mremap(g2h(old_addr), old_size, new_size, flags);
717 if (host_addr != MAP_FAILED && reserved_va && old_size > new_size) {
718 mmap_reserve(old_addr + old_size, new_size - old_size);
720 } else {
721 errno = ENOMEM;
722 host_addr = MAP_FAILED;
724 /* Check if address fits target address space */
725 if ((unsigned long)host_addr + new_size > (abi_ulong)-1) {
726 /* Revert mremap() changes */
727 host_addr = mremap(g2h(old_addr), new_size, old_size, flags);
728 errno = ENOMEM;
729 host_addr = MAP_FAILED;
733 if (host_addr == MAP_FAILED) {
734 new_addr = -1;
735 } else {
736 new_addr = h2g(host_addr);
737 prot = page_get_flags(old_addr);
738 page_set_flags(old_addr, old_addr + old_size, 0);
739 page_set_flags(new_addr, new_addr + new_size, prot | PAGE_VALID);
741 tb_invalidate_phys_range(new_addr, new_addr + new_size);
742 mmap_unlock();
743 return new_addr;
746 int target_msync(abi_ulong start, abi_ulong len, int flags)
748 abi_ulong end;
750 if (start & ~TARGET_PAGE_MASK)
751 return -EINVAL;
752 len = TARGET_PAGE_ALIGN(len);
753 end = start + len;
754 if (end < start)
755 return -EINVAL;
756 if (end == start)
757 return 0;
759 start &= qemu_host_page_mask;
760 return msync(g2h(start), end - start, flags);