linux-user/mmap.c

/*
 *  mmap support for qemu
 *
 *  Copyright (c) 2003 Fabrice Bellard
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, see <http://www.gnu.org/licenses/>.
 */
#include "qemu/osdep.h"

#include "qemu.h"
#include "qemu-common.h"
#include "translate-all.h"

//#define DEBUG_MMAP

static pthread_mutex_t mmap_mutex = PTHREAD_MUTEX_INITIALIZER;
static __thread int mmap_lock_count;

void mmap_lock(void)
{
    if (mmap_lock_count++ == 0) {
        pthread_mutex_lock(&mmap_mutex);
    }
}

void mmap_unlock(void)
{
    if (--mmap_lock_count == 0) {
        pthread_mutex_unlock(&mmap_mutex);
    }
}

bool have_mmap_lock(void)
{
    return mmap_lock_count > 0 ? true : false;
}
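
/*
 * Illustrative note (not from the upstream source): the per-thread count
 * makes the lock recursive within one thread, so nested sections are safe:
 *
 *     mmap_lock();     // count 0 -> 1: takes mmap_mutex
 *     mmap_lock();     // count 1 -> 2: mutex already held, no-op
 *     ...
 *     mmap_unlock();   // count 2 -> 1: mutex still held
 *     mmap_unlock();   // count 1 -> 0: releases mmap_mutex
 */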

/* Grab lock to make sure things are in a consistent state after fork().  */
void mmap_fork_start(void)
{
    if (mmap_lock_count)
        abort();
    pthread_mutex_lock(&mmap_mutex);
}

void mmap_fork_end(int child)
{
    if (child)
        pthread_mutex_init(&mmap_mutex, NULL);
    else
        pthread_mutex_unlock(&mmap_mutex);
}

/* NOTE: all the constants are the HOST ones, but addresses are target. */
int target_mprotect(abi_ulong start, abi_ulong len, int prot)
{
    abi_ulong end, host_start, host_end, addr;
    int prot1, ret;

#ifdef DEBUG_MMAP
    printf("mprotect: start=0x" TARGET_ABI_FMT_lx
           " len=0x" TARGET_ABI_FMT_lx " prot=%c%c%c\n", start, len,
           prot & PROT_READ ? 'r' : '-',
           prot & PROT_WRITE ? 'w' : '-',
           prot & PROT_EXEC ? 'x' : '-');
#endif

    if ((start & ~TARGET_PAGE_MASK) != 0)
        return -EINVAL;
    len = TARGET_PAGE_ALIGN(len);
    end = start + len;
    if (end < start)
        return -EINVAL;
    prot &= PROT_READ | PROT_WRITE | PROT_EXEC;
    if (len == 0)
        return 0;

    mmap_lock();
    host_start = start & qemu_host_page_mask;
    host_end = HOST_PAGE_ALIGN(end);
    if (start > host_start) {
        /* handle host page containing start */
        prot1 = prot;
        for (addr = host_start; addr < start; addr += TARGET_PAGE_SIZE) {
            prot1 |= page_get_flags(addr);
        }
        if (host_end == host_start + qemu_host_page_size) {
            for (addr = end; addr < host_end; addr += TARGET_PAGE_SIZE) {
                prot1 |= page_get_flags(addr);
            }
            end = host_end;
        }
        ret = mprotect(g2h(host_start), qemu_host_page_size, prot1 & PAGE_BITS);
        if (ret != 0)
            goto error;
        host_start += qemu_host_page_size;
    }
    if (end < host_end) {
        prot1 = prot;
        for (addr = end; addr < host_end; addr += TARGET_PAGE_SIZE) {
            prot1 |= page_get_flags(addr);
        }
        ret = mprotect(g2h(host_end - qemu_host_page_size), qemu_host_page_size,
                       prot1 & PAGE_BITS);
        if (ret != 0)
            goto error;
        host_end -= qemu_host_page_size;
    }

    /* handle the pages in the middle */
    if (host_start < host_end) {
        ret = mprotect(g2h(host_start), host_end - host_start, prot);
        if (ret != 0)
            goto error;
    }
    page_set_flags(start, start + len, prot | PAGE_VALID);
    mmap_unlock();
    return 0;
error:
    mmap_unlock();
    return ret;
}
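
/*
 * Worked example (illustrative, not from the upstream source): assume 4K
 * target pages and 64K host pages, and a target_mprotect() of
 * [0x11000, 0x13000).  Then:
 *
 *     host_start = 0x11000 & qemu_host_page_mask = 0x10000
 *     host_end   = HOST_PAGE_ALIGN(0x13000)      = 0x20000
 *
 * The range lives entirely inside one host page, so the host mprotect()
 * must apply the union of the new prot and the existing flags of every
 * other target page in [0x10000, 0x20000); afterwards no "middle" host
 * pages remain to protect with the new prot alone.
 */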

/* map an incomplete host page */
static int mmap_frag(abi_ulong real_start,
                     abi_ulong start, abi_ulong end,
                     int prot, int flags, int fd, abi_ulong offset)
{
    abi_ulong real_end, addr;
    void *host_start;
    int prot1, prot_new;

    real_end = real_start + qemu_host_page_size;
    host_start = g2h(real_start);

    /* get the protection of the target pages outside the mapping */
    prot1 = 0;
    for (addr = real_start; addr < real_end; addr++) {
        if (addr < start || addr >= end)
            prot1 |= page_get_flags(addr);
    }

    if (prot1 == 0) {
        /* no page was there, so we allocate one */
        void *p = mmap(host_start, qemu_host_page_size, prot,
                       flags | MAP_ANONYMOUS, -1, 0);
        if (p == MAP_FAILED)
            return -1;
        prot1 = prot;
    }
    prot1 &= PAGE_BITS;

    prot_new = prot | prot1;
    if (!(flags & MAP_ANONYMOUS)) {
        /* msync() won't work here, so we return an error if write is
           possible while it is a shared mapping */
        if ((flags & MAP_TYPE) == MAP_SHARED &&
            (prot & PROT_WRITE))
            return -1;

        /* adjust protection to be able to read */
        if (!(prot1 & PROT_WRITE))
            mprotect(host_start, qemu_host_page_size, prot1 | PROT_WRITE);

        /* read the corresponding file data */
        if (pread(fd, g2h(start), end - start, offset) == -1)
            return -1;

        /* put final protection */
        if (prot_new != (prot1 | PROT_WRITE))
            mprotect(host_start, qemu_host_page_size, prot_new);
    } else {
        if (prot_new != prot1) {
            mprotect(host_start, qemu_host_page_size, prot_new);
        }
        if (prot_new & PROT_WRITE) {
            memset(g2h(start), 0, end - start);
        }
    }
    return 0;
}
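
/*
 * Illustrative example (not from the upstream source): mapping file-backed
 * target range [0x11000, 0x12000) with 64K host pages falls inside host
 * page [0x10000, 0x20000).  mmap_frag() allocates the host page anonymously
 * if nothing else lives there, temporarily adds PROT_WRITE if needed,
 * pread()s the file contents into place, and finally restores the combined
 * protection of the new and pre-existing target pages.
 */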

#if HOST_LONG_BITS == 64 && TARGET_ABI_BITS == 64
# define TASK_UNMAPPED_BASE  (1ul << 38)
#elif defined(__CYGWIN__)
/* Cygwin doesn't have a whole lot of address space.  */
# define TASK_UNMAPPED_BASE  0x18000000
#else
# define TASK_UNMAPPED_BASE  0x40000000
#endif
abi_ulong mmap_next_start = TASK_UNMAPPED_BASE;

unsigned long last_brk;

/* Subroutine of mmap_find_vma, used when we have pre-allocated a chunk
   of guest address space.  */
static abi_ulong mmap_find_vma_reserved(abi_ulong start, abi_ulong size)
{
    abi_ulong addr;
    abi_ulong end_addr;
    int prot;
    int looped = 0;

    if (size > reserved_va) {
        return (abi_ulong)-1;
    }

    size = HOST_PAGE_ALIGN(size);
    end_addr = start + size;
    if (end_addr > reserved_va) {
        end_addr = reserved_va;
    }
    addr = end_addr - qemu_host_page_size;

    while (1) {
        if (addr > end_addr) {
            if (looped) {
                return (abi_ulong)-1;
            }
            end_addr = reserved_va;
            addr = end_addr - qemu_host_page_size;
            looped = 1;
            continue;
        }
        prot = page_get_flags(addr);
        if (prot) {
            end_addr = addr;
        }
        if (addr + size == end_addr) {
            break;
        }
        addr -= qemu_host_page_size;
    }

    if (start == mmap_next_start) {
        mmap_next_start = addr;
    }

    return addr;
}
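
/*
 * Illustrative note (not from the upstream source): the scan walks host
 * pages downward from the top of the reserved block.  Every page that
 * already has flags set pulls end_addr down to it, so the loop only breaks
 * once the 'size' bytes directly below end_addr are all unused; if 'addr'
 * underflows past the bottom, the search wraps to the top exactly once
 * before giving up.
 */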

/*
 * Find and reserve a free memory area of size 'size'. The search
 * starts at 'start'.
 * It must be called with mmap_lock() held.
 * Return -1 if error.
 */
abi_ulong mmap_find_vma(abi_ulong start, abi_ulong size)
{
    void *ptr, *prev;
    abi_ulong addr;
    int wrapped, repeat;

    /* If 'start' == 0, then a default start address is used. */
    if (start == 0) {
        start = mmap_next_start;
    } else {
        start &= qemu_host_page_mask;
    }

    size = HOST_PAGE_ALIGN(size);

    if (reserved_va) {
        return mmap_find_vma_reserved(start, size);
    }

    addr = start;
    wrapped = repeat = 0;
    prev = 0;

    for (;; prev = ptr) {
        /*
         * Reserve needed memory area to avoid a race.
         * It should be discarded using:
         *  - mmap() with MAP_FIXED flag
         *  - mremap() with MREMAP_FIXED flag
         *  - shmat() with SHM_REMAP flag
         */
        ptr = mmap(g2h(addr), size, PROT_NONE,
                   MAP_ANONYMOUS|MAP_PRIVATE|MAP_NORESERVE, -1, 0);

        /* ENOMEM, if host address space has no memory */
        if (ptr == MAP_FAILED) {
            return (abi_ulong)-1;
        }

        /* Count the number of sequential returns of the same address.
           This is used to modify the search algorithm below.  */
        repeat = (ptr == prev ? repeat + 1 : 0);

        if (h2g_valid(ptr + size - 1)) {
            addr = h2g(ptr);

            if ((addr & ~TARGET_PAGE_MASK) == 0) {
                /* Success.  */
                if (start == mmap_next_start && addr >= TASK_UNMAPPED_BASE) {
                    mmap_next_start = addr + size;
                }
                return addr;
            }

            /* The address is not properly aligned for the target.  */
            switch (repeat) {
            case 0:
                /* Assume the result that the kernel gave us is the
                   first with enough free space, so start again at the
                   next higher target page.  */
                addr = TARGET_PAGE_ALIGN(addr);
                break;
            case 1:
                /* Sometimes the kernel decides to perform the allocation
                   at the top end of memory instead.  */
                addr &= TARGET_PAGE_MASK;
                break;
            case 2:
                /* Start over at low memory.  */
                addr = 0;
                break;
            default:
                /* Fail.  This unaligned block must be the last.  */
                addr = -1;
                break;
            }
        } else {
            /* Since the result the kernel gave didn't fit, start
               again at low memory.  If any repetition, fail.  */
            addr = (repeat ? -1 : 0);
        }

        /* Unmap and try again.  */
        munmap(ptr, size);

        /* ENOMEM if we checked the whole of the target address space.  */
        if (addr == (abi_ulong)-1) {
            return (abi_ulong)-1;
        } else if (addr == 0) {
            if (wrapped) {
                return (abi_ulong)-1;
            }
            wrapped = 1;
            /* Don't actually use 0 when wrapping, instead indicate
               that we'd truly like an allocation in low memory.  */
            addr = (mmap_min_addr > TARGET_PAGE_SIZE
                    ? TARGET_PAGE_ALIGN(mmap_min_addr)
                    : TARGET_PAGE_SIZE);
        } else if (wrapped && addr >= start) {
            return (abi_ulong)-1;
        }
    }
}
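
/*
 * Usage sketch (illustrative, not from the upstream source): callers such
 * as target_mmap() below hold mmap_lock(), call mmap_find_vma(), and then
 * immediately replace the returned PROT_NONE reservation with a MAP_FIXED
 * mapping, one of the three discard methods listed in the loop above.
 */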

/* NOTE: all the constants are the HOST ones */
abi_long target_mmap(abi_ulong start, abi_ulong len, int prot,
                     int flags, int fd, abi_ulong offset)
{
    abi_ulong ret, end, real_start, real_end, retaddr, host_offset, host_len;

    mmap_lock();
#ifdef DEBUG_MMAP
    {
        printf("mmap: start=0x" TARGET_ABI_FMT_lx
               " len=0x" TARGET_ABI_FMT_lx " prot=%c%c%c flags=",
               start, len,
               prot & PROT_READ ? 'r' : '-',
               prot & PROT_WRITE ? 'w' : '-',
               prot & PROT_EXEC ? 'x' : '-');
        if (flags & MAP_FIXED)
            printf("MAP_FIXED ");
        if (flags & MAP_ANONYMOUS)
            printf("MAP_ANON ");
        switch (flags & MAP_TYPE) {
        case MAP_PRIVATE:
            printf("MAP_PRIVATE ");
            break;
        case MAP_SHARED:
            printf("MAP_SHARED ");
            break;
        default:
            printf("[MAP_TYPE=0x%x] ", flags & MAP_TYPE);
            break;
        }
        printf("fd=%d offset=" TARGET_ABI_FMT_lx "\n", fd, offset);
    }
#endif

    if (offset & ~TARGET_PAGE_MASK) {
        errno = EINVAL;
        goto fail;
    }

    len = TARGET_PAGE_ALIGN(len);
    if (len == 0)
        goto the_end;
    real_start = start & qemu_host_page_mask;
    host_offset = offset & qemu_host_page_mask;

    /* If the user is asking for the kernel to find a location, do that
       before we truncate the length for mapping files below.  */
    if (!(flags & MAP_FIXED)) {
        host_len = len + offset - host_offset;
        host_len = HOST_PAGE_ALIGN(host_len);
        start = mmap_find_vma(real_start, host_len);
        if (start == (abi_ulong)-1) {
            errno = ENOMEM;
            goto fail;
        }
    }

    /* When mapping files into a memory area larger than the file, accesses
       to pages beyond the file size will cause a SIGBUS.

       For example, if mmaping a file of 100 bytes on a host with 4K pages
       emulating a target with 8K pages, the target expects to be able to
       access the first 8K. But the host will trap us on any access beyond
       4K.

       When emulating a target with a larger page size than the host's, we
       may need to truncate file maps at EOF and add extra anonymous pages
       up to the target's page boundary.  */

    if ((qemu_real_host_page_size < TARGET_PAGE_SIZE)
        && !(flags & MAP_ANONYMOUS)) {
        struct stat sb;

        if (fstat(fd, &sb) == -1)
            goto fail;

        /* Are we trying to create a map beyond EOF?  */
        if (offset + len > sb.st_size) {
            /* If so, truncate the file map at EOF aligned with
               the host's real page size.  Additional anonymous maps
               will be created beyond EOF.  */
            len = REAL_HOST_PAGE_ALIGN(sb.st_size - offset);
        }
    }

    if (!(flags & MAP_FIXED)) {
        unsigned long host_start;
        void *p;

        host_len = len + offset - host_offset;
        host_len = HOST_PAGE_ALIGN(host_len);

        /* Note: we prefer to control the mapping address. It is
           especially important if qemu_host_page_size >
           qemu_real_host_page_size */
        p = mmap(g2h(start), host_len, prot,
                 flags | MAP_FIXED | MAP_ANONYMOUS, -1, 0);
        if (p == MAP_FAILED)
            goto fail;
        /* update start so that it points to the file position at 'offset' */
        host_start = (unsigned long)p;
        if (!(flags & MAP_ANONYMOUS)) {
            p = mmap(g2h(start), len, prot,
                     flags | MAP_FIXED, fd, host_offset);
            if (p == MAP_FAILED) {
                munmap(g2h(start), host_len);
                goto fail;
            }
            host_start += offset - host_offset;
        }
        start = h2g(host_start);
    } else {
        if (start & ~TARGET_PAGE_MASK) {
            errno = EINVAL;
            goto fail;
        }
        end = start + len;
        real_end = HOST_PAGE_ALIGN(end);

        /*
         * Test if requested memory area fits target address space
         * It can fail only on 64-bit host with 32-bit target.
         * On any other target/host host mmap() handles this error correctly.
         */
        if ((unsigned long)start + len - 1 > (abi_ulong) -1) {
            errno = EINVAL;
            goto fail;
        }

        /* worst case: we cannot map the file because the offset is not
           aligned, so we read it */
        if (!(flags & MAP_ANONYMOUS) &&
            (offset & ~qemu_host_page_mask) != (start & ~qemu_host_page_mask)) {
            /* msync() won't work here, so we return an error if write is
               possible while it is a shared mapping */
            if ((flags & MAP_TYPE) == MAP_SHARED &&
                (prot & PROT_WRITE)) {
                errno = EINVAL;
                goto fail;
            }
            retaddr = target_mmap(start, len, prot | PROT_WRITE,
                                  MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS,
                                  -1, 0);
            if (retaddr == -1)
                goto fail;
            if (pread(fd, g2h(start), len, offset) == -1)
                goto fail;
            if (!(prot & PROT_WRITE)) {
                ret = target_mprotect(start, len, prot);
                assert(ret == 0);
            }
            goto the_end;
        }

        /* handle the start of the mapping */
        if (start > real_start) {
            if (real_end == real_start + qemu_host_page_size) {
                /* one single host page */
                ret = mmap_frag(real_start, start, end,
                                prot, flags, fd, offset);
                if (ret == -1)
                    goto fail;
                goto the_end1;
            }
            ret = mmap_frag(real_start, start, real_start + qemu_host_page_size,
                            prot, flags, fd, offset);
            if (ret == -1)
                goto fail;
            real_start += qemu_host_page_size;
        }
        /* handle the end of the mapping */
        if (end < real_end) {
            ret = mmap_frag(real_end - qemu_host_page_size,
                            real_end - qemu_host_page_size, end,
                            prot, flags, fd,
                            offset + real_end - qemu_host_page_size - start);
            if (ret == -1)
                goto fail;
            real_end -= qemu_host_page_size;
        }

        /* map the middle (easier) */
        if (real_start < real_end) {
            void *p;
            unsigned long offset1;
            if (flags & MAP_ANONYMOUS)
                offset1 = 0;
            else
                offset1 = offset + real_start - start;
            p = mmap(g2h(real_start), real_end - real_start,
                     prot, flags, fd, offset1);
            if (p == MAP_FAILED)
                goto fail;
        }
    }
 the_end1:
    page_set_flags(start, start + len, prot | PAGE_VALID);
 the_end:
#ifdef DEBUG_MMAP
    printf("ret=0x" TARGET_ABI_FMT_lx "\n", start);
    page_dump(stdout);
    printf("\n");
#endif
    tb_invalidate_phys_range(start, start + len);
    mmap_unlock();
    return start;
fail:
    mmap_unlock();
    return -1;
}
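
/*
 * Worked example (illustrative, not from the upstream source): a guest
 * mmap(NULL, 0x100, PROT_READ, MAP_PRIVATE, fd, 0) on a 4K/4K host-target
 * pair rounds len up to 0x1000, gets an address from mmap_find_vma(),
 * reserves host_len bytes there anonymously with MAP_FIXED, and then maps
 * the file over the reservation, again with MAP_FIXED, before returning
 * the guest address h2g(host_start).
 */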

static void mmap_reserve(abi_ulong start, abi_ulong size)
{
    abi_ulong real_start;
    abi_ulong real_end;
    abi_ulong addr;
    abi_ulong end;
    int prot;

    real_start = start & qemu_host_page_mask;
    real_end = HOST_PAGE_ALIGN(start + size);
    end = start + size;
    if (start > real_start) {
        /* handle host page containing start */
        prot = 0;
        for (addr = real_start; addr < start; addr += TARGET_PAGE_SIZE) {
            prot |= page_get_flags(addr);
        }
        if (real_end == real_start + qemu_host_page_size) {
            for (addr = end; addr < real_end; addr += TARGET_PAGE_SIZE) {
                prot |= page_get_flags(addr);
            }
            end = real_end;
        }
        if (prot != 0)
            real_start += qemu_host_page_size;
    }
    if (end < real_end) {
        prot = 0;
        for (addr = end; addr < real_end; addr += TARGET_PAGE_SIZE) {
            prot |= page_get_flags(addr);
        }
        if (prot != 0)
            real_end -= qemu_host_page_size;
    }
    if (real_start != real_end) {
        mmap(g2h(real_start), real_end - real_start, PROT_NONE,
             MAP_FIXED | MAP_ANONYMOUS | MAP_PRIVATE | MAP_NORESERVE,
             -1, 0);
    }
}
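
/*
 * Illustrative note (not from the upstream source): mmap_reserve() is the
 * reserved_va counterpart of munmap().  Host pages at either edge that
 * still back live target pages are kept, and the remainder is replaced by
 * a PROT_NONE, MAP_NORESERVE mapping so the range stays claimed in the
 * host address space without committing memory.
 */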

int target_munmap(abi_ulong start, abi_ulong len)
{
    abi_ulong end, real_start, real_end, addr;
    int prot, ret;

#ifdef DEBUG_MMAP
    printf("munmap: start=0x" TARGET_ABI_FMT_lx " len=0x"
           TARGET_ABI_FMT_lx "\n",
           start, len);
#endif
    if (start & ~TARGET_PAGE_MASK)
        return -EINVAL;
    len = TARGET_PAGE_ALIGN(len);
    if (len == 0)
        return -EINVAL;
    mmap_lock();
    end = start + len;
    real_start = start & qemu_host_page_mask;
    real_end = HOST_PAGE_ALIGN(end);

    if (start > real_start) {
        /* handle host page containing start */
        prot = 0;
        for (addr = real_start; addr < start; addr += TARGET_PAGE_SIZE) {
            prot |= page_get_flags(addr);
        }
        if (real_end == real_start + qemu_host_page_size) {
            for (addr = end; addr < real_end; addr += TARGET_PAGE_SIZE) {
                prot |= page_get_flags(addr);
            }
            end = real_end;
        }
        if (prot != 0)
            real_start += qemu_host_page_size;
    }
    if (end < real_end) {
        prot = 0;
        for (addr = end; addr < real_end; addr += TARGET_PAGE_SIZE) {
            prot |= page_get_flags(addr);
        }
        if (prot != 0)
            real_end -= qemu_host_page_size;
    }

    ret = 0;
    /* unmap what we can */
    if (real_start < real_end) {
        if (reserved_va) {
            mmap_reserve(real_start, real_end - real_start);
        } else {
            ret = munmap(g2h(real_start), real_end - real_start);
        }
    }

    if (ret == 0) {
        page_set_flags(start, start + len, 0);
        tb_invalidate_phys_range(start, start + len);
    }
    mmap_unlock();
    return ret;
}
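
/*
 * Worked example (illustrative, not from the upstream source): with 4K
 * target pages and 64K host pages, target_munmap() of [0x10000, 0x11000)
 * shares host page [0x10000, 0x20000) with target pages that may still be
 * live.  If any are, real_end drops back to 0x10000, real_start == real_end,
 * and no host unmap happens; only the target page flags are cleared.
 */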

abi_long target_mremap(abi_ulong old_addr, abi_ulong old_size,
                       abi_ulong new_size, unsigned long flags,
                       abi_ulong new_addr)
{
    int prot;
    void *host_addr;

    mmap_lock();

    if (flags & MREMAP_FIXED) {
        host_addr = mremap(g2h(old_addr), old_size, new_size,
                           flags, g2h(new_addr));

        if (reserved_va && host_addr != MAP_FAILED) {
            /* If new and old addresses overlap then the above mremap will
               already have failed with EINVAL.  */
            mmap_reserve(old_addr, old_size);
        }
    } else if (flags & MREMAP_MAYMOVE) {
        abi_ulong mmap_start;

        mmap_start = mmap_find_vma(0, new_size);

        if (mmap_start == -1) {
            errno = ENOMEM;
            host_addr = MAP_FAILED;
        } else {
            host_addr = mremap(g2h(old_addr), old_size, new_size,
                               flags | MREMAP_FIXED, g2h(mmap_start));
            if (reserved_va) {
                mmap_reserve(old_addr, old_size);
            }
        }
    } else {
        int prot = 0;
        if (reserved_va && old_size < new_size) {
            abi_ulong addr;
            for (addr = old_addr + old_size;
                 addr < old_addr + new_size;
                 addr++) {
                prot |= page_get_flags(addr);
            }
        }
        if (prot == 0) {
            host_addr = mremap(g2h(old_addr), old_size, new_size, flags);
            if (host_addr != MAP_FAILED && reserved_va && old_size > new_size) {
                /* re-reserve the tail freed by shrinking the mapping */
                mmap_reserve(old_addr + new_size, old_size - new_size);
            }
        } else {
            errno = ENOMEM;
            host_addr = MAP_FAILED;
        }

        /* Check if address fits target address space */
        if ((unsigned long)host_addr + new_size > (abi_ulong)-1) {
            /* Revert mremap() changes */
            host_addr = mremap(g2h(old_addr), new_size, old_size, flags);
            errno = ENOMEM;
            host_addr = MAP_FAILED;
        }
    }

    if (host_addr == MAP_FAILED) {
        new_addr = -1;
    } else {
        new_addr = h2g(host_addr);
        prot = page_get_flags(old_addr);
        page_set_flags(old_addr, old_addr + old_size, 0);
        page_set_flags(new_addr, new_addr + new_size, prot | PAGE_VALID);
    }
    tb_invalidate_phys_range(new_addr, new_addr + new_size);
    mmap_unlock();
    return new_addr;
}
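
/*
 * Illustrative summary (not from the upstream source): the three branches
 * mirror the host mremap() modes.  MREMAP_FIXED forwards the caller's
 * target address, MREMAP_MAYMOVE picks a destination via mmap_find_vma(),
 * and the plain case only grows in place when the target pages beyond the
 * old end are unused.
 */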

int target_msync(abi_ulong start, abi_ulong len, int flags)
{
    abi_ulong end;

    if (start & ~TARGET_PAGE_MASK)
        return -EINVAL;
    len = TARGET_PAGE_ALIGN(len);
    end = start + len;
    if (end < start)
        return -EINVAL;
    if (end == start)
        return 0;

    start &= qemu_host_page_mask;
    return msync(g2h(start), end - start, flags);
}