Merge remote-tracking branch 'remotes/gkurz/tags/for-upstream' into staging
[qemu.git] / linux-user / mmap.c
blob4888f531395576cfcae864c3866edf55cd0173f3
/*
 *  mmap support for qemu
 *
 *  Copyright (c) 2003 Fabrice Bellard
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, see <http://www.gnu.org/licenses/>.
 */
19 #include "qemu/osdep.h"
21 #include "qemu.h"
22 #include "qemu-common.h"
23 #include "translate-all.h"
25 //#define DEBUG_MMAP
/* Mutex taken by the outermost mmap_lock() and around fork(). */
static pthread_mutex_t mmap_mutex = PTHREAD_MUTEX_INITIALIZER;

/* Per-thread nesting depth of mmap_lock(); non-zero while the lock is held. */
static __thread int mmap_lock_count;
30 void mmap_lock(void)
32 if (mmap_lock_count++ == 0) {
33 pthread_mutex_lock(&mmap_mutex);
37 void mmap_unlock(void)
39 if (--mmap_lock_count == 0) {
40 pthread_mutex_unlock(&mmap_mutex);
44 bool have_mmap_lock(void)
46 return mmap_lock_count > 0 ? true : false;
49 /* Grab lock to make sure things are in a consistent state after fork(). */
50 void mmap_fork_start(void)
52 if (mmap_lock_count)
53 abort();
54 pthread_mutex_lock(&mmap_mutex);
57 void mmap_fork_end(int child)
59 if (child)
60 pthread_mutex_init(&mmap_mutex, NULL);
61 else
62 pthread_mutex_unlock(&mmap_mutex);
65 /* NOTE: all the constants are the HOST ones, but addresses are target. */
66 int target_mprotect(abi_ulong start, abi_ulong len, int prot)
68 abi_ulong end, host_start, host_end, addr;
69 int prot1, ret;
71 #ifdef DEBUG_MMAP
72 printf("mprotect: start=0x" TARGET_ABI_FMT_lx
73 "len=0x" TARGET_ABI_FMT_lx " prot=%c%c%c\n", start, len,
74 prot & PROT_READ ? 'r' : '-',
75 prot & PROT_WRITE ? 'w' : '-',
76 prot & PROT_EXEC ? 'x' : '-');
77 #endif
79 if ((start & ~TARGET_PAGE_MASK) != 0)
80 return -EINVAL;
81 len = TARGET_PAGE_ALIGN(len);
82 end = start + len;
83 if (end < start)
84 return -EINVAL;
85 prot &= PROT_READ | PROT_WRITE | PROT_EXEC;
86 if (len == 0)
87 return 0;
89 mmap_lock();
90 host_start = start & qemu_host_page_mask;
91 host_end = HOST_PAGE_ALIGN(end);
92 if (start > host_start) {
93 /* handle host page containing start */
94 prot1 = prot;
95 for(addr = host_start; addr < start; addr += TARGET_PAGE_SIZE) {
96 prot1 |= page_get_flags(addr);
98 if (host_end == host_start + qemu_host_page_size) {
99 for(addr = end; addr < host_end; addr += TARGET_PAGE_SIZE) {
100 prot1 |= page_get_flags(addr);
102 end = host_end;
104 ret = mprotect(g2h(host_start), qemu_host_page_size, prot1 & PAGE_BITS);
105 if (ret != 0)
106 goto error;
107 host_start += qemu_host_page_size;
109 if (end < host_end) {
110 prot1 = prot;
111 for(addr = end; addr < host_end; addr += TARGET_PAGE_SIZE) {
112 prot1 |= page_get_flags(addr);
114 ret = mprotect(g2h(host_end - qemu_host_page_size), qemu_host_page_size,
115 prot1 & PAGE_BITS);
116 if (ret != 0)
117 goto error;
118 host_end -= qemu_host_page_size;
121 /* handle the pages in the middle */
122 if (host_start < host_end) {
123 ret = mprotect(g2h(host_start), host_end - host_start, prot);
124 if (ret != 0)
125 goto error;
127 page_set_flags(start, start + len, prot | PAGE_VALID);
128 mmap_unlock();
129 return 0;
130 error:
131 mmap_unlock();
132 return ret;
135 /* map an incomplete host page */
136 static int mmap_frag(abi_ulong real_start,
137 abi_ulong start, abi_ulong end,
138 int prot, int flags, int fd, abi_ulong offset)
140 abi_ulong real_end, addr;
141 void *host_start;
142 int prot1, prot_new;
144 real_end = real_start + qemu_host_page_size;
145 host_start = g2h(real_start);
147 /* get the protection of the target pages outside the mapping */
148 prot1 = 0;
149 for(addr = real_start; addr < real_end; addr++) {
150 if (addr < start || addr >= end)
151 prot1 |= page_get_flags(addr);
154 if (prot1 == 0) {
155 /* no page was there, so we allocate one */
156 void *p = mmap(host_start, qemu_host_page_size, prot,
157 flags | MAP_ANONYMOUS, -1, 0);
158 if (p == MAP_FAILED)
159 return -1;
160 prot1 = prot;
162 prot1 &= PAGE_BITS;
164 prot_new = prot | prot1;
165 if (!(flags & MAP_ANONYMOUS)) {
166 /* msync() won't work here, so we return an error if write is
167 possible while it is a shared mapping */
168 if ((flags & MAP_TYPE) == MAP_SHARED &&
169 (prot & PROT_WRITE))
170 return -1;
172 /* adjust protection to be able to read */
173 if (!(prot1 & PROT_WRITE))
174 mprotect(host_start, qemu_host_page_size, prot1 | PROT_WRITE);
176 /* read the corresponding file data */
177 if (pread(fd, g2h(start), end - start, offset) == -1)
178 return -1;
180 /* put final protection */
181 if (prot_new != (prot1 | PROT_WRITE))
182 mprotect(host_start, qemu_host_page_size, prot_new);
183 } else {
184 if (prot_new != prot1) {
185 mprotect(host_start, qemu_host_page_size, prot_new);
187 if (prot_new & PROT_WRITE) {
188 memset(g2h(start), 0, end - start);
191 return 0;
194 #if HOST_LONG_BITS == 64 && TARGET_ABI_BITS == 64
195 # define TASK_UNMAPPED_BASE (1ul << 38)
196 #else
197 # define TASK_UNMAPPED_BASE 0x40000000
198 #endif
199 abi_ulong mmap_next_start = TASK_UNMAPPED_BASE;
201 unsigned long last_brk;
203 /* Subroutine of mmap_find_vma, used when we have pre-allocated a chunk
204 of guest address space. */
205 static abi_ulong mmap_find_vma_reserved(abi_ulong start, abi_ulong size)
207 abi_ulong addr;
208 abi_ulong end_addr;
209 int prot;
210 int looped = 0;
212 if (size > reserved_va) {
213 return (abi_ulong)-1;
216 size = HOST_PAGE_ALIGN(size);
217 end_addr = start + size;
218 if (end_addr > reserved_va) {
219 end_addr = reserved_va;
221 addr = end_addr - qemu_host_page_size;
223 while (1) {
224 if (addr > end_addr) {
225 if (looped) {
226 return (abi_ulong)-1;
228 end_addr = reserved_va;
229 addr = end_addr - qemu_host_page_size;
230 looped = 1;
231 continue;
233 prot = page_get_flags(addr);
234 if (prot) {
235 end_addr = addr;
237 if (addr + size == end_addr) {
238 break;
240 addr -= qemu_host_page_size;
243 if (start == mmap_next_start) {
244 mmap_next_start = addr;
247 return addr;
251 * Find and reserve a free memory area of size 'size'. The search
252 * starts at 'start'.
253 * It must be called with mmap_lock() held.
254 * Return -1 if error.
256 abi_ulong mmap_find_vma(abi_ulong start, abi_ulong size)
258 void *ptr, *prev;
259 abi_ulong addr;
260 int wrapped, repeat;
262 /* If 'start' == 0, then a default start address is used. */
263 if (start == 0) {
264 start = mmap_next_start;
265 } else {
266 start &= qemu_host_page_mask;
269 size = HOST_PAGE_ALIGN(size);
271 if (reserved_va) {
272 return mmap_find_vma_reserved(start, size);
275 addr = start;
276 wrapped = repeat = 0;
277 prev = 0;
279 for (;; prev = ptr) {
281 * Reserve needed memory area to avoid a race.
282 * It should be discarded using:
283 * - mmap() with MAP_FIXED flag
284 * - mremap() with MREMAP_FIXED flag
285 * - shmat() with SHM_REMAP flag
287 ptr = mmap(g2h(addr), size, PROT_NONE,
288 MAP_ANONYMOUS|MAP_PRIVATE|MAP_NORESERVE, -1, 0);
290 /* ENOMEM, if host address space has no memory */
291 if (ptr == MAP_FAILED) {
292 return (abi_ulong)-1;
295 /* Count the number of sequential returns of the same address.
296 This is used to modify the search algorithm below. */
297 repeat = (ptr == prev ? repeat + 1 : 0);
299 if (h2g_valid(ptr + size - 1)) {
300 addr = h2g(ptr);
302 if ((addr & ~TARGET_PAGE_MASK) == 0) {
303 /* Success. */
304 if (start == mmap_next_start && addr >= TASK_UNMAPPED_BASE) {
305 mmap_next_start = addr + size;
307 return addr;
310 /* The address is not properly aligned for the target. */
311 switch (repeat) {
312 case 0:
313 /* Assume the result that the kernel gave us is the
314 first with enough free space, so start again at the
315 next higher target page. */
316 addr = TARGET_PAGE_ALIGN(addr);
317 break;
318 case 1:
319 /* Sometimes the kernel decides to perform the allocation
320 at the top end of memory instead. */
321 addr &= TARGET_PAGE_MASK;
322 break;
323 case 2:
324 /* Start over at low memory. */
325 addr = 0;
326 break;
327 default:
328 /* Fail. This unaligned block must the last. */
329 addr = -1;
330 break;
332 } else {
333 /* Since the result the kernel gave didn't fit, start
334 again at low memory. If any repetition, fail. */
335 addr = (repeat ? -1 : 0);
338 /* Unmap and try again. */
339 munmap(ptr, size);
341 /* ENOMEM if we checked the whole of the target address space. */
342 if (addr == (abi_ulong)-1) {
343 return (abi_ulong)-1;
344 } else if (addr == 0) {
345 if (wrapped) {
346 return (abi_ulong)-1;
348 wrapped = 1;
349 /* Don't actually use 0 when wrapping, instead indicate
350 that we'd truly like an allocation in low memory. */
351 addr = (mmap_min_addr > TARGET_PAGE_SIZE
352 ? TARGET_PAGE_ALIGN(mmap_min_addr)
353 : TARGET_PAGE_SIZE);
354 } else if (wrapped && addr >= start) {
355 return (abi_ulong)-1;
360 /* NOTE: all the constants are the HOST ones */
361 abi_long target_mmap(abi_ulong start, abi_ulong len, int prot,
362 int flags, int fd, abi_ulong offset)
364 abi_ulong ret, end, real_start, real_end, retaddr, host_offset, host_len;
366 mmap_lock();
367 #ifdef DEBUG_MMAP
369 printf("mmap: start=0x" TARGET_ABI_FMT_lx
370 " len=0x" TARGET_ABI_FMT_lx " prot=%c%c%c flags=",
371 start, len,
372 prot & PROT_READ ? 'r' : '-',
373 prot & PROT_WRITE ? 'w' : '-',
374 prot & PROT_EXEC ? 'x' : '-');
375 if (flags & MAP_FIXED)
376 printf("MAP_FIXED ");
377 if (flags & MAP_ANONYMOUS)
378 printf("MAP_ANON ");
379 switch(flags & MAP_TYPE) {
380 case MAP_PRIVATE:
381 printf("MAP_PRIVATE ");
382 break;
383 case MAP_SHARED:
384 printf("MAP_SHARED ");
385 break;
386 default:
387 printf("[MAP_TYPE=0x%x] ", flags & MAP_TYPE);
388 break;
390 printf("fd=%d offset=" TARGET_ABI_FMT_lx "\n", fd, offset);
392 #endif
394 if (offset & ~TARGET_PAGE_MASK) {
395 errno = EINVAL;
396 goto fail;
399 len = TARGET_PAGE_ALIGN(len);
400 if (len == 0)
401 goto the_end;
402 real_start = start & qemu_host_page_mask;
403 host_offset = offset & qemu_host_page_mask;
405 /* If the user is asking for the kernel to find a location, do that
406 before we truncate the length for mapping files below. */
407 if (!(flags & MAP_FIXED)) {
408 host_len = len + offset - host_offset;
409 host_len = HOST_PAGE_ALIGN(host_len);
410 start = mmap_find_vma(real_start, host_len);
411 if (start == (abi_ulong)-1) {
412 errno = ENOMEM;
413 goto fail;
417 /* When mapping files into a memory area larger than the file, accesses
418 to pages beyond the file size will cause a SIGBUS.
420 For example, if mmaping a file of 100 bytes on a host with 4K pages
421 emulating a target with 8K pages, the target expects to be able to
422 access the first 8K. But the host will trap us on any access beyond
423 4K.
425 When emulating a target with a larger page-size than the hosts, we
426 may need to truncate file maps at EOF and add extra anonymous pages
427 up to the targets page boundary. */
429 if ((qemu_real_host_page_size < qemu_host_page_size) &&
430 !(flags & MAP_ANONYMOUS)) {
431 struct stat sb;
433 if (fstat (fd, &sb) == -1)
434 goto fail;
436 /* Are we trying to create a map beyond EOF?. */
437 if (offset + len > sb.st_size) {
438 /* If so, truncate the file map at eof aligned with
439 the hosts real pagesize. Additional anonymous maps
440 will be created beyond EOF. */
441 len = REAL_HOST_PAGE_ALIGN(sb.st_size - offset);
445 if (!(flags & MAP_FIXED)) {
446 unsigned long host_start;
447 void *p;
449 host_len = len + offset - host_offset;
450 host_len = HOST_PAGE_ALIGN(host_len);
452 /* Note: we prefer to control the mapping address. It is
453 especially important if qemu_host_page_size >
454 qemu_real_host_page_size */
455 p = mmap(g2h(start), host_len, prot,
456 flags | MAP_FIXED | MAP_ANONYMOUS, -1, 0);
457 if (p == MAP_FAILED)
458 goto fail;
459 /* update start so that it points to the file position at 'offset' */
460 host_start = (unsigned long)p;
461 if (!(flags & MAP_ANONYMOUS)) {
462 p = mmap(g2h(start), len, prot,
463 flags | MAP_FIXED, fd, host_offset);
464 if (p == MAP_FAILED) {
465 munmap(g2h(start), host_len);
466 goto fail;
468 host_start += offset - host_offset;
470 start = h2g(host_start);
471 } else {
472 if (start & ~TARGET_PAGE_MASK) {
473 errno = EINVAL;
474 goto fail;
476 end = start + len;
477 real_end = HOST_PAGE_ALIGN(end);
480 * Test if requested memory area fits target address space
481 * It can fail only on 64-bit host with 32-bit target.
482 * On any other target/host host mmap() handles this error correctly.
484 if ((unsigned long)start + len - 1 > (abi_ulong) -1) {
485 errno = EINVAL;
486 goto fail;
489 /* worst case: we cannot map the file because the offset is not
490 aligned, so we read it */
491 if (!(flags & MAP_ANONYMOUS) &&
492 (offset & ~qemu_host_page_mask) != (start & ~qemu_host_page_mask)) {
493 /* msync() won't work here, so we return an error if write is
494 possible while it is a shared mapping */
495 if ((flags & MAP_TYPE) == MAP_SHARED &&
496 (prot & PROT_WRITE)) {
497 errno = EINVAL;
498 goto fail;
500 retaddr = target_mmap(start, len, prot | PROT_WRITE,
501 MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS,
502 -1, 0);
503 if (retaddr == -1)
504 goto fail;
505 if (pread(fd, g2h(start), len, offset) == -1)
506 goto fail;
507 if (!(prot & PROT_WRITE)) {
508 ret = target_mprotect(start, len, prot);
509 assert(ret == 0);
511 goto the_end;
514 /* handle the start of the mapping */
515 if (start > real_start) {
516 if (real_end == real_start + qemu_host_page_size) {
517 /* one single host page */
518 ret = mmap_frag(real_start, start, end,
519 prot, flags, fd, offset);
520 if (ret == -1)
521 goto fail;
522 goto the_end1;
524 ret = mmap_frag(real_start, start, real_start + qemu_host_page_size,
525 prot, flags, fd, offset);
526 if (ret == -1)
527 goto fail;
528 real_start += qemu_host_page_size;
530 /* handle the end of the mapping */
531 if (end < real_end) {
532 ret = mmap_frag(real_end - qemu_host_page_size,
533 real_end - qemu_host_page_size, end,
534 prot, flags, fd,
535 offset + real_end - qemu_host_page_size - start);
536 if (ret == -1)
537 goto fail;
538 real_end -= qemu_host_page_size;
541 /* map the middle (easier) */
542 if (real_start < real_end) {
543 void *p;
544 unsigned long offset1;
545 if (flags & MAP_ANONYMOUS)
546 offset1 = 0;
547 else
548 offset1 = offset + real_start - start;
549 p = mmap(g2h(real_start), real_end - real_start,
550 prot, flags, fd, offset1);
551 if (p == MAP_FAILED)
552 goto fail;
555 the_end1:
556 page_set_flags(start, start + len, prot | PAGE_VALID);
557 the_end:
558 #ifdef DEBUG_MMAP
559 printf("ret=0x" TARGET_ABI_FMT_lx "\n", start);
560 page_dump(stdout);
561 printf("\n");
562 #endif
563 tb_invalidate_phys_range(start, start + len);
564 mmap_unlock();
565 return start;
566 fail:
567 mmap_unlock();
568 return -1;
571 static void mmap_reserve(abi_ulong start, abi_ulong size)
573 abi_ulong real_start;
574 abi_ulong real_end;
575 abi_ulong addr;
576 abi_ulong end;
577 int prot;
579 real_start = start & qemu_host_page_mask;
580 real_end = HOST_PAGE_ALIGN(start + size);
581 end = start + size;
582 if (start > real_start) {
583 /* handle host page containing start */
584 prot = 0;
585 for (addr = real_start; addr < start; addr += TARGET_PAGE_SIZE) {
586 prot |= page_get_flags(addr);
588 if (real_end == real_start + qemu_host_page_size) {
589 for (addr = end; addr < real_end; addr += TARGET_PAGE_SIZE) {
590 prot |= page_get_flags(addr);
592 end = real_end;
594 if (prot != 0)
595 real_start += qemu_host_page_size;
597 if (end < real_end) {
598 prot = 0;
599 for (addr = end; addr < real_end; addr += TARGET_PAGE_SIZE) {
600 prot |= page_get_flags(addr);
602 if (prot != 0)
603 real_end -= qemu_host_page_size;
605 if (real_start != real_end) {
606 mmap(g2h(real_start), real_end - real_start, PROT_NONE,
607 MAP_FIXED | MAP_ANONYMOUS | MAP_PRIVATE | MAP_NORESERVE,
608 -1, 0);
612 int target_munmap(abi_ulong start, abi_ulong len)
614 abi_ulong end, real_start, real_end, addr;
615 int prot, ret;
617 #ifdef DEBUG_MMAP
618 printf("munmap: start=0x" TARGET_ABI_FMT_lx " len=0x"
619 TARGET_ABI_FMT_lx "\n",
620 start, len);
621 #endif
622 if (start & ~TARGET_PAGE_MASK)
623 return -EINVAL;
624 len = TARGET_PAGE_ALIGN(len);
625 if (len == 0)
626 return -EINVAL;
627 mmap_lock();
628 end = start + len;
629 real_start = start & qemu_host_page_mask;
630 real_end = HOST_PAGE_ALIGN(end);
632 if (start > real_start) {
633 /* handle host page containing start */
634 prot = 0;
635 for(addr = real_start; addr < start; addr += TARGET_PAGE_SIZE) {
636 prot |= page_get_flags(addr);
638 if (real_end == real_start + qemu_host_page_size) {
639 for(addr = end; addr < real_end; addr += TARGET_PAGE_SIZE) {
640 prot |= page_get_flags(addr);
642 end = real_end;
644 if (prot != 0)
645 real_start += qemu_host_page_size;
647 if (end < real_end) {
648 prot = 0;
649 for(addr = end; addr < real_end; addr += TARGET_PAGE_SIZE) {
650 prot |= page_get_flags(addr);
652 if (prot != 0)
653 real_end -= qemu_host_page_size;
656 ret = 0;
657 /* unmap what we can */
658 if (real_start < real_end) {
659 if (reserved_va) {
660 mmap_reserve(real_start, real_end - real_start);
661 } else {
662 ret = munmap(g2h(real_start), real_end - real_start);
666 if (ret == 0) {
667 page_set_flags(start, start + len, 0);
668 tb_invalidate_phys_range(start, start + len);
670 mmap_unlock();
671 return ret;
674 abi_long target_mremap(abi_ulong old_addr, abi_ulong old_size,
675 abi_ulong new_size, unsigned long flags,
676 abi_ulong new_addr)
678 int prot;
679 void *host_addr;
681 mmap_lock();
683 if (flags & MREMAP_FIXED) {
684 host_addr = mremap(g2h(old_addr), old_size, new_size,
685 flags, g2h(new_addr));
687 if (reserved_va && host_addr != MAP_FAILED) {
688 /* If new and old addresses overlap then the above mremap will
689 already have failed with EINVAL. */
690 mmap_reserve(old_addr, old_size);
692 } else if (flags & MREMAP_MAYMOVE) {
693 abi_ulong mmap_start;
695 mmap_start = mmap_find_vma(0, new_size);
697 if (mmap_start == -1) {
698 errno = ENOMEM;
699 host_addr = MAP_FAILED;
700 } else {
701 host_addr = mremap(g2h(old_addr), old_size, new_size,
702 flags | MREMAP_FIXED, g2h(mmap_start));
703 if (reserved_va) {
704 mmap_reserve(old_addr, old_size);
707 } else {
708 int prot = 0;
709 if (reserved_va && old_size < new_size) {
710 abi_ulong addr;
711 for (addr = old_addr + old_size;
712 addr < old_addr + new_size;
713 addr++) {
714 prot |= page_get_flags(addr);
717 if (prot == 0) {
718 host_addr = mremap(g2h(old_addr), old_size, new_size, flags);
719 if (host_addr != MAP_FAILED && reserved_va && old_size > new_size) {
720 mmap_reserve(old_addr + old_size, new_size - old_size);
722 } else {
723 errno = ENOMEM;
724 host_addr = MAP_FAILED;
726 /* Check if address fits target address space */
727 if ((unsigned long)host_addr + new_size > (abi_ulong)-1) {
728 /* Revert mremap() changes */
729 host_addr = mremap(g2h(old_addr), new_size, old_size, flags);
730 errno = ENOMEM;
731 host_addr = MAP_FAILED;
735 if (host_addr == MAP_FAILED) {
736 new_addr = -1;
737 } else {
738 new_addr = h2g(host_addr);
739 prot = page_get_flags(old_addr);
740 page_set_flags(old_addr, old_addr + old_size, 0);
741 page_set_flags(new_addr, new_addr + new_size, prot | PAGE_VALID);
743 tb_invalidate_phys_range(new_addr, new_addr + new_size);
744 mmap_unlock();
745 return new_addr;
748 int target_msync(abi_ulong start, abi_ulong len, int flags)
750 abi_ulong end;
752 if (start & ~TARGET_PAGE_MASK)
753 return -EINVAL;
754 len = TARGET_PAGE_ALIGN(len);
755 end = start + len;
756 if (end < start)
757 return -EINVAL;
758 if (end == start)
759 return 0;
761 start &= qemu_host_page_mask;
762 return msync(g2h(start), end - start, flags);