migration-test: Move shmem handling to common commandline
[qemu/ar7.git] / linux-user / mmap.c
blob46a6e3a761ac1efe28b4aa18c4467a3ea0c675f3
/*
 *  mmap support for qemu
 *
 *  Copyright (c) 2003 Fabrice Bellard
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, see <http://www.gnu.org/licenses/>.
 */
19 #include "qemu/osdep.h"
21 #include "qemu.h"
23 //#define DEBUG_MMAP
/*
 * Process-wide mutex guarding the mmap bookkeeping, plus a per-thread
 * nesting depth so that one thread may take the lock recursively.
 */
static pthread_mutex_t mmap_mutex = PTHREAD_MUTEX_INITIALIZER;
static __thread int mmap_lock_count;

/*
 * Enter the mmap critical section.  Only the outermost call on a thread
 * (depth going from 0 to 1) actually acquires the mutex; nested calls
 * just bump the depth.
 */
void mmap_lock(void)
{
    int depth_before = mmap_lock_count;

    mmap_lock_count = depth_before + 1;
    if (depth_before != 0) {
        return;
    }
    pthread_mutex_lock(&mmap_mutex);
}

/*
 * Leave the mmap critical section.  The mutex is released only when the
 * calling thread's nesting depth drops back to zero.
 */
void mmap_unlock(void)
{
    mmap_lock_count -= 1;
    if (mmap_lock_count != 0) {
        return;
    }
    pthread_mutex_unlock(&mmap_mutex);
}

/* Return true when the calling thread is inside at least one mmap_lock(). */
bool have_mmap_lock(void)
{
    return mmap_lock_count > 0;
}
47 /* Grab lock to make sure things are in a consistent state after fork(). */
48 void mmap_fork_start(void)
50 if (mmap_lock_count)
51 abort();
52 pthread_mutex_lock(&mmap_mutex);
55 void mmap_fork_end(int child)
57 if (child)
58 pthread_mutex_init(&mmap_mutex, NULL);
59 else
60 pthread_mutex_unlock(&mmap_mutex);
63 /* NOTE: all the constants are the HOST ones, but addresses are target. */
64 int target_mprotect(abi_ulong start, abi_ulong len, int prot)
66 abi_ulong end, host_start, host_end, addr;
67 int prot1, ret;
69 #ifdef DEBUG_MMAP
70 printf("mprotect: start=0x" TARGET_ABI_FMT_lx
71 "len=0x" TARGET_ABI_FMT_lx " prot=%c%c%c\n", start, len,
72 prot & PROT_READ ? 'r' : '-',
73 prot & PROT_WRITE ? 'w' : '-',
74 prot & PROT_EXEC ? 'x' : '-');
75 #endif
77 if ((start & ~TARGET_PAGE_MASK) != 0)
78 return -TARGET_EINVAL;
79 len = TARGET_PAGE_ALIGN(len);
80 end = start + len;
81 if (!guest_range_valid(start, len)) {
82 return -TARGET_ENOMEM;
84 prot &= PROT_READ | PROT_WRITE | PROT_EXEC;
85 if (len == 0)
86 return 0;
88 mmap_lock();
89 host_start = start & qemu_host_page_mask;
90 host_end = HOST_PAGE_ALIGN(end);
91 if (start > host_start) {
92 /* handle host page containing start */
93 prot1 = prot;
94 for(addr = host_start; addr < start; addr += TARGET_PAGE_SIZE) {
95 prot1 |= page_get_flags(addr);
97 if (host_end == host_start + qemu_host_page_size) {
98 for(addr = end; addr < host_end; addr += TARGET_PAGE_SIZE) {
99 prot1 |= page_get_flags(addr);
101 end = host_end;
103 ret = mprotect(g2h(host_start), qemu_host_page_size, prot1 & PAGE_BITS);
104 if (ret != 0)
105 goto error;
106 host_start += qemu_host_page_size;
108 if (end < host_end) {
109 prot1 = prot;
110 for(addr = end; addr < host_end; addr += TARGET_PAGE_SIZE) {
111 prot1 |= page_get_flags(addr);
113 ret = mprotect(g2h(host_end - qemu_host_page_size), qemu_host_page_size,
114 prot1 & PAGE_BITS);
115 if (ret != 0)
116 goto error;
117 host_end -= qemu_host_page_size;
120 /* handle the pages in the middle */
121 if (host_start < host_end) {
122 ret = mprotect(g2h(host_start), host_end - host_start, prot);
123 if (ret != 0)
124 goto error;
126 page_set_flags(start, start + len, prot | PAGE_VALID);
127 mmap_unlock();
128 return 0;
129 error:
130 mmap_unlock();
131 return ret;
134 /* map an incomplete host page */
135 static int mmap_frag(abi_ulong real_start,
136 abi_ulong start, abi_ulong end,
137 int prot, int flags, int fd, abi_ulong offset)
139 abi_ulong real_end, addr;
140 void *host_start;
141 int prot1, prot_new;
143 real_end = real_start + qemu_host_page_size;
144 host_start = g2h(real_start);
146 /* get the protection of the target pages outside the mapping */
147 prot1 = 0;
148 for(addr = real_start; addr < real_end; addr++) {
149 if (addr < start || addr >= end)
150 prot1 |= page_get_flags(addr);
153 if (prot1 == 0) {
154 /* no page was there, so we allocate one */
155 void *p = mmap(host_start, qemu_host_page_size, prot,
156 flags | MAP_ANONYMOUS, -1, 0);
157 if (p == MAP_FAILED)
158 return -1;
159 prot1 = prot;
161 prot1 &= PAGE_BITS;
163 prot_new = prot | prot1;
164 if (!(flags & MAP_ANONYMOUS)) {
165 /* msync() won't work here, so we return an error if write is
166 possible while it is a shared mapping */
167 if ((flags & MAP_TYPE) == MAP_SHARED &&
168 (prot & PROT_WRITE))
169 return -1;
171 /* adjust protection to be able to read */
172 if (!(prot1 & PROT_WRITE))
173 mprotect(host_start, qemu_host_page_size, prot1 | PROT_WRITE);
175 /* read the corresponding file data */
176 if (pread(fd, g2h(start), end - start, offset) == -1)
177 return -1;
179 /* put final protection */
180 if (prot_new != (prot1 | PROT_WRITE))
181 mprotect(host_start, qemu_host_page_size, prot_new);
182 } else {
183 if (prot_new != prot1) {
184 mprotect(host_start, qemu_host_page_size, prot_new);
186 if (prot_new & PROT_WRITE) {
187 memset(g2h(start), 0, end - start);
190 return 0;
/*
 * Initial value of mmap_next_start: 1 << 38 when both the host long and
 * the target ABI are 64-bit, 0x40000000 otherwise.
 */
#if HOST_LONG_BITS == 64 && TARGET_ABI_BITS == 64
# define TASK_UNMAPPED_BASE  (1ul << 38)
#else
# define TASK_UNMAPPED_BASE  0x40000000
#endif
/*
 * Guest address at which mmap_find_vma() begins its search when the
 * caller passes start == 0; updated by the search routines when such a
 * default-start search succeeds.
 */
abi_ulong mmap_next_start = TASK_UNMAPPED_BASE;

/*
 * Not referenced in this part of the file.
 * NOTE(review): presumably records the guest program break -- confirm
 * against its users.
 */
unsigned long last_brk;
202 /* Subroutine of mmap_find_vma, used when we have pre-allocated a chunk
203 of guest address space. */
204 static abi_ulong mmap_find_vma_reserved(abi_ulong start, abi_ulong size,
205 abi_ulong align)
207 abi_ulong addr, end_addr, incr = qemu_host_page_size;
208 int prot;
209 bool looped = false;
211 if (size > reserved_va) {
212 return (abi_ulong)-1;
215 /* Note that start and size have already been aligned by mmap_find_vma. */
217 end_addr = start + size;
218 if (start > reserved_va - size) {
219 /* Start at the top of the address space. */
220 end_addr = ((reserved_va - size) & -align) + size;
221 looped = true;
224 /* Search downward from END_ADDR, checking to see if a page is in use. */
225 addr = end_addr;
226 while (1) {
227 addr -= incr;
228 if (addr > end_addr) {
229 if (looped) {
230 /* Failure. The entire address space has been searched. */
231 return (abi_ulong)-1;
233 /* Re-start at the top of the address space. */
234 addr = end_addr = ((reserved_va - size) & -align) + size;
235 looped = true;
236 } else {
237 prot = page_get_flags(addr);
238 if (prot) {
239 /* Page in use. Restart below this page. */
240 addr = end_addr = ((addr - size) & -align) + size;
241 } else if (addr && addr + size == end_addr) {
242 /* Success! All pages between ADDR and END_ADDR are free. */
243 if (start == mmap_next_start) {
244 mmap_next_start = addr;
246 return addr;
/*
 * Find and reserve a free memory area of size 'size'. The search
 * starts at 'start'.
 * It must be called with mmap_lock() held.
 * Return -1 if error.
 *
 * 'align' is raised to at least the host page size.  When reserved_va is
 * set the work is delegated to mmap_find_vma_reserved(); otherwise the
 * host kernel is asked for a PROT_NONE placeholder mapping, which is
 * left in place on success and which the caller is expected to replace.
 */
abi_ulong mmap_find_vma(abi_ulong start, abi_ulong size, abi_ulong align)
{
    void *ptr, *prev;
    abi_ulong addr;
    int wrapped, repeat;

    align = MAX(align, qemu_host_page_size);

    /* If 'start' == 0, then a default start address is used. */
    if (start == 0) {
        start = mmap_next_start;
    } else {
        start &= qemu_host_page_mask;
    }
    start = ROUND_UP(start, align);

    size = HOST_PAGE_ALIGN(size);

    if (reserved_va) {
        return mmap_find_vma_reserved(start, size, align);
    }

    addr = start;
    wrapped = repeat = 0;
    prev = 0;

    /* 'prev' remembers the host address returned by the previous attempt. */
    for (;; prev = ptr) {
        /*
         * Reserve needed memory area to avoid a race.
         * It should be discarded using:
         *  - mmap() with MAP_FIXED flag
         *  - mremap() with MREMAP_FIXED flag
         *  - shmat() with SHM_REMAP flag
         */
        ptr = mmap(g2h(addr), size, PROT_NONE,
                   MAP_ANONYMOUS|MAP_PRIVATE|MAP_NORESERVE, -1, 0);

        /* ENOMEM, if host address space has no memory */
        if (ptr == MAP_FAILED) {
            return (abi_ulong)-1;
        }

        /* Count the number of sequential returns of the same address.
           This is used to modify the search algorithm below.  */
        repeat = (ptr == prev ? repeat + 1 : 0);

        /* Does the last byte of the host mapping fall inside guest space? */
        if (h2g_valid(ptr + size - 1)) {
            addr = h2g(ptr);

            if ((addr & (align - 1)) == 0) {
                /* Success.  Advance the default start only for
                   default-start searches that landed at or above
                   TASK_UNMAPPED_BASE.  */
                if (start == mmap_next_start && addr >= TASK_UNMAPPED_BASE) {
                    mmap_next_start = addr + size;
                }
                return addr;
            }

            /* The address is not properly aligned for the target.  */
            switch (repeat) {
            case 0:
                /* Assume the result that the kernel gave us is the
                   first with enough free space, so start again at the
                   next higher target page.  */
                addr = ROUND_UP(addr, align);
                break;
            case 1:
                /* Sometimes the kernel decides to perform the allocation
                   at the top end of memory instead.  */
                addr &= -align;
                break;
            case 2:
                /* Start over at low memory.  */
                addr = 0;
                break;
            default:
                /* Fail.  This unaligned block must be the last.  */
                addr = -1;
                break;
            }
        } else {
            /* Since the result the kernel gave didn't fit, start
               again at low memory.  If any repetition, fail.  */
            addr = (repeat ? -1 : 0);
        }

        /* Unmap and try again.  */
        munmap(ptr, size);

        /* ENOMEM if we checked the whole of the target address space.  */
        if (addr == (abi_ulong)-1) {
            return (abi_ulong)-1;
        } else if (addr == 0) {
            /* A second request to restart at low memory means failure. */
            if (wrapped) {
                return (abi_ulong)-1;
            }
            wrapped = 1;
            /* Don't actually use 0 when wrapping, instead indicate
               that we'd truly like an allocation in low memory.  */
            addr = (mmap_min_addr > TARGET_PAGE_SIZE
                     ? TARGET_PAGE_ALIGN(mmap_min_addr)
                     : TARGET_PAGE_SIZE);
        } else if (wrapped && addr >= start) {
            /* After wrapping we are back at or past the original start. */
            return (abi_ulong)-1;
        }
    }
}
/*
 * Emulate the guest mmap() call.
 *
 * NOTE: all the constants are the HOST ones
 *
 * start:  guest address hint, or the exact address with MAP_FIXED
 * len:    length in bytes; rounded up to a target page
 * prot, flags, fd: as for host mmap()
 * offset: file offset; must be target-page aligned
 *
 * Returns the guest address of the mapping, or -1 with errno set.
 * The whole operation runs under mmap_lock().
 */
abi_long target_mmap(abi_ulong start, abi_ulong len, int prot,
                     int flags, int fd, abi_ulong offset)
{
    abi_ulong ret, end, real_start, real_end, retaddr, host_offset, host_len;

    mmap_lock();
#ifdef DEBUG_MMAP
    {
        printf("mmap: start=0x" TARGET_ABI_FMT_lx
               " len=0x" TARGET_ABI_FMT_lx " prot=%c%c%c flags=",
               start, len,
               prot & PROT_READ ? 'r' : '-',
               prot & PROT_WRITE ? 'w' : '-',
               prot & PROT_EXEC ? 'x' : '-');
        if (flags & MAP_FIXED)
            printf("MAP_FIXED ");
        if (flags & MAP_ANONYMOUS)
            printf("MAP_ANON ");
        switch(flags & MAP_TYPE) {
        case MAP_PRIVATE:
            printf("MAP_PRIVATE ");
            break;
        case MAP_SHARED:
            printf("MAP_SHARED ");
            break;
        default:
            printf("[MAP_TYPE=0x%x] ", flags & MAP_TYPE);
            break;
        }
        printf("fd=%d offset=" TARGET_ABI_FMT_lx "\n", fd, offset);
    }
#endif

    if (!len) {
        errno = EINVAL;
        goto fail;
    }

    /* Also check for overflows... */
    /* (a length that rounds up to zero has wrapped around) */
    len = TARGET_PAGE_ALIGN(len);
    if (!len) {
        errno = ENOMEM;
        goto fail;
    }

    if (offset & ~TARGET_PAGE_MASK) {
        errno = EINVAL;
        goto fail;
    }

    /* Host-page-aligned versions of the guest address and file offset. */
    real_start = start & qemu_host_page_mask;
    host_offset = offset & qemu_host_page_mask;

    /* If the user is asking for the kernel to find a location, do that
       before we truncate the length for mapping files below.  */
    if (!(flags & MAP_FIXED)) {
        host_len = len + offset - host_offset;
        host_len = HOST_PAGE_ALIGN(host_len);
        start = mmap_find_vma(real_start, host_len, TARGET_PAGE_SIZE);
        if (start == (abi_ulong)-1) {
            errno = ENOMEM;
            goto fail;
        }
    }

    /* When mapping files into a memory area larger than the file, accesses
       to pages beyond the file size will cause a SIGBUS.

       For example, if mmaping a file of 100 bytes on a host with 4K pages
       emulating a target with 8K pages, the target expects to be able to
       access the first 8K. But the host will trap us on any access beyond
       4K.

       When emulating a target with a larger page-size than the hosts, we
       may need to truncate file maps at EOF and add extra anonymous pages
       up to the targets page boundary.  */

    if ((qemu_real_host_page_size < qemu_host_page_size) &&
        !(flags & MAP_ANONYMOUS)) {
        struct stat sb;

        /* errno is left as set by fstat() on failure. */
        if (fstat (fd, &sb) == -1)
            goto fail;

        /* Are we trying to create a map beyond EOF?.  */
        if (offset + len > sb.st_size) {
            /* If so, truncate the file map at eof aligned with
               the hosts real pagesize. Additional anonymous maps
               will be created beyond EOF.  */
            len = REAL_HOST_PAGE_ALIGN(sb.st_size - offset);
        }
    }

    if (!(flags & MAP_FIXED)) {
        unsigned long host_start;
        void *p;

        host_len = len + offset - host_offset;
        host_len = HOST_PAGE_ALIGN(host_len);

        /* Note: we prefer to control the mapping address. It is
           especially important if qemu_host_page_size >
           qemu_real_host_page_size */
        /* First cover the whole area found above with anonymous memory... */
        p = mmap(g2h(start), host_len, prot,
                 flags | MAP_FIXED | MAP_ANONYMOUS, -1, 0);
        if (p == MAP_FAILED)
            goto fail;
        /* update start so that it points to the file position at 'offset' */
        host_start = (unsigned long)p;
        if (!(flags & MAP_ANONYMOUS)) {
            /* ...then map the file over its beginning. */
            p = mmap(g2h(start), len, prot,
                     flags | MAP_FIXED, fd, host_offset);
            if (p == MAP_FAILED) {
                munmap(g2h(start), host_len);
                goto fail;
            }
            host_start += offset - host_offset;
        }
        start = h2g(host_start);
    } else {
        /* MAP_FIXED: the guest address must be target-page aligned. */
        if (start & ~TARGET_PAGE_MASK) {
            errno = EINVAL;
            goto fail;
        }
        end = start + len;
        real_end = HOST_PAGE_ALIGN(end);

        /*
         * Test if requested memory area fits target address space
         * It can fail only on 64-bit host with 32-bit target.
         * On any other target/host host mmap() handles this error correctly.
         */
        if (!guest_range_valid(start, len)) {
            errno = ENOMEM;
            goto fail;
        }

        /* worst case: we cannot map the file because the offset is not
           aligned, so we read it */
        if (!(flags & MAP_ANONYMOUS) &&
            (offset & ~qemu_host_page_mask) != (start & ~qemu_host_page_mask)) {
            /* msync() won't work here, so we return an error if write is
               possible while it is a shared mapping */
            if ((flags & MAP_TYPE) == MAP_SHARED &&
                (prot & PROT_WRITE)) {
                errno = EINVAL;
                goto fail;
            }
            /* Recursive call: create a writable private anonymous mapping,
               copy the file data into it, then drop write permission if
               the caller did not ask for it. */
            retaddr = target_mmap(start, len, prot | PROT_WRITE,
                                  MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS,
                                  -1, 0);
            if (retaddr == -1)
                goto fail;
            if (pread(fd, g2h(start), len, offset) == -1)
                goto fail;
            if (!(prot & PROT_WRITE)) {
                ret = target_mprotect(start, len, prot);
                assert(ret == 0);
            }
            /* Skips the page_set_flags() at the_end1. */
            goto the_end;
        }

        /* handle the start of the mapping */
        if (start > real_start) {
            if (real_end == real_start + qemu_host_page_size) {
                /* one single host page */
                ret = mmap_frag(real_start, start, end,
                                prot, flags, fd, offset);
                if (ret == -1)
                    goto fail;
                goto the_end1;
            }
            ret = mmap_frag(real_start, start, real_start + qemu_host_page_size,
                            prot, flags, fd, offset);
            if (ret == -1)
                goto fail;
            real_start += qemu_host_page_size;
        }
        /* handle the end of the mapping */
        if (end < real_end) {
            ret = mmap_frag(real_end - qemu_host_page_size,
                            real_end - qemu_host_page_size, end,
                            prot, flags, fd,
                            offset + real_end - qemu_host_page_size - start);
            if (ret == -1)
                goto fail;
            real_end -= qemu_host_page_size;
        }

        /* map the middle (easier) */
        if (real_start < real_end) {
            void *p;
            unsigned long offset1;
            if (flags & MAP_ANONYMOUS)
                offset1 = 0;
            else
                offset1 = offset + real_start - start;
            p = mmap(g2h(real_start), real_end - real_start,
                     prot, flags, fd, offset1);
            if (p == MAP_FAILED)
                goto fail;
        }
    }
 the_end1:
    /* Record the new protection for every target page of the mapping. */
    page_set_flags(start, start + len, prot | PAGE_VALID);
 the_end:
#ifdef DEBUG_MMAP
    printf("ret=0x" TARGET_ABI_FMT_lx "\n", start);
    page_dump(stdout);
    printf("\n");
#endif
    tb_invalidate_phys_range(start, start + len);
    mmap_unlock();
    return start;
fail:
    mmap_unlock();
    return -1;
}
585 static void mmap_reserve(abi_ulong start, abi_ulong size)
587 abi_ulong real_start;
588 abi_ulong real_end;
589 abi_ulong addr;
590 abi_ulong end;
591 int prot;
593 real_start = start & qemu_host_page_mask;
594 real_end = HOST_PAGE_ALIGN(start + size);
595 end = start + size;
596 if (start > real_start) {
597 /* handle host page containing start */
598 prot = 0;
599 for (addr = real_start; addr < start; addr += TARGET_PAGE_SIZE) {
600 prot |= page_get_flags(addr);
602 if (real_end == real_start + qemu_host_page_size) {
603 for (addr = end; addr < real_end; addr += TARGET_PAGE_SIZE) {
604 prot |= page_get_flags(addr);
606 end = real_end;
608 if (prot != 0)
609 real_start += qemu_host_page_size;
611 if (end < real_end) {
612 prot = 0;
613 for (addr = end; addr < real_end; addr += TARGET_PAGE_SIZE) {
614 prot |= page_get_flags(addr);
616 if (prot != 0)
617 real_end -= qemu_host_page_size;
619 if (real_start != real_end) {
620 mmap(g2h(real_start), real_end - real_start, PROT_NONE,
621 MAP_FIXED | MAP_ANONYMOUS | MAP_PRIVATE | MAP_NORESERVE,
622 -1, 0);
626 int target_munmap(abi_ulong start, abi_ulong len)
628 abi_ulong end, real_start, real_end, addr;
629 int prot, ret;
631 #ifdef DEBUG_MMAP
632 printf("munmap: start=0x" TARGET_ABI_FMT_lx " len=0x"
633 TARGET_ABI_FMT_lx "\n",
634 start, len);
635 #endif
636 if (start & ~TARGET_PAGE_MASK)
637 return -TARGET_EINVAL;
638 len = TARGET_PAGE_ALIGN(len);
639 if (len == 0 || !guest_range_valid(start, len)) {
640 return -TARGET_EINVAL;
643 mmap_lock();
644 end = start + len;
645 real_start = start & qemu_host_page_mask;
646 real_end = HOST_PAGE_ALIGN(end);
648 if (start > real_start) {
649 /* handle host page containing start */
650 prot = 0;
651 for(addr = real_start; addr < start; addr += TARGET_PAGE_SIZE) {
652 prot |= page_get_flags(addr);
654 if (real_end == real_start + qemu_host_page_size) {
655 for(addr = end; addr < real_end; addr += TARGET_PAGE_SIZE) {
656 prot |= page_get_flags(addr);
658 end = real_end;
660 if (prot != 0)
661 real_start += qemu_host_page_size;
663 if (end < real_end) {
664 prot = 0;
665 for(addr = end; addr < real_end; addr += TARGET_PAGE_SIZE) {
666 prot |= page_get_flags(addr);
668 if (prot != 0)
669 real_end -= qemu_host_page_size;
672 ret = 0;
673 /* unmap what we can */
674 if (real_start < real_end) {
675 if (reserved_va) {
676 mmap_reserve(real_start, real_end - real_start);
677 } else {
678 ret = munmap(g2h(real_start), real_end - real_start);
682 if (ret == 0) {
683 page_set_flags(start, start + len, 0);
684 tb_invalidate_phys_range(start, start + len);
686 mmap_unlock();
687 return ret;
690 abi_long target_mremap(abi_ulong old_addr, abi_ulong old_size,
691 abi_ulong new_size, unsigned long flags,
692 abi_ulong new_addr)
694 int prot;
695 void *host_addr;
697 if (!guest_range_valid(old_addr, old_size) ||
698 ((flags & MREMAP_FIXED) &&
699 !guest_range_valid(new_addr, new_size))) {
700 errno = ENOMEM;
701 return -1;
704 mmap_lock();
706 if (flags & MREMAP_FIXED) {
707 host_addr = mremap(g2h(old_addr), old_size, new_size,
708 flags, g2h(new_addr));
710 if (reserved_va && host_addr != MAP_FAILED) {
711 /* If new and old addresses overlap then the above mremap will
712 already have failed with EINVAL. */
713 mmap_reserve(old_addr, old_size);
715 } else if (flags & MREMAP_MAYMOVE) {
716 abi_ulong mmap_start;
718 mmap_start = mmap_find_vma(0, new_size, TARGET_PAGE_SIZE);
720 if (mmap_start == -1) {
721 errno = ENOMEM;
722 host_addr = MAP_FAILED;
723 } else {
724 host_addr = mremap(g2h(old_addr), old_size, new_size,
725 flags | MREMAP_FIXED, g2h(mmap_start));
726 if (reserved_va) {
727 mmap_reserve(old_addr, old_size);
730 } else {
731 int prot = 0;
732 if (reserved_va && old_size < new_size) {
733 abi_ulong addr;
734 for (addr = old_addr + old_size;
735 addr < old_addr + new_size;
736 addr++) {
737 prot |= page_get_flags(addr);
740 if (prot == 0) {
741 host_addr = mremap(g2h(old_addr), old_size, new_size, flags);
742 if (host_addr != MAP_FAILED && reserved_va && old_size > new_size) {
743 mmap_reserve(old_addr + old_size, new_size - old_size);
745 } else {
746 errno = ENOMEM;
747 host_addr = MAP_FAILED;
749 /* Check if address fits target address space */
750 if ((unsigned long)host_addr + new_size > (abi_ulong)-1) {
751 /* Revert mremap() changes */
752 host_addr = mremap(g2h(old_addr), new_size, old_size, flags);
753 errno = ENOMEM;
754 host_addr = MAP_FAILED;
758 if (host_addr == MAP_FAILED) {
759 new_addr = -1;
760 } else {
761 new_addr = h2g(host_addr);
762 prot = page_get_flags(old_addr);
763 page_set_flags(old_addr, old_addr + old_size, 0);
764 page_set_flags(new_addr, new_addr + new_size, prot | PAGE_VALID);
766 tb_invalidate_phys_range(new_addr, new_addr + new_size);
767 mmap_unlock();
768 return new_addr;