qcow2: Process QCOW2_CLUSTER_ZERO_ALLOC clusters in handle_copied()
[qemu/armbru.git] / linux-user / mmap.c
blobf2615634201d81c36745dbad586775bb18f324d6
/*
 *  mmap support for qemu
 *
 *  Copyright (c) 2003 Fabrice Bellard
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, see <http://www.gnu.org/licenses/>.
 */
19 #include "qemu/osdep.h"
20 #include "trace.h"
21 #include "exec/log.h"
22 #include "qemu.h"
/* Mutex taken by mmap_lock() and released by mmap_unlock(). */
static pthread_mutex_t mmap_mutex = PTHREAD_MUTEX_INITIALIZER;

/* Per-thread nesting depth of mmap_lock() calls. */
static __thread int mmap_lock_count;
27 void mmap_lock(void)
29 if (mmap_lock_count++ == 0) {
30 pthread_mutex_lock(&mmap_mutex);
34 void mmap_unlock(void)
36 if (--mmap_lock_count == 0) {
37 pthread_mutex_unlock(&mmap_mutex);
41 bool have_mmap_lock(void)
43 return mmap_lock_count > 0 ? true : false;
46 /* Grab lock to make sure things are in a consistent state after fork(). */
47 void mmap_fork_start(void)
49 if (mmap_lock_count)
50 abort();
51 pthread_mutex_lock(&mmap_mutex);
54 void mmap_fork_end(int child)
56 if (child)
57 pthread_mutex_init(&mmap_mutex, NULL);
58 else
59 pthread_mutex_unlock(&mmap_mutex);
63 * Validate target prot bitmask.
64 * Return the prot bitmask for the host in *HOST_PROT.
65 * Return 0 if the target prot bitmask is invalid, otherwise
66 * the internal qemu page_flags (which will include PAGE_VALID).
68 static int validate_prot_to_pageflags(int *host_prot, int prot)
70 int valid = PROT_READ | PROT_WRITE | PROT_EXEC | TARGET_PROT_SEM;
71 int page_flags = (prot & PAGE_BITS) | PAGE_VALID;
74 * For the host, we need not pass anything except read/write/exec.
75 * While PROT_SEM is allowed by all hosts, it is also ignored, so
76 * don't bother transforming guest bit to host bit. Any other
77 * target-specific prot bits will not be understood by the host
78 * and will need to be encoded into page_flags for qemu emulation.
80 * Pages that are executable by the guest will never be executed
81 * by the host, but the host will need to be able to read them.
83 *host_prot = (prot & (PROT_READ | PROT_WRITE))
84 | (prot & PROT_EXEC ? PROT_READ : 0);
86 return prot & ~valid ? 0 : page_flags;
89 /* NOTE: all the constants are the HOST ones, but addresses are target. */
90 int target_mprotect(abi_ulong start, abi_ulong len, int target_prot)
92 abi_ulong end, host_start, host_end, addr;
93 int prot1, ret, page_flags, host_prot;
95 trace_target_mprotect(start, len, target_prot);
97 if ((start & ~TARGET_PAGE_MASK) != 0) {
98 return -TARGET_EINVAL;
100 page_flags = validate_prot_to_pageflags(&host_prot, target_prot);
101 if (!page_flags) {
102 return -TARGET_EINVAL;
104 len = TARGET_PAGE_ALIGN(len);
105 end = start + len;
106 if (!guest_range_valid(start, len)) {
107 return -TARGET_ENOMEM;
109 if (len == 0) {
110 return 0;
113 mmap_lock();
114 host_start = start & qemu_host_page_mask;
115 host_end = HOST_PAGE_ALIGN(end);
116 if (start > host_start) {
117 /* handle host page containing start */
118 prot1 = host_prot;
119 for (addr = host_start; addr < start; addr += TARGET_PAGE_SIZE) {
120 prot1 |= page_get_flags(addr);
122 if (host_end == host_start + qemu_host_page_size) {
123 for (addr = end; addr < host_end; addr += TARGET_PAGE_SIZE) {
124 prot1 |= page_get_flags(addr);
126 end = host_end;
128 ret = mprotect(g2h(host_start), qemu_host_page_size,
129 prot1 & PAGE_BITS);
130 if (ret != 0) {
131 goto error;
133 host_start += qemu_host_page_size;
135 if (end < host_end) {
136 prot1 = host_prot;
137 for (addr = end; addr < host_end; addr += TARGET_PAGE_SIZE) {
138 prot1 |= page_get_flags(addr);
140 ret = mprotect(g2h(host_end - qemu_host_page_size),
141 qemu_host_page_size, prot1 & PAGE_BITS);
142 if (ret != 0) {
143 goto error;
145 host_end -= qemu_host_page_size;
148 /* handle the pages in the middle */
149 if (host_start < host_end) {
150 ret = mprotect(g2h(host_start), host_end - host_start, host_prot);
151 if (ret != 0) {
152 goto error;
155 page_set_flags(start, start + len, page_flags);
156 mmap_unlock();
157 return 0;
158 error:
159 mmap_unlock();
160 return ret;
163 /* map an incomplete host page */
164 static int mmap_frag(abi_ulong real_start,
165 abi_ulong start, abi_ulong end,
166 int prot, int flags, int fd, abi_ulong offset)
168 abi_ulong real_end, addr;
169 void *host_start;
170 int prot1, prot_new;
172 real_end = real_start + qemu_host_page_size;
173 host_start = g2h(real_start);
175 /* get the protection of the target pages outside the mapping */
176 prot1 = 0;
177 for(addr = real_start; addr < real_end; addr++) {
178 if (addr < start || addr >= end)
179 prot1 |= page_get_flags(addr);
182 if (prot1 == 0) {
183 /* no page was there, so we allocate one */
184 void *p = mmap(host_start, qemu_host_page_size, prot,
185 flags | MAP_ANONYMOUS, -1, 0);
186 if (p == MAP_FAILED)
187 return -1;
188 prot1 = prot;
190 prot1 &= PAGE_BITS;
192 prot_new = prot | prot1;
193 if (!(flags & MAP_ANONYMOUS)) {
194 /* msync() won't work here, so we return an error if write is
195 possible while it is a shared mapping */
196 if ((flags & MAP_TYPE) == MAP_SHARED &&
197 (prot & PROT_WRITE))
198 return -1;
200 /* adjust protection to be able to read */
201 if (!(prot1 & PROT_WRITE))
202 mprotect(host_start, qemu_host_page_size, prot1 | PROT_WRITE);
204 /* read the corresponding file data */
205 if (pread(fd, g2h(start), end - start, offset) == -1)
206 return -1;
208 /* put final protection */
209 if (prot_new != (prot1 | PROT_WRITE))
210 mprotect(host_start, qemu_host_page_size, prot_new);
211 } else {
212 if (prot_new != prot1) {
213 mprotect(host_start, qemu_host_page_size, prot_new);
215 if (prot_new & PROT_WRITE) {
216 memset(g2h(start), 0, end - start);
219 return 0;
222 #if HOST_LONG_BITS == 64 && TARGET_ABI_BITS == 64
223 #ifdef TARGET_AARCH64
224 # define TASK_UNMAPPED_BASE 0x5500000000
225 #else
226 # define TASK_UNMAPPED_BASE (1ul << 38)
227 #endif
228 #else
229 # define TASK_UNMAPPED_BASE 0x40000000
230 #endif
231 abi_ulong mmap_next_start = TASK_UNMAPPED_BASE;
233 unsigned long last_brk;
235 /* Subroutine of mmap_find_vma, used when we have pre-allocated a chunk
236 of guest address space. */
237 static abi_ulong mmap_find_vma_reserved(abi_ulong start, abi_ulong size,
238 abi_ulong align)
240 abi_ulong addr, end_addr, incr = qemu_host_page_size;
241 int prot;
242 bool looped = false;
244 if (size > reserved_va) {
245 return (abi_ulong)-1;
248 /* Note that start and size have already been aligned by mmap_find_vma. */
250 end_addr = start + size;
251 if (start > reserved_va - size) {
252 /* Start at the top of the address space. */
253 end_addr = ((reserved_va - size) & -align) + size;
254 looped = true;
257 /* Search downward from END_ADDR, checking to see if a page is in use. */
258 addr = end_addr;
259 while (1) {
260 addr -= incr;
261 if (addr > end_addr) {
262 if (looped) {
263 /* Failure. The entire address space has been searched. */
264 return (abi_ulong)-1;
266 /* Re-start at the top of the address space. */
267 addr = end_addr = ((reserved_va - size) & -align) + size;
268 looped = true;
269 } else {
270 prot = page_get_flags(addr);
271 if (prot) {
272 /* Page in use. Restart below this page. */
273 addr = end_addr = ((addr - size) & -align) + size;
274 } else if (addr && addr + size == end_addr) {
275 /* Success! All pages between ADDR and END_ADDR are free. */
276 if (start == mmap_next_start) {
277 mmap_next_start = addr;
279 return addr;
286 * Find and reserve a free memory area of size 'size'. The search
287 * starts at 'start'.
288 * It must be called with mmap_lock() held.
289 * Return -1 if error.
291 abi_ulong mmap_find_vma(abi_ulong start, abi_ulong size, abi_ulong align)
293 void *ptr, *prev;
294 abi_ulong addr;
295 int wrapped, repeat;
297 align = MAX(align, qemu_host_page_size);
299 /* If 'start' == 0, then a default start address is used. */
300 if (start == 0) {
301 start = mmap_next_start;
302 } else {
303 start &= qemu_host_page_mask;
305 start = ROUND_UP(start, align);
307 size = HOST_PAGE_ALIGN(size);
309 if (reserved_va) {
310 return mmap_find_vma_reserved(start, size, align);
313 addr = start;
314 wrapped = repeat = 0;
315 prev = 0;
317 for (;; prev = ptr) {
319 * Reserve needed memory area to avoid a race.
320 * It should be discarded using:
321 * - mmap() with MAP_FIXED flag
322 * - mremap() with MREMAP_FIXED flag
323 * - shmat() with SHM_REMAP flag
325 ptr = mmap(g2h(addr), size, PROT_NONE,
326 MAP_ANONYMOUS|MAP_PRIVATE|MAP_NORESERVE, -1, 0);
328 /* ENOMEM, if host address space has no memory */
329 if (ptr == MAP_FAILED) {
330 return (abi_ulong)-1;
333 /* Count the number of sequential returns of the same address.
334 This is used to modify the search algorithm below. */
335 repeat = (ptr == prev ? repeat + 1 : 0);
337 if (h2g_valid(ptr + size - 1)) {
338 addr = h2g(ptr);
340 if ((addr & (align - 1)) == 0) {
341 /* Success. */
342 if (start == mmap_next_start && addr >= TASK_UNMAPPED_BASE) {
343 mmap_next_start = addr + size;
345 return addr;
348 /* The address is not properly aligned for the target. */
349 switch (repeat) {
350 case 0:
351 /* Assume the result that the kernel gave us is the
352 first with enough free space, so start again at the
353 next higher target page. */
354 addr = ROUND_UP(addr, align);
355 break;
356 case 1:
357 /* Sometimes the kernel decides to perform the allocation
358 at the top end of memory instead. */
359 addr &= -align;
360 break;
361 case 2:
362 /* Start over at low memory. */
363 addr = 0;
364 break;
365 default:
366 /* Fail. This unaligned block must the last. */
367 addr = -1;
368 break;
370 } else {
371 /* Since the result the kernel gave didn't fit, start
372 again at low memory. If any repetition, fail. */
373 addr = (repeat ? -1 : 0);
376 /* Unmap and try again. */
377 munmap(ptr, size);
379 /* ENOMEM if we checked the whole of the target address space. */
380 if (addr == (abi_ulong)-1) {
381 return (abi_ulong)-1;
382 } else if (addr == 0) {
383 if (wrapped) {
384 return (abi_ulong)-1;
386 wrapped = 1;
387 /* Don't actually use 0 when wrapping, instead indicate
388 that we'd truly like an allocation in low memory. */
389 addr = (mmap_min_addr > TARGET_PAGE_SIZE
390 ? TARGET_PAGE_ALIGN(mmap_min_addr)
391 : TARGET_PAGE_SIZE);
392 } else if (wrapped && addr >= start) {
393 return (abi_ulong)-1;
398 /* NOTE: all the constants are the HOST ones */
399 abi_long target_mmap(abi_ulong start, abi_ulong len, int target_prot,
400 int flags, int fd, abi_ulong offset)
402 abi_ulong ret, end, real_start, real_end, retaddr, host_offset, host_len;
403 int page_flags, host_prot;
405 mmap_lock();
406 trace_target_mmap(start, len, target_prot, flags, fd, offset);
408 if (!len) {
409 errno = EINVAL;
410 goto fail;
413 page_flags = validate_prot_to_pageflags(&host_prot, target_prot);
414 if (!page_flags) {
415 errno = EINVAL;
416 goto fail;
419 /* Also check for overflows... */
420 len = TARGET_PAGE_ALIGN(len);
421 if (!len) {
422 errno = ENOMEM;
423 goto fail;
426 if (offset & ~TARGET_PAGE_MASK) {
427 errno = EINVAL;
428 goto fail;
431 real_start = start & qemu_host_page_mask;
432 host_offset = offset & qemu_host_page_mask;
434 /* If the user is asking for the kernel to find a location, do that
435 before we truncate the length for mapping files below. */
436 if (!(flags & MAP_FIXED)) {
437 host_len = len + offset - host_offset;
438 host_len = HOST_PAGE_ALIGN(host_len);
439 start = mmap_find_vma(real_start, host_len, TARGET_PAGE_SIZE);
440 if (start == (abi_ulong)-1) {
441 errno = ENOMEM;
442 goto fail;
446 /* When mapping files into a memory area larger than the file, accesses
447 to pages beyond the file size will cause a SIGBUS.
449 For example, if mmaping a file of 100 bytes on a host with 4K pages
450 emulating a target with 8K pages, the target expects to be able to
451 access the first 8K. But the host will trap us on any access beyond
452 4K.
454 When emulating a target with a larger page-size than the hosts, we
455 may need to truncate file maps at EOF and add extra anonymous pages
456 up to the targets page boundary. */
458 if ((qemu_real_host_page_size < qemu_host_page_size) &&
459 !(flags & MAP_ANONYMOUS)) {
460 struct stat sb;
462 if (fstat (fd, &sb) == -1)
463 goto fail;
465 /* Are we trying to create a map beyond EOF?. */
466 if (offset + len > sb.st_size) {
467 /* If so, truncate the file map at eof aligned with
468 the hosts real pagesize. Additional anonymous maps
469 will be created beyond EOF. */
470 len = REAL_HOST_PAGE_ALIGN(sb.st_size - offset);
474 if (!(flags & MAP_FIXED)) {
475 unsigned long host_start;
476 void *p;
478 host_len = len + offset - host_offset;
479 host_len = HOST_PAGE_ALIGN(host_len);
481 /* Note: we prefer to control the mapping address. It is
482 especially important if qemu_host_page_size >
483 qemu_real_host_page_size */
484 p = mmap(g2h(start), host_len, host_prot,
485 flags | MAP_FIXED | MAP_ANONYMOUS, -1, 0);
486 if (p == MAP_FAILED) {
487 goto fail;
489 /* update start so that it points to the file position at 'offset' */
490 host_start = (unsigned long)p;
491 if (!(flags & MAP_ANONYMOUS)) {
492 p = mmap(g2h(start), len, host_prot,
493 flags | MAP_FIXED, fd, host_offset);
494 if (p == MAP_FAILED) {
495 munmap(g2h(start), host_len);
496 goto fail;
498 host_start += offset - host_offset;
500 start = h2g(host_start);
501 } else {
502 if (start & ~TARGET_PAGE_MASK) {
503 errno = EINVAL;
504 goto fail;
506 end = start + len;
507 real_end = HOST_PAGE_ALIGN(end);
510 * Test if requested memory area fits target address space
511 * It can fail only on 64-bit host with 32-bit target.
512 * On any other target/host host mmap() handles this error correctly.
514 if (end < start || !guest_range_valid(start, len)) {
515 errno = ENOMEM;
516 goto fail;
519 /* worst case: we cannot map the file because the offset is not
520 aligned, so we read it */
521 if (!(flags & MAP_ANONYMOUS) &&
522 (offset & ~qemu_host_page_mask) != (start & ~qemu_host_page_mask)) {
523 /* msync() won't work here, so we return an error if write is
524 possible while it is a shared mapping */
525 if ((flags & MAP_TYPE) == MAP_SHARED &&
526 (host_prot & PROT_WRITE)) {
527 errno = EINVAL;
528 goto fail;
530 retaddr = target_mmap(start, len, target_prot | PROT_WRITE,
531 MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS,
532 -1, 0);
533 if (retaddr == -1)
534 goto fail;
535 if (pread(fd, g2h(start), len, offset) == -1)
536 goto fail;
537 if (!(host_prot & PROT_WRITE)) {
538 ret = target_mprotect(start, len, target_prot);
539 assert(ret == 0);
541 goto the_end;
544 /* handle the start of the mapping */
545 if (start > real_start) {
546 if (real_end == real_start + qemu_host_page_size) {
547 /* one single host page */
548 ret = mmap_frag(real_start, start, end,
549 host_prot, flags, fd, offset);
550 if (ret == -1)
551 goto fail;
552 goto the_end1;
554 ret = mmap_frag(real_start, start, real_start + qemu_host_page_size,
555 host_prot, flags, fd, offset);
556 if (ret == -1)
557 goto fail;
558 real_start += qemu_host_page_size;
560 /* handle the end of the mapping */
561 if (end < real_end) {
562 ret = mmap_frag(real_end - qemu_host_page_size,
563 real_end - qemu_host_page_size, end,
564 host_prot, flags, fd,
565 offset + real_end - qemu_host_page_size - start);
566 if (ret == -1)
567 goto fail;
568 real_end -= qemu_host_page_size;
571 /* map the middle (easier) */
572 if (real_start < real_end) {
573 void *p;
574 unsigned long offset1;
575 if (flags & MAP_ANONYMOUS)
576 offset1 = 0;
577 else
578 offset1 = offset + real_start - start;
579 p = mmap(g2h(real_start), real_end - real_start,
580 host_prot, flags, fd, offset1);
581 if (p == MAP_FAILED)
582 goto fail;
585 the_end1:
586 page_set_flags(start, start + len, page_flags);
587 the_end:
588 trace_target_mmap_complete(start);
589 if (qemu_loglevel_mask(CPU_LOG_PAGE)) {
590 log_page_dump(__func__);
592 tb_invalidate_phys_range(start, start + len);
593 mmap_unlock();
594 return start;
595 fail:
596 mmap_unlock();
597 return -1;
600 static void mmap_reserve(abi_ulong start, abi_ulong size)
602 abi_ulong real_start;
603 abi_ulong real_end;
604 abi_ulong addr;
605 abi_ulong end;
606 int prot;
608 real_start = start & qemu_host_page_mask;
609 real_end = HOST_PAGE_ALIGN(start + size);
610 end = start + size;
611 if (start > real_start) {
612 /* handle host page containing start */
613 prot = 0;
614 for (addr = real_start; addr < start; addr += TARGET_PAGE_SIZE) {
615 prot |= page_get_flags(addr);
617 if (real_end == real_start + qemu_host_page_size) {
618 for (addr = end; addr < real_end; addr += TARGET_PAGE_SIZE) {
619 prot |= page_get_flags(addr);
621 end = real_end;
623 if (prot != 0)
624 real_start += qemu_host_page_size;
626 if (end < real_end) {
627 prot = 0;
628 for (addr = end; addr < real_end; addr += TARGET_PAGE_SIZE) {
629 prot |= page_get_flags(addr);
631 if (prot != 0)
632 real_end -= qemu_host_page_size;
634 if (real_start != real_end) {
635 mmap(g2h(real_start), real_end - real_start, PROT_NONE,
636 MAP_FIXED | MAP_ANONYMOUS | MAP_PRIVATE | MAP_NORESERVE,
637 -1, 0);
641 int target_munmap(abi_ulong start, abi_ulong len)
643 abi_ulong end, real_start, real_end, addr;
644 int prot, ret;
646 trace_target_munmap(start, len);
648 if (start & ~TARGET_PAGE_MASK)
649 return -TARGET_EINVAL;
650 len = TARGET_PAGE_ALIGN(len);
651 if (len == 0 || !guest_range_valid(start, len)) {
652 return -TARGET_EINVAL;
655 mmap_lock();
656 end = start + len;
657 real_start = start & qemu_host_page_mask;
658 real_end = HOST_PAGE_ALIGN(end);
660 if (start > real_start) {
661 /* handle host page containing start */
662 prot = 0;
663 for(addr = real_start; addr < start; addr += TARGET_PAGE_SIZE) {
664 prot |= page_get_flags(addr);
666 if (real_end == real_start + qemu_host_page_size) {
667 for(addr = end; addr < real_end; addr += TARGET_PAGE_SIZE) {
668 prot |= page_get_flags(addr);
670 end = real_end;
672 if (prot != 0)
673 real_start += qemu_host_page_size;
675 if (end < real_end) {
676 prot = 0;
677 for(addr = end; addr < real_end; addr += TARGET_PAGE_SIZE) {
678 prot |= page_get_flags(addr);
680 if (prot != 0)
681 real_end -= qemu_host_page_size;
684 ret = 0;
685 /* unmap what we can */
686 if (real_start < real_end) {
687 if (reserved_va) {
688 mmap_reserve(real_start, real_end - real_start);
689 } else {
690 ret = munmap(g2h(real_start), real_end - real_start);
694 if (ret == 0) {
695 page_set_flags(start, start + len, 0);
696 tb_invalidate_phys_range(start, start + len);
698 mmap_unlock();
699 return ret;
702 abi_long target_mremap(abi_ulong old_addr, abi_ulong old_size,
703 abi_ulong new_size, unsigned long flags,
704 abi_ulong new_addr)
706 int prot;
707 void *host_addr;
709 if (!guest_range_valid(old_addr, old_size) ||
710 ((flags & MREMAP_FIXED) &&
711 !guest_range_valid(new_addr, new_size))) {
712 errno = ENOMEM;
713 return -1;
716 mmap_lock();
718 if (flags & MREMAP_FIXED) {
719 host_addr = mremap(g2h(old_addr), old_size, new_size,
720 flags, g2h(new_addr));
722 if (reserved_va && host_addr != MAP_FAILED) {
723 /* If new and old addresses overlap then the above mremap will
724 already have failed with EINVAL. */
725 mmap_reserve(old_addr, old_size);
727 } else if (flags & MREMAP_MAYMOVE) {
728 abi_ulong mmap_start;
730 mmap_start = mmap_find_vma(0, new_size, TARGET_PAGE_SIZE);
732 if (mmap_start == -1) {
733 errno = ENOMEM;
734 host_addr = MAP_FAILED;
735 } else {
736 host_addr = mremap(g2h(old_addr), old_size, new_size,
737 flags | MREMAP_FIXED, g2h(mmap_start));
738 if (reserved_va) {
739 mmap_reserve(old_addr, old_size);
742 } else {
743 int prot = 0;
744 if (reserved_va && old_size < new_size) {
745 abi_ulong addr;
746 for (addr = old_addr + old_size;
747 addr < old_addr + new_size;
748 addr++) {
749 prot |= page_get_flags(addr);
752 if (prot == 0) {
753 host_addr = mremap(g2h(old_addr), old_size, new_size, flags);
754 if (host_addr != MAP_FAILED && reserved_va && old_size > new_size) {
755 mmap_reserve(old_addr + old_size, old_size - new_size);
757 } else {
758 errno = ENOMEM;
759 host_addr = MAP_FAILED;
761 /* Check if address fits target address space */
762 if ((unsigned long)host_addr + new_size > (abi_ulong)-1) {
763 /* Revert mremap() changes */
764 host_addr = mremap(g2h(old_addr), new_size, old_size, flags);
765 errno = ENOMEM;
766 host_addr = MAP_FAILED;
770 if (host_addr == MAP_FAILED) {
771 new_addr = -1;
772 } else {
773 new_addr = h2g(host_addr);
774 prot = page_get_flags(old_addr);
775 page_set_flags(old_addr, old_addr + old_size, 0);
776 page_set_flags(new_addr, new_addr + new_size, prot | PAGE_VALID);
778 tb_invalidate_phys_range(new_addr, new_addr + new_size);
779 mmap_unlock();
780 return new_addr;