Revert "spapr: Ensure CPU cores are added contiguously and removed in LIFO order"
[qemu.git] / linux-user / mmap.c
blobc4371d943a85664f2f450c9d4ac72d93e2157cff
/*
 *  mmap support for qemu
 *
 *  Copyright (c) 2003 Fabrice Bellard
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, see <http://www.gnu.org/licenses/>.
 */
#include "qemu/osdep.h"
#include <linux/mman.h>
#include <linux/unistd.h>

#include "qemu.h"
#include "qemu-common.h"
#include "translate-all.h"
//#define DEBUG_MMAP
/*
 * Process-wide lock serialising guest address-space changes.
 *
 * mmap_mutex is the real lock; mmap_lock_count is a per-thread nesting
 * depth so that a thread already holding the lock may call mmap_lock()
 * again without deadlocking.
 */
static pthread_mutex_t mmap_mutex = PTHREAD_MUTEX_INITIALIZER;
static __thread int mmap_lock_count;

/* Acquire the mmap lock; only the outermost call takes the mutex. */
void mmap_lock(void)
{
    if (mmap_lock_count++ == 0) {
        pthread_mutex_lock(&mmap_mutex);
    }
}

/* Release one nesting level; the mutex is dropped when the depth hits 0. */
void mmap_unlock(void)
{
    if (--mmap_lock_count == 0) {
        pthread_mutex_unlock(&mmap_mutex);
    }
}

/*
 * Grab lock to make sure things are in a consistent state after fork().
 * The calling thread must not already hold the lock (aborts otherwise).
 * The mutex is taken directly, without touching mmap_lock_count.
 */
void mmap_fork_start(void)
{
    if (mmap_lock_count) {
        abort();
    }
    pthread_mutex_lock(&mmap_mutex);
}

/*
 * Counterpart of mmap_fork_start().
 * child != 0: re-initialise the mutex; child == 0: simply unlock it.
 */
void mmap_fork_end(int child)
{
    if (child) {
        pthread_mutex_init(&mmap_mutex, NULL);
    } else {
        pthread_mutex_unlock(&mmap_mutex);
    }
}
62 /* NOTE: all the constants are the HOST ones, but addresses are target. */
63 int target_mprotect(abi_ulong start, abi_ulong len, int prot)
65 abi_ulong end, host_start, host_end, addr;
66 int prot1, ret;
68 #ifdef DEBUG_MMAP
69 printf("mprotect: start=0x" TARGET_ABI_FMT_lx
70 "len=0x" TARGET_ABI_FMT_lx " prot=%c%c%c\n", start, len,
71 prot & PROT_READ ? 'r' : '-',
72 prot & PROT_WRITE ? 'w' : '-',
73 prot & PROT_EXEC ? 'x' : '-');
74 #endif
76 if ((start & ~TARGET_PAGE_MASK) != 0)
77 return -EINVAL;
78 len = TARGET_PAGE_ALIGN(len);
79 end = start + len;
80 if (end < start)
81 return -EINVAL;
82 prot &= PROT_READ | PROT_WRITE | PROT_EXEC;
83 if (len == 0)
84 return 0;
86 mmap_lock();
87 host_start = start & qemu_host_page_mask;
88 host_end = HOST_PAGE_ALIGN(end);
89 if (start > host_start) {
90 /* handle host page containing start */
91 prot1 = prot;
92 for(addr = host_start; addr < start; addr += TARGET_PAGE_SIZE) {
93 prot1 |= page_get_flags(addr);
95 if (host_end == host_start + qemu_host_page_size) {
96 for(addr = end; addr < host_end; addr += TARGET_PAGE_SIZE) {
97 prot1 |= page_get_flags(addr);
99 end = host_end;
101 ret = mprotect(g2h(host_start), qemu_host_page_size, prot1 & PAGE_BITS);
102 if (ret != 0)
103 goto error;
104 host_start += qemu_host_page_size;
106 if (end < host_end) {
107 prot1 = prot;
108 for(addr = end; addr < host_end; addr += TARGET_PAGE_SIZE) {
109 prot1 |= page_get_flags(addr);
111 ret = mprotect(g2h(host_end - qemu_host_page_size), qemu_host_page_size,
112 prot1 & PAGE_BITS);
113 if (ret != 0)
114 goto error;
115 host_end -= qemu_host_page_size;
118 /* handle the pages in the middle */
119 if (host_start < host_end) {
120 ret = mprotect(g2h(host_start), host_end - host_start, prot);
121 if (ret != 0)
122 goto error;
124 page_set_flags(start, start + len, prot | PAGE_VALID);
125 mmap_unlock();
126 return 0;
127 error:
128 mmap_unlock();
129 return ret;
132 /* map an incomplete host page */
133 static int mmap_frag(abi_ulong real_start,
134 abi_ulong start, abi_ulong end,
135 int prot, int flags, int fd, abi_ulong offset)
137 abi_ulong real_end, addr;
138 void *host_start;
139 int prot1, prot_new;
141 real_end = real_start + qemu_host_page_size;
142 host_start = g2h(real_start);
144 /* get the protection of the target pages outside the mapping */
145 prot1 = 0;
146 for(addr = real_start; addr < real_end; addr++) {
147 if (addr < start || addr >= end)
148 prot1 |= page_get_flags(addr);
151 if (prot1 == 0) {
152 /* no page was there, so we allocate one */
153 void *p = mmap(host_start, qemu_host_page_size, prot,
154 flags | MAP_ANONYMOUS, -1, 0);
155 if (p == MAP_FAILED)
156 return -1;
157 prot1 = prot;
159 prot1 &= PAGE_BITS;
161 prot_new = prot | prot1;
162 if (!(flags & MAP_ANONYMOUS)) {
163 /* msync() won't work here, so we return an error if write is
164 possible while it is a shared mapping */
165 if ((flags & MAP_TYPE) == MAP_SHARED &&
166 (prot & PROT_WRITE))
167 return -1;
169 /* adjust protection to be able to read */
170 if (!(prot1 & PROT_WRITE))
171 mprotect(host_start, qemu_host_page_size, prot1 | PROT_WRITE);
173 /* read the corresponding file data */
174 if (pread(fd, g2h(start), end - start, offset) == -1)
175 return -1;
177 /* put final protection */
178 if (prot_new != (prot1 | PROT_WRITE))
179 mprotect(host_start, qemu_host_page_size, prot_new);
180 } else {
181 if (prot_new != prot1) {
182 mprotect(host_start, qemu_host_page_size, prot_new);
184 if (prot_new & PROT_WRITE) {
185 memset(g2h(start), 0, end - start);
188 return 0;
191 #if HOST_LONG_BITS == 64 && TARGET_ABI_BITS == 64
192 # define TASK_UNMAPPED_BASE (1ul << 38)
193 #elif defined(__CYGWIN__)
194 /* Cygwin doesn't have a whole lot of address space. */
195 # define TASK_UNMAPPED_BASE 0x18000000
196 #else
197 # define TASK_UNMAPPED_BASE 0x40000000
198 #endif
199 abi_ulong mmap_next_start = TASK_UNMAPPED_BASE;
201 unsigned long last_brk;
203 /* Subroutine of mmap_find_vma, used when we have pre-allocated a chunk
204 of guest address space. */
205 static abi_ulong mmap_find_vma_reserved(abi_ulong start, abi_ulong size)
207 abi_ulong addr;
208 abi_ulong end_addr;
209 int prot;
210 int looped = 0;
212 if (size > reserved_va) {
213 return (abi_ulong)-1;
216 size = HOST_PAGE_ALIGN(size);
217 end_addr = start + size;
218 if (end_addr > reserved_va) {
219 end_addr = reserved_va;
221 addr = end_addr - qemu_host_page_size;
223 while (1) {
224 if (addr > end_addr) {
225 if (looped) {
226 return (abi_ulong)-1;
228 end_addr = reserved_va;
229 addr = end_addr - qemu_host_page_size;
230 looped = 1;
231 continue;
233 prot = page_get_flags(addr);
234 if (prot) {
235 end_addr = addr;
237 if (addr + size == end_addr) {
238 break;
240 addr -= qemu_host_page_size;
243 if (start == mmap_next_start) {
244 mmap_next_start = addr;
247 return addr;
251 * Find and reserve a free memory area of size 'size'. The search
252 * starts at 'start'.
253 * It must be called with mmap_lock() held.
254 * Return -1 if error.
256 abi_ulong mmap_find_vma(abi_ulong start, abi_ulong size)
258 void *ptr, *prev;
259 abi_ulong addr;
260 int wrapped, repeat;
262 /* If 'start' == 0, then a default start address is used. */
263 if (start == 0) {
264 start = mmap_next_start;
265 } else {
266 start &= qemu_host_page_mask;
269 size = HOST_PAGE_ALIGN(size);
271 if (reserved_va) {
272 return mmap_find_vma_reserved(start, size);
275 addr = start;
276 wrapped = repeat = 0;
277 prev = 0;
279 for (;; prev = ptr) {
281 * Reserve needed memory area to avoid a race.
282 * It should be discarded using:
283 * - mmap() with MAP_FIXED flag
284 * - mremap() with MREMAP_FIXED flag
285 * - shmat() with SHM_REMAP flag
287 ptr = mmap(g2h(addr), size, PROT_NONE,
288 MAP_ANONYMOUS|MAP_PRIVATE|MAP_NORESERVE, -1, 0);
290 /* ENOMEM, if host address space has no memory */
291 if (ptr == MAP_FAILED) {
292 return (abi_ulong)-1;
295 /* Count the number of sequential returns of the same address.
296 This is used to modify the search algorithm below. */
297 repeat = (ptr == prev ? repeat + 1 : 0);
299 if (h2g_valid(ptr + size - 1)) {
300 addr = h2g(ptr);
302 if ((addr & ~TARGET_PAGE_MASK) == 0) {
303 /* Success. */
304 if (start == mmap_next_start && addr >= TASK_UNMAPPED_BASE) {
305 mmap_next_start = addr + size;
307 return addr;
310 /* The address is not properly aligned for the target. */
311 switch (repeat) {
312 case 0:
313 /* Assume the result that the kernel gave us is the
314 first with enough free space, so start again at the
315 next higher target page. */
316 addr = TARGET_PAGE_ALIGN(addr);
317 break;
318 case 1:
319 /* Sometimes the kernel decides to perform the allocation
320 at the top end of memory instead. */
321 addr &= TARGET_PAGE_MASK;
322 break;
323 case 2:
324 /* Start over at low memory. */
325 addr = 0;
326 break;
327 default:
328 /* Fail. This unaligned block must the last. */
329 addr = -1;
330 break;
332 } else {
333 /* Since the result the kernel gave didn't fit, start
334 again at low memory. If any repetition, fail. */
335 addr = (repeat ? -1 : 0);
338 /* Unmap and try again. */
339 munmap(ptr, size);
341 /* ENOMEM if we checked the whole of the target address space. */
342 if (addr == (abi_ulong)-1) {
343 return (abi_ulong)-1;
344 } else if (addr == 0) {
345 if (wrapped) {
346 return (abi_ulong)-1;
348 wrapped = 1;
349 /* Don't actually use 0 when wrapping, instead indicate
350 that we'd truly like an allocation in low memory. */
351 addr = (mmap_min_addr > TARGET_PAGE_SIZE
352 ? TARGET_PAGE_ALIGN(mmap_min_addr)
353 : TARGET_PAGE_SIZE);
354 } else if (wrapped && addr >= start) {
355 return (abi_ulong)-1;
360 /* NOTE: all the constants are the HOST ones */
361 abi_long target_mmap(abi_ulong start, abi_ulong len, int prot,
362 int flags, int fd, abi_ulong offset)
364 abi_ulong ret, end, real_start, real_end, retaddr, host_offset, host_len;
366 mmap_lock();
367 #ifdef DEBUG_MMAP
369 printf("mmap: start=0x" TARGET_ABI_FMT_lx
370 " len=0x" TARGET_ABI_FMT_lx " prot=%c%c%c flags=",
371 start, len,
372 prot & PROT_READ ? 'r' : '-',
373 prot & PROT_WRITE ? 'w' : '-',
374 prot & PROT_EXEC ? 'x' : '-');
375 if (flags & MAP_FIXED)
376 printf("MAP_FIXED ");
377 if (flags & MAP_ANONYMOUS)
378 printf("MAP_ANON ");
379 switch(flags & MAP_TYPE) {
380 case MAP_PRIVATE:
381 printf("MAP_PRIVATE ");
382 break;
383 case MAP_SHARED:
384 printf("MAP_SHARED ");
385 break;
386 default:
387 printf("[MAP_TYPE=0x%x] ", flags & MAP_TYPE);
388 break;
390 printf("fd=%d offset=" TARGET_ABI_FMT_lx "\n", fd, offset);
392 #endif
394 if (offset & ~TARGET_PAGE_MASK) {
395 errno = EINVAL;
396 goto fail;
399 len = TARGET_PAGE_ALIGN(len);
400 if (len == 0)
401 goto the_end;
402 real_start = start & qemu_host_page_mask;
403 host_offset = offset & qemu_host_page_mask;
405 /* If the user is asking for the kernel to find a location, do that
406 before we truncate the length for mapping files below. */
407 if (!(flags & MAP_FIXED)) {
408 host_len = len + offset - host_offset;
409 host_len = HOST_PAGE_ALIGN(host_len);
410 start = mmap_find_vma(real_start, host_len);
411 if (start == (abi_ulong)-1) {
412 errno = ENOMEM;
413 goto fail;
417 /* When mapping files into a memory area larger than the file, accesses
418 to pages beyond the file size will cause a SIGBUS.
420 For example, if mmaping a file of 100 bytes on a host with 4K pages
421 emulating a target with 8K pages, the target expects to be able to
422 access the first 8K. But the host will trap us on any access beyond
423 4K.
425 When emulating a target with a larger page-size than the hosts, we
426 may need to truncate file maps at EOF and add extra anonymous pages
427 up to the targets page boundary. */
429 if ((qemu_real_host_page_size < TARGET_PAGE_SIZE)
430 && !(flags & MAP_ANONYMOUS)) {
431 struct stat sb;
433 if (fstat (fd, &sb) == -1)
434 goto fail;
436 /* Are we trying to create a map beyond EOF?. */
437 if (offset + len > sb.st_size) {
438 /* If so, truncate the file map at eof aligned with
439 the hosts real pagesize. Additional anonymous maps
440 will be created beyond EOF. */
441 len = REAL_HOST_PAGE_ALIGN(sb.st_size - offset);
445 if (!(flags & MAP_FIXED)) {
446 unsigned long host_start;
447 void *p;
449 host_len = len + offset - host_offset;
450 host_len = HOST_PAGE_ALIGN(host_len);
452 /* Note: we prefer to control the mapping address. It is
453 especially important if qemu_host_page_size >
454 qemu_real_host_page_size */
455 p = mmap(g2h(start), host_len, prot,
456 flags | MAP_FIXED | MAP_ANONYMOUS, -1, 0);
457 if (p == MAP_FAILED)
458 goto fail;
459 /* update start so that it points to the file position at 'offset' */
460 host_start = (unsigned long)p;
461 if (!(flags & MAP_ANONYMOUS)) {
462 p = mmap(g2h(start), len, prot,
463 flags | MAP_FIXED, fd, host_offset);
464 if (p == MAP_FAILED) {
465 munmap(g2h(start), host_len);
466 goto fail;
468 host_start += offset - host_offset;
470 start = h2g(host_start);
471 } else {
472 if (start & ~TARGET_PAGE_MASK) {
473 errno = EINVAL;
474 goto fail;
476 end = start + len;
477 real_end = HOST_PAGE_ALIGN(end);
480 * Test if requested memory area fits target address space
481 * It can fail only on 64-bit host with 32-bit target.
482 * On any other target/host host mmap() handles this error correctly.
484 if ((unsigned long)start + len - 1 > (abi_ulong) -1) {
485 errno = EINVAL;
486 goto fail;
489 /* worst case: we cannot map the file because the offset is not
490 aligned, so we read it */
491 if (!(flags & MAP_ANONYMOUS) &&
492 (offset & ~qemu_host_page_mask) != (start & ~qemu_host_page_mask)) {
493 /* msync() won't work here, so we return an error if write is
494 possible while it is a shared mapping */
495 if ((flags & MAP_TYPE) == MAP_SHARED &&
496 (prot & PROT_WRITE)) {
497 errno = EINVAL;
498 goto fail;
500 retaddr = target_mmap(start, len, prot | PROT_WRITE,
501 MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS,
502 -1, 0);
503 if (retaddr == -1)
504 goto fail;
505 if (pread(fd, g2h(start), len, offset) == -1)
506 goto fail;
507 if (!(prot & PROT_WRITE)) {
508 ret = target_mprotect(start, len, prot);
509 assert(ret == 0);
511 goto the_end;
514 /* handle the start of the mapping */
515 if (start > real_start) {
516 if (real_end == real_start + qemu_host_page_size) {
517 /* one single host page */
518 ret = mmap_frag(real_start, start, end,
519 prot, flags, fd, offset);
520 if (ret == -1)
521 goto fail;
522 goto the_end1;
524 ret = mmap_frag(real_start, start, real_start + qemu_host_page_size,
525 prot, flags, fd, offset);
526 if (ret == -1)
527 goto fail;
528 real_start += qemu_host_page_size;
530 /* handle the end of the mapping */
531 if (end < real_end) {
532 ret = mmap_frag(real_end - qemu_host_page_size,
533 real_end - qemu_host_page_size, end,
534 prot, flags, fd,
535 offset + real_end - qemu_host_page_size - start);
536 if (ret == -1)
537 goto fail;
538 real_end -= qemu_host_page_size;
541 /* map the middle (easier) */
542 if (real_start < real_end) {
543 void *p;
544 unsigned long offset1;
545 if (flags & MAP_ANONYMOUS)
546 offset1 = 0;
547 else
548 offset1 = offset + real_start - start;
549 p = mmap(g2h(real_start), real_end - real_start,
550 prot, flags, fd, offset1);
551 if (p == MAP_FAILED)
552 goto fail;
555 the_end1:
556 page_set_flags(start, start + len, prot | PAGE_VALID);
557 the_end:
558 #ifdef DEBUG_MMAP
559 printf("ret=0x" TARGET_ABI_FMT_lx "\n", start);
560 page_dump(stdout);
561 printf("\n");
562 #endif
563 tb_invalidate_phys_range(start, start + len);
564 mmap_unlock();
565 return start;
566 fail:
567 mmap_unlock();
568 return -1;
571 static void mmap_reserve(abi_ulong start, abi_ulong size)
573 abi_ulong real_start;
574 abi_ulong real_end;
575 abi_ulong addr;
576 abi_ulong end;
577 int prot;
579 real_start = start & qemu_host_page_mask;
580 real_end = HOST_PAGE_ALIGN(start + size);
581 end = start + size;
582 if (start > real_start) {
583 /* handle host page containing start */
584 prot = 0;
585 for (addr = real_start; addr < start; addr += TARGET_PAGE_SIZE) {
586 prot |= page_get_flags(addr);
588 if (real_end == real_start + qemu_host_page_size) {
589 for (addr = end; addr < real_end; addr += TARGET_PAGE_SIZE) {
590 prot |= page_get_flags(addr);
592 end = real_end;
594 if (prot != 0)
595 real_start += qemu_host_page_size;
597 if (end < real_end) {
598 prot = 0;
599 for (addr = end; addr < real_end; addr += TARGET_PAGE_SIZE) {
600 prot |= page_get_flags(addr);
602 if (prot != 0)
603 real_end -= qemu_host_page_size;
605 if (real_start != real_end) {
606 mmap(g2h(real_start), real_end - real_start, PROT_NONE,
607 MAP_FIXED | MAP_ANONYMOUS | MAP_PRIVATE | MAP_NORESERVE,
608 -1, 0);
612 int target_munmap(abi_ulong start, abi_ulong len)
614 abi_ulong end, real_start, real_end, addr;
615 int prot, ret;
617 #ifdef DEBUG_MMAP
618 printf("munmap: start=0x" TARGET_ABI_FMT_lx " len=0x"
619 TARGET_ABI_FMT_lx "\n",
620 start, len);
621 #endif
622 if (start & ~TARGET_PAGE_MASK)
623 return -EINVAL;
624 len = TARGET_PAGE_ALIGN(len);
625 if (len == 0)
626 return -EINVAL;
627 mmap_lock();
628 end = start + len;
629 real_start = start & qemu_host_page_mask;
630 real_end = HOST_PAGE_ALIGN(end);
632 if (start > real_start) {
633 /* handle host page containing start */
634 prot = 0;
635 for(addr = real_start; addr < start; addr += TARGET_PAGE_SIZE) {
636 prot |= page_get_flags(addr);
638 if (real_end == real_start + qemu_host_page_size) {
639 for(addr = end; addr < real_end; addr += TARGET_PAGE_SIZE) {
640 prot |= page_get_flags(addr);
642 end = real_end;
644 if (prot != 0)
645 real_start += qemu_host_page_size;
647 if (end < real_end) {
648 prot = 0;
649 for(addr = end; addr < real_end; addr += TARGET_PAGE_SIZE) {
650 prot |= page_get_flags(addr);
652 if (prot != 0)
653 real_end -= qemu_host_page_size;
656 ret = 0;
657 /* unmap what we can */
658 if (real_start < real_end) {
659 if (reserved_va) {
660 mmap_reserve(real_start, real_end - real_start);
661 } else {
662 ret = munmap(g2h(real_start), real_end - real_start);
666 if (ret == 0) {
667 page_set_flags(start, start + len, 0);
668 tb_invalidate_phys_range(start, start + len);
670 mmap_unlock();
671 return ret;
674 abi_long target_mremap(abi_ulong old_addr, abi_ulong old_size,
675 abi_ulong new_size, unsigned long flags,
676 abi_ulong new_addr)
678 int prot;
679 void *host_addr;
681 mmap_lock();
683 if (flags & MREMAP_FIXED) {
684 host_addr = (void *) syscall(__NR_mremap, g2h(old_addr),
685 old_size, new_size,
686 flags,
687 g2h(new_addr));
689 if (reserved_va && host_addr != MAP_FAILED) {
690 /* If new and old addresses overlap then the above mremap will
691 already have failed with EINVAL. */
692 mmap_reserve(old_addr, old_size);
694 } else if (flags & MREMAP_MAYMOVE) {
695 abi_ulong mmap_start;
697 mmap_start = mmap_find_vma(0, new_size);
699 if (mmap_start == -1) {
700 errno = ENOMEM;
701 host_addr = MAP_FAILED;
702 } else {
703 host_addr = (void *) syscall(__NR_mremap, g2h(old_addr),
704 old_size, new_size,
705 flags | MREMAP_FIXED,
706 g2h(mmap_start));
707 if (reserved_va) {
708 mmap_reserve(old_addr, old_size);
711 } else {
712 int prot = 0;
713 if (reserved_va && old_size < new_size) {
714 abi_ulong addr;
715 for (addr = old_addr + old_size;
716 addr < old_addr + new_size;
717 addr++) {
718 prot |= page_get_flags(addr);
721 if (prot == 0) {
722 host_addr = mremap(g2h(old_addr), old_size, new_size, flags);
723 if (host_addr != MAP_FAILED && reserved_va && old_size > new_size) {
724 mmap_reserve(old_addr + old_size, new_size - old_size);
726 } else {
727 errno = ENOMEM;
728 host_addr = MAP_FAILED;
730 /* Check if address fits target address space */
731 if ((unsigned long)host_addr + new_size > (abi_ulong)-1) {
732 /* Revert mremap() changes */
733 host_addr = mremap(g2h(old_addr), new_size, old_size, flags);
734 errno = ENOMEM;
735 host_addr = MAP_FAILED;
739 if (host_addr == MAP_FAILED) {
740 new_addr = -1;
741 } else {
742 new_addr = h2g(host_addr);
743 prot = page_get_flags(old_addr);
744 page_set_flags(old_addr, old_addr + old_size, 0);
745 page_set_flags(new_addr, new_addr + new_size, prot | PAGE_VALID);
747 tb_invalidate_phys_range(new_addr, new_addr + new_size);
748 mmap_unlock();
749 return new_addr;
752 int target_msync(abi_ulong start, abi_ulong len, int flags)
754 abi_ulong end;
756 if (start & ~TARGET_PAGE_MASK)
757 return -EINVAL;
758 len = TARGET_PAGE_ALIGN(len);
759 end = start + len;
760 if (end < start)
761 return -EINVAL;
762 if (end == start)
763 return 0;
765 start &= qemu_host_page_mask;
766 return msync(g2h(start), end - start, flags);