linux-user: fix page_unprotect when host page size > target page size
[qemu.git] / linux-user / mmap.c
blob 46923c707c42056dda139cf97a53da3948801e51
/*
 *  mmap support for qemu
 *
 *  Copyright (c) 2003 Fabrice Bellard
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, see <http://www.gnu.org/licenses/>.
 */
#include <stdlib.h>
#include <stdio.h>
#include <stdarg.h>
#include <string.h>
#include <unistd.h>
#include <errno.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/mman.h>
#include <linux/mman.h>
#include <linux/unistd.h>

#include "qemu.h"
#include "qemu-common.h"

//#define DEBUG_MMAP

#if defined(CONFIG_USE_NPTL)
static pthread_mutex_t mmap_mutex = PTHREAD_MUTEX_INITIALIZER;
static __thread int mmap_lock_count;

void mmap_lock(void)
{
    if (mmap_lock_count++ == 0) {
        pthread_mutex_lock(&mmap_mutex);
    }
}

void mmap_unlock(void)
{
    if (--mmap_lock_count == 0) {
        pthread_mutex_unlock(&mmap_mutex);
    }
}

/* Grab lock to make sure things are in a consistent state after fork().  */
void mmap_fork_start(void)
{
    if (mmap_lock_count)
        abort();
    pthread_mutex_lock(&mmap_mutex);
}

void mmap_fork_end(int child)
{
    if (child)
        pthread_mutex_init(&mmap_mutex, NULL);
    else
        pthread_mutex_unlock(&mmap_mutex);
}
#else
/* We aren't threadsafe to start with, so no need to worry about locking.  */
void mmap_lock(void)
{
}

void mmap_unlock(void)
{
}
#endif

void *qemu_vmalloc(size_t size)
{
    void *p;

    mmap_lock();
    /* Use map and mark the pages as used.  */
    p = mmap(NULL, size, PROT_READ | PROT_WRITE,
             MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

    if (h2g_valid(p)) {
        /* Allocated region overlaps guest address space.  This may recurse.  */
        abi_ulong addr = h2g(p);
        page_set_flags(addr & TARGET_PAGE_MASK, TARGET_PAGE_ALIGN(addr + size),
                       PAGE_RESERVED);
    }

    mmap_unlock();
    return p;
}

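/* The qemu_malloc()/qemu_free()/qemu_realloc() wrappers below carve
   allocations out of qemu_vmalloc().  The total mapping size is stored
   in the first 16 bytes of the mapping and the caller is handed the
   address 16 bytes in, so qemu_free() and qemu_realloc() can recover
   both the original mapping and its length.  */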
void *qemu_malloc(size_t size)
{
    char * p;
    size += 16;
    p = qemu_vmalloc(size);
    *(size_t *)p = size;
    return p + 16;
}

/* We use map, which is always zero initialized.  */
void * qemu_mallocz(size_t size)
{
    return qemu_malloc(size);
}

void qemu_free(void *ptr)
{
    /* FIXME: We should unmark the reserved pages here.  However this gets
       complicated when one target page spans multiple host pages, so we
       don't bother.  */
    size_t *p;
    p = (size_t *)((char *)ptr - 16);
    munmap(p, *p);
}

void *qemu_realloc(void *ptr, size_t size)
{
    size_t old_size, copy;
    void *new_ptr;

    if (!ptr)
        return qemu_malloc(size);
    old_size = *(size_t *)((char *)ptr - 16);
    copy = old_size < size ? old_size : size;
    new_ptr = qemu_malloc(size);
    memcpy(new_ptr, ptr, copy);
    qemu_free(ptr);
    return new_ptr;
}

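/* target_mprotect() below has to cope with a target page size smaller
   than the host page size: a partially covered host page at the start
   or end of the range keeps the union of the protections of all target
   pages it contains, while fully covered host pages in the middle get
   exactly the requested protection.  */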
/* NOTE: all the constants are the HOST ones, but addresses are target. */
int target_mprotect(abi_ulong start, abi_ulong len, int prot)
{
    abi_ulong end, host_start, host_end, addr;
    int prot1, ret;

#ifdef DEBUG_MMAP
    printf("mprotect: start=0x" TARGET_ABI_FMT_lx
           " len=0x" TARGET_ABI_FMT_lx " prot=%c%c%c\n", start, len,
           prot & PROT_READ ? 'r' : '-',
           prot & PROT_WRITE ? 'w' : '-',
           prot & PROT_EXEC ? 'x' : '-');
#endif

    if ((start & ~TARGET_PAGE_MASK) != 0)
        return -EINVAL;
    len = TARGET_PAGE_ALIGN(len);
    end = start + len;
    if (end < start)
        return -EINVAL;
    prot &= PROT_READ | PROT_WRITE | PROT_EXEC;
    if (len == 0)
        return 0;

    mmap_lock();
    host_start = start & qemu_host_page_mask;
    host_end = HOST_PAGE_ALIGN(end);
    if (start > host_start) {
        /* handle host page containing start */
        prot1 = prot;
        for(addr = host_start; addr < start; addr += TARGET_PAGE_SIZE) {
            prot1 |= page_get_flags(addr);
        }
        if (host_end == host_start + qemu_host_page_size) {
            for(addr = end; addr < host_end; addr += TARGET_PAGE_SIZE) {
                prot1 |= page_get_flags(addr);
            }
            end = host_end;
        }
        ret = mprotect(g2h(host_start), qemu_host_page_size, prot1 & PAGE_BITS);
        if (ret != 0)
            goto error;
        host_start += qemu_host_page_size;
    }
    if (end < host_end) {
        prot1 = prot;
        for(addr = end; addr < host_end; addr += TARGET_PAGE_SIZE) {
            prot1 |= page_get_flags(addr);
        }
        ret = mprotect(g2h(host_end - qemu_host_page_size), qemu_host_page_size,
                       prot1 & PAGE_BITS);
        if (ret != 0)
            goto error;
        host_end -= qemu_host_page_size;
    }

    /* handle the pages in the middle */
    if (host_start < host_end) {
        ret = mprotect(g2h(host_start), host_end - host_start, prot);
        if (ret != 0)
            goto error;
    }
    page_set_flags(start, start + len, prot | PAGE_VALID);
    mmap_unlock();
    return 0;
error:
    mmap_unlock();
    return ret;
}

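/* mmap_frag() below maps one host page that is only partially covered
   by the new target mapping.  If no target page in that host page was
   mapped before, a fresh anonymous page is created; otherwise the file
   data is pread() into place and the host page ends up with the union
   of the old and new protections.  */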
/* map an incomplete host page */
static int mmap_frag(abi_ulong real_start,
                     abi_ulong start, abi_ulong end,
                     int prot, int flags, int fd, abi_ulong offset)
{
    abi_ulong real_end, addr;
    void *host_start;
    int prot1, prot_new;

    real_end = real_start + qemu_host_page_size;
    host_start = g2h(real_start);

    /* get the protection of the target pages outside the mapping */
    prot1 = 0;
    for(addr = real_start; addr < real_end; addr++) {
        if (addr < start || addr >= end)
            prot1 |= page_get_flags(addr);
    }

    if (prot1 == 0) {
        /* no page was there, so we allocate one */
        void *p = mmap(host_start, qemu_host_page_size, prot,
                       flags | MAP_ANONYMOUS, -1, 0);
        if (p == MAP_FAILED)
            return -1;
        prot1 = prot;
    }
    prot1 &= PAGE_BITS;

    prot_new = prot | prot1;
    if (!(flags & MAP_ANONYMOUS)) {
        /* msync() won't work here, so we return an error if write is
           possible while it is a shared mapping */
        if ((flags & MAP_TYPE) == MAP_SHARED &&
            (prot & PROT_WRITE))
            return -1;

        /* adjust protection to be able to read */
        if (!(prot1 & PROT_WRITE))
            mprotect(host_start, qemu_host_page_size, prot1 | PROT_WRITE);

        /* read the corresponding file data */
        if (pread(fd, g2h(start), end - start, offset) == -1)
            return -1;

        /* put final protection */
        if (prot_new != (prot1 | PROT_WRITE))
            mprotect(host_start, qemu_host_page_size, prot_new);
    } else {
        /* just update the protection */
        if (prot_new != prot1) {
            mprotect(host_start, qemu_host_page_size, prot_new);
        }
    }
    return 0;
}

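/* TASK_UNMAPPED_BASE is where mmap_find_vma() starts searching when the
   caller gives no hint; mmap_next_start caches where the last
   default-placed allocation ended so that later searches can start past
   it instead of rescanning the same region.  */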
#if HOST_LONG_BITS == 64 && TARGET_ABI_BITS == 64
# define TASK_UNMAPPED_BASE  (1ul << 38)
#elif defined(__CYGWIN__)
/* Cygwin doesn't have a whole lot of address space.  */
# define TASK_UNMAPPED_BASE  0x18000000
#else
# define TASK_UNMAPPED_BASE  0x40000000
#endif
static abi_ulong mmap_next_start = TASK_UNMAPPED_BASE;

unsigned long last_brk;

/*
 * Find and reserve a free memory area of size 'size'. The search
 * starts at 'start'.
 * It must be called with mmap_lock() held.
 * Return -1 if error.
 */
abi_ulong mmap_find_vma(abi_ulong start, abi_ulong size)
{
    void *ptr, *prev;
    abi_ulong addr;
    int wrapped, repeat;

    /* If 'start' == 0, then a default start address is used. */
    if (start == 0) {
        start = mmap_next_start;
    } else {
        start &= qemu_host_page_mask;
    }

    size = HOST_PAGE_ALIGN(size);

    addr = start;
    wrapped = repeat = 0;
    prev = 0;

    for (;; prev = ptr) {
        /*
         * Reserve needed memory area to avoid a race.
         * It should be discarded using:
         *  - mmap() with MAP_FIXED flag
         *  - mremap() with MREMAP_FIXED flag
         *  - shmat() with SHM_REMAP flag
         */
        ptr = mmap(g2h(addr), size, PROT_NONE,
                   MAP_ANONYMOUS|MAP_PRIVATE|MAP_NORESERVE, -1, 0);

        /* ENOMEM, if host address space has no memory */
        if (ptr == MAP_FAILED) {
            return (abi_ulong)-1;
        }

        /* Count the number of sequential returns of the same address.
           This is used to modify the search algorithm below.  */
        repeat = (ptr == prev ? repeat + 1 : 0);

        if (h2g_valid(ptr + size - 1)) {
            addr = h2g(ptr);

            if ((addr & ~TARGET_PAGE_MASK) == 0) {
                /* Success.  */
                if (start == mmap_next_start && addr >= TASK_UNMAPPED_BASE) {
                    mmap_next_start = addr + size;
                }
                return addr;
            }

            /* The address is not properly aligned for the target.  */
            switch (repeat) {
            case 0:
                /* Assume the result that the kernel gave us is the
                   first with enough free space, so start again at the
                   next higher target page.  */
                addr = TARGET_PAGE_ALIGN(addr);
                break;
            case 1:
                /* Sometimes the kernel decides to perform the allocation
                   at the top end of memory instead.  */
                addr &= TARGET_PAGE_MASK;
                break;
            case 2:
                /* Start over at low memory.  */
                addr = 0;
                break;
            default:
                /* Fail.  This unaligned block must be the last.  */
                addr = -1;
                break;
            }
        } else {
            /* Since the result the kernel gave didn't fit, start
               again at low memory.  If any repetition, fail.  */
            addr = (repeat ? -1 : 0);
        }

        /* Unmap and try again.  */
        munmap(ptr, size);

        /* ENOMEM if we checked the whole of the target address space.  */
        if (addr == (abi_ulong)-1) {
            return (abi_ulong)-1;
        } else if (addr == 0) {
            if (wrapped) {
                return (abi_ulong)-1;
            }
            wrapped = 1;
            /* Don't actually use 0 when wrapping, instead indicate
               that we'd truly like an allocation in low memory.  */
            addr = (mmap_min_addr > TARGET_PAGE_SIZE
                    ? TARGET_PAGE_ALIGN(mmap_min_addr)
                    : TARGET_PAGE_SIZE);
        } else if (wrapped && addr >= start) {
            return (abi_ulong)-1;
        }
    }
}

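/* target_mmap() below implements the guest mmap().  Without MAP_FIXED it
   reserves a suitably sized host region with mmap_find_vma() and maps on
   top of it; with MAP_FIXED it maps the host-page-aligned middle of the
   range directly and uses mmap_frag() for the partially covered host
   pages at either end.  */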
/* NOTE: all the constants are the HOST ones */
abi_long target_mmap(abi_ulong start, abi_ulong len, int prot,
                     int flags, int fd, abi_ulong offset)
{
    abi_ulong ret, end, real_start, real_end, retaddr, host_offset, host_len;
    unsigned long host_start;

    mmap_lock();
#ifdef DEBUG_MMAP
    {
        printf("mmap: start=0x" TARGET_ABI_FMT_lx
               " len=0x" TARGET_ABI_FMT_lx " prot=%c%c%c flags=",
               start, len,
               prot & PROT_READ ? 'r' : '-',
               prot & PROT_WRITE ? 'w' : '-',
               prot & PROT_EXEC ? 'x' : '-');
        if (flags & MAP_FIXED)
            printf("MAP_FIXED ");
        if (flags & MAP_ANONYMOUS)
            printf("MAP_ANON ");
        switch(flags & MAP_TYPE) {
        case MAP_PRIVATE:
            printf("MAP_PRIVATE ");
            break;
        case MAP_SHARED:
            printf("MAP_SHARED ");
            break;
        default:
            printf("[MAP_TYPE=0x%x] ", flags & MAP_TYPE);
            break;
        }
        printf("fd=%d offset=" TARGET_ABI_FMT_lx "\n", fd, offset);
    }
#endif

    if (offset & ~TARGET_PAGE_MASK) {
        errno = EINVAL;
        goto fail;
    }

    len = TARGET_PAGE_ALIGN(len);
    if (len == 0)
        goto the_end;
    real_start = start & qemu_host_page_mask;

    /* When mapping files into a memory area larger than the file, accesses
       to pages beyond the file size will cause a SIGBUS.

       For example, if mmapping a file of 100 bytes on a host with 4K pages
       emulating a target with 8K pages, the target expects to be able to
       access the first 8K. But the host will trap us on any access beyond
       4K.

       When emulating a target with a larger page size than the host's, we
       may need to truncate file maps at EOF and add extra anonymous pages
       up to the target's page boundary.  */

    if ((qemu_real_host_page_size < TARGET_PAGE_SIZE)
        && !(flags & MAP_ANONYMOUS)) {
        struct stat sb;

        if (fstat(fd, &sb) == -1)
            goto fail;

        /* Are we trying to create a map beyond EOF?  */
        if (offset + len > sb.st_size) {
            /* If so, truncate the file map at EOF aligned with
               the host's real page size.  Additional anonymous maps
               will be created beyond EOF.  */
            len = (sb.st_size - offset);
            len += qemu_real_host_page_size - 1;
            len &= ~(qemu_real_host_page_size - 1);
        }
    }

    if (!(flags & MAP_FIXED)) {
        abi_ulong mmap_start;
        void *p;
        host_offset = offset & qemu_host_page_mask;
        host_len = len + offset - host_offset;
        host_len = HOST_PAGE_ALIGN(host_len);
        mmap_start = mmap_find_vma(real_start, host_len);
        if (mmap_start == (abi_ulong)-1) {
            errno = ENOMEM;
            goto fail;
        }
        /* Note: we prefer to control the mapping address. It is
           especially important if qemu_host_page_size >
           qemu_real_host_page_size */
        p = mmap(g2h(mmap_start),
                 host_len, prot, flags | MAP_FIXED | MAP_ANONYMOUS, -1, 0);
        if (p == MAP_FAILED)
            goto fail;
        /* update start so that it points to the file position at 'offset' */
        host_start = (unsigned long)p;
        if (!(flags & MAP_ANONYMOUS)) {
            p = mmap(g2h(mmap_start), len, prot,
                     flags | MAP_FIXED, fd, host_offset);
            host_start += offset - host_offset;
        }
        start = h2g(host_start);
    } else {
        int flg;
        target_ulong addr;

        if (start & ~TARGET_PAGE_MASK) {
            errno = EINVAL;
            goto fail;
        }
        end = start + len;
        real_end = HOST_PAGE_ALIGN(end);

        /*
         * Test if requested memory area fits target address space
         * It can fail only on 64-bit host with 32-bit target.
         * On any other target/host combination the host mmap() handles
         * this error correctly.
         */
        if ((unsigned long)start + len - 1 > (abi_ulong) -1) {
            errno = EINVAL;
            goto fail;
        }

        for(addr = real_start; addr < real_end; addr += TARGET_PAGE_SIZE) {
            flg = page_get_flags(addr);
            if (flg & PAGE_RESERVED) {
                errno = ENXIO;
                goto fail;
            }
        }

        /* worst case: we cannot map the file because the offset is not
           aligned, so we read it */
        if (!(flags & MAP_ANONYMOUS) &&
            (offset & ~qemu_host_page_mask) != (start & ~qemu_host_page_mask)) {
            /* msync() won't work here, so we return an error if write is
               possible while it is a shared mapping */
            if ((flags & MAP_TYPE) == MAP_SHARED &&
                (prot & PROT_WRITE)) {
                errno = EINVAL;
                goto fail;
            }
            retaddr = target_mmap(start, len, prot | PROT_WRITE,
                                  MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS,
                                  -1, 0);
            if (retaddr == -1)
                goto fail;
            if (pread(fd, g2h(start), len, offset) == -1)
                goto fail;
            if (!(prot & PROT_WRITE)) {
                ret = target_mprotect(start, len, prot);
                if (ret != 0) {
                    start = ret;
                    goto the_end;
                }
            }
            goto the_end;
        }

        /* handle the start of the mapping */
        if (start > real_start) {
            if (real_end == real_start + qemu_host_page_size) {
                /* one single host page */
                ret = mmap_frag(real_start, start, end,
                                prot, flags, fd, offset);
                if (ret == -1)
                    goto fail;
                goto the_end1;
            }
            ret = mmap_frag(real_start, start, real_start + qemu_host_page_size,
                            prot, flags, fd, offset);
            if (ret == -1)
                goto fail;
            real_start += qemu_host_page_size;
        }
        /* handle the end of the mapping */
        if (end < real_end) {
            ret = mmap_frag(real_end - qemu_host_page_size,
                            real_end - qemu_host_page_size, real_end,
                            prot, flags, fd,
                            offset + real_end - qemu_host_page_size - start);
            if (ret == -1)
                goto fail;
            real_end -= qemu_host_page_size;
        }

        /* map the middle (easier) */
        if (real_start < real_end) {
            void *p;
            unsigned long offset1;
            if (flags & MAP_ANONYMOUS)
                offset1 = 0;
            else
                offset1 = offset + real_start - start;
            p = mmap(g2h(real_start), real_end - real_start,
                     prot, flags, fd, offset1);
            if (p == MAP_FAILED)
                goto fail;
        }
    }
 the_end1:
    page_set_flags(start, start + len, prot | PAGE_VALID);
 the_end:
#ifdef DEBUG_MMAP
    printf("ret=0x" TARGET_ABI_FMT_lx "\n", start);
    page_dump(stdout);
    printf("\n");
#endif
    mmap_unlock();
    return start;
fail:
    mmap_unlock();
    return -1;
}

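/* target_munmap() below releases whole host pages only: a host page at
   either end of the range that still contains live target pages from
   another mapping is kept mapped, and only the target page flags for
   the unmapped range are cleared.  */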
int target_munmap(abi_ulong start, abi_ulong len)
{
    abi_ulong end, real_start, real_end, addr;
    int prot, ret;

#ifdef DEBUG_MMAP
    printf("munmap: start=0x" TARGET_ABI_FMT_lx " len=0x"
           TARGET_ABI_FMT_lx "\n",
           start, len);
#endif
    if (start & ~TARGET_PAGE_MASK)
        return -EINVAL;
    len = TARGET_PAGE_ALIGN(len);
    if (len == 0)
        return -EINVAL;
    mmap_lock();
    end = start + len;
    real_start = start & qemu_host_page_mask;
    real_end = HOST_PAGE_ALIGN(end);

    if (start > real_start) {
        /* handle host page containing start */
        prot = 0;
        for(addr = real_start; addr < start; addr += TARGET_PAGE_SIZE) {
            prot |= page_get_flags(addr);
        }
        if (real_end == real_start + qemu_host_page_size) {
            for(addr = end; addr < real_end; addr += TARGET_PAGE_SIZE) {
                prot |= page_get_flags(addr);
            }
            end = real_end;
        }
        if (prot != 0)
            real_start += qemu_host_page_size;
    }
    if (end < real_end) {
        prot = 0;
        for(addr = end; addr < real_end; addr += TARGET_PAGE_SIZE) {
            prot |= page_get_flags(addr);
        }
        if (prot != 0)
            real_end -= qemu_host_page_size;
    }

    ret = 0;
    /* unmap what we can */
    if (real_start < real_end) {
        ret = munmap(g2h(real_start), real_end - real_start);
    }

    if (ret == 0)
        page_set_flags(start, start + len, 0);
    mmap_unlock();
    return ret;
}

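/* target_mremap() below handles three cases: MREMAP_FIXED is passed
   straight through to the host mremap syscall, MREMAP_MAYMOVE picks a
   new area with mmap_find_vma() and remaps there, and a plain resize is
   attempted in place and reverted if the result does not fit the target
   address space.  */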
abi_long target_mremap(abi_ulong old_addr, abi_ulong old_size,
                       abi_ulong new_size, unsigned long flags,
                       abi_ulong new_addr)
{
    int prot;
    void *host_addr;

    mmap_lock();

    if (flags & MREMAP_FIXED)
        host_addr = (void *) syscall(__NR_mremap, g2h(old_addr),
                                     old_size, new_size,
                                     flags,
                                     new_addr);
    else if (flags & MREMAP_MAYMOVE) {
        abi_ulong mmap_start;

        mmap_start = mmap_find_vma(0, new_size);

        if (mmap_start == -1) {
            errno = ENOMEM;
            host_addr = MAP_FAILED;
        } else
            host_addr = (void *) syscall(__NR_mremap, g2h(old_addr),
                                         old_size, new_size,
                                         flags | MREMAP_FIXED,
                                         g2h(mmap_start));
    } else {
        host_addr = mremap(g2h(old_addr), old_size, new_size, flags);
        /* Check if address fits target address space */
        if ((unsigned long)host_addr + new_size > (abi_ulong)-1) {
            /* Revert mremap() changes */
            host_addr = mremap(g2h(old_addr), new_size, old_size, flags);
            errno = ENOMEM;
            host_addr = MAP_FAILED;
        }
    }

    if (host_addr == MAP_FAILED) {
        new_addr = -1;
    } else {
        new_addr = h2g(host_addr);
        prot = page_get_flags(old_addr);
        page_set_flags(old_addr, old_addr + old_size, 0);
        page_set_flags(new_addr, new_addr + new_size, prot | PAGE_VALID);
    }
    mmap_unlock();
    return new_addr;
}

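/* target_msync() below rounds the start down to a host page boundary,
   since the host msync() works on host pages while the guest passed a
   target-page-aligned range.  */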
int target_msync(abi_ulong start, abi_ulong len, int flags)
{
    abi_ulong end;

    if (start & ~TARGET_PAGE_MASK)
        return -EINVAL;
    len = TARGET_PAGE_ALIGN(len);
    end = start + len;
    if (end < start)
        return -EINVAL;
    if (end == start)
        return 0;

    start &= qemu_host_page_mask;
    return msync(g2h(start), end - start, flags);
}