MAINTAINERS: Add the CAN documentation file to the CAN section
[qemu/kevin.git] / bsd-user / mmap.c
blob3ef11b28079b2254c090fa1701cbb11143a2a1b2
1 /*
2 * mmap support for qemu
4 * Copyright (c) 2003 - 2008 Fabrice Bellard
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, see <http://www.gnu.org/licenses/>.
19 #include "qemu/osdep.h"
21 #include "qemu.h"
/* Mutex taken by the outermost mmap_lock() call and by mmap_fork_start(). */
23 static pthread_mutex_t mmap_mutex = PTHREAD_MUTEX_INITIALIZER;
/* Per-thread nesting depth of mmap_lock(); mmap_mutex is held while > 0. */
24 static __thread int mmap_lock_count;
26 void mmap_lock(void)
28 if (mmap_lock_count++ == 0) {
29 pthread_mutex_lock(&mmap_mutex);
33 void mmap_unlock(void)
35 assert(mmap_lock_count > 0);
36 if (--mmap_lock_count == 0) {
37 pthread_mutex_unlock(&mmap_mutex);
41 bool have_mmap_lock(void)
43 return mmap_lock_count > 0 ? true : false;
46 /* Grab lock to make sure things are in a consistent state after fork(). */
47 void mmap_fork_start(void)
49 if (mmap_lock_count)
50 abort();
51 pthread_mutex_lock(&mmap_mutex);
54 void mmap_fork_end(int child)
56 if (child)
57 pthread_mutex_init(&mmap_mutex, NULL);
58 else
59 pthread_mutex_unlock(&mmap_mutex);
62 /* NOTE: all the constants are the HOST ones, but addresses are target. */
63 int target_mprotect(abi_ulong start, abi_ulong len, int prot)
65 abi_ulong end, host_start, host_end, addr;
66 int prot1, ret;
68 qemu_log_mask(CPU_LOG_PAGE, "mprotect: start=0x" TARGET_ABI_FMT_lx
69 " len=0x" TARGET_ABI_FMT_lx " prot=%c%c%c\n", start, len,
70 prot & PROT_READ ? 'r' : '-',
71 prot & PROT_WRITE ? 'w' : '-',
72 prot & PROT_EXEC ? 'x' : '-');
73 if ((start & ~TARGET_PAGE_MASK) != 0)
74 return -EINVAL;
75 len = TARGET_PAGE_ALIGN(len);
76 end = start + len;
77 if (end < start)
78 return -EINVAL;
79 prot &= PROT_READ | PROT_WRITE | PROT_EXEC;
80 if (len == 0)
81 return 0;
83 mmap_lock();
84 host_start = start & qemu_host_page_mask;
85 host_end = HOST_PAGE_ALIGN(end);
86 if (start > host_start) {
87 /* handle host page containing start */
88 prot1 = prot;
89 for (addr = host_start; addr < start; addr += TARGET_PAGE_SIZE) {
90 prot1 |= page_get_flags(addr);
92 if (host_end == host_start + qemu_host_page_size) {
93 for (addr = end; addr < host_end; addr += TARGET_PAGE_SIZE) {
94 prot1 |= page_get_flags(addr);
96 end = host_end;
98 ret = mprotect(g2h_untagged(host_start),
99 qemu_host_page_size, prot1 & PAGE_BITS);
100 if (ret != 0)
101 goto error;
102 host_start += qemu_host_page_size;
104 if (end < host_end) {
105 prot1 = prot;
106 for (addr = end; addr < host_end; addr += TARGET_PAGE_SIZE) {
107 prot1 |= page_get_flags(addr);
109 ret = mprotect(g2h_untagged(host_end - qemu_host_page_size),
110 qemu_host_page_size, prot1 & PAGE_BITS);
111 if (ret != 0)
112 goto error;
113 host_end -= qemu_host_page_size;
116 /* handle the pages in the middle */
117 if (host_start < host_end) {
118 ret = mprotect(g2h_untagged(host_start), host_end - host_start, prot);
119 if (ret != 0)
120 goto error;
122 page_set_flags(start, start + len - 1, prot | PAGE_VALID);
123 mmap_unlock();
124 return 0;
125 error:
126 mmap_unlock();
127 return ret;
131 * map an incomplete host page
133 * mmap_frag can be called with a valid fd, if flags doesn't contain one of
134 * MAP_ANON, MAP_STACK, MAP_GUARD. If we need to map a page in those cases, we
135 * pass fd == -1. However, if flags contains MAP_GUARD then MAP_ANON cannot be
136 * added.
138 * * If fd is valid (not -1) we want to map the pages with MAP_ANON.
139 * * If flags contains MAP_GUARD we don't want to add MAP_ANON because it
140 * will be rejected. See kern_mmap's enforcing of constraints for MAP_GUARD
141 * in sys/vm/vm_mmap.c.
142 * * If flags contains MAP_ANON it doesn't matter if we add it or not.
143 * * If flags contains MAP_STACK, mmap adds MAP_ANON when called so doesn't
144 * matter if we add it or not either. See enforcing of constraints for
145 * MAP_STACK in kern_mmap.
147 * Don't add MAP_ANON for the flags that use fd == -1 without specifying the
148 * flags directly, with the assumption that future flags that require fd == -1
149 * will also not require MAP_ANON.
151 static int mmap_frag(abi_ulong real_start,
152 abi_ulong start, abi_ulong end,
153 int prot, int flags, int fd, abi_ulong offset)
155 abi_ulong real_end, addr;
156 void *host_start;
157 int prot1, prot_new;
159 real_end = real_start + qemu_host_page_size;
160 host_start = g2h_untagged(real_start);
162 /* get the protection of the target pages outside the mapping */
163 prot1 = 0;
164 for (addr = real_start; addr < real_end; addr++) {
165 if (addr < start || addr >= end)
166 prot1 |= page_get_flags(addr);
169 if (prot1 == 0) {
170 /* no page was there, so we allocate one. See also above. */
171 void *p = mmap(host_start, qemu_host_page_size, prot,
172 flags | ((fd != -1) ? MAP_ANON : 0), -1, 0);
173 if (p == MAP_FAILED)
174 return -1;
175 prot1 = prot;
177 prot1 &= PAGE_BITS;
179 prot_new = prot | prot1;
180 if (fd != -1) {
181 /* msync() won't work here, so we return an error if write is
182 possible while it is a shared mapping */
183 if ((flags & TARGET_BSD_MAP_FLAGMASK) == MAP_SHARED &&
184 (prot & PROT_WRITE))
185 return -1;
187 /* adjust protection to be able to read */
188 if (!(prot1 & PROT_WRITE))
189 mprotect(host_start, qemu_host_page_size, prot1 | PROT_WRITE);
191 /* read the corresponding file data */
192 if (pread(fd, g2h_untagged(start), end - start, offset) == -1) {
193 return -1;
196 /* put final protection */
197 if (prot_new != (prot1 | PROT_WRITE))
198 mprotect(host_start, qemu_host_page_size, prot_new);
199 } else {
200 if (prot_new != prot1) {
201 mprotect(host_start, qemu_host_page_size, prot_new);
203 if (prot_new & PROT_WRITE) {
204 memset(g2h_untagged(start), 0, end - start);
207 return 0;
/*
 * Guest address used as the initial value of mmap_next_start.  A higher base
 * is chosen when both host longs and the target ABI are 64-bit.
 */
210 #if HOST_LONG_BITS == 64 && TARGET_ABI_BITS == 64
211 # define TASK_UNMAPPED_BASE (1ul << 38)
212 #else
213 # define TASK_UNMAPPED_BASE 0x40000000
214 #endif
/*
 * Default search start for mmap_find_vma_aligned() when the caller passes
 * start == 0; advanced past each successful default-start allocation.
 */
215 abi_ulong mmap_next_start = TASK_UNMAPPED_BASE;
218 * Subroutine of mmap_find_vma, used when we have pre-allocated a chunk of guest
219 * address space.
221 static abi_ulong mmap_find_vma_reserved(abi_ulong start, abi_ulong size,
222 abi_ulong alignment)
224 abi_ulong ret;
226 ret = page_find_range_empty(start, reserved_va, size, alignment);
227 if (ret == -1 && start > TARGET_PAGE_SIZE) {
228 /* Restart at the beginning of the address space. */
229 ret = page_find_range_empty(TARGET_PAGE_SIZE, start - 1,
230 size, alignment);
233 return ret;
237 * Find and reserve a free memory area of size 'size'. The search
238 * starts at 'start'.
239 * It must be called with mmap_lock() held.
240 * Return -1 if error.
242 static abi_ulong mmap_find_vma_aligned(abi_ulong start, abi_ulong size,
243 abi_ulong alignment)
245 void *ptr, *prev;
246 abi_ulong addr;
247 int flags;
248 int wrapped, repeat;
250 /* If 'start' == 0, then a default start address is used. */
251 if (start == 0) {
252 start = mmap_next_start;
253 } else {
254 start &= qemu_host_page_mask;
257 size = HOST_PAGE_ALIGN(size);
259 if (reserved_va) {
260 return mmap_find_vma_reserved(start, size,
261 (alignment != 0 ? 1 << alignment :
262 MAX(qemu_host_page_size, TARGET_PAGE_SIZE)));
265 addr = start;
266 wrapped = repeat = 0;
267 prev = 0;
268 flags = MAP_ANON | MAP_PRIVATE;
269 if (alignment != 0) {
270 flags |= MAP_ALIGNED(alignment);
273 for (;; prev = ptr) {
275 * Reserve needed memory area to avoid a race.
276 * It should be discarded using:
277 * - mmap() with MAP_FIXED flag
278 * - mremap() with MREMAP_FIXED flag
279 * - shmat() with SHM_REMAP flag
281 ptr = mmap(g2h_untagged(addr), size, PROT_NONE,
282 flags, -1, 0);
284 /* ENOMEM, if host address space has no memory */
285 if (ptr == MAP_FAILED) {
286 return (abi_ulong)-1;
290 * Count the number of sequential returns of the same address.
291 * This is used to modify the search algorithm below.
293 repeat = (ptr == prev ? repeat + 1 : 0);
295 if (h2g_valid(ptr + size - 1)) {
296 addr = h2g(ptr);
298 if ((addr & ~TARGET_PAGE_MASK) == 0) {
299 /* Success. */
300 if (start == mmap_next_start && addr >= TASK_UNMAPPED_BASE) {
301 mmap_next_start = addr + size;
303 return addr;
306 /* The address is not properly aligned for the target. */
307 switch (repeat) {
308 case 0:
310 * Assume the result that the kernel gave us is the
311 * first with enough free space, so start again at the
312 * next higher target page.
314 addr = TARGET_PAGE_ALIGN(addr);
315 break;
316 case 1:
318 * Sometimes the kernel decides to perform the allocation
319 * at the top end of memory instead.
321 addr &= TARGET_PAGE_MASK;
322 break;
323 case 2:
324 /* Start over at low memory. */
325 addr = 0;
326 break;
327 default:
328 /* Fail. This unaligned block must the last. */
329 addr = -1;
330 break;
332 } else {
334 * Since the result the kernel gave didn't fit, start
335 * again at low memory. If any repetition, fail.
337 addr = (repeat ? -1 : 0);
340 /* Unmap and try again. */
341 munmap(ptr, size);
343 /* ENOMEM if we checked the whole of the target address space. */
344 if (addr == (abi_ulong)-1) {
345 return (abi_ulong)-1;
346 } else if (addr == 0) {
347 if (wrapped) {
348 return (abi_ulong)-1;
350 wrapped = 1;
352 * Don't actually use 0 when wrapping, instead indicate
353 * that we'd truly like an allocation in low memory.
355 addr = TARGET_PAGE_SIZE;
356 } else if (wrapped && addr >= start) {
357 return (abi_ulong)-1;
362 abi_ulong mmap_find_vma(abi_ulong start, abi_ulong size)
364 return mmap_find_vma_aligned(start, size, 0);
367 /* NOTE: all the constants are the HOST ones */
368 abi_long target_mmap(abi_ulong start, abi_ulong len, int prot,
369 int flags, int fd, off_t offset)
371 abi_ulong ret, end, real_start, real_end, retaddr, host_offset, host_len;
373 mmap_lock();
374 if (qemu_loglevel_mask(CPU_LOG_PAGE)) {
375 qemu_log("mmap: start=0x" TARGET_ABI_FMT_lx
376 " len=0x" TARGET_ABI_FMT_lx " prot=%c%c%c flags=",
377 start, len,
378 prot & PROT_READ ? 'r' : '-',
379 prot & PROT_WRITE ? 'w' : '-',
380 prot & PROT_EXEC ? 'x' : '-');
381 if (flags & MAP_ALIGNMENT_MASK) {
382 qemu_log("MAP_ALIGNED(%u) ",
383 (flags & MAP_ALIGNMENT_MASK) >> MAP_ALIGNMENT_SHIFT);
385 if (flags & MAP_GUARD) {
386 qemu_log("MAP_GUARD ");
388 if (flags & MAP_FIXED) {
389 qemu_log("MAP_FIXED ");
391 if (flags & MAP_ANON) {
392 qemu_log("MAP_ANON ");
394 if (flags & MAP_EXCL) {
395 qemu_log("MAP_EXCL ");
397 if (flags & MAP_PRIVATE) {
398 qemu_log("MAP_PRIVATE ");
400 if (flags & MAP_SHARED) {
401 qemu_log("MAP_SHARED ");
403 if (flags & MAP_NOCORE) {
404 qemu_log("MAP_NOCORE ");
406 if (flags & MAP_STACK) {
407 qemu_log("MAP_STACK ");
409 qemu_log("fd=%d offset=0x%lx\n", fd, offset);
412 if ((flags & MAP_ANON) && fd != -1) {
413 errno = EINVAL;
414 goto fail;
416 if (flags & MAP_STACK) {
417 if ((fd != -1) || ((prot & (PROT_READ | PROT_WRITE)) !=
418 (PROT_READ | PROT_WRITE))) {
419 errno = EINVAL;
420 goto fail;
423 if ((flags & MAP_GUARD) && (prot != PROT_NONE || fd != -1 ||
424 offset != 0 || (flags & (MAP_SHARED | MAP_PRIVATE |
425 /* MAP_PREFAULT | */ /* MAP_PREFAULT not in mman.h */
426 MAP_PREFAULT_READ | MAP_ANON | MAP_STACK)) != 0)) {
427 errno = EINVAL;
428 goto fail;
431 if (offset & ~TARGET_PAGE_MASK) {
432 errno = EINVAL;
433 goto fail;
436 if (len == 0) {
437 errno = EINVAL;
438 goto fail;
441 /* Check for overflows */
442 len = TARGET_PAGE_ALIGN(len);
443 if (len == 0) {
444 errno = ENOMEM;
445 goto fail;
448 real_start = start & qemu_host_page_mask;
449 host_offset = offset & qemu_host_page_mask;
452 * If the user is asking for the kernel to find a location, do that
453 * before we truncate the length for mapping files below.
455 if (!(flags & MAP_FIXED)) {
456 host_len = len + offset - host_offset;
457 host_len = HOST_PAGE_ALIGN(host_len);
458 if ((flags & MAP_ALIGNMENT_MASK) != 0)
459 start = mmap_find_vma_aligned(real_start, host_len,
460 (flags & MAP_ALIGNMENT_MASK) >> MAP_ALIGNMENT_SHIFT);
461 else
462 start = mmap_find_vma(real_start, host_len);
463 if (start == (abi_ulong)-1) {
464 errno = ENOMEM;
465 goto fail;
470 * When mapping files into a memory area larger than the file, accesses
471 * to pages beyond the file size will cause a SIGBUS.
473 * For example, if mmaping a file of 100 bytes on a host with 4K pages
474 * emulating a target with 8K pages, the target expects to be able to
475 * access the first 8K. But the host will trap us on any access beyond
476 * 4K.
478 * When emulating a target with a larger page-size than the hosts, we
479 * may need to truncate file maps at EOF and add extra anonymous pages
480 * up to the targets page boundary.
483 if ((qemu_real_host_page_size() < qemu_host_page_size) && fd != -1) {
484 struct stat sb;
486 if (fstat(fd, &sb) == -1) {
487 goto fail;
490 /* Are we trying to create a map beyond EOF?. */
491 if (offset + len > sb.st_size) {
493 * If so, truncate the file map at eof aligned with
494 * the hosts real pagesize. Additional anonymous maps
495 * will be created beyond EOF.
497 len = REAL_HOST_PAGE_ALIGN(sb.st_size - offset);
501 if (!(flags & MAP_FIXED)) {
502 unsigned long host_start;
503 void *p;
505 host_len = len + offset - host_offset;
506 host_len = HOST_PAGE_ALIGN(host_len);
509 * Note: we prefer to control the mapping address. It is
510 * especially important if qemu_host_page_size >
511 * qemu_real_host_page_size
513 p = mmap(g2h_untagged(start), host_len, prot,
514 flags | MAP_FIXED | ((fd != -1) ? MAP_ANON : 0), -1, 0);
515 if (p == MAP_FAILED)
516 goto fail;
517 /* update start so that it points to the file position at 'offset' */
518 host_start = (unsigned long)p;
519 if (fd != -1) {
520 p = mmap(g2h_untagged(start), len, prot,
521 flags | MAP_FIXED, fd, host_offset);
522 if (p == MAP_FAILED) {
523 munmap(g2h_untagged(start), host_len);
524 goto fail;
526 host_start += offset - host_offset;
528 start = h2g(host_start);
529 } else {
530 if (start & ~TARGET_PAGE_MASK) {
531 errno = EINVAL;
532 goto fail;
534 end = start + len;
535 real_end = HOST_PAGE_ALIGN(end);
538 * Test if requested memory area fits target address space
539 * It can fail only on 64-bit host with 32-bit target.
540 * On any other target/host host mmap() handles this error correctly.
542 if (!guest_range_valid_untagged(start, len)) {
543 errno = EINVAL;
544 goto fail;
548 * worst case: we cannot map the file because the offset is not
549 * aligned, so we read it
551 if (fd != -1 &&
552 (offset & ~qemu_host_page_mask) != (start & ~qemu_host_page_mask)) {
554 * msync() won't work here, so we return an error if write is
555 * possible while it is a shared mapping
557 if ((flags & TARGET_BSD_MAP_FLAGMASK) == MAP_SHARED &&
558 (prot & PROT_WRITE)) {
559 errno = EINVAL;
560 goto fail;
562 retaddr = target_mmap(start, len, prot | PROT_WRITE,
563 MAP_FIXED | MAP_PRIVATE | MAP_ANON,
564 -1, 0);
565 if (retaddr == -1)
566 goto fail;
567 if (pread(fd, g2h_untagged(start), len, offset) == -1) {
568 goto fail;
570 if (!(prot & PROT_WRITE)) {
571 ret = target_mprotect(start, len, prot);
572 assert(ret == 0);
574 goto the_end;
577 /* Reject the mapping if any page within the range is mapped */
578 if ((flags & MAP_EXCL) && !page_check_range_empty(start, end - 1)) {
579 errno = EINVAL;
580 goto fail;
583 /* handle the start of the mapping */
584 if (start > real_start) {
585 if (real_end == real_start + qemu_host_page_size) {
586 /* one single host page */
587 ret = mmap_frag(real_start, start, end,
588 prot, flags, fd, offset);
589 if (ret == -1)
590 goto fail;
591 goto the_end1;
593 ret = mmap_frag(real_start, start, real_start + qemu_host_page_size,
594 prot, flags, fd, offset);
595 if (ret == -1)
596 goto fail;
597 real_start += qemu_host_page_size;
599 /* handle the end of the mapping */
600 if (end < real_end) {
601 ret = mmap_frag(real_end - qemu_host_page_size,
602 real_end - qemu_host_page_size, end,
603 prot, flags, fd,
604 offset + real_end - qemu_host_page_size - start);
605 if (ret == -1)
606 goto fail;
607 real_end -= qemu_host_page_size;
610 /* map the middle (easier) */
611 if (real_start < real_end) {
612 void *p;
613 unsigned long offset1;
614 if (flags & MAP_ANON)
615 offset1 = 0;
616 else
617 offset1 = offset + real_start - start;
618 p = mmap(g2h_untagged(real_start), real_end - real_start,
619 prot, flags, fd, offset1);
620 if (p == MAP_FAILED)
621 goto fail;
624 the_end1:
625 page_set_flags(start, start + len - 1, prot | PAGE_VALID);
626 the_end:
627 #ifdef DEBUG_MMAP
628 printf("ret=0x" TARGET_ABI_FMT_lx "\n", start);
629 page_dump(stdout);
630 printf("\n");
631 #endif
632 mmap_unlock();
633 return start;
634 fail:
635 mmap_unlock();
636 return -1;
639 void mmap_reserve(abi_ulong start, abi_ulong size)
641 abi_ulong real_start;
642 abi_ulong real_end;
643 abi_ulong addr;
644 abi_ulong end;
645 int prot;
647 real_start = start & qemu_host_page_mask;
648 real_end = HOST_PAGE_ALIGN(start + size);
649 end = start + size;
650 if (start > real_start) {
651 /* handle host page containing start */
652 prot = 0;
653 for (addr = real_start; addr < start; addr += TARGET_PAGE_SIZE) {
654 prot |= page_get_flags(addr);
656 if (real_end == real_start + qemu_host_page_size) {
657 for (addr = end; addr < real_end; addr += TARGET_PAGE_SIZE) {
658 prot |= page_get_flags(addr);
660 end = real_end;
662 if (prot != 0) {
663 real_start += qemu_host_page_size;
666 if (end < real_end) {
667 prot = 0;
668 for (addr = end; addr < real_end; addr += TARGET_PAGE_SIZE) {
669 prot |= page_get_flags(addr);
671 if (prot != 0) {
672 real_end -= qemu_host_page_size;
675 if (real_start != real_end) {
676 mmap(g2h_untagged(real_start), real_end - real_start, PROT_NONE,
677 MAP_FIXED | MAP_ANON | MAP_PRIVATE, -1, 0);
681 int target_munmap(abi_ulong start, abi_ulong len)
683 abi_ulong end, real_start, real_end, addr;
684 int prot, ret;
686 #ifdef DEBUG_MMAP
687 printf("munmap: start=0x" TARGET_ABI_FMT_lx " len=0x"
688 TARGET_ABI_FMT_lx "\n",
689 start, len);
690 #endif
691 if (start & ~TARGET_PAGE_MASK)
692 return -EINVAL;
693 len = TARGET_PAGE_ALIGN(len);
694 if (len == 0)
695 return -EINVAL;
696 mmap_lock();
697 end = start + len;
698 real_start = start & qemu_host_page_mask;
699 real_end = HOST_PAGE_ALIGN(end);
701 if (start > real_start) {
702 /* handle host page containing start */
703 prot = 0;
704 for (addr = real_start; addr < start; addr += TARGET_PAGE_SIZE) {
705 prot |= page_get_flags(addr);
707 if (real_end == real_start + qemu_host_page_size) {
708 for (addr = end; addr < real_end; addr += TARGET_PAGE_SIZE) {
709 prot |= page_get_flags(addr);
711 end = real_end;
713 if (prot != 0)
714 real_start += qemu_host_page_size;
716 if (end < real_end) {
717 prot = 0;
718 for (addr = end; addr < real_end; addr += TARGET_PAGE_SIZE) {
719 prot |= page_get_flags(addr);
721 if (prot != 0)
722 real_end -= qemu_host_page_size;
725 ret = 0;
726 /* unmap what we can */
727 if (real_start < real_end) {
728 if (reserved_va) {
729 mmap_reserve(real_start, real_end - real_start);
730 } else {
731 ret = munmap(g2h_untagged(real_start), real_end - real_start);
735 if (ret == 0) {
736 page_set_flags(start, start + len - 1, 0);
738 mmap_unlock();
739 return ret;
742 int target_msync(abi_ulong start, abi_ulong len, int flags)
744 abi_ulong end;
746 if (start & ~TARGET_PAGE_MASK)
747 return -EINVAL;
748 len = TARGET_PAGE_ALIGN(len);
749 end = start + len;
750 if (end < start)
751 return -EINVAL;
752 if (end == start)
753 return 0;
755 start &= qemu_host_page_mask;
756 return msync(g2h_untagged(start), end - start, flags);