Fix -device help and documentation
[qemu/aliguori-queue.git] / linux-user / mmap.c
blob9c062e7078b8d63651ca54f20bd10cffd5594a96
1 /*
2 * mmap support for qemu
4 * Copyright (c) 2003 Fabrice Bellard
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, see <http://www.gnu.org/licenses/>.
19 #include <stdlib.h>
20 #include <stdio.h>
21 #include <stdarg.h>
22 #include <string.h>
23 #include <unistd.h>
24 #include <errno.h>
25 #include <sys/types.h>
26 #include <sys/stat.h>
27 #include <sys/mman.h>
28 #include <linux/mman.h>
29 #include <linux/unistd.h>
31 #include "qemu.h"
32 #include "qemu-common.h"
34 //#define DEBUG_MMAP
#if defined(CONFIG_USE_NPTL)
/* Mutex taken by mmap_lock() and released by mmap_unlock(). */
static pthread_mutex_t mmap_mutex = PTHREAD_MUTEX_INITIALIZER;
/* Per-thread nesting depth of mmap_lock() calls. */
static __thread int mmap_lock_count;

/*
 * Acquire the mmap lock.  Calls nest: only the outermost call in a
 * thread (depth going from 0 to 1) actually takes the mutex.
 */
void mmap_lock(void)
{
    int depth = mmap_lock_count;

    mmap_lock_count = depth + 1;
    if (depth == 0) {
        pthread_mutex_lock(&mmap_mutex);
    }
}

/*
 * Release one nesting level of the mmap lock; the mutex itself is
 * dropped only when the depth returns to zero.
 */
void mmap_unlock(void)
{
    mmap_lock_count -= 1;
    if (mmap_lock_count != 0) {
        return;
    }
    pthread_mutex_unlock(&mmap_mutex);
}

/*
 * Grab the lock to make sure things are in a consistent state after
 * fork().  Aborts if the calling thread already holds the lock.
 */
void mmap_fork_start(void)
{
    if (mmap_lock_count != 0) {
        abort();
    }
    pthread_mutex_lock(&mmap_mutex);
}

/*
 * Undo mmap_fork_start() after fork().
 *
 * child: non-zero in the child, where the mutex is re-initialised;
 *        zero in the parent, where it is simply unlocked.
 */
void mmap_fork_end(int child)
{
    if (!child) {
        pthread_mutex_unlock(&mmap_mutex);
        return;
    }
    pthread_mutex_init(&mmap_mutex, NULL);
}
#else
/* We aren't threadsafe to start with, so no need to worry about locking.  */
void mmap_lock(void)
{
}

void mmap_unlock(void)
{
}
#endif
80 void *qemu_vmalloc(size_t size)
82 void *p;
84 mmap_lock();
85 /* Use map and mark the pages as used. */
86 p = mmap(NULL, size, PROT_READ | PROT_WRITE,
87 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
88 mmap_unlock();
89 return p;
/*
 * Allocate 'size' bytes backed by qemu_vmalloc().
 *
 * A 16-byte header is placed in front of the returned pointer; its
 * first size_t holds the total mapping length (payload + header) so
 * that qemu_free() and qemu_realloc() can recover it.
 *
 * Returns a pointer to the payload, or NULL with errno set if the
 * request overflows or the underlying mapping cannot be created.
 */
void *qemu_malloc(size_t size)
{
    char *p;

    /* Adding the header must not wrap around. */
    if (size > (size_t)-1 - 16) {
        errno = ENOMEM;
        return NULL;
    }
    size += 16;
    p = qemu_vmalloc(size);
    /* qemu_vmalloc() hands back mmap()'s result, i.e. MAP_FAILED on error. */
    if (p == MAP_FAILED) {
        return NULL;
    }
    *(size_t *)p = size;
    return p + 16;
}
/*
 * Zero-initialised allocation.  The allocator is built on anonymous
 * mmap(), whose pages always start out zeroed, so this simply forwards
 * to qemu_malloc().
 */
void *qemu_mallocz(size_t size)
{
    void *zeroed = qemu_malloc(size);

    return zeroed;
}
/*
 * Release a block obtained from qemu_malloc()/qemu_realloc().
 *
 * The total mapping length is read back from the 16-byte header that
 * precedes 'ptr' and the whole mapping is unmapped.  Passing NULL is a
 * no-op, as with free().
 */
void qemu_free(void *ptr)
{
    /* FIXME: We should unmark the reserved pages here.  However this gets
       complicated when one target page spans multiple host pages, so we
       don't bother.  */
    size_t *p;

    if (!ptr) {
        return;
    }
    p = (size_t *)((char *)ptr - 16);
    munmap(p, *p);
}
/*
 * Resize a block obtained from qemu_malloc().
 *
 * A new block is always allocated; the smaller of the old and new
 * payload sizes is copied across and the old block is freed.  With a
 * NULL 'ptr' this behaves like qemu_malloc(size).
 *
 * Returns the new payload pointer.  If the new allocation yields NULL
 * the old block is left untouched and NULL is returned.
 */
void *qemu_realloc(void *ptr, size_t size)
{
    size_t old_size, copy;
    void *new_ptr;

    if (!ptr) {
        return qemu_malloc(size);
    }
    /* The header stores payload + 16, so strip the header length to get
       the number of payload bytes that may be read from 'ptr'. */
    old_size = *(size_t *)((char *)ptr - 16) - 16;
    copy = old_size < size ? old_size : size;
    new_ptr = qemu_malloc(size);
    if (!new_ptr) {
        return NULL;
    }
    memcpy(new_ptr, ptr, copy);
    qemu_free(ptr);
    return new_ptr;
}
132 /* NOTE: all the constants are the HOST ones, but addresses are target. */
133 int target_mprotect(abi_ulong start, abi_ulong len, int prot)
135 abi_ulong end, host_start, host_end, addr;
136 int prot1, ret;
138 #ifdef DEBUG_MMAP
139 printf("mprotect: start=0x" TARGET_ABI_FMT_lx
140 "len=0x" TARGET_ABI_FMT_lx " prot=%c%c%c\n", start, len,
141 prot & PROT_READ ? 'r' : '-',
142 prot & PROT_WRITE ? 'w' : '-',
143 prot & PROT_EXEC ? 'x' : '-');
144 #endif
146 if ((start & ~TARGET_PAGE_MASK) != 0)
147 return -EINVAL;
148 len = TARGET_PAGE_ALIGN(len);
149 end = start + len;
150 if (end < start)
151 return -EINVAL;
152 prot &= PROT_READ | PROT_WRITE | PROT_EXEC;
153 if (len == 0)
154 return 0;
156 mmap_lock();
157 host_start = start & qemu_host_page_mask;
158 host_end = HOST_PAGE_ALIGN(end);
159 if (start > host_start) {
160 /* handle host page containing start */
161 prot1 = prot;
162 for(addr = host_start; addr < start; addr += TARGET_PAGE_SIZE) {
163 prot1 |= page_get_flags(addr);
165 if (host_end == host_start + qemu_host_page_size) {
166 for(addr = end; addr < host_end; addr += TARGET_PAGE_SIZE) {
167 prot1 |= page_get_flags(addr);
169 end = host_end;
171 ret = mprotect(g2h(host_start), qemu_host_page_size, prot1 & PAGE_BITS);
172 if (ret != 0)
173 goto error;
174 host_start += qemu_host_page_size;
176 if (end < host_end) {
177 prot1 = prot;
178 for(addr = end; addr < host_end; addr += TARGET_PAGE_SIZE) {
179 prot1 |= page_get_flags(addr);
181 ret = mprotect(g2h(host_end - qemu_host_page_size), qemu_host_page_size,
182 prot1 & PAGE_BITS);
183 if (ret != 0)
184 goto error;
185 host_end -= qemu_host_page_size;
188 /* handle the pages in the middle */
189 if (host_start < host_end) {
190 ret = mprotect(g2h(host_start), host_end - host_start, prot);
191 if (ret != 0)
192 goto error;
194 page_set_flags(start, start + len, prot | PAGE_VALID);
195 mmap_unlock();
196 return 0;
197 error:
198 mmap_unlock();
199 return ret;
202 /* map an incomplete host page */
203 static int mmap_frag(abi_ulong real_start,
204 abi_ulong start, abi_ulong end,
205 int prot, int flags, int fd, abi_ulong offset)
207 abi_ulong real_end, addr;
208 void *host_start;
209 int prot1, prot_new;
211 real_end = real_start + qemu_host_page_size;
212 host_start = g2h(real_start);
214 /* get the protection of the target pages outside the mapping */
215 prot1 = 0;
216 for(addr = real_start; addr < real_end; addr++) {
217 if (addr < start || addr >= end)
218 prot1 |= page_get_flags(addr);
221 if (prot1 == 0) {
222 /* no page was there, so we allocate one */
223 void *p = mmap(host_start, qemu_host_page_size, prot,
224 flags | MAP_ANONYMOUS, -1, 0);
225 if (p == MAP_FAILED)
226 return -1;
227 prot1 = prot;
229 prot1 &= PAGE_BITS;
231 prot_new = prot | prot1;
232 if (!(flags & MAP_ANONYMOUS)) {
233 /* msync() won't work here, so we return an error if write is
234 possible while it is a shared mapping */
235 if ((flags & MAP_TYPE) == MAP_SHARED &&
236 (prot & PROT_WRITE))
237 return -1;
239 /* adjust protection to be able to read */
240 if (!(prot1 & PROT_WRITE))
241 mprotect(host_start, qemu_host_page_size, prot1 | PROT_WRITE);
243 /* read the corresponding file data */
244 if (pread(fd, g2h(start), end - start, offset) == -1)
245 return -1;
247 /* put final protection */
248 if (prot_new != (prot1 | PROT_WRITE))
249 mprotect(host_start, qemu_host_page_size, prot_new);
250 } else {
251 /* just update the protection */
252 if (prot_new != prot1) {
253 mprotect(host_start, qemu_host_page_size, prot_new);
256 return 0;
259 #if HOST_LONG_BITS == 64 && TARGET_ABI_BITS == 64
260 # define TASK_UNMAPPED_BASE (1ul << 38)
261 #elif defined(__CYGWIN__)
262 /* Cygwin doesn't have a whole lot of address space. */
263 # define TASK_UNMAPPED_BASE 0x18000000
264 #else
265 # define TASK_UNMAPPED_BASE 0x40000000
266 #endif
267 static abi_ulong mmap_next_start = TASK_UNMAPPED_BASE;
269 unsigned long last_brk;
272 * Find and reserve a free memory area of size 'size'. The search
273 * starts at 'start'.
274 * It must be called with mmap_lock() held.
275 * Return -1 if error.
277 abi_ulong mmap_find_vma(abi_ulong start, abi_ulong size)
279 void *ptr, *prev;
280 abi_ulong addr;
281 int wrapped, repeat;
283 /* If 'start' == 0, then a default start address is used. */
284 if (start == 0) {
285 start = mmap_next_start;
286 } else {
287 start &= qemu_host_page_mask;
290 size = HOST_PAGE_ALIGN(size);
292 addr = start;
293 wrapped = repeat = 0;
294 prev = 0;
296 for (;; prev = ptr) {
298 * Reserve needed memory area to avoid a race.
299 * It should be discarded using:
300 * - mmap() with MAP_FIXED flag
301 * - mremap() with MREMAP_FIXED flag
302 * - shmat() with SHM_REMAP flag
304 ptr = mmap(g2h(addr), size, PROT_NONE,
305 MAP_ANONYMOUS|MAP_PRIVATE|MAP_NORESERVE, -1, 0);
307 /* ENOMEM, if host address space has no memory */
308 if (ptr == MAP_FAILED) {
309 return (abi_ulong)-1;
312 /* Count the number of sequential returns of the same address.
313 This is used to modify the search algorithm below. */
314 repeat = (ptr == prev ? repeat + 1 : 0);
316 if (h2g_valid(ptr + size - 1)) {
317 addr = h2g(ptr);
319 if ((addr & ~TARGET_PAGE_MASK) == 0) {
320 /* Success. */
321 if (start == mmap_next_start && addr >= TASK_UNMAPPED_BASE) {
322 mmap_next_start = addr + size;
324 return addr;
327 /* The address is not properly aligned for the target. */
328 switch (repeat) {
329 case 0:
330 /* Assume the result that the kernel gave us is the
331 first with enough free space, so start again at the
332 next higher target page. */
333 addr = TARGET_PAGE_ALIGN(addr);
334 break;
335 case 1:
336 /* Sometimes the kernel decides to perform the allocation
337 at the top end of memory instead. */
338 addr &= TARGET_PAGE_MASK;
339 break;
340 case 2:
341 /* Start over at low memory. */
342 addr = 0;
343 break;
344 default:
345 /* Fail. This unaligned block must the last. */
346 addr = -1;
347 break;
349 } else {
350 /* Since the result the kernel gave didn't fit, start
351 again at low memory. If any repetition, fail. */
352 addr = (repeat ? -1 : 0);
355 /* Unmap and try again. */
356 munmap(ptr, size);
358 /* ENOMEM if we checked the whole of the target address space. */
359 if (addr == -1ul) {
360 return (abi_ulong)-1;
361 } else if (addr == 0) {
362 if (wrapped) {
363 return (abi_ulong)-1;
365 wrapped = 1;
366 /* Don't actually use 0 when wrapping, instead indicate
367 that we'd truely like an allocation in low memory. */
368 addr = (mmap_min_addr > TARGET_PAGE_SIZE
369 ? TARGET_PAGE_ALIGN(mmap_min_addr)
370 : TARGET_PAGE_SIZE);
371 } else if (wrapped && addr >= start) {
372 return (abi_ulong)-1;
377 /* NOTE: all the constants are the HOST ones */
378 abi_long target_mmap(abi_ulong start, abi_ulong len, int prot,
379 int flags, int fd, abi_ulong offset)
381 abi_ulong ret, end, real_start, real_end, retaddr, host_offset, host_len;
382 unsigned long host_start;
384 mmap_lock();
385 #ifdef DEBUG_MMAP
387 printf("mmap: start=0x" TARGET_ABI_FMT_lx
388 " len=0x" TARGET_ABI_FMT_lx " prot=%c%c%c flags=",
389 start, len,
390 prot & PROT_READ ? 'r' : '-',
391 prot & PROT_WRITE ? 'w' : '-',
392 prot & PROT_EXEC ? 'x' : '-');
393 if (flags & MAP_FIXED)
394 printf("MAP_FIXED ");
395 if (flags & MAP_ANONYMOUS)
396 printf("MAP_ANON ");
397 switch(flags & MAP_TYPE) {
398 case MAP_PRIVATE:
399 printf("MAP_PRIVATE ");
400 break;
401 case MAP_SHARED:
402 printf("MAP_SHARED ");
403 break;
404 default:
405 printf("[MAP_TYPE=0x%x] ", flags & MAP_TYPE);
406 break;
408 printf("fd=%d offset=" TARGET_ABI_FMT_lx "\n", fd, offset);
410 #endif
412 if (offset & ~TARGET_PAGE_MASK) {
413 errno = EINVAL;
414 goto fail;
417 len = TARGET_PAGE_ALIGN(len);
418 if (len == 0)
419 goto the_end;
420 real_start = start & qemu_host_page_mask;
422 /* When mapping files into a memory area larger than the file, accesses
423 to pages beyond the file size will cause a SIGBUS.
425 For example, if mmaping a file of 100 bytes on a host with 4K pages
426 emulating a target with 8K pages, the target expects to be able to
427 access the first 8K. But the host will trap us on any access beyond
428 4K.
430 When emulating a target with a larger page-size than the hosts, we
431 may need to truncate file maps at EOF and add extra anonymous pages
432 up to the targets page boundary. */
434 if ((qemu_real_host_page_size < TARGET_PAGE_SIZE)
435 && !(flags & MAP_ANONYMOUS)) {
436 struct stat sb;
438 if (fstat (fd, &sb) == -1)
439 goto fail;
441 /* Are we trying to create a map beyond EOF?. */
442 if (offset + len > sb.st_size) {
443 /* If so, truncate the file map at eof aligned with
444 the hosts real pagesize. Additional anonymous maps
445 will be created beyond EOF. */
446 len = (sb.st_size - offset);
447 len += qemu_real_host_page_size - 1;
448 len &= ~(qemu_real_host_page_size - 1);
452 if (!(flags & MAP_FIXED)) {
453 abi_ulong mmap_start;
454 void *p;
455 host_offset = offset & qemu_host_page_mask;
456 host_len = len + offset - host_offset;
457 host_len = HOST_PAGE_ALIGN(host_len);
458 mmap_start = mmap_find_vma(real_start, host_len);
459 if (mmap_start == (abi_ulong)-1) {
460 errno = ENOMEM;
461 goto fail;
463 /* Note: we prefer to control the mapping address. It is
464 especially important if qemu_host_page_size >
465 qemu_real_host_page_size */
466 p = mmap(g2h(mmap_start),
467 host_len, prot, flags | MAP_FIXED | MAP_ANONYMOUS, -1, 0);
468 if (p == MAP_FAILED)
469 goto fail;
470 /* update start so that it points to the file position at 'offset' */
471 host_start = (unsigned long)p;
472 if (!(flags & MAP_ANONYMOUS)) {
473 p = mmap(g2h(mmap_start), len, prot,
474 flags | MAP_FIXED, fd, host_offset);
475 host_start += offset - host_offset;
477 start = h2g(host_start);
478 } else {
479 if (start & ~TARGET_PAGE_MASK) {
480 errno = EINVAL;
481 goto fail;
483 end = start + len;
484 real_end = HOST_PAGE_ALIGN(end);
487 * Test if requested memory area fits target address space
488 * It can fail only on 64-bit host with 32-bit target.
489 * On any other target/host host mmap() handles this error correctly.
491 if ((unsigned long)start + len - 1 > (abi_ulong) -1) {
492 errno = EINVAL;
493 goto fail;
496 /* worst case: we cannot map the file because the offset is not
497 aligned, so we read it */
498 if (!(flags & MAP_ANONYMOUS) &&
499 (offset & ~qemu_host_page_mask) != (start & ~qemu_host_page_mask)) {
500 /* msync() won't work here, so we return an error if write is
501 possible while it is a shared mapping */
502 if ((flags & MAP_TYPE) == MAP_SHARED &&
503 (prot & PROT_WRITE)) {
504 errno = EINVAL;
505 goto fail;
507 retaddr = target_mmap(start, len, prot | PROT_WRITE,
508 MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS,
509 -1, 0);
510 if (retaddr == -1)
511 goto fail;
512 if (pread(fd, g2h(start), len, offset) == -1)
513 goto fail;
514 if (!(prot & PROT_WRITE)) {
515 ret = target_mprotect(start, len, prot);
516 if (ret != 0) {
517 start = ret;
518 goto the_end;
521 goto the_end;
524 /* handle the start of the mapping */
525 if (start > real_start) {
526 if (real_end == real_start + qemu_host_page_size) {
527 /* one single host page */
528 ret = mmap_frag(real_start, start, end,
529 prot, flags, fd, offset);
530 if (ret == -1)
531 goto fail;
532 goto the_end1;
534 ret = mmap_frag(real_start, start, real_start + qemu_host_page_size,
535 prot, flags, fd, offset);
536 if (ret == -1)
537 goto fail;
538 real_start += qemu_host_page_size;
540 /* handle the end of the mapping */
541 if (end < real_end) {
542 ret = mmap_frag(real_end - qemu_host_page_size,
543 real_end - qemu_host_page_size, real_end,
544 prot, flags, fd,
545 offset + real_end - qemu_host_page_size - start);
546 if (ret == -1)
547 goto fail;
548 real_end -= qemu_host_page_size;
551 /* map the middle (easier) */
552 if (real_start < real_end) {
553 void *p;
554 unsigned long offset1;
555 if (flags & MAP_ANONYMOUS)
556 offset1 = 0;
557 else
558 offset1 = offset + real_start - start;
559 p = mmap(g2h(real_start), real_end - real_start,
560 prot, flags, fd, offset1);
561 if (p == MAP_FAILED)
562 goto fail;
565 the_end1:
566 page_set_flags(start, start + len, prot | PAGE_VALID);
567 the_end:
568 #ifdef DEBUG_MMAP
569 printf("ret=0x" TARGET_ABI_FMT_lx "\n", start);
570 page_dump(stdout);
571 printf("\n");
572 #endif
573 mmap_unlock();
574 return start;
575 fail:
576 mmap_unlock();
577 return -1;
580 int target_munmap(abi_ulong start, abi_ulong len)
582 abi_ulong end, real_start, real_end, addr;
583 int prot, ret;
585 #ifdef DEBUG_MMAP
586 printf("munmap: start=0x" TARGET_ABI_FMT_lx " len=0x"
587 TARGET_ABI_FMT_lx "\n",
588 start, len);
589 #endif
590 if (start & ~TARGET_PAGE_MASK)
591 return -EINVAL;
592 len = TARGET_PAGE_ALIGN(len);
593 if (len == 0)
594 return -EINVAL;
595 mmap_lock();
596 end = start + len;
597 real_start = start & qemu_host_page_mask;
598 real_end = HOST_PAGE_ALIGN(end);
600 if (start > real_start) {
601 /* handle host page containing start */
602 prot = 0;
603 for(addr = real_start; addr < start; addr += TARGET_PAGE_SIZE) {
604 prot |= page_get_flags(addr);
606 if (real_end == real_start + qemu_host_page_size) {
607 for(addr = end; addr < real_end; addr += TARGET_PAGE_SIZE) {
608 prot |= page_get_flags(addr);
610 end = real_end;
612 if (prot != 0)
613 real_start += qemu_host_page_size;
615 if (end < real_end) {
616 prot = 0;
617 for(addr = end; addr < real_end; addr += TARGET_PAGE_SIZE) {
618 prot |= page_get_flags(addr);
620 if (prot != 0)
621 real_end -= qemu_host_page_size;
624 ret = 0;
625 /* unmap what we can */
626 if (real_start < real_end) {
627 ret = munmap(g2h(real_start), real_end - real_start);
630 if (ret == 0)
631 page_set_flags(start, start + len, 0);
632 mmap_unlock();
633 return ret;
636 abi_long target_mremap(abi_ulong old_addr, abi_ulong old_size,
637 abi_ulong new_size, unsigned long flags,
638 abi_ulong new_addr)
640 int prot;
641 void *host_addr;
643 mmap_lock();
645 if (flags & MREMAP_FIXED)
646 host_addr = (void *) syscall(__NR_mremap, g2h(old_addr),
647 old_size, new_size,
648 flags,
649 new_addr);
650 else if (flags & MREMAP_MAYMOVE) {
651 abi_ulong mmap_start;
653 mmap_start = mmap_find_vma(0, new_size);
655 if (mmap_start == -1) {
656 errno = ENOMEM;
657 host_addr = MAP_FAILED;
658 } else
659 host_addr = (void *) syscall(__NR_mremap, g2h(old_addr),
660 old_size, new_size,
661 flags | MREMAP_FIXED,
662 g2h(mmap_start));
663 } else {
664 host_addr = mremap(g2h(old_addr), old_size, new_size, flags);
665 /* Check if address fits target address space */
666 if ((unsigned long)host_addr + new_size > (abi_ulong)-1) {
667 /* Revert mremap() changes */
668 host_addr = mremap(g2h(old_addr), new_size, old_size, flags);
669 errno = ENOMEM;
670 host_addr = MAP_FAILED;
674 if (host_addr == MAP_FAILED) {
675 new_addr = -1;
676 } else {
677 new_addr = h2g(host_addr);
678 prot = page_get_flags(old_addr);
679 page_set_flags(old_addr, old_addr + old_size, 0);
680 page_set_flags(new_addr, new_addr + new_size, prot | PAGE_VALID);
682 mmap_unlock();
683 return new_addr;
686 int target_msync(abi_ulong start, abi_ulong len, int flags)
688 abi_ulong end;
690 if (start & ~TARGET_PAGE_MASK)
691 return -EINVAL;
692 len = TARGET_PAGE_ALIGN(len);
693 end = start + len;
694 if (end < start)
695 return -EINVAL;
696 if (end == start)
697 return 0;
699 start &= qemu_host_page_mask;
700 return msync(g2h(start), end - start, flags);