/*
 *  mmap support for qemu
 *
 *  Copyright (c) 2003 Fabrice Bellard
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston,
 *  MA 02110-1301, USA.
 */

#include <stdlib.h>
#include <stdio.h>
#include <stdarg.h>
#include <string.h>
#include <unistd.h>
#include <errno.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/mman.h>
#include <linux/mman.h>
#include <linux/unistd.h>

#include "qemu.h"
#include "qemu-common.h"

//#define DEBUG_MMAP

#if defined(USE_NPTL)
pthread_mutex_t mmap_mutex = PTHREAD_MUTEX_INITIALIZER;
static int __thread mmap_lock_count;

void mmap_lock(void)
{
    if (mmap_lock_count++ == 0) {
        pthread_mutex_lock(&mmap_mutex);
    }
}

void mmap_unlock(void)
{
    if (--mmap_lock_count == 0) {
        pthread_mutex_unlock(&mmap_mutex);
    }
}

/* Grab lock to make sure things are in a consistent state after fork().  */
void mmap_fork_start(void)
{
    if (mmap_lock_count)
        abort();
    pthread_mutex_lock(&mmap_mutex);
}

void mmap_fork_end(int child)
{
    if (child)
        pthread_mutex_init(&mmap_mutex, NULL);
    else
        pthread_mutex_unlock(&mmap_mutex);
}
#else
/* We aren't threadsafe to start with, so no need to worry about locking.  */
void mmap_lock(void)
{
}

void mmap_unlock(void)
{
}
#endif
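
/* Expected use of the fork hooks above (illustrative sketch, not a
   verbatim caller): the parent takes mmap_mutex before fork() so the
   child cannot inherit a mutex held mid-update by another thread, then
   both sides hand it back:

       mmap_fork_start();
       pid = fork();
       mmap_fork_end(pid == 0);

   The child reinitializes the mutex; the parent simply unlocks it.
   Note that mmap_lock() is recursive per thread via mmap_lock_count.  */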

void *qemu_vmalloc(size_t size)
{
    void *p;
    unsigned long addr;
    mmap_lock();
    /* Use map and mark the pages as used.  */
    p = mmap(NULL, size, PROT_READ | PROT_WRITE,
             MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

    addr = (unsigned long)p;
    if (addr == (target_ulong) addr) {
        /* Allocated region overlaps guest address space.
           This may recurse.  */
        page_set_flags(addr & TARGET_PAGE_MASK, TARGET_PAGE_ALIGN(addr + size),
                       PAGE_RESERVED);
    }

    mmap_unlock();
    return p;
}

void *qemu_malloc(size_t size)
{
    char *p;
    size += 16;
    p = qemu_vmalloc(size);
    *(size_t *)p = size;
    return p + 16;
}
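
/* Layout of a qemu_malloc() block (illustrative sketch): the size of
   the whole allocation, header included, is stored in the first size_t
   of the block, and the caller gets the address 16 bytes in:

       p                    p + 16
       | size_t size | pad | user data ...

   qemu_free() below steps back 16 bytes to recover the size it must
   pass to munmap().  */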

/* We use map, which is always zero initialized.  */
void *qemu_mallocz(size_t size)
{
    return qemu_malloc(size);
}

void qemu_free(void *ptr)
{
    /* FIXME: We should unmark the reserved pages here.  However this gets
       complicated when one target page spans multiple host pages, so we
       don't bother.  */
    size_t *p;
    p = (size_t *)((char *)ptr - 16);
    munmap(p, *p);
}

void *qemu_realloc(void *ptr, size_t size)
{
    size_t old_size, copy;
    void *new_ptr;

    if (!ptr)
        return qemu_malloc(size);
    old_size = *(size_t *)((char *)ptr - 16);
    copy = old_size < size ? old_size : size;
    new_ptr = qemu_malloc(size);
    memcpy(new_ptr, ptr, copy);
    qemu_free(ptr);
    return new_ptr;
}
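
/* Worked example for target_mprotect() below (illustrative numbers):
   with 4K target pages on a 64K-page host, protecting the target range
   [0x11000, 0x13000) touches only part of the host page at 0x10000.
   A host page can carry only one protection, so the code ORs the
   requested protection with the existing flags of every other target
   page sharing that host page before calling the host mprotect().  */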

/* NOTE: all the constants are the HOST ones, but addresses are target.  */
int target_mprotect(abi_ulong start, abi_ulong len, int prot)
{
    abi_ulong end, host_start, host_end, addr;
    int prot1, ret;

#ifdef DEBUG_MMAP
    printf("mprotect: start=0x" TARGET_FMT_lx
           " len=0x" TARGET_FMT_lx " prot=%c%c%c\n", start, len,
           prot & PROT_READ ? 'r' : '-',
           prot & PROT_WRITE ? 'w' : '-',
           prot & PROT_EXEC ? 'x' : '-');
#endif

    if ((start & ~TARGET_PAGE_MASK) != 0)
        return -EINVAL;
    len = TARGET_PAGE_ALIGN(len);
    end = start + len;
    if (end < start)
        return -EINVAL;
    prot &= PROT_READ | PROT_WRITE | PROT_EXEC;
    if (len == 0)
        return 0;

    mmap_lock();
    host_start = start & qemu_host_page_mask;
    host_end = HOST_PAGE_ALIGN(end);
    if (start > host_start) {
        /* handle host page containing start */
        prot1 = prot;
        for(addr = host_start; addr < start; addr += TARGET_PAGE_SIZE) {
            prot1 |= page_get_flags(addr);
        }
        if (host_end == host_start + qemu_host_page_size) {
            for(addr = end; addr < host_end; addr += TARGET_PAGE_SIZE) {
                prot1 |= page_get_flags(addr);
            }
            end = host_end;
        }
        ret = mprotect(g2h(host_start), qemu_host_page_size, prot1 & PAGE_BITS);
        if (ret != 0)
            goto error;
        host_start += qemu_host_page_size;
    }
    if (end < host_end) {
        prot1 = prot;
        for(addr = end; addr < host_end; addr += TARGET_PAGE_SIZE) {
            prot1 |= page_get_flags(addr);
        }
        ret = mprotect(g2h(host_end - qemu_host_page_size), qemu_host_page_size,
                       prot1 & PAGE_BITS);
        if (ret != 0)
            goto error;
        host_end -= qemu_host_page_size;
    }

    /* handle the pages in the middle */
    if (host_start < host_end) {
        ret = mprotect(g2h(host_start), host_end - host_start, prot);
        if (ret != 0)
            goto error;
    }
    page_set_flags(start, start + len, prot | PAGE_VALID);
    mmap_unlock();
    return 0;
error:
    mmap_unlock();
    return ret;
}

/* map an incomplete host page */
static int mmap_frag(abi_ulong real_start,
                     abi_ulong start, abi_ulong end,
                     int prot, int flags, int fd, abi_ulong offset)
{
    abi_ulong real_end, addr;
    void *host_start;
    int prot1, prot_new;

    real_end = real_start + qemu_host_page_size;
    host_start = g2h(real_start);

    /* get the protection of the target pages outside the mapping */
    prot1 = 0;
    for(addr = real_start; addr < real_end; addr++) {
        if (addr < start || addr >= end)
            prot1 |= page_get_flags(addr);
    }

    if (prot1 == 0) {
        /* no page was there, so we allocate one */
        void *p = mmap(host_start, qemu_host_page_size, prot,
                       flags | MAP_ANONYMOUS, -1, 0);
        if (p == MAP_FAILED)
            return -1;
        prot1 = prot;
    }
    prot1 &= PAGE_BITS;

    prot_new = prot | prot1;
    if (!(flags & MAP_ANONYMOUS)) {
        /* msync() won't work here, so we return an error if write is
           possible while it is a shared mapping */
        if ((flags & MAP_TYPE) == MAP_SHARED &&
            (prot & PROT_WRITE))
            return -EINVAL;

        /* adjust protection to be able to read */
        if (!(prot1 & PROT_WRITE))
            mprotect(host_start, qemu_host_page_size, prot1 | PROT_WRITE);

        /* read the corresponding file data */
        pread(fd, g2h(start), end - start, offset);

        /* put final protection */
        if (prot_new != (prot1 | PROT_WRITE))
            mprotect(host_start, qemu_host_page_size, prot_new);
    } else {
        /* just update the protection */
        if (prot_new != prot1) {
            mprotect(host_start, qemu_host_page_size, prot_new);
        }
    }
    return 0;
}
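
/* mmap_frag() is the helper target_mmap() below relies on for the head
   and tail of a mapping whose edges fall inside a host page; the
   host-page-aligned middle is mapped directly with a plain mmap().  */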

#if defined(__CYGWIN__)
/* Cygwin doesn't have a whole lot of address space.  */
static abi_ulong mmap_next_start = 0x18000000;
#else
static abi_ulong mmap_next_start = 0x40000000;
#endif

unsigned long last_brk;

/* find a free memory area of size 'size'. The search starts at
   'start'. If 'start' == 0, then a default start address is used.
   Return -1 if error.
*/
/* page_init() marks pages used by the host as reserved to be sure not
   to use them. */
abi_ulong mmap_find_vma(abi_ulong start, abi_ulong size)
{
    abi_ulong addr, addr1, addr_start;
    int prot;
    unsigned long new_brk;

    new_brk = (unsigned long)sbrk(0);
    if (last_brk && last_brk < new_brk && last_brk == (target_ulong)last_brk) {
        /* This is a hack to catch the host allocating memory with brk().
           If it uses mmap then we lose.
           FIXME: We really want to avoid the host allocating memory in
           the first place, and maybe leave some slack to avoid switching
           to mmap.  */
        page_set_flags(last_brk & TARGET_PAGE_MASK,
                       TARGET_PAGE_ALIGN(new_brk),
                       PAGE_RESERVED);
    }
    last_brk = new_brk;

    size = HOST_PAGE_ALIGN(size);
    start = start & qemu_host_page_mask;
    addr = start;
    if (addr == 0)
        addr = mmap_next_start;
    addr_start = addr;
    for(;;) {
        prot = 0;
        for(addr1 = addr; addr1 < (addr + size); addr1 += TARGET_PAGE_SIZE) {
            prot |= page_get_flags(addr1);
        }
        if (prot == 0)
            break;
        addr += qemu_host_page_size;
        /* we found nothing */
        if (addr == addr_start)
            return (abi_ulong)-1;
    }
    if (start == 0)
        mmap_next_start = addr + size;
    return addr;
}
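
/* Note on the search above: it advances one host page at a time and
   only gives up once it has wrapped all the way around back to the
   starting address.  When no explicit start was requested, the next
   search resumes at mmap_next_start, just past the area handed out
   here.  */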

/* NOTE: all the constants are the HOST ones */
abi_long target_mmap(abi_ulong start, abi_ulong len, int prot,
                     int flags, int fd, abi_ulong offset)
{
    abi_ulong ret, end, real_start, real_end, retaddr, host_offset, host_len;
    unsigned long host_start;

    mmap_lock();
#ifdef DEBUG_MMAP
    {
        printf("mmap: start=0x" TARGET_FMT_lx
               " len=0x" TARGET_FMT_lx " prot=%c%c%c flags=",
               start, len,
               prot & PROT_READ ? 'r' : '-',
               prot & PROT_WRITE ? 'w' : '-',
               prot & PROT_EXEC ? 'x' : '-');
        if (flags & MAP_FIXED)
            printf("MAP_FIXED ");
        if (flags & MAP_ANONYMOUS)
            printf("MAP_ANON ");
        switch(flags & MAP_TYPE) {
        case MAP_PRIVATE:
            printf("MAP_PRIVATE ");
            break;
        case MAP_SHARED:
            printf("MAP_SHARED ");
            break;
        default:
            printf("[MAP_TYPE=0x%x] ", flags & MAP_TYPE);
            break;
        }
        printf("fd=%d offset=" TARGET_FMT_lx "\n", fd, offset);
    }
#endif

    if (offset & ~TARGET_PAGE_MASK) {
        errno = EINVAL;
        goto fail;
    }

    len = TARGET_PAGE_ALIGN(len);
    if (len == 0)
        goto the_end;
    real_start = start & qemu_host_page_mask;

    /* When mapping files into a memory area larger than the file, accesses
       to pages beyond the file size will cause a SIGBUS.

       For example, if mmapping a file of 100 bytes on a host with 4K pages
       emulating a target with 8K pages, the target expects to be able to
       access the first 8K. But the host will trap us on any access beyond
       4K.

       When emulating a target with a larger page size than the host's, we
       may need to truncate file maps at EOF and add extra anonymous pages
       up to the target's page boundary.  */

    if ((qemu_real_host_page_size < TARGET_PAGE_SIZE)
        && !(flags & MAP_ANONYMOUS)) {
        struct stat sb;

        if (fstat (fd, &sb) == -1)
            goto fail;

        /* Are we trying to create a map beyond EOF?  */
        if (offset + len > sb.st_size) {
            /* If so, truncate the file map at EOF aligned with
               the host's real page size. Additional anonymous maps
               will be created beyond EOF.  */
            len = (sb.st_size - offset);
            len += qemu_real_host_page_size - 1;
            len &= ~(qemu_real_host_page_size - 1);
        }
    }

    if (!(flags & MAP_FIXED)) {
        abi_ulong mmap_start;
        void *p;
        host_offset = offset & qemu_host_page_mask;
        host_len = len + offset - host_offset;
        host_len = HOST_PAGE_ALIGN(host_len);
        mmap_start = mmap_find_vma(real_start, host_len);
        if (mmap_start == (abi_ulong)-1) {
            errno = ENOMEM;
            goto fail;
        }
        /* Note: we prefer to control the mapping address. It is
           especially important if qemu_host_page_size >
           qemu_real_host_page_size */
        p = mmap(g2h(mmap_start),
                 host_len, prot, flags | MAP_FIXED | MAP_ANONYMOUS, -1, 0);
        if (p == MAP_FAILED)
            goto fail;
        /* update start so that it points to the file position at 'offset' */
        host_start = (unsigned long)p;
        if (!(flags & MAP_ANONYMOUS)) {
            p = mmap(g2h(mmap_start), len, prot,
                     flags | MAP_FIXED, fd, host_offset);
            host_start += offset - host_offset;
        }
        start = h2g(host_start);
    } else {
        int flg;
        target_ulong addr;

        if (start & ~TARGET_PAGE_MASK) {
            errno = EINVAL;
            goto fail;
        }
        end = start + len;
        real_end = HOST_PAGE_ALIGN(end);

        /*
         * Test if requested memory area fits target address space.
         * It can fail only on 64-bit host with 32-bit target.
         * On any other target/host combination, host mmap() handles
         * this error correctly.
         */
        if ((unsigned long)start + len - 1 > (abi_ulong) -1) {
            errno = EINVAL;
            goto fail;
        }

        for(addr = real_start; addr < real_end; addr += TARGET_PAGE_SIZE) {
            flg = page_get_flags(addr);
            if (flg & PAGE_RESERVED) {
                errno = ENXIO;
                goto fail;
            }
        }

        /* worst case: we cannot map the file because the offset is not
           aligned, so we read it */
        if (!(flags & MAP_ANONYMOUS) &&
            (offset & ~qemu_host_page_mask) != (start & ~qemu_host_page_mask)) {
            /* msync() won't work here, so we return an error if write is
               possible while it is a shared mapping */
            if ((flags & MAP_TYPE) == MAP_SHARED &&
                (prot & PROT_WRITE)) {
                errno = EINVAL;
                goto fail;
            }
            retaddr = target_mmap(start, len, prot | PROT_WRITE,
                                  MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS,
                                  -1, 0);
            if (retaddr == -1)
                goto fail;
            pread(fd, g2h(start), len, offset);
            if (!(prot & PROT_WRITE)) {
                ret = target_mprotect(start, len, prot);
                if (ret != 0) {
                    start = ret;
                    goto the_end;
                }
            }
            goto the_end;
        }

        /* handle the start of the mapping */
        if (start > real_start) {
            if (real_end == real_start + qemu_host_page_size) {
                /* one single host page */
                ret = mmap_frag(real_start, start, end,
                                prot, flags, fd, offset);
                if (ret == -1)
                    goto fail;
                goto the_end1;
            }
            ret = mmap_frag(real_start, start, real_start + qemu_host_page_size,
                            prot, flags, fd, offset);
            if (ret == -1)
                goto fail;
            real_start += qemu_host_page_size;
        }
        /* handle the end of the mapping */
        if (end < real_end) {
            ret = mmap_frag(real_end - qemu_host_page_size,
                            real_end - qemu_host_page_size, real_end,
                            prot, flags, fd,
                            offset + real_end - qemu_host_page_size - start);
            if (ret == -1)
                goto fail;
            real_end -= qemu_host_page_size;
        }

        /* map the middle (easier) */
        if (real_start < real_end) {
            void *p;
            unsigned long offset1;
            if (flags & MAP_ANONYMOUS)
                offset1 = 0;
            else
                offset1 = offset + real_start - start;
            p = mmap(g2h(real_start), real_end - real_start,
                     prot, flags, fd, offset1);
            if (p == MAP_FAILED)
                goto fail;
        }
    }
 the_end1:
    page_set_flags(start, start + len, prot | PAGE_VALID);
 the_end:
#ifdef DEBUG_MMAP
    printf("ret=0x" TARGET_FMT_lx "\n", start);
    page_dump(stdout);
    printf("\n");
#endif
    mmap_unlock();
    return start;
fail:
    mmap_unlock();
    return -1;
}
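
/* Shape of the MAP_FIXED path above (sketch): if 'start' is not
   host-page aligned, the host page containing it is handled by
   mmap_frag() (the head); likewise the host page containing 'end'
   (the tail).  Whatever remains in between is host-page aligned on
   both sides and is mapped with one plain host mmap().  The non-fixed
   path sidesteps the problem by reserving a host-page-aligned region
   via mmap_find_vma() first and deriving 'start' from it.  */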

int target_munmap(abi_ulong start, abi_ulong len)
{
    abi_ulong end, real_start, real_end, addr;
    int prot, ret;

#ifdef DEBUG_MMAP
    printf("munmap: start=0x" TARGET_FMT_lx
           " len=0x" TARGET_FMT_lx "\n", start, len);
#endif
    if (start & ~TARGET_PAGE_MASK)
        return -EINVAL;
    len = TARGET_PAGE_ALIGN(len);
    if (len == 0)
        return -EINVAL;
    mmap_lock();
    end = start + len;
    real_start = start & qemu_host_page_mask;
    real_end = HOST_PAGE_ALIGN(end);

    if (start > real_start) {
        /* handle host page containing start */
        prot = 0;
        for(addr = real_start; addr < start; addr += TARGET_PAGE_SIZE) {
            prot |= page_get_flags(addr);
        }
        if (real_end == real_start + qemu_host_page_size) {
            for(addr = end; addr < real_end; addr += TARGET_PAGE_SIZE) {
                prot |= page_get_flags(addr);
            }
            end = real_end;
        }
        if (prot != 0)
            real_start += qemu_host_page_size;
    }
    if (end < real_end) {
        prot = 0;
        for(addr = end; addr < real_end; addr += TARGET_PAGE_SIZE) {
            prot |= page_get_flags(addr);
        }
        if (prot != 0)
            real_end -= qemu_host_page_size;
    }

    ret = 0;
    /* unmap what we can */
    if (real_start < real_end) {
        ret = munmap(g2h(real_start), real_end - real_start);
    }

    if (ret == 0)
        page_set_flags(start, start + len, 0);
    mmap_unlock();
    return ret;
}
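
/* Note: target_munmap() above only releases whole host pages.  If
   other target pages in the host page at either edge are still in use
   (prot != 0), that host page stays mapped and only the page flags for
   the requested target range are cleared.  */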

abi_long target_mremap(abi_ulong old_addr, abi_ulong old_size,
                       abi_ulong new_size, unsigned long flags,
                       abi_ulong new_addr)
{
    int prot;
    void *host_addr;

    mmap_lock();

    if (flags & MREMAP_FIXED)
        host_addr = (void *) syscall(__NR_mremap, g2h(old_addr),
                                     old_size, new_size,
                                     flags,
                                     new_addr);
    else if (flags & MREMAP_MAYMOVE) {
        abi_ulong mmap_start;

        mmap_start = mmap_find_vma(0, new_size);

        if (mmap_start == -1) {
            errno = ENOMEM;
            host_addr = MAP_FAILED;
        } else
            host_addr = (void *) syscall(__NR_mremap, g2h(old_addr),
                                         old_size, new_size,
                                         flags | MREMAP_FIXED,
                                         g2h(mmap_start));
    } else {
        host_addr = mremap(g2h(old_addr), old_size, new_size, flags);
        /* Check if address fits target address space */
        if ((unsigned long)host_addr + new_size > (abi_ulong)-1) {
            /* Revert mremap() changes */
            host_addr = mremap(g2h(old_addr), new_size, old_size, flags);
            errno = ENOMEM;
            host_addr = MAP_FAILED;
        }
    }

    if (host_addr == MAP_FAILED) {
        new_addr = -1;
    } else {
        new_addr = h2g(host_addr);
        prot = page_get_flags(old_addr);
        page_set_flags(old_addr, old_addr + old_size, 0);
        page_set_flags(new_addr, new_addr + new_size, prot | PAGE_VALID);
    }
    mmap_unlock();
    return new_addr;
}

int target_msync(abi_ulong start, abi_ulong len, int flags)
{
    abi_ulong end;

    if (start & ~TARGET_PAGE_MASK)
        return -EINVAL;
    len = TARGET_PAGE_ALIGN(len);
    end = start + len;
    if (end < start)
        return -EINVAL;
    if (end == start)
        return 0;

    start &= qemu_host_page_mask;
    return msync(g2h(start), end - start, flags);
}