linux-user/mmap.c

   1 /*
   2  *  mmap support for qemu
   3  *
   4  *  Copyright (c) 2003 Fabrice Bellard
   5  *
   6  *  This program is free software; you can redistribute it and/or modify
   7  *  it under the terms of the GNU General Public License as published by
   8  *  the Free Software Foundation; either version 2 of the License, or
   9  *  (at your option) any later version.
  10  *
  11  *  This program is distributed in the hope that it will be useful,
  12  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
  13  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14  *  GNU General Public License for more details.
  15  *
  16  *  You should have received a copy of the GNU General Public License
  17  *  along with this program; if not, see <http://www.gnu.org/licenses/>.
  18  */
  19 #include "qemu/osdep.h"
  20 #include <sys/shm.h>
  21 #include "trace.h"
  22 #include "exec/log.h"
  23 #include "qemu.h"
  24 #include "user-internals.h"
  25 #include "user-mmap.h"
  26 #include "target_mman.h"
  27
     /*
      * Mutex behind mmap_lock()/mmap_unlock().  mmap_lock() acquires it only
      * on a thread's outermost call, and mmap_unlock() releases it when that
      * thread's nesting count drops back to zero.  mmap_fork_start() and
      * mmap_fork_end() also take, release or re-initialise it around fork().
      */
  28 static pthread_mutex_t mmap_mutex = PTHREAD_MUTEX_INITIALIZER;
     /* Per-thread (__thread) nesting depth of mmap_lock() calls. */
  29 static __thread int mmap_lock_count;
  30
     /* Number of slots in the shm_regions[] table below. */
  31 #define N_SHM_REGIONS  32
  32
     /*
      * Fixed-size table of region records: a start address, a size and an
      * in_use marker per slot.  Nothing in the part of the file shown here
      * reads or writes the table.
      * NOTE(review): presumably maintained by the shmat/shmdt emulation
      * (cf. the <sys/shm.h> include) -- confirm against the rest of the file.
      */
  33 static struct shm_region {
  34     abi_ulong start;
  35     abi_ulong size;
  36     bool in_use;
  37 } shm_regions[N_SHM_REGIONS];
  38
  39 void mmap_lock(void)
  40 {
  41     if (mmap_lock_count++ == 0) {
  42         pthread_mutex_lock(&mmap_mutex);
  43     }
  44 }
  45
  46 void mmap_unlock(void)
  47 {
  48     assert(mmap_lock_count > 0);
  49     if (--mmap_lock_count == 0) {
  50         pthread_mutex_unlock(&mmap_mutex);
  51     }
  52 }
  53
  54 bool have_mmap_lock(void)
  55 {
  56     return mmap_lock_count > 0 ? true : false;
  57 }
  58
  59 /* Grab lock to make sure things are in a consistent state after fork().  */
  60 void mmap_fork_start(void)
  61 {
  62     if (mmap_lock_count)
  63         abort();
  64     pthread_mutex_lock(&mmap_mutex);
  65 }
  66
  67 void mmap_fork_end(int child)
  68 {
  69     if (child) {
  70         pthread_mutex_init(&mmap_mutex, NULL);
  71     } else {
  72         pthread_mutex_unlock(&mmap_mutex);
  73     }
  74 }
  75
  76 /*
  77  * Validate target prot bitmask.
  78  * Return the prot bitmask for the host in *HOST_PROT.
  79  * Return 0 if the target prot bitmask is invalid, otherwise
  80  * the internal qemu page_flags (which will include PAGE_VALID).
  81  */
  82 static int validate_prot_to_pageflags(int prot)
  83 {
  84     int valid = PROT_READ | PROT_WRITE | PROT_EXEC | TARGET_PROT_SEM;
  85     int page_flags = (prot & PAGE_BITS) | PAGE_VALID;
  86
  87 #ifdef TARGET_AARCH64
  88     {
  89         ARMCPU *cpu = ARM_CPU(thread_cpu);
  90
  91         /*
  92          * The PROT_BTI bit is only accepted if the cpu supports the feature.
  93          * Since this is the unusual case, don't bother checking unless
  94          * the bit has been requested.  If set and valid, record the bit
  95          * within QEMU's page_flags.
  96          */
  97         if ((prot & TARGET_PROT_BTI) && cpu_isar_feature(aa64_bti, cpu)) {
  98             valid |= TARGET_PROT_BTI;
  99             page_flags |= PAGE_BTI;
 100         }
 101         /* Similarly for the PROT_MTE bit. */
 102         if ((prot & TARGET_PROT_MTE) && cpu_isar_feature(aa64_mte, cpu)) {
 103             valid |= TARGET_PROT_MTE;
 104             page_flags |= PAGE_MTE;
 105         }
 106     }
 107 #elif defined(TARGET_HPPA)
 108     valid |= PROT_GROWSDOWN | PROT_GROWSUP;
 109 #endif
 110
 111     return prot & ~valid ? 0 : page_flags;
 112 }
 113
 114 /*
 115  * For the host, we need not pass anything except read/write/exec.
 116  * While PROT_SEM is allowed by all hosts, it is also ignored, so
 117  * don't bother transforming guest bit to host bit.  Any other
 118  * target-specific prot bits will not be understood by the host
 119  * and will need to be encoded into page_flags for qemu emulation.
 120  *
 121  * Pages that are executable by the guest will never be executed
 122  * by the host, but the host will need to be able to read them.
 123  */
 124 static int target_to_host_prot(int prot)
 125 {
 126     return (prot & (PROT_READ | PROT_WRITE)) |
 127            (prot & PROT_EXEC ? PROT_READ : 0);
 128 }
 129
 130 /* NOTE: all the constants are the HOST ones, but addresses are target. */
 131 int target_mprotect(abi_ulong start, abi_ulong len, int target_prot)
 132 {
 133     abi_ulong starts[3];
 134     abi_ulong lens[3];
 135     int prots[3];
 136     abi_ulong host_start, host_last, last;
 137     int prot1, ret, page_flags, nranges;
 138
 139     trace_target_mprotect(start, len, target_prot);
 140
 141     if ((start & ~TARGET_PAGE_MASK) != 0) {
 142         return -TARGET_EINVAL;
 143     }
 144     page_flags = validate_prot_to_pageflags(target_prot);
 145     if (!page_flags) {
 146         return -TARGET_EINVAL;
 147     }
 148     if (len == 0) {
 149         return 0;
 150     }
 151     len = TARGET_PAGE_ALIGN(len);
 152     if (!guest_range_valid_untagged(start, len)) {
 153         return -TARGET_ENOMEM;
 154     }
 155
 156     last = start + len - 1;
 157     host_start = start & qemu_host_page_mask;
 158     host_last = HOST_PAGE_ALIGN(last) - 1;
 159     nranges = 0;
 160
 161     mmap_lock();
 162
 163     if (host_last - host_start < qemu_host_page_size) {
 164         /* Single host page contains all guest pages: sum the prot. */
 165         prot1 = target_prot;
 166         for (abi_ulong a = host_start; a < start; a += TARGET_PAGE_SIZE) {
 167             prot1 |= page_get_flags(a);
 168         }
 169         for (abi_ulong a = last; a < host_last; a += TARGET_PAGE_SIZE) {
 170             prot1 |= page_get_flags(a + 1);
 171         }
 172         starts[nranges] = host_start;
 173         lens[nranges] = qemu_host_page_size;
 174         prots[nranges] = prot1;
 175         nranges++;
 176     } else {
 177         if (host_start < start) {
 178             /* Host page contains more than one guest page: sum the prot. */
 179             prot1 = target_prot;
 180             for (abi_ulong a = host_start; a < start; a += TARGET_PAGE_SIZE) {
 181                 prot1 |= page_get_flags(a);
 182             }
 183             /* If the resulting sum differs, create a new range. */
 184             if (prot1 != target_prot) {
 185                 starts[nranges] = host_start;
 186                 lens[nranges] = qemu_host_page_size;
 187                 prots[nranges] = prot1;
 188                 nranges++;
 189                 host_start += qemu_host_page_size;
 190             }
 191         }
 192
 193         if (last < host_last) {
 194             /* Host page contains more than one guest page: sum the prot. */
 195             prot1 = target_prot;
 196             for (abi_ulong a = last; a < host_last; a += TARGET_PAGE_SIZE) {
 197                 prot1 |= page_get_flags(a + 1);
 198             }
 199             /* If the resulting sum differs, create a new range. */
 200             if (prot1 != target_prot) {
 201                 host_last -= qemu_host_page_size;
 202                 starts[nranges] = host_last + 1;
 203                 lens[nranges] = qemu_host_page_size;
 204                 prots[nranges] = prot1;
 205                 nranges++;
 206             }
 207         }
 208
 209         /* Create a range for the middle, if any remains. */
 210         if (host_start < host_last) {
 211             starts[nranges] = host_start;
 212             lens[nranges] = host_last - host_start + 1;
 213             prots[nranges] = target_prot;
 214             nranges++;
 215         }
 216     }
 217
 218     for (int i = 0; i < nranges; ++i) {
 219         ret = mprotect(g2h_untagged(starts[i]), lens[i],
 220                        target_to_host_prot(prots[i]));
 221         if (ret != 0) {
 222             goto error;
 223         }
 224     }
 225
 226     page_set_flags(start, last, page_flags);
 227     ret = 0;
 228
 229  error:
 230     mmap_unlock();
 231     return ret;
 232 }
 233
 234 /* map an incomplete host page */
 235 static bool mmap_frag(abi_ulong real_start, abi_ulong start, abi_ulong last,
 236                       int prot, int flags, int fd, off_t offset)
 237 {
 238     abi_ulong real_last;
 239     void *host_start;
 240     int prot_old, prot_new;
 241     int host_prot_old, host_prot_new;
 242
 243     if (!(flags & MAP_ANONYMOUS)
 244         && (flags & MAP_TYPE) == MAP_SHARED
 245         && (prot & PROT_WRITE)) {
 246         /*
 247          * msync() won't work with the partial page, so we return an
 248          * error if write is possible while it is a shared mapping.
 249          */
 250         errno = EINVAL;
 251         return false;
 252     }
 253
 254     real_last = real_start + qemu_host_page_size - 1;
 255     host_start = g2h_untagged(real_start);
 256
 257     /* Get the protection of the target pages outside the mapping. */
 258     prot_old = 0;
 259     for (abi_ulong a = real_start; a < start; a += TARGET_PAGE_SIZE) {
 260         prot_old |= page_get_flags(a);
 261     }
 262     for (abi_ulong a = real_last; a > last; a -= TARGET_PAGE_SIZE) {
 263         prot_old |= page_get_flags(a);
 264     }
 265
 266     if (prot_old == 0) {
 267         /*
 268          * Since !(prot_old & PAGE_VALID), there were no guest pages
 269          * outside of the fragment we need to map.  Allocate a new host
 270          * page to cover, discarding whatever else may have been present.
 271          */
 272         void *p = mmap(host_start, qemu_host_page_size,
 273                        target_to_host_prot(prot),
 274                        flags | MAP_ANONYMOUS, -1, 0);
 275         if (p != host_start) {
 276             if (p != MAP_FAILED) {
 277                 munmap(p, qemu_host_page_size);
 278                 errno = EEXIST;
 279             }
 280             return false;
 281         }
 282         prot_old = prot;
 283     }
 284     prot_new = prot | prot_old;
 285
 286     host_prot_old = target_to_host_prot(prot_old);
 287     host_prot_new = target_to_host_prot(prot_new);
 288
 289     /* Adjust protection to be able to write. */
 290     if (!(host_prot_old & PROT_WRITE)) {
 291         host_prot_old |= PROT_WRITE;
 292         mprotect(host_start, qemu_host_page_size, host_prot_old);
 293     }
 294
 295     /* Read or zero the new guest pages. */
 296     if (flags & MAP_ANONYMOUS) {
 297         memset(g2h_untagged(start), 0, last - start + 1);
 298     } else {
 299         if (pread(fd, g2h_untagged(start), last - start + 1, offset) == -1) {
 300             return false;
 301         }
 302     }
 303
 304     /* Put final protection */
 305     if (host_prot_new != host_prot_old) {
 306         mprotect(host_start, qemu_host_page_size, host_prot_new);
 307     }
 308     return true;
 309 }
 310
     /*
      * mmap_find_vma() advances mmap_next_start only for results at or
      * above this address.
      */
 311 abi_ulong task_unmapped_base;
     /* Not referenced in the part of the file shown here. */
 312 abi_ulong elf_et_dyn_base;
     /*
      * Default search start used by mmap_find_vma() when called with
      * start == 0; moved past a successful allocation made from it.
      */
 313 abi_ulong mmap_next_start;
 314
 315 /*
 316  * Subroutine of mmap_find_vma, used when we have pre-allocated
 317  * a chunk of guest address space.
 318  */
 319 static abi_ulong mmap_find_vma_reserved(abi_ulong start, abi_ulong size,
 320                                         abi_ulong align)
 321 {
 322     target_ulong ret;
 323
 324     ret = page_find_range_empty(start, reserved_va, size, align);
 325     if (ret == -1 && start > mmap_min_addr) {
 326         /* Restart at the beginning of the address space. */
 327         ret = page_find_range_empty(mmap_min_addr, start - 1, size, align);
 328     }
 329
 330     return ret;
 331 }
 332
 333 /*
 334  * Find and reserve a free memory area of size 'size'. The search
 335  * starts at 'start'.
 336  * It must be called with mmap_lock() held.
 337  * Return -1 if error.
 338  */
 339 abi_ulong mmap_find_vma(abi_ulong start, abi_ulong size, abi_ulong align)
 340 {
 341     void *ptr, *prev;
 342     abi_ulong addr;
 343     int wrapped, repeat;
 344
 345     align = MAX(align, qemu_host_page_size);
 346
 347     /* If 'start' == 0, then a default start address is used. */
 348     if (start == 0) {
 349         start = mmap_next_start;
 350     } else {
 351         start &= qemu_host_page_mask;
 352     }
 353     start = ROUND_UP(start, align);
 354
 355     size = HOST_PAGE_ALIGN(size);
 356
 357     if (reserved_va) {
 358         return mmap_find_vma_reserved(start, size, align);
 359     }
 360
 361     addr = start;
 362     wrapped = repeat = 0;
 363     prev = 0;
 364
 365     for (;; prev = ptr) {
 366         /*
 367          * Reserve needed memory area to avoid a race.
 368          * It should be discarded using:
 369          *  - mmap() with MAP_FIXED flag
 370          *  - mremap() with MREMAP_FIXED flag
 371          *  - shmat() with SHM_REMAP flag
 372          */
 373         ptr = mmap(g2h_untagged(addr), size, PROT_NONE,
 374                    MAP_ANONYMOUS | MAP_PRIVATE | MAP_NORESERVE, -1, 0);
 375
 376         /* ENOMEM, if host address space has no memory */
 377         if (ptr == MAP_FAILED) {
 378             return (abi_ulong)-1;
 379         }
 380
 381         /*
 382          * Count the number of sequential returns of the same address.
 383          * This is used to modify the search algorithm below.
 384          */
 385         repeat = (ptr == prev ? repeat + 1 : 0);
 386
 387         if (h2g_valid(ptr + size - 1)) {
 388             addr = h2g(ptr);
 389
 390             if ((addr & (align - 1)) == 0) {
 391                 /* Success.  */
 392                 if (start == mmap_next_start && addr >= task_unmapped_base) {
 393                     mmap_next_start = addr + size;
 394                 }
 395                 return addr;
 396             }
 397
 398             /* The address is not properly aligned for the target.  */
 399             switch (repeat) {
 400             case 0:
 401                 /*
 402                  * Assume the result that the kernel gave us is the
 403                  * first with enough free space, so start again at the
 404                  * next higher target page.
 405                  */
 406                 addr = ROUND_UP(addr, align);
 407                 break;
 408             case 1:
 409                 /*
 410                  * Sometimes the kernel decides to perform the allocation
 411                  * at the top end of memory instead.
 412                  */
 413                 addr &= -align;
 414                 break;
 415             case 2:
 416                 /* Start over at low memory.  */
 417                 addr = 0;
 418                 break;
 419             default:
 420                 /* Fail.  This unaligned block must the last.  */
 421                 addr = -1;
 422                 break;
 423             }
 424         } else {
 425             /*
 426              * Since the result the kernel gave didn't fit, start
 427              * again at low memory.  If any repetition, fail.
 428              */
 429             addr = (repeat ? -1 : 0);
 430         }
 431
 432         /* Unmap and try again.  */
 433         munmap(ptr, size);
 434
 435         /* ENOMEM if we checked the whole of the target address space.  */
 436         if (addr == (abi_ulong)-1) {
 437             return (abi_ulong)-1;
 438         } else if (addr == 0) {
 439             if (wrapped) {
 440                 return (abi_ulong)-1;
 441             }
 442             wrapped = 1;
 443             /*
 444              * Don't actually use 0 when wrapping, instead indicate
 445              * that we'd truly like an allocation in low memory.
 446              */
 447             addr = (mmap_min_addr > TARGET_PAGE_SIZE
 448                      ? TARGET_PAGE_ALIGN(mmap_min_addr)
 449                      : TARGET_PAGE_SIZE);
 450         } else if (wrapped && addr >= start) {
 451             return (abi_ulong)-1;
 452         }
 453     }
 454 }
 455
 456 /*
      * Emulate the guest mmap() call.
      *
      * The whole operation runs under mmap_lock().  On success the guest
      * start address of the mapping is returned; on failure -1 is returned
      * with errno either set explicitly below or left over from the failing
      * host call.
      *
      * Two main paths exist:
      *  - without MAP_FIXED / MAP_FIXED_NOREPLACE, an address is chosen with
      *    mmap_find_vma() and the area is mapped in one go;
      *  - with a fixed address, partial host pages at either end are handled
      *    by mmap_frag() and only the host-page-aligned middle is mapped
      *    directly.
      *
      * NOTE: all the constants are the HOST ones
      */
 457 abi_long target_mmap(abi_ulong start, abi_ulong len, int target_prot,
 458                      int flags, int fd, off_t offset)
 459 {
 460     abi_ulong ret, last, real_start, real_last, retaddr, host_len;
         /*
          * Guest range that ended up mapped directly by a host mmap() call;
          * it receives PAGE_PASSTHROUGH below.  start > last means "none".
          */
 461     abi_ulong passthrough_start = -1, passthrough_last = 0;
 462     int page_flags;
 463     off_t host_offset;
 464
 465     mmap_lock();
 466     trace_target_mmap(start, len, target_prot, flags, fd, offset);
 467
 468     if (!len) {
 469         errno = EINVAL;
 470         goto fail;
 471     }
 472
 473     page_flags = validate_prot_to_pageflags(target_prot);
 474     if (!page_flags) {
 475         errno = EINVAL;
 476         goto fail;
 477     }
 478
 479     /* Also check for overflows... */
 480     len = TARGET_PAGE_ALIGN(len);
 481     if (!len) {
 482         errno = ENOMEM;
 483         goto fail;
 484     }
 485
 486     if (offset & ~TARGET_PAGE_MASK) {
 487         errno = EINVAL;
 488         goto fail;
 489     }
 490
 491     /*
 492      * If we're mapping shared memory, ensure we generate code for parallel
 493      * execution and flush old translations.  This will work up to the level
 494      * supported by the host -- anything that requires EXCP_ATOMIC will not
 495      * be atomic with respect to an external process.
 496      */
 497     if (flags & MAP_SHARED) {
 498         CPUState *cpu = thread_cpu;
 499         if (!(cpu->tcg_cflags & CF_PARALLEL)) {
 500             cpu->tcg_cflags |= CF_PARALLEL;
 501             tb_flush(cpu);
 502         }
 503     }
 504
         /* Round the guest address and the file offset down to host pages. */
 505     real_start = start & qemu_host_page_mask;
 506     host_offset = offset & qemu_host_page_mask;
 507
 508     /*
 509      * If the user is asking for the kernel to find a location, do that
 510      * before we truncate the length for mapping files below.
 511      */
 512     if (!(flags & (MAP_FIXED | MAP_FIXED_NOREPLACE))) {
 513         host_len = len + offset - host_offset;
 514         host_len = HOST_PAGE_ALIGN(host_len);
 515         start = mmap_find_vma(real_start, host_len, TARGET_PAGE_SIZE);
 516         if (start == (abi_ulong)-1) {
 517             errno = ENOMEM;
 518             goto fail;
 519         }
 520     }
 521
 522     /*
 523      * When mapping files into a memory area larger than the file, accesses
 524      * to pages beyond the file size will cause a SIGBUS.
 525      *
 526      * For example, if mmaping a file of 100 bytes on a host with 4K pages
 527      * emulating a target with 8K pages, the target expects to be able to
 528      * access the first 8K. But the host will trap us on any access beyond
 529      * 4K.
 530      *
 531      * When emulating a target with a larger page-size than the hosts, we
 532      * may need to truncate file maps at EOF and add extra anonymous pages
 533      * up to the targets page boundary.
 534      */
 535     if ((qemu_real_host_page_size() < qemu_host_page_size) &&
 536         !(flags & MAP_ANONYMOUS)) {
 537         struct stat sb;
 538
 539         if (fstat(fd, &sb) == -1) {
 540             goto fail;
 541         }
 542
 543         /* Are we trying to create a map beyond EOF?.  */
 544         if (offset + len > sb.st_size) {
 545             /*
 546              * If so, truncate the file map at eof aligned with
 547              * the hosts real pagesize. Additional anonymous maps
 548              * will be created beyond EOF.
 549              */
 550             len = REAL_HOST_PAGE_ALIGN(sb.st_size - offset);
 551         }
 552     }
 553
 554     if (!(flags & (MAP_FIXED | MAP_FIXED_NOREPLACE))) {
 555         uintptr_t host_start;
 556         int host_prot;
 557         void *p;
 558
 559         host_len = len + offset - host_offset;
 560         host_len = HOST_PAGE_ALIGN(host_len);
 561         host_prot = target_to_host_prot(target_prot);
 562
 563         /*
 564          * Note: we prefer to control the mapping address. It is
 565          * especially important if qemu_host_page_size >
 566          * qemu_real_host_page_size.
 567          */
 568         p = mmap(g2h_untagged(start), host_len, host_prot,
 569                  flags | MAP_FIXED | MAP_ANONYMOUS, -1, 0);
 570         if (p == MAP_FAILED) {
 571             goto fail;
 572         }
 573         /* update start so that it points to the file position at 'offset' */
 574         host_start = (uintptr_t)p;
 575         if (!(flags & MAP_ANONYMOUS)) {
                 /* Lay the file over the start of the anonymous area. */
 576             p = mmap(g2h_untagged(start), len, host_prot,
 577                      flags | MAP_FIXED, fd, host_offset);
 578             if (p == MAP_FAILED) {
 579                 munmap(g2h_untagged(start), host_len);
 580                 goto fail;
 581             }
 582             host_start += offset - host_offset;
 583         }
 584         start = h2g(host_start);
 585         last = start + len - 1;
 586         passthrough_start = start;
 587         passthrough_last = last;
 588     } else {
 589         if (start & ~TARGET_PAGE_MASK) {
 590             errno = EINVAL;
 591             goto fail;
 592         }
 593         last = start + len - 1;
 594         real_last = HOST_PAGE_ALIGN(last) - 1;
 595
 596         /*
 597          * Test if requested memory area fits target address space
 598          * It can fail only on 64-bit host with 32-bit target.
 599          * On any other target/host host mmap() handles this error correctly.
 600          */
 601         if (last < start || !guest_range_valid_untagged(start, len)) {
 602             errno = ENOMEM;
 603             goto fail;
 604         }
 605
 606         if (flags & MAP_FIXED_NOREPLACE) {
 607             /* Validate that the chosen range is empty. */
 608             if (!page_check_range_empty(start, last)) {
 609                 errno = EEXIST;
 610                 goto fail;
 611             }
 612
 613             /*
 614              * With reserved_va, the entire address space is mmaped in the
 615              * host to ensure it isn't accidentally used for something else.
 616              * We have just checked that the guest address is not mapped
 617              * within the guest, but need to replace the host reservation.
 618              *
 619              * Without reserved_va, despite the guest address check above,
 620              * keep MAP_FIXED_NOREPLACE so that the guest does not overwrite
 621              * any host address mappings.
 622              */
 623             if (reserved_va) {
 624                 flags = (flags & ~MAP_FIXED_NOREPLACE) | MAP_FIXED;
 625             }
 626         }
 627
 628         /*
 629          * worst case: we cannot map the file because the offset is not
 630          * aligned, so we read it
 631          */
 632         if (!(flags & MAP_ANONYMOUS) &&
 633             (offset & ~qemu_host_page_mask) != (start & ~qemu_host_page_mask)) {
 634             /*
 635              * msync() won't work here, so we return an error if write is
 636              * possible while it is a shared mapping
 637              */
 638             if ((flags & MAP_TYPE) == MAP_SHARED
 639                 && (target_prot & PROT_WRITE)) {
 640                 errno = EINVAL;
 641                 goto fail;
 642             }
                 /*
                  * Create a writable private anonymous mapping through a
                  * recursive call, fill it from the file, then drop
                  * PROT_WRITE again if the caller did not ask for it.
                  */
 643             retaddr = target_mmap(start, len, target_prot | PROT_WRITE,
 644                                   (flags & (MAP_FIXED | MAP_FIXED_NOREPLACE))
 645                                   | MAP_PRIVATE | MAP_ANONYMOUS,
 646                                   -1, 0);
 647             if (retaddr == -1) {
 648                 goto fail;
 649             }
                 /*
                  * NOTE(review): if pread() fails, -1 is returned while the
                  * anonymous mapping created just above stays in place.
                  */
 650             if (pread(fd, g2h_untagged(start), len, offset) == -1) {
 651                 goto fail;
 652             }
 653             if (!(target_prot & PROT_WRITE)) {
 654                 ret = target_mprotect(start, len, target_prot);
 655                 assert(ret == 0);
 656             }
                 /* Jumps past the page_set_flags() calls at the_end1. */
 657             goto the_end;
 658         }
 659
 660         /* handle the start of the mapping */
 661         if (start > real_start) {
 662             if (real_last == real_start + qemu_host_page_size - 1) {
 663                 /* one single host page */
 664                 if (!mmap_frag(real_start, start, last,
 665                                target_prot, flags, fd, offset)) {
 666                     goto fail;
 667                 }
 668                 goto the_end1;
 669             }
 670             if (!mmap_frag(real_start, start,
 671                            real_start + qemu_host_page_size - 1,
 672                            target_prot, flags, fd, offset)) {
 673                 goto fail;
 674             }
 675             real_start += qemu_host_page_size;
 676         }
 677         /* handle the end of the mapping */
 678         if (last < real_last) {
 679             abi_ulong real_page = real_last - qemu_host_page_size + 1;
 680             if (!mmap_frag(real_page, real_page, last,
 681                            target_prot, flags, fd,
 682                            offset + real_page - start)) {
 683                 goto fail;
 684             }
 685             real_last -= qemu_host_page_size;
 686         }
 687
 688         /* map the middle (easier) */
 689         if (real_start < real_last) {
 690             void *p, *want_p;
 691             off_t offset1;
 692             size_t len1;
 693
 694             if (flags & MAP_ANONYMOUS) {
 695                 offset1 = 0;
 696             } else {
 697                 offset1 = offset + real_start - start;
 698             }
 699             len1 = real_last - real_start + 1;
 700             want_p = g2h_untagged(real_start);
 701
 702             p = mmap(want_p, len1, target_to_host_prot(target_prot),
 703                      flags, fd, offset1);
 704             if (p != want_p) {
 705                 if (p != MAP_FAILED) {
 706                     munmap(p, len1);
 707                     errno = EEXIST;
 708                 }
 709                 goto fail;
 710             }
 711             passthrough_start = real_start;
 712             passthrough_last = real_last;
 713         }
 714     }
         /*
          * Record the new guest page flags.  Only the directly mapped part
          * is tagged PAGE_PASSTHROUGH; fragments before and after it get
          * the plain flags.
          */
 715  the_end1:
 716     if (flags & MAP_ANONYMOUS) {
 717         page_flags |= PAGE_ANON;
 718     }
 719     page_flags |= PAGE_RESET;
 720     if (passthrough_start > passthrough_last) {
 721         page_set_flags(start, last, page_flags);
 722     } else {
 723         if (start < passthrough_start) {
 724             page_set_flags(start, passthrough_start - 1, page_flags);
 725         }
 726         page_set_flags(passthrough_start, passthrough_last,
 727                        page_flags | PAGE_PASSTHROUGH);
 728         if (passthrough_last < last) {
 729             page_set_flags(passthrough_last + 1, last, page_flags);
 730         }
 731     }
 732  the_end:
 733     trace_target_mmap_complete(start);
 734     if (qemu_loglevel_mask(CPU_LOG_PAGE)) {
 735         FILE *f = qemu_log_trylock();
 736         if (f) {
 737             fprintf(f, "page layout changed following mmap\n");
 738             page_dump(f);
 739             qemu_log_unlock(f);
 740         }
 741     }
 742     mmap_unlock();
 743     return start;
 744 fail:
 745     mmap_unlock();
 746     return -1;
 747 }
 748
 749 static void mmap_reserve_or_unmap(abi_ulong start, abi_ulong len)
 750 {
 751     abi_ulong real_start;
 752     abi_ulong real_last;
 753     abi_ulong real_len;
 754     abi_ulong last;
 755     abi_ulong a;
 756     void *host_start;
 757     int prot;
 758
 759     last = start + len - 1;
 760     real_start = start & qemu_host_page_mask;
 761     real_last = HOST_PAGE_ALIGN(last) - 1;
 762
 763     /*
 764      * If guest pages remain on the first or last host pages,
 765      * adjust the deallocation to retain those guest pages.
 766      * The single page special case is required for the last page,
 767      * lest real_start overflow to zero.
 768      */
 769     if (real_last - real_start < qemu_host_page_size) {
 770         prot = 0;
 771         for (a = real_start; a < start; a += TARGET_PAGE_SIZE) {
 772             prot |= page_get_flags(a);
 773         }
 774         for (a = last; a < real_last; a += TARGET_PAGE_SIZE) {
 775             prot |= page_get_flags(a + 1);
 776         }
 777         if (prot != 0) {
 778             return;
 779         }
 780     } else {
 781         for (prot = 0, a = real_start; a < start; a += TARGET_PAGE_SIZE) {
 782             prot |= page_get_flags(a);
 783         }
 784         if (prot != 0) {
 785             real_start += qemu_host_page_size;
 786         }
 787
 788         for (prot = 0, a = last; a < real_last; a += TARGET_PAGE_SIZE) {
 789             prot |= page_get_flags(a + 1);
 790         }
 791         if (prot != 0) {
 792             real_last -= qemu_host_page_size;
 793         }
 794
 795         if (real_last < real_start) {
 796             return;
 797         }
 798     }
 799
 800     real_len = real_last - real_start + 1;
 801     host_start = g2h_untagged(real_start);
 802
 803     if (reserved_va) {
 804         void *ptr = mmap(host_start, real_len, PROT_NONE,
 805                          MAP_FIXED | MAP_ANONYMOUS
 806                          | MAP_PRIVATE | MAP_NORESERVE, -1, 0);
 807         assert(ptr == host_start);
 808     } else {
 809         int ret = munmap(host_start, real_len);
 810         assert(ret == 0);
 811     }
 812 }
 813
 814 int target_munmap(abi_ulong start, abi_ulong len)
 815 {
 816     trace_target_munmap(start, len);
 817
 818     if (start & ~TARGET_PAGE_MASK) {
 819         return -TARGET_EINVAL;
 820     }
 821     len = TARGET_PAGE_ALIGN(len);
 822     if (len == 0 || !guest_range_valid_untagged(start, len)) {
 823         return -TARGET_EINVAL;
 824     }
 825
 826     mmap_lock();
 827     mmap_reserve_or_unmap(start, len);
 828     page_set_flags(start, start + len - 1, 0);
 829     mmap_unlock();
 830
 831     return 0;
 832 }
 833
 834 abi_long target_mremap(abi_ulong old_addr, abi_ulong old_size,
 835                        abi_ulong new_size, unsigned long flags,
 836                        abi_ulong new_addr)
 837 {
 838     int prot;
 839     void *host_addr;
 840
 841     if (!guest_range_valid_untagged(old_addr, old_size) ||
 842         ((flags & MREMAP_FIXED) &&
 843          !guest_range_valid_untagged(new_addr, new_size)) ||
 844         ((flags & MREMAP_MAYMOVE) == 0 &&
 845          !guest_range_valid_untagged(old_addr, new_size))) {
 846         errno = ENOMEM;
 847         return -1;
 848     }
 849
 850     mmap_lock();
 851
 852     if (flags & MREMAP_FIXED) {
 853         host_addr = mremap(g2h_untagged(old_addr), old_size, new_size,
 854                            flags, g2h_untagged(new_addr));
 855
 856         if (reserved_va && host_addr != MAP_FAILED) {
 857             /*
 858              * If new and old addresses overlap then the above mremap will
 859              * already have failed with EINVAL.
 860              */
 861             mmap_reserve_or_unmap(old_addr, old_size);
 862         }
 863     } else if (flags & MREMAP_MAYMOVE) {
 864         abi_ulong mmap_start;
 865
 866         mmap_start = mmap_find_vma(0, new_size, TARGET_PAGE_SIZE);
 867
 868         if (mmap_start == -1) {
 869             errno = ENOMEM;
 870             host_addr = MAP_FAILED;
 871         } else {
 872             host_addr = mremap(g2h_untagged(old_addr), old_size, new_size,
 873                                flags | MREMAP_FIXED,
 874                                g2h_untagged(mmap_start));
 875             if (reserved_va) {
 876                 mmap_reserve_or_unmap(old_addr, old_size);
 877             }
 878         }
 879     } else {
 880         int prot = 0;
 881         if (reserved_va && old_size < new_size) {
 882             abi_ulong addr;
 883             for (addr = old_addr + old_size;
 884                  addr < old_addr + new_size;
 885                  addr++) {
 886                 prot |= page_get_flags(addr);
 887             }
 888         }
 889         if (prot == 0) {
 890             host_addr = mremap(g2h_untagged(old_addr),
 891                                old_size, new_size, flags);
 892
 893             if (host_addr != MAP_FAILED) {
 894                 /* Check if address fits target address space */
 895                 if (!guest_range_valid_untagged(h2g(host_addr), new_size)) {
 896                     /* Revert mremap() changes */
 897                     host_addr = mremap(g2h_untagged(old_addr),
 898                                        new_size, old_size, flags);
 899                     errno = ENOMEM;
 900                     host_addr = MAP_FAILED;
 901                 } else if (reserved_va && old_size > new_size) {
 902                     mmap_reserve_or_unmap(old_addr + old_size,
 903                                           old_size - new_size);
 904                 }
 905             }
 906         } else {
 907             errno = ENOMEM;
 908             host_addr = MAP_FAILED;
 909         }
 910     }
 911
 912     if (host_addr == MAP_FAILED) {
 913         new_addr = -1;
 914     } else {
 915         new_addr = h2g(host_addr);
 916         prot = page_get_flags(old_addr);
 917         page_set_flags(old_addr, old_addr + old_size - 1, 0);
 918         page_set_flags(new_addr, new_addr + new_size - 1,
 919                        prot | PAGE_VALID | PAGE_RESET);
 920     }
 921     mmap_unlock();
 922     return new_addr;
 923 }
 924
 925 abi_long target_madvise(abi_ulong start, abi_ulong len_in, int advice)
 926 {
 927     abi_ulong len;
 928     int ret = 0;
 929
 930     if (start & ~TARGET_PAGE_MASK) {
 931         return -TARGET_EINVAL;
 932     }
 933     if (len_in == 0) {
 934         return 0;
 935     }
 936     len = TARGET_PAGE_ALIGN(len_in);
 937     if (len == 0 || !guest_range_valid_untagged(start, len)) {
 938         return -TARGET_EINVAL;
 939     }
 940
 941     /* Translate for some architectures which have different MADV_xxx values */
 942     switch (advice) {
 943     case TARGET_MADV_DONTNEED:      /* alpha */
 944         advice = MADV_DONTNEED;
 945         break;
 946     case TARGET_MADV_WIPEONFORK:    /* parisc */
 947         advice = MADV_WIPEONFORK;
 948         break;
 949     case TARGET_MADV_KEEPONFORK:    /* parisc */
 950         advice = MADV_KEEPONFORK;
 951         break;
 952     /* we do not care about the other MADV_xxx values yet */
 953     }
 954
 955     /*
 956      * Most advice values are hints, so ignoring and returning success is ok.
 957      *
 958      * However, some advice values such as MADV_DONTNEED, MADV_WIPEONFORK and
 959      * MADV_KEEPONFORK are not hints and need to be emulated.
 960      *
 961      * A straight passthrough for those may not be safe because qemu sometimes
 962      * turns private file-backed mappings into anonymous mappings.
 963      * If all guest pages have PAGE_PASSTHROUGH set, mappings have the
 964      * same semantics for the host as for the guest.
 965      *
 966      * We pass through MADV_WIPEONFORK and MADV_KEEPONFORK if possible and
 967      * return failure if not.
 968      *
 969      * MADV_DONTNEED is passed through as well, if possible.
 970      * If passthrough isn't possible, we nevertheless (wrongly!) return
 971      * success, which is broken but some userspace programs fail to work
 972      * otherwise. Completely implementing such emulation is quite complicated
 973      * though.
 974      */
 975     mmap_lock();
 976     switch (advice) {
 977     case MADV_WIPEONFORK:
 978     case MADV_KEEPONFORK:
 979         ret = -EINVAL;
 980         /* fall through */
 981     case MADV_DONTNEED:
 982         if (page_check_range(start, len, PAGE_PASSTHROUGH)) {
 983             ret = get_errno(madvise(g2h_untagged(start), len, advice));
 984             if ((advice == MADV_DONTNEED) && (ret == 0)) {
 985                 page_reset_target_data(start, start + len - 1);
 986             }
 987         }
 988     }
 989     mmap_unlock();
 990
 991     return ret;
 992 }
 993
 994 #ifndef TARGET_FORCE_SHMLBA
 995 /*
 996  * For most architectures, SHMLBA is the same as the page size;
 997  * some architectures have larger values, in which case they should
 998  * define TARGET_FORCE_SHMLBA and provide a target_shmlba() function.
 999  * This corresponds to the kernel arch code defining __ARCH_FORCE_SHMLBA
1000  * and defining its own value for SHMLBA.
1001  *
1002  * The kernel also permits SHMLBA to be set by the architecture to a
1003  * value larger than the page size without setting __ARCH_FORCE_SHMLBA;
1004  * this means that addresses are rounded to the large size if
1005  * SHM_RND is set but addresses not aligned to that size are not rejected
1006  * as long as they are at least page-aligned. Since the only architecture
1007  * which uses this is ia64 this code doesn't provide for that oddity.
1008  */
1009 static inline abi_ulong target_shmlba(CPUArchState *cpu_env)
1010 {
1011     return TARGET_PAGE_SIZE;
1012 }
1013 #endif
1014
1015 abi_ulong target_shmat(CPUArchState *cpu_env, int shmid,
1016                        abi_ulong shmaddr, int shmflg)
1017 {
1018     CPUState *cpu = env_cpu(cpu_env);
1019     abi_ulong raddr;
1020     void *host_raddr;
1021     struct shmid_ds shm_info;
1022     int i, ret;
1023     abi_ulong shmlba;
1024
1025     /* shmat pointers are always untagged */
1026
1027     /* find out the length of the shared memory segment */
1028     ret = get_errno(shmctl(shmid, IPC_STAT, &shm_info));
1029     if (is_error(ret)) {
1030         /* can't get length, bail out */
1031         return ret;
1032     }
1033
1034     shmlba = target_shmlba(cpu_env);
1035
1036     if (shmaddr & (shmlba - 1)) {
1037         if (shmflg & SHM_RND) {
1038             shmaddr &= ~(shmlba - 1);
1039         } else {
1040             return -TARGET_EINVAL;
1041         }
1042     }
1043     if (!guest_range_valid_untagged(shmaddr, shm_info.shm_segsz)) {
1044         return -TARGET_EINVAL;
1045     }
1046
1047     mmap_lock();
1048
1049     /*
1050      * We're mapping shared memory, so ensure we generate code for parallel
1051      * execution and flush old translations.  This will work up to the level
1052      * supported by the host -- anything that requires EXCP_ATOMIC will not
1053      * be atomic with respect to an external process.
1054      */
1055     if (!(cpu->tcg_cflags & CF_PARALLEL)) {
1056         cpu->tcg_cflags |= CF_PARALLEL;
1057         tb_flush(cpu);
1058     }
1059
1060     if (shmaddr) {
1061         host_raddr = shmat(shmid, (void *)g2h_untagged(shmaddr), shmflg);
1062     } else {
1063         abi_ulong mmap_start;
1064
1065         /* In order to use the host shmat, we need to honor host SHMLBA.  */
1066         mmap_start = mmap_find_vma(0, shm_info.shm_segsz, MAX(SHMLBA, shmlba));
1067
1068         if (mmap_start == -1) {
1069             errno = ENOMEM;
1070             host_raddr = (void *)-1;
1071         } else {
1072             host_raddr = shmat(shmid, g2h_untagged(mmap_start),
1073                                shmflg | SHM_REMAP);
1074         }
1075     }
1076
1077     if (host_raddr == (void *)-1) {
1078         mmap_unlock();
1079         return get_errno((intptr_t)host_raddr);
1080     }
1081     raddr = h2g((uintptr_t)host_raddr);
1082
1083     page_set_flags(raddr, raddr + shm_info.shm_segsz - 1,
1084                    PAGE_VALID | PAGE_RESET | PAGE_READ |
1085                    (shmflg & SHM_RDONLY ? 0 : PAGE_WRITE));
1086
1087     for (i = 0; i < N_SHM_REGIONS; i++) {
1088         if (!shm_regions[i].in_use) {
1089             shm_regions[i].in_use = true;
1090             shm_regions[i].start = raddr;
1091             shm_regions[i].size = shm_info.shm_segsz;
1092             break;
1093         }
1094     }
1095
1096     mmap_unlock();
1097     return raddr;
1098 }
1099
1100 abi_long target_shmdt(abi_ulong shmaddr)
1101 {
1102     int i;
1103     abi_long rv;
1104
1105     /* shmdt pointers are always untagged */
1106
1107     mmap_lock();
1108
1109     for (i = 0; i < N_SHM_REGIONS; ++i) {
1110         if (shm_regions[i].in_use && shm_regions[i].start == shmaddr) {
1111             shm_regions[i].in_use = false;
1112             page_set_flags(shmaddr, shmaddr + shm_regions[i].size - 1, 0);
1113             break;
1114         }
1115     }
1116     rv = get_errno(shmdt(g2h_untagged(shmaddr)));
1117
1118     mmap_unlock();
1119
1120     return rv;
1121 }