linux-user/mmap.c

/*
 *  mmap support for qemu
 *
 *  Copyright (c) 2003 Fabrice Bellard
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, see <http://www.gnu.org/licenses/>.
 */
#include "qemu/osdep.h"
#include <sys/shm.h>
#include "trace.h"
#include "exec/log.h"
#include "qemu.h"
#include "user-internals.h"
#include "user-mmap.h"
#include "target_mman.h"
#include "qemu/interval-tree.h"

#ifdef TARGET_ARM
#include "target/arm/cpu-features.h"
#endif

static pthread_mutex_t mmap_mutex = PTHREAD_MUTEX_INITIALIZER;
static __thread int mmap_lock_count;

void mmap_lock(void)
{
    if (mmap_lock_count++ == 0) {
        pthread_mutex_lock(&mmap_mutex);
    }
}

void mmap_unlock(void)
{
    assert(mmap_lock_count > 0);
    if (--mmap_lock_count == 0) {
        pthread_mutex_unlock(&mmap_mutex);
    }
}

bool have_mmap_lock(void)
{
    return mmap_lock_count > 0;
}
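
/*
 * Example (illustrative only, not part of the original file): because
 * mmap_lock_count is per-thread, the lock is recursive within a thread
 * and nested critical sections are safe:
 *
 *     mmap_lock();
 *     mmap_lock();      -- nested call only bumps the count, no deadlock
 *     ...
 *     mmap_unlock();
 *     mmap_unlock();    -- the mutex is released on the outermost unlock
 */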

/* Grab lock to make sure things are in a consistent state after fork().  */
void mmap_fork_start(void)
{
    if (mmap_lock_count)
        abort();
    pthread_mutex_lock(&mmap_mutex);
}

void mmap_fork_end(int child)
{
    if (child) {
        pthread_mutex_init(&mmap_mutex, NULL);
    } else {
        pthread_mutex_unlock(&mmap_mutex);
    }
}
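
/*
 * A sketch of the intended call pattern (hypothetical caller, assuming
 * the fork path in QEMU's syscall emulation): the parent takes the mutex
 * before fork() so no other thread holds it at the moment of the clone,
 * and the child re-initializes it because the cloned mutex is owned by a
 * thread that does not exist in the child:
 *
 *     mmap_fork_start();
 *     pid = fork();
 *     mmap_fork_end(pid == 0);
 */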

/* Protected by mmap_lock. */
static IntervalTreeRoot shm_regions;

static void shm_region_add(abi_ptr start, abi_ptr last)
{
    IntervalTreeNode *i = g_new0(IntervalTreeNode, 1);

    i->start = start;
    i->last = last;
    interval_tree_insert(i, &shm_regions);
}

static abi_ptr shm_region_find(abi_ptr start)
{
    IntervalTreeNode *i;

    for (i = interval_tree_iter_first(&shm_regions, start, start); i;
         i = interval_tree_iter_next(i, start, start)) {
        if (i->start == start) {
            return i->last;
        }
    }
    return 0;
}

static void shm_region_rm_complete(abi_ptr start, abi_ptr last)
{
    IntervalTreeNode *i, *n;

    for (i = interval_tree_iter_first(&shm_regions, start, last); i; i = n) {
        n = interval_tree_iter_next(i, start, last);
        if (i->start >= start && i->last <= last) {
            interval_tree_remove(i, &shm_regions);
            g_free(i);
        }
    }
}
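
/*
 * Illustrative use of the bookkeeping above (not part of the original
 * file): target_shmat() records each attached segment so that
 * target_shmdt(), to which the kernel API gives only the start address,
 * can recover the segment's extent:
 *
 *     shm_region_add(raddr, raddr + segsz - 1);   -- on shmat
 *     last = shm_region_find(raddr);              -- on shmdt
 */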

/*
 * Validate target prot bitmask.
 * Return 0 if the target prot bitmask is invalid, otherwise
 * the internal qemu page_flags (which will include PAGE_VALID).
 */
static int validate_prot_to_pageflags(int prot)
{
    int valid = PROT_READ | PROT_WRITE | PROT_EXEC | TARGET_PROT_SEM;
    int page_flags = (prot & PAGE_BITS) | PAGE_VALID;

#ifdef TARGET_AARCH64
    {
        ARMCPU *cpu = ARM_CPU(thread_cpu);

        /*
         * The PROT_BTI bit is only accepted if the cpu supports the feature.
         * Since this is the unusual case, don't bother checking unless
         * the bit has been requested.  If set and valid, record the bit
         * within QEMU's page_flags.
         */
        if ((prot & TARGET_PROT_BTI) && cpu_isar_feature(aa64_bti, cpu)) {
            valid |= TARGET_PROT_BTI;
            page_flags |= PAGE_BTI;
        }
        /* Similarly for the PROT_MTE bit. */
        if ((prot & TARGET_PROT_MTE) && cpu_isar_feature(aa64_mte, cpu)) {
            valid |= TARGET_PROT_MTE;
            page_flags |= PAGE_MTE;
        }
    }
#elif defined(TARGET_HPPA)
    valid |= PROT_GROWSDOWN | PROT_GROWSUP;
#endif

    return prot & ~valid ? 0 : page_flags;
}
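
/*
 * Worked example (illustrative): PROT_READ | PROT_WRITE maps directly to
 * PAGE_READ | PAGE_WRITE | PAGE_VALID, since the low PAGE_BITS coincide
 * with the Linux PROT_* values, which is what '(prot & PAGE_BITS)' relies
 * on.  A request carrying any bit outside 'valid' -- e.g. PROT_GROWSDOWN
 * on a non-HPPA target -- yields 0, which callers turn into an EINVAL
 * error.
 */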

/*
 * For the host, we need not pass anything except read/write/exec.
 * While PROT_SEM is allowed by all hosts, it is also ignored, so
 * don't bother transforming guest bit to host bit.  Any other
 * target-specific prot bits will not be understood by the host
 * and will need to be encoded into page_flags for qemu emulation.
 *
 * Pages that are executable by the guest will never be executed
 * by the host, but the host will need to be able to read them.
 */
static int target_to_host_prot(int prot)
{
    return (prot & (PROT_READ | PROT_WRITE)) |
           (prot & PROT_EXEC ? PROT_READ : 0);
}
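
/*
 * Example mappings (illustrative): guest PROT_READ | PROT_EXEC becomes
 * host PROT_READ only -- the guest's code is executed by the TCG
 * translator, which merely needs to read it -- and guest PROT_EXEC
 * alone likewise becomes host PROT_READ.
 */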

/* NOTE: all the constants are the HOST ones, but addresses are target. */
int target_mprotect(abi_ulong start, abi_ulong len, int target_prot)
{
    abi_ulong starts[3];
    abi_ulong lens[3];
    int prots[3];
    abi_ulong host_start, host_last, last;
    int prot1, ret, page_flags, nranges;

    trace_target_mprotect(start, len, target_prot);

    if ((start & ~TARGET_PAGE_MASK) != 0) {
        return -TARGET_EINVAL;
    }
    page_flags = validate_prot_to_pageflags(target_prot);
    if (!page_flags) {
        return -TARGET_EINVAL;
    }
    if (len == 0) {
        return 0;
    }
    len = TARGET_PAGE_ALIGN(len);
    if (!guest_range_valid_untagged(start, len)) {
        return -TARGET_ENOMEM;
    }

    last = start + len - 1;
    host_start = start & qemu_host_page_mask;
    host_last = HOST_PAGE_ALIGN(last) - 1;
    nranges = 0;

    mmap_lock();

    if (host_last - host_start < qemu_host_page_size) {
        /* Single host page contains all guest pages: sum the prot. */
        prot1 = target_prot;
        for (abi_ulong a = host_start; a < start; a += TARGET_PAGE_SIZE) {
            prot1 |= page_get_flags(a);
        }
        for (abi_ulong a = last; a < host_last; a += TARGET_PAGE_SIZE) {
            prot1 |= page_get_flags(a + 1);
        }
        starts[nranges] = host_start;
        lens[nranges] = qemu_host_page_size;
        prots[nranges] = prot1;
        nranges++;
    } else {
        if (host_start < start) {
            /* Host page contains more than one guest page: sum the prot. */
            prot1 = target_prot;
            for (abi_ulong a = host_start; a < start; a += TARGET_PAGE_SIZE) {
                prot1 |= page_get_flags(a);
            }
            /* If the resulting sum differs, create a new range. */
            if (prot1 != target_prot) {
                starts[nranges] = host_start;
                lens[nranges] = qemu_host_page_size;
                prots[nranges] = prot1;
                nranges++;
                host_start += qemu_host_page_size;
            }
        }

        if (last < host_last) {
            /* Host page contains more than one guest page: sum the prot. */
            prot1 = target_prot;
            for (abi_ulong a = last; a < host_last; a += TARGET_PAGE_SIZE) {
                prot1 |= page_get_flags(a + 1);
            }
            /* If the resulting sum differs, create a new range. */
            if (prot1 != target_prot) {
                host_last -= qemu_host_page_size;
                starts[nranges] = host_last + 1;
                lens[nranges] = qemu_host_page_size;
                prots[nranges] = prot1;
                nranges++;
            }
        }

        /* Create a range for the middle, if any remains. */
        if (host_start < host_last) {
            starts[nranges] = host_start;
            lens[nranges] = host_last - host_start + 1;
            prots[nranges] = target_prot;
            nranges++;
        }
    }

    for (int i = 0; i < nranges; ++i) {
        ret = mprotect(g2h_untagged(starts[i]), lens[i],
                       target_to_host_prot(prots[i]));
        if (ret != 0) {
            goto error;
        }
    }

    page_set_flags(start, last, page_flags);
    ret = 0;

 error:
    mmap_unlock();
    return ret;
}
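
/*
 * A worked example of the range splitting above (illustrative, assuming
 * a 64KiB host page size emulating a 4KiB-page target): mprotect'ing a
 * guest range that starts and ends partway through different host pages
 * touches up to three host ranges -- the first host page (whose other
 * guest pages keep their summed protections), the last host page
 * (likewise), and the fully-covered middle -- hence the three-element
 * starts/lens/prots arrays.
 */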

/* map an incomplete host page */
static bool mmap_frag(abi_ulong real_start, abi_ulong start, abi_ulong last,
                      int prot, int flags, int fd, off_t offset)
{
    abi_ulong real_last;
    void *host_start;
    int prot_old, prot_new;
    int host_prot_old, host_prot_new;

    if (!(flags & MAP_ANONYMOUS)
        && (flags & MAP_TYPE) == MAP_SHARED
        && (prot & PROT_WRITE)) {
        /*
         * msync() won't work with the partial page, so we return an
         * error if write is possible while it is a shared mapping.
         */
        errno = EINVAL;
        return false;
    }

    real_last = real_start + qemu_host_page_size - 1;
    host_start = g2h_untagged(real_start);

    /* Get the protection of the target pages outside the mapping. */
    prot_old = 0;
    for (abi_ulong a = real_start; a < start; a += TARGET_PAGE_SIZE) {
        prot_old |= page_get_flags(a);
    }
    for (abi_ulong a = real_last; a > last; a -= TARGET_PAGE_SIZE) {
        prot_old |= page_get_flags(a);
    }

    if (prot_old == 0) {
        /*
         * Since !(prot_old & PAGE_VALID), there were no guest pages
         * outside of the fragment we need to map.  Allocate a new host
         * page to cover, discarding whatever else may have been present.
         */
        void *p = mmap(host_start, qemu_host_page_size,
                       target_to_host_prot(prot),
                       flags | MAP_ANONYMOUS, -1, 0);
        if (p != host_start) {
            if (p != MAP_FAILED) {
                munmap(p, qemu_host_page_size);
                errno = EEXIST;
            }
            return false;
        }
        prot_old = prot;
    }
    prot_new = prot | prot_old;

    host_prot_old = target_to_host_prot(prot_old);
    host_prot_new = target_to_host_prot(prot_new);

    /* Adjust protection to be able to write. */
    if (!(host_prot_old & PROT_WRITE)) {
        host_prot_old |= PROT_WRITE;
        mprotect(host_start, qemu_host_page_size, host_prot_old);
    }

    /* Read or zero the new guest pages. */
    if (flags & MAP_ANONYMOUS) {
        memset(g2h_untagged(start), 0, last - start + 1);
    } else {
        if (pread(fd, g2h_untagged(start), last - start + 1, offset) == -1) {
            return false;
        }
    }

    /* Put final protection */
    if (host_prot_new != host_prot_old) {
        mprotect(host_start, qemu_host_page_size, host_prot_new);
    }
    return true;
}
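
/*
 * Example scenario for mmap_frag() (illustrative, again assuming 64KiB
 * host pages and 4KiB guest pages): the guest maps [0x13000, 0x14000)
 * while the host page [0x10000, 0x20000) already holds other guest
 * pages.  The host page cannot simply be remapped, so the fragment is
 * filled in by hand: make the host page writable, memset() or pread()
 * the new guest pages into place, then restore the combined protection.
 */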

abi_ulong task_unmapped_base;
abi_ulong elf_et_dyn_base;
abi_ulong mmap_next_start;

/*
 * Subroutine of mmap_find_vma, used when we have pre-allocated
 * a chunk of guest address space.
 */
static abi_ulong mmap_find_vma_reserved(abi_ulong start, abi_ulong size,
                                        abi_ulong align)
{
    target_ulong ret;

    ret = page_find_range_empty(start, reserved_va, size, align);
    if (ret == -1 && start > mmap_min_addr) {
        /* Restart at the beginning of the address space. */
        ret = page_find_range_empty(mmap_min_addr, start - 1, size, align);
    }

    return ret;
}

/*
 * Find and reserve a free memory area of size 'size'. The search
 * starts at 'start'.
 * It must be called with mmap_lock() held.
 * Return -1 if error.
 */
abi_ulong mmap_find_vma(abi_ulong start, abi_ulong size, abi_ulong align)
{
    void *ptr, *prev;
    abi_ulong addr;
    int wrapped, repeat;

    align = MAX(align, qemu_host_page_size);

    /* If 'start' == 0, then a default start address is used. */
    if (start == 0) {
        start = mmap_next_start;
    } else {
        start &= qemu_host_page_mask;
    }
    start = ROUND_UP(start, align);

    size = HOST_PAGE_ALIGN(size);

    if (reserved_va) {
        return mmap_find_vma_reserved(start, size, align);
    }

    addr = start;
    wrapped = repeat = 0;
    prev = 0;

    for (;; prev = ptr) {
        /*
         * Reserve needed memory area to avoid a race.
         * It should be discarded using:
         *  - mmap() with MAP_FIXED flag
         *  - mremap() with MREMAP_FIXED flag
         *  - shmat() with SHM_REMAP flag
         */
        ptr = mmap(g2h_untagged(addr), size, PROT_NONE,
                   MAP_ANONYMOUS | MAP_PRIVATE | MAP_NORESERVE, -1, 0);

        /* ENOMEM, if host address space has no memory */
        if (ptr == MAP_FAILED) {
            return (abi_ulong)-1;
        }

        /*
         * Count the number of sequential returns of the same address.
         * This is used to modify the search algorithm below.
         */
        repeat = (ptr == prev ? repeat + 1 : 0);

        if (h2g_valid(ptr + size - 1)) {
            addr = h2g(ptr);

            if ((addr & (align - 1)) == 0) {
                /* Success.  */
                if (start == mmap_next_start && addr >= task_unmapped_base) {
                    mmap_next_start = addr + size;
                }
                return addr;
            }

            /* The address is not properly aligned for the target.  */
            switch (repeat) {
            case 0:
                /*
                 * Assume the result that the kernel gave us is the
                 * first with enough free space, so start again at the
                 * next higher target page.
                 */
                addr = ROUND_UP(addr, align);
                break;
            case 1:
                /*
                 * Sometimes the kernel decides to perform the allocation
                 * at the top end of memory instead.
                 */
                addr &= -align;
                break;
            case 2:
                /* Start over at low memory.  */
                addr = 0;
                break;
            default:
                /* Fail.  This unaligned block must be the last.  */
                addr = -1;
                break;
            }
        } else {
            /*
             * Since the result the kernel gave didn't fit, start
             * again at low memory.  If any repetition, fail.
             */
            addr = (repeat ? -1 : 0);
        }

        /* Unmap and try again.  */
        munmap(ptr, size);

        /* ENOMEM if we checked the whole of the target address space.  */
        if (addr == (abi_ulong)-1) {
            return (abi_ulong)-1;
        } else if (addr == 0) {
            if (wrapped) {
                return (abi_ulong)-1;
            }
            wrapped = 1;
            /*
             * Don't actually use 0 when wrapping, instead indicate
             * that we'd truly like an allocation in low memory.
             */
            addr = (mmap_min_addr > TARGET_PAGE_SIZE
                    ? TARGET_PAGE_ALIGN(mmap_min_addr)
                    : TARGET_PAGE_SIZE);
        } else if (wrapped && addr >= start) {
            return (abi_ulong)-1;
        }
    }
}
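
/*
 * Typical use (illustrative): callers that need a fresh region ask for a
 * kernel-chosen address and then claim the PROT_NONE reservation made
 * above with MAP_FIXED, exactly as target_mmap() does below:
 *
 *     start = mmap_find_vma(0, host_len, TARGET_PAGE_SIZE);
 *     if (start != (abi_ulong)-1) {
 *         p = mmap(g2h_untagged(start), host_len, host_prot,
 *                  flags | MAP_FIXED | MAP_ANONYMOUS, -1, 0);
 *     }
 */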

/* NOTE: all the constants are the HOST ones */
abi_long target_mmap(abi_ulong start, abi_ulong len, int target_prot,
                     int flags, int fd, off_t offset)
{
    abi_ulong ret, last, real_start, real_last, retaddr, host_len;
    abi_ulong passthrough_start = -1, passthrough_last = 0;
    int page_flags;
    off_t host_offset;

    mmap_lock();
    trace_target_mmap(start, len, target_prot, flags, fd, offset);

    if (!len) {
        errno = EINVAL;
        goto fail;
    }

    page_flags = validate_prot_to_pageflags(target_prot);
    if (!page_flags) {
        errno = EINVAL;
        goto fail;
    }

    /* Also check for overflows... */
    len = TARGET_PAGE_ALIGN(len);
    if (!len) {
        errno = ENOMEM;
        goto fail;
    }

    if (offset & ~TARGET_PAGE_MASK) {
        errno = EINVAL;
        goto fail;
    }

    /*
     * If we're mapping shared memory, ensure we generate code for parallel
     * execution and flush old translations.  This will work up to the level
     * supported by the host -- anything that requires EXCP_ATOMIC will not
     * be atomic with respect to an external process.
     */
    if (flags & MAP_SHARED) {
        CPUState *cpu = thread_cpu;
        if (!(cpu->tcg_cflags & CF_PARALLEL)) {
            cpu->tcg_cflags |= CF_PARALLEL;
            tb_flush(cpu);
        }
    }

    real_start = start & qemu_host_page_mask;
    host_offset = offset & qemu_host_page_mask;

    /*
     * If the user is asking for the kernel to find a location, do that
     * before we truncate the length for mapping files below.
     */
    if (!(flags & (MAP_FIXED | MAP_FIXED_NOREPLACE))) {
        host_len = len + offset - host_offset;
        host_len = HOST_PAGE_ALIGN(host_len);
        start = mmap_find_vma(real_start, host_len, TARGET_PAGE_SIZE);
        if (start == (abi_ulong)-1) {
            errno = ENOMEM;
            goto fail;
        }
    }

    /*
     * When mapping files into a memory area larger than the file, accesses
     * to pages beyond the file size will cause a SIGBUS.
     *
     * For example, if mmap()ing a file of 100 bytes on a host with 4K
     * pages emulating a target with 8K pages, the target expects to be
     * able to access the first 8K. But the host will trap us on any
     * access beyond 4K.
     *
     * When emulating a target with a larger page size than the host's, we
     * may need to truncate file maps at EOF and add extra anonymous pages
     * up to the target's page boundary.
     */
    if ((qemu_real_host_page_size() < qemu_host_page_size) &&
        !(flags & MAP_ANONYMOUS)) {
        struct stat sb;

        if (fstat(fd, &sb) == -1) {
            goto fail;
        }

        /* Are we trying to create a map beyond EOF? */
        if (offset + len > sb.st_size) {
            /*
             * If so, truncate the file map at EOF aligned with
             * the host's real page size. Additional anonymous maps
             * will be created beyond EOF.
             */
            len = REAL_HOST_PAGE_ALIGN(sb.st_size - offset);
        }
    }

    if (!(flags & (MAP_FIXED | MAP_FIXED_NOREPLACE))) {
        uintptr_t host_start;
        int host_prot;
        void *p;

        host_len = len + offset - host_offset;
        host_len = HOST_PAGE_ALIGN(host_len);
        host_prot = target_to_host_prot(target_prot);

        /*
         * Note: we prefer to control the mapping address. It is
         * especially important if qemu_host_page_size >
         * qemu_real_host_page_size.
         */
        p = mmap(g2h_untagged(start), host_len, host_prot,
                 flags | MAP_FIXED | MAP_ANONYMOUS, -1, 0);
        if (p == MAP_FAILED) {
            goto fail;
        }
        /* update start so that it points to the file position at 'offset' */
        host_start = (uintptr_t)p;
        if (!(flags & MAP_ANONYMOUS)) {
            p = mmap(g2h_untagged(start), len, host_prot,
                     flags | MAP_FIXED, fd, host_offset);
            if (p == MAP_FAILED) {
                munmap(g2h_untagged(start), host_len);
                goto fail;
            }
            host_start += offset - host_offset;
        }
        start = h2g(host_start);
        last = start + len - 1;
        passthrough_start = start;
        passthrough_last = last;
    } else {
        if (start & ~TARGET_PAGE_MASK) {
            errno = EINVAL;
            goto fail;
        }
        last = start + len - 1;
        real_last = HOST_PAGE_ALIGN(last) - 1;

        /*
         * Test if requested memory area fits target address space.
         * It can fail only on 64-bit host with 32-bit target.
         * On any other target/host combination, host mmap() handles
         * this error correctly.
         */
        if (last < start || !guest_range_valid_untagged(start, len)) {
            errno = ENOMEM;
            goto fail;
        }

        if (flags & MAP_FIXED_NOREPLACE) {
            /* Validate that the chosen range is empty. */
            if (!page_check_range_empty(start, last)) {
                errno = EEXIST;
                goto fail;
            }

            /*
             * With reserved_va, the entire address space is mmaped in the
             * host to ensure it isn't accidentally used for something else.
             * We have just checked that the guest address is not mapped
             * within the guest, but need to replace the host reservation.
             *
             * Without reserved_va, despite the guest address check above,
             * keep MAP_FIXED_NOREPLACE so that the guest does not overwrite
             * any host address mappings.
             */
            if (reserved_va) {
                flags = (flags & ~MAP_FIXED_NOREPLACE) | MAP_FIXED;
            }
        }

        /*
         * worst case: we cannot map the file because the offset is not
         * aligned, so we read it
         */
        if (!(flags & MAP_ANONYMOUS) &&
            (offset & ~qemu_host_page_mask) != (start & ~qemu_host_page_mask)) {
            /*
             * msync() won't work here, so we return an error if write is
             * possible while it is a shared mapping
             */
            if ((flags & MAP_TYPE) == MAP_SHARED
                && (target_prot & PROT_WRITE)) {
                errno = EINVAL;
                goto fail;
            }
            retaddr = target_mmap(start, len, target_prot | PROT_WRITE,
                                  (flags & (MAP_FIXED | MAP_FIXED_NOREPLACE))
                                  | MAP_PRIVATE | MAP_ANONYMOUS,
                                  -1, 0);
            if (retaddr == -1) {
                goto fail;
            }
            if (pread(fd, g2h_untagged(start), len, offset) == -1) {
                goto fail;
            }
            if (!(target_prot & PROT_WRITE)) {
                ret = target_mprotect(start, len, target_prot);
                assert(ret == 0);
            }
            goto the_end;
        }

        /* handle the start of the mapping */
        if (start > real_start) {
            if (real_last == real_start + qemu_host_page_size - 1) {
                /* one single host page */
                if (!mmap_frag(real_start, start, last,
                               target_prot, flags, fd, offset)) {
                    goto fail;
                }
                goto the_end1;
            }
            if (!mmap_frag(real_start, start,
                           real_start + qemu_host_page_size - 1,
                           target_prot, flags, fd, offset)) {
                goto fail;
            }
            real_start += qemu_host_page_size;
        }
        /* handle the end of the mapping */
        if (last < real_last) {
            abi_ulong real_page = real_last - qemu_host_page_size + 1;
            if (!mmap_frag(real_page, real_page, last,
                           target_prot, flags, fd,
                           offset + real_page - start)) {
                goto fail;
            }
            real_last -= qemu_host_page_size;
        }

        /* map the middle (easier) */
        if (real_start < real_last) {
            void *p, *want_p;
            off_t offset1;
            size_t len1;

            if (flags & MAP_ANONYMOUS) {
                offset1 = 0;
            } else {
                offset1 = offset + real_start - start;
            }
            len1 = real_last - real_start + 1;
            want_p = g2h_untagged(real_start);

            p = mmap(want_p, len1, target_to_host_prot(target_prot),
                     flags, fd, offset1);
            if (p != want_p) {
                if (p != MAP_FAILED) {
                    munmap(p, len1);
                    errno = EEXIST;
                }
                goto fail;
            }
            passthrough_start = real_start;
            passthrough_last = real_last;
        }
    }
 the_end1:
    if (flags & MAP_ANONYMOUS) {
        page_flags |= PAGE_ANON;
    }
    page_flags |= PAGE_RESET;
    if (passthrough_start > passthrough_last) {
        page_set_flags(start, last, page_flags);
    } else {
        if (start < passthrough_start) {
            page_set_flags(start, passthrough_start - 1, page_flags);
        }
        page_set_flags(passthrough_start, passthrough_last,
                       page_flags | PAGE_PASSTHROUGH);
        if (passthrough_last < last) {
            page_set_flags(passthrough_last + 1, last, page_flags);
        }
    }
    shm_region_rm_complete(start, last);
 the_end:
    trace_target_mmap_complete(start);
    if (qemu_loglevel_mask(CPU_LOG_PAGE)) {
        FILE *f = qemu_log_trylock();
        if (f) {
            fprintf(f, "page layout changed following mmap\n");
            page_dump(f);
            qemu_log_unlock(f);
        }
    }
    mmap_unlock();
    return start;
 fail:
    mmap_unlock();
    return -1;
}
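
/*
 * Example invocation (illustrative, mirroring how the syscall layer uses
 * this): an anonymous private mapping at a kernel-chosen address --
 *
 *     abi_long addr = target_mmap(0, 8192, PROT_READ | PROT_WRITE,
 *                                 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
 *
 * -- returns the guest address on success, or -1 with errno set, just
 * like the mmap(2) it emulates.
 */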

static int mmap_reserve_or_unmap(abi_ulong start, abi_ulong len)
{
    abi_ulong real_start;
    abi_ulong real_last;
    abi_ulong real_len;
    abi_ulong last;
    abi_ulong a;
    void *host_start;
    int prot;

    last = start + len - 1;
    real_start = start & qemu_host_page_mask;
    real_last = HOST_PAGE_ALIGN(last) - 1;

    /*
     * If guest pages remain on the first or last host pages,
     * adjust the deallocation to retain those guest pages.
     * The single page special case is required for the last page,
     * lest real_start overflow to zero.
     */
    if (real_last - real_start < qemu_host_page_size) {
        prot = 0;
        for (a = real_start; a < start; a += TARGET_PAGE_SIZE) {
            prot |= page_get_flags(a);
        }
        for (a = last; a < real_last; a += TARGET_PAGE_SIZE) {
            prot |= page_get_flags(a + 1);
        }
        if (prot != 0) {
            return 0;
        }
    } else {
        for (prot = 0, a = real_start; a < start; a += TARGET_PAGE_SIZE) {
            prot |= page_get_flags(a);
        }
        if (prot != 0) {
            real_start += qemu_host_page_size;
        }

        for (prot = 0, a = last; a < real_last; a += TARGET_PAGE_SIZE) {
            prot |= page_get_flags(a + 1);
        }
        if (prot != 0) {
            real_last -= qemu_host_page_size;
        }

        if (real_last < real_start) {
            return 0;
        }
    }

    real_len = real_last - real_start + 1;
    host_start = g2h_untagged(real_start);

    if (reserved_va) {
        void *ptr = mmap(host_start, real_len, PROT_NONE,
                         MAP_FIXED | MAP_ANONYMOUS
                         | MAP_PRIVATE | MAP_NORESERVE, -1, 0);
        return ptr == host_start ? 0 : -1;
    }
    return munmap(host_start, real_len);
}
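
/*
 * Design note with a small example (illustrative): under reserved_va the
 * whole guest address space is kept mmap'd in the host, so "unmap" must
 * not punch a hole -- the range is re-mapped PROT_NONE instead:
 *
 *     mmap_reserve_or_unmap(addr, len);
 *         -- munmap(host, len)                        without reserved_va
 *         -- mmap(host, len, PROT_NONE, MAP_FIXED...) with reserved_va
 */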

int target_munmap(abi_ulong start, abi_ulong len)
{
    int ret;

    trace_target_munmap(start, len);

    if (start & ~TARGET_PAGE_MASK) {
        errno = EINVAL;
        return -1;
    }
    len = TARGET_PAGE_ALIGN(len);
    if (len == 0 || !guest_range_valid_untagged(start, len)) {
        errno = EINVAL;
        return -1;
    }

    mmap_lock();
    ret = mmap_reserve_or_unmap(start, len);
    if (likely(ret == 0)) {
        page_set_flags(start, start + len - 1, 0);
        shm_region_rm_complete(start, start + len - 1);
    }
    mmap_unlock();

    return ret;
}

abi_long target_mremap(abi_ulong old_addr, abi_ulong old_size,
                       abi_ulong new_size, unsigned long flags,
                       abi_ulong new_addr)
{
    int prot;
    void *host_addr;

    if (!guest_range_valid_untagged(old_addr, old_size) ||
        ((flags & MREMAP_FIXED) &&
         !guest_range_valid_untagged(new_addr, new_size)) ||
        ((flags & MREMAP_MAYMOVE) == 0 &&
         !guest_range_valid_untagged(old_addr, new_size))) {
        errno = ENOMEM;
        return -1;
    }

    mmap_lock();

    if (flags & MREMAP_FIXED) {
        host_addr = mremap(g2h_untagged(old_addr), old_size, new_size,
                           flags, g2h_untagged(new_addr));

        if (reserved_va && host_addr != MAP_FAILED) {
            /*
             * If new and old addresses overlap then the above mremap will
             * already have failed with EINVAL.
             */
            mmap_reserve_or_unmap(old_addr, old_size);
        }
    } else if (flags & MREMAP_MAYMOVE) {
        abi_ulong mmap_start;

        mmap_start = mmap_find_vma(0, new_size, TARGET_PAGE_SIZE);

        if (mmap_start == -1) {
            errno = ENOMEM;
            host_addr = MAP_FAILED;
        } else {
            host_addr = mremap(g2h_untagged(old_addr), old_size, new_size,
                               flags | MREMAP_FIXED,
                               g2h_untagged(mmap_start));
            if (reserved_va) {
                mmap_reserve_or_unmap(old_addr, old_size);
            }
        }
    } else {
        int page_flags = 0;
        if (reserved_va && old_size < new_size) {
            abi_ulong addr;
            for (addr = old_addr + old_size;
                 addr < old_addr + new_size;
                 addr++) {
                page_flags |= page_get_flags(addr);
            }
        }
        if (page_flags == 0) {
            host_addr = mremap(g2h_untagged(old_addr),
                               old_size, new_size, flags);

            if (host_addr != MAP_FAILED) {
                /* Check if address fits target address space */
                if (!guest_range_valid_untagged(h2g(host_addr), new_size)) {
                    /* Revert mremap() changes */
                    host_addr = mremap(g2h_untagged(old_addr),
                                       new_size, old_size, flags);
                    errno = ENOMEM;
                    host_addr = MAP_FAILED;
                } else if (reserved_va && old_size > new_size) {
                    /* Re-reserve the tail freed by shrinking in place. */
                    mmap_reserve_or_unmap(old_addr + new_size,
                                          old_size - new_size);
                }
            }
        } else {
            errno = ENOMEM;
            host_addr = MAP_FAILED;
        }
    }

    if (host_addr == MAP_FAILED) {
        new_addr = -1;
    } else {
        new_addr = h2g(host_addr);
        prot = page_get_flags(old_addr);
        page_set_flags(old_addr, old_addr + old_size - 1, 0);
        shm_region_rm_complete(old_addr, old_addr + old_size - 1);
        page_set_flags(new_addr, new_addr + new_size - 1,
                       prot | PAGE_VALID | PAGE_RESET);
        shm_region_rm_complete(new_addr, new_addr + new_size - 1);
    }
    mmap_unlock();
    return new_addr;
}
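
/*
 * Note on the non-MAYMOVE path above (illustrative): when reserved_va is
 * in effect, the guest page table is consulted instead of the host
 * kernel, so growing in place -- e.g. mremap(addr, 4096, 8192, 0) --
 * succeeds only if the guest pages past the old end are unmapped;
 * otherwise ENOMEM is returned, matching what the kernel would do.
 */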

abi_long target_madvise(abi_ulong start, abi_ulong len_in, int advice)
{
    abi_ulong len;
    int ret = 0;

    if (start & ~TARGET_PAGE_MASK) {
        return -TARGET_EINVAL;
    }
    if (len_in == 0) {
        return 0;
    }
    len = TARGET_PAGE_ALIGN(len_in);
    if (len == 0 || !guest_range_valid_untagged(start, len)) {
        return -TARGET_EINVAL;
    }

    /* Translate for some architectures which have different MADV_xxx values */
    switch (advice) {
    case TARGET_MADV_DONTNEED:      /* alpha */
        advice = MADV_DONTNEED;
        break;
    case TARGET_MADV_WIPEONFORK:    /* parisc */
        advice = MADV_WIPEONFORK;
        break;
    case TARGET_MADV_KEEPONFORK:    /* parisc */
        advice = MADV_KEEPONFORK;
        break;
    /* we do not care about the other MADV_xxx values yet */
    }

    /*
     * Most advice values are hints, so ignoring and returning success is ok.
     *
     * However, some advice values such as MADV_DONTNEED, MADV_WIPEONFORK and
     * MADV_KEEPONFORK are not hints and need to be emulated.
     *
     * A straight passthrough for those may not be safe because qemu sometimes
     * turns private file-backed mappings into anonymous mappings.
     * If all guest pages have PAGE_PASSTHROUGH set, mappings have the
     * same semantics for the host as for the guest.
     *
     * We pass through MADV_WIPEONFORK and MADV_KEEPONFORK if possible and
     * return failure if not.
     *
     * MADV_DONTNEED is passed through as well, if possible.
     * If passthrough isn't possible, we nevertheless (wrongly!) return
     * success, which is broken but some userspace programs fail to work
     * otherwise. Completely implementing such emulation is quite complicated
     * though.
     */
    mmap_lock();
    switch (advice) {
    case MADV_WIPEONFORK:
    case MADV_KEEPONFORK:
        ret = -EINVAL;
        /* fall through */
    case MADV_DONTNEED:
        if (page_check_range(start, len, PAGE_PASSTHROUGH)) {
            ret = get_errno(madvise(g2h_untagged(start), len, advice));
            if ((advice == MADV_DONTNEED) && (ret == 0)) {
                page_reset_target_data(start, start + len - 1);
            }
        }
    }
    mmap_unlock();

    return ret;
}
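
/*
 * Example of the passthrough rule above (illustrative): MADV_DONTNEED is
 * forwarded to the host only if page_check_range(start, len,
 * PAGE_PASSTHROUGH) holds, i.e. every page in the range is backed
 * one-for-one by the host mapping; otherwise the call returns 0 without
 * touching the host, as described in the comment above.
 */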

#ifndef TARGET_FORCE_SHMLBA
/*
 * For most architectures, SHMLBA is the same as the page size;
 * some architectures have larger values, in which case they should
 * define TARGET_FORCE_SHMLBA and provide a target_shmlba() function.
 * This corresponds to the kernel arch code defining __ARCH_FORCE_SHMLBA
 * and defining its own value for SHMLBA.
 *
 * The kernel also permits SHMLBA to be set by the architecture to a
 * value larger than the page size without setting __ARCH_FORCE_SHMLBA;
 * this means that addresses are rounded to the large size if
 * SHM_RND is set but addresses not aligned to that size are not rejected
 * as long as they are at least page-aligned. Since the only architecture
 * which uses this is ia64 this code doesn't provide for that oddity.
 */
static inline abi_ulong target_shmlba(CPUArchState *cpu_env)
{
    return TARGET_PAGE_SIZE;
}
#endif

abi_ulong target_shmat(CPUArchState *cpu_env, int shmid,
                       abi_ulong shmaddr, int shmflg)
{
    CPUState *cpu = env_cpu(cpu_env);
    abi_ulong raddr;
    struct shmid_ds shm_info;
    int ret;
    abi_ulong shmlba;

    /* shmat pointers are always untagged */

    /* find out the length of the shared memory segment */
    ret = get_errno(shmctl(shmid, IPC_STAT, &shm_info));
    if (is_error(ret)) {
        /* can't get length, bail out */
        return ret;
    }

    shmlba = target_shmlba(cpu_env);

    if (shmaddr & (shmlba - 1)) {
        if (shmflg & SHM_RND) {
            shmaddr &= ~(shmlba - 1);
        } else {
            return -TARGET_EINVAL;
        }
    }
    if (!guest_range_valid_untagged(shmaddr, shm_info.shm_segsz)) {
        return -TARGET_EINVAL;
    }

    WITH_MMAP_LOCK_GUARD() {
        void *host_raddr;
        abi_ulong last;

        if (shmaddr) {
            host_raddr = shmat(shmid, (void *)g2h_untagged(shmaddr), shmflg);
        } else {
            abi_ulong mmap_start;

            /* In order to use the host shmat, we need to honor host SHMLBA. */
            mmap_start = mmap_find_vma(0, shm_info.shm_segsz,
                                       MAX(SHMLBA, shmlba));

            if (mmap_start == -1) {
                return -TARGET_ENOMEM;
            }
            host_raddr = shmat(shmid, g2h_untagged(mmap_start),
                               shmflg | SHM_REMAP);
        }

        if (host_raddr == (void *)-1) {
            return get_errno(-1);
        }
        raddr = h2g(host_raddr);
        last = raddr + shm_info.shm_segsz - 1;

        page_set_flags(raddr, last,
                       PAGE_VALID | PAGE_RESET | PAGE_READ |
                       (shmflg & SHM_RDONLY ? 0 : PAGE_WRITE));

        shm_region_rm_complete(raddr, last);
        shm_region_add(raddr, last);
    }

    /*
     * We're mapping shared memory, so ensure we generate code for parallel
     * execution and flush old translations.  This will work up to the level
     * supported by the host -- anything that requires EXCP_ATOMIC will not
     * be atomic with respect to an external process.
     */
    if (!(cpu->tcg_cflags & CF_PARALLEL)) {
        cpu->tcg_cflags |= CF_PARALLEL;
        tb_flush(cpu);
    }

    return raddr;
}

abi_long target_shmdt(abi_ulong shmaddr)
{
    abi_long rv;

    /* shmdt pointers are always untagged */

    WITH_MMAP_LOCK_GUARD() {
        abi_ulong last = shm_region_find(shmaddr);
        if (last == 0) {
            return -TARGET_EINVAL;
        }

        rv = get_errno(shmdt(g2h_untagged(shmaddr)));
        if (rv == 0) {
            abi_ulong size = last - shmaddr + 1;

            page_set_flags(shmaddr, last, 0);
            shm_region_rm_complete(shmaddr, last);
            mmap_reserve_or_unmap(shmaddr, size);
        }
    }
    return rv;
}