linux-user/mmap.c

   1 /*
   2  *  mmap support for qemu
   3  *
   4  *  Copyright (c) 2003 Fabrice Bellard
   5  *
   6  *  This program is free software; you can redistribute it and/or modify
   7  *  it under the terms of the GNU General Public License as published by
   8  *  the Free Software Foundation; either version 2 of the License, or
   9  *  (at your option) any later version.
  10  *
  11  *  This program is distributed in the hope that it will be useful,
  12  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
  13  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14  *  GNU General Public License for more details.
  15  *
  16  *  You should have received a copy of the GNU General Public License
  17  *  along with this program; if not, see <http://www.gnu.org/licenses/>.
  18  */
  19 #include "qemu/osdep.h"
  20 #include <sys/shm.h>
  21 #include "trace.h"
  22 #include "exec/log.h"
  23 #include "qemu.h"
  24 #include "user-internals.h"
  25 #include "user-mmap.h"
  26 #include "target_mman.h"
  27 #include "qemu/interval-tree.h"
  28
  29 #ifdef TARGET_ARM
  30 #include "target/arm/cpu-features.h"
  31 #endif
  32
  33 static pthread_mutex_t mmap_mutex = PTHREAD_MUTEX_INITIALIZER;
  34 static __thread int mmap_lock_count;
  35
  36 void mmap_lock(void)
  37 {
  38     if (mmap_lock_count++ == 0) {
  39         pthread_mutex_lock(&mmap_mutex);
  40     }
  41 }
  42
  43 void mmap_unlock(void)
  44 {
  45     assert(mmap_lock_count > 0);
  46     if (--mmap_lock_count == 0) {
  47         pthread_mutex_unlock(&mmap_mutex);
  48     }
  49 }
  50
  51 bool have_mmap_lock(void)
  52 {
  53     return mmap_lock_count > 0 ? true : false;
  54 }
  55
  56 /* Grab lock to make sure things are in a consistent state after fork().  */
  57 void mmap_fork_start(void)
  58 {
  59     if (mmap_lock_count)
  60         abort();
  61     pthread_mutex_lock(&mmap_mutex);
  62 }
  63
  64 void mmap_fork_end(int child)
  65 {
  66     if (child) {
  67         pthread_mutex_init(&mmap_mutex, NULL);
  68     } else {
  69         pthread_mutex_unlock(&mmap_mutex);
  70     }
  71 }
  72
  73 /* Protected by mmap_lock. */
     /*
      * Interval tree of inclusive guest ranges [start, last]; nodes are
      * inserted by shm_region_add(), looked up by shm_region_find() and
      * dropped by shm_region_rm_complete().
      */
  74 static IntervalTreeRoot shm_regions;
  75
  76 static void shm_region_add(abi_ptr start, abi_ptr last)
  77 {
  78     IntervalTreeNode *i = g_new0(IntervalTreeNode, 1);
  79
  80     i->start = start;
  81     i->last = last;
  82     interval_tree_insert(i, &shm_regions);
  83 }
  84
  85 static abi_ptr shm_region_find(abi_ptr start)
  86 {
  87     IntervalTreeNode *i;
  88
  89     for (i = interval_tree_iter_first(&shm_regions, start, start); i;
  90          i = interval_tree_iter_next(i, start, start)) {
  91         if (i->start == start) {
  92             return i->last;
  93         }
  94     }
  95     return 0;
  96 }
  97
  98 static void shm_region_rm_complete(abi_ptr start, abi_ptr last)
  99 {
 100     IntervalTreeNode *i, *n;
 101
 102     for (i = interval_tree_iter_first(&shm_regions, start, last); i; i = n) {
 103         n = interval_tree_iter_next(i, start, last);
 104         if (i->start >= start && i->last <= last) {
 105             interval_tree_remove(i, &shm_regions);
 106             g_free(i);
 107         }
 108     }
 109 }
 110
 111 /*
 112  * Validate target prot bitmask.
 113  * Return the prot bitmask for the host in *HOST_PROT.
 114  * Return 0 if the target prot bitmask is invalid, otherwise
 115  * the internal qemu page_flags (which will include PAGE_VALID).
 116  */
 117 static int validate_prot_to_pageflags(int prot)
 118 {
 119     int valid = PROT_READ | PROT_WRITE | PROT_EXEC | TARGET_PROT_SEM;
 120     int page_flags = (prot & PAGE_BITS) | PAGE_VALID;
 121
 122 #ifdef TARGET_AARCH64
 123     {
 124         ARMCPU *cpu = ARM_CPU(thread_cpu);
 125
 126         /*
 127          * The PROT_BTI bit is only accepted if the cpu supports the feature.
 128          * Since this is the unusual case, don't bother checking unless
 129          * the bit has been requested.  If set and valid, record the bit
 130          * within QEMU's page_flags.
 131          */
 132         if ((prot & TARGET_PROT_BTI) && cpu_isar_feature(aa64_bti, cpu)) {
 133             valid |= TARGET_PROT_BTI;
 134             page_flags |= PAGE_BTI;
 135         }
 136         /* Similarly for the PROT_MTE bit. */
 137         if ((prot & TARGET_PROT_MTE) && cpu_isar_feature(aa64_mte, cpu)) {
 138             valid |= TARGET_PROT_MTE;
 139             page_flags |= PAGE_MTE;
 140         }
 141     }
 142 #elif defined(TARGET_HPPA)
 143     valid |= PROT_GROWSDOWN | PROT_GROWSUP;
 144 #endif
 145
 146     return prot & ~valid ? 0 : page_flags;
 147 }
 148
 /*
  * Convert a guest prot bitmask into the one handed to the host.
  *
  * Only read and write are passed through.  PROT_SEM is accepted but
  * ignored by every host, so it is not translated, and any other
  * target-specific bits have to live in page_flags for qemu to emulate.
  *
  * Guest-executable pages are never executed by the host, but the host
  * must be able to read them, so PROT_EXEC becomes PROT_READ.
  */
 static int target_to_host_prot(int prot)
 {
     int host_prot = prot & (PROT_READ | PROT_WRITE);

     if (prot & PROT_EXEC) {
         host_prot |= PROT_READ;
     }
     return host_prot;
 }
 164
 165 /* NOTE: all the constants are the HOST ones, but addresses are target. */
     /*
      * Change the protection of the guest range [start, start + len) to
      * target_prot.
      *
      * Returns 0 on success (also when len == 0 and the other arguments are
      * acceptable), -TARGET_EINVAL if start is not target-page aligned or
      * target_prot is rejected by validate_prot_to_pageflags(),
      * -TARGET_ENOMEM if the range is not a valid guest range, or the
      * non-zero return value of a failing host mprotect().
      *
      * NOTE(review): the host mprotect() calls are issued range by range; if
      * a later one fails, earlier ranges have already been changed while the
      * guest page flags are left untouched.
      */
 166 int target_mprotect(abi_ulong start, abi_ulong len, int target_prot)
 167 {
 168     int host_page_size = qemu_real_host_page_size();
         /* At most three host ranges: first host page, last host page, middle. */
 169     abi_ulong starts[3];
 170     abi_ulong lens[3];
 171     int prots[3];
 172     abi_ulong host_start, host_last, last;
 173     int prot1, ret, page_flags, nranges;
 174
 175     trace_target_mprotect(start, len, target_prot);
 176
 177     if ((start & ~TARGET_PAGE_MASK) != 0) {
 178         return -TARGET_EINVAL;
 179     }
 180     page_flags = validate_prot_to_pageflags(target_prot);
 181     if (!page_flags) {
 182         return -TARGET_EINVAL;
 183     }
 184     if (len == 0) {
 185         return 0;
 186     }
 187     len = TARGET_PAGE_ALIGN(len);
 188     if (!guest_range_valid_untagged(start, len)) {
 189         return -TARGET_ENOMEM;
 190     }
 191
         /*
          * last is the final byte of the guest range; host_start/host_last
          * bound the range once widened to whole host pages.
          */
 192     last = start + len - 1;
 193     host_start = start & -host_page_size;
 194     host_last = HOST_PAGE_ALIGN(last) - 1;
 195     nranges = 0;
 196
 197     mmap_lock();
 198
 199     if (host_last - host_start < host_page_size) {
 200         /* Single host page contains all guest pages: sum the prot. */
 201         prot1 = target_prot;
 202         for (abi_ulong a = host_start; a < start; a += TARGET_PAGE_SIZE) {
 203             prot1 |= page_get_flags(a);
 204         }
 205         for (abi_ulong a = last; a < host_last; a += TARGET_PAGE_SIZE) {
 206             prot1 |= page_get_flags(a + 1);
 207         }
 208         starts[nranges] = host_start;
 209         lens[nranges] = host_page_size;
 210         prots[nranges] = prot1;
 211         nranges++;
 212     } else {
 213         if (host_start < start) {
 214             /* Host page contains more than one guest page: sum the prot. */
 215             prot1 = target_prot;
 216             for (abi_ulong a = host_start; a < start; a += TARGET_PAGE_SIZE) {
 217                 prot1 |= page_get_flags(a);
 218             }
 219             /* If the resulting sum differs, create a new range. */
 220             if (prot1 != target_prot) {
 221                 starts[nranges] = host_start;
 222                 lens[nranges] = host_page_size;
 223                 prots[nranges] = prot1;
 224                 nranges++;
                     /* The first host page is handled; exclude it from the middle. */
 225                 host_start += host_page_size;
 226             }
 227         }
 228
 229         if (last < host_last) {
 230             /* Host page contains more than one guest page: sum the prot. */
 231             prot1 = target_prot;
 232             for (abi_ulong a = last; a < host_last; a += TARGET_PAGE_SIZE) {
 233                 prot1 |= page_get_flags(a + 1);
 234             }
 235             /* If the resulting sum differs, create a new range. */
 236             if (prot1 != target_prot) {
                     /* The last host page is handled; exclude it from the middle. */
 237                 host_last -= host_page_size;
 238                 starts[nranges] = host_last + 1;
 239                 lens[nranges] = host_page_size;
 240                 prots[nranges] = prot1;
 241                 nranges++;
 242             }
 243         }
 244
 245         /* Create a range for the middle, if any remains. */
 246         if (host_start < host_last) {
 247             starts[nranges] = host_start;
 248             lens[nranges] = host_last - host_start + 1;
 249             prots[nranges] = target_prot;
 250             nranges++;
 251         }
 252     }
 253
         /* Apply the collected ranges on the host, stopping at the first failure. */
 254     for (int i = 0; i < nranges; ++i) {
 255         ret = mprotect(g2h_untagged(starts[i]), lens[i],
 256                        target_to_host_prot(prots[i]));
 257         if (ret != 0) {
 258             goto error;
 259         }
 260     }
 261
         /* Only the guest pages actually requested get the new flags. */
 262     page_set_flags(start, last, page_flags);
 263     ret = 0;
 264
 265  error:
 266     mmap_unlock();
 267     return ret;
 268 }
 269
 270 /* map an incomplete host page */
 271 static bool mmap_frag(abi_ulong real_start, abi_ulong start, abi_ulong last,
 272                       int prot, int flags, int fd, off_t offset)
 273 {
 274     int host_page_size = qemu_real_host_page_size();
 275     abi_ulong real_last;
 276     void *host_start;
 277     int prot_old, prot_new;
 278     int host_prot_old, host_prot_new;
 279
 280     if (!(flags & MAP_ANONYMOUS)
 281         && (flags & MAP_TYPE) == MAP_SHARED
 282         && (prot & PROT_WRITE)) {
 283         /*
 284          * msync() won't work with the partial page, so we return an
 285          * error if write is possible while it is a shared mapping.
 286          */
 287         errno = EINVAL;
 288         return false;
 289     }
 290
 291     real_last = real_start + host_page_size - 1;
 292     host_start = g2h_untagged(real_start);
 293
 294     /* Get the protection of the target pages outside the mapping. */
 295     prot_old = 0;
 296     for (abi_ulong a = real_start; a < start; a += TARGET_PAGE_SIZE) {
 297         prot_old |= page_get_flags(a);
 298     }
 299     for (abi_ulong a = real_last; a > last; a -= TARGET_PAGE_SIZE) {
 300         prot_old |= page_get_flags(a);
 301     }
 302
 303     if (prot_old == 0) {
 304         /*
 305          * Since !(prot_old & PAGE_VALID), there were no guest pages
 306          * outside of the fragment we need to map.  Allocate a new host
 307          * page to cover, discarding whatever else may have been present.
 308          */
 309         void *p = mmap(host_start, host_page_size,
 310                        target_to_host_prot(prot),
 311                        flags | MAP_ANONYMOUS, -1, 0);
 312         if (p != host_start) {
 313             if (p != MAP_FAILED) {
 314                 munmap(p, host_page_size);
 315                 errno = EEXIST;
 316             }
 317             return false;
 318         }
 319         prot_old = prot;
 320     }
 321     prot_new = prot | prot_old;
 322
 323     host_prot_old = target_to_host_prot(prot_old);
 324     host_prot_new = target_to_host_prot(prot_new);
 325
 326     /* Adjust protection to be able to write. */
 327     if (!(host_prot_old & PROT_WRITE)) {
 328         host_prot_old |= PROT_WRITE;
 329         mprotect(host_start, host_page_size, host_prot_old);
 330     }
 331
 332     /* Read or zero the new guest pages. */
 333     if (flags & MAP_ANONYMOUS) {
 334         memset(g2h_untagged(start), 0, last - start + 1);
 335     } else {
 336         if (pread(fd, g2h_untagged(start), last - start + 1, offset) == -1) {
 337             return false;
 338         }
 339     }
 340
 341     /* Put final protection */
 342     if (host_prot_new != host_prot_old) {
 343         mprotect(host_start, host_page_size, host_prot_new);
 344     }
 345     return true;
 346 }
 347
 348 abi_ulong task_unmapped_base; /* mmap_find_vma() only advances mmap_next_start for results at or above this */
     /* Not referenced in the visible part of this file; presumably the load base for ET_DYN images -- confirm. */
 349 abi_ulong elf_et_dyn_base;
     /* Default search start used by mmap_find_vma() when it is called with start == 0. */
 350 abi_ulong mmap_next_start;
 351
 352 /*
 353  * Subroutine of mmap_find_vma, used when we have pre-allocated
 354  * a chunk of guest address space.
 355  */
 356 static abi_ulong mmap_find_vma_reserved(abi_ulong start, abi_ulong size,
 357                                         abi_ulong align)
 358 {
 359     target_ulong ret;
 360
 361     ret = page_find_range_empty(start, reserved_va, size, align);
 362     if (ret == -1 && start > mmap_min_addr) {
 363         /* Restart at the beginning of the address space. */
 364         ret = page_find_range_empty(mmap_min_addr, start - 1, size, align);
 365     }
 366
 367     return ret;
 368 }
 369
 370 /*
 371  * Find and reserve a free memory area of size 'size'. The search
 372  * starts at 'start'.
 373  * It must be called with mmap_lock() held.
 374  * Return -1 if error.
      *
      * 'align' is the required alignment of the result; it is raised to at
      * least the host page size.  With reserved_va the search is delegated to
      * mmap_find_vma_reserved().  Otherwise the host is probed with PROT_NONE
      * mappings, and on success the probe mapping is left in place for the
      * caller to replace (see the comment inside the loop).
 375  */
 376 abi_ulong mmap_find_vma(abi_ulong start, abi_ulong size, abi_ulong align)
 377 {
 378     int host_page_size = qemu_real_host_page_size();
 379     void *ptr, *prev;
 380     abi_ulong addr;
 381     int wrapped, repeat;
 382
 383     align = MAX(align, host_page_size);
 384
 385     /* If 'start' == 0, then a default start address is used. */
 386     if (start == 0) {
 387         start = mmap_next_start;
 388     } else {
 389         start &= -host_page_size;
 390     }
 391     start = ROUND_UP(start, align);
 392
 393     size = HOST_PAGE_ALIGN(size);
 394
 395     if (reserved_va) {
 396         return mmap_find_vma_reserved(start, size, align);
 397     }
 398
         /*
          * wrapped: the search has already restarted once from low memory.
          * repeat:  how many times in a row the host returned the same address.
          */
 399     addr = start;
 400     wrapped = repeat = 0;
 401     prev = 0;
 402
 403     for (;; prev = ptr) {
 404         /*
 405          * Reserve needed memory area to avoid a race.
 406          * It should be discarded using:
 407          *  - mmap() with MAP_FIXED flag
 408          *  - mremap() with MREMAP_FIXED flag
 409          *  - shmat() with SHM_REMAP flag
 410          */
 411         ptr = mmap(g2h_untagged(addr), size, PROT_NONE,
 412                    MAP_ANONYMOUS | MAP_PRIVATE | MAP_NORESERVE, -1, 0);
 413
 414         /* ENOMEM, if host address space has no memory */
 415         if (ptr == MAP_FAILED) {
 416             return (abi_ulong)-1;
 417         }
 418
 419         /*
 420          * Count the number of sequential returns of the same address.
 421          * This is used to modify the search algorithm below.
 422          */
 423         repeat = (ptr == prev ? repeat + 1 : 0);
 424
             /* Only accept a result whose last byte is still a valid guest address. */
 425         if (h2g_valid(ptr + size - 1)) {
 426             addr = h2g(ptr);
 427
 428             if ((addr & (align - 1)) == 0) {
 429                 /* Success.  */
                     /*
                      * Advance the default start only when this search began
                      * at it and the result is not below task_unmapped_base.
                      */
 430                 if (start == mmap_next_start && addr >= task_unmapped_base) {
 431                     mmap_next_start = addr + size;
 432                 }
 433                 return addr;
 434             }
 435
 436             /* The address is not properly aligned for the target.  */
 437             switch (repeat) {
 438             case 0:
 439                 /*
 440                  * Assume the result that the kernel gave us is the
 441                  * first with enough free space, so start again at the
 442                  * next higher target page.
 443                  */
 444                 addr = ROUND_UP(addr, align);
 445                 break;
 446             case 1:
 447                 /*
 448                  * Sometimes the kernel decides to perform the allocation
 449                  * at the top end of memory instead.
 450                  */
 451                 addr &= -align;
 452                 break;
 453             case 2:
 454                 /* Start over at low memory.  */
 455                 addr = 0;
 456                 break;
 457             default:
 458                 /* Fail.  This unaligned block must be the last.  */
 459                 addr = -1;
 460                 break;
 461             }
 462         } else {
 463             /*
 464              * Since the result the kernel gave didn't fit, start
 465              * again at low memory.  If any repetition, fail.
 466              */
 467             addr = (repeat ? -1 : 0);
 468         }
 469
 470         /* Unmap and try again.  */
 471         munmap(ptr, size);
 472
 473         /* ENOMEM if we checked the whole of the target address space.  */
 474         if (addr == (abi_ulong)-1) {
 475             return (abi_ulong)-1;
 476         } else if (addr == 0) {
                 /* A second request to wrap means everything has been tried. */
 477             if (wrapped) {
 478                 return (abi_ulong)-1;
 479             }
 480             wrapped = 1;
 481             /*
 482              * Don't actually use 0 when wrapping, instead indicate
 483              * that we'd truly like an allocation in low memory.
 484              */
 485             addr = (mmap_min_addr > TARGET_PAGE_SIZE
 486                      ? TARGET_PAGE_ALIGN(mmap_min_addr)
 487                      : TARGET_PAGE_SIZE);
 488         } else if (wrapped && addr >= start) {
                 /* After wrapping, the search has come back up to where it began. */
 489             return (abi_ulong)-1;
 490         }
 491     }
 492 }
 493
 494 /* NOTE: all the constants are the HOST ones */
     /*
      * Emulate a guest mmap() of LEN bytes at guest address START.
      *
      * Returns the guest address of the new mapping on success, or -1 with
      * errno set on failure.  The whole operation runs under mmap_lock().
      *
      * Without MAP_FIXED/MAP_FIXED_NOREPLACE the address is chosen by
      * mmap_find_vma() and the host mapping is created in one piece.  With a
      * fixed address, partial host pages at either end are handled by
      * mmap_frag() and only the host-page-aligned middle is mapped directly.
      * A file mapping whose offset and address disagree modulo the host page
      * size is emulated with an anonymous mapping filled by pread().
      */
 495 abi_long target_mmap(abi_ulong start, abi_ulong len, int target_prot,
 496                      int flags, int fd, off_t offset)
 497 {
 498     int host_page_size = qemu_real_host_page_size();
 499     abi_ulong ret, last, real_start, real_last, retaddr, host_len;
         /* start > last encodes "no range was mapped directly on the host". */
 500     abi_ulong passthrough_start = -1, passthrough_last = 0;
 501     int page_flags;
 502     off_t host_offset;
 503
 504     mmap_lock();
 505     trace_target_mmap(start, len, target_prot, flags, fd, offset);
 506
 507     if (!len) {
 508         errno = EINVAL;
 509         goto fail;
 510     }
 511
 512     page_flags = validate_prot_to_pageflags(target_prot);
 513     if (!page_flags) {
 514         errno = EINVAL;
 515         goto fail;
 516     }
 517
 518     /* Also check for overflows... */
 519     len = TARGET_PAGE_ALIGN(len);
 520     if (!len) {
 521         errno = ENOMEM;
 522         goto fail;
 523     }
 524
 525     if (offset & ~TARGET_PAGE_MASK) {
 526         errno = EINVAL;
 527         goto fail;
 528     }
 529
 530     /*
 531      * If we're mapping shared memory, ensure we generate code for parallel
 532      * execution and flush old translations.  This will work up to the level
 533      * supported by the host -- anything that requires EXCP_ATOMIC will not
 534      * be atomic with respect to an external process.
 535      */
 536     if (flags & MAP_SHARED) {
 537         CPUState *cpu = thread_cpu;
 538         if (!(cpu->tcg_cflags & CF_PARALLEL)) {
 539             cpu->tcg_cflags |= CF_PARALLEL;
 540             tb_flush(cpu);
 541         }
 542     }
 543
         /* Round the guest address and the file offset down to host pages. */
 544     real_start = start & -host_page_size;
 545     host_offset = offset & -host_page_size;
 546
 547     /*
 548      * If the user is asking for the kernel to find a location, do that
 549      * before we truncate the length for mapping files below.
 550      */
 551     if (!(flags & (MAP_FIXED | MAP_FIXED_NOREPLACE))) {
 552         host_len = len + offset - host_offset;
 553         host_len = HOST_PAGE_ALIGN(host_len);
 554         start = mmap_find_vma(real_start, host_len, TARGET_PAGE_SIZE);
 555         if (start == (abi_ulong)-1) {
 556             errno = ENOMEM;
 557             goto fail;
 558         }
 559     }
 560
 561     /*
 562      * When mapping files into a memory area larger than the file, accesses
 563      * to pages beyond the file size will cause a SIGBUS.
 564      *
 565      * For example, if mmaping a file of 100 bytes on a host with 4K pages
 566      * emulating a target with 8K pages, the target expects to be able to
 567      * access the first 8K. But the host will trap us on any access beyond
 568      * 4K.
 569      *
 570      * When emulating a target with a larger page-size than the hosts, we
 571      * may need to truncate file maps at EOF and add extra anonymous pages
 572      * up to the targets page boundary.
 573      */
 574     if (host_page_size < TARGET_PAGE_SIZE && !(flags & MAP_ANONYMOUS)) {
 575         struct stat sb;
 576
 577         if (fstat(fd, &sb) == -1) {
 578             goto fail;
 579         }
 580
 581         /* Are we trying to create a map beyond EOF?  */
 582         if (offset + len > sb.st_size) {
 583             /*
 584              * If so, truncate the file map at eof aligned with
 585              * the hosts real pagesize. Additional anonymous maps
 586              * will be created beyond EOF.
 587              */
                 /*
                  * NOTE(review): when offset itself lies beyond st_size the
                  * difference below is negative before rounding -- confirm
                  * that case is handled as intended.
                  */
 588             len = ROUND_UP(sb.st_size - offset, host_page_size);
 589         }
 590     }
 591
 592     if (!(flags & (MAP_FIXED | MAP_FIXED_NOREPLACE))) {
 593         uintptr_t host_start;
 594         int host_prot;
 595         void *p;
 596
 597         host_len = len + offset - host_offset;
 598         host_len = HOST_PAGE_ALIGN(host_len);
 599         host_prot = target_to_host_prot(target_prot);
 600
 601         /* Note: we prefer to control the mapping address. */
             /*
              * First place an anonymous mapping over the whole range picked
              * by mmap_find_vma(); a file mapping, if any, is then laid over
              * its start below.
              */
 602         p = mmap(g2h_untagged(start), host_len, host_prot,
 603                  flags | MAP_FIXED | MAP_ANONYMOUS, -1, 0);
 604         if (p == MAP_FAILED) {
 605             goto fail;
 606         }
 607         /* update start so that it points to the file position at 'offset' */
 608         host_start = (uintptr_t)p;
 609         if (!(flags & MAP_ANONYMOUS)) {
 610             p = mmap(g2h_untagged(start), len, host_prot,
 611                      flags | MAP_FIXED, fd, host_offset);
 612             if (p == MAP_FAILED) {
 613                 munmap(g2h_untagged(start), host_len);
 614                 goto fail;
 615             }
 616             host_start += offset - host_offset;
 617         }
 618         start = h2g(host_start);
 619         last = start + len - 1;
 620         passthrough_start = start;
 621         passthrough_last = last;
 622     } else {
 623         if (start & ~TARGET_PAGE_MASK) {
 624             errno = EINVAL;
 625             goto fail;
 626         }
 627         last = start + len - 1;
 628         real_last = HOST_PAGE_ALIGN(last) - 1;
 629
 630         /*
 631          * Test if requested memory area fits target address space
 632          * It can fail only on 64-bit host with 32-bit target.
 633          * On any other target/host host mmap() handles this error correctly.
 634          */
 635         if (last < start || !guest_range_valid_untagged(start, len)) {
 636             errno = ENOMEM;
 637             goto fail;
 638         }
 639
 640         if (flags & MAP_FIXED_NOREPLACE) {
 641             /* Validate that the chosen range is empty. */
 642             if (!page_check_range_empty(start, last)) {
 643                 errno = EEXIST;
 644                 goto fail;
 645             }
 646
 647             /*
 648              * With reserved_va, the entire address space is mmaped in the
 649              * host to ensure it isn't accidentally used for something else.
 650              * We have just checked that the guest address is not mapped
 651              * within the guest, but need to replace the host reservation.
 652              *
 653              * Without reserved_va, despite the guest address check above,
 654              * keep MAP_FIXED_NOREPLACE so that the guest does not overwrite
 655              * any host address mappings.
 656              */
 657             if (reserved_va) {
 658                 flags = (flags & ~MAP_FIXED_NOREPLACE) | MAP_FIXED;
 659             }
 660         }
 661
 662         /*
 663          * worst case: we cannot map the file because the offset is not
 664          * aligned, so we read it
 665          */
 666         if (!(flags & MAP_ANONYMOUS) &&
 667             (offset & (host_page_size - 1)) != (start & (host_page_size - 1))) {
 668             /*
 669              * msync() won't work here, so we return an error if write is
 670              * possible while it is a shared mapping
 671              */
 672             if ((flags & MAP_TYPE) == MAP_SHARED
 673                 && (target_prot & PROT_WRITE)) {
 674                 errno = EINVAL;
 675                 goto fail;
 676             }
                 /*
                  * Recursive call: create a private, writable anonymous
                  * mapping to read the file contents into.  mmap_lock()
                  * nests, so holding it here is fine.
                  */
 677             retaddr = target_mmap(start, len, target_prot | PROT_WRITE,
 678                                   (flags & (MAP_FIXED | MAP_FIXED_NOREPLACE))
 679                                   | MAP_PRIVATE | MAP_ANONYMOUS,
 680                                   -1, 0);
 681             if (retaddr == -1) {
 682                 goto fail;
 683             }
                 /*
                  * NOTE(review): if pread() fails here the anonymous mapping
                  * created just above is left in place; a short read is also
                  * accepted as success.
                  */
 684             if (pread(fd, g2h_untagged(start), len, offset) == -1) {
 685                 goto fail;
 686             }
 687             if (!(target_prot & PROT_WRITE)) {
 688                 ret = target_mprotect(start, len, target_prot);
 689                 assert(ret == 0);
 690             }
                 /* Page flags were already set by the calls above; skip that step. */
 691             goto the_end;
 692         }
 693
 694         /* handle the start of the mapping */
 695         if (start > real_start) {
 696             if (real_last == real_start + host_page_size - 1) {
 697                 /* one single host page */
 698                 if (!mmap_frag(real_start, start, last,
 699                                target_prot, flags, fd, offset)) {
 700                     goto fail;
 701                 }
 702                 goto the_end1;
 703             }
 704             if (!mmap_frag(real_start, start,
 705                            real_start + host_page_size - 1,
 706                            target_prot, flags, fd, offset)) {
 707                 goto fail;
 708             }
 709             real_start += host_page_size;
 710         }
 711         /* handle the end of the mapping */
 712         if (last < real_last) {
 713             abi_ulong real_page = real_last - host_page_size + 1;
 714             if (!mmap_frag(real_page, real_page, last,
 715                            target_prot, flags, fd,
 716                            offset + real_page - start)) {
 717                 goto fail;
 718             }
 719             real_last -= host_page_size;
 720         }
 721
 722         /* map the middle (easier) */
 723         if (real_start < real_last) {
 724             void *p, *want_p;
 725             off_t offset1;
 726             size_t len1;
 727
 728             if (flags & MAP_ANONYMOUS) {
 729                 offset1 = 0;
 730             } else {
 731                 offset1 = offset + real_start - start;
 732             }
 733             len1 = real_last - real_start + 1;
 734             want_p = g2h_untagged(real_start);
 735
 736             p = mmap(want_p, len1, target_to_host_prot(target_prot),
 737                      flags, fd, offset1);
                 /* A mapping placed anywhere other than want_p is treated as EEXIST. */
 738             if (p != want_p) {
 739                 if (p != MAP_FAILED) {
 740                     munmap(p, len1);
 741                     errno = EEXIST;
 742                 }
 743                 goto fail;
 744             }
 745             passthrough_start = real_start;
 746             passthrough_last = real_last;
 747         }
 748     }
 749  the_end1:
 750     if (flags & MAP_ANONYMOUS) {
 751         page_flags |= PAGE_ANON;
 752     }
 753     page_flags |= PAGE_RESET;
         /*
          * Record the guest page flags; the directly host-mapped sub-range,
          * if there is one, additionally gets PAGE_PASSTHROUGH.
          */
 754     if (passthrough_start > passthrough_last) {
 755         page_set_flags(start, last, page_flags);
 756     } else {
 757         if (start < passthrough_start) {
 758             page_set_flags(start, passthrough_start - 1, page_flags);
 759         }
 760         page_set_flags(passthrough_start, passthrough_last,
 761                        page_flags | PAGE_PASSTHROUGH);
 762         if (passthrough_last < last) {
 763             page_set_flags(passthrough_last + 1, last, page_flags);
 764         }
 765     }
         /* Recorded regions wholly covered by the new mapping are forgotten. */
 766     shm_region_rm_complete(start, last);
 767  the_end:
 768     trace_target_mmap_complete(start);
 769     if (qemu_loglevel_mask(CPU_LOG_PAGE)) {
 770         FILE *f = qemu_log_trylock();
 771         if (f) {
 772             fprintf(f, "page layout changed following mmap\n");
 773             page_dump(f);
 774             qemu_log_unlock(f);
 775         }
 776     }
 777     mmap_unlock();
 778     return start;
 779 fail:
 780     mmap_unlock();
 781     return -1;
 782 }
 783
 784 static int mmap_reserve_or_unmap(abi_ulong start, abi_ulong len)
 785 {
 786     int host_page_size = qemu_real_host_page_size();
 787     abi_ulong real_start;
 788     abi_ulong real_last;
 789     abi_ulong real_len;
 790     abi_ulong last;
 791     abi_ulong a;
 792     void *host_start;
 793     int prot;
 794
 795     last = start + len - 1;
 796     real_start = start & -host_page_size;
 797     real_last = HOST_PAGE_ALIGN(last) - 1;
 798
 799     /*
 800      * If guest pages remain on the first or last host pages,
 801      * adjust the deallocation to retain those guest pages.
 802      * The single page special case is required for the last page,
 803      * lest real_start overflow to zero.
 804      */
 805     if (real_last - real_start < host_page_size) {
 806         prot = 0;
 807         for (a = real_start; a < start; a += TARGET_PAGE_SIZE) {
 808             prot |= page_get_flags(a);
 809         }
 810         for (a = last; a < real_last; a += TARGET_PAGE_SIZE) {
 811             prot |= page_get_flags(a + 1);
 812         }
 813         if (prot != 0) {
 814             return 0;
 815         }
 816     } else {
 817         for (prot = 0, a = real_start; a < start; a += TARGET_PAGE_SIZE) {
 818             prot |= page_get_flags(a);
 819         }
 820         if (prot != 0) {
 821             real_start += host_page_size;
 822         }
 823
 824         for (prot = 0, a = last; a < real_last; a += TARGET_PAGE_SIZE) {
 825             prot |= page_get_flags(a + 1);
 826         }
 827         if (prot != 0) {
 828             real_last -= host_page_size;
 829         }
 830
 831         if (real_last < real_start) {
 832             return 0;
 833         }
 834     }
 835
 836     real_len = real_last - real_start + 1;
 837     host_start = g2h_untagged(real_start);
 838
 839     if (reserved_va) {
 840         void *ptr = mmap(host_start, real_len, PROT_NONE,
 841                          MAP_FIXED | MAP_ANONYMOUS
 842                          | MAP_PRIVATE | MAP_NORESERVE, -1, 0);
 843         return ptr == host_start ? 0 : -1;
 844     }
 845     return munmap(host_start, real_len);
 846 }
 847
 848 int target_munmap(abi_ulong start, abi_ulong len)
 849 {
 850     int ret;
 851
 852     trace_target_munmap(start, len);
 853
 854     if (start & ~TARGET_PAGE_MASK) {
 855         errno = EINVAL;
 856         return -1;
 857     }
 858     len = TARGET_PAGE_ALIGN(len);
 859     if (len == 0 || !guest_range_valid_untagged(start, len)) {
 860         errno = EINVAL;
 861         return -1;
 862     }
 863
 864     mmap_lock();
 865     ret = mmap_reserve_or_unmap(start, len);
 866     if (likely(ret == 0)) {
 867         page_set_flags(start, start + len - 1, 0);
 868         shm_region_rm_complete(start, start + len - 1);
 869     }
 870     mmap_unlock();
 871
 872     return ret;
 873 }
 874
 875 abi_long target_mremap(abi_ulong old_addr, abi_ulong old_size,
 876                        abi_ulong new_size, unsigned long flags,
 877                        abi_ulong new_addr)
 878 {
 879     int prot;
 880     void *host_addr;
 881
 882     if (!guest_range_valid_untagged(old_addr, old_size) ||
 883         ((flags & MREMAP_FIXED) &&
 884          !guest_range_valid_untagged(new_addr, new_size)) ||
 885         ((flags & MREMAP_MAYMOVE) == 0 &&
 886          !guest_range_valid_untagged(old_addr, new_size))) {
 887         errno = ENOMEM;
 888         return -1;
 889     }
 890
 891     mmap_lock();
 892
 893     if (flags & MREMAP_FIXED) {
 894         host_addr = mremap(g2h_untagged(old_addr), old_size, new_size,
 895                            flags, g2h_untagged(new_addr));
 896
 897         if (reserved_va && host_addr != MAP_FAILED) {
 898             /*
 899              * If new and old addresses overlap then the above mremap will
 900              * already have failed with EINVAL.
 901              */
 902             mmap_reserve_or_unmap(old_addr, old_size);
 903         }
 904     } else if (flags & MREMAP_MAYMOVE) {
 905         abi_ulong mmap_start;
 906
 907         mmap_start = mmap_find_vma(0, new_size, TARGET_PAGE_SIZE);
 908
 909         if (mmap_start == -1) {
 910             errno = ENOMEM;
 911             host_addr = MAP_FAILED;
 912         } else {
 913             host_addr = mremap(g2h_untagged(old_addr), old_size, new_size,
 914                                flags | MREMAP_FIXED,
 915                                g2h_untagged(mmap_start));
 916             if (reserved_va) {
 917                 mmap_reserve_or_unmap(old_addr, old_size);
 918             }
 919         }
 920     } else {
 921         int page_flags = 0;
 922         if (reserved_va && old_size < new_size) {
 923             abi_ulong addr;
 924             for (addr = old_addr + old_size;
 925                  addr < old_addr + new_size;
 926                  addr++) {
 927                 page_flags |= page_get_flags(addr);
 928             }
 929         }
 930         if (page_flags == 0) {
 931             host_addr = mremap(g2h_untagged(old_addr),
 932                                old_size, new_size, flags);
 933
 934             if (host_addr != MAP_FAILED) {
 935                 /* Check if address fits target address space */
 936                 if (!guest_range_valid_untagged(h2g(host_addr), new_size)) {
 937                     /* Revert mremap() changes */
 938                     host_addr = mremap(g2h_untagged(old_addr),
 939                                        new_size, old_size, flags);
 940                     errno = ENOMEM;
 941                     host_addr = MAP_FAILED;
 942                 } else if (reserved_va && old_size > new_size) {
 943                     mmap_reserve_or_unmap(old_addr + old_size,
 944                                           old_size - new_size);
 945                 }
 946             }
 947         } else {
 948             errno = ENOMEM;
 949             host_addr = MAP_FAILED;
 950         }
 951     }
 952
 953     if (host_addr == MAP_FAILED) {
 954         new_addr = -1;
 955     } else {
 956         new_addr = h2g(host_addr);
 957         prot = page_get_flags(old_addr);
 958         page_set_flags(old_addr, old_addr + old_size - 1, 0);
 959         shm_region_rm_complete(old_addr, old_addr + old_size - 1);
 960         page_set_flags(new_addr, new_addr + new_size - 1,
 961                        prot | PAGE_VALID | PAGE_RESET);
 962         shm_region_rm_complete(new_addr, new_addr + new_size - 1);
 963     }
 964     mmap_unlock();
 965     return new_addr;
 966 }
 967
 968 abi_long target_madvise(abi_ulong start, abi_ulong len_in, int advice)
 969 {
 970     abi_ulong len;
 971     int ret = 0;
 972
 973     if (start & ~TARGET_PAGE_MASK) {
 974         return -TARGET_EINVAL;
 975     }
 976     if (len_in == 0) {
 977         return 0;
 978     }
 979     len = TARGET_PAGE_ALIGN(len_in);
 980     if (len == 0 || !guest_range_valid_untagged(start, len)) {
 981         return -TARGET_EINVAL;
 982     }
 983
 984     /* Translate for some architectures which have different MADV_xxx values */
 985     switch (advice) {
 986     case TARGET_MADV_DONTNEED:      /* alpha */
 987         advice = MADV_DONTNEED;
 988         break;
 989     case TARGET_MADV_WIPEONFORK:    /* parisc */
 990         advice = MADV_WIPEONFORK;
 991         break;
 992     case TARGET_MADV_KEEPONFORK:    /* parisc */
 993         advice = MADV_KEEPONFORK;
 994         break;
 995     /* we do not care about the other MADV_xxx values yet */
 996     }
 997
 998     /*
 999      * Most advice values are hints, so ignoring and returning success is ok.
1000      *
1001      * However, some advice values such as MADV_DONTNEED, MADV_WIPEONFORK and
1002      * MADV_KEEPONFORK are not hints and need to be emulated.
1003      *
1004      * A straight passthrough for those may not be safe because qemu sometimes
1005      * turns private file-backed mappings into anonymous mappings.
1006      * If all guest pages have PAGE_PASSTHROUGH set, mappings have the
1007      * same semantics for the host as for the guest.
1008      *
1009      * We pass through MADV_WIPEONFORK and MADV_KEEPONFORK if possible and
1010      * return failure if not.
1011      *
1012      * MADV_DONTNEED is passed through as well, if possible.
1013      * If passthrough isn't possible, we nevertheless (wrongly!) return
1014      * success, which is broken but some userspace programs fail to work
1015      * otherwise. Completely implementing such emulation is quite complicated
1016      * though.
1017      */
1018     mmap_lock();
1019     switch (advice) {
1020     case MADV_WIPEONFORK:
1021     case MADV_KEEPONFORK:
1022         ret = -EINVAL;
1023         /* fall through */
1024     case MADV_DONTNEED:
1025         if (page_check_range(start, len, PAGE_PASSTHROUGH)) {
1026             ret = get_errno(madvise(g2h_untagged(start), len, advice));
1027             if ((advice == MADV_DONTNEED) && (ret == 0)) {
1028                 page_reset_target_data(start, start + len - 1);
1029             }
1030         }
1031     }
1032     mmap_unlock();
1033
1034     return ret;
1035 }
1036
1037 #ifndef TARGET_FORCE_SHMLBA
1038 /*
1039  * For most architectures, SHMLBA is the same as the page size;
1040  * some architectures have larger values, in which case they should
1041  * define TARGET_FORCE_SHMLBA and provide a target_shmlba() function.
1042  * This corresponds to the kernel arch code defining __ARCH_FORCE_SHMLBA
1043  * and defining its own value for SHMLBA.
1044  *
1045  * The kernel also permits SHMLBA to be set by the architecture to a
1046  * value larger than the page size without setting __ARCH_FORCE_SHMLBA;
1047  * this means that addresses are rounded to the large size if
1048  * SHM_RND is set but addresses not aligned to that size are not rejected
1049  * as long as they are at least page-aligned. Since the only architecture
1050  * which uses this is ia64 this code doesn't provide for that oddity.
1051  */
1052 static inline abi_ulong target_shmlba(CPUArchState *cpu_env)
1053 {
1054     return TARGET_PAGE_SIZE;
1055 }
1056 #endif
1057
1058 abi_ulong target_shmat(CPUArchState *cpu_env, int shmid,
1059                        abi_ulong shmaddr, int shmflg)
1060 {
1061     CPUState *cpu = env_cpu(cpu_env);
1062     abi_ulong raddr;
1063     struct shmid_ds shm_info;
1064     int ret;
1065     abi_ulong shmlba;
1066
1067     /* shmat pointers are always untagged */
1068
1069     /* find out the length of the shared memory segment */
1070     ret = get_errno(shmctl(shmid, IPC_STAT, &shm_info));
1071     if (is_error(ret)) {
1072         /* can't get length, bail out */
1073         return ret;
1074     }
1075
1076     shmlba = target_shmlba(cpu_env);
1077
1078     if (shmaddr & (shmlba - 1)) {
1079         if (shmflg & SHM_RND) {
1080             shmaddr &= ~(shmlba - 1);
1081         } else {
1082             return -TARGET_EINVAL;
1083         }
1084     }
1085     if (!guest_range_valid_untagged(shmaddr, shm_info.shm_segsz)) {
1086         return -TARGET_EINVAL;
1087     }
1088
1089     WITH_MMAP_LOCK_GUARD() {
1090         void *host_raddr;
1091         abi_ulong last;
1092
1093         if (shmaddr) {
1094             host_raddr = shmat(shmid, (void *)g2h_untagged(shmaddr), shmflg);
1095         } else {
1096             abi_ulong mmap_start;
1097
1098             /* In order to use the host shmat, we need to honor host SHMLBA.  */
1099             mmap_start = mmap_find_vma(0, shm_info.shm_segsz,
1100                                        MAX(SHMLBA, shmlba));
1101
1102             if (mmap_start == -1) {
1103                 return -TARGET_ENOMEM;
1104             }
1105             host_raddr = shmat(shmid, g2h_untagged(mmap_start),
1106                                shmflg | SHM_REMAP);
1107         }
1108
1109         if (host_raddr == (void *)-1) {
1110             return get_errno(-1);
1111         }
1112         raddr = h2g(host_raddr);
1113         last = raddr + shm_info.shm_segsz - 1;
1114
1115         page_set_flags(raddr, last,
1116                        PAGE_VALID | PAGE_RESET | PAGE_READ |
1117                        (shmflg & SHM_RDONLY ? 0 : PAGE_WRITE));
1118
1119         shm_region_rm_complete(raddr, last);
1120         shm_region_add(raddr, last);
1121     }
1122
1123     /*
1124      * We're mapping shared memory, so ensure we generate code for parallel
1125      * execution and flush old translations.  This will work up to the level
1126      * supported by the host -- anything that requires EXCP_ATOMIC will not
1127      * be atomic with respect to an external process.
1128      */
1129     if (!(cpu->tcg_cflags & CF_PARALLEL)) {
1130         cpu->tcg_cflags |= CF_PARALLEL;
1131         tb_flush(cpu);
1132     }
1133
1134     return raddr;
1135 }
1136
1137 abi_long target_shmdt(abi_ulong shmaddr)
1138 {
1139     abi_long rv;
1140
1141     /* shmdt pointers are always untagged */
1142
1143     WITH_MMAP_LOCK_GUARD() {
1144         abi_ulong last = shm_region_find(shmaddr);
1145         if (last == 0) {
1146             return -TARGET_EINVAL;
1147         }
1148
1149         rv = get_errno(shmdt(g2h_untagged(shmaddr)));
1150         if (rv == 0) {
1151             abi_ulong size = last - shmaddr + 1;
1152
1153             page_set_flags(shmaddr, last, 0);
1154             shm_region_rm_complete(shmaddr, last);
1155             mmap_reserve_or_unmap(shmaddr, size);
1156         }
1157     }
1158     return rv;
1159 }