linux-user/mmap.c

   1 /*
   2  *  mmap support for qemu
   3  *
   4  *  Copyright (c) 2003 Fabrice Bellard
   5  *
   6  *  This program is free software; you can redistribute it and/or modify
   7  *  it under the terms of the GNU General Public License as published by
   8  *  the Free Software Foundation; either version 2 of the License, or
   9  *  (at your option) any later version.
  10  *
  11  *  This program is distributed in the hope that it will be useful,
  12  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
  13  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14  *  GNU General Public License for more details.
  15  *
  16  *  You should have received a copy of the GNU General Public License
  17  *  along with this program; if not, see <http://www.gnu.org/licenses/>.
  18  */
  19 #include "qemu/osdep.h"
  20 #include <sys/shm.h>
  21 #include "trace.h"
  22 #include "exec/log.h"
  23 #include "qemu.h"
  24 #include "user-internals.h"
  25 #include "user-mmap.h"
  26 #include "target_mman.h"
  27 #include "qemu/interval-tree.h"
  28
  29 #ifdef TARGET_ARM
  30 #include "target/arm/cpu-features.h"
  31 #endif
  32
  33 static pthread_mutex_t mmap_mutex = PTHREAD_MUTEX_INITIALIZER;
  34 static __thread int mmap_lock_count;
  35
  36 void mmap_lock(void)
  37 {
  38     if (mmap_lock_count++ == 0) {
  39         pthread_mutex_lock(&mmap_mutex);
  40     }
  41 }
  42
  43 void mmap_unlock(void)
  44 {
  45     assert(mmap_lock_count > 0);
  46     if (--mmap_lock_count == 0) {
  47         pthread_mutex_unlock(&mmap_mutex);
  48     }
  49 }
  50
  51 bool have_mmap_lock(void)
  52 {
  53     return mmap_lock_count > 0 ? true : false;
  54 }
  55
  56 /* Grab lock to make sure things are in a consistent state after fork().  */
  57 void mmap_fork_start(void)
  58 {
  59     if (mmap_lock_count)
  60         abort();
  61     pthread_mutex_lock(&mmap_mutex);
  62 }
  63
  64 void mmap_fork_end(int child)
  65 {
  66     if (child) {
  67         pthread_mutex_init(&mmap_mutex, NULL);
  68     } else {
  69         pthread_mutex_unlock(&mmap_mutex);
  70     }
  71 }
  72
  73 /* Protected by mmap_lock. */
  74 static IntervalTreeRoot shm_regions;
  75
  76 static void shm_region_add(abi_ptr start, abi_ptr last)
  77 {
  78     IntervalTreeNode *i = g_new0(IntervalTreeNode, 1);
  79
  80     i->start = start;
  81     i->last = last;
  82     interval_tree_insert(i, &shm_regions);
  83 }
  84
  85 static abi_ptr shm_region_find(abi_ptr start)
  86 {
  87     IntervalTreeNode *i;
  88
  89     for (i = interval_tree_iter_first(&shm_regions, start, start); i;
  90          i = interval_tree_iter_next(i, start, start)) {
  91         if (i->start == start) {
  92             return i->last;
  93         }
  94     }
  95     return 0;
  96 }
  97
  98 static void shm_region_rm_complete(abi_ptr start, abi_ptr last)
  99 {
 100     IntervalTreeNode *i, *n;
 101
 102     for (i = interval_tree_iter_first(&shm_regions, start, last); i; i = n) {
 103         n = interval_tree_iter_next(i, start, last);
 104         if (i->start >= start && i->last <= last) {
 105             interval_tree_remove(i, &shm_regions);
 106             g_free(i);
 107         }
 108     }
 109 }
 110
 111 /*
 112  * Validate target prot bitmask.
 113  * Return the prot bitmask for the host in *HOST_PROT.
 114  * Return 0 if the target prot bitmask is invalid, otherwise
 115  * the internal qemu page_flags (which will include PAGE_VALID).
 116  */
 117 static int validate_prot_to_pageflags(int prot)
 118 {
 119     int valid = PROT_READ | PROT_WRITE | PROT_EXEC | TARGET_PROT_SEM;
 120     int page_flags = (prot & PAGE_BITS) | PAGE_VALID;
 121
 122 #ifdef TARGET_AARCH64
 123     {
 124         ARMCPU *cpu = ARM_CPU(thread_cpu);
 125
 126         /*
 127          * The PROT_BTI bit is only accepted if the cpu supports the feature.
 128          * Since this is the unusual case, don't bother checking unless
 129          * the bit has been requested.  If set and valid, record the bit
 130          * within QEMU's page_flags.
 131          */
 132         if ((prot & TARGET_PROT_BTI) && cpu_isar_feature(aa64_bti, cpu)) {
 133             valid |= TARGET_PROT_BTI;
 134             page_flags |= PAGE_BTI;
 135         }
 136         /* Similarly for the PROT_MTE bit. */
 137         if ((prot & TARGET_PROT_MTE) && cpu_isar_feature(aa64_mte, cpu)) {
 138             valid |= TARGET_PROT_MTE;
 139             page_flags |= PAGE_MTE;
 140         }
 141     }
 142 #elif defined(TARGET_HPPA)
 143     valid |= PROT_GROWSDOWN | PROT_GROWSUP;
 144 #endif
 145
 146     return prot & ~valid ? 0 : page_flags;
 147 }
 148
 149 /*
 150  * For the host, we need not pass anything except read/write/exec.
 151  * While PROT_SEM is allowed by all hosts, it is also ignored, so
 152  * don't bother transforming guest bit to host bit.  Any other
 153  * target-specific prot bits will not be understood by the host
 154  * and will need to be encoded into page_flags for qemu emulation.
 155  *
 156  * Pages that are executable by the guest will never be executed
 157  * by the host, but the host will need to be able to read them.
 158  */
 159 static int target_to_host_prot(int prot)
 160 {
 161     return (prot & (PROT_READ | PROT_WRITE)) |
 162            (prot & PROT_EXEC ? PROT_READ : 0);
 163 }
 164
 165 /* NOTE: all the constants are the HOST ones, but addresses are target. */
 166 int target_mprotect(abi_ulong start, abi_ulong len, int target_prot)
 167 {
 168     int host_page_size = qemu_real_host_page_size();
 169     abi_ulong starts[3];
 170     abi_ulong lens[3];
 171     int prots[3];
 172     abi_ulong host_start, host_last, last;
 173     int prot1, ret, page_flags, nranges;
 174
 175     trace_target_mprotect(start, len, target_prot);
 176
 177     if ((start & ~TARGET_PAGE_MASK) != 0) {
 178         return -TARGET_EINVAL;
 179     }
 180     page_flags = validate_prot_to_pageflags(target_prot);
 181     if (!page_flags) {
 182         return -TARGET_EINVAL;
 183     }
 184     if (len == 0) {
 185         return 0;
 186     }
 187     len = TARGET_PAGE_ALIGN(len);
 188     if (!guest_range_valid_untagged(start, len)) {
 189         return -TARGET_ENOMEM;
 190     }
 191
 192     last = start + len - 1;
 193     host_start = start & -host_page_size;
 194     host_last = ROUND_UP(last, host_page_size) - 1;
 195     nranges = 0;
 196
 197     mmap_lock();
 198
 199     if (host_last - host_start < host_page_size) {
 200         /* Single host page contains all guest pages: sum the prot. */
 201         prot1 = target_prot;
 202         for (abi_ulong a = host_start; a < start; a += TARGET_PAGE_SIZE) {
 203             prot1 |= page_get_flags(a);
 204         }
 205         for (abi_ulong a = last; a < host_last; a += TARGET_PAGE_SIZE) {
 206             prot1 |= page_get_flags(a + 1);
 207         }
 208         starts[nranges] = host_start;
 209         lens[nranges] = host_page_size;
 210         prots[nranges] = prot1;
 211         nranges++;
 212     } else {
 213         if (host_start < start) {
 214             /* Host page contains more than one guest page: sum the prot. */
 215             prot1 = target_prot;
 216             for (abi_ulong a = host_start; a < start; a += TARGET_PAGE_SIZE) {
 217                 prot1 |= page_get_flags(a);
 218             }
 219             /* If the resulting sum differs, create a new range. */
 220             if (prot1 != target_prot) {
 221                 starts[nranges] = host_start;
 222                 lens[nranges] = host_page_size;
 223                 prots[nranges] = prot1;
 224                 nranges++;
 225                 host_start += host_page_size;
 226             }
 227         }
 228
 229         if (last < host_last) {
 230             /* Host page contains more than one guest page: sum the prot. */
 231             prot1 = target_prot;
 232             for (abi_ulong a = last; a < host_last; a += TARGET_PAGE_SIZE) {
 233                 prot1 |= page_get_flags(a + 1);
 234             }
 235             /* If the resulting sum differs, create a new range. */
 236             if (prot1 != target_prot) {
 237                 host_last -= host_page_size;
 238                 starts[nranges] = host_last + 1;
 239                 lens[nranges] = host_page_size;
 240                 prots[nranges] = prot1;
 241                 nranges++;
 242             }
 243         }
 244
 245         /* Create a range for the middle, if any remains. */
 246         if (host_start < host_last) {
 247             starts[nranges] = host_start;
 248             lens[nranges] = host_last - host_start + 1;
 249             prots[nranges] = target_prot;
 250             nranges++;
 251         }
 252     }
 253
 254     for (int i = 0; i < nranges; ++i) {
 255         ret = mprotect(g2h_untagged(starts[i]), lens[i],
 256                        target_to_host_prot(prots[i]));
 257         if (ret != 0) {
 258             goto error;
 259         }
 260     }
 261
 262     page_set_flags(start, last, page_flags);
 263     ret = 0;
 264
 265  error:
 266     mmap_unlock();
 267     return ret;
 268 }
 269
 270 /*
 271  * Perform munmap on behalf of the target, with host parameters.
 272  * If reserved_va, we must replace the memory reservation.
 273  */
 274 static int do_munmap(void *addr, size_t len)
 275 {
 276     if (reserved_va) {
 277         void *ptr = mmap(addr, len, PROT_NONE,
 278                          MAP_FIXED | MAP_ANONYMOUS
 279                          | MAP_PRIVATE | MAP_NORESERVE, -1, 0);
 280         return ptr == addr ? 0 : -1;
 281     }
 282     return munmap(addr, len);
 283 }
 284
 285 /* map an incomplete host page */
 286 static bool mmap_frag(abi_ulong real_start, abi_ulong start, abi_ulong last,
 287                       int prot, int flags, int fd, off_t offset)
 288 {
 289     int host_page_size = qemu_real_host_page_size();
 290     abi_ulong real_last;
 291     void *host_start;
 292     int prot_old, prot_new;
 293     int host_prot_old, host_prot_new;
 294
 295     if (!(flags & MAP_ANONYMOUS)
 296         && (flags & MAP_TYPE) == MAP_SHARED
 297         && (prot & PROT_WRITE)) {
 298         /*
 299          * msync() won't work with the partial page, so we return an
 300          * error if write is possible while it is a shared mapping.
 301          */
 302         errno = EINVAL;
 303         return false;
 304     }
 305
 306     real_last = real_start + host_page_size - 1;
 307     host_start = g2h_untagged(real_start);
 308
 309     /* Get the protection of the target pages outside the mapping. */
 310     prot_old = 0;
 311     for (abi_ulong a = real_start; a < start; a += TARGET_PAGE_SIZE) {
 312         prot_old |= page_get_flags(a);
 313     }
 314     for (abi_ulong a = real_last; a > last; a -= TARGET_PAGE_SIZE) {
 315         prot_old |= page_get_flags(a);
 316     }
 317
 318     if (prot_old == 0) {
 319         /*
 320          * Since !(prot_old & PAGE_VALID), there were no guest pages
 321          * outside of the fragment we need to map.  Allocate a new host
 322          * page to cover, discarding whatever else may have been present.
 323          */
 324         void *p = mmap(host_start, host_page_size,
 325                        target_to_host_prot(prot),
 326                        flags | MAP_ANONYMOUS, -1, 0);
 327         if (p != host_start) {
 328             if (p != MAP_FAILED) {
 329                 munmap(p, host_page_size);
 330                 errno = EEXIST;
 331             }
 332             return false;
 333         }
 334         prot_old = prot;
 335     }
 336     prot_new = prot | prot_old;
 337
 338     host_prot_old = target_to_host_prot(prot_old);
 339     host_prot_new = target_to_host_prot(prot_new);
 340
 341     /* Adjust protection to be able to write. */
 342     if (!(host_prot_old & PROT_WRITE)) {
 343         host_prot_old |= PROT_WRITE;
 344         mprotect(host_start, host_page_size, host_prot_old);
 345     }
 346
 347     /* Read or zero the new guest pages. */
 348     if (flags & MAP_ANONYMOUS) {
 349         memset(g2h_untagged(start), 0, last - start + 1);
 350     } else {
 351         if (pread(fd, g2h_untagged(start), last - start + 1, offset) == -1) {
 352             return false;
 353         }
 354     }
 355
 356     /* Put final protection */
 357     if (host_prot_new != host_prot_old) {
 358         mprotect(host_start, host_page_size, host_prot_new);
 359     }
 360     return true;
 361 }
 362
/*
 * Guest address-space layout variables.  They are only defined here;
 * their initial values are assigned outside the code visible in this
 * part of the file.
 */
/*
 * mmap_find_vma() advances mmap_next_start only for results at or
 * above this address.
 */
abi_ulong task_unmapped_base;
/* NOTE(review): not referenced in the visible code; presumably the
 * base for ET_DYN images -- confirm against the ELF loader. */
abi_ulong elf_et_dyn_base;
/* Default search start used by mmap_find_vma() when called with 0. */
abi_ulong mmap_next_start;
 366
 367 /*
 368  * Subroutine of mmap_find_vma, used when we have pre-allocated
 369  * a chunk of guest address space.
 370  */
 371 static abi_ulong mmap_find_vma_reserved(abi_ulong start, abi_ulong size,
 372                                         abi_ulong align)
 373 {
 374     target_ulong ret;
 375
 376     ret = page_find_range_empty(start, reserved_va, size, align);
 377     if (ret == -1 && start > mmap_min_addr) {
 378         /* Restart at the beginning of the address space. */
 379         ret = page_find_range_empty(mmap_min_addr, start - 1, size, align);
 380     }
 381
 382     return ret;
 383 }
 384
 385 /*
 386  * Find and reserve a free memory area of size 'size'. The search
 387  * starts at 'start'.
 388  * It must be called with mmap_lock() held.
 389  * Return -1 if error.
 390  */
 391 abi_ulong mmap_find_vma(abi_ulong start, abi_ulong size, abi_ulong align)
 392 {
 393     int host_page_size = qemu_real_host_page_size();
 394     void *ptr, *prev;
 395     abi_ulong addr;
 396     int wrapped, repeat;
 397
 398     align = MAX(align, host_page_size);
 399
 400     /* If 'start' == 0, then a default start address is used. */
 401     if (start == 0) {
 402         start = mmap_next_start;
 403     } else {
 404         start &= -host_page_size;
 405     }
 406     start = ROUND_UP(start, align);
 407     size = ROUND_UP(size, host_page_size);
 408
 409     if (reserved_va) {
 410         return mmap_find_vma_reserved(start, size, align);
 411     }
 412
 413     addr = start;
 414     wrapped = repeat = 0;
 415     prev = 0;
 416
 417     for (;; prev = ptr) {
 418         /*
 419          * Reserve needed memory area to avoid a race.
 420          * It should be discarded using:
 421          *  - mmap() with MAP_FIXED flag
 422          *  - mremap() with MREMAP_FIXED flag
 423          *  - shmat() with SHM_REMAP flag
 424          */
 425         ptr = mmap(g2h_untagged(addr), size, PROT_NONE,
 426                    MAP_ANONYMOUS | MAP_PRIVATE | MAP_NORESERVE, -1, 0);
 427
 428         /* ENOMEM, if host address space has no memory */
 429         if (ptr == MAP_FAILED) {
 430             return (abi_ulong)-1;
 431         }
 432
 433         /*
 434          * Count the number of sequential returns of the same address.
 435          * This is used to modify the search algorithm below.
 436          */
 437         repeat = (ptr == prev ? repeat + 1 : 0);
 438
 439         if (h2g_valid(ptr + size - 1)) {
 440             addr = h2g(ptr);
 441
 442             if ((addr & (align - 1)) == 0) {
 443                 /* Success.  */
 444                 if (start == mmap_next_start && addr >= task_unmapped_base) {
 445                     mmap_next_start = addr + size;
 446                 }
 447                 return addr;
 448             }
 449
 450             /* The address is not properly aligned for the target.  */
 451             switch (repeat) {
 452             case 0:
 453                 /*
 454                  * Assume the result that the kernel gave us is the
 455                  * first with enough free space, so start again at the
 456                  * next higher target page.
 457                  */
 458                 addr = ROUND_UP(addr, align);
 459                 break;
 460             case 1:
 461                 /*
 462                  * Sometimes the kernel decides to perform the allocation
 463                  * at the top end of memory instead.
 464                  */
 465                 addr &= -align;
 466                 break;
 467             case 2:
 468                 /* Start over at low memory.  */
 469                 addr = 0;
 470                 break;
 471             default:
 472                 /* Fail.  This unaligned block must the last.  */
 473                 addr = -1;
 474                 break;
 475             }
 476         } else {
 477             /*
 478              * Since the result the kernel gave didn't fit, start
 479              * again at low memory.  If any repetition, fail.
 480              */
 481             addr = (repeat ? -1 : 0);
 482         }
 483
 484         /* Unmap and try again.  */
 485         munmap(ptr, size);
 486
 487         /* ENOMEM if we checked the whole of the target address space.  */
 488         if (addr == (abi_ulong)-1) {
 489             return (abi_ulong)-1;
 490         } else if (addr == 0) {
 491             if (wrapped) {
 492                 return (abi_ulong)-1;
 493             }
 494             wrapped = 1;
 495             /*
 496              * Don't actually use 0 when wrapping, instead indicate
 497              * that we'd truly like an allocation in low memory.
 498              */
 499             addr = (mmap_min_addr > TARGET_PAGE_SIZE
 500                      ? TARGET_PAGE_ALIGN(mmap_min_addr)
 501                      : TARGET_PAGE_SIZE);
 502         } else if (wrapped && addr >= start) {
 503             return (abi_ulong)-1;
 504         }
 505     }
 506 }
 507
 508 /*
 509  * Record a successful mmap within the user-exec interval tree.
 510  */
 511 static abi_long mmap_end(abi_ulong start, abi_ulong last,
 512                          abi_ulong passthrough_start,
 513                          abi_ulong passthrough_last,
 514                          int flags, int page_flags)
 515 {
 516     if (flags & MAP_ANONYMOUS) {
 517         page_flags |= PAGE_ANON;
 518     }
 519     page_flags |= PAGE_RESET;
 520     if (passthrough_start > passthrough_last) {
 521         page_set_flags(start, last, page_flags);
 522     } else {
 523         if (start < passthrough_start) {
 524             page_set_flags(start, passthrough_start - 1, page_flags);
 525         }
 526         page_set_flags(passthrough_start, passthrough_last,
 527                        page_flags | PAGE_PASSTHROUGH);
 528         if (passthrough_last < last) {
 529             page_set_flags(passthrough_last + 1, last, page_flags);
 530         }
 531     }
 532     shm_region_rm_complete(start, last);
 533     trace_target_mmap_complete(start);
 534     if (qemu_loglevel_mask(CPU_LOG_PAGE)) {
 535         FILE *f = qemu_log_trylock();
 536         if (f) {
 537             fprintf(f, "page layout changed following mmap\n");
 538             page_dump(f);
 539             qemu_log_unlock(f);
 540         }
 541     }
 542     return start;
 543 }
 544
 545 static abi_long target_mmap__locked(abi_ulong start, abi_ulong len,
 546                                     int target_prot, int flags, int page_flags,
 547                                     int fd, off_t offset)
 548 {
 549     int host_page_size = qemu_real_host_page_size();
 550     abi_ulong ret, last, real_start, real_last, retaddr, host_len;
 551     abi_ulong passthrough_start = -1, passthrough_last = 0;
 552     off_t host_offset;
 553
 554     real_start = start & -host_page_size;
 555     host_offset = offset & -host_page_size;
 556
 557     /*
 558      * For reserved_va, we are in full control of the allocation.
 559      * Find a suitable hole and convert to MAP_FIXED.
 560      */
 561     if (reserved_va && !(flags & (MAP_FIXED | MAP_FIXED_NOREPLACE))) {
 562         host_len = len + offset - host_offset;
 563         start = mmap_find_vma(real_start, host_len,
 564                               MAX(host_page_size, TARGET_PAGE_SIZE));
 565         if (start == (abi_ulong)-1) {
 566             errno = ENOMEM;
 567             return -1;
 568         }
 569         start += offset - host_offset;
 570         flags |= MAP_FIXED;
 571     }
 572
 573     /*
 574      * When mapping files into a memory area larger than the file, accesses
 575      * to pages beyond the file size will cause a SIGBUS.
 576      *
 577      * For example, if mmaping a file of 100 bytes on a host with 4K pages
 578      * emulating a target with 8K pages, the target expects to be able to
 579      * access the first 8K. But the host will trap us on any access beyond
 580      * 4K.
 581      *
 582      * When emulating a target with a larger page-size than the hosts, we
 583      * may need to truncate file maps at EOF and add extra anonymous pages
 584      * up to the targets page boundary.
 585      */
 586     if (host_page_size < TARGET_PAGE_SIZE && !(flags & MAP_ANONYMOUS)) {
 587         struct stat sb;
 588
 589         if (fstat(fd, &sb) == -1) {
 590             return -1;
 591         }
 592
 593         /* Are we trying to create a map beyond EOF?.  */
 594         if (offset + len > sb.st_size) {
 595             /*
 596              * If so, truncate the file map at eof aligned with
 597              * the hosts real pagesize. Additional anonymous maps
 598              * will be created beyond EOF.
 599              */
 600             len = ROUND_UP(sb.st_size - offset, host_page_size);
 601         }
 602     }
 603
 604     if (!(flags & (MAP_FIXED | MAP_FIXED_NOREPLACE))) {
 605         uintptr_t host_start;
 606         int host_prot;
 607         void *p;
 608
 609         host_len = len + offset - host_offset;
 610         host_len = ROUND_UP(host_len, host_page_size);
 611         host_prot = target_to_host_prot(target_prot);
 612
 613         /* Note: we prefer to control the mapping address. */
 614         p = mmap(g2h_untagged(start), host_len, host_prot,
 615                  flags | MAP_FIXED | MAP_ANONYMOUS, -1, 0);
 616         if (p == MAP_FAILED) {
 617             return -1;
 618         }
 619         /* update start so that it points to the file position at 'offset' */
 620         host_start = (uintptr_t)p;
 621         if (!(flags & MAP_ANONYMOUS)) {
 622             p = mmap(g2h_untagged(start), len, host_prot,
 623                      flags | MAP_FIXED, fd, host_offset);
 624             if (p == MAP_FAILED) {
 625                 munmap(g2h_untagged(start), host_len);
 626                 return -1;
 627             }
 628             host_start += offset - host_offset;
 629         }
 630         start = h2g(host_start);
 631         last = start + len - 1;
 632         passthrough_start = start;
 633         passthrough_last = last;
 634     } else {
 635         last = start + len - 1;
 636         real_last = ROUND_UP(last, host_page_size) - 1;
 637
 638         if (flags & MAP_FIXED_NOREPLACE) {
 639             /* Validate that the chosen range is empty. */
 640             if (!page_check_range_empty(start, last)) {
 641                 errno = EEXIST;
 642                 return -1;
 643             }
 644
 645             /*
 646              * With reserved_va, the entire address space is mmaped in the
 647              * host to ensure it isn't accidentally used for something else.
 648              * We have just checked that the guest address is not mapped
 649              * within the guest, but need to replace the host reservation.
 650              *
 651              * Without reserved_va, despite the guest address check above,
 652              * keep MAP_FIXED_NOREPLACE so that the guest does not overwrite
 653              * any host address mappings.
 654              */
 655             if (reserved_va) {
 656                 flags = (flags & ~MAP_FIXED_NOREPLACE) | MAP_FIXED;
 657             }
 658         }
 659
 660         /*
 661          * worst case: we cannot map the file because the offset is not
 662          * aligned, so we read it
 663          */
 664         if (!(flags & MAP_ANONYMOUS) &&
 665             (offset & (host_page_size - 1)) != (start & (host_page_size - 1))) {
 666             /*
 667              * msync() won't work here, so we return an error if write is
 668              * possible while it is a shared mapping
 669              */
 670             if ((flags & MAP_TYPE) == MAP_SHARED
 671                 && (target_prot & PROT_WRITE)) {
 672                 errno = EINVAL;
 673                 return -1;
 674             }
 675             retaddr = target_mmap(start, len, target_prot | PROT_WRITE,
 676                                   (flags & (MAP_FIXED | MAP_FIXED_NOREPLACE))
 677                                   | MAP_PRIVATE | MAP_ANONYMOUS,
 678                                   -1, 0);
 679             if (retaddr == -1) {
 680                 return -1;
 681             }
 682             if (pread(fd, g2h_untagged(start), len, offset) == -1) {
 683                 return -1;
 684             }
 685             if (!(target_prot & PROT_WRITE)) {
 686                 ret = target_mprotect(start, len, target_prot);
 687                 assert(ret == 0);
 688             }
 689             return mmap_end(start, last, -1, 0, flags, page_flags);
 690         }
 691
 692         /* handle the start of the mapping */
 693         if (start > real_start) {
 694             if (real_last == real_start + host_page_size - 1) {
 695                 /* one single host page */
 696                 if (!mmap_frag(real_start, start, last,
 697                                target_prot, flags, fd, offset)) {
 698                     return -1;
 699                 }
 700                 return mmap_end(start, last, -1, 0, flags, page_flags);
 701             }
 702             if (!mmap_frag(real_start, start,
 703                            real_start + host_page_size - 1,
 704                            target_prot, flags, fd, offset)) {
 705                 return -1;
 706             }
 707             real_start += host_page_size;
 708         }
 709         /* handle the end of the mapping */
 710         if (last < real_last) {
 711             abi_ulong real_page = real_last - host_page_size + 1;
 712             if (!mmap_frag(real_page, real_page, last,
 713                            target_prot, flags, fd,
 714                            offset + real_page - start)) {
 715                 return -1;
 716             }
 717             real_last -= host_page_size;
 718         }
 719
 720         /* map the middle (easier) */
 721         if (real_start < real_last) {
 722             void *p, *want_p;
 723             off_t offset1;
 724             size_t len1;
 725
 726             if (flags & MAP_ANONYMOUS) {
 727                 offset1 = 0;
 728             } else {
 729                 offset1 = offset + real_start - start;
 730             }
 731             len1 = real_last - real_start + 1;
 732             want_p = g2h_untagged(real_start);
 733
 734             p = mmap(want_p, len1, target_to_host_prot(target_prot),
 735                      flags, fd, offset1);
 736             if (p != want_p) {
 737                 if (p != MAP_FAILED) {
 738                     munmap(p, len1);
 739                     errno = EEXIST;
 740                 }
 741                 return -1;
 742             }
 743             passthrough_start = real_start;
 744             passthrough_last = real_last;
 745         }
 746     }
 747     return mmap_end(start, last, passthrough_start, passthrough_last,
 748                     flags, page_flags);
 749 }
 750
 751 /* NOTE: all the constants are the HOST ones */
 752 abi_long target_mmap(abi_ulong start, abi_ulong len, int target_prot,
 753                      int flags, int fd, off_t offset)
 754 {
 755     abi_long ret;
 756     int page_flags;
 757
 758     trace_target_mmap(start, len, target_prot, flags, fd, offset);
 759
 760     if (!len) {
 761         errno = EINVAL;
 762         return -1;
 763     }
 764
 765     page_flags = validate_prot_to_pageflags(target_prot);
 766     if (!page_flags) {
 767         errno = EINVAL;
 768         return -1;
 769     }
 770
 771     /* Also check for overflows... */
 772     len = TARGET_PAGE_ALIGN(len);
 773     if (!len || len != (size_t)len) {
 774         errno = ENOMEM;
 775         return -1;
 776     }
 777
 778     if (offset & ~TARGET_PAGE_MASK) {
 779         errno = EINVAL;
 780         return -1;
 781     }
 782     if (flags & (MAP_FIXED | MAP_FIXED_NOREPLACE)) {
 783         if (start & ~TARGET_PAGE_MASK) {
 784             errno = EINVAL;
 785             return -1;
 786         }
 787         if (!guest_range_valid_untagged(start, len)) {
 788             errno = ENOMEM;
 789             return -1;
 790         }
 791     }
 792
 793     mmap_lock();
 794
 795     ret = target_mmap__locked(start, len, target_prot, flags,
 796                               page_flags, fd, offset);
 797
 798     mmap_unlock();
 799
 800     /*
 801      * If we're mapping shared memory, ensure we generate code for parallel
 802      * execution and flush old translations.  This will work up to the level
 803      * supported by the host -- anything that requires EXCP_ATOMIC will not
 804      * be atomic with respect to an external process.
 805      */
 806     if (ret != -1 && (flags & MAP_TYPE) != MAP_PRIVATE) {
 807         CPUState *cpu = thread_cpu;
 808         if (!(cpu->tcg_cflags & CF_PARALLEL)) {
 809             cpu->tcg_cflags |= CF_PARALLEL;
 810             tb_flush(cpu);
 811         }
 812     }
 813
 814     return ret;
 815 }
 816
 817 static int mmap_reserve_or_unmap(abi_ulong start, abi_ulong len)
 818 {
 819     int host_page_size = qemu_real_host_page_size();
 820     abi_ulong real_start;
 821     abi_ulong real_last;
 822     abi_ulong real_len;
 823     abi_ulong last;
 824     abi_ulong a;
 825     void *host_start;
 826     int prot;
 827
 828     last = start + len - 1;
 829     real_start = start & -host_page_size;
 830     real_last = ROUND_UP(last, host_page_size) - 1;
 831
 832     /*
 833      * If guest pages remain on the first or last host pages,
 834      * adjust the deallocation to retain those guest pages.
 835      * The single page special case is required for the last page,
 836      * lest real_start overflow to zero.
 837      */
 838     if (real_last - real_start < host_page_size) {
 839         prot = 0;
 840         for (a = real_start; a < start; a += TARGET_PAGE_SIZE) {
 841             prot |= page_get_flags(a);
 842         }
 843         for (a = last; a < real_last; a += TARGET_PAGE_SIZE) {
 844             prot |= page_get_flags(a + 1);
 845         }
 846         if (prot != 0) {
 847             return 0;
 848         }
 849     } else {
 850         for (prot = 0, a = real_start; a < start; a += TARGET_PAGE_SIZE) {
 851             prot |= page_get_flags(a);
 852         }
 853         if (prot != 0) {
 854             real_start += host_page_size;
 855         }
 856
 857         for (prot = 0, a = last; a < real_last; a += TARGET_PAGE_SIZE) {
 858             prot |= page_get_flags(a + 1);
 859         }
 860         if (prot != 0) {
 861             real_last -= host_page_size;
 862         }
 863
 864         if (real_last < real_start) {
 865             return 0;
 866         }
 867     }
 868
 869     real_len = real_last - real_start + 1;
 870     host_start = g2h_untagged(real_start);
 871
 872     return do_munmap(host_start, real_len);
 873 }
 874
 875 int target_munmap(abi_ulong start, abi_ulong len)
 876 {
 877     int ret;
 878
 879     trace_target_munmap(start, len);
 880
 881     if (start & ~TARGET_PAGE_MASK) {
 882         errno = EINVAL;
 883         return -1;
 884     }
 885     len = TARGET_PAGE_ALIGN(len);
 886     if (len == 0 || !guest_range_valid_untagged(start, len)) {
 887         errno = EINVAL;
 888         return -1;
 889     }
 890
 891     mmap_lock();
 892     ret = mmap_reserve_or_unmap(start, len);
 893     if (likely(ret == 0)) {
 894         page_set_flags(start, start + len - 1, 0);
 895         shm_region_rm_complete(start, start + len - 1);
 896     }
 897     mmap_unlock();
 898
 899     return ret;
 900 }
 901
 902 abi_long target_mremap(abi_ulong old_addr, abi_ulong old_size,
 903                        abi_ulong new_size, unsigned long flags,
 904                        abi_ulong new_addr)
 905 {
 906     int prot;
 907     void *host_addr;
 908
 909     if (!guest_range_valid_untagged(old_addr, old_size) ||
 910         ((flags & MREMAP_FIXED) &&
 911          !guest_range_valid_untagged(new_addr, new_size)) ||
 912         ((flags & MREMAP_MAYMOVE) == 0 &&
 913          !guest_range_valid_untagged(old_addr, new_size))) {
 914         errno = ENOMEM;
 915         return -1;
 916     }
 917
 918     mmap_lock();
 919
 920     if (flags & MREMAP_FIXED) {
 921         host_addr = mremap(g2h_untagged(old_addr), old_size, new_size,
 922                            flags, g2h_untagged(new_addr));
 923
 924         if (reserved_va && host_addr != MAP_FAILED) {
 925             /*
 926              * If new and old addresses overlap then the above mremap will
 927              * already have failed with EINVAL.
 928              */
 929             mmap_reserve_or_unmap(old_addr, old_size);
 930         }
 931     } else if (flags & MREMAP_MAYMOVE) {
 932         abi_ulong mmap_start;
 933
 934         mmap_start = mmap_find_vma(0, new_size, TARGET_PAGE_SIZE);
 935
 936         if (mmap_start == -1) {
 937             errno = ENOMEM;
 938             host_addr = MAP_FAILED;
 939         } else {
 940             host_addr = mremap(g2h_untagged(old_addr), old_size, new_size,
 941                                flags | MREMAP_FIXED,
 942                                g2h_untagged(mmap_start));
 943             if (reserved_va) {
 944                 mmap_reserve_or_unmap(old_addr, old_size);
 945             }
 946         }
 947     } else {
 948         int page_flags = 0;
 949         if (reserved_va && old_size < new_size) {
 950             abi_ulong addr;
 951             for (addr = old_addr + old_size;
 952                  addr < old_addr + new_size;
 953                  addr++) {
 954                 page_flags |= page_get_flags(addr);
 955             }
 956         }
 957         if (page_flags == 0) {
 958             host_addr = mremap(g2h_untagged(old_addr),
 959                                old_size, new_size, flags);
 960
 961             if (host_addr != MAP_FAILED) {
 962                 /* Check if address fits target address space */
 963                 if (!guest_range_valid_untagged(h2g(host_addr), new_size)) {
 964                     /* Revert mremap() changes */
 965                     host_addr = mremap(g2h_untagged(old_addr),
 966                                        new_size, old_size, flags);
 967                     errno = ENOMEM;
 968                     host_addr = MAP_FAILED;
 969                 } else if (reserved_va && old_size > new_size) {
 970                     mmap_reserve_or_unmap(old_addr + old_size,
 971                                           old_size - new_size);
 972                 }
 973             }
 974         } else {
 975             errno = ENOMEM;
 976             host_addr = MAP_FAILED;
 977         }
 978     }
 979
 980     if (host_addr == MAP_FAILED) {
 981         new_addr = -1;
 982     } else {
 983         new_addr = h2g(host_addr);
 984         prot = page_get_flags(old_addr);
 985         page_set_flags(old_addr, old_addr + old_size - 1, 0);
 986         shm_region_rm_complete(old_addr, old_addr + old_size - 1);
 987         page_set_flags(new_addr, new_addr + new_size - 1,
 988                        prot | PAGE_VALID | PAGE_RESET);
 989         shm_region_rm_complete(new_addr, new_addr + new_size - 1);
 990     }
 991     mmap_unlock();
 992     return new_addr;
 993 }
 994
 995 abi_long target_madvise(abi_ulong start, abi_ulong len_in, int advice)
 996 {
 997     abi_ulong len;
 998     int ret = 0;
 999
1000     if (start & ~TARGET_PAGE_MASK) {
1001         return -TARGET_EINVAL;
1002     }
1003     if (len_in == 0) {
1004         return 0;
1005     }
1006     len = TARGET_PAGE_ALIGN(len_in);
1007     if (len == 0 || !guest_range_valid_untagged(start, len)) {
1008         return -TARGET_EINVAL;
1009     }
1010
1011     /* Translate for some architectures which have different MADV_xxx values */
1012     switch (advice) {
1013     case TARGET_MADV_DONTNEED:      /* alpha */
1014         advice = MADV_DONTNEED;
1015         break;
1016     case TARGET_MADV_WIPEONFORK:    /* parisc */
1017         advice = MADV_WIPEONFORK;
1018         break;
1019     case TARGET_MADV_KEEPONFORK:    /* parisc */
1020         advice = MADV_KEEPONFORK;
1021         break;
1022     /* we do not care about the other MADV_xxx values yet */
1023     }
1024
1025     /*
1026      * Most advice values are hints, so ignoring and returning success is ok.
1027      *
1028      * However, some advice values such as MADV_DONTNEED, MADV_WIPEONFORK and
1029      * MADV_KEEPONFORK are not hints and need to be emulated.
1030      *
1031      * A straight passthrough for those may not be safe because qemu sometimes
1032      * turns private file-backed mappings into anonymous mappings.
1033      * If all guest pages have PAGE_PASSTHROUGH set, mappings have the
1034      * same semantics for the host as for the guest.
1035      *
1036      * We pass through MADV_WIPEONFORK and MADV_KEEPONFORK if possible and
1037      * return failure if not.
1038      *
1039      * MADV_DONTNEED is passed through as well, if possible.
1040      * If passthrough isn't possible, we nevertheless (wrongly!) return
1041      * success, which is broken but some userspace programs fail to work
1042      * otherwise. Completely implementing such emulation is quite complicated
1043      * though.
1044      */
1045     mmap_lock();
1046     switch (advice) {
1047     case MADV_WIPEONFORK:
1048     case MADV_KEEPONFORK:
1049         ret = -EINVAL;
1050         /* fall through */
1051     case MADV_DONTNEED:
1052         if (page_check_range(start, len, PAGE_PASSTHROUGH)) {
1053             ret = get_errno(madvise(g2h_untagged(start), len, advice));
1054             if ((advice == MADV_DONTNEED) && (ret == 0)) {
1055                 page_reset_target_data(start, start + len - 1);
1056             }
1057         }
1058     }
1059     mmap_unlock();
1060
1061     return ret;
1062 }
1063
1064 #ifndef TARGET_FORCE_SHMLBA
1065 /*
1066  * For most architectures, SHMLBA is the same as the page size;
1067  * some architectures have larger values, in which case they should
1068  * define TARGET_FORCE_SHMLBA and provide a target_shmlba() function.
1069  * This corresponds to the kernel arch code defining __ARCH_FORCE_SHMLBA
1070  * and defining its own value for SHMLBA.
1071  *
1072  * The kernel also permits SHMLBA to be set by the architecture to a
1073  * value larger than the page size without setting __ARCH_FORCE_SHMLBA;
1074  * this means that addresses are rounded to the large size if
1075  * SHM_RND is set but addresses not aligned to that size are not rejected
1076  * as long as they are at least page-aligned. Since the only architecture
1077  * which uses this is ia64 this code doesn't provide for that oddity.
1078  */
1079 static inline abi_ulong target_shmlba(CPUArchState *cpu_env)
1080 {
1081     return TARGET_PAGE_SIZE;
1082 }
1083 #endif
1084
1085 abi_ulong target_shmat(CPUArchState *cpu_env, int shmid,
1086                        abi_ulong shmaddr, int shmflg)
1087 {
1088     CPUState *cpu = env_cpu(cpu_env);
1089     abi_ulong raddr;
1090     struct shmid_ds shm_info;
1091     int ret;
1092     abi_ulong shmlba;
1093
1094     /* shmat pointers are always untagged */
1095
1096     /* find out the length of the shared memory segment */
1097     ret = get_errno(shmctl(shmid, IPC_STAT, &shm_info));
1098     if (is_error(ret)) {
1099         /* can't get length, bail out */
1100         return ret;
1101     }
1102
1103     shmlba = target_shmlba(cpu_env);
1104
1105     if (shmaddr & (shmlba - 1)) {
1106         if (shmflg & SHM_RND) {
1107             shmaddr &= ~(shmlba - 1);
1108         } else {
1109             return -TARGET_EINVAL;
1110         }
1111     }
1112     if (!guest_range_valid_untagged(shmaddr, shm_info.shm_segsz)) {
1113         return -TARGET_EINVAL;
1114     }
1115
1116     WITH_MMAP_LOCK_GUARD() {
1117         void *host_raddr;
1118         abi_ulong last;
1119
1120         if (shmaddr) {
1121             host_raddr = shmat(shmid, (void *)g2h_untagged(shmaddr), shmflg);
1122         } else {
1123             abi_ulong mmap_start;
1124
1125             /* In order to use the host shmat, we need to honor host SHMLBA.  */
1126             mmap_start = mmap_find_vma(0, shm_info.shm_segsz,
1127                                        MAX(SHMLBA, shmlba));
1128
1129             if (mmap_start == -1) {
1130                 return -TARGET_ENOMEM;
1131             }
1132             host_raddr = shmat(shmid, g2h_untagged(mmap_start),
1133                                shmflg | SHM_REMAP);
1134         }
1135
1136         if (host_raddr == (void *)-1) {
1137             return get_errno(-1);
1138         }
1139         raddr = h2g(host_raddr);
1140         last = raddr + shm_info.shm_segsz - 1;
1141
1142         page_set_flags(raddr, last,
1143                        PAGE_VALID | PAGE_RESET | PAGE_READ |
1144                        (shmflg & SHM_RDONLY ? 0 : PAGE_WRITE));
1145
1146         shm_region_rm_complete(raddr, last);
1147         shm_region_add(raddr, last);
1148     }
1149
1150     /*
1151      * We're mapping shared memory, so ensure we generate code for parallel
1152      * execution and flush old translations.  This will work up to the level
1153      * supported by the host -- anything that requires EXCP_ATOMIC will not
1154      * be atomic with respect to an external process.
1155      */
1156     if (!(cpu->tcg_cflags & CF_PARALLEL)) {
1157         cpu->tcg_cflags |= CF_PARALLEL;
1158         tb_flush(cpu);
1159     }
1160
1161     return raddr;
1162 }
1163
1164 abi_long target_shmdt(abi_ulong shmaddr)
1165 {
1166     abi_long rv;
1167
1168     /* shmdt pointers are always untagged */
1169
1170     WITH_MMAP_LOCK_GUARD() {
1171         abi_ulong last = shm_region_find(shmaddr);
1172         if (last == 0) {
1173             return -TARGET_EINVAL;
1174         }
1175
1176         rv = get_errno(shmdt(g2h_untagged(shmaddr)));
1177         if (rv == 0) {
1178             abi_ulong size = last - shmaddr + 1;
1179
1180             page_set_flags(shmaddr, last, 0);
1181             shm_region_rm_complete(shmaddr, last);
1182             mmap_reserve_or_unmap(shmaddr, size);
1183         }
1184     }
1185     return rv;
1186 }