arch/x86/xen/mmu.c

   1 /*
   2  * Xen mmu operations
   3  *
   4  * This file contains the various mmu fetch and update operations.
   5  * The most important job they must perform is the mapping between the
   6  * domain's pfn and the overall machine mfns.
   7  *
   8  * Xen allows guests to directly update the pagetable, in a controlled
   9  * fashion.  In other words, the guest modifies the same pagetable
  10  * that the CPU actually uses, which eliminates the overhead of having
  11  * a separate shadow pagetable.
  12  *
  13  * In order to allow this, it falls on the guest domain to map its
  14  * notion of a "physical" pfn - which is just a domain-local linear
  15  * address - into a real "machine address" which the CPU's MMU can
  16  * use.
  17  *
  18  * A pgd_t/pmd_t/pte_t will typically contain an mfn, and so can be
  19  * inserted directly into the pagetable.  When creating a new
  20  * pte/pmd/pgd, it converts the passed pfn into an mfn.  Conversely,
  21  * when reading the content back with __(pgd|pmd|pte)_val, it converts
  22  * the mfn back into a pfn.
  23  *
  24  * The other constraint is that all pages which make up a pagetable
  25  * must be mapped read-only in the guest.  This prevents uncontrolled
  26  * guest updates to the pagetable.  Xen strictly enforces this, and
  27  * will disallow any pagetable update which will end up mapping a
  28  * pagetable page RW, and will disallow using any writable page as a
  29  * pagetable.
  30  *
  31  * Naively, when loading %cr3 with the base of a new pagetable, Xen
  32  * would need to validate the whole pagetable before going on.
  33  * Naturally, this is quite slow.  The solution is to "pin" a
  34  * pagetable, which enforces all the constraints on the pagetable even
  35  * when it is not actively in use.  This means that Xen can be assured
  36  * that it is still valid when you do load it into %cr3, and doesn't
  37  * need to revalidate it.
  38  *
  39  * Jeremy Fitzhardinge <jeremy@xensource.com>, XenSource Inc, 2007
  40  */
  41 #include <linux/sched.h>
  42 #include <linux/highmem.h>
  43 #include <linux/bug.h>
  44
  45 #include <asm/pgtable.h>
  46 #include <asm/tlbflush.h>
  47 #include <asm/mmu_context.h>
  48 #include <asm/paravirt.h>
  49
  50 #include <asm/xen/hypercall.h>
  51 #include <asm/xen/hypervisor.h>
  52
  53 #include <xen/page.h>
  54 #include <xen/interface/xen.h>
  55
  56 #include "multicalls.h"
  57 #include "mmu.h"
  58
/* Number of unsigned long p2m entries that fit in one page */
#define P2M_ENTRIES_PER_PAGE    (PAGE_SIZE / sizeof(unsigned long))
/* Number of p2m pages needed to cover MAX_DOMAIN_PAGES pfns */
#define TOP_ENTRIES             (MAX_DOMAIN_PAGES / P2M_ENTRIES_PER_PAGE)

/*
 * Placeholder for holes in the address space: one page-aligned page
 * whose entries are all ~0UL (presumably the same value as
 * INVALID_P2M_ENTRY - confirm against its definition).
 */
static unsigned long p2m_missing[P2M_ENTRIES_PER_PAGE]
        __attribute__((section(".data.page_aligned"))) =
                { [ 0 ... P2M_ENTRIES_PER_PAGE-1 ] = ~0UL };

/*
 * Array of pointers to pages containing p2m entries.  Every slot
 * starts out pointing at p2m_missing; a slot that still points there
 * has no page of its own.
 */
static unsigned long *p2m_top[TOP_ENTRIES]
        __attribute__((section(".data.page_aligned"))) =
                { [ 0 ... TOP_ENTRIES - 1] = &p2m_missing[0] };
  71
  72 static inline unsigned p2m_top_index(unsigned long pfn)
  73 {
  74         BUG_ON(pfn >= MAX_DOMAIN_PAGES);
  75         return pfn / P2M_ENTRIES_PER_PAGE;
  76 }
  77
  78 static inline unsigned p2m_index(unsigned long pfn)
  79 {
  80         return pfn % P2M_ENTRIES_PER_PAGE;
  81 }
  82
  83 void __init xen_build_dynamic_phys_to_machine(void)
  84 {
  85         unsigned pfn;
  86         unsigned long *mfn_list = (unsigned long *)xen_start_info->mfn_list;
  87         unsigned long max_pfn = min(MAX_DOMAIN_PAGES, xen_start_info->nr_pages);
  88
  89         for(pfn = 0; pfn < max_pfn; pfn += P2M_ENTRIES_PER_PAGE) {
  90                 unsigned topidx = p2m_top_index(pfn);
  91
  92                 p2m_top[topidx] = &mfn_list[pfn];
  93         }
  94 }
  95
  96 unsigned long get_phys_to_machine(unsigned long pfn)
  97 {
  98         unsigned topidx, idx;
  99
 100         if (unlikely(pfn >= MAX_DOMAIN_PAGES))
 101                 return INVALID_P2M_ENTRY;
 102
 103         topidx = p2m_top_index(pfn);
 104         idx = p2m_index(pfn);
 105         return p2m_top[topidx][idx];
 106 }
 107
 108 static void alloc_p2m(unsigned long **pp)
 109 {
 110         unsigned long *p;
 111         unsigned i;
 112
 113         p = (void *)__get_free_page(GFP_KERNEL | __GFP_NOFAIL);
 114         BUG_ON(p == NULL);
 115
 116         for(i = 0; i < P2M_ENTRIES_PER_PAGE; i++)
 117                 p[i] = INVALID_P2M_ENTRY;
 118
 119         if (cmpxchg(pp, p2m_missing, p) != p2m_missing)
 120                 free_page((unsigned long)p);
 121 }
 122
 123 void set_phys_to_machine(unsigned long pfn, unsigned long mfn)
 124 {
 125         unsigned topidx, idx;
 126
 127         if (unlikely(xen_feature(XENFEAT_auto_translated_physmap))) {
 128                 BUG_ON(pfn != mfn && mfn != INVALID_P2M_ENTRY);
 129                 return;
 130         }
 131
 132         if (unlikely(pfn >= MAX_DOMAIN_PAGES)) {
 133                 BUG_ON(mfn != INVALID_P2M_ENTRY);
 134                 return;
 135         }
 136
 137         topidx = p2m_top_index(pfn);
 138         if (p2m_top[topidx] == p2m_missing) {
 139                 /* no need to allocate a page to store an invalid entry */
 140                 if (mfn == INVALID_P2M_ENTRY)
 141                         return;
 142                 alloc_p2m(&p2m_top[topidx]);
 143         }
 144
 145         idx = p2m_index(pfn);
 146         p2m_top[topidx][idx] = mfn;
 147 }
 148
 149 xmaddr_t arbitrary_virt_to_machine(unsigned long address)
 150 {
 151         unsigned int level;
 152         pte_t *pte = lookup_address(address, &level);
 153         unsigned offset = address & PAGE_MASK;
 154
 155         BUG_ON(pte == NULL);
 156
 157         return XMADDR((pte_mfn(*pte) << PAGE_SHIFT) + offset);
 158 }
 159
 160 void make_lowmem_page_readonly(void *vaddr)
 161 {
 162         pte_t *pte, ptev;
 163         unsigned long address = (unsigned long)vaddr;
 164         unsigned int level;
 165
 166         pte = lookup_address(address, &level);
 167         BUG_ON(pte == NULL);
 168
 169         ptev = pte_wrprotect(*pte);
 170
 171         if (HYPERVISOR_update_va_mapping(address, ptev, 0))
 172                 BUG();
 173 }
 174
 175 void make_lowmem_page_readwrite(void *vaddr)
 176 {
 177         pte_t *pte, ptev;
 178         unsigned long address = (unsigned long)vaddr;
 179         unsigned int level;
 180
 181         pte = lookup_address(address, &level);
 182         BUG_ON(pte == NULL);
 183
 184         ptev = pte_mkwrite(*pte);
 185
 186         if (HYPERVISOR_update_va_mapping(address, ptev, 0))
 187                 BUG();
 188 }
 189
 190
 191 void xen_set_pmd(pmd_t *ptr, pmd_t val)
 192 {
 193         struct multicall_space mcs;
 194         struct mmu_update *u;
 195
 196         preempt_disable();
 197
 198         mcs = xen_mc_entry(sizeof(*u));
 199         u = mcs.args;
 200         u->ptr = virt_to_machine(ptr).maddr;
 201         u->val = pmd_val_ma(val);
 202         MULTI_mmu_update(mcs.mc, u, 1, NULL, DOMID_SELF);
 203
 204         xen_mc_issue(PARAVIRT_LAZY_MMU);
 205
 206         preempt_enable();
 207 }
 208
 209 /*
 210  * Associate a virtual page frame with a given physical page frame
 211  * and protection flags for that frame.
 212  */
 213 void set_pte_mfn(unsigned long vaddr, unsigned long mfn, pgprot_t flags)
 214 {
 215         pgd_t *pgd;
 216         pud_t *pud;
 217         pmd_t *pmd;
 218         pte_t *pte;
 219
 220         pgd = swapper_pg_dir + pgd_index(vaddr);
 221         if (pgd_none(*pgd)) {
 222                 BUG();
 223                 return;
 224         }
 225         pud = pud_offset(pgd, vaddr);
 226         if (pud_none(*pud)) {
 227                 BUG();
 228                 return;
 229         }
 230         pmd = pmd_offset(pud, vaddr);
 231         if (pmd_none(*pmd)) {
 232                 BUG();
 233                 return;
 234         }
 235         pte = pte_offset_kernel(pmd, vaddr);
 236         /* <mfn,flags> stored as-is, to permit clearing entries */
 237         xen_set_pte(pte, mfn_pte(mfn, flags));
 238
 239         /*
 240          * It's enough to flush this one mapping.
 241          * (PGE mappings get flushed as well)
 242          */
 243         __flush_tlb_one(vaddr);
 244 }
 245
 246 void xen_set_pte_at(struct mm_struct *mm, unsigned long addr,
 247                     pte_t *ptep, pte_t pteval)
 248 {
 249         /* updates to init_mm may be done without lock */
 250         if (mm == &init_mm)
 251                 preempt_disable();
 252
 253         if (mm == current->mm || mm == &init_mm) {
 254                 if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU) {
 255                         struct multicall_space mcs;
 256                         mcs = xen_mc_entry(0);
 257
 258                         MULTI_update_va_mapping(mcs.mc, addr, pteval, 0);
 259                         xen_mc_issue(PARAVIRT_LAZY_MMU);
 260                         goto out;
 261                 } else
 262                         if (HYPERVISOR_update_va_mapping(addr, pteval, 0) == 0)
 263                                 goto out;
 264         }
 265         xen_set_pte(ptep, pteval);
 266
 267 out:
 268         if (mm == &init_mm)
 269                 preempt_enable();
 270 }
 271
 272 pteval_t xen_pte_val(pte_t pte)
 273 {
 274         pteval_t ret = pte.pte;
 275
 276         if (ret & _PAGE_PRESENT)
 277                 ret = machine_to_phys(XMADDR(ret)).paddr | _PAGE_PRESENT;
 278
 279         return ret;
 280 }
 281
 282 pgdval_t xen_pgd_val(pgd_t pgd)
 283 {
 284         pgdval_t ret = pgd.pgd;
 285         if (ret & _PAGE_PRESENT)
 286                 ret = machine_to_phys(XMADDR(ret)).paddr | _PAGE_PRESENT;
 287         return ret;
 288 }
 289
 290 pte_t xen_make_pte(pteval_t pte)
 291 {
 292         if (pte & _PAGE_PRESENT) {
 293                 pte = phys_to_machine(XPADDR(pte)).maddr;
 294                 pte &= ~(_PAGE_PCD | _PAGE_PWT);
 295         }
 296
 297         return (pte_t){ .pte = pte };
 298 }
 299
 300 pgd_t xen_make_pgd(pgdval_t pgd)
 301 {
 302         if (pgd & _PAGE_PRESENT)
 303                 pgd = phys_to_machine(XPADDR(pgd)).maddr;
 304
 305         return (pgd_t){ pgd };
 306 }
 307
 308 pmdval_t xen_pmd_val(pmd_t pmd)
 309 {
 310         pmdval_t ret = native_pmd_val(pmd);
 311         if (ret & _PAGE_PRESENT)
 312                 ret = machine_to_phys(XMADDR(ret)).paddr | _PAGE_PRESENT;
 313         return ret;
 314 }
 315
 316 void xen_set_pud(pud_t *ptr, pud_t val)
 317 {
 318         struct multicall_space mcs;
 319         struct mmu_update *u;
 320
 321         preempt_disable();
 322
 323         mcs = xen_mc_entry(sizeof(*u));
 324         u = mcs.args;
 325         u->ptr = virt_to_machine(ptr).maddr;
 326         u->val = pud_val_ma(val);
 327         MULTI_mmu_update(mcs.mc, u, 1, NULL, DOMID_SELF);
 328
 329         xen_mc_issue(PARAVIRT_LAZY_MMU);
 330
 331         preempt_enable();
 332 }
 333
/*
 * Store @pte into *@ptep with two plain word writes.  The high word is
 * written first and the low word last, separated by a write barrier,
 * so the low word never becomes visible before the high word.
 * NOTE(review): presumably this is because the low word carries the
 * present bit - confirm against the pte layout.
 */
void xen_set_pte(pte_t *ptep, pte_t pte)
{
        ptep->pte_high = pte.pte_high;
        smp_wmb();              /* high word must land before low */
        ptep->pte_low = pte.pte_low;
}
 340
 341 void xen_set_pte_atomic(pte_t *ptep, pte_t pte)
 342 {
 343         set_64bit((u64 *)ptep, pte_val_ma(pte));
 344 }
 345
/*
 * Clear the pte at @ptep with two plain word writes: the low word
 * first, then the high word, separated by a write barrier.  @mm and
 * @addr are not used.
 */
void xen_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
{
        ptep->pte_low = 0;
        smp_wmb();              /* make sure low gets written first */
        ptep->pte_high = 0;
}
 352
 353 void xen_pmd_clear(pmd_t *pmdp)
 354 {
 355         xen_set_pmd(pmdp, __pmd(0));
 356 }
 357
 358 pmd_t xen_make_pmd(pmdval_t pmd)
 359 {
 360         if (pmd & _PAGE_PRESENT)
 361                 pmd = phys_to_machine(XPADDR(pmd)).maddr;
 362
 363         return native_make_pmd(pmd);
 364 }
 365
/*
  (Yet another) pagetable walker.  This one is intended for pinning a
  pagetable.  This means that it walks a pagetable and calls the
  callback function on each page it finds making up the page table,
  at every level.  It walks the entire pagetable, but it only bothers
  pinning pte pages which are below limit.  In the normal case
  this will be TASK_SIZE, but at boot we need to pin up to
  FIXADDR_TOP.  But the important bit is that we don't pin beyond
  there, because then we start getting into Xen's ptes.

  pgd_base: top-level page of the pagetable to walk
  func:     callback invoked with each pagetable page and its level
  limit:    pte pages are only visited for addresses below this;
            must not exceed FIXADDR_TOP (BUG otherwise)

  Returns the bitwise OR of all the callback's return values, or 0
  without walking anything when the physmap is auto-translated.
*/
static int pgd_walk(pgd_t *pgd_base, int (*func)(struct page *, enum pt_level),
                    unsigned long limit)
{
        pgd_t *pgd = pgd_base;
        int flush = 0;
        unsigned long addr = 0;
        unsigned long pgd_next;

        BUG_ON(limit > FIXADDR_TOP);

        if (xen_feature(XENFEAT_auto_translated_physmap))
                return 0;

        /* walk every pgd entry covering [0, FIXADDR_TOP) */
        for (; addr != FIXADDR_TOP; pgd++, addr = pgd_next) {
                pud_t *pud;
                unsigned long pud_limit, pud_next;

                pgd_next = pud_limit = pgd_addr_end(addr, FIXADDR_TOP);

                if (!pgd_val(*pgd))
                        continue;

                pud = pud_offset(pgd, 0);

                if (PTRS_PER_PUD > 1) /* not folded */
                        flush |= (*func)(virt_to_page(pud), PT_PUD);

                for (; addr != pud_limit; pud++, addr = pud_next) {
                        pmd_t *pmd;
                        unsigned long pmd_limit;

                        pud_next = pud_addr_end(addr, pud_limit);

                        /* pte pages are only visited below limit */
                        if (pud_next < limit)
                                pmd_limit = pud_next;
                        else
                                pmd_limit = limit;

                        if (pud_none(*pud))
                                continue;

                        pmd = pmd_offset(pud, 0);

                        if (PTRS_PER_PMD > 1) /* not folded */
                                flush |= (*func)(virt_to_page(pmd), PT_PMD);

                        for (; addr != pmd_limit; pmd++) {
                                /* addr now names the end of this pmd's range */
                                addr += (PAGE_SIZE * PTRS_PER_PTE);
                                /*
                                 * Stop once the range ends beyond
                                 * pmd_limit.  Comparing the values
                                 * minus one keeps the test true when
                                 * addr has wrapped around to 0.
                                 */
                                if ((pmd_limit-1) < (addr-1)) {
                                        addr = pmd_limit;
                                        break;
                                }

                                if (pmd_none(*pmd))
                                        continue;

                                flush |= (*func)(pmd_page(*pmd), PT_PTE);
                        }
                }
        }

        /* the top-level page is handed to the callback last */
        flush |= (*func)(virt_to_page(pgd_base), PT_PGD);

        return flush;
}
 441
 442 static spinlock_t *lock_pte(struct page *page)
 443 {
 444         spinlock_t *ptl = NULL;
 445
 446 #if NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS
 447         ptl = __pte_lockptr(page);
 448         spin_lock(ptl);
 449 #endif
 450
 451         return ptl;
 452 }
 453
 454 static void do_unlock(void *v)
 455 {
 456         spinlock_t *ptl = v;
 457         spin_unlock(ptl);
 458 }
 459
 460 static void xen_do_pin(unsigned level, unsigned long pfn)
 461 {
 462         struct mmuext_op *op;
 463         struct multicall_space mcs;
 464
 465         mcs = __xen_mc_entry(sizeof(*op));
 466         op = mcs.args;
 467         op->cmd = level;
 468         op->arg1.mfn = pfn_to_mfn(pfn);
 469         MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF);
 470 }
 471
 472 static int pin_page(struct page *page, enum pt_level level)
 473 {
 474         unsigned pgfl = TestSetPagePinned(page);
 475         int flush;
 476
 477         if (pgfl)
 478                 flush = 0;              /* already pinned */
 479         else if (PageHighMem(page))
 480                 /* kmaps need flushing if we found an unpinned
 481                    highpage */
 482                 flush = 1;
 483         else {
 484                 void *pt = lowmem_page_address(page);
 485                 unsigned long pfn = page_to_pfn(page);
 486                 struct multicall_space mcs = __xen_mc_entry(0);
 487                 spinlock_t *ptl;
 488
 489                 flush = 0;
 490
 491                 ptl = NULL;
 492                 if (level == PT_PTE)
 493                         ptl = lock_pte(page);
 494
 495                 MULTI_update_va_mapping(mcs.mc, (unsigned long)pt,
 496                                         pfn_pte(pfn, PAGE_KERNEL_RO),
 497                                         level == PT_PGD ? UVMF_TLB_FLUSH : 0);
 498
 499                 if (level == PT_PTE)
 500                         xen_do_pin(MMUEXT_PIN_L1_TABLE, pfn);
 501
 502                 if (ptl) {
 503                         /* Queue a deferred unlock for when this batch
 504                            is completed. */
 505                         xen_mc_callback(do_unlock, ptl);
 506                 }
 507         }
 508
 509         return flush;
 510 }
 511
 512 /* This is called just after a mm has been created, but it has not
 513    been used yet.  We need to make sure that its pagetable is all
 514    read-only, and can be pinned. */
 515 void xen_pgd_pin(pgd_t *pgd)
 516 {
 517         xen_mc_batch();
 518
 519         if (pgd_walk(pgd, pin_page, TASK_SIZE)) {
 520                 /* re-enable interrupts for kmap_flush_unused */
 521                 xen_mc_issue(0);
 522                 kmap_flush_unused();
 523                 xen_mc_batch();
 524         }
 525
 526         xen_do_pin(MMUEXT_PIN_L3_TABLE, PFN_DOWN(__pa(pgd)));
 527         xen_mc_issue(0);
 528 }
 529
/* The init_mm pagetable is really pinned as soon as it's created, but
   that's before we have page structures to store the bits.  So do all
   the book-keeping now.

   pgd_walk() callback: sets the pinned flag on @page and returns 0;
   @level is not used. */
static __init int mark_pinned(struct page *page, enum pt_level level)
{
        SetPagePinned(page);
        return 0;
}
 538
/* Mark every page of the init_mm pagetable as pinned by walking it
   with mark_pinned(), with the walk limit set to FIXADDR_TOP. */
void __init xen_mark_init_mm_pinned(void)
{
        pgd_walk(init_mm.pgd, mark_pinned, FIXADDR_TOP);
}
 543
 544 static int unpin_page(struct page *page, enum pt_level level)
 545 {
 546         unsigned pgfl = TestClearPagePinned(page);
 547
 548         if (pgfl && !PageHighMem(page)) {
 549                 void *pt = lowmem_page_address(page);
 550                 unsigned long pfn = page_to_pfn(page);
 551                 spinlock_t *ptl = NULL;
 552                 struct multicall_space mcs;
 553
 554                 if (level == PT_PTE) {
 555                         ptl = lock_pte(page);
 556
 557                         xen_do_pin(MMUEXT_UNPIN_TABLE, pfn);
 558                 }
 559
 560                 mcs = __xen_mc_entry(0);
 561
 562                 MULTI_update_va_mapping(mcs.mc, (unsigned long)pt,
 563                                         pfn_pte(pfn, PAGE_KERNEL),
 564                                         level == PT_PGD ? UVMF_TLB_FLUSH : 0);
 565
 566                 if (ptl) {
 567                         /* unlock when batch completed */
 568                         xen_mc_callback(do_unlock, ptl);
 569                 }
 570         }
 571
 572         return 0;               /* never need to flush on unpin */
 573 }
 574
 575 /* Release a pagetables pages back as normal RW */
 576 static void xen_pgd_unpin(pgd_t *pgd)
 577 {
 578         xen_mc_batch();
 579
 580         xen_do_pin(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd)));
 581
 582         pgd_walk(pgd, unpin_page, TASK_SIZE);
 583
 584         xen_mc_issue(0);
 585 }
 586
 587 void xen_activate_mm(struct mm_struct *prev, struct mm_struct *next)
 588 {
 589         spin_lock(&next->page_table_lock);
 590         xen_pgd_pin(next->pgd);
 591         spin_unlock(&next->page_table_lock);
 592 }
 593
 594 void xen_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm)
 595 {
 596         spin_lock(&mm->page_table_lock);
 597         xen_pgd_pin(mm->pgd);
 598         spin_unlock(&mm->page_table_lock);
 599 }
 600
 601
 602 #ifdef CONFIG_SMP
 603 /* Another cpu may still have their %cr3 pointing at the pagetable, so
 604    we need to repoint it somewhere else before we can unpin it. */
 605 static void drop_other_mm_ref(void *info)
 606 {
 607         struct mm_struct *mm = info;
 608
 609         if (__get_cpu_var(cpu_tlbstate).active_mm == mm)
 610                 leave_mm(smp_processor_id());
 611
 612         /* If this cpu still has a stale cr3 reference, then make sure
 613            it has been flushed. */
 614         if (x86_read_percpu(xen_current_cr3) == __pa(mm->pgd)) {
 615                 load_cr3(swapper_pg_dir);
 616                 arch_flush_lazy_cpu_mode();
 617         }
 618 }
 619
 620 static void drop_mm_ref(struct mm_struct *mm)
 621 {
 622         cpumask_t mask;
 623         unsigned cpu;
 624
 625         if (current->active_mm == mm) {
 626                 if (current->mm == mm)
 627                         load_cr3(swapper_pg_dir);
 628                 else
 629                         leave_mm(smp_processor_id());
 630                 arch_flush_lazy_cpu_mode();
 631         }
 632
 633         /* Get the "official" set of cpus referring to our pagetable. */
 634         mask = mm->cpu_vm_mask;
 635
 636         /* It's possible that a vcpu may have a stale reference to our
 637            cr3, because its in lazy mode, and it hasn't yet flushed
 638            its set of pending hypercalls yet.  In this case, we can
 639            look at its actual current cr3 value, and force it to flush
 640            if needed. */
 641         for_each_online_cpu(cpu) {
 642                 if (per_cpu(xen_current_cr3, cpu) == __pa(mm->pgd))
 643                         cpu_set(cpu, mask);
 644         }
 645
 646         if (!cpus_empty(mask))
 647                 xen_smp_call_function_mask(mask, drop_other_mm_ref, mm, 1);
 648 }
 649 #else
 650 static void drop_mm_ref(struct mm_struct *mm)
 651 {
 652         if (current->active_mm == mm)
 653                 load_cr3(swapper_pg_dir);
 654 }
 655 #endif
 656
 657 /*
 658  * While a process runs, Xen pins its pagetables, which means that the
 659  * hypervisor forces it to be read-only, and it controls all updates
 660  * to it.  This means that all pagetable updates have to go via the
 661  * hypervisor, which is moderately expensive.
 662  *
 663  * Since we're pulling the pagetable down, we switch to use init_mm,
 664  * unpin old process pagetable and mark it all read-write, which
 665  * allows further operations on it to be simple memory accesses.
 666  *
 667  * The only subtle point is that another CPU may be still using the
 668  * pagetable because of lazy tlb flushing.  This means we need need to
 669  * switch all CPUs off this pagetable before we can unpin it.
 670  */
 671 void xen_exit_mmap(struct mm_struct *mm)
 672 {
 673         get_cpu();              /* make sure we don't move around */
 674         drop_mm_ref(mm);
 675         put_cpu();
 676
 677         spin_lock(&mm->page_table_lock);
 678
 679         /* pgd may not be pinned in the error exit path of execve */
 680         if (PagePinned(virt_to_page(mm->pgd)))
 681                 xen_pgd_unpin(mm->pgd);
 682
 683         spin_unlock(&mm->page_table_lock);
 684 }