arch/x86/mm/pageattr.c

   1 /*
   2  * Copyright 2002 Andi Kleen, SuSE Labs.
   3  * Thanks to Ben LaHaise for precious feedback.
   4  */
   5 #include <linux/highmem.h>
   6 #include <linux/bootmem.h>
   7 #include <linux/module.h>
   8 #include <linux/sched.h>
   9 #include <linux/slab.h>
  10 #include <linux/mm.h>
  11 #include <linux/interrupt.h>
  12
  13 #include <asm/e820.h>
  14 #include <asm/processor.h>
  15 #include <asm/tlbflush.h>
  16 #include <asm/sections.h>
  17 #include <asm/uaccess.h>
  18 #include <asm/pgalloc.h>
  19
  20 /*
  21  * The current flushing context - we pass it instead of 5 arguments:
  22  */
  23 struct cpa_data {
  24         unsigned long   vaddr;
  25         pgprot_t        mask_set;
  26         pgprot_t        mask_clr;
  27         int             numpages;
  28         int             flushtlb;
  29 };
  30
  31 static inline int
  32 within(unsigned long addr, unsigned long start, unsigned long end)
  33 {
  34         return addr >= start && addr < end;
  35 }
  36
  37 /*
  38  * Flushing functions
  39  */
  40
  41 /**
  42  * clflush_cache_range - flush a cache range with clflush
  43  * @addr:       virtual start address
  44  * @size:       number of bytes to flush
  45  *
  46  * clflush is an unordered instruction which needs fencing with mfence
  47  * to avoid ordering issues.
  48  */
  49 void clflush_cache_range(void *vaddr, unsigned int size)
  50 {
  51         void *vend = vaddr + size - 1;
  52
  53         mb();
  54
  55         for (; vaddr < vend; vaddr += boot_cpu_data.x86_clflush_size)
  56                 clflush(vaddr);
  57         /*
  58          * Flush any possible final partial cacheline:
  59          */
  60         clflush(vend);
  61
  62         mb();
  63 }
  64
  65 static void __cpa_flush_all(void *arg)
  66 {
  67         unsigned long cache = (unsigned long)arg;
  68
  69         /*
  70          * Flush all to work around Errata in early athlons regarding
  71          * large page flushing.
  72          */
  73         __flush_tlb_all();
  74
  75         if (cache && boot_cpu_data.x86_model >= 4)
  76                 wbinvd();
  77 }
  78
  79 static void cpa_flush_all(unsigned long cache)
  80 {
  81         BUG_ON(irqs_disabled());
  82
  83         on_each_cpu(__cpa_flush_all, (void *) cache, 1, 1);
  84 }
  85
  86 static void __cpa_flush_range(void *arg)
  87 {
  88         /*
  89          * We could optimize that further and do individual per page
  90          * tlb invalidates for a low number of pages. Caveat: we must
  91          * flush the high aliases on 64bit as well.
  92          */
  93         __flush_tlb_all();
  94 }
  95
  96 static void cpa_flush_range(unsigned long start, int numpages, int cache)
  97 {
  98         unsigned int i, level;
  99         unsigned long addr;
 100
 101         BUG_ON(irqs_disabled());
 102         WARN_ON(PAGE_ALIGN(start) != start);
 103
 104         on_each_cpu(__cpa_flush_range, NULL, 1, 1);
 105
 106         if (!cache)
 107                 return;
 108
 109         /*
 110          * We only need to flush on one CPU,
 111          * clflush is a MESI-coherent instruction that
 112          * will cause all other CPUs to flush the same
 113          * cachelines:
 114          */
 115         for (i = 0, addr = start; i < numpages; i++, addr += PAGE_SIZE) {
 116                 pte_t *pte = lookup_address(addr, &level);
 117
 118                 /*
 119                  * Only flush present addresses:
 120                  */
 121                 if (pte && (pte_val(*pte) & _PAGE_PRESENT))
 122                         clflush_cache_range((void *) addr, PAGE_SIZE);
 123         }
 124 }
 125
 126 #define HIGH_MAP_START  __START_KERNEL_map
 127 #define HIGH_MAP_END    (__START_KERNEL_map + KERNEL_TEXT_SIZE)
 128
 129
 130 /*
 131  * Converts a virtual address to a X86-64 highmap address
 132  */
 133 static unsigned long virt_to_highmap(void *address)
 134 {
 135 #ifdef CONFIG_X86_64
 136         return __pa((unsigned long)address) + HIGH_MAP_START - phys_base;
 137 #else
 138         return (unsigned long)address;
 139 #endif
 140 }
 141
 142 /*
 143  * Certain areas of memory on x86 require very specific protection flags,
 144  * for example the BIOS area or kernel text. Callers don't always get this
 145  * right (again, ioremap() on BIOS memory is not uncommon) so this function
 146  * checks and fixes these known static required protection bits.
 147  */
 148 static inline pgprot_t static_protections(pgprot_t prot, unsigned long address)
 149 {
 150         pgprot_t forbidden = __pgprot(0);
 151
 152         /*
 153          * The BIOS area between 640k and 1Mb needs to be executable for
 154          * PCI BIOS based config access (CONFIG_PCI_GOBIOS) support.
 155          */
 156         if (within(__pa(address), BIOS_BEGIN, BIOS_END))
 157                 pgprot_val(forbidden) |= _PAGE_NX;
 158
 159         /*
 160          * The kernel text needs to be executable for obvious reasons
 161          * Does not cover __inittext since that is gone later on
 162          */
 163         if (within(address, (unsigned long)_text, (unsigned long)_etext))
 164                 pgprot_val(forbidden) |= _PAGE_NX;
 165         /*
 166          * Do the same for the x86-64 high kernel mapping
 167          */
 168         if (within(address, virt_to_highmap(_text), virt_to_highmap(_etext)))
 169                 pgprot_val(forbidden) |= _PAGE_NX;
 170
 171         /* The .rodata section needs to be read-only */
 172         if (within(address, (unsigned long)__start_rodata,
 173                                 (unsigned long)__end_rodata))
 174                 pgprot_val(forbidden) |= _PAGE_RW;
 175         /*
 176          * Do the same for the x86-64 high kernel mapping
 177          */
 178         if (within(address, virt_to_highmap(__start_rodata),
 179                                 virt_to_highmap(__end_rodata)))
 180                 pgprot_val(forbidden) |= _PAGE_RW;
 181
 182         prot = __pgprot(pgprot_val(prot) & ~pgprot_val(forbidden));
 183
 184         return prot;
 185 }
 186
 187 /*
 188  * Lookup the page table entry for a virtual address. Return a pointer
 189  * to the entry and the level of the mapping.
 190  *
 191  * Note: We return pud and pmd either when the entry is marked large
 192  * or when the present bit is not set. Otherwise we would return a
 193  * pointer to a nonexisting mapping.
 194  */
 195 pte_t *lookup_address(unsigned long address, unsigned int *level)
 196 {
 197         pgd_t *pgd = pgd_offset_k(address);
 198         pud_t *pud;
 199         pmd_t *pmd;
 200
 201         *level = PG_LEVEL_NONE;
 202
 203         if (pgd_none(*pgd))
 204                 return NULL;
 205
 206         pud = pud_offset(pgd, address);
 207         if (pud_none(*pud))
 208                 return NULL;
 209
 210         *level = PG_LEVEL_1G;
 211         if (pud_large(*pud) || !pud_present(*pud))
 212                 return (pte_t *)pud;
 213
 214         pmd = pmd_offset(pud, address);
 215         if (pmd_none(*pmd))
 216                 return NULL;
 217
 218         *level = PG_LEVEL_2M;
 219         if (pmd_large(*pmd) || !pmd_present(*pmd))
 220                 return (pte_t *)pmd;
 221
 222         *level = PG_LEVEL_4K;
 223
 224         return pte_offset_kernel(pmd, address);
 225 }
 226
 227 /*
 228  * Set the new pmd in all the pgds we know about:
 229  */
 230 static void __set_pmd_pte(pte_t *kpte, unsigned long address, pte_t pte)
 231 {
 232         /* change init_mm */
 233         set_pte_atomic(kpte, pte);
 234 #ifdef CONFIG_X86_32
 235         if (!SHARED_KERNEL_PMD) {
 236                 struct page *page;
 237
 238                 list_for_each_entry(page, &pgd_list, lru) {
 239                         pgd_t *pgd;
 240                         pud_t *pud;
 241                         pmd_t *pmd;
 242
 243                         pgd = (pgd_t *)page_address(page) + pgd_index(address);
 244                         pud = pud_offset(pgd, address);
 245                         pmd = pmd_offset(pud, address);
 246                         set_pte_atomic((pte_t *)pmd, pte);
 247                 }
 248         }
 249 #endif
 250 }
 251
 252 static int
 253 try_preserve_large_page(pte_t *kpte, unsigned long address,
 254                         struct cpa_data *cpa)
 255 {
 256         unsigned long nextpage_addr, numpages, pmask, psize, flags, addr;
 257         pte_t new_pte, old_pte, *tmp;
 258         pgprot_t old_prot, new_prot;
 259         int i, do_split = 1;
 260         unsigned int level;
 261
 262         spin_lock_irqsave(&pgd_lock, flags);
 263         /*
 264          * Check for races, another CPU might have split this page
 265          * up already:
 266          */
 267         tmp = lookup_address(address, &level);
 268         if (tmp != kpte)
 269                 goto out_unlock;
 270
 271         switch (level) {
 272         case PG_LEVEL_2M:
 273                 psize = PMD_PAGE_SIZE;
 274                 pmask = PMD_PAGE_MASK;
 275                 break;
 276 #ifdef CONFIG_X86_64
 277         case PG_LEVEL_1G:
 278                 psize = PUD_PAGE_SIZE;
 279                 pmask = PUD_PAGE_MASK;
 280                 break;
 281 #endif
 282         default:
 283                 do_split = -EINVAL;
 284                 goto out_unlock;
 285         }
 286
 287         /*
 288          * Calculate the number of pages, which fit into this large
 289          * page starting at address:
 290          */
 291         nextpage_addr = (address + psize) & pmask;
 292         numpages = (nextpage_addr - address) >> PAGE_SHIFT;
 293         if (numpages < cpa->numpages)
 294                 cpa->numpages = numpages;
 295
 296         /*
 297          * We are safe now. Check whether the new pgprot is the same:
 298          */
 299         old_pte = *kpte;
 300         old_prot = new_prot = pte_pgprot(old_pte);
 301
 302         pgprot_val(new_prot) &= ~pgprot_val(cpa->mask_clr);
 303         pgprot_val(new_prot) |= pgprot_val(cpa->mask_set);
 304         new_prot = static_protections(new_prot, address);
 305
 306         /*
 307          * We need to check the full range, whether
 308          * static_protection() requires a different pgprot for one of
 309          * the pages in the range we try to preserve:
 310          */
 311         addr = address + PAGE_SIZE;
 312         for (i = 1; i < cpa->numpages; i++, addr += PAGE_SIZE) {
 313                 pgprot_t chk_prot = static_protections(new_prot, addr);
 314
 315                 if (pgprot_val(chk_prot) != pgprot_val(new_prot))
 316                         goto out_unlock;
 317         }
 318
 319         /*
 320          * If there are no changes, return. maxpages has been updated
 321          * above:
 322          */
 323         if (pgprot_val(new_prot) == pgprot_val(old_prot)) {
 324                 do_split = 0;
 325                 goto out_unlock;
 326         }
 327
 328         /*
 329          * We need to change the attributes. Check, whether we can
 330          * change the large page in one go. We request a split, when
 331          * the address is not aligned and the number of pages is
 332          * smaller than the number of pages in the large page. Note
 333          * that we limited the number of possible pages already to
 334          * the number of pages in the large page.
 335          */
 336         if (address == (nextpage_addr - psize) && cpa->numpages == numpages) {
 337                 /*
 338                  * The address is aligned and the number of pages
 339                  * covers the full page.
 340                  */
 341                 new_pte = pfn_pte(pte_pfn(old_pte), canon_pgprot(new_prot));
 342                 __set_pmd_pte(kpte, address, new_pte);
 343                 cpa->flushtlb = 1;
 344                 do_split = 0;
 345         }
 346
 347 out_unlock:
 348         spin_unlock_irqrestore(&pgd_lock, flags);
 349
 350         return do_split;
 351 }
 352
 353 static LIST_HEAD(page_pool);
 354 static unsigned long pool_size, pool_pages, pool_low;
 355 static unsigned long pool_used, pool_failed, pool_refill;
 356
 357 static void cpa_fill_pool(void)
 358 {
 359         struct page *p;
 360         gfp_t gfp = GFP_KERNEL;
 361
 362         /* Do not allocate from interrupt context */
 363         if (in_irq() || irqs_disabled())
 364                 return;
 365         /*
 366          * Check unlocked. I does not matter when we have one more
 367          * page in the pool. The bit lock avoids recursive pool
 368          * allocations:
 369          */
 370         if (pool_pages >= pool_size || test_and_set_bit_lock(0, &pool_refill))
 371                 return;
 372
 373 #ifdef CONFIG_DEBUG_PAGEALLOC
 374         /*
 375          * We could do:
 376          * gfp = in_atomic() ? GFP_ATOMIC : GFP_KERNEL;
 377          * but this fails on !PREEMPT kernels
 378          */
 379         gfp =  GFP_ATOMIC | __GFP_NORETRY | __GFP_NOWARN;
 380 #endif
 381
 382         while (pool_pages < pool_size) {
 383                 p = alloc_pages(gfp, 0);
 384                 if (!p) {
 385                         pool_failed++;
 386                         break;
 387                 }
 388                 spin_lock_irq(&pgd_lock);
 389                 list_add(&p->lru, &page_pool);
 390                 pool_pages++;
 391                 spin_unlock_irq(&pgd_lock);
 392         }
 393         clear_bit_unlock(0, &pool_refill);
 394 }
 395
 396 #define SHIFT_MB                (20 - PAGE_SHIFT)
 397 #define ROUND_MB_GB             ((1 << 10) - 1)
 398 #define SHIFT_MB_GB             10
 399 #define POOL_PAGES_PER_GB       16
 400
 401 void __init cpa_init(void)
 402 {
 403         struct sysinfo si;
 404         unsigned long gb;
 405
 406         si_meminfo(&si);
 407         /*
 408          * Calculate the number of pool pages:
 409          *
 410          * Convert totalram (nr of pages) to MiB and round to the next
 411          * GiB. Shift MiB to Gib and multiply the result by
 412          * POOL_PAGES_PER_GB:
 413          */
 414         gb = ((si.totalram >> SHIFT_MB) + ROUND_MB_GB) >> SHIFT_MB_GB;
 415         pool_size = POOL_PAGES_PER_GB * gb;
 416         pool_low = pool_size;
 417
 418         cpa_fill_pool();
 419         printk(KERN_DEBUG
 420                "CPA: page pool initialized %lu of %lu pages preallocated\n",
 421                pool_pages, pool_size);
 422 }
 423
 424 static int split_large_page(pte_t *kpte, unsigned long address)
 425 {
 426         unsigned long flags, pfn, pfninc = 1;
 427         unsigned int i, level;
 428         pte_t *pbase, *tmp;
 429         pgprot_t ref_prot;
 430         struct page *base;
 431
 432         /*
 433          * Get a page from the pool. The pool list is protected by the
 434          * pgd_lock, which we have to take anyway for the split
 435          * operation:
 436          */
 437         spin_lock_irqsave(&pgd_lock, flags);
 438         if (list_empty(&page_pool)) {
 439                 spin_unlock_irqrestore(&pgd_lock, flags);
 440                 return -ENOMEM;
 441         }
 442
 443         base = list_first_entry(&page_pool, struct page, lru);
 444         list_del(&base->lru);
 445         pool_pages--;
 446
 447         if (pool_pages < pool_low)
 448                 pool_low = pool_pages;
 449
 450         /*
 451          * Check for races, another CPU might have split this page
 452          * up for us already:
 453          */
 454         tmp = lookup_address(address, &level);
 455         if (tmp != kpte)
 456                 goto out_unlock;
 457
 458         pbase = (pte_t *)page_address(base);
 459 #ifdef CONFIG_X86_32
 460         paravirt_alloc_pt(&init_mm, page_to_pfn(base));
 461 #endif
 462         ref_prot = pte_pgprot(pte_clrhuge(*kpte));
 463
 464 #ifdef CONFIG_X86_64
 465         if (level == PG_LEVEL_1G) {
 466                 pfninc = PMD_PAGE_SIZE >> PAGE_SHIFT;
 467                 pgprot_val(ref_prot) |= _PAGE_PSE;
 468         }
 469 #endif
 470
 471         /*
 472          * Get the target pfn from the original entry:
 473          */
 474         pfn = pte_pfn(*kpte);
 475         for (i = 0; i < PTRS_PER_PTE; i++, pfn += pfninc)
 476                 set_pte(&pbase[i], pfn_pte(pfn, ref_prot));
 477
 478         /*
 479          * Install the new, split up pagetable. Important details here:
 480          *
 481          * On Intel the NX bit of all levels must be cleared to make a
 482          * page executable. See section 4.13.2 of Intel 64 and IA-32
 483          * Architectures Software Developer's Manual).
 484          *
 485          * Mark the entry present. The current mapping might be
 486          * set to not present, which we preserved above.
 487          */
 488         ref_prot = pte_pgprot(pte_mkexec(pte_clrhuge(*kpte)));
 489         pgprot_val(ref_prot) |= _PAGE_PRESENT;
 490         __set_pmd_pte(kpte, address, mk_pte(base, ref_prot));
 491         base = NULL;
 492
 493 out_unlock:
 494         /*
 495          * If we dropped out via the lookup_address check under
 496          * pgd_lock then stick the page back into the pool:
 497          */
 498         if (base) {
 499                 list_add(&base->lru, &page_pool);
 500                 pool_pages++;
 501         } else
 502                 pool_used++;
 503         spin_unlock_irqrestore(&pgd_lock, flags);
 504
 505         return 0;
 506 }
 507
 508 static int __change_page_attr(unsigned long address, struct cpa_data *cpa)
 509 {
 510         int do_split, err;
 511         unsigned int level;
 512         struct page *kpte_page;
 513         pte_t *kpte;
 514
 515 repeat:
 516         kpte = lookup_address(address, &level);
 517         if (!kpte)
 518                 return -EINVAL;
 519
 520         kpte_page = virt_to_page(kpte);
 521         BUG_ON(PageLRU(kpte_page));
 522         BUG_ON(PageCompound(kpte_page));
 523
 524         if (level == PG_LEVEL_4K) {
 525                 pte_t new_pte, old_pte = *kpte;
 526                 pgprot_t new_prot = pte_pgprot(old_pte);
 527
 528                 if(!pte_val(old_pte)) {
 529                         printk(KERN_WARNING "CPA: called for zero pte. "
 530                                "vaddr = %lx cpa->vaddr = %lx\n", address,
 531                                 cpa->vaddr);
 532                         WARN_ON(1);
 533                         return -EINVAL;
 534                 }
 535
 536                 pgprot_val(new_prot) &= ~pgprot_val(cpa->mask_clr);
 537                 pgprot_val(new_prot) |= pgprot_val(cpa->mask_set);
 538
 539                 new_prot = static_protections(new_prot, address);
 540
 541                 /*
 542                  * We need to keep the pfn from the existing PTE,
 543                  * after all we're only going to change it's attributes
 544                  * not the memory it points to
 545                  */
 546                 new_pte = pfn_pte(pte_pfn(old_pte), canon_pgprot(new_prot));
 547
 548                 /*
 549                  * Do we really change anything ?
 550                  */
 551                 if (pte_val(old_pte) != pte_val(new_pte)) {
 552                         set_pte_atomic(kpte, new_pte);
 553                         cpa->flushtlb = 1;
 554                 }
 555                 cpa->numpages = 1;
 556                 return 0;
 557         }
 558
 559         /*
 560          * Check, whether we can keep the large page intact
 561          * and just change the pte:
 562          */
 563         do_split = try_preserve_large_page(kpte, address, cpa);
 564         /*
 565          * When the range fits into the existing large page,
 566          * return. cp->numpages and cpa->tlbflush have been updated in
 567          * try_large_page:
 568          */
 569         if (do_split <= 0)
 570                 return do_split;
 571
 572         /*
 573          * We have to split the large page:
 574          */
 575         err = split_large_page(kpte, address);
 576         if (!err) {
 577                 cpa->flushtlb = 1;
 578                 goto repeat;
 579         }
 580
 581         return err;
 582 }
 583
 584 /**
 585  * change_page_attr_addr - Change page table attributes in linear mapping
 586  * @address: Virtual address in linear mapping.
 587  * @prot:    New page table attribute (PAGE_*)
 588  *
 589  * Change page attributes of a page in the direct mapping. This is a variant
 590  * of change_page_attr() that also works on memory holes that do not have
 591  * mem_map entry (pfn_valid() is false).
 592  *
 593  * See change_page_attr() documentation for more details.
 594  *
 595  * Modules and drivers should use the set_memory_* APIs instead.
 596  */
 597 static int change_page_attr_addr(struct cpa_data *cpa)
 598 {
 599         int err;
 600         unsigned long address = cpa->vaddr;
 601
 602 #ifdef CONFIG_X86_64
 603         unsigned long phys_addr = __pa(address);
 604
 605         /*
 606          * If we are inside the high mapped kernel range, then we
 607          * fixup the low mapping first. __va() returns the virtual
 608          * address in the linear mapping:
 609          */
 610         if (within(address, HIGH_MAP_START, HIGH_MAP_END))
 611                 address = (unsigned long) __va(phys_addr);
 612 #endif
 613
 614         err = __change_page_attr(address, cpa);
 615         if (err)
 616                 return err;
 617
 618 #ifdef CONFIG_X86_64
 619         /*
 620          * If the physical address is inside the kernel map, we need
 621          * to touch the high mapped kernel as well:
 622          */
 623         if (within(phys_addr, 0, KERNEL_TEXT_SIZE)) {
 624                 /*
 625                  * Calc the high mapping address. See __phys_addr()
 626                  * for the non obvious details.
 627                  *
 628                  * Note that NX and other required permissions are
 629                  * checked in static_protections().
 630                  */
 631                 address = phys_addr + HIGH_MAP_START - phys_base;
 632
 633                 /*
 634                  * Our high aliases are imprecise, because we check
 635                  * everything between 0 and KERNEL_TEXT_SIZE, so do
 636                  * not propagate lookup failures back to users:
 637                  */
 638                 __change_page_attr(address, cpa);
 639         }
 640 #endif
 641         return err;
 642 }
 643
 644 static int __change_page_attr_set_clr(struct cpa_data *cpa)
 645 {
 646         int ret, numpages = cpa->numpages;
 647
 648         while (numpages) {
 649                 /*
 650                  * Store the remaining nr of pages for the large page
 651                  * preservation check.
 652                  */
 653                 cpa->numpages = numpages;
 654                 ret = change_page_attr_addr(cpa);
 655                 if (ret)
 656                         return ret;
 657
 658                 /*
 659                  * Adjust the number of pages with the result of the
 660                  * CPA operation. Either a large page has been
 661                  * preserved or a single page update happened.
 662                  */
 663                 BUG_ON(cpa->numpages > numpages);
 664                 numpages -= cpa->numpages;
 665                 cpa->vaddr += cpa->numpages * PAGE_SIZE;
 666         }
 667         return 0;
 668 }
 669
 670 static inline int cache_attr(pgprot_t attr)
 671 {
 672         return pgprot_val(attr) &
 673                 (_PAGE_PAT | _PAGE_PAT_LARGE | _PAGE_PWT | _PAGE_PCD);
 674 }
 675
 676 static int change_page_attr_set_clr(unsigned long addr, int numpages,
 677                                     pgprot_t mask_set, pgprot_t mask_clr)
 678 {
 679         struct cpa_data cpa;
 680         int ret, cache;
 681
 682         /*
 683          * Check, if we are requested to change a not supported
 684          * feature:
 685          */
 686         mask_set = canon_pgprot(mask_set);
 687         mask_clr = canon_pgprot(mask_clr);
 688         if (!pgprot_val(mask_set) && !pgprot_val(mask_clr))
 689                 return 0;
 690
 691         /* Ensure we are PAGE_SIZE aligned */
 692         if (addr & ~PAGE_MASK) {
 693                 addr &= PAGE_MASK;
 694                 /*
 695                  * People should not be passing in unaligned addresses:
 696                  */
 697                 WARN_ON_ONCE(1);
 698         }
 699
 700         cpa.vaddr = addr;
 701         cpa.numpages = numpages;
 702         cpa.mask_set = mask_set;
 703         cpa.mask_clr = mask_clr;
 704         cpa.flushtlb = 0;
 705
 706         ret = __change_page_attr_set_clr(&cpa);
 707
 708         /*
 709          * Check whether we really changed something:
 710          */
 711         if (!cpa.flushtlb)
 712                 goto out;
 713
 714         /*
 715          * No need to flush, when we did not set any of the caching
 716          * attributes:
 717          */
 718         cache = cache_attr(mask_set);
 719
 720         /*
 721          * On success we use clflush, when the CPU supports it to
 722          * avoid the wbindv. If the CPU does not support it and in the
 723          * error case we fall back to cpa_flush_all (which uses
 724          * wbindv):
 725          */
 726         if (!ret && cpu_has_clflush)
 727                 cpa_flush_range(addr, numpages, cache);
 728         else
 729                 cpa_flush_all(cache);
 730
 731 out:
 732         cpa_fill_pool();
 733         return ret;
 734 }
 735
 736 static inline int change_page_attr_set(unsigned long addr, int numpages,
 737                                        pgprot_t mask)
 738 {
 739         return change_page_attr_set_clr(addr, numpages, mask, __pgprot(0));
 740 }
 741
 742 static inline int change_page_attr_clear(unsigned long addr, int numpages,
 743                                          pgprot_t mask)
 744 {
 745         return change_page_attr_set_clr(addr, numpages, __pgprot(0), mask);
 746 }
 747
 748 int set_memory_uc(unsigned long addr, int numpages)
 749 {
 750         return change_page_attr_set(addr, numpages,
 751                                     __pgprot(_PAGE_PCD | _PAGE_PWT));
 752 }
 753 EXPORT_SYMBOL(set_memory_uc);
 754
 755 int set_memory_wb(unsigned long addr, int numpages)
 756 {
 757         return change_page_attr_clear(addr, numpages,
 758                                       __pgprot(_PAGE_PCD | _PAGE_PWT));
 759 }
 760 EXPORT_SYMBOL(set_memory_wb);
 761
 762 int set_memory_x(unsigned long addr, int numpages)
 763 {
 764         return change_page_attr_clear(addr, numpages, __pgprot(_PAGE_NX));
 765 }
 766 EXPORT_SYMBOL(set_memory_x);
 767
 768 int set_memory_nx(unsigned long addr, int numpages)
 769 {
 770         return change_page_attr_set(addr, numpages, __pgprot(_PAGE_NX));
 771 }
 772 EXPORT_SYMBOL(set_memory_nx);
 773
 774 int set_memory_ro(unsigned long addr, int numpages)
 775 {
 776         return change_page_attr_clear(addr, numpages, __pgprot(_PAGE_RW));
 777 }
 778
 779 int set_memory_rw(unsigned long addr, int numpages)
 780 {
 781         return change_page_attr_set(addr, numpages, __pgprot(_PAGE_RW));
 782 }
 783
 784 int set_memory_np(unsigned long addr, int numpages)
 785 {
 786         return change_page_attr_clear(addr, numpages, __pgprot(_PAGE_PRESENT));
 787 }
 788
 789 int set_pages_uc(struct page *page, int numpages)
 790 {
 791         unsigned long addr = (unsigned long)page_address(page);
 792
 793         return set_memory_uc(addr, numpages);
 794 }
 795 EXPORT_SYMBOL(set_pages_uc);
 796
 797 int set_pages_wb(struct page *page, int numpages)
 798 {
 799         unsigned long addr = (unsigned long)page_address(page);
 800
 801         return set_memory_wb(addr, numpages);
 802 }
 803 EXPORT_SYMBOL(set_pages_wb);
 804
 805 int set_pages_x(struct page *page, int numpages)
 806 {
 807         unsigned long addr = (unsigned long)page_address(page);
 808
 809         return set_memory_x(addr, numpages);
 810 }
 811 EXPORT_SYMBOL(set_pages_x);
 812
 813 int set_pages_nx(struct page *page, int numpages)
 814 {
 815         unsigned long addr = (unsigned long)page_address(page);
 816
 817         return set_memory_nx(addr, numpages);
 818 }
 819 EXPORT_SYMBOL(set_pages_nx);
 820
 821 int set_pages_ro(struct page *page, int numpages)
 822 {
 823         unsigned long addr = (unsigned long)page_address(page);
 824
 825         return set_memory_ro(addr, numpages);
 826 }
 827
 828 int set_pages_rw(struct page *page, int numpages)
 829 {
 830         unsigned long addr = (unsigned long)page_address(page);
 831
 832         return set_memory_rw(addr, numpages);
 833 }
 834
 835 #ifdef CONFIG_DEBUG_PAGEALLOC
 836
 837 static int __set_pages_p(struct page *page, int numpages)
 838 {
 839         struct cpa_data cpa = { .vaddr = (unsigned long) page_address(page),
 840                                 .numpages = numpages,
 841                                 .mask_set = __pgprot(_PAGE_PRESENT | _PAGE_RW),
 842                                 .mask_clr = __pgprot(0)};
 843
 844         return __change_page_attr_set_clr(&cpa);
 845 }
 846
 847 static int __set_pages_np(struct page *page, int numpages)
 848 {
 849         struct cpa_data cpa = { .vaddr = (unsigned long) page_address(page),
 850                                 .numpages = numpages,
 851                                 .mask_set = __pgprot(0),
 852                                 .mask_clr = __pgprot(_PAGE_PRESENT | _PAGE_RW)};
 853
 854         return __change_page_attr_set_clr(&cpa);
 855 }
 856
 857 void kernel_map_pages(struct page *page, int numpages, int enable)
 858 {
 859         if (PageHighMem(page))
 860                 return;
 861         if (!enable) {
 862                 debug_check_no_locks_freed(page_address(page),
 863                                            numpages * PAGE_SIZE);
 864         }
 865
 866         /*
 867          * If page allocator is not up yet then do not call c_p_a():
 868          */
 869         if (!debug_pagealloc_enabled)
 870                 return;
 871
 872         /*
 873          * The return value is ignored as the calls cannot fail.
 874          * Large pages are kept enabled at boot time, and are
 875          * split up quickly with DEBUG_PAGEALLOC. If a splitup
 876          * fails here (due to temporary memory shortage) no damage
 877          * is done because we just keep the largepage intact up
 878          * to the next attempt when it will likely be split up:
 879          */
 880         if (enable)
 881                 __set_pages_p(page, numpages);
 882         else
 883                 __set_pages_np(page, numpages);
 884
 885         /*
 886          * We should perform an IPI and flush all tlbs,
 887          * but that can deadlock->flush only current cpu:
 888          */
 889         __flush_tlb_all();
 890
 891         /*
 892          * Try to refill the page pool here. We can do this only after
 893          * the tlb flush.
 894          */
 895         cpa_fill_pool();
 896 }
 897 #endif
 898
 899 /*
 900  * The testcases use internal knowledge of the implementation that shouldn't
 901  * be exposed to the rest of the kernel. Include these directly here.
 902  */
 903 #ifdef CONFIG_CPA_DEBUG
 904 #include "pageattr-test.c"
 905 #endif