/*
 * PPC64 (POWER4) Huge TLB Page Support for Kernel.
 *
 * Copyright (C) 2003 David Gibson, IBM Corporation.
 *
 * Based on the IA-32 version:
 * Copyright (C) 2002, Rohit Seth <rohit.seth@intel.com>
 */
#include <linux/init.h>
#include <linux/mm.h>
#include <linux/hugetlb.h>
#include <linux/pagemap.h>
#include <linux/slab.h>
#include <linux/err.h>
#include <linux/sysctl.h>

#include <asm/pgalloc.h>
#include <asm/tlb.h>
#include <asm/tlbflush.h>
#include <asm/mmu_context.h>
#include <asm/machdep.h>
#include <asm/cputable.h>
#define PAGE_SHIFT_64K	16
#define PAGE_SHIFT_16M	24
#define PAGE_SHIFT_16G	34

#define NUM_LOW_AREAS	(0x100000000UL >> SID_SHIFT)
#define NUM_HIGH_AREAS	(PGTABLE_RANGE >> HTLB_AREA_SHIFT)

#define MAX_NUMBER_GPAGES	1024
/* Tracks the 16G pages after the device tree is scanned and before the
 * huge_boot_pages list is ready.  */
static unsigned long gpage_freearray[MAX_NUMBER_GPAGES];
static unsigned nr_gpages;
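
/*
 * Rough lifecycle of the array above: the early device-tree scan hands
 * gigantic page addresses to add_gpage() below, which stashes them here;
 * later, alloc_bootmem_huge_page() pops them off one at a time and moves
 * them onto the generic huge_boot_pages list.
 */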
/* Flag to mark huge PD pointers.  This means pmd_bad() and pud_bad()
 * will choke on pointers to hugepte tables, which is handy for
 * catching screwups early. */
static inline int shift_to_mmu_psize(unsigned int shift)
{
	int psize;

	for (psize = 0; psize < MMU_PAGE_COUNT; ++psize)
		if (mmu_psize_defs[psize].shift == shift)
			return psize;
	return -1;
}
static inline unsigned int mmu_psize_to_shift(unsigned int mmu_psize)
{
	if (mmu_psize_defs[mmu_psize].shift)
		return mmu_psize_defs[mmu_psize].shift;
	BUG();
}
#define hugepd_none(hpd)	((hpd).pd == 0)
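
/*
 * Sketch of the hugepd encoding used by the helpers below (as implied by
 * hugepd_page() and __hugepte_alloc(), not a separate specification): the
 * low bits of hpd.pd hold the page shift of the hugepages covered
 * (HUGEPD_SHIFT_MASK), the rest is the kernel address of the hugepte table
 * with the top address bit stripped so that pmd_bad()/pud_bad() trip over
 * it; hugepd_page() ORs the kernel linear-mapping prefix back in.  For
 * example, a hugepd covering 16M pages would carry a page shift of 24 in
 * those low bits.
 */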
static inline pte_t *hugepd_page(hugepd_t hpd)
{
	BUG_ON(!hugepd_ok(hpd));
	return (pte_t *)((hpd.pd & ~HUGEPD_SHIFT_MASK) | 0xc000000000000000);
}
static inline unsigned int hugepd_shift(hugepd_t hpd)
{
	return hpd.pd & HUGEPD_SHIFT_MASK;
}
static inline pte_t *hugepte_offset(hugepd_t *hpdp, unsigned long addr,
				    unsigned pdshift)
{
	unsigned long idx = (addr & ((1UL << pdshift) - 1)) >> hugepd_shift(*hpdp);
	pte_t *dir = hugepd_page(*hpdp);

	return dir + idx;
}
pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea, unsigned *shift)
{
	pgd_t *pg;
	pud_t *pu;
	pmd_t *pm;
	hugepd_t *hpdp = NULL;
	unsigned pdshift = PGDIR_SHIFT;

	if (shift)
		*shift = 0;

	pg = pgdir + pgd_index(ea);
	if (is_hugepd(pg)) {
		hpdp = (hugepd_t *)pg;
	} else if (!pgd_none(*pg)) {
		pdshift = PUD_SHIFT;
		pu = pud_offset(pg, ea);
		if (is_hugepd(pu))
			hpdp = (hugepd_t *)pu;
		else if (!pud_none(*pu)) {
			pdshift = PMD_SHIFT;
			pm = pmd_offset(pu, ea);
			if (is_hugepd(pm))
				hpdp = (hugepd_t *)pm;
			else if (!pmd_none(*pm)) {
				return pte_offset_map(pm, ea);
			}
		}
	}

	if (!hpdp)
		return NULL;

	if (shift)
		*shift = hugepd_shift(*hpdp);
	return hugepte_offset(hpdp, ea, pdshift);
}
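
/*
 * huge_pte_offset() below is just the walk above applied to a given mm's
 * pgd with the page-size result discarded; callers that also need the page
 * size use find_linux_pte_or_hugepte() directly and pass a non-NULL shift
 * pointer.
 */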
pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
{
	return find_linux_pte_or_hugepte(mm->pgd, addr, NULL);
}
static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp,
			   unsigned long address, unsigned pdshift, unsigned pshift)
{
	pte_t *new = kmem_cache_zalloc(PGT_CACHE(pdshift - pshift),
				       GFP_KERNEL|__GFP_REPEAT);

	BUG_ON(pshift > HUGEPD_SHIFT_MASK);
	BUG_ON((unsigned long)new & HUGEPD_SHIFT_MASK);

	if (!new)
		return -ENOMEM;

	spin_lock(&mm->page_table_lock);
	if (!hugepd_none(*hpdp))
		kmem_cache_free(PGT_CACHE(pdshift - pshift), new);
	else
		hpdp->pd = ((unsigned long)new & ~0x8000000000000000) | pshift;
	spin_unlock(&mm->page_table_lock);
	return 0;
}
pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz)
{
	pgd_t *pg;
	pud_t *pu;
	pmd_t *pm;
	hugepd_t *hpdp = NULL;
	unsigned pshift = __ffs(sz);
	unsigned pdshift = PGDIR_SHIFT;

	addr &= ~(sz-1);

	pg = pgd_offset(mm, addr);
	if (pshift >= PUD_SHIFT) {
		hpdp = (hugepd_t *)pg;
	} else {
		pdshift = PUD_SHIFT;
		pu = pud_alloc(mm, pg, addr);
		if (pshift >= PMD_SHIFT) {
			hpdp = (hugepd_t *)pu;
		} else {
			pdshift = PMD_SHIFT;
			pm = pmd_alloc(mm, pu, addr);
			hpdp = (hugepd_t *)pm;
		}
	}

	if (!hpdp)
		return NULL;

	BUG_ON(!hugepd_none(*hpdp) && !hugepd_ok(*hpdp));

	if (hugepd_none(*hpdp) && __hugepte_alloc(mm, hpdp, addr, pdshift, pshift))
		return NULL;

	return hugepte_offset(hpdp, addr, pdshift);
}
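
/*
 * The allocation path above is the one place that decides at which level a
 * hugepd is linked: a page shift at or above PUD_SHIFT links it straight
 * off the PGD entry, one at or above PMD_SHIFT hangs it off a PUD entry,
 * and anything smaller hangs it off a PMD entry.  Which real page sizes
 * land at which level depends on the base page size configuration.
 */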
/* Build list of addresses of gigantic pages.  This function is used in early
 * boot before the buddy or bootmem allocator is setup.
 */
void add_gpage(unsigned long addr, unsigned long page_size,
	       unsigned long number_of_pages)
{
	while (number_of_pages > 0) {
		gpage_freearray[nr_gpages] = addr;
		nr_gpages++;
		addr += page_size;
		number_of_pages--;
	}
}
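
/*
 * A hypothetical caller sketch (the actual call sites live in the early
 * memory setup code, not in this file): on finding a 16G page reservation
 * in the device tree, boot code would call something like
 *
 *	add_gpage(phys_addr, 1UL << PAGE_SHIFT_16G, nr_pages);
 *
 * so that alloc_bootmem_huge_page() below can hand those pages to the
 * generic hugetlb boot allocator later.
 */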
/* Moves the gigantic page addresses from the temporary list to the
 * huge_boot_pages list.
 */
int alloc_bootmem_huge_page(struct hstate *hstate)
{
	struct huge_bootmem_page *m;

	if (nr_gpages == 0)
		return 0;
	m = phys_to_virt(gpage_freearray[--nr_gpages]);
	gpage_freearray[nr_gpages] = 0;
	list_add(&m->list, &huge_boot_pages);
	m->hstate = hstate;
	return 1;
}
int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep)
{
	return 0;
}
static void free_hugepd_range(struct mmu_gather *tlb, hugepd_t *hpdp, int pdshift,
			      unsigned long start, unsigned long end,
			      unsigned long floor, unsigned long ceiling)
{
	pte_t *hugepte = hugepd_page(*hpdp);
	unsigned shift = hugepd_shift(*hpdp);
	unsigned long pdmask = ~((1UL << pdshift) - 1);

	start &= pdmask;
	if (start < floor)
		return;
	if (ceiling) {
		ceiling &= pdmask;
		if (!ceiling)
			return;
	}
	if (end - 1 > ceiling - 1)
		return;

	hpdp->pd = 0;
	tlb->need_flush = 1;
	pgtable_free_tlb(tlb, hugepte, pdshift - shift);
}
static void hugetlb_free_pmd_range(struct mmu_gather *tlb, pud_t *pud,
				   unsigned long addr, unsigned long end,
				   unsigned long floor, unsigned long ceiling)
{
	pmd_t *pmd;
	unsigned long next;
	unsigned long start;

	start = addr;
	pmd = pmd_offset(pud, addr);
	do {
		next = pmd_addr_end(addr, end);
		if (!is_hugepd(pmd))
			continue;
		free_hugepd_range(tlb, (hugepd_t *)pmd, PMD_SHIFT,
				  addr, next, floor, ceiling);
	} while (pmd++, addr = next, addr != end);

	start &= PUD_MASK;
	if (start < floor)
		return;
	if (ceiling) {
		ceiling &= PUD_MASK;
		if (!ceiling)
			return;
	}
	if (end - 1 > ceiling - 1)
		return;

	pmd = pmd_offset(pud, start);
	pud_clear(pud);
	pmd_free_tlb(tlb, pmd, start);
}
static void hugetlb_free_pud_range(struct mmu_gather *tlb, pgd_t *pgd,
				   unsigned long addr, unsigned long end,
				   unsigned long floor, unsigned long ceiling)
{
	pud_t *pud;
	unsigned long next;
	unsigned long start;

	start = addr;
	pud = pud_offset(pgd, addr);
	do {
		next = pud_addr_end(addr, end);
		if (!is_hugepd(pud)) {
			if (pud_none_or_clear_bad(pud))
				continue;
			hugetlb_free_pmd_range(tlb, pud, addr, next, floor,
					       ceiling);
		} else {
			free_hugepd_range(tlb, (hugepd_t *)pud, PUD_SHIFT,
					  addr, next, floor, ceiling);
		}
	} while (pud++, addr = next, addr != end);

	start &= PGDIR_MASK;
	if (start < floor)
		return;
	if (ceiling) {
		ceiling &= PGDIR_MASK;
		if (!ceiling)
			return;
	}
	if (end - 1 > ceiling - 1)
		return;

	pud = pud_offset(pgd, start);
	pgd_clear(pgd);
	pud_free_tlb(tlb, pud, start);
}
/*
 * This function frees user-level page tables of a process.
 *
 * Must be called with pagetable lock held.
 */
void hugetlb_free_pgd_range(struct mmu_gather *tlb,
			    unsigned long addr, unsigned long end,
			    unsigned long floor, unsigned long ceiling)
{
	pgd_t *pgd;
	unsigned long next;

	/*
	 * Because there are a number of different possible pagetable
	 * layouts for hugepage ranges, we limit knowledge of how
	 * things should be laid out to the allocation path
	 * (huge_pte_alloc(), above).  Everything else works out the
	 * structure as it goes from information in the hugepd
	 * pointers.  That means that we can't here use the
	 * optimization used in the normal page free_pgd_range(), of
	 * checking whether we're actually covering a large enough
	 * range to have to do anything at the top level of the walk
	 * instead of at the bottom.
	 *
	 * To make sense of this, you should probably go read the big
	 * block comment at the top of the normal free_pgd_range(),
	 * too.
	 */

	pgd = pgd_offset(tlb->mm, addr);
	do {
		next = pgd_addr_end(addr, end);
		if (!is_hugepd(pgd)) {
			if (pgd_none_or_clear_bad(pgd))
				continue;
			hugetlb_free_pud_range(tlb, pgd, addr, next, floor, ceiling);
		} else {
			free_hugepd_range(tlb, (hugepd_t *)pgd, PGDIR_SHIFT,
					  addr, next, floor, ceiling);
		}
	} while (pgd++, addr = next, addr != end);
}
void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
		     pte_t *ptep, pte_t pte)
{
	if (pte_present(*ptep)) {
		/* We open-code pte_clear because we need to pass the right
		 * argument to hpte_need_flush (huge / !huge). Might not be
		 * necessary anymore if we make hpte_need_flush() get the
		 * page size from the slices
		 */
		pte_update(mm, addr, ptep, ~0UL, 1);
	}
	*ptep = __pte(pte_val(pte) & ~_PAGE_HPTEFLAGS);
}
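
/*
 * Note on set_huge_pte_at() above: the trailing "1" is the "huge" argument
 * of pte_update(), which is what lets hpte_need_flush() treat the entry as
 * a huge mapping rather than assuming the normal page size.
 */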
pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
			      pte_t *ptep)
{
	unsigned long old = pte_update(mm, addr, ptep, ~0UL, 1);
	return __pte(old);
}
struct page *
follow_huge_addr(struct mm_struct *mm, unsigned long address, int write)
{
	pte_t *ptep;
	struct page *page;
	unsigned shift;
	unsigned long mask;

	ptep = find_linux_pte_or_hugepte(mm->pgd, address, &shift);

	/* Verify it is a huge page else bail. */
	if (!ptep || !shift)
		return ERR_PTR(-EINVAL);

	mask = (1UL << shift) - 1;
	page = pte_page(*ptep);
	if (page)
		page += (address & mask) / PAGE_SIZE;

	return page;
}
int pmd_huge(pmd_t pmd)
{
	return 0;
}

int pud_huge(pud_t pud)
{
	return 0;
}
struct page *
follow_huge_pmd(struct mm_struct *mm, unsigned long address,
		pmd_t *pmd, int write)
{
	BUG();
	return NULL;
}
static noinline int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr,
				unsigned long end, int write, struct page **pages,
				int *nr)
{
	unsigned long mask;
	unsigned long pte_end;
	struct page *head, *page;
	pte_t pte;
	int refs;

	pte_end = (addr + sz) & ~(sz-1);
	if (pte_end < end)
		end = pte_end;

	pte = *ptep;
	mask = _PAGE_PRESENT | _PAGE_USER;
	if (write)
		mask |= _PAGE_RW;

	if ((pte_val(pte) & mask) != mask)
		return 0;

	/* hugepages are never "special" */
	VM_BUG_ON(!pfn_valid(pte_pfn(pte)));

	refs = 0;
	head = pte_page(pte);

	page = head + ((addr & (sz-1)) >> PAGE_SHIFT);
	do {
		VM_BUG_ON(compound_head(page) != head);
		pages[*nr] = page;
		(*nr)++;
		page++;
		refs++;
	} while (addr += PAGE_SIZE, addr != end);

	if (!page_cache_add_speculative(head, refs)) {
		*nr -= refs;
		return 0;
	}

	if (unlikely(pte_val(pte) != pte_val(*ptep))) {
		/* Could be optimized better */
		while (*nr) {
			put_page(page);
			(*nr)--;
		}
	}

	return 1;
}
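
/*
 * The speculative reference and the pte re-check in gup_hugepte() above are
 * what make the lockless fast path safe: the head page's refcount is only
 * taken via page_cache_add_speculative(), and if the hugepte changed while
 * the sub-pages were being collected, the references are dropped again and
 * the slow path is left to sort it out.
 */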
int gup_hugepd(hugepd_t *hugepd, unsigned pdshift,
	       unsigned long addr, unsigned long end,
	       int write, struct page **pages, int *nr)
{
	pte_t *ptep;
	unsigned long sz = 1UL << hugepd_shift(*hugepd);

	ptep = hugepte_offset(hugepd, addr, pdshift);
	do {
		if (!gup_hugepte(ptep, sz, addr, end, write, pages, nr))
			return 0;
	} while (ptep++, addr += sz, addr != end);

	return 1;
}
unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
					unsigned long len, unsigned long pgoff,
					unsigned long flags)
{
	struct hstate *hstate = hstate_file(file);
	int mmu_psize = shift_to_mmu_psize(huge_page_shift(hstate));

	return slice_get_unmapped_area(addr, len, flags, mmu_psize, 1, 0);
}
unsigned long vma_mmu_pagesize(struct vm_area_struct *vma)
{
	unsigned int psize = get_slice_psize(vma->vm_mm, vma->vm_start);

	return 1UL << mmu_psize_to_shift(psize);
}
/*
 * Called by asm hashtable.S for doing lazy icache flush
 */
static unsigned int hash_huge_page_do_lazy_icache(unsigned long rflags,
						  pte_t pte, int trap,
						  unsigned long sz)
{
	struct page *page;
	int i;

	if (!pfn_valid(pte_pfn(pte)))
		return rflags;

	page = pte_page(pte);

	/* page is dirty */
	if (!test_bit(PG_arch_1, &page->flags) && !PageReserved(page)) {
		if (trap == 0x400) {
			for (i = 0; i < (sz / PAGE_SIZE); i++)
				__flush_dcache_icache(page_address(page+i));
			set_bit(PG_arch_1, &page->flags);
		} else {
			rflags |= HPTE_R_N;
		}
	}
	return rflags;
}
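
/*
 * PG_arch_1 is used here in the usual powerpc sense: it records that the
 * icache has been flushed for (all sub-pages of) this page, so the flush
 * above happens lazily and at most once per page while the bit stays set.
 */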
int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid,
		     pte_t *ptep, unsigned long trap, int local, int ssize,
		     unsigned int shift, unsigned int mmu_psize)
{
	unsigned long old_pte, new_pte;
	unsigned long va, rflags, pa, sz;
	long slot;
	int err = 1;

	BUG_ON(shift != mmu_psize_defs[mmu_psize].shift);

	/* Search the Linux page table for a match with va */
	va = hpt_va(ea, vsid, ssize);

	/*
	 * Check the user's access rights to the page.  If access should be
	 * prevented then send the problem up to do_page_fault.
	 */
	if (unlikely(access & ~pte_val(*ptep)))
		goto out;
	/*
	 * At this point, we have a pte (old_pte) which can be used to build
	 * or update an HPTE. There are 2 cases:
	 *
	 * 1. There is a valid (present) pte with no associated HPTE (this is
	 *	the most common case)
	 * 2. There is a valid (present) pte with an associated HPTE. The
	 *	current values of the pp bits in the HPTE prevent access
	 *	because we are doing software DIRTY bit management and the
	 *	page is currently not DIRTY.
	 */

	do {
		old_pte = pte_val(*ptep);
		if (old_pte & _PAGE_BUSY)
			goto out;
		new_pte = old_pte | _PAGE_BUSY | _PAGE_ACCESSED;
	} while(old_pte != __cmpxchg_u64((unsigned long *)ptep,
					 old_pte, new_pte));

	rflags = 0x2 | (!(new_pte & _PAGE_RW));
	/* _PAGE_EXEC -> HW_NO_EXEC since it's inverted */
	rflags |= ((new_pte & _PAGE_EXEC) ? 0 : HPTE_R_N);
	sz = ((1UL) << shift);
	if (!cpu_has_feature(CPU_FTR_COHERENT_ICACHE))
		/* No CPU has hugepages but lacks no execute, so we
		 * don't need to worry about that case */
		rflags = hash_huge_page_do_lazy_icache(rflags, __pte(old_pte),
						       trap, sz);

	/* Check if pte already has an hpte (case 2) */
	if (unlikely(old_pte & _PAGE_HASHPTE)) {
		/* There MIGHT be an HPTE for this pte */
		unsigned long hash, slot;

		hash = hpt_hash(va, shift, ssize);
		if (old_pte & _PAGE_F_SECOND)
			hash = ~hash;
		slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
		slot += (old_pte & _PAGE_F_GIX) >> 12;

		if (ppc_md.hpte_updatepp(slot, rflags, va, mmu_psize,
					 ssize, local) == -1)
			old_pte &= ~_PAGE_HPTEFLAGS;
	}

	if (likely(!(old_pte & _PAGE_HASHPTE))) {
		unsigned long hash = hpt_hash(va, shift, ssize);
		unsigned long hpte_group;

		pa = pte_pfn(__pte(old_pte)) << PAGE_SHIFT;

repeat:
		hpte_group = ((hash & htab_hash_mask) *
			      HPTES_PER_GROUP) & ~0x7UL;

		/* clear HPTE slot informations in new PTE */
#ifdef CONFIG_PPC_64K_PAGES
		new_pte = (new_pte & ~_PAGE_HPTEFLAGS) | _PAGE_HPTE_SUB0;
#else
		new_pte = (new_pte & ~_PAGE_HPTEFLAGS) | _PAGE_HASHPTE;
#endif
		/* Add in WIMG bits */
		rflags |= (new_pte & (_PAGE_WRITETHRU | _PAGE_NO_CACHE |
				      _PAGE_COHERENT | _PAGE_GUARDED));

		/* Insert into the hash table, primary slot */
		slot = ppc_md.hpte_insert(hpte_group, va, pa, rflags, 0,
					  mmu_psize, ssize);

		/* Primary is full, try the secondary */
		if (unlikely(slot == -1)) {
			hpte_group = ((~hash & htab_hash_mask) *
				      HPTES_PER_GROUP) & ~0x7UL;
			slot = ppc_md.hpte_insert(hpte_group, va, pa, rflags,
						  HPTE_V_SECONDARY,
						  mmu_psize, ssize);
			if (slot == -1) {
				if (mftb() & 0x1)
					hpte_group = ((hash & htab_hash_mask) *
						      HPTES_PER_GROUP) & ~0x7UL;

				ppc_md.hpte_remove(hpte_group);
				goto repeat;
			}
		}

		if (unlikely(slot == -2))
			panic("hash_huge_page: pte_insert failed\n");

		new_pte |= (slot << 12) & (_PAGE_F_SECOND | _PAGE_F_GIX);
	}

	/*
	 * No need to use ldarx/stdcx here
	 */
	*ptep = __pte(new_pte & ~_PAGE_BUSY);

	err = 0;

out:
	return err;
}
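
/*
 * Worked example for the permission encoding in __hash_page_huge() above:
 * rflags starts as 0x2 | (!(new_pte & _PAGE_RW)), i.e. pp = 0x2 for a
 * writable pte and pp = 0x3 (read-only) otherwise, and HPTE_R_N is ORed in
 * when the pte lacks _PAGE_EXEC, since the hardware bit means "no execute"
 * while the Linux bit means "execute allowed".
 */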
static int __init add_huge_page_size(unsigned long long size)
{
	int shift = __ffs(size);
	int mmu_psize;

	/* Check that it is a page size supported by the hardware and
	 * that it fits within pagetable and slice limits. */
	if (!is_power_of_2(size)
	    || (shift > SLICE_HIGH_SHIFT) || (shift <= PAGE_SHIFT))
		return -EINVAL;

	if ((mmu_psize = shift_to_mmu_psize(shift)) < 0)
		return -EINVAL;

#ifdef CONFIG_SPU_FS_64K_LS
	/* Disable support for 64K huge pages when 64K SPU local store
	 * support is enabled as the current implementation conflicts.
	 */
	if (shift == PAGE_SHIFT_64K)
		return -EINVAL;
#endif /* CONFIG_SPU_FS_64K_LS */

	BUG_ON(mmu_psize_defs[mmu_psize].shift != shift);

	/* Return if huge page size has already been setup */
	if (size_to_hstate(size))
		return 0;

	hugetlb_add_hstate(shift - PAGE_SHIFT);

	return 0;
}
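
/*
 * For instance, assuming the MMU reports a 16M page size, a 16M request
 * passes the checks above (16M is a power of two with shift 24, above
 * PAGE_SHIFT and within the slice limit) and then registers an hstate of
 * order 24 - PAGE_SHIFT with the generic hugetlb code.
 */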
static int __init hugepage_setup_sz(char *str)
{
	unsigned long long size;

	size = memparse(str, &str);

	if (add_huge_page_size(size) != 0)
		printk(KERN_WARNING "Invalid huge page size specified(%llu)\n",
		       size);

	return 1;
}
__setup("hugepagesz=", hugepage_setup_sz);
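
/*
 * In practice this pairs with the generic hugetlb command line handling:
 * e.g. booting with "hugepagesz=16M hugepages=N" has the handler above
 * register the 16M hstate, while the pool size itself is filled in by the
 * core hugetlb code.
 */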
static int __init hugetlbpage_init(void)
{
	int psize;

	if (!cpu_has_feature(CPU_FTR_16M_PAGE))
		return -ENODEV;

	for (psize = 0; psize < MMU_PAGE_COUNT; ++psize) {
		unsigned shift;
		unsigned pdshift;

		if (!mmu_psize_defs[psize].shift)
			continue;

		shift = mmu_psize_to_shift(psize);

		if (add_huge_page_size(1ULL << shift) < 0)
			continue;

		if (shift < PMD_SHIFT)
			pdshift = PMD_SHIFT;
		else if (shift < PUD_SHIFT)
			pdshift = PUD_SHIFT;
		else
			pdshift = PGDIR_SHIFT;

		pgtable_cache_add(pdshift - shift, NULL);
		if (!PGT_CACHE(pdshift - shift))
			panic("hugetlbpage_init(): could not create "
			      "pgtable cache for %d bit pagesize\n", shift);
	}

	/* Set default large page size. Currently, we pick 16M or 1M
	 * depending on what is available
	 */
	if (mmu_psize_defs[MMU_PAGE_16M].shift)
		HPAGE_SHIFT = mmu_psize_defs[MMU_PAGE_16M].shift;
	else if (mmu_psize_defs[MMU_PAGE_1M].shift)
		HPAGE_SHIFT = mmu_psize_defs[MMU_PAGE_1M].shift;

	return 0;
}

module_init(hugetlbpage_init);