2 * IA-64 Huge TLB Page Support for Kernel.
4 * Copyright (C) 2002-2004 Rohit Seth <rohit.seth@intel.com>
5 * Copyright (C) 2003-2004 Ken Chen <kenneth.w.chen@intel.com>
7 * Sep, 2003: add numa support
8 * Feb, 2004: dynamic hugetlb page size via boot parameter
11 #include <linux/config.h>
12 #include <linux/init.h>
15 #include <linux/hugetlb.h>
16 #include <linux/pagemap.h>
17 #include <linux/smp_lock.h>
18 #include <linux/slab.h>
19 #include <linux/sysctl.h>
21 #include <asm/pgalloc.h>
23 #include <asm/tlbflush.h>
25 unsigned int hpage_shift
=HPAGE_SHIFT_DEFAULT
;
28 huge_pte_alloc (struct mm_struct
*mm
, unsigned long addr
)
30 unsigned long taddr
= htlbpage_to_page(addr
);
36 pgd
= pgd_offset(mm
, taddr
);
37 pud
= pud_alloc(mm
, pgd
, taddr
);
39 pmd
= pmd_alloc(mm
, pud
, taddr
);
41 pte
= pte_alloc_map(mm
, pmd
, taddr
);
47 huge_pte_offset (struct mm_struct
*mm
, unsigned long addr
)
49 unsigned long taddr
= htlbpage_to_page(addr
);
55 pgd
= pgd_offset(mm
, taddr
);
56 if (pgd_present(*pgd
)) {
57 pud
= pud_offset(pgd
, taddr
);
58 if (pud_present(*pud
)) {
59 pmd
= pmd_offset(pud
, taddr
);
60 if (pmd_present(*pmd
))
61 pte
= pte_offset_map(pmd
, taddr
);
/*
 * Set the _PAGE_P bit in the given pte entry (modified in place).
 *
 * Wrapped in do { } while (0) so that "mk_pte_huge(e);" expands to exactly
 * one statement; a bare { } block followed by ';' would break when the
 * macro is used as the body of an unbraced if/else.
 */
#define mk_pte_huge(entry) do { pte_val(entry) |= _PAGE_P; } while (0)
71 set_huge_pte (struct mm_struct
*mm
, struct vm_area_struct
*vma
,
72 struct page
*page
, pte_t
* page_table
, int write_access
)
76 add_mm_counter(mm
, rss
, HPAGE_SIZE
/ PAGE_SIZE
);
79 pte_mkwrite(pte_mkdirty(mk_pte(page
, vma
->vm_page_prot
)));
81 entry
= pte_wrprotect(mk_pte(page
, vma
->vm_page_prot
));
82 entry
= pte_mkyoung(entry
);
84 set_pte(page_table
, entry
);
88 * This function checks for proper alignment of input addr and len parameters.
90 int is_aligned_hugepage_range(unsigned long addr
, unsigned long len
)
92 if (len
& ~HPAGE_MASK
)
94 if (addr
& ~HPAGE_MASK
)
96 if (REGION_NUMBER(addr
) != REGION_HPAGE
)
102 int copy_hugetlb_page_range(struct mm_struct
*dst
, struct mm_struct
*src
,
103 struct vm_area_struct
*vma
)
105 pte_t
*src_pte
, *dst_pte
, entry
;
106 struct page
*ptepage
;
107 unsigned long addr
= vma
->vm_start
;
108 unsigned long end
= vma
->vm_end
;
111 dst_pte
= huge_pte_alloc(dst
, addr
);
114 src_pte
= huge_pte_offset(src
, addr
);
116 ptepage
= pte_page(entry
);
118 set_pte(dst_pte
, entry
);
119 add_mm_counter(dst
, rss
, HPAGE_SIZE
/ PAGE_SIZE
);
128 follow_hugetlb_page(struct mm_struct
*mm
, struct vm_area_struct
*vma
,
129 struct page
**pages
, struct vm_area_struct
**vmas
,
130 unsigned long *st
, int *length
, int i
)
133 unsigned long start
= *st
;
134 unsigned long pstart
;
139 pstart
= start
& HPAGE_MASK
;
140 ptep
= huge_pte_offset(mm
, start
);
144 page
= pte_page(pte
);
146 page
+= ((start
& ~HPAGE_MASK
) >> PAGE_SHIFT
);
155 if (((start
& HPAGE_MASK
) == pstart
) && len
&&
156 (start
< vma
->vm_end
))
158 } while (len
&& start
< vma
->vm_end
);
164 struct page
*follow_huge_addr(struct mm_struct
*mm
, unsigned long addr
, int write
)
169 if (REGION_NUMBER(addr
) != REGION_HPAGE
)
170 return ERR_PTR(-EINVAL
);
172 ptep
= huge_pte_offset(mm
, addr
);
173 if (!ptep
|| pte_none(*ptep
))
175 page
= pte_page(*ptep
);
176 page
+= ((addr
& ~HPAGE_MASK
) >> PAGE_SHIFT
);
179 int pmd_huge(pmd_t pmd
)
184 follow_huge_pmd(struct mm_struct
*mm
, unsigned long address
, pmd_t
*pmd
, int write
)
190 * Same as generic free_pgtables(), except constant PGDIR_* and pgd_offset
191 * are hugetlb region specific.
193 void hugetlb_free_pgtables(struct mmu_gather
*tlb
, struct vm_area_struct
*prev
,
194 unsigned long start
, unsigned long end
)
196 unsigned long first
= start
& HUGETLB_PGDIR_MASK
;
197 unsigned long last
= end
+ HUGETLB_PGDIR_SIZE
- 1;
198 struct mm_struct
*mm
= tlb
->mm
;
204 if (prev
->vm_end
> start
) {
205 if (last
> prev
->vm_start
)
206 last
= prev
->vm_start
;
211 struct vm_area_struct
*next
= prev
->vm_next
;
214 if (next
->vm_start
< start
) {
218 if (last
> next
->vm_start
)
219 last
= next
->vm_start
;
221 if (prev
->vm_end
> first
)
222 first
= prev
->vm_end
;
226 if (last
< first
) /* for arches with discontiguous pgd indices */
228 clear_page_range(tlb
, first
, last
);
231 void unmap_hugepage_range(struct vm_area_struct
*vma
, unsigned long start
, unsigned long end
)
233 struct mm_struct
*mm
= vma
->vm_mm
;
234 unsigned long address
;
238 BUG_ON(start
& (HPAGE_SIZE
- 1));
239 BUG_ON(end
& (HPAGE_SIZE
- 1));
241 for (address
= start
; address
< end
; address
+= HPAGE_SIZE
) {
242 pte
= huge_pte_offset(mm
, address
);
245 page
= pte_page(*pte
);
247 pte_clear(mm
, address
, pte
);
249 add_mm_counter(mm
, rss
, - ((end
- start
) >> PAGE_SHIFT
));
250 flush_tlb_range(vma
, start
, end
);
253 int hugetlb_prefault(struct address_space
*mapping
, struct vm_area_struct
*vma
)
255 struct mm_struct
*mm
= current
->mm
;
259 BUG_ON(vma
->vm_start
& ~HPAGE_MASK
);
260 BUG_ON(vma
->vm_end
& ~HPAGE_MASK
);
262 spin_lock(&mm
->page_table_lock
);
263 for (addr
= vma
->vm_start
; addr
< vma
->vm_end
; addr
+= HPAGE_SIZE
) {
265 pte_t
*pte
= huge_pte_alloc(mm
, addr
);
275 idx
= ((addr
- vma
->vm_start
) >> HPAGE_SHIFT
)
276 + (vma
->vm_pgoff
>> (HPAGE_SHIFT
- PAGE_SHIFT
));
277 page
= find_get_page(mapping
, idx
);
279 /* charge the fs quota first */
280 if (hugetlb_get_quota(mapping
)) {
284 page
= alloc_huge_page();
286 hugetlb_put_quota(mapping
);
290 ret
= add_to_page_cache(page
, mapping
, idx
, GFP_ATOMIC
);
294 hugetlb_put_quota(mapping
);
295 page_cache_release(page
);
299 set_huge_pte(mm
, vma
, page
, pte
, vma
->vm_flags
& VM_WRITE
);
302 spin_unlock(&mm
->page_table_lock
);
306 unsigned long hugetlb_get_unmapped_area(struct file
*file
, unsigned long addr
, unsigned long len
,
307 unsigned long pgoff
, unsigned long flags
)
309 struct vm_area_struct
*vmm
;
311 if (len
> RGN_MAP_LIMIT
)
313 if (len
& ~HPAGE_MASK
)
315 /* This code assumes that REGION_HPAGE != 0. */
316 if ((REGION_NUMBER(addr
) != REGION_HPAGE
) || (addr
& (HPAGE_SIZE
- 1)))
317 addr
= HPAGE_REGION_BASE
;
319 addr
= ALIGN(addr
, HPAGE_SIZE
);
320 for (vmm
= find_vma(current
->mm
, addr
); ; vmm
= vmm
->vm_next
) {
321 /* At this point: (!vmm || addr < vmm->vm_end). */
322 if (REGION_OFFSET(addr
) + len
> RGN_MAP_LIMIT
)
324 if (!vmm
|| (addr
+ len
) <= vmm
->vm_start
)
326 addr
= ALIGN(vmm
->vm_end
, HPAGE_SIZE
);
330 static int __init
hugetlb_setup_sz(char *str
)
333 unsigned long long size
;
335 if (ia64_pal_vm_page_size(&tr_pages
, NULL
) != 0)
337 * shouldn't happen, but just in case.
339 tr_pages
= 0x15557000UL
;
341 size
= memparse(str
, &str
);
342 if (*str
|| (size
& (size
-1)) || !(tr_pages
& size
) ||
344 size
>= (1UL << PAGE_SHIFT
<< MAX_ORDER
)) {
345 printk(KERN_WARNING
"Invalid huge page size specified\n");
349 hpage_shift
= __ffs(size
);
351 * boot cpu already executed ia64_mmu_init, and has HPAGE_SHIFT_DEFAULT
352 * override here with new page shift.
354 ia64_set_rr(HPAGE_REGION_BASE
, hpage_shift
<< 2);
357 __setup("hugepagesz=", hugetlb_setup_sz
);