/*
 * IA-64 Huge TLB Page Support for Kernel.
 *
 * Copyright (C) 2002-2004 Rohit Seth <rohit.seth@intel.com>
 * Copyright (C) 2003-2004 Ken Chen <kenneth.w.chen@intel.com>
 *
 * Sep, 2003: add numa support
 * Feb, 2004: dynamic hugetlb page size via boot parameter
 */

#include <linux/config.h>
#include <linux/init.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/hugetlb.h>
#include <linux/pagemap.h>
#include <linux/smp_lock.h>
#include <linux/slab.h>
#include <linux/sysctl.h>
#include <asm/mman.h>
#include <asm/pgalloc.h>
#include <asm/tlb.h>
#include <asm/tlbflush.h>

unsigned int hpage_shift = HPAGE_SHIFT_DEFAULT;

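/*
 * Look up, allocating intermediate page-table levels as needed, the PTE that
 * maps the huge page covering 'addr'.  The address is first rebased into the
 * small-page view of the hugetlb region via htlbpage_to_page().  Returns NULL
 * if the pmd could not be allocated.
 */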
static pte_t *
huge_pte_alloc (struct mm_struct *mm, unsigned long addr)
{
        unsigned long taddr = htlbpage_to_page(addr);
        pgd_t *pgd;
        pmd_t *pmd;
        pte_t *pte = NULL;

        pgd = pgd_offset(mm, taddr);
        pmd = pmd_alloc(mm, pgd, taddr);
        if (pmd)
                pte = pte_alloc_map(mm, pmd, taddr);
        return pte;
}

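/*
 * Same walk as huge_pte_alloc(), but read-only: returns the PTE mapping the
 * huge page at 'addr' if all intermediate levels are present, else NULL.
 */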
static pte_t *
huge_pte_offset (struct mm_struct *mm, unsigned long addr)
{
        unsigned long taddr = htlbpage_to_page(addr);
        pgd_t *pgd;
        pmd_t *pmd;
        pte_t *pte = NULL;

        pgd = pgd_offset(mm, taddr);
        if (pgd_present(*pgd)) {
                pmd = pmd_offset(pgd, taddr);
                if (pmd_present(*pmd))
                        pte = pte_offset_map(pmd, taddr);
        }

        return pte;
}

#define mk_pte_huge(entry) { pte_val(entry) |= _PAGE_P; }
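
/*
 * Install a huge-page PTE for 'page' at 'page_table': writable and dirty for
 * write faults, write-protected otherwise.  The mapping is accounted in
 * mm->rss in units of small pages.
 */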
static void
set_huge_pte (struct mm_struct *mm, struct vm_area_struct *vma,
              struct page *page, pte_t * page_table, int write_access)
{
        pte_t entry;

        mm->rss += (HPAGE_SIZE / PAGE_SIZE);
        if (write_access) {
                entry =
                    pte_mkwrite(pte_mkdirty(mk_pte(page, vma->vm_page_prot)));
        } else
                entry = pte_wrprotect(mk_pte(page, vma->vm_page_prot));
        entry = pte_mkyoung(entry);
        mk_pte_huge(entry);
        set_pte(page_table, entry);
        return;
}

/*
 * This function checks for proper alignment of input addr and len parameters.
 */
int is_aligned_hugepage_range(unsigned long addr, unsigned long len)
{
        if (len & ~HPAGE_MASK)
                return -EINVAL;
        if (addr & ~HPAGE_MASK)
                return -EINVAL;
        if (REGION_NUMBER(addr) != REGION_HPAGE)
                return -EINVAL;

        return 0;
}

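/*
 * Share the huge-page mappings of 'vma' from 'src' into 'dst' at fork time:
 * each source PTE is copied as-is after taking a reference on the page, so
 * parent and child map the same huge pages.
 */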
int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
                        struct vm_area_struct *vma)
{
        pte_t *src_pte, *dst_pte, entry;
        struct page *ptepage;
        unsigned long addr = vma->vm_start;
        unsigned long end = vma->vm_end;

        while (addr < end) {
                dst_pte = huge_pte_alloc(dst, addr);
                if (!dst_pte)
                        goto nomem;
                src_pte = huge_pte_offset(src, addr);
                entry = *src_pte;
                ptepage = pte_page(entry);
                get_page(ptepage);
                set_pte(dst_pte, entry);
                dst->rss += (HPAGE_SIZE / PAGE_SIZE);
                addr += HPAGE_SIZE;
        }
        return 0;
nomem:
        return -ENOMEM;
}

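/*
 * get_user_pages() helper for the hugetlb region: walk up to *length small
 * pages starting at *st, filling 'pages' and/or 'vmas', and return the
 * updated slot index.  *st and *length are advanced to reflect the pages
 * consumed.
 */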
int
follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
                    struct page **pages, struct vm_area_struct **vmas,
                    unsigned long *st, int *length, int i)
{
        pte_t *ptep, pte;
        unsigned long start = *st;
        unsigned long pstart;
        int len = *length;
        struct page *page;

        do {
                pstart = start & HPAGE_MASK;
                ptep = huge_pte_offset(mm, start);
                pte = *ptep;

back1:
                page = pte_page(pte);
                if (pages) {
                        page += ((start & ~HPAGE_MASK) >> PAGE_SHIFT);
                        get_page(page);
                        pages[i] = page;
                }
                if (vmas)
                        vmas[i] = vma;
                i++;
                len--;
                start += PAGE_SIZE;
                if (((start & HPAGE_MASK) == pstart) && len &&
                    (start < vma->vm_end))
                        goto back1;
        } while (len && start < vma->vm_end);
        *length = len;
        *st = start;
        return i;
}

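/*
 * Translate a hugetlb-region address into its struct page.  Returns
 * ERR_PTR(-EINVAL) for addresses outside the huge-page region and NULL when
 * no huge page is mapped at 'addr'.
 */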
struct page *follow_huge_addr(struct mm_struct *mm, unsigned long addr, int write)
{
        struct page *page;
        pte_t *ptep;

        if (REGION_NUMBER(addr) != REGION_HPAGE)
                return ERR_PTR(-EINVAL);

        ptep = huge_pte_offset(mm, addr);
        if (!ptep || pte_none(*ptep))
                return NULL;
        page = pte_page(*ptep);
        page += ((addr & ~HPAGE_MASK) >> PAGE_SHIFT);
        return page;
}

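/*
 * Huge pages live in their own dedicated region on ia64 rather than being
 * marked at the pmd level, so the generic pmd-based huge page helpers are
 * stubs here.
 */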
int pmd_huge(pmd_t pmd)
{
        return 0;
}

struct page *
follow_huge_pmd(struct mm_struct *mm, unsigned long address, pmd_t *pmd, int write)
{
        return NULL;
}

/*
 * Same as generic free_pgtables(), except constant PGDIR_* and pgd_offset
 * are hugetlb region specific.
 */
void hugetlb_free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *prev,
        unsigned long start, unsigned long end)
{
        unsigned long first = start & HUGETLB_PGDIR_MASK;
        unsigned long last = end + HUGETLB_PGDIR_SIZE - 1;
        unsigned long start_index, end_index;
        struct mm_struct *mm = tlb->mm;

        if (!prev) {
                prev = mm->mmap;
                if (!prev)
                        goto no_mmaps;
                if (prev->vm_end > start) {
                        if (last > prev->vm_start)
                                last = prev->vm_start;
                        goto no_mmaps;
                }
        }
        for (;;) {
                struct vm_area_struct *next = prev->vm_next;

                if (next) {
                        if (next->vm_start < start) {
                                prev = next;
                                continue;
                        }
                        if (last > next->vm_start)
                                last = next->vm_start;
                }
                if (prev->vm_end > first)
                        first = prev->vm_end + HUGETLB_PGDIR_SIZE - 1;
                break;
        }
no_mmaps:
        if (last < first)       /* for arches with discontiguous pgd indices */
                return;
        /*
         * If the PGD bits are not consecutive in the virtual address, the
         * old method of shifting the VA by PGDIR_SHIFT doesn't work.
         */
        start_index = pgd_index(htlbpage_to_page(first));
        end_index = pgd_index(htlbpage_to_page(last));

        if (end_index > start_index) {
                clear_page_tables(tlb, start_index, end_index - start_index);
        }
}

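/*
 * Tear down the huge-page mappings of [start, end): drop the page reference
 * and clear the PTE for every mapped huge page, fix up mm->rss, and flush
 * the TLB for the range.
 */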
void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start, unsigned long end)
{
        struct mm_struct *mm = vma->vm_mm;
        unsigned long address;
        pte_t *pte;
        struct page *page;

        BUG_ON(start & (HPAGE_SIZE - 1));
        BUG_ON(end & (HPAGE_SIZE - 1));

        for (address = start; address < end; address += HPAGE_SIZE) {
                pte = huge_pte_offset(mm, address);
                if (pte_none(*pte))
                        continue;
                page = pte_page(*pte);
                put_page(page);
                pte_clear(pte);
        }
        mm->rss -= (end - start) >> PAGE_SHIFT;
        flush_tlb_range(vma, start, end);
}

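/*
 * Pre-fault the whole hugetlb VMA at mmap time: for every huge-page-sized
 * slot, allocate a PTE, look the page up in (or add it to) the page cache of
 * 'mapping', charging the hugetlbfs quota for newly allocated pages, and map
 * it with set_huge_pte().  Runs under mm->page_table_lock.
 */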
int hugetlb_prefault(struct address_space *mapping, struct vm_area_struct *vma)
{
        struct mm_struct *mm = current->mm;
        unsigned long addr;
        int ret = 0;

        BUG_ON(vma->vm_start & ~HPAGE_MASK);
        BUG_ON(vma->vm_end & ~HPAGE_MASK);

        spin_lock(&mm->page_table_lock);
        for (addr = vma->vm_start; addr < vma->vm_end; addr += HPAGE_SIZE) {
                unsigned long idx;
                pte_t *pte = huge_pte_alloc(mm, addr);
                struct page *page;

                if (!pte) {
                        ret = -ENOMEM;
                        goto out;
                }
                if (!pte_none(*pte))
                        continue;

                idx = ((addr - vma->vm_start) >> HPAGE_SHIFT)
                        + (vma->vm_pgoff >> (HPAGE_SHIFT - PAGE_SHIFT));
                page = find_get_page(mapping, idx);
                if (!page) {
                        /* charge the fs quota first */
                        if (hugetlb_get_quota(mapping)) {
                                ret = -ENOMEM;
                                goto out;
                        }
                        page = alloc_huge_page();
                        if (!page) {
                                hugetlb_put_quota(mapping);
                                ret = -ENOMEM;
                                goto out;
                        }
                        ret = add_to_page_cache(page, mapping, idx, GFP_ATOMIC);
                        if (!ret) {
                                unlock_page(page);
                        } else {
                                hugetlb_put_quota(mapping);
                                page_cache_release(page);
                                goto out;
                        }
                }
                set_huge_pte(mm, vma, page, pte, vma->vm_flags & VM_WRITE);
        }
out:
        spin_unlock(&mm->page_table_lock);
        return ret;
}

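/*
 * Find a free, HPAGE_SIZE-aligned range of 'len' bytes in the huge-page
 * region (REGION_HPAGE), scanning VMAs upward from 'addr', or from the start
 * of the region if 'addr' does not already lie in it.
 */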
unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr, unsigned long len,
                unsigned long pgoff, unsigned long flags)
{
        struct vm_area_struct *vmm;

        if (len > RGN_MAP_LIMIT)
                return -ENOMEM;
        if (len & ~HPAGE_MASK)
                return -EINVAL;
        /* This code assumes that REGION_HPAGE != 0. */
        if ((REGION_NUMBER(addr) != REGION_HPAGE) || (addr & (HPAGE_SIZE - 1)))
                addr = HPAGE_REGION_BASE;
        else
                addr = ALIGN(addr, HPAGE_SIZE);
        for (vmm = find_vma(current->mm, addr); ; vmm = vmm->vm_next) {
                /* At this point: (!vmm || addr < vmm->vm_end). */
                if (REGION_OFFSET(addr) + len > RGN_MAP_LIMIT)
                        return -ENOMEM;
                if (!vmm || (addr + len) <= vmm->vm_start)
                        return addr;
                addr = ALIGN(vmm->vm_end, HPAGE_SIZE);
        }
}

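/*
 * Parse the "hugepagesz=" boot parameter.  The requested size must be a
 * power of two, larger than a base page, supported for TLB insertion by the
 * CPU (per PAL_VM_PAGE_SIZE), and below the buddy allocator's MAX_ORDER
 * limit; on success the hugetlb region register is reprogrammed with the
 * new page size.
 */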
static int __init hugetlb_setup_sz(char *str)
{
        u64 tr_pages;
        unsigned long long size;

        if (ia64_pal_vm_page_size(&tr_pages, NULL) != 0)
                /*
                 * shouldn't happen, but just in case.
                 */
                tr_pages = 0x15557000UL;

        size = memparse(str, &str);
        if (*str || (size & (size - 1)) || !(tr_pages & size) ||
                size <= PAGE_SIZE ||
                size >= (1UL << PAGE_SHIFT << MAX_ORDER)) {
                printk(KERN_WARNING "Invalid huge page size specified\n");
                return 1;
        }

        hpage_shift = __ffs(size);
        /*
         * The boot cpu already executed ia64_mmu_init with HPAGE_SHIFT_DEFAULT;
         * override the hugetlb region register here with the new page shift.
         */
        ia64_set_rr(HPAGE_REGION_BASE, hpage_shift << 2);

        return 1;
}
__setup("hugepagesz=", hugetlb_setup_sz);