2 #include <linux/highmem.h>
3 #include <linux/sched.h>
4 #include <linux/hugetlb.h>
6 static int walk_pte_range(pmd_t
*pmd
, unsigned long addr
, unsigned long end
,
12 pte
= pte_offset_map(pmd
, addr
);
14 err
= walk
->pte_entry(pte
, addr
, addr
+ PAGE_SIZE
, walk
);
27 static int walk_pmd_range(pud_t
*pud
, unsigned long addr
, unsigned long end
,
34 pmd
= pmd_offset(pud
, addr
);
37 next
= pmd_addr_end(addr
, end
);
40 err
= walk
->pte_hole(addr
, next
, walk
);
46 * This implies that each ->pmd_entry() handler
47 * needs to know about pmd_trans_huge() pmds
50 err
= walk
->pmd_entry(pmd
, addr
, next
, walk
);
55 * Check this here so we only break down trans_huge
56 * pages when we _need_ to
61 split_huge_page_pmd(walk
->mm
, pmd
);
62 if (pmd_none_or_trans_huge_or_clear_bad(pmd
))
64 err
= walk_pte_range(pmd
, addr
, next
, walk
);
67 } while (pmd
++, addr
= next
, addr
!= end
);
72 static int walk_pud_range(pgd_t
*pgd
, unsigned long addr
, unsigned long end
,
79 pud
= pud_offset(pgd
, addr
);
81 next
= pud_addr_end(addr
, end
);
82 if (pud_none_or_clear_bad(pud
)) {
84 err
= walk
->pte_hole(addr
, next
, walk
);
90 err
= walk
->pud_entry(pud
, addr
, next
, walk
);
91 if (!err
&& (walk
->pmd_entry
|| walk
->pte_entry
))
92 err
= walk_pmd_range(pud
, addr
, next
, walk
);
95 } while (pud
++, addr
= next
, addr
!= end
);
100 #ifdef CONFIG_HUGETLB_PAGE
101 static unsigned long hugetlb_entry_end(struct hstate
*h
, unsigned long addr
,
104 unsigned long boundary
= (addr
& huge_page_mask(h
)) + huge_page_size(h
);
105 return boundary
< end
? boundary
: end
;
108 static int walk_hugetlb_range(struct vm_area_struct
*vma
,
109 unsigned long addr
, unsigned long end
,
110 struct mm_walk
*walk
)
112 struct hstate
*h
= hstate_vma(vma
);
114 unsigned long hmask
= huge_page_mask(h
);
119 next
= hugetlb_entry_end(h
, addr
, end
);
120 pte
= huge_pte_offset(walk
->mm
, addr
& hmask
);
121 if (pte
&& walk
->hugetlb_entry
)
122 err
= walk
->hugetlb_entry(pte
, hmask
, addr
, next
, walk
);
125 } while (addr
= next
, addr
!= end
);
130 static struct vm_area_struct
* hugetlb_vma(unsigned long addr
, struct mm_walk
*walk
)
132 struct vm_area_struct
*vma
;
134 /* We don't need vma lookup at all. */
135 if (!walk
->hugetlb_entry
)
138 VM_BUG_ON(!rwsem_is_locked(&walk
->mm
->mmap_sem
));
139 vma
= find_vma(walk
->mm
, addr
);
140 if (vma
&& vma
->vm_start
<= addr
&& is_vm_hugetlb_page(vma
))
146 #else /* CONFIG_HUGETLB_PAGE */
147 static struct vm_area_struct
* hugetlb_vma(unsigned long addr
, struct mm_walk
*walk
)
152 static int walk_hugetlb_range(struct vm_area_struct
*vma
,
153 unsigned long addr
, unsigned long end
,
154 struct mm_walk
*walk
)
159 #endif /* CONFIG_HUGETLB_PAGE */
164 * walk_page_range - walk a memory map's page tables with a callback
165 * @addr: starting address
166 * @end: ending address
167 * @walk: set of callbacks to invoke for each level of the tree
169 * Recursively walk the page table for the memory area in a VMA,
170 * calling supplied callbacks. Callbacks are called in-order (first
171 * PGD, first PUD, first PMD, first PTE, second PTE... second PMD,
172 * etc.). If lower-level callbacks are omitted, walking depth is reduced.
174 * Each callback receives an entry pointer and the start and end of the
175 * associated range, and a copy of the original mm_walk for access to
176 * the ->private or ->mm fields.
178 * Usually no locks are taken, but splitting a transparent huge page may
179 * take the page table lock. And the bottom level iterator will map PTE
180 * directories from highmem if necessary.
182 * If any callback returns a non-zero value, the walk is aborted and
183 * the return value is propagated back to the caller. Otherwise 0 is returned.
185 * walk->mm->mmap_sem must be held for at least read if walk->hugetlb_entry
188 int walk_page_range(unsigned long addr
, unsigned long end
,
189 struct mm_walk
*walk
)
201 pgd
= pgd_offset(walk
->mm
, addr
);
203 struct vm_area_struct
*vma
;
205 next
= pgd_addr_end(addr
, end
);
208 * handle hugetlb vma individually because pagetable walk for
209 * the hugetlb page is dependent on the architecture and
210 * we can't handle it in the same manner as non-huge pages.
212 vma
= hugetlb_vma(addr
, walk
);
214 if (vma
->vm_end
< next
)
217 * Hugepage is very tightly coupled with vma, so
218 * walk through hugetlb entries within a given vma.
220 err
= walk_hugetlb_range(vma
, addr
, next
, walk
);
223 pgd
= pgd_offset(walk
->mm
, next
);
227 if (pgd_none_or_clear_bad(pgd
)) {
229 err
= walk
->pte_hole(addr
, next
, walk
);
236 err
= walk
->pgd_entry(pgd
, addr
, next
, walk
);
238 (walk
->pud_entry
|| walk
->pmd_entry
|| walk
->pte_entry
))
239 err
= walk_pud_range(pgd
, addr
, next
, walk
);
243 } while (addr
= next
, addr
!= end
);