// SPDX-License-Identifier: GPL-2.0
#include <linux/mm.h>
#include <linux/rmap.h>
#include <linux/hugetlb.h>
#include <linux/swap.h>
#include <linux/swapops.h>

#include "internal.h"

static inline bool not_found(struct page_vma_mapped_walk *pvmw)
{
	page_vma_mapped_walk_done(pvmw);
	return false;
}

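/*
 * map_pte() below maps (and, where appropriate, locks) the pte at
 * pvmw->address: on success pvmw->pte points at the entry and pvmw->ptl is
 * held; when the entry cannot be a match (and PVMW_SYNC is not set) the pte
 * is left mapped but unlocked so the caller can keep scanning, with *ptlp
 * recording the lock that would cover it.
 */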
static bool map_pte(struct page_vma_mapped_walk *pvmw, spinlock_t **ptlp)
{
	pte_t ptent;

	if (pvmw->flags & PVMW_SYNC) {
		/* Use the stricter lookup */
		pvmw->pte = pte_offset_map_lock(pvmw->vma->vm_mm, pvmw->pmd,
						pvmw->address, &pvmw->ptl);
		*ptlp = pvmw->ptl;
		return !!pvmw->pte;
	}

	/*
	 * It is important to return the ptl corresponding to pte,
	 * in case *pvmw->pmd changes underneath us; so we need to
	 * return it even when choosing not to lock, in case caller
	 * proceeds to loop over next ptes, and finds a match later.
	 * Though, in most cases, page lock already protects this.
	 */
	pvmw->pte = pte_offset_map_nolock(pvmw->vma->vm_mm, pvmw->pmd,
					  pvmw->address, ptlp);
	if (!pvmw->pte)
		return false;

	ptent = ptep_get(pvmw->pte);

	if (pvmw->flags & PVMW_MIGRATION) {
		if (!is_swap_pte(ptent))
			return false;
	} else if (is_swap_pte(ptent)) {
		swp_entry_t entry;
		/*
		 * Handle un-addressable ZONE_DEVICE memory.
		 *
		 * We get here when we are trying to unmap a private
		 * device page from the process address space. Such
		 * page is not CPU accessible and thus is mapped as
		 * a special swap entry, nonetheless it still does
		 * count as a valid regular mapping for the page
		 * (and is accounted as such in page maps count).
		 *
		 * So handle this special case as if it was a normal
		 * page mapping ie lock CPU page table and return true.
		 *
		 * For more details on device private memory see HMM
		 * (include/linux/hmm.h or mm/hmm.c).
		 */
		entry = pte_to_swp_entry(ptent);
		if (!is_device_private_entry(entry) &&
		    !is_device_exclusive_entry(entry))
			return false;
	} else if (!pte_present(ptent)) {
		return false;
	}
	pvmw->ptl = *ptlp;
	spin_lock(pvmw->ptl);
	return true;
}

/**
 * check_pte - check if [pvmw->pfn, @pvmw->pfn + @pvmw->nr_pages) is
 * mapped at the @pvmw->pte
 * @pvmw: page_vma_mapped_walk struct, includes a pair pte and pfn range
 * for checking
 *
 * page_vma_mapped_walk() found a place where pfn range is *potentially*
 * mapped. check_pte() has to validate this.
 *
 * pvmw->pte may point to empty PTE, swap PTE or PTE pointing to
 * arbitrary page.
 *
 * If PVMW_MIGRATION flag is set, returns true if @pvmw->pte contains migration
 * entry that points to [pvmw->pfn, @pvmw->pfn + @pvmw->nr_pages)
 *
 * If PVMW_MIGRATION flag is not set, returns true if pvmw->pte points to
 * [pvmw->pfn, @pvmw->pfn + @pvmw->nr_pages)
 *
 * Otherwise, return false.
 */
static bool check_pte(struct page_vma_mapped_walk *pvmw)
{
	unsigned long pfn;
	pte_t ptent = ptep_get(pvmw->pte);

	if (pvmw->flags & PVMW_MIGRATION) {
		swp_entry_t entry;

		if (!is_swap_pte(ptent))
			return false;
		entry = pte_to_swp_entry(ptent);

		if (!is_migration_entry(entry) &&
		    !is_device_exclusive_entry(entry))
			return false;

		pfn = swp_offset_pfn(entry);
	} else if (is_swap_pte(ptent)) {
		swp_entry_t entry;

		/* Handle un-addressable ZONE_DEVICE memory */
		entry = pte_to_swp_entry(ptent);
		if (!is_device_private_entry(entry) &&
		    !is_device_exclusive_entry(entry))
			return false;

		pfn = swp_offset_pfn(entry);
	} else {
		if (!pte_present(ptent))
			return false;

		pfn = pte_pfn(ptent);
	}

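	/*
	 * The subtraction below is unsigned: a pfn below pvmw->pfn wraps to
	 * a huge value and the comparison fails, so one compare covers both
	 * ends of the [pvmw->pfn, pvmw->pfn + nr_pages) range.
	 */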
	return (pfn - pvmw->pfn) < pvmw->nr_pages;
}

/* Returns true if the two ranges overlap.  Careful to not overflow. */
static bool check_pmd(unsigned long pfn, struct page_vma_mapped_walk *pvmw)
{
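	/*
	 * The pmd maps [pfn, pfn + HPAGE_PMD_NR); the walk wants
	 * [pvmw->pfn, pvmw->pfn + pvmw->nr_pages).  Comparing against the
	 * last pfn of each range ("x - 1") rather than one-past-the-end
	 * avoids overflowing an unsigned long when a range ends at the very
	 * top of the pfn space.
	 */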
	if ((pfn + HPAGE_PMD_NR - 1) < pvmw->pfn)
		return false;
	if (pfn > pvmw->pfn + pvmw->nr_pages - 1)
		return false;
	return true;
}

static void step_forward(struct page_vma_mapped_walk *pvmw, unsigned long size)
{
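	/*
	 * Advance to the first address of the next @size-aligned region,
	 * e.g. with size == PMD_SIZE (2MB on common configs) an address of
	 * 0x201000 becomes 0x400000.  If the addition wraps past the end of
	 * the address space, park the walk at ULONG_MAX so the caller's
	 * "< end" check terminates.
	 */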
	pvmw->address = (pvmw->address + size) & ~(size - 1);
	if (!pvmw->address)
		pvmw->address = ULONG_MAX;
}

/**
 * page_vma_mapped_walk - check if @pvmw->pfn is mapped in @pvmw->vma at
 * @pvmw->address
 * @pvmw: pointer to struct page_vma_mapped_walk. page, vma, address and flags
 * must be set. pmd, pte and ptl must be NULL.
 *
 * Returns true if the page is mapped in the vma. @pvmw->pmd and @pvmw->pte point
 * to relevant page table entries. @pvmw->ptl is locked. @pvmw->address is
 * adjusted if needed (for PTE-mapped THPs).
 *
 * If @pvmw->pmd is set but @pvmw->pte is not, you have found PMD-mapped page
 * (usually THP). For PTE-mapped THP, you should run page_vma_mapped_walk() in
 * a loop to find all PTEs that map the THP.
 *
 * For HugeTLB pages, @pvmw->pte is set to the relevant page table entry
 * regardless of which page table level the page is mapped at. @pvmw->pmd is
 * NULL.
 *
 * Returns false if there are no more page table entries for the page in
 * the vma. @pvmw->ptl is unlocked and @pvmw->pte is unmapped.
 *
 * If you need to stop the walk before page_vma_mapped_walk() returned false,
 * use page_vma_mapped_walk_done(). It will do the housekeeping.
 */
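/*
 * Typical caller pattern (an illustrative sketch, not code from this file):
 * rmap walkers such as try_to_unmap_one() loop until the walk is exhausted,
 * calling page_vma_mapped_walk_done() only when they bail out early:
 *
 *	while (page_vma_mapped_walk(&pvmw)) {
 *		...examine or modify the entry at pvmw.pte / pvmw.pmd...
 *		if (need_to_stop) {	(hypothetical condition)
 *			page_vma_mapped_walk_done(&pvmw);
 *			break;
 *		}
 *	}
 */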
bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw)
{
	struct vm_area_struct *vma = pvmw->vma;
	struct mm_struct *mm = vma->vm_mm;
	unsigned long end;
	spinlock_t *ptl;
	pgd_t *pgd;
	p4d_t *p4d;
	pud_t *pud;
	pmd_t pmde;

	/* The only possible pmd mapping has been handled on last iteration */
	if (pvmw->pmd && !pvmw->pte)
		return not_found(pvmw);

	if (unlikely(is_vm_hugetlb_page(vma))) {
		struct hstate *hstate = hstate_vma(vma);
		unsigned long size = huge_page_size(hstate);
		/* The only possible mapping was handled on last iteration */
		if (pvmw->pte)
			return not_found(pvmw);
		/*
		 * All callers that get here will already hold the
		 * i_mmap_rwsem.  Therefore, no additional locks need to be
		 * taken before calling hugetlb_walk().
		 */
		pvmw->pte = hugetlb_walk(vma, pvmw->address, size);
		if (!pvmw->pte)
			return false;

		pvmw->ptl = huge_pte_lock(hstate, mm, pvmw->pte);
		if (!check_pte(pvmw))
			return not_found(pvmw);
		return true;
	}

	end = vma_address_end(pvmw);
	if (pvmw->pte)
		goto next_pte;
restart:
	do {
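		/*
		 * Walk pgd -> p4d -> pud looking for a present entry; a hole
		 * at any level lets step_forward() skip the whole range that
		 * level covers.
		 */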
		pgd = pgd_offset(mm, pvmw->address);
		if (!pgd_present(*pgd)) {
			step_forward(pvmw, PGDIR_SIZE);
			continue;
		}
		p4d = p4d_offset(pgd, pvmw->address);
		if (!p4d_present(*p4d)) {
			step_forward(pvmw, P4D_SIZE);
			continue;
		}
		pud = pud_offset(p4d, pvmw->address);
		if (!pud_present(*pud)) {
			step_forward(pvmw, PUD_SIZE);
			continue;
		}

		pvmw->pmd = pmd_offset(pud, pvmw->address);
		/*
		 * Make sure the pmd value isn't cached in a register by the
		 * compiler and used as a stale value after we've observed a
		 * subsequent update.
		 */
		pmde = pmdp_get_lockless(pvmw->pmd);

		if (pmd_trans_huge(pmde) || is_pmd_migration_entry(pmde) ||
		    (pmd_present(pmde) && pmd_devmap(pmde))) {
			pvmw->ptl = pmd_lock(mm, pvmw->pmd);
			pmde = *pvmw->pmd;
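			/*
			 * pmde was re-read under the pmd lock just above;
			 * decide whether it is a migration entry, a mapped
			 * huge page, or a pmd that was split under us.
			 */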
			if (!pmd_present(pmde)) {
				swp_entry_t entry;

				if (!thp_migration_supported() ||
				    !(pvmw->flags & PVMW_MIGRATION))
					return not_found(pvmw);
				entry = pmd_to_swp_entry(pmde);
				if (!is_migration_entry(entry) ||
				    !check_pmd(swp_offset_pfn(entry), pvmw))
					return not_found(pvmw);
				return true;
			}
			if (likely(pmd_trans_huge(pmde) || pmd_devmap(pmde))) {
				if (pvmw->flags & PVMW_MIGRATION)
					return not_found(pvmw);
				if (!check_pmd(pmd_pfn(pmde), pvmw))
					return not_found(pvmw);
				return true;
			}
			/* THP pmd was split under us: handle on pte level */
			spin_unlock(pvmw->ptl);
			pvmw->ptl = NULL;
		} else if (!pmd_present(pmde)) {
			/*
			 * If PVMW_SYNC, take and drop THP pmd lock so that we
			 * cannot return prematurely, while zap_huge_pmd() has
			 * cleared *pmd but not decremented compound_mapcount().
			 */
			if ((pvmw->flags & PVMW_SYNC) &&
			    thp_vma_suitable_order(vma, pvmw->address,
						   PMD_ORDER) &&
			    (pvmw->nr_pages >= HPAGE_PMD_NR)) {
				spinlock_t *ptl = pmd_lock(mm, pvmw->pmd);

				spin_unlock(ptl);
			}
			step_forward(pvmw, PMD_SIZE);
			continue;
		}
		if (!map_pte(pvmw, &ptl)) {
			if (!pvmw->pte)
				goto restart;
			goto next_pte;
		}
this_pte:
		if (check_pte(pvmw))
			return true;
next_pte:
		do {
			pvmw->address += PAGE_SIZE;
			if (pvmw->address >= end)
				return not_found(pvmw);
			/* Did we cross page table boundary? */
			if ((pvmw->address & (PMD_SIZE - PAGE_SIZE)) == 0) {
				if (pvmw->ptl) {
					spin_unlock(pvmw->ptl);
					pvmw->ptl = NULL;
				}
				pte_unmap(pvmw->pte);
				pvmw->pte = NULL;
				goto restart;
			}
			pvmw->pte++;
		} while (pte_none(ptep_get(pvmw->pte)));

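		/*
		 * When map_pte() declined to take the lock (the !PVMW_SYNC
		 * case with no immediate match), take the ptl it reported
		 * before re-checking the new entry at this_pte.
		 */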
		if (!pvmw->ptl) {
			pvmw->ptl = ptl;
			spin_lock(pvmw->ptl);
		}
		goto this_pte;
	} while (pvmw->address < end);

	return false;
}

#ifdef CONFIG_MEMORY_FAILURE
/**
 * page_mapped_in_vma - check whether a page is really mapped in a VMA
 * @page: the page to test
 * @vma: the VMA to test
 *
 * Return: The address the page is mapped at if the page is in the range
 * covered by the VMA and present in the page table.  If the page is
 * outside the VMA or not present, returns -EFAULT.
 * Only valid for normal file or anonymous VMAs.
 */
unsigned long page_mapped_in_vma(struct page *page, struct vm_area_struct *vma)
{
	struct folio *folio = page_folio(page);
	pgoff_t pgoff = folio->index + folio_page_idx(folio, page);
	struct page_vma_mapped_walk pvmw = {
		.pfn = page_to_pfn(page),
		.nr_pages = 1,
		.vma = vma,
		.flags = PVMW_SYNC,
	};
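
	/*
	 * PVMW_SYNC selects the stricter, locked lookup in map_pte() and
	 * makes the walk take/drop the THP pmd lock, so the check does not
	 * return prematurely while the mapping is being torn down.
	 */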
	pvmw.address = vma_address(vma, pgoff, 1);
	if (pvmw.address == -EFAULT)
		goto out;
	if (!page_vma_mapped_walk(&pvmw))
		return -EFAULT;
	page_vma_mapped_walk_done(&pvmw);
out:
	return pvmw.address;
}
#endif