/*
 * Copyright 2002 Andi Kleen, SuSE Labs.
 * Thanks to Ben LaHaise for precious feedback.
 */
#include <linux/highmem.h>
#include <linux/bootmem.h>
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/mm.h>

#include <asm/processor.h>
#include <asm/tlbflush.h>
#include <asm/sections.h>
#include <asm/uaccess.h>
#include <asm/pgalloc.h>
static inline int
within(unsigned long addr, unsigned long start, unsigned long end)
{
	return addr >= start && addr < end;
}
/*
 * Certain areas of memory on x86 require very specific protection flags,
 * for example the BIOS area or kernel text. Callers don't always get this
 * right (again, ioremap() on BIOS memory is not uncommon) so this function
 * checks and fixes these known static required protection bits.
 */
static inline pgprot_t static_protections(pgprot_t prot, unsigned long address)
{
	pgprot_t forbidden = __pgprot(0);

	/*
	 * The BIOS area between 640k and 1Mb needs to be executable for
	 * PCI BIOS based config access (CONFIG_PCI_GOBIOS) support.
	 */
	if (within(__pa(address), BIOS_BEGIN, BIOS_END))
		pgprot_val(forbidden) |= _PAGE_NX;

	/*
	 * The kernel text needs to be executable for obvious reasons.
	 * Does not cover __inittext, since that is gone later on.
	 */
	if (within(address, (unsigned long)_text, (unsigned long)_etext))
		pgprot_val(forbidden) |= _PAGE_NX;

#ifdef CONFIG_DEBUG_RODATA
	/* The .rodata section needs to be read-only */
	if (within(address, (unsigned long)__start_rodata,
				(unsigned long)__end_rodata))
		pgprot_val(forbidden) |= _PAGE_RW;
#endif

	prot = __pgprot(pgprot_val(prot) & ~pgprot_val(forbidden));

	return prot;
}
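
/*
 * Illustrative sketch (not from the original file): assuming
 * CONFIG_DEBUG_RODATA, a request to make a .rodata page writable is
 * silently corrected here - _PAGE_RW lands in "forbidden" and is
 * masked back out of the caller's pgprot:
 *
 *	pgprot_t prot = PAGE_KERNEL;	(has _PAGE_RW set)
 *	prot = static_protections(prot, (unsigned long)__start_rodata);
 *	(pgprot_val(prot) & _PAGE_RW is now 0 - the page stays read-only)
 */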
pte_t *lookup_address(unsigned long address, int *level)
{
	pgd_t *pgd = pgd_offset_k(address);
	pud_t *pud;
	pmd_t *pmd;

	*level = PG_LEVEL_NONE;

	if (pgd_none(*pgd))
		return NULL;
	pud = pud_offset(pgd, address);
	if (pud_none(*pud))
		return NULL;
	pmd = pmd_offset(pud, address);
	if (pmd_none(*pmd))
		return NULL;

	*level = PG_LEVEL_2M;
	if (pmd_large(*pmd))
		return (pte_t *)pmd;

	*level = PG_LEVEL_4K;

	return pte_offset_kernel(pmd, address);
}
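
/*
 * Usage sketch (illustrative only): the returned pointer must be
 * checked together with *level, because for a large mapping the
 * returned pte_t * in fact points at the pmd entry:
 *
 *	int level;
 *	pte_t *kpte = lookup_address(address, &level);
 *
 *	if (kpte && level == PG_LEVEL_4K)
 *		... *kpte is a real 4k pte ...
 *	else if (kpte)
 *		... address is covered by a large page ...
 */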
static void __set_pmd_pte(pte_t *kpte, unsigned long address, pte_t pte)
{
	/* change init_mm */
	set_pte_atomic(kpte, pte);

	/*
	 * With !SHARED_KERNEL_PMD (32-bit PAE) every pgd carries its own
	 * kernel pmds, so the change must be propagated to all of them:
	 */
	if (!SHARED_KERNEL_PMD) {
		struct page *page;

		for (page = pgd_list; page; page = (struct page *)page->index) {
			pgd_t *pgd;
			pud_t *pud;
			pmd_t *pmd;

			pgd = (pgd_t *)page_address(page) + pgd_index(address);
			pud = pud_offset(pgd, address);
			pmd = pmd_offset(pud, address);
			set_pte_atomic((pte_t *)pmd, pte);
		}
	}
}
static int split_large_page(pte_t *kpte, unsigned long address)
{
	pgprot_t ref_prot = pte_pgprot(pte_clrhuge(*kpte));
	gfp_t gfp_flags = GFP_KERNEL;
	unsigned long flags;
	unsigned long addr;
	pte_t *pbase, *tmp;
	struct page *base;
	int i, level;

#ifdef CONFIG_DEBUG_PAGEALLOC
	gfp_flags = GFP_ATOMIC;
#endif
	base = alloc_pages(gfp_flags, 0);
	if (!base)
		return -ENOMEM;

	spin_lock_irqsave(&pgd_lock, flags);
	/*
	 * Check for races, another CPU might have split this page
	 * up for us already:
	 */
	tmp = lookup_address(address, &level);
	if (tmp != kpte) {
		WARN_ON_ONCE(1);
		goto out_unlock;
	}

	address = __pa(address);
	addr = address & LARGE_PAGE_MASK;
	pbase = (pte_t *)page_address(base);
	paravirt_alloc_pt(&init_mm, page_to_pfn(base));

	for (i = 0; i < PTRS_PER_PTE; i++, addr += PAGE_SIZE)
		set_pte(&pbase[i], pfn_pte(addr >> PAGE_SHIFT, ref_prot));

	/*
	 * Install the new, split up pagetable. Important detail here:
	 *
	 * On Intel the NX bit of all levels must be cleared to make a
	 * page executable. See section 4.13.2 of Intel 64 and IA-32
	 * Architectures Software Developer's Manual).
	 */
	ref_prot = pte_pgprot(pte_mkexec(pte_clrhuge(*kpte)));
	__set_pmd_pte(kpte, address, mk_pte(base, ref_prot));
	base = NULL;

out_unlock:
	spin_unlock_irqrestore(&pgd_lock, flags);

	if (base)
		__free_pages(base, 0);

	return 0;
}
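
/*
 * Worked example (illustrative, assuming a 32-bit non-PAE kernel with
 * 4M large pages, so PTRS_PER_PTE == 1024): splitting the mapping of
 * virtual address 0xc0123456 gives
 *
 *	address = __pa(0xc0123456)          = 0x00123456
 *	addr    = address & LARGE_PAGE_MASK = 0x00000000
 *
 * and the loop above fills all 1024 new ptes to cover the physical
 * range 0x00000000-0x003fffff, each one inheriting ref_prot from the
 * old large page.
 */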
static int
__change_page_attr(unsigned long address, unsigned long pfn, pgprot_t prot)
{
	struct page *kpte_page;
	int level, err = 0;
	pte_t *kpte;

	BUG_ON(pfn > max_low_pfn);

repeat:
	kpte = lookup_address(address, &level);
	if (!kpte)
		return -EINVAL;

	kpte_page = virt_to_page(kpte);
	BUG_ON(PageLRU(kpte_page));
	BUG_ON(PageCompound(kpte_page));

	prot = static_protections(prot, address);

	if (level == PG_LEVEL_4K) {
		set_pte_atomic(kpte, pfn_pte(pfn, canon_pgprot(prot)));
	} else {
		err = split_large_page(kpte, address);
		if (!err)
			goto repeat;
	}
	return err;
}
/**
 * change_page_attr_addr - Change page table attributes in linear mapping
 * @address: Virtual address in linear mapping.
 * @numpages: Number of pages to change
 * @prot: New page table attribute (PAGE_*)
 *
 * Change page attributes of a page in the direct mapping. This is a variant
 * of change_page_attr() that also works on memory holes that do not have a
 * mem_map entry (pfn_valid() is false).
 *
 * See change_page_attr() documentation for more details.
 *
 * Modules and drivers should use the set_memory_* APIs instead.
 */
int change_page_attr_addr(unsigned long address, int numpages, pgprot_t prot)
{
	int err = 0, kernel_map = 0, i;

#ifdef CONFIG_X86_64
	if (address >= __START_KERNEL_map &&
			address < __START_KERNEL_map + KERNEL_TEXT_SIZE) {

		address = (unsigned long)__va(__pa(address));
		kernel_map = 1;
	}
#endif

	for (i = 0; i < numpages; i++, address += PAGE_SIZE) {
		unsigned long pfn = __pa(address) >> PAGE_SHIFT;

		if (!kernel_map || pte_present(pfn_pte(0, prot))) {
			err = __change_page_attr(address, pfn, prot);
			if (err)
				break;
		}
#ifdef CONFIG_X86_64
		/*
		 * Handle kernel mapping too which aliases part of
		 * lowmem:
		 */
		if (__pa(address) < KERNEL_TEXT_SIZE) {
			unsigned long addr2;
			pgprot_t prot2;

			addr2 = __START_KERNEL_map + __pa(address);
			/* Make sure the kernel mappings stay executable */
			prot2 = pte_pgprot(pte_mkexec(pfn_pte(0, prot)));
			err = __change_page_attr(addr2, pfn, prot2);
		}
#endif
	}

	return err;
}
/**
 * change_page_attr - Change page table attributes in the linear mapping.
 * @page: First page to change
 * @numpages: Number of pages to change
 * @prot: New protection/caching type (PAGE_*)
 *
 * Returns 0 on success, otherwise a negated errno.
 *
 * This should be used when a page is mapped with a different caching policy
 * than write-back somewhere - some CPUs do not like it when mappings with
 * different caching policies exist. This changes the page attributes of the
 * kernel linear mapping too.
 *
 * Caller must call global_flush_tlb() later to make the changes active.
 *
 * The caller needs to ensure that there are no conflicting mappings elsewhere
 * (e.g. in user space). This function only deals with the kernel linear map.
 *
 * For MMIO areas without mem_map use change_page_attr_addr() instead.
 *
 * Modules and drivers should use the set_pages_* APIs instead.
 */
int change_page_attr(struct page *page, int numpages, pgprot_t prot)
{
	unsigned long addr = (unsigned long)page_address(page);

	return change_page_attr_addr(addr, numpages, prot);
}
EXPORT_UNUSED_SYMBOL(change_page_attr); /* to be removed in 2.6.27 */
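
/*
 * Usage sketch (illustrative only) of the documented protocol: change
 * the attributes first, then make them active with global_flush_tlb():
 *
 *	err = change_page_attr(page, 1, PAGE_KERNEL_NOCACHE);
 *	if (!err)
 *		global_flush_tlb();
 *	...
 *	change_page_attr(page, 1, PAGE_KERNEL);	(restore write-back)
 *	global_flush_tlb();
 */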
/**
 * change_page_attr_set - Change page table attributes in the linear mapping.
 * @addr: Virtual address in linear mapping.
 * @numpages: Number of pages to change
 * @prot: Protection/caching type bits to set (PAGE_*)
 *
 * Returns 0 on success, otherwise a negated errno.
 *
 * This should be used when a page is mapped with a different caching policy
 * than write-back somewhere - some CPUs do not like it when mappings with
 * different caching policies exist. This changes the page attributes of the
 * kernel linear mapping too.
 *
 * Caller must call global_flush_tlb() later to make the changes active.
 *
 * The caller needs to ensure that there are no conflicting mappings elsewhere
 * (e.g. in user space). This function only deals with the kernel linear map.
 *
 * This function is different from change_page_attr() in that only the
 * selected bits are impacted; all other bits remain as-is.
 */
int change_page_attr_set(unsigned long addr, int numpages, pgprot_t prot)
{
	pgprot_t current_prot;
	int level;
	pte_t *pte;

	pte = lookup_address(addr, &level);
	if (pte)
		current_prot = pte_pgprot(*pte);
	else
		pgprot_val(current_prot) = 0;

	pgprot_val(prot) = pgprot_val(current_prot) | pgprot_val(prot);

	return change_page_attr_addr(addr, numpages, prot);
}
/**
 * change_page_attr_clear - Change page table attributes in the linear mapping.
 * @addr: Virtual address in linear mapping.
 * @numpages: Number of pages to change
 * @prot: Protection/caching type bits to clear (PAGE_*)
 *
 * Returns 0 on success, otherwise a negated errno.
 *
 * This should be used when a page is mapped with a different caching policy
 * than write-back somewhere - some CPUs do not like it when mappings with
 * different caching policies exist. This changes the page attributes of the
 * kernel linear mapping too.
 *
 * Caller must call global_flush_tlb() later to make the changes active.
 *
 * The caller needs to ensure that there are no conflicting mappings elsewhere
 * (e.g. in user space). This function only deals with the kernel linear map.
 *
 * This function is different from change_page_attr() in that only the
 * selected bits are impacted; all other bits remain as-is.
 */
int change_page_attr_clear(unsigned long addr, int numpages, pgprot_t prot)
{
	pgprot_t current_prot;
	int level;
	pte_t *pte;

	pte = lookup_address(addr, &level);
	if (pte)
		current_prot = pte_pgprot(*pte);
	else
		pgprot_val(current_prot) = 0;

	pgprot_val(prot) = pgprot_val(current_prot) & ~pgprot_val(prot);

	return change_page_attr_addr(addr, numpages, prot);
}
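
/*
 * Set/clear semantics (illustrative sketch): unlike change_page_attr(),
 * which installs a complete new pgprot, the _set/_clear variants only
 * OR in or mask out the given bits and keep everything else:
 *
 *	pgprot_t nx;
 *
 *	pgprot_val(nx) = _PAGE_NX;
 *	change_page_attr_set(addr, 1, nx);	(no-exec, RW etc. unchanged)
 *	change_page_attr_clear(addr, 1, nx);	(executable again)
 */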
int set_memory_uc(unsigned long addr, int numpages)
{
	pgprot_t uncached;

	pgprot_val(uncached) = _PAGE_PCD | _PAGE_PWT;
	return change_page_attr_set(addr, numpages, uncached);
}
EXPORT_SYMBOL(set_memory_uc);

int set_memory_wb(unsigned long addr, int numpages)
{
	pgprot_t uncached;

	pgprot_val(uncached) = _PAGE_PCD | _PAGE_PWT;
	return change_page_attr_clear(addr, numpages, uncached);
}
EXPORT_SYMBOL(set_memory_wb);
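
/*
 * Typical driver pattern (illustrative sketch; "buf" and "npages" are
 * hypothetical): keep a lowmem buffer uncached while a device works on
 * it, then restore write-back caching:
 *
 *	unsigned long addr = (unsigned long)buf;
 *
 *	set_memory_uc(addr, npages);
 *	global_flush_tlb();
 *	... device accesses the buffer uncached ...
 *	set_memory_wb(addr, npages);
 *	global_flush_tlb();
 */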
int set_memory_x(unsigned long addr, int numpages)
{
	pgprot_t nx;

	pgprot_val(nx) = _PAGE_NX;
	return change_page_attr_clear(addr, numpages, nx);
}
EXPORT_SYMBOL(set_memory_x);

int set_memory_nx(unsigned long addr, int numpages)
{
	pgprot_t nx;

	pgprot_val(nx) = _PAGE_NX;
	return change_page_attr_set(addr, numpages, nx);
}
EXPORT_SYMBOL(set_memory_nx);

int set_memory_ro(unsigned long addr, int numpages)
{
	pgprot_t rw;

	pgprot_val(rw) = _PAGE_RW;
	return change_page_attr_clear(addr, numpages, rw);
}
EXPORT_SYMBOL(set_memory_ro);

int set_memory_rw(unsigned long addr, int numpages)
{
	pgprot_t rw;

	pgprot_val(rw) = _PAGE_RW;
	return change_page_attr_set(addr, numpages, rw);
}
EXPORT_SYMBOL(set_memory_rw);
int set_pages_uc(struct page *page, int numpages)
{
	unsigned long addr = (unsigned long)page_address(page);
	pgprot_t uncached;

	pgprot_val(uncached) = _PAGE_PCD | _PAGE_PWT;
	return change_page_attr_set(addr, numpages, uncached);
}
EXPORT_SYMBOL(set_pages_uc);

int set_pages_wb(struct page *page, int numpages)
{
	unsigned long addr = (unsigned long)page_address(page);
	pgprot_t uncached;

	pgprot_val(uncached) = _PAGE_PCD | _PAGE_PWT;
	return change_page_attr_clear(addr, numpages, uncached);
}
EXPORT_SYMBOL(set_pages_wb);

int set_pages_x(struct page *page, int numpages)
{
	unsigned long addr = (unsigned long)page_address(page);
	pgprot_t nx;

	pgprot_val(nx) = _PAGE_NX;
	return change_page_attr_clear(addr, numpages, nx);
}
EXPORT_SYMBOL(set_pages_x);

int set_pages_nx(struct page *page, int numpages)
{
	unsigned long addr = (unsigned long)page_address(page);
	pgprot_t nx;

	pgprot_val(nx) = _PAGE_NX;
	return change_page_attr_set(addr, numpages, nx);
}
EXPORT_SYMBOL(set_pages_nx);

int set_pages_ro(struct page *page, int numpages)
{
	unsigned long addr = (unsigned long)page_address(page);
	pgprot_t rw;

	pgprot_val(rw) = _PAGE_RW;
	return change_page_attr_clear(addr, numpages, rw);
}
EXPORT_SYMBOL(set_pages_ro);

int set_pages_rw(struct page *page, int numpages)
{
	unsigned long addr = (unsigned long)page_address(page);
	pgprot_t rw;

	pgprot_val(rw) = _PAGE_RW;
	return change_page_attr_set(addr, numpages, rw);
}
EXPORT_SYMBOL(set_pages_rw);
void clflush_cache_range(void *addr, int size)
{
	int i;

	for (i = 0; i < size; i += boot_cpu_data.x86_clflush_size)
		clflush(addr+i);
}
static void flush_kernel_map(void *arg)
{
	/*
	 * Flush all to work around Errata in early athlons regarding
	 * large page flushing.
	 */
	__flush_tlb_all();

	if (boot_cpu_data.x86_model >= 4)
		wbinvd();
}

void global_flush_tlb(void)
{
	BUG_ON(irqs_disabled());

	on_each_cpu(flush_kernel_map, NULL, 1, 1);
}
EXPORT_SYMBOL(global_flush_tlb);
#ifdef CONFIG_DEBUG_PAGEALLOC
void kernel_map_pages(struct page *page, int numpages, int enable)
{
	if (PageHighMem(page))
		return;

	if (!enable)
		debug_check_no_locks_freed(page_address(page),
					   numpages * PAGE_SIZE);

	/*
	 * If page allocator is not up yet then do not call c_p_a():
	 */
	if (!debug_pagealloc_enabled)
		return;

	/*
	 * The return value is ignored - the calls cannot fail,
	 * large pages are disabled at boot time:
	 */
	change_page_attr(page, numpages, enable ? PAGE_KERNEL : __pgprot(0));

	/*
	 * We should perform an IPI and flush all tlbs,
	 * but that can deadlock->flush only current cpu:
	 */
	__flush_tlb_all();
}
#endif