1 #ifndef _I386_PGTABLE_H
2 #define _I386_PGTABLE_H
4 #include <linux/config.h>
/*
 * The Linux memory management assumes a three-level page table setup. On
 * the i386, we use that, but "fold" the mid level into the top-level page
 * table, so that we physically have the same two-level page table as the
 * i386 mmu expects.
 *
 * This file contains the functions and defines necessary to modify and use
 * the i386 page table tree.
 */
16 #include <asm/processor.h>
17 #include <asm/fixmap.h>
18 #include <linux/tasks.h>
20 /* Caches aren't brain-dead on the intel. */
/*
 * Cache maintenance hooks. On this architecture every one of them expands
 * to an empty statement; the arguments are accepted but never evaluated.
 */
#define flush_cache_all()			((void)0)
#define flush_cache_mm(mm)			((void)0)
#define flush_cache_range(mm, start, end)	((void)0)
#define flush_cache_page(vma, vmaddr)		((void)0)
#define flush_page_to_ram(page)			((void)0)
#define flush_icache_range(start, end)		((void)0)
31 * - flush_tlb() flushes the current mm struct TLBs
32 * - flush_tlb_all() flushes all processes TLBs
33 * - flush_tlb_mm(mm) flushes the specified mm context TLB's
34 * - flush_tlb_page(vma, vmaddr) flushes one page
35 * - flush_tlb_range(mm, start, end) flushes a range of pages
37 * ..but the i386 has somewhat limited tlb flushing capabilities,
38 * and page-granular flushes are available only on i486 and up.
/*
 * Read %cr3 into a scratch register and write the same value straight back.
 * The "memory" clobber keeps the compiler from moving memory accesses
 * across the reload. The flush_tlb*() wrappers in this file are built on
 * this macro.
 */
#define __flush_tlb()							\
	do {								\
		unsigned long __cr3_scratch;				\
		__asm__ __volatile__(					\
			"movl %%cr3,%0\n\t"				\
			"movl %0,%%cr3"					\
			: "=r" (__cr3_scratch)				\
			:						\
			: "memory");					\
	} while (0)
45 #define __flush_tlb_one(addr) flush_tlb()
/*
 * Flush a single page translation with invlpg, using `addr` as the memory
 * operand.
 *
 * `addr` is parenthesized so that an expression argument (for example
 * base + offset) is cast to char * as a whole before it is dereferenced.
 * Without the parentheses the cast binds to the first term only and the
 * operand is no longer an lvalue in memory.
 */
#define __flush_tlb_one(addr) \
	__asm__ __volatile__("invlpg %0" : : "m" (*(char *) (addr)))
53 #define flush_tlb() __flush_tlb()
54 #define flush_tlb_all() __flush_tlb()
55 #define local_flush_tlb() __flush_tlb()
57 static inline void flush_tlb_mm(struct mm_struct
*mm
)
59 if (mm
== current
->mm
)
63 static inline void flush_tlb_page(struct vm_area_struct
*vma
,
66 if (vma
->vm_mm
== current
->mm
)
67 __flush_tlb_one(addr
);
70 static inline void flush_tlb_range(struct mm_struct
*mm
,
71 unsigned long start
, unsigned long end
)
73 if (mm
== current
->mm
)
80 * We aren't very clever about this yet - SMP could certainly
81 * avoid some global flushes..
86 #define local_flush_tlb() \
90 #define CLEVER_SMP_INVALIDATE
91 #ifdef CLEVER_SMP_INVALIDATE
/*
 * Smarter SMP flushing macros.
 *
 * These mean you can really definitely utterly forget about
 * writing to user space from interrupts. (It's not allowed anyway).
 */
101 static inline void flush_tlb_current_task(void)
103 /* just one copy of this mm? */
104 if (atomic_read(¤t
->mm
->count
) == 1)
105 local_flush_tlb(); /* and that's us, so.. */
110 #define flush_tlb() flush_tlb_current_task()
112 #define flush_tlb_all() smp_flush_tlb()
114 static inline void flush_tlb_mm(struct mm_struct
* mm
)
116 if (mm
== current
->mm
&& atomic_read(&mm
->count
) == 1)
122 static inline void flush_tlb_page(struct vm_area_struct
* vma
,
125 if (vma
->vm_mm
== current
->mm
&& atomic_read(¤t
->mm
->count
) == 1)
131 static inline void flush_tlb_range(struct mm_struct
* mm
,
132 unsigned long start
, unsigned long end
)
140 #define flush_tlb() \
143 #define flush_tlb_all() flush_tlb()
145 static inline void flush_tlb_mm(struct mm_struct
*mm
)
150 static inline void flush_tlb_page(struct vm_area_struct
*vma
,
156 static inline void flush_tlb_range(struct mm_struct
*mm
,
157 unsigned long start
, unsigned long end
)
163 #endif /* !__ASSEMBLY__ */
166 /* Certain architectures need to do special things when PTEs
167 * within a page table are directly modified. Thus, the following
168 * hook is made available.
170 #define set_pte(pteptr, pteval) ((*(pteptr)) = (pteval))
172 /* PMD_SHIFT determines the size of the area a second-level page table can map */
174 #define PMD_SIZE (1UL << PMD_SHIFT)
175 #define PMD_MASK (~(PMD_SIZE-1))
177 /* PGDIR_SHIFT determines what a third-level page table entry can map */
178 #define PGDIR_SHIFT 22
179 #define PGDIR_SIZE (1UL << PGDIR_SHIFT)
180 #define PGDIR_MASK (~(PGDIR_SIZE-1))
183 * entries per page directory level: the i386 is two-level, so
184 * we don't really have any PMD directory physically.
186 #define PTRS_PER_PTE 1024
187 #define PTRS_PER_PMD 1
188 #define PTRS_PER_PGD 1024
189 #define USER_PTRS_PER_PGD (TASK_SIZE/PGDIR_SIZE)
192 * pgd entries used up by user/kernel:
195 #define USER_PGD_PTRS (PAGE_OFFSET >> PGDIR_SHIFT)
196 #define KERNEL_PGD_PTRS (PTRS_PER_PGD-USER_PGD_PTRS)
197 #define __USER_PGD_PTRS ((__PAGE_OFFSET >> PGDIR_SHIFT) & 0x3ff)
198 #define __KERNEL_PGD_PTRS (PTRS_PER_PGD-__USER_PGD_PTRS)
/* Just any arbitrary offset to the start of the vmalloc VM area: the
 * current 8MB value just means that there will be an 8MB "hole" after the
 * physical memory until the kernel virtual memory starts. That means that
 * any out-of-bounds memory accesses will hopefully be caught.
 * The vmalloc() routines leave a hole of 4kB between each vmalloced
 * area for the same reason. ;)
 */
208 #define VMALLOC_OFFSET (8*1024*1024)
209 #define VMALLOC_START (((unsigned long) high_memory + VMALLOC_OFFSET) & ~(VMALLOC_OFFSET-1))
210 #define VMALLOC_VMADDR(x) ((unsigned long)(x))
211 #define VMALLOC_END (FIXADDR_START)
/*
 * The 4MB page is guessing.. Detailed in the infamous "Chapter H"
 * of the Pentium details, but assuming intel did the straightforward
 * thing, this bit set in the page directory entry just means that
 * the page directory entry points directly to a 4MB-aligned block of
 * memory.
 */
220 #define _PAGE_PRESENT 0x001
221 #define _PAGE_RW 0x002
222 #define _PAGE_USER 0x004
223 #define _PAGE_WT 0x008
224 #define _PAGE_PCD 0x010
225 #define _PAGE_ACCESSED 0x020
226 #define _PAGE_DIRTY 0x040
227 #define _PAGE_4M 0x080 /* 4 MB page, Pentium+, if present.. */
228 #define _PAGE_GLOBAL 0x100 /* Global TLB entry PPro+ */
230 #define _PAGE_PROTNONE 0x080 /* If not present */
232 #define _PAGE_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | _PAGE_ACCESSED | _PAGE_DIRTY)
233 #define _KERNPG_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY)
234 #define _PAGE_CHG_MASK (PAGE_MASK | _PAGE_ACCESSED | _PAGE_DIRTY)
236 #define PAGE_NONE __pgprot(_PAGE_PROTNONE | _PAGE_ACCESSED)
237 #define PAGE_SHARED __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | _PAGE_ACCESSED)
238 #define PAGE_COPY __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED)
239 #define PAGE_READONLY __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED)
240 #define PAGE_KERNEL __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED)
241 #define PAGE_KERNEL_RO __pgprot(_PAGE_PRESENT | _PAGE_DIRTY | _PAGE_ACCESSED)
/*
 * The i386 can't do page protection for execute, and considers that the same as read.
 * Also, write permissions imply read permissions. This is the closest we can get..
 */
247 #define __P000 PAGE_NONE
248 #define __P001 PAGE_READONLY
249 #define __P010 PAGE_COPY
250 #define __P011 PAGE_COPY
251 #define __P100 PAGE_READONLY
252 #define __P101 PAGE_READONLY
253 #define __P110 PAGE_COPY
254 #define __P111 PAGE_COPY
256 #define __S000 PAGE_NONE
257 #define __S001 PAGE_READONLY
258 #define __S010 PAGE_SHARED
259 #define __S011 PAGE_SHARED
260 #define __S100 PAGE_READONLY
261 #define __S101 PAGE_READONLY
262 #define __S110 PAGE_SHARED
263 #define __S111 PAGE_SHARED
266 * Define this if things work differently on an i386 and an i486:
267 * it will (on an i486) warn about kernel memory accesses that are
268 * done without a 'verify_area(VERIFY_WRITE,..)'
270 #undef TEST_VERIFY_AREA
272 /* page table for 0-4MB for everybody */
273 extern unsigned long pg0
[1024];
274 /* zero page used for uninitialized stuff */
275 extern unsigned long empty_zero_page
[1024];
278 * BAD_PAGETABLE is used when we need a bogus page-table, while
279 * BAD_PAGE is used for a bogus page.
281 * ZERO_PAGE is a global shared page that is always zero: used
282 * for zero-mapped memory areas etc..
284 extern pte_t
__bad_page(void);
285 extern pte_t
* __bad_pagetable(void);
287 #define BAD_PAGETABLE __bad_pagetable()
288 #define BAD_PAGE __bad_page()
289 #define ZERO_PAGE ((unsigned long) empty_zero_page)
291 /* number of bits that fit into a memory pointer */
292 #define BITS_PER_PTR (8*sizeof(unsigned long))
294 /* to align the pointer to a pointer address */
295 #define PTR_MASK (~(sizeof(void*)-1))
297 /* sizeof(void*)==1<<SIZEOF_PTR_LOG2 */
298 /* 64-bit machines, beware! SRB. */
299 #define SIZEOF_PTR_LOG2 2
301 /* to find an entry in a page-table */
/*
 * Turn `address` into a byte offset inside a page-sized table of
 * pointer-sized entries: shift the address down so that the page index is
 * scaled by the entry size, then keep only the pointer-aligned bits that
 * fall within one page.
 */
#define PAGE_PTR(address)						\
	((((unsigned long)(address)) >> (PAGE_SHIFT - SIZEOF_PTR_LOG2))	\
	 & PTR_MASK & ~PAGE_MASK)
305 /* to set the page-dir */
306 #define SET_PAGE_DIR(tsk,pgdir) \
308 unsigned long __pgdir = __pa(pgdir); \
309 (tsk)->tss.cr3 = __pgdir; \
310 if ((tsk) == current) \
311 __asm__ __volatile__("movl %0,%%cr3": :"r" (__pgdir)); \
314 #define pte_none(x) (!pte_val(x))
315 #define pte_present(x) (pte_val(x) & (_PAGE_PRESENT | _PAGE_PROTNONE))
316 #define pte_clear(xp) do { pte_val(*(xp)) = 0; } while (0)
318 #define pmd_none(x) (!pmd_val(x))
319 #define pmd_bad(x) ((pmd_val(x) & (~PAGE_MASK & ~_PAGE_USER)) != _KERNPG_TABLE)
320 #define pmd_present(x) (pmd_val(x) & _PAGE_PRESENT)
321 #define pmd_clear(xp) do { pmd_val(*(xp)) = 0; } while (0)
324 * The "pgd_xxx()" functions here are trivial for a folded two-level
325 * setup: the pgd is never bad, and a pmd always exists (as it's folded
326 * into the pgd entry)
328 extern inline int pgd_none(pgd_t pgd
) { return 0; }
329 extern inline int pgd_bad(pgd_t pgd
) { return 0; }
330 extern inline int pgd_present(pgd_t pgd
) { return 1; }
331 extern inline void pgd_clear(pgd_t
* pgdp
) { }
334 * The following only work if pte_present() is true.
335 * Undefined behaviour if not..
337 extern inline int pte_read(pte_t pte
) { return pte_val(pte
) & _PAGE_USER
; }
338 extern inline int pte_exec(pte_t pte
) { return pte_val(pte
) & _PAGE_USER
; }
339 extern inline int pte_dirty(pte_t pte
) { return pte_val(pte
) & _PAGE_DIRTY
; }
340 extern inline int pte_young(pte_t pte
) { return pte_val(pte
) & _PAGE_ACCESSED
; }
341 extern inline int pte_write(pte_t pte
) { return pte_val(pte
) & _PAGE_RW
; }
343 extern inline pte_t
pte_rdprotect(pte_t pte
) { pte_val(pte
) &= ~_PAGE_USER
; return pte
; }
344 extern inline pte_t
pte_exprotect(pte_t pte
) { pte_val(pte
) &= ~_PAGE_USER
; return pte
; }
345 extern inline pte_t
pte_mkclean(pte_t pte
) { pte_val(pte
) &= ~_PAGE_DIRTY
; return pte
; }
346 extern inline pte_t
pte_mkold(pte_t pte
) { pte_val(pte
) &= ~_PAGE_ACCESSED
; return pte
; }
347 extern inline pte_t
pte_wrprotect(pte_t pte
) { pte_val(pte
) &= ~_PAGE_RW
; return pte
; }
348 extern inline pte_t
pte_mkread(pte_t pte
) { pte_val(pte
) |= _PAGE_USER
; return pte
; }
349 extern inline pte_t
pte_mkexec(pte_t pte
) { pte_val(pte
) |= _PAGE_USER
; return pte
; }
350 extern inline pte_t
pte_mkdirty(pte_t pte
) { pte_val(pte
) |= _PAGE_DIRTY
; return pte
; }
351 extern inline pte_t
pte_mkyoung(pte_t pte
) { pte_val(pte
) |= _PAGE_ACCESSED
; return pte
; }
352 extern inline pte_t
pte_mkwrite(pte_t pte
) { pte_val(pte
) |= _PAGE_RW
; return pte
; }
355 * Conversion functions: convert a page and protection to a page entry,
356 * and a page entry and page directory to the page they refer to.
/*
 * Build a pte value from a page address and a protection value: the page
 * address is passed through __pa() and the protection bits are added to it.
 * Evaluates to the resulting pte_t.
 */
#define mk_pte(page, pgprot)						\
({									\
	pte_t __pte;							\
	pte_val(__pte) = __pa(page) + pgprot_val(pgprot);		\
	__pte;								\
})
361 /* This takes a physical page address that is used by the remapping functions */
/*
 * Build a pte value from an already-physical page address and a protection
 * value by adding the two together. Evaluates to the resulting pte_t.
 *
 * `physpage` is parenthesized so that an expression argument such as
 * (addr & PAGE_MASK) is evaluated as a unit before the protection bits are
 * added; '+' binds tighter than '&', '|' and '?:'.
 */
#define mk_pte_phys(physpage, pgprot)					\
({									\
	pte_t __pte;							\
	pte_val(__pte) = (physpage) + pgprot_val(pgprot);		\
	__pte;								\
})
365 extern inline pte_t
pte_modify(pte_t pte
, pgprot_t newprot
)
366 { pte_val(pte
) = (pte_val(pte
) & _PAGE_CHG_MASK
) | pgprot_val(newprot
); return pte
; }
368 #define pte_page(pte) \
369 ((unsigned long) __va(pte_val(pte) & PAGE_MASK))
371 #define pmd_page(pmd) \
372 ((unsigned long) __va(pmd_val(pmd) & PAGE_MASK))
374 /* to find an entry in a page-table-directory */
375 #define pgd_offset(mm, address) \
376 ((mm)->pgd + ((address) >> PGDIR_SHIFT))
378 /* to find an entry in a kernel page-table-directory */
379 #define pgd_offset_k(address) pgd_offset(&init_mm, address)
381 /* Find an entry in the second-level page table.. */
382 extern inline pmd_t
* pmd_offset(pgd_t
* dir
, unsigned long address
)
384 return (pmd_t
*) dir
;
387 /* Find an entry in the third-level page table.. */
/*
 * Locate the pte for `address` inside the page table referenced by *pmd.
 * (address >> 10) masked with ((PTRS_PER_PTE-1) << 2) is the page index
 * already multiplied by 4, i.e. a byte offset that assumes 4-byte entries;
 * it is added to the table's base address from pmd_page().
 *
 * Both macro arguments are parenthesized so that expression arguments
 * (e.g. pmd + 1, base + off) are evaluated as a unit.
 */
#define pte_offset(pmd, address) \
	((pte_t *) (pmd_page(*(pmd)) + (((address) >> 10) & ((PTRS_PER_PTE - 1) << 2))))
/*
 * Allocate and free page tables. The xxx_kernel() versions are
 * used to allocate a kernel page table - this turns on ASN bits
 * if any.
 */
397 #define pgd_quicklist (current_cpu_data.pgd_quick)
398 #define pmd_quicklist ((unsigned long *)0)
399 #define pte_quicklist (current_cpu_data.pte_quick)
400 #define pgtable_cache_size (current_cpu_data.pgtable_cache_sz)
402 extern __inline__ pgd_t
*get_pgd_slow(void)
404 pgd_t
*ret
= (pgd_t
*)__get_free_page(GFP_KERNEL
), *init
;
407 init
= pgd_offset(&init_mm
, 0);
408 memset (ret
, 0, USER_PTRS_PER_PGD
* sizeof(pgd_t
));
409 memcpy (ret
+ USER_PTRS_PER_PGD
, init
+ USER_PTRS_PER_PGD
,
410 (PTRS_PER_PGD
- USER_PTRS_PER_PGD
) * sizeof(pgd_t
));
415 extern __inline__ pgd_t
*get_pgd_fast(void)
419 if((ret
= pgd_quicklist
) != NULL
) {
420 pgd_quicklist
= (unsigned long *)(*ret
);
422 pgtable_cache_size
--;
424 ret
= (unsigned long *)get_pgd_slow();
428 extern __inline__
void free_pgd_fast(pgd_t
*pgd
)
430 *(unsigned long *)pgd
= (unsigned long) pgd_quicklist
;
431 pgd_quicklist
= (unsigned long *) pgd
;
432 pgtable_cache_size
++;
435 extern __inline__
void free_pgd_slow(pgd_t
*pgd
)
437 free_page((unsigned long)pgd
);
440 extern pte_t
*get_pte_slow(pmd_t
*pmd
, unsigned long address_preadjusted
);
441 extern pte_t
*get_pte_kernel_slow(pmd_t
*pmd
, unsigned long address_preadjusted
);
443 extern __inline__ pte_t
*get_pte_fast(void)
447 if((ret
= (unsigned long *)pte_quicklist
) != NULL
) {
448 pte_quicklist
= (unsigned long *)(*ret
);
450 pgtable_cache_size
--;
455 extern __inline__
void free_pte_fast(pte_t
*pte
)
457 *(unsigned long *)pte
= (unsigned long) pte_quicklist
;
458 pte_quicklist
= (unsigned long *) pte
;
459 pgtable_cache_size
++;
462 extern __inline__
void free_pte_slow(pte_t
*pte
)
464 free_page((unsigned long)pte
);
467 /* We don't use pmd cache, so these are dummy routines */
468 extern __inline__ pmd_t
*get_pmd_fast(void)
473 extern __inline__
void free_pmd_fast(pmd_t
*pmd
)
477 extern __inline__
void free_pmd_slow(pmd_t
*pmd
)
481 extern void __bad_pte(pmd_t
*pmd
);
482 extern void __bad_pte_kernel(pmd_t
*pmd
);
484 #define pte_free_kernel(pte) free_pte_fast(pte)
485 #define pte_free(pte) free_pte_fast(pte)
486 #define pgd_free(pgd) free_pgd_fast(pgd)
487 #define pgd_alloc() get_pgd_fast()
489 extern inline pte_t
* pte_alloc_kernel(pmd_t
* pmd
, unsigned long address
)
491 address
= (address
>> PAGE_SHIFT
) & (PTRS_PER_PTE
- 1);
492 if (pmd_none(*pmd
)) {
493 pte_t
* page
= (pte_t
*) get_pte_fast();
496 return get_pte_kernel_slow(pmd
, address
);
497 pmd_val(*pmd
) = _KERNPG_TABLE
+ __pa(page
);
498 return page
+ address
;
501 __bad_pte_kernel(pmd
);
504 return (pte_t
*) pmd_page(*pmd
) + address
;
507 extern inline pte_t
* pte_alloc(pmd_t
* pmd
, unsigned long address
)
509 address
= (address
>> (PAGE_SHIFT
-2)) & 4*(PTRS_PER_PTE
- 1);
515 return (pte_t
*) (pmd_page(*pmd
) + address
);
518 unsigned long page
= (unsigned long) get_pte_fast();
521 return get_pte_slow(pmd
, address
);
522 pmd_val(*pmd
) = _PAGE_TABLE
+ __pa(page
);
523 return (pte_t
*) (page
+ address
);
531 * allocating and freeing a pmd is trivial: the 1-entry pmd is
532 * inside the pgd, so has no extra memory associated with it.
534 extern inline void pmd_free(pmd_t
* pmd
)
538 extern inline pmd_t
* pmd_alloc(pgd_t
* pgd
, unsigned long address
)
540 return (pmd_t
*) pgd
;
543 #define pmd_free_kernel pmd_free
544 #define pmd_alloc_kernel pmd_alloc
546 extern int do_check_pgt_cache(int, int);
548 extern inline void set_pgdir(unsigned long address
, pgd_t entry
)
550 struct task_struct
* p
;
556 read_lock(&tasklist_lock
);
560 *pgd_offset(p
->mm
,address
) = entry
;
562 read_unlock(&tasklist_lock
);
564 for (pgd
= (pgd_t
*)pgd_quicklist
; pgd
; pgd
= (pgd_t
*)*(unsigned long *)pgd
)
565 pgd
[address
>> PGDIR_SHIFT
] = entry
;
567 /* To pgd_alloc/pgd_free, one holds master kernel lock and so does our callee, so we can
568 modify pgd caches of other CPUs as well. -jj */
569 for (i
= 0; i
< NR_CPUS
; i
++)
570 for (pgd
= (pgd_t
*)cpu_data
[i
].pgd_quick
; pgd
; pgd
= (pgd_t
*)*(unsigned long *)pgd
)
571 pgd
[address
>> PGDIR_SHIFT
] = entry
;
575 extern pgd_t swapper_pg_dir
[1024];
578 * The i386 doesn't have any external MMU info: the kernel page
579 * tables contain all the necessary information.
581 extern inline void update_mmu_cache(struct vm_area_struct
* vma
,
582 unsigned long address
, pte_t pte
)
/*
 * Swap entry packing used by these macros: a 6-bit type field is stored
 * starting at bit 1 and the offset is stored starting at bit 8.
 * SWP_ENTRY() does not mask its arguments.
 */
#define SWP_TYPE(entry)			(((entry) >> 1) & 0x3f)
#define SWP_OFFSET(entry)		((entry) >> 8)
#define SWP_ENTRY(type, offset)		(((type) << 1) | ((offset) << 8))
590 #define module_map vmalloc
591 #define module_unmap vfree
593 #endif /* !__ASSEMBLY__ */
595 /* Needs to be defined here and not in linux/mm.h, as it is arch dependent */
596 #define PageSkip(page) (0)
#endif /* _I386_PGTABLE_H */