Import 2.3.18pre1
[davej-history.git] / include / asm-i386 / pgtable.h
blob36303437b527d6b09df5787225b63b60daa61d69
1 #ifndef _I386_PGTABLE_H
2 #define _I386_PGTABLE_H
4 #include <linux/config.h>
/*
 * The Linux memory management assumes a three-level page table setup. On
 * the i386, we use that, but "fold" the mid level into the top-level page
 * table, so that we physically have the same two-level page table as the
 * i386 mmu expects.
 *
 * This file contains the functions and defines necessary to modify and use
 * the i386 page table tree.
 */
15 #ifndef __ASSEMBLY__
16 #include <asm/processor.h>
17 #include <asm/fixmap.h>
18 #include <linux/threads.h>
20 extern pgd_t swapper_pg_dir[1024];
/*
 * Cache flushing hooks.  Caches aren't brain-dead on the intel, so every
 * one of these expands to an empty statement; the arguments are not
 * evaluated.
 */
#define flush_cache_all()			do { } while (0)
#define flush_cache_mm(mm)			do { } while (0)
#define flush_cache_range(mm, start, end)	do { } while (0)
#define flush_cache_page(vma, vmaddr)		do { } while (0)
#define flush_page_to_ram(page)			do { } while (0)
#define flush_icache_range(start, end)		do { } while (0)
/*
 * TLB flushing:
 *
 *  - flush_tlb() flushes the current mm struct TLBs
 *  - flush_tlb_all() flushes all processes TLBs
 *  - flush_tlb_mm(mm) flushes the specified mm context TLB's
 *  - flush_tlb_page(vma, vmaddr) flushes one page
 *  - flush_tlb_range(mm, start, end) flushes a range of pages
 *
 * ..but the i386 has somewhat limited tlb flushing capabilities,
 * and page-granular flushes are available only on i486 and up.
 */

/*
 * Flush by reading %cr3 into a scratch register and writing the same
 * value straight back.  The "memory" clobber keeps the compiler from
 * moving memory accesses across the reload.
 */
#define __flush_tlb() \
do { unsigned long tmpreg; __asm__ __volatile__("movl %%cr3,%0\n\tmovl %0,%%cr3":"=r" (tmpreg) : :"memory"); } while (0)
/*
 * __flush_tlb_one(addr): drop the TLB entry covering one virtual address.
 *
 * Without CONFIG_X86_INVLPG there is no page-granular flush, so fall back
 * to __flush_tlb().  This deliberately does not use flush_tlb(): in the
 * SMP configuration that name expands to flush_tlb_current_task(), not to
 * the plain %cr3 reload.
 *
 * With CONFIG_X86_INVLPG the address is handed to "invlpg" as a memory
 * operand; (addr) is parenthesized so that an expression argument is cast
 * as a whole.
 */
#ifndef CONFIG_X86_INVLPG
#define __flush_tlb_one(addr) __flush_tlb()
#else
#define __flush_tlb_one(addr) \
__asm__ __volatile__("invlpg %0": :"m" (*(char *) (addr)))
#endif
53 #ifndef __SMP__
/* Non-SMP build: all three flush flavours are the plain __flush_tlb(). */
#define flush_tlb() __flush_tlb()
#define flush_tlb_all() __flush_tlb()
#define local_flush_tlb() __flush_tlb()
59 static inline void flush_tlb_mm(struct mm_struct *mm)
61 if (mm == current->active_mm)
62 __flush_tlb();
65 static inline void flush_tlb_page(struct vm_area_struct *vma,
66 unsigned long addr)
68 if (vma->vm_mm == current->active_mm)
69 __flush_tlb_one(addr);
72 static inline void flush_tlb_range(struct mm_struct *mm,
73 unsigned long start, unsigned long end)
75 if (mm == current->active_mm)
76 __flush_tlb();
79 #else
/*
 * We aren't very clever about this yet - SMP could certainly
 * avoid some global flushes..
 */
86 #include <asm/smp.h>
/* SMP build: the local flush is still the plain __flush_tlb(). */
#define local_flush_tlb() \
	__flush_tlb()

/* The remaining flush routines are out of line, defined elsewhere. */
extern void flush_tlb_all(void);
extern void flush_tlb_current_task(void);
extern void flush_tlb_mm(struct mm_struct *);
extern void flush_tlb_page(struct vm_area_struct *, unsigned long);

#define flush_tlb()	flush_tlb_current_task()
/*
 * SMP flush_tlb_range(): @start and @end are ignored and the whole of
 * @mm is flushed via flush_tlb_mm().
 */
static inline void flush_tlb_range(struct mm_struct * mm, unsigned long start, unsigned long end)
{
	flush_tlb_mm(mm);
}
104 #endif
105 #endif /* !__ASSEMBLY__ */
/* Certain architectures need to do special things when PTEs
 * within a page table are directly modified.  Thus, the following
 * hook is made available.  Here it is a plain store through pteptr.
 */
#define set_pte(pteptr, pteval) ((*(pteptr)) = (pteval))
/* PMD_SHIFT determines the size of the area a second-level page table can map */
#define PMD_SHIFT	22
#define PMD_SIZE	(1UL << PMD_SHIFT)
#define PMD_MASK	(~(PMD_SIZE-1))

/* PGDIR_SHIFT determines what a third-level page table entry can map */
#define PGDIR_SHIFT	22
#define PGDIR_SIZE	(1UL << PGDIR_SHIFT)
#define PGDIR_MASK	(~(PGDIR_SIZE-1))
/*
 * entries per page directory level: the i386 is two-level, so
 * we don't really have any PMD directory physically.
 */
#define PTRS_PER_PTE	1024
#define PTRS_PER_PMD	1
#define PTRS_PER_PGD	1024
#define USER_PTRS_PER_PGD	(TASK_SIZE/PGDIR_SIZE)
/*
 * pgd entries used up by user/kernel:
 *
 * The double-underscore variants are computed from __PAGE_OFFSET and
 * mask the index to 10 bits.
 */
#define USER_PGD_PTRS (PAGE_OFFSET >> PGDIR_SHIFT)
#define KERNEL_PGD_PTRS (PTRS_PER_PGD-USER_PGD_PTRS)
#define __USER_PGD_PTRS ((__PAGE_OFFSET >> PGDIR_SHIFT) & 0x3ff)
#define __KERNEL_PGD_PTRS (PTRS_PER_PGD-__USER_PGD_PTRS)
142 #ifndef __ASSEMBLY__
/* Just any arbitrary offset to the start of the vmalloc VM area: the
 * current 8MB value just means that there will be a 8MB "hole" after the
 * physical memory until the kernel virtual memory starts.  That means that
 * any out-of-bounds memory accesses will hopefully be caught.
 * The vmalloc() routines leave a hole of 4kB between each vmalloced
 * area for the same reason. ;)
 */
#define VMALLOC_OFFSET	(8*1024*1024)
/* high_memory plus the offset, rounded down to a VMALLOC_OFFSET boundary */
#define VMALLOC_START	(((unsigned long) high_memory + VMALLOC_OFFSET) & ~(VMALLOC_OFFSET-1))
#define VMALLOC_VMADDR(x) ((unsigned long)(x))
#define VMALLOC_END	(FIXADDR_START)
/*
 * The 4MB page is guessing..  Detailed in the infamous "Chapter H"
 * of the Pentium details, but assuming intel did the straightforward
 * thing, this bit set in the page directory entry just means that
 * the page directory entry points directly to a 4MB-aligned block of
 * memory.
 */
#define _PAGE_PRESENT	0x001
#define _PAGE_RW	0x002
#define _PAGE_USER	0x004
#define _PAGE_PWT	0x008
#define _PAGE_PCD	0x010
#define _PAGE_ACCESSED	0x020
#define _PAGE_DIRTY	0x040
#define _PAGE_4M	0x080	/* 4 MB page, Pentium+, if present.. */
#define _PAGE_GLOBAL	0x100	/* Global TLB entry PPro+ */

/* Shares bit 0x080 with _PAGE_4M; only meaningful when not present */
#define _PAGE_PROTNONE	0x080	/* If not present */
/* Flag sets for page-table entries: user-accessible and kernel-only */
#define _PAGE_TABLE	(_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | _PAGE_ACCESSED | _PAGE_DIRTY)
#define _KERNPG_TABLE	(_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY)
/* Bits that pte_modify() keeps when it installs a new protection */
#define _PAGE_CHG_MASK	(PAGE_MASK | _PAGE_ACCESSED | _PAGE_DIRTY)

/* Ready-made protection values */
#define PAGE_NONE	__pgprot(_PAGE_PROTNONE | _PAGE_ACCESSED)
#define PAGE_SHARED	__pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | _PAGE_ACCESSED)
#define PAGE_COPY	__pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED)
#define PAGE_READONLY	__pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED)
#define PAGE_KERNEL	__pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED)
#define PAGE_KERNEL_RO	__pgprot(_PAGE_PRESENT | _PAGE_DIRTY | _PAGE_ACCESSED)
/*
 * The i386 can't do page protection for execute, and considers that the
 * same as read.  Also, write permissions imply read permissions.  This is
 * the closest we can get..
 *
 * __Pxxx entries never grant write access (PAGE_COPY at most); __Sxxx
 * entries use PAGE_SHARED wherever the middle digit is 1.
 */
#define __P000	PAGE_NONE
#define __P001	PAGE_READONLY
#define __P010	PAGE_COPY
#define __P011	PAGE_COPY
#define __P100	PAGE_READONLY
#define __P101	PAGE_READONLY
#define __P110	PAGE_COPY
#define __P111	PAGE_COPY

#define __S000	PAGE_NONE
#define __S001	PAGE_READONLY
#define __S010	PAGE_SHARED
#define __S011	PAGE_SHARED
#define __S100	PAGE_READONLY
#define __S101	PAGE_READONLY
#define __S110	PAGE_SHARED
#define __S111	PAGE_SHARED
/*
 * Define this if things work differently on an i386 and an i486:
 * it will (on an i486) warn about kernel memory accesses that are
 * done without a 'verify_area(VERIFY_WRITE,..)'
 */
#undef TEST_VERIFY_AREA
/* page table for 0-4MB for everybody */
extern unsigned long pg0[1024];
/* zero page used for uninitialized stuff */
extern unsigned long empty_zero_page[1024];
220 * BAD_PAGETABLE is used when we need a bogus page-table, while
221 * BAD_PAGE is used for a bogus page.
223 * ZERO_PAGE is a global shared page that is always zero: used
224 * for zero-mapped memory areas etc..
226 extern pte_t __bad_page(void);
227 extern pte_t * __bad_pagetable(void);
229 #define BAD_PAGETABLE __bad_pagetable()
230 #define BAD_PAGE __bad_page()
231 #define ZERO_PAGE(vaddr) ((unsigned long) empty_zero_page)
/* number of bits that fit into a memory pointer */
#define BITS_PER_PTR			(8*sizeof(unsigned long))

/* to align the pointer to a pointer address */
#define PTR_MASK			(~(sizeof(void*)-1))

/* sizeof(void*)==1<<SIZEOF_PTR_LOG2 */
/* 64-bit machines, beware!  SRB. */
#define SIZEOF_PTR_LOG2			2

/* to find an entry in a page-table */
#define PAGE_PTR(address) \
((unsigned long)(address)>>(PAGE_SHIFT-SIZEOF_PTR_LOG2)&PTR_MASK&~PAGE_MASK)
/*
 * Entry predicates.  A pte counts as present when either _PAGE_PRESENT
 * or _PAGE_PROTNONE is set.
 */
#define pte_none(x)	(!pte_val(x))
#define pte_present(x)	(pte_val(x) & (_PAGE_PRESENT | _PAGE_PROTNONE))
#define pte_clear(xp)	do { pte_val(*(xp)) = 0; } while (0)

/*
 * A pmd is "bad" when its low flag bits, _PAGE_USER aside, differ from
 * _KERNPG_TABLE.
 */
#define pmd_none(x)	(!pmd_val(x))
#define	pmd_bad(x)	((pmd_val(x) & (~PAGE_MASK & ~_PAGE_USER)) != _KERNPG_TABLE)
#define pmd_present(x)	(pmd_val(x) & _PAGE_PRESENT)
#define pmd_clear(xp)	do { pmd_val(*(xp)) = 0; } while (0)
257 * The "pgd_xxx()" functions here are trivial for a folded two-level
258 * setup: the pgd is never bad, and a pmd always exists (as it's folded
259 * into the pgd entry)
261 extern inline int pgd_none(pgd_t pgd) { return 0; }
262 extern inline int pgd_bad(pgd_t pgd) { return 0; }
263 extern inline int pgd_present(pgd_t pgd) { return 1; }
264 extern inline void pgd_clear(pgd_t * pgdp) { }
267 * The following only work if pte_present() is true.
268 * Undefined behaviour if not..
270 extern inline int pte_read(pte_t pte) { return pte_val(pte) & _PAGE_USER; }
271 extern inline int pte_exec(pte_t pte) { return pte_val(pte) & _PAGE_USER; }
272 extern inline int pte_dirty(pte_t pte) { return pte_val(pte) & _PAGE_DIRTY; }
273 extern inline int pte_young(pte_t pte) { return pte_val(pte) & _PAGE_ACCESSED; }
274 extern inline int pte_write(pte_t pte) { return pte_val(pte) & _PAGE_RW; }
276 extern inline pte_t pte_rdprotect(pte_t pte) { pte_val(pte) &= ~_PAGE_USER; return pte; }
277 extern inline pte_t pte_exprotect(pte_t pte) { pte_val(pte) &= ~_PAGE_USER; return pte; }
278 extern inline pte_t pte_mkclean(pte_t pte) { pte_val(pte) &= ~_PAGE_DIRTY; return pte; }
279 extern inline pte_t pte_mkold(pte_t pte) { pte_val(pte) &= ~_PAGE_ACCESSED; return pte; }
280 extern inline pte_t pte_wrprotect(pte_t pte) { pte_val(pte) &= ~_PAGE_RW; return pte; }
281 extern inline pte_t pte_mkread(pte_t pte) { pte_val(pte) |= _PAGE_USER; return pte; }
282 extern inline pte_t pte_mkexec(pte_t pte) { pte_val(pte) |= _PAGE_USER; return pte; }
283 extern inline pte_t pte_mkdirty(pte_t pte) { pte_val(pte) |= _PAGE_DIRTY; return pte; }
284 extern inline pte_t pte_mkyoung(pte_t pte) { pte_val(pte) |= _PAGE_ACCESSED; return pte; }
285 extern inline pte_t pte_mkwrite(pte_t pte) { pte_val(pte) |= _PAGE_RW; return pte; }
/*
 * Conversion functions: convert a page and protection to a page entry,
 * and a page entry and page directory to the page they refer to.
 */
/* Build a pte from a page address, converted with __pa(), plus pgprot */
#define mk_pte(page, pgprot) \
({ pte_t __pte; pte_val(__pte) = __pa(page) + pgprot_val(pgprot); __pte; })
/*
 * This takes a physical page address that is used by the remapping
 * functions.  physpage is parenthesized so that an expression argument
 * is evaluated as a whole before the protection bits are added.
 */
#define mk_pte_phys(physpage, pgprot) \
({ pte_t __pte; pte_val(__pte) = (physpage) + pgprot_val(pgprot); __pte; })
298 extern inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
299 { pte_val(pte) = (pte_val(pte) & _PAGE_CHG_MASK) | pgprot_val(newprot); return pte; }
/*
 * Address referred to by an entry: mask off the low flag bits with
 * PAGE_MASK and convert the result with __va().
 */
#define pte_page(pte) \
((unsigned long) __va(pte_val(pte) & PAGE_MASK))

#define pmd_page(pmd) \
((unsigned long) __va(pmd_val(pmd) & PAGE_MASK))
/* to find an entry in a page-table-directory */
#define pgd_offset(mm, address) \
((mm)->pgd + ((address) >> PGDIR_SHIFT))

/* to find an entry in a kernel page-table-directory */
#define pgd_offset_k(address) pgd_offset(&init_mm, address)
314 /* Find an entry in the second-level page table.. */
315 extern inline pmd_t * pmd_offset(pgd_t * dir, unsigned long address)
317 return (pmd_t *) dir;
/*
 * Find an entry in the third-level page table..
 *
 * The byte offset into the page table is (address >> 10) masked with
 * (PTRS_PER_PTE-1) << 2, added to pmd_page(*pmd).  Both arguments are
 * parenthesized so that expression arguments are evaluated as a whole.
 */
#define pte_offset(pmd, address) \
((pte_t *) (pmd_page(*(pmd)) + (((address)>>10) & ((PTRS_PER_PTE-1)<<2))))
/*
 * Allocate and free page tables. The xxx_kernel() versions are
 * used to allocate a kernel page table - this turns on ASN bits
 * if any.
 */

/*
 * Page-table cache bookkeeping, kept in current_cpu_data.  There is no
 * pmd cache, so pmd_quicklist is a null pointer.
 */
#define pgd_quicklist (current_cpu_data.pgd_quick)
#define pmd_quicklist ((unsigned long *)0)
#define pte_quicklist (current_cpu_data.pte_quick)
#define pgtable_cache_size (current_cpu_data.pgtable_cache_sz)
335 extern __inline__ pgd_t *get_pgd_slow(void)
337 pgd_t *ret = (pgd_t *)__get_free_page(GFP_KERNEL);
339 if (ret) {
340 memset(ret, 0, USER_PTRS_PER_PGD * sizeof(pgd_t));
341 memcpy(ret + USER_PTRS_PER_PGD, swapper_pg_dir + USER_PTRS_PER_PGD, (PTRS_PER_PGD - USER_PTRS_PER_PGD) * sizeof(pgd_t));
343 return ret;
346 extern __inline__ pgd_t *get_pgd_fast(void)
348 unsigned long *ret;
350 if ((ret = pgd_quicklist) != NULL) {
351 pgd_quicklist = (unsigned long *)(*ret);
352 ret[0] = 0;
353 pgtable_cache_size--;
354 } else
355 ret = (unsigned long *)get_pgd_slow();
356 return (pgd_t *)ret;
359 extern __inline__ void free_pgd_fast(pgd_t *pgd)
361 *(unsigned long *)pgd = (unsigned long) pgd_quicklist;
362 pgd_quicklist = (unsigned long *) pgd;
363 pgtable_cache_size++;
366 extern __inline__ void free_pgd_slow(pgd_t *pgd)
368 free_page((unsigned long)pgd);
371 extern pte_t *get_pte_slow(pmd_t *pmd, unsigned long address_preadjusted);
372 extern pte_t *get_pte_kernel_slow(pmd_t *pmd, unsigned long address_preadjusted);
374 extern __inline__ pte_t *get_pte_fast(void)
376 unsigned long *ret;
378 if((ret = (unsigned long *)pte_quicklist) != NULL) {
379 pte_quicklist = (unsigned long *)(*ret);
380 ret[0] = ret[1];
381 pgtable_cache_size--;
383 return (pte_t *)ret;
386 extern __inline__ void free_pte_fast(pte_t *pte)
388 *(unsigned long *)pte = (unsigned long) pte_quicklist;
389 pte_quicklist = (unsigned long *) pte;
390 pgtable_cache_size++;
393 extern __inline__ void free_pte_slow(pte_t *pte)
395 free_page((unsigned long)pte);
398 /* We don't use pmd cache, so these are dummy routines */
399 extern __inline__ pmd_t *get_pmd_fast(void)
401 return (pmd_t *)0;
404 extern __inline__ void free_pmd_fast(pmd_t *pmd)
408 extern __inline__ void free_pmd_slow(pmd_t *pmd)
412 extern void __bad_pte(pmd_t *pmd);
413 extern void __bad_pte_kernel(pmd_t *pmd);
415 #define pte_free_kernel(pte) free_pte_slow(pte)
416 #define pte_free(pte) free_pte_slow(pte)
417 #define pgd_free(pgd) free_pgd_slow(pgd)
418 #define pgd_alloc() get_pgd_fast()
420 extern inline pte_t * pte_alloc_kernel(pmd_t * pmd, unsigned long address)
422 address = (address >> PAGE_SHIFT) & (PTRS_PER_PTE - 1);
423 if (pmd_none(*pmd)) {
424 pte_t * page = (pte_t *) get_pte_fast();
426 if (!page)
427 return get_pte_kernel_slow(pmd, address);
428 pmd_val(*pmd) = _KERNPG_TABLE + __pa(page);
429 return page + address;
431 if (pmd_bad(*pmd)) {
432 __bad_pte_kernel(pmd);
433 return NULL;
435 return (pte_t *) pmd_page(*pmd) + address;
438 extern inline pte_t * pte_alloc(pmd_t * pmd, unsigned long address)
440 address = (address >> (PAGE_SHIFT-2)) & 4*(PTRS_PER_PTE - 1);
442 if (pmd_none(*pmd))
443 goto getnew;
444 if (pmd_bad(*pmd))
445 goto fix;
446 return (pte_t *) (pmd_page(*pmd) + address);
447 getnew:
449 unsigned long page = (unsigned long) get_pte_fast();
451 if (!page)
452 return get_pte_slow(pmd, address);
453 pmd_val(*pmd) = _PAGE_TABLE + __pa(page);
454 return (pte_t *) (page + address);
456 fix:
457 __bad_pte(pmd);
458 return NULL;
462 * allocating and freeing a pmd is trivial: the 1-entry pmd is
463 * inside the pgd, so has no extra memory associated with it.
465 extern inline void pmd_free(pmd_t * pmd)
469 extern inline pmd_t * pmd_alloc(pgd_t * pgd, unsigned long address)
471 return (pmd_t *) pgd;
474 #define pmd_free_kernel pmd_free
475 #define pmd_alloc_kernel pmd_alloc
/* Defined elsewhere; the meaning of the two int arguments is not visible here. */
extern int do_check_pgt_cache(int, int);
479 extern inline void set_pgdir(unsigned long address, pgd_t entry)
481 struct task_struct * p;
482 pgd_t *pgd;
483 #ifdef __SMP__
484 int i;
485 #endif
487 read_lock(&tasklist_lock);
488 for_each_task(p) {
489 if (!p->mm)
490 continue;
491 *pgd_offset(p->mm,address) = entry;
493 read_unlock(&tasklist_lock);
494 #ifndef __SMP__
495 for (pgd = (pgd_t *)pgd_quicklist; pgd; pgd = (pgd_t *)*(unsigned long *)pgd)
496 pgd[address >> PGDIR_SHIFT] = entry;
497 #else
498 /* To pgd_alloc/pgd_free, one holds master kernel lock and so does our callee, so we can
499 modify pgd caches of other CPUs as well. -jj */
500 for (i = 0; i < NR_CPUS; i++)
501 for (pgd = (pgd_t *)cpu_data[i].pgd_quick; pgd; pgd = (pgd_t *)*(unsigned long *)pgd)
502 pgd[address >> PGDIR_SHIFT] = entry;
503 #endif
507 * The i386 doesn't have any external MMU info: the kernel page
508 * tables contain all the necessary information.
510 extern inline void update_mmu_cache(struct vm_area_struct * vma,
511 unsigned long address, pte_t pte)
/*
 * Swap entry encoding: the type is a 6-bit field in bits 1-6, the
 * offset starts at bit 8.  Bit 0 is left clear by SWP_ENTRY().
 */
#define SWP_TYPE(entry) (((entry) >> 1) & 0x3f)
#define SWP_OFFSET(entry) ((entry) >> 8)
#define SWP_ENTRY(type,offset) (((type) << 1) | ((offset) << 8))
/* Module memory is simply vmalloc()ed and vfree()d */
#define module_map      vmalloc
#define module_unmap    vfree
522 #endif /* !__ASSEMBLY__ */
/* Needs to be defined here and not in linux/mm.h, as it is arch dependent */
#define PageSkip(page)		(0)
#define kern_addr_valid(addr)	(1)

#define io_remap_page_range remap_page_range
#endif /* _I386_PGTABLE_H */