Linux 2.2.0
arch/i386/mm/init.c
/*
 *  linux/arch/i386/mm/init.c
 *
 *  Copyright (C) 1995  Linus Torvalds
 */

#include <linux/config.h>
#include <linux/signal.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/types.h>
#include <linux/ptrace.h>
#include <linux/mman.h>
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/smp.h>
#include <linux/init.h>
#ifdef CONFIG_BLK_DEV_INITRD
#include <linux/blk.h>
#endif

#include <asm/processor.h>
#include <asm/system.h>
#include <asm/uaccess.h>
#include <asm/pgtable.h>
#include <asm/dma.h>
#include <asm/fixmap.h>

extern void show_net_buffers(void);
extern unsigned long init_smp_mappings(unsigned long);

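/*
 * When a pmd entry turns out to be bad, point it at the shared
 * BAD_PAGETABLE so that stray accesses hit BAD_PAGE rather than
 * random memory.  The _kernel variant installs kernel protections,
 * the plain variant user-level ones.
 */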
void __bad_pte_kernel(pmd_t *pmd)
{
	printk("Bad pmd in pte_alloc: %08lx\n", pmd_val(*pmd));
	pmd_val(*pmd) = _KERNPG_TABLE + __pa(BAD_PAGETABLE);
}

void __bad_pte(pmd_t *pmd)
{
	printk("Bad pmd in pte_alloc: %08lx\n", pmd_val(*pmd));
	pmd_val(*pmd) = _PAGE_TABLE + __pa(BAD_PAGETABLE);
}

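/*
 * Slow-path pte allocation: grab a fresh page for the page table, or
 * fall back to BAD_PAGETABLE if the allocation fails.  If another path
 * installed a table in the meantime, the freshly allocated page is
 * released and the existing table is used.
 */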
pte_t *get_pte_kernel_slow(pmd_t *pmd, unsigned long offset)
{
	pte_t *pte;

	pte = (pte_t *) __get_free_page(GFP_KERNEL);
	if (pmd_none(*pmd)) {
		if (pte) {
			clear_page((unsigned long)pte);
			pmd_val(*pmd) = _KERNPG_TABLE + __pa(pte);
			return pte + offset;
		}
		pmd_val(*pmd) = _KERNPG_TABLE + __pa(BAD_PAGETABLE);
		return NULL;
	}
	free_page((unsigned long)pte);
	if (pmd_bad(*pmd)) {
		__bad_pte_kernel(pmd);
		return NULL;
	}
	return (pte_t *) pmd_page(*pmd) + offset;
}

pte_t *get_pte_slow(pmd_t *pmd, unsigned long offset)
{
	unsigned long pte;

	pte = (unsigned long) __get_free_page(GFP_KERNEL);
	if (pmd_none(*pmd)) {
		if (pte) {
			clear_page(pte);
			pmd_val(*pmd) = _PAGE_TABLE + __pa(pte);
			return (pte_t *)(pte + offset);
		}
		pmd_val(*pmd) = _PAGE_TABLE + __pa(BAD_PAGETABLE);
		return NULL;
	}
	free_page(pte);
	if (pmd_bad(*pmd)) {
		__bad_pte(pmd);
		return NULL;
	}
	return (pte_t *) (pmd_page(*pmd) + offset);
}

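/*
 * Trim the page-table quicklists: once the cache grows past 'high',
 * keep releasing cached pgd/pmd/pte pages until it drops below 'low'.
 * Returns the number of pages freed.
 */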
int do_check_pgt_cache(int low, int high)
{
	int freed = 0;
	if(pgtable_cache_size > high) {
		do {
			if(pgd_quicklist)
				free_pgd_slow(get_pgd_fast()), freed++;
			if(pmd_quicklist)
				free_pmd_slow(get_pmd_fast()), freed++;
			if(pte_quicklist)
				free_pte_slow(get_pte_fast()), freed++;
		} while(pgtable_cache_size > low);
	}
	return freed;
}

/*
 * BAD_PAGE is the page that is used for page faults when linux
 * is out-of-memory. Older versions of linux just did a
 * do_exit(), but using this instead means there is less risk
 * for a process dying in kernel mode, possibly leaving an inode
 * unused etc..
 *
 * BAD_PAGETABLE is the accompanying page-table: it is initialized
 * to point to BAD_PAGE entries.
 *
 * ZERO_PAGE is a special page that is used for zero-initialized
 * data and COW.
 */
pte_t * __bad_pagetable(void)
{
	extern char empty_bad_page_table[PAGE_SIZE];
	int d0, d1;

	__asm__ __volatile__("cld ; rep ; stosl"
		: "=&D" (d0), "=&c" (d1)
		: "a" (pte_val(BAD_PAGE)),
		  "0" ((long) empty_bad_page_table),
		  "1" (PAGE_SIZE/4)
		: "memory");
	return (pte_t *) empty_bad_page_table;
}

pte_t __bad_page(void)
{
	extern char empty_bad_page[PAGE_SIZE];
	int d0, d1;

	__asm__ __volatile__("cld ; rep ; stosl"
		: "=&D" (d0), "=&c" (d1)
		: "a" (0),
		  "0" ((long) empty_bad_page),
		  "1" (PAGE_SIZE/4)
		: "memory");
	return pte_mkdirty(mk_pte((unsigned long) empty_bad_page, PAGE_SHARED));
}

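/*
 * Print a summary of memory usage to the console: free areas, swap,
 * and per-page counts of reserved, shared and swap-cached pages.
 */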
void show_mem(void)
{
	int i,free = 0,total = 0,reserved = 0;
	int shared = 0, cached = 0;

	printk("Mem-info:\n");
	show_free_areas();
	printk("Free swap:       %6dkB\n",nr_swap_pages<<(PAGE_SHIFT-10));
	i = max_mapnr;
	while (i-- > 0) {
		total++;
		if (PageReserved(mem_map+i))
			reserved++;
		else if (PageSwapCache(mem_map+i))
			cached++;
		else if (!atomic_read(&mem_map[i].count))
			free++;
		else
			shared += atomic_read(&mem_map[i].count) - 1;
	}
	printk("%d pages of RAM\n",total);
	printk("%d reserved pages\n",reserved);
	printk("%d pages shared\n",shared);
	printk("%d pages swap cached\n",cached);
	printk("%ld pages in page table cache\n",pgtable_cache_size);
	show_buffers();
#ifdef CONFIG_NET
	show_net_buffers();
#endif
}

extern unsigned long free_area_init(unsigned long, unsigned long);

/* References to section boundaries */

extern char _text, _etext, _edata, __bss_start, _end;
extern char __init_begin, __init_end;

#define X86_CR4_VME	0x0001		/* enable vm86 extensions */
#define X86_CR4_PVI	0x0002		/* virtual interrupts flag enable */
#define X86_CR4_TSD	0x0004		/* disable time stamp at ipl 3 */
#define X86_CR4_DE	0x0008		/* enable debugging extensions */
#define X86_CR4_PSE	0x0010		/* enable page size extensions */
#define X86_CR4_PAE	0x0020		/* enable physical address extensions */
#define X86_CR4_MCE	0x0040		/* Machine check enable */
#define X86_CR4_PGE	0x0080		/* enable global pages */
#define X86_CR4_PCE	0x0100		/* enable performance counters at ipl 3 */

/*
 * Save the cr4 feature set we're using (ie
 * Pentium 4MB enable and PPro Global page
 * enable), so that any CPU's that boot up
 * after us can get the correct flags.
 */
unsigned long mmu_cr4_features __initdata = 0;

static inline void set_in_cr4(unsigned long mask)
{
	mmu_cr4_features |= mask;
	__asm__("movl %%cr4,%%eax\n\t"
		"orl %0,%%eax\n\t"
		"movl %%eax,%%cr4\n"
		: : "irg" (mask)
		:"ax");
}

/*
 * allocate page table(s) for compile-time fixed mappings
 */
static unsigned long __init fixmap_init(unsigned long start_mem)
{
	pgd_t * pg_dir;
	unsigned int idx;
	unsigned long address;

	start_mem = PAGE_ALIGN(start_mem);

	for (idx=1; idx <= __end_of_fixed_addresses; idx += PTRS_PER_PTE)
	{
		address = __fix_to_virt(__end_of_fixed_addresses-idx);
		pg_dir = swapper_pg_dir + (address >> PGDIR_SHIFT);
		memset((void *)start_mem, 0, PAGE_SIZE);
		pgd_val(*pg_dir) = _PAGE_TABLE | __pa(start_mem);
		start_mem += PAGE_SIZE;
	}

	return start_mem;
}

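/*
 * Install a single kernel mapping at 'vaddr' for the physical address
 * 'phys'.  The page table itself must already exist (fixmap_init()
 * allocates the ones used for the fixmaps); the entry is made global
 * when the CPU supports PGE.
 */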
static void set_pte_phys (unsigned long vaddr, unsigned long phys)
{
	pgprot_t prot;
	pte_t * pte;

	pte = pte_offset(pmd_offset(pgd_offset_k(vaddr), vaddr), vaddr);
	prot = PAGE_KERNEL;
	if (boot_cpu_data.x86_capability & X86_FEATURE_PGE)
		pgprot_val(prot) |= _PAGE_GLOBAL;
	set_pte(pte, mk_pte_phys(phys, prot));

	local_flush_tlb();
}

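/*
 * Map the page at 'phys' to the fixed virtual address reserved for
 * 'idx' in the compile-time fixmap area.
 */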
void set_fixmap (enum fixed_addresses idx, unsigned long phys)
{
	unsigned long address = __fix_to_virt(idx);

	if (idx >= __end_of_fixed_addresses) {
		printk("Invalid set_fixmap\n");
		return;
	}
	set_pte_phys (address,phys);
}

/*
 * paging_init() sets up the page tables - note that the first 4MB are
 * already mapped by head.S.
 *
 * This routine also unmaps the page at virtual kernel address 0, so
 * that we can trap those pesky NULL-reference errors in the kernel.
 */
__initfunc(unsigned long paging_init(unsigned long start_mem, unsigned long end_mem))
{
	pgd_t * pg_dir;
	pte_t * pg_table;
	unsigned long tmp;
	unsigned long address;

/*
 * Physical page 0 is special; it's not touched by Linux since BIOS
 * and SMM (for laptops with [34]86/SL chips) may need it.  It is read
 * and write protected to detect null pointer references in the
 * kernel.
 * It may also hold the MP configuration table when we are booting SMP.
 */
	start_mem = PAGE_ALIGN(start_mem);
	address = PAGE_OFFSET;
	pg_dir = swapper_pg_dir;
	/* unmap the original low memory mappings */
	pgd_val(pg_dir[0]) = 0;

	/* Map whole memory from PAGE_OFFSET */
	pg_dir += USER_PGD_PTRS;
	while (address < end_mem) {
		/*
		 * If we're running on a Pentium CPU, we can use the 4MB
		 * page tables.
		 *
		 * The page tables we create span up to the next 4MB
		 * virtual memory boundary, but that's OK as we won't
		 * use that memory anyway.
		 */
		if (boot_cpu_data.x86_capability & X86_FEATURE_PSE) {
			unsigned long __pe;

			set_in_cr4(X86_CR4_PSE);
			boot_cpu_data.wp_works_ok = 1;
			__pe = _KERNPG_TABLE + _PAGE_4M + __pa(address);
			/* Make it "global" too if supported */
			if (boot_cpu_data.x86_capability & X86_FEATURE_PGE) {
				set_in_cr4(X86_CR4_PGE);
				__pe += _PAGE_GLOBAL;
			}
			pgd_val(*pg_dir) = __pe;
			pg_dir++;
			address += 4*1024*1024;
			continue;
		}

		/*
		 * We're on a [34]86, use normal page tables.
		 * pg_table is physical at this point
		 */
		pg_table = (pte_t *) (PAGE_MASK & pgd_val(*pg_dir));
		if (!pg_table) {
			pg_table = (pte_t *) __pa(start_mem);
			start_mem += PAGE_SIZE;
		}

		pgd_val(*pg_dir) = _PAGE_TABLE | (unsigned long) pg_table;
		pg_dir++;

		/* now change pg_table to kernel virtual addresses */
		pg_table = (pte_t *) __va(pg_table);
		for (tmp = 0 ; tmp < PTRS_PER_PTE ; tmp++,pg_table++) {
			pte_t pte = mk_pte(address, PAGE_KERNEL);
			if (address >= end_mem)
				pte_val(pte) = 0;
			set_pte(pg_table, pte);
			address += PAGE_SIZE;
		}
	}
	start_mem = fixmap_init(start_mem);
#ifdef __SMP__
	start_mem = init_smp_mappings(start_mem);
#endif
	local_flush_tlb();

	return free_area_init(start_mem, end_mem);
}

/*
 * Test if the WP bit works in supervisor mode. It isn't supported on 386's
 * and also on some strange 486's (NexGen etc.). All 586+'s are OK. The jumps
 * before and after the test are here to work-around some nasty CPU bugs.
 */
__initfunc(void test_wp_bit(void))
{
	unsigned char tmp_reg;
	unsigned long old = pg0[0];

	printk("Checking if this processor honours the WP bit even in supervisor mode... ");
	pg0[0] = pte_val(mk_pte(PAGE_OFFSET, PAGE_READONLY));
	local_flush_tlb();
	current->mm->mmap->vm_start += PAGE_SIZE;
	__asm__ __volatile__(
		"jmp 1f; 1:\n"
		"movb %0,%1\n"
		"movb %1,%0\n"
		"jmp 1f; 1:\n"
		:"=m" (*(char *) __va(0)),
		 "=q" (tmp_reg)
		:/* no inputs */
		:"memory");
	pg0[0] = old;
	local_flush_tlb();
	current->mm->mmap->vm_start -= PAGE_SIZE;
	if (boot_cpu_data.wp_works_ok < 0) {
		boot_cpu_data.wp_works_ok = 0;
		printk("No.\n");
#ifdef CONFIG_X86_WP_WORKS_OK
		panic("This kernel doesn't support CPU's with broken WP. Recompile it for a 386!");
#endif
	} else
		printk(".\n");
}

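/*
 * mem_init() finishes memory setup: it marks which pages are usable,
 * accounts kernel text/data/init and reserved pages, hands everything
 * else to the page allocator, and finally runs the WP-bit test if the
 * CPU type did not already tell us the answer.
 */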
__initfunc(void mem_init(unsigned long start_mem, unsigned long end_mem))
{
	unsigned long start_low_mem = PAGE_SIZE;
	int codepages = 0;
	int reservedpages = 0;
	int datapages = 0;
	int initpages = 0;
	unsigned long tmp;

	end_mem &= PAGE_MASK;
	high_memory = (void *) end_mem;
	max_mapnr = num_physpages = MAP_NR(end_mem);

	/* clear the zero-page */
	memset(empty_zero_page, 0, PAGE_SIZE);

	/* mark usable pages in the mem_map[] */
	start_low_mem = PAGE_ALIGN(start_low_mem)+PAGE_OFFSET;

#ifdef __SMP__
	/*
	 * But first pinch a few for the stack/trampoline stuff
	 * FIXME: Don't need the extra page at 4K, but need to fix
	 * trampoline before removing it. (see the GDT stuff)
	 */
	start_low_mem += PAGE_SIZE;			/* 32bit startup code */
	start_low_mem = smp_alloc_memory(start_low_mem);	/* AP processor stacks */
#endif
	start_mem = PAGE_ALIGN(start_mem);

	/*
	 * IBM messed up *AGAIN* in their thinkpad: 0xA0000 -> 0x9F000.
	 * They seem to have done something stupid with the floppy
	 * controller as well..
	 */
	while (start_low_mem < 0x9f000+PAGE_OFFSET) {
		clear_bit(PG_reserved, &mem_map[MAP_NR(start_low_mem)].flags);
		start_low_mem += PAGE_SIZE;
	}

	while (start_mem < end_mem) {
		clear_bit(PG_reserved, &mem_map[MAP_NR(start_mem)].flags);
		start_mem += PAGE_SIZE;
	}
	for (tmp = PAGE_OFFSET ; tmp < end_mem ; tmp += PAGE_SIZE) {
		if (tmp >= MAX_DMA_ADDRESS)
			clear_bit(PG_DMA, &mem_map[MAP_NR(tmp)].flags);
		if (PageReserved(mem_map+MAP_NR(tmp))) {
			if (tmp >= (unsigned long) &_text && tmp < (unsigned long) &_edata) {
				if (tmp < (unsigned long) &_etext)
					codepages++;
				else
					datapages++;
			} else if (tmp >= (unsigned long) &__init_begin
				   && tmp < (unsigned long) &__init_end)
				initpages++;
			else if (tmp >= (unsigned long) &__bss_start
				 && tmp < (unsigned long) start_mem)
				datapages++;
			else
				reservedpages++;
			continue;
		}
		atomic_set(&mem_map[MAP_NR(tmp)].count, 1);
#ifdef CONFIG_BLK_DEV_INITRD
		if (!initrd_start || (tmp < initrd_start || tmp >= initrd_end))
#endif
			free_page(tmp);
	}
	printk("Memory: %luk/%luk available (%dk kernel code, %dk reserved, %dk data, %dk init)\n",
		(unsigned long) nr_free_pages << (PAGE_SHIFT-10),
		max_mapnr << (PAGE_SHIFT-10),
		codepages << (PAGE_SHIFT-10),
		reservedpages << (PAGE_SHIFT-10),
		datapages << (PAGE_SHIFT-10),
		initpages << (PAGE_SHIFT-10));

	if (boot_cpu_data.wp_works_ok < 0)
		test_wp_bit();
}

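/*
 * The __init sections are only needed during boot; once initialization
 * is complete the pages between __init_begin and __init_end are given
 * back to the page allocator.
 */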
void free_initmem(void)
{
	unsigned long addr;

	addr = (unsigned long)(&__init_begin);
	for (; addr < (unsigned long)(&__init_end); addr += PAGE_SIZE) {
		mem_map[MAP_NR(addr)].flags &= ~(1 << PG_reserved);
		atomic_set(&mem_map[MAP_NR(addr)].count, 1);
		free_page(addr);
	}
	printk ("Freeing unused kernel memory: %dk freed\n", (&__init_end - &__init_begin) >> 10);
}

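/*
 * Fill in the memory fields of a sysinfo structure (used by the
 * sysinfo system call and /proc/meminfo): total, shared, free and
 * buffer memory, all reported in bytes.
 */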
void si_meminfo(struct sysinfo *val)
{
	int i;

	i = max_mapnr;
	val->totalram = 0;
	val->sharedram = 0;
	val->freeram = nr_free_pages << PAGE_SHIFT;
	val->bufferram = buffermem;
	while (i-- > 0) {
		if (PageReserved(mem_map+i))
			continue;
		val->totalram++;
		if (!atomic_read(&mem_map[i].count))
			continue;
		val->sharedram += atomic_read(&mem_map[i].count) - 1;
	}
	val->totalram <<= PAGE_SHIFT;
	val->sharedram <<= PAGE_SHIFT;
	return;
}