/*
 * linux/arch/i386/mm/init.c
 *
 * Copyright (C) 1995 Linus Torvalds
 */

#include <linux/config.h>
#include <linux/signal.h>
#include <linux/sched.h>
#include <linux/head.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/types.h>
#include <linux/ptrace.h>
#include <linux/mman.h>
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/smp.h>
#include <linux/init.h>
#ifdef CONFIG_BLK_DEV_INITRD
#include <linux/blk.h>
#endif

#include <asm/processor.h>
#include <asm/system.h>
#include <asm/uaccess.h>
#include <asm/pgtable.h>
#include <asm/dma.h>
#include <asm/fixmap.h>

extern void show_net_buffers(void);
extern unsigned long init_smp_mappings(unsigned long);
void __bad_pte_kernel(pmd_t *pmd)
{
	printk("Bad pmd in pte_alloc: %08lx\n", pmd_val(*pmd));
	pmd_val(*pmd) = _KERNPG_TABLE + __pa(BAD_PAGETABLE);
}

void __bad_pte(pmd_t *pmd)
{
	printk("Bad pmd in pte_alloc: %08lx\n", pmd_val(*pmd));
	pmd_val(*pmd) = _PAGE_TABLE + __pa(BAD_PAGETABLE);
}
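
/*
 * Slow-path pte allocation for kernel mappings: if the pmd entry is
 * still empty, install a freshly cleared page table (or BAD_PAGETABLE
 * when the page allocation fails); otherwise reuse the existing table.
 */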
pte_t *get_pte_kernel_slow(pmd_t *pmd, unsigned long offset)
{
	pte_t *pte;

	pte = (pte_t *) __get_free_page(GFP_KERNEL);
	if (pmd_none(*pmd)) {
		if (pte) {
			clear_page((unsigned long)pte);
			pmd_val(*pmd) = _KERNPG_TABLE + __pa(pte);
			return pte + offset;
		}
		pmd_val(*pmd) = _KERNPG_TABLE + __pa(BAD_PAGETABLE);
		return NULL;
	}
	free_page((unsigned long)pte);
	if (pmd_bad(*pmd)) {
		__bad_pte_kernel(pmd);
		return NULL;
	}
	return (pte_t *) pmd_page(*pmd) + offset;
}
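
/*
 * Same slow path for user page tables: the new table is installed
 * with _PAGE_TABLE (user) protections instead of _KERNPG_TABLE.
 */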
pte_t *get_pte_slow(pmd_t *pmd, unsigned long offset)
{
	unsigned long pte;

	pte = (unsigned long) __get_free_page(GFP_KERNEL);
	if (pmd_none(*pmd)) {
		if (pte) {
			clear_page(pte);
			pmd_val(*pmd) = _PAGE_TABLE + __pa(pte);
			return (pte_t *)(pte + offset);
		}
		pmd_val(*pmd) = _PAGE_TABLE + __pa(BAD_PAGETABLE);
		return NULL;
	}
	free_page(pte);
	if (pmd_bad(*pmd)) {
		__bad_pte(pmd);
		return NULL;
	}
	return (pte_t *) (pmd_page(*pmd) + offset);
}
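
/*
 * Trim the page-table quicklists down to 'low' entries once the cache
 * has grown past 'high'. Returns the number of pages handed back.
 */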
int do_check_pgt_cache(int low, int high)
{
	int freed = 0;
	if(pgtable_cache_size > high) {
		do {
			if(pgd_quicklist)
				free_pgd_slow(get_pgd_fast()), freed++;
			if(pmd_quicklist)
				free_pmd_slow(get_pmd_fast()), freed++;
			if(pte_quicklist)
				free_pte_slow(get_pte_fast()), freed++;
		} while(pgtable_cache_size > low);
	}
	return freed;
}
/*
 * BAD_PAGE is the page that is used for page faults when linux
 * is out-of-memory. Older versions of linux just did a
 * do_exit(), but using this instead means there is less risk
 * for a process dying in kernel mode, possibly leaving an inode
 * unused etc..
 *
 * BAD_PAGETABLE is the accompanying page-table: it is initialized
 * to point to BAD_PAGE entries.
 *
 * ZERO_PAGE is a special page that is used for zero-initialized
 * data and COW.
 */
pte_t * __bad_pagetable(void)
{
	extern char empty_bad_page_table[PAGE_SIZE];

	__asm__ __volatile__("cld ; rep ; stosl":
		:"a" (pte_val(BAD_PAGE)),
		 "D" ((long) empty_bad_page_table),
		 "c" (PAGE_SIZE/4)
		:"di","cx");
	return (pte_t *) empty_bad_page_table;
}

pte_t __bad_page(void)
{
	extern char empty_bad_page[PAGE_SIZE];

	__asm__ __volatile__("cld ; rep ; stosl":
		:"a" (0),
		 "D" ((long) empty_bad_page),
		 "c" (PAGE_SIZE/4)
		:"di","cx");
	return pte_mkdirty(mk_pte((unsigned long) empty_bad_page, PAGE_SHARED));
}
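
/*
 * Dump a summary of memory usage to the console: free areas, swap,
 * and per-page counts of reserved, shared and swap-cached pages.
 */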
void show_mem(void)
{
	int i,free = 0,total = 0,reserved = 0;
	int shared = 0, cached = 0;

	printk("Mem-info:\n");
	show_free_areas();
	printk("Free swap: %6dkB\n",nr_swap_pages<<(PAGE_SHIFT-10));
	i = max_mapnr;
	while (i-- > 0) {
		total++;
		if (PageReserved(mem_map+i))
			reserved++;
		else if (PageSwapCache(mem_map+i))
			cached++;
		else if (!atomic_read(&mem_map[i].count))
			free++;
		else
			shared += atomic_read(&mem_map[i].count) - 1;
	}
	printk("%d pages of RAM\n",total);
	printk("%d reserved pages\n",reserved);
	printk("%d pages shared\n",shared);
	printk("%d pages swap cached\n",cached);
	printk("%ld pages in page table cache\n",pgtable_cache_size);
	show_buffers();
#ifdef CONFIG_NET
	show_net_buffers();
#endif
}

extern unsigned long free_area_init(unsigned long, unsigned long);

/* References to section boundaries */

extern char _text, _etext, _edata, __bss_start, _end;
extern char __init_begin, __init_end;
#define X86_CR4_VME	0x0001		/* enable vm86 extensions */
#define X86_CR4_PVI	0x0002		/* virtual interrupts flag enable */
#define X86_CR4_TSD	0x0004		/* disable time stamp at ipl 3 */
#define X86_CR4_DE	0x0008		/* enable debugging extensions */
#define X86_CR4_PSE	0x0010		/* enable page size extensions */
#define X86_CR4_PAE	0x0020		/* enable physical address extensions */
#define X86_CR4_MCE	0x0040		/* Machine check enable */
#define X86_CR4_PGE	0x0080		/* enable global pages */
#define X86_CR4_PCE	0x0100		/* enable performance counters at ipl 3 */

/*
 * Save the cr4 feature set we're using (ie
 * Pentium 4MB enable and PPro Global page
 * enable), so that any CPU's that boot up
 * after us can get the correct flags.
 */
unsigned long mmu_cr4_features __initdata = 0;

static inline void set_in_cr4(unsigned long mask)
{
	mmu_cr4_features |= mask;
	__asm__("movl %%cr4,%%eax\n\t"
		"orl %0,%%eax\n\t"
		"movl %%eax,%%cr4\n"
		: : "irg" (mask)
		:"ax");
}
/*
 * allocate page table(s) for compile-time fixed mappings
 */
static unsigned long __init fixmap_init(unsigned long start_mem)
{
	pgd_t * pg_dir;
	unsigned int idx;
	unsigned long address;

	start_mem = PAGE_ALIGN(start_mem);

	for (idx=1; idx <= __end_of_fixed_addresses; idx += PTRS_PER_PTE)
	{
		address = __fix_to_virt(__end_of_fixed_addresses-idx);
		pg_dir = swapper_pg_dir + (address >> PGDIR_SHIFT);
		memset((void *)start_mem, 0, PAGE_SIZE);
		pgd_val(*pg_dir) = _PAGE_TABLE | __pa(start_mem);
		start_mem += PAGE_SIZE;
	}

	return start_mem;
}
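
/*
 * Install a single kernel pte mapping 'vaddr' to physical address
 * 'phys', marking it global when the CPU supports global pages,
 * then flush the TLB. Used by set_fixmap() below.
 */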
static void set_pte_phys (unsigned long vaddr, unsigned long phys)
{
	pgprot_t prot;
	pte_t * pte;

	pte = pte_offset(pmd_offset(pgd_offset_k(vaddr), vaddr), vaddr);
	prot = PAGE_KERNEL;
	if (boot_cpu_data.x86_capability & X86_FEATURE_PGE)
		pgprot_val(prot) |= _PAGE_GLOBAL;
	set_pte(pte, mk_pte_phys(phys, prot));

	local_flush_tlb();
}

void set_fixmap (enum fixed_addresses idx, unsigned long phys)
{
	unsigned long address = __fix_to_virt(idx);

	if (idx >= __end_of_fixed_addresses) {
		printk("Invalid set_fixmap\n");
		return;
	}
	set_pte_phys (address,phys);
}
/*
 * paging_init() sets up the page tables - note that the first 4MB are
 * already mapped by head.S.
 *
 * This routines also unmaps the page at virtual kernel address 0, so
 * that we can trap those pesky NULL-reference errors in the kernel.
 */
__initfunc(unsigned long paging_init(unsigned long start_mem, unsigned long end_mem))
{
	pgd_t * pg_dir;
	pte_t * pg_table;
	unsigned long tmp;
	unsigned long address;

/*
 * Physical page 0 is special; it's not touched by Linux since BIOS
 * and SMM (for laptops with [34]86/SL chips) may need it. It is read
 * and write protected to detect null pointer references in the
 * kernel.
 * It may also hold the MP configuration table when we are booting SMP.
 */
#ifdef __SMP__
	/*
	 * FIXME: Linux assumes you have 640K of base ram..
	 * this continues the error...
	 *
	 * 1) Scan the bottom 1K for a signature
	 * 2) Scan the top 1K of base RAM
	 * 3) Scan the 64K of bios
	 */
	if (!smp_scan_config(0x0,0x400) &&
	    !smp_scan_config(639*0x400,0x400) &&
	    !smp_scan_config(0xF0000,0x10000)) {
		/*
		 * If it is an SMP machine we should know now, unless the
		 * configuration is in an EISA/MCA bus machine with an
		 * extended bios data area.
		 *
		 * there is a real-mode segmented pointer pointing to the
		 * 4K EBDA area at 0x40E, calculate and scan it here:
		 */
		address = *(unsigned short *)phys_to_virt(0x40E);
		address<<=4;
		smp_scan_config(address, 0x1000);
	}
#endif
	start_mem = PAGE_ALIGN(start_mem);
	address = PAGE_OFFSET;
	pg_dir = swapper_pg_dir;
	/* unmap the original low memory mappings */
	pgd_val(pg_dir[0]) = 0;

	/* Map whole memory from PAGE_OFFSET */
	pg_dir += USER_PGD_PTRS;
	while (address < end_mem) {
		/*
		 * If we're running on a Pentium CPU, we can use the 4MB
		 * page tables.
		 *
		 * The page tables we create span up to the next 4MB
		 * virtual memory boundary, but that's OK as we won't
		 * use that memory anyway.
		 */
		if (boot_cpu_data.x86_capability & X86_FEATURE_PSE) {
			unsigned long __pe;

			set_in_cr4(X86_CR4_PSE);
			boot_cpu_data.wp_works_ok = 1;
			__pe = _KERNPG_TABLE + _PAGE_4M + __pa(address);
			/* Make it "global" too if supported */
			if (boot_cpu_data.x86_capability & X86_FEATURE_PGE) {
				set_in_cr4(X86_CR4_PGE);
				__pe += _PAGE_GLOBAL;
			}
			pgd_val(*pg_dir) = __pe;
			pg_dir++;
			address += 4*1024*1024;
			continue;
		}

		/*
		 * We're on a [34]86, use normal page tables.
		 * pg_table is physical at this point
		 */
		pg_table = (pte_t *) (PAGE_MASK & pgd_val(*pg_dir));
		if (!pg_table) {
			pg_table = (pte_t *) __pa(start_mem);
			start_mem += PAGE_SIZE;
		}

		pgd_val(*pg_dir) = _PAGE_TABLE | (unsigned long) pg_table;
		pg_dir++;

		/* now change pg_table to kernel virtual addresses */
		pg_table = (pte_t *) __va(pg_table);
		for (tmp = 0 ; tmp < PTRS_PER_PTE ; tmp++,pg_table++) {
			pte_t pte = mk_pte(address, PAGE_KERNEL);
			if (address >= end_mem)
				pte_val(pte) = 0;
			set_pte(pg_table, pte);
			address += PAGE_SIZE;
		}
	}
	start_mem = fixmap_init(start_mem);
#ifdef __SMP__
	start_mem = init_smp_mappings(start_mem);
#endif
	local_flush_tlb();

	return free_area_init(start_mem, end_mem);
}
/*
 * Test if the WP bit works in supervisor mode. It isn't supported on 386's
 * and also on some strange 486's (NexGen etc.). All 586+'s are OK. The jumps
 * before and after the test are here to work-around some nasty CPU bugs.
 */

__initfunc(void test_wp_bit(void))
{
	unsigned char tmp_reg;
	unsigned long old = pg0[0];

	printk("Checking if this processor honours the WP bit even in supervisor mode... ");
	pg0[0] = pte_val(mk_pte(PAGE_OFFSET, PAGE_READONLY));
	local_flush_tlb();
	current->mm->mmap->vm_start += PAGE_SIZE;
	__asm__ __volatile__(
		"jmp 1f; 1:\n"
		"movb %0,%1\n"
		"movb %1,%0\n"
		"jmp 1f; 1:\n"
		:"=m" (*(char *) __va(0)),
		 "=q" (tmp_reg)
		:/* no inputs */
		:"memory");
	pg0[0] = old;
	local_flush_tlb();
	current->mm->mmap->vm_start -= PAGE_SIZE;
	if (boot_cpu_data.wp_works_ok < 0) {
		boot_cpu_data.wp_works_ok = 0;
		printk("No.\n");
#ifndef CONFIG_M386
		panic("This kernel doesn't support CPU's with broken WP. Recompile it for a 386!");
#endif
	} else
		printk(".\n");
}
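
/*
 * mem_init() marks the usable pages in mem_map[], releases them to the
 * page allocator, prints the "Memory: ..." boot banner and, if still
 * undetermined, runs the WP-bit test above.
 */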
__initfunc(void mem_init(unsigned long start_mem, unsigned long end_mem))
{
	unsigned long start_low_mem = PAGE_SIZE;
	int codepages = 0;
	int reservedpages = 0;
	int datapages = 0;
	int initpages = 0;
	unsigned long tmp;

	end_mem &= PAGE_MASK;
	high_memory = (void *) end_mem;
	max_mapnr = num_physpages = MAP_NR(end_mem);

	/* clear the zero-page */
	memset(empty_zero_page, 0, PAGE_SIZE);

	/* mark usable pages in the mem_map[] */
	start_low_mem = PAGE_ALIGN(start_low_mem)+PAGE_OFFSET;

#ifdef __SMP__
	/*
	 * But first pinch a few for the stack/trampoline stuff
	 * FIXME: Don't need the extra page at 4K, but need to fix
	 * trampoline before removing it. (see the GDT stuff)
	 */
	start_low_mem += PAGE_SIZE;				/* 32bit startup code */
	start_low_mem = smp_alloc_memory(start_low_mem);	/* AP processor stacks */
#endif
	start_mem = PAGE_ALIGN(start_mem);

	/*
	 * IBM messed up *AGAIN* in their thinkpad: 0xA0000 -> 0x9F000.
	 * They seem to have done something stupid with the floppy
	 * controller as well..
	 */
	while (start_low_mem < 0x9f000+PAGE_OFFSET) {
		clear_bit(PG_reserved, &mem_map[MAP_NR(start_low_mem)].flags);
		start_low_mem += PAGE_SIZE;
	}

	while (start_mem < end_mem) {
		clear_bit(PG_reserved, &mem_map[MAP_NR(start_mem)].flags);
		start_mem += PAGE_SIZE;
	}
	for (tmp = PAGE_OFFSET ; tmp < end_mem ; tmp += PAGE_SIZE) {
		if (tmp >= MAX_DMA_ADDRESS)
			clear_bit(PG_DMA, &mem_map[MAP_NR(tmp)].flags);
		if (PageReserved(mem_map+MAP_NR(tmp))) {
			if (tmp >= (unsigned long) &_text && tmp < (unsigned long) &_edata) {
				if (tmp < (unsigned long) &_etext)
					codepages++;
				else
					datapages++;
			} else if (tmp >= (unsigned long) &__init_begin
				   && tmp < (unsigned long) &__init_end)
				initpages++;
			else if (tmp >= (unsigned long) &__bss_start
				 && tmp < (unsigned long) start_mem)
				datapages++;
			else
				reservedpages++;
			continue;
		}
		atomic_set(&mem_map[MAP_NR(tmp)].count, 1);
#ifdef CONFIG_BLK_DEV_INITRD
		if (!initrd_start || (tmp < initrd_start || tmp >= initrd_end))
#endif
			free_page(tmp);
	}
	printk("Memory: %luk/%luk available (%dk kernel code, %dk reserved, %dk data, %dk init)\n",
		(unsigned long) nr_free_pages << (PAGE_SHIFT-10),
		max_mapnr << (PAGE_SHIFT-10),
		codepages << (PAGE_SHIFT-10),
		reservedpages << (PAGE_SHIFT-10),
		datapages << (PAGE_SHIFT-10),
		initpages << (PAGE_SHIFT-10));

	if (boot_cpu_data.wp_works_ok < 0)
		test_wp_bit();
}
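
/*
 * Release the pages between __init_begin and __init_end back to the
 * free list once initialization has finished.
 */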
void free_initmem(void)
{
	unsigned long addr;

	addr = (unsigned long)(&__init_begin);
	for (; addr < (unsigned long)(&__init_end); addr += PAGE_SIZE) {
		mem_map[MAP_NR(addr)].flags &= ~(1 << PG_reserved);
		atomic_set(&mem_map[MAP_NR(addr)].count, 1);
		free_page(addr);
	}
	printk ("Freeing unused kernel memory: %dk freed\n", (&__init_end - &__init_begin) >> 10);
}
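
/*
 * Fill in the memory fields of a struct sysinfo: total, free, shared
 * and buffer memory, all converted to bytes.
 */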
void si_meminfo(struct sysinfo *val)
{
	int i;

	i = max_mapnr;
	val->totalram = 0;
	val->sharedram = 0;
	val->freeram = nr_free_pages << PAGE_SHIFT;
	val->bufferram = buffermem;
	while (i-- > 0) {
		if (PageReserved(mem_map+i))
			continue;
		val->totalram++;
		if (!atomic_read(&mem_map[i].count))
			continue;
		val->sharedram += atomic_read(&mem_map[i].count) - 1;
	}
	val->totalram <<= PAGE_SHIFT;
	val->sharedram <<= PAGE_SHIFT;
	return;
}