/*
 *  linux/arch/i386/mm/init.c
 *
 *  Copyright (C) 1995  Linus Torvalds
 */

#include <linux/config.h>
#include <linux/signal.h>
#include <linux/sched.h>
#include <linux/head.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/types.h>
#include <linux/ptrace.h>
#include <linux/mman.h>
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/smp.h>
#include <linux/init.h>
#ifdef CONFIG_BLK_DEV_INITRD
#include <linux/blk.h>
#endif

#include <asm/processor.h>
#include <asm/system.h>
#include <asm/uaccess.h>
#include <asm/pgtable.h>
#include <asm/dma.h>
#include <asm/fixmap.h>

extern void show_net_buffers(void);
extern unsigned long init_smp_mappings(unsigned long);
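
/*
 * Bad-pmd handlers: when a corrupt pmd entry is found during pte
 * allocation, report it and point the entry at BAD_PAGETABLE so that
 * later lookups hit a known-safe table instead of random memory.
 */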
void __bad_pte_kernel(pmd_t *pmd)
{
	printk("Bad pmd in pte_alloc: %08lx\n", pmd_val(*pmd));
	pmd_val(*pmd) = _KERNPG_TABLE + __pa(BAD_PAGETABLE);
}

void __bad_pte(pmd_t *pmd)
{
	printk("Bad pmd in pte_alloc: %08lx\n", pmd_val(*pmd));
	pmd_val(*pmd) = _PAGE_TABLE + __pa(BAD_PAGETABLE);
}
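
/*
 * Slow-path pte allocation for kernel and user page tables: grab a
 * fresh page, clear it and hook it into the pmd.  Normally reached
 * when the pte quicklist cannot satisfy the request.
 */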
pte_t *get_pte_kernel_slow(pmd_t *pmd, unsigned long offset)
{
	pte_t *pte;

	pte = (pte_t *) __get_free_page(GFP_KERNEL);
	if (pmd_none(*pmd)) {
		if (pte) {
			clear_page((unsigned long)pte);
			pmd_val(*pmd) = _KERNPG_TABLE + __pa(pte);
			return pte + offset;
		}
		pmd_val(*pmd) = _KERNPG_TABLE + __pa(BAD_PAGETABLE);
		return NULL;
	}
	free_page((unsigned long)pte);
	if (pmd_bad(*pmd)) {
		__bad_pte_kernel(pmd);
		return NULL;
	}
	return (pte_t *) pmd_page(*pmd) + offset;
}

pte_t *get_pte_slow(pmd_t *pmd, unsigned long offset)
{
	unsigned long pte;

	pte = (unsigned long) __get_free_page(GFP_KERNEL);
	if (pmd_none(*pmd)) {
		if (pte) {
			clear_page(pte);
			pmd_val(*pmd) = _PAGE_TABLE + __pa(pte);
			return (pte_t *)(pte + offset);
		}
		pmd_val(*pmd) = _PAGE_TABLE + __pa(BAD_PAGETABLE);
		return NULL;
	}
	free_page(pte);
	if (pmd_bad(*pmd)) {
		__bad_pte(pmd);
		return NULL;
	}
	return (pte_t *) (pmd_page(*pmd) + offset);
}
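
/*
 * Trim the page table quicklists: once the cache grows past 'high',
 * return pages to the allocator until it drops below 'low'.  Returns
 * the number of pages freed.
 */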
int do_check_pgt_cache(int low, int high)
{
	int freed = 0;
	if(pgtable_cache_size > high) {
		do {
			if(pgd_quicklist)
				free_pgd_slow(get_pgd_fast()), freed++;
			if(pmd_quicklist)
				free_pmd_slow(get_pmd_fast()), freed++;
			if(pte_quicklist)
				free_pte_slow(get_pte_fast()), freed++;
		} while(pgtable_cache_size > low);
	}
	return freed;
}

/*
 * BAD_PAGE is the page that is used for page faults when linux
 * is out-of-memory. Older versions of linux just did a
 * do_exit(), but using this instead means there is less risk
 * for a process dying in kernel mode, possibly leaving an inode
 * unused etc..
 *
 * BAD_PAGETABLE is the accompanying page-table: it is initialized
 * to point to BAD_PAGE entries.
 *
 * ZERO_PAGE is a special page that is used for zero-initialized
 * data and COW.
 */
pte_t * __bad_pagetable(void)
{
	extern char empty_bad_page_table[PAGE_SIZE];

	/* fill the whole page with ptes that point at BAD_PAGE */
	__asm__ __volatile__("cld ; rep ; stosl":
		:"a" (pte_val(BAD_PAGE)),
		 "D" ((long) empty_bad_page_table),
		 "c" (PAGE_SIZE/4)
		:"di","cx");
	return (pte_t *) empty_bad_page_table;
}

pte_t __bad_page(void)
{
	extern char empty_bad_page[PAGE_SIZE];

	/* clear the page before handing out a pte to it */
	__asm__ __volatile__("cld ; rep ; stosl":
		:"a" (0),
		 "D" ((long) empty_bad_page),
		 "c" (PAGE_SIZE/4)
		:"di","cx");
	return pte_mkdirty(mk_pte((unsigned long) empty_bad_page, PAGE_SHARED));
}
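
/*
 * Dump a summary of memory usage to the console: free areas, free swap,
 * and a per-page breakdown of reserved, shared and swap-cached pages.
 */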
void show_mem(void)
{
	int i,free = 0,total = 0,reserved = 0;
	int shared = 0, cached = 0;

	printk("Mem-info:\n");
	show_free_areas();
	printk("Free swap: %6dkB\n",nr_swap_pages<<(PAGE_SHIFT-10));
	i = max_mapnr;
	while (i-- > 0) {
		total++;
		if (PageReserved(mem_map+i))
			reserved++;
		else if (PageSwapCache(mem_map+i))
			cached++;
		else if (!atomic_read(&mem_map[i].count))
			free++;
		else
			shared += atomic_read(&mem_map[i].count) - 1;
	}
	printk("%d pages of RAM\n",total);
	printk("%d reserved pages\n",reserved);
	printk("%d pages shared\n",shared);
	printk("%d pages swap cached\n",cached);
	printk("%ld pages in page table cache\n",pgtable_cache_size);
	show_buffers();
#ifdef CONFIG_NET
	show_net_buffers();
#endif
}

extern unsigned long free_area_init(unsigned long, unsigned long);

/* References to section boundaries */

extern char _text, _etext, _edata, __bss_start, _end;
extern char __init_begin, __init_end;

#define X86_CR4_VME	0x0001		/* enable vm86 extensions */
#define X86_CR4_PVI	0x0002		/* virtual interrupts flag enable */
#define X86_CR4_TSD	0x0004		/* disable time stamp at ipl 3 */
#define X86_CR4_DE	0x0008		/* enable debugging extensions */
#define X86_CR4_PSE	0x0010		/* enable page size extensions */
#define X86_CR4_PAE	0x0020		/* enable physical address extensions */
#define X86_CR4_MCE	0x0040		/* Machine check enable */
#define X86_CR4_PGE	0x0080		/* enable global pages */
#define X86_CR4_PCE	0x0100		/* enable performance counters at ipl 3 */

/*
 * Save the cr4 feature set we're using (i.e. Pentium 4MB enable and
 * PPro Global page enable), so that any CPUs that boot up after us
 * can get the correct flags.
 */
unsigned long mmu_cr4_features __initdata = 0;
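
/*
 * Record a feature bit in mmu_cr4_features and turn it on in %cr4 on
 * the CPU we are currently running on.
 */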
static inline void set_in_cr4(unsigned long mask)
{
	mmu_cr4_features |= mask;
	__asm__("movl %%cr4,%%eax\n\t"
		"orl %0,%%eax\n\t"
		"movl %%eax,%%cr4\n"
		: : "irg" (mask)
		:"ax");
}

/*
 * allocate page table(s) for compile-time fixed mappings
 */
static unsigned long __init fixmap_init(unsigned long start_mem)
{
	pgd_t * pg_dir;
	unsigned int idx;
	unsigned long address;

	start_mem = PAGE_ALIGN(start_mem);

	for (idx=1; idx <= __end_of_fixed_addresses; idx += PTRS_PER_PTE)
	{
		address = fix_to_virt(__end_of_fixed_addresses-idx);
		pg_dir = swapper_pg_dir + (address >> PGDIR_SHIFT);
		memset((void *)start_mem, 0, PAGE_SIZE);
		pgd_val(*pg_dir) = _PAGE_TABLE | __pa(start_mem);
		start_mem += PAGE_SIZE;
	}

	return start_mem;
}
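
/*
 * Install a single kernel pte mapping 'vaddr' to the physical address
 * 'phys' with PAGE_KERNEL protection (made global when the CPU supports
 * it), then flush the local TLB.
 */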
static void set_pte_phys (unsigned long vaddr, unsigned long phys)
{
	pgprot_t prot;
	pte_t * pte;

	pte = pte_offset(pmd_offset(pgd_offset_k(vaddr), vaddr), vaddr);
	prot = PAGE_KERNEL;
	if (boot_cpu_data.x86_capability & X86_FEATURE_PGE)
		pgprot_val(prot) |= _PAGE_GLOBAL;
	set_pte(pte, mk_pte_phys(phys, prot));

	local_flush_tlb();
}
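
/*
 * Point a compile-time fixed-address (fixmap) slot at the given
 * physical address.
 */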
void set_fixmap (enum fixed_addresses idx, unsigned long phys)
{
	unsigned long address = fix_to_virt(idx);

	set_pte_phys (address,phys);
}

/*
 * paging_init() sets up the page tables - note that the first 4MB are
 * already mapped by head.S.
 *
 * This routine also unmaps the page at virtual kernel address 0, so
 * that we can trap those pesky NULL-reference errors in the kernel.
 */
__initfunc(unsigned long paging_init(unsigned long start_mem, unsigned long end_mem))
{
	pgd_t * pg_dir;
	pte_t * pg_table;
	unsigned long tmp;
	unsigned long address;

	/*
	 * Physical page 0 is special; it's not touched by Linux since BIOS
	 * and SMM (for laptops with [34]86/SL chips) may need it.  It is read
	 * and write protected to detect null pointer references in the
	 * kernel.
	 * It may also hold the MP configuration table when we are booting SMP.
	 */
#ifdef __SMP__
	/*
	 * FIXME: Linux assumes you have 640K of base ram..
	 * this continues the error...
	 *
	 * 1) Scan the bottom 1K for a signature
	 * 2) Scan the top 1K of base RAM
	 * 3) Scan the 64K of bios
	 */
	if (!smp_scan_config(0x0,0x400) &&
	    !smp_scan_config(639*0x400,0x400) &&
	    !smp_scan_config(0xF0000,0x10000)) {
		/*
		 * If it is an SMP machine we should know now, unless the
		 * configuration is in an EISA/MCA bus machine with an
		 * extended bios data area.
		 *
		 * there is a real-mode segmented pointer pointing to the
		 * 4K EBDA area at 0x40E, calculate and scan it here:
		 */
		address = *(unsigned short *)phys_to_virt(0x40E);
		address <<= 4;
		smp_scan_config(address, 0x1000);
	}
#endif
	start_mem = PAGE_ALIGN(start_mem);
	address = PAGE_OFFSET;
	pg_dir = swapper_pg_dir;
	/* unmap the original low memory mappings */
	pgd_val(pg_dir[0]) = 0;

	/* Map whole memory from PAGE_OFFSET */
	pg_dir += USER_PGD_PTRS;
	while (address < end_mem) {
		/*
		 * If we're running on a Pentium CPU, we can use the 4MB
		 * page tables.
		 *
		 * The page tables we create span up to the next 4MB
		 * virtual memory boundary, but that's OK as we won't
		 * use that memory anyway.
		 */
		if (boot_cpu_data.x86_capability & X86_FEATURE_PSE) {
			unsigned long __pe;

			set_in_cr4(X86_CR4_PSE);
			/* PSE-capable CPUs handle the WP bit correctly, so the test can be skipped */
			boot_cpu_data.wp_works_ok = 1;
			__pe = _KERNPG_TABLE + _PAGE_4M + __pa(address);
			/* Make it "global" too if supported */
			if (boot_cpu_data.x86_capability & X86_FEATURE_PGE) {
				set_in_cr4(X86_CR4_PGE);
				__pe += _PAGE_GLOBAL;
			}
			pgd_val(*pg_dir) = __pe;
			pg_dir++;
			address += 4*1024*1024;
			continue;
		}

		/*
		 * We're on a [34]86, use normal page tables.
		 * pg_table is physical at this point
		 */
		pg_table = (pte_t *) (PAGE_MASK & pgd_val(*pg_dir));
		if (!pg_table) {
			pg_table = (pte_t *) __pa(start_mem);
			start_mem += PAGE_SIZE;
		}

		pgd_val(*pg_dir) = _PAGE_TABLE | (unsigned long) pg_table;
		pg_dir++;

		/* now change pg_table to kernel virtual addresses */
		pg_table = (pte_t *) __va(pg_table);
		for (tmp = 0 ; tmp < PTRS_PER_PTE ; tmp++,pg_table++) {
			pte_t pte = mk_pte(address, PAGE_KERNEL);
			if (address >= end_mem)
				pte_val(pte) = 0;
			set_pte(pg_table, pte);
			address += PAGE_SIZE;
		}
	}
	start_mem = fixmap_init(start_mem);
#ifdef __SMP__
	start_mem = init_smp_mappings(start_mem);
#endif
	local_flush_tlb();

	return free_area_init(start_mem, end_mem);
}

/*
 * Test if the WP bit works in supervisor mode. It isn't supported on 386's
 * and also on some strange 486's (NexGen etc.). All 586+'s are OK. The jumps
 * before and after the test are here to work-around some nasty CPU bugs.
 */
__initfunc(void test_wp_bit(void))
{
	unsigned char tmp_reg;
	unsigned long old = pg0[0];

	printk("Checking if this processor honours the WP bit even in supervisor mode... ");
	pg0[0] = pte_val(mk_pte(PAGE_OFFSET, PAGE_READONLY));
	local_flush_tlb();
	current->mm->mmap->vm_start += PAGE_SIZE;
	__asm__ __volatile__(
		"jmp 1f; 1:\n"
		"movb %0,%1\n"
		"movb %1,%0\n"
		"jmp 1f; 1:\n"
		:"=m" (*(char *) __va(0)),
		 "=q" (tmp_reg)
		:/* no inputs */
		:"memory");
	pg0[0] = old;
	local_flush_tlb();
	current->mm->mmap->vm_start -= PAGE_SIZE;
	if (boot_cpu_data.wp_works_ok < 0) {
		boot_cpu_data.wp_works_ok = 0;
		printk("No.\n");
#ifndef CONFIG_M386
		panic("This kernel doesn't support CPU's with broken WP. Recompile it for a 386!");
#endif
	} else
		printk(".\n");
}
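
/*
 * mem_init() finishes memory setup: it clears the zero page, un-reserves
 * the usable page frames and hands them to the page allocator, accounts
 * for kernel code/data/init pages, and prints the memory banner.
 */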
__initfunc(void mem_init(unsigned long start_mem, unsigned long end_mem))
{
	unsigned long start_low_mem = PAGE_SIZE;
	int codepages = 0;
	int reservedpages = 0;
	int datapages = 0;
	int initpages = 0;
	unsigned long tmp;

	end_mem &= PAGE_MASK;
	high_memory = (void *) end_mem;
	max_mapnr = num_physpages = MAP_NR(end_mem);

	/* clear the zero-page */
	memset(empty_zero_page, 0, PAGE_SIZE);

	/* mark usable pages in the mem_map[] */
	start_low_mem = PAGE_ALIGN(start_low_mem)+PAGE_OFFSET;

#ifdef __SMP__
	/*
	 * But first pinch a few for the stack/trampoline stuff
	 * FIXME: Don't need the extra page at 4K, but need to fix
	 * trampoline before removing it. (see the GDT stuff)
	 */
	start_low_mem += PAGE_SIZE;				/* 32bit startup code */
	start_low_mem = smp_alloc_memory(start_low_mem);	/* AP processor stacks */
#endif
	start_mem = PAGE_ALIGN(start_mem);

	/*
	 * IBM messed up *AGAIN* in their thinkpad: 0xA0000 -> 0x9F000.
	 * They seem to have done something stupid with the floppy
	 * controller as well..
	 */
	while (start_low_mem < 0x9f000+PAGE_OFFSET) {
		clear_bit(PG_reserved, &mem_map[MAP_NR(start_low_mem)].flags);
		start_low_mem += PAGE_SIZE;
	}

	while (start_mem < end_mem) {
		clear_bit(PG_reserved, &mem_map[MAP_NR(start_mem)].flags);
		start_mem += PAGE_SIZE;
	}
	for (tmp = PAGE_OFFSET ; tmp < end_mem ; tmp += PAGE_SIZE) {
		if (tmp >= MAX_DMA_ADDRESS)
			clear_bit(PG_DMA, &mem_map[MAP_NR(tmp)].flags);
		if (PageReserved(mem_map+MAP_NR(tmp))) {
			if (tmp >= (unsigned long) &_text && tmp < (unsigned long) &_edata) {
				if (tmp < (unsigned long) &_etext)
					codepages++;
				else
					datapages++;
			} else if (tmp >= (unsigned long) &__init_begin
				   && tmp < (unsigned long) &__init_end)
				initpages++;
			else if (tmp >= (unsigned long) &__bss_start
				 && tmp < (unsigned long) start_mem)
				datapages++;
			else
				reservedpages++;
			continue;
		}
		atomic_set(&mem_map[MAP_NR(tmp)].count, 1);
#ifdef CONFIG_BLK_DEV_INITRD
		if (!initrd_start || (tmp < initrd_start || tmp >= initrd_end))
#endif
			free_page(tmp);
	}
	printk("Memory: %luk/%luk available (%dk kernel code, %dk reserved, %dk data, %dk init)\n",
		(unsigned long) nr_free_pages << (PAGE_SHIFT-10),
		max_mapnr << (PAGE_SHIFT-10),
		codepages << (PAGE_SHIFT-10),
		reservedpages << (PAGE_SHIFT-10),
		datapages << (PAGE_SHIFT-10),
		initpages << (PAGE_SHIFT-10));

	if (boot_cpu_data.wp_works_ok < 0)
		test_wp_bit();
}
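
/*
 * Free the pages between __init_begin and __init_end: the code and data
 * marked __init are only needed during boot.
 */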
void free_initmem(void)
{
	unsigned long addr;

	addr = (unsigned long)(&__init_begin);
	for (; addr < (unsigned long)(&__init_end); addr += PAGE_SIZE) {
		mem_map[MAP_NR(addr)].flags &= ~(1 << PG_reserved);
		atomic_set(&mem_map[MAP_NR(addr)].count, 1);
		free_page(addr);
	}
	printk ("Freeing unused kernel memory: %dk freed\n", (&__init_end - &__init_begin) >> 10);
}
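
/*
 * Fill in the sysinfo structure with total, free, shared and buffer
 * memory figures (in bytes).
 */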
void si_meminfo(struct sysinfo *val)
{
	int i;

	i = max_mapnr;
	val->totalram = 0;
	val->sharedram = 0;
	val->freeram = nr_free_pages << PAGE_SHIFT;
	val->bufferram = buffermem;
	while (i-- > 0) {
		if (PageReserved(mem_map+i))
			continue;
		val->totalram++;
		if (!atomic_read(&mem_map[i].count))
			continue;
		val->sharedram += atomic_read(&mem_map[i].count) - 1;
	}
	val->totalram <<= PAGE_SHIFT;
	val->sharedram <<= PAGE_SHIFT;
	return;
}