Linux 2.4.0-test10pre4
arch/i386/mm/init.c
/*
 *  linux/arch/i386/mm/init.c
 *
 *  Copyright (C) 1995  Linus Torvalds
 *
 *  Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999
 */
#include <linux/config.h>
#include <linux/signal.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/types.h>
#include <linux/ptrace.h>
#include <linux/mman.h>
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/smp.h>
#include <linux/init.h>
#ifdef CONFIG_BLK_DEV_INITRD
#include <linux/blk.h>
#endif
#include <linux/highmem.h>
#include <linux/pagemap.h>
#include <linux/bootmem.h>

#include <asm/processor.h>
#include <asm/system.h>
#include <asm/uaccess.h>
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
#include <asm/dma.h>
#include <asm/fixmap.h>
#include <asm/e820.h>
#include <asm/apic.h>

unsigned long highstart_pfn, highend_pfn;
static unsigned long totalram_pages;
static unsigned long totalhigh_pages;
/*
 * BAD_PAGE is the page that is used for page faults when linux
 * is out-of-memory. Older versions of linux just did a
 * do_exit(), but using this instead means there is less risk
 * for a process dying in kernel mode, possibly leaving an inode
 * unused etc..
 *
 * BAD_PAGETABLE is the accompanying page-table: it is initialized
 * to point to BAD_PAGE entries.
 *
 * ZERO_PAGE is a special page that is used for zero-initialized
 * data and COW.
 */

/*
 * These are allocated in head.S so that we get proper page alignment.
 * If you change the size of these then change head.S as well.
 */
extern char empty_bad_page[PAGE_SIZE];
#if CONFIG_X86_PAE
extern pmd_t empty_bad_pmd_table[PTRS_PER_PMD];
#endif
extern pte_t empty_bad_pte_table[PTRS_PER_PTE];
/*
 * We init them before every return and make them writable-shared.
 * This guarantees we get out of the kernel in some more or less sane
 * way.
 */
#if CONFIG_X86_PAE
static pmd_t * get_bad_pmd_table(void)
{
	pmd_t v;
	int i;

	set_pmd(&v, __pmd(_PAGE_TABLE + __pa(empty_bad_pte_table)));

	for (i = 0; i < PAGE_SIZE/sizeof(pmd_t); i++)
		empty_bad_pmd_table[i] = v;

	return empty_bad_pmd_table;
}
#endif
static pte_t * get_bad_pte_table(void)
{
	pte_t v;
	int i;

	v = pte_mkdirty(mk_pte_phys(__pa(empty_bad_page), PAGE_SHARED));

	for (i = 0; i < PAGE_SIZE/sizeof(pte_t); i++)
		empty_bad_pte_table[i] = v;

	return empty_bad_pte_table;
}
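/*
 * Recovery helpers for corrupted page-table entries: report the bad
 * pmd and point it at the bad pte table, so that subsequent accesses
 * hit BAD_PAGE instead of random memory.
 */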
void __handle_bad_pmd(pmd_t *pmd)
{
	pmd_ERROR(*pmd);
	set_pmd(pmd, __pmd(_PAGE_TABLE + __pa(get_bad_pte_table())));
}

void __handle_bad_pmd_kernel(pmd_t *pmd)
{
	pmd_ERROR(*pmd);
	set_pmd(pmd, __pmd(_KERNPG_TABLE + __pa(get_bad_pte_table())));
}
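/*
 * Slow paths for pte allocation: grab a fresh page, clear it and plug
 * it into the pmd (kernel or user flavour). On allocation failure the
 * pmd is pointed at the bad pte table; if the pmd was populated in the
 * meantime, the page is freed again and the existing table is used.
 */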
pte_t *get_pte_kernel_slow(pmd_t *pmd, unsigned long offset)
{
	pte_t *pte;

	pte = (pte_t *) __get_free_page(GFP_KERNEL);
	if (pmd_none(*pmd)) {
		if (pte) {
			clear_page(pte);
			set_pmd(pmd, __pmd(_KERNPG_TABLE + __pa(pte)));
			return pte + offset;
		}
		set_pmd(pmd, __pmd(_KERNPG_TABLE + __pa(get_bad_pte_table())));
		return NULL;
	}
	free_page((unsigned long)pte);
	if (pmd_bad(*pmd)) {
		__handle_bad_pmd_kernel(pmd);
		return NULL;
	}
	return (pte_t *) pmd_page(*pmd) + offset;
}
pte_t *get_pte_slow(pmd_t *pmd, unsigned long offset)
{
	unsigned long pte;

	pte = (unsigned long) __get_free_page(GFP_KERNEL);
	if (pmd_none(*pmd)) {
		if (pte) {
			clear_page((void *)pte);
			set_pmd(pmd, __pmd(_PAGE_TABLE + __pa(pte)));
			return (pte_t *)pte + offset;
		}
		set_pmd(pmd, __pmd(_PAGE_TABLE + __pa(get_bad_pte_table())));
		return NULL;
	}
	free_page(pte);
	if (pmd_bad(*pmd)) {
		__handle_bad_pmd(pmd);
		return NULL;
	}
	return (pte_t *) pmd_page(*pmd) + offset;
}
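/*
 * Trim the pgd/pmd/pte quicklists: once the cache grows past 'high',
 * release entries back to the page allocator until it drops to 'low'.
 * Returns the number of entries freed.
 */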
int do_check_pgt_cache(int low, int high)
{
	int freed = 0;
	if (pgtable_cache_size > high) {
		do {
			if (pgd_quicklist)
				free_pgd_slow(get_pgd_fast()), freed++;
			if (pmd_quicklist)
				free_pmd_slow(get_pmd_fast()), freed++;
			if (pte_quicklist)
				free_pte_slow(get_pte_fast()), freed++;
		} while (pgtable_cache_size > low);
	}
	return freed;
}
/*
 * NOTE: pagetable_init() allocates all the fixmap pagetables
 * contiguously in physical space, so we can cache the place of the
 * first one and move around without checking the pgd every time.
 */

#if CONFIG_HIGHMEM
pte_t *kmap_pte;
pgprot_t kmap_prot;

#define kmap_get_fixmap_pte(vaddr) \
	pte_offset(pmd_offset(pgd_offset_k(vaddr), (vaddr)), (vaddr))

void __init kmap_init(void)
{
	unsigned long kmap_vstart;

	/* cache the first kmap pte */
	kmap_vstart = __fix_to_virt(FIX_KMAP_BEGIN);
	kmap_pte = kmap_get_fixmap_pte(kmap_vstart);

	kmap_prot = PAGE_KERNEL;
}
#endif /* CONFIG_HIGHMEM */
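/*
 * Print a summary of memory usage to the console: free/total pages,
 * highmem, reserved, shared and swap-cached pages, plus buffer state.
 */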
void show_mem(void)
{
	int i, free = 0, total = 0, reserved = 0;
	int shared = 0, cached = 0;
	int highmem = 0;

	printk("Mem-info:\n");
	show_free_areas();
	printk("Free swap: %6dkB\n", nr_swap_pages<<(PAGE_SHIFT-10));
	i = max_mapnr;
	while (i-- > 0) {
		total++;
		if (PageHighMem(mem_map+i))
			highmem++;
		if (PageReserved(mem_map+i))
			reserved++;
		else if (PageSwapCache(mem_map+i))
			cached++;
		else if (!page_count(mem_map+i))
			free++;
		else
			shared += page_count(mem_map+i) - 1;
	}
	printk("%d pages of RAM\n", total);
	printk("%d pages of HIGHMEM\n", highmem);
	printk("%d reserved pages\n", reserved);
	printk("%d pages shared\n", shared);
	printk("%d pages swap cached\n", cached);
	printk("%ld pages in page table cache\n", pgtable_cache_size);
	show_buffers();
}
/* References to section boundaries */

extern char _text, _etext, _edata, __bss_start, _end;
extern char __init_begin, __init_end;
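/*
 * Install a single kernel mapping for 'vaddr' -> 'phys' in
 * swapper_pg_dir. The page-table structure must already exist;
 * this is the worker behind __set_fixmap().
 */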
static inline void set_pte_phys (unsigned long vaddr,
			unsigned long phys, pgprot_t flags)
{
	pgprot_t prot;
	pgd_t *pgd;
	pmd_t *pmd;
	pte_t *pte;

	pgd = swapper_pg_dir + __pgd_offset(vaddr);
	if (pgd_none(*pgd)) {
		printk("PAE BUG #00!\n");
		return;
	}
	pmd = pmd_offset(pgd, vaddr);
	if (pmd_none(*pmd)) {
		printk("PAE BUG #01!\n");
		return;
	}
	pte = pte_offset(pmd, vaddr);
	if (pte_val(*pte))
		pte_ERROR(*pte);
	pgprot_val(prot) = pgprot_val(PAGE_KERNEL) | pgprot_val(flags);
	set_pte(pte, mk_pte_phys(phys, prot));

	/*
	 * It's enough to flush this one mapping.
	 * (PGE mappings get flushed as well)
	 */
	__flush_tlb_one(vaddr);
}
void __set_fixmap (enum fixed_addresses idx, unsigned long phys, pgprot_t flags)
{
	unsigned long address = __fix_to_virt(idx);

	if (idx >= __end_of_fixed_addresses) {
		printk("Invalid __set_fixmap\n");
		return;
	}
	set_pte_phys(address, phys, flags);
}
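/*
 * Allocate the pmd/pte pages needed to cover the virtual range
 * [start, end) in the kernel page tables. Only the table structure is
 * created here; the leaf ptes are filled in later (by set_fixmap()
 * or the kmap code).
 */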
static void __init fixrange_init (unsigned long start, unsigned long end, pgd_t *pgd_base)
{
	pgd_t *pgd;
	pmd_t *pmd;
	pte_t *pte;
	int i, j;
	unsigned long vaddr;

	vaddr = start;
	i = __pgd_offset(vaddr);
	j = __pmd_offset(vaddr);
	pgd = pgd_base + i;

	for ( ; (i < PTRS_PER_PGD) && (vaddr != end); pgd++, i++) {
#if CONFIG_X86_PAE
		if (pgd_none(*pgd)) {
			pmd = (pmd_t *) alloc_bootmem_low_pages(PAGE_SIZE);
			set_pgd(pgd, __pgd(__pa(pmd) + 0x1));
			if (pmd != pmd_offset(pgd, 0))
				printk("PAE BUG #02!\n");
		}
		pmd = pmd_offset(pgd, vaddr);
#else
		pmd = (pmd_t *)pgd;
#endif
		for (; (j < PTRS_PER_PMD) && (vaddr != end); pmd++, j++) {
			if (pmd_none(*pmd)) {
				pte = (pte_t *) alloc_bootmem_low_pages(PAGE_SIZE);
				set_pmd(pmd, __pmd(_KERNPG_TABLE + __pa(pte)));
				if (pte != pte_offset(pmd, 0))
					BUG();
			}
			vaddr += PMD_SIZE;
		}
		j = 0;
	}
}
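/*
 * Build the kernel page tables: map all of low memory starting at
 * PAGE_OFFSET (using 4MB PSE pages where the CPU supports them),
 * reserve page-table space for the fixmap range and, with
 * CONFIG_HIGHMEM, for the permanent kmap area.
 */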
static void __init pagetable_init (void)
{
	unsigned long vaddr, end;
	pgd_t *pgd, *pgd_base;
	int i, j, k;
	pmd_t *pmd;
	pte_t *pte;

	/*
	 * This can be zero as well - no problem, in that case we exit
	 * the loops anyway due to the PTRS_PER_* conditions.
	 */
	end = (unsigned long)__va(max_low_pfn*PAGE_SIZE);

	pgd_base = swapper_pg_dir;
#if CONFIG_X86_PAE
	for (i = 0; i < PTRS_PER_PGD; i++) {
		pgd = pgd_base + i;
		__pgd_clear(pgd);
	}
#endif
	i = __pgd_offset(PAGE_OFFSET);
	pgd = pgd_base + i;

	for (; i < PTRS_PER_PGD; pgd++, i++) {
		vaddr = i*PGDIR_SIZE;
		if (end && (vaddr >= end))
			break;
#if CONFIG_X86_PAE
		pmd = (pmd_t *) alloc_bootmem_low_pages(PAGE_SIZE);
		set_pgd(pgd, __pgd(__pa(pmd) + 0x1));
#else
		pmd = (pmd_t *)pgd;
#endif
		if (pmd != pmd_offset(pgd, 0))
			BUG();
		for (j = 0; j < PTRS_PER_PMD; pmd++, j++) {
			vaddr = i*PGDIR_SIZE + j*PMD_SIZE;
			if (end && (vaddr >= end))
				break;
			if (cpu_has_pse) {
				unsigned long __pe;

				set_in_cr4(X86_CR4_PSE);
				boot_cpu_data.wp_works_ok = 1;
				__pe = _KERNPG_TABLE + _PAGE_PSE + __pa(vaddr);
				/* Make it "global" too if supported */
				if (cpu_has_pge) {
					set_in_cr4(X86_CR4_PGE);
					__pe += _PAGE_GLOBAL;
				}
				set_pmd(pmd, __pmd(__pe));
				continue;
			}

			pte = (pte_t *) alloc_bootmem_low_pages(PAGE_SIZE);
			set_pmd(pmd, __pmd(_KERNPG_TABLE + __pa(pte)));

			if (pte != pte_offset(pmd, 0))
				BUG();

			for (k = 0; k < PTRS_PER_PTE; pte++, k++) {
				vaddr = i*PGDIR_SIZE + j*PMD_SIZE + k*PAGE_SIZE;
				if (end && (vaddr >= end))
					break;
				*pte = mk_pte_phys(__pa(vaddr), PAGE_KERNEL);
			}
		}
	}
	/*
	 * Fixed mappings, only the page table structure has to be
	 * created - mappings will be set by set_fixmap():
	 */
	vaddr = __fix_to_virt(__end_of_fixed_addresses - 1) & PMD_MASK;
	fixrange_init(vaddr, 0, pgd_base);

#if CONFIG_HIGHMEM
	/*
	 * Permanent kmaps:
	 */
	vaddr = PKMAP_BASE;
	fixrange_init(vaddr, vaddr + PAGE_SIZE*LAST_PKMAP, pgd_base);

	pgd = swapper_pg_dir + __pgd_offset(vaddr);
	pmd = pmd_offset(pgd, vaddr);
	pte = pte_offset(pmd, vaddr);
	pkmap_page_table = pte;
#endif

#if CONFIG_X86_PAE
	/*
	 * Add low memory identity-mappings - SMP needs it when
	 * starting up on an AP from real-mode. In the non-PAE
	 * case we already have these mappings through head.S.
	 * All user-space mappings are explicitly cleared after
	 * SMP startup.
	 */
	pgd_base[0] = pgd_base[USER_PTRS_PER_PGD];
#endif
}
void __init zap_low_mappings (void)
{
	int i;
	/*
	 * Zap initial low-memory mappings.
	 *
	 * Note that "pgd_clear()" doesn't do it for
	 * us in this case, because pgd_clear() is a
	 * no-op in the 2-level case (pmd_clear() is
	 * the thing that clears the page-tables in
	 * that case).
	 */
	for (i = 0; i < USER_PTRS_PER_PGD; i++)
#if CONFIG_X86_PAE
		pgd_clear(swapper_pg_dir+i);
#else
		set_pgd(swapper_pg_dir+i, __pgd(0));
#endif
	flush_tlb_all();
}
/*
 * paging_init() sets up the page tables - note that the first 4MB are
 * already mapped by head.S.
 *
 * This routine also unmaps the page at virtual kernel address 0, so
 * that we can trap those pesky NULL-reference errors in the kernel.
 */
void __init paging_init(void)
{
	pagetable_init();

	__asm__( "movl %%ecx,%%cr3\n" ::"c"(__pa(swapper_pg_dir)));

#if CONFIG_X86_PAE
	/*
	 * We will bail out later - printk doesn't work right now, so
	 * the user would just see a hanging kernel.
	 */
	if (cpu_has_pae)
		set_in_cr4(X86_CR4_PAE);
#endif

	__flush_tlb_all();

#ifdef CONFIG_HIGHMEM
	kmap_init();
#endif
	{
		unsigned long zones_size[MAX_NR_ZONES] = {0, 0, 0};
		unsigned int max_dma, high, low;

		max_dma = virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT;
		low = max_low_pfn;
		high = highend_pfn;

		if (low < max_dma)
			zones_size[ZONE_DMA] = low;
		else {
			zones_size[ZONE_DMA] = max_dma;
			zones_size[ZONE_NORMAL] = low - max_dma;
#ifdef CONFIG_HIGHMEM
			zones_size[ZONE_HIGHMEM] = high - low;
#endif
		}
		free_area_init(zones_size);
	}
	return;
}
/*
 * Test if the WP bit works in supervisor mode. It isn't supported on 386's
 * and also on some strange 486's (NexGen etc.). All 586+'s are OK. The jumps
 * before and after the test are here to work-around some nasty CPU bugs.
 */

/*
 * This function cannot be __init, since exceptions don't work in that
 * section.
 */
static int do_test_wp_bit(unsigned long vaddr)
{
	char tmp_reg;
	int flag;
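	/*
	 * Read one byte from the read-only page at 'vaddr' and write it
	 * back. 'flag' starts out as 1; if the CPU honours WP in
	 * supervisor mode the write faults and the exception-table fixup
	 * resumes at label 2, skipping the "xorl", so flag stays 1. If
	 * the write silently succeeds, flag is cleared to 0.
	 */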
	__asm__ __volatile__(
		"	movb %0,%1	\n"
		"1:	movb %1,%0	\n"
		"	xorl %2,%2	\n"
		"2:			\n"
		".section __ex_table,\"a\"\n"
		"	.align 4	\n"
		"	.long 1b,2b	\n"
		".previous		\n"
		:"=m" (*(char *) vaddr),
		 "=q" (tmp_reg),
		 "=r" (flag)
		:"2" (1)
		:"memory");

	return flag;
}
void __init test_wp_bit(void)
{
	/*
	 * Ok, all PSE-capable CPUs are definitely handling the WP bit right.
	 */
	const unsigned long vaddr = PAGE_OFFSET;
	pgd_t *pgd;
	pmd_t *pmd;
	pte_t *pte, old_pte;

	printk("Checking if this processor honours the WP bit even in supervisor mode... ");

	pgd = swapper_pg_dir + __pgd_offset(vaddr);
	pmd = pmd_offset(pgd, vaddr);
	pte = pte_offset(pmd, vaddr);
	old_pte = *pte;
	*pte = mk_pte_phys(0, PAGE_READONLY);
	local_flush_tlb();

	boot_cpu_data.wp_works_ok = do_test_wp_bit(vaddr);

	*pte = old_pte;
	local_flush_tlb();

	if (!boot_cpu_data.wp_works_ok) {
		printk("No.\n");
#ifdef CONFIG_X86_WP_WORKS_OK
		panic("This kernel doesn't support CPU's with broken WP. Recompile it for a 386!");
#endif
	} else {
		printk("Ok.\n");
	}
}
static inline int page_is_ram (unsigned long pagenr)
{
	int i;

	for (i = 0; i < e820.nr_map; i++) {
		unsigned long addr, end;

		if (e820.map[i].type != E820_RAM)	/* not usable memory */
			continue;
		/*
		 * !!!FIXME!!! Some BIOSen report areas as RAM that
		 * are not. Notably the 640->1Mb area. We need a sanity
		 * check here.
		 */
		addr = (e820.map[i].addr+PAGE_SIZE-1) >> PAGE_SHIFT;
		end = (e820.map[i].addr+e820.map[i].size) >> PAGE_SHIFT;
		if ((pagenr >= addr) && (pagenr < end))
			return 1;
	}
	return 0;
}
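/*
 * mem_init(): hand all low-memory bootmem pages to the page allocator,
 * free the highmem pages, count reserved pages, print the memory
 * banner, test the WP bit if still unknown and, on non-SMP kernels,
 * zap the low identity mappings.
 */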
void __init mem_init(void)
{
	int codesize, reservedpages, datasize, initsize;
	int tmp;

	if (!mem_map)
		BUG();

#ifdef CONFIG_HIGHMEM
	highmem_start_page = mem_map + highstart_pfn;
	max_mapnr = num_physpages = highend_pfn;
#else
	max_mapnr = num_physpages = max_low_pfn;
#endif
	high_memory = (void *) __va(max_low_pfn * PAGE_SIZE);

	/* clear the zero-page */
	memset(empty_zero_page, 0, PAGE_SIZE);

	/* this will put all low memory onto the freelists */
	totalram_pages += free_all_bootmem();

	reservedpages = 0;
	for (tmp = 0; tmp < max_low_pfn; tmp++)
		/*
		 * Only count reserved RAM pages
		 */
		if (page_is_ram(tmp) && PageReserved(mem_map+tmp))
			reservedpages++;
#ifdef CONFIG_HIGHMEM
	for (tmp = highstart_pfn; tmp < highend_pfn; tmp++) {
		struct page *page = mem_map + tmp;

		if (!page_is_ram(tmp)) {
			SetPageReserved(page);
			continue;
		}
		ClearPageReserved(page);
		set_bit(PG_highmem, &page->flags);
		atomic_set(&page->count, 1);
		__free_page(page);
		totalhigh_pages++;
	}
	totalram_pages += totalhigh_pages;
#endif
	codesize = (unsigned long) &_etext - (unsigned long) &_text;
	datasize = (unsigned long) &_edata - (unsigned long) &_etext;
	initsize = (unsigned long) &__init_end - (unsigned long) &__init_begin;

	printk("Memory: %luk/%luk available (%dk kernel code, %dk reserved, %dk data, %dk init, %ldk highmem)\n",
		(unsigned long) nr_free_pages() << (PAGE_SHIFT-10),
		max_mapnr << (PAGE_SHIFT-10),
		codesize >> 10,
		reservedpages << (PAGE_SHIFT-10),
		datasize >> 10,
		initsize >> 10,
		(unsigned long) (totalhigh_pages << (PAGE_SHIFT-10))
		);

#if CONFIG_X86_PAE
	if (!cpu_has_pae)
		panic("cannot execute a PAE-enabled kernel on a PAE-less CPU!");
#endif
	if (boot_cpu_data.wp_works_ok < 0)
		test_wp_bit();
	/*
	 * Subtle. SMP is doing its boot stuff late (because it has to
	 * fork idle threads) - but it also needs low mappings for the
	 * protected-mode entry to work. We zap these entries only after
	 * the WP-bit has been tested.
	 */
#ifndef CONFIG_SMP
	zap_low_mappings();
#endif
}
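/*
 * Free the pages between __init_begin and __init_end once the __init
 * code and data are no longer needed.
 */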
void free_initmem(void)
{
	unsigned long addr;

	addr = (unsigned long)(&__init_begin);
	for (; addr < (unsigned long)(&__init_end); addr += PAGE_SIZE) {
		ClearPageReserved(virt_to_page(addr));
		set_page_count(virt_to_page(addr), 1);
		free_page(addr);
		totalram_pages++;
	}
	printk ("Freeing unused kernel memory: %dk freed\n", (&__init_end - &__init_begin) >> 10);
}
#ifdef CONFIG_BLK_DEV_INITRD
void free_initrd_mem(unsigned long start, unsigned long end)
{
	if (start < end)
		printk ("Freeing initrd memory: %ldk freed\n", (end - start) >> 10);
	for (; start < end; start += PAGE_SIZE) {
		ClearPageReserved(virt_to_page(start));
		set_page_count(virt_to_page(start), 1);
		free_page(start);
		totalram_pages++;
	}
}
#endif
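/*
 * Report memory totals to the generic kernel code: total and free RAM,
 * buffer pages and the highmem counts.
 */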
void si_meminfo(struct sysinfo *val)
{
	val->totalram = totalram_pages;
	val->sharedram = 0;
	val->freeram = nr_free_pages();
	val->bufferram = atomic_read(&buffermem_pages);
	val->totalhigh = totalhigh_pages;
	val->freehigh = nr_free_highpages();
	val->mem_unit = PAGE_SIZE;
	return;
}