/*
 *  linux/arch/i386/mm/init.c
 *
 *  Copyright (C) 1995  Linus Torvalds
 *
 *  Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999
 */

#include <linux/config.h>
#include <linux/signal.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/types.h>
#include <linux/ptrace.h>
#include <linux/mman.h>
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/smp.h>
#include <linux/init.h>
#ifdef CONFIG_BLK_DEV_INITRD
#include <linux/blk.h>
#endif
#include <linux/highmem.h>
#include <linux/pagemap.h>
#include <linux/bootmem.h>

#include <asm/processor.h>
#include <asm/system.h>
#include <asm/uaccess.h>
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
#include <asm/dma.h>
#include <asm/fixmap.h>
#include <asm/e820.h>

unsigned long highstart_pfn, highend_pfn;
static unsigned long totalram_pages = 0;
static unsigned long totalhigh_pages = 0;

extern void show_net_buffers(void);

/*
 * BAD_PAGE is the page that is used for page faults when linux
 * is out-of-memory. Older versions of linux just did a
 * do_exit(), but using this instead means there is less risk
 * for a process dying in kernel mode, possibly leaving an inode
 * unused etc.
 *
 * BAD_PAGETABLE is the accompanying page-table: it is initialized
 * to point to BAD_PAGE entries.
 *
 * ZERO_PAGE is a special page that is used for zero-initialized
 * data and COW.
 *
 * These are allocated in head.S so that we get proper page alignment.
 * If you change the size of these then change head.S as well.
 */

extern char empty_bad_page[PAGE_SIZE];
extern pmd_t empty_bad_pmd_table[PTRS_PER_PMD];
extern pte_t empty_bad_pte_table[PTRS_PER_PTE];

/*
 * We init them before every return and make them writable-shared.
 * This guarantees we get out of the kernel in some more or less sane
 * way.
 */

static pmd_t * get_bad_pmd_table(void)
{
	pmd_t v;
	int i;

	set_pmd(&v, __pmd(_PAGE_TABLE + __pa(empty_bad_pte_table)));

	for (i = 0; i < PAGE_SIZE/sizeof(pmd_t); i++)
		empty_bad_pmd_table[i] = v;

	return empty_bad_pmd_table;
}

static pte_t * get_bad_pte_table(void)
{
	pte_t v;
	int i;

	v = pte_mkdirty(mk_pte_phys(__pa(empty_bad_page), PAGE_SHARED));

	for (i = 0; i < PAGE_SIZE/sizeof(pte_t); i++)
		empty_bad_pte_table[i] = v;

	return empty_bad_pte_table;
}
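
/*
 * A bad pmd entry is replaced with a pointer to the bad page table,
 * so that subsequent accesses hit BAD_PAGE instead of random memory.
 * The _kernel variant installs kernel-only protection bits.
 */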

void __handle_bad_pmd(pmd_t *pmd)
{
	pmd_ERROR(*pmd);
	set_pmd(pmd, __pmd(_PAGE_TABLE + __pa(get_bad_pte_table())));
}

void __handle_bad_pmd_kernel(pmd_t *pmd)
{
	pmd_ERROR(*pmd);
	set_pmd(pmd, __pmd(_KERNPG_TABLE + __pa(get_bad_pte_table())));
}
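
/*
 * Slow-path pte allocation, used when the quicklists are empty: a fresh
 * page is allocated and hooked into the pmd; on allocation failure the
 * pmd is pointed at the bad page table instead.
 */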

pte_t *get_pte_kernel_slow(pmd_t *pmd, unsigned long offset)
{
	pte_t *pte;

	pte = (pte_t *) __get_free_page(GFP_KERNEL);
	if (pmd_none(*pmd)) {
		if (pte) {
			clear_page(pte);
			set_pmd(pmd, __pmd(_KERNPG_TABLE + __pa(pte)));
			return pte + offset;
		}
		set_pmd(pmd, __pmd(_KERNPG_TABLE + __pa(get_bad_pte_table())));
		return NULL;
	}
	free_page((unsigned long)pte);
	if (pmd_bad(*pmd)) {
		__handle_bad_pmd_kernel(pmd);
		return NULL;
	}
	return (pte_t *) pmd_page(*pmd) + offset;
}

pte_t *get_pte_slow(pmd_t *pmd, unsigned long offset)
{
	unsigned long pte;

	pte = (unsigned long) __get_free_page(GFP_KERNEL);
	if (pmd_none(*pmd)) {
		if (pte) {
			clear_page((void *)pte);
			set_pmd(pmd, __pmd(_PAGE_TABLE + __pa(pte)));
			return (pte_t *)pte + offset;
		}
		set_pmd(pmd, __pmd(_PAGE_TABLE + __pa(get_bad_pte_table())));
		return NULL;
	}
	free_page(pte);
	if (pmd_bad(*pmd)) {
		__handle_bad_pmd(pmd);
		return NULL;
	}
	return (pte_t *) pmd_page(*pmd) + offset;
}
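
/*
 * Trim the page-table quicklists back to the "low" watermark once they
 * have grown past "high"; returns the number of pages freed.
 */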

int do_check_pgt_cache(int low, int high)
{
	int freed = 0;

	if(pgtable_cache_size > high) {
		do {
			if(pgd_quicklist)
				free_pgd_slow(get_pgd_fast()), freed++;
			if(pmd_quicklist)
				free_pmd_slow(get_pmd_fast()), freed++;
			if(pte_quicklist)
				free_pte_slow(get_pte_fast()), freed++;
		} while(pgtable_cache_size > low);
	}
	return freed;
}

/*
 * NOTE: pagetable_init allocates all the fixmap pagetables contiguously
 * in physical space so we can cache the location of the first one and
 * move around without checking the pgd every time.
 */

#if CONFIG_HIGHMEM
pte_t *kmap_pte;
pgprot_t kmap_prot;

#define kmap_get_fixmap_pte(vaddr) \
	pte_offset(pmd_offset(pgd_offset_k(vaddr), (vaddr)), (vaddr))
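
/*
 * kmap_init() caches the pte that backs the first fixmap kmap slot and
 * records the protection bits used for kernel highmem mappings.
 */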

void __init kmap_init(void)
{
	unsigned long kmap_vstart;

	/* cache the first kmap pte */
	kmap_vstart = __fix_to_virt(FIX_KMAP_BEGIN);
	kmap_pte = kmap_get_fixmap_pte(kmap_vstart);

	kmap_prot = PAGE_KERNEL;
	if (boot_cpu_data.x86_capability & X86_FEATURE_PGE)
		pgprot_val(kmap_prot) |= _PAGE_GLOBAL;
}
#endif /* CONFIG_HIGHMEM */
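
/*
 * show_mem() dumps a summary of page usage (free, reserved, shared,
 * swap-cached and highmem pages) to the console, mainly for debugging.
 */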

void show_mem(void)
{
	int i, free = 0, total = 0, reserved = 0;
	int shared = 0, cached = 0;
	int highmem = 0;

	printk("Mem-info:\n");
	show_free_areas();
	printk("Free swap: %6dkB\n", nr_swap_pages<<(PAGE_SHIFT-10));
	i = max_mapnr;
	while (i-- > 0) {
		total++;
		if (PageHighMem(mem_map+i))
			highmem++;
		if (PageReserved(mem_map+i))
			reserved++;
		else if (PageSwapCache(mem_map+i))
			cached++;
		else if (!page_count(mem_map+i))
			free++;
		else
			shared += page_count(mem_map+i) - 1;
	}
	printk("%d pages of RAM\n", total);
	printk("%d pages of HIGHMEM\n", highmem);
	printk("%d reserved pages\n", reserved);
	printk("%d pages shared\n", shared);
	printk("%d pages swap cached\n", cached);
	printk("%ld pages in page table cache\n", pgtable_cache_size);
	show_buffers();
}

/* References to section boundaries */

extern char _text, _etext, _edata, __bss_start, _end;
extern char __init_begin, __init_end;
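
/*
 * set_pte_phys() installs a single kernel mapping for the given virtual
 * address, pointing it at the given physical address; set_fixmap() uses
 * it to wire up the compile-time fixmap slots.
 */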

static void set_pte_phys (unsigned long vaddr, unsigned long phys)
{
	pgprot_t prot;
	pgd_t *pgd;
	pmd_t *pmd;
	pte_t *pte;

	pgd = swapper_pg_dir + __pgd_offset(vaddr);
	pmd = pmd_offset(pgd, vaddr);
	pte = pte_offset(pmd, vaddr);
	prot = PAGE_KERNEL;
	if (boot_cpu_data.x86_capability & X86_FEATURE_PGE)
		pgprot_val(prot) |= _PAGE_GLOBAL;
	set_pte(pte, mk_pte_phys(phys, prot));

	/*
	 * It's enough to flush this one mapping.
	 */
	__flush_tlb_one(vaddr);
}

void set_fixmap (enum fixed_addresses idx, unsigned long phys)
{
	unsigned long address = __fix_to_virt(idx);

	if (idx >= __end_of_fixed_addresses) {
		printk("Invalid set_fixmap\n");
		return;
	}
	set_pte_phys(address, phys);
}
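
/*
 * fixrange_init() builds just the page-table structure (pmds and ptes)
 * for a virtual address range; the actual mappings are installed later,
 * e.g. by set_fixmap() or the kmap code.
 */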

static void __init fixrange_init (unsigned long start, unsigned long end, pgd_t *pgd_base)
{
	pgd_t *pgd;
	pmd_t *pmd;
	pte_t *pte;
	int i, j;

	i = __pgd_offset(start);
	j = __pmd_offset(start);
	pgd = pgd_base + i;

	for ( ; (i < PTRS_PER_PGD) && (start != end); pgd++, i++) {
		if (pgd_none(*pgd)) {
			pmd = (pmd_t *) alloc_bootmem_low_pages(PAGE_SIZE);
			set_pgd(pgd, __pgd(__pa(pmd) + 0x1));
			if (pmd != pmd_offset(pgd, start))
				BUG();
		}
		pmd = pmd_offset(pgd, start);
		for (; (j < PTRS_PER_PMD) && start; pmd++, j++) {
			if (pmd_none(*pmd)) {
				pte = (pte_t *) alloc_bootmem_low_pages(PAGE_SIZE);
				set_pmd(pmd, __pmd(_KERNPG_TABLE + __pa(pte)));
				if (pte != pte_offset(pmd, 0))
					BUG();
			}
			start += PMD_SIZE;
		}
		j = 0;
	}
}
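
/*
 * pagetable_init() sets up the kernel's linear mapping of low memory,
 * using 4MB pages where the CPU supports PSE, and then creates the
 * page-table structure for the fixmap and (with CONFIG_HIGHMEM) the
 * persistent kmap area.
 */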

static void __init pagetable_init(void)
{
	pgd_t *pgd, *pgd_base;
	int i, j, k;
	pmd_t *pmd;
	pte_t *pte;
	unsigned long vaddr;
	unsigned long end = (unsigned long)__va(max_low_pfn*PAGE_SIZE);

	pgd_base = swapper_pg_dir;

	vaddr = PAGE_OFFSET;
	i = __pgd_offset(vaddr);
	pgd = pgd_base + i;

	for (; (i < PTRS_PER_PGD) && (vaddr <= end); pgd++, i++) {
		vaddr = i*PGDIR_SIZE;
#if CONFIG_X86_PAE
		pmd = (pmd_t *) alloc_bootmem_low_pages(PAGE_SIZE);
		set_pgd(pgd, __pgd(__pa(pmd) + 0x1));
#else
		pmd = (pmd_t *) pgd;
#endif
		if (pmd != pmd_offset(pgd, 0))
			BUG();
		for (j = 0; (j < PTRS_PER_PMD) && (vaddr <= end); pmd++, j++) {
			vaddr = i*PGDIR_SIZE + j*PMD_SIZE;

			if (boot_cpu_data.x86_capability & X86_FEATURE_PSE) {
				unsigned long __pe;

				set_in_cr4(X86_CR4_PSE);
				boot_cpu_data.wp_works_ok = 1;
				__pe = _KERNPG_TABLE + _PAGE_PSE + __pa(vaddr);
				/* Make it "global" too if supported */
				if (boot_cpu_data.x86_capability & X86_FEATURE_PGE) {
					set_in_cr4(X86_CR4_PGE);
					__pe += _PAGE_GLOBAL;
				}
				set_pmd(pmd, __pmd(__pe));
				continue;
			}

			pte = (pte_t *) alloc_bootmem_low_pages(PAGE_SIZE);
			set_pmd(pmd, __pmd(_KERNPG_TABLE + __pa(pte)));

			if (pte != pte_offset(pmd, 0))
				BUG();

			for (k = 0;
			     (k < PTRS_PER_PTE) && (vaddr <= end);
			     pte++, k++) {
				vaddr = i*PGDIR_SIZE + j*PMD_SIZE + k*PAGE_SIZE;
				*pte = mk_pte_phys(__pa(vaddr), PAGE_KERNEL);
			}
		}
	}

	/*
	 * Fixed mappings, only the page table structure has to be
	 * created - mappings will be set by set_fixmap():
	 */
	vaddr = __fix_to_virt(__end_of_fixed_addresses - 1) & PMD_MASK;
	fixrange_init(vaddr, 0, pgd_base);

#if CONFIG_HIGHMEM
	/*
	 * Permanent kmaps:
	 */
	vaddr = PKMAP_BASE;
	fixrange_init(vaddr, vaddr + PAGE_SIZE*LAST_PKMAP, pgd_base);

	pgd = swapper_pg_dir + __pgd_offset(vaddr);
	pmd = pmd_offset(pgd, vaddr);
	pte = pte_offset(pmd, vaddr);
	pkmap_page_table = pte;
#endif

#if CONFIG_X86_PAE
	/*
	 * Add low memory identity-mappings - SMP needs it when
	 * starting up on an AP from real-mode. In the non-PAE
	 * case we already have these mappings through head.S.
	 * All user-space mappings are explicitly cleared after
	 * SMP startup.
	 */
	pgd_base[0] = pgd_base[USER_PTRS_PER_PGD];
#endif
}

void __init zap_low_mappings (void)
{
	int i;
	/*
	 * Zap initial low-memory mappings.
	 *
	 * Note that "pgd_clear()" doesn't do it for
	 * us in this case, because pgd_clear() is a
	 * no-op in the 2-level case (pmd_clear() is
	 * the thing that clears the page-tables in
	 * that case).
	 */
	for (i = 0; i < USER_PTRS_PER_PGD; i++)
#if CONFIG_X86_PAE
		pgd_clear(swapper_pg_dir+i);
#else
		set_pgd(swapper_pg_dir+i, __pgd(0));
#endif
	flush_tlb_all();
}

/*
 * paging_init() sets up the page tables - note that the first 4MB are
 * already mapped by head.S.
 *
 * This routine also unmaps the page at virtual kernel address 0, so
 * that we can trap those pesky NULL-reference errors in the kernel.
 */
void __init paging_init(void)
{
	pagetable_init();

	__asm__( "movl %%ecx,%%cr3\n" ::"c"(__pa(swapper_pg_dir)));

#if CONFIG_X86_PAE
	/*
	 * We will bail out later - printk doesn't work right now so
	 * the user would just see a hanging kernel.
	 */
	if (boot_cpu_data.x86_capability & X86_FEATURE_PAE)
		set_in_cr4(X86_CR4_PAE);
#endif

	__flush_tlb_all();

#ifdef CONFIG_HIGHMEM
	kmap_init();
#endif
	{
		unsigned int zones_size[MAX_NR_ZONES] = {0, 0, 0};
		unsigned int max_dma, high, low;

		max_dma = virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT;
		low = max_low_pfn;
		high = highend_pfn;

		if (low < max_dma)
			zones_size[ZONE_DMA] = low;
		else {
			zones_size[ZONE_DMA] = max_dma;
			zones_size[ZONE_NORMAL] = low - max_dma;
#ifdef CONFIG_HIGHMEM
			zones_size[ZONE_HIGHMEM] = high - low;
#endif
		}
		free_area_init(zones_size);
	}
	return;
}

/*
 * Test if the WP bit works in supervisor mode. It isn't supported on 386's
 * and also on some strange 486's (NexGen etc.). All 586+'s are OK. The jumps
 * before and after the test are here to work-around some nasty CPU bugs.
 */

void __init test_wp_bit(void)
{
/*
 * Ok, all PAE-capable CPUs are definitely handling the WP bit right.
 */
	const unsigned long vaddr = PAGE_OFFSET;
	pgd_t *pgd;
	pmd_t *pmd;
	pte_t *pte, old_pte;
	char tmp_reg;

	printk("Checking if this processor honours the WP bit even in supervisor mode... ");

	pgd = swapper_pg_dir + __pgd_offset(vaddr);
	pmd = pmd_offset(pgd, vaddr);
	pte = pte_offset(pmd, vaddr);
	old_pte = *pte;
	*pte = mk_pte_phys(0, PAGE_READONLY);
	local_flush_tlb();

	__asm__ __volatile__(
		"jmp 1f; 1:\n"
		"movb %0,%1\n"
		"movb %1,%0\n"
		"jmp 1f; 1:\n"
		:"=m" (*(char *) vaddr),
		 "=q" (tmp_reg)
		:/* no inputs */
		:"memory");

	*pte = old_pte;
	local_flush_tlb();

	if (boot_cpu_data.wp_works_ok < 0) {
		boot_cpu_data.wp_works_ok = 0;
		printk("No.\n");
#ifdef CONFIG_X86_WP_WORKS_OK
		panic("This kernel doesn't support CPUs with broken WP. Recompile it for a 386!");
#endif
	} else
		printk(".\n");
}

static inline int page_is_ram (unsigned long pagenr)
{
	int i;

	for (i = 0; i < e820.nr_map; i++) {
		unsigned long addr, end;

		if (e820.map[i].type != E820_RAM)	/* not usable memory */
			continue;
		/*
		 * !!!FIXME!!! Some BIOSen report areas as RAM that
		 * are not. Notably the 640->1Mb area. We need a sanity
		 * check here.
		 */
		addr = (e820.map[i].addr+PAGE_SIZE-1) >> PAGE_SHIFT;
		end = (e820.map[i].addr+e820.map[i].size) >> PAGE_SHIFT;
		if ((pagenr >= addr) && (pagenr < end))
			return 1;
	}
	return 0;
}

void __init mem_init(void)
{
	int codesize, reservedpages, datasize, initsize;
	int tmp;

	if (!mem_map)
		BUG();

#ifdef CONFIG_HIGHMEM
	highmem_start_page = mem_map + highstart_pfn;
	/* cache the highmem_mapnr */
	highmem_mapnr = highstart_pfn;
	max_mapnr = num_physpages = highend_pfn;
#else
	max_mapnr = num_physpages = max_low_pfn;
#endif
	high_memory = (void *) __va(max_low_pfn * PAGE_SIZE);

	/* clear the zero-page */
	memset(empty_zero_page, 0, PAGE_SIZE);

	/* this will put all low memory onto the freelists */
	totalram_pages += free_all_bootmem();

	reservedpages = 0;
	for (tmp = 0; tmp < max_low_pfn; tmp++)
		/*
		 * Only count reserved RAM pages
		 */
		if (page_is_ram(tmp) && PageReserved(mem_map+tmp))
			reservedpages++;
#ifdef CONFIG_HIGHMEM
	for (tmp = highstart_pfn; tmp < highend_pfn; tmp++) {
		struct page *page = mem_map + tmp;

		if (!page_is_ram(tmp)) {
			SetPageReserved(page);
			continue;
		}
		ClearPageReserved(page);
		set_bit(PG_highmem, &page->flags);
		atomic_set(&page->count, 1);
		__free_page(page);
		totalhigh_pages++;
	}
	totalram_pages += totalhigh_pages;
#endif
	codesize = (unsigned long) &_etext - (unsigned long) &_text;
	datasize = (unsigned long) &_edata - (unsigned long) &_etext;
	initsize = (unsigned long) &__init_end - (unsigned long) &__init_begin;

	printk("Memory: %luk/%luk available (%dk kernel code, %dk reserved, %dk data, %dk init, %ldk highmem)\n",
		(unsigned long) nr_free_pages() << (PAGE_SHIFT-10),
		max_mapnr << (PAGE_SHIFT-10),
		codesize >> 10,
		reservedpages << (PAGE_SHIFT-10),
		datasize >> 10,
		initsize >> 10,
		(unsigned long) (totalhigh_pages << (PAGE_SHIFT-10)));

#if CONFIG_X86_PAE
	if (!(boot_cpu_data.x86_capability & X86_FEATURE_PAE))
		panic("cannot execute a PAE-enabled kernel on a PAE-less CPU!");
#endif

	if (boot_cpu_data.wp_works_ok < 0)
		test_wp_bit();

	/*
	 * Subtle. SMP is doing its boot stuff late (because it has to
	 * fork idle threads) - but it also needs low mappings for the
	 * protected-mode entry to work. We zap these entries only after
	 * the WP-bit has been tested.
	 */
#ifndef CONFIG_SMP
	zap_low_mappings();
#endif
}
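
/*
 * free_initmem() returns the pages that held __init code and data to the
 * page allocator once boot-time initialization is complete.
 */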

void free_initmem(void)
{
	unsigned long addr;

	addr = (unsigned long)(&__init_begin);
	for (; addr < (unsigned long)(&__init_end); addr += PAGE_SIZE) {
		ClearPageReserved(mem_map + MAP_NR(addr));
		set_page_count(mem_map+MAP_NR(addr), 1);
		free_page(addr);
		totalram_pages++;
	}
	printk ("Freeing unused kernel memory: %dk freed\n", (&__init_end - &__init_begin) >> 10);
}

#ifdef CONFIG_BLK_DEV_INITRD
void free_initrd_mem(unsigned long start, unsigned long end)
{
	/* report the size before the loop advances start past end */
	if (start < end)
		printk ("Freeing initrd memory: %ldk freed\n", (end - start) >> 10);
	for (; start < end; start += PAGE_SIZE) {
		ClearPageReserved(mem_map + MAP_NR(start));
		set_page_count(mem_map+MAP_NR(start), 1);
		free_page(start);
		totalram_pages++;
	}
}
#endif
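
/*
 * si_meminfo() fills in the sysinfo structure used by the sysinfo()
 * system call and /proc with the current memory totals.
 */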

void si_meminfo(struct sysinfo *val)
{
	val->totalram = totalram_pages;
	val->sharedram = 0;
	val->freeram = nr_free_pages();
	val->bufferram = atomic_read(&buffermem_pages);
	val->totalhigh = totalhigh_pages;
	val->freehigh = nr_free_highpages();
	val->mem_unit = PAGE_SIZE;
	return;
}