/*
 *  linux/arch/i386/mm/init.c
 *
 *  Copyright (C) 1995  Linus Torvalds
 *
 *  Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999
 */

#include <linux/config.h>
#include <linux/signal.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/types.h>
#include <linux/ptrace.h>
#include <linux/mman.h>
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/smp.h>
#include <linux/init.h>
#ifdef CONFIG_BLK_DEV_INITRD
#include <linux/blk.h>
#endif
#include <linux/highmem.h>
#include <linux/pagemap.h>
#include <linux/bootmem.h>

#include <asm/processor.h>
#include <asm/system.h>
#include <asm/uaccess.h>
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
#include <asm/dma.h>
#include <asm/fixmap.h>
#include <asm/e820.h>

unsigned long highstart_pfn, highend_pfn;
static unsigned long totalram_pages;
static unsigned long totalhigh_pages;

/*
 * BAD_PAGE is the page that is used for page faults when linux
 * is out-of-memory. Older versions of linux just did a
 * do_exit(), but using this instead means there is less risk
 * for a process dying in kernel mode, possibly leaving an inode
 * unused etc.
 *
 * BAD_PAGETABLE is the accompanying page-table: it is initialized
 * to point to BAD_PAGE entries.
 *
 * ZERO_PAGE is a special page that is used for zero-initialized
 * data and COW.
 */

/*
 * These are allocated in head.S so that we get proper page alignment.
 * If you change the size of these then change head.S as well.
 */
extern char empty_bad_page[PAGE_SIZE];
#if CONFIG_X86_PAE
extern pmd_t empty_bad_pmd_table[PTRS_PER_PMD];
#endif
extern pte_t empty_bad_pte_table[PTRS_PER_PTE];

/*
 * We init them before every return and make them writable-shared.
 * This guarantees we get out of the kernel in some more or less sane
 * way.
 */
#if CONFIG_X86_PAE
static pmd_t * get_bad_pmd_table(void)
{
        pmd_t v;
        int i;

        set_pmd(&v, __pmd(_PAGE_TABLE + __pa(empty_bad_pte_table)));

        for (i = 0; i < PAGE_SIZE/sizeof(pmd_t); i++)
                empty_bad_pmd_table[i] = v;

        return empty_bad_pmd_table;
}
#endif

static pte_t * get_bad_pte_table(void)
{
        pte_t v;
        int i;

        v = pte_mkdirty(mk_pte_phys(__pa(empty_bad_page), PAGE_SHARED));

        for (i = 0; i < PAGE_SIZE/sizeof(pte_t); i++)
                empty_bad_pte_table[i] = v;

        return empty_bad_pte_table;
}

void __handle_bad_pmd(pmd_t *pmd)
{
        pmd_ERROR(*pmd);
        set_pmd(pmd, __pmd(_PAGE_TABLE + __pa(get_bad_pte_table())));
}

void __handle_bad_pmd_kernel(pmd_t *pmd)
{
        pmd_ERROR(*pmd);
        set_pmd(pmd, __pmd(_KERNPG_TABLE + __pa(get_bad_pte_table())));
}

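/*
 * Slow path for pte allocation: we get here when the pmd entry was
 * still empty. If the page allocation fails, the pmd is pointed at
 * the bad page table instead, so an out-of-memory situation degrades
 * gracefully rather than dereferencing garbage.
 */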
pte_t *get_pte_kernel_slow(pmd_t *pmd, unsigned long offset)
{
        pte_t *pte;

        pte = (pte_t *) __get_free_page(GFP_KERNEL);
        if (pmd_none(*pmd)) {
                if (pte) {
                        clear_page(pte);
                        set_pmd(pmd, __pmd(_KERNPG_TABLE + __pa(pte)));
                        return pte + offset;
                }
                set_pmd(pmd, __pmd(_KERNPG_TABLE + __pa(get_bad_pte_table())));
                return NULL;
        }
        free_page((unsigned long)pte);
        if (pmd_bad(*pmd)) {
                __handle_bad_pmd_kernel(pmd);
                return NULL;
        }
        return (pte_t *) pmd_page(*pmd) + offset;
}

pte_t *get_pte_slow(pmd_t *pmd, unsigned long offset)
{
        unsigned long pte;

        pte = (unsigned long) __get_free_page(GFP_KERNEL);
        if (pmd_none(*pmd)) {
                if (pte) {
                        clear_page((void *)pte);
                        set_pmd(pmd, __pmd(_PAGE_TABLE + __pa(pte)));
                        return (pte_t *)pte + offset;
                }
                set_pmd(pmd, __pmd(_PAGE_TABLE + __pa(get_bad_pte_table())));
                return NULL;
        }
        free_page(pte);
        if (pmd_bad(*pmd)) {
                __handle_bad_pmd(pmd);
                return NULL;
        }
        return (pte_t *) pmd_page(*pmd) + offset;
}

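/*
 * Trim the page-table quicklists: if the cache has grown past the
 * "high" watermark, release entries back to the page allocator until
 * it falls below "low". Returns the number of pages freed.
 */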
int do_check_pgt_cache(int low, int high)
{
        int freed = 0;
        if(pgtable_cache_size > high) {
                do {
                        if(pgd_quicklist)
                                free_pgd_slow(get_pgd_fast()), freed++;
                        if(pmd_quicklist)
                                free_pmd_slow(get_pmd_fast()), freed++;
                        if(pte_quicklist)
                                free_pte_slow(get_pte_fast()), freed++;
                } while(pgtable_cache_size > low);
        }
        return freed;
}

/*
 * NOTE: pagetable_init allocates all the fixmap pagetables contiguously
 * in physical space, so we can cache the place of the first one and move
 * around without checking the pgd every time.
 */

#if CONFIG_HIGHMEM
pte_t *kmap_pte;
pgprot_t kmap_prot;

#define kmap_get_fixmap_pte(vaddr) \
        pte_offset(pmd_offset(pgd_offset_k(vaddr), (vaddr)), (vaddr))
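
/*
 * kmap_get_fixmap_pte() simply walks pgd -> pmd -> pte for a fixmap
 * virtual address; the page tables covering this range were built by
 * pagetable_init(), so the walk cannot fail.
 */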

void __init kmap_init(void)
{
        unsigned long kmap_vstart;

        /* cache the first kmap pte */
        kmap_vstart = __fix_to_virt(FIX_KMAP_BEGIN);
        kmap_pte = kmap_get_fixmap_pte(kmap_vstart);

        kmap_prot = PAGE_KERNEL;
}
#endif /* CONFIG_HIGHMEM */

void show_mem(void)
{
        int i, free = 0, total = 0, reserved = 0;
        int shared = 0, cached = 0;
        int highmem = 0;

        printk("Mem-info:\n");
        show_free_areas();
        printk("Free swap: %6dkB\n", nr_swap_pages<<(PAGE_SHIFT-10));
        i = max_mapnr;
        while (i-- > 0) {
                total++;
                if (PageHighMem(mem_map+i))
                        highmem++;
                if (PageReserved(mem_map+i))
                        reserved++;
                else if (PageSwapCache(mem_map+i))
                        cached++;
                else if (!page_count(mem_map+i))
                        free++;
                else
                        shared += page_count(mem_map+i) - 1;
        }
        printk("%d pages of RAM\n", total);
        printk("%d pages of HIGHMEM\n", highmem);
        printk("%d reserved pages\n", reserved);
        printk("%d pages shared\n", shared);
        printk("%d pages swap cached\n", cached);
        printk("%ld pages in page table cache\n", pgtable_cache_size);
        show_buffers();
}

/* References to section boundaries */

extern char _text, _etext, _edata, __bss_start, _end;
extern char __init_begin, __init_end;

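/*
 * Install one kernel pte at 'vaddr', mapping physical address 'phys'.
 * Only used for fixmap addresses, whose pgd/pmd entries were set up
 * in pagetable_init() - hence the "PAE BUG" noise if they are absent.
 */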
static inline void set_pte_phys (unsigned long vaddr,
                        unsigned long phys, pgprot_t flags)
{
        pgprot_t prot;
        pgd_t *pgd;
        pmd_t *pmd;
        pte_t *pte;

        pgd = swapper_pg_dir + __pgd_offset(vaddr);
        if (pgd_none(*pgd)) {
                printk("PAE BUG #00!\n");
                return;
        }
        pmd = pmd_offset(pgd, vaddr);
        if (pmd_none(*pmd)) {
                printk("PAE BUG #01!\n");
                return;
        }
        pte = pte_offset(pmd, vaddr);

        pgprot_val(prot) = pgprot_val(PAGE_KERNEL) | pgprot_val(flags);
        set_pte(pte, mk_pte_phys(phys, prot));

        /*
         * It's enough to flush this one mapping.
         * (PGE mappings get flushed as well)
         */
        __flush_tlb_one(vaddr);
}

void __set_fixmap (enum fixed_addresses idx, unsigned long phys, pgprot_t flags)
{
        unsigned long address = __fix_to_virt(idx);

        if (idx >= __end_of_fixed_addresses) {
                printk("Invalid __set_fixmap\n");
                return;
        }
        set_pte_phys(address, phys, flags);
}
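
/*
 * fixrange_init() only builds the page-table skeleton (pmd/pte pages)
 * for [start, end); the actual mappings are installed later through
 * set_fixmap().
 */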

static void __init fixrange_init (unsigned long start, unsigned long end, pgd_t *pgd_base)
{
        pgd_t *pgd;
        pmd_t *pmd;
        pte_t *pte;
        int i, j;
        unsigned long vaddr;

        vaddr = start;
        i = __pgd_offset(vaddr);
        j = __pmd_offset(vaddr);
        pgd = pgd_base + i;

        for ( ; (i < PTRS_PER_PGD) && (vaddr != end); pgd++, i++) {
#if CONFIG_X86_PAE
                if (pgd_none(*pgd)) {
                        pmd = (pmd_t *) alloc_bootmem_low_pages(PAGE_SIZE);
                        set_pgd(pgd, __pgd(__pa(pmd) + 0x1));
                        if (pmd != pmd_offset(pgd, 0))
                                printk("PAE BUG #02!\n");
                }
                pmd = pmd_offset(pgd, vaddr);
#else
                pmd = (pmd_t *)pgd;
#endif
                for (; (j < PTRS_PER_PMD) && (vaddr != end); pmd++, j++) {
                        if (pmd_none(*pmd)) {
                                pte = (pte_t *) alloc_bootmem_low_pages(PAGE_SIZE);
                                set_pmd(pmd, __pmd(_KERNPG_TABLE + __pa(pte)));
                                if (pte != pte_offset(pmd, 0))
                                        BUG();
                        }
                        vaddr += PMD_SIZE;
                }
                j = 0;
        }
}

static void __init pagetable_init (void)
{
        unsigned long vaddr, end;
        pgd_t *pgd, *pgd_base;
        int i, j, k;
        pmd_t *pmd;
        pte_t *pte;

        /*
         * This can be zero as well - no problem, in that case we exit
         * the loops anyway due to the PTRS_PER_* conditions.
         */
        end = (unsigned long)__va(max_low_pfn*PAGE_SIZE);

        pgd_base = swapper_pg_dir;
#if CONFIG_X86_PAE
        for (i = 0; i < PTRS_PER_PGD; i++) {
                pgd = pgd_base + i;
                __pgd_clear(pgd);
        }
#endif
        i = __pgd_offset(PAGE_OFFSET);
        pgd = pgd_base + i;

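        /*
         * Map the whole of low physical memory 1:1 into kernel virtual
         * space starting at PAGE_OFFSET.
         */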
        for (; i < PTRS_PER_PGD; pgd++, i++) {
                vaddr = i*PGDIR_SIZE;
                if (end && (vaddr >= end))
                        break;
#if CONFIG_X86_PAE
                pmd = (pmd_t *) alloc_bootmem_low_pages(PAGE_SIZE);
                set_pgd(pgd, __pgd(__pa(pmd) + 0x1));
#else
                pmd = (pmd_t *)pgd;
#endif
                if (pmd != pmd_offset(pgd, 0))
                        BUG();
                for (j = 0; j < PTRS_PER_PMD; pmd++, j++) {
                        vaddr = i*PGDIR_SIZE + j*PMD_SIZE;
                        if (end && (vaddr >= end))
                                break;

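                        /*
                         * With PSE the CPU maps this whole 4MB region
                         * with a single large-page pmd entry - no pte
                         * page is needed.
                         */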
                        if (cpu_has_pse) {
                                unsigned long __pe;

                                set_in_cr4(X86_CR4_PSE);
                                boot_cpu_data.wp_works_ok = 1;
                                __pe = _KERNPG_TABLE + _PAGE_PSE + __pa(vaddr);
                                /* Make it "global" too if supported */
                                if (cpu_has_pge) {
                                        set_in_cr4(X86_CR4_PGE);
                                        __pe += _PAGE_GLOBAL;
                                }
                                set_pmd(pmd, __pmd(__pe));
                                continue;
                        }

                        pte = (pte_t *) alloc_bootmem_low_pages(PAGE_SIZE);
                        set_pmd(pmd, __pmd(_KERNPG_TABLE + __pa(pte)));

                        if (pte != pte_offset(pmd, 0))
                                BUG();

                        for (k = 0; k < PTRS_PER_PTE; pte++, k++) {
                                vaddr = i*PGDIR_SIZE + j*PMD_SIZE + k*PAGE_SIZE;
                                if (end && (vaddr >= end))
                                        break;
                                *pte = mk_pte_phys(__pa(vaddr), PAGE_KERNEL);
                        }
                }
        }

        /*
         * Fixed mappings, only the page table structure has to be
         * created - mappings will be set by set_fixmap():
         */
        vaddr = __fix_to_virt(__end_of_fixed_addresses - 1) & PMD_MASK;
        fixrange_init(vaddr, 0, pgd_base);

#if CONFIG_HIGHMEM
        /*
         * Permanent kmaps:
         */
        vaddr = PKMAP_BASE;
        fixrange_init(vaddr, vaddr + PAGE_SIZE*LAST_PKMAP, pgd_base);

        pgd = swapper_pg_dir + __pgd_offset(vaddr);
        pmd = pmd_offset(pgd, vaddr);
        pte = pte_offset(pmd, vaddr);
        pkmap_page_table = pte;
#endif

#if CONFIG_X86_PAE
        /*
         * Add low memory identity-mappings - SMP needs it when
         * starting up on an AP from real-mode. In the non-PAE
         * case we already have these mappings through head.S.
         * All user-space mappings are explicitly cleared after
         * SMP startup.
         */
        pgd_base[0] = pgd_base[USER_PTRS_PER_PGD];
#endif
}

void __init zap_low_mappings (void)
{
        int i;
        /*
         * Zap initial low-memory mappings.
         *
         * Note that "pgd_clear()" doesn't do it for
         * us in this case, because pgd_clear() is a
         * no-op in the 2-level case (pmd_clear() is
         * the thing that clears the page-tables in
         * that case).
         */
        for (i = 0; i < USER_PTRS_PER_PGD; i++)
#if CONFIG_X86_PAE
                pgd_clear(swapper_pg_dir+i);
#else
                set_pgd(swapper_pg_dir+i, __pgd(0));
#endif
        flush_tlb_all();
}

/*
 * paging_init() sets up the page tables - note that the first 4MB are
 * already mapped by head.S.
 *
 * This routine also unmaps the page at virtual kernel address 0, so
 * that we can trap those pesky NULL-reference errors in the kernel.
 */
void __init paging_init(void)
{
        pagetable_init();

        __asm__( "movl %%ecx,%%cr3\n" ::"c"(__pa(swapper_pg_dir)));

#if CONFIG_X86_PAE
        /*
         * We will bail out later - printk doesn't work right now so
         * the user would just see a hanging kernel.
         */
        if (cpu_has_pae)
                set_in_cr4(X86_CR4_PAE);
#endif

        __flush_tlb_all();

#ifdef CONFIG_HIGHMEM
        kmap_init();
#endif
        {
                unsigned long zones_size[MAX_NR_ZONES] = {0, 0, 0};
                unsigned int max_dma, high, low;

                max_dma = virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT;
                low = max_low_pfn;
                high = highend_pfn;

                if (low < max_dma)
                        zones_size[ZONE_DMA] = low;
                else {
                        zones_size[ZONE_DMA] = max_dma;
                        zones_size[ZONE_NORMAL] = low - max_dma;
#ifdef CONFIG_HIGHMEM
                        zones_size[ZONE_HIGHMEM] = high - low;
#endif
                }
                free_area_init(zones_size);
        }
        return;
}

/*
 * Test if the WP bit works in supervisor mode. It isn't supported on 386's
 * and also on some strange 486's (NexGen etc.). All 586+'s are OK. The jumps
 * before and after the test are here to work-around some nasty CPU bugs.
 */

/*
 * This function cannot be __init, since exceptions don't work in that
 * section.
 */
static int do_test_wp_bit(unsigned long vaddr)
{
        char tmp_reg;
        int flag;

        __asm__ __volatile__(
                "       movb %0,%1      \n"
                "1:     movb %1,%0      \n"
                "       xorl %2,%2      \n"
                "2:                     \n"
                ".section __ex_table,\"a\"\n"
                "       .align 4        \n"
                "       .long 1b,2b     \n"
                ".previous              \n"
                :"=m" (*(char *) vaddr),
                 "=q" (tmp_reg),
                 "=r" (flag)
                :"2" (1)
                :"memory");

        return flag;
}

void __init test_wp_bit(void)
{
/*
 * Ok, all PSE-capable CPUs are definitely handling the WP bit right.
 */
        const unsigned long vaddr = PAGE_OFFSET;
        pgd_t *pgd;
        pmd_t *pmd;
        pte_t *pte, old_pte;

        printk("Checking if this processor honours the WP bit even in supervisor mode... ");

        pgd = swapper_pg_dir + __pgd_offset(vaddr);
        pmd = pmd_offset(pgd, vaddr);
        pte = pte_offset(pmd, vaddr);
        old_pte = *pte;
        *pte = mk_pte_phys(0, PAGE_READONLY);
        local_flush_tlb();

        boot_cpu_data.wp_works_ok = do_test_wp_bit(vaddr);

        *pte = old_pte;
        local_flush_tlb();

        if (!boot_cpu_data.wp_works_ok) {
                printk("No.\n");
#ifdef CONFIG_X86_WP_WORKS_OK
                panic("This kernel doesn't support CPU's with broken WP. Recompile it for a 386!");
#endif
        } else
                printk("Ok.\n");
}

static inline int page_is_ram (unsigned long pagenr)
{
        int i;

        for (i = 0; i < e820.nr_map; i++) {
                unsigned long addr, end;

                if (e820.map[i].type != E820_RAM)       /* not usable memory */
                        continue;
                /*
                 * !!!FIXME!!! Some BIOSen report areas as RAM that
                 * are not. Notably the 640->1Mb area. We need a sanity
                 * check here.
                 */
                addr = (e820.map[i].addr+PAGE_SIZE-1) >> PAGE_SHIFT;
                end = (e820.map[i].addr+e820.map[i].size) >> PAGE_SHIFT;
                if ((pagenr >= addr) && (pagenr < end))
                        return 1;
        }
        return 0;
}

void __init mem_init(void)
{
        int codesize, reservedpages, datasize, initsize;
        int tmp;

        if (!mem_map)
                BUG();

#ifdef CONFIG_HIGHMEM
        highmem_start_page = mem_map + highstart_pfn;
        max_mapnr = num_physpages = highend_pfn;
#else
        max_mapnr = num_physpages = max_low_pfn;
#endif
        high_memory = (void *) __va(max_low_pfn * PAGE_SIZE);

        /* clear the zero-page */
        memset(empty_zero_page, 0, PAGE_SIZE);

        /* this will put all low memory onto the freelists */
        totalram_pages += free_all_bootmem();

        reservedpages = 0;
        for (tmp = 0; tmp < max_low_pfn; tmp++)
                /*
                 * Only count reserved RAM pages
                 */
                if (page_is_ram(tmp) && PageReserved(mem_map+tmp))
                        reservedpages++;
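
        /*
         * Highmem pages are not handled by the bootmem allocator, so
         * hand each usable one to the page allocator individually.
         */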
#ifdef CONFIG_HIGHMEM
        for (tmp = highstart_pfn; tmp < highend_pfn; tmp++) {
                struct page *page = mem_map + tmp;

                if (!page_is_ram(tmp)) {
                        SetPageReserved(page);
                        continue;
                }
                ClearPageReserved(page);
                set_bit(PG_highmem, &page->flags);
                atomic_set(&page->count, 1);
                __free_page(page);
                totalhigh_pages++;
        }
        totalram_pages += totalhigh_pages;
#endif
        codesize = (unsigned long) &_etext - (unsigned long) &_text;
        datasize = (unsigned long) &_edata - (unsigned long) &_etext;
        initsize = (unsigned long) &__init_end - (unsigned long) &__init_begin;

        printk("Memory: %luk/%luk available (%dk kernel code, %dk reserved, %dk data, %dk init, %ldk highmem)\n",
                (unsigned long) nr_free_pages() << (PAGE_SHIFT-10),
                max_mapnr << (PAGE_SHIFT-10),
                codesize >> 10,
                reservedpages << (PAGE_SHIFT-10),
                datasize >> 10,
                initsize >> 10,
                (unsigned long) (totalhigh_pages << (PAGE_SHIFT-10))
               );

#if CONFIG_X86_PAE
        if (!cpu_has_pae)
                panic("cannot execute a PAE-enabled kernel on a PAE-less CPU!");
#endif
        if (boot_cpu_data.wp_works_ok < 0)
                test_wp_bit();

        /*
         * Subtle. SMP is doing its boot stuff late (because it has to
         * fork idle threads) - but it also needs low mappings for the
         * protected-mode entry to work. We zap these entries only after
         * the WP-bit has been tested.
         */
#ifndef CONFIG_SMP
        zap_low_mappings();
#endif
}

void free_initmem(void)
{
        unsigned long addr;

        addr = (unsigned long)(&__init_begin);
        for (; addr < (unsigned long)(&__init_end); addr += PAGE_SIZE) {
                ClearPageReserved(virt_to_page(addr));
                set_page_count(virt_to_page(addr), 1);
                free_page(addr);
                totalram_pages++;
        }
        printk ("Freeing unused kernel memory: %dk freed\n", (&__init_end - &__init_begin) >> 10);
}

#ifdef CONFIG_BLK_DEV_INITRD
void free_initrd_mem(unsigned long start, unsigned long end)
{
        if (start < end)
                printk ("Freeing initrd memory: %ldk freed\n", (end - start) >> 10);
        for (; start < end; start += PAGE_SIZE) {
                ClearPageReserved(virt_to_page(start));
                set_page_count(virt_to_page(start), 1);
                free_page(start);
                totalram_pages++;
        }
}
#endif

void si_meminfo(struct sysinfo *val)
{
        val->totalram = totalram_pages;
        val->sharedram = 0;
        val->freeram = nr_free_pages();
        val->bufferram = atomic_read(&buffermem_pages);
        val->totalhigh = totalhigh_pages;
        val->freehigh = nr_free_highpages();
        val->mem_unit = PAGE_SIZE;
        return;
}