/*
 *  linux/arch/i386/mm/init.c
 *
 *  Copyright (C) 1995  Linus Torvalds
 *
 *  Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999
 */
#include <linux/config.h>
#include <linux/signal.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/types.h>
#include <linux/ptrace.h>
#include <linux/mman.h>
#include <linux/swap.h>
#include <linux/smp.h>
#include <linux/init.h>
#ifdef CONFIG_BLK_DEV_INITRD
#include <linux/blk.h>
#endif
#include <linux/highmem.h>
#include <linux/pagemap.h>
#include <linux/bootmem.h>

#include <asm/processor.h>
#include <asm/system.h>
#include <asm/uaccess.h>
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
#include <asm/fixmap.h>
/* pfn bounds of the high-memory region; presumably set up by the e820/setup
 * code before mem_init() runs — not visible in this file. */
unsigned long highstart_pfn, highend_pfn;

/* Page accounting exported via si_meminfo(): all RAM pages handed to the
 * allocator, and the subset of those that live in highmem. */
static unsigned long totalram_pages = 0;
static unsigned long totalhigh_pages = 0;
/*
 * BAD_PAGE is the page that is used for page faults when linux
 * is out-of-memory. Older versions of linux just did a
 * do_exit(), but using this instead means there is less risk
 * for a process dying in kernel mode, possibly leaving an inode
 * unused etc..
 *
 * BAD_PAGETABLE is the accompanying page-table: it is initialized
 * to point to BAD_PAGE entries.
 *
 * ZERO_PAGE is a special page that is used for zero-initialized
 * data and COW.
 *
 * These are allocated in head.S so that we get proper page alignment.
 * If you change the size of these then change head.S as well.
 */
61 extern char empty_bad_page
[PAGE_SIZE
];
63 extern pmd_t empty_bad_pmd_table
[PTRS_PER_PMD
];
65 extern pte_t empty_bad_pte_table
[PTRS_PER_PTE
];
/*
 * We init them before every return and make them writable-shared.
 * This guarantees we get out of the kernel in some more or less sane
 * way.
 */
73 static pmd_t
* get_bad_pmd_table(void)
78 set_pmd(&v
, __pmd(_PAGE_TABLE
+ __pa(empty_bad_pte_table
)));
80 for (i
= 0; i
< PAGE_SIZE
/sizeof(pmd_t
); i
++)
81 empty_bad_pmd_table
[i
] = v
;
83 return empty_bad_pmd_table
;
87 static pte_t
* get_bad_pte_table(void)
92 v
= pte_mkdirty(mk_pte_phys(__pa(empty_bad_page
), PAGE_SHARED
));
94 for (i
= 0; i
< PAGE_SIZE
/sizeof(pte_t
); i
++)
95 empty_bad_pte_table
[i
] = v
;
97 return empty_bad_pte_table
;
102 void __handle_bad_pmd(pmd_t
*pmd
)
105 set_pmd(pmd
, __pmd(_PAGE_TABLE
+ __pa(get_bad_pte_table())));
108 void __handle_bad_pmd_kernel(pmd_t
*pmd
)
111 set_pmd(pmd
, __pmd(_KERNPG_TABLE
+ __pa(get_bad_pte_table())));
114 pte_t
*get_pte_kernel_slow(pmd_t
*pmd
, unsigned long offset
)
118 pte
= (pte_t
*) __get_free_page(GFP_KERNEL
);
119 if (pmd_none(*pmd
)) {
122 set_pmd(pmd
, __pmd(_KERNPG_TABLE
+ __pa(pte
)));
125 set_pmd(pmd
, __pmd(_KERNPG_TABLE
+ __pa(get_bad_pte_table())));
128 free_page((unsigned long)pte
);
130 __handle_bad_pmd_kernel(pmd
);
133 return (pte_t
*) pmd_page(*pmd
) + offset
;
136 pte_t
*get_pte_slow(pmd_t
*pmd
, unsigned long offset
)
140 pte
= (unsigned long) __get_free_page(GFP_KERNEL
);
141 if (pmd_none(*pmd
)) {
143 clear_page((void *)pte
);
144 set_pmd(pmd
, __pmd(_PAGE_TABLE
+ __pa(pte
)));
145 return (pte_t
*)pte
+ offset
;
147 set_pmd(pmd
, __pmd(_PAGE_TABLE
+ __pa(get_bad_pte_table())));
152 __handle_bad_pmd(pmd
);
155 return (pte_t
*) pmd_page(*pmd
) + offset
;
158 int do_check_pgt_cache(int low
, int high
)
161 if(pgtable_cache_size
> high
) {
164 free_pgd_slow(get_pgd_fast()), freed
++;
166 free_pmd_slow(get_pmd_fast()), freed
++;
168 free_pte_slow(get_pte_fast()), freed
++;
169 } while(pgtable_cache_size
> low
);
/*
 * NOTE: pagetable_init alloc all the fixmap pagetables contiguous on the
 * physical space so we can cache the place of the first one and move
 * around without checking the pgd every time.
 */
/* Walk pgd/pmd/pte for a fixmap virtual address and yield its pte slot. */
#define kmap_get_fixmap_pte(vaddr)					\
	pte_offset(pmd_offset(pgd_offset_k(vaddr), (vaddr)), (vaddr))
#ifdef CONFIG_HIGHMEM
/*
 * Cache the kmap pte and protection for kmap_atomic()/kmap().
 * kmap_pte/kmap_prot are the CONFIG_HIGHMEM globals declared earlier
 * in this file (not shown in this chunk) — verify against full source.
 */
void __init kmap_init(void)
{
	unsigned long kmap_vstart;

	/* cache the first kmap pte */
	kmap_vstart = __fix_to_virt(FIX_KMAP_BEGIN);
	kmap_pte = kmap_get_fixmap_pte(kmap_vstart);

	kmap_prot = PAGE_KERNEL;
}
#endif /* CONFIG_HIGHMEM */
201 int i
,free
= 0, total
= 0, reserved
= 0;
202 int shared
= 0, cached
= 0;
205 printk("Mem-info:\n");
207 printk("Free swap: %6dkB\n",nr_swap_pages
<<(PAGE_SHIFT
-10));
211 if (PageHighMem(mem_map
+i
))
213 if (PageReserved(mem_map
+i
))
215 else if (PageSwapCache(mem_map
+i
))
217 else if (!page_count(mem_map
+i
))
220 shared
+= page_count(mem_map
+i
) - 1;
222 printk("%d pages of RAM\n", total
);
223 printk("%d pages of HIGHMEM\n",highmem
);
224 printk("%d reserved pages\n",reserved
);
225 printk("%d pages shared\n",shared
);
226 printk("%d pages swap cached\n",cached
);
227 printk("%ld pages in page table cache\n",pgtable_cache_size
);
/* References to section boundaries (provided by the linker script) */
extern char _text, _etext, _edata, __bss_start, _end;
extern char __init_begin, __init_end;
236 static inline void set_pte_phys (unsigned long vaddr
,
237 unsigned long phys
, pgprot_t flags
)
244 pgd
= swapper_pg_dir
+ __pgd_offset(vaddr
);
245 pmd
= pmd_offset(pgd
, vaddr
);
246 pte
= pte_offset(pmd
, vaddr
);
247 pgprot_val(prot
) = pgprot_val(PAGE_KERNEL
) | pgprot_val(flags
);
248 set_pte(pte
, mk_pte_phys(phys
, prot
));
251 * It's enough to flush this one mapping.
252 * (PGE mappings get flushed as well)
254 __flush_tlb_one(vaddr
);
257 void __set_fixmap (enum fixed_addresses idx
, unsigned long phys
, pgprot_t flags
)
259 unsigned long address
= __fix_to_virt(idx
);
261 if (idx
>= __end_of_fixed_addresses
) {
262 printk("Invalid __set_fixmap\n");
265 set_pte_phys(address
, phys
, flags
);
268 static void __init
fixrange_init (unsigned long start
, unsigned long end
, pgd_t
*pgd_base
)
275 i
= __pgd_offset(start
);
276 j
= __pmd_offset(start
);
279 for ( ; (i
< PTRS_PER_PGD
) && (start
!= end
); pgd
++, i
++) {
281 if (pgd_none(*pgd
)) {
282 pmd
= (pmd_t
*) alloc_bootmem_low_pages(PAGE_SIZE
);
283 set_pgd(pgd
, __pgd(__pa(pmd
) + 0x1));
284 if (pmd
!= pmd_offset(pgd
, start
))
287 pmd
= pmd_offset(pgd
, start
);
291 for (; (j
< PTRS_PER_PMD
) && start
; pmd
++, j
++) {
292 if (pmd_none(*pmd
)) {
293 pte
= (pte_t
*) alloc_bootmem_low_pages(PAGE_SIZE
);
294 set_pmd(pmd
, __pmd(_KERNPG_TABLE
+ __pa(pte
)));
295 if (pte
!= pte_offset(pmd
, 0))
304 static void __init
pagetable_init(void)
306 pgd_t
*pgd
, *pgd_base
;
310 unsigned long vaddr
, end
;
312 end
= (unsigned long)__va(max_low_pfn
*PAGE_SIZE
) - 1;
314 i
= __pgd_offset(PAGE_OFFSET
);
315 pgd_base
= swapper_pg_dir
;
318 for (; i
< PTRS_PER_PGD
; pgd
++, i
++) {
319 vaddr
= i
*PGDIR_SIZE
;
323 pmd
= (pmd_t
*) alloc_bootmem_low_pages(PAGE_SIZE
);
324 set_pgd(pgd
, __pgd(__pa(pmd
) + 0x1));
328 if (pmd
!= pmd_offset(pgd
, 0))
330 for (j
= 0; j
< PTRS_PER_PMD
; pmd
++, j
++) {
331 vaddr
= i
*PGDIR_SIZE
+ j
*PMD_SIZE
;
337 set_in_cr4(X86_CR4_PSE
);
338 boot_cpu_data
.wp_works_ok
= 1;
339 __pe
= _KERNPG_TABLE
+ _PAGE_PSE
+ __pa(vaddr
);
340 /* Make it "global" too if supported */
342 set_in_cr4(X86_CR4_PGE
);
343 __pe
+= _PAGE_GLOBAL
;
345 set_pmd(pmd
, __pmd(__pe
));
349 pte
= (pte_t
*) alloc_bootmem_low_pages(PAGE_SIZE
);
350 set_pmd(pmd
, __pmd(_KERNPG_TABLE
+ __pa(pte
)));
352 if (pte
!= pte_offset(pmd
, 0))
355 for (k
= 0; k
< PTRS_PER_PTE
; pte
++, k
++) {
356 vaddr
= i
*PGDIR_SIZE
+ j
*PMD_SIZE
+ k
*PAGE_SIZE
;
359 *pte
= mk_pte_phys(__pa(vaddr
), PAGE_KERNEL
);
365 * Fixed mappings, only the page table structure has to be
366 * created - mappings will be set by set_fixmap():
368 vaddr
= __fix_to_virt(__end_of_fixed_addresses
- 1) & PMD_MASK
;
369 fixrange_init(vaddr
, 0, pgd_base
);
376 fixrange_init(vaddr
, vaddr
+ PAGE_SIZE
*LAST_PKMAP
, pgd_base
);
378 pgd
= swapper_pg_dir
+ __pgd_offset(vaddr
);
379 pmd
= pmd_offset(pgd
, vaddr
);
380 pte
= pte_offset(pmd
, vaddr
);
381 pkmap_page_table
= pte
;
386 * Add low memory identity-mappings - SMP needs it when
387 * starting up on an AP from real-mode. In the non-PAE
388 * case we already have these mappings through head.S.
389 * All user-space mappings are explicitly cleared after
392 pgd_base
[0] = pgd_base
[USER_PTRS_PER_PGD
];
396 void __init
zap_low_mappings (void)
400 * Zap initial low-memory mappings.
402 * Note that "pgd_clear()" doesn't do it for
403 * us in this case, because pgd_clear() is a
404 * no-op in the 2-level case (pmd_clear() is
405 * the thing that clears the page-tables in
408 for (i
= 0; i
< USER_PTRS_PER_PGD
; i
++)
410 pgd_clear(swapper_pg_dir
+i
);
412 set_pgd(swapper_pg_dir
+i
, __pgd(0));
418 * paging_init() sets up the page tables - note that the first 4MB are
419 * already mapped by head.S.
421 * This routines also unmaps the page at virtual kernel address 0, so
422 * that we can trap those pesky NULL-reference errors in the kernel.
424 void __init
paging_init(void)
428 __asm__( "movl %%ecx,%%cr3\n" ::"c"(__pa(swapper_pg_dir
)));
432 * We will bail out later - printk doesnt work right now so
433 * the user would just see a hanging kernel.
436 set_in_cr4(X86_CR4_PAE
);
441 #ifdef CONFIG_HIGHMEM
445 unsigned long zones_size
[MAX_NR_ZONES
] = {0, 0, 0};
446 unsigned int max_dma
, high
, low
;
448 max_dma
= virt_to_phys((char *)MAX_DMA_ADDRESS
) >> PAGE_SHIFT
;
453 zones_size
[ZONE_DMA
] = low
;
455 zones_size
[ZONE_DMA
] = max_dma
;
456 zones_size
[ZONE_NORMAL
] = low
- max_dma
;
457 #ifdef CONFIG_HIGHMEM
458 zones_size
[ZONE_HIGHMEM
] = high
- low
;
461 free_area_init(zones_size
);
/*
 * Test if the WP bit works in supervisor mode. It isn't supported on 386's
 * and also on some strange 486's (NexGen etc.). All 586+'s are OK. The jumps
 * before and after the test are here to work-around some nasty CPU bugs.
 */
472 void __init
test_wp_bit(void)
475 * Ok, all PAE-capable CPUs are definitely handling the WP bit right.
477 const unsigned long vaddr
= PAGE_OFFSET
;
483 printk("Checking if this processor honours the WP bit even in supervisor mode... ");
485 pgd
= swapper_pg_dir
+ __pgd_offset(vaddr
);
486 pmd
= pmd_offset(pgd
, vaddr
);
487 pte
= pte_offset(pmd
, vaddr
);
489 *pte
= mk_pte_phys(0, PAGE_READONLY
);
492 __asm__
__volatile__(
497 :"=m" (*(char *) vaddr
),
505 if (boot_cpu_data
.wp_works_ok
< 0) {
506 boot_cpu_data
.wp_works_ok
= 0;
508 #ifdef CONFIG_X86_WP_WORKS_OK
509 panic("This kernel doesn't support CPU's with broken WP. Recompile it for a 386!");
515 static inline int page_is_ram (unsigned long pagenr
)
519 for (i
= 0; i
< e820
.nr_map
; i
++) {
520 unsigned long addr
, end
;
522 if (e820
.map
[i
].type
!= E820_RAM
) /* not usable memory */
525 * !!!FIXME!!! Some BIOSen report areas as RAM that
526 * are not. Notably the 640->1Mb area. We need a sanity
529 addr
= (e820
.map
[i
].addr
+PAGE_SIZE
-1) >> PAGE_SHIFT
;
530 end
= (e820
.map
[i
].addr
+e820
.map
[i
].size
) >> PAGE_SHIFT
;
531 if ((pagenr
>= addr
) && (pagenr
< end
))
537 void __init
mem_init(void)
539 int codesize
, reservedpages
, datasize
, initsize
;
545 #ifdef CONFIG_HIGHMEM
546 highmem_start_page
= mem_map
+ highstart_pfn
;
547 /* cache the highmem_mapnr */
548 highmem_mapnr
= highstart_pfn
;
549 max_mapnr
= num_physpages
= highend_pfn
;
551 max_mapnr
= num_physpages
= max_low_pfn
;
553 high_memory
= (void *) __va(max_low_pfn
* PAGE_SIZE
);
555 /* clear the zero-page */
556 memset(empty_zero_page
, 0, PAGE_SIZE
);
558 /* this will put all low memory onto the freelists */
559 totalram_pages
+= free_all_bootmem();
562 for (tmp
= 0; tmp
< max_low_pfn
; tmp
++)
564 * Only count reserved RAM pages
566 if (page_is_ram(tmp
) && PageReserved(mem_map
+tmp
))
568 #ifdef CONFIG_HIGHMEM
569 for (tmp
= highstart_pfn
; tmp
< highend_pfn
; tmp
++) {
570 struct page
*page
= mem_map
+ tmp
;
572 if (!page_is_ram(tmp
)) {
573 SetPageReserved(page
);
576 ClearPageReserved(page
);
577 set_bit(PG_highmem
, &page
->flags
);
578 atomic_set(&page
->count
, 1);
582 totalram_pages
+= totalhigh_pages
;
584 codesize
= (unsigned long) &_etext
- (unsigned long) &_text
;
585 datasize
= (unsigned long) &_edata
- (unsigned long) &_etext
;
586 initsize
= (unsigned long) &__init_end
- (unsigned long) &__init_begin
;
588 printk("Memory: %luk/%luk available (%dk kernel code, %dk reserved, %dk data, %dk init, %ldk highmem)\n",
589 (unsigned long) nr_free_pages() << (PAGE_SHIFT
-10),
590 max_mapnr
<< (PAGE_SHIFT
-10),
592 reservedpages
<< (PAGE_SHIFT
-10),
595 (unsigned long) (totalhigh_pages
<< (PAGE_SHIFT
-10))
600 panic("cannot execute a PAE-enabled kernel on a PAE-less CPU!");
602 if (boot_cpu_data
.wp_works_ok
< 0)
606 * Subtle. SMP is doing it's boot stuff late (because it has to
607 * fork idle threads) - but it also needs low mappings for the
608 * protected-mode entry to work. We zap these entries only after
609 * the WP-bit has been tested.
617 void free_initmem(void)
621 addr
= (unsigned long)(&__init_begin
);
622 for (; addr
< (unsigned long)(&__init_end
); addr
+= PAGE_SIZE
) {
623 ClearPageReserved(mem_map
+ MAP_NR(addr
));
624 set_page_count(mem_map
+MAP_NR(addr
), 1);
628 printk ("Freeing unused kernel memory: %dk freed\n", (&__init_end
- &__init_begin
) >> 10);
#ifdef CONFIG_BLK_DEV_INITRD
/*
 * Return the pages of the initial ramdisk image [start, end) to the
 * page allocator once it has been unpacked.
 */
void free_initrd_mem(unsigned long start, unsigned long end)
{
	if (start < end)
		printk ("Freeing initrd memory: %ldk freed\n", (end - start) >> 10);
	for (; start < end; start += PAGE_SIZE) {
		ClearPageReserved(mem_map + MAP_NR(start));
		set_page_count(mem_map+MAP_NR(start), 1);
		free_page(start);
		totalram_pages++;
	}
}
#endif
645 void si_meminfo(struct sysinfo
*val
)
647 val
->totalram
= totalram_pages
;
649 val
->freeram
= nr_free_pages();
650 val
->bufferram
= atomic_read(&buffermem_pages
);
651 val
->totalhigh
= totalhigh_pages
;
652 val
->freehigh
= nr_free_highpages();
653 val
->mem_unit
= PAGE_SIZE
;