/*
 *  linux/arch/i386/mm/init.c
 *
 *  Copyright (C) 1995  Linus Torvalds
 */
7 #include <linux/config.h>
8 #include <linux/signal.h>
9 #include <linux/sched.h>
10 #include <linux/kernel.h>
11 #include <linux/errno.h>
12 #include <linux/string.h>
13 #include <linux/types.h>
14 #include <linux/ptrace.h>
15 #include <linux/mman.h>
17 #include <linux/swap.h>
18 #include <linux/smp.h>
19 #include <linux/init.h>
20 #ifdef CONFIG_BLK_DEV_INITRD
21 #include <linux/blk.h>
24 #include <asm/processor.h>
25 #include <asm/system.h>
26 #include <asm/uaccess.h>
27 #include <asm/pgtable.h>
29 #include <asm/fixmap.h>
31 extern void show_net_buffers(void);
32 extern unsigned long init_smp_mappings(unsigned long);
34 void __bad_pte_kernel(pmd_t
*pmd
)
36 printk("Bad pmd in pte_alloc: %08lx\n", pmd_val(*pmd
));
37 pmd_val(*pmd
) = _KERNPG_TABLE
+ __pa(BAD_PAGETABLE
);
40 void __bad_pte(pmd_t
*pmd
)
42 printk("Bad pmd in pte_alloc: %08lx\n", pmd_val(*pmd
));
43 pmd_val(*pmd
) = _PAGE_TABLE
+ __pa(BAD_PAGETABLE
);
46 pte_t
*get_pte_kernel_slow(pmd_t
*pmd
, unsigned long offset
)
50 pte
= (pte_t
*) __get_free_page(GFP_KERNEL
);
53 clear_page((unsigned long)pte
);
54 pmd_val(*pmd
) = _KERNPG_TABLE
+ __pa(pte
);
57 pmd_val(*pmd
) = _KERNPG_TABLE
+ __pa(BAD_PAGETABLE
);
60 free_page((unsigned long)pte
);
62 __bad_pte_kernel(pmd
);
65 return (pte_t
*) pmd_page(*pmd
) + offset
;
68 pte_t
*get_pte_slow(pmd_t
*pmd
, unsigned long offset
)
72 pte
= (unsigned long) __get_free_page(GFP_KERNEL
);
76 pmd_val(*pmd
) = _PAGE_TABLE
+ __pa(pte
);
77 return (pte_t
*)(pte
+ offset
);
79 pmd_val(*pmd
) = _PAGE_TABLE
+ __pa(BAD_PAGETABLE
);
87 return (pte_t
*) (pmd_page(*pmd
) + offset
);
90 int do_check_pgt_cache(int low
, int high
)
93 if(pgtable_cache_size
> high
) {
96 free_pgd_slow(get_pgd_fast()), freed
++;
98 free_pmd_slow(get_pmd_fast()), freed
++;
100 free_pte_slow(get_pte_fast()), freed
++;
101 } while(pgtable_cache_size
> low
);
/*
 * BAD_PAGE is the page that is used for page faults when linux
 * is out-of-memory. Older versions of linux just did a
 * do_exit(), but using this instead means there is less risk
 * for a process dying in kernel mode, possibly leaving an inode
 * unused etc..
 *
 * BAD_PAGETABLE is the accompanying page-table: it is initialized
 * to point to BAD_PAGE entries.
 *
 * ZERO_PAGE is a special page that is used for zero-initialized
 * data and COW.
 */
119 pte_t
* __bad_pagetable(void)
121 extern char empty_bad_page_table
[PAGE_SIZE
];
124 __asm__
__volatile__("cld ; rep ; stosl"
125 : "=&D" (d0
), "=&c" (d1
)
126 : "a" (pte_val(BAD_PAGE
)),
127 "0" ((long) empty_bad_page_table
),
130 return (pte_t
*) empty_bad_page_table
;
133 pte_t
__bad_page(void)
135 extern char empty_bad_page
[PAGE_SIZE
];
138 __asm__
__volatile__("cld ; rep ; stosl"
139 : "=&D" (d0
), "=&c" (d1
)
141 "0" ((long) empty_bad_page
),
144 return pte_mkdirty(mk_pte((unsigned long) empty_bad_page
, PAGE_SHARED
));
149 int i
,free
= 0,total
= 0,reserved
= 0;
150 int shared
= 0, cached
= 0;
152 printk("Mem-info:\n");
154 printk("Free swap: %6dkB\n",nr_swap_pages
<<(PAGE_SHIFT
-10));
158 if (PageReserved(mem_map
+i
))
160 else if (PageSwapCache(mem_map
+i
))
162 else if (!atomic_read(&mem_map
[i
].count
))
165 shared
+= atomic_read(&mem_map
[i
].count
) - 1;
167 printk("%d pages of RAM\n",total
);
168 printk("%d reserved pages\n",reserved
);
169 printk("%d pages shared\n",shared
);
170 printk("%d pages swap cached\n",cached
);
171 printk("%ld pages in page table cache\n",pgtable_cache_size
);
178 extern unsigned long free_area_init(unsigned long, unsigned long);
180 /* References to section boundaries */
182 extern char _text
, _etext
, _edata
, __bss_start
, _end
;
183 extern char __init_begin
, __init_end
;
185 #define X86_CR4_VME 0x0001 /* enable vm86 extensions */
186 #define X86_CR4_PVI 0x0002 /* virtual interrupts flag enable */
187 #define X86_CR4_TSD 0x0004 /* disable time stamp at ipl 3 */
188 #define X86_CR4_DE 0x0008 /* enable debugging extensions */
189 #define X86_CR4_PSE 0x0010 /* enable page size extensions */
190 #define X86_CR4_PAE 0x0020 /* enable physical address extensions */
191 #define X86_CR4_MCE 0x0040 /* Machine check enable */
192 #define X86_CR4_PGE 0x0080 /* enable global pages */
193 #define X86_CR4_PCE 0x0100 /* enable performance counters at ipl 3 */
/*
 * Save the cr4 feature set we're using (ie
 * Pentium 4MB enable and PPro Global page
 * enable), so that any CPU's that boot up
 * after us can get the correct flags.
 */
201 unsigned long mmu_cr4_features __initdata
= 0;
203 static inline void set_in_cr4(unsigned long mask
)
205 mmu_cr4_features
|= mask
;
206 __asm__("movl %%cr4,%%eax\n\t"
214 * allocate page table(s) for compile-time fixed mappings
216 static unsigned long __init
fixmap_init(unsigned long start_mem
)
220 unsigned long address
;
222 start_mem
= PAGE_ALIGN(start_mem
);
224 for (idx
=1; idx
<= __end_of_fixed_addresses
; idx
+= PTRS_PER_PTE
)
226 address
= __fix_to_virt(__end_of_fixed_addresses
-idx
);
227 pg_dir
= swapper_pg_dir
+ (address
>> PGDIR_SHIFT
);
228 memset((void *)start_mem
, 0, PAGE_SIZE
);
229 pgd_val(*pg_dir
) = _PAGE_TABLE
| __pa(start_mem
);
230 start_mem
+= PAGE_SIZE
;
236 static void set_pte_phys (unsigned long vaddr
, unsigned long phys
)
241 pte
= pte_offset(pmd_offset(pgd_offset_k(vaddr
), vaddr
), vaddr
);
243 if (boot_cpu_data
.x86_capability
& X86_FEATURE_PGE
)
244 pgprot_val(prot
) |= _PAGE_GLOBAL
;
245 set_pte(pte
, mk_pte_phys(phys
, prot
));
250 void set_fixmap (enum fixed_addresses idx
, unsigned long phys
)
252 unsigned long address
= __fix_to_virt(idx
);
254 if (idx
>= __end_of_fixed_addresses
) {
255 printk("Invalid set_fixmap\n");
258 set_pte_phys (address
,phys
);
/*
 * paging_init() sets up the page tables - note that the first 4MB are
 * already mapped by head.S.
 *
 * This routines also unmaps the page at virtual kernel address 0, so
 * that we can trap those pesky NULL-reference errors in the kernel.
 */
268 __initfunc(unsigned long paging_init(unsigned long start_mem
, unsigned long end_mem
))
273 unsigned long address
;
276 * Physical page 0 is special; it's not touched by Linux since BIOS
277 * and SMM (for laptops with [34]86/SL chips) may need it. It is read
278 * and write protected to detect null pointer references in the
280 * It may also hold the MP configuration table when we are booting SMP.
282 start_mem
= PAGE_ALIGN(start_mem
);
283 address
= PAGE_OFFSET
;
284 pg_dir
= swapper_pg_dir
;
285 /* unmap the original low memory mappings */
286 pgd_val(pg_dir
[0]) = 0;
288 /* Map whole memory from PAGE_OFFSET */
289 pg_dir
+= USER_PGD_PTRS
;
290 while (address
< end_mem
) {
292 * If we're running on a Pentium CPU, we can use the 4MB
295 * The page tables we create span up to the next 4MB
296 * virtual memory boundary, but that's OK as we won't
297 * use that memory anyway.
299 if (boot_cpu_data
.x86_capability
& X86_FEATURE_PSE
) {
302 set_in_cr4(X86_CR4_PSE
);
303 boot_cpu_data
.wp_works_ok
= 1;
304 __pe
= _KERNPG_TABLE
+ _PAGE_4M
+ __pa(address
);
305 /* Make it "global" too if supported */
306 if (boot_cpu_data
.x86_capability
& X86_FEATURE_PGE
) {
307 set_in_cr4(X86_CR4_PGE
);
308 __pe
+= _PAGE_GLOBAL
;
310 pgd_val(*pg_dir
) = __pe
;
312 address
+= 4*1024*1024;
317 * We're on a [34]86, use normal page tables.
318 * pg_table is physical at this point
320 pg_table
= (pte_t
*) (PAGE_MASK
& pgd_val(*pg_dir
));
322 pg_table
= (pte_t
*) __pa(start_mem
);
323 start_mem
+= PAGE_SIZE
;
326 pgd_val(*pg_dir
) = _PAGE_TABLE
| (unsigned long) pg_table
;
329 /* now change pg_table to kernel virtual addresses */
330 pg_table
= (pte_t
*) __va(pg_table
);
331 for (tmp
= 0 ; tmp
< PTRS_PER_PTE
; tmp
++,pg_table
++) {
332 pte_t pte
= mk_pte(address
, PAGE_KERNEL
);
333 if (address
>= end_mem
)
335 set_pte(pg_table
, pte
);
336 address
+= PAGE_SIZE
;
339 start_mem
= fixmap_init(start_mem
);
341 start_mem
= init_smp_mappings(start_mem
);
345 return free_area_init(start_mem
, end_mem
);
/*
 * Test if the WP bit works in supervisor mode. It isn't supported on 386's
 * and also on some strange 486's (NexGen etc.). All 586+'s are OK. The jumps
 * before and after the test are here to work-around some nasty CPU bugs.
 */
354 __initfunc(void test_wp_bit(void))
356 unsigned char tmp_reg
;
357 unsigned long old
= pg0
[0];
359 printk("Checking if this processor honours the WP bit even in supervisor mode... ");
360 pg0
[0] = pte_val(mk_pte(PAGE_OFFSET
, PAGE_READONLY
));
362 current
->mm
->mmap
->vm_start
+= PAGE_SIZE
;
363 __asm__
__volatile__(
368 :"=m" (*(char *) __va(0)),
374 current
->mm
->mmap
->vm_start
-= PAGE_SIZE
;
375 if (boot_cpu_data
.wp_works_ok
< 0) {
376 boot_cpu_data
.wp_works_ok
= 0;
378 #ifdef CONFIG_X86_WP_WORKS_OK
379 panic("This kernel doesn't support CPU's with broken WP. Recompile it for a 386!");
385 __initfunc(void mem_init(unsigned long start_mem
, unsigned long end_mem
))
387 unsigned long start_low_mem
= PAGE_SIZE
;
389 int reservedpages
= 0;
394 end_mem
&= PAGE_MASK
;
395 high_memory
= (void *) end_mem
;
396 max_mapnr
= num_physpages
= MAP_NR(end_mem
);
398 /* clear the zero-page */
399 memset(empty_zero_page
, 0, PAGE_SIZE
);
401 /* mark usable pages in the mem_map[] */
402 start_low_mem
= PAGE_ALIGN(start_low_mem
)+PAGE_OFFSET
;
406 * But first pinch a few for the stack/trampoline stuff
407 * FIXME: Don't need the extra page at 4K, but need to fix
408 * trampoline before removing it. (see the GDT stuff)
411 start_low_mem
+= PAGE_SIZE
; /* 32bit startup code */
412 start_low_mem
= smp_alloc_memory(start_low_mem
); /* AP processor stacks */
414 start_mem
= PAGE_ALIGN(start_mem
);
417 * IBM messed up *AGAIN* in their thinkpad: 0xA0000 -> 0x9F000.
418 * They seem to have done something stupid with the floppy
419 * controller as well..
421 while (start_low_mem
< 0x9f000+PAGE_OFFSET
) {
422 clear_bit(PG_reserved
, &mem_map
[MAP_NR(start_low_mem
)].flags
);
423 start_low_mem
+= PAGE_SIZE
;
426 while (start_mem
< end_mem
) {
427 clear_bit(PG_reserved
, &mem_map
[MAP_NR(start_mem
)].flags
);
428 start_mem
+= PAGE_SIZE
;
430 for (tmp
= PAGE_OFFSET
; tmp
< end_mem
; tmp
+= PAGE_SIZE
) {
431 if (tmp
>= MAX_DMA_ADDRESS
)
432 clear_bit(PG_DMA
, &mem_map
[MAP_NR(tmp
)].flags
);
433 if (PageReserved(mem_map
+MAP_NR(tmp
))) {
434 if (tmp
>= (unsigned long) &_text
&& tmp
< (unsigned long) &_edata
) {
435 if (tmp
< (unsigned long) &_etext
)
439 } else if (tmp
>= (unsigned long) &__init_begin
440 && tmp
< (unsigned long) &__init_end
)
442 else if (tmp
>= (unsigned long) &__bss_start
443 && tmp
< (unsigned long) start_mem
)
449 atomic_set(&mem_map
[MAP_NR(tmp
)].count
, 1);
450 #ifdef CONFIG_BLK_DEV_INITRD
451 if (!initrd_start
|| (tmp
< initrd_start
|| tmp
>=
456 printk("Memory: %luk/%luk available (%dk kernel code, %dk reserved, %dk data, %dk init)\n",
457 (unsigned long) nr_free_pages
<< (PAGE_SHIFT
-10),
458 max_mapnr
<< (PAGE_SHIFT
-10),
459 codepages
<< (PAGE_SHIFT
-10),
460 reservedpages
<< (PAGE_SHIFT
-10),
461 datapages
<< (PAGE_SHIFT
-10),
462 initpages
<< (PAGE_SHIFT
-10));
464 if (boot_cpu_data
.wp_works_ok
< 0)
468 void free_initmem(void)
472 addr
= (unsigned long)(&__init_begin
);
473 for (; addr
< (unsigned long)(&__init_end
); addr
+= PAGE_SIZE
) {
474 mem_map
[MAP_NR(addr
)].flags
&= ~(1 << PG_reserved
);
475 atomic_set(&mem_map
[MAP_NR(addr
)].count
, 1);
478 printk ("Freeing unused kernel memory: %dk freed\n", (&__init_end
- &__init_begin
) >> 10);
481 void si_meminfo(struct sysinfo
*val
)
488 val
->freeram
= nr_free_pages
<< PAGE_SHIFT
;
489 val
->bufferram
= buffermem
;
491 if (PageReserved(mem_map
+i
))
494 if (!atomic_read(&mem_map
[i
].count
))
496 val
->sharedram
+= atomic_read(&mem_map
[i
].count
) - 1;
498 val
->totalram
<<= PAGE_SHIFT
;
499 val
->sharedram
<<= PAGE_SHIFT
;