2 * linux/arch/i386/kernel/setup.c
4 * Copyright (C) 1995 Linus Torvalds
6 * Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999
8 * Memory region support
9 * David Parsons <orc@pell.chi.il.us>, July-August 1999
11 * Added E820 sanitization routine (removes overlapping memory regions);
12 * Brian Moyle <bmoyle@mvista.com>, February 2001
14 * Moved CPU detection code to cpu/${cpu}.c
15 * Patrick Mochel <mochel@osdl.org>, March 2002
17 * Provisions for empty E820 memory regions (reported by certain BIOSes).
18 * Alex Achenbach <xela@slit.de>, December 2002.
23 * This file handles the architecture-dependent parts of initialization
26 #include <linux/config.h>
27 #include <linux/sched.h>
29 #include <linux/mmzone.h>
30 #include <linux/tty.h>
31 #include <linux/ioport.h>
32 #include <linux/acpi.h>
33 #include <linux/apm_bios.h>
34 #include <linux/initrd.h>
35 #include <linux/bootmem.h>
36 #include <linux/seq_file.h>
37 #include <linux/console.h>
38 #include <linux/mca.h>
39 #include <linux/root_dev.h>
40 #include <linux/highmem.h>
41 #include <linux/module.h>
42 #include <linux/efi.h>
43 #include <linux/init.h>
44 #include <linux/edd.h>
45 #include <linux/nodemask.h>
46 #include <linux/kexec.h>
47 #include <linux/crash_dump.h>
49 #include <video/edid.h>
53 #include <asm/mpspec.h>
54 #include <asm/setup.h>
55 #include <asm/arch_hooks.h>
56 #include <asm/sections.h>
57 #include <asm/io_apic.h>
60 #include "setup_arch_pre.h"
61 #include <bios_ebda.h>
63 /* Forward Declaration. */
64 void __init
find_max_pfn(void);
66 /* This value is set up by the early boot code to point to the value
67 immediately after the boot time page tables. It contains a *physical*
68 address, and must not be in the .bss segment! */
69 unsigned long init_pg_tables_end __initdata
= ~0UL;
71 int disable_pse __devinitdata
= 0;
79 EXPORT_SYMBOL(efi_enabled
);
82 /* cpu data as detected by the assembly code in head.S */
83 struct cpuinfo_x86 new_cpu_data __initdata
= { 0, 0, 0, 0, -1, 1, 0, 0, -1 };
84 /* common cpu data for all cpus */
85 struct cpuinfo_x86 boot_cpu_data __read_mostly
= { 0, 0, 0, 0, -1, 1, 0, 0, -1 };
86 EXPORT_SYMBOL(boot_cpu_data
);
88 unsigned long mmu_cr4_features
;
91 int acpi_disabled
= 0;
93 int acpi_disabled
= 1;
95 EXPORT_SYMBOL(acpi_disabled
);
98 int __initdata acpi_force
= 0;
99 extern acpi_interrupt_flags acpi_sci_flags
;
102 /* for MCA, but anyone else can use it if they want */
103 unsigned int machine_id
;
105 EXPORT_SYMBOL(machine_id
);
107 unsigned int machine_submodel_id
;
108 unsigned int BIOS_revision
;
109 unsigned int mca_pentium_flag
;
111 /* For PCI or other memory-mapped resources */
112 unsigned long pci_mem_start
= 0x10000000;
114 EXPORT_SYMBOL(pci_mem_start
);
117 /* Boot loader ID as an integer, for the benefit of proc_dointvec */
120 /* user-defined highmem size */
121 static unsigned int highmem_pages
= -1;
126 struct drive_info_struct
{ char dummy
[32]; } drive_info
;
127 #if defined(CONFIG_BLK_DEV_IDE) || defined(CONFIG_BLK_DEV_HD) || \
128 defined(CONFIG_BLK_DEV_IDE_MODULE) || defined(CONFIG_BLK_DEV_HD_MODULE)
129 EXPORT_SYMBOL(drive_info
);
131 struct screen_info screen_info
;
133 EXPORT_SYMBOL(screen_info
);
135 struct apm_info apm_info
;
136 EXPORT_SYMBOL(apm_info
);
137 struct sys_desc_table_struct
{
138 unsigned short length
;
139 unsigned char table
[0];
141 struct edid_info edid_info
;
142 EXPORT_SYMBOL_GPL(edid_info
);
143 struct ist_info ist_info
;
144 #if defined(CONFIG_X86_SPEEDSTEP_SMI) || \
145 defined(CONFIG_X86_SPEEDSTEP_SMI_MODULE)
146 EXPORT_SYMBOL(ist_info
);
150 extern void early_cpu_init(void);
151 extern void dmi_scan_machine(void);
152 extern void generic_apic_probe(char *);
153 extern int root_mountflags
;
155 unsigned long saved_videomode
;
/*
 * Bit layout of the ramdisk flags word.
 * NOTE(review): presumably applied to the boot loader's ramdisk flags
 * (low 11 bits = image start, bit 14 = load, bit 15 = prompt) - the users
 * of these masks are not visible here, confirm against them.
 */
#define RAMDISK_IMAGE_START_MASK	0x07FF
#define RAMDISK_PROMPT_FLAG		0x8000
#define RAMDISK_LOAD_FLAG		0x4000
161 static char command_line
[COMMAND_LINE_SIZE
];
163 unsigned char __initdata boot_params
[PARAM_SIZE
];
165 static struct resource data_resource
= {
166 .name
= "Kernel data",
169 .flags
= IORESOURCE_BUSY
| IORESOURCE_MEM
172 static struct resource code_resource
= {
173 .name
= "Kernel code",
176 .flags
= IORESOURCE_BUSY
| IORESOURCE_MEM
179 static struct resource system_rom_resource
= {
180 .name
= "System ROM",
183 .flags
= IORESOURCE_BUSY
| IORESOURCE_READONLY
| IORESOURCE_MEM
186 static struct resource extension_rom_resource
= {
187 .name
= "Extension ROM",
190 .flags
= IORESOURCE_BUSY
| IORESOURCE_READONLY
| IORESOURCE_MEM
193 static struct resource adapter_rom_resources
[] = { {
194 .name
= "Adapter ROM",
197 .flags
= IORESOURCE_BUSY
| IORESOURCE_READONLY
| IORESOURCE_MEM
199 .name
= "Adapter ROM",
202 .flags
= IORESOURCE_BUSY
| IORESOURCE_READONLY
| IORESOURCE_MEM
204 .name
= "Adapter ROM",
207 .flags
= IORESOURCE_BUSY
| IORESOURCE_READONLY
| IORESOURCE_MEM
209 .name
= "Adapter ROM",
212 .flags
= IORESOURCE_BUSY
| IORESOURCE_READONLY
| IORESOURCE_MEM
214 .name
= "Adapter ROM",
217 .flags
= IORESOURCE_BUSY
| IORESOURCE_READONLY
| IORESOURCE_MEM
219 .name
= "Adapter ROM",
222 .flags
= IORESOURCE_BUSY
| IORESOURCE_READONLY
| IORESOURCE_MEM
/* Number of entries in the adapter_rom_resources[] table. */
#define ADAPTER_ROM_RESOURCES \
	(sizeof adapter_rom_resources / sizeof adapter_rom_resources[0])
228 static struct resource video_rom_resource
= {
232 .flags
= IORESOURCE_BUSY
| IORESOURCE_READONLY
| IORESOURCE_MEM
235 static struct resource video_ram_resource
= {
236 .name
= "Video RAM area",
239 .flags
= IORESOURCE_BUSY
| IORESOURCE_MEM
242 static struct resource standard_io_resources
[] = { {
246 .flags
= IORESOURCE_BUSY
| IORESOURCE_IO
251 .flags
= IORESOURCE_BUSY
| IORESOURCE_IO
256 .flags
= IORESOURCE_BUSY
| IORESOURCE_IO
261 .flags
= IORESOURCE_BUSY
| IORESOURCE_IO
266 .flags
= IORESOURCE_BUSY
| IORESOURCE_IO
268 .name
= "dma page reg",
271 .flags
= IORESOURCE_BUSY
| IORESOURCE_IO
276 .flags
= IORESOURCE_BUSY
| IORESOURCE_IO
281 .flags
= IORESOURCE_BUSY
| IORESOURCE_IO
286 .flags
= IORESOURCE_BUSY
| IORESOURCE_IO
/* Number of entries in the standard_io_resources[] table. */
#define STANDARD_IO_RESOURCES \
	(sizeof standard_io_resources / sizeof standard_io_resources[0])
/*
 * Nonzero when the 16-bit word stored at @x equals 0xaa55 (on a
 * little-endian machine: byte 0x55 followed by byte 0xaa).
 * @x is evaluated once and must point to at least two readable bytes.
 */
#define romsignature(x) (*(unsigned short *)(x) == 0xaa55)
/*
 * romchecksum() - verify the byte checksum of a ROM image.
 * @rom:    first byte of the image
 * @length: number of bytes to add up
 *
 * Sums @length bytes starting at @rom in an unsigned char, i.e. modulo 256.
 * Returns 1 when the sum is zero (checksum okay) and 0 otherwise.  A zero
 * @length trivially passes, which is why callers test the length first.
 */
static int __init romchecksum(unsigned char *rom, unsigned long length)
{
	unsigned char *p, sum = 0;

	for (p = rom; p < rom + length; p++)
		sum += *p;
	return sum == 0;
}
303 static void __init
probe_roms(void)
305 unsigned long start
, length
, upper
;
310 upper
= adapter_rom_resources
[0].start
;
311 for (start
= video_rom_resource
.start
; start
< upper
; start
+= 2048) {
312 rom
= isa_bus_to_virt(start
);
313 if (!romsignature(rom
))
316 video_rom_resource
.start
= start
;
318 /* 0 < length <= 0x7f * 512, historically */
319 length
= rom
[2] * 512;
321 /* if checksum okay, trust length byte */
322 if (length
&& romchecksum(rom
, length
))
323 video_rom_resource
.end
= start
+ length
- 1;
325 request_resource(&iomem_resource
, &video_rom_resource
);
329 start
= (video_rom_resource
.end
+ 1 + 2047) & ~2047UL;
334 request_resource(&iomem_resource
, &system_rom_resource
);
335 upper
= system_rom_resource
.start
;
337 /* check for extension rom (ignore length byte!) */
338 rom
= isa_bus_to_virt(extension_rom_resource
.start
);
339 if (romsignature(rom
)) {
340 length
= extension_rom_resource
.end
- extension_rom_resource
.start
+ 1;
341 if (romchecksum(rom
, length
)) {
342 request_resource(&iomem_resource
, &extension_rom_resource
);
343 upper
= extension_rom_resource
.start
;
347 /* check for adapter roms on 2k boundaries */
348 for (i
= 0; i
< ADAPTER_ROM_RESOURCES
&& start
< upper
; start
+= 2048) {
349 rom
= isa_bus_to_virt(start
);
350 if (!romsignature(rom
))
353 /* 0 < length <= 0x7f * 512, historically */
354 length
= rom
[2] * 512;
356 /* but accept any length that fits if checksum okay */
357 if (!length
|| start
+ length
> upper
|| !romchecksum(rom
, length
))
360 adapter_rom_resources
[i
].start
= start
;
361 adapter_rom_resources
[i
].end
= start
+ length
- 1;
362 request_resource(&iomem_resource
, &adapter_rom_resources
[i
]);
364 start
= adapter_rom_resources
[i
++].end
& ~2047UL;
368 static void __init
limit_regions(unsigned long long size
)
370 unsigned long long current_addr
= 0;
374 efi_memory_desc_t
*md
;
377 for (p
= memmap
.map
, i
= 0; p
< memmap
.map_end
;
378 p
+= memmap
.desc_size
, i
++) {
380 current_addr
= md
->phys_addr
+ (md
->num_pages
<< 12);
381 if (md
->type
== EFI_CONVENTIONAL_MEMORY
) {
382 if (current_addr
>= size
) {
384 (((current_addr
-size
) + PAGE_SIZE
-1) >> PAGE_SHIFT
);
385 memmap
.nr_map
= i
+ 1;
391 for (i
= 0; i
< e820
.nr_map
; i
++) {
392 if (e820
.map
[i
].type
== E820_RAM
) {
393 current_addr
= e820
.map
[i
].addr
+ e820
.map
[i
].size
;
394 if (current_addr
>= size
) {
395 e820
.map
[i
].size
-= current_addr
-size
;
403 static void __init
add_memory_region(unsigned long long start
,
404 unsigned long long size
, int type
)
412 printk(KERN_ERR
"Ooops! Too many entries in the memory map!\n");
416 e820
.map
[x
].addr
= start
;
417 e820
.map
[x
].size
= size
;
418 e820
.map
[x
].type
= type
;
421 } /* add_memory_region */
425 static void __init
print_memory_map(char *who
)
429 for (i
= 0; i
< e820
.nr_map
; i
++) {
430 printk(" %s: %016Lx - %016Lx ", who
,
432 e820
.map
[i
].addr
+ e820
.map
[i
].size
);
433 switch (e820
.map
[i
].type
) {
434 case E820_RAM
: printk("(usable)\n");
437 printk("(reserved)\n");
440 printk("(ACPI data)\n");
443 printk("(ACPI NVS)\n");
445 default: printk("type %lu\n", e820
.map
[i
].type
);
452 * Sanitize the BIOS e820 map.
454 * Some e820 responses include overlapping entries. The following
455 * replaces the original e820 map with a new one, removing overlaps.
458 struct change_member
{
459 struct e820entry
*pbios
; /* pointer to original bios entry */
460 unsigned long long addr
; /* address for this change point */
462 static struct change_member change_point_list
[2*E820MAX
] __initdata
;
463 static struct change_member
*change_point
[2*E820MAX
] __initdata
;
464 static struct e820entry
*overlap_list
[E820MAX
] __initdata
;
465 static struct e820entry new_bios
[E820MAX
] __initdata
;
467 static int __init
sanitize_e820_map(struct e820entry
* biosmap
, char * pnr_map
)
469 struct change_member
*change_tmp
;
470 unsigned long current_type
, last_type
;
471 unsigned long long last_addr
;
472 int chgidx
, still_changing
;
475 int old_nr
, new_nr
, chg_nr
;
479 Visually we're performing the following (1,2,3,4 = memory types)...
481 Sample memory map (w/overlaps):
482 ____22__________________
483 ______________________4_
484 ____1111________________
485 _44_____________________
486 11111111________________
487 ____________________33__
488 ___________44___________
489 __________33333_________
490 ______________22________
491 ___________________2222_
492 _________111111111______
493 _____________________11_
494 _________________4______
496 Sanitized equivalent (no overlap):
497 1_______________________
498 _44_____________________
499 ___1____________________
500 ____22__________________
501 ______11________________
502 _________1______________
503 __________3_____________
504 ___________44___________
505 _____________33_________
506 _______________2________
507 ________________1_______
508 _________________4______
509 ___________________2____
510 ____________________33__
511 ______________________4_
514 /* if there's only one memory region, don't bother */
520 /* bail out if we find any unreasonable addresses in bios map */
521 for (i
=0; i
<old_nr
; i
++)
522 if (biosmap
[i
].addr
+ biosmap
[i
].size
< biosmap
[i
].addr
)
525 /* create pointers for initial change-point information (for sorting) */
526 for (i
=0; i
< 2*old_nr
; i
++)
527 change_point
[i
] = &change_point_list
[i
];
529 /* record all known change-points (starting and ending addresses),
530 omitting those that are for empty memory regions */
532 for (i
=0; i
< old_nr
; i
++) {
533 if (biosmap
[i
].size
!= 0) {
534 change_point
[chgidx
]->addr
= biosmap
[i
].addr
;
535 change_point
[chgidx
++]->pbios
= &biosmap
[i
];
536 change_point
[chgidx
]->addr
= biosmap
[i
].addr
+ biosmap
[i
].size
;
537 change_point
[chgidx
++]->pbios
= &biosmap
[i
];
540 chg_nr
= chgidx
; /* true number of change-points */
542 /* sort change-point list by memory addresses (low -> high) */
544 while (still_changing
) {
546 for (i
=1; i
< chg_nr
; i
++) {
547 /* if <current_addr> < <last_addr>, swap */
548 /* or, if current=<start_addr> & last=<end_addr>, swap */
549 if ((change_point
[i
]->addr
< change_point
[i
-1]->addr
) ||
550 ((change_point
[i
]->addr
== change_point
[i
-1]->addr
) &&
551 (change_point
[i
]->addr
== change_point
[i
]->pbios
->addr
) &&
552 (change_point
[i
-1]->addr
!= change_point
[i
-1]->pbios
->addr
))
555 change_tmp
= change_point
[i
];
556 change_point
[i
] = change_point
[i
-1];
557 change_point
[i
-1] = change_tmp
;
563 /* create a new bios memory map, removing overlaps */
564 overlap_entries
=0; /* number of entries in the overlap table */
565 new_bios_entry
=0; /* index for creating new bios map entries */
566 last_type
= 0; /* start with undefined memory type */
567 last_addr
= 0; /* start with 0 as last starting address */
568 /* loop through change-points, determining effect on the new bios map */
569 for (chgidx
=0; chgidx
< chg_nr
; chgidx
++)
571 /* keep track of all overlapping bios entries */
572 if (change_point
[chgidx
]->addr
== change_point
[chgidx
]->pbios
->addr
)
574 /* add map entry to overlap list (> 1 entry implies an overlap) */
575 overlap_list
[overlap_entries
++]=change_point
[chgidx
]->pbios
;
579 /* remove entry from list (order independent, so swap with last) */
580 for (i
=0; i
<overlap_entries
; i
++)
582 if (overlap_list
[i
] == change_point
[chgidx
]->pbios
)
583 overlap_list
[i
] = overlap_list
[overlap_entries
-1];
587 /* if there are overlapping entries, decide which "type" to use */
588 /* (larger value takes precedence -- 1=usable, 2,3,4,4+=unusable) */
590 for (i
=0; i
<overlap_entries
; i
++)
591 if (overlap_list
[i
]->type
> current_type
)
592 current_type
= overlap_list
[i
]->type
;
593 /* continue building up new bios map based on this information */
594 if (current_type
!= last_type
) {
595 if (last_type
!= 0) {
596 new_bios
[new_bios_entry
].size
=
597 change_point
[chgidx
]->addr
- last_addr
;
598 /* move forward only if the new size was non-zero */
599 if (new_bios
[new_bios_entry
].size
!= 0)
600 if (++new_bios_entry
>= E820MAX
)
601 break; /* no more space left for new bios entries */
603 if (current_type
!= 0) {
604 new_bios
[new_bios_entry
].addr
= change_point
[chgidx
]->addr
;
605 new_bios
[new_bios_entry
].type
= current_type
;
606 last_addr
=change_point
[chgidx
]->addr
;
608 last_type
= current_type
;
611 new_nr
= new_bios_entry
; /* retain count for new bios entries */
613 /* copy new bios mapping into original location */
614 memcpy(biosmap
, new_bios
, new_nr
*sizeof(struct e820entry
));
621 * Copy the BIOS e820 map into a safe place.
623 * Sanity-check it while we're at it..
625 * If we're lucky and live on a modern system, the setup code
626 * will have given us a memory map that we can use to properly
627 * set up memory. If we aren't, we'll fake a memory map.
629 * We check to see that the memory map contains at least 2 elements
630 * before we'll use it, because the detection code in setup.S may
631 * not be perfect and most every PC known to man has two memory
632 * regions: one from 0 to 640k, and one from 1mb up. (The IBM
633 * thinkpad 560x, for example, does not cooperate with the memory
636 static int __init
copy_e820_map(struct e820entry
* biosmap
, int nr_map
)
638 /* Only one memory region (or negative)? Ignore it */
643 unsigned long long start
= biosmap
->addr
;
644 unsigned long long size
= biosmap
->size
;
645 unsigned long long end
= start
+ size
;
646 unsigned long type
= biosmap
->type
;
648 /* Overflow in 64 bits? Ignore the memory map. */
653 * Some BIOSes claim RAM in the 640k - 1M region.
654 * Not right. Fix it up.
656 if (type
== E820_RAM
) {
657 if (start
< 0x100000ULL
&& end
> 0xA0000ULL
) {
658 if (start
< 0xA0000ULL
)
659 add_memory_region(start
, 0xA0000ULL
-start
, type
);
660 if (end
<= 0x100000ULL
)
666 add_memory_region(start
, size
, type
);
667 } while (biosmap
++,--nr_map
);
671 #if defined(CONFIG_EDD) || defined(CONFIG_EDD_MODULE)
673 #ifdef CONFIG_EDD_MODULE
677 * copy_edd() - Copy the BIOS EDD information
678 * from boot_params into a safe place.
681 static inline void copy_edd(void)
683 memcpy(edd
.mbr_signature
, EDD_MBR_SIGNATURE
, sizeof(edd
.mbr_signature
));
684 memcpy(edd
.edd_info
, EDD_BUF
, sizeof(edd
.edd_info
));
685 edd
.mbr_signature_nr
= EDD_MBR_SIG_NR
;
686 edd
.edd_info_nr
= EDD_NR
;
689 static inline void copy_edd(void)
/*
 * Do NOT EVER look at the BIOS memory size location.
 * It does not work on many machines.
 *
 * A fixed value is used instead.
 */
#define LOWMEMSIZE()	(0x9f000)
700 static void __init
parse_cmdline_early (char ** cmdline_p
)
702 char c
= ' ', *to
= command_line
, *from
= saved_command_line
;
706 /* Save unparsed command line copy for /proc/cmdline */
707 saved_command_line
[COMMAND_LINE_SIZE
-1] = '\0';
713 * "mem=nopentium" disables the 4MB page tables.
714 * "mem=XXX[kKmM]" defines a memory region from HIGH_MEM
715 * to <mem>, overriding the bios size.
716 * "memmap=XXX[KkmM]@XXX[KkmM]" defines a memory region from
717 * <start> to <start>+<mem>, overriding the bios size.
719 * HPA tells me bootloaders need to parse mem=, so no new
720 * option should be mem= [also see Documentation/i386/boot.txt]
722 if (!memcmp(from
, "mem=", 4)) {
723 if (to
!= command_line
)
725 if (!memcmp(from
+4, "nopentium", 9)) {
727 clear_bit(X86_FEATURE_PSE
, boot_cpu_data
.x86_capability
);
730 /* If the user specifies memory size, we
731 * limit the BIOS-provided memory map to
732 * that size. exactmap can be used to specify
733 * the exact map. mem=number can be used to
734 * trim the existing memory map.
736 unsigned long long mem_size
;
738 mem_size
= memparse(from
+4, &from
);
739 limit_regions(mem_size
);
744 else if (!memcmp(from
, "memmap=", 7)) {
745 if (to
!= command_line
)
747 if (!memcmp(from
+7, "exactmap", 8)) {
748 #ifdef CONFIG_CRASH_DUMP
749 /* If we are doing a crash dump, we
750 * still need to know the real mem
751 * size before original memory map is
755 saved_max_pfn
= max_pfn
;
761 /* If the user specifies memory size, we
762 * limit the BIOS-provided memory map to
763 * that size. exactmap can be used to specify
764 * the exact map. mem=number can be used to
765 * trim the existing memory map.
767 unsigned long long start_at
, mem_size
;
769 mem_size
= memparse(from
+7, &from
);
771 start_at
= memparse(from
+1, &from
);
772 add_memory_region(start_at
, mem_size
, E820_RAM
);
773 } else if (*from
== '#') {
774 start_at
= memparse(from
+1, &from
);
775 add_memory_region(start_at
, mem_size
, E820_ACPI
);
776 } else if (*from
== '$') {
777 start_at
= memparse(from
+1, &from
);
778 add_memory_region(start_at
, mem_size
, E820_RESERVED
);
780 limit_regions(mem_size
);
786 else if (!memcmp(from
, "noexec=", 7))
787 noexec_setup(from
+ 7);
790 #ifdef CONFIG_X86_SMP
792 * If the BIOS enumerates physical processors before logical,
793 * maxcpus=N at enumeration-time can be used to disable HT.
795 else if (!memcmp(from
, "maxcpus=", 8)) {
796 extern unsigned int maxcpus
;
798 maxcpus
= simple_strtoul(from
+ 8, NULL
, 0);
803 /* "acpi=off" disables both ACPI table parsing and interpreter */
804 else if (!memcmp(from
, "acpi=off", 8)) {
808 /* acpi=force to over-ride black-list */
809 else if (!memcmp(from
, "acpi=force", 10)) {
815 /* acpi=strict disables out-of-spec workarounds */
816 else if (!memcmp(from
, "acpi=strict", 11)) {
820 /* Limit ACPI just to boot-time to enable HT */
821 else if (!memcmp(from
, "acpi=ht", 7)) {
827 /* "pci=noacpi" disable ACPI IRQ routing and PCI scan */
828 else if (!memcmp(from
, "pci=noacpi", 10)) {
831 /* "acpi=noirq" disables ACPI interrupt routing */
832 else if (!memcmp(from
, "acpi=noirq", 10)) {
836 else if (!memcmp(from
, "acpi_sci=edge", 13))
837 acpi_sci_flags
.trigger
= 1;
839 else if (!memcmp(from
, "acpi_sci=level", 14))
840 acpi_sci_flags
.trigger
= 3;
842 else if (!memcmp(from
, "acpi_sci=high", 13))
843 acpi_sci_flags
.polarity
= 1;
845 else if (!memcmp(from
, "acpi_sci=low", 12))
846 acpi_sci_flags
.polarity
= 3;
848 #ifdef CONFIG_X86_IO_APIC
849 else if (!memcmp(from
, "acpi_skip_timer_override", 24))
850 acpi_skip_timer_override
= 1;
853 #ifdef CONFIG_X86_LOCAL_APIC
854 if (!memcmp(from
, "disable_timer_pin_1", 19))
855 disable_timer_pin_1
= 1;
856 if (!memcmp(from
, "enable_timer_pin_1", 18))
857 disable_timer_pin_1
= -1;
859 /* disable IO-APIC */
860 else if (!memcmp(from
, "noapic", 6))
861 disable_ioapic_setup();
862 #endif /* CONFIG_X86_LOCAL_APIC */
863 #endif /* CONFIG_ACPI */
865 #ifdef CONFIG_X86_LOCAL_APIC
866 /* enable local APIC */
867 else if (!memcmp(from
, "lapic", 5))
870 /* disable local APIC */
871 else if (!memcmp(from
, "nolapic", 6))
873 #endif /* CONFIG_X86_LOCAL_APIC */
876 /* crashkernel=size@addr specifies the location to reserve for
877 * a crash kernel. By reserving this memory we guarantee
878 * that linux never sets it up as a DMA target.
879 * Useful for holding code to do something appropriate
880 * after a kernel panic.
882 else if (!memcmp(from
, "crashkernel=", 12)) {
883 unsigned long size
, base
;
884 size
= memparse(from
+12, &from
);
886 base
= memparse(from
+1, &from
);
887 /* FIXME: Do I want a sanity check
888 * to validate the memory range?
890 crashk_res
.start
= base
;
891 crashk_res
.end
= base
+ size
- 1;
895 #ifdef CONFIG_CRASH_DUMP
896 /* elfcorehdr= specifies the location of elf core header
897 * stored by the crashed kernel.
899 else if (!memcmp(from
, "elfcorehdr=", 11))
900 elfcorehdr_addr
= memparse(from
+11, &from
);
904 * highmem=size forces highmem to be exactly 'size' bytes.
905 * This works even on boxes that have no highmem otherwise.
906 * This also works to reduce highmem size on bigger boxes.
908 else if (!memcmp(from
, "highmem=", 8))
909 highmem_pages
= memparse(from
+8, &from
) >> PAGE_SHIFT
;
912 * vmalloc=size forces the vmalloc area to be exactly 'size'
913 * bytes. This can be used to increase (or decrease) the
914 * vmalloc area - the default is 128m.
916 else if (!memcmp(from
, "vmalloc=", 8))
917 __VMALLOC_RESERVE
= memparse(from
+8, &from
);
923 if (COMMAND_LINE_SIZE
<= ++len
)
928 *cmdline_p
= command_line
;
930 printk(KERN_INFO
"user-defined physical RAM map:\n");
931 print_memory_map("user");
936 * Callback for efi_memmap_walk.
939 efi_find_max_pfn(unsigned long start
, unsigned long end
, void *arg
)
941 unsigned long *max_pfn
= arg
, pfn
;
944 pfn
= PFN_UP(end
-1);
953 * Find the highest page frame number we have available
955 void __init
find_max_pfn(void)
961 efi_memmap_walk(efi_find_max_pfn
, &max_pfn
);
965 for (i
= 0; i
< e820
.nr_map
; i
++) {
966 unsigned long start
, end
;
968 if (e820
.map
[i
].type
!= E820_RAM
)
970 start
= PFN_UP(e820
.map
[i
].addr
);
971 end
= PFN_DOWN(e820
.map
[i
].addr
+ e820
.map
[i
].size
);
980 * Determine low and high memory ranges:
982 unsigned long __init
find_max_low_pfn(void)
984 unsigned long max_low_pfn
;
986 max_low_pfn
= max_pfn
;
987 if (max_low_pfn
> MAXMEM_PFN
) {
988 if (highmem_pages
== -1)
989 highmem_pages
= max_pfn
- MAXMEM_PFN
;
990 if (highmem_pages
+ MAXMEM_PFN
< max_pfn
)
991 max_pfn
= MAXMEM_PFN
+ highmem_pages
;
992 if (highmem_pages
+ MAXMEM_PFN
> max_pfn
) {
993 printk("only %luMB highmem pages available, ignoring highmem size of %uMB.\n", pages_to_mb(max_pfn
- MAXMEM_PFN
), pages_to_mb(highmem_pages
));
996 max_low_pfn
= MAXMEM_PFN
;
997 #ifndef CONFIG_HIGHMEM
998 /* Maximum memory usable is what is directly addressable */
999 printk(KERN_WARNING
"Warning only %ldMB will be used.\n",
1001 if (max_pfn
> MAX_NONPAE_PFN
)
1002 printk(KERN_WARNING
"Use a PAE enabled kernel.\n");
1004 printk(KERN_WARNING
"Use a HIGHMEM enabled kernel.\n");
1005 max_pfn
= MAXMEM_PFN
;
1006 #else /* !CONFIG_HIGHMEM */
1007 #ifndef CONFIG_X86_PAE
1008 if (max_pfn
> MAX_NONPAE_PFN
) {
1009 max_pfn
= MAX_NONPAE_PFN
;
1010 printk(KERN_WARNING
"Warning only 4GB will be used.\n");
1011 printk(KERN_WARNING
"Use a PAE enabled kernel.\n");
1013 #endif /* !CONFIG_X86_PAE */
1014 #endif /* !CONFIG_HIGHMEM */
1016 if (highmem_pages
== -1)
1018 #ifdef CONFIG_HIGHMEM
1019 if (highmem_pages
>= max_pfn
) {
1020 printk(KERN_ERR
"highmem size specified (%uMB) is bigger than pages available (%luMB)!.\n", pages_to_mb(highmem_pages
), pages_to_mb(max_pfn
));
1023 if (highmem_pages
) {
1024 if (max_low_pfn
-highmem_pages
< 64*1024*1024/PAGE_SIZE
){
1025 printk(KERN_ERR
"highmem size %uMB results in smaller than 64MB lowmem, ignoring it.\n", pages_to_mb(highmem_pages
));
1028 max_low_pfn
-= highmem_pages
;
1032 printk(KERN_ERR
"ignoring highmem size on non-highmem kernel!\n");
1039 * Free all available memory for boot time allocation. Used
1040 * as a callback function by efi_memmap_walk()
1044 free_available_memory(unsigned long start
, unsigned long end
, void *arg
)
1046 /* check max_low_pfn */
1047 if (start
>= ((max_low_pfn
+ 1) << PAGE_SHIFT
))
1049 if (end
>= ((max_low_pfn
+ 1) << PAGE_SHIFT
))
1050 end
= (max_low_pfn
+ 1) << PAGE_SHIFT
;
1052 free_bootmem(start
, end
- start
);
1057 * Register fully available low RAM pages with the bootmem allocator.
1059 static void __init
register_bootmem_low_pages(unsigned long max_low_pfn
)
1064 efi_memmap_walk(free_available_memory
, NULL
);
1067 for (i
= 0; i
< e820
.nr_map
; i
++) {
1068 unsigned long curr_pfn
, last_pfn
, size
;
1070 * Reserve usable low memory
1072 if (e820
.map
[i
].type
!= E820_RAM
)
1075 * We are rounding up the start address of usable memory:
1077 curr_pfn
= PFN_UP(e820
.map
[i
].addr
);
1078 if (curr_pfn
>= max_low_pfn
)
1081 * ... and at the end of the usable range downwards:
1083 last_pfn
= PFN_DOWN(e820
.map
[i
].addr
+ e820
.map
[i
].size
);
1085 if (last_pfn
> max_low_pfn
)
1086 last_pfn
= max_low_pfn
;
1089 * .. finally, did all the rounding and playing
1090 * around just make the area go away?
1092 if (last_pfn
<= curr_pfn
)
1095 size
= last_pfn
- curr_pfn
;
1096 free_bootmem(PFN_PHYS(curr_pfn
), PFN_PHYS(size
));
1101 * workaround for Dell systems that neglect to reserve EBDA
1103 static void __init
reserve_ebda_region(void)
1106 addr
= get_bios_ebda();
1108 reserve_bootmem(addr
, PAGE_SIZE
);
1111 #ifndef CONFIG_NEED_MULTIPLE_NODES
1112 void __init
setup_bootmem_allocator(void);
1113 static unsigned long __init
setup_memory(void)
1116 * partially used pages are not usable - thus
1117 * we are rounding upwards:
1119 min_low_pfn
= PFN_UP(init_pg_tables_end
);
1123 max_low_pfn
= find_max_low_pfn();
1125 #ifdef CONFIG_HIGHMEM
1126 highstart_pfn
= highend_pfn
= max_pfn
;
1127 if (max_pfn
> max_low_pfn
) {
1128 highstart_pfn
= max_low_pfn
;
1130 printk(KERN_NOTICE
"%ldMB HIGHMEM available.\n",
1131 pages_to_mb(highend_pfn
- highstart_pfn
));
1133 printk(KERN_NOTICE
"%ldMB LOWMEM available.\n",
1134 pages_to_mb(max_low_pfn
));
1136 setup_bootmem_allocator();
1141 void __init
zone_sizes_init(void)
1143 unsigned long zones_size
[MAX_NR_ZONES
] = {0, 0, 0};
1144 unsigned int max_dma
, low
;
1146 max_dma
= virt_to_phys((char *)MAX_DMA_ADDRESS
) >> PAGE_SHIFT
;
1150 zones_size
[ZONE_DMA
] = low
;
1152 zones_size
[ZONE_DMA
] = max_dma
;
1153 zones_size
[ZONE_NORMAL
] = low
- max_dma
;
1154 #ifdef CONFIG_HIGHMEM
1155 zones_size
[ZONE_HIGHMEM
] = highend_pfn
- low
;
1158 free_area_init(zones_size
);
1161 extern unsigned long __init
setup_memory(void);
1162 extern void zone_sizes_init(void);
1163 #endif /* !CONFIG_NEED_MULTIPLE_NODES */
1165 void __init
setup_bootmem_allocator(void)
1167 unsigned long bootmap_size
;
1169 * Initialize the boot-time allocator (with low memory only):
1171 bootmap_size
= init_bootmem(min_low_pfn
, max_low_pfn
);
1173 register_bootmem_low_pages(max_low_pfn
);
1176 * Reserve the bootmem bitmap itself as well. We do this in two
1177 * steps (first step was init_bootmem()) because this catches
1178 * the (very unlikely) case of us accidentally initializing the
1179 * bootmem allocator with an invalid RAM area.
1181 reserve_bootmem(__PHYSICAL_START
, (PFN_PHYS(min_low_pfn
) +
1182 bootmap_size
+ PAGE_SIZE
-1) - (__PHYSICAL_START
));
1185 * reserve physical page 0 - it's a special BIOS page on many boxes,
1186 * enabling clean reboots, SMP operation, laptop functions.
1188 reserve_bootmem(0, PAGE_SIZE
);
1190 /* reserve EBDA region, it's a 4K region */
1191 reserve_ebda_region();
1193 /* could be an AMD 768MPX chipset. Reserve a page before VGA to prevent
1194 PCI prefetch into it (errata #56). Usually the page is reserved anyways,
1195 unless you have no PS/2 mouse plugged in. */
1196 if (boot_cpu_data
.x86_vendor
== X86_VENDOR_AMD
&&
1197 boot_cpu_data
.x86
== 6)
1198 reserve_bootmem(0xa0000 - 4096, 4096);
1202 * But first pinch a few for the stack/trampoline stuff
1203 * FIXME: Don't need the extra page at 4K, but need to fix
1204 * trampoline before removing it. (see the GDT stuff)
1206 reserve_bootmem(PAGE_SIZE
, PAGE_SIZE
);
1208 #ifdef CONFIG_ACPI_SLEEP
1210 * Reserve low memory region for sleep support.
1212 acpi_reserve_bootmem();
1214 #ifdef CONFIG_X86_FIND_SMP_CONFIG
1216 * Find and reserve possible boot-time SMP configuration:
1221 #ifdef CONFIG_BLK_DEV_INITRD
1222 if (LOADER_TYPE
&& INITRD_START
) {
1223 if (INITRD_START
+ INITRD_SIZE
<= (max_low_pfn
<< PAGE_SHIFT
)) {
1224 reserve_bootmem(INITRD_START
, INITRD_SIZE
);
1226 INITRD_START
? INITRD_START
+ PAGE_OFFSET
: 0;
1227 initrd_end
= initrd_start
+INITRD_SIZE
;
1230 printk(KERN_ERR
"initrd extends beyond end of memory "
1231 "(0x%08lx > 0x%08lx)\ndisabling initrd\n",
1232 INITRD_START
+ INITRD_SIZE
,
1233 max_low_pfn
<< PAGE_SHIFT
);
1239 if (crashk_res
.start
!= crashk_res
.end
)
1240 reserve_bootmem(crashk_res
.start
,
1241 crashk_res
.end
- crashk_res
.start
+ 1);
1246 * The node 0 pgdat is initialized before all of these because
1247 * it's needed for bootmem. node>0 pgdats have their virtual
1248 * space allocated before the pagetables are in place to access
1249 * them, so they can't be cleared then.
1251 * This should all compile down to nothing when NUMA is off.
1253 void __init
remapped_pgdat_init(void)
1257 for_each_online_node(nid
) {
1259 memset(NODE_DATA(nid
), 0, sizeof(struct pglist_data
));
1264 * Request address space for all standard RAM and ROM resources
1265 * and also for regions reported as reserved by the e820.
1268 legacy_init_iomem_resources(struct resource
*code_resource
, struct resource
*data_resource
)
1273 for (i
= 0; i
< e820
.nr_map
; i
++) {
1274 struct resource
*res
;
1275 if (e820
.map
[i
].addr
+ e820
.map
[i
].size
> 0x100000000ULL
)
1277 res
= alloc_bootmem_low(sizeof(struct resource
));
1278 switch (e820
.map
[i
].type
) {
1279 case E820_RAM
: res
->name
= "System RAM"; break;
1280 case E820_ACPI
: res
->name
= "ACPI Tables"; break;
1281 case E820_NVS
: res
->name
= "ACPI Non-volatile Storage"; break;
1282 default: res
->name
= "reserved";
1284 res
->start
= e820
.map
[i
].addr
;
1285 res
->end
= res
->start
+ e820
.map
[i
].size
- 1;
1286 res
->flags
= IORESOURCE_MEM
| IORESOURCE_BUSY
;
1287 request_resource(&iomem_resource
, res
);
1288 if (e820
.map
[i
].type
== E820_RAM
) {
1290 * We don't know which RAM region contains kernel data,
1291 * so we try it repeatedly and let the resource manager
1294 request_resource(res
, code_resource
);
1295 request_resource(res
, data_resource
);
1297 request_resource(res
, &crashk_res
);
1304 * Request address space for all standard resources
1306 static void __init
register_memory(void)
1308 unsigned long gapstart
, gapsize
, round
;
1309 unsigned long long last
;
1313 efi_initialize_iomem_resources(&code_resource
, &data_resource
);
1315 legacy_init_iomem_resources(&code_resource
, &data_resource
);
1317 /* EFI systems may still have VGA */
1318 request_resource(&iomem_resource
, &video_ram_resource
);
1320 /* request I/O space for devices used on all i[345]86 PCs */
1321 for (i
= 0; i
< STANDARD_IO_RESOURCES
; i
++)
1322 request_resource(&ioport_resource
, &standard_io_resources
[i
]);
1325 * Search for the bigest gap in the low 32 bits of the e820
1328 last
= 0x100000000ull
;
1329 gapstart
= 0x10000000;
1333 unsigned long long start
= e820
.map
[i
].addr
;
1334 unsigned long long end
= start
+ e820
.map
[i
].size
;
1337 * Since "last" is at most 4GB, we know we'll
1338 * fit in 32 bits if this condition is true
1341 unsigned long gap
= last
- end
;
1343 if (gap
> gapsize
) {
1353 * See how much we want to round up: start off with
1354 * rounding to the next 1MB area.
1357 while ((gapsize
>> 4) > round
)
1359 /* Fun with two's complement */
1360 pci_mem_start
= (gapstart
+ round
) & -round
;
1362 printk("Allocating PCI resources starting at %08lx (gap: %08lx:%08lx)\n",
1363 pci_mem_start
, gapstart
, gapsize
);
1366 /* Use inline assembly to define this because the nops are defined
1367 as inline assembly strings in the include files and we cannot
1368 get them easily into strings. */
1369 asm("\t.data\nintelnops: "
1370 GENERIC_NOP1 GENERIC_NOP2 GENERIC_NOP3 GENERIC_NOP4 GENERIC_NOP5 GENERIC_NOP6
1371 GENERIC_NOP7 GENERIC_NOP8
);
1372 asm("\t.data\nk8nops: "
1373 K8_NOP1 K8_NOP2 K8_NOP3 K8_NOP4 K8_NOP5 K8_NOP6
1375 asm("\t.data\nk7nops: "
1376 K7_NOP1 K7_NOP2 K7_NOP3 K7_NOP4 K7_NOP5 K7_NOP6
1379 extern unsigned char intelnops
[], k8nops
[], k7nops
[];
1380 static unsigned char *intel_nops
[ASM_NOP_MAX
+1] = {
1385 intelnops
+ 1 + 2 + 3,
1386 intelnops
+ 1 + 2 + 3 + 4,
1387 intelnops
+ 1 + 2 + 3 + 4 + 5,
1388 intelnops
+ 1 + 2 + 3 + 4 + 5 + 6,
1389 intelnops
+ 1 + 2 + 3 + 4 + 5 + 6 + 7,
1391 static unsigned char *k8_nops
[ASM_NOP_MAX
+1] = {
1397 k8nops
+ 1 + 2 + 3 + 4,
1398 k8nops
+ 1 + 2 + 3 + 4 + 5,
1399 k8nops
+ 1 + 2 + 3 + 4 + 5 + 6,
1400 k8nops
+ 1 + 2 + 3 + 4 + 5 + 6 + 7,
1402 static unsigned char *k7_nops
[ASM_NOP_MAX
+1] = {
1408 k7nops
+ 1 + 2 + 3 + 4,
1409 k7nops
+ 1 + 2 + 3 + 4 + 5,
1410 k7nops
+ 1 + 2 + 3 + 4 + 5 + 6,
1411 k7nops
+ 1 + 2 + 3 + 4 + 5 + 6 + 7,
1415 unsigned char **noptable
;
1417 { X86_FEATURE_K8
, k8_nops
},
1418 { X86_FEATURE_K7
, k7_nops
},
1422 /* Replace instructions with better alternatives for this CPU type.
1424 This runs before SMP is initialized to avoid SMP problems with
1425 self modifying code. This implies that assymetric systems where
1426 APs have less capabilities than the boot processor are not handled.
1427 Tough. Make sure you disable such features by hand. */
1428 void apply_alternatives(void *start
, void *end
)
1430 struct alt_instr
*a
;
1432 unsigned char **noptable
= intel_nops
;
1433 for (i
= 0; noptypes
[i
].cpuid
>= 0; i
++) {
1434 if (boot_cpu_has(noptypes
[i
].cpuid
)) {
1435 noptable
= noptypes
[i
].noptable
;
1439 for (a
= start
; (void *)a
< end
; a
++) {
1440 if (!boot_cpu_has(a
->cpuid
))
1442 BUG_ON(a
->replacementlen
> a
->instrlen
);
1443 memcpy(a
->instr
, a
->replacement
, a
->replacementlen
);
1444 diff
= a
->instrlen
- a
->replacementlen
;
1445 /* Pad the rest with nops */
1446 for (i
= a
->replacementlen
; diff
> 0; diff
-= k
, i
+= k
) {
1448 if (k
> ASM_NOP_MAX
)
1450 memcpy(a
->instr
+ i
, noptable
[k
], k
);
1455 void __init
alternative_instructions(void)
1457 extern struct alt_instr __alt_instructions
[], __alt_instructions_end
[];
1458 apply_alternatives(__alt_instructions
, __alt_instructions_end
);
1461 static char * __init
machine_specific_memory_setup(void);
/*
 * Record whether an MCA bus is present.  Without CONFIG_MCA there is
 * nothing to record and the call is a no-op.
 */
#ifdef CONFIG_MCA
static void set_mca_bus(int x)
{
	MCA_bus = x;
}
#else
static void set_mca_bus(int x) { }
#endif
1473 * Determine if we were loaded by an EFI loader. If so, then we have also been
1474 * passed the efi memmap, systab, etc., so we should use these data structures
1475 * for initialization. Note, the efi init code path is determined by the
1476 * global efi_enabled. This allows the same kernel image to be used on existing
1477 * systems (with a traditional BIOS) as well as on EFI systems.
1479 void __init
setup_arch(char **cmdline_p
)
1481 unsigned long max_low_pfn
;
1483 memcpy(&boot_cpu_data
, &new_cpu_data
, sizeof(new_cpu_data
));
1484 pre_setup_arch_hook();
1488 * FIXME: This isn't an official loader_type right
1489 * now but does currently work with elilo.
1490 * If we were configured as an EFI kernel, check to make
1491 * sure that we were loaded correctly from elilo and that
1492 * the system table is valid. If not, then initialize normally.
1495 if ((LOADER_TYPE
== 0x50) && EFI_SYSTAB
)
1499 ROOT_DEV
= old_decode_dev(ORIG_ROOT_DEV
);
1500 drive_info
= DRIVE_INFO
;
1501 screen_info
= SCREEN_INFO
;
1502 edid_info
= EDID_INFO
;
1503 apm_info
.bios
= APM_BIOS_INFO
;
1504 ist_info
= IST_INFO
;
1505 saved_videomode
= VIDEO_MODE
;
1506 if( SYS_DESC_TABLE
.length
!= 0 ) {
1507 set_mca_bus(SYS_DESC_TABLE
.table
[3] & 0x2);
1508 machine_id
= SYS_DESC_TABLE
.table
[0];
1509 machine_submodel_id
= SYS_DESC_TABLE
.table
[1];
1510 BIOS_revision
= SYS_DESC_TABLE
.table
[2];
1512 bootloader_type
= LOADER_TYPE
;
1514 #ifdef CONFIG_BLK_DEV_RAM
1515 rd_image_start
= RAMDISK_FLAGS
& RAMDISK_IMAGE_START_MASK
;
1516 rd_prompt
= ((RAMDISK_FLAGS
& RAMDISK_PROMPT_FLAG
) != 0);
1517 rd_doload
= ((RAMDISK_FLAGS
& RAMDISK_LOAD_FLAG
) != 0);
1523 printk(KERN_INFO
"BIOS-provided physical RAM map:\n");
1524 print_memory_map(machine_specific_memory_setup());
1529 if (!MOUNT_ROOT_RDONLY
)
1530 root_mountflags
&= ~MS_RDONLY
;
1531 init_mm
.start_code
= (unsigned long) _text
;
1532 init_mm
.end_code
= (unsigned long) _etext
;
1533 init_mm
.end_data
= (unsigned long) _edata
;
1534 init_mm
.brk
= init_pg_tables_end
+ PAGE_OFFSET
;
1536 code_resource
.start
= virt_to_phys(_text
);
1537 code_resource
.end
= virt_to_phys(_etext
)-1;
1538 data_resource
.start
= virt_to_phys(_etext
);
1539 data_resource
.end
= virt_to_phys(_edata
)-1;
1541 parse_cmdline_early(cmdline_p
);
1543 max_low_pfn
= setup_memory();
1546 * NOTE: before this point _nobody_ is allowed to allocate
1547 * any memory using the bootmem allocator. Although the
1548 * alloctor is now initialised only the first 8Mb of the kernel
1549 * virtual address space has been mapped. All allocations before
1550 * paging_init() has completed must use the alloc_bootmem_low_pages()
1551 * variant (which allocates DMA'able memory) and care must be taken
1552 * not to exceed the 8Mb limit.
1556 smp_alloc_memory(); /* AP processor realmode stacks in low memory*/
1559 remapped_pgdat_init();
1564 * NOTE: at this point the bootmem allocator is fully available.
1567 #ifdef CONFIG_EARLY_PRINTK
1569 char *s
= strstr(*cmdline_p
, "earlyprintk=");
1571 extern void setup_early_printk(char *);
1573 setup_early_printk(s
);
1574 printk("early console enabled\n");
1582 #ifdef CONFIG_X86_GENERICARCH
1583 generic_apic_probe(*cmdline_p
);
1590 * Parse the ACPI tables for possible boot-time SMP configuration.
1592 acpi_boot_table_init();
1595 #if defined(CONFIG_SMP) && defined(CONFIG_X86_PC)
1597 printk(KERN_WARNING
"More than 8 CPUs detected and "
1598 "CONFIG_X86_PC cannot handle it.\nUse "
1599 "CONFIG_X86_GENERICARCH or CONFIG_X86_BIGSMP.\n");
1602 #ifdef CONFIG_X86_LOCAL_APIC
1603 if (smp_found_config
)
1610 #if defined(CONFIG_VGA_CONSOLE)
1611 if (!efi_enabled
|| (efi_mem_type(0xa0000) != EFI_CONVENTIONAL_MEMORY
))
1612 conswitchp
= &vga_con
;
1613 #elif defined(CONFIG_DUMMY_CONSOLE)
1614 conswitchp
= &dummy_con
;
1619 #include "setup_arch_post.h"
1623 * c-file-style:"k&r"