[PATCH] x86_64: Collected NMI watchdog fixes.
[linux-2.6/linux-loongson.git] / arch / x86_64 / kernel / smpboot.c
blobd00e494c1a39dd748c55d35402f5533c963406bb
1 /*
2 * x86 SMP booting functions
4 * (c) 1995 Alan Cox, Building #3 <alan@redhat.com>
5 * (c) 1998, 1999, 2000 Ingo Molnar <mingo@redhat.com>
6 * Copyright 2001 Andi Kleen, SuSE Labs.
8 * Much of the core SMP work is based on previous work by Thomas Radke, to
9 * whom a great many thanks are extended.
11 * Thanks to Intel for making available several different Pentium,
12 * Pentium Pro and Pentium-II/Xeon MP machines.
13 * Original development of Linux SMP code supported by Caldera.
15 * This code is released under the GNU General Public License version 2
17 * Fixes
18 * Felix Koop : NR_CPUS used properly
19 * Jose Renau : Handle single CPU case.
20 * Alan Cox : By repeated request 8) - Total BogoMIP report.
21 * Greg Wright : Fix for kernel stacks panic.
22 * Erich Boleyn : MP v1.4 and additional changes.
23 * Matthias Sattler : Changes for 2.1 kernel map.
24 * Michel Lespinasse : Changes for 2.1 kernel map.
25 * Michael Chastain : Change trampoline.S to gnu as.
26 * Alan Cox : Dumb bug: 'B' step PPro's are fine
27 * Ingo Molnar : Added APIC timers, based on code
28 * from Jose Renau
29 * Ingo Molnar : various cleanups and rewrites
30 * Tigran Aivazian : fixed "0.00 in /proc/uptime on SMP" bug.
31 * Maciej W. Rozycki : Bits for genuine 82489DX APICs
32 * Andi Kleen : Changed for SMP boot into long mode.
33 * Rusty Russell : Hacked into shape for new "hotplug" boot process.
34 * Andi Kleen : Converted to new state machine.
35 * Various cleanups.
36 * Probably mostly hotplug CPU ready now.
40 #include <linux/config.h>
41 #include <linux/init.h>
43 #include <linux/mm.h>
44 #include <linux/kernel_stat.h>
45 #include <linux/smp_lock.h>
46 #include <linux/irq.h>
47 #include <linux/bootmem.h>
48 #include <linux/thread_info.h>
49 #include <linux/module.h>
51 #include <linux/delay.h>
52 #include <linux/mc146818rtc.h>
53 #include <asm/mtrr.h>
54 #include <asm/pgalloc.h>
55 #include <asm/desc.h>
56 #include <asm/kdebug.h>
57 #include <asm/tlbflush.h>
58 #include <asm/proto.h>
59 #include <asm/nmi.h>
61 /* Change for real CPU hotplug. Note other files need to be fixed
62 first too. */
63 #define __cpuinit __init
64 #define __cpuinitdata __initdata
66 /* Number of siblings per CPU package */
67 int smp_num_siblings = 1;
68 /* Package ID of each logical CPU */
69 u8 phys_proc_id[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID };
70 u8 cpu_core_id[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID };
71 EXPORT_SYMBOL(phys_proc_id);
72 EXPORT_SYMBOL(cpu_core_id);
74 /* Bitmask of currently online CPUs */
75 cpumask_t cpu_online_map;
77 EXPORT_SYMBOL(cpu_online_map);
80 * Private maps to synchronize booting between AP and BP.
81 * Probably not needed anymore, but it makes for easier debugging. -AK
83 cpumask_t cpu_callin_map;
84 cpumask_t cpu_callout_map;
86 cpumask_t cpu_possible_map;
87 EXPORT_SYMBOL(cpu_possible_map);
89 /* Per CPU bogomips and other parameters */
90 struct cpuinfo_x86 cpu_data[NR_CPUS] __cacheline_aligned;
92 /* Set when the idlers are all forked */
93 int smp_threads_ready;
95 cpumask_t cpu_sibling_map[NR_CPUS] __cacheline_aligned;
96 cpumask_t cpu_core_map[NR_CPUS] __cacheline_aligned;
99 * Trampoline 80x86 program as an array.
102 extern unsigned char trampoline_data[];
103 extern unsigned char trampoline_end[];
106 * Currently trivial. Write the real->protected mode
107 * bootstrap into the page concerned. The caller
108 * has made sure it's suitably aligned.
111 static unsigned long __cpuinit setup_trampoline(void)
113 void *tramp = __va(SMP_TRAMPOLINE_BASE);
114 memcpy(tramp, trampoline_data, trampoline_end - trampoline_data);
115 return virt_to_phys(tramp);
119 * The bootstrap kernel entry code has set these up. Save them for
120 * a given CPU
123 static void __cpuinit smp_store_cpu_info(int id)
125 struct cpuinfo_x86 *c = cpu_data + id;
127 *c = boot_cpu_data;
128 identify_cpu(c);
129 print_cpu_info(c);
133 * New Funky TSC sync algorithm borrowed from IA64.
134 * Main advantage is that it doesn't reset the TSCs fully and
135 * in general looks more robust and it works better than my earlier
136 * attempts. I believe it was written by David Mosberger. Some minor
137 * adjustments for x86-64 by me -AK
139 * Original comment reproduced below.
141 * Synchronize TSC of the current (slave) CPU with the TSC of the
142 * MASTER CPU (normally the time-keeper CPU). We use a closed loop to
143 * eliminate the possibility of unaccounted-for errors (such as
144 * getting a machine check in the middle of a calibration step). The
145 * basic idea is for the slave to ask the master what itc value it has
146 * and to read its own itc before and after the master responds. Each
147 * iteration gives us three timestamps:
149 * slave master
151 * t0 ---\
152 * ---\
153 * --->
154 * tm
155 * /---
156 * /---
157 * t1 <---
160 * The goal is to adjust the slave's TSC such that tm falls exactly
161 * half-way between t0 and t1. If we achieve this, the clocks are
162 * synchronized provided the interconnect between the slave and the
163 * master is symmetric. Even if the interconnect were asymmetric, we
164 * would still know that the synchronization error is smaller than the
165 * roundtrip latency (t1 - t0).
167 * When the interconnect is quiet and symmetric, this lets us
168 * synchronize the TSC to within one or two cycles. However, we can
169 * only *guarantee* that the synchronization is accurate to within a
170 * round-trip time, which is typically in the range of several hundred
171 * cycles (e.g., ~500 cycles). In practice, this means that the TSCs
172 * are usually almost perfectly synchronized, but we shouldn't assume
173 * that the accuracy is much better than half a micro second or so.
175 * [there are other errors like the latency of RDTSC and of the
176 * WRMSR. These can also account to hundreds of cycles. So it's
177 * probably worse. It claims 153 cycles error on a dual Opteron,
178 * but I suspect the numbers are actually somewhat worse -AK]
181 #define MASTER 0
182 #define SLAVE (SMP_CACHE_BYTES/8)
184 /* Intentionally don't use cpu_relax() while TSC synchronization
185 because we don't want to go into funky power save modi or cause
186 hypervisors to schedule us away. Going to sleep would likely affect
187 latency and low latency is the primary objective here. -AK */
188 #define no_cpu_relax() barrier()
190 static __cpuinitdata DEFINE_SPINLOCK(tsc_sync_lock);
191 static volatile __cpuinitdata unsigned long go[SLAVE + 1];
192 static int notscsync __cpuinitdata;
194 #undef DEBUG_TSC_SYNC
196 #define NUM_ROUNDS 64 /* magic value */
197 #define NUM_ITERS 5 /* likewise */
199 /* Callback on boot CPU */
200 static __cpuinit void sync_master(void *arg)
202 unsigned long flags, i;
204 if (smp_processor_id() != boot_cpu_id)
205 return;
207 go[MASTER] = 0;
209 local_irq_save(flags);
211 for (i = 0; i < NUM_ROUNDS*NUM_ITERS; ++i) {
212 while (!go[MASTER])
213 no_cpu_relax();
214 go[MASTER] = 0;
215 rdtscll(go[SLAVE]);
218 local_irq_restore(flags);
222 * Return the number of cycles by which our tsc differs from the tsc
223 * on the master (time-keeper) CPU. A positive number indicates our
224 * tsc is ahead of the master, negative that it is behind.
226 static inline long
227 get_delta(long *rt, long *master)
229 unsigned long best_t0 = 0, best_t1 = ~0UL, best_tm = 0;
230 unsigned long tcenter, t0, t1, tm;
231 int i;
233 for (i = 0; i < NUM_ITERS; ++i) {
234 rdtscll(t0);
235 go[MASTER] = 1;
236 while (!(tm = go[SLAVE]))
237 no_cpu_relax();
238 go[SLAVE] = 0;
239 rdtscll(t1);
241 if (t1 - t0 < best_t1 - best_t0)
242 best_t0 = t0, best_t1 = t1, best_tm = tm;
245 *rt = best_t1 - best_t0;
246 *master = best_tm - best_t0;
248 /* average best_t0 and best_t1 without overflow: */
249 tcenter = (best_t0/2 + best_t1/2);
250 if (best_t0 % 2 + best_t1 % 2 == 2)
251 ++tcenter;
252 return tcenter - best_tm;
255 static __cpuinit void sync_tsc(void)
257 int i, done = 0;
258 long delta, adj, adjust_latency = 0;
259 unsigned long flags, rt, master_time_stamp, bound;
260 #if DEBUG_TSC_SYNC
261 static struct syncdebug {
262 long rt; /* roundtrip time */
263 long master; /* master's timestamp */
264 long diff; /* difference between midpoint and master's timestamp */
265 long lat; /* estimate of tsc adjustment latency */
266 } t[NUM_ROUNDS] __cpuinitdata;
267 #endif
269 go[MASTER] = 1;
271 smp_call_function(sync_master, NULL, 1, 0);
273 while (go[MASTER]) /* wait for master to be ready */
274 no_cpu_relax();
276 spin_lock_irqsave(&tsc_sync_lock, flags);
278 for (i = 0; i < NUM_ROUNDS; ++i) {
279 delta = get_delta(&rt, &master_time_stamp);
280 if (delta == 0) {
281 done = 1; /* let's lock on to this... */
282 bound = rt;
285 if (!done) {
286 unsigned long t;
287 if (i > 0) {
288 adjust_latency += -delta;
289 adj = -delta + adjust_latency/4;
290 } else
291 adj = -delta;
293 rdtscll(t);
294 wrmsrl(MSR_IA32_TSC, t + adj);
296 #if DEBUG_TSC_SYNC
297 t[i].rt = rt;
298 t[i].master = master_time_stamp;
299 t[i].diff = delta;
300 t[i].lat = adjust_latency/4;
301 #endif
304 spin_unlock_irqrestore(&tsc_sync_lock, flags);
306 #if DEBUG_TSC_SYNC
307 for (i = 0; i < NUM_ROUNDS; ++i)
308 printk("rt=%5ld master=%5ld diff=%5ld adjlat=%5ld\n",
309 t[i].rt, t[i].master, t[i].diff, t[i].lat);
310 #endif
312 printk(KERN_INFO
313 "CPU %d: synchronized TSC with CPU %u (last diff %ld cycles, "
314 "maxerr %lu cycles)\n",
315 smp_processor_id(), boot_cpu_id, delta, rt);
318 static void __cpuinit tsc_sync_wait(void)
320 if (notscsync || !cpu_has_tsc)
321 return;
322 printk(KERN_INFO "CPU %d: Syncing TSC to CPU %u.\n", smp_processor_id(),
323 boot_cpu_id);
324 sync_tsc();
327 static __init int notscsync_setup(char *s)
329 notscsync = 1;
330 return 0;
332 __setup("notscsync", notscsync_setup);
334 static atomic_t init_deasserted __cpuinitdata;
337 * Report back to the Boot Processor.
338 * Running on AP.
340 void __cpuinit smp_callin(void)
342 int cpuid, phys_id;
343 unsigned long timeout;
346 * If waken up by an INIT in an 82489DX configuration
347 * we may get here before an INIT-deassert IPI reaches
348 * our local APIC. We have to wait for the IPI or we'll
349 * lock up on an APIC access.
351 while (!atomic_read(&init_deasserted))
352 cpu_relax();
355 * (This works even if the APIC is not enabled.)
357 phys_id = GET_APIC_ID(apic_read(APIC_ID));
358 cpuid = smp_processor_id();
359 if (cpu_isset(cpuid, cpu_callin_map)) {
360 panic("smp_callin: phys CPU#%d, CPU#%d already present??\n",
361 phys_id, cpuid);
363 Dprintk("CPU#%d (phys ID: %d) waiting for CALLOUT\n", cpuid, phys_id);
366 * STARTUP IPIs are fragile beasts as they might sometimes
367 * trigger some glue motherboard logic. Complete APIC bus
368 * silence for 1 second, this overestimates the time the
369 * boot CPU is spending to send the up to 2 STARTUP IPIs
370 * by a factor of two. This should be enough.
374 * Waiting 2s total for startup (udelay is not yet working)
376 timeout = jiffies + 2*HZ;
377 while (time_before(jiffies, timeout)) {
379 * Has the boot CPU finished it's STARTUP sequence?
381 if (cpu_isset(cpuid, cpu_callout_map))
382 break;
383 cpu_relax();
386 if (!time_before(jiffies, timeout)) {
387 panic("smp_callin: CPU%d started up but did not get a callout!\n",
388 cpuid);
392 * the boot CPU has finished the init stage and is spinning
393 * on callin_map until we finish. We are free to set up this
394 * CPU, first the APIC. (this is probably redundant on most
395 * boards)
398 Dprintk("CALLIN, before setup_local_APIC().\n");
399 setup_local_APIC();
402 * Get our bogomips.
404 calibrate_delay();
405 Dprintk("Stack at about %p\n",&cpuid);
407 disable_APIC_timer();
410 * Save our processor parameters
412 smp_store_cpu_info(cpuid);
415 * Allow the master to continue.
417 cpu_set(cpuid, cpu_callin_map);
421 * Setup code on secondary processor (after comming out of the trampoline)
423 void __cpuinit start_secondary(void)
426 * Dont put anything before smp_callin(), SMP
427 * booting is too fragile that we want to limit the
428 * things done here to the most necessary things.
430 cpu_init();
431 smp_callin();
433 /* otherwise gcc will move up the smp_processor_id before the cpu_init */
434 barrier();
436 Dprintk("cpu %d: setting up apic clock\n", smp_processor_id());
437 setup_secondary_APIC_clock();
439 Dprintk("cpu %d: enabling apic timer\n", smp_processor_id());
441 if (nmi_watchdog == NMI_IO_APIC) {
442 disable_8259A_irq(0);
443 enable_NMI_through_LVT0(NULL);
444 enable_8259A_irq(0);
447 enable_APIC_timer();
450 * Allow the master to continue.
452 cpu_set(smp_processor_id(), cpu_online_map);
453 mb();
455 /* Wait for TSC sync to not schedule things before.
456 We still process interrupts, which could see an inconsistent
457 time in that window unfortunately. */
458 tsc_sync_wait();
460 cpu_idle();
/* Stack pointer and entry point handed to the CPU being booted */
extern volatile unsigned long init_rsp;
extern void (*initial_code)(void);

#if APIC_DEBUG
/*
 * Debug helper: read the ID, VERSION and SPIV registers of the APIC
 * @apicid through remote-read IPIs and print each value, or "failed"
 * when the remote read does not come back valid.
 */
static void inquire_remote_apic(int apicid)
{
	unsigned reg, regs[] = { APIC_ID >> 4, APIC_LVR >> 4, APIC_SPIV >> 4 };
	char *names[] = { "ID", "VERSION", "SPIV" };
	int polls, status;

	printk(KERN_INFO "Inquiring remote APIC #%d...\n", apicid);

	for (reg = 0; reg < sizeof(regs) / sizeof(*regs); reg++) {
		printk("... APIC #%d %s: ", apicid, names[reg]);

		/*
		 * Wait for idle.
		 */
		apic_wait_icr_idle();

		apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(apicid));
		apic_write_around(APIC_ICR, APIC_DM_REMRD | regs[reg]);

		/* poll in 100us steps while the remote read is in progress */
		polls = 0;
		do {
			udelay(100);
			status = apic_read(APIC_ICR) & APIC_ICR_RR_MASK;
		} while (status == APIC_ICR_RR_INPROG && polls++ < 1000);

		if (status == APIC_ICR_RR_VALID) {
			status = apic_read(APIC_RRR);
			printk("%08x\n", status);
		} else {
			printk("failed\n");
		}
	}
}
#endif
505 * Kick the secondary to wake up.
507 static int __cpuinit wakeup_secondary_via_INIT(int phys_apicid, unsigned int start_rip)
509 unsigned long send_status = 0, accept_status = 0;
510 int maxlvt, timeout, num_starts, j;
512 Dprintk("Asserting INIT.\n");
515 * Turn INIT on target chip
517 apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
520 * Send IPI
522 apic_write_around(APIC_ICR, APIC_INT_LEVELTRIG | APIC_INT_ASSERT
523 | APIC_DM_INIT);
525 Dprintk("Waiting for send to finish...\n");
526 timeout = 0;
527 do {
528 Dprintk("+");
529 udelay(100);
530 send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
531 } while (send_status && (timeout++ < 1000));
533 mdelay(10);
535 Dprintk("Deasserting INIT.\n");
537 /* Target chip */
538 apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
540 /* Send IPI */
541 apic_write_around(APIC_ICR, APIC_INT_LEVELTRIG | APIC_DM_INIT);
543 Dprintk("Waiting for send to finish...\n");
544 timeout = 0;
545 do {
546 Dprintk("+");
547 udelay(100);
548 send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
549 } while (send_status && (timeout++ < 1000));
551 atomic_set(&init_deasserted, 1);
554 * Should we send STARTUP IPIs ?
556 * Determine this based on the APIC version.
557 * If we don't have an integrated APIC, don't send the STARTUP IPIs.
559 if (APIC_INTEGRATED(apic_version[phys_apicid]))
560 num_starts = 2;
561 else
562 num_starts = 0;
565 * Run STARTUP IPI loop.
567 Dprintk("#startup loops: %d.\n", num_starts);
569 maxlvt = get_maxlvt();
571 for (j = 1; j <= num_starts; j++) {
572 Dprintk("Sending STARTUP #%d.\n",j);
573 apic_read_around(APIC_SPIV);
574 apic_write(APIC_ESR, 0);
575 apic_read(APIC_ESR);
576 Dprintk("After apic_write.\n");
579 * STARTUP IPI
582 /* Target chip */
583 apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
585 /* Boot on the stack */
586 /* Kick the second */
587 apic_write_around(APIC_ICR, APIC_DM_STARTUP
588 | (start_rip >> 12));
591 * Give the other CPU some time to accept the IPI.
593 udelay(300);
595 Dprintk("Startup point 1.\n");
597 Dprintk("Waiting for send to finish...\n");
598 timeout = 0;
599 do {
600 Dprintk("+");
601 udelay(100);
602 send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
603 } while (send_status && (timeout++ < 1000));
606 * Give the other CPU some time to accept the IPI.
608 udelay(200);
610 * Due to the Pentium erratum 3AP.
612 if (maxlvt > 3) {
613 apic_read_around(APIC_SPIV);
614 apic_write(APIC_ESR, 0);
616 accept_status = (apic_read(APIC_ESR) & 0xEF);
617 if (send_status || accept_status)
618 break;
620 Dprintk("After Startup.\n");
622 if (send_status)
623 printk(KERN_ERR "APIC never delivered???\n");
624 if (accept_status)
625 printk(KERN_ERR "APIC delivery error (%lx).\n", accept_status);
627 return (send_status | accept_status);
631 * Boot one CPU.
633 static int __cpuinit do_boot_cpu(int cpu, int apicid)
635 struct task_struct *idle;
636 unsigned long boot_error;
637 int timeout;
638 unsigned long start_rip;
640 * We can't use kernel_thread since we must avoid to
641 * reschedule the child.
643 idle = fork_idle(cpu);
644 if (IS_ERR(idle)) {
645 printk("failed fork for CPU %d\n", cpu);
646 return PTR_ERR(idle);
648 x86_cpu_to_apicid[cpu] = apicid;
650 cpu_pda[cpu].pcurrent = idle;
652 start_rip = setup_trampoline();
654 init_rsp = idle->thread.rsp;
655 per_cpu(init_tss,cpu).rsp0 = init_rsp;
656 initial_code = start_secondary;
657 clear_ti_thread_flag(idle->thread_info, TIF_FORK);
659 printk(KERN_INFO "Booting processor %d/%d rip %lx rsp %lx\n", cpu, apicid,
660 start_rip, init_rsp);
663 * This grunge runs the startup process for
664 * the targeted processor.
667 atomic_set(&init_deasserted, 0);
669 Dprintk("Setting warm reset code and vector.\n");
671 CMOS_WRITE(0xa, 0xf);
672 local_flush_tlb();
673 Dprintk("1.\n");
674 *((volatile unsigned short *) phys_to_virt(0x469)) = start_rip >> 4;
675 Dprintk("2.\n");
676 *((volatile unsigned short *) phys_to_virt(0x467)) = start_rip & 0xf;
677 Dprintk("3.\n");
680 * Be paranoid about clearing APIC errors.
682 if (APIC_INTEGRATED(apic_version[apicid])) {
683 apic_read_around(APIC_SPIV);
684 apic_write(APIC_ESR, 0);
685 apic_read(APIC_ESR);
689 * Status is now clean
691 boot_error = 0;
694 * Starting actual IPI sequence...
696 boot_error = wakeup_secondary_via_INIT(apicid, start_rip);
698 if (!boot_error) {
700 * allow APs to start initializing.
702 Dprintk("Before Callout %d.\n", cpu);
703 cpu_set(cpu, cpu_callout_map);
704 Dprintk("After Callout %d.\n", cpu);
707 * Wait 5s total for a response
709 for (timeout = 0; timeout < 50000; timeout++) {
710 if (cpu_isset(cpu, cpu_callin_map))
711 break; /* It has booted */
712 udelay(100);
715 if (cpu_isset(cpu, cpu_callin_map)) {
716 /* number CPUs logically, starting from 1 (BSP is 0) */
717 Dprintk("CPU has booted.\n");
718 } else {
719 boot_error = 1;
720 if (*((volatile unsigned char *)phys_to_virt(SMP_TRAMPOLINE_BASE))
721 == 0xA5)
722 /* trampoline started but...? */
723 printk("Stuck ??\n");
724 else
725 /* trampoline code not run */
726 printk("Not responding.\n");
727 #if APIC_DEBUG
728 inquire_remote_apic(apicid);
729 #endif
732 if (boot_error) {
733 cpu_clear(cpu, cpu_callout_map); /* was set here (do_boot_cpu()) */
734 clear_bit(cpu, &cpu_initialized); /* was set by cpu_init() */
735 cpu_clear(cpu, cpu_present_map);
736 cpu_clear(cpu, cpu_possible_map);
737 x86_cpu_to_apicid[cpu] = BAD_APICID;
738 x86_cpu_to_log_apicid[cpu] = BAD_APICID;
739 return -EIO;
742 return 0;
745 cycles_t cacheflush_time;
746 unsigned long cache_decay_ticks;
749 * Construct cpu_sibling_map[], so that we can tell the sibling CPU
750 * on SMT systems efficiently.
752 static __cpuinit void detect_siblings(void)
754 int cpu;
756 for (cpu = 0; cpu < NR_CPUS; cpu++) {
757 cpus_clear(cpu_sibling_map[cpu]);
758 cpus_clear(cpu_core_map[cpu]);
761 for_each_online_cpu (cpu) {
762 struct cpuinfo_x86 *c = cpu_data + cpu;
763 int siblings = 0;
764 int i;
765 if (smp_num_siblings > 1) {
766 for_each_online_cpu (i) {
767 if (cpu_core_id[cpu] == cpu_core_id[i]) {
768 siblings++;
769 cpu_set(i, cpu_sibling_map[cpu]);
772 } else {
773 siblings++;
774 cpu_set(cpu, cpu_sibling_map[cpu]);
777 if (siblings != smp_num_siblings) {
778 printk(KERN_WARNING
779 "WARNING: %d siblings found for CPU%d, should be %d\n",
780 siblings, cpu, smp_num_siblings);
781 smp_num_siblings = siblings;
783 if (c->x86_num_cores > 1) {
784 for_each_online_cpu(i) {
785 if (phys_proc_id[cpu] == phys_proc_id[i])
786 cpu_set(i, cpu_core_map[cpu]);
788 } else
789 cpu_core_map[cpu] = cpu_sibling_map[cpu];
794 * Cleanup possible dangling ends...
796 static __cpuinit void smp_cleanup_boot(void)
799 * Paranoid: Set warm reset code and vector here back
800 * to default values.
802 CMOS_WRITE(0, 0xf);
805 * Reset trampoline flag
807 *((volatile int *) phys_to_virt(0x467)) = 0;
809 #ifndef CONFIG_HOTPLUG_CPU
811 * Free pages reserved for SMP bootup.
812 * When you add hotplug CPU support later remove this
813 * Note there is more work to be done for later CPU bootup.
816 free_page((unsigned long) __va(PAGE_SIZE));
817 free_page((unsigned long) __va(SMP_TRAMPOLINE_BASE));
818 #endif
822 * Fall back to non SMP mode after errors.
824 * RED-PEN audit/test this more. I bet there is more state messed up here.
826 static __cpuinit void disable_smp(void)
828 cpu_present_map = cpumask_of_cpu(0);
829 cpu_possible_map = cpumask_of_cpu(0);
830 if (smp_found_config)
831 phys_cpu_present_map = physid_mask_of_physid(boot_cpu_id);
832 else
833 phys_cpu_present_map = physid_mask_of_physid(0);
834 cpu_set(0, cpu_sibling_map[0]);
835 cpu_set(0, cpu_core_map[0]);
839 * Handle user cpus=... parameter.
841 static __cpuinit void enforce_max_cpus(unsigned max_cpus)
843 int i, k;
844 k = 0;
845 for (i = 0; i < NR_CPUS; i++) {
846 if (!cpu_possible(i))
847 continue;
848 if (++k > max_cpus) {
849 cpu_clear(i, cpu_possible_map);
850 cpu_clear(i, cpu_present_map);
856 * Various sanity checks.
858 static int __cpuinit smp_sanity_check(unsigned max_cpus)
860 if (!physid_isset(hard_smp_processor_id(), phys_cpu_present_map)) {
861 printk("weird, boot CPU (#%d) not listed by the BIOS.\n",
862 hard_smp_processor_id());
863 physid_set(hard_smp_processor_id(), phys_cpu_present_map);
867 * If we couldn't find an SMP configuration at boot time,
868 * get out of here now!
870 if (!smp_found_config) {
871 printk(KERN_NOTICE "SMP motherboard not detected.\n");
872 disable_smp();
873 if (APIC_init_uniprocessor())
874 printk(KERN_NOTICE "Local APIC not detected."
875 " Using dummy APIC emulation.\n");
876 return -1;
880 * Should not be necessary because the MP table should list the boot
881 * CPU too, but we do it for the sake of robustness anyway.
883 if (!physid_isset(boot_cpu_id, phys_cpu_present_map)) {
884 printk(KERN_NOTICE "weird, boot CPU (#%d) not listed by the BIOS.\n",
885 boot_cpu_id);
886 physid_set(hard_smp_processor_id(), phys_cpu_present_map);
890 * If we couldn't find a local APIC, then get out of here now!
892 if (APIC_INTEGRATED(apic_version[boot_cpu_id]) && !cpu_has_apic) {
893 printk(KERN_ERR "BIOS bug, local APIC #%d not detected!...\n",
894 boot_cpu_id);
895 printk(KERN_ERR "... forcing use of dummy APIC emulation. (tell your hw vendor)\n");
896 nr_ioapics = 0;
897 return -1;
901 * If SMP should be disabled, then really disable it!
903 if (!max_cpus) {
904 printk(KERN_INFO "SMP mode deactivated, forcing use of dummy APIC emulation.\n");
905 nr_ioapics = 0;
906 return -1;
909 return 0;
913 * Prepare for SMP bootup. The MP table or ACPI has been read
914 * earlier. Just do some sanity checking here and enable APIC mode.
916 void __cpuinit smp_prepare_cpus(unsigned int max_cpus)
918 int i;
920 nmi_watchdog_default();
921 current_cpu_data = boot_cpu_data;
922 current_thread_info()->cpu = 0; /* needed? */
924 enforce_max_cpus(max_cpus);
927 * Fill in cpu_present_mask
929 for (i = 0; i < NR_CPUS; i++) {
930 int apicid = cpu_present_to_apicid(i);
931 if (physid_isset(apicid, phys_cpu_present_map)) {
932 cpu_set(i, cpu_present_map);
933 /* possible map would be different if we supported real
934 CPU hotplug. */
935 cpu_set(i, cpu_possible_map);
939 if (smp_sanity_check(max_cpus) < 0) {
940 printk(KERN_INFO "SMP disabled\n");
941 disable_smp();
942 return;
947 * Switch from PIC to APIC mode.
949 connect_bsp_APIC();
950 setup_local_APIC();
952 if (GET_APIC_ID(apic_read(APIC_ID)) != boot_cpu_id) {
953 panic("Boot APIC ID in local APIC unexpected (%d vs %d)",
954 GET_APIC_ID(apic_read(APIC_ID)), boot_cpu_id);
955 /* Or can we switch back to PIC here? */
957 x86_cpu_to_apicid[0] = boot_cpu_id;
960 * Now start the IO-APICs
962 if (!skip_ioapic_setup && nr_ioapics)
963 setup_IO_APIC();
964 else
965 nr_ioapics = 0;
968 * Set up local APIC timer on boot CPU.
971 setup_boot_APIC_clock();
975 * Early setup to make printk work.
977 void __init smp_prepare_boot_cpu(void)
979 int me = smp_processor_id();
980 cpu_set(me, cpu_online_map);
981 cpu_set(me, cpu_callout_map);
985 * Entry point to boot a CPU.
987 * This is all __cpuinit, not __devinit for now because we don't support
988 * CPU hotplug (yet).
990 int __cpuinit __cpu_up(unsigned int cpu)
992 int err;
993 int apicid = cpu_present_to_apicid(cpu);
995 WARN_ON(irqs_disabled());
997 Dprintk("++++++++++++++++++++=_---CPU UP %u\n", cpu);
999 if (apicid == BAD_APICID || apicid == boot_cpu_id ||
1000 !physid_isset(apicid, phys_cpu_present_map)) {
1001 printk("__cpu_up: bad cpu %d\n", cpu);
1002 return -EINVAL;
1005 /* Boot it! */
1006 err = do_boot_cpu(cpu, apicid);
1007 if (err < 0) {
1008 Dprintk("do_boot_cpu failed %d\n", err);
1009 return err;
1012 /* Unleash the CPU! */
1013 Dprintk("waiting for cpu %d\n", cpu);
1015 while (!cpu_isset(cpu, cpu_online_map))
1016 cpu_relax();
1017 return 0;
1021 * Finish the SMP boot.
1023 void __cpuinit smp_cpus_done(unsigned int max_cpus)
1025 zap_low_mappings();
1026 smp_cleanup_boot();
1028 #ifdef CONFIG_X86_IO_APIC
1029 setup_ioapic_dest();
1030 #endif
1032 detect_siblings();
1033 time_init_gtod();
1035 check_nmi_watchdog();