/* arch/i386/kernel/smp.c -- Linux 2.2.0-final */
/*
 *	Intel MP v1.1/v1.4 specification support routines for multi-pentium
 *	hosts.
 *
 *	(c) 1995 Alan Cox, CymruNET Ltd <alan@cymru.net>
 *	(c) 1998 Ingo Molnar
 *
 *	Supported by Caldera http://www.caldera.com.
 *	Much of the core SMP work is based on previous work by Thomas Radke, to
 *	whom a great many thanks are extended.
 *
 *	Thanks to Intel for making available several different Pentium,
 *	Pentium Pro and Pentium-II/Xeon MP machines.
 *
 *	This code is released under the GNU General Public License version 2
 *	or later.
 *
 *	Fixes
 *		Felix Koop	:	NR_CPUS used properly
 *		Jose Renau	:	Handle single CPU case.
 *		Alan Cox	:	By repeated request 8) - Total BogoMIP report.
 *		Greg Wright	:	Fix for kernel stacks panic.
 *		Erich Boleyn	:	MP v1.4 and additional changes.
 *		Matthias Sattler:	Changes for 2.1 kernel map.
 *		Michel Lespinasse:	Changes for 2.1 kernel map.
 *		Michael Chastain:	Change trampoline.S to gnu as.
 *		Alan Cox	:	Dumb bug: 'B' step PPro's are fine
 *		Ingo Molnar	:	Added APIC timers, based on code
 *					from Jose Renau
 *		Alan Cox	:	Added EBDA scanning
 *		Ingo Molnar	:	various cleanups and rewrites
 */
#include <linux/config.h>
#include <linux/mm.h>
#include <linux/kernel_stat.h>
#include <linux/delay.h>
#include <linux/mc146818rtc.h>
#include <linux/smp_lock.h>
#include <linux/interrupt.h>
#include <linux/init.h>

#include <asm/pgtable.h>
#include <asm/bitops.h>
#include <asm/io.h>

#ifdef CONFIG_MTRR
#  include <asm/mtrr.h>
#endif

#define __KERNEL_SYSCALLS__
#include <linux/unistd.h>

#include "irq.h"
extern unsigned long start_kernel, _etext;
extern void update_one_process(struct task_struct *p,
			       unsigned long ticks, unsigned long user,
			       unsigned long system, int cpu);
/*
 * Some notes on processor bugs:
 *
 * Pentium and Pentium Pro (and all CPUs) have bugs. The Linux issues
 * for SMP are handled as follows.
 *
 * Pentium Pro
 *	Occasional delivery of a 'spurious interrupt' as trap #16. This
 *	is very rare. The kernel logs the event and recovers.
 *
 * Pentium
 *	There is a marginal case where REP MOVS on 100MHz SMP
 *	machines with B stepping processors can fail. XXX should provide
 *	an L1cache=Writethrough or L1cache=off option.
 *
 *	B stepping CPUs may hang. There are hardware workarounds
 *	for this. We warn about it in case your board doesn't have the
 *	workarounds. Basically that's so I can tell anyone with a B stepping
 *	CPU and SMP problems "tough".
 *
 * Specific items [From Pentium Processor Specification Update]
 *
 * 1AP.	Linux doesn't use remote read
 * 2AP.	Linux doesn't trust APIC errors
 * 3AP.	We work around this
 * 4AP.	Linux never generates 3 interrupts of the same priority
 *	to cause a lost local interrupt.
 * 5AP.	Remote read is never used
 * 9AP.	XXX NEED TO CHECK WE HANDLE THIS XXX
 * 10AP.	XXX NEED TO CHECK WE HANDLE THIS XXX
 * 11AP.	Linux reads the APIC between writes to avoid this, as per
 *	the documentation. Make sure you preserve this as it affects
 *	the C stepping chips too.
 *
 * If this sounds worrying, believe me these bugs are ___RARE___ and
 * there's almost nothing of note from the C stepping onwards.
 */
/* Kernel spinlock */
spinlock_t kernel_flag = SPIN_LOCK_UNLOCKED;
/*
 * Why isn't this somewhere standard ??
 *
 * Maybe because this procedure is horribly buggy, and does
 * not deserve to live. Think about signedness issues for five
 * seconds to see why. - Linus
 */
extern __inline int max(int a, int b)
{
	if (a > b)
		return a;
	return b;
}
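
/*
 * Editor's sketch (not original code, not compiled): a minimal
 * illustration of the signedness trap Linus alludes to above. An
 * unsigned argument larger than INT_MAX is converted to a negative
 * int at the call site:
 */
#if 0
	unsigned long big = 0xFFFFFFFFUL;	/* becomes (int)-1 on i386 */
	int m = max(big, 1);			/* m == 1, not 4294967295 */
#endif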
/*
 * function prototypes:
 */
static void cache_APIC_registers(void);
static void stop_this_cpu(void);
static int smp_b_stepping = 0;			/* Set if we find a B stepping CPU */

static int max_cpus = -1;			/* Setup configured maximum number of CPUs to activate */
int smp_found_config = 0;			/* Have we found an SMP box */

unsigned long cpu_present_map = 0;		/* Bitmask of physically existing CPUs */
unsigned long cpu_online_map = 0;		/* Bitmask of currently online CPUs */
int smp_num_cpus = 1;				/* Total count of live CPUs */
int smp_threads_ready = 0;			/* Set when the idlers are all forked */
volatile int cpu_number_map[NR_CPUS];		/* which CPU maps to which logical number */
volatile int __cpu_logical_map[NR_CPUS];	/* which logical number maps to which CPU */
static volatile unsigned long cpu_callin_map[NR_CPUS] = {0,};	/* We always use 0, the rest is ready for parallel delivery */
static volatile unsigned long cpu_callout_map[NR_CPUS] = {0,};	/* We always use 0, the rest is ready for parallel delivery */
volatile unsigned long smp_invalidate_needed;	/* Used for the invalidate map that's also checked in the spinlock */
volatile unsigned long kstack_ptr;		/* Stack vector for booting CPUs */
struct cpuinfo_x86 cpu_data[NR_CPUS];		/* Per-CPU bogomips and other parameters */
static unsigned int num_processors = 1;		/* Internal processor count */
unsigned long mp_ioapic_addr = 0xFEC00000;	/* Address of the I/O APIC (not yet used) */
unsigned char boot_cpu_id = 0;			/* Processor that is doing the boot up */
static int smp_activated = 0;			/* Tripped once we need to start cross invalidating */
int apic_version[NR_CPUS];			/* APIC version number */
unsigned long apic_retval;			/* Just debugging the assembler.. */

volatile unsigned long kernel_counter = 0;	/* Number of times the processor holds the lock */
volatile unsigned long syscall_count = 0;	/* Number of times the processor holds the syscall lock */

volatile unsigned long ipi_count;		/* Number of IPIs delivered */

const char lk_lockmsg[] = "lock from interrupt context at %p\n";

int mp_bus_id_to_type[MAX_MP_BUSSES] = { -1, };
extern int mp_irq_entries;
extern struct mpc_config_intsrc mp_irqs[MAX_IRQ_SOURCES];
extern int mpc_default_type;
int mp_bus_id_to_pci_bus[MAX_MP_BUSSES] = { -1, };
int mp_current_pci_id = 0;
unsigned long mp_lapic_addr = 0;
int skip_ioapic_setup = 0;			/* 1 if "noapic" boot option passed */
/* #define SMP_DEBUG */

#ifdef SMP_DEBUG
#define SMP_PRINTK(x)	printk x
#else
#define SMP_PRINTK(x)
#endif
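
/*
 * Editor's note (illustrative sketch, not original text): callers must
 * wrap the whole argument list in an extra set of parentheses, so that
 * the single macro parameter expands to a complete printk() call:
 */
#if 0
	SMP_PRINTK(("CPU#%d online, stack near %p\n", cpuid, &cpuid));
	/* expands (when SMP_DEBUG is defined) to:
	 *	printk ("CPU#%d online, stack near %p\n", cpuid, &cpuid);
	 * and to nothing at all otherwise. */
#endif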
/*
 * Setup routine for controlling SMP activation
 *
 * Command-line option of "nosmp" or "maxcpus=0" will disable SMP
 * activation entirely (the MPS table probe still happens, though).
 *
 * Command-line option of "maxcpus=<NUM>", where <NUM> is an integer
 * greater than 0, limits the maximum number of CPUs activated in
 * SMP mode to <NUM>.
 */
void __init smp_setup(char *str, int *ints)
{
	if (ints && ints[0] > 0)
		max_cpus = ints[1];
	else
		max_cpus = 0;
}
void ack_APIC_irq(void)
{
	/* Clear the IPI */

	/* Dummy read */
	apic_read(APIC_SPIV);

	/* Docs say use 0 for future compatibility */
	apic_write(APIC_EOI, 0);
}
#ifdef CONFIG_X86_VISWS_APIC
/*
 * hacky!
 */
int __init smp_scan_config(unsigned long base, unsigned long length)
{
	cpu_present_map |= 2;		/* or in id 1 */
	apic_version[1] |= 0x10;	/* integrated APIC */
	num_processors = 2;

	return 1;
}
#else
/*
 * Checksum an MP configuration block.
 */
static int mpf_checksum(unsigned char *mp, int len)
{
	int sum = 0;

	while (len--)
		sum += *mp++;

	return sum & 0xFF;
}
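
/*
 * Editor's sketch (not original code, not compiled): a valid MP
 * structure checksums to zero -- its bytes, including the stored
 * checksum byte, sum to a multiple of 256 -- so callers treat any
 * nonzero result as corruption, as smp_read_mpc() does below:
 */
#if 0
	if (mpf_checksum((unsigned char *)mpc, mpc->mpc_length))
		panic("SMP mptable: checksum error!\n");
#endif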
/*
 * Processor encoding in an MP configuration block
 */
static char *mpc_family(int family, int model)
{
	static char n[32];
	static char *model_defs[] =
	{
		"80486DX", "80486DX",
		"80486SX", "80486DX/2 or 80487",
		"80486SL", "Intel5X2(tm)",
		"Unknown", "Unknown",
		"80486DX/4"
	};

	if (family == 0x6)
		return ("Pentium(tm) Pro");
	if (family == 0x5)
		return ("Pentium(tm)");
	if (family == 0x0F && model == 0x0F)
		return ("Special controller");
	if (family == 0x04 && model < 9)
		return model_defs[model];
	sprintf(n, "Unknown CPU [%d:%d]", family, model);
	return n;
}
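
/*
 * Editor's sketch (not original code, not compiled): the family/model
 * arguments come from the CPUID signature stored in mpc_cpufeature,
 * family in bits 11:8 and model in bits 7:4, exactly as the caller in
 * smp_read_mpc() extracts them below:
 */
#if 0
	char *name = mpc_family((m->mpc_cpufeature & CPU_FAMILY_MASK) >> 8,
				(m->mpc_cpufeature & CPU_MODEL_MASK) >> 4);
#endif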
/*
 * Read the MPC
 */
static int __init smp_read_mpc(struct mp_config_table *mpc)
{
	char str[16];
	int count = sizeof(*mpc);
	int ioapics = 0;
	unsigned char *mpt = ((unsigned char *)mpc) + count;

	if (memcmp(mpc->mpc_signature, MPC_SIGNATURE, 4))
	{
		panic("SMP mptable: bad signature [%c%c%c%c]!\n",
			mpc->mpc_signature[0],
			mpc->mpc_signature[1],
			mpc->mpc_signature[2],
			mpc->mpc_signature[3]);
		return 1;
	}
	if (mpf_checksum((unsigned char *)mpc, mpc->mpc_length))
	{
		panic("SMP mptable: checksum error!\n");
		return 1;
	}
	if (mpc->mpc_spec != 0x01 && mpc->mpc_spec != 0x04)
	{
		printk("Bad Config Table version (%d)!!\n", mpc->mpc_spec);
		return 1;
	}
	memcpy(str, mpc->mpc_oem, 8);
	str[8] = 0;
	memcpy(ioapic_OEM_ID, str, 9);
	printk("OEM ID: %s ", str);

	memcpy(str, mpc->mpc_productid, 12);
	str[12] = 0;
	memcpy(ioapic_Product_ID, str, 13);
	printk("Product ID: %s ", str);

	printk("APIC at: 0x%lX\n", mpc->mpc_lapic);

	/* save the local APIC address, it might be non-default */
	mp_lapic_addr = mpc->mpc_lapic;

	/*
	 * Now process the configuration blocks.
	 */
	while (count < mpc->mpc_length)
	{
		switch (*mpt)
		{
			case MP_PROCESSOR:
			{
				struct mpc_config_processor *m =
					(struct mpc_config_processor *)mpt;
				if (m->mpc_cpuflag & CPU_ENABLED)
				{
					printk("Processor #%d %s APIC version %d\n",
						m->mpc_apicid,
						mpc_family((m->mpc_cpufeature &
							CPU_FAMILY_MASK) >> 8,
							(m->mpc_cpufeature &
							CPU_MODEL_MASK) >> 4),
						m->mpc_apicver);
#ifdef SMP_DEBUG
					if (m->mpc_featureflag & (1<<0))
						printk("    Floating point unit present.\n");
					if (m->mpc_featureflag & (1<<7))
						printk("    Machine Exception supported.\n");
					if (m->mpc_featureflag & (1<<8))
						printk("    64 bit compare & exchange supported.\n");
					if (m->mpc_featureflag & (1<<9))
						printk("    Internal APIC present.\n");
#endif
					if (m->mpc_cpuflag & CPU_BOOTPROCESSOR)
					{
						SMP_PRINTK(("    Bootup CPU\n"));
						boot_cpu_id = m->mpc_apicid;
					}
					else	/* Boot CPU already counted */
						num_processors++;

					/* >= rather than the original >: apicid == NR_CPUS
					   would overrun the per-CPU arrays below. */
					if (m->mpc_apicid >= NR_CPUS)
						printk("Processor #%d unused. (Max %d processors).\n", m->mpc_apicid, NR_CPUS);
					else
					{
						cpu_present_map |= (1 << m->mpc_apicid);
						apic_version[m->mpc_apicid] = m->mpc_apicver;
					}
				}
				mpt += sizeof(*m);
				count += sizeof(*m);
				break;
			}
			case MP_BUS:
			{
				struct mpc_config_bus *m =
					(struct mpc_config_bus *)mpt;
				memcpy(str, m->mpc_bustype, 6);
				str[6] = 0;
				SMP_PRINTK(("Bus #%d is %s\n",
					m->mpc_busid,
					str));
				if ((strncmp(m->mpc_bustype, "ISA", 3) == 0) ||
				    (strncmp(m->mpc_bustype, "EISA", 4) == 0))
					mp_bus_id_to_type[m->mpc_busid] =
						MP_BUS_ISA;
				else if (strncmp(m->mpc_bustype, "PCI", 3) == 0) {
					mp_bus_id_to_type[m->mpc_busid] =
						MP_BUS_PCI;
					mp_bus_id_to_pci_bus[m->mpc_busid] =
						mp_current_pci_id;
					mp_current_pci_id++;
				}
				mpt += sizeof(*m);
				count += sizeof(*m);
				break;
			}
			case MP_IOAPIC:
			{
				struct mpc_config_ioapic *m =
					(struct mpc_config_ioapic *)mpt;
				if (m->mpc_flags & MPC_APIC_USABLE)
				{
					ioapics++;
					printk("I/O APIC #%d Version %d at 0x%lX.\n",
						m->mpc_apicid, m->mpc_apicver,
						m->mpc_apicaddr);
					/*
					 * we use the first one only currently
					 */
					if (ioapics == 1)
						mp_ioapic_addr = m->mpc_apicaddr;
				}
				mpt += sizeof(*m);
				count += sizeof(*m);
				break;
			}
			case MP_INTSRC:
			{
				struct mpc_config_intsrc *m =
					(struct mpc_config_intsrc *)mpt;

				mp_irqs[mp_irq_entries] = *m;
				if (++mp_irq_entries == MAX_IRQ_SOURCES) {
					printk("Max irq sources exceeded!!\n");
					printk("Skipping remaining sources.\n");
					--mp_irq_entries;
				}

				mpt += sizeof(*m);
				count += sizeof(*m);
				break;
			}
			case MP_LINTSRC:
			{
				struct mpc_config_intlocal *m =
					(struct mpc_config_intlocal *)mpt;
				mpt += sizeof(*m);
				count += sizeof(*m);
				break;
			}
		}
	}
	if (ioapics > 1)
	{
		printk("Warning: Multiple IO-APICs not yet supported.\n");
		printk("Warning: switching to non APIC mode.\n");
		skip_ioapic_setup = 1;
	}
	return num_processors;
}
/*
 * Scan the memory blocks for an SMP configuration block.
 */
int __init smp_scan_config(unsigned long base, unsigned long length)
{
	unsigned long *bp = phys_to_virt(base);
	struct intel_mp_floating *mpf;

	SMP_PRINTK(("Scan SMP from %p for %ld bytes.\n",
		bp, length));
	if (sizeof(*mpf) != 16)
		printk("Error: MPF size\n");

	while (length > 0)
	{
		if (*bp == SMP_MAGIC_IDENT)
		{
			mpf = (struct intel_mp_floating *)bp;
			if (mpf->mpf_length == 1 &&
			    !mpf_checksum((unsigned char *)bp, 16) &&
			    (mpf->mpf_specification == 1
			     || mpf->mpf_specification == 4))
			{
				printk("Intel MultiProcessor Specification v1.%d\n", mpf->mpf_specification);
				if (mpf->mpf_feature2 & (1<<7))
					printk("    IMCR and PIC compatibility mode.\n");
				else
					printk("    Virtual Wire compatibility mode.\n");
				smp_found_config = 1;
				/*
				 * Now see if we need to read further.
				 */
				if (mpf->mpf_feature1 != 0)
				{
					unsigned long cfg;

					/* local APIC has default address */
					mp_lapic_addr = 0xFEE00000;
					/*
					 * We need to know what the local
					 * APIC id of the boot CPU is!
					 */

/*
 *	HACK HACK HACK HACK HACK HACK HACK HACK HACK HACK HACK HACK HACK
 *
 *	It's not just a crazy hack. ;-)
 */
					/*
					 * Standard page mapping
					 * functions don't work yet.
					 * We know that page 0 is not
					 * used. Steal it for now!
					 */

					cfg = pg0[0];
					pg0[0] = (mp_lapic_addr | 7);
					local_flush_tlb();

					boot_cpu_id = GET_APIC_ID(*((volatile unsigned long *) APIC_ID));

					/*
					 * Give it back
					 */
					pg0[0] = cfg;
					local_flush_tlb();

/*
 *	END OF HACK   END OF HACK   END OF HACK   END OF HACK   END OF HACK
 */
					/*
					 * 2 CPUs, numbered 0 & 1.
					 */
					cpu_present_map = 3;
					num_processors = 2;
					printk("I/O APIC at 0xFEC00000.\n");

					/*
					 * Save the default type number, we
					 * need it later to set the IO-APIC
					 * up properly:
					 */
					mpc_default_type = mpf->mpf_feature1;

					printk("Bus #0 is ");
				}
				switch (mpf->mpf_feature1)
				{
					case 1:
					case 5:
						printk("ISA\n");
						break;
					case 2:
						printk("EISA with no IRQ8 chaining\n");
						break;
					case 6:
					case 3:
						printk("EISA\n");
						break;
					case 4:
					case 7:
						printk("MCA\n");
						break;
					case 0:
						break;
					default:
						printk("???\nUnknown standard configuration %d\n",
							mpf->mpf_feature1);
						return 1;
				}
				if (mpf->mpf_feature1 > 4)
				{
					printk("Bus #1 is PCI\n");

					/*
					 * Set local APIC version to
					 * the integrated form.
					 * It's initialized to zero
					 * otherwise, representing
					 * a discrete 82489DX.
					 */
					apic_version[0] = 0x10;
					apic_version[1] = 0x10;
				}
				/*
				 * Read the physical hardware table.
				 * Anything here will override the
				 * defaults.
				 */
				if (mpf->mpf_physptr)
					smp_read_mpc((void *)mpf->mpf_physptr);

				__cpu_logical_map[0] = boot_cpu_id;
				global_irq_holder = boot_cpu_id;
				current->processor = boot_cpu_id;

				printk("Processors: %d\n", num_processors);
				/*
				 * Only use the first configuration found.
				 */
				return 1;
			}
		}
		bp += 4;
		length -= 16;
	}

	return 0;
}
#endif
/*
 * Trampoline 80x86 program as an array.
 */
extern unsigned char trampoline_data[];
extern unsigned char trampoline_end[];
static unsigned char *trampoline_base;

/*
 * Currently trivial. Write the real->protected mode
 * bootstrap into the page concerned. The caller
 * has made sure it's suitably aligned.
 */
static unsigned long __init setup_trampoline(void)
{
	memcpy(trampoline_base, trampoline_data, trampoline_end - trampoline_data);
	return virt_to_phys(trampoline_base);
}
/*
 * We are called very early to get the low memory for the
 * SMP bootup trampoline page.
 */
unsigned long __init smp_alloc_memory(unsigned long mem_base)
{
	if (virt_to_phys((void *)mem_base) >= 0x9F000)
		panic("smp_alloc_memory: Insufficient low memory for kernel trampoline 0x%lx.", mem_base);
	trampoline_base = (void *)mem_base;
	return mem_base + PAGE_SIZE;
}
/*
 * The bootstrap kernel entry code has set these up. Save them for
 * a given CPU.
 */
void __init smp_store_cpu_info(int id)
{
	struct cpuinfo_x86 *c = &cpu_data[id];

	*c = boot_cpu_data;
	c->pte_quick = 0;
	c->pgd_quick = 0;
	c->pgtable_cache_sz = 0;
	identify_cpu(c);
	/*
	 * Mask B, Pentium, but not Pentium MMX
	 */
	if (c->x86_vendor == X86_VENDOR_INTEL &&
	    c->x86 == 5 &&
	    c->x86_mask >= 1 && c->x86_mask <= 4 &&
	    c->x86_model <= 3)
		smp_b_stepping = 1;	/* Remember we have B step Pentia with bugs */
}
/*
 * Architecture specific routine called by the kernel just before init is
 * fired off. This allows the BP to have everything in order [we hope].
 * At the end of this all the APs will hit the system scheduling and off
 * we go. Each AP will load the system gdt's and jump through the kernel
 * init into idle(). At this point the scheduler will one day take over
 * and give them jobs to do. smp_callin is a standard routine
 * we use to track CPUs as they power up.
 */

static atomic_t smp_commenced = ATOMIC_INIT(0);

void __init smp_commence(void)
{
	/*
	 * Lets the callins below out of their loop.
	 */
	SMP_PRINTK(("Setting commenced=1, go go go\n"));

	wmb();
	atomic_set(&smp_commenced, 1);
}
void __init enable_local_APIC(void)
{
	unsigned long value;

	value = apic_read(APIC_SPIV);
	value |= (1<<8);		/* Enable APIC (bit==1) */
	value &= ~(1<<9);		/* Enable focus processor (bit==0) */
	value |= 0xff;			/* Set spurious IRQ vector to 0xff */
	apic_write(APIC_SPIV, value);

	value = apic_read(APIC_TASKPRI);
	value &= ~APIC_TPRI_MASK;	/* Set Task Priority to 'accept all' */
	apic_write(APIC_TASKPRI, value);

	udelay(100);			/* B safe */
	ack_APIC_irq();
	udelay(100);
}
unsigned long __init init_smp_mappings(unsigned long memory_start)
{
	unsigned long apic_phys, ioapic_phys;

	memory_start = PAGE_ALIGN(memory_start);
	if (smp_found_config) {
		apic_phys = mp_lapic_addr;
#ifdef CONFIG_X86_IO_APIC
		ioapic_phys = mp_ioapic_addr;
#endif
	} else {
		/*
		 * set up a fake all zeroes page to simulate the
		 * local APIC and another one for the IO-APIC. We
		 * could use the real zero-page, but it's safer
		 * this way if some buggy code writes to this page ...
		 */
		apic_phys = __pa(memory_start);
		ioapic_phys = __pa(memory_start + PAGE_SIZE);
		memset((void *)memory_start, 0, 2*PAGE_SIZE);
		memory_start += 2*PAGE_SIZE;
	}

#ifdef CONFIG_X86_IO_APIC
	set_fixmap(FIX_APIC_BASE, apic_phys);
	set_fixmap(FIX_IO_APIC_BASE, ioapic_phys);

	printk("mapped APIC to %08lx (%08lx)\n", APIC_BASE, apic_phys);
	printk("mapped IOAPIC to %08lx (%08lx)\n", fix_to_virt(FIX_IO_APIC_BASE), ioapic_phys);
#endif

	return memory_start;
}
extern void calibrate_delay(void);

void __init smp_callin(void)
{
	int cpuid;
	unsigned long timeout;

	/*
	 * (This works even if the APIC is not enabled.)
	 */
	cpuid = GET_APIC_ID(apic_read(APIC_ID));

	SMP_PRINTK(("CPU#%d waiting for CALLOUT\n", cpuid));

	/*
	 * STARTUP IPIs are fragile beasts as they might sometimes
	 * trigger some glue motherboard logic. Complete APIC bus
	 * silence for 1 second, this overestimates the time the
	 * boot CPU is spending to send the up to 2 STARTUP IPIs
	 * by a factor of two. This should be enough.
	 */

	/*
	 * Waiting 2s total for startup (udelay is not yet working)
	 */
	timeout = jiffies + 2*HZ;
	while (time_before(jiffies, timeout))
	{
		/*
		 * Has the boot CPU finished its STARTUP sequence?
		 */
		if (test_bit(cpuid, (unsigned long *)&cpu_callout_map[0]))
			break;
	}

	while (!time_before(jiffies, timeout)) {
		printk("BUG: CPU%d started up but did not get a callout!\n",
			cpuid);
		stop_this_cpu();
	}

	/*
	 * the boot CPU has finished the init stage and is spinning
	 * on callin_map until we finish. We are free to set up this
	 * CPU, first the APIC. (this is probably redundant on most
	 * boards)
	 */

	SMP_PRINTK(("CALLIN, before enable_local_APIC().\n"));
	enable_local_APIC();

	/*
	 * Set up our APIC timer.
	 */
	setup_APIC_clock();

	__sti();

#ifdef CONFIG_MTRR
	/* Must be done before calibration delay is computed */
	mtrr_init_secondary_cpu();
#endif
	/*
	 * Get our bogomips.
	 */
	calibrate_delay();
	SMP_PRINTK(("Stack at about %p\n", &cpuid));

	/*
	 * Save our processor parameters
	 */
	smp_store_cpu_info(cpuid);

	/*
	 * Allow the master to continue.
	 */
	set_bit(cpuid, (unsigned long *)&cpu_callin_map[0]);
}
int cpucount = 0;

extern int cpu_idle(void *unused);

/*
 * Activate a secondary processor.
 */
int __init start_secondary(void *unused)
{
	/*
	 * Don't put anything before smp_callin(); SMP
	 * booting is so fragile that we want to limit the
	 * things done here to the most necessary things.
	 */
	smp_callin();
	while (!atomic_read(&smp_commenced))
		/* nothing */ ;
	return cpu_idle(NULL);
}
/*
 * Everything has been set up for the secondary
 * CPUs - they just need to reload everything
 * from the task structure.
 */
void __init initialize_secondary(void)
{
	struct thread_struct *p = &current->tss;

	/*
	 * Load up the LDT and the task register.
	 */
	asm volatile("lldt %%ax" : : "a" (p->ldt));
	asm volatile("ltr %%ax" : : "a" (p->tr));
	stts();

	/*
	 * We don't actually need to load the full TSS,
	 * basically just the stack pointer and the eip.
	 */
	asm volatile(
		"movl %0,%%esp\n\t"
		"jmp *%1"
		:
		: "r" (p->esp), "r" (p->eip));
}

extern struct {
	void *esp;
	unsigned short ss;
} stack_start;
static void __init do_boot_cpu(int i)
{
	unsigned long cfg;
	pgd_t maincfg;
	struct task_struct *idle;
	unsigned long send_status, accept_status;
	int timeout, num_starts, j;
	unsigned long start_eip;

	/*
	 * We need an idle process for each processor.
	 */
	kernel_thread(start_secondary, NULL, CLONE_PID);
	cpucount++;

	idle = task[cpucount];
	if (!idle)
		panic("No idle process for CPU %d", i);

	idle->processor = i;
	__cpu_logical_map[cpucount] = i;
	cpu_number_map[i] = cpucount;

	/* start_eip had better be page-aligned! */
	start_eip = setup_trampoline();

	printk("Booting processor %d eip %lx\n", i, start_eip);	/* So we see what's up */
	stack_start.esp = (void *) (1024 + PAGE_SIZE + (char *)idle);

	/*
	 * This grunge runs the startup process for
	 * the targeted processor.
	 */

	SMP_PRINTK(("Setting warm reset code and vector.\n"));

	CMOS_WRITE(0xa, 0xf);
	local_flush_tlb();
	SMP_PRINTK(("1.\n"));
	*((volatile unsigned short *) phys_to_virt(0x469)) = start_eip >> 4;
	SMP_PRINTK(("2.\n"));
	*((volatile unsigned short *) phys_to_virt(0x467)) = start_eip & 0xf;
	SMP_PRINTK(("3.\n"));

	maincfg = swapper_pg_dir[0];
	((unsigned long *)swapper_pg_dir)[0] = 0x102007;

	/*
	 * Be paranoid about clearing APIC errors.
	 */

	if (apic_version[i] & 0xF0)
	{
		apic_write(APIC_ESR, 0);
		accept_status = (apic_read(APIC_ESR) & 0xEF);
	}

	/*
	 * Status is now clean
	 */

	send_status = 0;
	accept_status = 0;

	/*
	 * Starting actual IPI sequence...
	 */

	SMP_PRINTK(("Asserting INIT.\n"));

	/*
	 * Turn INIT on
	 */

	cfg = apic_read(APIC_ICR2);
	cfg &= 0x00FFFFFF;
	apic_write(APIC_ICR2, cfg | SET_APIC_DEST_FIELD(i));	/* Target chip */
	cfg = apic_read(APIC_ICR);
	cfg &= ~0xCDFFF;					/* Clear bits */
	cfg |= (APIC_DEST_FIELD | APIC_DEST_LEVELTRIG
		| APIC_DEST_ASSERT | APIC_DEST_DM_INIT);
	apic_write(APIC_ICR, cfg);				/* Send IPI */

	udelay(200);
	SMP_PRINTK(("Deasserting INIT.\n"));

	cfg = apic_read(APIC_ICR2);
	cfg &= 0x00FFFFFF;
	apic_write(APIC_ICR2, cfg | SET_APIC_DEST_FIELD(i));	/* Target chip */
	cfg = apic_read(APIC_ICR);
	cfg &= ~0xCDFFF;					/* Clear bits */
	cfg |= (APIC_DEST_FIELD | APIC_DEST_LEVELTRIG
		| APIC_DEST_DM_INIT);
	apic_write(APIC_ICR, cfg);				/* Send IPI */

	/*
	 * Should we send STARTUP IPIs ?
	 *
	 * Determine this based on the APIC version.
	 * If we don't have an integrated APIC, don't
	 * send the STARTUP IPIs.
	 */

	if (apic_version[i] & 0xF0)
		num_starts = 2;
	else
		num_starts = 0;

	/*
	 * Run STARTUP IPI loop.
	 */

	for (j = 1; !(send_status || accept_status)
		    && (j <= num_starts); j++)
	{
		SMP_PRINTK(("Sending STARTUP #%d.\n", j));
		apic_write(APIC_ESR, 0);
		SMP_PRINTK(("After apic_write.\n"));

		/*
		 * STARTUP IPI
		 */

		cfg = apic_read(APIC_ICR2);
		cfg &= 0x00FFFFFF;
		apic_write(APIC_ICR2, cfg | SET_APIC_DEST_FIELD(i));	/* Target chip */
		cfg = apic_read(APIC_ICR);
		cfg &= ~0xCDFFF;					/* Clear bits */
		cfg |= (APIC_DEST_FIELD
			| APIC_DEST_DM_STARTUP
			| (start_eip >> 12));				/* Boot on the stack */
		SMP_PRINTK(("Before start apic_write.\n"));
		apic_write(APIC_ICR, cfg);				/* Kick the second */

		SMP_PRINTK(("Startup point 1.\n"));

		timeout = 0;
		SMP_PRINTK(("Waiting for send to finish...\n"));
		do {
			SMP_PRINTK(("+"));
			udelay(100);
			send_status = apic_read(APIC_ICR) & 0x1000;
		} while (send_status && (timeout++ < 1000));

		/*
		 * Give the other CPU some time to accept the IPI.
		 */
		udelay(200);
		accept_status = (apic_read(APIC_ESR) & 0xEF);
	}
	SMP_PRINTK(("After Startup.\n"));

	if (send_status)		/* APIC never delivered?? */
		printk("APIC never delivered???\n");
	if (accept_status)		/* Send accept error */
		printk("APIC delivery error (%lx).\n", accept_status);

	if (!(send_status || accept_status))
	{
		/*
		 * allow APs to start initializing.
		 */
		SMP_PRINTK(("Before Callout %d.\n", i));
		set_bit(i, (unsigned long *)&cpu_callout_map[0]);
		SMP_PRINTK(("After Callout %d.\n", i));

		for (timeout = 0; timeout < 50000; timeout++)
		{
			if (cpu_callin_map[0] & (1<<i))
				break;	/* It has booted */
			udelay(100);	/* Wait 5s total for a response */
		}
		if (cpu_callin_map[0] & (1<<i))
		{
			/* number CPUs logically, starting from 1 (BSP is 0) */
#if 0
			cpu_number_map[i] = cpucount;
			__cpu_logical_map[cpucount] = i;
#endif
			printk("OK.\n");
			printk("CPU%d: ", i);
			print_cpu_info(&cpu_data[i]);
		}
		else
		{
			if (*((volatile unsigned char *)phys_to_virt(8192)) == 0xA5)
				printk("Stuck ??\n");
			else
				printk("Not responding.\n");
		}
		SMP_PRINTK(("CPU has booted.\n"));
	}
	else
	{
		__cpu_logical_map[cpucount] = -1;
		cpu_number_map[i] = -1;
		cpucount--;
	}

	swapper_pg_dir[0] = maincfg;
	local_flush_tlb();

	/* mark "stuck" area as not stuck */
	*((volatile unsigned long *)phys_to_virt(8192)) = 0;
}
cycles_t cacheflush_time;
extern unsigned long cpu_hz;

static void smp_tune_scheduling(void)
{
	unsigned long cachesize;
	/*
	 * Rough estimation for SMP scheduling, this is the number of
	 * cycles it takes for a fully memory-limited process to flush
	 * the SMP-local cache.
	 *
	 * (For a P5 this pretty much means we will choose another idle
	 * CPU almost always at wakeup time (this is due to the small
	 * L1 cache), on PIIs it's around 50-100 usecs, depending on
	 * the cache size)
	 */

	if (!cpu_hz) {
		/*
		 * this basically disables processor-affinity
		 * scheduling on SMP without a TSC.
		 */
		cacheflush_time = 0;
		return;
	} else {
		cachesize = boot_cpu_data.x86_cache_size;
		if (cachesize == -1)
			cachesize = 8;	/* Pentiums */

		cacheflush_time = cpu_hz/1024*cachesize/5000;
	}

	printk("per-CPU timeslice cutoff: %ld.%02ld usecs.\n",
		(long)cacheflush_time/(cpu_hz/1000000),
		((long)cacheflush_time*100/(cpu_hz/1000000)) % 100);
}
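
/*
 * Editor's worked example (illustrative numbers, not from the original):
 * with a 400 MHz TSC and a 512 KB cache, the formula above gives
 * 400000000/1024*512/5000 = 40000 cycles, i.e. about 100 usecs at
 * 400 cycles per usec -- matching the 50-100 usec PII ballpark quoted
 * in the comment.
 */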
unsigned int prof_multiplier[NR_CPUS];
unsigned int prof_counter[NR_CPUS];
/*
 * Cycle through the processors sending APIC IPIs to boot each.
 */
void __init smp_boot_cpus(void)
{
	int i;
	unsigned long cfg;

#ifdef CONFIG_MTRR
	/* Must be done before other processors booted */
	mtrr_init_boot_cpu();
#endif
	/*
	 * Initialize the logical to physical CPU number mapping
	 * and the per-CPU profiling counter/multiplier
	 */

	for (i = 0; i < NR_CPUS; i++) {
		cpu_number_map[i] = -1;
		prof_counter[i] = 1;
		prof_multiplier[i] = 1;
	}

	/*
	 * Setup boot CPU information
	 */

	smp_store_cpu_info(boot_cpu_id);	/* Final full version of the data */
	smp_tune_scheduling();
	printk("CPU%d: ", boot_cpu_id);
	print_cpu_info(&cpu_data[boot_cpu_id]);

	/*
	 * not necessary because the MP table should list the boot
	 * CPU too, but we do it for the sake of robustness anyway.
	 * (and for the case when a non-SMP board boots an SMP kernel)
	 */
	cpu_present_map |= (1 << hard_smp_processor_id());

	cpu_number_map[boot_cpu_id] = 0;

#ifdef CONFIG_X86_IO_APIC
	/*
	 * If we don't conform to the Intel MPS standard, get out
	 * of here now!
	 */

	if (!smp_found_config)
	{
		printk(KERN_NOTICE "SMP motherboard not detected. Using dummy APIC emulation.\n");
		io_apic_irqs = 0;
		cpu_online_map = cpu_present_map;
		goto smp_done;
	}
#endif
	/*
	 * If SMP should be disabled, then really disable it!
	 */

	if (!max_cpus)
	{
		smp_found_config = 0;
		printk(KERN_INFO "SMP mode deactivated, forcing use of dummy APIC emulation.\n");
	}

#ifdef SMP_DEBUG
	{
		int reg;

		/*
		 * This is to verify that we're looking at
		 * a real local APIC. Check these against
		 * your board if the CPUs aren't getting
		 * started for no apparent reason.
		 */

		reg = apic_read(APIC_VERSION);
		SMP_PRINTK(("Getting VERSION: %x\n", reg));

		apic_write(APIC_VERSION, 0);
		reg = apic_read(APIC_VERSION);
		SMP_PRINTK(("Getting VERSION: %x\n", reg));

		/*
		 * The two version reads above should print the same
		 * NON-ZERO!!! numbers. If the second one is zero,
		 * there is a problem with the APIC write/read
		 * definitions.
		 *
		 * The next two are just to see if we have sane values.
		 * They're only really relevant if we're in Virtual Wire
		 * compatibility mode, but most boxes are these days.
		 */

		reg = apic_read(APIC_LVT0);
		SMP_PRINTK(("Getting LVT0: %x\n", reg));

		reg = apic_read(APIC_LVT1);
		SMP_PRINTK(("Getting LVT1: %x\n", reg));
	}
#endif
	enable_local_APIC();

	/*
	 * Set up our local APIC timer:
	 */
	setup_APIC_clock();

	/*
	 * Now scan the CPU present map and fire up the other CPUs.
	 */

	/*
	 * Add all detected CPUs. (later on we can down individual
	 * CPUs which will change cpu_online_map but not necessarily
	 * cpu_present_map. We are pretty much ready for hot-swap CPUs.)
	 */
	cpu_online_map = cpu_present_map;
	mb();

	SMP_PRINTK(("CPU map: %lx\n", cpu_present_map));

	for (i = 0; i < NR_CPUS; i++)
	{
		/*
		 * Don't even attempt to start the boot CPU!
		 */
		if (i == boot_cpu_id)
			continue;

		if ((cpu_online_map & (1 << i))
		    && (max_cpus < 0 || max_cpus > cpucount+1))
			do_boot_cpu(i);

		/*
		 * Make sure we unmap all failed CPUs
		 */
		if (cpu_number_map[i] == -1 && (cpu_online_map & (1 << i))) {
			printk("CPU #%d not responding. Removing from cpu_online_map.\n", i);
			cpu_online_map &= ~(1 << i);
		}
	}

	/*
	 * Cleanup possible dangling ends...
	 */

	/*
	 * Install writable page 0 entry.
	 */
	cfg = pg0[0];
	pg0[0] = 3;	/* writeable, present, addr 0 */
	local_flush_tlb();

	/*
	 * Paranoid: Set warm reset code and vector here back
	 * to default values.
	 */

	CMOS_WRITE(0, 0xf);

	*((volatile long *) phys_to_virt(0x467)) = 0;

	/*
	 * Restore old page 0 entry.
	 */
	pg0[0] = cfg;
	local_flush_tlb();

	/*
	 * Allow the user to impress friends.
	 */

	SMP_PRINTK(("Before bogomips.\n"));
	if (cpucount == 0)
	{
		printk(KERN_ERR "Error: only one processor found.\n");
		cpu_online_map = (1 << hard_smp_processor_id());
	}
	else
	{
		unsigned long bogosum = 0;
		for (i = 0; i < 32; i++)
		{
			if (cpu_online_map & (1<<i))
				bogosum += cpu_data[i].loops_per_sec;
		}
		printk(KERN_INFO "Total of %d processors activated (%lu.%02lu BogoMIPS).\n",
			cpucount+1,
			(bogosum+2500)/500000,
			((bogosum+2500)/5000)%100);
		SMP_PRINTK(("Before bogocount - setting activated=1.\n"));
		smp_activated = 1;
		smp_num_cpus = cpucount + 1;
	}
	if (smp_b_stepping)
		printk(KERN_WARNING "WARNING: SMP operation may be unreliable with B stepping processors.\n");
	SMP_PRINTK(("Boot done.\n"));

	cache_APIC_registers();
#ifdef CONFIG_X86_IO_APIC
	/*
	 * Here we can be sure that there is an IO-APIC in the system. Let's
	 * go and set it up:
	 */
	if (!skip_ioapic_setup)
		setup_IO_APIC();
smp_done:
	;
#endif
}
/*
 * the following functions deal with sending IPIs between CPUs.
 *
 * We use 'broadcast', CPU->CPU IPIs and self-IPIs too.
 */

/*
 * Silly serialization to work around CPU bug in P5s.
 * We can safely turn it off on a 686.
 */
#ifdef CONFIG_X86_GOOD_APIC
# define FORCE_APIC_SERIALIZATION 0
#else
# define FORCE_APIC_SERIALIZATION 1
#endif
static unsigned int cached_APIC_ICR;
static unsigned int cached_APIC_ICR2;

/*
 * Caches reserved bits, APIC reads are (mildly) expensive
 * and force otherwise unnecessary CPU synchronization.
 *
 * (We could cache other APIC registers too, but these are the
 * main ones used in RL.)
 */
#define slow_ICR (apic_read(APIC_ICR) & ~0xFDFFF)
#define slow_ICR2 (apic_read(APIC_ICR2) & 0x00FFFFFF)

void cache_APIC_registers(void)
{
	cached_APIC_ICR = slow_ICR;
	cached_APIC_ICR2 = slow_ICR2;
	mb();
}
static inline unsigned int __get_ICR(void)
{
#if FORCE_APIC_SERIALIZATION
	/*
	 * Wait for the APIC to become ready - this should never occur. It's
	 * a debugging check really.
	 */
	int count = 0;
	unsigned int cfg;

	while (count < 1000)
	{
		cfg = slow_ICR;
		if (!(cfg & (1<<12))) {		/* delivery status idle? */
			if (count)
				atomic_add(count, (atomic_t *)&ipi_count);
			return cfg;
		}
		count++;
		udelay(10);
	}
	printk("CPU #%d: previous IPI still not cleared after 10ms\n",
		smp_processor_id());
	return cfg;
#else
	return cached_APIC_ICR;
#endif
}

static inline unsigned int __get_ICR2(void)
{
#if FORCE_APIC_SERIALIZATION
	return slow_ICR2;
#else
	return cached_APIC_ICR2;
#endif
}
static inline int __prepare_ICR(unsigned int shortcut, int vector)
{
	unsigned int cfg;

	cfg = __get_ICR();
	cfg |= APIC_DEST_FIELD | APIC_DEST_DM_FIXED | shortcut | vector;

	return cfg;
}

static inline int __prepare_ICR2(unsigned int dest)
{
	unsigned int cfg;

	cfg = __get_ICR2();
	cfg |= SET_APIC_DEST_FIELD(dest);

	return cfg;
}
static inline void __send_IPI_shortcut(unsigned int shortcut, int vector)
{
	unsigned int cfg;
	/*
	 * Subtle. In the case of the 'never do double writes' workaround we
	 * have to lock out interrupts to be safe. Otherwise it's just one
	 * single atomic write to the APIC, no need for cli/sti.
	 */
#if FORCE_APIC_SERIALIZATION
	unsigned long flags;

	__save_flags(flags);
	__cli();
#endif

	/*
	 * No need to touch the target chip field
	 */
	cfg = __prepare_ICR(shortcut, vector);

	/*
	 * Send the IPI. The write to APIC_ICR fires this off.
	 */
	apic_write(APIC_ICR, cfg);
#if FORCE_APIC_SERIALIZATION
	__restore_flags(flags);
#endif
}

static inline void send_IPI_allbutself(int vector)
{
	__send_IPI_shortcut(APIC_DEST_ALLBUT, vector);
}

static inline void send_IPI_all(int vector)
{
	__send_IPI_shortcut(APIC_DEST_ALLINC, vector);
}

void send_IPI_self(int vector)
{
	__send_IPI_shortcut(APIC_DEST_SELF, vector);
}
static inline void send_IPI_single(int dest, int vector)
{
	unsigned long cfg;
#if FORCE_APIC_SERIALIZATION
	unsigned long flags;

	__save_flags(flags);
	__cli();
#endif

	/*
	 * prepare target chip field
	 */
	cfg = __prepare_ICR2(dest);
	apic_write(APIC_ICR2, cfg);

	/*
	 * program the ICR
	 */
	cfg = __prepare_ICR(0, vector);

	/*
	 * Send the IPI. The write to APIC_ICR fires this off.
	 */
	apic_write(APIC_ICR, cfg);
#if FORCE_APIC_SERIALIZATION
	__restore_flags(flags);
#endif
}
/*
 * This is fraught with deadlocks. Probably the situation is not that
 * bad as in the early days of SMP, so we might ease some of the
 * paranoia here.
 */
void smp_flush_tlb(void)
{
	int cpu = smp_processor_id();
	int stuck;
	unsigned long flags;

	/*
	 * it's important that we do not generate any APIC traffic
	 * until the AP CPUs have booted up!
	 */
	if (cpu_online_map) {
		/*
		 * The assignment is safe because it's volatile so the
		 * compiler cannot reorder it, because the i586 has
		 * strict memory ordering and because only the kernel
		 * lock holder may issue a tlb flush. If you break any
		 * one of those three change this to an atomic bus
		 * locked or.
		 */
		smp_invalidate_needed = cpu_online_map;

		/*
		 * Processors spinning on some lock with IRQs disabled
		 * will see this IRQ late. The smp_invalidate_needed
		 * map will ensure they don't do a spurious flush tlb
		 * or miss one.
		 */

		__save_flags(flags);
		__cli();

		send_IPI_allbutself(INVALIDATE_TLB_VECTOR);

		/*
		 * Spin waiting for completion
		 */

		stuck = 50000000;
		while (smp_invalidate_needed) {
			/*
			 * Take care of "crossing" invalidates
			 */
			if (test_bit(cpu, &smp_invalidate_needed))
				clear_bit(cpu, &smp_invalidate_needed);
			--stuck;
			if (!stuck) {
				printk("stuck on TLB IPI wait (CPU#%d)\n", cpu);
				break;
			}
		}
		__restore_flags(flags);
	}

	/*
	 * Flush the local TLB
	 */
	local_flush_tlb();
}
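
/*
 * Editor's sketch (not original code, not compiled): the "atomic bus
 * locked or" that the comment in smp_flush_tlb() refers to would
 * replace the plain volatile assignment with a locked read-modify-write
 * of the map, along these lines:
 */
#if 0
	__asm__ __volatile__("lock; orl %1,%0"
		: "=m" (smp_invalidate_needed)
		: "ir" (cpu_online_map), "m" (smp_invalidate_needed));
#endif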
/*
 * this function sends a 'reschedule' IPI to another CPU.
 * it goes straight through and wastes no time serializing
 * anything. Worst case is that we lose a reschedule ...
 */
void smp_send_reschedule(int cpu)
{
	send_IPI_single(cpu, RESCHEDULE_VECTOR);
}

/*
 * this function sends a 'stop' IPI to all other CPUs in the system.
 * it goes straight through.
 */
void smp_send_stop(void)
{
	send_IPI_allbutself(STOP_CPU_VECTOR);
}

/*
 * this function sends a 'reload MTRR state' IPI to all other CPUs
 * in the system. it goes straight through, completion processing
 * is done on the mtrr.c level.
 */
void smp_send_mtrr(void)
{
	send_IPI_allbutself(MTRR_CHANGE_VECTOR);
}
/*
 * Local timer interrupt handler. It does both profiling and
 * process statistics/rescheduling.
 *
 * We do profiling in every local tick, statistics/rescheduling
 * happen only every 'profiling multiplier' ticks. The default
 * multiplier is 1 and it can be changed by writing the new multiplier
 * value into /proc/profile.
 */
void smp_local_timer_interrupt(struct pt_regs *regs)
{
	int cpu = smp_processor_id();

	/*
	 * The profiling function is SMP safe. (nothing can mess
	 * around with "current", and the profiling counters are
	 * updated with atomic operations). This is especially
	 * useful with a profiling multiplier != 1
	 */
	if (!user_mode(regs))
		x86_do_profile(regs->eip);

	if (!--prof_counter[cpu]) {
		int user = 0, system = 0;
		struct task_struct *p = current;

		/*
		 * After doing the above, we need to make like
		 * a normal interrupt - otherwise timer interrupts
		 * ignore the global interrupt lock, which is the
		 * WrongThing (tm) to do.
		 */

		if (user_mode(regs))
			user = 1;
		else
			system = 1;

		irq_enter(cpu, 0);
		if (p->pid) {
			update_one_process(p, 1, user, system, cpu);

			p->counter -= 1;
			if (p->counter < 0) {
				p->counter = 0;
				p->need_resched = 1;
			}
			if (p->priority < DEF_PRIORITY) {
				kstat.cpu_nice += user;
				kstat.per_cpu_nice[cpu] += user;
			} else {
				kstat.cpu_user += user;
				kstat.per_cpu_user[cpu] += user;
			}

			kstat.cpu_system += system;
			kstat.per_cpu_system[cpu] += system;
		}
		prof_counter[cpu] = prof_multiplier[cpu];
		irq_exit(cpu, 0);
	}

	/*
	 * We take the 'long' return path, and there every subsystem
	 * grabs the appropriate locks (kernel lock/irq lock).
	 *
	 * we might want to decouple profiling from the 'long path',
	 * and do the profiling totally in assembly.
	 *
	 * Currently this isn't too much of an issue (performance wise),
	 * we can take more than 100K local irqs per second on a 100 MHz P5.
	 */
}
/*
 * Local APIC timer interrupt. This is the most natural way for doing
 * local interrupts, but local timer interrupts can be emulated by
 * broadcast interrupts too. [in case the hw doesn't support APIC timers]
 *
 * [ if a single-CPU system runs an SMP kernel then we call the local
 *   interrupt as well. Thus we cannot inline the local irq ... ]
 */
void smp_apic_timer_interrupt(struct pt_regs *regs)
{
	/*
	 * NOTE! We'd better ACK the irq immediately,
	 * because timer handling can be slow, and we
	 * want to be able to accept NMI tlb invalidates
	 * during this time.
	 */
	ack_APIC_irq();
	smp_local_timer_interrupt(regs);
}
/*
 * Reschedule call back. Nothing to do,
 * all the work is done automatically when
 * we return from the interrupt.
 */
asmlinkage void smp_reschedule_interrupt(void)
{
	ack_APIC_irq();
}

/*
 * Invalidate call-back
 */
asmlinkage void smp_invalidate_interrupt(void)
{
	if (test_and_clear_bit(smp_processor_id(), &smp_invalidate_needed))
		local_flush_tlb();

	ack_APIC_irq();
}
static void stop_this_cpu(void)
{
	/*
	 * Remove this CPU:
	 */
	clear_bit(smp_processor_id(), &cpu_online_map);

	if (cpu_data[smp_processor_id()].hlt_works_ok)
		for (;;) __asm__("hlt");
	for (;;);
}

/*
 * CPU halt call-back
 */
asmlinkage void smp_stop_cpu_interrupt(void)
{
	stop_this_cpu();
}

void (*mtrr_hook)(void) = NULL;

asmlinkage void smp_mtrr_interrupt(void)
{
	ack_APIC_irq();
	if (mtrr_hook)
		(*mtrr_hook)();
}

/*
 * This interrupt should _never_ happen with our APIC/SMP architecture
 */
asmlinkage void smp_spurious_interrupt(void)
{
	/* ack_APIC_irq(); see sw-dev-man vol 3, chapter 7.4.13.5 */
	printk("spurious APIC interrupt, ayiee, should never happen.\n");
}
/*
 * This part sets up the APIC 32 bit clock in LVTT1, with HZ interrupts
 * per second. We assume that the caller has already set up the local
 * APIC.
 *
 * The APIC timer is not exactly in sync with the external timer chip,
 * it closely follows bus clocks.
 */

#define RDTSC(x)	__asm__ __volatile__ ( "rdtsc" \
				: "=a" (((unsigned long *)&x)[0]), \
				  "=d" (((unsigned long *)&x)[1]))
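
/*
 * Editor's sketch (not original code, not compiled): RDTSC() fills a
 * 64-bit variable with the CPU's time-stamp counter, low word from
 * EAX and high word from EDX, so cycle deltas come from plain
 * subtraction:
 */
#if 0
	unsigned long long before, after;

	RDTSC(before);
	udelay(1000);
	RDTSC(after);
	/* (after - before) now holds roughly 1ms worth of CPU cycles */
#endif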
/*
 * The timer chip is already set up at HZ interrupts per second here,
 * but we do not accept timer interrupts yet. We only allow the BP
 * to calibrate.
 */
static unsigned int __init get_8254_timer_count(void)
{
	unsigned int count;

	outb_p(0x00, 0x43);		/* latch counter 0 on the 8254 control port */
	count = inb_p(0x40);		/* read the latched count, LSB first ... */
	count |= inb_p(0x40) << 8;	/* ... then MSB */

	return count;
}
/*
 * This function sets up the local APIC timer, with a timeout of
 * 'clocks' APIC bus clocks. During calibration we actually call
 * this function twice, once with a bogus timeout value, the second
 * time for real. The other (noncalibrating) CPUs call this
 * function only once, with the real value.
 *
 * We are strictly in irqs off mode here, as we do not want an APIC
 * interrupt to go off accidentally.
 *
 * We do reads before writes even if unnecessary, to get around the
 * APIC double write bug.
 */

#define APIC_DIVISOR 16

void setup_APIC_timer(unsigned int clocks)
{
	unsigned long lvtt1_value;
	unsigned int tmp_value;

	/*
	 * Unfortunately the local APIC timer cannot be set up into NMI
	 * mode. With the IO APIC we can re-route the external timer
	 * interrupt and broadcast it as an NMI to all CPUs, so no pain.
	 */
	tmp_value = apic_read(APIC_LVTT);
	lvtt1_value = APIC_LVT_TIMER_PERIODIC | LOCAL_TIMER_VECTOR;
	apic_write(APIC_LVTT, lvtt1_value);

	/*
	 * Divide PICLK by 16
	 */
	tmp_value = apic_read(APIC_TDCR);
	apic_write(APIC_TDCR, (tmp_value & ~APIC_TDR_DIV_1)
				| APIC_TDR_DIV_16);

	tmp_value = apic_read(APIC_TMICT);
	apic_write(APIC_TMICT, clocks/APIC_DIVISOR);
}
void __init wait_8254_wraparound(void)
{
	unsigned int curr_count, prev_count = ~0;
	int delta;

	curr_count = get_8254_timer_count();

	do {
		prev_count = curr_count;
		curr_count = get_8254_timer_count();
		delta = curr_count - prev_count;

		/*
		 * This limit for delta seems arbitrary, but it isn't, it's
		 * slightly above the level of error a buggy Mercury/Neptune
		 * chipset timer can cause.
		 */

	} while (delta < 300);
}
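
/*
 * Editor's note (illustrative numbers, not original text): the 8254
 * counts down, so between two reads delta is normally negative. A
 * large positive delta means the counter reloaded (wrapped) in
 * between, e.g.:
 *
 *	prev_count = 25		(almost expired)
 *	curr_count = 11900	(just reloaded)
 *	delta      = 11875	-> wraparound detected (>= 300)
 */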
/*
 * In this function we calibrate the APIC bus clocks to the external
 * timer. Unfortunately we cannot use jiffies and the timer irq
 * to calibrate, since some later bootup code depends on getting
 * the first irq? Ugh.
 *
 * We want to do the calibration only once since we
 * want to have local timer irqs in sync. CPUs connected
 * by the same APIC bus have the very same bus frequency.
 * And we want to have irqs off anyways, no accidental
 * APIC irq that way.
 */

int __init calibrate_APIC_clock(void)
{
	unsigned long long t1, t2;
	long tt1, tt2;
	long calibration_result;
	int i;

	printk("calibrating APIC timer ... ");

	/*
	 * Put whatever arbitrary (but long enough) timeout
	 * value into the APIC clock, we just want to get the
	 * counter running for calibration.
	 */
	setup_APIC_timer(1000000000);

	/*
	 * The timer chip counts down to zero. Let's wait
	 * for a wraparound to start exact measurement:
	 * (the current tick might have been already half done)
	 */

	wait_8254_wraparound();

	/*
	 * We wrapped around just now. Let's start:
	 */
	RDTSC(t1);
	tt1 = apic_read(APIC_TMCCT);

#define LOOPS (HZ/10)
	/*
	 * Let's wait LOOPS wraparounds:
	 */
	for (i = 0; i < LOOPS; i++)
		wait_8254_wraparound();

	tt2 = apic_read(APIC_TMCCT);
	RDTSC(t2);

	/*
	 * The APIC bus clock counter is 32 bits only, it
	 * might have overflown, but note that we use signed
	 * longs, thus no extra care needed.
	 *
	 * underflown to be exact, as the timer counts down ;)
	 */

	calibration_result = (tt1-tt2)*APIC_DIVISOR/LOOPS;

	SMP_PRINTK(("\n..... %ld CPU clocks in 1 timer chip tick.",
		(unsigned long)(t2-t1)/LOOPS));

	SMP_PRINTK(("\n..... %ld APIC bus clocks in 1 timer chip tick.",
		calibration_result));

	printk("\n..... CPU clock speed is %ld.%04ld MHz.\n",
		((long)(t2-t1)/LOOPS) / (1000000/HZ),
		((long)(t2-t1)/LOOPS) % (1000000/HZ));

	printk("..... APIC bus clock speed is %ld.%04ld MHz.\n",
		calibration_result / (1000000/HZ),
		calibration_result % (1000000/HZ));
#undef LOOPS

	return calibration_result;
}
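
/*
 * Editor's worked example (illustrative numbers, not measured): with
 * HZ=100, LOOPS is 10, so we time 10 PIT ticks = 0.1s. On a 66 MHz
 * bus, TMCCT decrements once per APIC_DIVISOR=16 bus clocks, so it
 * drops by 66e6 * 0.1 / 16 = 412500 over that window, giving
 * calibration_result = 412500 * 16 / 10 = 660000 bus clocks per tick,
 * which the printk above reports as 66.0000 MHz.
 */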
static unsigned int calibration_result;

void __init setup_APIC_clock(void)
{
	unsigned long flags;

	static volatile int calibration_lock;

	__save_flags(flags);
	__cli();

	SMP_PRINTK(("setup_APIC_clock() called.\n"));

	/*
	 * [ setup_APIC_clock() is called from all CPUs, but we want
	 *   to do this part of the setup only once ... and it fits
	 *   here best ]
	 */
	if (!test_and_set_bit(0, &calibration_lock)) {

		calibration_result = calibrate_APIC_clock();
		/*
		 * Signal completion to the other CPU[s]:
		 */
		calibration_lock = 3;

	} else {
		/*
		 * Other CPU is calibrating, wait for finish:
		 */
		SMP_PRINTK(("waiting for other CPU calibrating APIC ... "));
		while (calibration_lock == 1);
		SMP_PRINTK(("done, continuing.\n"));
	}

	/*
	 * Now set up the timer for real.
	 */

	setup_APIC_timer(calibration_result);

	/*
	 * We ACK the APIC, just in case there is something pending.
	 */

	ack_APIC_irq();

	__restore_flags(flags);
}
/*
 * the frequency of the profiling timer can be changed
 * by writing a multiplier value into /proc/profile.
 *
 * usually you want to run this on all CPUs ;)
 */
int setup_profiling_timer(unsigned int multiplier)
{
	int cpu = smp_processor_id();
	unsigned long flags;

	/*
	 * Sanity check. [at least 500 APIC cycles should be
	 * between APIC interrupts as a rule of thumb, to avoid
	 * irqs flooding us]
	 */
	if ((!multiplier) || (calibration_result/multiplier < 500))
		return -EINVAL;

	save_flags(flags);
	cli();
	setup_APIC_timer(calibration_result/multiplier);
	prof_multiplier[cpu] = multiplier;
	restore_flags(flags);

	return 0;
}

#undef APIC_DIVISOR
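
/*
 * Editor's sketch (not original code, not compiled): a caller raising
 * the local profiling rate would invoke setup_profiling_timer() on
 * each CPU; the 500-cycle sanity check above bounds how large the
 * multiplier may be:
 */
#if 0
	if (setup_profiling_timer(10) < 0)	/* 10x the normal tick rate */
		printk("profiling multiplier rejected\n");
#endif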