Linux 2.2.0
[davej-history.git] / arch/i386/kernel/smp.c
blob 2960d521c09acdf4e9f1109fc1c3f353458ee7ee
1 /*
2 * Intel MP v1.1/v1.4 specification support routines for multi-pentium
3 * hosts.
5 * (c) 1995 Alan Cox, CymruNET Ltd <alan@cymru.net>
6 * (c) 1998 Ingo Molnar
8 * Supported by Caldera http://www.caldera.com.
9 * Much of the core SMP work is based on previous work by Thomas Radke, to
10 * whom a great many thanks are extended.
12 * Thanks to Intel for making available several different Pentium,
13 * Pentium Pro and Pentium-II/Xeon MP machines.
15 * This code is released under the GNU public license version 2 or
16 * later.
18 * Fixes
19 * Felix Koop : NR_CPUS used properly
20 * Jose Renau : Handle single CPU case.
21 * Alan Cox : By repeated request 8) - Total BogoMIP report.
22 * Greg Wright : Fix for kernel stacks panic.
23 * Erich Boleyn : MP v1.4 and additional changes.
24 * Matthias Sattler : Changes for 2.1 kernel map.
25 * Michel Lespinasse : Changes for 2.1 kernel map.
26 * Michael Chastain : Change trampoline.S to gnu as.
27 * Alan Cox : Dumb bug: 'B' step PPro's are fine
28 * Ingo Molnar : Added APIC timers, based on code
29 * from Jose Renau
30 * Alan Cox : Added EBDA scanning
31 * Ingo Molnar : various cleanups and rewrites
34 #include <linux/config.h>
35 #include <linux/mm.h>
36 #include <linux/kernel_stat.h>
37 #include <linux/delay.h>
38 #include <linux/mc146818rtc.h>
39 #include <linux/smp_lock.h>
40 #include <linux/init.h>
41 #include <asm/mtrr.h>
43 #include "irq.h"
45 extern unsigned long start_kernel, _etext;
46 extern void update_one_process( struct task_struct *p,
47 unsigned long ticks, unsigned long user,
48 unsigned long system, int cpu);
50 * Some notes on processor bugs:
52 * Pentium and Pentium Pro (and all CPUs) have bugs. The Linux issues
53 * for SMP are handled as follows.
55 * Pentium Pro
56 * Occasional delivery of 'spurious interrupt' as trap #16. This
57 * is very rare. The kernel logs the event and recovers
59 * Pentium
60 * There is a marginal case where REP MOVS on 100MHz SMP
61 * machines with B stepping processors can fail. XXX should provide
62 * an L1cache=Writethrough or L1cache=off option.
64 * B stepping CPUs may hang. There are hardware workarounds
65 * for this. We warn about it in case your board doesn't have the
66 * workarounds. Basically that's so I can tell anyone with a B stepping
67 * CPU and SMP problems "tough".
69 * Specific items [From Pentium Processor Specification Update]
71 * 1AP. Linux doesn't use remote read
72 * 2AP. Linux doesn't trust APIC errors
73 * 3AP. We work around this
74 * 4AP. Linux never generated 3 interrupts of the same priority
75 * to cause a lost local interrupt.
76 * 5AP. Remote read is never used
77 * 9AP. XXX NEED TO CHECK WE HANDLE THIS XXX
78 * 10AP. XXX NEED TO CHECK WE HANDLE THIS XXX
79 * 11AP. Linux reads the APIC between writes to avoid this, as per
80 * the documentation. Make sure you preserve this as it affects
81 * the C stepping chips too.
83 * If this sounds worrying, believe me these bugs are ___RARE___ and
84 * there's almost nothing of note from the C stepping onwards.
88 /* Kernel spinlock */
89 spinlock_t kernel_flag = SPIN_LOCK_UNLOCKED;
92 * function prototypes:
94 static void cache_APIC_registers (void);
95 static void stop_this_cpu (void);
97 static int smp_b_stepping = 0; /* Set if we find a B stepping CPU */
99 static int max_cpus = -1; /* Setup configured maximum number of CPUs to activate */
100 int smp_found_config=0; /* Have we found an SMP box */
102 unsigned long cpu_present_map = 0; /* Bitmask of physically existing CPUs */
103 unsigned long cpu_online_map = 0; /* Bitmask of currently online CPUs */
104 int smp_num_cpus = 1; /* Total count of live CPUs */
105 int smp_threads_ready=0; /* Set when the idlers are all forked */
106 volatile int cpu_number_map[NR_CPUS]; /* which CPU maps to which logical number */
107 volatile int __cpu_logical_map[NR_CPUS]; /* which logical number maps to which CPU */
108 static volatile unsigned long cpu_callin_map[NR_CPUS] = {0,}; /* We always use 0; the rest is ready for parallel delivery */
109 static volatile unsigned long cpu_callout_map[NR_CPUS] = {0,}; /* We always use 0; the rest is ready for parallel delivery */
110 volatile unsigned long smp_invalidate_needed; /* Used for the invalidate map that's also checked in the spinlock */
111 volatile unsigned long kstack_ptr; /* Stack vector for booting CPUs */
112 struct cpuinfo_x86 cpu_data[NR_CPUS]; /* Per CPU bogomips and other parameters */
113 static unsigned int num_processors = 1; /* Internal processor count */
114 unsigned long mp_ioapic_addr = 0xFEC00000; /* Address of the I/O apic (not yet used) */
115 unsigned char boot_cpu_id = 0; /* Processor that is doing the boot up */
116 static int smp_activated = 0; /* Tripped once we need to start cross invalidating */
117 int apic_version[NR_CPUS]; /* APIC version number */
118 unsigned long apic_retval; /* Just debugging the assembler.. */
120 volatile unsigned long kernel_counter=0; /* Number of times the processor holds the lock */
121 volatile unsigned long syscall_count=0; /* Number of times the processor holds the syscall lock */
123 volatile unsigned long ipi_count; /* Number of IPIs delivered */
125 const char lk_lockmsg[] = "lock from interrupt context at %p\n";
127 int mp_bus_id_to_type [MAX_MP_BUSSES] = { -1, };
128 extern int mp_irq_entries;
129 extern struct mpc_config_intsrc mp_irqs [MAX_IRQ_SOURCES];
130 extern int mpc_default_type;
131 int mp_bus_id_to_pci_bus [MAX_MP_BUSSES] = { -1, };
132 int mp_current_pci_id = 0;
133 unsigned long mp_lapic_addr = 0;
134 int skip_ioapic_setup = 0; /* 1 if "noapic" boot option passed */
136 /* #define SMP_DEBUG */
138 #ifdef SMP_DEBUG
139 #define SMP_PRINTK(x) printk x
140 #else
141 #define SMP_PRINTK(x)
142 #endif
145 * IA s/w dev Vol 3, Section 7.4
147 #define APIC_DEFAULT_PHYS_BASE 0xfee00000
150 * Setup routine for controlling SMP activation
152 * Command-line option of "nosmp" or "maxcpus=0" will disable SMP
153 * activation entirely (the MPS table probe still happens, though).
155 * Command-line option of "maxcpus=<NUM>", where <NUM> is an integer
156 * greater than 0, limits the maximum number of CPUs activated in
157 * SMP mode to <NUM>.
160 void __init smp_setup(char *str, int *ints)
162 if (ints && ints[0] > 0)
163 max_cpus = ints[1];
164 else
165 max_cpus = 0;
168 void ack_APIC_irq(void)
170 /* Clear the IPI */
172 /* Dummy read */
173 apic_read(APIC_SPIV);
175 /* Docs say use 0 for future compatibility */
176 apic_write(APIC_EOI, 0);
180 * Intel MP BIOS table parsing routines:
183 #ifndef CONFIG_X86_VISWS_APIC
185 * Checksum an MP configuration block.
188 static int mpf_checksum(unsigned char *mp, int len)
190 int sum=0;
191 while(len--)
192 sum+=*mp++;
193 return sum&0xFF;
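/*
 * A minimal standalone sketch of the checksum rule used above: the MP
 * spec wants all bytes of a configuration block (including the
 * checksum byte itself) to sum to zero mod 256, so a non-zero return
 * from mpf_checksum() marks a bad table. The 16-byte buffer and the
 * helper name are made up for illustration.
 */
#include <stdio.h>

static int sketch_checksum(unsigned char *mp, int len)
{
	int sum = 0;
	while (len--)
		sum += *mp++;
	return sum & 0xFF;
}

int main(void)
{
	unsigned char table[16] = { '_', 'M', 'P', '_' };
	int i, sum = 0;
	for (i = 0; i < 15; i++)
		sum += table[i];
	table[15] = (unsigned char)-sum;	/* fix up the checksum byte */
	printf("checksum ok: %s\n", sketch_checksum(table, 16) == 0 ? "yes" : "no");
	return 0;
}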
197 * Processor encoding in an MP configuration block
200 static char *mpc_family(int family,int model)
202 static char n[32];
203 static char *model_defs[]=
205 "80486DX","80486DX",
206 "80486SX","80486DX/2 or 80487",
207 "80486SL","Intel5X2(tm)",
208 "Unknown","Unknown",
209 "80486DX/4"
211 if (family==0x6)
212 return("Pentium(tm) Pro");
213 if (family==0x5)
214 return("Pentium(tm)");
215 if (family==0x0F && model==0x0F)
216 return("Special controller");
217 if (family==0x04 && model<9)
218 return model_defs[model];
219 sprintf(n,"Unknown CPU [%d:%d]",family, model);
220 return n;
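/*
 * Sketch of the signature decode that feeds mpc_family(): the CPU
 * signature word packs stepping in bits 3-0, model in bits 7-4 and
 * family in bits 11-8, so a Pentium (family 5) model 2 reads 0x052x.
 * The mask values below are assumptions matching the shifts used at
 * the call site; standalone illustration:
 */
#include <stdio.h>

#define SKETCH_FAMILY_MASK 0x0F00	/* assumed CPU_FAMILY_MASK */
#define SKETCH_MODEL_MASK  0x00F0	/* assumed CPU_MODEL_MASK */

int main(void)
{
	unsigned int sig = 0x0521;	/* hypothetical Pentium signature */
	printf("family %u, model %u\n",
	       (sig & SKETCH_FAMILY_MASK) >> 8,
	       (sig & SKETCH_MODEL_MASK) >> 4);	/* family 5, model 2 */
	return 0;
}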
224 * Read the MPC
227 static int __init smp_read_mpc(struct mp_config_table *mpc)
229 char str[16];
230 int count=sizeof(*mpc);
231 int ioapics = 0;
232 unsigned char *mpt=((unsigned char *)mpc)+count;
234 if (memcmp(mpc->mpc_signature,MPC_SIGNATURE,4))
236 panic("SMP mptable: bad signature [%c%c%c%c]!\n",
237 mpc->mpc_signature[0],
238 mpc->mpc_signature[1],
239 mpc->mpc_signature[2],
240 mpc->mpc_signature[3]);
241 return 1;
243 if (mpf_checksum((unsigned char *)mpc,mpc->mpc_length))
245 panic("SMP mptable: checksum error!\n");
246 return 1;
248 if (mpc->mpc_spec!=0x01 && mpc->mpc_spec!=0x04)
250 printk("Bad Config Table version (%d)!!\n",mpc->mpc_spec);
251 return 1;
253 memcpy(str,mpc->mpc_oem,8);
254 str[8]=0;
255 memcpy(ioapic_OEM_ID,str,9);
256 printk("OEM ID: %s ",str);
258 memcpy(str,mpc->mpc_productid,12);
259 str[12]=0;
260 memcpy(ioapic_Product_ID,str,13);
261 printk("Product ID: %s ",str);
263 printk("APIC at: 0x%lX\n",mpc->mpc_lapic);
265 /* save the local APIC address, it might be non-default */
266 mp_lapic_addr = mpc->mpc_lapic;
269 * Now process the configuration blocks.
272 while(count<mpc->mpc_length)
274 switch(*mpt)
276 case MP_PROCESSOR:
278 struct mpc_config_processor *m=
279 (struct mpc_config_processor *)mpt;
280 if (m->mpc_cpuflag&CPU_ENABLED)
282 printk("Processor #%d %s APIC version %d\n",
283 m->mpc_apicid,
284 mpc_family((m->mpc_cpufeature&
285 CPU_FAMILY_MASK)>>8,
286 (m->mpc_cpufeature&
287 CPU_MODEL_MASK)>>4),
288 m->mpc_apicver);
289 #ifdef SMP_DEBUG
290 if (m->mpc_featureflag&(1<<0))
291 printk(" Floating point unit present.\n");
292 if (m->mpc_featureflag&(1<<7))
293 printk(" Machine Exception supported.\n");
294 if (m->mpc_featureflag&(1<<8))
295 printk(" 64 bit compare & exchange supported.\n");
296 if (m->mpc_featureflag&(1<<9))
297 printk(" Internal APIC present.\n");
298 #endif
299 if (m->mpc_cpuflag&CPU_BOOTPROCESSOR)
301 SMP_PRINTK((" Bootup CPU\n"));
302 boot_cpu_id=m->mpc_apicid;
304 else /* Boot CPU already counted */
305 num_processors++;
307 if (m->mpc_apicid>=NR_CPUS)
308 printk("Processor #%d unused. (Max %d processors).\n",m->mpc_apicid, NR_CPUS);
309 else
311 cpu_present_map|=(1<<m->mpc_apicid);
312 apic_version[m->mpc_apicid]=m->mpc_apicver;
315 mpt+=sizeof(*m);
316 count+=sizeof(*m);
317 break;
319 case MP_BUS:
321 struct mpc_config_bus *m=
322 (struct mpc_config_bus *)mpt;
323 memcpy(str,m->mpc_bustype,6);
324 str[6]=0;
325 SMP_PRINTK(("Bus #%d is %s\n",
326 m->mpc_busid,
327 str));
328 if ((strncmp(m->mpc_bustype,"ISA",3) == 0) ||
329 (strncmp(m->mpc_bustype,"EISA",4) == 0))
330 mp_bus_id_to_type[m->mpc_busid] =
331 MP_BUS_ISA;
332 else
333 if (strncmp(m->mpc_bustype,"PCI",3) == 0) {
334 mp_bus_id_to_type[m->mpc_busid] =
335 MP_BUS_PCI;
336 mp_bus_id_to_pci_bus[m->mpc_busid] =
337 mp_current_pci_id;
338 mp_current_pci_id++;
340 mpt+=sizeof(*m);
341 count+=sizeof(*m);
342 break;
344 case MP_IOAPIC:
346 struct mpc_config_ioapic *m=
347 (struct mpc_config_ioapic *)mpt;
348 if (m->mpc_flags&MPC_APIC_USABLE)
350 ioapics++;
351 printk("I/O APIC #%d Version %d at 0x%lX.\n",
352 m->mpc_apicid,m->mpc_apicver,
353 m->mpc_apicaddr);
355 * we use the first one only currently
357 if (ioapics == 1)
358 mp_ioapic_addr = m->mpc_apicaddr;
360 mpt+=sizeof(*m);
361 count+=sizeof(*m);
362 break;
364 case MP_INTSRC:
366 struct mpc_config_intsrc *m=
367 (struct mpc_config_intsrc *)mpt;
369 mp_irqs [mp_irq_entries] = *m;
370 if (++mp_irq_entries == MAX_IRQ_SOURCES) {
371 printk("Max irq sources exceeded!!\n");
372 printk("Skipping remaining sources.\n");
373 --mp_irq_entries;
376 mpt+=sizeof(*m);
377 count+=sizeof(*m);
378 break;
380 case MP_LINTSRC:
382 struct mpc_config_intlocal *m=
383 (struct mpc_config_intlocal *)mpt;
384 mpt+=sizeof(*m);
385 count+=sizeof(*m);
386 break;
390 if (ioapics > 1)
392 printk("Warning: Multiple IO-APICs not yet supported.\n");
393 printk("Warning: switching to non APIC mode.\n");
394 skip_ioapic_setup=1;
396 return num_processors;
400 * Scan the memory blocks for an SMP configuration block.
403 static int __init smp_scan_config(unsigned long base, unsigned long length)
405 unsigned long *bp=phys_to_virt(base);
406 struct intel_mp_floating *mpf;
408 SMP_PRINTK(("Scan SMP from %p for %ld bytes.\n",
409 bp,length));
410 if (sizeof(*mpf)!=16)
411 printk("Error: MPF size\n");
413 while (length>0)
415 if (*bp==SMP_MAGIC_IDENT)
417 mpf=(struct intel_mp_floating *)bp;
418 if (mpf->mpf_length==1 &&
419 !mpf_checksum((unsigned char *)bp,16) &&
420 (mpf->mpf_specification == 1
421 || mpf->mpf_specification == 4) )
423 printk("Intel MultiProcessor Specification v1.%d\n", mpf->mpf_specification);
424 if (mpf->mpf_feature2&(1<<7))
425 printk(" IMCR and PIC compatibility mode.\n");
426 else
427 printk(" Virtual Wire compatibility mode.\n");
428 smp_found_config=1;
430 * Now see if we need to read further.
432 if (mpf->mpf_feature1!=0)
434 unsigned long cfg;
436 /* local APIC has default address */
437 mp_lapic_addr = APIC_DEFAULT_PHYS_BASE;
439 * We need to know what the local
440 * APIC id of the boot CPU is!
445 * HACK HACK HACK HACK HACK HACK HACK HACK HACK HACK HACK HACK HACK
447 * It's not just a crazy hack. ;-)
450 * Standard page mapping
451 * functions don't work yet.
452 * We know that page 0 is not
453 * used. Steal it for now!
456 cfg=pg0[0];
457 pg0[0] = (mp_lapic_addr | 7);
458 local_flush_tlb();
460 boot_cpu_id = GET_APIC_ID(*((volatile unsigned long *) APIC_ID));
463 * Give it back
466 pg0[0]= cfg;
467 local_flush_tlb();
471 * END OF HACK END OF HACK END OF HACK END OF HACK END OF HACK
475 * 2 CPUs, numbered 0 & 1.
477 cpu_present_map=3;
478 num_processors=2;
479 printk("I/O APIC at 0xFEC00000.\n");
482 * Save the default type number, we
483 * need it later to set the IO-APIC
484 * up properly:
486 mpc_default_type = mpf->mpf_feature1;
488 printk("Bus #0 is ");
490 switch(mpf->mpf_feature1)
492 case 1:
493 case 5:
494 printk("ISA\n");
495 break;
496 case 2:
497 printk("EISA with no IRQ8 chaining\n");
498 break;
499 case 6:
500 case 3:
501 printk("EISA\n");
502 break;
503 case 4:
504 case 7:
505 printk("MCA\n");
506 break;
507 case 0:
508 break;
509 default:
510 printk("???\nUnknown standard configuration %d\n",
511 mpf->mpf_feature1);
512 return 1;
514 if (mpf->mpf_feature1>4)
516 printk("Bus #1 is PCI\n");
519 * Set local APIC version to
520 * the integrated form.
521 * It's initialized to zero
522 * otherwise, representing
523 * a discrete 82489DX.
525 apic_version[0] = 0x10;
526 apic_version[1] = 0x10;
529 * Read the physical hardware table.
530 * Anything here will override the
531 * defaults.
533 if (mpf->mpf_physptr)
534 smp_read_mpc((void *)mpf->mpf_physptr);
536 __cpu_logical_map[0] = boot_cpu_id;
537 global_irq_holder = boot_cpu_id;
538 current->processor = boot_cpu_id;
540 printk("Processors: %d\n", num_processors);
542 * Only use the first configuration found.
544 return 1;
547 bp+=4;
548 length-=16;
551 return 0;
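/*
 * The loop above does one 32-bit compare per 16-byte step: on i386 the
 * four signature characters "_MP_" load little-endian into a single
 * long, which is what SMP_MAGIC_IDENT holds. Standalone sketch of that
 * packing (the macro name here is local to the sketch):
 */
#include <stdio.h>

#define SKETCH_MAGIC ('_' | ('M' << 8) | ('P' << 16) | ('_' << 24))

int main(void)
{
	const unsigned char sig[4] = { '_', 'M', 'P', '_' };
	unsigned int v = sig[0] | (sig[1] << 8) | (sig[2] << 16) | (sig[3] << 24);
	printf("match: %s\n", v == (unsigned int)SKETCH_MAGIC ? "yes" : "no");
	return 0;
}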
554 void __init init_intel_smp (void)
557 * FIXME: Linux assumes you have 640K of base ram..
558 * this continues the error...
560 * 1) Scan the bottom 1K for a signature
561 * 2) Scan the top 1K of base RAM
562 * 3) Scan the 64K of bios
564 if (!smp_scan_config(0x0,0x400) &&
565 !smp_scan_config(639*0x400,0x400) &&
566 !smp_scan_config(0xF0000,0x10000)) {
568 * If it is an SMP machine we should know now, unless the
569 * configuration is in an EISA/MCA bus machine with an
570 * extended bios data area.
572 * there is a real-mode segmented pointer pointing to the
573 * 4K EBDA area at 0x40E, calculate and scan it here.
575 * NOTE! There are Linux loaders that will corrupt the EBDA
576 * area, and as such this kind of SMP config may be less
577 * trustworthy, simply because the SMP table may have been
578 * stomped on during early boot. These loaders are buggy and
579 * should be fixed.
581 unsigned int address;
583 address = *(unsigned short *)phys_to_virt(0x40E);
584 address<<=4;
585 smp_scan_config(address, 0x1000);
586 if (smp_found_config)
587 printk(KERN_WARNING "WARNING: MP table in the EBDA can be UNSAFE, contact linux-smp@vger.rutgers.edu if you experience SMP problems!\n");
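/*
 * The EBDA lookup above rests on real-mode address arithmetic: the
 * word at 0x40E is a segment, and physical = segment << 4. Standalone
 * sketch with a made-up BIOS value:
 */
#include <stdio.h>

int main(void)
{
	unsigned short ebda_seg = 0x9FC0;	/* hypothetical word read from 0x40E */
	unsigned long phys = (unsigned long)ebda_seg << 4;
	printf("EBDA at 0x%lX\n", phys);	/* 0x9FC00, just under 640K */
	return 0;
}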
591 #else
594 * The Visual Workstation is Intel MP compliant in the hardware
595 * sense, but it doesn't have a BIOS(-configuration table).
596 * No problem for Linux.
598 void __init init_visws_smp(void)
600 smp_found_config = 1;
602 cpu_present_map |= 2; /* or in id 1 */
603 apic_version[1] |= 0x10; /* integrated APIC */
604 apic_version[0] |= 0x10;
606 mp_lapic_addr = APIC_DEFAULT_PHYS_BASE;
609 #endif
612 * - Intel MP Configuration Table
613 * - or SGI Visual Workstation configuration
615 void __init init_smp_config (void)
617 #ifndef CONFIG_VISWS
618 init_intel_smp();
619 #else
620 init_visws_smp();
621 #endif
625 * Trampoline 80x86 program as an array.
628 extern unsigned char trampoline_data [];
629 extern unsigned char trampoline_end [];
630 static unsigned char *trampoline_base;
633 * Currently trivial. Write the real->protected mode
634 * bootstrap into the page concerned. The caller
635 * has made sure it's suitably aligned.
638 static unsigned long __init setup_trampoline(void)
640 memcpy(trampoline_base, trampoline_data, trampoline_end - trampoline_data);
641 return virt_to_phys(trampoline_base);
645 * We are called very early to get the low memory for the
646 * SMP bootup trampoline page.
648 unsigned long __init smp_alloc_memory(unsigned long mem_base)
650 if (virt_to_phys((void *)mem_base) >= 0x9F000)
651 panic("smp_alloc_memory: Insufficient low memory for kernel trampoline 0x%lx.", mem_base);
652 trampoline_base = (void *)mem_base;
653 return mem_base + PAGE_SIZE;
657 * The bootstrap kernel entry code has set these up. Save them for
658 * a given CPU
661 void __init smp_store_cpu_info(int id)
663 struct cpuinfo_x86 *c=&cpu_data[id];
665 *c = boot_cpu_data;
666 c->pte_quick = 0;
667 c->pgd_quick = 0;
668 c->pgtable_cache_sz = 0;
669 identify_cpu(c);
671 * Mask B, Pentium, but not Pentium MMX
673 if (c->x86_vendor == X86_VENDOR_INTEL &&
674 c->x86 == 5 &&
675 c->x86_mask >= 1 && c->x86_mask <= 4 &&
676 c->x86_model <= 3)
677 smp_b_stepping=1; /* Remember we have B step Pentia with bugs */
681 * Architecture specific routine called by the kernel just before init is
682 * fired off. This allows the BP to have everything in order [we hope].
683 * At the end of this all the APs will hit the system scheduling and off
684 * we go. Each AP will load the system gdt's and jump through the kernel
685 * init into idle(). At this point the scheduler will one day take over
686 * and give them jobs to do. smp_callin is a standard routine
687 * we use to track CPUs as they power up.
690 static atomic_t smp_commenced = ATOMIC_INIT(0);
692 void __init smp_commence(void)
695 * Lets the callins below out of their loop.
697 SMP_PRINTK(("Setting commenced=1, go go go\n"));
699 wmb();
700 atomic_set(&smp_commenced,1);
703 void __init enable_local_APIC(void)
705 unsigned long value;
707 value = apic_read(APIC_SPIV);
708 value |= (1<<8); /* Enable APIC (bit==1) */
709 value &= ~(1<<9); /* Enable focus processor (bit==0) */
710 value |= 0xff; /* Set spurious IRQ vector to 0xff */
711 apic_write(APIC_SPIV,value);
713 value = apic_read(APIC_TASKPRI);
714 value &= ~APIC_TPRI_MASK; /* Set Task Priority to 'accept all' */
715 apic_write(APIC_TASKPRI,value);
718 * Set arbitration priority to 0
720 value = apic_read(APIC_ARBPRI);
721 value &= ~APIC_ARBPRI_MASK;
722 apic_write(APIC_ARBPRI, value);
725 * Set the logical destination ID to 'all', just to be safe.
726 * also, put the APIC into flat delivery mode.
728 value = apic_read(APIC_LDR);
729 value &= ~APIC_LDR_MASK;
730 value |= SET_APIC_LOGICAL_ID(0xff);
731 apic_write(APIC_LDR,value);
733 value = apic_read(APIC_DFR);
734 value |= SET_APIC_DFR(0xf);
735 apic_write(APIC_DFR, value);
737 udelay(100); /* B safe */
738 ack_APIC_irq();
739 udelay(100);
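/*
 * The SPIV setup above is a plain read-modify-write: bit 8 set enables
 * the APIC, bit 9 clear enables the focus processor check, and the low
 * byte holds the spurious vector. Standalone sketch of the same bit
 * arithmetic on a made-up register value:
 */
#include <stdio.h>

int main(void)
{
	unsigned int value = 0x00000205;	/* hypothetical SPIV contents */
	value |= (1 << 8);	/* enable APIC */
	value &= ~(1 << 9);	/* enable focus processor (bit == 0) */
	value |= 0xff;		/* spurious IRQ vector 0xff */
	printf("SPIV = 0x%08X\n", value);	/* prints 0x000001FF */
	return 0;
}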
742 unsigned long __init init_smp_mappings(unsigned long memory_start)
744 unsigned long apic_phys;
746 memory_start = PAGE_ALIGN(memory_start);
747 if (smp_found_config) {
748 apic_phys = mp_lapic_addr;
749 } else {
751 * set up a fake all zeroes page to simulate the
752 * local APIC and another one for the IO-APIC. We
753 * could use the real zero-page, but it's safer
754 * this way if some buggy code writes to this page ...
756 apic_phys = __pa(memory_start);
757 memset((void *)memory_start, 0, PAGE_SIZE);
758 memory_start += PAGE_SIZE;
760 set_fixmap(FIX_APIC_BASE,apic_phys);
761 printk("mapped APIC to %08lx (%08lx)\n", APIC_BASE, apic_phys);
763 #ifdef CONFIG_X86_IO_APIC
765 unsigned long ioapic_phys;
767 if (smp_found_config) {
768 ioapic_phys = mp_ioapic_addr;
769 } else {
770 ioapic_phys = __pa(memory_start);
771 memset((void *)memory_start, 0, PAGE_SIZE);
772 memory_start += PAGE_SIZE;
774 set_fixmap(FIX_IO_APIC_BASE,ioapic_phys);
775 printk("mapped IOAPIC to %08lx (%08lx)\n",
776 fix_to_virt(FIX_IO_APIC_BASE), ioapic_phys);
778 #endif
780 return memory_start;
783 extern void calibrate_delay(void);
785 void __init smp_callin(void)
787 int cpuid;
788 unsigned long timeout;
791 * (This works even if the APIC is not enabled.)
793 cpuid = GET_APIC_ID(apic_read(APIC_ID));
795 SMP_PRINTK(("CPU#%d waiting for CALLOUT\n", cpuid));
798 * STARTUP IPIs are fragile beasts as they might sometimes
799 * trigger some glue motherboard logic. Complete APIC bus
800 * silence for 1 second; this overestimates the time the
801 * boot CPU spends sending the up to 2 STARTUP IPIs
802 * by a factor of two. This should be enough.
806 * Waiting 2s total for startup (udelay is not yet working)
808 timeout = jiffies + 2*HZ;
809 while (time_before(jiffies,timeout))
812 * Has the boot CPU finished its STARTUP sequence?
814 if (test_bit(cpuid, (unsigned long *)&cpu_callout_map[0]))
815 break;
818 while (!time_before(jiffies,timeout)) {
819 printk("BUG: CPU%d started up but did not get a callout!\n",
820 cpuid);
821 stop_this_cpu();
825 * the boot CPU has finished the init stage and is spinning
826 * on callin_map until we finish. We are free to set up this
827 * CPU, first the APIC. (this is probably redundant on most
828 * boards)
831 SMP_PRINTK(("CALLIN, before enable_local_APIC().\n"));
832 enable_local_APIC();
835 * Set up our APIC timer.
837 setup_APIC_clock();
839 __sti();
841 #ifdef CONFIG_MTRR
842 /* Must be done before calibration delay is computed */
843 mtrr_init_secondary_cpu ();
844 #endif
846 * Get our bogomips.
848 calibrate_delay();
849 SMP_PRINTK(("Stack at about %p\n",&cpuid));
852 * Save our processor parameters
854 smp_store_cpu_info(cpuid);
857 * Allow the master to continue.
859 set_bit(cpuid, (unsigned long *)&cpu_callin_map[0]);
862 int cpucount = 0;
864 extern int cpu_idle(void * unused);
867 * Activate a secondary processor.
869 int __init start_secondary(void *unused)
872 * Don't put anything before smp_callin(); SMP
873 * booting is so fragile that we want to limit the
874 * things done here to the most necessary things.
876 smp_callin();
877 while (!atomic_read(&smp_commenced))
878 /* nothing */ ;
879 return cpu_idle(NULL);
883 * Everything has been set up for the secondary
884 * CPUs - they just need to reload everything
885 * from the task structure
887 void __init initialize_secondary(void)
889 struct thread_struct * p = &current->tss;
892 * Load up the LDT and the task register.
894 asm volatile("lldt %%ax": :"a" (p->ldt));
895 asm volatile("ltr %%ax": :"a" (p->tr));
896 stts();
899 * We don't actually need to load the full TSS,
900 * basically just the stack pointer and the eip.
903 asm volatile(
904 "movl %0,%%esp\n\t"
905 "jmp *%1"
907 :"r" (p->esp),"r" (p->eip));
910 extern struct {
911 void * esp;
912 unsigned short ss;
913 } stack_start;
915 static void __init do_boot_cpu(int i)
917 unsigned long cfg;
918 pgd_t maincfg;
919 struct task_struct *idle;
920 unsigned long send_status, accept_status;
921 int timeout, num_starts, j;
922 unsigned long start_eip;
925 * We need an idle process for each processor.
928 kernel_thread(start_secondary, NULL, CLONE_PID);
929 cpucount++;
931 idle = task[cpucount];
932 if (!idle)
933 panic("No idle process for CPU %d", i);
935 idle->processor = i;
936 __cpu_logical_map[cpucount] = i;
937 cpu_number_map[i] = cpucount;
939 /* start_eip had better be page-aligned! */
940 start_eip = setup_trampoline();
942 printk("Booting processor %d eip %lx\n", i, start_eip); /* So we see what's up */
943 stack_start.esp = (void *) (1024 + PAGE_SIZE + (char *)idle);
946 * This grunge runs the startup process for
947 * the targeted processor.
950 SMP_PRINTK(("Setting warm reset code and vector.\n"));
952 CMOS_WRITE(0xa, 0xf);
953 local_flush_tlb();
954 SMP_PRINTK(("1.\n"));
955 *((volatile unsigned short *) phys_to_virt(0x469)) = start_eip >> 4;
956 SMP_PRINTK(("2.\n"));
957 *((volatile unsigned short *) phys_to_virt(0x467)) = start_eip & 0xf;
958 SMP_PRINTK(("3.\n"));
960 maincfg=swapper_pg_dir[0];
961 ((unsigned long *)swapper_pg_dir)[0]=0x102007;
964 * Be paranoid about clearing APIC errors.
967 if ( apic_version[i] & 0xF0 )
969 apic_write(APIC_ESR, 0);
970 accept_status = (apic_read(APIC_ESR) & 0xEF);
974 * Status is now clean
977 send_status = 0;
978 accept_status = 0;
981 * Starting actual IPI sequence...
984 SMP_PRINTK(("Asserting INIT.\n"));
987 * Turn INIT on
990 cfg=apic_read(APIC_ICR2);
991 cfg&=0x00FFFFFF;
992 apic_write(APIC_ICR2, cfg|SET_APIC_DEST_FIELD(i)); /* Target chip */
993 cfg=apic_read(APIC_ICR);
994 cfg&=~0xCDFFF; /* Clear bits */
995 cfg |= (APIC_DEST_LEVELTRIG | APIC_DEST_ASSERT | APIC_DEST_DM_INIT);
996 apic_write(APIC_ICR, cfg); /* Send IPI */
998 udelay(200);
999 SMP_PRINTK(("Deasserting INIT.\n"));
1001 cfg=apic_read(APIC_ICR2);
1002 cfg&=0x00FFFFFF;
1003 apic_write(APIC_ICR2, cfg|SET_APIC_DEST_FIELD(i)); /* Target chip */
1004 cfg=apic_read(APIC_ICR);
1005 cfg&=~0xCDFFF; /* Clear bits */
1006 cfg |= (APIC_DEST_LEVELTRIG | APIC_DEST_DM_INIT);
1007 apic_write(APIC_ICR, cfg); /* Send IPI */
1010 * Should we send STARTUP IPIs ?
1012 * Determine this based on the APIC version.
1013 * If we don't have an integrated APIC, don't
1014 * send the STARTUP IPIs.
1017 if ( apic_version[i] & 0xF0 )
1018 num_starts = 2;
1019 else
1020 num_starts = 0;
1023 * Run STARTUP IPI loop.
1026 for (j = 1; !(send_status || accept_status)
1027 && (j <= num_starts) ; j++)
1029 SMP_PRINTK(("Sending STARTUP #%d.\n",j));
1030 apic_write(APIC_ESR, 0);
1031 SMP_PRINTK(("After apic_write.\n"));
1034 * STARTUP IPI
1037 cfg=apic_read(APIC_ICR2);
1038 cfg&=0x00FFFFFF;
1039 apic_write(APIC_ICR2, cfg|SET_APIC_DEST_FIELD(i)); /* Target chip */
1040 cfg=apic_read(APIC_ICR);
1041 cfg&=~0xCDFFF; /* Clear bits */
1042 cfg |= (APIC_DEST_DM_STARTUP | (start_eip >> 12)); /* Boot on the stack */
1043 SMP_PRINTK(("Before start apic_write.\n"));
1044 apic_write(APIC_ICR, cfg); /* Kick the second */
1046 SMP_PRINTK(("Startup point 1.\n"));
1048 timeout = 0;
1049 SMP_PRINTK(("Waiting for send to finish...\n"));
1050 do {
1051 SMP_PRINTK(("+"));
1052 udelay(100);
1053 send_status = apic_read(APIC_ICR) & 0x1000;
1054 } while (send_status && (timeout++ < 1000));
1057 * Give the other CPU some time to accept the IPI.
1059 udelay(200);
1060 accept_status = (apic_read(APIC_ESR) & 0xEF);
1062 SMP_PRINTK(("After Startup.\n"));
1064 if (send_status) /* APIC never delivered?? */
1065 printk("APIC never delivered???\n");
1066 if (accept_status) /* Send accept error */
1067 printk("APIC delivery error (%lx).\n", accept_status);
1069 if ( !(send_status || accept_status) )
1072 * allow APs to start initializing.
1074 SMP_PRINTK(("Before Callout %d.\n", i));
1075 set_bit(i, (unsigned long *)&cpu_callout_map[0]);
1076 SMP_PRINTK(("After Callout %d.\n", i));
1078 for(timeout=0;timeout<50000;timeout++)
1080 if (cpu_callin_map[0]&(1<<i))
1081 break; /* It has booted */
1082 udelay(100); /* Wait 5s total for a response */
1084 if (cpu_callin_map[0]&(1<<i))
1086 /* number CPUs logically, starting from 1 (BSP is 0) */
1087 #if 0
1088 cpu_number_map[i] = cpucount;
1089 __cpu_logical_map[cpucount] = i;
1090 #endif
1091 printk("OK.\n");
1092 printk("CPU%d: ", i);
1093 print_cpu_info(&cpu_data[i]);
1095 else
1097 if (*((volatile unsigned char *)phys_to_virt(8192))==0xA5)
1098 printk("Stuck ??\n");
1099 else
1100 printk("Not responding.\n");
1102 SMP_PRINTK(("CPU has booted.\n"));
1104 else
1106 __cpu_logical_map[cpucount] = -1;
1107 cpu_number_map[i] = -1;
1108 cpucount--;
1111 swapper_pg_dir[0]=maincfg;
1112 local_flush_tlb();
1114 /* mark "stuck" area as not stuck */
1115 *((volatile unsigned long *)phys_to_virt(8192)) = 0;
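/*
 * Sketch of the warm-reset vector arithmetic used earlier in
 * do_boot_cpu(): the BIOS resumes at (segment << 4) + offset from the
 * 0x467 vector, and because start_eip is page aligned the >>4 / &0xf
 * split round-trips exactly. Standalone illustration with an assumed
 * trampoline address:
 */
#include <stdio.h>

int main(void)
{
	unsigned long start_eip = 0x9F000;	/* hypothetical, page aligned */
	unsigned short seg = start_eip >> 4;
	unsigned short off = start_eip & 0xf;
	printf("resume at 0x%lX (ok: %s)\n",
	       ((unsigned long)seg << 4) + off,
	       ((unsigned long)seg << 4) + off == start_eip ? "yes" : "no");
	return 0;
}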
1118 cycles_t cacheflush_time;
1119 extern unsigned long cpu_hz;
1121 static void smp_tune_scheduling (void)
1123 unsigned long cachesize;
1125 * Rough estimate for SMP scheduling; this is the number of
1126 * cycles it takes for a fully memory-limited process to flush
1127 * the SMP-local cache.
1129 * (For a P5 this pretty much means we will choose another idle
1130 * CPU almost always at wakeup time (this is due to the small
1131 * L1 cache), on PIIs it's around 50-100 usecs, depending on
1132 * the cache size)
1135 if (!cpu_hz) {
1137 * this basically disables processor-affinity
1138 * scheduling on SMP without a TSC.
1140 cacheflush_time = 0;
1141 return;
1142 } else {
1143 cachesize = boot_cpu_data.x86_cache_size;
1144 if (cachesize == -1)
1145 cachesize = 8; /* Pentiums */
1147 cacheflush_time = cpu_hz/1024*cachesize/5000;
1150 printk("per-CPU timeslice cutoff: %ld.%02ld usecs.\n",
1151 (long)cacheflush_time/(cpu_hz/1000000),
1152 ((long)cacheflush_time*100/(cpu_hz/1000000)) % 100);
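/*
 * Worked example of the estimate above, assuming a hypothetical
 * 400 MHz PII with a 512K cache: 400000000/1024*512/5000 gives 40000
 * cycles, i.e. about 100 usecs, which sits in the 50-100 usec range
 * the comment quotes for PIIs. Standalone sketch:
 */
#include <stdio.h>

int main(void)
{
	unsigned long cpu_hz = 400000000UL;	/* assumed clock */
	unsigned long cachesize = 512;		/* assumed cache size in KB */
	unsigned long cycles = cpu_hz / 1024 * cachesize / 5000;
	printf("%lu cycles = %lu usecs\n", cycles, cycles / (cpu_hz / 1000000));
	return 0;
}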
1155 unsigned int prof_multiplier[NR_CPUS];
1156 unsigned int prof_counter[NR_CPUS];
1159 * Cycle through the processors sending APIC IPIs to boot each.
1162 void __init smp_boot_cpus(void)
1164 int i;
1166 #ifdef CONFIG_MTRR
1167 /* Must be done before other processors booted */
1168 mtrr_init_boot_cpu ();
1169 #endif
1171 * Initialize the logical to physical CPU number mapping
1172 * and the per-CPU profiling counter/multiplier
1175 for (i = 0; i < NR_CPUS; i++) {
1176 cpu_number_map[i] = -1;
1177 prof_counter[i] = 1;
1178 prof_multiplier[i] = 1;
1182 * Setup boot CPU information
1185 smp_store_cpu_info(boot_cpu_id); /* Final full version of the data */
1186 smp_tune_scheduling();
1187 printk("CPU%d: ", boot_cpu_id);
1188 print_cpu_info(&cpu_data[boot_cpu_id]);
1191 * not necessary because the MP table should list the boot
1192 * CPU too, but we do it for the sake of robustness anyway.
1193 * (and for the case when a non-SMP board boots an SMP kernel)
1195 cpu_present_map |= (1 << hard_smp_processor_id());
1197 cpu_number_map[boot_cpu_id] = 0;
1200 * If we couldn't find an SMP configuration at boot time,
1201 * get out of here now!
1204 if (!smp_found_config)
1206 printk(KERN_NOTICE "SMP motherboard not detected. Using dummy APIC emulation.\n");
1207 #ifndef CONFIG_VISWS
1208 io_apic_irqs = 0;
1209 #endif
1210 cpu_online_map = cpu_present_map;
1211 goto smp_done;
1215 * If SMP should be disabled, then really disable it!
1218 if (!max_cpus)
1220 smp_found_config = 0;
1221 printk(KERN_INFO "SMP mode deactivated, forcing use of dummy APIC emulation.\n");
1224 #ifdef SMP_DEBUG
1226 int reg;
1229 * This is to verify that we're looking at
1230 * a real local APIC. Check these against
1231 * your board if the CPUs aren't getting
1232 * started for no apparent reason.
1235 reg = apic_read(APIC_VERSION);
1236 SMP_PRINTK(("Getting VERSION: %x\n", reg));
1238 apic_write(APIC_VERSION, 0);
1239 reg = apic_read(APIC_VERSION);
1240 SMP_PRINTK(("Getting VERSION: %x\n", reg));
1243 * The two version reads above should print the same
1244 * NON-ZERO!!! numbers. If the second one is zero,
1245 * there is a problem with the APIC write/read
1246 * definitions.
1248 * The next two are just to see if we have sane values.
1249 * They're only really relevant if we're in Virtual Wire
1250 * compatibility mode, but most boxes are these days.
1254 reg = apic_read(APIC_LVT0);
1255 SMP_PRINTK(("Getting LVT0: %x\n", reg));
1257 reg = apic_read(APIC_LVT1);
1258 SMP_PRINTK(("Getting LVT1: %x\n", reg));
1260 #endif
1262 enable_local_APIC();
1265 * Set up our local APIC timer:
1267 setup_APIC_clock ();
1270 * Now scan the CPU present map and fire up the other CPUs.
1274 * Add all detected CPUs. (later on we can down individual
1275 * CPUs which will change cpu_online_map but not necessarily
1276 * cpu_present_map. We are pretty much ready for hot-swap CPUs.)
1278 cpu_online_map = cpu_present_map;
1279 mb();
1281 SMP_PRINTK(("CPU map: %lx\n", cpu_present_map));
1283 for(i=0;i<NR_CPUS;i++)
1286 * Don't even attempt to start the boot CPU!
1288 if (i == boot_cpu_id)
1289 continue;
1291 if ((cpu_online_map & (1 << i))
1292 && (max_cpus < 0 || max_cpus > cpucount+1))
1294 do_boot_cpu(i);
1298 * Make sure we unmap all failed CPUs
1301 if (cpu_number_map[i] == -1 && (cpu_online_map & (1 << i))) {
1302 printk("CPU #%d not responding. Removing from cpu_online_map.\n",i);
1303 cpu_online_map &= ~(1 << i);
1308 * Cleanup possible dangling ends...
1311 #ifndef CONFIG_VISWS
1313 unsigned long cfg;
1316 * Install writable page 0 entry.
1318 cfg = pg0[0];
1319 pg0[0] = 3; /* writeable, present, addr 0 */
1320 local_flush_tlb();
1323 * Paranoid: Set warm reset code and vector here back
1324 * to default values.
1327 CMOS_WRITE(0, 0xf);
1329 *((volatile long *) phys_to_virt(0x467)) = 0;
1332 * Restore old page 0 entry.
1335 pg0[0] = cfg;
1336 local_flush_tlb();
1338 #endif
1341 * Allow the user to impress friends.
1344 SMP_PRINTK(("Before bogomips.\n"));
1345 if (cpucount==0)
1347 printk(KERN_ERR "Error: only one processor found.\n");
1348 cpu_online_map = (1<<hard_smp_processor_id());
1350 else
1352 unsigned long bogosum=0;
1353 for(i=0;i<32;i++)
1355 if (cpu_online_map&(1<<i))
1356 bogosum+=cpu_data[i].loops_per_sec;
1358 printk(KERN_INFO "Total of %d processors activated (%lu.%02lu BogoMIPS).\n",
1359 cpucount+1,
1360 (bogosum+2500)/500000,
1361 ((bogosum+2500)/5000)%100);
1362 SMP_PRINTK(("Before bogocount - setting activated=1.\n"));
1363 smp_activated=1;
1364 smp_num_cpus=cpucount+1;
1366 if (smp_b_stepping)
1367 printk(KERN_WARNING "WARNING: SMP operation may be unreliable with B stepping processors.\n");
1368 SMP_PRINTK(("Boot done.\n"));
1370 cache_APIC_registers();
1371 #ifndef CONFIG_VISWS
1373 * Here we can be sure that there is an IO-APIC in the system. Let's
1374 * go and set it up:
1376 if (!skip_ioapic_setup)
1377 setup_IO_APIC();
1378 #endif
1380 smp_done:
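/*
 * Sketch of the BogoMIPS arithmetic in the summary printk above: each
 * CPU's loops_per_sec divided by 500000 is its BogoMIPS rating, and
 * the +2500 rounds the hundredths digit. With two hypothetical CPUs
 * at loops_per_sec == 199229440 each:
 */
#include <stdio.h>

int main(void)
{
	unsigned long bogosum = 2 * 199229440UL;
	printf("%lu.%02lu BogoMIPS\n",
	       (bogosum + 2500) / 500000,
	       ((bogosum + 2500) / 5000) % 100);	/* prints 796.92 */
	return 0;
}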
1385 * the following functions deal with sending IPIs between CPUs.
1387 * We use 'broadcast', CPU->CPU IPIs and self-IPIs too.
1392 * Silly serialization to work around CPU bug in P5s.
1393 * We can safely turn it off on a 686.
1395 #ifdef CONFIG_X86_GOOD_APIC
1396 # define FORCE_APIC_SERIALIZATION 0
1397 #else
1398 # define FORCE_APIC_SERIALIZATION 1
1399 #endif
1401 static unsigned int cached_APIC_ICR;
1402 static unsigned int cached_APIC_ICR2;
1405 * Caches reserved bits; APIC reads are (mildly) expensive
1406 * and force otherwise unnecessary CPU synchronization.
1408 * (We could cache other APIC registers too, but these are the
1409 * main ones used in real life.)
1411 #define slow_ICR (apic_read(APIC_ICR) & ~0xFDFFF)
1412 #define slow_ICR2 (apic_read(APIC_ICR2) & 0x00FFFFFF)
1414 void cache_APIC_registers (void)
1416 cached_APIC_ICR = slow_ICR;
1417 cached_APIC_ICR2 = slow_ICR2;
1418 mb();
1421 static inline unsigned int __get_ICR (void)
1423 #if FORCE_APIC_SERIALIZATION
1425 * Wait for the APIC to become ready - this should never occur. It's
1426 * a debugging check really.
1428 int count = 0;
1429 unsigned int cfg;
1431 while (count < 1000)
1433 cfg = slow_ICR;
1434 if (!(cfg&(1<<12))) {
1435 if (count)
1436 atomic_add(count, (atomic_t*)&ipi_count);
1437 return cfg;
1439 count++;
1440 udelay(10);
1442 printk("CPU #%d: previous IPI still not cleared after 10mS\n",
1443 smp_processor_id());
1444 return cfg;
1445 #else
1446 return cached_APIC_ICR;
1447 #endif
1450 static inline unsigned int __get_ICR2 (void)
1452 #if FORCE_APIC_SERIALIZATION
1453 return slow_ICR2;
1454 #else
1455 return cached_APIC_ICR2;
1456 #endif
1459 static inline int __prepare_ICR (unsigned int shortcut, int vector)
1461 unsigned int cfg;
1463 cfg = __get_ICR();
1464 cfg |= APIC_DEST_DM_FIXED|shortcut|vector;
1466 return cfg;
1469 static inline int __prepare_ICR2 (unsigned int dest)
1471 unsigned int cfg;
1473 cfg = __get_ICR2();
1474 cfg |= SET_APIC_DEST_FIELD(dest);
1476 return cfg;
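/*
 * The helper above only fills in the destination field; on these APICs
 * that is the top byte of ICR2, i.e. SET_APIC_DEST_FIELD() is assumed
 * to be a shift into bits 24-31. Standalone sketch of the composition:
 */
#include <stdio.h>

int main(void)
{
	unsigned int dest = 3;			/* hypothetical target APIC id */
	unsigned int icr2 = dest << 24;		/* destination lives in bits 24-31 */
	printf("ICR2 = 0x%08X\n", icr2);	/* prints 0x03000000 */
	return 0;
}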
1479 static inline void __send_IPI_shortcut(unsigned int shortcut, int vector)
1481 unsigned int cfg;
1483 * Subtle. In the case of the 'never do double writes' workaround we
1484 * have to lock out interrupts to be safe. Otherwise it's just one
1485 * single atomic write to the APIC, no need for cli/sti.
1487 #if FORCE_APIC_SERIALIZATION
1488 unsigned long flags;
1490 __save_flags(flags);
1491 __cli();
1492 #endif
1495 * No need to touch the target chip field
1498 cfg = __prepare_ICR(shortcut, vector);
1501 * Send the IPI. The write to APIC_ICR fires this off.
1503 apic_write(APIC_ICR, cfg);
1504 #if FORCE_APIC_SERIALIZATION
1505 __restore_flags(flags);
1506 #endif
1509 static inline void send_IPI_allbutself(int vector)
1511 __send_IPI_shortcut(APIC_DEST_ALLBUT, vector);
1514 static inline void send_IPI_all(int vector)
1516 __send_IPI_shortcut(APIC_DEST_ALLINC, vector);
1519 void send_IPI_self(int vector)
1521 __send_IPI_shortcut(APIC_DEST_SELF, vector);
1524 static inline void send_IPI_single(int dest, int vector)
1526 unsigned long cfg;
1527 #if FORCE_APIC_SERIALIZATION
1528 unsigned long flags;
1530 __save_flags(flags);
1531 __cli();
1532 #endif
1535 * prepare target chip field
1538 cfg = __prepare_ICR2(dest);
1539 apic_write(APIC_ICR2, cfg);
1542 * program the ICR
1544 cfg = __prepare_ICR(0, vector);
1547 * Send the IPI. The write to APIC_ICR fires this off.
1549 apic_write(APIC_ICR, cfg);
1550 #if FORCE_APIC_SERIALIZATION
1551 __restore_flags(flags);
1552 #endif
1556 * This is fraught with deadlocks. The situation is probably not
1557 * as bad as in the early days of SMP, so we might ease some of the
1558 * paranoia here.
1561 void smp_flush_tlb(void)
1563 int cpu = smp_processor_id();
1564 int stuck;
1565 unsigned long flags;
1568 * it's important that we do not generate any APIC traffic
1569 * until the AP CPUs have booted up!
1571 if (cpu_online_map) {
1573 * The assignment is safe because it's volatile so the
1574 * compiler cannot reorder it, because the i586 has
1575 * strict memory ordering and because only the kernel
1576 * lock holder may issue a tlb flush. If you break any
1577 * one of those three, change this to an atomic
1578 * bus-locked OR.
1581 smp_invalidate_needed = cpu_online_map;
1584 * Processors spinning on some lock with IRQs disabled
1585 * will see this IRQ late. The smp_invalidate_needed
1586 * map will ensure they don't do a spurious flush tlb
1587 * or miss one.
1590 __save_flags(flags);
1591 __cli();
1593 send_IPI_allbutself(INVALIDATE_TLB_VECTOR);
1596 * Spin waiting for completion
1599 stuck = 50000000;
1600 while (smp_invalidate_needed) {
1602 * Take care of "crossing" invalidates
1604 if (test_bit(cpu, &smp_invalidate_needed))
1605 clear_bit(cpu, &smp_invalidate_needed);
1606 --stuck;
1607 if (!stuck) {
1608 printk("stuck on TLB IPI wait (CPU#%d)\n",cpu);
1609 break;
1612 __restore_flags(flags);
1616 * Flush the local TLB
1618 local_flush_tlb();
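/*
 * Standalone sketch of the handshake above, one bit per CPU in a
 * shared mask: the initiator sets every online bit, each responder
 * clears its own bit from the invalidate IPI handler, and the
 * initiator spins until the mask drains. Plain single-threaded C,
 * just to show the bit protocol; all names are local to the sketch.
 */
#include <stdio.h>

static volatile unsigned long sketch_invalidate_needed;

static void sketch_responder(int cpu)	/* models smp_invalidate_interrupt() */
{
	sketch_invalidate_needed &= ~(1UL << cpu);	/* this CPU has flushed */
}

int main(void)
{
	unsigned long online_map = 0xF;	/* hypothetical 4 CPUs */
	int cpu;

	sketch_invalidate_needed = online_map;	/* models the assignment above */
	for (cpu = 0; cpu < 4; cpu++)
		sketch_responder(cpu);
	printf("drained: %s\n", sketch_invalidate_needed == 0 ? "yes" : "no");
	return 0;
}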
1624 * this function sends a 'reschedule' IPI to another CPU.
1625 * it goes straight through and wastes no time serializing
1626 * anything. Worst case is that we lose a reschedule ...
1629 void smp_send_reschedule(int cpu)
1631 send_IPI_single(cpu, RESCHEDULE_VECTOR);
1635 * this function sends a 'stop' IPI to all other CPUs in the system.
1636 * it goes straight through.
1639 void smp_send_stop(void)
1641 send_IPI_allbutself(STOP_CPU_VECTOR);
1645 * this function sends a 'reload MTRR state' IPI to all other CPUs
1646 * in the system. it goes straight through, completion processing
1647 * is done on the mtrr.c level.
1650 void smp_send_mtrr(void)
1652 send_IPI_allbutself(MTRR_CHANGE_VECTOR);
1656 * Local timer interrupt handler. It does both profiling and
1657 * process statistics/rescheduling.
1659 * We do profiling in every local tick, statistics/rescheduling
1660 * happen only every 'profiling multiplier' ticks. The default
1661 * multiplier is 1 and it can be changed by writing the new multiplier
1662 * value into /proc/profile.
1665 void smp_local_timer_interrupt(struct pt_regs * regs)
1667 int cpu = smp_processor_id();
1670 * The profiling function is SMP safe. (nothing can mess
1671 * around with "current", and the profiling counters are
1672 * updated with atomic operations). This is especially
1673 * useful with a profiling multiplier != 1
1675 if (!user_mode(regs))
1676 x86_do_profile(regs->eip);
1678 if (!--prof_counter[cpu]) {
1679 int user=0,system=0;
1680 struct task_struct * p = current;
1683 * After doing the above, we need to make like
1684 * a normal interrupt - otherwise timer interrupts
1685 * ignore the global interrupt lock, which is the
1686 * WrongThing (tm) to do.
1689 if (user_mode(regs))
1690 user=1;
1691 else
1692 system=1;
1694 irq_enter(cpu, 0);
1695 if (p->pid) {
1696 update_one_process(p, 1, user, system, cpu);
1698 p->counter -= 1;
1699 if (p->counter < 0) {
1700 p->counter = 0;
1701 p->need_resched = 1;
1703 if (p->priority < DEF_PRIORITY) {
1704 kstat.cpu_nice += user;
1705 kstat.per_cpu_nice[cpu] += user;
1706 } else {
1707 kstat.cpu_user += user;
1708 kstat.per_cpu_user[cpu] += user;
1711 kstat.cpu_system += system;
1712 kstat.per_cpu_system[cpu] += system;
1715 prof_counter[cpu]=prof_multiplier[cpu];
1716 irq_exit(cpu, 0);
1720 * We take the 'long' return path, and there every subsystem
1721 * grabs the appropriate locks (kernel lock/ irq lock).
1723 * we might want to decouple profiling from the 'long path',
1724 * and do the profiling totally in assembly.
1726 * Currently this isn't too much of an issue (performance wise),
1727 * we can take more than 100K local irqs per second on a 100 MHz P5.
1732 * Local APIC timer interrupt. This is the most natural way of doing
1733 * local interrupts, but local timer interrupts can be emulated by
1734 * broadcast interrupts too. [in case the hw doesn't support APIC timers]
1736 * [ if a single-CPU system runs an SMP kernel then we call the local
1737 * interrupt as well. Thus we cannot inline the local irq ... ]
1739 void smp_apic_timer_interrupt(struct pt_regs * regs)
1742 * NOTE! We'd better ACK the irq immediately,
1743 * because timer handling can be slow, and we
1744 * want to be able to accept NMI tlb invalidates
1745 * during this time.
1747 ack_APIC_irq();
1748 smp_local_timer_interrupt(regs);
1752 * Reschedule call back. Nothing to do,
1753 * all the work is done automatically when
1754 * we return from the interrupt.
1756 asmlinkage void smp_reschedule_interrupt(void)
1758 ack_APIC_irq();
1762 * Invalidate call-back
1764 asmlinkage void smp_invalidate_interrupt(void)
1766 if (test_and_clear_bit(smp_processor_id(), &smp_invalidate_needed))
1767 local_flush_tlb();
1769 ack_APIC_irq();
1772 static void stop_this_cpu (void)
1775 * Remove this CPU:
1777 clear_bit(smp_processor_id(), &cpu_online_map);
1779 if (cpu_data[smp_processor_id()].hlt_works_ok)
1780 for(;;) __asm__("hlt");
1781 for (;;);
1785 * CPU halt call-back
1787 asmlinkage void smp_stop_cpu_interrupt(void)
1789 stop_this_cpu();
1792 void (*mtrr_hook) (void) = NULL;
1794 asmlinkage void smp_mtrr_interrupt(void)
1796 ack_APIC_irq();
1797 if (mtrr_hook) (*mtrr_hook)();
1801 * This interrupt should _never_ happen with our APIC/SMP architecture
1803 asmlinkage void smp_spurious_interrupt(void)
1805 /* ack_APIC_irq(); see sw-dev-man vol 3, chapter 7.4.13.5 */
1806 printk("spurious APIC interrupt, ayiee, should never happen.\n");
1810 * This part sets up the APIC 32 bit clock in LVTT1, with HZ interrupts
1811 * per second. We assume that the caller has already set up the local
1812 * APIC.
1814 * The APIC timer is not exactly in sync with the external timer chip;
1815 * it closely follows bus clocks.
1818 #define RDTSC(x) __asm__ __volatile__ ( "rdtsc" \
1819 :"=a" (((unsigned long*)&x)[0]), \
1820 "=d" (((unsigned long*)&x)[1]))
1823 * The timer chip is already set up at HZ interrupts per second here,
1824 * but we do not accept timer interrupts yet. We only allow the BP
1825 * to calibrate.
1827 static unsigned int __init get_8254_timer_count(void)
1829 unsigned int count;
1831 outb_p(0x00, 0x43);
1832 count = inb_p(0x40);
1833 count |= inb_p(0x40) << 8;
1835 return count;
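/*
 * The 0x00 written to port 0x43 latches counter 0, and the two port
 * 0x40 reads then return the latched value low byte first. Standalone
 * sketch of the byte composition with made-up values:
 */
#include <stdio.h>

int main(void)
{
	unsigned int lo = 0x34, hi = 0x12;	/* hypothetical latched bytes */
	unsigned int count = lo | (hi << 8);
	printf("count = 0x%04X\n", count);	/* prints 0x1234 */
	return 0;
}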
1839 * This function sets up the local APIC timer, with a timeout of
1840 * 'clocks' APIC bus clock. During calibration we actually call
1841 * this function twice, once with a bogus timeout value, second
1842 * time for real. The other (noncalibrating) CPUs call this
1843 * function only once, with the real value.
1845 * We are strictly in irqs off mode here, as we do not want to
1846 * have an APIC interrupt go off accidentally.
1848 * We do reads before writes even if unnecessary, to get around the
1849 * APIC double write bug.
1852 #define APIC_DIVISOR 16
1854 void setup_APIC_timer(unsigned int clocks)
1856 unsigned long lvtt1_value;
1857 unsigned int tmp_value;
1860 * Unfortunately the local APIC timer cannot be set up into NMI
1861 * mode. With the IO APIC we can re-route the external timer
1862 * interrupt and broadcast it as an NMI to all CPUs, so no pain.
1864 tmp_value = apic_read(APIC_LVTT);
1865 lvtt1_value = APIC_LVT_TIMER_PERIODIC | LOCAL_TIMER_VECTOR;
1866 apic_write(APIC_LVTT , lvtt1_value);
1869 * Divide PICLK by 16
1871 tmp_value = apic_read(APIC_TDCR);
1872 apic_write(APIC_TDCR , (tmp_value & ~APIC_TDR_DIV_1 )
1873 | APIC_TDR_DIV_16);
1875 tmp_value = apic_read(APIC_TMICT);
1876 apic_write(APIC_TMICT, clocks/APIC_DIVISOR);
1879 void __init wait_8254_wraparound(void)
1881 unsigned int curr_count, prev_count=~0;
1882 int delta;
1884 curr_count = get_8254_timer_count();
1886 do {
1887 prev_count = curr_count;
1888 curr_count = get_8254_timer_count();
1889 delta = curr_count-prev_count;
1892 * This limit for delta seems arbitrary, but it isn't, it's
1893 * slightly above the level of error a buggy Mercury/Neptune
1894 * chipset timer can cause.
1897 } while (delta<300);
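/*
 * Sketch of the wraparound test above: the 8254 counts down, so the
 * delta between successive reads is normally small and negative; only
 * a reload makes it jump positive by nearly the full period, and the
 * 300 threshold filters the Mercury/Neptune glitches mentioned above.
 * Standalone illustration with made-up counter reads:
 */
#include <stdio.h>

int main(void)
{
	int prev_count = 20, curr_count = 11900;	/* hypothetical reads across a reload */
	int delta = curr_count - prev_count;
	printf("wrapped: %s\n", delta >= 300 ? "yes" : "no");
	return 0;
}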
1901 * In this function we calibrate APIC bus clocks to the external
1902 * timer. Unfortunately we cannot use jiffies and the timer irq
1903 * to calibrate, since some later bootup code depends on getting
1904 * the first irq? Ugh.
1906 * We want to do the calibration only once since we
1907 * want to have the local timer irqs in sync. CPUs connected
1908 * by the same APIC bus have the very same bus frequency.
1909 * And we want to have irqs off anyway, no accidental
1910 * APIC irq that way.
1913 int __init calibrate_APIC_clock(void)
1915 unsigned long long t1,t2;
1916 long tt1,tt2;
1917 long calibration_result;
1918 int i;
1920 printk("calibrating APIC timer ... ");
1923 * Put whatever arbitrary (but long enough) timeout
1924 * value into the APIC clock; we just want to get the
1925 * counter running for calibration.
1927 setup_APIC_timer(1000000000);
1930 * The timer chip counts down to zero. Let's wait
1931 * for a wraparound to start exact measurement:
1932 * (the current tick might have been already half done)
1935 wait_8254_wraparound ();
1938 * We wrapped around just now. Let's start:
1940 RDTSC(t1);
1941 tt1=apic_read(APIC_TMCCT);
1943 #define LOOPS (HZ/10)
1945 * Let's wait LOOPS wraparounds:
1947 for (i=0; i<LOOPS; i++)
1948 wait_8254_wraparound ();
1950 tt2=apic_read(APIC_TMCCT);
1951 RDTSC(t2);
1954 * The APIC bus clock counter is 32 bits only, it
1955 * might have overflowed, but note that we use signed
1956 * longs, thus no extra care needed.
1958 * underflowed to be exact, as the timer counts down ;)
1961 calibration_result = (tt1-tt2)*APIC_DIVISOR/LOOPS;
1963 SMP_PRINTK(("\n..... %ld CPU clocks in 1 timer chip tick.",
1964 (unsigned long)(t2-t1)/LOOPS));
1966 SMP_PRINTK(("\n..... %ld APIC bus clocks in 1 timer chip tick.",
1967 calibration_result));
1970 printk("\n..... CPU clock speed is %ld.%04ld MHz.\n",
1971 ((long)(t2-t1)/LOOPS)/(1000000/HZ),
1972 ((long)(t2-t1)/LOOPS)%(1000000/HZ) );
1974 printk("..... system bus clock speed is %ld.%04ld MHz.\n",
1975 calibration_result/(1000000/HZ),
1976 calibration_result%(1000000/HZ) );
1977 #undef LOOPS
1979 return calibration_result;
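/*
 * Worked example of the formula above, assuming HZ == 100 (so LOOPS is
 * 10 timer ticks) and a hypothetical 66 MHz APIC bus: the divide-by-16
 * counter drops by roughly 41666 per tick, so tt1-tt2 is about 416660
 * over ten ticks and the result lands near 666656 bus clocks per tick.
 * Standalone sketch:
 */
#include <stdio.h>

int main(void)
{
	long tt1 = 4000000, tt2 = 4000000 - 416660;	/* made-up counter reads */
	long loops = 10, apic_divisor = 16;		/* HZ/10 and APIC_DIVISOR */
	long result = (tt1 - tt2) * apic_divisor / loops;
	printf("bus: %ld.%04ld MHz\n", result / 10000, result % 10000);	/* 66.6656 */
	return 0;
}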
1982 static unsigned int calibration_result;
1984 void __init setup_APIC_clock(void)
1986 unsigned long flags;
1988 static volatile int calibration_lock;
1990 __save_flags(flags);
1991 __cli();
1993 SMP_PRINTK(("setup_APIC_clock() called.\n"));
1996 * [ setup_APIC_clock() is called from all CPUs, but we want
1997 * to do this part of the setup only once ... and it fits
1998 * here best ]
2000 if (!test_and_set_bit(0,&calibration_lock)) {
2002 calibration_result=calibrate_APIC_clock();
2004 * Signal completion to the other CPU[s]:
2006 calibration_lock = 3;
2008 } else {
2010 * Other CPU is calibrating, wait for finish:
2012 SMP_PRINTK(("waiting for other CPU calibrating APIC ... "));
2013 while (calibration_lock == 1);
2014 SMP_PRINTK(("done, continuing.\n"));
2018 * Now set up the timer for real.
2021 setup_APIC_timer (calibration_result);
2024 * We ACK the APIC, just in case there is something pending.
2027 ack_APIC_irq ();
2029 __restore_flags(flags);
2033 * the frequency of the profiling timer can be changed
2034 * by writing a multiplier value into /proc/profile.
2036 * usually you want to run this on all CPUs ;)
2038 int setup_profiling_timer(unsigned int multiplier)
2040 int cpu = smp_processor_id();
2041 unsigned long flags;
2044 * Sanity check. [at least 500 APIC cycles should be
2045 * between APIC interrupts as a rule of thumb, to avoid
2046 * irqs flooding us]
2048 if ( (!multiplier) || (calibration_result/multiplier < 500))
2049 return -EINVAL;
2051 save_flags(flags);
2052 cli();
2053 setup_APIC_timer(calibration_result/multiplier);
2054 prof_multiplier[cpu]=multiplier;
2055 restore_flags(flags);
2057 return 0;
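/*
 * Sketch of the sanity check above: with the hypothetical 666656
 * bus-clocks-per-tick result from the calibration example, any
 * multiplier above 1333 would leave fewer than 500 APIC cycles
 * between interrupts and would be rejected with -EINVAL:
 */
#include <stdio.h>

int main(void)
{
	long calibration_result = 666656;	/* assumed calibration value */
	unsigned int multiplier;

	for (multiplier = 1; calibration_result / (multiplier + 1) >= 500; multiplier++)
		;
	printf("max usable multiplier: %u\n", multiplier);	/* prints 1333 */
	return 0;
}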
2060 #undef APIC_DIVISOR