/*
 *	Intel MP v1.1/v1.4 specification support routines for multi-pentium
 *	hosts.
 *
 *	(c) 1995 Alan Cox, Building #3 <alan@redhat.com>
 *	(c) 1998 Ingo Molnar
 *
 *	Supported by Caldera http://www.caldera.com.
 *	Much of the core SMP work is based on previous work by Thomas Radke, to
 *	whom a great many thanks are extended.
 *
 *	Thanks to Intel for making available several different Pentium,
 *	Pentium Pro and Pentium-II/Xeon MP machines.
 *
 *	This code is released under the GNU public license version 2 or
 *	later.
 *
 *	Fixes
 *		Felix Koop	:	NR_CPUS used properly
 *		Jose Renau	:	Handle single CPU case.
 *		Alan Cox	:	By repeated request 8) - Total BogoMIP report.
 *		Greg Wright	:	Fix for kernel stacks panic.
 *		Erich Boleyn	:	MP v1.4 and additional changes.
 *		Matthias Sattler:	Changes for 2.1 kernel map.
 *		Michel Lespinasse:	Changes for 2.1 kernel map.
 *		Michael Chastain:	Change trampoline.S to gnu as.
 *		Alan Cox	:	Dumb bug: 'B' step PPro's are fine
 *		Ingo Molnar	:	Added APIC timers, based on code
 *					from Jose Renau
 *		Alan Cox	:	Added EBDA scanning
 *		Ingo Molnar	:	various cleanups and rewrites
 *		Tigran Aivazian	:	fixed "0.00 in /proc/uptime on SMP" bug.
 */
#include <linux/config.h>
#include <linux/mm.h>
#include <linux/kernel_stat.h>
#include <linux/delay.h>
#include <linux/mc146818rtc.h>
#include <linux/smp_lock.h>
#include <linux/init.h>
#include <asm/mtrr.h>
#include <asm/msr.h>

#include "irq.h"

#define JIFFIE_TIMEOUT 100

extern void update_one_process(struct task_struct *p,
				unsigned long ticks, unsigned long user,
				unsigned long system, int cpu);
/*
 *	Some notes on processor bugs:
 *
 *	Pentium and Pentium Pro (and all CPUs) have bugs. The Linux issues
 *	for SMP are handled as follows.
 *
 *	Pentium Pro
 *		Occasional delivery of 'spurious interrupt' as trap #16. This
 *	is very rare. The kernel logs the event and recovers.
 *
 *	Pentium
 *		There is a marginal case where REP MOVS on 100MHz SMP
 *	machines with B stepping processors can fail. XXX should provide
 *	an L1cache=Writethrough or L1cache=off option.
 *
 *		B stepping CPUs may hang. There are hardware workarounds
 *	for this. We warn about it in case your board doesn't have the
 *	workarounds. Basically that's so I can tell anyone with a B stepping
 *	CPU and SMP problems "tough".
 *
 *	Specific items [From Pentium Processor Specification Update]
 *
 *	1AP.	Linux doesn't use remote read
 *	2AP.	Linux doesn't trust APIC errors
 *	3AP.	We work around this
 *	4AP.	Linux never generates 3 interrupts of the same priority
 *		to cause a lost local interrupt.
 *	5AP.	Remote read is never used
 *	9AP.	XXX NEED TO CHECK WE HANDLE THIS XXX
 *	10AP.	XXX NEED TO CHECK WE HANDLE THIS XXX
 *	11AP.	Linux reads the APIC between writes to avoid this, as per
 *		the documentation. Make sure you preserve this as it affects
 *		the C stepping chips too.
 *
 *	If this sounds worrying, believe me these bugs are ___RARE___ and
 *	there's about nothing of note with C stepping upwards.
 */
/* Kernel spinlock */
spinlock_t kernel_flag = SPIN_LOCK_UNLOCKED;

/*
 * function prototypes:
 */
static void cache_APIC_registers(void);
static void stop_this_cpu(void);

static int smp_b_stepping = 0;			/* Set if we find a B stepping CPU */

static int max_cpus = -1;			/* Setup configured maximum number of CPUs to activate */
int smp_found_config = 0;			/* Have we found an SMP box */

unsigned long cpu_present_map = 0;		/* Bitmask of physically existing CPUs */
unsigned long cpu_online_map = 0;		/* Bitmask of currently online CPUs */
int smp_num_cpus = 0;				/* Total count of live CPUs */
int smp_threads_ready = 0;			/* Set when the idlers are all forked */
volatile int cpu_number_map[NR_CPUS];		/* which CPU maps to which logical number */
volatile int __cpu_logical_map[NR_CPUS];	/* which logical number maps to which CPU */
static volatile unsigned long cpu_callin_map[NR_CPUS] = {0,};	/* We always use 0 the rest is ready for parallel delivery */
static volatile unsigned long cpu_callout_map[NR_CPUS] = {0,};	/* We always use 0 the rest is ready for parallel delivery */
volatile unsigned long smp_invalidate_needed;	/* Used for the invalidate map that's also checked in the spinlock */
volatile unsigned long kstack_ptr;		/* Stack vector for booting CPUs */
struct cpuinfo_x86 cpu_data[NR_CPUS];		/* Per CPU bogomips and other parameters */
static unsigned int num_processors = 1;		/* Internal processor count */
unsigned long mp_ioapic_addr = 0xFEC00000;	/* Address of the I/O apic (not yet used) */
unsigned char boot_cpu_id = 0;			/* Processor that is doing the boot up */
static int smp_activated = 0;			/* Tripped once we need to start cross invalidating */
int apic_version[NR_CPUS];			/* APIC version number */
unsigned long apic_retval;			/* Just debugging the assembler.. */

volatile unsigned long kernel_counter = 0;	/* Number of times the processor holds the lock */
volatile unsigned long syscall_count = 0;	/* Number of times the processor holds the syscall lock */

volatile unsigned long ipi_count;		/* Number of IPIs delivered */

const char lk_lockmsg[] = "lock from interrupt context at %p\n";

int mp_bus_id_to_type[MAX_MP_BUSSES] = { -1, };
extern int nr_ioapics;
extern struct mpc_config_ioapic mp_apics[MAX_IO_APICS];
extern int mp_irq_entries;
extern struct mpc_config_intsrc mp_irqs[MAX_IRQ_SOURCES];
extern int mpc_default_type;
int mp_bus_id_to_pci_bus[MAX_MP_BUSSES] = { -1, };
int mp_current_pci_id = 0;
unsigned long mp_lapic_addr = 0;
int skip_ioapic_setup = 0;			/* 1 if "noapic" boot option passed */
/* #define SMP_DEBUG */

#ifdef SMP_DEBUG
#define SMP_PRINTK(x)	printk x
#else
#define SMP_PRINTK(x)
#endif

/*
 * IA s/w dev Vol 3, Section 7.4
 */
#define APIC_DEFAULT_PHYS_BASE 0xfee00000

#define CLEAR_TSC wrmsr(0x10, 0x00001000, 0x00001000)
/*
 *	Setup routine for controlling SMP activation
 *
 *	Command-line option of "nosmp" or "maxcpus=0" will disable SMP
 *	activation entirely (the MPS table probe still happens, though).
 *
 *	Command-line option of "maxcpus=<NUM>", where <NUM> is an integer
 *	greater than 0, limits the maximum number of CPUs activated in
 *	SMP mode to <NUM>.
 */
void __init smp_setup(char *str, int *ints)
{
	if (ints && ints[0] > 0)
		max_cpus = ints[1];
	else
		max_cpus = 0;
}
void ack_APIC_irq(void)
{
	/* Clear the IPI */

	/* Dummy read */
	apic_read(APIC_SPIV);

	/* Docs say use 0 for future compatibility */
	apic_write(APIC_EOI, 0);
}
/*
 *	Intel MP BIOS table parsing routines:
 */

#ifndef CONFIG_X86_VISWS_APIC
/*
 *	Checksum an MP configuration block.
 */

static int mpf_checksum(unsigned char *mp, int len)
{
	int sum = 0;

	while (len--)
		sum += *mp++;

	return sum & 0xFF;
}
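
/*
 * Note: the MP spec arranges for a valid block to sum to zero modulo
 * 256, so callers treat a zero return as "checksum OK". For example,
 * a well-formed 16-byte floating pointer structure satisfies
 *
 *	mpf_checksum((unsigned char *)mpf, 16) == 0
 */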
/*
 *	Processor encoding in an MP configuration block
 */

static char *mpc_family(int family, int model)
{
	static char n[32];
	static char *model_defs[] =
	{
		"80486DX", "80486DX",
		"80486SX", "80486DX/2 or 80487",
		"80486SL", "Intel5X2(tm)",
		"Unknown", "Unknown",
		"80486DX/4"
	};

	if (family == 0x6)
		return ("Pentium(tm) Pro");
	if (family == 0x5)
		return ("Pentium(tm)");
	if (family == 0x0F && model == 0x0F)
		return ("Special controller");
	if (family == 0x04 && model < 9)
		return model_defs[model];
	sprintf(n, "Unknown CPU [%d:%d]", family, model);
	return n;
}
/*
 *	Read the MPC
 */

static int __init smp_read_mpc(struct mp_config_table *mpc)
{
	char str[16];
	int count = sizeof(*mpc);
	int ioapics = 0;
	unsigned char *mpt = ((unsigned char *)mpc) + count;

	if (memcmp(mpc->mpc_signature, MPC_SIGNATURE, 4)) {
		panic("SMP mptable: bad signature [%c%c%c%c]!\n",
			mpc->mpc_signature[0],
			mpc->mpc_signature[1],
			mpc->mpc_signature[2],
			mpc->mpc_signature[3]);
		return 1;
	}
	if (mpf_checksum((unsigned char *)mpc, mpc->mpc_length)) {
		panic("SMP mptable: checksum error!\n");
		return 1;
	}
	if (mpc->mpc_spec != 0x01 && mpc->mpc_spec != 0x04) {
		printk("Bad Config Table version (%d)!!\n", mpc->mpc_spec);
		return 1;
	}
	memcpy(str, mpc->mpc_oem, 8);
	str[8] = 0;
	printk("OEM ID: %s ", str);

	memcpy(str, mpc->mpc_productid, 12);
	str[12] = 0;
	printk("Product ID: %s ", str);

	printk("APIC at: 0x%lX\n", mpc->mpc_lapic);

	/* save the local APIC address, it might be non-default */
	mp_lapic_addr = mpc->mpc_lapic;

	/*
	 *	Now process the configuration blocks.
	 */
	while (count < mpc->mpc_length) {
		switch (*mpt) {
			case MP_PROCESSOR:
			{
				struct mpc_config_processor *m =
					(struct mpc_config_processor *)mpt;

				if (m->mpc_cpuflag & CPU_ENABLED) {
					printk("Processor #%d %s APIC version %d\n",
						m->mpc_apicid,
						mpc_family((m->mpc_cpufeature &
							CPU_FAMILY_MASK) >> 8,
							(m->mpc_cpufeature &
							CPU_MODEL_MASK) >> 4),
						m->mpc_apicver);
#ifdef SMP_DEBUG
					if (m->mpc_featureflag & (1<<0))
						printk("    Floating point unit present.\n");
					if (m->mpc_featureflag & (1<<7))
						printk("    Machine Exception supported.\n");
					if (m->mpc_featureflag & (1<<8))
						printk("    64 bit compare & exchange supported.\n");
					if (m->mpc_featureflag & (1<<9))
						printk("    Internal APIC present.\n");
#endif
					if (m->mpc_cpuflag & CPU_BOOTPROCESSOR) {
						SMP_PRINTK(("    Bootup CPU\n"));
						boot_cpu_id = m->mpc_apicid;
					} else	/* Boot CPU already counted */
						num_processors++;

					if (m->mpc_apicid > NR_CPUS)
						printk("Processor #%d unused. (Max %d processors).\n", m->mpc_apicid, NR_CPUS);
					else {
						int ver = m->mpc_apicver;

						cpu_present_map |= (1<<m->mpc_apicid);
						/*
						 * Validate version
						 */
						if (ver == 0x0) {
							printk("BIOS bug, APIC version is 0 for CPU#%d! fixing up to 0x10. (tell your hw vendor)\n", m->mpc_apicid);
							ver = 0x10;
						}
						apic_version[m->mpc_apicid] = ver;
					}
				}
				mpt += sizeof(*m);
				count += sizeof(*m);
				break;
			}
			case MP_BUS:
			{
				struct mpc_config_bus *m =
					(struct mpc_config_bus *)mpt;

				memcpy(str, m->mpc_bustype, 6);
				str[6] = 0;
				SMP_PRINTK(("Bus #%d is %s\n",
					m->mpc_busid,
					str));
				if (strncmp(m->mpc_bustype, "ISA", 3) == 0)
					mp_bus_id_to_type[m->mpc_busid] =
						MP_BUS_ISA;
				else
				if (strncmp(m->mpc_bustype, "EISA", 4) == 0)
					mp_bus_id_to_type[m->mpc_busid] =
						MP_BUS_EISA;
				if (strncmp(m->mpc_bustype, "PCI", 3) == 0) {
					mp_bus_id_to_type[m->mpc_busid] =
						MP_BUS_PCI;
					mp_bus_id_to_pci_bus[m->mpc_busid] =
						mp_current_pci_id;
					mp_current_pci_id++;
				}
				mpt += sizeof(*m);
				count += sizeof(*m);
				break;
			}
			case MP_IOAPIC:
			{
				struct mpc_config_ioapic *m =
					(struct mpc_config_ioapic *)mpt;

				if (m->mpc_flags & MPC_APIC_USABLE) {
					ioapics++;
					printk("I/O APIC #%d Version %d at 0x%lX.\n",
						m->mpc_apicid, m->mpc_apicver,
						m->mpc_apicaddr);
					mp_apics[nr_ioapics] = *m;
					if (++nr_ioapics > MAX_IO_APICS)
						--nr_ioapics;
				}
				mpt += sizeof(*m);
				count += sizeof(*m);
				break;
			}
			case MP_INTSRC:
			{
				struct mpc_config_intsrc *m =
					(struct mpc_config_intsrc *)mpt;

				mp_irqs[mp_irq_entries] = *m;
				if (++mp_irq_entries == MAX_IRQ_SOURCES) {
					printk("Max irq sources exceeded!!\n");
					printk("Skipping remaining sources.\n");
					--mp_irq_entries;
				}

				mpt += sizeof(*m);
				count += sizeof(*m);
				break;
			}
			case MP_LINTSRC:
			{
				struct mpc_config_intlocal *m =
					(struct mpc_config_intlocal *)mpt;

				mpt += sizeof(*m);
				count += sizeof(*m);
				break;
			}
		}
	}
	if (ioapics > MAX_IO_APICS) {
		printk("Warning: Max I/O APICs exceeded (max %d, found %d).\n", MAX_IO_APICS, ioapics);
		printk("Warning: switching to non APIC mode.\n");
		skip_ioapic_setup = 1;
	}
	return num_processors;
}
/*
 *	Scan the memory blocks for an SMP configuration block.
 */

static int __init smp_scan_config(unsigned long base, unsigned long length)
{
	unsigned long *bp = phys_to_virt(base);
	struct intel_mp_floating *mpf;

	SMP_PRINTK(("Scan SMP from %p for %ld bytes.\n",
		bp, length));
	if (sizeof(*mpf) != 16)
		printk("Error: MPF size\n");

	while (length > 0) {
		if (*bp == SMP_MAGIC_IDENT) {
			mpf = (struct intel_mp_floating *)bp;
			if (mpf->mpf_length == 1 &&
			    !mpf_checksum((unsigned char *)bp, 16) &&
			    (mpf->mpf_specification == 1
			     || mpf->mpf_specification == 4)) {
				printk("Intel MultiProcessor Specification v1.%d\n", mpf->mpf_specification);
				if (mpf->mpf_feature2 & (1<<7))
					printk("    IMCR and PIC compatibility mode.\n");
				else
					printk("    Virtual Wire compatibility mode.\n");
				smp_found_config = 1;
				/*
				 *	Now see if we need to read further.
				 */
				if (mpf->mpf_feature1 != 0) {
					unsigned long cfg;

					/* local APIC has default address */
					mp_lapic_addr = APIC_DEFAULT_PHYS_BASE;
					/*
					 *	We need to know what the local
					 *	APIC id of the boot CPU is!
					 */

/*
 *	HACK HACK HACK HACK HACK HACK HACK HACK HACK HACK HACK HACK HACK
 *
 *	It's not just a crazy hack.  ;-)
 */
					/*
					 *	Standard page mapping
					 *	functions don't work yet.
					 *	We know that page 0 is not
					 *	used.  Steal it for now!
					 */

					cfg = pg0[0];
					pg0[0] = (mp_lapic_addr | _PAGE_RW | _PAGE_PRESENT);
					local_flush_tlb();

					boot_cpu_id = GET_APIC_ID(*((volatile unsigned long *) APIC_ID));

					/*
					 *	Give it back
					 */

					pg0[0] = cfg;
					local_flush_tlb();

/*
 *	END OF HACK   END OF HACK   END OF HACK   END OF HACK   END OF HACK
 */
					/*
					 *	2 CPUs, numbered 0 & 1.
					 */
					cpu_present_map = 3;
					num_processors = 2;
					printk("I/O APIC at 0xFEC00000.\n");

					/*
					 * Save the default type number, we
					 * need it later to set the IO-APIC
					 * up properly:
					 */
					mpc_default_type = mpf->mpf_feature1;

					printk("Bus #0 is ");
				}

				switch (mpf->mpf_feature1) {
					case 1:
					case 5:
						printk("ISA\n");
						break;
					case 2:
						printk("EISA with no IRQ8 chaining\n");
						break;
					case 6:
					case 3:
						printk("EISA\n");
						break;
					case 4:
					case 7:
						printk("MCA\n");
						break;
					case 0:
						break;
					default:
						printk("???\nUnknown standard configuration %d\n",
							mpf->mpf_feature1);
						return 1;
				}
				if (mpf->mpf_feature1 > 4) {
					printk("Bus #1 is PCI\n");

					/*
					 *	Set local APIC version to
					 *	the integrated form.
					 *	It's initialized to zero
					 *	otherwise, representing
					 *	a discrete 82489DX.
					 */
					apic_version[0] = 0x10;
					apic_version[1] = 0x10;
				}
				/*
				 *	Read the physical hardware table.
				 *	Anything here will override the
				 *	defaults.
				 */
				if (mpf->mpf_physptr)
					smp_read_mpc((void *)mpf->mpf_physptr);

				__cpu_logical_map[0] = boot_cpu_id;
				global_irq_holder = boot_cpu_id;
				current->processor = boot_cpu_id;

				printk("Processors: %d\n", num_processors);
				/*
				 *	Only use the first configuration found.
				 */
				return 1;
			}
		}
		bp += 4;
		length -= 16;
	}

	return 0;
}
void __init init_intel_smp (void)
{
	/*
	 * FIXME: Linux assumes you have 640K of base ram..
	 * this continues the error...
	 *
	 * 1) Scan the bottom 1K for a signature
	 * 2) Scan the top 1K of base RAM
	 * 3) Scan the 64K of bios
	 */
	if (!smp_scan_config(0x0, 0x400) &&
	    !smp_scan_config(639*0x400, 0x400) &&
	    !smp_scan_config(0xF0000, 0x10000)) {
		/*
		 * If it is an SMP machine we should know now, unless the
		 * configuration is in an EISA/MCA bus machine with an
		 * extended bios data area.
		 *
		 * there is a real-mode segmented pointer pointing to the
		 * 4K EBDA area at 0x40E, calculate and scan it here.
		 *
		 * NOTE! There are Linux loaders that will corrupt the EBDA
		 * area, and as such this kind of SMP config may be less
		 * trustworthy, simply because the SMP table may have been
		 * stomped on during early boot. These loaders are buggy and
		 * should be fixed.
		 */
		unsigned int address;

		address = *(unsigned short *)phys_to_virt(0x40E);
		address <<= 4;
		smp_scan_config(address, 0x1000);
		if (smp_found_config)
			printk(KERN_WARNING "WARNING: MP table in the EBDA can be UNSAFE, contact linux-smp@vger.rutgers.edu if you experience SMP problems!\n");
	}
}
#else

/*
 * The Visual Workstation is Intel MP compliant in the hardware
 * sense, but it doesn't have a BIOS(-configuration table).
 * No problem for Linux.
 */
void __init init_visws_smp(void)
{
	smp_found_config = 1;

	cpu_present_map |= 2;		/* or in id 1 */
	apic_version[1] |= 0x10;	/* integrated APIC */
	apic_version[0] |= 0x10;

	mp_lapic_addr = APIC_DEFAULT_PHYS_BASE;
}

#endif
/*
 * - Intel MP Configuration Table
 * - or SGI Visual Workstation configuration
 */
void __init init_smp_config (void)
{
#ifndef CONFIG_VISWS
	init_intel_smp();
#else
	init_visws_smp();
#endif
}
/*
 *	Trampoline 80x86 program as an array.
 */

extern unsigned char trampoline_data [];
extern unsigned char trampoline_end  [];
static unsigned char *trampoline_base;

/*
 *	Currently trivial. Write the real->protected mode
 *	bootstrap into the page concerned. The caller
 *	has made sure it's suitably aligned.
 */

static unsigned long __init setup_trampoline(void)
{
	memcpy(trampoline_base, trampoline_data, trampoline_end - trampoline_data);
	return virt_to_phys(trampoline_base);
}

/*
 *	We are called very early to get the low memory for the
 *	SMP bootup trampoline page.
 */
unsigned long __init smp_alloc_memory(unsigned long mem_base)
{
	if (virt_to_phys((void *)mem_base) >= 0x9F000)
		panic("smp_alloc_memory: Insufficient low memory for kernel trampoline 0x%lx.", mem_base);
	trampoline_base = (void *)mem_base;
	return mem_base + PAGE_SIZE;
}
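
/*
 * Why the 0x9F000 check above: a freshly started AP begins executing
 * in real mode, at an address derived from the STARTUP IPI vector, so
 * the trampoline page must sit in base memory below the 640K boundary
 * (and below the EBDA) where real-mode code can reach it.
 */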
/*
 *	The bootstrap kernel entry code has set these up. Save them for
 *	a given CPU
 */

void __init smp_store_cpu_info(int id)
{
	struct cpuinfo_x86 *c = &cpu_data[id];

	*c = boot_cpu_data;
	c->pte_quick = 0;
	c->pgd_quick = 0;
	c->pgtable_cache_sz = 0;
	identify_cpu(c);
	/*
	 *	Mask B, Pentium, but not Pentium MMX
	 */
	if (c->x86_vendor == X86_VENDOR_INTEL &&
	    c->x86 == 5 &&
	    c->x86_mask >= 1 && c->x86_mask <= 4 &&
	    c->x86_model <= 3)
		smp_b_stepping = 1;	/* Remember we have B step Pentia with bugs */
}
/*
 *	Architecture specific routine called by the kernel just before init is
 *	fired off. This allows the BP to have everything in order [we hope].
 *	At the end of this all the APs will hit the system scheduling and off
 *	we go. Each AP will load the system gdt's and jump through the kernel
 *	init into idle(). At this point the scheduler will one day take over
 *	and give them jobs to do. smp_callin is a standard routine
 *	we use to track CPUs as they power up.
 */

static atomic_t smp_commenced = ATOMIC_INIT(0);

void __init smp_commence(void)
{
	/*
	 *	Lets the callins below out of their loop.
	 */
	SMP_PRINTK(("Setting commenced=1, go go go\n"));

	wmb();
	atomic_set(&smp_commenced, 1);
}
void __init enable_local_APIC(void)
{
	unsigned long value;

	value = apic_read(APIC_SPIV);
	value |= (1<<8);	/* Enable APIC (bit==1) */
	value &= ~(1<<9);	/* Enable focus processor (bit==0) */
	value |= 0xff;		/* Set spurious IRQ vector to 0xff */
	apic_write(APIC_SPIV, value);

	/*
	 *	Set Task Priority to 'accept all'
	 */
	value = apic_read(APIC_TASKPRI);
	value &= ~APIC_TPRI_MASK;
	apic_write(APIC_TASKPRI, value);

	/*
	 *	Clear the logical destination ID, just to be safe.
	 *	also, put the APIC into flat delivery mode.
	 */
	value = apic_read(APIC_LDR);
	value &= ~APIC_LDR_MASK;
	apic_write(APIC_LDR, value);

	value = apic_read(APIC_DFR);
	value |= SET_APIC_DFR(0xf);
	apic_write(APIC_DFR, value);

	udelay(100);		/* B safe */
}
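
/*
 * A note on the 0xff spurious vector above: on some local APIC
 * implementations the low four bits of the spurious-interrupt vector
 * read back as ones, so 0xff is the conventional safe choice; it also
 * keeps the vector well clear of the ones Linux uses for real IPIs.
 */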
unsigned long __init init_smp_mappings(unsigned long memory_start)
{
	unsigned long apic_phys;

	memory_start = PAGE_ALIGN(memory_start);
	if (smp_found_config) {
		apic_phys = mp_lapic_addr;
	} else {
		/*
		 * set up a fake all zeroes page to simulate the
		 * local APIC and another one for the IO-APIC. We
		 * could use the real zero-page, but it's safer
		 * this way if some buggy code writes to this page ...
		 */
		apic_phys = __pa(memory_start);
		memset((void *)memory_start, 0, PAGE_SIZE);
		memory_start += PAGE_SIZE;
	}
	set_fixmap(FIX_APIC_BASE, apic_phys);
	printk("mapped APIC to %08lx (%08lx)\n", APIC_BASE, apic_phys);

#ifdef CONFIG_X86_IO_APIC
	{
		unsigned long ioapic_phys, idx = FIX_IO_APIC_BASE_0;
		int i;

		for (i = 0; i < nr_ioapics; i++) {
			if (smp_found_config) {
				ioapic_phys = mp_apics[i].mpc_apicaddr;
			} else {
				ioapic_phys = __pa(memory_start);
				memset((void *)memory_start, 0, PAGE_SIZE);
				memory_start += PAGE_SIZE;
			}
			set_fixmap(idx, ioapic_phys);
			printk("mapped IOAPIC to %08lx (%08lx)\n",
				__fix_to_virt(idx), ioapic_phys);
			idx++;
		}
	}
#endif

	return memory_start;
}
extern void calibrate_delay(void);

void __init smp_callin(void)
{
	int cpuid;
	unsigned long timeout;

	/*
	 * (This works even if the APIC is not enabled.)
	 */
	cpuid = GET_APIC_ID(apic_read(APIC_ID));

	SMP_PRINTK(("CPU#%d waiting for CALLOUT\n", cpuid));

	/*
	 * STARTUP IPIs are fragile beasts as they might sometimes
	 * trigger some glue motherboard logic. Complete APIC bus
	 * silence for 1 second, this overestimates the time the
	 * boot CPU is spending to send the up to 2 STARTUP IPIs
	 * by a factor of two. This should be enough.
	 */

	/*
	 * Waiting 2s total for startup (udelay is not yet working)
	 */
	timeout = jiffies + 2*HZ;
	while (time_before(jiffies, timeout)) {
		/*
		 * Has the boot CPU finished its STARTUP sequence?
		 */
		if (test_bit(cpuid, (unsigned long *)&cpu_callout_map[0]))
			break;
	}

	while (!time_before(jiffies, timeout)) {
		printk("BUG: CPU%d started up but did not get a callout!\n",
			cpuid);
		stop_this_cpu();
	}

	/*
	 * the boot CPU has finished the init stage and is spinning
	 * on callin_map until we finish. We are free to set up this
	 * CPU, first the APIC. (this is probably redundant on most
	 * boards)
	 */

	SMP_PRINTK(("CALLIN, before enable_local_APIC().\n"));
	enable_local_APIC();

	/*
	 * Set up our APIC timer.
	 */
	setup_APIC_clock();

	__sti();

#ifdef CONFIG_MTRR
	/*  Must be done before calibration delay is computed  */
	mtrr_init_secondary_cpu ();
#endif
	/*
	 *	Get our bogomips.
	 */
	calibrate_delay();
	SMP_PRINTK(("Stack at about %p\n", &cpuid));

	/*
	 *	Save our processor parameters
	 */
	smp_store_cpu_info(cpuid);

	/*
	 *	Allow the master to continue.
	 */
	set_bit(cpuid, (unsigned long *)&cpu_callin_map[0]);
}
int cpucount = 0;

extern int cpu_idle(void * unused);

/*
 *	Activate a secondary processor.
 */
int __init start_secondary(void *unused)
{
	/*
	 * Don't put anything before smp_callin(); SMP booting is so
	 * fragile that we want to limit the things done here to the
	 * most necessary things.
	 */
	cpu_init();
	smp_callin();
	while (!atomic_read(&smp_commenced))
		/* nothing */ ;
	return cpu_idle(NULL);
}
/*
 * Everything has been set up for the secondary
 * CPUs - they just need to reload everything
 * from the task structure
 * This function must not return.
 */
void __init initialize_secondary(void)
{
	/*
	 * We don't actually need to load the full TSS,
	 * basically just the stack pointer and the eip.
	 */

	asm volatile(
		"movl %0,%%esp\n\t"
		"jmp *%1"
		:
		:"r" (current->thread.esp), "r" (current->thread.eip));
}

extern struct {
	void * esp;
	unsigned short ss;
} stack_start;
static void __init do_boot_cpu(int i)
{
	unsigned long cfg;
	pgd_t maincfg;
	struct task_struct *idle;
	unsigned long send_status, accept_status;
	int timeout, num_starts, j;
	unsigned long start_eip;

	/*
	 *	We need an idle process for each processor.
	 */
	kernel_thread(start_secondary, NULL, CLONE_PID);
	cpucount++;

	/*
	 *	We remove it from the pidhash and the runqueue
	 *	once we got the process:
	 */
	idle = init_task.prev_task;
	if (!idle)
		panic("No idle process for CPU %d", i);

	idle->processor = i;
	__cpu_logical_map[cpucount] = i;
	cpu_number_map[i] = cpucount;
	idle->has_cpu = 1;	/* we schedule the first task manually */
	idle->thread.eip = (unsigned long) start_secondary;

	del_from_runqueue(idle);
	unhash_process(idle);
	init_tasks[cpucount] = idle;

	/* start_eip had better be page-aligned! */
	start_eip = setup_trampoline();

	printk("Booting processor %d eip %lx\n", i, start_eip);	/* So we see what's up */
	stack_start.esp = (void *) (1024 + PAGE_SIZE + (char *)idle);

	/*
	 *	This grunge runs the startup process for
	 *	the targeted processor.
	 */

	SMP_PRINTK(("Setting warm reset code and vector.\n"));

	CMOS_WRITE(0xa, 0xf);
	local_flush_tlb();
	SMP_PRINTK(("1.\n"));
	*((volatile unsigned short *) phys_to_virt(0x469)) = start_eip >> 4;
	SMP_PRINTK(("2.\n"));
	*((volatile unsigned short *) phys_to_virt(0x467)) = start_eip & 0xf;
	SMP_PRINTK(("3.\n"));

	maincfg = swapper_pg_dir[0];
	((unsigned long *)swapper_pg_dir)[0] = 0x102007;

	/*
	 *	Be paranoid about clearing APIC errors.
	 */

	if (apic_version[i] & 0xF0) {
		apic_write(APIC_ESR, 0);
		accept_status = (apic_read(APIC_ESR) & 0xEF);
	}

	/*
	 *	Status is now clean
	 */

	send_status = 0;
	accept_status = 0;

	/*
	 *	Starting actual IPI sequence...
	 */

	SMP_PRINTK(("Asserting INIT.\n"));

	/*
	 *	Turn INIT on
	 */

	cfg = apic_read(APIC_ICR2);
	cfg &= 0x00FFFFFF;
	apic_write(APIC_ICR2, cfg | SET_APIC_DEST_FIELD(i));			/* Target chip */
	cfg = apic_read(APIC_ICR);
	cfg &= ~0xCDFFF;							/* Clear bits */
	cfg |= (APIC_DEST_LEVELTRIG | APIC_DEST_ASSERT | APIC_DEST_DM_INIT);
	apic_write(APIC_ICR, cfg);						/* Send IPI */

	udelay(200);
	SMP_PRINTK(("Deasserting INIT.\n"));

	cfg = apic_read(APIC_ICR2);
	cfg &= 0x00FFFFFF;
	apic_write(APIC_ICR2, cfg | SET_APIC_DEST_FIELD(i));			/* Target chip */
	cfg = apic_read(APIC_ICR);
	cfg &= ~0xCDFFF;							/* Clear bits */
	cfg |= (APIC_DEST_LEVELTRIG | APIC_DEST_DM_INIT);
	apic_write(APIC_ICR, cfg);						/* Send IPI */
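
	/*
	 * The two ICR writes above implement the MP spec "INIT level
	 * de-assert" step: the first asserts INIT on the target, the
	 * second de-asserts it, leaving the AP reset and waiting. The
	 * STARTUP IPIs sent below carry (start_eip >> 12) in the vector
	 * field, which the AP interprets as the 4K-aligned real-mode
	 * page to begin executing at - i.e. the trampoline page set up
	 * earlier.
	 */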
	/*
	 *	Should we send STARTUP IPIs ?
	 *
	 *	Determine this based on the APIC version.
	 *	If we don't have an integrated APIC, don't
	 *	send the STARTUP IPIs.
	 */

	if (apic_version[i] & 0xF0)
		num_starts = 2;
	else
		num_starts = 0;

	/*
	 *	Run STARTUP IPI loop.
	 */

	for (j = 1; !(send_status || accept_status)
		    && (j <= num_starts); j++) {
		SMP_PRINTK(("Sending STARTUP #%d.\n", j));
		apic_write(APIC_ESR, 0);
		SMP_PRINTK(("After apic_write.\n"));

		/*
		 *	STARTUP IPI
		 */

		cfg = apic_read(APIC_ICR2);
		cfg &= 0x00FFFFFF;
		apic_write(APIC_ICR2, cfg | SET_APIC_DEST_FIELD(i));		/* Target chip */
		cfg = apic_read(APIC_ICR);
		cfg &= ~0xCDFFF;						/* Clear bits */
		cfg |= (APIC_DEST_DM_STARTUP | (start_eip >> 12));		/* Boot on the stack */
		SMP_PRINTK(("Before start apic_write.\n"));
		apic_write(APIC_ICR, cfg);					/* Kick the second */

		SMP_PRINTK(("Startup point 1.\n"));

		timeout = 0;
		SMP_PRINTK(("Waiting for send to finish...\n"));
		do {
			SMP_PRINTK(("+"));
			udelay(100);
			send_status = apic_read(APIC_ICR) & 0x1000;
		} while (send_status && (timeout++ < 1000));

		/*
		 *	Give the other CPU some time to accept the IPI.
		 */
		udelay(200);
		accept_status = (apic_read(APIC_ESR) & 0xEF);
	}
	SMP_PRINTK(("After Startup.\n"));

	if (send_status)		/* APIC never delivered?? */
		printk("APIC never delivered???\n");
	if (accept_status)		/* Send accept error */
		printk("APIC delivery error (%lx).\n", accept_status);

	if (!(send_status || accept_status)) {
		/*
		 * allow APs to start initializing.
		 */
		SMP_PRINTK(("Before Callout %d.\n", i));
		set_bit(i, (unsigned long *)&cpu_callout_map[0]);
		SMP_PRINTK(("After Callout %d.\n", i));

		for (timeout = 0; timeout < 50000; timeout++) {
			if (cpu_callin_map[0] & (1<<i))
				break;	/* It has booted */
			udelay(100);	/* Wait 5s total for a response */
		}
		if (cpu_callin_map[0] & (1<<i)) {
			/* number CPUs logically, starting from 1 (BSP is 0) */
#if 0
			cpu_number_map[i] = cpucount;
			__cpu_logical_map[cpucount] = i;
#endif
			printk("OK.\n");
			printk("CPU%d: ", i);
			print_cpu_info(&cpu_data[i]);
		} else {
			if (*((volatile unsigned char *)phys_to_virt(8192)) == 0xA5)
				printk("Stuck ??\n");
			else
				printk("Not responding.\n");
		}
		SMP_PRINTK(("CPU has booted.\n"));
	} else {
		__cpu_logical_map[cpucount] = -1;
		cpu_number_map[i] = -1;
		cpucount--;
	}

	swapper_pg_dir[0] = maincfg;
	local_flush_tlb();

	/* mark "stuck" area as not stuck */
	*((volatile unsigned long *)phys_to_virt(8192)) = 0;
}
cycles_t cacheflush_time;
extern unsigned long cpu_hz;

static void smp_tune_scheduling (void)
{
	unsigned long cachesize;
	/*
	 * Rough estimation for SMP scheduling, this is the number of
	 * cycles it takes for a fully memory-limited process to flush
	 * the SMP-local cache.
	 *
	 * (For a P5 this pretty much means we will choose another idle
	 *  CPU almost always at wakeup time (this is due to the small
	 *  L1 cache), on PIIs it's around 50-100 usecs, depending on
	 *  the cache size)
	 */

	if (!cpu_hz) {
		/*
		 * this basically disables processor-affinity
		 * scheduling on SMP without a TSC.
		 */
		cacheflush_time = 0;
		return;
	} else {
		cachesize = boot_cpu_data.x86_cache_size;
		if (cachesize == -1)
			cachesize = 8;	/* Pentiums */

		cacheflush_time = cpu_hz/1024*cachesize/5000;
	}

	printk("per-CPU timeslice cutoff: %ld.%02ld usecs.\n",
		(long)cacheflush_time/(cpu_hz/1000000),
		((long)cacheflush_time*100/(cpu_hz/1000000)) % 100);
}
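
/*
 * Worked example for the estimate above (illustrative numbers only):
 * with cpu_hz = 400,000,000 and a 512K cache, cacheflush_time becomes
 * 400000000/1024*512/5000 = 40000 cycles, i.e. 100 usecs at 400 MHz,
 * which matches the 50-100 usec ballpark quoted for PIIs.
 */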
unsigned int prof_multiplier[NR_CPUS];
unsigned int prof_old_multiplier[NR_CPUS];
unsigned int prof_counter[NR_CPUS];

/*
 *	Cycle through the processors sending APIC IPIs to boot each.
 */

void __init smp_boot_cpus(void)
{
	int i;

#ifdef CONFIG_MTRR
	/*  Must be done before other processors booted  */
	mtrr_init_boot_cpu ();
#endif
	/*
	 *	Initialize the logical to physical CPU number mapping
	 *	and the per-CPU profiling counter/multiplier
	 */

	for (i = 0; i < NR_CPUS; i++) {
		cpu_number_map[i] = -1;
		prof_counter[i] = 1;
		prof_old_multiplier[i] = 1;
		prof_multiplier[i] = 1;
	}

	/*
	 *	Setup boot CPU information
	 */

	smp_store_cpu_info(boot_cpu_id);	/* Final full version of the data */
	smp_tune_scheduling();
	printk("CPU%d: ", boot_cpu_id);
	print_cpu_info(&cpu_data[boot_cpu_id]);

	/*
	 * not necessary because the MP table should list the boot
	 * CPU too, but we do it for the sake of robustness anyway.
	 * (and for the case when a non-SMP board boots an SMP kernel)
	 */
	cpu_present_map |= (1 << hard_smp_processor_id());

	cpu_number_map[boot_cpu_id] = 0;

	init_idle();

	/*
	 *	If we couldn't find an SMP configuration at boot time,
	 *	get out of here now!
	 */

	if (!smp_found_config) {
		printk(KERN_NOTICE "SMP motherboard not detected. Using dummy APIC emulation.\n");
#ifndef CONFIG_VISWS
		io_apic_irqs = 0;
#endif
		cpu_online_map = cpu_present_map;
		goto smp_done;
	}
	/*
	 *	If SMP should be disabled, then really disable it!
	 */

	if (!max_cpus) {
		smp_found_config = 0;
		printk(KERN_INFO "SMP mode deactivated, forcing use of dummy APIC emulation.\n");
	}

#ifdef SMP_DEBUG
	{
		int reg;

		/*
		 *	This is to verify that we're looking at
		 *	a real local APIC. Check these against
		 *	your board if the CPUs aren't getting
		 *	started for no apparent reason.
		 */

		reg = apic_read(APIC_VERSION);
		SMP_PRINTK(("Getting VERSION: %x\n", reg));

		apic_write(APIC_VERSION, 0);
		reg = apic_read(APIC_VERSION);
		SMP_PRINTK(("Getting VERSION: %x\n", reg));

		/*
		 *	The two version reads above should print the same
		 *	NON-ZERO!!! numbers. If the second one is zero,
		 *	there is a problem with the APIC write/read
		 *	definitions.
		 *
		 *	The next two are just to see if we have sane values.
		 *	They're only really relevant if we're in Virtual Wire
		 *	compatibility mode, but most boxes are these days.
		 */

		reg = apic_read(APIC_LVT0);
		SMP_PRINTK(("Getting LVT0: %x\n", reg));

		reg = apic_read(APIC_LVT1);
		SMP_PRINTK(("Getting LVT1: %x\n", reg));
	}
#endif

	enable_local_APIC();

	/*
	 * Set up our local APIC timer:
	 */
	setup_APIC_clock();

	/*
	 *	Now scan the CPU present map and fire up the other CPUs.
	 */

	/*
	 * Add all detected CPUs. (later on we can down individual
	 * CPUs which will change cpu_online_map but not necessarily
	 * cpu_present_map. We are pretty much ready for hot-swap CPUs.)
	 */
	cpu_online_map = cpu_present_map;
	mb();

	SMP_PRINTK(("CPU map: %lx\n", cpu_present_map));

	for (i = 0; i < NR_CPUS; i++) {
		/*
		 *	Don't even attempt to start the boot CPU!
		 */
		if (i == boot_cpu_id)
			continue;

		if ((cpu_online_map & (1 << i))
		    && (max_cpus < 0 || max_cpus > cpucount+1))
			do_boot_cpu(i);

		/*
		 *	Make sure we unmap all failed CPUs
		 */
		if (cpu_number_map[i] == -1 && (cpu_online_map & (1 << i))) {
			printk("CPU #%d not responding. Removing from cpu_online_map.\n", i);
			cpu_online_map &= ~(1 << i);
		}
	}
	/*
	 *	Cleanup possible dangling ends...
	 */

#ifndef CONFIG_VISWS
	{
		unsigned long cfg;

		/*
		 *	Install writable page 0 entry.
		 */
		cfg = pg0[0];
		pg0[0] = _PAGE_RW | _PAGE_PRESENT;	/* writeable, present, addr 0 */
		local_flush_tlb();

		/*
		 *	Paranoid: Set warm reset code and vector here back
		 *	to default values.
		 */

		CMOS_WRITE(0, 0xf);

		*((volatile long *) phys_to_virt(0x467)) = 0;

		/*
		 *	Restore old page 0 entry.
		 */

		pg0[0] = cfg;
		local_flush_tlb();
	}
#endif

	/*
	 *	Allow the user to impress friends.
	 */

	SMP_PRINTK(("Before bogomips.\n"));
	if (!cpucount) {
		printk(KERN_ERR "Error: only one processor found.\n");
		cpu_online_map = (1<<hard_smp_processor_id());
	} else {
		unsigned long bogosum = 0;
		for (i = 0; i < 32; i++)
			if (cpu_online_map & (1<<i))
				bogosum += cpu_data[i].loops_per_sec;
		printk(KERN_INFO "Total of %d processors activated (%lu.%02lu BogoMIPS).\n",
			cpucount+1,
			(bogosum+2500)/500000,
			((bogosum+2500)/5000)%100);
		SMP_PRINTK(("Before bogocount - setting activated=1.\n"));
		smp_activated = 1;
	}
	smp_num_cpus = cpucount + 1;

	if (smp_b_stepping)
		printk(KERN_WARNING "WARNING: SMP operation may be unreliable with B stepping processors.\n");
	SMP_PRINTK(("Boot done.\n"));

	cache_APIC_registers();
#ifndef CONFIG_VISWS
	/*
	 * Here we can be sure that there is an IO-APIC in the system. Let's
	 * go and set it up:
	 */
	if (!skip_ioapic_setup)
		setup_IO_APIC();
#endif

smp_done:
	/*
	 * now we know the other CPUs have fired off and we know our
	 * APIC ID, so we can go init the TSS and stuff:
	 */
	cpu_init();
}
/*
 * the following functions deal with sending IPIs between CPUs.
 *
 * We use 'broadcast', CPU->CPU IPIs and self-IPIs too.
 */

/*
 * Silly serialization to work around CPU bug in P5s.
 * We can safely turn it off on a 686.
 */
#ifdef CONFIG_X86_GOOD_APIC
# define FORCE_APIC_SERIALIZATION 0
#else
# define FORCE_APIC_SERIALIZATION 1
#endif

static unsigned int cached_APIC_ICR;
static unsigned int cached_APIC_ICR2;

/*
 * Caches reserved bits, APIC reads are (mildly) expensive
 * and force otherwise unnecessary CPU synchronization.
 *
 * (We could cache other APIC registers too, but these are the
 * main ones used in RL.)
 */
#define slow_ICR (apic_read(APIC_ICR) & ~0xFDFFF)
#define slow_ICR2 (apic_read(APIC_ICR2) & 0x00FFFFFF)

void cache_APIC_registers (void)
{
	cached_APIC_ICR = slow_ICR;
	cached_APIC_ICR2 = slow_ICR2;
	mb();
}
static inline unsigned int __get_ICR (void)
{
#if FORCE_APIC_SERIALIZATION
	/*
	 * Wait for the APIC to become ready - this should never occur. It's
	 * a debugging check really.
	 */
	int count = 0;
	unsigned int cfg;

	while (count < 1000) {
		cfg = slow_ICR;
		if (!(cfg & (1<<12))) {
			if (count)
				atomic_add(count, (atomic_t*)&ipi_count);
			return cfg;
		}
		count++;
		udelay(10);
	}
	printk("CPU #%d: previous IPI still not cleared after 10ms\n",
			smp_processor_id());
	return cfg;
#else
	return cached_APIC_ICR;
#endif
}

static inline unsigned int __get_ICR2 (void)
{
#if FORCE_APIC_SERIALIZATION
	return slow_ICR2;
#else
	return cached_APIC_ICR2;
#endif
}
static inline int __prepare_ICR (unsigned int shortcut, int vector)
{
	unsigned int cfg;

	cfg = __get_ICR();
	cfg |= APIC_DEST_DM_FIXED|shortcut|vector;

	return cfg;
}

static inline int __prepare_ICR2 (unsigned int dest)
{
	unsigned int cfg;

	cfg = __get_ICR2();
	cfg |= SET_APIC_DEST_FIELD(dest);

	return cfg;
}

static inline void __send_IPI_shortcut(unsigned int shortcut, int vector)
{
	unsigned int cfg;
	/*
	 * Subtle. In the case of the 'never do double writes' workaround we
	 * have to lock out interrupts to be safe. Otherwise it's just one
	 * single atomic write to the APIC, no need for cli/sti.
	 */
#if FORCE_APIC_SERIALIZATION
	unsigned long flags;

	__save_flags(flags);
	__cli();
#endif

	/*
	 * No need to touch the target chip field
	 */

	cfg = __prepare_ICR(shortcut, vector);

	/*
	 * Send the IPI. The write to APIC_ICR fires this off.
	 */
	apic_write(APIC_ICR, cfg);
#if FORCE_APIC_SERIALIZATION
	__restore_flags(flags);
#endif
}

static inline void send_IPI_allbutself(int vector)
{
	__send_IPI_shortcut(APIC_DEST_ALLBUT, vector);
}

static inline void send_IPI_all(int vector)
{
	__send_IPI_shortcut(APIC_DEST_ALLINC, vector);
}

void send_IPI_self(int vector)
{
	__send_IPI_shortcut(APIC_DEST_SELF, vector);
}

static inline void send_IPI_single(int dest, int vector)
{
	unsigned long cfg;
#if FORCE_APIC_SERIALIZATION
	unsigned long flags;

	__save_flags(flags);
	__cli();
#endif

	/*
	 * prepare target chip field
	 */

	cfg = __prepare_ICR2(dest);
	apic_write(APIC_ICR2, cfg);

	/*
	 * program the ICR
	 */
	cfg = __prepare_ICR(0, vector);

	/*
	 * Send the IPI. The write to APIC_ICR fires this off.
	 */
	apic_write(APIC_ICR, cfg);
#if FORCE_APIC_SERIALIZATION
	__restore_flags(flags);
#endif
}
/*
 * This is fraught with deadlocks. Probably the situation is not that
 * bad as in the early days of SMP, so we might ease some of the
 * paranoia here.
 */
static void flush_tlb_others(unsigned int cpumask)
{
	int cpu = smp_processor_id();
	int stuck;
	unsigned long flags;

	/*
	 * it's important that we do not generate any APIC traffic
	 * until the AP CPUs have booted up!
	 */
	cpumask &= cpu_online_map;
	if (cpumask) {
		atomic_set_mask(cpumask, &smp_invalidate_needed);

		/*
		 * Processors spinning on some lock with IRQs disabled
		 * will see this IRQ late. The smp_invalidate_needed
		 * map will ensure they don't do a spurious flush tlb
		 * or miss one.
		 */

		__save_flags(flags);
		__cli();

		send_IPI_allbutself(INVALIDATE_TLB_VECTOR);

		/*
		 * Spin waiting for completion
		 */

		stuck = 50000000;
		while (smp_invalidate_needed) {
			/*
			 * Take care of "crossing" invalidates
			 */
			if (test_bit(cpu, &smp_invalidate_needed)) {
				clear_bit(cpu, &smp_invalidate_needed);
				local_flush_tlb();
			}
			--stuck;
			if (!stuck) {
				printk("stuck on TLB IPI wait (CPU#%d)\n", cpu);
				break;
			}
		}
		__restore_flags(flags);
	}
}
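
/*
 * The protocol above, in short: the initiator sets a bit in
 * smp_invalidate_needed for every CPU that must flush, broadcasts
 * INVALIDATE_TLB_VECTOR, then spins until the map drains; each target
 * clears its own bit in smp_invalidate_interrupt() after flushing.
 * The initiator also services its own bit while spinning, so two CPUs
 * flushing simultaneously ("crossing" invalidates) cannot deadlock
 * each other.
 */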
/*
 * Smarter SMP flushing macros.
 *		c/o Linus Torvalds.
 *
 * These mean you can really definitely utterly forget about
 * writing to user space from interrupts. (It's not allowed anyway).
 */
void flush_tlb_current_task(void)
{
	unsigned long vm_mask = 1 << current->processor;
	struct mm_struct *mm = current->mm;
	unsigned long cpu_mask = mm->cpu_vm_mask & ~vm_mask;

	mm->cpu_vm_mask = vm_mask;
	flush_tlb_others(cpu_mask);
	local_flush_tlb();
}

void flush_tlb_mm(struct mm_struct * mm)
{
	unsigned long vm_mask = 1 << current->processor;
	unsigned long cpu_mask = mm->cpu_vm_mask & ~vm_mask;

	mm->cpu_vm_mask = 0;
	if (current->active_mm == mm) {
		mm->cpu_vm_mask = vm_mask;
		local_flush_tlb();
	}
	flush_tlb_others(cpu_mask);
}

void flush_tlb_page(struct vm_area_struct * vma, unsigned long va)
{
	unsigned long vm_mask = 1 << current->processor;
	struct mm_struct *mm = vma->vm_mm;
	unsigned long cpu_mask = mm->cpu_vm_mask & ~vm_mask;

	mm->cpu_vm_mask = 0;
	if (current->active_mm == mm) {
		__flush_tlb_one(va);
		mm->cpu_vm_mask = vm_mask;
	}
	flush_tlb_others(cpu_mask);
}

void flush_tlb_all(void)
{
	flush_tlb_others(~(1 << current->processor));
	local_flush_tlb();
}
/*
 * this function sends a 'reschedule' IPI to another CPU.
 * it goes straight through and wastes no time serializing
 * anything. Worst case is that we lose a reschedule ...
 */

void smp_send_reschedule(int cpu)
{
	send_IPI_single(cpu, RESCHEDULE_VECTOR);
}

/*
 * this function sends a 'stop' IPI to all other CPUs in the system.
 * it goes straight through.
 */

void smp_send_stop(void)
{
	send_IPI_allbutself(STOP_CPU_VECTOR);
}
/* Structure and data for smp_call_function(). This is designed to minimise
 * static memory requirements. It also looks cleaner.
 */
struct smp_call_function_struct {
	void (*func) (void *info);
	void *info;
	atomic_t unstarted_count;
	atomic_t unfinished_count;
	int wait;
};
static volatile struct smp_call_function_struct *smp_call_function_data = NULL;

/*
 * this function sends a 'generic call function' IPI to all other CPUs
 * in the system.
 */

int smp_call_function (void (*func) (void *info), void *info, int retry,
		       int wait)
/*  [SUMMARY] Run a function on all other CPUs.
    <func> The function to run. This must be fast and non-blocking.
    <info> An arbitrary pointer to pass to the function.
    <retry> If true, keep retrying until ready.
    <wait> If true, wait until function has completed on other CPUs.
    [RETURNS] 0 on success, else a negative status code. Does not return until
    remote CPUs are nearly ready to execute <<func>> or have already executed it.
*/
{
	unsigned long timeout;
	struct smp_call_function_struct data;
	static spinlock_t lock = SPIN_LOCK_UNLOCKED;

	if (retry) {
		while (1) {
			if (smp_call_function_data) {
				schedule ();	/*  Give a mate a go  */
				continue;
			}
			spin_lock (&lock);
			if (smp_call_function_data) {
				spin_unlock (&lock);	/*  Bad luck  */
				continue;
			}
			/*  Mine, all mine!  */
			break;
		}
	}
	else {
		if (smp_call_function_data)
			return -EBUSY;
		spin_lock (&lock);
		if (smp_call_function_data) {
			spin_unlock (&lock);
			return -EBUSY;
		}
	}
	smp_call_function_data = &data;
	spin_unlock (&lock);
	data.func = func;
	data.info = info;
	atomic_set (&data.unstarted_count, smp_num_cpus - 1);
	data.wait = wait;
	if (wait)
		atomic_set (&data.unfinished_count, smp_num_cpus - 1);
	/*  Send a message to all other CPUs and wait for them to respond  */
	send_IPI_allbutself (CALL_FUNCTION_VECTOR);
	/*  Wait for response  */
	timeout = jiffies + JIFFIE_TIMEOUT;
	while ( (atomic_read (&data.unstarted_count) > 0) &&
		time_before (jiffies, timeout) )
		barrier ();
	if (atomic_read (&data.unstarted_count) > 0) {
		smp_call_function_data = NULL;
		return -ETIMEDOUT;
	}
	if (wait)
		while (atomic_read (&data.unfinished_count) > 0)
			barrier ();
	smp_call_function_data = NULL;
	return 0;
}
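
/*
 * Usage sketch (illustrative only; drain_cpu_state is a hypothetical
 * fast, non-blocking callback): run it on every other CPU and wait
 * for all of them to finish before continuing:
 *
 *	static void drain_cpu_state(void *info)
 *	{
 *		... per-CPU work, must not block or call schedule() ...
 *	}
 *
 *	if (smp_call_function(drain_cpu_state, NULL, 1, 1))
 *		printk("some CPUs did not respond\n");
 *
 * With wait == 1 the stack-allocated data structure stays alive until
 * every target has decremented unfinished_count, which is why the
 * callee may touch it after unstarted_count only in that case.
 */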
static unsigned int calibration_result;

void setup_APIC_timer(unsigned int clocks);

/*
 * Local timer interrupt handler. It does both profiling and
 * process statistics/rescheduling.
 *
 * We do profiling in every local tick, statistics/rescheduling
 * happen only every 'profiling multiplier' ticks. The default
 * multiplier is 1 and it can be changed by writing the new multiplier
 * value into /proc/profile.
 */

void smp_local_timer_interrupt(struct pt_regs * regs)
{
	int user = (user_mode(regs) != 0);
	int cpu = smp_processor_id();

	/*
	 * The profiling function is SMP safe. (nothing can mess
	 * around with "current", and the profiling counters are
	 * updated with atomic operations). This is especially
	 * useful with a profiling multiplier != 1
	 */
	if (!user)
		x86_do_profile(regs->eip);

	if (!--prof_counter[cpu]) {
		int system = 1 - user;
		struct task_struct * p = current;

		/*
		 * The multiplier may have changed since the last time we got
		 * to this point as a result of the user writing to
		 * /proc/profile. In this case we need to adjust the APIC
		 * timer accordingly.
		 *
		 * Interrupts are already masked off at this point.
		 */
		prof_counter[cpu] = prof_multiplier[cpu];
		if (prof_counter[cpu] != prof_old_multiplier[cpu]) {
			setup_APIC_timer(calibration_result/prof_counter[cpu]);
			prof_old_multiplier[cpu] = prof_counter[cpu];
		}

		/*
		 * After doing the above, we need to make like
		 * a normal interrupt - otherwise timer interrupts
		 * ignore the global interrupt lock, which is the
		 * WrongThing (tm) to do.
		 */

		irq_enter(cpu, 0);
		update_one_process(p, 1, user, system, cpu);
		if (p->pid) {
			p->counter -= 1;
			if (p->counter <= 0) {
				p->counter = 0;
				p->need_resched = 1;
			}
			if (p->priority < DEF_PRIORITY) {
				kstat.cpu_nice += user;
				kstat.per_cpu_nice[cpu] += user;
			} else {
				kstat.cpu_user += user;
				kstat.per_cpu_user[cpu] += user;
			}
			kstat.cpu_system += system;
			kstat.per_cpu_system[cpu] += system;
		}
		irq_exit(cpu, 0);
	}

	/*
	 * We take the 'long' return path, and there every subsystem
	 * grabs the appropriate locks (kernel lock/ irq lock).
	 *
	 * we might want to decouple profiling from the 'long path',
	 * and do the profiling totally in assembly.
	 *
	 * Currently this isn't too much of an issue (performance wise),
	 * we can take more than 100K local irqs per second on a 100 MHz P5.
	 */
}
/*
 * Local APIC timer interrupt. This is the most natural way for doing
 * local interrupts, but local timer interrupts can be emulated by
 * broadcast interrupts too. [in case the hw doesn't support APIC timers]
 *
 * [ if a single-CPU system runs an SMP kernel then we call the local
 *   interrupt as well. Thus we cannot inline the local irq ... ]
 */
void smp_apic_timer_interrupt(struct pt_regs * regs)
{
	/*
	 * NOTE! We'd better ACK the irq immediately,
	 * because timer handling can be slow, and we
	 * want to be able to accept NMI tlb invalidates
	 * during this time.
	 */
	ack_APIC_irq();
	smp_local_timer_interrupt(regs);
}

/*
 * Reschedule call back. Nothing to do,
 * all the work is done automatically when
 * we return from the interrupt.
 */
asmlinkage void smp_reschedule_interrupt(void)
{
	ack_APIC_irq();
}

/*
 * Invalidate call-back.
 *
 * Mark the CPU as a VM user if there is an active
 * thread holding on to an mm at this time. This
 * allows us to optimize CPU cross-calls even in the
 * presence of lazy TLB handling.
 */
asmlinkage void smp_invalidate_interrupt(void)
{
	struct task_struct *tsk = current;
	unsigned int cpu = tsk->processor;

	if (test_and_clear_bit(cpu, &smp_invalidate_needed)) {
		struct mm_struct *mm = tsk->mm;
		if (mm)
			atomic_set_mask(1 << cpu, &mm->cpu_vm_mask);
		local_flush_tlb();
	}
	ack_APIC_irq();
}

static void stop_this_cpu (void)
{
	/*
	 * Remove this CPU:
	 */
	clear_bit(smp_processor_id(), &cpu_online_map);

	if (cpu_data[smp_processor_id()].hlt_works_ok)
		for (;;) __asm__("hlt");
	for (;;);
}

/*
 * CPU halt call-back
 */
asmlinkage void smp_stop_cpu_interrupt(void)
{
	stop_this_cpu();
}

asmlinkage void smp_call_function_interrupt(void)
{
	void (*func) (void *info) = smp_call_function_data->func;
	void *info = smp_call_function_data->info;
	int wait = smp_call_function_data->wait;

	ack_APIC_irq ();
	/*  Notify initiating CPU that I've grabbed the data and am about to
	    execute the function  */
	atomic_dec (&smp_call_function_data->unstarted_count);
	/*  At this point the structure may be out of scope unless wait==1  */
	(*func) (info);
	if (wait)
		atomic_dec (&smp_call_function_data->unfinished_count);
}

/*
 * This interrupt should _never_ happen with our APIC/SMP architecture
 */
asmlinkage void smp_spurious_interrupt(void)
{
	ack_APIC_irq();
	/* see sw-dev-man vol 3, chapter 7.4.13.5 */
	printk("spurious APIC interrupt on CPU#%d, should never happen.\n",
			smp_processor_id());
}
/*
 * This part sets up the APIC 32 bit clock in LVTT1, with HZ interrupts
 * per second. We assume that the caller has already set up the local
 * APIC.
 *
 * The APIC timer is not exactly sync with the external timer chip, it
 * closely follows bus clocks.
 */

/*
 * The timer chip is already set up at HZ interrupts per second here,
 * but we do not accept timer interrupts yet. We only allow the BP
 * to calibrate.
 */
static unsigned int __init get_8254_timer_count(void)
{
	unsigned int count;

	outb_p(0x00, 0x43);
	count = inb_p(0x40);
	count |= inb_p(0x40) << 8;

	return count;
}
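
/*
 * The I/O sequence above is the standard 8254 counter-latch read:
 * writing 0x00 to the mode/command port (0x43) latches counter 0,
 * after which two reads from the counter port (0x40) return the
 * latched value, low byte first.
 */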
/*
 * This function sets up the local APIC timer, with a timeout of
 * 'clocks' APIC bus clock. During calibration we actually call
 * this function twice, once with a bogus timeout value, second
 * time for real. The other (noncalibrating) CPUs call this
 * function only once, with the real value.
 *
 * We are strictly in irqs off mode here, as we do not want to
 * get an APIC interrupt go off accidentally.
 *
 * We do reads before writes even if unnecessary, to get around the
 * APIC double write bug.
 */

#define APIC_DIVISOR 16

void setup_APIC_timer(unsigned int clocks)
{
	unsigned long lvtt1_value;
	unsigned int tmp_value;

	/*
	 * Unfortunately the local APIC timer cannot be set up into NMI
	 * mode. With the IO APIC we can re-route the external timer
	 * interrupt and broadcast it as an NMI to all CPUs, so no pain.
	 */
	tmp_value = apic_read(APIC_LVTT);
	lvtt1_value = APIC_LVT_TIMER_PERIODIC | LOCAL_TIMER_VECTOR;
	apic_write(APIC_LVTT, lvtt1_value);

	/*
	 * Divide PICLK by 16
	 */
	tmp_value = apic_read(APIC_TDCR);
	apic_write(APIC_TDCR, (tmp_value & ~APIC_TDR_DIV_1)
				| APIC_TDR_DIV_16);

	tmp_value = apic_read(APIC_TMICT);
	apic_write(APIC_TMICT, clocks/APIC_DIVISOR);
}

void __init wait_8254_wraparound(void)
{
	unsigned int curr_count, prev_count = ~0;
	int delta;

	curr_count = get_8254_timer_count();

	do {
		prev_count = curr_count;
		curr_count = get_8254_timer_count();
		delta = curr_count - prev_count;

		/*
		 * This limit for delta seems arbitrary, but it isn't, it's
		 * slightly above the level of error a buggy Mercury/Neptune
		 * chipset timer can cause.
		 */

	} while (delta < 300);
}
/*
 * In this function we calibrate APIC bus clocks to the external
 * timer. Unfortunately we cannot use jiffies and the timer irq
 * to calibrate, since some later bootup code depends on getting
 * the first irq? Ugh.
 *
 * We want to do the calibration only once since we
 * want to have local timer irqs synchronous. CPUs connected
 * by the same APIC bus have the very same bus frequency.
 * And we want to have irqs off anyways, no accidental
 * APIC irq that way.
 */

int __init calibrate_APIC_clock(void)
{
	unsigned long long t1, t2;
	long tt1, tt2;
	long calibration_result;
	int i;

	printk("calibrating APIC timer ... ");

	/*
	 * Put whatever arbitrary (but long enough) timeout
	 * value into the APIC clock, we just want to get the
	 * counter running for calibration.
	 */
	setup_APIC_timer(1000000000);

	/*
	 * The timer chip counts down to zero. Let's wait
	 * for a wraparound to start exact measurement:
	 * (the current tick might have been already half done)
	 */

	wait_8254_wraparound ();

	/*
	 * We wrapped around just now. Let's start:
	 */
	rdtscll(t1);
	tt1 = apic_read(APIC_TMCCT);

#define LOOPS (HZ/10)
	/*
	 * Let's wait LOOPS wraprounds:
	 */
	for (i = 0; i < LOOPS; i++)
		wait_8254_wraparound ();

	tt2 = apic_read(APIC_TMCCT);
	rdtscll(t2);

	/*
	 * The APIC bus clock counter is 32 bits only, it
	 * might have overflown, but note that we use signed
	 * longs, thus no extra care needed.
	 *
	 * underflown to be exact, as the timer counts down ;)
	 */

	calibration_result = (tt1-tt2)*APIC_DIVISOR/LOOPS;

	SMP_PRINTK(("\n..... %ld CPU clocks in 1 timer chip tick.",
			(unsigned long)(t2-t1)/LOOPS));

	SMP_PRINTK(("\n..... %ld APIC bus clocks in 1 timer chip tick.",
			calibration_result));

	printk("\n..... CPU clock speed is %ld.%04ld MHz.\n",
		((long)(t2-t1)/LOOPS)/(1000000/HZ),
		((long)(t2-t1)/LOOPS)%(1000000/HZ));

	printk("..... system bus clock speed is %ld.%04ld MHz.\n",
		calibration_result/(1000000/HZ),
		calibration_result%(1000000/HZ));
#undef LOOPS

	return calibration_result;
}
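
/*
 * Worked example for the formula above (illustrative numbers only):
 * each 8254 wraparound is one timer tick of 1/HZ seconds, and we time
 * LOOPS = HZ/10 of them, i.e. 0.1s total. The APIC counter decrements
 * at bus_clock/APIC_DIVISOR, so with HZ=100 and a 66 MHz bus the count
 * drops by 41250 per tick, tt1-tt2 = 412500 over 10 ticks, and
 * 412500*16/10 = 660000 bus clocks per tick - printed as 66.0000 MHz
 * since 660000/(1000000/HZ) = 66.
 */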
void __init setup_APIC_clock(void)
{
	unsigned long flags;

	static volatile int calibration_lock;

	__save_flags(flags);
	__cli();

	SMP_PRINTK(("setup_APIC_clock() called.\n"));

	/*
	 * [ setup_APIC_clock() is called from all CPUs, but we want
	 *   to do this part of the setup only once ... and it fits
	 *   here best ]
	 */
	if (!test_and_set_bit(0, &calibration_lock)) {

		calibration_result = calibrate_APIC_clock();
		/*
		 * Signal completion to the other CPU[s]:
		 */
		calibration_lock = 3;

	} else {
		/*
		 * Other CPU is calibrating, wait for finish:
		 */
		SMP_PRINTK(("waiting for other CPU calibrating APIC ... "));
		while (calibration_lock == 1);
		SMP_PRINTK(("done, continuing.\n"));
	}

	/*
	 * Now set up the timer for real.
	 */

	setup_APIC_timer (calibration_result);

	/*
	 * We ACK the APIC, just in case there is something pending.
	 */

	ack_APIC_irq ();

	__restore_flags(flags);
}
/*
 * the frequency of the profiling timer can be changed
 * by writing a multiplier value into /proc/profile.
 */
int setup_profiling_timer(unsigned int multiplier)
{
	int i;

	/*
	 * Sanity check. [at least 500 APIC cycles should be
	 * between APIC interrupts as a rule of thumb, to avoid
	 * irqs flooding us]
	 */
	if ( (!multiplier) || (calibration_result/multiplier < 500))
		return -EINVAL;

	/*
	 * Set the new multiplier for each CPU. CPUs don't start using the
	 * new values until the next timer interrupt in which they do process
	 * accounting. At that time they also adjust their APIC timers
	 * accordingly.
	 */
	for (i = 0; i < NR_CPUS; ++i)
		prof_multiplier[i] = multiplier;

	return 0;
}
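
/*
 * For example, a multiplier of 4 makes each CPU's local APIC timer
 * fire at 4*HZ, so profile ticks arrive four times as often, while
 * process accounting still happens at HZ: the per-CPU prof_counter is
 * reloaded with the multiplier and accounting runs only when it
 * reaches zero in smp_local_timer_interrupt() above.
 */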
#undef APIC_DIVISOR