2 * Intel MP v1.1/v1.4 specification support routines for multi-pentium
5 * (c) 1995 Alan Cox, Building #3 <alan@redhat.com>
8 * Supported by Caldera http://www.caldera.com.
9 * Much of the core SMP work is based on previous work by Thomas Radke, to
10 * whom a great many thanks are extended.
12 * Thanks to Intel for making available several different Pentium,
13 * Pentium Pro and Pentium-II/Xeon MP machines.
15 * This code is released under the GNU public license version 2 or
19 * Felix Koop : NR_CPUS used properly
20 * Jose Renau : Handle single CPU case.
21 * Alan Cox : By repeated request 8) - Total BogoMIP report.
22 * Greg Wright : Fix for kernel stacks panic.
23 * Erich Boleyn : MP v1.4 and additional changes.
24 * Matthias Sattler : Changes for 2.1 kernel map.
25 * Michel Lespinasse : Changes for 2.1 kernel map.
26 * Michael Chastain : Change trampoline.S to gnu as.
27 * Alan Cox : Dumb bug: 'B' step PPro's are fine
28 * Ingo Molnar : Added APIC timers, based on code
30 * Alan Cox : Added EBDA scanning
31 * Ingo Molnar : various cleanups and rewrites
32 * Tigran Aivazian : fixed "0.00 in /proc/uptime on SMP" bug.
35 #include <linux/config.h>
37 #include <linux/kernel_stat.h>
38 #include <linux/delay.h>
39 #include <linux/mc146818rtc.h>
40 #include <linux/smp_lock.h>
41 #include <linux/init.h>
47 #define JIFFIE_TIMEOUT 100
49 extern void update_one_process( struct task_struct
*p
,
50 unsigned long ticks
, unsigned long user
,
51 unsigned long system
, int cpu
);
53 * Some notes on processor bugs:
55 * Pentium and Pentium Pro (and all CPUs) have bugs. The Linux issues
56 * for SMP are handled as follows.
59 * Occasional delivery of 'spurious interrupt' as trap #16. This
60 * is very rare. The kernel logs the event and recovers
63 * There is a marginal case where REP MOVS on 100MHz SMP
64 * machines with B stepping processors can fail. XXX should provide
65 * an L1cache=Writethrough or L1cache=off option.
67 * B stepping CPUs may hang. There are hardware work arounds
68 * for this. We warn about it in case your board doesnt have the work
69 * arounds. Basically thats so I can tell anyone with a B stepping
70 * CPU and SMP problems "tough".
72 * Specific items [From Pentium Processor Specification Update]
74 * 1AP. Linux doesn't use remote read
75 * 2AP. Linux doesn't trust APIC errors
76 * 3AP. We work around this
77 * 4AP. Linux never generated 3 interrupts of the same priority
78 * to cause a lost local interrupt.
79 * 5AP. Remote read is never used
80 * 9AP. XXX NEED TO CHECK WE HANDLE THIS XXX
81 * 10AP. XXX NEED TO CHECK WE HANDLE THIS XXX
82 * 11AP. Linux reads the APIC between writes to avoid this, as per
83 * the documentation. Make sure you preserve this as it affects
84 * the C stepping chips too.
86 * If this sounds worrying believe me these bugs are ___RARE___ and
87 * there's about nothing of note with C stepping upwards.
92 spinlock_t kernel_flag
= SPIN_LOCK_UNLOCKED
;
95 * function prototypes:
97 static void cache_APIC_registers (void);
98 static void stop_this_cpu (void);
100 static int smp_b_stepping
= 0; /* Set if we find a B stepping CPU */
102 static int max_cpus
= -1; /* Setup configured maximum number of CPUs to activate */
103 int smp_found_config
=0; /* Have we found an SMP box */
105 unsigned long cpu_present_map
= 0; /* Bitmask of physically existing CPUs */
106 unsigned long cpu_online_map
= 0; /* Bitmask of currently online CPUs */
107 int smp_num_cpus
= 0; /* Total count of live CPUs */
108 int smp_threads_ready
=0; /* Set when the idlers are all forked */
109 volatile int cpu_number_map
[NR_CPUS
]; /* which CPU maps to which logical number */
110 volatile int __cpu_logical_map
[NR_CPUS
]; /* which logical number maps to which CPU */
111 static volatile unsigned long cpu_callin_map
[NR_CPUS
] = {0,}; /* We always use 0 the rest is ready for parallel delivery */
112 static volatile unsigned long cpu_callout_map
[NR_CPUS
] = {0,}; /* We always use 0 the rest is ready for parallel delivery */
113 volatile unsigned long smp_invalidate_needed
; /* Used for the invalidate map that's also checked in the spinlock */
114 volatile unsigned long kstack_ptr
; /* Stack vector for booting CPUs */
115 struct cpuinfo_x86 cpu_data
[NR_CPUS
]; /* Per CPU bogomips and other parameters */
116 static unsigned int num_processors
= 1; /* Internal processor count */
117 unsigned long mp_ioapic_addr
= 0xFEC00000; /* Address of the I/O apic (not yet used) */
118 unsigned char boot_cpu_id
= 0; /* Processor that is doing the boot up */
119 static int smp_activated
= 0; /* Tripped once we need to start cross invalidating */
120 int apic_version
[NR_CPUS
]; /* APIC version number */
121 unsigned long apic_retval
; /* Just debugging the assembler.. */
123 volatile unsigned long kernel_counter
=0; /* Number of times the processor holds the lock */
124 volatile unsigned long syscall_count
=0; /* Number of times the processor holds the syscall lock */
126 volatile unsigned long ipi_count
; /* Number of IPIs delivered */
128 const char lk_lockmsg
[] = "lock from interrupt context at %p\n";
130 int mp_bus_id_to_type
[MAX_MP_BUSSES
] = { -1, };
131 extern int nr_ioapics
;
132 extern struct mpc_config_ioapic mp_apics
[MAX_IO_APICS
];
133 extern int mp_irq_entries
;
134 extern struct mpc_config_intsrc mp_irqs
[MAX_IRQ_SOURCES
];
135 extern int mpc_default_type
;
136 int mp_bus_id_to_pci_bus
[MAX_MP_BUSSES
] = { -1, };
137 int mp_current_pci_id
= 0;
138 unsigned long mp_lapic_addr
= 0;
139 int skip_ioapic_setup
= 0; /* 1 if "noapic" boot option passed */
141 /* #define SMP_DEBUG */
144 #define SMP_PRINTK(x) printk x
146 #define SMP_PRINTK(x)
150 * IA s/w dev Vol 3, Section 7.4
152 #define APIC_DEFAULT_PHYS_BASE 0xfee00000
154 #define CLEAR_TSC wrmsr(0x10, 0x00001000, 0x00001000)
157 * Setup routine for controlling SMP activation
159 * Command-line option of "nosmp" or "maxcpus=0" will disable SMP
160 * activation entirely (the MPS table probe still happens, though).
162 * Command-line option of "maxcpus=<NUM>", where <NUM> is an integer
163 * greater than 0, limits the maximum number of CPUs activated in
167 void __init
smp_setup(char *str
, int *ints
)
169 if (ints
&& ints
[0] > 0)
175 void ack_APIC_irq(void)
180 apic_read(APIC_SPIV
);
182 /* Docs say use 0 for future compatibility */
183 apic_write(APIC_EOI
, 0);
187 * Intel MP BIOS table parsing routines:
190 #ifndef CONFIG_X86_VISWS_APIC
192 * Checksum an MP configuration block.
195 static int mpf_checksum(unsigned char *mp
, int len
)
204 * Processor encoding in an MP configuration block
207 static char *mpc_family(int family
,int model
)
210 static char *model_defs
[]=
213 "80486SX","80486DX/2 or 80487",
214 "80486SL","Intel5X2(tm)",
219 return("Pentium(tm) Pro");
221 return("Pentium(tm)");
222 if (family
==0x0F && model
==0x0F)
223 return("Special controller");
224 if (family
==0x04 && model
<9)
225 return model_defs
[model
];
226 sprintf(n
,"Unknown CPU [%d:%d]",family
, model
);
235 static int __init
smp_read_mpc(struct mp_config_table
*mpc
)
238 int count
=sizeof(*mpc
);
240 unsigned char *mpt
=((unsigned char *)mpc
)+count
;
242 if (memcmp(mpc
->mpc_signature
,MPC_SIGNATURE
,4))
244 panic("SMP mptable: bad signature [%c%c%c%c]!\n",
245 mpc
->mpc_signature
[0],
246 mpc
->mpc_signature
[1],
247 mpc
->mpc_signature
[2],
248 mpc
->mpc_signature
[3]);
251 if (mpf_checksum((unsigned char *)mpc
,mpc
->mpc_length
))
253 panic("SMP mptable: checksum error!\n");
256 if (mpc
->mpc_spec
!=0x01 && mpc
->mpc_spec
!=0x04)
258 printk("Bad Config Table version (%d)!!\n",mpc
->mpc_spec
);
261 memcpy(str
,mpc
->mpc_oem
,8);
263 printk("OEM ID: %s ",str
);
265 memcpy(str
,mpc
->mpc_productid
,12);
267 printk("Product ID: %s ",str
);
269 printk("APIC at: 0x%lX\n",mpc
->mpc_lapic
);
271 /* save the local APIC address, it might be non-default */
272 mp_lapic_addr
= mpc
->mpc_lapic
;
275 * Now process the configuration blocks.
278 while(count
<mpc
->mpc_length
)
284 struct mpc_config_processor
*m
=
285 (struct mpc_config_processor
*)mpt
;
286 if (m
->mpc_cpuflag
&CPU_ENABLED
)
288 printk("Processor #%d %s APIC version %d\n",
290 mpc_family((m
->mpc_cpufeature
&
296 if (m
->mpc_featureflag
&(1<<0))
297 printk(" Floating point unit present.\n");
298 if (m
->mpc_featureflag
&(1<<7))
299 printk(" Machine Exception supported.\n");
300 if (m
->mpc_featureflag
&(1<<8))
301 printk(" 64 bit compare & exchange supported.\n");
302 if (m
->mpc_featureflag
&(1<<9))
303 printk(" Internal APIC present.\n");
305 if (m
->mpc_cpuflag
&CPU_BOOTPROCESSOR
)
307 SMP_PRINTK((" Bootup CPU\n"));
308 boot_cpu_id
=m
->mpc_apicid
;
310 else /* Boot CPU already counted */
313 if (m
->mpc_apicid
>NR_CPUS
)
314 printk("Processor #%d unused. (Max %d processors).\n",m
->mpc_apicid
, NR_CPUS
);
317 int ver
= m
->mpc_apicver
;
319 cpu_present_map
|=(1<<m
->mpc_apicid
);
324 printk("BIOS bug, APIC version is 0 for CPU#%d! fixing up to 0x10. (tell your hw vendor)\n", m
->mpc_apicid
);
327 apic_version
[m
->mpc_apicid
] = ver
;
336 struct mpc_config_bus
*m
=
337 (struct mpc_config_bus
*)mpt
;
338 memcpy(str
,m
->mpc_bustype
,6);
340 SMP_PRINTK(("Bus #%d is %s\n",
343 if (strncmp(m
->mpc_bustype
,"ISA",3) == 0)
344 mp_bus_id_to_type
[m
->mpc_busid
] =
347 if (strncmp(m
->mpc_bustype
,"EISA",4) == 0)
348 mp_bus_id_to_type
[m
->mpc_busid
] =
350 if (strncmp(m
->mpc_bustype
,"PCI",3) == 0) {
351 mp_bus_id_to_type
[m
->mpc_busid
] =
353 mp_bus_id_to_pci_bus
[m
->mpc_busid
] =
363 struct mpc_config_ioapic
*m
=
364 (struct mpc_config_ioapic
*)mpt
;
365 if (m
->mpc_flags
&MPC_APIC_USABLE
)
368 printk("I/O APIC #%d Version %d at 0x%lX.\n",
369 m
->mpc_apicid
,m
->mpc_apicver
,
371 mp_apics
[nr_ioapics
] = *m
;
372 if (++nr_ioapics
> MAX_IO_APICS
)
381 struct mpc_config_intsrc
*m
=
382 (struct mpc_config_intsrc
*)mpt
;
384 mp_irqs
[mp_irq_entries
] = *m
;
385 if (++mp_irq_entries
== MAX_IRQ_SOURCES
) {
386 printk("Max irq sources exceeded!!\n");
387 printk("Skipping remaining sources.\n");
397 struct mpc_config_intlocal
*m
=
398 (struct mpc_config_intlocal
*)mpt
;
405 if (ioapics
> MAX_IO_APICS
)
407 printk("Warning: Max I/O APICs exceeded (max %d, found %d).\n", MAX_IO_APICS
, ioapics
);
408 printk("Warning: switching to non APIC mode.\n");
411 return num_processors
;
415 * Scan the memory blocks for an SMP configuration block.
418 static int __init
smp_scan_config(unsigned long base
, unsigned long length
)
420 unsigned long *bp
=phys_to_virt(base
);
421 struct intel_mp_floating
*mpf
;
423 SMP_PRINTK(("Scan SMP from %p for %ld bytes.\n",
425 if (sizeof(*mpf
)!=16)
426 printk("Error: MPF size\n");
430 if (*bp
==SMP_MAGIC_IDENT
)
432 mpf
=(struct intel_mp_floating
*)bp
;
433 if (mpf
->mpf_length
==1 &&
434 !mpf_checksum((unsigned char *)bp
,16) &&
435 (mpf
->mpf_specification
== 1
436 || mpf
->mpf_specification
== 4) )
438 printk("Intel MultiProcessor Specification v1.%d\n", mpf
->mpf_specification
);
439 if (mpf
->mpf_feature2
&(1<<7))
440 printk(" IMCR and PIC compatibility mode.\n");
442 printk(" Virtual Wire compatibility mode.\n");
445 * Now see if we need to read further.
447 if (mpf
->mpf_feature1
!=0)
451 /* local APIC has default address */
452 mp_lapic_addr
= APIC_DEFAULT_PHYS_BASE
;
454 * We need to know what the local
455 * APIC id of the boot CPU is!
460 * HACK HACK HACK HACK HACK HACK HACK HACK HACK HACK HACK HACK HACK
462 * It's not just a crazy hack. ;-)
465 * Standard page mapping
466 * functions don't work yet.
467 * We know that page 0 is not
468 * used. Steal it for now!
472 pg0
[0] = (mp_lapic_addr
| _PAGE_RW
| _PAGE_PRESENT
);
475 boot_cpu_id
= GET_APIC_ID(*((volatile unsigned long *) APIC_ID
));
486 * END OF HACK END OF HACK END OF HACK END OF HACK END OF HACK
490 * 2 CPUs, numbered 0 & 1.
494 printk("I/O APIC at 0xFEC00000.\n");
497 * Save the default type number, we
498 * need it later to set the IO-APIC
501 mpc_default_type
= mpf
->mpf_feature1
;
503 printk("Bus #0 is ");
505 switch(mpf
->mpf_feature1
)
512 printk("EISA with no IRQ8 chaining\n");
525 printk("???\nUnknown standard configuration %d\n",
529 if (mpf
->mpf_feature1
>4)
531 printk("Bus #1 is PCI\n");
534 * Set local APIC version to
535 * the integrated form.
536 * It's initialized to zero
537 * otherwise, representing
538 * a discrete 82489DX.
540 apic_version
[0] = 0x10;
541 apic_version
[1] = 0x10;
544 * Read the physical hardware table.
545 * Anything here will override the
548 if (mpf
->mpf_physptr
)
549 smp_read_mpc((void *)mpf
->mpf_physptr
);
551 __cpu_logical_map
[0] = boot_cpu_id
;
552 global_irq_holder
= boot_cpu_id
;
553 current
->processor
= boot_cpu_id
;
555 printk("Processors: %d\n", num_processors
);
557 * Only use the first configuration found.
569 void __init
init_intel_smp (void)
572 * FIXME: Linux assumes you have 640K of base ram..
573 * this continues the error...
575 * 1) Scan the bottom 1K for a signature
576 * 2) Scan the top 1K of base RAM
577 * 3) Scan the 64K of bios
579 if (!smp_scan_config(0x0,0x400) &&
580 !smp_scan_config(639*0x400,0x400) &&
581 !smp_scan_config(0xF0000,0x10000)) {
583 * If it is an SMP machine we should know now, unless the
584 * configuration is in an EISA/MCA bus machine with an
585 * extended bios data area.
587 * there is a real-mode segmented pointer pointing to the
588 * 4K EBDA area at 0x40E, calculate and scan it here.
590 * NOTE! There are Linux loaders that will corrupt the EBDA
591 * area, and as such this kind of SMP config may be less
592 * trustworthy, simply because the SMP table may have been
593 * stomped on during early boot. These loaders are buggy and
596 unsigned int address
;
598 address
= *(unsigned short *)phys_to_virt(0x40E);
600 smp_scan_config(address
, 0x1000);
601 if (smp_found_config
)
602 printk(KERN_WARNING
"WARNING: MP table in the EBDA can be UNSAFE, contact linux-smp@vger.rutgers.edu if you experience SMP problems!\n");
609 * The Visual Workstation is Intel MP compliant in the hardware
610 * sense, but it doesnt have a BIOS(-configuration table).
611 * No problem for Linux.
613 void __init
init_visws_smp(void)
615 smp_found_config
= 1;
617 cpu_present_map
|= 2; /* or in id 1 */
618 apic_version
[1] |= 0x10; /* integrated APIC */
619 apic_version
[0] |= 0x10;
621 mp_lapic_addr
= APIC_DEFAULT_PHYS_BASE
;
627 * - Intel MP Configuration Table
628 * - or SGI Visual Workstation configuration
630 void __init
init_smp_config (void)
642 * Trampoline 80x86 program as an array.
645 extern unsigned char trampoline_data
[];
646 extern unsigned char trampoline_end
[];
647 static unsigned char *trampoline_base
;
650 * Currently trivial. Write the real->protected mode
651 * bootstrap into the page concerned. The caller
652 * has made sure it's suitably aligned.
655 static unsigned long __init
setup_trampoline(void)
657 memcpy(trampoline_base
, trampoline_data
, trampoline_end
- trampoline_data
);
658 return virt_to_phys(trampoline_base
);
662 * We are called very early to get the low memory for the
663 * SMP bootup trampoline page.
665 unsigned long __init
smp_alloc_memory(unsigned long mem_base
)
667 if (virt_to_phys((void *)mem_base
) >= 0x9F000)
668 panic("smp_alloc_memory: Insufficient low memory for kernel trampoline 0x%lx.", mem_base
);
669 trampoline_base
= (void *)mem_base
;
670 return mem_base
+ PAGE_SIZE
;
674 * The bootstrap kernel entry code has set these up. Save them for
678 void __init
smp_store_cpu_info(int id
)
680 struct cpuinfo_x86
*c
=&cpu_data
[id
];
685 c
->pgtable_cache_sz
= 0;
688 * Mask B, Pentium, but not Pentium MMX
690 if (c
->x86_vendor
== X86_VENDOR_INTEL
&&
692 c
->x86_mask
>= 1 && c
->x86_mask
<= 4 &&
694 smp_b_stepping
=1; /* Remember we have B step Pentia with bugs */
698 * Architecture specific routine called by the kernel just before init is
699 * fired off. This allows the BP to have everything in order [we hope].
700 * At the end of this all the APs will hit the system scheduling and off
701 * we go. Each AP will load the system gdt's and jump through the kernel
702 * init into idle(). At this point the scheduler will one day take over
703 * and give them jobs to do. smp_callin is a standard routine
704 * we use to track CPUs as they power up.
707 static atomic_t smp_commenced
= ATOMIC_INIT(0);
709 void __init
smp_commence(void)
712 * Lets the callins below out of their loop.
714 SMP_PRINTK(("Setting commenced=1, go go go\n"));
717 atomic_set(&smp_commenced
,1);
720 void __init
enable_local_APIC(void)
724 value
= apic_read(APIC_SPIV
);
725 value
|= (1<<8); /* Enable APIC (bit==1) */
726 value
&= ~(1<<9); /* Enable focus processor (bit==0) */
727 value
|= 0xff; /* Set spurious IRQ vector to 0xff */
728 apic_write(APIC_SPIV
,value
);
731 * Set Task Priority to 'accept all'
733 value
= apic_read(APIC_TASKPRI
);
734 value
&= ~APIC_TPRI_MASK
;
735 apic_write(APIC_TASKPRI
,value
);
738 * Clear the logical destination ID, just to be safe.
739 * also, put the APIC into flat delivery mode.
741 value
= apic_read(APIC_LDR
);
742 value
&= ~APIC_LDR_MASK
;
743 apic_write(APIC_LDR
,value
);
745 value
= apic_read(APIC_DFR
);
746 value
|= SET_APIC_DFR(0xf);
747 apic_write(APIC_DFR
, value
);
749 udelay(100); /* B safe */
752 unsigned long __init
init_smp_mappings(unsigned long memory_start
)
754 unsigned long apic_phys
;
756 memory_start
= PAGE_ALIGN(memory_start
);
757 if (smp_found_config
) {
758 apic_phys
= mp_lapic_addr
;
761 * set up a fake all zeroes page to simulate the
762 * local APIC and another one for the IO-APIC. We
763 * could use the real zero-page, but it's safer
764 * this way if some buggy code writes to this page ...
766 apic_phys
= __pa(memory_start
);
767 memset((void *)memory_start
, 0, PAGE_SIZE
);
768 memory_start
+= PAGE_SIZE
;
770 set_fixmap(FIX_APIC_BASE
,apic_phys
);
771 printk("mapped APIC to %08lx (%08lx)\n", APIC_BASE
, apic_phys
);
773 #ifdef CONFIG_X86_IO_APIC
775 unsigned long ioapic_phys
, idx
= FIX_IO_APIC_BASE_0
;
778 for (i
= 0; i
< nr_ioapics
; i
++) {
779 if (smp_found_config
) {
780 ioapic_phys
= mp_apics
[i
].mpc_apicaddr
;
782 ioapic_phys
= __pa(memory_start
);
783 memset((void *)memory_start
, 0, PAGE_SIZE
);
784 memory_start
+= PAGE_SIZE
;
786 set_fixmap(idx
,ioapic_phys
);
787 printk("mapped IOAPIC to %08lx (%08lx)\n",
788 __fix_to_virt(idx
), ioapic_phys
);
797 extern void calibrate_delay(void);
799 void __init
smp_callin(void)
802 unsigned long timeout
;
805 * (This works even if the APIC is not enabled.)
807 cpuid
= GET_APIC_ID(apic_read(APIC_ID
));
809 SMP_PRINTK(("CPU#%d waiting for CALLOUT\n", cpuid
));
812 * STARTUP IPIs are fragile beasts as they might sometimes
813 * trigger some glue motherboard logic. Complete APIC bus
814 * silence for 1 second, this overestimates the time the
815 * boot CPU is spending to send the up to 2 STARTUP IPIs
816 * by a factor of two. This should be enough.
820 * Waiting 2s total for startup (udelay is not yet working)
822 timeout
= jiffies
+ 2*HZ
;
823 while (time_before(jiffies
,timeout
))
826 * Has the boot CPU finished it's STARTUP sequence?
828 if (test_bit(cpuid
, (unsigned long *)&cpu_callout_map
[0]))
832 while (!time_before(jiffies
,timeout
)) {
833 printk("BUG: CPU%d started up but did not get a callout!\n",
839 * the boot CPU has finished the init stage and is spinning
840 * on callin_map until we finish. We are free to set up this
841 * CPU, first the APIC. (this is probably redundant on most
845 SMP_PRINTK(("CALLIN, before enable_local_APIC().\n"));
849 * Set up our APIC timer.
856 /* Must be done before calibration delay is computed */
857 mtrr_init_secondary_cpu ();
863 SMP_PRINTK(("Stack at about %p\n",&cpuid
));
866 * Save our processor parameters
868 smp_store_cpu_info(cpuid
);
871 * Allow the master to continue.
873 set_bit(cpuid
, (unsigned long *)&cpu_callin_map
[0]);
878 extern int cpu_idle(void * unused
);
881 * Activate a secondary processor.
883 int __init
start_secondary(void *unused
)
886 * Dont put anything before smp_callin(), SMP
887 * booting is too fragile that we want to limit the
888 * things done here to the most necessary things.
892 while (!atomic_read(&smp_commenced
))
894 return cpu_idle(NULL
);
898 * Everything has been set up for the secondary
899 * CPUs - they just need to reload everything
900 * from the task structure
901 * This function must not return.
903 void __init
initialize_secondary(void)
906 * We don't actually need to load the full TSS,
907 * basically just the stack pointer and the eip.
914 :"r" (current
->thread
.esp
),"r" (current
->thread
.eip
));
922 static void __init
do_boot_cpu(int i
)
926 struct task_struct
*idle
;
927 unsigned long send_status
, accept_status
;
928 int timeout
, num_starts
, j
;
929 unsigned long start_eip
;
932 * We need an idle process for each processor.
934 kernel_thread(start_secondary
, NULL
, CLONE_PID
);
938 * We remove it from the pidhash and the runqueue
939 * once we got the process:
941 idle
= init_task
.prev_task
;
943 init_tasks
[cpucount
] = idle
;
945 panic("No idle process for CPU %d", i
);
948 __cpu_logical_map
[cpucount
] = i
;
949 cpu_number_map
[i
] = cpucount
;
950 idle
->has_cpu
= 1; /* we schedule the first task manually */
951 idle
->thread
.eip
= (unsigned long) start_secondary
;
953 del_from_runqueue(idle
);
954 unhash_process(idle
);
956 /* start_eip had better be page-aligned! */
957 start_eip
= setup_trampoline();
959 printk("Booting processor %d eip %lx\n", i
, start_eip
); /* So we see what's up */
960 stack_start
.esp
= (void *) (1024 + PAGE_SIZE
+ (char *)idle
);
963 * This grunge runs the startup process for
964 * the targeted processor.
967 SMP_PRINTK(("Setting warm reset code and vector.\n"));
969 CMOS_WRITE(0xa, 0xf);
971 SMP_PRINTK(("1.\n"));
972 *((volatile unsigned short *) phys_to_virt(0x469)) = start_eip
>> 4;
973 SMP_PRINTK(("2.\n"));
974 *((volatile unsigned short *) phys_to_virt(0x467)) = start_eip
& 0xf;
975 SMP_PRINTK(("3.\n"));
977 maincfg
=swapper_pg_dir
[0];
978 ((unsigned long *)swapper_pg_dir
)[0]=0x102007;
981 * Be paranoid about clearing APIC errors.
984 if ( apic_version
[i
] & 0xF0 )
986 apic_write(APIC_ESR
, 0);
987 accept_status
= (apic_read(APIC_ESR
) & 0xEF);
991 * Status is now clean
998 * Starting actual IPI sequence...
1001 SMP_PRINTK(("Asserting INIT.\n"));
1007 cfg
=apic_read(APIC_ICR2
);
1009 apic_write(APIC_ICR2
, cfg
|SET_APIC_DEST_FIELD(i
)); /* Target chip */
1010 cfg
=apic_read(APIC_ICR
);
1011 cfg
&=~0xCDFFF; /* Clear bits */
1012 cfg
|= (APIC_DEST_LEVELTRIG
| APIC_DEST_ASSERT
| APIC_DEST_DM_INIT
);
1013 apic_write(APIC_ICR
, cfg
); /* Send IPI */
1016 SMP_PRINTK(("Deasserting INIT.\n"));
1018 cfg
=apic_read(APIC_ICR2
);
1020 apic_write(APIC_ICR2
, cfg
|SET_APIC_DEST_FIELD(i
)); /* Target chip */
1021 cfg
=apic_read(APIC_ICR
);
1022 cfg
&=~0xCDFFF; /* Clear bits */
1023 cfg
|= (APIC_DEST_LEVELTRIG
| APIC_DEST_DM_INIT
);
1024 apic_write(APIC_ICR
, cfg
); /* Send IPI */
1027 * Should we send STARTUP IPIs ?
1029 * Determine this based on the APIC version.
1030 * If we don't have an integrated APIC, don't
1031 * send the STARTUP IPIs.
1034 if ( apic_version
[i
] & 0xF0 )
1040 * Run STARTUP IPI loop.
1043 for (j
= 1; !(send_status
|| accept_status
)
1044 && (j
<= num_starts
) ; j
++)
1046 SMP_PRINTK(("Sending STARTUP #%d.\n",j
));
1047 apic_write(APIC_ESR
, 0);
1048 SMP_PRINTK(("After apic_write.\n"));
1054 cfg
=apic_read(APIC_ICR2
);
1056 apic_write(APIC_ICR2
, cfg
|SET_APIC_DEST_FIELD(i
)); /* Target chip */
1057 cfg
=apic_read(APIC_ICR
);
1058 cfg
&=~0xCDFFF; /* Clear bits */
1059 cfg
|= (APIC_DEST_DM_STARTUP
| (start_eip
>> 12)); /* Boot on the stack */
1060 SMP_PRINTK(("Before start apic_write.\n"));
1061 apic_write(APIC_ICR
, cfg
); /* Kick the second */
1063 SMP_PRINTK(("Startup point 1.\n"));
1066 SMP_PRINTK(("Waiting for send to finish...\n"));
1070 send_status
= apic_read(APIC_ICR
) & 0x1000;
1071 } while (send_status
&& (timeout
++ < 1000));
1074 * Give the other CPU some time to accept the IPI.
1077 accept_status
= (apic_read(APIC_ESR
) & 0xEF);
1079 SMP_PRINTK(("After Startup.\n"));
1081 if (send_status
) /* APIC never delivered?? */
1082 printk("APIC never delivered???\n");
1083 if (accept_status
) /* Send accept error */
1084 printk("APIC delivery error (%lx).\n", accept_status
);
1086 if ( !(send_status
|| accept_status
) )
1089 * allow APs to start initializing.
1091 SMP_PRINTK(("Before Callout %d.\n", i
));
1092 set_bit(i
, (unsigned long *)&cpu_callout_map
[0]);
1093 SMP_PRINTK(("After Callout %d.\n", i
));
1095 for(timeout
=0;timeout
<50000;timeout
++)
1097 if (cpu_callin_map
[0]&(1<<i
))
1098 break; /* It has booted */
1099 udelay(100); /* Wait 5s total for a response */
1101 if (cpu_callin_map
[0]&(1<<i
))
1103 /* number CPUs logically, starting from 1 (BSP is 0) */
1105 cpu_number_map
[i
] = cpucount
;
1106 __cpu_logical_map
[cpucount
] = i
;
1109 printk("CPU%d: ", i
);
1110 print_cpu_info(&cpu_data
[i
]);
1114 if (*((volatile unsigned char *)phys_to_virt(8192))==0xA5)
1115 printk("Stuck ??\n");
1117 printk("Not responding.\n");
1119 SMP_PRINTK(("CPU has booted.\n"));
1123 __cpu_logical_map
[cpucount
] = -1;
1124 cpu_number_map
[i
] = -1;
1128 swapper_pg_dir
[0]=maincfg
;
1131 /* mark "stuck" area as not stuck */
1132 *((volatile unsigned long *)phys_to_virt(8192)) = 0;
1135 cycles_t cacheflush_time
;
1136 extern unsigned long cpu_hz
;
1138 static void smp_tune_scheduling (void)
1140 unsigned long cachesize
;
1142 * Rough estimation for SMP scheduling, this is the number of
1143 * cycles it takes for a fully memory-limited process to flush
1144 * the SMP-local cache.
1146 * (For a P5 this pretty much means we will choose another idle
1147 * CPU almost always at wakeup time (this is due to the small
1148 * L1 cache), on PIIs it's around 50-100 usecs, depending on
1154 * this basically disables processor-affinity
1155 * scheduling on SMP without a TSC.
1157 cacheflush_time
= 0;
1160 cachesize
= boot_cpu_data
.x86_cache_size
;
1161 if (cachesize
== -1)
1162 cachesize
= 8; /* Pentiums */
1164 cacheflush_time
= cpu_hz
/1024*cachesize
/5000;
1167 printk("per-CPU timeslice cutoff: %ld.%02ld usecs.\n",
1168 (long)cacheflush_time
/(cpu_hz
/1000000),
1169 ((long)cacheflush_time
*100/(cpu_hz
/1000000)) % 100);
1172 unsigned int prof_multiplier
[NR_CPUS
];
1173 unsigned int prof_old_multiplier
[NR_CPUS
];
1174 unsigned int prof_counter
[NR_CPUS
];
1177 * Cycle through the processors sending APIC IPIs to boot each.
1180 void __init
smp_boot_cpus(void)
1185 /* Must be done before other processors booted */
1186 mtrr_init_boot_cpu ();
1189 * Initialize the logical to physical CPU number mapping
1190 * and the per-CPU profiling counter/multiplier
1193 for (i
= 0; i
< NR_CPUS
; i
++) {
1194 cpu_number_map
[i
] = -1;
1195 prof_counter
[i
] = 1;
1196 prof_old_multiplier
[i
] = 1;
1197 prof_multiplier
[i
] = 1;
1201 * Setup boot CPU information
1204 smp_store_cpu_info(boot_cpu_id
); /* Final full version of the data */
1205 smp_tune_scheduling();
1206 printk("CPU%d: ", boot_cpu_id
);
1207 print_cpu_info(&cpu_data
[boot_cpu_id
]);
1210 * not necessary because the MP table should list the boot
1211 * CPU too, but we do it for the sake of robustness anyway.
1212 * (and for the case when a non-SMP board boots an SMP kernel)
1214 cpu_present_map
|= (1 << hard_smp_processor_id());
1216 cpu_number_map
[boot_cpu_id
] = 0;
1221 * If we couldnt find an SMP configuration at boot time,
1222 * get out of here now!
1225 if (!smp_found_config
)
1227 printk(KERN_NOTICE
"SMP motherboard not detected. Using dummy APIC emulation.\n");
1228 #ifndef CONFIG_VISWS
1231 cpu_online_map
= cpu_present_map
;
1236 * If SMP should be disabled, then really disable it!
1241 smp_found_config
= 0;
1242 printk(KERN_INFO
"SMP mode deactivated, forcing use of dummy APIC emulation.\n");
1250 * This is to verify that we're looking at
1251 * a real local APIC. Check these against
1252 * your board if the CPUs aren't getting
1253 * started for no apparent reason.
1256 reg
= apic_read(APIC_VERSION
);
1257 SMP_PRINTK(("Getting VERSION: %x\n", reg
));
1259 apic_write(APIC_VERSION
, 0);
1260 reg
= apic_read(APIC_VERSION
);
1261 SMP_PRINTK(("Getting VERSION: %x\n", reg
));
1264 * The two version reads above should print the same
1265 * NON-ZERO!!! numbers. If the second one is zero,
1266 * there is a problem with the APIC write/read
1269 * The next two are just to see if we have sane values.
1270 * They're only really relevant if we're in Virtual Wire
1271 * compatibility mode, but most boxes are anymore.
1275 reg
= apic_read(APIC_LVT0
);
1276 SMP_PRINTK(("Getting LVT0: %x\n", reg
));
1278 reg
= apic_read(APIC_LVT1
);
1279 SMP_PRINTK(("Getting LVT1: %x\n", reg
));
1283 enable_local_APIC();
1286 * Set up our local APIC timer:
1288 setup_APIC_clock ();
1291 * Now scan the CPU present map and fire up the other CPUs.
1295 * Add all detected CPUs. (later on we can down individual
1296 * CPUs which will change cpu_online_map but not necessarily
1297 * cpu_present_map. We are pretty much ready for hot-swap CPUs.)
1299 cpu_online_map
= cpu_present_map
;
1302 SMP_PRINTK(("CPU map: %lx\n", cpu_present_map
));
1304 for(i
=0;i
<NR_CPUS
;i
++)
1307 * Don't even attempt to start the boot CPU!
1309 if (i
== boot_cpu_id
)
1312 if ((cpu_online_map
& (1 << i
))
1313 && (max_cpus
< 0 || max_cpus
> cpucount
+1))
1319 * Make sure we unmap all failed CPUs
1322 if (cpu_number_map
[i
] == -1 && (cpu_online_map
& (1 << i
))) {
1323 printk("CPU #%d not responding. Removing from cpu_online_map.\n",i
);
1324 cpu_online_map
&= ~(1 << i
);
1329 * Cleanup possible dangling ends...
1332 #ifndef CONFIG_VISWS
1337 * Install writable page 0 entry.
1340 pg0
[0] = _PAGE_RW
| _PAGE_PRESENT
; /* writeable, present, addr 0 */
1344 * Paranoid: Set warm reset code and vector here back
1345 * to default values.
1350 *((volatile long *) phys_to_virt(0x467)) = 0;
1353 * Restore old page 0 entry.
1362 * Allow the user to impress friends.
1365 SMP_PRINTK(("Before bogomips.\n"));
1367 printk(KERN_ERR
"Error: only one processor found.\n");
1368 cpu_online_map
= (1<<hard_smp_processor_id());
1370 unsigned long bogosum
= 0;
1371 for(i
= 0; i
< 32; i
++)
1372 if (cpu_online_map
&(1<<i
))
1373 bogosum
+=cpu_data
[i
].loops_per_sec
;
1374 printk(KERN_INFO
"Total of %d processors activated (%lu.%02lu BogoMIPS).\n",
1376 (bogosum
+2500)/500000,
1377 ((bogosum
+2500)/5000)%100);
1378 SMP_PRINTK(("Before bogocount - setting activated=1.\n"));
1381 smp_num_cpus
= cpucount
+ 1;
1384 printk(KERN_WARNING
"WARNING: SMP operation may be unreliable with B stepping processors.\n");
1385 SMP_PRINTK(("Boot done.\n"));
1388 * now we know the other CPUs have fired off and we know our
1389 * APIC ID, so we can go init the TSS and stuff:
1393 cache_APIC_registers();
1394 #ifndef CONFIG_VISWS
1396 * Here we can be sure that there is an IO-APIC in the system. Let's
1399 if (!skip_ioapic_setup
)
1408 * the following functions deal with sending IPIs between CPUs.
1410 * We use 'broadcast', CPU->CPU IPIs and self-IPIs too.
1415 * Silly serialization to work around CPU bug in P5s.
1416 * We can safely turn it off on a 686.
1418 #ifdef CONFIG_X86_GOOD_APIC
1419 # define FORCE_APIC_SERIALIZATION 0
1421 # define FORCE_APIC_SERIALIZATION 1
1424 static unsigned int cached_APIC_ICR
;
1425 static unsigned int cached_APIC_ICR2
;
1428 * Caches reserved bits, APIC reads are (mildly) expensive
1429 * and force otherwise unnecessary CPU synchronization.
1431 * (We could cache other APIC registers too, but these are the
1432 * main ones used in RL.)
1434 #define slow_ICR (apic_read(APIC_ICR) & ~0xFDFFF)
1435 #define slow_ICR2 (apic_read(APIC_ICR2) & 0x00FFFFFF)
1437 void cache_APIC_registers (void)
1439 cached_APIC_ICR
= slow_ICR
;
1440 cached_APIC_ICR2
= slow_ICR2
;
1444 static inline unsigned int __get_ICR (void)
1446 #if FORCE_APIC_SERIALIZATION
1448 * Wait for the APIC to become ready - this should never occur. It's
1449 * a debugging check really.
1454 while (count
< 1000)
1457 if (!(cfg
&(1<<12))) {
1459 atomic_add(count
, (atomic_t
*)&ipi_count
);
1465 printk("CPU #%d: previous IPI still not cleared after 10mS\n",
1466 smp_processor_id());
1469 return cached_APIC_ICR
;
1473 static inline unsigned int __get_ICR2 (void)
1475 #if FORCE_APIC_SERIALIZATION
1478 return cached_APIC_ICR2
;
1482 static inline int __prepare_ICR (unsigned int shortcut
, int vector
)
1487 cfg
|= APIC_DEST_DM_FIXED
|shortcut
|vector
;
1492 static inline int __prepare_ICR2 (unsigned int dest
)
1497 cfg
|= SET_APIC_DEST_FIELD(dest
);
1502 static inline void __send_IPI_shortcut(unsigned int shortcut
, int vector
)
1506 * Subtle. In the case of the 'never do double writes' workaround we
1507 * have to lock out interrupts to be safe. Otherwise it's just one
1508 * single atomic write to the APIC, no need for cli/sti.
1510 #if FORCE_APIC_SERIALIZATION
1511 unsigned long flags
;
1513 __save_flags(flags
);
1518 * No need to touch the target chip field
1521 cfg
= __prepare_ICR(shortcut
, vector
);
1524 * Send the IPI. The write to APIC_ICR fires this off.
1526 apic_write(APIC_ICR
, cfg
);
1527 #if FORCE_APIC_SERIALIZATION
1528 __restore_flags(flags
);
1532 static inline void send_IPI_allbutself(int vector
)
1534 __send_IPI_shortcut(APIC_DEST_ALLBUT
, vector
);
1537 static inline void send_IPI_all(int vector
)
1539 __send_IPI_shortcut(APIC_DEST_ALLINC
, vector
);
1542 void send_IPI_self(int vector
)
1544 __send_IPI_shortcut(APIC_DEST_SELF
, vector
);
1547 static inline void send_IPI_single(int dest
, int vector
)
1550 #if FORCE_APIC_SERIALIZATION
1551 unsigned long flags
;
1553 __save_flags(flags
);
1558 * prepare target chip field
1561 cfg
= __prepare_ICR2(dest
);
1562 apic_write(APIC_ICR2
, cfg
);
1567 cfg
= __prepare_ICR(0, vector
);
1570 * Send the IPI. The write to APIC_ICR fires this off.
1572 apic_write(APIC_ICR
, cfg
);
1573 #if FORCE_APIC_SERIALIZATION
1574 __restore_flags(flags
);
1579 * This is fraught with deadlocks. Probably the situation is not that
1580 * bad as in the early days of SMP, so we might ease some of the
1584 void smp_flush_tlb(void)
1586 int cpu
= smp_processor_id();
1588 unsigned long flags
;
1591 * it's important that we do not generate any APIC traffic
1592 * until the AP CPUs have booted up!
1594 if (cpu_online_map
) {
1596 * The assignment is safe because it's volatile so the
1597 * compiler cannot reorder it, because the i586 has
1598 * strict memory ordering and because only the kernel
1599 * lock holder may issue a tlb flush. If you break any
1600 * one of those three change this to an atomic bus
1604 smp_invalidate_needed
= cpu_online_map
;
1607 * Processors spinning on some lock with IRQs disabled
1608 * will see this IRQ late. The smp_invalidate_needed
1609 * map will ensure they don't do a spurious flush tlb
1613 __save_flags(flags
);
1616 send_IPI_allbutself(INVALIDATE_TLB_VECTOR
);
1619 * Spin waiting for completion
1623 while (smp_invalidate_needed
) {
1625 * Take care of "crossing" invalidates
1627 if (test_bit(cpu
, &smp_invalidate_needed
))
1628 clear_bit(cpu
, &smp_invalidate_needed
);
1631 printk("stuck on TLB IPI wait (CPU#%d)\n",cpu
);
1635 __restore_flags(flags
);
1639 * Flush the local TLB
1647 * this function sends a 'reschedule' IPI to another CPU.
1648 * it goes straight through and wastes no time serializing
1649 * anything. Worst case is that we lose a reschedule ...
1652 void smp_send_reschedule(int cpu
)
1654 send_IPI_single(cpu
, RESCHEDULE_VECTOR
);
1658 * this function sends a 'stop' IPI to all other CPUs in the system.
1659 * it goes straight through.
1662 void smp_send_stop(void)
1664 send_IPI_allbutself(STOP_CPU_VECTOR
);
1667 /* Structure and data for smp_call_function(). This is designed to minimise
1668 * static memory requirements. It also looks cleaner.
1670 struct smp_call_function_struct
{
1671 void (*func
) (void *info
);
1673 atomic_t unstarted_count
;
1674 atomic_t unfinished_count
;
1677 static volatile struct smp_call_function_struct
*smp_call_function_data
= NULL
;
1680 * this function sends a 'generic call function' IPI to all other CPUs
1684 int smp_call_function (void (*func
) (void *info
), void *info
, int retry
,
1686 /* [SUMMARY] Run a function on all other CPUs.
1687 <func> The function to run. This must be fast and non-blocking.
1688 <info> An arbitrary pointer to pass to the function.
1689 <retry> If true, keep retrying until ready.
1690 <wait> If true, wait until function has completed on other CPUs.
1691 [RETURNS] 0 on success, else a negative status code. Does not return until
1692 remote CPUs are nearly ready to execute <<func>> or are or have executed.
1695 unsigned long timeout
;
1696 struct smp_call_function_struct data
;
1697 static spinlock_t lock
= SPIN_LOCK_UNLOCKED
;
1701 if (smp_call_function_data
) {
1702 schedule (); /* Give a mate a go */
1706 if (smp_call_function_data
) {
1707 spin_unlock (&lock
); /* Bad luck */
1710 /* Mine, all mine! */
1715 if (smp_call_function_data
) return -EBUSY
;
1717 if (smp_call_function_data
) {
1718 spin_unlock (&lock
);
1722 smp_call_function_data
= &data
;
1723 spin_unlock (&lock
);
1726 atomic_set (&data
.unstarted_count
, smp_num_cpus
- 1);
1728 if (wait
) atomic_set (&data
.unfinished_count
, smp_num_cpus
- 1);
1729 /* Send a message to all other CPUs and wait for them to respond */
1730 send_IPI_allbutself (CALL_FUNCTION_VECTOR
);
1731 /* Wait for response */
1732 timeout
= jiffies
+ JIFFIE_TIMEOUT
;
1733 while ( (atomic_read (&data
.unstarted_count
) > 0) &&
1734 time_before (jiffies
, timeout
) )
1736 if (atomic_read (&data
.unstarted_count
) > 0) {
1737 smp_call_function_data
= NULL
;
1741 while (atomic_read (&data
.unfinished_count
) > 0)
1743 smp_call_function_data
= NULL
;
1747 static unsigned int calibration_result
;
1749 void setup_APIC_timer(unsigned int clocks
);
1752 * Local timer interrupt handler. It does both profiling and
1753 * process statistics/rescheduling.
1755 * We do profiling in every local tick, statistics/rescheduling
1756 * happen only every 'profiling multiplier' ticks. The default
1757 * multiplier is 1 and it can be changed by writing the new multiplier
1758 * value into /proc/profile.
1761 void smp_local_timer_interrupt(struct pt_regs
* regs
)
1763 int user
= (user_mode(regs
) != 0);
1764 int cpu
= smp_processor_id();
1767 * The profiling function is SMP safe. (nothing can mess
1768 * around with "current", and the profiling counters are
1769 * updated with atomic operations). This is especially
1770 * useful with a profiling multiplier != 1
1773 x86_do_profile(regs
->eip
);
1775 if (!--prof_counter
[cpu
]) {
1776 int system
= 1 - user
;
1777 struct task_struct
* p
= current
;
1780 * The multiplier may have changed since the last time we got
1781 * to this point as a result of the user writing to
1782 * /proc/profile. In this case we need to adjust the APIC
1783 * timer accordingly.
1785 * Interrupts are already masked off at this point.
1787 prof_counter
[cpu
] = prof_multiplier
[cpu
];
1788 if (prof_counter
[cpu
] != prof_old_multiplier
[cpu
]) {
1789 setup_APIC_timer(calibration_result
/prof_counter
[cpu
]);
1790 prof_old_multiplier
[cpu
] = prof_counter
[cpu
];
1794 * After doing the above, we need to make like
1795 * a normal interrupt - otherwise timer interrupts
1796 * ignore the global interrupt lock, which is the
1797 * WrongThing (tm) to do.
1801 update_one_process(p
, 1, user
, system
, cpu
);
1804 if (p
->counter
<= 0) {
1806 p
->need_resched
= 1;
1808 if (p
->priority
< DEF_PRIORITY
) {
1809 kstat
.cpu_nice
+= user
;
1810 kstat
.per_cpu_nice
[cpu
] += user
;
1812 kstat
.cpu_user
+= user
;
1813 kstat
.per_cpu_user
[cpu
] += user
;
1815 kstat
.cpu_system
+= system
;
1816 kstat
.per_cpu_system
[cpu
] += system
;
1823 * We take the 'long' return path, and there every subsystem
1824 * grabs the apropriate locks (kernel lock/ irq lock).
1826 * we might want to decouple profiling from the 'long path',
1827 * and do the profiling totally in assembly.
1829 * Currently this isn't too much of an issue (performance wise),
1830 * we can take more than 100K local irqs per second on a 100 MHz P5.
1835 * Local APIC timer interrupt. This is the most natural way for doing
1836 * local interrupts, but local timer interrupts can be emulated by
1837 * broadcast interrupts too. [in case the hw doesnt support APIC timers]
1839 * [ if a single-CPU system runs an SMP kernel then we call the local
1840 * interrupt as well. Thus we cannot inline the local irq ... ]
1842 void smp_apic_timer_interrupt(struct pt_regs
* regs
)
1845 * NOTE! We'd better ACK the irq immediately,
1846 * because timer handling can be slow, and we
1847 * want to be able to accept NMI tlb invalidates
1851 smp_local_timer_interrupt(regs
);
1855 * Reschedule call back. Nothing to do,
1856 * all the work is done automatically when
1857 * we return from the interrupt.
1859 asmlinkage
void smp_reschedule_interrupt(void)
1865 * Invalidate call-back
1867 asmlinkage
void smp_invalidate_interrupt(void)
1869 if (test_and_clear_bit(smp_processor_id(), &smp_invalidate_needed
))
1876 static void stop_this_cpu (void)
1881 clear_bit(smp_processor_id(), &cpu_online_map
);
1883 if (cpu_data
[smp_processor_id()].hlt_works_ok
)
1884 for(;;) __asm__("hlt");
1889 * CPU halt call-back
1891 asmlinkage
void smp_stop_cpu_interrupt(void)
1896 asmlinkage
void smp_call_function_interrupt(void)
1898 void (*func
) (void *info
) = smp_call_function_data
->func
;
1899 void *info
= smp_call_function_data
->info
;
1900 int wait
= smp_call_function_data
->wait
;
1903 /* Notify initiating CPU that I've grabbed the data and am about to
1904 execute the function */
1905 atomic_dec (&smp_call_function_data
->unstarted_count
);
1906 /* At this point the structure may be out of scope unless wait==1 */
1908 if (wait
) atomic_dec (&smp_call_function_data
->unfinished_count
);
1912 * This interrupt should _never_ happen with our APIC/SMP architecture
1914 asmlinkage
void smp_spurious_interrupt(void)
1917 /* see sw-dev-man vol 3, chapter 7.4.13.5 */
1918 printk("spurious APIC interrupt on CPU#%d, should never happen.\n",
1919 smp_processor_id());
1923 * This part sets up the APIC 32 bit clock in LVTT1, with HZ interrupts
1924 * per second. We assume that the caller has already set up the local
1927 * The APIC timer is not exactly sync with the external timer chip, it
1928 * closely follows bus clocks.
1932 * The timer chip is already set up at HZ interrupts per second here,
1933 * but we do not accept timer interrupts yet. We only allow the BP
1936 static unsigned int __init
get_8254_timer_count(void)
1941 count
= inb_p(0x40);
1942 count
|= inb_p(0x40) << 8;
1948 * This function sets up the local APIC timer, with a timeout of
1949 * 'clocks' APIC bus clock. During calibration we actually call
1950 * this function twice, once with a bogus timeout value, second
1951 * time for real. The other (noncalibrating) CPUs call this
1952 * function only once, with the real value.
1954 * We are strictly in irqs off mode here, as we do not want to
1955 * get an APIC interrupt go off accidentally.
1957 * We do reads before writes even if unnecessary, to get around the
1958 * APIC double write bug.
1961 #define APIC_DIVISOR 16
1963 void setup_APIC_timer(unsigned int clocks
)
1965 unsigned long lvtt1_value
;
1966 unsigned int tmp_value
;
1969 * Unfortunately the local APIC timer cannot be set up into NMI
1970 * mode. With the IO APIC we can re-route the external timer
1971 * interrupt and broadcast it as an NMI to all CPUs, so no pain.
1973 tmp_value
= apic_read(APIC_LVTT
);
1974 lvtt1_value
= APIC_LVT_TIMER_PERIODIC
| LOCAL_TIMER_VECTOR
;
1975 apic_write(APIC_LVTT
, lvtt1_value
);
1978 * Divide PICLK by 16
1980 tmp_value
= apic_read(APIC_TDCR
);
1981 apic_write(APIC_TDCR
, (tmp_value
& ~APIC_TDR_DIV_1
)
1984 tmp_value
= apic_read(APIC_TMICT
);
1985 apic_write(APIC_TMICT
, clocks
/APIC_DIVISOR
);
1988 void __init
wait_8254_wraparound(void)
1990 unsigned int curr_count
, prev_count
=~0;
1993 curr_count
= get_8254_timer_count();
1996 prev_count
= curr_count
;
1997 curr_count
= get_8254_timer_count();
1998 delta
= curr_count
-prev_count
;
2001 * This limit for delta seems arbitrary, but it isn't, it's
2002 * slightly above the level of error a buggy Mercury/Neptune
2003 * chipset timer can cause.
2006 } while (delta
<300);
2010 * In this function we calibrate APIC bus clocks to the external
2011 * timer. Unfortunately we cannot use jiffies and the timer irq
2012 * to calibrate, since some later bootup code depends on getting
2013 * the first irq? Ugh.
2015 * We want to do the calibration only once since we
2016 * want to have local timer irqs syncron. CPUs connected
2017 * by the same APIC bus have the very same bus frequency.
2018 * And we want to have irqs off anyways, no accidental
2019 * APIC irq that way.
2022 int __init
calibrate_APIC_clock(void)
2024 unsigned long long t1
,t2
;
2026 long calibration_result
;
2029 printk("calibrating APIC timer ... ");
2032 * Put whatever arbitrary (but long enough) timeout
2033 * value into the APIC clock, we just want to get the
2034 * counter running for calibration.
2036 setup_APIC_timer(1000000000);
2039 * The timer chip counts down to zero. Let's wait
2040 * for a wraparound to start exact measurement:
2041 * (the current tick might have been already half done)
2044 wait_8254_wraparound ();
2047 * We wrapped around just now. Let's start:
2050 tt1
=apic_read(APIC_TMCCT
);
2052 #define LOOPS (HZ/10)
2054 * Let's wait LOOPS wraprounds:
2056 for (i
=0; i
<LOOPS
; i
++)
2057 wait_8254_wraparound ();
2059 tt2
=apic_read(APIC_TMCCT
);
2063 * The APIC bus clock counter is 32 bits only, it
2064 * might have overflown, but note that we use signed
2065 * longs, thus no extra care needed.
2067 * underflown to be exact, as the timer counts down ;)
2070 calibration_result
= (tt1
-tt2
)*APIC_DIVISOR
/LOOPS
;
2072 SMP_PRINTK(("\n..... %ld CPU clocks in 1 timer chip tick.",
2073 (unsigned long)(t2
-t1
)/LOOPS
));
2075 SMP_PRINTK(("\n..... %ld APIC bus clocks in 1 timer chip tick.",
2076 calibration_result
));
2079 printk("\n..... CPU clock speed is %ld.%04ld MHz.\n",
2080 ((long)(t2
-t1
)/LOOPS
)/(1000000/HZ
),
2081 ((long)(t2
-t1
)/LOOPS
)%(1000000/HZ
) );
2083 printk("..... system bus clock speed is %ld.%04ld MHz.\n",
2084 calibration_result
/(1000000/HZ
),
2085 calibration_result
%(1000000/HZ
) );
2088 return calibration_result
;
2091 void __init
setup_APIC_clock(void)
2093 unsigned long flags
;
2095 static volatile int calibration_lock
;
2097 __save_flags(flags
);
2100 SMP_PRINTK(("setup_APIC_clock() called.\n"));
2103 * [ setup_APIC_clock() is called from all CPUs, but we want
2104 * to do this part of the setup only once ... and it fits
2107 if (!test_and_set_bit(0,&calibration_lock
)) {
2109 calibration_result
=calibrate_APIC_clock();
2111 * Signal completion to the other CPU[s]:
2113 calibration_lock
= 3;
2117 * Other CPU is calibrating, wait for finish:
2119 SMP_PRINTK(("waiting for other CPU calibrating APIC ... "));
2120 while (calibration_lock
== 1);
2121 SMP_PRINTK(("done, continuing.\n"));
2125 * Now set up the timer for real.
2128 setup_APIC_timer (calibration_result
);
2131 * We ACK the APIC, just in case there is something pending.
2136 __restore_flags(flags
);
2140 * the frequency of the profiling timer can be changed
2141 * by writing a multiplier value into /proc/profile.
2143 int setup_profiling_timer(unsigned int multiplier
)
2148 * Sanity check. [at least 500 APIC cycles should be
2149 * between APIC interrupts as a rule of thumb, to avoid
2152 if ( (!multiplier
) || (calibration_result
/multiplier
< 500))
2156 * Set the new multiplier for each CPU. CPUs don't start using the
2157 * new values until the next timer interrupt in which they do process
2158 * accounting. At that time they also adjust their APIC timers
2161 for (i
= 0; i
< NR_CPUS
; ++i
)
2162 prof_multiplier
[i
] = multiplier
;