/*
 * Copyright (c) 1996, by Steve Passe
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. The name of the developer may NOT be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD: src/sys/i386/i386/mpapic.c,v 1.37.2.7 2003/01/25 02:31:47 peter Exp $
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>

#include <sys/machintr.h>
#include <machine/globaldata.h>
#include <machine/clock.h>
#include <machine/limits.h>
#include <machine/smp.h>
#include <machine/md_var.h>
#include <machine/pmap.h>
#include <machine/specialreg.h>
#include <machine_base/apic/lapic.h>
#include <machine_base/apic/ioapic.h>
#include <machine_base/apic/ioapic_abi.h>
#include <machine_base/apic/apicvar.h>
#include <machine_base/icu/icu_var.h>
#include <machine/segments.h>
#include <sys/thread2.h>
#include <sys/spinlock2.h>

#include <machine/cputypes.h>
#include <machine/intr_machdep.h>
#if !defined(KTR_LAPIC)
#define KTR_LAPIC	KTR_ALL
#endif
KTR_INFO_MASTER(lapic);
KTR_INFO(KTR_LAPIC, lapic, eoi, 0, "eoi");
#define log_lapic(name)	KTR_LOG(lapic_ ## name)
volatile lapic_t *lapic;

static void	lapic_timer_calibrate(void);
static void	lapic_timer_set_divisor(int);
static void	lapic_timer_fixup_handler(void *);
static void	lapic_timer_restart_handler(void *);
static int	lapic_timer_enable = 1;
TUNABLE_INT("hw.lapic_timer_enable", &lapic_timer_enable);

static int	lapic_timer_tscdeadline = 1;
TUNABLE_INT("hw.lapic_timer_tscdeadline", &lapic_timer_tscdeadline);

static int	lapic_calibrate_test = 0;
TUNABLE_INT("hw.lapic_calibrate_test", &lapic_calibrate_test);

static int	lapic_calibrate_fast = 1;
TUNABLE_INT("hw.lapic_calibrate_fast", &lapic_calibrate_fast);
static void	lapic_timer_tscdlt_reload(struct cputimer_intr *, sysclock_t);
static void	lapic_timer_intr_reload(struct cputimer_intr *, sysclock_t);
static void	lapic_timer_intr_enable(struct cputimer_intr *);
static void	lapic_timer_intr_restart(struct cputimer_intr *);
static void	lapic_timer_intr_pmfixup(struct cputimer_intr *);
static struct cputimer_intr lapic_cputimer_intr = {
	.freq = 0,
	.reload = lapic_timer_intr_reload,
	.enable = lapic_timer_intr_enable,
	.config = cputimer_intr_default_config,
	.restart = lapic_timer_intr_restart,
	.pmfixup = lapic_timer_intr_pmfixup,
	.initclock = cputimer_intr_default_initclock,
	.pcpuhand = NULL,
	.next = SLIST_ENTRY_INITIALIZER,
	.name = "lapic",
	.type = CPUTIMER_INTR_LAPIC,
	.prio = CPUTIMER_INTR_PRIO_LAPIC,
	.caps = CPUTIMER_INTR_CAP_NONE,
	.priv = NULL
};
static int		lapic_timer_divisor_idx = -1;
static const uint32_t	lapic_timer_divisors[] = {
	APIC_TDCR_2,	APIC_TDCR_4,	APIC_TDCR_8,	APIC_TDCR_16,
	APIC_TDCR_32,	APIC_TDCR_64,	APIC_TDCR_128,	APIC_TDCR_1
};
#define APIC_TIMER_NDIVISORS	(int)(NELEM(lapic_timer_divisors))
static int	lapic_use_tscdeadline = 0;
/* The raw TSC frequency might not fit into a sysclock_t value. */
static int	lapic_timer_tscfreq_shift;
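/*
 * Illustrative arithmetic (hypothetical numbers, not from a real part):
 * with a 5 GHz invariant TSC, tsc_frequency = 5,000,000,000 exceeds
 * INT_MAX (2,147,483,647), so lapic_timer_calibrate() settles on
 * lapic_timer_tscfreq_shift = 2 and advertises 5e9 >> 2 = 1,250,000,000 Hz
 * as the cputimer frequency.
 */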
/*
 * APIC ID <-> CPU ID mapping structures.
 */
int	cpu_id_to_apic_id[NAPICID];
int	apic_id_to_cpu_id[NAPICID];
int	lapic_enable = 1;
/* Separate cachelines for each cpu's info. */
struct deadlines {
	uint64_t timestamp;
	uint64_t downcount_time;
	uint64_t padding[6];
};
struct deadlines *tsc_deadlines = NULL;
static void	lapic_eoi_func(void);

void		(*lapic_eoi)(void);
/*
 * Enable LAPIC, configure interrupts.
 */
void
lapic_init(boolean_t bsp)
{
	uint32_t timer;
	u_int temp;

	if (bsp) {
		/* Decide whether we want to use TSC Deadline mode. */
		if (lapic_timer_tscdeadline != 0 &&
		    (cpu_feature2 & CPUID2_TSCDLT) &&
		    tsc_invariant && tsc_frequency != 0) {
			lapic_use_tscdeadline = 1;
			tsc_deadlines = kmalloc_cachealign(
			    sizeof(struct deadlines) * (naps + 1),
			    M_DEVBUF, M_WAITOK | M_ZERO);
		}
	}
	/*
	 * Install vectors.
	 *
	 * Since IDT is shared between BSP and APs, these vectors
	 * only need to be installed once; we do it on BSP.
	 */
	if (bsp) {
		if (cpu_vendor_id == CPU_VENDOR_AMD &&
		    CPUID_TO_FAMILY(cpu_id) >= 0x0f &&
		    CPUID_TO_FAMILY(cpu_id) < 0x17) {	/* XXX */
			uint32_t tcr;

			/*
			 * Set the LINTEN bit in the HyperTransport
			 * Transaction Control Register.
			 *
			 * This will cause EXTINT and NMI interrupts
			 * routed over the hypertransport bus to be
			 * fed into the LAPIC LINT0/LINT1.  If the bit
			 * isn't set, the interrupts will go to the
			 * general cpu INTR/NMI pins.  On a dual-core
			 * cpu the interrupt winds up going to BOTH cpus.
			 * The first cpu that does the interrupt ack
			 * cycle will get the correct interrupt.  The
			 * second cpu that does it will get a spurious
			 * interrupt vector (typically IRQ 7).
			 */
			outl(0xcf8,
			    (1 << 31) |	/* enable */
			    (0 << 16) |	/* bus */
			    (0x18 << 11) | /* dev (cpu + 0x18) */
			    (0 << 8) |	/* func */
			    0x68);	/* reg */
			tcr = inl(0xcfc);
			if ((tcr & 0x00010000) == 0) {
				kprintf("LAPIC: AMD LINTEN on\n");
				outl(0xcfc, tcr|0x00010000);
			}
			outl(0xcf8, 0);
		}
		/* Install a 'Spurious INTerrupt' vector */
		setidt_global(XSPURIOUSINT_OFFSET, Xspuriousint,
		    SDT_SYSIGT, SEL_KPL, 0);

		/* Install a timer vector */
		setidt_global(XTIMER_OFFSET, Xtimer,
		    SDT_SYSIGT, SEL_KPL, 0);

		/* Install an inter-CPU IPI for TLB invalidation */
		setidt_global(XINVLTLB_OFFSET, Xinvltlb,
		    SDT_SYSIGT, SEL_KPL, 0);

		/* Install an inter-CPU IPI for IPIQ messaging */
		setidt_global(XIPIQ_OFFSET, Xipiq,
		    SDT_SYSIGT, SEL_KPL, 0);

		/* Install an inter-CPU IPI for CPU stop/restart */
		setidt_global(XCPUSTOP_OFFSET, Xcpustop,
		    SDT_SYSIGT, SEL_KPL, 0);

		/* Install an inter-CPU IPI for sniffing cpu state */
		setidt_global(XSNIFF_OFFSET, Xsniff,
		    SDT_SYSIGT, SEL_KPL, 0);
	}
	/*
	 * Setup LINT0 as ExtINT on the BSP.  This is theoretically an
	 * aggregate interrupt input from the 8259.  The INTA cycle
	 * will be routed to the external controller (the 8259) which
	 * is expected to supply the vector.
	 *
	 * Must be setup edge triggered, active high.
	 *
	 * Disable LINT0 on BSP, if I/O APIC is enabled.
	 *
	 * Disable LINT0 on the APs.  It doesn't matter what delivery
	 * mode we use because we leave it masked.
	 */
	temp = lapic->lvt_lint0;
	temp &= ~(APIC_LVT_MASKED | APIC_LVT_TRIG_MASK |
		  APIC_LVT_POLARITY_MASK | APIC_LVT_DM_MASK);
	if (bsp) {
		temp |= APIC_LVT_DM_EXTINT;
		if (ioapic_enable)
			temp |= APIC_LVT_MASKED;
	} else {
		temp |= APIC_LVT_DM_FIXED | APIC_LVT_MASKED;
	}
	lapic->lvt_lint0 = temp;
	/*
	 * Setup LINT1 as NMI.
	 *
	 * Must be setup edge trigger, active high.
	 *
	 * Enable LINT1 on BSP, if I/O APIC is enabled.
	 *
	 * Disable LINT1 on the APs.
	 */
	temp = lapic->lvt_lint1;
	temp &= ~(APIC_LVT_MASKED | APIC_LVT_TRIG_MASK |
		  APIC_LVT_POLARITY_MASK | APIC_LVT_DM_MASK);
	temp |= APIC_LVT_MASKED | APIC_LVT_DM_NMI;
	if (bsp && ioapic_enable)
		temp &= ~APIC_LVT_MASKED;
	lapic->lvt_lint1 = temp;
	/*
	 * Mask the LAPIC error interrupt, LAPIC performance counter
	 * interrupt.
	 */
	lapic->lvt_error = lapic->lvt_error | APIC_LVT_MASKED;
	lapic->lvt_pcint = lapic->lvt_pcint | APIC_LVT_MASKED;
	/*
	 * Set LAPIC timer vector and mask the LAPIC timer interrupt.
	 */
	timer = lapic->lvt_timer;
	timer &= ~APIC_LVTT_VECTOR;
	timer |= XTIMER_OFFSET;
	timer |= APIC_LVTT_MASKED;
	lapic->lvt_timer = timer;
	/*
	 * Set the Task Priority Register as needed.  At the moment allow
	 * interrupts on all cpus (the APs will remain CLId until they are
	 * ready to deal).
	 */
	temp = lapic->tpr;
	temp &= ~APIC_TPR_PRIO;		/* clear priority field */
	lapic->tpr = temp;
	/*
	 * AMD specific setup.
	 */
	if (cpu_vendor_id == CPU_VENDOR_AMD &&
	    (lapic->version & APIC_VER_AMD_EXT_SPACE)) {
		uint32_t ext_feat;
		uint32_t count;
		uint32_t max_count;
		uint32_t lvt;
		uint32_t i;

		ext_feat = lapic->ext_feat;
		count = (ext_feat & APIC_EXTFEAT_MASK) >> APIC_EXTFEAT_SHIFT;
		max_count = sizeof(lapic->ext_lvt) / sizeof(lapic->ext_lvt[0]);
		if (count > max_count)
			count = max_count;

		for (i = 0; i < count; ++i) {
			lvt = lapic->ext_lvt[i].lvt;

			lvt &= ~(APIC_LVT_POLARITY_MASK | APIC_LVT_TRIG_MASK |
				 APIC_LVT_DM_MASK | APIC_LVT_MASKED);
			lvt |= APIC_LVT_MASKED | APIC_LVT_DM_FIXED;

			switch (i) {
			case APIC_EXTLVT_IBS:
				break;
			case APIC_EXTLVT_MCA:
				break;
			case APIC_EXTLVT_DEI:
				break;
			case APIC_EXTLVT_SBI:
				break;
			default:
				break;
			}

			if (bsp) {
				kprintf(" LAPIC AMD elvt%d: 0x%08x",
				    i, lapic->ext_lvt[i].lvt);
				if (lapic->ext_lvt[i].lvt != lvt)
					kprintf(" -> 0x%08x", lvt);
				kprintf("\n");
			}
			lapic->ext_lvt[i].lvt = lvt;
		}
	}
	/*
	 * Enable the LAPIC.
	 */
	temp = lapic->svr;
	temp |= APIC_SVR_ENABLE;	/* enable the LAPIC */
	temp &= ~APIC_SVR_FOCUS_DISABLE; /* enable lopri focus processor */

	if (lapic->version & APIC_VER_EOI_SUPP) {
		if (temp & APIC_SVR_EOI_SUPP) {
			temp &= ~APIC_SVR_EOI_SUPP;
			if (bsp)
				kprintf(" LAPIC disabling EOI supp\n");
		}
	}
	/*
	 * Set the spurious interrupt vector.  The low 4 bits of the vector
	 * must be 1111.
	 */
	if ((XSPURIOUSINT_OFFSET & 0x0F) != 0x0F)
		panic("bad XSPURIOUSINT_OFFSET: 0x%08x", XSPURIOUSINT_OFFSET);
	temp &= ~APIC_SVR_VECTOR;
	temp |= XSPURIOUSINT_OFFSET;

	lapic->svr = temp;

	/*
	 * Pump out a few EOIs to clean out interrupts that got through
	 * before we were able to set the TPR.
	 */
	lapic->eoi = 0;
	lapic->eoi = 0;
	lapic->eoi = 0;

	if (bsp) {
		lapic_timer_calibrate();
		if (lapic_timer_enable) {
			if (cpu_thermal_feature & CPUID_THERMAL_ARAT) {
				/*
				 * Local APIC timer will not stop
				 * in deep C-state.
				 */
				lapic_cputimer_intr.caps |=
				    CPUTIMER_INTR_CAP_PS;
			}
			if (lapic_use_tscdeadline) {
				lapic_cputimer_intr.reload =
				    lapic_timer_tscdlt_reload;
			}
			cputimer_intr_register(&lapic_cputimer_intr);
			cputimer_intr_select(&lapic_cputimer_intr, 0);
		}
	} else if (!lapic_use_tscdeadline) {
		lapic_timer_set_divisor(lapic_timer_divisor_idx);
	}

	if (bootverbose)
		apic_dump("apic_initialize()");
}
static void
lapic_timer_set_divisor(int divisor_idx)
{
	KKASSERT(divisor_idx >= 0 && divisor_idx < APIC_TIMER_NDIVISORS);
	lapic->dcr_timer = lapic_timer_divisors[divisor_idx];
}
static void
lapic_timer_oneshot(u_int count)
{
	uint32_t value;

	value = lapic->lvt_timer;
	value &= ~(APIC_LVTT_PERIODIC | APIC_LVTT_TSCDLT);
	lapic->lvt_timer = value;
	lapic->icr_timer = count;
}
static void
lapic_timer_oneshot_quick(u_int count)
{
	lapic->icr_timer = count;
}
static void
lapic_timer_tscdeadline_quick(uint64_t diff)
{
	uint64_t val = rdtsc() + diff;

	wrmsr(MSR_TSC_DEADLINE, val);
	tsc_deadlines[mycpuid].timestamp = val;
}
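/*
 * Usage sketch (hypothetical TSC frequency): on a 2 GHz invariant TSC,
 * arming a one-shot interrupt 10us in the future amounts to
 *
 *	lapic_timer_tscdeadline_quick(20000);
 *
 * since 10us * 2e9 Hz / 1e6 = 20000 TSC ticks past "now".
 */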
static uint64_t
lapic_scale_to_tsc(unsigned value, unsigned scale)
{
	uint64_t val;

	val = value;
	val *= tsc_frequency;
	val += (scale - 1);
	val /= scale;
	return val;
}
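/*
 * Worked example (hypothetical 2 GHz TSC): lapic_scale_to_tsc(100, 1000000)
 * yields ceil(100 * 2e9 / 1e6) = 200000 TSC ticks for a 100us interval;
 * the "+ (scale - 1)" bias makes the integer division round up.
 */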
#define MAX_MEASURE_RETRIES	100

static u_int64_t
do_tsc_calibration(u_int us, u_int64_t apic_delay_tsc)
{
	u_int64_t old_tsc1, old_tsc2, new_tsc1, new_tsc2;
	u_int64_t diff, count;
	u_int64_t a;
	u_int32_t start, end;
	int retries1 = 0, retries2 = 0;

retry1:
	lapic_timer_oneshot_quick(APIC_TIMER_MAX_COUNT);
	old_tsc1 = rdtsc_ordered();
	start = lapic->ccr_timer;
	old_tsc2 = rdtsc_ordered();
	if (apic_delay_tsc > 0 && retries1 < MAX_MEASURE_RETRIES &&
	    old_tsc2 - old_tsc1 > 2 * apic_delay_tsc) {
		/* The timer read took too long; retry the measurement. */
		retries1++;
		goto retry1;
	}
	DELAY(us);
retry2:
	new_tsc1 = rdtsc_ordered();
	end = lapic->ccr_timer;
	new_tsc2 = rdtsc_ordered();
	if (apic_delay_tsc > 0 && retries2 < MAX_MEASURE_RETRIES &&
	    new_tsc2 - new_tsc1 > 2 * apic_delay_tsc) {
		/* The timer read took too long; retry the measurement. */
		retries2++;
		goto retry2;
	}
	if (end == 0)
		return 0;

	count = start - end;

	/* Make sure the lapic can count for up to 2s */
	a = (unsigned)APIC_TIMER_MAX_COUNT;
	if (us < 2000000 && (u_int64_t)count * 2000000 >= a * us)
		return 0;

	if (lapic_calibrate_test > 0 && (retries1 > 0 || retries2 > 0)) {
		kprintf("%s: retries1=%d retries2=%d\n",
		    __func__, retries1, retries2);
	}

	diff = (new_tsc1 - old_tsc1) + (new_tsc2 - old_tsc2);
	/* XXX First estimate if the total TSC diff value makes sense */
	/* This will almost overflow, but only almost :) */
	count = (2 * count * tsc_frequency) / diff;

	return count;
}
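/*
 * Worked example of the scaling above (hypothetical values): if the LAPIC
 * timer decremented count = 40,000,000 ticks while each of the two TSC
 * deltas was 400,000,000 ticks (diff = 800,000,000) on a 2 GHz part, then
 * count = 2 * 4e7 * 2e9 / 8e8 = 200,000,000, i.e. a 200 MHz timer clock.
 */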
static u_int
do_cputimer_calibration(u_int us)
{
	u_int64_t value;
	sysclock_t start, end, beginning, finish;

	lapic_timer_oneshot(APIC_TIMER_MAX_COUNT);
	beginning = lapic->ccr_timer;
	start = sys_cputimer->count();
	DELAY(us);
	end = sys_cputimer->count();
	finish = lapic->ccr_timer;
	if (finish == 0)
		return 0;
	/* value is the LAPIC timer difference. */
	value = beginning - finish;
	/* end is the sys_cputimer difference. */
	end -= start;
	if (end == 0)
		return 0;
	value = ((uint64_t)value * sys_cputimer->freq) / end;
	return value;
}
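/*
 * Worked example of the conversion above (hypothetical values): if the
 * LAPIC timer dropped by value = 2,000,000 while sys_cputimer advanced
 * end = 200,000 counts at freq = 100,000 Hz (a 2 s window), the estimated
 * LAPIC timer frequency is 2e6 * 1e5 / 2e5 = 1,000,000 Hz.
 */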
static void
lapic_timer_calibrate(void)
{
	sysclock_t value;
	u_int64_t apic_delay_tsc = 0;
	int use_tsc_calibration = 0;

	/* No need to calibrate lapic_timer, if we will use TSC Deadline mode */
	if (lapic_use_tscdeadline) {
		lapic_timer_tscfreq_shift = 0;
		while ((tsc_frequency >> lapic_timer_tscfreq_shift) > INT_MAX)
			lapic_timer_tscfreq_shift++;
		lapic_cputimer_intr.freq =
		    tsc_frequency >> lapic_timer_tscfreq_shift;
		kprintf(
		    "lapic: TSC Deadline Mode: shift %d, frequency %u Hz\n",
		    lapic_timer_tscfreq_shift, lapic_cputimer_intr.freq);
		return;
	}

	/*
	 * On real hardware, tsc_invariant == 0 wouldn't be an issue, but in
	 * a virtual machine the frequency may get changed by the host.
	 */
	if (tsc_frequency != 0 && tsc_invariant && lapic_calibrate_fast)
		use_tsc_calibration = 1;

	if (use_tsc_calibration) {
		u_int64_t min_apic_tsc = 0, max_apic_tsc = 0;
		u_int64_t old_tsc, new_tsc;
		u_int32_t val;
		int i;

		/* Warm up the lapic timer access path. */
		lapic_timer_oneshot(APIC_TIMER_MAX_COUNT);
		for (i = 0; i < 10; i++)
			val = lapic->ccr_timer;

		/* Measure the latency of a single timer read. */
		for (i = 0; i < 100; i++) {
			old_tsc = rdtsc_ordered();
			val = lapic->ccr_timer;
			new_tsc = rdtsc_ordered();
			new_tsc -= old_tsc;
			apic_delay_tsc += new_tsc;
			if (min_apic_tsc == 0 ||
			    min_apic_tsc > new_tsc) {
				min_apic_tsc = new_tsc;
			}
			if (max_apic_tsc < new_tsc)
				max_apic_tsc = new_tsc;
		}
		apic_delay_tsc /= 100;
		kprintf(
		    "LAPIC latency (in TSC ticks): %lu min: %lu max: %lu\n",
		    apic_delay_tsc, min_apic_tsc, max_apic_tsc);
		apic_delay_tsc = min_apic_tsc;
	}
	if (!use_tsc_calibration) {
		int i;

		/*
		 * Do some exercising of the lapic timer access. This improves
		 * precision of the subsequent calibration run in at least
		 * some virtualization cases.
		 */
		lapic_timer_set_divisor(0);
		for (i = 0; i < 10; i++)
			(void)do_cputimer_calibration(100);
	}

	/* Try to calibrate the local APIC timer. */
	for (lapic_timer_divisor_idx = 0;
	     lapic_timer_divisor_idx < APIC_TIMER_NDIVISORS;
	     lapic_timer_divisor_idx++) {
		lapic_timer_set_divisor(lapic_timer_divisor_idx);
		if (use_tsc_calibration) {
			value = do_tsc_calibration(200*1000, apic_delay_tsc);
		} else {
			value = do_cputimer_calibration(2*1000*1000);
		}
		if (value != 0)
			break;
	}
	if (lapic_timer_divisor_idx >= APIC_TIMER_NDIVISORS)
		panic("lapic: no proper timer divisor?!");
	lapic_cputimer_intr.freq = value;

	kprintf("lapic: divisor index %d, frequency %u Hz\n",
	    lapic_timer_divisor_idx, lapic_cputimer_intr.freq);

	if (lapic_calibrate_test > 0) {
		uint64_t freq;
		int i;

		for (i = 1; i <= 20; i++) {
			if (use_tsc_calibration) {
				freq = do_tsc_calibration(i*100*1000,
				    apic_delay_tsc);
			} else {
				freq = do_cputimer_calibration(i*100*1000);
			}
			if (freq != 0)
				kprintf("%ums: %lu\n", i * 100, freq);
		}
	}
}
static void
lapic_timer_tscdlt_reload(struct cputimer_intr *cti, sysclock_t reload)
{
	struct globaldata *gd = mycpu;
	uint64_t diff, now, val;

	if (reload > 1000*1000*1000)
		reload = 1000*1000*1000;
	diff = (uint64_t)reload * tsc_frequency / sys_cputimer->freq;
	if (diff < 4)
		diff = 4;

	if (cpu_vendor_id == CPU_VENDOR_INTEL)
		cpu_lfence();
	else
		cpu_mfence();

	now = rdtsc();
	val = now + diff;
	if (gd->gd_timer_running) {
		uint64_t deadline = tsc_deadlines[mycpuid].timestamp;
		if (deadline == 0 || now > deadline || val < deadline) {
			wrmsr(MSR_TSC_DEADLINE, val);
			tsc_deadlines[mycpuid].timestamp = val;
		}
	} else {
		gd->gd_timer_running = 1;
		wrmsr(MSR_TSC_DEADLINE, val);
		tsc_deadlines[mycpuid].timestamp = val;
	}
}
static void
lapic_timer_intr_reload(struct cputimer_intr *cti, sysclock_t reload)
{
	struct globaldata *gd = mycpu;

	reload = (int64_t)reload * cti->freq / sys_cputimer->freq;
	if (reload < 2)
		reload = 2;

	if (gd->gd_timer_running) {
		if (reload < lapic->ccr_timer)
			lapic_timer_oneshot_quick(reload);
	} else {
		gd->gd_timer_running = 1;
		lapic_timer_oneshot_quick(reload);
	}
}
static void
lapic_timer_intr_enable(struct cputimer_intr *cti __unused)
{
	uint32_t timer;

	timer = lapic->lvt_timer;
	timer &= ~(APIC_LVTT_MASKED | APIC_LVTT_PERIODIC | APIC_LVTT_TSCDLT);
	if (lapic_use_tscdeadline)
		timer |= APIC_LVTT_TSCDLT;
	lapic->lvt_timer = timer;
	if (lapic_use_tscdeadline)
		cpu_mfence();

	lapic_timer_fixup_handler(NULL);
}
static void
lapic_timer_fixup_handler(void *arg)
{
	int *started = arg;

	if (started != NULL)
		*started = 0;

	if (cpu_vendor_id == CPU_VENDOR_AMD) {
		/*
		 * Detect the presence of C1E capability mostly on latest
		 * dual-cores (or future) k8 family. This feature renders
		 * the local APIC timer dead, so we disable it by reading
		 * the Interrupt Pending Message register and clearing both
		 * C1eOnCmpHalt (bit 28) and SmiOnCmpHalt (bit 27).
		 *
		 * Reference:
		 *   "BIOS and Kernel Developer's Guide for AMD NPT
		 *    Family 0Fh Processors"
		 *   #32559 revision 3.00
		 */
		if ((cpu_id & 0x00000f00) == 0x00000f00 &&
		    (cpu_id & 0x0fff0000) >= 0x00040000) {
			uint64_t msr;

			msr = rdmsr(0xc0010055);
			if (msr & 0x18000000) {
				struct globaldata *gd = mycpu;

				kprintf("cpu%d: AMD C1E detected\n",
				    gd->gd_cpuid);
				wrmsr(0xc0010055, msr & ~0x18000000ULL);

				/*
				 * We are kinda stalled;
				 * kick start again.
				 */
				gd->gd_timer_running = 1;
				if (lapic_use_tscdeadline) {
					/* Maybe reached in Virtual Machines? */
					lapic_timer_tscdeadline_quick(5000);
				} else {
					lapic_timer_oneshot_quick(2);
				}

				if (started != NULL)
					*started = 1;
			}
		}
	}
}
static void
lapic_timer_restart_handler(void *dummy __unused)
{
	int started;

	lapic_timer_fixup_handler(&started);
	if (!started) {
		struct globaldata *gd = mycpu;

		gd->gd_timer_running = 1;
		if (lapic_use_tscdeadline) {
			/* Maybe reached in Virtual Machines? */
			lapic_timer_tscdeadline_quick(5000);
		} else {
			lapic_timer_oneshot_quick(2);
		}
	}
}
/*
 * This function is called only by ACPICA code currently:
 * - AMD C1E fixup.  AMD C1E only seems to happen after ACPI
 *   module controls PM.  So once ACPICA is attached, we try
 *   to apply the fixup to prevent LAPIC timer from hanging.
 */
static void
lapic_timer_intr_pmfixup(struct cputimer_intr *cti __unused)
{
	lwkt_send_ipiq_mask(smp_active_mask,
	    lapic_timer_fixup_handler, NULL);
}

static void
lapic_timer_intr_restart(struct cputimer_intr *cti __unused)
{
	lwkt_send_ipiq_mask(smp_active_mask, lapic_timer_restart_handler, NULL);
}
/*
 * dump contents of local APIC registers
 */
void
apic_dump(char *str)
{
	kprintf("SMP: CPU%d %s:\n", mycpu->gd_cpuid, str);
	kprintf("     lint0: 0x%08x lint1: 0x%08x TPR: 0x%08x SVR: 0x%08x\n",
	    lapic->lvt_lint0, lapic->lvt_lint1, lapic->tpr, lapic->svr);
}
/*
 * Inter Processor Interrupt functions.
 */

/*
 * Send APIC IPI 'vector' to 'destType' via 'deliveryMode'.
 *
 *  destType is 1 of: APIC_DEST_SELF, APIC_DEST_ALLISELF, APIC_DEST_ALLESELF
 *  vector is any valid SYSTEM INT vector
 *  delivery_mode is 1 of: APIC_DELMODE_FIXED, APIC_DELMODE_LOWPRIO
 *
 * WARNINGS!
 *
 * We now implement a per-cpu interlock (gd->gd_npoll) to prevent more than
 * one IPI from being sent to any given cpu at a time.  Thus we no longer
 * have to process incoming IPIs while waiting for the status to clear.
 * No deadlock should be possible.
 *
 * We now physically disable interrupts for the lapic ICR operation.  If
 * we do not do this then it looks like an EOI sent to the lapic (which
 * occurs even with a critical section) can interfere with the command
 * register ready status and cause an IPI to be lost.
 *
 * e.g. an interrupt can occur, issue the EOI, IRET, and cause the command
 * register to busy just before we write to icr_lo, resulting in a lost
 * issuance.  This only appears to occur on Intel cpus and is not
 * documented.  It could simply be that cpus are so fast these days that
 * it was always an issue, but is only now rearing its ugly head.  This
 * is conjecture.
 */
int
apic_ipi(int dest_type, int vector, int delivery_mode)
{
	uint32_t icr_hi;
	uint32_t icr_lo;
	int64_t tsc;
	int loops = 1;

	if ((lapic->icr_lo & APIC_DELSTAT_MASK) != 0) {
		tsc = rdtsc();
		while ((lapic->icr_lo & APIC_DELSTAT_MASK) != 0) {
			cpu_pause();
			if ((tsc_sclock_t)(rdtsc() -
					   (tsc + tsc_frequency)) > 0) {
				kprintf("apic_ipi stall cpu %d (sing)\n",
				    mycpuid);
				tsc = rdtsc();
				if (++loops > 30)
					panic("apic stall");
			}
		}
	}
	icr_hi = lapic->icr_hi & ~APIC_ID_MASK;
	icr_lo = (lapic->icr_lo & APIC_ICRLO_RESV_MASK) | dest_type |
	    APIC_LEVEL_ASSERT | delivery_mode | vector;
	lapic->icr_hi = icr_hi;
	lapic->icr_lo = icr_lo;

	return 0;
}
/*
 * Interrupts must be hard-disabled by caller
 */
void
single_apic_ipi(int cpu, int vector, int delivery_mode)
{
	uint32_t icr_lo;
	uint32_t icr_hi;
	int64_t tsc;
	int loops = 1;

	if ((lapic->icr_lo & APIC_DELSTAT_MASK) != 0) {
		tsc = rdtsc();
		while ((lapic->icr_lo & APIC_DELSTAT_MASK) != 0) {
			cpu_pause();
			if ((tsc_sclock_t)(rdtsc() -
					   (tsc + tsc_frequency)) > 0) {
				kprintf("single_apic_ipi stall cpu %d (sing)\n",
				    mycpuid);
				tsc = rdtsc();
				if (++loops > 30)
					panic("apic stall");
			}
		}
	}
	icr_hi = lapic->icr_hi & ~APIC_ID_MASK;
	icr_hi |= (CPUID_TO_APICID(cpu) << 24);

	/* build ICR_LOW */
	icr_lo = (lapic->icr_lo & APIC_ICRLO_RESV_MASK) |
	    APIC_LEVEL_ASSERT | APIC_DEST_DESTFLD | delivery_mode | vector;

	/* write APIC ICR */
	lapic->icr_hi = icr_hi;
	lapic->icr_lo = icr_lo;
}
/*
 * Returns 0 if the apic is busy, 1 if we were able to queue the request.
 *
 * NOT WORKING YET!  The code as-is may end up not queueing an IPI at all
 * to the target, and the scheduler does not 'poll' for IPI messages.
 */
int
single_apic_ipi_passive(int cpu, int vector, int delivery_mode)
{
	u_long icr_lo;
	u_long icr_hi;
	unsigned long rflags;

	rflags = read_rflags();
	cpu_disable_intr();
	if ((lapic->icr_lo & APIC_DELSTAT_MASK) != 0) {
		write_rflags(rflags);
		return(0);
	}
	icr_hi = lapic->icr_hi & ~APIC_ID_MASK;
	icr_hi |= (CPUID_TO_APICID(cpu) << 24);
	lapic->icr_hi = icr_hi;

	/* build IRC_LOW */
	icr_lo = (lapic->icr_lo & APIC_RESV2_MASK) |
	    APIC_DEST_DESTFLD | delivery_mode | vector;

	/* write APIC ICR */
	lapic->icr_lo = icr_lo;
	write_rflags(rflags);

	return(1);
}
/*
 * Send APIC IPI 'vector' to 'target's via 'delivery_mode'.
 *
 * target is a bitmask of destination cpus.  Vector is any
 * valid system INT vector.  Delivery mode may be either
 * APIC_DELMODE_FIXED or APIC_DELMODE_LOWPRIO.
 *
 * Interrupts must be hard-disabled by caller
 */
void
selected_apic_ipi(cpumask_t target, int vector, int delivery_mode)
{
	while (CPUMASK_TESTNZERO(target)) {
		int n = BSFCPUMASK(target);

		CPUMASK_NANDBIT(target, n);
		single_apic_ipi(n, vector, delivery_mode);
	}
}
/*
 * Load a 'downcount time' in uSeconds.
 */
void
set_apic_timer(int us)
{
	u_int count;

	if (lapic_use_tscdeadline) {
		uint64_t val;

		val = lapic_scale_to_tsc(us, 1000000);
		val += rdtsc();
		/* No need to arm the lapic here, just track the timeout. */
		tsc_deadlines[mycpuid].downcount_time = val;
		return;
	}

	/*
	 * When we reach here, lapic timer's frequency
	 * must have been calculated as well as the
	 * divisor (lapic->dcr_timer is setup during the
	 * divisor calculation).
	 */
	KKASSERT(lapic_cputimer_intr.freq != 0 &&
		 lapic_timer_divisor_idx >= 0);

	count = ((us * (int64_t)lapic_cputimer_intr.freq) + 999999) / 1000000;
	lapic_timer_oneshot(count);
}
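/*
 * Worked example (hypothetical calibration): with lapic_cputimer_intr.freq
 * at 1,000,000 Hz, set_apic_timer(1500) loads
 * count = (1500 * 1e6 + 999999) / 1e6 = 1500 ticks; the +999999 bias
 * rounds any fractional tick up so the downcount never undershoots.
 */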
/*
 * Read remaining time in timer, in microseconds (rounded up).
 */
int
read_apic_timer(void)
{
	uint64_t val;

	if (lapic_use_tscdeadline) {
		uint64_t now;

		val = tsc_deadlines[mycpuid].downcount_time;
		now = rdtsc();
		if (val == 0 || now > val) {
			return 0;
		} else {
			val -= now;
			val *= 1000000;
			val += (tsc_frequency - 1);
			val /= tsc_frequency;
			if (val > INT_MAX)
				val = INT_MAX;
			return val;
		}
	}

	val = lapic->ccr_timer;
	if (val == 0)
		return 0;

	KKASSERT(lapic_cputimer_intr.freq > 0);
	val *= 1000000;
	val += (lapic_cputimer_intr.freq - 1);
	val /= lapic_cputimer_intr.freq;
	if (val > INT_MAX)
		val = INT_MAX;
	return val;
}
/*
 * Spin-style delay, set delay time in uS, spin till it drains.
 */
void
u_sleep(int count)
{
	set_apic_timer(count);
	while (read_apic_timer())
		/* spin */ ;
}
int
lapic_unused_apic_id(int start)
{
	int i;

	for (i = start; i < APICID_MAX; ++i) {
		if (APICID_TO_CPUID(i) == -1)
			return i;
	}
	return NAPICID;
}
void
lapic_map(vm_paddr_t lapic_addr)
{
	lapic = pmap_mapdev_uncacheable(lapic_addr, sizeof(struct LAPIC));
}
static TAILQ_HEAD(, lapic_enumerator) lapic_enumerators =
	TAILQ_HEAD_INITIALIZER(lapic_enumerators);

int
lapic_config(void)
{
	struct lapic_enumerator *e;
	int error, i, ap_max;

	KKASSERT(lapic_enable);

	for (i = 0; i < NAPICID; ++i)
		APICID_TO_CPUID(i) = -1;

	TAILQ_FOREACH(e, &lapic_enumerators, lapic_link) {
		error = e->lapic_probe(e);
		if (!error)
			break;
	}
	if (e == NULL) {
		kprintf("LAPIC: Can't find LAPIC\n");
		return ENXIO;
	}

	error = e->lapic_enumerate(e);
	if (error) {
		kprintf("LAPIC: enumeration failed\n");
		return ENXIO;
	}

	ap_max = MAXCPU - 1;
	TUNABLE_INT_FETCH("hw.ap_max", &ap_max);
	if (ap_max > MAXCPU - 1)
		ap_max = MAXCPU - 1;

	if (naps > ap_max) {
		kprintf("LAPIC: Warning use only %d out of %d "
			"available APs\n",
			ap_max, naps);
		naps = ap_max;
	}

	return 0;
}
void
lapic_enumerator_register(struct lapic_enumerator *ne)
{
	struct lapic_enumerator *e;

	TAILQ_FOREACH(e, &lapic_enumerators, lapic_link) {
		if (e->lapic_prio < ne->lapic_prio) {
			TAILQ_INSERT_BEFORE(e, ne, lapic_link);
			return;
		}
	}
	TAILQ_INSERT_TAIL(&lapic_enumerators, ne, lapic_link);
}
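/*
 * Illustrative ordering (hypothetical priorities): if an ACPI MADT
 * enumerator registers with lapic_prio 100 and an MP-table enumerator
 * with 90, the list stays sorted in descending priority, so
 * lapic_config() probes the MADT enumerator first and falls back to the
 * MP table only if that probe fails.
 */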
void
lapic_set_cpuid(int cpu_id, int apic_id)
{
	CPUID_TO_APICID(cpu_id) = apic_id;
	APICID_TO_CPUID(apic_id) = cpu_id;
}
void
lapic_fixup_noioapic(void)
{
	u_int temp;

	/* Only allowed on BSP */
	KKASSERT(mycpuid == 0);
	KKASSERT(!ioapic_enable);

	temp = lapic->lvt_lint0;
	temp &= ~APIC_LVT_MASKED;
	lapic->lvt_lint0 = temp;

	temp = lapic->lvt_lint1;
	temp |= APIC_LVT_MASKED;
	lapic->lvt_lint1 = temp;
}
static void
lapic_eoi_func(void)
{
	log_lapic(eoi);
	lapic->eoi = 0;
}
static void
lapic_sysinit(void *dummy __unused)
{
	if (lapic_enable) {
		int error;

		lapic_eoi = lapic_eoi_func;

		error = lapic_config();
		if (error)
			lapic_enable = 0;
	}

	if (lapic_enable) {
		/* Initialize BSP's local APIC */
		lapic_init(TRUE);
	} else if (ioapic_enable) {
		ioapic_enable = 0;
		icu_reinit_noioapic();
	}
}
SYSINIT(lapic, SI_BOOT2_LAPIC, SI_ORDER_FIRST, lapic_sysinit, NULL);