2 * Copyright (c) 1996, by Steve Passe
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. The name of the developer may NOT be used to endorse or promote products
11 * derived from this software without specific prior written permission.
13 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
14 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
16 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
17 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
18 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
19 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
20 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
21 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
22 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25 * $FreeBSD: src/sys/i386/i386/mpapic.c,v 1.37.2.7 2003/01/25 02:31:47 peter Exp $
28 #include <sys/param.h>
29 #include <sys/systm.h>
30 #include <sys/kernel.h>
33 #include <sys/machintr.h>
34 #include <sys/malloc.h>
35 #include <sys/sysctl.h>
36 #include <machine/globaldata.h>
37 #include <machine/clock.h>
38 #include <machine/limits.h>
39 #include <machine/smp.h>
40 #include <machine/md_var.h>
41 #include <machine/pmap.h>
42 #include <machine/specialreg.h>
43 #include <machine_base/apic/lapic.h>
44 #include <machine_base/apic/ioapic.h>
45 #include <machine_base/apic/ioapic_abi.h>
46 #include <machine_base/apic/apicvar.h>
47 #include <machine_base/icu/icu_var.h>
48 #include <machine/segments.h>
49 #include <sys/spinlock2.h>
51 #include <machine/cputypes.h>
52 #include <machine/intr_machdep.h>
/*
 * Kernel trace (KTR) facility setup for LAPIC EOI events: defines the
 * trace mask and two probe points (mem_eoi / msr_eoi) used by the
 * memory-mapped and x2APIC MSR EOI paths below.
 *
 * NOTE(review): this region is whitespace-mangled by extraction; statements
 * are split across lines and the closing #endif for the #if !defined(KTR_LAPIC)
 * guard is missing from this view — restore from upstream before compiling.
 */
54 #if !defined(KTR_LAPIC)
55 #define KTR_LAPIC KTR_ALL
57 KTR_INFO_MASTER(lapic
);
58 KTR_INFO(KTR_LAPIC
, lapic
, mem_eoi
, 0, "mem_eoi");
59 KTR_INFO(KTR_LAPIC
, lapic
, msr_eoi
, 0, "msr_eoi");
60 #define log_lapic(name) KTR_LOG(lapic_ ## name)
/*
 * File-scope state and forward declarations.
 *
 * lapic_mem: pointer to the memory-mapped LAPIC register window (set by
 * lapic_map()); NULL/unused when running in pure x2APIC MSR mode.
 * The hw.* tunables allow boot-time control of C1E testing, LAPIC timer
 * usage, TSC-deadline mode, and the calibration strategy.
 *
 * NOTE(review): extraction has split each declaration across lines; the
 * tokens themselves are intact.
 */
64 volatile lapic_t
*lapic_mem
;
66 static void lapic_timer_calibrate(void);
67 static void lapic_timer_set_divisor(int);
68 static void lapic_timer_fixup_handler(void *);
69 static void lapic_timer_restart_handler(void *);
71 static int lapic_timer_c1e_test
= -1;	/* auto-detect */
72 TUNABLE_INT("hw.lapic_timer_c1e_test", &lapic_timer_c1e_test
);
74 static int lapic_timer_enable
= 1;
75 TUNABLE_INT("hw.lapic_timer_enable", &lapic_timer_enable
);
77 static int lapic_timer_tscdeadline
= 1;
78 TUNABLE_INT("hw.lapic_timer_tscdeadline", &lapic_timer_tscdeadline
);
80 static int lapic_calibrate_test
= 0;
81 TUNABLE_INT("hw.lapic_calibrate_test", &lapic_calibrate_test
);
83 static int lapic_calibrate_fast
= 1;
84 TUNABLE_INT("hw.lapic_calibrate_fast", &lapic_calibrate_fast
);
/* cputimer_intr backend entry points (memory-mapped and MSR variants). */
86 static void lapic_timer_tscdlt_reload(struct cputimer_intr
*, sysclock_t
);
87 static void lapic_mem_timer_intr_reload(struct cputimer_intr
*, sysclock_t
);
88 static void lapic_msr_timer_intr_reload(struct cputimer_intr
*, sysclock_t
);
89 static void lapic_timer_intr_enable(struct cputimer_intr
*);
90 static void lapic_timer_intr_restart(struct cputimer_intr
*);
91 static void lapic_timer_intr_pmfixup(struct cputimer_intr
*);
/*
 * The LAPIC timer registered as a cputimer_intr backend.  Defaults to the
 * memory-mapped reload path; lapic_x2apic_enter() and lapic_init() swap in
 * the MSR / TSC-deadline reload functions when those modes are active.
 *
 * NOTE(review): the closing "};" of this initializer is missing from this
 * mangled extraction — restore from upstream before compiling.
 */
93 static struct cputimer_intr lapic_cputimer_intr
= {
95 .reload
= lapic_mem_timer_intr_reload
,
96 .enable
= lapic_timer_intr_enable
,
97 .config
= cputimer_intr_default_config
,
98 .restart
= lapic_timer_intr_restart
,
99 .pmfixup
= lapic_timer_intr_pmfixup
,
100 .initclock
= cputimer_intr_default_initclock
,
102 .next
= SLIST_ENTRY_INITIALIZER
,
104 .type
= CPUTIMER_INTR_LAPIC
,
105 .prio
= CPUTIMER_INTR_PRIO_LAPIC
,
106 .caps
= CPUTIMER_INTR_CAP_NONE
,
/*
 * Timer divisor table, APIC<->CPU ID maps, x2APIC state, per-cpu TSC
 * deadline bookkeeping, and the EOI/IPI dispatch function pointers that
 * select between memory-mapped and MSR (x2APIC) register access.
 *
 * NOTE(review): mangled extraction — the divisor table's closing "};" and
 * the body of "struct deadlines" (only the downcount_time member is visible
 * here; a timestamp member is referenced elsewhere in this file) are missing.
 */
110 static int lapic_timer_divisor_idx
= -1;
111 static const uint32_t lapic_timer_divisors
[] = {
112 APIC_TDCR_2
, APIC_TDCR_4
, APIC_TDCR_8
, APIC_TDCR_16
,
113 APIC_TDCR_32
, APIC_TDCR_64
, APIC_TDCR_128
, APIC_TDCR_1
115 #define APIC_TIMER_NDIVISORS (int)(NELEM(lapic_timer_divisors))
117 static int lapic_use_tscdeadline
= 0;
120 * APIC ID <-> CPU ID mapping structures.
122 int cpu_id_to_apic_id
[NAPICID
];
123 int apic_id_to_cpu_id
[NAPICID
];
124 int lapic_enable
= 1;
125 int lapic_usable
= 0;
126 int x2apic_enable
= 1;
128 SYSCTL_INT(_hw
, OID_AUTO
, x2apic_enable
, CTLFLAG_RD
, &x2apic_enable
, 0, "");
130 /* Separate cachelines for each cpu's info. */
133 uint64_t downcount_time
;
136 static struct deadlines
*tsc_deadlines
= NULL
;
/* Memory-mapped vs MSR (x2APIC) access variants; selected at config time. */
138 static void lapic_mem_eoi(void);
139 static int lapic_mem_ipi(int dest_type
, int vector
, int delivery_mode
);
140 static void lapic_mem_single_ipi(int cpu
, int vector
, int delivery_mode
);
142 static void lapic_msr_eoi(void);
143 static int lapic_msr_ipi(int dest_type
, int vector
, int delivery_mode
);
144 static void lapic_msr_single_ipi(int cpu
, int vector
, int delivery_mode
);
146 void (*lapic_eoi
)(void);
147 int (*apic_ipi
)(int dest_type
, int vector
, int delivery_mode
);
148 void (*single_apic_ipi
)(int cpu
, int vector
, int delivery_mode
);
151 lapic_mem_icr_set(uint32_t apic_id
, uint32_t icr_lo_val
)
153 uint32_t icr_lo
, icr_hi
;
155 icr_hi
= (LAPIC_MEM_READ(icr_hi
) & ~APIC_ID_MASK
) |
156 (apic_id
<< APIC_ID_SHIFT
);
157 icr_lo
= (LAPIC_MEM_READ(icr_lo
) & APIC_ICRLO_RESV_MASK
) | icr_lo_val
;
159 LAPIC_MEM_WRITE(icr_hi
, icr_hi
);
160 LAPIC_MEM_WRITE(icr_lo
, icr_lo
);
164 lapic_msr_icr_set(uint32_t apic_id
, uint32_t icr_lo_val
)
166 LAPIC_MSR_WRITE(MSR_X2APIC_ICR
,
167 ((uint64_t)apic_id
<< 32) | ((uint64_t)icr_lo_val
));
/*
 * lapic_init(bsp): enable and configure the local APIC on the calling cpu.
 * Visible work in this fragment: optionally select TSC-deadline timer mode
 * and allocate per-cpu deadline tracking; (BSP only) install the IDT vectors
 * shared by all cpus and apply the AMD HyperTransport LINTEN fixup; program
 * LINT0/LINT1, mask the error/perf-counter/timer LVTs; clear the TPR; mask
 * AMD extended LVT entries; enable the LAPIC via the SVR and set the
 * spurious vector; then calibrate and register the LAPIC cputimer.
 *
 * NOTE(review): this function is whitespace-mangled and many original lines
 * (return type, braces, local declarations, the PCI config-space reads for
 * the AMD fixup, EOI pump loop, etc. — visible as gaps in the embedded line
 * numbering) are MISSING from this extraction.  Do not attempt to compile;
 * restore the body from upstream.
 */
171 * Enable LAPIC, configure interrupts.
174 lapic_init(boolean_t bsp
)
180 /* Decide whether we want to use TSC Deadline mode. */
181 if (lapic_timer_tscdeadline
!= 0 &&
182 (cpu_feature2
& CPUID2_TSCDLT
) &&
183 tsc_invariant
&& tsc_frequency
!= 0) {
184 lapic_use_tscdeadline
= 1;
186 kmalloc(sizeof(struct deadlines
) * (naps
+ 1),
188 M_WAITOK
| M_ZERO
| M_CACHEALIGN
);
195 * Since IDT is shared between BSP and APs, these vectors
196 * only need to be installed once; we do it on BSP.
199 if (cpu_vendor_id
== CPU_VENDOR_AMD
&&
200 CPUID_TO_FAMILY(cpu_id
) >= 0x0f &&
201 CPUID_TO_FAMILY(cpu_id
) < 0x17) { /* XXX */
205 * Set the LINTEN bit in the HyperTransport
206 * Transaction Control Register.
208 * This will cause EXTINT and NMI interrupts
209 * routed over the hypertransport bus to be
210 * fed into the LAPIC LINT0/LINT1. If the bit
211 * isn't set, the interrupts will go to the
212 * general cpu INTR/NMI pins. On a dual-core
213 * cpu the interrupt winds up going to BOTH cpus.
214 * The first cpu that does the interrupt ack
215 * cycle will get the correct interrupt. The
216 * second cpu that does it will get a spurious
217 * interrupt vector (typically IRQ 7).
220 (1 << 31) | /* enable */
221 (0 << 16) | /* bus */
222 (0x18 << 11) | /* dev (cpu + 0x18) */
223 (0 << 8) | /* func */
227 if ((tcr
& 0x00010000) == 0) {
228 kprintf("LAPIC: AMD LINTEN on\n");
229 outl(0xcfc, tcr
|0x00010000);
234 /* Install a 'Spurious INTerrupt' vector */
235 setidt_global(XSPURIOUSINT_OFFSET
, Xspuriousint
,
236 SDT_SYSIGT
, SEL_KPL
, 0);
238 /* Install a timer vector */
239 setidt_global(XTIMER_OFFSET
, Xtimer
,
240 SDT_SYSIGT
, SEL_KPL
, 0);
242 /* Install an inter-CPU IPI for TLB invalidation */
243 setidt_global(XINVLTLB_OFFSET
, Xinvltlb
,
244 SDT_SYSIGT
, SEL_KPL
, 0);
246 /* Install an inter-CPU IPI for IPIQ messaging */
247 setidt_global(XIPIQ_OFFSET
, Xipiq
,
248 SDT_SYSIGT
, SEL_KPL
, 0);
250 /* Install an inter-CPU IPI for CPU stop/restart */
251 setidt_global(XCPUSTOP_OFFSET
, Xcpustop
,
252 SDT_SYSIGT
, SEL_KPL
, 0);
254 /* Install an inter-CPU IPI for TLB invalidation */
255 setidt_global(XSNIFF_OFFSET
, Xsniff
,
256 SDT_SYSIGT
, SEL_KPL
, 0);
260 * Setup LINT0 as ExtINT on the BSP. This is theoretically an
261 * aggregate interrupt input from the 8259. The INTA cycle
262 * will be routed to the external controller (the 8259) which
263 * is expected to supply the vector.
265 * Must be setup edge triggered, active high.
267 * Disable LINT0 on BSP, if I/O APIC is enabled.
269 * Disable LINT0 on the APs. It doesn't matter what delivery
270 * mode we use because we leave it masked.
272 temp
= LAPIC_READ(lvt_lint0
);
273 temp
&= ~(APIC_LVT_MASKED
| APIC_LVT_TRIG_MASK
|
274 APIC_LVT_POLARITY_MASK
| APIC_LVT_DM_MASK
);
276 temp
|= APIC_LVT_DM_EXTINT
;
278 temp
|= APIC_LVT_MASKED
;
280 temp
|= APIC_LVT_DM_FIXED
| APIC_LVT_MASKED
;
282 LAPIC_WRITE(lvt_lint0
, temp
);
285 * Setup LINT1 as NMI.
287 * Must be setup edge trigger, active high.
289 * Enable LINT1 on BSP, if I/O APIC is enabled.
291 * Disable LINT1 on the APs.
293 temp
= LAPIC_READ(lvt_lint1
);
294 temp
&= ~(APIC_LVT_MASKED
| APIC_LVT_TRIG_MASK
|
295 APIC_LVT_POLARITY_MASK
| APIC_LVT_DM_MASK
);
296 temp
|= APIC_LVT_MASKED
| APIC_LVT_DM_NMI
;
297 if (bsp
&& ioapic_enable
)
298 temp
&= ~APIC_LVT_MASKED
;
299 LAPIC_WRITE(lvt_lint1
, temp
);
302 * Mask the LAPIC error interrupt, LAPIC performance counter
305 LAPIC_WRITE(lvt_error
, LAPIC_READ(lvt_error
) | APIC_LVT_MASKED
);
306 LAPIC_WRITE(lvt_pcint
, LAPIC_READ(lvt_pcint
) | APIC_LVT_MASKED
);
309 * Set LAPIC timer vector and mask the LAPIC timer interrupt.
311 timer
= LAPIC_READ(lvt_timer
);
312 timer
&= ~APIC_LVTT_VECTOR
;
313 timer
|= XTIMER_OFFSET
;
314 timer
|= APIC_LVTT_MASKED
;
315 LAPIC_WRITE(lvt_timer
, timer
);
318 * Set the Task Priority Register as needed. At the moment allow
319 * interrupts on all cpus (the APs will remain CLId until they are
322 temp
= LAPIC_READ(tpr
);
323 temp
&= ~APIC_TPR_PRIO
; /* clear priority field */
324 LAPIC_WRITE(tpr
, temp
);
329 if (cpu_vendor_id
== CPU_VENDOR_AMD
&& lapic_mem
!= NULL
&&
330 (LAPIC_MEM_READ(version
) & APIC_VER_AMD_EXT_SPACE
)) {
337 ext_feat
= LAPIC_MEM_READ(ext_feat
);
338 count
= (ext_feat
& APIC_EXTFEAT_MASK
) >> APIC_EXTFEAT_SHIFT
;
339 max_count
= sizeof(lapic_mem
->ext_lvt
) /
340 sizeof(lapic_mem
->ext_lvt
[0]);
341 if (count
> max_count
)
343 for (i
= 0; i
< count
; ++i
) {
344 lvt
= LAPIC_MEM_READ(ext_lvt
[i
].lvt
);
346 lvt
&= ~(APIC_LVT_POLARITY_MASK
| APIC_LVT_TRIG_MASK
|
347 APIC_LVT_DM_MASK
| APIC_LVT_MASKED
);
348 lvt
|= APIC_LVT_MASKED
| APIC_LVT_DM_FIXED
;
351 case APIC_EXTLVT_IBS
:
353 case APIC_EXTLVT_MCA
:
355 case APIC_EXTLVT_DEI
:
357 case APIC_EXTLVT_SBI
:
363 kprintf(" LAPIC AMD elvt%d: 0x%08x",
364 i
, LAPIC_MEM_READ(ext_lvt
[i
].lvt
));
365 if (LAPIC_MEM_READ(ext_lvt
[i
].lvt
) != lvt
)
366 kprintf(" -> 0x%08x", lvt
);
369 LAPIC_MEM_WRITE(ext_lvt
[i
].lvt
, lvt
);
376 temp
= LAPIC_READ(svr
);
377 temp
|= APIC_SVR_ENABLE
; /* enable the LAPIC */
378 temp
&= ~APIC_SVR_FOCUS_DISABLE
; /* enable lopri focus processor */
380 if (LAPIC_READ(version
) & APIC_VER_EOI_SUPP
) {
381 if (temp
& APIC_SVR_EOI_SUPP
) {
382 temp
&= ~APIC_SVR_EOI_SUPP
;
384 kprintf(" LAPIC disabling EOI supp\n");
386 /* (future, on KVM auto-EOI must be disabled) */
387 if (vmm_guest
== VMM_GUEST_KVM
)
388 temp
&= ~APIC_SVR_EOI_SUPP
;
392 * Set the spurious interrupt vector. The low 4 bits of the vector
395 if ((XSPURIOUSINT_OFFSET
& 0x0F) != 0x0F)
396 panic("bad XSPURIOUSINT_OFFSET: 0x%08x", XSPURIOUSINT_OFFSET
);
397 temp
&= ~APIC_SVR_VECTOR
;
398 temp
|= XSPURIOUSINT_OFFSET
;
400 LAPIC_WRITE(svr
, temp
);
403 * Pump out a few EOIs to clean out interrupts that got through
404 * before we were able to set the TPR.
411 lapic_timer_calibrate();
412 if (lapic_timer_enable
) {
413 if (cpu_thermal_feature
& CPUID_THERMAL_ARAT
) {
415 * Local APIC timer will not stop
418 lapic_cputimer_intr
.caps
|=
419 CPUTIMER_INTR_CAP_PS
;
421 if (lapic_use_tscdeadline
) {
422 lapic_cputimer_intr
.reload
=
423 lapic_timer_tscdlt_reload
;
425 cputimer_intr_register(&lapic_cputimer_intr
);
426 cputimer_intr_select(&lapic_cputimer_intr
, 0);
428 } else if (!lapic_use_tscdeadline
) {
429 lapic_timer_set_divisor(lapic_timer_divisor_idx
);
433 apic_dump("apic_initialize()");
437 lapic_timer_set_divisor(int divisor_idx
)
439 KKASSERT(divisor_idx
>= 0 && divisor_idx
< APIC_TIMER_NDIVISORS
);
440 LAPIC_WRITE(dcr_timer
, lapic_timer_divisors
[divisor_idx
]);
444 lapic_timer_oneshot(u_int count
)
448 value
= LAPIC_READ(lvt_timer
);
449 value
&= ~(APIC_LVTT_PERIODIC
| APIC_LVTT_TSCDLT
);
450 LAPIC_WRITE(lvt_timer
, value
);
451 LAPIC_WRITE(icr_timer
, count
);
455 lapic_timer_oneshot_quick(u_int count
)
457 LAPIC_WRITE(icr_timer
, count
);
461 lapic_timer_tscdeadline_quick(uint64_t diff
)
463 uint64_t val
= rdtsc() + diff
;
465 wrmsr(MSR_TSC_DEADLINE
, val
);
466 tsc_deadlines
[mycpuid
].timestamp
= val
;
/*
 * lapic_scale_to_tsc(value, scale): convert 'value' units of 1/scale
 * seconds into TSC ticks (visible: multiplies by tsc_frequency).
 *
 * NOTE(review): mangled extraction — the return type, local declaration,
 * the division by 'scale', and the return statement are missing from this
 * view; restore from upstream.
 */
470 lapic_scale_to_tsc(unsigned value
, unsigned scale
)
475 val
*= tsc_frequency
;
/*
 * do_tsc_calibration(us, apic_delay_tsc): measure the LAPIC timer frequency
 * against the (invariant) TSC.  Samples ccr_timer bracketed by
 * rdtsc_ordered() pairs before and after a delay of 'us' microseconds,
 * retrying a sample (up to MAX_MEASURE_RETRIES) when the bracketing TSC
 * reads are more than twice the expected LAPIC access latency apart, then
 * scales the observed LAPIC tick count by tsc_frequency/diff.
 *
 * NOTE(review): mangled extraction — return type, braces, the delay call,
 * retry 'goto' bodies, and several declarations are missing (gaps in the
 * embedded numbering); restore from upstream.
 */
481 #define MAX_MEASURE_RETRIES 100
484 do_tsc_calibration(u_int us
, u_int64_t apic_delay_tsc
)
486 u_int64_t old_tsc1
, old_tsc2
, new_tsc1
, new_tsc2
;
487 u_int64_t diff
, count
;
489 u_int32_t start
, end
;
490 int retries1
= 0, retries2
= 0;
493 lapic_timer_oneshot_quick(APIC_TIMER_MAX_COUNT
);
494 old_tsc1
= rdtsc_ordered();
495 start
= LAPIC_READ(ccr_timer
);
496 old_tsc2
= rdtsc_ordered();
497 if (apic_delay_tsc
> 0 && retries1
< MAX_MEASURE_RETRIES
&&
498 old_tsc2
- old_tsc1
> 2 * apic_delay_tsc
) {
504 new_tsc1
= rdtsc_ordered();
505 end
= LAPIC_READ(ccr_timer
);
506 new_tsc2
= rdtsc_ordered();
507 if (apic_delay_tsc
> 0 && retries2
< MAX_MEASURE_RETRIES
&&
508 new_tsc2
- new_tsc1
> 2 * apic_delay_tsc
) {
517 /* Make sure the lapic can count for up to 2s */
518 a
= (unsigned)APIC_TIMER_MAX_COUNT
;
519 if (us
< 2000000 && (u_int64_t
)count
* 2000000 >= a
* us
)
522 if (lapic_calibrate_test
> 0 && (retries1
> 0 || retries2
> 0)) {
523 kprintf("%s: retries1=%d retries2=%d\n",
524 __func__
, retries1
, retries2
);
527 diff
= (new_tsc1
- old_tsc1
) + (new_tsc2
- old_tsc2
);
528 /* XXX First estimate if the total TSC diff value makes sense */
529 /* This will almost overflow, but only almost :) */
530 count
= (2 * count
* tsc_frequency
) / diff
;
/*
 * do_cputimer_calibration(us): measure the LAPIC timer frequency against
 * the current sys_cputimer.  Reads ccr_timer and the cputimer count before
 * and after a delay, then scales the LAPIC tick delta by
 * sys_cputimer->freq / elapsed-cputimer-ticks via muldivu64().
 *
 * NOTE(review): mangled extraction — return type, braces, the delay call,
 * and the return statement are missing; restore from upstream.
 */
536 do_cputimer_calibration(u_int us
)
539 sysclock_t start
, end
;
540 uint32_t beginning
, finish
;
542 lapic_timer_oneshot(APIC_TIMER_MAX_COUNT
);
543 beginning
= LAPIC_READ(ccr_timer
);
544 start
= sys_cputimer
->count();
546 end
= sys_cputimer
->count();
547 finish
= LAPIC_READ(ccr_timer
);
550 /* value is the LAPIC timer difference. */
551 value
= (uint32_t)(beginning
- finish
);
552 /* end is the sys_cputimer difference. */
556 value
= muldivu64(value
, sys_cputimer
->freq
, end
);
/*
 * lapic_timer_calibrate(): determine the LAPIC timer frequency.
 * In TSC-deadline mode the frequency is simply tsc_frequency.  Otherwise,
 * when the TSC is usable (invariant, nonzero frequency, and
 * hw.lapic_calibrate_fast is set) it first estimates LAPIC register access
 * latency over 100 samples (using the minimum), then walks the divisor
 * table calling do_tsc_calibration()/do_cputimer_calibration() until a
 * divisor yields a valid result; panics if none does.  With
 * hw.lapic_calibrate_test set, prints calibration results for a sweep of
 * delays.
 *
 * NOTE(review): mangled extraction — braces, several declarations, and
 * some statements are missing (gaps in embedded numbering); restore from
 * upstream.
 */
562 lapic_timer_calibrate(void)
565 u_int64_t apic_delay_tsc
= 0;
566 int use_tsc_calibration
= 0;
568 /* No need to calibrate lapic_timer, if we will use TSC Deadline mode */
569 if (lapic_use_tscdeadline
) {
570 lapic_cputimer_intr
.freq
= tsc_frequency
;
572 "lapic: TSC Deadline Mode: frequency %lu Hz\n",
573 lapic_cputimer_intr
.freq
);
578 * On real hardware, tsc_invariant == 0 wouldn't be an issue, but in
579 * a virtual machine the frequency may get changed by the host.
581 if (tsc_frequency
!= 0 && tsc_invariant
&& lapic_calibrate_fast
)
582 use_tsc_calibration
= 1;
584 if (use_tsc_calibration
) {
585 u_int64_t min_apic_tsc
= 0, max_apic_tsc
= 0;
586 u_int64_t old_tsc
, new_tsc
;
591 lapic_timer_oneshot(APIC_TIMER_MAX_COUNT
);
592 for (i
= 0; i
< 10; i
++)
593 val
= LAPIC_READ(ccr_timer
);
595 for (i
= 0; i
< 100; i
++) {
596 old_tsc
= rdtsc_ordered();
597 val
= LAPIC_READ(ccr_timer
);
598 new_tsc
= rdtsc_ordered();
600 apic_delay_tsc
+= new_tsc
;
601 if (min_apic_tsc
== 0 ||
602 min_apic_tsc
> new_tsc
) {
603 min_apic_tsc
= new_tsc
;
605 if (max_apic_tsc
< new_tsc
)
606 max_apic_tsc
= new_tsc
;
608 apic_delay_tsc
/= 100;
610 "LAPIC latency (in TSC ticks): %lu min: %lu max: %lu\n",
611 apic_delay_tsc
, min_apic_tsc
, max_apic_tsc
);
612 apic_delay_tsc
= min_apic_tsc
;
615 if (!use_tsc_calibration
) {
619 * Do some exercising of the lapic timer access. This improves
620 * precision of the subsequent calibration run in at least some
621 * virtualization cases.
623 lapic_timer_set_divisor(0);
624 for (i
= 0; i
< 10; i
++)
625 (void)do_cputimer_calibration(100);
627 /* Try to calibrate the local APIC timer. */
628 for (lapic_timer_divisor_idx
= 0;
629 lapic_timer_divisor_idx
< APIC_TIMER_NDIVISORS
;
630 lapic_timer_divisor_idx
++) {
631 lapic_timer_set_divisor(lapic_timer_divisor_idx
);
632 if (use_tsc_calibration
) {
633 value
= do_tsc_calibration(200*1000, apic_delay_tsc
);
635 value
= do_cputimer_calibration(2*1000*1000);
640 if (lapic_timer_divisor_idx
>= APIC_TIMER_NDIVISORS
)
641 panic("lapic: no proper timer divisor?!");
642 lapic_cputimer_intr
.freq
= value
;
644 kprintf("lapic: divisor index %d, frequency %lu Hz\n",
645 lapic_timer_divisor_idx
, lapic_cputimer_intr
.freq
);
647 if (lapic_calibrate_test
> 0) {
651 for (i
= 1; i
<= 20; i
++) {
652 if (use_tsc_calibration
) {
653 freq
= do_tsc_calibration(i
*100*1000,
656 freq
= do_cputimer_calibration(i
*100*1000);
659 kprintf("%ums: %lu\n", i
* 100, freq
);
/*
 * cputimer_intr reload path for TSC-deadline mode.  Clamps the requested
 * reload to 60 seconds, converts sysclock ticks to TSC ticks, and writes
 * MSR_TSC_DEADLINE — but if a timer is already running on this cpu, only
 * when the new deadline is earlier than (or the old one is stale relative
 * to) the recorded per-cpu deadline.
 *
 * NOTE(review): mangled extraction — the computation of 'now'/'val', the
 * Intel-specific branch body at original line 678, braces, and some minimum
 * clamping are missing; restore from upstream.
 */
665 lapic_timer_tscdlt_reload(struct cputimer_intr
*cti
, sysclock_t reload
)
667 struct globaldata
*gd
= mycpu
;
668 uint64_t diff
, now
, val
;
671 * Set maximum deadline to 60 seconds
673 if (reload
> sys_cputimer
->freq
* 60)
674 reload
= sys_cputimer
->freq
* 60;
675 diff
= muldivu64(reload
, tsc_frequency
, sys_cputimer
->freq
);
678 if (cpu_vendor_id
== CPU_VENDOR_INTEL
)
684 if (gd
->gd_timer_running
) {
685 uint64_t deadline
= tsc_deadlines
[mycpuid
].timestamp
;
686 if (deadline
== 0 || now
> deadline
|| val
< deadline
) {
687 wrmsr(MSR_TSC_DEADLINE
, val
);
688 tsc_deadlines
[mycpuid
].timestamp
= val
;
691 gd
->gd_timer_running
= 1;
692 wrmsr(MSR_TSC_DEADLINE
, val
);
693 tsc_deadlines
[mycpuid
].timestamp
= val
;
/*
 * cputimer_intr reload path using the memory-mapped icr_timer register.
 * Converts sysclock ticks to LAPIC timer ticks, clamps negative and
 * over-32-bit values, and rewrites icr_timer — unconditionally when no
 * timer is running on this cpu, otherwise only to shorten the current
 * countdown (new reload below the live ccr_timer value).
 *
 * NOTE(review): mangled extraction — the clamp-target statements after the
 * two range checks, braces, and the else line are missing; restore from
 * upstream.
 */
698 lapic_mem_timer_intr_reload(struct cputimer_intr
*cti
, sysclock_t reload
)
700 struct globaldata
*gd
= mycpu
;
702 if ((ssysclock_t
)reload
< 0)
704 reload
= muldivu64(reload
, cti
->freq
, sys_cputimer
->freq
);
707 if (reload
> 0xFFFFFFFF)
710 if (gd
->gd_timer_running
) {
711 if (reload
< LAPIC_MEM_READ(ccr_timer
))
712 LAPIC_MEM_WRITE(icr_timer
, (uint32_t)reload
);
714 gd
->gd_timer_running
= 1;
715 LAPIC_MEM_WRITE(icr_timer
, (uint32_t)reload
);
/*
 * x2APIC (MSR) variant of the timer reload path; identical logic to
 * lapic_mem_timer_intr_reload() but accesses the timer CCR/ICR through
 * MSR_X2APIC_CCR_TIMER / MSR_X2APIC_ICR_TIMER.
 *
 * NOTE(review): mangled extraction — same missing clamp/else/brace lines
 * as the memory-mapped variant; restore from upstream.
 */
720 lapic_msr_timer_intr_reload(struct cputimer_intr
*cti
, sysclock_t reload
)
722 struct globaldata
*gd
= mycpu
;
724 if ((ssysclock_t
)reload
< 0)
726 reload
= muldivu64(reload
, cti
->freq
, sys_cputimer
->freq
);
729 if (reload
> 0xFFFFFFFF)
732 if (gd
->gd_timer_running
) {
733 if (reload
< LAPIC_MSR_READ(MSR_X2APIC_CCR_TIMER
))
734 LAPIC_MSR_WRITE(MSR_X2APIC_ICR_TIMER
, (uint32_t)reload
);
736 gd
->gd_timer_running
= 1;
737 LAPIC_MSR_WRITE(MSR_X2APIC_ICR_TIMER
, (uint32_t)reload
);
/*
 * cputimer_intr enable hook: unmask the LAPIC timer LVT, select one-shot
 * or TSC-deadline mode per lapic_use_tscdeadline, and run the AMD C1E
 * fixup handler.
 *
 * NOTE(review): mangled extraction — return type, braces, the local
 * declaration for 'timer', and the statement guarded by the second
 * lapic_use_tscdeadline check (original lines 752-753) are missing;
 * restore from upstream.
 */
742 lapic_timer_intr_enable(struct cputimer_intr
*cti __unused
)
746 timer
= LAPIC_READ(lvt_timer
);
747 timer
&= ~(APIC_LVTT_MASKED
| APIC_LVTT_PERIODIC
| APIC_LVTT_TSCDLT
);
748 if (lapic_use_tscdeadline
)
749 timer
|= APIC_LVTT_TSCDLT
;
750 LAPIC_WRITE(lvt_timer
, timer
);
751 if (lapic_use_tscdeadline
)
754 lapic_timer_fixup_handler(NULL
);
/*
 * Per-cpu fixup for the AMD C1E erratum: on bare-metal AMD family-0Fh
 * parts in the affected stepping range, reading MSR 0xc0010055 (Interrupt
 * Pending Message register) and clearing C1eOnCmpHalt (bit 28) and
 * SmiOnCmpHalt (bit 27) keeps the LAPIC timer alive; the MSR is never
 * touched under a hypervisor.  Afterwards the timer is re-kicked (TSC
 * deadline or a 2-tick one-shot) since we may have stalled.
 *
 * NOTE(review): mangled extraction — return type, braces, use of 'arg'
 * and 'c1e_test', and several guard lines are missing; restore from
 * upstream.
 */
758 lapic_timer_fixup_handler(void *arg
)
765 if (cpu_vendor_id
== CPU_VENDOR_AMD
) {
766 int c1e_test
= lapic_timer_c1e_test
;
769 if (vmm_guest
== VMM_GUEST_NONE
) {
773 * Don't do this C1E testing and adjustment
774 * on virtual machines, the best case for
775 * accessing this MSR is a NOOP; the worst
776 * cases could be pretty nasty, e.g. crash.
783 * Detect the presence of C1E capability mostly on latest
784 * dual-cores (or future) k8 family. This feature renders
785 * the local APIC timer dead, so we disable it by reading
786 * the Interrupt Pending Message register and clearing both
787 * C1eOnCmpHalt (bit 28) and SmiOnCmpHalt (bit 27).
790 * "BIOS and Kernel Developer's Guide for AMD NPT
791 * Family 0Fh Processors"
792 * #32559 revision 3.00
794 if ((cpu_id
& 0x00000f00) == 0x00000f00 &&
795 (cpu_id
& 0x0fff0000) >= 0x00040000 &&
799 msr
= rdmsr(0xc0010055);
800 if (msr
& 0x18000000) {
801 struct globaldata
*gd
= mycpu
;
803 kprintf("cpu%d: AMD C1E detected\n",
805 wrmsr(0xc0010055, msr
& ~0x18000000ULL
);
808 * We are kinda stalled;
811 gd
->gd_timer_running
= 1;
812 if (lapic_use_tscdeadline
) {
813 /* Maybe reached in Virtual Machines? */
814 lapic_timer_tscdeadline_quick(5000);
816 lapic_timer_oneshot_quick(2);
/*
 * Per-cpu restart handler: re-applies the C1E fixup and re-arms the timer
 * (TSC deadline or a 2-tick one-shot) when it was not already running.
 *
 * NOTE(review): mangled extraction — return type, braces, the 'started'
 * local and the gd_timer_running guard condition are missing; restore from
 * upstream.
 */
827 lapic_timer_restart_handler(void *dummy __unused
)
831 lapic_timer_fixup_handler(&started
);
833 struct globaldata
*gd
= mycpu
;
835 gd
->gd_timer_running
= 1;
836 if (lapic_use_tscdeadline
) {
837 /* Maybe reached in Virtual Machines? */
838 lapic_timer_tscdeadline_quick(5000);
840 lapic_timer_oneshot_quick(2);
846 * This function is called only by ACPICA code currently:
847 * - AMD C1E fixup. AMD C1E only seems to happen after ACPI
848 * module controls PM. So once ACPICA is attached, we try
849 * to apply the fixup to prevent LAPIC timer from hanging.
852 lapic_timer_intr_pmfixup(struct cputimer_intr
*cti __unused
)
854 lwkt_send_ipiq_mask(smp_active_mask
,
855 lapic_timer_fixup_handler
, NULL
);
859 lapic_timer_intr_restart(struct cputimer_intr
*cti __unused
)
861 lwkt_send_ipiq_mask(smp_active_mask
, lapic_timer_restart_handler
, NULL
);
/*
 * apic_dump(str): diagnostic dump of key LAPIC registers (LINT0/LINT1,
 * TPR, SVR) tagged with the calling cpu id.
 *
 * NOTE(review): mangled extraction — the function signature and the tail
 * of the second kprintf's argument list are missing; restore from
 * upstream.
 */
866 * dump contents of local APIC registers
871 kprintf("SMP: CPU%d %s:\n", mycpu
->gd_cpuid
, str
);
872 kprintf(" lint0: 0x%08x lint1: 0x%08x TPR: 0x%08x SVR: 0x%08x\n",
873 LAPIC_READ(lvt_lint0
), LAPIC_READ(lvt_lint1
), LAPIC_READ(tpr
),
/*
 * lapic_mem_icr_unpend(func): spin until any previously issued ICR command
 * finishes (APIC_DELSTAT_PEND clears), panicking or warning — tagged with
 * the caller's name — if it stays pending for about a second of TSC time.
 *
 * NOTE(review): mangled extraction — return type, braces, the 'tsc'
 * initialization, and the panic/kprintf argument tails are missing;
 * restore from upstream.
 */
878 * Inter Processor Interrupt functions.
882 lapic_mem_icr_unpend(const char *func
)
884 if (LAPIC_MEM_READ(icr_lo
) & APIC_DELSTAT_PEND
) {
889 while (LAPIC_MEM_READ(icr_lo
) & APIC_DELSTAT_PEND
) {
891 if ((tsc_sclock_t
)(rdtsc() -
892 (tsc
+ tsc_frequency
)) > 0) {
895 panic("%s: cpu%d apic stalled",
898 kprintf("%s: cpu%d apic stalled\n",
/*
 * lapic_mem_ipi(dest_type, vector, delivery_mode): issue an IPI through
 * the memory-mapped ICR after draining any pending command.
 *
 * NOTE(review): mangled extraction — return type, braces, the head of the
 * lapic_mem_icr_set() call (original line 936) and the return statement
 * are missing; restore from upstream.
 */
907 * Send APIC IPI 'vector' to 'destType' via 'deliveryMode'.
909 * destType is 1 of: APIC_DEST_SELF, APIC_DEST_ALLISELF, APIC_DEST_ALLESELF
910 * vector is any valid SYSTEM INT vector
911 * delivery_mode is 1 of: APIC_DELMODE_FIXED, APIC_DELMODE_LOWPRIO
915 * We now implement a per-cpu interlock (gd->gd_npoll) to prevent more than
916 * one IPI from being sent to any given cpu at a time. Thus we no longer
917 * have to process incoming IPIs while waiting for the status to clear.
918 * No deadlock should be possible.
920 * We now physically disable interrupts for the lapic ICR operation. If
921 * we do not do this then it looks like an EOI sent to the lapic (which
922 * occurs even with a critical section) can interfere with the command
923 * register ready status and cause an IPI to be lost.
925 * e.g. an interrupt can occur, issue the EOI, IRET, and cause the command
926 * register to busy just before we write to icr_lo, resulting in a lost
927 * issuance. This only appears to occur on Intel cpus and is not
928 * documented. It could simply be that cpus are so fast these days that
929 * it was always an issue, but is only now rearing its ugly head. This
933 lapic_mem_ipi(int dest_type
, int vector
, int delivery_mode
)
935 lapic_mem_icr_unpend(__func__
);
937 dest_type
| APIC_LEVEL_ASSERT
| delivery_mode
| vector
);
/*
 * lapic_msr_ipi(dest_type, vector, delivery_mode): x2APIC IPI issue; no
 * unpend step is needed because the MSR ICR write is non-blocking.
 *
 * NOTE(review): mangled extraction — return type, braces, the head of the
 * lapic_msr_icr_set() call and the return statement are missing; restore
 * from upstream.
 */
942 lapic_msr_ipi(int dest_type
, int vector
, int delivery_mode
)
945 dest_type
| APIC_LEVEL_ASSERT
| delivery_mode
| vector
);
950 * Interrupts must be hard-disabled by caller
953 lapic_mem_single_ipi(int cpu
, int vector
, int delivery_mode
)
955 lapic_mem_icr_unpend(__func__
);
956 lapic_mem_icr_set(CPUID_TO_APICID(cpu
),
957 APIC_DEST_DESTFLD
| APIC_LEVEL_ASSERT
| delivery_mode
| vector
);
961 lapic_msr_single_ipi(int cpu
, int vector
, int delivery_mode
)
963 lapic_msr_icr_set(CPUID_TO_APICID(cpu
),
964 APIC_DEST_DESTFLD
| APIC_LEVEL_ASSERT
| delivery_mode
| vector
);
968 * Send APIC IPI 'vector' to 'target's via 'delivery_mode'.
970 * target is a bitmask of destination cpus. Vector is any
971 * valid system INT vector. Delivery mode may be either
972 * APIC_DELMODE_FIXED or APIC_DELMODE_LOWPRIO.
974 * Interrupts must be hard-disabled by caller
977 selected_apic_ipi(cpumask_t target
, int vector
, int delivery_mode
)
979 while (CPUMASK_TESTNZERO(target
)) {
980 int n
= BSFCPUMASK(target
);
981 CPUMASK_NANDBIT(target
, n
);
982 single_apic_ipi(n
, vector
, delivery_mode
);
/*
 * set_apic_timer(us): start a microsecond downcount.  In TSC-deadline
 * mode only the per-cpu downcount_time is recorded (no hardware arming);
 * otherwise the microseconds are converted to LAPIC timer ticks (rounded
 * up) using the calibrated frequency and loaded as a one-shot.
 *
 * NOTE(review): mangled extraction — return type, braces, locals, and the
 * rdtsc-based 'val' baseline in the deadline path are missing; restore
 * from upstream.
 */
987 * Load a 'downcount time' in uSeconds.
990 set_apic_timer(int us
)
994 if (lapic_use_tscdeadline
) {
997 val
= lapic_scale_to_tsc(us
, 1000000);
999 /* No need to arm the lapic here, just track the timeout. */
1000 tsc_deadlines
[mycpuid
].downcount_time
= val
;
1005 * When we reach here, lapic timer's frequency
1006 * must have been calculated as well as the
1007 * divisor (lapic->dcr_timer is setup during the
1008 * divisor calculation).
1010 KKASSERT(lapic_cputimer_intr
.freq
!= 0 &&
1011 lapic_timer_divisor_idx
>= 0);
1013 count
= ((us
* (int64_t)lapic_cputimer_intr
.freq
) + 999999) / 1000000;
1014 lapic_timer_oneshot(count
);
/*
 * read_apic_timer(): remaining downcount in microseconds, rounded up.
 * Deadline mode compares the recorded per-cpu downcount_time against the
 * current TSC and converts the remainder with tsc_frequency; one-shot mode
 * reads ccr_timer and converts with the calibrated LAPIC frequency.
 *
 * NOTE(review): mangled extraction — return type, braces, the 'now'
 * computation, scaling to microseconds, and the return statements are
 * missing; restore from upstream.
 */
1019 * Read remaining time in timer, in microseconds (rounded up).
1022 read_apic_timer(void)
1026 if (lapic_use_tscdeadline
) {
1029 val
= tsc_deadlines
[mycpuid
].downcount_time
;
1031 if (val
== 0 || now
> val
) {
1036 val
+= (tsc_frequency
- 1);
1037 val
/= tsc_frequency
;
1044 val
= LAPIC_READ(ccr_timer
);
1048 KKASSERT(lapic_cputimer_intr
.freq
> 0);
1050 val
+= (lapic_cputimer_intr
.freq
- 1);
1051 val
/= lapic_cputimer_intr
.freq
;
/*
 * Spin-style delay: arm the downcount for 'count' microseconds and busy
 * wait until read_apic_timer() reports it drained.
 *
 * NOTE(review): mangled extraction — the function signature (presumably
 * u_sleep(int count); confirm against upstream) and braces are missing.
 */
1059 * Spin-style delay, set delay time in uS, spin till it drains.
1064 set_apic_timer(count
);
1065 while (read_apic_timer())
/*
 * lapic_unused_apic_id(start): scan upward from 'start' for an APIC id
 * with no cpu assigned (APICID_TO_CPUID(i) == -1).
 *
 * NOTE(review): mangled extraction — return type, braces, and the return
 * statements (found id / not-found sentinel) are missing; restore from
 * upstream.
 */
1070 lapic_unused_apic_id(int start
)
1074 for (i
= start
; i
< APICID_MAX
; ++i
) {
1075 if (APICID_TO_CPUID(i
) == -1)
/*
 * lapic_map(lapic_addr): map the LAPIC register page uncacheable and
 * publish the mapping through the global lapic_mem pointer.
 *
 * NOTE(review): mangled extraction — the return-type line and braces are
 * missing (presumably void; confirm against lapic.h).
 */
1082 lapic_map(vm_paddr_t lapic_addr
)
1084 lapic_mem
= pmap_mapdev_uncacheable(lapic_addr
, sizeof(struct LAPIC
));
/*
 * lapic_x2apic_enter(bsp): switch this cpu into x2APIC mode.  Enables the
 * mode via MSR_APICBASE if the BIOS has not already, then repoints the
 * EOI/IPI function pointers and the cputimer reload hook at the MSR-based
 * implementations.
 *
 * NOTE(review): mangled extraction — return type, braces, the wrmsr call
 * head for enabling x2APIC, and any bsp-conditional logic are missing;
 * restore from upstream.
 */
1088 lapic_x2apic_enter(boolean_t bsp
)
1092 KASSERT(x2apic_enable
, ("X2APIC mode is not enabled"));
1095 * X2APIC mode is requested, if it has not been enabled by the BIOS,
1098 apic_base
= rdmsr(MSR_APICBASE
);
1099 if ((apic_base
& APICBASE_X2APIC
) == 0) {
1101 apic_base
| APICBASE_X2APIC
| APICBASE_ENABLED
);
1104 lapic_eoi
= lapic_msr_eoi
;
1105 apic_ipi
= lapic_msr_ipi
;
1106 single_apic_ipi
= lapic_msr_single_ipi
;
1107 lapic_cputimer_intr
.reload
= lapic_msr_timer_intr_reload
;
/*
 * Priority-ordered list of registered LAPIC enumerators (e.g. MADT/MPTABLE
 * probes); populated via lapic_enumerator_register(), consumed by
 * lapic_config().
 */
1111 static TAILQ_HEAD(, lapic_enumerator
) lapic_enumerators
=
1112 TAILQ_HEAD_INITIALIZER(lapic_enumerators
);
/*
 * lapic_config(): one-time LAPIC bring-up policy.  Installs the
 * memory-mapped EOI/IPI handlers as the default, decides whether to use
 * x2APIC (hw.x2apic_enable tunable, CPUID support, and a forced-on path
 * when the BIOS already enabled it), resets the APICID<->CPUID map, runs
 * the registered enumerators in priority order, and clamps the usable AP
 * count against MAXCPU and the hw.ap_max tunable.
 *
 * NOTE(review): mangled extraction — the function signature, braces,
 * several else/return statements, and the tails of some kprintf calls are
 * missing (gaps in the embedded numbering); restore from upstream.
 */
1117 struct lapic_enumerator
*e
;
1119 int error
, i
, ap_max
;
1121 KKASSERT(lapic_enable
);
1123 lapic_eoi
= lapic_mem_eoi
;
1124 apic_ipi
= lapic_mem_ipi
;
1125 single_apic_ipi
= lapic_mem_single_ipi
;
1127 TUNABLE_INT_FETCH("hw.x2apic_enable", &x2apic_enable
);
1128 if (x2apic_enable
< 0)
1130 if ((cpu_feature2
& CPUID2_X2APIC
) == 0) {
1131 /* X2APIC is not supported. */
1135 * If the BIOS enabled the X2APIC mode, then we would stick
1136 * with the X2APIC mode.
1138 apic_base
= rdmsr(MSR_APICBASE
);
1139 if (apic_base
& APICBASE_X2APIC
) {
1140 if (x2apic_enable
== 0)
1141 kprintf("LAPIC: BIOS enabled X2APIC mode, force on\n");
1143 kprintf("LAPIC: BIOS enabled X2APIC mode\n");
1147 if (cpu_feature2
& CPUID2_X2APIC
) {
1148 apic_base
= rdmsr(MSR_APICBASE
);
1149 if (apic_base
& APICBASE_X2APIC
)
1150 kprintf("LAPIC: BIOS already enabled X2APIC mode\n");
1153 if (x2apic_enable
) {
1155 * Enter X2APIC mode.
1157 kprintf("LAPIC: enter X2APIC mode\n");
1158 lapic_x2apic_enter(TRUE
);
1161 for (i
= 0; i
< NAPICID
; ++i
)
1162 APICID_TO_CPUID(i
) = -1;
1164 TAILQ_FOREACH(e
, &lapic_enumerators
, lapic_link
) {
1165 error
= e
->lapic_probe(e
);
1170 kprintf("LAPIC: Can't find LAPIC\n");
1174 error
= e
->lapic_enumerate(e
);
1176 kprintf("LAPIC: enumeration failed\n");
1180 /* LAPIC is usable now. */
1183 ap_max
= MAXCPU
- 1;
1184 TUNABLE_INT_FETCH("hw.ap_max", &ap_max
);
1185 if (ap_max
> MAXCPU
- 1)
1186 ap_max
= MAXCPU
- 1;
1188 if (naps
> ap_max
) {
1189 kprintf("LAPIC: Warning use only %d out of %d "
/*
 * lapic_enumerator_register(ne): insert an enumerator into the global list
 * ordered by lapic_prio — before the first existing entry with lower
 * priority, otherwise at the tail.
 *
 * NOTE(review): mangled extraction — return type, braces, and the early
 * return after TAILQ_INSERT_BEFORE are missing; restore from upstream.
 */
1199 lapic_enumerator_register(struct lapic_enumerator
*ne
)
1201 struct lapic_enumerator
*e
;
1203 TAILQ_FOREACH(e
, &lapic_enumerators
, lapic_link
) {
1204 if (e
->lapic_prio
< ne
->lapic_prio
) {
1205 TAILQ_INSERT_BEFORE(e
, ne
, lapic_link
);
1209 TAILQ_INSERT_TAIL(&lapic_enumerators
, ne
, lapic_link
);
1213 lapic_set_cpuid(int cpu_id
, int apic_id
)
1215 CPUID_TO_APICID(cpu_id
) = apic_id
;
1216 APICID_TO_CPUID(apic_id
) = cpu_id
;
1220 lapic_fixup_noioapic(void)
1224 /* Only allowed on BSP */
1225 KKASSERT(mycpuid
== 0);
1226 KKASSERT(!ioapic_enable
);
1228 temp
= LAPIC_READ(lvt_lint0
);
1229 temp
&= ~APIC_LVT_MASKED
;
1230 LAPIC_WRITE(lvt_lint0
, temp
);
1232 temp
= LAPIC_READ(lvt_lint1
);
1233 temp
|= APIC_LVT_MASKED
;
1234 LAPIC_WRITE(lvt_lint1
, temp
);
/*
 * Bodies of lapic_mem_eoi() / lapic_msr_eoi(): acknowledge the current
 * interrupt by writing 0 to the EOI register via the memory-mapped window
 * or the x2APIC MSR respectively.
 *
 * NOTE(review): mangled extraction — both function signatures, braces, and
 * the log_lapic() trace calls (if present upstream) are missing.
 */
1241 LAPIC_MEM_WRITE(eoi
, 0);
1248 LAPIC_MSR_WRITE(MSR_X2APIC_EOI
, 0);
/*
 * lapic_mem_seticr_sync(apic_id, icr_lo_val): issue an ICR command through
 * the memory-mapped interface and spin until the delivery-pending bit
 * clears.
 *
 * NOTE(review): mangled extraction — return type, braces, and the spin
 * loop's body statement are missing; restore from upstream.
 */
1252 lapic_mem_seticr_sync(uint32_t apic_id
, uint32_t icr_lo_val
)
1254 lapic_mem_icr_set(apic_id
, icr_lo_val
);
1255 while (LAPIC_MEM_READ(icr_lo
) & APIC_DELSTAT_PEND
)
/*
 * lapic_seticr_sync(apic_id, icr_lo_val): mode-dispatching synchronous ICR
 * write — MSR path for x2APIC, otherwise the memory-mapped synchronous
 * variant.
 *
 * NOTE(review): mangled extraction — return type, braces, and the
 * if/else structure selecting between the two calls are missing; restore
 * from upstream.
 */
1260 lapic_seticr_sync(uint32_t apic_id
, uint32_t icr_lo_val
)
1263 lapic_msr_icr_set(apic_id
, icr_lo_val
);
1265 lapic_mem_seticr_sync(apic_id
, icr_lo_val
);
/*
 * lapic_sysinit(): SYSINIT hook run at SI_BOOT2_LAPIC.  Runs
 * lapic_config(); on success initializes the BSP's local APIC, otherwise
 * disables the I/O APIC path (with a warning if it was enabled) and
 * reinitializes the legacy ICU.
 *
 * NOTE(review): mangled extraction — return type, braces, the
 * error-branch structure, and the lapic_init(TRUE) call are missing;
 * restore from upstream.
 */
1269 lapic_sysinit(void *dummy __unused
)
1274 error
= lapic_config();
1282 /* Initialize BSP's local APIC */
1284 } else if (ioapic_enable
) {
1285 kprintf("IOAPIC disabled - lapic was not enabled\n");
1287 icu_reinit_noioapic();
/* Register lapic_sysinit() to run first within the SI_BOOT2_LAPIC stage. */
1290 SYSINIT(lapic
, SI_BOOT2_LAPIC
, SI_ORDER_FIRST
, lapic_sysinit
, NULL
);