/* sys/platform/pc64/apic/lapic.c -- DragonFly BSD Local APIC support */
/*
 * Copyright (c) 1996, by Steve Passe
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. The name of the developer may NOT be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD: src/sys/i386/i386/mpapic.c,v 1.37.2.7 2003/01/25 02:31:47 peter Exp $
 */
28 #include <sys/param.h>
29 #include <sys/systm.h>
30 #include <sys/kernel.h>
31 #include <sys/ktr.h>
32 #include <sys/bus.h>
33 #include <sys/machintr.h>
34 #include <machine/globaldata.h>
35 #include <machine/clock.h>
36 #include <machine/limits.h>
37 #include <machine/smp.h>
38 #include <machine/md_var.h>
39 #include <machine/pmap.h>
40 #include <machine/specialreg.h>
41 #include <machine_base/apic/lapic.h>
42 #include <machine_base/apic/ioapic.h>
43 #include <machine_base/apic/ioapic_abi.h>
44 #include <machine_base/apic/apicvar.h>
45 #include <machine_base/icu/icu_var.h>
46 #include <machine/segments.h>
47 #include <sys/thread2.h>
48 #include <sys/spinlock2.h>
50 #include <machine/cputypes.h>
51 #include <machine/intr_machdep.h>
/* Kernel trace (KTR) hooks for LAPIC events; currently only EOI is logged. */
#if !defined(KTR_LAPIC)
#define KTR_LAPIC	KTR_ALL
#endif
KTR_INFO_MASTER(lapic);
KTR_INFO(KTR_LAPIC, lapic, eoi, 0, "eoi");
/* Emit a KTR record for the named lapic event. */
#define log_lapic(name)	KTR_LOG(lapic_ ## name)
/* Number of application processors, discovered by the enumerator. */
extern int naps;

/* Memory-mapped window onto the local APIC register file (see lapic_map()). */
volatile lapic_t *lapic;

static void	lapic_timer_calibrate(void);
static void	lapic_timer_set_divisor(int);
static void	lapic_timer_fixup_handler(void *);
static void	lapic_timer_restart_handler(void *);

/* Tunable: use the LAPIC timer as the interrupt cputimer (default on). */
static int	lapic_timer_enable = 1;
TUNABLE_INT("hw.lapic_timer_enable", &lapic_timer_enable);

/* Tunable: prefer TSC-deadline mode when the cpu supports it (default on). */
static int	lapic_timer_tscdeadline = 1;
TUNABLE_INT("hw.lapic_timer_tscdeadline", &lapic_timer_tscdeadline);

/* Tunable: run extra calibration passes and print diagnostics. */
static int	lapic_calibrate_test = 0;
TUNABLE_INT("hw.lapic_calibrate_test", &lapic_calibrate_test);

/* Tunable: calibrate against the TSC (fast) instead of sys_cputimer. */
static int	lapic_calibrate_fast = 1;
TUNABLE_INT("hw.lapic_calibrate_fast", &lapic_calibrate_fast);

static void	lapic_timer_tscdlt_reload(struct cputimer_intr *, sysclock_t);
static void	lapic_timer_intr_reload(struct cputimer_intr *, sysclock_t);
static void	lapic_timer_intr_enable(struct cputimer_intr *);
static void	lapic_timer_intr_restart(struct cputimer_intr *);
static void	lapic_timer_intr_pmfixup(struct cputimer_intr *);

/*
 * Interrupt cputimer descriptor backed by the LAPIC timer.  .reload is
 * swapped to lapic_timer_tscdlt_reload when TSC-deadline mode is used.
 */
static struct cputimer_intr lapic_cputimer_intr = {
	.freq = 0,
	.reload = lapic_timer_intr_reload,
	.enable = lapic_timer_intr_enable,
	.config = cputimer_intr_default_config,
	.restart = lapic_timer_intr_restart,
	.pmfixup = lapic_timer_intr_pmfixup,
	.initclock = cputimer_intr_default_initclock,
	.pcpuhand = NULL,
	.next = SLIST_ENTRY_INITIALIZER,
	.name = "lapic",
	.type = CPUTIMER_INTR_LAPIC,
	.prio = CPUTIMER_INTR_PRIO_LAPIC,
	.caps = CPUTIMER_INTR_CAP_NONE,
	.priv = NULL
};

/* Index into lapic_timer_divisors[] chosen by calibration; -1 = not set. */
static int		lapic_timer_divisor_idx = -1;
/* Divide-configuration register values, tried smallest divisor first. */
static const uint32_t	lapic_timer_divisors[] = {
	APIC_TDCR_2,	APIC_TDCR_4,	APIC_TDCR_8,	APIC_TDCR_16,
	APIC_TDCR_32,	APIC_TDCR_64,	APIC_TDCR_128,	APIC_TDCR_1
};
#define APIC_TIMER_NDIVISORS (int)(NELEM(lapic_timer_divisors))

/* Non-zero once we commit to TSC-deadline mode (decided in lapic_init()). */
static int	lapic_use_tscdeadline = 0;
/* The raw TSC frequency might not fit into a sysclock_t value. */
static int	lapic_timer_tscfreq_shift;

/*
 * APIC ID <-> CPU ID mapping structures.
 */
int	cpu_id_to_apic_id[NAPICID];
int	apic_id_to_cpu_id[NAPICID];
int	lapic_enable = 1;

/* Separate cachelines for each cpu's info. */
struct deadlines {
	uint64_t timestamp;	/* TSC value last armed via MSR_TSC_DEADLINE */
	uint64_t downcount_time;	/* deadline tracked by set_apic_timer() */
	uint64_t padding[6];	/* pad entry to a full cacheline */
};
struct deadlines *tsc_deadlines = NULL;

static void	lapic_eoi_func(void);

/* Indirect EOI entry point, set to lapic_eoi_func() when LAPIC is enabled. */
void		(*lapic_eoi)(void);
/*
 * Enable LAPIC, configure interrupts.
 *
 * Called once on the BSP (bsp == TRUE) and once per AP.  The IDT vector
 * installation and the TSC-deadline decision are BSP-only; LVT programming
 * and SVR enabling happen on every cpu.
 */
void
lapic_init(boolean_t bsp)
{
	uint32_t timer;
	u_int   temp;

	if (bsp) {
		/* Decide whether we want to use TSC Deadline mode. */
		if (lapic_timer_tscdeadline != 0 &&
		    (cpu_feature2 & CPUID2_TSCDLT) &&
		    tsc_invariant && tsc_frequency != 0) {
			lapic_use_tscdeadline = 1;
			/* Per-cpu, cacheline-aligned deadline bookkeeping. */
			tsc_deadlines = kmalloc_cachealign(
			    sizeof(struct deadlines) * (naps + 1),
			    M_DEVBUF, M_WAITOK | M_ZERO);
		}
	}

	/*
	 * Install vectors
	 *
	 * Since IDT is shared between BSP and APs, these vectors
	 * only need to be installed once; we do it on BSP.
	 */
	if (bsp) {
		if (cpu_vendor_id == CPU_VENDOR_AMD &&
		    CPUID_TO_FAMILY(cpu_id) >= 0x0f &&
		    CPUID_TO_FAMILY(cpu_id) < 0x17) {	/* XXX */
			uint32_t tcr;

			/*
			 * Set the LINTEN bit in the HyperTransport
			 * Transaction Control Register.
			 *
			 * This will cause EXTINT and NMI interrupts
			 * routed over the hypertransport bus to be
			 * fed into the LAPIC LINT0/LINT1.  If the bit
			 * isn't set, the interrupts will go to the
			 * general cpu INTR/NMI pins.  On a dual-core
			 * cpu the interrupt winds up going to BOTH cpus.
			 * The first cpu that does the interrupt ack
			 * cycle will get the correct interrupt.  The
			 * second cpu that does it will get a spurious
			 * interrupt vector (typically IRQ 7).
			 */
			outl(0x0cf8,
			    (1 << 31) |	/* enable */
			    (0 << 16) |	/* bus */
			    (0x18 << 11) |	/* dev (cpu + 0x18) */
			    (0 << 8) |	/* func */
			    0x68	/* reg */
			    );
			tcr = inl(0xcfc);
			if ((tcr & 0x00010000) == 0) {
				kprintf("LAPIC: AMD LINTEN on\n");
				outl(0xcfc, tcr|0x00010000);
			}
			outl(0x0cf8, 0);
		}

		/* Install a 'Spurious INTerrupt' vector */
		setidt_global(XSPURIOUSINT_OFFSET, Xspuriousint,
		    SDT_SYSIGT, SEL_KPL, 0);

		/* Install a timer vector */
		setidt_global(XTIMER_OFFSET, Xtimer,
		    SDT_SYSIGT, SEL_KPL, 0);

		/* Install an inter-CPU IPI for TLB invalidation */
		setidt_global(XINVLTLB_OFFSET, Xinvltlb,
		    SDT_SYSIGT, SEL_KPL, 0);

		/* Install an inter-CPU IPI for IPIQ messaging */
		setidt_global(XIPIQ_OFFSET, Xipiq,
		    SDT_SYSIGT, SEL_KPL, 0);

		/* Install an inter-CPU IPI for CPU stop/restart */
		setidt_global(XCPUSTOP_OFFSET, Xcpustop,
		    SDT_SYSIGT, SEL_KPL, 0);

		/* Install an inter-CPU IPI for the sniff (state sampling) handler */
		setidt_global(XSNIFF_OFFSET, Xsniff,
		    SDT_SYSIGT, SEL_KPL, 0);
	}

	/*
	 * Setup LINT0 as ExtINT on the BSP.  This is theoretically an
	 * aggregate interrupt input from the 8259.  The INTA cycle
	 * will be routed to the external controller (the 8259) which
	 * is expected to supply the vector.
	 *
	 * Must be setup edge triggered, active high.
	 *
	 * Disable LINT0 on BSP, if I/O APIC is enabled.
	 *
	 * Disable LINT0 on the APs.  It doesn't matter what delivery
	 * mode we use because we leave it masked.
	 */
	temp = lapic->lvt_lint0;
	temp &= ~(APIC_LVT_MASKED | APIC_LVT_TRIG_MASK |
		  APIC_LVT_POLARITY_MASK | APIC_LVT_DM_MASK);
	if (bsp) {
		temp |= APIC_LVT_DM_EXTINT;
		if (ioapic_enable)
			temp |= APIC_LVT_MASKED;
	} else {
		temp |= APIC_LVT_DM_FIXED | APIC_LVT_MASKED;
	}
	lapic->lvt_lint0 = temp;

	/*
	 * Setup LINT1 as NMI.
	 *
	 * Must be setup edge trigger, active high.
	 *
	 * Enable LINT1 on BSP, if I/O APIC is enabled.
	 *
	 * Disable LINT1 on the APs.
	 */
	temp = lapic->lvt_lint1;
	temp &= ~(APIC_LVT_MASKED | APIC_LVT_TRIG_MASK |
		  APIC_LVT_POLARITY_MASK | APIC_LVT_DM_MASK);
	temp |= APIC_LVT_MASKED | APIC_LVT_DM_NMI;
	if (bsp && ioapic_enable)
		temp &= ~APIC_LVT_MASKED;
	lapic->lvt_lint1 = temp;

	/*
	 * Mask the LAPIC error interrupt, LAPIC performance counter
	 * interrupt.
	 */
	lapic->lvt_error = lapic->lvt_error | APIC_LVT_MASKED;
	lapic->lvt_pcint = lapic->lvt_pcint | APIC_LVT_MASKED;

	/*
	 * Set LAPIC timer vector and mask the LAPIC timer interrupt.
	 */
	timer = lapic->lvt_timer;
	timer &= ~APIC_LVTT_VECTOR;
	timer |= XTIMER_OFFSET;
	timer |= APIC_LVTT_MASKED;
	lapic->lvt_timer = timer;

	/*
	 * Set the Task Priority Register as needed.   At the moment allow
	 * interrupts on all cpus (the APs will remain CLId until they are
	 * ready to deal).
	 */
	temp = lapic->tpr;
	temp &= ~APIC_TPR_PRIO;		/* clear priority field */
	lapic->tpr = temp;

	/*
	 * AMD specific setup
	 */
	if (cpu_vendor_id == CPU_VENDOR_AMD &&
	    (lapic->version & APIC_VER_AMD_EXT_SPACE)) {
		uint32_t ext_feat;
		uint32_t count;
		uint32_t max_count;
		uint32_t lvt;
		uint32_t i;

		/* Mask every advertised extended LVT entry (fixed mode). */
		ext_feat = lapic->ext_feat;
		count = (ext_feat & APIC_EXTFEAT_MASK) >> APIC_EXTFEAT_SHIFT;
		max_count = sizeof(lapic->ext_lvt) / sizeof(lapic->ext_lvt[0]);
		if (count > max_count)
			count = max_count;
		for (i = 0; i < count; ++i) {
			lvt = lapic->ext_lvt[i].lvt;

			lvt &= ~(APIC_LVT_POLARITY_MASK | APIC_LVT_TRIG_MASK |
				 APIC_LVT_DM_MASK | APIC_LVT_MASKED);
			lvt |= APIC_LVT_MASKED | APIC_LVT_DM_FIXED;

			/* Placeholder per-entry handling; all treated alike. */
			switch(i) {
			case APIC_EXTLVT_IBS:
				break;
			case APIC_EXTLVT_MCA:
				break;
			case APIC_EXTLVT_DEI:
				break;
			case APIC_EXTLVT_SBI:
				break;
			default:
				break;
			}
			if (bsp) {
				kprintf(" LAPIC AMD elvt%d: 0x%08x",
					i, lapic->ext_lvt[i].lvt);
				if (lapic->ext_lvt[i].lvt != lvt)
					kprintf(" -> 0x%08x", lvt);
				kprintf("\n");
			}
			lapic->ext_lvt[i].lvt = lvt;
		}
	}

	/*
	 * Enable the LAPIC
	 */
	temp = lapic->svr;
	temp |= APIC_SVR_ENABLE;	/* enable the LAPIC */
	temp &= ~APIC_SVR_FOCUS_DISABLE; /* enable lopri focus processor */

	if (lapic->version & APIC_VER_EOI_SUPP) {
		/* Force broadcast EOI so the I/O APIC sees every EOI. */
		if (temp & APIC_SVR_EOI_SUPP) {
			temp &= ~APIC_SVR_EOI_SUPP;
			if (bsp)
				kprintf(" LAPIC disabling EOI supp\n");
		}
	}

	/*
	 * Set the spurious interrupt vector.  The low 4 bits of the vector
	 * must be 1111.
	 */
	if ((XSPURIOUSINT_OFFSET & 0x0F) != 0x0F)
		panic("bad XSPURIOUSINT_OFFSET: 0x%08x", XSPURIOUSINT_OFFSET);
	temp &= ~APIC_SVR_VECTOR;
	temp |= XSPURIOUSINT_OFFSET;

	lapic->svr = temp;

	/*
	 * Pump out a few EOIs to clean out interrupts that got through
	 * before we were able to set the TPR.
	 */
	lapic->eoi = 0;
	lapic->eoi = 0;
	lapic->eoi = 0;

	if (bsp) {
		lapic_timer_calibrate();
		if (lapic_timer_enable) {
			if (cpu_thermal_feature & CPUID_THERMAL_ARAT) {
				/*
				 * Local APIC timer will not stop
				 * in deep C-state.
				 */
				lapic_cputimer_intr.caps |=
				    CPUTIMER_INTR_CAP_PS;
			}
			if (lapic_use_tscdeadline) {
				lapic_cputimer_intr.reload =
				    lapic_timer_tscdlt_reload;
			}
			cputimer_intr_register(&lapic_cputimer_intr);
			cputimer_intr_select(&lapic_cputimer_intr, 0);
		}
	} else if (!lapic_use_tscdeadline) {
		/* APs reuse the divisor the BSP's calibration selected. */
		lapic_timer_set_divisor(lapic_timer_divisor_idx);
	}

	if (bootverbose)
		apic_dump("apic_initialize()");
}
/*
 * Program the LAPIC timer divide-configuration register from the
 * divisor table; divisor_idx must have been chosen by calibration.
 */
static void
lapic_timer_set_divisor(int divisor_idx)
{
	KKASSERT(divisor_idx >= 0 && divisor_idx < APIC_TIMER_NDIVISORS);
	lapic->dcr_timer = lapic_timer_divisors[divisor_idx];
}
/*
 * Arm the LAPIC timer for a single countdown of 'count' ticks,
 * clearing periodic and TSC-deadline mode bits first.
 */
static void
lapic_timer_oneshot(u_int count)
{
	uint32_t value;

	value = lapic->lvt_timer;
	value &= ~(APIC_LVTT_PERIODIC | APIC_LVTT_TSCDLT);
	lapic->lvt_timer = value;
	lapic->icr_timer = count;
}
/*
 * Re-arm the LAPIC timer's initial-count register only; assumes the
 * LVT timer entry is already in one-shot mode.
 */
static void
lapic_timer_oneshot_quick(u_int count)
{
	lapic->icr_timer = count;
}
/*
 * Arm a TSC-deadline timer 'diff' TSC ticks in the future and record
 * the armed deadline for this cpu.
 */
static void
lapic_timer_tscdeadline_quick(uint64_t diff)
{
	uint64_t val = rdtsc() + diff;

	wrmsr(MSR_TSC_DEADLINE, val);
	tsc_deadlines[mycpuid].timestamp = val;
}
429 static uint64_t
430 lapic_scale_to_tsc(unsigned value, unsigned scale)
432 uint64_t val;
434 val = value;
435 val *= tsc_frequency;
436 val += (scale - 1);
437 val /= scale;
438 return val;
/* Maximum re-reads when a LAPIC register access takes suspiciously long. */
#define MAX_MEASURE_RETRIES	100

/*
 * Measure the LAPIC timer frequency against the TSC.
 *
 * Samples ccr_timer bracketed by rdtsc_ordered() at the start and end of a
 * DELAY(us) window, retrying a sample if the bracketing TSC delta exceeds
 * twice the expected register-access latency (apic_delay_tsc).  Returns the
 * estimated LAPIC timer frequency in Hz, or 0 if the timer wrapped/expired
 * or the current divisor cannot cover a 2 second countdown.
 */
static u_int64_t
do_tsc_calibration(u_int us, u_int64_t apic_delay_tsc)
{
	u_int64_t old_tsc1, old_tsc2, new_tsc1, new_tsc2;
	u_int64_t diff, count;
	u_int64_t a;
	u_int32_t start, end;
	int retries1 = 0, retries2 = 0;

retry1:
	lapic_timer_oneshot_quick(APIC_TIMER_MAX_COUNT);
	old_tsc1 = rdtsc_ordered();
	start = lapic->ccr_timer;
	old_tsc2 = rdtsc_ordered();
	if (apic_delay_tsc > 0 && retries1 < MAX_MEASURE_RETRIES &&
	    old_tsc2 - old_tsc1 > 2 * apic_delay_tsc) {
		/* Sample was disturbed (interrupt, SMI, ...); retake it. */
		retries1++;
		goto retry1;
	}
	DELAY(us);
retry2:
	new_tsc1 = rdtsc_ordered();
	end = lapic->ccr_timer;
	new_tsc2 = rdtsc_ordered();
	if (apic_delay_tsc > 0 && retries2 < MAX_MEASURE_RETRIES &&
	    new_tsc2 - new_tsc1 > 2 * apic_delay_tsc) {
		retries2++;
		goto retry2;
	}
	if (end == 0)
		return 0;

	count = start - end;

	/* Make sure the lapic can count for up to 2s */
	a = (unsigned)APIC_TIMER_MAX_COUNT;
	if (us < 2000000 && (u_int64_t)count * 2000000 >= a * us)
		return 0;

	if (lapic_calibrate_test > 0 && (retries1 > 0 || retries2 > 0)) {
		kprintf("%s: retries1=%d retries2=%d\n",
		    __func__, retries1, retries2);
	}

	diff = (new_tsc1 - old_tsc1) + (new_tsc2 - old_tsc2);
	/* XXX First estimate if the total TSC diff value makes sense */
	/* This will almost overflow, but only almost :) */
	count = (2 * count * tsc_frequency) / diff;

	return count;
}
/*
 * Measure the LAPIC timer frequency against sys_cputimer over a DELAY(us)
 * window.  Returns the estimated frequency in Hz, or 0 if the LAPIC timer
 * expired or sys_cputimer did not advance.
 */
static uint64_t
do_cputimer_calibration(u_int us)
{
	sysclock_t value;
	sysclock_t start, end, beginning, finish;

	lapic_timer_oneshot(APIC_TIMER_MAX_COUNT);
	beginning = lapic->ccr_timer;
	start = sys_cputimer->count();
	DELAY(us);
	end = sys_cputimer->count();
	finish = lapic->ccr_timer;
	if (finish == 0)
		return 0;
	/* value is the LAPIC timer difference. */
	value = beginning - finish;
	/* end is the sys_cputimer difference. */
	end -= start;
	if (end == 0)
		return 0;
	value = ((uint64_t)value * sys_cputimer->freq) / end;
	return value;
}
/*
 * Determine the LAPIC timer frequency (and divisor) used by the interrupt
 * cputimer.  In TSC-deadline mode only a frequency shift is computed; the
 * LAPIC timer itself needs no calibration.  Otherwise each divisor is tried,
 * smallest first, until a calibration run succeeds.
 */
static void
lapic_timer_calibrate(void)
{
	sysclock_t value;
	u_int64_t apic_delay_tsc = 0;
	int use_tsc_calibration = 0;

	/* No need to calibrate lapic_timer, if we will use TSC Deadline mode */
	if (lapic_use_tscdeadline) {
		/* Shift until the frequency fits into a signed 32-bit value. */
		lapic_timer_tscfreq_shift = 0;
		while ((tsc_frequency >> lapic_timer_tscfreq_shift) > INT_MAX)
			lapic_timer_tscfreq_shift++;
		lapic_cputimer_intr.freq =
		    tsc_frequency >> lapic_timer_tscfreq_shift;
		kprintf(
		    "lapic: TSC Deadline Mode: shift %d, frequency %u Hz\n",
		    lapic_timer_tscfreq_shift, lapic_cputimer_intr.freq);
		return;
	}

	/*
	 * On real hardware, tsc_invariant == 0 wouldn't be an issue, but in
	 * a virtual machine the frequency may get changed by the host.
	 */
	if (tsc_frequency != 0 && tsc_invariant && lapic_calibrate_fast)
		use_tsc_calibration = 1;

	if (use_tsc_calibration) {
		u_int64_t min_apic_tsc = 0, max_apic_tsc = 0;
		u_int64_t old_tsc, new_tsc;
		sysclock_t val;
		int i;

		/* warm up */
		lapic_timer_oneshot(APIC_TIMER_MAX_COUNT);
		for (i = 0; i < 10; i++)
			val = lapic->ccr_timer;

		/* Measure min/avg/max latency of a ccr_timer read. */
		for (i = 0; i < 100; i++) {
			old_tsc = rdtsc_ordered();
			val = lapic->ccr_timer;
			new_tsc = rdtsc_ordered();
			new_tsc -= old_tsc;
			apic_delay_tsc += new_tsc;
			if (min_apic_tsc == 0 ||
			    min_apic_tsc > new_tsc) {
				min_apic_tsc = new_tsc;
			}
			if (max_apic_tsc < new_tsc)
				max_apic_tsc = new_tsc;
		}
		apic_delay_tsc /= 100;
		kprintf(
		    "LAPIC latency (in TSC ticks): %lu min: %lu max: %lu\n",
		    apic_delay_tsc, min_apic_tsc, max_apic_tsc);
		apic_delay_tsc = min_apic_tsc;
	}

	if (!use_tsc_calibration) {
		int i;

		/*
		 * Do some exercising of the lapic timer access. This improves
		 * precision of the subsequent calibration run in at least some
		 * virtualization cases.
		 */
		lapic_timer_set_divisor(0);
		for (i = 0; i < 10; i++)
			(void)do_cputimer_calibration(100);
	}
	/* Try to calibrate the local APIC timer. */
	for (lapic_timer_divisor_idx = 0;
	     lapic_timer_divisor_idx < APIC_TIMER_NDIVISORS;
	     lapic_timer_divisor_idx++) {
		lapic_timer_set_divisor(lapic_timer_divisor_idx);
		if (use_tsc_calibration) {
			value = do_tsc_calibration(200*1000, apic_delay_tsc);
		} else {
			value = do_cputimer_calibration(2*1000*1000);
		}
		if (value != 0)
			break;
	}
	if (lapic_timer_divisor_idx >= APIC_TIMER_NDIVISORS)
		panic("lapic: no proper timer divisor?!");
	lapic_cputimer_intr.freq = value;

	kprintf("lapic: divisor index %d, frequency %u Hz\n",
		lapic_timer_divisor_idx, lapic_cputimer_intr.freq);

	/* Optional cross-check: calibrate over several window lengths. */
	if (lapic_calibrate_test > 0) {
		uint64_t freq;
		int i;

		for (i = 1; i <= 20; i++) {
			if (use_tsc_calibration) {
				freq = do_tsc_calibration(i*100*1000,
				    apic_delay_tsc);
			} else {
				freq = do_cputimer_calibration(i*100*1000);
			}
			if (freq != 0)
				kprintf("%ums: %lu\n", i * 100, freq);
		}
	}
}
/*
 * cputimer reload hook for TSC-deadline mode.  Converts 'reload' from
 * sys_cputimer ticks to TSC ticks and arms MSR_TSC_DEADLINE, but only
 * moves an already-armed deadline if the new one is earlier.
 */
static void
lapic_timer_tscdlt_reload(struct cputimer_intr *cti, sysclock_t reload)
{
	struct globaldata *gd = mycpu;
	uint64_t diff, now, val;

	/* Clamp the reload request; also keeps the conversion from overflowing. */
	if (reload > 1000*1000*1000)
		reload = 1000*1000*1000;
	diff = (uint64_t)reload * tsc_frequency / sys_cputimer->freq;
	if (diff < 4)
		diff = 4;
	/* Serialize before rdtsc(); lfence suffices on Intel, mfence otherwise. */
	if (cpu_vendor_id == CPU_VENDOR_INTEL)
		cpu_lfence();
	else
		cpu_mfence();
	now = rdtsc();
	val = now + diff;
	if (gd->gd_timer_running) {
		uint64_t deadline = tsc_deadlines[mycpuid].timestamp;

		/* Re-arm only if unarmed, expired, or the new deadline is sooner. */
		if (deadline == 0 || now > deadline || val < deadline) {
			wrmsr(MSR_TSC_DEADLINE, val);
			tsc_deadlines[mycpuid].timestamp = val;
		}
	} else {
		gd->gd_timer_running = 1;
		wrmsr(MSR_TSC_DEADLINE, val);
		tsc_deadlines[mycpuid].timestamp = val;
	}
}
/*
 * cputimer reload hook for one-shot mode.  Converts 'reload' from
 * sys_cputimer ticks to LAPIC timer ticks; only shortens an already
 * running countdown, never lengthens it.
 */
static void
lapic_timer_intr_reload(struct cputimer_intr *cti, sysclock_t reload)
{
	struct globaldata *gd = mycpu;

	reload = (int64_t)reload * cti->freq / sys_cputimer->freq;
	if (reload < 2)
		reload = 2;

	if (gd->gd_timer_running) {
		if (reload < lapic->ccr_timer)
			lapic_timer_oneshot_quick(reload);
	} else {
		gd->gd_timer_running = 1;
		lapic_timer_oneshot_quick(reload);
	}
}
/*
 * cputimer enable hook: unmask the LAPIC timer LVT entry and select
 * one-shot or TSC-deadline mode, then run the AMD C1E fixup once.
 */
static void
lapic_timer_intr_enable(struct cputimer_intr *cti __unused)
{
	uint32_t timer;

	timer = lapic->lvt_timer;
	timer &= ~(APIC_LVTT_MASKED | APIC_LVTT_PERIODIC | APIC_LVTT_TSCDLT);
	if (lapic_use_tscdeadline)
		timer |= APIC_LVTT_TSCDLT;
	lapic->lvt_timer = timer;
	/* Order the LVT mode change before any deadline MSR writes. */
	if (lapic_use_tscdeadline)
		cpu_mfence();

	lapic_timer_fixup_handler(NULL);
}
/*
 * Per-cpu fixup for AMD C1E, which can stop the LAPIC timer.  If C1E is
 * found and disabled the timer is kick-started; '*started' (when non-NULL)
 * reports whether a kick-start happened.
 */
static void
lapic_timer_fixup_handler(void *arg)
{
	int *started = arg;

	if (started != NULL)
		*started = 0;

	if (cpu_vendor_id == CPU_VENDOR_AMD) {
		/*
		 * Detect the presence of C1E capability mostly on latest
		 * dual-cores (or future) k8 family.  This feature renders
		 * the local APIC timer dead, so we disable it by reading
		 * the Interrupt Pending Message register and clearing both
		 * C1eOnCmpHalt (bit 28) and SmiOnCmpHalt (bit 27).
		 *
		 * Reference:
		 *   "BIOS and Kernel Developer's Guide for AMD NPT
		 *    Family 0Fh Processors"
		 *   #32559 revision 3.00
		 */
		if ((cpu_id & 0x00000f00) == 0x00000f00 &&
		    (cpu_id & 0x0fff0000) >= 0x00040000) {
			uint64_t msr;

			msr = rdmsr(0xc0010055);
			if (msr & 0x18000000) {
				struct globaldata *gd = mycpu;

				kprintf("cpu%d: AMD C1E detected\n",
					gd->gd_cpuid);
				wrmsr(0xc0010055, msr & ~0x18000000ULL);

				/*
				 * We are kinda stalled;
				 * kick start again.
				 */
				gd->gd_timer_running = 1;
				if (lapic_use_tscdeadline) {
					/* Maybe reached in Virtual Machines? */
					lapic_timer_tscdeadline_quick(5000);
				} else {
					lapic_timer_oneshot_quick(2);
				}

				if (started != NULL)
					*started = 1;
			}
		}
	}
}
/*
 * Per-cpu restart handler: apply the C1E fixup and, if that did not
 * already kick-start the timer, kick-start it here.
 */
static void
lapic_timer_restart_handler(void *dummy __unused)
{
	int started;

	lapic_timer_fixup_handler(&started);
	if (!started) {
		struct globaldata *gd = mycpu;

		gd->gd_timer_running = 1;
		if (lapic_use_tscdeadline) {
			/* Maybe reached in Virtual Machines? */
			lapic_timer_tscdeadline_quick(5000);
		} else {
			lapic_timer_oneshot_quick(2);
		}
	}
}
/*
 * This function is called only by ACPICA code currently:
 * - AMD C1E fixup.  AMD C1E only seems to happen after ACPI
 *   module controls PM.  So once ACPICA is attached, we try
 *   to apply the fixup to prevent LAPIC timer from hanging.
 */
static void
lapic_timer_intr_pmfixup(struct cputimer_intr *cti __unused)
{
	/* Broadcast the fixup to every active cpu via IPI. */
	lwkt_send_ipiq_mask(smp_active_mask,
	    lapic_timer_fixup_handler, NULL);
}
/* cputimer restart hook: restart the LAPIC timer on every active cpu. */
static void
lapic_timer_intr_restart(struct cputimer_intr *cti __unused)
{
	lwkt_send_ipiq_mask(smp_active_mask, lapic_timer_restart_handler, NULL);
}
/*
 * dump contents of local APIC registers
 */
void
apic_dump(char* str)
{
	kprintf("SMP: CPU%d %s:\n", mycpu->gd_cpuid, str);
	kprintf("     lint0: 0x%08x lint1: 0x%08x TPR: 0x%08x SVR: 0x%08x\n",
		lapic->lvt_lint0, lapic->lvt_lint1, lapic->tpr, lapic->svr);
}
/*
 * Inter Processor Interrupt functions.
 */

/*
 * Send APIC IPI 'vector' to 'destType' via 'deliveryMode'.
 *
 *  destType is 1 of: APIC_DEST_SELF, APIC_DEST_ALLISELF, APIC_DEST_ALLESELF
 *  vector is any valid SYSTEM INT vector
 *  delivery_mode is 1 of: APIC_DELMODE_FIXED, APIC_DELMODE_LOWPRIO
 *
 * WARNINGS!
 *
 * We now implement a per-cpu interlock (gd->gd_npoll) to prevent more than
 * one IPI from being sent to any given cpu at a time.  Thus we no longer
 * have to process incoming IPIs while waiting for the status to clear.
 * No deadlock should be possible.
 *
 * We now physically disable interrupts for the lapic ICR operation.  If
 * we do not do this then it looks like an EOI sent to the lapic (which
 * occurs even with a critical section) can interfere with the command
 * register ready status and cause an IPI to be lost.
 *
 * e.g. an interrupt can occur, issue the EOI, IRET, and cause the command
 * register to busy just before we write to icr_lo, resulting in a lost
 * issuance.  This only appears to occur on Intel cpus and is not
 * documented.  It could simply be that cpus are so fast these days that
 * it was always an issue, but is only now rearing its ugly head.  This
 * is conjecture.
 */
int
apic_ipi(int dest_type, int vector, int delivery_mode)
{
	uint32_t icr_hi;
	uint32_t icr_lo;
	int64_t tsc;
	int loops = 1;

	/* Wait for any previous IPI to be delivered; complain after ~1s. */
	if ((lapic->icr_lo & APIC_DELSTAT_MASK) != 0) {
		tsc = rdtsc();
		while ((lapic->icr_lo & APIC_DELSTAT_MASK) != 0) {
			cpu_pause();
			if ((tsc_sclock_t)(rdtsc() -
					   (tsc + tsc_frequency)) > 0) {
				kprintf("apic_ipi stall cpu %d (sing)\n",
					mycpuid);
				tsc = rdtsc();
				if (++loops > 30)
					panic("apic stall");
			}
		}
	}
	icr_hi = lapic->icr_hi & ~APIC_ID_MASK;
	icr_lo = (lapic->icr_lo & APIC_ICRLO_RESV_MASK) | dest_type |
		 APIC_LEVEL_ASSERT | delivery_mode | vector;
	lapic->icr_hi = icr_hi;
	lapic->icr_lo = icr_lo;	/* writing icr_lo triggers the IPI */

	return 0;
}
/*
 * Send IPI 'vector' to the single cpu 'cpu' via 'delivery_mode'.
 *
 * Interrupts must be hard-disabled by caller
 */
void
single_apic_ipi(int cpu, int vector, int delivery_mode)
{
	uint32_t icr_lo;
	uint32_t icr_hi;
	int64_t tsc;
	int loops = 1;

	/* Wait for any previous IPI to be delivered; complain after ~1s. */
	if ((lapic->icr_lo & APIC_DELSTAT_MASK) != 0) {
		tsc = rdtsc();
		while ((lapic->icr_lo & APIC_DELSTAT_MASK) != 0) {
			cpu_pause();
			if ((tsc_sclock_t)(rdtsc() -
					   (tsc + tsc_frequency)) > 0) {
				kprintf("single_apic_ipi stall cpu %d (sing)\n",
					mycpuid);
				tsc = rdtsc();
				if (++loops > 30)
					panic("apic stall");
			}
		}
	}
	/* Target the destination cpu's APIC ID in ICR_HI. */
	icr_hi = lapic->icr_hi & ~APIC_ID_MASK;
	icr_hi |= (CPUID_TO_APICID(cpu) << 24);

	/* build ICR_LOW */
	icr_lo = (lapic->icr_lo & APIC_ICRLO_RESV_MASK) |
		 APIC_LEVEL_ASSERT | APIC_DEST_DESTFLD | delivery_mode | vector;

	/* write APIC ICR */
	lapic->icr_hi = icr_hi;
	lapic->icr_lo = icr_lo;
}
#if 0
/*
 * Returns 0 if the apic is busy, 1 if we were able to queue the request.
 *
 * NOT WORKING YET!  The code as-is may end up not queueing an IPI at all
 * to the target, and the scheduler does not 'poll' for IPI messages.
 */
static int
single_apic_ipi_passive(int cpu, int vector, int delivery_mode)
{
	u_long  icr_lo;
	u_long  icr_hi;
	unsigned long rflags;

	rflags = read_rflags();
	cpu_disable_intr();
	if ((lapic->icr_lo & APIC_DELSTAT_MASK) != 0) {
		/* Previous IPI still pending; give up instead of spinning. */
		write_rflags(rflags);
		return(0);
	}
	icr_hi = lapic->icr_hi & ~APIC_ID_MASK;
	icr_hi |= (CPUID_TO_APICID(cpu) << 24);
	lapic->icr_hi = icr_hi;

	/* build IRC_LOW */
	icr_lo = (lapic->icr_lo & APIC_RESV2_MASK) |
		 APIC_DEST_DESTFLD | delivery_mode | vector;

	/* write APIC ICR */
	lapic->icr_lo = icr_lo;
	write_rflags(rflags);

	return(1);
}
#endif
929 * Send APIC IPI 'vector' to 'target's via 'delivery_mode'.
931 * target is a bitmask of destination cpus. Vector is any
932 * valid system INT vector. Delivery mode may be either
933 * APIC_DELMODE_FIXED or APIC_DELMODE_LOWPRIO.
935 * Interrupts must be hard-disabled by caller
937 void
938 selected_apic_ipi(cpumask_t target, int vector, int delivery_mode)
940 while (CPUMASK_TESTNZERO(target)) {
941 int n = BSFCPUMASK(target);
942 CPUMASK_NANDBIT(target, n);
943 single_apic_ipi(n, vector, delivery_mode);
/*
 * Load a 'downcount time' in uSeconds.
 */
void
set_apic_timer(int us)
{
	u_int count;

	if (lapic_use_tscdeadline) {
		uint64_t val;

		val = lapic_scale_to_tsc(us, 1000000);
		val += rdtsc();
		/* No need to arm the lapic here, just track the timeout. */
		tsc_deadlines[mycpuid].downcount_time = val;
		return;
	}

	/*
	 * When we reach here, lapic timer's frequency
	 * must have been calculated as well as the
	 * divisor (lapic->dcr_timer is setup during the
	 * divisor calculation).
	 */
	KKASSERT(lapic_cputimer_intr.freq != 0 &&
		 lapic_timer_divisor_idx >= 0);

	/* us -> timer ticks, rounding up. */
	count = ((us * (int64_t)lapic_cputimer_intr.freq) + 999999) / 1000000;
	lapic_timer_oneshot(count);
}
/*
 * Read remaining time in timer, in microseconds (rounded up).
 */
int
read_apic_timer(void)
{
	uint64_t val;

	if (lapic_use_tscdeadline) {
		uint64_t now;

		val = tsc_deadlines[mycpuid].downcount_time;
		now = rdtsc();
		if (val == 0 || now > val) {
			return 0;
		} else {
			/* Remaining TSC ticks -> microseconds, rounded up. */
			val -= now;
			val *= 1000000;
			val += (tsc_frequency - 1);
			val /= tsc_frequency;
			if (val > INT_MAX)
				val = INT_MAX;
			return val;
		}
	}

	val = lapic->ccr_timer;
	if (val == 0)
		return 0;

	/* Remaining timer ticks -> microseconds, rounded up. */
	KKASSERT(lapic_cputimer_intr.freq > 0);
	val *= 1000000;
	val += (lapic_cputimer_intr.freq - 1);
	val /= lapic_cputimer_intr.freq;
	if (val > INT_MAX)
		val = INT_MAX;
	return val;
}
/*
 * Spin-style delay: arm a 'count' microsecond downcount and busy-wait
 * until read_apic_timer() reports it has fully drained.
 */
void
u_sleep(int count)
{
	set_apic_timer(count);
	for (;;) {
		if (read_apic_timer() == 0)
			break;
		/* spin */
	}
}
1031 lapic_unused_apic_id(int start)
1033 int i;
1035 for (i = start; i < APICID_MAX; ++i) {
1036 if (APICID_TO_CPUID(i) == -1)
1037 return i;
1039 return NAPICID;
/*
 * Map the LAPIC register page (physical 'lapic_addr') uncached into KVA
 * and point the global 'lapic' at it.
 */
void
lapic_map(vm_paddr_t lapic_addr)
{
	lapic = pmap_mapdev_uncacheable(lapic_addr, sizeof(struct LAPIC));
}
/* Registered LAPIC enumerators, kept sorted by descending priority. */
static TAILQ_HEAD(, lapic_enumerator) lapic_enumerators =
	TAILQ_HEAD_INITIALIZER(lapic_enumerators);
/*
 * Probe the registered enumerators (highest priority first), run the
 * winner's enumeration, and clamp the AP count to the hw.ap_max tunable.
 * Returns 0 on success or ENXIO if no LAPIC could be found/enumerated.
 */
int
lapic_config(void)
{
	struct lapic_enumerator *e;
	int error, i, ap_max;

	KKASSERT(lapic_enable);

	/* Start with an empty APIC ID -> CPU ID mapping. */
	for (i = 0; i < NAPICID; ++i)
		APICID_TO_CPUID(i) = -1;

	TAILQ_FOREACH(e, &lapic_enumerators, lapic_link) {
		error = e->lapic_probe(e);
		if (!error)
			break;
	}
	if (e == NULL) {
		kprintf("LAPIC: Can't find LAPIC\n");
		return ENXIO;
	}

	error = e->lapic_enumerate(e);
	if (error) {
		kprintf("LAPIC: enumeration failed\n");
		return ENXIO;
	}

	ap_max = MAXCPU - 1;
	TUNABLE_INT_FETCH("hw.ap_max", &ap_max);
	if (ap_max > MAXCPU - 1)
		ap_max = MAXCPU - 1;

	if (naps > ap_max) {
		kprintf("LAPIC: Warning use only %d out of %d "
			"available APs\n",
			ap_max, naps);
		naps = ap_max;
	}

	return 0;
}
1093 void
1094 lapic_enumerator_register(struct lapic_enumerator *ne)
1096 struct lapic_enumerator *e;
1098 TAILQ_FOREACH(e, &lapic_enumerators, lapic_link) {
1099 if (e->lapic_prio < ne->lapic_prio) {
1100 TAILQ_INSERT_BEFORE(e, ne, lapic_link);
1101 return;
1104 TAILQ_INSERT_TAIL(&lapic_enumerators, ne, lapic_link);
1107 void
1108 lapic_set_cpuid(int cpu_id, int apic_id)
1110 CPUID_TO_APICID(cpu_id) = apic_id;
1111 APICID_TO_CPUID(apic_id) = cpu_id;
1114 void
1115 lapic_fixup_noioapic(void)
1117 u_int temp;
1119 /* Only allowed on BSP */
1120 KKASSERT(mycpuid == 0);
1121 KKASSERT(!ioapic_enable);
1123 temp = lapic->lvt_lint0;
1124 temp &= ~APIC_LVT_MASKED;
1125 lapic->lvt_lint0 = temp;
1127 temp = lapic->lvt_lint1;
1128 temp |= APIC_LVT_MASKED;
1129 lapic->lvt_lint1 = temp;
/* Signal end-of-interrupt to the local APIC (and log it via KTR). */
static void
lapic_eoi_func(void)
{
	log_lapic(eoi);
	lapic->eoi = 0;
}
/*
 * Boot-time hook: configure the LAPIC and, if that succeeds, initialize
 * the BSP's local APIC.  On failure fall back to the legacy ICU path
 * (which also requires the I/O APIC to be disabled).
 */
static void
lapic_sysinit(void *dummy __unused)
{
	if (lapic_enable) {
		int error;

		lapic_eoi = lapic_eoi_func;

		error = lapic_config();
		if (error)
			lapic_enable = 0;
	}

	if (lapic_enable) {
		/* Initialize BSP's local APIC */
		lapic_init(TRUE);
	} else if (ioapic_enable) {
		ioapic_enable = 0;
		icu_reinit_noioapic();
	}
}
SYSINIT(lapic, SI_BOOT2_LAPIC, SI_ORDER_FIRST, lapic_sysinit, NULL);