1723 apix module mistakingly sets TPR
[illumos-gate.git] / usr / src / uts / i86pc / io / apix / apix.c
blob1e4b96657697fb070d1a3bd7bec924586844f1b0
1 /*
2 * CDDL HEADER START
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
19 * CDDL HEADER END
23 * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
26 * Copyright (c) 2010, Intel Corporation.
27 * All rights reserved.
30 * Copyright (c) 2012, Joyent, Inc. All rights reserved.
34 * To understand how the apix module interacts with the interrupt subsystem read
35 * the theory statement in uts/i86pc/os/intr.c.
39 * PSMI 1.1 extensions are supported only in 2.6 and later versions.
40 * PSMI 1.2 extensions are supported only in 2.7 and later versions.
41 * PSMI 1.3 and 1.4 extensions are supported in Solaris 10.
42 * PSMI 1.5 extensions are supported in Solaris Nevada.
43 * PSMI 1.6 extensions are supported in Solaris Nevada.
44 * PSMI 1.7 extensions are supported in Solaris Nevada.
46 #define PSMI_1_7
48 #include <sys/processor.h>
49 #include <sys/time.h>
50 #include <sys/psm.h>
51 #include <sys/smp_impldefs.h>
52 #include <sys/cram.h>
53 #include <sys/acpi/acpi.h>
54 #include <sys/acpica.h>
55 #include <sys/psm_common.h>
56 #include <sys/pit.h>
57 #include <sys/ddi.h>
58 #include <sys/sunddi.h>
59 #include <sys/ddi_impldefs.h>
60 #include <sys/pci.h>
61 #include <sys/promif.h>
62 #include <sys/x86_archext.h>
63 #include <sys/cpc_impl.h>
64 #include <sys/uadmin.h>
65 #include <sys/panic.h>
66 #include <sys/debug.h>
67 #include <sys/archsystm.h>
68 #include <sys/trap.h>
69 #include <sys/machsystm.h>
70 #include <sys/sysmacros.h>
71 #include <sys/cpuvar.h>
72 #include <sys/rm_platter.h>
73 #include <sys/privregs.h>
74 #include <sys/note.h>
75 #include <sys/pci_intr_lib.h>
76 #include <sys/spl.h>
77 #include <sys/clock.h>
78 #include <sys/dditypes.h>
79 #include <sys/sunddi.h>
80 #include <sys/x_call.h>
81 #include <sys/reboot.h>
82 #include <sys/mach_intr.h>
83 #include <sys/apix.h>
84 #include <sys/apix_irm_impl.h>
86 static int apix_probe();
87 static void apix_init();
88 static void apix_picinit(void);
89 static int apix_intr_enter(int, int *);
90 static void apix_intr_exit(int, int);
91 static void apix_setspl(int);
92 static int apix_disable_intr(processorid_t);
93 static void apix_enable_intr(processorid_t);
94 static int apix_get_clkvect(int);
95 static int apix_get_ipivect(int, int);
96 static void apix_post_cyclic_setup(void *);
97 static int apix_post_cpu_start();
98 static int apix_intr_ops(dev_info_t *, ddi_intr_handle_impl_t *,
99 psm_intr_op_t, int *);
102 * Helper functions for apix_intr_ops()
104 static void apix_redistribute_compute(void);
105 static int apix_get_pending(apix_vector_t *);
106 static apix_vector_t *apix_get_req_vector(ddi_intr_handle_impl_t *, ushort_t);
107 static int apix_get_intr_info(ddi_intr_handle_impl_t *, apic_get_intr_t *);
108 static char *apix_get_apic_type(void);
109 static int apix_intx_get_pending(int);
110 static void apix_intx_set_mask(int irqno);
111 static void apix_intx_clear_mask(int irqno);
112 static int apix_intx_get_shared(int irqno);
113 static void apix_intx_set_shared(int irqno, int delta);
114 static apix_vector_t *apix_intx_xlate_vector(dev_info_t *, int,
115 struct intrspec *);
116 static int apix_intx_alloc_vector(dev_info_t *, int, struct intrspec *);
118 extern int apic_clkinit(int);
120 /* IRM initialization for APIX PSM module */
121 extern void apix_irm_init(void);
123 extern int irm_enable;
126 * Local static data
128 static struct psm_ops apix_ops = {
129 apix_probe,
131 apix_init,
132 apix_picinit,
133 apix_intr_enter,
134 apix_intr_exit,
135 apix_setspl,
136 apix_addspl,
137 apix_delspl,
138 apix_disable_intr,
139 apix_enable_intr,
140 NULL, /* psm_softlvl_to_irq */
141 NULL, /* psm_set_softintr */
143 apic_set_idlecpu,
144 apic_unset_idlecpu,
146 apic_clkinit,
147 apix_get_clkvect,
148 NULL, /* psm_hrtimeinit */
149 apic_gethrtime,
151 apic_get_next_processorid,
152 apic_cpu_start,
153 apix_post_cpu_start,
154 apic_shutdown,
155 apix_get_ipivect,
156 apic_send_ipi,
158 NULL, /* psm_translate_irq */
159 NULL, /* psm_notify_error */
160 NULL, /* psm_notify_func */
161 apic_timer_reprogram,
162 apic_timer_enable,
163 apic_timer_disable,
164 apix_post_cyclic_setup,
165 apic_preshutdown,
166 apix_intr_ops, /* Advanced DDI Interrupt framework */
167 apic_state, /* save, restore apic state for S3 */
168 apic_cpu_ops, /* CPU control interface. */
171 struct psm_ops *psmops = &apix_ops;
173 static struct psm_info apix_psm_info = {
174 PSM_INFO_VER01_7, /* version */
175 PSM_OWN_EXCLUSIVE, /* ownership */
176 &apix_ops, /* operation */
177 APIX_NAME, /* machine name */
178 "apix MPv1.4 compatible",
181 static void *apix_hdlp;
183 static int apix_is_enabled = 0;
186 * Flag to indicate if APIX is to be enabled only for platforms
187 * with specific hw feature(s).
189 int apix_hw_chk_enable = 1;
192 * Hw features that are checked for enabling APIX support.
194 #define APIX_SUPPORT_X2APIC 0x00000001
195 uint_t apix_supported_hw = APIX_SUPPORT_X2APIC;
198 * apix_lock is used for cpu selection and vector re-binding
200 lock_t apix_lock;
201 apix_impl_t *apixs[NCPU];
203 * Mapping between device interrupt and the allocated vector. Indexed
204 * by major number.
206 apix_dev_vector_t **apix_dev_vector;
208 * Mapping between device major number and cpu id. It gets used
209 * when interrupt binding policy round robin with affinity is
210 * applied. With that policy, devices with the same major number
211 * will be bound to the same CPU.
213 processorid_t *apix_major_to_cpu; /* major to cpu mapping */
214 kmutex_t apix_mutex; /* for apix_dev_vector & apix_major_to_cpu */
216 int apix_nipis = 16; /* Maximum number of IPIs */
218 * Maximum number of vectors in a CPU that can be used for interrupt
219 * allocation (including IPIs and the reserved vectors).
221 int apix_cpu_nvectors = APIX_NVECTOR;
223 /* gcpu.h */
225 extern void apic_do_interrupt(struct regs *rp, trap_trace_rec_t *ttp);
226 extern void apic_change_eoi();
229 * This is the loadable module wrapper
233 _init(void)
235 if (apic_coarse_hrtime)
236 apix_ops.psm_gethrtime = &apic_gettime;
237 return (psm_mod_init(&apix_hdlp, &apix_psm_info));
241 _fini(void)
243 return (psm_mod_fini(&apix_hdlp, &apix_psm_info));
247 _info(struct modinfo *modinfop)
249 return (psm_mod_info(&apix_hdlp, &apix_psm_info, modinfop));
252 static int
253 apix_probe()
255 int rval;
257 if (apix_enable == 0)
258 return (PSM_FAILURE);
260 /* check for hw features if specified */
261 if (apix_hw_chk_enable) {
262 /* check if x2APIC mode is supported */
263 if ((apix_supported_hw & APIX_SUPPORT_X2APIC) ==
264 APIX_SUPPORT_X2APIC) {
265 if (!((apic_local_mode() == LOCAL_X2APIC) ||
266 apic_detect_x2apic())) {
267 /* x2APIC mode is not supported in the hw */
268 apix_enable = 0;
271 if (apix_enable == 0)
272 return (PSM_FAILURE);
275 rval = apic_probe_common(apix_psm_info.p_mach_idstring);
276 if (rval == PSM_SUCCESS)
277 apix_is_enabled = 1;
278 else
279 apix_is_enabled = 0;
280 return (rval);
284 * Initialize the data structures needed by pcplusmpx module.
285 * Specifically, the data structures used by addspl() and delspl()
286 * routines.
288 static void
289 apix_softinit()
291 int i, *iptr;
292 apix_impl_t *hdlp;
293 int nproc;
295 nproc = max(apic_nproc, apic_max_nproc);
297 hdlp = kmem_zalloc(nproc * sizeof (apix_impl_t), KM_SLEEP);
298 for (i = 0; i < nproc; i++) {
299 apixs[i] = &hdlp[i];
300 apixs[i]->x_cpuid = i;
301 LOCK_INIT_CLEAR(&apixs[i]->x_lock);
304 /* cpu 0 is always up (for now) */
305 apic_cpus[0].aci_status = APIC_CPU_ONLINE | APIC_CPU_INTR_ENABLE;
307 iptr = (int *)&apic_irq_table[0];
308 for (i = 0; i <= APIC_MAX_VECTOR; i++) {
309 apic_level_intr[i] = 0;
310 *iptr++ = NULL;
312 mutex_init(&airq_mutex, NULL, MUTEX_DEFAULT, NULL);
314 apix_dev_vector = kmem_zalloc(sizeof (apix_dev_vector_t *) * devcnt,
315 KM_SLEEP);
317 if (apic_intr_policy == INTR_ROUND_ROBIN_WITH_AFFINITY) {
318 apix_major_to_cpu = kmem_zalloc(sizeof (int) * devcnt,
319 KM_SLEEP);
320 for (i = 0; i < devcnt; i++)
321 apix_major_to_cpu[i] = IRQ_UNINIT;
324 mutex_init(&apix_mutex, NULL, MUTEX_DEFAULT, NULL);
327 static int
328 apix_get_pending_spl(void)
330 int cpuid = CPU->cpu_id;
332 return (bsrw_insn(apixs[cpuid]->x_intr_pending));
335 static uintptr_t
336 apix_get_intr_handler(int cpu, short vec)
338 apix_vector_t *apix_vector;
340 ASSERT(cpu < apic_nproc && vec < APIX_NVECTOR);
341 if (cpu >= apic_nproc)
342 return (NULL);
344 apix_vector = apixs[cpu]->x_vectbl[vec];
346 return ((uintptr_t)(apix_vector->v_autovect));
349 #if defined(__amd64)
350 static unsigned char dummy_cpu_pri[MAXIPL + 1] = {
351 0, 0, 0, 0, 0, 0, 0, 0,
352 0, 0, 0, 0, 0, 0, 0, 0, 0
354 #endif
356 static void
357 apix_init()
359 extern void (*do_interrupt_common)(struct regs *, trap_trace_rec_t *);
361 APIC_VERBOSE(INIT, (CE_CONT, "apix: psm_softinit\n"));
363 do_interrupt_common = apix_do_interrupt;
364 addintr = apix_add_avintr;
365 remintr = apix_rem_avintr;
366 get_pending_spl = apix_get_pending_spl;
367 get_intr_handler = apix_get_intr_handler;
368 psm_get_localapicid = apic_get_localapicid;
369 psm_get_ioapicid = apic_get_ioapicid;
371 apix_softinit();
372 #if defined(__amd64)
374 * Make cpu-specific interrupt info point to cr8pri vector
376 CPU->cpu_pri_data = dummy_cpu_pri;
377 #else
378 if (cpuid_have_cr8access(CPU))
379 apic_have_32bit_cr8 = 1;
380 #endif /* __amd64 */
383 * Initialize IRM pool parameters
385 if (irm_enable) {
386 int i;
387 int lowest_irq;
388 int highest_irq;
390 /* number of CPUs present */
391 apix_irminfo.apix_ncpus = apic_nproc;
392 /* total number of entries in all of the IOAPICs present */
393 lowest_irq = apic_io_vectbase[0];
394 highest_irq = apic_io_vectend[0];
395 for (i = 1; i < apic_io_max; i++) {
396 if (apic_io_vectbase[i] < lowest_irq)
397 lowest_irq = apic_io_vectbase[i];
398 if (apic_io_vectend[i] > highest_irq)
399 highest_irq = apic_io_vectend[i];
401 apix_irminfo.apix_ioapic_max_vectors =
402 highest_irq - lowest_irq + 1;
404 * Number of available per-CPU vectors excluding
405 * reserved vectors for Dtrace, int80, system-call,
406 * fast-trap, etc.
408 apix_irminfo.apix_per_cpu_vectors = APIX_NAVINTR -
409 APIX_SW_RESERVED_VECTORS;
411 /* Number of vectors (pre) allocated (SCI and HPET) */
412 apix_irminfo.apix_vectors_allocated = 0;
413 if (apic_hpet_vect != -1)
414 apix_irminfo.apix_vectors_allocated++;
415 if (apic_sci_vect != -1)
416 apix_irminfo.apix_vectors_allocated++;
420 static void
421 apix_init_intr()
423 processorid_t cpun = psm_get_cpu_id();
424 uint_t nlvt;
425 uint32_t svr = AV_UNIT_ENABLE | APIC_SPUR_INTR;
426 extern void cmi_cmci_trap(void);
428 apic_reg_ops->apic_write_task_reg(APIC_MASK_ALL);
430 if (apic_mode == LOCAL_APIC) {
432 * We are running APIC in MMIO mode.
434 if (apic_flat_model) {
435 apic_reg_ops->apic_write(APIC_FORMAT_REG,
436 APIC_FLAT_MODEL);
437 } else {
438 apic_reg_ops->apic_write(APIC_FORMAT_REG,
439 APIC_CLUSTER_MODEL);
442 apic_reg_ops->apic_write(APIC_DEST_REG,
443 AV_HIGH_ORDER >> cpun);
446 if (apic_directed_EOI_supported()) {
448 * Setting the 12th bit in the Spurious Interrupt Vector
449 * Register suppresses broadcast EOIs generated by the local
450 * APIC. The suppression of broadcast EOIs happens only when
451 * interrupts are level-triggered.
453 svr |= APIC_SVR_SUPPRESS_BROADCAST_EOI;
456 /* need to enable APIC before unmasking NMI */
457 apic_reg_ops->apic_write(APIC_SPUR_INT_REG, svr);
460 * Presence of an invalid vector with delivery mode AV_FIXED can
461 * cause an error interrupt, even if the entry is masked...so
462 * write a valid vector to LVT entries along with the mask bit
465 /* All APICs have timer and LINT0/1 */
466 apic_reg_ops->apic_write(APIC_LOCAL_TIMER, AV_MASK|APIC_RESV_IRQ);
467 apic_reg_ops->apic_write(APIC_INT_VECT0, AV_MASK|APIC_RESV_IRQ);
468 apic_reg_ops->apic_write(APIC_INT_VECT1, AV_NMI); /* enable NMI */
471 * On integrated APICs, the number of LVT entries is
472 * 'Max LVT entry' + 1; on 82489DX's (non-integrated
473 * APICs), nlvt is "3" (LINT0, LINT1, and timer)
476 if (apic_cpus[cpun].aci_local_ver < APIC_INTEGRATED_VERS) {
477 nlvt = 3;
478 } else {
479 nlvt = ((apic_reg_ops->apic_read(APIC_VERS_REG) >> 16) &
480 0xFF) + 1;
483 if (nlvt >= 5) {
484 /* Enable performance counter overflow interrupt */
486 if (!is_x86_feature(x86_featureset, X86FSET_MSR))
487 apic_enable_cpcovf_intr = 0;
488 if (apic_enable_cpcovf_intr) {
489 if (apic_cpcovf_vect == 0) {
490 int ipl = APIC_PCINT_IPL;
492 apic_cpcovf_vect = apix_get_ipivect(ipl, -1);
493 ASSERT(apic_cpcovf_vect);
495 (void) add_avintr(NULL, ipl,
496 (avfunc)kcpc_hw_overflow_intr,
497 "apic pcint", apic_cpcovf_vect,
498 NULL, NULL, NULL, NULL);
499 kcpc_hw_overflow_intr_installed = 1;
500 kcpc_hw_enable_cpc_intr =
501 apic_cpcovf_mask_clear;
503 apic_reg_ops->apic_write(APIC_PCINT_VECT,
504 apic_cpcovf_vect);
508 if (nlvt >= 6) {
509 /* Only mask TM intr if the BIOS apparently doesn't use it */
511 uint32_t lvtval;
513 lvtval = apic_reg_ops->apic_read(APIC_THERM_VECT);
514 if (((lvtval & AV_MASK) == AV_MASK) ||
515 ((lvtval & AV_DELIV_MODE) != AV_SMI)) {
516 apic_reg_ops->apic_write(APIC_THERM_VECT,
517 AV_MASK|APIC_RESV_IRQ);
521 /* Enable error interrupt */
523 if (nlvt >= 4 && apic_enable_error_intr) {
524 if (apic_errvect == 0) {
525 int ipl = 0xf; /* get highest priority intr */
526 apic_errvect = apix_get_ipivect(ipl, -1);
527 ASSERT(apic_errvect);
529 * Not PSMI compliant, but we are going to merge
530 * with ON anyway
532 (void) add_avintr(NULL, ipl,
533 (avfunc)apic_error_intr, "apic error intr",
534 apic_errvect, NULL, NULL, NULL, NULL);
536 apic_reg_ops->apic_write(APIC_ERR_VECT, apic_errvect);
537 apic_reg_ops->apic_write(APIC_ERROR_STATUS, 0);
538 apic_reg_ops->apic_write(APIC_ERROR_STATUS, 0);
541 /* Enable CMCI interrupt */
542 if (cmi_enable_cmci) {
543 mutex_enter(&cmci_cpu_setup_lock);
544 if (cmci_cpu_setup_registered == 0) {
545 mutex_enter(&cpu_lock);
546 register_cpu_setup_func(cmci_cpu_setup, NULL);
547 mutex_exit(&cpu_lock);
548 cmci_cpu_setup_registered = 1;
550 mutex_exit(&cmci_cpu_setup_lock);
552 if (apic_cmci_vect == 0) {
553 int ipl = 0x2;
554 apic_cmci_vect = apix_get_ipivect(ipl, -1);
555 ASSERT(apic_cmci_vect);
557 (void) add_avintr(NULL, ipl,
558 (avfunc)cmi_cmci_trap, "apic cmci intr",
559 apic_cmci_vect, NULL, NULL, NULL, NULL);
561 apic_reg_ops->apic_write(APIC_CMCI_VECT, apic_cmci_vect);
564 apic_reg_ops->apic_write_task_reg(0);
567 static void
568 apix_picinit(void)
570 int i, j;
571 uint_t isr;
573 APIC_VERBOSE(INIT, (CE_CONT, "apix: psm_picinit\n"));
576 * initialize interrupt remapping before apic
577 * hardware initialization
579 apic_intrmap_init(apic_mode);
580 if (apic_vt_ops == psm_vt_ops)
581 apix_mul_ioapic_method = APIC_MUL_IOAPIC_IIR;
584 * On UniSys Model 6520, the BIOS leaves vector 0x20 isr
585 * bit on without clearing it with EOI. Since softint
586 * uses vector 0x20 to interrupt itself, so softint will
587 * not work on this machine. In order to fix this problem
588 * a check is made to verify all the isr bits are clear.
589 * If not, EOIs are issued to clear the bits.
591 for (i = 7; i >= 1; i--) {
592 isr = apic_reg_ops->apic_read(APIC_ISR_REG + (i * 4));
593 if (isr != 0)
594 for (j = 0; ((j < 32) && (isr != 0)); j++)
595 if (isr & (1 << j)) {
596 apic_reg_ops->apic_write(
597 APIC_EOI_REG, 0);
598 isr &= ~(1 << j);
599 apic_error |= APIC_ERR_BOOT_EOI;
603 /* set a flag so we know we have run apic_picinit() */
604 apic_picinit_called = 1;
605 LOCK_INIT_CLEAR(&apic_gethrtime_lock);
606 LOCK_INIT_CLEAR(&apic_ioapic_lock);
607 LOCK_INIT_CLEAR(&apic_error_lock);
608 LOCK_INIT_CLEAR(&apic_mode_switch_lock);
610 picsetup(); /* initialise the 8259 */
612 /* add nmi handler - least priority nmi handler */
613 LOCK_INIT_CLEAR(&apic_nmi_lock);
615 if (!psm_add_nmintr(0, (avfunc) apic_nmi_intr,
616 "apix NMI handler", (caddr_t)NULL))
617 cmn_err(CE_WARN, "apix: Unable to add nmi handler");
619 apix_init_intr();
621 /* enable apic mode if imcr present */
622 if (apic_imcrp) {
623 outb(APIC_IMCR_P1, (uchar_t)APIC_IMCR_SELECT);
624 outb(APIC_IMCR_P2, (uchar_t)APIC_IMCR_APIC);
627 ioapix_init_intr(IOAPIC_MASK);
629 /* setup global IRM pool if applicable */
630 if (irm_enable)
631 apix_irm_init();
634 static __inline__ void
635 apix_send_eoi(void)
637 if (apic_mode == LOCAL_APIC)
638 LOCAL_APIC_WRITE_REG(APIC_EOI_REG, 0);
639 else
640 X2APIC_WRITE(APIC_EOI_REG, 0);
644 * platform_intr_enter
646 * Called at the beginning of the interrupt service routine, but unlike
647 * pcplusmp, does not mask interrupts. An EOI is given to the interrupt
648 * controller to enable other HW interrupts but interrupts are still
649 * masked by the IF flag.
651 * Return -1 for spurious interrupts
654 static int
655 apix_intr_enter(int ipl, int *vectorp)
657 struct cpu *cpu = CPU;
658 uint32_t cpuid = CPU->cpu_id;
659 apic_cpus_info_t *cpu_infop;
660 uchar_t vector;
661 apix_vector_t *vecp;
662 int nipl = -1;
665 * The real vector delivered is (*vectorp + 0x20), but our caller
666 * subtracts 0x20 from the vector before passing it to us.
667 * (That's why APIC_BASE_VECT is 0x20.)
669 vector = *vectorp = (uchar_t)*vectorp + APIC_BASE_VECT;
671 cpu_infop = &apic_cpus[cpuid];
672 if (vector == APIC_SPUR_INTR) {
673 cpu_infop->aci_spur_cnt++;
674 return (APIC_INT_SPURIOUS);
677 vecp = xv_vector(cpuid, vector);
678 if (vecp == NULL) {
679 if (APIX_IS_FAKE_INTR(vector))
680 nipl = apix_rebindinfo.i_pri;
681 apix_send_eoi();
682 return (nipl);
684 nipl = vecp->v_pri;
686 /* if interrupted by the clock, increment apic_nsec_since_boot */
687 if (vector == (apic_clkvect + APIC_BASE_VECT)) {
688 if (!apic_oneshot) {
689 /* NOTE: this is not MT aware */
690 apic_hrtime_stamp++;
691 apic_nsec_since_boot += apic_nsec_per_intr;
692 apic_hrtime_stamp++;
693 last_count_read = apic_hertz_count;
694 apix_redistribute_compute();
697 apix_send_eoi();
699 return (nipl);
702 ASSERT(vecp->v_state != APIX_STATE_OBSOLETED);
704 /* pre-EOI handling for level-triggered interrupts */
705 if (!APIX_IS_DIRECTED_EOI(apix_mul_ioapic_method) &&
706 (vecp->v_type & APIX_TYPE_FIXED) && apic_level_intr[vecp->v_inum])
707 apix_level_intr_pre_eoi(vecp->v_inum);
709 /* send back EOI */
710 apix_send_eoi();
712 cpu_infop->aci_current[nipl] = vector;
713 if ((nipl > ipl) && (nipl > cpu->cpu_base_spl)) {
714 cpu_infop->aci_curipl = (uchar_t)nipl;
715 cpu_infop->aci_ISR_in_progress |= 1 << nipl;
718 #ifdef DEBUG
719 if (vector >= APIX_IPI_MIN)
720 return (nipl); /* skip IPI */
722 APIC_DEBUG_BUF_PUT(vector);
723 APIC_DEBUG_BUF_PUT(vecp->v_inum);
724 APIC_DEBUG_BUF_PUT(nipl);
725 APIC_DEBUG_BUF_PUT(psm_get_cpu_id());
726 if ((apic_stretch_interrupts) && (apic_stretch_ISR & (1 << nipl)))
727 drv_usecwait(apic_stretch_interrupts);
728 #endif /* DEBUG */
730 return (nipl);
734 * Any changes made to this function must also change X2APIC
735 * version of intr_exit.
737 static void
738 apix_intr_exit(int prev_ipl, int arg2)
740 int cpuid = psm_get_cpu_id();
741 apic_cpus_info_t *cpu_infop = &apic_cpus[cpuid];
742 apix_impl_t *apixp = apixs[cpuid];
744 UNREFERENCED_1PARAMETER(arg2);
746 cpu_infop->aci_curipl = (uchar_t)prev_ipl;
747 /* ISR above current pri could not be in progress */
748 cpu_infop->aci_ISR_in_progress &= (2 << prev_ipl) - 1;
750 if (apixp->x_obsoletes != NULL) {
751 if (APIX_CPU_LOCK_HELD(cpuid))
752 return;
754 APIX_ENTER_CPU_LOCK(cpuid);
755 (void) apix_obsolete_vector(apixp->x_obsoletes);
756 APIX_LEAVE_CPU_LOCK(cpuid);
761 * The pcplusmp setspl code uses the TPR to mask all interrupts at or below the
762 * given ipl, but apix never uses the TPR and we never mask a subset of the
763 * interrupts. They are either all blocked by the IF flag or all can come in.
765 * For setspl, we mask all interrupts for XC_HI_PIL (15), otherwise, interrupts
766 * can come in if currently enabled by the IF flag. This table shows the state
767 * of the IF flag when we leave this function.
769 * curr IF | ipl == 15 ipl != 15
770 * --------+---------------------------
771 * 0 | 0 0
772 * 1 | 0 1
774 static void
775 apix_setspl(int ipl)
778 * Interrupts at ipl above this cannot be in progress, so the following
779 * mask is ok.
781 apic_cpus[psm_get_cpu_id()].aci_ISR_in_progress &= (2 << ipl) - 1;
783 if (ipl == XC_HI_PIL)
784 cli();
788 apix_addspl(int virtvec, int ipl, int min_ipl, int max_ipl)
790 uint32_t cpuid = APIX_VIRTVEC_CPU(virtvec);
791 uchar_t vector = (uchar_t)APIX_VIRTVEC_VECTOR(virtvec);
792 apix_vector_t *vecp = xv_vector(cpuid, vector);
794 UNREFERENCED_3PARAMETER(ipl, min_ipl, max_ipl);
795 ASSERT(vecp != NULL && LOCK_HELD(&apix_lock));
797 if (vecp->v_type == APIX_TYPE_FIXED)
798 apix_intx_set_shared(vecp->v_inum, 1);
800 /* There are more interrupts, so it's already been enabled */
801 if (vecp->v_share > 1)
802 return (PSM_SUCCESS);
804 /* return if it is not hardware interrupt */
805 if (vecp->v_type == APIX_TYPE_IPI)
806 return (PSM_SUCCESS);
809 * if apix_picinit() has not been called yet, just return.
810 * At the end of apic_picinit(), we will call setup_io_intr().
812 if (!apic_picinit_called)
813 return (PSM_SUCCESS);
815 (void) apix_setup_io_intr(vecp);
817 return (PSM_SUCCESS);
821 apix_delspl(int virtvec, int ipl, int min_ipl, int max_ipl)
823 uint32_t cpuid = APIX_VIRTVEC_CPU(virtvec);
824 uchar_t vector = (uchar_t)APIX_VIRTVEC_VECTOR(virtvec);
825 apix_vector_t *vecp = xv_vector(cpuid, vector);
827 UNREFERENCED_3PARAMETER(ipl, min_ipl, max_ipl);
828 ASSERT(vecp != NULL && LOCK_HELD(&apix_lock));
830 if (vecp->v_type == APIX_TYPE_FIXED)
831 apix_intx_set_shared(vecp->v_inum, -1);
833 /* There are more interrupts */
834 if (vecp->v_share > 1)
835 return (PSM_SUCCESS);
837 /* return if it is not hardware interrupt */
838 if (vecp->v_type == APIX_TYPE_IPI)
839 return (PSM_SUCCESS);
841 if (!apic_picinit_called) {
842 cmn_err(CE_WARN, "apix: delete 0x%x before apic init",
843 virtvec);
844 return (PSM_SUCCESS);
847 apix_disable_vector(vecp);
849 return (PSM_SUCCESS);
853 * Try and disable all interrupts. We just assign interrupts to other
854 * processors based on policy. If any were bound by user request, we
855 * let them continue and return failure. We do not bother to check
856 * for cache affinity while rebinding.
858 static int
859 apix_disable_intr(processorid_t cpun)
861 apix_impl_t *apixp = apixs[cpun];
862 apix_vector_t *vecp, *newp;
863 int bindcpu, i, hardbound = 0, errbound = 0, ret, loop, type;
865 lock_set(&apix_lock);
867 apic_cpus[cpun].aci_status &= ~APIC_CPU_INTR_ENABLE;
868 apic_cpus[cpun].aci_curipl = 0;
870 /* if this is for SUSPEND operation, skip rebinding */
871 if (apic_cpus[cpun].aci_status & APIC_CPU_SUSPEND) {
872 for (i = APIX_AVINTR_MIN; i <= APIX_AVINTR_MAX; i++) {
873 vecp = apixp->x_vectbl[i];
874 if (!IS_VECT_ENABLED(vecp))
875 continue;
877 apix_disable_vector(vecp);
879 lock_clear(&apix_lock);
880 return (PSM_SUCCESS);
883 for (i = APIX_AVINTR_MIN; i <= APIX_AVINTR_MAX; i++) {
884 vecp = apixp->x_vectbl[i];
885 if (!IS_VECT_ENABLED(vecp))
886 continue;
888 if (vecp->v_flags & APIX_VECT_USER_BOUND) {
889 hardbound++;
890 continue;
892 type = vecp->v_type;
895 * If there are bound interrupts on this cpu, then
896 * rebind them to other processors.
898 loop = 0;
899 do {
900 bindcpu = apic_find_cpu(APIC_CPU_INTR_ENABLE);
902 if (type != APIX_TYPE_MSI)
903 newp = apix_set_cpu(vecp, bindcpu, &ret);
904 else
905 newp = apix_grp_set_cpu(vecp, bindcpu, &ret);
906 } while ((newp == NULL) && (loop++ < apic_nproc));
908 if (loop >= apic_nproc) {
909 errbound++;
910 cmn_err(CE_WARN, "apix: failed to rebind vector %x/%x",
911 vecp->v_cpuid, vecp->v_vector);
915 lock_clear(&apix_lock);
917 if (hardbound || errbound) {
918 cmn_err(CE_WARN, "Could not disable interrupts on %d"
919 "due to user bound interrupts or failed operation",
920 cpun);
921 return (PSM_FAILURE);
924 return (PSM_SUCCESS);
928 * Bind interrupts to specified CPU
930 static void
931 apix_enable_intr(processorid_t cpun)
933 apix_vector_t *vecp;
934 int i, ret;
935 processorid_t n;
937 lock_set(&apix_lock);
939 apic_cpus[cpun].aci_status |= APIC_CPU_INTR_ENABLE;
941 /* interrupt enabling for system resume */
942 if (apic_cpus[cpun].aci_status & APIC_CPU_SUSPEND) {
943 for (i = APIX_AVINTR_MIN; i <= APIX_AVINTR_MAX; i++) {
944 vecp = xv_vector(cpun, i);
945 if (!IS_VECT_ENABLED(vecp))
946 continue;
948 apix_enable_vector(vecp);
950 apic_cpus[cpun].aci_status &= ~APIC_CPU_SUSPEND;
953 for (n = 0; n < apic_nproc; n++) {
954 if (!apic_cpu_in_range(n) || n == cpun ||
955 (apic_cpus[n].aci_status & APIC_CPU_INTR_ENABLE) == 0)
956 continue;
958 for (i = APIX_AVINTR_MIN; i <= APIX_AVINTR_MAX; i++) {
959 vecp = xv_vector(n, i);
960 if (!IS_VECT_ENABLED(vecp) ||
961 vecp->v_bound_cpuid != cpun)
962 continue;
964 if (vecp->v_type != APIX_TYPE_MSI)
965 (void) apix_set_cpu(vecp, cpun, &ret);
966 else
967 (void) apix_grp_set_cpu(vecp, cpun, &ret);
971 lock_clear(&apix_lock);
975 * Allocate vector for IPI
976 * type == -1 indicates it is an internal request. Do not change
977 * resv_vector for these requests.
979 static int
980 apix_get_ipivect(int ipl, int type)
982 uchar_t vector;
984 if ((vector = apix_alloc_ipi(ipl)) > 0) {
985 if (type != -1)
986 apic_resv_vector[ipl] = vector;
987 return (vector);
989 apic_error |= APIC_ERR_GET_IPIVECT_FAIL;
990 return (-1); /* shouldn't happen */
993 static int
994 apix_get_clkvect(int ipl)
996 int vector;
998 if ((vector = apix_get_ipivect(ipl, -1)) == -1)
999 return (-1);
1001 apic_clkvect = vector - APIC_BASE_VECT;
1002 APIC_VERBOSE(IPI, (CE_CONT, "apix: clock vector = %x\n",
1003 apic_clkvect));
1004 return (vector);
1007 static int
1008 apix_post_cpu_start()
1010 int cpun;
1011 static int cpus_started = 1;
1013 /* We know this CPU + BSP started successfully. */
1014 cpus_started++;
1017 * On BSP we would have enabled X2APIC, if supported by processor,
1018 * in acpi_probe(), but on AP we do it here.
1020 * We enable X2APIC mode only if BSP is running in X2APIC & the
1021 * local APIC mode of the current CPU is MMIO (xAPIC).
1023 if (apic_mode == LOCAL_X2APIC && apic_detect_x2apic() &&
1024 apic_local_mode() == LOCAL_APIC) {
1025 apic_enable_x2apic();
1029 * Switch back to x2apic IPI sending method for performance when target
1030 * CPU has entered x2apic mode.
1032 if (apic_mode == LOCAL_X2APIC) {
1033 apic_switch_ipi_callback(B_FALSE);
1036 splx(ipltospl(LOCK_LEVEL));
1037 apix_init_intr();
1040 * since some systems don't enable the internal cache on the non-boot
1041 * cpus, so we have to enable them here
1043 setcr0(getcr0() & ~(CR0_CD | CR0_NW));
1045 #ifdef DEBUG
1046 APIC_AV_PENDING_SET();
1047 #else
1048 if (apic_mode == LOCAL_APIC)
1049 APIC_AV_PENDING_SET();
1050 #endif /* DEBUG */
1053 * We may be booting, or resuming from suspend; aci_status will
1054 * be APIC_CPU_INTR_ENABLE if coming from suspend, so we add the
1055 * APIC_CPU_ONLINE flag here rather than setting aci_status completely.
1057 cpun = psm_get_cpu_id();
1058 apic_cpus[cpun].aci_status |= APIC_CPU_ONLINE;
1060 apic_reg_ops->apic_write(APIC_DIVIDE_REG, apic_divide_reg_init);
1062 return (PSM_SUCCESS);
1066 * If this module needs a periodic handler for the interrupt distribution, it
1067 * can be added here. The argument to the periodic handler is not currently
1068 * used, but is reserved for future.
1070 static void
1071 apix_post_cyclic_setup(void *arg)
1073 UNREFERENCED_1PARAMETER(arg);
1075 /* cpu_lock is held */
1076 /* set up a periodic handler for intr redistribution */
1079 * In peridoc mode intr redistribution processing is done in
1080 * apic_intr_enter during clk intr processing
1082 if (!apic_oneshot)
1083 return;
1086 * Register a periodical handler for the redistribution processing.
1087 * On X86, CY_LOW_LEVEL is mapped to the level 2 interrupt, so
1088 * DDI_IPL_2 should be passed to ddi_periodic_add() here.
1090 apic_periodic_id = ddi_periodic_add(
1091 (void (*)(void *))apix_redistribute_compute, NULL,
1092 apic_redistribute_sample_interval, DDI_IPL_2);
1096 * Called the first time we enable x2apic mode on this cpu.
1097 * Update some of the function pointers to use x2apic routines.
1099 void
1100 x2apic_update_psm()
1102 struct psm_ops *pops = &apix_ops;
1104 ASSERT(pops != NULL);
1107 * The pcplusmp module's version of x2apic_update_psm makes additional
1108 * changes that we do not have to make here. It needs to make those
1109 * changes because pcplusmp relies on the TPR register and the means of
1110 * addressing that changes when using the local apic versus the x2apic.
1111 * It's also worth noting that the apix driver specific function end up
1112 * being apix_foo as opposed to apic_foo and x2apic_foo.
1114 pops->psm_send_ipi = x2apic_send_ipi;
1116 send_dirintf = pops->psm_send_ipi;
1118 apic_mode = LOCAL_X2APIC;
1119 apic_change_ops();
1123 * This function provides external interface to the nexus for all
1124 * functionalities related to the new DDI interrupt framework.
1126 * Input:
1127 * dip - pointer to the dev_info structure of the requested device
1128 * hdlp - pointer to the internal interrupt handle structure for the
1129 * requested interrupt
1130 * intr_op - opcode for this call
1131 * result - pointer to the integer that will hold the result to be
1132 * passed back if return value is PSM_SUCCESS
1134 * Output:
1135 * return value is either PSM_SUCCESS or PSM_FAILURE
1137 static int
1138 apix_intr_ops(dev_info_t *dip, ddi_intr_handle_impl_t *hdlp,
1139 psm_intr_op_t intr_op, int *result)
1141 int cap;
1142 apix_vector_t *vecp, *newvecp;
1143 struct intrspec *ispec, intr_spec;
1144 processorid_t target;
1146 ispec = &intr_spec;
1147 ispec->intrspec_pri = hdlp->ih_pri;
1148 ispec->intrspec_vec = hdlp->ih_inum;
1149 ispec->intrspec_func = hdlp->ih_cb_func;
1151 switch (intr_op) {
1152 case PSM_INTR_OP_ALLOC_VECTORS:
1153 switch (hdlp->ih_type) {
1154 case DDI_INTR_TYPE_MSI:
1155 /* allocate MSI vectors */
1156 *result = apix_alloc_msi(dip, hdlp->ih_inum,
1157 hdlp->ih_scratch1,
1158 (int)(uintptr_t)hdlp->ih_scratch2);
1159 break;
1160 case DDI_INTR_TYPE_MSIX:
1161 /* allocate MSI-X vectors */
1162 *result = apix_alloc_msix(dip, hdlp->ih_inum,
1163 hdlp->ih_scratch1,
1164 (int)(uintptr_t)hdlp->ih_scratch2);
1165 break;
1166 case DDI_INTR_TYPE_FIXED:
1167 /* allocate or share vector for fixed */
1168 if ((ihdl_plat_t *)hdlp->ih_private == NULL) {
1169 return (PSM_FAILURE);
1171 ispec = ((ihdl_plat_t *)hdlp->ih_private)->ip_ispecp;
1172 *result = apix_intx_alloc_vector(dip, hdlp->ih_inum,
1173 ispec);
1174 break;
1175 default:
1176 return (PSM_FAILURE);
1178 break;
1179 case PSM_INTR_OP_FREE_VECTORS:
1180 apix_free_vectors(dip, hdlp->ih_inum, hdlp->ih_scratch1,
1181 hdlp->ih_type);
1182 break;
1183 case PSM_INTR_OP_XLATE_VECTOR:
1185 * Vectors are allocated by ALLOC and freed by FREE.
1186 * XLATE finds and returns APIX_VIRTVEC_VECTOR(cpu, vector).
1188 *result = APIX_INVALID_VECT;
1189 vecp = apix_get_dev_map(dip, hdlp->ih_inum, hdlp->ih_type);
1190 if (vecp != NULL) {
1191 *result = APIX_VIRTVECTOR(vecp->v_cpuid,
1192 vecp->v_vector);
1193 break;
1197 * No vector to device mapping exists. If this is FIXED type
1198 * then check if this IRQ is already mapped for another device
1199 * then return the vector number for it (i.e. shared IRQ case).
1200 * Otherwise, return PSM_FAILURE.
1202 if (hdlp->ih_type == DDI_INTR_TYPE_FIXED) {
1203 vecp = apix_intx_xlate_vector(dip, hdlp->ih_inum,
1204 ispec);
1205 *result = (vecp == NULL) ? APIX_INVALID_VECT :
1206 APIX_VIRTVECTOR(vecp->v_cpuid, vecp->v_vector);
1208 if (*result == APIX_INVALID_VECT)
1209 return (PSM_FAILURE);
1210 break;
1211 case PSM_INTR_OP_GET_PENDING:
1212 vecp = apix_get_dev_map(dip, hdlp->ih_inum, hdlp->ih_type);
1213 if (vecp == NULL)
1214 return (PSM_FAILURE);
1216 *result = apix_get_pending(vecp);
1217 break;
1218 case PSM_INTR_OP_CLEAR_MASK:
1219 if (hdlp->ih_type != DDI_INTR_TYPE_FIXED)
1220 return (PSM_FAILURE);
1222 vecp = apix_get_dev_map(dip, hdlp->ih_inum, hdlp->ih_type);
1223 if (vecp == NULL)
1224 return (PSM_FAILURE);
1226 apix_intx_clear_mask(vecp->v_inum);
1227 break;
1228 case PSM_INTR_OP_SET_MASK:
1229 if (hdlp->ih_type != DDI_INTR_TYPE_FIXED)
1230 return (PSM_FAILURE);
1232 vecp = apix_get_dev_map(dip, hdlp->ih_inum, hdlp->ih_type);
1233 if (vecp == NULL)
1234 return (PSM_FAILURE);
1236 apix_intx_set_mask(vecp->v_inum);
1237 break;
1238 case PSM_INTR_OP_GET_SHARED:
1239 if (hdlp->ih_type != DDI_INTR_TYPE_FIXED)
1240 return (PSM_FAILURE);
1242 vecp = apix_get_dev_map(dip, hdlp->ih_inum, hdlp->ih_type);
1243 if (vecp == NULL)
1244 return (PSM_FAILURE);
1246 *result = apix_intx_get_shared(vecp->v_inum);
1247 break;
1248 case PSM_INTR_OP_SET_PRI:
1250 * Called prior to adding the interrupt handler or when
1251 * an interrupt handler is unassigned.
1253 if (hdlp->ih_type == DDI_INTR_TYPE_FIXED)
1254 return (PSM_SUCCESS);
1256 if (apix_get_dev_map(dip, hdlp->ih_inum, hdlp->ih_type) == NULL)
1257 return (PSM_FAILURE);
1259 break;
1260 case PSM_INTR_OP_SET_CPU:
1261 case PSM_INTR_OP_GRP_SET_CPU:
1263 * The interrupt handle given here has been allocated
1264 * specifically for this command, and ih_private carries
1265 * a CPU value.
1267 *result = EINVAL;
1268 target = (int)(intptr_t)hdlp->ih_private;
1269 if (!apic_cpu_in_range(target)) {
1270 DDI_INTR_IMPLDBG((CE_WARN,
1271 "[grp_]set_cpu: cpu out of range: %d\n", target));
1272 return (PSM_FAILURE);
1275 lock_set(&apix_lock);
1277 vecp = apix_get_req_vector(hdlp, hdlp->ih_flags);
1278 if (!IS_VECT_ENABLED(vecp)) {
1279 DDI_INTR_IMPLDBG((CE_WARN,
1280 "[grp]_set_cpu: invalid vector 0x%x\n",
1281 hdlp->ih_vector));
1282 lock_clear(&apix_lock);
1283 return (PSM_FAILURE);
1286 *result = 0;
1288 if (intr_op == PSM_INTR_OP_SET_CPU)
1289 newvecp = apix_set_cpu(vecp, target, result);
1290 else
1291 newvecp = apix_grp_set_cpu(vecp, target, result);
1293 lock_clear(&apix_lock);
1295 if (newvecp == NULL) {
1296 *result = EIO;
1297 return (PSM_FAILURE);
1299 newvecp->v_bound_cpuid = target;
1300 hdlp->ih_vector = APIX_VIRTVECTOR(newvecp->v_cpuid,
1301 newvecp->v_vector);
1302 break;
1304 case PSM_INTR_OP_GET_INTR:
1306 * The interrupt handle given here has been allocated
1307 * specifically for this command, and ih_private carries
1308 * a pointer to a apic_get_intr_t.
1310 if (apix_get_intr_info(hdlp, hdlp->ih_private) != PSM_SUCCESS)
1311 return (PSM_FAILURE);
1312 break;
1314 case PSM_INTR_OP_CHECK_MSI:
1316 * Check MSI/X is supported or not at APIC level and
1317 * masked off the MSI/X bits in hdlp->ih_type if not
1318 * supported before return. If MSI/X is supported,
1319 * leave the ih_type unchanged and return.
1321 * hdlp->ih_type passed in from the nexus has all the
1322 * interrupt types supported by the device.
1324 if (apic_support_msi == 0) { /* uninitialized */
1326 * if apic_support_msi is not set, call
1327 * apic_check_msi_support() to check whether msi
1328 * is supported first
1330 if (apic_check_msi_support() == PSM_SUCCESS)
1331 apic_support_msi = 1; /* supported */
1332 else
1333 apic_support_msi = -1; /* not-supported */
1335 if (apic_support_msi == 1) {
1336 if (apic_msix_enable)
1337 *result = hdlp->ih_type;
1338 else
1339 *result = hdlp->ih_type & ~DDI_INTR_TYPE_MSIX;
1340 } else
1341 *result = hdlp->ih_type & ~(DDI_INTR_TYPE_MSI |
1342 DDI_INTR_TYPE_MSIX);
1343 break;
1344 case PSM_INTR_OP_GET_CAP:
1345 cap = DDI_INTR_FLAG_PENDING;
1346 if (hdlp->ih_type == DDI_INTR_TYPE_FIXED)
1347 cap |= DDI_INTR_FLAG_MASKABLE;
1348 *result = cap;
1349 break;
1350 case PSM_INTR_OP_APIC_TYPE:
1351 ((apic_get_type_t *)(hdlp->ih_private))->avgi_type =
1352 apix_get_apic_type();
1353 ((apic_get_type_t *)(hdlp->ih_private))->avgi_num_intr =
1354 APIX_IPI_MIN;
1355 ((apic_get_type_t *)(hdlp->ih_private))->avgi_num_cpu =
1356 apic_nproc;
1357 hdlp->ih_ver = apic_get_apic_version();
1358 break;
1359 case PSM_INTR_OP_SET_CAP:
1360 default:
1361 return (PSM_FAILURE);
1364 return (PSM_SUCCESS);
1367 static void
1368 apix_cleanup_busy(void)
1370 int i, j;
1371 apix_vector_t *vecp;
1373 for (i = 0; i < apic_nproc; i++) {
1374 if (!apic_cpu_in_range(i))
1375 continue;
1376 apic_cpus[i].aci_busy = 0;
1377 for (j = APIX_AVINTR_MIN; j < APIX_AVINTR_MAX; j++) {
1378 if ((vecp = xv_vector(i, j)) != NULL)
1379 vecp->v_busy = 0;
1384 static void
1385 apix_redistribute_compute(void)
1387 int i, j, max_busy;
1389 if (!apic_enable_dynamic_migration)
1390 return;
1392 if (++apic_nticks == apic_sample_factor_redistribution) {
1394 * Time to call apic_intr_redistribute().
1395 * reset apic_nticks. This will cause max_busy
1396 * to be calculated below and if it is more than
1397 * apic_int_busy, we will do the whole thing
1399 apic_nticks = 0;
1401 max_busy = 0;
1402 for (i = 0; i < apic_nproc; i++) {
1403 if (!apic_cpu_in_range(i))
1404 continue;
1406 * Check if curipl is non zero & if ISR is in
1407 * progress
1409 if (((j = apic_cpus[i].aci_curipl) != 0) &&
1410 (apic_cpus[i].aci_ISR_in_progress & (1 << j))) {
1412 int vect;
1413 apic_cpus[i].aci_busy++;
1414 vect = apic_cpus[i].aci_current[j];
1415 apixs[i]->x_vectbl[vect]->v_busy++;
1418 if (!apic_nticks &&
1419 (apic_cpus[i].aci_busy > max_busy))
1420 max_busy = apic_cpus[i].aci_busy;
1422 if (!apic_nticks) {
1423 if (max_busy > apic_int_busy_mark) {
1425 * We could make the following check be
1426 * skipped > 1 in which case, we get a
1427 * redistribution at half the busy mark (due to
1428 * double interval). Need to be able to collect
1429 * more empirical data to decide if that is a
1430 * good strategy. Punt for now.
1432 apix_cleanup_busy();
1433 apic_skipped_redistribute = 0;
1434 } else
1435 apic_skipped_redistribute++;
1440 * intr_ops() service routines
1443 static int
1444 apix_get_pending(apix_vector_t *vecp)
1446 int bit, index, irr, pending;
1448 /* need to get on the bound cpu */
1449 mutex_enter(&cpu_lock);
1450 affinity_set(vecp->v_cpuid);
1452 index = vecp->v_vector / 32;
1453 bit = vecp->v_vector % 32;
1454 irr = apic_reg_ops->apic_read(APIC_IRR_REG + index);
1456 affinity_clear();
1457 mutex_exit(&cpu_lock);
1459 pending = (irr & (1 << bit)) ? 1 : 0;
1460 if (!pending && vecp->v_type == APIX_TYPE_FIXED)
1461 pending = apix_intx_get_pending(vecp->v_inum);
1463 return (pending);
1466 static apix_vector_t *
1467 apix_get_req_vector(ddi_intr_handle_impl_t *hdlp, ushort_t flags)
1469 apix_vector_t *vecp;
1470 processorid_t cpuid;
1471 int32_t virt_vec = 0;
1473 switch (flags & PSMGI_INTRBY_FLAGS) {
1474 case PSMGI_INTRBY_IRQ:
1475 return (apix_intx_get_vector(hdlp->ih_vector));
1476 case PSMGI_INTRBY_VEC:
1477 virt_vec = (virt_vec == 0) ? hdlp->ih_vector : virt_vec;
1479 cpuid = APIX_VIRTVEC_CPU(virt_vec);
1480 if (!apic_cpu_in_range(cpuid))
1481 return (NULL);
1483 vecp = xv_vector(cpuid, APIX_VIRTVEC_VECTOR(virt_vec));
1484 break;
1485 case PSMGI_INTRBY_DEFAULT:
1486 vecp = apix_get_dev_map(hdlp->ih_dip, hdlp->ih_inum,
1487 hdlp->ih_type);
1488 break;
1489 default:
1490 return (NULL);
1493 return (vecp);
1496 static int
1497 apix_get_intr_info(ddi_intr_handle_impl_t *hdlp,
1498 apic_get_intr_t *intr_params_p)
1500 apix_vector_t *vecp;
1501 struct autovec *av_dev;
1502 int i;
1504 vecp = apix_get_req_vector(hdlp, intr_params_p->avgi_req_flags);
1505 if (IS_VECT_FREE(vecp)) {
1506 intr_params_p->avgi_num_devs = 0;
1507 intr_params_p->avgi_cpu_id = 0;
1508 intr_params_p->avgi_req_flags = 0;
1509 return (PSM_SUCCESS);
1512 if (intr_params_p->avgi_req_flags & PSMGI_REQ_CPUID) {
1513 intr_params_p->avgi_cpu_id = vecp->v_cpuid;
1515 /* Return user bound info for intrd. */
1516 if (intr_params_p->avgi_cpu_id & IRQ_USER_BOUND) {
1517 intr_params_p->avgi_cpu_id &= ~IRQ_USER_BOUND;
1518 intr_params_p->avgi_cpu_id |= PSMGI_CPU_USER_BOUND;
1522 if (intr_params_p->avgi_req_flags & PSMGI_REQ_VECTOR)
1523 intr_params_p->avgi_vector = vecp->v_vector;
1525 if (intr_params_p->avgi_req_flags &
1526 (PSMGI_REQ_NUM_DEVS | PSMGI_REQ_GET_DEVS))
1527 /* Get number of devices from apic_irq table shared field. */
1528 intr_params_p->avgi_num_devs = vecp->v_share;
1530 if (intr_params_p->avgi_req_flags & PSMGI_REQ_GET_DEVS) {
1532 intr_params_p->avgi_req_flags |= PSMGI_REQ_NUM_DEVS;
1534 /* Some devices have NULL dip. Don't count these. */
1535 if (intr_params_p->avgi_num_devs > 0) {
1536 for (i = 0, av_dev = vecp->v_autovect; av_dev;
1537 av_dev = av_dev->av_link) {
1538 if (av_dev->av_vector && av_dev->av_dip)
1539 i++;
1541 intr_params_p->avgi_num_devs =
1542 (uint8_t)MIN(intr_params_p->avgi_num_devs, i);
1545 /* There are no viable dips to return. */
1546 if (intr_params_p->avgi_num_devs == 0) {
1547 intr_params_p->avgi_dip_list = NULL;
1549 } else { /* Return list of dips */
1551 /* Allocate space in array for that number of devs. */
1552 intr_params_p->avgi_dip_list = kmem_zalloc(
1553 intr_params_p->avgi_num_devs *
1554 sizeof (dev_info_t *),
1555 KM_NOSLEEP);
1556 if (intr_params_p->avgi_dip_list == NULL) {
1557 DDI_INTR_IMPLDBG((CE_WARN,
1558 "apix_get_vector_intr_info: no memory"));
1559 return (PSM_FAILURE);
1563 * Loop through the device list of the autovec table
1564 * filling in the dip array.
1566 * Note that the autovect table may have some special
1567 * entries which contain NULL dips. These will be
1568 * ignored.
1570 for (i = 0, av_dev = vecp->v_autovect; av_dev;
1571 av_dev = av_dev->av_link) {
1572 if (av_dev->av_vector && av_dev->av_dip)
1573 intr_params_p->avgi_dip_list[i++] =
1574 av_dev->av_dip;
1579 return (PSM_SUCCESS);
1582 static char *
1583 apix_get_apic_type(void)
1585 return (apix_psm_info.p_mach_idstring);
1588 apix_vector_t *
1589 apix_set_cpu(apix_vector_t *vecp, int new_cpu, int *result)
1591 apix_vector_t *newp = NULL;
1592 dev_info_t *dip;
1593 int inum, cap_ptr;
1594 ddi_acc_handle_t handle;
1595 ddi_intr_msix_t *msix_p = NULL;
1596 ushort_t msix_ctrl;
1597 uintptr_t off;
1598 uint32_t mask;
1600 ASSERT(LOCK_HELD(&apix_lock));
1601 *result = ENXIO;
1603 /* Fail if this is an MSI intr and is part of a group. */
1604 if (vecp->v_type == APIX_TYPE_MSI) {
1605 if (i_ddi_intr_get_current_nintrs(APIX_GET_DIP(vecp)) > 1)
1606 return (NULL);
1607 else
1608 return (apix_grp_set_cpu(vecp, new_cpu, result));
1612 * Mask MSI-X. It's unmasked when MSI-X gets enabled.
1614 if (vecp->v_type == APIX_TYPE_MSIX && IS_VECT_ENABLED(vecp)) {
1615 if ((dip = APIX_GET_DIP(vecp)) == NULL)
1616 return (NULL);
1617 inum = vecp->v_devp->dv_inum;
1619 handle = i_ddi_get_pci_config_handle(dip);
1620 cap_ptr = i_ddi_get_msi_msix_cap_ptr(dip);
1621 msix_ctrl = pci_config_get16(handle, cap_ptr + PCI_MSIX_CTRL);
1622 if ((msix_ctrl & PCI_MSIX_FUNCTION_MASK) == 0) {
1624 * Function is not masked, then mask "inum"th
1625 * entry in the MSI-X table
1627 msix_p = i_ddi_get_msix(dip);
1628 off = (uintptr_t)msix_p->msix_tbl_addr + (inum *
1629 PCI_MSIX_VECTOR_SIZE) + PCI_MSIX_VECTOR_CTRL_OFFSET;
1630 mask = ddi_get32(msix_p->msix_tbl_hdl, (uint32_t *)off);
1631 ddi_put32(msix_p->msix_tbl_hdl, (uint32_t *)off,
1632 mask | 1);
1636 *result = 0;
1637 if ((newp = apix_rebind(vecp, new_cpu, 1)) == NULL)
1638 *result = EIO;
1640 /* Restore mask bit */
1641 if (msix_p != NULL)
1642 ddi_put32(msix_p->msix_tbl_hdl, (uint32_t *)off, mask);
1644 return (newp);
1648 * Set cpu for MSIs
1650 apix_vector_t *
1651 apix_grp_set_cpu(apix_vector_t *vecp, int new_cpu, int *result)
1653 apix_vector_t *newp, *vp;
1654 uint32_t orig_cpu = vecp->v_cpuid;
1655 int orig_vect = vecp->v_vector;
1656 int i, num_vectors, cap_ptr, msi_mask_off;
1657 uint32_t msi_pvm;
1658 ushort_t msi_ctrl;
1659 ddi_acc_handle_t handle;
1660 dev_info_t *dip;
1662 APIC_VERBOSE(INTR, (CE_CONT, "apix_grp_set_cpu: oldcpu: %x, vector: %x,"
1663 " newcpu:%x\n", vecp->v_cpuid, vecp->v_vector, new_cpu));
1665 ASSERT(LOCK_HELD(&apix_lock));
1667 *result = ENXIO;
1669 if (vecp->v_type != APIX_TYPE_MSI) {
1670 DDI_INTR_IMPLDBG((CE_WARN, "set_grp: intr not MSI\n"));
1671 return (NULL);
1674 if ((dip = APIX_GET_DIP(vecp)) == NULL)
1675 return (NULL);
1677 num_vectors = i_ddi_intr_get_current_nintrs(dip);
1678 if ((num_vectors < 1) || ((num_vectors - 1) & orig_vect)) {
1679 APIC_VERBOSE(INTR, (CE_WARN,
1680 "set_grp: base vec not part of a grp or not aligned: "
1681 "vec:0x%x, num_vec:0x%x\n", orig_vect, num_vectors));
1682 return (NULL);
1685 if (vecp->v_inum != apix_get_min_dev_inum(dip, vecp->v_type))
1686 return (NULL);
1688 *result = EIO;
1689 for (i = 1; i < num_vectors; i++) {
1690 if ((vp = xv_vector(orig_cpu, orig_vect + i)) == NULL)
1691 return (NULL);
1692 #ifdef DEBUG
1694 * Sanity check: CPU and dip is the same for all entries.
1695 * May be called when first msi to be enabled, at this time
1696 * add_avintr() is not called for other msi
1698 if ((vp->v_share != 0) &&
1699 ((APIX_GET_DIP(vp) != dip) ||
1700 (vp->v_cpuid != vecp->v_cpuid))) {
1701 APIC_VERBOSE(INTR, (CE_WARN,
1702 "set_grp: cpu or dip for vec 0x%x difft than for "
1703 "vec 0x%x\n", orig_vect, orig_vect + i));
1704 APIC_VERBOSE(INTR, (CE_WARN,
1705 " cpu: %d vs %d, dip: 0x%p vs 0x%p\n", orig_cpu,
1706 vp->v_cpuid, (void *)dip,
1707 (void *)APIX_GET_DIP(vp)));
1708 return (NULL);
1710 #endif /* DEBUG */
1713 cap_ptr = i_ddi_get_msi_msix_cap_ptr(dip);
1714 handle = i_ddi_get_pci_config_handle(dip);
1715 msi_ctrl = pci_config_get16(handle, cap_ptr + PCI_MSI_CTRL);
1717 /* MSI Per vector masking is supported. */
1718 if (msi_ctrl & PCI_MSI_PVM_MASK) {
1719 if (msi_ctrl & PCI_MSI_64BIT_MASK)
1720 msi_mask_off = cap_ptr + PCI_MSI_64BIT_MASKBITS;
1721 else
1722 msi_mask_off = cap_ptr + PCI_MSI_32BIT_MASK;
1723 msi_pvm = pci_config_get32(handle, msi_mask_off);
1724 pci_config_put32(handle, msi_mask_off, (uint32_t)-1);
1725 APIC_VERBOSE(INTR, (CE_CONT,
1726 "set_grp: pvm supported. Mask set to 0x%x\n",
1727 pci_config_get32(handle, msi_mask_off)));
1730 if ((newp = apix_rebind(vecp, new_cpu, num_vectors)) != NULL)
1731 *result = 0;
1733 /* Reenable vectors if per vector masking is supported. */
1734 if (msi_ctrl & PCI_MSI_PVM_MASK) {
1735 pci_config_put32(handle, msi_mask_off, msi_pvm);
1736 APIC_VERBOSE(INTR, (CE_CONT,
1737 "set_grp: pvm supported. Mask restored to 0x%x\n",
1738 pci_config_get32(handle, msi_mask_off)));
1741 return (newp);
1744 void
1745 apix_intx_set_vector(int irqno, uint32_t cpuid, uchar_t vector)
1747 apic_irq_t *irqp;
1749 mutex_enter(&airq_mutex);
1750 irqp = apic_irq_table[irqno];
1751 irqp->airq_cpu = cpuid;
1752 irqp->airq_vector = vector;
1753 apic_record_rdt_entry(irqp, irqno);
1754 mutex_exit(&airq_mutex);
1757 apix_vector_t *
1758 apix_intx_get_vector(int irqno)
1760 apic_irq_t *irqp;
1761 uint32_t cpuid;
1762 uchar_t vector;
1764 mutex_enter(&airq_mutex);
1765 irqp = apic_irq_table[irqno & 0xff];
1766 if (IS_IRQ_FREE(irqp) || (irqp->airq_cpu == IRQ_UNINIT)) {
1767 mutex_exit(&airq_mutex);
1768 return (NULL);
1770 cpuid = irqp->airq_cpu;
1771 vector = irqp->airq_vector;
1772 mutex_exit(&airq_mutex);
1774 return (xv_vector(cpuid, vector));
1778 * Must called with interrupts disabled and apic_ioapic_lock held
1780 void
1781 apix_intx_enable(int irqno)
1783 uchar_t ioapicindex, intin;
1784 apic_irq_t *irqp = apic_irq_table[irqno];
1785 ioapic_rdt_t irdt;
1786 apic_cpus_info_t *cpu_infop;
1787 apix_vector_t *vecp = xv_vector(irqp->airq_cpu, irqp->airq_vector);
1789 ASSERT(LOCK_HELD(&apic_ioapic_lock) && !IS_IRQ_FREE(irqp));
1791 ioapicindex = irqp->airq_ioapicindex;
1792 intin = irqp->airq_intin_no;
1793 cpu_infop = &apic_cpus[irqp->airq_cpu];
1795 irdt.ir_lo = AV_PDEST | AV_FIXED | irqp->airq_rdt_entry;
1796 irdt.ir_hi = cpu_infop->aci_local_id;
1798 apic_vt_ops->apic_intrmap_alloc_entry(&vecp->v_intrmap_private, NULL,
1799 vecp->v_type, 1, ioapicindex);
1800 apic_vt_ops->apic_intrmap_map_entry(vecp->v_intrmap_private,
1801 (void *)&irdt, vecp->v_type, 1);
1802 apic_vt_ops->apic_intrmap_record_rdt(vecp->v_intrmap_private, &irdt);
1804 /* write RDT entry high dword - destination */
1805 WRITE_IOAPIC_RDT_ENTRY_HIGH_DWORD(ioapicindex, intin,
1806 irdt.ir_hi);
1808 /* Write the vector, trigger, and polarity portion of the RDT */
1809 WRITE_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapicindex, intin, irdt.ir_lo);
1811 vecp->v_state = APIX_STATE_ENABLED;
1813 APIC_VERBOSE_IOAPIC((CE_CONT, "apix_intx_enable: ioapic 0x%x"
1814 " intin 0x%x rdt_low 0x%x rdt_high 0x%x\n",
1815 ioapicindex, intin, irdt.ir_lo, irdt.ir_hi));
1819 * Must called with interrupts disabled and apic_ioapic_lock held
1821 void
1822 apix_intx_disable(int irqno)
1824 apic_irq_t *irqp = apic_irq_table[irqno];
1825 int ioapicindex, intin;
1827 ASSERT(LOCK_HELD(&apic_ioapic_lock) && !IS_IRQ_FREE(irqp));
1829 * The assumption here is that this is safe, even for
1830 * systems with IOAPICs that suffer from the hardware
1831 * erratum because all devices have been quiesced before
1832 * they unregister their interrupt handlers. If that
1833 * assumption turns out to be false, this mask operation
1834 * can induce the same erratum result we're trying to
1835 * avoid.
1837 ioapicindex = irqp->airq_ioapicindex;
1838 intin = irqp->airq_intin_no;
1839 ioapic_write(ioapicindex, APIC_RDT_CMD + 2 * intin, AV_MASK);
1841 APIC_VERBOSE_IOAPIC((CE_CONT, "apix_intx_disable: ioapic 0x%x"
1842 " intin 0x%x\n", ioapicindex, intin));
1845 void
1846 apix_intx_free(int irqno)
1848 apic_irq_t *irqp;
1850 mutex_enter(&airq_mutex);
1851 irqp = apic_irq_table[irqno];
1853 if (IS_IRQ_FREE(irqp)) {
1854 mutex_exit(&airq_mutex);
1855 return;
1858 irqp->airq_mps_intr_index = FREE_INDEX;
1859 irqp->airq_cpu = IRQ_UNINIT;
1860 irqp->airq_vector = APIX_INVALID_VECT;
1861 mutex_exit(&airq_mutex);
#ifdef DEBUG
/* Event counters bumped by apix_intx_rebind() (DEBUG kernels only). */
int apix_intr_deliver_timeouts = 0;	/* delivery status still pending */
int apix_intr_rirr_timeouts = 0;	/* Remote IRR wait ran out */
int apix_intr_rirr_reset_failure = 0;	/* edge/level toggle did not help */
#endif

/* Number of wait rounds apix_intx_rebind() allows for Remote IRR to clear. */
int apix_max_reps_irr_pending = 10;

/* Read the low RDT dword of (ioapic, intin) and keep only the given bits. */
#define	GET_RDT_BITS(ioapic, intin, bits)	\
	(READ_IOAPIC_RDT_ENTRY_LOW_DWORD((ioapic), (intin)) & (bits))

/* Pause between Remote IRR wait rounds: 5000 microseconds, in clock ticks. */
#define	APIX_CHECK_IRR_DELAY	drv_usectohz(5000)
1876 apix_intx_rebind(int irqno, processorid_t cpuid, uchar_t vector)
1878 apic_irq_t *irqp = apic_irq_table[irqno];
1879 ulong_t iflag;
1880 int waited, ioapic_ix, intin_no, level, repeats, rdt_entry, masked;
1882 ASSERT(irqp != NULL);
1884 iflag = intr_clear();
1885 lock_set(&apic_ioapic_lock);
1887 ioapic_ix = irqp->airq_ioapicindex;
1888 intin_no = irqp->airq_intin_no;
1889 level = apic_level_intr[irqno];
1892 * Wait for the delivery status bit to be cleared. This should
1893 * be a very small amount of time.
1895 repeats = 0;
1896 do {
1897 repeats++;
1899 for (waited = 0; waited < apic_max_reps_clear_pending;
1900 waited++) {
1901 if (GET_RDT_BITS(ioapic_ix, intin_no, AV_PENDING) == 0)
1902 break;
1904 if (!level)
1905 break;
1908 * Mask the RDT entry for level-triggered interrupts.
1910 irqp->airq_rdt_entry |= AV_MASK;
1911 rdt_entry = READ_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic_ix,
1912 intin_no);
1913 if ((masked = (rdt_entry & AV_MASK)) == 0) {
1914 /* Mask it */
1915 WRITE_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic_ix, intin_no,
1916 AV_MASK | rdt_entry);
1920 * If there was a race and an interrupt was injected
1921 * just before we masked, check for that case here.
1922 * Then, unmask the RDT entry and try again. If we're
1923 * on our last try, don't unmask (because we want the
1924 * RDT entry to remain masked for the rest of the
1925 * function).
1927 rdt_entry = READ_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic_ix,
1928 intin_no);
1929 if ((masked == 0) && ((rdt_entry & AV_PENDING) != 0) &&
1930 (repeats < apic_max_reps_clear_pending)) {
1931 /* Unmask it */
1932 WRITE_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic_ix,
1933 intin_no, rdt_entry & ~AV_MASK);
1934 irqp->airq_rdt_entry &= ~AV_MASK;
1936 } while ((rdt_entry & AV_PENDING) &&
1937 (repeats < apic_max_reps_clear_pending));
1939 #ifdef DEBUG
1940 if (GET_RDT_BITS(ioapic_ix, intin_no, AV_PENDING) != 0)
1941 apix_intr_deliver_timeouts++;
1942 #endif
1944 if (!level || !APIX_IS_MASK_RDT(apix_mul_ioapic_method))
1945 goto done;
1948 * wait for remote IRR to be cleared for level-triggered
1949 * interrupts
1951 repeats = 0;
1952 do {
1953 repeats++;
1955 for (waited = 0; waited < apic_max_reps_clear_pending;
1956 waited++) {
1957 if (GET_RDT_BITS(ioapic_ix, intin_no, AV_REMOTE_IRR)
1958 == 0)
1959 break;
1962 if (GET_RDT_BITS(ioapic_ix, intin_no, AV_REMOTE_IRR) != 0) {
1963 lock_clear(&apic_ioapic_lock);
1964 intr_restore(iflag);
1966 delay(APIX_CHECK_IRR_DELAY);
1968 iflag = intr_clear();
1969 lock_set(&apic_ioapic_lock);
1971 } while (repeats < apix_max_reps_irr_pending);
1973 if (repeats >= apix_max_reps_irr_pending) {
1974 #ifdef DEBUG
1975 apix_intr_rirr_timeouts++;
1976 #endif
1979 * If we waited and the Remote IRR bit is still not cleared,
1980 * AND if we've invoked the timeout APIC_REPROGRAM_MAX_TIMEOUTS
1981 * times for this interrupt, try the last-ditch workaround:
1983 if (GET_RDT_BITS(ioapic_ix, intin_no, AV_REMOTE_IRR) != 0) {
1985 * Trying to clear the bit through normal
1986 * channels has failed. So as a last-ditch
1987 * effort, try to set the trigger mode to
1988 * edge, then to level. This has been
1989 * observed to work on many systems.
1991 WRITE_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic_ix,
1992 intin_no,
1993 READ_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic_ix,
1994 intin_no) & ~AV_LEVEL);
1995 WRITE_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic_ix,
1996 intin_no,
1997 READ_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic_ix,
1998 intin_no) | AV_LEVEL);
2001 if (GET_RDT_BITS(ioapic_ix, intin_no, AV_REMOTE_IRR) != 0) {
2002 #ifdef DEBUG
2003 apix_intr_rirr_reset_failure++;
2004 #endif
2005 lock_clear(&apic_ioapic_lock);
2006 intr_restore(iflag);
2007 prom_printf("apix: Remote IRR still "
2008 "not clear for IOAPIC %d intin %d.\n"
2009 "\tInterrupts to this pin may cease "
2010 "functioning.\n", ioapic_ix, intin_no);
2011 return (1); /* return failure */
2015 done:
2016 /* change apic_irq_table */
2017 lock_clear(&apic_ioapic_lock);
2018 intr_restore(iflag);
2019 apix_intx_set_vector(irqno, cpuid, vector);
2020 iflag = intr_clear();
2021 lock_set(&apic_ioapic_lock);
2023 /* reprogramme IO-APIC RDT entry */
2024 apix_intx_enable(irqno);
2026 lock_clear(&apic_ioapic_lock);
2027 intr_restore(iflag);
2029 return (0);
2032 static int
2033 apix_intx_get_pending(int irqno)
2035 apic_irq_t *irqp;
2036 int intin, ioapicindex, pending;
2037 ulong_t iflag;
2039 mutex_enter(&airq_mutex);
2040 irqp = apic_irq_table[irqno];
2041 if (IS_IRQ_FREE(irqp)) {
2042 mutex_exit(&airq_mutex);
2043 return (0);
2046 /* check IO-APIC delivery status */
2047 intin = irqp->airq_intin_no;
2048 ioapicindex = irqp->airq_ioapicindex;
2049 mutex_exit(&airq_mutex);
2051 iflag = intr_clear();
2052 lock_set(&apic_ioapic_lock);
2054 pending = (READ_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapicindex, intin) &
2055 AV_PENDING) ? 1 : 0;
2057 lock_clear(&apic_ioapic_lock);
2058 intr_restore(iflag);
2060 return (pending);
2064 * This function will mask the interrupt on the I/O APIC
2066 static void
2067 apix_intx_set_mask(int irqno)
2069 int intin, ioapixindex, rdt_entry;
2070 ulong_t iflag;
2071 apic_irq_t *irqp;
2073 mutex_enter(&airq_mutex);
2074 irqp = apic_irq_table[irqno];
2076 ASSERT(irqp->airq_mps_intr_index != FREE_INDEX);
2078 intin = irqp->airq_intin_no;
2079 ioapixindex = irqp->airq_ioapicindex;
2080 mutex_exit(&airq_mutex);
2082 iflag = intr_clear();
2083 lock_set(&apic_ioapic_lock);
2085 rdt_entry = READ_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapixindex, intin);
2087 /* clear mask */
2088 WRITE_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapixindex, intin,
2089 (AV_MASK | rdt_entry));
2091 lock_clear(&apic_ioapic_lock);
2092 intr_restore(iflag);
2096 * This function will clear the mask for the interrupt on the I/O APIC
2098 static void
2099 apix_intx_clear_mask(int irqno)
2101 int intin, ioapixindex, rdt_entry;
2102 ulong_t iflag;
2103 apic_irq_t *irqp;
2105 mutex_enter(&airq_mutex);
2106 irqp = apic_irq_table[irqno];
2108 ASSERT(irqp->airq_mps_intr_index != FREE_INDEX);
2110 intin = irqp->airq_intin_no;
2111 ioapixindex = irqp->airq_ioapicindex;
2112 mutex_exit(&airq_mutex);
2114 iflag = intr_clear();
2115 lock_set(&apic_ioapic_lock);
2117 rdt_entry = READ_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapixindex, intin);
2119 /* clear mask */
2120 WRITE_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapixindex, intin,
2121 ((~AV_MASK) & rdt_entry));
2123 lock_clear(&apic_ioapic_lock);
2124 intr_restore(iflag);
2128 * For level-triggered interrupt, mask the IRQ line. Mask means
2129 * new interrupts will not be delivered. The interrupt already
2130 * accepted by a local APIC is not affected
2132 void
2133 apix_level_intr_pre_eoi(int irq)
2135 apic_irq_t *irqp = apic_irq_table[irq];
2136 int apic_ix, intin_ix;
2138 if (irqp == NULL)
2139 return;
2141 ASSERT(apic_level_intr[irq] == TRIGGER_MODE_LEVEL);
2143 lock_set(&apic_ioapic_lock);
2145 intin_ix = irqp->airq_intin_no;
2146 apic_ix = irqp->airq_ioapicindex;
2148 if (irqp->airq_cpu != CPU->cpu_id) {
2149 if (!APIX_IS_MASK_RDT(apix_mul_ioapic_method))
2150 ioapic_write_eoi(apic_ix, irqp->airq_vector);
2151 lock_clear(&apic_ioapic_lock);
2152 return;
2155 if (apix_mul_ioapic_method == APIC_MUL_IOAPIC_IOXAPIC) {
2157 * This is a IOxAPIC and there is EOI register:
2158 * Change the vector to reserved unused vector, so that
2159 * the EOI from Local APIC won't clear the Remote IRR for
2160 * this level trigger interrupt. Instead, we'll manually
2161 * clear it in apix_post_hardint() after ISR handling.
2163 WRITE_IOAPIC_RDT_ENTRY_LOW_DWORD(apic_ix, intin_ix,
2164 (irqp->airq_rdt_entry & (~0xff)) | APIX_RESV_VECTOR);
2165 } else {
2166 WRITE_IOAPIC_RDT_ENTRY_LOW_DWORD(apic_ix, intin_ix,
2167 AV_MASK | irqp->airq_rdt_entry);
2170 lock_clear(&apic_ioapic_lock);
2174 * For level-triggered interrupt, unmask the IRQ line
2175 * or restore the original vector number.
2177 void
2178 apix_level_intr_post_dispatch(int irq)
2180 apic_irq_t *irqp = apic_irq_table[irq];
2181 int apic_ix, intin_ix;
2183 if (irqp == NULL)
2184 return;
2186 lock_set(&apic_ioapic_lock);
2188 intin_ix = irqp->airq_intin_no;
2189 apic_ix = irqp->airq_ioapicindex;
2191 if (APIX_IS_DIRECTED_EOI(apix_mul_ioapic_method)) {
2193 * Already sent EOI back to Local APIC.
2194 * Send EOI to IO-APIC
2196 ioapic_write_eoi(apic_ix, irqp->airq_vector);
2197 } else {
2198 /* clear the mask or restore the vector */
2199 WRITE_IOAPIC_RDT_ENTRY_LOW_DWORD(apic_ix, intin_ix,
2200 irqp->airq_rdt_entry);
2202 /* send EOI to IOxAPIC */
2203 if (apix_mul_ioapic_method == APIC_MUL_IOAPIC_IOXAPIC)
2204 ioapic_write_eoi(apic_ix, irqp->airq_vector);
2207 lock_clear(&apic_ioapic_lock);
2210 static int
2211 apix_intx_get_shared(int irqno)
2213 apic_irq_t *irqp;
2214 int share;
2216 mutex_enter(&airq_mutex);
2217 irqp = apic_irq_table[irqno];
2218 if (IS_IRQ_FREE(irqp) || (irqp->airq_cpu == IRQ_UNINIT)) {
2219 mutex_exit(&airq_mutex);
2220 return (0);
2222 share = irqp->airq_share;
2223 mutex_exit(&airq_mutex);
2225 return (share);
2228 static void
2229 apix_intx_set_shared(int irqno, int delta)
2231 apic_irq_t *irqp;
2233 mutex_enter(&airq_mutex);
2234 irqp = apic_irq_table[irqno];
2235 if (IS_IRQ_FREE(irqp)) {
2236 mutex_exit(&airq_mutex);
2237 return;
2239 irqp->airq_share += delta;
2240 mutex_exit(&airq_mutex);
2244 * Setup IRQ table. Return IRQ no or -1 on failure
2246 static int
2247 apix_intx_setup(dev_info_t *dip, int inum, int irqno,
2248 struct apic_io_intr *intrp, struct intrspec *ispec, iflag_t *iflagp)
2250 int origirq = ispec->intrspec_vec;
2251 int newirq;
2252 short intr_index;
2253 uchar_t ipin, ioapic, ioapicindex;
2254 apic_irq_t *irqp;
2256 UNREFERENCED_1PARAMETER(inum);
2258 if (intrp != NULL) {
2259 intr_index = (short)(intrp - apic_io_intrp);
2260 ioapic = intrp->intr_destid;
2261 ipin = intrp->intr_destintin;
2263 /* Find ioapicindex. If destid was ALL, we will exit with 0. */
2264 for (ioapicindex = apic_io_max - 1; ioapicindex; ioapicindex--)
2265 if (apic_io_id[ioapicindex] == ioapic)
2266 break;
2267 ASSERT((ioapic == apic_io_id[ioapicindex]) ||
2268 (ioapic == INTR_ALL_APIC));
2270 /* check whether this intin# has been used by another irqno */
2271 if ((newirq = apic_find_intin(ioapicindex, ipin)) != -1)
2272 return (newirq);
2274 } else if (iflagp != NULL) { /* ACPI */
2275 intr_index = ACPI_INDEX;
2276 ioapicindex = acpi_find_ioapic(irqno);
2277 ASSERT(ioapicindex != 0xFF);
2278 ioapic = apic_io_id[ioapicindex];
2279 ipin = irqno - apic_io_vectbase[ioapicindex];
2281 if (apic_irq_table[irqno] &&
2282 apic_irq_table[irqno]->airq_mps_intr_index == ACPI_INDEX) {
2283 ASSERT(apic_irq_table[irqno]->airq_intin_no == ipin &&
2284 apic_irq_table[irqno]->airq_ioapicindex ==
2285 ioapicindex);
2286 return (irqno);
2289 } else { /* default configuration */
2290 intr_index = DEFAULT_INDEX;
2291 ioapicindex = 0;
2292 ioapic = apic_io_id[ioapicindex];
2293 ipin = (uchar_t)irqno;
2296 /* allocate a new IRQ no */
2297 if ((irqp = apic_irq_table[irqno]) == NULL) {
2298 irqp = kmem_zalloc(sizeof (apic_irq_t), KM_SLEEP);
2299 apic_irq_table[irqno] = irqp;
2300 } else {
2301 if (irqp->airq_mps_intr_index != FREE_INDEX) {
2302 newirq = apic_allocate_irq(apic_first_avail_irq);
2303 if (newirq == -1) {
2304 return (-1);
2306 irqno = newirq;
2307 irqp = apic_irq_table[irqno];
2308 ASSERT(irqp != NULL);
2311 apic_max_device_irq = max(irqno, apic_max_device_irq);
2312 apic_min_device_irq = min(irqno, apic_min_device_irq);
2314 irqp->airq_mps_intr_index = intr_index;
2315 irqp->airq_ioapicindex = ioapicindex;
2316 irqp->airq_intin_no = ipin;
2317 irqp->airq_dip = dip;
2318 irqp->airq_origirq = (uchar_t)origirq;
2319 if (iflagp != NULL)
2320 irqp->airq_iflag = *iflagp;
2321 irqp->airq_cpu = IRQ_UNINIT;
2322 irqp->airq_vector = 0;
2324 return (irqno);
2328 * Setup IRQ table for non-pci devices. Return IRQ no or -1 on error
2330 static int
2331 apix_intx_setup_nonpci(dev_info_t *dip, int inum, int bustype,
2332 struct intrspec *ispec)
2334 int irqno = ispec->intrspec_vec;
2335 int newirq, i;
2336 iflag_t intr_flag;
2337 ACPI_SUBTABLE_HEADER *hp;
2338 ACPI_MADT_INTERRUPT_OVERRIDE *isop;
2339 struct apic_io_intr *intrp;
2341 if (!apic_enable_acpi || apic_use_acpi_madt_only) {
2342 int busid;
2344 if (bustype == 0)
2345 bustype = eisa_level_intr_mask ? BUS_EISA : BUS_ISA;
2347 /* loop checking BUS_ISA/BUS_EISA */
2348 for (i = 0; i < 2; i++) {
2349 if (((busid = apic_find_bus_id(bustype)) != -1) &&
2350 ((intrp = apic_find_io_intr_w_busid(irqno, busid))
2351 != NULL)) {
2352 return (apix_intx_setup(dip, inum, irqno,
2353 intrp, ispec, NULL));
2355 bustype = (bustype == BUS_EISA) ? BUS_ISA : BUS_EISA;
2358 /* fall back to default configuration */
2359 return (-1);
2362 /* search iso entries first */
2363 if (acpi_iso_cnt != 0) {
2364 hp = (ACPI_SUBTABLE_HEADER *)acpi_isop;
2365 i = 0;
2366 while (i < acpi_iso_cnt) {
2367 if (hp->Type == ACPI_MADT_TYPE_INTERRUPT_OVERRIDE) {
2368 isop = (ACPI_MADT_INTERRUPT_OVERRIDE *) hp;
2369 if (isop->Bus == 0 &&
2370 isop->SourceIrq == irqno) {
2371 newirq = isop->GlobalIrq;
2372 intr_flag.intr_po = isop->IntiFlags &
2373 ACPI_MADT_POLARITY_MASK;
2374 intr_flag.intr_el = (isop->IntiFlags &
2375 ACPI_MADT_TRIGGER_MASK) >> 2;
2376 intr_flag.bustype = BUS_ISA;
2378 return (apix_intx_setup(dip, inum,
2379 newirq, NULL, ispec, &intr_flag));
2381 i++;
2383 hp = (ACPI_SUBTABLE_HEADER *)(((char *)hp) +
2384 hp->Length);
2387 intr_flag.intr_po = INTR_PO_ACTIVE_HIGH;
2388 intr_flag.intr_el = INTR_EL_EDGE;
2389 intr_flag.bustype = BUS_ISA;
2390 return (apix_intx_setup(dip, inum, irqno, NULL, ispec, &intr_flag));
2395 * Setup IRQ table for pci devices. Return IRQ no or -1 on error
2397 static int
2398 apix_intx_setup_pci(dev_info_t *dip, int inum, int bustype,
2399 struct intrspec *ispec)
2401 int busid, devid, pci_irq;
2402 ddi_acc_handle_t cfg_handle;
2403 uchar_t ipin;
2404 iflag_t intr_flag;
2405 struct apic_io_intr *intrp;
2407 if (acpica_get_bdf(dip, &busid, &devid, NULL) != 0)
2408 return (-1);
2410 if (busid == 0 && apic_pci_bus_total == 1)
2411 busid = (int)apic_single_pci_busid;
2413 if (pci_config_setup(dip, &cfg_handle) != DDI_SUCCESS)
2414 return (-1);
2415 ipin = pci_config_get8(cfg_handle, PCI_CONF_IPIN) - PCI_INTA;
2416 pci_config_teardown(&cfg_handle);
2418 if (apic_enable_acpi && !apic_use_acpi_madt_only) { /* ACPI */
2419 if (apic_acpi_translate_pci_irq(dip, busid, devid,
2420 ipin, &pci_irq, &intr_flag) != ACPI_PSM_SUCCESS)
2421 return (-1);
2423 intr_flag.bustype = (uchar_t)bustype;
2424 return (apix_intx_setup(dip, inum, pci_irq, NULL, ispec,
2425 &intr_flag));
2428 /* MP configuration table */
2429 pci_irq = ((devid & 0x1f) << 2) | (ipin & 0x3);
2430 if ((intrp = apic_find_io_intr_w_busid(pci_irq, busid)) == NULL) {
2431 pci_irq = apic_handle_pci_pci_bridge(dip, devid, ipin, &intrp);
2432 if (pci_irq == -1)
2433 return (-1);
2436 return (apix_intx_setup(dip, inum, pci_irq, intrp, ispec, NULL));
2440 * Translate and return IRQ no
2442 static int
2443 apix_intx_xlate_irq(dev_info_t *dip, int inum, struct intrspec *ispec)
2445 int newirq, irqno = ispec->intrspec_vec;
2446 int parent_is_pci_or_pciex = 0, child_is_pciex = 0;
2447 int bustype = 0, dev_len;
2448 char dev_type[16];
2450 if (apic_defconf) {
2451 mutex_enter(&airq_mutex);
2452 goto defconf;
2455 if ((dip == NULL) || (!apic_irq_translate && !apic_enable_acpi)) {
2456 mutex_enter(&airq_mutex);
2457 goto nonpci;
2461 * use ddi_getlongprop_buf() instead of ddi_prop_lookup_string()
2462 * to avoid extra buffer allocation.
2464 dev_len = sizeof (dev_type);
2465 if (ddi_getlongprop_buf(DDI_DEV_T_ANY, ddi_get_parent(dip),
2466 DDI_PROP_DONTPASS, "device_type", (caddr_t)dev_type,
2467 &dev_len) == DDI_PROP_SUCCESS) {
2468 if ((strcmp(dev_type, "pci") == 0) ||
2469 (strcmp(dev_type, "pciex") == 0))
2470 parent_is_pci_or_pciex = 1;
2473 if (ddi_getlongprop_buf(DDI_DEV_T_ANY, dip,
2474 DDI_PROP_DONTPASS, "compatible", (caddr_t)dev_type,
2475 &dev_len) == DDI_PROP_SUCCESS) {
2476 if (strstr(dev_type, "pciex"))
2477 child_is_pciex = 1;
2480 mutex_enter(&airq_mutex);
2482 if (parent_is_pci_or_pciex) {
2483 bustype = child_is_pciex ? BUS_PCIE : BUS_PCI;
2484 newirq = apix_intx_setup_pci(dip, inum, bustype, ispec);
2485 if (newirq != -1)
2486 goto done;
2487 bustype = 0;
2488 } else if (strcmp(dev_type, "isa") == 0)
2489 bustype = BUS_ISA;
2490 else if (strcmp(dev_type, "eisa") == 0)
2491 bustype = BUS_EISA;
2493 nonpci:
2494 newirq = apix_intx_setup_nonpci(dip, inum, bustype, ispec);
2495 if (newirq != -1)
2496 goto done;
2498 defconf:
2499 newirq = apix_intx_setup(dip, inum, irqno, NULL, ispec, NULL);
2500 if (newirq == -1) {
2501 mutex_exit(&airq_mutex);
2502 return (-1);
2504 done:
2505 ASSERT(apic_irq_table[newirq]);
2506 mutex_exit(&airq_mutex);
2507 return (newirq);
2510 static int
2511 apix_intx_alloc_vector(dev_info_t *dip, int inum, struct intrspec *ispec)
2513 int irqno;
2514 apix_vector_t *vecp;
2516 if ((irqno = apix_intx_xlate_irq(dip, inum, ispec)) == -1)
2517 return (0);
2519 if ((vecp = apix_alloc_intx(dip, inum, irqno)) == NULL)
2520 return (0);
2522 DDI_INTR_IMPLDBG((CE_CONT, "apix_intx_alloc_vector: dip=0x%p name=%s "
2523 "irqno=0x%x cpuid=%d vector=0x%x\n",
2524 (void *)dip, ddi_driver_name(dip), irqno,
2525 vecp->v_cpuid, vecp->v_vector));
2527 return (1);
2531 * Return the vector number if the translated IRQ for this device
2532 * has a vector mapping setup. If no IRQ setup exists or no vector is
2533 * allocated to it then return 0.
2535 static apix_vector_t *
2536 apix_intx_xlate_vector(dev_info_t *dip, int inum, struct intrspec *ispec)
2538 int irqno;
2539 apix_vector_t *vecp;
2541 /* get the IRQ number */
2542 if ((irqno = apix_intx_xlate_irq(dip, inum, ispec)) == -1)
2543 return (NULL);
2545 /* get the vector number if a vector is allocated to this irqno */
2546 vecp = apix_intx_get_vector(irqno);
2548 return (vecp);
2551 /* stub function: returns the current value of apix_is_enabled */
2553 apix_loaded(void)
2555 return (apix_is_enabled);