Merge commit '15aeb4d1148772724cf568e1f7a13fbb99f11ab8'
[unleashed.git] / usr / src / uts / i86pc / io / pcplusmp / apic_common.c
blob9d57673c4762406facfcba8bc9bf6e23695125ab
1 /*
2 * CDDL HEADER START
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
19 * CDDL HEADER END
23 * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
26 * Copyright 2018 Joyent, Inc.
27 * Copyright (c) 2016, 2017 by Delphix. All rights reserved.
31 * PSMI 1.1 extensions are supported only in 2.6 and later versions.
32 * PSMI 1.2 extensions are supported only in 2.7 and later versions.
33 * PSMI 1.3 and 1.4 extensions are supported in Solaris 10.
34 * PSMI 1.5 extensions are supported in Solaris Nevada.
35 * PSMI 1.6 extensions are supported in Solaris Nevada.
36 * PSMI 1.7 extensions are supported in Solaris Nevada.
38 #define PSMI_1_7
40 #include <sys/processor.h>
41 #include <sys/time.h>
42 #include <sys/psm.h>
43 #include <sys/smp_impldefs.h>
44 #include <sys/cram.h>
45 #include <sys/acpi/acpi.h>
46 #include <sys/acpica.h>
47 #include <sys/psm_common.h>
48 #include <sys/apic.h>
49 #include <sys/pit.h>
50 #include <sys/ddi.h>
51 #include <sys/sunddi.h>
52 #include <sys/ddi_impldefs.h>
53 #include <sys/pci.h>
54 #include <sys/promif.h>
55 #include <sys/x86_archext.h>
56 #include <sys/cpc_impl.h>
57 #include <sys/uadmin.h>
58 #include <sys/panic.h>
59 #include <sys/debug.h>
60 #include <sys/archsystm.h>
61 #include <sys/trap.h>
62 #include <sys/machsystm.h>
63 #include <sys/sysmacros.h>
64 #include <sys/cpuvar.h>
65 #include <sys/rm_platter.h>
66 #include <sys/privregs.h>
67 #include <sys/note.h>
68 #include <sys/pci_intr_lib.h>
69 #include <sys/spl.h>
70 #include <sys/clock.h>
71 #include <sys/dditypes.h>
72 #include <sys/sunddi.h>
73 #include <sys/x_call.h>
74 #include <sys/reboot.h>
75 #include <sys/hpet.h>
76 #include <sys/apic_common.h>
77 #include <sys/apic_timer.h>
79 static void apic_record_ioapic_rdt(void *intrmap_private,
80 ioapic_rdt_t *irdt);
81 static void apic_record_msi(void *intrmap_private, msi_regs_t *mregs);
84 * Common routines between pcplusmp & apix (taken from apic.c).
87 int apic_clkinit(int);
88 hrtime_t apic_gethrtime(void);
89 void apic_send_ipi(int, int);
90 void apic_set_idlecpu(processorid_t);
91 void apic_unset_idlecpu(processorid_t);
92 void apic_shutdown(int, int);
93 void apic_preshutdown(int, int);
94 processorid_t apic_get_next_processorid(processorid_t);
96 hrtime_t apic_gettime();
98 enum apic_ioapic_method_type apix_mul_ioapic_method = APIC_MUL_IOAPIC_PCPLUSMP;
100 /* Now the ones for Dynamic Interrupt distribution */
101 int apic_enable_dynamic_migration = 0;
103 /* maximum loop count when sending Start IPIs. */
104 int apic_sipi_max_loop_count = 0x1000;
107 * These variables are frequently accessed in apic_intr_enter(),
108 * apic_intr_exit and apic_setspl, so group them together
110 volatile uint32_t *apicadr = NULL; /* virtual addr of local APIC */
111 int apic_setspl_delay = 1; /* apic_setspl - delay enable */
112 int apic_clkvect;
114 /* vector at which error interrupts come in */
115 int apic_errvect;
116 int apic_enable_error_intr = 1;
117 int apic_error_display_delay = 100;
119 /* vector at which performance counter overflow interrupts come in */
120 int apic_cpcovf_vect;
121 int apic_enable_cpcovf_intr = 1;
123 /* vector at which CMCI interrupts come in */
124 int apic_cmci_vect;
125 extern int cmi_enable_cmci;
126 extern void cmi_cmci_trap(void);
128 kmutex_t cmci_cpu_setup_lock; /* protects cmci_cpu_setup_registered */
129 int cmci_cpu_setup_registered;
131 lock_t apic_mode_switch_lock;
133 int apic_pir_vect;
136 * Patchable global variables.
138 int apic_forceload = 0;
140 int apic_coarse_hrtime = 1; /* 0 - use accurate slow gethrtime() */
142 int apic_flat_model = 0; /* 0 - clustered. 1 - flat */
143 int apic_panic_on_nmi = 0;
144 int apic_panic_on_apic_error = 0;
146 int apic_verbose = 0; /* 0x1ff */
148 #ifdef DEBUG
149 int apic_debug = 0;
150 int apic_restrict_vector = 0;
152 int apic_debug_msgbuf[APIC_DEBUG_MSGBUFSIZE];
153 int apic_debug_msgbufindex = 0;
155 #endif /* DEBUG */
157 uint_t apic_nticks = 0;
158 uint_t apic_skipped_redistribute = 0;
160 uint_t last_count_read = 0;
161 lock_t apic_gethrtime_lock;
162 volatile int apic_hrtime_stamp = 0;
163 volatile hrtime_t apic_nsec_since_boot = 0;
165 static hrtime_t apic_last_hrtime = 0;
166 int apic_hrtime_error = 0;
167 int apic_remote_hrterr = 0;
168 int apic_num_nmis = 0;
169 int apic_apic_error = 0;
170 int apic_num_apic_errors = 0;
171 int apic_num_cksum_errors = 0;
173 int apic_error = 0;
175 static int apic_cmos_ssb_set = 0;
177 /* use to make sure only one cpu handles the nmi */
178 lock_t apic_nmi_lock;
179 /* use to make sure only one cpu handles the error interrupt */
180 lock_t apic_error_lock;
182 static struct {
183 uchar_t cntl;
184 uchar_t data;
185 } aspen_bmc[] = {
186 { CC_SMS_WR_START, 0x18 }, /* NetFn/LUN */
187 { CC_SMS_WR_NEXT, 0x24 }, /* Cmd SET_WATCHDOG_TIMER */
188 { CC_SMS_WR_NEXT, 0x84 }, /* DataByte 1: SMS/OS no log */
189 { CC_SMS_WR_NEXT, 0x2 }, /* DataByte 2: Power Down */
190 { CC_SMS_WR_NEXT, 0x0 }, /* DataByte 3: no pre-timeout */
191 { CC_SMS_WR_NEXT, 0x0 }, /* DataByte 4: timer expir. */
192 { CC_SMS_WR_NEXT, 0xa }, /* DataByte 5: init countdown */
193 { CC_SMS_WR_END, 0x0 }, /* DataByte 6: init countdown */
195 { CC_SMS_WR_START, 0x18 }, /* NetFn/LUN */
196 { CC_SMS_WR_END, 0x22 } /* Cmd RESET_WATCHDOG_TIMER */
199 static struct {
200 int port;
201 uchar_t data;
202 } sitka_bmc[] = {
203 { SMS_COMMAND_REGISTER, SMS_WRITE_START },
204 { SMS_DATA_REGISTER, 0x18 }, /* NetFn/LUN */
205 { SMS_DATA_REGISTER, 0x24 }, /* Cmd SET_WATCHDOG_TIMER */
206 { SMS_DATA_REGISTER, 0x84 }, /* DataByte 1: SMS/OS no log */
207 { SMS_DATA_REGISTER, 0x2 }, /* DataByte 2: Power Down */
208 { SMS_DATA_REGISTER, 0x0 }, /* DataByte 3: no pre-timeout */
209 { SMS_DATA_REGISTER, 0x0 }, /* DataByte 4: timer expir. */
210 { SMS_DATA_REGISTER, 0xa }, /* DataByte 5: init countdown */
211 { SMS_COMMAND_REGISTER, SMS_WRITE_END },
212 { SMS_DATA_REGISTER, 0x0 }, /* DataByte 6: init countdown */
214 { SMS_COMMAND_REGISTER, SMS_WRITE_START },
215 { SMS_DATA_REGISTER, 0x18 }, /* NetFn/LUN */
216 { SMS_COMMAND_REGISTER, SMS_WRITE_END },
217 { SMS_DATA_REGISTER, 0x22 } /* Cmd RESET_WATCHDOG_TIMER */
220 /* Patchable global variables. */
221 int apic_kmdb_on_nmi = 0; /* 0 - no, 1 - yes enter kmdb */
222 uint32_t apic_divide_reg_init = 0; /* 0 - divide by 2 */
224 /* default apic ops without interrupt remapping */
225 static apic_intrmap_ops_t apic_nointrmap_ops = {
226 (int (*)(int))return_instr,
227 (void (*)(int))return_instr,
228 (void (*)(void **, dev_info_t *, uint16_t, int, uchar_t))return_instr,
229 (void (*)(void *, void *, uint16_t, int))return_instr,
230 (void (*)(void **))return_instr,
231 apic_record_ioapic_rdt,
232 apic_record_msi,
235 apic_intrmap_ops_t *apic_vt_ops = &apic_nointrmap_ops;
236 apic_cpus_info_t *apic_cpus = NULL;
237 cpuset_t apic_cpumask;
238 uint_t apic_picinit_called;
240 /* Flag to indicate that we need to shut down all processors */
241 static uint_t apic_shutdown_processors;
244 * Probe the ioapic method for apix module. Called in apic_probe_common()
247 apic_ioapic_method_probe()
249 if (apix_enable == 0)
250 return (PSM_SUCCESS);
253 * Set IOAPIC EOI handling method. The priority from low to high is:
254 * 1. IOxAPIC: with EOI register
255 * 2. IOMMU interrupt mapping
256 * 3. Mask-Before-EOI method for systems without boot
257 * interrupt routing, such as systems with only one IOAPIC;
258 * NVIDIA CK8-04/MCP55 systems; systems with bridge solution
259 * which disables the boot interrupt routing already.
260 * 4. Directed EOI
262 if (apic_io_ver[0] >= 0x20)
263 apix_mul_ioapic_method = APIC_MUL_IOAPIC_IOXAPIC;
264 if ((apic_io_max == 1) || (apic_nvidia_io_max == apic_io_max))
265 apix_mul_ioapic_method = APIC_MUL_IOAPIC_MASK;
266 if (apic_directed_EOI_supported())
267 apix_mul_ioapic_method = APIC_MUL_IOAPIC_DEOI;
269 /* fall back to pcplusmp */
270 if (apix_mul_ioapic_method == APIC_MUL_IOAPIC_PCPLUSMP) {
271 /* make sure apix is after pcplusmp in /etc/mach */
272 apix_enable = 0; /* go ahead with pcplusmp install next */
273 return (PSM_FAILURE);
276 return (PSM_SUCCESS);
280 * handler for APIC Error interrupt. Just print a warning and continue
283 apic_error_intr()
285 uint_t error0, error1, error;
286 uint_t i;
289 * We need to write before read as per 7.4.17 of system prog manual.
290 * We do both and or the results to be safe
292 error0 = apic_reg_ops->apic_read(APIC_ERROR_STATUS);
293 apic_reg_ops->apic_write(APIC_ERROR_STATUS, 0);
294 error1 = apic_reg_ops->apic_read(APIC_ERROR_STATUS);
295 error = error0 | error1;
298 * Clear the APIC error status (do this on all cpus that enter here)
299 * (two writes are required due to the semantics of accessing the
300 * error status register.)
302 apic_reg_ops->apic_write(APIC_ERROR_STATUS, 0);
303 apic_reg_ops->apic_write(APIC_ERROR_STATUS, 0);
306 * Prevent more than 1 CPU from handling error interrupt causing
307 * double printing (interleave of characters from multiple
308 * CPU's when using prom_printf)
310 if (lock_try(&apic_error_lock) == 0)
311 return (error ? DDI_INTR_CLAIMED : DDI_INTR_UNCLAIMED);
312 if (error) {
313 #if DEBUG
314 if (apic_debug)
315 debug_enter("pcplusmp: APIC Error interrupt received");
316 #endif /* DEBUG */
317 if (apic_panic_on_apic_error)
318 cmn_err(CE_PANIC,
319 "APIC Error interrupt on CPU %d. Status = %x",
320 psm_get_cpu_id(), error);
321 else {
322 if ((error & ~APIC_CS_ERRORS) == 0) {
323 /* cksum error only */
324 apic_error |= APIC_ERR_APIC_ERROR;
325 apic_apic_error |= error;
326 apic_num_apic_errors++;
327 apic_num_cksum_errors++;
328 } else {
330 * prom_printf is the best shot we have of
331 * something which is problem free from
332 * high level/NMI type of interrupts
334 prom_printf("APIC Error interrupt on CPU %d. "
335 "Status 0 = %x, Status 1 = %x\n",
336 psm_get_cpu_id(), error0, error1);
337 apic_error |= APIC_ERR_APIC_ERROR;
338 apic_apic_error |= error;
339 apic_num_apic_errors++;
340 for (i = 0; i < apic_error_display_delay; i++) {
341 tenmicrosec();
344 * provide more delay next time limited to
345 * roughly 1 clock tick time
347 if (apic_error_display_delay < 500)
348 apic_error_display_delay *= 2;
351 lock_clear(&apic_error_lock);
352 return (DDI_INTR_CLAIMED);
353 } else {
354 lock_clear(&apic_error_lock);
355 return (DDI_INTR_UNCLAIMED);
360 * Turn off the mask bit in the performance counter Local Vector Table entry.
362 void
363 apic_cpcovf_mask_clear(void)
365 apic_reg_ops->apic_write(APIC_PCINT_VECT,
366 (apic_reg_ops->apic_read(APIC_PCINT_VECT) & ~APIC_LVT_MASK));
369 /*ARGSUSED*/
370 static int
371 apic_cmci_enable(xc_arg_t arg1, xc_arg_t arg2, xc_arg_t arg3)
373 apic_reg_ops->apic_write(APIC_CMCI_VECT, apic_cmci_vect);
374 return (0);
377 /*ARGSUSED*/
378 static int
379 apic_cmci_disable(xc_arg_t arg1, xc_arg_t arg2, xc_arg_t arg3)
381 apic_reg_ops->apic_write(APIC_CMCI_VECT, apic_cmci_vect | AV_MASK);
382 return (0);
385 /*ARGSUSED*/
387 cmci_cpu_setup(cpu_setup_t what, int cpuid, void *arg)
389 cpuset_t cpu_set;
391 CPUSET_ONLY(cpu_set, cpuid);
393 switch (what) {
394 case CPU_ON:
395 xc_call((uintptr_t)NULL, (uintptr_t)NULL,
396 (uintptr_t)NULL, CPUSET2BV(cpu_set),
397 (xc_func_t)apic_cmci_enable);
398 break;
400 case CPU_OFF:
401 xc_call((uintptr_t)NULL, (uintptr_t)NULL,
402 (uintptr_t)NULL, CPUSET2BV(cpu_set),
403 (xc_func_t)apic_cmci_disable);
404 break;
406 default:
407 break;
410 return (0);
413 static void
414 apic_disable_local_apic(void)
416 apic_reg_ops->apic_write_task_reg(APIC_MASK_ALL);
417 apic_reg_ops->apic_write(APIC_LOCAL_TIMER, AV_MASK);
419 /* local intr reg 0 */
420 apic_reg_ops->apic_write(APIC_INT_VECT0, AV_MASK);
422 /* disable NMI */
423 apic_reg_ops->apic_write(APIC_INT_VECT1, AV_MASK);
425 /* and error interrupt */
426 apic_reg_ops->apic_write(APIC_ERR_VECT, AV_MASK);
428 /* and perf counter intr */
429 apic_reg_ops->apic_write(APIC_PCINT_VECT, AV_MASK);
431 apic_reg_ops->apic_write(APIC_SPUR_INT_REG, APIC_SPUR_INTR);
434 static void
435 apic_cpu_send_SIPI(processorid_t cpun, boolean_t start)
437 int loop_count;
438 uint32_t vector;
439 uint_t apicid;
440 ulong_t iflag;
442 apicid = apic_cpus[cpun].aci_local_id;
445 * Interrupts on current CPU will be disabled during the
446 * steps in order to avoid unwanted side effects from
447 * executing interrupt handlers on a problematic BIOS.
449 iflag = intr_clear();
451 if (start) {
452 outb(CMOS_ADDR, SSB);
453 outb(CMOS_DATA, BIOS_SHUTDOWN);
457 * According to X2APIC specification in section '2.3.5.1' of
458 * Interrupt Command Register Semantics, the semantics of
459 * programming the Interrupt Command Register to dispatch an interrupt
460 * is simplified. A single MSR write to the 64-bit ICR is required
461 * for dispatching an interrupt. Specifically, with the 64-bit MSR
462 * interface to ICR, system software is not required to check the
463 * status of the delivery status bit prior to writing to the ICR
464 * to send an IPI. With the removal of the Delivery Status bit,
465 * system software no longer has a reason to read the ICR. It remains
466 * readable only to aid in debugging.
468 #ifdef DEBUG
469 APIC_AV_PENDING_SET();
470 #else
471 if (apic_mode == LOCAL_APIC) {
472 APIC_AV_PENDING_SET();
474 #endif /* DEBUG */
476 /* for integrated - make sure there is one INIT IPI in buffer */
477 /* for external - it will wake up the cpu */
478 apic_reg_ops->apic_write_int_cmd(apicid, AV_ASSERT | AV_RESET);
480 /* If only 1 CPU is installed, PENDING bit will not go low */
481 for (loop_count = apic_sipi_max_loop_count; loop_count; loop_count--) {
482 if (apic_mode == LOCAL_APIC &&
483 apic_reg_ops->apic_read(APIC_INT_CMD1) & AV_PENDING)
484 apic_ret();
485 else
486 break;
489 apic_reg_ops->apic_write_int_cmd(apicid, AV_DEASSERT | AV_RESET);
490 drv_usecwait(20000); /* 20 milli sec */
492 if (apic_cpus[cpun].aci_local_ver >= APIC_INTEGRATED_VERS) {
493 /* integrated apic */
495 vector = (rm_platter_pa >> MMU_PAGESHIFT) &
496 (APIC_VECTOR_MASK | APIC_IPL_MASK);
498 /* to offset the INIT IPI queue up in the buffer */
499 apic_reg_ops->apic_write_int_cmd(apicid, vector | AV_STARTUP);
500 drv_usecwait(200); /* 20 micro sec */
503 * send the second SIPI (Startup IPI) as recommended by Intel
504 * software development manual.
506 apic_reg_ops->apic_write_int_cmd(apicid, vector | AV_STARTUP);
507 drv_usecwait(200); /* 20 micro sec */
510 intr_restore(iflag);
513 /*ARGSUSED1*/
515 apic_cpu_start(processorid_t cpun, caddr_t arg)
517 ASSERT(MUTEX_HELD(&cpu_lock));
519 if (!apic_cpu_in_range(cpun)) {
520 return (EINVAL);
524 * Switch to apic_common_send_ipi for safety during starting other CPUs.
526 if (apic_mode == LOCAL_X2APIC) {
527 apic_switch_ipi_callback(B_TRUE);
530 apic_cmos_ssb_set = 1;
531 apic_cpu_send_SIPI(cpun, B_TRUE);
533 return (0);
537 * Put CPU into halted state with interrupts disabled.
539 /*ARGSUSED1*/
541 apic_cpu_stop(processorid_t cpun, caddr_t arg)
543 int rc;
544 cpu_t *cp;
545 extern cpuset_t cpu_ready_set;
546 extern void cpu_idle_intercept_cpu(cpu_t *cp);
548 ASSERT(MUTEX_HELD(&cpu_lock));
550 if (!apic_cpu_in_range(cpun)) {
551 return (EINVAL);
553 if (apic_cpus[cpun].aci_local_ver < APIC_INTEGRATED_VERS) {
554 return (ENOTSUP);
557 cp = cpu_get(cpun);
558 ASSERT(cp != NULL);
559 ASSERT((cp->cpu_flags & CPU_OFFLINE) != 0);
560 ASSERT((cp->cpu_flags & CPU_QUIESCED) != 0);
561 ASSERT((cp->cpu_flags & CPU_ENABLE) == 0);
563 /* Clear CPU_READY flag to disable cross calls. */
564 cp->cpu_flags &= ~CPU_READY;
565 CPUSET_ATOMIC_DEL(cpu_ready_set, cpun);
566 rc = xc_flush_cpu(cp);
567 if (rc != 0) {
568 CPUSET_ATOMIC_ADD(cpu_ready_set, cpun);
569 cp->cpu_flags |= CPU_READY;
570 return (rc);
573 /* Intercept target CPU at a safe point before powering it off. */
574 cpu_idle_intercept_cpu(cp);
576 apic_cpu_send_SIPI(cpun, B_FALSE);
577 cp->cpu_flags &= ~CPU_RUNNING;
579 return (0);
583 apic_cpu_ops(psm_cpu_request_t *reqp)
585 if (reqp == NULL) {
586 return (EINVAL);
589 switch (reqp->pcr_cmd) {
590 case PSM_CPU_ADD:
591 return (apic_cpu_add(reqp));
593 case PSM_CPU_REMOVE:
594 return (apic_cpu_remove(reqp));
596 case PSM_CPU_STOP:
597 return (apic_cpu_stop(reqp->req.cpu_stop.cpuid,
598 reqp->req.cpu_stop.ctx));
600 default:
601 return (ENOTSUP);
/* DEBUG-only tunables; their consumers are outside this part of the file. */
#ifdef	DEBUG
int	apic_break_on_cpu = 9;
int	apic_stretch_interrupts = 0;
int	apic_stretch_ISR = 1 << 3;	/* IPL of 3 matches nothing now */
#endif /* DEBUG */

612 * generates an interprocessor interrupt to another CPU. Any changes made to
613 * this routine must be accompanied by similar changes to
614 * apic_common_send_ipi().
616 void
617 apic_send_ipi(int cpun, int ipl)
619 int vector;
620 ulong_t flag;
622 vector = apic_resv_vector[ipl];
624 ASSERT((vector >= APIC_BASE_VECT) && (vector <= APIC_SPUR_INTR));
626 flag = intr_clear();
628 APIC_AV_PENDING_SET();
630 apic_reg_ops->apic_write_int_cmd(apic_cpus[cpun].aci_local_id,
631 vector);
633 intr_restore(flag);
636 void
637 apic_send_pir_ipi(processorid_t cpun)
639 const int vector = apic_pir_vect;
640 ulong_t flag;
642 ASSERT((vector >= APIC_BASE_VECT) && (vector <= APIC_SPUR_INTR));
644 flag = intr_clear();
646 /* Self-IPI for inducing PIR makes no sense. */
647 if ((cpun != psm_get_cpu_id())) {
648 APIC_AV_PENDING_SET();
649 apic_reg_ops->apic_write_int_cmd(apic_cpus[cpun].aci_local_id,
650 vector);
653 intr_restore(flag);
657 apic_get_pir_ipivect(void)
659 return (apic_pir_vect);
662 /*ARGSUSED*/
663 void
664 apic_set_idlecpu(processorid_t cpun)
668 /*ARGSUSED*/
669 void
670 apic_unset_idlecpu(processorid_t cpun)
/*
 * Empty function; called from the busy-wait loops in this file so that each
 * iteration performs a function call.
 */
void
apic_ret(void)
{
}

681 * If apic_coarse_time == 1, then apic_gettime() is used instead of
682 * apic_gethrtime(). This is used for performance instead of accuracy.
685 hrtime_t
686 apic_gettime()
688 int old_hrtime_stamp;
689 hrtime_t temp;
692 * In one-shot mode, we do not keep time, so if anyone
693 * calls psm_gettime() directly, we vector over to
694 * gethrtime().
695 * one-shot mode MUST NOT be enabled if this psm is the source of
696 * hrtime.
699 if (apic_oneshot)
700 return (gethrtime());
703 gettime_again:
704 while ((old_hrtime_stamp = apic_hrtime_stamp) & 1)
705 apic_ret();
707 temp = apic_nsec_since_boot;
709 if (apic_hrtime_stamp != old_hrtime_stamp) { /* got an interrupt */
710 goto gettime_again;
712 return (temp);
716 * Here we return the number of nanoseconds since booting. Note every
717 * clock interrupt increments apic_nsec_since_boot by the appropriate
718 * amount.
720 hrtime_t
721 apic_gethrtime(void)
723 int curr_timeval, countval, elapsed_ticks;
724 int old_hrtime_stamp, status;
725 hrtime_t temp;
726 uint32_t cpun;
727 ulong_t oflags;
730 * In one-shot mode, we do not keep time, so if anyone
731 * calls psm_gethrtime() directly, we vector over to
732 * gethrtime().
733 * one-shot mode MUST NOT be enabled if this psm is the source of
734 * hrtime.
737 if (apic_oneshot)
738 return (gethrtime());
740 oflags = intr_clear(); /* prevent migration */
742 cpun = apic_reg_ops->apic_read(APIC_LID_REG);
743 if (apic_mode == LOCAL_APIC)
744 cpun >>= APIC_ID_BIT_OFFSET;
746 lock_set(&apic_gethrtime_lock);
748 gethrtime_again:
749 while ((old_hrtime_stamp = apic_hrtime_stamp) & 1)
750 apic_ret();
753 * Check to see which CPU we are on. Note the time is kept on
754 * the local APIC of CPU 0. If on CPU 0, simply read the current
755 * counter. If on another CPU, issue a remote read command to CPU 0.
757 if (cpun == apic_cpus[0].aci_local_id) {
758 countval = apic_reg_ops->apic_read(APIC_CURR_COUNT);
759 } else {
760 #ifdef DEBUG
761 APIC_AV_PENDING_SET();
762 #else
763 if (apic_mode == LOCAL_APIC)
764 APIC_AV_PENDING_SET();
765 #endif /* DEBUG */
767 apic_reg_ops->apic_write_int_cmd(
768 apic_cpus[0].aci_local_id, APIC_CURR_ADD | AV_REMOTE);
770 while ((status = apic_reg_ops->apic_read(APIC_INT_CMD1))
771 & AV_READ_PENDING) {
772 apic_ret();
775 if (status & AV_REMOTE_STATUS) /* 1 = valid */
776 countval = apic_reg_ops->apic_read(APIC_REMOTE_READ);
777 else { /* 0 = invalid */
778 apic_remote_hrterr++;
780 * return last hrtime right now, will need more
781 * testing if change to retry
783 temp = apic_last_hrtime;
785 lock_clear(&apic_gethrtime_lock);
787 intr_restore(oflags);
789 return (temp);
792 if (countval > last_count_read)
793 countval = 0;
794 else
795 last_count_read = countval;
797 elapsed_ticks = apic_hertz_count - countval;
799 curr_timeval = APIC_TICKS_TO_NSECS(elapsed_ticks);
800 temp = apic_nsec_since_boot + curr_timeval;
802 if (apic_hrtime_stamp != old_hrtime_stamp) { /* got an interrupt */
803 /* we might have clobbered last_count_read. Restore it */
804 last_count_read = apic_hertz_count;
805 goto gethrtime_again;
808 if (temp < apic_last_hrtime) {
809 /* return last hrtime if error occurs */
810 apic_hrtime_error++;
811 temp = apic_last_hrtime;
813 else
814 apic_last_hrtime = temp;
816 lock_clear(&apic_gethrtime_lock);
817 intr_restore(oflags);
819 return (temp);
822 /* apic NMI handler */
823 /*ARGSUSED*/
824 void
825 apic_nmi_intr(caddr_t arg, struct regs *rp)
827 if (apic_shutdown_processors) {
828 apic_disable_local_apic();
829 return;
832 apic_error |= APIC_ERR_NMI;
834 if (!lock_try(&apic_nmi_lock))
835 return;
836 apic_num_nmis++;
838 if (apic_kmdb_on_nmi && psm_debugger()) {
839 debug_enter("NMI received: entering kmdb\n");
840 } else if (apic_panic_on_nmi) {
841 /* Keep panic from entering kmdb. */
842 nopanicdebug = 1;
843 panic("NMI received\n");
844 } else {
846 * prom_printf is the best shot we have of something which is
847 * problem free from high level/NMI type of interrupts
849 prom_printf("NMI received\n");
852 lock_clear(&apic_nmi_lock);
855 processorid_t
856 apic_get_next_processorid(processorid_t cpu_id)
859 int i;
861 if (cpu_id == -1)
862 return ((processorid_t)0);
864 for (i = cpu_id + 1; i < NCPU; i++) {
865 if (apic_cpu_in_range(i))
866 return (i);
869 return ((processorid_t)-1);
873 apic_cpu_add(psm_cpu_request_t *reqp)
875 int i, rv = 0;
876 ulong_t iflag;
877 boolean_t first = B_TRUE;
878 uchar_t localver = 0;
879 uint32_t localid, procid;
880 processorid_t cpuid = (processorid_t)-1;
881 mach_cpu_add_arg_t *ap;
883 ASSERT(reqp != NULL);
884 reqp->req.cpu_add.cpuid = (processorid_t)-1;
886 /* Check whether CPU hotplug is supported. */
887 if (!plat_dr_support_cpu() || apic_max_nproc == -1) {
888 return (ENOTSUP);
891 ap = (mach_cpu_add_arg_t *)reqp->req.cpu_add.argp;
892 switch (ap->type) {
893 case MACH_CPU_ARG_LOCAL_APIC:
894 localid = ap->arg.apic.apic_id;
895 procid = ap->arg.apic.proc_id;
896 if (localid >= 255 || procid > 255) {
897 cmn_err(CE_WARN,
898 "!apic: apicid(%u) or procid(%u) is invalid.",
899 localid, procid);
900 return (EINVAL);
902 break;
904 case MACH_CPU_ARG_LOCAL_X2APIC:
905 localid = ap->arg.apic.apic_id;
906 procid = ap->arg.apic.proc_id;
907 if (localid >= UINT32_MAX) {
908 cmn_err(CE_WARN,
909 "!apic: x2apicid(%u) is invalid.", localid);
910 return (EINVAL);
911 } else if (localid >= 255 && apic_mode == LOCAL_APIC) {
912 cmn_err(CE_WARN, "!apic: system is in APIC mode, "
913 "can't support x2APIC processor.");
914 return (ENOTSUP);
916 break;
918 default:
919 cmn_err(CE_WARN,
920 "!apic: unknown argument type %d to apic_cpu_add().",
921 ap->type);
922 return (EINVAL);
925 /* Use apic_ioapic_lock to sync with apic_get_next_bind_cpu. */
926 iflag = intr_clear();
927 lock_set(&apic_ioapic_lock);
929 /* Check whether local APIC id already exists. */
930 for (i = 0; i < apic_nproc; i++) {
931 if (!CPU_IN_SET(apic_cpumask, i))
932 continue;
933 if (apic_cpus[i].aci_local_id == localid) {
934 lock_clear(&apic_ioapic_lock);
935 intr_restore(iflag);
936 cmn_err(CE_WARN,
937 "!apic: local apic id %u already exists.",
938 localid);
939 return (EEXIST);
940 } else if (apic_cpus[i].aci_processor_id == procid) {
941 lock_clear(&apic_ioapic_lock);
942 intr_restore(iflag);
943 cmn_err(CE_WARN,
944 "!apic: processor id %u already exists.",
945 (int)procid);
946 return (EEXIST);
950 * There's no local APIC version number available in MADT table,
951 * so assume that all CPUs are homogeneous and use local APIC
952 * version number of the first existing CPU.
954 if (first) {
955 first = B_FALSE;
956 localver = apic_cpus[i].aci_local_ver;
959 ASSERT(first == B_FALSE);
962 * Try to assign the same cpuid if APIC id exists in the dirty cache.
964 for (i = 0; i < apic_max_nproc; i++) {
965 if (CPU_IN_SET(apic_cpumask, i)) {
966 ASSERT((apic_cpus[i].aci_status & APIC_CPU_FREE) == 0);
967 continue;
969 ASSERT(apic_cpus[i].aci_status & APIC_CPU_FREE);
970 if ((apic_cpus[i].aci_status & APIC_CPU_DIRTY) &&
971 apic_cpus[i].aci_local_id == localid &&
972 apic_cpus[i].aci_processor_id == procid) {
973 cpuid = i;
974 break;
978 /* Avoid the dirty cache and allocate fresh slot if possible. */
979 if (cpuid == (processorid_t)-1) {
980 for (i = 0; i < apic_max_nproc; i++) {
981 if ((apic_cpus[i].aci_status & APIC_CPU_FREE) &&
982 (apic_cpus[i].aci_status & APIC_CPU_DIRTY) == 0) {
983 cpuid = i;
984 break;
989 /* Try to find any free slot as last resort. */
990 if (cpuid == (processorid_t)-1) {
991 for (i = 0; i < apic_max_nproc; i++) {
992 if (apic_cpus[i].aci_status & APIC_CPU_FREE) {
993 cpuid = i;
994 break;
999 if (cpuid == (processorid_t)-1) {
1000 lock_clear(&apic_ioapic_lock);
1001 intr_restore(iflag);
1002 cmn_err(CE_NOTE,
1003 "!apic: failed to allocate cpu id for processor %u.",
1004 procid);
1005 rv = EAGAIN;
1006 } else if (ACPI_FAILURE(acpica_map_cpu(cpuid, procid))) {
1007 lock_clear(&apic_ioapic_lock);
1008 intr_restore(iflag);
1009 cmn_err(CE_NOTE,
1010 "!apic: failed to build mapping for processor %u.",
1011 procid);
1012 rv = EBUSY;
1013 } else {
1014 ASSERT(cpuid >= 0 && cpuid < NCPU);
1015 ASSERT(cpuid < apic_max_nproc && cpuid < max_ncpus);
1016 bzero(&apic_cpus[cpuid], sizeof (apic_cpus[0]));
1017 apic_cpus[cpuid].aci_processor_id = procid;
1018 apic_cpus[cpuid].aci_local_id = localid;
1019 apic_cpus[cpuid].aci_local_ver = localver;
1020 CPUSET_ATOMIC_ADD(apic_cpumask, cpuid);
1021 if (cpuid >= apic_nproc) {
1022 apic_nproc = cpuid + 1;
1024 lock_clear(&apic_ioapic_lock);
1025 intr_restore(iflag);
1026 reqp->req.cpu_add.cpuid = cpuid;
1029 return (rv);
1033 apic_cpu_remove(psm_cpu_request_t *reqp)
1035 int i;
1036 ulong_t iflag;
1037 processorid_t cpuid;
1039 /* Check whether CPU hotplug is supported. */
1040 if (!plat_dr_support_cpu() || apic_max_nproc == -1) {
1041 return (ENOTSUP);
1044 cpuid = reqp->req.cpu_remove.cpuid;
1046 /* Use apic_ioapic_lock to sync with apic_get_next_bind_cpu. */
1047 iflag = intr_clear();
1048 lock_set(&apic_ioapic_lock);
1050 if (!apic_cpu_in_range(cpuid)) {
1051 lock_clear(&apic_ioapic_lock);
1052 intr_restore(iflag);
1053 cmn_err(CE_WARN,
1054 "!apic: cpuid %d doesn't exist in apic_cpus array.",
1055 cpuid);
1056 return (ENODEV);
1058 ASSERT((apic_cpus[cpuid].aci_status & APIC_CPU_FREE) == 0);
1060 if (ACPI_FAILURE(acpica_unmap_cpu(cpuid))) {
1061 lock_clear(&apic_ioapic_lock);
1062 intr_restore(iflag);
1063 return (ENOENT);
1066 if (cpuid == apic_nproc - 1) {
1068 * We are removing the highest numbered cpuid so we need to
1069 * find the next highest cpuid as the new value for apic_nproc.
1071 for (i = apic_nproc; i > 0; i--) {
1072 if (CPU_IN_SET(apic_cpumask, i - 1)) {
1073 apic_nproc = i;
1074 break;
1077 /* at least one CPU left */
1078 ASSERT(i > 0);
1080 CPUSET_ATOMIC_DEL(apic_cpumask, cpuid);
1081 /* mark slot as free and keep it in the dirty cache */
1082 apic_cpus[cpuid].aci_status = APIC_CPU_FREE | APIC_CPU_DIRTY;
1084 lock_clear(&apic_ioapic_lock);
1085 intr_restore(iflag);
1087 return (0);
1091 * Return the number of ticks the APIC decrements in SF nanoseconds.
1092 * The fixed-frequency PIT (aka 8254) is used for the measurement.
1094 static uint64_t
1095 apic_calibrate_impl()
1097 uint8_t pit_tick_lo;
1098 uint16_t pit_tick, target_pit_tick, pit_ticks_adj;
1099 uint32_t pit_ticks;
1100 uint32_t start_apic_tick, end_apic_tick, apic_ticks;
1101 ulong_t iflag;
1103 apic_reg_ops->apic_write(APIC_DIVIDE_REG, apic_divide_reg_init);
1104 apic_reg_ops->apic_write(APIC_INIT_COUNT, APIC_MAXVAL);
1106 iflag = intr_clear();
1108 do {
1109 pit_tick_lo = inb(PITCTR0_PORT);
1110 pit_tick = (inb(PITCTR0_PORT) << 8) | pit_tick_lo;
1111 } while (pit_tick < APIC_TIME_MIN ||
1112 pit_tick_lo <= APIC_LB_MIN || pit_tick_lo >= APIC_LB_MAX);
1115 * Wait for the PIT to decrement by 5 ticks to ensure
1116 * we didn't start in the middle of a tick.
1117 * Compare with 0x10 for the wrap around case.
1119 target_pit_tick = pit_tick - 5;
1120 do {
1121 pit_tick_lo = inb(PITCTR0_PORT);
1122 pit_tick = (inb(PITCTR0_PORT) << 8) | pit_tick_lo;
1123 } while (pit_tick > target_pit_tick || pit_tick_lo < 0x10);
1125 start_apic_tick = apic_reg_ops->apic_read(APIC_CURR_COUNT);
1128 * Wait for the PIT to decrement by APIC_TIME_COUNT ticks
1130 target_pit_tick = pit_tick - APIC_TIME_COUNT;
1131 do {
1132 pit_tick_lo = inb(PITCTR0_PORT);
1133 pit_tick = (inb(PITCTR0_PORT) << 8) | pit_tick_lo;
1134 } while (pit_tick > target_pit_tick || pit_tick_lo < 0x10);
1136 end_apic_tick = apic_reg_ops->apic_read(APIC_CURR_COUNT);
1138 intr_restore(iflag);
1140 apic_ticks = start_apic_tick - end_apic_tick;
1142 /* The PIT might have decremented by more ticks than planned */
1143 pit_ticks_adj = target_pit_tick - pit_tick;
1144 /* total number of PIT ticks corresponding to apic_ticks */
1145 pit_ticks = APIC_TIME_COUNT + pit_ticks_adj;
1148 * Determine the number of nanoseconds per APIC clock tick
1149 * and then determine how many APIC ticks to interrupt at the
1150 * desired frequency
1151 * apic_ticks / (pitticks / PIT_HZ) = apic_ticks_per_s
1152 * (apic_ticks * PIT_HZ) / pitticks = apic_ticks_per_s
1153 * apic_ticks_per_ns = (apic_ticks * PIT_HZ) / (pitticks * 10^9)
1154 * apic_ticks_per_SFns =
1155 * (SF * apic_ticks * PIT_HZ) / (pitticks * 10^9)
1157 return ((SF * apic_ticks * PIT_HZ) / ((uint64_t)pit_ticks * NANOSEC));
/*
 * It was found empirically that 5 measurements seem sufficient to give a good
 * accuracy. Most spurious measurements are higher than the target value thus
 * we eliminate up to 2/5 spurious measurements.
 */
#define	APIC_CALIBRATE_MEASUREMENTS		5

/*
 * A warning is posted when a neighbour of the median differs from it by
 * more than this percentage of the median.
 */
#define	APIC_CALIBRATE_PERCENT_OFF_WARNING	10

1170 * Return the number of ticks the APIC decrements in SF nanoseconds.
1171 * Several measurements are taken to filter out outliers.
1173 uint64_t
1174 apic_calibrate()
1176 uint64_t measurements[APIC_CALIBRATE_MEASUREMENTS];
1177 int median_idx;
1178 uint64_t median;
1181 * When running under a virtual machine, the emulated PIT and APIC
1182 * counters do not always return the right values and can roll over.
1183 * Those spurious measurements are relatively rare but could
1184 * significantly affect the calibration.
1185 * Therefore we take several measurements and then keep the median.
1186 * The median is preferred to the average here as we only want to
1187 * discard outliers.
1189 for (int i = 0; i < APIC_CALIBRATE_MEASUREMENTS; i++)
1190 measurements[i] = apic_calibrate_impl();
1193 * sort results and retrieve median.
1195 for (int i = 0; i < APIC_CALIBRATE_MEASUREMENTS; i++) {
1196 for (int j = i + 1; j < APIC_CALIBRATE_MEASUREMENTS; j++) {
1197 if (measurements[j] < measurements[i]) {
1198 uint64_t tmp = measurements[i];
1199 measurements[i] = measurements[j];
1200 measurements[j] = tmp;
1204 median_idx = APIC_CALIBRATE_MEASUREMENTS / 2;
1205 median = measurements[median_idx];
1207 #if (APIC_CALIBRATE_MEASUREMENTS >= 3)
1209 * Check that measurements are consistent. Post a warning
1210 * if the three middle values are not close to each other.
1212 uint64_t delta_warn = median *
1213 APIC_CALIBRATE_PERCENT_OFF_WARNING / 100;
1214 if ((median - measurements[median_idx - 1]) > delta_warn ||
1215 (measurements[median_idx + 1] - median) > delta_warn) {
1216 cmn_err(CE_WARN, "apic_calibrate measurements lack "
1217 "precision: %llu, %llu, %llu.",
1218 (u_longlong_t)measurements[median_idx - 1],
1219 (u_longlong_t)median,
1220 (u_longlong_t)measurements[median_idx + 1]);
1222 #endif
1224 return (median);
1228 * Initialise the APIC timer on the local APIC of CPU 0 to the desired
1229 * frequency. Note at this stage in the boot sequence, the boot processor
1230 * is the only active processor.
1231 * hertz value of 0 indicates a one-shot mode request. In this case
1232 * the function returns the resolution (in nanoseconds) for the hardware
1233 * timer interrupt. If one-shot mode capability is not available,
1234 * the return value will be 0. apic_enable_oneshot is a global switch
1235 * for disabling the functionality.
1236 * A non-zero positive value for hertz indicates a periodic mode request.
1237 * In this case the hardware will be programmed to generate clock interrupts
1238 * at hertz frequency and returns the resolution of interrupts in
1239 * nanosecond.
1243 apic_clkinit(int hertz)
1245 int ret;
1247 apic_int_busy_mark = (apic_int_busy_mark *
1248 apic_sample_factor_redistribution) / 100;
1249 apic_int_free_mark = (apic_int_free_mark *
1250 apic_sample_factor_redistribution) / 100;
1251 apic_diff_for_redistribution = (apic_diff_for_redistribution *
1252 apic_sample_factor_redistribution) / 100;
1254 ret = apic_timer_init(hertz);
1255 return (ret);
1260 * apic_preshutdown:
1261 * Called early in shutdown whilst we can still access filesystems to do
1262 * things like loading modules which will be required to complete shutdown
1263 * after filesystems are all unmounted.
1265 void
1266 apic_preshutdown(int cmd, int fcn)
1268 APIC_VERBOSE_POWEROFF(("apic_preshutdown(%d,%d); m=%d a=%d\n",
1269 cmd, fcn, apic_poweroff_method, apic_enable_acpi));
1272 void
1273 apic_shutdown(int cmd, int fcn)
1275 int restarts, attempts;
1276 int i;
1277 uchar_t byte;
1278 ulong_t iflag;
1280 hpet_acpi_fini();
1282 /* Send NMI to all CPUs except self to do per processor shutdown */
1283 iflag = intr_clear();
1284 #ifdef DEBUG
1285 APIC_AV_PENDING_SET();
1286 #else
1287 if (apic_mode == LOCAL_APIC)
1288 APIC_AV_PENDING_SET();
1289 #endif /* DEBUG */
1290 apic_shutdown_processors = 1;
1291 apic_reg_ops->apic_write(APIC_INT_CMD1,
1292 AV_NMI | AV_LEVEL | AV_SH_ALL_EXCSELF);
1294 /* restore cmos shutdown byte before reboot */
1295 if (apic_cmos_ssb_set) {
1296 outb(CMOS_ADDR, SSB);
1297 outb(CMOS_DATA, 0);
1300 ioapic_disable_redirection();
1302 /* disable apic mode if imcr present */
1303 if (apic_imcrp) {
1304 outb(APIC_IMCR_P1, (uchar_t)APIC_IMCR_SELECT);
1305 outb(APIC_IMCR_P2, (uchar_t)APIC_IMCR_PIC);
1308 apic_disable_local_apic();
1310 intr_restore(iflag);
1312 /* remainder of function is for shutdown cases only */
1313 if (cmd != A_SHUTDOWN)
1314 return;
1317 * Switch system back into Legacy-Mode if using ACPI and
1318 * not powering-off. Some BIOSes need to remain in ACPI-mode
1319 * for power-off to succeed (Dell Dimension 4600)
1320 * Do not disable ACPI while doing fastreboot
1322 if (apic_enable_acpi && fcn != AD_POWEROFF && fcn != AD_FASTREBOOT)
1323 (void) AcpiDisable();
1325 if (fcn == AD_FASTREBOOT) {
1326 apic_reg_ops->apic_write(APIC_INT_CMD1,
1327 AV_ASSERT | AV_RESET | AV_SH_ALL_EXCSELF);
1330 /* remainder of function is for shutdown+poweroff case only */
1331 if (fcn != AD_POWEROFF)
1332 return;
1334 switch (apic_poweroff_method) {
1335 case APIC_POWEROFF_VIA_RTC:
1337 /* select the extended NVRAM bank in the RTC */
1338 outb(CMOS_ADDR, RTC_REGA);
1339 byte = inb(CMOS_DATA);
1340 outb(CMOS_DATA, (byte | EXT_BANK));
1342 outb(CMOS_ADDR, PFR_REG);
1344 /* for Predator must toggle the PAB bit */
1345 byte = inb(CMOS_DATA);
1348 * clear power active bar, wakeup alarm and
1349 * kickstart
1351 byte &= ~(PAB_CBIT | WF_FLAG | KS_FLAG);
1352 outb(CMOS_DATA, byte);
1354 /* delay before next write */
1355 drv_usecwait(1000);
1357 /* for S40 the following would suffice */
1358 byte = inb(CMOS_DATA);
1360 /* power active bar control bit */
1361 byte |= PAB_CBIT;
1362 outb(CMOS_DATA, byte);
1364 break;
1366 case APIC_POWEROFF_VIA_ASPEN_BMC:
1367 restarts = 0;
1368 restart_aspen_bmc:
1369 if (++restarts == 3)
1370 break;
1371 attempts = 0;
1372 do {
1373 byte = inb(MISMIC_FLAG_REGISTER);
1374 byte &= MISMIC_BUSY_MASK;
1375 if (byte != 0) {
1376 drv_usecwait(1000);
1377 if (attempts >= 3)
1378 goto restart_aspen_bmc;
1379 ++attempts;
1381 } while (byte != 0);
1382 outb(MISMIC_CNTL_REGISTER, CC_SMS_GET_STATUS);
1383 byte = inb(MISMIC_FLAG_REGISTER);
1384 byte |= 0x1;
1385 outb(MISMIC_FLAG_REGISTER, byte);
1386 i = 0;
1387 for (; i < (sizeof (aspen_bmc)/sizeof (aspen_bmc[0]));
1388 i++) {
1389 attempts = 0;
1390 do {
1391 byte = inb(MISMIC_FLAG_REGISTER);
1392 byte &= MISMIC_BUSY_MASK;
1393 if (byte != 0) {
1394 drv_usecwait(1000);
1395 if (attempts >= 3)
1396 goto restart_aspen_bmc;
1397 ++attempts;
1399 } while (byte != 0);
1400 outb(MISMIC_CNTL_REGISTER, aspen_bmc[i].cntl);
1401 outb(MISMIC_DATA_REGISTER, aspen_bmc[i].data);
1402 byte = inb(MISMIC_FLAG_REGISTER);
1403 byte |= 0x1;
1404 outb(MISMIC_FLAG_REGISTER, byte);
1406 break;
1408 case APIC_POWEROFF_VIA_SITKA_BMC:
1409 restarts = 0;
1410 restart_sitka_bmc:
1411 if (++restarts == 3)
1412 break;
1413 attempts = 0;
1414 do {
1415 byte = inb(SMS_STATUS_REGISTER);
1416 byte &= SMS_STATE_MASK;
1417 if ((byte == SMS_READ_STATE) ||
1418 (byte == SMS_WRITE_STATE)) {
1419 drv_usecwait(1000);
1420 if (attempts >= 3)
1421 goto restart_sitka_bmc;
1422 ++attempts;
1424 } while ((byte == SMS_READ_STATE) ||
1425 (byte == SMS_WRITE_STATE));
1426 outb(SMS_COMMAND_REGISTER, SMS_GET_STATUS);
1427 i = 0;
1428 for (; i < (sizeof (sitka_bmc)/sizeof (sitka_bmc[0]));
1429 i++) {
1430 attempts = 0;
1431 do {
1432 byte = inb(SMS_STATUS_REGISTER);
1433 byte &= SMS_IBF_MASK;
1434 if (byte != 0) {
1435 drv_usecwait(1000);
1436 if (attempts >= 3)
1437 goto restart_sitka_bmc;
1438 ++attempts;
1440 } while (byte != 0);
1441 outb(sitka_bmc[i].port, sitka_bmc[i].data);
1443 break;
1445 case APIC_POWEROFF_NONE:
1447 /* If no APIC direct method, we will try using ACPI */
1448 if (apic_enable_acpi) {
1449 if (acpi_poweroff() == 1)
1450 return;
1451 } else
1452 return;
1454 break;
1457 * Wait a limited time here for power to go off.
1458 * If the power does not go off, then there was a
1459 * problem and we should continue to the halt which
1460 * prints a message for the user to press a key to
1461 * reboot.
1463 drv_usecwait(7000000); /* wait seven seconds */
1467 cyclic_id_t apic_cyclic_id;
/*
 * The following functions are in the platform specific file so that they
 * can be different functions depending on whether we are running on
 * bare metal or a hypervisor.
 */
/*
 * Map an APIC for memory-mapped access.
 *
 * Hands 'addr', 'len' and 'flags' to psm_map_phys() and returns the
 * resulting mapping as a pointer to 32-bit registers.
 */
uint32_t *
mapin_apic(uint32_t addr, size_t len, int flags)
{
	void	*mapping = (void *)psm_map_phys(addr, len, flags);

	return (mapping);
}
/*
 * Map an I/O APIC for memory-mapped access.  In this file an I/O APIC is
 * mapped exactly like a local APIC, so this simply calls mapin_apic().
 */
uint32_t *
mapin_ioapic(uint32_t addr, size_t len, int flags)
{
	uint32_t	*mapping = mapin_apic(addr, len, flags);

	return (mapping);
}
1491 * unmap an apic
1493 void
1494 mapout_apic(caddr_t addr, size_t len)
1496 psm_unmap_phys(addr, len);
1499 void
1500 mapout_ioapic(caddr_t addr, size_t len)
1502 mapout_apic(addr, len);
1505 uint32_t
1506 ioapic_read(int ioapic_ix, uint32_t reg)
1508 volatile uint32_t *ioapic;
1510 ioapic = apicioadr[ioapic_ix];
1511 ioapic[APIC_IO_REG] = reg;
1512 return (ioapic[APIC_IO_DATA]);
1515 void
1516 ioapic_write(int ioapic_ix, uint32_t reg, uint32_t value)
1518 volatile uint32_t *ioapic;
1520 ioapic = apicioadr[ioapic_ix];
1521 ioapic[APIC_IO_REG] = reg;
1522 ioapic[APIC_IO_DATA] = value;
1525 void
1526 ioapic_write_eoi(int ioapic_ix, uint32_t value)
1528 volatile uint32_t *ioapic;
1530 ioapic = apicioadr[ioapic_ix];
1531 ioapic[APIC_IO_EOI] = value;
1535 * Round-robin algorithm to find the next CPU with interrupts enabled.
1536 * It can't share the same static variable apic_next_bind_cpu with
1537 * apic_get_next_bind_cpu(), since that will cause all interrupts to be
1538 * bound to CPU1 at boot time. During boot, only CPU0 is online with
1539 * interrupts enabled when apic_get_next_bind_cpu() and apic_find_cpu()
1540 * are called. However, the pcplusmp driver assumes that there will be
1541 * boot_ncpus CPUs configured eventually so it tries to distribute all
1542 * interrupts among CPU0 - CPU[boot_ncpus - 1]. Thus to prevent all
1543 * interrupts being targetted at CPU1, we need to use a dedicated static
1544 * variable for find_next_cpu() instead of sharing apic_next_bind_cpu.
1547 processorid_t
1548 apic_find_cpu(int flag)
1550 int i;
1551 static processorid_t acid = 0;
1553 /* Find the first CPU with the passed-in flag set */
1554 for (i = 0; i < apic_nproc; i++) {
1555 if (++acid >= apic_nproc) {
1556 acid = 0;
1558 if (apic_cpu_in_range(acid) &&
1559 (apic_cpus[acid].aci_status & flag)) {
1560 break;
1564 ASSERT((apic_cpus[acid].aci_status & flag) != 0);
1565 return (acid);
1568 void
1569 apic_intrmap_init(int apic_mode)
1571 int suppress_brdcst_eoi = 0;
1574 * Intel Software Developer's Manual 3A, 10.12.7:
1576 * Routing of device interrupts to local APIC units operating in
1577 * x2APIC mode requires use of the interrupt-remapping architecture
1578 * specified in the Intel Virtualization Technology for Directed
1579 * I/O, Revision 1.3. Because of this, BIOS must enumerate support
1580 * for and software must enable this interrupt remapping with
1581 * Extended Interrupt Mode Enabled before it enabling x2APIC mode in
1582 * the local APIC units.
1585 * In other words, to use the APIC in x2APIC mode, we need interrupt
1586 * remapping. Since we don't start up the IOMMU by default, we
1587 * won't be able to do any interrupt remapping and therefore have to
1588 * use the APIC in traditional 'local APIC' mode with memory mapped
1589 * I/O.
1592 if (psm_vt_ops != NULL) {
1593 if (((apic_intrmap_ops_t *)psm_vt_ops)->
1594 apic_intrmap_init(apic_mode) == DDI_SUCCESS) {
1596 apic_vt_ops = psm_vt_ops;
1599 * We leverage the interrupt remapping engine to
1600 * suppress broadcast EOI; thus we must send the
1601 * directed EOI with the directed-EOI handler.
1603 if (apic_directed_EOI_supported() == 0) {
1604 suppress_brdcst_eoi = 1;
1607 apic_vt_ops->apic_intrmap_enable(suppress_brdcst_eoi);
1609 if (apic_detect_x2apic()) {
1610 apic_enable_x2apic();
1613 if (apic_directed_EOI_supported() == 0) {
1614 apic_set_directed_EOI_handler();
1620 /*ARGSUSED*/
1621 static void
1622 apic_record_ioapic_rdt(void *intrmap_private, ioapic_rdt_t *irdt)
1624 irdt->ir_hi <<= APIC_ID_BIT_OFFSET;
1627 /*ARGSUSED*/
1628 static void
1629 apic_record_msi(void *intrmap_private, msi_regs_t *mregs)
1631 mregs->mr_addr = MSI_ADDR_HDR |
1632 (MSI_ADDR_RH_FIXED << MSI_ADDR_RH_SHIFT) |
1633 (MSI_ADDR_DM_PHYSICAL << MSI_ADDR_DM_SHIFT) |
1634 (mregs->mr_addr << MSI_ADDR_DEST_SHIFT);
1635 mregs->mr_data = (MSI_DATA_TM_EDGE << MSI_DATA_TM_SHIFT) |
1636 mregs->mr_data;
/*
 * Functions from apic_introp.c
 *
 * Those functions are used by apic_intr_ops().
 */
/*
 * MSI support flag:
 * reflects whether MSI is supported at APIC level;
 * it can also be patched through /etc/system.
 *
 *  0 = default value - not yet known; apic_check_msi_support() must be
 *      called to find out, and the flag is then set accordingly
 *  1 = supported
 * -1 = not supported
 */
int	apic_support_msi = 0;

/* Multiple vector support for MSI-X (enabled by default) */
int	apic_msix_enable = 1;

/* Multiple vector support for MSI (enabled by default) */
int	apic_multi_msi_enable = 1;
1664 * Check whether the system supports MSI.
1666 * MSI is required for PCI-E and for PCI versions later than 2.2, so if we find
1667 * a PCI-E bus or we find a PCI bus whose version we know is >= 2.2, then we
1668 * return PSM_SUCCESS to indicate this system supports MSI.
1670 * (Currently the only way we check whether a given PCI bus supports >= 2.2 is
1671 * by detecting if we are running inside the KVM hypervisor, which guarantees
1672 * this version number.)
1675 apic_check_msi_support()
1677 dev_info_t *cdip;
1678 char dev_type[16];
1679 int dev_len;
1680 int hwenv = get_hwenv();
1682 DDI_INTR_IMPLDBG((CE_CONT, "apic_check_msi_support:\n"));
1685 * check whether the first level children of root_node have
1686 * PCI-E or PCI capability.
1688 for (cdip = ddi_get_child(ddi_root_node()); cdip != NULL;
1689 cdip = ddi_get_next_sibling(cdip)) {
1691 DDI_INTR_IMPLDBG((CE_CONT, "apic_check_msi_support: cdip: 0x%p,"
1692 " driver: %s, binding: %s, nodename: %s\n", (void *)cdip,
1693 ddi_driver_name(cdip), ddi_binding_name(cdip),
1694 ddi_node_name(cdip)));
1695 dev_len = sizeof (dev_type);
1696 if (ddi_getlongprop_buf(DDI_DEV_T_ANY, cdip, DDI_PROP_DONTPASS,
1697 "device_type", (caddr_t)dev_type, &dev_len)
1698 != DDI_PROP_SUCCESS)
1699 continue;
1700 if (strcmp(dev_type, "pciex") == 0)
1701 return (PSM_SUCCESS);
1702 if (strcmp(dev_type, "pci") == 0 &&
1703 (hwenv == HW_KVM || hwenv == HW_BHYVE))
1704 return (PSM_SUCCESS);
1707 /* MSI is not supported on this system */
1708 DDI_INTR_IMPLDBG((CE_CONT, "apic_check_msi_support: no 'pciex' "
1709 "device_type found\n"));
1710 return (PSM_FAILURE);
1714 * apic_pci_msi_unconfigure:
1716 * This and next two interfaces are copied from pci_intr_lib.c
1717 * Do ensure that these two files stay in sync.
1718 * These needed to be copied over here to avoid a deadlock situation on
1719 * certain mp systems that use MSI interrupts.
1721 * IMPORTANT regards next three interfaces:
1722 * i) are called only for MSI/X interrupts.
1723 * ii) called with interrupts disabled, and must not block
1725 void
1726 apic_pci_msi_unconfigure(dev_info_t *rdip, int type, int inum)
1728 ushort_t msi_ctrl;
1729 int cap_ptr = i_ddi_get_msi_msix_cap_ptr(rdip);
1730 ddi_acc_handle_t handle = i_ddi_get_pci_config_handle(rdip);
1732 ASSERT((handle != NULL) && (cap_ptr != 0));
1734 if (type == DDI_INTR_TYPE_MSI) {
1735 msi_ctrl = pci_config_get16(handle, cap_ptr + PCI_MSI_CTRL);
1736 msi_ctrl &= (~PCI_MSI_MME_MASK);
1737 pci_config_put16(handle, cap_ptr + PCI_MSI_CTRL, msi_ctrl);
1738 pci_config_put32(handle, cap_ptr + PCI_MSI_ADDR_OFFSET, 0);
1740 if (msi_ctrl & PCI_MSI_64BIT_MASK) {
1741 pci_config_put16(handle,
1742 cap_ptr + PCI_MSI_64BIT_DATA, 0);
1743 pci_config_put32(handle,
1744 cap_ptr + PCI_MSI_ADDR_OFFSET + 4, 0);
1745 } else {
1746 pci_config_put16(handle,
1747 cap_ptr + PCI_MSI_32BIT_DATA, 0);
1750 } else if (type == DDI_INTR_TYPE_MSIX) {
1751 uintptr_t off;
1752 uint32_t mask;
1753 ddi_intr_msix_t *msix_p = i_ddi_get_msix(rdip);
1755 ASSERT(msix_p != NULL);
1757 /* Offset into "inum"th entry in the MSI-X table & mask it */
1758 off = (uintptr_t)msix_p->msix_tbl_addr + (inum *
1759 PCI_MSIX_VECTOR_SIZE) + PCI_MSIX_VECTOR_CTRL_OFFSET;
1761 mask = ddi_get32(msix_p->msix_tbl_hdl, (uint32_t *)off);
1763 ddi_put32(msix_p->msix_tbl_hdl, (uint32_t *)off, (mask | 1));
1765 /* Offset into the "inum"th entry in the MSI-X table */
1766 off = (uintptr_t)msix_p->msix_tbl_addr +
1767 (inum * PCI_MSIX_VECTOR_SIZE);
1769 /* Reset the "data" and "addr" bits */
1770 ddi_put32(msix_p->msix_tbl_hdl,
1771 (uint32_t *)(off + PCI_MSIX_DATA_OFFSET), 0);
1772 ddi_put64(msix_p->msix_tbl_hdl, (uint64_t *)off, 0);
1777 * apic_pci_msi_disable_mode:
1779 void
1780 apic_pci_msi_disable_mode(dev_info_t *rdip, int type)
1782 ushort_t msi_ctrl;
1783 int cap_ptr = i_ddi_get_msi_msix_cap_ptr(rdip);
1784 ddi_acc_handle_t handle = i_ddi_get_pci_config_handle(rdip);
1786 ASSERT((handle != NULL) && (cap_ptr != 0));
1788 if (type == DDI_INTR_TYPE_MSI) {
1789 msi_ctrl = pci_config_get16(handle, cap_ptr + PCI_MSI_CTRL);
1790 if (!(msi_ctrl & PCI_MSI_ENABLE_BIT))
1791 return;
1793 msi_ctrl &= ~PCI_MSI_ENABLE_BIT; /* MSI disable */
1794 pci_config_put16(handle, cap_ptr + PCI_MSI_CTRL, msi_ctrl);
1796 } else if (type == DDI_INTR_TYPE_MSIX) {
1797 msi_ctrl = pci_config_get16(handle, cap_ptr + PCI_MSIX_CTRL);
1798 if (msi_ctrl & PCI_MSIX_ENABLE_BIT) {
1799 msi_ctrl &= ~PCI_MSIX_ENABLE_BIT;
1800 pci_config_put16(handle, cap_ptr + PCI_MSIX_CTRL,
1801 msi_ctrl);
1806 uint32_t
1807 apic_get_localapicid(uint32_t cpuid)
1809 ASSERT(cpuid < apic_nproc && apic_cpus != NULL);
1811 return (apic_cpus[cpuid].aci_local_id);
1814 uchar_t
1815 apic_get_ioapicid(uchar_t ioapicindex)
1817 ASSERT(ioapicindex < MAX_IO_APIC);
1819 return (apic_io_id[ioapicindex]);