/*
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright (c) 2011 NetApp, Inc.
 * All rights reserved.
 * Copyright (c) 2019 Joyent, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
/*
 * This file and its contents are supplied under the terms of the
 * Common Development and Distribution License ("CDDL"), version 1.0.
 * You may only use this file in accordance with the terms of version
 * 1.0 of the CDDL.
 *
 * A full copy of the text of the CDDL should have accompanied this
 * source. A copy of the CDDL is also available via the Internet at
 * http://www.illumos.org/license/CDDL.
 */
/*
 * Copyright 2014 Pluribus Networks Inc.
 * Copyright 2018 Joyent, Inc.
 * Copyright 2023 Oxide Computer Company
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/mutex.h>
#include <sys/systm.h>
#include <sys/cpuset.h>

#include <x86/specialreg.h>
#include <x86/apicreg.h>

#include <machine/clock.h>

#include <machine/vmm.h>
#include <sys/vmm_kernel.h>

#include "vmm_lapic.h"
#include "vlapic_priv.h"

/*
 * The 4 high bits of a given interrupt vector represent its priority.  The
 * same is true for the contents of the TPR when it is used to calculate the
 * ultimate PPR of an APIC - the 4 high bits hold the priority.
 */
#define	PRIO(x)			((x) & 0xf0)

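/*
 * Illustrative example (not from the original source): vector 0x45 falls in
 * priority class PRIO(0x45) == 0x40.  Since pending interrupts are only
 * delivered when PRIO(vector) > PRIO(PPR), a TPR of 0x4f (class 0x40) is
 * enough to hold off every vector up through 0x4f.
 */
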
#define	VLAPIC_VERSION		(0x14)

/*
 * The 'vlapic->timer_lock' is used to provide mutual exclusion between the
 * vlapic_callout_handler() and vcpu accesses to:
 * - timer_freq_bt, timer_period_bt, timer_fire_bt
 * - timer LVT register
 */
#define	VLAPIC_TIMER_LOCK(vlapic)	mutex_enter(&((vlapic)->timer_lock))
#define	VLAPIC_TIMER_UNLOCK(vlapic)	mutex_exit(&((vlapic)->timer_lock))
#define	VLAPIC_TIMER_LOCKED(vlapic)	MUTEX_HELD(&((vlapic)->timer_lock))

/*
 * APIC timer frequency:
 * - arbitrary but chosen to be in the ballpark of contemporary hardware.
 * - power-of-two to avoid loss of precision when calculating times
 */
#define	VLAPIC_BUS_FREQ		(128 * 1024 * 1024)

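/*
 * Worked example (illustrative): VLAPIC_BUS_FREQ is 134217728 Hz.  With the
 * guest programming a divide-by-2 DCR, the effective timer frequency becomes
 * 67108864 Hz, so an ICR count of 67108864 yields a one-second timer period.
 */
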
#define	APICBASE_ADDR_MASK	0xfffffffffffff000UL

#define	APIC_VALID_MASK_ESR	(APIC_ESR_SEND_CS_ERROR | \
		APIC_ESR_RECEIVE_CS_ERROR | APIC_ESR_SEND_ACCEPT | \
		APIC_ESR_RECEIVE_ACCEPT | APIC_ESR_SEND_ILLEGAL_VECTOR | \
		APIC_ESR_RECEIVE_ILLEGAL_VECTOR | APIC_ESR_ILLEGAL_REGISTER)

static void vlapic_set_error(struct vlapic *, uint32_t, bool);
static void vlapic_callout_handler(void *arg);

static bool
vlapic_x2mode(const struct vlapic *vlapic)
{
	return ((vlapic->msr_apicbase & APICBASE_X2APIC) != 0);
}

static bool
vlapic_hw_disabled(const struct vlapic *vlapic)
{
	return ((vlapic->msr_apicbase & APICBASE_ENABLED) == 0);
}

static bool
vlapic_sw_disabled(const struct vlapic *vlapic)
{
	const struct LAPIC *lapic = vlapic->apic_page;

	return ((lapic->svr & APIC_SVR_ENABLE) == 0);
}

static bool
vlapic_enabled(const struct vlapic *vlapic)
{
	return (!vlapic_hw_disabled(vlapic) && !vlapic_sw_disabled(vlapic));
}

static __inline uint32_t
vlapic_get_id(const struct vlapic *vlapic)
{
	if (vlapic_x2mode(vlapic))
		return (vlapic->vcpuid);
	else
		return (vlapic->vcpuid << 24);
}

static uint32_t
x2apic_ldr(const struct vlapic *vlapic)
{
	int apicid;
	uint32_t ldr;

	apicid = vlapic_get_id(vlapic);
	ldr = 1 << (apicid & 0xf);
	ldr |= (apicid & 0xffff0) << 12;
	return (ldr);
}

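/*
 * Example derivation (illustrative): for APIC ID 0x25, the logical ID is
 * 1 << (0x25 & 0xf) == 0x20 and the cluster field is (0x25 & 0xffff0) << 12
 * == 0x20000, giving an LDR of 0x00020020 (cluster 2, bit 5).
 */
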
void
vlapic_dfr_write_handler(struct vlapic *vlapic)
{
	struct LAPIC *lapic;

	lapic = vlapic->apic_page;
	if (vlapic_x2mode(vlapic)) {
		/* Ignore write to DFR in x2APIC mode */
		lapic->dfr = 0;
		return;
	}

	lapic->dfr &= APIC_DFR_MODEL_MASK;
	lapic->dfr |= APIC_DFR_RESERVED;
}

void
vlapic_ldr_write_handler(struct vlapic *vlapic)
{
	struct LAPIC *lapic;

	lapic = vlapic->apic_page;

	/* LDR is read-only in x2apic mode */
	if (vlapic_x2mode(vlapic)) {
		/* Ignore write to LDR in x2APIC mode */
		lapic->ldr = x2apic_ldr(vlapic);
	} else {
		lapic->ldr &= ~APIC_LDR_RESERVED;
	}
}

void
vlapic_id_write_handler(struct vlapic *vlapic)
{
	struct LAPIC *lapic;

	/*
	 * We don't allow the ID register to be modified so reset it back to
	 * its default value.
	 */
	lapic = vlapic->apic_page;
	lapic->id = vlapic_get_id(vlapic);
}

static uint32_t
vlapic_timer_divisor(uint32_t dcr)
{
	switch (dcr & 0xB) {
	case APIC_TDCR_1:
		return (1);
	case APIC_TDCR_2:
		return (2);
	case APIC_TDCR_4:
		return (4);
	case APIC_TDCR_8:
		return (8);
	case APIC_TDCR_16:
		return (16);
	case APIC_TDCR_32:
		return (32);
	case APIC_TDCR_64:
		return (64);
	case APIC_TDCR_128:
		return (128);
	default:
		panic("vlapic_timer_divisor: invalid dcr 0x%08x", dcr);
	}
}

static void
vlapic_dump_lvt(uint32_t offset, uint32_t *lvt)
{
	printf("Offset %x: lvt %08x (V:%02x DS:%x M:%x)\n", offset,
	    *lvt, *lvt & APIC_LVTT_VECTOR, *lvt & APIC_LVTT_DS,
	    *lvt & APIC_LVTT_M);
}

static uint32_t
vlapic_get_ccr(struct vlapic *vlapic)
{
	struct LAPIC *lapic;
	uint32_t ccr;

	ccr = 0;
	lapic = vlapic->apic_page;

	VLAPIC_TIMER_LOCK(vlapic);
	if (callout_active(&vlapic->callout)) {
		/*
		 * If the timer is scheduled to expire in the future then
		 * compute the value of 'ccr' based on the remaining time.
		 */
		const hrtime_t now = gethrtime();
		if (vlapic->timer_fire_when > now) {
			ccr += hrt_freq_count(vlapic->timer_fire_when - now,
			    vlapic->timer_cur_freq);
		}
	}

	/*
	 * Clamp CCR value to that programmed in ICR - its theoretical maximum.
	 * Normal operation should never result in this being necessary.  Only
	 * strange circumstances due to state importation as part of instance
	 * save/restore or live-migration require such wariness.
	 */
	if (ccr > lapic->icr_timer) {
		ccr = lapic->icr_timer;
		vlapic->stats.vs_clamp_ccr++;
	}
	VLAPIC_TIMER_UNLOCK(vlapic);
	return (ccr);
}

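/*
 * Sketch of the CCR math above (illustrative values): with timer_cur_freq at
 * 67108864 Hz and 500us remaining until timer_fire_when, hrt_freq_count()
 * yields roughly 33554 remaining ticks for the guest to observe in CCR.
 */
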
static void
vlapic_update_divider(struct vlapic *vlapic)
{
	struct LAPIC *lapic = vlapic->apic_page;

	ASSERT(VLAPIC_TIMER_LOCKED(vlapic));

	vlapic->timer_cur_freq =
	    VLAPIC_BUS_FREQ / vlapic_timer_divisor(lapic->dcr_timer);
	vlapic->timer_period =
	    hrt_freq_interval(vlapic->timer_cur_freq, lapic->icr_timer);
}

void
vlapic_dcr_write_handler(struct vlapic *vlapic)
{
	/*
	 * Update the timer frequency and the timer period.
	 *
	 * XXX changes to the frequency divider will not take effect until
	 * the timer is reloaded.
	 */
	VLAPIC_TIMER_LOCK(vlapic);
	vlapic_update_divider(vlapic);
	VLAPIC_TIMER_UNLOCK(vlapic);
}

void
vlapic_esr_write_handler(struct vlapic *vlapic)
{
	struct LAPIC *lapic;

	lapic = vlapic->apic_page;
	lapic->esr = vlapic->esr_pending;
	vlapic->esr_pending = 0;
}

vcpu_notify_t
vlapic_set_intr_ready(struct vlapic *vlapic, int vector, bool level)
{
	struct LAPIC *lapic;
	uint32_t *irrptr, *tmrptr, mask, tmr;
	int idx;

	KASSERT(vector >= 0 && vector < 256, ("invalid vector %d", vector));

	lapic = vlapic->apic_page;
	if (!(lapic->svr & APIC_SVR_ENABLE)) {
		/* ignore interrupt on software-disabled APIC */
		return (VCPU_NOTIFY_NONE);
	}

	if (vector < 16) {
		vlapic_set_error(vlapic, APIC_ESR_RECEIVE_ILLEGAL_VECTOR,
		    false);

		/*
		 * If the error LVT is configured to interrupt the vCPU, it
		 * will have delivered a notification through that mechanism.
		 */
		return (VCPU_NOTIFY_NONE);
	}

	if (vlapic->ops.set_intr_ready) {
		return ((*vlapic->ops.set_intr_ready)(vlapic, vector, level));
	}

	idx = (vector / 32) * 4;
	mask = 1 << (vector % 32);
	tmrptr = &lapic->tmr0;
	irrptr = &lapic->irr0;

	/*
	 * Update TMR for requested vector, if necessary.
	 * This must be done prior to asserting the bit in IRR so that the
	 * proper TMR state is always visible before the to-be-queued interrupt
	 * can be injected.
	 */
	tmr = atomic_load_acq_32(&tmrptr[idx]);
	if ((tmr & mask) != (level ? mask : 0)) {
		if (level) {
			atomic_set_int(&tmrptr[idx], mask);
		} else {
			atomic_clear_int(&tmrptr[idx], mask);
		}
	}

	/* Now set the bit in IRR */
	atomic_set_int(&irrptr[idx], mask);

	return (VCPU_NOTIFY_EXIT);
}

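/*
 * Indexing note (illustrative): the ISR/TMR/IRR registers are 32-bit values
 * spaced 16 bytes apart in the APIC page, hence the '* 4' stride above.  For
 * vector 0x45 (69): idx = (69 / 32) * 4 == 8 and mask = 1 << (69 % 32) ==
 * 0x20, i.e. bit 5 of the third 32-bit IRR word.
 */
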
static __inline uint32_t *
vlapic_get_lvtptr(struct vlapic *vlapic, uint32_t offset)
{
	struct LAPIC *lapic = vlapic->apic_page;
	int i;

	switch (offset) {
	case APIC_OFFSET_CMCI_LVT:
		return (&lapic->lvt_cmci);
	case APIC_OFFSET_TIMER_LVT ... APIC_OFFSET_ERROR_LVT:
		i = (offset - APIC_OFFSET_TIMER_LVT) >> 2;
		return ((&lapic->lvt_timer) + i);
	default:
		panic("vlapic_get_lvt: invalid LVT\n");
	}
}

static __inline int
lvt_off_to_idx(uint32_t offset)
{
	int index;

	switch (offset) {
	case APIC_OFFSET_CMCI_LVT:
		index = APIC_LVT_CMCI;
		break;
	case APIC_OFFSET_TIMER_LVT:
		index = APIC_LVT_TIMER;
		break;
	case APIC_OFFSET_THERM_LVT:
		index = APIC_LVT_THERMAL;
		break;
	case APIC_OFFSET_PERF_LVT:
		index = APIC_LVT_PMC;
		break;
	case APIC_OFFSET_LINT0_LVT:
		index = APIC_LVT_LINT0;
		break;
	case APIC_OFFSET_LINT1_LVT:
		index = APIC_LVT_LINT1;
		break;
	case APIC_OFFSET_ERROR_LVT:
		index = APIC_LVT_ERROR;
		break;
	default:
		index = -1;
		break;
	}
	KASSERT(index >= 0 && index <= VLAPIC_MAXLVT_INDEX, ("lvt_off_to_idx: "
	    "invalid lvt index %d for offset %x", index, offset));

	return (index);
}

static __inline uint32_t
vlapic_get_lvt(struct vlapic *vlapic, uint32_t offset)
{
	int idx;
	uint32_t val;

	idx = lvt_off_to_idx(offset);
	val = atomic_load_acq_32(&vlapic->lvt_last[idx]);
	return (val);
}

void
vlapic_lvt_write_handler(struct vlapic *vlapic, uint32_t offset)
{
	uint32_t *lvtptr, mask, val;
	struct LAPIC *lapic;
	int idx;

	lapic = vlapic->apic_page;
	lvtptr = vlapic_get_lvtptr(vlapic, offset);
	val = *lvtptr;
	idx = lvt_off_to_idx(offset);

	if (!(lapic->svr & APIC_SVR_ENABLE))
		val |= APIC_LVT_M;
	mask = APIC_LVT_M | APIC_LVT_DS | APIC_LVT_VECTOR;
	switch (offset) {
	case APIC_OFFSET_TIMER_LVT:
		mask |= APIC_LVTT_TM;
		break;
	case APIC_OFFSET_ERROR_LVT:
		break;
	case APIC_OFFSET_LINT0_LVT:
	case APIC_OFFSET_LINT1_LVT:
		mask |= APIC_LVT_TM | APIC_LVT_RIRR | APIC_LVT_IIPP;
		/* FALLTHROUGH */
	default:
		mask |= APIC_LVT_DM;
		break;
	}
	val &= mask;
	*lvtptr = val;
	atomic_store_rel_32(&vlapic->lvt_last[idx], val);
}

static void
vlapic_refresh_lvts(struct vlapic *vlapic)
{
	vlapic_lvt_write_handler(vlapic, APIC_OFFSET_CMCI_LVT);
	vlapic_lvt_write_handler(vlapic, APIC_OFFSET_TIMER_LVT);
	vlapic_lvt_write_handler(vlapic, APIC_OFFSET_THERM_LVT);
	vlapic_lvt_write_handler(vlapic, APIC_OFFSET_PERF_LVT);
	vlapic_lvt_write_handler(vlapic, APIC_OFFSET_LINT0_LVT);
	vlapic_lvt_write_handler(vlapic, APIC_OFFSET_LINT1_LVT);
	vlapic_lvt_write_handler(vlapic, APIC_OFFSET_ERROR_LVT);
}

static void
vlapic_mask_lvts(struct vlapic *vlapic)
{
	struct LAPIC *lapic = vlapic->apic_page;

	lapic->lvt_cmci |= APIC_LVT_M;
	lapic->lvt_timer |= APIC_LVT_M;
	lapic->lvt_thermal |= APIC_LVT_M;
	lapic->lvt_pcint |= APIC_LVT_M;
	lapic->lvt_lint0 |= APIC_LVT_M;
	lapic->lvt_lint1 |= APIC_LVT_M;
	lapic->lvt_error |= APIC_LVT_M;
	vlapic_refresh_lvts(vlapic);
}

static bool
vlapic_fire_lvt(struct vlapic *vlapic, uint_t lvt)
{
	uint32_t mode, reg, vec;
	vcpu_notify_t notify;

	reg = atomic_load_acq_32(&vlapic->lvt_last[lvt]);

	if (reg & APIC_LVT_M)
		return (false);
	vec = reg & APIC_LVT_VECTOR;
	mode = reg & APIC_LVT_DM;

	switch (mode) {
	case APIC_LVT_DM_FIXED:
		if (vec < 16) {
			vlapic_set_error(vlapic, APIC_ESR_SEND_ILLEGAL_VECTOR,
			    lvt == APIC_LVT_ERROR);
			return (false);
		}
		notify = vlapic_set_intr_ready(vlapic, vec, false);
		vcpu_notify_event_type(vlapic->vm, vlapic->vcpuid, notify);
		break;
	case APIC_LVT_DM_NMI:
		(void) vm_inject_nmi(vlapic->vm, vlapic->vcpuid);
		break;
	case APIC_LVT_DM_EXTINT:
		(void) vm_inject_extint(vlapic->vm, vlapic->vcpuid);
		break;
	default:
		// Other modes ignored
		return (false);
	}
	return (true);
}

static int
vlapic_active_isr(struct vlapic *vlapic)
{
	int i;
	uint32_t *isrp;

	isrp = &vlapic->apic_page->isr7;

	for (i = 7; i >= 0; i--, isrp -= 4) {
		uint32_t reg = *isrp;

		if (reg != 0) {
			uint_t vec = (i * 32) + bsrl(reg);

			if (vec < 16) {
				/*
				 * Truncate the illegal low vectors to a value
				 * of 0, indicating that no active ISR was
				 * found.
				 */
				return (0);
			}
			return (vec);
		}
	}

	return (0);
}

/*
 * After events which might arbitrarily change the value of PPR, such as a TPR
 * write or an EOI, calculate that new PPR value and store it in the APIC page.
 */
static void
vlapic_update_ppr(struct vlapic *vlapic)
{
	int isrvec, tpr, ppr;

	isrvec = vlapic_active_isr(vlapic);
	tpr = vlapic->apic_page->tpr;

	/*
	 * Algorithm adopted from section "Interrupt, Task and Processor
	 * Priority" in Intel Architecture Manual Vol 3a.
	 */
	if (PRIO(tpr) >= PRIO(isrvec)) {
		ppr = tpr;
	} else {
		ppr = PRIO(isrvec);
	}

	vlapic->apic_page->ppr = ppr;
}

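/*
 * Worked example (illustrative): with TPR = 0x20 and vector 0x31 in-service,
 * PRIO(tpr) == 0x20 < PRIO(isrvec) == 0x30, so the resulting PPR is 0x30.
 */
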
/*
 * When a vector is asserted in ISR as in-service, the PPR must be raised to
 * the priority of that vector, as the vCPU would have been at a lower priority
 * in order for the vector to be accepted.
 */
static void
vlapic_raise_ppr(struct vlapic *vlapic, int vec)
{
	struct LAPIC *lapic = vlapic->apic_page;

	lapic->ppr = PRIO(vec);
}

void
vlapic_sync_tpr(struct vlapic *vlapic)
{
	vlapic_update_ppr(vlapic);
}

static VMM_STAT(VLAPIC_GRATUITOUS_EOI, "EOI without any in-service interrupt");

static void
vlapic_process_eoi(struct vlapic *vlapic)
{
	struct LAPIC *lapic = vlapic->apic_page;
	uint32_t *isrptr, *tmrptr;
	int i;
	uint_t idx, bitpos, vector;

	isrptr = &lapic->isr0;
	tmrptr = &lapic->tmr0;

	for (i = 7; i >= 0; i--) {
		idx = i * 4;
		if (isrptr[idx] != 0) {
			bitpos = bsrl(isrptr[idx]);
			vector = i * 32 + bitpos;

			isrptr[idx] &= ~(1 << bitpos);
			vlapic_update_ppr(vlapic);
			if ((tmrptr[idx] & (1 << bitpos)) != 0) {
				vioapic_process_eoi(vlapic->vm, vlapic->vcpuid,
				    vector);
			}
			return;
		}
	}
	vmm_stat_incr(vlapic->vm, vlapic->vcpuid, VLAPIC_GRATUITOUS_EOI, 1);
}

static __inline int
vlapic_get_lvt_field(uint32_t lvt, uint32_t mask)
{
	return (lvt & mask);
}

static __inline int
vlapic_periodic_timer(struct vlapic *vlapic)
{
	uint32_t lvt;

	lvt = vlapic_get_lvt(vlapic, APIC_OFFSET_TIMER_LVT);

	return (vlapic_get_lvt_field(lvt, APIC_LVTT_TM_PERIODIC));
}

static VMM_STAT(VLAPIC_INTR_ERROR, "error interrupts generated by vlapic");

static void
vlapic_set_error(struct vlapic *vlapic, uint32_t mask, bool lvt_error)
{
	vlapic->esr_pending |= mask;

	/*
	 * Avoid infinite recursion if the error LVT itself is configured with
	 * an illegal vector.
	 */
	if (lvt_error)
		return;

	if (vlapic_fire_lvt(vlapic, APIC_LVT_ERROR)) {
		vmm_stat_incr(vlapic->vm, vlapic->vcpuid, VLAPIC_INTR_ERROR, 1);
	}
}

static VMM_STAT(VLAPIC_INTR_TIMER, "timer interrupts generated by vlapic");

static void
vlapic_fire_timer(struct vlapic *vlapic)
{
	ASSERT(VLAPIC_TIMER_LOCKED(vlapic));

	if (vlapic_fire_lvt(vlapic, APIC_LVT_TIMER)) {
		vmm_stat_incr(vlapic->vm, vlapic->vcpuid, VLAPIC_INTR_TIMER, 1);
	}
}

static VMM_STAT(VLAPIC_INTR_CMC,
    "corrected machine check interrupts generated by vlapic");

void
vlapic_fire_cmci(struct vlapic *vlapic)
{
	if (vlapic_fire_lvt(vlapic, APIC_LVT_CMCI)) {
		vmm_stat_incr(vlapic->vm, vlapic->vcpuid, VLAPIC_INTR_CMC, 1);
	}
}

static VMM_STAT_ARRAY(LVTS_TRIGGERRED, VLAPIC_MAXLVT_INDEX + 1,
    "lvts triggered");

int
vlapic_trigger_lvt(struct vlapic *vlapic, int vector)
{
	if (!vlapic_enabled(vlapic)) {
		/*
		 * When the local APIC is global/hardware disabled,
		 * LINT[1:0] pins are configured as INTR and NMI pins,
		 * respectively.
		 */
		switch (vector) {
		case APIC_LVT_LINT0:
			(void) vm_inject_extint(vlapic->vm,
			    vlapic->vcpuid);
			break;
		case APIC_LVT_LINT1:
			(void) vm_inject_nmi(vlapic->vm,
			    vlapic->vcpuid);
			break;
		default:
			break;
		}
		return (0);
	}

	switch (vector) {
	case APIC_LVT_LINT0:
	case APIC_LVT_LINT1:
	case APIC_LVT_TIMER:
	case APIC_LVT_ERROR:
	case APIC_LVT_PMC:
	case APIC_LVT_THERMAL:
	case APIC_LVT_CMCI:
		if (vlapic_fire_lvt(vlapic, vector)) {
			vmm_stat_array_incr(vlapic->vm, vlapic->vcpuid,
			    LVTS_TRIGGERRED, vector, 1);
		}
		break;
	default:
		return (EINVAL);
	}
	return (0);
}

static void
vlapic_callout_reset(struct vlapic *vlapic)
{
	callout_reset_hrtime(&vlapic->callout, vlapic->timer_fire_when,
	    vlapic_callout_handler, vlapic, C_ABSOLUTE);
}

static void
vlapic_callout_handler(void *arg)
{
	struct vlapic *vlapic = arg;

	VLAPIC_TIMER_LOCK(vlapic);
	if (callout_pending(&vlapic->callout))	/* callout was reset */
		goto done;

	if (!callout_active(&vlapic->callout))	/* callout was stopped */
		goto done;

	callout_deactivate(&vlapic->callout);

	vlapic_fire_timer(vlapic);

	if (vlapic_periodic_timer(vlapic)) {
		/*
		 * Compute the delta between when the timer was supposed to
		 * fire and the present time.  We can depend on the fact that
		 * cyclics (which underlie these callouts) will never be called
		 * early.
		 */
		const hrtime_t now = gethrtime();
		const hrtime_t delta = now - vlapic->timer_fire_when;
		if (delta >= vlapic->timer_period) {
			/*
			 * If we are so behind that we have missed an entire
			 * timer period, reset the time base rather than
			 * attempting to catch up.
			 */
			vlapic->timer_fire_when = now + vlapic->timer_period;
		} else {
			vlapic->timer_fire_when += vlapic->timer_period;
		}
		vlapic_callout_reset(vlapic);
	} else {
		/*
		 * Clear the target time so that logic can distinguish a timer
		 * which has fired (where the value is zero) from one which is
		 * held pending due to the instance being paused (where the
		 * value is non-zero, but the callout is not pending).
		 */
		vlapic->timer_fire_when = 0;
	}
done:
	VLAPIC_TIMER_UNLOCK(vlapic);
}

void
vlapic_icrtmr_write_handler(struct vlapic *vlapic)
{
	struct LAPIC *lapic = vlapic->apic_page;

	VLAPIC_TIMER_LOCK(vlapic);
	vlapic->timer_period = hrt_freq_interval(vlapic->timer_cur_freq,
	    lapic->icr_timer);
	if (vlapic->timer_period != 0) {
		vlapic->timer_fire_when = gethrtime() + vlapic->timer_period;
		vlapic_callout_reset(vlapic);
	} else {
		vlapic->timer_fire_when = 0;
		callout_stop(&vlapic->callout);
	}
	VLAPIC_TIMER_UNLOCK(vlapic);
}

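/*
 * Worked example (illustrative): with timer_cur_freq at 134217728 Hz
 * (divide-by-1) and icr_timer = 134217728, hrt_freq_interval() yields a
 * one-second period, and the callout is armed for gethrtime() + 1s.
 */
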
/*
 * This function populates 'dmask' with the set of vcpus that match the
 * addressing specified by the (dest, phys, lowprio) tuple.
 *
 * 'x2apic_dest' specifies whether 'dest' is interpreted as x2APIC (32-bit)
 * or xAPIC (8-bit) destination field.
 */
void
vlapic_calcdest(struct vm *vm, cpuset_t *dmask, uint32_t dest, bool phys,
    bool lowprio, bool x2apic_dest)
{
	struct vlapic *vlapic;
	uint32_t dfr, ldr, ldest, cluster;
	uint32_t mda_flat_ldest, mda_cluster_ldest, mda_ldest, mda_cluster_id;
	cpuset_t amask;
	int vcpuid;

	if ((x2apic_dest && dest == 0xffffffff) ||
	    (!x2apic_dest && dest == 0xff)) {
		/*
		 * Broadcast in both logical and physical modes.
		 */
		*dmask = vm_active_cpus(vm);
		return;
	}

	if (phys) {
		/*
		 * Physical mode: destination is APIC ID.
		 */
		CPU_ZERO(dmask);
		vcpuid = vm_apicid2vcpuid(vm, dest);
		amask = vm_active_cpus(vm);
		if (vcpuid < vm_get_maxcpus(vm) && CPU_ISSET(vcpuid, &amask))
			CPU_SET(vcpuid, dmask);
	} else {
		/*
		 * In the "Flat Model" the MDA is interpreted as an 8-bit wide
		 * bitmask. This model is only available in the xAPIC mode.
		 */
		mda_flat_ldest = dest & 0xff;

		/*
		 * In the "Cluster Model" the MDA is used to identify a
		 * specific cluster and a set of APICs in that cluster.
		 */
		if (x2apic_dest) {
			mda_cluster_id = dest >> 16;
			mda_cluster_ldest = dest & 0xffff;
		} else {
			mda_cluster_id = (dest >> 4) & 0xf;
			mda_cluster_ldest = dest & 0xf;
		}

		/*
		 * Logical mode: match each APIC that has a bit set
		 * in its LDR that matches a bit in the ldest.
		 */
		CPU_ZERO(dmask);
		amask = vm_active_cpus(vm);
		while ((vcpuid = CPU_FFS(&amask)) != 0) {
			vcpuid--;
			CPU_CLR(vcpuid, &amask);

			vlapic = vm_lapic(vm, vcpuid);
			dfr = vlapic->apic_page->dfr;
			ldr = vlapic->apic_page->ldr;

			if ((dfr & APIC_DFR_MODEL_MASK) ==
			    APIC_DFR_MODEL_FLAT) {
				ldest = ldr >> 24;
				mda_ldest = mda_flat_ldest;
			} else if ((dfr & APIC_DFR_MODEL_MASK) ==
			    APIC_DFR_MODEL_CLUSTER) {
				if (vlapic_x2mode(vlapic)) {
					cluster = ldr >> 16;
					ldest = ldr & 0xffff;
				} else {
					cluster = ldr >> 28;
					ldest = (ldr >> 24) & 0xf;
				}
				if (cluster != mda_cluster_id)
					continue;
				mda_ldest = mda_cluster_ldest;
			} else {
				/*
				 * Guest has configured a bad logical
				 * model for this vcpu - skip it.
				 */
				continue;
			}

			if ((mda_ldest & ldest) != 0) {
				CPU_SET(vcpuid, dmask);
				if (lowprio)
					break;
			}
		}
	}
}

, "ipis sent from vcpu");
917 static VMM_STAT(VLAPIC_IPI_RECV
, "ipis received by vcpu");
920 vlapic_set_tpr(struct vlapic
*vlapic
, uint8_t val
)
922 struct LAPIC
*lapic
= vlapic
->apic_page
;
924 if (lapic
->tpr
!= val
) {
926 vlapic_update_ppr(vlapic
);
void
vlapic_set_cr8(struct vlapic *vlapic, uint64_t val)
{
	uint8_t tpr;

	if (val & ~0xf) {
		vm_inject_gp(vlapic->vm, vlapic->vcpuid);
		return;
	}

	tpr = val << 4;
	vlapic_set_tpr(vlapic, tpr);
}

uint64_t
vlapic_get_cr8(const struct vlapic *vlapic)
{
	const struct LAPIC *lapic = vlapic->apic_page;

	return (lapic->tpr >> 4);
}

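/*
 * Example mapping (illustrative): CR8 holds the high nibble of the TPR, so a
 * guest 'mov $0x3, %cr8' corresponds to a TPR of 0x30, and a TPR of 0x4f
 * reads back through CR8 as 0x4.
 */
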
static bool
vlapic_is_icr_valid(uint64_t icrval)
{
	uint32_t mode = icrval & APIC_DELMODE_MASK;
	uint32_t level = icrval & APIC_LEVEL_MASK;
	uint32_t trigger = icrval & APIC_TRIGMOD_MASK;
	uint32_t shorthand = icrval & APIC_DEST_MASK;

	switch (mode) {
	case APIC_DELMODE_FIXED:
		if (trigger == APIC_TRIGMOD_EDGE)
			return (true);
		/*
		 * AMD allows a level assert IPI and Intel converts a level
		 * assert IPI into an edge IPI.
		 */
		if (trigger == APIC_TRIGMOD_LEVEL && level == APIC_LEVEL_ASSERT)
			return (true);
		break;
	case APIC_DELMODE_LOWPRIO:
	case APIC_DELMODE_SMI:
	case APIC_DELMODE_NMI:
	case APIC_DELMODE_INIT:
		if (trigger == APIC_TRIGMOD_EDGE &&
		    (shorthand == APIC_DEST_DESTFLD ||
		    shorthand == APIC_DEST_ALLESELF)) {
			return (true);
		}
		/*
		 * AMD allows a level assert IPI and Intel converts a level
		 * assert IPI into an edge IPI.
		 */
		if (trigger == APIC_TRIGMOD_LEVEL &&
		    level == APIC_LEVEL_ASSERT &&
		    (shorthand == APIC_DEST_DESTFLD ||
		    shorthand == APIC_DEST_ALLESELF)) {
			return (true);
		}
		/*
		 * A level triggered deassert INIT is defined in the Intel
		 * Multiprocessor Specification and the Intel Software Developer
		 * Manual. Due to the MPS it's required to send a level assert
		 * INIT to a cpu and then a level deassert INIT. Some operating
		 * systems e.g. FreeBSD or Linux use that algorithm. According
		 * to the SDM a level deassert INIT is only supported by Pentium
		 * and P6 processors. It's always sent to all cpus regardless of
		 * the destination or shorthand field. It resets the arbitration
		 * id register. This register is not software accessible and
		 * only required for the APIC bus arbitration. So, the level
		 * deassert INIT doesn't need any emulation and we should ignore
		 * it. The SDM also defines that newer processors don't support
		 * the level deassert INIT and it's not valid any more. As it's
		 * defined for older systems, it can't be invalid per se.
		 * Otherwise, backward compatibility would be broken. However,
		 * when returning false here, it'll be ignored which is the
		 * desired behaviour.
		 */
		if (mode == APIC_DELMODE_INIT &&
		    trigger == APIC_TRIGMOD_LEVEL &&
		    level == APIC_LEVEL_DEASSERT) {
			return (false);
		}
		break;
	case APIC_DELMODE_STARTUP:
		if (shorthand == APIC_DEST_DESTFLD ||
		    shorthand == APIC_DEST_ALLESELF) {
			return (true);
		}
		break;
	case APIC_DELMODE_RR:
		/* Only available on AMD! */
		if (trigger == APIC_TRIGMOD_EDGE &&
		    shorthand == APIC_DEST_DESTFLD) {
			return (true);
		}
		break;
	case APIC_DELMODE_RESV:
		return (false);
	default:
		panic("vlapic_is_icr_valid: invalid mode 0x%08x", mode);
	}

	return (false);
}

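/*
 * Example decode (illustrative): icrval 0x00000000000c04ff carries shorthand
 * APIC_DEST_ALLESELF (0xc0000), delivery mode APIC_DELMODE_NMI (0x400), and
 * an edge trigger, which the NMI case above accepts as valid.
 */
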
void
vlapic_icrlo_write_handler(struct vlapic *vlapic)
{
	int i;
	cpuset_t dmask;
	uint64_t icrval;
	uint32_t dest, vec, mode, dsh;
	struct LAPIC *lapic;

	lapic = vlapic->apic_page;
	lapic->icr_lo &= ~APIC_DELSTAT_PEND;
	icrval = ((uint64_t)lapic->icr_hi << 32) | lapic->icr_lo;

	/*
	 * Ignore invalid combinations of the icr.
	 */
	if (!vlapic_is_icr_valid(icrval))
		return;

	if (vlapic_x2mode(vlapic))
		dest = icrval >> 32;
	else
		dest = icrval >> (32 + 24);
	vec = icrval & APIC_VECTOR_MASK;
	mode = icrval & APIC_DELMODE_MASK;
	dsh = icrval & APIC_DEST_MASK;

	if (mode == APIC_DELMODE_FIXED && vec < 16) {
		vlapic_set_error(vlapic, APIC_ESR_SEND_ILLEGAL_VECTOR, false);
		return;
	}

	if (mode == APIC_DELMODE_INIT &&
	    (icrval & APIC_LEVEL_MASK) == APIC_LEVEL_DEASSERT) {
		/* No work required to deassert INIT */
		return;
	}

	switch (dsh) {
	case APIC_DEST_DESTFLD:
		vlapic_calcdest(vlapic->vm, &dmask, dest,
		    (icrval & APIC_DESTMODE_LOG) == 0, false,
		    vlapic_x2mode(vlapic));
		break;
	case APIC_DEST_SELF:
		CPU_SETOF(vlapic->vcpuid, &dmask);
		break;
	case APIC_DEST_ALLISELF:
		dmask = vm_active_cpus(vlapic->vm);
		break;
	case APIC_DEST_ALLESELF:
		dmask = vm_active_cpus(vlapic->vm);
		CPU_CLR(vlapic->vcpuid, &dmask);
		break;
	default:
		/*
		 * All possible delivery notations are covered above.
		 * We should never end up here.
		 */
		panic("unknown delivery shorthand: %x", dsh);
	}

	while ((i = CPU_FFS(&dmask)) != 0) {
		i--;
		CPU_CLR(i, &dmask);
		switch (mode) {
		case APIC_DELMODE_FIXED:
			(void) lapic_intr_edge(vlapic->vm, i, vec);
			vmm_stat_incr(vlapic->vm, vlapic->vcpuid,
			    VLAPIC_IPI_SEND, 1);
			vmm_stat_incr(vlapic->vm, i,
			    VLAPIC_IPI_RECV, 1);
			break;
		case APIC_DELMODE_NMI:
			(void) vm_inject_nmi(vlapic->vm, i);
			break;
		case APIC_DELMODE_INIT:
			(void) vm_inject_init(vlapic->vm, i);
			break;
		case APIC_DELMODE_STARTUP:
			(void) vm_inject_sipi(vlapic->vm, i, vec);
			break;
		case APIC_DELMODE_LOWPRIO:
		case APIC_DELMODE_SMI:
		default:
			/* Unhandled IPI modes (for now) */
			break;
		}
	}
}

void
vlapic_self_ipi_handler(struct vlapic *vlapic, uint32_t val)
{
	const int vec = val & 0xff;

	/* self-IPI is only exposed via x2APIC */
	ASSERT(vlapic_x2mode(vlapic));

	(void) lapic_intr_edge(vlapic->vm, vlapic->vcpuid, vec);
	vmm_stat_incr(vlapic->vm, vlapic->vcpuid, VLAPIC_IPI_SEND, 1);
	vmm_stat_incr(vlapic->vm, vlapic->vcpuid, VLAPIC_IPI_RECV, 1);
}

int
vlapic_pending_intr(struct vlapic *vlapic, int *vecptr)
{
	struct LAPIC *lapic = vlapic->apic_page;
	int idx, i, bitpos, vector;
	uint32_t *irrptr, val;

	if (vlapic->ops.sync_state) {
		(*vlapic->ops.sync_state)(vlapic);
	}

	irrptr = &lapic->irr0;

	for (i = 7; i >= 0; i--) {
		idx = i * 4;
		val = atomic_load_acq_int(&irrptr[idx]);
		bitpos = fls(val);
		if (bitpos != 0) {
			vector = i * 32 + (bitpos - 1);
			if (PRIO(vector) > PRIO(lapic->ppr)) {
				if (vecptr != NULL)
					*vecptr = vector;
				return (1);
			} else {
				break;
			}
		}
	}
	return (0);
}

void
vlapic_intr_accepted(struct vlapic *vlapic, int vector)
{
	struct LAPIC *lapic = vlapic->apic_page;
	uint32_t *irrptr, *isrptr;
	int idx;

	KASSERT(vector >= 16 && vector < 256, ("invalid vector %d", vector));

	if (vlapic->ops.intr_accepted)
		return ((*vlapic->ops.intr_accepted)(vlapic, vector));

	/*
	 * clear the ready bit for vector being accepted in irr
	 * and set the vector as in service in isr.
	 */
	idx = (vector / 32) * 4;

	irrptr = &lapic->irr0;
	atomic_clear_int(&irrptr[idx], 1 << (vector % 32));

	isrptr = &lapic->isr0;
	isrptr[idx] |= 1 << (vector % 32);

	/*
	 * The only way a fresh vector could be accepted into ISR is if it was
	 * of a higher priority than the current PPR.  With that vector now
	 * in-service, the PPR must be raised.
	 */
	vlapic_raise_ppr(vlapic, vector);
}

void
vlapic_svr_write_handler(struct vlapic *vlapic)
{
	struct LAPIC *lapic;
	uint32_t old, new, changed;

	lapic = vlapic->apic_page;

	new = lapic->svr;
	old = vlapic->svr_last;
	vlapic->svr_last = new;

	changed = old ^ new;
	if ((changed & APIC_SVR_ENABLE) != 0) {
		if ((new & APIC_SVR_ENABLE) == 0) {
			/*
			 * The apic is now disabled so stop the apic timer
			 * and mask all the LVT entries.
			 */
			VLAPIC_TIMER_LOCK(vlapic);
			callout_stop(&vlapic->callout);
			VLAPIC_TIMER_UNLOCK(vlapic);
			vlapic_mask_lvts(vlapic);
		} else {
			/*
			 * The apic is now enabled so restart the apic timer
			 * if it is configured in periodic mode.
			 */
			if (vlapic_periodic_timer(vlapic))
				vlapic_icrtmr_write_handler(vlapic);
		}
	}
}

static bool
vlapic_read(struct vlapic *vlapic, uint16_t offset, uint32_t *outp)
{
	struct LAPIC *lapic = vlapic->apic_page;
	uint32_t *reg;
	int i;
	uint32_t data = 0;

	ASSERT3U(offset & 0x3, ==, 0);
	ASSERT3U(offset, <, PAGESIZE);
	ASSERT3P(outp, !=, NULL);

	switch (offset) {
	case APIC_OFFSET_ID:
		data = lapic->id;
		break;
	case APIC_OFFSET_VER:
		data = lapic->version;
		break;
	case APIC_OFFSET_TPR:
		data = lapic->tpr;
		break;
	case APIC_OFFSET_APR:
		data = lapic->apr;
		break;
	case APIC_OFFSET_PPR:
		data = lapic->ppr;
		break;
	case APIC_OFFSET_LDR:
		data = lapic->ldr;
		break;
	case APIC_OFFSET_DFR:
		data = lapic->dfr;
		break;
	case APIC_OFFSET_SVR:
		data = lapic->svr;
		break;
	case APIC_OFFSET_ISR0 ... APIC_OFFSET_ISR7:
		i = (offset - APIC_OFFSET_ISR0) >> 2;
		reg = &lapic->isr0;
		data = atomic_load_acq_int(reg + i);
		break;
	case APIC_OFFSET_TMR0 ... APIC_OFFSET_TMR7:
		i = (offset - APIC_OFFSET_TMR0) >> 2;
		reg = &lapic->tmr0;
		data = atomic_load_acq_int(reg + i);
		break;
	case APIC_OFFSET_IRR0 ... APIC_OFFSET_IRR7:
		i = (offset - APIC_OFFSET_IRR0) >> 2;
		reg = &lapic->irr0;
		data = atomic_load_acq_int(reg + i);
		break;
	case APIC_OFFSET_ESR:
		data = lapic->esr;
		break;
	case APIC_OFFSET_ICR_LOW:
		data = lapic->icr_lo;
		break;
	case APIC_OFFSET_ICR_HI:
		data = lapic->icr_hi;
		break;
	case APIC_OFFSET_CMCI_LVT:
	case APIC_OFFSET_TIMER_LVT ... APIC_OFFSET_ERROR_LVT:
		data = vlapic_get_lvt(vlapic, offset);
#ifdef INVARIANTS
		reg = vlapic_get_lvtptr(vlapic, offset);
		ASSERT3U(data, ==, *reg);
#endif
		break;
	case APIC_OFFSET_TIMER_ICR:
		data = lapic->icr_timer;
		break;
	case APIC_OFFSET_TIMER_CCR:
		data = vlapic_get_ccr(vlapic);
		break;
	case APIC_OFFSET_TIMER_DCR:
		data = lapic->dcr_timer;
		break;
	case APIC_OFFSET_RRR:
		data = 0;
		break;

	case APIC_OFFSET_SELF_IPI:
	case APIC_OFFSET_EOI:
		/* Write-only register */
		*outp = 0;
		return (false);

	default:
		/* Invalid register */
		*outp = 0;
		return (false);
	}

	*outp = data;
	return (true);
}

static bool
vlapic_write(struct vlapic *vlapic, uint16_t offset, uint32_t data)
{
	struct LAPIC *lapic = vlapic->apic_page;
	uint32_t *regptr;

	ASSERT3U(offset & 0xf, ==, 0);
	ASSERT3U(offset, <, PAGESIZE);

	switch (offset) {
	case APIC_OFFSET_ID:
		lapic->id = data;
		vlapic_id_write_handler(vlapic);
		break;
	case APIC_OFFSET_TPR:
		vlapic_set_tpr(vlapic, data & 0xff);
		break;
	case APIC_OFFSET_EOI:
		vlapic_process_eoi(vlapic);
		break;
	case APIC_OFFSET_LDR:
		lapic->ldr = data;
		vlapic_ldr_write_handler(vlapic);
		break;
	case APIC_OFFSET_DFR:
		lapic->dfr = data;
		vlapic_dfr_write_handler(vlapic);
		break;
	case APIC_OFFSET_SVR:
		lapic->svr = data;
		vlapic_svr_write_handler(vlapic);
		break;
	case APIC_OFFSET_ICR_LOW:
		lapic->icr_lo = data;
		vlapic_icrlo_write_handler(vlapic);
		break;
	case APIC_OFFSET_ICR_HI:
		lapic->icr_hi = data;
		break;
	case APIC_OFFSET_CMCI_LVT:
	case APIC_OFFSET_TIMER_LVT ... APIC_OFFSET_ERROR_LVT:
		regptr = vlapic_get_lvtptr(vlapic, offset);
		*regptr = data;
		vlapic_lvt_write_handler(vlapic, offset);
		break;
	case APIC_OFFSET_TIMER_ICR:
		lapic->icr_timer = data;
		vlapic_icrtmr_write_handler(vlapic);
		break;
	case APIC_OFFSET_TIMER_DCR:
		lapic->dcr_timer = data;
		vlapic_dcr_write_handler(vlapic);
		break;
	case APIC_OFFSET_ESR:
		vlapic_esr_write_handler(vlapic);
		break;

	case APIC_OFFSET_SELF_IPI:
		if (vlapic_x2mode(vlapic))
			vlapic_self_ipi_handler(vlapic, data);
		break;

	case APIC_OFFSET_VER:
	case APIC_OFFSET_APR:
	case APIC_OFFSET_PPR:
	case APIC_OFFSET_RRR:
	case APIC_OFFSET_ISR0 ... APIC_OFFSET_ISR7:
	case APIC_OFFSET_TMR0 ... APIC_OFFSET_TMR7:
	case APIC_OFFSET_IRR0 ... APIC_OFFSET_IRR7:
	case APIC_OFFSET_TIMER_CCR:
		/* Read-only register */
		return (false);

	default:
		/* Invalid register */
		return (false);
	}

	return (true);
}

void
vlapic_reset(struct vlapic *vlapic)
{
	struct LAPIC *lapic = vlapic->apic_page;
	uint32_t *isrptr, *tmrptr, *irrptr;

	/* Reset any timer-related state first */
	VLAPIC_TIMER_LOCK(vlapic);
	callout_stop(&vlapic->callout);
	lapic->icr_timer = 0;
	lapic->ccr_timer = 0;
	lapic->dcr_timer = 0;
	vlapic_update_divider(vlapic);
	VLAPIC_TIMER_UNLOCK(vlapic);

	/*
	 * Sync any APIC acceleration (APICv/AVIC) state into the APIC page so
	 * it is not leftover after the reset.  This is performed after the
	 * APIC timer has been stopped, in case it happened to fire just prior
	 * to being deactivated.
	 */
	if (vlapic->ops.sync_state) {
		(*vlapic->ops.sync_state)(vlapic);
	}

	vlapic->msr_apicbase = DEFAULT_APIC_BASE | APICBASE_ENABLED;
	if (vlapic->vcpuid == 0)
		vlapic->msr_apicbase |= APICBASE_BSP;

	lapic->id = vlapic_get_id(vlapic);
	lapic->version = VLAPIC_VERSION;
	lapic->version |= (VLAPIC_MAXLVT_INDEX << MAXLVTSHIFT);

	lapic->tpr = 0;
	lapic->apr = 0;
	lapic->ppr = 0;

	lapic->eoi = 0;
	lapic->ldr = 0;
	lapic->dfr = 0xffffffff;
	lapic->svr = APIC_SVR_VECTOR;
	vlapic->svr_last = lapic->svr;

	isrptr = &lapic->isr0;
	tmrptr = &lapic->tmr0;
	irrptr = &lapic->irr0;
	for (uint_t i = 0; i < 8; i++) {
		atomic_store_rel_int(&isrptr[i * 4], 0);
		atomic_store_rel_int(&tmrptr[i * 4], 0);
		atomic_store_rel_int(&irrptr[i * 4], 0);
	}

	lapic->esr = 0;
	vlapic->esr_pending = 0;
	lapic->icr_lo = 0;
	lapic->icr_hi = 0;

	lapic->lvt_cmci = 0;
	lapic->lvt_timer = 0;
	lapic->lvt_thermal = 0;
	lapic->lvt_pcint = 0;
	lapic->lvt_lint0 = 0;
	lapic->lvt_lint1 = 0;
	lapic->lvt_error = 0;
	vlapic_mask_lvts(vlapic);
}

void
vlapic_init(struct vlapic *vlapic)
{
	KASSERT(vlapic->vm != NULL, ("vlapic_init: vm is not initialized"));
	KASSERT(vlapic->vcpuid >= 0 &&
	    vlapic->vcpuid < vm_get_maxcpus(vlapic->vm),
	    ("vlapic_init: vcpuid is not initialized"));
	KASSERT(vlapic->apic_page != NULL, ("vlapic_init: apic_page is not "
	    "initialized"));

	/*
	 * If the vlapic is configured in x2apic mode then it will be
	 * accessed in the critical section via the MSR emulation code.
	 *
	 * Therefore the timer mutex must be a spinlock because blockable
	 * mutexes cannot be acquired in a critical section.
	 */
	mutex_init(&vlapic->timer_lock, NULL, MUTEX_ADAPTIVE, NULL);
	callout_init(&vlapic->callout, 1);

	vlapic_reset(vlapic);
}

void
vlapic_cleanup(struct vlapic *vlapic)
{
	callout_drain(&vlapic->callout);
	mutex_destroy(&vlapic->timer_lock);
}

int
vlapic_mmio_read(struct vlapic *vlapic, uint64_t gpa, uint64_t *valp,
    uint_t size)
{
	ASSERT3U(gpa, >=, DEFAULT_APIC_BASE);
	ASSERT3U(gpa, <, DEFAULT_APIC_BASE + PAGE_SIZE);

	/* Ignore MMIO accesses when in x2APIC mode or hardware disabled */
	if (vlapic_x2mode(vlapic) || vlapic_hw_disabled(vlapic)) {
		*valp = UINT64_MAX;
		return (0);
	}

	const uint16_t off = gpa - DEFAULT_APIC_BASE;
	uint32_t raw = 0;
	(void) vlapic_read(vlapic, off & ~0xf, &raw);

	/* Shift and mask reads which are small and/or unaligned */
	const uint8_t align = off & 0xf;
	if (align < 4) {
		*valp = (uint64_t)raw << (align * 8);
	} else {
		*valp = 0;
	}

	return (0);
}

int
vlapic_mmio_write(struct vlapic *vlapic, uint64_t gpa, uint64_t val,
    uint_t size)
{
	ASSERT3U(gpa, >=, DEFAULT_APIC_BASE);
	ASSERT3U(gpa, <, DEFAULT_APIC_BASE + PAGE_SIZE);

	/* Ignore MMIO accesses when in x2APIC mode or hardware disabled */
	if (vlapic_x2mode(vlapic) || vlapic_hw_disabled(vlapic)) {
		return (0);
	}

	const uint16_t off = gpa - DEFAULT_APIC_BASE;
	/* Ignore writes which are not 32-bits wide and 16-byte aligned */
	if ((off & 0xf) != 0 || size != 4) {
		return (0);
	}

	(void) vlapic_write(vlapic, off, (uint32_t)val);
	return (0);
}

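/*
 * Example (illustrative): a 4-byte guest write to gpa 0xfee00380 maps to
 * off == 0x380 (APIC_OFFSET_TIMER_ICR) and is passed to vlapic_write(), while
 * a 2-byte or unaligned write to the same register is silently dropped.
 */
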
/* Should attempts to change the APIC base address be rejected with a #GP? */
int vlapic_gp_on_addr_change = 1;

static vm_msr_result_t
vlapic_set_apicbase(struct vlapic *vlapic, uint64_t val)
{
	const uint64_t diff = vlapic->msr_apicbase ^ val;

	/*
	 * Until the LAPIC emulation for switching between xAPIC and x2APIC
	 * modes is more polished, it will remain off-limits from being altered
	 * by the guest.
	 */
	const uint64_t reserved_bits = APICBASE_RESERVED | APICBASE_X2APIC |
	    APICBASE_BSP;
	if ((diff & reserved_bits) != 0) {
		return (VMR_GP);
	}

	/* We do not presently allow the LAPIC access address to be modified. */
	if ((diff & APICBASE_ADDR_MASK) != 0) {
		/*
		 * Explicitly rebuffing such requests with a #GP is the most
		 * straightforward way to handle the situation, but certain
		 * consumers (such as the KVM unit tests) may balk at the
		 * otherwise unexpected exception.
		 */
		if (vlapic_gp_on_addr_change) {
			return (VMR_GP);
		}

		/* If silence is required, just ignore the address change. */
		val = (val & ~APICBASE_ADDR_MASK) | DEFAULT_APIC_BASE;
	}

	vlapic->msr_apicbase = val;
	return (VMR_OK);
}

static __inline uint16_t
vlapic_msr_to_regoff(uint32_t msr)
{
	ASSERT3U(msr, >=, MSR_APIC_000);
	ASSERT3U(msr, <, (MSR_APIC_000 + 0x100));

	return ((msr - MSR_APIC_000) << 4);
}

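/*
 * Example (illustrative): assuming MSR_APIC_000 is 0x800, the x2APIC
 * initial-count MSR 0x838 maps to ((0x838 - MSR_APIC_000) << 4) == 0x380,
 * the same APIC_OFFSET_TIMER_ICR used by the MMIO path.
 */
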
bool
vlapic_owned_msr(uint32_t msr)
{
	if (msr == MSR_APICBASE) {
		return (true);
	}
	if (msr >= MSR_APIC_000 &&
	    msr < (MSR_APIC_000 + 0x100)) {
		return (true);
	}
	return (false);
}

vm_msr_result_t
vlapic_rdmsr(struct vlapic *vlapic, uint32_t msr, uint64_t *valp)
{
	ASSERT(vlapic_owned_msr(msr));
	ASSERT3P(valp, !=, NULL);

	if (msr == MSR_APICBASE) {
		*valp = vlapic->msr_apicbase;
		return (VMR_OK);
	}

	/* #GP for x2APIC MSR accesses in xAPIC mode */
	if (!vlapic_x2mode(vlapic)) {
		return (VMR_GP);
	}

	uint64_t out = 0;
	const uint16_t reg = vlapic_msr_to_regoff(msr);
	switch (reg) {
	case APIC_OFFSET_ICR_LOW: {
		/* Read from ICR register gets entire (64-bit) value */
		uint32_t low = 0, high = 0;
		bool valid;

		valid = vlapic_read(vlapic, APIC_OFFSET_ICR_HI, &high);
		VERIFY(valid);
		valid = vlapic_read(vlapic, APIC_OFFSET_ICR_LOW, &low);
		VERIFY(valid);

		*valp = ((uint64_t)high << 32) | low;
		return (VMR_OK);
	}
	case APIC_OFFSET_ICR_HI:
		/* Already covered by ICR_LOW */
		return (VMR_GP);
	default:
		break;
	}

	if (!vlapic_read(vlapic, reg, (uint32_t *)&out)) {
		return (VMR_GP);
	}
	*valp = out;
	return (VMR_OK);
}

vm_msr_result_t
vlapic_wrmsr(struct vlapic *vlapic, uint32_t msr, uint64_t val)
{
	ASSERT(vlapic_owned_msr(msr));

	if (msr == MSR_APICBASE) {
		return (vlapic_set_apicbase(vlapic, val));
	}

	/* #GP for x2APIC MSR accesses in xAPIC mode */
	if (!vlapic_x2mode(vlapic)) {
		return (VMR_GP);
	}

	const uint16_t reg = vlapic_msr_to_regoff(msr);
	switch (reg) {
	case APIC_OFFSET_ICR_LOW: {
		/* Write to ICR register sets entire (64-bit) value */
		bool valid;

		valid = vlapic_write(vlapic, APIC_OFFSET_ICR_HI, val >> 32);
		VERIFY(valid);
		valid = vlapic_write(vlapic, APIC_OFFSET_ICR_LOW, val);
		VERIFY(valid);
		return (VMR_OK);
	}
	case APIC_OFFSET_ICR_HI:
		/* Already covered by ICR_LOW */
		return (VMR_GP);
	case APIC_OFFSET_ESR:
		/* Only 0 may be written from x2APIC mode */
		if (val != 0) {
			return (VMR_GP);
		}
		break;
	default:
		break;
	}

	if (!vlapic_write(vlapic, reg, val)) {
		return (VMR_GP);
	}
	return (VMR_OK);
}

void
vlapic_set_x2apic_state(struct vm *vm, int vcpuid, enum x2apic_state state)
{
	struct vlapic *vlapic;
	struct LAPIC *lapic;

	vlapic = vm_lapic(vm, vcpuid);

	if (state == X2APIC_DISABLED)
		vlapic->msr_apicbase &= ~APICBASE_X2APIC;
	else
		vlapic->msr_apicbase |= APICBASE_X2APIC;

	/*
	 * Reset the local APIC registers whose values are mode-dependent.
	 *
	 * XXX this works because the APIC mode can be changed only at vcpu
	 * initialization time.
	 */
	lapic = vlapic->apic_page;
	lapic->id = vlapic_get_id(vlapic);
	if (vlapic_x2mode(vlapic)) {
		lapic->ldr = x2apic_ldr(vlapic);
		lapic->dfr = 0;
	} else {
		lapic->ldr = 0;
		lapic->dfr = 0xffffffff;
	}

	if (state == X2APIC_ENABLED) {
		if (vlapic->ops.enable_x2apic_mode)
			(*vlapic->ops.enable_x2apic_mode)(vlapic);
	}
}

void
vlapic_deliver_intr(struct vm *vm, bool level, uint32_t dest, bool phys,
    int delmode, int vec)
{
	bool lowprio;
	int vcpuid;
	cpuset_t dmask;

	if (delmode != IOART_DELFIXED &&
	    delmode != IOART_DELLOPRI &&
	    delmode != IOART_DELEXINT) {
		/* Invalid delivery mode */
		return;
	}
	lowprio = (delmode == IOART_DELLOPRI);

	/*
	 * We don't provide any virtual interrupt redirection hardware so
	 * all interrupts originating from the ioapic or MSI specify the
	 * 'dest' in the legacy xAPIC format.
	 */
	vlapic_calcdest(vm, &dmask, dest, phys, lowprio, false);

	while ((vcpuid = CPU_FFS(&dmask)) != 0) {
		vcpuid--;
		CPU_CLR(vcpuid, &dmask);
		if (delmode == IOART_DELEXINT) {
			(void) vm_inject_extint(vm, vcpuid);
		} else {
			(void) lapic_set_intr(vm, vcpuid, vec, level);
		}
	}
}

void
vlapic_post_intr(struct vlapic *vlapic, int hostcpu)
{
	/*
	 * Post an interrupt to the vcpu currently running on 'hostcpu'.
	 *
	 * This is done by leveraging features like Posted Interrupts (Intel)
	 * and the Doorbell MSR (AMD AVIC) that avoid a VM exit.
	 *
	 * If neither of these features is available then fall back to
	 * sending an IPI to 'hostcpu'.
	 */
	if (vlapic->ops.post_intr)
		(*vlapic->ops.post_intr)(vlapic, hostcpu);
	else
		poke_cpu(hostcpu);
}

void
vlapic_localize_resources(struct vlapic *vlapic)
{
	vmm_glue_callout_localize(&vlapic->callout);
}

void
vlapic_pause(struct vlapic *vlapic)
{
	VLAPIC_TIMER_LOCK(vlapic);
	callout_stop(&vlapic->callout);
	VLAPIC_TIMER_UNLOCK(vlapic);
}

void
vlapic_resume(struct vlapic *vlapic)
{
	VLAPIC_TIMER_LOCK(vlapic);
	if (vlapic->timer_fire_when != 0) {
		vlapic_callout_reset(vlapic);
	}
	VLAPIC_TIMER_UNLOCK(vlapic);
}

static int
vlapic_data_read(void *datap, const vmm_data_req_t *req)
{
	VERIFY3U(req->vdr_class, ==, VDC_LAPIC);
	VERIFY3U(req->vdr_version, ==, 1);
	VERIFY3U(req->vdr_len, >=, sizeof (struct vdi_lapic_v1));

	struct vlapic *vlapic = datap;
	struct vdi_lapic_v1 *out = req->vdr_data;

	VLAPIC_TIMER_LOCK(vlapic);

	if (vlapic->ops.sync_state) {
		(*vlapic->ops.sync_state)(vlapic);
	}

	out->vl_msr_apicbase = vlapic->msr_apicbase;
	out->vl_esr_pending = vlapic->esr_pending;
	if (vlapic->timer_fire_when != 0) {
		out->vl_timer_target =
		    vm_normalize_hrtime(vlapic->vm, vlapic->timer_fire_when);
	} else {
		out->vl_timer_target = 0;
	}

	const struct LAPIC *lapic = vlapic->apic_page;
	struct vdi_lapic_page_v1 *out_page = &out->vl_lapic;

	/*
	 * While this might appear, at first glance, to be missing some fields,
	 * they are intentionally omitted:
	 * - PPR: its contents are always generated at runtime
	 * - EOI: write-only, and contents are ignored after handling
	 * - RRD: (aka RRR) read-only and always 0
	 * - CCR: calculated from underlying timer data
	 */
	out_page->vlp_id = lapic->id;
	out_page->vlp_version = lapic->version;
	out_page->vlp_tpr = lapic->tpr;
	out_page->vlp_apr = lapic->apr;
	out_page->vlp_ldr = lapic->ldr;
	out_page->vlp_dfr = lapic->dfr;
	out_page->vlp_svr = lapic->svr;
	out_page->vlp_esr = lapic->esr;
	out_page->vlp_icr = ((uint64_t)lapic->icr_hi << 32) | lapic->icr_lo;
	out_page->vlp_icr_timer = lapic->icr_timer;
	out_page->vlp_dcr_timer = lapic->dcr_timer;

	out_page->vlp_lvt_cmci = lapic->lvt_cmci;
	out_page->vlp_lvt_timer = lapic->lvt_timer;
	out_page->vlp_lvt_thermal = lapic->lvt_thermal;
	out_page->vlp_lvt_pcint = lapic->lvt_pcint;
	out_page->vlp_lvt_lint0 = lapic->lvt_lint0;
	out_page->vlp_lvt_lint1 = lapic->lvt_lint1;
	out_page->vlp_lvt_error = lapic->lvt_error;

	const uint32_t *isrptr = &lapic->isr0;
	const uint32_t *tmrptr = &lapic->tmr0;
	const uint32_t *irrptr = &lapic->irr0;
	for (uint_t i = 0; i < 8; i++) {
		out_page->vlp_isr[i] = isrptr[i * 4];
		out_page->vlp_tmr[i] = tmrptr[i * 4];
		out_page->vlp_irr[i] = irrptr[i * 4];
	}
	VLAPIC_TIMER_UNLOCK(vlapic);

	return (0);
}

static uint8_t
popc8(uint8_t val)
{
	uint8_t cnt;

	for (cnt = 0; val != 0; val &= (val - 1)) {
		cnt++;
	}
	return (cnt);
}

/*
 * Descriptions for the various failures which can occur when validating
 * to-be-written vlapic state.
 */
enum vlapic_validation_error {
	VVE_OK,
	VVE_BAD_ID,
	VVE_BAD_VERSION,
	VVE_BAD_MSR_BASE,
	VVE_BAD_ESR,
	VVE_BAD_TPR,
	VVE_LOW_VECTOR,
	VVE_ISR_PRIORITY,
};

static enum vlapic_validation_error
vlapic_data_validate(const struct vlapic *vlapic, const vmm_data_req_t *req)
{
	ASSERT(req->vdr_version == 1 &&
	    req->vdr_len >= sizeof (struct vdi_lapic_v1));
	const struct vdi_lapic_v1 *src = req->vdr_data;

	if ((src->vl_esr_pending & ~APIC_VALID_MASK_ESR) != 0 ||
	    (src->vl_lapic.vlp_esr & ~APIC_VALID_MASK_ESR) != 0) {
		return (VVE_BAD_ESR);
	}

	/* Use the same restrictions as the wrmsr accessor for now */
	const uint64_t apicbase_reserved = APICBASE_RESERVED | APICBASE_X2APIC |
	    APICBASE_BSP;
	const uint64_t diff = src->vl_msr_apicbase ^ vlapic->msr_apicbase;
	if ((diff & apicbase_reserved) != 0) {
		return (VVE_BAD_MSR_BASE);
	}

	const struct vdi_lapic_page_v1 *page = &src->vl_lapic;
	/*
	 * Demand that ID match for now.  This can be further updated when some
	 * of the x2apic handling is improved.
	 */
	if (page->vlp_id != vlapic_get_id(vlapic)) {
		return (VVE_BAD_ID);
	}

	if (page->vlp_version != vlapic->apic_page->version) {
		return (VVE_BAD_VERSION);
	}

	if (page->vlp_tpr > 0xff) {
		return (VVE_BAD_TPR);
	}

	/* Vectors 0-15 are not expected to be handled by the lapic */
	if ((page->vlp_isr[0] & 0xffff) != 0 ||
	    (page->vlp_irr[0] & 0xffff) != 0 ||
	    (page->vlp_tmr[0] & 0xffff) != 0) {
		return (VVE_LOW_VECTOR);
	}

	/* Only one interrupt should be in-service for each priority level */
	for (uint_t i = 0; i < 8; i++) {
		if (popc8((uint8_t)page->vlp_isr[i]) > 1 ||
		    popc8((uint8_t)(page->vlp_isr[i] >> 8)) > 1 ||
		    popc8((uint8_t)(page->vlp_isr[i] >> 16)) > 1 ||
		    popc8((uint8_t)(page->vlp_isr[i] >> 24)) > 1) {
			return (VVE_ISR_PRIORITY);
		}
	}

	return (VVE_OK);
}

static int
vlapic_data_write(void *datap, const vmm_data_req_t *req)
{
	VERIFY3U(req->vdr_class, ==, VDC_LAPIC);
	VERIFY3U(req->vdr_version, ==, 1);
	VERIFY3U(req->vdr_len, >=, sizeof (struct vdi_lapic_v1));

	struct vlapic *vlapic = datap;
	if (vlapic_data_validate(vlapic, req) != VVE_OK) {
		return (EINVAL);
	}
	const struct vdi_lapic_v1 *src = req->vdr_data;
	const struct vdi_lapic_page_v1 *page = &src->vl_lapic;
	struct LAPIC *lapic = vlapic->apic_page;

	VLAPIC_TIMER_LOCK(vlapic);

	/* Already ensured by vlapic_data_validate() */
	VERIFY3U(page->vlp_version, ==, lapic->version);

	vlapic->msr_apicbase = src->vl_msr_apicbase;
	vlapic->esr_pending = src->vl_esr_pending;

	lapic->tpr = page->vlp_tpr;
	lapic->apr = page->vlp_apr;
	lapic->ldr = page->vlp_ldr;
	lapic->dfr = page->vlp_dfr;
	lapic->svr = page->vlp_svr;
	lapic->esr = page->vlp_esr;
	lapic->icr_lo = (uint32_t)page->vlp_icr;
	lapic->icr_hi = (uint32_t)(page->vlp_icr >> 32);

	lapic->icr_timer = page->vlp_icr_timer;
	lapic->dcr_timer = page->vlp_dcr_timer;
	vlapic_update_divider(vlapic);

	/* cleanse LDR/DFR */
	vlapic_ldr_write_handler(vlapic);
	vlapic_dfr_write_handler(vlapic);

	lapic->lvt_cmci = page->vlp_lvt_cmci;
	lapic->lvt_timer = page->vlp_lvt_timer;
	lapic->lvt_thermal = page->vlp_lvt_thermal;
	lapic->lvt_pcint = page->vlp_lvt_pcint;
	lapic->lvt_lint0 = page->vlp_lvt_lint0;
	lapic->lvt_lint1 = page->vlp_lvt_lint1;
	lapic->lvt_error = page->vlp_lvt_error;

	vlapic_refresh_lvts(vlapic);

	uint32_t *isrptr = &lapic->isr0;
	uint32_t *tmrptr = &lapic->tmr0;
	uint32_t *irrptr = &lapic->irr0;
	for (uint_t i = 0; i < 8; i++) {
		isrptr[i * 4] = page->vlp_isr[i];
		tmrptr[i * 4] = page->vlp_tmr[i];
		irrptr[i * 4] = page->vlp_irr[i];
	}

	if (src->vl_timer_target != 0) {
		vlapic->timer_fire_when =
		    vm_denormalize_hrtime(vlapic->vm, src->vl_timer_target);

		/*
		 * Check to see if timer expiration would result in computed
		 * CCR values in excess of what is configured in ICR/DCR.
		 */
		const hrtime_t now = gethrtime();
		if (vlapic->timer_fire_when > now) {
			const uint32_t ccr = hrt_freq_count(
			    vlapic->timer_fire_when - now,
			    vlapic->timer_cur_freq);

			/*
			 * Until we have a richer event/logging system
			 * available, just note such an overage as a stat.
			 */
			if (ccr > lapic->icr_timer) {
				vlapic->stats.vs_import_timer_overage++;
			}
		}

		if (!vm_is_paused(vlapic->vm)) {
			vlapic_callout_reset(vlapic);
		}
	} else {
		vlapic->timer_fire_when = 0;
	}

	if (vlapic->ops.sync_state) {
		(*vlapic->ops.sync_state)(vlapic);
	}
	VLAPIC_TIMER_UNLOCK(vlapic);

	return (0);
}

static const vmm_data_version_entry_t lapic_v1 = {
	.vdve_class = VDC_LAPIC,
	.vdve_version = 1,
	.vdve_len_expect = sizeof (struct vdi_lapic_v1),
	.vdve_readf = vlapic_data_read,
	.vdve_writef = vlapic_data_write,
};
VMM_DATA_VERSION(lapic_v1);