/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */
/*
 * Virtual CPU management.
 *
 * VCPUs can be controlled in one of two ways: through the domain itself
 * (psradm, p_online(), etc.), and via changes in xenstore (vcpu_config()).
 * Unfortunately, the terminology is used in different ways; they work out as
 * follows:
 *
 * P_ONLINE: the VCPU is up and running, taking interrupts and running threads
 *
 * P_OFFLINE: the VCPU is up and running, but quiesced (i.e. blocked in the
 * hypervisor on the idle thread).  It must be up since a downed VCPU cannot
 * receive interrupts, and we require this for offline CPUs in Solaris.
 *
 * P_POWEROFF: the VCPU is down (we never called xen_vcpu_up(), or called
 * xen_vcpu_down() for it).  It can't take interrupts or run anything, though
 * if it has run previously, its software state (cpu_t, machcpu structures, IPI
 * event channels, etc.) will still exist.
 *
 * The hypervisor has two notions of CPU states as represented in the store:
 *
 * "offline": the VCPU is down.  Corresponds to P_POWEROFF.
 *
 * "online": the VCPU is running.  Corresponds to a CPU state other than
 * P_POWEROFF.
 *
 * Currently, only a notification via xenstore can bring a CPU into a
 * P_POWEROFF state, and only the domain can change between P_ONLINE, P_NOINTR,
 * P_OFFLINE, etc.  We need to be careful to treat xenstore notifications
 * idempotently, as we'll get 'duplicate' entries when we resume a domain.
 *
 * Note that the xenstore configuration is strictly advisory, in that a domain
 * can choose to ignore it and still power up a VCPU in the offline state.  To
 * play nice, we don't allow it.  Thus, any attempt to power a CPU on or off
 * from within Solaris fails with ENOTSUP.
 *
 * Powering off a VCPU and suspending the domain use similar code.  The
 * difficulty here is that we must ensure that each VCPU is in a stable
 * state: it must have a saved PCB, and not be responding to interrupts
 * (since we are just about to remove its ability to run on a real CPU,
 * possibly forever).  However, an offline CPU in Solaris can take
 * cross-call interrupts, as mentioned, so we must go through a
 * two-stage process.  First, we use the standard Solaris pause_cpus().
 * This ensures that all CPUs are either in mach_cpu_pause() or
 * mach_cpu_idle(), and nothing will cross-call them.
 *
 * Powered-off CPUs are already safe, as we own the cpu_lock needed to
 * bring them back up, and they are in state CPU_PHASE_POWERED_OFF.
 *
 * Running CPUs are spinning in mach_cpu_pause() waiting for either
 * PAUSE_IDLE or CPU_PHASE_WAIT_SAFE.
 *
 * Offline CPUs are either running the idle thread and periodically
 * checking for CPU_PHASE_WAIT_SAFE, or blocked in the hypervisor.
 *
 * Thus, we set CPU_PHASE_WAIT_SAFE for every powered-on CPU, as well as
 * poking them to make sure they're not blocked[1].  When every CPU has
 * responded by reaching a safe state and setting CPU_PHASE_SAFE, we
 * know we can suspend, or power off a CPU, without problems.
 *
 * [1] note that we have to repeatedly poke offline CPUs: it's the only
 * way to ensure that the CPU doesn't miss the state change before
 * dropping into HYPERVISOR_block().
 */
#include <sys/types.h>
#include <sys/systm.h>
#include <sys/param.h>
#include <sys/taskq.h>
#include <sys/cmn_err.h>
#include <sys/archsystm.h>
#include <sys/machsystm.h>
#include <sys/segments.h>
#include <sys/cpuvar.h>
#include <sys/x86_archext.h>
#include <sys/controlregs.h>
#include <sys/hypervisor.h>
#include <sys/xpv_panic.h>
#include <sys/mman.h>
#include <sys/sunddi.h>
#include <util/sscanf.h>
#include <vm/hat_i86.h>

#include <xen/public/io/xs_wire.h>
#include <xen/sys/xenbus_impl.h>
#include <xen/public/vcpu.h>
extern cpuset_t cpu_ready_set;

#define	CPU_PHASE_NONE		0
#define	CPU_PHASE_WAIT_SAFE	1
#define	CPU_PHASE_SAFE		2
#define	CPU_PHASE_POWERED_OFF	3
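
/*
 * A CPU moves through the phases above under the barrier protocol
 * described in the block comment at the top of this file:
 * mp_enter_barrier() marks each powered-on CPU CPU_PHASE_WAIT_SAFE, the
 * target CPU acknowledges by entering CPU_PHASE_SAFE in
 * enter_safe_phase(), and poweroff_vcpu() records CPU_PHASE_POWERED_OFF
 * once xen_vcpu_down() succeeds.
 */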
/*
 * We can only poke CPUs during barrier enter 256 times a second at
 * most.
 */
#define	POKE_TIMEOUT	(NANOSEC / 256)
static taskq_t *cpu_config_tq;
static int cpu_phase[NCPU];

static void vcpu_config_event(struct xenbus_watch *, const char **, uint_t);
static int xen_vcpu_initialize(processorid_t, vcpu_guest_context_t *);
/*
 * Return whether or not the vcpu is actually running on a pcpu
 */
int
vcpu_on_pcpu(processorid_t cpu)
{
	struct vcpu_runstate_info runstate;
	int ret = VCPU_STATE_UNKNOWN;

	/*
	 * Don't bother with hypercall if we are asking about ourself
	 */
	if (cpu == CPU->cpu_id)
		return (VCPU_ON_PCPU);
	if (HYPERVISOR_vcpu_op(VCPUOP_get_runstate_info, cpu, &runstate) != 0)
		goto out;
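
	/*
	 * Only RUNSTATE_running means the vcpu has a physical CPU under
	 * it right now; runnable, offline and blocked all mean it does
	 * not.
	 */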
	switch (runstate.state) {
	case RUNSTATE_running:
		ret = VCPU_ON_PCPU;
		break;

	case RUNSTATE_runnable:
	case RUNSTATE_offline:
	case RUNSTATE_blocked:
		ret = VCPU_NOT_ON_PCPU;
		break;

	default:
		break;
	}

out:
	return (ret);
}
/*
 * These routines allocate any global state that might be needed
 * while starting cpus.  For virtual cpus, there is no such state.
 */
int
mach_cpucontext_init(void)
{
	return (0);
}
static void
do_cpu_config_watch(int state)
{
	static struct xenbus_watch cpu_config_watch;

	if (state != XENSTORE_UP)
		return;
	cpu_config_watch.node = "cpu";
	cpu_config_watch.callback = vcpu_config_event;
	if (register_xenbus_watch(&cpu_config_watch)) {
		taskq_destroy(cpu_config_tq);
		cmn_err(CE_WARN, "do_cpu_config_watch: "
		    "failed to set vcpu config watch");
	}
}
/*
 * This routine is called after all the "normal" MP startup has
 * been done; a good place to start watching xen store for virtual
 * cpu hot plug events.
 */
void
mach_cpucontext_fini(void)
{
	cpu_config_tq = taskq_create("vcpu config taskq", 1,
	    maxclsyspri - 1, 1, 1, TASKQ_PREPOPULATE);

	(void) xs_register_xenbus_callback(do_cpu_config_watch);
}
/*
 * Fill in the remaining CPU context and initialize it.
 */
static int
mp_set_cpu_context(vcpu_guest_context_t *vgc, cpu_t *cp)
{
	uint_t vec, iopl;

	vgc->flags = VGCF_IN_KERNEL;
	/*
	 * fpu_ctx we leave as zero; on first fault we'll store
	 * sse_initial into it anyway.
	 */

#if defined(__amd64)
	vgc->user_regs.cs = KCS_SEL | SEL_KPL;	/* force to ring 3 */
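	/*
	 * (Under the 64-bit hypervisor the guest kernel itself runs in
	 * ring 3, so SEL_KPL here is the kernel's actual privilege level.)
	 */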
#else
	vgc->user_regs.cs = KCS_SEL;
#endif
	vgc->user_regs.ds = KDS_SEL;
	vgc->user_regs.es = KDS_SEL;
	vgc->user_regs.ss = KDS_SEL;
	vgc->kernel_ss = KDS_SEL;
	/*
	 * Allow I/O privilege level for Dom0 kernel.
	 */
	if (DOMAIN_IS_INITDOMAIN(xen_info))
		iopl = (PS_IOPL & 0x1000);	/* ring 1 */
	else
		iopl = 0;
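	/*
	 * (PS_IOPL is the IOPL bit-field of the flags register; masking it
	 * with 0x1000 yields IOPL 1, which is OR-ed into xFLAGS below so
	 * the dom0 kernel can execute I/O instructions.)
	 */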
#if defined(__amd64)
	vgc->user_regs.fs = 0;
	vgc->user_regs.gs = 0;
	vgc->user_regs.rflags = F_OFF | iopl;
#elif defined(__i386)
	vgc->user_regs.fs = KFS_SEL;
	vgc->user_regs.gs = KGS_SEL;
	vgc->user_regs.eflags = F_OFF | iopl;
	vgc->event_callback_cs = vgc->user_regs.cs;
	vgc->failsafe_callback_cs = vgc->user_regs.cs;
#endif
	/*
	 * Initialize the trap_info_t from the IDT
	 */
#if !defined(__lint)
	ASSERT(NIDT == sizeof (vgc->trap_ctxt) / sizeof (vgc->trap_ctxt[0]));
#endif
	for (vec = 0; vec < NIDT; vec++) {
		trap_info_t *ti = &vgc->trap_ctxt[vec];

		if (xen_idt_to_trap_info(vec,
		    &cp->cpu_m.mcpu_idt[vec], ti) == 0) {
			ti->cs = KCS_SEL;
			ti->vector = vec;
		}
	}
	/*
	 * (We assert in various places that the GDT is (a) aligned on a
	 * page boundary and (b) one page long, so this really should fit..)
	 */
	if (IN_XPV_PANIC())
		vgc->gdt_frames[0] = pa_to_ma(mmu_btop(cp->cpu_m.mcpu_gdtpa));
	else
		vgc->gdt_frames[0] = pfn_to_mfn(mmu_btop(cp->cpu_m.mcpu_gdtpa));

	vgc->gdt_ents = NGDT;

	vgc->ctrlreg[0] = CR0_ENABLE_FPU_FLAGS(getcr0());
#if defined(__i386)
	if (mmu.pae_hat)
		vgc->ctrlreg[3] =
		    xen_pfn_to_cr3(pfn_to_mfn(kas.a_hat->hat_htable->ht_pfn));
	else
#endif
		vgc->ctrlreg[3] =
		    pa_to_ma(mmu_ptob(kas.a_hat->hat_htable->ht_pfn));

	vgc->ctrlreg[4] = getcr4();
	vgc->event_callback_eip = (uintptr_t)xen_callback;
	vgc->failsafe_callback_eip = (uintptr_t)xen_failsafe_callback;
	vgc->flags |= VGCF_failsafe_disables_events;

#if defined(__amd64)
	/*
	 * XXPV should this be moved to init_cpu_syscall?
	 */
	vgc->syscall_callback_eip = (uintptr_t)sys_syscall;
	vgc->flags |= VGCF_syscall_disables_events;

	ASSERT(vgc->user_regs.gs == 0);
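	/*
	 * (The amd64 kernel locates its per-CPU cpu_t via the kernel
	 * GSBASE, so point the new vcpu's gs_base_kernel at its cpu_t.)
	 */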
	vgc->gs_base_kernel = (uintptr_t)cp;
#endif

	return (xen_vcpu_initialize(cp->cpu_id, vgc));
}
/*
 * Create a guest virtual cpu context so that the virtual cpu
 * springs into life in the domain just about to call mp_startup()
 *
 * Virtual CPUs must be initialized once in the lifetime of the domain;
 * after that subsequent attempts to start them will fail with X_EEXIST.
 *
 * Thus 'alloc' -really- creates and initializes the virtual
 * CPU context just once.  Once the initialisation succeeds, we never
 * free it, nor the regular cpu_t to which it refers.
 */
void *
mach_cpucontext_alloc(struct cpu *cp)
{
	kthread_t *tp = cp->cpu_thread;
	vcpu_guest_context_t vgc;
	int err = 1;

	/*
	 * First, augment the incoming cpu structure
	 * - vcpu pointer reference
	 * - pending event storage area
	 * - physical address of GDT
	 */
	cp->cpu_m.mcpu_vcpu_info =
	    &HYPERVISOR_shared_info->vcpu_info[cp->cpu_id];
	cp->cpu_m.mcpu_evt_pend = kmem_zalloc(
	    sizeof (struct xen_evt_data), KM_SLEEP);
	cp->cpu_m.mcpu_gdtpa =
	    mmu_ptob(hat_getpfnum(kas.a_hat, (caddr_t)cp->cpu_gdt));

	if ((err = xen_gdt_setprot(cp, PROT_READ)) != 0)
		goto done;
	/*
	 * Now set up the vcpu context so that we can start this vcpu
	 * in the kernel at tp->t_pc (mp_startup).  Note that the
	 * thread will thread_exit() shortly after performing the
	 * initialization; in particular, we will *never* take a
	 * privilege transition on this thread.
	 */

	bzero(&vgc, sizeof (vgc));
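
	/*
	 * (The frame pointer is set two gregs below the stack pointer,
	 * presumably leaving room for the return address and saved frame
	 * pointer of a conventional stack frame.)
	 */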
#if defined(__amd64)
	vgc.user_regs.rip = tp->t_pc;
	vgc.user_regs.rsp = tp->t_sp;
	vgc.user_regs.rbp = tp->t_sp - 2 * sizeof (greg_t);
#else
	vgc.user_regs.eip = tp->t_pc;
	vgc.user_regs.esp = tp->t_sp;
	vgc.user_regs.ebp = tp->t_sp - 2 * sizeof (greg_t);
#endif
	/*
	 * XXPV	Fix resume, if Russ didn't already fix it.
	 *
	 * Note that resume unconditionally puts t->t_stk + sizeof (regs)
	 * into kernel_sp via HYPERVISOR_stack_switch. This anticipates
	 * that only lwps take traps that switch to the kernel stack;
	 * part of creating an lwp adjusts the stack by subtracting
	 * sizeof (struct regs) off t_stk.
	 *
	 * The more interesting question is, why do we do all the work
	 * of a fully fledged lwp for a plain thread?  In particular
	 * we don't have to call HYPERVISOR_stack_switch for lwp-less threads
	 * or futz with the LDT.  This should probably all be done with
	 * an lwp context operator to keep pure thread context switch fast.
	 */
	vgc.kernel_sp = (ulong_t)tp->t_stk;
	err = mp_set_cpu_context(&vgc, cp);

done:
	if (err) {
		mach_cpucontext_free(cp, NULL, err);
		return (NULL);
	}
	return (cp);
}
/*
 * By the time we are called either we have successfully started
 * the cpu, or our attempt to start it has failed.
 */

/*ARGSUSED*/
void
mach_cpucontext_free(struct cpu *cp, void *arg, int err)
{
	switch (err) {
	case 0:
		break;
	case ETIMEDOUT:
		/*
		 * The vcpu context is loaded into the hypervisor, and
		 * we've tried to start it, but the vcpu has not been set
		 * running yet, for whatever reason.  We arrange to -not-
		 * free any data structures it may be referencing.  In
		 * particular, we've already told the hypervisor about
		 * the GDT, and so we can't map it read-write again.
		 */
		break;
	default:
		(void) xen_gdt_setprot(cp, PROT_READ | PROT_WRITE);
		kmem_free(cp->cpu_m.mcpu_evt_pend,
		    sizeof (struct xen_evt_data));
		break;
	}
}
/*
 * Reset this CPU's context.  Clear out any pending evtchn data, since event
 * channel numbers will all change when we resume.
 */
void
mach_cpucontext_reset(cpu_t *cp)
{
	bzero(cp->cpu_m.mcpu_evt_pend, sizeof (struct xen_evt_data));
	/* mcpu_intr_pending ? */
}
static void
pcb_to_user_regs(label_t *pcb, vcpu_guest_context_t *vgc)
{
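	/*
	 * A label_t PCB, as saved by setjmp(), holds only the PC, SP and
	 * callee-saved registers, so those are the only user_regs we can
	 * (and need to) fill in here.
	 */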
#if defined(__amd64)
	vgc->user_regs.rip = pcb->val[REG_LABEL_PC];
	vgc->user_regs.rsp = pcb->val[REG_LABEL_SP];
	vgc->user_regs.rbp = pcb->val[REG_LABEL_BP];
	vgc->user_regs.rbx = pcb->val[REG_LABEL_RBX];
	vgc->user_regs.r12 = pcb->val[REG_LABEL_R12];
	vgc->user_regs.r13 = pcb->val[REG_LABEL_R13];
	vgc->user_regs.r14 = pcb->val[REG_LABEL_R14];
	vgc->user_regs.r15 = pcb->val[REG_LABEL_R15];
#else
	vgc->user_regs.eip = pcb->val[REG_LABEL_PC];
	vgc->user_regs.esp = pcb->val[REG_LABEL_SP];
	vgc->user_regs.ebp = pcb->val[REG_LABEL_BP];
	vgc->user_regs.ebx = pcb->val[REG_LABEL_EBX];
	vgc->user_regs.esi = pcb->val[REG_LABEL_ESI];
	vgc->user_regs.edi = pcb->val[REG_LABEL_EDI];
#endif
}
/*
 * Restore the context of a CPU during resume.  This context is always
 * inside enter_safe_phase(), below.
 */
void
mach_cpucontext_restore(cpu_t *cp)
{
	vcpu_guest_context_t vgc;
	int err;

	ASSERT(cp->cpu_thread == cp->cpu_pause_thread ||
	    cp->cpu_thread == cp->cpu_idle_thread);

	bzero(&vgc, sizeof (vgc));

	pcb_to_user_regs(&cp->cpu_thread->t_pcb, &vgc);

	/*
	 * We're emulating a longjmp() here: in particular, we need to bump the
	 * stack pointer to account for the pop of xIP that returning from
	 * longjmp() normally would do, and set the return value in xAX to 1.
	 */
#if defined(__amd64)
	vgc.user_regs.rax = 1;
	vgc.user_regs.rsp += sizeof (ulong_t);
#else
	vgc.user_regs.eax = 1;
	vgc.user_regs.esp += sizeof (ulong_t);
#endif

	vgc.kernel_sp = cp->cpu_thread->t_sp;

	err = mp_set_cpu_context(&vgc, cp);

	ASSERT(err == 0);
}
/*
 * Reach a point at which the CPU can be safely powered-off or
 * suspended.  Nothing can wake this CPU out of the loop.
 */
static void
enter_safe_phase(void)
{
	ulong_t flags = intr_clear();
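
	/*
	 * The setjmp() below saves our PCB; if the domain is suspended and
	 * later resumed, mach_cpucontext_restore() rebuilds the vcpu
	 * context from that PCB, so we come back as if longjmp()ing into
	 * this loop.
	 */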
	if (setjmp(&curthread->t_pcb) == 0) {
		cpu_phase[CPU->cpu_id] = CPU_PHASE_SAFE;
		while (cpu_phase[CPU->cpu_id] == CPU_PHASE_SAFE)
			SMT_PAUSE();
	}

	ASSERT(!interrupts_enabled());

	intr_restore(flags);
}
/*
 * Offline CPUs run this code even under a pause_cpus(), so we must
 * check if we need to enter the safe phase.
 */
void
mach_cpu_idle(void)
{
	if (IN_XPV_PANIC()) {
		xpv_panic_halt();
	} else {
		(void) HYPERVISOR_block();
		if (cpu_phase[CPU->cpu_id] == CPU_PHASE_WAIT_SAFE)
			enter_safe_phase();
	}
}
/*
 * Spin until either start_cpus() wakes us up, or we get a request to
 * enter the safe phase (followed by a later start_cpus()).
 */
void
mach_cpu_pause(volatile char *safe)
{
	*safe = PAUSE_WAIT;
	membar_enter();

	while (*safe != PAUSE_IDLE) {
		if (cpu_phase[CPU->cpu_id] == CPU_PHASE_WAIT_SAFE)
			enter_safe_phase();
		SMT_PAUSE();
	}
}
)
561 prom_printf("%s\n", msg
);
562 (void) xen_vcpu_down(CPU
->cpu_id
);
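
/*
 * As the comment at the top of this file explains, CPU power-on/off may
 * only be initiated via xenstore, so these entry points simply fail.
 */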
/*ARGSUSED*/
int
mp_cpu_poweron(struct cpu *cp)
{
	return (ENOTSUP);
}

/*ARGSUSED*/
int
mp_cpu_poweroff(struct cpu *cp)
{
	return (ENOTSUP);
}
void
mp_enter_barrier(void)
{
	hrtime_t last_poke_time = 0;
	int poke_allowed = 0;
	int done = 0;
	int i;

	ASSERT(MUTEX_HELD(&cpu_lock));

	pause_cpus(NULL, NULL);
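
	/*
	 * Wait for every other CPU to reach CPU_PHASE_SAFE or
	 * CPU_PHASE_POWERED_OFF, poking laggards at most once per
	 * POKE_TIMEOUT so they can't miss the phase change and stay
	 * blocked in the hypervisor (see [1] in the comment above).
	 */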
	while (!done) {
		done = 1;
		poke_allowed = 0;

		if (xpv_gethrtime() - last_poke_time > POKE_TIMEOUT) {
			last_poke_time = xpv_gethrtime();
			poke_allowed = 1;
		}

		for (i = 0; i < NCPU; i++) {
			cpu_t *cp = cpu_get(i);

			if (cp == NULL || cp == CPU)
				continue;

			switch (cpu_phase[i]) {
			case CPU_PHASE_NONE:
				cpu_phase[i] = CPU_PHASE_WAIT_SAFE;
				poke_cpu(i);
				done = 0;
				break;

			case CPU_PHASE_WAIT_SAFE:
				if (poke_allowed)
					poke_cpu(i);
				done = 0;
				break;

			case CPU_PHASE_SAFE:
			case CPU_PHASE_POWERED_OFF:
				break;
			}
		}

		SMT_PAUSE();
	}
}
void
mp_leave_barrier(void)
{
	int i;

	ASSERT(MUTEX_HELD(&cpu_lock));

	for (i = 0; i < NCPU; i++) {
		cpu_t *cp = cpu_get(i);

		if (cp == NULL || cp == CPU)
			continue;

		switch (cpu_phase[i]) {
		/*
		 * If we see a CPU in one of these phases, something has
		 * gone badly wrong with the guarantees
		 * mp_enter_barrier() is supposed to provide.  Rather
		 * than attempt to stumble along (and since we can't
		 * panic properly in this context), we tell the
		 * hypervisor we've crashed.
		 */
		case CPU_PHASE_NONE:
		case CPU_PHASE_WAIT_SAFE:
			(void) HYPERVISOR_shutdown(SHUTDOWN_crash);
			break;

		case CPU_PHASE_POWERED_OFF:
			break;

		case CPU_PHASE_SAFE:
			cpu_phase[i] = CPU_PHASE_NONE;
		}
	}

	start_cpus();
}
static int
poweroff_vcpu(struct cpu *cp)
{
	int error;

	ASSERT(MUTEX_HELD(&cpu_lock));

	ASSERT(CPU->cpu_id != cp->cpu_id);
	ASSERT(cp->cpu_flags & CPU_QUIESCED);

	mp_enter_barrier();

	if ((error = xen_vcpu_down(cp->cpu_id)) == 0) {
		ASSERT(cpu_phase[cp->cpu_id] == CPU_PHASE_SAFE);

		CPUSET_DEL(cpu_ready_set, cp->cpu_id);

		cp->cpu_flags |= CPU_POWEROFF | CPU_OFFLINE;
		cp->cpu_flags &=
		    ~(CPU_RUNNING | CPU_READY | CPU_EXISTS | CPU_ENABLE);

		cpu_phase[cp->cpu_id] = CPU_PHASE_POWERED_OFF;

		cpu_set_state(cp);
	}

	mp_leave_barrier();

	return (error);
}
static int
vcpu_config_poweroff(processorid_t id)
{
	int oldstate;
	int error;
	cpu_t *cp;

	mutex_enter(&cpu_lock);

	if ((cp = cpu_get(id)) == NULL) {
		mutex_exit(&cpu_lock);
		return (ESRCH);
	}

	if (cpu_get_state(cp) == P_POWEROFF) {
		mutex_exit(&cpu_lock);
		return (0);
	}

	mutex_exit(&cpu_lock);

	do {
		error = p_online_internal(id, P_OFFLINE,
		    &oldstate);

		if (error != 0)
			break;

		/*
		 * So we just changed it to P_OFFLINE.  But then we dropped
		 * cpu_lock, so now it is possible for another thread to change
		 * the cpu back to a different, non-quiesced state e.g.
		 * P_ONLINE.
		 */
		mutex_enter(&cpu_lock);
		if ((cp = cpu_get(id)) == NULL) {
			error = ESRCH;
		} else {
			if (cp->cpu_flags & CPU_QUIESCED)
				error = poweroff_vcpu(cp);
			else
				error = EBUSY;
		}
		mutex_exit(&cpu_lock);
	} while (error == EBUSY);

	return (error);
}
/*
 * Add a new virtual cpu to the domain.
 */
static int
vcpu_config_new(processorid_t id)
{
	extern int start_cpu(processorid_t);
	int error;

	if (ncpus == 1) {
		printf("cannot (yet) add cpus to a single-cpu domain\n");
		return (ENOTSUP);
	}
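
	/*
	 * Stay bound to the current CPU while the new vcpu is brought up.
	 */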
	affinity_set(CPU_CURRENT);
	error = start_cpu(id);
	affinity_clear();
	return (error);
}
static int
poweron_vcpu(struct cpu *cp)
{
	int error;

	ASSERT(MUTEX_HELD(&cpu_lock));

	if (HYPERVISOR_vcpu_op(VCPUOP_is_up, cp->cpu_id, NULL) != 0) {
		printf("poweron_vcpu: vcpu%d is not available!\n",
		    cp->cpu_id);
		return (ENXIO);
	}

	if ((error = xen_vcpu_up(cp->cpu_id)) == 0) {
		CPUSET_ADD(cpu_ready_set, cp->cpu_id);
		cp->cpu_flags |= CPU_EXISTS | CPU_READY | CPU_RUNNING;
		cp->cpu_flags &= ~CPU_POWEROFF;
		/*
		 * There are some nasty races possible here.
		 * Tell the vcpu it's up one more time.
		 * XXPV	Is this enough?  Is this safe?
		 */
		(void) xen_vcpu_up(cp->cpu_id);

		cpu_phase[cp->cpu_id] = CPU_PHASE_NONE;

		cpu_set_state(cp);
	}
	return (error);
}
static int
vcpu_config_poweron(processorid_t id)
{
	cpu_t *cp;
	int oldstate;
	int error;

	if (id >= ncpus)
		return (vcpu_config_new(id));

	mutex_enter(&cpu_lock);

	if ((cp = cpu_get(id)) == NULL) {
		mutex_exit(&cpu_lock);
		return (ESRCH);
	}

	if (cpu_get_state(cp) != P_POWEROFF) {
		mutex_exit(&cpu_lock);
		return (0);
	}

	if ((error = poweron_vcpu(cp)) != 0) {
		mutex_exit(&cpu_lock);
		return (error);
	}

	mutex_exit(&cpu_lock);

	return (p_online_internal(id, P_ONLINE, &oldstate));
}
#define	REPORT_LEN	128

static void
vcpu_config_report(processorid_t id, uint_t newstate, int error)
{
	char *report = kmem_alloc(REPORT_LEN, KM_SLEEP);
	int len;
	char *ps;

	switch (newstate) {
	case P_ONLINE:
		ps = PS_ONLINE;
		break;
	case P_POWEROFF:
		ps = PS_POWEROFF;
		break;
	default:
		cmn_err(CE_PANIC, "unknown state %u\n", newstate);
		break;
	}

	len = snprintf(report, REPORT_LEN,
	    "cpu%d: externally initiated %s", id, ps);

	if (error == 0) {
		cmn_err(CE_CONT, "!%s\n", report);
		kmem_free(report, REPORT_LEN);
		return;
	}

	len += snprintf(report + len, REPORT_LEN - len,
	    " failed, error %d: ", error);

	switch (error) {
	case EEXIST:
		len += snprintf(report + len, REPORT_LEN - len,
		    "cpu already %s", ps ? ps : "?");
		break;
	case ESRCH:
		len += snprintf(report + len, REPORT_LEN - len,
		    "cpu not found");
		break;
	case EPERM:
		len += snprintf(report + len, REPORT_LEN - len,
		    "insufficient privilege (0x%x)", id);
		break;
	case EBUSY:
		switch (newstate) {
		case P_ONLINE:
			/*
			 * This return comes from mp_cpu_start -
			 * we cannot 'start' the boot CPU.
			 */
			len += snprintf(report + len, REPORT_LEN - len,
			    "already running");
			break;
		case P_POWEROFF:
			len += snprintf(report + len, REPORT_LEN - len,
			    "bound lwps?");
			break;
		default:
			break;
		}
		break;
	default:
		break;
	}

	cmn_err(CE_CONT, "%s\n", report);
	kmem_free(report, REPORT_LEN);
}
static void
vcpu_config(void *arg)
{
	int id = (int)(uintptr_t)arg;
	int error;
	char dir[16];
	char *state;

	if ((uint_t)id >= max_ncpus) {
		cmn_err(CE_WARN,
		    "vcpu_config: cpu%d does not fit in this domain", id);
		return;
	}
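
	/*
	 * The target state lives in xenstore at cpu/<id>/availability,
	 * with the value "online" or "offline".
	 */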
	(void) snprintf(dir, sizeof (dir), "cpu/%d", id);
	state = kmem_alloc(MAXPATHLEN, KM_SLEEP);
	if (xenbus_scanf(XBT_NULL, dir, "availability", "%s", state) == 0) {
		if (strcmp(state, "online") == 0) {
			error = vcpu_config_poweron(id);
			vcpu_config_report(id, P_ONLINE, error);
		} else if (strcmp(state, "offline") == 0) {
			error = vcpu_config_poweroff(id);
			vcpu_config_report(id, P_POWEROFF, error);
		} else {
			cmn_err(CE_WARN,
			    "cpu%d: unknown target state '%s'", id, state);
		}
	} else {
		cmn_err(CE_WARN,
		    "cpu%d: unable to read target state from xenstore", id);
	}
	kmem_free(state, MAXPATHLEN);
}
/*ARGSUSED*/
static void
vcpu_config_event(struct xenbus_watch *watch, const char **vec, uint_t len)
{
	const char *path = vec[XS_WATCH_PATH];
	processorid_t id;
	char *s;

	if ((s = strstr(path, "cpu/")) != NULL &&
	    sscanf(s, "cpu/%d", &id) == 1) {
		/*
		 * Run the virtual CPU configuration on a separate thread to
		 * avoid blocking on this event for too long (and for now,
		 * to ensure configuration requests are serialized.)
		 */
		(void) taskq_dispatch(cpu_config_tq,
		    vcpu_config, (void *)(uintptr_t)id, 0);
	}
}
static int
xen_vcpu_initialize(processorid_t id, vcpu_guest_context_t *vgc)
{
	int err;

	if ((err = HYPERVISOR_vcpu_op(VCPUOP_initialise, id, vgc)) != 0) {
		char *str;
		int level = CE_WARN;

		switch (-err) {
		case X_EINVAL:
			/*
			 * This interface squashes multiple error sources
			 * to one error code.  In particular, an X_EINVAL
			 * code can mean:
			 *
			 * -	the vcpu id is out of range
			 * -	cs or ss are in ring 0
			 * -	an entry in the new gdt is above the
			 *	reserved entry
			 * -	a frame underneath the new gdt is bad
			 */
			str = "something is wrong :(";
			break;

		case X_ENOMEM:
			str = "no mem to copy ctxt";
			break;

		case X_EEXIST:
			/*
			 * Hmm.  This error is returned if the vcpu has already
			 * been initialized once before in the lifetime of this
			 * domain.  This is a logic error in the kernel.
			 */
			level = CE_PANIC;
			str = "already initialized";
			break;

		default:
			level = CE_PANIC;
			str = "<unexpected>";
			break;
		}

		cmn_err(level, "vcpu%d: failed to init: error %d: %s",
		    id, -err, str);
	}
	return (err);
}
long
xen_vcpu_up(processorid_t id)
{
	long err;

	if ((err = HYPERVISOR_vcpu_op(VCPUOP_up, id, NULL)) != 0) {
		char *str;

		switch (-err) {
		case X_ENOENT:
			str = "no such cpu";
			break;

		case X_EINVAL:
			/*
			 * Perhaps this is diagnostic overkill.
			 */
			if (HYPERVISOR_vcpu_op(VCPUOP_is_up, id, NULL) < 0)
				str = "bad cpuid";
			else
				str = "not initialized";
			break;

		default:
			str = "<unexpected>";
			break;
		}

		printf("vcpu%d: failed to start: error %d: %s\n",
		    id, -(int)err, str);
		return (EBFONT);	/* deliberately silly */
	}
	return (err);
}
long
xen_vcpu_down(processorid_t id)
{
	long err;

	if ((err = HYPERVISOR_vcpu_op(VCPUOP_down, id, NULL)) != 0) {
		/*
		 * X_ENOENT:	no such cpu
		 * X_EINVAL:	bad cpuid
		 */
		panic("vcpu%d: failed to stop: error %d", id, -(int)err);
	}

	return (err);
}