qemu: process: Ensure that 'beingDestroyed' gets cleared only after VM id is reset
[libvirt.git] / src / qemu / qemu_process.c
blobae6594e10e2f4dfb1b3c580995eaf72a2c7a6844
1 /*
2 * qemu_process.c: QEMU process management
4 * Copyright (C) 2006-2016 Red Hat, Inc.
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library. If not, see
18 * <http://www.gnu.org/licenses/>.
22 #include <config.h>
24 #include <fcntl.h>
25 #include <unistd.h>
26 #include <signal.h>
27 #include <sys/stat.h>
28 #if defined(__linux__)
29 # include <linux/capability.h>
30 #elif defined(__FreeBSD__)
31 # include <sys/param.h>
32 # include <sys/cpuset.h>
33 #endif
35 #include <sys/utsname.h>
37 #if WITH_CAPNG
38 # include <cap-ng.h>
39 #endif
41 #include "qemu_process.h"
42 #define LIBVIRT_QEMU_PROCESSPRIV_H_ALLOW
43 #include "qemu_processpriv.h"
44 #include "qemu_alias.h"
45 #include "qemu_block.h"
46 #include "qemu_domain.h"
47 #include "qemu_domain_address.h"
48 #include "qemu_namespace.h"
49 #include "qemu_cgroup.h"
50 #include "qemu_capabilities.h"
51 #include "qemu_monitor.h"
52 #include "qemu_command.h"
53 #include "qemu_hostdev.h"
54 #include "qemu_hotplug.h"
55 #include "qemu_migration.h"
56 #include "qemu_migration_params.h"
57 #include "qemu_interface.h"
58 #include "qemu_security.h"
59 #include "qemu_extdevice.h"
60 #include "qemu_firmware.h"
61 #include "qemu_backup.h"
62 #include "qemu_dbus.h"
63 #include "qemu_snapshot.h"
65 #include "cpu/cpu.h"
66 #include "cpu/cpu_x86.h"
67 #include "datatypes.h"
68 #include "virlog.h"
69 #include "virerror.h"
70 #include "viralloc.h"
71 #include "virhook.h"
72 #include "virfile.h"
73 #include "virpidfile.h"
74 #include "virhostcpu.h"
75 #include "domain_audit.h"
76 #include "domain_cgroup.h"
77 #include "domain_interface.h"
78 #include "domain_nwfilter.h"
79 #include "domain_postparse.h"
80 #include "domain_validate.h"
81 #include "locking/domain_lock.h"
82 #include "viruuid.h"
83 #include "virprocess.h"
84 #include "virtime.h"
85 #include "virnetdevtap.h"
86 #include "virnetdevopenvswitch.h"
87 #include "virnetdevmidonet.h"
88 #include "virbitmap.h"
89 #include "virnuma.h"
90 #include "virstring.h"
91 #include "virhostdev.h"
92 #include "configmake.h"
93 #include "netdev_bandwidth_conf.h"
94 #include "virresctrl.h"
95 #include "virvsock.h"
96 #include "viridentity.h"
97 #include "virthreadjob.h"
98 #include "virutil.h"
99 #include "storage_source.h"
100 #include "backup_conf.h"
102 #include "logging/log_manager.h"
103 #include "logging/log_protocol.h"
105 #define VIR_FROM_THIS VIR_FROM_QEMU
107 VIR_LOG_INIT("qemu.qemu_process");
110 * qemuProcessRemoveDomainStatus
112 * remove all state files of a domain from statedir
114 static void
115 qemuProcessRemoveDomainStatus(virQEMUDriver *driver,
116 virDomainObj *vm)
118 g_autofree char *file = NULL;
119 qemuDomainObjPrivate *priv = vm->privateData;
120 g_autoptr(virQEMUDriverConfig) cfg = virQEMUDriverGetConfig(driver);
122 file = g_strdup_printf("%s/%s.xml", cfg->stateDir, vm->def->name);
124 if (unlink(file) < 0 && errno != ENOENT && errno != ENOTDIR)
125 VIR_WARN("Failed to remove domain XML for %s: %s",
126 vm->def->name, g_strerror(errno));
128 if (priv->pidfile &&
129 unlink(priv->pidfile) < 0 &&
130 errno != ENOENT)
131 VIR_WARN("Failed to remove PID file for %s: %s",
132 vm->def->name, g_strerror(errno));
137 * This is a callback registered with a qemuAgent *instance,
138 * and to be invoked when the agent console hits an end of file
139 * condition, or error, thus indicating VM shutdown should be
140 * performed
142 static void
143 qemuProcessHandleAgentEOF(qemuAgent *agent,
144 virDomainObj *vm)
146 qemuDomainObjPrivate *priv;
148 virObjectLock(vm);
149 VIR_DEBUG("Received EOF from agent on %p '%s'", vm, vm->def->name);
151 priv = vm->privateData;
153 if (!priv->agent) {
154 VIR_DEBUG("Agent freed already");
155 goto unlock;
158 if (priv->beingDestroyed) {
159 VIR_DEBUG("Domain is being destroyed, agent EOF is expected");
160 goto unlock;
163 qemuAgentClose(agent);
164 priv->agent = NULL;
165 priv->agentError = false;
167 virObjectUnlock(vm);
168 return;
170 unlock:
171 virObjectUnlock(vm);
172 return;
177 * This is invoked when there is some kind of error
178 * parsing data to/from the agent. The VM can continue
179 * to run, but no further agent commands will be
180 * allowed
182 static void
183 qemuProcessHandleAgentError(qemuAgent *agent G_GNUC_UNUSED,
184 virDomainObj *vm)
186 qemuDomainObjPrivate *priv;
188 virObjectLock(vm);
189 VIR_DEBUG("Received error from agent on %p '%s'", vm, vm->def->name);
191 priv = vm->privateData;
193 priv->agentError = true;
195 virObjectUnlock(vm);
199 static qemuAgentCallbacks agentCallbacks = {
200 .eofNotify = qemuProcessHandleAgentEOF,
201 .errorNotify = qemuProcessHandleAgentError,
206 qemuConnectAgent(virQEMUDriver *driver, virDomainObj *vm)
208 qemuDomainObjPrivate *priv = vm->privateData;
209 qemuAgent *agent = NULL;
210 virDomainChrDef *config = qemuFindAgentConfig(vm->def);
212 if (!config)
213 return 0;
215 if (priv->agent)
216 return 0;
218 if (config->state != VIR_DOMAIN_CHR_DEVICE_STATE_CONNECTED) {
219 VIR_DEBUG("Deferring connecting to guest agent");
220 return 0;
223 if (qemuSecuritySetDaemonSocketLabel(driver->securityManager, vm->def) < 0) {
224 VIR_ERROR(_("Failed to set security context for agent for %1$s"),
225 vm->def->name);
226 goto cleanup;
229 agent = qemuAgentOpen(vm,
230 config->source,
231 virEventThreadGetContext(priv->eventThread),
232 &agentCallbacks);
234 if (!virDomainObjIsActive(vm)) {
235 qemuAgentClose(agent);
236 virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
237 _("guest crashed while connecting to the guest agent"));
238 return -1;
241 if (qemuSecurityClearSocketLabel(driver->securityManager, vm->def) < 0) {
242 VIR_ERROR(_("Failed to clear security context for agent for %1$s"),
243 vm->def->name);
244 qemuAgentClose(agent);
245 goto cleanup;
248 priv->agent = agent;
249 if (!priv->agent)
250 VIR_INFO("Failed to connect agent for %s", vm->def->name);
252 cleanup:
253 if (!priv->agent) {
254 VIR_WARN("Cannot connect to QEMU guest agent for %s", vm->def->name);
255 priv->agentError = true;
256 virResetLastError();
259 return 0;
264 * qemuProcessEventSubmit:
265 * @vm: pointer to the domain object, the function will take an extra reference
266 * @eventType: the event to be processed
267 * @action: event specific action to be taken
268 * @status: event specific status
269 * @data: additional data for the event processor (the pointer is stolen and it
270 * will be properly freed
272 * Submits @eventType to be processed by the asynchronous event handling thread.
274 static void
275 qemuProcessEventSubmit(virDomainObj *vm,
276 qemuProcessEventType eventType,
277 int action,
278 int status,
279 void *data)
281 struct qemuProcessEvent *event = g_new0(struct qemuProcessEvent, 1);
282 virQEMUDriver *driver = QEMU_DOMAIN_PRIVATE(vm)->driver;
284 event->vm = virObjectRef(vm);
285 event->eventType = eventType;
286 event->action = action;
287 event->status = status;
288 event->data = data;
290 if (virThreadPoolSendJob(driver->workerPool, 0, event) < 0) {
291 virObjectUnref(event->vm);
292 qemuProcessEventFree(event);
298 * This is a callback registered with a qemuMonitor *instance,
299 * and to be invoked when the monitor console hits an end of file
300 * condition, or error, thus indicating VM shutdown should be
301 * performed
303 static void
304 qemuProcessHandleMonitorEOF(qemuMonitor *mon,
305 virDomainObj *vm)
307 qemuDomainObjPrivate *priv;
309 virObjectLock(vm);
311 VIR_DEBUG("Received EOF on %p '%s'", vm, vm->def->name);
313 priv = vm->privateData;
314 if (priv->beingDestroyed) {
315 VIR_DEBUG("Domain is being destroyed, EOF is expected");
316 goto cleanup;
319 qemuProcessEventSubmit(vm, QEMU_PROCESS_EVENT_MONITOR_EOF,
320 0, 0, GINT_TO_POINTER(vm->def->id));
322 /* We don't want this EOF handler to be called over and over while the
323 * thread is waiting for a job.
325 virObjectLock(mon);
326 qemuMonitorUnregister(mon);
327 virObjectUnlock(mon);
329 /* We don't want any cleanup from EOF handler (or any other
330 * thread) to enter qemu namespace. */
331 qemuDomainDestroyNamespace(priv->driver, vm);
333 cleanup:
334 virObjectUnlock(vm);
339 * This is invoked when there is some kind of error
340 * parsing data to/from the monitor. The VM can continue
341 * to run, but no further monitor commands will be
342 * allowed
344 static void
345 qemuProcessHandleMonitorError(qemuMonitor *mon G_GNUC_UNUSED,
346 virDomainObj *vm)
348 qemuDomainObjPrivate *priv;
349 virObjectEvent *event = NULL;
351 virObjectLock(vm);
352 VIR_DEBUG("Received error on %p '%s'", vm, vm->def->name);
354 priv = vm->privateData;
355 priv->monError = true;
356 event = virDomainEventControlErrorNewFromObj(vm);
357 virObjectEventStateQueue(priv->driver->domainEventState, event);
359 virObjectUnlock(vm);
364 * qemuProcessFindDomainDiskByAliasOrQOM:
365 * @vm: domain object to search for the disk
366 * @alias: -drive or -device alias of the disk
367 * @qomid: QOM tree device name
369 * Looks up a disk in the domain definition of @vm which either matches the
370 * -drive or -device alias used for the backend and frontend respectively or the
371 * QOM name. If @alias is empty it's treated as NULL as it's a mandatory field
372 * in some cases.
374 * Returns a disk from @vm or NULL if it could not be found.
376 virDomainDiskDef *
377 qemuProcessFindDomainDiskByAliasOrQOM(virDomainObj *vm,
378 const char *alias,
379 const char *qomid)
381 size_t i;
383 if (alias && *alias == '\0')
384 alias = NULL;
386 if (alias)
387 alias = qemuAliasDiskDriveSkipPrefix(alias);
389 for (i = 0; i < vm->def->ndisks; i++) {
390 virDomainDiskDef *disk = vm->def->disks[i];
391 qemuDomainDiskPrivate *diskPriv = QEMU_DOMAIN_DISK_PRIVATE(disk);
393 if ((disk->info.alias && STREQ_NULLABLE(disk->info.alias, alias)) ||
394 (diskPriv->qomName && STREQ_NULLABLE(diskPriv->qomName, qomid)))
395 return disk;
398 virReportError(VIR_ERR_INTERNAL_ERROR,
399 _("no disk found with alias '%1$s' or id '%2$s'"),
400 NULLSTR(alias), NULLSTR(qomid));
401 return NULL;
405 static void
406 qemuProcessHandleReset(qemuMonitor *mon G_GNUC_UNUSED,
407 virDomainObj *vm)
409 virQEMUDriver *driver;
410 virObjectEvent *event = NULL;
411 qemuDomainObjPrivate *priv;
412 virDomainState state;
413 int reason;
415 virObjectLock(vm);
416 priv = vm->privateData;
417 driver = priv->driver;
419 state = virDomainObjGetState(vm, &reason);
421 /* ignore reset events on VM startup. Libvirt in certain instances does a
422 * reset during startup so that the ACPI tables are re-generated */
423 if (state == VIR_DOMAIN_PAUSED &&
424 reason == VIR_DOMAIN_PAUSED_STARTING_UP) {
425 VIR_DEBUG("ignoring reset event during startup");
426 goto unlock;
429 event = virDomainEventRebootNewFromObj(vm);
430 if (priv->agent)
431 qemuAgentNotifyEvent(priv->agent, QEMU_AGENT_EVENT_RESET);
433 qemuDomainSetFakeReboot(vm, false);
434 qemuDomainSaveStatus(vm);
436 qemuProcessEventSubmit(vm, QEMU_PROCESS_EVENT_RESET, 0, 0, NULL);
438 unlock:
439 virObjectUnlock(vm);
440 virObjectEventStateQueue(driver->domainEventState, event);
445 * Since we have the '-no-shutdown' flag set, the
446 * QEMU process will currently have guest OS shutdown
447 * and the CPUS stopped. To fake the reboot, we thus
448 * want todo a reset of the virtual hardware, followed
449 * by restart of the CPUs. This should result in the
450 * guest OS booting up again
452 static void
453 qemuProcessFakeReboot(void *opaque)
455 virDomainObj *vm = opaque;
456 qemuDomainObjPrivate *priv = vm->privateData;
457 virQEMUDriver *driver = priv->driver;
458 virDomainRunningReason reason = VIR_DOMAIN_RUNNING_BOOTED;
459 int ret = -1, rc;
461 VIR_DEBUG("vm=%p", vm);
462 virObjectLock(vm);
463 if (virDomainObjBeginJob(vm, VIR_JOB_MODIFY) < 0)
464 goto cleanup;
466 if (!virDomainObjIsActive(vm)) {
467 virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
468 _("guest unexpectedly quit"));
469 goto endjob;
472 qemuDomainObjEnterMonitor(vm);
473 rc = qemuMonitorSystemReset(priv->mon);
475 qemuDomainObjExitMonitor(vm);
477 if (rc < 0)
478 goto endjob;
480 if (virDomainObjGetState(vm, NULL) == VIR_DOMAIN_CRASHED)
481 reason = VIR_DOMAIN_RUNNING_CRASHED;
483 if (qemuProcessStartCPUs(driver, vm,
484 reason,
485 VIR_ASYNC_JOB_NONE) < 0) {
486 if (virGetLastErrorCode() == VIR_ERR_OK)
487 virReportError(VIR_ERR_INTERNAL_ERROR,
488 "%s", _("resume operation failed"));
489 goto endjob;
492 qemuDomainSaveStatus(vm);
493 ret = 0;
495 endjob:
496 virDomainObjEndJob(vm);
498 cleanup:
499 priv->pausedShutdown = false;
500 qemuDomainSetFakeReboot(vm, false);
501 if (ret == -1)
502 ignore_value(qemuProcessKill(vm, VIR_QEMU_PROCESS_KILL_FORCE));
503 virDomainObjEndAPI(&vm);
507 void
508 qemuProcessShutdownOrReboot(virDomainObj *vm)
510 qemuDomainObjPrivate *priv = vm->privateData;
512 if (priv->fakeReboot ||
513 vm->def->onPoweroff == VIR_DOMAIN_LIFECYCLE_ACTION_RESTART) {
514 g_autofree char *name = g_strdup_printf("reboot-%s", vm->def->name);
515 virThread th;
517 virObjectRef(vm);
518 if (virThreadCreateFull(&th,
519 false,
520 qemuProcessFakeReboot,
521 name,
522 false,
523 vm) < 0) {
524 VIR_ERROR(_("Failed to create reboot thread, killing domain"));
525 ignore_value(qemuProcessKill(vm, VIR_QEMU_PROCESS_KILL_NOWAIT));
526 priv->pausedShutdown = false;
527 qemuDomainSetFakeReboot(vm, false);
528 virObjectUnref(vm);
530 } else {
531 ignore_value(qemuProcessKill(vm, VIR_QEMU_PROCESS_KILL_NOWAIT));
536 static void
537 qemuProcessHandleEvent(qemuMonitor *mon G_GNUC_UNUSED,
538 virDomainObj *vm,
539 const char *eventName,
540 long long seconds,
541 unsigned int micros,
542 const char *details)
544 virQEMUDriver *driver;
545 virObjectEvent *event = NULL;
547 VIR_DEBUG("vm=%p", vm);
549 virObjectLock(vm);
550 driver = QEMU_DOMAIN_PRIVATE(vm)->driver;
551 event = virDomainQemuMonitorEventNew(vm->def->id, vm->def->name,
552 vm->def->uuid, eventName,
553 seconds, micros, details);
555 virObjectUnlock(vm);
556 virObjectEventStateQueue(driver->domainEventState, event);
560 static void
561 qemuProcessHandleShutdown(qemuMonitor *mon G_GNUC_UNUSED,
562 virDomainObj *vm,
563 virTristateBool guest_initiated)
565 virQEMUDriver *driver;
566 qemuDomainObjPrivate *priv;
567 virObjectEvent *event = NULL;
568 int detail = 0;
570 VIR_DEBUG("vm=%p", vm);
572 virObjectLock(vm);
574 priv = vm->privateData;
575 driver = priv->driver;
577 if (virDomainObjGetState(vm, NULL) == VIR_DOMAIN_SHUTDOWN) {
578 VIR_DEBUG("Ignoring repeated SHUTDOWN event from domain %s",
579 vm->def->name);
580 goto unlock;
581 } else if (!virDomainObjIsActive(vm)) {
582 VIR_DEBUG("Ignoring SHUTDOWN event from inactive domain %s",
583 vm->def->name);
584 goto unlock;
587 /* In case of fake reboot qemu shutdown state is transient so don't
588 * change domain state nor send events. */
589 if (!priv->fakeReboot &&
590 vm->def->onPoweroff != VIR_DOMAIN_LIFECYCLE_ACTION_RESTART) {
591 VIR_DEBUG("Transitioned guest %s to shutdown state",
592 vm->def->name);
593 virDomainObjSetState(vm,
594 VIR_DOMAIN_SHUTDOWN,
595 VIR_DOMAIN_SHUTDOWN_UNKNOWN);
597 switch (guest_initiated) {
598 case VIR_TRISTATE_BOOL_YES:
599 detail = VIR_DOMAIN_EVENT_SHUTDOWN_GUEST;
600 break;
602 case VIR_TRISTATE_BOOL_NO:
603 detail = VIR_DOMAIN_EVENT_SHUTDOWN_HOST;
604 break;
606 case VIR_TRISTATE_BOOL_ABSENT:
607 case VIR_TRISTATE_BOOL_LAST:
608 default:
609 detail = VIR_DOMAIN_EVENT_SHUTDOWN_FINISHED;
610 break;
613 event = virDomainEventLifecycleNewFromObj(vm,
614 VIR_DOMAIN_EVENT_SHUTDOWN,
615 detail);
616 qemuDomainSaveStatus(vm);
617 } else {
618 priv->pausedShutdown = true;
621 if (priv->agent)
622 qemuAgentNotifyEvent(priv->agent, QEMU_AGENT_EVENT_SHUTDOWN);
624 qemuProcessShutdownOrReboot(vm);
626 unlock:
627 virObjectUnlock(vm);
628 virObjectEventStateQueue(driver->domainEventState, event);
632 static void
633 qemuProcessHandleStop(qemuMonitor *mon G_GNUC_UNUSED,
634 virDomainObj *vm)
636 virQEMUDriver *driver;
637 virObjectEvent *event = NULL;
638 virDomainPausedReason reason;
639 virDomainEventSuspendedDetailType detail;
640 qemuDomainObjPrivate *priv = vm->privateData;
642 virObjectLock(vm);
644 driver = priv->driver;
645 reason = priv->pausedReason;
646 priv->pausedReason = VIR_DOMAIN_PAUSED_UNKNOWN;
648 /* In case of fake reboot qemu paused state is transient so don't
649 * reveal it in domain state nor sent events */
650 if (virDomainObjGetState(vm, NULL) == VIR_DOMAIN_RUNNING &&
651 !priv->pausedShutdown) {
652 if (vm->job->asyncJob == VIR_ASYNC_JOB_MIGRATION_OUT) {
653 if (vm->job->current->status == VIR_DOMAIN_JOB_STATUS_POSTCOPY)
654 reason = VIR_DOMAIN_PAUSED_POSTCOPY;
655 else
656 reason = VIR_DOMAIN_PAUSED_MIGRATION;
659 detail = qemuDomainPausedReasonToSuspendedEvent(reason);
660 VIR_DEBUG("Transitioned guest %s to paused state, "
661 "reason %s, event detail %d",
662 vm->def->name, virDomainPausedReasonTypeToString(reason),
663 detail);
665 if (vm->job->current)
666 ignore_value(virTimeMillisNow(&vm->job->current->stopped));
668 if (priv->signalStop)
669 virDomainObjBroadcast(vm);
671 virDomainObjSetState(vm, VIR_DOMAIN_PAUSED, reason);
672 event = virDomainEventLifecycleNewFromObj(vm,
673 VIR_DOMAIN_EVENT_SUSPENDED,
674 detail);
676 VIR_FREE(priv->lockState);
677 if (virDomainLockProcessPause(driver->lockManager, vm, &priv->lockState) < 0)
678 VIR_WARN("Unable to release lease on %s", vm->def->name);
679 VIR_DEBUG("Preserving lock state '%s'", NULLSTR(priv->lockState));
681 qemuDomainSaveStatus(vm);
684 virObjectUnlock(vm);
685 virObjectEventStateQueue(driver->domainEventState, event);
689 static void
690 qemuProcessHandleResume(qemuMonitor *mon G_GNUC_UNUSED,
691 virDomainObj *vm)
693 virQEMUDriver *driver;
694 virObjectEvent *event = NULL;
695 qemuDomainObjPrivate *priv;
696 virDomainRunningReason reason = VIR_DOMAIN_RUNNING_UNPAUSED;
697 virDomainEventResumedDetailType eventDetail;
699 virObjectLock(vm);
701 priv = vm->privateData;
702 driver = priv->driver;
704 if (priv->runningReason != VIR_DOMAIN_RUNNING_UNKNOWN) {
705 reason = priv->runningReason;
706 priv->runningReason = VIR_DOMAIN_RUNNING_UNKNOWN;
709 if (virDomainObjGetState(vm, NULL) != VIR_DOMAIN_RUNNING) {
710 eventDetail = qemuDomainRunningReasonToResumeEvent(reason);
711 VIR_DEBUG("Transitioned guest %s into running state, reason '%s', "
712 "event detail %d",
713 vm->def->name, virDomainRunningReasonTypeToString(reason),
714 eventDetail);
716 /* When a domain is running in (failed) post-copy migration on the
717 * destination host, we need to make sure to set the appropriate reason
718 * here. */
719 if (virDomainObjIsPostcopy(vm, vm->job)) {
720 if (virDomainObjIsFailedPostcopy(vm, vm->job))
721 reason = VIR_DOMAIN_RUNNING_POSTCOPY_FAILED;
722 else
723 reason = VIR_DOMAIN_RUNNING_POSTCOPY;
725 virDomainObjSetState(vm, VIR_DOMAIN_RUNNING, reason);
726 event = virDomainEventLifecycleNewFromObj(vm,
727 VIR_DOMAIN_EVENT_RESUMED,
728 eventDetail);
729 qemuDomainSaveStatus(vm);
732 virObjectUnlock(vm);
733 virObjectEventStateQueue(driver->domainEventState, event);
736 static void
737 qemuProcessHandleRTCChange(qemuMonitor *mon G_GNUC_UNUSED,
738 virDomainObj *vm,
739 long long offset)
741 virQEMUDriver *driver;
742 virObjectEvent *event = NULL;
744 virObjectLock(vm);
745 driver = QEMU_DOMAIN_PRIVATE(vm)->driver;
747 if (vm->def->clock.offset == VIR_DOMAIN_CLOCK_OFFSET_VARIABLE) {
748 /* when a basedate is manually given on the qemu commandline
749 * rather than simply "-rtc base=utc", the offset sent by qemu
750 * in this event is *not* the new offset from UTC, but is
751 * instead the new offset from the *original basedate* +
752 * uptime. For example, if the original offset was 3600 and
753 * the guest clock has been advanced by 10 seconds, qemu will
754 * send "10" in the event - this means that the new offset
755 * from UTC is 3610, *not* 10. If the guest clock is advanced
756 * by another 10 seconds, qemu will now send "20" - i.e. each
757 * event is the sum of the most recent change and all previous
758 * changes since the domain was started. Fortunately, we have
759 * saved the initial offset in "adjustment0", so to arrive at
760 * the proper new "adjustment", we just add the most recent
761 * offset to adjustment0.
763 offset += vm->def->clock.data.variable.adjustment0;
764 vm->def->clock.data.variable.adjustment = offset;
766 qemuDomainSaveStatus(vm);
769 event = virDomainEventRTCChangeNewFromObj(vm, offset);
771 virObjectUnlock(vm);
773 virObjectEventStateQueue(driver->domainEventState, event);
777 static void
778 qemuProcessHandleWatchdog(qemuMonitor *mon G_GNUC_UNUSED,
779 virDomainObj *vm,
780 int action)
782 virQEMUDriver *driver;
783 virObjectEvent *watchdogEvent = NULL;
784 virObjectEvent *lifecycleEvent = NULL;
786 virObjectLock(vm);
787 driver = QEMU_DOMAIN_PRIVATE(vm)->driver;
788 watchdogEvent = virDomainEventWatchdogNewFromObj(vm, action);
790 if (action == VIR_DOMAIN_EVENT_WATCHDOG_PAUSE &&
791 virDomainObjGetState(vm, NULL) == VIR_DOMAIN_RUNNING) {
792 qemuDomainObjPrivate *priv = vm->privateData;
793 VIR_WARN("Transitioned guest %s to paused state due to watchdog", vm->def->name);
795 virDomainObjSetState(vm, VIR_DOMAIN_PAUSED, VIR_DOMAIN_PAUSED_WATCHDOG);
796 lifecycleEvent = virDomainEventLifecycleNewFromObj(vm,
797 VIR_DOMAIN_EVENT_SUSPENDED,
798 VIR_DOMAIN_EVENT_SUSPENDED_WATCHDOG);
800 VIR_FREE(priv->lockState);
801 if (virDomainLockProcessPause(driver->lockManager, vm, &priv->lockState) < 0)
802 VIR_WARN("Unable to release lease on %s", vm->def->name);
803 VIR_DEBUG("Preserving lock state '%s'", NULLSTR(priv->lockState));
805 qemuDomainSaveStatus(vm);
808 if (vm->def->nwatchdogs &&
809 vm->def->watchdogs[0]->action == VIR_DOMAIN_WATCHDOG_ACTION_DUMP) {
810 qemuProcessEventSubmit(vm, QEMU_PROCESS_EVENT_WATCHDOG,
811 VIR_DOMAIN_WATCHDOG_ACTION_DUMP, 0, NULL);
814 virObjectUnlock(vm);
815 virObjectEventStateQueue(driver->domainEventState, watchdogEvent);
816 virObjectEventStateQueue(driver->domainEventState, lifecycleEvent);
820 static void
821 qemuProcessHandleIOError(qemuMonitor *mon G_GNUC_UNUSED,
822 virDomainObj *vm,
823 const char *diskAlias,
824 const char *nodename,
825 int action,
826 const char *reason)
828 virQEMUDriver *driver;
829 virObjectEvent *ioErrorEvent = NULL;
830 virObjectEvent *ioErrorEvent2 = NULL;
831 virObjectEvent *lifecycleEvent = NULL;
832 const char *srcPath;
833 const char *devAlias;
834 virDomainDiskDef *disk;
836 virObjectLock(vm);
837 driver = QEMU_DOMAIN_PRIVATE(vm)->driver;
839 if (*diskAlias == '\0')
840 diskAlias = NULL;
842 if (diskAlias)
843 disk = qemuProcessFindDomainDiskByAliasOrQOM(vm, diskAlias, NULL);
844 else if (nodename)
845 disk = qemuDomainDiskLookupByNodename(vm->def, NULL, nodename, NULL);
846 else
847 disk = NULL;
849 if (disk) {
850 srcPath = virDomainDiskGetSource(disk);
851 devAlias = disk->info.alias;
852 } else {
853 srcPath = "";
854 devAlias = "";
857 ioErrorEvent = virDomainEventIOErrorNewFromObj(vm, srcPath, devAlias, action);
858 ioErrorEvent2 = virDomainEventIOErrorReasonNewFromObj(vm, srcPath, devAlias, action, reason);
860 if (action == VIR_DOMAIN_EVENT_IO_ERROR_PAUSE &&
861 virDomainObjGetState(vm, NULL) == VIR_DOMAIN_RUNNING) {
862 qemuDomainObjPrivate *priv = vm->privateData;
863 VIR_WARN("Transitioned guest %s to paused state due to IO error", vm->def->name);
865 if (priv->signalIOError)
866 virDomainObjBroadcast(vm);
868 virDomainObjSetState(vm, VIR_DOMAIN_PAUSED, VIR_DOMAIN_PAUSED_IOERROR);
869 lifecycleEvent = virDomainEventLifecycleNewFromObj(vm,
870 VIR_DOMAIN_EVENT_SUSPENDED,
871 VIR_DOMAIN_EVENT_SUSPENDED_IOERROR);
873 VIR_FREE(priv->lockState);
874 if (virDomainLockProcessPause(driver->lockManager, vm, &priv->lockState) < 0)
875 VIR_WARN("Unable to release lease on %s", vm->def->name);
876 VIR_DEBUG("Preserving lock state '%s'", NULLSTR(priv->lockState));
878 qemuDomainSaveStatus(vm);
880 virObjectUnlock(vm);
882 virObjectEventStateQueue(driver->domainEventState, ioErrorEvent);
883 virObjectEventStateQueue(driver->domainEventState, ioErrorEvent2);
884 virObjectEventStateQueue(driver->domainEventState, lifecycleEvent);
888 static void
889 qemuProcessHandleJobStatusChange(qemuMonitor *mon G_GNUC_UNUSED,
890 virDomainObj *vm,
891 const char *jobname,
892 int status)
894 qemuDomainObjPrivate *priv;
895 qemuBlockJobData *job = NULL;
896 int jobnewstate;
898 virObjectLock(vm);
899 priv = vm->privateData;
901 VIR_DEBUG("job '%s'(domain: %p,%s) state changed to '%s'(%d)",
902 jobname, vm, vm->def->name,
903 qemuMonitorJobStatusTypeToString(status), status);
905 if ((jobnewstate = qemuBlockjobConvertMonitorStatus(status)) == QEMU_BLOCKJOB_STATE_LAST)
906 goto cleanup;
908 if (!(job = virHashLookup(priv->blockjobs, jobname))) {
909 VIR_DEBUG("job '%s' not registered", jobname);
910 goto cleanup;
913 job->newstate = jobnewstate;
915 if (job->synchronous) {
916 VIR_DEBUG("job '%s' handled synchronously", jobname);
917 virDomainObjBroadcast(vm);
918 } else {
919 VIR_DEBUG("job '%s' handled by event thread", jobname);
920 qemuProcessEventSubmit(vm, QEMU_PROCESS_EVENT_JOB_STATUS_CHANGE,
921 0, 0, virObjectRef(job));
924 cleanup:
925 virObjectUnlock(vm);
929 static void
930 qemuProcessHandleGraphics(qemuMonitor *mon G_GNUC_UNUSED,
931 virDomainObj *vm,
932 int phase,
933 int localFamily,
934 const char *localNode,
935 const char *localService,
936 int remoteFamily,
937 const char *remoteNode,
938 const char *remoteService,
939 const char *authScheme,
940 const char *x509dname,
941 const char *saslUsername)
943 virQEMUDriver *driver;
944 virObjectEvent *event;
945 virDomainEventGraphicsAddressPtr localAddr = NULL;
946 virDomainEventGraphicsAddressPtr remoteAddr = NULL;
947 virDomainEventGraphicsSubjectPtr subject = NULL;
949 localAddr = g_new0(virDomainEventGraphicsAddress, 1);
950 localAddr->family = localFamily;
951 localAddr->service = g_strdup(localService);
952 localAddr->node = g_strdup(localNode);
954 remoteAddr = g_new0(virDomainEventGraphicsAddress, 1);
955 remoteAddr->family = remoteFamily;
956 remoteAddr->service = g_strdup(remoteService);
957 remoteAddr->node = g_strdup(remoteNode);
959 subject = g_new0(virDomainEventGraphicsSubject, 1);
960 if (x509dname) {
961 VIR_REALLOC_N(subject->identities, subject->nidentity+1);
962 subject->nidentity++;
963 subject->identities[subject->nidentity - 1].type = g_strdup("x509dname");
964 subject->identities[subject->nidentity - 1].name = g_strdup(x509dname);
966 if (saslUsername) {
967 VIR_REALLOC_N(subject->identities, subject->nidentity+1);
968 subject->nidentity++;
969 subject->identities[subject->nidentity - 1].type = g_strdup("saslUsername");
970 subject->identities[subject->nidentity - 1].name = g_strdup(saslUsername);
973 virObjectLock(vm);
974 driver = QEMU_DOMAIN_PRIVATE(vm)->driver;
975 event = virDomainEventGraphicsNewFromObj(vm, phase, localAddr, remoteAddr, authScheme, subject);
976 virObjectUnlock(vm);
978 virObjectEventStateQueue(driver->domainEventState, event);
981 static void
982 qemuProcessHandleTrayChange(qemuMonitor *mon G_GNUC_UNUSED,
983 virDomainObj *vm,
984 const char *devAlias,
985 const char *devid,
986 int reason)
988 virQEMUDriver *driver;
989 virObjectEvent *event = NULL;
990 virDomainDiskDef *disk;
992 virObjectLock(vm);
993 driver = QEMU_DOMAIN_PRIVATE(vm)->driver;
994 disk = qemuProcessFindDomainDiskByAliasOrQOM(vm, devAlias, devid);
996 if (disk) {
997 event = virDomainEventTrayChangeNewFromObj(vm, disk->info.alias, reason);
998 /* Update disk tray status */
999 if (reason == VIR_DOMAIN_EVENT_TRAY_CHANGE_OPEN)
1000 disk->tray_status = VIR_DOMAIN_DISK_TRAY_OPEN;
1001 else if (reason == VIR_DOMAIN_EVENT_TRAY_CHANGE_CLOSE)
1002 disk->tray_status = VIR_DOMAIN_DISK_TRAY_CLOSED;
1004 qemuDomainSaveStatus(vm);
1005 virDomainObjBroadcast(vm);
1008 virObjectUnlock(vm);
1009 virObjectEventStateQueue(driver->domainEventState, event);
1012 static void
1013 qemuProcessHandlePMWakeup(qemuMonitor *mon G_GNUC_UNUSED,
1014 virDomainObj *vm)
1016 virQEMUDriver *driver;
1017 virObjectEvent *event = NULL;
1018 virObjectEvent *lifecycleEvent = NULL;
1020 virObjectLock(vm);
1021 driver = QEMU_DOMAIN_PRIVATE(vm)->driver;
1022 event = virDomainEventPMWakeupNewFromObj(vm);
1024 /* Don't set domain status back to running if it wasn't paused
1025 * from guest side, otherwise it can just cause confusion.
1027 if (virDomainObjGetState(vm, NULL) == VIR_DOMAIN_PMSUSPENDED) {
1028 VIR_DEBUG("Transitioned guest %s from pmsuspended to running "
1029 "state due to QMP wakeup event", vm->def->name);
1031 virDomainObjSetState(vm, VIR_DOMAIN_RUNNING,
1032 VIR_DOMAIN_RUNNING_WAKEUP);
1033 lifecycleEvent = virDomainEventLifecycleNewFromObj(vm,
1034 VIR_DOMAIN_EVENT_STARTED,
1035 VIR_DOMAIN_EVENT_STARTED_WAKEUP);
1036 qemuDomainSaveStatus(vm);
1039 virObjectUnlock(vm);
1040 virObjectEventStateQueue(driver->domainEventState, event);
1041 virObjectEventStateQueue(driver->domainEventState, lifecycleEvent);
1044 static void
1045 qemuProcessHandlePMSuspend(qemuMonitor *mon G_GNUC_UNUSED,
1046 virDomainObj *vm)
1048 virQEMUDriver *driver;
1049 virObjectEvent *event = NULL;
1050 virObjectEvent *lifecycleEvent = NULL;
1052 virObjectLock(vm);
1053 driver = QEMU_DOMAIN_PRIVATE(vm)->driver;
1054 event = virDomainEventPMSuspendNewFromObj(vm);
1056 if (virDomainObjGetState(vm, NULL) == VIR_DOMAIN_RUNNING) {
1057 qemuDomainObjPrivate *priv = vm->privateData;
1058 VIR_DEBUG("Transitioned guest %s to pmsuspended state due to "
1059 "QMP suspend event", vm->def->name);
1061 virDomainObjSetState(vm, VIR_DOMAIN_PMSUSPENDED,
1062 VIR_DOMAIN_PMSUSPENDED_UNKNOWN);
1063 lifecycleEvent =
1064 virDomainEventLifecycleNewFromObj(vm,
1065 VIR_DOMAIN_EVENT_PMSUSPENDED,
1066 VIR_DOMAIN_EVENT_PMSUSPENDED_MEMORY);
1067 qemuDomainSaveStatus(vm);
1069 if (priv->agent)
1070 qemuAgentNotifyEvent(priv->agent, QEMU_AGENT_EVENT_SUSPEND);
1073 virObjectUnlock(vm);
1075 virObjectEventStateQueue(driver->domainEventState, event);
1076 virObjectEventStateQueue(driver->domainEventState, lifecycleEvent);
1079 static void
1080 qemuProcessHandleBalloonChange(qemuMonitor *mon G_GNUC_UNUSED,
1081 virDomainObj *vm,
1082 unsigned long long actual)
1084 virQEMUDriver *driver;
1085 virObjectEvent *event = NULL;
1086 size_t i;
1088 virObjectLock(vm);
1089 driver = QEMU_DOMAIN_PRIVATE(vm)->driver;
1090 event = virDomainEventBalloonChangeNewFromObj(vm, actual);
1092 /* We want the balloon size stored in domain definition to
1093 * account for the actual size of virtio-mem too. But the
1094 * balloon size as reported by QEMU (@actual) contains just
1095 * the balloon size without any virtio-mem. Do a wee bit of
1096 * math to fix it. */
1097 VIR_DEBUG("balloon size before fix is %lld", actual);
1098 for (i = 0; i < vm->def->nmems; i++) {
1099 if (vm->def->mems[i]->model == VIR_DOMAIN_MEMORY_MODEL_VIRTIO_MEM)
1100 actual += vm->def->mems[i]->target.virtio_mem.currentsize;
1103 VIR_DEBUG("Updating balloon from %lld to %lld kb",
1104 vm->def->mem.cur_balloon, actual);
1105 vm->def->mem.cur_balloon = actual;
1107 qemuDomainSaveStatus(vm);
1108 virObjectUnlock(vm);
1110 virObjectEventStateQueue(driver->domainEventState, event);
1113 static void
1114 qemuProcessHandlePMSuspendDisk(qemuMonitor *mon G_GNUC_UNUSED,
1115 virDomainObj *vm)
1117 virQEMUDriver *driver;
1118 virObjectEvent *event = NULL;
1119 virObjectEvent *lifecycleEvent = NULL;
1121 virObjectLock(vm);
1122 driver = QEMU_DOMAIN_PRIVATE(vm)->driver;
1123 event = virDomainEventPMSuspendDiskNewFromObj(vm);
1125 if (virDomainObjGetState(vm, NULL) == VIR_DOMAIN_RUNNING) {
1126 qemuDomainObjPrivate *priv = vm->privateData;
1127 VIR_DEBUG("Transitioned guest %s to pmsuspended state due to "
1128 "QMP suspend_disk event", vm->def->name);
1130 virDomainObjSetState(vm, VIR_DOMAIN_PMSUSPENDED,
1131 VIR_DOMAIN_PMSUSPENDED_UNKNOWN);
1132 lifecycleEvent =
1133 virDomainEventLifecycleNewFromObj(vm,
1134 VIR_DOMAIN_EVENT_PMSUSPENDED,
1135 VIR_DOMAIN_EVENT_PMSUSPENDED_DISK);
1136 qemuDomainSaveStatus(vm);
1138 if (priv->agent)
1139 qemuAgentNotifyEvent(priv->agent, QEMU_AGENT_EVENT_SUSPEND);
1142 virObjectUnlock(vm);
1144 virObjectEventStateQueue(driver->domainEventState, event);
1145 virObjectEventStateQueue(driver->domainEventState, lifecycleEvent);
1149 static void
1150 qemuProcessHandleGuestPanic(qemuMonitor *mon G_GNUC_UNUSED,
1151 virDomainObj *vm,
1152 qemuMonitorEventPanicInfo *info)
1154 virObjectLock(vm);
1156 qemuProcessEventSubmit(vm, QEMU_PROCESS_EVENT_GUESTPANIC,
1157 vm->def->onCrash, 0, info);
1159 virObjectUnlock(vm);
1163 void
1164 qemuProcessHandleDeviceDeleted(qemuMonitor *mon G_GNUC_UNUSED,
1165 virDomainObj *vm,
1166 const char *devAlias)
1168 virObjectLock(vm);
1170 VIR_DEBUG("Device %s removed from domain %p %s",
1171 devAlias, vm, vm->def->name);
1173 if (qemuDomainSignalDeviceRemoval(vm, devAlias,
1174 QEMU_DOMAIN_UNPLUGGING_DEVICE_STATUS_OK))
1175 goto cleanup;
1177 qemuProcessEventSubmit(vm, QEMU_PROCESS_EVENT_DEVICE_DELETED,
1178 0, 0, g_strdup(devAlias));
1180 cleanup:
1181 virObjectUnlock(vm);
1185 static void
1186 qemuProcessHandleDeviceUnplugErr(qemuMonitor *mon G_GNUC_UNUSED,
1187 virDomainObj *vm,
1188 const char *devPath,
1189 const char *devAlias)
1191 virQEMUDriver *driver;
1192 virObjectEvent *event = NULL;
1194 virObjectLock(vm);
1195 driver = QEMU_DOMAIN_PRIVATE(vm)->driver;
1197 VIR_DEBUG("Device %s QOM path %s failed to be removed from domain %p %s",
1198 devAlias, devPath, vm, vm->def->name);
1201 * DEVICE_UNPLUG_GUEST_ERROR will always contain the QOM path
1202 * but QEMU will not guarantee that devAlias will be provided.
1204 * However, given that all Libvirt devices have a devAlias, we
1205 * can ignore the case where QEMU emitted this event without it.
1207 if (!devAlias)
1208 goto cleanup;
1210 qemuDomainSignalDeviceRemoval(vm, devAlias,
1211 QEMU_DOMAIN_UNPLUGGING_DEVICE_STATUS_GUEST_REJECTED);
1213 event = virDomainEventDeviceRemovalFailedNewFromObj(vm, devAlias);
1215 cleanup:
1216 virObjectUnlock(vm);
1217 virObjectEventStateQueue(driver->domainEventState, event);
1223 * Meaning of fields reported by the event according to the ACPI standard:
1224 * @source:
1225 * 0x00 - 0xff: Notification values, as passed at the request time
1226 * 0x100: Operating System Shutdown Processing
1227 * 0x103: Ejection processing
1228 * 0x200: Insertion processing
1229 * other values are reserved
1231 * @status:
1232 * general values
1233 * 0x00: success
1234 * 0x01: non-specific failure
1235 * 0x02: unrecognized notify code
1236 * 0x03 - 0x7f: reserved
1237 * other values are specific to the notification type (see below)
1239 * for the 0x100 source the following additional codes are standardized:
1240 * 0x80: OS Shutdown request denied
1241 * 0x81: OS Shutdown in progress
1242 * 0x82: OS Shutdown completed
1243 * 0x83: OS Graceful shutdown not supported
1244 * other higher values are reserved
1246 * for the 0x003 (Ejection request) and 0x103 (Ejection processing) source
1247 * the following additional codes are standardized:
1248 * 0x80: Device ejection not supported by OSPM
1249 * 0x81: Device in use by application
1250 * 0x82: Device Busy
1251 * 0x83: Ejection dependency is busy or not supported for ejection by OSPM
1252 * 0x84: Ejection is in progress (pending)
1253 * other higher values are reserved
1255 * for the 0x200 source the following additional codes are standardized:
1256 * 0x80: Device insertion in progress (pending)
1257 * 0x81: Device driver load failure
1258 * 0x82: Device insertion not supported by OSPM
1259 * 0x83-0x8F: Reserved
1260 * 0x90-0x9F: Insertion failure - Resources Unavailable as described by the
1261 * following bit encodings:
1262 * Bit [3]: Bus or Segment Numbers
1263 * Bit [2]: Interrupts
1264 * Bit [1]: I/O
1265 * Bit [0]: Memory
1266 * other higher values are reserved
1268 * Other fields and semantics are specific to the qemu handling of the event.
1269 * - @alias may be NULL for successful unplug operations
1270 * - @slotType describes the device type a bit more closely, currently the
1271 * only known value is 'DIMM'
1272 * - @slot describes the specific device
1274 * Note that qemu does not emit the event for all the documented sources or
1275 * devices.
1277 static void
1278 qemuProcessHandleAcpiOstInfo(qemuMonitor *mon G_GNUC_UNUSED,
1279 virDomainObj *vm,
1280 const char *alias,
1281 const char *slotType,
1282 const char *slot,
1283 unsigned int source,
1284 unsigned int status)
1286 virQEMUDriver *driver;
1287 virObjectEvent *event = NULL;
1289 virObjectLock(vm);
1290 driver = QEMU_DOMAIN_PRIVATE(vm)->driver;
1292 VIR_DEBUG("ACPI OST info for device %s domain %p %s. "
1293 "slotType='%s' slot='%s' source=%u status=%u",
1294 NULLSTR(alias), vm, vm->def->name, slotType, slot, source, status);
1296 if (!alias)
1297 goto cleanup;
1299 if (STREQ(slotType, "DIMM")) {
1300 if ((source == 0x003 || source == 0x103) &&
1301 (status == 0x01 || (status >= 0x80 && status <= 0x83))) {
1302 qemuDomainSignalDeviceRemoval(vm, alias,
1303 QEMU_DOMAIN_UNPLUGGING_DEVICE_STATUS_GUEST_REJECTED);
1305 event = virDomainEventDeviceRemovalFailedNewFromObj(vm, alias);
1309 cleanup:
1310 virObjectUnlock(vm);
1311 virObjectEventStateQueue(driver->domainEventState, event);
1315 static void
1316 qemuProcessHandleBlockThreshold(qemuMonitor *mon G_GNUC_UNUSED,
1317 virDomainObj *vm,
1318 const char *nodename,
1319 unsigned long long threshold,
1320 unsigned long long excess)
1322 qemuDomainObjPrivate *priv;
1323 virQEMUDriver *driver;
1324 virObjectEvent *eventSource = NULL;
1325 virObjectEvent *eventDevice = NULL;
1326 virDomainDiskDef *disk;
1327 virStorageSource *src;
1328 const char *path = NULL;
1330 virObjectLock(vm);
1332 priv = vm->privateData;
1333 driver = priv->driver;
1335 VIR_DEBUG("BLOCK_WRITE_THRESHOLD event for block node '%s' in domain %p %s:"
1336 "threshold '%llu' exceeded by '%llu'",
1337 nodename, vm, vm->def->name, threshold, excess);
1339 if ((disk = qemuDomainDiskLookupByNodename(vm->def, priv->backup, nodename, &src))) {
1340 if (virStorageSourceIsLocalStorage(src))
1341 path = src->path;
1343 if (src == disk->src &&
1344 !src->thresholdEventWithIndex) {
1345 g_autofree char *dev = qemuDomainDiskBackingStoreGetName(disk, 0);
1347 eventDevice = virDomainEventBlockThresholdNewFromObj(vm, dev, path,
1348 threshold, excess);
1351 if (src->id != 0) {
1352 g_autofree char *dev = qemuDomainDiskBackingStoreGetName(disk, src->id);
1354 eventSource = virDomainEventBlockThresholdNewFromObj(vm, dev, path,
1355 threshold, excess);
1359 virObjectUnlock(vm);
1360 virObjectEventStateQueue(driver->domainEventState, eventDevice);
1361 virObjectEventStateQueue(driver->domainEventState, eventSource);
1365 static void
1366 qemuProcessHandleNetdevStreamDisconnected(qemuMonitor *mon G_GNUC_UNUSED,
1367 virDomainObj *vm,
1368 const char *devAlias)
1370 virObjectLock(vm);
1372 VIR_DEBUG("Device %s Netdev Stream Disconnected in domain %p %s",
1373 devAlias, vm, vm->def->name);
1375 qemuProcessEventSubmit(vm, QEMU_PROCESS_EVENT_NETDEV_STREAM_DISCONNECTED,
1376 0, 0, g_strdup(devAlias));
1378 virObjectUnlock(vm);
1382 static void
1383 qemuProcessHandleNicRxFilterChanged(qemuMonitor *mon G_GNUC_UNUSED,
1384 virDomainObj *vm,
1385 const char *devAlias)
1387 virObjectLock(vm);
1389 VIR_DEBUG("Device %s RX Filter changed in domain %p %s",
1390 devAlias, vm, vm->def->name);
1392 qemuProcessEventSubmit(vm, QEMU_PROCESS_EVENT_NIC_RX_FILTER_CHANGED,
1393 0, 0, g_strdup(devAlias));
1395 virObjectUnlock(vm);
1399 static void
1400 qemuProcessHandleSerialChanged(qemuMonitor *mon G_GNUC_UNUSED,
1401 virDomainObj *vm,
1402 const char *devAlias,
1403 bool connected)
1405 virObjectLock(vm);
1407 VIR_DEBUG("Serial port %s state changed to '%d' in domain %p %s",
1408 devAlias, connected, vm, vm->def->name);
1410 qemuProcessEventSubmit(vm, QEMU_PROCESS_EVENT_SERIAL_CHANGED,
1411 connected, 0, g_strdup(devAlias));
1413 virObjectUnlock(vm);
1417 static void
1418 qemuProcessHandleSpiceMigrated(qemuMonitor *mon G_GNUC_UNUSED,
1419 virDomainObj *vm)
1421 qemuDomainJobPrivate *jobPriv;
1423 virObjectLock(vm);
1425 VIR_DEBUG("Spice migration completed for domain %p %s",
1426 vm, vm->def->name);
1428 jobPriv = vm->job->privateData;
1429 if (vm->job->asyncJob != VIR_ASYNC_JOB_MIGRATION_OUT) {
1430 VIR_DEBUG("got SPICE_MIGRATE_COMPLETED event without a migration job");
1431 goto cleanup;
1434 jobPriv->spiceMigrated = true;
1435 virDomainObjBroadcast(vm);
1437 cleanup:
1438 virObjectUnlock(vm);
1442 static void
1443 qemuProcessHandleMigrationStatus(qemuMonitor *mon G_GNUC_UNUSED,
1444 virDomainObj *vm,
1445 int status)
1447 qemuDomainObjPrivate *priv;
1448 qemuDomainJobDataPrivate *privJob = NULL;
1449 virQEMUDriver *driver;
1450 virObjectEvent *event = NULL;
1451 virDomainState state;
1452 int reason;
1454 virObjectLock(vm);
1456 VIR_DEBUG("Migration of domain %p %s changed state to %s",
1457 vm, vm->def->name,
1458 qemuMonitorMigrationStatusTypeToString(status));
1460 priv = vm->privateData;
1461 driver = priv->driver;
1463 if (vm->job->asyncJob == VIR_ASYNC_JOB_NONE) {
1464 VIR_DEBUG("got MIGRATION event without a migration job");
1465 goto cleanup;
1468 privJob = vm->job->current->privateData;
1470 privJob->stats.mig.status = status;
1471 virDomainObjBroadcast(vm);
1473 state = virDomainObjGetState(vm, &reason);
1475 switch ((qemuMonitorMigrationStatus) status) {
1476 case QEMU_MONITOR_MIGRATION_STATUS_POSTCOPY:
1477 if (vm->job->asyncJob == VIR_ASYNC_JOB_MIGRATION_OUT &&
1478 state == VIR_DOMAIN_PAUSED &&
1479 reason == VIR_DOMAIN_PAUSED_MIGRATION) {
1480 VIR_DEBUG("Correcting paused state reason for domain %s to %s",
1481 vm->def->name,
1482 virDomainPausedReasonTypeToString(VIR_DOMAIN_PAUSED_POSTCOPY));
1484 virDomainObjSetState(vm, VIR_DOMAIN_PAUSED, VIR_DOMAIN_PAUSED_POSTCOPY);
1485 event = virDomainEventLifecycleNewFromObj(vm,
1486 VIR_DOMAIN_EVENT_SUSPENDED,
1487 VIR_DOMAIN_EVENT_SUSPENDED_POSTCOPY);
1488 qemuDomainSaveStatus(vm);
1490 break;
1492 case QEMU_MONITOR_MIGRATION_STATUS_POSTCOPY_PAUSED:
1493 if (vm->job->asyncJob == VIR_ASYNC_JOB_MIGRATION_OUT &&
1494 state == VIR_DOMAIN_PAUSED) {
1495 /* At this point no thread is watching the migration progress on
1496 * the source as it is just waiting for the Finish phase to end.
1497 * Thus we need to handle the event here. */
1498 qemuMigrationSrcPostcopyFailed(vm);
1499 qemuDomainSaveStatus(vm);
1501 break;
1503 case QEMU_MONITOR_MIGRATION_STATUS_POSTCOPY_RECOVER:
1504 if (virDomainObjIsFailedPostcopy(vm, vm->job)) {
1505 int eventType = -1;
1506 int eventDetail = -1;
1508 if (state == VIR_DOMAIN_PAUSED) {
1509 reason = VIR_DOMAIN_PAUSED_POSTCOPY;
1510 eventType = VIR_DOMAIN_EVENT_SUSPENDED;
1511 eventDetail = qemuDomainPausedReasonToSuspendedEvent(reason);
1512 } else {
1513 reason = VIR_DOMAIN_RUNNING_POSTCOPY;
1514 eventType = VIR_DOMAIN_EVENT_RESUMED;
1515 eventDetail = qemuDomainRunningReasonToResumeEvent(reason);
1518 VIR_DEBUG("Post-copy migration recovered; correcting state for domain '%s' to %s/%s",
1519 vm->def->name,
1520 virDomainStateTypeToString(state),
1521 NULLSTR(virDomainStateReasonToString(state, reason)));
1522 vm->job->asyncPaused = false;
1523 virDomainObjSetState(vm, state, reason);
1524 event = virDomainEventLifecycleNewFromObj(vm, eventType, eventDetail);
1525 qemuDomainSaveStatus(vm);
1527 break;
1529 case QEMU_MONITOR_MIGRATION_STATUS_COMPLETED:
1530 /* A post-copy migration marked as failed when reconnecting to a domain
1531 * with running migration may actually still be running, but we're not
1532 * watching it in any thread. Let's make sure the migration is properly
1533 * finished in case we get a "completed" event.
1535 if (virDomainObjIsPostcopy(vm, vm->job) &&
1536 vm->job->phase == QEMU_MIGRATION_PHASE_POSTCOPY_FAILED &&
1537 vm->job->asyncOwner == 0) {
1538 qemuProcessEventSubmit(vm, QEMU_PROCESS_EVENT_UNATTENDED_MIGRATION,
1539 vm->job->asyncJob, status, NULL);
1541 break;
1543 case QEMU_MONITOR_MIGRATION_STATUS_INACTIVE:
1544 case QEMU_MONITOR_MIGRATION_STATUS_SETUP:
1545 case QEMU_MONITOR_MIGRATION_STATUS_ACTIVE:
1546 case QEMU_MONITOR_MIGRATION_STATUS_PRE_SWITCHOVER:
1547 case QEMU_MONITOR_MIGRATION_STATUS_DEVICE:
1548 case QEMU_MONITOR_MIGRATION_STATUS_ERROR:
1549 case QEMU_MONITOR_MIGRATION_STATUS_CANCELLING:
1550 case QEMU_MONITOR_MIGRATION_STATUS_CANCELLED:
1551 case QEMU_MONITOR_MIGRATION_STATUS_WAIT_UNPLUG:
1552 case QEMU_MONITOR_MIGRATION_STATUS_LAST:
1553 default:
1554 break;
1557 cleanup:
1558 virObjectUnlock(vm);
1559 virObjectEventStateQueue(driver->domainEventState, event);
1563 static void
1564 qemuProcessHandleMigrationPass(qemuMonitor *mon G_GNUC_UNUSED,
1565 virDomainObj *vm,
1566 int pass)
1568 qemuDomainObjPrivate *priv;
1570 virObjectLock(vm);
1572 VIR_DEBUG("Migrating domain %p %s, iteration %d",
1573 vm, vm->def->name, pass);
1575 priv = vm->privateData;
1576 if (vm->job->asyncJob == VIR_ASYNC_JOB_NONE) {
1577 VIR_DEBUG("got MIGRATION_PASS event without a migration job");
1578 goto cleanup;
1581 virObjectEventStateQueue(priv->driver->domainEventState,
1582 virDomainEventMigrationIterationNewFromObj(vm, pass));
1584 cleanup:
1585 virObjectUnlock(vm);
1589 static void
1590 qemuProcessHandleDumpCompleted(qemuMonitor *mon G_GNUC_UNUSED,
1591 virDomainObj *vm,
1592 int status,
1593 qemuMonitorDumpStats *stats,
1594 const char *error)
1596 qemuDomainJobPrivate *jobPriv;
1597 qemuDomainJobDataPrivate *privJobCurrent = NULL;
1599 virObjectLock(vm);
1601 VIR_DEBUG("Dump completed for domain %p %s with stats=%p error='%s'",
1602 vm, vm->def->name, stats, NULLSTR(error));
1604 jobPriv = vm->job->privateData;
1605 if (vm->job->asyncJob == VIR_ASYNC_JOB_NONE) {
1606 VIR_DEBUG("got DUMP_COMPLETED event without a dump_completed job");
1607 goto cleanup;
1609 privJobCurrent = vm->job->current->privateData;
1610 jobPriv->dumpCompleted = true;
1611 privJobCurrent->stats.dump = *stats;
1612 vm->job->error = g_strdup(error);
1614 /* Force error if extracting the DUMP_COMPLETED status failed */
1615 if (!error && status < 0) {
1616 vm->job->error = g_strdup(virGetLastErrorMessage());
1617 privJobCurrent->stats.dump.status = QEMU_MONITOR_DUMP_STATUS_FAILED;
1620 virDomainObjBroadcast(vm);
1622 cleanup:
1623 virResetLastError();
1624 virObjectUnlock(vm);
1628 static void
1629 qemuProcessHandlePRManagerStatusChanged(qemuMonitor *mon G_GNUC_UNUSED,
1630 virDomainObj *vm,
1631 const char *prManager,
1632 bool connected)
1634 qemuDomainObjPrivate *priv;
1635 const char *managedAlias = qemuDomainGetManagedPRAlias();
1637 virObjectLock(vm);
1639 VIR_DEBUG("pr-manager %s status changed for domain %p %s connected=%d",
1640 prManager, vm, vm->def->name, connected);
1642 /* Connect events are boring. */
1643 if (connected)
1644 goto cleanup;
1646 /* Disconnect events are more interesting. */
1648 if (STRNEQ(prManager, managedAlias)) {
1649 VIR_DEBUG("pr-manager %s not managed, ignoring event",
1650 prManager);
1651 goto cleanup;
1654 priv = vm->privateData;
1655 priv->prDaemonRunning = false;
1657 qemuProcessEventSubmit(vm, QEMU_PROCESS_EVENT_PR_DISCONNECT,
1658 0, 0, NULL);
1660 cleanup:
1661 virObjectUnlock(vm);
1665 static void
1666 qemuProcessHandleRdmaGidStatusChanged(qemuMonitor *mon G_GNUC_UNUSED,
1667 virDomainObj *vm,
1668 const char *netdev,
1669 bool gid_status,
1670 unsigned long long subnet_prefix,
1671 unsigned long long interface_id)
1673 qemuMonitorRdmaGidStatus *info = NULL;
1675 virObjectLock(vm);
1677 VIR_DEBUG("netdev=%s,gid_status=%d,subnet_prefix=0x%llx,interface_id=0x%llx",
1678 netdev, gid_status, subnet_prefix, interface_id);
1680 info = g_new0(qemuMonitorRdmaGidStatus, 1);
1682 info->netdev = g_strdup(netdev);
1684 info->gid_status = gid_status;
1685 info->subnet_prefix = subnet_prefix;
1686 info->interface_id = interface_id;
1688 qemuProcessEventSubmit(vm, QEMU_PROCESS_EVENT_RDMA_GID_STATUS_CHANGED,
1689 0, 0, info);
1691 virObjectUnlock(vm);
1695 static void
1696 qemuProcessHandleGuestCrashloaded(qemuMonitor *mon G_GNUC_UNUSED,
1697 virDomainObj *vm)
1699 virObjectLock(vm);
1701 qemuProcessEventSubmit(vm, QEMU_PROCESS_EVENT_GUEST_CRASHLOADED,
1702 0, 0, NULL);
1704 virObjectUnlock(vm);
1708 static void
1709 qemuProcessHandleMemoryFailure(qemuMonitor *mon G_GNUC_UNUSED,
1710 virDomainObj *vm,
1711 qemuMonitorEventMemoryFailure *mfp)
1713 virQEMUDriver *driver;
1714 virObjectEvent *event = NULL;
1715 virDomainMemoryFailureRecipientType recipient;
1716 virDomainMemoryFailureActionType action;
1717 unsigned int flags = 0;
1719 virObjectLock(vm);
1720 driver = QEMU_DOMAIN_PRIVATE(vm)->driver;
1722 switch (mfp->recipient) {
1723 case QEMU_MONITOR_MEMORY_FAILURE_RECIPIENT_HYPERVISOR:
1724 recipient = VIR_DOMAIN_EVENT_MEMORY_FAILURE_RECIPIENT_HYPERVISOR;
1725 break;
1726 case QEMU_MONITOR_MEMORY_FAILURE_RECIPIENT_GUEST:
1727 recipient = VIR_DOMAIN_EVENT_MEMORY_FAILURE_RECIPIENT_GUEST;
1728 break;
1729 case QEMU_MONITOR_MEMORY_FAILURE_RECIPIENT_LAST:
1730 default:
1731 return;
1734 switch (mfp->action) {
1735 case QEMU_MONITOR_MEMORY_FAILURE_ACTION_IGNORE:
1736 action = VIR_DOMAIN_EVENT_MEMORY_FAILURE_ACTION_IGNORE;
1737 break;
1738 case QEMU_MONITOR_MEMORY_FAILURE_ACTION_INJECT:
1739 action = VIR_DOMAIN_EVENT_MEMORY_FAILURE_ACTION_INJECT;
1740 break;
1741 case QEMU_MONITOR_MEMORY_FAILURE_ACTION_FATAL:
1742 action = VIR_DOMAIN_EVENT_MEMORY_FAILURE_ACTION_FATAL;
1743 break;
1744 case QEMU_MONITOR_MEMORY_FAILURE_ACTION_RESET:
1745 action = VIR_DOMAIN_EVENT_MEMORY_FAILURE_ACTION_RESET;
1746 break;
1747 case QEMU_MONITOR_MEMORY_FAILURE_ACTION_LAST:
1748 default:
1749 return;
1752 if (mfp->action_required)
1753 flags |= VIR_DOMAIN_MEMORY_FAILURE_ACTION_REQUIRED;
1754 if (mfp->recursive)
1755 flags |= VIR_DOMAIN_MEMORY_FAILURE_RECURSIVE;
1757 event = virDomainEventMemoryFailureNewFromObj(vm, recipient, action, flags);
1759 virObjectUnlock(vm);
1761 virObjectEventStateQueue(driver->domainEventState, event);
1765 static void
1766 qemuProcessHandleMemoryDeviceSizeChange(qemuMonitor *mon G_GNUC_UNUSED,
1767 virDomainObj *vm,
1768 const char *devAlias,
1769 unsigned long long size)
1771 qemuMonitorMemoryDeviceSizeChange *info = NULL;
1773 virObjectLock(vm);
1775 VIR_DEBUG("Memory device '%s' changed size to '%llu' in domain '%s'",
1776 devAlias, size, vm->def->name);
1778 info = g_new0(qemuMonitorMemoryDeviceSizeChange, 1);
1779 info->devAlias = g_strdup(devAlias);
1780 info->size = size;
1782 qemuProcessEventSubmit(vm, QEMU_PROCESS_EVENT_MEMORY_DEVICE_SIZE_CHANGE,
1783 0, 0, info);
1785 virObjectUnlock(vm);
1789 static qemuMonitorCallbacks monitorCallbacks = {
1790 .eofNotify = qemuProcessHandleMonitorEOF,
1791 .errorNotify = qemuProcessHandleMonitorError,
1792 .domainEvent = qemuProcessHandleEvent,
1793 .domainShutdown = qemuProcessHandleShutdown,
1794 .domainStop = qemuProcessHandleStop,
1795 .domainResume = qemuProcessHandleResume,
1796 .domainReset = qemuProcessHandleReset,
1797 .domainRTCChange = qemuProcessHandleRTCChange,
1798 .domainWatchdog = qemuProcessHandleWatchdog,
1799 .domainIOError = qemuProcessHandleIOError,
1800 .domainGraphics = qemuProcessHandleGraphics,
1801 .jobStatusChange = qemuProcessHandleJobStatusChange,
1802 .domainTrayChange = qemuProcessHandleTrayChange,
1803 .domainPMWakeup = qemuProcessHandlePMWakeup,
1804 .domainPMSuspend = qemuProcessHandlePMSuspend,
1805 .domainBalloonChange = qemuProcessHandleBalloonChange,
1806 .domainPMSuspendDisk = qemuProcessHandlePMSuspendDisk,
1807 .domainGuestPanic = qemuProcessHandleGuestPanic,
1808 .domainDeviceDeleted = qemuProcessHandleDeviceDeleted,
1809 .domainNicRxFilterChanged = qemuProcessHandleNicRxFilterChanged,
1810 .domainSerialChange = qemuProcessHandleSerialChanged,
1811 .domainSpiceMigrated = qemuProcessHandleSpiceMigrated,
1812 .domainMigrationStatus = qemuProcessHandleMigrationStatus,
1813 .domainMigrationPass = qemuProcessHandleMigrationPass,
1814 .domainAcpiOstInfo = qemuProcessHandleAcpiOstInfo,
1815 .domainBlockThreshold = qemuProcessHandleBlockThreshold,
1816 .domainDumpCompleted = qemuProcessHandleDumpCompleted,
1817 .domainPRManagerStatusChanged = qemuProcessHandlePRManagerStatusChanged,
1818 .domainRdmaGidStatusChanged = qemuProcessHandleRdmaGidStatusChanged,
1819 .domainGuestCrashloaded = qemuProcessHandleGuestCrashloaded,
1820 .domainMemoryFailure = qemuProcessHandleMemoryFailure,
1821 .domainMemoryDeviceSizeChange = qemuProcessHandleMemoryDeviceSizeChange,
1822 .domainDeviceUnplugError = qemuProcessHandleDeviceUnplugErr,
1823 .domainNetdevStreamDisconnected = qemuProcessHandleNetdevStreamDisconnected,
1826 static void
1827 qemuProcessMonitorReportLogError(qemuMonitor *mon,
1828 const char *msg,
1829 void *opaque);
1832 static void
1833 qemuProcessMonitorLogFree(void *opaque)
1835 qemuLogContext *logCtxt = opaque;
1836 g_clear_object(&logCtxt);
1840 static int
1841 qemuProcessInitMonitor(virDomainObj *vm,
1842 virDomainAsyncJob asyncJob)
1844 int ret;
1846 if (qemuDomainObjEnterMonitorAsync(vm, asyncJob) < 0)
1847 return -1;
1849 ret = qemuMonitorSetCapabilities(QEMU_DOMAIN_PRIVATE(vm)->mon);
1851 qemuDomainObjExitMonitor(vm);
1853 return ret;
1857 static int
1858 qemuConnectMonitor(virQEMUDriver *driver,
1859 virDomainObj *vm,
1860 int asyncJob,
1861 qemuLogContext *logCtxt,
1862 bool reconnect)
1864 qemuDomainObjPrivate *priv = vm->privateData;
1865 qemuMonitor *mon = NULL;
1867 if (qemuSecuritySetDaemonSocketLabel(driver->securityManager, vm->def) < 0) {
1868 VIR_ERROR(_("Failed to set security context for monitor for %1$s"),
1869 vm->def->name);
1870 return -1;
1873 ignore_value(virTimeMillisNow(&priv->monStart));
1875 mon = qemuMonitorOpen(vm,
1876 priv->monConfig,
1877 virEventThreadGetContext(priv->eventThread),
1878 &monitorCallbacks);
1880 if (mon && logCtxt) {
1881 g_object_ref(logCtxt);
1882 qemuMonitorSetDomainLog(mon,
1883 qemuProcessMonitorReportLogError,
1884 logCtxt,
1885 qemuProcessMonitorLogFree);
1888 priv->monStart = 0;
1889 priv->mon = mon;
1891 if (qemuSecurityClearSocketLabel(driver->securityManager, vm->def) < 0) {
1892 VIR_ERROR(_("Failed to clear security context for monitor for %1$s"),
1893 vm->def->name);
1894 return -1;
1897 if (priv->mon == NULL) {
1898 VIR_INFO("Failed to connect monitor for %s", vm->def->name);
1899 return -1;
1902 if (qemuProcessInitMonitor(vm, asyncJob) < 0)
1903 return -1;
1905 if (qemuMigrationCapsCheck(vm, asyncJob, reconnect) < 0)
1906 return -1;
1908 return 0;
1912 static int
1913 qemuProcessReportLogError(qemuLogContext *logCtxt,
1914 const char *msgprefix)
1916 g_autofree char *logmsg = NULL;
1918 /* assume that 1024 chars of qemu log is the right balance */
1919 if (qemuLogContextReadFiltered(logCtxt, &logmsg, 1024) < 0)
1920 return -1;
1922 virResetLastError();
1923 if (virStringIsEmpty(logmsg))
1924 virReportError(VIR_ERR_INTERNAL_ERROR, "%s", msgprefix);
1925 else
1926 virReportError(VIR_ERR_INTERNAL_ERROR, "%s: %s", /* _( silence sc_libvirt_unmarked_diagnostics */
1927 msgprefix, logmsg);
1929 return 0;
1933 static void
1934 qemuProcessMonitorReportLogError(qemuMonitor *mon G_GNUC_UNUSED,
1935 const char *msg,
1936 void *opaque)
1938 qemuLogContext *logCtxt = opaque;
1939 qemuProcessReportLogError(logCtxt, msg);
1943 static int
1944 qemuProcessLookupPTYs(virDomainChrDef **devices,
1945 int count,
1946 GHashTable *info)
1948 size_t i;
1950 for (i = 0; i < count; i++) {
1951 g_autofree char *id = NULL;
1952 virDomainChrDef *chr = devices[i];
1953 if (chr->source->type == VIR_DOMAIN_CHR_TYPE_PTY) {
1954 qemuMonitorChardevInfo *entry;
1956 id = g_strdup_printf("char%s", chr->info.alias);
1958 entry = virHashLookup(info, id);
1959 if (!entry || !entry->ptyPath) {
1960 if (chr->source->data.file.path == NULL) {
1961 /* neither the log output nor 'info chardev' had a
1962 * pty path for this chardev, report an error
1964 virReportError(VIR_ERR_INTERNAL_ERROR,
1965 _("no assigned pty for device %1$s"), id);
1966 return -1;
1967 } else {
1968 /* 'info chardev' had no pty path for this chardev,
1969 * but the log output had, so we're fine
1971 continue;
1975 g_free(chr->source->data.file.path);
1976 chr->source->data.file.path = g_strdup(entry->ptyPath);
1980 return 0;
1983 static int
1984 qemuProcessFindCharDevicePTYsMonitor(virDomainObj *vm,
1985 GHashTable *info)
1987 size_t i = 0;
1989 if (qemuProcessLookupPTYs(vm->def->serials, vm->def->nserials, info) < 0)
1990 return -1;
1992 if (qemuProcessLookupPTYs(vm->def->parallels, vm->def->nparallels,
1993 info) < 0)
1994 return -1;
1996 if (qemuProcessLookupPTYs(vm->def->channels, vm->def->nchannels, info) < 0)
1997 return -1;
1998 /* For historical reasons, console[0] can be just an alias
1999 * for serial[0]. That's why we need to update it as well. */
2000 if (vm->def->nconsoles) {
2001 virDomainChrDef *chr = vm->def->consoles[0];
2003 if (vm->def->nserials &&
2004 chr->deviceType == VIR_DOMAIN_CHR_DEVICE_TYPE_CONSOLE &&
2005 chr->targetType == VIR_DOMAIN_CHR_CONSOLE_TARGET_TYPE_SERIAL) {
2006 /* yes, the first console is just an alias for serials[0] */
2007 i = 1;
2008 virDomainChrSourceDefCopy(chr->source,
2009 ((vm->def->serials[0])->source));
2013 if (qemuProcessLookupPTYs(vm->def->consoles + i, vm->def->nconsoles - i,
2014 info) < 0)
2015 return -1;
2017 return 0;
2021 static void
2022 qemuProcessRefreshChannelVirtioState(virQEMUDriver *driver,
2023 virDomainObj *vm,
2024 GHashTable *info,
2025 int booted)
2027 size_t i;
2028 int agentReason = VIR_CONNECT_DOMAIN_EVENT_AGENT_LIFECYCLE_REASON_CHANNEL;
2029 qemuMonitorChardevInfo *entry;
2030 virObjectEvent *event = NULL;
2031 g_autofree char *id = NULL;
2033 if (booted)
2034 agentReason = VIR_CONNECT_DOMAIN_EVENT_AGENT_LIFECYCLE_REASON_DOMAIN_STARTED;
2036 for (i = 0; i < vm->def->nchannels; i++) {
2037 virDomainChrDef *chr = vm->def->channels[i];
2038 if (chr->targetType == VIR_DOMAIN_CHR_CHANNEL_TARGET_TYPE_VIRTIO) {
2040 VIR_FREE(id);
2041 id = g_strdup_printf("char%s", chr->info.alias);
2043 /* port state not reported */
2044 if (!(entry = virHashLookup(info, id)) ||
2045 !entry->state)
2046 continue;
2048 if (entry->state != VIR_DOMAIN_CHR_DEVICE_STATE_DEFAULT &&
2049 STREQ_NULLABLE(chr->target.name, "org.qemu.guest_agent.0") &&
2050 (event = virDomainEventAgentLifecycleNewFromObj(vm, entry->state,
2051 agentReason)))
2052 virObjectEventStateQueue(driver->domainEventState, event);
2054 chr->state = entry->state;
2061 qemuRefreshVirtioChannelState(virQEMUDriver *driver,
2062 virDomainObj *vm,
2063 virDomainAsyncJob asyncJob)
2065 qemuDomainObjPrivate *priv = vm->privateData;
2066 g_autoptr(GHashTable) info = NULL;
2067 int rc;
2069 if (qemuDomainObjEnterMonitorAsync(vm, asyncJob) < 0)
2070 return -1;
2072 rc = qemuMonitorGetChardevInfo(priv->mon, &info);
2073 qemuDomainObjExitMonitor(vm);
2075 if (rc < 0)
2076 return -1;
2078 qemuProcessRefreshChannelVirtioState(driver, vm, info, false);
2080 return 0;
2084 static int
2085 qemuProcessRefreshPRManagerState(virDomainObj *vm,
2086 GHashTable *info)
2088 qemuDomainObjPrivate *priv = vm->privateData;
2089 qemuMonitorPRManagerInfo *prManagerInfo;
2090 const char *managedAlias = qemuDomainGetManagedPRAlias();
2092 if (!(prManagerInfo = virHashLookup(info, managedAlias))) {
2093 virReportError(VIR_ERR_OPERATION_FAILED,
2094 _("missing info on pr-manager %1$s"),
2095 managedAlias);
2096 return -1;
2099 priv->prDaemonRunning = prManagerInfo->connected;
2101 if (!priv->prDaemonRunning &&
2102 qemuProcessStartManagedPRDaemon(vm) < 0)
2103 return -1;
2105 return 0;
2109 static int
2110 qemuRefreshPRManagerState(virDomainObj *vm)
2112 qemuDomainObjPrivate *priv = vm->privateData;
2113 g_autoptr(GHashTable) info = NULL;
2114 int rc;
2116 if (!virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_PR_MANAGER_HELPER) ||
2117 !qemuDomainDefHasManagedPR(vm))
2118 return 0;
2120 qemuDomainObjEnterMonitor(vm);
2121 rc = qemuMonitorGetPRManagerInfo(priv->mon, &info);
2122 qemuDomainObjExitMonitor(vm);
2124 if (rc < 0)
2125 return -1;
2128 return qemuProcessRefreshPRManagerState(vm, info);
2132 static int
2133 qemuProcessRefreshFdsetIndex(virDomainObj *vm)
2135 qemuDomainObjPrivate *priv = vm->privateData;
2136 g_autoptr(qemuMonitorFdsets) fdsets = NULL;
2137 size_t i;
2138 int rc;
2140 /* if the previous index was in the status XML we don't need to update it */
2141 if (priv->fdsetindexParsed)
2142 return 0;
2144 qemuDomainObjEnterMonitor(vm);
2145 rc = qemuMonitorQueryFdsets(priv->mon, &fdsets);
2146 qemuDomainObjExitMonitor(vm);
2148 if (rc < 0)
2149 return -1;
2151 for (i = 0; i < fdsets->nfdsets; i++) {
2152 if (fdsets->fdsets[i].id >= priv->fdsetindex)
2153 priv->fdsetindex = fdsets->fdsets[i].id + 1;
2156 return 0;
2160 static void
2161 qemuRefreshRTC(virDomainObj *vm)
2163 qemuDomainObjPrivate *priv = vm->privateData;
2164 time_t now, then;
2165 struct tm thenbits = { 0 };
2166 long localOffset;
2167 int rv;
2169 if (vm->def->clock.offset != VIR_DOMAIN_CLOCK_OFFSET_VARIABLE)
2170 return;
2172 qemuDomainObjEnterMonitor(vm);
2173 now = time(NULL);
2174 rv = qemuMonitorGetRTCTime(priv->mon, &thenbits);
2175 qemuDomainObjExitMonitor(vm);
2177 if (rv < 0)
2178 return;
2180 thenbits.tm_isdst = -1;
2181 if ((then = mktime(&thenbits)) == (time_t)-1) {
2182 virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
2183 _("Unable to convert time"));
2184 return;
2187 /* Thing is, @now is in local TZ but @then in UTC. */
2188 if (virTimeLocalOffsetFromUTC(&localOffset) < 0)
2189 return;
2191 vm->def->clock.data.variable.adjustment = then - now + localOffset;
2195 qemuProcessRefreshBalloonState(virDomainObj *vm,
2196 int asyncJob)
2198 unsigned long long balloon;
2199 size_t i;
2200 int rc;
2202 /* if no ballooning is available, the current size equals to the current
2203 * full memory size */
2204 if (!virDomainDefHasMemballoon(vm->def)) {
2205 vm->def->mem.cur_balloon = virDomainDefGetMemoryTotal(vm->def);
2206 return 0;
2209 if (qemuDomainObjEnterMonitorAsync(vm, asyncJob) < 0)
2210 return -1;
2212 rc = qemuMonitorGetBalloonInfo(qemuDomainGetMonitor(vm), &balloon);
2213 qemuDomainObjExitMonitor(vm);
2214 if (rc < 0)
2215 return -1;
2217 /* We want the balloon size stored in domain definition to
2218 * account for the actual size of virtio-mem too. But the
2219 * balloon size as reported by QEMU (@balloon) contains just
2220 * the balloon size without any virtio-mem. Do a wee bit of
2221 * math to fix it. */
2222 VIR_DEBUG("balloon size before fix is %lld", balloon);
2223 for (i = 0; i < vm->def->nmems; i++) {
2224 if (vm->def->mems[i]->model == VIR_DOMAIN_MEMORY_MODEL_VIRTIO_MEM)
2225 balloon += vm->def->mems[i]->target.virtio_mem.currentsize;
2227 VIR_DEBUG("Updating balloon from %lld to %lld kb",
2228 vm->def->mem.cur_balloon, balloon);
2229 vm->def->mem.cur_balloon = balloon;
2231 return 0;
2235 static int
2236 qemuProcessWaitForMonitor(virQEMUDriver *driver,
2237 virDomainObj *vm,
2238 int asyncJob,
2239 qemuLogContext *logCtxt)
2241 int ret = -1;
2242 g_autoptr(GHashTable) info = NULL;
2243 qemuDomainObjPrivate *priv = vm->privateData;
2245 VIR_DEBUG("Connect monitor to vm=%p name='%s'", vm, vm->def->name);
2247 if (qemuConnectMonitor(driver, vm, asyncJob, logCtxt, false) < 0)
2248 goto cleanup;
2250 /* Try to get the pty path mappings again via the monitor. This is much more
2251 * reliable if it's available.
2252 * Note that the monitor itself can be on a pty, so we still need to try the
2253 * log output method. */
2254 if (qemuDomainObjEnterMonitorAsync(vm, asyncJob) < 0)
2255 goto cleanup;
2256 ret = qemuMonitorGetChardevInfo(priv->mon, &info);
2257 VIR_DEBUG("qemuMonitorGetChardevInfo returned %i", ret);
2258 qemuDomainObjExitMonitor(vm);
2260 if (ret == 0) {
2261 if ((ret = qemuProcessFindCharDevicePTYsMonitor(vm, info)) < 0)
2262 goto cleanup;
2264 qemuProcessRefreshChannelVirtioState(driver, vm, info, true);
2267 cleanup:
2268 if (logCtxt && kill(vm->pid, 0) == -1 && errno == ESRCH) {
2269 qemuProcessReportLogError(logCtxt,
2270 _("process exited while connecting to monitor"));
2271 ret = -1;
2274 return ret;
2278 static int
2279 qemuProcessDetectIOThreadPIDs(virDomainObj *vm,
2280 int asyncJob)
2282 qemuDomainObjPrivate *priv = vm->privateData;
2283 qemuMonitorIOThreadInfo **iothreads = NULL;
2284 int niothreads = 0;
2285 int ret = -1;
2286 size_t i;
2288 /* Get the list of IOThreads from qemu */
2289 if (qemuDomainObjEnterMonitorAsync(vm, asyncJob) < 0)
2290 goto cleanup;
2291 ret = qemuMonitorGetIOThreads(priv->mon, &iothreads, &niothreads);
2292 qemuDomainObjExitMonitor(vm);
2293 if (ret < 0)
2294 goto cleanup;
2296 if (niothreads != vm->def->niothreadids) {
2297 virReportError(VIR_ERR_INTERNAL_ERROR,
2298 _("got wrong number of IOThread pids from QEMU monitor. got %1$d, wanted %2$zu"),
2299 niothreads, vm->def->niothreadids);
2300 goto cleanup;
2303 /* Nothing to do */
2304 if (niothreads == 0) {
2305 ret = 0;
2306 goto cleanup;
2309 for (i = 0; i < niothreads; i++) {
2310 virDomainIOThreadIDDef *iothrid;
2312 if (!(iothrid = virDomainIOThreadIDFind(vm->def,
2313 iothreads[i]->iothread_id))) {
2314 virReportError(VIR_ERR_INTERNAL_ERROR,
2315 _("iothread %1$d not found"),
2316 iothreads[i]->iothread_id);
2317 goto cleanup;
2319 iothrid->thread_id = iothreads[i]->thread_id;
2322 ret = 0;
2324 cleanup:
2325 if (iothreads) {
2326 for (i = 0; i < niothreads; i++)
2327 VIR_FREE(iothreads[i]);
2328 VIR_FREE(iothreads);
2330 return ret;
2334 static int
2335 qemuProcessGetAllCpuAffinity(virBitmap **cpumapRet)
2337 g_autoptr(virBitmap) isolCpus = NULL;
2339 *cpumapRet = NULL;
2341 if (!virHostCPUHasBitmap())
2342 return 0;
2344 if (!(*cpumapRet = virHostCPUGetOnlineBitmap()))
2345 return -1;
2347 if (virHostCPUGetIsolated(&isolCpus) < 0)
2348 return -1;
2350 if (isolCpus) {
2351 g_autofree char *isolCpusStr = virBitmapFormat(isolCpus);
2352 g_autofree char *cpumapRetStr = virBitmapFormat(*cpumapRet);
2354 VIR_INFO("Subtracting isolated CPUs %1$s from online CPUs %2$s",
2355 isolCpusStr, cpumapRetStr);
2357 virBitmapSubtract(*cpumapRet, isolCpus);
2360 return 0;
2365 * To be run between fork/exec of QEMU only
2367 #if defined(WITH_SCHED_GETAFFINITY) || defined(WITH_BSD_CPU_AFFINITY)
2368 static int
2369 qemuProcessInitCpuAffinity(virDomainObj *vm)
2371 bool settingAll = false;
2372 g_autoptr(virBitmap) cpumapToSet = NULL;
2373 virDomainNumatuneMemMode mem_mode;
2374 qemuDomainObjPrivate *priv = vm->privateData;
2376 if (!vm->pid) {
2377 virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
2378 _("Cannot setup CPU affinity until process is started"));
2379 return -1;
2382 /* Here is the deal, we can't set cpuset.mems before qemu is
2383 * started as it clashes with KVM allocation. Therefore, we
2384 * used to let qemu allocate its memory anywhere as we would
2385 * then move the memory to desired NUMA node via CGroups.
2386 * However, that might not be always possible because qemu
2387 * might lock some parts of its memory (e.g. due to VFIO).
2388 * Even if it possible, memory has to be copied between NUMA
2389 * nodes which is suboptimal.
2390 * Solution is to set affinity that matches the best what we
2391 * would have set in CGroups and then fix it later, once qemu
2392 * is already running. */
2393 if (virDomainNumaGetNodeCount(vm->def->numa) <= 1 &&
2394 virDomainNumatuneGetMode(vm->def->numa, -1, &mem_mode) == 0 &&
2395 mem_mode == VIR_DOMAIN_NUMATUNE_MEM_STRICT) {
2396 virBitmap *nodeset = NULL;
2398 if (virDomainNumatuneMaybeGetNodeset(vm->def->numa,
2399 priv->autoNodeset,
2400 &nodeset,
2401 -1) < 0)
2402 return -1;
2404 if (virNumaNodesetToCPUset(nodeset, &cpumapToSet) < 0)
2405 return -1;
2406 } else if (vm->def->cputune.emulatorpin) {
2407 cpumapToSet = virBitmapNewCopy(vm->def->cputune.emulatorpin);
2408 } else {
2409 settingAll = true;
2410 if (qemuProcessGetAllCpuAffinity(&cpumapToSet) < 0)
2411 return -1;
2415 * We only want to error out if we failed to set the affinity to
2416 * user-requested mapping. If we are just trying to reset the affinity
2417 * to all CPUs and this fails it can only be an issue if:
2418 * 1) libvirtd does not have CAP_SYS_NICE
2419 * 2) libvirtd does not run on all CPUs
2421 * This scenario can easily occur when libvirtd is run inside a
2422 * container with restrictive permissions and CPU pinning.
2424 * See also: https://bugzilla.redhat.com/1819801#c2
2426 if (cpumapToSet &&
2427 virProcessSetAffinity(vm->pid, cpumapToSet, settingAll) < 0) {
2428 return -1;
2431 return 0;
2433 #else /* !defined(WITH_SCHED_GETAFFINITY) && !defined(WITH_BSD_CPU_AFFINITY) */
2434 static int
2435 qemuProcessInitCpuAffinity(virDomainObj *vm G_GNUC_UNUSED)
2437 return 0;
2439 #endif /* !defined(WITH_SCHED_GETAFFINITY) && !defined(WITH_BSD_CPU_AFFINITY) */
2441 /* set link states to down on interfaces at qemu start */
2442 static int
2443 qemuProcessSetLinkStates(virDomainObj *vm,
2444 virDomainAsyncJob asyncJob)
2446 qemuDomainObjPrivate *priv = vm->privateData;
2447 virDomainDef *def = vm->def;
2448 size_t i;
2449 int ret = -1;
2450 int rv;
2452 if (qemuDomainObjEnterMonitorAsync(vm, asyncJob) < 0)
2453 return -1;
2455 for (i = 0; i < def->nnets; i++) {
2456 if (def->nets[i]->linkstate == VIR_DOMAIN_NET_INTERFACE_LINK_STATE_DOWN) {
2457 if (!def->nets[i]->info.alias) {
2458 virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
2459 _("missing alias for network device"));
2460 goto cleanup;
2463 VIR_DEBUG("Setting link state: %s", def->nets[i]->info.alias);
2465 rv = qemuMonitorSetLink(priv->mon,
2466 def->nets[i]->info.alias,
2467 VIR_DOMAIN_NET_INTERFACE_LINK_STATE_DOWN);
2468 if (rv < 0) {
2469 virReportError(VIR_ERR_OPERATION_FAILED,
2470 _("Couldn't set link state on interface: %1$s"),
2471 def->nets[i]->info.alias);
2472 goto cleanup;
2477 ret = 0;
2479 cleanup:
2480 qemuDomainObjExitMonitor(vm);
2481 return ret;
2486 * qemuProcessSetupPid:
2488 * This function sets resource properties (affinity, cgroups,
2489 * scheduler) for any PID associated with a domain. It should be used
2490 * to set up emulator PIDs as well as vCPU and I/O thread pids to
2491 * ensure they are all handled the same way.
2493 * Returns 0 on success, -1 on error.
2495 static int
2496 qemuProcessSetupPid(virDomainObj *vm,
2497 pid_t pid,
2498 virCgroupThreadName nameval,
2499 int id,
2500 virBitmap *cpumask,
2501 unsigned long long period,
2502 long long quota,
2503 virDomainThreadSchedParam *sched)
2505 qemuDomainObjPrivate *priv = vm->privateData;
2506 virDomainNuma *numatune = vm->def->numa;
2507 virDomainNumatuneMemMode mem_mode;
2508 virCgroup *cgroup = NULL;
2509 virBitmap *use_cpumask = NULL;
2510 virBitmap *affinity_cpumask = NULL;
2511 g_autoptr(virBitmap) hostcpumap = NULL;
2512 g_autofree char *mem_mask = NULL;
2513 int ret = -1;
2514 size_t i;
2516 if ((period || quota) &&
2517 !virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_CPU)) {
2518 virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
2519 _("cgroup cpu is required for scheduler tuning"));
2520 goto cleanup;
2523 /* Infer which cpumask shall be used. */
2524 if (!(use_cpumask = qemuDomainEvaluateCPUMask(vm->def,
2525 cpumask, priv->autoCpuset))) {
2526 /* You may think this is redundant, but we can't assume libvirtd
2527 * itself is running on all pCPUs, so we need to explicitly set
2528 * the spawned QEMU instance to all pCPUs if no map is given in
2529 * its config file */
2530 if (qemuProcessGetAllCpuAffinity(&hostcpumap) < 0)
2531 goto cleanup;
2532 affinity_cpumask = hostcpumap;
2536 * If CPU cgroup controller is not initialized here, then we need
2537 * neither period nor quota settings. And if CPUSET controller is
2538 * not initialized either, then there's nothing to do anyway.
2540 if (virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_CPU) ||
2541 virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_CPUSET)) {
2543 if (virDomainNumatuneGetMode(numatune, -1, &mem_mode) == 0) {
2544 /* QEMU allocates its memory from the emulator thread. Thus it
2545 * needs to access union of all host nodes configured. */
2546 if (mem_mode == VIR_DOMAIN_NUMATUNE_MEM_STRICT) {
2547 qemuDomainNumatuneMaybeFormatNodesetUnion(vm, NULL, &mem_mask);
2548 } else if (mem_mode == VIR_DOMAIN_NUMATUNE_MEM_RESTRICTIVE &&
2549 virDomainNumatuneMaybeFormatNodeset(numatune,
2550 priv->autoNodeset,
2551 &mem_mask, -1) < 0) {
2552 goto cleanup;
2556 /* For restrictive numatune mode we need to set cpuset.mems for vCPU
2557 * threads based on the node they are in as there is nothing else uses
2558 * for such restriction (e.g. numa_set_membind). */
2559 if (nameval == VIR_CGROUP_THREAD_VCPU) {
2560 /* Look for the guest NUMA node of this vCPU */
2561 for (i = 0; i < virDomainNumaGetNodeCount(numatune); i++) {
2562 virBitmap *node_cpus = virDomainNumaGetNodeCpumask(numatune, i);
2564 if (!virBitmapIsBitSet(node_cpus, id))
2565 continue;
2567 /* Update the mem_mask for this vCPU if the mode of its node is
2568 * 'restrictive'. */
2569 if (virDomainNumatuneGetMode(numatune, i, &mem_mode) == 0 &&
2570 mem_mode == VIR_DOMAIN_NUMATUNE_MEM_RESTRICTIVE) {
2571 VIR_FREE(mem_mask);
2573 if (virDomainNumatuneMaybeFormatNodeset(numatune,
2574 priv->autoNodeset,
2575 &mem_mask, i) < 0) {
2576 goto cleanup;
2580 break;
2584 if (virCgroupNewThread(priv->cgroup, nameval, id, true, &cgroup) < 0)
2585 goto cleanup;
2587 /* Move the thread to the sub dir before changing the settings so that
2588 * all take effect even with cgroupv2. */
2589 if (virCgroupAddThread(cgroup, pid) < 0)
2590 goto cleanup;
2592 if (virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_CPUSET)) {
2593 if (use_cpumask &&
2594 virDomainCgroupSetupCpusetCpus(cgroup, use_cpumask) < 0)
2595 goto cleanup;
2597 if (mem_mask && virCgroupSetCpusetMems(cgroup, mem_mask) < 0)
2598 goto cleanup;
2602 if (virDomainCgroupSetupVcpuBW(cgroup, period, quota) < 0)
2603 goto cleanup;
2606 if (!affinity_cpumask)
2607 affinity_cpumask = use_cpumask;
2609 /* Setup legacy affinity.
2611 * We only want to error out if we failed to set the affinity to
2612 * user-requested mapping. If we are just trying to reset the affinity
2613 * to all CPUs and this fails it can only be an issue if:
2614 * 1) libvirtd does not have CAP_SYS_NICE
2615 * 2) libvirtd does not run on all CPUs
2617 * This scenario can easily occur when libvirtd is run inside a
2618 * container with restrictive permissions and CPU pinning.
2620 * See also: https://bugzilla.redhat.com/1819801#c2
2622 if (affinity_cpumask &&
2623 virProcessSetAffinity(pid, affinity_cpumask,
2624 affinity_cpumask == hostcpumap) < 0) {
2625 goto cleanup;
2628 /* Set scheduler type and priority, but not for the main thread. */
2629 if (sched &&
2630 nameval != VIR_CGROUP_THREAD_EMULATOR &&
2631 virProcessSetScheduler(pid, sched->policy, sched->priority) < 0)
2632 goto cleanup;
2634 ret = 0;
2635 cleanup:
2636 if (cgroup) {
2637 if (ret < 0)
2638 virCgroupRemove(cgroup);
2639 virCgroupFree(cgroup);
2642 return ret;
2647 qemuProcessSetupEmulator(virDomainObj *vm)
2649 return qemuProcessSetupPid(vm, vm->pid, VIR_CGROUP_THREAD_EMULATOR,
2650 0, vm->def->cputune.emulatorpin,
2651 vm->def->cputune.emulator_period,
2652 vm->def->cputune.emulator_quota,
2653 vm->def->cputune.emulatorsched);
2657 static int
2658 qemuProcessResctrlCreate(virQEMUDriver *driver,
2659 virDomainObj *vm)
2661 size_t i = 0;
2662 g_autoptr(virCaps) caps = NULL;
2663 qemuDomainObjPrivate *priv = vm->privateData;
2665 if (!vm->def->nresctrls)
2666 return 0;
2668 /* Force capability refresh since resctrl info can change
2669 * XXX: move cache info into virresctrl so caps are not needed */
2670 caps = virQEMUDriverGetCapabilities(driver, true);
2671 if (!caps)
2672 return -1;
2674 for (i = 0; i < vm->def->nresctrls; i++) {
2675 size_t j = 0;
2676 if (virResctrlAllocCreate(caps->host.resctrl,
2677 vm->def->resctrls[i]->alloc,
2678 priv->machineName) < 0)
2679 return -1;
2681 for (j = 0; j < vm->def->resctrls[i]->nmonitors; j++) {
2682 virDomainResctrlMonDef *mon = NULL;
2684 mon = vm->def->resctrls[i]->monitors[j];
2685 if (virResctrlMonitorCreate(mon->instance,
2686 priv->machineName) < 0)
2687 return -1;
2691 return 0;
2695 static char *
2696 qemuProcessBuildPRHelperPidfilePathOld(virDomainObj *vm)
2698 qemuDomainObjPrivate *priv = vm->privateData;
2699 const char *prdAlias = qemuDomainGetManagedPRAlias();
2701 return virPidFileBuildPath(priv->libDir, prdAlias);
2705 static char *
2706 qemuProcessBuildPRHelperPidfilePath(virDomainObj *vm)
2708 qemuDomainObjPrivate *priv = vm->privateData;
2709 g_autofree char *domname = virDomainDefGetShortName(vm->def);
2710 g_autofree char *prdName = g_strdup_printf("%s-%s", domname, qemuDomainGetManagedPRAlias());
2711 g_autoptr(virQEMUDriverConfig) cfg = virQEMUDriverGetConfig(priv->driver);
2713 return virPidFileBuildPath(cfg->stateDir, prdName);
2717 void
2718 qemuProcessKillManagedPRDaemon(virDomainObj *vm)
2720 qemuDomainObjPrivate *priv = vm->privateData;
2721 virErrorPtr orig_err;
2722 g_autofree char *pidfile = NULL;
2724 if (!(pidfile = qemuProcessBuildPRHelperPidfilePath(vm))) {
2725 VIR_WARN("Unable to construct pr-helper pidfile path");
2726 return;
2729 if (!virFileExists(pidfile)) {
2730 g_free(pidfile);
2731 if (!(pidfile = qemuProcessBuildPRHelperPidfilePathOld(vm))) {
2732 VIR_WARN("Unable to construct pr-helper pidfile path");
2733 return;
2737 virErrorPreserveLast(&orig_err);
2738 if (virPidFileForceCleanupPath(pidfile) < 0) {
2739 VIR_WARN("Unable to kill pr-helper process");
2740 } else {
2741 priv->prDaemonRunning = false;
2743 virErrorRestore(&orig_err);
2747 static int
2748 qemuProcessStartPRDaemonHook(void *opaque)
2750 virDomainObj *vm = opaque;
2751 size_t i, nfds = 0;
2752 g_autofree int *fds = NULL;
2753 int ret = -1;
2755 if (qemuDomainNamespaceEnabled(vm, QEMU_DOMAIN_NS_MOUNT)) {
2756 virProcessGetNamespaces(vm->pid, &nfds, &fds);
2758 if (nfds > 0 &&
2759 virProcessSetNamespaces(nfds, fds) < 0)
2760 goto cleanup;
2763 ret = 0;
2764 cleanup:
2765 for (i = 0; i < nfds; i++)
2766 VIR_FORCE_CLOSE(fds[i]);
2767 return ret;
2772 qemuProcessStartManagedPRDaemon(virDomainObj *vm)
2774 const char *const prHelperDirs[] = {
2775 "/usr/libexec",
2776 NULL,
2778 qemuDomainObjPrivate *priv = vm->privateData;
2779 virQEMUDriver *driver = priv->driver;
2780 g_autoptr(virQEMUDriverConfig) cfg = NULL;
2781 int errfd = -1;
2782 g_autofree char *prHelperPath = NULL;
2783 g_autofree char *pidfile = NULL;
2784 g_autofree char *socketPath = NULL;
2785 pid_t cpid = -1;
2786 g_autoptr(virCommand) cmd = NULL;
2787 virTimeBackOffVar timebackoff;
2788 const unsigned long long timeout = 500000; /* ms */
2789 int ret = -1;
2791 cfg = virQEMUDriverGetConfig(driver);
2793 prHelperPath = virFindFileInPathFull(cfg->prHelperName, prHelperDirs);
2795 if (!prHelperPath) {
2796 virReportSystemError(errno, _("'%1$s' is not a suitable pr helper"),
2797 cfg->prHelperName);
2798 goto cleanup;
2801 VIR_DEBUG("Using qemu-pr-helper: %s", prHelperPath);
2803 if (!(pidfile = qemuProcessBuildPRHelperPidfilePath(vm)))
2804 goto cleanup;
2806 if (!(socketPath = qemuDomainGetManagedPRSocketPath(priv)))
2807 goto cleanup;
2809 /* Remove stale socket */
2810 if (unlink(socketPath) < 0 &&
2811 errno != ENOENT) {
2812 virReportSystemError(errno,
2813 _("Unable to remove stale socket path: %1$s"),
2814 socketPath);
2815 goto cleanup;
2818 if (!(cmd = virCommandNewArgList(prHelperPath,
2819 "-k", socketPath,
2820 NULL)))
2821 goto cleanup;
2823 virCommandDaemonize(cmd);
2824 virCommandSetPidFile(cmd, pidfile);
2825 virCommandSetErrorFD(cmd, &errfd);
2827 /* Place the process into the same namespace and cgroup as
2828 * qemu (so that it shares the same view of the system). */
2829 virCommandSetPreExecHook(cmd, qemuProcessStartPRDaemonHook, vm);
2831 if (cfg->schedCore == QEMU_SCHED_CORE_FULL) {
2832 pid_t cookie_pid = vm->pid;
2834 if (cookie_pid <= 0)
2835 cookie_pid = priv->schedCoreChildPID;
2837 virCommandSetRunAmong(cmd, cookie_pid);
2840 if (virCommandRun(cmd, NULL) < 0)
2841 goto cleanup;
2843 if (virPidFileReadPath(pidfile, &cpid) < 0) {
2844 virReportError(VIR_ERR_INTERNAL_ERROR,
2845 _("pr helper %1$s didn't show up"),
2846 prHelperPath);
2847 goto cleanup;
2850 if (virTimeBackOffStart(&timebackoff, 1, timeout) < 0)
2851 goto cleanup;
2852 while (virTimeBackOffWait(&timebackoff)) {
2853 char errbuf[1024] = { 0 };
2855 if (virFileExists(socketPath))
2856 break;
2858 if (virProcessKill(cpid, 0) == 0)
2859 continue;
2861 if (saferead(errfd, errbuf, sizeof(errbuf) - 1) < 0) {
2862 virReportSystemError(errno,
2863 _("pr helper %1$s died unexpectedly"),
2864 prHelperPath);
2865 } else {
2866 virReportError(VIR_ERR_OPERATION_FAILED,
2867 _("pr helper died and reported: %1$s"), errbuf);
2869 goto cleanup;
2872 if (!virFileExists(socketPath)) {
2873 virReportError(VIR_ERR_OPERATION_TIMEOUT, "%s",
2874 _("pr helper socked did not show up"));
2875 goto cleanup;
2878 if (priv->cgroup &&
2879 virCgroupAddMachineProcess(priv->cgroup, cpid) < 0)
2880 goto cleanup;
2882 if (qemuSecurityDomainSetPathLabel(driver, vm, socketPath, true) < 0)
2883 goto cleanup;
2885 priv->prDaemonRunning = true;
2886 ret = 0;
2887 cleanup:
2888 if (ret < 0) {
2889 virCommandAbort(cmd);
2890 if (cpid >= 0)
2891 virProcessKillPainfully(cpid, true);
2892 if (pidfile)
2893 unlink(pidfile);
2895 VIR_FORCE_CLOSE(errfd);
2896 return ret;
2900 static int
2901 qemuProcessAllowPostCopyMigration(virDomainObj *vm)
2903 qemuDomainObjPrivate *priv = vm->privateData;
2904 virQEMUDriver *driver = priv->driver;
2905 g_autoptr(virQEMUDriverConfig) cfg = virQEMUDriverGetConfig(driver);
2906 const char *const *devices = (const char *const *) cfg->cgroupDeviceACL;
2907 const char *uffd = "/dev/userfaultfd";
2908 int rc;
2910 if (!virFileExists(uffd)) {
2911 VIR_DEBUG("%s is not supported by the host", uffd);
2912 return 0;
2915 if (!devices)
2916 devices = defaultDeviceACL;
2918 if (!g_strv_contains(devices, uffd)) {
2919 VIR_DEBUG("%s is not allowed by device ACL", uffd);
2920 return 0;
2923 VIR_DEBUG("Labeling %s in mount namespace", uffd);
2924 if ((rc = qemuSecurityDomainSetMountNSPathLabel(driver, vm, uffd)) < 0)
2925 return -1;
2927 if (rc == 1)
2928 VIR_DEBUG("Mount namespace is not enabled, leaving %s as is", uffd);
2930 return 0;
2934 static int
2935 qemuProcessInitPasswords(virQEMUDriver *driver,
2936 virDomainObj *vm,
2937 int asyncJob)
2939 int ret = 0;
2940 g_autoptr(virQEMUDriverConfig) cfg = virQEMUDriverGetConfig(driver);
2941 size_t i;
2943 for (i = 0; i < vm->def->ngraphics; ++i) {
2944 virDomainGraphicsDef *graphics = vm->def->graphics[i];
2945 if (graphics->type == VIR_DOMAIN_GRAPHICS_TYPE_VNC) {
2946 ret = qemuDomainChangeGraphicsPasswords(vm,
2947 VIR_DOMAIN_GRAPHICS_TYPE_VNC,
2948 &graphics->data.vnc.auth,
2949 cfg->vncPassword,
2950 asyncJob);
2951 } else if (graphics->type == VIR_DOMAIN_GRAPHICS_TYPE_SPICE) {
2952 ret = qemuDomainChangeGraphicsPasswords(vm,
2953 VIR_DOMAIN_GRAPHICS_TYPE_SPICE,
2954 &graphics->data.spice.auth,
2955 cfg->spicePassword,
2956 asyncJob);
2959 if (ret < 0)
2960 return ret;
2963 return ret;
2967 static int
2968 qemuProcessCleanupChardevDevice(virDomainDef *def G_GNUC_UNUSED,
2969 virDomainChrDef *dev,
2970 void *opaque G_GNUC_UNUSED)
2972 if (dev->source->type == VIR_DOMAIN_CHR_TYPE_UNIX &&
2973 dev->source->data.nix.listen &&
2974 dev->source->data.nix.path)
2975 unlink(dev->source->data.nix.path);
2977 return 0;
2982 * Loads and update video memory size for video devices according to QEMU
2983 * process as the QEMU will silently update the values that we pass to QEMU
2984 * through command line. We need to load these updated values and store them
2985 * into the status XML.
2987 * We will fail if for some reason the values cannot be loaded from QEMU because
2988 * its mandatory to get the correct video memory size to status XML to not break
2989 * migration.
2991 static int
2992 qemuProcessUpdateVideoRamSize(virQEMUDriver *driver,
2993 virDomainObj *vm,
2994 int asyncJob)
2996 int ret = -1;
2997 ssize_t i;
2998 qemuDomainObjPrivate *priv = vm->privateData;
2999 virDomainVideoDef *video = NULL;
3000 g_autoptr(virQEMUDriverConfig) cfg = NULL;
3002 if (qemuDomainObjEnterMonitorAsync(vm, asyncJob) < 0)
3003 return -1;
3005 for (i = 0; i < vm->def->nvideos; i++) {
3006 video = vm->def->videos[i];
3008 switch (video->type) {
3009 case VIR_DOMAIN_VIDEO_TYPE_VGA:
3010 if (virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_VGA_VGAMEM)) {
3011 if (qemuMonitorUpdateVideoMemorySize(priv->mon, video, "VGA") < 0)
3012 goto error;
3014 break;
3015 case VIR_DOMAIN_VIDEO_TYPE_QXL:
3016 if (i == 0) {
3017 if (virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_QXL_VGAMEM) &&
3018 qemuMonitorUpdateVideoMemorySize(priv->mon, video,
3019 "qxl-vga") < 0)
3020 goto error;
3022 if (virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_QXL_VRAM64) &&
3023 qemuMonitorUpdateVideoVram64Size(priv->mon, video,
3024 "qxl-vga") < 0)
3025 goto error;
3026 } else {
3027 if (virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_QXL_VGAMEM) &&
3028 qemuMonitorUpdateVideoMemorySize(priv->mon, video,
3029 "qxl") < 0)
3030 goto error;
3032 if (virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_QXL_VRAM64) &&
3033 qemuMonitorUpdateVideoVram64Size(priv->mon, video,
3034 "qxl") < 0)
3035 goto error;
3037 break;
3038 case VIR_DOMAIN_VIDEO_TYPE_VMVGA:
3039 if (virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_VMWARE_SVGA_VGAMEM)) {
3040 if (qemuMonitorUpdateVideoMemorySize(priv->mon, video,
3041 "vmware-svga") < 0)
3042 goto error;
3044 break;
3045 case VIR_DOMAIN_VIDEO_TYPE_DEFAULT:
3046 case VIR_DOMAIN_VIDEO_TYPE_CIRRUS:
3047 case VIR_DOMAIN_VIDEO_TYPE_XEN:
3048 case VIR_DOMAIN_VIDEO_TYPE_VBOX:
3049 case VIR_DOMAIN_VIDEO_TYPE_PARALLELS:
3050 case VIR_DOMAIN_VIDEO_TYPE_VIRTIO:
3051 case VIR_DOMAIN_VIDEO_TYPE_GOP:
3052 case VIR_DOMAIN_VIDEO_TYPE_NONE:
3053 case VIR_DOMAIN_VIDEO_TYPE_BOCHS:
3054 case VIR_DOMAIN_VIDEO_TYPE_RAMFB:
3055 case VIR_DOMAIN_VIDEO_TYPE_LAST:
3056 break;
3061 qemuDomainObjExitMonitor(vm);
3063 cfg = virQEMUDriverGetConfig(driver);
3064 ret = virDomainObjSave(vm, driver->xmlopt, cfg->stateDir);
3066 return ret;
3068 error:
3069 qemuDomainObjExitMonitor(vm);
3070 return -1;
3074 struct qemuProcessHookData {
3075 virDomainObj *vm;
3076 virQEMUDriver *driver;
3077 virQEMUDriverConfig *cfg;
3080 static int qemuProcessHook(void *data)
3082 struct qemuProcessHookData *h = data;
3083 qemuDomainObjPrivate *priv = h->vm->privateData;
3084 int ret = -1;
3085 int fd;
3086 virBitmap *nodeset = NULL;
3087 virDomainNumatuneMemMode mode;
3089 /* This method cannot use any mutexes, which are not
3090 * protected across fork()
3093 qemuSecurityPostFork(h->driver->securityManager);
3095 /* Some later calls want pid present */
3096 h->vm->pid = getpid();
3098 VIR_DEBUG("Obtaining domain lock");
3100 * Since we're going to leak the returned FD to QEMU,
3101 * we need to make sure it gets a sensible label.
3102 * This mildly sucks, because there could be other
3103 * sockets the lock driver opens that we don't want
3104 * labelled. So far we're ok though.
3106 if (qemuSecuritySetSocketLabel(h->driver->securityManager, h->vm->def) < 0)
3107 goto cleanup;
3108 if (virDomainLockProcessStart(h->driver->lockManager,
3109 h->cfg->uri,
3110 h->vm,
3111 /* QEMU is always paused initially */
3112 true,
3113 &fd) < 0)
3114 goto cleanup;
3115 if (qemuSecurityClearSocketLabel(h->driver->securityManager, h->vm->def) < 0)
3116 goto cleanup;
3118 if (qemuDomainUnshareNamespace(h->cfg, h->driver->securityManager, h->vm) < 0)
3119 goto cleanup;
3121 if (virDomainNumatuneGetMode(h->vm->def->numa, -1, &mode) == 0) {
3122 if ((mode == VIR_DOMAIN_NUMATUNE_MEM_STRICT ||
3123 mode == VIR_DOMAIN_NUMATUNE_MEM_RESTRICTIVE) &&
3124 h->cfg->cgroupControllers & (1 << VIR_CGROUP_CONTROLLER_CPUSET) &&
3125 virCgroupControllerAvailable(VIR_CGROUP_CONTROLLER_CPUSET)) {
3126 /* Use virNuma* API iff necessary. Once set and child is exec()-ed,
3127 * there's no way for us to change it. Rely on cgroups (if available
3128 * and enabled in the config) rather than virNuma*. */
3129 VIR_DEBUG("Relying on CGroups for memory binding");
3130 } else {
3131 nodeset = virDomainNumatuneGetNodeset(h->vm->def->numa,
3132 priv->autoNodeset, -1);
3134 if (virNumaSetupMemoryPolicy(mode, nodeset) < 0)
3135 goto cleanup;
3139 ret = 0;
3141 cleanup:
3142 virObjectUnref(h->cfg);
3143 VIR_DEBUG("Hook complete ret=%d", ret);
3144 return ret;
3148 qemuProcessPrepareMonitorChr(virDomainChrSourceDef *monConfig,
3149 const char *domainDir)
3151 monConfig->type = VIR_DOMAIN_CHR_TYPE_UNIX;
3152 monConfig->data.nix.listen = true;
3154 monConfig->data.nix.path = g_strdup_printf("%s/monitor.sock", domainDir);
3155 return 0;
3160 * Precondition: vm must be locked, and a job must be active.
3161 * This method will call {Enter,Exit}Monitor
3164 qemuProcessStartCPUs(virQEMUDriver *driver, virDomainObj *vm,
3165 virDomainRunningReason reason,
3166 virDomainAsyncJob asyncJob)
3168 int ret = -1;
3169 qemuDomainObjPrivate *priv = vm->privateData;
3170 g_autoptr(virQEMUDriverConfig) cfg = virQEMUDriverGetConfig(driver);
3172 /* Bring up netdevs before starting CPUs */
3173 if (virDomainInterfaceStartDevices(vm->def) < 0)
3174 return -1;
3176 VIR_DEBUG("Using lock state '%s'", NULLSTR(priv->lockState));
3177 if (virDomainLockProcessResume(driver->lockManager, cfg->uri,
3178 vm, priv->lockState) < 0) {
3179 /* Don't free priv->lockState on error, because we need
3180 * to make sure we have state still present if the user
3181 * tries to resume again
3183 return -1;
3185 VIR_FREE(priv->lockState);
3187 priv->runningReason = reason;
3189 if (qemuDomainObjEnterMonitorAsync(vm, asyncJob) < 0)
3190 goto release;
3192 ret = qemuMonitorStartCPUs(priv->mon);
3193 qemuDomainObjExitMonitor(vm);
3195 if (ret < 0)
3196 goto release;
3198 /* The RESUME event handler will change the domain state with the reason
3199 * saved in priv->runningReason and it will also emit corresponding domain
3200 * lifecycle event.
3203 return ret;
3205 release:
3206 priv->runningReason = VIR_DOMAIN_RUNNING_UNKNOWN;
3207 if (virDomainLockProcessPause(driver->lockManager, vm, &priv->lockState) < 0)
3208 VIR_WARN("Unable to release lease on %s", vm->def->name);
3209 VIR_DEBUG("Preserving lock state '%s'", NULLSTR(priv->lockState));
3210 return ret;
3214 int qemuProcessStopCPUs(virQEMUDriver *driver,
3215 virDomainObj *vm,
3216 virDomainPausedReason reason,
3217 virDomainAsyncJob asyncJob)
3219 int ret = -1;
3220 qemuDomainObjPrivate *priv = vm->privateData;
3222 VIR_FREE(priv->lockState);
3224 priv->pausedReason = reason;
3226 if (qemuDomainObjEnterMonitorAsync(vm, asyncJob) < 0)
3227 goto cleanup;
3229 ret = qemuMonitorStopCPUs(priv->mon);
3230 qemuDomainObjExitMonitor(vm);
3232 if (ret < 0)
3233 goto cleanup;
3235 /* de-activate netdevs after stopping CPUs */
3236 ignore_value(virDomainInterfaceStopDevices(vm->def));
3238 if (vm->job->current)
3239 ignore_value(virTimeMillisNow(&vm->job->current->stopped));
3241 /* The STOP event handler will change the domain state with the reason
3242 * saved in priv->pausedReason and it will also emit corresponding domain
3243 * lifecycle event.
3246 if (virDomainLockProcessPause(driver->lockManager, vm, &priv->lockState) < 0)
3247 VIR_WARN("Unable to release lease on %s", vm->def->name);
3248 VIR_DEBUG("Preserving lock state '%s'", NULLSTR(priv->lockState));
3250 cleanup:
3251 if (ret < 0)
3252 priv->pausedReason = VIR_DOMAIN_PAUSED_UNKNOWN;
3254 return ret;
3259 static void
3260 qemuProcessNotifyNets(virDomainDef *def)
3262 size_t i;
3263 g_autoptr(virConnect) conn = NULL;
3265 for (i = 0; i < def->nnets; i++) {
3266 virDomainNetDef *net = def->nets[i];
3267 /* keep others from trying to use the macvtap device name, but
3268 * don't return error if this happens, since that causes the
3269 * domain to be unceremoniously killed, which would be *very*
3270 * impolite.
3272 switch (virDomainNetGetActualType(net)) {
3273 case VIR_DOMAIN_NET_TYPE_DIRECT:
3274 virNetDevReserveName(net->ifname);
3275 break;
3276 case VIR_DOMAIN_NET_TYPE_BRIDGE:
3277 case VIR_DOMAIN_NET_TYPE_NETWORK:
3278 case VIR_DOMAIN_NET_TYPE_ETHERNET:
3279 virNetDevReserveName(net->ifname);
3280 break;
3281 case VIR_DOMAIN_NET_TYPE_USER:
3282 case VIR_DOMAIN_NET_TYPE_VHOSTUSER:
3283 case VIR_DOMAIN_NET_TYPE_SERVER:
3284 case VIR_DOMAIN_NET_TYPE_CLIENT:
3285 case VIR_DOMAIN_NET_TYPE_MCAST:
3286 case VIR_DOMAIN_NET_TYPE_INTERNAL:
3287 case VIR_DOMAIN_NET_TYPE_HOSTDEV:
3288 case VIR_DOMAIN_NET_TYPE_UDP:
3289 case VIR_DOMAIN_NET_TYPE_VDPA:
3290 case VIR_DOMAIN_NET_TYPE_NULL:
3291 case VIR_DOMAIN_NET_TYPE_VDS:
3292 case VIR_DOMAIN_NET_TYPE_LAST:
3293 break;
3296 if (net->type == VIR_DOMAIN_NET_TYPE_NETWORK && !conn)
3297 conn = virGetConnectNetwork();
3299 virDomainNetNotifyActualDevice(conn, def, net);
3303 /* Attempt to instantiate the filters. Ignore failures because it's
3304 * possible that someone deleted a filter binding and the associated
3305 * filter while the guest was running and we don't want that action
3306 * to cause failure to keep the guest running during the reconnection
3307 * processing. Nor do we necessarily want other failures to do the
3308 * same. We'll just log the error conditions other than of course
3309 * ignoreExists possibility (e.g. the true flag) */
3310 static void
3311 qemuProcessFiltersInstantiate(virDomainDef *def)
3313 size_t i;
3315 for (i = 0; i < def->nnets; i++) {
3316 virDomainNetDef *net = def->nets[i];
3317 if ((net->filter) && (net->ifname)) {
3318 if (virDomainConfNWFilterInstantiate(def->name, def->uuid, net,
3319 true) < 0) {
3320 VIR_WARN("filter '%s' instantiation for '%s' failed '%s'",
3321 net->filter, net->ifname, virGetLastErrorMessage());
3322 virResetLastError();
3328 static int
3329 qemuProcessUpdateState(virDomainObj *vm)
3331 qemuDomainObjPrivate *priv = vm->privateData;
3332 virDomainState state;
3333 virDomainPausedReason reason;
3334 virDomainState newState = VIR_DOMAIN_NOSTATE;
3335 int oldReason;
3336 int newReason;
3337 bool running;
3338 g_autofree char *msg = NULL;
3339 int ret;
3341 qemuDomainObjEnterMonitor(vm);
3342 ret = qemuMonitorGetStatus(priv->mon, &running, &reason);
3343 qemuDomainObjExitMonitor(vm);
3345 if (ret < 0)
3346 return -1;
3348 state = virDomainObjGetState(vm, &oldReason);
3350 if (running &&
3351 (state == VIR_DOMAIN_SHUTOFF ||
3352 (state == VIR_DOMAIN_PAUSED &&
3353 oldReason == VIR_DOMAIN_PAUSED_STARTING_UP))) {
3354 newState = VIR_DOMAIN_RUNNING;
3355 newReason = VIR_DOMAIN_RUNNING_BOOTED;
3356 msg = g_strdup("finished booting");
3357 } else if (state == VIR_DOMAIN_PAUSED && running) {
3358 newState = VIR_DOMAIN_RUNNING;
3359 newReason = VIR_DOMAIN_RUNNING_UNPAUSED;
3360 msg = g_strdup("was unpaused");
3361 } else if (state == VIR_DOMAIN_RUNNING && !running) {
3362 if (reason == VIR_DOMAIN_PAUSED_SHUTTING_DOWN) {
3363 newState = VIR_DOMAIN_SHUTDOWN;
3364 newReason = VIR_DOMAIN_SHUTDOWN_UNKNOWN;
3365 msg = g_strdup("shutdown");
3366 } else if (reason == VIR_DOMAIN_PAUSED_CRASHED) {
3367 newState = VIR_DOMAIN_CRASHED;
3368 newReason = VIR_DOMAIN_CRASHED_PANICKED;
3369 msg = g_strdup("crashed");
3370 } else {
3371 newState = VIR_DOMAIN_PAUSED;
3372 newReason = reason;
3373 msg = g_strdup_printf("was paused (%s)",
3374 virDomainPausedReasonTypeToString(reason));
3378 if (newState != VIR_DOMAIN_NOSTATE) {
3379 VIR_DEBUG("Domain %s %s while its monitor was disconnected;"
3380 " changing state to %s (%s)",
3381 vm->def->name,
3382 NULLSTR(msg),
3383 virDomainStateTypeToString(newState),
3384 virDomainStateReasonToString(newState, newReason));
3385 virDomainObjSetState(vm, newState, newReason);
3388 return 0;
3392 void
3393 qemuProcessCleanupMigrationJob(virQEMUDriver *driver,
3394 virDomainObj *vm)
3396 qemuDomainObjPrivate *priv = vm->privateData;
3397 virDomainState state;
3398 int reason;
3400 state = virDomainObjGetState(vm, &reason);
3402 VIR_DEBUG("driver=%p, vm=%s, asyncJob=%s, state=%s, reason=%s",
3403 driver, vm->def->name,
3404 virDomainAsyncJobTypeToString(vm->job->asyncJob),
3405 virDomainStateTypeToString(state),
3406 virDomainStateReasonToString(state, reason));
3408 if (vm->job->asyncJob != VIR_ASYNC_JOB_MIGRATION_IN &&
3409 vm->job->asyncJob != VIR_ASYNC_JOB_MIGRATION_OUT)
3410 return;
3412 virPortAllocatorRelease(priv->migrationPort);
3413 priv->migrationPort = 0;
3414 qemuDomainObjDiscardAsyncJob(vm);
3418 static void
3419 qemuProcessRestoreMigrationJob(virDomainObj *vm,
3420 virDomainJobObj *job)
3422 qemuDomainJobPrivate *jobPriv = job->privateData;
3423 virDomainJobOperation op;
3424 unsigned long long allowedJobs;
3426 if (job->asyncJob == VIR_ASYNC_JOB_MIGRATION_IN) {
3427 op = VIR_DOMAIN_JOB_OPERATION_MIGRATION_IN;
3428 allowedJobs = VIR_JOB_NONE;
3429 } else {
3430 op = VIR_DOMAIN_JOB_OPERATION_MIGRATION_OUT;
3431 allowedJobs = VIR_JOB_DEFAULT_MASK | JOB_MASK(VIR_JOB_MIGRATION_OP);
3433 allowedJobs |= JOB_MASK(VIR_JOB_MODIFY_MIGRATION_SAFE);
3435 qemuDomainObjRestoreAsyncJob(vm, job->asyncJob, job->phase,
3436 job->asyncStarted, op,
3437 QEMU_DOMAIN_JOB_STATS_TYPE_MIGRATION,
3438 VIR_DOMAIN_JOB_STATUS_PAUSED,
3439 allowedJobs);
3441 job->privateData = g_steal_pointer(&vm->job->privateData);
3442 vm->job->privateData = jobPriv;
3443 vm->job->apiFlags = job->apiFlags;
3444 vm->job->asyncPaused = job->asyncPaused;
3446 qemuDomainCleanupAdd(vm, qemuProcessCleanupMigrationJob);
3451 * Returns
3452 * -1 on error, the domain will be killed,
3453 * 0 the domain should remain running with the migration job discarded,
3454 * 1 the daemon was restarted during post-copy phase
3456 static int
3457 qemuProcessRecoverMigrationIn(virQEMUDriver *driver,
3458 virDomainObj *vm,
3459 virDomainJobObj *job,
3460 virDomainState state)
3462 VIR_DEBUG("Active incoming migration in phase %s",
3463 qemuMigrationJobPhaseTypeToString(job->phase));
3465 switch ((qemuMigrationJobPhase) job->phase) {
3466 case QEMU_MIGRATION_PHASE_NONE:
3467 case QEMU_MIGRATION_PHASE_PERFORM2:
3468 case QEMU_MIGRATION_PHASE_BEGIN3:
3469 case QEMU_MIGRATION_PHASE_PERFORM3:
3470 case QEMU_MIGRATION_PHASE_PERFORM3_DONE:
3471 case QEMU_MIGRATION_PHASE_CONFIRM3_CANCELLED:
3472 case QEMU_MIGRATION_PHASE_CONFIRM3:
3473 case QEMU_MIGRATION_PHASE_BEGIN_RESUME:
3474 case QEMU_MIGRATION_PHASE_PERFORM_RESUME:
3475 case QEMU_MIGRATION_PHASE_CONFIRM_RESUME:
3476 case QEMU_MIGRATION_PHASE_LAST:
3477 /* N/A for incoming migration */
3478 break;
3480 case QEMU_MIGRATION_PHASE_PREPARE:
3481 VIR_DEBUG("Killing unfinished incoming migration for domain %s",
3482 vm->def->name);
3483 return -1;
3485 case QEMU_MIGRATION_PHASE_FINISH2:
3486 /* source domain is already killed so let's just resume the domain
3487 * and hope we are all set */
3488 VIR_DEBUG("Incoming migration finished, resuming domain %s",
3489 vm->def->name);
3490 if (qemuProcessStartCPUs(driver, vm,
3491 VIR_DOMAIN_RUNNING_MIGRATED,
3492 VIR_ASYNC_JOB_NONE) < 0) {
3493 VIR_WARN("Could not resume domain %s", vm->def->name);
3495 break;
3497 case QEMU_MIGRATION_PHASE_FINISH3:
3498 /* migration finished, we started resuming the domain but didn't
3499 * confirm success or failure yet; killing it seems safest unless
3500 * we already started guest CPUs or we were in post-copy mode */
3501 if (virDomainObjIsPostcopy(vm, job))
3502 return 1;
3504 if (state != VIR_DOMAIN_RUNNING) {
3505 VIR_DEBUG("Killing migrated domain %s", vm->def->name);
3506 return -1;
3508 break;
3510 case QEMU_MIGRATION_PHASE_POSTCOPY_FAILED:
3511 case QEMU_MIGRATION_PHASE_PREPARE_RESUME:
3512 case QEMU_MIGRATION_PHASE_FINISH_RESUME:
3513 return 1;
3516 return 0;
3521 * Returns
3522 * -1 the domain should be killed (either after a successful migration or
3523 * on error),
3524 * 0 the domain should remain running with the migration job discarded,
3525 * 1 the daemon was restarted during post-copy phase
3527 static int
3528 qemuProcessRecoverMigrationOut(virQEMUDriver *driver,
3529 virDomainObj *vm,
3530 virDomainJobObj *job,
3531 virDomainJobStatus migStatus,
3532 virDomainState state,
3533 int reason,
3534 unsigned int *stopFlags)
3536 bool postcopy = virDomainObjIsPostcopy(vm, job);
3537 bool resume = false;
3539 VIR_DEBUG("Active outgoing migration in phase %s",
3540 qemuMigrationJobPhaseTypeToString(job->phase));
3542 switch ((qemuMigrationJobPhase) job->phase) {
3543 case QEMU_MIGRATION_PHASE_NONE:
3544 case QEMU_MIGRATION_PHASE_PREPARE:
3545 case QEMU_MIGRATION_PHASE_FINISH2:
3546 case QEMU_MIGRATION_PHASE_FINISH3:
3547 case QEMU_MIGRATION_PHASE_PREPARE_RESUME:
3548 case QEMU_MIGRATION_PHASE_FINISH_RESUME:
3549 case QEMU_MIGRATION_PHASE_LAST:
3550 /* N/A for outgoing migration */
3551 break;
3553 case QEMU_MIGRATION_PHASE_BEGIN3:
3554 /* nothing happened so far, just forget we were about to migrate the
3555 * domain */
3556 break;
3558 case QEMU_MIGRATION_PHASE_PERFORM2:
3559 case QEMU_MIGRATION_PHASE_PERFORM3:
3560 /* migration is still in progress, let's cancel it and resume the
3561 * domain; we can do so even in post-copy phase as the domain was not
3562 * resumed on the destination host yet
3564 VIR_DEBUG("Cancelling unfinished migration of domain %s",
3565 vm->def->name);
3566 if (qemuMigrationSrcCancelUnattended(vm, job) < 0) {
3567 VIR_WARN("Could not cancel ongoing migration of domain %s",
3568 vm->def->name);
3570 resume = true;
3571 break;
3573 case QEMU_MIGRATION_PHASE_PERFORM3_DONE:
3574 /* migration finished but we didn't have a chance to get the result
3575 * of Finish3 step; third party needs to check what to do next; in
3576 * post-copy mode we can use PAUSED_POSTCOPY_FAILED state for this
3578 if (postcopy)
3579 return 1;
3580 break;
3582 case QEMU_MIGRATION_PHASE_CONFIRM3_CANCELLED:
3583 /* Finish3 failed, we need to resume the domain, but once we enter
3584 * post-copy mode there's no way back, so let's just mark the domain
3585 * as broken in that case
3587 if (postcopy)
3588 return 1;
3590 VIR_DEBUG("Resuming domain %s after failed migration",
3591 vm->def->name);
3592 resume = true;
3593 break;
3595 case QEMU_MIGRATION_PHASE_CONFIRM3:
3596 /* migration completed, we need to kill the domain here */
3597 *stopFlags |= VIR_QEMU_PROCESS_STOP_MIGRATED;
3598 return -1;
3600 case QEMU_MIGRATION_PHASE_CONFIRM_RESUME:
3601 if (migStatus == VIR_DOMAIN_JOB_STATUS_HYPERVISOR_COMPLETED) {
3602 /* migration completed, we need to kill the domain here */
3603 *stopFlags |= VIR_QEMU_PROCESS_STOP_MIGRATED;
3604 return -1;
3606 return 1;
3608 case QEMU_MIGRATION_PHASE_POSTCOPY_FAILED:
3609 case QEMU_MIGRATION_PHASE_BEGIN_RESUME:
3610 case QEMU_MIGRATION_PHASE_PERFORM_RESUME:
3611 return 1;
3614 if (resume) {
3615 /* resume the domain but only if it was paused as a result of
3616 * migration
3618 if (state == VIR_DOMAIN_PAUSED &&
3619 (reason == VIR_DOMAIN_PAUSED_MIGRATION ||
3620 reason == VIR_DOMAIN_PAUSED_UNKNOWN)) {
3621 if (qemuProcessStartCPUs(driver, vm,
3622 VIR_DOMAIN_RUNNING_MIGRATION_CANCELED,
3623 VIR_ASYNC_JOB_NONE) < 0) {
3624 VIR_WARN("Could not resume domain %s", vm->def->name);
3629 return 0;
3633 static int
3634 qemuProcessRecoverMigration(virQEMUDriver *driver,
3635 virDomainObj *vm,
3636 virDomainJobObj *job,
3637 unsigned int *stopFlags)
3639 virDomainJobStatus migStatus = VIR_DOMAIN_JOB_STATUS_NONE;
3640 qemuDomainJobPrivate *jobPriv = job->privateData;
3641 qemuDomainObjPrivate *priv = vm->privateData;
3642 virDomainState state;
3643 int reason;
3644 int rc;
3646 state = virDomainObjGetState(vm, &reason);
3648 qemuMigrationAnyRefreshStatus(vm, VIR_ASYNC_JOB_NONE, &migStatus);
3650 if (job->asyncJob == VIR_ASYNC_JOB_MIGRATION_OUT) {
3651 rc = qemuProcessRecoverMigrationOut(driver, vm, job, migStatus,
3652 state, reason, stopFlags);
3653 } else {
3654 rc = qemuProcessRecoverMigrationIn(driver, vm, job, state);
3657 if (rc < 0)
3658 return -1;
3660 if (rc > 0) {
3661 job->phase = QEMU_MIGRATION_PHASE_POSTCOPY_FAILED;
3662 /* Even though we restore the migration async job here, the APIs below
3663 * use VIR_ASYNC_JOB_NONE because we're already in a MODIFY job started
3664 * before we reconnected to the domain. */
3665 qemuProcessRestoreMigrationJob(vm, job);
3667 if (migStatus == VIR_DOMAIN_JOB_STATUS_POSTCOPY) {
3668 VIR_DEBUG("Post-copy migration of domain %s still running, it will be handled as unattended",
3669 vm->def->name);
3670 vm->job->asyncPaused = false;
3671 return 0;
3674 if (migStatus != VIR_DOMAIN_JOB_STATUS_HYPERVISOR_COMPLETED) {
3675 if (job->asyncJob == VIR_ASYNC_JOB_MIGRATION_OUT)
3676 qemuMigrationSrcPostcopyFailed(vm);
3677 else
3678 qemuMigrationDstPostcopyFailed(vm);
3679 /* Set the asyncPaused flag in case we're reconnecting to a domain
3680 * started by an older libvirt. */
3681 vm->job->asyncPaused = true;
3682 return 0;
3685 VIR_DEBUG("Post-copy migration of domain %s already finished",
3686 vm->def->name);
3687 if (job->asyncJob == VIR_ASYNC_JOB_MIGRATION_OUT) {
3688 qemuMigrationSrcComplete(driver, vm, VIR_ASYNC_JOB_NONE);
3689 /* No need to stop the restored job as the domain has just been
3690 * destroyed. */
3691 } else {
3692 qemuMigrationDstComplete(driver, vm, true, VIR_ASYNC_JOB_NONE, job);
3693 virDomainObjEndAsyncJob(vm);
3695 return 0;
3698 qemuMigrationParamsReset(vm, VIR_ASYNC_JOB_NONE,
3699 jobPriv->migParams, job->apiFlags);
3700 qemuDomainSetMaxMemLock(vm, 0, &priv->preMigrationMemlock);
3702 return 0;
3706 static void
3707 qemuProcessAbortSnapshotDelete(virDomainObj *vm,
3708 virDomainJobObj *job)
3710 size_t i;
3711 qemuDomainObjPrivate *priv = vm->privateData;
3712 qemuDomainJobPrivate *jobPriv = job->privateData;
3714 if (!jobPriv->snapshotDelete)
3715 return;
3717 for (i = 0; i < vm->def->ndisks; i++) {
3718 virDomainDiskDef *disk = vm->def->disks[i];
3719 g_autoptr(qemuBlockJobData) diskJob = qemuBlockJobDiskGetJob(disk);
3721 if (!diskJob)
3722 continue;
3724 if (diskJob->type != QEMU_BLOCKJOB_TYPE_COMMIT &&
3725 diskJob->type != QEMU_BLOCKJOB_TYPE_ACTIVE_COMMIT) {
3726 continue;
3729 qemuBlockJobSyncBegin(diskJob);
3731 qemuDomainObjEnterMonitor(vm);
3732 ignore_value(qemuMonitorBlockJobCancel(priv->mon, diskJob->name, false));
3733 qemuDomainObjExitMonitor(vm);
3735 diskJob->state = QEMU_BLOCKJOB_STATE_ABORTING;
3737 qemuBlockJobSyncEnd(vm, diskJob, VIR_ASYNC_JOB_NONE);
3742 static int
3743 qemuProcessRecoverJob(virQEMUDriver *driver,
3744 virDomainObj *vm,
3745 virDomainJobObj *job,
3746 unsigned int *stopFlags)
3748 virDomainState state;
3749 int reason;
3751 state = virDomainObjGetState(vm, &reason);
3753 VIR_DEBUG("Recovering job for domain %s, state=%s(%s), async=%s, job=%s",
3754 vm->def->name,
3755 virDomainStateTypeToString(state),
3756 virDomainStateReasonToString(state, reason),
3757 virDomainAsyncJobTypeToString(job->asyncJob),
3758 virDomainJobTypeToString(job->active));
3760 switch (job->asyncJob) {
3761 case VIR_ASYNC_JOB_MIGRATION_OUT:
3762 case VIR_ASYNC_JOB_MIGRATION_IN:
3763 if (qemuProcessRecoverMigration(driver, vm, job, stopFlags) < 0)
3764 return -1;
3765 break;
3767 case VIR_ASYNC_JOB_SAVE:
3768 case VIR_ASYNC_JOB_DUMP:
3769 case VIR_ASYNC_JOB_SNAPSHOT:
3770 qemuMigrationSrcCancel(vm, VIR_ASYNC_JOB_NONE, false);
3771 /* resume the domain but only if it was paused as a result of
3772 * running a migration-to-file operation. Although we are
3773 * recovering an async job, this function is run at startup
3774 * and must resume things using sync monitor connections. */
3775 if (state == VIR_DOMAIN_PAUSED &&
3776 ((job->asyncJob == VIR_ASYNC_JOB_DUMP &&
3777 reason == VIR_DOMAIN_PAUSED_DUMP) ||
3778 (job->asyncJob == VIR_ASYNC_JOB_SAVE &&
3779 reason == VIR_DOMAIN_PAUSED_SAVE) ||
3780 (job->asyncJob == VIR_ASYNC_JOB_SNAPSHOT &&
3781 (reason == VIR_DOMAIN_PAUSED_SNAPSHOT ||
3782 reason == VIR_DOMAIN_PAUSED_MIGRATION)) ||
3783 reason == VIR_DOMAIN_PAUSED_UNKNOWN)) {
3784 if (qemuProcessStartCPUs(driver, vm,
3785 VIR_DOMAIN_RUNNING_SAVE_CANCELED,
3786 VIR_ASYNC_JOB_NONE) < 0) {
3787 VIR_WARN("Could not resume domain '%s' after migration to file",
3788 vm->def->name);
3791 qemuProcessAbortSnapshotDelete(vm, job);
3792 break;
3794 case VIR_ASYNC_JOB_START:
3795 /* Already handled in VIR_DOMAIN_PAUSED_STARTING_UP check. */
3796 break;
3798 case VIR_ASYNC_JOB_BACKUP:
3799 /* Restore the config of the async job which is not persisted */
3800 qemuDomainObjRestoreAsyncJob(vm, VIR_ASYNC_JOB_BACKUP, 0,
3801 job->asyncStarted,
3802 VIR_DOMAIN_JOB_OPERATION_BACKUP,
3803 QEMU_DOMAIN_JOB_STATS_TYPE_BACKUP,
3804 VIR_DOMAIN_JOB_STATUS_ACTIVE,
3805 (VIR_JOB_DEFAULT_MASK |
3806 JOB_MASK(VIR_JOB_SUSPEND) |
3807 JOB_MASK(VIR_JOB_MODIFY)));
3808 break;
3810 case VIR_ASYNC_JOB_NONE:
3811 case VIR_ASYNC_JOB_LAST:
3812 break;
3815 if (!virDomainObjIsActive(vm))
3816 return -1;
3818 /* In case any special handling is added for job type that has been ignored
3819 * before, VIR_DOMAIN_TRACK_JOBS (from qemu_domain.h) needs to be updated
3820 * for the job to be properly tracked in domain state XML.
3822 switch (job->active) {
3823 case VIR_JOB_QUERY:
3824 /* harmless */
3825 break;
3827 case VIR_JOB_DESTROY:
3828 VIR_DEBUG("Domain %s should have already been destroyed",
3829 vm->def->name);
3830 return -1;
3832 case VIR_JOB_SUSPEND:
3833 /* mostly harmless */
3834 break;
3836 case VIR_JOB_MODIFY:
3837 /* XXX depending on the command we may be in an inconsistent state and
3838 * we should probably fall back to "monitor error" state and refuse to
3840 break;
3842 case VIR_JOB_MODIFY_MIGRATION_SAFE:
3843 /* event handlers, the reconnection code already handles them as we
3844 * might as well just missed the event while we were not running
3846 break;
3848 case VIR_JOB_MIGRATION_OP:
3849 case VIR_JOB_ABORT:
3850 case VIR_JOB_ASYNC:
3851 case VIR_JOB_ASYNC_NESTED:
3852 /* async job was already handled above */
3853 case VIR_JOB_NONE:
3854 case VIR_JOB_LAST:
3855 break;
3858 return 0;
3861 static int
3862 qemuProcessUpdateDevices(virQEMUDriver *driver,
3863 virDomainObj *vm)
3865 qemuDomainObjPrivate *priv = vm->privateData;
3866 virDomainDeviceDef dev;
3867 g_auto(GStrv) old = g_steal_pointer(&priv->qemuDevices);
3868 GStrv tmp;
3870 if (qemuDomainUpdateDeviceList(vm, VIR_ASYNC_JOB_NONE) < 0)
3871 return -1;
3873 if (!old)
3874 return 0;
3876 for (tmp = old; *tmp; tmp++) {
3877 if (!g_strv_contains((const char **) priv->qemuDevices, *tmp) &&
3878 virDomainDefFindDevice(vm->def, *tmp, &dev, false) == 0 &&
3879 qemuDomainRemoveDevice(driver, vm, &dev))
3880 return -1;
3883 return 0;
3886 static int
3887 qemuDomainPerfRestart(virDomainObj *vm)
3889 size_t i;
3890 virDomainDef *def = vm->def;
3891 qemuDomainObjPrivate *priv = vm->privateData;
3893 if (!(priv->perf = virPerfNew()))
3894 return -1;
3896 for (i = 0; i < VIR_PERF_EVENT_LAST; i++) {
3897 if (def->perf.events[i] &&
3898 def->perf.events[i] == VIR_TRISTATE_BOOL_YES) {
3900 /* Failure to re-enable the perf event should not be fatal */
3901 if (virPerfEventEnable(priv->perf, i, vm->pid) < 0)
3902 def->perf.events[i] = VIR_TRISTATE_BOOL_NO;
3906 return 0;
3910 static bool
3911 qemuProcessDomainMemoryDefNeedHugepagesPath(const virDomainMemoryDef *mem,
3912 const long system_pagesize)
3914 unsigned long long pagesize = 0;
3916 switch (mem->model) {
3917 case VIR_DOMAIN_MEMORY_MODEL_DIMM:
3918 pagesize = mem->source.dimm.pagesize;
3919 break;
3920 case VIR_DOMAIN_MEMORY_MODEL_VIRTIO_MEM:
3921 pagesize = mem->source.virtio_mem.pagesize;
3922 break;
3923 case VIR_DOMAIN_MEMORY_MODEL_NONE:
3924 case VIR_DOMAIN_MEMORY_MODEL_NVDIMM:
3925 case VIR_DOMAIN_MEMORY_MODEL_VIRTIO_PMEM:
3926 case VIR_DOMAIN_MEMORY_MODEL_SGX_EPC:
3927 case VIR_DOMAIN_MEMORY_MODEL_LAST:
3928 /* None of these can be backed by hugepages. */
3929 return false;
3932 return pagesize != 0 && pagesize != system_pagesize;
3936 static bool
3937 qemuProcessNeedHugepagesPath(virDomainDef *def,
3938 virDomainMemoryDef *mem)
3940 const long system_pagesize = virGetSystemPageSizeKB();
3941 size_t i;
3943 switch ((virDomainMemorySource)def->mem.source) {
3944 case VIR_DOMAIN_MEMORY_SOURCE_FILE:
3945 /* This needs a hugetlbfs mount. */
3946 return true;
3947 case VIR_DOMAIN_MEMORY_SOURCE_MEMFD:
3948 /* memfd works without a hugetlbfs mount */
3949 return false;
3950 case VIR_DOMAIN_MEMORY_SOURCE_NONE:
3951 case VIR_DOMAIN_MEMORY_SOURCE_ANONYMOUS:
3952 case VIR_DOMAIN_MEMORY_SOURCE_LAST:
3953 break;
3956 for (i = 0; i < def->mem.nhugepages; i++) {
3957 if (def->mem.hugepages[i].size != system_pagesize)
3958 return true;
3961 for (i = 0; i < def->nmems; i++) {
3962 if (qemuProcessDomainMemoryDefNeedHugepagesPath(def->mems[i], system_pagesize))
3963 return true;
3966 if (mem &&
3967 qemuProcessDomainMemoryDefNeedHugepagesPath(mem, system_pagesize))
3968 return true;
3970 return false;
3974 static bool
3975 qemuProcessNeedMemoryBackingPath(virDomainDef *def,
3976 virDomainMemoryDef *mem)
3978 size_t i;
3979 size_t numaNodes;
3981 if (def->mem.source == VIR_DOMAIN_MEMORY_SOURCE_FILE ||
3982 def->mem.access != VIR_DOMAIN_MEMORY_ACCESS_DEFAULT)
3983 return true;
3985 numaNodes = virDomainNumaGetNodeCount(def->numa);
3986 for (i = 0; i < numaNodes; i++) {
3987 if (virDomainNumaGetNodeMemoryAccessMode(def->numa, i)
3988 != VIR_DOMAIN_MEMORY_ACCESS_DEFAULT)
3989 return true;
3992 for (i = 0; i < def->nmems; i++) {
3993 if (def->mems[i]->access != VIR_DOMAIN_MEMORY_ACCESS_DEFAULT)
3994 return true;
3997 if (mem) {
3998 switch (mem->model) {
3999 case VIR_DOMAIN_MEMORY_MODEL_DIMM:
4000 case VIR_DOMAIN_MEMORY_MODEL_VIRTIO_MEM:
4001 if (mem->access != VIR_DOMAIN_MEMORY_ACCESS_DEFAULT) {
4002 /* No need to check for access mode on the target node,
4003 * it was checked for in the previous loop. */
4004 return true;
4006 break;
4008 case VIR_DOMAIN_MEMORY_MODEL_NONE:
4009 case VIR_DOMAIN_MEMORY_MODEL_NVDIMM:
4010 case VIR_DOMAIN_MEMORY_MODEL_VIRTIO_PMEM:
4011 case VIR_DOMAIN_MEMORY_MODEL_SGX_EPC:
4012 case VIR_DOMAIN_MEMORY_MODEL_LAST:
4013 /* Backed by user provided path. Not stored in memory
4014 * backing dir anyway. */
4015 break;
4019 return false;
4023 static int
4024 qemuProcessBuildDestroyMemoryPathsImpl(virQEMUDriver *driver,
4025 virDomainObj *vm,
4026 const char *path,
4027 bool build)
4029 if (build) {
4030 if (virFileExists(path))
4031 return 0;
4033 if (g_mkdir_with_parents(path, 0700) < 0) {
4034 virReportSystemError(errno,
4035 _("Unable to create %1$s"),
4036 path);
4037 return -1;
4040 if (qemuDomainNamespaceSetupPath(vm, path, NULL) < 0)
4041 return -1;
4043 if (qemuSecurityDomainSetPathLabel(driver, vm, path, true) < 0)
4044 return -1;
4045 } else {
4046 if (virFileDeleteTree(path) < 0)
4047 return -1;
4050 return 0;
4055 qemuProcessBuildDestroyMemoryPaths(virQEMUDriver *driver,
4056 virDomainObj *vm,
4057 virDomainMemoryDef *mem,
4058 bool build)
4061 g_autoptr(virQEMUDriverConfig) cfg = virQEMUDriverGetConfig(driver);
4062 size_t i;
4063 bool shouldBuildHP = false;
4064 bool shouldBuildMB = false;
4066 if (build) {
4067 shouldBuildHP = qemuProcessNeedHugepagesPath(vm->def, mem);
4068 shouldBuildMB = qemuProcessNeedMemoryBackingPath(vm->def, mem);
4071 if (!build || shouldBuildHP) {
4072 for (i = 0; i < cfg->nhugetlbfs; i++) {
4073 g_autofree char *path = NULL;
4074 path = qemuGetDomainHugepagePath(driver, vm->def, &cfg->hugetlbfs[i]);
4076 if (!path)
4077 return -1;
4079 if (build &&
4080 qemuHugepageMakeBasedir(driver, &cfg->hugetlbfs[i]) < 0)
4081 return -1;
4083 if (qemuProcessBuildDestroyMemoryPathsImpl(driver, vm,
4084 path, build) < 0)
4085 return -1;
4089 if (!build || shouldBuildMB) {
4090 g_autofree char *path = NULL;
4091 if (qemuGetMemoryBackingDomainPath(driver, vm->def, &path) < 0)
4092 return -1;
4094 if (qemuProcessBuildDestroyMemoryPathsImpl(driver, vm,
4095 path, build) < 0)
4096 return -1;
4099 return 0;
4104 qemuProcessDestroyMemoryBackingPath(virQEMUDriver *driver,
4105 virDomainObj *vm,
4106 virDomainMemoryDef *mem)
4108 g_autofree char *path = NULL;
4110 if (qemuGetMemoryBackingPath(driver, vm->def, mem->info.alias, &path) < 0)
4111 return -1;
4113 if (unlink(path) < 0 &&
4114 errno != ENOENT) {
4115 virReportSystemError(errno, _("Unable to remove %1$s"), path);
4116 return -1;
4119 return 0;
4123 static int
4124 qemuProcessVNCAllocatePorts(virQEMUDriver *driver,
4125 virDomainGraphicsDef *graphics,
4126 bool allocate)
4128 unsigned short port;
4130 if (!allocate) {
4131 if (graphics->data.vnc.autoport)
4132 graphics->data.vnc.port = 5900;
4134 return 0;
4137 if (graphics->data.vnc.autoport) {
4138 if (virPortAllocatorAcquire(driver->remotePorts, &port) < 0)
4139 return -1;
4140 graphics->data.vnc.port = port;
4141 graphics->data.vnc.portReserved = true;
4144 if (graphics->data.vnc.websocket == -1) {
4145 if (virPortAllocatorAcquire(driver->webSocketPorts, &port) < 0)
4146 return -1;
4147 graphics->data.vnc.websocket = port;
4148 graphics->data.vnc.websocketGenerated = true;
4149 graphics->data.vnc.websocketReserved = true;
4152 return 0;
4155 static int
4156 qemuProcessSPICEAllocatePorts(virQEMUDriver *driver,
4157 virDomainGraphicsDef *graphics,
4158 bool allocate)
4160 g_autoptr(virQEMUDriverConfig) cfg = virQEMUDriverGetConfig(driver);
4161 unsigned short port = 0;
4162 unsigned short tlsPort;
4163 size_t i;
4164 int defaultMode = graphics->data.spice.defaultMode;
4166 bool needTLSPort = false;
4167 bool needPort = false;
4169 if (graphics->data.spice.autoport) {
4170 /* check if tlsPort or port need allocation */
4171 for (i = 0; i < VIR_DOMAIN_GRAPHICS_SPICE_CHANNEL_LAST; i++) {
4172 switch (graphics->data.spice.channels[i]) {
4173 case VIR_DOMAIN_GRAPHICS_SPICE_CHANNEL_MODE_SECURE:
4174 needTLSPort = true;
4175 break;
4177 case VIR_DOMAIN_GRAPHICS_SPICE_CHANNEL_MODE_INSECURE:
4178 needPort = true;
4179 break;
4181 case VIR_DOMAIN_GRAPHICS_SPICE_CHANNEL_MODE_ANY:
4182 /* default mode will be used */
4183 break;
4186 switch (defaultMode) {
4187 case VIR_DOMAIN_GRAPHICS_SPICE_CHANNEL_MODE_SECURE:
4188 needTLSPort = true;
4189 break;
4191 case VIR_DOMAIN_GRAPHICS_SPICE_CHANNEL_MODE_INSECURE:
4192 needPort = true;
4193 break;
4195 case VIR_DOMAIN_GRAPHICS_SPICE_CHANNEL_MODE_ANY:
4196 if (cfg->spiceTLS)
4197 needTLSPort = true;
4198 needPort = true;
4199 break;
4203 if (!allocate) {
4204 if (needPort || graphics->data.spice.port == -1)
4205 graphics->data.spice.port = 5901;
4207 if (needTLSPort || graphics->data.spice.tlsPort == -1)
4208 graphics->data.spice.tlsPort = 5902;
4210 return 0;
4213 if (needPort || graphics->data.spice.port == -1) {
4214 if (virPortAllocatorAcquire(driver->remotePorts, &port) < 0)
4215 return -1;
4217 graphics->data.spice.port = port;
4218 graphics->data.spice.portReserved = true;
4221 if (needTLSPort || graphics->data.spice.tlsPort == -1) {
4222 if (!cfg->spiceTLS) {
4223 virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
4224 _("Auto allocation of spice TLS port requested but spice TLS is disabled in qemu.conf"));
4225 return -1;
4228 if (virPortAllocatorAcquire(driver->remotePorts, &tlsPort) < 0)
4229 return -1;
4231 graphics->data.spice.tlsPort = tlsPort;
4232 graphics->data.spice.tlsPortReserved = true;
4235 return 0;
4239 static int
4240 qemuProcessVerifyHypervFeatures(virDomainDef *def,
4241 virCPUData *cpu)
4243 size_t i;
4244 int rc;
4246 for (i = 0; i < VIR_DOMAIN_HYPERV_LAST; i++) {
4247 g_autofree char *cpuFeature = NULL;
4249 /* always supported string property */
4250 if (i == VIR_DOMAIN_HYPERV_VENDOR_ID ||
4251 i == VIR_DOMAIN_HYPERV_SPINLOCKS)
4252 continue;
4254 if (def->hyperv_features[i] != VIR_TRISTATE_SWITCH_ON)
4255 continue;
4257 cpuFeature = g_strdup_printf("hv-%s", virDomainHypervTypeToString(i));
4259 rc = virCPUDataCheckFeature(cpu, cpuFeature);
4261 if (rc < 0) {
4262 return -1;
4263 } else if (rc == 1) {
4264 if (i == VIR_DOMAIN_HYPERV_STIMER) {
4265 if (def->hyperv_stimer_direct != VIR_TRISTATE_SWITCH_ON)
4266 continue;
4268 rc = virCPUDataCheckFeature(cpu, VIR_CPU_x86_HV_STIMER_DIRECT);
4269 if (rc < 0)
4270 return -1;
4271 else if (rc == 1)
4272 continue;
4274 virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
4275 _("host doesn't support hyperv stimer '%1$s' feature"),
4276 "direct");
4277 return -1;
4279 continue;
4282 switch ((virDomainHyperv) i) {
4283 case VIR_DOMAIN_HYPERV_RELAXED:
4284 case VIR_DOMAIN_HYPERV_VAPIC:
4285 VIR_WARN("host doesn't support hyperv '%s' feature",
4286 virDomainHypervTypeToString(i));
4287 break;
4289 case VIR_DOMAIN_HYPERV_VPINDEX:
4290 case VIR_DOMAIN_HYPERV_RUNTIME:
4291 case VIR_DOMAIN_HYPERV_SYNIC:
4292 case VIR_DOMAIN_HYPERV_STIMER:
4293 case VIR_DOMAIN_HYPERV_RESET:
4294 case VIR_DOMAIN_HYPERV_FREQUENCIES:
4295 case VIR_DOMAIN_HYPERV_REENLIGHTENMENT:
4296 case VIR_DOMAIN_HYPERV_TLBFLUSH:
4297 case VIR_DOMAIN_HYPERV_IPI:
4298 case VIR_DOMAIN_HYPERV_EVMCS:
4299 case VIR_DOMAIN_HYPERV_AVIC:
4300 virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
4301 _("host doesn't support hyperv '%1$s' feature"),
4302 virDomainHypervTypeToString(i));
4303 return -1;
4305 case VIR_DOMAIN_HYPERV_SPINLOCKS:
4306 case VIR_DOMAIN_HYPERV_VENDOR_ID:
4307 case VIR_DOMAIN_HYPERV_LAST:
4308 break;
4312 return 0;
4316 static int
4317 qemuProcessVerifyKVMFeatures(virDomainDef *def,
4318 virCPUData *cpu)
4320 int rc = 0;
4322 if (def->features[VIR_DOMAIN_FEATURE_PVSPINLOCK] != VIR_TRISTATE_SWITCH_ON)
4323 return 0;
4325 rc = virCPUDataCheckFeature(cpu, VIR_CPU_x86_KVM_PV_UNHALT);
4327 if (rc <= 0) {
4328 if (rc == 0)
4329 virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
4330 _("host doesn't support paravirtual spinlocks"));
4331 return -1;
4334 return 0;
4338 static int
4339 qemuProcessVerifyCPUFeatures(virDomainDef *def,
4340 virCPUData *cpu)
4342 int rc;
4344 rc = virCPUCheckFeature(def->os.arch, def->cpu, "invtsc");
4346 if (rc < 0) {
4347 return -1;
4348 } else if (rc == 1) {
4349 rc = virCPUDataCheckFeature(cpu, "invtsc");
4350 if (rc <= 0) {
4351 if (rc == 0) {
4352 virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
4353 _("host doesn't support invariant TSC"));
4355 return -1;
4359 return 0;
4363 /* returns the QOM path to the first vcpu */
4364 static const char *
4365 qemuProcessGetVCPUQOMPath(virDomainObj *vm)
4367 virDomainVcpuDef *vcpu = virDomainDefGetVcpu(vm->def, 0);
4368 qemuDomainVcpuPrivate *vcpupriv;
4370 if (vcpu &&
4371 (vcpupriv = QEMU_DOMAIN_VCPU_PRIVATE(vcpu)) &&
4372 vcpupriv->qomPath)
4373 return vcpupriv->qomPath;
4375 return "/machine/unattached/device[0]";
4379 static int
4380 qemuProcessFetchGuestCPU(virDomainObj *vm,
4381 virDomainAsyncJob asyncJob,
4382 virCPUData **enabled,
4383 virCPUData **disabled)
4385 qemuDomainObjPrivate *priv = vm->privateData;
4386 g_autoptr(virCPUData) dataEnabled = NULL;
4387 g_autoptr(virCPUData) dataDisabled = NULL;
4388 const char *cpuQOMPath = qemuProcessGetVCPUQOMPath(vm);
4389 bool generic;
4390 int rc;
4392 *enabled = NULL;
4393 *disabled = NULL;
4395 generic = virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_CPU_UNAVAILABLE_FEATURES);
4397 if (!generic && !ARCH_IS_X86(vm->def->os.arch))
4398 return 0;
4400 if (qemuDomainObjEnterMonitorAsync(vm, asyncJob) < 0)
4401 return -1;
4403 if (generic) {
4404 rc = qemuMonitorGetGuestCPU(priv->mon,
4405 vm->def->os.arch,
4406 cpuQOMPath,
4407 virQEMUCapsCPUFeatureFromQEMU,
4408 &dataEnabled, &dataDisabled);
4409 } else {
4410 rc = qemuMonitorGetGuestCPUx86(priv->mon, cpuQOMPath, &dataEnabled, &dataDisabled);
4413 qemuDomainObjExitMonitor(vm);
4415 if (rc == -1)
4416 return -1;
4418 *enabled = g_steal_pointer(&dataEnabled);
4419 *disabled = g_steal_pointer(&dataDisabled);
4420 return 0;
4424 static int
4425 qemuProcessVerifyCPU(virDomainObj *vm,
4426 virCPUData *cpu)
4428 virDomainDef *def = vm->def;
4430 if (!cpu)
4431 return 0;
4433 if (qemuProcessVerifyKVMFeatures(def, cpu) < 0 ||
4434 qemuProcessVerifyHypervFeatures(def, cpu) < 0)
4435 return -1;
4437 if (!def->cpu ||
4438 (def->cpu->mode == VIR_CPU_MODE_CUSTOM &&
4439 !def->cpu->model))
4440 return 0;
4442 if (qemuProcessVerifyCPUFeatures(def, cpu) < 0)
4443 return -1;
4445 return 0;
4449 static int
4450 qemuProcessUpdateLiveGuestCPU(virDomainObj *vm,
4451 virCPUData *enabled,
4452 virCPUData *disabled)
4454 virDomainDef *def = vm->def;
4455 int rc;
4457 if (!enabled)
4458 return 0;
4460 if (!def->cpu ||
4461 (def->cpu->mode == VIR_CPU_MODE_CUSTOM &&
4462 !def->cpu->model))
4463 return 0;
4465 if ((rc = virCPUUpdateLive(def->os.arch, def->cpu, enabled, disabled)) < 0)
4466 return -1;
4468 if (rc == 0)
4469 def->cpu->check = VIR_CPU_CHECK_FULL;
4471 return 0;
4475 static int
4476 qemuProcessUpdateAndVerifyCPU(virDomainObj *vm,
4477 virDomainAsyncJob asyncJob)
4479 g_autoptr(virCPUData) cpu = NULL;
4480 g_autoptr(virCPUData) disabled = NULL;
4482 if (qemuProcessFetchGuestCPU(vm, asyncJob, &cpu, &disabled) < 0)
4483 return -1;
4485 if (qemuProcessVerifyCPU(vm, cpu) < 0)
4486 return -1;
4488 if (qemuProcessUpdateLiveGuestCPU(vm, cpu, disabled) < 0)
4489 return -1;
4491 return 0;
4495 static int
4496 qemuProcessFetchCPUDefinitions(virDomainObj *vm,
4497 virDomainAsyncJob asyncJob,
4498 virDomainCapsCPUModels **cpuModels)
4500 qemuDomainObjPrivate *priv = vm->privateData;
4501 g_autoptr(virDomainCapsCPUModels) models = NULL;
4502 int rc;
4504 if (qemuDomainObjEnterMonitorAsync(vm, asyncJob) < 0)
4505 return -1;
4507 rc = virQEMUCapsFetchCPUModels(priv->mon, vm->def->os.arch, &models);
4509 qemuDomainObjExitMonitor(vm);
4510 if (rc < 0)
4511 return -1;
4513 *cpuModels = g_steal_pointer(&models);
4514 return 0;
4518 static int
4519 qemuProcessUpdateCPU(virDomainObj *vm,
4520 virDomainAsyncJob asyncJob)
4522 g_autoptr(virCPUData) cpu = NULL;
4523 g_autoptr(virCPUData) disabled = NULL;
4524 g_autoptr(virDomainCapsCPUModels) models = NULL;
4526 /* The host CPU model comes from host caps rather than QEMU caps so
4527 * fallback must be allowed no matter what the user specified in the XML.
4529 vm->def->cpu->fallback = VIR_CPU_FALLBACK_ALLOW;
4531 if (qemuProcessFetchGuestCPU(vm, asyncJob, &cpu, &disabled) < 0)
4532 return -1;
4534 if (qemuProcessUpdateLiveGuestCPU(vm, cpu, disabled) < 0)
4535 return -1;
4537 if (qemuProcessFetchCPUDefinitions(vm, asyncJob, &models) < 0 ||
4538 virCPUTranslate(vm->def->os.arch, vm->def->cpu, models) < 0)
4539 return -1;
4541 return 0;
/* Source description handed to qemuPrepareNVRAMHelper() via its opaque
 * argument. */
struct qemuPrepareNVRAMHelperData {
    int srcFD;              /* descriptor the data is read from */
    const char *srcPath;    /* path of the source, used in error messages */
};


/**
 * qemuPrepareNVRAMHelper:
 * @dstFD: descriptor to write to
 * @dstPath: path of the destination, used in error messages
 * @opaque: pointer to struct qemuPrepareNVRAMHelperData
 *
 * Copies everything readable from the source descriptor to @dstFD in chunks
 * of up to 1024 bytes until a read returns zero bytes.
 *
 * Returns 0 on success, -2 when reading the source fails, -1 when writing the
 * destination fails.
 */
static int
qemuPrepareNVRAMHelper(int dstFD,
                       const char *dstPath,
                       const void *opaque)
{
    const struct qemuPrepareNVRAMHelperData *src = opaque;
    char chunk[1024];

    for (;;) {
        ssize_t nread = saferead(src->srcFD, chunk, sizeof(chunk));

        if (nread < 0) {
            virReportSystemError(errno,
                                 _("Unable to read from file '%1$s'"),
                                 src->srcPath);
            return -2;
        }

        if (safewrite(dstFD, chunk, nread) < 0) {
            virReportSystemError(errno,
                                 _("Unable to write to file '%1$s'"),
                                 dstPath);
            return -1;
        }

        /* A zero-length read ends the copy. */
        if (nread == 0)
            break;
    }

    return 0;
}
4580 static int
4581 qemuPrepareNVRAM(virQEMUDriver *driver,
4582 virDomainObj *vm,
4583 bool reset_nvram)
4585 g_autoptr(virQEMUDriverConfig) cfg = virQEMUDriverGetConfig(driver);
4586 VIR_AUTOCLOSE srcFD = -1;
4587 virDomainLoaderDef *loader = vm->def->os.loader;
4588 struct qemuPrepareNVRAMHelperData data;
4590 if (!loader || !loader->nvram)
4591 return 0;
4593 if (!virStorageSourceIsLocalStorage(loader->nvram)) {
4594 if (!reset_nvram) {
4595 return 0;
4596 } else {
4597 virReportError(VIR_ERR_OPERATION_UNSUPPORTED, "%s",
4598 _("resetting of nvram is not supported with network backed nvram"));
4599 return -1;
4603 if (virFileExists(loader->nvram->path) && !reset_nvram)
4604 return 0;
4606 if (!loader->nvramTemplate) {
4607 virReportError(VIR_ERR_OPERATION_FAILED,
4608 _("unable to find any master var store for loader: %1$s"),
4609 loader->path);
4610 return -1;
4613 if ((srcFD = virFileOpenAs(loader->nvramTemplate, O_RDONLY,
4614 0, -1, -1, 0)) < 0) {
4615 virReportSystemError(-srcFD,
4616 _("Failed to open file '%1$s'"),
4617 loader->nvramTemplate);
4618 return -1;
4621 data.srcFD = srcFD;
4622 data.srcPath = loader->nvramTemplate;
4624 if (virFileRewrite(loader->nvram->path,
4625 S_IRUSR | S_IWUSR,
4626 cfg->user, cfg->group,
4627 qemuPrepareNVRAMHelper,
4628 &data) < 0) {
4629 return -1;
4632 return 0;
4636 static void
4637 qemuLogOperation(virDomainObj *vm,
4638 const char *msg,
4639 virCommand *cmd,
4640 qemuLogContext *logCtxt)
4642 g_autofree char *timestamp = NULL;
4643 qemuDomainObjPrivate *priv = vm->privateData;
4644 int qemuVersion = virQEMUCapsGetVersion(priv->qemuCaps);
4645 const char *package = virQEMUCapsGetPackage(priv->qemuCaps);
4646 g_autofree char *hostname = virGetHostname();
4647 struct utsname uts;
4649 uname(&uts);
4651 if ((timestamp = virTimeStringNow()) == NULL)
4652 return;
4654 if (qemuLogContextWrite(logCtxt,
4655 "%s: %s %s, qemu version: %d.%d.%d%s, kernel: %s, hostname: %s\n",
4656 timestamp, msg, VIR_LOG_VERSION_STRING,
4657 (qemuVersion / 1000000) % 1000,
4658 (qemuVersion / 1000) % 1000,
4659 qemuVersion % 1000,
4660 NULLSTR_EMPTY(package),
4661 uts.release,
4662 NULLSTR_EMPTY(hostname)) < 0)
4663 return;
4665 if (cmd) {
4666 g_autofree char *args = virCommandToString(cmd, true);
4667 qemuLogContextWrite(logCtxt, "%s\n", args);
4672 void
4673 qemuProcessIncomingDefFree(qemuProcessIncomingDef *inc)
4675 if (!inc)
4676 return;
4678 g_free(inc->address);
4679 g_free(inc->uri);
4680 g_free(inc);
4685 * This function does not copy @path, the caller is responsible for keeping
4686 * the @path pointer valid during the lifetime of the allocated
4687 * qemuProcessIncomingDef structure.
4689 * The caller is responsible for closing @fd, calling
4690 * qemuProcessIncomingDefFree will NOT close it.
4692 qemuProcessIncomingDef *
4693 qemuProcessIncomingDefNew(virQEMUCaps *qemuCaps,
4694 const char *listenAddress,
4695 const char *migrateFrom,
4696 int fd,
4697 const char *path)
4699 qemuProcessIncomingDef *inc = NULL;
4701 if (qemuMigrationDstCheckProtocol(qemuCaps, migrateFrom) < 0)
4702 return NULL;
4704 inc = g_new0(qemuProcessIncomingDef, 1);
4706 inc->address = g_strdup(listenAddress);
4708 inc->uri = qemuMigrationDstGetURI(migrateFrom, fd);
4709 if (!inc->uri)
4710 goto error;
4712 inc->fd = fd;
4713 inc->path = path;
4715 return inc;
4717 error:
4718 qemuProcessIncomingDefFree(inc);
4719 return NULL;
4724 * This function starts a new VIR_ASYNC_JOB_START async job. The user is
4725 * responsible for calling qemuProcessEndJob to stop this job and for passing
4726 * VIR_ASYNC_JOB_START as @asyncJob argument to any function requiring this
4727 * parameter between qemuProcessBeginJob and qemuProcessEndJob.
4730 qemuProcessBeginJob(virDomainObj *vm,
4731 virDomainJobOperation operation,
4732 unsigned int apiFlags)
4734 if (virDomainObjBeginAsyncJob(vm, VIR_ASYNC_JOB_START,
4735 operation, apiFlags) < 0)
4736 return -1;
4738 qemuDomainObjSetAsyncJobMask(vm, VIR_JOB_NONE);
4739 return 0;
4743 void
4744 qemuProcessEndJob(virDomainObj *vm)
4746 virDomainObjEndAsyncJob(vm);
4750 static int
4751 qemuProcessStartHook(virQEMUDriver *driver,
4752 virDomainObj *vm,
4753 virHookQemuOpType op,
4754 virHookSubopType subop)
4756 qemuDomainObjPrivate *priv = vm->privateData;
4757 g_autofree char *xml = NULL;
4758 int ret;
4760 if (!virHookPresent(VIR_HOOK_DRIVER_QEMU))
4761 return 0;
4763 if (!(xml = qemuDomainDefFormatXML(driver, priv->qemuCaps, vm->def, 0)))
4764 return -1;
4766 ret = virHookCall(VIR_HOOK_DRIVER_QEMU, vm->def->name, op, subop,
4767 NULL, xml, NULL);
4769 return ret;
4773 static int
4774 qemuProcessGraphicsReservePorts(virDomainGraphicsDef *graphics,
4775 bool reconnect)
4777 virDomainGraphicsListenDef *glisten;
4779 if (graphics->nListens <= 0)
4780 return 0;
4782 glisten = &graphics->listens[0];
4784 if (glisten->type != VIR_DOMAIN_GRAPHICS_LISTEN_TYPE_ADDRESS &&
4785 glisten->type != VIR_DOMAIN_GRAPHICS_LISTEN_TYPE_NETWORK)
4786 return 0;
4788 switch (graphics->type) {
4789 case VIR_DOMAIN_GRAPHICS_TYPE_VNC:
4790 if (!graphics->data.vnc.autoport ||
4791 reconnect) {
4792 if (virPortAllocatorSetUsed(graphics->data.vnc.port) < 0)
4793 return -1;
4794 graphics->data.vnc.portReserved = true;
4796 if (graphics->data.vnc.websocket > 0) {
4797 if (virPortAllocatorSetUsed(graphics->data.vnc.websocket) < 0)
4798 return -1;
4799 graphics->data.vnc.websocketReserved = true;
4801 break;
4803 case VIR_DOMAIN_GRAPHICS_TYPE_SPICE:
4804 if (graphics->data.spice.autoport && !reconnect)
4805 return 0;
4807 if (graphics->data.spice.port > 0) {
4808 if (virPortAllocatorSetUsed(graphics->data.spice.port) < 0)
4809 return -1;
4810 graphics->data.spice.portReserved = true;
4813 if (graphics->data.spice.tlsPort > 0) {
4814 if (virPortAllocatorSetUsed(graphics->data.spice.tlsPort) < 0)
4815 return -1;
4816 graphics->data.spice.tlsPortReserved = true;
4818 break;
4820 case VIR_DOMAIN_GRAPHICS_TYPE_SDL:
4821 case VIR_DOMAIN_GRAPHICS_TYPE_RDP:
4822 case VIR_DOMAIN_GRAPHICS_TYPE_DESKTOP:
4823 case VIR_DOMAIN_GRAPHICS_TYPE_EGL_HEADLESS:
4824 case VIR_DOMAIN_GRAPHICS_TYPE_DBUS:
4825 case VIR_DOMAIN_GRAPHICS_TYPE_LAST:
4826 break;
4829 return 0;
4833 static int
4834 qemuProcessGraphicsAllocatePorts(virQEMUDriver *driver,
4835 virDomainGraphicsDef *graphics,
4836 bool allocate)
4838 virDomainGraphicsListenDef *glisten;
4840 if (graphics->nListens <= 0)
4841 return 0;
4843 glisten = &graphics->listens[0];
4845 if (glisten->type != VIR_DOMAIN_GRAPHICS_LISTEN_TYPE_ADDRESS &&
4846 glisten->type != VIR_DOMAIN_GRAPHICS_LISTEN_TYPE_NETWORK)
4847 return 0;
4849 switch (graphics->type) {
4850 case VIR_DOMAIN_GRAPHICS_TYPE_VNC:
4851 if (qemuProcessVNCAllocatePorts(driver, graphics, allocate) < 0)
4852 return -1;
4853 break;
4855 case VIR_DOMAIN_GRAPHICS_TYPE_SPICE:
4856 if (qemuProcessSPICEAllocatePorts(driver, graphics, allocate) < 0)
4857 return -1;
4858 break;
4860 case VIR_DOMAIN_GRAPHICS_TYPE_SDL:
4861 case VIR_DOMAIN_GRAPHICS_TYPE_RDP:
4862 case VIR_DOMAIN_GRAPHICS_TYPE_DESKTOP:
4863 case VIR_DOMAIN_GRAPHICS_TYPE_EGL_HEADLESS:
4864 case VIR_DOMAIN_GRAPHICS_TYPE_DBUS:
4865 case VIR_DOMAIN_GRAPHICS_TYPE_LAST:
4866 break;
4869 return 0;
4872 static int
4873 qemuProcessGetNetworkAddress(const char *netname,
4874 char **netaddr)
4876 g_autoptr(virConnect) conn = NULL;
4877 g_autoptr(virNetwork) net = NULL;
4878 g_autoptr(virNetworkDef) netdef = NULL;
4879 virNetworkIPDef *ipdef;
4880 virSocketAddr addr;
4881 virSocketAddr *addrptr = NULL;
4882 char *dev_name = NULL;
4883 g_autofree char *xml = NULL;
4885 *netaddr = NULL;
4887 if (!(conn = virGetConnectNetwork()))
4888 return -1;
4890 net = virNetworkLookupByName(conn, netname);
4891 if (!net)
4892 return -1;
4894 xml = virNetworkGetXMLDesc(net, 0);
4895 if (!xml)
4896 return -1;
4898 netdef = virNetworkDefParse(xml, NULL, NULL, false);
4899 if (!netdef)
4900 return -1;
4902 switch ((virNetworkForwardType) netdef->forward.type) {
4903 case VIR_NETWORK_FORWARD_NONE:
4904 case VIR_NETWORK_FORWARD_NAT:
4905 case VIR_NETWORK_FORWARD_ROUTE:
4906 case VIR_NETWORK_FORWARD_OPEN:
4907 ipdef = virNetworkDefGetIPByIndex(netdef, AF_UNSPEC, 0);
4908 if (!ipdef) {
4909 virReportError(VIR_ERR_INTERNAL_ERROR,
4910 _("network '%1$s' doesn't have an IP address"),
4911 netdef->name);
4912 return -1;
4914 addrptr = &ipdef->address;
4915 break;
4917 case VIR_NETWORK_FORWARD_BRIDGE:
4918 if ((dev_name = netdef->bridge))
4919 break;
4921 * fall through if netdef->bridge wasn't set, since that is
4922 * macvtap bridge mode network.
4924 G_GNUC_FALLTHROUGH;
4926 case VIR_NETWORK_FORWARD_PRIVATE:
4927 case VIR_NETWORK_FORWARD_VEPA:
4928 case VIR_NETWORK_FORWARD_PASSTHROUGH:
4929 if ((netdef->forward.nifs > 0) && netdef->forward.ifs)
4930 dev_name = netdef->forward.ifs[0].device.dev;
4932 if (!dev_name) {
4933 virReportError(VIR_ERR_INTERNAL_ERROR,
4934 _("network '%1$s' has no associated interface or bridge"),
4935 netdef->name);
4936 return -1;
4938 break;
4940 case VIR_NETWORK_FORWARD_HOSTDEV:
4941 break;
4943 case VIR_NETWORK_FORWARD_LAST:
4944 default:
4945 virReportEnumRangeError(virNetworkForwardType, netdef->forward.type);
4946 return -1;
4949 if (dev_name) {
4950 if (virNetDevIPAddrGet(dev_name, &addr) < 0)
4951 return -1;
4952 addrptr = &addr;
4955 if (!(addrptr &&
4956 (*netaddr = virSocketAddrFormat(addrptr)))) {
4957 return -1;
4960 return 0;
4964 static int
4965 qemuProcessGraphicsSetupNetworkAddress(virDomainGraphicsListenDef *glisten,
4966 const char *listenAddr)
4968 int rc;
4970 /* TODO: reject configuration without network specified for network listen */
4971 if (!glisten->network) {
4972 glisten->address = g_strdup(listenAddr);
4973 return 0;
4976 rc = qemuProcessGetNetworkAddress(glisten->network, &glisten->address);
4977 if (rc <= -2) {
4978 virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
4979 _("network-based listen isn't possible, network driver isn't present"));
4980 return -1;
4982 if (rc < 0)
4983 return -1;
4985 return 0;
4989 static int
4990 qemuProcessGraphicsSetupDBus(virQEMUDriver *driver,
4991 virDomainGraphicsDef *graphics,
4992 virDomainObj *vm)
4994 if (graphics->type != VIR_DOMAIN_GRAPHICS_TYPE_DBUS)
4995 return 0;
4997 if (!graphics->data.dbus.p2p && !graphics->data.dbus.address) {
4998 graphics->data.dbus.address = qemuDBusGetAddress(driver, vm);
5001 return 0;
5005 static int
5006 qemuProcessGraphicsSetupListen(virQEMUDriver *driver,
5007 virDomainGraphicsDef *graphics,
5008 virDomainObj *vm)
5010 qemuDomainObjPrivate *priv = vm->privateData;
5011 g_autoptr(virQEMUDriverConfig) cfg = virQEMUDriverGetConfig(driver);
5012 const char *type = virDomainGraphicsTypeToString(graphics->type);
5013 char *listenAddr = NULL;
5014 bool useSocket = false;
5015 size_t i;
5017 switch (graphics->type) {
5018 case VIR_DOMAIN_GRAPHICS_TYPE_VNC:
5019 useSocket = cfg->vncAutoUnixSocket;
5020 listenAddr = cfg->vncListen;
5021 break;
5023 case VIR_DOMAIN_GRAPHICS_TYPE_SPICE:
5024 useSocket = cfg->spiceAutoUnixSocket;
5025 listenAddr = cfg->spiceListen;
5026 break;
5028 case VIR_DOMAIN_GRAPHICS_TYPE_SDL:
5029 case VIR_DOMAIN_GRAPHICS_TYPE_RDP:
5030 case VIR_DOMAIN_GRAPHICS_TYPE_DESKTOP:
5031 case VIR_DOMAIN_GRAPHICS_TYPE_EGL_HEADLESS:
5032 case VIR_DOMAIN_GRAPHICS_TYPE_DBUS:
5033 case VIR_DOMAIN_GRAPHICS_TYPE_LAST:
5034 break;
5037 for (i = 0; i < graphics->nListens; i++) {
5038 virDomainGraphicsListenDef *glisten = &graphics->listens[i];
5040 switch (glisten->type) {
5041 case VIR_DOMAIN_GRAPHICS_LISTEN_TYPE_ADDRESS:
5042 if (!glisten->address) {
5043 /* If there is no address specified and qemu.conf has
5044 * *_auto_unix_socket set we should use unix socket as
5045 * default instead of tcp listen. */
5046 if (useSocket) {
5047 memset(glisten, 0, sizeof(*glisten));
5048 glisten->socket = g_strdup_printf("%s/%s.sock", priv->libDir,
5049 type);
5050 glisten->fromConfig = true;
5051 glisten->type = VIR_DOMAIN_GRAPHICS_LISTEN_TYPE_SOCKET;
5052 } else if (listenAddr) {
5053 glisten->address = g_strdup(listenAddr);
5054 glisten->fromConfig = true;
5057 break;
5059 case VIR_DOMAIN_GRAPHICS_LISTEN_TYPE_NETWORK:
5060 if (glisten->address || !listenAddr)
5061 continue;
5063 if (qemuProcessGraphicsSetupNetworkAddress(glisten,
5064 listenAddr) < 0)
5065 return -1;
5066 break;
5068 case VIR_DOMAIN_GRAPHICS_LISTEN_TYPE_SOCKET:
5069 if (!glisten->socket) {
5070 glisten->socket = g_strdup_printf("%s/%s.sock", priv->libDir,
5071 type);
5072 glisten->autoGenerated = true;
5074 break;
5076 case VIR_DOMAIN_GRAPHICS_LISTEN_TYPE_NONE:
5077 case VIR_DOMAIN_GRAPHICS_LISTEN_TYPE_LAST:
5078 break;
5082 return 0;
5086 static int
5087 qemuProcessGraphicsSetupRenderNode(virDomainGraphicsDef *graphics,
5088 virQEMUCaps *qemuCaps)
5090 char **rendernode = NULL;
5092 if (!virDomainGraphicsNeedsAutoRenderNode(graphics))
5093 return 0;
5095 /* Don't bother picking a DRM node if QEMU doesn't support it. */
5096 switch (graphics->type) {
5097 case VIR_DOMAIN_GRAPHICS_TYPE_SPICE:
5098 if (!virQEMUCapsGet(qemuCaps, QEMU_CAPS_SPICE_RENDERNODE))
5099 return 0;
5101 rendernode = &graphics->data.spice.rendernode;
5102 break;
5103 case VIR_DOMAIN_GRAPHICS_TYPE_EGL_HEADLESS:
5104 if (!virQEMUCapsGet(qemuCaps, QEMU_CAPS_EGL_HEADLESS_RENDERNODE))
5105 return 0;
5107 rendernode = &graphics->data.egl_headless.rendernode;
5108 break;
5109 case VIR_DOMAIN_GRAPHICS_TYPE_DBUS:
5110 rendernode = &graphics->data.dbus.rendernode;
5111 break;
5112 case VIR_DOMAIN_GRAPHICS_TYPE_SDL:
5113 case VIR_DOMAIN_GRAPHICS_TYPE_VNC:
5114 case VIR_DOMAIN_GRAPHICS_TYPE_RDP:
5115 case VIR_DOMAIN_GRAPHICS_TYPE_DESKTOP:
5116 case VIR_DOMAIN_GRAPHICS_TYPE_LAST:
5117 virReportEnumRangeError(virDomainGraphicsType, graphics->type);
5118 break;
5121 if (!(*rendernode = virHostGetDRMRenderNode()))
5122 return -1;
5124 return 0;
5128 static int
5129 qemuProcessSetupGraphics(virQEMUDriver *driver,
5130 virDomainObj *vm,
5131 virQEMUCaps *qemuCaps,
5132 unsigned int flags)
5134 virDomainGraphicsDef *graphics;
5135 bool allocate = !(flags & VIR_QEMU_PROCESS_START_PRETEND);
5136 size_t i;
5138 for (i = 0; i < vm->def->ngraphics; i++) {
5139 graphics = vm->def->graphics[i];
5141 if (qemuProcessGraphicsSetupRenderNode(graphics, qemuCaps) < 0)
5142 return -1;
5144 if (qemuProcessGraphicsSetupListen(driver, graphics, vm) < 0)
5145 return -1;
5147 if (qemuProcessGraphicsSetupDBus(driver, graphics, vm) < 0)
5148 return -1;
5151 if (allocate) {
5152 for (i = 0; i < vm->def->ngraphics; i++) {
5153 graphics = vm->def->graphics[i];
5155 if (qemuProcessGraphicsReservePorts(graphics, false) < 0)
5156 return -1;
5160 for (i = 0; i < vm->def->ngraphics; ++i) {
5161 graphics = vm->def->graphics[i];
5163 if (qemuProcessGraphicsAllocatePorts(driver, graphics, allocate) < 0)
5164 return -1;
5167 return 0;
5171 static int
5172 qemuProcessSetupRawIO(virDomainObj *vm,
5173 virCommand *cmd G_GNUC_UNUSED)
5175 bool rawio = false;
5176 size_t i;
5177 int ret = -1;
5179 /* in case a certain disk is desirous of CAP_SYS_RAWIO, add this */
5180 for (i = 0; i < vm->def->ndisks; i++) {
5181 virDomainDiskDef *disk = vm->def->disks[i];
5183 if (disk->rawio == VIR_TRISTATE_BOOL_YES) {
5184 rawio = true;
5185 #ifndef CAP_SYS_RAWIO
5186 break;
5187 #endif
5191 /* If rawio not already set, check hostdevs as well */
5192 if (!rawio) {
5193 for (i = 0; i < vm->def->nhostdevs; i++) {
5194 virDomainHostdevSubsysSCSI *scsisrc;
5196 if (!virHostdevIsSCSIDevice(vm->def->hostdevs[i]))
5197 continue;
5199 scsisrc = &vm->def->hostdevs[i]->source.subsys.u.scsi;
5200 if (scsisrc->rawio == VIR_TRISTATE_BOOL_YES) {
5201 rawio = true;
5202 break;
5207 ret = 0;
5209 if (rawio) {
5210 #ifdef CAP_SYS_RAWIO
5211 virCommandAllowCap(cmd, CAP_SYS_RAWIO);
5212 #else
5213 virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
5214 _("Raw I/O is not supported on this platform"));
5215 ret = -1;
5216 #endif
5218 return ret;
5222 static int
5223 qemuProcessSetupBalloon(virDomainObj *vm,
5224 virDomainAsyncJob asyncJob)
5226 unsigned long long balloon = vm->def->mem.cur_balloon;
5227 qemuDomainObjPrivate *priv = vm->privateData;
5228 int ret = -1;
5230 if (!virDomainDefHasMemballoon(vm->def))
5231 return 0;
5233 if (qemuDomainObjEnterMonitorAsync(vm, asyncJob) < 0)
5234 return -1;
5236 if (vm->def->memballoon->period)
5237 qemuMonitorSetMemoryStatsPeriod(priv->mon, vm->def->memballoon,
5238 vm->def->memballoon->period);
5239 if (qemuMonitorSetBalloon(priv->mon, balloon) < 0)
5240 goto cleanup;
5242 ret = 0;
5244 cleanup:
5245 qemuDomainObjExitMonitor(vm);
5246 return ret;
5250 static int
5251 qemuProcessMakeDir(virQEMUDriver *driver,
5252 virDomainObj *vm,
5253 const char *path)
5255 if (g_mkdir_with_parents(path, 0750) < 0) {
5256 virReportSystemError(errno, _("Cannot create directory '%1$s'"), path);
5257 return -1;
5260 if (qemuSecurityDomainSetPathLabel(driver, vm, path, true) < 0)
5261 return -1;
5263 return 0;
5267 static void
5268 qemuProcessStartWarnShmem(virDomainObj *vm)
5270 size_t i;
5271 bool check_shmem = false;
5272 bool shmem = vm->def->nshmems;
5275 * For vhost-user to work, the domain has to have some type of
5276 * shared memory configured. We're not the proper ones to judge
5277 * whether shared hugepages or shm are enough and will be in the
5278 * future, so we'll just warn in case neither is configured.
5279 * Moreover failing would give the false illusion that libvirt is
5280 * really checking that everything works before running the domain
5281 * and not only we are unable to do that, but it's also not our
5282 * aim to do so.
5284 for (i = 0; i < vm->def->nnets; i++) {
5285 if (virDomainNetGetActualType(vm->def->nets[i]) ==
5286 VIR_DOMAIN_NET_TYPE_VHOSTUSER) {
5287 check_shmem = true;
5288 break;
5292 if (!check_shmem)
5293 return;
5296 * This check is by no means complete. We merely check
5297 * whether there are *some* hugepages enabled and *some* NUMA
5298 * nodes with shared memory access.
5300 if (!shmem && vm->def->mem.nhugepages) {
5301 for (i = 0; i < virDomainNumaGetNodeCount(vm->def->numa); i++) {
5302 if (virDomainNumaGetNodeMemoryAccessMode(vm->def->numa, i) ==
5303 VIR_DOMAIN_MEMORY_ACCESS_SHARED) {
5304 shmem = true;
5305 break;
5310 if (!shmem) {
5311 VIR_WARN("Detected vhost-user interface without any shared memory, "
5312 "the interface might not be operational");
5317 static int
5318 qemuProcessStartValidateGraphics(virDomainObj *vm)
5320 size_t i;
5322 for (i = 0; i < vm->def->ngraphics; i++) {
5323 virDomainGraphicsDef *graphics = vm->def->graphics[i];
5325 switch (graphics->type) {
5326 case VIR_DOMAIN_GRAPHICS_TYPE_VNC:
5327 case VIR_DOMAIN_GRAPHICS_TYPE_SPICE:
5328 if (graphics->nListens > 1) {
5329 virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
5330 _("QEMU does not support multiple listens for one graphics device."));
5331 return -1;
5333 break;
5335 case VIR_DOMAIN_GRAPHICS_TYPE_SDL:
5336 case VIR_DOMAIN_GRAPHICS_TYPE_RDP:
5337 case VIR_DOMAIN_GRAPHICS_TYPE_DESKTOP:
5338 case VIR_DOMAIN_GRAPHICS_TYPE_EGL_HEADLESS:
5339 case VIR_DOMAIN_GRAPHICS_TYPE_DBUS:
5340 case VIR_DOMAIN_GRAPHICS_TYPE_LAST:
5341 break;
5345 return 0;
5349 static int
5350 qemuProcessStartValidateShmem(virDomainObj *vm)
5352 size_t i;
5354 for (i = 0; i < vm->def->nshmems; i++) {
5355 virDomainShmemDef *shmem = vm->def->shmems[i];
5357 if (strchr(shmem->name, '/')) {
5358 virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
5359 _("shmem name '%1$s' must not contain '/'"),
5360 shmem->name);
5361 return -1;
5365 return 0;
5369 static int
5370 qemuProcessStartValidateDisks(virDomainObj *vm,
5371 virQEMUCaps *qemuCaps)
5373 size_t i;
5375 for (i = 0; i < vm->def->ndisks; i++) {
5376 virDomainDiskDef *disk = vm->def->disks[i];
5377 virStorageSource *src = disk->src;
5379 /* This is a best effort check as we can only check if the command
5380 * option exists, but we cannot determine whether the running QEMU
5381 * was build with '--enable-vxhs'. */
5382 if (src->type == VIR_STORAGE_TYPE_NETWORK &&
5383 src->protocol == VIR_STORAGE_NET_PROTOCOL_VXHS &&
5384 !virQEMUCapsGet(qemuCaps, QEMU_CAPS_VXHS)) {
5385 virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
5386 _("VxHS protocol is not supported with this QEMU binary"));
5387 return -1;
5390 /* PowerPC pseries based VMs do not support floppy device */
5391 if (disk->device == VIR_DOMAIN_DISK_DEVICE_FLOPPY &&
5392 qemuDomainIsPSeries(vm->def)) {
5393 virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
5394 _("PowerPC pseries machines do not support floppy device"));
5395 return -1;
5398 if (src->type == VIR_STORAGE_TYPE_NVME &&
5399 !virQEMUCapsGet(qemuCaps, QEMU_CAPS_DRIVE_NVME)) {
5400 virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
5401 _("NVMe disks are not supported with this QEMU binary"));
5402 return -1;
5406 return 0;
5410 /* 250 parts per million (ppm) is a half of NTP threshold */
5411 #define TSC_TOLERANCE 250
5413 static int
5414 qemuProcessStartValidateTSC(virQEMUDriver *driver,
5415 virDomainObj *vm)
5417 size_t i;
5418 unsigned long long freq = 0;
5419 unsigned long long tolerance;
5420 unsigned long long minFreq;
5421 unsigned long long maxFreq;
5422 virHostCPUTscInfo *tsc;
5423 g_autoptr(virCPUDef) cpu = NULL;
5425 for (i = 0; i < vm->def->clock.ntimers; i++) {
5426 virDomainTimerDef *timer = vm->def->clock.timers[i];
5428 if (timer->name == VIR_DOMAIN_TIMER_NAME_TSC &&
5429 timer->frequency > 0) {
5430 freq = timer->frequency;
5431 break;
5435 if (freq == 0)
5436 return 0;
5438 VIR_DEBUG("Requested TSC frequency %llu Hz", freq);
5440 cpu = virQEMUDriverGetHostCPU(driver);
5441 if (!cpu || !cpu->tsc) {
5442 VIR_DEBUG("Host TSC frequency could not be probed");
5443 return 0;
5446 tsc = cpu->tsc;
5447 tolerance = tsc->frequency * TSC_TOLERANCE / 1000000;
5448 minFreq = tsc->frequency - tolerance;
5449 maxFreq = tsc->frequency + tolerance;
5451 VIR_DEBUG("Host TSC frequency %llu Hz, scaling %s, tolerance +/- %llu Hz",
5452 tsc->frequency, virTristateBoolTypeToString(tsc->scaling),
5453 tolerance);
5455 if (freq >= minFreq && freq <= maxFreq) {
5456 VIR_DEBUG("Requested TSC frequency is within tolerance interval");
5457 return 0;
5460 if (tsc->scaling == VIR_TRISTATE_BOOL_YES)
5461 return 0;
5463 if (tsc->scaling == VIR_TRISTATE_BOOL_ABSENT) {
5464 VIR_DEBUG("Requested TSC frequency falls outside tolerance range and "
5465 "scaling support is unknown, QEMU will try and possibly "
5466 "fail later");
5467 return 0;
5470 virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
5471 _("Requested TSC frequency %1$llu Hz is outside tolerance range ([%2$llu, %3$llu] Hz) around host frequency %4$llu Hz and TSC scaling is not supported by the host CPU"),
5472 freq, minFreq, maxFreq, tsc->frequency);
5473 return -1;
5478 * qemuProcessStartValidate:
5479 * @vm: domain object
5480 * @qemuCaps: emulator capabilities
5481 * @migration: restoration of existing state
5483 * This function aggregates checks done prior to start of a VM.
5485 * Flag VIR_QEMU_PROCESS_START_PRETEND tells, that we don't want to actually
5486 * start the domain but create a valid qemu command. If some code shouldn't be
5487 * executed in this case, make sure to check this flag.
5489 static int
5490 qemuProcessStartValidate(virQEMUDriver *driver,
5491 virDomainObj *vm,
5492 virQEMUCaps *qemuCaps,
5493 unsigned int flags)
5495 if (!(flags & VIR_QEMU_PROCESS_START_PRETEND)) {
5496 if (vm->def->virtType == VIR_DOMAIN_VIRT_KVM) {
5497 VIR_DEBUG("Checking for KVM availability");
5498 if (!virFileExists("/dev/kvm")) {
5499 virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
5500 _("Domain requires KVM, but it is not available. Check that virtualization is enabled in the host BIOS, and host configuration is setup to load the kvm modules."));
5501 return -1;
5505 VIR_DEBUG("Checking domain and device security labels");
5506 if (qemuSecurityCheckAllLabel(driver->securityManager, vm->def) < 0)
5507 return -1;
5511 if (virDomainDefValidate(vm->def, 0, driver->xmlopt, qemuCaps) < 0)
5512 return -1;
5514 if (qemuProcessStartValidateGraphics(vm) < 0)
5515 return -1;
5517 if (qemuProcessStartValidateShmem(vm) < 0)
5518 return -1;
5520 if (vm->def->cpu) {
5521 if (virCPUValidateFeatures(vm->def->os.arch, vm->def->cpu) < 0)
5522 return -1;
5524 if (ARCH_IS_X86(vm->def->os.arch) &&
5525 !virQEMUCapsGet(qemuCaps, QEMU_CAPS_CPU_UNAVAILABLE_FEATURES)) {
5526 g_auto(GStrv) features = NULL;
5527 int n;
5529 if ((n = virCPUDefCheckFeatures(vm->def->cpu,
5530 virCPUx86FeatureFilterSelectMSR,
5531 NULL,
5532 &features)) < 0)
5533 return -1;
5535 if (n > 0) {
5536 g_autofree char *str = NULL;
5538 str = g_strjoinv(", ", features);
5539 virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
5540 _("Some features cannot be reliably used with this QEMU: %1$s"), str);
5541 return -1;
5546 if (qemuProcessStartValidateDisks(vm, qemuCaps) < 0)
5547 return -1;
5549 if (qemuProcessStartValidateTSC(driver, vm) < 0)
5550 return -1;
5552 VIR_DEBUG("Checking for any possible (non-fatal) issues");
5554 qemuProcessStartWarnShmem(vm);
5556 return 0;
5560 static int
5561 qemuProcessStartUpdateCustomCaps(virDomainObj *vm)
5563 qemuDomainObjPrivate *priv = vm->privateData;
5564 g_autoptr(virQEMUDriverConfig) cfg = virQEMUDriverGetConfig(priv->driver);
5565 qemuDomainXmlNsDef *nsdef = vm->def->namespaceData;
5566 char **next;
5567 int tmp;
5569 if (cfg->capabilityfilters) {
5570 for (next = cfg->capabilityfilters; *next; next++) {
5571 if ((tmp = virQEMUCapsTypeFromString(*next)) < 0) {
5572 virReportError(VIR_ERR_INTERNAL_ERROR,
5573 _("invalid capability_filters capability '%1$s'"),
5574 *next);
5575 return -1;
5578 virQEMUCapsClear(priv->qemuCaps, tmp);
5582 if (nsdef) {
5583 for (next = nsdef->capsadd; next && *next; next++) {
5584 if ((tmp = virQEMUCapsTypeFromString(*next)) < 0) {
5585 virReportError(VIR_ERR_INTERNAL_ERROR,
5586 _("invalid qemu namespace capability '%1$s'"),
5587 *next);
5588 return -1;
5591 virQEMUCapsSet(priv->qemuCaps, tmp);
5594 for (next = nsdef->capsdel; next && *next; next++) {
5595 if ((tmp = virQEMUCapsTypeFromString(*next)) < 0) {
5596 virReportError(VIR_ERR_INTERNAL_ERROR,
5597 _("invalid qemu namespace capability '%1$s'"),
5598 *next);
5599 return -1;
5602 virQEMUCapsClear(priv->qemuCaps, tmp);
5606 return 0;
5611 * qemuProcessPrepareQEMUCaps:
5612 * @vm: domain object
5613 * @qemuCapsCache: cache of QEMU capabilities
5615 * Prepare the capabilities of a QEMU process for startup. This includes
5616 * copying the caps to a static cache and potential post-processing depending
5617 * on the configuration of the VM and startup process.
5619 * Returns 0 on success, -1 on error.
5621 static int
5622 qemuProcessPrepareQEMUCaps(virDomainObj *vm,
5623 virFileCache *qemuCapsCache)
5625 qemuDomainObjPrivate *priv = vm->privateData;
5627 virObjectUnref(priv->qemuCaps);
5628 if (!(priv->qemuCaps = virQEMUCapsCacheLookupCopy(qemuCapsCache,
5629 vm->def->emulator)))
5630 return -1;
5632 /* Update qemu capabilities according to lists passed in via namespace */
5633 if (qemuProcessStartUpdateCustomCaps(vm) < 0)
5634 return -1;
5636 /* re-process capability lockouts since we might have removed capabilities */
5637 virQEMUCapsInitProcessCapsInterlock(priv->qemuCaps);
5639 return 0;
5644 * qemuProcessInit:
5646 * Prepares the domain up to the point when priv->qemuCaps is initialized. The
5647 * function calls qemuProcessStop when needed.
5649 * Flag VIR_QEMU_PROCESS_START_PRETEND tells, that we don't want to actually
5650 * start the domain but create a valid qemu command. If some code shouldn't be
5651 * executed in this case, make sure to check this flag.
5653 * Returns 0 on success, -1 on error.
5656 qemuProcessInit(virQEMUDriver *driver,
5657 virDomainObj *vm,
5658 virCPUDef *updatedCPU,
5659 virDomainAsyncJob asyncJob,
5660 bool migration,
5661 unsigned int flags)
5663 qemuDomainObjPrivate *priv = vm->privateData;
5664 int stopFlags;
5665 g_autoptr(virCPUDef) origCPU = NULL;
5667 VIR_DEBUG("vm=%p name=%s id=%d migration=%d",
5668 vm, vm->def->name, vm->def->id, migration);
5670 VIR_DEBUG("Beginning VM startup process");
5672 if (virDomainObjIsActive(vm)) {
5673 virReportError(VIR_ERR_OPERATION_INVALID, "%s",
5674 _("VM is already active"));
5675 return -1;
5678 /* in case when the post parse callback failed we need to re-run it on the
5679 * old config prior we start the VM */
5680 if (vm->def->postParseFailed) {
5681 VIR_DEBUG("re-running the post parse callback");
5683 /* we don't have the private copy of qemuCaps at this point */
5684 if (virDomainDefPostParse(vm->def, 0, driver->xmlopt, NULL) < 0)
5685 return -1;
5688 VIR_DEBUG("Determining emulator version");
5689 if (qemuProcessPrepareQEMUCaps(vm, driver->qemuCapsCache) < 0)
5690 return -1;
5692 qemuDomainUpdateCPU(vm, updatedCPU, &origCPU);
5694 if (qemuProcessStartValidate(driver, vm, priv->qemuCaps, flags) < 0)
5695 return -1;
5697 /* Do this upfront, so any part of the startup process can add
5698 * runtime state to vm->def that won't be persisted. This let's us
5699 * report implicit runtime defaults in the XML, like vnc listen/socket
5701 VIR_DEBUG("Setting current domain def as transient");
5702 if (virDomainObjSetDefTransient(driver->xmlopt, vm, priv->qemuCaps) < 0)
5703 return -1;
5705 if (flags & VIR_QEMU_PROCESS_START_PRETEND) {
5706 if (qemuDomainSetPrivatePaths(driver, vm) < 0) {
5707 virDomainObjRemoveTransientDef(vm);
5708 return -1;
5710 } else {
5711 vm->def->id = qemuDriverAllocateID(driver);
5712 qemuDomainSetFakeReboot(vm, false);
5713 virDomainObjSetState(vm, VIR_DOMAIN_PAUSED, VIR_DOMAIN_PAUSED_STARTING_UP);
5715 if (g_atomic_int_add(&driver->nactive, 1) == 0 && driver->inhibitCallback)
5716 driver->inhibitCallback(true, driver->inhibitOpaque);
5718 /* Run an early hook to set-up missing devices */
5719 if (qemuProcessStartHook(driver, vm,
5720 VIR_HOOK_QEMU_OP_PREPARE,
5721 VIR_HOOK_SUBOP_BEGIN) < 0)
5722 goto stop;
5724 if (qemuDomainSetPrivatePaths(driver, vm) < 0)
5725 goto stop;
5727 priv->origCPU = g_steal_pointer(&origCPU);
5730 return 0;
5732 stop:
5733 stopFlags = VIR_QEMU_PROCESS_STOP_NO_RELABEL;
5734 if (migration)
5735 stopFlags |= VIR_QEMU_PROCESS_STOP_MIGRATED;
5736 qemuProcessStop(driver, vm, VIR_DOMAIN_SHUTOFF_FAILED, asyncJob, stopFlags);
5737 return -1;
5741 static int
5742 qemuProcessPrepareDomainNetwork(virDomainObj *vm)
5744 virDomainDef *def = vm->def;
5745 qemuDomainObjPrivate *priv = vm->privateData;
5746 size_t i;
5747 g_autoptr(virConnect) conn = NULL;
5749 for (i = 0; i < def->nnets; i++) {
5750 virDomainNetDef *net = def->nets[i];
5751 virDomainNetType actualType;
5753 /* If appropriate, grab a physical device from the configured
5754 * network's pool of devices, or resolve bridge device name
5755 * to the one defined in the network definition.
5757 if (net->type == VIR_DOMAIN_NET_TYPE_NETWORK) {
5758 if (!conn && !(conn = virGetConnectNetwork()))
5759 return -1;
5760 if (virDomainNetAllocateActualDevice(conn, def, net) < 0)
5761 return -1;
5764 actualType = virDomainNetGetActualType(net);
5765 if (actualType == VIR_DOMAIN_NET_TYPE_HOSTDEV &&
5766 net->type == VIR_DOMAIN_NET_TYPE_NETWORK) {
5767 /* Each type='hostdev' network device must also have a
5768 * corresponding entry in the hostdevs array. For netdevs
5769 * that are hardcoded as type='hostdev', this is already
5770 * done by the parser, but for those allocated from a
5771 * network / determined at runtime, we need to do it
5772 * separately.
5774 virDomainHostdevDef *hostdev = virDomainNetGetActualHostdev(net);
5775 virDomainHostdevSubsysPCI *pcisrc = &hostdev->source.subsys.u.pci;
5777 if (virDomainHostdevFind(def, hostdev, NULL) >= 0) {
5778 virReportError(VIR_ERR_INTERNAL_ERROR,
5779 _("PCI device %1$04x:%2$02x:%3$02x.%4$x allocated from network %5$s is already in use by domain %6$s"),
5780 pcisrc->addr.domain, pcisrc->addr.bus,
5781 pcisrc->addr.slot, pcisrc->addr.function,
5782 net->data.network.name, def->name);
5783 return -1;
5786 /* For hostdev present in qemuProcessPrepareDomain() phase this was
5787 * done already, but this code runs after that, so we have to call
5788 * it ourselves. */
5789 if (qemuDomainPrepareHostdev(hostdev, priv) < 0)
5790 return -1;
5792 if (virDomainHostdevInsert(def, hostdev) < 0)
5793 return -1;
5796 return 0;
5800 static int
5801 qemuProcessPrepareHostNetwork(virDomainObj *vm)
5803 qemuDomainObjPrivate *priv = vm->privateData;
5804 size_t i;
5806 for (i = 0; i < vm->def->nnets; i++) {
5807 virDomainNetDef *net = vm->def->nets[i];
5808 virDomainNetType actualType = virDomainNetGetActualType(net);
5810 if (actualType == VIR_DOMAIN_NET_TYPE_USER &&
5811 net->backend.type == VIR_DOMAIN_NET_BACKEND_DEFAULT &&
5812 !priv->disableSlirp &&
5813 virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_DBUS_VMSTATE)) {
5814 if (qemuInterfacePrepareSlirp(priv->driver, net) < 0)
5815 return -1;
5820 return 0;
5824 struct qemuProcessSetupVcpuSchedCoreHelperData {
5825 pid_t vcpupid;
5826 pid_t dummypid;
5829 static int
5830 qemuProcessSetupVcpuSchedCoreHelper(pid_t ppid G_GNUC_UNUSED,
5831 void *opaque)
5833 struct qemuProcessSetupVcpuSchedCoreHelperData *data = opaque;
5835 if (virProcessSchedCoreShareFrom(data->dummypid) < 0) {
5836 virReportSystemError(errno,
5837 _("unable to share scheduling cookie from %1$lld"),
5838 (long long) data->dummypid);
5839 return -1;
5842 if (virProcessSchedCoreShareTo(data->vcpupid) < 0) {
5843 virReportSystemError(errno,
5844 _("unable to share scheduling cookie to %1$lld"),
5845 (long long) data->vcpupid);
5846 return -1;
5849 return 0;
5854 * qemuProcessSetupVcpu:
5855 * @vm: domain object
5856 * @vcpuid: id of VCPU to set defaults
5857 * @schedCore: whether to set scheduling group
5859 * This function sets resource properties (cgroups, affinity, scheduler) for a
5860 * vCPU. This function expects that the vCPU is online and the vCPU pids were
5861 * correctly detected at the point when it's called.
5863 * Returns 0 on success, -1 on error.
5866 qemuProcessSetupVcpu(virDomainObj *vm,
5867 unsigned int vcpuid,
5868 bool schedCore)
5870 qemuDomainObjPrivate *priv = vm->privateData;
5871 g_autoptr(virQEMUDriverConfig) cfg = virQEMUDriverGetConfig(priv->driver);
5872 pid_t vcpupid = qemuDomainGetVcpuPid(vm, vcpuid);
5873 virDomainVcpuDef *vcpu = virDomainDefGetVcpu(vm->def, vcpuid);
5874 virDomainResctrlMonDef *mon = NULL;
5875 size_t i = 0;
5877 if (qemuProcessSetupPid(vm, vcpupid, VIR_CGROUP_THREAD_VCPU,
5878 vcpuid, vcpu->cpumask,
5879 vm->def->cputune.period,
5880 vm->def->cputune.quota,
5881 &vcpu->sched) < 0)
5882 return -1;
5884 if (schedCore &&
5885 cfg->schedCore == QEMU_SCHED_CORE_VCPUS) {
5886 struct qemuProcessSetupVcpuSchedCoreHelperData data = { .vcpupid = vcpupid,
5887 .dummypid = -1 };
5889 for (i = 0; i < virDomainDefGetVcpusMax(vm->def); i++) {
5890 pid_t temptid = qemuDomainGetVcpuPid(vm, i);
5892 if (temptid > 0) {
5893 data.dummypid = temptid;
5894 break;
5898 if (data.dummypid == -1) {
5899 virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
5900 _("Unable to find a vCPU that is online"));
5901 return -1;
5904 if (virProcessRunInFork(qemuProcessSetupVcpuSchedCoreHelper, &data) < 0)
5905 return -1;
5908 for (i = 0; i < vm->def->nresctrls; i++) {
5909 size_t j = 0;
5910 virDomainResctrlDef *ct = vm->def->resctrls[i];
5912 if (virBitmapIsBitSet(ct->vcpus, vcpuid)) {
5913 if (virResctrlAllocAddPID(ct->alloc, vcpupid) < 0)
5914 return -1;
5916 for (j = 0; j < ct->nmonitors; j++) {
5917 mon = ct->monitors[j];
5919 if (virBitmapEqual(ct->vcpus, mon->vcpus) &&
5920 !virResctrlAllocIsEmpty(ct->alloc))
5921 continue;
5923 if (virBitmapIsBitSet(mon->vcpus, vcpuid)) {
5924 if (virResctrlMonitorAddPID(mon->instance, vcpupid) < 0)
5925 return -1;
5926 break;
5930 break;
5934 return 0;
5938 static int
5939 qemuProcessSetupAllVcpusSchedCoreHelper(pid_t ppid G_GNUC_UNUSED,
5940 void *opaque)
5942 virDomainObj *vm = opaque;
5943 size_t i;
5945 /* Since we are setting all vCPU threads at once and from a forked off
5946 * child, we don't need the dummy schedCoreChildPID and can create one on
5947 * our own. */
5948 if (virProcessSchedCoreCreate() < 0) {
5949 virReportSystemError(errno, "%s",
5950 _("Unable to set SCHED_CORE"));
5952 return -1;
5955 for (i = 0; i < virDomainDefGetVcpusMax(vm->def); i++) {
5956 pid_t vcpupid = qemuDomainGetVcpuPid(vm, i);
5958 if (vcpupid > 0 &&
5959 virProcessSchedCoreShareTo(vcpupid) < 0) {
5960 virReportSystemError(errno,
5961 _("unable to share scheduling cookie to %1$lld"),
5962 (long long) vcpupid);
5963 return -1;
5967 return 0;
5971 static int
5972 qemuProcessSetupVcpus(virDomainObj *vm)
5974 qemuDomainObjPrivate *priv = vm->privateData;
5975 g_autoptr(virQEMUDriverConfig) cfg = virQEMUDriverGetConfig(priv->driver);
5976 virDomainVcpuDef *vcpu;
5977 unsigned int maxvcpus = virDomainDefGetVcpusMax(vm->def);
5978 size_t i;
5980 if ((vm->def->cputune.period || vm->def->cputune.quota) &&
5981 !virCgroupHasController(((qemuDomainObjPrivate *) vm->privateData)->cgroup,
5982 VIR_CGROUP_CONTROLLER_CPU)) {
5983 virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
5984 _("cgroup cpu is required for scheduler tuning"));
5985 return -1;
5988 if (!qemuDomainHasVcpuPids(vm)) {
5989 /* If any CPU has custom affinity that differs from the
5990 * VM default affinity, we must reject it */
5991 for (i = 0; i < maxvcpus; i++) {
5992 vcpu = virDomainDefGetVcpu(vm->def, i);
5994 if (!vcpu->online)
5995 continue;
5997 if (vcpu->cpumask &&
5998 !virBitmapEqual(vm->def->cpumask, vcpu->cpumask)) {
5999 virReportError(VIR_ERR_OPERATION_INVALID, "%s",
6000 _("cpu affinity is not supported"));
6001 return -1;
6005 return 0;
6008 for (i = 0; i < maxvcpus; i++) {
6009 vcpu = virDomainDefGetVcpu(vm->def, i);
6011 if (!vcpu->online)
6012 continue;
6014 if (qemuProcessSetupVcpu(vm, i, false) < 0)
6015 return -1;
6018 if (cfg->schedCore == QEMU_SCHED_CORE_VCPUS &&
6019 virProcessRunInFork(qemuProcessSetupAllVcpusSchedCoreHelper, vm) < 0)
6020 return -1;
6022 return 0;
6027 qemuProcessSetupIOThread(virDomainObj *vm,
6028 virDomainIOThreadIDDef *iothread)
6030 return qemuProcessSetupPid(vm, iothread->thread_id,
6031 VIR_CGROUP_THREAD_IOTHREAD,
6032 iothread->iothread_id,
6033 iothread->cpumask,
6034 vm->def->cputune.iothread_period,
6035 vm->def->cputune.iothread_quota,
6036 &iothread->sched);
6040 static int
6041 qemuProcessSetupIOThreads(virDomainObj *vm)
6043 size_t i;
6045 for (i = 0; i < vm->def->niothreadids; i++) {
6046 virDomainIOThreadIDDef *info = vm->def->iothreadids[i];
6048 if (qemuProcessSetupIOThread(vm, info) < 0)
6049 return -1;
6052 return 0;
6056 static int
6057 qemuProcessValidateHotpluggableVcpus(virDomainDef *def)
6059 virDomainVcpuDef *vcpu;
6060 virDomainVcpuDef *subvcpu;
6061 qemuDomainVcpuPrivate *vcpupriv;
6062 unsigned int maxvcpus = virDomainDefGetVcpusMax(def);
6063 size_t i = 0;
6064 size_t j;
6065 g_autoptr(virBitmap) ordermap = virBitmapNew(maxvcpus + 1);
6067 /* validate:
6068 * - all hotpluggable entities to be hotplugged have the correct data
6069 * - vcpus belonging to a hotpluggable entity share configuration
6070 * - order of the hotpluggable entities is unique
6072 for (i = 0; i < maxvcpus; i++) {
6073 vcpu = virDomainDefGetVcpu(def, i);
6074 vcpupriv = QEMU_DOMAIN_VCPU_PRIVATE(vcpu);
6076 /* skip over hotpluggable entities */
6077 if (vcpupriv->vcpus == 0)
6078 continue;
6080 if (vcpu->order != 0) {
6081 if (virBitmapIsBitSet(ordermap, vcpu->order)) {
6082 virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
6083 _("duplicate vcpu order '%1$u'"), vcpu->order);
6084 return -1;
6087 if (virBitmapSetBit(ordermap, vcpu->order)) {
6088 virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
6089 _("vcpu order '%1$u' exceeds vcpu count"),
6090 vcpu->order);
6091 return -1;
6095 for (j = i + 1; j < (i + vcpupriv->vcpus); j++) {
6096 subvcpu = virDomainDefGetVcpu(def, j);
6097 if (subvcpu->hotpluggable != vcpu->hotpluggable ||
6098 subvcpu->online != vcpu->online ||
6099 subvcpu->order != vcpu->order) {
6100 virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
6101 _("vcpus '%1$zu' and '%2$zu' are in the same hotplug group but differ in configuration"),
6102 i, j);
6103 return -1;
6107 if (vcpu->online && vcpu->hotpluggable == VIR_TRISTATE_BOOL_YES) {
6108 if ((vcpupriv->socket_id == -1 && vcpupriv->core_id == -1 &&
6109 vcpupriv->thread_id == -1 && vcpupriv->node_id == -1) ||
6110 !vcpupriv->type) {
6111 virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
6112 _("vcpu '%1$zu' is missing hotplug data"), i);
6113 return -1;
6118 return 0;
6122 static int
6123 qemuDomainHasHotpluggableStartupVcpus(virDomainDef *def)
6125 size_t maxvcpus = virDomainDefGetVcpusMax(def);
6126 virDomainVcpuDef *vcpu;
6127 size_t i;
6129 for (i = 0; i < maxvcpus; i++) {
6130 vcpu = virDomainDefGetVcpu(def, i);
6132 if (vcpu->online && vcpu->hotpluggable == VIR_TRISTATE_BOOL_YES)
6133 return true;
6136 return false;
6140 static int
6141 qemuProcessVcpusSortOrder(const void *a,
6142 const void *b,
6143 void *opaque G_GNUC_UNUSED)
6145 virDomainVcpuDef *vcpua = *((virDomainVcpuDef **)a);
6146 virDomainVcpuDef *vcpub = *((virDomainVcpuDef **)b);
6148 return vcpua->order - vcpub->order;
6152 static int
6153 qemuProcessSetupHotpluggableVcpus(virDomainObj *vm,
6154 virDomainAsyncJob asyncJob)
6156 unsigned int maxvcpus = virDomainDefGetVcpusMax(vm->def);
6157 qemuDomainObjPrivate *priv = vm->privateData;
6158 virCgroupEmulatorAllNodesData *emulatorCgroup = NULL;
6159 virDomainVcpuDef *vcpu;
6160 qemuDomainVcpuPrivate *vcpupriv;
6161 size_t i;
6162 int ret = -1;
6163 int rc;
6165 g_autofree virDomainVcpuDef **bootHotplug = NULL;
6166 size_t nbootHotplug = 0;
6168 for (i = 0; i < maxvcpus; i++) {
6169 vcpu = virDomainDefGetVcpu(vm->def, i);
6170 vcpupriv = QEMU_DOMAIN_VCPU_PRIVATE(vcpu);
6172 if (vcpu->hotpluggable == VIR_TRISTATE_BOOL_YES && vcpu->online &&
6173 vcpupriv->vcpus != 0) {
6174 vcpupriv->alias = g_strdup_printf("vcpu%zu", i);
6176 VIR_APPEND_ELEMENT(bootHotplug, nbootHotplug, vcpu);
6180 if (nbootHotplug == 0)
6181 return 0;
6183 g_qsort_with_data(bootHotplug, nbootHotplug,
6184 sizeof(*bootHotplug), qemuProcessVcpusSortOrder, NULL);
6186 if (virDomainCgroupEmulatorAllNodesAllow(priv->cgroup, &emulatorCgroup) < 0)
6187 goto cleanup;
6189 for (i = 0; i < nbootHotplug; i++) {
6190 g_autoptr(virJSONValue) vcpuprops = NULL;
6191 vcpu = bootHotplug[i];
6193 if (!(vcpuprops = qemuBuildHotpluggableCPUProps(vcpu)))
6194 goto cleanup;
6196 if (qemuDomainObjEnterMonitorAsync(vm, asyncJob) < 0)
6197 goto cleanup;
6199 rc = qemuMonitorAddDeviceProps(qemuDomainGetMonitor(vm), &vcpuprops);
6201 qemuDomainObjExitMonitor(vm);
6203 if (rc < 0)
6204 goto cleanup;
6207 ret = 0;
6209 cleanup:
6210 virDomainCgroupEmulatorAllNodesRestore(emulatorCgroup);
6211 return ret;
6215 static bool
6216 qemuProcessDropUnknownCPUFeatures(const char *name,
6217 virCPUFeaturePolicy policy,
6218 void *opaque)
6220 const char **features = opaque;
6222 if (policy != VIR_CPU_FEATURE_DISABLE &&
6223 policy != VIR_CPU_FEATURE_FORBID)
6224 return true;
6226 if (g_strv_contains(features, name))
6227 return true;
6229 /* Features unknown to QEMU are implicitly disabled, we can just drop them
6230 * from the definition. */
6231 return false;
6235 static int
6236 qemuProcessUpdateGuestCPU(virDomainDef *def,
6237 virQEMUCaps *qemuCaps,
6238 virArch hostarch,
6239 unsigned int flags)
6241 if (!def->cpu)
6242 return 0;
6244 /* nothing to do if only topology part of CPU def is used */
6245 if (def->cpu->mode == VIR_CPU_MODE_CUSTOM && !def->cpu->model)
6246 return 0;
6248 /* Old libvirt added host CPU model to host-model CPUs for migrations,
6249 * while new libvirt just turns host-model into custom mode. We need
6250 * to fix the mode to maintain backward compatibility and to avoid
6251 * the CPU model to be replaced in virCPUUpdate.
6253 if (!(flags & VIR_QEMU_PROCESS_START_NEW) &&
6254 ARCH_IS_X86(def->os.arch) &&
6255 def->cpu->mode == VIR_CPU_MODE_HOST_MODEL &&
6256 def->cpu->model) {
6257 def->cpu->mode = VIR_CPU_MODE_CUSTOM;
6260 if (!virQEMUCapsIsCPUModeSupported(qemuCaps, hostarch, def->virtType,
6261 def->cpu->mode, def->os.machine)) {
6262 virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
6263 _("CPU mode '%1$s' for %2$s %3$s domain on %4$s host is not supported by hypervisor"),
6264 virCPUModeTypeToString(def->cpu->mode),
6265 virArchToString(def->os.arch),
6266 virDomainVirtTypeToString(def->virtType),
6267 virArchToString(hostarch));
6268 return -1;
6271 if (virCPUConvertLegacy(hostarch, def->cpu) < 0)
6272 return -1;
6274 if (def->cpu->check != VIR_CPU_CHECK_NONE) {
6275 virCPUDef *host;
6277 host = virQEMUCapsGetHostModel(qemuCaps, def->virtType,
6278 VIR_QEMU_CAPS_HOST_CPU_FULL);
6280 if (host && virCPUCheckForbiddenFeatures(def->cpu, host) < 0)
6281 return -1;
6284 /* nothing to update for host-passthrough / maximum */
6285 if (def->cpu->mode != VIR_CPU_MODE_HOST_PASSTHROUGH &&
6286 def->cpu->mode != VIR_CPU_MODE_MAXIMUM) {
6287 g_autoptr(virDomainCapsCPUModels) cpuModels = NULL;
6288 virCPUFeaturePolicy removedPolicy = VIR_CPU_FEATURE_DISABLE;
6290 if (def->cpu->check == VIR_CPU_CHECK_PARTIAL &&
6291 !virQEMUCapsIsCPUUsable(qemuCaps, def->virtType, def->cpu) &&
6292 virCPUCompare(hostarch,
6293 virQEMUCapsGetHostModel(qemuCaps, def->virtType,
6294 VIR_QEMU_CAPS_HOST_CPU_FULL),
6295 def->cpu, true) < 0)
6296 return -1;
6298 /* When starting a fresh domain we disable all features removed from
6299 * the specified CPU model to make sure they are only used if
6300 * explicitly requested. But when we are restoring a previously running
6301 * domain (migration, snapshot, ...) all removed features were already
6302 * explicitly listed in the CPU definition and if we found a removed
6303 * feature which is missing it must have been removed later and must be
6304 * enabled rather than disabled here match the state described by older
6305 * libvirt.
6307 if (!(flags & VIR_QEMU_PROCESS_START_NEW))
6308 removedPolicy = VIR_CPU_FEATURE_REQUIRE;
6310 if (virCPUUpdate(def->os.arch, def->cpu,
6311 virQEMUCapsGetHostModel(qemuCaps, def->virtType,
6312 VIR_QEMU_CAPS_HOST_CPU_MIGRATABLE),
6313 removedPolicy) < 0)
6314 return -1;
6316 cpuModels = virQEMUCapsGetCPUModels(qemuCaps, def->virtType, NULL, NULL);
6318 if (virCPUTranslate(def->os.arch, def->cpu, cpuModels) < 0)
6319 return -1;
6321 def->cpu->fallback = VIR_CPU_FALLBACK_FORBID;
6324 if (virCPUDefFilterFeatures(def->cpu, virQEMUCapsCPUFilterFeatures,
6325 &def->os.arch) < 0)
6326 return -1;
6328 if (ARCH_IS_X86(def->os.arch)) {
6329 g_auto(GStrv) features = NULL;
6331 if (virQEMUCapsGetCPUFeatures(qemuCaps, def->virtType, false, &features) < 0)
6332 return -1;
6334 if (features &&
6335 virCPUDefFilterFeatures(def->cpu, qemuProcessDropUnknownCPUFeatures,
6336 features) < 0)
6337 return -1;
6340 return 0;
6344 static int
6345 qemuProcessPrepareDomainNUMAPlacement(virDomainObj *vm)
6347 qemuDomainObjPrivate *priv = vm->privateData;
6348 g_autofree char *nodeset = NULL;
6349 g_autoptr(virBitmap) numadNodeset = NULL;
6350 g_autoptr(virBitmap) hostMemoryNodeset = NULL;
6351 g_autoptr(virCapsHostNUMA) caps = NULL;
6353 /* Get the advisory nodeset from numad if 'placement' of
6354 * either <vcpu> or <numatune> is 'auto'.
6356 if (!virDomainDefNeedsPlacementAdvice(vm->def))
6357 return 0;
6359 nodeset = virNumaGetAutoPlacementAdvice(virDomainDefGetVcpus(vm->def),
6360 virDomainDefGetMemoryTotal(vm->def));
6362 if (!nodeset)
6363 return -1;
6365 if (!(hostMemoryNodeset = virNumaGetHostMemoryNodeset()))
6366 return -1;
6368 VIR_DEBUG("Nodeset returned from numad: %s", nodeset);
6370 if (virBitmapParse(nodeset, &numadNodeset, VIR_DOMAIN_CPUMASK_LEN) < 0)
6371 return -1;
6373 if (!(caps = virCapabilitiesHostNUMANewHost()))
6374 return -1;
6376 /* numad may return a nodeset that only contains cpus but cgroups don't play
6377 * well with that. Set the autoCpuset from all cpus from that nodeset, but
6378 * assign autoNodeset only with nodes containing memory. */
6379 if (!(priv->autoCpuset = virCapabilitiesHostNUMAGetCpus(caps, numadNodeset)))
6380 return -1;
6382 virBitmapIntersect(numadNodeset, hostMemoryNodeset);
6384 priv->autoNodeset = g_steal_pointer(&numadNodeset);
6386 return 0;
6390 static void
6391 qemuProcessPrepareDeviceBootorder(virDomainDef *def)
6393 size_t i;
6394 unsigned int bootCD = 0;
6395 unsigned int bootFloppy = 0;
6396 unsigned int bootDisk = 0;
6397 unsigned int bootNetwork = 0;
6399 if (def->os.nBootDevs == 0)
6400 return;
6402 for (i = 0; i < def->os.nBootDevs; i++) {
6403 switch (def->os.bootDevs[i]) {
6404 case VIR_DOMAIN_BOOT_CDROM:
6405 bootCD = i + 1;
6406 break;
6408 case VIR_DOMAIN_BOOT_FLOPPY:
6409 bootFloppy = i + 1;
6410 break;
6412 case VIR_DOMAIN_BOOT_DISK:
6413 bootDisk = i + 1;
6414 break;
6416 case VIR_DOMAIN_BOOT_NET:
6417 bootNetwork = i + 1;
6418 break;
6420 case VIR_DOMAIN_BOOT_LAST:
6421 default:
6422 break;
6426 for (i = 0; i < def->ndisks; i++) {
6427 virDomainDiskDef *disk = def->disks[i];
6429 switch (disk->device) {
6430 case VIR_DOMAIN_DISK_DEVICE_CDROM:
6431 disk->info.effectiveBootIndex = bootCD;
6432 bootCD = 0;
6433 break;
6435 case VIR_DOMAIN_DISK_DEVICE_DISK:
6436 case VIR_DOMAIN_DISK_DEVICE_LUN:
6437 disk->info.effectiveBootIndex = bootDisk;
6438 bootDisk = 0;
6439 break;
6441 case VIR_DOMAIN_DISK_DEVICE_FLOPPY:
6442 disk->info.effectiveBootIndex = bootFloppy;
6443 bootFloppy = 0;
6444 break;
6446 case VIR_DOMAIN_DISK_DEVICE_LAST:
6447 default:
6448 break;
6452 if (def->nnets > 0 && bootNetwork > 0) {
6453 /* If network boot is enabled, the first network device gets enabled. If
6454 * that one is backed by a host device, then we need to find the first
6455 * corresponding host device */
6456 if (virDomainNetGetActualType(def->nets[0]) == VIR_DOMAIN_NET_TYPE_HOSTDEV) {
6457 for (i = 0; i < def->nhostdevs; i++) {
6458 virDomainHostdevDef *hostdev = def->hostdevs[i];
6459 virDomainHostdevSubsys *subsys = &hostdev->source.subsys;
6461 if (hostdev->mode == VIR_DOMAIN_HOSTDEV_MODE_SUBSYS &&
6462 subsys->type == VIR_DOMAIN_HOSTDEV_SUBSYS_TYPE_PCI &&
6463 hostdev->info->type != VIR_DOMAIN_DEVICE_ADDRESS_TYPE_UNASSIGNED &&
6464 hostdev->parentnet) {
6465 hostdev->info->effectiveBootIndex = bootNetwork;
6466 break;
6469 } else {
6470 def->nets[0]->info.effectiveBootIndex = bootNetwork;
6476 static int
6477 qemuProcessPrepareDomainStorage(virQEMUDriver *driver,
6478 virDomainObj *vm,
6479 qemuDomainObjPrivate *priv,
6480 virQEMUDriverConfig *cfg,
6481 unsigned int flags)
6483 size_t i;
6484 bool cold_boot = flags & VIR_QEMU_PROCESS_START_COLD;
6486 for (i = vm->def->ndisks; i > 0; i--) {
6487 size_t idx = i - 1;
6488 virDomainDiskDef *disk = vm->def->disks[idx];
6490 if (virDomainDiskTranslateSourcePool(disk) < 0) {
6491 if (qemuDomainCheckDiskStartupPolicy(driver, vm, idx, cold_boot) < 0)
6492 return -1;
6494 /* disk source was dropped */
6495 continue;
6498 if (qemuDomainPrepareDiskSource(disk, priv, cfg) < 0)
6499 return -1;
6502 return 0;
6506 static int
6507 qemuProcessPrepareDomainHostdevs(virDomainObj *vm,
6508 qemuDomainObjPrivate *priv)
6510 size_t i;
6512 for (i = 0; i < vm->def->nhostdevs; i++) {
6513 virDomainHostdevDef *hostdev = vm->def->hostdevs[i];
6515 if (qemuDomainPrepareHostdev(hostdev, priv) < 0)
6516 return -1;
6519 return 0;
6524 * qemuProcessRebootAllowed:
6525 * @def: domain definition
6527 * This function encapsulates the logic which dictated whether '-no-reboot' was
6528 * used instead of '-no-shutdown' which is used QEMU versions which don't
6529 * support the 'set-action' QMP command.
6531 bool
6532 qemuProcessRebootAllowed(const virDomainDef *def)
6534 return def->onReboot != VIR_DOMAIN_LIFECYCLE_ACTION_DESTROY ||
6535 def->onPoweroff != VIR_DOMAIN_LIFECYCLE_ACTION_DESTROY ||
6536 (def->onCrash != VIR_DOMAIN_LIFECYCLE_ACTION_DESTROY &&
6537 def->onCrash != VIR_DOMAIN_LIFECYCLE_ACTION_COREDUMP_DESTROY);
6541 static void
6542 qemuProcessPrepareAllowReboot(virDomainObj *vm)
6544 virDomainDef *def = vm->def;
6545 qemuDomainObjPrivate *priv = vm->privateData;
6547 /* with 'set-action' QMP command we don't need to keep this around as
6548 * we always update qemu with the proper state */
6549 if (virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_SET_ACTION))
6550 return;
6552 if (priv->allowReboot != VIR_TRISTATE_BOOL_ABSENT)
6553 return;
6555 priv->allowReboot = virTristateBoolFromBool(qemuProcessRebootAllowed(def));
6559 static int
6560 qemuProcessUpdateSEVInfo(virDomainObj *vm)
6562 qemuDomainObjPrivate *priv = vm->privateData;
6563 virQEMUCaps *qemuCaps = priv->qemuCaps;
6564 virDomainSEVDef *sev = &vm->def->sec->data.sev;
6565 virSEVCapability *sevCaps = NULL;
6567 /* if platform specific info like 'cbitpos' and 'reducedPhysBits' have
6568 * not been supplied, we need to autofill them from caps now as both are
6569 * mandatory on QEMU cmdline
6571 sevCaps = virQEMUCapsGetSEVCapabilities(qemuCaps);
6572 if (!sev->haveCbitpos) {
6573 sev->cbitpos = sevCaps->cbitpos;
6574 sev->haveCbitpos = true;
6577 if (!sev->haveReducedPhysBits) {
6578 sev->reduced_phys_bits = sevCaps->reduced_phys_bits;
6579 sev->haveReducedPhysBits = true;
6582 return 0;
6586 /* qemuProcessPrepareChardevSource:
6587 * @def: live domain definition
6588 * @cfg: driver configuration
6590 * Iterate through all devices that use virDomainChrSourceDef as backend.
6592 static int
6593 qemuProcessPrepareChardevSource(virDomainDef *def,
6594 virQEMUDriverConfig *cfg)
6596 struct qemuDomainPrepareChardevSourceData data = { .cfg = cfg };
6598 return qemuDomainDeviceBackendChardevForeach(def,
6599 qemuDomainPrepareChardevSourceOne,
6600 &data);
6605 * qemuProcessPrepareDomain:
6606 * @driver: qemu driver
6607 * @vm: domain object
6608 * @flags: qemuProcessStartFlags
6610 * This function groups all code that modifies only live XML of a domain which
6611 * is about to start and it's the only place to do those modifications.
6613 * Flag VIR_QEMU_PROCESS_START_PRETEND tells, that we don't want to actually
6614 * start the domain but create a valid qemu command. If some code shouldn't be
6615 * executed in this case, make sure to check this flag.
6617 * This function MUST be called before qemuProcessPrepareHost().
6619 * TODO: move all XML modification from qemuBuildCommandLine into this function
6622 qemuProcessPrepareDomain(virQEMUDriver *driver,
6623 virDomainObj *vm,
6624 unsigned int flags)
6626 size_t i;
6627 qemuDomainObjPrivate *priv = vm->privateData;
6628 g_autoptr(virQEMUDriverConfig) cfg = virQEMUDriverGetConfig(driver);
6630 priv->machineName = qemuDomainGetMachineName(vm);
6631 if (!priv->machineName)
6632 return -1;
6634 if (!(flags & VIR_QEMU_PROCESS_START_PRETEND)) {
6635 /* If you are using a SecurityDriver with dynamic labelling,
6636 then generate a security label for isolation */
6637 VIR_DEBUG("Generating domain security label (if required)");
6638 if (qemuSecurityGenLabel(driver->securityManager, vm->def) < 0) {
6639 virDomainAuditSecurityLabel(vm, false);
6640 return -1;
6642 virDomainAuditSecurityLabel(vm, true);
6644 if (qemuProcessPrepareDomainNUMAPlacement(vm) < 0)
6645 return -1;
6648 /* Whether we should use virtlogd as stdio handler for character
6649 * devices source backend. */
6650 priv->chardevStdioLogd = cfg->stdioLogD;
6652 /* Track if this domain remembers original owner */
6653 priv->rememberOwner = cfg->rememberOwner;
6655 qemuProcessPrepareAllowReboot(vm);
6658 * Normally PCI addresses are assigned in the virDomainCreate
6659 * or virDomainDefine methods. We might still need to assign
6660 * some here to cope with the question of upgrades. Regardless
6661 * we also need to populate the PCI address set cache for later
6662 * use in hotplug
6664 VIR_DEBUG("Assigning domain PCI addresses");
6665 if ((qemuDomainAssignAddresses(vm->def, priv->qemuCaps, driver, vm,
6666 !!(flags & VIR_QEMU_PROCESS_START_NEW))) < 0) {
6667 return -1;
6670 if (qemuAssignDeviceAliases(vm->def) < 0)
6671 return -1;
6673 qemuProcessPrepareDeviceBootorder(vm->def);
6675 VIR_DEBUG("Setting graphics devices");
6676 if (qemuProcessSetupGraphics(driver, vm, priv->qemuCaps, flags) < 0)
6677 return -1;
6679 VIR_DEBUG("Create domain masterKey");
6680 if (qemuDomainMasterKeyCreate(vm) < 0)
6681 return -1;
6683 VIR_DEBUG("Setting up storage");
6684 if (qemuProcessPrepareDomainStorage(driver, vm, priv, cfg, flags) < 0)
6685 return -1;
6687 VIR_DEBUG("Setting up host devices");
6688 if (qemuProcessPrepareDomainHostdevs(vm, priv) < 0)
6689 return -1;
6691 VIR_DEBUG("Setting up network devices");
6692 if (qemuProcessPrepareDomainNetwork(vm) < 0)
6693 return -1;
6695 VIR_DEBUG("Prepare chardev source backends");
6696 if (qemuProcessPrepareChardevSource(vm->def, cfg) < 0)
6697 return -1;
6699 VIR_DEBUG("Prepare device secrets");
6700 if (qemuDomainSecretPrepare(driver, vm) < 0)
6701 return -1;
6703 VIR_DEBUG("Prepare bios/uefi paths");
6704 if (qemuFirmwareFillDomain(driver, vm->def, false) < 0)
6705 return -1;
6706 if (qemuDomainInitializePflashStorageSource(vm, cfg) < 0)
6707 return -1;
6709 VIR_DEBUG("Preparing external devices");
6710 if (qemuExtDevicesPrepareDomain(driver, vm) < 0)
6711 return -1;
6713 if (flags & VIR_QEMU_PROCESS_START_NEW) {
6714 VIR_DEBUG("Aligning guest memory");
6715 if (qemuDomainAlignMemorySizes(vm->def) < 0)
6716 return -1;
6719 for (i = 0; i < vm->def->nchannels; i++) {
6720 if (qemuDomainPrepareChannel(vm->def->channels[i],
6721 priv->channelTargetDir) < 0)
6722 return -1;
6725 if (!(priv->monConfig = virDomainChrSourceDefNew(driver->xmlopt)))
6726 return -1;
6728 VIR_DEBUG("Preparing monitor state");
6729 if (qemuProcessPrepareMonitorChr(priv->monConfig, priv->libDir) < 0)
6730 return -1;
6732 priv->monError = false;
6733 priv->monStart = 0;
6734 priv->runningReason = VIR_DOMAIN_RUNNING_UNKNOWN;
6735 priv->pausedReason = VIR_DOMAIN_PAUSED_UNKNOWN;
6737 VIR_DEBUG("Updating guest CPU definition");
6738 if (qemuProcessUpdateGuestCPU(vm->def, priv->qemuCaps, driver->hostarch, flags) < 0)
6739 return -1;
6741 for (i = 0; i < vm->def->nshmems; i++)
6742 qemuDomainPrepareShmemChardev(vm->def->shmems[i]);
6744 if (vm->def->sec &&
6745 vm->def->sec->sectype == VIR_DOMAIN_LAUNCH_SECURITY_SEV) {
6746 VIR_DEBUG("Updating SEV platform info");
6747 if (qemuProcessUpdateSEVInfo(vm) < 0)
6748 return -1;
6751 return 0;
6755 static int
6756 qemuProcessSEVCreateFile(virDomainObj *vm,
6757 const char *name,
6758 const char *data)
6760 qemuDomainObjPrivate *priv = vm->privateData;
6761 virQEMUDriver *driver = priv->driver;
6762 g_autofree char *configFile = NULL;
6764 if (!(configFile = virFileBuildPath(priv->libDir, name, ".base64")))
6765 return -1;
6767 if (virFileRewriteStr(configFile, S_IRUSR | S_IWUSR, data) < 0) {
6768 virReportSystemError(errno, _("failed to write data to config '%1$s'"),
6769 configFile);
6770 return -1;
6773 if (qemuSecurityDomainSetPathLabel(driver, vm, configFile, true) < 0)
6774 return -1;
6776 return 0;
6780 static int
6781 qemuProcessPrepareSEVGuestInput(virDomainObj *vm)
6783 virDomainSEVDef *sev = &vm->def->sec->data.sev;
6785 VIR_DEBUG("Preparing SEV guest");
6787 if (sev->dh_cert) {
6788 if (qemuProcessSEVCreateFile(vm, "dh_cert", sev->dh_cert) < 0)
6789 return -1;
6792 if (sev->session) {
6793 if (qemuProcessSEVCreateFile(vm, "session", sev->session) < 0)
6794 return -1;
6797 return 0;
6801 static int
6802 qemuProcessPrepareLaunchSecurityGuestInput(virDomainObj *vm)
6804 virDomainSecDef *sec = vm->def->sec;
6806 if (!sec)
6807 return 0;
6809 switch ((virDomainLaunchSecurity) sec->sectype) {
6810 case VIR_DOMAIN_LAUNCH_SECURITY_SEV:
6811 return qemuProcessPrepareSEVGuestInput(vm);
6812 case VIR_DOMAIN_LAUNCH_SECURITY_PV:
6813 return 0;
6814 case VIR_DOMAIN_LAUNCH_SECURITY_NONE:
6815 case VIR_DOMAIN_LAUNCH_SECURITY_LAST:
6816 virReportEnumRangeError(virDomainLaunchSecurity, sec->sectype);
6817 return -1;
6820 return 0;
6824 static int
6825 qemuProcessPrepareHostStorageSourceVDPA(virStorageSource *src,
6826 qemuDomainObjPrivate *priv)
6828 qemuDomainStorageSourcePrivate *srcpriv = NULL;
6829 virStorageType actualType = virStorageSourceGetActualType(src);
6830 int vdpafd = -1;
6832 if (actualType != VIR_STORAGE_TYPE_VHOST_VDPA)
6833 return 0;
6835 if ((vdpafd = qemuVDPAConnect(src->vdpadev)) < 0)
6836 return -1;
6838 srcpriv = qemuDomainStorageSourcePrivateFetch(src);
6840 srcpriv->fdpass = qemuFDPassNew(qemuBlockStorageSourceGetStorageNodename(src), priv);
6841 qemuFDPassAddFD(srcpriv->fdpass, &vdpafd, "-vdpa");
6842 return 0;
6847 * See qemuProcessPrepareHostStorageSourceChain
6850 qemuProcessPrepareHostStorageSource(virDomainObj *vm,
6851 virStorageSource *src)
6853 /* connect to any necessary vdpa block devices */
6854 if (qemuProcessPrepareHostStorageSourceVDPA(src, vm->privateData) < 0)
6855 return -1;
6857 return 0;
6862 * qemuProcessPrepareHostStorageSourceChain:
6864 * @vm: domain object
6865 * @chain: source chain
6867 * Prepare the host side of a disk for use with the VM. Note that this function
6868 * accesses host resources.
6871 qemuProcessPrepareHostStorageSourceChain(virDomainObj *vm,
6872 virStorageSource *chain)
6874 virStorageSource *n;
6876 for (n = chain; virStorageSourceIsBacking(n); n = n->backingStore) {
6877 if (qemuProcessPrepareHostStorageSource(vm, n) < 0)
6878 return -1;
6881 return 0;
6886 * qemuProcessPrepareHostStorageDisk:
6888 * @vm: domain object
6889 * @disk: disk definition object
6891 * Prepare the host side of a disk for use with the VM. Note that this function
6892 * accesses host resources.
6894 * Note that this function does not call qemuDomainDetermineDiskChain as that is
6895 * needed in qemuProcessPrepareHostStorage to remove disks based on the startup
6896 * policy, thus other callers need to call it explicitly.
6899 qemuProcessPrepareHostStorageDisk(virDomainObj *vm,
6900 virDomainDiskDef *disk)
6902 if (qemuProcessPrepareHostStorageSourceChain(vm, disk->src) < 0)
6903 return -1;
6905 return 0;
6909 static int
6910 qemuProcessPrepareHostStorage(virQEMUDriver *driver,
6911 virDomainObj *vm,
6912 unsigned int flags)
6914 size_t i;
6915 bool cold_boot = flags & VIR_QEMU_PROCESS_START_COLD;
6917 for (i = vm->def->ndisks; i > 0; i--) {
6918 size_t idx = i - 1;
6919 virDomainDiskDef *disk = vm->def->disks[idx];
6921 if (virStorageSourceIsEmpty(disk->src))
6922 continue;
6924 /* backing chain needs to be redetected if we aren't using blockdev */
6925 if (qemuDiskBusIsSD(disk->bus))
6926 virStorageSourceBackingStoreClear(disk->src);
6929 * Go to applying startup policy for optional disk with nonexistent
6930 * source file immediately as determining chain will surely fail
6931 * and we don't want noisy error notice in logs for this case.
6933 if (qemuDomainDiskIsMissingLocalOptional(disk) && cold_boot)
6934 VIR_INFO("optional disk '%s' source file is missing, "
6935 "skip checking disk chain", disk->dst);
6936 else if (qemuDomainDetermineDiskChain(driver, vm, disk, NULL) >= 0)
6937 continue;
6939 if (qemuDomainCheckDiskStartupPolicy(driver, vm, idx, cold_boot) >= 0)
6940 continue;
6942 return -1;
6945 for (i = 0; i < vm->def->ndisks; i++) {
6946 virDomainDiskDef *disk = vm->def->disks[i];
6948 if (qemuProcessPrepareHostStorageDisk(vm, disk) < 0)
6949 return -1;
6952 return 0;
6957 qemuProcessOpenVhostVsock(virDomainVsockDef *vsock)
6959 qemuDomainVsockPrivate *priv = (qemuDomainVsockPrivate *)vsock->privateData;
6960 const char *vsock_path = "/dev/vhost-vsock";
6961 int fd;
6963 if ((fd = open(vsock_path, O_RDWR)) < 0) {
6964 virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
6965 "%s", _("unable to open vhost-vsock device"));
6966 return -1;
6969 if (vsock->auto_cid == VIR_TRISTATE_BOOL_YES) {
6970 if (virVsockAcquireGuestCid(fd, &vsock->guest_cid) < 0)
6971 goto error;
6972 } else {
6973 if (virVsockSetGuestCid(fd, vsock->guest_cid) < 0)
6974 goto error;
6977 priv->vhostfd = fd;
6978 return 0;
6980 error:
6981 VIR_FORCE_CLOSE(fd);
6982 return -1;
6986 static int
6987 qemuProcessPrepareHostBackendChardevFileHelper(const char *path,
6988 virTristateSwitch append,
6989 int *fd,
6990 virLogManager *logManager,
6991 virSecurityManager *secManager,
6992 virQEMUDriverConfig *cfg,
6993 const virDomainDef *def)
6995 if (logManager) {
6996 int flags = 0;
6998 if (append == VIR_TRISTATE_SWITCH_ABSENT ||
6999 append == VIR_TRISTATE_SWITCH_OFF)
7000 flags |= VIR_LOG_MANAGER_PROTOCOL_DOMAIN_OPEN_LOG_FILE_TRUNCATE;
7002 if ((*fd = virLogManagerDomainOpenLogFile(logManager,
7003 "qemu",
7004 def->uuid,
7005 def->name,
7006 path,
7007 flags,
7008 NULL, NULL)) < 0)
7009 return -1;
7010 } else {
7011 int oflags = O_CREAT | O_WRONLY;
7013 switch (append) {
7014 case VIR_TRISTATE_SWITCH_ABSENT:
7015 case VIR_TRISTATE_SWITCH_OFF:
7016 oflags |= O_TRUNC;
7017 break;
7018 case VIR_TRISTATE_SWITCH_ON:
7019 oflags |= O_APPEND;
7020 break;
7021 case VIR_TRISTATE_SWITCH_LAST:
7022 break;
7025 if ((*fd = qemuDomainOpenFile(cfg, def, path, oflags, NULL)) < 0)
7026 return -1;
7028 if (qemuSecuritySetImageFDLabel(secManager, (virDomainDef*)def, *fd) < 0) {
7029 VIR_FORCE_CLOSE(*fd);
7030 return -1;
7034 return 0;
7038 struct qemuProcessPrepareHostBackendChardevData {
7039 qemuDomainObjPrivate *priv;
7040 virLogManager *logManager;
7041 virQEMUDriverConfig *cfg;
7042 virDomainDef *def;
7043 const char *fdprefix;
7047 static int
7048 qemuProcessPrepareHostBackendChardevOne(virDomainDeviceDef *dev,
7049 virDomainChrSourceDef *chardev,
7050 void *opaque)
7052 struct qemuProcessPrepareHostBackendChardevData *data = opaque;
7053 qemuDomainChrSourcePrivate *charpriv = QEMU_DOMAIN_CHR_SOURCE_PRIVATE(chardev);
7054 const char *devalias = NULL;
7056 /* this function is also called for the monitor backend which doesn't have
7057 * a 'dev' */
7058 if (dev) {
7059 virDomainDeviceInfo *info = virDomainDeviceGetInfo(dev);
7060 devalias = info->alias;
7062 /* vhost-user disk doesn't use FD passing */
7063 if (dev->type == VIR_DOMAIN_DEVICE_DISK)
7064 return 0;
7066 if (dev->type == VIR_DOMAIN_DEVICE_NET) {
7067 /* due to a historical bug in qemu we don't use FD passtrhough for
7068 * vhost-sockets for network devices */
7069 return 0;
7072 /* TPMs FD passing setup is special and handled separately */
7073 if (dev->type == VIR_DOMAIN_DEVICE_TPM)
7074 return 0;
7075 } else {
7076 devalias = data->fdprefix;
7079 switch ((virDomainChrType) chardev->type) {
7080 case VIR_DOMAIN_CHR_TYPE_NULL:
7081 case VIR_DOMAIN_CHR_TYPE_VC:
7082 case VIR_DOMAIN_CHR_TYPE_PTY:
7083 case VIR_DOMAIN_CHR_TYPE_DEV:
7084 case VIR_DOMAIN_CHR_TYPE_PIPE:
7085 case VIR_DOMAIN_CHR_TYPE_STDIO:
7086 case VIR_DOMAIN_CHR_TYPE_UDP:
7087 case VIR_DOMAIN_CHR_TYPE_TCP:
7088 case VIR_DOMAIN_CHR_TYPE_SPICEVMC:
7089 case VIR_DOMAIN_CHR_TYPE_SPICEPORT:
7090 case VIR_DOMAIN_CHR_TYPE_QEMU_VDAGENT:
7091 case VIR_DOMAIN_CHR_TYPE_DBUS:
7092 break;
7094 case VIR_DOMAIN_CHR_TYPE_FILE: {
7095 VIR_AUTOCLOSE sourcefd = -1;
7097 if (qemuProcessPrepareHostBackendChardevFileHelper(chardev->data.file.path,
7098 chardev->data.file.append,
7099 &sourcefd,
7100 data->logManager,
7101 data->priv->driver->securityManager,
7102 data->cfg,
7103 data->def) < 0)
7104 return -1;
7106 charpriv->sourcefd = qemuFDPassNew(devalias, data->priv);
7108 qemuFDPassAddFD(charpriv->sourcefd, &sourcefd, "-source");
7110 break;
7112 case VIR_DOMAIN_CHR_TYPE_UNIX:
7113 if (chardev->data.nix.listen) {
7114 g_autofree char *name = g_strdup_printf("%s-source", devalias);
7115 VIR_AUTOCLOSE sourcefd = -1;
7117 if (qemuSecuritySetSocketLabel(data->priv->driver->securityManager, data->def) < 0)
7118 return -1;
7120 sourcefd = qemuOpenChrChardevUNIXSocket(chardev);
7122 if (qemuSecurityClearSocketLabel(data->priv->driver->securityManager, data->def) < 0 ||
7123 sourcefd < 0)
7124 return -1;
7126 charpriv->directfd = qemuFDPassDirectNew(name, &sourcefd);
7128 break;
7130 case VIR_DOMAIN_CHR_TYPE_NMDM:
7131 case VIR_DOMAIN_CHR_TYPE_LAST:
7132 default:
7133 virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
7134 _("unsupported chardev '%1$s'"),
7135 virDomainChrTypeToString(chardev->type));
7136 return -1;
7139 if (chardev->logfile) {
7140 VIR_AUTOCLOSE logfd = -1;
7142 if (qemuProcessPrepareHostBackendChardevFileHelper(chardev->logfile,
7143 chardev->logappend,
7144 &logfd,
7145 data->logManager,
7146 data->priv->driver->securityManager,
7147 data->cfg,
7148 data->def) < 0)
7149 return -1;
7151 charpriv->logfd = qemuFDPassNew(devalias, data->priv);
7153 qemuFDPassAddFD(charpriv->logfd, &logfd, "-log");
7156 return 0;
7160 /* prepare the chardev backends for various devices:
7161 * serial/parallel/channel chardevs, vhost-user disks, vhost-user network
7162 * interfaces, smartcards, shared memory, and redirdevs */
7163 static int
7164 qemuProcessPrepareHostBackendChardev(virDomainObj *vm)
7166 qemuDomainObjPrivate *priv = vm->privateData;
7167 g_autoptr(virQEMUDriverConfig) cfg = virQEMUDriverGetConfig(priv->driver);
7168 struct qemuProcessPrepareHostBackendChardevData data = {
7169 .priv = priv,
7170 .logManager = NULL,
7171 .cfg = cfg,
7172 .def = vm->def,
7174 g_autoptr(virLogManager) logManager = NULL;
7176 if (cfg->stdioLogD) {
7177 if (!(logManager = data.logManager = virLogManagerNew(priv->driver->privileged)))
7178 return -1;
7181 if (qemuDomainDeviceBackendChardevForeach(vm->def,
7182 qemuProcessPrepareHostBackendChardevOne,
7183 &data) < 0)
7184 return -1;
7186 data.fdprefix = "monitor";
7188 if (qemuProcessPrepareHostBackendChardevOne(NULL, priv->monConfig, &data) < 0)
7189 return -1;
7191 return 0;
7196 qemuProcessPrepareHostBackendChardevHotplug(virDomainObj *vm,
7197 virDomainDeviceDef *dev)
7199 qemuDomainObjPrivate *priv = vm->privateData;
7200 g_autoptr(virQEMUDriverConfig) cfg = virQEMUDriverGetConfig(priv->driver);
7201 struct qemuProcessPrepareHostBackendChardevData data = {
7202 .priv = priv,
7203 .logManager = NULL,
7204 .cfg = cfg,
7205 .def = vm->def,
7207 g_autoptr(virLogManager) logManager = NULL;
7209 if (cfg->stdioLogD) {
7210 if (!(logManager = data.logManager = virLogManagerNew(priv->driver->privileged)))
7211 return -1;
7214 if (qemuDomainDeviceBackendChardevForeachOne(dev,
7215 qemuProcessPrepareHostBackendChardevOne,
7216 &data) < 0)
7217 return -1;
7219 return 0;
7223 * qemuProcessPrepareHost:
7224 * @driver: qemu driver
7225 * @vm: domain object
7226 * @flags: qemuProcessStartFlags
7228 * This function groups all code that modifies host system (which also may
7229 * update live XML) to prepare environment for a domain which is about to start
7230 * and it's the only place to do those modifications.
7232 * This function MUST be called only after qemuProcessPrepareDomain().
7234 * TODO: move all host modification from qemuBuildCommandLine into this function
7237 qemuProcessPrepareHost(virQEMUDriver *driver,
7238 virDomainObj *vm,
7239 unsigned int flags)
7241 unsigned int hostdev_flags = 0;
7242 qemuDomainObjPrivate *priv = vm->privateData;
7243 g_autoptr(virQEMUDriverConfig) cfg = virQEMUDriverGetConfig(driver);
7246 * Create all per-domain directories in order to make sure domain
7247 * with any possible seclabels can access it.
7249 if (qemuProcessMakeDir(driver, vm, priv->libDir) < 0 ||
7250 qemuProcessMakeDir(driver, vm, priv->channelTargetDir) < 0)
7251 return -1;
7253 if (qemuPrepareNVRAM(driver, vm, !!(flags & VIR_QEMU_PROCESS_START_RESET_NVRAM)) < 0)
7254 return -1;
7256 if (vm->def->vsock) {
7257 if (qemuProcessOpenVhostVsock(vm->def->vsock) < 0)
7258 return -1;
7260 VIR_DEBUG("Preparing network devices");
7261 if (qemuProcessPrepareHostNetwork(vm) < 0)
7262 return -1;
7264 /* Must be run before security labelling */
7265 VIR_DEBUG("Preparing host devices");
7266 if (!cfg->relaxedACS)
7267 hostdev_flags |= VIR_HOSTDEV_STRICT_ACS_CHECK;
7268 if (flags & VIR_QEMU_PROCESS_START_NEW)
7269 hostdev_flags |= VIR_HOSTDEV_COLD_BOOT;
7270 if (qemuHostdevPrepareDomainDevices(driver, vm->def, hostdev_flags) < 0)
7271 return -1;
7273 VIR_DEBUG("Preparing chr device backends");
7274 if (qemuProcessPrepareHostBackendChardev(vm) < 0)
7275 return -1;
7277 if (qemuProcessBuildDestroyMemoryPaths(driver, vm, NULL, true) < 0)
7278 return -1;
7280 /* Ensure no historical cgroup for this VM is lying around bogus
7281 * settings */
7282 VIR_DEBUG("Ensuring no historical cgroup is lying around");
7283 virDomainCgroupRemoveCgroup(vm, priv->cgroup, priv->machineName);
7285 if (g_mkdir_with_parents(cfg->logDir, 0777) < 0) {
7286 virReportSystemError(errno,
7287 _("cannot create log directory %1$s"),
7288 cfg->logDir);
7289 return -1;
7292 VIR_FREE(priv->pidfile);
7293 if (!(priv->pidfile = virPidFileBuildPath(cfg->stateDir, vm->def->name))) {
7294 virReportSystemError(errno,
7295 "%s", _("Failed to build pidfile path."));
7296 return -1;
7299 if (unlink(priv->pidfile) < 0 &&
7300 errno != ENOENT) {
7301 virReportSystemError(errno,
7302 _("Cannot remove stale PID file %1$s"),
7303 priv->pidfile);
7304 return -1;
7307 VIR_DEBUG("Write domain masterKey");
7308 if (qemuDomainWriteMasterKeyFile(driver, vm) < 0)
7309 return -1;
7311 VIR_DEBUG("Preparing disks (host)");
7312 if (qemuProcessPrepareHostStorage(driver, vm, flags) < 0)
7313 return -1;
7315 VIR_DEBUG("Preparing external devices");
7316 if (qemuExtDevicesPrepareHost(driver, vm) < 0)
7317 return -1;
7319 if (qemuProcessPrepareLaunchSecurityGuestInput(vm) < 0)
7320 return -1;
7322 return 0;
7327 * qemuProcessGenID:
7328 * @vm: Pointer to domain object
7329 * @flags: qemuProcessStartFlags
7331 * If this domain is requesting to use genid, then update the GUID
7332 * value if the VIR_QEMU_PROCESS_START_GEN_VMID flag is set. This
7333 * flag is set on specific paths during domain start processing when
7334 * there is the possibility that the VM is potentially re-executing
7335 * something that has already been executed before.
7337 static int
7338 qemuProcessGenID(virDomainObj *vm,
7339 unsigned int flags)
7341 if (!vm->def->genidRequested)
7342 return 0;
7344 /* If we are coming from a path where we must provide a new gen id
7345 * value regardless of whether it was previously generated or provided,
7346 * then generate a new GUID value before we build the command line. */
7347 if (flags & VIR_QEMU_PROCESS_START_GEN_VMID) {
7348 if (virUUIDGenerate(vm->def->genid) < 0) {
7349 virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
7350 _("failed to regenerate genid"));
7351 return -1;
7355 return 0;
7360 * qemuProcessSetupDiskThrottling:
7362 * Sets up disk trottling for -blockdev via block_set_io_throttle monitor
7363 * command. This hack should be replaced by proper use of the 'throttle'
7364 * blockdev driver in qemu once it will support changing of the throttle group.
7365 * Same hack is done in qemuDomainAttachDiskGeneric.
7367 static int
7368 qemuProcessSetupDiskThrottling(virDomainObj *vm,
7369 virDomainAsyncJob asyncJob)
7371 size_t i;
7372 int ret = -1;
7374 VIR_DEBUG("Setting up disk throttling for -blockdev via block_set_io_throttle");
7376 if (qemuDomainObjEnterMonitorAsync(vm, asyncJob) < 0)
7377 return -1;
7379 for (i = 0; i < vm->def->ndisks; i++) {
7380 virDomainDiskDef *disk = vm->def->disks[i];
7382 /* Setting throttling for empty drives fails */
7383 if (virStorageSourceIsEmpty(disk->src))
7384 continue;
7386 if (!qemuDiskConfigBlkdeviotuneEnabled(disk))
7387 continue;
7389 if (qemuMonitorSetBlockIoThrottle(qemuDomainGetMonitor(vm),
7390 QEMU_DOMAIN_DISK_PRIVATE(disk)->qomName,
7391 &disk->blkdeviotune) < 0)
7392 goto cleanup;
7395 ret = 0;
7397 cleanup:
7398 qemuDomainObjExitMonitor(vm);
7399 return ret;
7403 static int
7404 qemuProcessEnableDomainNamespaces(virQEMUDriver *driver,
7405 virDomainObj *vm)
7407 g_autoptr(virQEMUDriverConfig) cfg = virQEMUDriverGetConfig(driver);
7408 const char *state = "disabled";
7410 if (virBitmapIsBitSet(cfg->namespaces, QEMU_DOMAIN_NS_MOUNT) &&
7411 qemuDomainEnableNamespace(vm, QEMU_DOMAIN_NS_MOUNT) < 0)
7412 return -1;
7414 if (qemuDomainNamespaceEnabled(vm, QEMU_DOMAIN_NS_MOUNT))
7415 state = "enabled";
7417 VIR_DEBUG("Mount namespace for domain name=%s is %s",
7418 vm->def->name, state);
7419 return 0;
7423 static int
7424 qemuProcessEnablePerf(virDomainObj *vm)
7426 qemuDomainObjPrivate *priv = vm->privateData;
7427 size_t i;
7429 if (!(priv->perf = virPerfNew()))
7430 return -1;
7432 for (i = 0; i < VIR_PERF_EVENT_LAST; i++) {
7433 if (vm->def->perf.events[i] == VIR_TRISTATE_BOOL_YES &&
7434 virPerfEventEnable(priv->perf, i, vm->pid) < 0)
7435 return -1;
7438 return 0;
7442 static int
7443 qemuProcessSetupDisksTransientSnapshot(virDomainObj *vm,
7444 virDomainAsyncJob asyncJob)
7446 g_autoptr(qemuSnapshotDiskContext) snapctxt = NULL;
7447 g_autoptr(GHashTable) blockNamedNodeData = NULL;
7448 size_t i;
7450 if (!(blockNamedNodeData = qemuBlockGetNamedNodeData(vm, asyncJob)))
7451 return -1;
7453 snapctxt = qemuSnapshotDiskContextNew(vm->def->ndisks, vm, asyncJob);
7455 for (i = 0; i < vm->def->ndisks; i++) {
7456 virDomainDiskDef *domdisk = vm->def->disks[i];
7457 g_autoptr(virDomainSnapshotDiskDef) snapdisk = NULL;
7459 if (!domdisk->transient ||
7460 domdisk->transientShareBacking == VIR_TRISTATE_BOOL_YES)
7461 continue;
7463 /* validation code makes sure that we do this only for local disks
7464 * with a file source */
7466 if (!(snapdisk = qemuSnapshotGetTransientDiskDef(domdisk, vm->def->name)))
7467 return -1;
7469 if (qemuSnapshotDiskPrepareOne(snapctxt, domdisk, snapdisk,
7470 blockNamedNodeData,
7471 false,
7472 false) < 0)
7473 return -1;
7476 if (qemuSnapshotDiskCreate(snapctxt) < 0)
7477 return -1;
7479 for (i = 0; i < vm->def->ndisks; i++) {
7480 virDomainDiskDef *domdisk = vm->def->disks[i];
7482 if (!domdisk->transient ||
7483 domdisk->transientShareBacking == VIR_TRISTATE_BOOL_YES)
7484 continue;
7486 QEMU_DOMAIN_DISK_PRIVATE(domdisk)->transientOverlayCreated = true;
7489 return 0;
7493 static int
7494 qemuProcessSetupDisksTransientHotplug(virDomainObj *vm,
7495 virDomainAsyncJob asyncJob)
7497 qemuDomainObjPrivate *priv = vm->privateData;
7498 bool hasHotpluggedDisk = false;
7499 size_t i;
7501 for (i = 0; i < vm->def->ndisks; i++) {
7502 virDomainDiskDef *domdisk = vm->def->disks[i];
7504 if (!domdisk->transient ||
7505 domdisk->transientShareBacking != VIR_TRISTATE_BOOL_YES)
7506 continue;
7508 if (qemuDomainAttachDiskGeneric(vm, domdisk, asyncJob) < 0)
7509 return -1;
7511 hasHotpluggedDisk = true;
7514 /* in order to allow booting from such disks we need to issue a system-reset
7515 * so that the firmware tables recording bootable devices are regerated */
7516 if (hasHotpluggedDisk) {
7517 int rc;
7519 if (qemuDomainObjEnterMonitorAsync(vm, asyncJob) < 0)
7520 return -1;
7522 rc = qemuMonitorSystemReset(priv->mon);
7524 qemuDomainObjExitMonitor(vm);
7525 if (rc < 0)
7526 return -1;
7529 return 0;
7533 static int
7534 qemuProcessSetupDisksTransient(virDomainObj *vm,
7535 virDomainAsyncJob asyncJob)
7537 if (qemuProcessSetupDisksTransientSnapshot(vm, asyncJob) < 0)
7538 return -1;
7540 if (qemuProcessSetupDisksTransientHotplug(vm, asyncJob) < 0)
7541 return -1;
7543 return 0;
7547 static int
7548 qemuProcessSetupLifecycleActions(virDomainObj *vm,
7549 virDomainAsyncJob asyncJob)
7551 qemuDomainObjPrivate *priv = vm->privateData;
7552 int rc;
7554 if (!(virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_SET_ACTION)))
7555 return 0;
7557 /* for now we handle only onReboot->destroy here as an alternative to
7558 * '-no-reboot' on the commandline */
7559 if (vm->def->onReboot != VIR_DOMAIN_LIFECYCLE_ACTION_DESTROY)
7560 return 0;
7562 if (qemuDomainObjEnterMonitorAsync(vm, asyncJob) < 0)
7563 return -1;
7565 rc = qemuMonitorSetAction(priv->mon,
7566 QEMU_MONITOR_ACTION_SHUTDOWN_KEEP,
7567 QEMU_MONITOR_ACTION_REBOOT_SHUTDOWN,
7568 QEMU_MONITOR_ACTION_WATCHDOG_KEEP,
7569 QEMU_MONITOR_ACTION_PANIC_KEEP);
7571 qemuDomainObjExitMonitor(vm);
7572 if (rc < 0)
7573 return -1;
7575 return 0;
7580 qemuProcessDeleteThreadContext(virDomainObj *vm)
7582 qemuDomainObjPrivate *priv = vm->privateData;
7583 GSList *next = priv->threadContextAliases;
7584 int ret = -1;
7586 if (!next)
7587 return 0;
7589 for (; next; next = next->next) {
7590 if (qemuMonitorDelObject(priv->mon, next->data, true) < 0)
7591 goto cleanup;
7594 ret = 0;
7595 cleanup:
7596 g_slist_free_full(g_steal_pointer(&priv->threadContextAliases), g_free);
7597 return ret;
7601 static int
7602 qemuProcessDeleteThreadContextHelper(virDomainObj *vm,
7603 virDomainAsyncJob asyncJob)
7605 qemuDomainObjPrivate *priv = vm->privateData;
7606 int ret = -1;
7608 if (!priv->threadContextAliases)
7609 return 0;
7611 VIR_DEBUG("Deleting thread context objects");
7612 if (qemuDomainObjEnterMonitorAsync(vm, asyncJob) < 0)
7613 return -1;
7615 ret = qemuProcessDeleteThreadContext(vm);
7617 qemuDomainObjExitMonitor(vm);
7619 return ret;
7624 * qemuProcessLaunch:
7626 * Launch a new QEMU process with stopped virtual CPUs.
7628 * The caller is supposed to call qemuProcessStop with appropriate
7629 * flags in case of failure.
7631 * Returns 0 on success,
7632 * -1 on error which happened before devices were labeled and thus
7633 * there is no need to restore them,
7634 * -2 on error requesting security labels to be restored.
7637 qemuProcessLaunch(virConnectPtr conn,
7638 virQEMUDriver *driver,
7639 virDomainObj *vm,
7640 virDomainAsyncJob asyncJob,
7641 qemuProcessIncomingDef *incoming,
7642 virDomainMomentObj *snapshot,
7643 virNetDevVPortProfileOp vmop,
7644 unsigned int flags)
7646 int ret = -1;
7647 int rv;
7648 int logfile = -1;
7649 g_autoptr(qemuLogContext) logCtxt = NULL;
7650 qemuDomainObjPrivate *priv = vm->privateData;
7651 g_autoptr(virCommand) cmd = NULL;
7652 struct qemuProcessHookData hookData;
7653 g_autoptr(virQEMUDriverConfig) cfg = NULL;
7654 size_t nnicindexes = 0;
7655 g_autofree int *nicindexes = NULL;
7656 unsigned long long maxMemLock = 0;
7657 bool incomingMigrationExtDevices = false;
7659 VIR_DEBUG("conn=%p driver=%p vm=%p name=%s id=%d asyncJob=%d "
7660 "incoming.uri=%s "
7661 "incoming.fd=%d incoming.path=%s "
7662 "snapshot=%p vmop=%d flags=0x%x",
7663 conn, driver, vm, vm->def->name, vm->def->id, asyncJob,
7664 NULLSTR(incoming ? incoming->uri : NULL),
7665 incoming ? incoming->fd : -1,
7666 NULLSTR(incoming ? incoming->path : NULL),
7667 snapshot, vmop, flags);
7669 /* Okay, these are just internal flags,
7670 * but doesn't hurt to check */
7671 virCheckFlags(VIR_QEMU_PROCESS_START_COLD |
7672 VIR_QEMU_PROCESS_START_PAUSED |
7673 VIR_QEMU_PROCESS_START_AUTODESTROY |
7674 VIR_QEMU_PROCESS_START_NEW |
7675 VIR_QEMU_PROCESS_START_GEN_VMID |
7676 VIR_QEMU_PROCESS_START_RESET_NVRAM, -1);
7678 cfg = virQEMUDriverGetConfig(driver);
7680 if (flags & VIR_QEMU_PROCESS_START_AUTODESTROY) {
7681 if (!conn) {
7682 virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
7683 _("Domain autodestroy requires a connection handle"));
7684 return -1;
7686 if (driver->embeddedRoot) {
7687 virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
7688 _("Domain autodestroy not supported for embedded drivers yet"));
7689 return -1;
7693 hookData.vm = vm;
7694 hookData.driver = driver;
7695 /* We don't increase cfg's reference counter here. */
7696 hookData.cfg = cfg;
7698 VIR_DEBUG("Creating domain log file");
7699 if (!(logCtxt = qemuLogContextNew(driver, vm, vm->def->name))) {
7700 virLastErrorPrefixMessage("%s", _("can't connect to virtlogd"));
7701 goto cleanup;
7703 logfile = qemuLogContextGetWriteFD(logCtxt);
7705 if (qemuProcessGenID(vm, flags) < 0)
7706 goto cleanup;
7708 if (qemuDomainSchedCoreStart(cfg, vm) < 0)
7709 goto cleanup;
7711 /* For external devices the rules of incoming migration are a bit stricter,
7712 * than plain @incoming != NULL. They need to differentiate between
7713 * incoming migration and restore from a save file. */
7714 incomingMigrationExtDevices = incoming &&
7715 vmop == VIR_NETDEV_VPORT_PROFILE_OP_MIGRATE_IN_START;
7717 if (qemuExtDevicesStart(driver, vm, incomingMigrationExtDevices) < 0)
7718 goto cleanup;
7720 if (!(cmd = qemuBuildCommandLine(vm,
7721 incoming ? "defer" : NULL,
7722 snapshot, vmop,
7723 &nnicindexes, &nicindexes)))
7724 goto cleanup;
7726 if (incoming && incoming->fd != -1)
7727 virCommandPassFD(cmd, incoming->fd, 0);
7729 /* now that we know it is about to start call the hook if present */
7730 if (qemuProcessStartHook(driver, vm,
7731 VIR_HOOK_QEMU_OP_START,
7732 VIR_HOOK_SUBOP_BEGIN) < 0)
7733 goto cleanup;
7735 qemuLogOperation(vm, "starting up", cmd, logCtxt);
7737 qemuDomainObjCheckTaint(driver, vm, logCtxt, incoming != NULL);
7739 qemuLogContextMarkPosition(logCtxt);
7741 if (qemuProcessEnableDomainNamespaces(driver, vm) < 0)
7742 goto cleanup;
7744 VIR_DEBUG("Setting up raw IO");
7745 if (qemuProcessSetupRawIO(vm, cmd) < 0)
7746 goto cleanup;
7748 virCommandSetPreExecHook(cmd, qemuProcessHook, &hookData);
7749 virCommandSetUmask(cmd, 0x002);
7751 VIR_DEBUG("Setting up process limits");
7753 /* In some situations, eg. VFIO passthrough, QEMU might need to lock a
7754 * significant amount of memory, so we need to set the limit accordingly */
7755 maxMemLock = qemuDomainGetMemLockLimitBytes(vm->def);
7757 /* For all these settings, zero indicates that the limit should
7758 * not be set explicitly and the default/inherited limit should
7759 * be applied instead */
7760 if (maxMemLock > 0)
7761 virCommandSetMaxMemLock(cmd, maxMemLock);
7762 if (cfg->maxProcesses > 0)
7763 virCommandSetMaxProcesses(cmd, cfg->maxProcesses);
7764 if (cfg->maxFiles > 0)
7765 virCommandSetMaxFiles(cmd, cfg->maxFiles);
7766 if (cfg->schedCore == QEMU_SCHED_CORE_EMULATOR ||
7767 cfg->schedCore == QEMU_SCHED_CORE_FULL)
7768 virCommandSetRunAmong(cmd, priv->schedCoreChildPID);
7770 /* In this case, however, zero means that core dumps should be
7771 * disabled, and so we always need to set the limit explicitly */
7772 virCommandSetMaxCoreSize(cmd, cfg->maxCore);
7774 VIR_DEBUG("Setting up security labelling");
7775 if (qemuSecuritySetChildProcessLabel(driver->securityManager,
7776 vm->def, false, cmd) < 0)
7777 goto cleanup;
7779 virCommandSetOutputFD(cmd, &logfile);
7780 virCommandSetErrorFD(cmd, &logfile);
7781 virCommandNonblockingFDs(cmd);
7782 virCommandSetPidFile(cmd, priv->pidfile);
7783 virCommandDaemonize(cmd);
7784 virCommandRequireHandshake(cmd);
7786 if (qemuSecurityPreFork(driver->securityManager) < 0)
7787 goto cleanup;
7788 rv = virCommandRun(cmd, NULL);
7789 qemuSecurityPostFork(driver->securityManager);
7791 /* wait for qemu process to show up */
7792 if (rv == 0) {
7793 if ((rv = virPidFileReadPath(priv->pidfile, &vm->pid)) < 0) {
7794 virReportSystemError(-rv,
7795 _("Domain %1$s didn't show up"),
7796 vm->def->name);
7797 goto cleanup;
7799 VIR_DEBUG("QEMU vm=%p name=%s running with pid=%lld",
7800 vm, vm->def->name, (long long)vm->pid);
7801 } else {
7802 VIR_DEBUG("QEMU vm=%p name=%s failed to spawn",
7803 vm, vm->def->name);
7804 goto cleanup;
7807 VIR_DEBUG("Writing early domain status to disk");
7808 if (virDomainObjSave(vm, driver->xmlopt, cfg->stateDir) < 0)
7809 goto cleanup;
7811 VIR_DEBUG("Waiting for handshake from child");
7812 if (virCommandHandshakeWait(cmd) < 0) {
7813 /* Read errors from child that occurred between fork and exec. */
7814 qemuProcessReportLogError(logCtxt,
7815 _("Process exited prior to exec"));
7816 goto cleanup;
7819 VIR_DEBUG("Building domain mount namespace (if required)");
7820 if (qemuDomainBuildNamespace(cfg, vm) < 0)
7821 goto cleanup;
7823 VIR_DEBUG("Setting up domain cgroup (if required)");
7824 if (qemuSetupCgroup(vm, nnicindexes, nicindexes) < 0)
7825 goto cleanup;
7827 VIR_DEBUG("Setting up domain perf (if required)");
7828 if (qemuProcessEnablePerf(vm) < 0)
7829 goto cleanup;
7831 /* This must be done after cgroup placement to avoid resetting CPU
7832 * affinity */
7833 if (qemuProcessInitCpuAffinity(vm) < 0)
7834 goto cleanup;
7836 VIR_DEBUG("Setting emulator tuning/settings");
7837 if (qemuProcessSetupEmulator(vm) < 0)
7838 goto cleanup;
7840 VIR_DEBUG("Setting cgroup for external devices (if required)");
7841 if (qemuSetupCgroupForExtDevices(vm, driver) < 0)
7842 goto cleanup;
7844 VIR_DEBUG("Setting up resctrl");
7845 if (qemuProcessResctrlCreate(driver, vm) < 0)
7846 goto cleanup;
7848 VIR_DEBUG("Setting up managed PR daemon");
7849 if (virDomainDefHasManagedPR(vm->def) &&
7850 qemuProcessStartManagedPRDaemon(vm) < 0)
7851 goto cleanup;
7853 VIR_DEBUG("Setting up permissions to allow post-copy migration");
7854 if (qemuProcessAllowPostCopyMigration(vm) < 0)
7855 goto cleanup;
7857 VIR_DEBUG("Setting domain security labels");
7858 if (qemuSecuritySetAllLabel(driver,
7860 incoming ? incoming->path : NULL,
7861 incoming != NULL) < 0)
7862 goto cleanup;
7864 /* Security manager labeled all devices, therefore
7865 * if any operation from now on fails, we need to ask the caller to
7866 * restore labels.
7868 ret = -2;
7870 if (incoming && incoming->fd != -1) {
7871 /* if there's an fd to migrate from, and it's a pipe, put the
7872 * proper security label on it
7874 struct stat stdin_sb;
7876 VIR_DEBUG("setting security label on pipe used for migration");
7878 if (fstat(incoming->fd, &stdin_sb) < 0) {
7879 virReportSystemError(errno,
7880 _("cannot stat fd %1$d"), incoming->fd);
7881 goto cleanup;
7883 if (S_ISFIFO(stdin_sb.st_mode) &&
7884 qemuSecuritySetImageFDLabel(driver->securityManager,
7885 vm->def, incoming->fd) < 0)
7886 goto cleanup;
7889 VIR_DEBUG("Labelling done, completing handshake to child");
7890 if (virCommandHandshakeNotify(cmd) < 0)
7891 goto cleanup;
7892 VIR_DEBUG("Handshake complete, child running");
7894 if (qemuDomainObjStartWorker(vm) < 0)
7895 goto cleanup;
7897 VIR_DEBUG("Waiting for monitor to show up");
7898 if (qemuProcessWaitForMonitor(driver, vm, asyncJob, logCtxt) < 0)
7899 goto cleanup;
7901 if (qemuConnectAgent(driver, vm) < 0)
7902 goto cleanup;
7904 VIR_DEBUG("setting up hotpluggable cpus");
7905 if (qemuDomainHasHotpluggableStartupVcpus(vm->def)) {
7906 if (qemuDomainRefreshVcpuInfo(vm, asyncJob, false) < 0)
7907 goto cleanup;
7909 if (qemuProcessValidateHotpluggableVcpus(vm->def) < 0)
7910 goto cleanup;
7912 if (qemuProcessSetupHotpluggableVcpus(vm, asyncJob) < 0)
7913 goto cleanup;
7916 VIR_DEBUG("Refreshing VCPU info");
7917 if (qemuDomainRefreshVcpuInfo(vm, asyncJob, false) < 0)
7918 goto cleanup;
7920 if (qemuDomainValidateVcpuInfo(vm) < 0)
7921 goto cleanup;
7923 qemuDomainVcpuPersistOrder(vm->def);
7925 VIR_DEBUG("Verifying and updating provided guest CPU");
7926 if (qemuProcessUpdateAndVerifyCPU(vm, asyncJob) < 0)
7927 goto cleanup;
7929 VIR_DEBUG("Detecting IOThread PIDs");
7930 if (qemuProcessDetectIOThreadPIDs(vm, asyncJob) < 0)
7931 goto cleanup;
7933 VIR_DEBUG("Setting global CPU cgroup (if required)");
7934 if (virDomainCgroupSetupGlobalCpuCgroup(vm, priv->cgroup) < 0)
7935 goto cleanup;
7937 VIR_DEBUG("Setting vCPU tuning/settings");
7938 if (qemuProcessSetupVcpus(vm) < 0)
7939 goto cleanup;
7941 VIR_DEBUG("Setting IOThread tuning/settings");
7942 if (qemuProcessSetupIOThreads(vm) < 0)
7943 goto cleanup;
7945 VIR_DEBUG("Setting emulator scheduler");
7946 if (vm->def->cputune.emulatorsched &&
7947 virProcessSetScheduler(vm->pid,
7948 vm->def->cputune.emulatorsched->policy,
7949 vm->def->cputune.emulatorsched->priority) < 0)
7950 goto cleanup;
7952 VIR_DEBUG("Setting any required VM passwords");
7953 if (qemuProcessInitPasswords(driver, vm, asyncJob) < 0)
7954 goto cleanup;
7956 /* set default link states */
7957 /* qemu doesn't support setting this on the command line, so
7958 * enter the monitor */
7959 VIR_DEBUG("Setting network link states");
7960 if (qemuProcessSetLinkStates(vm, asyncJob) < 0)
7961 goto cleanup;
7963 VIR_DEBUG("Setting initial memory amount");
7964 if (qemuProcessSetupBalloon(vm, asyncJob) < 0)
7965 goto cleanup;
7967 if (qemuProcessSetupDiskThrottling(vm, asyncJob) < 0)
7968 goto cleanup;
7970 /* Since CPUs were not started yet, the balloon could not return the memory
7971 * to the host and thus cur_balloon needs to be updated so that GetXMLdesc
7972 * and friends return the correct size in case they can't grab the job */
7973 if (!incoming && !snapshot &&
7974 qemuProcessRefreshBalloonState(vm, asyncJob) < 0)
7975 goto cleanup;
7977 if (flags & VIR_QEMU_PROCESS_START_AUTODESTROY)
7978 virCloseCallbacksDomainAdd(vm, conn, qemuProcessAutoDestroy);
7980 if (!incoming && !snapshot) {
7981 VIR_DEBUG("Setting up transient disk");
7982 if (qemuProcessSetupDisksTransient(vm, asyncJob) < 0)
7983 goto cleanup;
7986 VIR_DEBUG("Setting handling of lifecycle actions");
7987 if (qemuProcessSetupLifecycleActions(vm, asyncJob) < 0)
7988 goto cleanup;
7990 if (qemuProcessDeleteThreadContextHelper(vm, asyncJob) < 0)
7991 goto cleanup;
7993 ret = 0;
7995 cleanup:
7996 qemuDomainSchedCoreStop(priv);
7997 qemuDomainStartupCleanup(vm);
7998 return ret;
8002 static int
8003 qemuProcessRefreshRxFilters(virDomainObj *vm,
8004 virDomainAsyncJob asyncJob)
8006 size_t i;
8008 for (i = 0; i < vm->def->nnets; i++) {
8009 virDomainNetDef *def = vm->def->nets[i];
8011 if (!virDomainNetGetActualTrustGuestRxFilters(def))
8012 continue;
8014 /* rx-filters are supported only for virtio model and TUN/TAP based
8015 * types. */
8016 if (def->model != VIR_DOMAIN_NET_MODEL_VIRTIO)
8017 continue;
8019 switch (virDomainNetGetActualType(def)) {
8020 case VIR_DOMAIN_NET_TYPE_ETHERNET:
8021 case VIR_DOMAIN_NET_TYPE_NETWORK:
8022 case VIR_DOMAIN_NET_TYPE_BRIDGE:
8023 case VIR_DOMAIN_NET_TYPE_DIRECT:
8024 break;
8025 case VIR_DOMAIN_NET_TYPE_USER:
8026 case VIR_DOMAIN_NET_TYPE_VHOSTUSER:
8027 case VIR_DOMAIN_NET_TYPE_SERVER:
8028 case VIR_DOMAIN_NET_TYPE_CLIENT:
8029 case VIR_DOMAIN_NET_TYPE_MCAST:
8030 case VIR_DOMAIN_NET_TYPE_INTERNAL:
8031 case VIR_DOMAIN_NET_TYPE_HOSTDEV:
8032 case VIR_DOMAIN_NET_TYPE_UDP:
8033 case VIR_DOMAIN_NET_TYPE_VDPA:
8034 case VIR_DOMAIN_NET_TYPE_NULL:
8035 case VIR_DOMAIN_NET_TYPE_VDS:
8036 case VIR_DOMAIN_NET_TYPE_LAST:
8037 default:
8038 continue;
8041 if (qemuDomainSyncRxFilter(vm, def, asyncJob) < 0)
8042 return -1;
8045 return 0;
8050 * qemuProcessRefreshState:
8051 * @driver: qemu driver data
8052 * @vm: domain to refresh
8053 * @asyncJob: async job type
8055 * This function gathers calls to refresh qemu state after startup. This
8056 * function is called after a deferred migration finishes so that we can update
8057 * state influenced by the migration stream.
8060 qemuProcessRefreshState(virQEMUDriver *driver,
8061 virDomainObj *vm,
8062 virDomainAsyncJob asyncJob)
8064 VIR_DEBUG("Fetching list of active devices");
8065 if (qemuDomainUpdateDeviceList(vm, asyncJob) < 0)
8066 return -1;
8068 VIR_DEBUG("Updating info of memory devices");
8069 if (qemuDomainUpdateMemoryDeviceInfo(vm, asyncJob) < 0)
8070 return -1;
8072 VIR_DEBUG("Detecting actual memory size for video device");
8073 if (qemuProcessUpdateVideoRamSize(driver, vm, asyncJob) < 0)
8074 return -1;
8076 VIR_DEBUG("Updating disk data");
8077 if (qemuProcessRefreshDisks(vm, asyncJob) < 0)
8078 return -1;
8080 VIR_DEBUG("Updating rx-filter data");
8081 if (qemuProcessRefreshRxFilters(vm, asyncJob) < 0)
8082 return -1;
8084 return 0;
8089 * qemuProcessFinishStartup:
8091 * Finish starting a new domain.
8094 qemuProcessFinishStartup(virQEMUDriver *driver,
8095 virDomainObj *vm,
8096 virDomainAsyncJob asyncJob,
8097 bool startCPUs,
8098 virDomainPausedReason pausedReason)
8100 g_autoptr(virQEMUDriverConfig) cfg = virQEMUDriverGetConfig(driver);
8102 if (startCPUs) {
8103 VIR_DEBUG("Starting domain CPUs");
8104 if (qemuProcessStartCPUs(driver, vm,
8105 VIR_DOMAIN_RUNNING_BOOTED,
8106 asyncJob) < 0) {
8107 if (virGetLastErrorCode() == VIR_ERR_OK)
8108 virReportError(VIR_ERR_OPERATION_FAILED, "%s",
8109 _("resume operation failed"));
8110 return -1;
8112 } else {
8113 virDomainObjSetState(vm, VIR_DOMAIN_PAUSED, pausedReason);
8116 VIR_DEBUG("Writing domain status to disk");
8117 if (virDomainObjSave(vm, driver->xmlopt, cfg->stateDir) < 0)
8118 return -1;
8120 if (qemuProcessStartHook(driver, vm,
8121 VIR_HOOK_QEMU_OP_STARTED,
8122 VIR_HOOK_SUBOP_BEGIN) < 0)
8123 return -1;
8125 return 0;
8130 qemuProcessStart(virConnectPtr conn,
8131 virQEMUDriver *driver,
8132 virDomainObj *vm,
8133 virCPUDef *updatedCPU,
8134 virDomainAsyncJob asyncJob,
8135 const char *migrateFrom,
8136 int migrateFd,
8137 const char *migratePath,
8138 virDomainMomentObj *snapshot,
8139 virNetDevVPortProfileOp vmop,
8140 unsigned int flags)
8142 qemuDomainObjPrivate *priv = vm->privateData;
8143 qemuProcessIncomingDef *incoming = NULL;
8144 unsigned int stopFlags;
8145 bool relabel = false;
8146 bool relabelSavedState = false;
8147 int ret = -1;
8148 int rv;
8150 VIR_DEBUG("conn=%p driver=%p vm=%p name=%s id=%d asyncJob=%s "
8151 "migrateFrom=%s migrateFd=%d migratePath=%s "
8152 "snapshot=%p vmop=%d flags=0x%x",
8153 conn, driver, vm, vm->def->name, vm->def->id,
8154 virDomainAsyncJobTypeToString(asyncJob),
8155 NULLSTR(migrateFrom), migrateFd, NULLSTR(migratePath),
8156 snapshot, vmop, flags);
8158 virCheckFlagsGoto(VIR_QEMU_PROCESS_START_COLD |
8159 VIR_QEMU_PROCESS_START_PAUSED |
8160 VIR_QEMU_PROCESS_START_AUTODESTROY |
8161 VIR_QEMU_PROCESS_START_GEN_VMID |
8162 VIR_QEMU_PROCESS_START_RESET_NVRAM, cleanup);
8164 if (!migrateFrom && !snapshot)
8165 flags |= VIR_QEMU_PROCESS_START_NEW;
8167 if (qemuProcessInit(driver, vm, updatedCPU,
8168 asyncJob, !!migrateFrom, flags) < 0)
8169 goto cleanup;
8171 if (migrateFrom) {
8172 incoming = qemuProcessIncomingDefNew(priv->qemuCaps, NULL, migrateFrom,
8173 migrateFd, migratePath);
8174 if (!incoming)
8175 goto stop;
8178 if (qemuProcessPrepareDomain(driver, vm, flags) < 0)
8179 goto stop;
8181 if (qemuProcessPrepareHost(driver, vm, flags) < 0)
8182 goto stop;
8184 if (migratePath) {
8185 if (qemuSecuritySetSavedStateLabel(driver->securityManager,
8186 vm->def, migratePath) < 0)
8187 goto cleanup;
8188 relabelSavedState = true;
8191 if ((rv = qemuProcessLaunch(conn, driver, vm, asyncJob, incoming,
8192 snapshot, vmop, flags)) < 0) {
8193 if (rv == -2)
8194 relabel = true;
8195 goto stop;
8197 relabel = true;
8199 if (incoming) {
8200 if (qemuMigrationDstRun(vm, incoming->uri, asyncJob) < 0)
8201 goto stop;
8202 } else {
8203 /* Refresh state of devices from QEMU. During migration this happens
8204 * in qemuMigrationDstFinish to ensure that state information is fully
8205 * transferred. */
8206 if (qemuProcessRefreshState(driver, vm, asyncJob) < 0)
8207 goto stop;
8210 if (qemuProcessFinishStartup(driver, vm, asyncJob,
8211 !(flags & VIR_QEMU_PROCESS_START_PAUSED),
8212 incoming ?
8213 VIR_DOMAIN_PAUSED_MIGRATION :
8214 VIR_DOMAIN_PAUSED_USER) < 0)
8215 goto stop;
8217 if (!incoming) {
8218 /* Keep watching qemu log for errors during incoming migration, otherwise
8219 * unset reporting errors from qemu log. */
8220 qemuMonitorSetDomainLog(priv->mon, NULL, NULL, NULL);
8223 ret = 0;
8225 cleanup:
8226 if (relabelSavedState &&
8227 qemuSecurityRestoreSavedStateLabel(driver->securityManager,
8228 vm->def, migratePath) < 0)
8229 VIR_WARN("failed to restore save state label on %s", migratePath);
8230 qemuProcessIncomingDefFree(incoming);
8231 return ret;
8233 stop:
8234 stopFlags = 0;
8235 if (!relabel)
8236 stopFlags |= VIR_QEMU_PROCESS_STOP_NO_RELABEL;
8237 if (migrateFrom)
8238 stopFlags |= VIR_QEMU_PROCESS_STOP_MIGRATED;
8239 if (priv->mon)
8240 qemuMonitorSetDomainLog(priv->mon, NULL, NULL, NULL);
8241 qemuProcessStop(driver, vm, VIR_DOMAIN_SHUTOFF_FAILED, asyncJob, stopFlags);
8242 goto cleanup;
8247 * qemuProcessStartWithMemoryState:
8248 * @conn: connection object
8249 * @driver: qemu driver object
8250 * @vm: domain object
8251 * @fd: FD pointer of memory state file
8252 * @path: path to memory state file
8253 * @snapshot: internal snapshot to load when starting QEMU process or NULL
8254 * @data: data from memory state file or NULL
8255 * @asyncJob: type of asynchronous job
8256 * @start_flags: flags to start QEMU process with
8257 * @reason: audit log reason
8258 * @started: boolean to store if QEMU process was started
8260 * Start VM with existing memory state. Make sure that the stored memory state
8261 * is correctly decompressed so it can be loaded by QEMU process.
8263 * When reverting to internal snapshot caller needs to pass @snapshot
8264 * to correctly start QEMU process, @fd, @path, @data needs to be NULL.
8266 * When restoring VM from saved image caller needs to pass @fd, @path and
8267 * @data to correctly start QEMU process, @snapshot needs to be NULL.
8269 * For audit purposes the expected @reason is one of `restored` or `from-snapshot`.
8271 * Returns 0 on success, -1 on error.
8274 qemuProcessStartWithMemoryState(virConnectPtr conn,
8275 virQEMUDriver *driver,
8276 virDomainObj *vm,
8277 int *fd,
8278 const char *path,
8279 virDomainMomentObj *snapshot,
8280 virQEMUSaveData *data,
8281 virDomainAsyncJob asyncJob,
8282 unsigned int start_flags,
8283 const char *reason,
8284 bool *started)
8286 qemuDomainObjPrivate *priv = vm->privateData;
8287 g_autoptr(qemuDomainSaveCookie) cookie = NULL;
8288 VIR_AUTOCLOSE intermediatefd = -1;
8289 g_autoptr(virCommand) cmd = NULL;
8290 g_autofree char *errbuf = NULL;
8291 const char *migrateFrom = NULL;
8292 int rc = 0;
8294 if (data) {
8295 if (virSaveCookieParseString(data->cookie, (virObject **)&cookie,
8296 virDomainXMLOptionGetSaveCookie(driver->xmlopt)) < 0)
8297 return -1;
8299 if (qemuSaveImageDecompressionStart(data, fd, &intermediatefd,
8300 &errbuf, &cmd) < 0) {
8301 return -1;
8304 migrateFrom = "stdio";
8307 /* No cookie means libvirt which saved the domain was too old to mess up
8308 * the CPU definitions.
8310 if (cookie)
8311 qemuDomainFixupCPUs(vm, &cookie->cpu);
8313 if (cookie && !cookie->slirpHelper)
8314 priv->disableSlirp = true;
8316 if (qemuProcessStart(conn, driver, vm, cookie ? cookie->cpu : NULL,
8317 asyncJob, migrateFrom, *fd, path, snapshot,
8318 VIR_NETDEV_VPORT_PROFILE_OP_RESTORE,
8319 start_flags) == 0)
8320 *started = true;
8322 if (data) {
8323 rc = qemuSaveImageDecompressionStop(cmd, fd, &intermediatefd, errbuf,
8324 *started, path);
8327 virDomainAuditStart(vm, reason, *started);
8328 if (!*started || rc < 0)
8329 return -1;
8331 /* qemuProcessStart doesn't unset the qemu error reporting infrastructure
8332 * in case of migration (which is used in this case) so we need to reset it
8333 * so that the handle to virtlogd is not held open unnecessarily */
8334 qemuMonitorSetDomainLog(qemuDomainGetMonitor(vm), NULL, NULL, NULL);
8336 return 0;
8341 qemuProcessCreatePretendCmdPrepare(virQEMUDriver *driver,
8342 virDomainObj *vm,
8343 const char *migrateURI,
8344 unsigned int flags)
8346 virCheckFlags(VIR_QEMU_PROCESS_START_COLD |
8347 VIR_QEMU_PROCESS_START_PAUSED |
8348 VIR_QEMU_PROCESS_START_AUTODESTROY, -1);
8350 flags |= VIR_QEMU_PROCESS_START_PRETEND;
8352 if (!migrateURI)
8353 flags |= VIR_QEMU_PROCESS_START_NEW;
8355 if (qemuProcessInit(driver, vm, NULL, VIR_ASYNC_JOB_NONE,
8356 !!migrateURI, flags) < 0)
8357 return -1;
8359 if (qemuProcessPrepareDomain(driver, vm, flags) < 0)
8360 return -1;
8362 return 0;
8366 virCommand *
8367 qemuProcessCreatePretendCmdBuild(virDomainObj *vm,
8368 const char *migrateURI)
8370 return qemuBuildCommandLine(vm,
8371 migrateURI,
8372 NULL,
8373 VIR_NETDEV_VPORT_PROFILE_OP_NO_OP,
8374 NULL,
8375 NULL);
8380 qemuProcessKill(virDomainObj *vm, unsigned int flags)
8382 VIR_DEBUG("vm=%p name=%s pid=%lld flags=0x%x",
8383 vm, vm->def->name,
8384 (long long)vm->pid, flags);
8386 if (!(flags & VIR_QEMU_PROCESS_KILL_NOCHECK)) {
8387 if (!virDomainObjIsActive(vm)) {
8388 VIR_DEBUG("VM '%s' not active", vm->def->name);
8389 return 0;
8393 if (flags & VIR_QEMU_PROCESS_KILL_NOWAIT) {
8394 virProcessKill(vm->pid,
8395 (flags & VIR_QEMU_PROCESS_KILL_FORCE) ?
8396 SIGKILL : SIGTERM);
8397 return 0;
8400 /* Request an extra delay of two seconds per current nhostdevs
8401 * to be safe against stalls by the kernel freeing up the resources */
8402 return virProcessKillPainfullyDelay(vm->pid,
8403 !!(flags & VIR_QEMU_PROCESS_KILL_FORCE),
8404 vm->def->nhostdevs * 2,
8405 false);
8410 * qemuProcessBeginStopJob:
8412 * Stop all current jobs by killing the domain and start a new one for
8413 * qemuProcessStop.
8416 qemuProcessBeginStopJob(virDomainObj *vm,
8417 virDomainJob job,
8418 bool forceKill)
8420 qemuDomainObjPrivate *priv = vm->privateData;
8421 unsigned int killFlags = forceKill ? VIR_QEMU_PROCESS_KILL_FORCE : 0;
8423 /* We need to prevent monitor EOF callback from doing our work (and
8424 * sending misleading events) while the vm is unlocked inside
8425 * BeginJob/ProcessKill API or any other code path before 'vm->def->id' is
8426 * cleared inside qemuProcessStop */
8427 priv->beingDestroyed = true;
8429 if (qemuProcessKill(vm, killFlags) < 0)
8430 goto error;
8432 /* Wake up anything waiting on domain condition */
8433 VIR_DEBUG("waking up all jobs waiting on the domain condition");
8434 virDomainObjBroadcast(vm);
8436 if (virDomainObjBeginJob(vm, job) < 0)
8437 goto error;
8439 /* priv->beingDestroyed is deliberately left set to 'true' here. Caller
8440 * is supposed to call qemuProcessStop, which will reset it after
8441 * 'vm->def->id' is set to -1 */
8442 return 0;
8444 error:
8445 priv->beingDestroyed = false;
8446 return -1;
8450 void qemuProcessStop(virQEMUDriver *driver,
8451 virDomainObj *vm,
8452 virDomainShutoffReason reason,
8453 virDomainAsyncJob asyncJob,
8454 unsigned int flags)
8456 int ret;
8457 int retries = 0;
8458 qemuDomainObjPrivate *priv = vm->privateData;
8459 virErrorPtr orig_err;
8460 virDomainDef *def = vm->def;
8461 size_t i;
8462 g_autofree char *timestamp = NULL;
8463 g_autoptr(virQEMUDriverConfig) cfg = virQEMUDriverGetConfig(driver);
8464 bool outgoingMigration;
8466 VIR_DEBUG("Shutting down vm=%p name=%s id=%d pid=%lld, "
8467 "reason=%s, asyncJob=%s, flags=0x%x",
8468 vm, vm->def->name, vm->def->id,
8469 (long long)vm->pid,
8470 virDomainShutoffReasonTypeToString(reason),
8471 virDomainAsyncJobTypeToString(asyncJob),
8472 flags);
8474 /* This method is routinely used in clean up paths. Disable error
8475 * reporting so we don't squash a legit error. */
8476 virErrorPreserveLast(&orig_err);
8478 if (asyncJob != VIR_ASYNC_JOB_NONE) {
8479 if (virDomainObjBeginNestedJob(vm, asyncJob) < 0)
8480 goto cleanup;
8481 } else if (vm->job->asyncJob != VIR_ASYNC_JOB_NONE &&
8482 vm->job->asyncOwner == virThreadSelfID() &&
8483 vm->job->active != VIR_JOB_ASYNC_NESTED) {
8484 VIR_WARN("qemuProcessStop called without a nested job (async=%s)",
8485 virDomainAsyncJobTypeToString(asyncJob));
8488 if (!virDomainObjIsActive(vm)) {
8489 VIR_DEBUG("VM '%s' not active", vm->def->name);
8490 goto endjob;
8493 /* BEWARE: At this point 'vm->def->id' is not cleared yet. Any code that
8494 * requires the id (e.g. to call virDomainDefGetShortName()) must be placed
8495 * between here (after the VM is killed) and the statement clearing the id.
8496 * The code *MUST NOT* unlock vm, otherwise other code might be confused
8497 * about the state of the VM. */
8499 if ((timestamp = virTimeStringNow()) != NULL) {
8500 qemuDomainLogAppendMessage(driver, vm, "%s: shutting down, reason=%s\n",
8501 timestamp,
8502 virDomainShutoffReasonTypeToString(reason));
8505 /* shut it off for sure */
8506 ignore_value(qemuProcessKill(vm,
8507 VIR_QEMU_PROCESS_KILL_FORCE|
8508 VIR_QEMU_PROCESS_KILL_NOCHECK));
8510 if (priv->agent) {
8511 g_clear_pointer(&priv->agent, qemuAgentClose);
8513 priv->agentError = false;
8515 if (priv->mon) {
8516 g_clear_pointer(&priv->mon, qemuMonitorClose);
8519 qemuProcessBuildDestroyMemoryPaths(driver, vm, NULL, false);
8521 /* Do this before we delete the tree and remove pidfile. */
8522 qemuProcessKillManagedPRDaemon(vm);
8524 qemuDomainCleanupRun(driver, vm);
8526 outgoingMigration = (flags & VIR_QEMU_PROCESS_STOP_MIGRATED) &&
8527 (asyncJob == VIR_ASYNC_JOB_MIGRATION_OUT);
8529 qemuExtDevicesStop(driver, vm, outgoingMigration);
8531 qemuDBusStop(driver, vm);
8533 /* Only after this point we can reset 'priv->beingDestroyed' so that
8534 * there's no point at which the VM could be considered as alive between
8535 * entering the destroy job and this point where the active "flag" is
8536 * cleared.
8538 vm->def->id = -1;
8539 priv->beingDestroyed = false;
8541 /* Wake up anything waiting on domain condition */
8542 virDomainObjBroadcast(vm);
8544 /* IMPORTANT: qemuDomainObjStopWorker() unlocks @vm in order to prevent
8545 * deadlocks with the per-VM event loop thread. This MUST be done after
8546 * marking the VM as dead */
8547 qemuDomainObjStopWorker(vm);
8549 if (!!g_atomic_int_dec_and_test(&driver->nactive) && driver->inhibitCallback)
8550 driver->inhibitCallback(false, driver->inhibitOpaque);
8552 /* Clear network bandwidth */
8553 virDomainClearNetBandwidth(vm->def);
8555 virDomainConfVMNWFilterTeardown(vm);
8557 if (cfg->macFilter) {
8558 for (i = 0; i < def->nnets; i++) {
8559 virDomainNetDef *net = def->nets[i];
8560 if (net->ifname == NULL)
8561 continue;
8562 ignore_value(ebtablesRemoveForwardAllowIn(driver->ebtables,
8563 net->ifname,
8564 &net->mac));
8568 virPortAllocatorRelease(priv->nbdPort);
8569 priv->nbdPort = 0;
8571 if (priv->monConfig) {
8572 if (priv->monConfig->type == VIR_DOMAIN_CHR_TYPE_UNIX)
8573 unlink(priv->monConfig->data.nix.path);
8574 g_clear_pointer(&priv->monConfig, virObjectUnref);
8577 /* Remove the master key */
8578 qemuDomainMasterKeyRemove(priv);
8580 ignore_value(virDomainChrDefForeach(vm->def,
8581 false,
8582 qemuProcessCleanupChardevDevice,
8583 NULL));
8586 /* Its namespace is also gone then. */
8587 qemuDomainDestroyNamespace(driver, vm);
8589 virFileDeleteTree(priv->libDir);
8590 virFileDeleteTree(priv->channelTargetDir);
8592 /* Stop autodestroy in case guest is restarted */
8593 virCloseCallbacksDomainRemove(vm, NULL, qemuProcessAutoDestroy);
8595 /* now that we know it's stopped call the hook if present */
8596 if (virHookPresent(VIR_HOOK_DRIVER_QEMU)) {
8597 g_autofree char *xml = qemuDomainDefFormatXML(driver, NULL, vm->def, 0);
8599 /* we can't stop the operation even if the script raised an error */
8600 ignore_value(virHookCall(VIR_HOOK_DRIVER_QEMU, vm->def->name,
8601 VIR_HOOK_QEMU_OP_STOPPED, VIR_HOOK_SUBOP_END,
8602 NULL, xml, NULL));
8605 /* Reset Security Labels unless caller don't want us to */
8606 if (!(flags & VIR_QEMU_PROCESS_STOP_NO_RELABEL))
8607 qemuSecurityRestoreAllLabel(driver, vm,
8608 !!(flags & VIR_QEMU_PROCESS_STOP_MIGRATED));
8610 /* Clear out dynamically assigned labels */
8611 for (i = 0; i < vm->def->nseclabels; i++) {
8612 if (vm->def->seclabels[i]->type == VIR_DOMAIN_SECLABEL_DYNAMIC)
8613 VIR_FREE(vm->def->seclabels[i]->label);
8614 VIR_FREE(vm->def->seclabels[i]->imagelabel);
8617 qemuHostdevReAttachDomainDevices(driver, vm->def);
8618 for (i = 0; i < def->nnets; i++) {
8619 virDomainNetDef *net = def->nets[i];
8620 virDomainInterfaceDeleteDevice(def,
8621 net,
8622 QEMU_DOMAIN_NETWORK_PRIVATE(net)->created,
8623 cfg->stateDir);
8626 retry:
8627 if ((ret = virDomainCgroupRemoveCgroup(vm, priv->cgroup, priv->machineName)) < 0) {
8628 if (ret == -EBUSY && (retries++ < 5)) {
8629 g_usleep(200*1000);
8630 goto retry;
8632 VIR_WARN("Failed to remove cgroup for %s",
8633 vm->def->name);
8636 /* Remove resctrl allocation after cgroups are cleaned up which makes it
8637 * kind of safer (although removing the allocation should work even with
8638 * pids in tasks file */
8639 for (i = 0; i < vm->def->nresctrls; i++) {
8640 size_t j = 0;
8642 for (j = 0; j < vm->def->resctrls[i]->nmonitors; j++) {
8643 virDomainResctrlMonDef *mon = NULL;
8645 mon = vm->def->resctrls[i]->monitors[j];
8646 virResctrlMonitorRemove(mon->instance);
8649 virResctrlAllocRemove(vm->def->resctrls[i]->alloc);
8652 qemuProcessRemoveDomainStatus(driver, vm);
8654 /* Remove VNC and Spice ports from port reservation bitmap, but only if
8655 they were reserved by the driver (autoport=yes)
8657 for (i = 0; i < vm->def->ngraphics; ++i) {
8658 virDomainGraphicsDef *graphics = vm->def->graphics[i];
8659 if (graphics->type == VIR_DOMAIN_GRAPHICS_TYPE_VNC) {
8660 if (graphics->data.vnc.portReserved) {
8661 virPortAllocatorRelease(graphics->data.vnc.port);
8662 graphics->data.vnc.portReserved = false;
8664 if (graphics->data.vnc.websocketReserved) {
8665 virPortAllocatorRelease(graphics->data.vnc.websocket);
8666 graphics->data.vnc.websocketReserved = false;
8668 if (graphics->data.vnc.websocketGenerated) {
8669 graphics->data.vnc.websocketGenerated = false;
8670 graphics->data.vnc.websocket = -1;
8673 if (graphics->type == VIR_DOMAIN_GRAPHICS_TYPE_SPICE) {
8674 if (graphics->data.spice.portReserved) {
8675 virPortAllocatorRelease(graphics->data.spice.port);
8676 graphics->data.spice.portReserved = false;
8679 if (graphics->data.spice.tlsPortReserved) {
8680 virPortAllocatorRelease(graphics->data.spice.tlsPort);
8681 graphics->data.spice.tlsPortReserved = false;
8686 for (i = 0; i < vm->ndeprecations; i++)
8687 g_free(vm->deprecations[i]);
8688 g_clear_pointer(&vm->deprecations, g_free);
8689 vm->ndeprecations = 0;
8690 vm->taint = 0;
8691 vm->pid = 0;
8692 virDomainObjSetState(vm, VIR_DOMAIN_SHUTOFF, reason);
8693 for (i = 0; i < vm->def->niothreadids; i++)
8694 vm->def->iothreadids[i]->thread_id = 0;
8696 /* clean up a possible backup job */
8697 if (priv->backup)
8698 qemuBackupJobTerminate(vm, VIR_DOMAIN_JOB_STATUS_CANCELED);
8700 /* Do this explicitly after vm->pid is reset so that security drivers don't
8701 * try to enter the domain's namespace which is non-existent by now as qemu
8702 * is no longer running. */
8703 if (!(flags & VIR_QEMU_PROCESS_STOP_NO_RELABEL)) {
8704 for (i = 0; i < def->ndisks; i++) {
8705 virDomainDiskDef *disk = def->disks[i];
8707 if (disk->mirror) {
8708 if (qemuSecurityRestoreImageLabel(driver, vm, disk->mirror, false) < 0)
8709 VIR_WARN("Unable to restore security label on %s", disk->dst);
8711 if (virStorageSourceChainHasNVMe(disk->mirror))
8712 qemuHostdevReAttachOneNVMeDisk(driver, vm->def->name, disk->mirror);
8715 qemuBlockRemoveImageMetadata(driver, vm, disk->dst, disk->src);
8717 /* for now transient disks are forbidden with migration so they
8718 * can be handled here */
8719 if (disk->transient &&
8720 QEMU_DOMAIN_DISK_PRIVATE(disk)->transientOverlayCreated) {
8721 VIR_DEBUG("Removing transient overlay '%s' of disk '%s'",
8722 disk->src->path, disk->dst);
8723 if (qemuDomainStorageFileInit(driver, vm, disk->src, NULL) >= 0) {
8724 virStorageSourceUnlink(disk->src);
8725 virStorageSourceDeinit(disk->src);
8731 qemuSecurityReleaseLabel(driver->securityManager, vm->def);
8733 /* clear all private data entries which are no longer needed */
8734 qemuDomainObjPrivateDataClear(priv);
8736 /* The "release" hook cleans up additional resources */
8737 if (virHookPresent(VIR_HOOK_DRIVER_QEMU)) {
8738 g_autofree char *xml = qemuDomainDefFormatXML(driver, NULL, vm->def, 0);
8740 /* we can't stop the operation even if the script raised an error */
8741 virHookCall(VIR_HOOK_DRIVER_QEMU, vm->def->name,
8742 VIR_HOOK_QEMU_OP_RELEASE, VIR_HOOK_SUBOP_END,
8743 virDomainShutoffReasonTypeToString(reason), xml, NULL);
8746 virDomainObjRemoveTransientDef(vm);
8748 endjob:
8749 if (asyncJob != VIR_ASYNC_JOB_NONE)
8750 virDomainObjEndJob(vm);
8752 cleanup:
8753 virErrorRestore(&orig_err);
8757 void
8758 qemuProcessAutoDestroy(virDomainObj *dom,
8759 virConnectPtr conn)
8761 qemuDomainObjPrivate *priv = dom->privateData;
8762 virQEMUDriver *driver = priv->driver;
8763 virObjectEvent *event = NULL;
8764 unsigned int stopFlags = 0;
8766 VIR_DEBUG("vm=%s, conn=%p", dom->def->name, conn);
8768 if (dom->job->asyncJob == VIR_ASYNC_JOB_MIGRATION_IN)
8769 stopFlags |= VIR_QEMU_PROCESS_STOP_MIGRATED;
8771 if (dom->job->asyncJob) {
8772 VIR_DEBUG("vm=%s has long-term job active, cancelling",
8773 dom->def->name);
8774 qemuDomainObjDiscardAsyncJob(dom);
8777 VIR_DEBUG("Killing domain");
8779 if (qemuProcessBeginStopJob(dom, VIR_JOB_DESTROY, true) < 0)
8780 return;
8782 qemuProcessStop(driver, dom, VIR_DOMAIN_SHUTOFF_DESTROYED,
8783 VIR_ASYNC_JOB_NONE, stopFlags);
8785 virDomainAuditStop(dom, "destroyed");
8786 event = virDomainEventLifecycleNewFromObj(dom,
8787 VIR_DOMAIN_EVENT_STOPPED,
8788 VIR_DOMAIN_EVENT_STOPPED_DESTROYED);
8790 qemuDomainRemoveInactive(driver, dom, 0, false);
8792 virDomainObjEndJob(dom);
8794 virObjectEventStateQueue(driver->domainEventState, event);
8798 void
8799 qemuProcessRefreshDiskProps(virDomainDiskDef *disk,
8800 struct qemuDomainDiskInfo *info)
8802 qemuDomainDiskPrivate *diskpriv = QEMU_DOMAIN_DISK_PRIVATE(disk);
8804 if (info->removable) {
8805 if (info->empty)
8806 virDomainDiskEmptySource(disk);
8808 if (info->tray) {
8809 if (info->tray_open)
8810 disk->tray_status = VIR_DOMAIN_DISK_TRAY_OPEN;
8811 else
8812 disk->tray_status = VIR_DOMAIN_DISK_TRAY_CLOSED;
8816 diskpriv->removable = info->removable;
8817 diskpriv->tray = info->tray;
8822 qemuProcessRefreshDisks(virDomainObj *vm,
8823 virDomainAsyncJob asyncJob)
8825 qemuDomainObjPrivate *priv = vm->privateData;
8826 virQEMUDriver *driver = priv->driver;
8827 g_autoptr(GHashTable) table = NULL;
8828 size_t i;
8830 if (qemuDomainObjEnterMonitorAsync(vm, asyncJob) == 0) {
8831 table = qemuMonitorGetBlockInfo(priv->mon);
8832 qemuDomainObjExitMonitor(vm);
8835 if (!table)
8836 return -1;
8838 for (i = 0; i < vm->def->ndisks; i++) {
8839 virDomainDiskDef *disk = vm->def->disks[i];
8840 qemuDomainDiskPrivate *diskpriv = QEMU_DOMAIN_DISK_PRIVATE(disk);
8841 struct qemuDomainDiskInfo *info;
8842 const char *entryname = disk->info.alias;
8843 virDomainDiskTray old_tray_status = disk->tray_status;
8845 if (diskpriv->qomName)
8846 entryname = diskpriv->qomName;
8848 if (!(info = virHashLookup(table, entryname)))
8849 continue;
8851 qemuProcessRefreshDiskProps(disk, info);
8853 if (diskpriv->tray &&
8854 old_tray_status != disk->tray_status) {
8855 virDomainEventTrayChangeReason reason = VIR_DOMAIN_EVENT_TRAY_CHANGE_OPEN;
8856 virObjectEvent *event;
8858 if (disk->tray_status == VIR_DOMAIN_DISK_TRAY_CLOSED)
8859 reason = VIR_DOMAIN_EVENT_TRAY_CHANGE_CLOSE;
8861 event = virDomainEventTrayChangeNewFromObj(vm, disk->info.alias, reason);
8862 virObjectEventStateQueue(driver->domainEventState, event);
8866 return 0;
8870 static int
8871 qemuProcessRefreshCPUMigratability(virDomainObj *vm,
8872 virDomainAsyncJob asyncJob)
8874 qemuDomainObjPrivate *priv = vm->privateData;
8875 virDomainDef *def = vm->def;
8876 const char *cpuQOMPath = qemuProcessGetVCPUQOMPath(vm);
8877 bool migratable;
8878 int rc;
8880 if (def->cpu->mode != VIR_CPU_MODE_HOST_PASSTHROUGH &&
8881 def->cpu->mode != VIR_CPU_MODE_MAXIMUM)
8882 return 0;
8884 /* If the cpu.migratable capability is present, the migratable attribute
8885 * is set correctly. */
8886 if (virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_CPU_MIGRATABLE))
8887 return 0;
8889 if (!ARCH_IS_X86(def->os.arch))
8890 return 0;
8892 if (qemuDomainObjEnterMonitorAsync(vm, asyncJob) < 0)
8893 return -1;
8895 rc = qemuMonitorGetCPUMigratable(priv->mon, cpuQOMPath, &migratable);
8897 qemuDomainObjExitMonitor(vm);
8898 if (rc < 0)
8899 return -1;
8901 if (rc == 1)
8902 migratable = false;
8904 /* Libvirt 6.5.0 would set migratable='off' for running domains even though
8905 * the actual default used by QEMU was 'on'. */
8906 if (def->cpu->migratable == VIR_TRISTATE_SWITCH_OFF && migratable) {
8907 VIR_DEBUG("Fixing CPU migratable attribute");
8908 def->cpu->migratable = VIR_TRISTATE_SWITCH_ON;
8911 if (def->cpu->migratable == VIR_TRISTATE_SWITCH_ABSENT)
8912 def->cpu->migratable = virTristateSwitchFromBool(migratable);
8914 return 0;
8918 static int
8919 qemuProcessRefreshCPU(virQEMUDriver *driver,
8920 virDomainObj *vm)
8922 qemuDomainObjPrivate *priv = vm->privateData;
8923 g_autoptr(virCPUDef) host = NULL;
8924 g_autoptr(virCPUDef) hostmig = NULL;
8925 g_autoptr(virCPUDef) cpu = NULL;
8926 virCPUFeaturePolicy removedPolicy;
8928 /* When reconnecting to a running domain, we know all features marked as
8929 * removed from a CPU model were already explicitly mentioned in the
8930 * definition. If any removed features are missing, they must have been
8931 * removed after the domain was started and thus they have to be enabled
8932 * (otherwise they would be explicitly listed as disabled).
8934 removedPolicy = VIR_CPU_FEATURE_REQUIRE;
8936 if (!vm->def->cpu)
8937 return 0;
8939 if (vm->def->cpu->mode == VIR_CPU_MODE_CUSTOM &&
8940 vm->def->cpu->model &&
8941 virCPUUpdate(vm->def->os.arch, vm->def->cpu, NULL, removedPolicy) < 0)
8942 return -1;
8944 if (!virQEMUCapsGuestIsNative(driver->hostarch, vm->def->os.arch))
8945 return 0;
8947 if (qemuProcessRefreshCPUMigratability(vm, VIR_ASYNC_JOB_NONE) < 0)
8948 return -1;
8950 if (!(host = virQEMUDriverGetHostCPU(driver))) {
8951 virResetLastError();
8952 return 0;
8955 /* If the domain with a host-model CPU was started by an old libvirt
8956 * (< 2.3) which didn't replace the CPU with a custom one, let's do it now
8957 * since the rest of our code does not really expect a host-model CPU in a
8958 * running domain.
8960 if (vm->def->cpu->mode == VIR_CPU_MODE_HOST_MODEL) {
8962 * PSeries domains are able to run with host-model CPU by design,
8963 * even on Libvirt newer than 2.3, never replacing host-model with
8964 * custom in the virCPUUpdate() call. It is not needed to call
8965 * virCPUUpdate() and qemuProcessUpdateCPU() in this case.
8967 if (qemuDomainIsPSeries(vm->def))
8968 return 0;
8970 if (!(hostmig = virCPUCopyMigratable(host->arch, host)))
8971 return -1;
8973 cpu = virCPUDefCopyWithoutModel(hostmig);
8975 virCPUDefCopyModelFilter(cpu, hostmig, false, virQEMUCapsCPUFilterFeatures,
8976 &host->arch);
8978 if (virCPUUpdate(vm->def->os.arch, vm->def->cpu, cpu, removedPolicy) < 0)
8979 return -1;
8981 if (qemuProcessUpdateCPU(vm, VIR_ASYNC_JOB_NONE) < 0)
8982 return -1;
8983 } else if (!virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_QUERY_CPU_MODEL_EXPANSION)) {
8984 /* We only try to fix CPUs when the libvirt/QEMU combo used to start
8985 * the domain did not know about query-cpu-model-expansion in which
8986 * case the host-model is known to not contain features which QEMU
8987 * doesn't know about.
8989 qemuDomainFixupCPUs(vm, &priv->origCPU);
8992 return 0;
8997 * qemuProcessReloadMachineTypes:
8999 * Reload machine type information into the 'qemuCaps' object from the current
9000 * qemu.
9002 static int
9003 qemuProcessReloadMachineTypes(virDomainObj *vm)
9005 qemuDomainObjPrivate *priv = vm->privateData;
9006 bool fail = false;
9008 qemuDomainObjEnterMonitor(vm);
9010 if (virQEMUCapsInitQMPArch(priv->qemuCaps, priv->mon) < 0)
9011 fail = true;
9013 if (!fail &&
9014 virQEMUCapsProbeQMPMachineTypes(priv->qemuCaps,
9015 vm->def->virtType,
9016 priv->mon) < 0)
9017 fail = true;
9019 qemuDomainObjExitMonitor(vm);
9021 if (fail)
9022 return -1;
9024 return 0;
9028 struct qemuProcessReconnectData {
9029 virQEMUDriver *driver;
9030 virDomainObj *obj;
9031 virIdentity *identity;
9034 * Open an existing VM's monitor, re-detect VCPU threads
9035 * and re-reserve the security labels in use
9037 * This function also inherits a locked and ref'd domain object.
9039 * This function needs to:
9040 * 1. Enter job
9041 * 2. just before monitor reconnect do lightweight MonitorEnter
9042 * (increase VM refcount and unlock VM)
9043 * 3. reconnect to monitor
9044 * 4. do lightweight MonitorExit (lock VM)
9045 * 5. continue reconnect process
9046 * 6. EndJob
9048 * We can't do normal MonitorEnter & MonitorExit because these two lock the
9049 * monitor lock, which does not exist in this early phase.
9051 static void
9052 qemuProcessReconnect(void *opaque)
9054 struct qemuProcessReconnectData *data = opaque;
9055 virQEMUDriver *driver = data->driver;
9056 virDomainObj *obj = data->obj;
9057 qemuDomainObjPrivate *priv;
9058 g_auto(virDomainJobObj) oldjob = {
9059 .cb = NULL,
9061 int state;
9062 int reason;
9063 g_autoptr(virQEMUDriverConfig) cfg = NULL;
9064 size_t i;
9065 unsigned int stopFlags = 0;
9066 bool jobStarted = false;
9067 bool tryMonReconn = false;
9069 virIdentitySetCurrent(data->identity);
9070 g_clear_object(&data->identity);
9071 VIR_FREE(data);
9073 cfg = virQEMUDriverGetConfig(driver);
9074 priv = obj->privateData;
9076 virDomainObjPreserveJob(obj->job, &oldjob);
9077 if (oldjob.asyncJob == VIR_ASYNC_JOB_MIGRATION_IN)
9078 stopFlags |= VIR_QEMU_PROCESS_STOP_MIGRATED;
9079 if (oldjob.asyncJob == VIR_ASYNC_JOB_BACKUP && priv->backup)
9080 priv->backup->apiFlags = oldjob.apiFlags;
9082 if (virDomainObjBeginJob(obj, VIR_JOB_MODIFY) < 0)
9083 goto error;
9084 jobStarted = true;
9086 /* XXX If we ever gonna change pid file pattern, come up with
9087 * some intelligence here to deal with old paths. */
9088 if (!(priv->pidfile = virPidFileBuildPath(cfg->stateDir, obj->def->name)))
9089 goto error;
9091 /* Restore the masterKey */
9092 if (qemuDomainMasterKeyReadFile(priv) < 0)
9093 goto error;
9095 if (qemuExtDevicesInitPaths(driver, obj->def) < 0)
9096 goto error;
9098 /* If we are connecting to a guest started by old libvirt there is no
9099 * allowReboot in status XML and we need to initialize it. */
9100 qemuProcessPrepareAllowReboot(obj);
9102 if (qemuHostdevUpdateActiveDomainDevices(driver, obj->def) < 0)
9103 goto error;
9105 if (qemuDomainObjStartWorker(obj) < 0)
9106 goto error;
9108 VIR_DEBUG("Reconnect monitor to def=%p name='%s'", obj, obj->def->name);
9110 tryMonReconn = true;
9112 /* XXX check PID liveliness & EXE path */
9113 if (qemuConnectMonitor(driver, obj, VIR_ASYNC_JOB_NONE, NULL, true) < 0)
9114 goto error;
9116 priv->machineName = qemuDomainGetMachineName(obj);
9117 if (!priv->machineName)
9118 goto error;
9120 if (virDomainCgroupConnectCgroup("qemu",
9121 obj,
9122 &priv->cgroup,
9123 cfg->cgroupControllers,
9124 priv->driver->privileged,
9125 priv->machineName) < 0)
9126 goto error;
9128 if (qemuDomainPerfRestart(obj) < 0)
9129 goto error;
9131 for (i = 0; i < obj->def->ndisks; i++) {
9132 virDomainDiskDef *disk = obj->def->disks[i];
9134 if (virDomainDiskTranslateSourcePool(disk) < 0)
9135 goto error;
9138 for (i = 0; i < obj->def->ngraphics; i++) {
9139 if (qemuProcessGraphicsReservePorts(obj->def->graphics[i], true) < 0)
9140 goto error;
9143 if (qemuProcessUpdateState(obj) < 0)
9144 goto error;
9146 state = virDomainObjGetState(obj, &reason);
9147 if (state == VIR_DOMAIN_SHUTOFF ||
9148 (state == VIR_DOMAIN_PAUSED &&
9149 reason == VIR_DOMAIN_PAUSED_STARTING_UP)) {
9150 VIR_DEBUG("Domain '%s' wasn't fully started yet, killing it",
9151 obj->def->name);
9152 goto error;
9155 if (!priv->qemuCaps) {
9156 virReportError(VIR_ERR_INTERNAL_ERROR,
9157 _("domain '%1$s' has no capabilities recorded"),
9158 obj->def->name);
9159 goto error;
9162 /* Reload and populate machine type data into 'qemuCaps' as that is not
9163 * serialized into the status XML. */
9164 if (qemuProcessReloadMachineTypes(obj) < 0)
9165 goto error;
9167 if (qemuDomainAssignAddresses(obj->def, priv->qemuCaps,
9168 driver, obj, false) < 0) {
9169 goto error;
9172 /* In case the domain shutdown or fake reboot while we were not running,
9173 * we need to finish the shutdown or fake reboot process. And we need to
9174 * do it after we have virQEMUCaps filled in.
9176 if (state == VIR_DOMAIN_SHUTDOWN ||
9177 (state == VIR_DOMAIN_PAUSED &&
9178 reason == VIR_DOMAIN_PAUSED_SHUTTING_DOWN) ||
9179 (priv->fakeReboot && state == VIR_DOMAIN_PAUSED &&
9180 reason == VIR_DOMAIN_PAUSED_USER)) {
9181 VIR_DEBUG("Finishing shutdown sequence for domain %s",
9182 obj->def->name);
9183 qemuProcessShutdownOrReboot(obj);
9184 goto cleanup;
9187 /* if domain requests security driver we haven't loaded, report error, but
9188 * do not kill the domain
9190 ignore_value(qemuSecurityCheckAllLabel(driver->securityManager,
9191 obj->def));
9193 if (qemuDomainRefreshVcpuInfo(obj, VIR_ASYNC_JOB_NONE, true) < 0)
9194 goto error;
9196 qemuDomainVcpuPersistOrder(obj->def);
9198 /* Make sure the original CPU is always preserved in priv->origCPU. */
9199 if (!priv->origCPU)
9200 qemuDomainUpdateCPU(obj, NULL, &priv->origCPU);
9202 if (qemuProcessRefreshCPU(driver, obj) < 0)
9203 goto error;
9205 if (qemuDomainUpdateMemoryDeviceInfo(obj, VIR_ASYNC_JOB_NONE) < 0)
9206 goto error;
9208 if (qemuProcessDetectIOThreadPIDs(obj, VIR_ASYNC_JOB_NONE) < 0)
9209 goto error;
9211 if (qemuSecurityReserveLabel(driver->securityManager, obj->def, obj->pid) < 0)
9212 goto error;
9214 if (qemuProcessRefreshRxFilters(obj, VIR_ASYNC_JOB_NONE) < 0)
9215 goto error;
9217 qemuProcessNotifyNets(obj->def);
9219 qemuProcessFiltersInstantiate(obj->def);
9221 if (qemuProcessRefreshDisks(obj, VIR_ASYNC_JOB_NONE) < 0)
9222 goto error;
9224 /* At this point we've already checked that the startup of the VM was
9225 * completed successfully before, thus that also implies that all transient
9226 * disk overlays were created. */
9227 for (i = 0; i < obj->def->ndisks; i++) {
9228 virDomainDiskDef *disk = obj->def->disks[i];
9230 if (disk->transient)
9231 QEMU_DOMAIN_DISK_PRIVATE(disk)->transientOverlayCreated = true;
9234 if (qemuRefreshVirtioChannelState(driver, obj, VIR_ASYNC_JOB_NONE) < 0)
9235 goto error;
9237 /* If querying of guest's RTC failed, report error, but do not kill the domain. */
9238 qemuRefreshRTC(obj);
9240 if (qemuProcessRefreshBalloonState(obj, VIR_ASYNC_JOB_NONE) < 0)
9241 goto error;
9243 if (qemuProcessRecoverJob(driver, obj, &oldjob, &stopFlags) < 0)
9244 goto error;
9246 if (qemuBlockJobRefreshJobs(obj) < 0)
9247 goto error;
9249 if (qemuProcessUpdateDevices(driver, obj) < 0)
9250 goto error;
9252 if (qemuRefreshPRManagerState(obj) < 0)
9253 goto error;
9255 if (qemuProcessRefreshFdsetIndex(obj) < 0)
9256 goto error;
9258 if (qemuConnectAgent(driver, obj) < 0)
9259 goto error;
9261 for (i = 0; i < obj->def->nresctrls; i++) {
9262 size_t j = 0;
9264 if (virResctrlAllocDeterminePath(obj->def->resctrls[i]->alloc,
9265 priv->machineName) < 0)
9266 goto error;
9268 for (j = 0; j < obj->def->resctrls[i]->nmonitors; j++) {
9269 virDomainResctrlMonDef *mon = NULL;
9271 mon = obj->def->resctrls[i]->monitors[j];
9272 if (virResctrlMonitorDeterminePath(mon->instance,
9273 priv->machineName) < 0)
9274 goto error;
9278 for (i = 0; i < obj->def->ndisks; i++)
9279 if (qemuNbdkitStorageSourceManageProcess(obj->def->disks[i]->src, obj) < 0)
9280 goto error;
9282 if (obj->def->os.loader && obj->def->os.loader->nvram)
9283 if (qemuNbdkitStorageSourceManageProcess(obj->def->os.loader->nvram, obj) < 0)
9284 goto error;
9286 /* update domain state XML with possibly updated state in virDomainObj */
9287 if (virDomainObjSave(obj, driver->xmlopt, cfg->stateDir) < 0)
9288 goto error;
9290 /* Run an hook to allow admins to do some magic */
9291 if (virHookPresent(VIR_HOOK_DRIVER_QEMU)) {
9292 g_autofree char *xml = qemuDomainDefFormatXML(driver,
9293 priv->qemuCaps,
9294 obj->def, 0);
9295 int hookret;
9297 hookret = virHookCall(VIR_HOOK_DRIVER_QEMU, obj->def->name,
9298 VIR_HOOK_QEMU_OP_RECONNECT, VIR_HOOK_SUBOP_BEGIN,
9299 NULL, xml, NULL);
9302 * If the script raised an error abort the launch
9304 if (hookret < 0)
9305 goto error;
9308 if (g_atomic_int_add(&driver->nactive, 1) == 0 && driver->inhibitCallback)
9309 driver->inhibitCallback(true, driver->inhibitOpaque);
9311 cleanup:
9312 if (jobStarted)
9313 virDomainObjEndJob(obj);
9314 if (!virDomainObjIsActive(obj))
9315 qemuDomainRemoveInactive(driver, obj, 0, false);
9316 virDomainObjEndAPI(&obj);
9317 virIdentitySetCurrent(NULL);
9318 return;
9320 error:
9321 if (virDomainObjIsActive(obj)) {
9322 /* We can't get the monitor back, so must kill the VM
9323 * to remove danger of it ending up running twice if
9324 * user tries to start it again later.
9326 * If we cannot get to the monitor when the QEMU command
9327 * line used -no-shutdown, then we can safely say that the
9328 * domain crashed; otherwise, if the monitor was started,
9329 * then we can blame ourselves, else we failed before the
9330 * monitor started so we don't really know. */
9331 if (!priv->mon && tryMonReconn &&
9332 (priv->allowReboot == VIR_TRISTATE_BOOL_YES ||
9333 virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_SET_ACTION)))
9334 state = VIR_DOMAIN_SHUTOFF_CRASHED;
9335 else if (priv->mon)
9336 state = VIR_DOMAIN_SHUTOFF_DAEMON;
9337 else
9338 state = VIR_DOMAIN_SHUTOFF_UNKNOWN;
9340 /* If BeginJob failed, we jumped here without a job, let's hope another
9341 * thread didn't have a chance to start playing with the domain yet
9342 * (it's all we can do anyway).
9344 qemuProcessStop(driver, obj, state, VIR_ASYNC_JOB_NONE, stopFlags);
9346 goto cleanup;
9349 static int
9350 qemuProcessReconnectHelper(virDomainObj *obj,
9351 void *opaque)
9353 virThread thread;
9354 struct qemuProcessReconnectData *src = opaque;
9355 struct qemuProcessReconnectData *data;
9356 g_autofree char *name = NULL;
9358 /* If the VM was inactive, we don't need to reconnect */
9359 if (obj->pid == 0)
9360 return 0;
9362 data = g_new0(struct qemuProcessReconnectData, 1);
9364 memcpy(data, src, sizeof(*data));
9365 data->obj = obj;
9366 data->identity = virIdentityGetCurrent();
9368 /* this lock and reference will be eventually transferred to the thread
9369 * that handles the reconnect */
9370 virObjectLock(obj);
9371 virObjectRef(obj);
9373 name = g_strdup_printf("init-%s", obj->def->name);
9375 if (virThreadCreateFull(&thread, false, qemuProcessReconnect,
9376 name, false, data) < 0) {
9377 virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
9378 _("Could not create thread. QEMU initialization might be incomplete"));
9379 /* We can't spawn a thread and thus connect to monitor. Kill qemu.
9380 * It's safe to call qemuProcessStop without a job here since there
9381 * is no thread that could be doing anything else with the same domain
9382 * object.
9384 qemuProcessStop(src->driver, obj, VIR_DOMAIN_SHUTOFF_FAILED,
9385 VIR_ASYNC_JOB_NONE, 0);
9386 qemuDomainRemoveInactiveLocked(src->driver, obj);
9388 virDomainObjEndAPI(&obj);
9389 g_clear_object(&data->identity);
9390 VIR_FREE(data);
9391 return -1;
9394 return 0;
9398 * qemuProcessReconnectAll
9400 * Try to re-open the resources for live VMs that we care
9401 * about.
9403 void
9404 qemuProcessReconnectAll(virQEMUDriver *driver)
9406 struct qemuProcessReconnectData data = {.driver = driver};
9407 virDomainObjListForEach(driver->domains, true,
9408 qemuProcessReconnectHelper, &data);
9412 static void virQEMUCapsMonitorNotify(qemuMonitor *mon G_GNUC_UNUSED,
9413 virDomainObj *vm G_GNUC_UNUSED)
9417 static qemuMonitorCallbacks callbacks = {
9418 .eofNotify = virQEMUCapsMonitorNotify,
9419 .errorNotify = virQEMUCapsMonitorNotify,
9423 static void
9424 qemuProcessQMPStop(qemuProcessQMP *proc)
9426 if (proc->mon) {
9427 virObjectUnlock(proc->mon);
9428 g_clear_pointer(&proc->mon, qemuMonitorClose);
9431 if (proc->cmd) {
9432 virCommandAbort(proc->cmd);
9433 g_clear_pointer(&proc->cmd, virCommandFree);
9436 if (proc->monpath)
9437 unlink(proc->monpath);
9439 virDomainObjEndAPI(&proc->vm);
9441 if (proc->pid != 0) {
9442 VIR_DEBUG("Killing QMP caps process %lld", (long long)proc->pid);
9443 virProcessKillPainfully(proc->pid, true);
9444 virResetLastError();
9445 proc->pid = 0;
9448 if (proc->pidfile)
9449 unlink(proc->pidfile);
9451 if (proc->uniqDir)
9452 rmdir(proc->uniqDir);
9457 * qemuProcessQMPFree:
9458 * @proc: Stores process and connection state
9460 * Kill QEMU process and free process data structure.
9462 void
9463 qemuProcessQMPFree(qemuProcessQMP *proc)
9465 if (!proc)
9466 return;
9468 qemuProcessQMPStop(proc);
9470 g_object_unref(proc->eventThread);
9472 g_free(proc->binary);
9473 g_free(proc->libDir);
9474 g_free(proc->uniqDir);
9475 g_free(proc->monpath);
9476 g_free(proc->monarg);
9477 g_free(proc->pidfile);
9478 g_free(proc->stdErr);
9479 g_free(proc);
9484 * qemuProcessQMPNew:
9485 * @binary: QEMU binary
9486 * @libDir: Directory for process and connection artifacts
9487 * @runUid: UserId for QEMU process
9488 * @runGid: GroupId for QEMU process
9489 * @forceTCG: Force TCG mode if true
9491 * Allocate and initialize domain structure encapsulating QEMU process state
9492 * and monitor connection for completing QMP queries.
9494 qemuProcessQMP *
9495 qemuProcessQMPNew(const char *binary,
9496 const char *libDir,
9497 uid_t runUid,
9498 gid_t runGid,
9499 bool forceTCG)
9501 g_autoptr(qemuProcessQMP) proc = NULL;
9502 const char *threadSuffix;
9503 g_autofree char *threadName = NULL;
9505 VIR_DEBUG("exec=%s, libDir=%s, runUid=%u, runGid=%u, forceTCG=%d",
9506 binary, libDir, runUid, runGid, forceTCG);
9508 proc = g_new0(qemuProcessQMP, 1);
9510 proc->binary = g_strdup(binary);
9511 proc->libDir = g_strdup(libDir);
9513 proc->runUid = runUid;
9514 proc->runGid = runGid;
9515 proc->forceTCG = forceTCG;
9517 threadSuffix = strrchr(binary, '-');
9518 if (threadSuffix)
9519 threadSuffix++;
9520 else
9521 threadSuffix = binary;
9522 threadName = g_strdup_printf("qmp-%s", threadSuffix);
9524 if (!(proc->eventThread = virEventThreadNew(threadName)))
9525 return NULL;
9527 return g_steal_pointer(&proc);
9531 static int
9532 qemuProcessQEMULabelUniqPath(qemuProcessQMP *proc)
9534 /* We cannot use the security driver here, but we should not need to. */
9535 if (chown(proc->uniqDir, proc->runUid, -1) < 0) {
9536 virReportSystemError(errno,
9537 _("Cannot chown uniq path: %1$s"),
9538 proc->uniqDir);
9539 return -1;
9542 return 0;
9546 static int
9547 qemuProcessQMPInit(qemuProcessQMP *proc)
9549 g_autofree char *template = NULL;
9551 VIR_DEBUG("proc=%p, emulator=%s", proc, proc->binary);
9553 template = g_strdup_printf("%s/qmp-XXXXXX", proc->libDir);
9555 if (!(proc->uniqDir = g_mkdtemp(template))) {
9556 virReportSystemError(errno,
9557 _("Failed to create unique directory with template '%1$s' for probing QEMU"),
9558 template);
9559 return -1;
9561 /* if g_mkdtemp succeeds, proc->uniqDir is now the owner of
9562 * the string. Set template to NULL to avoid freeing
9563 * the memory in this case */
9564 template = NULL;
9566 if (qemuProcessQEMULabelUniqPath(proc) < 0)
9567 return -1;
9569 proc->monpath = g_strdup_printf("%s/%s", proc->uniqDir, "qmp.monitor");
9571 proc->monarg = g_strdup_printf("unix:%s,server=on,wait=off", proc->monpath);
9574 * Normally we'd use runDir for pid files, but because we're using
9575 * -daemonize we need QEMU to be allowed to create them, rather
9576 * than libvirtd. So we're using libDir which QEMU can write to
9578 proc->pidfile = g_strdup_printf("%s/%s", proc->uniqDir, "qmp.pid");
9580 return 0;
9584 #if defined(__linux__)
9585 # define hwaccel "kvm:tcg"
9586 #elif defined(__APPLE__)
9587 # define hwaccel "hvf:tcg"
9588 #else
9589 # define hwaccel "tcg"
9590 #endif
9592 static int
9593 qemuProcessQMPLaunch(qemuProcessQMP *proc)
9595 const char *machine;
9596 int status = 0;
9597 int rc;
9599 if (proc->forceTCG)
9600 machine = "none,accel=tcg";
9601 else
9602 machine = "none,accel=" hwaccel;
9604 VIR_DEBUG("Try to probe capabilities of '%s' via QMP, machine %s",
9605 proc->binary, machine);
9608 * We explicitly need to use -daemonize here, rather than
9609 * virCommandDaemonize, because we need to synchronize
9610 * with QEMU creating its monitor socket API. Using
9611 * daemonize guarantees control won't return to libvirt
9612 * until the socket is present.
9614 proc->cmd = virCommandNewArgList(proc->binary,
9615 "-S",
9616 "-no-user-config",
9617 "-nodefaults",
9618 "-nographic",
9619 "-machine", machine,
9620 "-qmp", proc->monarg,
9621 "-pidfile", proc->pidfile,
9622 "-daemonize",
9623 NULL);
9624 virCommandAddEnvPassCommon(proc->cmd);
9625 virCommandClearCaps(proc->cmd);
9627 #if WITH_CAPNG
9628 /* QEMU might run into permission issues, e.g. /dev/sev (0600), override
9629 * them just for the purpose of probing */
9630 if (geteuid() == 0)
9631 virCommandAllowCap(proc->cmd, CAP_DAC_OVERRIDE);
9632 #endif
9634 virCommandSetGID(proc->cmd, proc->runGid);
9635 virCommandSetUID(proc->cmd, proc->runUid);
9637 virCommandSetErrorBuffer(proc->cmd, &(proc->stdErr));
9639 if (virCommandRun(proc->cmd, &status) < 0)
9640 return -1;
9642 if (status != 0) {
9643 VIR_DEBUG("QEMU %s exited with status %d", proc->binary, status);
9644 virReportError(VIR_ERR_INTERNAL_ERROR,
9645 _("Failed to start QEMU binary %1$s for probing: %2$s"),
9646 proc->binary,
9647 proc->stdErr ? proc->stdErr : _("unknown error"));
9648 return -1;
9651 if ((rc = virPidFileReadPath(proc->pidfile, &proc->pid)) < 0) {
9652 virReportSystemError(-rc, _("Failed to read pidfile %1$s"), proc->pidfile);
9653 return -1;
9656 return 0;
9661 qemuProcessQMPInitMonitor(qemuMonitor *mon)
9663 if (qemuMonitorSetCapabilities(mon) < 0) {
9664 VIR_DEBUG("Failed to set monitor capabilities %s",
9665 virGetLastErrorMessage());
9666 return -1;
9669 return 0;
9673 static int
9674 qemuProcessQMPConnectMonitor(qemuProcessQMP *proc)
9676 g_autoptr(virDomainXMLOption) xmlopt = NULL;
9677 virDomainChrSourceDef monConfig;
9679 VIR_DEBUG("proc=%p, emulator=%s, proc->pid=%lld",
9680 proc, proc->binary, (long long)proc->pid);
9682 monConfig.type = VIR_DOMAIN_CHR_TYPE_UNIX;
9683 monConfig.data.nix.path = proc->monpath;
9684 monConfig.data.nix.listen = false;
9686 if (!(xmlopt = virDomainXMLOptionNew(NULL, NULL, NULL, NULL, NULL, NULL)) ||
9687 !(proc->vm = virDomainObjNew(xmlopt)) ||
9688 !(proc->vm->def = virDomainDefNew(xmlopt)))
9689 return -1;
9691 proc->vm->pid = proc->pid;
9693 if (!(proc->mon = qemuMonitorOpen(proc->vm, &monConfig,
9694 virEventThreadGetContext(proc->eventThread),
9695 &callbacks)))
9696 return -1;
9698 virObjectLock(proc->mon);
9700 if (qemuProcessQMPInitMonitor(proc->mon) < 0)
9701 return -1;
9703 return 0;
9708 * qemuProcessQMPStart:
9709 * @proc: QEMU process and connection state created by qemuProcessQMPNew()
9711 * Start and connect to QEMU binary so QMP queries can be made.
9713 * Usage:
9714 * proc = qemuProcessQMPNew(binary, libDir, runUid, runGid, forceTCG);
9715 * qemuProcessQMPStart(proc);
9716 * ** Send QMP Queries to QEMU using monitor (proc->mon) **
9717 * qemuProcessQMPFree(proc);
9719 * Process error output (proc->stdErr) remains available in qemuProcessQMP
9720 * struct until qemuProcessQMPFree is called.
9723 qemuProcessQMPStart(qemuProcessQMP *proc)
9725 VIR_DEBUG("proc=%p, emulator=%s", proc, proc->binary);
9727 if (qemuProcessQMPInit(proc) < 0)
9728 return -1;
9730 if (qemuProcessQMPLaunch(proc) < 0)
9731 return -1;
9733 if (qemuProcessQMPConnectMonitor(proc) < 0)
9734 return -1;
9736 return 0;
9740 void
9741 qemuProcessHandleNbdkitExit(qemuNbdkitProcess *nbdkit,
9742 virDomainObj *vm)
9744 virObjectLock(vm);
9745 VIR_DEBUG("nbdkit process %i died", nbdkit->pid);
9746 qemuProcessEventSubmit(vm, QEMU_PROCESS_EVENT_NBDKIT_EXITED, 0, 0, nbdkit);
9747 virObjectUnlock(vm);