/*
 * Copyright (C) 2020 Red Hat, Inc.
 *
 * David Hildenbrand <david@redhat.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.
 * See the COPYING file in the top-level directory.
 */
#include "qemu/osdep.h"
#include "qemu-common.h"
#include "qemu/iov.h"
#include "qemu/cutils.h"
#include "qemu/error-report.h"
#include "qemu/units.h"
#include "sysemu/numa.h"
#include "sysemu/sysemu.h"
#include "sysemu/reset.h"
#include "hw/virtio/virtio.h"
#include "hw/virtio/virtio-bus.h"
#include "hw/virtio/virtio-access.h"
#include "hw/virtio/virtio-mem.h"
#include "qapi/error.h"
#include "qapi/visitor.h"
#include "exec/ram_addr.h"
#include "migration/misc.h"
#include "hw/boards.h"
#include "hw/qdev-properties.h"
#include CONFIG_DEVICES
#include "trace.h"
36 * Let's not allow blocks smaller than 1 MiB, for example, to keep the tracking
39 #define VIRTIO_MEM_MIN_BLOCK_SIZE ((uint32_t)(1 * MiB))
41 #if defined(__x86_64__) || defined(__arm__) || defined(__aarch64__) || \
42 defined(__powerpc64__)
43 #define VIRTIO_MEM_DEFAULT_THP_SIZE ((uint32_t)(2 * MiB))
45 /* fallback to 1 MiB (e.g., the THP size on s390x) */
46 #define VIRTIO_MEM_DEFAULT_THP_SIZE VIRTIO_MEM_MIN_BLOCK_SIZE
50 * We want to have a reasonable default block size such that
51 * 1. We avoid splitting THPs when unplugging memory, which degrades
53 * 2. We avoid placing THPs for plugged blocks that also cover unplugged
56 * The actual THP size might differ between Linux kernels, so we try to probe
57 * it. In the future (if we ever run into issues regarding 2.), we might want
58 * to disable THP in case we fail to properly probe the THP size, or if the
59 * block size is configured smaller than the THP size.
61 static uint32_t thp_size
;
63 #define HPAGE_PMD_SIZE_PATH "/sys/kernel/mm/transparent_hugepage/hpage_pmd_size"
64 static uint32_t virtio_mem_thp_size(void)
66 gchar
*content
= NULL
;
75 * Try to probe the actual THP size, fallback to (sane but eventually
76 * incorrect) default sizes.
78 if (g_file_get_contents(HPAGE_PMD_SIZE_PATH
, &content
, NULL
, NULL
) &&
79 !qemu_strtou64(content
, &endptr
, 0, &tmp
) &&
80 (!endptr
|| *endptr
== '\n')) {
82 * Sanity-check the value, if it's too big (e.g., aarch64 with 64k base
83 * pages) or weird, fallback to something smaller.
85 if (!tmp
|| !is_power_of_2(tmp
) || tmp
> 16 * MiB
) {
86 warn_report("Read unsupported THP size: %" PRIx64
, tmp
);
93 thp_size
= VIRTIO_MEM_DEFAULT_THP_SIZE
;
94 warn_report("Could not detect THP size, falling back to %" PRIx64
95 " MiB.", thp_size
/ MiB
);
102 static uint64_t virtio_mem_default_block_size(RAMBlock
*rb
)
104 const uint64_t page_size
= qemu_ram_pagesize(rb
);
106 /* We can have hugetlbfs with a page size smaller than the THP size. */
107 if (page_size
== qemu_real_host_page_size
) {
108 return MAX(page_size
, virtio_mem_thp_size());
110 return MAX(page_size
, VIRTIO_MEM_MIN_BLOCK_SIZE
);
114 * Size the usable region bigger than the requested size if possible. Esp.
115 * Linux guests will only add (aligned) memory blocks in case they fully
116 * fit into the usable region, but plug+online only a subset of the pages.
117 * The memory block size corresponds mostly to the section size.
119 * This allows e.g., to add 20MB with a section size of 128MB on x86_64, and
120 * a section size of 1GB on arm64 (as long as the start address is properly
121 * aligned, similar to ordinary DIMMs).
123 * We can change this at any time and maybe even make it configurable if
124 * necessary (as the section size can change). But it's more likely that the
125 * section size will rather get smaller and not bigger over time.
127 #if defined(TARGET_X86_64) || defined(TARGET_I386)
128 #define VIRTIO_MEM_USABLE_EXTENT (2 * (128 * MiB))
130 #error VIRTIO_MEM_USABLE_EXTENT not defined
133 static bool virtio_mem_is_busy(void)
136 * Postcopy cannot handle concurrent discards and we don't want to migrate
137 * pages on-demand with stale content when plugging new blocks.
139 * For precopy, we don't want unplugged blocks in our migration stream, and
140 * when plugging new blocks, the page content might differ between source
141 * and destination (observable by the guest when not initializing pages
142 * after plugging them) until we're running on the destination (as we didn't
143 * migrate these blocks when they were unplugged).
145 return migration_in_incoming_postcopy() || !migration_is_idle();
148 static bool virtio_mem_test_bitmap(VirtIOMEM
*vmem
, uint64_t start_gpa
,
149 uint64_t size
, bool plugged
)
151 const unsigned long first_bit
= (start_gpa
- vmem
->addr
) / vmem
->block_size
;
152 const unsigned long last_bit
= first_bit
+ (size
/ vmem
->block_size
) - 1;
153 unsigned long found_bit
;
155 /* We fake a shorter bitmap to avoid searching too far. */
157 found_bit
= find_next_zero_bit(vmem
->bitmap
, last_bit
+ 1, first_bit
);
159 found_bit
= find_next_bit(vmem
->bitmap
, last_bit
+ 1, first_bit
);
161 return found_bit
> last_bit
;
164 static void virtio_mem_set_bitmap(VirtIOMEM
*vmem
, uint64_t start_gpa
,
165 uint64_t size
, bool plugged
)
167 const unsigned long bit
= (start_gpa
- vmem
->addr
) / vmem
->block_size
;
168 const unsigned long nbits
= size
/ vmem
->block_size
;
171 bitmap_set(vmem
->bitmap
, bit
, nbits
);
173 bitmap_clear(vmem
->bitmap
, bit
, nbits
);
177 static void virtio_mem_send_response(VirtIOMEM
*vmem
, VirtQueueElement
*elem
,
178 struct virtio_mem_resp
*resp
)
180 VirtIODevice
*vdev
= VIRTIO_DEVICE(vmem
);
181 VirtQueue
*vq
= vmem
->vq
;
183 trace_virtio_mem_send_response(le16_to_cpu(resp
->type
));
184 iov_from_buf(elem
->in_sg
, elem
->in_num
, 0, resp
, sizeof(*resp
));
186 virtqueue_push(vq
, elem
, sizeof(*resp
));
187 virtio_notify(vdev
, vq
);
190 static void virtio_mem_send_response_simple(VirtIOMEM
*vmem
,
191 VirtQueueElement
*elem
,
194 struct virtio_mem_resp resp
= {
195 .type
= cpu_to_le16(type
),
198 virtio_mem_send_response(vmem
, elem
, &resp
);
201 static bool virtio_mem_valid_range(VirtIOMEM
*vmem
, uint64_t gpa
, uint64_t size
)
203 if (!QEMU_IS_ALIGNED(gpa
, vmem
->block_size
)) {
206 if (gpa
+ size
< gpa
|| !size
) {
209 if (gpa
< vmem
->addr
|| gpa
>= vmem
->addr
+ vmem
->usable_region_size
) {
212 if (gpa
+ size
> vmem
->addr
+ vmem
->usable_region_size
) {
218 static int virtio_mem_set_block_state(VirtIOMEM
*vmem
, uint64_t start_gpa
,
219 uint64_t size
, bool plug
)
221 const uint64_t offset
= start_gpa
- vmem
->addr
;
224 if (virtio_mem_is_busy()) {
229 ret
= ram_block_discard_range(vmem
->memdev
->mr
.ram_block
, offset
, size
);
231 error_report("Unexpected error discarding RAM: %s",
236 virtio_mem_set_bitmap(vmem
, start_gpa
, size
, plug
);
240 static int virtio_mem_state_change_request(VirtIOMEM
*vmem
, uint64_t gpa
,
241 uint16_t nb_blocks
, bool plug
)
243 const uint64_t size
= nb_blocks
* vmem
->block_size
;
246 if (!virtio_mem_valid_range(vmem
, gpa
, size
)) {
247 return VIRTIO_MEM_RESP_ERROR
;
250 if (plug
&& (vmem
->size
+ size
> vmem
->requested_size
)) {
251 return VIRTIO_MEM_RESP_NACK
;
254 /* test if really all blocks are in the opposite state */
255 if (!virtio_mem_test_bitmap(vmem
, gpa
, size
, !plug
)) {
256 return VIRTIO_MEM_RESP_ERROR
;
259 ret
= virtio_mem_set_block_state(vmem
, gpa
, size
, plug
);
261 return VIRTIO_MEM_RESP_BUSY
;
268 notifier_list_notify(&vmem
->size_change_notifiers
, &vmem
->size
);
269 return VIRTIO_MEM_RESP_ACK
;
272 static void virtio_mem_plug_request(VirtIOMEM
*vmem
, VirtQueueElement
*elem
,
273 struct virtio_mem_req
*req
)
275 const uint64_t gpa
= le64_to_cpu(req
->u
.plug
.addr
);
276 const uint16_t nb_blocks
= le16_to_cpu(req
->u
.plug
.nb_blocks
);
279 trace_virtio_mem_plug_request(gpa
, nb_blocks
);
280 type
= virtio_mem_state_change_request(vmem
, gpa
, nb_blocks
, true);
281 virtio_mem_send_response_simple(vmem
, elem
, type
);
284 static void virtio_mem_unplug_request(VirtIOMEM
*vmem
, VirtQueueElement
*elem
,
285 struct virtio_mem_req
*req
)
287 const uint64_t gpa
= le64_to_cpu(req
->u
.unplug
.addr
);
288 const uint16_t nb_blocks
= le16_to_cpu(req
->u
.unplug
.nb_blocks
);
291 trace_virtio_mem_unplug_request(gpa
, nb_blocks
);
292 type
= virtio_mem_state_change_request(vmem
, gpa
, nb_blocks
, false);
293 virtio_mem_send_response_simple(vmem
, elem
, type
);
296 static void virtio_mem_resize_usable_region(VirtIOMEM
*vmem
,
297 uint64_t requested_size
,
300 uint64_t newsize
= MIN(memory_region_size(&vmem
->memdev
->mr
),
301 requested_size
+ VIRTIO_MEM_USABLE_EXTENT
);
303 /* The usable region size always has to be multiples of the block size. */
304 newsize
= QEMU_ALIGN_UP(newsize
, vmem
->block_size
);
306 if (!requested_size
) {
310 if (newsize
< vmem
->usable_region_size
&& !can_shrink
) {
314 trace_virtio_mem_resized_usable_region(vmem
->usable_region_size
, newsize
);
315 vmem
->usable_region_size
= newsize
;
318 static int virtio_mem_unplug_all(VirtIOMEM
*vmem
)
320 RAMBlock
*rb
= vmem
->memdev
->mr
.ram_block
;
323 if (virtio_mem_is_busy()) {
327 ret
= ram_block_discard_range(rb
, 0, qemu_ram_get_used_length(rb
));
329 error_report("Unexpected error discarding RAM: %s", strerror(-ret
));
332 bitmap_clear(vmem
->bitmap
, 0, vmem
->bitmap_size
);
335 notifier_list_notify(&vmem
->size_change_notifiers
, &vmem
->size
);
337 trace_virtio_mem_unplugged_all();
338 virtio_mem_resize_usable_region(vmem
, vmem
->requested_size
, true);
342 static void virtio_mem_unplug_all_request(VirtIOMEM
*vmem
,
343 VirtQueueElement
*elem
)
345 trace_virtio_mem_unplug_all_request();
346 if (virtio_mem_unplug_all(vmem
)) {
347 virtio_mem_send_response_simple(vmem
, elem
, VIRTIO_MEM_RESP_BUSY
);
349 virtio_mem_send_response_simple(vmem
, elem
, VIRTIO_MEM_RESP_ACK
);
353 static void virtio_mem_state_request(VirtIOMEM
*vmem
, VirtQueueElement
*elem
,
354 struct virtio_mem_req
*req
)
356 const uint16_t nb_blocks
= le16_to_cpu(req
->u
.state
.nb_blocks
);
357 const uint64_t gpa
= le64_to_cpu(req
->u
.state
.addr
);
358 const uint64_t size
= nb_blocks
* vmem
->block_size
;
359 struct virtio_mem_resp resp
= {
360 .type
= cpu_to_le16(VIRTIO_MEM_RESP_ACK
),
363 trace_virtio_mem_state_request(gpa
, nb_blocks
);
364 if (!virtio_mem_valid_range(vmem
, gpa
, size
)) {
365 virtio_mem_send_response_simple(vmem
, elem
, VIRTIO_MEM_RESP_ERROR
);
369 if (virtio_mem_test_bitmap(vmem
, gpa
, size
, true)) {
370 resp
.u
.state
.state
= cpu_to_le16(VIRTIO_MEM_STATE_PLUGGED
);
371 } else if (virtio_mem_test_bitmap(vmem
, gpa
, size
, false)) {
372 resp
.u
.state
.state
= cpu_to_le16(VIRTIO_MEM_STATE_UNPLUGGED
);
374 resp
.u
.state
.state
= cpu_to_le16(VIRTIO_MEM_STATE_MIXED
);
376 trace_virtio_mem_state_response(le16_to_cpu(resp
.u
.state
.state
));
377 virtio_mem_send_response(vmem
, elem
, &resp
);
380 static void virtio_mem_handle_request(VirtIODevice
*vdev
, VirtQueue
*vq
)
382 const int len
= sizeof(struct virtio_mem_req
);
383 VirtIOMEM
*vmem
= VIRTIO_MEM(vdev
);
384 VirtQueueElement
*elem
;
385 struct virtio_mem_req req
;
389 elem
= virtqueue_pop(vq
, sizeof(VirtQueueElement
));
394 if (iov_to_buf(elem
->out_sg
, elem
->out_num
, 0, &req
, len
) < len
) {
395 virtio_error(vdev
, "virtio-mem protocol violation: invalid request"
397 virtqueue_detach_element(vq
, elem
, 0);
402 if (iov_size(elem
->in_sg
, elem
->in_num
) <
403 sizeof(struct virtio_mem_resp
)) {
404 virtio_error(vdev
, "virtio-mem protocol violation: not enough space"
405 " for response: %zu",
406 iov_size(elem
->in_sg
, elem
->in_num
));
407 virtqueue_detach_element(vq
, elem
, 0);
412 type
= le16_to_cpu(req
.type
);
414 case VIRTIO_MEM_REQ_PLUG
:
415 virtio_mem_plug_request(vmem
, elem
, &req
);
417 case VIRTIO_MEM_REQ_UNPLUG
:
418 virtio_mem_unplug_request(vmem
, elem
, &req
);
420 case VIRTIO_MEM_REQ_UNPLUG_ALL
:
421 virtio_mem_unplug_all_request(vmem
, elem
);
423 case VIRTIO_MEM_REQ_STATE
:
424 virtio_mem_state_request(vmem
, elem
, &req
);
427 virtio_error(vdev
, "virtio-mem protocol violation: unknown request"
429 virtqueue_detach_element(vq
, elem
, 0);
438 static void virtio_mem_get_config(VirtIODevice
*vdev
, uint8_t *config_data
)
440 VirtIOMEM
*vmem
= VIRTIO_MEM(vdev
);
441 struct virtio_mem_config
*config
= (void *) config_data
;
443 config
->block_size
= cpu_to_le64(vmem
->block_size
);
444 config
->node_id
= cpu_to_le16(vmem
->node
);
445 config
->requested_size
= cpu_to_le64(vmem
->requested_size
);
446 config
->plugged_size
= cpu_to_le64(vmem
->size
);
447 config
->addr
= cpu_to_le64(vmem
->addr
);
448 config
->region_size
= cpu_to_le64(memory_region_size(&vmem
->memdev
->mr
));
449 config
->usable_region_size
= cpu_to_le64(vmem
->usable_region_size
);
452 static uint64_t virtio_mem_get_features(VirtIODevice
*vdev
, uint64_t features
,
455 MachineState
*ms
= MACHINE(qdev_get_machine());
457 if (ms
->numa_state
) {
458 #if defined(CONFIG_ACPI)
459 virtio_add_feature(&features
, VIRTIO_MEM_F_ACPI_PXM
);
465 static void virtio_mem_system_reset(void *opaque
)
467 VirtIOMEM
*vmem
= VIRTIO_MEM(opaque
);
470 * During usual resets, we will unplug all memory and shrink the usable
471 * region size. This is, however, not possible in all scenarios. Then,
472 * the guest has to deal with this manually (VIRTIO_MEM_REQ_UNPLUG_ALL).
474 virtio_mem_unplug_all(vmem
);
477 static void virtio_mem_device_realize(DeviceState
*dev
, Error
**errp
)
479 MachineState
*ms
= MACHINE(qdev_get_machine());
480 int nb_numa_nodes
= ms
->numa_state
? ms
->numa_state
->num_nodes
: 0;
481 VirtIODevice
*vdev
= VIRTIO_DEVICE(dev
);
482 VirtIOMEM
*vmem
= VIRTIO_MEM(dev
);
488 error_setg(errp
, "'%s' property is not set", VIRTIO_MEM_MEMDEV_PROP
);
490 } else if (host_memory_backend_is_mapped(vmem
->memdev
)) {
491 error_setg(errp
, "'%s' property specifies a busy memdev: %s",
492 VIRTIO_MEM_MEMDEV_PROP
,
493 object_get_canonical_path_component(OBJECT(vmem
->memdev
)));
495 } else if (!memory_region_is_ram(&vmem
->memdev
->mr
) ||
496 memory_region_is_rom(&vmem
->memdev
->mr
) ||
497 !vmem
->memdev
->mr
.ram_block
) {
498 error_setg(errp
, "'%s' property specifies an unsupported memdev",
499 VIRTIO_MEM_MEMDEV_PROP
);
503 if ((nb_numa_nodes
&& vmem
->node
>= nb_numa_nodes
) ||
504 (!nb_numa_nodes
&& vmem
->node
)) {
505 error_setg(errp
, "'%s' property has value '%" PRIu32
"', which exceeds"
506 "the number of numa nodes: %d", VIRTIO_MEM_NODE_PROP
,
507 vmem
->node
, nb_numa_nodes
? nb_numa_nodes
: 1);
512 error_setg(errp
, "Incompatible with mlock");
516 rb
= vmem
->memdev
->mr
.ram_block
;
517 page_size
= qemu_ram_pagesize(rb
);
520 * If the block size wasn't configured by the user, use a sane default. This
521 * allows using hugetlbfs backends of any page size without manual
524 if (!vmem
->block_size
) {
525 vmem
->block_size
= virtio_mem_default_block_size(rb
);
528 if (vmem
->block_size
< page_size
) {
529 error_setg(errp
, "'%s' property has to be at least the page size (0x%"
530 PRIx64
")", VIRTIO_MEM_BLOCK_SIZE_PROP
, page_size
);
532 } else if (vmem
->block_size
< virtio_mem_default_block_size(rb
)) {
533 warn_report("'%s' property is smaller than the default block size (%"
534 PRIx64
" MiB)", VIRTIO_MEM_BLOCK_SIZE_PROP
,
535 virtio_mem_default_block_size(rb
) / MiB
);
536 } else if (!QEMU_IS_ALIGNED(vmem
->requested_size
, vmem
->block_size
)) {
537 error_setg(errp
, "'%s' property has to be multiples of '%s' (0x%" PRIx64
538 ")", VIRTIO_MEM_REQUESTED_SIZE_PROP
,
539 VIRTIO_MEM_BLOCK_SIZE_PROP
, vmem
->block_size
);
541 } else if (!QEMU_IS_ALIGNED(vmem
->addr
, vmem
->block_size
)) {
542 error_setg(errp
, "'%s' property has to be multiples of '%s' (0x%" PRIx64
543 ")", VIRTIO_MEM_ADDR_PROP
, VIRTIO_MEM_BLOCK_SIZE_PROP
,
546 } else if (!QEMU_IS_ALIGNED(memory_region_size(&vmem
->memdev
->mr
),
548 error_setg(errp
, "'%s' property memdev size has to be multiples of"
549 "'%s' (0x%" PRIx64
")", VIRTIO_MEM_MEMDEV_PROP
,
550 VIRTIO_MEM_BLOCK_SIZE_PROP
, vmem
->block_size
);
554 if (ram_block_discard_require(true)) {
555 error_setg(errp
, "Discarding RAM is disabled");
559 ret
= ram_block_discard_range(rb
, 0, qemu_ram_get_used_length(rb
));
561 error_setg_errno(errp
, -ret
, "Unexpected error discarding RAM");
562 ram_block_discard_require(false);
566 virtio_mem_resize_usable_region(vmem
, vmem
->requested_size
, true);
568 vmem
->bitmap_size
= memory_region_size(&vmem
->memdev
->mr
) /
570 vmem
->bitmap
= bitmap_new(vmem
->bitmap_size
);
572 virtio_init(vdev
, TYPE_VIRTIO_MEM
, VIRTIO_ID_MEM
,
573 sizeof(struct virtio_mem_config
));
574 vmem
->vq
= virtio_add_queue(vdev
, 128, virtio_mem_handle_request
);
576 host_memory_backend_set_mapped(vmem
->memdev
, true);
577 vmstate_register_ram(&vmem
->memdev
->mr
, DEVICE(vmem
));
578 qemu_register_reset(virtio_mem_system_reset
, vmem
);
579 precopy_add_notifier(&vmem
->precopy_notifier
);
582 static void virtio_mem_device_unrealize(DeviceState
*dev
)
584 VirtIODevice
*vdev
= VIRTIO_DEVICE(dev
);
585 VirtIOMEM
*vmem
= VIRTIO_MEM(dev
);
587 precopy_remove_notifier(&vmem
->precopy_notifier
);
588 qemu_unregister_reset(virtio_mem_system_reset
, vmem
);
589 vmstate_unregister_ram(&vmem
->memdev
->mr
, DEVICE(vmem
));
590 host_memory_backend_set_mapped(vmem
->memdev
, false);
591 virtio_del_queue(vdev
, 0);
592 virtio_cleanup(vdev
);
593 g_free(vmem
->bitmap
);
594 ram_block_discard_require(false);
597 static int virtio_mem_restore_unplugged(VirtIOMEM
*vmem
)
599 RAMBlock
*rb
= vmem
->memdev
->mr
.ram_block
;
600 unsigned long first_zero_bit
, last_zero_bit
;
601 uint64_t offset
, length
;
604 /* Find consecutive unplugged blocks and discard the consecutive range. */
605 first_zero_bit
= find_first_zero_bit(vmem
->bitmap
, vmem
->bitmap_size
);
606 while (first_zero_bit
< vmem
->bitmap_size
) {
607 offset
= first_zero_bit
* vmem
->block_size
;
608 last_zero_bit
= find_next_bit(vmem
->bitmap
, vmem
->bitmap_size
,
609 first_zero_bit
+ 1) - 1;
610 length
= (last_zero_bit
- first_zero_bit
+ 1) * vmem
->block_size
;
612 ret
= ram_block_discard_range(rb
, offset
, length
);
614 error_report("Unexpected error discarding RAM: %s",
618 first_zero_bit
= find_next_zero_bit(vmem
->bitmap
, vmem
->bitmap_size
,
/*
 * Migration post-load hook: re-discard unplugged blocks on the destination,
 * except while an incoming postcopy is active.
 *
 * NOTE(review): the "return 0;" in the postcopy branch was on a line missing
 * from the damaged listing and is restored by inference.
 */
static int virtio_mem_post_load(void *opaque, int version_id)
{
    if (migration_in_incoming_postcopy()) {
        return 0;
    }

    return virtio_mem_restore_unplugged(VIRTIO_MEM(opaque));
}
633 typedef struct VirtIOMEMMigSanityChecks
{
636 uint64_t region_size
;
639 } VirtIOMEMMigSanityChecks
;
641 static int virtio_mem_mig_sanity_checks_pre_save(void *opaque
)
643 VirtIOMEMMigSanityChecks
*tmp
= opaque
;
644 VirtIOMEM
*vmem
= tmp
->parent
;
646 tmp
->addr
= vmem
->addr
;
647 tmp
->region_size
= memory_region_size(&vmem
->memdev
->mr
);
648 tmp
->block_size
= vmem
->block_size
;
649 tmp
->node
= vmem
->node
;
653 static int virtio_mem_mig_sanity_checks_post_load(void *opaque
, int version_id
)
655 VirtIOMEMMigSanityChecks
*tmp
= opaque
;
656 VirtIOMEM
*vmem
= tmp
->parent
;
657 const uint64_t new_region_size
= memory_region_size(&vmem
->memdev
->mr
);
659 if (tmp
->addr
!= vmem
->addr
) {
660 error_report("Property '%s' changed from 0x%" PRIx64
" to 0x%" PRIx64
,
661 VIRTIO_MEM_ADDR_PROP
, tmp
->addr
, vmem
->addr
);
665 * Note: Preparation for resizeable memory regions. The maximum size
666 * of the memory region must not change during migration.
668 if (tmp
->region_size
!= new_region_size
) {
669 error_report("Property '%s' size changed from 0x%" PRIx64
" to 0x%"
670 PRIx64
, VIRTIO_MEM_MEMDEV_PROP
, tmp
->region_size
,
674 if (tmp
->block_size
!= vmem
->block_size
) {
675 error_report("Property '%s' changed from 0x%" PRIx64
" to 0x%" PRIx64
,
676 VIRTIO_MEM_BLOCK_SIZE_PROP
, tmp
->block_size
,
680 if (tmp
->node
!= vmem
->node
) {
681 error_report("Property '%s' changed from %" PRIu32
" to %" PRIu32
,
682 VIRTIO_MEM_NODE_PROP
, tmp
->node
, vmem
->node
);
688 static const VMStateDescription vmstate_virtio_mem_sanity_checks
= {
689 .name
= "virtio-mem-device/sanity-checks",
690 .pre_save
= virtio_mem_mig_sanity_checks_pre_save
,
691 .post_load
= virtio_mem_mig_sanity_checks_post_load
,
692 .fields
= (VMStateField
[]) {
693 VMSTATE_UINT64(addr
, VirtIOMEMMigSanityChecks
),
694 VMSTATE_UINT64(region_size
, VirtIOMEMMigSanityChecks
),
695 VMSTATE_UINT64(block_size
, VirtIOMEMMigSanityChecks
),
696 VMSTATE_UINT32(node
, VirtIOMEMMigSanityChecks
),
697 VMSTATE_END_OF_LIST(),
701 static const VMStateDescription vmstate_virtio_mem_device
= {
702 .name
= "virtio-mem-device",
703 .minimum_version_id
= 1,
705 .post_load
= virtio_mem_post_load
,
706 .fields
= (VMStateField
[]) {
707 VMSTATE_WITH_TMP(VirtIOMEM
, VirtIOMEMMigSanityChecks
,
708 vmstate_virtio_mem_sanity_checks
),
709 VMSTATE_UINT64(usable_region_size
, VirtIOMEM
),
710 VMSTATE_UINT64(size
, VirtIOMEM
),
711 VMSTATE_UINT64(requested_size
, VirtIOMEM
),
712 VMSTATE_BITMAP(bitmap
, VirtIOMEM
, 0, bitmap_size
),
713 VMSTATE_END_OF_LIST()
717 static const VMStateDescription vmstate_virtio_mem
= {
718 .name
= "virtio-mem",
719 .minimum_version_id
= 1,
721 .fields
= (VMStateField
[]) {
722 VMSTATE_VIRTIO_DEVICE
,
723 VMSTATE_END_OF_LIST()
727 static void virtio_mem_fill_device_info(const VirtIOMEM
*vmem
,
728 VirtioMEMDeviceInfo
*vi
)
730 vi
->memaddr
= vmem
->addr
;
731 vi
->node
= vmem
->node
;
732 vi
->requested_size
= vmem
->requested_size
;
733 vi
->size
= vmem
->size
;
734 vi
->max_size
= memory_region_size(&vmem
->memdev
->mr
);
735 vi
->block_size
= vmem
->block_size
;
736 vi
->memdev
= object_get_canonical_path(OBJECT(vmem
->memdev
));
739 static MemoryRegion
*virtio_mem_get_memory_region(VirtIOMEM
*vmem
, Error
**errp
)
742 error_setg(errp
, "'%s' property must be set", VIRTIO_MEM_MEMDEV_PROP
);
746 return &vmem
->memdev
->mr
;
749 static void virtio_mem_add_size_change_notifier(VirtIOMEM
*vmem
,
752 notifier_list_add(&vmem
->size_change_notifiers
, notifier
);
755 static void virtio_mem_remove_size_change_notifier(VirtIOMEM
*vmem
,
758 notifier_remove(notifier
);
761 static void virtio_mem_get_size(Object
*obj
, Visitor
*v
, const char *name
,
762 void *opaque
, Error
**errp
)
764 const VirtIOMEM
*vmem
= VIRTIO_MEM(obj
);
765 uint64_t value
= vmem
->size
;
767 visit_type_size(v
, name
, &value
, errp
);
770 static void virtio_mem_get_requested_size(Object
*obj
, Visitor
*v
,
771 const char *name
, void *opaque
,
774 const VirtIOMEM
*vmem
= VIRTIO_MEM(obj
);
775 uint64_t value
= vmem
->requested_size
;
777 visit_type_size(v
, name
, &value
, errp
);
780 static void virtio_mem_set_requested_size(Object
*obj
, Visitor
*v
,
781 const char *name
, void *opaque
,
784 VirtIOMEM
*vmem
= VIRTIO_MEM(obj
);
788 visit_type_size(v
, name
, &value
, &err
);
790 error_propagate(errp
, err
);
795 * The block size and memory backend are not fixed until the device was
796 * realized. realize() will verify these properties then.
798 if (DEVICE(obj
)->realized
) {
799 if (!QEMU_IS_ALIGNED(value
, vmem
->block_size
)) {
800 error_setg(errp
, "'%s' has to be multiples of '%s' (0x%" PRIx64
801 ")", name
, VIRTIO_MEM_BLOCK_SIZE_PROP
,
804 } else if (value
> memory_region_size(&vmem
->memdev
->mr
)) {
805 error_setg(errp
, "'%s' cannot exceed the memory backend size"
806 "(0x%" PRIx64
")", name
,
807 memory_region_size(&vmem
->memdev
->mr
));
811 if (value
!= vmem
->requested_size
) {
812 virtio_mem_resize_usable_region(vmem
, value
, false);
813 vmem
->requested_size
= value
;
816 * Trigger a config update so the guest gets notified. We trigger
817 * even if the size didn't change (especially helpful for debugging).
819 virtio_notify_config(VIRTIO_DEVICE(vmem
));
821 vmem
->requested_size
= value
;
825 static void virtio_mem_get_block_size(Object
*obj
, Visitor
*v
, const char *name
,
826 void *opaque
, Error
**errp
)
828 const VirtIOMEM
*vmem
= VIRTIO_MEM(obj
);
829 uint64_t value
= vmem
->block_size
;
832 * If not configured by the user (and we're not realized yet), use the
833 * default block size we would use with the current memory backend.
836 if (vmem
->memdev
&& memory_region_is_ram(&vmem
->memdev
->mr
)) {
837 value
= virtio_mem_default_block_size(vmem
->memdev
->mr
.ram_block
);
839 value
= virtio_mem_thp_size();
843 visit_type_size(v
, name
, &value
, errp
);
846 static void virtio_mem_set_block_size(Object
*obj
, Visitor
*v
, const char *name
,
847 void *opaque
, Error
**errp
)
849 VirtIOMEM
*vmem
= VIRTIO_MEM(obj
);
853 if (DEVICE(obj
)->realized
) {
854 error_setg(errp
, "'%s' cannot be changed", name
);
858 visit_type_size(v
, name
, &value
, &err
);
860 error_propagate(errp
, err
);
864 if (value
< VIRTIO_MEM_MIN_BLOCK_SIZE
) {
865 error_setg(errp
, "'%s' property has to be at least 0x%" PRIx32
, name
,
866 VIRTIO_MEM_MIN_BLOCK_SIZE
);
868 } else if (!is_power_of_2(value
)) {
869 error_setg(errp
, "'%s' property has to be a power of two", name
);
872 vmem
->block_size
= value
;
875 static void virtio_mem_precopy_exclude_unplugged(VirtIOMEM
*vmem
)
877 void * const host
= qemu_ram_get_host_addr(vmem
->memdev
->mr
.ram_block
);
878 unsigned long first_zero_bit
, last_zero_bit
;
879 uint64_t offset
, length
;
882 * Find consecutive unplugged blocks and exclude them from migration.
884 * Note: Blocks cannot get (un)plugged during precopy, no locking needed.
886 first_zero_bit
= find_first_zero_bit(vmem
->bitmap
, vmem
->bitmap_size
);
887 while (first_zero_bit
< vmem
->bitmap_size
) {
888 offset
= first_zero_bit
* vmem
->block_size
;
889 last_zero_bit
= find_next_bit(vmem
->bitmap
, vmem
->bitmap_size
,
890 first_zero_bit
+ 1) - 1;
891 length
= (last_zero_bit
- first_zero_bit
+ 1) * vmem
->block_size
;
893 qemu_guest_free_page_hint(host
+ offset
, length
);
894 first_zero_bit
= find_next_zero_bit(vmem
->bitmap
, vmem
->bitmap_size
,
899 static int virtio_mem_precopy_notify(NotifierWithReturn
*n
, void *data
)
901 VirtIOMEM
*vmem
= container_of(n
, VirtIOMEM
, precopy_notifier
);
902 PrecopyNotifyData
*pnd
= data
;
904 switch (pnd
->reason
) {
905 case PRECOPY_NOTIFY_SETUP
:
906 precopy_enable_free_page_optimization();
908 case PRECOPY_NOTIFY_AFTER_BITMAP_SYNC
:
909 virtio_mem_precopy_exclude_unplugged(vmem
);
918 static void virtio_mem_instance_init(Object
*obj
)
920 VirtIOMEM
*vmem
= VIRTIO_MEM(obj
);
922 notifier_list_init(&vmem
->size_change_notifiers
);
923 vmem
->precopy_notifier
.notify
= virtio_mem_precopy_notify
;
925 object_property_add(obj
, VIRTIO_MEM_SIZE_PROP
, "size", virtio_mem_get_size
,
927 object_property_add(obj
, VIRTIO_MEM_REQUESTED_SIZE_PROP
, "size",
928 virtio_mem_get_requested_size
,
929 virtio_mem_set_requested_size
, NULL
, NULL
);
930 object_property_add(obj
, VIRTIO_MEM_BLOCK_SIZE_PROP
, "size",
931 virtio_mem_get_block_size
, virtio_mem_set_block_size
,
935 static Property virtio_mem_properties
[] = {
936 DEFINE_PROP_UINT64(VIRTIO_MEM_ADDR_PROP
, VirtIOMEM
, addr
, 0),
937 DEFINE_PROP_UINT32(VIRTIO_MEM_NODE_PROP
, VirtIOMEM
, node
, 0),
938 DEFINE_PROP_LINK(VIRTIO_MEM_MEMDEV_PROP
, VirtIOMEM
, memdev
,
939 TYPE_MEMORY_BACKEND
, HostMemoryBackend
*),
940 DEFINE_PROP_END_OF_LIST(),
943 static void virtio_mem_class_init(ObjectClass
*klass
, void *data
)
945 DeviceClass
*dc
= DEVICE_CLASS(klass
);
946 VirtioDeviceClass
*vdc
= VIRTIO_DEVICE_CLASS(klass
);
947 VirtIOMEMClass
*vmc
= VIRTIO_MEM_CLASS(klass
);
949 device_class_set_props(dc
, virtio_mem_properties
);
950 dc
->vmsd
= &vmstate_virtio_mem
;
952 set_bit(DEVICE_CATEGORY_MISC
, dc
->categories
);
953 vdc
->realize
= virtio_mem_device_realize
;
954 vdc
->unrealize
= virtio_mem_device_unrealize
;
955 vdc
->get_config
= virtio_mem_get_config
;
956 vdc
->get_features
= virtio_mem_get_features
;
957 vdc
->vmsd
= &vmstate_virtio_mem_device
;
959 vmc
->fill_device_info
= virtio_mem_fill_device_info
;
960 vmc
->get_memory_region
= virtio_mem_get_memory_region
;
961 vmc
->add_size_change_notifier
= virtio_mem_add_size_change_notifier
;
962 vmc
->remove_size_change_notifier
= virtio_mem_remove_size_change_notifier
;
965 static const TypeInfo virtio_mem_info
= {
966 .name
= TYPE_VIRTIO_MEM
,
967 .parent
= TYPE_VIRTIO_DEVICE
,
968 .instance_size
= sizeof(VirtIOMEM
),
969 .instance_init
= virtio_mem_instance_init
,
970 .class_init
= virtio_mem_class_init
,
971 .class_size
= sizeof(VirtIOMEMClass
),
974 static void virtio_register_types(void)
976 type_register_static(&virtio_mem_info
);
979 type_init(virtio_register_types
)