4 * Copyright (C) 2020 Red Hat, Inc.
7 * David Hildenbrand <david@redhat.com>
9 * This work is licensed under the terms of the GNU GPL, version 2.
10 * See the COPYING file in the top-level directory.
13 #include "qemu/osdep.h"
15 #include "qemu/cutils.h"
16 #include "qemu/error-report.h"
17 #include "qemu/units.h"
18 #include "sysemu/numa.h"
19 #include "sysemu/sysemu.h"
20 #include "sysemu/reset.h"
21 #include "hw/virtio/virtio.h"
22 #include "hw/virtio/virtio-bus.h"
23 #include "hw/virtio/virtio-mem.h"
24 #include "qapi/error.h"
25 #include "qapi/visitor.h"
26 #include "exec/ram_addr.h"
27 #include "migration/misc.h"
28 #include "hw/boards.h"
29 #include "hw/qdev-properties.h"
30 #include CONFIG_DEVICES
/* Forward declaration; the description itself is defined further down. */
33 static const VMStateDescription vmstate_virtio_mem_device_early
;
36 * We only had legacy x86 guests that did not support
37 * VIRTIO_MEM_F_UNPLUGGED_INACCESSIBLE. Other targets don't have legacy guests.
39 #if defined(TARGET_X86_64) || defined(TARGET_I386)
/* Enables the code guarded by VIRTIO_MEM_HAS_LEGACY_GUESTS below. */
40 #define VIRTIO_MEM_HAS_LEGACY_GUESTS
44 * Let's not allow blocks smaller than 1 MiB, for example, to keep the tracking
/* Smallest block size this file falls back to, as a 32-bit byte count. */
47 #define VIRTIO_MEM_MIN_BLOCK_SIZE ((uint32_t)(1 * MiB))
49 static uint32_t virtio_mem_default_thp_size(void)
51 uint32_t default_thp_size
= VIRTIO_MEM_MIN_BLOCK_SIZE
;
53 #if defined(__x86_64__) || defined(__arm__) || defined(__powerpc64__)
54 default_thp_size
= 2 * MiB
;
55 #elif defined(__aarch64__)
56 if (qemu_real_host_page_size() == 4 * KiB
) {
57 default_thp_size
= 2 * MiB
;
58 } else if (qemu_real_host_page_size() == 16 * KiB
) {
59 default_thp_size
= 32 * MiB
;
60 } else if (qemu_real_host_page_size() == 64 * KiB
) {
61 default_thp_size
= 512 * MiB
;
65 return default_thp_size
;
69 * We want to have a reasonable default block size such that
70 * 1. We avoid splitting THPs when unplugging memory, which degrades
72 * 2. We avoid placing THPs for plugged blocks that also cover unplugged
75 * The actual THP size might differ between Linux kernels, so we try to probe
76 * it. In the future (if we ever run into issues regarding 2.), we might want
77 * to disable THP in case we fail to properly probe the THP size, or if the
78 * block size is configured smaller than the THP size.
/* THP size in bytes as stored by virtio_mem_thp_size(); 0 until it was set. */
80 static uint32_t thp_size
;
82 #define HPAGE_PMD_SIZE_PATH "/sys/kernel/mm/transparent_hugepage/hpage_pmd_size"
83 static uint32_t virtio_mem_thp_size(void)
85 gchar
*content
= NULL
;
94 * Try to probe the actual THP size, fallback to (sane but eventually
95 * incorrect) default sizes.
97 if (g_file_get_contents(HPAGE_PMD_SIZE_PATH
, &content
, NULL
, NULL
) &&
98 !qemu_strtou64(content
, &endptr
, 0, &tmp
) &&
99 (!endptr
|| *endptr
== '\n')) {
100 /* Sanity-check the value and fallback to something reasonable. */
101 if (!tmp
|| !is_power_of_2(tmp
)) {
102 warn_report("Read unsupported THP size: %" PRIx64
, tmp
);
109 thp_size
= virtio_mem_default_thp_size();
110 warn_report("Could not detect THP size, falling back to %" PRIx64
111 " MiB.", thp_size
/ MiB
);
118 static uint64_t virtio_mem_default_block_size(RAMBlock
*rb
)
120 const uint64_t page_size
= qemu_ram_pagesize(rb
);
122 /* We can have hugetlbfs with a page size smaller than the THP size. */
123 if (page_size
== qemu_real_host_page_size()) {
124 return MAX(page_size
, virtio_mem_thp_size());
126 return MAX(page_size
, VIRTIO_MEM_MIN_BLOCK_SIZE
);
#if defined(VIRTIO_MEM_HAS_LEGACY_GUESTS)
/*
 * Tell whether reading unplugged memory of @rb is backed by the shared
 * zeropage.
 *
 * We only have a guaranteed shared zeropage on ordinary MAP_PRIVATE
 * anonymous RAM. In any other case, reading unplugged *can* populate a
 * fresh page, consuming actual memory.
 */
static bool virtio_mem_has_shared_zeropage(RAMBlock *rb)
{
    if (qemu_ram_is_shared(rb)) {
        return false;
    }
    if (qemu_ram_get_fd(rb) >= 0) {
        return false;
    }
    return qemu_ram_pagesize(rb) == qemu_real_host_page_size();
}
#endif /* VIRTIO_MEM_HAS_LEGACY_GUESTS */
143 * Size the usable region bigger than the requested size if possible. Esp.
144 * Linux guests will only add (aligned) memory blocks in case they fully
145 * fit into the usable region, but plug+online only a subset of the pages.
146 * The memory block size corresponds mostly to the section size.
148 * This allows e.g., to add 20MB with a section size of 128MB on x86_64, and
149 * a section size of 512MB on arm64 (as long as the start address is properly
150 * aligned, similar to ordinary DIMMs).
152 * We can change this at any time and maybe even make it configurable if
153 * necessary (as the section size can change). But it's more likely that the
154 * section size will rather get smaller and not bigger over time.
/* x86: twice the 128 MiB section size mentioned above. */
156 #if defined(TARGET_X86_64) || defined(TARGET_I386)
157 #define VIRTIO_MEM_USABLE_EXTENT (2 * (128 * MiB))
/* Arm: twice the 512 MiB section size mentioned above. */
158 #elif defined(TARGET_ARM)
159 #define VIRTIO_MEM_USABLE_EXTENT (2 * (512 * MiB))
/* No extent is defined for the remaining case: fail the build. */
161 #error VIRTIO_MEM_USABLE_EXTENT not defined
/*
 * Tell whether plug/unplug requests currently have to be refused because a
 * migration is in the way.
 */
static bool virtio_mem_is_busy(void)
{
    /*
     * Postcopy cannot handle concurrent discards and we don't want to migrate
     * pages on-demand with stale content when plugging new blocks.
     */
    if (migration_in_incoming_postcopy()) {
        return true;
    }

    /*
     * For precopy, we don't want unplugged blocks in our migration stream, and
     * when plugging new blocks, the page content might differ between source
     * and destination (observable by the guest when not initializing pages
     * after plugging them) until we're running on the destination (as we didn't
     * migrate these blocks when they were unplugged).
     */
    return !migration_is_idle();
}
/*
 * Signature of the callbacks handed to virtio_mem_for_each_plugged_range()
 * and virtio_mem_for_each_unplugged_range(): the device, the caller-supplied
 * argument, and one range whose offset and size are computed as bitmap
 * indices multiplied by the block size.
 */
179 typedef int (*virtio_mem_range_cb
)(const VirtIOMEM
*vmem
, void *arg
,
180 uint64_t offset
, uint64_t size
);
182 static int virtio_mem_for_each_unplugged_range(const VirtIOMEM
*vmem
, void *arg
,
183 virtio_mem_range_cb cb
)
185 unsigned long first_zero_bit
, last_zero_bit
;
186 uint64_t offset
, size
;
189 first_zero_bit
= find_first_zero_bit(vmem
->bitmap
, vmem
->bitmap_size
);
190 while (first_zero_bit
< vmem
->bitmap_size
) {
191 offset
= first_zero_bit
* vmem
->block_size
;
192 last_zero_bit
= find_next_bit(vmem
->bitmap
, vmem
->bitmap_size
,
193 first_zero_bit
+ 1) - 1;
194 size
= (last_zero_bit
- first_zero_bit
+ 1) * vmem
->block_size
;
196 ret
= cb(vmem
, arg
, offset
, size
);
200 first_zero_bit
= find_next_zero_bit(vmem
->bitmap
, vmem
->bitmap_size
,
206 static int virtio_mem_for_each_plugged_range(const VirtIOMEM
*vmem
, void *arg
,
207 virtio_mem_range_cb cb
)
209 unsigned long first_bit
, last_bit
;
210 uint64_t offset
, size
;
213 first_bit
= find_first_bit(vmem
->bitmap
, vmem
->bitmap_size
);
214 while (first_bit
< vmem
->bitmap_size
) {
215 offset
= first_bit
* vmem
->block_size
;
216 last_bit
= find_next_zero_bit(vmem
->bitmap
, vmem
->bitmap_size
,
218 size
= (last_bit
- first_bit
+ 1) * vmem
->block_size
;
220 ret
= cb(vmem
, arg
, offset
, size
);
224 first_bit
= find_next_bit(vmem
->bitmap
, vmem
->bitmap_size
,
231 * Adjust the memory section to cover the intersection with the given range.
233 * Returns false if the intersection is empty, otherwise returns true.
235 static bool virtio_mem_intersect_memory_section(MemoryRegionSection
*s
,
236 uint64_t offset
, uint64_t size
)
238 uint64_t start
= MAX(s
->offset_within_region
, offset
);
239 uint64_t end
= MIN(s
->offset_within_region
+ int128_get64(s
->size
),
246 s
->offset_within_address_space
+= start
- s
->offset_within_region
;
247 s
->offset_within_region
= start
;
248 s
->size
= int128_make64(end
- start
);
/*
 * Signature of the callbacks handed to virtio_mem_for_each_plugged_section()
 * and virtio_mem_for_each_unplugged_section(): a memory region section and
 * the caller-supplied argument.
 */
252 typedef int (*virtio_mem_section_cb
)(MemoryRegionSection
*s
, void *arg
);
254 static int virtio_mem_for_each_plugged_section(const VirtIOMEM
*vmem
,
255 MemoryRegionSection
*s
,
257 virtio_mem_section_cb cb
)
259 unsigned long first_bit
, last_bit
;
260 uint64_t offset
, size
;
263 first_bit
= s
->offset_within_region
/ vmem
->block_size
;
264 first_bit
= find_next_bit(vmem
->bitmap
, vmem
->bitmap_size
, first_bit
);
265 while (first_bit
< vmem
->bitmap_size
) {
266 MemoryRegionSection tmp
= *s
;
268 offset
= first_bit
* vmem
->block_size
;
269 last_bit
= find_next_zero_bit(vmem
->bitmap
, vmem
->bitmap_size
,
271 size
= (last_bit
- first_bit
+ 1) * vmem
->block_size
;
273 if (!virtio_mem_intersect_memory_section(&tmp
, offset
, size
)) {
280 first_bit
= find_next_bit(vmem
->bitmap
, vmem
->bitmap_size
,
286 static int virtio_mem_for_each_unplugged_section(const VirtIOMEM
*vmem
,
287 MemoryRegionSection
*s
,
289 virtio_mem_section_cb cb
)
291 unsigned long first_bit
, last_bit
;
292 uint64_t offset
, size
;
295 first_bit
= s
->offset_within_region
/ vmem
->block_size
;
296 first_bit
= find_next_zero_bit(vmem
->bitmap
, vmem
->bitmap_size
, first_bit
);
297 while (first_bit
< vmem
->bitmap_size
) {
298 MemoryRegionSection tmp
= *s
;
300 offset
= first_bit
* vmem
->block_size
;
301 last_bit
= find_next_bit(vmem
->bitmap
, vmem
->bitmap_size
,
303 size
= (last_bit
- first_bit
+ 1) * vmem
->block_size
;
305 if (!virtio_mem_intersect_memory_section(&tmp
, offset
, size
)) {
312 first_bit
= find_next_zero_bit(vmem
->bitmap
, vmem
->bitmap_size
,
318 static int virtio_mem_notify_populate_cb(MemoryRegionSection
*s
, void *arg
)
320 RamDiscardListener
*rdl
= arg
;
322 return rdl
->notify_populate(rdl
, s
);
325 static int virtio_mem_notify_discard_cb(MemoryRegionSection
*s
, void *arg
)
327 RamDiscardListener
*rdl
= arg
;
329 rdl
->notify_discard(rdl
, s
);
333 static void virtio_mem_notify_unplug(VirtIOMEM
*vmem
, uint64_t offset
,
336 RamDiscardListener
*rdl
;
338 QLIST_FOREACH(rdl
, &vmem
->rdl_list
, next
) {
339 MemoryRegionSection tmp
= *rdl
->section
;
341 if (!virtio_mem_intersect_memory_section(&tmp
, offset
, size
)) {
344 rdl
->notify_discard(rdl
, &tmp
);
348 static int virtio_mem_notify_plug(VirtIOMEM
*vmem
, uint64_t offset
,
351 RamDiscardListener
*rdl
, *rdl2
;
354 QLIST_FOREACH(rdl
, &vmem
->rdl_list
, next
) {
355 MemoryRegionSection tmp
= *rdl
->section
;
357 if (!virtio_mem_intersect_memory_section(&tmp
, offset
, size
)) {
360 ret
= rdl
->notify_populate(rdl
, &tmp
);
367 /* Notify all already-notified listeners. */
368 QLIST_FOREACH(rdl2
, &vmem
->rdl_list
, next
) {
369 MemoryRegionSection tmp
= *rdl2
->section
;
374 if (!virtio_mem_intersect_memory_section(&tmp
, offset
, size
)) {
377 rdl2
->notify_discard(rdl2
, &tmp
);
383 static void virtio_mem_notify_unplug_all(VirtIOMEM
*vmem
)
385 RamDiscardListener
*rdl
;
391 QLIST_FOREACH(rdl
, &vmem
->rdl_list
, next
) {
392 if (rdl
->double_discard_supported
) {
393 rdl
->notify_discard(rdl
, rdl
->section
);
395 virtio_mem_for_each_plugged_section(vmem
, rdl
->section
, rdl
,
396 virtio_mem_notify_discard_cb
);
401 static bool virtio_mem_is_range_plugged(const VirtIOMEM
*vmem
,
402 uint64_t start_gpa
, uint64_t size
)
404 const unsigned long first_bit
= (start_gpa
- vmem
->addr
) / vmem
->block_size
;
405 const unsigned long last_bit
= first_bit
+ (size
/ vmem
->block_size
) - 1;
406 unsigned long found_bit
;
408 /* We fake a shorter bitmap to avoid searching too far. */
409 found_bit
= find_next_zero_bit(vmem
->bitmap
, last_bit
+ 1, first_bit
);
410 return found_bit
> last_bit
;
413 static bool virtio_mem_is_range_unplugged(const VirtIOMEM
*vmem
,
414 uint64_t start_gpa
, uint64_t size
)
416 const unsigned long first_bit
= (start_gpa
- vmem
->addr
) / vmem
->block_size
;
417 const unsigned long last_bit
= first_bit
+ (size
/ vmem
->block_size
) - 1;
418 unsigned long found_bit
;
420 /* We fake a shorter bitmap to avoid searching too far. */
421 found_bit
= find_next_bit(vmem
->bitmap
, last_bit
+ 1, first_bit
);
422 return found_bit
> last_bit
;
425 static void virtio_mem_set_range_plugged(VirtIOMEM
*vmem
, uint64_t start_gpa
,
428 const unsigned long bit
= (start_gpa
- vmem
->addr
) / vmem
->block_size
;
429 const unsigned long nbits
= size
/ vmem
->block_size
;
431 bitmap_set(vmem
->bitmap
, bit
, nbits
);
434 static void virtio_mem_set_range_unplugged(VirtIOMEM
*vmem
, uint64_t start_gpa
,
437 const unsigned long bit
= (start_gpa
- vmem
->addr
) / vmem
->block_size
;
438 const unsigned long nbits
= size
/ vmem
->block_size
;
440 bitmap_clear(vmem
->bitmap
, bit
, nbits
);
443 static void virtio_mem_send_response(VirtIOMEM
*vmem
, VirtQueueElement
*elem
,
444 struct virtio_mem_resp
*resp
)
446 VirtIODevice
*vdev
= VIRTIO_DEVICE(vmem
);
447 VirtQueue
*vq
= vmem
->vq
;
449 trace_virtio_mem_send_response(le16_to_cpu(resp
->type
));
450 iov_from_buf(elem
->in_sg
, elem
->in_num
, 0, resp
, sizeof(*resp
));
452 virtqueue_push(vq
, elem
, sizeof(*resp
));
453 virtio_notify(vdev
, vq
);
456 static void virtio_mem_send_response_simple(VirtIOMEM
*vmem
,
457 VirtQueueElement
*elem
,
460 struct virtio_mem_resp resp
= {
461 .type
= cpu_to_le16(type
),
464 virtio_mem_send_response(vmem
, elem
, &resp
);
467 static bool virtio_mem_valid_range(const VirtIOMEM
*vmem
, uint64_t gpa
,
470 if (!QEMU_IS_ALIGNED(gpa
, vmem
->block_size
)) {
473 if (gpa
+ size
< gpa
|| !size
) {
476 if (gpa
< vmem
->addr
|| gpa
>= vmem
->addr
+ vmem
->usable_region_size
) {
479 if (gpa
+ size
> vmem
->addr
+ vmem
->usable_region_size
) {
/*
 * Change the state of the blocks covering @size bytes starting at @start_gpa;
 * @plug selects the direction.
 *
 * NOTE(review): several control-flow lines of this function are not visible
 * in this extract; the comments below describe only the calls that are.
 */
485 static int virtio_mem_set_block_state(VirtIOMEM
*vmem
, uint64_t start_gpa
,
486 uint64_t size
, bool plug
)
/* Offset of the range within the memory backend's region. */
488 const uint64_t offset
= start_gpa
- vmem
->addr
;
489 RAMBlock
*rb
= vmem
->memdev
->mr
.ram_block
;
/* State changes are checked against virtio_mem_is_busy() first. */
492 if (virtio_mem_is_busy()) {
/* Discard the backing memory, then notify listeners and clear the bits. */
497 if (ram_block_discard_range(rb
, offset
, size
)) {
500 virtio_mem_notify_unplug(vmem
, offset
, size
);
501 virtio_mem_set_range_unplugged(vmem
, start_gpa
, size
);
/* With the prealloc property set, preallocate the range of the backend. */
505 if (vmem
->prealloc
) {
506 void *area
= memory_region_get_ram_ptr(&vmem
->memdev
->mr
) + offset
;
507 int fd
= memory_region_get_fd(&vmem
->memdev
->mr
);
508 Error
*local_err
= NULL
;
510 qemu_prealloc_mem(fd
, area
, size
, 1, NULL
, &local_err
);
515 * Warn only once, we don't want to fill the log with these
/* local_err is consumed either by reporting it or by freeing it. */
519 warn_report_err(local_err
);
522 error_free(local_err
);
/* Let the listeners populate the range. */
529 ret
= virtio_mem_notify_plug(vmem
, offset
, size
);
532 /* Could be preallocation or a notifier populated memory. */
533 ram_block_discard_range(vmem
->memdev
->mr
.ram_block
, offset
, size
);
/* Record the blocks as plugged in the bitmap. */
537 virtio_mem_set_range_plugged(vmem
, start_gpa
, size
);
541 static int virtio_mem_state_change_request(VirtIOMEM
*vmem
, uint64_t gpa
,
542 uint16_t nb_blocks
, bool plug
)
544 const uint64_t size
= nb_blocks
* vmem
->block_size
;
547 if (!virtio_mem_valid_range(vmem
, gpa
, size
)) {
548 return VIRTIO_MEM_RESP_ERROR
;
551 if (plug
&& (vmem
->size
+ size
> vmem
->requested_size
)) {
552 return VIRTIO_MEM_RESP_NACK
;
555 /* test if really all blocks are in the opposite state */
556 if ((plug
&& !virtio_mem_is_range_unplugged(vmem
, gpa
, size
)) ||
557 (!plug
&& !virtio_mem_is_range_plugged(vmem
, gpa
, size
))) {
558 return VIRTIO_MEM_RESP_ERROR
;
561 ret
= virtio_mem_set_block_state(vmem
, gpa
, size
, plug
);
563 return VIRTIO_MEM_RESP_BUSY
;
570 notifier_list_notify(&vmem
->size_change_notifiers
, &vmem
->size
);
571 return VIRTIO_MEM_RESP_ACK
;
574 static void virtio_mem_plug_request(VirtIOMEM
*vmem
, VirtQueueElement
*elem
,
575 struct virtio_mem_req
*req
)
577 const uint64_t gpa
= le64_to_cpu(req
->u
.plug
.addr
);
578 const uint16_t nb_blocks
= le16_to_cpu(req
->u
.plug
.nb_blocks
);
581 trace_virtio_mem_plug_request(gpa
, nb_blocks
);
582 type
= virtio_mem_state_change_request(vmem
, gpa
, nb_blocks
, true);
583 virtio_mem_send_response_simple(vmem
, elem
, type
);
586 static void virtio_mem_unplug_request(VirtIOMEM
*vmem
, VirtQueueElement
*elem
,
587 struct virtio_mem_req
*req
)
589 const uint64_t gpa
= le64_to_cpu(req
->u
.unplug
.addr
);
590 const uint16_t nb_blocks
= le16_to_cpu(req
->u
.unplug
.nb_blocks
);
593 trace_virtio_mem_unplug_request(gpa
, nb_blocks
);
594 type
= virtio_mem_state_change_request(vmem
, gpa
, nb_blocks
, false);
595 virtio_mem_send_response_simple(vmem
, elem
, type
);
598 static void virtio_mem_resize_usable_region(VirtIOMEM
*vmem
,
599 uint64_t requested_size
,
602 uint64_t newsize
= MIN(memory_region_size(&vmem
->memdev
->mr
),
603 requested_size
+ VIRTIO_MEM_USABLE_EXTENT
);
605 /* The usable region size always has to be multiples of the block size. */
606 newsize
= QEMU_ALIGN_UP(newsize
, vmem
->block_size
);
608 if (!requested_size
) {
612 if (newsize
< vmem
->usable_region_size
&& !can_shrink
) {
616 trace_virtio_mem_resized_usable_region(vmem
->usable_region_size
, newsize
);
617 vmem
->usable_region_size
= newsize
;
620 static int virtio_mem_unplug_all(VirtIOMEM
*vmem
)
622 RAMBlock
*rb
= vmem
->memdev
->mr
.ram_block
;
624 if (virtio_mem_is_busy()) {
628 if (ram_block_discard_range(rb
, 0, qemu_ram_get_used_length(rb
))) {
631 virtio_mem_notify_unplug_all(vmem
);
633 bitmap_clear(vmem
->bitmap
, 0, vmem
->bitmap_size
);
636 notifier_list_notify(&vmem
->size_change_notifiers
, &vmem
->size
);
638 trace_virtio_mem_unplugged_all();
639 virtio_mem_resize_usable_region(vmem
, vmem
->requested_size
, true);
643 static void virtio_mem_unplug_all_request(VirtIOMEM
*vmem
,
644 VirtQueueElement
*elem
)
646 trace_virtio_mem_unplug_all_request();
647 if (virtio_mem_unplug_all(vmem
)) {
648 virtio_mem_send_response_simple(vmem
, elem
, VIRTIO_MEM_RESP_BUSY
);
650 virtio_mem_send_response_simple(vmem
, elem
, VIRTIO_MEM_RESP_ACK
);
654 static void virtio_mem_state_request(VirtIOMEM
*vmem
, VirtQueueElement
*elem
,
655 struct virtio_mem_req
*req
)
657 const uint16_t nb_blocks
= le16_to_cpu(req
->u
.state
.nb_blocks
);
658 const uint64_t gpa
= le64_to_cpu(req
->u
.state
.addr
);
659 const uint64_t size
= nb_blocks
* vmem
->block_size
;
660 struct virtio_mem_resp resp
= {
661 .type
= cpu_to_le16(VIRTIO_MEM_RESP_ACK
),
664 trace_virtio_mem_state_request(gpa
, nb_blocks
);
665 if (!virtio_mem_valid_range(vmem
, gpa
, size
)) {
666 virtio_mem_send_response_simple(vmem
, elem
, VIRTIO_MEM_RESP_ERROR
);
670 if (virtio_mem_is_range_plugged(vmem
, gpa
, size
)) {
671 resp
.u
.state
.state
= cpu_to_le16(VIRTIO_MEM_STATE_PLUGGED
);
672 } else if (virtio_mem_is_range_unplugged(vmem
, gpa
, size
)) {
673 resp
.u
.state
.state
= cpu_to_le16(VIRTIO_MEM_STATE_UNPLUGGED
);
675 resp
.u
.state
.state
= cpu_to_le16(VIRTIO_MEM_STATE_MIXED
);
677 trace_virtio_mem_state_response(le16_to_cpu(resp
.u
.state
.state
));
678 virtio_mem_send_response(vmem
, elem
, &resp
);
/*
 * Virtqueue handler (registered via virtio_add_queue() in realize): pops an
 * element, validates the request and response buffer sizes, and dispatches
 * on the request type.
 *
 * NOTE(review): the loop structure and parts of the error strings are not
 * visible in this extract; the comments describe only the visible calls.
 */
681 static void virtio_mem_handle_request(VirtIODevice
*vdev
, VirtQueue
*vq
)
683 const int len
= sizeof(struct virtio_mem_req
);
684 VirtIOMEM
*vmem
= VIRTIO_MEM(vdev
);
685 VirtQueueElement
*elem
;
686 struct virtio_mem_req req
;
690 elem
= virtqueue_pop(vq
, sizeof(VirtQueueElement
));
/* The driver-readable buffers must hold a complete request. */
695 if (iov_to_buf(elem
->out_sg
, elem
->out_num
, 0, &req
, len
) < len
) {
696 virtio_error(vdev
, "virtio-mem protocol violation: invalid request"
698 virtqueue_detach_element(vq
, elem
, 0);
/* The device-writable buffers must have room for a response. */
703 if (iov_size(elem
->in_sg
, elem
->in_num
) <
704 sizeof(struct virtio_mem_resp
)) {
705 virtio_error(vdev
, "virtio-mem protocol violation: not enough space"
706 " for response: %zu",
707 iov_size(elem
->in_sg
, elem
->in_num
));
708 virtqueue_detach_element(vq
, elem
, 0);
/* The request type arrives little-endian; dispatch on its host value. */
713 type
= le16_to_cpu(req
.type
);
715 case VIRTIO_MEM_REQ_PLUG
:
716 virtio_mem_plug_request(vmem
, elem
, &req
);
718 case VIRTIO_MEM_REQ_UNPLUG
:
719 virtio_mem_unplug_request(vmem
, elem
, &req
);
721 case VIRTIO_MEM_REQ_UNPLUG_ALL
:
722 virtio_mem_unplug_all_request(vmem
, elem
);
724 case VIRTIO_MEM_REQ_STATE
:
725 virtio_mem_state_request(vmem
, elem
, &req
);
/* Unknown request: reported as a protocol violation, element detached. */
728 virtio_error(vdev
, "virtio-mem protocol violation: unknown request"
730 virtqueue_detach_element(vq
, elem
, 0);
739 static void virtio_mem_get_config(VirtIODevice
*vdev
, uint8_t *config_data
)
741 VirtIOMEM
*vmem
= VIRTIO_MEM(vdev
);
742 struct virtio_mem_config
*config
= (void *) config_data
;
744 config
->block_size
= cpu_to_le64(vmem
->block_size
);
745 config
->node_id
= cpu_to_le16(vmem
->node
);
746 config
->requested_size
= cpu_to_le64(vmem
->requested_size
);
747 config
->plugged_size
= cpu_to_le64(vmem
->size
);
748 config
->addr
= cpu_to_le64(vmem
->addr
);
749 config
->region_size
= cpu_to_le64(memory_region_size(&vmem
->memdev
->mr
));
750 config
->usable_region_size
= cpu_to_le64(vmem
->usable_region_size
);
753 static uint64_t virtio_mem_get_features(VirtIODevice
*vdev
, uint64_t features
,
756 MachineState
*ms
= MACHINE(qdev_get_machine());
757 VirtIOMEM
*vmem
= VIRTIO_MEM(vdev
);
759 if (ms
->numa_state
) {
760 #if defined(CONFIG_ACPI)
761 virtio_add_feature(&features
, VIRTIO_MEM_F_ACPI_PXM
);
764 assert(vmem
->unplugged_inaccessible
!= ON_OFF_AUTO_AUTO
);
765 if (vmem
->unplugged_inaccessible
== ON_OFF_AUTO_ON
) {
766 virtio_add_feature(&features
, VIRTIO_MEM_F_UNPLUGGED_INACCESSIBLE
);
771 static int virtio_mem_validate_features(VirtIODevice
*vdev
)
773 if (virtio_host_has_feature(vdev
, VIRTIO_MEM_F_UNPLUGGED_INACCESSIBLE
) &&
774 !virtio_vdev_has_feature(vdev
, VIRTIO_MEM_F_UNPLUGGED_INACCESSIBLE
)) {
780 static void virtio_mem_system_reset(void *opaque
)
782 VirtIOMEM
*vmem
= VIRTIO_MEM(opaque
);
785 * During usual resets, we will unplug all memory and shrink the usable
786 * region size. This is, however, not possible in all scenarios. Then,
787 * the guest has to deal with this manually (VIRTIO_MEM_REQ_UNPLUG_ALL).
789 virtio_mem_unplug_all(vmem
);
792 static void virtio_mem_device_realize(DeviceState
*dev
, Error
**errp
)
794 MachineState
*ms
= MACHINE(qdev_get_machine());
795 int nb_numa_nodes
= ms
->numa_state
? ms
->numa_state
->num_nodes
: 0;
796 VirtIODevice
*vdev
= VIRTIO_DEVICE(dev
);
797 VirtIOMEM
*vmem
= VIRTIO_MEM(dev
);
803 error_setg(errp
, "'%s' property is not set", VIRTIO_MEM_MEMDEV_PROP
);
805 } else if (host_memory_backend_is_mapped(vmem
->memdev
)) {
806 error_setg(errp
, "'%s' property specifies a busy memdev: %s",
807 VIRTIO_MEM_MEMDEV_PROP
,
808 object_get_canonical_path_component(OBJECT(vmem
->memdev
)));
810 } else if (!memory_region_is_ram(&vmem
->memdev
->mr
) ||
811 memory_region_is_rom(&vmem
->memdev
->mr
) ||
812 !vmem
->memdev
->mr
.ram_block
) {
813 error_setg(errp
, "'%s' property specifies an unsupported memdev",
814 VIRTIO_MEM_MEMDEV_PROP
);
816 } else if (vmem
->memdev
->prealloc
) {
817 error_setg(errp
, "'%s' property specifies a memdev with preallocation"
818 " enabled: %s. Instead, specify 'prealloc=on' for the"
819 " virtio-mem device. ", VIRTIO_MEM_MEMDEV_PROP
,
820 object_get_canonical_path_component(OBJECT(vmem
->memdev
)));
824 if ((nb_numa_nodes
&& vmem
->node
>= nb_numa_nodes
) ||
825 (!nb_numa_nodes
&& vmem
->node
)) {
826 error_setg(errp
, "'%s' property has value '%" PRIu32
"', which exceeds"
827 "the number of numa nodes: %d", VIRTIO_MEM_NODE_PROP
,
828 vmem
->node
, nb_numa_nodes
? nb_numa_nodes
: 1);
833 error_setg(errp
, "Incompatible with mlock");
837 rb
= vmem
->memdev
->mr
.ram_block
;
838 page_size
= qemu_ram_pagesize(rb
);
840 #if defined(VIRTIO_MEM_HAS_LEGACY_GUESTS)
841 switch (vmem
->unplugged_inaccessible
) {
842 case ON_OFF_AUTO_AUTO
:
843 if (virtio_mem_has_shared_zeropage(rb
)) {
844 vmem
->unplugged_inaccessible
= ON_OFF_AUTO_OFF
;
846 vmem
->unplugged_inaccessible
= ON_OFF_AUTO_ON
;
849 case ON_OFF_AUTO_OFF
:
850 if (!virtio_mem_has_shared_zeropage(rb
)) {
851 warn_report("'%s' property set to 'off' with a memdev that does"
852 " not support the shared zeropage.",
853 VIRTIO_MEM_UNPLUGGED_INACCESSIBLE_PROP
);
859 #else /* VIRTIO_MEM_HAS_LEGACY_GUESTS */
860 vmem
->unplugged_inaccessible
= ON_OFF_AUTO_ON
;
861 #endif /* VIRTIO_MEM_HAS_LEGACY_GUESTS */
864 * If the block size wasn't configured by the user, use a sane default. This
865 * allows using hugetlbfs backends of any page size without manual
868 if (!vmem
->block_size
) {
869 vmem
->block_size
= virtio_mem_default_block_size(rb
);
872 if (vmem
->block_size
< page_size
) {
873 error_setg(errp
, "'%s' property has to be at least the page size (0x%"
874 PRIx64
")", VIRTIO_MEM_BLOCK_SIZE_PROP
, page_size
);
876 } else if (vmem
->block_size
< virtio_mem_default_block_size(rb
)) {
877 warn_report("'%s' property is smaller than the default block size (%"
878 PRIx64
" MiB)", VIRTIO_MEM_BLOCK_SIZE_PROP
,
879 virtio_mem_default_block_size(rb
) / MiB
);
881 if (!QEMU_IS_ALIGNED(vmem
->requested_size
, vmem
->block_size
)) {
882 error_setg(errp
, "'%s' property has to be multiples of '%s' (0x%" PRIx64
883 ")", VIRTIO_MEM_REQUESTED_SIZE_PROP
,
884 VIRTIO_MEM_BLOCK_SIZE_PROP
, vmem
->block_size
);
886 } else if (!QEMU_IS_ALIGNED(vmem
->addr
, vmem
->block_size
)) {
887 error_setg(errp
, "'%s' property has to be multiples of '%s' (0x%" PRIx64
888 ")", VIRTIO_MEM_ADDR_PROP
, VIRTIO_MEM_BLOCK_SIZE_PROP
,
891 } else if (!QEMU_IS_ALIGNED(memory_region_size(&vmem
->memdev
->mr
),
893 error_setg(errp
, "'%s' property memdev size has to be multiples of"
894 "'%s' (0x%" PRIx64
")", VIRTIO_MEM_MEMDEV_PROP
,
895 VIRTIO_MEM_BLOCK_SIZE_PROP
, vmem
->block_size
);
899 if (ram_block_coordinated_discard_require(true)) {
900 error_setg(errp
, "Discarding RAM is disabled");
904 ret
= ram_block_discard_range(rb
, 0, qemu_ram_get_used_length(rb
));
906 error_setg_errno(errp
, -ret
, "Unexpected error discarding RAM");
907 ram_block_coordinated_discard_require(false);
911 virtio_mem_resize_usable_region(vmem
, vmem
->requested_size
, true);
913 vmem
->bitmap_size
= memory_region_size(&vmem
->memdev
->mr
) /
915 vmem
->bitmap
= bitmap_new(vmem
->bitmap_size
);
917 virtio_init(vdev
, VIRTIO_ID_MEM
, sizeof(struct virtio_mem_config
));
918 vmem
->vq
= virtio_add_queue(vdev
, 128, virtio_mem_handle_request
);
920 host_memory_backend_set_mapped(vmem
->memdev
, true);
921 vmstate_register_ram(&vmem
->memdev
->mr
, DEVICE(vmem
));
922 if (vmem
->early_migration
) {
923 vmstate_register(VMSTATE_IF(vmem
), VMSTATE_INSTANCE_ID_ANY
,
924 &vmstate_virtio_mem_device_early
, vmem
);
926 qemu_register_reset(virtio_mem_system_reset
, vmem
);
929 * Set ourselves as RamDiscardManager before the plug handler maps the
930 * memory region and exposes it via an address space.
932 memory_region_set_ram_discard_manager(&vmem
->memdev
->mr
,
933 RAM_DISCARD_MANAGER(vmem
));
936 static void virtio_mem_device_unrealize(DeviceState
*dev
)
938 VirtIODevice
*vdev
= VIRTIO_DEVICE(dev
);
939 VirtIOMEM
*vmem
= VIRTIO_MEM(dev
);
942 * The unplug handler unmapped the memory region, it cannot be
943 * found via an address space anymore. Unset ourselves.
945 memory_region_set_ram_discard_manager(&vmem
->memdev
->mr
, NULL
);
946 qemu_unregister_reset(virtio_mem_system_reset
, vmem
);
947 if (vmem
->early_migration
) {
948 vmstate_unregister(VMSTATE_IF(vmem
), &vmstate_virtio_mem_device_early
,
951 vmstate_unregister_ram(&vmem
->memdev
->mr
, DEVICE(vmem
));
952 host_memory_backend_set_mapped(vmem
->memdev
, false);
953 virtio_del_queue(vdev
, 0);
954 virtio_cleanup(vdev
);
955 g_free(vmem
->bitmap
);
956 ram_block_coordinated_discard_require(false);
959 static int virtio_mem_discard_range_cb(const VirtIOMEM
*vmem
, void *arg
,
960 uint64_t offset
, uint64_t size
)
962 RAMBlock
*rb
= vmem
->memdev
->mr
.ram_block
;
964 return ram_block_discard_range(rb
, offset
, size
) ? -EINVAL
: 0;
967 static int virtio_mem_restore_unplugged(VirtIOMEM
*vmem
)
969 /* Make sure all memory is really discarded after migration. */
970 return virtio_mem_for_each_unplugged_range(vmem
, NULL
,
971 virtio_mem_discard_range_cb
);
974 static int virtio_mem_post_load(void *opaque
, int version_id
)
976 VirtIOMEM
*vmem
= VIRTIO_MEM(opaque
);
977 RamDiscardListener
*rdl
;
980 if (vmem
->prealloc
&& !vmem
->early_migration
) {
981 warn_report("Proper preallocation with migration requires a newer QEMU machine");
985 * We started out with all memory discarded and our memory region is mapped
986 * into an address space. Replay, now that we updated the bitmap.
988 QLIST_FOREACH(rdl
, &vmem
->rdl_list
, next
) {
989 ret
= virtio_mem_for_each_plugged_section(vmem
, rdl
->section
, rdl
,
990 virtio_mem_notify_populate_cb
);
996 if (migration_in_incoming_postcopy()) {
1000 return virtio_mem_restore_unplugged(vmem
);
1003 static int virtio_mem_prealloc_range_cb(const VirtIOMEM
*vmem
, void *arg
,
1004 uint64_t offset
, uint64_t size
)
1006 void *area
= memory_region_get_ram_ptr(&vmem
->memdev
->mr
) + offset
;
1007 int fd
= memory_region_get_fd(&vmem
->memdev
->mr
);
1008 Error
*local_err
= NULL
;
1010 qemu_prealloc_mem(fd
, area
, size
, 1, NULL
, &local_err
);
1012 error_report_err(local_err
);
1018 static int virtio_mem_post_load_early(void *opaque
, int version_id
)
1020 VirtIOMEM
*vmem
= VIRTIO_MEM(opaque
);
1021 RAMBlock
*rb
= vmem
->memdev
->mr
.ram_block
;
1024 if (!vmem
->prealloc
) {
1029 * We restored the bitmap and verified that the basic properties
1030 * match on source and destination, so we can go ahead and preallocate
1031 * memory for all plugged memory blocks, before actual RAM migration starts
1032 * touching this memory.
1034 ret
= virtio_mem_for_each_plugged_range(vmem
, NULL
,
1035 virtio_mem_prealloc_range_cb
);
1041 * This is tricky: postcopy wants to start with a clean slate. On
1042 * POSTCOPY_INCOMING_ADVISE, postcopy code discards all (ordinarily
1043 * preallocated) RAM such that postcopy will work as expected later.
1045 * However, we run after POSTCOPY_INCOMING_ADVISE -- but before actual
1046 * RAM migration. So let's discard all memory again. This looks like an
1047 * expensive NOP, but actually serves a purpose: we made sure that we
1048 * were able to allocate all required backend memory once. We cannot
1049 * guarantee that the backend memory we will free will remain free
1050 * until we need it during postcopy, but at least we can catch the
1051 * obvious setup issues this way.
1053 if (migration_incoming_postcopy_advised()) {
1054 if (ram_block_discard_range(rb
, 0, qemu_ram_get_used_length(rb
))) {
/*
 * Temporary state of the "virtio-mem-device/sanity-checks" vmstate
 * description: a snapshot of device properties that is filled in before
 * saving and compared against the destination device after loading.
 */
1061 typedef struct VirtIOMEMMigSanityChecks
{
/* Size of the memdev's memory region, in bytes. */
1064 uint64_t region_size
;
/* Block size of the device. */
1065 uint64_t block_size
;
1067 } VirtIOMEMMigSanityChecks
;
1069 static int virtio_mem_mig_sanity_checks_pre_save(void *opaque
)
1071 VirtIOMEMMigSanityChecks
*tmp
= opaque
;
1072 VirtIOMEM
*vmem
= tmp
->parent
;
1074 tmp
->addr
= vmem
->addr
;
1075 tmp
->region_size
= memory_region_size(&vmem
->memdev
->mr
);
1076 tmp
->block_size
= vmem
->block_size
;
1077 tmp
->node
= vmem
->node
;
1081 static int virtio_mem_mig_sanity_checks_post_load(void *opaque
, int version_id
)
1083 VirtIOMEMMigSanityChecks
*tmp
= opaque
;
1084 VirtIOMEM
*vmem
= tmp
->parent
;
1085 const uint64_t new_region_size
= memory_region_size(&vmem
->memdev
->mr
);
1087 if (tmp
->addr
!= vmem
->addr
) {
1088 error_report("Property '%s' changed from 0x%" PRIx64
" to 0x%" PRIx64
,
1089 VIRTIO_MEM_ADDR_PROP
, tmp
->addr
, vmem
->addr
);
1093 * Note: Preparation for resizeable memory regions. The maximum size
1094 * of the memory region must not change during migration.
1096 if (tmp
->region_size
!= new_region_size
) {
1097 error_report("Property '%s' size changed from 0x%" PRIx64
" to 0x%"
1098 PRIx64
, VIRTIO_MEM_MEMDEV_PROP
, tmp
->region_size
,
1102 if (tmp
->block_size
!= vmem
->block_size
) {
1103 error_report("Property '%s' changed from 0x%" PRIx64
" to 0x%" PRIx64
,
1104 VIRTIO_MEM_BLOCK_SIZE_PROP
, tmp
->block_size
,
1108 if (tmp
->node
!= vmem
->node
) {
1109 error_report("Property '%s' changed from %" PRIu32
" to %" PRIu32
,
1110 VIRTIO_MEM_NODE_PROP
, tmp
->node
, vmem
->node
);
/*
 * Sub-description that migrates the property snapshot held in
 * VirtIOMEMMigSanityChecks. pre_save fills the snapshot, post_load compares
 * it against the local device. The field order is part of the stream format.
 */
1116 static const VMStateDescription vmstate_virtio_mem_sanity_checks
= {
1117 .name
= "virtio-mem-device/sanity-checks",
1118 .pre_save
= virtio_mem_mig_sanity_checks_pre_save
,
1119 .post_load
= virtio_mem_mig_sanity_checks_post_load
,
1120 .fields
= (VMStateField
[]) {
1121 VMSTATE_UINT64(addr
, VirtIOMEMMigSanityChecks
),
1122 VMSTATE_UINT64(region_size
, VirtIOMEMMigSanityChecks
),
1123 VMSTATE_UINT64(block_size
, VirtIOMEMMigSanityChecks
),
1124 VMSTATE_UINT32(node
, VirtIOMEMMigSanityChecks
),
1125 VMSTATE_END_OF_LIST(),
1129 static bool virtio_mem_vmstate_field_exists(void *opaque
, int version_id
)
1131 const VirtIOMEM
*vmem
= VIRTIO_MEM(opaque
);
1133 /* With early migration, these fields were already migrated. */
1134 return !vmem
->early_migration
;
/*
 * Device state of virtio-mem. Entries guarded by
 * virtio_mem_vmstate_field_exists() are only part of this description when
 * early migration is disabled. The field order is part of the stream format.
 */
1137 static const VMStateDescription vmstate_virtio_mem_device
= {
1138 .name
= "virtio-mem-device",
1139 .minimum_version_id
= 1,
1141 .priority
= MIG_PRI_VIRTIO_MEM
,
1142 .post_load
= virtio_mem_post_load
,
1143 .fields
= (VMStateField
[]) {
/* Property snapshot, validated on load. */
1144 VMSTATE_WITH_TMP_TEST(VirtIOMEM
, virtio_mem_vmstate_field_exists
,
1145 VirtIOMEMMigSanityChecks
,
1146 vmstate_virtio_mem_sanity_checks
),
1147 VMSTATE_UINT64(usable_region_size
, VirtIOMEM
),
1148 VMSTATE_UINT64_TEST(size
, VirtIOMEM
, virtio_mem_vmstate_field_exists
),
1149 VMSTATE_UINT64(requested_size
, VirtIOMEM
),
1150 VMSTATE_BITMAP_TEST(bitmap
, VirtIOMEM
, virtio_mem_vmstate_field_exists
,
1152 VMSTATE_END_OF_LIST()
1157 * Transfer properties that are immutable while migration is active early,
1158 * such that we have this information around before migrating any RAM
1161 * Note that virtio_mem_is_busy() makes sure these properties can no longer
1162 * change on the migration source until migration completed.
1164 * With QEMU compat machines, we transmit these properties later, via
1165 * vmstate_virtio_mem_device instead -- see virtio_mem_vmstate_field_exists().
1167 static const VMStateDescription vmstate_virtio_mem_device_early
= {
1168 .name
= "virtio-mem-device-early",
1169 .minimum_version_id
= 1,
1171 .early_setup
= true,
1172 .post_load
= virtio_mem_post_load_early
,
1173 .fields
= (VMStateField
[]) {
1174 VMSTATE_WITH_TMP(VirtIOMEM
, VirtIOMEMMigSanityChecks
,
1175 vmstate_virtio_mem_sanity_checks
),
1176 VMSTATE_UINT64(size
, VirtIOMEM
),
1177 VMSTATE_BITMAP(bitmap
, VirtIOMEM
, 0, bitmap_size
),
1178 VMSTATE_END_OF_LIST()
1182 static const VMStateDescription vmstate_virtio_mem
= {
1183 .name
= "virtio-mem",
1184 .minimum_version_id
= 1,
1186 .fields
= (VMStateField
[]) {
1187 VMSTATE_VIRTIO_DEVICE
,
1188 VMSTATE_END_OF_LIST()
1192 static void virtio_mem_fill_device_info(const VirtIOMEM
*vmem
,
1193 VirtioMEMDeviceInfo
*vi
)
1195 vi
->memaddr
= vmem
->addr
;
1196 vi
->node
= vmem
->node
;
1197 vi
->requested_size
= vmem
->requested_size
;
1198 vi
->size
= vmem
->size
;
1199 vi
->max_size
= memory_region_size(&vmem
->memdev
->mr
);
1200 vi
->block_size
= vmem
->block_size
;
1201 vi
->memdev
= object_get_canonical_path(OBJECT(vmem
->memdev
));
1204 static MemoryRegion
*virtio_mem_get_memory_region(VirtIOMEM
*vmem
, Error
**errp
)
1206 if (!vmem
->memdev
) {
1207 error_setg(errp
, "'%s' property must be set", VIRTIO_MEM_MEMDEV_PROP
);
1211 return &vmem
->memdev
->mr
;
1214 static void virtio_mem_add_size_change_notifier(VirtIOMEM
*vmem
,
1217 notifier_list_add(&vmem
->size_change_notifiers
, notifier
);
1220 static void virtio_mem_remove_size_change_notifier(VirtIOMEM
*vmem
,
1223 notifier_remove(notifier
);
1226 static void virtio_mem_get_size(Object
*obj
, Visitor
*v
, const char *name
,
1227 void *opaque
, Error
**errp
)
1229 const VirtIOMEM
*vmem
= VIRTIO_MEM(obj
);
1230 uint64_t value
= vmem
->size
;
1232 visit_type_size(v
, name
, &value
, errp
);
1235 static void virtio_mem_get_requested_size(Object
*obj
, Visitor
*v
,
1236 const char *name
, void *opaque
,
1239 const VirtIOMEM
*vmem
= VIRTIO_MEM(obj
);
1240 uint64_t value
= vmem
->requested_size
;
1242 visit_type_size(v
, name
, &value
, errp
);
1245 static void virtio_mem_set_requested_size(Object
*obj
, Visitor
*v
,
1246 const char *name
, void *opaque
,
1249 VirtIOMEM
*vmem
= VIRTIO_MEM(obj
);
1252 if (!visit_type_size(v
, name
, &value
, errp
)) {
1257 * The block size and memory backend are not fixed until the device was
1258 * realized. realize() will verify these properties then.
1260 if (DEVICE(obj
)->realized
) {
1261 if (!QEMU_IS_ALIGNED(value
, vmem
->block_size
)) {
1262 error_setg(errp
, "'%s' has to be multiples of '%s' (0x%" PRIx64
1263 ")", name
, VIRTIO_MEM_BLOCK_SIZE_PROP
,
1266 } else if (value
> memory_region_size(&vmem
->memdev
->mr
)) {
1267 error_setg(errp
, "'%s' cannot exceed the memory backend size"
1268 "(0x%" PRIx64
")", name
,
1269 memory_region_size(&vmem
->memdev
->mr
));
1273 if (value
!= vmem
->requested_size
) {
1274 virtio_mem_resize_usable_region(vmem
, value
, false);
1275 vmem
->requested_size
= value
;
1278 * Trigger a config update so the guest gets notified. We trigger
1279 * even if the size didn't change (especially helpful for debugging).
1281 virtio_notify_config(VIRTIO_DEVICE(vmem
));
1283 vmem
->requested_size
= value
;
1287 static void virtio_mem_get_block_size(Object
*obj
, Visitor
*v
, const char *name
,
1288 void *opaque
, Error
**errp
)
1290 const VirtIOMEM
*vmem
= VIRTIO_MEM(obj
);
1291 uint64_t value
= vmem
->block_size
;
1294 * If not configured by the user (and we're not realized yet), use the
1295 * default block size we would use with the current memory backend.
1298 if (vmem
->memdev
&& memory_region_is_ram(&vmem
->memdev
->mr
)) {
1299 value
= virtio_mem_default_block_size(vmem
->memdev
->mr
.ram_block
);
1301 value
= virtio_mem_thp_size();
1305 visit_type_size(v
, name
, &value
, errp
);
1308 static void virtio_mem_set_block_size(Object
*obj
, Visitor
*v
, const char *name
,
1309 void *opaque
, Error
**errp
)
1311 VirtIOMEM
*vmem
= VIRTIO_MEM(obj
);
1314 if (DEVICE(obj
)->realized
) {
1315 error_setg(errp
, "'%s' cannot be changed", name
);
1319 if (!visit_type_size(v
, name
, &value
, errp
)) {
1323 if (value
< VIRTIO_MEM_MIN_BLOCK_SIZE
) {
1324 error_setg(errp
, "'%s' property has to be at least 0x%" PRIx32
, name
,
1325 VIRTIO_MEM_MIN_BLOCK_SIZE
);
1327 } else if (!is_power_of_2(value
)) {
1328 error_setg(errp
, "'%s' property has to be a power of two", name
);
1331 vmem
->block_size
= value
;
1334 static void virtio_mem_instance_init(Object
*obj
)
1336 VirtIOMEM
*vmem
= VIRTIO_MEM(obj
);
1338 notifier_list_init(&vmem
->size_change_notifiers
);
1339 QLIST_INIT(&vmem
->rdl_list
);
1341 object_property_add(obj
, VIRTIO_MEM_SIZE_PROP
, "size", virtio_mem_get_size
,
1343 object_property_add(obj
, VIRTIO_MEM_REQUESTED_SIZE_PROP
, "size",
1344 virtio_mem_get_requested_size
,
1345 virtio_mem_set_requested_size
, NULL
, NULL
);
1346 object_property_add(obj
, VIRTIO_MEM_BLOCK_SIZE_PROP
, "size",
1347 virtio_mem_get_block_size
, virtio_mem_set_block_size
,
1351 static Property virtio_mem_properties
[] = {
1352 DEFINE_PROP_UINT64(VIRTIO_MEM_ADDR_PROP
, VirtIOMEM
, addr
, 0),
1353 DEFINE_PROP_UINT32(VIRTIO_MEM_NODE_PROP
, VirtIOMEM
, node
, 0),
1354 DEFINE_PROP_BOOL(VIRTIO_MEM_PREALLOC_PROP
, VirtIOMEM
, prealloc
, false),
1355 DEFINE_PROP_LINK(VIRTIO_MEM_MEMDEV_PROP
, VirtIOMEM
, memdev
,
1356 TYPE_MEMORY_BACKEND
, HostMemoryBackend
*),
1357 #if defined(VIRTIO_MEM_HAS_LEGACY_GUESTS)
1358 DEFINE_PROP_ON_OFF_AUTO(VIRTIO_MEM_UNPLUGGED_INACCESSIBLE_PROP
, VirtIOMEM
,
1359 unplugged_inaccessible
, ON_OFF_AUTO_ON
),
1361 DEFINE_PROP_BOOL(VIRTIO_MEM_EARLY_MIGRATION_PROP
, VirtIOMEM
,
1362 early_migration
, true),
1363 DEFINE_PROP_END_OF_LIST(),
1366 static uint64_t virtio_mem_rdm_get_min_granularity(const RamDiscardManager
*rdm
,
1367 const MemoryRegion
*mr
)
1369 const VirtIOMEM
*vmem
= VIRTIO_MEM(rdm
);
1371 g_assert(mr
== &vmem
->memdev
->mr
);
1372 return vmem
->block_size
;
1375 static bool virtio_mem_rdm_is_populated(const RamDiscardManager
*rdm
,
1376 const MemoryRegionSection
*s
)
1378 const VirtIOMEM
*vmem
= VIRTIO_MEM(rdm
);
1379 uint64_t start_gpa
= vmem
->addr
+ s
->offset_within_region
;
1380 uint64_t end_gpa
= start_gpa
+ int128_get64(s
->size
);
1382 g_assert(s
->mr
== &vmem
->memdev
->mr
);
1384 start_gpa
= QEMU_ALIGN_DOWN(start_gpa
, vmem
->block_size
);
1385 end_gpa
= QEMU_ALIGN_UP(end_gpa
, vmem
->block_size
);
1387 if (!virtio_mem_valid_range(vmem
, start_gpa
, end_gpa
- start_gpa
)) {
1391 return virtio_mem_is_range_plugged(vmem
, start_gpa
, end_gpa
- start_gpa
);
/*
 * Context handed to the replay callbacks below: the caller's replay function
 * (cast back to its real type by the callback) and the caller's opaque.
 */
struct VirtIOMEMReplayData {
    void *fn;       /* ReplayRamPopulate or ReplayRamDiscard */
    void *opaque;   /* passed through to fn unchanged */
};
1399 static int virtio_mem_rdm_replay_populated_cb(MemoryRegionSection
*s
, void *arg
)
1401 struct VirtIOMEMReplayData
*data
= arg
;
1403 return ((ReplayRamPopulate
)data
->fn
)(s
, data
->opaque
);
1406 static int virtio_mem_rdm_replay_populated(const RamDiscardManager
*rdm
,
1407 MemoryRegionSection
*s
,
1408 ReplayRamPopulate replay_fn
,
1411 const VirtIOMEM
*vmem
= VIRTIO_MEM(rdm
);
1412 struct VirtIOMEMReplayData data
= {
1417 g_assert(s
->mr
== &vmem
->memdev
->mr
);
1418 return virtio_mem_for_each_plugged_section(vmem
, s
, &data
,
1419 virtio_mem_rdm_replay_populated_cb
);
1422 static int virtio_mem_rdm_replay_discarded_cb(MemoryRegionSection
*s
,
1425 struct VirtIOMEMReplayData
*data
= arg
;
1427 ((ReplayRamDiscard
)data
->fn
)(s
, data
->opaque
);
1431 static void virtio_mem_rdm_replay_discarded(const RamDiscardManager
*rdm
,
1432 MemoryRegionSection
*s
,
1433 ReplayRamDiscard replay_fn
,
1436 const VirtIOMEM
*vmem
= VIRTIO_MEM(rdm
);
1437 struct VirtIOMEMReplayData data
= {
1442 g_assert(s
->mr
== &vmem
->memdev
->mr
);
1443 virtio_mem_for_each_unplugged_section(vmem
, s
, &data
,
1444 virtio_mem_rdm_replay_discarded_cb
);
1447 static void virtio_mem_rdm_register_listener(RamDiscardManager
*rdm
,
1448 RamDiscardListener
*rdl
,
1449 MemoryRegionSection
*s
)
1451 VirtIOMEM
*vmem
= VIRTIO_MEM(rdm
);
1454 g_assert(s
->mr
== &vmem
->memdev
->mr
);
1455 rdl
->section
= memory_region_section_new_copy(s
);
1457 QLIST_INSERT_HEAD(&vmem
->rdl_list
, rdl
, next
);
1458 ret
= virtio_mem_for_each_plugged_section(vmem
, rdl
->section
, rdl
,
1459 virtio_mem_notify_populate_cb
);
1461 error_report("%s: Replaying plugged ranges failed: %s", __func__
,
1466 static void virtio_mem_rdm_unregister_listener(RamDiscardManager
*rdm
,
1467 RamDiscardListener
*rdl
)
1469 VirtIOMEM
*vmem
= VIRTIO_MEM(rdm
);
1471 g_assert(rdl
->section
->mr
== &vmem
->memdev
->mr
);
1473 if (rdl
->double_discard_supported
) {
1474 rdl
->notify_discard(rdl
, rdl
->section
);
1476 virtio_mem_for_each_plugged_section(vmem
, rdl
->section
, rdl
,
1477 virtio_mem_notify_discard_cb
);
1481 memory_region_section_free_copy(rdl
->section
);
1482 rdl
->section
= NULL
;
1483 QLIST_REMOVE(rdl
, next
);
1486 static void virtio_mem_class_init(ObjectClass
*klass
, void *data
)
1488 DeviceClass
*dc
= DEVICE_CLASS(klass
);
1489 VirtioDeviceClass
*vdc
= VIRTIO_DEVICE_CLASS(klass
);
1490 VirtIOMEMClass
*vmc
= VIRTIO_MEM_CLASS(klass
);
1491 RamDiscardManagerClass
*rdmc
= RAM_DISCARD_MANAGER_CLASS(klass
);
1493 device_class_set_props(dc
, virtio_mem_properties
);
1494 dc
->vmsd
= &vmstate_virtio_mem
;
1496 set_bit(DEVICE_CATEGORY_MISC
, dc
->categories
);
1497 vdc
->realize
= virtio_mem_device_realize
;
1498 vdc
->unrealize
= virtio_mem_device_unrealize
;
1499 vdc
->get_config
= virtio_mem_get_config
;
1500 vdc
->get_features
= virtio_mem_get_features
;
1501 vdc
->validate_features
= virtio_mem_validate_features
;
1502 vdc
->vmsd
= &vmstate_virtio_mem_device
;
1504 vmc
->fill_device_info
= virtio_mem_fill_device_info
;
1505 vmc
->get_memory_region
= virtio_mem_get_memory_region
;
1506 vmc
->add_size_change_notifier
= virtio_mem_add_size_change_notifier
;
1507 vmc
->remove_size_change_notifier
= virtio_mem_remove_size_change_notifier
;
1509 rdmc
->get_min_granularity
= virtio_mem_rdm_get_min_granularity
;
1510 rdmc
->is_populated
= virtio_mem_rdm_is_populated
;
1511 rdmc
->replay_populated
= virtio_mem_rdm_replay_populated
;
1512 rdmc
->replay_discarded
= virtio_mem_rdm_replay_discarded
;
1513 rdmc
->register_listener
= virtio_mem_rdm_register_listener
;
1514 rdmc
->unregister_listener
= virtio_mem_rdm_unregister_listener
;
1517 static const TypeInfo virtio_mem_info
= {
1518 .name
= TYPE_VIRTIO_MEM
,
1519 .parent
= TYPE_VIRTIO_DEVICE
,
1520 .instance_size
= sizeof(VirtIOMEM
),
1521 .instance_init
= virtio_mem_instance_init
,
1522 .class_init
= virtio_mem_class_init
,
1523 .class_size
= sizeof(VirtIOMEMClass
),
1524 .interfaces
= (InterfaceInfo
[]) {
1525 { TYPE_RAM_DISCARD_MANAGER
},
1530 static void virtio_register_types(void)
1532 type_register_static(&virtio_mem_info
);
1535 type_init(virtio_register_types
)