/*
 * Virtio MEM device
 *
 * Copyright (C) 2020 Red Hat, Inc.
 *
 * Authors:
 *  David Hildenbrand <david@redhat.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.
 * See the COPYING file in the top-level directory.
 */

#include "qemu/osdep.h"
#include "qemu/iov.h"
#include "qemu/cutils.h"
#include "qemu/error-report.h"
#include "qemu/units.h"
#include "sysemu/numa.h"
#include "sysemu/sysemu.h"
#include "sysemu/reset.h"
#include "hw/virtio/virtio.h"
#include "hw/virtio/virtio-bus.h"
#include "hw/virtio/virtio-access.h"
#include "hw/virtio/virtio-mem.h"
#include "qapi/error.h"
#include "qapi/visitor.h"
#include "exec/ram_addr.h"
#include "migration/misc.h"
#include "hw/boards.h"
#include "hw/qdev-properties.h"
#include CONFIG_DEVICES
#include "trace.h"

static const VMStateDescription vmstate_virtio_mem_device_early;

/*
 * We only had legacy x86 guests that did not support
 * VIRTIO_MEM_F_UNPLUGGED_INACCESSIBLE. Other targets don't have legacy guests.
 */
#if defined(TARGET_X86_64) || defined(TARGET_I386)
#define VIRTIO_MEM_HAS_LEGACY_GUESTS
#endif

/*
 * Let's not allow blocks smaller than 1 MiB, for example, to keep the tracking
 * bitmap small.
 */
#define VIRTIO_MEM_MIN_BLOCK_SIZE ((uint32_t)(1 * MiB))

static uint32_t virtio_mem_default_thp_size(void)
{
    uint32_t default_thp_size = VIRTIO_MEM_MIN_BLOCK_SIZE;

#if defined(__x86_64__) || defined(__arm__) || defined(__powerpc64__)
    default_thp_size = 2 * MiB;
#elif defined(__aarch64__)
    if (qemu_real_host_page_size() == 4 * KiB) {
        default_thp_size = 2 * MiB;
    } else if (qemu_real_host_page_size() == 16 * KiB) {
        default_thp_size = 32 * MiB;
    } else if (qemu_real_host_page_size() == 64 * KiB) {
        default_thp_size = 512 * MiB;
    }
#endif

    return default_thp_size;
}

/*
 * We want to have a reasonable default block size such that
 * 1. We avoid splitting THPs when unplugging memory, which degrades
 *    performance.
 * 2. We avoid placing THPs for plugged blocks that also cover unplugged
 *    blocks.
 *
 * The actual THP size might differ between Linux kernels, so we try to probe
 * it. In the future (if we ever run into issues regarding 2.), we might want
 * to disable THP in case we fail to properly probe the THP size, or if the
 * block size is configured smaller than the THP size.
 */
static uint32_t thp_size;

#define HPAGE_PMD_SIZE_PATH "/sys/kernel/mm/transparent_hugepage/hpage_pmd_size"
static uint32_t virtio_mem_thp_size(void)
{
    gchar *content = NULL;
    const char *endptr;
    uint64_t tmp;

    if (thp_size) {
        return thp_size;
    }

    /*
     * Try to probe the actual THP size, fallback to (sane but eventually
     * incorrect) default sizes.
     */
    if (g_file_get_contents(HPAGE_PMD_SIZE_PATH, &content, NULL, NULL) &&
        !qemu_strtou64(content, &endptr, 0, &tmp) &&
        (!endptr || *endptr == '\n')) {
        /* Sanity-check the value and fallback to something reasonable. */
        if (!tmp || !is_power_of_2(tmp)) {
            warn_report("Read unsupported THP size: %" PRIx64, tmp);
        } else {
            thp_size = tmp;
        }
    }

    if (!thp_size) {
        thp_size = virtio_mem_default_thp_size();
        warn_report("Could not detect THP size, falling back to %" PRIx64
                    " MiB.", thp_size / MiB);
    }

    g_free(content);
    return thp_size;
}

static uint64_t virtio_mem_default_block_size(RAMBlock *rb)
{
    const uint64_t page_size = qemu_ram_pagesize(rb);

    /* We can have hugetlbfs with a page size smaller than the THP size. */
    if (page_size == qemu_real_host_page_size()) {
        return MAX(page_size, virtio_mem_thp_size());
    }
    return MAX(page_size, VIRTIO_MEM_MIN_BLOCK_SIZE);
}

#if defined(VIRTIO_MEM_HAS_LEGACY_GUESTS)
static bool virtio_mem_has_shared_zeropage(RAMBlock *rb)
{
    /*
     * We only have a guaranteed shared zeropage on ordinary MAP_PRIVATE
     * anonymous RAM. In any other case, reading unplugged *can* populate a
     * fresh page, consuming actual memory.
     */
    return !qemu_ram_is_shared(rb) && rb->fd < 0 &&
           qemu_ram_pagesize(rb) == qemu_real_host_page_size();
}
#endif /* VIRTIO_MEM_HAS_LEGACY_GUESTS */

/*
 * Size the usable region bigger than the requested size if possible. Esp.
 * Linux guests will only add (aligned) memory blocks in case they fully
 * fit into the usable region, but plug+online only a subset of the pages.
 * The memory block size corresponds mostly to the section size.
 *
 * This allows e.g., to add 20MB with a section size of 128MB on x86_64, and
 * a section size of 512MB on arm64 (as long as the start address is properly
 * aligned, similar to ordinary DIMMs).
 *
 * We can change this at any time and maybe even make it configurable if
 * necessary (as the section size can change). But it's more likely that the
 * section size will rather get smaller and not bigger over time.
 */
#if defined(TARGET_X86_64) || defined(TARGET_I386)
#define VIRTIO_MEM_USABLE_EXTENT (2 * (128 * MiB))
#elif defined(TARGET_ARM)
#define VIRTIO_MEM_USABLE_EXTENT (2 * (512 * MiB))
#else
#error VIRTIO_MEM_USABLE_EXTENT not defined
#endif

static bool virtio_mem_is_busy(void)
{
    /*
     * Postcopy cannot handle concurrent discards and we don't want to migrate
     * pages on-demand with stale content when plugging new blocks.
     *
     * For precopy, we don't want unplugged blocks in our migration stream, and
     * when plugging new blocks, the page content might differ between source
     * and destination (observable by the guest when not initializing pages
     * after plugging them) until we're running on the destination (as we
     * didn't migrate these blocks when they were unplugged).
     */
    return migration_in_incoming_postcopy() || !migration_is_idle();
}

typedef int (*virtio_mem_range_cb)(const VirtIOMEM *vmem, void *arg,
                                   uint64_t offset, uint64_t size);

static int virtio_mem_for_each_unplugged_range(const VirtIOMEM *vmem, void *arg,
                                               virtio_mem_range_cb cb)
{
    unsigned long first_zero_bit, last_zero_bit;
    uint64_t offset, size;
    int ret = 0;

    first_zero_bit = find_first_zero_bit(vmem->bitmap, vmem->bitmap_size);
    while (first_zero_bit < vmem->bitmap_size) {
        offset = first_zero_bit * vmem->block_size;
        last_zero_bit = find_next_bit(vmem->bitmap, vmem->bitmap_size,
                                      first_zero_bit + 1) - 1;
        size = (last_zero_bit - first_zero_bit + 1) * vmem->block_size;

        ret = cb(vmem, arg, offset, size);
        if (ret) {
            break;
        }
        first_zero_bit = find_next_zero_bit(vmem->bitmap, vmem->bitmap_size,
                                            last_zero_bit + 2);
    }
    return ret;
}

static int virtio_mem_for_each_plugged_range(const VirtIOMEM *vmem, void *arg,
                                             virtio_mem_range_cb cb)
{
    unsigned long first_bit, last_bit;
    uint64_t offset, size;
    int ret = 0;

    first_bit = find_first_bit(vmem->bitmap, vmem->bitmap_size);
    while (first_bit < vmem->bitmap_size) {
        offset = first_bit * vmem->block_size;
        last_bit = find_next_zero_bit(vmem->bitmap, vmem->bitmap_size,
                                      first_bit + 1) - 1;
        size = (last_bit - first_bit + 1) * vmem->block_size;

        ret = cb(vmem, arg, offset, size);
        if (ret) {
            break;
        }
        first_bit = find_next_bit(vmem->bitmap, vmem->bitmap_size,
                                  last_bit + 2);
    }
    return ret;
}
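
/*
 * Illustrative example (an editor's reading aid, not part of the device
 * logic): with block_size = 2 MiB and a bitmap of 0b00110 (bits 1 and 2 set,
 * bitmap_size = 5), the iterators above would report:
 *   - one plugged range:    offset = 2 MiB, size = 4 MiB
 *   - two unplugged ranges: offset = 0 MiB, size = 2 MiB
 *                           offset = 6 MiB, size = 4 MiB
 * Each set bit stands for one plugged block of block_size bytes, relative to
 * the start of the device-managed memory region.
 */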

/*
 * Adjust the memory section to cover the intersection with the given range.
 *
 * Returns false if the intersection is empty, otherwise returns true.
 */
static bool virtio_mem_intersect_memory_section(MemoryRegionSection *s,
                                                uint64_t offset, uint64_t size)
{
    uint64_t start = MAX(s->offset_within_region, offset);
    uint64_t end = MIN(s->offset_within_region + int128_get64(s->size),
                       offset + size);

    if (end <= start) {
        return false;
    }

    s->offset_within_address_space += start - s->offset_within_region;
    s->offset_within_region = start;
    s->size = int128_make64(end - start);
    return true;
}

typedef int (*virtio_mem_section_cb)(MemoryRegionSection *s, void *arg);

static int virtio_mem_for_each_plugged_section(const VirtIOMEM *vmem,
                                               MemoryRegionSection *s,
                                               void *arg,
                                               virtio_mem_section_cb cb)
{
    unsigned long first_bit, last_bit;
    uint64_t offset, size;
    int ret = 0;

    first_bit = s->offset_within_region / vmem->block_size;
    first_bit = find_next_bit(vmem->bitmap, vmem->bitmap_size, first_bit);
    while (first_bit < vmem->bitmap_size) {
        MemoryRegionSection tmp = *s;

        offset = first_bit * vmem->block_size;
        last_bit = find_next_zero_bit(vmem->bitmap, vmem->bitmap_size,
                                      first_bit + 1) - 1;
        size = (last_bit - first_bit + 1) * vmem->block_size;

        if (!virtio_mem_intersect_memory_section(&tmp, offset, size)) {
            break;
        }
        ret = cb(&tmp, arg);
        if (ret) {
            break;
        }
        first_bit = find_next_bit(vmem->bitmap, vmem->bitmap_size,
                                  last_bit + 2);
    }
    return ret;
}

static int virtio_mem_for_each_unplugged_section(const VirtIOMEM *vmem,
                                                 MemoryRegionSection *s,
                                                 void *arg,
                                                 virtio_mem_section_cb cb)
{
    unsigned long first_bit, last_bit;
    uint64_t offset, size;
    int ret = 0;

    first_bit = s->offset_within_region / vmem->block_size;
    first_bit = find_next_zero_bit(vmem->bitmap, vmem->bitmap_size, first_bit);
    while (first_bit < vmem->bitmap_size) {
        MemoryRegionSection tmp = *s;

        offset = first_bit * vmem->block_size;
        last_bit = find_next_bit(vmem->bitmap, vmem->bitmap_size,
                                 first_bit + 1) - 1;
        size = (last_bit - first_bit + 1) * vmem->block_size;

        if (!virtio_mem_intersect_memory_section(&tmp, offset, size)) {
            break;
        }
        ret = cb(&tmp, arg);
        if (ret) {
            break;
        }
        first_bit = find_next_zero_bit(vmem->bitmap, vmem->bitmap_size,
                                       last_bit + 2);
    }
    return ret;
}

static int virtio_mem_notify_populate_cb(MemoryRegionSection *s, void *arg)
{
    RamDiscardListener *rdl = arg;

    return rdl->notify_populate(rdl, s);
}

static int virtio_mem_notify_discard_cb(MemoryRegionSection *s, void *arg)
{
    RamDiscardListener *rdl = arg;

    rdl->notify_discard(rdl, s);
    return 0;
}

static void virtio_mem_notify_unplug(VirtIOMEM *vmem, uint64_t offset,
                                     uint64_t size)
{
    RamDiscardListener *rdl;

    QLIST_FOREACH(rdl, &vmem->rdl_list, next) {
        MemoryRegionSection tmp = *rdl->section;

        if (!virtio_mem_intersect_memory_section(&tmp, offset, size)) {
            continue;
        }
        rdl->notify_discard(rdl, &tmp);
    }
}

static int virtio_mem_notify_plug(VirtIOMEM *vmem, uint64_t offset,
                                  uint64_t size)
{
    RamDiscardListener *rdl, *rdl2;
    int ret = 0;

    QLIST_FOREACH(rdl, &vmem->rdl_list, next) {
        MemoryRegionSection tmp = *rdl->section;

        if (!virtio_mem_intersect_memory_section(&tmp, offset, size)) {
            continue;
        }
        ret = rdl->notify_populate(rdl, &tmp);
        if (ret) {
            break;
        }
    }

    if (ret) {
        /* Notify all already-notified listeners. */
        QLIST_FOREACH(rdl2, &vmem->rdl_list, next) {
            MemoryRegionSection tmp = *rdl2->section;

            if (rdl2 == rdl) {
                break;
            }
            if (!virtio_mem_intersect_memory_section(&tmp, offset, size)) {
                continue;
            }
            rdl2->notify_discard(rdl2, &tmp);
        }
    }
    return ret;
}

static void virtio_mem_notify_unplug_all(VirtIOMEM *vmem)
{
    RamDiscardListener *rdl;

    if (!vmem->size) {
        return;
    }

    QLIST_FOREACH(rdl, &vmem->rdl_list, next) {
        if (rdl->double_discard_supported) {
            rdl->notify_discard(rdl, rdl->section);
        } else {
            virtio_mem_for_each_plugged_section(vmem, rdl->section, rdl,
                                                virtio_mem_notify_discard_cb);
        }
    }
}

static bool virtio_mem_test_bitmap(const VirtIOMEM *vmem, uint64_t start_gpa,
                                   uint64_t size, bool plugged)
{
    const unsigned long first_bit = (start_gpa - vmem->addr) / vmem->block_size;
    const unsigned long last_bit = first_bit + (size / vmem->block_size) - 1;
    unsigned long found_bit;

    /* We fake a shorter bitmap to avoid searching too far. */
    if (plugged) {
        found_bit = find_next_zero_bit(vmem->bitmap, last_bit + 1, first_bit);
    } else {
        found_bit = find_next_bit(vmem->bitmap, last_bit + 1, first_bit);
    }
    return found_bit > last_bit;
}

static void virtio_mem_set_bitmap(VirtIOMEM *vmem, uint64_t start_gpa,
                                  uint64_t size, bool plugged)
{
    const unsigned long bit = (start_gpa - vmem->addr) / vmem->block_size;
    const unsigned long nbits = size / vmem->block_size;

    if (plugged) {
        bitmap_set(vmem->bitmap, bit, nbits);
    } else {
        bitmap_clear(vmem->bitmap, bit, nbits);
    }
}
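
/*
 * For orientation (derived from the helpers above, no additional logic): a
 * guest physical address maps to its tracking bit as
 *   bit = (gpa - vmem->addr) / vmem->block_size;
 * so plug state is tracked at block_size granularity only, and requests are
 * validated to be block-aligned before the bitmap is consulted or modified.
 */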

static void virtio_mem_send_response(VirtIOMEM *vmem, VirtQueueElement *elem,
                                     struct virtio_mem_resp *resp)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(vmem);
    VirtQueue *vq = vmem->vq;

    trace_virtio_mem_send_response(le16_to_cpu(resp->type));
    iov_from_buf(elem->in_sg, elem->in_num, 0, resp, sizeof(*resp));

    virtqueue_push(vq, elem, sizeof(*resp));
    virtio_notify(vdev, vq);
}

static void virtio_mem_send_response_simple(VirtIOMEM *vmem,
                                            VirtQueueElement *elem,
                                            uint16_t type)
{
    struct virtio_mem_resp resp = {
        .type = cpu_to_le16(type),
    };

    virtio_mem_send_response(vmem, elem, &resp);
}

static bool virtio_mem_valid_range(const VirtIOMEM *vmem, uint64_t gpa,
                                   uint64_t size)
{
    if (!QEMU_IS_ALIGNED(gpa, vmem->block_size)) {
        return false;
    }
    if (gpa + size < gpa || !size) {
        return false;
    }
    if (gpa < vmem->addr || gpa >= vmem->addr + vmem->usable_region_size) {
        return false;
    }
    if (gpa + size > vmem->addr + vmem->usable_region_size) {
        return false;
    }
    return true;
}

static int virtio_mem_set_block_state(VirtIOMEM *vmem, uint64_t start_gpa,
                                      uint64_t size, bool plug)
{
    const uint64_t offset = start_gpa - vmem->addr;
    RAMBlock *rb = vmem->memdev->mr.ram_block;

    if (virtio_mem_is_busy()) {
        return -EBUSY;
    }

    if (!plug) {
        if (ram_block_discard_range(rb, offset, size)) {
            return -EBUSY;
        }
        virtio_mem_notify_unplug(vmem, offset, size);
    } else {
        int ret = 0;

        if (vmem->prealloc) {
            void *area = memory_region_get_ram_ptr(&vmem->memdev->mr) + offset;
            int fd = memory_region_get_fd(&vmem->memdev->mr);
            Error *local_err = NULL;

            qemu_prealloc_mem(fd, area, size, 1, NULL, &local_err);
            if (local_err) {
                static bool warned;

                /*
                 * Warn only once, we don't want to fill the log with these
                 * warnings.
                 */
                if (!warned) {
                    warn_report_err(local_err);
                    warned = true;
                } else {
                    error_free(local_err);
                }
                ret = -EBUSY;
            }
        }

        if (!ret) {
            ret = virtio_mem_notify_plug(vmem, offset, size);
        }
        if (ret) {
            /* Could be preallocation or a notifier populated memory. */
            ram_block_discard_range(vmem->memdev->mr.ram_block, offset, size);
            return -EBUSY;
        }
    }
    virtio_mem_set_bitmap(vmem, start_gpa, size, plug);
    return 0;
}
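
/*
 * Summary of the ordering above (a reading aid, not extra behaviour): on
 * plug, optional preallocation happens first, then RamDiscardListeners are
 * notified, and only on success is the bitmap updated; on unplug, the range
 * is discarded first, then listeners are notified, then the bitmap bits are
 * cleared. Any failure after preallocation discards the range again, so no
 * partially populated blocks are left behind.
 */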

static int virtio_mem_state_change_request(VirtIOMEM *vmem, uint64_t gpa,
                                           uint16_t nb_blocks, bool plug)
{
    const uint64_t size = nb_blocks * vmem->block_size;
    int ret;

    if (!virtio_mem_valid_range(vmem, gpa, size)) {
        return VIRTIO_MEM_RESP_ERROR;
    }

    if (plug && (vmem->size + size > vmem->requested_size)) {
        return VIRTIO_MEM_RESP_NACK;
    }

    /* test if really all blocks are in the opposite state */
    if (!virtio_mem_test_bitmap(vmem, gpa, size, !plug)) {
        return VIRTIO_MEM_RESP_ERROR;
    }

    ret = virtio_mem_set_block_state(vmem, gpa, size, plug);
    if (ret) {
        return VIRTIO_MEM_RESP_BUSY;
    }
    if (plug) {
        vmem->size += size;
    } else {
        vmem->size -= size;
    }
    notifier_list_notify(&vmem->size_change_notifiers, &vmem->size);
    return VIRTIO_MEM_RESP_ACK;
}

static void virtio_mem_plug_request(VirtIOMEM *vmem, VirtQueueElement *elem,
                                    struct virtio_mem_req *req)
{
    const uint64_t gpa = le64_to_cpu(req->u.plug.addr);
    const uint16_t nb_blocks = le16_to_cpu(req->u.plug.nb_blocks);
    uint16_t type;

    trace_virtio_mem_plug_request(gpa, nb_blocks);
    type = virtio_mem_state_change_request(vmem, gpa, nb_blocks, true);
    virtio_mem_send_response_simple(vmem, elem, type);
}

static void virtio_mem_unplug_request(VirtIOMEM *vmem, VirtQueueElement *elem,
                                      struct virtio_mem_req *req)
{
    const uint64_t gpa = le64_to_cpu(req->u.unplug.addr);
    const uint16_t nb_blocks = le16_to_cpu(req->u.unplug.nb_blocks);
    uint16_t type;

    trace_virtio_mem_unplug_request(gpa, nb_blocks);
    type = virtio_mem_state_change_request(vmem, gpa, nb_blocks, false);
    virtio_mem_send_response_simple(vmem, elem, type);
}

static void virtio_mem_resize_usable_region(VirtIOMEM *vmem,
                                            uint64_t requested_size,
                                            bool can_shrink)
{
    uint64_t newsize = MIN(memory_region_size(&vmem->memdev->mr),
                           requested_size + VIRTIO_MEM_USABLE_EXTENT);

    /* The usable region size always has to be multiples of the block size. */
    newsize = QEMU_ALIGN_UP(newsize, vmem->block_size);

    if (!requested_size) {
        newsize = 0;
    }

    if (newsize < vmem->usable_region_size && !can_shrink) {
        return;
    }

    trace_virtio_mem_resized_usable_region(vmem->usable_region_size, newsize);
    vmem->usable_region_size = newsize;
}

static int virtio_mem_unplug_all(VirtIOMEM *vmem)
{
    RAMBlock *rb = vmem->memdev->mr.ram_block;

    if (virtio_mem_is_busy()) {
        return -EBUSY;
    }

    if (ram_block_discard_range(rb, 0, qemu_ram_get_used_length(rb))) {
        return -EBUSY;
    }
    virtio_mem_notify_unplug_all(vmem);

    bitmap_clear(vmem->bitmap, 0, vmem->bitmap_size);
    if (vmem->size) {
        vmem->size = 0;
        notifier_list_notify(&vmem->size_change_notifiers, &vmem->size);
    }
    trace_virtio_mem_unplugged_all();
    virtio_mem_resize_usable_region(vmem, vmem->requested_size, true);
    return 0;
}

static void virtio_mem_unplug_all_request(VirtIOMEM *vmem,
                                          VirtQueueElement *elem)
{
    trace_virtio_mem_unplug_all_request();
    if (virtio_mem_unplug_all(vmem)) {
        virtio_mem_send_response_simple(vmem, elem, VIRTIO_MEM_RESP_BUSY);
    } else {
        virtio_mem_send_response_simple(vmem, elem, VIRTIO_MEM_RESP_ACK);
    }
}

static void virtio_mem_state_request(VirtIOMEM *vmem, VirtQueueElement *elem,
                                     struct virtio_mem_req *req)
{
    const uint16_t nb_blocks = le16_to_cpu(req->u.state.nb_blocks);
    const uint64_t gpa = le64_to_cpu(req->u.state.addr);
    const uint64_t size = nb_blocks * vmem->block_size;
    struct virtio_mem_resp resp = {
        .type = cpu_to_le16(VIRTIO_MEM_RESP_ACK),
    };

    trace_virtio_mem_state_request(gpa, nb_blocks);
    if (!virtio_mem_valid_range(vmem, gpa, size)) {
        virtio_mem_send_response_simple(vmem, elem, VIRTIO_MEM_RESP_ERROR);
        return;
    }

    if (virtio_mem_test_bitmap(vmem, gpa, size, true)) {
        resp.u.state.state = cpu_to_le16(VIRTIO_MEM_STATE_PLUGGED);
    } else if (virtio_mem_test_bitmap(vmem, gpa, size, false)) {
        resp.u.state.state = cpu_to_le16(VIRTIO_MEM_STATE_UNPLUGGED);
    } else {
        resp.u.state.state = cpu_to_le16(VIRTIO_MEM_STATE_MIXED);
    }
    trace_virtio_mem_state_response(le16_to_cpu(resp.u.state.state));
    virtio_mem_send_response(vmem, elem, &resp);
}

static void virtio_mem_handle_request(VirtIODevice *vdev, VirtQueue *vq)
{
    const int len = sizeof(struct virtio_mem_req);
    VirtIOMEM *vmem = VIRTIO_MEM(vdev);
    VirtQueueElement *elem;
    struct virtio_mem_req req;
    uint16_t type;

    while (true) {
        elem = virtqueue_pop(vq, sizeof(VirtQueueElement));
        if (!elem) {
            return;
        }

        if (iov_to_buf(elem->out_sg, elem->out_num, 0, &req, len) < len) {
            virtio_error(vdev, "virtio-mem protocol violation: invalid request"
                         " size: %d", len);
            virtqueue_detach_element(vq, elem, 0);
            g_free(elem);
            return;
        }

        if (iov_size(elem->in_sg, elem->in_num) <
            sizeof(struct virtio_mem_resp)) {
            virtio_error(vdev, "virtio-mem protocol violation: not enough space"
                         " for response: %zu",
                         iov_size(elem->in_sg, elem->in_num));
            virtqueue_detach_element(vq, elem, 0);
            g_free(elem);
            return;
        }

        type = le16_to_cpu(req.type);
        switch (type) {
        case VIRTIO_MEM_REQ_PLUG:
            virtio_mem_plug_request(vmem, elem, &req);
            break;
        case VIRTIO_MEM_REQ_UNPLUG:
            virtio_mem_unplug_request(vmem, elem, &req);
            break;
        case VIRTIO_MEM_REQ_UNPLUG_ALL:
            virtio_mem_unplug_all_request(vmem, elem);
            break;
        case VIRTIO_MEM_REQ_STATE:
            virtio_mem_state_request(vmem, elem, &req);
            break;
        default:
            virtio_error(vdev, "virtio-mem protocol violation: unknown request"
                         " type: %d", type);
            virtqueue_detach_element(vq, elem, 0);
            g_free(elem);
            return;
        }

        g_free(elem);
    }
}

static void virtio_mem_get_config(VirtIODevice *vdev, uint8_t *config_data)
{
    VirtIOMEM *vmem = VIRTIO_MEM(vdev);
    struct virtio_mem_config *config = (void *) config_data;

    config->block_size = cpu_to_le64(vmem->block_size);
    config->node_id = cpu_to_le16(vmem->node);
    config->requested_size = cpu_to_le64(vmem->requested_size);
    config->plugged_size = cpu_to_le64(vmem->size);
    config->addr = cpu_to_le64(vmem->addr);
    config->region_size = cpu_to_le64(memory_region_size(&vmem->memdev->mr));
    config->usable_region_size = cpu_to_le64(vmem->usable_region_size);
}

static uint64_t virtio_mem_get_features(VirtIODevice *vdev, uint64_t features,
                                        Error **errp)
{
    MachineState *ms = MACHINE(qdev_get_machine());
    VirtIOMEM *vmem = VIRTIO_MEM(vdev);

    if (ms->numa_state) {
#if defined(CONFIG_ACPI)
        virtio_add_feature(&features, VIRTIO_MEM_F_ACPI_PXM);
#endif
    }
    assert(vmem->unplugged_inaccessible != ON_OFF_AUTO_AUTO);
    if (vmem->unplugged_inaccessible == ON_OFF_AUTO_ON) {
        virtio_add_feature(&features, VIRTIO_MEM_F_UNPLUGGED_INACCESSIBLE);
    }
    return features;
}

static int virtio_mem_validate_features(VirtIODevice *vdev)
{
    if (virtio_host_has_feature(vdev, VIRTIO_MEM_F_UNPLUGGED_INACCESSIBLE) &&
        !virtio_vdev_has_feature(vdev, VIRTIO_MEM_F_UNPLUGGED_INACCESSIBLE)) {
        return -EFAULT;
    }
    return 0;
}

static void virtio_mem_system_reset(void *opaque)
{
    VirtIOMEM *vmem = VIRTIO_MEM(opaque);

    /*
     * During usual resets, we will unplug all memory and shrink the usable
     * region size. This is, however, not possible in all scenarios. Then,
     * the guest has to deal with this manually (VIRTIO_MEM_REQ_UNPLUG_ALL).
     */
    virtio_mem_unplug_all(vmem);
}

static void virtio_mem_device_realize(DeviceState *dev, Error **errp)
{
    MachineState *ms = MACHINE(qdev_get_machine());
    int nb_numa_nodes = ms->numa_state ? ms->numa_state->num_nodes : 0;
    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
    VirtIOMEM *vmem = VIRTIO_MEM(dev);
    uint64_t page_size;
    RAMBlock *rb;
    int ret;

    if (!vmem->memdev) {
        error_setg(errp, "'%s' property is not set", VIRTIO_MEM_MEMDEV_PROP);
        return;
    } else if (host_memory_backend_is_mapped(vmem->memdev)) {
        error_setg(errp, "'%s' property specifies a busy memdev: %s",
                   VIRTIO_MEM_MEMDEV_PROP,
                   object_get_canonical_path_component(OBJECT(vmem->memdev)));
        return;
    } else if (!memory_region_is_ram(&vmem->memdev->mr) ||
               memory_region_is_rom(&vmem->memdev->mr) ||
               !vmem->memdev->mr.ram_block) {
        error_setg(errp, "'%s' property specifies an unsupported memdev",
                   VIRTIO_MEM_MEMDEV_PROP);
        return;
    } else if (vmem->memdev->prealloc) {
        error_setg(errp, "'%s' property specifies a memdev with preallocation"
                   " enabled: %s. Instead, specify 'prealloc=on' for the"
                   " virtio-mem device. ", VIRTIO_MEM_MEMDEV_PROP,
                   object_get_canonical_path_component(OBJECT(vmem->memdev)));
        return;
    }

    if ((nb_numa_nodes && vmem->node >= nb_numa_nodes) ||
        (!nb_numa_nodes && vmem->node)) {
        error_setg(errp, "'%s' property has value '%" PRIu32 "', which exceeds"
                   " the number of numa nodes: %d", VIRTIO_MEM_NODE_PROP,
                   vmem->node, nb_numa_nodes ? nb_numa_nodes : 1);
        return;
    }

    if (enable_mlock) {
        error_setg(errp, "Incompatible with mlock");
        return;
    }

    rb = vmem->memdev->mr.ram_block;
    page_size = qemu_ram_pagesize(rb);

#if defined(VIRTIO_MEM_HAS_LEGACY_GUESTS)
    switch (vmem->unplugged_inaccessible) {
    case ON_OFF_AUTO_AUTO:
        if (virtio_mem_has_shared_zeropage(rb)) {
            vmem->unplugged_inaccessible = ON_OFF_AUTO_OFF;
        } else {
            vmem->unplugged_inaccessible = ON_OFF_AUTO_ON;
        }
        break;
    case ON_OFF_AUTO_OFF:
        if (!virtio_mem_has_shared_zeropage(rb)) {
            warn_report("'%s' property set to 'off' with a memdev that does"
                        " not support the shared zeropage.",
                        VIRTIO_MEM_UNPLUGGED_INACCESSIBLE_PROP);
        }
        break;
    default:
        break;
    }
#else /* VIRTIO_MEM_HAS_LEGACY_GUESTS */
    vmem->unplugged_inaccessible = ON_OFF_AUTO_ON;
#endif /* VIRTIO_MEM_HAS_LEGACY_GUESTS */

    /*
     * If the block size wasn't configured by the user, use a sane default.
     * This allows using hugetlbfs backends of any page size without manual
     * intervention.
     */
    if (!vmem->block_size) {
        vmem->block_size = virtio_mem_default_block_size(rb);
    }

    if (vmem->block_size < page_size) {
        error_setg(errp, "'%s' property has to be at least the page size (0x%"
                   PRIx64 ")", VIRTIO_MEM_BLOCK_SIZE_PROP, page_size);
        return;
    } else if (vmem->block_size < virtio_mem_default_block_size(rb)) {
        warn_report("'%s' property is smaller than the default block size (%"
                    PRIx64 " MiB)", VIRTIO_MEM_BLOCK_SIZE_PROP,
                    virtio_mem_default_block_size(rb) / MiB);
    }
    if (!QEMU_IS_ALIGNED(vmem->requested_size, vmem->block_size)) {
        error_setg(errp, "'%s' property has to be multiples of '%s' (0x%" PRIx64
                   ")", VIRTIO_MEM_REQUESTED_SIZE_PROP,
                   VIRTIO_MEM_BLOCK_SIZE_PROP, vmem->block_size);
        return;
    } else if (!QEMU_IS_ALIGNED(vmem->addr, vmem->block_size)) {
        error_setg(errp, "'%s' property has to be multiples of '%s' (0x%" PRIx64
                   ")", VIRTIO_MEM_ADDR_PROP, VIRTIO_MEM_BLOCK_SIZE_PROP,
                   vmem->block_size);
        return;
    } else if (!QEMU_IS_ALIGNED(memory_region_size(&vmem->memdev->mr),
                                vmem->block_size)) {
        error_setg(errp, "'%s' property memdev size has to be multiples of"
                   " '%s' (0x%" PRIx64 ")", VIRTIO_MEM_MEMDEV_PROP,
                   VIRTIO_MEM_BLOCK_SIZE_PROP, vmem->block_size);
        return;
    }

    if (ram_block_coordinated_discard_require(true)) {
        error_setg(errp, "Discarding RAM is disabled");
        return;
    }

    ret = ram_block_discard_range(rb, 0, qemu_ram_get_used_length(rb));
    if (ret) {
        error_setg_errno(errp, -ret, "Unexpected error discarding RAM");
        ram_block_coordinated_discard_require(false);
        return;
    }

    virtio_mem_resize_usable_region(vmem, vmem->requested_size, true);

    vmem->bitmap_size = memory_region_size(&vmem->memdev->mr) /
                        vmem->block_size;
    vmem->bitmap = bitmap_new(vmem->bitmap_size);

    virtio_init(vdev, VIRTIO_ID_MEM, sizeof(struct virtio_mem_config));
    vmem->vq = virtio_add_queue(vdev, 128, virtio_mem_handle_request);

    host_memory_backend_set_mapped(vmem->memdev, true);
    vmstate_register_ram(&vmem->memdev->mr, DEVICE(vmem));
    if (vmem->early_migration) {
        vmstate_register(VMSTATE_IF(vmem), VMSTATE_INSTANCE_ID_ANY,
                         &vmstate_virtio_mem_device_early, vmem);
    }
    qemu_register_reset(virtio_mem_system_reset, vmem);

    /*
     * Set ourselves as RamDiscardManager before the plug handler maps the
     * memory region and exposes it via an address space.
     */
    memory_region_set_ram_discard_manager(&vmem->memdev->mr,
                                          RAM_DISCARD_MANAGER(vmem));
}

static void virtio_mem_device_unrealize(DeviceState *dev)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
    VirtIOMEM *vmem = VIRTIO_MEM(dev);

    /*
     * The unplug handler unmapped the memory region, it cannot be
     * found via an address space anymore. Unset ourselves.
     */
    memory_region_set_ram_discard_manager(&vmem->memdev->mr, NULL);
    qemu_unregister_reset(virtio_mem_system_reset, vmem);
    if (vmem->early_migration) {
        vmstate_unregister(VMSTATE_IF(vmem), &vmstate_virtio_mem_device_early,
                           vmem);
    }
    vmstate_unregister_ram(&vmem->memdev->mr, DEVICE(vmem));
    host_memory_backend_set_mapped(vmem->memdev, false);
    virtio_del_queue(vdev, 0);
    virtio_cleanup(vdev);
    g_free(vmem->bitmap);
    ram_block_coordinated_discard_require(false);
}

static int virtio_mem_discard_range_cb(const VirtIOMEM *vmem, void *arg,
                                       uint64_t offset, uint64_t size)
{
    RAMBlock *rb = vmem->memdev->mr.ram_block;

    return ram_block_discard_range(rb, offset, size) ? -EINVAL : 0;
}

static int virtio_mem_restore_unplugged(VirtIOMEM *vmem)
{
    /* Make sure all memory is really discarded after migration. */
    return virtio_mem_for_each_unplugged_range(vmem, NULL,
                                               virtio_mem_discard_range_cb);
}

static int virtio_mem_post_load(void *opaque, int version_id)
{
    VirtIOMEM *vmem = VIRTIO_MEM(opaque);
    RamDiscardListener *rdl;
    int ret;

    if (vmem->prealloc && !vmem->early_migration) {
        warn_report("Proper preallocation with migration requires a newer QEMU machine");
    }

    /*
     * We started out with all memory discarded and our memory region is mapped
     * into an address space. Replay, now that we updated the bitmap.
     */
    QLIST_FOREACH(rdl, &vmem->rdl_list, next) {
        ret = virtio_mem_for_each_plugged_section(vmem, rdl->section, rdl,
                                                  virtio_mem_notify_populate_cb);
        if (ret) {
            return ret;
        }
    }

    if (migration_in_incoming_postcopy()) {
        return 0;
    }

    return virtio_mem_restore_unplugged(vmem);
}

static int virtio_mem_prealloc_range_cb(const VirtIOMEM *vmem, void *arg,
                                        uint64_t offset, uint64_t size)
{
    void *area = memory_region_get_ram_ptr(&vmem->memdev->mr) + offset;
    int fd = memory_region_get_fd(&vmem->memdev->mr);
    Error *local_err = NULL;

    qemu_prealloc_mem(fd, area, size, 1, NULL, &local_err);
    if (local_err) {
        error_report_err(local_err);
        return -ENOMEM;
    }
    return 0;
}

static int virtio_mem_post_load_early(void *opaque, int version_id)
{
    VirtIOMEM *vmem = VIRTIO_MEM(opaque);
    RAMBlock *rb = vmem->memdev->mr.ram_block;
    int ret;

    if (!vmem->prealloc) {
        return 0;
    }

    /*
     * We restored the bitmap and verified that the basic properties
     * match on source and destination, so we can go ahead and preallocate
     * memory for all plugged memory blocks, before actual RAM migration starts
     * touching this memory.
     */
    ret = virtio_mem_for_each_plugged_range(vmem, NULL,
                                            virtio_mem_prealloc_range_cb);
    if (ret) {
        return ret;
    }

    /*
     * This is tricky: postcopy wants to start with a clean slate. On
     * POSTCOPY_INCOMING_ADVISE, postcopy code discards all (ordinarily
     * preallocated) RAM such that postcopy will work as expected later.
     *
     * However, we run after POSTCOPY_INCOMING_ADVISE -- but before actual
     * RAM migration. So let's discard all memory again. This looks like an
     * expensive NOP, but actually serves a purpose: we made sure that we
     * were able to allocate all required backend memory once. We cannot
     * guarantee that the backend memory we will free will remain free
     * until we need it during postcopy, but at least we can catch the
     * obvious setup issues this way.
     */
    if (migration_incoming_postcopy_advised()) {
        if (ram_block_discard_range(rb, 0, qemu_ram_get_used_length(rb))) {
            return -EBUSY;
        }
    }
    return 0;
}

typedef struct VirtIOMEMMigSanityChecks {
    VirtIOMEM *parent;
    uint64_t addr;
    uint64_t region_size;
    uint64_t block_size;
    uint32_t node;
} VirtIOMEMMigSanityChecks;

static int virtio_mem_mig_sanity_checks_pre_save(void *opaque)
{
    VirtIOMEMMigSanityChecks *tmp = opaque;
    VirtIOMEM *vmem = tmp->parent;

    tmp->addr = vmem->addr;
    tmp->region_size = memory_region_size(&vmem->memdev->mr);
    tmp->block_size = vmem->block_size;
    tmp->node = vmem->node;
    return 0;
}

static int virtio_mem_mig_sanity_checks_post_load(void *opaque, int version_id)
{
    VirtIOMEMMigSanityChecks *tmp = opaque;
    VirtIOMEM *vmem = tmp->parent;
    const uint64_t new_region_size = memory_region_size(&vmem->memdev->mr);

    if (tmp->addr != vmem->addr) {
        error_report("Property '%s' changed from 0x%" PRIx64 " to 0x%" PRIx64,
                     VIRTIO_MEM_ADDR_PROP, tmp->addr, vmem->addr);
        return -EINVAL;
    }
    /*
     * Note: Preparation for resizeable memory regions. The maximum size
     * of the memory region must not change during migration.
     */
    if (tmp->region_size != new_region_size) {
        error_report("Property '%s' size changed from 0x%" PRIx64 " to 0x%"
                     PRIx64, VIRTIO_MEM_MEMDEV_PROP, tmp->region_size,
                     new_region_size);
        return -EINVAL;
    }
    if (tmp->block_size != vmem->block_size) {
        error_report("Property '%s' changed from 0x%" PRIx64 " to 0x%" PRIx64,
                     VIRTIO_MEM_BLOCK_SIZE_PROP, tmp->block_size,
                     vmem->block_size);
        return -EINVAL;
    }
    if (tmp->node != vmem->node) {
        error_report("Property '%s' changed from %" PRIu32 " to %" PRIu32,
                     VIRTIO_MEM_NODE_PROP, tmp->node, vmem->node);
        return -EINVAL;
    }
    return 0;
}

static const VMStateDescription vmstate_virtio_mem_sanity_checks = {
    .name = "virtio-mem-device/sanity-checks",
    .pre_save = virtio_mem_mig_sanity_checks_pre_save,
    .post_load = virtio_mem_mig_sanity_checks_post_load,
    .fields = (VMStateField[]) {
        VMSTATE_UINT64(addr, VirtIOMEMMigSanityChecks),
        VMSTATE_UINT64(region_size, VirtIOMEMMigSanityChecks),
        VMSTATE_UINT64(block_size, VirtIOMEMMigSanityChecks),
        VMSTATE_UINT32(node, VirtIOMEMMigSanityChecks),
        VMSTATE_END_OF_LIST(),
    },
};

static bool virtio_mem_vmstate_field_exists(void *opaque, int version_id)
{
    const VirtIOMEM *vmem = VIRTIO_MEM(opaque);

    /* With early migration, these fields were already migrated. */
    return !vmem->early_migration;
}

static const VMStateDescription vmstate_virtio_mem_device = {
    .name = "virtio-mem-device",
    .minimum_version_id = 1,
    .version_id = 1,
    .priority = MIG_PRI_VIRTIO_MEM,
    .post_load = virtio_mem_post_load,
    .fields = (VMStateField[]) {
        VMSTATE_WITH_TMP_TEST(VirtIOMEM, virtio_mem_vmstate_field_exists,
                              VirtIOMEMMigSanityChecks,
                              vmstate_virtio_mem_sanity_checks),
        VMSTATE_UINT64(usable_region_size, VirtIOMEM),
        VMSTATE_UINT64_TEST(size, VirtIOMEM, virtio_mem_vmstate_field_exists),
        VMSTATE_UINT64(requested_size, VirtIOMEM),
        VMSTATE_BITMAP_TEST(bitmap, VirtIOMEM, virtio_mem_vmstate_field_exists,
                            0, bitmap_size),
        VMSTATE_END_OF_LIST()
    },
};

/*
 * Transfer properties that are immutable while migration is active early,
 * such that we have this information around before migrating any RAM
 * content.
 *
 * Note that virtio_mem_is_busy() makes sure these properties can no longer
 * change on the migration source until migration completed.
 *
 * With QEMU compat machines, we transmit these properties later, via
 * vmstate_virtio_mem_device instead -- see virtio_mem_vmstate_field_exists().
 */
static const VMStateDescription vmstate_virtio_mem_device_early = {
    .name = "virtio-mem-device-early",
    .minimum_version_id = 1,
    .version_id = 1,
    .early_setup = true,
    .post_load = virtio_mem_post_load_early,
    .fields = (VMStateField[]) {
        VMSTATE_WITH_TMP(VirtIOMEM, VirtIOMEMMigSanityChecks,
                         vmstate_virtio_mem_sanity_checks),
        VMSTATE_UINT64(size, VirtIOMEM),
        VMSTATE_BITMAP(bitmap, VirtIOMEM, 0, bitmap_size),
        VMSTATE_END_OF_LIST()
    },
};

static const VMStateDescription vmstate_virtio_mem = {
    .name = "virtio-mem",
    .minimum_version_id = 1,
    .version_id = 1,
    .fields = (VMStateField[]) {
        VMSTATE_VIRTIO_DEVICE,
        VMSTATE_END_OF_LIST()
    },
};

static void virtio_mem_fill_device_info(const VirtIOMEM *vmem,
                                        VirtioMEMDeviceInfo *vi)
{
    vi->memaddr = vmem->addr;
    vi->node = vmem->node;
    vi->requested_size = vmem->requested_size;
    vi->size = vmem->size;
    vi->max_size = memory_region_size(&vmem->memdev->mr);
    vi->block_size = vmem->block_size;
    vi->memdev = object_get_canonical_path(OBJECT(vmem->memdev));
}

static MemoryRegion *virtio_mem_get_memory_region(VirtIOMEM *vmem, Error **errp)
{
    if (!vmem->memdev) {
        error_setg(errp, "'%s' property must be set", VIRTIO_MEM_MEMDEV_PROP);
        return NULL;
    }

    return &vmem->memdev->mr;
}

static void virtio_mem_add_size_change_notifier(VirtIOMEM *vmem,
                                                Notifier *notifier)
{
    notifier_list_add(&vmem->size_change_notifiers, notifier);
}

static void virtio_mem_remove_size_change_notifier(VirtIOMEM *vmem,
                                                    Notifier *notifier)
{
    notifier_remove(notifier);
}

static void virtio_mem_get_size(Object *obj, Visitor *v, const char *name,
                                void *opaque, Error **errp)
{
    const VirtIOMEM *vmem = VIRTIO_MEM(obj);
    uint64_t value = vmem->size;

    visit_type_size(v, name, &value, errp);
}

static void virtio_mem_get_requested_size(Object *obj, Visitor *v,
                                          const char *name, void *opaque,
                                          Error **errp)
{
    const VirtIOMEM *vmem = VIRTIO_MEM(obj);
    uint64_t value = vmem->requested_size;

    visit_type_size(v, name, &value, errp);
}

static void virtio_mem_set_requested_size(Object *obj, Visitor *v,
                                          const char *name, void *opaque,
                                          Error **errp)
{
    VirtIOMEM *vmem = VIRTIO_MEM(obj);
    uint64_t value;

    if (!visit_type_size(v, name, &value, errp)) {
        return;
    }

    /*
     * The block size and memory backend are not fixed until the device was
     * realized. realize() will verify these properties then.
     */
    if (DEVICE(obj)->realized) {
        if (!QEMU_IS_ALIGNED(value, vmem->block_size)) {
            error_setg(errp, "'%s' has to be multiples of '%s' (0x%" PRIx64
                       ")", name, VIRTIO_MEM_BLOCK_SIZE_PROP,
                       vmem->block_size);
            return;
        } else if (value > memory_region_size(&vmem->memdev->mr)) {
            error_setg(errp, "'%s' cannot exceed the memory backend size"
                       " (0x%" PRIx64 ")", name,
                       memory_region_size(&vmem->memdev->mr));
            return;
        }

        if (value != vmem->requested_size) {
            virtio_mem_resize_usable_region(vmem, value, false);
            vmem->requested_size = value;
        }
        /*
         * Trigger a config update so the guest gets notified. We trigger
         * even if the size didn't change (especially helpful for debugging).
         */
        virtio_notify_config(VIRTIO_DEVICE(vmem));
    } else {
        vmem->requested_size = value;
    }
}

static void virtio_mem_get_block_size(Object *obj, Visitor *v, const char *name,
                                      void *opaque, Error **errp)
{
    const VirtIOMEM *vmem = VIRTIO_MEM(obj);
    uint64_t value = vmem->block_size;

    /*
     * If not configured by the user (and we're not realized yet), use the
     * default block size we would use with the current memory backend.
     */
    if (!value) {
        if (vmem->memdev && memory_region_is_ram(&vmem->memdev->mr)) {
            value = virtio_mem_default_block_size(vmem->memdev->mr.ram_block);
        } else {
            value = virtio_mem_thp_size();
        }
    }

    visit_type_size(v, name, &value, errp);
}

static void virtio_mem_set_block_size(Object *obj, Visitor *v, const char *name,
                                      void *opaque, Error **errp)
{
    VirtIOMEM *vmem = VIRTIO_MEM(obj);
    uint64_t value;

    if (DEVICE(obj)->realized) {
        error_setg(errp, "'%s' cannot be changed", name);
        return;
    }

    if (!visit_type_size(v, name, &value, errp)) {
        return;
    }

    if (value < VIRTIO_MEM_MIN_BLOCK_SIZE) {
        error_setg(errp, "'%s' property has to be at least 0x%" PRIx32, name,
                   VIRTIO_MEM_MIN_BLOCK_SIZE);
        return;
    } else if (!is_power_of_2(value)) {
        error_setg(errp, "'%s' property has to be a power of two", name);
        return;
    }
    vmem->block_size = value;
}

static void virtio_mem_instance_init(Object *obj)
{
    VirtIOMEM *vmem = VIRTIO_MEM(obj);

    notifier_list_init(&vmem->size_change_notifiers);
    QLIST_INIT(&vmem->rdl_list);

    object_property_add(obj, VIRTIO_MEM_SIZE_PROP, "size", virtio_mem_get_size,
                        NULL, NULL, NULL);
    object_property_add(obj, VIRTIO_MEM_REQUESTED_SIZE_PROP, "size",
                        virtio_mem_get_requested_size,
                        virtio_mem_set_requested_size, NULL, NULL);
    object_property_add(obj, VIRTIO_MEM_BLOCK_SIZE_PROP, "size",
                        virtio_mem_get_block_size, virtio_mem_set_block_size,
                        NULL, NULL);
}

static Property virtio_mem_properties[] = {
    DEFINE_PROP_UINT64(VIRTIO_MEM_ADDR_PROP, VirtIOMEM, addr, 0),
    DEFINE_PROP_UINT32(VIRTIO_MEM_NODE_PROP, VirtIOMEM, node, 0),
    DEFINE_PROP_BOOL(VIRTIO_MEM_PREALLOC_PROP, VirtIOMEM, prealloc, false),
    DEFINE_PROP_LINK(VIRTIO_MEM_MEMDEV_PROP, VirtIOMEM, memdev,
                     TYPE_MEMORY_BACKEND, HostMemoryBackend *),
#if defined(VIRTIO_MEM_HAS_LEGACY_GUESTS)
    DEFINE_PROP_ON_OFF_AUTO(VIRTIO_MEM_UNPLUGGED_INACCESSIBLE_PROP, VirtIOMEM,
                            unplugged_inaccessible, ON_OFF_AUTO_AUTO),
#endif
    DEFINE_PROP_BOOL(VIRTIO_MEM_EARLY_MIGRATION_PROP, VirtIOMEM,
                     early_migration, true),
    DEFINE_PROP_END_OF_LIST(),
};
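
/*
 * Hypothetical command-line sketch (for orientation only; the user-visible
 * device is typically the virtio-mem-pci proxy, and the exact syntax depends
 * on the machine type and QEMU version):
 *
 *   qemu-system-x86_64 ... \
 *     -object memory-backend-ram,id=mem0,size=8G \
 *     -device virtio-mem-pci,id=vmem0,memdev=mem0,node=0,requested-size=2G
 *
 * The "memdev", "node", "requested-size" and "block-size" options correspond
 * to the properties defined above; "requested-size" can be changed at runtime
 * (e.g. via qom-set) to trigger plugging or unplugging of blocks by the guest.
 */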

static uint64_t virtio_mem_rdm_get_min_granularity(const RamDiscardManager *rdm,
                                                   const MemoryRegion *mr)
{
    const VirtIOMEM *vmem = VIRTIO_MEM(rdm);

    g_assert(mr == &vmem->memdev->mr);
    return vmem->block_size;
}

static bool virtio_mem_rdm_is_populated(const RamDiscardManager *rdm,
                                        const MemoryRegionSection *s)
{
    const VirtIOMEM *vmem = VIRTIO_MEM(rdm);
    uint64_t start_gpa = vmem->addr + s->offset_within_region;
    uint64_t end_gpa = start_gpa + int128_get64(s->size);

    g_assert(s->mr == &vmem->memdev->mr);

    start_gpa = QEMU_ALIGN_DOWN(start_gpa, vmem->block_size);
    end_gpa = QEMU_ALIGN_UP(end_gpa, vmem->block_size);

    if (!virtio_mem_valid_range(vmem, start_gpa, end_gpa - start_gpa)) {
        return false;
    }

    return virtio_mem_test_bitmap(vmem, start_gpa, end_gpa - start_gpa, true);
}

struct VirtIOMEMReplayData {
    void *fn;
    void *opaque;
};

static int virtio_mem_rdm_replay_populated_cb(MemoryRegionSection *s, void *arg)
{
    struct VirtIOMEMReplayData *data = arg;

    return ((ReplayRamPopulate)data->fn)(s, data->opaque);
}

static int virtio_mem_rdm_replay_populated(const RamDiscardManager *rdm,
                                           MemoryRegionSection *s,
                                           ReplayRamPopulate replay_fn,
                                           void *opaque)
{
    const VirtIOMEM *vmem = VIRTIO_MEM(rdm);
    struct VirtIOMEMReplayData data = {
        .fn = replay_fn,
        .opaque = opaque,
    };

    g_assert(s->mr == &vmem->memdev->mr);
    return virtio_mem_for_each_plugged_section(vmem, s, &data,
                                            virtio_mem_rdm_replay_populated_cb);
}

static int virtio_mem_rdm_replay_discarded_cb(MemoryRegionSection *s,
                                              void *arg)
{
    struct VirtIOMEMReplayData *data = arg;

    ((ReplayRamDiscard)data->fn)(s, data->opaque);
    return 0;
}

static void virtio_mem_rdm_replay_discarded(const RamDiscardManager *rdm,
                                            MemoryRegionSection *s,
                                            ReplayRamDiscard replay_fn,
                                            void *opaque)
{
    const VirtIOMEM *vmem = VIRTIO_MEM(rdm);
    struct VirtIOMEMReplayData data = {
        .fn = replay_fn,
        .opaque = opaque,
    };

    g_assert(s->mr == &vmem->memdev->mr);
    virtio_mem_for_each_unplugged_section(vmem, s, &data,
                                          virtio_mem_rdm_replay_discarded_cb);
}

static void virtio_mem_rdm_register_listener(RamDiscardManager *rdm,
                                             RamDiscardListener *rdl,
                                             MemoryRegionSection *s)
{
    VirtIOMEM *vmem = VIRTIO_MEM(rdm);
    int ret;

    g_assert(s->mr == &vmem->memdev->mr);
    rdl->section = memory_region_section_new_copy(s);

    QLIST_INSERT_HEAD(&vmem->rdl_list, rdl, next);
    ret = virtio_mem_for_each_plugged_section(vmem, rdl->section, rdl,
                                              virtio_mem_notify_populate_cb);
    if (ret) {
        error_report("%s: Replaying plugged ranges failed: %s", __func__,
                     strerror(-ret));
    }
}

static void virtio_mem_rdm_unregister_listener(RamDiscardManager *rdm,
                                               RamDiscardListener *rdl)
{
    VirtIOMEM *vmem = VIRTIO_MEM(rdm);

    g_assert(rdl->section->mr == &vmem->memdev->mr);
    if (vmem->size) {
        if (rdl->double_discard_supported) {
            rdl->notify_discard(rdl, rdl->section);
        } else {
            virtio_mem_for_each_plugged_section(vmem, rdl->section, rdl,
                                                virtio_mem_notify_discard_cb);
        }
    }

    memory_region_section_free_copy(rdl->section);
    rdl->section = NULL;
    QLIST_REMOVE(rdl, next);
}

static void virtio_mem_class_init(ObjectClass *klass, void *data)
{
    DeviceClass *dc = DEVICE_CLASS(klass);
    VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);
    VirtIOMEMClass *vmc = VIRTIO_MEM_CLASS(klass);
    RamDiscardManagerClass *rdmc = RAM_DISCARD_MANAGER_CLASS(klass);

    device_class_set_props(dc, virtio_mem_properties);
    dc->vmsd = &vmstate_virtio_mem;

    set_bit(DEVICE_CATEGORY_MISC, dc->categories);
    vdc->realize = virtio_mem_device_realize;
    vdc->unrealize = virtio_mem_device_unrealize;
    vdc->get_config = virtio_mem_get_config;
    vdc->get_features = virtio_mem_get_features;
    vdc->validate_features = virtio_mem_validate_features;
    vdc->vmsd = &vmstate_virtio_mem_device;

    vmc->fill_device_info = virtio_mem_fill_device_info;
    vmc->get_memory_region = virtio_mem_get_memory_region;
    vmc->add_size_change_notifier = virtio_mem_add_size_change_notifier;
    vmc->remove_size_change_notifier = virtio_mem_remove_size_change_notifier;

    rdmc->get_min_granularity = virtio_mem_rdm_get_min_granularity;
    rdmc->is_populated = virtio_mem_rdm_is_populated;
    rdmc->replay_populated = virtio_mem_rdm_replay_populated;
    rdmc->replay_discarded = virtio_mem_rdm_replay_discarded;
    rdmc->register_listener = virtio_mem_rdm_register_listener;
    rdmc->unregister_listener = virtio_mem_rdm_unregister_listener;
}

static const TypeInfo virtio_mem_info = {
    .name = TYPE_VIRTIO_MEM,
    .parent = TYPE_VIRTIO_DEVICE,
    .instance_size = sizeof(VirtIOMEM),
    .instance_init = virtio_mem_instance_init,
    .class_init = virtio_mem_class_init,
    .class_size = sizeof(VirtIOMEMClass),
    .interfaces = (InterfaceInfo[]) {
        { TYPE_RAM_DISCARD_MANAGER },
        { }
    },
};

static void virtio_register_types(void)
{
    type_register_static(&virtio_mem_info);
}

type_init(virtio_register_types)