target/i386/hax-mem.c
/*
 * HAX memory mapping operations
 *
 * Copyright (c) 2015-16 Intel Corporation
 * Copyright 2016 Google, Inc.
 *
 * This work is licensed under the terms of the GNU GPL, version 2. See
 * the COPYING file in the top-level directory.
 */

#include "qemu/osdep.h"
#include "cpu.h"
#include "exec/address-spaces.h"
#include "exec/exec-all.h"
#include "qemu/error-report.h"

#include "target/i386/hax-i386.h"
#include "qemu/queue.h"

#define DEBUG_HAX_MEM 0

#define DPRINTF(fmt, ...) \
    do { \
        if (DEBUG_HAX_MEM) { \
            fprintf(stdout, fmt, ## __VA_ARGS__); \
        } \
    } while (0)
/**
 * HAXMapping: describes a pending guest physical memory mapping
 *
 * @start_pa: a guest physical address marking the start of the region; must be
 *            page-aligned
 * @size: the size of the region, in bytes; must be page-aligned
 * @host_va: the host virtual address of the start of the mapping
 * @flags: mapping parameters e.g. HAX_RAM_INFO_ROM or HAX_RAM_INFO_INVALID
 * @entry: additional fields for linking #HAXMapping instances together
 */
typedef struct HAXMapping {
    uint64_t start_pa;
    uint32_t size;
    uint64_t host_va;
    int flags;
    QTAILQ_ENTRY(HAXMapping) entry;
} HAXMapping;
/*
 * A doubly-linked list (actually a tail queue) of the pending page mappings
 * for the ongoing memory transaction.
 *
 * It is used to optimize the number of page mapping updates done through the
 * kernel module. For example, it's effective when a driver is digging an MMIO
 * hole inside an existing memory mapping: QEMU first sees a deletion of the
 * whole region, then the addition of the two remaining RAM areas around the
 * hole, and finally the memory transaction commit. During the commit, after
 * computing the net result of the deletion and additions locally, it sends
 * to the kernel only the removal of the pages from the MMIO hole.
 */
static QTAILQ_HEAD(HAXMappingListHead, HAXMapping) mappings =
    QTAILQ_HEAD_INITIALIZER(mappings);
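
/*
 * Worked example with hypothetical addresses: assume RAM is mapped at
 * [0x0, 0x200000) and a driver digs an MMIO hole at [0xe0000, 0x100000).
 * The transaction then queues three updates:
 *   - [0x000000, 0x200000)   deletion of the whole region
 *   + [0x000000, 0x0e0000)   RAM below the hole
 *   + [0x100000, 0x200000)   RAM above the hole
 * The additions cancel the matching parts of the deletion, so at commit
 * time only the unmapping of [0xe0000, 0x100000) reaches the kernel.
 */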
/**
 * hax_mapping_dump_list: dumps @mappings to stdout (for debugging)
 */
static void hax_mapping_dump_list(void)
{
    HAXMapping *entry;

    DPRINTF("%s updates:\n", __func__);
    QTAILQ_FOREACH(entry, &mappings, entry) {
        DPRINTF("\t%c 0x%016" PRIx64 "->0x%016" PRIx64 " VA 0x%016" PRIx64
                "%s\n", entry->flags & HAX_RAM_INFO_INVALID ? '-' : '+',
                entry->start_pa, entry->start_pa + entry->size, entry->host_va,
                entry->flags & HAX_RAM_INFO_ROM ? " ROM" : "");
    }
}
static void hax_insert_mapping_before(HAXMapping *next, uint64_t start_pa,
                                      uint32_t size, uint64_t host_va,
                                      uint8_t flags)
{
    HAXMapping *entry;

    entry = g_malloc0(sizeof(*entry));
    entry->start_pa = start_pa;
    entry->size = size;
    entry->host_va = host_va;
    entry->flags = flags;
    if (!next) {
        QTAILQ_INSERT_TAIL(&mappings, entry, entry);
    } else {
        QTAILQ_INSERT_BEFORE(next, entry, entry);
    }
}
static bool hax_mapping_is_opposite(HAXMapping *entry, uint64_t host_va,
                                    uint8_t flags)
{
    /* removed then added without change for the read-only flag */
    bool nop_flags = (entry->flags ^ flags) == HAX_RAM_INFO_INVALID;

    return (entry->host_va == host_va) && nop_flags;
}
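
/**
 * hax_update_mapping: folds the update [start_pa, start_pa + size) into
 * @mappings, which is kept sorted by start_pa and free of overlaps.
 *
 * Entries that end before the update are skipped. Parts of the update that
 * fall into gaps are inserted as new entries, an entry that the update only
 * partially covers is split at the boundary, and where the update overlaps
 * an existing entry, the overlapping chunk is either rewritten in place or,
 * when the new mapping exactly cancels the old one (see
 * hax_mapping_is_opposite() above), dropped from the list altogether.
 */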
static void hax_update_mapping(uint64_t start_pa, uint32_t size,
                               uint64_t host_va, uint8_t flags)
{
    uint64_t end_pa = start_pa + size;
    HAXMapping *entry, *next;

    QTAILQ_FOREACH_SAFE(entry, &mappings, entry, next) {
        uint32_t chunk_sz;
        if (start_pa >= entry->start_pa + entry->size) {
            continue;
        }
        if (start_pa < entry->start_pa) {
            chunk_sz = end_pa <= entry->start_pa ? size
                       : entry->start_pa - start_pa;
            hax_insert_mapping_before(entry, start_pa, chunk_sz,
                                      host_va, flags);
            start_pa += chunk_sz;
            host_va += chunk_sz;
            size -= chunk_sz;
        } else if (start_pa > entry->start_pa) {
            /* split the existing chunk at start_pa */
            chunk_sz = start_pa - entry->start_pa;
            hax_insert_mapping_before(entry, entry->start_pa, chunk_sz,
                                      entry->host_va, entry->flags);
            entry->start_pa += chunk_sz;
            entry->host_va += chunk_sz;
            entry->size -= chunk_sz;
        }
        /* now start_pa == entry->start_pa */
        chunk_sz = MIN(size, entry->size);
        if (chunk_sz) {
            bool nop = hax_mapping_is_opposite(entry, host_va, flags);
            bool partial = chunk_sz < entry->size;
            if (partial) {
                /* remove the beginning of the existing chunk */
                entry->start_pa += chunk_sz;
                entry->host_va += chunk_sz;
                entry->size -= chunk_sz;
                if (!nop) {
                    hax_insert_mapping_before(entry, start_pa, chunk_sz,
                                              host_va, flags);
                }
            } else { /* affects the full mapping entry */
                if (nop) { /* no change to this mapping, remove it */
                    QTAILQ_REMOVE(&mappings, entry, entry);
                    g_free(entry);
                } else { /* update mapping properties */
                    entry->host_va = host_va;
                    entry->flags = flags;
                }
            }
            start_pa += chunk_sz;
            host_va += chunk_sz;
            size -= chunk_sz;
        }
        if (!size) { /* we are done */
            break;
        }
    }
    if (size) { /* add the leftover */
        hax_insert_mapping_before(NULL, start_pa, size, host_va, flags);
    }
}
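
/*
 * Note that hax_update_mapping() only cancels opposite chunks; it never
 * coalesces adjacent entries that end up with identical flags, so each
 * surviving entry is sent to the kernel as a separate hax_set_ram() call
 * at commit time.
 */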
static void hax_process_section(MemoryRegionSection *section, uint8_t flags)
{
    MemoryRegion *mr = section->mr;
    hwaddr start_pa = section->offset_within_address_space;
    ram_addr_t size = int128_get64(section->size);
    unsigned int delta;
    uint64_t host_va;
    uint32_t max_mapping_size;

    /* We only care about RAM and ROM regions */
    if (!memory_region_is_ram(mr)) {
        if (memory_region_is_romd(mr)) {
            /* HAXM kernel module does not support ROMD yet */
            warn_report("Ignoring ROMD region 0x%016" PRIx64 "->0x%016" PRIx64,
                        start_pa, start_pa + size);
        }
        return;
    }

    /* Adjust start_pa and size so that they are page-aligned. (Cf
     * kvm_set_phys_mem() in kvm-all.c).
     */
    delta = qemu_real_host_page_size - (start_pa & ~qemu_real_host_page_mask);
    delta &= ~qemu_real_host_page_mask;
    if (delta > size) {
        return;
    }
    start_pa += delta;
    size -= delta;
    size &= qemu_real_host_page_mask;
    if (!size || (start_pa & ~qemu_real_host_page_mask)) {
        return;
    }

    host_va = (uintptr_t)memory_region_get_ram_ptr(mr)
              + section->offset_within_region + delta;
    if (memory_region_is_rom(section->mr)) {
        flags |= HAX_RAM_INFO_ROM;
    }

    /*
     * The kernel module interface uses 32-bit sizes:
     * https://github.com/intel/haxm/blob/master/API.md#hax_vm_ioctl_set_ram
     *
     * If the mapping size is longer than 32 bits, we can't process it in one
     * call into the kernel. Instead, we split the mapping into smaller ones,
     * and call hax_update_mapping() on each.
     */
    max_mapping_size = UINT32_MAX & qemu_real_host_page_mask;
    while (size > max_mapping_size) {
        hax_update_mapping(start_pa, max_mapping_size, host_va, flags);
        start_pa += max_mapping_size;
        size -= max_mapping_size;
        host_va += max_mapping_size;
    }
    /* Now size <= max_mapping_size */
    hax_update_mapping(start_pa, (uint32_t)size, host_va, flags);
}
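
/*
 * region_add/region_del are the MemoryListener callbacks the memory core
 * invokes for every section added to or removed from the address space
 * within a transaction. The ref/unref pair keeps the MemoryRegion (and thus
 * its backing RAM block) alive for as long as the section stays mapped into
 * the guest.
 */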
static void hax_region_add(MemoryListener *listener,
                           MemoryRegionSection *section)
{
    memory_region_ref(section->mr);
    hax_process_section(section, 0);
}

static void hax_region_del(MemoryListener *listener,
                           MemoryRegionSection *section)
{
    hax_process_section(section, HAX_RAM_INFO_INVALID);
    memory_region_unref(section->mr);
}
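
/*
 * Transaction bracketing: begin asserts that nothing is left over from a
 * previous transaction, and commit drains the queue, issuing one
 * hax_set_ram() call per surviving entry and freeing it, so the list is
 * empty again when the next transaction begins.
 */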
static void hax_transaction_begin(MemoryListener *listener)
{
    g_assert(QTAILQ_EMPTY(&mappings));
}

static void hax_transaction_commit(MemoryListener *listener)
{
    if (!QTAILQ_EMPTY(&mappings)) {
        HAXMapping *entry, *next;

        if (DEBUG_HAX_MEM) {
            hax_mapping_dump_list();
        }
        QTAILQ_FOREACH_SAFE(entry, &mappings, entry, next) {
            if (entry->flags & HAX_RAM_INFO_INVALID) {
                /* for unmapping, put the values expected by the kernel */
                entry->flags = HAX_RAM_INFO_INVALID;
                entry->host_va = 0;
            }
            if (hax_set_ram(entry->start_pa, entry->size,
                            entry->host_va, entry->flags)) {
                fprintf(stderr, "%s: Failed mapping @0x%016" PRIx64 "+0x%"
                        PRIx32 " flags %02x\n", __func__, entry->start_pa,
                        entry->size, entry->flags);
            }
            QTAILQ_REMOVE(&mappings, entry, entry);
            g_free(entry);
        }
    }
}
/* currently we fake the dirty bitmap sync, always dirty */
static void hax_log_sync(MemoryListener *listener,
                         MemoryRegionSection *section)
{
    MemoryRegion *mr = section->mr;

    if (!memory_region_is_ram(mr)) {
        /* Skip MMIO regions */
        return;
    }

    memory_region_set_dirty(mr, 0, int128_get64(section->size));
}
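
/*
 * Priority 10 matches the value used by QEMU's other accelerator listeners
 * (e.g. KVM's), so HAX observes memory transactions at the same point in
 * the listener chain.
 */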
static MemoryListener hax_memory_listener = {
    .begin = hax_transaction_begin,
    .commit = hax_transaction_commit,
    .region_add = hax_region_add,
    .region_del = hax_region_del,
    .log_sync = hax_log_sync,
    .priority = 10,
};
static void hax_ram_block_added(RAMBlockNotifier *n, void *host, size_t size)
{
    /*
     * We must register each RAM block with the HAXM kernel module, or
     * hax_set_ram() will fail for any mapping into the RAM block:
     * https://github.com/intel/haxm/blob/master/API.md#hax_vm_ioctl_alloc_ram
     *
     * Old versions of the HAXM kernel module (< 6.2.0) used to preallocate all
     * host physical pages for the RAM block as part of this registration
     * process, hence the name hax_populate_ram().
     */
    if (hax_populate_ram((uint64_t)(uintptr_t)host, size) < 0) {
        fprintf(stderr, "HAX failed to populate RAM\n");
        abort();
    }
}

static struct RAMBlockNotifier hax_ram_notifier = {
    .ram_block_added = hax_ram_block_added,
};
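
/*
 * hax_memory_init() runs during accelerator setup. The RAM block notifier is
 * registered first, so every RAM block created afterwards is registered with
 * the kernel module before the memory listener maps it with hax_set_ram().
 * Registering the listener also replays any sections already present in
 * address_space_memory through hax_region_add().
 */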
void hax_memory_init(void)
{
    ram_block_notifier_add(&hax_ram_notifier);
    memory_listener_register(&hax_memory_listener, &address_space_memory);
}