/*
 * QEMU Xen emulation: Grant table support
 *
 * Copyright © 2022 Amazon.com, Inc. or its affiliates. All Rights Reserved.
 *
 * Authors: David Woodhouse <dwmw2@infradead.org>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 */

#include "qemu/osdep.h"
#include "qemu/host-utils.h"
#include "qemu/module.h"
#include "qemu/lockable.h"
#include "qemu/main-loop.h"
#include "qapi/error.h"
#include "qom/object.h"
#include "exec/target_page.h"
#include "exec/address-spaces.h"
#include "migration/vmstate.h"

#include "hw/sysbus.h"
#include "hw/xen/xen.h"
#include "hw/xen/xen_backend_ops.h"
#include "xen_overlay.h"
#include "xen_gnttab.h"
#include "xen_primary_console.h"

#include "sysemu/kvm.h"
#include "sysemu/kvm_xen.h"

#include "hw/xen/interface/memory.h"
#include "hw/xen/interface/grant_table.h"

#define TYPE_XEN_GNTTAB "xen-gnttab"
OBJECT_DECLARE_SIMPLE_TYPE(XenGnttabState, XEN_GNTTAB)

#define ENTRIES_PER_FRAME_V1 (XEN_PAGE_SIZE / sizeof(grant_entry_v1_t))

static struct gnttab_backend_ops emu_gnttab_backend_ops;

struct XenGnttabState {
    /*< private >*/
    SysBusDevice busdev;
    /*< public >*/

    QemuMutex gnt_lock;

    uint32_t nr_frames;
    uint32_t max_frames;

    union {
        grant_entry_v1_t *v1;
        /* Theoretically, v2 support could be added here. */
    } entries;

    MemoryRegion gnt_frames;
    MemoryRegion *gnt_aliases;
    uint64_t *gnt_frame_gpas;

    uint8_t *map_track;
};

struct XenGnttabState *xen_gnttab_singleton;

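/*
 * Realize: allocate RAM to back the grant table frames, create a page-sized
 * alias for each frame so it can be overlaid into guest physical memory on
 * demand, seed the reserved xenstore entry, and register this emulated
 * implementation as the grant table backend.
 */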
static void xen_gnttab_realize(DeviceState *dev, Error **errp)
{
    XenGnttabState *s = XEN_GNTTAB(dev);
    int i;

    if (xen_mode != XEN_EMULATE) {
        error_setg(errp, "Xen grant table support is for Xen emulation");
        return;
    }
    s->max_frames = kvm_xen_get_gnttab_max_frames();
    memory_region_init_ram(&s->gnt_frames, OBJECT(dev), "xen:grant_table",
                           XEN_PAGE_SIZE * s->max_frames, &error_abort);
    memory_region_set_enabled(&s->gnt_frames, true);
    s->entries.v1 = memory_region_get_ram_ptr(&s->gnt_frames);

    /* Create individual page-sized aliases for overlays */
    s->gnt_aliases = (void *)g_new0(MemoryRegion, s->max_frames);
    s->gnt_frame_gpas = (void *)g_new(uint64_t, s->max_frames);
    for (i = 0; i < s->max_frames; i++) {
        memory_region_init_alias(&s->gnt_aliases[i], OBJECT(dev),
                                 NULL, &s->gnt_frames,
                                 i * XEN_PAGE_SIZE, XEN_PAGE_SIZE);
        s->gnt_frame_gpas[i] = INVALID_GPA;
    }

    memset(s->entries.v1, 0, XEN_PAGE_SIZE * s->max_frames);
    s->entries.v1[GNTTAB_RESERVED_XENSTORE].flags = GTF_permit_access;
    s->entries.v1[GNTTAB_RESERVED_XENSTORE].frame = XEN_SPECIAL_PFN(XENSTORE);

    qemu_mutex_init(&s->gnt_lock);

    xen_gnttab_singleton = s;

    s->map_track = g_new0(uint8_t, s->max_frames * ENTRIES_PER_FRAME_V1);

    xen_gnttab_ops = &emu_gnttab_backend_ops;
}

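/*
 * After migration, re-establish the overlay mappings for any grant table
 * frames the guest had already mapped into its physmap.
 */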
static int xen_gnttab_post_load(void *opaque, int version_id)
{
    XenGnttabState *s = XEN_GNTTAB(opaque);
    int i;

    for (i = 0; i < s->nr_frames; i++) {
        if (s->gnt_frame_gpas[i] != INVALID_GPA) {
            xen_overlay_do_map_page(&s->gnt_aliases[i], s->gnt_frame_gpas[i]);
        }
    }
    return 0;
}

static bool xen_gnttab_is_needed(void *opaque)
{
    return xen_mode == XEN_EMULATE;
}

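/*
 * Only nr_frames and the per-frame guest physical addresses are migrated
 * explicitly; the grant table contents themselves live in the
 * "xen:grant_table" RAM region and migrate as ordinary guest RAM.
 */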
static const VMStateDescription xen_gnttab_vmstate = {
    .name = "xen_gnttab",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = xen_gnttab_is_needed,
    .post_load = xen_gnttab_post_load,
    .fields = (const VMStateField[]) {
        VMSTATE_UINT32(nr_frames, XenGnttabState),
        VMSTATE_VARRAY_UINT32(gnt_frame_gpas, XenGnttabState, nr_frames, 0,
                              vmstate_info_uint64, uint64_t),
        VMSTATE_END_OF_LIST()
    }
};

static void xen_gnttab_class_init(ObjectClass *klass, void *data)
{
    DeviceClass *dc = DEVICE_CLASS(klass);

    dc->realize = xen_gnttab_realize;
    dc->vmsd = &xen_gnttab_vmstate;
}

static const TypeInfo xen_gnttab_info = {
    .name = TYPE_XEN_GNTTAB,
    .parent = TYPE_SYS_BUS_DEVICE,
    .instance_size = sizeof(XenGnttabState),
    .class_init = xen_gnttab_class_init,
};

void xen_gnttab_create(void)
{
    xen_gnttab_singleton = XEN_GNTTAB(sysbus_create_simple(TYPE_XEN_GNTTAB,
                                                           -1, NULL));
}

static void xen_gnttab_register_types(void)
{
    type_register_static(&xen_gnttab_info);
}

type_init(xen_gnttab_register_types)

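/*
 * Map grant table frame 'idx' into the guest at frame number 'gfn'. This is
 * called when the guest adds a grant-table frame to its physmap (the
 * XENMEM_add_to_physmap path with XENMAPSPACE_grant_table).
 */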
int xen_gnttab_map_page(uint64_t idx, uint64_t gfn)
{
    XenGnttabState *s = xen_gnttab_singleton;
    uint64_t gpa = gfn << XEN_PAGE_SHIFT;

    if (!s) {
        return -ENOTSUP;
    }

    if (idx >= s->max_frames) {
        return -EINVAL;
    }

    QEMU_LOCK_GUARD(&s->gnt_lock);

    xen_overlay_do_map_page(&s->gnt_aliases[idx], gpa);

    s->gnt_frame_gpas[idx] = gpa;

    if (s->nr_frames <= idx) {
        s->nr_frames = idx + 1;
    }

    return 0;
}

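/*
 * Only grant table v1 is supported; a request for v2 is rejected and the
 * reported version always remains 1.
 */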
int xen_gnttab_set_version_op(struct gnttab_set_version *set)
{
    int ret;

    switch (set->version) {
    case 1:
        ret = 0;
        break;

    case 2:
        /* Behave as before set_version was introduced. */
        ret = -ENOSYS;
        break;

    default:
        ret = -EINVAL;
        break;
    }

    set->version = 1;
    return ret;
}

int xen_gnttab_get_version_op(struct gnttab_get_version *get)
{
    if (get->dom != DOMID_SELF && get->dom != xen_domid) {
        return -ESRCH;
    }

    get->version = 1;
    return 0;
}

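/*
 * GNTTABOP_query_size: report the current and maximum number of grant
 * table frames for this domain.
 */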
int xen_gnttab_query_size_op(struct gnttab_query_size *size)
{
    XenGnttabState *s = xen_gnttab_singleton;

    if (!s) {
        return -ENOTSUP;
    }

    if (size->dom != DOMID_SELF && size->dom != xen_domid) {
        size->status = GNTST_bad_domain;
        return 0;
    }

    size->status = GNTST_okay;
    size->nr_frames = s->nr_frames;
    size->max_nr_frames = s->max_frames;
    return 0;
}

/* Track per-open refs, to allow close() to clean up. */
struct active_ref {
    MemoryRegionSection mrs;
    void *virtaddr;
    uint32_t refcnt;
    int prot;
};

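/*
 * gnt_ref() validates a v1 grant entry and atomically sets GTF_reading (and
 * GTF_writing for writable mappings), returning the granted frame's guest
 * physical address. gnt_unref() undoes this once the last tracked mapping
 * of the ref goes away.
 */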
static void gnt_unref(XenGnttabState *s, grant_ref_t ref,
                      MemoryRegionSection *mrs, int prot)
{
    if (mrs && mrs->mr) {
        if (prot & PROT_WRITE) {
            memory_region_set_dirty(mrs->mr, mrs->offset_within_region,
                                    XEN_PAGE_SIZE);
        }
        memory_region_unref(mrs->mr);
        mrs->mr = NULL;
    }
    assert(s->map_track[ref] != 0);

    if (--s->map_track[ref] == 0) {
        grant_entry_v1_t *gnt_p = &s->entries.v1[ref];
        qatomic_and(&gnt_p->flags, (uint16_t)~(GTF_reading | GTF_writing));
    }
}

static uint64_t gnt_ref(XenGnttabState *s, grant_ref_t ref, int prot)
{
    uint16_t mask = GTF_type_mask | GTF_sub_page;
    grant_entry_v1_t gnt, *gnt_p;
    int retries = 0;

    if (ref >= s->max_frames * ENTRIES_PER_FRAME_V1 ||
        s->map_track[ref] == UINT8_MAX) {
        return INVALID_GPA;
    }

    if (prot & PROT_WRITE) {
        mask |= GTF_readonly;
    }

    gnt_p = &s->entries.v1[ref];

    /*
     * The guest can legitimately be changing the GTF_readonly flag. Allow
     * that, but don't let a malicious guest cause a livelock.
     */
    for (retries = 0; retries < 5; retries++) {
        uint16_t new_flags;

        /* Read the entry before an atomic operation on its flags */
        gnt = *(volatile grant_entry_v1_t *)gnt_p;

        if ((gnt.flags & mask) != GTF_permit_access ||
            gnt.domid != DOMID_QEMU) {
            return INVALID_GPA;
        }

        new_flags = gnt.flags | GTF_reading;
        if (prot & PROT_WRITE) {
            new_flags |= GTF_writing;
        }

        if (qatomic_cmpxchg(&gnt_p->flags, gnt.flags, new_flags) == gnt.flags) {
            return (uint64_t)gnt.frame << XEN_PAGE_SHIFT;
        }
    }

    return INVALID_GPA;
}

struct xengntdev_handle {
    GHashTable *active_maps;
};

static int xen_be_gnttab_set_max_grants(struct xengntdev_handle *xgt,
                                        uint32_t nr_grants)
{
    return 0;
}

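/*
 * Map a granted page for a PV backend. Only single-page mappings are
 * supported; repeat mappings of the same ref share one active_ref entry,
 * reference-counted per open handle.
 */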
static void *xen_be_gnttab_map_refs(struct xengntdev_handle *xgt,
                                    uint32_t count, uint32_t domid,
                                    uint32_t *refs, int prot)
{
    XenGnttabState *s = xen_gnttab_singleton;
    struct active_ref *act;

    if (!s) {
        errno = ENOTSUP;
        return NULL;
    }

    if (domid != xen_domid) {
        errno = EINVAL;
        return NULL;
    }

    if (!count || count > 4096) {
        errno = EINVAL;
        return NULL;
    }

    /*
     * Making a contiguous mapping from potentially discontiguous grant
     * references would be... distinctly non-trivial. We don't support it.
     * Even changing the API to return an array of pointers, one per page,
     * wouldn't be simple to use in PV backends because some structures
     * actually cross page boundaries (e.g. 32-bit blkif_response ring
     * entries are 12 bytes).
     */
    if (count != 1) {
        errno = EINVAL;
        return NULL;
    }

    QEMU_LOCK_GUARD(&s->gnt_lock);

    act = g_hash_table_lookup(xgt->active_maps, GINT_TO_POINTER(refs[0]));
    if (act) {
        if ((prot & PROT_WRITE) && !(act->prot & PROT_WRITE)) {
            if (gnt_ref(s, refs[0], prot) == INVALID_GPA) {
                return NULL;
            }
            act->prot |= PROT_WRITE;
        }
        act->refcnt++;
    } else {
        uint64_t gpa = gnt_ref(s, refs[0], prot);
        if (gpa == INVALID_GPA) {
            errno = EINVAL;
            return NULL;
        }

        act = g_new0(struct active_ref, 1);
        act->prot = prot;
        act->refcnt = 1;
        act->mrs = memory_region_find(get_system_memory(), gpa, XEN_PAGE_SIZE);

        if (act->mrs.mr &&
            !int128_lt(act->mrs.size, int128_make64(XEN_PAGE_SIZE)) &&
            memory_region_get_ram_addr(act->mrs.mr) != RAM_ADDR_INVALID) {
            act->virtaddr = qemu_map_ram_ptr(act->mrs.mr->ram_block,
                                             act->mrs.offset_within_region);
        }
        if (!act->virtaddr) {
            gnt_unref(s, refs[0], &act->mrs, 0);
            g_free(act);
            errno = EINVAL;
            return NULL;
        }

        s->map_track[refs[0]]++;
        g_hash_table_insert(xgt->active_maps, GINT_TO_POINTER(refs[0]), act);
    }

    return act->virtaddr;
}

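/* GHashTable callback: release one active mapping (also used from close()). */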
static gboolean
do_unmap(gpointer key, gpointer value, gpointer user_data)
{
    XenGnttabState *s = user_data;
    grant_ref_t gref = GPOINTER_TO_INT(key);
    struct active_ref *act = value;

    gnt_unref(s, gref, &act->mrs, act->prot);
    g_free(act);
    return true;
}

static int xen_be_gnttab_unmap(struct xengntdev_handle *xgt,
                               void *start_address, uint32_t *refs,
                               uint32_t count)
{
    XenGnttabState *s = xen_gnttab_singleton;
    struct active_ref *act;

    if (!s) {
        return -ENOTSUP;
    }

    if (count != 1) {
        return -EINVAL;
    }

    QEMU_LOCK_GUARD(&s->gnt_lock);

    act = g_hash_table_lookup(xgt->active_maps, GINT_TO_POINTER(refs[0]));
    if (!act) {
        return -ENOENT;
    }

    if (act->virtaddr != start_address) {
        return -EINVAL;
    }

    if (!--act->refcnt) {
        do_unmap(GINT_TO_POINTER(refs[0]), act, s);
        g_hash_table_remove(xgt->active_maps, GINT_TO_POINTER(refs[0]));
    }

    return 0;
}

/*
 * This looks a bit like the one for true Xen in xen-operations.c but
 * in emulation we don't support multi-page mappings. And under Xen we
 * *want* the multi-page mappings so we have fewer bounces through the
 * kernel and the hypervisor. So the code paths end up being similar,
 * but different.
 */
static int xen_be_gnttab_copy(struct xengntdev_handle *xgt, bool to_domain,
                              uint32_t domid, XenGrantCopySegment *segs,
                              uint32_t nr_segs, Error **errp)
{
    int prot = to_domain ? PROT_WRITE : PROT_READ;
    unsigned int i;

    for (i = 0; i < nr_segs; i++) {
        XenGrantCopySegment *seg = &segs[i];
        void *page;
        uint32_t ref = to_domain ? seg->dest.foreign.ref :
            seg->source.foreign.ref;

        page = xen_be_gnttab_map_refs(xgt, 1, domid, &ref, prot);
        if (!page) {
            if (errp) {
                error_setg_errno(errp, errno,
                                 "xen_be_gnttab_map_refs failed");
            }
            return -errno;
        }

        if (to_domain) {
            memcpy(page + seg->dest.foreign.offset, seg->source.virt,
                   seg->len);
        } else {
            memcpy(seg->dest.virt, page + seg->source.foreign.offset,
                   seg->len);
        }

        if (xen_be_gnttab_unmap(xgt, page, &ref, 1)) {
            if (errp) {
                error_setg_errno(errp, errno, "xen_be_gnttab_unmap failed");
            }
            return -errno;
        }
    }

    return 0;
}

static struct xengntdev_handle *xen_be_gnttab_open(void)
{
    struct xengntdev_handle *xgt = g_new0(struct xengntdev_handle, 1);

    xgt->active_maps = g_hash_table_new(g_direct_hash, g_direct_equal);
    return xgt;
}

static int xen_be_gnttab_close(struct xengntdev_handle *xgt)
{
    XenGnttabState *s = xen_gnttab_singleton;

    if (!s) {
        return -ENOTSUP;
    }

    g_hash_table_foreach_remove(xgt->active_maps, do_unmap, s);
    g_hash_table_destroy(xgt->active_maps);
    g_free(xgt);
    return 0;
}

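/*
 * The emulated grant table backend ops, installed as xen_gnttab_ops from
 * xen_gnttab_realize() so that PV backends use this implementation.
 */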
static struct gnttab_backend_ops emu_gnttab_backend_ops = {
    .open = xen_be_gnttab_open,
    .close = xen_be_gnttab_close,
    .grant_copy = xen_be_gnttab_copy,
    .set_max_grants = xen_be_gnttab_set_max_grants,
    .map_refs = xen_be_gnttab_map_refs,
    .unmap = xen_be_gnttab_unmap,
};

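/*
 * Wipe the grant table on reset and re-seed the reserved xenstore entry,
 * plus the console entry if a primary console page is present.
 */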
int xen_gnttab_reset(void)
{
    XenGnttabState *s = xen_gnttab_singleton;

    if (!s) {
        return -ENOTSUP;
    }

    QEMU_LOCK_GUARD(&s->gnt_lock);

    s->nr_frames = 0;

    memset(s->entries.v1, 0, XEN_PAGE_SIZE * s->max_frames);

    s->entries.v1[GNTTAB_RESERVED_XENSTORE].flags = GTF_permit_access;
    s->entries.v1[GNTTAB_RESERVED_XENSTORE].frame = XEN_SPECIAL_PFN(XENSTORE);

    if (xen_primary_console_get_pfn()) {
        s->entries.v1[GNTTAB_RESERVED_CONSOLE].flags = GTF_permit_access;
        s->entries.v1[GNTTAB_RESERVED_CONSOLE].frame = XEN_SPECIAL_PFN(CONSOLE);
    }

    return 0;
}