2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
4 * Copyright (c) 2012 NetApp, Inc.
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
16 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * This file and its contents are supplied under the terms of the
32 * Common Development and Distribution License ("CDDL"), version 1.0.
33 * You may only use this file in accordance with the terms of version
36 * A full copy of the text of the CDDL should have accompanied this
37 * source. A copy of the CDDL is also available via the Internet at
38 * http://www.illumos.org/license/CDDL.
40 * Copyright 2020 Oxide Computer Company
44 * Memory ranges are represented with an RB tree. On insertion, the range
45 * is checked for overlaps. On lookup, the key has the same base and limit
46 * so it can be searched within the range.
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/types.h>
#include <sys/errno.h>
#include <sys/tree.h>

#include <machine/vmm.h>

#include <assert.h>
#include <err.h>
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
65 struct mmio_rb_range
{
66 RB_ENTRY(mmio_rb_range
) mr_link
; /* RB tree links */
67 struct mem_range mr_param
;
73 RB_PROTOTYPE(mmio_rb_tree
, mmio_rb_range
, mr_link
, mmio_rb_range_compare
);
75 static RB_HEAD(mmio_rb_tree
, mmio_rb_range
) mmio_rb_root
, mmio_rb_fallback
;
78 * Per-vCPU cache. Since most accesses from a vCPU will be to
79 * consecutive addresses in a range, it makes sense to cache the
82 static struct mmio_rb_range
**mmio_hint
;
85 static pthread_rwlock_t mmio_rwlock
;
88 mmio_rb_range_compare(struct mmio_rb_range
*a
, struct mmio_rb_range
*b
)
90 if (a
->mr_end
< b
->mr_base
)
92 else if (a
->mr_base
> b
->mr_end
)
98 mmio_rb_lookup(struct mmio_rb_tree
*rbt
, uint64_t addr
,
99 struct mmio_rb_range
**entry
)
101 struct mmio_rb_range find
, *res
;
103 find
.mr_base
= find
.mr_end
= addr
;
105 res
= RB_FIND(mmio_rb_tree
, rbt
, &find
);
116 mmio_rb_add(struct mmio_rb_tree
*rbt
, struct mmio_rb_range
*new)
118 struct mmio_rb_range
*overlap
;
120 overlap
= RB_INSERT(mmio_rb_tree
, rbt
, new);
122 if (overlap
!= NULL
) {
124 printf("overlap detected: new %lx:%lx, tree %lx:%lx, '%s' "
125 "claims region already claimed for '%s'\n",
126 new->mr_base
, new->mr_end
,
127 overlap
->mr_base
, overlap
->mr_end
,
128 new->mr_param
.name
, overlap
->mr_param
.name
);
139 mmio_rb_dump(struct mmio_rb_tree
*rbt
)
142 struct mmio_rb_range
*np
;
144 pthread_rwlock_rdlock(&mmio_rwlock
);
145 RB_FOREACH(np
, mmio_rb_tree
, rbt
) {
146 printf(" %lx:%lx, %s\n", np
->mr_base
, np
->mr_end
,
149 perror
= pthread_rwlock_unlock(&mmio_rwlock
);
RB_GENERATE(mmio_rb_tree, mmio_rb_range, mr_link, mmio_rb_range_compare);

/*
 * Callback invoked by access_memory() once the covering mem_range has
 * been resolved for a guest-physical address.
 */
typedef int (mem_cb_t)(struct vmctx *ctx, int vcpu, uint64_t gpa,
    struct mem_range *mr, void *arg);
160 mem_read(void *ctx
, int vcpu
, uint64_t gpa
, uint64_t *rval
, int size
, void *arg
)
163 struct mem_range
*mr
= arg
;
165 error
= (*mr
->handler
)(ctx
, vcpu
, MEM_F_READ
, gpa
, size
,
166 rval
, mr
->arg1
, mr
->arg2
);
171 mem_write(void *ctx
, int vcpu
, uint64_t gpa
, uint64_t wval
, int size
, void *arg
)
174 struct mem_range
*mr
= arg
;
176 error
= (*mr
->handler
)(ctx
, vcpu
, MEM_F_WRITE
, gpa
, size
,
177 &wval
, mr
->arg1
, mr
->arg2
);
182 access_memory(struct vmctx
*ctx
, int vcpu
, uint64_t paddr
, mem_cb_t
*cb
,
185 struct mmio_rb_range
*entry
;
186 int err
, perror
, immutable
;
188 pthread_rwlock_rdlock(&mmio_rwlock
);
190 * First check the per-vCPU cache
192 if (mmio_hint
[vcpu
] &&
193 paddr
>= mmio_hint
[vcpu
]->mr_base
&&
194 paddr
<= mmio_hint
[vcpu
]->mr_end
) {
195 entry
= mmio_hint
[vcpu
];
200 if (mmio_rb_lookup(&mmio_rb_root
, paddr
, &entry
) == 0) {
201 /* Update the per-vCPU cache */
202 mmio_hint
[vcpu
] = entry
;
203 } else if (mmio_rb_lookup(&mmio_rb_fallback
, paddr
, &entry
)) {
204 perror
= pthread_rwlock_unlock(&mmio_rwlock
);
210 assert(entry
!= NULL
);
213 * An 'immutable' memory range is guaranteed to be never removed
214 * so there is no need to hold 'mmio_rwlock' while calling the
217 * XXX writes to the PCIR_COMMAND register can cause register_mem()
218 * to be called. If the guest is using PCI extended config space
219 * to modify the PCIR_COMMAND register then register_mem() can
220 * deadlock on 'mmio_rwlock'. However by registering the extended
221 * config space window as 'immutable' the deadlock can be avoided.
223 immutable
= (entry
->mr_param
.flags
& MEM_F_IMMUTABLE
);
225 perror
= pthread_rwlock_unlock(&mmio_rwlock
);
229 err
= cb(ctx
, vcpu
, paddr
, &entry
->mr_param
, arg
);
232 perror
= pthread_rwlock_unlock(&mmio_rwlock
);
241 emulate_mem_cb(struct vmctx
*ctx
, int vcpu
, uint64_t paddr
, struct mem_range
*mr
,
244 struct vm_mmio
*mmio
;
249 if (mmio
->read
!= 0) {
250 err
= mem_read(ctx
, vcpu
, paddr
, &mmio
->data
, mmio
->bytes
, mr
);
252 err
= mem_write(ctx
, vcpu
, paddr
, mmio
->data
, mmio
->bytes
, mr
);
259 emulate_mem(struct vmctx
*ctx
, int vcpu
, struct vm_mmio
*mmio
)
261 return (access_memory(ctx
, vcpu
, mmio
->gpa
, emulate_mem_cb
, mmio
));
271 rw_mem_cb(struct vmctx
*ctx
, int vcpu
, uint64_t paddr
, struct mem_range
*mr
,
274 struct rw_mem_args
*rma
;
277 return (mr
->handler(ctx
, vcpu
, rma
->operation
, paddr
, rma
->size
,
278 rma
->val
, mr
->arg1
, mr
->arg2
));
282 read_mem(struct vmctx
*ctx
, int vcpu
, uint64_t gpa
, uint64_t *rval
, int size
)
284 struct rw_mem_args rma
;
288 rma
.operation
= MEM_F_READ
;
289 return (access_memory(ctx
, vcpu
, gpa
, rw_mem_cb
, &rma
));
293 write_mem(struct vmctx
*ctx
, int vcpu
, uint64_t gpa
, uint64_t wval
, int size
)
295 struct rw_mem_args rma
;
299 rma
.operation
= MEM_F_WRITE
;
300 return (access_memory(ctx
, vcpu
, gpa
, rw_mem_cb
, &rma
));
304 register_mem_int(struct mmio_rb_tree
*rbt
, struct mem_range
*memp
)
306 struct mmio_rb_range
*entry
, *mrp
;
311 mrp
= malloc(sizeof(struct mmio_rb_range
));
313 warn("%s: couldn't allocate memory for mrp\n",
317 mrp
->mr_param
= *memp
;
318 mrp
->mr_base
= memp
->base
;
319 mrp
->mr_end
= memp
->base
+ memp
->size
- 1;
320 pthread_rwlock_wrlock(&mmio_rwlock
);
321 if (mmio_rb_lookup(rbt
, memp
->base
, &entry
) != 0)
322 err
= mmio_rb_add(rbt
, mrp
);
324 else /* smatch warn: possible memory leak of 'mrp' */
327 perror
= pthread_rwlock_unlock(&mmio_rwlock
);
337 register_mem(struct mem_range
*memp
)
340 return (register_mem_int(&mmio_rb_root
, memp
));
344 register_mem_fallback(struct mem_range
*memp
)
347 return (register_mem_int(&mmio_rb_fallback
, memp
));
351 unregister_mem(struct mem_range
*memp
)
353 struct mem_range
*mr
;
354 struct mmio_rb_range
*entry
= NULL
;
357 pthread_rwlock_wrlock(&mmio_rwlock
);
358 err
= mmio_rb_lookup(&mmio_rb_root
, memp
->base
, &entry
);
360 mr
= &entry
->mr_param
;
361 assert(mr
->name
== memp
->name
);
362 assert(mr
->base
== memp
->base
&& mr
->size
== memp
->size
);
363 assert((mr
->flags
& MEM_F_IMMUTABLE
) == 0);
364 RB_REMOVE(mmio_rb_tree
, &mmio_rb_root
, entry
);
366 /* flush Per-vCPU cache */
367 for (i
= 0; i
< mmio_ncpu
; i
++) {
368 if (mmio_hint
[i
] == entry
)
372 perror
= pthread_rwlock_unlock(&mmio_rwlock
);
386 mmio_hint
= calloc(ncpu
, sizeof(*mmio_hint
));
387 RB_INIT(&mmio_rb_root
);
388 RB_INIT(&mmio_rb_fallback
);
389 pthread_rwlock_init(&mmio_rwlock
, NULL
);