hw/i386/amd_iommu.c
1 /*
2 * QEMU emulation of AMD IOMMU (AMD-Vi)
4 * Copyright (C) 2011 Eduard - Gabriel Munteanu
5 * Copyright (C) 2015, 2016 David Kiarie Kahurani
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License along
18 * with this program; if not, see <http://www.gnu.org/licenses/>.
20 * Cache implementation inspired by hw/i386/intel_iommu.c
23 #include "qemu/osdep.h"
24 #include "hw/i386/pc.h"
25 #include "hw/pci/msi.h"
26 #include "hw/pci/pci_bus.h"
27 #include "migration/vmstate.h"
28 #include "amd_iommu.h"
29 #include "qapi/error.h"
30 #include "qemu/error-report.h"
31 #include "hw/i386/apic_internal.h"
32 #include "trace.h"
33 #include "hw/i386/apic-msidef.h"
34 #include "hw/qdev-properties.h"
36 /* AMD-Vi MMIO register names, used for tracing */
37 const char *amdvi_mmio_low[] = {
38 "AMDVI_MMIO_DEVTAB_BASE",
39 "AMDVI_MMIO_CMDBUF_BASE",
40 "AMDVI_MMIO_EVTLOG_BASE",
41 "AMDVI_MMIO_CONTROL",
42 "AMDVI_MMIO_EXCL_BASE",
43 "AMDVI_MMIO_EXCL_LIMIT",
44 "AMDVI_MMIO_EXT_FEATURES",
45 "AMDVI_MMIO_PPR_BASE",
46 "UNHANDLED"
48 const char *amdvi_mmio_high[] = {
49 "AMDVI_MMIO_COMMAND_HEAD",
50 "AMDVI_MMIO_COMMAND_TAIL",
51 "AMDVI_MMIO_EVTLOG_HEAD",
52 "AMDVI_MMIO_EVTLOG_TAIL",
53 "AMDVI_MMIO_STATUS",
54 "AMDVI_MMIO_PPR_HEAD",
55 "AMDVI_MMIO_PPR_TAIL",
56 "UNHANDLED"
59 struct AMDVIAddressSpace {
60 uint8_t bus_num; /* bus number */
61 uint8_t devfn; /* device function */
62 AMDVIState *iommu_state; /* AMDVI - one per machine */
63 MemoryRegion root; /* AMDVI Root memory map region */
64 IOMMUMemoryRegion iommu; /* Device's address translation region */
65 MemoryRegion iommu_ir; /* Device's interrupt remapping region */
66 AddressSpace as; /* device's corresponding address space */
69 /* AMDVI cache entry */
70 typedef struct AMDVIIOTLBEntry {
71 uint16_t domid; /* assigned domain id */
72 uint16_t devid; /* device owning entry */
73 uint64_t perms; /* access permissions */
74 uint64_t translated_addr; /* translated address */
75 uint64_t page_mask; /* physical page size */
76 } AMDVIIOTLBEntry;
78 uint64_t amdvi_extended_feature_register(AMDVIState *s)
80 uint64_t feature = AMDVI_DEFAULT_EXT_FEATURES;
81 if (s->xtsup) {
82 feature |= AMDVI_FEATURE_XT;
85 return feature;
88 /* configure MMIO registers at startup/reset */
89 static void amdvi_set_quad(AMDVIState *s, hwaddr addr, uint64_t val,
90 uint64_t romask, uint64_t w1cmask)
92 stq_le_p(&s->mmior[addr], val);
93 stq_le_p(&s->romask[addr], romask);
94 stq_le_p(&s->w1cmask[addr], w1cmask);
97 static uint16_t amdvi_readw(AMDVIState *s, hwaddr addr)
99 return lduw_le_p(&s->mmior[addr]);
102 static uint32_t amdvi_readl(AMDVIState *s, hwaddr addr)
104 return ldl_le_p(&s->mmior[addr]);
107 static uint64_t amdvi_readq(AMDVIState *s, hwaddr addr)
109 return ldq_le_p(&s->mmior[addr]);
112 /* internal write */
113 static void amdvi_writeq_raw(AMDVIState *s, hwaddr addr, uint64_t val)
115 stq_le_p(&s->mmior[addr], val);
118 /* external write */
119 static void amdvi_writew(AMDVIState *s, hwaddr addr, uint16_t val)
121 uint16_t romask = lduw_le_p(&s->romask[addr]);
122 uint16_t w1cmask = lduw_le_p(&s->w1cmask[addr]);
123 uint16_t oldval = lduw_le_p(&s->mmior[addr]);
124 stw_le_p(&s->mmior[addr],
125 ((oldval & romask) | (val & ~romask)) & ~(val & w1cmask));
128 static void amdvi_writel(AMDVIState *s, hwaddr addr, uint32_t val)
130 uint32_t romask = ldl_le_p(&s->romask[addr]);
131 uint32_t w1cmask = ldl_le_p(&s->w1cmask[addr]);
132 uint32_t oldval = ldl_le_p(&s->mmior[addr]);
133 stl_le_p(&s->mmior[addr],
134 ((oldval & romask) | (val & ~romask)) & ~(val & w1cmask));
137 static void amdvi_writeq(AMDVIState *s, hwaddr addr, uint64_t val)
139 uint64_t romask = ldq_le_p(&s->romask[addr]);
140 uint64_t w1cmask = ldq_le_p(&s->w1cmask[addr]);
141 uint64_t oldval = ldq_le_p(&s->mmior[addr]);
142 stq_le_p(&s->mmior[addr],
143 ((oldval & romask) | (val & ~romask)) & ~(val & w1cmask));
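/*
 * Worked example of the masked-write semantics above (illustration only,
 * with hypothetical values): take an 8-bit slice where
 *   romask  = 0xF0  (upper nibble is read-only)
 *   w1cmask = 0x01  (bit 0 is write-1-to-clear)
 *   oldval  = 0xA5  and the guest writes val = 0x0F.
 * Then (oldval & romask) | (val & ~romask) = 0xA0 | 0x0F = 0xAF, and after
 * clearing the write-1-to-clear bits, 0xAF & ~(val & w1cmask) = 0xAE.
 * Read-only bits keep their old value, writable bits take the new value,
 * and w1c bits written as 1 are cleared.
 */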
146 /* test whether any bit of a 64-bit mask is set in a 64-bit register */
147 static bool amdvi_test_mask(AMDVIState *s, hwaddr addr, uint64_t val)
149 return amdvi_readq(s, addr) & val;
152 /* OR a 64-bit register with a 64-bit value storing result in the register */
153 static void amdvi_assign_orq(AMDVIState *s, hwaddr addr, uint64_t val)
155 amdvi_writeq_raw(s, addr, amdvi_readq(s, addr) | val);
158 /* AND a 64-bit register with a 64-bit value storing result in the register */
159 static void amdvi_assign_andq(AMDVIState *s, hwaddr addr, uint64_t val)
161 amdvi_writeq_raw(s, addr, amdvi_readq(s, addr) & val);
164 static void amdvi_generate_msi_interrupt(AMDVIState *s)
166 MSIMessage msg = {};
167 MemTxAttrs attrs = {
168 .requester_id = pci_requester_id(&s->pci.dev)
171 if (msi_enabled(&s->pci.dev)) {
172 msg = msi_get_message(&s->pci.dev, 0);
173 address_space_stl_le(&address_space_memory, msg.address, msg.data,
174 attrs, NULL);
178 static void amdvi_log_event(AMDVIState *s, uint64_t *evt)
180 /* event logging not enabled */
181 if (!s->evtlog_enabled || amdvi_test_mask(s, AMDVI_MMIO_STATUS,
182 AMDVI_MMIO_STATUS_EVT_OVF)) {
183 return;
186 /* event log buffer full */
187 if (s->evtlog_tail >= s->evtlog_len) {
188 amdvi_assign_orq(s, AMDVI_MMIO_STATUS, AMDVI_MMIO_STATUS_EVT_OVF);
189 /* generate interrupt */
190 amdvi_generate_msi_interrupt(s);
191 return;
194 if (dma_memory_write(&address_space_memory, s->evtlog + s->evtlog_tail,
195 evt, AMDVI_EVENT_LEN, MEMTXATTRS_UNSPECIFIED)) {
196 trace_amdvi_evntlog_fail(s->evtlog, s->evtlog_tail);
199 s->evtlog_tail += AMDVI_EVENT_LEN;
200 amdvi_assign_orq(s, AMDVI_MMIO_STATUS, AMDVI_MMIO_STATUS_COMP_INT);
201 amdvi_generate_msi_interrupt(s);
204 static void amdvi_setevent_bits(uint64_t *buffer, uint64_t value, int start,
205 int length)
207 int index = start / 64, bitpos = start % 64;
208 uint64_t mask = MAKE_64BIT_MASK(bitpos, length);
209 buffer[index] &= ~mask;
210 buffer[index] |= (value << bitpos) & mask;
213 * AMDVi event structure
214 * 0:15 -> DeviceID
215 * 48:63 -> event type + miscellaneous info
216 * 64:127 -> related address
218 static void amdvi_encode_event(uint64_t *evt, uint16_t devid, uint64_t addr,
219 uint16_t info)
221 evt[0] = 0;
222 evt[1] = 0;
224 amdvi_setevent_bits(evt, devid, 0, 16);
225 amdvi_setevent_bits(evt, info, 48, 16);
226 amdvi_setevent_bits(evt, addr, 64, 64);
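/*
 * Example of the encoding above (illustration only, hypothetical values):
 * for devid = 0x0010, an info/type value I and addr = 0x12345000,
 * amdvi_encode_event() produces
 *   evt[0] = ((uint64_t)I << 48) | 0x0010   // DeviceID in 0:15, info in 48:63
 *   evt[1] = 0x12345000                     // related address in 64:127
 * which is the 16-byte layout the guest driver reads from the event log.
 */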
228 /* log an error encountered during a page walk
230 * @addr: virtual address in translation request
232 static void amdvi_page_fault(AMDVIState *s, uint16_t devid,
233 hwaddr addr, uint16_t info)
235 uint64_t evt[2];
237 info |= AMDVI_EVENT_IOPF_I | AMDVI_EVENT_IOPF;
238 amdvi_encode_event(evt, devid, addr, info);
239 amdvi_log_event(s, evt);
240 pci_word_test_and_set_mask(s->pci.dev.config + PCI_STATUS,
241 PCI_STATUS_SIG_TARGET_ABORT);
244 * log a master abort accessing device table
245 * @devtab : address of device table entry
246 * @info : error flags
248 static void amdvi_log_devtab_error(AMDVIState *s, uint16_t devid,
249 hwaddr devtab, uint16_t info)
251 uint64_t evt[2];
253 info |= AMDVI_EVENT_DEV_TAB_HW_ERROR;
255 amdvi_encode_event(evt, devid, devtab, info);
256 amdvi_log_event(s, evt);
257 pci_word_test_and_set_mask(s->pci.dev.config + PCI_STATUS,
258 PCI_STATUS_SIG_TARGET_ABORT);
260 /* log an event trying to access command buffer
261 * @addr : address that couldn't be accessed
263 static void amdvi_log_command_error(AMDVIState *s, hwaddr addr)
265 uint64_t evt[2];
266 uint16_t info = AMDVI_EVENT_COMMAND_HW_ERROR;
268 amdvi_encode_event(evt, 0, addr, info);
269 amdvi_log_event(s, evt);
270 pci_word_test_and_set_mask(s->pci.dev.config + PCI_STATUS,
271 PCI_STATUS_SIG_TARGET_ABORT);
273 /* log an illegal command event
274 * @addr : address of illegal command
276 static void amdvi_log_illegalcom_error(AMDVIState *s, uint16_t info,
277 hwaddr addr)
279 uint64_t evt[2];
281 info |= AMDVI_EVENT_ILLEGAL_COMMAND_ERROR;
282 amdvi_encode_event(evt, 0, addr, info);
283 amdvi_log_event(s, evt);
285 /* log an error accessing device table
287 * @devid : device owning the table entry
288 * @devtab : address of device table entry
289 * @info : error flags
291 static void amdvi_log_illegaldevtab_error(AMDVIState *s, uint16_t devid,
292 hwaddr addr, uint16_t info)
294 uint64_t evt[2];
296 info |= AMDVI_EVENT_ILLEGAL_DEVTAB_ENTRY;
297 amdvi_encode_event(evt, devid, addr, info);
298 amdvi_log_event(s, evt);
300 /* log an error accessing a PTE entry
301 * @addr : address that couldn't be accessed
303 static void amdvi_log_pagetab_error(AMDVIState *s, uint16_t devid,
304 hwaddr addr, uint16_t info)
306 uint64_t evt[2];
308 info |= AMDVI_EVENT_PAGE_TAB_HW_ERROR;
309 amdvi_encode_event(evt, devid, addr, info);
310 amdvi_log_event(s, evt);
311 pci_word_test_and_set_mask(s->pci.dev.config + PCI_STATUS,
312 PCI_STATUS_SIG_TARGET_ABORT);
315 static gboolean amdvi_uint64_equal(gconstpointer v1, gconstpointer v2)
317 return *((const uint64_t *)v1) == *((const uint64_t *)v2);
320 static guint amdvi_uint64_hash(gconstpointer v)
322 return (guint)*(const uint64_t *)v;
325 static AMDVIIOTLBEntry *amdvi_iotlb_lookup(AMDVIState *s, hwaddr addr,
326 uint64_t devid)
328 uint64_t key = (addr >> AMDVI_PAGE_SHIFT_4K) |
329 ((uint64_t)(devid) << AMDVI_DEVID_SHIFT);
330 return g_hash_table_lookup(s->iotlb, &key);
333 static void amdvi_iotlb_reset(AMDVIState *s)
335 assert(s->iotlb);
336 trace_amdvi_iotlb_reset();
337 g_hash_table_remove_all(s->iotlb);
340 static gboolean amdvi_iotlb_remove_by_devid(gpointer key, gpointer value,
341 gpointer user_data)
343 AMDVIIOTLBEntry *entry = (AMDVIIOTLBEntry *)value;
344 uint16_t devid = *(uint16_t *)user_data;
345 return entry->devid == devid;
348 static void amdvi_iotlb_remove_page(AMDVIState *s, hwaddr addr,
349 uint64_t devid)
351 uint64_t key = (addr >> AMDVI_PAGE_SHIFT_4K) |
352 ((uint64_t)(devid) << AMDVI_DEVID_SHIFT);
353 g_hash_table_remove(s->iotlb, &key);
356 static void amdvi_update_iotlb(AMDVIState *s, uint16_t devid,
357 uint64_t gpa, IOMMUTLBEntry to_cache,
358 uint16_t domid)
360 AMDVIIOTLBEntry *entry = g_new(AMDVIIOTLBEntry, 1);
361 uint64_t *key = g_new(uint64_t, 1);
362 uint64_t gfn = gpa >> AMDVI_PAGE_SHIFT_4K;
364 /* don't cache erroneous translations */
365 if (to_cache.perm != IOMMU_NONE) {
366 trace_amdvi_cache_update(domid, PCI_BUS_NUM(devid), PCI_SLOT(devid),
367 PCI_FUNC(devid), gpa, to_cache.translated_addr);
369 if (g_hash_table_size(s->iotlb) >= AMDVI_IOTLB_MAX_SIZE) {
370 amdvi_iotlb_reset(s);
373 entry->domid = domid;
374 entry->perms = to_cache.perm;
375 entry->translated_addr = to_cache.translated_addr;
376 entry->page_mask = to_cache.addr_mask;
377 *key = gfn | ((uint64_t)(devid) << AMDVI_DEVID_SHIFT);
378 g_hash_table_replace(s->iotlb, key, entry);
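/*
 * The IOTLB hash key packs the guest frame number and the device ID into a
 * single 64-bit value: key = gfn | ((uint64_t)devid << AMDVI_DEVID_SHIFT).
 * As a hypothetical illustration, devid 0x0008 (bus 0, slot 1, function 0)
 * and gpa 0x7fe000 (gfn 0x7fe) hash under
 *   0x7fe | (0x8ULL << AMDVI_DEVID_SHIFT)
 * so the same guest page cached for two different devices occupies two
 * distinct entries.
 */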
382 static void amdvi_completion_wait(AMDVIState *s, uint64_t *cmd)
384 /* the store address field is 8-byte aligned; the low 3 bits are implied zero */
385 hwaddr addr = cpu_to_le64(extract64(cmd[0], 3, 49)) << 3;
386 uint64_t data = cpu_to_le64(cmd[1]);
388 if (extract64(cmd[0], 52, 8)) {
389 amdvi_log_illegalcom_error(s, extract64(cmd[0], 60, 4),
390 s->cmdbuf + s->cmdbuf_head);
392 if (extract64(cmd[0], 0, 1)) {
393 if (dma_memory_write(&address_space_memory, addr, &data,
394 AMDVI_COMPLETION_DATA_SIZE,
395 MEMTXATTRS_UNSPECIFIED)) {
396 trace_amdvi_completion_wait_fail(addr);
399 /* set completion interrupt */
400 if (extract64(cmd[0], 1, 1)) {
401 amdvi_assign_orq(s, AMDVI_MMIO_STATUS, AMDVI_MMIO_STATUS_COMP_INT);
402 /* generate interrupt */
403 amdvi_generate_msi_interrupt(s);
405 trace_amdvi_completion_wait(addr, data);
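/*
 * Sketch of the COMPLETION_WAIT command handled above, based on the bit
 * fields this code extracts (illustration only):
 *   cmd[0] bit 0      - S: store cmd[1] to the address below on completion
 *   cmd[0] bit 1      - I: raise a completion interrupt (COMP_INT in STATUS)
 *   cmd[0] bits 3:51  - store address, 8-byte aligned
 *   cmd[0] bits 60:63 - opcode (AMDVI_CMD_COMPLETION_WAIT)
 *   cmd[1]            - 64-bit store data
 */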
408 /* log error without aborting since Linux seems to be using reserved bits */
409 static void amdvi_inval_devtab_entry(AMDVIState *s, uint64_t *cmd)
411 uint16_t devid = cpu_to_le16((uint16_t)extract64(cmd[0], 0, 16));
413 /* This command would invalidate internal device caches, of which there are none here */
414 if (extract64(cmd[0], 16, 44) || cmd[1]) {
415 amdvi_log_illegalcom_error(s, extract64(cmd[0], 60, 4),
416 s->cmdbuf + s->cmdbuf_head);
418 trace_amdvi_devtab_inval(PCI_BUS_NUM(devid), PCI_SLOT(devid),
419 PCI_FUNC(devid));
422 static void amdvi_complete_ppr(AMDVIState *s, uint64_t *cmd)
424 if (extract64(cmd[0], 16, 16) || extract64(cmd[0], 52, 8) ||
425 extract64(cmd[1], 0, 2) || extract64(cmd[1], 3, 29)
426 || extract64(cmd[1], 48, 16)) {
427 amdvi_log_illegalcom_error(s, extract64(cmd[0], 60, 4),
428 s->cmdbuf + s->cmdbuf_head);
430 trace_amdvi_ppr_exec();
433 static void amdvi_inval_all(AMDVIState *s, uint64_t *cmd)
435 if (extract64(cmd[0], 0, 60) || cmd[1]) {
436 amdvi_log_illegalcom_error(s, extract64(cmd[0], 60, 4),
437 s->cmdbuf + s->cmdbuf_head);
440 amdvi_iotlb_reset(s);
441 trace_amdvi_all_inval();
444 static gboolean amdvi_iotlb_remove_by_domid(gpointer key, gpointer value,
445 gpointer user_data)
447 AMDVIIOTLBEntry *entry = (AMDVIIOTLBEntry *)value;
448 uint16_t domid = *(uint16_t *)user_data;
449 return entry->domid == domid;
452 /* this command carries a domid, not a devid, so we can't remove pages by address - flush the whole domain instead */
453 static void amdvi_inval_pages(AMDVIState *s, uint64_t *cmd)
455 uint16_t domid = cpu_to_le16((uint16_t)extract64(cmd[0], 32, 16));
457 if (extract64(cmd[0], 20, 12) || extract64(cmd[0], 48, 12) ||
458 extract64(cmd[1], 3, 9)) {
459 amdvi_log_illegalcom_error(s, extract64(cmd[0], 60, 4),
460 s->cmdbuf + s->cmdbuf_head);
463 g_hash_table_foreach_remove(s->iotlb, amdvi_iotlb_remove_by_domid,
464 &domid);
465 trace_amdvi_pages_inval(domid);
468 static void amdvi_prefetch_pages(AMDVIState *s, uint64_t *cmd)
470 if (extract64(cmd[0], 16, 8) || extract64(cmd[0], 52, 8) ||
471 extract64(cmd[1], 1, 1) || extract64(cmd[1], 3, 1) ||
472 extract64(cmd[1], 5, 7)) {
473 amdvi_log_illegalcom_error(s, extract64(cmd[0], 60, 4),
474 s->cmdbuf + s->cmdbuf_head);
477 trace_amdvi_prefetch_pages();
480 static void amdvi_inval_inttable(AMDVIState *s, uint64_t *cmd)
482 if (extract64(cmd[0], 16, 44) || cmd[1]) {
483 amdvi_log_illegalcom_error(s, extract64(cmd[0], 60, 4),
484 s->cmdbuf + s->cmdbuf_head);
485 return;
488 trace_amdvi_intr_inval();
491 /* FIXME: Try to work with the specified size instead of all the pages
492 * when the S bit is on
494 static void iommu_inval_iotlb(AMDVIState *s, uint64_t *cmd)
497 uint16_t devid = extract64(cmd[0], 0, 16);
498 if (extract64(cmd[1], 1, 1) || extract64(cmd[1], 3, 1) ||
499 extract64(cmd[1], 6, 6)) {
500 amdvi_log_illegalcom_error(s, extract64(cmd[0], 60, 4),
501 s->cmdbuf + s->cmdbuf_head);
502 return;
505 if (extract64(cmd[1], 0, 1)) {
506 g_hash_table_foreach_remove(s->iotlb, amdvi_iotlb_remove_by_devid,
507 &devid);
508 } else {
509 amdvi_iotlb_remove_page(s, cpu_to_le64(extract64(cmd[1], 12, 52)) << 12,
510 cpu_to_le16(extract64(cmd[1], 0, 16)));
512 trace_amdvi_iotlb_inval();
515 /* not honouring reserved bits is regarded as an illegal command */
516 static void amdvi_cmdbuf_exec(AMDVIState *s)
518 uint64_t cmd[2];
520 if (dma_memory_read(&address_space_memory, s->cmdbuf + s->cmdbuf_head,
521 cmd, AMDVI_COMMAND_SIZE, MEMTXATTRS_UNSPECIFIED)) {
522 trace_amdvi_command_read_fail(s->cmdbuf, s->cmdbuf_head);
523 amdvi_log_command_error(s, s->cmdbuf + s->cmdbuf_head);
524 return;
527 switch (extract64(cmd[0], 60, 4)) {
528 case AMDVI_CMD_COMPLETION_WAIT:
529 amdvi_completion_wait(s, cmd);
530 break;
531 case AMDVI_CMD_INVAL_DEVTAB_ENTRY:
532 amdvi_inval_devtab_entry(s, cmd);
533 break;
534 case AMDVI_CMD_INVAL_AMDVI_PAGES:
535 amdvi_inval_pages(s, cmd);
536 break;
537 case AMDVI_CMD_INVAL_IOTLB_PAGES:
538 iommu_inval_iotlb(s, cmd);
539 break;
540 case AMDVI_CMD_INVAL_INTR_TABLE:
541 amdvi_inval_inttable(s, cmd);
542 break;
543 case AMDVI_CMD_PREFETCH_AMDVI_PAGES:
544 amdvi_prefetch_pages(s, cmd);
545 break;
546 case AMDVI_CMD_COMPLETE_PPR_REQUEST:
547 amdvi_complete_ppr(s, cmd);
548 break;
549 case AMDVI_CMD_INVAL_AMDVI_ALL:
550 amdvi_inval_all(s, cmd);
551 break;
552 default:
553 trace_amdvi_unhandled_command(extract64(cmd[0], 60, 4));
554 /* log illegal command */
555 amdvi_log_illegalcom_error(s, extract64(cmd[0], 60, 4),
556 s->cmdbuf + s->cmdbuf_head);
560 static void amdvi_cmdbuf_run(AMDVIState *s)
562 if (!s->cmdbuf_enabled) {
563 trace_amdvi_command_error(amdvi_readq(s, AMDVI_MMIO_CONTROL));
564 return;
567 /* check if there is work to do. */
568 while (s->cmdbuf_head != s->cmdbuf_tail) {
569 trace_amdvi_command_exec(s->cmdbuf_head, s->cmdbuf_tail, s->cmdbuf);
570 amdvi_cmdbuf_exec(s);
571 s->cmdbuf_head += AMDVI_COMMAND_SIZE;
572 amdvi_writeq_raw(s, AMDVI_MMIO_COMMAND_HEAD, s->cmdbuf_head);
574 /* wrap head pointer */
575 if (s->cmdbuf_head >= s->cmdbuf_len * AMDVI_COMMAND_SIZE) {
576 s->cmdbuf_head = 0;
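/*
 * Rough picture of the command ring walked above (illustration only):
 * COMMAND_BASE points at a guest buffer of cmdbuf_len 16-byte slots.
 * Software bumps COMMAND_TAIL after queueing commands; the emulation
 * consumes entries one at a time, e.g. with head = 0x20 and tail = 0x40 it
 * executes the commands at offsets 0x20 and 0x30, writes the updated head
 * back to AMDVI_MMIO_COMMAND_HEAD after each one, and wraps head to 0 once
 * it reaches cmdbuf_len * AMDVI_COMMAND_SIZE.
 */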
581 static void amdvi_mmio_trace(hwaddr addr, unsigned size)
583 uint8_t index = (addr & ~0x2000) / 8;
585 if ((addr & 0x2000)) {
586 /* high table */
587 index = index >= AMDVI_MMIO_REGS_HIGH ? AMDVI_MMIO_REGS_HIGH : index;
588 trace_amdvi_mmio_read(amdvi_mmio_high[index], addr, size, addr & ~0x07);
589 } else {
590 index = index >= AMDVI_MMIO_REGS_LOW ? AMDVI_MMIO_REGS_LOW : index;
591 trace_amdvi_mmio_read(amdvi_mmio_low[index], addr, size, addr & ~0x07);
595 static uint64_t amdvi_mmio_read(void *opaque, hwaddr addr, unsigned size)
597 AMDVIState *s = opaque;
599 uint64_t val = -1;
600 if (addr + size > AMDVI_MMIO_SIZE) {
601 trace_amdvi_mmio_read_invalid(AMDVI_MMIO_SIZE, addr, size);
602 return (uint64_t)-1;
605 if (size == 2) {
606 val = amdvi_readw(s, addr);
607 } else if (size == 4) {
608 val = amdvi_readl(s, addr);
609 } else if (size == 8) {
610 val = amdvi_readq(s, addr);
612 amdvi_mmio_trace(addr, size);
614 return val;
617 static void amdvi_handle_control_write(AMDVIState *s)
619 unsigned long control = amdvi_readq(s, AMDVI_MMIO_CONTROL);
620 s->enabled = !!(control & AMDVI_MMIO_CONTROL_AMDVIEN);
622 s->ats_enabled = !!(control & AMDVI_MMIO_CONTROL_HTTUNEN);
623 s->evtlog_enabled = s->enabled && !!(control &
624 AMDVI_MMIO_CONTROL_EVENTLOGEN);
626 s->evtlog_intr = !!(control & AMDVI_MMIO_CONTROL_EVENTINTEN);
627 s->completion_wait_intr = !!(control & AMDVI_MMIO_CONTROL_COMWAITINTEN);
628 s->cmdbuf_enabled = s->enabled && !!(control &
629 AMDVI_MMIO_CONTROL_CMDBUFLEN);
630 s->ga_enabled = !!(control & AMDVI_MMIO_CONTROL_GAEN);
632 /* update the flags depending on the control register */
633 if (s->cmdbuf_enabled) {
634 amdvi_assign_orq(s, AMDVI_MMIO_STATUS, AMDVI_MMIO_STATUS_CMDBUF_RUN);
635 } else {
636 amdvi_assign_andq(s, AMDVI_MMIO_STATUS, ~AMDVI_MMIO_STATUS_CMDBUF_RUN);
638 if (s->evtlog_enabled) {
639 amdvi_assign_orq(s, AMDVI_MMIO_STATUS, AMDVI_MMIO_STATUS_EVT_RUN);
640 } else {
641 amdvi_assign_andq(s, AMDVI_MMIO_STATUS, ~AMDVI_MMIO_STATUS_EVT_RUN);
644 trace_amdvi_control_status(control);
645 amdvi_cmdbuf_run(s);
648 static inline void amdvi_handle_devtab_write(AMDVIState *s)
651 uint64_t val = amdvi_readq(s, AMDVI_MMIO_DEVICE_TABLE);
652 s->devtab = (val & AMDVI_MMIO_DEVTAB_BASE_MASK);
654 /* set device table length */
655 s->devtab_len = ((val & AMDVI_MMIO_DEVTAB_SIZE_MASK) + 1) *
656 (AMDVI_MMIO_DEVTAB_SIZE_UNIT /
657 AMDVI_MMIO_DEVTAB_ENTRY_SIZE);
660 static inline void amdvi_handle_cmdhead_write(AMDVIState *s)
662 s->cmdbuf_head = amdvi_readq(s, AMDVI_MMIO_COMMAND_HEAD)
663 & AMDVI_MMIO_CMDBUF_HEAD_MASK;
664 amdvi_cmdbuf_run(s);
667 static inline void amdvi_handle_cmdbase_write(AMDVIState *s)
669 s->cmdbuf = amdvi_readq(s, AMDVI_MMIO_COMMAND_BASE)
670 & AMDVI_MMIO_CMDBUF_BASE_MASK;
671 s->cmdbuf_len = 1UL << (amdvi_readq(s, AMDVI_MMIO_CMDBUF_SIZE_BYTE)
672 & AMDVI_MMIO_CMDBUF_SIZE_MASK);
673 s->cmdbuf_head = s->cmdbuf_tail = 0;
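/*
 * The size byte of the command buffer register encodes a power of two, so
 * cmdbuf_len above is a number of 16-byte entries: a hypothetical size
 * value of 8 gives 1 << 8 = 256 entries, i.e. a 4KiB ring.
 */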
676 static inline void amdvi_handle_cmdtail_write(AMDVIState *s)
678 s->cmdbuf_tail = amdvi_readq(s, AMDVI_MMIO_COMMAND_TAIL)
679 & AMDVI_MMIO_CMDBUF_TAIL_MASK;
680 amdvi_cmdbuf_run(s);
683 static inline void amdvi_handle_excllim_write(AMDVIState *s)
685 uint64_t val = amdvi_readq(s, AMDVI_MMIO_EXCL_LIMIT);
686 s->excl_limit = (val & AMDVI_MMIO_EXCL_LIMIT_MASK) |
687 AMDVI_MMIO_EXCL_LIMIT_LOW;
690 static inline void amdvi_handle_evtbase_write(AMDVIState *s)
692 uint64_t val = amdvi_readq(s, AMDVI_MMIO_EVENT_BASE);
693 s->evtlog = val & AMDVI_MMIO_EVTLOG_BASE_MASK;
694 s->evtlog_len = 1UL << (amdvi_readq(s, AMDVI_MMIO_EVTLOG_SIZE_BYTE)
695 & AMDVI_MMIO_EVTLOG_SIZE_MASK);
698 static inline void amdvi_handle_evttail_write(AMDVIState *s)
700 uint64_t val = amdvi_readq(s, AMDVI_MMIO_EVENT_TAIL);
701 s->evtlog_tail = val & AMDVI_MMIO_EVTLOG_TAIL_MASK;
704 static inline void amdvi_handle_evthead_write(AMDVIState *s)
706 uint64_t val = amdvi_readq(s, AMDVI_MMIO_EVENT_HEAD);
707 s->evtlog_head = val & AMDVI_MMIO_EVTLOG_HEAD_MASK;
710 static inline void amdvi_handle_pprbase_write(AMDVIState *s)
712 uint64_t val = amdvi_readq(s, AMDVI_MMIO_PPR_BASE);
713 s->ppr_log = val & AMDVI_MMIO_PPRLOG_BASE_MASK;
714 s->pprlog_len = 1UL << (amdvi_readq(s, AMDVI_MMIO_PPRLOG_SIZE_BYTE)
715 & AMDVI_MMIO_PPRLOG_SIZE_MASK);
718 static inline void amdvi_handle_pprhead_write(AMDVIState *s)
720 uint64_t val = amdvi_readq(s, AMDVI_MMIO_PPR_HEAD);
721 s->pprlog_head = val & AMDVI_MMIO_PPRLOG_HEAD_MASK;
724 static inline void amdvi_handle_pprtail_write(AMDVIState *s)
726 uint64_t val = amdvi_readq(s, AMDVI_MMIO_PPR_TAIL);
727 s->pprlog_tail = val & AMDVI_MMIO_PPRLOG_TAIL_MASK;
730 /* FIXME: something might go wrong if System Software writes in chunks
731 * of one byte, but Linux writes in chunks of 4 bytes, so this currently
732 * works correctly with Linux and will definitely break if software
733 * reads/writes 8 bytes
735 static void amdvi_mmio_reg_write(AMDVIState *s, unsigned size, uint64_t val,
736 hwaddr addr)
738 if (size == 2) {
739 amdvi_writew(s, addr, val);
740 } else if (size == 4) {
741 amdvi_writel(s, addr, val);
742 } else if (size == 8) {
743 amdvi_writeq(s, addr, val);
747 static void amdvi_mmio_write(void *opaque, hwaddr addr, uint64_t val,
748 unsigned size)
750 AMDVIState *s = opaque;
751 unsigned long offset = addr & 0x07;
753 if (addr + size > AMDVI_MMIO_SIZE) {
754 trace_amdvi_mmio_write("error: addr outside region: max ",
755 (uint64_t)AMDVI_MMIO_SIZE, size, val, offset);
756 return;
759 amdvi_mmio_trace(addr, size);
760 switch (addr & ~0x07) {
761 case AMDVI_MMIO_CONTROL:
762 amdvi_mmio_reg_write(s, size, val, addr);
763 amdvi_handle_control_write(s);
764 break;
765 case AMDVI_MMIO_DEVICE_TABLE:
766 amdvi_mmio_reg_write(s, size, val, addr);
767 /* set device table address
768 * This also suffers from the inability to tell whether software
769 * is done writing
771 if (offset || (size == 8)) {
772 amdvi_handle_devtab_write(s);
774 break;
775 case AMDVI_MMIO_COMMAND_HEAD:
776 amdvi_mmio_reg_write(s, size, val, addr);
777 amdvi_handle_cmdhead_write(s);
778 break;
779 case AMDVI_MMIO_COMMAND_BASE:
780 amdvi_mmio_reg_write(s, size, val, addr);
781 /* FIXME - make sure System Software has finished writing, in case
782 * it writes in chunks of less than 8 bytes, in a robust way. For
783 * now, this hack works for the Linux driver
785 if (offset || (size == 8)) {
786 amdvi_handle_cmdbase_write(s);
788 break;
789 case AMDVI_MMIO_COMMAND_TAIL:
790 amdvi_mmio_reg_write(s, size, val, addr);
791 amdvi_handle_cmdtail_write(s);
792 break;
793 case AMDVI_MMIO_EVENT_BASE:
794 amdvi_mmio_reg_write(s, size, val, addr);
795 amdvi_handle_evtbase_write(s);
796 break;
797 case AMDVI_MMIO_EVENT_HEAD:
798 amdvi_mmio_reg_write(s, size, val, addr);
799 amdvi_handle_evthead_write(s);
800 break;
801 case AMDVI_MMIO_EVENT_TAIL:
802 amdvi_mmio_reg_write(s, size, val, addr);
803 amdvi_handle_evttail_write(s);
804 break;
805 case AMDVI_MMIO_EXCL_LIMIT:
806 amdvi_mmio_reg_write(s, size, val, addr);
807 amdvi_handle_excllim_write(s);
808 break;
809 /* PPR log base - unused for now */
810 case AMDVI_MMIO_PPR_BASE:
811 amdvi_mmio_reg_write(s, size, val, addr);
812 amdvi_handle_pprbase_write(s);
813 break;
814 /* PPR log head - also unused for now */
815 case AMDVI_MMIO_PPR_HEAD:
816 amdvi_mmio_reg_write(s, size, val, addr);
817 amdvi_handle_pprhead_write(s);
818 break;
819 /* PPR log tail - unused for now */
820 case AMDVI_MMIO_PPR_TAIL:
821 amdvi_mmio_reg_write(s, size, val, addr);
822 amdvi_handle_pprtail_write(s);
823 break;
827 static inline uint64_t amdvi_get_perms(uint64_t entry)
829 return (entry & (AMDVI_DEV_PERM_READ | AMDVI_DEV_PERM_WRITE)) >>
830 AMDVI_DEV_PERM_SHIFT;
833 /* validate that reserved bits are honoured */
834 static bool amdvi_validate_dte(AMDVIState *s, uint16_t devid,
835 uint64_t *dte)
837 if ((dte[0] & AMDVI_DTE_LOWER_QUAD_RESERVED)
838 || (dte[1] & AMDVI_DTE_MIDDLE_QUAD_RESERVED)
839 || (dte[2] & AMDVI_DTE_UPPER_QUAD_RESERVED) || dte[3]) {
840 amdvi_log_illegaldevtab_error(s, devid,
841 s->devtab +
842 devid * AMDVI_DEVTAB_ENTRY_SIZE, 0);
843 return false;
846 return true;
849 /* get a device table entry given the devid */
850 static bool amdvi_get_dte(AMDVIState *s, int devid, uint64_t *entry)
852 uint32_t offset = devid * AMDVI_DEVTAB_ENTRY_SIZE;
854 if (dma_memory_read(&address_space_memory, s->devtab + offset, entry,
855 AMDVI_DEVTAB_ENTRY_SIZE, MEMTXATTRS_UNSPECIFIED)) {
856 trace_amdvi_dte_get_fail(s->devtab, offset);
857 /* log error accessing dte */
858 amdvi_log_devtab_error(s, devid, s->devtab + offset, 0);
859 return false;
862 *entry = le64_to_cpu(*entry);
863 if (!amdvi_validate_dte(s, devid, entry)) {
864 trace_amdvi_invalid_dte(entry[0]);
865 return false;
868 return true;
871 /* get pte translation mode */
872 static inline uint8_t get_pte_translation_mode(uint64_t pte)
874 return (pte >> AMDVI_DEV_MODE_RSHIFT) & AMDVI_DEV_MODE_MASK;
877 static inline uint64_t pte_override_page_mask(uint64_t pte)
879 uint8_t page_mask = 13;
880 uint64_t addr = (pte & AMDVI_DEV_PT_ROOT_MASK) >> 12;
881 /* find the first zero bit */
882 while (addr & 1) {
883 page_mask++;
884 addr = addr >> 1;
887 return ~((1ULL << page_mask) - 1);
890 static inline uint64_t pte_get_page_mask(uint64_t oldlevel)
892 return ~((1ULL << ((oldlevel * 9) + 3)) - 1);
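/*
 * pte_get_page_mask() roughly maps the level of the table that held the
 * final PTE to the size of the region that PTE covers: each level adds 9
 * address bits on top of the 12-bit page offset, so oldlevel == 1 yields a
 * ~0xFFF mask (4KiB pages), oldlevel == 2 a 2MiB mask and oldlevel == 3 a
 * 1GiB mask.
 */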
895 static inline uint64_t amdvi_get_pte_entry(AMDVIState *s, uint64_t pte_addr,
896 uint16_t devid)
898 uint64_t pte;
900 if (dma_memory_read(&address_space_memory, pte_addr,
901 &pte, sizeof(pte), MEMTXATTRS_UNSPECIFIED)) {
902 trace_amdvi_get_pte_hwerror(pte_addr);
903 amdvi_log_pagetab_error(s, devid, pte_addr, 0);
904 pte = 0;
905 return pte;
908 pte = le64_to_cpu(pte);
909 return pte;
912 static void amdvi_page_walk(AMDVIAddressSpace *as, uint64_t *dte,
913 IOMMUTLBEntry *ret, unsigned perms,
914 hwaddr addr)
916 unsigned level, present, pte_perms, oldlevel;
917 uint64_t pte = dte[0], pte_addr, page_mask;
919 /* make sure the DTE has TV = 1 */
920 if (pte & AMDVI_DEV_TRANSLATION_VALID) {
921 level = get_pte_translation_mode(pte);
922 if (level >= 7) {
923 trace_amdvi_mode_invalid(level, addr);
924 return;
926 if (level == 0) {
927 goto no_remap;
930 /* we are at the leaf page table or page table encodes a huge page */
931 do {
932 pte_perms = amdvi_get_perms(pte);
933 present = pte & 1;
934 if (!present || perms != (perms & pte_perms)) {
935 amdvi_page_fault(as->iommu_state, as->devfn, addr, perms);
936 trace_amdvi_page_fault(addr);
937 return;
940 /* go to the next lower level */
941 pte_addr = pte & AMDVI_DEV_PT_ROOT_MASK;
942 /* add offset and load pte */
943 pte_addr += ((addr >> (3 + 9 * level)) & 0x1FF) << 3;
944 pte = amdvi_get_pte_entry(as->iommu_state, pte_addr, as->devfn);
945 if (!pte) {
946 return;
948 oldlevel = level;
949 level = get_pte_translation_mode(pte);
950 } while (level > 0 && level < 7);
952 if (level == 0x7) {
953 page_mask = pte_override_page_mask(pte);
954 } else {
955 page_mask = pte_get_page_mask(oldlevel);
958 /* get access permissions from pte */
959 ret->iova = addr & page_mask;
960 ret->translated_addr = (pte & AMDVI_DEV_PT_ROOT_MASK) & page_mask;
961 ret->addr_mask = ~page_mask;
962 ret->perm = amdvi_get_perms(pte);
963 return;
965 no_remap:
966 ret->iova = addr & AMDVI_PAGE_MASK_4K;
967 ret->translated_addr = addr & AMDVI_PAGE_MASK_4K;
968 ret->addr_mask = ~AMDVI_PAGE_MASK_4K;
969 ret->perm = amdvi_get_perms(pte);
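/*
 * Worked example of the walk above (illustration only, hypothetical
 * addresses): with a 3-level table and IOVA 0x40201000, each level consumes
 * 9 bits of the address above the 12-bit page offset:
 *   level 3 index = (0x40201000 >> 30) & 0x1FF = 1
 *   level 2 index = (0x40201000 >> 21) & 0x1FF = 1
 *   level 1 index = (0x40201000 >> 12) & 0x1FF = 1
 * and pte_addr at each step is the table base from the previous PTE plus
 * index * 8, exactly what ((addr >> (3 + 9 * level)) & 0x1FF) << 3 computes.
 */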
972 static void amdvi_do_translate(AMDVIAddressSpace *as, hwaddr addr,
973 bool is_write, IOMMUTLBEntry *ret)
975 AMDVIState *s = as->iommu_state;
976 uint16_t devid = PCI_BUILD_BDF(as->bus_num, as->devfn);
977 AMDVIIOTLBEntry *iotlb_entry = amdvi_iotlb_lookup(s, addr, devid);
978 uint64_t entry[4];
980 if (iotlb_entry) {
981 trace_amdvi_iotlb_hit(PCI_BUS_NUM(devid), PCI_SLOT(devid),
982 PCI_FUNC(devid), addr, iotlb_entry->translated_addr);
983 ret->iova = addr & ~iotlb_entry->page_mask;
984 ret->translated_addr = iotlb_entry->translated_addr;
985 ret->addr_mask = iotlb_entry->page_mask;
986 ret->perm = iotlb_entry->perms;
987 return;
990 if (!amdvi_get_dte(s, devid, entry)) {
991 return;
994 /* devices with V = 0 are not translated */
995 if (!(entry[0] & AMDVI_DEV_VALID)) {
996 goto out;
999 amdvi_page_walk(as, entry, ret,
1000 is_write ? AMDVI_PERM_WRITE : AMDVI_PERM_READ, addr);
1002 amdvi_update_iotlb(s, devid, addr, *ret,
1003 entry[1] & AMDVI_DEV_DOMID_ID_MASK);
1004 return;
1006 out:
1007 ret->iova = addr & AMDVI_PAGE_MASK_4K;
1008 ret->translated_addr = addr & AMDVI_PAGE_MASK_4K;
1009 ret->addr_mask = ~AMDVI_PAGE_MASK_4K;
1010 ret->perm = IOMMU_RW;
1013 static inline bool amdvi_is_interrupt_addr(hwaddr addr)
1015 return addr >= AMDVI_INT_ADDR_FIRST && addr <= AMDVI_INT_ADDR_LAST;
1018 static IOMMUTLBEntry amdvi_translate(IOMMUMemoryRegion *iommu, hwaddr addr,
1019 IOMMUAccessFlags flag, int iommu_idx)
1021 AMDVIAddressSpace *as = container_of(iommu, AMDVIAddressSpace, iommu);
1022 AMDVIState *s = as->iommu_state;
1023 IOMMUTLBEntry ret = {
1024 .target_as = &address_space_memory,
1025 .iova = addr,
1026 .translated_addr = 0,
1027 .addr_mask = ~(hwaddr)0,
1028 .perm = IOMMU_NONE
1031 if (!s->enabled) {
1032 /* AMDVI disabled - corresponds to iommu=off, not to a
1033 * failure to provide any parameter
1035 ret.iova = addr & AMDVI_PAGE_MASK_4K;
1036 ret.translated_addr = addr & AMDVI_PAGE_MASK_4K;
1037 ret.addr_mask = ~AMDVI_PAGE_MASK_4K;
1038 ret.perm = IOMMU_RW;
1039 return ret;
1040 } else if (amdvi_is_interrupt_addr(addr)) {
1041 ret.iova = addr & AMDVI_PAGE_MASK_4K;
1042 ret.translated_addr = addr & AMDVI_PAGE_MASK_4K;
1043 ret.addr_mask = ~AMDVI_PAGE_MASK_4K;
1044 ret.perm = IOMMU_WO;
1045 return ret;
1048 amdvi_do_translate(as, addr, flag & IOMMU_WO, &ret);
1049 trace_amdvi_translation_result(as->bus_num, PCI_SLOT(as->devfn),
1050 PCI_FUNC(as->devfn), addr, ret.translated_addr);
1051 return ret;
1054 static int amdvi_get_irte(AMDVIState *s, MSIMessage *origin, uint64_t *dte,
1055 union irte *irte, uint16_t devid)
1057 uint64_t irte_root, offset;
1059 irte_root = dte[2] & AMDVI_IR_PHYS_ADDR_MASK;
1060 offset = (origin->data & AMDVI_IRTE_OFFSET) << 2;
1062 trace_amdvi_ir_irte(irte_root, offset);
1064 if (dma_memory_read(&address_space_memory, irte_root + offset,
1065 irte, sizeof(*irte), MEMTXATTRS_UNSPECIFIED)) {
1066 trace_amdvi_ir_err("failed to get irte");
1067 return -AMDVI_IR_GET_IRTE;
1070 trace_amdvi_ir_irte_val(irte->val);
1072 return 0;
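/*
 * In the legacy format handled here each IRTE is 4 bytes, so the entry for
 * a given MSI lives at
 *   interrupt-table root (from the DTE) + (MSI data & AMDVI_IRTE_OFFSET) * 4
 * which is what the "offset << 2" above computes. A hypothetical MSI data
 * value of 0x25 therefore selects the IRTE at byte offset 0x94.
 */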
1075 static int amdvi_int_remap_legacy(AMDVIState *iommu,
1076 MSIMessage *origin,
1077 MSIMessage *translated,
1078 uint64_t *dte,
1079 X86IOMMUIrq *irq,
1080 uint16_t sid)
1082 int ret;
1083 union irte irte;
1085 /* get interrupt remapping table */
1086 ret = amdvi_get_irte(iommu, origin, dte, &irte, sid);
1087 if (ret < 0) {
1088 return ret;
1091 if (!irte.fields.valid) {
1092 trace_amdvi_ir_target_abort("RemapEn is disabled");
1093 return -AMDVI_IR_TARGET_ABORT;
1096 if (irte.fields.guest_mode) {
1097 error_report_once("guest mode is not zero");
1098 return -AMDVI_IR_ERR;
1101 if (irte.fields.int_type > AMDVI_IOAPIC_INT_TYPE_ARBITRATED) {
1102 error_report_once("reserved int_type");
1103 return -AMDVI_IR_ERR;
1106 irq->delivery_mode = irte.fields.int_type;
1107 irq->vector = irte.fields.vector;
1108 irq->dest_mode = irte.fields.dm;
1109 irq->redir_hint = irte.fields.rq_eoi;
1110 irq->dest = irte.fields.destination;
1112 return 0;
1115 static int amdvi_get_irte_ga(AMDVIState *s, MSIMessage *origin, uint64_t *dte,
1116 struct irte_ga *irte, uint16_t devid)
1118 uint64_t irte_root, offset;
1120 irte_root = dte[2] & AMDVI_IR_PHYS_ADDR_MASK;
1121 offset = (origin->data & AMDVI_IRTE_OFFSET) << 4;
1122 trace_amdvi_ir_irte(irte_root, offset);
1124 if (dma_memory_read(&address_space_memory, irte_root + offset,
1125 irte, sizeof(*irte), MEMTXATTRS_UNSPECIFIED)) {
1126 trace_amdvi_ir_err("failed to get irte_ga");
1127 return -AMDVI_IR_GET_IRTE;
1130 trace_amdvi_ir_irte_ga_val(irte->hi.val, irte->lo.val);
1131 return 0;
1134 static int amdvi_int_remap_ga(AMDVIState *iommu,
1135 MSIMessage *origin,
1136 MSIMessage *translated,
1137 uint64_t *dte,
1138 X86IOMMUIrq *irq,
1139 uint16_t sid)
1141 int ret;
1142 struct irte_ga irte;
1144 /* get interrupt remapping table */
1145 ret = amdvi_get_irte_ga(iommu, origin, dte, &irte, sid);
1146 if (ret < 0) {
1147 return ret;
1150 if (!irte.lo.fields_remap.valid) {
1151 trace_amdvi_ir_target_abort("RemapEn is disabled");
1152 return -AMDVI_IR_TARGET_ABORT;
1155 if (irte.lo.fields_remap.guest_mode) {
1156 error_report_once("guest mode is not zero");
1157 return -AMDVI_IR_ERR;
1160 if (irte.lo.fields_remap.int_type > AMDVI_IOAPIC_INT_TYPE_ARBITRATED) {
1161 error_report_once("reserved int_type is set");
1162 return -AMDVI_IR_ERR;
1165 irq->delivery_mode = irte.lo.fields_remap.int_type;
1166 irq->vector = irte.hi.fields.vector;
1167 irq->dest_mode = irte.lo.fields_remap.dm;
1168 irq->redir_hint = irte.lo.fields_remap.rq_eoi;
1169 if (iommu->xtsup) {
1170 irq->dest = irte.lo.fields_remap.destination |
1171 (irte.hi.fields.destination_hi << 24);
1172 } else {
1173 irq->dest = irte.lo.fields_remap.destination & 0xff;
1176 return 0;
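/*
 * The guest-virtual-APIC (GA) format uses 128-bit IRTEs, hence the
 * "offset << 4" in amdvi_get_irte_ga() versus "<< 2" for the 4-byte legacy
 * entries. With xtsup enabled the destination APIC ID is split across the
 * two halves of the entry: the low bits live in lo.fields_remap.destination
 * and the upper bits in hi.fields.destination_hi, recombined above as
 * (destination | destination_hi << 24).
 */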
1179 static int __amdvi_int_remap_msi(AMDVIState *iommu,
1180 MSIMessage *origin,
1181 MSIMessage *translated,
1182 uint64_t *dte,
1183 X86IOMMUIrq *irq,
1184 uint16_t sid)
1186 int ret;
1187 uint8_t int_ctl;
1189 int_ctl = (dte[2] >> AMDVI_IR_INTCTL_SHIFT) & 3;
1190 trace_amdvi_ir_intctl(int_ctl);
1192 switch (int_ctl) {
1193 case AMDVI_IR_INTCTL_PASS:
1194 memcpy(translated, origin, sizeof(*origin));
1195 return 0;
1196 case AMDVI_IR_INTCTL_REMAP:
1197 break;
1198 case AMDVI_IR_INTCTL_ABORT:
1199 trace_amdvi_ir_target_abort("int_ctl abort");
1200 return -AMDVI_IR_TARGET_ABORT;
1201 default:
1202 trace_amdvi_ir_err("int_ctl reserved");
1203 return -AMDVI_IR_ERR;
1206 if (iommu->ga_enabled) {
1207 ret = amdvi_int_remap_ga(iommu, origin, translated, dte, irq, sid);
1208 } else {
1209 ret = amdvi_int_remap_legacy(iommu, origin, translated, dte, irq, sid);
1212 return ret;
1215 /* Interrupt remapping for MSI/MSI-X entry */
1216 static int amdvi_int_remap_msi(AMDVIState *iommu,
1217 MSIMessage *origin,
1218 MSIMessage *translated,
1219 uint16_t sid)
1221 int ret = 0;
1222 uint64_t pass = 0;
1223 uint64_t dte[4] = { 0 };
1224 X86IOMMUIrq irq = { 0 };
1225 uint8_t dest_mode, delivery_mode;
1227 assert(origin && translated);
1230 * When the IOMMU is enabled, an interrupt remap request will come either
1231 * from the IO-APIC or from a PCI device. If the interrupt is from a PCI
1232 * device it will have a valid requester id, but if it is from the IO-APIC
1233 * the requester id will be invalid.
1235 if (sid == X86_IOMMU_SID_INVALID) {
1236 sid = AMDVI_IOAPIC_SB_DEVID;
1239 trace_amdvi_ir_remap_msi_req(origin->address, origin->data, sid);
1241 /* check if device table entry is set before we go further. */
1242 if (!iommu || !iommu->devtab_len) {
1243 memcpy(translated, origin, sizeof(*origin));
1244 goto out;
1247 if (!amdvi_get_dte(iommu, sid, dte)) {
1248 return -AMDVI_IR_ERR;
1251 /* Check if IR is enabled in DTE */
1252 if (!(dte[2] & AMDVI_IR_REMAP_ENABLE)) {
1253 memcpy(translated, origin, sizeof(*origin));
1254 goto out;
1257 /* validate that we are configured with intremap=on */
1258 if (!x86_iommu_ir_supported(X86_IOMMU_DEVICE(iommu))) {
1259 trace_amdvi_err("Interrupt remapping is enabled in the guest but "
1260 "not in the host. Use intremap=on to enable interrupt "
1261 "remapping in amd-iommu.");
1262 return -AMDVI_IR_ERR;
1265 if (origin->address < AMDVI_INT_ADDR_FIRST ||
1266 origin->address + sizeof(origin->data) > AMDVI_INT_ADDR_LAST + 1) {
1267 trace_amdvi_err("MSI is not from IOAPIC.");
1268 return -AMDVI_IR_ERR;
1272 * Bits [10:8] of the MSI data register are used to get the upstream interrupt type.
1274 * See MSI/MSI-X format:
1275 * https://pdfs.semanticscholar.org/presentation/9420/c279e942eca568157711ef5c92b800c40a79.pdf
1276 * (page 5)
1278 delivery_mode = (origin->data >> MSI_DATA_DELIVERY_MODE_SHIFT) & 7;
1280 switch (delivery_mode) {
1281 case AMDVI_IOAPIC_INT_TYPE_FIXED:
1282 case AMDVI_IOAPIC_INT_TYPE_ARBITRATED:
1283 trace_amdvi_ir_delivery_mode("fixed/arbitrated");
1284 ret = __amdvi_int_remap_msi(iommu, origin, translated, dte, &irq, sid);
1285 if (ret < 0) {
1286 goto remap_fail;
1287 } else {
1288 /* Translate IRQ to MSI messages */
1289 x86_iommu_irq_to_msi_message(&irq, translated);
1290 goto out;
1292 break;
1293 case AMDVI_IOAPIC_INT_TYPE_SMI:
1294 error_report("SMI is not supported!");
1295 ret = -AMDVI_IR_ERR;
1296 break;
1297 case AMDVI_IOAPIC_INT_TYPE_NMI:
1298 pass = dte[3] & AMDVI_DEV_NMI_PASS_MASK;
1299 trace_amdvi_ir_delivery_mode("nmi");
1300 break;
1301 case AMDVI_IOAPIC_INT_TYPE_INIT:
1302 pass = dte[3] & AMDVI_DEV_INT_PASS_MASK;
1303 trace_amdvi_ir_delivery_mode("init");
1304 break;
1305 case AMDVI_IOAPIC_INT_TYPE_EINT:
1306 pass = dte[3] & AMDVI_DEV_EINT_PASS_MASK;
1307 trace_amdvi_ir_delivery_mode("eint");
1308 break;
1309 default:
1310 trace_amdvi_ir_delivery_mode("unsupported delivery_mode");
1311 ret = -AMDVI_IR_ERR;
1312 break;
1315 if (ret < 0) {
1316 goto remap_fail;
1320 * The MSI address register bit[2] is used to get the destination
1321 * mode. The dest_mode 1 is valid for fixed and arbitrated interrupts
1322 * only.
1324 dest_mode = (origin->address >> MSI_ADDR_DEST_MODE_SHIFT) & 1;
1325 if (dest_mode) {
1326 trace_amdvi_ir_err("invalid dest_mode");
1327 ret = -AMDVI_IR_ERR;
1328 goto remap_fail;
1331 if (pass) {
1332 memcpy(translated, origin, sizeof(*origin));
1333 } else {
1334 trace_amdvi_ir_err("passthrough is not enabled");
1335 ret = -AMDVI_IR_ERR;
1336 goto remap_fail;
1339 out:
1340 trace_amdvi_ir_remap_msi(origin->address, origin->data,
1341 translated->address, translated->data);
1342 return 0;
1344 remap_fail:
1345 return ret;
1348 static int amdvi_int_remap(X86IOMMUState *iommu,
1349 MSIMessage *origin,
1350 MSIMessage *translated,
1351 uint16_t sid)
1353 return amdvi_int_remap_msi(AMD_IOMMU_DEVICE(iommu), origin,
1354 translated, sid);
1357 static MemTxResult amdvi_mem_ir_write(void *opaque, hwaddr addr,
1358 uint64_t value, unsigned size,
1359 MemTxAttrs attrs)
1361 int ret;
1362 MSIMessage from = { 0, 0 }, to = { 0, 0 };
1363 uint16_t sid = AMDVI_IOAPIC_SB_DEVID;
1365 from.address = (uint64_t) addr + AMDVI_INT_ADDR_FIRST;
1366 from.data = (uint32_t) value;
1368 trace_amdvi_mem_ir_write_req(addr, value, size);
1370 if (!attrs.unspecified) {
1371 /* We have explicit Source ID */
1372 sid = attrs.requester_id;
1375 ret = amdvi_int_remap_msi(opaque, &from, &to, sid);
1376 if (ret < 0) {
1377 /* TODO: log the event using IOMMU log event interface */
1378 error_report_once("failed to remap interrupt from devid 0x%x", sid);
1379 return MEMTX_ERROR;
1382 apic_get_class(NULL)->send_msi(&to);
1384 trace_amdvi_mem_ir_write(to.address, to.data);
1385 return MEMTX_OK;
1388 static MemTxResult amdvi_mem_ir_read(void *opaque, hwaddr addr,
1389 uint64_t *data, unsigned size,
1390 MemTxAttrs attrs)
1392 return MEMTX_OK;
1395 static const MemoryRegionOps amdvi_ir_ops = {
1396 .read_with_attrs = amdvi_mem_ir_read,
1397 .write_with_attrs = amdvi_mem_ir_write,
1398 .endianness = DEVICE_LITTLE_ENDIAN,
1399 .impl = {
1400 .min_access_size = 4,
1401 .max_access_size = 4,
1403 .valid = {
1404 .min_access_size = 4,
1405 .max_access_size = 4,
1409 static AddressSpace *amdvi_host_dma_iommu(PCIBus *bus, void *opaque, int devfn)
1411 char name[128];
1412 AMDVIState *s = opaque;
1413 AMDVIAddressSpace **iommu_as, *amdvi_dev_as;
1414 int bus_num = pci_bus_num(bus);
1416 iommu_as = s->address_spaces[bus_num];
1418 /* allocate memory during the first run */
1419 if (!iommu_as) {
1420 iommu_as = g_new0(AMDVIAddressSpace *, PCI_DEVFN_MAX);
1421 s->address_spaces[bus_num] = iommu_as;
1424 /* set up AMD-Vi region */
1425 if (!iommu_as[devfn]) {
1426 snprintf(name, sizeof(name), "amd_iommu_devfn_%d", devfn);
1428 iommu_as[devfn] = g_new0(AMDVIAddressSpace, 1);
1429 iommu_as[devfn]->bus_num = (uint8_t)bus_num;
1430 iommu_as[devfn]->devfn = (uint8_t)devfn;
1431 iommu_as[devfn]->iommu_state = s;
1433 amdvi_dev_as = iommu_as[devfn];
1436 * The memory region relationships look like this (address ranges show
1437 * only the lower 32 bits for brevity):
1439 * |-----------------+-------------------+----------|
1440 * | Name | Address range | Priority |
1441 * |-----------------+-------------------+----------+
1442 * | amdvi_root | 00000000-ffffffff | 0 |
1443 * | amdvi_iommu | 00000000-ffffffff | 1 |
1444 * | amdvi_iommu_ir | fee00000-feefffff | 64 |
1445 * |-----------------+-------------------+----------|
1447 memory_region_init_iommu(&amdvi_dev_as->iommu,
1448 sizeof(amdvi_dev_as->iommu),
1449 TYPE_AMD_IOMMU_MEMORY_REGION,
1450 OBJECT(s),
1451 "amd_iommu", UINT64_MAX);
1452 memory_region_init(&amdvi_dev_as->root, OBJECT(s),
1453 "amdvi_root", UINT64_MAX);
1454 address_space_init(&amdvi_dev_as->as, &amdvi_dev_as->root, name);
1455 memory_region_init_io(&amdvi_dev_as->iommu_ir, OBJECT(s),
1456 &amdvi_ir_ops, s, "amd_iommu_ir",
1457 AMDVI_INT_ADDR_SIZE);
1458 memory_region_add_subregion_overlap(&amdvi_dev_as->root,
1459 AMDVI_INT_ADDR_FIRST,
1460 &amdvi_dev_as->iommu_ir,
1461 64);
1462 memory_region_add_subregion_overlap(&amdvi_dev_as->root, 0,
1463 MEMORY_REGION(&amdvi_dev_as->iommu),
1466 return &iommu_as[devfn]->as;
1469 static const PCIIOMMUOps amdvi_iommu_ops = {
1470 .get_address_space = amdvi_host_dma_iommu,
1473 static const MemoryRegionOps mmio_mem_ops = {
1474 .read = amdvi_mmio_read,
1475 .write = amdvi_mmio_write,
1476 .endianness = DEVICE_LITTLE_ENDIAN,
1477 .impl = {
1478 .min_access_size = 1,
1479 .max_access_size = 8,
1480 .unaligned = false,
1482 .valid = {
1483 .min_access_size = 1,
1484 .max_access_size = 8,
1488 static int amdvi_iommu_notify_flag_changed(IOMMUMemoryRegion *iommu,
1489 IOMMUNotifierFlag old,
1490 IOMMUNotifierFlag new,
1491 Error **errp)
1493 AMDVIAddressSpace *as = container_of(iommu, AMDVIAddressSpace, iommu);
1495 if (new & IOMMU_NOTIFIER_MAP) {
1496 error_setg(errp,
1497 "device %02x.%02x.%x requires iommu notifier which is not "
1498 "currently supported", as->bus_num, PCI_SLOT(as->devfn),
1499 PCI_FUNC(as->devfn));
1500 return -EINVAL;
1502 return 0;
1505 static void amdvi_init(AMDVIState *s)
1507 amdvi_iotlb_reset(s);
1509 s->devtab_len = 0;
1510 s->cmdbuf_len = 0;
1511 s->cmdbuf_head = 0;
1512 s->cmdbuf_tail = 0;
1513 s->evtlog_head = 0;
1514 s->evtlog_tail = 0;
1515 s->excl_enabled = false;
1516 s->excl_allow = false;
1517 s->mmio_enabled = false;
1518 s->enabled = false;
1519 s->ats_enabled = false;
1520 s->cmdbuf_enabled = false;
1522 /* reset MMIO */
1523 memset(s->mmior, 0, AMDVI_MMIO_SIZE);
1524 amdvi_set_quad(s, AMDVI_MMIO_EXT_FEATURES,
1525 amdvi_extended_feature_register(s),
1526 0xffffffffffffffef, 0);
1527 amdvi_set_quad(s, AMDVI_MMIO_STATUS, 0, 0x98, 0x67);
1530 static void amdvi_pci_realize(PCIDevice *pdev, Error **errp)
1532 AMDVIPCIState *s = AMD_IOMMU_PCI(pdev);
1533 int ret;
1535 ret = pci_add_capability(pdev, AMDVI_CAPAB_ID_SEC, 0,
1536 AMDVI_CAPAB_SIZE, errp);
1537 if (ret < 0) {
1538 return;
1540 s->capab_offset = ret;
1542 ret = pci_add_capability(pdev, PCI_CAP_ID_MSI, 0,
1543 AMDVI_CAPAB_REG_SIZE, errp);
1544 if (ret < 0) {
1545 return;
1547 ret = pci_add_capability(pdev, PCI_CAP_ID_HT, 0,
1548 AMDVI_CAPAB_REG_SIZE, errp);
1549 if (ret < 0) {
1550 return;
1553 if (msi_init(pdev, 0, 1, true, false, errp) < 0) {
1554 return;
1557 /* reset device ident */
1558 pci_config_set_prog_interface(pdev->config, 0);
1560 /* reset AMDVI specific capabilities, all r/o */
1561 pci_set_long(pdev->config + s->capab_offset, AMDVI_CAPAB_FEATURES);
1562 pci_set_long(pdev->config + s->capab_offset + AMDVI_CAPAB_BAR_LOW,
1563 AMDVI_BASE_ADDR & ~(0xffff0000));
1564 pci_set_long(pdev->config + s->capab_offset + AMDVI_CAPAB_BAR_HIGH,
1565 (AMDVI_BASE_ADDR & ~(0xffff)) >> 16);
1566 pci_set_long(pdev->config + s->capab_offset + AMDVI_CAPAB_RANGE,
1567 0xff000000);
1568 pci_set_long(pdev->config + s->capab_offset + AMDVI_CAPAB_MISC, 0);
1569 pci_set_long(pdev->config + s->capab_offset + AMDVI_CAPAB_MISC,
1570 AMDVI_MAX_PH_ADDR | AMDVI_MAX_GVA_ADDR | AMDVI_MAX_VA_ADDR);
1573 static void amdvi_sysbus_reset(DeviceState *dev)
1575 AMDVIState *s = AMD_IOMMU_DEVICE(dev);
1577 msi_reset(&s->pci.dev);
1578 amdvi_init(s);
1581 static void amdvi_sysbus_realize(DeviceState *dev, Error **errp)
1583 AMDVIState *s = AMD_IOMMU_DEVICE(dev);
1584 MachineState *ms = MACHINE(qdev_get_machine());
1585 PCMachineState *pcms = PC_MACHINE(ms);
1586 X86MachineState *x86ms = X86_MACHINE(ms);
1587 PCIBus *bus = pcms->pcibus;
1589 s->iotlb = g_hash_table_new_full(amdvi_uint64_hash,
1590 amdvi_uint64_equal, g_free, g_free);
1592 /* This device should take care of IOMMU PCI properties */
1593 if (!qdev_realize(DEVICE(&s->pci), &bus->qbus, errp)) {
1594 return;
1597 /* Pseudo address space under root PCI bus. */
1598 x86ms->ioapic_as = amdvi_host_dma_iommu(bus, s, AMDVI_IOAPIC_SB_DEVID);
1600 /* set up MMIO */
1601 memory_region_init_io(&s->mmio, OBJECT(s), &mmio_mem_ops, s, "amdvi-mmio",
1602 AMDVI_MMIO_SIZE);
1603 memory_region_add_subregion(get_system_memory(), AMDVI_BASE_ADDR,
1604 &s->mmio);
1605 pci_setup_iommu(bus, &amdvi_iommu_ops, s);
1606 amdvi_init(s);
1609 static Property amdvi_properties[] = {
1610 DEFINE_PROP_BOOL("xtsup", AMDVIState, xtsup, false),
1611 DEFINE_PROP_END_OF_LIST(),
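/*
 * Minimal usage sketch (illustrative only; assumes a q35 machine and that
 * interrupt remapping is wanted):
 *
 *   qemu-system-x86_64 -machine q35 \
 *       -device amd-iommu,intremap=on,xtsup=on ...
 *
 * "xtsup" advertises the x2APIC (XT) interrupt-remapping extension in the
 * extended feature register, see amdvi_extended_feature_register().
 */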
1614 static const VMStateDescription vmstate_amdvi_sysbus = {
1615 .name = "amd-iommu",
1616 .unmigratable = 1
1619 static void amdvi_sysbus_instance_init(Object *klass)
1621 AMDVIState *s = AMD_IOMMU_DEVICE(klass);
1623 object_initialize(&s->pci, sizeof(s->pci), TYPE_AMD_IOMMU_PCI);
1626 static void amdvi_sysbus_class_init(ObjectClass *klass, void *data)
1628 DeviceClass *dc = DEVICE_CLASS(klass);
1629 X86IOMMUClass *dc_class = X86_IOMMU_DEVICE_CLASS(klass);
1631 dc->reset = amdvi_sysbus_reset;
1632 dc->vmsd = &vmstate_amdvi_sysbus;
1633 dc->hotpluggable = false;
1634 dc_class->realize = amdvi_sysbus_realize;
1635 dc_class->int_remap = amdvi_int_remap;
1636 /* Supported by the pc-q35-* machine types */
1637 dc->user_creatable = true;
1638 set_bit(DEVICE_CATEGORY_MISC, dc->categories);
1639 dc->desc = "AMD IOMMU (AMD-Vi) DMA Remapping device";
1640 device_class_set_props(dc, amdvi_properties);
1643 static const TypeInfo amdvi_sysbus = {
1644 .name = TYPE_AMD_IOMMU_DEVICE,
1645 .parent = TYPE_X86_IOMMU_DEVICE,
1646 .instance_size = sizeof(AMDVIState),
1647 .instance_init = amdvi_sysbus_instance_init,
1648 .class_init = amdvi_sysbus_class_init
1651 static void amdvi_pci_class_init(ObjectClass *klass, void *data)
1653 DeviceClass *dc = DEVICE_CLASS(klass);
1654 PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
1656 k->vendor_id = PCI_VENDOR_ID_AMD;
1657 k->class_id = 0x0806;
1658 k->realize = amdvi_pci_realize;
1660 set_bit(DEVICE_CATEGORY_MISC, dc->categories);
1661 dc->desc = "AMD IOMMU (AMD-Vi) DMA Remapping device";
1664 static const TypeInfo amdvi_pci = {
1665 .name = TYPE_AMD_IOMMU_PCI,
1666 .parent = TYPE_PCI_DEVICE,
1667 .instance_size = sizeof(AMDVIPCIState),
1668 .class_init = amdvi_pci_class_init,
1669 .interfaces = (InterfaceInfo[]) {
1670 { INTERFACE_CONVENTIONAL_PCI_DEVICE },
1671 { },
1675 static void amdvi_iommu_memory_region_class_init(ObjectClass *klass, void *data)
1677 IOMMUMemoryRegionClass *imrc = IOMMU_MEMORY_REGION_CLASS(klass);
1679 imrc->translate = amdvi_translate;
1680 imrc->notify_flag_changed = amdvi_iommu_notify_flag_changed;
1683 static const TypeInfo amdvi_iommu_memory_region_info = {
1684 .parent = TYPE_IOMMU_MEMORY_REGION,
1685 .name = TYPE_AMD_IOMMU_MEMORY_REGION,
1686 .class_init = amdvi_iommu_memory_region_class_init,
1689 static void amdvi_register_types(void)
1691 type_register_static(&amdvi_pci);
1692 type_register_static(&amdvi_sysbus);
1693 type_register_static(&amdvi_iommu_memory_region_info);
1696 type_init(amdvi_register_types);