intel-iommu: Cope with broken HP DC7900 BIOS
[linux-2.6/linux-2.6-openrd.git] / drivers / pci / dmar.c
blob380b60e677e038b51c781609cf3eb03f5d11bc0c
1 /*
2 * Copyright (c) 2006, Intel Corporation.
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
13 * You should have received a copy of the GNU General Public License along with
14 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
15 * Place - Suite 330, Boston, MA 02111-1307 USA.
17 * Copyright (C) 2006-2008 Intel Corporation
18 * Author: Ashok Raj <ashok.raj@intel.com>
19 * Author: Shaohua Li <shaohua.li@intel.com>
20 * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
22 * This file implements early detection/parsing of Remapping Devices
23 * reported to OS through BIOS via DMA remapping reporting (DMAR) ACPI
24 * tables.
26 * These routines are used by both DMA-remapping and Interrupt-remapping
29 #include <linux/pci.h>
30 #include <linux/dmar.h>
31 #include <linux/iova.h>
32 #include <linux/intel-iommu.h>
33 #include <linux/timer.h>
34 #include <linux/irq.h>
35 #include <linux/interrupt.h>
37 #undef PREFIX
38 #define PREFIX "DMAR:"
40 /* No locks are needed as DMA remapping hardware unit
41 * list is constructed at boot time and hotplug of
42 * these units are not supported by the architecture.
44 LIST_HEAD(dmar_drhd_units);
46 static struct acpi_table_header * __initdata dmar_tbl;
47 static acpi_size dmar_tbl_size;
49 static void __init dmar_register_drhd_unit(struct dmar_drhd_unit *drhd)
52 * add INCLUDE_ALL at the tail, so scan the list will find it at
53 * the very end.
55 if (drhd->include_all)
56 list_add_tail(&drhd->list, &dmar_drhd_units);
57 else
58 list_add(&drhd->list, &dmar_drhd_units);
61 static int __init dmar_parse_one_dev_scope(struct acpi_dmar_device_scope *scope,
62 struct pci_dev **dev, u16 segment)
64 struct pci_bus *bus;
65 struct pci_dev *pdev = NULL;
66 struct acpi_dmar_pci_path *path;
67 int count;
69 bus = pci_find_bus(segment, scope->bus);
70 path = (struct acpi_dmar_pci_path *)(scope + 1);
71 count = (scope->length - sizeof(struct acpi_dmar_device_scope))
72 / sizeof(struct acpi_dmar_pci_path);
74 while (count) {
75 if (pdev)
76 pci_dev_put(pdev);
78 * Some BIOSes list non-exist devices in DMAR table, just
79 * ignore it
81 if (!bus) {
82 printk(KERN_WARNING
83 PREFIX "Device scope bus [%d] not found\n",
84 scope->bus);
85 break;
87 pdev = pci_get_slot(bus, PCI_DEVFN(path->dev, path->fn));
88 if (!pdev) {
89 printk(KERN_WARNING PREFIX
90 "Device scope device [%04x:%02x:%02x.%02x] not found\n",
91 segment, bus->number, path->dev, path->fn);
92 break;
94 path ++;
95 count --;
96 bus = pdev->subordinate;
98 if (!pdev) {
99 printk(KERN_WARNING PREFIX
100 "Device scope device [%04x:%02x:%02x.%02x] not found\n",
101 segment, scope->bus, path->dev, path->fn);
102 *dev = NULL;
103 return 0;
105 if ((scope->entry_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT && \
106 pdev->subordinate) || (scope->entry_type == \
107 ACPI_DMAR_SCOPE_TYPE_BRIDGE && !pdev->subordinate)) {
108 pci_dev_put(pdev);
109 printk(KERN_WARNING PREFIX
110 "Device scope type does not match for %s\n",
111 pci_name(pdev));
112 return -EINVAL;
114 *dev = pdev;
115 return 0;
118 static int __init dmar_parse_dev_scope(void *start, void *end, int *cnt,
119 struct pci_dev ***devices, u16 segment)
121 struct acpi_dmar_device_scope *scope;
122 void * tmp = start;
123 int index;
124 int ret;
126 *cnt = 0;
127 while (start < end) {
128 scope = start;
129 if (scope->entry_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT ||
130 scope->entry_type == ACPI_DMAR_SCOPE_TYPE_BRIDGE)
131 (*cnt)++;
132 else
133 printk(KERN_WARNING PREFIX
134 "Unsupported device scope\n");
135 start += scope->length;
137 if (*cnt == 0)
138 return 0;
140 *devices = kcalloc(*cnt, sizeof(struct pci_dev *), GFP_KERNEL);
141 if (!*devices)
142 return -ENOMEM;
144 start = tmp;
145 index = 0;
146 while (start < end) {
147 scope = start;
148 if (scope->entry_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT ||
149 scope->entry_type == ACPI_DMAR_SCOPE_TYPE_BRIDGE) {
150 ret = dmar_parse_one_dev_scope(scope,
151 &(*devices)[index], segment);
152 if (ret) {
153 kfree(*devices);
154 return ret;
156 index ++;
158 start += scope->length;
161 return 0;
165 * dmar_parse_one_drhd - parses exactly one DMA remapping hardware definition
166 * structure which uniquely represent one DMA remapping hardware unit
167 * present in the platform
169 static int __init
170 dmar_parse_one_drhd(struct acpi_dmar_header *header)
172 struct acpi_dmar_hardware_unit *drhd;
173 struct dmar_drhd_unit *dmaru;
174 int ret = 0;
176 drhd = (struct acpi_dmar_hardware_unit *)header;
177 if (!drhd->address) {
178 /* Promote an attitude of violence to a BIOS engineer today */
179 WARN(1, "Your BIOS is broken; DMAR reported at address zero!\n"
180 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
181 dmi_get_system_info(DMI_BIOS_VENDOR),
182 dmi_get_system_info(DMI_BIOS_VERSION),
183 dmi_get_system_info(DMI_PRODUCT_VERSION));
184 return -ENODEV;
186 dmaru = kzalloc(sizeof(*dmaru), GFP_KERNEL);
187 if (!dmaru)
188 return -ENOMEM;
190 dmaru->hdr = header;
191 dmaru->reg_base_addr = drhd->address;
192 dmaru->segment = drhd->segment;
193 dmaru->include_all = drhd->flags & 0x1; /* BIT0: INCLUDE_ALL */
195 ret = alloc_iommu(dmaru);
196 if (ret) {
197 kfree(dmaru);
198 return ret;
200 dmar_register_drhd_unit(dmaru);
201 return 0;
204 static int __init dmar_parse_dev(struct dmar_drhd_unit *dmaru)
206 struct acpi_dmar_hardware_unit *drhd;
207 int ret = 0;
209 drhd = (struct acpi_dmar_hardware_unit *) dmaru->hdr;
211 if (dmaru->include_all)
212 return 0;
214 ret = dmar_parse_dev_scope((void *)(drhd + 1),
215 ((void *)drhd) + drhd->header.length,
216 &dmaru->devices_cnt, &dmaru->devices,
217 drhd->segment);
218 if (ret) {
219 list_del(&dmaru->list);
220 kfree(dmaru);
222 return ret;
225 #ifdef CONFIG_DMAR
226 LIST_HEAD(dmar_rmrr_units);
228 static void __init dmar_register_rmrr_unit(struct dmar_rmrr_unit *rmrr)
230 list_add(&rmrr->list, &dmar_rmrr_units);
234 static int __init
235 dmar_parse_one_rmrr(struct acpi_dmar_header *header)
237 struct acpi_dmar_reserved_memory *rmrr;
238 struct dmar_rmrr_unit *rmrru;
240 rmrru = kzalloc(sizeof(*rmrru), GFP_KERNEL);
241 if (!rmrru)
242 return -ENOMEM;
244 rmrru->hdr = header;
245 rmrr = (struct acpi_dmar_reserved_memory *)header;
246 rmrru->base_address = rmrr->base_address;
247 rmrru->end_address = rmrr->end_address;
249 dmar_register_rmrr_unit(rmrru);
250 return 0;
253 static int __init
254 rmrr_parse_dev(struct dmar_rmrr_unit *rmrru)
256 struct acpi_dmar_reserved_memory *rmrr;
257 int ret;
259 rmrr = (struct acpi_dmar_reserved_memory *) rmrru->hdr;
260 ret = dmar_parse_dev_scope((void *)(rmrr + 1),
261 ((void *)rmrr) + rmrr->header.length,
262 &rmrru->devices_cnt, &rmrru->devices, rmrr->segment);
264 if (ret || (rmrru->devices_cnt == 0)) {
265 list_del(&rmrru->list);
266 kfree(rmrru);
268 return ret;
271 static LIST_HEAD(dmar_atsr_units);
273 static int __init dmar_parse_one_atsr(struct acpi_dmar_header *hdr)
275 struct acpi_dmar_atsr *atsr;
276 struct dmar_atsr_unit *atsru;
278 atsr = container_of(hdr, struct acpi_dmar_atsr, header);
279 atsru = kzalloc(sizeof(*atsru), GFP_KERNEL);
280 if (!atsru)
281 return -ENOMEM;
283 atsru->hdr = hdr;
284 atsru->include_all = atsr->flags & 0x1;
286 list_add(&atsru->list, &dmar_atsr_units);
288 return 0;
291 static int __init atsr_parse_dev(struct dmar_atsr_unit *atsru)
293 int rc;
294 struct acpi_dmar_atsr *atsr;
296 if (atsru->include_all)
297 return 0;
299 atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
300 rc = dmar_parse_dev_scope((void *)(atsr + 1),
301 (void *)atsr + atsr->header.length,
302 &atsru->devices_cnt, &atsru->devices,
303 atsr->segment);
304 if (rc || !atsru->devices_cnt) {
305 list_del(&atsru->list);
306 kfree(atsru);
309 return rc;
312 int dmar_find_matched_atsr_unit(struct pci_dev *dev)
314 int i;
315 struct pci_bus *bus;
316 struct acpi_dmar_atsr *atsr;
317 struct dmar_atsr_unit *atsru;
319 list_for_each_entry(atsru, &dmar_atsr_units, list) {
320 atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
321 if (atsr->segment == pci_domain_nr(dev->bus))
322 goto found;
325 return 0;
327 found:
328 for (bus = dev->bus; bus; bus = bus->parent) {
329 struct pci_dev *bridge = bus->self;
331 if (!bridge || !bridge->is_pcie ||
332 bridge->pcie_type == PCI_EXP_TYPE_PCI_BRIDGE)
333 return 0;
335 if (bridge->pcie_type == PCI_EXP_TYPE_ROOT_PORT) {
336 for (i = 0; i < atsru->devices_cnt; i++)
337 if (atsru->devices[i] == bridge)
338 return 1;
339 break;
343 if (atsru->include_all)
344 return 1;
346 return 0;
348 #endif
350 static void __init
351 dmar_table_print_dmar_entry(struct acpi_dmar_header *header)
353 struct acpi_dmar_hardware_unit *drhd;
354 struct acpi_dmar_reserved_memory *rmrr;
355 struct acpi_dmar_atsr *atsr;
357 switch (header->type) {
358 case ACPI_DMAR_TYPE_HARDWARE_UNIT:
359 drhd = container_of(header, struct acpi_dmar_hardware_unit,
360 header);
361 printk (KERN_INFO PREFIX
362 "DRHD base: %#016Lx flags: %#x\n",
363 (unsigned long long)drhd->address, drhd->flags);
364 break;
365 case ACPI_DMAR_TYPE_RESERVED_MEMORY:
366 rmrr = container_of(header, struct acpi_dmar_reserved_memory,
367 header);
368 printk (KERN_INFO PREFIX
369 "RMRR base: %#016Lx end: %#016Lx\n",
370 (unsigned long long)rmrr->base_address,
371 (unsigned long long)rmrr->end_address);
372 break;
373 case ACPI_DMAR_TYPE_ATSR:
374 atsr = container_of(header, struct acpi_dmar_atsr, header);
375 printk(KERN_INFO PREFIX "ATSR flags: %#x\n", atsr->flags);
376 break;
381 * dmar_table_detect - checks to see if the platform supports DMAR devices
383 static int __init dmar_table_detect(void)
385 acpi_status status = AE_OK;
387 /* if we could find DMAR table, then there are DMAR devices */
388 status = acpi_get_table_with_size(ACPI_SIG_DMAR, 0,
389 (struct acpi_table_header **)&dmar_tbl,
390 &dmar_tbl_size);
392 if (ACPI_SUCCESS(status) && !dmar_tbl) {
393 printk (KERN_WARNING PREFIX "Unable to map DMAR\n");
394 status = AE_NOT_FOUND;
397 return (ACPI_SUCCESS(status) ? 1 : 0);
401 * parse_dmar_table - parses the DMA reporting table
403 static int __init
404 parse_dmar_table(void)
406 struct acpi_table_dmar *dmar;
407 struct acpi_dmar_header *entry_header;
408 int ret = 0;
411 * Do it again, earlier dmar_tbl mapping could be mapped with
412 * fixed map.
414 dmar_table_detect();
416 dmar = (struct acpi_table_dmar *)dmar_tbl;
417 if (!dmar)
418 return -ENODEV;
420 if (dmar->width < PAGE_SHIFT - 1) {
421 printk(KERN_WARNING PREFIX "Invalid DMAR haw\n");
422 return -EINVAL;
425 printk (KERN_INFO PREFIX "Host address width %d\n",
426 dmar->width + 1);
428 entry_header = (struct acpi_dmar_header *)(dmar + 1);
429 while (((unsigned long)entry_header) <
430 (((unsigned long)dmar) + dmar_tbl->length)) {
431 /* Avoid looping forever on bad ACPI tables */
432 if (entry_header->length == 0) {
433 printk(KERN_WARNING PREFIX
434 "Invalid 0-length structure\n");
435 ret = -EINVAL;
436 break;
439 dmar_table_print_dmar_entry(entry_header);
441 switch (entry_header->type) {
442 case ACPI_DMAR_TYPE_HARDWARE_UNIT:
443 ret = dmar_parse_one_drhd(entry_header);
444 break;
445 case ACPI_DMAR_TYPE_RESERVED_MEMORY:
446 #ifdef CONFIG_DMAR
447 ret = dmar_parse_one_rmrr(entry_header);
448 #endif
449 break;
450 case ACPI_DMAR_TYPE_ATSR:
451 #ifdef CONFIG_DMAR
452 ret = dmar_parse_one_atsr(entry_header);
453 #endif
454 break;
455 default:
456 printk(KERN_WARNING PREFIX
457 "Unknown DMAR structure type\n");
458 ret = 0; /* for forward compatibility */
459 break;
461 if (ret)
462 break;
464 entry_header = ((void *)entry_header + entry_header->length);
466 return ret;
469 int dmar_pci_device_match(struct pci_dev *devices[], int cnt,
470 struct pci_dev *dev)
472 int index;
474 while (dev) {
475 for (index = 0; index < cnt; index++)
476 if (dev == devices[index])
477 return 1;
479 /* Check our parent */
480 dev = dev->bus->self;
483 return 0;
486 struct dmar_drhd_unit *
487 dmar_find_matched_drhd_unit(struct pci_dev *dev)
489 struct dmar_drhd_unit *dmaru = NULL;
490 struct acpi_dmar_hardware_unit *drhd;
492 list_for_each_entry(dmaru, &dmar_drhd_units, list) {
493 drhd = container_of(dmaru->hdr,
494 struct acpi_dmar_hardware_unit,
495 header);
497 if (dmaru->include_all &&
498 drhd->segment == pci_domain_nr(dev->bus))
499 return dmaru;
501 if (dmar_pci_device_match(dmaru->devices,
502 dmaru->devices_cnt, dev))
503 return dmaru;
506 return NULL;
509 int __init dmar_dev_scope_init(void)
511 struct dmar_drhd_unit *drhd, *drhd_n;
512 int ret = -ENODEV;
514 list_for_each_entry_safe(drhd, drhd_n, &dmar_drhd_units, list) {
515 ret = dmar_parse_dev(drhd);
516 if (ret)
517 return ret;
520 #ifdef CONFIG_DMAR
522 struct dmar_rmrr_unit *rmrr, *rmrr_n;
523 struct dmar_atsr_unit *atsr, *atsr_n;
525 list_for_each_entry_safe(rmrr, rmrr_n, &dmar_rmrr_units, list) {
526 ret = rmrr_parse_dev(rmrr);
527 if (ret)
528 return ret;
531 list_for_each_entry_safe(atsr, atsr_n, &dmar_atsr_units, list) {
532 ret = atsr_parse_dev(atsr);
533 if (ret)
534 return ret;
537 #endif
539 return ret;
543 int __init dmar_table_init(void)
545 static int dmar_table_initialized;
546 int ret;
548 if (dmar_table_initialized)
549 return 0;
551 dmar_table_initialized = 1;
553 ret = parse_dmar_table();
554 if (ret) {
555 if (ret != -ENODEV)
556 printk(KERN_INFO PREFIX "parse DMAR table failure.\n");
557 return ret;
560 if (list_empty(&dmar_drhd_units)) {
561 printk(KERN_INFO PREFIX "No DMAR devices found\n");
562 return -ENODEV;
565 #ifdef CONFIG_DMAR
566 if (list_empty(&dmar_rmrr_units))
567 printk(KERN_INFO PREFIX "No RMRR found\n");
569 if (list_empty(&dmar_atsr_units))
570 printk(KERN_INFO PREFIX "No ATSR found\n");
571 #endif
573 #ifdef CONFIG_INTR_REMAP
574 parse_ioapics_under_ir();
575 #endif
576 return 0;
579 void __init detect_intel_iommu(void)
581 int ret;
583 ret = dmar_table_detect();
586 #ifdef CONFIG_INTR_REMAP
587 struct acpi_table_dmar *dmar;
589 * for now we will disable dma-remapping when interrupt
590 * remapping is enabled.
591 * When support for queued invalidation for IOTLB invalidation
592 * is added, we will not need this any more.
594 dmar = (struct acpi_table_dmar *) dmar_tbl;
595 if (ret && cpu_has_x2apic && dmar->flags & 0x1)
596 printk(KERN_INFO
597 "Queued invalidation will be enabled to support "
598 "x2apic and Intr-remapping.\n");
599 #endif
600 #ifdef CONFIG_DMAR
601 if (ret && !no_iommu && !iommu_detected && !swiotlb &&
602 !dmar_disabled)
603 iommu_detected = 1;
604 #endif
606 early_acpi_os_unmap_memory(dmar_tbl, dmar_tbl_size);
607 dmar_tbl = NULL;
611 int alloc_iommu(struct dmar_drhd_unit *drhd)
613 struct intel_iommu *iommu;
614 int map_size;
615 u32 ver;
616 static int iommu_allocated = 0;
617 int agaw = 0;
618 int msagaw = 0;
620 iommu = kzalloc(sizeof(*iommu), GFP_KERNEL);
621 if (!iommu)
622 return -ENOMEM;
624 iommu->seq_id = iommu_allocated++;
625 sprintf (iommu->name, "dmar%d", iommu->seq_id);
627 iommu->reg = ioremap(drhd->reg_base_addr, VTD_PAGE_SIZE);
628 if (!iommu->reg) {
629 printk(KERN_ERR "IOMMU: can't map the region\n");
630 goto error;
632 iommu->cap = dmar_readq(iommu->reg + DMAR_CAP_REG);
633 iommu->ecap = dmar_readq(iommu->reg + DMAR_ECAP_REG);
635 if (iommu->cap == (uint64_t)-1 && iommu->ecap == (uint64_t)-1) {
636 /* Promote an attitude of violence to a BIOS engineer today */
637 WARN(1, "Your BIOS is broken; DMAR reported at address %llx returns all ones!\n"
638 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
639 drhd->reg_base_addr,
640 dmi_get_system_info(DMI_BIOS_VENDOR),
641 dmi_get_system_info(DMI_BIOS_VERSION),
642 dmi_get_system_info(DMI_PRODUCT_VERSION));
643 goto err_unmap;
646 #ifdef CONFIG_DMAR
647 agaw = iommu_calculate_agaw(iommu);
648 if (agaw < 0) {
649 printk(KERN_ERR
650 "Cannot get a valid agaw for iommu (seq_id = %d)\n",
651 iommu->seq_id);
652 goto err_unmap;
654 msagaw = iommu_calculate_max_sagaw(iommu);
655 if (msagaw < 0) {
656 printk(KERN_ERR
657 "Cannot get a valid max agaw for iommu (seq_id = %d)\n",
658 iommu->seq_id);
659 goto err_unmap;
661 #endif
662 iommu->agaw = agaw;
663 iommu->msagaw = msagaw;
665 /* the registers might be more than one page */
666 map_size = max_t(int, ecap_max_iotlb_offset(iommu->ecap),
667 cap_max_fault_reg_offset(iommu->cap));
668 map_size = VTD_PAGE_ALIGN(map_size);
669 if (map_size > VTD_PAGE_SIZE) {
670 iounmap(iommu->reg);
671 iommu->reg = ioremap(drhd->reg_base_addr, map_size);
672 if (!iommu->reg) {
673 printk(KERN_ERR "IOMMU: can't map the region\n");
674 goto error;
678 ver = readl(iommu->reg + DMAR_VER_REG);
679 pr_info("IOMMU %llx: ver %d:%d cap %llx ecap %llx\n",
680 (unsigned long long)drhd->reg_base_addr,
681 DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver),
682 (unsigned long long)iommu->cap,
683 (unsigned long long)iommu->ecap);
685 spin_lock_init(&iommu->register_lock);
687 drhd->iommu = iommu;
688 return 0;
690 err_unmap:
691 iounmap(iommu->reg);
692 error:
693 kfree(iommu);
694 return -1;
697 void free_iommu(struct intel_iommu *iommu)
699 if (!iommu)
700 return;
702 #ifdef CONFIG_DMAR
703 free_dmar_iommu(iommu);
704 #endif
706 if (iommu->reg)
707 iounmap(iommu->reg);
708 kfree(iommu);
712 * Reclaim all the submitted descriptors which have completed its work.
714 static inline void reclaim_free_desc(struct q_inval *qi)
716 while (qi->desc_status[qi->free_tail] == QI_DONE ||
717 qi->desc_status[qi->free_tail] == QI_ABORT) {
718 qi->desc_status[qi->free_tail] = QI_FREE;
719 qi->free_tail = (qi->free_tail + 1) % QI_LENGTH;
720 qi->free_cnt++;
724 static int qi_check_fault(struct intel_iommu *iommu, int index)
726 u32 fault;
727 int head, tail;
728 struct q_inval *qi = iommu->qi;
729 int wait_index = (index + 1) % QI_LENGTH;
731 if (qi->desc_status[wait_index] == QI_ABORT)
732 return -EAGAIN;
734 fault = readl(iommu->reg + DMAR_FSTS_REG);
737 * If IQE happens, the head points to the descriptor associated
738 * with the error. No new descriptors are fetched until the IQE
739 * is cleared.
741 if (fault & DMA_FSTS_IQE) {
742 head = readl(iommu->reg + DMAR_IQH_REG);
743 if ((head >> DMAR_IQ_SHIFT) == index) {
744 printk(KERN_ERR "VT-d detected invalid descriptor: "
745 "low=%llx, high=%llx\n",
746 (unsigned long long)qi->desc[index].low,
747 (unsigned long long)qi->desc[index].high);
748 memcpy(&qi->desc[index], &qi->desc[wait_index],
749 sizeof(struct qi_desc));
750 __iommu_flush_cache(iommu, &qi->desc[index],
751 sizeof(struct qi_desc));
752 writel(DMA_FSTS_IQE, iommu->reg + DMAR_FSTS_REG);
753 return -EINVAL;
758 * If ITE happens, all pending wait_desc commands are aborted.
759 * No new descriptors are fetched until the ITE is cleared.
761 if (fault & DMA_FSTS_ITE) {
762 head = readl(iommu->reg + DMAR_IQH_REG);
763 head = ((head >> DMAR_IQ_SHIFT) - 1 + QI_LENGTH) % QI_LENGTH;
764 head |= 1;
765 tail = readl(iommu->reg + DMAR_IQT_REG);
766 tail = ((tail >> DMAR_IQ_SHIFT) - 1 + QI_LENGTH) % QI_LENGTH;
768 writel(DMA_FSTS_ITE, iommu->reg + DMAR_FSTS_REG);
770 do {
771 if (qi->desc_status[head] == QI_IN_USE)
772 qi->desc_status[head] = QI_ABORT;
773 head = (head - 2 + QI_LENGTH) % QI_LENGTH;
774 } while (head != tail);
776 if (qi->desc_status[wait_index] == QI_ABORT)
777 return -EAGAIN;
780 if (fault & DMA_FSTS_ICE)
781 writel(DMA_FSTS_ICE, iommu->reg + DMAR_FSTS_REG);
783 return 0;
787 * Submit the queued invalidation descriptor to the remapping
788 * hardware unit and wait for its completion.
790 int qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu)
792 int rc;
793 struct q_inval *qi = iommu->qi;
794 struct qi_desc *hw, wait_desc;
795 int wait_index, index;
796 unsigned long flags;
798 if (!qi)
799 return 0;
801 hw = qi->desc;
803 restart:
804 rc = 0;
806 spin_lock_irqsave(&qi->q_lock, flags);
807 while (qi->free_cnt < 3) {
808 spin_unlock_irqrestore(&qi->q_lock, flags);
809 cpu_relax();
810 spin_lock_irqsave(&qi->q_lock, flags);
813 index = qi->free_head;
814 wait_index = (index + 1) % QI_LENGTH;
816 qi->desc_status[index] = qi->desc_status[wait_index] = QI_IN_USE;
818 hw[index] = *desc;
820 wait_desc.low = QI_IWD_STATUS_DATA(QI_DONE) |
821 QI_IWD_STATUS_WRITE | QI_IWD_TYPE;
822 wait_desc.high = virt_to_phys(&qi->desc_status[wait_index]);
824 hw[wait_index] = wait_desc;
826 __iommu_flush_cache(iommu, &hw[index], sizeof(struct qi_desc));
827 __iommu_flush_cache(iommu, &hw[wait_index], sizeof(struct qi_desc));
829 qi->free_head = (qi->free_head + 2) % QI_LENGTH;
830 qi->free_cnt -= 2;
833 * update the HW tail register indicating the presence of
834 * new descriptors.
836 writel(qi->free_head << DMAR_IQ_SHIFT, iommu->reg + DMAR_IQT_REG);
838 while (qi->desc_status[wait_index] != QI_DONE) {
840 * We will leave the interrupts disabled, to prevent interrupt
841 * context to queue another cmd while a cmd is already submitted
842 * and waiting for completion on this cpu. This is to avoid
843 * a deadlock where the interrupt context can wait indefinitely
844 * for free slots in the queue.
846 rc = qi_check_fault(iommu, index);
847 if (rc)
848 break;
850 spin_unlock(&qi->q_lock);
851 cpu_relax();
852 spin_lock(&qi->q_lock);
855 qi->desc_status[index] = QI_DONE;
857 reclaim_free_desc(qi);
858 spin_unlock_irqrestore(&qi->q_lock, flags);
860 if (rc == -EAGAIN)
861 goto restart;
863 return rc;
867 * Flush the global interrupt entry cache.
869 void qi_global_iec(struct intel_iommu *iommu)
871 struct qi_desc desc;
873 desc.low = QI_IEC_TYPE;
874 desc.high = 0;
876 /* should never fail */
877 qi_submit_sync(&desc, iommu);
880 void qi_flush_context(struct intel_iommu *iommu, u16 did, u16 sid, u8 fm,
881 u64 type)
883 struct qi_desc desc;
885 desc.low = QI_CC_FM(fm) | QI_CC_SID(sid) | QI_CC_DID(did)
886 | QI_CC_GRAN(type) | QI_CC_TYPE;
887 desc.high = 0;
889 qi_submit_sync(&desc, iommu);
892 void qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr,
893 unsigned int size_order, u64 type)
895 u8 dw = 0, dr = 0;
897 struct qi_desc desc;
898 int ih = 0;
900 if (cap_write_drain(iommu->cap))
901 dw = 1;
903 if (cap_read_drain(iommu->cap))
904 dr = 1;
906 desc.low = QI_IOTLB_DID(did) | QI_IOTLB_DR(dr) | QI_IOTLB_DW(dw)
907 | QI_IOTLB_GRAN(type) | QI_IOTLB_TYPE;
908 desc.high = QI_IOTLB_ADDR(addr) | QI_IOTLB_IH(ih)
909 | QI_IOTLB_AM(size_order);
911 qi_submit_sync(&desc, iommu);
914 void qi_flush_dev_iotlb(struct intel_iommu *iommu, u16 sid, u16 qdep,
915 u64 addr, unsigned mask)
917 struct qi_desc desc;
919 if (mask) {
920 BUG_ON(addr & ((1 << (VTD_PAGE_SHIFT + mask)) - 1));
921 addr |= (1 << (VTD_PAGE_SHIFT + mask - 1)) - 1;
922 desc.high = QI_DEV_IOTLB_ADDR(addr) | QI_DEV_IOTLB_SIZE;
923 } else
924 desc.high = QI_DEV_IOTLB_ADDR(addr);
926 if (qdep >= QI_DEV_IOTLB_MAX_INVS)
927 qdep = 0;
929 desc.low = QI_DEV_IOTLB_SID(sid) | QI_DEV_IOTLB_QDEP(qdep) |
930 QI_DIOTLB_TYPE;
932 qi_submit_sync(&desc, iommu);
936 * Disable Queued Invalidation interface.
938 void dmar_disable_qi(struct intel_iommu *iommu)
940 unsigned long flags;
941 u32 sts;
942 cycles_t start_time = get_cycles();
944 if (!ecap_qis(iommu->ecap))
945 return;
947 spin_lock_irqsave(&iommu->register_lock, flags);
949 sts = dmar_readq(iommu->reg + DMAR_GSTS_REG);
950 if (!(sts & DMA_GSTS_QIES))
951 goto end;
954 * Give a chance to HW to complete the pending invalidation requests.
956 while ((readl(iommu->reg + DMAR_IQT_REG) !=
957 readl(iommu->reg + DMAR_IQH_REG)) &&
958 (DMAR_OPERATION_TIMEOUT > (get_cycles() - start_time)))
959 cpu_relax();
961 iommu->gcmd &= ~DMA_GCMD_QIE;
962 writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
964 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, readl,
965 !(sts & DMA_GSTS_QIES), sts);
966 end:
967 spin_unlock_irqrestore(&iommu->register_lock, flags);
971 * Enable queued invalidation.
973 static void __dmar_enable_qi(struct intel_iommu *iommu)
975 u32 sts;
976 unsigned long flags;
977 struct q_inval *qi = iommu->qi;
979 qi->free_head = qi->free_tail = 0;
980 qi->free_cnt = QI_LENGTH;
982 spin_lock_irqsave(&iommu->register_lock, flags);
984 /* write zero to the tail reg */
985 writel(0, iommu->reg + DMAR_IQT_REG);
987 dmar_writeq(iommu->reg + DMAR_IQA_REG, virt_to_phys(qi->desc));
989 iommu->gcmd |= DMA_GCMD_QIE;
990 writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
992 /* Make sure hardware complete it */
993 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, readl, (sts & DMA_GSTS_QIES), sts);
995 spin_unlock_irqrestore(&iommu->register_lock, flags);
999 * Enable Queued Invalidation interface. This is a must to support
1000 * interrupt-remapping. Also used by DMA-remapping, which replaces
1001 * register based IOTLB invalidation.
1003 int dmar_enable_qi(struct intel_iommu *iommu)
1005 struct q_inval *qi;
1007 if (!ecap_qis(iommu->ecap))
1008 return -ENOENT;
1011 * queued invalidation is already setup and enabled.
1013 if (iommu->qi)
1014 return 0;
1016 iommu->qi = kmalloc(sizeof(*qi), GFP_ATOMIC);
1017 if (!iommu->qi)
1018 return -ENOMEM;
1020 qi = iommu->qi;
1022 qi->desc = (void *)(get_zeroed_page(GFP_ATOMIC));
1023 if (!qi->desc) {
1024 kfree(qi);
1025 iommu->qi = 0;
1026 return -ENOMEM;
1029 qi->desc_status = kmalloc(QI_LENGTH * sizeof(int), GFP_ATOMIC);
1030 if (!qi->desc_status) {
1031 free_page((unsigned long) qi->desc);
1032 kfree(qi);
1033 iommu->qi = 0;
1034 return -ENOMEM;
1037 qi->free_head = qi->free_tail = 0;
1038 qi->free_cnt = QI_LENGTH;
1040 spin_lock_init(&qi->q_lock);
1042 __dmar_enable_qi(iommu);
1044 return 0;
1047 /* iommu interrupt handling. Most stuff are MSI-like. */
1049 enum faulttype {
1050 DMA_REMAP,
1051 INTR_REMAP,
1052 UNKNOWN,
1055 static const char *dma_remap_fault_reasons[] =
1057 "Software",
1058 "Present bit in root entry is clear",
1059 "Present bit in context entry is clear",
1060 "Invalid context entry",
1061 "Access beyond MGAW",
1062 "PTE Write access is not set",
1063 "PTE Read access is not set",
1064 "Next page table ptr is invalid",
1065 "Root table address invalid",
1066 "Context table ptr is invalid",
1067 "non-zero reserved fields in RTP",
1068 "non-zero reserved fields in CTP",
1069 "non-zero reserved fields in PTE",
1072 static const char *intr_remap_fault_reasons[] =
1074 "Detected reserved fields in the decoded interrupt-remapped request",
1075 "Interrupt index exceeded the interrupt-remapping table size",
1076 "Present field in the IRTE entry is clear",
1077 "Error accessing interrupt-remapping table pointed by IRTA_REG",
1078 "Detected reserved fields in the IRTE entry",
1079 "Blocked a compatibility format interrupt request",
1080 "Blocked an interrupt request due to source-id verification failure",
1083 #define MAX_FAULT_REASON_IDX (ARRAY_SIZE(fault_reason_strings) - 1)
1085 const char *dmar_get_fault_reason(u8 fault_reason, int *fault_type)
1087 if (fault_reason >= 0x20 && (fault_reason <= 0x20 +
1088 ARRAY_SIZE(intr_remap_fault_reasons))) {
1089 *fault_type = INTR_REMAP;
1090 return intr_remap_fault_reasons[fault_reason - 0x20];
1091 } else if (fault_reason < ARRAY_SIZE(dma_remap_fault_reasons)) {
1092 *fault_type = DMA_REMAP;
1093 return dma_remap_fault_reasons[fault_reason];
1094 } else {
1095 *fault_type = UNKNOWN;
1096 return "Unknown";
1100 void dmar_msi_unmask(unsigned int irq)
1102 struct intel_iommu *iommu = get_irq_data(irq);
1103 unsigned long flag;
1105 /* unmask it */
1106 spin_lock_irqsave(&iommu->register_lock, flag);
1107 writel(0, iommu->reg + DMAR_FECTL_REG);
1108 /* Read a reg to force flush the post write */
1109 readl(iommu->reg + DMAR_FECTL_REG);
1110 spin_unlock_irqrestore(&iommu->register_lock, flag);
1113 void dmar_msi_mask(unsigned int irq)
1115 unsigned long flag;
1116 struct intel_iommu *iommu = get_irq_data(irq);
1118 /* mask it */
1119 spin_lock_irqsave(&iommu->register_lock, flag);
1120 writel(DMA_FECTL_IM, iommu->reg + DMAR_FECTL_REG);
1121 /* Read a reg to force flush the post write */
1122 readl(iommu->reg + DMAR_FECTL_REG);
1123 spin_unlock_irqrestore(&iommu->register_lock, flag);
1126 void dmar_msi_write(int irq, struct msi_msg *msg)
1128 struct intel_iommu *iommu = get_irq_data(irq);
1129 unsigned long flag;
1131 spin_lock_irqsave(&iommu->register_lock, flag);
1132 writel(msg->data, iommu->reg + DMAR_FEDATA_REG);
1133 writel(msg->address_lo, iommu->reg + DMAR_FEADDR_REG);
1134 writel(msg->address_hi, iommu->reg + DMAR_FEUADDR_REG);
1135 spin_unlock_irqrestore(&iommu->register_lock, flag);
1138 void dmar_msi_read(int irq, struct msi_msg *msg)
1140 struct intel_iommu *iommu = get_irq_data(irq);
1141 unsigned long flag;
1143 spin_lock_irqsave(&iommu->register_lock, flag);
1144 msg->data = readl(iommu->reg + DMAR_FEDATA_REG);
1145 msg->address_lo = readl(iommu->reg + DMAR_FEADDR_REG);
1146 msg->address_hi = readl(iommu->reg + DMAR_FEUADDR_REG);
1147 spin_unlock_irqrestore(&iommu->register_lock, flag);
1150 static int dmar_fault_do_one(struct intel_iommu *iommu, int type,
1151 u8 fault_reason, u16 source_id, unsigned long long addr)
1153 const char *reason;
1154 int fault_type;
1156 reason = dmar_get_fault_reason(fault_reason, &fault_type);
1158 if (fault_type == INTR_REMAP)
1159 printk(KERN_ERR "INTR-REMAP: Request device [[%02x:%02x.%d] "
1160 "fault index %llx\n"
1161 "INTR-REMAP:[fault reason %02d] %s\n",
1162 (source_id >> 8), PCI_SLOT(source_id & 0xFF),
1163 PCI_FUNC(source_id & 0xFF), addr >> 48,
1164 fault_reason, reason);
1165 else
1166 printk(KERN_ERR
1167 "DMAR:[%s] Request device [%02x:%02x.%d] "
1168 "fault addr %llx \n"
1169 "DMAR:[fault reason %02d] %s\n",
1170 (type ? "DMA Read" : "DMA Write"),
1171 (source_id >> 8), PCI_SLOT(source_id & 0xFF),
1172 PCI_FUNC(source_id & 0xFF), addr, fault_reason, reason);
1173 return 0;
1176 #define PRIMARY_FAULT_REG_LEN (16)
1177 irqreturn_t dmar_fault(int irq, void *dev_id)
1179 struct intel_iommu *iommu = dev_id;
1180 int reg, fault_index;
1181 u32 fault_status;
1182 unsigned long flag;
1184 spin_lock_irqsave(&iommu->register_lock, flag);
1185 fault_status = readl(iommu->reg + DMAR_FSTS_REG);
1186 if (fault_status)
1187 printk(KERN_ERR "DRHD: handling fault status reg %x\n",
1188 fault_status);
1190 /* TBD: ignore advanced fault log currently */
1191 if (!(fault_status & DMA_FSTS_PPF))
1192 goto clear_rest;
1194 fault_index = dma_fsts_fault_record_index(fault_status);
1195 reg = cap_fault_reg_offset(iommu->cap);
1196 while (1) {
1197 u8 fault_reason;
1198 u16 source_id;
1199 u64 guest_addr;
1200 int type;
1201 u32 data;
1203 /* highest 32 bits */
1204 data = readl(iommu->reg + reg +
1205 fault_index * PRIMARY_FAULT_REG_LEN + 12);
1206 if (!(data & DMA_FRCD_F))
1207 break;
1209 fault_reason = dma_frcd_fault_reason(data);
1210 type = dma_frcd_type(data);
1212 data = readl(iommu->reg + reg +
1213 fault_index * PRIMARY_FAULT_REG_LEN + 8);
1214 source_id = dma_frcd_source_id(data);
1216 guest_addr = dmar_readq(iommu->reg + reg +
1217 fault_index * PRIMARY_FAULT_REG_LEN);
1218 guest_addr = dma_frcd_page_addr(guest_addr);
1219 /* clear the fault */
1220 writel(DMA_FRCD_F, iommu->reg + reg +
1221 fault_index * PRIMARY_FAULT_REG_LEN + 12);
1223 spin_unlock_irqrestore(&iommu->register_lock, flag);
1225 dmar_fault_do_one(iommu, type, fault_reason,
1226 source_id, guest_addr);
1228 fault_index++;
1229 if (fault_index > cap_num_fault_regs(iommu->cap))
1230 fault_index = 0;
1231 spin_lock_irqsave(&iommu->register_lock, flag);
1233 clear_rest:
1234 /* clear all the other faults */
1235 fault_status = readl(iommu->reg + DMAR_FSTS_REG);
1236 writel(fault_status, iommu->reg + DMAR_FSTS_REG);
1238 spin_unlock_irqrestore(&iommu->register_lock, flag);
1239 return IRQ_HANDLED;
1242 int dmar_set_interrupt(struct intel_iommu *iommu)
1244 int irq, ret;
1247 * Check if the fault interrupt is already initialized.
1249 if (iommu->irq)
1250 return 0;
1252 irq = create_irq();
1253 if (!irq) {
1254 printk(KERN_ERR "IOMMU: no free vectors\n");
1255 return -EINVAL;
1258 set_irq_data(irq, iommu);
1259 iommu->irq = irq;
1261 ret = arch_setup_dmar_msi(irq);
1262 if (ret) {
1263 set_irq_data(irq, NULL);
1264 iommu->irq = 0;
1265 destroy_irq(irq);
1266 return ret;
1269 ret = request_irq(irq, dmar_fault, 0, iommu->name, iommu);
1270 if (ret)
1271 printk(KERN_ERR "IOMMU: can't request irq\n");
1272 return ret;
1275 int __init enable_drhd_fault_handling(void)
1277 struct dmar_drhd_unit *drhd;
1280 * Enable fault control interrupt.
1282 for_each_drhd_unit(drhd) {
1283 int ret;
1284 struct intel_iommu *iommu = drhd->iommu;
1285 ret = dmar_set_interrupt(iommu);
1287 if (ret) {
1288 printk(KERN_ERR "DRHD %Lx: failed to enable fault, "
1289 " interrupt, ret %d\n",
1290 (unsigned long long)drhd->reg_base_addr, ret);
1291 return -1;
1295 return 0;
1299 * Re-enable Queued Invalidation interface.
1301 int dmar_reenable_qi(struct intel_iommu *iommu)
1303 if (!ecap_qis(iommu->ecap))
1304 return -ENOENT;
1306 if (!iommu->qi)
1307 return -ENOENT;
1310 * First disable queued invalidation.
1312 dmar_disable_qi(iommu);
1314 * Then enable queued invalidation again. Since there is no pending
1315 * invalidation requests now, it's safe to re-enable queued
1316 * invalidation.
1318 __dmar_enable_qi(iommu);
1320 return 0;