Merge branch 'linux-next' of git://git.kernel.org/pub/scm/linux/kernel/git/jbarnes...
[linux-2.6/linux-2.6-openrd.git] / drivers / pci / dmar.c
blob6cdc931f7c1773e6fb59b2c8bbac85df950a9f86
1 /*
2 * Copyright (c) 2006, Intel Corporation.
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
13 * You should have received a copy of the GNU General Public License along with
14 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
15 * Place - Suite 330, Boston, MA 02111-1307 USA.
17 * Copyright (C) 2006-2008 Intel Corporation
18 * Author: Ashok Raj <ashok.raj@intel.com>
19 * Author: Shaohua Li <shaohua.li@intel.com>
20 * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
22 * This file implements early detection/parsing of Remapping Devices
23 * reported to OS through BIOS via DMA remapping reporting (DMAR) ACPI
24 * tables.
26 * These routines are used by both DMA-remapping and Interrupt-remapping
29 #include <linux/pci.h>
30 #include <linux/dmar.h>
31 #include <linux/iova.h>
32 #include <linux/intel-iommu.h>
33 #include <linux/timer.h>
34 #include <linux/irq.h>
35 #include <linux/interrupt.h>
36 #include <linux/tboot.h>
37 #include <linux/dmi.h>
39 #define PREFIX "DMAR: "
41 /* No locks are needed as DMA remapping hardware unit
42 * list is constructed at boot time and hotplug of
43 * these units are not supported by the architecture.
45 LIST_HEAD(dmar_drhd_units);
47 static struct acpi_table_header * __initdata dmar_tbl;
48 static acpi_size dmar_tbl_size;
50 static void __init dmar_register_drhd_unit(struct dmar_drhd_unit *drhd)
53 * add INCLUDE_ALL at the tail, so scan the list will find it at
54 * the very end.
56 if (drhd->include_all)
57 list_add_tail(&drhd->list, &dmar_drhd_units);
58 else
59 list_add(&drhd->list, &dmar_drhd_units);
62 static int __init dmar_parse_one_dev_scope(struct acpi_dmar_device_scope *scope,
63 struct pci_dev **dev, u16 segment)
65 struct pci_bus *bus;
66 struct pci_dev *pdev = NULL;
67 struct acpi_dmar_pci_path *path;
68 int count;
70 bus = pci_find_bus(segment, scope->bus);
71 path = (struct acpi_dmar_pci_path *)(scope + 1);
72 count = (scope->length - sizeof(struct acpi_dmar_device_scope))
73 / sizeof(struct acpi_dmar_pci_path);
75 while (count) {
76 if (pdev)
77 pci_dev_put(pdev);
79 * Some BIOSes list non-exist devices in DMAR table, just
80 * ignore it
82 if (!bus) {
83 printk(KERN_WARNING
84 PREFIX "Device scope bus [%d] not found\n",
85 scope->bus);
86 break;
88 pdev = pci_get_slot(bus, PCI_DEVFN(path->dev, path->fn));
89 if (!pdev) {
90 printk(KERN_WARNING PREFIX
91 "Device scope device [%04x:%02x:%02x.%02x] not found\n",
92 segment, bus->number, path->dev, path->fn);
93 break;
95 path ++;
96 count --;
97 bus = pdev->subordinate;
99 if (!pdev) {
100 printk(KERN_WARNING PREFIX
101 "Device scope device [%04x:%02x:%02x.%02x] not found\n",
102 segment, scope->bus, path->dev, path->fn);
103 *dev = NULL;
104 return 0;
106 if ((scope->entry_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT && \
107 pdev->subordinate) || (scope->entry_type == \
108 ACPI_DMAR_SCOPE_TYPE_BRIDGE && !pdev->subordinate)) {
109 pci_dev_put(pdev);
110 printk(KERN_WARNING PREFIX
111 "Device scope type does not match for %s\n",
112 pci_name(pdev));
113 return -EINVAL;
115 *dev = pdev;
116 return 0;
119 static int __init dmar_parse_dev_scope(void *start, void *end, int *cnt,
120 struct pci_dev ***devices, u16 segment)
122 struct acpi_dmar_device_scope *scope;
123 void * tmp = start;
124 int index;
125 int ret;
127 *cnt = 0;
128 while (start < end) {
129 scope = start;
130 if (scope->entry_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT ||
131 scope->entry_type == ACPI_DMAR_SCOPE_TYPE_BRIDGE)
132 (*cnt)++;
133 else
134 printk(KERN_WARNING PREFIX
135 "Unsupported device scope\n");
136 start += scope->length;
138 if (*cnt == 0)
139 return 0;
141 *devices = kcalloc(*cnt, sizeof(struct pci_dev *), GFP_KERNEL);
142 if (!*devices)
143 return -ENOMEM;
145 start = tmp;
146 index = 0;
147 while (start < end) {
148 scope = start;
149 if (scope->entry_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT ||
150 scope->entry_type == ACPI_DMAR_SCOPE_TYPE_BRIDGE) {
151 ret = dmar_parse_one_dev_scope(scope,
152 &(*devices)[index], segment);
153 if (ret) {
154 kfree(*devices);
155 return ret;
157 index ++;
159 start += scope->length;
162 return 0;
166 * dmar_parse_one_drhd - parses exactly one DMA remapping hardware definition
167 * structure which uniquely represent one DMA remapping hardware unit
168 * present in the platform
170 static int __init
171 dmar_parse_one_drhd(struct acpi_dmar_header *header)
173 struct acpi_dmar_hardware_unit *drhd;
174 struct dmar_drhd_unit *dmaru;
175 int ret = 0;
177 drhd = (struct acpi_dmar_hardware_unit *)header;
178 dmaru = kzalloc(sizeof(*dmaru), GFP_KERNEL);
179 if (!dmaru)
180 return -ENOMEM;
182 dmaru->hdr = header;
183 dmaru->reg_base_addr = drhd->address;
184 dmaru->segment = drhd->segment;
185 dmaru->include_all = drhd->flags & 0x1; /* BIT0: INCLUDE_ALL */
187 ret = alloc_iommu(dmaru);
188 if (ret) {
189 kfree(dmaru);
190 return ret;
192 dmar_register_drhd_unit(dmaru);
193 return 0;
196 static int __init dmar_parse_dev(struct dmar_drhd_unit *dmaru)
198 struct acpi_dmar_hardware_unit *drhd;
199 int ret = 0;
201 drhd = (struct acpi_dmar_hardware_unit *) dmaru->hdr;
203 if (dmaru->include_all)
204 return 0;
206 ret = dmar_parse_dev_scope((void *)(drhd + 1),
207 ((void *)drhd) + drhd->header.length,
208 &dmaru->devices_cnt, &dmaru->devices,
209 drhd->segment);
210 if (ret) {
211 list_del(&dmaru->list);
212 kfree(dmaru);
214 return ret;
217 #ifdef CONFIG_DMAR
218 LIST_HEAD(dmar_rmrr_units);
220 static void __init dmar_register_rmrr_unit(struct dmar_rmrr_unit *rmrr)
222 list_add(&rmrr->list, &dmar_rmrr_units);
226 static int __init
227 dmar_parse_one_rmrr(struct acpi_dmar_header *header)
229 struct acpi_dmar_reserved_memory *rmrr;
230 struct dmar_rmrr_unit *rmrru;
232 rmrru = kzalloc(sizeof(*rmrru), GFP_KERNEL);
233 if (!rmrru)
234 return -ENOMEM;
236 rmrru->hdr = header;
237 rmrr = (struct acpi_dmar_reserved_memory *)header;
238 rmrru->base_address = rmrr->base_address;
239 rmrru->end_address = rmrr->end_address;
241 dmar_register_rmrr_unit(rmrru);
242 return 0;
245 static int __init
246 rmrr_parse_dev(struct dmar_rmrr_unit *rmrru)
248 struct acpi_dmar_reserved_memory *rmrr;
249 int ret;
251 rmrr = (struct acpi_dmar_reserved_memory *) rmrru->hdr;
252 ret = dmar_parse_dev_scope((void *)(rmrr + 1),
253 ((void *)rmrr) + rmrr->header.length,
254 &rmrru->devices_cnt, &rmrru->devices, rmrr->segment);
256 if (ret || (rmrru->devices_cnt == 0)) {
257 list_del(&rmrru->list);
258 kfree(rmrru);
260 return ret;
263 static LIST_HEAD(dmar_atsr_units);
265 static int __init dmar_parse_one_atsr(struct acpi_dmar_header *hdr)
267 struct acpi_dmar_atsr *atsr;
268 struct dmar_atsr_unit *atsru;
270 atsr = container_of(hdr, struct acpi_dmar_atsr, header);
271 atsru = kzalloc(sizeof(*atsru), GFP_KERNEL);
272 if (!atsru)
273 return -ENOMEM;
275 atsru->hdr = hdr;
276 atsru->include_all = atsr->flags & 0x1;
278 list_add(&atsru->list, &dmar_atsr_units);
280 return 0;
283 static int __init atsr_parse_dev(struct dmar_atsr_unit *atsru)
285 int rc;
286 struct acpi_dmar_atsr *atsr;
288 if (atsru->include_all)
289 return 0;
291 atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
292 rc = dmar_parse_dev_scope((void *)(atsr + 1),
293 (void *)atsr + atsr->header.length,
294 &atsru->devices_cnt, &atsru->devices,
295 atsr->segment);
296 if (rc || !atsru->devices_cnt) {
297 list_del(&atsru->list);
298 kfree(atsru);
301 return rc;
304 int dmar_find_matched_atsr_unit(struct pci_dev *dev)
306 int i;
307 struct pci_bus *bus;
308 struct acpi_dmar_atsr *atsr;
309 struct dmar_atsr_unit *atsru;
311 list_for_each_entry(atsru, &dmar_atsr_units, list) {
312 atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
313 if (atsr->segment == pci_domain_nr(dev->bus))
314 goto found;
317 return 0;
319 found:
320 for (bus = dev->bus; bus; bus = bus->parent) {
321 struct pci_dev *bridge = bus->self;
323 if (!bridge || !pci_is_pcie(bridge) ||
324 bridge->pcie_type == PCI_EXP_TYPE_PCI_BRIDGE)
325 return 0;
327 if (bridge->pcie_type == PCI_EXP_TYPE_ROOT_PORT) {
328 for (i = 0; i < atsru->devices_cnt; i++)
329 if (atsru->devices[i] == bridge)
330 return 1;
331 break;
335 if (atsru->include_all)
336 return 1;
338 return 0;
340 #endif
342 static void __init
343 dmar_table_print_dmar_entry(struct acpi_dmar_header *header)
345 struct acpi_dmar_hardware_unit *drhd;
346 struct acpi_dmar_reserved_memory *rmrr;
347 struct acpi_dmar_atsr *atsr;
348 struct acpi_dmar_rhsa *rhsa;
350 switch (header->type) {
351 case ACPI_DMAR_TYPE_HARDWARE_UNIT:
352 drhd = container_of(header, struct acpi_dmar_hardware_unit,
353 header);
354 printk (KERN_INFO PREFIX
355 "DRHD base: %#016Lx flags: %#x\n",
356 (unsigned long long)drhd->address, drhd->flags);
357 break;
358 case ACPI_DMAR_TYPE_RESERVED_MEMORY:
359 rmrr = container_of(header, struct acpi_dmar_reserved_memory,
360 header);
361 printk (KERN_INFO PREFIX
362 "RMRR base: %#016Lx end: %#016Lx\n",
363 (unsigned long long)rmrr->base_address,
364 (unsigned long long)rmrr->end_address);
365 break;
366 case ACPI_DMAR_TYPE_ATSR:
367 atsr = container_of(header, struct acpi_dmar_atsr, header);
368 printk(KERN_INFO PREFIX "ATSR flags: %#x\n", atsr->flags);
369 break;
370 case ACPI_DMAR_HARDWARE_AFFINITY:
371 rhsa = container_of(header, struct acpi_dmar_rhsa, header);
372 printk(KERN_INFO PREFIX "RHSA base: %#016Lx proximity domain: %#x\n",
373 (unsigned long long)rhsa->base_address,
374 rhsa->proximity_domain);
375 break;
380 * dmar_table_detect - checks to see if the platform supports DMAR devices
382 static int __init dmar_table_detect(void)
384 acpi_status status = AE_OK;
386 /* if we could find DMAR table, then there are DMAR devices */
387 status = acpi_get_table_with_size(ACPI_SIG_DMAR, 0,
388 (struct acpi_table_header **)&dmar_tbl,
389 &dmar_tbl_size);
391 if (ACPI_SUCCESS(status) && !dmar_tbl) {
392 printk (KERN_WARNING PREFIX "Unable to map DMAR\n");
393 status = AE_NOT_FOUND;
396 return (ACPI_SUCCESS(status) ? 1 : 0);
400 * parse_dmar_table - parses the DMA reporting table
402 static int __init
403 parse_dmar_table(void)
405 struct acpi_table_dmar *dmar;
406 struct acpi_dmar_header *entry_header;
407 int ret = 0;
410 * Do it again, earlier dmar_tbl mapping could be mapped with
411 * fixed map.
413 dmar_table_detect();
416 * ACPI tables may not be DMA protected by tboot, so use DMAR copy
417 * SINIT saved in SinitMleData in TXT heap (which is DMA protected)
419 dmar_tbl = tboot_get_dmar_table(dmar_tbl);
421 dmar = (struct acpi_table_dmar *)dmar_tbl;
422 if (!dmar)
423 return -ENODEV;
425 if (dmar->width < PAGE_SHIFT - 1) {
426 printk(KERN_WARNING PREFIX "Invalid DMAR haw\n");
427 return -EINVAL;
430 printk (KERN_INFO PREFIX "Host address width %d\n",
431 dmar->width + 1);
433 entry_header = (struct acpi_dmar_header *)(dmar + 1);
434 while (((unsigned long)entry_header) <
435 (((unsigned long)dmar) + dmar_tbl->length)) {
436 /* Avoid looping forever on bad ACPI tables */
437 if (entry_header->length == 0) {
438 printk(KERN_WARNING PREFIX
439 "Invalid 0-length structure\n");
440 ret = -EINVAL;
441 break;
444 dmar_table_print_dmar_entry(entry_header);
446 switch (entry_header->type) {
447 case ACPI_DMAR_TYPE_HARDWARE_UNIT:
448 ret = dmar_parse_one_drhd(entry_header);
449 break;
450 case ACPI_DMAR_TYPE_RESERVED_MEMORY:
451 #ifdef CONFIG_DMAR
452 ret = dmar_parse_one_rmrr(entry_header);
453 #endif
454 break;
455 case ACPI_DMAR_TYPE_ATSR:
456 #ifdef CONFIG_DMAR
457 ret = dmar_parse_one_atsr(entry_header);
458 #endif
459 break;
460 case ACPI_DMAR_HARDWARE_AFFINITY:
461 /* We don't do anything with RHSA (yet?) */
462 break;
463 default:
464 printk(KERN_WARNING PREFIX
465 "Unknown DMAR structure type %d\n",
466 entry_header->type);
467 ret = 0; /* for forward compatibility */
468 break;
470 if (ret)
471 break;
473 entry_header = ((void *)entry_header + entry_header->length);
475 return ret;
478 int dmar_pci_device_match(struct pci_dev *devices[], int cnt,
479 struct pci_dev *dev)
481 int index;
483 while (dev) {
484 for (index = 0; index < cnt; index++)
485 if (dev == devices[index])
486 return 1;
488 /* Check our parent */
489 dev = dev->bus->self;
492 return 0;
495 struct dmar_drhd_unit *
496 dmar_find_matched_drhd_unit(struct pci_dev *dev)
498 struct dmar_drhd_unit *dmaru = NULL;
499 struct acpi_dmar_hardware_unit *drhd;
501 list_for_each_entry(dmaru, &dmar_drhd_units, list) {
502 drhd = container_of(dmaru->hdr,
503 struct acpi_dmar_hardware_unit,
504 header);
506 if (dmaru->include_all &&
507 drhd->segment == pci_domain_nr(dev->bus))
508 return dmaru;
510 if (dmar_pci_device_match(dmaru->devices,
511 dmaru->devices_cnt, dev))
512 return dmaru;
515 return NULL;
518 int __init dmar_dev_scope_init(void)
520 struct dmar_drhd_unit *drhd, *drhd_n;
521 int ret = -ENODEV;
523 list_for_each_entry_safe(drhd, drhd_n, &dmar_drhd_units, list) {
524 ret = dmar_parse_dev(drhd);
525 if (ret)
526 return ret;
529 #ifdef CONFIG_DMAR
531 struct dmar_rmrr_unit *rmrr, *rmrr_n;
532 struct dmar_atsr_unit *atsr, *atsr_n;
534 list_for_each_entry_safe(rmrr, rmrr_n, &dmar_rmrr_units, list) {
535 ret = rmrr_parse_dev(rmrr);
536 if (ret)
537 return ret;
540 list_for_each_entry_safe(atsr, atsr_n, &dmar_atsr_units, list) {
541 ret = atsr_parse_dev(atsr);
542 if (ret)
543 return ret;
546 #endif
548 return ret;
552 int __init dmar_table_init(void)
554 static int dmar_table_initialized;
555 int ret;
557 if (dmar_table_initialized)
558 return 0;
560 dmar_table_initialized = 1;
562 ret = parse_dmar_table();
563 if (ret) {
564 if (ret != -ENODEV)
565 printk(KERN_INFO PREFIX "parse DMAR table failure.\n");
566 return ret;
569 if (list_empty(&dmar_drhd_units)) {
570 printk(KERN_INFO PREFIX "No DMAR devices found\n");
571 return -ENODEV;
574 #ifdef CONFIG_DMAR
575 if (list_empty(&dmar_rmrr_units))
576 printk(KERN_INFO PREFIX "No RMRR found\n");
578 if (list_empty(&dmar_atsr_units))
579 printk(KERN_INFO PREFIX "No ATSR found\n");
580 #endif
582 return 0;
585 int __init check_zero_address(void)
587 struct acpi_table_dmar *dmar;
588 struct acpi_dmar_header *entry_header;
589 struct acpi_dmar_hardware_unit *drhd;
591 dmar = (struct acpi_table_dmar *)dmar_tbl;
592 entry_header = (struct acpi_dmar_header *)(dmar + 1);
594 while (((unsigned long)entry_header) <
595 (((unsigned long)dmar) + dmar_tbl->length)) {
596 /* Avoid looping forever on bad ACPI tables */
597 if (entry_header->length == 0) {
598 printk(KERN_WARNING PREFIX
599 "Invalid 0-length structure\n");
600 return 0;
603 if (entry_header->type == ACPI_DMAR_TYPE_HARDWARE_UNIT) {
604 drhd = (void *)entry_header;
605 if (!drhd->address) {
606 /* Promote an attitude of violence to a BIOS engineer today */
607 WARN(1, "Your BIOS is broken; DMAR reported at address zero!\n"
608 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
609 dmi_get_system_info(DMI_BIOS_VENDOR),
610 dmi_get_system_info(DMI_BIOS_VERSION),
611 dmi_get_system_info(DMI_PRODUCT_VERSION));
612 #ifdef CONFIG_DMAR
613 dmar_disabled = 1;
614 #endif
615 return 0;
617 break;
620 entry_header = ((void *)entry_header + entry_header->length);
622 return 1;
625 void __init detect_intel_iommu(void)
627 int ret;
629 ret = dmar_table_detect();
630 if (ret)
631 ret = check_zero_address();
633 #ifdef CONFIG_INTR_REMAP
634 struct acpi_table_dmar *dmar;
636 * for now we will disable dma-remapping when interrupt
637 * remapping is enabled.
638 * When support for queued invalidation for IOTLB invalidation
639 * is added, we will not need this any more.
641 dmar = (struct acpi_table_dmar *) dmar_tbl;
642 if (ret && cpu_has_x2apic && dmar->flags & 0x1)
643 printk(KERN_INFO
644 "Queued invalidation will be enabled to support "
645 "x2apic and Intr-remapping.\n");
646 #endif
647 #ifdef CONFIG_DMAR
648 if (ret && !no_iommu && !iommu_detected && !dmar_disabled) {
649 iommu_detected = 1;
650 /* Make sure ACS will be enabled */
651 pci_request_acs();
653 #endif
654 #ifdef CONFIG_X86
655 if (ret)
656 x86_init.iommu.iommu_init = intel_iommu_init;
657 #endif
659 early_acpi_os_unmap_memory(dmar_tbl, dmar_tbl_size);
660 dmar_tbl = NULL;
664 int alloc_iommu(struct dmar_drhd_unit *drhd)
666 struct intel_iommu *iommu;
667 int map_size;
668 u32 ver;
669 static int iommu_allocated = 0;
670 int agaw = 0;
671 int msagaw = 0;
673 iommu = kzalloc(sizeof(*iommu), GFP_KERNEL);
674 if (!iommu)
675 return -ENOMEM;
677 iommu->seq_id = iommu_allocated++;
678 sprintf (iommu->name, "dmar%d", iommu->seq_id);
680 iommu->reg = ioremap(drhd->reg_base_addr, VTD_PAGE_SIZE);
681 if (!iommu->reg) {
682 printk(KERN_ERR "IOMMU: can't map the region\n");
683 goto error;
685 iommu->cap = dmar_readq(iommu->reg + DMAR_CAP_REG);
686 iommu->ecap = dmar_readq(iommu->reg + DMAR_ECAP_REG);
688 if (iommu->cap == (uint64_t)-1 && iommu->ecap == (uint64_t)-1) {
689 /* Promote an attitude of violence to a BIOS engineer today */
690 WARN(1, "Your BIOS is broken; DMAR reported at address %llx returns all ones!\n"
691 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
692 drhd->reg_base_addr,
693 dmi_get_system_info(DMI_BIOS_VENDOR),
694 dmi_get_system_info(DMI_BIOS_VERSION),
695 dmi_get_system_info(DMI_PRODUCT_VERSION));
696 goto err_unmap;
699 #ifdef CONFIG_DMAR
700 agaw = iommu_calculate_agaw(iommu);
701 if (agaw < 0) {
702 printk(KERN_ERR
703 "Cannot get a valid agaw for iommu (seq_id = %d)\n",
704 iommu->seq_id);
705 goto err_unmap;
707 msagaw = iommu_calculate_max_sagaw(iommu);
708 if (msagaw < 0) {
709 printk(KERN_ERR
710 "Cannot get a valid max agaw for iommu (seq_id = %d)\n",
711 iommu->seq_id);
712 goto err_unmap;
714 #endif
715 iommu->agaw = agaw;
716 iommu->msagaw = msagaw;
718 /* the registers might be more than one page */
719 map_size = max_t(int, ecap_max_iotlb_offset(iommu->ecap),
720 cap_max_fault_reg_offset(iommu->cap));
721 map_size = VTD_PAGE_ALIGN(map_size);
722 if (map_size > VTD_PAGE_SIZE) {
723 iounmap(iommu->reg);
724 iommu->reg = ioremap(drhd->reg_base_addr, map_size);
725 if (!iommu->reg) {
726 printk(KERN_ERR "IOMMU: can't map the region\n");
727 goto error;
731 ver = readl(iommu->reg + DMAR_VER_REG);
732 pr_info("IOMMU %llx: ver %d:%d cap %llx ecap %llx\n",
733 (unsigned long long)drhd->reg_base_addr,
734 DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver),
735 (unsigned long long)iommu->cap,
736 (unsigned long long)iommu->ecap);
738 spin_lock_init(&iommu->register_lock);
740 drhd->iommu = iommu;
741 return 0;
743 err_unmap:
744 iounmap(iommu->reg);
745 error:
746 kfree(iommu);
747 return -1;
750 void free_iommu(struct intel_iommu *iommu)
752 if (!iommu)
753 return;
755 #ifdef CONFIG_DMAR
756 free_dmar_iommu(iommu);
757 #endif
759 if (iommu->reg)
760 iounmap(iommu->reg);
761 kfree(iommu);
765 * Reclaim all the submitted descriptors which have completed its work.
767 static inline void reclaim_free_desc(struct q_inval *qi)
769 while (qi->desc_status[qi->free_tail] == QI_DONE ||
770 qi->desc_status[qi->free_tail] == QI_ABORT) {
771 qi->desc_status[qi->free_tail] = QI_FREE;
772 qi->free_tail = (qi->free_tail + 1) % QI_LENGTH;
773 qi->free_cnt++;
777 static int qi_check_fault(struct intel_iommu *iommu, int index)
779 u32 fault;
780 int head, tail;
781 struct q_inval *qi = iommu->qi;
782 int wait_index = (index + 1) % QI_LENGTH;
784 if (qi->desc_status[wait_index] == QI_ABORT)
785 return -EAGAIN;
787 fault = readl(iommu->reg + DMAR_FSTS_REG);
790 * If IQE happens, the head points to the descriptor associated
791 * with the error. No new descriptors are fetched until the IQE
792 * is cleared.
794 if (fault & DMA_FSTS_IQE) {
795 head = readl(iommu->reg + DMAR_IQH_REG);
796 if ((head >> DMAR_IQ_SHIFT) == index) {
797 printk(KERN_ERR "VT-d detected invalid descriptor: "
798 "low=%llx, high=%llx\n",
799 (unsigned long long)qi->desc[index].low,
800 (unsigned long long)qi->desc[index].high);
801 memcpy(&qi->desc[index], &qi->desc[wait_index],
802 sizeof(struct qi_desc));
803 __iommu_flush_cache(iommu, &qi->desc[index],
804 sizeof(struct qi_desc));
805 writel(DMA_FSTS_IQE, iommu->reg + DMAR_FSTS_REG);
806 return -EINVAL;
811 * If ITE happens, all pending wait_desc commands are aborted.
812 * No new descriptors are fetched until the ITE is cleared.
814 if (fault & DMA_FSTS_ITE) {
815 head = readl(iommu->reg + DMAR_IQH_REG);
816 head = ((head >> DMAR_IQ_SHIFT) - 1 + QI_LENGTH) % QI_LENGTH;
817 head |= 1;
818 tail = readl(iommu->reg + DMAR_IQT_REG);
819 tail = ((tail >> DMAR_IQ_SHIFT) - 1 + QI_LENGTH) % QI_LENGTH;
821 writel(DMA_FSTS_ITE, iommu->reg + DMAR_FSTS_REG);
823 do {
824 if (qi->desc_status[head] == QI_IN_USE)
825 qi->desc_status[head] = QI_ABORT;
826 head = (head - 2 + QI_LENGTH) % QI_LENGTH;
827 } while (head != tail);
829 if (qi->desc_status[wait_index] == QI_ABORT)
830 return -EAGAIN;
833 if (fault & DMA_FSTS_ICE)
834 writel(DMA_FSTS_ICE, iommu->reg + DMAR_FSTS_REG);
836 return 0;
840 * Submit the queued invalidation descriptor to the remapping
841 * hardware unit and wait for its completion.
843 int qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu)
845 int rc;
846 struct q_inval *qi = iommu->qi;
847 struct qi_desc *hw, wait_desc;
848 int wait_index, index;
849 unsigned long flags;
851 if (!qi)
852 return 0;
854 hw = qi->desc;
856 restart:
857 rc = 0;
859 spin_lock_irqsave(&qi->q_lock, flags);
860 while (qi->free_cnt < 3) {
861 spin_unlock_irqrestore(&qi->q_lock, flags);
862 cpu_relax();
863 spin_lock_irqsave(&qi->q_lock, flags);
866 index = qi->free_head;
867 wait_index = (index + 1) % QI_LENGTH;
869 qi->desc_status[index] = qi->desc_status[wait_index] = QI_IN_USE;
871 hw[index] = *desc;
873 wait_desc.low = QI_IWD_STATUS_DATA(QI_DONE) |
874 QI_IWD_STATUS_WRITE | QI_IWD_TYPE;
875 wait_desc.high = virt_to_phys(&qi->desc_status[wait_index]);
877 hw[wait_index] = wait_desc;
879 __iommu_flush_cache(iommu, &hw[index], sizeof(struct qi_desc));
880 __iommu_flush_cache(iommu, &hw[wait_index], sizeof(struct qi_desc));
882 qi->free_head = (qi->free_head + 2) % QI_LENGTH;
883 qi->free_cnt -= 2;
886 * update the HW tail register indicating the presence of
887 * new descriptors.
889 writel(qi->free_head << DMAR_IQ_SHIFT, iommu->reg + DMAR_IQT_REG);
891 while (qi->desc_status[wait_index] != QI_DONE) {
893 * We will leave the interrupts disabled, to prevent interrupt
894 * context to queue another cmd while a cmd is already submitted
895 * and waiting for completion on this cpu. This is to avoid
896 * a deadlock where the interrupt context can wait indefinitely
897 * for free slots in the queue.
899 rc = qi_check_fault(iommu, index);
900 if (rc)
901 break;
903 spin_unlock(&qi->q_lock);
904 cpu_relax();
905 spin_lock(&qi->q_lock);
908 qi->desc_status[index] = QI_DONE;
910 reclaim_free_desc(qi);
911 spin_unlock_irqrestore(&qi->q_lock, flags);
913 if (rc == -EAGAIN)
914 goto restart;
916 return rc;
920 * Flush the global interrupt entry cache.
922 void qi_global_iec(struct intel_iommu *iommu)
924 struct qi_desc desc;
926 desc.low = QI_IEC_TYPE;
927 desc.high = 0;
929 /* should never fail */
930 qi_submit_sync(&desc, iommu);
933 void qi_flush_context(struct intel_iommu *iommu, u16 did, u16 sid, u8 fm,
934 u64 type)
936 struct qi_desc desc;
938 desc.low = QI_CC_FM(fm) | QI_CC_SID(sid) | QI_CC_DID(did)
939 | QI_CC_GRAN(type) | QI_CC_TYPE;
940 desc.high = 0;
942 qi_submit_sync(&desc, iommu);
945 void qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr,
946 unsigned int size_order, u64 type)
948 u8 dw = 0, dr = 0;
950 struct qi_desc desc;
951 int ih = 0;
953 if (cap_write_drain(iommu->cap))
954 dw = 1;
956 if (cap_read_drain(iommu->cap))
957 dr = 1;
959 desc.low = QI_IOTLB_DID(did) | QI_IOTLB_DR(dr) | QI_IOTLB_DW(dw)
960 | QI_IOTLB_GRAN(type) | QI_IOTLB_TYPE;
961 desc.high = QI_IOTLB_ADDR(addr) | QI_IOTLB_IH(ih)
962 | QI_IOTLB_AM(size_order);
964 qi_submit_sync(&desc, iommu);
967 void qi_flush_dev_iotlb(struct intel_iommu *iommu, u16 sid, u16 qdep,
968 u64 addr, unsigned mask)
970 struct qi_desc desc;
972 if (mask) {
973 BUG_ON(addr & ((1 << (VTD_PAGE_SHIFT + mask)) - 1));
974 addr |= (1 << (VTD_PAGE_SHIFT + mask - 1)) - 1;
975 desc.high = QI_DEV_IOTLB_ADDR(addr) | QI_DEV_IOTLB_SIZE;
976 } else
977 desc.high = QI_DEV_IOTLB_ADDR(addr);
979 if (qdep >= QI_DEV_IOTLB_MAX_INVS)
980 qdep = 0;
982 desc.low = QI_DEV_IOTLB_SID(sid) | QI_DEV_IOTLB_QDEP(qdep) |
983 QI_DIOTLB_TYPE;
985 qi_submit_sync(&desc, iommu);
989 * Disable Queued Invalidation interface.
991 void dmar_disable_qi(struct intel_iommu *iommu)
993 unsigned long flags;
994 u32 sts;
995 cycles_t start_time = get_cycles();
997 if (!ecap_qis(iommu->ecap))
998 return;
1000 spin_lock_irqsave(&iommu->register_lock, flags);
1002 sts = dmar_readq(iommu->reg + DMAR_GSTS_REG);
1003 if (!(sts & DMA_GSTS_QIES))
1004 goto end;
1007 * Give a chance to HW to complete the pending invalidation requests.
1009 while ((readl(iommu->reg + DMAR_IQT_REG) !=
1010 readl(iommu->reg + DMAR_IQH_REG)) &&
1011 (DMAR_OPERATION_TIMEOUT > (get_cycles() - start_time)))
1012 cpu_relax();
1014 iommu->gcmd &= ~DMA_GCMD_QIE;
1015 writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1017 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, readl,
1018 !(sts & DMA_GSTS_QIES), sts);
1019 end:
1020 spin_unlock_irqrestore(&iommu->register_lock, flags);
1024 * Enable queued invalidation.
1026 static void __dmar_enable_qi(struct intel_iommu *iommu)
1028 u32 sts;
1029 unsigned long flags;
1030 struct q_inval *qi = iommu->qi;
1032 qi->free_head = qi->free_tail = 0;
1033 qi->free_cnt = QI_LENGTH;
1035 spin_lock_irqsave(&iommu->register_lock, flags);
1037 /* write zero to the tail reg */
1038 writel(0, iommu->reg + DMAR_IQT_REG);
1040 dmar_writeq(iommu->reg + DMAR_IQA_REG, virt_to_phys(qi->desc));
1042 iommu->gcmd |= DMA_GCMD_QIE;
1043 writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1045 /* Make sure hardware complete it */
1046 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, readl, (sts & DMA_GSTS_QIES), sts);
1048 spin_unlock_irqrestore(&iommu->register_lock, flags);
1052 * Enable Queued Invalidation interface. This is a must to support
1053 * interrupt-remapping. Also used by DMA-remapping, which replaces
1054 * register based IOTLB invalidation.
1056 int dmar_enable_qi(struct intel_iommu *iommu)
1058 struct q_inval *qi;
1060 if (!ecap_qis(iommu->ecap))
1061 return -ENOENT;
1064 * queued invalidation is already setup and enabled.
1066 if (iommu->qi)
1067 return 0;
1069 iommu->qi = kmalloc(sizeof(*qi), GFP_ATOMIC);
1070 if (!iommu->qi)
1071 return -ENOMEM;
1073 qi = iommu->qi;
1075 qi->desc = (void *)(get_zeroed_page(GFP_ATOMIC));
1076 if (!qi->desc) {
1077 kfree(qi);
1078 iommu->qi = 0;
1079 return -ENOMEM;
1082 qi->desc_status = kmalloc(QI_LENGTH * sizeof(int), GFP_ATOMIC);
1083 if (!qi->desc_status) {
1084 free_page((unsigned long) qi->desc);
1085 kfree(qi);
1086 iommu->qi = 0;
1087 return -ENOMEM;
1090 qi->free_head = qi->free_tail = 0;
1091 qi->free_cnt = QI_LENGTH;
1093 spin_lock_init(&qi->q_lock);
1095 __dmar_enable_qi(iommu);
1097 return 0;
/* iommu interrupt handling. Most stuff are MSI-like. */

/* Fault class reported through dmar_get_fault_reason()'s fault_type. */
enum faulttype {
	DMA_REMAP,	/* reason taken from dma_remap_fault_reasons */
	INTR_REMAP,	/* reason taken from intr_remap_fault_reasons */
	UNKNOWN,	/* reason code not covered by either table */
};
/* Text for DMA-remapping fault reasons, indexed by the reason code. */
static const char *dma_remap_fault_reasons[] = {
	"Software",
	"Present bit in root entry is clear",
	"Present bit in context entry is clear",
	"Invalid context entry",
	"Access beyond MGAW",
	"PTE Write access is not set",
	"PTE Read access is not set",
	"Next page table ptr is invalid",
	"Root table address invalid",
	"Context table ptr is invalid",
	"non-zero reserved fields in RTP",
	"non-zero reserved fields in CTP",
	"non-zero reserved fields in PTE",
};

/* Text for interrupt-remapping fault reasons, indexed by (code - 0x20). */
static const char *intr_remap_fault_reasons[] = {
	"Detected reserved fields in the decoded interrupt-remapped request",
	"Interrupt index exceeded the interrupt-remapping table size",
	"Present field in the IRTE entry is clear",
	"Error accessing interrupt-remapping table pointed by IRTA_REG",
	"Detected reserved fields in the IRTE entry",
	"Blocked a compatibility format interrupt request",
	"Blocked an interrupt request due to source-id verification failure",
};
1136 #define MAX_FAULT_REASON_IDX (ARRAY_SIZE(fault_reason_strings) - 1)
1138 const char *dmar_get_fault_reason(u8 fault_reason, int *fault_type)
1140 if (fault_reason >= 0x20 && (fault_reason <= 0x20 +
1141 ARRAY_SIZE(intr_remap_fault_reasons))) {
1142 *fault_type = INTR_REMAP;
1143 return intr_remap_fault_reasons[fault_reason - 0x20];
1144 } else if (fault_reason < ARRAY_SIZE(dma_remap_fault_reasons)) {
1145 *fault_type = DMA_REMAP;
1146 return dma_remap_fault_reasons[fault_reason];
1147 } else {
1148 *fault_type = UNKNOWN;
1149 return "Unknown";
1153 void dmar_msi_unmask(unsigned int irq)
1155 struct intel_iommu *iommu = get_irq_data(irq);
1156 unsigned long flag;
1158 /* unmask it */
1159 spin_lock_irqsave(&iommu->register_lock, flag);
1160 writel(0, iommu->reg + DMAR_FECTL_REG);
1161 /* Read a reg to force flush the post write */
1162 readl(iommu->reg + DMAR_FECTL_REG);
1163 spin_unlock_irqrestore(&iommu->register_lock, flag);
1166 void dmar_msi_mask(unsigned int irq)
1168 unsigned long flag;
1169 struct intel_iommu *iommu = get_irq_data(irq);
1171 /* mask it */
1172 spin_lock_irqsave(&iommu->register_lock, flag);
1173 writel(DMA_FECTL_IM, iommu->reg + DMAR_FECTL_REG);
1174 /* Read a reg to force flush the post write */
1175 readl(iommu->reg + DMAR_FECTL_REG);
1176 spin_unlock_irqrestore(&iommu->register_lock, flag);
1179 void dmar_msi_write(int irq, struct msi_msg *msg)
1181 struct intel_iommu *iommu = get_irq_data(irq);
1182 unsigned long flag;
1184 spin_lock_irqsave(&iommu->register_lock, flag);
1185 writel(msg->data, iommu->reg + DMAR_FEDATA_REG);
1186 writel(msg->address_lo, iommu->reg + DMAR_FEADDR_REG);
1187 writel(msg->address_hi, iommu->reg + DMAR_FEUADDR_REG);
1188 spin_unlock_irqrestore(&iommu->register_lock, flag);
1191 void dmar_msi_read(int irq, struct msi_msg *msg)
1193 struct intel_iommu *iommu = get_irq_data(irq);
1194 unsigned long flag;
1196 spin_lock_irqsave(&iommu->register_lock, flag);
1197 msg->data = readl(iommu->reg + DMAR_FEDATA_REG);
1198 msg->address_lo = readl(iommu->reg + DMAR_FEADDR_REG);
1199 msg->address_hi = readl(iommu->reg + DMAR_FEUADDR_REG);
1200 spin_unlock_irqrestore(&iommu->register_lock, flag);
1203 static int dmar_fault_do_one(struct intel_iommu *iommu, int type,
1204 u8 fault_reason, u16 source_id, unsigned long long addr)
1206 const char *reason;
1207 int fault_type;
1209 reason = dmar_get_fault_reason(fault_reason, &fault_type);
1211 if (fault_type == INTR_REMAP)
1212 printk(KERN_ERR "INTR-REMAP: Request device [[%02x:%02x.%d] "
1213 "fault index %llx\n"
1214 "INTR-REMAP:[fault reason %02d] %s\n",
1215 (source_id >> 8), PCI_SLOT(source_id & 0xFF),
1216 PCI_FUNC(source_id & 0xFF), addr >> 48,
1217 fault_reason, reason);
1218 else
1219 printk(KERN_ERR
1220 "DMAR:[%s] Request device [%02x:%02x.%d] "
1221 "fault addr %llx \n"
1222 "DMAR:[fault reason %02d] %s\n",
1223 (type ? "DMA Read" : "DMA Write"),
1224 (source_id >> 8), PCI_SLOT(source_id & 0xFF),
1225 PCI_FUNC(source_id & 0xFF), addr, fault_reason, reason);
1226 return 0;
1229 #define PRIMARY_FAULT_REG_LEN (16)
1230 irqreturn_t dmar_fault(int irq, void *dev_id)
1232 struct intel_iommu *iommu = dev_id;
1233 int reg, fault_index;
1234 u32 fault_status;
1235 unsigned long flag;
1237 spin_lock_irqsave(&iommu->register_lock, flag);
1238 fault_status = readl(iommu->reg + DMAR_FSTS_REG);
1239 if (fault_status)
1240 printk(KERN_ERR "DRHD: handling fault status reg %x\n",
1241 fault_status);
1243 /* TBD: ignore advanced fault log currently */
1244 if (!(fault_status & DMA_FSTS_PPF))
1245 goto clear_rest;
1247 fault_index = dma_fsts_fault_record_index(fault_status);
1248 reg = cap_fault_reg_offset(iommu->cap);
1249 while (1) {
1250 u8 fault_reason;
1251 u16 source_id;
1252 u64 guest_addr;
1253 int type;
1254 u32 data;
1256 /* highest 32 bits */
1257 data = readl(iommu->reg + reg +
1258 fault_index * PRIMARY_FAULT_REG_LEN + 12);
1259 if (!(data & DMA_FRCD_F))
1260 break;
1262 fault_reason = dma_frcd_fault_reason(data);
1263 type = dma_frcd_type(data);
1265 data = readl(iommu->reg + reg +
1266 fault_index * PRIMARY_FAULT_REG_LEN + 8);
1267 source_id = dma_frcd_source_id(data);
1269 guest_addr = dmar_readq(iommu->reg + reg +
1270 fault_index * PRIMARY_FAULT_REG_LEN);
1271 guest_addr = dma_frcd_page_addr(guest_addr);
1272 /* clear the fault */
1273 writel(DMA_FRCD_F, iommu->reg + reg +
1274 fault_index * PRIMARY_FAULT_REG_LEN + 12);
1276 spin_unlock_irqrestore(&iommu->register_lock, flag);
1278 dmar_fault_do_one(iommu, type, fault_reason,
1279 source_id, guest_addr);
1281 fault_index++;
1282 if (fault_index >= cap_num_fault_regs(iommu->cap))
1283 fault_index = 0;
1284 spin_lock_irqsave(&iommu->register_lock, flag);
1286 clear_rest:
1287 /* clear all the other faults */
1288 fault_status = readl(iommu->reg + DMAR_FSTS_REG);
1289 writel(fault_status, iommu->reg + DMAR_FSTS_REG);
1291 spin_unlock_irqrestore(&iommu->register_lock, flag);
1292 return IRQ_HANDLED;
1295 int dmar_set_interrupt(struct intel_iommu *iommu)
1297 int irq, ret;
1300 * Check if the fault interrupt is already initialized.
1302 if (iommu->irq)
1303 return 0;
1305 irq = create_irq();
1306 if (!irq) {
1307 printk(KERN_ERR "IOMMU: no free vectors\n");
1308 return -EINVAL;
1311 set_irq_data(irq, iommu);
1312 iommu->irq = irq;
1314 ret = arch_setup_dmar_msi(irq);
1315 if (ret) {
1316 set_irq_data(irq, NULL);
1317 iommu->irq = 0;
1318 destroy_irq(irq);
1319 return ret;
1322 ret = request_irq(irq, dmar_fault, 0, iommu->name, iommu);
1323 if (ret)
1324 printk(KERN_ERR "IOMMU: can't request irq\n");
1325 return ret;
1328 int __init enable_drhd_fault_handling(void)
1330 struct dmar_drhd_unit *drhd;
1333 * Enable fault control interrupt.
1335 for_each_drhd_unit(drhd) {
1336 int ret;
1337 struct intel_iommu *iommu = drhd->iommu;
1338 ret = dmar_set_interrupt(iommu);
1340 if (ret) {
1341 printk(KERN_ERR "DRHD %Lx: failed to enable fault, "
1342 " interrupt, ret %d\n",
1343 (unsigned long long)drhd->reg_base_addr, ret);
1344 return -1;
1348 return 0;
1352 * Re-enable Queued Invalidation interface.
1354 int dmar_reenable_qi(struct intel_iommu *iommu)
1356 if (!ecap_qis(iommu->ecap))
1357 return -ENOENT;
1359 if (!iommu->qi)
1360 return -ENOENT;
1363 * First disable queued invalidation.
1365 dmar_disable_qi(iommu);
1367 * Then enable queued invalidation again. Since there is no pending
1368 * invalidation requests now, it's safe to re-enable queued
1369 * invalidation.
1371 __dmar_enable_qi(iommu);
1373 return 0;
1377 * Check interrupt remapping support in DMAR table description.
1379 int dmar_ir_support(void)
1381 struct acpi_table_dmar *dmar;
1382 dmar = (struct acpi_table_dmar *)dmar_tbl;
1383 return dmar->flags & 0x1;