2 * Copyright (c) 2006, Intel Corporation.
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * You should have received a copy of the GNU General Public License along with
14 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
15 * Place - Suite 330, Boston, MA 02111-1307 USA.
17 * Copyright (C) 2006-2008 Intel Corporation
18 * Author: Ashok Raj <ashok.raj@intel.com>
19 * Author: Shaohua Li <shaohua.li@intel.com>
20 * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
22 * This file implements early detection/parsing of Remapping Devices
23 * reported to OS through BIOS via DMA remapping reporting (DMAR) ACPI
26 * These routines are used by both DMA-remapping and Interrupt-remapping
29 #include <linux/pci.h>
30 #include <linux/dmar.h>
31 #include <linux/iova.h>
32 #include <linux/intel-iommu.h>
33 #include <linux/timer.h>
36 #define PREFIX "DMAR:"
38 /* No locks are needed as DMA remapping hardware unit
39 * list is constructed at boot time and hotplug of
40 * these units are not supported by the architecture.
42 LIST_HEAD(dmar_drhd_units
);
44 static struct acpi_table_header
* __initdata dmar_tbl
;
45 static acpi_size dmar_tbl_size
;
47 static void __init
dmar_register_drhd_unit(struct dmar_drhd_unit
*drhd
)
50 * add INCLUDE_ALL at the tail, so scan the list will find it at
53 if (drhd
->include_all
)
54 list_add_tail(&drhd
->list
, &dmar_drhd_units
);
56 list_add(&drhd
->list
, &dmar_drhd_units
);
59 static int __init
dmar_parse_one_dev_scope(struct acpi_dmar_device_scope
*scope
,
60 struct pci_dev
**dev
, u16 segment
)
63 struct pci_dev
*pdev
= NULL
;
64 struct acpi_dmar_pci_path
*path
;
67 bus
= pci_find_bus(segment
, scope
->bus
);
68 path
= (struct acpi_dmar_pci_path
*)(scope
+ 1);
69 count
= (scope
->length
- sizeof(struct acpi_dmar_device_scope
))
70 / sizeof(struct acpi_dmar_pci_path
);
76 * Some BIOSes list non-existent devices in the DMAR table; just
81 PREFIX
"Device scope bus [%d] not found\n",
85 pdev
= pci_get_slot(bus
, PCI_DEVFN(path
->dev
, path
->fn
));
87 printk(KERN_WARNING PREFIX
88 "Device scope device [%04x:%02x:%02x.%02x] not found\n",
89 segment
, bus
->number
, path
->dev
, path
->fn
);
94 bus
= pdev
->subordinate
;
97 printk(KERN_WARNING PREFIX
98 "Device scope device [%04x:%02x:%02x.%02x] not found\n",
99 segment
, scope
->bus
, path
->dev
, path
->fn
);
103 if ((scope
->entry_type
== ACPI_DMAR_SCOPE_TYPE_ENDPOINT
&& \
104 pdev
->subordinate
) || (scope
->entry_type
== \
105 ACPI_DMAR_SCOPE_TYPE_BRIDGE
&& !pdev
->subordinate
)) {
107 printk(KERN_WARNING PREFIX
108 "Device scope type does not match for %s\n",
116 static int __init
dmar_parse_dev_scope(void *start
, void *end
, int *cnt
,
117 struct pci_dev
***devices
, u16 segment
)
119 struct acpi_dmar_device_scope
*scope
;
125 while (start
< end
) {
127 if (scope
->entry_type
== ACPI_DMAR_SCOPE_TYPE_ENDPOINT
||
128 scope
->entry_type
== ACPI_DMAR_SCOPE_TYPE_BRIDGE
)
131 printk(KERN_WARNING PREFIX
132 "Unsupported device scope\n");
133 start
+= scope
->length
;
138 *devices
= kcalloc(*cnt
, sizeof(struct pci_dev
*), GFP_KERNEL
);
144 while (start
< end
) {
146 if (scope
->entry_type
== ACPI_DMAR_SCOPE_TYPE_ENDPOINT
||
147 scope
->entry_type
== ACPI_DMAR_SCOPE_TYPE_BRIDGE
) {
148 ret
= dmar_parse_one_dev_scope(scope
,
149 &(*devices
)[index
], segment
);
156 start
+= scope
->length
;
163 * dmar_parse_one_drhd - parses exactly one DMA remapping hardware definition
164 * structure which uniquely represents one DMA remapping hardware unit
165 * present in the platform
168 dmar_parse_one_drhd(struct acpi_dmar_header
*header
)
170 struct acpi_dmar_hardware_unit
*drhd
;
171 struct dmar_drhd_unit
*dmaru
;
174 dmaru
= kzalloc(sizeof(*dmaru
), GFP_KERNEL
);
179 drhd
= (struct acpi_dmar_hardware_unit
*)header
;
180 dmaru
->reg_base_addr
= drhd
->address
;
181 dmaru
->include_all
= drhd
->flags
& 0x1; /* BIT0: INCLUDE_ALL */
183 ret
= alloc_iommu(dmaru
);
188 dmar_register_drhd_unit(dmaru
);
192 static int __init
dmar_parse_dev(struct dmar_drhd_unit
*dmaru
)
194 struct acpi_dmar_hardware_unit
*drhd
;
197 drhd
= (struct acpi_dmar_hardware_unit
*) dmaru
->hdr
;
199 if (dmaru
->include_all
)
202 ret
= dmar_parse_dev_scope((void *)(drhd
+ 1),
203 ((void *)drhd
) + drhd
->header
.length
,
204 &dmaru
->devices_cnt
, &dmaru
->devices
,
207 list_del(&dmaru
->list
);
214 LIST_HEAD(dmar_rmrr_units
);
216 static void __init
dmar_register_rmrr_unit(struct dmar_rmrr_unit
*rmrr
)
218 list_add(&rmrr
->list
, &dmar_rmrr_units
);
223 dmar_parse_one_rmrr(struct acpi_dmar_header
*header
)
225 struct acpi_dmar_reserved_memory
*rmrr
;
226 struct dmar_rmrr_unit
*rmrru
;
228 rmrru
= kzalloc(sizeof(*rmrru
), GFP_KERNEL
);
233 rmrr
= (struct acpi_dmar_reserved_memory
*)header
;
234 rmrru
->base_address
= rmrr
->base_address
;
235 rmrru
->end_address
= rmrr
->end_address
;
237 dmar_register_rmrr_unit(rmrru
);
242 rmrr_parse_dev(struct dmar_rmrr_unit
*rmrru
)
244 struct acpi_dmar_reserved_memory
*rmrr
;
247 rmrr
= (struct acpi_dmar_reserved_memory
*) rmrru
->hdr
;
248 ret
= dmar_parse_dev_scope((void *)(rmrr
+ 1),
249 ((void *)rmrr
) + rmrr
->header
.length
,
250 &rmrru
->devices_cnt
, &rmrru
->devices
, rmrr
->segment
);
252 if (ret
|| (rmrru
->devices_cnt
== 0)) {
253 list_del(&rmrru
->list
);
261 dmar_table_print_dmar_entry(struct acpi_dmar_header
*header
)
263 struct acpi_dmar_hardware_unit
*drhd
;
264 struct acpi_dmar_reserved_memory
*rmrr
;
266 switch (header
->type
) {
267 case ACPI_DMAR_TYPE_HARDWARE_UNIT
:
268 drhd
= (struct acpi_dmar_hardware_unit
*)header
;
269 printk (KERN_INFO PREFIX
270 "DRHD (flags: 0x%08x)base: 0x%016Lx\n",
271 drhd
->flags
, (unsigned long long)drhd
->address
);
273 case ACPI_DMAR_TYPE_RESERVED_MEMORY
:
274 rmrr
= (struct acpi_dmar_reserved_memory
*)header
;
276 printk (KERN_INFO PREFIX
277 "RMRR base: 0x%016Lx end: 0x%016Lx\n",
278 (unsigned long long)rmrr
->base_address
,
279 (unsigned long long)rmrr
->end_address
);
285 * dmar_table_detect - checks to see if the platform supports DMAR devices
287 static int __init
dmar_table_detect(void)
289 acpi_status status
= AE_OK
;
291 /* if we could find DMAR table, then there are DMAR devices */
292 status
= acpi_get_table_with_size(ACPI_SIG_DMAR
, 0,
293 (struct acpi_table_header
**)&dmar_tbl
,
296 if (ACPI_SUCCESS(status
) && !dmar_tbl
) {
297 printk (KERN_WARNING PREFIX
"Unable to map DMAR\n");
298 status
= AE_NOT_FOUND
;
301 return (ACPI_SUCCESS(status
) ? 1 : 0);
305 * parse_dmar_table - parses the DMA reporting table
308 parse_dmar_table(void)
310 struct acpi_table_dmar
*dmar
;
311 struct acpi_dmar_header
*entry_header
;
315 * Do it again, earlier dmar_tbl mapping could be mapped with
320 dmar
= (struct acpi_table_dmar
*)dmar_tbl
;
324 if (dmar
->width
< PAGE_SHIFT
- 1) {
325 printk(KERN_WARNING PREFIX
"Invalid DMAR haw\n");
329 printk (KERN_INFO PREFIX
"Host address width %d\n",
332 entry_header
= (struct acpi_dmar_header
*)(dmar
+ 1);
333 while (((unsigned long)entry_header
) <
334 (((unsigned long)dmar
) + dmar_tbl
->length
)) {
335 /* Avoid looping forever on bad ACPI tables */
336 if (entry_header
->length
== 0) {
337 printk(KERN_WARNING PREFIX
338 "Invalid 0-length structure\n");
343 dmar_table_print_dmar_entry(entry_header
);
345 switch (entry_header
->type
) {
346 case ACPI_DMAR_TYPE_HARDWARE_UNIT
:
347 ret
= dmar_parse_one_drhd(entry_header
);
349 case ACPI_DMAR_TYPE_RESERVED_MEMORY
:
351 ret
= dmar_parse_one_rmrr(entry_header
);
355 printk(KERN_WARNING PREFIX
356 "Unknown DMAR structure type\n");
357 ret
= 0; /* for forward compatibility */
363 entry_header
= ((void *)entry_header
+ entry_header
->length
);
368 int dmar_pci_device_match(struct pci_dev
*devices
[], int cnt
,
374 for (index
= 0; index
< cnt
; index
++)
375 if (dev
== devices
[index
])
378 /* Check our parent */
379 dev
= dev
->bus
->self
;
385 struct dmar_drhd_unit
*
386 dmar_find_matched_drhd_unit(struct pci_dev
*dev
)
388 struct dmar_drhd_unit
*dmaru
= NULL
;
389 struct acpi_dmar_hardware_unit
*drhd
;
391 list_for_each_entry(dmaru
, &dmar_drhd_units
, list
) {
392 drhd
= container_of(dmaru
->hdr
,
393 struct acpi_dmar_hardware_unit
,
396 if (dmaru
->include_all
&&
397 drhd
->segment
== pci_domain_nr(dev
->bus
))
400 if (dmar_pci_device_match(dmaru
->devices
,
401 dmaru
->devices_cnt
, dev
))
408 int __init
dmar_dev_scope_init(void)
410 struct dmar_drhd_unit
*drhd
, *drhd_n
;
413 list_for_each_entry_safe(drhd
, drhd_n
, &dmar_drhd_units
, list
) {
414 ret
= dmar_parse_dev(drhd
);
421 struct dmar_rmrr_unit
*rmrr
, *rmrr_n
;
422 list_for_each_entry_safe(rmrr
, rmrr_n
, &dmar_rmrr_units
, list
) {
423 ret
= rmrr_parse_dev(rmrr
);
434 int __init
dmar_table_init(void)
436 static int dmar_table_initialized
;
439 if (dmar_table_initialized
)
442 dmar_table_initialized
= 1;
444 ret
= parse_dmar_table();
447 printk(KERN_INFO PREFIX
"parse DMAR table failure.\n");
451 if (list_empty(&dmar_drhd_units
)) {
452 printk(KERN_INFO PREFIX
"No DMAR devices found\n");
457 if (list_empty(&dmar_rmrr_units
))
458 printk(KERN_INFO PREFIX
"No RMRR found\n");
461 #ifdef CONFIG_INTR_REMAP
462 parse_ioapics_under_ir();
467 void __init
detect_intel_iommu(void)
471 ret
= dmar_table_detect();
474 #ifdef CONFIG_INTR_REMAP
475 struct acpi_table_dmar
*dmar
;
477 * for now we will disable dma-remapping when interrupt
478 * remapping is enabled.
479 * When support for queued invalidation for IOTLB invalidation
480 * is added, we will not need this any more.
482 dmar
= (struct acpi_table_dmar
*) dmar_tbl
;
483 if (ret
&& cpu_has_x2apic
&& dmar
->flags
& 0x1)
485 "Queued invalidation will be enabled to support "
486 "x2apic and Intr-remapping.\n");
489 if (ret
&& !no_iommu
&& !iommu_detected
&& !swiotlb
&&
494 early_acpi_os_unmap_memory(dmar_tbl
, dmar_tbl_size
);
499 int alloc_iommu(struct dmar_drhd_unit
*drhd
)
501 struct intel_iommu
*iommu
;
504 static int iommu_allocated
= 0;
507 iommu
= kzalloc(sizeof(*iommu
), GFP_KERNEL
);
511 iommu
->seq_id
= iommu_allocated
++;
513 iommu
->reg
= ioremap(drhd
->reg_base_addr
, VTD_PAGE_SIZE
);
515 printk(KERN_ERR
"IOMMU: can't map the region\n");
518 iommu
->cap
= dmar_readq(iommu
->reg
+ DMAR_CAP_REG
);
519 iommu
->ecap
= dmar_readq(iommu
->reg
+ DMAR_ECAP_REG
);
522 agaw
= iommu_calculate_agaw(iommu
);
525 "Cannot get a valid agaw for iommu (seq_id = %d)\n",
532 /* the registers might be more than one page */
533 map_size
= max_t(int, ecap_max_iotlb_offset(iommu
->ecap
),
534 cap_max_fault_reg_offset(iommu
->cap
));
535 map_size
= VTD_PAGE_ALIGN(map_size
);
536 if (map_size
> VTD_PAGE_SIZE
) {
538 iommu
->reg
= ioremap(drhd
->reg_base_addr
, map_size
);
540 printk(KERN_ERR
"IOMMU: can't map the region\n");
545 ver
= readl(iommu
->reg
+ DMAR_VER_REG
);
546 pr_debug("IOMMU %llx: ver %d:%d cap %llx ecap %llx\n",
547 (unsigned long long)drhd
->reg_base_addr
,
548 DMAR_VER_MAJOR(ver
), DMAR_VER_MINOR(ver
),
549 (unsigned long long)iommu
->cap
,
550 (unsigned long long)iommu
->ecap
);
552 spin_lock_init(&iommu
->register_lock
);
561 void free_iommu(struct intel_iommu
*iommu
)
567 free_dmar_iommu(iommu
);
576 * Reclaim all the submitted descriptors which have completed their work.
578 static inline void reclaim_free_desc(struct q_inval
*qi
)
580 while (qi
->desc_status
[qi
->free_tail
] == QI_DONE
) {
581 qi
->desc_status
[qi
->free_tail
] = QI_FREE
;
582 qi
->free_tail
= (qi
->free_tail
+ 1) % QI_LENGTH
;
587 static int qi_check_fault(struct intel_iommu
*iommu
, int index
)
591 struct q_inval
*qi
= iommu
->qi
;
592 int wait_index
= (index
+ 1) % QI_LENGTH
;
594 fault
= readl(iommu
->reg
+ DMAR_FSTS_REG
);
597 * If IQE happens, the head points to the descriptor associated
598 * with the error. No new descriptors are fetched until the IQE
601 if (fault
& DMA_FSTS_IQE
) {
602 head
= readl(iommu
->reg
+ DMAR_IQH_REG
);
603 if ((head
>> 4) == index
) {
604 memcpy(&qi
->desc
[index
], &qi
->desc
[wait_index
],
605 sizeof(struct qi_desc
));
606 __iommu_flush_cache(iommu
, &qi
->desc
[index
],
607 sizeof(struct qi_desc
));
608 writel(DMA_FSTS_IQE
, iommu
->reg
+ DMAR_FSTS_REG
);
617 * Submit the queued invalidation descriptor to the remapping
618 * hardware unit and wait for its completion.
620 int qi_submit_sync(struct qi_desc
*desc
, struct intel_iommu
*iommu
)
623 struct q_inval
*qi
= iommu
->qi
;
624 struct qi_desc
*hw
, wait_desc
;
625 int wait_index
, index
;
633 spin_lock_irqsave(&qi
->q_lock
, flags
);
634 while (qi
->free_cnt
< 3) {
635 spin_unlock_irqrestore(&qi
->q_lock
, flags
);
637 spin_lock_irqsave(&qi
->q_lock
, flags
);
640 index
= qi
->free_head
;
641 wait_index
= (index
+ 1) % QI_LENGTH
;
643 qi
->desc_status
[index
] = qi
->desc_status
[wait_index
] = QI_IN_USE
;
647 wait_desc
.low
= QI_IWD_STATUS_DATA(QI_DONE
) |
648 QI_IWD_STATUS_WRITE
| QI_IWD_TYPE
;
649 wait_desc
.high
= virt_to_phys(&qi
->desc_status
[wait_index
]);
651 hw
[wait_index
] = wait_desc
;
653 __iommu_flush_cache(iommu
, &hw
[index
], sizeof(struct qi_desc
));
654 __iommu_flush_cache(iommu
, &hw
[wait_index
], sizeof(struct qi_desc
));
656 qi
->free_head
= (qi
->free_head
+ 2) % QI_LENGTH
;
660 * update the HW tail register indicating the presence of
663 writel(qi
->free_head
<< 4, iommu
->reg
+ DMAR_IQT_REG
);
665 while (qi
->desc_status
[wait_index
] != QI_DONE
) {
667 * We will leave the interrupts disabled, to prevent interrupt
668 * context from queueing another cmd while a cmd is already submitted
669 * and waiting for completion on this cpu. This is to avoid
670 * a deadlock where the interrupt context can wait indefinitely
671 * for free slots in the queue.
673 rc
= qi_check_fault(iommu
, index
);
677 spin_unlock(&qi
->q_lock
);
679 spin_lock(&qi
->q_lock
);
682 qi
->desc_status
[index
] = qi
->desc_status
[wait_index
] = QI_DONE
;
684 reclaim_free_desc(qi
);
685 spin_unlock_irqrestore(&qi
->q_lock
, flags
);
691 * Flush the global interrupt entry cache.
693 void qi_global_iec(struct intel_iommu
*iommu
)
697 desc
.low
= QI_IEC_TYPE
;
700 /* should never fail */
701 qi_submit_sync(&desc
, iommu
);
704 int qi_flush_context(struct intel_iommu
*iommu
, u16 did
, u16 sid
, u8 fm
,
705 u64 type
, int non_present_entry_flush
)
709 if (non_present_entry_flush
) {
710 if (!cap_caching_mode(iommu
->cap
))
716 desc
.low
= QI_CC_FM(fm
) | QI_CC_SID(sid
) | QI_CC_DID(did
)
717 | QI_CC_GRAN(type
) | QI_CC_TYPE
;
720 return qi_submit_sync(&desc
, iommu
);
723 int qi_flush_iotlb(struct intel_iommu
*iommu
, u16 did
, u64 addr
,
724 unsigned int size_order
, u64 type
,
725 int non_present_entry_flush
)
732 if (non_present_entry_flush
) {
733 if (!cap_caching_mode(iommu
->cap
))
739 if (cap_write_drain(iommu
->cap
))
742 if (cap_read_drain(iommu
->cap
))
745 desc
.low
= QI_IOTLB_DID(did
) | QI_IOTLB_DR(dr
) | QI_IOTLB_DW(dw
)
746 | QI_IOTLB_GRAN(type
) | QI_IOTLB_TYPE
;
747 desc
.high
= QI_IOTLB_ADDR(addr
) | QI_IOTLB_IH(ih
)
748 | QI_IOTLB_AM(size_order
);
750 return qi_submit_sync(&desc
, iommu
);
754 * Enable Queued Invalidation interface. This is a must to support
755 * interrupt-remapping. Also used by DMA-remapping, which replaces
756 * register based IOTLB invalidation.
758 int dmar_enable_qi(struct intel_iommu
*iommu
)
764 if (!ecap_qis(iommu
->ecap
))
768 * queued invalidation is already setup and enabled.
773 iommu
->qi
= kmalloc(sizeof(*qi
), GFP_KERNEL
);
779 qi
->desc
= (void *)(get_zeroed_page(GFP_KERNEL
));
786 qi
->desc_status
= kmalloc(QI_LENGTH
* sizeof(int), GFP_KERNEL
);
787 if (!qi
->desc_status
) {
788 free_page((unsigned long) qi
->desc
);
794 qi
->free_head
= qi
->free_tail
= 0;
795 qi
->free_cnt
= QI_LENGTH
;
797 spin_lock_init(&qi
->q_lock
);
799 spin_lock_irqsave(&iommu
->register_lock
, flags
);
800 /* write zero to the tail reg */
801 writel(0, iommu
->reg
+ DMAR_IQT_REG
);
803 dmar_writeq(iommu
->reg
+ DMAR_IQA_REG
, virt_to_phys(qi
->desc
));
805 cmd
= iommu
->gcmd
| DMA_GCMD_QIE
;
806 iommu
->gcmd
|= DMA_GCMD_QIE
;
807 writel(cmd
, iommu
->reg
+ DMAR_GCMD_REG
);
809 /* Make sure hardware complete it */
810 IOMMU_WAIT_OP(iommu
, DMAR_GSTS_REG
, readl
, (sts
& DMA_GSTS_QIES
), sts
);
811 spin_unlock_irqrestore(&iommu
->register_lock
, flags
);