2 * Copyright (c) 2006, Intel Corporation.
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * You should have received a copy of the GNU General Public License along with
14 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
15 * Place - Suite 330, Boston, MA 02111-1307 USA.
17 * Copyright (C) 2006-2008 Intel Corporation
18 * Author: Ashok Raj <ashok.raj@intel.com>
19 * Author: Shaohua Li <shaohua.li@intel.com>
20 * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
21 * Author: Fenghua Yu <fenghua.yu@intel.com>
24 #include <linux/init.h>
25 #include <linux/bitmap.h>
26 #include <linux/debugfs.h>
27 #include <linux/slab.h>
28 #include <linux/irq.h>
29 #include <linux/interrupt.h>
30 #include <linux/spinlock.h>
31 #include <linux/pci.h>
32 #include <linux/dmar.h>
33 #include <linux/dma-mapping.h>
34 #include <linux/mempool.h>
35 #include <linux/timer.h>
36 #include <linux/iova.h>
37 #include <linux/iommu.h>
38 #include <linux/intel-iommu.h>
39 #include <linux/syscore_ops.h>
40 #include <linux/tboot.h>
41 #include <linux/dmi.h>
42 #include <asm/cacheflush.h>
43 #include <asm/iommu.h>
46 #define ROOT_SIZE VTD_PAGE_SIZE
47 #define CONTEXT_SIZE VTD_PAGE_SIZE
49 #define IS_BRIDGE_HOST_DEVICE(pdev) \
50 ((pdev->class >> 8) == PCI_CLASS_BRIDGE_HOST)
51 #define IS_GFX_DEVICE(pdev) ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY)
52 #define IS_ISA_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA)
53 #define IS_AZALIA(pdev) ((pdev)->vendor == 0x8086 && (pdev)->device == 0x3a3e)
55 #define IOAPIC_RANGE_START (0xfee00000)
56 #define IOAPIC_RANGE_END (0xfeefffff)
57 #define IOVA_START_ADDR (0x1000)
59 #define DEFAULT_DOMAIN_ADDRESS_WIDTH 48
61 #define MAX_AGAW_WIDTH 64
63 #define __DOMAIN_MAX_PFN(gaw) ((((uint64_t)1) << (gaw-VTD_PAGE_SHIFT)) - 1)
64 #define __DOMAIN_MAX_ADDR(gaw) ((((uint64_t)1) << gaw) - 1)
66 /* We limit DOMAIN_MAX_PFN to fit in an unsigned long, and DOMAIN_MAX_ADDR
67 to match. That way, we can use 'unsigned long' for PFNs with impunity. */
68 #define DOMAIN_MAX_PFN(gaw) ((unsigned long) min_t(uint64_t, \
69 __DOMAIN_MAX_PFN(gaw), (unsigned long)-1))
70 #define DOMAIN_MAX_ADDR(gaw) (((uint64_t)__DOMAIN_MAX_PFN(gaw)) << VTD_PAGE_SHIFT)
72 #define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT)
73 #define DMA_32BIT_PFN IOVA_PFN(DMA_BIT_MASK(32))
74 #define DMA_64BIT_PFN IOVA_PFN(DMA_BIT_MASK(64))
76 /* page table handling */
77 #define LEVEL_STRIDE (9)
78 #define LEVEL_MASK (((u64)1 << LEVEL_STRIDE) - 1)
80 static inline int agaw_to_level(int agaw
)
85 static inline int agaw_to_width(int agaw
)
87 return 30 + agaw
* LEVEL_STRIDE
;
90 static inline int width_to_agaw(int width
)
92 return (width
- 30) / LEVEL_STRIDE
;
95 static inline unsigned int level_to_offset_bits(int level
)
97 return (level
- 1) * LEVEL_STRIDE
;
100 static inline int pfn_level_offset(unsigned long pfn
, int level
)
102 return (pfn
>> level_to_offset_bits(level
)) & LEVEL_MASK
;
/* Mask selecting the PFN bits covered by @level and all levels above it. */
static inline unsigned long level_mask(int level)
{
	return -1UL << level_to_offset_bits(level);
}
/* Number of VT-d pages spanned by a single entry at @level. */
static inline unsigned long level_size(int level)
{
	return 1UL << level_to_offset_bits(level);
}
/* Round @pfn up to the next boundary of a @level-sized region. */
static inline unsigned long align_to_level(unsigned long pfn, int level)
{
	return (pfn + level_size(level) - 1) & level_mask(level);
}
120 /* VT-d pages must always be _smaller_ than MM pages. Otherwise things
121 are never going to work. */
122 static inline unsigned long dma_to_mm_pfn(unsigned long dma_pfn
)
124 return dma_pfn
>> (PAGE_SHIFT
- VTD_PAGE_SHIFT
);
127 static inline unsigned long mm_to_dma_pfn(unsigned long mm_pfn
)
129 return mm_pfn
<< (PAGE_SHIFT
- VTD_PAGE_SHIFT
);
/* VT-d PFN of the first DMA page backing @pg. */
static inline unsigned long page_to_dma_pfn(struct page *pg)
{
	return mm_to_dma_pfn(page_to_pfn(pg));
}
/* VT-d PFN of the DMA page containing kernel virtual address @p. */
static inline unsigned long virt_to_dma_pfn(void *p)
{
	return page_to_dma_pfn(virt_to_page(p));
}
140 /* global iommu list, set NULL for ignored DMAR units */
141 static struct intel_iommu
**g_iommus
;
143 static void __init
check_tylersburg_isoch(void);
144 static int rwbf_quirk
;
149 * 12-63: Context Ptr (12 - (haw-1))
156 #define ROOT_ENTRY_NR (VTD_PAGE_SIZE/sizeof(struct root_entry))
157 static inline bool root_present(struct root_entry
*root
)
159 return (root
->val
& 1);
161 static inline void set_root_present(struct root_entry
*root
)
165 static inline void set_root_value(struct root_entry
*root
, unsigned long value
)
167 root
->val
|= value
& VTD_PAGE_MASK
;
170 static inline struct context_entry
*
171 get_context_addr_from_root(struct root_entry
*root
)
173 return (struct context_entry
*)
174 (root_present(root
)?phys_to_virt(
175 root
->val
& VTD_PAGE_MASK
) :
182 * 1: fault processing disable
183 * 2-3: translation type
184 * 12-63: address space root
190 struct context_entry
{
195 static inline bool context_present(struct context_entry
*context
)
197 return (context
->lo
& 1);
199 static inline void context_set_present(struct context_entry
*context
)
204 static inline void context_set_fault_enable(struct context_entry
*context
)
206 context
->lo
&= (((u64
)-1) << 2) | 1;
209 static inline void context_set_translation_type(struct context_entry
*context
,
212 context
->lo
&= (((u64
)-1) << 4) | 3;
213 context
->lo
|= (value
& 3) << 2;
216 static inline void context_set_address_root(struct context_entry
*context
,
219 context
->lo
|= value
& VTD_PAGE_MASK
;
222 static inline void context_set_address_width(struct context_entry
*context
,
225 context
->hi
|= value
& 7;
228 static inline void context_set_domain_id(struct context_entry
*context
,
231 context
->hi
|= (value
& ((1 << 16) - 1)) << 8;
234 static inline void context_clear_entry(struct context_entry
*context
)
247 * 12-63: Host physcial address
253 static inline void dma_clear_pte(struct dma_pte
*pte
)
258 static inline void dma_set_pte_readable(struct dma_pte
*pte
)
260 pte
->val
|= DMA_PTE_READ
;
263 static inline void dma_set_pte_writable(struct dma_pte
*pte
)
265 pte
->val
|= DMA_PTE_WRITE
;
268 static inline void dma_set_pte_snp(struct dma_pte
*pte
)
270 pte
->val
|= DMA_PTE_SNP
;
273 static inline void dma_set_pte_prot(struct dma_pte
*pte
, unsigned long prot
)
275 pte
->val
= (pte
->val
& ~3) | (prot
& 3);
278 static inline u64
dma_pte_addr(struct dma_pte
*pte
)
281 return pte
->val
& VTD_PAGE_MASK
;
283 /* Must have a full atomic 64-bit read */
284 return __cmpxchg64(&pte
->val
, 0ULL, 0ULL) & VTD_PAGE_MASK
;
288 static inline void dma_set_pte_pfn(struct dma_pte
*pte
, unsigned long pfn
)
290 pte
->val
|= (uint64_t)pfn
<< VTD_PAGE_SHIFT
;
293 static inline bool dma_pte_present(struct dma_pte
*pte
)
295 return (pte
->val
& 3) != 0;
298 static inline int first_pte_in_page(struct dma_pte
*pte
)
300 return !((unsigned long)pte
& ~VTD_PAGE_MASK
);
304 * This domain is a statically identity mapping domain.
305 * 1. This domain creats a static 1:1 mapping to all usable memory.
306 * 2. It maps to each iommu if successful.
307 * 3. Each iommu mapps to this domain if successful.
309 static struct dmar_domain
*si_domain
;
310 static int hw_pass_through
= 1;
312 /* devices under the same p2p bridge are owned in one domain */
313 #define DOMAIN_FLAG_P2P_MULTIPLE_DEVICES (1 << 0)
315 /* domain represents a virtual machine, more than one devices
316 * across iommus may be owned in one domain, e.g. kvm guest.
318 #define DOMAIN_FLAG_VIRTUAL_MACHINE (1 << 1)
320 /* si_domain contains mulitple devices */
321 #define DOMAIN_FLAG_STATIC_IDENTITY (1 << 2)
324 int id
; /* domain id */
325 int nid
; /* node id */
326 unsigned long iommu_bmp
; /* bitmap of iommus this domain uses*/
328 struct list_head devices
; /* all devices' list */
329 struct iova_domain iovad
; /* iova's that belong to this domain */
331 struct dma_pte
*pgd
; /* virtual address */
332 int gaw
; /* max guest address width */
334 /* adjusted guest address width, 0 is level 2 30-bit */
337 int flags
; /* flags to find out type of domain */
339 int iommu_coherency
;/* indicate coherency of iommu access */
340 int iommu_snooping
; /* indicate snooping control feature*/
341 int iommu_count
; /* reference count of iommu */
342 spinlock_t iommu_lock
; /* protect iommu set in domain */
343 u64 max_addr
; /* maximum mapped address */
346 /* PCI domain-device relationship */
347 struct device_domain_info
{
348 struct list_head link
; /* link to domain siblings */
349 struct list_head global
; /* link to global list */
350 int segment
; /* PCI domain */
351 u8 bus
; /* PCI bus number */
352 u8 devfn
; /* PCI devfn number */
353 struct pci_dev
*dev
; /* it's NULL for PCIe-to-PCI bridge */
354 struct intel_iommu
*iommu
; /* IOMMU used by this device */
355 struct dmar_domain
*domain
; /* pointer to domain */
358 static void flush_unmaps_timeout(unsigned long data
);
360 DEFINE_TIMER(unmap_timer
, flush_unmaps_timeout
, 0, 0);
362 #define HIGH_WATER_MARK 250
363 struct deferred_flush_tables
{
365 struct iova
*iova
[HIGH_WATER_MARK
];
366 struct dmar_domain
*domain
[HIGH_WATER_MARK
];
369 static struct deferred_flush_tables
*deferred_flush
;
371 /* bitmap for indexing intel_iommus */
372 static int g_num_of_iommus
;
374 static DEFINE_SPINLOCK(async_umap_flush_lock
);
375 static LIST_HEAD(unmaps_to_do
);
378 static long list_size
;
380 static void domain_remove_dev_info(struct dmar_domain
*domain
);
382 #ifdef CONFIG_DMAR_DEFAULT_ON
383 int dmar_disabled
= 0;
385 int dmar_disabled
= 1;
386 #endif /*CONFIG_DMAR_DEFAULT_ON*/
388 static int dmar_map_gfx
= 1;
389 static int dmar_forcedac
;
390 static int intel_iommu_strict
;
392 #define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1))
393 static DEFINE_SPINLOCK(device_domain_lock
);
394 static LIST_HEAD(device_domain_list
);
396 static struct iommu_ops intel_iommu_ops
;
398 static int __init
intel_iommu_setup(char *str
)
403 if (!strncmp(str
, "on", 2)) {
405 printk(KERN_INFO
"Intel-IOMMU: enabled\n");
406 } else if (!strncmp(str
, "off", 3)) {
408 printk(KERN_INFO
"Intel-IOMMU: disabled\n");
409 } else if (!strncmp(str
, "igfx_off", 8)) {
412 "Intel-IOMMU: disable GFX device mapping\n");
413 } else if (!strncmp(str
, "forcedac", 8)) {
415 "Intel-IOMMU: Forcing DAC for PCI devices\n");
417 } else if (!strncmp(str
, "strict", 6)) {
419 "Intel-IOMMU: disable batched IOTLB flush\n");
420 intel_iommu_strict
= 1;
423 str
+= strcspn(str
, ",");
429 __setup("intel_iommu=", intel_iommu_setup
);
431 static struct kmem_cache
*iommu_domain_cache
;
432 static struct kmem_cache
*iommu_devinfo_cache
;
433 static struct kmem_cache
*iommu_iova_cache
;
435 static inline void *alloc_pgtable_page(int node
)
440 page
= alloc_pages_node(node
, GFP_ATOMIC
| __GFP_ZERO
, 0);
442 vaddr
= page_address(page
);
/* Release a page-table page obtained from alloc_pgtable_page(). */
static inline void free_pgtable_page(void *vaddr)
{
	free_page((unsigned long)vaddr);
}
451 static inline void *alloc_domain_mem(void)
453 return kmem_cache_alloc(iommu_domain_cache
, GFP_ATOMIC
);
456 static void free_domain_mem(void *vaddr
)
458 kmem_cache_free(iommu_domain_cache
, vaddr
);
461 static inline void * alloc_devinfo_mem(void)
463 return kmem_cache_alloc(iommu_devinfo_cache
, GFP_ATOMIC
);
466 static inline void free_devinfo_mem(void *vaddr
)
468 kmem_cache_free(iommu_devinfo_cache
, vaddr
);
471 struct iova
*alloc_iova_mem(void)
473 return kmem_cache_alloc(iommu_iova_cache
, GFP_ATOMIC
);
476 void free_iova_mem(struct iova
*iova
)
478 kmem_cache_free(iommu_iova_cache
, iova
);
482 static int __iommu_calculate_agaw(struct intel_iommu
*iommu
, int max_gaw
)
487 sagaw
= cap_sagaw(iommu
->cap
);
488 for (agaw
= width_to_agaw(max_gaw
);
490 if (test_bit(agaw
, &sagaw
))
498 * Calculate max SAGAW for each iommu.
500 int iommu_calculate_max_sagaw(struct intel_iommu
*iommu
)
502 return __iommu_calculate_agaw(iommu
, MAX_AGAW_WIDTH
);
506 * calculate agaw for each iommu.
507 * "SAGAW" may be different across iommus, use a default agaw, and
508 * get a supported less agaw for iommus that don't support the default agaw.
510 int iommu_calculate_agaw(struct intel_iommu
*iommu
)
512 return __iommu_calculate_agaw(iommu
, DEFAULT_DOMAIN_ADDRESS_WIDTH
);
515 /* This functionin only returns single iommu in a domain */
516 static struct intel_iommu
*domain_get_iommu(struct dmar_domain
*domain
)
520 /* si_domain and vm domain should not get here. */
521 BUG_ON(domain
->flags
& DOMAIN_FLAG_VIRTUAL_MACHINE
);
522 BUG_ON(domain
->flags
& DOMAIN_FLAG_STATIC_IDENTITY
);
524 iommu_id
= find_first_bit(&domain
->iommu_bmp
, g_num_of_iommus
);
525 if (iommu_id
< 0 || iommu_id
>= g_num_of_iommus
)
528 return g_iommus
[iommu_id
];
531 static void domain_update_iommu_coherency(struct dmar_domain
*domain
)
535 domain
->iommu_coherency
= 1;
537 for_each_set_bit(i
, &domain
->iommu_bmp
, g_num_of_iommus
) {
538 if (!ecap_coherent(g_iommus
[i
]->ecap
)) {
539 domain
->iommu_coherency
= 0;
545 static void domain_update_iommu_snooping(struct dmar_domain
*domain
)
549 domain
->iommu_snooping
= 1;
551 for_each_set_bit(i
, &domain
->iommu_bmp
, g_num_of_iommus
) {
552 if (!ecap_sc_support(g_iommus
[i
]->ecap
)) {
553 domain
->iommu_snooping
= 0;
/* Some capabilities may be different across iommus */
static void domain_update_iommu_cap(struct dmar_domain *domain)
{
	domain_update_iommu_coherency(domain);
	domain_update_iommu_snooping(domain);
}
566 static struct intel_iommu
*device_to_iommu(int segment
, u8 bus
, u8 devfn
)
568 struct dmar_drhd_unit
*drhd
= NULL
;
571 for_each_drhd_unit(drhd
) {
574 if (segment
!= drhd
->segment
)
577 for (i
= 0; i
< drhd
->devices_cnt
; i
++) {
578 if (drhd
->devices
[i
] &&
579 drhd
->devices
[i
]->bus
->number
== bus
&&
580 drhd
->devices
[i
]->devfn
== devfn
)
582 if (drhd
->devices
[i
] &&
583 drhd
->devices
[i
]->subordinate
&&
584 drhd
->devices
[i
]->subordinate
->number
<= bus
&&
585 drhd
->devices
[i
]->subordinate
->subordinate
>= bus
)
589 if (drhd
->include_all
)
596 static void domain_flush_cache(struct dmar_domain
*domain
,
597 void *addr
, int size
)
599 if (!domain
->iommu_coherency
)
600 clflush_cache_range(addr
, size
);
603 /* Gets context entry for a given bus and devfn */
604 static struct context_entry
* device_to_context_entry(struct intel_iommu
*iommu
,
607 struct root_entry
*root
;
608 struct context_entry
*context
;
609 unsigned long phy_addr
;
612 spin_lock_irqsave(&iommu
->lock
, flags
);
613 root
= &iommu
->root_entry
[bus
];
614 context
= get_context_addr_from_root(root
);
616 context
= (struct context_entry
*)
617 alloc_pgtable_page(iommu
->node
);
619 spin_unlock_irqrestore(&iommu
->lock
, flags
);
622 __iommu_flush_cache(iommu
, (void *)context
, CONTEXT_SIZE
);
623 phy_addr
= virt_to_phys((void *)context
);
624 set_root_value(root
, phy_addr
);
625 set_root_present(root
);
626 __iommu_flush_cache(iommu
, root
, sizeof(*root
));
628 spin_unlock_irqrestore(&iommu
->lock
, flags
);
629 return &context
[devfn
];
632 static int device_context_mapped(struct intel_iommu
*iommu
, u8 bus
, u8 devfn
)
634 struct root_entry
*root
;
635 struct context_entry
*context
;
639 spin_lock_irqsave(&iommu
->lock
, flags
);
640 root
= &iommu
->root_entry
[bus
];
641 context
= get_context_addr_from_root(root
);
646 ret
= context_present(&context
[devfn
]);
648 spin_unlock_irqrestore(&iommu
->lock
, flags
);
652 static void clear_context_table(struct intel_iommu
*iommu
, u8 bus
, u8 devfn
)
654 struct root_entry
*root
;
655 struct context_entry
*context
;
658 spin_lock_irqsave(&iommu
->lock
, flags
);
659 root
= &iommu
->root_entry
[bus
];
660 context
= get_context_addr_from_root(root
);
662 context_clear_entry(&context
[devfn
]);
663 __iommu_flush_cache(iommu
, &context
[devfn
], \
666 spin_unlock_irqrestore(&iommu
->lock
, flags
);
669 static void free_context_table(struct intel_iommu
*iommu
)
671 struct root_entry
*root
;
674 struct context_entry
*context
;
676 spin_lock_irqsave(&iommu
->lock
, flags
);
677 if (!iommu
->root_entry
) {
680 for (i
= 0; i
< ROOT_ENTRY_NR
; i
++) {
681 root
= &iommu
->root_entry
[i
];
682 context
= get_context_addr_from_root(root
);
684 free_pgtable_page(context
);
686 free_pgtable_page(iommu
->root_entry
);
687 iommu
->root_entry
= NULL
;
689 spin_unlock_irqrestore(&iommu
->lock
, flags
);
692 static struct dma_pte
*pfn_to_dma_pte(struct dmar_domain
*domain
,
695 int addr_width
= agaw_to_width(domain
->agaw
) - VTD_PAGE_SHIFT
;
696 struct dma_pte
*parent
, *pte
= NULL
;
697 int level
= agaw_to_level(domain
->agaw
);
700 BUG_ON(!domain
->pgd
);
701 BUG_ON(addr_width
< BITS_PER_LONG
&& pfn
>> addr_width
);
702 parent
= domain
->pgd
;
707 offset
= pfn_level_offset(pfn
, level
);
708 pte
= &parent
[offset
];
712 if (!dma_pte_present(pte
)) {
715 tmp_page
= alloc_pgtable_page(domain
->nid
);
720 domain_flush_cache(domain
, tmp_page
, VTD_PAGE_SIZE
);
721 pteval
= ((uint64_t)virt_to_dma_pfn(tmp_page
) << VTD_PAGE_SHIFT
) | DMA_PTE_READ
| DMA_PTE_WRITE
;
722 if (cmpxchg64(&pte
->val
, 0ULL, pteval
)) {
723 /* Someone else set it while we were thinking; use theirs. */
724 free_pgtable_page(tmp_page
);
727 domain_flush_cache(domain
, pte
, sizeof(*pte
));
730 parent
= phys_to_virt(dma_pte_addr(pte
));
737 /* return address's pte at specific level */
738 static struct dma_pte
*dma_pfn_level_pte(struct dmar_domain
*domain
,
742 struct dma_pte
*parent
, *pte
= NULL
;
743 int total
= agaw_to_level(domain
->agaw
);
746 parent
= domain
->pgd
;
747 while (level
<= total
) {
748 offset
= pfn_level_offset(pfn
, total
);
749 pte
= &parent
[offset
];
753 if (!dma_pte_present(pte
))
755 parent
= phys_to_virt(dma_pte_addr(pte
));
761 /* clear last level pte, a tlb flush should be followed */
762 static void dma_pte_clear_range(struct dmar_domain
*domain
,
763 unsigned long start_pfn
,
764 unsigned long last_pfn
)
766 int addr_width
= agaw_to_width(domain
->agaw
) - VTD_PAGE_SHIFT
;
767 struct dma_pte
*first_pte
, *pte
;
769 BUG_ON(addr_width
< BITS_PER_LONG
&& start_pfn
>> addr_width
);
770 BUG_ON(addr_width
< BITS_PER_LONG
&& last_pfn
>> addr_width
);
771 BUG_ON(start_pfn
> last_pfn
);
773 /* we don't need lock here; nobody else touches the iova range */
775 first_pte
= pte
= dma_pfn_level_pte(domain
, start_pfn
, 1);
777 start_pfn
= align_to_level(start_pfn
+ 1, 2);
784 } while (start_pfn
<= last_pfn
&& !first_pte_in_page(pte
));
786 domain_flush_cache(domain
, first_pte
,
787 (void *)pte
- (void *)first_pte
);
789 } while (start_pfn
&& start_pfn
<= last_pfn
);
792 /* free page table pages. last level pte should already be cleared */
793 static void dma_pte_free_pagetable(struct dmar_domain
*domain
,
794 unsigned long start_pfn
,
795 unsigned long last_pfn
)
797 int addr_width
= agaw_to_width(domain
->agaw
) - VTD_PAGE_SHIFT
;
798 struct dma_pte
*first_pte
, *pte
;
799 int total
= agaw_to_level(domain
->agaw
);
803 BUG_ON(addr_width
< BITS_PER_LONG
&& start_pfn
>> addr_width
);
804 BUG_ON(addr_width
< BITS_PER_LONG
&& last_pfn
>> addr_width
);
805 BUG_ON(start_pfn
> last_pfn
);
807 /* We don't need lock here; nobody else touches the iova range */
809 while (level
<= total
) {
810 tmp
= align_to_level(start_pfn
, level
);
812 /* If we can't even clear one PTE at this level, we're done */
813 if (tmp
+ level_size(level
) - 1 > last_pfn
)
817 first_pte
= pte
= dma_pfn_level_pte(domain
, tmp
, level
);
819 tmp
= align_to_level(tmp
+ 1, level
+ 1);
823 if (dma_pte_present(pte
)) {
824 free_pgtable_page(phys_to_virt(dma_pte_addr(pte
)));
828 tmp
+= level_size(level
);
829 } while (!first_pte_in_page(pte
) &&
830 tmp
+ level_size(level
) - 1 <= last_pfn
);
832 domain_flush_cache(domain
, first_pte
,
833 (void *)pte
- (void *)first_pte
);
835 } while (tmp
&& tmp
+ level_size(level
) - 1 <= last_pfn
);
839 if (start_pfn
== 0 && last_pfn
== DOMAIN_MAX_PFN(domain
->gaw
)) {
840 free_pgtable_page(domain
->pgd
);
846 static int iommu_alloc_root_entry(struct intel_iommu
*iommu
)
848 struct root_entry
*root
;
851 root
= (struct root_entry
*)alloc_pgtable_page(iommu
->node
);
855 __iommu_flush_cache(iommu
, root
, ROOT_SIZE
);
857 spin_lock_irqsave(&iommu
->lock
, flags
);
858 iommu
->root_entry
= root
;
859 spin_unlock_irqrestore(&iommu
->lock
, flags
);
864 static void iommu_set_root_entry(struct intel_iommu
*iommu
)
870 addr
= iommu
->root_entry
;
872 spin_lock_irqsave(&iommu
->register_lock
, flag
);
873 dmar_writeq(iommu
->reg
+ DMAR_RTADDR_REG
, virt_to_phys(addr
));
875 writel(iommu
->gcmd
| DMA_GCMD_SRTP
, iommu
->reg
+ DMAR_GCMD_REG
);
877 /* Make sure hardware complete it */
878 IOMMU_WAIT_OP(iommu
, DMAR_GSTS_REG
,
879 readl
, (sts
& DMA_GSTS_RTPS
), sts
);
881 spin_unlock_irqrestore(&iommu
->register_lock
, flag
);
884 static void iommu_flush_write_buffer(struct intel_iommu
*iommu
)
889 if (!rwbf_quirk
&& !cap_rwbf(iommu
->cap
))
892 spin_lock_irqsave(&iommu
->register_lock
, flag
);
893 writel(iommu
->gcmd
| DMA_GCMD_WBF
, iommu
->reg
+ DMAR_GCMD_REG
);
895 /* Make sure hardware complete it */
896 IOMMU_WAIT_OP(iommu
, DMAR_GSTS_REG
,
897 readl
, (!(val
& DMA_GSTS_WBFS
)), val
);
899 spin_unlock_irqrestore(&iommu
->register_lock
, flag
);
902 /* return value determine if we need a write buffer flush */
903 static void __iommu_flush_context(struct intel_iommu
*iommu
,
904 u16 did
, u16 source_id
, u8 function_mask
,
911 case DMA_CCMD_GLOBAL_INVL
:
912 val
= DMA_CCMD_GLOBAL_INVL
;
914 case DMA_CCMD_DOMAIN_INVL
:
915 val
= DMA_CCMD_DOMAIN_INVL
|DMA_CCMD_DID(did
);
917 case DMA_CCMD_DEVICE_INVL
:
918 val
= DMA_CCMD_DEVICE_INVL
|DMA_CCMD_DID(did
)
919 | DMA_CCMD_SID(source_id
) | DMA_CCMD_FM(function_mask
);
926 spin_lock_irqsave(&iommu
->register_lock
, flag
);
927 dmar_writeq(iommu
->reg
+ DMAR_CCMD_REG
, val
);
929 /* Make sure hardware complete it */
930 IOMMU_WAIT_OP(iommu
, DMAR_CCMD_REG
,
931 dmar_readq
, (!(val
& DMA_CCMD_ICC
)), val
);
933 spin_unlock_irqrestore(&iommu
->register_lock
, flag
);
936 /* return value determine if we need a write buffer flush */
937 static void __iommu_flush_iotlb(struct intel_iommu
*iommu
, u16 did
,
938 u64 addr
, unsigned int size_order
, u64 type
)
940 int tlb_offset
= ecap_iotlb_offset(iommu
->ecap
);
941 u64 val
= 0, val_iva
= 0;
945 case DMA_TLB_GLOBAL_FLUSH
:
946 /* global flush doesn't need set IVA_REG */
947 val
= DMA_TLB_GLOBAL_FLUSH
|DMA_TLB_IVT
;
949 case DMA_TLB_DSI_FLUSH
:
950 val
= DMA_TLB_DSI_FLUSH
|DMA_TLB_IVT
|DMA_TLB_DID(did
);
952 case DMA_TLB_PSI_FLUSH
:
953 val
= DMA_TLB_PSI_FLUSH
|DMA_TLB_IVT
|DMA_TLB_DID(did
);
954 /* Note: always flush non-leaf currently */
955 val_iva
= size_order
| addr
;
960 /* Note: set drain read/write */
963 * This is probably to be super secure.. Looks like we can
964 * ignore it without any impact.
966 if (cap_read_drain(iommu
->cap
))
967 val
|= DMA_TLB_READ_DRAIN
;
969 if (cap_write_drain(iommu
->cap
))
970 val
|= DMA_TLB_WRITE_DRAIN
;
972 spin_lock_irqsave(&iommu
->register_lock
, flag
);
973 /* Note: Only uses first TLB reg currently */
975 dmar_writeq(iommu
->reg
+ tlb_offset
, val_iva
);
976 dmar_writeq(iommu
->reg
+ tlb_offset
+ 8, val
);
978 /* Make sure hardware complete it */
979 IOMMU_WAIT_OP(iommu
, tlb_offset
+ 8,
980 dmar_readq
, (!(val
& DMA_TLB_IVT
)), val
);
982 spin_unlock_irqrestore(&iommu
->register_lock
, flag
);
984 /* check IOTLB invalidation granularity */
985 if (DMA_TLB_IAIG(val
) == 0)
986 printk(KERN_ERR
"IOMMU: flush IOTLB failed\n");
987 if (DMA_TLB_IAIG(val
) != DMA_TLB_IIRG(type
))
988 pr_debug("IOMMU: tlb flush request %Lx, actual %Lx\n",
989 (unsigned long long)DMA_TLB_IIRG(type
),
990 (unsigned long long)DMA_TLB_IAIG(val
));
993 static struct device_domain_info
*iommu_support_dev_iotlb(
994 struct dmar_domain
*domain
, int segment
, u8 bus
, u8 devfn
)
998 struct device_domain_info
*info
;
999 struct intel_iommu
*iommu
= device_to_iommu(segment
, bus
, devfn
);
1001 if (!ecap_dev_iotlb_support(iommu
->ecap
))
1007 spin_lock_irqsave(&device_domain_lock
, flags
);
1008 list_for_each_entry(info
, &domain
->devices
, link
)
1009 if (info
->bus
== bus
&& info
->devfn
== devfn
) {
1013 spin_unlock_irqrestore(&device_domain_lock
, flags
);
1015 if (!found
|| !info
->dev
)
1018 if (!pci_find_ext_capability(info
->dev
, PCI_EXT_CAP_ID_ATS
))
1021 if (!dmar_find_matched_atsr_unit(info
->dev
))
1024 info
->iommu
= iommu
;
1029 static void iommu_enable_dev_iotlb(struct device_domain_info
*info
)
1034 pci_enable_ats(info
->dev
, VTD_PAGE_SHIFT
);
1037 static void iommu_disable_dev_iotlb(struct device_domain_info
*info
)
1039 if (!info
->dev
|| !pci_ats_enabled(info
->dev
))
1042 pci_disable_ats(info
->dev
);
1045 static void iommu_flush_dev_iotlb(struct dmar_domain
*domain
,
1046 u64 addr
, unsigned mask
)
1049 unsigned long flags
;
1050 struct device_domain_info
*info
;
1052 spin_lock_irqsave(&device_domain_lock
, flags
);
1053 list_for_each_entry(info
, &domain
->devices
, link
) {
1054 if (!info
->dev
|| !pci_ats_enabled(info
->dev
))
1057 sid
= info
->bus
<< 8 | info
->devfn
;
1058 qdep
= pci_ats_queue_depth(info
->dev
);
1059 qi_flush_dev_iotlb(info
->iommu
, sid
, qdep
, addr
, mask
);
1061 spin_unlock_irqrestore(&device_domain_lock
, flags
);
1064 static void iommu_flush_iotlb_psi(struct intel_iommu
*iommu
, u16 did
,
1065 unsigned long pfn
, unsigned int pages
, int map
)
1067 unsigned int mask
= ilog2(__roundup_pow_of_two(pages
));
1068 uint64_t addr
= (uint64_t)pfn
<< VTD_PAGE_SHIFT
;
1073 * Fallback to domain selective flush if no PSI support or the size is
1075 * PSI requires page size to be 2 ^ x, and the base address is naturally
1076 * aligned to the size
1078 if (!cap_pgsel_inv(iommu
->cap
) || mask
> cap_max_amask_val(iommu
->cap
))
1079 iommu
->flush
.flush_iotlb(iommu
, did
, 0, 0,
1082 iommu
->flush
.flush_iotlb(iommu
, did
, addr
, mask
,
1086 * In caching mode, changes of pages from non-present to present require
1087 * flush. However, device IOTLB doesn't need to be flushed in this case.
1089 if (!cap_caching_mode(iommu
->cap
) || !map
)
1090 iommu_flush_dev_iotlb(iommu
->domains
[did
], addr
, mask
);
1093 static void iommu_disable_protect_mem_regions(struct intel_iommu
*iommu
)
1096 unsigned long flags
;
1098 spin_lock_irqsave(&iommu
->register_lock
, flags
);
1099 pmen
= readl(iommu
->reg
+ DMAR_PMEN_REG
);
1100 pmen
&= ~DMA_PMEN_EPM
;
1101 writel(pmen
, iommu
->reg
+ DMAR_PMEN_REG
);
1103 /* wait for the protected region status bit to clear */
1104 IOMMU_WAIT_OP(iommu
, DMAR_PMEN_REG
,
1105 readl
, !(pmen
& DMA_PMEN_PRS
), pmen
);
1107 spin_unlock_irqrestore(&iommu
->register_lock
, flags
);
1110 static int iommu_enable_translation(struct intel_iommu
*iommu
)
1113 unsigned long flags
;
1115 spin_lock_irqsave(&iommu
->register_lock
, flags
);
1116 iommu
->gcmd
|= DMA_GCMD_TE
;
1117 writel(iommu
->gcmd
, iommu
->reg
+ DMAR_GCMD_REG
);
1119 /* Make sure hardware complete it */
1120 IOMMU_WAIT_OP(iommu
, DMAR_GSTS_REG
,
1121 readl
, (sts
& DMA_GSTS_TES
), sts
);
1123 spin_unlock_irqrestore(&iommu
->register_lock
, flags
);
1127 static int iommu_disable_translation(struct intel_iommu
*iommu
)
1132 spin_lock_irqsave(&iommu
->register_lock
, flag
);
1133 iommu
->gcmd
&= ~DMA_GCMD_TE
;
1134 writel(iommu
->gcmd
, iommu
->reg
+ DMAR_GCMD_REG
);
1136 /* Make sure hardware complete it */
1137 IOMMU_WAIT_OP(iommu
, DMAR_GSTS_REG
,
1138 readl
, (!(sts
& DMA_GSTS_TES
)), sts
);
1140 spin_unlock_irqrestore(&iommu
->register_lock
, flag
);
1145 static int iommu_init_domains(struct intel_iommu
*iommu
)
1147 unsigned long ndomains
;
1148 unsigned long nlongs
;
1150 ndomains
= cap_ndoms(iommu
->cap
);
1151 pr_debug("IOMMU %d: Number of Domains supportd <%ld>\n", iommu
->seq_id
,
1153 nlongs
= BITS_TO_LONGS(ndomains
);
1155 spin_lock_init(&iommu
->lock
);
1157 /* TBD: there might be 64K domains,
1158 * consider other allocation for future chip
1160 iommu
->domain_ids
= kcalloc(nlongs
, sizeof(unsigned long), GFP_KERNEL
);
1161 if (!iommu
->domain_ids
) {
1162 printk(KERN_ERR
"Allocating domain id array failed\n");
1165 iommu
->domains
= kcalloc(ndomains
, sizeof(struct dmar_domain
*),
1167 if (!iommu
->domains
) {
1168 printk(KERN_ERR
"Allocating domain array failed\n");
1173 * if Caching mode is set, then invalid translations are tagged
1174 * with domainid 0. Hence we need to pre-allocate it.
1176 if (cap_caching_mode(iommu
->cap
))
1177 set_bit(0, iommu
->domain_ids
);
1182 static void domain_exit(struct dmar_domain
*domain
);
1183 static void vm_domain_exit(struct dmar_domain
*domain
);
1185 void free_dmar_iommu(struct intel_iommu
*iommu
)
1187 struct dmar_domain
*domain
;
1189 unsigned long flags
;
1191 if ((iommu
->domains
) && (iommu
->domain_ids
)) {
1192 for_each_set_bit(i
, iommu
->domain_ids
, cap_ndoms(iommu
->cap
)) {
1193 domain
= iommu
->domains
[i
];
1194 clear_bit(i
, iommu
->domain_ids
);
1196 spin_lock_irqsave(&domain
->iommu_lock
, flags
);
1197 if (--domain
->iommu_count
== 0) {
1198 if (domain
->flags
& DOMAIN_FLAG_VIRTUAL_MACHINE
)
1199 vm_domain_exit(domain
);
1201 domain_exit(domain
);
1203 spin_unlock_irqrestore(&domain
->iommu_lock
, flags
);
1207 if (iommu
->gcmd
& DMA_GCMD_TE
)
1208 iommu_disable_translation(iommu
);
1211 irq_set_handler_data(iommu
->irq
, NULL
);
1212 /* This will mask the irq */
1213 free_irq(iommu
->irq
, iommu
);
1214 destroy_irq(iommu
->irq
);
1217 kfree(iommu
->domains
);
1218 kfree(iommu
->domain_ids
);
1220 g_iommus
[iommu
->seq_id
] = NULL
;
1222 /* if all iommus are freed, free g_iommus */
1223 for (i
= 0; i
< g_num_of_iommus
; i
++) {
1228 if (i
== g_num_of_iommus
)
1231 /* free context mapping */
1232 free_context_table(iommu
);
1235 static struct dmar_domain
*alloc_domain(void)
1237 struct dmar_domain
*domain
;
1239 domain
= alloc_domain_mem();
1244 memset(&domain
->iommu_bmp
, 0, sizeof(unsigned long));
1250 static int iommu_attach_domain(struct dmar_domain
*domain
,
1251 struct intel_iommu
*iommu
)
1254 unsigned long ndomains
;
1255 unsigned long flags
;
1257 ndomains
= cap_ndoms(iommu
->cap
);
1259 spin_lock_irqsave(&iommu
->lock
, flags
);
1261 num
= find_first_zero_bit(iommu
->domain_ids
, ndomains
);
1262 if (num
>= ndomains
) {
1263 spin_unlock_irqrestore(&iommu
->lock
, flags
);
1264 printk(KERN_ERR
"IOMMU: no free domain ids\n");
1269 set_bit(num
, iommu
->domain_ids
);
1270 set_bit(iommu
->seq_id
, &domain
->iommu_bmp
);
1271 iommu
->domains
[num
] = domain
;
1272 spin_unlock_irqrestore(&iommu
->lock
, flags
);
1277 static void iommu_detach_domain(struct dmar_domain
*domain
,
1278 struct intel_iommu
*iommu
)
1280 unsigned long flags
;
1284 spin_lock_irqsave(&iommu
->lock
, flags
);
1285 ndomains
= cap_ndoms(iommu
->cap
);
1286 for_each_set_bit(num
, iommu
->domain_ids
, ndomains
) {
1287 if (iommu
->domains
[num
] == domain
) {
1294 clear_bit(num
, iommu
->domain_ids
);
1295 clear_bit(iommu
->seq_id
, &domain
->iommu_bmp
);
1296 iommu
->domains
[num
] = NULL
;
1298 spin_unlock_irqrestore(&iommu
->lock
, flags
);
1301 static struct iova_domain reserved_iova_list
;
1302 static struct lock_class_key reserved_rbtree_key
;
1304 static int dmar_init_reserved_ranges(void)
1306 struct pci_dev
*pdev
= NULL
;
1310 init_iova_domain(&reserved_iova_list
, DMA_32BIT_PFN
);
1312 lockdep_set_class(&reserved_iova_list
.iova_rbtree_lock
,
1313 &reserved_rbtree_key
);
1315 /* IOAPIC ranges shouldn't be accessed by DMA */
1316 iova
= reserve_iova(&reserved_iova_list
, IOVA_PFN(IOAPIC_RANGE_START
),
1317 IOVA_PFN(IOAPIC_RANGE_END
));
1319 printk(KERN_ERR
"Reserve IOAPIC range failed\n");
1323 /* Reserve all PCI MMIO to avoid peer-to-peer access */
1324 for_each_pci_dev(pdev
) {
1327 for (i
= 0; i
< PCI_NUM_RESOURCES
; i
++) {
1328 r
= &pdev
->resource
[i
];
1329 if (!r
->flags
|| !(r
->flags
& IORESOURCE_MEM
))
1331 iova
= reserve_iova(&reserved_iova_list
,
1335 printk(KERN_ERR
"Reserve iova failed\n");
1343 static void domain_reserve_special_ranges(struct dmar_domain
*domain
)
1345 copy_reserved_iova(&reserved_iova_list
, &domain
->iovad
);
/*
 * Round a guest address width up to the next width the VT-d page-table
 * format can express: 12 bits of page offset plus a whole number of
 * 9-bit table levels, capped at 64 bits.
 */
static inline int guestwidth_to_adjustwidth(int gaw)
{
	int agaw;
	int r = (gaw - 12) % 9;

	if (r == 0)
		agaw = gaw;
	else
		agaw = gaw + 9 - r;
	if (agaw > 64)
		agaw = 64;
	return agaw;
}
1362 static int domain_init(struct dmar_domain
*domain
, int guest_width
)
1364 struct intel_iommu
*iommu
;
1365 int adjust_width
, agaw
;
1366 unsigned long sagaw
;
1368 init_iova_domain(&domain
->iovad
, DMA_32BIT_PFN
);
1369 spin_lock_init(&domain
->iommu_lock
);
1371 domain_reserve_special_ranges(domain
);
1373 /* calculate AGAW */
1374 iommu
= domain_get_iommu(domain
);
1375 if (guest_width
> cap_mgaw(iommu
->cap
))
1376 guest_width
= cap_mgaw(iommu
->cap
);
1377 domain
->gaw
= guest_width
;
1378 adjust_width
= guestwidth_to_adjustwidth(guest_width
);
1379 agaw
= width_to_agaw(adjust_width
);
1380 sagaw
= cap_sagaw(iommu
->cap
);
1381 if (!test_bit(agaw
, &sagaw
)) {
1382 /* hardware doesn't support it, choose a bigger one */
1383 pr_debug("IOMMU: hardware doesn't support agaw %d\n", agaw
);
1384 agaw
= find_next_bit(&sagaw
, 5, agaw
);
1388 domain
->agaw
= agaw
;
1389 INIT_LIST_HEAD(&domain
->devices
);
1391 if (ecap_coherent(iommu
->ecap
))
1392 domain
->iommu_coherency
= 1;
1394 domain
->iommu_coherency
= 0;
1396 if (ecap_sc_support(iommu
->ecap
))
1397 domain
->iommu_snooping
= 1;
1399 domain
->iommu_snooping
= 0;
1401 domain
->iommu_count
= 1;
1402 domain
->nid
= iommu
->node
;
1404 /* always allocate the top pgd */
1405 domain
->pgd
= (struct dma_pte
*)alloc_pgtable_page(domain
->nid
);
1408 __iommu_flush_cache(iommu
, domain
->pgd
, PAGE_SIZE
);
1412 static void domain_exit(struct dmar_domain
*domain
)
1414 struct dmar_drhd_unit
*drhd
;
1415 struct intel_iommu
*iommu
;
1417 /* Domain 0 is reserved, so dont process it */
1421 /* Flush any lazy unmaps that may reference this domain */
1422 if (!intel_iommu_strict
)
1423 flush_unmaps_timeout(0);
1425 domain_remove_dev_info(domain
);
1427 put_iova_domain(&domain
->iovad
);
1430 dma_pte_clear_range(domain
, 0, DOMAIN_MAX_PFN(domain
->gaw
));
1432 /* free page tables */
1433 dma_pte_free_pagetable(domain
, 0, DOMAIN_MAX_PFN(domain
->gaw
));
1435 for_each_active_iommu(iommu
, drhd
)
1436 if (test_bit(iommu
->seq_id
, &domain
->iommu_bmp
))
1437 iommu_detach_domain(domain
, iommu
);
1439 free_domain_mem(domain
);
1442 static int domain_context_mapping_one(struct dmar_domain
*domain
, int segment
,
1443 u8 bus
, u8 devfn
, int translation
)
1445 struct context_entry
*context
;
1446 unsigned long flags
;
1447 struct intel_iommu
*iommu
;
1448 struct dma_pte
*pgd
;
1450 unsigned long ndomains
;
1453 struct device_domain_info
*info
= NULL
;
1455 pr_debug("Set context mapping for %02x:%02x.%d\n",
1456 bus
, PCI_SLOT(devfn
), PCI_FUNC(devfn
));
1458 BUG_ON(!domain
->pgd
);
1459 BUG_ON(translation
!= CONTEXT_TT_PASS_THROUGH
&&
1460 translation
!= CONTEXT_TT_MULTI_LEVEL
);
1462 iommu
= device_to_iommu(segment
, bus
, devfn
);
1466 context
= device_to_context_entry(iommu
, bus
, devfn
);
1469 spin_lock_irqsave(&iommu
->lock
, flags
);
1470 if (context_present(context
)) {
1471 spin_unlock_irqrestore(&iommu
->lock
, flags
);
1478 if (domain
->flags
& DOMAIN_FLAG_VIRTUAL_MACHINE
||
1479 domain
->flags
& DOMAIN_FLAG_STATIC_IDENTITY
) {
1482 /* find an available domain id for this device in iommu */
1483 ndomains
= cap_ndoms(iommu
->cap
);
1484 for_each_set_bit(num
, iommu
->domain_ids
, ndomains
) {
1485 if (iommu
->domains
[num
] == domain
) {
1493 num
= find_first_zero_bit(iommu
->domain_ids
, ndomains
);
1494 if (num
>= ndomains
) {
1495 spin_unlock_irqrestore(&iommu
->lock
, flags
);
1496 printk(KERN_ERR
"IOMMU: no free domain ids\n");
1500 set_bit(num
, iommu
->domain_ids
);
1501 iommu
->domains
[num
] = domain
;
1505 /* Skip top levels of page tables for
1506 * iommu which has less agaw than default.
1507 * Unnecessary for PT mode.
1509 if (translation
!= CONTEXT_TT_PASS_THROUGH
) {
1510 for (agaw
= domain
->agaw
; agaw
!= iommu
->agaw
; agaw
--) {
1511 pgd
= phys_to_virt(dma_pte_addr(pgd
));
1512 if (!dma_pte_present(pgd
)) {
1513 spin_unlock_irqrestore(&iommu
->lock
, flags
);
1520 context_set_domain_id(context
, id
);
1522 if (translation
!= CONTEXT_TT_PASS_THROUGH
) {
1523 info
= iommu_support_dev_iotlb(domain
, segment
, bus
, devfn
);
1524 translation
= info
? CONTEXT_TT_DEV_IOTLB
:
1525 CONTEXT_TT_MULTI_LEVEL
;
1528 * In pass through mode, AW must be programmed to indicate the largest
1529 * AGAW value supported by hardware. And ASR is ignored by hardware.
1531 if (unlikely(translation
== CONTEXT_TT_PASS_THROUGH
))
1532 context_set_address_width(context
, iommu
->msagaw
);
1534 context_set_address_root(context
, virt_to_phys(pgd
));
1535 context_set_address_width(context
, iommu
->agaw
);
1538 context_set_translation_type(context
, translation
);
1539 context_set_fault_enable(context
);
1540 context_set_present(context
);
1541 domain_flush_cache(domain
, context
, sizeof(*context
));
1544 * It's a non-present to present mapping. If hardware doesn't cache
1545 * non-present entry we only need to flush the write-buffer. If the
1546 * _does_ cache non-present entries, then it does so in the special
1547 * domain #0, which we have to flush:
1549 if (cap_caching_mode(iommu
->cap
)) {
1550 iommu
->flush
.flush_context(iommu
, 0,
1551 (((u16
)bus
) << 8) | devfn
,
1552 DMA_CCMD_MASK_NOBIT
,
1553 DMA_CCMD_DEVICE_INVL
);
1554 iommu
->flush
.flush_iotlb(iommu
, domain
->id
, 0, 0, DMA_TLB_DSI_FLUSH
);
1556 iommu_flush_write_buffer(iommu
);
1558 iommu_enable_dev_iotlb(info
);
1559 spin_unlock_irqrestore(&iommu
->lock
, flags
);
1561 spin_lock_irqsave(&domain
->iommu_lock
, flags
);
1562 if (!test_and_set_bit(iommu
->seq_id
, &domain
->iommu_bmp
)) {
1563 domain
->iommu_count
++;
1564 if (domain
->iommu_count
== 1)
1565 domain
->nid
= iommu
->node
;
1566 domain_update_iommu_cap(domain
);
1568 spin_unlock_irqrestore(&domain
->iommu_lock
, flags
);
1573 domain_context_mapping(struct dmar_domain
*domain
, struct pci_dev
*pdev
,
1577 struct pci_dev
*tmp
, *parent
;
1579 ret
= domain_context_mapping_one(domain
, pci_domain_nr(pdev
->bus
),
1580 pdev
->bus
->number
, pdev
->devfn
,
1585 /* dependent device mapping */
1586 tmp
= pci_find_upstream_pcie_bridge(pdev
);
1589 /* Secondary interface's bus number and devfn 0 */
1590 parent
= pdev
->bus
->self
;
1591 while (parent
!= tmp
) {
1592 ret
= domain_context_mapping_one(domain
,
1593 pci_domain_nr(parent
->bus
),
1594 parent
->bus
->number
,
1595 parent
->devfn
, translation
);
1598 parent
= parent
->bus
->self
;
1600 if (pci_is_pcie(tmp
)) /* this is a PCIe-to-PCI bridge */
1601 return domain_context_mapping_one(domain
,
1602 pci_domain_nr(tmp
->subordinate
),
1603 tmp
->subordinate
->number
, 0,
1605 else /* this is a legacy PCI bridge */
1606 return domain_context_mapping_one(domain
,
1607 pci_domain_nr(tmp
->bus
),
1613 static int domain_context_mapped(struct pci_dev
*pdev
)
1616 struct pci_dev
*tmp
, *parent
;
1617 struct intel_iommu
*iommu
;
1619 iommu
= device_to_iommu(pci_domain_nr(pdev
->bus
), pdev
->bus
->number
,
1624 ret
= device_context_mapped(iommu
, pdev
->bus
->number
, pdev
->devfn
);
1627 /* dependent device mapping */
1628 tmp
= pci_find_upstream_pcie_bridge(pdev
);
1631 /* Secondary interface's bus number and devfn 0 */
1632 parent
= pdev
->bus
->self
;
1633 while (parent
!= tmp
) {
1634 ret
= device_context_mapped(iommu
, parent
->bus
->number
,
1638 parent
= parent
->bus
->self
;
1640 if (pci_is_pcie(tmp
))
1641 return device_context_mapped(iommu
, tmp
->subordinate
->number
,
1644 return device_context_mapped(iommu
, tmp
->bus
->number
,
1648 /* Returns a number of VTD pages, but aligned to MM page size */
1649 static inline unsigned long aligned_nrpages(unsigned long host_addr
,
1652 host_addr
&= ~PAGE_MASK
;
1653 return PAGE_ALIGN(host_addr
+ size
) >> VTD_PAGE_SHIFT
;
1656 static int __domain_mapping(struct dmar_domain
*domain
, unsigned long iov_pfn
,
1657 struct scatterlist
*sg
, unsigned long phys_pfn
,
1658 unsigned long nr_pages
, int prot
)
1660 struct dma_pte
*first_pte
= NULL
, *pte
= NULL
;
1661 phys_addr_t
uninitialized_var(pteval
);
1662 int addr_width
= agaw_to_width(domain
->agaw
) - VTD_PAGE_SHIFT
;
1663 unsigned long sg_res
;
1665 BUG_ON(addr_width
< BITS_PER_LONG
&& (iov_pfn
+ nr_pages
- 1) >> addr_width
);
1667 if ((prot
& (DMA_PTE_READ
|DMA_PTE_WRITE
)) == 0)
1670 prot
&= DMA_PTE_READ
| DMA_PTE_WRITE
| DMA_PTE_SNP
;
1675 sg_res
= nr_pages
+ 1;
1676 pteval
= ((phys_addr_t
)phys_pfn
<< VTD_PAGE_SHIFT
) | prot
;
1679 while (nr_pages
--) {
1683 sg_res
= aligned_nrpages(sg
->offset
, sg
->length
);
1684 sg
->dma_address
= ((dma_addr_t
)iov_pfn
<< VTD_PAGE_SHIFT
) + sg
->offset
;
1685 sg
->dma_length
= sg
->length
;
1686 pteval
= page_to_phys(sg_page(sg
)) | prot
;
1689 first_pte
= pte
= pfn_to_dma_pte(domain
, iov_pfn
);
1693 /* We don't need lock here, nobody else
1694 * touches the iova range
1696 tmp
= cmpxchg64_local(&pte
->val
, 0ULL, pteval
);
1698 static int dumps
= 5;
1699 printk(KERN_CRIT
"ERROR: DMA PTE for vPFN 0x%lx already set (to %llx not %llx)\n",
1700 iov_pfn
, tmp
, (unsigned long long)pteval
);
1703 debug_dma_dump_mappings(NULL
);
1708 if (!nr_pages
|| first_pte_in_page(pte
)) {
1709 domain_flush_cache(domain
, first_pte
,
1710 (void *)pte
- (void *)first_pte
);
1714 pteval
+= VTD_PAGE_SIZE
;
/*
 * Map a scatterlist into @domain at @iov_pfn: thin wrapper that lets
 * __domain_mapping() pull physical addresses from @sg (phys_pfn unused).
 */
static inline int domain_sg_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
				    struct scatterlist *sg, unsigned long nr_pages,
				    int prot)
{
	return __domain_mapping(domain, iov_pfn, sg, 0, nr_pages, prot);
}
1729 static inline int domain_pfn_mapping(struct dmar_domain
*domain
, unsigned long iov_pfn
,
1730 unsigned long phys_pfn
, unsigned long nr_pages
,
1733 return __domain_mapping(domain
, iov_pfn
, NULL
, phys_pfn
, nr_pages
, prot
);
1736 static void iommu_detach_dev(struct intel_iommu
*iommu
, u8 bus
, u8 devfn
)
1741 clear_context_table(iommu
, bus
, devfn
);
1742 iommu
->flush
.flush_context(iommu
, 0, 0, 0,
1743 DMA_CCMD_GLOBAL_INVL
);
1744 iommu
->flush
.flush_iotlb(iommu
, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH
);
1747 static void domain_remove_dev_info(struct dmar_domain
*domain
)
1749 struct device_domain_info
*info
;
1750 unsigned long flags
;
1751 struct intel_iommu
*iommu
;
1753 spin_lock_irqsave(&device_domain_lock
, flags
);
1754 while (!list_empty(&domain
->devices
)) {
1755 info
= list_entry(domain
->devices
.next
,
1756 struct device_domain_info
, link
);
1757 list_del(&info
->link
);
1758 list_del(&info
->global
);
1760 info
->dev
->dev
.archdata
.iommu
= NULL
;
1761 spin_unlock_irqrestore(&device_domain_lock
, flags
);
1763 iommu_disable_dev_iotlb(info
);
1764 iommu
= device_to_iommu(info
->segment
, info
->bus
, info
->devfn
);
1765 iommu_detach_dev(iommu
, info
->bus
, info
->devfn
);
1766 free_devinfo_mem(info
);
1768 spin_lock_irqsave(&device_domain_lock
, flags
);
1770 spin_unlock_irqrestore(&device_domain_lock
, flags
);
1775 * Note: we use struct pci_dev->dev.archdata.iommu stores the info
1777 static struct dmar_domain
*
1778 find_domain(struct pci_dev
*pdev
)
1780 struct device_domain_info
*info
;
1782 /* No lock here, assumes no domain exit in normal case */
1783 info
= pdev
->dev
.archdata
.iommu
;
1785 return info
->domain
;
1789 /* domain is initialized */
1790 static struct dmar_domain
*get_domain_for_dev(struct pci_dev
*pdev
, int gaw
)
1792 struct dmar_domain
*domain
, *found
= NULL
;
1793 struct intel_iommu
*iommu
;
1794 struct dmar_drhd_unit
*drhd
;
1795 struct device_domain_info
*info
, *tmp
;
1796 struct pci_dev
*dev_tmp
;
1797 unsigned long flags
;
1798 int bus
= 0, devfn
= 0;
1802 domain
= find_domain(pdev
);
1806 segment
= pci_domain_nr(pdev
->bus
);
1808 dev_tmp
= pci_find_upstream_pcie_bridge(pdev
);
1810 if (pci_is_pcie(dev_tmp
)) {
1811 bus
= dev_tmp
->subordinate
->number
;
1814 bus
= dev_tmp
->bus
->number
;
1815 devfn
= dev_tmp
->devfn
;
1817 spin_lock_irqsave(&device_domain_lock
, flags
);
1818 list_for_each_entry(info
, &device_domain_list
, global
) {
1819 if (info
->segment
== segment
&&
1820 info
->bus
== bus
&& info
->devfn
== devfn
) {
1821 found
= info
->domain
;
1825 spin_unlock_irqrestore(&device_domain_lock
, flags
);
1826 /* pcie-pci bridge already has a domain, uses it */
1833 domain
= alloc_domain();
1837 /* Allocate new domain for the device */
1838 drhd
= dmar_find_matched_drhd_unit(pdev
);
1840 printk(KERN_ERR
"IOMMU: can't find DMAR for device %s\n",
1844 iommu
= drhd
->iommu
;
1846 ret
= iommu_attach_domain(domain
, iommu
);
1848 free_domain_mem(domain
);
1852 if (domain_init(domain
, gaw
)) {
1853 domain_exit(domain
);
1857 /* register pcie-to-pci device */
1859 info
= alloc_devinfo_mem();
1861 domain_exit(domain
);
1864 info
->segment
= segment
;
1866 info
->devfn
= devfn
;
1868 info
->domain
= domain
;
1869 /* This domain is shared by devices under p2p bridge */
1870 domain
->flags
|= DOMAIN_FLAG_P2P_MULTIPLE_DEVICES
;
1872 /* pcie-to-pci bridge already has a domain, uses it */
1874 spin_lock_irqsave(&device_domain_lock
, flags
);
1875 list_for_each_entry(tmp
, &device_domain_list
, global
) {
1876 if (tmp
->segment
== segment
&&
1877 tmp
->bus
== bus
&& tmp
->devfn
== devfn
) {
1878 found
= tmp
->domain
;
1883 spin_unlock_irqrestore(&device_domain_lock
, flags
);
1884 free_devinfo_mem(info
);
1885 domain_exit(domain
);
1888 list_add(&info
->link
, &domain
->devices
);
1889 list_add(&info
->global
, &device_domain_list
);
1890 spin_unlock_irqrestore(&device_domain_lock
, flags
);
1895 info
= alloc_devinfo_mem();
1898 info
->segment
= segment
;
1899 info
->bus
= pdev
->bus
->number
;
1900 info
->devfn
= pdev
->devfn
;
1902 info
->domain
= domain
;
1903 spin_lock_irqsave(&device_domain_lock
, flags
);
1904 /* somebody is fast */
1905 found
= find_domain(pdev
);
1906 if (found
!= NULL
) {
1907 spin_unlock_irqrestore(&device_domain_lock
, flags
);
1908 if (found
!= domain
) {
1909 domain_exit(domain
);
1912 free_devinfo_mem(info
);
1915 list_add(&info
->link
, &domain
->devices
);
1916 list_add(&info
->global
, &device_domain_list
);
1917 pdev
->dev
.archdata
.iommu
= info
;
1918 spin_unlock_irqrestore(&device_domain_lock
, flags
);
1921 /* recheck it here, maybe others set it */
1922 return find_domain(pdev
);
1925 static int iommu_identity_mapping
;
1926 #define IDENTMAP_ALL 1
1927 #define IDENTMAP_GFX 2
1928 #define IDENTMAP_AZALIA 4
1930 static int iommu_domain_identity_map(struct dmar_domain
*domain
,
1931 unsigned long long start
,
1932 unsigned long long end
)
1934 unsigned long first_vpfn
= start
>> VTD_PAGE_SHIFT
;
1935 unsigned long last_vpfn
= end
>> VTD_PAGE_SHIFT
;
1937 if (!reserve_iova(&domain
->iovad
, dma_to_mm_pfn(first_vpfn
),
1938 dma_to_mm_pfn(last_vpfn
))) {
1939 printk(KERN_ERR
"IOMMU: reserve iova failed\n");
1943 pr_debug("Mapping reserved region %llx-%llx for domain %d\n",
1944 start
, end
, domain
->id
);
1946 * RMRR range might have overlap with physical memory range,
1949 dma_pte_clear_range(domain
, first_vpfn
, last_vpfn
);
1951 return domain_pfn_mapping(domain
, first_vpfn
, first_vpfn
,
1952 last_vpfn
- first_vpfn
+ 1,
1953 DMA_PTE_READ
|DMA_PTE_WRITE
);
1956 static int iommu_prepare_identity_map(struct pci_dev
*pdev
,
1957 unsigned long long start
,
1958 unsigned long long end
)
1960 struct dmar_domain
*domain
;
1963 domain
= get_domain_for_dev(pdev
, DEFAULT_DOMAIN_ADDRESS_WIDTH
);
1967 /* For _hardware_ passthrough, don't bother. But for software
1968 passthrough, we do it anyway -- it may indicate a memory
1969 range which is reserved in E820, so which didn't get set
1970 up to start with in si_domain */
1971 if (domain
== si_domain
&& hw_pass_through
) {
1972 printk("Ignoring identity map for HW passthrough device %s [0x%Lx - 0x%Lx]\n",
1973 pci_name(pdev
), start
, end
);
1978 "IOMMU: Setting identity map for device %s [0x%Lx - 0x%Lx]\n",
1979 pci_name(pdev
), start
, end
);
1982 WARN(1, "Your BIOS is broken; RMRR ends before it starts!\n"
1983 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
1984 dmi_get_system_info(DMI_BIOS_VENDOR
),
1985 dmi_get_system_info(DMI_BIOS_VERSION
),
1986 dmi_get_system_info(DMI_PRODUCT_VERSION
));
1991 if (end
>> agaw_to_width(domain
->agaw
)) {
1992 WARN(1, "Your BIOS is broken; RMRR exceeds permitted address width (%d bits)\n"
1993 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
1994 agaw_to_width(domain
->agaw
),
1995 dmi_get_system_info(DMI_BIOS_VENDOR
),
1996 dmi_get_system_info(DMI_BIOS_VERSION
),
1997 dmi_get_system_info(DMI_PRODUCT_VERSION
));
2002 ret
= iommu_domain_identity_map(domain
, start
, end
);
2006 /* context entry init */
2007 ret
= domain_context_mapping(domain
, pdev
, CONTEXT_TT_MULTI_LEVEL
);
2014 domain_exit(domain
);
2018 static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit
*rmrr
,
2019 struct pci_dev
*pdev
)
2021 if (pdev
->dev
.archdata
.iommu
== DUMMY_DEVICE_DOMAIN_INFO
)
2023 return iommu_prepare_identity_map(pdev
, rmrr
->base_address
,
2024 rmrr
->end_address
+ 1);
#ifdef CONFIG_DMAR_FLOPPY_WA
/*
 * Floppy workaround: the legacy floppy controller DMAs via the LPC/ISA
 * bridge, so give that bridge a 0-16MiB identity mapping.
 */
static inline void iommu_prepare_isa(void)
{
	struct pci_dev *pdev;
	int ret;

	pdev = pci_get_class(PCI_CLASS_BRIDGE_ISA << 8, NULL);
	if (!pdev)
		return;

	printk(KERN_INFO "IOMMU: Prepare 0-16MiB unity mapping for LPC\n");
	ret = iommu_prepare_identity_map(pdev, 0, 16*1024*1024);

	if (ret)
		printk(KERN_ERR "IOMMU: Failed to create 0-16MiB identity map; "
		       "floppy might not work\n");

}
#else
static inline void iommu_prepare_isa(void)
{
	return;
}
#endif /* !CONFIG_DMAR_FLPY_WA */
2052 static int md_domain_init(struct dmar_domain
*domain
, int guest_width
);
2054 static int __init
si_domain_work_fn(unsigned long start_pfn
,
2055 unsigned long end_pfn
, void *datax
)
2059 *ret
= iommu_domain_identity_map(si_domain
,
2060 (uint64_t)start_pfn
<< PAGE_SHIFT
,
2061 (uint64_t)end_pfn
<< PAGE_SHIFT
);
2066 static int __init
si_domain_init(int hw
)
2068 struct dmar_drhd_unit
*drhd
;
2069 struct intel_iommu
*iommu
;
2072 si_domain
= alloc_domain();
2076 pr_debug("Identity mapping domain is domain %d\n", si_domain
->id
);
2078 for_each_active_iommu(iommu
, drhd
) {
2079 ret
= iommu_attach_domain(si_domain
, iommu
);
2081 domain_exit(si_domain
);
2086 if (md_domain_init(si_domain
, DEFAULT_DOMAIN_ADDRESS_WIDTH
)) {
2087 domain_exit(si_domain
);
2091 si_domain
->flags
= DOMAIN_FLAG_STATIC_IDENTITY
;
2096 for_each_online_node(nid
) {
2097 work_with_active_regions(nid
, si_domain_work_fn
, &ret
);
2105 static void domain_remove_one_dev_info(struct dmar_domain
*domain
,
2106 struct pci_dev
*pdev
);
2107 static int identity_mapping(struct pci_dev
*pdev
)
2109 struct device_domain_info
*info
;
2111 if (likely(!iommu_identity_mapping
))
2114 info
= pdev
->dev
.archdata
.iommu
;
2115 if (info
&& info
!= DUMMY_DEVICE_DOMAIN_INFO
)
2116 return (info
->domain
== si_domain
);
2121 static int domain_add_dev_info(struct dmar_domain
*domain
,
2122 struct pci_dev
*pdev
,
2125 struct device_domain_info
*info
;
2126 unsigned long flags
;
2129 info
= alloc_devinfo_mem();
2133 ret
= domain_context_mapping(domain
, pdev
, translation
);
2135 free_devinfo_mem(info
);
2139 info
->segment
= pci_domain_nr(pdev
->bus
);
2140 info
->bus
= pdev
->bus
->number
;
2141 info
->devfn
= pdev
->devfn
;
2143 info
->domain
= domain
;
2145 spin_lock_irqsave(&device_domain_lock
, flags
);
2146 list_add(&info
->link
, &domain
->devices
);
2147 list_add(&info
->global
, &device_domain_list
);
2148 pdev
->dev
.archdata
.iommu
= info
;
2149 spin_unlock_irqrestore(&device_domain_lock
, flags
);
2154 static int iommu_should_identity_map(struct pci_dev
*pdev
, int startup
)
2156 if ((iommu_identity_mapping
& IDENTMAP_AZALIA
) && IS_AZALIA(pdev
))
2159 if ((iommu_identity_mapping
& IDENTMAP_GFX
) && IS_GFX_DEVICE(pdev
))
2162 if (!(iommu_identity_mapping
& IDENTMAP_ALL
))
2166 * We want to start off with all devices in the 1:1 domain, and
2167 * take them out later if we find they can't access all of memory.
2169 * However, we can't do this for PCI devices behind bridges,
2170 * because all PCI devices behind the same bridge will end up
2171 * with the same source-id on their transactions.
2173 * Practically speaking, we can't change things around for these
2174 * devices at run-time, because we can't be sure there'll be no
2175 * DMA transactions in flight for any of their siblings.
2177 * So PCI devices (unless they're on the root bus) as well as
2178 * their parent PCI-PCI or PCIe-PCI bridges must be left _out_ of
2179 * the 1:1 domain, just in _case_ one of their siblings turns out
2180 * not to be able to map all of memory.
2182 if (!pci_is_pcie(pdev
)) {
2183 if (!pci_is_root_bus(pdev
->bus
))
2185 if (pdev
->class >> 8 == PCI_CLASS_BRIDGE_PCI
)
2187 } else if (pdev
->pcie_type
== PCI_EXP_TYPE_PCI_BRIDGE
)
2191 * At boot time, we don't yet know if devices will be 64-bit capable.
2192 * Assume that they will -- if they turn out not to be, then we can
2193 * take them out of the 1:1 domain later.
2197 * If the device's dma_mask is less than the system's memory
2198 * size then this is not a candidate for identity mapping.
2200 u64 dma_mask
= pdev
->dma_mask
;
2202 if (pdev
->dev
.coherent_dma_mask
&&
2203 pdev
->dev
.coherent_dma_mask
< dma_mask
)
2204 dma_mask
= pdev
->dev
.coherent_dma_mask
;
2206 return dma_mask
>= dma_get_required_mask(&pdev
->dev
);
2212 static int __init
iommu_prepare_static_identity_mapping(int hw
)
2214 struct pci_dev
*pdev
= NULL
;
2217 ret
= si_domain_init(hw
);
2221 for_each_pci_dev(pdev
) {
2222 /* Skip Host/PCI Bridge devices */
2223 if (IS_BRIDGE_HOST_DEVICE(pdev
))
2225 if (iommu_should_identity_map(pdev
, 1)) {
2226 printk(KERN_INFO
"IOMMU: %s identity mapping for device %s\n",
2227 hw
? "hardware" : "software", pci_name(pdev
));
2229 ret
= domain_add_dev_info(si_domain
, pdev
,
2230 hw
? CONTEXT_TT_PASS_THROUGH
:
2231 CONTEXT_TT_MULTI_LEVEL
);
2240 static int __init
init_dmars(int force_on
)
2242 struct dmar_drhd_unit
*drhd
;
2243 struct dmar_rmrr_unit
*rmrr
;
2244 struct pci_dev
*pdev
;
2245 struct intel_iommu
*iommu
;
2251 * initialize and program root entry to not present
2254 for_each_drhd_unit(drhd
) {
2257 * lock not needed as this is only incremented in the single
2258 * threaded kernel __init code path all other access are read
2263 g_iommus
= kcalloc(g_num_of_iommus
, sizeof(struct intel_iommu
*),
2266 printk(KERN_ERR
"Allocating global iommu array failed\n");
2271 deferred_flush
= kzalloc(g_num_of_iommus
*
2272 sizeof(struct deferred_flush_tables
), GFP_KERNEL
);
2273 if (!deferred_flush
) {
2278 for_each_drhd_unit(drhd
) {
2282 iommu
= drhd
->iommu
;
2283 g_iommus
[iommu
->seq_id
] = iommu
;
2285 ret
= iommu_init_domains(iommu
);
2291 * we could share the same root & context tables
2292 * among all IOMMU's. Need to Split it later.
2294 ret
= iommu_alloc_root_entry(iommu
);
2296 printk(KERN_ERR
"IOMMU: allocate root entry failed\n");
2299 if (!ecap_pass_through(iommu
->ecap
))
2300 hw_pass_through
= 0;
2304 * Start from the sane iommu hardware state.
2306 for_each_drhd_unit(drhd
) {
2310 iommu
= drhd
->iommu
;
2313 * If the queued invalidation is already initialized by us
2314 * (for example, while enabling interrupt-remapping) then
2315 * we got the things already rolling from a sane state.
2321 * Clear any previous faults.
2323 dmar_fault(-1, iommu
);
2325 * Disable queued invalidation if supported and already enabled
2326 * before OS handover.
2328 dmar_disable_qi(iommu
);
2331 for_each_drhd_unit(drhd
) {
2335 iommu
= drhd
->iommu
;
2337 if (dmar_enable_qi(iommu
)) {
2339 * Queued Invalidate not enabled, use Register Based
2342 iommu
->flush
.flush_context
= __iommu_flush_context
;
2343 iommu
->flush
.flush_iotlb
= __iommu_flush_iotlb
;
2344 printk(KERN_INFO
"IOMMU %d 0x%Lx: using Register based "
2347 (unsigned long long)drhd
->reg_base_addr
);
2349 iommu
->flush
.flush_context
= qi_flush_context
;
2350 iommu
->flush
.flush_iotlb
= qi_flush_iotlb
;
2351 printk(KERN_INFO
"IOMMU %d 0x%Lx: using Queued "
2354 (unsigned long long)drhd
->reg_base_addr
);
2358 if (iommu_pass_through
)
2359 iommu_identity_mapping
|= IDENTMAP_ALL
;
2361 #ifdef CONFIG_DMAR_BROKEN_GFX_WA
2362 iommu_identity_mapping
|= IDENTMAP_GFX
;
2365 check_tylersburg_isoch();
2368 * If pass through is not set or not enabled, setup context entries for
2369 * identity mappings for rmrr, gfx, and isa and may fall back to static
2370 * identity mapping if iommu_identity_mapping is set.
2372 if (iommu_identity_mapping
) {
2373 ret
= iommu_prepare_static_identity_mapping(hw_pass_through
);
2375 printk(KERN_CRIT
"Failed to setup IOMMU pass-through\n");
2381 * for each dev attached to rmrr
2383 * locate drhd for dev, alloc domain for dev
2384 * allocate free domain
2385 * allocate page table entries for rmrr
2386 * if context not allocated for bus
2387 * allocate and init context
2388 * set present in root table for this bus
2389 * init context with domain, translation etc
2393 printk(KERN_INFO
"IOMMU: Setting RMRR:\n");
2394 for_each_rmrr_units(rmrr
) {
2395 for (i
= 0; i
< rmrr
->devices_cnt
; i
++) {
2396 pdev
= rmrr
->devices
[i
];
2398 * some BIOS lists non-exist devices in DMAR
2403 ret
= iommu_prepare_rmrr_dev(rmrr
, pdev
);
2406 "IOMMU: mapping reserved region failed\n");
2410 iommu_prepare_isa();
2415 * global invalidate context cache
2416 * global invalidate iotlb
2417 * enable translation
2419 for_each_drhd_unit(drhd
) {
2420 if (drhd
->ignored
) {
2422 * we always have to disable PMRs or DMA may fail on
2426 iommu_disable_protect_mem_regions(drhd
->iommu
);
2429 iommu
= drhd
->iommu
;
2431 iommu_flush_write_buffer(iommu
);
2433 ret
= dmar_set_interrupt(iommu
);
2437 iommu_set_root_entry(iommu
);
2439 iommu
->flush
.flush_context(iommu
, 0, 0, 0, DMA_CCMD_GLOBAL_INVL
);
2440 iommu
->flush
.flush_iotlb(iommu
, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH
);
2442 ret
= iommu_enable_translation(iommu
);
2446 iommu_disable_protect_mem_regions(iommu
);
2451 for_each_drhd_unit(drhd
) {
2454 iommu
= drhd
->iommu
;
2461 /* This takes a number of _MM_ pages, not VTD pages */
2462 static struct iova
*intel_alloc_iova(struct device
*dev
,
2463 struct dmar_domain
*domain
,
2464 unsigned long nrpages
, uint64_t dma_mask
)
2466 struct pci_dev
*pdev
= to_pci_dev(dev
);
2467 struct iova
*iova
= NULL
;
2469 /* Restrict dma_mask to the width that the iommu can handle */
2470 dma_mask
= min_t(uint64_t, DOMAIN_MAX_ADDR(domain
->gaw
), dma_mask
);
2472 if (!dmar_forcedac
&& dma_mask
> DMA_BIT_MASK(32)) {
2474 * First try to allocate an io virtual address in
2475 * DMA_BIT_MASK(32) and if that fails then try allocating
2478 iova
= alloc_iova(&domain
->iovad
, nrpages
,
2479 IOVA_PFN(DMA_BIT_MASK(32)), 1);
2483 iova
= alloc_iova(&domain
->iovad
, nrpages
, IOVA_PFN(dma_mask
), 1);
2484 if (unlikely(!iova
)) {
2485 printk(KERN_ERR
"Allocating %ld-page iova for %s failed",
2486 nrpages
, pci_name(pdev
));
2493 static struct dmar_domain
*__get_valid_domain_for_dev(struct pci_dev
*pdev
)
2495 struct dmar_domain
*domain
;
2498 domain
= get_domain_for_dev(pdev
,
2499 DEFAULT_DOMAIN_ADDRESS_WIDTH
);
2502 "Allocating domain for %s failed", pci_name(pdev
));
2506 /* make sure context mapping is ok */
2507 if (unlikely(!domain_context_mapped(pdev
))) {
2508 ret
= domain_context_mapping(domain
, pdev
,
2509 CONTEXT_TT_MULTI_LEVEL
);
2512 "Domain context map for %s failed",
2521 static inline struct dmar_domain
*get_valid_domain_for_dev(struct pci_dev
*dev
)
2523 struct device_domain_info
*info
;
2525 /* No lock here, assumes no domain exit in normal case */
2526 info
= dev
->dev
.archdata
.iommu
;
2528 return info
->domain
;
2530 return __get_valid_domain_for_dev(dev
);
2533 static int iommu_dummy(struct pci_dev
*pdev
)
2535 return pdev
->dev
.archdata
.iommu
== DUMMY_DEVICE_DOMAIN_INFO
;
2538 /* Check if the pdev needs to go through non-identity map and unmap process.*/
2539 static int iommu_no_mapping(struct device
*dev
)
2541 struct pci_dev
*pdev
;
2544 if (unlikely(dev
->bus
!= &pci_bus_type
))
2547 pdev
= to_pci_dev(dev
);
2548 if (iommu_dummy(pdev
))
2551 if (!iommu_identity_mapping
)
2554 found
= identity_mapping(pdev
);
2556 if (iommu_should_identity_map(pdev
, 0))
2560 * 32 bit DMA is removed from si_domain and fall back
2561 * to non-identity mapping.
2563 domain_remove_one_dev_info(si_domain
, pdev
);
2564 printk(KERN_INFO
"32bit %s uses non-identity mapping\n",
2570 * In case of a detached 64 bit DMA device from vm, the device
2571 * is put into si_domain for identity mapping.
2573 if (iommu_should_identity_map(pdev
, 0)) {
2575 ret
= domain_add_dev_info(si_domain
, pdev
,
2577 CONTEXT_TT_PASS_THROUGH
:
2578 CONTEXT_TT_MULTI_LEVEL
);
2580 printk(KERN_INFO
"64bit %s uses identity mapping\n",
2590 static dma_addr_t
__intel_map_single(struct device
*hwdev
, phys_addr_t paddr
,
2591 size_t size
, int dir
, u64 dma_mask
)
2593 struct pci_dev
*pdev
= to_pci_dev(hwdev
);
2594 struct dmar_domain
*domain
;
2595 phys_addr_t start_paddr
;
2599 struct intel_iommu
*iommu
;
2600 unsigned long paddr_pfn
= paddr
>> PAGE_SHIFT
;
2602 BUG_ON(dir
== DMA_NONE
);
2604 if (iommu_no_mapping(hwdev
))
2607 domain
= get_valid_domain_for_dev(pdev
);
2611 iommu
= domain_get_iommu(domain
);
2612 size
= aligned_nrpages(paddr
, size
);
2614 iova
= intel_alloc_iova(hwdev
, domain
, dma_to_mm_pfn(size
), dma_mask
);
2619 * Check if DMAR supports zero-length reads on write only
2622 if (dir
== DMA_TO_DEVICE
|| dir
== DMA_BIDIRECTIONAL
|| \
2623 !cap_zlr(iommu
->cap
))
2624 prot
|= DMA_PTE_READ
;
2625 if (dir
== DMA_FROM_DEVICE
|| dir
== DMA_BIDIRECTIONAL
)
2626 prot
|= DMA_PTE_WRITE
;
2628 * paddr - (paddr + size) might be partial page, we should map the whole
2629 * page. Note: if two part of one page are separately mapped, we
2630 * might have two guest_addr mapping to the same host paddr, but this
2631 * is not a big problem
2633 ret
= domain_pfn_mapping(domain
, mm_to_dma_pfn(iova
->pfn_lo
),
2634 mm_to_dma_pfn(paddr_pfn
), size
, prot
);
2638 /* it's a non-present to present mapping. Only flush if caching mode */
2639 if (cap_caching_mode(iommu
->cap
))
2640 iommu_flush_iotlb_psi(iommu
, domain
->id
, mm_to_dma_pfn(iova
->pfn_lo
), size
, 1);
2642 iommu_flush_write_buffer(iommu
);
2644 start_paddr
= (phys_addr_t
)iova
->pfn_lo
<< PAGE_SHIFT
;
2645 start_paddr
+= paddr
& ~PAGE_MASK
;
2650 __free_iova(&domain
->iovad
, iova
);
2651 printk(KERN_ERR
"Device %s request: %zx@%llx dir %d --- failed\n",
2652 pci_name(pdev
), size
, (unsigned long long)paddr
, dir
);
2656 static dma_addr_t
intel_map_page(struct device
*dev
, struct page
*page
,
2657 unsigned long offset
, size_t size
,
2658 enum dma_data_direction dir
,
2659 struct dma_attrs
*attrs
)
2661 return __intel_map_single(dev
, page_to_phys(page
) + offset
, size
,
2662 dir
, to_pci_dev(dev
)->dma_mask
);
2665 static void flush_unmaps(void)
2671 /* just flush them all */
2672 for (i
= 0; i
< g_num_of_iommus
; i
++) {
2673 struct intel_iommu
*iommu
= g_iommus
[i
];
2677 if (!deferred_flush
[i
].next
)
2680 /* In caching mode, global flushes turn emulation expensive */
2681 if (!cap_caching_mode(iommu
->cap
))
2682 iommu
->flush
.flush_iotlb(iommu
, 0, 0, 0,
2683 DMA_TLB_GLOBAL_FLUSH
);
2684 for (j
= 0; j
< deferred_flush
[i
].next
; j
++) {
2686 struct iova
*iova
= deferred_flush
[i
].iova
[j
];
2687 struct dmar_domain
*domain
= deferred_flush
[i
].domain
[j
];
2689 /* On real hardware multiple invalidations are expensive */
2690 if (cap_caching_mode(iommu
->cap
))
2691 iommu_flush_iotlb_psi(iommu
, domain
->id
,
2692 iova
->pfn_lo
, iova
->pfn_hi
- iova
->pfn_lo
+ 1, 0);
2694 mask
= ilog2(mm_to_dma_pfn(iova
->pfn_hi
- iova
->pfn_lo
+ 1));
2695 iommu_flush_dev_iotlb(deferred_flush
[i
].domain
[j
],
2696 (uint64_t)iova
->pfn_lo
<< PAGE_SHIFT
, mask
);
2698 __free_iova(&deferred_flush
[i
].domain
[j
]->iovad
, iova
);
2700 deferred_flush
[i
].next
= 0;
2706 static void flush_unmaps_timeout(unsigned long data
)
2708 unsigned long flags
;
2710 spin_lock_irqsave(&async_umap_flush_lock
, flags
);
2712 spin_unlock_irqrestore(&async_umap_flush_lock
, flags
);
2715 static void add_unmap(struct dmar_domain
*dom
, struct iova
*iova
)
2717 unsigned long flags
;
2719 struct intel_iommu
*iommu
;
2721 spin_lock_irqsave(&async_umap_flush_lock
, flags
);
2722 if (list_size
== HIGH_WATER_MARK
)
2725 iommu
= domain_get_iommu(dom
);
2726 iommu_id
= iommu
->seq_id
;
2728 next
= deferred_flush
[iommu_id
].next
;
2729 deferred_flush
[iommu_id
].domain
[next
] = dom
;
2730 deferred_flush
[iommu_id
].iova
[next
] = iova
;
2731 deferred_flush
[iommu_id
].next
++;
2734 mod_timer(&unmap_timer
, jiffies
+ msecs_to_jiffies(10));
2738 spin_unlock_irqrestore(&async_umap_flush_lock
, flags
);
2741 static void intel_unmap_page(struct device
*dev
, dma_addr_t dev_addr
,
2742 size_t size
, enum dma_data_direction dir
,
2743 struct dma_attrs
*attrs
)
2745 struct pci_dev
*pdev
= to_pci_dev(dev
);
2746 struct dmar_domain
*domain
;
2747 unsigned long start_pfn
, last_pfn
;
2749 struct intel_iommu
*iommu
;
2751 if (iommu_no_mapping(dev
))
2754 domain
= find_domain(pdev
);
2757 iommu
= domain_get_iommu(domain
);
2759 iova
= find_iova(&domain
->iovad
, IOVA_PFN(dev_addr
));
2760 if (WARN_ONCE(!iova
, "Driver unmaps unmatched page at PFN %llx\n",
2761 (unsigned long long)dev_addr
))
2764 start_pfn
= mm_to_dma_pfn(iova
->pfn_lo
);
2765 last_pfn
= mm_to_dma_pfn(iova
->pfn_hi
+ 1) - 1;
2767 pr_debug("Device %s unmapping: pfn %lx-%lx\n",
2768 pci_name(pdev
), start_pfn
, last_pfn
);
2770 /* clear the whole page */
2771 dma_pte_clear_range(domain
, start_pfn
, last_pfn
);
2773 /* free page tables */
2774 dma_pte_free_pagetable(domain
, start_pfn
, last_pfn
);
2776 if (intel_iommu_strict
) {
2777 iommu_flush_iotlb_psi(iommu
, domain
->id
, start_pfn
,
2778 last_pfn
- start_pfn
+ 1, 0);
2780 __free_iova(&domain
->iovad
, iova
);
2782 add_unmap(domain
, iova
);
2784 * queue up the release of the unmap to save the 1/6th of the
2785 * cpu used up by the iotlb flush operation...
2790 static void *intel_alloc_coherent(struct device
*hwdev
, size_t size
,
2791 dma_addr_t
*dma_handle
, gfp_t flags
)
2796 size
= PAGE_ALIGN(size
);
2797 order
= get_order(size
);
2799 if (!iommu_no_mapping(hwdev
))
2800 flags
&= ~(GFP_DMA
| GFP_DMA32
);
2801 else if (hwdev
->coherent_dma_mask
< dma_get_required_mask(hwdev
)) {
2802 if (hwdev
->coherent_dma_mask
< DMA_BIT_MASK(32))
2808 vaddr
= (void *)__get_free_pages(flags
, order
);
2811 memset(vaddr
, 0, size
);
2813 *dma_handle
= __intel_map_single(hwdev
, virt_to_bus(vaddr
), size
,
2815 hwdev
->coherent_dma_mask
);
2818 free_pages((unsigned long)vaddr
, order
);
2822 static void intel_free_coherent(struct device
*hwdev
, size_t size
, void *vaddr
,
2823 dma_addr_t dma_handle
)
2827 size
= PAGE_ALIGN(size
);
2828 order
= get_order(size
);
2830 intel_unmap_page(hwdev
, dma_handle
, size
, DMA_BIDIRECTIONAL
, NULL
);
2831 free_pages((unsigned long)vaddr
, order
);
2834 static void intel_unmap_sg(struct device
*hwdev
, struct scatterlist
*sglist
,
2835 int nelems
, enum dma_data_direction dir
,
2836 struct dma_attrs
*attrs
)
2838 struct pci_dev
*pdev
= to_pci_dev(hwdev
);
2839 struct dmar_domain
*domain
;
2840 unsigned long start_pfn
, last_pfn
;
2842 struct intel_iommu
*iommu
;
2844 if (iommu_no_mapping(hwdev
))
2847 domain
= find_domain(pdev
);
2850 iommu
= domain_get_iommu(domain
);
2852 iova
= find_iova(&domain
->iovad
, IOVA_PFN(sglist
[0].dma_address
));
2853 if (WARN_ONCE(!iova
, "Driver unmaps unmatched sglist at PFN %llx\n",
2854 (unsigned long long)sglist
[0].dma_address
))
2857 start_pfn
= mm_to_dma_pfn(iova
->pfn_lo
);
2858 last_pfn
= mm_to_dma_pfn(iova
->pfn_hi
+ 1) - 1;
2860 /* clear the whole page */
2861 dma_pte_clear_range(domain
, start_pfn
, last_pfn
);
2863 /* free page tables */
2864 dma_pte_free_pagetable(domain
, start_pfn
, last_pfn
);
2866 if (intel_iommu_strict
) {
2867 iommu_flush_iotlb_psi(iommu
, domain
->id
, start_pfn
,
2868 last_pfn
- start_pfn
+ 1, 0);
2870 __free_iova(&domain
->iovad
, iova
);
2872 add_unmap(domain
, iova
);
2874 * queue up the release of the unmap to save the 1/6th of the
2875 * cpu used up by the iotlb flush operation...
2880 static int intel_nontranslate_map_sg(struct device
*hddev
,
2881 struct scatterlist
*sglist
, int nelems
, int dir
)
2884 struct scatterlist
*sg
;
2886 for_each_sg(sglist
, sg
, nelems
, i
) {
2887 BUG_ON(!sg_page(sg
));
2888 sg
->dma_address
= page_to_phys(sg_page(sg
)) + sg
->offset
;
2889 sg
->dma_length
= sg
->length
;
2894 static int intel_map_sg(struct device
*hwdev
, struct scatterlist
*sglist
, int nelems
,
2895 enum dma_data_direction dir
, struct dma_attrs
*attrs
)
2898 struct pci_dev
*pdev
= to_pci_dev(hwdev
);
2899 struct dmar_domain
*domain
;
2902 struct iova
*iova
= NULL
;
2904 struct scatterlist
*sg
;
2905 unsigned long start_vpfn
;
2906 struct intel_iommu
*iommu
;
2908 BUG_ON(dir
== DMA_NONE
);
2909 if (iommu_no_mapping(hwdev
))
2910 return intel_nontranslate_map_sg(hwdev
, sglist
, nelems
, dir
);
2912 domain
= get_valid_domain_for_dev(pdev
);
2916 iommu
= domain_get_iommu(domain
);
2918 for_each_sg(sglist
, sg
, nelems
, i
)
2919 size
+= aligned_nrpages(sg
->offset
, sg
->length
);
2921 iova
= intel_alloc_iova(hwdev
, domain
, dma_to_mm_pfn(size
),
2924 sglist
->dma_length
= 0;
2929 * Check if DMAR supports zero-length reads on write only
2932 if (dir
== DMA_TO_DEVICE
|| dir
== DMA_BIDIRECTIONAL
|| \
2933 !cap_zlr(iommu
->cap
))
2934 prot
|= DMA_PTE_READ
;
2935 if (dir
== DMA_FROM_DEVICE
|| dir
== DMA_BIDIRECTIONAL
)
2936 prot
|= DMA_PTE_WRITE
;
2938 start_vpfn
= mm_to_dma_pfn(iova
->pfn_lo
);
2940 ret
= domain_sg_mapping(domain
, start_vpfn
, sglist
, size
, prot
);
2941 if (unlikely(ret
)) {
2942 /* clear the page */
2943 dma_pte_clear_range(domain
, start_vpfn
,
2944 start_vpfn
+ size
- 1);
2945 /* free page tables */
2946 dma_pte_free_pagetable(domain
, start_vpfn
,
2947 start_vpfn
+ size
- 1);
2949 __free_iova(&domain
->iovad
, iova
);
2953 /* it's a non-present to present mapping. Only flush if caching mode */
2954 if (cap_caching_mode(iommu
->cap
))
2955 iommu_flush_iotlb_psi(iommu
, domain
->id
, start_vpfn
, size
, 1);
2957 iommu_flush_write_buffer(iommu
);
2962 static int intel_mapping_error(struct device
*dev
, dma_addr_t dma_addr
)
2967 struct dma_map_ops intel_dma_ops
= {
2968 .alloc_coherent
= intel_alloc_coherent
,
2969 .free_coherent
= intel_free_coherent
,
2970 .map_sg
= intel_map_sg
,
2971 .unmap_sg
= intel_unmap_sg
,
2972 .map_page
= intel_map_page
,
2973 .unmap_page
= intel_unmap_page
,
2974 .mapping_error
= intel_mapping_error
,
2977 static inline int iommu_domain_cache_init(void)
2981 iommu_domain_cache
= kmem_cache_create("iommu_domain",
2982 sizeof(struct dmar_domain
),
2987 if (!iommu_domain_cache
) {
2988 printk(KERN_ERR
"Couldn't create iommu_domain cache\n");
2995 static inline int iommu_devinfo_cache_init(void)
2999 iommu_devinfo_cache
= kmem_cache_create("iommu_devinfo",
3000 sizeof(struct device_domain_info
),
3004 if (!iommu_devinfo_cache
) {
3005 printk(KERN_ERR
"Couldn't create devinfo cache\n");
3012 static inline int iommu_iova_cache_init(void)
3016 iommu_iova_cache
= kmem_cache_create("iommu_iova",
3017 sizeof(struct iova
),
3021 if (!iommu_iova_cache
) {
3022 printk(KERN_ERR
"Couldn't create iova cache\n");
3029 static int __init
iommu_init_mempool(void)
3032 ret
= iommu_iova_cache_init();
3036 ret
= iommu_domain_cache_init();
3040 ret
= iommu_devinfo_cache_init();
3044 kmem_cache_destroy(iommu_domain_cache
);
3046 kmem_cache_destroy(iommu_iova_cache
);
3051 static void __init
iommu_exit_mempool(void)
3053 kmem_cache_destroy(iommu_devinfo_cache
);
3054 kmem_cache_destroy(iommu_domain_cache
);
3055 kmem_cache_destroy(iommu_iova_cache
);
3059 static void quirk_ioat_snb_local_iommu(struct pci_dev
*pdev
)
3061 struct dmar_drhd_unit
*drhd
;
3065 /* We know that this device on this chipset has its own IOMMU.
3066 * If we find it under a different IOMMU, then the BIOS is lying
3067 * to us. Hope that the IOMMU for this device is actually
3068 * disabled, and it needs no translation...
3070 rc
= pci_bus_read_config_dword(pdev
->bus
, PCI_DEVFN(0, 0), 0xb0, &vtbar
);
3072 /* "can't" happen */
3073 dev_info(&pdev
->dev
, "failed to run vt-d quirk\n");
3076 vtbar
&= 0xffff0000;
3078 /* we know that the this iommu should be at offset 0xa000 from vtbar */
3079 drhd
= dmar_find_matched_drhd_unit(pdev
);
3080 if (WARN_TAINT_ONCE(!drhd
|| drhd
->reg_base_addr
- vtbar
!= 0xa000,
3081 TAINT_FIRMWARE_WORKAROUND
,
3082 "BIOS assigned incorrect VT-d unit for Intel(R) QuickData Technology device\n"))
3083 pdev
->dev
.archdata
.iommu
= DUMMY_DEVICE_DOMAIN_INFO
;
3085 DECLARE_PCI_FIXUP_ENABLE(PCI_VENDOR_ID_INTEL
, PCI_DEVICE_ID_INTEL_IOAT_SNB
, quirk_ioat_snb_local_iommu
);
3087 static void __init
init_no_remapping_devices(void)
3089 struct dmar_drhd_unit
*drhd
;
3091 for_each_drhd_unit(drhd
) {
3092 if (!drhd
->include_all
) {
3094 for (i
= 0; i
< drhd
->devices_cnt
; i
++)
3095 if (drhd
->devices
[i
] != NULL
)
3097 /* ignore DMAR unit if no pci devices exist */
3098 if (i
== drhd
->devices_cnt
)
3106 for_each_drhd_unit(drhd
) {
3108 if (drhd
->ignored
|| drhd
->include_all
)
3111 for (i
= 0; i
< drhd
->devices_cnt
; i
++)
3112 if (drhd
->devices
[i
] &&
3113 !IS_GFX_DEVICE(drhd
->devices
[i
]))
3116 if (i
< drhd
->devices_cnt
)
3119 /* bypass IOMMU if it is just for gfx devices */
3121 for (i
= 0; i
< drhd
->devices_cnt
; i
++) {
3122 if (!drhd
->devices
[i
])
3124 drhd
->devices
[i
]->dev
.archdata
.iommu
= DUMMY_DEVICE_DOMAIN_INFO
;
3129 #ifdef CONFIG_SUSPEND
3130 static int init_iommu_hw(void)
3132 struct dmar_drhd_unit
*drhd
;
3133 struct intel_iommu
*iommu
= NULL
;
3135 for_each_active_iommu(iommu
, drhd
)
3137 dmar_reenable_qi(iommu
);
3139 for_each_active_iommu(iommu
, drhd
) {
3140 iommu_flush_write_buffer(iommu
);
3142 iommu_set_root_entry(iommu
);
3144 iommu
->flush
.flush_context(iommu
, 0, 0, 0,
3145 DMA_CCMD_GLOBAL_INVL
);
3146 iommu
->flush
.flush_iotlb(iommu
, 0, 0, 0,
3147 DMA_TLB_GLOBAL_FLUSH
);
3148 iommu_enable_translation(iommu
);
3149 iommu_disable_protect_mem_regions(iommu
);
3155 static void iommu_flush_all(void)
3157 struct dmar_drhd_unit
*drhd
;
3158 struct intel_iommu
*iommu
;
3160 for_each_active_iommu(iommu
, drhd
) {
3161 iommu
->flush
.flush_context(iommu
, 0, 0, 0,
3162 DMA_CCMD_GLOBAL_INVL
);
3163 iommu
->flush
.flush_iotlb(iommu
, 0, 0, 0,
3164 DMA_TLB_GLOBAL_FLUSH
);
3168 static int iommu_suspend(void)
3170 struct dmar_drhd_unit
*drhd
;
3171 struct intel_iommu
*iommu
= NULL
;
3174 for_each_active_iommu(iommu
, drhd
) {
3175 iommu
->iommu_state
= kzalloc(sizeof(u32
) * MAX_SR_DMAR_REGS
,
3177 if (!iommu
->iommu_state
)
3183 for_each_active_iommu(iommu
, drhd
) {
3184 iommu_disable_translation(iommu
);
3186 spin_lock_irqsave(&iommu
->register_lock
, flag
);
3188 iommu
->iommu_state
[SR_DMAR_FECTL_REG
] =
3189 readl(iommu
->reg
+ DMAR_FECTL_REG
);
3190 iommu
->iommu_state
[SR_DMAR_FEDATA_REG
] =
3191 readl(iommu
->reg
+ DMAR_FEDATA_REG
);
3192 iommu
->iommu_state
[SR_DMAR_FEADDR_REG
] =
3193 readl(iommu
->reg
+ DMAR_FEADDR_REG
);
3194 iommu
->iommu_state
[SR_DMAR_FEUADDR_REG
] =
3195 readl(iommu
->reg
+ DMAR_FEUADDR_REG
);
3197 spin_unlock_irqrestore(&iommu
->register_lock
, flag
);
3202 for_each_active_iommu(iommu
, drhd
)
3203 kfree(iommu
->iommu_state
);
3208 static void iommu_resume(void)
3210 struct dmar_drhd_unit
*drhd
;
3211 struct intel_iommu
*iommu
= NULL
;
3214 if (init_iommu_hw()) {
3215 WARN(1, "IOMMU setup failed, DMAR can not resume!\n");
3219 for_each_active_iommu(iommu
, drhd
) {
3221 spin_lock_irqsave(&iommu
->register_lock
, flag
);
3223 writel(iommu
->iommu_state
[SR_DMAR_FECTL_REG
],
3224 iommu
->reg
+ DMAR_FECTL_REG
);
3225 writel(iommu
->iommu_state
[SR_DMAR_FEDATA_REG
],
3226 iommu
->reg
+ DMAR_FEDATA_REG
);
3227 writel(iommu
->iommu_state
[SR_DMAR_FEADDR_REG
],
3228 iommu
->reg
+ DMAR_FEADDR_REG
);
3229 writel(iommu
->iommu_state
[SR_DMAR_FEUADDR_REG
],
3230 iommu
->reg
+ DMAR_FEUADDR_REG
);
3232 spin_unlock_irqrestore(&iommu
->register_lock
, flag
);
3235 for_each_active_iommu(iommu
, drhd
)
3236 kfree(iommu
->iommu_state
);
3239 static struct syscore_ops iommu_syscore_ops
= {
3240 .resume
= iommu_resume
,
3241 .suspend
= iommu_suspend
,
3244 static void __init
init_iommu_pm_ops(void)
3246 register_syscore_ops(&iommu_syscore_ops
);
3250 static inline int init_iommu_pm_ops(void) { }
3251 #endif /* CONFIG_PM */
3254 * Here we only respond to action of unbound device from driver.
3256 * Added device is not attached to its DMAR domain here yet. That will happen
3257 * when mapping the device to iova.
3259 static int device_notifier(struct notifier_block
*nb
,
3260 unsigned long action
, void *data
)
3262 struct device
*dev
= data
;
3263 struct pci_dev
*pdev
= to_pci_dev(dev
);
3264 struct dmar_domain
*domain
;
3266 if (iommu_no_mapping(dev
))
3269 domain
= find_domain(pdev
);
3273 if (action
== BUS_NOTIFY_UNBOUND_DRIVER
&& !iommu_pass_through
) {
3274 domain_remove_one_dev_info(domain
, pdev
);
3276 if (!(domain
->flags
& DOMAIN_FLAG_VIRTUAL_MACHINE
) &&
3277 !(domain
->flags
& DOMAIN_FLAG_STATIC_IDENTITY
) &&
3278 list_empty(&domain
->devices
))
3279 domain_exit(domain
);
3285 static struct notifier_block device_nb
= {
3286 .notifier_call
= device_notifier
,
3289 int __init
intel_iommu_init(void)
3294 /* VT-d is required for a TXT/tboot launch, so enforce that */
3295 force_on
= tboot_force_iommu();
3297 if (dmar_table_init()) {
3299 panic("tboot: Failed to initialize DMAR table\n");
3303 if (dmar_dev_scope_init()) {
3305 panic("tboot: Failed to initialize DMAR device scope\n");
3310 * Check the need for DMA-remapping initialization now.
3311 * Above initialization will also be used by Interrupt-remapping.
3313 if (no_iommu
|| dmar_disabled
)
3316 if (iommu_init_mempool()) {
3318 panic("tboot: Failed to initialize iommu memory\n");
3322 if (dmar_init_reserved_ranges()) {
3324 panic("tboot: Failed to reserve iommu ranges\n");
3328 init_no_remapping_devices();
3330 ret
= init_dmars(force_on
);
3333 panic("tboot: Failed to initialize DMARs\n");
3334 printk(KERN_ERR
"IOMMU: dmar init failed\n");
3335 put_iova_domain(&reserved_iova_list
);
3336 iommu_exit_mempool();
3340 "PCI-DMA: Intel(R) Virtualization Technology for Directed I/O\n");
3342 init_timer(&unmap_timer
);
3343 #ifdef CONFIG_SWIOTLB
3346 dma_ops
= &intel_dma_ops
;
3348 init_iommu_pm_ops();
3350 register_iommu(&intel_iommu_ops
);
3352 bus_register_notifier(&pci_bus_type
, &device_nb
);
3357 static void iommu_detach_dependent_devices(struct intel_iommu
*iommu
,
3358 struct pci_dev
*pdev
)
3360 struct pci_dev
*tmp
, *parent
;
3362 if (!iommu
|| !pdev
)
3365 /* dependent device detach */
3366 tmp
= pci_find_upstream_pcie_bridge(pdev
);
3367 /* Secondary interface's bus number and devfn 0 */
3369 parent
= pdev
->bus
->self
;
3370 while (parent
!= tmp
) {
3371 iommu_detach_dev(iommu
, parent
->bus
->number
,
3373 parent
= parent
->bus
->self
;
3375 if (pci_is_pcie(tmp
)) /* this is a PCIe-to-PCI bridge */
3376 iommu_detach_dev(iommu
,
3377 tmp
->subordinate
->number
, 0);
3378 else /* this is a legacy PCI bridge */
3379 iommu_detach_dev(iommu
, tmp
->bus
->number
,
3384 static void domain_remove_one_dev_info(struct dmar_domain
*domain
,
3385 struct pci_dev
*pdev
)
3387 struct device_domain_info
*info
;
3388 struct intel_iommu
*iommu
;
3389 unsigned long flags
;
3391 struct list_head
*entry
, *tmp
;
3393 iommu
= device_to_iommu(pci_domain_nr(pdev
->bus
), pdev
->bus
->number
,
3398 spin_lock_irqsave(&device_domain_lock
, flags
);
3399 list_for_each_safe(entry
, tmp
, &domain
->devices
) {
3400 info
= list_entry(entry
, struct device_domain_info
, link
);
3401 /* No need to compare PCI domain; it has to be the same */
3402 if (info
->bus
== pdev
->bus
->number
&&
3403 info
->devfn
== pdev
->devfn
) {
3404 list_del(&info
->link
);
3405 list_del(&info
->global
);
3407 info
->dev
->dev
.archdata
.iommu
= NULL
;
3408 spin_unlock_irqrestore(&device_domain_lock
, flags
);
3410 iommu_disable_dev_iotlb(info
);
3411 iommu_detach_dev(iommu
, info
->bus
, info
->devfn
);
3412 iommu_detach_dependent_devices(iommu
, pdev
);
3413 free_devinfo_mem(info
);
3415 spin_lock_irqsave(&device_domain_lock
, flags
);
3423 /* if there is no other devices under the same iommu
3424 * owned by this domain, clear this iommu in iommu_bmp
3425 * update iommu count and coherency
3427 if (iommu
== device_to_iommu(info
->segment
, info
->bus
,
3433 unsigned long tmp_flags
;
3434 spin_lock_irqsave(&domain
->iommu_lock
, tmp_flags
);
3435 clear_bit(iommu
->seq_id
, &domain
->iommu_bmp
);
3436 domain
->iommu_count
--;
3437 domain_update_iommu_cap(domain
);
3438 spin_unlock_irqrestore(&domain
->iommu_lock
, tmp_flags
);
3440 if (!(domain
->flags
& DOMAIN_FLAG_VIRTUAL_MACHINE
) &&
3441 !(domain
->flags
& DOMAIN_FLAG_STATIC_IDENTITY
)) {
3442 spin_lock_irqsave(&iommu
->lock
, tmp_flags
);
3443 clear_bit(domain
->id
, iommu
->domain_ids
);
3444 iommu
->domains
[domain
->id
] = NULL
;
3445 spin_unlock_irqrestore(&iommu
->lock
, tmp_flags
);
3449 spin_unlock_irqrestore(&device_domain_lock
, flags
);
3452 static void vm_domain_remove_all_dev_info(struct dmar_domain
*domain
)
3454 struct device_domain_info
*info
;
3455 struct intel_iommu
*iommu
;
3456 unsigned long flags1
, flags2
;
3458 spin_lock_irqsave(&device_domain_lock
, flags1
);
3459 while (!list_empty(&domain
->devices
)) {
3460 info
= list_entry(domain
->devices
.next
,
3461 struct device_domain_info
, link
);
3462 list_del(&info
->link
);
3463 list_del(&info
->global
);
3465 info
->dev
->dev
.archdata
.iommu
= NULL
;
3467 spin_unlock_irqrestore(&device_domain_lock
, flags1
);
3469 iommu_disable_dev_iotlb(info
);
3470 iommu
= device_to_iommu(info
->segment
, info
->bus
, info
->devfn
);
3471 iommu_detach_dev(iommu
, info
->bus
, info
->devfn
);
3472 iommu_detach_dependent_devices(iommu
, info
->dev
);
3474 /* clear this iommu in iommu_bmp, update iommu count
3477 spin_lock_irqsave(&domain
->iommu_lock
, flags2
);
3478 if (test_and_clear_bit(iommu
->seq_id
,
3479 &domain
->iommu_bmp
)) {
3480 domain
->iommu_count
--;
3481 domain_update_iommu_cap(domain
);
3483 spin_unlock_irqrestore(&domain
->iommu_lock
, flags2
);
3485 free_devinfo_mem(info
);
3486 spin_lock_irqsave(&device_domain_lock
, flags1
);
3488 spin_unlock_irqrestore(&device_domain_lock
, flags1
);
3491 /* domain id for virtual machine, it won't be set in context */
3492 static unsigned long vm_domid
;
3494 static struct dmar_domain
*iommu_alloc_vm_domain(void)
3496 struct dmar_domain
*domain
;
3498 domain
= alloc_domain_mem();
3502 domain
->id
= vm_domid
++;
3504 memset(&domain
->iommu_bmp
, 0, sizeof(unsigned long));
3505 domain
->flags
= DOMAIN_FLAG_VIRTUAL_MACHINE
;
3510 static int md_domain_init(struct dmar_domain
*domain
, int guest_width
)
3514 init_iova_domain(&domain
->iovad
, DMA_32BIT_PFN
);
3515 spin_lock_init(&domain
->iommu_lock
);
3517 domain_reserve_special_ranges(domain
);
3519 /* calculate AGAW */
3520 domain
->gaw
= guest_width
;
3521 adjust_width
= guestwidth_to_adjustwidth(guest_width
);
3522 domain
->agaw
= width_to_agaw(adjust_width
);
3524 INIT_LIST_HEAD(&domain
->devices
);
3526 domain
->iommu_count
= 0;
3527 domain
->iommu_coherency
= 0;
3528 domain
->iommu_snooping
= 0;
3529 domain
->max_addr
= 0;
3532 /* always allocate the top pgd */
3533 domain
->pgd
= (struct dma_pte
*)alloc_pgtable_page(domain
->nid
);
3536 domain_flush_cache(domain
, domain
->pgd
, PAGE_SIZE
);
3540 static void iommu_free_vm_domain(struct dmar_domain
*domain
)
3542 unsigned long flags
;
3543 struct dmar_drhd_unit
*drhd
;
3544 struct intel_iommu
*iommu
;
3546 unsigned long ndomains
;
3548 for_each_drhd_unit(drhd
) {
3551 iommu
= drhd
->iommu
;
3553 ndomains
= cap_ndoms(iommu
->cap
);
3554 for_each_set_bit(i
, iommu
->domain_ids
, ndomains
) {
3555 if (iommu
->domains
[i
] == domain
) {
3556 spin_lock_irqsave(&iommu
->lock
, flags
);
3557 clear_bit(i
, iommu
->domain_ids
);
3558 iommu
->domains
[i
] = NULL
;
3559 spin_unlock_irqrestore(&iommu
->lock
, flags
);
3566 static void vm_domain_exit(struct dmar_domain
*domain
)
3568 /* Domain 0 is reserved, so dont process it */
3572 vm_domain_remove_all_dev_info(domain
);
3574 put_iova_domain(&domain
->iovad
);
3577 dma_pte_clear_range(domain
, 0, DOMAIN_MAX_PFN(domain
->gaw
));
3579 /* free page tables */
3580 dma_pte_free_pagetable(domain
, 0, DOMAIN_MAX_PFN(domain
->gaw
));
3582 iommu_free_vm_domain(domain
);
3583 free_domain_mem(domain
);
3586 static int intel_iommu_domain_init(struct iommu_domain
*domain
)
3588 struct dmar_domain
*dmar_domain
;
3590 dmar_domain
= iommu_alloc_vm_domain();
3593 "intel_iommu_domain_init: dmar_domain == NULL\n");
3596 if (md_domain_init(dmar_domain
, DEFAULT_DOMAIN_ADDRESS_WIDTH
)) {
3598 "intel_iommu_domain_init() failed\n");
3599 vm_domain_exit(dmar_domain
);
3602 domain
->priv
= dmar_domain
;
3607 static void intel_iommu_domain_destroy(struct iommu_domain
*domain
)
3609 struct dmar_domain
*dmar_domain
= domain
->priv
;
3611 domain
->priv
= NULL
;
3612 vm_domain_exit(dmar_domain
);
3615 static int intel_iommu_attach_device(struct iommu_domain
*domain
,
3618 struct dmar_domain
*dmar_domain
= domain
->priv
;
3619 struct pci_dev
*pdev
= to_pci_dev(dev
);
3620 struct intel_iommu
*iommu
;
3623 /* normally pdev is not mapped */
3624 if (unlikely(domain_context_mapped(pdev
))) {
3625 struct dmar_domain
*old_domain
;
3627 old_domain
= find_domain(pdev
);
3629 if (dmar_domain
->flags
& DOMAIN_FLAG_VIRTUAL_MACHINE
||
3630 dmar_domain
->flags
& DOMAIN_FLAG_STATIC_IDENTITY
)
3631 domain_remove_one_dev_info(old_domain
, pdev
);
3633 domain_remove_dev_info(old_domain
);
3637 iommu
= device_to_iommu(pci_domain_nr(pdev
->bus
), pdev
->bus
->number
,
3642 /* check if this iommu agaw is sufficient for max mapped address */
3643 addr_width
= agaw_to_width(iommu
->agaw
);
3644 if (addr_width
> cap_mgaw(iommu
->cap
))
3645 addr_width
= cap_mgaw(iommu
->cap
);
3647 if (dmar_domain
->max_addr
> (1LL << addr_width
)) {
3648 printk(KERN_ERR
"%s: iommu width (%d) is not "
3649 "sufficient for the mapped address (%llx)\n",
3650 __func__
, addr_width
, dmar_domain
->max_addr
);
3653 dmar_domain
->gaw
= addr_width
;
3656 * Knock out extra levels of page tables if necessary
3658 while (iommu
->agaw
< dmar_domain
->agaw
) {
3659 struct dma_pte
*pte
;
3661 pte
= dmar_domain
->pgd
;
3662 if (dma_pte_present(pte
)) {
3663 dmar_domain
->pgd
= (struct dma_pte
*)
3664 phys_to_virt(dma_pte_addr(pte
));
3665 free_pgtable_page(pte
);
3667 dmar_domain
->agaw
--;
3670 return domain_add_dev_info(dmar_domain
, pdev
, CONTEXT_TT_MULTI_LEVEL
);
3673 static void intel_iommu_detach_device(struct iommu_domain
*domain
,
3676 struct dmar_domain
*dmar_domain
= domain
->priv
;
3677 struct pci_dev
*pdev
= to_pci_dev(dev
);
3679 domain_remove_one_dev_info(dmar_domain
, pdev
);
3682 static int intel_iommu_map(struct iommu_domain
*domain
,
3683 unsigned long iova
, phys_addr_t hpa
,
3684 int gfp_order
, int iommu_prot
)
3686 struct dmar_domain
*dmar_domain
= domain
->priv
;
3692 if (iommu_prot
& IOMMU_READ
)
3693 prot
|= DMA_PTE_READ
;
3694 if (iommu_prot
& IOMMU_WRITE
)
3695 prot
|= DMA_PTE_WRITE
;
3696 if ((iommu_prot
& IOMMU_CACHE
) && dmar_domain
->iommu_snooping
)
3697 prot
|= DMA_PTE_SNP
;
3699 size
= PAGE_SIZE
<< gfp_order
;
3700 max_addr
= iova
+ size
;
3701 if (dmar_domain
->max_addr
< max_addr
) {
3704 /* check if minimum agaw is sufficient for mapped address */
3705 end
= __DOMAIN_MAX_ADDR(dmar_domain
->gaw
) + 1;
3706 if (end
< max_addr
) {
3707 printk(KERN_ERR
"%s: iommu width (%d) is not "
3708 "sufficient for the mapped address (%llx)\n",
3709 __func__
, dmar_domain
->gaw
, max_addr
);
3712 dmar_domain
->max_addr
= max_addr
;
3714 /* Round up size to next multiple of PAGE_SIZE, if it and
3715 the low bits of hpa would take us onto the next page */
3716 size
= aligned_nrpages(hpa
, size
);
3717 ret
= domain_pfn_mapping(dmar_domain
, iova
>> VTD_PAGE_SHIFT
,
3718 hpa
>> VTD_PAGE_SHIFT
, size
, prot
);
3722 static int intel_iommu_unmap(struct iommu_domain
*domain
,
3723 unsigned long iova
, int gfp_order
)
3725 struct dmar_domain
*dmar_domain
= domain
->priv
;
3726 size_t size
= PAGE_SIZE
<< gfp_order
;
3728 dma_pte_clear_range(dmar_domain
, iova
>> VTD_PAGE_SHIFT
,
3729 (iova
+ size
- 1) >> VTD_PAGE_SHIFT
);
3731 if (dmar_domain
->max_addr
== iova
+ size
)
3732 dmar_domain
->max_addr
= iova
;
3737 static phys_addr_t
intel_iommu_iova_to_phys(struct iommu_domain
*domain
,
3740 struct dmar_domain
*dmar_domain
= domain
->priv
;
3741 struct dma_pte
*pte
;
3744 pte
= pfn_to_dma_pte(dmar_domain
, iova
>> VTD_PAGE_SHIFT
);
3746 phys
= dma_pte_addr(pte
);
3751 static int intel_iommu_domain_has_cap(struct iommu_domain
*domain
,
3754 struct dmar_domain
*dmar_domain
= domain
->priv
;
3756 if (cap
== IOMMU_CAP_CACHE_COHERENCY
)
3757 return dmar_domain
->iommu_snooping
;
3758 if (cap
== IOMMU_CAP_INTR_REMAP
)
3759 return intr_remapping_enabled
;
3764 static struct iommu_ops intel_iommu_ops
= {
3765 .domain_init
= intel_iommu_domain_init
,
3766 .domain_destroy
= intel_iommu_domain_destroy
,
3767 .attach_dev
= intel_iommu_attach_device
,
3768 .detach_dev
= intel_iommu_detach_device
,
3769 .map
= intel_iommu_map
,
3770 .unmap
= intel_iommu_unmap
,
3771 .iova_to_phys
= intel_iommu_iova_to_phys
,
3772 .domain_has_cap
= intel_iommu_domain_has_cap
,
3775 static void __devinit
quirk_iommu_rwbf(struct pci_dev
*dev
)
3778 * Mobile 4 Series Chipset neglects to set RWBF capability,
3781 printk(KERN_INFO
"DMAR: Forcing write-buffer flush capability\n");
3784 /* https://bugzilla.redhat.com/show_bug.cgi?id=538163 */
3785 if (dev
->revision
== 0x07) {
3786 printk(KERN_INFO
"DMAR: Disabling IOMMU for graphics on this chipset\n");
3791 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL
, 0x2a40, quirk_iommu_rwbf
);
/* Graphics Control (GGC) register fields used by the Calpella quirk below.
 * NOTE(review): the GGC register-offset #define itself lives just above
 * this span in the original file — verify it is present. */
#define GGC_MEMORY_SIZE_MASK	(0xf << 8)
#define GGC_MEMORY_SIZE_NONE	(0x0 << 8)
#define GGC_MEMORY_SIZE_1M	(0x1 << 8)
#define GGC_MEMORY_SIZE_2M	(0x3 << 8)
#define GGC_MEMORY_VT_ENABLED	(0x8 << 8)
#define GGC_MEMORY_SIZE_2M_VT	(0x9 << 8)
#define GGC_MEMORY_SIZE_3M_VT	(0xa << 8)
#define GGC_MEMORY_SIZE_4M_VT	(0xb << 8)
3803 static void __devinit
quirk_calpella_no_shadow_gtt(struct pci_dev
*dev
)
3807 if (pci_read_config_word(dev
, GGC
, &ggc
))
3810 if (!(ggc
& GGC_MEMORY_VT_ENABLED
)) {
3811 printk(KERN_INFO
"DMAR: BIOS has allocated no shadow GTT; disabling IOMMU for graphics\n");
3815 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL
, 0x0040, quirk_calpella_no_shadow_gtt
);
3816 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL
, 0x0044, quirk_calpella_no_shadow_gtt
);
3817 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL
, 0x0062, quirk_calpella_no_shadow_gtt
);
3818 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL
, 0x006a, quirk_calpella_no_shadow_gtt
);
3820 /* On Tylersburg chipsets, some BIOSes have been known to enable the
3821 ISOCH DMAR unit for the Azalia sound device, but not give it any
3822 TLB entries, which causes it to deadlock. Check for that. We do
3823 this in a function called from init_dmars(), instead of in a PCI
3824 quirk, because we don't want to print the obnoxious "BIOS broken"
3825 message if VT-d is actually disabled.
3827 static void __init
check_tylersburg_isoch(void)
3829 struct pci_dev
*pdev
;
3830 uint32_t vtisochctrl
;
3832 /* If there's no Azalia in the system anyway, forget it. */
3833 pdev
= pci_get_device(PCI_VENDOR_ID_INTEL
, 0x3a3e, NULL
);
3838 /* System Management Registers. Might be hidden, in which case
3839 we can't do the sanity check. But that's OK, because the
3840 known-broken BIOSes _don't_ actually hide it, so far. */
3841 pdev
= pci_get_device(PCI_VENDOR_ID_INTEL
, 0x342e, NULL
);
3845 if (pci_read_config_dword(pdev
, 0x188, &vtisochctrl
)) {
3852 /* If Azalia DMA is routed to the non-isoch DMAR unit, fine. */
3853 if (vtisochctrl
& 1)
3856 /* Drop all bits other than the number of TLB entries */
3857 vtisochctrl
&= 0x1c;
3859 /* If we have the recommended number of TLB entries (16), fine. */
3860 if (vtisochctrl
== 0x10)
3863 /* Zero TLB entries? You get to ride the short bus to school. */
3865 WARN(1, "Your BIOS is broken; DMA routed to ISOCH DMAR unit but no TLB space.\n"
3866 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
3867 dmi_get_system_info(DMI_BIOS_VENDOR
),
3868 dmi_get_system_info(DMI_BIOS_VERSION
),
3869 dmi_get_system_info(DMI_PRODUCT_VERSION
));
3870 iommu_identity_mapping
|= IDENTMAP_AZALIA
;
3874 printk(KERN_WARNING
"DMAR: Recommended TLB entries for ISOCH unit is 16; your BIOS set %d\n",