/*
 * Copyright (c) 2006, Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
 * Place - Suite 330, Boston, MA 02111-1307 USA.
 *
 * Copyright (C) Ashok Raj <ashok.raj@intel.com>
 * Copyright (C) Shaohua Li <shaohua.li@intel.com>
 * Copyright (C) Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
 */
#include <linux/init.h>
#include <linux/bitmap.h>
#include <linux/slab.h>
#include <linux/irq.h>
#include <linux/interrupt.h>
#include <linux/sysdev.h>
#include <linux/spinlock.h>
#include <linux/pci.h>
#include <linux/dmar.h>
#include <linux/dma-mapping.h>
#include <linux/mempool.h>
#include "intel-iommu.h"
#include <asm/proto.h> /* force_iommu in this header in x86-64 */
#include <asm/cacheflush.h>
#define IS_GFX_DEVICE(pdev) ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY)
#define IS_ISA_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA)

#define IOAPIC_RANGE_START      (0xfee00000)
#define IOAPIC_RANGE_END        (0xfeefffff)
#define IOVA_START_ADDR         (0x1000)

#define DEFAULT_DOMAIN_ADDRESS_WIDTH 48

#define DMAR_OPERATION_TIMEOUT (HZ*60) /* 1 minute */

#define DOMAIN_MAX_ADDR(gaw) ((((u64)1) << gaw) - 1)
static void domain_remove_dev_info(struct dmar_domain *domain);

static int dmar_disabled;
static int __initdata dmar_map_gfx = 1;
static int dmar_forcedac;

#define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1))
static DEFINE_SPINLOCK(device_domain_lock);
static LIST_HEAD(device_domain_list);
static int __init intel_iommu_setup(char *str)
{
        if (!str)
                return -EINVAL;
        while (*str) {
                if (!strncmp(str, "off", 3)) {
                        dmar_disabled = 1;
                        printk(KERN_INFO
                                "Intel-IOMMU: disabled\n");
                } else if (!strncmp(str, "igfx_off", 8)) {
                        dmar_map_gfx = 0;
                        printk(KERN_INFO
                                "Intel-IOMMU: disable GFX device mapping\n");
                } else if (!strncmp(str, "forcedac", 8)) {
                        printk(KERN_INFO
                                "Intel-IOMMU: Forcing DAC for PCI devices\n");
                        dmar_forcedac = 1;
                }

                str += strcspn(str, ",");
                while (*str == ',')
                        str++;
        }
        return 0;
}
__setup("intel_iommu=", intel_iommu_setup);
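
/*
 * Kernel parameter summary for the parser above:
 *   intel_iommu=off       - set dmar_disabled and skip DMAR initialization
 *   intel_iommu=igfx_off  - clear dmar_map_gfx, i.e. do not set up IOMMU
 *                           mappings for graphics devices
 *   intel_iommu=forcedac  - set dmar_forcedac so IOVAs are allocated from
 *                           the device's full dma_mask instead of preferring
 *                           the 32-bit range
 * Multiple options may be combined, separated by commas.
 */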
static struct kmem_cache *iommu_domain_cache;
static struct kmem_cache *iommu_devinfo_cache;
static struct kmem_cache *iommu_iova_cache;

static inline void *iommu_kmem_cache_alloc(struct kmem_cache *cachep)
{
        unsigned int flags;
        void *vaddr;

        /* trying to avoid low memory issues */
        flags = current->flags & PF_MEMALLOC;
        current->flags |= PF_MEMALLOC;
        vaddr = kmem_cache_alloc(cachep, GFP_ATOMIC);
        current->flags &= (~PF_MEMALLOC | flags);
        return vaddr;
}
static inline void *alloc_pgtable_page(void)
{
        unsigned int flags;
        void *vaddr;

        /* trying to avoid low memory issues */
        flags = current->flags & PF_MEMALLOC;
        current->flags |= PF_MEMALLOC;
        vaddr = (void *)get_zeroed_page(GFP_ATOMIC);
        current->flags &= (~PF_MEMALLOC | flags);
        return vaddr;
}
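
/*
 * Note on the PF_MEMALLOC handling in the two allocators above: raising
 * PF_MEMALLOC on the current task lets these GFP_ATOMIC allocations dip
 * into the emergency reserves, since translation-table and domain metadata
 * may be needed while memory is tight.  The restore expression
 * "current->flags &= (~PF_MEMALLOC | flags)" only clears the bit if it was
 * not already set on entry, so a caller that was itself running with
 * PF_MEMALLOC keeps it.
 */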
static inline void free_pgtable_page(void *vaddr)
{
        free_page((unsigned long)vaddr);
}

static inline void *alloc_domain_mem(void)
{
        return iommu_kmem_cache_alloc(iommu_domain_cache);
}

static inline void free_domain_mem(void *vaddr)
{
        kmem_cache_free(iommu_domain_cache, vaddr);
}

static inline void *alloc_devinfo_mem(void)
{
        return iommu_kmem_cache_alloc(iommu_devinfo_cache);
}

static inline void free_devinfo_mem(void *vaddr)
{
        kmem_cache_free(iommu_devinfo_cache, vaddr);
}

struct iova *alloc_iova_mem(void)
{
        return iommu_kmem_cache_alloc(iommu_iova_cache);
}

void free_iova_mem(struct iova *iova)
{
        kmem_cache_free(iommu_iova_cache, iova);
}
static inline void __iommu_flush_cache(
        struct intel_iommu *iommu, void *addr, int size)
{
        if (!ecap_coherent(iommu->ecap))
                clflush_cache_range(addr, size);
}
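
/*
 * If the extended capability register reports a non-coherent hardware page
 * walk (ecap_coherent() == 0), the IOMMU fetches root/context/page-table
 * entries straight from memory without snooping CPU caches, so every
 * software update to those structures has to be written back with clflush
 * before it becomes visible to the hardware.  On coherent implementations
 * __iommu_flush_cache() is effectively a no-op.
 */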
/* Gets context entry for a given bus and devfn */
static struct context_entry * device_to_context_entry(struct intel_iommu *iommu,
                u8 bus, u8 devfn)
{
        struct root_entry *root;
        struct context_entry *context;
        unsigned long phy_addr;
        unsigned long flags;

        spin_lock_irqsave(&iommu->lock, flags);
        root = &iommu->root_entry[bus];
        context = get_context_addr_from_root(root);
        if (!context) {
                context = (struct context_entry *)alloc_pgtable_page();
                if (!context) {
                        spin_unlock_irqrestore(&iommu->lock, flags);
                        return NULL;
                }
                __iommu_flush_cache(iommu, (void *)context, PAGE_SIZE_4K);
                phy_addr = virt_to_phys((void *)context);
                set_root_value(root, phy_addr);
                set_root_present(root);
                __iommu_flush_cache(iommu, root, sizeof(*root));
        }
        spin_unlock_irqrestore(&iommu->lock, flags);
        return &context[devfn];
}
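
/*
 * Table layout reminder: the root table holds one entry per PCI bus (256
 * entries), and each present root entry points to a 4K context table with
 * one entry per devfn (256 entries).  device_to_context_entry() allocates
 * the per-bus context table lazily, under iommu->lock, the first time any
 * device on that bus needs a context entry.
 */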
static int device_context_mapped(struct intel_iommu *iommu, u8 bus, u8 devfn)
{
        struct root_entry *root;
        struct context_entry *context;
        int ret;
        unsigned long flags;

        spin_lock_irqsave(&iommu->lock, flags);
        root = &iommu->root_entry[bus];
        context = get_context_addr_from_root(root);
        if (!context) {
                ret = 0;
                goto out;
        }
        ret = context_present(context[devfn]);
out:
        spin_unlock_irqrestore(&iommu->lock, flags);
        return ret;
}
static void clear_context_table(struct intel_iommu *iommu, u8 bus, u8 devfn)
{
        struct root_entry *root;
        struct context_entry *context;
        unsigned long flags;

        spin_lock_irqsave(&iommu->lock, flags);
        root = &iommu->root_entry[bus];
        context = get_context_addr_from_root(root);
        if (context) {
                context_clear_entry(context[devfn]);
                __iommu_flush_cache(iommu, &context[devfn],
                        sizeof(*context));
        }
        spin_unlock_irqrestore(&iommu->lock, flags);
}
static void free_context_table(struct intel_iommu *iommu)
{
        struct root_entry *root;
        int i;
        unsigned long flags;
        struct context_entry *context;

        spin_lock_irqsave(&iommu->lock, flags);
        if (!iommu->root_entry) {
                goto out;
        }
        for (i = 0; i < ROOT_ENTRY_NR; i++) {
                root = &iommu->root_entry[i];
                context = get_context_addr_from_root(root);
                if (context)
                        free_pgtable_page(context);
        }
        free_pgtable_page(iommu->root_entry);
        iommu->root_entry = NULL;
out:
        spin_unlock_irqrestore(&iommu->lock, flags);
}
/* page table handling */
#define LEVEL_STRIDE            (9)
#define LEVEL_MASK              (((u64)1 << LEVEL_STRIDE) - 1)

static inline int agaw_to_level(int agaw)
{
        return agaw + 2;
}

static inline int agaw_to_width(int agaw)
{
        return 30 + agaw * LEVEL_STRIDE;
}

static inline int width_to_agaw(int width)
{
        return (width - 30) / LEVEL_STRIDE;
}

static inline unsigned int level_to_offset_bits(int level)
{
        return (12 + (level - 1) * LEVEL_STRIDE);
}

static inline int address_level_offset(u64 addr, int level)
{
        return ((addr >> level_to_offset_bits(level)) & LEVEL_MASK);
}

static inline u64 level_mask(int level)
{
        return ((u64)-1 << level_to_offset_bits(level));
}

static inline u64 level_size(int level)
{
        return ((u64)1 << level_to_offset_bits(level));
}

static inline u64 align_to_level(u64 addr, int level)
{
        return ((addr + level_size(level) - 1) & level_mask(level));
}
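
/*
 * Worked example for the AGAW helpers above, assuming the usual VT-d
 * encoding (AGAW 1/2/3 selecting 3/4/5-level tables): a 48-bit address
 * width gives width_to_agaw(48) = (48 - 30) / 9 = 2, agaw_to_level(2) = 4
 * levels, and level_to_offset_bits() returns 12/21/30/39 for levels 1..4,
 * i.e. each level translates 9 address bits above the 4K page offset.
 */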
static struct dma_pte * addr_to_dma_pte(struct dmar_domain *domain, u64 addr)
{
        int addr_width = agaw_to_width(domain->agaw);
        struct dma_pte *parent, *pte = NULL;
        int level = agaw_to_level(domain->agaw);
        int offset;
        unsigned long flags;

        BUG_ON(!domain->pgd);

        addr &= (((u64)1) << addr_width) - 1;
        parent = domain->pgd;

        spin_lock_irqsave(&domain->mapping_lock, flags);
        while (level > 0) {
                void *tmp_page;

                offset = address_level_offset(addr, level);
                pte = &parent[offset];
                if (level == 1)
                        break;

                if (!dma_pte_present(*pte)) {
                        tmp_page = alloc_pgtable_page();

                        if (!tmp_page) {
                                spin_unlock_irqrestore(&domain->mapping_lock,
                                        flags);
                                return NULL;
                        }
                        __iommu_flush_cache(domain->iommu, tmp_page,
                                        PAGE_SIZE_4K);
                        dma_set_pte_addr(*pte, virt_to_phys(tmp_page));
                        /*
                         * high level table always sets r/w, last level page
                         * table control read/write
                         */
                        dma_set_pte_readable(*pte);
                        dma_set_pte_writable(*pte);
                        __iommu_flush_cache(domain->iommu, pte, sizeof(*pte));
                }
                parent = phys_to_virt(dma_pte_addr(*pte));
                level--;
        }

        spin_unlock_irqrestore(&domain->mapping_lock, flags);
        return pte;
}
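
/*
 * addr_to_dma_pte() walks the domain page table top-down and allocates any
 * missing intermediate directory pages along the way, marking them both
 * readable and writable; only the returned level-1 (leaf) PTE carries the
 * real access permissions for the mapping.  The returned pointer is the
 * leaf PTE covering the 4K page that contains @addr.
 */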
/* return address's pte at specific level */
static struct dma_pte *dma_addr_level_pte(struct dmar_domain *domain, u64 addr,
                int level)
{
        struct dma_pte *parent, *pte = NULL;
        int total = agaw_to_level(domain->agaw);
        int offset;

        parent = domain->pgd;
        while (level <= total) {
                offset = address_level_offset(addr, total);
                pte = &parent[offset];
                if (level == total)
                        return pte;

                if (!dma_pte_present(*pte))
                        break;
                parent = phys_to_virt(dma_pte_addr(*pte));
                total--;
        }
        return NULL;
}
/* clear one page's page table */
static void dma_pte_clear_one(struct dmar_domain *domain, u64 addr)
{
        struct dma_pte *pte = NULL;

        /* get last level pte */
        pte = dma_addr_level_pte(domain, addr, 1);

        if (pte) {
                dma_clear_pte(*pte);
                __iommu_flush_cache(domain->iommu, pte, sizeof(*pte));
        }
}
/* clear last level pte, a tlb flush should be followed */
static void dma_pte_clear_range(struct dmar_domain *domain, u64 start, u64 end)
{
        int addr_width = agaw_to_width(domain->agaw);

        start &= (((u64)1) << addr_width) - 1;
        end &= (((u64)1) << addr_width) - 1;
        /* in case it's partial page */
        start = PAGE_ALIGN_4K(start);
        end &= PAGE_MASK_4K;

        /* we don't need lock here, nobody else touches the iova range */
        while (start < end) {
                dma_pte_clear_one(domain, start);
                start += PAGE_SIZE_4K;
        }
}
/* free page table pages. last level pte should already be cleared */
static void dma_pte_free_pagetable(struct dmar_domain *domain,
        u64 start, u64 end)
{
        int addr_width = agaw_to_width(domain->agaw);
        struct dma_pte *pte;
        int total = agaw_to_level(domain->agaw);
        int level;
        u64 tmp;

        start &= (((u64)1) << addr_width) - 1;
        end &= (((u64)1) << addr_width) - 1;

        /* we don't need lock here, nobody else touches the iova range */
        level = 2;
        while (level <= total) {
                tmp = align_to_level(start, level);
                if (tmp >= end || (tmp + level_size(level) > end))
                        return;

                while (tmp < end) {
                        pte = dma_addr_level_pte(domain, tmp, level);
                        if (pte) {
                                free_pgtable_page(
                                        phys_to_virt(dma_pte_addr(*pte)));
                                dma_clear_pte(*pte);
                                __iommu_flush_cache(domain->iommu,
                                                pte, sizeof(*pte));
                        }
                        tmp += level_size(level);
                }
                level++;
        }
        /* free pgd */
        if (start == 0 && end >= ((((u64)1) << addr_width) - 1)) {
                free_pgtable_page(domain->pgd);
                domain->pgd = NULL;
        }
}
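
/*
 * Freeing order above: level-1 PTEs are expected to have been cleared by
 * dma_pte_clear_range() already, so directory pages are released bottom-up
 * starting at level 2, and only ranges fully covered at a given level are
 * touched.  The top-level pgd itself is released only when the entire
 * domain address space has been freed.
 */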
static int iommu_alloc_root_entry(struct intel_iommu *iommu)
{
        struct root_entry *root;
        unsigned long flags;

        root = (struct root_entry *)alloc_pgtable_page();
        if (!root)
                return -ENOMEM;

        __iommu_flush_cache(iommu, root, PAGE_SIZE_4K);

        spin_lock_irqsave(&iommu->lock, flags);
        iommu->root_entry = root;
        spin_unlock_irqrestore(&iommu->lock, flags);

        return 0;
}
#define IOMMU_WAIT_OP(iommu, offset, op, cond, sts) \
{\
        unsigned long start_time = jiffies;\
        while (1) {\
                sts = op(iommu->reg + offset);\
                if (cond)\
                        break;\
                if (time_after(jiffies, start_time + DMAR_OPERATION_TIMEOUT))\
                        panic("DMAR hardware is malfunctioning\n");\
                cpu_relax();\
        }\
}
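
/*
 * IOMMU_WAIT_OP() busy-polls a status register: it re-reads the register
 * with "op" (readl or dmar_readq) into "sts" until "cond" evaluates true,
 * and panics if the hardware has not responded within
 * DMAR_OPERATION_TIMEOUT (one minute).  It is used after every command
 * written to the global command and invalidation registers below.
 */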
static void iommu_set_root_entry(struct intel_iommu *iommu)
{
        void *addr;
        u32 cmd, sts;
        unsigned long flag;

        addr = iommu->root_entry;

        spin_lock_irqsave(&iommu->register_lock, flag);
        dmar_writeq(iommu->reg + DMAR_RTADDR_REG, virt_to_phys(addr));

        cmd = iommu->gcmd | DMA_GCMD_SRTP;
        writel(cmd, iommu->reg + DMAR_GCMD_REG);

        /* Make sure hardware complete it */
        IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
                readl, (sts & DMA_GSTS_RTPS), sts);

        spin_unlock_irqrestore(&iommu->register_lock, flag);
}
static void iommu_flush_write_buffer(struct intel_iommu *iommu)
{
        u32 val;
        unsigned long flag;

        if (!cap_rwbf(iommu->cap))
                return;
        val = iommu->gcmd | DMA_GCMD_WBF;

        spin_lock_irqsave(&iommu->register_lock, flag);
        writel(val, iommu->reg + DMAR_GCMD_REG);

        /* Make sure hardware complete it */
        IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
                readl, (!(val & DMA_GSTS_WBFS)), val);

        spin_unlock_irqrestore(&iommu->register_lock, flag);
}
/* return value determines if we need a write buffer flush */
static int __iommu_flush_context(struct intel_iommu *iommu,
        u16 did, u16 source_id, u8 function_mask, u64 type,
        int non_present_entry_flush)
{
        u64 val = 0;
        unsigned long flag;

        /*
         * In the non-present entry flush case, if hardware doesn't cache
         * non-present entry we do nothing and if hardware cache non-present
         * entry, we flush entries of domain 0 (the domain id is used to cache
         * any non-present entries)
         */
        if (non_present_entry_flush) {
                if (!cap_caching_mode(iommu->cap))
                        return 1;
                else
                        did = 0;
        }

        switch (type) {
        case DMA_CCMD_GLOBAL_INVL:
                val = DMA_CCMD_GLOBAL_INVL;
                break;
        case DMA_CCMD_DOMAIN_INVL:
                val = DMA_CCMD_DOMAIN_INVL|DMA_CCMD_DID(did);
                break;
        case DMA_CCMD_DEVICE_INVL:
                val = DMA_CCMD_DEVICE_INVL|DMA_CCMD_DID(did)
                        | DMA_CCMD_SID(source_id) | DMA_CCMD_FM(function_mask);
                break;
        default:
                BUG();
        }
        val |= DMA_CCMD_ICC;

        spin_lock_irqsave(&iommu->register_lock, flag);
        dmar_writeq(iommu->reg + DMAR_CCMD_REG, val);

        /* Make sure hardware complete it */
        IOMMU_WAIT_OP(iommu, DMAR_CCMD_REG,
                dmar_readq, (!(val & DMA_CCMD_ICC)), val);

        spin_unlock_irqrestore(&iommu->register_lock, flag);

        /* flushing the context entry implicitly flushes the write buffer */
        return 0;
}

static int inline iommu_flush_context_global(struct intel_iommu *iommu,
        int non_present_entry_flush)
{
        return __iommu_flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL,
                non_present_entry_flush);
}

static int inline iommu_flush_context_domain(struct intel_iommu *iommu, u16 did,
        int non_present_entry_flush)
{
        return __iommu_flush_context(iommu, did, 0, 0, DMA_CCMD_DOMAIN_INVL,
                non_present_entry_flush);
}

static int inline iommu_flush_context_device(struct intel_iommu *iommu,
        u16 did, u16 source_id, u8 function_mask, int non_present_entry_flush)
{
        return __iommu_flush_context(iommu, did, source_id, function_mask,
                DMA_CCMD_DEVICE_INVL, non_present_entry_flush);
}
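
/*
 * Return-value convention for the context-cache flush helpers above (and
 * the IOTLB helpers below): 0 means an invalidation command was actually
 * issued, which also flushes the write buffer as a side effect; 1 means
 * nothing was done, so a caller installing a previously non-present entry
 * must call iommu_flush_write_buffer() itself.  With caching mode set,
 * non-present entries may be cached under domain id 0, hence the did = 0
 * override in the non_present_entry_flush path.
 */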
/* return value determines if we need a write buffer flush */
static int __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did,
        u64 addr, unsigned int size_order, u64 type,
        int non_present_entry_flush)
{
        int tlb_offset = ecap_iotlb_offset(iommu->ecap);
        u64 val = 0, val_iva = 0;
        unsigned long flag;

        /*
         * In the non-present entry flush case, if hardware doesn't cache
         * non-present entry we do nothing and if hardware cache non-present
         * entry, we flush entries of domain 0 (the domain id is used to cache
         * any non-present entries)
         */
        if (non_present_entry_flush) {
                if (!cap_caching_mode(iommu->cap))
                        return 1;
                else
                        did = 0;
        }

        switch (type) {
        case DMA_TLB_GLOBAL_FLUSH:
                /* global flush doesn't need set IVA_REG */
                val = DMA_TLB_GLOBAL_FLUSH|DMA_TLB_IVT;
                break;
        case DMA_TLB_DSI_FLUSH:
                val = DMA_TLB_DSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
                break;
        case DMA_TLB_PSI_FLUSH:
                val = DMA_TLB_PSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
                /* Note: always flush non-leaf currently */
                val_iva = size_order | addr;
                break;
        default:
                BUG();
        }
        /* Note: set drain read/write */
#if 0
        /*
         * This is probably to be super secure.. Looks like we can
         * ignore it without any impact.
         */
        if (cap_read_drain(iommu->cap))
                val |= DMA_TLB_READ_DRAIN;
#endif
        if (cap_write_drain(iommu->cap))
                val |= DMA_TLB_WRITE_DRAIN;

        spin_lock_irqsave(&iommu->register_lock, flag);
        /* Note: Only uses first TLB reg currently */
        if (val_iva)
                dmar_writeq(iommu->reg + tlb_offset, val_iva);
        dmar_writeq(iommu->reg + tlb_offset + 8, val);

        /* Make sure hardware complete it */
        IOMMU_WAIT_OP(iommu, tlb_offset + 8,
                dmar_readq, (!(val & DMA_TLB_IVT)), val);

        spin_unlock_irqrestore(&iommu->register_lock, flag);

        /* check IOTLB invalidation granularity */
        if (DMA_TLB_IAIG(val) == 0)
                printk(KERN_ERR "IOMMU: flush IOTLB failed\n");
        if (DMA_TLB_IAIG(val) != DMA_TLB_IIRG(type))
                pr_debug("IOMMU: tlb flush request %Lx, actual %Lx\n",
                        DMA_TLB_IIRG(type), DMA_TLB_IAIG(val));
        /* the IOTLB invalidation implicitly flushes the write buffer */
        return 0;
}
static int inline iommu_flush_iotlb_global(struct intel_iommu *iommu,
        int non_present_entry_flush)
{
        return __iommu_flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH,
                non_present_entry_flush);
}

static int inline iommu_flush_iotlb_dsi(struct intel_iommu *iommu, u16 did,
        int non_present_entry_flush)
{
        return __iommu_flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH,
                non_present_entry_flush);
}

static int iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did,
        u64 addr, unsigned int pages, int non_present_entry_flush)
{
        unsigned int mask;

        BUG_ON(addr & (~PAGE_MASK_4K));
        BUG_ON(pages == 0);

        /* Fallback to domain selective flush if no PSI support */
        if (!cap_pgsel_inv(iommu->cap))
                return iommu_flush_iotlb_dsi(iommu, did,
                        non_present_entry_flush);

        /*
         * PSI requires page size to be 2 ^ x, and the base address is
         * naturally aligned to the size
         */
        mask = ilog2(__roundup_pow_of_two(pages));
        /* Fallback to domain selective flush if size is too big */
        if (mask > cap_max_amask_val(iommu->cap))
                return iommu_flush_iotlb_dsi(iommu, did,
                        non_present_entry_flush);

        return __iommu_flush_iotlb(iommu, did, addr, mask,
                DMA_TLB_PSI_FLUSH, non_present_entry_flush);
}
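
/*
 * Page-selective invalidation (PSI) requires the page count to be a power
 * of two and the base address to be naturally aligned to that size, so
 * iommu_flush_iotlb_psi() rounds up: a request for 3 pages, for example,
 * becomes an order-2 (4 page) flush.  When PSI is not supported or the
 * rounded size exceeds cap_max_amask_val(), it falls back to a
 * domain-selective flush.
 */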
static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu)
{
        u32 pmen;
        unsigned long flags;

        spin_lock_irqsave(&iommu->register_lock, flags);
        pmen = readl(iommu->reg + DMAR_PMEN_REG);
        pmen &= ~DMA_PMEN_EPM;
        writel(pmen, iommu->reg + DMAR_PMEN_REG);

        /* wait for the protected region status bit to clear */
        IOMMU_WAIT_OP(iommu, DMAR_PMEN_REG,
                readl, !(pmen & DMA_PMEN_PRS), pmen);

        spin_unlock_irqrestore(&iommu->register_lock, flags);
}
static int iommu_enable_translation(struct intel_iommu *iommu)
{
        u32 sts;
        unsigned long flags;

        spin_lock_irqsave(&iommu->register_lock, flags);
        writel(iommu->gcmd|DMA_GCMD_TE, iommu->reg + DMAR_GCMD_REG);

        /* Make sure hardware complete it */
        IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
                readl, (sts & DMA_GSTS_TES), sts);

        iommu->gcmd |= DMA_GCMD_TE;
        spin_unlock_irqrestore(&iommu->register_lock, flags);
        return 0;
}

static int iommu_disable_translation(struct intel_iommu *iommu)
{
        u32 sts;
        unsigned long flag;

        spin_lock_irqsave(&iommu->register_lock, flag);
        iommu->gcmd &= ~DMA_GCMD_TE;
        writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);

        /* Make sure hardware complete it */
        IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
                readl, (!(sts & DMA_GSTS_TES)), sts);

        spin_unlock_irqrestore(&iommu->register_lock, flag);
        return 0;
}
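
/*
 * iommu_enable_translation() and iommu_disable_translation() toggle
 * DMA_GCMD_TE in the global command register and then poll the TES bit in
 * the global status register (via IOMMU_WAIT_OP) until the hardware
 * confirms that DMA remapping has actually been switched on or off.
 */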
/* iommu interrupt handling. Most stuff are MSI-like. */

static const char *fault_reason_strings[] =
{
        "Software",
        "Present bit in root entry is clear",
        "Present bit in context entry is clear",
        "Invalid context entry",
        "Access beyond MGAW",
        "PTE Write access is not set",
        "PTE Read access is not set",
        "Next page table ptr is invalid",
        "Root table address invalid",
        "Context table ptr is invalid",
        "non-zero reserved fields in RTP",
        "non-zero reserved fields in CTP",
        "non-zero reserved fields in PTE",
};
#define MAX_FAULT_REASON_IDX    (ARRAY_SIZE(fault_reason_strings) - 1)

const char *dmar_get_fault_reason(u8 fault_reason)
{
        if (fault_reason > MAX_FAULT_REASON_IDX)
                return "Unknown";
        else
                return fault_reason_strings[fault_reason];
}
void dmar_msi_unmask(unsigned int irq)
{
        struct intel_iommu *iommu = get_irq_data(irq);
        unsigned long flag;

        /* unmask it */
        spin_lock_irqsave(&iommu->register_lock, flag);
        writel(0, iommu->reg + DMAR_FECTL_REG);
        /* Read a reg to force flush the post write */
        readl(iommu->reg + DMAR_FECTL_REG);
        spin_unlock_irqrestore(&iommu->register_lock, flag);
}

void dmar_msi_mask(unsigned int irq)
{
        unsigned long flag;
        struct intel_iommu *iommu = get_irq_data(irq);

        /* mask it */
        spin_lock_irqsave(&iommu->register_lock, flag);
        writel(DMA_FECTL_IM, iommu->reg + DMAR_FECTL_REG);
        /* Read a reg to force flush the post write */
        readl(iommu->reg + DMAR_FECTL_REG);
        spin_unlock_irqrestore(&iommu->register_lock, flag);
}

void dmar_msi_write(int irq, struct msi_msg *msg)
{
        struct intel_iommu *iommu = get_irq_data(irq);
        unsigned long flag;

        spin_lock_irqsave(&iommu->register_lock, flag);
        writel(msg->data, iommu->reg + DMAR_FEDATA_REG);
        writel(msg->address_lo, iommu->reg + DMAR_FEADDR_REG);
        writel(msg->address_hi, iommu->reg + DMAR_FEUADDR_REG);
        spin_unlock_irqrestore(&iommu->register_lock, flag);
}

void dmar_msi_read(int irq, struct msi_msg *msg)
{
        struct intel_iommu *iommu = get_irq_data(irq);
        unsigned long flag;

        spin_lock_irqsave(&iommu->register_lock, flag);
        msg->data = readl(iommu->reg + DMAR_FEDATA_REG);
        msg->address_lo = readl(iommu->reg + DMAR_FEADDR_REG);
        msg->address_hi = readl(iommu->reg + DMAR_FEUADDR_REG);
        spin_unlock_irqrestore(&iommu->register_lock, flag);
}
static int iommu_page_fault_do_one(struct intel_iommu *iommu, int type,
                u8 fault_reason, u16 source_id, u64 addr)
{
        const char *reason;

        reason = dmar_get_fault_reason(fault_reason);

        printk(KERN_ERR
                "DMAR:[%s] Request device [%02x:%02x.%d] "
                "fault addr %llx \n"
                "DMAR:[fault reason %02d] %s\n",
                (type ? "DMA Read" : "DMA Write"),
                (source_id >> 8), PCI_SLOT(source_id & 0xFF),
                PCI_FUNC(source_id & 0xFF), addr, fault_reason, reason);
        return 0;
}
#define PRIMARY_FAULT_REG_LEN (16)
static irqreturn_t iommu_page_fault(int irq, void *dev_id)
{
        struct intel_iommu *iommu = dev_id;
        int reg, fault_index;
        u32 fault_status;
        unsigned long flag;

        spin_lock_irqsave(&iommu->register_lock, flag);
        fault_status = readl(iommu->reg + DMAR_FSTS_REG);

        /* TBD: ignore advanced fault log currently */
        if (!(fault_status & DMA_FSTS_PPF))
                goto clear_overflow;

        fault_index = dma_fsts_fault_record_index(fault_status);
        reg = cap_fault_reg_offset(iommu->cap);
        while (1) {
                u8 fault_reason;
                u16 source_id;
                u64 guest_addr;
                int type;
                u32 data;

                /* highest 32 bits */
                data = readl(iommu->reg + reg +
                                fault_index * PRIMARY_FAULT_REG_LEN + 12);
                if (!(data & DMA_FRCD_F))
                        break;

                fault_reason = dma_frcd_fault_reason(data);
                type = dma_frcd_type(data);

                data = readl(iommu->reg + reg +
                                fault_index * PRIMARY_FAULT_REG_LEN + 8);
                source_id = dma_frcd_source_id(data);

                guest_addr = dmar_readq(iommu->reg + reg +
                                fault_index * PRIMARY_FAULT_REG_LEN);
                guest_addr = dma_frcd_page_addr(guest_addr);
                /* clear the fault */
                writel(DMA_FRCD_F, iommu->reg + reg +
                        fault_index * PRIMARY_FAULT_REG_LEN + 12);

                spin_unlock_irqrestore(&iommu->register_lock, flag);

                iommu_page_fault_do_one(iommu, type, fault_reason,
                                source_id, guest_addr);

                fault_index++;
                if (fault_index > cap_num_fault_regs(iommu->cap))
                        fault_index = 0;
                spin_lock_irqsave(&iommu->register_lock, flag);
        }
clear_overflow:
        /* clear primary fault overflow */
        fault_status = readl(iommu->reg + DMAR_FSTS_REG);
        if (fault_status & DMA_FSTS_PFO)
                writel(DMA_FSTS_PFO, iommu->reg + DMAR_FSTS_REG);

        spin_unlock_irqrestore(&iommu->register_lock, flag);
        return IRQ_HANDLED;
}
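
/*
 * Fault reporting path: each primary fault record is 16 bytes
 * (PRIMARY_FAULT_REG_LEN).  The handler starts at the index reported in
 * the fault status register, consumes records until one without the F bit
 * is found, writes DMA_FRCD_F back to clear each record, wraps at
 * cap_num_fault_regs(), and finally clears the primary fault overflow
 * bit.  The register lock is dropped around the printk in
 * iommu_page_fault_do_one().
 */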
int dmar_set_interrupt(struct intel_iommu *iommu)
{
        int irq, ret;

        irq = create_irq();
        if (!irq) {
                printk(KERN_ERR "IOMMU: no free vectors\n");
                return -EINVAL;
        }

        set_irq_data(irq, iommu);
        iommu->irq = irq;

        ret = arch_setup_dmar_msi(irq);
        if (ret) {
                set_irq_data(irq, NULL);
                iommu->irq = 0;
                destroy_irq(irq);
                return ret;
        }

        /* Force fault register is cleared */
        iommu_page_fault(irq, iommu);

        ret = request_irq(irq, iommu_page_fault, 0, iommu->name, iommu);
        if (ret)
                printk(KERN_ERR "IOMMU: can't request irq\n");
        return ret;
}
935 static int iommu_init_domains(struct intel_iommu
*iommu
)
937 unsigned long ndomains
;
938 unsigned long nlongs
;
940 ndomains
= cap_ndoms(iommu
->cap
);
941 pr_debug("Number of Domains supportd <%ld>\n", ndomains
);
942 nlongs
= BITS_TO_LONGS(ndomains
);
944 /* TBD: there might be 64K domains,
945 * consider other allocation for future chip
947 iommu
->domain_ids
= kcalloc(nlongs
, sizeof(unsigned long), GFP_KERNEL
);
948 if (!iommu
->domain_ids
) {
949 printk(KERN_ERR
"Allocating domain id array failed\n");
952 iommu
->domains
= kcalloc(ndomains
, sizeof(struct dmar_domain
*),
954 if (!iommu
->domains
) {
955 printk(KERN_ERR
"Allocating domain array failed\n");
956 kfree(iommu
->domain_ids
);
961 * if Caching mode is set, then invalid translations are tagged
962 * with domainid 0. Hence we need to pre-allocate it.
964 if (cap_caching_mode(iommu
->cap
))
965 set_bit(0, iommu
->domain_ids
);
969 static struct intel_iommu
*alloc_iommu(struct dmar_drhd_unit
*drhd
)
971 struct intel_iommu
*iommu
;
976 iommu
= kzalloc(sizeof(*iommu
), GFP_KERNEL
);
979 iommu
->reg
= ioremap(drhd
->reg_base_addr
, PAGE_SIZE_4K
);
981 printk(KERN_ERR
"IOMMU: can't map the region\n");
984 iommu
->cap
= dmar_readq(iommu
->reg
+ DMAR_CAP_REG
);
985 iommu
->ecap
= dmar_readq(iommu
->reg
+ DMAR_ECAP_REG
);
987 /* the registers might be more than one page */
988 map_size
= max_t(int, ecap_max_iotlb_offset(iommu
->ecap
),
989 cap_max_fault_reg_offset(iommu
->cap
));
990 map_size
= PAGE_ALIGN_4K(map_size
);
991 if (map_size
> PAGE_SIZE_4K
) {
993 iommu
->reg
= ioremap(drhd
->reg_base_addr
, map_size
);
995 printk(KERN_ERR
"IOMMU: can't map the region\n");
1000 ver
= readl(iommu
->reg
+ DMAR_VER_REG
);
1001 pr_debug("IOMMU %llx: ver %d:%d cap %llx ecap %llx\n",
1002 drhd
->reg_base_addr
, DMAR_VER_MAJOR(ver
), DMAR_VER_MINOR(ver
),
1003 iommu
->cap
, iommu
->ecap
);
1004 ret
= iommu_init_domains(iommu
);
1007 spin_lock_init(&iommu
->lock
);
1008 spin_lock_init(&iommu
->register_lock
);
1010 drhd
->iommu
= iommu
;
1013 iounmap(iommu
->reg
);
1019 static void domain_exit(struct dmar_domain
*domain
);
1020 static void free_iommu(struct intel_iommu
*iommu
)
1022 struct dmar_domain
*domain
;
1028 i
= find_first_bit(iommu
->domain_ids
, cap_ndoms(iommu
->cap
));
1029 for (; i
< cap_ndoms(iommu
->cap
); ) {
1030 domain
= iommu
->domains
[i
];
1031 clear_bit(i
, iommu
->domain_ids
);
1032 domain_exit(domain
);
1033 i
= find_next_bit(iommu
->domain_ids
,
1034 cap_ndoms(iommu
->cap
), i
+1);
1037 if (iommu
->gcmd
& DMA_GCMD_TE
)
1038 iommu_disable_translation(iommu
);
1041 set_irq_data(iommu
->irq
, NULL
);
1042 /* This will mask the irq */
1043 free_irq(iommu
->irq
, iommu
);
1044 destroy_irq(iommu
->irq
);
1047 kfree(iommu
->domains
);
1048 kfree(iommu
->domain_ids
);
1050 /* free context mapping */
1051 free_context_table(iommu
);
1054 iounmap(iommu
->reg
);
1058 static struct dmar_domain
* iommu_alloc_domain(struct intel_iommu
*iommu
)
1061 unsigned long ndomains
;
1062 struct dmar_domain
*domain
;
1063 unsigned long flags
;
1065 domain
= alloc_domain_mem();
1069 ndomains
= cap_ndoms(iommu
->cap
);
1071 spin_lock_irqsave(&iommu
->lock
, flags
);
1072 num
= find_first_zero_bit(iommu
->domain_ids
, ndomains
);
1073 if (num
>= ndomains
) {
1074 spin_unlock_irqrestore(&iommu
->lock
, flags
);
1075 free_domain_mem(domain
);
1076 printk(KERN_ERR
"IOMMU: no free domain ids\n");
1080 set_bit(num
, iommu
->domain_ids
);
1082 domain
->iommu
= iommu
;
1083 iommu
->domains
[num
] = domain
;
1084 spin_unlock_irqrestore(&iommu
->lock
, flags
);
1089 static void iommu_free_domain(struct dmar_domain
*domain
)
1091 unsigned long flags
;
1093 spin_lock_irqsave(&domain
->iommu
->lock
, flags
);
1094 clear_bit(domain
->id
, domain
->iommu
->domain_ids
);
1095 spin_unlock_irqrestore(&domain
->iommu
->lock
, flags
);
1098 static struct iova_domain reserved_iova_list
;
1100 static void dmar_init_reserved_ranges(void)
1102 struct pci_dev
*pdev
= NULL
;
1107 init_iova_domain(&reserved_iova_list
, DMA_32BIT_PFN
);
1109 /* IOAPIC ranges shouldn't be accessed by DMA */
1110 iova
= reserve_iova(&reserved_iova_list
, IOVA_PFN(IOAPIC_RANGE_START
),
1111 IOVA_PFN(IOAPIC_RANGE_END
));
1113 printk(KERN_ERR
"Reserve IOAPIC range failed\n");
1115 /* Reserve all PCI MMIO to avoid peer-to-peer access */
1116 for_each_pci_dev(pdev
) {
1119 for (i
= 0; i
< PCI_NUM_RESOURCES
; i
++) {
1120 r
= &pdev
->resource
[i
];
1121 if (!r
->flags
|| !(r
->flags
& IORESOURCE_MEM
))
1124 addr
&= PAGE_MASK_4K
;
1125 size
= r
->end
- addr
;
1126 size
= PAGE_ALIGN_4K(size
);
1127 iova
= reserve_iova(&reserved_iova_list
, IOVA_PFN(addr
),
1128 IOVA_PFN(size
+ addr
) - 1);
1130 printk(KERN_ERR
"Reserve iova failed\n");
1136 static void domain_reserve_special_ranges(struct dmar_domain
*domain
)
1138 copy_reserved_iova(&reserved_iova_list
, &domain
->iovad
);
1141 static inline int guestwidth_to_adjustwidth(int gaw
)
1144 int r
= (gaw
- 12) % 9;
1155 static int domain_init(struct dmar_domain
*domain
, int guest_width
)
1157 struct intel_iommu
*iommu
;
1158 int adjust_width
, agaw
;
1159 unsigned long sagaw
;
1161 init_iova_domain(&domain
->iovad
, DMA_32BIT_PFN
);
1162 spin_lock_init(&domain
->mapping_lock
);
1164 domain_reserve_special_ranges(domain
);
1166 /* calculate AGAW */
1167 iommu
= domain
->iommu
;
1168 if (guest_width
> cap_mgaw(iommu
->cap
))
1169 guest_width
= cap_mgaw(iommu
->cap
);
1170 domain
->gaw
= guest_width
;
1171 adjust_width
= guestwidth_to_adjustwidth(guest_width
);
1172 agaw
= width_to_agaw(adjust_width
);
1173 sagaw
= cap_sagaw(iommu
->cap
);
1174 if (!test_bit(agaw
, &sagaw
)) {
1175 /* hardware doesn't support it, choose a bigger one */
1176 pr_debug("IOMMU: hardware doesn't support agaw %d\n", agaw
);
1177 agaw
= find_next_bit(&sagaw
, 5, agaw
);
1181 domain
->agaw
= agaw
;
1182 INIT_LIST_HEAD(&domain
->devices
);
1184 /* always allocate the top pgd */
1185 domain
->pgd
= (struct dma_pte
*)alloc_pgtable_page();
1188 __iommu_flush_cache(iommu
, domain
->pgd
, PAGE_SIZE_4K
);
1192 static void domain_exit(struct dmar_domain
*domain
)
1196 /* Domain 0 is reserved, so dont process it */
1200 domain_remove_dev_info(domain
);
1202 put_iova_domain(&domain
->iovad
);
1203 end
= DOMAIN_MAX_ADDR(domain
->gaw
);
1204 end
= end
& (~PAGE_MASK_4K
);
1207 dma_pte_clear_range(domain
, 0, end
);
1209 /* free page tables */
1210 dma_pte_free_pagetable(domain
, 0, end
);
1212 iommu_free_domain(domain
);
1213 free_domain_mem(domain
);
1216 static int domain_context_mapping_one(struct dmar_domain
*domain
,
1219 struct context_entry
*context
;
1220 struct intel_iommu
*iommu
= domain
->iommu
;
1221 unsigned long flags
;
1223 pr_debug("Set context mapping for %02x:%02x.%d\n",
1224 bus
, PCI_SLOT(devfn
), PCI_FUNC(devfn
));
1225 BUG_ON(!domain
->pgd
);
1226 context
= device_to_context_entry(iommu
, bus
, devfn
);
1229 spin_lock_irqsave(&iommu
->lock
, flags
);
1230 if (context_present(*context
)) {
1231 spin_unlock_irqrestore(&iommu
->lock
, flags
);
1235 context_set_domain_id(*context
, domain
->id
);
1236 context_set_address_width(*context
, domain
->agaw
);
1237 context_set_address_root(*context
, virt_to_phys(domain
->pgd
));
1238 context_set_translation_type(*context
, CONTEXT_TT_MULTI_LEVEL
);
1239 context_set_fault_enable(*context
);
1240 context_set_present(*context
);
1241 __iommu_flush_cache(iommu
, context
, sizeof(*context
));
1243 /* it's a non-present to present mapping */
1244 if (iommu_flush_context_device(iommu
, domain
->id
,
1245 (((u16
)bus
) << 8) | devfn
, DMA_CCMD_MASK_NOBIT
, 1))
1246 iommu_flush_write_buffer(iommu
);
1248 iommu_flush_iotlb_dsi(iommu
, 0, 0);
1249 spin_unlock_irqrestore(&iommu
->lock
, flags
);
1254 domain_context_mapping(struct dmar_domain
*domain
, struct pci_dev
*pdev
)
1257 struct pci_dev
*tmp
, *parent
;
1259 ret
= domain_context_mapping_one(domain
, pdev
->bus
->number
,
1264 /* dependent device mapping */
1265 tmp
= pci_find_upstream_pcie_bridge(pdev
);
1268 /* Secondary interface's bus number and devfn 0 */
1269 parent
= pdev
->bus
->self
;
1270 while (parent
!= tmp
) {
1271 ret
= domain_context_mapping_one(domain
, parent
->bus
->number
,
1275 parent
= parent
->bus
->self
;
1277 if (tmp
->is_pcie
) /* this is a PCIE-to-PCI bridge */
1278 return domain_context_mapping_one(domain
,
1279 tmp
->subordinate
->number
, 0);
1280 else /* this is a legacy PCI bridge */
1281 return domain_context_mapping_one(domain
,
1282 tmp
->bus
->number
, tmp
->devfn
);
1285 static int domain_context_mapped(struct dmar_domain
*domain
,
1286 struct pci_dev
*pdev
)
1289 struct pci_dev
*tmp
, *parent
;
1291 ret
= device_context_mapped(domain
->iommu
,
1292 pdev
->bus
->number
, pdev
->devfn
);
1295 /* dependent device mapping */
1296 tmp
= pci_find_upstream_pcie_bridge(pdev
);
1299 /* Secondary interface's bus number and devfn 0 */
1300 parent
= pdev
->bus
->self
;
1301 while (parent
!= tmp
) {
1302 ret
= device_context_mapped(domain
->iommu
, parent
->bus
->number
,
1306 parent
= parent
->bus
->self
;
1309 return device_context_mapped(domain
->iommu
,
1310 tmp
->subordinate
->number
, 0);
1312 return device_context_mapped(domain
->iommu
,
1313 tmp
->bus
->number
, tmp
->devfn
);
1317 domain_page_mapping(struct dmar_domain
*domain
, dma_addr_t iova
,
1318 u64 hpa
, size_t size
, int prot
)
1320 u64 start_pfn
, end_pfn
;
1321 struct dma_pte
*pte
;
1324 if ((prot
& (DMA_PTE_READ
|DMA_PTE_WRITE
)) == 0)
1326 iova
&= PAGE_MASK_4K
;
1327 start_pfn
= ((u64
)hpa
) >> PAGE_SHIFT_4K
;
1328 end_pfn
= (PAGE_ALIGN_4K(((u64
)hpa
) + size
)) >> PAGE_SHIFT_4K
;
1330 while (start_pfn
< end_pfn
) {
1331 pte
= addr_to_dma_pte(domain
, iova
+ PAGE_SIZE_4K
* index
);
1334 /* We don't need lock here, nobody else
1335 * touches the iova range
1337 BUG_ON(dma_pte_addr(*pte
));
1338 dma_set_pte_addr(*pte
, start_pfn
<< PAGE_SHIFT_4K
);
1339 dma_set_pte_prot(*pte
, prot
);
1340 __iommu_flush_cache(domain
->iommu
, pte
, sizeof(*pte
));
1347 static void detach_domain_for_dev(struct dmar_domain
*domain
, u8 bus
, u8 devfn
)
1349 clear_context_table(domain
->iommu
, bus
, devfn
);
1350 iommu_flush_context_global(domain
->iommu
, 0);
1351 iommu_flush_iotlb_global(domain
->iommu
, 0);
1354 static void domain_remove_dev_info(struct dmar_domain
*domain
)
1356 struct device_domain_info
*info
;
1357 unsigned long flags
;
1359 spin_lock_irqsave(&device_domain_lock
, flags
);
1360 while (!list_empty(&domain
->devices
)) {
1361 info
= list_entry(domain
->devices
.next
,
1362 struct device_domain_info
, link
);
1363 list_del(&info
->link
);
1364 list_del(&info
->global
);
1366 info
->dev
->dev
.archdata
.iommu
= NULL
;
1367 spin_unlock_irqrestore(&device_domain_lock
, flags
);
1369 detach_domain_for_dev(info
->domain
, info
->bus
, info
->devfn
);
1370 free_devinfo_mem(info
);
1372 spin_lock_irqsave(&device_domain_lock
, flags
);
1374 spin_unlock_irqrestore(&device_domain_lock
, flags
);
1379 * Note: we use struct pci_dev->dev.archdata.iommu stores the info
1381 struct dmar_domain
*
1382 find_domain(struct pci_dev
*pdev
)
1384 struct device_domain_info
*info
;
1386 /* No lock here, assumes no domain exit in normal case */
1387 info
= pdev
->dev
.archdata
.iommu
;
1389 return info
->domain
;
1393 static int dmar_pci_device_match(struct pci_dev
*devices
[], int cnt
,
1394 struct pci_dev
*dev
)
1399 for (index
= 0; index
< cnt
; index
++)
1400 if (dev
== devices
[index
])
1403 /* Check our parent */
1404 dev
= dev
->bus
->self
;
1410 static struct dmar_drhd_unit
*
1411 dmar_find_matched_drhd_unit(struct pci_dev
*dev
)
1413 struct dmar_drhd_unit
*drhd
= NULL
;
1415 list_for_each_entry(drhd
, &dmar_drhd_units
, list
) {
1416 if (drhd
->include_all
|| dmar_pci_device_match(drhd
->devices
,
1417 drhd
->devices_cnt
, dev
))
1424 /* domain is initialized */
1425 static struct dmar_domain
*get_domain_for_dev(struct pci_dev
*pdev
, int gaw
)
1427 struct dmar_domain
*domain
, *found
= NULL
;
1428 struct intel_iommu
*iommu
;
1429 struct dmar_drhd_unit
*drhd
;
1430 struct device_domain_info
*info
, *tmp
;
1431 struct pci_dev
*dev_tmp
;
1432 unsigned long flags
;
1433 int bus
= 0, devfn
= 0;
1435 domain
= find_domain(pdev
);
1439 dev_tmp
= pci_find_upstream_pcie_bridge(pdev
);
1441 if (dev_tmp
->is_pcie
) {
1442 bus
= dev_tmp
->subordinate
->number
;
1445 bus
= dev_tmp
->bus
->number
;
1446 devfn
= dev_tmp
->devfn
;
1448 spin_lock_irqsave(&device_domain_lock
, flags
);
1449 list_for_each_entry(info
, &device_domain_list
, global
) {
1450 if (info
->bus
== bus
&& info
->devfn
== devfn
) {
1451 found
= info
->domain
;
1455 spin_unlock_irqrestore(&device_domain_lock
, flags
);
1456 /* pcie-pci bridge already has a domain, uses it */
1463 /* Allocate new domain for the device */
1464 drhd
= dmar_find_matched_drhd_unit(pdev
);
1466 printk(KERN_ERR
"IOMMU: can't find DMAR for device %s\n",
1470 iommu
= drhd
->iommu
;
1472 domain
= iommu_alloc_domain(iommu
);
1476 if (domain_init(domain
, gaw
)) {
1477 domain_exit(domain
);
1481 /* register pcie-to-pci device */
1483 info
= alloc_devinfo_mem();
1485 domain_exit(domain
);
1489 info
->devfn
= devfn
;
1491 info
->domain
= domain
;
1492 /* This domain is shared by devices under p2p bridge */
1493 domain
->flags
|= DOMAIN_FLAG_MULTIPLE_DEVICES
;
1495 /* pcie-to-pci bridge already has a domain, uses it */
1497 spin_lock_irqsave(&device_domain_lock
, flags
);
1498 list_for_each_entry(tmp
, &device_domain_list
, global
) {
1499 if (tmp
->bus
== bus
&& tmp
->devfn
== devfn
) {
1500 found
= tmp
->domain
;
1505 free_devinfo_mem(info
);
1506 domain_exit(domain
);
1509 list_add(&info
->link
, &domain
->devices
);
1510 list_add(&info
->global
, &device_domain_list
);
1512 spin_unlock_irqrestore(&device_domain_lock
, flags
);
1516 info
= alloc_devinfo_mem();
1519 info
->bus
= pdev
->bus
->number
;
1520 info
->devfn
= pdev
->devfn
;
1522 info
->domain
= domain
;
1523 spin_lock_irqsave(&device_domain_lock
, flags
);
1524 /* somebody is fast */
1525 found
= find_domain(pdev
);
1526 if (found
!= NULL
) {
1527 spin_unlock_irqrestore(&device_domain_lock
, flags
);
1528 if (found
!= domain
) {
1529 domain_exit(domain
);
1532 free_devinfo_mem(info
);
1535 list_add(&info
->link
, &domain
->devices
);
1536 list_add(&info
->global
, &device_domain_list
);
1537 pdev
->dev
.archdata
.iommu
= info
;
1538 spin_unlock_irqrestore(&device_domain_lock
, flags
);
1541 /* recheck it here, maybe others set it */
1542 return find_domain(pdev
);
1545 static int iommu_prepare_identity_map(struct pci_dev
*pdev
, u64 start
, u64 end
)
1547 struct dmar_domain
*domain
;
1553 "IOMMU: Setting identity map for device %s [0x%Lx - 0x%Lx]\n",
1554 pci_name(pdev
), start
, end
);
1555 /* page table init */
1556 domain
= get_domain_for_dev(pdev
, DEFAULT_DOMAIN_ADDRESS_WIDTH
);
1560 /* The address might not be aligned */
1561 base
= start
& PAGE_MASK_4K
;
1563 size
= PAGE_ALIGN_4K(size
);
1564 if (!reserve_iova(&domain
->iovad
, IOVA_PFN(base
),
1565 IOVA_PFN(base
+ size
) - 1)) {
1566 printk(KERN_ERR
"IOMMU: reserve iova failed\n");
1571 pr_debug("Mapping reserved region %lx@%llx for %s\n",
1572 size
, base
, pci_name(pdev
));
1574 * RMRR range might have overlap with physical memory range,
1577 dma_pte_clear_range(domain
, base
, base
+ size
);
1579 ret
= domain_page_mapping(domain
, base
, base
, size
,
1580 DMA_PTE_READ
|DMA_PTE_WRITE
);
1584 /* context entry init */
1585 ret
= domain_context_mapping(domain
, pdev
);
1589 domain_exit(domain
);
1594 static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit
*rmrr
,
1595 struct pci_dev
*pdev
)
1597 if (pdev
->dev
.archdata
.iommu
== DUMMY_DEVICE_DOMAIN_INFO
)
1599 return iommu_prepare_identity_map(pdev
, rmrr
->base_address
,
1600 rmrr
->end_address
+ 1);
1603 #ifdef CONFIG_DMAR_GFX_WA
1604 extern int arch_get_ram_range(int slot
, u64
*addr
, u64
*size
);
1605 static void __init
iommu_prepare_gfx_mapping(void)
1607 struct pci_dev
*pdev
= NULL
;
1612 for_each_pci_dev(pdev
) {
1613 if (pdev
->dev
.archdata
.iommu
== DUMMY_DEVICE_DOMAIN_INFO
||
1614 !IS_GFX_DEVICE(pdev
))
1616 printk(KERN_INFO
"IOMMU: gfx device %s 1-1 mapping\n",
1618 slot
= arch_get_ram_range(0, &base
, &size
);
1620 ret
= iommu_prepare_identity_map(pdev
,
1624 slot
= arch_get_ram_range(slot
, &base
, &size
);
1628 printk(KERN_ERR
"IOMMU: mapping reserved region failed\n");
1633 #ifdef CONFIG_DMAR_FLOPPY_WA
1634 static inline void iommu_prepare_isa(void)
1636 struct pci_dev
*pdev
;
1639 pdev
= pci_get_class(PCI_CLASS_BRIDGE_ISA
<< 8, NULL
);
1643 printk(KERN_INFO
"IOMMU: Prepare 0-16M unity mapping for LPC\n");
1644 ret
= iommu_prepare_identity_map(pdev
, 0, 16*1024*1024);
1647 printk("IOMMU: Failed to create 0-64M identity map, "
1648 "floppy might not work\n");
1652 static inline void iommu_prepare_isa(void)
1656 #endif /* !CONFIG_DMAR_FLPY_WA */
1658 int __init
init_dmars(void)
1660 struct dmar_drhd_unit
*drhd
;
1661 struct dmar_rmrr_unit
*rmrr
;
1662 struct pci_dev
*pdev
;
1663 struct intel_iommu
*iommu
;
1669 * initialize and program root entry to not present
1672 for_each_drhd_unit(drhd
) {
1675 iommu
= alloc_iommu(drhd
);
1683 * we could share the same root & context tables
1684 * amoung all IOMMU's. Need to Split it later.
1686 ret
= iommu_alloc_root_entry(iommu
);
1688 printk(KERN_ERR
"IOMMU: allocate root entry failed\n");
1695 * for each dev attached to rmrr
1697 * locate drhd for dev, alloc domain for dev
1698 * allocate free domain
1699 * allocate page table entries for rmrr
1700 * if context not allocated for bus
1701 * allocate and init context
1702 * set present in root table for this bus
1703 * init context with domain, translation etc
1707 for_each_rmrr_units(rmrr
) {
1709 for (i
= 0; i
< rmrr
->devices_cnt
; i
++) {
1710 pdev
= rmrr
->devices
[i
];
1711 /* some BIOS lists non-exist devices in DMAR table */
1714 ret
= iommu_prepare_rmrr_dev(rmrr
, pdev
);
1717 "IOMMU: mapping reserved region failed\n");
1721 iommu_prepare_gfx_mapping();
1723 iommu_prepare_isa();
1728 * global invalidate context cache
1729 * global invalidate iotlb
1730 * enable translation
1732 for_each_drhd_unit(drhd
) {
1735 iommu
= drhd
->iommu
;
1736 sprintf (iommu
->name
, "dmar%d", unit
++);
1738 iommu_flush_write_buffer(iommu
);
1740 ret
= dmar_set_interrupt(iommu
);
1744 iommu_set_root_entry(iommu
);
1746 iommu_flush_context_global(iommu
, 0);
1747 iommu_flush_iotlb_global(iommu
, 0);
1749 iommu_disable_protect_mem_regions(iommu
);
1751 ret
= iommu_enable_translation(iommu
);
1758 for_each_drhd_unit(drhd
) {
1761 iommu
= drhd
->iommu
;
1767 static inline u64
aligned_size(u64 host_addr
, size_t size
)
1770 addr
= (host_addr
& (~PAGE_MASK_4K
)) + size
;
1771 return PAGE_ALIGN_4K(addr
);
1775 iommu_alloc_iova(struct dmar_domain
*domain
, size_t size
, u64 end
)
1779 /* Make sure it's in range */
1780 end
= min_t(u64
, DOMAIN_MAX_ADDR(domain
->gaw
), end
);
1781 if (!size
|| (IOVA_START_ADDR
+ size
> end
))
1784 piova
= alloc_iova(&domain
->iovad
,
1785 size
>> PAGE_SHIFT_4K
, IOVA_PFN(end
), 1);
1789 static struct iova
*
1790 __intel_alloc_iova(struct device
*dev
, struct dmar_domain
*domain
,
1793 struct pci_dev
*pdev
= to_pci_dev(dev
);
1794 struct iova
*iova
= NULL
;
1796 if ((pdev
->dma_mask
<= DMA_32BIT_MASK
) || (dmar_forcedac
)) {
1797 iova
= iommu_alloc_iova(domain
, size
, pdev
->dma_mask
);
1800 * First try to allocate an io virtual address in
1801 * DMA_32BIT_MASK and if that fails then try allocating
1804 iova
= iommu_alloc_iova(domain
, size
, DMA_32BIT_MASK
);
1806 iova
= iommu_alloc_iova(domain
, size
, pdev
->dma_mask
);
1810 printk(KERN_ERR
"Allocating iova for %s failed", pci_name(pdev
));
1817 static struct dmar_domain
*
1818 get_valid_domain_for_dev(struct pci_dev
*pdev
)
1820 struct dmar_domain
*domain
;
1823 domain
= get_domain_for_dev(pdev
,
1824 DEFAULT_DOMAIN_ADDRESS_WIDTH
);
1827 "Allocating domain for %s failed", pci_name(pdev
));
1831 /* make sure context mapping is ok */
1832 if (unlikely(!domain_context_mapped(domain
, pdev
))) {
1833 ret
= domain_context_mapping(domain
, pdev
);
1836 "Domain context map for %s failed",
1845 static dma_addr_t
intel_map_single(struct device
*hwdev
, void *addr
,
1846 size_t size
, int dir
)
1848 struct pci_dev
*pdev
= to_pci_dev(hwdev
);
1850 struct dmar_domain
*domain
;
1851 unsigned long start_addr
;
1855 BUG_ON(dir
== DMA_NONE
);
1856 if (pdev
->dev
.archdata
.iommu
== DUMMY_DEVICE_DOMAIN_INFO
)
1857 return virt_to_bus(addr
);
1859 domain
= get_valid_domain_for_dev(pdev
);
1863 addr
= (void *)virt_to_phys(addr
);
1864 size
= aligned_size((u64
)addr
, size
);
1866 iova
= __intel_alloc_iova(hwdev
, domain
, size
);
1870 start_addr
= iova
->pfn_lo
<< PAGE_SHIFT_4K
;
1873 * Check if DMAR supports zero-length reads on write only
1876 if (dir
== DMA_TO_DEVICE
|| dir
== DMA_BIDIRECTIONAL
|| \
1877 !cap_zlr(domain
->iommu
->cap
))
1878 prot
|= DMA_PTE_READ
;
1879 if (dir
== DMA_FROM_DEVICE
|| dir
== DMA_BIDIRECTIONAL
)
1880 prot
|= DMA_PTE_WRITE
;
1882 * addr - (addr + size) might be partial page, we should map the whole
1883 * page. Note: if two part of one page are separately mapped, we
1884 * might have two guest_addr mapping to the same host addr, but this
1885 * is not a big problem
1887 ret
= domain_page_mapping(domain
, start_addr
,
1888 ((u64
)addr
) & PAGE_MASK_4K
, size
, prot
);
1892 pr_debug("Device %s request: %lx@%llx mapping: %lx@%llx, dir %d\n",
1893 pci_name(pdev
), size
, (u64
)addr
,
1894 size
, (u64
)start_addr
, dir
);
1896 /* it's a non-present to present mapping */
1897 ret
= iommu_flush_iotlb_psi(domain
->iommu
, domain
->id
,
1898 start_addr
, size
>> PAGE_SHIFT_4K
, 1);
1900 iommu_flush_write_buffer(domain
->iommu
);
1902 return (start_addr
+ ((u64
)addr
& (~PAGE_MASK_4K
)));
1906 __free_iova(&domain
->iovad
, iova
);
1907 printk(KERN_ERR
"Device %s request: %lx@%llx dir %d --- failed\n",
1908 pci_name(pdev
), size
, (u64
)addr
, dir
);
1912 static void intel_unmap_single(struct device
*dev
, dma_addr_t dev_addr
,
1913 size_t size
, int dir
)
1915 struct pci_dev
*pdev
= to_pci_dev(dev
);
1916 struct dmar_domain
*domain
;
1917 unsigned long start_addr
;
1920 if (pdev
->dev
.archdata
.iommu
== DUMMY_DEVICE_DOMAIN_INFO
)
1922 domain
= find_domain(pdev
);
1925 iova
= find_iova(&domain
->iovad
, IOVA_PFN(dev_addr
));
1929 start_addr
= iova
->pfn_lo
<< PAGE_SHIFT_4K
;
1930 size
= aligned_size((u64
)dev_addr
, size
);
1932 pr_debug("Device %s unmapping: %lx@%llx\n",
1933 pci_name(pdev
), size
, (u64
)start_addr
);
1935 /* clear the whole page */
1936 dma_pte_clear_range(domain
, start_addr
, start_addr
+ size
);
1937 /* free page tables */
1938 dma_pte_free_pagetable(domain
, start_addr
, start_addr
+ size
);
1940 if (iommu_flush_iotlb_psi(domain
->iommu
, domain
->id
, start_addr
,
1941 size
>> PAGE_SHIFT_4K
, 0))
1942 iommu_flush_write_buffer(domain
->iommu
);
1945 __free_iova(&domain
->iovad
, iova
);
1948 static void * intel_alloc_coherent(struct device
*hwdev
, size_t size
,
1949 dma_addr_t
*dma_handle
, gfp_t flags
)
1954 size
= PAGE_ALIGN_4K(size
);
1955 order
= get_order(size
);
1956 flags
&= ~(GFP_DMA
| GFP_DMA32
);
1958 vaddr
= (void *)__get_free_pages(flags
, order
);
1961 memset(vaddr
, 0, size
);
1963 *dma_handle
= intel_map_single(hwdev
, vaddr
, size
, DMA_BIDIRECTIONAL
);
1966 free_pages((unsigned long)vaddr
, order
);
1970 static void intel_free_coherent(struct device
*hwdev
, size_t size
,
1971 void *vaddr
, dma_addr_t dma_handle
)
1975 size
= PAGE_ALIGN_4K(size
);
1976 order
= get_order(size
);
1978 intel_unmap_single(hwdev
, dma_handle
, size
, DMA_BIDIRECTIONAL
);
1979 free_pages((unsigned long)vaddr
, order
);
1982 #define SG_ENT_VIRT_ADDRESS(sg) (sg_virt((sg)))
1983 static void intel_unmap_sg(struct device
*hwdev
, struct scatterlist
*sglist
,
1984 int nelems
, int dir
)
1987 struct pci_dev
*pdev
= to_pci_dev(hwdev
);
1988 struct dmar_domain
*domain
;
1989 unsigned long start_addr
;
1993 struct scatterlist
*sg
;
1995 if (pdev
->dev
.archdata
.iommu
== DUMMY_DEVICE_DOMAIN_INFO
)
1998 domain
= find_domain(pdev
);
2000 iova
= find_iova(&domain
->iovad
, IOVA_PFN(sglist
[0].dma_address
));
2003 for_each_sg(sglist
, sg
, nelems
, i
) {
2004 addr
= SG_ENT_VIRT_ADDRESS(sg
);
2005 size
+= aligned_size((u64
)addr
, sg
->length
);
2008 start_addr
= iova
->pfn_lo
<< PAGE_SHIFT_4K
;
2010 /* clear the whole page */
2011 dma_pte_clear_range(domain
, start_addr
, start_addr
+ size
);
2012 /* free page tables */
2013 dma_pte_free_pagetable(domain
, start_addr
, start_addr
+ size
);
2015 if (iommu_flush_iotlb_psi(domain
->iommu
, domain
->id
, start_addr
,
2016 size
>> PAGE_SHIFT_4K
, 0))
2017 iommu_flush_write_buffer(domain
->iommu
);
2020 __free_iova(&domain
->iovad
, iova
);
2023 static int intel_nontranslate_map_sg(struct device
*hddev
,
2024 struct scatterlist
*sglist
, int nelems
, int dir
)
2027 struct scatterlist
*sg
;
2029 for_each_sg(sglist
, sg
, nelems
, i
) {
2030 BUG_ON(!sg_page(sg
));
2031 sg
->dma_address
= virt_to_bus(SG_ENT_VIRT_ADDRESS(sg
));
2032 sg
->dma_length
= sg
->length
;
2037 static int intel_map_sg(struct device
*hwdev
, struct scatterlist
*sglist
,
2038 int nelems
, int dir
)
2042 struct pci_dev
*pdev
= to_pci_dev(hwdev
);
2043 struct dmar_domain
*domain
;
2047 struct iova
*iova
= NULL
;
2049 struct scatterlist
*sg
;
2050 unsigned long start_addr
;
2052 BUG_ON(dir
== DMA_NONE
);
2053 if (pdev
->dev
.archdata
.iommu
== DUMMY_DEVICE_DOMAIN_INFO
)
2054 return intel_nontranslate_map_sg(hwdev
, sglist
, nelems
, dir
);
2056 domain
= get_valid_domain_for_dev(pdev
);
2060 for_each_sg(sglist
, sg
, nelems
, i
) {
2061 addr
= SG_ENT_VIRT_ADDRESS(sg
);
2062 addr
= (void *)virt_to_phys(addr
);
2063 size
+= aligned_size((u64
)addr
, sg
->length
);
2066 iova
= __intel_alloc_iova(hwdev
, domain
, size
);
2068 sglist
->dma_length
= 0;
2073 * Check if DMAR supports zero-length reads on write only
2076 if (dir
== DMA_TO_DEVICE
|| dir
== DMA_BIDIRECTIONAL
|| \
2077 !cap_zlr(domain
->iommu
->cap
))
2078 prot
|= DMA_PTE_READ
;
2079 if (dir
== DMA_FROM_DEVICE
|| dir
== DMA_BIDIRECTIONAL
)
2080 prot
|= DMA_PTE_WRITE
;
2082 start_addr
= iova
->pfn_lo
<< PAGE_SHIFT_4K
;
2084 for_each_sg(sglist
, sg
, nelems
, i
) {
2085 addr
= SG_ENT_VIRT_ADDRESS(sg
);
2086 addr
= (void *)virt_to_phys(addr
);
2087 size
= aligned_size((u64
)addr
, sg
->length
);
2088 ret
= domain_page_mapping(domain
, start_addr
+ offset
,
2089 ((u64
)addr
) & PAGE_MASK_4K
,
2092 /* clear the page */
2093 dma_pte_clear_range(domain
, start_addr
,
2094 start_addr
+ offset
);
2095 /* free page tables */
2096 dma_pte_free_pagetable(domain
, start_addr
,
2097 start_addr
+ offset
);
2099 __free_iova(&domain
->iovad
, iova
);
2102 sg
->dma_address
= start_addr
+ offset
+
2103 ((u64
)addr
& (~PAGE_MASK_4K
));
2104 sg
->dma_length
= sg
->length
;
2108 /* it's a non-present to present mapping */
2109 if (iommu_flush_iotlb_psi(domain
->iommu
, domain
->id
,
2110 start_addr
, offset
>> PAGE_SHIFT_4K
, 1))
2111 iommu_flush_write_buffer(domain
->iommu
);
static struct dma_mapping_ops intel_dma_ops = {
        .alloc_coherent = intel_alloc_coherent,
        .free_coherent = intel_free_coherent,
        .map_single = intel_map_single,
        .unmap_single = intel_unmap_single,
        .map_sg = intel_map_sg,
        .unmap_sg = intel_unmap_sg,
};
2124 static inline int iommu_domain_cache_init(void)
2128 iommu_domain_cache
= kmem_cache_create("iommu_domain",
2129 sizeof(struct dmar_domain
),
2134 if (!iommu_domain_cache
) {
2135 printk(KERN_ERR
"Couldn't create iommu_domain cache\n");
2142 static inline int iommu_devinfo_cache_init(void)
2146 iommu_devinfo_cache
= kmem_cache_create("iommu_devinfo",
2147 sizeof(struct device_domain_info
),
2152 if (!iommu_devinfo_cache
) {
2153 printk(KERN_ERR
"Couldn't create devinfo cache\n");
2160 static inline int iommu_iova_cache_init(void)
2164 iommu_iova_cache
= kmem_cache_create("iommu_iova",
2165 sizeof(struct iova
),
2170 if (!iommu_iova_cache
) {
2171 printk(KERN_ERR
"Couldn't create iova cache\n");
2178 static int __init
iommu_init_mempool(void)
2181 ret
= iommu_iova_cache_init();
2185 ret
= iommu_domain_cache_init();
2189 ret
= iommu_devinfo_cache_init();
2193 kmem_cache_destroy(iommu_domain_cache
);
2195 kmem_cache_destroy(iommu_iova_cache
);
2200 static void __init
iommu_exit_mempool(void)
2202 kmem_cache_destroy(iommu_devinfo_cache
);
2203 kmem_cache_destroy(iommu_domain_cache
);
2204 kmem_cache_destroy(iommu_iova_cache
);
2208 void __init
detect_intel_iommu(void)
2210 if (swiotlb
|| no_iommu
|| iommu_detected
|| dmar_disabled
)
2212 if (early_dmar_detect()) {
2217 static void __init
init_no_remapping_devices(void)
2219 struct dmar_drhd_unit
*drhd
;
2221 for_each_drhd_unit(drhd
) {
2222 if (!drhd
->include_all
) {
2224 for (i
= 0; i
< drhd
->devices_cnt
; i
++)
2225 if (drhd
->devices
[i
] != NULL
)
2227 /* ignore DMAR unit if no pci devices exist */
2228 if (i
== drhd
->devices_cnt
)
2236 for_each_drhd_unit(drhd
) {
2238 if (drhd
->ignored
|| drhd
->include_all
)
2241 for (i
= 0; i
< drhd
->devices_cnt
; i
++)
2242 if (drhd
->devices
[i
] &&
2243 !IS_GFX_DEVICE(drhd
->devices
[i
]))
2246 if (i
< drhd
->devices_cnt
)
2249 /* bypass IOMMU if it is just for gfx devices */
2251 for (i
= 0; i
< drhd
->devices_cnt
; i
++) {
2252 if (!drhd
->devices
[i
])
2254 drhd
->devices
[i
]->dev
.archdata
.iommu
= DUMMY_DEVICE_DOMAIN_INFO
;
int __init intel_iommu_init(void)
{
        int ret = 0;

        if (no_iommu || swiotlb || dmar_disabled)
                return -ENODEV;

        if (dmar_table_init())
                return -ENODEV;

        iommu_init_mempool();
        dmar_init_reserved_ranges();

        init_no_remapping_devices();

        ret = init_dmars();
        if (ret) {
                printk(KERN_ERR "IOMMU: dmar init failed\n");
                put_iova_domain(&reserved_iova_list);
                iommu_exit_mempool();
                return ret;
        }
        printk(KERN_INFO
        "PCI-DMA: Intel(R) Virtualization Technology for Directed I/O\n");

        force_iommu = 1;
        dma_ops = &intel_dma_ops;
        return 0;
}