2 * Copyright (c) 2006, Intel Corporation.
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * You should have received a copy of the GNU General Public License along with
14 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
15 * Place - Suite 330, Boston, MA 02111-1307 USA.
17 * Copyright (C) 2006-2008 Intel Corporation
18 * Author: Ashok Raj <ashok.raj@intel.com>
19 * Author: Shaohua Li <shaohua.li@intel.com>
20 * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
23 #include <linux/init.h>
24 #include <linux/bitmap.h>
25 #include <linux/slab.h>
26 #include <linux/irq.h>
27 #include <linux/interrupt.h>
28 #include <linux/sysdev.h>
29 #include <linux/spinlock.h>
30 #include <linux/pci.h>
31 #include <linux/dmar.h>
32 #include <linux/dma-mapping.h>
33 #include <linux/mempool.h>
35 #include "intel-iommu.h"
36 #include <asm/proto.h> /* force_iommu in this header in x86-64*/
37 #include <asm/cacheflush.h>
/*
 * PCI class tests. Both compare a right-shifted pdev->class against a PCI
 * class constant. The argument is parenthesized so that any pointer
 * expression (e.g. "&dev" or "devs + i") expands correctly.
 */
#define IS_GFX_DEVICE(pdev) (((pdev)->class >> 16) == PCI_BASE_CLASS_DISPLAY)
#define IS_ISA_DEVICE(pdev) (((pdev)->class >> 8) == PCI_CLASS_BRIDGE_ISA)

/* Address window of the IOAPIC range that is reserved from IOVA allocation */
#define IOAPIC_RANGE_START	(0xfee00000)
#define IOAPIC_RANGE_END	(0xfeefffff)
#define IOVA_START_ADDR		(0x1000)

#define DEFAULT_DOMAIN_ADDRESS_WIDTH 48

/* Upper bound, in jiffies, for polling a hardware status register */
#define DMAR_OPERATION_TIMEOUT (HZ*60) /* 1m */

/*
 * Largest address representable with a guest address width of @gaw bits.
 * @gaw is parenthesized so that expression arguments are shifted as a whole.
 */
#define DOMAIN_MAX_ADDR(gaw) ((((u64)1) << (gaw)) - 1)
54 static void domain_remove_dev_info(struct dmar_domain
*domain
);
56 static int dmar_disabled
;
57 static int __initdata dmar_map_gfx
= 1;
58 static int dmar_forcedac
;
60 #define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1))
61 static DEFINE_SPINLOCK(device_domain_lock
);
62 static LIST_HEAD(device_domain_list
);
64 static int __init
intel_iommu_setup(char *str
)
69 if (!strncmp(str
, "off", 3)) {
71 printk(KERN_INFO
"Intel-IOMMU: disabled\n");
72 } else if (!strncmp(str
, "igfx_off", 8)) {
75 "Intel-IOMMU: disable GFX device mapping\n");
76 } else if (!strncmp(str
, "forcedac", 8)) {
78 "Intel-IOMMU: Forcing DAC for PCI devices\n");
82 str
+= strcspn(str
, ",");
88 __setup("intel_iommu=", intel_iommu_setup
);
90 static struct kmem_cache
*iommu_domain_cache
;
91 static struct kmem_cache
*iommu_devinfo_cache
;
92 static struct kmem_cache
*iommu_iova_cache
;
94 static inline void *iommu_kmem_cache_alloc(struct kmem_cache
*cachep
)
99 /* trying to avoid low memory issues */
100 flags
= current
->flags
& PF_MEMALLOC
;
101 current
->flags
|= PF_MEMALLOC
;
102 vaddr
= kmem_cache_alloc(cachep
, GFP_ATOMIC
);
103 current
->flags
&= (~PF_MEMALLOC
| flags
);
108 static inline void *alloc_pgtable_page(void)
113 /* trying to avoid low memory issues */
114 flags
= current
->flags
& PF_MEMALLOC
;
115 current
->flags
|= PF_MEMALLOC
;
116 vaddr
= (void *)get_zeroed_page(GFP_ATOMIC
);
117 current
->flags
&= (~PF_MEMALLOC
| flags
);
/*
 * Release a page-table page.
 * @vaddr: kernel virtual address of the page; passed to free_page().
 */
static inline void free_pgtable_page(void *vaddr)
{
	unsigned long page_addr = (unsigned long)vaddr;

	free_page(page_addr);
}
126 static inline void *alloc_domain_mem(void)
128 return iommu_kmem_cache_alloc(iommu_domain_cache
);
131 static inline void free_domain_mem(void *vaddr
)
133 kmem_cache_free(iommu_domain_cache
, vaddr
);
136 static inline void * alloc_devinfo_mem(void)
138 return iommu_kmem_cache_alloc(iommu_devinfo_cache
);
141 static inline void free_devinfo_mem(void *vaddr
)
143 kmem_cache_free(iommu_devinfo_cache
, vaddr
);
146 struct iova
*alloc_iova_mem(void)
148 return iommu_kmem_cache_alloc(iommu_iova_cache
);
151 void free_iova_mem(struct iova
*iova
)
153 kmem_cache_free(iommu_iova_cache
, iova
);
156 static inline void __iommu_flush_cache(
157 struct intel_iommu
*iommu
, void *addr
, int size
)
159 if (!ecap_coherent(iommu
->ecap
))
160 clflush_cache_range(addr
, size
);
163 /* Gets context entry for a given bus and devfn */
164 static struct context_entry
* device_to_context_entry(struct intel_iommu
*iommu
,
167 struct root_entry
*root
;
168 struct context_entry
*context
;
169 unsigned long phy_addr
;
172 spin_lock_irqsave(&iommu
->lock
, flags
);
173 root
= &iommu
->root_entry
[bus
];
174 context
= get_context_addr_from_root(root
);
176 context
= (struct context_entry
*)alloc_pgtable_page();
178 spin_unlock_irqrestore(&iommu
->lock
, flags
);
181 __iommu_flush_cache(iommu
, (void *)context
, PAGE_SIZE_4K
);
182 phy_addr
= virt_to_phys((void *)context
);
183 set_root_value(root
, phy_addr
);
184 set_root_present(root
);
185 __iommu_flush_cache(iommu
, root
, sizeof(*root
));
187 spin_unlock_irqrestore(&iommu
->lock
, flags
);
188 return &context
[devfn
];
191 static int device_context_mapped(struct intel_iommu
*iommu
, u8 bus
, u8 devfn
)
193 struct root_entry
*root
;
194 struct context_entry
*context
;
198 spin_lock_irqsave(&iommu
->lock
, flags
);
199 root
= &iommu
->root_entry
[bus
];
200 context
= get_context_addr_from_root(root
);
205 ret
= context_present(context
[devfn
]);
207 spin_unlock_irqrestore(&iommu
->lock
, flags
);
211 static void clear_context_table(struct intel_iommu
*iommu
, u8 bus
, u8 devfn
)
213 struct root_entry
*root
;
214 struct context_entry
*context
;
217 spin_lock_irqsave(&iommu
->lock
, flags
);
218 root
= &iommu
->root_entry
[bus
];
219 context
= get_context_addr_from_root(root
);
221 context_clear_entry(context
[devfn
]);
222 __iommu_flush_cache(iommu
, &context
[devfn
], \
225 spin_unlock_irqrestore(&iommu
->lock
, flags
);
228 static void free_context_table(struct intel_iommu
*iommu
)
230 struct root_entry
*root
;
233 struct context_entry
*context
;
235 spin_lock_irqsave(&iommu
->lock
, flags
);
236 if (!iommu
->root_entry
) {
239 for (i
= 0; i
< ROOT_ENTRY_NR
; i
++) {
240 root
= &iommu
->root_entry
[i
];
241 context
= get_context_addr_from_root(root
);
243 free_pgtable_page(context
);
245 free_pgtable_page(iommu
->root_entry
);
246 iommu
->root_entry
= NULL
;
248 spin_unlock_irqrestore(&iommu
->lock
, flags
);
251 /* page table handling */
252 #define LEVEL_STRIDE (9)
253 #define LEVEL_MASK (((u64)1 << LEVEL_STRIDE) - 1)
255 static inline int agaw_to_level(int agaw
)
260 static inline int agaw_to_width(int agaw
)
262 return 30 + agaw
* LEVEL_STRIDE
;
266 static inline int width_to_agaw(int width
)
268 return (width
- 30) / LEVEL_STRIDE
;
271 static inline unsigned int level_to_offset_bits(int level
)
273 return (12 + (level
- 1) * LEVEL_STRIDE
);
276 static inline int address_level_offset(u64 addr
, int level
)
278 return ((addr
>> level_to_offset_bits(level
)) & LEVEL_MASK
);
281 static inline u64
level_mask(int level
)
283 return ((u64
)-1 << level_to_offset_bits(level
));
286 static inline u64
level_size(int level
)
288 return ((u64
)1 << level_to_offset_bits(level
));
291 static inline u64
align_to_level(u64 addr
, int level
)
293 return ((addr
+ level_size(level
) - 1) & level_mask(level
));
296 static struct dma_pte
* addr_to_dma_pte(struct dmar_domain
*domain
, u64 addr
)
298 int addr_width
= agaw_to_width(domain
->agaw
);
299 struct dma_pte
*parent
, *pte
= NULL
;
300 int level
= agaw_to_level(domain
->agaw
);
304 BUG_ON(!domain
->pgd
);
306 addr
&= (((u64
)1) << addr_width
) - 1;
307 parent
= domain
->pgd
;
309 spin_lock_irqsave(&domain
->mapping_lock
, flags
);
313 offset
= address_level_offset(addr
, level
);
314 pte
= &parent
[offset
];
318 if (!dma_pte_present(*pte
)) {
319 tmp_page
= alloc_pgtable_page();
322 spin_unlock_irqrestore(&domain
->mapping_lock
,
326 __iommu_flush_cache(domain
->iommu
, tmp_page
,
328 dma_set_pte_addr(*pte
, virt_to_phys(tmp_page
));
330 * high level table always sets r/w, last level page
331 * table control read/write
333 dma_set_pte_readable(*pte
);
334 dma_set_pte_writable(*pte
);
335 __iommu_flush_cache(domain
->iommu
, pte
, sizeof(*pte
));
337 parent
= phys_to_virt(dma_pte_addr(*pte
));
341 spin_unlock_irqrestore(&domain
->mapping_lock
, flags
);
345 /* return address's pte at specific level */
346 static struct dma_pte
*dma_addr_level_pte(struct dmar_domain
*domain
, u64 addr
,
349 struct dma_pte
*parent
, *pte
= NULL
;
350 int total
= agaw_to_level(domain
->agaw
);
353 parent
= domain
->pgd
;
354 while (level
<= total
) {
355 offset
= address_level_offset(addr
, total
);
356 pte
= &parent
[offset
];
360 if (!dma_pte_present(*pte
))
362 parent
= phys_to_virt(dma_pte_addr(*pte
));
368 /* clear one page's page table */
369 static void dma_pte_clear_one(struct dmar_domain
*domain
, u64 addr
)
371 struct dma_pte
*pte
= NULL
;
373 /* get last level pte */
374 pte
= dma_addr_level_pte(domain
, addr
, 1);
378 __iommu_flush_cache(domain
->iommu
, pte
, sizeof(*pte
));
382 /* clear last level pte, a tlb flush should be followed */
383 static void dma_pte_clear_range(struct dmar_domain
*domain
, u64 start
, u64 end
)
385 int addr_width
= agaw_to_width(domain
->agaw
);
387 start
&= (((u64
)1) << addr_width
) - 1;
388 end
&= (((u64
)1) << addr_width
) - 1;
389 /* in case it's partial page */
390 start
= PAGE_ALIGN_4K(start
);
393 /* we don't need lock here, nobody else touches the iova range */
394 while (start
< end
) {
395 dma_pte_clear_one(domain
, start
);
396 start
+= PAGE_SIZE_4K
;
400 /* free page table pages. last level pte should already be cleared */
401 static void dma_pte_free_pagetable(struct dmar_domain
*domain
,
404 int addr_width
= agaw_to_width(domain
->agaw
);
406 int total
= agaw_to_level(domain
->agaw
);
410 start
&= (((u64
)1) << addr_width
) - 1;
411 end
&= (((u64
)1) << addr_width
) - 1;
413 /* we don't need lock here, nobody else touches the iova range */
415 while (level
<= total
) {
416 tmp
= align_to_level(start
, level
);
417 if (tmp
>= end
|| (tmp
+ level_size(level
) > end
))
421 pte
= dma_addr_level_pte(domain
, tmp
, level
);
424 phys_to_virt(dma_pte_addr(*pte
)));
426 __iommu_flush_cache(domain
->iommu
,
429 tmp
+= level_size(level
);
434 if (start
== 0 && end
>= ((((u64
)1) << addr_width
) - 1)) {
435 free_pgtable_page(domain
->pgd
);
441 static int iommu_alloc_root_entry(struct intel_iommu
*iommu
)
443 struct root_entry
*root
;
446 root
= (struct root_entry
*)alloc_pgtable_page();
450 __iommu_flush_cache(iommu
, root
, PAGE_SIZE_4K
);
452 spin_lock_irqsave(&iommu
->lock
, flags
);
453 iommu
->root_entry
= root
;
454 spin_unlock_irqrestore(&iommu
->lock
, flags
);
459 #define IOMMU_WAIT_OP(iommu, offset, op, cond, sts) \
461 unsigned long start_time = jiffies;\
463 sts = op (iommu->reg + offset);\
466 if (time_after(jiffies, start_time + DMAR_OPERATION_TIMEOUT))\
467 panic("DMAR hardware is malfunctioning\n");\
472 static void iommu_set_root_entry(struct intel_iommu
*iommu
)
478 addr
= iommu
->root_entry
;
480 spin_lock_irqsave(&iommu
->register_lock
, flag
);
481 dmar_writeq(iommu
->reg
+ DMAR_RTADDR_REG
, virt_to_phys(addr
));
483 cmd
= iommu
->gcmd
| DMA_GCMD_SRTP
;
484 writel(cmd
, iommu
->reg
+ DMAR_GCMD_REG
);
486 /* Make sure hardware complete it */
487 IOMMU_WAIT_OP(iommu
, DMAR_GSTS_REG
,
488 readl
, (sts
& DMA_GSTS_RTPS
), sts
);
490 spin_unlock_irqrestore(&iommu
->register_lock
, flag
);
493 static void iommu_flush_write_buffer(struct intel_iommu
*iommu
)
498 if (!cap_rwbf(iommu
->cap
))
500 val
= iommu
->gcmd
| DMA_GCMD_WBF
;
502 spin_lock_irqsave(&iommu
->register_lock
, flag
);
503 writel(val
, iommu
->reg
+ DMAR_GCMD_REG
);
505 /* Make sure hardware complete it */
506 IOMMU_WAIT_OP(iommu
, DMAR_GSTS_REG
,
507 readl
, (!(val
& DMA_GSTS_WBFS
)), val
);
509 spin_unlock_irqrestore(&iommu
->register_lock
, flag
);
512 /* return value determine if we need a write buffer flush */
513 static int __iommu_flush_context(struct intel_iommu
*iommu
,
514 u16 did
, u16 source_id
, u8 function_mask
, u64 type
,
515 int non_present_entry_flush
)
521 * In the non-present entry flush case, if hardware doesn't cache
522 * non-present entry we do nothing and if hardware cache non-present
523 * entry, we flush entries of domain 0 (the domain id is used to cache
524 * any non-present entries)
526 if (non_present_entry_flush
) {
527 if (!cap_caching_mode(iommu
->cap
))
534 case DMA_CCMD_GLOBAL_INVL
:
535 val
= DMA_CCMD_GLOBAL_INVL
;
537 case DMA_CCMD_DOMAIN_INVL
:
538 val
= DMA_CCMD_DOMAIN_INVL
|DMA_CCMD_DID(did
);
540 case DMA_CCMD_DEVICE_INVL
:
541 val
= DMA_CCMD_DEVICE_INVL
|DMA_CCMD_DID(did
)
542 | DMA_CCMD_SID(source_id
) | DMA_CCMD_FM(function_mask
);
549 spin_lock_irqsave(&iommu
->register_lock
, flag
);
550 dmar_writeq(iommu
->reg
+ DMAR_CCMD_REG
, val
);
552 /* Make sure hardware complete it */
553 IOMMU_WAIT_OP(iommu
, DMAR_CCMD_REG
,
554 dmar_readq
, (!(val
& DMA_CCMD_ICC
)), val
);
556 spin_unlock_irqrestore(&iommu
->register_lock
, flag
);
558 /* flush context entry will implicitly flush write buffer */
562 static int inline iommu_flush_context_global(struct intel_iommu
*iommu
,
563 int non_present_entry_flush
)
565 return __iommu_flush_context(iommu
, 0, 0, 0, DMA_CCMD_GLOBAL_INVL
,
566 non_present_entry_flush
);
569 static int inline iommu_flush_context_domain(struct intel_iommu
*iommu
, u16 did
,
570 int non_present_entry_flush
)
572 return __iommu_flush_context(iommu
, did
, 0, 0, DMA_CCMD_DOMAIN_INVL
,
573 non_present_entry_flush
);
576 static int inline iommu_flush_context_device(struct intel_iommu
*iommu
,
577 u16 did
, u16 source_id
, u8 function_mask
, int non_present_entry_flush
)
579 return __iommu_flush_context(iommu
, did
, source_id
, function_mask
,
580 DMA_CCMD_DEVICE_INVL
, non_present_entry_flush
);
583 /* return value determine if we need a write buffer flush */
584 static int __iommu_flush_iotlb(struct intel_iommu
*iommu
, u16 did
,
585 u64 addr
, unsigned int size_order
, u64 type
,
586 int non_present_entry_flush
)
588 int tlb_offset
= ecap_iotlb_offset(iommu
->ecap
);
589 u64 val
= 0, val_iva
= 0;
593 * In the non-present entry flush case, if hardware doesn't cache
594 * non-present entry we do nothing and if hardware cache non-present
595 * entry, we flush entries of domain 0 (the domain id is used to cache
596 * any non-present entries)
598 if (non_present_entry_flush
) {
599 if (!cap_caching_mode(iommu
->cap
))
606 case DMA_TLB_GLOBAL_FLUSH
:
607 /* global flush doesn't need set IVA_REG */
608 val
= DMA_TLB_GLOBAL_FLUSH
|DMA_TLB_IVT
;
610 case DMA_TLB_DSI_FLUSH
:
611 val
= DMA_TLB_DSI_FLUSH
|DMA_TLB_IVT
|DMA_TLB_DID(did
);
613 case DMA_TLB_PSI_FLUSH
:
614 val
= DMA_TLB_PSI_FLUSH
|DMA_TLB_IVT
|DMA_TLB_DID(did
);
615 /* Note: always flush non-leaf currently */
616 val_iva
= size_order
| addr
;
621 /* Note: set drain read/write */
624 * This is probably to be super secure.. Looks like we can
625 * ignore it without any impact.
627 if (cap_read_drain(iommu
->cap
))
628 val
|= DMA_TLB_READ_DRAIN
;
630 if (cap_write_drain(iommu
->cap
))
631 val
|= DMA_TLB_WRITE_DRAIN
;
633 spin_lock_irqsave(&iommu
->register_lock
, flag
);
634 /* Note: Only uses first TLB reg currently */
636 dmar_writeq(iommu
->reg
+ tlb_offset
, val_iva
);
637 dmar_writeq(iommu
->reg
+ tlb_offset
+ 8, val
);
639 /* Make sure hardware complete it */
640 IOMMU_WAIT_OP(iommu
, tlb_offset
+ 8,
641 dmar_readq
, (!(val
& DMA_TLB_IVT
)), val
);
643 spin_unlock_irqrestore(&iommu
->register_lock
, flag
);
645 /* check IOTLB invalidation granularity */
646 if (DMA_TLB_IAIG(val
) == 0)
647 printk(KERN_ERR
"IOMMU: flush IOTLB failed\n");
648 if (DMA_TLB_IAIG(val
) != DMA_TLB_IIRG(type
))
649 pr_debug("IOMMU: tlb flush request %Lx, actual %Lx\n",
650 DMA_TLB_IIRG(type
), DMA_TLB_IAIG(val
));
651 /* flush iotlb entry will implicitly flush write buffer */
655 static int inline iommu_flush_iotlb_global(struct intel_iommu
*iommu
,
656 int non_present_entry_flush
)
658 return __iommu_flush_iotlb(iommu
, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH
,
659 non_present_entry_flush
);
662 static int inline iommu_flush_iotlb_dsi(struct intel_iommu
*iommu
, u16 did
,
663 int non_present_entry_flush
)
665 return __iommu_flush_iotlb(iommu
, did
, 0, 0, DMA_TLB_DSI_FLUSH
,
666 non_present_entry_flush
);
669 static int iommu_flush_iotlb_psi(struct intel_iommu
*iommu
, u16 did
,
670 u64 addr
, unsigned int pages
, int non_present_entry_flush
)
674 BUG_ON(addr
& (~PAGE_MASK_4K
));
677 /* Fallback to domain selective flush if no PSI support */
678 if (!cap_pgsel_inv(iommu
->cap
))
679 return iommu_flush_iotlb_dsi(iommu
, did
,
680 non_present_entry_flush
);
683 * PSI requires page size to be 2 ^ x, and the base address is naturally
684 * aligned to the size
686 mask
= ilog2(__roundup_pow_of_two(pages
));
687 /* Fallback to domain selective flush if size is too big */
688 if (mask
> cap_max_amask_val(iommu
->cap
))
689 return iommu_flush_iotlb_dsi(iommu
, did
,
690 non_present_entry_flush
);
692 return __iommu_flush_iotlb(iommu
, did
, addr
, mask
,
693 DMA_TLB_PSI_FLUSH
, non_present_entry_flush
);
696 static void iommu_disable_protect_mem_regions(struct intel_iommu
*iommu
)
701 spin_lock_irqsave(&iommu
->register_lock
, flags
);
702 pmen
= readl(iommu
->reg
+ DMAR_PMEN_REG
);
703 pmen
&= ~DMA_PMEN_EPM
;
704 writel(pmen
, iommu
->reg
+ DMAR_PMEN_REG
);
706 /* wait for the protected region status bit to clear */
707 IOMMU_WAIT_OP(iommu
, DMAR_PMEN_REG
,
708 readl
, !(pmen
& DMA_PMEN_PRS
), pmen
);
710 spin_unlock_irqrestore(&iommu
->register_lock
, flags
);
713 static int iommu_enable_translation(struct intel_iommu
*iommu
)
718 spin_lock_irqsave(&iommu
->register_lock
, flags
);
719 writel(iommu
->gcmd
|DMA_GCMD_TE
, iommu
->reg
+ DMAR_GCMD_REG
);
721 /* Make sure hardware complete it */
722 IOMMU_WAIT_OP(iommu
, DMAR_GSTS_REG
,
723 readl
, (sts
& DMA_GSTS_TES
), sts
);
725 iommu
->gcmd
|= DMA_GCMD_TE
;
726 spin_unlock_irqrestore(&iommu
->register_lock
, flags
);
730 static int iommu_disable_translation(struct intel_iommu
*iommu
)
735 spin_lock_irqsave(&iommu
->register_lock
, flag
);
736 iommu
->gcmd
&= ~DMA_GCMD_TE
;
737 writel(iommu
->gcmd
, iommu
->reg
+ DMAR_GCMD_REG
);
739 /* Make sure hardware complete it */
740 IOMMU_WAIT_OP(iommu
, DMAR_GSTS_REG
,
741 readl
, (!(sts
& DMA_GSTS_TES
)), sts
);
743 spin_unlock_irqrestore(&iommu
->register_lock
, flag
);
747 /* iommu interrupt handling. Most stuff are MSI-like. */
749 static const char *fault_reason_strings
[] =
752 "Present bit in root entry is clear",
753 "Present bit in context entry is clear",
754 "Invalid context entry",
755 "Access beyond MGAW",
756 "PTE Write access is not set",
757 "PTE Read access is not set",
758 "Next page table ptr is invalid",
759 "Root table address invalid",
760 "Context table ptr is invalid",
761 "non-zero reserved fields in RTP",
762 "non-zero reserved fields in CTP",
763 "non-zero reserved fields in PTE",
765 #define MAX_FAULT_REASON_IDX (ARRAY_SIZE(fault_reason_strings) - 1)
767 const char *dmar_get_fault_reason(u8 fault_reason
)
769 if (fault_reason
> MAX_FAULT_REASON_IDX
)
772 return fault_reason_strings
[fault_reason
];
775 void dmar_msi_unmask(unsigned int irq
)
777 struct intel_iommu
*iommu
= get_irq_data(irq
);
781 spin_lock_irqsave(&iommu
->register_lock
, flag
);
782 writel(0, iommu
->reg
+ DMAR_FECTL_REG
);
783 /* Read a reg to force flush the post write */
784 readl(iommu
->reg
+ DMAR_FECTL_REG
);
785 spin_unlock_irqrestore(&iommu
->register_lock
, flag
);
788 void dmar_msi_mask(unsigned int irq
)
791 struct intel_iommu
*iommu
= get_irq_data(irq
);
794 spin_lock_irqsave(&iommu
->register_lock
, flag
);
795 writel(DMA_FECTL_IM
, iommu
->reg
+ DMAR_FECTL_REG
);
796 /* Read a reg to force flush the post write */
797 readl(iommu
->reg
+ DMAR_FECTL_REG
);
798 spin_unlock_irqrestore(&iommu
->register_lock
, flag
);
801 void dmar_msi_write(int irq
, struct msi_msg
*msg
)
803 struct intel_iommu
*iommu
= get_irq_data(irq
);
806 spin_lock_irqsave(&iommu
->register_lock
, flag
);
807 writel(msg
->data
, iommu
->reg
+ DMAR_FEDATA_REG
);
808 writel(msg
->address_lo
, iommu
->reg
+ DMAR_FEADDR_REG
);
809 writel(msg
->address_hi
, iommu
->reg
+ DMAR_FEUADDR_REG
);
810 spin_unlock_irqrestore(&iommu
->register_lock
, flag
);
813 void dmar_msi_read(int irq
, struct msi_msg
*msg
)
815 struct intel_iommu
*iommu
= get_irq_data(irq
);
818 spin_lock_irqsave(&iommu
->register_lock
, flag
);
819 msg
->data
= readl(iommu
->reg
+ DMAR_FEDATA_REG
);
820 msg
->address_lo
= readl(iommu
->reg
+ DMAR_FEADDR_REG
);
821 msg
->address_hi
= readl(iommu
->reg
+ DMAR_FEUADDR_REG
);
822 spin_unlock_irqrestore(&iommu
->register_lock
, flag
);
825 static int iommu_page_fault_do_one(struct intel_iommu
*iommu
, int type
,
826 u8 fault_reason
, u16 source_id
, u64 addr
)
830 reason
= dmar_get_fault_reason(fault_reason
);
833 "DMAR:[%s] Request device [%02x:%02x.%d] "
835 "DMAR:[fault reason %02d] %s\n",
836 (type
? "DMA Read" : "DMA Write"),
837 (source_id
>> 8), PCI_SLOT(source_id
& 0xFF),
838 PCI_FUNC(source_id
& 0xFF), addr
, fault_reason
, reason
);
842 #define PRIMARY_FAULT_REG_LEN (16)
843 static irqreturn_t
iommu_page_fault(int irq
, void *dev_id
)
845 struct intel_iommu
*iommu
= dev_id
;
846 int reg
, fault_index
;
850 spin_lock_irqsave(&iommu
->register_lock
, flag
);
851 fault_status
= readl(iommu
->reg
+ DMAR_FSTS_REG
);
853 /* TBD: ignore advanced fault log currently */
854 if (!(fault_status
& DMA_FSTS_PPF
))
857 fault_index
= dma_fsts_fault_record_index(fault_status
);
858 reg
= cap_fault_reg_offset(iommu
->cap
);
866 /* highest 32 bits */
867 data
= readl(iommu
->reg
+ reg
+
868 fault_index
* PRIMARY_FAULT_REG_LEN
+ 12);
869 if (!(data
& DMA_FRCD_F
))
872 fault_reason
= dma_frcd_fault_reason(data
);
873 type
= dma_frcd_type(data
);
875 data
= readl(iommu
->reg
+ reg
+
876 fault_index
* PRIMARY_FAULT_REG_LEN
+ 8);
877 source_id
= dma_frcd_source_id(data
);
879 guest_addr
= dmar_readq(iommu
->reg
+ reg
+
880 fault_index
* PRIMARY_FAULT_REG_LEN
);
881 guest_addr
= dma_frcd_page_addr(guest_addr
);
882 /* clear the fault */
883 writel(DMA_FRCD_F
, iommu
->reg
+ reg
+
884 fault_index
* PRIMARY_FAULT_REG_LEN
+ 12);
886 spin_unlock_irqrestore(&iommu
->register_lock
, flag
);
888 iommu_page_fault_do_one(iommu
, type
, fault_reason
,
889 source_id
, guest_addr
);
892 if (fault_index
> cap_num_fault_regs(iommu
->cap
))
894 spin_lock_irqsave(&iommu
->register_lock
, flag
);
897 /* clear primary fault overflow */
898 fault_status
= readl(iommu
->reg
+ DMAR_FSTS_REG
);
899 if (fault_status
& DMA_FSTS_PFO
)
900 writel(DMA_FSTS_PFO
, iommu
->reg
+ DMAR_FSTS_REG
);
902 spin_unlock_irqrestore(&iommu
->register_lock
, flag
);
906 int dmar_set_interrupt(struct intel_iommu
*iommu
)
912 printk(KERN_ERR
"IOMMU: no free vectors\n");
916 set_irq_data(irq
, iommu
);
919 ret
= arch_setup_dmar_msi(irq
);
921 set_irq_data(irq
, NULL
);
927 /* Force fault register is cleared */
928 iommu_page_fault(irq
, iommu
);
930 ret
= request_irq(irq
, iommu_page_fault
, 0, iommu
->name
, iommu
);
932 printk(KERN_ERR
"IOMMU: can't request irq\n");
936 static int iommu_init_domains(struct intel_iommu
*iommu
)
938 unsigned long ndomains
;
939 unsigned long nlongs
;
941 ndomains
= cap_ndoms(iommu
->cap
);
942 pr_debug("Number of Domains supportd <%ld>\n", ndomains
);
943 nlongs
= BITS_TO_LONGS(ndomains
);
945 /* TBD: there might be 64K domains,
946 * consider other allocation for future chip
948 iommu
->domain_ids
= kcalloc(nlongs
, sizeof(unsigned long), GFP_KERNEL
);
949 if (!iommu
->domain_ids
) {
950 printk(KERN_ERR
"Allocating domain id array failed\n");
953 iommu
->domains
= kcalloc(ndomains
, sizeof(struct dmar_domain
*),
955 if (!iommu
->domains
) {
956 printk(KERN_ERR
"Allocating domain array failed\n");
957 kfree(iommu
->domain_ids
);
962 * if Caching mode is set, then invalid translations are tagged
963 * with domainid 0. Hence we need to pre-allocate it.
965 if (cap_caching_mode(iommu
->cap
))
966 set_bit(0, iommu
->domain_ids
);
970 static struct intel_iommu
*alloc_iommu(struct dmar_drhd_unit
*drhd
)
972 struct intel_iommu
*iommu
;
977 iommu
= kzalloc(sizeof(*iommu
), GFP_KERNEL
);
980 iommu
->reg
= ioremap(drhd
->reg_base_addr
, PAGE_SIZE_4K
);
982 printk(KERN_ERR
"IOMMU: can't map the region\n");
985 iommu
->cap
= dmar_readq(iommu
->reg
+ DMAR_CAP_REG
);
986 iommu
->ecap
= dmar_readq(iommu
->reg
+ DMAR_ECAP_REG
);
988 /* the registers might be more than one page */
989 map_size
= max_t(int, ecap_max_iotlb_offset(iommu
->ecap
),
990 cap_max_fault_reg_offset(iommu
->cap
));
991 map_size
= PAGE_ALIGN_4K(map_size
);
992 if (map_size
> PAGE_SIZE_4K
) {
994 iommu
->reg
= ioremap(drhd
->reg_base_addr
, map_size
);
996 printk(KERN_ERR
"IOMMU: can't map the region\n");
1001 ver
= readl(iommu
->reg
+ DMAR_VER_REG
);
1002 pr_debug("IOMMU %llx: ver %d:%d cap %llx ecap %llx\n",
1003 drhd
->reg_base_addr
, DMAR_VER_MAJOR(ver
), DMAR_VER_MINOR(ver
),
1004 iommu
->cap
, iommu
->ecap
);
1005 ret
= iommu_init_domains(iommu
);
1008 spin_lock_init(&iommu
->lock
);
1009 spin_lock_init(&iommu
->register_lock
);
1011 drhd
->iommu
= iommu
;
1014 iounmap(iommu
->reg
);
1020 static void domain_exit(struct dmar_domain
*domain
);
1021 static void free_iommu(struct intel_iommu
*iommu
)
1023 struct dmar_domain
*domain
;
1029 i
= find_first_bit(iommu
->domain_ids
, cap_ndoms(iommu
->cap
));
1030 for (; i
< cap_ndoms(iommu
->cap
); ) {
1031 domain
= iommu
->domains
[i
];
1032 clear_bit(i
, iommu
->domain_ids
);
1033 domain_exit(domain
);
1034 i
= find_next_bit(iommu
->domain_ids
,
1035 cap_ndoms(iommu
->cap
), i
+1);
1038 if (iommu
->gcmd
& DMA_GCMD_TE
)
1039 iommu_disable_translation(iommu
);
1042 set_irq_data(iommu
->irq
, NULL
);
1043 /* This will mask the irq */
1044 free_irq(iommu
->irq
, iommu
);
1045 destroy_irq(iommu
->irq
);
1048 kfree(iommu
->domains
);
1049 kfree(iommu
->domain_ids
);
1051 /* free context mapping */
1052 free_context_table(iommu
);
1055 iounmap(iommu
->reg
);
1059 static struct dmar_domain
* iommu_alloc_domain(struct intel_iommu
*iommu
)
1062 unsigned long ndomains
;
1063 struct dmar_domain
*domain
;
1064 unsigned long flags
;
1066 domain
= alloc_domain_mem();
1070 ndomains
= cap_ndoms(iommu
->cap
);
1072 spin_lock_irqsave(&iommu
->lock
, flags
);
1073 num
= find_first_zero_bit(iommu
->domain_ids
, ndomains
);
1074 if (num
>= ndomains
) {
1075 spin_unlock_irqrestore(&iommu
->lock
, flags
);
1076 free_domain_mem(domain
);
1077 printk(KERN_ERR
"IOMMU: no free domain ids\n");
1081 set_bit(num
, iommu
->domain_ids
);
1083 domain
->iommu
= iommu
;
1084 iommu
->domains
[num
] = domain
;
1085 spin_unlock_irqrestore(&iommu
->lock
, flags
);
1090 static void iommu_free_domain(struct dmar_domain
*domain
)
1092 unsigned long flags
;
1094 spin_lock_irqsave(&domain
->iommu
->lock
, flags
);
1095 clear_bit(domain
->id
, domain
->iommu
->domain_ids
);
1096 spin_unlock_irqrestore(&domain
->iommu
->lock
, flags
);
1099 static struct iova_domain reserved_iova_list
;
1100 static struct lock_class_key reserved_alloc_key
;
1101 static struct lock_class_key reserved_rbtree_key
;
1103 static void dmar_init_reserved_ranges(void)
1105 struct pci_dev
*pdev
= NULL
;
1110 init_iova_domain(&reserved_iova_list
, DMA_32BIT_PFN
);
1112 lockdep_set_class(&reserved_iova_list
.iova_alloc_lock
,
1113 &reserved_alloc_key
);
1114 lockdep_set_class(&reserved_iova_list
.iova_rbtree_lock
,
1115 &reserved_rbtree_key
);
1117 /* IOAPIC ranges shouldn't be accessed by DMA */
1118 iova
= reserve_iova(&reserved_iova_list
, IOVA_PFN(IOAPIC_RANGE_START
),
1119 IOVA_PFN(IOAPIC_RANGE_END
));
1121 printk(KERN_ERR
"Reserve IOAPIC range failed\n");
1123 /* Reserve all PCI MMIO to avoid peer-to-peer access */
1124 for_each_pci_dev(pdev
) {
1127 for (i
= 0; i
< PCI_NUM_RESOURCES
; i
++) {
1128 r
= &pdev
->resource
[i
];
1129 if (!r
->flags
|| !(r
->flags
& IORESOURCE_MEM
))
1132 addr
&= PAGE_MASK_4K
;
1133 size
= r
->end
- addr
;
1134 size
= PAGE_ALIGN_4K(size
);
1135 iova
= reserve_iova(&reserved_iova_list
, IOVA_PFN(addr
),
1136 IOVA_PFN(size
+ addr
) - 1);
1138 printk(KERN_ERR
"Reserve iova failed\n");
1144 static void domain_reserve_special_ranges(struct dmar_domain
*domain
)
1146 copy_reserved_iova(&reserved_iova_list
, &domain
->iovad
);
1149 static inline int guestwidth_to_adjustwidth(int gaw
)
1152 int r
= (gaw
- 12) % 9;
1163 static int domain_init(struct dmar_domain
*domain
, int guest_width
)
1165 struct intel_iommu
*iommu
;
1166 int adjust_width
, agaw
;
1167 unsigned long sagaw
;
1169 init_iova_domain(&domain
->iovad
, DMA_32BIT_PFN
);
1170 spin_lock_init(&domain
->mapping_lock
);
1172 domain_reserve_special_ranges(domain
);
1174 /* calculate AGAW */
1175 iommu
= domain
->iommu
;
1176 if (guest_width
> cap_mgaw(iommu
->cap
))
1177 guest_width
= cap_mgaw(iommu
->cap
);
1178 domain
->gaw
= guest_width
;
1179 adjust_width
= guestwidth_to_adjustwidth(guest_width
);
1180 agaw
= width_to_agaw(adjust_width
);
1181 sagaw
= cap_sagaw(iommu
->cap
);
1182 if (!test_bit(agaw
, &sagaw
)) {
1183 /* hardware doesn't support it, choose a bigger one */
1184 pr_debug("IOMMU: hardware doesn't support agaw %d\n", agaw
);
1185 agaw
= find_next_bit(&sagaw
, 5, agaw
);
1189 domain
->agaw
= agaw
;
1190 INIT_LIST_HEAD(&domain
->devices
);
1192 /* always allocate the top pgd */
1193 domain
->pgd
= (struct dma_pte
*)alloc_pgtable_page();
1196 __iommu_flush_cache(iommu
, domain
->pgd
, PAGE_SIZE_4K
);
1200 static void domain_exit(struct dmar_domain
*domain
)
1204 /* Domain 0 is reserved, so dont process it */
1208 domain_remove_dev_info(domain
);
1210 put_iova_domain(&domain
->iovad
);
1211 end
= DOMAIN_MAX_ADDR(domain
->gaw
);
1212 end
= end
& (~PAGE_MASK_4K
);
1215 dma_pte_clear_range(domain
, 0, end
);
1217 /* free page tables */
1218 dma_pte_free_pagetable(domain
, 0, end
);
1220 iommu_free_domain(domain
);
1221 free_domain_mem(domain
);
1224 static int domain_context_mapping_one(struct dmar_domain
*domain
,
1227 struct context_entry
*context
;
1228 struct intel_iommu
*iommu
= domain
->iommu
;
1229 unsigned long flags
;
1231 pr_debug("Set context mapping for %02x:%02x.%d\n",
1232 bus
, PCI_SLOT(devfn
), PCI_FUNC(devfn
));
1233 BUG_ON(!domain
->pgd
);
1234 context
= device_to_context_entry(iommu
, bus
, devfn
);
1237 spin_lock_irqsave(&iommu
->lock
, flags
);
1238 if (context_present(*context
)) {
1239 spin_unlock_irqrestore(&iommu
->lock
, flags
);
1243 context_set_domain_id(*context
, domain
->id
);
1244 context_set_address_width(*context
, domain
->agaw
);
1245 context_set_address_root(*context
, virt_to_phys(domain
->pgd
));
1246 context_set_translation_type(*context
, CONTEXT_TT_MULTI_LEVEL
);
1247 context_set_fault_enable(*context
);
1248 context_set_present(*context
);
1249 __iommu_flush_cache(iommu
, context
, sizeof(*context
));
1251 /* it's a non-present to present mapping */
1252 if (iommu_flush_context_device(iommu
, domain
->id
,
1253 (((u16
)bus
) << 8) | devfn
, DMA_CCMD_MASK_NOBIT
, 1))
1254 iommu_flush_write_buffer(iommu
);
1256 iommu_flush_iotlb_dsi(iommu
, 0, 0);
1257 spin_unlock_irqrestore(&iommu
->lock
, flags
);
1262 domain_context_mapping(struct dmar_domain
*domain
, struct pci_dev
*pdev
)
1265 struct pci_dev
*tmp
, *parent
;
1267 ret
= domain_context_mapping_one(domain
, pdev
->bus
->number
,
1272 /* dependent device mapping */
1273 tmp
= pci_find_upstream_pcie_bridge(pdev
);
1276 /* Secondary interface's bus number and devfn 0 */
1277 parent
= pdev
->bus
->self
;
1278 while (parent
!= tmp
) {
1279 ret
= domain_context_mapping_one(domain
, parent
->bus
->number
,
1283 parent
= parent
->bus
->self
;
1285 if (tmp
->is_pcie
) /* this is a PCIE-to-PCI bridge */
1286 return domain_context_mapping_one(domain
,
1287 tmp
->subordinate
->number
, 0);
1288 else /* this is a legacy PCI bridge */
1289 return domain_context_mapping_one(domain
,
1290 tmp
->bus
->number
, tmp
->devfn
);
1293 static int domain_context_mapped(struct dmar_domain
*domain
,
1294 struct pci_dev
*pdev
)
1297 struct pci_dev
*tmp
, *parent
;
1299 ret
= device_context_mapped(domain
->iommu
,
1300 pdev
->bus
->number
, pdev
->devfn
);
1303 /* dependent device mapping */
1304 tmp
= pci_find_upstream_pcie_bridge(pdev
);
1307 /* Secondary interface's bus number and devfn 0 */
1308 parent
= pdev
->bus
->self
;
1309 while (parent
!= tmp
) {
1310 ret
= device_context_mapped(domain
->iommu
, parent
->bus
->number
,
1314 parent
= parent
->bus
->self
;
1317 return device_context_mapped(domain
->iommu
,
1318 tmp
->subordinate
->number
, 0);
1320 return device_context_mapped(domain
->iommu
,
1321 tmp
->bus
->number
, tmp
->devfn
);
1325 domain_page_mapping(struct dmar_domain
*domain
, dma_addr_t iova
,
1326 u64 hpa
, size_t size
, int prot
)
1328 u64 start_pfn
, end_pfn
;
1329 struct dma_pte
*pte
;
1332 if ((prot
& (DMA_PTE_READ
|DMA_PTE_WRITE
)) == 0)
1334 iova
&= PAGE_MASK_4K
;
1335 start_pfn
= ((u64
)hpa
) >> PAGE_SHIFT_4K
;
1336 end_pfn
= (PAGE_ALIGN_4K(((u64
)hpa
) + size
)) >> PAGE_SHIFT_4K
;
1338 while (start_pfn
< end_pfn
) {
1339 pte
= addr_to_dma_pte(domain
, iova
+ PAGE_SIZE_4K
* index
);
1342 /* We don't need lock here, nobody else
1343 * touches the iova range
1345 BUG_ON(dma_pte_addr(*pte
));
1346 dma_set_pte_addr(*pte
, start_pfn
<< PAGE_SHIFT_4K
);
1347 dma_set_pte_prot(*pte
, prot
);
1348 __iommu_flush_cache(domain
->iommu
, pte
, sizeof(*pte
));
1355 static void detach_domain_for_dev(struct dmar_domain
*domain
, u8 bus
, u8 devfn
)
1357 clear_context_table(domain
->iommu
, bus
, devfn
);
1358 iommu_flush_context_global(domain
->iommu
, 0);
1359 iommu_flush_iotlb_global(domain
->iommu
, 0);
1362 static void domain_remove_dev_info(struct dmar_domain
*domain
)
1364 struct device_domain_info
*info
;
1365 unsigned long flags
;
1367 spin_lock_irqsave(&device_domain_lock
, flags
);
1368 while (!list_empty(&domain
->devices
)) {
1369 info
= list_entry(domain
->devices
.next
,
1370 struct device_domain_info
, link
);
1371 list_del(&info
->link
);
1372 list_del(&info
->global
);
1374 info
->dev
->dev
.archdata
.iommu
= NULL
;
1375 spin_unlock_irqrestore(&device_domain_lock
, flags
);
1377 detach_domain_for_dev(info
->domain
, info
->bus
, info
->devfn
);
1378 free_devinfo_mem(info
);
1380 spin_lock_irqsave(&device_domain_lock
, flags
);
1382 spin_unlock_irqrestore(&device_domain_lock
, flags
);
1387 * Note: we use struct pci_dev->dev.archdata.iommu stores the info
1389 struct dmar_domain
*
1390 find_domain(struct pci_dev
*pdev
)
1392 struct device_domain_info
*info
;
1394 /* No lock here, assumes no domain exit in normal case */
1395 info
= pdev
->dev
.archdata
.iommu
;
1397 return info
->domain
;
1401 static int dmar_pci_device_match(struct pci_dev
*devices
[], int cnt
,
1402 struct pci_dev
*dev
)
1407 for (index
= 0; index
< cnt
; index
++)
1408 if (dev
== devices
[index
])
1411 /* Check our parent */
1412 dev
= dev
->bus
->self
;
1418 static struct dmar_drhd_unit
*
1419 dmar_find_matched_drhd_unit(struct pci_dev
*dev
)
1421 struct dmar_drhd_unit
*drhd
= NULL
;
1423 list_for_each_entry(drhd
, &dmar_drhd_units
, list
) {
1424 if (drhd
->include_all
|| dmar_pci_device_match(drhd
->devices
,
1425 drhd
->devices_cnt
, dev
))
1432 /* domain is initialized */
1433 static struct dmar_domain
*get_domain_for_dev(struct pci_dev
*pdev
, int gaw
)
1435 struct dmar_domain
*domain
, *found
= NULL
;
1436 struct intel_iommu
*iommu
;
1437 struct dmar_drhd_unit
*drhd
;
1438 struct device_domain_info
*info
, *tmp
;
1439 struct pci_dev
*dev_tmp
;
1440 unsigned long flags
;
1441 int bus
= 0, devfn
= 0;
1443 domain
= find_domain(pdev
);
1447 dev_tmp
= pci_find_upstream_pcie_bridge(pdev
);
1449 if (dev_tmp
->is_pcie
) {
1450 bus
= dev_tmp
->subordinate
->number
;
1453 bus
= dev_tmp
->bus
->number
;
1454 devfn
= dev_tmp
->devfn
;
1456 spin_lock_irqsave(&device_domain_lock
, flags
);
1457 list_for_each_entry(info
, &device_domain_list
, global
) {
1458 if (info
->bus
== bus
&& info
->devfn
== devfn
) {
1459 found
= info
->domain
;
1463 spin_unlock_irqrestore(&device_domain_lock
, flags
);
1464 /* pcie-pci bridge already has a domain, uses it */
1471 /* Allocate new domain for the device */
1472 drhd
= dmar_find_matched_drhd_unit(pdev
);
1474 printk(KERN_ERR
"IOMMU: can't find DMAR for device %s\n",
1478 iommu
= drhd
->iommu
;
1480 domain
= iommu_alloc_domain(iommu
);
1484 if (domain_init(domain
, gaw
)) {
1485 domain_exit(domain
);
1489 /* register pcie-to-pci device */
1491 info
= alloc_devinfo_mem();
1493 domain_exit(domain
);
1497 info
->devfn
= devfn
;
1499 info
->domain
= domain
;
1500 /* This domain is shared by devices under p2p bridge */
1501 domain
->flags
|= DOMAIN_FLAG_MULTIPLE_DEVICES
;
1503 /* pcie-to-pci bridge already has a domain, uses it */
1505 spin_lock_irqsave(&device_domain_lock
, flags
);
1506 list_for_each_entry(tmp
, &device_domain_list
, global
) {
1507 if (tmp
->bus
== bus
&& tmp
->devfn
== devfn
) {
1508 found
= tmp
->domain
;
1513 free_devinfo_mem(info
);
1514 domain_exit(domain
);
1517 list_add(&info
->link
, &domain
->devices
);
1518 list_add(&info
->global
, &device_domain_list
);
1520 spin_unlock_irqrestore(&device_domain_lock
, flags
);
1524 info
= alloc_devinfo_mem();
1527 info
->bus
= pdev
->bus
->number
;
1528 info
->devfn
= pdev
->devfn
;
1530 info
->domain
= domain
;
1531 spin_lock_irqsave(&device_domain_lock
, flags
);
1532 /* somebody is fast */
1533 found
= find_domain(pdev
);
1534 if (found
!= NULL
) {
1535 spin_unlock_irqrestore(&device_domain_lock
, flags
);
1536 if (found
!= domain
) {
1537 domain_exit(domain
);
1540 free_devinfo_mem(info
);
1543 list_add(&info
->link
, &domain
->devices
);
1544 list_add(&info
->global
, &device_domain_list
);
1545 pdev
->dev
.archdata
.iommu
= info
;
1546 spin_unlock_irqrestore(&device_domain_lock
, flags
);
1549 /* recheck it here, maybe others set it */
1550 return find_domain(pdev
);
1553 static int iommu_prepare_identity_map(struct pci_dev
*pdev
, u64 start
, u64 end
)
1555 struct dmar_domain
*domain
;
1561 "IOMMU: Setting identity map for device %s [0x%Lx - 0x%Lx]\n",
1562 pci_name(pdev
), start
, end
);
1563 /* page table init */
1564 domain
= get_domain_for_dev(pdev
, DEFAULT_DOMAIN_ADDRESS_WIDTH
);
1568 /* The address might not be aligned */
1569 base
= start
& PAGE_MASK_4K
;
1571 size
= PAGE_ALIGN_4K(size
);
1572 if (!reserve_iova(&domain
->iovad
, IOVA_PFN(base
),
1573 IOVA_PFN(base
+ size
) - 1)) {
1574 printk(KERN_ERR
"IOMMU: reserve iova failed\n");
1579 pr_debug("Mapping reserved region %lx@%llx for %s\n",
1580 size
, base
, pci_name(pdev
));
1582 * RMRR range might have overlap with physical memory range,
1585 dma_pte_clear_range(domain
, base
, base
+ size
);
1587 ret
= domain_page_mapping(domain
, base
, base
, size
,
1588 DMA_PTE_READ
|DMA_PTE_WRITE
);
1592 /* context entry init */
1593 ret
= domain_context_mapping(domain
, pdev
);
1597 domain_exit(domain
);
1602 static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit
*rmrr
,
1603 struct pci_dev
*pdev
)
1605 if (pdev
->dev
.archdata
.iommu
== DUMMY_DEVICE_DOMAIN_INFO
)
1607 return iommu_prepare_identity_map(pdev
, rmrr
->base_address
,
1608 rmrr
->end_address
+ 1);
#ifdef CONFIG_DMAR_GFX_WA
extern int arch_get_ram_range(int slot, u64 *addr, u64 *size);

/*
 * iommu_prepare_gfx_mapping - graphics workaround: 1:1 map RAM for gfx
 *
 * For every display-class PCI device that is not tagged with
 * DUMMY_DEVICE_DOMAIN_INFO, each RAM range reported by
 * arch_get_ram_range() is identity-mapped.  Mapping for a device stops
 * at the first failure, which is logged; the remaining devices are
 * still processed.
 */
static void __init iommu_prepare_gfx_mapping(void)
{
	struct pci_dev *pdev = NULL;
	u64 base, size;
	int slot;
	int ret;

	for_each_pci_dev(pdev) {
		if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO ||
				!IS_GFX_DEVICE(pdev))
			continue;

		printk(KERN_INFO "IOMMU: gfx device %s 1-1 mapping\n",
			pci_name(pdev));

		ret = 0;
		slot = arch_get_ram_range(0, &base, &size);
		while (slot >= 0) {
			ret = iommu_prepare_identity_map(pdev,
					base, base + size);
			if (ret)
				break;
			slot = arch_get_ram_range(slot, &base, &size);
		}

		if (ret)
			printk(KERN_ERR
				"IOMMU: mapping reserved region failed\n");
	}
}
#else /* !CONFIG_DMAR_GFX_WA */
/*
 * init_dmars() calls this unconditionally, so a no-op stub is needed
 * when the workaround is configured out (same pattern as
 * iommu_prepare_isa()).
 */
static inline void iommu_prepare_gfx_mapping(void)
{
	return;
}
#endif /* CONFIG_DMAR_GFX_WA */
#ifdef CONFIG_DMAR_FLOPPY_WA
/*
 * iommu_prepare_isa - floppy workaround: 1:1 map the low 16M for the
 * ISA/LPC bridge.
 *
 * Looks up the first PCI device of class PCI_CLASS_BRIDGE_ISA and
 * identity-maps [0, 16M) for it.  A failure is only logged.
 */
static inline void iommu_prepare_isa(void)
{
	struct pci_dev *pdev;
	int ret;

	pdev = pci_get_class(PCI_CLASS_BRIDGE_ISA << 8, NULL);
	if (!pdev)
		return;

	printk(KERN_INFO "IOMMU: Prepare 0-16M unity mapping for LPC\n");
	ret = iommu_prepare_identity_map(pdev, 0, 16*1024*1024);

	/* the range attempted above is 0-16M, so report exactly that */
	if (ret)
		printk(KERN_ERR
			"IOMMU: Failed to create 0-16M identity map, "
			"floppy might not work\n");

	/* pci_get_class() returned a referenced device; drop it again */
	pci_dev_put(pdev);
}
#else
static inline void iommu_prepare_isa(void)
{
	return;
}
#endif /* !CONFIG_DMAR_FLOPPY_WA */
1666 int __init
init_dmars(void)
1668 struct dmar_drhd_unit
*drhd
;
1669 struct dmar_rmrr_unit
*rmrr
;
1670 struct pci_dev
*pdev
;
1671 struct intel_iommu
*iommu
;
1677 * initialize and program root entry to not present
1680 for_each_drhd_unit(drhd
) {
1683 iommu
= alloc_iommu(drhd
);
1691 * we could share the same root & context tables
1692 * amoung all IOMMU's. Need to Split it later.
1694 ret
= iommu_alloc_root_entry(iommu
);
1696 printk(KERN_ERR
"IOMMU: allocate root entry failed\n");
1703 * for each dev attached to rmrr
1705 * locate drhd for dev, alloc domain for dev
1706 * allocate free domain
1707 * allocate page table entries for rmrr
1708 * if context not allocated for bus
1709 * allocate and init context
1710 * set present in root table for this bus
1711 * init context with domain, translation etc
1715 for_each_rmrr_units(rmrr
) {
1717 for (i
= 0; i
< rmrr
->devices_cnt
; i
++) {
1718 pdev
= rmrr
->devices
[i
];
1719 /* some BIOS lists non-exist devices in DMAR table */
1722 ret
= iommu_prepare_rmrr_dev(rmrr
, pdev
);
1725 "IOMMU: mapping reserved region failed\n");
1729 iommu_prepare_gfx_mapping();
1731 iommu_prepare_isa();
1736 * global invalidate context cache
1737 * global invalidate iotlb
1738 * enable translation
1740 for_each_drhd_unit(drhd
) {
1743 iommu
= drhd
->iommu
;
1744 sprintf (iommu
->name
, "dmar%d", unit
++);
1746 iommu_flush_write_buffer(iommu
);
1748 ret
= dmar_set_interrupt(iommu
);
1752 iommu_set_root_entry(iommu
);
1754 iommu_flush_context_global(iommu
, 0);
1755 iommu_flush_iotlb_global(iommu
, 0);
1757 iommu_disable_protect_mem_regions(iommu
);
1759 ret
= iommu_enable_translation(iommu
);
1766 for_each_drhd_unit(drhd
) {
1769 iommu
= drhd
->iommu
;
1775 static inline u64
aligned_size(u64 host_addr
, size_t size
)
1778 addr
= (host_addr
& (~PAGE_MASK_4K
)) + size
;
1779 return PAGE_ALIGN_4K(addr
);
1783 iommu_alloc_iova(struct dmar_domain
*domain
, size_t size
, u64 end
)
1787 /* Make sure it's in range */
1788 end
= min_t(u64
, DOMAIN_MAX_ADDR(domain
->gaw
), end
);
1789 if (!size
|| (IOVA_START_ADDR
+ size
> end
))
1792 piova
= alloc_iova(&domain
->iovad
,
1793 size
>> PAGE_SHIFT_4K
, IOVA_PFN(end
), 1);
1797 static struct iova
*
1798 __intel_alloc_iova(struct device
*dev
, struct dmar_domain
*domain
,
1801 struct pci_dev
*pdev
= to_pci_dev(dev
);
1802 struct iova
*iova
= NULL
;
1804 if ((pdev
->dma_mask
<= DMA_32BIT_MASK
) || (dmar_forcedac
)) {
1805 iova
= iommu_alloc_iova(domain
, size
, pdev
->dma_mask
);
1808 * First try to allocate an io virtual address in
1809 * DMA_32BIT_MASK and if that fails then try allocating
1812 iova
= iommu_alloc_iova(domain
, size
, DMA_32BIT_MASK
);
1814 iova
= iommu_alloc_iova(domain
, size
, pdev
->dma_mask
);
1818 printk(KERN_ERR
"Allocating iova for %s failed", pci_name(pdev
));
1825 static struct dmar_domain
*
1826 get_valid_domain_for_dev(struct pci_dev
*pdev
)
1828 struct dmar_domain
*domain
;
1831 domain
= get_domain_for_dev(pdev
,
1832 DEFAULT_DOMAIN_ADDRESS_WIDTH
);
1835 "Allocating domain for %s failed", pci_name(pdev
));
1839 /* make sure context mapping is ok */
1840 if (unlikely(!domain_context_mapped(domain
, pdev
))) {
1841 ret
= domain_context_mapping(domain
, pdev
);
1844 "Domain context map for %s failed",
1853 static dma_addr_t
intel_map_single(struct device
*hwdev
, void *addr
,
1854 size_t size
, int dir
)
1856 struct pci_dev
*pdev
= to_pci_dev(hwdev
);
1858 struct dmar_domain
*domain
;
1859 unsigned long start_addr
;
1863 BUG_ON(dir
== DMA_NONE
);
1864 if (pdev
->dev
.archdata
.iommu
== DUMMY_DEVICE_DOMAIN_INFO
)
1865 return virt_to_bus(addr
);
1867 domain
= get_valid_domain_for_dev(pdev
);
1871 addr
= (void *)virt_to_phys(addr
);
1872 size
= aligned_size((u64
)addr
, size
);
1874 iova
= __intel_alloc_iova(hwdev
, domain
, size
);
1878 start_addr
= iova
->pfn_lo
<< PAGE_SHIFT_4K
;
1881 * Check if DMAR supports zero-length reads on write only
1884 if (dir
== DMA_TO_DEVICE
|| dir
== DMA_BIDIRECTIONAL
|| \
1885 !cap_zlr(domain
->iommu
->cap
))
1886 prot
|= DMA_PTE_READ
;
1887 if (dir
== DMA_FROM_DEVICE
|| dir
== DMA_BIDIRECTIONAL
)
1888 prot
|= DMA_PTE_WRITE
;
1890 * addr - (addr + size) might be partial page, we should map the whole
1891 * page. Note: if two part of one page are separately mapped, we
1892 * might have two guest_addr mapping to the same host addr, but this
1893 * is not a big problem
1895 ret
= domain_page_mapping(domain
, start_addr
,
1896 ((u64
)addr
) & PAGE_MASK_4K
, size
, prot
);
1900 pr_debug("Device %s request: %lx@%llx mapping: %lx@%llx, dir %d\n",
1901 pci_name(pdev
), size
, (u64
)addr
,
1902 size
, (u64
)start_addr
, dir
);
1904 /* it's a non-present to present mapping */
1905 ret
= iommu_flush_iotlb_psi(domain
->iommu
, domain
->id
,
1906 start_addr
, size
>> PAGE_SHIFT_4K
, 1);
1908 iommu_flush_write_buffer(domain
->iommu
);
1910 return (start_addr
+ ((u64
)addr
& (~PAGE_MASK_4K
)));
1914 __free_iova(&domain
->iovad
, iova
);
1915 printk(KERN_ERR
"Device %s request: %lx@%llx dir %d --- failed\n",
1916 pci_name(pdev
), size
, (u64
)addr
, dir
);
1920 static void intel_unmap_single(struct device
*dev
, dma_addr_t dev_addr
,
1921 size_t size
, int dir
)
1923 struct pci_dev
*pdev
= to_pci_dev(dev
);
1924 struct dmar_domain
*domain
;
1925 unsigned long start_addr
;
1928 if (pdev
->dev
.archdata
.iommu
== DUMMY_DEVICE_DOMAIN_INFO
)
1930 domain
= find_domain(pdev
);
1933 iova
= find_iova(&domain
->iovad
, IOVA_PFN(dev_addr
));
1937 start_addr
= iova
->pfn_lo
<< PAGE_SHIFT_4K
;
1938 size
= aligned_size((u64
)dev_addr
, size
);
1940 pr_debug("Device %s unmapping: %lx@%llx\n",
1941 pci_name(pdev
), size
, (u64
)start_addr
);
1943 /* clear the whole page */
1944 dma_pte_clear_range(domain
, start_addr
, start_addr
+ size
);
1945 /* free page tables */
1946 dma_pte_free_pagetable(domain
, start_addr
, start_addr
+ size
);
1948 if (iommu_flush_iotlb_psi(domain
->iommu
, domain
->id
, start_addr
,
1949 size
>> PAGE_SHIFT_4K
, 0))
1950 iommu_flush_write_buffer(domain
->iommu
);
1953 __free_iova(&domain
->iovad
, iova
);
1956 static void * intel_alloc_coherent(struct device
*hwdev
, size_t size
,
1957 dma_addr_t
*dma_handle
, gfp_t flags
)
1962 size
= PAGE_ALIGN_4K(size
);
1963 order
= get_order(size
);
1964 flags
&= ~(GFP_DMA
| GFP_DMA32
);
1966 vaddr
= (void *)__get_free_pages(flags
, order
);
1969 memset(vaddr
, 0, size
);
1971 *dma_handle
= intel_map_single(hwdev
, vaddr
, size
, DMA_BIDIRECTIONAL
);
1974 free_pages((unsigned long)vaddr
, order
);
1978 static void intel_free_coherent(struct device
*hwdev
, size_t size
,
1979 void *vaddr
, dma_addr_t dma_handle
)
1983 size
= PAGE_ALIGN_4K(size
);
1984 order
= get_order(size
);
1986 intel_unmap_single(hwdev
, dma_handle
, size
, DMA_BIDIRECTIONAL
);
1987 free_pages((unsigned long)vaddr
, order
);
1990 #define SG_ENT_VIRT_ADDRESS(sg) (sg_virt((sg)))
1991 static void intel_unmap_sg(struct device
*hwdev
, struct scatterlist
*sglist
,
1992 int nelems
, int dir
)
1995 struct pci_dev
*pdev
= to_pci_dev(hwdev
);
1996 struct dmar_domain
*domain
;
1997 unsigned long start_addr
;
2001 struct scatterlist
*sg
;
2003 if (pdev
->dev
.archdata
.iommu
== DUMMY_DEVICE_DOMAIN_INFO
)
2006 domain
= find_domain(pdev
);
2008 iova
= find_iova(&domain
->iovad
, IOVA_PFN(sglist
[0].dma_address
));
2011 for_each_sg(sglist
, sg
, nelems
, i
) {
2012 addr
= SG_ENT_VIRT_ADDRESS(sg
);
2013 size
+= aligned_size((u64
)addr
, sg
->length
);
2016 start_addr
= iova
->pfn_lo
<< PAGE_SHIFT_4K
;
2018 /* clear the whole page */
2019 dma_pte_clear_range(domain
, start_addr
, start_addr
+ size
);
2020 /* free page tables */
2021 dma_pte_free_pagetable(domain
, start_addr
, start_addr
+ size
);
2023 if (iommu_flush_iotlb_psi(domain
->iommu
, domain
->id
, start_addr
,
2024 size
>> PAGE_SHIFT_4K
, 0))
2025 iommu_flush_write_buffer(domain
->iommu
);
2028 __free_iova(&domain
->iovad
, iova
);
2031 static int intel_nontranslate_map_sg(struct device
*hddev
,
2032 struct scatterlist
*sglist
, int nelems
, int dir
)
2035 struct scatterlist
*sg
;
2037 for_each_sg(sglist
, sg
, nelems
, i
) {
2038 BUG_ON(!sg_page(sg
));
2039 sg
->dma_address
= virt_to_bus(SG_ENT_VIRT_ADDRESS(sg
));
2040 sg
->dma_length
= sg
->length
;
2045 static int intel_map_sg(struct device
*hwdev
, struct scatterlist
*sglist
,
2046 int nelems
, int dir
)
2050 struct pci_dev
*pdev
= to_pci_dev(hwdev
);
2051 struct dmar_domain
*domain
;
2055 struct iova
*iova
= NULL
;
2057 struct scatterlist
*sg
;
2058 unsigned long start_addr
;
2060 BUG_ON(dir
== DMA_NONE
);
2061 if (pdev
->dev
.archdata
.iommu
== DUMMY_DEVICE_DOMAIN_INFO
)
2062 return intel_nontranslate_map_sg(hwdev
, sglist
, nelems
, dir
);
2064 domain
= get_valid_domain_for_dev(pdev
);
2068 for_each_sg(sglist
, sg
, nelems
, i
) {
2069 addr
= SG_ENT_VIRT_ADDRESS(sg
);
2070 addr
= (void *)virt_to_phys(addr
);
2071 size
+= aligned_size((u64
)addr
, sg
->length
);
2074 iova
= __intel_alloc_iova(hwdev
, domain
, size
);
2076 sglist
->dma_length
= 0;
2081 * Check if DMAR supports zero-length reads on write only
2084 if (dir
== DMA_TO_DEVICE
|| dir
== DMA_BIDIRECTIONAL
|| \
2085 !cap_zlr(domain
->iommu
->cap
))
2086 prot
|= DMA_PTE_READ
;
2087 if (dir
== DMA_FROM_DEVICE
|| dir
== DMA_BIDIRECTIONAL
)
2088 prot
|= DMA_PTE_WRITE
;
2090 start_addr
= iova
->pfn_lo
<< PAGE_SHIFT_4K
;
2092 for_each_sg(sglist
, sg
, nelems
, i
) {
2093 addr
= SG_ENT_VIRT_ADDRESS(sg
);
2094 addr
= (void *)virt_to_phys(addr
);
2095 size
= aligned_size((u64
)addr
, sg
->length
);
2096 ret
= domain_page_mapping(domain
, start_addr
+ offset
,
2097 ((u64
)addr
) & PAGE_MASK_4K
,
2100 /* clear the page */
2101 dma_pte_clear_range(domain
, start_addr
,
2102 start_addr
+ offset
);
2103 /* free page tables */
2104 dma_pte_free_pagetable(domain
, start_addr
,
2105 start_addr
+ offset
);
2107 __free_iova(&domain
->iovad
, iova
);
2110 sg
->dma_address
= start_addr
+ offset
+
2111 ((u64
)addr
& (~PAGE_MASK_4K
));
2112 sg
->dma_length
= sg
->length
;
2116 /* it's a non-present to present mapping */
2117 if (iommu_flush_iotlb_psi(domain
->iommu
, domain
->id
,
2118 start_addr
, offset
>> PAGE_SHIFT_4K
, 1))
2119 iommu_flush_write_buffer(domain
->iommu
);
2123 static struct dma_mapping_ops intel_dma_ops
= {
2124 .alloc_coherent
= intel_alloc_coherent
,
2125 .free_coherent
= intel_free_coherent
,
2126 .map_single
= intel_map_single
,
2127 .unmap_single
= intel_unmap_single
,
2128 .map_sg
= intel_map_sg
,
2129 .unmap_sg
= intel_unmap_sg
,
2132 static inline int iommu_domain_cache_init(void)
2136 iommu_domain_cache
= kmem_cache_create("iommu_domain",
2137 sizeof(struct dmar_domain
),
2142 if (!iommu_domain_cache
) {
2143 printk(KERN_ERR
"Couldn't create iommu_domain cache\n");
2150 static inline int iommu_devinfo_cache_init(void)
2154 iommu_devinfo_cache
= kmem_cache_create("iommu_devinfo",
2155 sizeof(struct device_domain_info
),
2160 if (!iommu_devinfo_cache
) {
2161 printk(KERN_ERR
"Couldn't create devinfo cache\n");
2168 static inline int iommu_iova_cache_init(void)
2172 iommu_iova_cache
= kmem_cache_create("iommu_iova",
2173 sizeof(struct iova
),
2178 if (!iommu_iova_cache
) {
2179 printk(KERN_ERR
"Couldn't create iova cache\n");
2186 static int __init
iommu_init_mempool(void)
2189 ret
= iommu_iova_cache_init();
2193 ret
= iommu_domain_cache_init();
2197 ret
= iommu_devinfo_cache_init();
2201 kmem_cache_destroy(iommu_domain_cache
);
2203 kmem_cache_destroy(iommu_iova_cache
);
2208 static void __init
iommu_exit_mempool(void)
2210 kmem_cache_destroy(iommu_devinfo_cache
);
2211 kmem_cache_destroy(iommu_domain_cache
);
2212 kmem_cache_destroy(iommu_iova_cache
);
2216 void __init
detect_intel_iommu(void)
2218 if (swiotlb
|| no_iommu
|| iommu_detected
|| dmar_disabled
)
2220 if (early_dmar_detect()) {
2225 static void __init
init_no_remapping_devices(void)
2227 struct dmar_drhd_unit
*drhd
;
2229 for_each_drhd_unit(drhd
) {
2230 if (!drhd
->include_all
) {
2232 for (i
= 0; i
< drhd
->devices_cnt
; i
++)
2233 if (drhd
->devices
[i
] != NULL
)
2235 /* ignore DMAR unit if no pci devices exist */
2236 if (i
== drhd
->devices_cnt
)
2244 for_each_drhd_unit(drhd
) {
2246 if (drhd
->ignored
|| drhd
->include_all
)
2249 for (i
= 0; i
< drhd
->devices_cnt
; i
++)
2250 if (drhd
->devices
[i
] &&
2251 !IS_GFX_DEVICE(drhd
->devices
[i
]))
2254 if (i
< drhd
->devices_cnt
)
2257 /* bypass IOMMU if it is just for gfx devices */
2259 for (i
= 0; i
< drhd
->devices_cnt
; i
++) {
2260 if (!drhd
->devices
[i
])
2262 drhd
->devices
[i
]->dev
.archdata
.iommu
= DUMMY_DEVICE_DOMAIN_INFO
;
2267 int __init
intel_iommu_init(void)
2271 if (no_iommu
|| swiotlb
|| dmar_disabled
)
2274 if (dmar_table_init())
2277 iommu_init_mempool();
2278 dmar_init_reserved_ranges();
2280 init_no_remapping_devices();
2284 printk(KERN_ERR
"IOMMU: dmar init failed\n");
2285 put_iova_domain(&reserved_iova_list
);
2286 iommu_exit_mempool();
2290 "PCI-DMA: Intel(R) Virtualization Technology for Directed I/O\n");
2293 dma_ops
= &intel_dma_ops
;