x64, x2apic/intr-remap: move IOMMU_WAIT_OP() macro to intel-iommu.h
drivers/pci/intel-iommu.c
1 /*
2 * Copyright (c) 2006, Intel Corporation.
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
13 * You should have received a copy of the GNU General Public License along with
14 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
15 * Place - Suite 330, Boston, MA 02111-1307 USA.
17 * Copyright (C) 2006-2008 Intel Corporation
18 * Author: Ashok Raj <ashok.raj@intel.com>
19 * Author: Shaohua Li <shaohua.li@intel.com>
20 * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com> */
23 #include <linux/init.h>
24 #include <linux/bitmap.h>
25 #include <linux/debugfs.h>
26 #include <linux/slab.h>
27 #include <linux/irq.h>
28 #include <linux/interrupt.h>
29 #include <linux/sysdev.h>
30 #include <linux/spinlock.h>
31 #include <linux/pci.h>
32 #include <linux/dmar.h>
33 #include <linux/dma-mapping.h>
34 #include <linux/mempool.h>
35 #include <linux/timer.h>
36 #include "iova.h"
37 #include "intel-iommu.h"
38 #include <asm/proto.h> /* force_iommu is declared in this header on x86-64 */
39 #include <asm/cacheflush.h>
40 #include <asm/gart.h>
41 #include "pci.h"
43 #define IS_GFX_DEVICE(pdev) ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY)
44 #define IS_ISA_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA)
46 #define IOAPIC_RANGE_START (0xfee00000)
47 #define IOAPIC_RANGE_END (0xfeefffff)
48 #define IOVA_START_ADDR (0x1000)
50 #define DEFAULT_DOMAIN_ADDRESS_WIDTH 48
52 #define DOMAIN_MAX_ADDR(gaw) ((((u64)1) << gaw) - 1)
55 static void flush_unmaps_timeout(unsigned long data);
57 DEFINE_TIMER(unmap_timer, flush_unmaps_timeout, 0, 0);
59 #define HIGH_WATER_MARK 250
60 struct deferred_flush_tables {
61 int next;
62 struct iova *iova[HIGH_WATER_MARK];
63 struct dmar_domain *domain[HIGH_WATER_MARK];
66 static struct deferred_flush_tables *deferred_flush;
68 /* bitmap for indexing intel_iommus */
69 static int g_num_of_iommus;
71 static DEFINE_SPINLOCK(async_umap_flush_lock);
72 static LIST_HEAD(unmaps_to_do);
74 static int timer_on;
75 static long list_size;
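/*
 * Deferred ("lazy") unmap batching: instead of flushing the IOTLB on every
 * unmap, freed IOVAs are queued per IOMMU in deferred_flush[] and released
 * in flush_unmaps() after a single global IOTLB flush, triggered either by
 * the 10ms unmap_timer or once list_size hits HIGH_WATER_MARK. Booting with
 * intel_iommu=strict (intel_iommu_strict) disables this batching.
 */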
77 static void domain_remove_dev_info(struct dmar_domain *domain);
79 static int dmar_disabled;
80 static int __initdata dmar_map_gfx = 1;
81 static int dmar_forcedac;
82 static int intel_iommu_strict;
84 #define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1))
85 static DEFINE_SPINLOCK(device_domain_lock);
86 static LIST_HEAD(device_domain_list);
88 static int __init intel_iommu_setup(char *str)
90 if (!str)
91 return -EINVAL;
92 while (*str) {
93 if (!strncmp(str, "off", 3)) {
94 dmar_disabled = 1;
95 printk(KERN_INFO"Intel-IOMMU: disabled\n");
96 } else if (!strncmp(str, "igfx_off", 8)) {
97 dmar_map_gfx = 0;
98 printk(KERN_INFO
99 "Intel-IOMMU: disable GFX device mapping\n");
100 } else if (!strncmp(str, "forcedac", 8)) {
101 printk(KERN_INFO
102 "Intel-IOMMU: Forcing DAC for PCI devices\n");
103 dmar_forcedac = 1;
104 } else if (!strncmp(str, "strict", 6)) {
105 printk(KERN_INFO
106 "Intel-IOMMU: disable batched IOTLB flush\n");
107 intel_iommu_strict = 1;
110 str += strcspn(str, ",");
111 while (*str == ',')
112 str++;
114 return 0;
116 __setup("intel_iommu=", intel_iommu_setup);
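/*
 * Summary of the options parsed above (comma-separated after "intel_iommu="):
 *   off      - disable the Intel IOMMU code entirely (dmar_disabled)
 *   igfx_off - keep the IOMMU but do not map graphics devices (dmar_map_gfx)
 *   forcedac - allocate from the device's full DMA mask instead of
 *              preferring IOVAs below 4GB (dmar_forcedac)
 *   strict   - flush the IOTLB synchronously on every unmap (intel_iommu_strict)
 */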
118 static struct kmem_cache *iommu_domain_cache;
119 static struct kmem_cache *iommu_devinfo_cache;
120 static struct kmem_cache *iommu_iova_cache;
122 static inline void *iommu_kmem_cache_alloc(struct kmem_cache *cachep)
124 unsigned int flags;
125 void *vaddr;
127 /* trying to avoid low memory issues */
128 flags = current->flags & PF_MEMALLOC;
129 current->flags |= PF_MEMALLOC;
130 vaddr = kmem_cache_alloc(cachep, GFP_ATOMIC);
131 current->flags &= (~PF_MEMALLOC | flags);
132 return vaddr;
136 static inline void *alloc_pgtable_page(void)
138 unsigned int flags;
139 void *vaddr;
141 /* trying to avoid low memory issues */
142 flags = current->flags & PF_MEMALLOC;
143 current->flags |= PF_MEMALLOC;
144 vaddr = (void *)get_zeroed_page(GFP_ATOMIC);
145 current->flags &= (~PF_MEMALLOC | flags);
146 return vaddr;
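/*
 * Both allocators above temporarily set PF_MEMALLOC so the GFP_ATOMIC
 * allocation may dip into the emergency reserves, then restore the caller's
 * original state. The restore works because 'flags' is either 0 or
 * PF_MEMALLOC: ANDing with (~PF_MEMALLOC | flags) clears the bit only if it
 * was not set on entry.
 */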
149 static inline void free_pgtable_page(void *vaddr)
151 free_page((unsigned long)vaddr);
154 static inline void *alloc_domain_mem(void)
156 return iommu_kmem_cache_alloc(iommu_domain_cache);
159 static inline void free_domain_mem(void *vaddr)
161 kmem_cache_free(iommu_domain_cache, vaddr);
164 static inline void * alloc_devinfo_mem(void)
166 return iommu_kmem_cache_alloc(iommu_devinfo_cache);
169 static inline void free_devinfo_mem(void *vaddr)
171 kmem_cache_free(iommu_devinfo_cache, vaddr);
174 struct iova *alloc_iova_mem(void)
176 return iommu_kmem_cache_alloc(iommu_iova_cache);
179 void free_iova_mem(struct iova *iova)
181 kmem_cache_free(iommu_iova_cache, iova);
184 static inline void __iommu_flush_cache(
185 struct intel_iommu *iommu, void *addr, int size)
187 if (!ecap_coherent(iommu->ecap))
188 clflush_cache_range(addr, size);
191 /* Gets context entry for a given bus and devfn */
192 static struct context_entry * device_to_context_entry(struct intel_iommu *iommu,
193 u8 bus, u8 devfn)
195 struct root_entry *root;
196 struct context_entry *context;
197 unsigned long phy_addr;
198 unsigned long flags;
200 spin_lock_irqsave(&iommu->lock, flags);
201 root = &iommu->root_entry[bus];
202 context = get_context_addr_from_root(root);
203 if (!context) {
204 context = (struct context_entry *)alloc_pgtable_page();
205 if (!context) {
206 spin_unlock_irqrestore(&iommu->lock, flags);
207 return NULL;
209 __iommu_flush_cache(iommu, (void *)context, PAGE_SIZE_4K);
210 phy_addr = virt_to_phys((void *)context);
211 set_root_value(root, phy_addr);
212 set_root_present(root);
213 __iommu_flush_cache(iommu, root, sizeof(*root));
215 spin_unlock_irqrestore(&iommu->lock, flags);
216 return &context[devfn];
219 static int device_context_mapped(struct intel_iommu *iommu, u8 bus, u8 devfn)
221 struct root_entry *root;
222 struct context_entry *context;
223 int ret;
224 unsigned long flags;
226 spin_lock_irqsave(&iommu->lock, flags);
227 root = &iommu->root_entry[bus];
228 context = get_context_addr_from_root(root);
229 if (!context) {
230 ret = 0;
231 goto out;
233 ret = context_present(context[devfn]);
234 out:
235 spin_unlock_irqrestore(&iommu->lock, flags);
236 return ret;
239 static void clear_context_table(struct intel_iommu *iommu, u8 bus, u8 devfn)
241 struct root_entry *root;
242 struct context_entry *context;
243 unsigned long flags;
245 spin_lock_irqsave(&iommu->lock, flags);
246 root = &iommu->root_entry[bus];
247 context = get_context_addr_from_root(root);
248 if (context) {
249 context_clear_entry(context[devfn]);
250 __iommu_flush_cache(iommu, &context[devfn], \
251 sizeof(*context));
253 spin_unlock_irqrestore(&iommu->lock, flags);
256 static void free_context_table(struct intel_iommu *iommu)
258 struct root_entry *root;
259 int i;
260 unsigned long flags;
261 struct context_entry *context;
263 spin_lock_irqsave(&iommu->lock, flags);
264 if (!iommu->root_entry) {
265 goto out;
267 for (i = 0; i < ROOT_ENTRY_NR; i++) {
268 root = &iommu->root_entry[i];
269 context = get_context_addr_from_root(root);
270 if (context)
271 free_pgtable_page(context);
273 free_pgtable_page(iommu->root_entry);
274 iommu->root_entry = NULL;
275 out:
276 spin_unlock_irqrestore(&iommu->lock, flags);
279 /* page table handling */
280 #define LEVEL_STRIDE (9)
281 #define LEVEL_MASK (((u64)1 << LEVEL_STRIDE) - 1)
283 static inline int agaw_to_level(int agaw)
285 return agaw + 2;
288 static inline int agaw_to_width(int agaw)
290 return 30 + agaw * LEVEL_STRIDE;
294 static inline int width_to_agaw(int width)
296 return (width - 30) / LEVEL_STRIDE;
299 static inline unsigned int level_to_offset_bits(int level)
301 return (12 + (level - 1) * LEVEL_STRIDE);
304 static inline int address_level_offset(u64 addr, int level)
306 return ((addr >> level_to_offset_bits(level)) & LEVEL_MASK);
309 static inline u64 level_mask(int level)
311 return ((u64)-1 << level_to_offset_bits(level));
314 static inline u64 level_size(int level)
316 return ((u64)1 << level_to_offset_bits(level));
319 static inline u64 align_to_level(u64 addr, int level)
321 return ((addr + level_size(level) - 1) & level_mask(level));
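/*
 * Page-table geometry implied by the helpers above: each level indexes
 * LEVEL_STRIDE (9) bits of the address above the 12-bit page offset, the
 * number of levels is agaw + 2 and the usable width is 30 + 9 * agaw.
 * Example: DEFAULT_DOMAIN_ADDRESS_WIDTH (48) gives agaw 2, i.e. a 4-level
 * table where level 1 covers address bits 12-20 and level 4 covers 39-47.
 */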
324 static struct dma_pte * addr_to_dma_pte(struct dmar_domain *domain, u64 addr)
326 int addr_width = agaw_to_width(domain->agaw);
327 struct dma_pte *parent, *pte = NULL;
328 int level = agaw_to_level(domain->agaw);
329 int offset;
330 unsigned long flags;
332 BUG_ON(!domain->pgd);
334 addr &= (((u64)1) << addr_width) - 1;
335 parent = domain->pgd;
337 spin_lock_irqsave(&domain->mapping_lock, flags);
338 while (level > 0) {
339 void *tmp_page;
341 offset = address_level_offset(addr, level);
342 pte = &parent[offset];
343 if (level == 1)
344 break;
346 if (!dma_pte_present(*pte)) {
347 tmp_page = alloc_pgtable_page();
349 if (!tmp_page) {
350 spin_unlock_irqrestore(&domain->mapping_lock,
351 flags);
352 return NULL;
354 __iommu_flush_cache(domain->iommu, tmp_page,
355 PAGE_SIZE_4K);
356 dma_set_pte_addr(*pte, virt_to_phys(tmp_page));
358 /* higher-level page tables always set r/w; the last-level page
359 * table entry controls read/write */
361 dma_set_pte_readable(*pte);
362 dma_set_pte_writable(*pte);
363 __iommu_flush_cache(domain->iommu, pte, sizeof(*pte));
365 parent = phys_to_virt(dma_pte_addr(*pte));
366 level--;
369 spin_unlock_irqrestore(&domain->mapping_lock, flags);
370 return pte;
373 /* return address's pte at specific level */
374 static struct dma_pte *dma_addr_level_pte(struct dmar_domain *domain, u64 addr,
375 int level)
377 struct dma_pte *parent, *pte = NULL;
378 int total = agaw_to_level(domain->agaw);
379 int offset;
381 parent = domain->pgd;
382 while (level <= total) {
383 offset = address_level_offset(addr, total);
384 pte = &parent[offset];
385 if (level == total)
386 return pte;
388 if (!dma_pte_present(*pte))
389 break;
390 parent = phys_to_virt(dma_pte_addr(*pte));
391 total--;
393 return NULL;
396 /* clear one page's page table */
397 static void dma_pte_clear_one(struct dmar_domain *domain, u64 addr)
399 struct dma_pte *pte = NULL;
401 /* get last level pte */
402 pte = dma_addr_level_pte(domain, addr, 1);
404 if (pte) {
405 dma_clear_pte(*pte);
406 __iommu_flush_cache(domain->iommu, pte, sizeof(*pte));
410 /* clear the last-level ptes; a tlb flush should follow */
411 static void dma_pte_clear_range(struct dmar_domain *domain, u64 start, u64 end)
413 int addr_width = agaw_to_width(domain->agaw);
415 start &= (((u64)1) << addr_width) - 1;
416 end &= (((u64)1) << addr_width) - 1;
417 /* in case it's a partial page */
418 start = PAGE_ALIGN_4K(start);
419 end &= PAGE_MASK_4K;
421 /* we don't need lock here, nobody else touches the iova range */
422 while (start < end) {
423 dma_pte_clear_one(domain, start);
424 start += PAGE_SIZE_4K;
428 /* free page table pages. last level pte should already be cleared */
429 static void dma_pte_free_pagetable(struct dmar_domain *domain,
430 u64 start, u64 end)
432 int addr_width = agaw_to_width(domain->agaw);
433 struct dma_pte *pte;
434 int total = agaw_to_level(domain->agaw);
435 int level;
436 u64 tmp;
438 start &= (((u64)1) << addr_width) - 1;
439 end &= (((u64)1) << addr_width) - 1;
441 /* we don't need lock here, nobody else touches the iova range */
442 level = 2;
443 while (level <= total) {
444 tmp = align_to_level(start, level);
445 if (tmp >= end || (tmp + level_size(level) > end))
446 return;
448 while (tmp < end) {
449 pte = dma_addr_level_pte(domain, tmp, level);
450 if (pte) {
451 free_pgtable_page(
452 phys_to_virt(dma_pte_addr(*pte)));
453 dma_clear_pte(*pte);
454 __iommu_flush_cache(domain->iommu,
455 pte, sizeof(*pte));
457 tmp += level_size(level);
459 level++;
461 /* free pgd */
462 if (start == 0 && end >= ((((u64)1) << addr_width) - 1)) {
463 free_pgtable_page(domain->pgd);
464 domain->pgd = NULL;
468 /* iommu handling */
469 static int iommu_alloc_root_entry(struct intel_iommu *iommu)
471 struct root_entry *root;
472 unsigned long flags;
474 root = (struct root_entry *)alloc_pgtable_page();
475 if (!root)
476 return -ENOMEM;
478 __iommu_flush_cache(iommu, root, PAGE_SIZE_4K);
480 spin_lock_irqsave(&iommu->lock, flags);
481 iommu->root_entry = root;
482 spin_unlock_irqrestore(&iommu->lock, flags);
484 return 0;
487 static void iommu_set_root_entry(struct intel_iommu *iommu)
489 void *addr;
490 u32 cmd, sts;
491 unsigned long flag;
493 addr = iommu->root_entry;
495 spin_lock_irqsave(&iommu->register_lock, flag);
496 dmar_writeq(iommu->reg + DMAR_RTADDR_REG, virt_to_phys(addr));
498 cmd = iommu->gcmd | DMA_GCMD_SRTP;
499 writel(cmd, iommu->reg + DMAR_GCMD_REG);
501 /* Make sure the hardware completes it */
502 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
503 readl, (sts & DMA_GSTS_RTPS), sts);
505 spin_unlock_irqrestore(&iommu->register_lock, flag);
508 static void iommu_flush_write_buffer(struct intel_iommu *iommu)
510 u32 val;
511 unsigned long flag;
513 if (!cap_rwbf(iommu->cap))
514 return;
515 val = iommu->gcmd | DMA_GCMD_WBF;
517 spin_lock_irqsave(&iommu->register_lock, flag);
518 writel(val, iommu->reg + DMAR_GCMD_REG);
520 /* Make sure the hardware completes it */
521 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
522 readl, (!(val & DMA_GSTS_WBFS)), val);
524 spin_unlock_irqrestore(&iommu->register_lock, flag);
527 /* the return value determines whether we need a write buffer flush */
528 static int __iommu_flush_context(struct intel_iommu *iommu,
529 u16 did, u16 source_id, u8 function_mask, u64 type,
530 int non_present_entry_flush)
532 u64 val = 0;
533 unsigned long flag;
536 /* In the non-present entry flush case, if the hardware doesn't cache
537 * non-present entries we do nothing; if it does cache them, we flush
538 * the entries of domain 0 (domain id 0 is used to cache any
539 * non-present entries). */
541 if (non_present_entry_flush) {
542 if (!cap_caching_mode(iommu->cap))
543 return 1;
544 else
545 did = 0;
548 switch (type) {
549 case DMA_CCMD_GLOBAL_INVL:
550 val = DMA_CCMD_GLOBAL_INVL;
551 break;
552 case DMA_CCMD_DOMAIN_INVL:
553 val = DMA_CCMD_DOMAIN_INVL|DMA_CCMD_DID(did);
554 break;
555 case DMA_CCMD_DEVICE_INVL:
556 val = DMA_CCMD_DEVICE_INVL|DMA_CCMD_DID(did)
557 | DMA_CCMD_SID(source_id) | DMA_CCMD_FM(function_mask);
558 break;
559 default:
560 BUG();
562 val |= DMA_CCMD_ICC;
564 spin_lock_irqsave(&iommu->register_lock, flag);
565 dmar_writeq(iommu->reg + DMAR_CCMD_REG, val);
567 /* Make sure the hardware completes it */
568 IOMMU_WAIT_OP(iommu, DMAR_CCMD_REG,
569 dmar_readq, (!(val & DMA_CCMD_ICC)), val);
571 spin_unlock_irqrestore(&iommu->register_lock, flag);
573 /* flushing a context entry implicitly flushes the write buffer */
574 return 0;
577 static int inline iommu_flush_context_global(struct intel_iommu *iommu,
578 int non_present_entry_flush)
580 return __iommu_flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL,
581 non_present_entry_flush);
584 static int inline iommu_flush_context_domain(struct intel_iommu *iommu, u16 did,
585 int non_present_entry_flush)
587 return __iommu_flush_context(iommu, did, 0, 0, DMA_CCMD_DOMAIN_INVL,
588 non_present_entry_flush);
591 static int inline iommu_flush_context_device(struct intel_iommu *iommu,
592 u16 did, u16 source_id, u8 function_mask, int non_present_entry_flush)
594 return __iommu_flush_context(iommu, did, source_id, function_mask,
595 DMA_CCMD_DEVICE_INVL, non_present_entry_flush);
598 /* the return value determines whether we need a write buffer flush */
599 static int __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did,
600 u64 addr, unsigned int size_order, u64 type,
601 int non_present_entry_flush)
603 int tlb_offset = ecap_iotlb_offset(iommu->ecap);
604 u64 val = 0, val_iva = 0;
605 unsigned long flag;
608 /* In the non-present entry flush case, if the hardware doesn't cache
609 * non-present entries we do nothing; if it does cache them, we flush
610 * the entries of domain 0 (domain id 0 is used to cache any
611 * non-present entries). */
613 if (non_present_entry_flush) {
614 if (!cap_caching_mode(iommu->cap))
615 return 1;
616 else
617 did = 0;
620 switch (type) {
621 case DMA_TLB_GLOBAL_FLUSH:
622 /* a global flush doesn't need to set IVA_REG */
623 val = DMA_TLB_GLOBAL_FLUSH|DMA_TLB_IVT;
624 break;
625 case DMA_TLB_DSI_FLUSH:
626 val = DMA_TLB_DSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
627 break;
628 case DMA_TLB_PSI_FLUSH:
629 val = DMA_TLB_PSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
630 /* Note: always flush non-leaf currently */
631 val_iva = size_order | addr;
632 break;
633 default:
634 BUG();
636 /* Note: set drain read/write */
637 #if 0
639 /* This is probably just to be extra safe; it looks like we can
640 * ignore it without any impact. */
642 if (cap_read_drain(iommu->cap))
643 val |= DMA_TLB_READ_DRAIN;
644 #endif
645 if (cap_write_drain(iommu->cap))
646 val |= DMA_TLB_WRITE_DRAIN;
648 spin_lock_irqsave(&iommu->register_lock, flag);
649 /* Note: Only uses first TLB reg currently */
650 if (val_iva)
651 dmar_writeq(iommu->reg + tlb_offset, val_iva);
652 dmar_writeq(iommu->reg + tlb_offset + 8, val);
654 /* Make sure the hardware completes it */
655 IOMMU_WAIT_OP(iommu, tlb_offset + 8,
656 dmar_readq, (!(val & DMA_TLB_IVT)), val);
658 spin_unlock_irqrestore(&iommu->register_lock, flag);
660 /* check IOTLB invalidation granularity */
661 if (DMA_TLB_IAIG(val) == 0)
662 printk(KERN_ERR"IOMMU: flush IOTLB failed\n");
663 if (DMA_TLB_IAIG(val) != DMA_TLB_IIRG(type))
664 pr_debug("IOMMU: tlb flush request %Lx, actual %Lx\n",
665 DMA_TLB_IIRG(type), DMA_TLB_IAIG(val));
666 /* flushing the IOTLB implicitly flushes the write buffer */
667 return 0;
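/*
 * After the invalidation above, the IAIG field read back from the IOTLB
 * register reports the granularity the hardware actually used: 0 means the
 * request was not performed, and a value different from the requested IIRG
 * means the hardware fell back to a coarser flush, which is tolerated and
 * only reported via pr_debug.
 */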
670 static int inline iommu_flush_iotlb_global(struct intel_iommu *iommu,
671 int non_present_entry_flush)
673 return __iommu_flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH,
674 non_present_entry_flush);
677 static int inline iommu_flush_iotlb_dsi(struct intel_iommu *iommu, u16 did,
678 int non_present_entry_flush)
680 return __iommu_flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH,
681 non_present_entry_flush);
684 static int iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did,
685 u64 addr, unsigned int pages, int non_present_entry_flush)
687 unsigned int mask;
689 BUG_ON(addr & (~PAGE_MASK_4K));
690 BUG_ON(pages == 0);
692 /* Fallback to domain selective flush if no PSI support */
693 if (!cap_pgsel_inv(iommu->cap))
694 return iommu_flush_iotlb_dsi(iommu, did,
695 non_present_entry_flush);
698 /* PSI requires the number of pages to be a power of two, and the base
699 * address to be naturally aligned to that size. */
701 mask = ilog2(__roundup_pow_of_two(pages));
702 /* Fallback to domain selective flush if size is too big */
703 if (mask > cap_max_amask_val(iommu->cap))
704 return iommu_flush_iotlb_dsi(iommu, did,
705 non_present_entry_flush);
707 return __iommu_flush_iotlb(iommu, did, addr, mask,
708 DMA_TLB_PSI_FLUSH, non_present_entry_flush);
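/*
 * Page-selective invalidation above encodes the region as a base address
 * plus an address mask, mask = ilog2(page count rounded up to a power of
 * two); a request for 5 pages, for instance, becomes an aligned 8-page
 * (mask = 3) flush. If the mask exceeds cap_max_amask_val() or PSI is not
 * supported, the code falls back to a domain-selective flush.
 */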
711 static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu)
713 u32 pmen;
714 unsigned long flags;
716 spin_lock_irqsave(&iommu->register_lock, flags);
717 pmen = readl(iommu->reg + DMAR_PMEN_REG);
718 pmen &= ~DMA_PMEN_EPM;
719 writel(pmen, iommu->reg + DMAR_PMEN_REG);
721 /* wait for the protected region status bit to clear */
722 IOMMU_WAIT_OP(iommu, DMAR_PMEN_REG,
723 readl, !(pmen & DMA_PMEN_PRS), pmen);
725 spin_unlock_irqrestore(&iommu->register_lock, flags);
728 static int iommu_enable_translation(struct intel_iommu *iommu)
730 u32 sts;
731 unsigned long flags;
733 spin_lock_irqsave(&iommu->register_lock, flags);
734 writel(iommu->gcmd|DMA_GCMD_TE, iommu->reg + DMAR_GCMD_REG);
736 /* Make sure the hardware completes it */
737 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
738 readl, (sts & DMA_GSTS_TES), sts);
740 iommu->gcmd |= DMA_GCMD_TE;
741 spin_unlock_irqrestore(&iommu->register_lock, flags);
742 return 0;
745 static int iommu_disable_translation(struct intel_iommu *iommu)
747 u32 sts;
748 unsigned long flag;
750 spin_lock_irqsave(&iommu->register_lock, flag);
751 iommu->gcmd &= ~DMA_GCMD_TE;
752 writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
754 /* Make sure the hardware completes it */
755 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
756 readl, (!(sts & DMA_GSTS_TES)), sts);
758 spin_unlock_irqrestore(&iommu->register_lock, flag);
759 return 0;
762 /* iommu interrupt handling. Most of it is MSI-like. */
764 static const char *fault_reason_strings[] =
766 "Software",
767 "Present bit in root entry is clear",
768 "Present bit in context entry is clear",
769 "Invalid context entry",
770 "Access beyond MGAW",
771 "PTE Write access is not set",
772 "PTE Read access is not set",
773 "Next page table ptr is invalid",
774 "Root table address invalid",
775 "Context table ptr is invalid",
776 "non-zero reserved fields in RTP",
777 "non-zero reserved fields in CTP",
778 "non-zero reserved fields in PTE",
780 #define MAX_FAULT_REASON_IDX (ARRAY_SIZE(fault_reason_strings) - 1)
782 const char *dmar_get_fault_reason(u8 fault_reason)
784 if (fault_reason > MAX_FAULT_REASON_IDX)
785 return "Unknown";
786 else
787 return fault_reason_strings[fault_reason];
790 void dmar_msi_unmask(unsigned int irq)
792 struct intel_iommu *iommu = get_irq_data(irq);
793 unsigned long flag;
795 /* unmask it */
796 spin_lock_irqsave(&iommu->register_lock, flag);
797 writel(0, iommu->reg + DMAR_FECTL_REG);
798 /* Read back a reg to force the posted write to complete */
799 readl(iommu->reg + DMAR_FECTL_REG);
800 spin_unlock_irqrestore(&iommu->register_lock, flag);
803 void dmar_msi_mask(unsigned int irq)
805 unsigned long flag;
806 struct intel_iommu *iommu = get_irq_data(irq);
808 /* mask it */
809 spin_lock_irqsave(&iommu->register_lock, flag);
810 writel(DMA_FECTL_IM, iommu->reg + DMAR_FECTL_REG);
811 /* Read back a reg to force the posted write to complete */
812 readl(iommu->reg + DMAR_FECTL_REG);
813 spin_unlock_irqrestore(&iommu->register_lock, flag);
816 void dmar_msi_write(int irq, struct msi_msg *msg)
818 struct intel_iommu *iommu = get_irq_data(irq);
819 unsigned long flag;
821 spin_lock_irqsave(&iommu->register_lock, flag);
822 writel(msg->data, iommu->reg + DMAR_FEDATA_REG);
823 writel(msg->address_lo, iommu->reg + DMAR_FEADDR_REG);
824 writel(msg->address_hi, iommu->reg + DMAR_FEUADDR_REG);
825 spin_unlock_irqrestore(&iommu->register_lock, flag);
828 void dmar_msi_read(int irq, struct msi_msg *msg)
830 struct intel_iommu *iommu = get_irq_data(irq);
831 unsigned long flag;
833 spin_lock_irqsave(&iommu->register_lock, flag);
834 msg->data = readl(iommu->reg + DMAR_FEDATA_REG);
835 msg->address_lo = readl(iommu->reg + DMAR_FEADDR_REG);
836 msg->address_hi = readl(iommu->reg + DMAR_FEUADDR_REG);
837 spin_unlock_irqrestore(&iommu->register_lock, flag);
840 static int iommu_page_fault_do_one(struct intel_iommu *iommu, int type,
841 u8 fault_reason, u16 source_id, u64 addr)
843 const char *reason;
845 reason = dmar_get_fault_reason(fault_reason);
847 printk(KERN_ERR
848 "DMAR:[%s] Request device [%02x:%02x.%d] "
849 "fault addr %llx \n"
850 "DMAR:[fault reason %02d] %s\n",
851 (type ? "DMA Read" : "DMA Write"),
852 (source_id >> 8), PCI_SLOT(source_id & 0xFF),
853 PCI_FUNC(source_id & 0xFF), addr, fault_reason, reason);
854 return 0;
857 #define PRIMARY_FAULT_REG_LEN (16)
858 static irqreturn_t iommu_page_fault(int irq, void *dev_id)
860 struct intel_iommu *iommu = dev_id;
861 int reg, fault_index;
862 u32 fault_status;
863 unsigned long flag;
865 spin_lock_irqsave(&iommu->register_lock, flag);
866 fault_status = readl(iommu->reg + DMAR_FSTS_REG);
868 /* TBD: ignore advanced fault log currently */
869 if (!(fault_status & DMA_FSTS_PPF))
870 goto clear_overflow;
872 fault_index = dma_fsts_fault_record_index(fault_status);
873 reg = cap_fault_reg_offset(iommu->cap);
874 while (1) {
875 u8 fault_reason;
876 u16 source_id;
877 u64 guest_addr;
878 int type;
879 u32 data;
881 /* highest 32 bits */
882 data = readl(iommu->reg + reg +
883 fault_index * PRIMARY_FAULT_REG_LEN + 12);
884 if (!(data & DMA_FRCD_F))
885 break;
887 fault_reason = dma_frcd_fault_reason(data);
888 type = dma_frcd_type(data);
890 data = readl(iommu->reg + reg +
891 fault_index * PRIMARY_FAULT_REG_LEN + 8);
892 source_id = dma_frcd_source_id(data);
894 guest_addr = dmar_readq(iommu->reg + reg +
895 fault_index * PRIMARY_FAULT_REG_LEN);
896 guest_addr = dma_frcd_page_addr(guest_addr);
897 /* clear the fault */
898 writel(DMA_FRCD_F, iommu->reg + reg +
899 fault_index * PRIMARY_FAULT_REG_LEN + 12);
901 spin_unlock_irqrestore(&iommu->register_lock, flag);
903 iommu_page_fault_do_one(iommu, type, fault_reason,
904 source_id, guest_addr);
906 fault_index++;
907 if (fault_index > cap_num_fault_regs(iommu->cap))
908 fault_index = 0;
909 spin_lock_irqsave(&iommu->register_lock, flag);
911 clear_overflow:
912 /* clear primary fault overflow */
913 fault_status = readl(iommu->reg + DMAR_FSTS_REG);
914 if (fault_status & DMA_FSTS_PFO)
915 writel(DMA_FSTS_PFO, iommu->reg + DMAR_FSTS_REG);
917 spin_unlock_irqrestore(&iommu->register_lock, flag);
918 return IRQ_HANDLED;
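/*
 * The fault handler above walks the primary fault recording registers: each
 * record is PRIMARY_FAULT_REG_LEN (16) bytes, with the faulting page address
 * at offset 0, the source id at offset 8 and the fault reason, type and F
 * bit in the high dword at offset 12. A record is valid while its F bit is
 * set and is released by writing DMA_FRCD_F back to that dword.
 */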
921 int dmar_set_interrupt(struct intel_iommu *iommu)
923 int irq, ret;
925 irq = create_irq();
926 if (!irq) {
927 printk(KERN_ERR "IOMMU: no free vectors\n");
928 return -EINVAL;
931 set_irq_data(irq, iommu);
932 iommu->irq = irq;
934 ret = arch_setup_dmar_msi(irq);
935 if (ret) {
936 set_irq_data(irq, NULL);
937 iommu->irq = 0;
938 destroy_irq(irq);
939 return 0;
942 /* Clear any pending faults so the fault registers start out empty */
943 iommu_page_fault(irq, iommu);
945 ret = request_irq(irq, iommu_page_fault, 0, iommu->name, iommu);
946 if (ret)
947 printk(KERN_ERR "IOMMU: can't request irq\n");
948 return ret;
951 static int iommu_init_domains(struct intel_iommu *iommu)
953 unsigned long ndomains;
954 unsigned long nlongs;
956 ndomains = cap_ndoms(iommu->cap);
957 pr_debug("Number of domains supported <%ld>\n", ndomains);
958 nlongs = BITS_TO_LONGS(ndomains);
960 /* TBD: there might be 64K domains;
961 * consider a different allocation scheme for future chips */
963 iommu->domain_ids = kcalloc(nlongs, sizeof(unsigned long), GFP_KERNEL);
964 if (!iommu->domain_ids) {
965 printk(KERN_ERR "Allocating domain id array failed\n");
966 return -ENOMEM;
968 iommu->domains = kcalloc(ndomains, sizeof(struct dmar_domain *),
969 GFP_KERNEL);
970 if (!iommu->domains) {
971 printk(KERN_ERR "Allocating domain array failed\n");
972 kfree(iommu->domain_ids);
973 return -ENOMEM;
976 spin_lock_init(&iommu->lock);
979 /* if Caching mode is set, then invalid translations are tagged
980 * with domain id 0. Hence we need to pre-allocate it. */
982 if (cap_caching_mode(iommu->cap))
983 set_bit(0, iommu->domain_ids);
984 return 0;
988 static void domain_exit(struct dmar_domain *domain);
990 void free_dmar_iommu(struct intel_iommu *iommu)
992 struct dmar_domain *domain;
993 int i;
995 i = find_first_bit(iommu->domain_ids, cap_ndoms(iommu->cap));
996 for (; i < cap_ndoms(iommu->cap); ) {
997 domain = iommu->domains[i];
998 clear_bit(i, iommu->domain_ids);
999 domain_exit(domain);
1000 i = find_next_bit(iommu->domain_ids,
1001 cap_ndoms(iommu->cap), i+1);
1004 if (iommu->gcmd & DMA_GCMD_TE)
1005 iommu_disable_translation(iommu);
1007 if (iommu->irq) {
1008 set_irq_data(iommu->irq, NULL);
1009 /* This will mask the irq */
1010 free_irq(iommu->irq, iommu);
1011 destroy_irq(iommu->irq);
1014 kfree(iommu->domains);
1015 kfree(iommu->domain_ids);
1017 /* free context mapping */
1018 free_context_table(iommu);
1021 static struct dmar_domain * iommu_alloc_domain(struct intel_iommu *iommu)
1023 unsigned long num;
1024 unsigned long ndomains;
1025 struct dmar_domain *domain;
1026 unsigned long flags;
1028 domain = alloc_domain_mem();
1029 if (!domain)
1030 return NULL;
1032 ndomains = cap_ndoms(iommu->cap);
1034 spin_lock_irqsave(&iommu->lock, flags);
1035 num = find_first_zero_bit(iommu->domain_ids, ndomains);
1036 if (num >= ndomains) {
1037 spin_unlock_irqrestore(&iommu->lock, flags);
1038 free_domain_mem(domain);
1039 printk(KERN_ERR "IOMMU: no free domain ids\n");
1040 return NULL;
1043 set_bit(num, iommu->domain_ids);
1044 domain->id = num;
1045 domain->iommu = iommu;
1046 iommu->domains[num] = domain;
1047 spin_unlock_irqrestore(&iommu->lock, flags);
1049 return domain;
1052 static void iommu_free_domain(struct dmar_domain *domain)
1054 unsigned long flags;
1056 spin_lock_irqsave(&domain->iommu->lock, flags);
1057 clear_bit(domain->id, domain->iommu->domain_ids);
1058 spin_unlock_irqrestore(&domain->iommu->lock, flags);
1061 static struct iova_domain reserved_iova_list;
1062 static struct lock_class_key reserved_alloc_key;
1063 static struct lock_class_key reserved_rbtree_key;
1065 static void dmar_init_reserved_ranges(void)
1067 struct pci_dev *pdev = NULL;
1068 struct iova *iova;
1069 int i;
1070 u64 addr, size;
1072 init_iova_domain(&reserved_iova_list, DMA_32BIT_PFN);
1074 lockdep_set_class(&reserved_iova_list.iova_alloc_lock,
1075 &reserved_alloc_key);
1076 lockdep_set_class(&reserved_iova_list.iova_rbtree_lock,
1077 &reserved_rbtree_key);
1079 /* IOAPIC ranges shouldn't be accessed by DMA */
1080 iova = reserve_iova(&reserved_iova_list, IOVA_PFN(IOAPIC_RANGE_START),
1081 IOVA_PFN(IOAPIC_RANGE_END));
1082 if (!iova)
1083 printk(KERN_ERR "Reserve IOAPIC range failed\n");
1085 /* Reserve all PCI MMIO to avoid peer-to-peer access */
1086 for_each_pci_dev(pdev) {
1087 struct resource *r;
1089 for (i = 0; i < PCI_NUM_RESOURCES; i++) {
1090 r = &pdev->resource[i];
1091 if (!r->flags || !(r->flags & IORESOURCE_MEM))
1092 continue;
1093 addr = r->start;
1094 addr &= PAGE_MASK_4K;
1095 size = r->end - addr;
1096 size = PAGE_ALIGN_4K(size);
1097 iova = reserve_iova(&reserved_iova_list, IOVA_PFN(addr),
1098 IOVA_PFN(size + addr) - 1);
1099 if (!iova)
1100 printk(KERN_ERR "Reserve iova failed\n");
1106 static void domain_reserve_special_ranges(struct dmar_domain *domain)
1108 copy_reserved_iova(&reserved_iova_list, &domain->iovad);
1111 static inline int guestwidth_to_adjustwidth(int gaw)
1113 int agaw;
1114 int r = (gaw - 12) % 9;
1116 if (r == 0)
1117 agaw = gaw;
1118 else
1119 agaw = gaw + 9 - r;
1120 if (agaw > 64)
1121 agaw = 64;
1122 return agaw;
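/*
 * guestwidth_to_adjustwidth() rounds the guest address width up so the bits
 * above the 12-bit page offset split into whole 9-bit levels, capped at 64.
 * For example a 40-bit guest width becomes 48 ((40 - 12) % 9 == 1, so 8 bits
 * are added), while 48 already fits and is returned unchanged.
 */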
1125 static int domain_init(struct dmar_domain *domain, int guest_width)
1127 struct intel_iommu *iommu;
1128 int adjust_width, agaw;
1129 unsigned long sagaw;
1131 init_iova_domain(&domain->iovad, DMA_32BIT_PFN);
1132 spin_lock_init(&domain->mapping_lock);
1134 domain_reserve_special_ranges(domain);
1136 /* calculate AGAW */
1137 iommu = domain->iommu;
1138 if (guest_width > cap_mgaw(iommu->cap))
1139 guest_width = cap_mgaw(iommu->cap);
1140 domain->gaw = guest_width;
1141 adjust_width = guestwidth_to_adjustwidth(guest_width);
1142 agaw = width_to_agaw(adjust_width);
1143 sagaw = cap_sagaw(iommu->cap);
1144 if (!test_bit(agaw, &sagaw)) {
1145 /* hardware doesn't support it, choose a bigger one */
1146 pr_debug("IOMMU: hardware doesn't support agaw %d\n", agaw);
1147 agaw = find_next_bit(&sagaw, 5, agaw);
1148 if (agaw >= 5)
1149 return -ENODEV;
1151 domain->agaw = agaw;
1152 INIT_LIST_HEAD(&domain->devices);
1154 /* always allocate the top pgd */
1155 domain->pgd = (struct dma_pte *)alloc_pgtable_page();
1156 if (!domain->pgd)
1157 return -ENOMEM;
1158 __iommu_flush_cache(iommu, domain->pgd, PAGE_SIZE_4K);
1159 return 0;
1162 static void domain_exit(struct dmar_domain *domain)
1164 u64 end;
1166 /* Domain 0 is reserved, so don't process it */
1167 if (!domain)
1168 return;
1170 domain_remove_dev_info(domain);
1171 /* destroy iovas */
1172 put_iova_domain(&domain->iovad);
1173 end = DOMAIN_MAX_ADDR(domain->gaw);
1174 end = end & (~PAGE_MASK_4K);
1176 /* clear ptes */
1177 dma_pte_clear_range(domain, 0, end);
1179 /* free page tables */
1180 dma_pte_free_pagetable(domain, 0, end);
1182 iommu_free_domain(domain);
1183 free_domain_mem(domain);
1186 static int domain_context_mapping_one(struct dmar_domain *domain,
1187 u8 bus, u8 devfn)
1189 struct context_entry *context;
1190 struct intel_iommu *iommu = domain->iommu;
1191 unsigned long flags;
1193 pr_debug("Set context mapping for %02x:%02x.%d\n",
1194 bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
1195 BUG_ON(!domain->pgd);
1196 context = device_to_context_entry(iommu, bus, devfn);
1197 if (!context)
1198 return -ENOMEM;
1199 spin_lock_irqsave(&iommu->lock, flags);
1200 if (context_present(*context)) {
1201 spin_unlock_irqrestore(&iommu->lock, flags);
1202 return 0;
1205 context_set_domain_id(*context, domain->id);
1206 context_set_address_width(*context, domain->agaw);
1207 context_set_address_root(*context, virt_to_phys(domain->pgd));
1208 context_set_translation_type(*context, CONTEXT_TT_MULTI_LEVEL);
1209 context_set_fault_enable(*context);
1210 context_set_present(*context);
1211 __iommu_flush_cache(iommu, context, sizeof(*context));
1213 /* it's a non-present to present mapping */
1214 if (iommu_flush_context_device(iommu, domain->id,
1215 (((u16)bus) << 8) | devfn, DMA_CCMD_MASK_NOBIT, 1))
1216 iommu_flush_write_buffer(iommu);
1217 else
1218 iommu_flush_iotlb_dsi(iommu, 0, 0);
1219 spin_unlock_irqrestore(&iommu->lock, flags);
1220 return 0;
1223 static int
1224 domain_context_mapping(struct dmar_domain *domain, struct pci_dev *pdev)
1226 int ret;
1227 struct pci_dev *tmp, *parent;
1229 ret = domain_context_mapping_one(domain, pdev->bus->number,
1230 pdev->devfn);
1231 if (ret)
1232 return ret;
1234 /* dependent device mapping */
1235 tmp = pci_find_upstream_pcie_bridge(pdev);
1236 if (!tmp)
1237 return 0;
1238 /* Secondary interface's bus number and devfn 0 */
1239 parent = pdev->bus->self;
1240 while (parent != tmp) {
1241 ret = domain_context_mapping_one(domain, parent->bus->number,
1242 parent->devfn);
1243 if (ret)
1244 return ret;
1245 parent = parent->bus->self;
1247 if (tmp->is_pcie) /* this is a PCIE-to-PCI bridge */
1248 return domain_context_mapping_one(domain,
1249 tmp->subordinate->number, 0);
1250 else /* this is a legacy PCI bridge */
1251 return domain_context_mapping_one(domain,
1252 tmp->bus->number, tmp->devfn);
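/*
 * For a device behind a PCIe-to-PCI bridge, the mapping above is also
 * installed for every bridge on the upstream path and finally for the
 * bridge's secondary bus with devfn 0 (or the bridge's own bus/devfn for a
 * legacy PCI bridge), presumably because DMA from conventional PCI devices
 * arrives tagged with the bridge's requester id rather than their own.
 */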
1255 static int domain_context_mapped(struct dmar_domain *domain,
1256 struct pci_dev *pdev)
1258 int ret;
1259 struct pci_dev *tmp, *parent;
1261 ret = device_context_mapped(domain->iommu,
1262 pdev->bus->number, pdev->devfn);
1263 if (!ret)
1264 return ret;
1265 /* dependent device mapping */
1266 tmp = pci_find_upstream_pcie_bridge(pdev);
1267 if (!tmp)
1268 return ret;
1269 /* Secondary interface's bus number and devfn 0 */
1270 parent = pdev->bus->self;
1271 while (parent != tmp) {
1272 ret = device_context_mapped(domain->iommu, parent->bus->number,
1273 parent->devfn);
1274 if (!ret)
1275 return ret;
1276 parent = parent->bus->self;
1278 if (tmp->is_pcie)
1279 return device_context_mapped(domain->iommu,
1280 tmp->subordinate->number, 0);
1281 else
1282 return device_context_mapped(domain->iommu,
1283 tmp->bus->number, tmp->devfn);
1286 static int
1287 domain_page_mapping(struct dmar_domain *domain, dma_addr_t iova,
1288 u64 hpa, size_t size, int prot)
1290 u64 start_pfn, end_pfn;
1291 struct dma_pte *pte;
1292 int index;
1294 if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0)
1295 return -EINVAL;
1296 iova &= PAGE_MASK_4K;
1297 start_pfn = ((u64)hpa) >> PAGE_SHIFT_4K;
1298 end_pfn = (PAGE_ALIGN_4K(((u64)hpa) + size)) >> PAGE_SHIFT_4K;
1299 index = 0;
1300 while (start_pfn < end_pfn) {
1301 pte = addr_to_dma_pte(domain, iova + PAGE_SIZE_4K * index);
1302 if (!pte)
1303 return -ENOMEM;
1304 /* We don't need lock here, nobody else
1305 * touches the iova range */
1307 BUG_ON(dma_pte_addr(*pte));
1308 dma_set_pte_addr(*pte, start_pfn << PAGE_SHIFT_4K);
1309 dma_set_pte_prot(*pte, prot);
1310 __iommu_flush_cache(domain->iommu, pte, sizeof(*pte));
1311 start_pfn++;
1312 index++;
1314 return 0;
1317 static void detach_domain_for_dev(struct dmar_domain *domain, u8 bus, u8 devfn)
1319 clear_context_table(domain->iommu, bus, devfn);
1320 iommu_flush_context_global(domain->iommu, 0);
1321 iommu_flush_iotlb_global(domain->iommu, 0);
1324 static void domain_remove_dev_info(struct dmar_domain *domain)
1326 struct device_domain_info *info;
1327 unsigned long flags;
1329 spin_lock_irqsave(&device_domain_lock, flags);
1330 while (!list_empty(&domain->devices)) {
1331 info = list_entry(domain->devices.next,
1332 struct device_domain_info, link);
1333 list_del(&info->link);
1334 list_del(&info->global);
1335 if (info->dev)
1336 info->dev->dev.archdata.iommu = NULL;
1337 spin_unlock_irqrestore(&device_domain_lock, flags);
1339 detach_domain_for_dev(info->domain, info->bus, info->devfn);
1340 free_devinfo_mem(info);
1342 spin_lock_irqsave(&device_domain_lock, flags);
1344 spin_unlock_irqrestore(&device_domain_lock, flags);
1348 /* find_domain
1349 * Note: we store the device_domain_info in struct pci_dev->dev.archdata.iommu */
1351 struct dmar_domain *
1352 find_domain(struct pci_dev *pdev)
1354 struct device_domain_info *info;
1356 /* No lock here, assumes no domain exit in normal case */
1357 info = pdev->dev.archdata.iommu;
1358 if (info)
1359 return info->domain;
1360 return NULL;
1363 /* domain is initialized */
1364 static struct dmar_domain *get_domain_for_dev(struct pci_dev *pdev, int gaw)
1366 struct dmar_domain *domain, *found = NULL;
1367 struct intel_iommu *iommu;
1368 struct dmar_drhd_unit *drhd;
1369 struct device_domain_info *info, *tmp;
1370 struct pci_dev *dev_tmp;
1371 unsigned long flags;
1372 int bus = 0, devfn = 0;
1374 domain = find_domain(pdev);
1375 if (domain)
1376 return domain;
1378 dev_tmp = pci_find_upstream_pcie_bridge(pdev);
1379 if (dev_tmp) {
1380 if (dev_tmp->is_pcie) {
1381 bus = dev_tmp->subordinate->number;
1382 devfn = 0;
1383 } else {
1384 bus = dev_tmp->bus->number;
1385 devfn = dev_tmp->devfn;
1387 spin_lock_irqsave(&device_domain_lock, flags);
1388 list_for_each_entry(info, &device_domain_list, global) {
1389 if (info->bus == bus && info->devfn == devfn) {
1390 found = info->domain;
1391 break;
1394 spin_unlock_irqrestore(&device_domain_lock, flags);
1395 /* the pcie-pci bridge already has a domain, use it */
1396 if (found) {
1397 domain = found;
1398 goto found_domain;
1402 /* Allocate new domain for the device */
1403 drhd = dmar_find_matched_drhd_unit(pdev);
1404 if (!drhd) {
1405 printk(KERN_ERR "IOMMU: can't find DMAR for device %s\n",
1406 pci_name(pdev));
1407 return NULL;
1409 iommu = drhd->iommu;
1411 domain = iommu_alloc_domain(iommu);
1412 if (!domain)
1413 goto error;
1415 if (domain_init(domain, gaw)) {
1416 domain_exit(domain);
1417 goto error;
1420 /* register pcie-to-pci device */
1421 if (dev_tmp) {
1422 info = alloc_devinfo_mem();
1423 if (!info) {
1424 domain_exit(domain);
1425 goto error;
1427 info->bus = bus;
1428 info->devfn = devfn;
1429 info->dev = NULL;
1430 info->domain = domain;
1431 /* This domain is shared by devices under p2p bridge */
1432 domain->flags |= DOMAIN_FLAG_MULTIPLE_DEVICES;
1434 /* the pcie-to-pci bridge already has a domain, use it */
1435 found = NULL;
1436 spin_lock_irqsave(&device_domain_lock, flags);
1437 list_for_each_entry(tmp, &device_domain_list, global) {
1438 if (tmp->bus == bus && tmp->devfn == devfn) {
1439 found = tmp->domain;
1440 break;
1443 if (found) {
1444 free_devinfo_mem(info);
1445 domain_exit(domain);
1446 domain = found;
1447 } else {
1448 list_add(&info->link, &domain->devices);
1449 list_add(&info->global, &device_domain_list);
1451 spin_unlock_irqrestore(&device_domain_lock, flags);
1454 found_domain:
1455 info = alloc_devinfo_mem();
1456 if (!info)
1457 goto error;
1458 info->bus = pdev->bus->number;
1459 info->devfn = pdev->devfn;
1460 info->dev = pdev;
1461 info->domain = domain;
1462 spin_lock_irqsave(&device_domain_lock, flags);
1463 /* somebody else was faster and already set up the domain */
1464 found = find_domain(pdev);
1465 if (found != NULL) {
1466 spin_unlock_irqrestore(&device_domain_lock, flags);
1467 if (found != domain) {
1468 domain_exit(domain);
1469 domain = found;
1471 free_devinfo_mem(info);
1472 return domain;
1474 list_add(&info->link, &domain->devices);
1475 list_add(&info->global, &device_domain_list);
1476 pdev->dev.archdata.iommu = info;
1477 spin_unlock_irqrestore(&device_domain_lock, flags);
1478 return domain;
1479 error:
1480 /* recheck it here, maybe others set it */
1481 return find_domain(pdev);
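/*
 * get_domain_for_dev() above either reuses an existing domain (devices
 * behind the same PCIe-to-PCI bridge share one domain, marked
 * DOMAIN_FLAG_MULTIPLE_DEVICES and keyed by the bridge's bus/devfn) or
 * allocates and initializes a new one from the matching DRHD's IOMMU. The
 * result is rechecked under device_domain_lock so that a concurrent caller
 * that raced ahead wins and the locally created domain is torn down.
 */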
1484 static int iommu_prepare_identity_map(struct pci_dev *pdev, u64 start, u64 end)
1486 struct dmar_domain *domain;
1487 unsigned long size;
1488 u64 base;
1489 int ret;
1491 printk(KERN_INFO
1492 "IOMMU: Setting identity map for device %s [0x%Lx - 0x%Lx]\n",
1493 pci_name(pdev), start, end);
1494 /* page table init */
1495 domain = get_domain_for_dev(pdev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
1496 if (!domain)
1497 return -ENOMEM;
1499 /* The address might not be aligned */
1500 base = start & PAGE_MASK_4K;
1501 size = end - base;
1502 size = PAGE_ALIGN_4K(size);
1503 if (!reserve_iova(&domain->iovad, IOVA_PFN(base),
1504 IOVA_PFN(base + size) - 1)) {
1505 printk(KERN_ERR "IOMMU: reserve iova failed\n");
1506 ret = -ENOMEM;
1507 goto error;
1510 pr_debug("Mapping reserved region %lx@%llx for %s\n",
1511 size, base, pci_name(pdev));
1513 /* The RMRR range might overlap with the physical memory range,
1514 * clear it first */
1516 dma_pte_clear_range(domain, base, base + size);
1518 ret = domain_page_mapping(domain, base, base, size,
1519 DMA_PTE_READ|DMA_PTE_WRITE);
1520 if (ret)
1521 goto error;
1523 /* context entry init */
1524 ret = domain_context_mapping(domain, pdev);
1525 if (!ret)
1526 return 0;
1527 error:
1528 domain_exit(domain);
1529 return ret;
1533 static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit *rmrr,
1534 struct pci_dev *pdev)
1536 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
1537 return 0;
1538 return iommu_prepare_identity_map(pdev, rmrr->base_address,
1539 rmrr->end_address + 1);
1542 #ifdef CONFIG_DMAR_GFX_WA
1543 struct iommu_prepare_data {
1544 struct pci_dev *pdev;
1545 int ret;
1548 static int __init iommu_prepare_work_fn(unsigned long start_pfn,
1549 unsigned long end_pfn, void *datax)
1551 struct iommu_prepare_data *data;
1553 data = (struct iommu_prepare_data *)datax;
1555 data->ret = iommu_prepare_identity_map(data->pdev,
1556 start_pfn<<PAGE_SHIFT, end_pfn<<PAGE_SHIFT);
1557 return data->ret;
1561 static int __init iommu_prepare_with_active_regions(struct pci_dev *pdev)
1563 int nid;
1564 struct iommu_prepare_data data;
1566 data.pdev = pdev;
1567 data.ret = 0;
1569 for_each_online_node(nid) {
1570 work_with_active_regions(nid, iommu_prepare_work_fn, &data);
1571 if (data.ret)
1572 return data.ret;
1574 return data.ret;
1577 static void __init iommu_prepare_gfx_mapping(void)
1579 struct pci_dev *pdev = NULL;
1580 int ret;
1582 for_each_pci_dev(pdev) {
1583 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO ||
1584 !IS_GFX_DEVICE(pdev))
1585 continue;
1586 printk(KERN_INFO "IOMMU: gfx device %s 1-1 mapping\n",
1587 pci_name(pdev));
1588 ret = iommu_prepare_with_active_regions(pdev);
1589 if (ret)
1590 printk(KERN_ERR "IOMMU: mapping reserved region failed\n");
1593 #endif
1595 #ifdef CONFIG_DMAR_FLOPPY_WA
1596 static inline void iommu_prepare_isa(void)
1598 struct pci_dev *pdev;
1599 int ret;
1601 pdev = pci_get_class(PCI_CLASS_BRIDGE_ISA << 8, NULL);
1602 if (!pdev)
1603 return;
1605 printk(KERN_INFO "IOMMU: Prepare 0-16M unity mapping for LPC\n");
1606 ret = iommu_prepare_identity_map(pdev, 0, 16*1024*1024);
1608 if (ret)
1609 printk(KERN_ERR "IOMMU: Failed to create 0-16M identity map, "
1610 "floppy might not work\n");
1613 #else
1614 static inline void iommu_prepare_isa(void)
1616 return;
1618 #endif /* !CONFIG_DMAR_FLOPPY_WA */
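/*
 * The two optional workarounds above set up identity mappings for devices
 * whose DMA would otherwise not be covered by RMRRs: CONFIG_DMAR_GFX_WA maps
 * every active memory region 1:1 for graphics devices, and
 * CONFIG_DMAR_FLOPPY_WA maps the first 16MB 1:1 for the ISA/LPC bridge so
 * legacy floppy DMA keeps working.
 */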
1620 int __init init_dmars(void)
1622 struct dmar_drhd_unit *drhd;
1623 struct dmar_rmrr_unit *rmrr;
1624 struct pci_dev *pdev;
1625 struct intel_iommu *iommu;
1626 int i, ret, unit = 0;
1629 /* for each drhd
1630 * allocate root
1631 * initialize and program root entry to not present
1632 * endfor */
1634 for_each_drhd_unit(drhd) {
1635 g_num_of_iommus++;
1637 /* lock not needed as this is only incremented in the single-
1638 * threaded kernel __init code path; all other accesses are
1639 * read only */
1643 deferred_flush = kzalloc(g_num_of_iommus *
1644 sizeof(struct deferred_flush_tables), GFP_KERNEL);
1645 if (!deferred_flush) {
1646 ret = -ENOMEM;
1647 goto error;
1650 for_each_drhd_unit(drhd) {
1651 if (drhd->ignored)
1652 continue;
1654 iommu = drhd->iommu;
1656 ret = iommu_init_domains(iommu);
1657 if (ret)
1658 goto error;
1661 /* TBD:
1662 * we could share the same root & context tables
1663 * among all IOMMUs. Need to split it later. */
1665 ret = iommu_alloc_root_entry(iommu);
1666 if (ret) {
1667 printk(KERN_ERR "IOMMU: allocate root entry failed\n");
1668 goto error;
1673 /* For each rmrr
1674 * for each dev attached to rmrr
1675 * do
1676 * locate drhd for dev, alloc domain for dev
1677 * allocate free domain
1678 * allocate page table entries for rmrr
1679 * if context not allocated for bus
1680 * allocate and init context
1681 * set present in root table for this bus
1682 * init context with domain, translation etc
1683 * endfor
1684 * endfor */
1686 for_each_rmrr_units(rmrr) {
1687 for (i = 0; i < rmrr->devices_cnt; i++) {
1688 pdev = rmrr->devices[i];
1689 /* some BIOSes list non-existent devices in the DMAR table */
1690 if (!pdev)
1691 continue;
1692 ret = iommu_prepare_rmrr_dev(rmrr, pdev);
1693 if (ret)
1694 printk(KERN_ERR
1695 "IOMMU: mapping reserved region failed\n");
1699 iommu_prepare_gfx_mapping();
1701 iommu_prepare_isa();
1704 /* for each drhd
1705 * enable fault log
1706 * global invalidate context cache
1707 * global invalidate iotlb
1708 * enable translation */
1710 for_each_drhd_unit(drhd) {
1711 if (drhd->ignored)
1712 continue;
1713 iommu = drhd->iommu;
1714 sprintf (iommu->name, "dmar%d", unit++);
1716 iommu_flush_write_buffer(iommu);
1718 ret = dmar_set_interrupt(iommu);
1719 if (ret)
1720 goto error;
1722 iommu_set_root_entry(iommu);
1724 iommu_flush_context_global(iommu, 0);
1725 iommu_flush_iotlb_global(iommu, 0);
1727 iommu_disable_protect_mem_regions(iommu);
1729 ret = iommu_enable_translation(iommu);
1730 if (ret)
1731 goto error;
1734 return 0;
1735 error:
1736 for_each_drhd_unit(drhd) {
1737 if (drhd->ignored)
1738 continue;
1739 iommu = drhd->iommu;
1740 free_iommu(iommu);
1742 return ret;
1745 static inline u64 aligned_size(u64 host_addr, size_t size)
1747 u64 addr;
1748 addr = (host_addr & (~PAGE_MASK_4K)) + size;
1749 return PAGE_ALIGN_4K(addr);
1752 struct iova *
1753 iommu_alloc_iova(struct dmar_domain *domain, size_t size, u64 end)
1755 struct iova *piova;
1757 /* Make sure it's in range */
1758 end = min_t(u64, DOMAIN_MAX_ADDR(domain->gaw), end);
1759 if (!size || (IOVA_START_ADDR + size > end))
1760 return NULL;
1762 piova = alloc_iova(&domain->iovad,
1763 size >> PAGE_SHIFT_4K, IOVA_PFN(end), 1);
1764 return piova;
1767 static struct iova *
1768 __intel_alloc_iova(struct device *dev, struct dmar_domain *domain,
1769 size_t size)
1771 struct pci_dev *pdev = to_pci_dev(dev);
1772 struct iova *iova = NULL;
1774 if ((pdev->dma_mask <= DMA_32BIT_MASK) || (dmar_forcedac)) {
1775 iova = iommu_alloc_iova(domain, size, pdev->dma_mask);
1776 } else {
1778 /* First try to allocate an io virtual address in
1779 * DMA_32BIT_MASK and if that fails then try allocating
1780 * from the higher range */
1782 iova = iommu_alloc_iova(domain, size, DMA_32BIT_MASK);
1783 if (!iova)
1784 iova = iommu_alloc_iova(domain, size, pdev->dma_mask);
1787 if (!iova) {
1788 printk(KERN_ERR"Allocating iova for %s failed\n", pci_name(pdev));
1789 return NULL;
1792 return iova;
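/*
 * IOVA allocation policy above: devices limited to a 32-bit DMA mask (or
 * every device when intel_iommu=forcedac is set) allocate directly from
 * their DMA mask; 64-bit capable devices first try for an address below
 * 4GB and only fall back to the full mask if that range is exhausted.
 */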
1795 static struct dmar_domain *
1796 get_valid_domain_for_dev(struct pci_dev *pdev)
1798 struct dmar_domain *domain;
1799 int ret;
1801 domain = get_domain_for_dev(pdev,
1802 DEFAULT_DOMAIN_ADDRESS_WIDTH);
1803 if (!domain) {
1804 printk(KERN_ERR
1805 "Allocating domain for %s failed\n", pci_name(pdev));
1806 return NULL;
1809 /* make sure context mapping is ok */
1810 if (unlikely(!domain_context_mapped(domain, pdev))) {
1811 ret = domain_context_mapping(domain, pdev);
1812 if (ret) {
1813 printk(KERN_ERR
1814 "Domain context map for %s failed\n",
1815 pci_name(pdev));
1816 return NULL;
1820 return domain;
1823 static dma_addr_t
1824 intel_map_single(struct device *hwdev, phys_addr_t paddr, size_t size, int dir)
1826 struct pci_dev *pdev = to_pci_dev(hwdev);
1827 struct dmar_domain *domain;
1828 unsigned long start_paddr;
1829 struct iova *iova;
1830 int prot = 0;
1831 int ret;
1833 BUG_ON(dir == DMA_NONE);
1834 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
1835 return paddr;
1837 domain = get_valid_domain_for_dev(pdev);
1838 if (!domain)
1839 return 0;
1841 size = aligned_size((u64)paddr, size);
1843 iova = __intel_alloc_iova(hwdev, domain, size);
1844 if (!iova)
1845 goto error;
1847 start_paddr = iova->pfn_lo << PAGE_SHIFT_4K;
1850 /* Check if DMAR supports zero-length reads on write-only
1851 * mappings. */
1853 if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
1854 !cap_zlr(domain->iommu->cap))
1855 prot |= DMA_PTE_READ;
1856 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
1857 prot |= DMA_PTE_WRITE;
1859 /* paddr to (paddr + size) might cover partial pages, so we should map
1860 * the whole page. Note: if two parts of one page are mapped separately,
1861 * we might have two guest addresses mapping to the same host paddr, but
1862 * this is not a big problem. */
1864 ret = domain_page_mapping(domain, start_paddr,
1865 ((u64)paddr) & PAGE_MASK_4K, size, prot);
1866 if (ret)
1867 goto error;
1869 pr_debug("Device %s request: %lx@%llx mapping: %lx@%llx, dir %d\n",
1870 pci_name(pdev), size, (u64)paddr,
1871 size, (u64)start_paddr, dir);
1873 /* it's a non-present to present mapping */
1874 ret = iommu_flush_iotlb_psi(domain->iommu, domain->id,
1875 start_paddr, size >> PAGE_SHIFT_4K, 1);
1876 if (ret)
1877 iommu_flush_write_buffer(domain->iommu);
1879 return (start_paddr + ((u64)paddr & (~PAGE_MASK_4K)));
1881 error:
1882 if (iova)
1883 __free_iova(&domain->iovad, iova);
1884 printk(KERN_ERR"Device %s request: %lx@%llx dir %d --- failed\n",
1885 pci_name(pdev), size, (u64)paddr, dir);
1886 return 0;
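/*
 * Mapping path above: round the request up to whole 4K pages, allocate an
 * IOVA range, derive the PTE protection bits from the DMA direction (read
 * is also set for write-only mappings unless the IOMMU supports zero-length
 * reads), install the page mappings, then issue a page-selective IOTLB
 * flush for the non-present to present transition; if the hardware does not
 * cache non-present entries, only the write buffer is flushed instead.
 */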
1889 static void flush_unmaps(void)
1891 int i, j;
1893 timer_on = 0;
1895 /* just flush them all */
1896 for (i = 0; i < g_num_of_iommus; i++) {
1897 if (deferred_flush[i].next) {
1898 struct intel_iommu *iommu =
1899 deferred_flush[i].domain[0]->iommu;
1901 iommu_flush_iotlb_global(iommu, 0);
1902 for (j = 0; j < deferred_flush[i].next; j++) {
1903 __free_iova(&deferred_flush[i].domain[j]->iovad,
1904 deferred_flush[i].iova[j]);
1906 deferred_flush[i].next = 0;
1910 list_size = 0;
1913 static void flush_unmaps_timeout(unsigned long data)
1915 unsigned long flags;
1917 spin_lock_irqsave(&async_umap_flush_lock, flags);
1918 flush_unmaps();
1919 spin_unlock_irqrestore(&async_umap_flush_lock, flags);
1922 static void add_unmap(struct dmar_domain *dom, struct iova *iova)
1924 unsigned long flags;
1925 int next, iommu_id;
1927 spin_lock_irqsave(&async_umap_flush_lock, flags);
1928 if (list_size == HIGH_WATER_MARK)
1929 flush_unmaps();
1931 iommu_id = dom->iommu->seq_id;
1933 next = deferred_flush[iommu_id].next;
1934 deferred_flush[iommu_id].domain[next] = dom;
1935 deferred_flush[iommu_id].iova[next] = iova;
1936 deferred_flush[iommu_id].next++;
1938 if (!timer_on) {
1939 mod_timer(&unmap_timer, jiffies + msecs_to_jiffies(10));
1940 timer_on = 1;
1942 list_size++;
1943 spin_unlock_irqrestore(&async_umap_flush_lock, flags);
1946 static void intel_unmap_single(struct device *dev, dma_addr_t dev_addr,
1947 size_t size, int dir)
1949 struct pci_dev *pdev = to_pci_dev(dev);
1950 struct dmar_domain *domain;
1951 unsigned long start_addr;
1952 struct iova *iova;
1954 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
1955 return;
1956 domain = find_domain(pdev);
1957 BUG_ON(!domain);
1959 iova = find_iova(&domain->iovad, IOVA_PFN(dev_addr));
1960 if (!iova)
1961 return;
1963 start_addr = iova->pfn_lo << PAGE_SHIFT_4K;
1964 size = aligned_size((u64)dev_addr, size);
1966 pr_debug("Device %s unmapping: %lx@%llx\n",
1967 pci_name(pdev), size, (u64)start_addr);
1969 /* clear the whole page */
1970 dma_pte_clear_range(domain, start_addr, start_addr + size);
1971 /* free page tables */
1972 dma_pte_free_pagetable(domain, start_addr, start_addr + size);
1973 if (intel_iommu_strict) {
1974 if (iommu_flush_iotlb_psi(domain->iommu,
1975 domain->id, start_addr, size >> PAGE_SHIFT_4K, 0))
1976 iommu_flush_write_buffer(domain->iommu);
1977 /* free iova */
1978 __free_iova(&domain->iovad, iova);
1979 } else {
1980 add_unmap(domain, iova);
1982 /* queue up the release of the unmap to save the 1/6th of the
1983 * cpu used up by the iotlb flush operation... */
1988 static void * intel_alloc_coherent(struct device *hwdev, size_t size,
1989 dma_addr_t *dma_handle, gfp_t flags)
1991 void *vaddr;
1992 int order;
1994 size = PAGE_ALIGN_4K(size);
1995 order = get_order(size);
1996 flags &= ~(GFP_DMA | GFP_DMA32);
1998 vaddr = (void *)__get_free_pages(flags, order);
1999 if (!vaddr)
2000 return NULL;
2001 memset(vaddr, 0, size);
2003 *dma_handle = intel_map_single(hwdev, virt_to_bus(vaddr), size, DMA_BIDIRECTIONAL);
2004 if (*dma_handle)
2005 return vaddr;
2006 free_pages((unsigned long)vaddr, order);
2007 return NULL;
2010 static void intel_free_coherent(struct device *hwdev, size_t size,
2011 void *vaddr, dma_addr_t dma_handle)
2013 int order;
2015 size = PAGE_ALIGN_4K(size);
2016 order = get_order(size);
2018 intel_unmap_single(hwdev, dma_handle, size, DMA_BIDIRECTIONAL);
2019 free_pages((unsigned long)vaddr, order);
2022 #define SG_ENT_VIRT_ADDRESS(sg) (sg_virt((sg)))
2023 static void intel_unmap_sg(struct device *hwdev, struct scatterlist *sglist,
2024 int nelems, int dir)
2026 int i;
2027 struct pci_dev *pdev = to_pci_dev(hwdev);
2028 struct dmar_domain *domain;
2029 unsigned long start_addr;
2030 struct iova *iova;
2031 size_t size = 0;
2032 void *addr;
2033 struct scatterlist *sg;
2035 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
2036 return;
2038 domain = find_domain(pdev);
2040 iova = find_iova(&domain->iovad, IOVA_PFN(sglist[0].dma_address));
2041 if (!iova)
2042 return;
2043 for_each_sg(sglist, sg, nelems, i) {
2044 addr = SG_ENT_VIRT_ADDRESS(sg);
2045 size += aligned_size((u64)addr, sg->length);
2048 start_addr = iova->pfn_lo << PAGE_SHIFT_4K;
2050 /* clear the whole page */
2051 dma_pte_clear_range(domain, start_addr, start_addr + size);
2052 /* free page tables */
2053 dma_pte_free_pagetable(domain, start_addr, start_addr + size);
2055 if (iommu_flush_iotlb_psi(domain->iommu, domain->id, start_addr,
2056 size >> PAGE_SHIFT_4K, 0))
2057 iommu_flush_write_buffer(domain->iommu);
2059 /* free iova */
2060 __free_iova(&domain->iovad, iova);
2063 static int intel_nontranslate_map_sg(struct device *hddev,
2064 struct scatterlist *sglist, int nelems, int dir)
2066 int i;
2067 struct scatterlist *sg;
2069 for_each_sg(sglist, sg, nelems, i) {
2070 BUG_ON(!sg_page(sg));
2071 sg->dma_address = virt_to_bus(SG_ENT_VIRT_ADDRESS(sg));
2072 sg->dma_length = sg->length;
2074 return nelems;
2077 static int intel_map_sg(struct device *hwdev, struct scatterlist *sglist,
2078 int nelems, int dir)
2080 void *addr;
2081 int i;
2082 struct pci_dev *pdev = to_pci_dev(hwdev);
2083 struct dmar_domain *domain;
2084 size_t size = 0;
2085 int prot = 0;
2086 size_t offset = 0;
2087 struct iova *iova = NULL;
2088 int ret;
2089 struct scatterlist *sg;
2090 unsigned long start_addr;
2092 BUG_ON(dir == DMA_NONE);
2093 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
2094 return intel_nontranslate_map_sg(hwdev, sglist, nelems, dir);
2096 domain = get_valid_domain_for_dev(pdev);
2097 if (!domain)
2098 return 0;
2100 for_each_sg(sglist, sg, nelems, i) {
2101 addr = SG_ENT_VIRT_ADDRESS(sg);
2102 addr = (void *)virt_to_phys(addr);
2103 size += aligned_size((u64)addr, sg->length);
2106 iova = __intel_alloc_iova(hwdev, domain, size);
2107 if (!iova) {
2108 sglist->dma_length = 0;
2109 return 0;
2113 /* Check if DMAR supports zero-length reads on write-only
2114 * mappings. */
2116 if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
2117 !cap_zlr(domain->iommu->cap))
2118 prot |= DMA_PTE_READ;
2119 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
2120 prot |= DMA_PTE_WRITE;
2122 start_addr = iova->pfn_lo << PAGE_SHIFT_4K;
2123 offset = 0;
2124 for_each_sg(sglist, sg, nelems, i) {
2125 addr = SG_ENT_VIRT_ADDRESS(sg);
2126 addr = (void *)virt_to_phys(addr);
2127 size = aligned_size((u64)addr, sg->length);
2128 ret = domain_page_mapping(domain, start_addr + offset,
2129 ((u64)addr) & PAGE_MASK_4K,
2130 size, prot);
2131 if (ret) {
2132 /* clear the page */
2133 dma_pte_clear_range(domain, start_addr,
2134 start_addr + offset);
2135 /* free page tables */
2136 dma_pte_free_pagetable(domain, start_addr,
2137 start_addr + offset);
2138 /* free iova */
2139 __free_iova(&domain->iovad, iova);
2140 return 0;
2142 sg->dma_address = start_addr + offset +
2143 ((u64)addr & (~PAGE_MASK_4K));
2144 sg->dma_length = sg->length;
2145 offset += size;
2148 /* it's a non-present to present mapping */
2149 if (iommu_flush_iotlb_psi(domain->iommu, domain->id,
2150 start_addr, offset >> PAGE_SHIFT_4K, 1))
2151 iommu_flush_write_buffer(domain->iommu);
2152 return nelems;
2155 static struct dma_mapping_ops intel_dma_ops = {
2156 .alloc_coherent = intel_alloc_coherent,
2157 .free_coherent = intel_free_coherent,
2158 .map_single = intel_map_single,
2159 .unmap_single = intel_unmap_single,
2160 .map_sg = intel_map_sg,
2161 .unmap_sg = intel_unmap_sg,
2164 static inline int iommu_domain_cache_init(void)
2166 int ret = 0;
2168 iommu_domain_cache = kmem_cache_create("iommu_domain",
2169 sizeof(struct dmar_domain),
2171 SLAB_HWCACHE_ALIGN,
2173 NULL);
2174 if (!iommu_domain_cache) {
2175 printk(KERN_ERR "Couldn't create iommu_domain cache\n");
2176 ret = -ENOMEM;
2179 return ret;
2182 static inline int iommu_devinfo_cache_init(void)
2184 int ret = 0;
2186 iommu_devinfo_cache = kmem_cache_create("iommu_devinfo",
2187 sizeof(struct device_domain_info),
2189 SLAB_HWCACHE_ALIGN,
2191 NULL);
2192 if (!iommu_devinfo_cache) {
2193 printk(KERN_ERR "Couldn't create devinfo cache\n");
2194 ret = -ENOMEM;
2197 return ret;
2200 static inline int iommu_iova_cache_init(void)
2202 int ret = 0;
2204 iommu_iova_cache = kmem_cache_create("iommu_iova",
2205 sizeof(struct iova),
2207 SLAB_HWCACHE_ALIGN,
2209 NULL);
2210 if (!iommu_iova_cache) {
2211 printk(KERN_ERR "Couldn't create iova cache\n");
2212 ret = -ENOMEM;
2215 return ret;
2218 static int __init iommu_init_mempool(void)
2220 int ret;
2221 ret = iommu_iova_cache_init();
2222 if (ret)
2223 return ret;
2225 ret = iommu_domain_cache_init();
2226 if (ret)
2227 goto domain_error;
2229 ret = iommu_devinfo_cache_init();
2230 if (!ret)
2231 return ret;
2233 kmem_cache_destroy(iommu_domain_cache);
2234 domain_error:
2235 kmem_cache_destroy(iommu_iova_cache);
2237 return -ENOMEM;
2240 static void __init iommu_exit_mempool(void)
2242 kmem_cache_destroy(iommu_devinfo_cache);
2243 kmem_cache_destroy(iommu_domain_cache);
2244 kmem_cache_destroy(iommu_iova_cache);
2248 void __init detect_intel_iommu(void)
2250 if (swiotlb || no_iommu || iommu_detected || dmar_disabled)
2251 return;
2252 if (early_dmar_detect()) {
2253 iommu_detected = 1;
2257 static void __init init_no_remapping_devices(void)
2259 struct dmar_drhd_unit *drhd;
2261 for_each_drhd_unit(drhd) {
2262 if (!drhd->include_all) {
2263 int i;
2264 for (i = 0; i < drhd->devices_cnt; i++)
2265 if (drhd->devices[i] != NULL)
2266 break;
2267 /* ignore DMAR unit if no pci devices exist */
2268 if (i == drhd->devices_cnt)
2269 drhd->ignored = 1;
2273 if (dmar_map_gfx)
2274 return;
2276 for_each_drhd_unit(drhd) {
2277 int i;
2278 if (drhd->ignored || drhd->include_all)
2279 continue;
2281 for (i = 0; i < drhd->devices_cnt; i++)
2282 if (drhd->devices[i] &&
2283 !IS_GFX_DEVICE(drhd->devices[i]))
2284 break;
2286 if (i < drhd->devices_cnt)
2287 continue;
2289 /* bypass IOMMU if it is just for gfx devices */
2290 drhd->ignored = 1;
2291 for (i = 0; i < drhd->devices_cnt; i++) {
2292 if (!drhd->devices[i])
2293 continue;
2294 drhd->devices[i]->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
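/*
 * init_no_remapping_devices() above disables DRHD units that would never be
 * used: units whose device scope contains no present PCI devices and, when
 * intel_iommu=igfx_off was given, units covering only graphics devices.
 * Devices under a disabled unit get DUMMY_DEVICE_DOMAIN_INFO so the DMA ops
 * pass their requests through untranslated.
 */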
2299 int __init intel_iommu_init(void)
2301 int ret = 0;
2303 if (no_iommu || swiotlb || dmar_disabled)
2304 return -ENODEV;
2306 if (dmar_table_init())
2307 return -ENODEV;
2309 if (dmar_dev_scope_init())
2310 return -ENODEV;
2312 iommu_init_mempool();
2313 dmar_init_reserved_ranges();
2315 init_no_remapping_devices();
2317 ret = init_dmars();
2318 if (ret) {
2319 printk(KERN_ERR "IOMMU: dmar init failed\n");
2320 put_iova_domain(&reserved_iova_list);
2321 iommu_exit_mempool();
2322 return ret;
2324 printk(KERN_INFO
2325 "PCI-DMA: Intel(R) Virtualization Technology for Directed I/O\n");
2327 init_timer(&unmap_timer);
2328 force_iommu = 1;
2329 dma_ops = &intel_dma_ops;
2330 return 0;