hwmon: applesmc: prolong status wait
[linux-2.6/linux-acpi-2.6/ibm-acpi-2.6.git] / drivers / pci / intel-iommu.c
blob8b51e10b7783d589b1d7893bb6febd052b3ee23b
1 /*
2 * Copyright (c) 2006, Intel Corporation.
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
13 * You should have received a copy of the GNU General Public License along with
14 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
15 * Place - Suite 330, Boston, MA 02111-1307 USA.
17 * Copyright (C) 2006-2008 Intel Corporation
18 * Author: Ashok Raj <ashok.raj@intel.com>
19 * Author: Shaohua Li <shaohua.li@intel.com>
20 * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
23 #include <linux/init.h>
24 #include <linux/bitmap.h>
25 #include <linux/debugfs.h>
26 #include <linux/slab.h>
27 #include <linux/irq.h>
28 #include <linux/interrupt.h>
29 #include <linux/sysdev.h>
30 #include <linux/spinlock.h>
31 #include <linux/pci.h>
32 #include <linux/dmar.h>
33 #include <linux/dma-mapping.h>
34 #include <linux/mempool.h>
35 #include <linux/timer.h>
36 #include <linux/iova.h>
37 #include <linux/intel-iommu.h>
38 #include <asm/proto.h> /* force_iommu in this header in x86-64*/
39 #include <asm/cacheflush.h>
40 #include <asm/iommu.h>
41 #include "pci.h"
43 #define IS_GFX_DEVICE(pdev) ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY)
44 #define IS_ISA_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA)
/* IOAPIC MMIO window; must never be handed out as DMA (IOVA) space */
46 #define IOAPIC_RANGE_START (0xfee00000)
47 #define IOAPIC_RANGE_END (0xfeefffff)
48 #define IOVA_START_ADDR (0x1000)
50 #define DEFAULT_DOMAIN_ADDRESS_WIDTH 48
/* highest DMA address representable with a gaw-bit guest address width */
52 #define DOMAIN_MAX_ADDR(gaw) ((((u64)1) << gaw) - 1)
55 static void flush_unmaps_timeout(unsigned long data);
57 DEFINE_TIMER(unmap_timer, flush_unmaps_timeout, 0, 0);
/* per-table cap on queued deferred unmaps before a forced IOTLB flush */
59 #define HIGH_WATER_MARK 250
60 struct deferred_flush_tables {
61 int next;
62 struct iova *iova[HIGH_WATER_MARK];
63 struct dmar_domain *domain[HIGH_WATER_MARK];
66 static struct deferred_flush_tables *deferred_flush;
68 /* bitmap for indexing intel_iommus */
69 static int g_num_of_iommus;
/* protects unmaps_to_do / timer_on / list_size below */
71 static DEFINE_SPINLOCK(async_umap_flush_lock);
72 static LIST_HEAD(unmaps_to_do);
74 static int timer_on;
75 static long list_size;
77 static void domain_remove_dev_info(struct dmar_domain *domain);
/* command-line tunables, parsed by intel_iommu_setup() below */
79 int dmar_disabled;
80 static int __initdata dmar_map_gfx = 1;
81 static int dmar_forcedac;
82 static int intel_iommu_strict;
84 #define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1))
85 static DEFINE_SPINLOCK(device_domain_lock);
86 static LIST_HEAD(device_domain_list);
88 static int __init intel_iommu_setup(char *str)
90 if (!str)
91 return -EINVAL;
92 while (*str) {
93 if (!strncmp(str, "off", 3)) {
94 dmar_disabled = 1;
95 printk(KERN_INFO"Intel-IOMMU: disabled\n");
96 } else if (!strncmp(str, "igfx_off", 8)) {
97 dmar_map_gfx = 0;
98 printk(KERN_INFO
99 "Intel-IOMMU: disable GFX device mapping\n");
100 } else if (!strncmp(str, "forcedac", 8)) {
101 printk(KERN_INFO
102 "Intel-IOMMU: Forcing DAC for PCI devices\n");
103 dmar_forcedac = 1;
104 } else if (!strncmp(str, "strict", 6)) {
105 printk(KERN_INFO
106 "Intel-IOMMU: disable batched IOTLB flush\n");
107 intel_iommu_strict = 1;
110 str += strcspn(str, ",");
111 while (*str == ',')
112 str++;
114 return 0;
116 __setup("intel_iommu=", intel_iommu_setup);
/* slab caches for dmar_domain, device_domain_info and iova objects */
118 static struct kmem_cache *iommu_domain_cache;
119 static struct kmem_cache *iommu_devinfo_cache;
120 static struct kmem_cache *iommu_iova_cache;
122 static inline void *iommu_kmem_cache_alloc(struct kmem_cache *cachep)
124 unsigned int flags;
125 void *vaddr;
127 /* trying to avoid low memory issues */
128 flags = current->flags & PF_MEMALLOC;
129 current->flags |= PF_MEMALLOC;
130 vaddr = kmem_cache_alloc(cachep, GFP_ATOMIC);
131 current->flags &= (~PF_MEMALLOC | flags);
132 return vaddr;
136 static inline void *alloc_pgtable_page(void)
138 unsigned int flags;
139 void *vaddr;
141 /* trying to avoid low memory issues */
142 flags = current->flags & PF_MEMALLOC;
143 current->flags |= PF_MEMALLOC;
144 vaddr = (void *)get_zeroed_page(GFP_ATOMIC);
145 current->flags &= (~PF_MEMALLOC | flags);
146 return vaddr;
149 static inline void free_pgtable_page(void *vaddr)
151 free_page((unsigned long)vaddr);
154 static inline void *alloc_domain_mem(void)
156 return iommu_kmem_cache_alloc(iommu_domain_cache);
159 static void free_domain_mem(void *vaddr)
161 kmem_cache_free(iommu_domain_cache, vaddr);
164 static inline void * alloc_devinfo_mem(void)
166 return iommu_kmem_cache_alloc(iommu_devinfo_cache);
169 static inline void free_devinfo_mem(void *vaddr)
171 kmem_cache_free(iommu_devinfo_cache, vaddr);
174 struct iova *alloc_iova_mem(void)
176 return iommu_kmem_cache_alloc(iommu_iova_cache);
179 void free_iova_mem(struct iova *iova)
181 kmem_cache_free(iommu_iova_cache, iova);
184 /* Gets context entry for a given bus and devfn */
185 static struct context_entry * device_to_context_entry(struct intel_iommu *iommu,
186 u8 bus, u8 devfn)
188 struct root_entry *root;
189 struct context_entry *context;
190 unsigned long phy_addr;
191 unsigned long flags;
193 spin_lock_irqsave(&iommu->lock, flags);
194 root = &iommu->root_entry[bus];
195 context = get_context_addr_from_root(root);
196 if (!context) {
197 context = (struct context_entry *)alloc_pgtable_page();
198 if (!context) {
199 spin_unlock_irqrestore(&iommu->lock, flags);
200 return NULL;
202 __iommu_flush_cache(iommu, (void *)context, PAGE_SIZE_4K);
203 phy_addr = virt_to_phys((void *)context);
204 set_root_value(root, phy_addr);
205 set_root_present(root);
206 __iommu_flush_cache(iommu, root, sizeof(*root));
208 spin_unlock_irqrestore(&iommu->lock, flags);
209 return &context[devfn];
212 static int device_context_mapped(struct intel_iommu *iommu, u8 bus, u8 devfn)
214 struct root_entry *root;
215 struct context_entry *context;
216 int ret;
217 unsigned long flags;
219 spin_lock_irqsave(&iommu->lock, flags);
220 root = &iommu->root_entry[bus];
221 context = get_context_addr_from_root(root);
222 if (!context) {
223 ret = 0;
224 goto out;
226 ret = context_present(context[devfn]);
227 out:
228 spin_unlock_irqrestore(&iommu->lock, flags);
229 return ret;
232 static void clear_context_table(struct intel_iommu *iommu, u8 bus, u8 devfn)
234 struct root_entry *root;
235 struct context_entry *context;
236 unsigned long flags;
238 spin_lock_irqsave(&iommu->lock, flags);
239 root = &iommu->root_entry[bus];
240 context = get_context_addr_from_root(root);
241 if (context) {
242 context_clear_entry(context[devfn]);
243 __iommu_flush_cache(iommu, &context[devfn], \
244 sizeof(*context));
246 spin_unlock_irqrestore(&iommu->lock, flags);
249 static void free_context_table(struct intel_iommu *iommu)
251 struct root_entry *root;
252 int i;
253 unsigned long flags;
254 struct context_entry *context;
256 spin_lock_irqsave(&iommu->lock, flags);
257 if (!iommu->root_entry) {
258 goto out;
260 for (i = 0; i < ROOT_ENTRY_NR; i++) {
261 root = &iommu->root_entry[i];
262 context = get_context_addr_from_root(root);
263 if (context)
264 free_pgtable_page(context);
266 free_pgtable_page(iommu->root_entry);
267 iommu->root_entry = NULL;
268 out:
269 spin_unlock_irqrestore(&iommu->lock, flags);
/* page table handling */
#define LEVEL_STRIDE		(9)
#define LEVEL_MASK		(((u64)1 << LEVEL_STRIDE) - 1)

/* adjusted guest address width -> number of page-table levels */
static inline int agaw_to_level(int agaw)
{
	return agaw + 2;
}

/* adjusted guest address width -> address width in bits (30 + 9/step) */
static inline int agaw_to_width(int agaw)
{
	return 30 + agaw * LEVEL_STRIDE;
}

static inline int width_to_agaw(int width)
{
	return (width - 30) / LEVEL_STRIDE;
}

/* first bit of the table index field for @level (level 1 -> bit 12) */
static inline unsigned int level_to_offset_bits(int level)
{
	return (12 + (level - 1) * LEVEL_STRIDE);
}

/* table index for @addr within the page table at @level */
static inline int address_level_offset(u64 addr, int level)
{
	return ((addr >> level_to_offset_bits(level)) & LEVEL_MASK);
}

/* mask selecting the bits above @level's offset field */
static inline u64 level_mask(int level)
{
	return ((u64)-1 << level_to_offset_bits(level));
}

/* number of bytes covered by one entry at @level */
static inline u64 level_size(int level)
{
	return ((u64)1 << level_to_offset_bits(level));
}

/* round @addr up to the next @level boundary */
static inline u64 align_to_level(u64 addr, int level)
{
	return ((addr + level_size(level) - 1) & level_mask(level));
}
317 static struct dma_pte * addr_to_dma_pte(struct dmar_domain *domain, u64 addr)
319 int addr_width = agaw_to_width(domain->agaw);
320 struct dma_pte *parent, *pte = NULL;
321 int level = agaw_to_level(domain->agaw);
322 int offset;
323 unsigned long flags;
325 BUG_ON(!domain->pgd);
327 addr &= (((u64)1) << addr_width) - 1;
328 parent = domain->pgd;
330 spin_lock_irqsave(&domain->mapping_lock, flags);
331 while (level > 0) {
332 void *tmp_page;
334 offset = address_level_offset(addr, level);
335 pte = &parent[offset];
336 if (level == 1)
337 break;
339 if (!dma_pte_present(*pte)) {
340 tmp_page = alloc_pgtable_page();
342 if (!tmp_page) {
343 spin_unlock_irqrestore(&domain->mapping_lock,
344 flags);
345 return NULL;
347 __iommu_flush_cache(domain->iommu, tmp_page,
348 PAGE_SIZE_4K);
349 dma_set_pte_addr(*pte, virt_to_phys(tmp_page));
351 * high level table always sets r/w, last level page
352 * table control read/write
354 dma_set_pte_readable(*pte);
355 dma_set_pte_writable(*pte);
356 __iommu_flush_cache(domain->iommu, pte, sizeof(*pte));
358 parent = phys_to_virt(dma_pte_addr(*pte));
359 level--;
362 spin_unlock_irqrestore(&domain->mapping_lock, flags);
363 return pte;
366 /* return address's pte at specific level */
367 static struct dma_pte *dma_addr_level_pte(struct dmar_domain *domain, u64 addr,
368 int level)
370 struct dma_pte *parent, *pte = NULL;
371 int total = agaw_to_level(domain->agaw);
372 int offset;
374 parent = domain->pgd;
375 while (level <= total) {
376 offset = address_level_offset(addr, total);
377 pte = &parent[offset];
378 if (level == total)
379 return pte;
381 if (!dma_pte_present(*pte))
382 break;
383 parent = phys_to_virt(dma_pte_addr(*pte));
384 total--;
386 return NULL;
389 /* clear one page's page table */
390 static void dma_pte_clear_one(struct dmar_domain *domain, u64 addr)
392 struct dma_pte *pte = NULL;
394 /* get last level pte */
395 pte = dma_addr_level_pte(domain, addr, 1);
397 if (pte) {
398 dma_clear_pte(*pte);
399 __iommu_flush_cache(domain->iommu, pte, sizeof(*pte));
403 /* clear last level pte, a tlb flush should be followed */
404 static void dma_pte_clear_range(struct dmar_domain *domain, u64 start, u64 end)
406 int addr_width = agaw_to_width(domain->agaw);
408 start &= (((u64)1) << addr_width) - 1;
409 end &= (((u64)1) << addr_width) - 1;
410 /* in case it's partial page */
411 start = PAGE_ALIGN_4K(start);
412 end &= PAGE_MASK_4K;
414 /* we don't need lock here, nobody else touches the iova range */
415 while (start < end) {
416 dma_pte_clear_one(domain, start);
417 start += PAGE_SIZE_4K;
421 /* free page table pages. last level pte should already be cleared */
422 static void dma_pte_free_pagetable(struct dmar_domain *domain,
423 u64 start, u64 end)
425 int addr_width = agaw_to_width(domain->agaw);
426 struct dma_pte *pte;
427 int total = agaw_to_level(domain->agaw);
428 int level;
429 u64 tmp;
431 start &= (((u64)1) << addr_width) - 1;
432 end &= (((u64)1) << addr_width) - 1;
434 /* we don't need lock here, nobody else touches the iova range */
435 level = 2;
436 while (level <= total) {
437 tmp = align_to_level(start, level);
438 if (tmp >= end || (tmp + level_size(level) > end))
439 return;
441 while (tmp < end) {
442 pte = dma_addr_level_pte(domain, tmp, level);
443 if (pte) {
444 free_pgtable_page(
445 phys_to_virt(dma_pte_addr(*pte)));
446 dma_clear_pte(*pte);
447 __iommu_flush_cache(domain->iommu,
448 pte, sizeof(*pte));
450 tmp += level_size(level);
452 level++;
454 /* free pgd */
455 if (start == 0 && end >= ((((u64)1) << addr_width) - 1)) {
456 free_pgtable_page(domain->pgd);
457 domain->pgd = NULL;
461 /* iommu handling */
462 static int iommu_alloc_root_entry(struct intel_iommu *iommu)
464 struct root_entry *root;
465 unsigned long flags;
467 root = (struct root_entry *)alloc_pgtable_page();
468 if (!root)
469 return -ENOMEM;
471 __iommu_flush_cache(iommu, root, PAGE_SIZE_4K);
473 spin_lock_irqsave(&iommu->lock, flags);
474 iommu->root_entry = root;
475 spin_unlock_irqrestore(&iommu->lock, flags);
477 return 0;
480 static void iommu_set_root_entry(struct intel_iommu *iommu)
482 void *addr;
483 u32 cmd, sts;
484 unsigned long flag;
486 addr = iommu->root_entry;
488 spin_lock_irqsave(&iommu->register_lock, flag);
489 dmar_writeq(iommu->reg + DMAR_RTADDR_REG, virt_to_phys(addr));
491 cmd = iommu->gcmd | DMA_GCMD_SRTP;
492 writel(cmd, iommu->reg + DMAR_GCMD_REG);
494 /* Make sure hardware complete it */
495 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
496 readl, (sts & DMA_GSTS_RTPS), sts);
498 spin_unlock_irqrestore(&iommu->register_lock, flag);
501 static void iommu_flush_write_buffer(struct intel_iommu *iommu)
503 u32 val;
504 unsigned long flag;
506 if (!cap_rwbf(iommu->cap))
507 return;
508 val = iommu->gcmd | DMA_GCMD_WBF;
510 spin_lock_irqsave(&iommu->register_lock, flag);
511 writel(val, iommu->reg + DMAR_GCMD_REG);
513 /* Make sure hardware complete it */
514 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
515 readl, (!(val & DMA_GSTS_WBFS)), val);
517 spin_unlock_irqrestore(&iommu->register_lock, flag);
520 /* return value determine if we need a write buffer flush */
521 static int __iommu_flush_context(struct intel_iommu *iommu,
522 u16 did, u16 source_id, u8 function_mask, u64 type,
523 int non_present_entry_flush)
525 u64 val = 0;
526 unsigned long flag;
529 * In the non-present entry flush case, if hardware doesn't cache
530 * non-present entry we do nothing and if hardware cache non-present
531 * entry, we flush entries of domain 0 (the domain id is used to cache
532 * any non-present entries)
534 if (non_present_entry_flush) {
535 if (!cap_caching_mode(iommu->cap))
536 return 1;
537 else
538 did = 0;
541 switch (type) {
542 case DMA_CCMD_GLOBAL_INVL:
543 val = DMA_CCMD_GLOBAL_INVL;
544 break;
545 case DMA_CCMD_DOMAIN_INVL:
546 val = DMA_CCMD_DOMAIN_INVL|DMA_CCMD_DID(did);
547 break;
548 case DMA_CCMD_DEVICE_INVL:
549 val = DMA_CCMD_DEVICE_INVL|DMA_CCMD_DID(did)
550 | DMA_CCMD_SID(source_id) | DMA_CCMD_FM(function_mask);
551 break;
552 default:
553 BUG();
555 val |= DMA_CCMD_ICC;
557 spin_lock_irqsave(&iommu->register_lock, flag);
558 dmar_writeq(iommu->reg + DMAR_CCMD_REG, val);
560 /* Make sure hardware complete it */
561 IOMMU_WAIT_OP(iommu, DMAR_CCMD_REG,
562 dmar_readq, (!(val & DMA_CCMD_ICC)), val);
564 spin_unlock_irqrestore(&iommu->register_lock, flag);
566 /* flush context entry will implicitly flush write buffer */
567 return 0;
570 static int inline iommu_flush_context_global(struct intel_iommu *iommu,
571 int non_present_entry_flush)
573 return __iommu_flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL,
574 non_present_entry_flush);
577 static int inline iommu_flush_context_domain(struct intel_iommu *iommu, u16 did,
578 int non_present_entry_flush)
580 return __iommu_flush_context(iommu, did, 0, 0, DMA_CCMD_DOMAIN_INVL,
581 non_present_entry_flush);
584 static int inline iommu_flush_context_device(struct intel_iommu *iommu,
585 u16 did, u16 source_id, u8 function_mask, int non_present_entry_flush)
587 return __iommu_flush_context(iommu, did, source_id, function_mask,
588 DMA_CCMD_DEVICE_INVL, non_present_entry_flush);
591 /* return value determine if we need a write buffer flush */
592 static int __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did,
593 u64 addr, unsigned int size_order, u64 type,
594 int non_present_entry_flush)
596 int tlb_offset = ecap_iotlb_offset(iommu->ecap);
597 u64 val = 0, val_iva = 0;
598 unsigned long flag;
601 * In the non-present entry flush case, if hardware doesn't cache
602 * non-present entry we do nothing and if hardware cache non-present
603 * entry, we flush entries of domain 0 (the domain id is used to cache
604 * any non-present entries)
606 if (non_present_entry_flush) {
607 if (!cap_caching_mode(iommu->cap))
608 return 1;
609 else
610 did = 0;
613 switch (type) {
614 case DMA_TLB_GLOBAL_FLUSH:
615 /* global flush doesn't need set IVA_REG */
616 val = DMA_TLB_GLOBAL_FLUSH|DMA_TLB_IVT;
617 break;
618 case DMA_TLB_DSI_FLUSH:
619 val = DMA_TLB_DSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
620 break;
621 case DMA_TLB_PSI_FLUSH:
622 val = DMA_TLB_PSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
623 /* Note: always flush non-leaf currently */
624 val_iva = size_order | addr;
625 break;
626 default:
627 BUG();
629 /* Note: set drain read/write */
630 #if 0
632 * This is probably to be super secure.. Looks like we can
633 * ignore it without any impact.
635 if (cap_read_drain(iommu->cap))
636 val |= DMA_TLB_READ_DRAIN;
637 #endif
638 if (cap_write_drain(iommu->cap))
639 val |= DMA_TLB_WRITE_DRAIN;
641 spin_lock_irqsave(&iommu->register_lock, flag);
642 /* Note: Only uses first TLB reg currently */
643 if (val_iva)
644 dmar_writeq(iommu->reg + tlb_offset, val_iva);
645 dmar_writeq(iommu->reg + tlb_offset + 8, val);
647 /* Make sure hardware complete it */
648 IOMMU_WAIT_OP(iommu, tlb_offset + 8,
649 dmar_readq, (!(val & DMA_TLB_IVT)), val);
651 spin_unlock_irqrestore(&iommu->register_lock, flag);
653 /* check IOTLB invalidation granularity */
654 if (DMA_TLB_IAIG(val) == 0)
655 printk(KERN_ERR"IOMMU: flush IOTLB failed\n");
656 if (DMA_TLB_IAIG(val) != DMA_TLB_IIRG(type))
657 pr_debug("IOMMU: tlb flush request %Lx, actual %Lx\n",
658 DMA_TLB_IIRG(type), DMA_TLB_IAIG(val));
659 /* flush iotlb entry will implicitly flush write buffer */
660 return 0;
663 static int inline iommu_flush_iotlb_global(struct intel_iommu *iommu,
664 int non_present_entry_flush)
666 return __iommu_flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH,
667 non_present_entry_flush);
670 static int inline iommu_flush_iotlb_dsi(struct intel_iommu *iommu, u16 did,
671 int non_present_entry_flush)
673 return __iommu_flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH,
674 non_present_entry_flush);
677 static int iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did,
678 u64 addr, unsigned int pages, int non_present_entry_flush)
680 unsigned int mask;
682 BUG_ON(addr & (~PAGE_MASK_4K));
683 BUG_ON(pages == 0);
685 /* Fallback to domain selective flush if no PSI support */
686 if (!cap_pgsel_inv(iommu->cap))
687 return iommu_flush_iotlb_dsi(iommu, did,
688 non_present_entry_flush);
691 * PSI requires page size to be 2 ^ x, and the base address is naturally
692 * aligned to the size
694 mask = ilog2(__roundup_pow_of_two(pages));
695 /* Fallback to domain selective flush if size is too big */
696 if (mask > cap_max_amask_val(iommu->cap))
697 return iommu_flush_iotlb_dsi(iommu, did,
698 non_present_entry_flush);
700 return __iommu_flush_iotlb(iommu, did, addr, mask,
701 DMA_TLB_PSI_FLUSH, non_present_entry_flush);
704 static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu)
706 u32 pmen;
707 unsigned long flags;
709 spin_lock_irqsave(&iommu->register_lock, flags);
710 pmen = readl(iommu->reg + DMAR_PMEN_REG);
711 pmen &= ~DMA_PMEN_EPM;
712 writel(pmen, iommu->reg + DMAR_PMEN_REG);
714 /* wait for the protected region status bit to clear */
715 IOMMU_WAIT_OP(iommu, DMAR_PMEN_REG,
716 readl, !(pmen & DMA_PMEN_PRS), pmen);
718 spin_unlock_irqrestore(&iommu->register_lock, flags);
721 static int iommu_enable_translation(struct intel_iommu *iommu)
723 u32 sts;
724 unsigned long flags;
726 spin_lock_irqsave(&iommu->register_lock, flags);
727 writel(iommu->gcmd|DMA_GCMD_TE, iommu->reg + DMAR_GCMD_REG);
729 /* Make sure hardware complete it */
730 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
731 readl, (sts & DMA_GSTS_TES), sts);
733 iommu->gcmd |= DMA_GCMD_TE;
734 spin_unlock_irqrestore(&iommu->register_lock, flags);
735 return 0;
738 static int iommu_disable_translation(struct intel_iommu *iommu)
740 u32 sts;
741 unsigned long flag;
743 spin_lock_irqsave(&iommu->register_lock, flag);
744 iommu->gcmd &= ~DMA_GCMD_TE;
745 writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
747 /* Make sure hardware complete it */
748 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
749 readl, (!(sts & DMA_GSTS_TES)), sts);
751 spin_unlock_irqrestore(&iommu->register_lock, flag);
752 return 0;
755 /* iommu interrupt handling. Most stuff are MSI-like. */
757 static const char *fault_reason_strings[] =
759 "Software",
760 "Present bit in root entry is clear",
761 "Present bit in context entry is clear",
762 "Invalid context entry",
763 "Access beyond MGAW",
764 "PTE Write access is not set",
765 "PTE Read access is not set",
766 "Next page table ptr is invalid",
767 "Root table address invalid",
768 "Context table ptr is invalid",
769 "non-zero reserved fields in RTP",
770 "non-zero reserved fields in CTP",
771 "non-zero reserved fields in PTE",
773 #define MAX_FAULT_REASON_IDX (ARRAY_SIZE(fault_reason_strings) - 1)
775 const char *dmar_get_fault_reason(u8 fault_reason)
777 if (fault_reason > MAX_FAULT_REASON_IDX)
778 return "Unknown";
779 else
780 return fault_reason_strings[fault_reason];
783 void dmar_msi_unmask(unsigned int irq)
785 struct intel_iommu *iommu = get_irq_data(irq);
786 unsigned long flag;
788 /* unmask it */
789 spin_lock_irqsave(&iommu->register_lock, flag);
790 writel(0, iommu->reg + DMAR_FECTL_REG);
791 /* Read a reg to force flush the post write */
792 readl(iommu->reg + DMAR_FECTL_REG);
793 spin_unlock_irqrestore(&iommu->register_lock, flag);
796 void dmar_msi_mask(unsigned int irq)
798 unsigned long flag;
799 struct intel_iommu *iommu = get_irq_data(irq);
801 /* mask it */
802 spin_lock_irqsave(&iommu->register_lock, flag);
803 writel(DMA_FECTL_IM, iommu->reg + DMAR_FECTL_REG);
804 /* Read a reg to force flush the post write */
805 readl(iommu->reg + DMAR_FECTL_REG);
806 spin_unlock_irqrestore(&iommu->register_lock, flag);
809 void dmar_msi_write(int irq, struct msi_msg *msg)
811 struct intel_iommu *iommu = get_irq_data(irq);
812 unsigned long flag;
814 spin_lock_irqsave(&iommu->register_lock, flag);
815 writel(msg->data, iommu->reg + DMAR_FEDATA_REG);
816 writel(msg->address_lo, iommu->reg + DMAR_FEADDR_REG);
817 writel(msg->address_hi, iommu->reg + DMAR_FEUADDR_REG);
818 spin_unlock_irqrestore(&iommu->register_lock, flag);
821 void dmar_msi_read(int irq, struct msi_msg *msg)
823 struct intel_iommu *iommu = get_irq_data(irq);
824 unsigned long flag;
826 spin_lock_irqsave(&iommu->register_lock, flag);
827 msg->data = readl(iommu->reg + DMAR_FEDATA_REG);
828 msg->address_lo = readl(iommu->reg + DMAR_FEADDR_REG);
829 msg->address_hi = readl(iommu->reg + DMAR_FEUADDR_REG);
830 spin_unlock_irqrestore(&iommu->register_lock, flag);
833 static int iommu_page_fault_do_one(struct intel_iommu *iommu, int type,
834 u8 fault_reason, u16 source_id, u64 addr)
836 const char *reason;
838 reason = dmar_get_fault_reason(fault_reason);
840 printk(KERN_ERR
841 "DMAR:[%s] Request device [%02x:%02x.%d] "
842 "fault addr %llx \n"
843 "DMAR:[fault reason %02d] %s\n",
844 (type ? "DMA Read" : "DMA Write"),
845 (source_id >> 8), PCI_SLOT(source_id & 0xFF),
846 PCI_FUNC(source_id & 0xFF), addr, fault_reason, reason);
847 return 0;
850 #define PRIMARY_FAULT_REG_LEN (16)
851 static irqreturn_t iommu_page_fault(int irq, void *dev_id)
853 struct intel_iommu *iommu = dev_id;
854 int reg, fault_index;
855 u32 fault_status;
856 unsigned long flag;
858 spin_lock_irqsave(&iommu->register_lock, flag);
859 fault_status = readl(iommu->reg + DMAR_FSTS_REG);
861 /* TBD: ignore advanced fault log currently */
862 if (!(fault_status & DMA_FSTS_PPF))
863 goto clear_overflow;
865 fault_index = dma_fsts_fault_record_index(fault_status);
866 reg = cap_fault_reg_offset(iommu->cap);
867 while (1) {
868 u8 fault_reason;
869 u16 source_id;
870 u64 guest_addr;
871 int type;
872 u32 data;
874 /* highest 32 bits */
875 data = readl(iommu->reg + reg +
876 fault_index * PRIMARY_FAULT_REG_LEN + 12);
877 if (!(data & DMA_FRCD_F))
878 break;
880 fault_reason = dma_frcd_fault_reason(data);
881 type = dma_frcd_type(data);
883 data = readl(iommu->reg + reg +
884 fault_index * PRIMARY_FAULT_REG_LEN + 8);
885 source_id = dma_frcd_source_id(data);
887 guest_addr = dmar_readq(iommu->reg + reg +
888 fault_index * PRIMARY_FAULT_REG_LEN);
889 guest_addr = dma_frcd_page_addr(guest_addr);
890 /* clear the fault */
891 writel(DMA_FRCD_F, iommu->reg + reg +
892 fault_index * PRIMARY_FAULT_REG_LEN + 12);
894 spin_unlock_irqrestore(&iommu->register_lock, flag);
896 iommu_page_fault_do_one(iommu, type, fault_reason,
897 source_id, guest_addr);
899 fault_index++;
900 if (fault_index > cap_num_fault_regs(iommu->cap))
901 fault_index = 0;
902 spin_lock_irqsave(&iommu->register_lock, flag);
904 clear_overflow:
905 /* clear primary fault overflow */
906 fault_status = readl(iommu->reg + DMAR_FSTS_REG);
907 if (fault_status & DMA_FSTS_PFO)
908 writel(DMA_FSTS_PFO, iommu->reg + DMAR_FSTS_REG);
910 spin_unlock_irqrestore(&iommu->register_lock, flag);
911 return IRQ_HANDLED;
914 int dmar_set_interrupt(struct intel_iommu *iommu)
916 int irq, ret;
918 irq = create_irq();
919 if (!irq) {
920 printk(KERN_ERR "IOMMU: no free vectors\n");
921 return -EINVAL;
924 set_irq_data(irq, iommu);
925 iommu->irq = irq;
927 ret = arch_setup_dmar_msi(irq);
928 if (ret) {
929 set_irq_data(irq, NULL);
930 iommu->irq = 0;
931 destroy_irq(irq);
932 return 0;
935 /* Force fault register is cleared */
936 iommu_page_fault(irq, iommu);
938 ret = request_irq(irq, iommu_page_fault, 0, iommu->name, iommu);
939 if (ret)
940 printk(KERN_ERR "IOMMU: can't request irq\n");
941 return ret;
944 static int iommu_init_domains(struct intel_iommu *iommu)
946 unsigned long ndomains;
947 unsigned long nlongs;
949 ndomains = cap_ndoms(iommu->cap);
950 pr_debug("Number of Domains supportd <%ld>\n", ndomains);
951 nlongs = BITS_TO_LONGS(ndomains);
953 /* TBD: there might be 64K domains,
954 * consider other allocation for future chip
956 iommu->domain_ids = kcalloc(nlongs, sizeof(unsigned long), GFP_KERNEL);
957 if (!iommu->domain_ids) {
958 printk(KERN_ERR "Allocating domain id array failed\n");
959 return -ENOMEM;
961 iommu->domains = kcalloc(ndomains, sizeof(struct dmar_domain *),
962 GFP_KERNEL);
963 if (!iommu->domains) {
964 printk(KERN_ERR "Allocating domain array failed\n");
965 kfree(iommu->domain_ids);
966 return -ENOMEM;
969 spin_lock_init(&iommu->lock);
972 * if Caching mode is set, then invalid translations are tagged
973 * with domainid 0. Hence we need to pre-allocate it.
975 if (cap_caching_mode(iommu->cap))
976 set_bit(0, iommu->domain_ids);
977 return 0;
981 static void domain_exit(struct dmar_domain *domain);
983 void free_dmar_iommu(struct intel_iommu *iommu)
985 struct dmar_domain *domain;
986 int i;
988 i = find_first_bit(iommu->domain_ids, cap_ndoms(iommu->cap));
989 for (; i < cap_ndoms(iommu->cap); ) {
990 domain = iommu->domains[i];
991 clear_bit(i, iommu->domain_ids);
992 domain_exit(domain);
993 i = find_next_bit(iommu->domain_ids,
994 cap_ndoms(iommu->cap), i+1);
997 if (iommu->gcmd & DMA_GCMD_TE)
998 iommu_disable_translation(iommu);
1000 if (iommu->irq) {
1001 set_irq_data(iommu->irq, NULL);
1002 /* This will mask the irq */
1003 free_irq(iommu->irq, iommu);
1004 destroy_irq(iommu->irq);
1007 kfree(iommu->domains);
1008 kfree(iommu->domain_ids);
1010 /* free context mapping */
1011 free_context_table(iommu);
1014 static struct dmar_domain * iommu_alloc_domain(struct intel_iommu *iommu)
1016 unsigned long num;
1017 unsigned long ndomains;
1018 struct dmar_domain *domain;
1019 unsigned long flags;
1021 domain = alloc_domain_mem();
1022 if (!domain)
1023 return NULL;
1025 ndomains = cap_ndoms(iommu->cap);
1027 spin_lock_irqsave(&iommu->lock, flags);
1028 num = find_first_zero_bit(iommu->domain_ids, ndomains);
1029 if (num >= ndomains) {
1030 spin_unlock_irqrestore(&iommu->lock, flags);
1031 free_domain_mem(domain);
1032 printk(KERN_ERR "IOMMU: no free domain ids\n");
1033 return NULL;
1036 set_bit(num, iommu->domain_ids);
1037 domain->id = num;
1038 domain->iommu = iommu;
1039 iommu->domains[num] = domain;
1040 spin_unlock_irqrestore(&iommu->lock, flags);
1042 return domain;
1045 static void iommu_free_domain(struct dmar_domain *domain)
1047 unsigned long flags;
1049 spin_lock_irqsave(&domain->iommu->lock, flags);
1050 clear_bit(domain->id, domain->iommu->domain_ids);
1051 spin_unlock_irqrestore(&domain->iommu->lock, flags);
1054 static struct iova_domain reserved_iova_list;
1055 static struct lock_class_key reserved_alloc_key;
1056 static struct lock_class_key reserved_rbtree_key;
1058 static void dmar_init_reserved_ranges(void)
1060 struct pci_dev *pdev = NULL;
1061 struct iova *iova;
1062 int i;
1063 u64 addr, size;
1065 init_iova_domain(&reserved_iova_list, DMA_32BIT_PFN);
1067 lockdep_set_class(&reserved_iova_list.iova_alloc_lock,
1068 &reserved_alloc_key);
1069 lockdep_set_class(&reserved_iova_list.iova_rbtree_lock,
1070 &reserved_rbtree_key);
1072 /* IOAPIC ranges shouldn't be accessed by DMA */
1073 iova = reserve_iova(&reserved_iova_list, IOVA_PFN(IOAPIC_RANGE_START),
1074 IOVA_PFN(IOAPIC_RANGE_END));
1075 if (!iova)
1076 printk(KERN_ERR "Reserve IOAPIC range failed\n");
1078 /* Reserve all PCI MMIO to avoid peer-to-peer access */
1079 for_each_pci_dev(pdev) {
1080 struct resource *r;
1082 for (i = 0; i < PCI_NUM_RESOURCES; i++) {
1083 r = &pdev->resource[i];
1084 if (!r->flags || !(r->flags & IORESOURCE_MEM))
1085 continue;
1086 addr = r->start;
1087 addr &= PAGE_MASK_4K;
1088 size = r->end - addr;
1089 size = PAGE_ALIGN_4K(size);
1090 iova = reserve_iova(&reserved_iova_list, IOVA_PFN(addr),
1091 IOVA_PFN(size + addr) - 1);
1092 if (!iova)
1093 printk(KERN_ERR "Reserve iova failed\n");
1099 static void domain_reserve_special_ranges(struct dmar_domain *domain)
1101 copy_reserved_iova(&reserved_iova_list, &domain->iovad);
/*
 * Round a guest address width up to the nearest width expressible as
 * 12 + 9*k (a whole number of 9-bit page-table levels), capped at 64.
 */
static inline int guestwidth_to_adjustwidth(int gaw)
{
	int agaw;
	int r = (gaw - 12) % 9;

	agaw = (r == 0) ? gaw : gaw + 9 - r;
	if (agaw > 64)
		agaw = 64;
	return agaw;
}
1118 static int domain_init(struct dmar_domain *domain, int guest_width)
1120 struct intel_iommu *iommu;
1121 int adjust_width, agaw;
1122 unsigned long sagaw;
1124 init_iova_domain(&domain->iovad, DMA_32BIT_PFN);
1125 spin_lock_init(&domain->mapping_lock);
1127 domain_reserve_special_ranges(domain);
1129 /* calculate AGAW */
1130 iommu = domain->iommu;
1131 if (guest_width > cap_mgaw(iommu->cap))
1132 guest_width = cap_mgaw(iommu->cap);
1133 domain->gaw = guest_width;
1134 adjust_width = guestwidth_to_adjustwidth(guest_width);
1135 agaw = width_to_agaw(adjust_width);
1136 sagaw = cap_sagaw(iommu->cap);
1137 if (!test_bit(agaw, &sagaw)) {
1138 /* hardware doesn't support it, choose a bigger one */
1139 pr_debug("IOMMU: hardware doesn't support agaw %d\n", agaw);
1140 agaw = find_next_bit(&sagaw, 5, agaw);
1141 if (agaw >= 5)
1142 return -ENODEV;
1144 domain->agaw = agaw;
1145 INIT_LIST_HEAD(&domain->devices);
1147 /* always allocate the top pgd */
1148 domain->pgd = (struct dma_pte *)alloc_pgtable_page();
1149 if (!domain->pgd)
1150 return -ENOMEM;
1151 __iommu_flush_cache(iommu, domain->pgd, PAGE_SIZE_4K);
1152 return 0;
1155 static void domain_exit(struct dmar_domain *domain)
1157 u64 end;
1159 /* Domain 0 is reserved, so dont process it */
1160 if (!domain)
1161 return;
1163 domain_remove_dev_info(domain);
1164 /* destroy iovas */
1165 put_iova_domain(&domain->iovad);
1166 end = DOMAIN_MAX_ADDR(domain->gaw);
1167 end = end & (~PAGE_MASK_4K);
1169 /* clear ptes */
1170 dma_pte_clear_range(domain, 0, end);
1172 /* free page tables */
1173 dma_pte_free_pagetable(domain, 0, end);
1175 iommu_free_domain(domain);
1176 free_domain_mem(domain);
1179 static int domain_context_mapping_one(struct dmar_domain *domain,
1180 u8 bus, u8 devfn)
1182 struct context_entry *context;
1183 struct intel_iommu *iommu = domain->iommu;
1184 unsigned long flags;
1186 pr_debug("Set context mapping for %02x:%02x.%d\n",
1187 bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
1188 BUG_ON(!domain->pgd);
1189 context = device_to_context_entry(iommu, bus, devfn);
1190 if (!context)
1191 return -ENOMEM;
1192 spin_lock_irqsave(&iommu->lock, flags);
1193 if (context_present(*context)) {
1194 spin_unlock_irqrestore(&iommu->lock, flags);
1195 return 0;
1198 context_set_domain_id(*context, domain->id);
1199 context_set_address_width(*context, domain->agaw);
1200 context_set_address_root(*context, virt_to_phys(domain->pgd));
1201 context_set_translation_type(*context, CONTEXT_TT_MULTI_LEVEL);
1202 context_set_fault_enable(*context);
1203 context_set_present(*context);
1204 __iommu_flush_cache(iommu, context, sizeof(*context));
1206 /* it's a non-present to present mapping */
1207 if (iommu_flush_context_device(iommu, domain->id,
1208 (((u16)bus) << 8) | devfn, DMA_CCMD_MASK_NOBIT, 1))
1209 iommu_flush_write_buffer(iommu);
1210 else
1211 iommu_flush_iotlb_dsi(iommu, 0, 0);
1212 spin_unlock_irqrestore(&iommu->lock, flags);
1213 return 0;
1216 static int
1217 domain_context_mapping(struct dmar_domain *domain, struct pci_dev *pdev)
1219 int ret;
1220 struct pci_dev *tmp, *parent;
1222 ret = domain_context_mapping_one(domain, pdev->bus->number,
1223 pdev->devfn);
1224 if (ret)
1225 return ret;
1227 /* dependent device mapping */
1228 tmp = pci_find_upstream_pcie_bridge(pdev);
1229 if (!tmp)
1230 return 0;
1231 /* Secondary interface's bus number and devfn 0 */
1232 parent = pdev->bus->self;
1233 while (parent != tmp) {
1234 ret = domain_context_mapping_one(domain, parent->bus->number,
1235 parent->devfn);
1236 if (ret)
1237 return ret;
1238 parent = parent->bus->self;
1240 if (tmp->is_pcie) /* this is a PCIE-to-PCI bridge */
1241 return domain_context_mapping_one(domain,
1242 tmp->subordinate->number, 0);
1243 else /* this is a legacy PCI bridge */
1244 return domain_context_mapping_one(domain,
1245 tmp->bus->number, tmp->devfn);
1248 static int domain_context_mapped(struct dmar_domain *domain,
1249 struct pci_dev *pdev)
1251 int ret;
1252 struct pci_dev *tmp, *parent;
1254 ret = device_context_mapped(domain->iommu,
1255 pdev->bus->number, pdev->devfn);
1256 if (!ret)
1257 return ret;
1258 /* dependent device mapping */
1259 tmp = pci_find_upstream_pcie_bridge(pdev);
1260 if (!tmp)
1261 return ret;
1262 /* Secondary interface's bus number and devfn 0 */
1263 parent = pdev->bus->self;
1264 while (parent != tmp) {
1265 ret = device_context_mapped(domain->iommu, parent->bus->number,
1266 parent->devfn);
1267 if (!ret)
1268 return ret;
1269 parent = parent->bus->self;
1271 if (tmp->is_pcie)
1272 return device_context_mapped(domain->iommu,
1273 tmp->subordinate->number, 0);
1274 else
1275 return device_context_mapped(domain->iommu,
1276 tmp->bus->number, tmp->devfn);
1279 static int
1280 domain_page_mapping(struct dmar_domain *domain, dma_addr_t iova,
1281 u64 hpa, size_t size, int prot)
1283 u64 start_pfn, end_pfn;
1284 struct dma_pte *pte;
1285 int index;
1287 if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0)
1288 return -EINVAL;
1289 iova &= PAGE_MASK_4K;
1290 start_pfn = ((u64)hpa) >> PAGE_SHIFT_4K;
1291 end_pfn = (PAGE_ALIGN_4K(((u64)hpa) + size)) >> PAGE_SHIFT_4K;
1292 index = 0;
1293 while (start_pfn < end_pfn) {
1294 pte = addr_to_dma_pte(domain, iova + PAGE_SIZE_4K * index);
1295 if (!pte)
1296 return -ENOMEM;
1297 /* We don't need lock here, nobody else
1298 * touches the iova range
1300 BUG_ON(dma_pte_addr(*pte));
1301 dma_set_pte_addr(*pte, start_pfn << PAGE_SHIFT_4K);
1302 dma_set_pte_prot(*pte, prot);
1303 __iommu_flush_cache(domain->iommu, pte, sizeof(*pte));
1304 start_pfn++;
1305 index++;
1307 return 0;
1310 static void detach_domain_for_dev(struct dmar_domain *domain, u8 bus, u8 devfn)
1312 clear_context_table(domain->iommu, bus, devfn);
1313 iommu_flush_context_global(domain->iommu, 0);
1314 iommu_flush_iotlb_global(domain->iommu, 0);
1317 static void domain_remove_dev_info(struct dmar_domain *domain)
1319 struct device_domain_info *info;
1320 unsigned long flags;
1322 spin_lock_irqsave(&device_domain_lock, flags);
1323 while (!list_empty(&domain->devices)) {
1324 info = list_entry(domain->devices.next,
1325 struct device_domain_info, link);
1326 list_del(&info->link);
1327 list_del(&info->global);
1328 if (info->dev)
1329 info->dev->dev.archdata.iommu = NULL;
1330 spin_unlock_irqrestore(&device_domain_lock, flags);
1332 detach_domain_for_dev(info->domain, info->bus, info->devfn);
1333 free_devinfo_mem(info);
1335 spin_lock_irqsave(&device_domain_lock, flags);
1337 spin_unlock_irqrestore(&device_domain_lock, flags);
1341 * find_domain
1342 * Note: we use struct pci_dev->dev.archdata.iommu stores the info
1344 static struct dmar_domain *
1345 find_domain(struct pci_dev *pdev)
1347 struct device_domain_info *info;
1349 /* No lock here, assumes no domain exit in normal case */
1350 info = pdev->dev.archdata.iommu;
1351 if (info)
1352 return info->domain;
1353 return NULL;
1356 /* domain is initialized */
1357 static struct dmar_domain *get_domain_for_dev(struct pci_dev *pdev, int gaw)
1359 struct dmar_domain *domain, *found = NULL;
1360 struct intel_iommu *iommu;
1361 struct dmar_drhd_unit *drhd;
1362 struct device_domain_info *info, *tmp;
1363 struct pci_dev *dev_tmp;
1364 unsigned long flags;
1365 int bus = 0, devfn = 0;
1367 domain = find_domain(pdev);
1368 if (domain)
1369 return domain;
1371 dev_tmp = pci_find_upstream_pcie_bridge(pdev);
1372 if (dev_tmp) {
1373 if (dev_tmp->is_pcie) {
1374 bus = dev_tmp->subordinate->number;
1375 devfn = 0;
1376 } else {
1377 bus = dev_tmp->bus->number;
1378 devfn = dev_tmp->devfn;
1380 spin_lock_irqsave(&device_domain_lock, flags);
1381 list_for_each_entry(info, &device_domain_list, global) {
1382 if (info->bus == bus && info->devfn == devfn) {
1383 found = info->domain;
1384 break;
1387 spin_unlock_irqrestore(&device_domain_lock, flags);
1388 /* pcie-pci bridge already has a domain, uses it */
1389 if (found) {
1390 domain = found;
1391 goto found_domain;
1395 /* Allocate new domain for the device */
1396 drhd = dmar_find_matched_drhd_unit(pdev);
1397 if (!drhd) {
1398 printk(KERN_ERR "IOMMU: can't find DMAR for device %s\n",
1399 pci_name(pdev));
1400 return NULL;
1402 iommu = drhd->iommu;
1404 domain = iommu_alloc_domain(iommu);
1405 if (!domain)
1406 goto error;
1408 if (domain_init(domain, gaw)) {
1409 domain_exit(domain);
1410 goto error;
1413 /* register pcie-to-pci device */
1414 if (dev_tmp) {
1415 info = alloc_devinfo_mem();
1416 if (!info) {
1417 domain_exit(domain);
1418 goto error;
1420 info->bus = bus;
1421 info->devfn = devfn;
1422 info->dev = NULL;
1423 info->domain = domain;
1424 /* This domain is shared by devices under p2p bridge */
1425 domain->flags |= DOMAIN_FLAG_MULTIPLE_DEVICES;
1427 /* pcie-to-pci bridge already has a domain, uses it */
1428 found = NULL;
1429 spin_lock_irqsave(&device_domain_lock, flags);
1430 list_for_each_entry(tmp, &device_domain_list, global) {
1431 if (tmp->bus == bus && tmp->devfn == devfn) {
1432 found = tmp->domain;
1433 break;
1436 if (found) {
1437 free_devinfo_mem(info);
1438 domain_exit(domain);
1439 domain = found;
1440 } else {
1441 list_add(&info->link, &domain->devices);
1442 list_add(&info->global, &device_domain_list);
1444 spin_unlock_irqrestore(&device_domain_lock, flags);
1447 found_domain:
1448 info = alloc_devinfo_mem();
1449 if (!info)
1450 goto error;
1451 info->bus = pdev->bus->number;
1452 info->devfn = pdev->devfn;
1453 info->dev = pdev;
1454 info->domain = domain;
1455 spin_lock_irqsave(&device_domain_lock, flags);
1456 /* somebody is fast */
1457 found = find_domain(pdev);
1458 if (found != NULL) {
1459 spin_unlock_irqrestore(&device_domain_lock, flags);
1460 if (found != domain) {
1461 domain_exit(domain);
1462 domain = found;
1464 free_devinfo_mem(info);
1465 return domain;
1467 list_add(&info->link, &domain->devices);
1468 list_add(&info->global, &device_domain_list);
1469 pdev->dev.archdata.iommu = info;
1470 spin_unlock_irqrestore(&device_domain_lock, flags);
1471 return domain;
1472 error:
1473 /* recheck it here, maybe others set it */
1474 return find_domain(pdev);
1477 static int iommu_prepare_identity_map(struct pci_dev *pdev, u64 start, u64 end)
1479 struct dmar_domain *domain;
1480 unsigned long size;
1481 u64 base;
1482 int ret;
1484 printk(KERN_INFO
1485 "IOMMU: Setting identity map for device %s [0x%Lx - 0x%Lx]\n",
1486 pci_name(pdev), start, end);
1487 /* page table init */
1488 domain = get_domain_for_dev(pdev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
1489 if (!domain)
1490 return -ENOMEM;
1492 /* The address might not be aligned */
1493 base = start & PAGE_MASK_4K;
1494 size = end - base;
1495 size = PAGE_ALIGN_4K(size);
1496 if (!reserve_iova(&domain->iovad, IOVA_PFN(base),
1497 IOVA_PFN(base + size) - 1)) {
1498 printk(KERN_ERR "IOMMU: reserve iova failed\n");
1499 ret = -ENOMEM;
1500 goto error;
1503 pr_debug("Mapping reserved region %lx@%llx for %s\n",
1504 size, base, pci_name(pdev));
1506 * RMRR range might have overlap with physical memory range,
1507 * clear it first
1509 dma_pte_clear_range(domain, base, base + size);
1511 ret = domain_page_mapping(domain, base, base, size,
1512 DMA_PTE_READ|DMA_PTE_WRITE);
1513 if (ret)
1514 goto error;
1516 /* context entry init */
1517 ret = domain_context_mapping(domain, pdev);
1518 if (!ret)
1519 return 0;
1520 error:
1521 domain_exit(domain);
1522 return ret;
1526 static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit *rmrr,
1527 struct pci_dev *pdev)
1529 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
1530 return 0;
1531 return iommu_prepare_identity_map(pdev, rmrr->base_address,
1532 rmrr->end_address + 1);
#ifdef CONFIG_DMAR_GFX_WA
/* Carries the target device and result across work_with_active_regions() */
struct iommu_prepare_data {
	struct pci_dev *pdev;
	int ret;
};

/* Callback: identity-map one active memory region for data->pdev. */
static int __init iommu_prepare_work_fn(unsigned long start_pfn,
					unsigned long end_pfn, void *datax)
{
	struct iommu_prepare_data *data = datax;

	data->ret = iommu_prepare_identity_map(data->pdev,
				start_pfn<<PAGE_SHIFT, end_pfn<<PAGE_SHIFT);
	return data->ret;
}

/* Identity-map every active memory region on every node for pdev. */
static int __init iommu_prepare_with_active_regions(struct pci_dev *pdev)
{
	struct iommu_prepare_data data;
	int nid;

	data.pdev = pdev;
	data.ret = 0;

	for_each_online_node(nid) {
		work_with_active_regions(nid, iommu_prepare_work_fn, &data);
		if (data.ret)
			return data.ret;
	}
	return data.ret;
}

/* Workaround: give every graphics device a full 1:1 mapping of RAM. */
static void __init iommu_prepare_gfx_mapping(void)
{
	struct pci_dev *pdev = NULL;
	int ret;

	for_each_pci_dev(pdev) {
		if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO ||
				!IS_GFX_DEVICE(pdev))
			continue;
		printk(KERN_INFO "IOMMU: gfx device %s 1-1 mapping\n",
			pci_name(pdev));
		ret = iommu_prepare_with_active_regions(pdev);
		if (ret)
			printk(KERN_ERR "IOMMU: mapping reserved region failed\n");
	}
}
#endif
#ifdef CONFIG_DMAR_FLOPPY_WA
/*
 * Workaround: the floppy controller does ISA DMA below 16M, so give the
 * LPC bridge a 0-16M identity mapping.
 *
 * Fixes vs. original: the failure message claimed "0-64M" although the
 * mapping created is 0-16M, and the printk had no log level.
 */
static inline void iommu_prepare_isa(void)
{
	struct pci_dev *pdev;
	int ret;

	pdev = pci_get_class(PCI_CLASS_BRIDGE_ISA << 8, NULL);
	if (!pdev)
		return;

	printk(KERN_INFO "IOMMU: Prepare 0-16M unity mapping for LPC\n");
	ret = iommu_prepare_identity_map(pdev, 0, 16*1024*1024);

	if (ret)
		printk(KERN_ERR "IOMMU: Failed to create 0-16M identity map, "
			"floppy might not work\n");
}
#else
static inline void iommu_prepare_isa(void)
{
	return;
}
#endif /* !CONFIG_DMAR_FLOPPY_WA */
1613 int __init init_dmars(void)
1615 struct dmar_drhd_unit *drhd;
1616 struct dmar_rmrr_unit *rmrr;
1617 struct pci_dev *pdev;
1618 struct intel_iommu *iommu;
1619 int i, ret, unit = 0;
1622 * for each drhd
1623 * allocate root
1624 * initialize and program root entry to not present
1625 * endfor
1627 for_each_drhd_unit(drhd) {
1628 g_num_of_iommus++;
1630 * lock not needed as this is only incremented in the single
1631 * threaded kernel __init code path all other access are read
1632 * only
1636 deferred_flush = kzalloc(g_num_of_iommus *
1637 sizeof(struct deferred_flush_tables), GFP_KERNEL);
1638 if (!deferred_flush) {
1639 ret = -ENOMEM;
1640 goto error;
1643 for_each_drhd_unit(drhd) {
1644 if (drhd->ignored)
1645 continue;
1647 iommu = drhd->iommu;
1649 ret = iommu_init_domains(iommu);
1650 if (ret)
1651 goto error;
1654 * TBD:
1655 * we could share the same root & context tables
1656 * amoung all IOMMU's. Need to Split it later.
1658 ret = iommu_alloc_root_entry(iommu);
1659 if (ret) {
1660 printk(KERN_ERR "IOMMU: allocate root entry failed\n");
1661 goto error;
1666 * For each rmrr
1667 * for each dev attached to rmrr
1668 * do
1669 * locate drhd for dev, alloc domain for dev
1670 * allocate free domain
1671 * allocate page table entries for rmrr
1672 * if context not allocated for bus
1673 * allocate and init context
1674 * set present in root table for this bus
1675 * init context with domain, translation etc
1676 * endfor
1677 * endfor
1679 for_each_rmrr_units(rmrr) {
1680 for (i = 0; i < rmrr->devices_cnt; i++) {
1681 pdev = rmrr->devices[i];
1682 /* some BIOS lists non-exist devices in DMAR table */
1683 if (!pdev)
1684 continue;
1685 ret = iommu_prepare_rmrr_dev(rmrr, pdev);
1686 if (ret)
1687 printk(KERN_ERR
1688 "IOMMU: mapping reserved region failed\n");
1692 iommu_prepare_gfx_mapping();
1694 iommu_prepare_isa();
1697 * for each drhd
1698 * enable fault log
1699 * global invalidate context cache
1700 * global invalidate iotlb
1701 * enable translation
1703 for_each_drhd_unit(drhd) {
1704 if (drhd->ignored)
1705 continue;
1706 iommu = drhd->iommu;
1707 sprintf (iommu->name, "dmar%d", unit++);
1709 iommu_flush_write_buffer(iommu);
1711 ret = dmar_set_interrupt(iommu);
1712 if (ret)
1713 goto error;
1715 iommu_set_root_entry(iommu);
1717 iommu_flush_context_global(iommu, 0);
1718 iommu_flush_iotlb_global(iommu, 0);
1720 iommu_disable_protect_mem_regions(iommu);
1722 ret = iommu_enable_translation(iommu);
1723 if (ret)
1724 goto error;
1727 return 0;
1728 error:
1729 for_each_drhd_unit(drhd) {
1730 if (drhd->ignored)
1731 continue;
1732 iommu = drhd->iommu;
1733 free_iommu(iommu);
1735 return ret;
1738 static inline u64 aligned_size(u64 host_addr, size_t size)
1740 u64 addr;
1741 addr = (host_addr & (~PAGE_MASK_4K)) + size;
1742 return PAGE_ALIGN_4K(addr);
1745 struct iova *
1746 iommu_alloc_iova(struct dmar_domain *domain, size_t size, u64 end)
1748 struct iova *piova;
1750 /* Make sure it's in range */
1751 end = min_t(u64, DOMAIN_MAX_ADDR(domain->gaw), end);
1752 if (!size || (IOVA_START_ADDR + size > end))
1753 return NULL;
1755 piova = alloc_iova(&domain->iovad,
1756 size >> PAGE_SHIFT_4K, IOVA_PFN(end), 1);
1757 return piova;
1760 static struct iova *
1761 __intel_alloc_iova(struct device *dev, struct dmar_domain *domain,
1762 size_t size)
1764 struct pci_dev *pdev = to_pci_dev(dev);
1765 struct iova *iova = NULL;
1767 if ((pdev->dma_mask <= DMA_32BIT_MASK) || (dmar_forcedac)) {
1768 iova = iommu_alloc_iova(domain, size, pdev->dma_mask);
1769 } else {
1771 * First try to allocate an io virtual address in
1772 * DMA_32BIT_MASK and if that fails then try allocating
1773 * from higher range
1775 iova = iommu_alloc_iova(domain, size, DMA_32BIT_MASK);
1776 if (!iova)
1777 iova = iommu_alloc_iova(domain, size, pdev->dma_mask);
1780 if (!iova) {
1781 printk(KERN_ERR"Allocating iova for %s failed", pci_name(pdev));
1782 return NULL;
1785 return iova;
1788 static struct dmar_domain *
1789 get_valid_domain_for_dev(struct pci_dev *pdev)
1791 struct dmar_domain *domain;
1792 int ret;
1794 domain = get_domain_for_dev(pdev,
1795 DEFAULT_DOMAIN_ADDRESS_WIDTH);
1796 if (!domain) {
1797 printk(KERN_ERR
1798 "Allocating domain for %s failed", pci_name(pdev));
1799 return NULL;
1802 /* make sure context mapping is ok */
1803 if (unlikely(!domain_context_mapped(domain, pdev))) {
1804 ret = domain_context_mapping(domain, pdev);
1805 if (ret) {
1806 printk(KERN_ERR
1807 "Domain context map for %s failed",
1808 pci_name(pdev));
1809 return NULL;
1813 return domain;
1816 static dma_addr_t
1817 intel_map_single(struct device *hwdev, phys_addr_t paddr, size_t size, int dir)
1819 struct pci_dev *pdev = to_pci_dev(hwdev);
1820 struct dmar_domain *domain;
1821 unsigned long start_paddr;
1822 struct iova *iova;
1823 int prot = 0;
1824 int ret;
1826 BUG_ON(dir == DMA_NONE);
1827 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
1828 return paddr;
1830 domain = get_valid_domain_for_dev(pdev);
1831 if (!domain)
1832 return 0;
1834 size = aligned_size((u64)paddr, size);
1836 iova = __intel_alloc_iova(hwdev, domain, size);
1837 if (!iova)
1838 goto error;
1840 start_paddr = iova->pfn_lo << PAGE_SHIFT_4K;
1843 * Check if DMAR supports zero-length reads on write only
1844 * mappings..
1846 if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
1847 !cap_zlr(domain->iommu->cap))
1848 prot |= DMA_PTE_READ;
1849 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
1850 prot |= DMA_PTE_WRITE;
1852 * paddr - (paddr + size) might be partial page, we should map the whole
1853 * page. Note: if two part of one page are separately mapped, we
1854 * might have two guest_addr mapping to the same host paddr, but this
1855 * is not a big problem
1857 ret = domain_page_mapping(domain, start_paddr,
1858 ((u64)paddr) & PAGE_MASK_4K, size, prot);
1859 if (ret)
1860 goto error;
1862 pr_debug("Device %s request: %lx@%llx mapping: %lx@%llx, dir %d\n",
1863 pci_name(pdev), size, (u64)paddr,
1864 size, (u64)start_paddr, dir);
1866 /* it's a non-present to present mapping */
1867 ret = iommu_flush_iotlb_psi(domain->iommu, domain->id,
1868 start_paddr, size >> PAGE_SHIFT_4K, 1);
1869 if (ret)
1870 iommu_flush_write_buffer(domain->iommu);
1872 return (start_paddr + ((u64)paddr & (~PAGE_MASK_4K)));
1874 error:
1875 if (iova)
1876 __free_iova(&domain->iovad, iova);
1877 printk(KERN_ERR"Device %s request: %lx@%llx dir %d --- failed\n",
1878 pci_name(pdev), size, (u64)paddr, dir);
1879 return 0;
1882 static void flush_unmaps(void)
1884 int i, j;
1886 timer_on = 0;
1888 /* just flush them all */
1889 for (i = 0; i < g_num_of_iommus; i++) {
1890 if (deferred_flush[i].next) {
1891 struct intel_iommu *iommu =
1892 deferred_flush[i].domain[0]->iommu;
1894 iommu_flush_iotlb_global(iommu, 0);
1895 for (j = 0; j < deferred_flush[i].next; j++) {
1896 __free_iova(&deferred_flush[i].domain[j]->iovad,
1897 deferred_flush[i].iova[j]);
1899 deferred_flush[i].next = 0;
1903 list_size = 0;
1906 static void flush_unmaps_timeout(unsigned long data)
1908 unsigned long flags;
1910 spin_lock_irqsave(&async_umap_flush_lock, flags);
1911 flush_unmaps();
1912 spin_unlock_irqrestore(&async_umap_flush_lock, flags);
1915 static void add_unmap(struct dmar_domain *dom, struct iova *iova)
1917 unsigned long flags;
1918 int next, iommu_id;
1920 spin_lock_irqsave(&async_umap_flush_lock, flags);
1921 if (list_size == HIGH_WATER_MARK)
1922 flush_unmaps();
1924 iommu_id = dom->iommu->seq_id;
1926 next = deferred_flush[iommu_id].next;
1927 deferred_flush[iommu_id].domain[next] = dom;
1928 deferred_flush[iommu_id].iova[next] = iova;
1929 deferred_flush[iommu_id].next++;
1931 if (!timer_on) {
1932 mod_timer(&unmap_timer, jiffies + msecs_to_jiffies(10));
1933 timer_on = 1;
1935 list_size++;
1936 spin_unlock_irqrestore(&async_umap_flush_lock, flags);
1939 static void intel_unmap_single(struct device *dev, dma_addr_t dev_addr,
1940 size_t size, int dir)
1942 struct pci_dev *pdev = to_pci_dev(dev);
1943 struct dmar_domain *domain;
1944 unsigned long start_addr;
1945 struct iova *iova;
1947 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
1948 return;
1949 domain = find_domain(pdev);
1950 BUG_ON(!domain);
1952 iova = find_iova(&domain->iovad, IOVA_PFN(dev_addr));
1953 if (!iova)
1954 return;
1956 start_addr = iova->pfn_lo << PAGE_SHIFT_4K;
1957 size = aligned_size((u64)dev_addr, size);
1959 pr_debug("Device %s unmapping: %lx@%llx\n",
1960 pci_name(pdev), size, (u64)start_addr);
1962 /* clear the whole page */
1963 dma_pte_clear_range(domain, start_addr, start_addr + size);
1964 /* free page tables */
1965 dma_pte_free_pagetable(domain, start_addr, start_addr + size);
1966 if (intel_iommu_strict) {
1967 if (iommu_flush_iotlb_psi(domain->iommu,
1968 domain->id, start_addr, size >> PAGE_SHIFT_4K, 0))
1969 iommu_flush_write_buffer(domain->iommu);
1970 /* free iova */
1971 __free_iova(&domain->iovad, iova);
1972 } else {
1973 add_unmap(domain, iova);
1975 * queue up the release of the unmap to save the 1/6th of the
1976 * cpu used up by the iotlb flush operation...
1981 static void * intel_alloc_coherent(struct device *hwdev, size_t size,
1982 dma_addr_t *dma_handle, gfp_t flags)
1984 void *vaddr;
1985 int order;
1987 size = PAGE_ALIGN_4K(size);
1988 order = get_order(size);
1989 flags &= ~(GFP_DMA | GFP_DMA32);
1991 vaddr = (void *)__get_free_pages(flags, order);
1992 if (!vaddr)
1993 return NULL;
1994 memset(vaddr, 0, size);
1996 *dma_handle = intel_map_single(hwdev, virt_to_bus(vaddr), size, DMA_BIDIRECTIONAL);
1997 if (*dma_handle)
1998 return vaddr;
1999 free_pages((unsigned long)vaddr, order);
2000 return NULL;
2003 static void intel_free_coherent(struct device *hwdev, size_t size,
2004 void *vaddr, dma_addr_t dma_handle)
2006 int order;
2008 size = PAGE_ALIGN_4K(size);
2009 order = get_order(size);
2011 intel_unmap_single(hwdev, dma_handle, size, DMA_BIDIRECTIONAL);
2012 free_pages((unsigned long)vaddr, order);
2015 #define SG_ENT_VIRT_ADDRESS(sg) (sg_virt((sg)))
2016 static void intel_unmap_sg(struct device *hwdev, struct scatterlist *sglist,
2017 int nelems, int dir)
2019 int i;
2020 struct pci_dev *pdev = to_pci_dev(hwdev);
2021 struct dmar_domain *domain;
2022 unsigned long start_addr;
2023 struct iova *iova;
2024 size_t size = 0;
2025 void *addr;
2026 struct scatterlist *sg;
2028 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
2029 return;
2031 domain = find_domain(pdev);
2033 iova = find_iova(&domain->iovad, IOVA_PFN(sglist[0].dma_address));
2034 if (!iova)
2035 return;
2036 for_each_sg(sglist, sg, nelems, i) {
2037 addr = SG_ENT_VIRT_ADDRESS(sg);
2038 size += aligned_size((u64)addr, sg->length);
2041 start_addr = iova->pfn_lo << PAGE_SHIFT_4K;
2043 /* clear the whole page */
2044 dma_pte_clear_range(domain, start_addr, start_addr + size);
2045 /* free page tables */
2046 dma_pte_free_pagetable(domain, start_addr, start_addr + size);
2048 if (iommu_flush_iotlb_psi(domain->iommu, domain->id, start_addr,
2049 size >> PAGE_SHIFT_4K, 0))
2050 iommu_flush_write_buffer(domain->iommu);
2052 /* free iova */
2053 __free_iova(&domain->iovad, iova);
2056 static int intel_nontranslate_map_sg(struct device *hddev,
2057 struct scatterlist *sglist, int nelems, int dir)
2059 int i;
2060 struct scatterlist *sg;
2062 for_each_sg(sglist, sg, nelems, i) {
2063 BUG_ON(!sg_page(sg));
2064 sg->dma_address = virt_to_bus(SG_ENT_VIRT_ADDRESS(sg));
2065 sg->dma_length = sg->length;
2067 return nelems;
2070 static int intel_map_sg(struct device *hwdev, struct scatterlist *sglist,
2071 int nelems, int dir)
2073 void *addr;
2074 int i;
2075 struct pci_dev *pdev = to_pci_dev(hwdev);
2076 struct dmar_domain *domain;
2077 size_t size = 0;
2078 int prot = 0;
2079 size_t offset = 0;
2080 struct iova *iova = NULL;
2081 int ret;
2082 struct scatterlist *sg;
2083 unsigned long start_addr;
2085 BUG_ON(dir == DMA_NONE);
2086 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
2087 return intel_nontranslate_map_sg(hwdev, sglist, nelems, dir);
2089 domain = get_valid_domain_for_dev(pdev);
2090 if (!domain)
2091 return 0;
2093 for_each_sg(sglist, sg, nelems, i) {
2094 addr = SG_ENT_VIRT_ADDRESS(sg);
2095 addr = (void *)virt_to_phys(addr);
2096 size += aligned_size((u64)addr, sg->length);
2099 iova = __intel_alloc_iova(hwdev, domain, size);
2100 if (!iova) {
2101 sglist->dma_length = 0;
2102 return 0;
2106 * Check if DMAR supports zero-length reads on write only
2107 * mappings..
2109 if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
2110 !cap_zlr(domain->iommu->cap))
2111 prot |= DMA_PTE_READ;
2112 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
2113 prot |= DMA_PTE_WRITE;
2115 start_addr = iova->pfn_lo << PAGE_SHIFT_4K;
2116 offset = 0;
2117 for_each_sg(sglist, sg, nelems, i) {
2118 addr = SG_ENT_VIRT_ADDRESS(sg);
2119 addr = (void *)virt_to_phys(addr);
2120 size = aligned_size((u64)addr, sg->length);
2121 ret = domain_page_mapping(domain, start_addr + offset,
2122 ((u64)addr) & PAGE_MASK_4K,
2123 size, prot);
2124 if (ret) {
2125 /* clear the page */
2126 dma_pte_clear_range(domain, start_addr,
2127 start_addr + offset);
2128 /* free page tables */
2129 dma_pte_free_pagetable(domain, start_addr,
2130 start_addr + offset);
2131 /* free iova */
2132 __free_iova(&domain->iovad, iova);
2133 return 0;
2135 sg->dma_address = start_addr + offset +
2136 ((u64)addr & (~PAGE_MASK_4K));
2137 sg->dma_length = sg->length;
2138 offset += size;
2141 /* it's a non-present to present mapping */
2142 if (iommu_flush_iotlb_psi(domain->iommu, domain->id,
2143 start_addr, offset >> PAGE_SHIFT_4K, 1))
2144 iommu_flush_write_buffer(domain->iommu);
2145 return nelems;
2148 static struct dma_mapping_ops intel_dma_ops = {
2149 .alloc_coherent = intel_alloc_coherent,
2150 .free_coherent = intel_free_coherent,
2151 .map_single = intel_map_single,
2152 .unmap_single = intel_unmap_single,
2153 .map_sg = intel_map_sg,
2154 .unmap_sg = intel_unmap_sg,
2157 static inline int iommu_domain_cache_init(void)
2159 int ret = 0;
2161 iommu_domain_cache = kmem_cache_create("iommu_domain",
2162 sizeof(struct dmar_domain),
2164 SLAB_HWCACHE_ALIGN,
2166 NULL);
2167 if (!iommu_domain_cache) {
2168 printk(KERN_ERR "Couldn't create iommu_domain cache\n");
2169 ret = -ENOMEM;
2172 return ret;
2175 static inline int iommu_devinfo_cache_init(void)
2177 int ret = 0;
2179 iommu_devinfo_cache = kmem_cache_create("iommu_devinfo",
2180 sizeof(struct device_domain_info),
2182 SLAB_HWCACHE_ALIGN,
2184 NULL);
2185 if (!iommu_devinfo_cache) {
2186 printk(KERN_ERR "Couldn't create devinfo cache\n");
2187 ret = -ENOMEM;
2190 return ret;
2193 static inline int iommu_iova_cache_init(void)
2195 int ret = 0;
2197 iommu_iova_cache = kmem_cache_create("iommu_iova",
2198 sizeof(struct iova),
2200 SLAB_HWCACHE_ALIGN,
2202 NULL);
2203 if (!iommu_iova_cache) {
2204 printk(KERN_ERR "Couldn't create iova cache\n");
2205 ret = -ENOMEM;
2208 return ret;
2211 static int __init iommu_init_mempool(void)
2213 int ret;
2214 ret = iommu_iova_cache_init();
2215 if (ret)
2216 return ret;
2218 ret = iommu_domain_cache_init();
2219 if (ret)
2220 goto domain_error;
2222 ret = iommu_devinfo_cache_init();
2223 if (!ret)
2224 return ret;
2226 kmem_cache_destroy(iommu_domain_cache);
2227 domain_error:
2228 kmem_cache_destroy(iommu_iova_cache);
2230 return -ENOMEM;
2233 static void __init iommu_exit_mempool(void)
2235 kmem_cache_destroy(iommu_devinfo_cache);
2236 kmem_cache_destroy(iommu_domain_cache);
2237 kmem_cache_destroy(iommu_iova_cache);
2241 static void __init init_no_remapping_devices(void)
2243 struct dmar_drhd_unit *drhd;
2245 for_each_drhd_unit(drhd) {
2246 if (!drhd->include_all) {
2247 int i;
2248 for (i = 0; i < drhd->devices_cnt; i++)
2249 if (drhd->devices[i] != NULL)
2250 break;
2251 /* ignore DMAR unit if no pci devices exist */
2252 if (i == drhd->devices_cnt)
2253 drhd->ignored = 1;
2257 if (dmar_map_gfx)
2258 return;
2260 for_each_drhd_unit(drhd) {
2261 int i;
2262 if (drhd->ignored || drhd->include_all)
2263 continue;
2265 for (i = 0; i < drhd->devices_cnt; i++)
2266 if (drhd->devices[i] &&
2267 !IS_GFX_DEVICE(drhd->devices[i]))
2268 break;
2270 if (i < drhd->devices_cnt)
2271 continue;
2273 /* bypass IOMMU if it is just for gfx devices */
2274 drhd->ignored = 1;
2275 for (i = 0; i < drhd->devices_cnt; i++) {
2276 if (!drhd->devices[i])
2277 continue;
2278 drhd->devices[i]->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
2283 int __init intel_iommu_init(void)
2285 int ret = 0;
2287 if (dmar_table_init())
2288 return -ENODEV;
2290 if (dmar_dev_scope_init())
2291 return -ENODEV;
2294 * Check the need for DMA-remapping initialization now.
2295 * Above initialization will also be used by Interrupt-remapping.
2297 if (no_iommu || swiotlb || dmar_disabled)
2298 return -ENODEV;
2300 iommu_init_mempool();
2301 dmar_init_reserved_ranges();
2303 init_no_remapping_devices();
2305 ret = init_dmars();
2306 if (ret) {
2307 printk(KERN_ERR "IOMMU: dmar init failed\n");
2308 put_iova_domain(&reserved_iova_list);
2309 iommu_exit_mempool();
2310 return ret;
2312 printk(KERN_INFO
2313 "PCI-DMA: Intel(R) Virtualization Technology for Directed I/O\n");
2315 init_timer(&unmap_timer);
2316 force_iommu = 1;
2317 dma_ops = &intel_dma_ops;
2318 return 0;
2321 void intel_iommu_domain_exit(struct dmar_domain *domain)
2323 u64 end;
2325 /* Domain 0 is reserved, so dont process it */
2326 if (!domain)
2327 return;
2329 end = DOMAIN_MAX_ADDR(domain->gaw);
2330 end = end & (~PAGE_MASK_4K);
2332 /* clear ptes */
2333 dma_pte_clear_range(domain, 0, end);
2335 /* free page tables */
2336 dma_pte_free_pagetable(domain, 0, end);
2338 iommu_free_domain(domain);
2339 free_domain_mem(domain);
2341 EXPORT_SYMBOL_GPL(intel_iommu_domain_exit);
2343 struct dmar_domain *intel_iommu_domain_alloc(struct pci_dev *pdev)
2345 struct dmar_drhd_unit *drhd;
2346 struct dmar_domain *domain;
2347 struct intel_iommu *iommu;
2349 drhd = dmar_find_matched_drhd_unit(pdev);
2350 if (!drhd) {
2351 printk(KERN_ERR "intel_iommu_domain_alloc: drhd == NULL\n");
2352 return NULL;
2355 iommu = drhd->iommu;
2356 if (!iommu) {
2357 printk(KERN_ERR
2358 "intel_iommu_domain_alloc: iommu == NULL\n");
2359 return NULL;
2361 domain = iommu_alloc_domain(iommu);
2362 if (!domain) {
2363 printk(KERN_ERR
2364 "intel_iommu_domain_alloc: domain == NULL\n");
2365 return NULL;
2367 if (domain_init(domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
2368 printk(KERN_ERR
2369 "intel_iommu_domain_alloc: domain_init() failed\n");
2370 intel_iommu_domain_exit(domain);
2371 return NULL;
2373 return domain;
2375 EXPORT_SYMBOL_GPL(intel_iommu_domain_alloc);
/* Exported wrapper around domain_context_mapping(). */
int intel_iommu_context_mapping(
	struct dmar_domain *domain, struct pci_dev *pdev)
{
	return domain_context_mapping(domain, pdev);
}
EXPORT_SYMBOL_GPL(intel_iommu_context_mapping);
2386 int intel_iommu_page_mapping(
2387 struct dmar_domain *domain, dma_addr_t iova,
2388 u64 hpa, size_t size, int prot)
2390 int rc;
2391 rc = domain_page_mapping(domain, iova, hpa, size, prot);
2392 return rc;
2394 EXPORT_SYMBOL_GPL(intel_iommu_page_mapping);
2396 void intel_iommu_detach_dev(struct dmar_domain *domain, u8 bus, u8 devfn)
2398 detach_domain_for_dev(domain, bus, devfn);
2400 EXPORT_SYMBOL_GPL(intel_iommu_detach_dev);
/* Exported wrapper around find_domain(). */
struct dmar_domain *
intel_iommu_find_domain(struct pci_dev *pdev)
{
	return find_domain(pdev);
}
EXPORT_SYMBOL_GPL(intel_iommu_find_domain);
2409 int intel_iommu_found(void)
2411 return g_num_of_iommus;
2413 EXPORT_SYMBOL_GPL(intel_iommu_found);
2415 u64 intel_iommu_iova_to_pfn(struct dmar_domain *domain, u64 iova)
2417 struct dma_pte *pte;
2418 u64 pfn;
2420 pfn = 0;
2421 pte = addr_to_dma_pte(domain, iova);
2423 if (pte)
2424 pfn = dma_pte_addr(*pte);
2426 return pfn >> PAGE_SHIFT_4K;
2428 EXPORT_SYMBOL_GPL(intel_iommu_iova_to_pfn);