drivers/pci/intel-iommu.c
1 /*
2 * Copyright (c) 2006, Intel Corporation.
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
13 * You should have received a copy of the GNU General Public License along with
14 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
15 * Place - Suite 330, Boston, MA 02111-1307 USA.
17 * Copyright (C) 2006-2008 Intel Corporation
18 * Author: Ashok Raj <ashok.raj@intel.com>
19 * Author: Shaohua Li <shaohua.li@intel.com>
20 * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
21 * Author: Fenghua Yu <fenghua.yu@intel.com>
24 #include <linux/init.h>
25 #include <linux/bitmap.h>
26 #include <linux/debugfs.h>
27 #include <linux/slab.h>
28 #include <linux/irq.h>
29 #include <linux/interrupt.h>
30 #include <linux/spinlock.h>
31 #include <linux/pci.h>
32 #include <linux/dmar.h>
33 #include <linux/dma-mapping.h>
34 #include <linux/mempool.h>
35 #include <linux/timer.h>
36 #include <linux/iova.h>
37 #include <linux/iommu.h>
38 #include <linux/intel-iommu.h>
39 #include <asm/cacheflush.h>
40 #include <asm/iommu.h>
41 #include "pci.h"
43 #define ROOT_SIZE VTD_PAGE_SIZE
44 #define CONTEXT_SIZE VTD_PAGE_SIZE
46 #define IS_GFX_DEVICE(pdev) ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY)
47 #define IS_ISA_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA)
49 #define IOAPIC_RANGE_START (0xfee00000)
50 #define IOAPIC_RANGE_END (0xfeefffff)
51 #define IOVA_START_ADDR (0x1000)
53 #define DEFAULT_DOMAIN_ADDRESS_WIDTH 48
55 #define DOMAIN_MAX_ADDR(gaw) ((((u64)1) << gaw) - 1)
57 #define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT)
58 #define DMA_32BIT_PFN IOVA_PFN(DMA_32BIT_MASK)
59 #define DMA_64BIT_PFN IOVA_PFN(DMA_64BIT_MASK)
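/* For illustration only: with 4KB pages, IOVA_PFN(IOAPIC_RANGE_START) == 0xfee00. */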
61 /* global iommu list, set NULL for ignored DMAR units */
62 static struct intel_iommu **g_iommus;
65 * 0: Present
66 * 1-11: Reserved
67 * 12-63: Context Ptr (12 - (haw-1))
68 * 64-127: Reserved
70 struct root_entry {
71 u64 val;
72 u64 rsvd1;
74 #define ROOT_ENTRY_NR (VTD_PAGE_SIZE/sizeof(struct root_entry))
75 static inline bool root_present(struct root_entry *root)
77 return (root->val & 1);
79 static inline void set_root_present(struct root_entry *root)
81 root->val |= 1;
83 static inline void set_root_value(struct root_entry *root, unsigned long value)
85 root->val |= value & VTD_PAGE_MASK;
88 static inline struct context_entry *
89 get_context_addr_from_root(struct root_entry *root)
91 return (struct context_entry *)
92 (root_present(root)?phys_to_virt(
93 root->val & VTD_PAGE_MASK) :
94 NULL);
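/*
 * Illustration (not part of the driver): a root entry whose context
 * table lives at physical address 0x12345000 ends up holding
 *   val == 0x12345000 | 1   (context-table pointer | present bit)
 * after set_root_value() followed by set_root_present().
 */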
98 * low 64 bits:
99 * 0: present
100 * 1: fault processing disable
101 * 2-3: translation type
102 * 12-63: address space root
103 * high 64 bits:
104 * 0-2: address width
105 * 3-6: available
106 * 8-23: domain id
108 struct context_entry {
109 u64 lo;
110 u64 hi;
113 static inline bool context_present(struct context_entry *context)
115 return (context->lo & 1);
117 static inline void context_set_present(struct context_entry *context)
119 context->lo |= 1;
122 static inline void context_set_fault_enable(struct context_entry *context)
124 context->lo &= (((u64)-1) << 2) | 1;
127 #define CONTEXT_TT_MULTI_LEVEL 0
129 static inline void context_set_translation_type(struct context_entry *context,
130 unsigned long value)
132 context->lo &= (((u64)-1) << 4) | 3;
133 context->lo |= (value & 3) << 2;
136 static inline void context_set_address_root(struct context_entry *context,
137 unsigned long value)
139 context->lo |= value & VTD_PAGE_MASK;
142 static inline void context_set_address_width(struct context_entry *context,
143 unsigned long value)
145 context->hi |= value & 7;
148 static inline void context_set_domain_id(struct context_entry *context,
149 unsigned long value)
151 context->hi |= (value & ((1 << 16) - 1)) << 8;
154 static inline void context_clear_entry(struct context_entry *context)
156 context->lo = 0;
157 context->hi = 0;
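/*
 * Illustration (not part of the driver): domain_context_mapping_one()
 * below assembles an entry roughly as
 *   lo == virt_to_phys(pgd) | (CONTEXT_TT_MULTI_LEVEL << 2) | 1 (present)
 *   hi == (domain id << 8) | address width
 * using the context_set_*() helpers above.
 */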
161 * 0: readable
162 * 1: writable
163 * 2-6: reserved
164 * 7: super page
165 * 8-11: available
166 * 12-63: Host physical address
168 struct dma_pte {
169 u64 val;
172 static inline void dma_clear_pte(struct dma_pte *pte)
174 pte->val = 0;
177 static inline void dma_set_pte_readable(struct dma_pte *pte)
179 pte->val |= DMA_PTE_READ;
182 static inline void dma_set_pte_writable(struct dma_pte *pte)
184 pte->val |= DMA_PTE_WRITE;
187 static inline void dma_set_pte_prot(struct dma_pte *pte, unsigned long prot)
189 pte->val = (pte->val & ~3) | (prot & 3);
192 static inline u64 dma_pte_addr(struct dma_pte *pte)
194 return (pte->val & VTD_PAGE_MASK);
197 static inline void dma_set_pte_addr(struct dma_pte *pte, u64 addr)
199 pte->val |= (addr & VTD_PAGE_MASK);
202 static inline bool dma_pte_present(struct dma_pte *pte)
204 return (pte->val & 3) != 0;
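/*
 * Illustration (not part of the driver): a read/write mapping of host
 * page 0x8000 is written as
 *   pte->val == 0x8000 | DMA_PTE_READ | DMA_PTE_WRITE
 * via dma_set_pte_addr() and dma_set_pte_prot().
 */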
207 /* devices under the same p2p bridge are owned in one domain */
208 #define DOMAIN_FLAG_P2P_MULTIPLE_DEVICES (1 << 0)
210 /* domain represents a virtual machine; more than one device
211 * across iommus may be owned by one domain, e.g. a kvm guest.
213 #define DOMAIN_FLAG_VIRTUAL_MACHINE (1 << 1)
215 struct dmar_domain {
216 int id; /* domain id */
217 unsigned long iommu_bmp; /* bitmap of iommus this domain uses*/
219 struct list_head devices; /* all devices' list */
220 struct iova_domain iovad; /* iova's that belong to this domain */
222 struct dma_pte *pgd; /* virtual address */
223 spinlock_t mapping_lock; /* page table lock */
224 int gaw; /* max guest address width */
226 /* adjusted guest address width, 0 is level 2 30-bit */
227 int agaw;
229 int flags; /* flags to find out type of domain */
231 int iommu_coherency;/* indicate coherency of iommu access */
232 int iommu_count; /* reference count of iommu */
233 spinlock_t iommu_lock; /* protect iommu set in domain */
234 u64 max_addr; /* maximum mapped address */
237 /* PCI domain-device relationship */
238 struct device_domain_info {
239 struct list_head link; /* link to domain siblings */
240 struct list_head global; /* link to global list */
241 u8 bus; /* PCI bus number */
242 u8 devfn; /* PCI devfn number */
243 struct pci_dev *dev; /* it's NULL for PCIE-to-PCI bridge */
244 struct dmar_domain *domain; /* pointer to domain */
247 static void flush_unmaps_timeout(unsigned long data);
249 DEFINE_TIMER(unmap_timer, flush_unmaps_timeout, 0, 0);
251 #define HIGH_WATER_MARK 250
252 struct deferred_flush_tables {
253 int next;
254 struct iova *iova[HIGH_WATER_MARK];
255 struct dmar_domain *domain[HIGH_WATER_MARK];
258 static struct deferred_flush_tables *deferred_flush;
260 /* number of IOMMUs; used to size g_iommus and the deferred_flush tables */
261 static int g_num_of_iommus;
263 static DEFINE_SPINLOCK(async_umap_flush_lock);
264 static LIST_HEAD(unmaps_to_do);
266 static int timer_on;
267 static long list_size;
269 static void domain_remove_dev_info(struct dmar_domain *domain);
271 #ifdef CONFIG_DMAR_DEFAULT_ON
272 int dmar_disabled = 0;
273 #else
274 int dmar_disabled = 1;
275 #endif /*CONFIG_DMAR_DEFAULT_ON*/
277 static int __initdata dmar_map_gfx = 1;
278 static int dmar_forcedac;
279 static int intel_iommu_strict;
281 #define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1))
282 static DEFINE_SPINLOCK(device_domain_lock);
283 static LIST_HEAD(device_domain_list);
285 static struct iommu_ops intel_iommu_ops;
287 static int __init intel_iommu_setup(char *str)
289 if (!str)
290 return -EINVAL;
291 while (*str) {
292 if (!strncmp(str, "on", 2)) {
293 dmar_disabled = 0;
294 printk(KERN_INFO "Intel-IOMMU: enabled\n");
295 } else if (!strncmp(str, "off", 3)) {
296 dmar_disabled = 1;
297 printk(KERN_INFO "Intel-IOMMU: disabled\n");
298 } else if (!strncmp(str, "igfx_off", 8)) {
299 dmar_map_gfx = 0;
300 printk(KERN_INFO
301 "Intel-IOMMU: disable GFX device mapping\n");
302 } else if (!strncmp(str, "forcedac", 8)) {
303 printk(KERN_INFO
304 "Intel-IOMMU: Forcing DAC for PCI devices\n");
305 dmar_forcedac = 1;
306 } else if (!strncmp(str, "strict", 6)) {
307 printk(KERN_INFO
308 "Intel-IOMMU: disable batched IOTLB flush\n");
309 intel_iommu_strict = 1;
312 str += strcspn(str, ",");
313 while (*str == ',')
314 str++;
316 return 0;
318 __setup("intel_iommu=", intel_iommu_setup);
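/*
 * Example (illustrative): booting with
 *   intel_iommu=on,igfx_off,strict
 * enables the IOMMU, skips mapping of graphics devices and disables
 * the batched (deferred) IOTLB flush.
 */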
320 static struct kmem_cache *iommu_domain_cache;
321 static struct kmem_cache *iommu_devinfo_cache;
322 static struct kmem_cache *iommu_iova_cache;
324 static inline void *iommu_kmem_cache_alloc(struct kmem_cache *cachep)
326 unsigned int flags;
327 void *vaddr;
329 /* trying to avoid low memory issues */
330 flags = current->flags & PF_MEMALLOC;
331 current->flags |= PF_MEMALLOC;
332 vaddr = kmem_cache_alloc(cachep, GFP_ATOMIC);
333 current->flags &= (~PF_MEMALLOC | flags);
334 return vaddr;
338 static inline void *alloc_pgtable_page(void)
340 unsigned int flags;
341 void *vaddr;
343 /* trying to avoid low memory issues */
344 flags = current->flags & PF_MEMALLOC;
345 current->flags |= PF_MEMALLOC;
346 vaddr = (void *)get_zeroed_page(GFP_ATOMIC);
347 current->flags &= (~PF_MEMALLOC | flags);
348 return vaddr;
351 static inline void free_pgtable_page(void *vaddr)
353 free_page((unsigned long)vaddr);
356 static inline void *alloc_domain_mem(void)
358 return iommu_kmem_cache_alloc(iommu_domain_cache);
361 static void free_domain_mem(void *vaddr)
363 kmem_cache_free(iommu_domain_cache, vaddr);
366 static inline void * alloc_devinfo_mem(void)
368 return iommu_kmem_cache_alloc(iommu_devinfo_cache);
371 static inline void free_devinfo_mem(void *vaddr)
373 kmem_cache_free(iommu_devinfo_cache, vaddr);
376 struct iova *alloc_iova_mem(void)
378 return iommu_kmem_cache_alloc(iommu_iova_cache);
381 void free_iova_mem(struct iova *iova)
383 kmem_cache_free(iommu_iova_cache, iova);
387 static inline int width_to_agaw(int width);
389 /* calculate agaw for each iommu.
390 * "SAGAW" may be different across iommus; use a default agaw, and
391 * fall back to a smaller supported agaw for iommus that don't support it.
393 int iommu_calculate_agaw(struct intel_iommu *iommu)
395 unsigned long sagaw;
396 int agaw = -1;
398 sagaw = cap_sagaw(iommu->cap);
399 for (agaw = width_to_agaw(DEFAULT_DOMAIN_ADDRESS_WIDTH);
400 agaw >= 0; agaw--) {
401 if (test_bit(agaw, &sagaw))
402 break;
405 return agaw;
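/*
 * Example (illustrative): with DEFAULT_DOMAIN_ADDRESS_WIDTH == 48,
 * width_to_agaw() yields (48 - 30) / 9 == 2; if SAGAW does not
 * advertise agaw 2, the loop falls back to agaw 1 (39 bits) or
 * agaw 0 (30 bits).
 */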
408 /* in native case, each domain is related to only one iommu */
409 static struct intel_iommu *domain_get_iommu(struct dmar_domain *domain)
411 int iommu_id;
413 BUG_ON(domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE);
415 iommu_id = find_first_bit(&domain->iommu_bmp, g_num_of_iommus);
416 if (iommu_id < 0 || iommu_id >= g_num_of_iommus)
417 return NULL;
419 return g_iommus[iommu_id];
422 /* "Coherency" capability may be different across iommus */
423 static void domain_update_iommu_coherency(struct dmar_domain *domain)
425 int i;
427 domain->iommu_coherency = 1;
429 i = find_first_bit(&domain->iommu_bmp, g_num_of_iommus);
430 for (; i < g_num_of_iommus; ) {
431 if (!ecap_coherent(g_iommus[i]->ecap)) {
432 domain->iommu_coherency = 0;
433 break;
435 i = find_next_bit(&domain->iommu_bmp, g_num_of_iommus, i+1);
439 static struct intel_iommu *device_to_iommu(u8 bus, u8 devfn)
441 struct dmar_drhd_unit *drhd = NULL;
442 int i;
444 for_each_drhd_unit(drhd) {
445 if (drhd->ignored)
446 continue;
448 for (i = 0; i < drhd->devices_cnt; i++)
449 if (drhd->devices[i] &&
450 drhd->devices[i]->bus->number == bus &&
451 drhd->devices[i]->devfn == devfn)
452 return drhd->iommu;
454 if (drhd->include_all)
455 return drhd->iommu;
458 return NULL;
461 static void domain_flush_cache(struct dmar_domain *domain,
462 void *addr, int size)
464 if (!domain->iommu_coherency)
465 clflush_cache_range(addr, size);
468 /* Gets context entry for a given bus and devfn */
469 static struct context_entry * device_to_context_entry(struct intel_iommu *iommu,
470 u8 bus, u8 devfn)
472 struct root_entry *root;
473 struct context_entry *context;
474 unsigned long phy_addr;
475 unsigned long flags;
477 spin_lock_irqsave(&iommu->lock, flags);
478 root = &iommu->root_entry[bus];
479 context = get_context_addr_from_root(root);
480 if (!context) {
481 context = (struct context_entry *)alloc_pgtable_page();
482 if (!context) {
483 spin_unlock_irqrestore(&iommu->lock, flags);
484 return NULL;
486 __iommu_flush_cache(iommu, (void *)context, CONTEXT_SIZE);
487 phy_addr = virt_to_phys((void *)context);
488 set_root_value(root, phy_addr);
489 set_root_present(root);
490 __iommu_flush_cache(iommu, root, sizeof(*root));
492 spin_unlock_irqrestore(&iommu->lock, flags);
493 return &context[devfn];
496 static int device_context_mapped(struct intel_iommu *iommu, u8 bus, u8 devfn)
498 struct root_entry *root;
499 struct context_entry *context;
500 int ret;
501 unsigned long flags;
503 spin_lock_irqsave(&iommu->lock, flags);
504 root = &iommu->root_entry[bus];
505 context = get_context_addr_from_root(root);
506 if (!context) {
507 ret = 0;
508 goto out;
510 ret = context_present(&context[devfn]);
511 out:
512 spin_unlock_irqrestore(&iommu->lock, flags);
513 return ret;
516 static void clear_context_table(struct intel_iommu *iommu, u8 bus, u8 devfn)
518 struct root_entry *root;
519 struct context_entry *context;
520 unsigned long flags;
522 spin_lock_irqsave(&iommu->lock, flags);
523 root = &iommu->root_entry[bus];
524 context = get_context_addr_from_root(root);
525 if (context) {
526 context_clear_entry(&context[devfn]);
527 __iommu_flush_cache(iommu, &context[devfn], \
528 sizeof(*context));
530 spin_unlock_irqrestore(&iommu->lock, flags);
533 static void free_context_table(struct intel_iommu *iommu)
535 struct root_entry *root;
536 int i;
537 unsigned long flags;
538 struct context_entry *context;
540 spin_lock_irqsave(&iommu->lock, flags);
541 if (!iommu->root_entry) {
542 goto out;
544 for (i = 0; i < ROOT_ENTRY_NR; i++) {
545 root = &iommu->root_entry[i];
546 context = get_context_addr_from_root(root);
547 if (context)
548 free_pgtable_page(context);
550 free_pgtable_page(iommu->root_entry);
551 iommu->root_entry = NULL;
552 out:
553 spin_unlock_irqrestore(&iommu->lock, flags);
556 /* page table handling */
557 #define LEVEL_STRIDE (9)
558 #define LEVEL_MASK (((u64)1 << LEVEL_STRIDE) - 1)
560 static inline int agaw_to_level(int agaw)
562 return agaw + 2;
565 static inline int agaw_to_width(int agaw)
567 return 30 + agaw * LEVEL_STRIDE;
571 static inline int width_to_agaw(int width)
573 return (width - 30) / LEVEL_STRIDE;
576 static inline unsigned int level_to_offset_bits(int level)
578 return (12 + (level - 1) * LEVEL_STRIDE);
581 static inline int address_level_offset(u64 addr, int level)
583 return ((addr >> level_to_offset_bits(level)) & LEVEL_MASK);
586 static inline u64 level_mask(int level)
588 return ((u64)-1 << level_to_offset_bits(level));
591 static inline u64 level_size(int level)
593 return ((u64)1 << level_to_offset_bits(level));
596 static inline u64 align_to_level(u64 addr, int level)
598 return ((addr + level_size(level) - 1) & level_mask(level));
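/*
 * Example (illustrative): for agaw 2 the table has agaw_to_level(2) == 4
 * levels, and level_to_offset_bits() gives 39/30/21/12 for levels 4..1,
 * so a 48-bit address splits into four 9-bit table indexes plus a
 * 12-bit page offset, which is exactly the walk addr_to_dma_pte()
 * below performs.
 */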
601 static struct dma_pte * addr_to_dma_pte(struct dmar_domain *domain, u64 addr)
603 int addr_width = agaw_to_width(domain->agaw);
604 struct dma_pte *parent, *pte = NULL;
605 int level = agaw_to_level(domain->agaw);
606 int offset;
607 unsigned long flags;
609 BUG_ON(!domain->pgd);
611 addr &= (((u64)1) << addr_width) - 1;
612 parent = domain->pgd;
614 spin_lock_irqsave(&domain->mapping_lock, flags);
615 while (level > 0) {
616 void *tmp_page;
618 offset = address_level_offset(addr, level);
619 pte = &parent[offset];
620 if (level == 1)
621 break;
623 if (!dma_pte_present(pte)) {
624 tmp_page = alloc_pgtable_page();
626 if (!tmp_page) {
627 spin_unlock_irqrestore(&domain->mapping_lock,
628 flags);
629 return NULL;
631 domain_flush_cache(domain, tmp_page, PAGE_SIZE);
632 dma_set_pte_addr(pte, virt_to_phys(tmp_page));
634 * higher level tables always set r/w; the last level page
635 * table controls read/write
637 dma_set_pte_readable(pte);
638 dma_set_pte_writable(pte);
639 domain_flush_cache(domain, pte, sizeof(*pte));
641 parent = phys_to_virt(dma_pte_addr(pte));
642 level--;
645 spin_unlock_irqrestore(&domain->mapping_lock, flags);
646 return pte;
649 /* return address's pte at specific level */
650 static struct dma_pte *dma_addr_level_pte(struct dmar_domain *domain, u64 addr,
651 int level)
653 struct dma_pte *parent, *pte = NULL;
654 int total = agaw_to_level(domain->agaw);
655 int offset;
657 parent = domain->pgd;
658 while (level <= total) {
659 offset = address_level_offset(addr, total);
660 pte = &parent[offset];
661 if (level == total)
662 return pte;
664 if (!dma_pte_present(pte))
665 break;
666 parent = phys_to_virt(dma_pte_addr(pte));
667 total--;
669 return NULL;
672 /* clear one page's page table */
673 static void dma_pte_clear_one(struct dmar_domain *domain, u64 addr)
675 struct dma_pte *pte = NULL;
677 /* get last level pte */
678 pte = dma_addr_level_pte(domain, addr, 1);
680 if (pte) {
681 dma_clear_pte(pte);
682 domain_flush_cache(domain, pte, sizeof(*pte));
686 /* clear last level ptes; a tlb flush should follow */
687 static void dma_pte_clear_range(struct dmar_domain *domain, u64 start, u64 end)
689 int addr_width = agaw_to_width(domain->agaw);
691 start &= (((u64)1) << addr_width) - 1;
692 end &= (((u64)1) << addr_width) - 1;
693 /* in case it's a partial page */
694 start = PAGE_ALIGN(start);
695 end &= PAGE_MASK;
697 /* we don't need lock here, nobody else touches the iova range */
698 while (start < end) {
699 dma_pte_clear_one(domain, start);
700 start += VTD_PAGE_SIZE;
704 /* free page table pages. last level pte should already be cleared */
705 static void dma_pte_free_pagetable(struct dmar_domain *domain,
706 u64 start, u64 end)
708 int addr_width = agaw_to_width(domain->agaw);
709 struct dma_pte *pte;
710 int total = agaw_to_level(domain->agaw);
711 int level;
712 u64 tmp;
714 start &= (((u64)1) << addr_width) - 1;
715 end &= (((u64)1) << addr_width) - 1;
717 /* we don't need lock here, nobody else touches the iova range */
718 level = 2;
719 while (level <= total) {
720 tmp = align_to_level(start, level);
721 if (tmp >= end || (tmp + level_size(level) > end))
722 return;
724 while (tmp < end) {
725 pte = dma_addr_level_pte(domain, tmp, level);
726 if (pte) {
727 free_pgtable_page(
728 phys_to_virt(dma_pte_addr(pte)));
729 dma_clear_pte(pte);
730 domain_flush_cache(domain, pte, sizeof(*pte));
732 tmp += level_size(level);
734 level++;
736 /* free pgd */
737 if (start == 0 && end >= ((((u64)1) << addr_width) - 1)) {
738 free_pgtable_page(domain->pgd);
739 domain->pgd = NULL;
743 /* iommu handling */
744 static int iommu_alloc_root_entry(struct intel_iommu *iommu)
746 struct root_entry *root;
747 unsigned long flags;
749 root = (struct root_entry *)alloc_pgtable_page();
750 if (!root)
751 return -ENOMEM;
753 __iommu_flush_cache(iommu, root, ROOT_SIZE);
755 spin_lock_irqsave(&iommu->lock, flags);
756 iommu->root_entry = root;
757 spin_unlock_irqrestore(&iommu->lock, flags);
759 return 0;
762 static void iommu_set_root_entry(struct intel_iommu *iommu)
764 void *addr;
765 u32 cmd, sts;
766 unsigned long flag;
768 addr = iommu->root_entry;
770 spin_lock_irqsave(&iommu->register_lock, flag);
771 dmar_writeq(iommu->reg + DMAR_RTADDR_REG, virt_to_phys(addr));
773 cmd = iommu->gcmd | DMA_GCMD_SRTP;
774 writel(cmd, iommu->reg + DMAR_GCMD_REG);
776 /* Make sure hardware completes it */
777 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
778 readl, (sts & DMA_GSTS_RTPS), sts);
780 spin_unlock_irqrestore(&iommu->register_lock, flag);
783 static void iommu_flush_write_buffer(struct intel_iommu *iommu)
785 u32 val;
786 unsigned long flag;
788 if (!cap_rwbf(iommu->cap))
789 return;
790 val = iommu->gcmd | DMA_GCMD_WBF;
792 spin_lock_irqsave(&iommu->register_lock, flag);
793 writel(val, iommu->reg + DMAR_GCMD_REG);
795 /* Make sure hardware completes it */
796 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
797 readl, (!(val & DMA_GSTS_WBFS)), val);
799 spin_unlock_irqrestore(&iommu->register_lock, flag);
802 /* return value determines whether we need a write buffer flush */
803 static int __iommu_flush_context(struct intel_iommu *iommu,
804 u16 did, u16 source_id, u8 function_mask, u64 type,
805 int non_present_entry_flush)
807 u64 val = 0;
808 unsigned long flag;
811 * In the non-present entry flush case, if hardware doesn't cache
812 * non-present entries we do nothing; if hardware does cache non-present
813 * entries, we flush entries of domain 0 (the domain id is used to cache
814 * any non-present entries)
816 if (non_present_entry_flush) {
817 if (!cap_caching_mode(iommu->cap))
818 return 1;
819 else
820 did = 0;
823 switch (type) {
824 case DMA_CCMD_GLOBAL_INVL:
825 val = DMA_CCMD_GLOBAL_INVL;
826 break;
827 case DMA_CCMD_DOMAIN_INVL:
828 val = DMA_CCMD_DOMAIN_INVL|DMA_CCMD_DID(did);
829 break;
830 case DMA_CCMD_DEVICE_INVL:
831 val = DMA_CCMD_DEVICE_INVL|DMA_CCMD_DID(did)
832 | DMA_CCMD_SID(source_id) | DMA_CCMD_FM(function_mask);
833 break;
834 default:
835 BUG();
837 val |= DMA_CCMD_ICC;
839 spin_lock_irqsave(&iommu->register_lock, flag);
840 dmar_writeq(iommu->reg + DMAR_CCMD_REG, val);
842 /* Make sure hardware completes it */
843 IOMMU_WAIT_OP(iommu, DMAR_CCMD_REG,
844 dmar_readq, (!(val & DMA_CCMD_ICC)), val);
846 spin_unlock_irqrestore(&iommu->register_lock, flag);
848 /* flush context entry will implicitly flush write buffer */
849 return 0;
852 /* return value determines whether we need a write buffer flush */
853 static int __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did,
854 u64 addr, unsigned int size_order, u64 type,
855 int non_present_entry_flush)
857 int tlb_offset = ecap_iotlb_offset(iommu->ecap);
858 u64 val = 0, val_iva = 0;
859 unsigned long flag;
862 * In the non-present entry flush case, if hardware doesn't cache
863 * non-present entries we do nothing; if hardware does cache non-present
864 * entries, we flush entries of domain 0 (the domain id is used to cache
865 * any non-present entries)
867 if (non_present_entry_flush) {
868 if (!cap_caching_mode(iommu->cap))
869 return 1;
870 else
871 did = 0;
874 switch (type) {
875 case DMA_TLB_GLOBAL_FLUSH:
876 /* global flush doesn't need to set IVA_REG */
877 val = DMA_TLB_GLOBAL_FLUSH|DMA_TLB_IVT;
878 break;
879 case DMA_TLB_DSI_FLUSH:
880 val = DMA_TLB_DSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
881 break;
882 case DMA_TLB_PSI_FLUSH:
883 val = DMA_TLB_PSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
884 /* Note: always flush non-leaf currently */
885 val_iva = size_order | addr;
886 break;
887 default:
888 BUG();
890 /* Note: set drain read/write */
891 #if 0
893 * This is probably only needed to be extra safe; it looks like
894 * we can ignore it without any impact.
896 if (cap_read_drain(iommu->cap))
897 val |= DMA_TLB_READ_DRAIN;
898 #endif
899 if (cap_write_drain(iommu->cap))
900 val |= DMA_TLB_WRITE_DRAIN;
902 spin_lock_irqsave(&iommu->register_lock, flag);
903 /* Note: Only uses first TLB reg currently */
904 if (val_iva)
905 dmar_writeq(iommu->reg + tlb_offset, val_iva);
906 dmar_writeq(iommu->reg + tlb_offset + 8, val);
908 /* Make sure hardware completes it */
909 IOMMU_WAIT_OP(iommu, tlb_offset + 8,
910 dmar_readq, (!(val & DMA_TLB_IVT)), val);
912 spin_unlock_irqrestore(&iommu->register_lock, flag);
914 /* check IOTLB invalidation granularity */
915 if (DMA_TLB_IAIG(val) == 0)
916 printk(KERN_ERR"IOMMU: flush IOTLB failed\n");
917 if (DMA_TLB_IAIG(val) != DMA_TLB_IIRG(type))
918 pr_debug("IOMMU: tlb flush request %Lx, actual %Lx\n",
919 (unsigned long long)DMA_TLB_IIRG(type),
920 (unsigned long long)DMA_TLB_IAIG(val));
921 /* flush iotlb entry will implicitly flush write buffer */
922 return 0;
925 static int iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did,
926 u64 addr, unsigned int pages, int non_present_entry_flush)
928 unsigned int mask;
930 BUG_ON(addr & (~VTD_PAGE_MASK));
931 BUG_ON(pages == 0);
933 /* Fallback to domain selective flush if no PSI support */
934 if (!cap_pgsel_inv(iommu->cap))
935 return iommu->flush.flush_iotlb(iommu, did, 0, 0,
936 DMA_TLB_DSI_FLUSH,
937 non_present_entry_flush);
940 * PSI requires the number of pages to be a power of two, with the base
941 * address naturally aligned to that size
943 mask = ilog2(__roundup_pow_of_two(pages));
944 /* Fallback to domain selective flush if size is too big */
945 if (mask > cap_max_amask_val(iommu->cap))
946 return iommu->flush.flush_iotlb(iommu, did, 0, 0,
947 DMA_TLB_DSI_FLUSH, non_present_entry_flush);
949 return iommu->flush.flush_iotlb(iommu, did, addr, mask,
950 DMA_TLB_PSI_FLUSH,
951 non_present_entry_flush);
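/*
 * Example (illustrative): a request to flush 5 pages rounds up to 8,
 * so mask == ilog2(8) == 3 and the hardware invalidates a naturally
 * aligned block of 2^3 pages (32KB with 4KB pages) containing addr.
 */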
954 static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu)
956 u32 pmen;
957 unsigned long flags;
959 spin_lock_irqsave(&iommu->register_lock, flags);
960 pmen = readl(iommu->reg + DMAR_PMEN_REG);
961 pmen &= ~DMA_PMEN_EPM;
962 writel(pmen, iommu->reg + DMAR_PMEN_REG);
964 /* wait for the protected region status bit to clear */
965 IOMMU_WAIT_OP(iommu, DMAR_PMEN_REG,
966 readl, !(pmen & DMA_PMEN_PRS), pmen);
968 spin_unlock_irqrestore(&iommu->register_lock, flags);
971 static int iommu_enable_translation(struct intel_iommu *iommu)
973 u32 sts;
974 unsigned long flags;
976 spin_lock_irqsave(&iommu->register_lock, flags);
977 writel(iommu->gcmd|DMA_GCMD_TE, iommu->reg + DMAR_GCMD_REG);
979 /* Make sure hardware completes it */
980 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
981 readl, (sts & DMA_GSTS_TES), sts);
983 iommu->gcmd |= DMA_GCMD_TE;
984 spin_unlock_irqrestore(&iommu->register_lock, flags);
985 return 0;
988 static int iommu_disable_translation(struct intel_iommu *iommu)
990 u32 sts;
991 unsigned long flag;
993 spin_lock_irqsave(&iommu->register_lock, flag);
994 iommu->gcmd &= ~DMA_GCMD_TE;
995 writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
997 /* Make sure hardware completes it */
998 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
999 readl, (!(sts & DMA_GSTS_TES)), sts);
1001 spin_unlock_irqrestore(&iommu->register_lock, flag);
1002 return 0;
1005 /* iommu interrupt handling. Most of it is MSI-like. */
1007 static const char *fault_reason_strings[] =
1009 "Software",
1010 "Present bit in root entry is clear",
1011 "Present bit in context entry is clear",
1012 "Invalid context entry",
1013 "Access beyond MGAW",
1014 "PTE Write access is not set",
1015 "PTE Read access is not set",
1016 "Next page table ptr is invalid",
1017 "Root table address invalid",
1018 "Context table ptr is invalid",
1019 "non-zero reserved fields in RTP",
1020 "non-zero reserved fields in CTP",
1021 "non-zero reserved fields in PTE",
1023 #define MAX_FAULT_REASON_IDX (ARRAY_SIZE(fault_reason_strings) - 1)
1025 const char *dmar_get_fault_reason(u8 fault_reason)
1027 if (fault_reason > MAX_FAULT_REASON_IDX)
1028 return "Unknown";
1029 else
1030 return fault_reason_strings[fault_reason];
1033 void dmar_msi_unmask(unsigned int irq)
1035 struct intel_iommu *iommu = get_irq_data(irq);
1036 unsigned long flag;
1038 /* unmask it */
1039 spin_lock_irqsave(&iommu->register_lock, flag);
1040 writel(0, iommu->reg + DMAR_FECTL_REG);
1041 /* Read a reg to force flush the post write */
1042 readl(iommu->reg + DMAR_FECTL_REG);
1043 spin_unlock_irqrestore(&iommu->register_lock, flag);
1046 void dmar_msi_mask(unsigned int irq)
1048 unsigned long flag;
1049 struct intel_iommu *iommu = get_irq_data(irq);
1051 /* mask it */
1052 spin_lock_irqsave(&iommu->register_lock, flag);
1053 writel(DMA_FECTL_IM, iommu->reg + DMAR_FECTL_REG);
1054 /* Read a reg to force flush the post write */
1055 readl(iommu->reg + DMAR_FECTL_REG);
1056 spin_unlock_irqrestore(&iommu->register_lock, flag);
1059 void dmar_msi_write(int irq, struct msi_msg *msg)
1061 struct intel_iommu *iommu = get_irq_data(irq);
1062 unsigned long flag;
1064 spin_lock_irqsave(&iommu->register_lock, flag);
1065 writel(msg->data, iommu->reg + DMAR_FEDATA_REG);
1066 writel(msg->address_lo, iommu->reg + DMAR_FEADDR_REG);
1067 writel(msg->address_hi, iommu->reg + DMAR_FEUADDR_REG);
1068 spin_unlock_irqrestore(&iommu->register_lock, flag);
1071 void dmar_msi_read(int irq, struct msi_msg *msg)
1073 struct intel_iommu *iommu = get_irq_data(irq);
1074 unsigned long flag;
1076 spin_lock_irqsave(&iommu->register_lock, flag);
1077 msg->data = readl(iommu->reg + DMAR_FEDATA_REG);
1078 msg->address_lo = readl(iommu->reg + DMAR_FEADDR_REG);
1079 msg->address_hi = readl(iommu->reg + DMAR_FEUADDR_REG);
1080 spin_unlock_irqrestore(&iommu->register_lock, flag);
1083 static int iommu_page_fault_do_one(struct intel_iommu *iommu, int type,
1084 u8 fault_reason, u16 source_id, unsigned long long addr)
1086 const char *reason;
1088 reason = dmar_get_fault_reason(fault_reason);
1090 printk(KERN_ERR
1091 "DMAR:[%s] Request device [%02x:%02x.%d] "
1092 "fault addr %llx \n"
1093 "DMAR:[fault reason %02d] %s\n",
1094 (type ? "DMA Read" : "DMA Write"),
1095 (source_id >> 8), PCI_SLOT(source_id & 0xFF),
1096 PCI_FUNC(source_id & 0xFF), addr, fault_reason, reason);
1097 return 0;
1100 #define PRIMARY_FAULT_REG_LEN (16)
1101 static irqreturn_t iommu_page_fault(int irq, void *dev_id)
1103 struct intel_iommu *iommu = dev_id;
1104 int reg, fault_index;
1105 u32 fault_status;
1106 unsigned long flag;
1108 spin_lock_irqsave(&iommu->register_lock, flag);
1109 fault_status = readl(iommu->reg + DMAR_FSTS_REG);
1111 /* TBD: ignore advanced fault log currently */
1112 if (!(fault_status & DMA_FSTS_PPF))
1113 goto clear_overflow;
1115 fault_index = dma_fsts_fault_record_index(fault_status);
1116 reg = cap_fault_reg_offset(iommu->cap);
1117 while (1) {
1118 u8 fault_reason;
1119 u16 source_id;
1120 u64 guest_addr;
1121 int type;
1122 u32 data;
1124 /* highest 32 bits */
1125 data = readl(iommu->reg + reg +
1126 fault_index * PRIMARY_FAULT_REG_LEN + 12);
1127 if (!(data & DMA_FRCD_F))
1128 break;
1130 fault_reason = dma_frcd_fault_reason(data);
1131 type = dma_frcd_type(data);
1133 data = readl(iommu->reg + reg +
1134 fault_index * PRIMARY_FAULT_REG_LEN + 8);
1135 source_id = dma_frcd_source_id(data);
1137 guest_addr = dmar_readq(iommu->reg + reg +
1138 fault_index * PRIMARY_FAULT_REG_LEN);
1139 guest_addr = dma_frcd_page_addr(guest_addr);
1140 /* clear the fault */
1141 writel(DMA_FRCD_F, iommu->reg + reg +
1142 fault_index * PRIMARY_FAULT_REG_LEN + 12);
1144 spin_unlock_irqrestore(&iommu->register_lock, flag);
1146 iommu_page_fault_do_one(iommu, type, fault_reason,
1147 source_id, guest_addr);
1149 fault_index++;
1150 if (fault_index > cap_num_fault_regs(iommu->cap))
1151 fault_index = 0;
1152 spin_lock_irqsave(&iommu->register_lock, flag);
1154 clear_overflow:
1155 /* clear primary fault overflow */
1156 fault_status = readl(iommu->reg + DMAR_FSTS_REG);
1157 if (fault_status & DMA_FSTS_PFO)
1158 writel(DMA_FSTS_PFO, iommu->reg + DMAR_FSTS_REG);
1160 spin_unlock_irqrestore(&iommu->register_lock, flag);
1161 return IRQ_HANDLED;
1164 int dmar_set_interrupt(struct intel_iommu *iommu)
1166 int irq, ret;
1168 irq = create_irq();
1169 if (!irq) {
1170 printk(KERN_ERR "IOMMU: no free vectors\n");
1171 return -EINVAL;
1174 set_irq_data(irq, iommu);
1175 iommu->irq = irq;
1177 ret = arch_setup_dmar_msi(irq);
1178 if (ret) {
1179 set_irq_data(irq, NULL);
1180 iommu->irq = 0;
1181 destroy_irq(irq);
1182 return 0;
1185 /* Force the fault registers to be cleared */
1186 iommu_page_fault(irq, iommu);
1188 ret = request_irq(irq, iommu_page_fault, 0, iommu->name, iommu);
1189 if (ret)
1190 printk(KERN_ERR "IOMMU: can't request irq\n");
1191 return ret;
1194 static int iommu_init_domains(struct intel_iommu *iommu)
1196 unsigned long ndomains;
1197 unsigned long nlongs;
1199 ndomains = cap_ndoms(iommu->cap);
1200 pr_debug("Number of Domains supported <%ld>\n", ndomains);
1201 nlongs = BITS_TO_LONGS(ndomains);
1203 /* TBD: there might be 64K domains,
1204 * consider other allocation for future chip
1206 iommu->domain_ids = kcalloc(nlongs, sizeof(unsigned long), GFP_KERNEL);
1207 if (!iommu->domain_ids) {
1208 printk(KERN_ERR "Allocating domain id array failed\n");
1209 return -ENOMEM;
1211 iommu->domains = kcalloc(ndomains, sizeof(struct dmar_domain *),
1212 GFP_KERNEL);
1213 if (!iommu->domains) {
1214 printk(KERN_ERR "Allocating domain array failed\n");
1215 kfree(iommu->domain_ids);
1216 return -ENOMEM;
1219 spin_lock_init(&iommu->lock);
1222 * if Caching mode is set, then invalid translations are tagged
1223 * with domain id 0. Hence we need to pre-allocate it.
1225 if (cap_caching_mode(iommu->cap))
1226 set_bit(0, iommu->domain_ids);
1227 return 0;
1231 static void domain_exit(struct dmar_domain *domain);
1232 static void vm_domain_exit(struct dmar_domain *domain);
1234 void free_dmar_iommu(struct intel_iommu *iommu)
1236 struct dmar_domain *domain;
1237 int i;
1238 unsigned long flags;
1240 i = find_first_bit(iommu->domain_ids, cap_ndoms(iommu->cap));
1241 for (; i < cap_ndoms(iommu->cap); ) {
1242 domain = iommu->domains[i];
1243 clear_bit(i, iommu->domain_ids);
1245 spin_lock_irqsave(&domain->iommu_lock, flags);
1246 if (--domain->iommu_count == 0) {
1247 if (domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE)
1248 vm_domain_exit(domain);
1249 else
1250 domain_exit(domain);
1252 spin_unlock_irqrestore(&domain->iommu_lock, flags);
1254 i = find_next_bit(iommu->domain_ids,
1255 cap_ndoms(iommu->cap), i+1);
1258 if (iommu->gcmd & DMA_GCMD_TE)
1259 iommu_disable_translation(iommu);
1261 if (iommu->irq) {
1262 set_irq_data(iommu->irq, NULL);
1263 /* This will mask the irq */
1264 free_irq(iommu->irq, iommu);
1265 destroy_irq(iommu->irq);
1268 kfree(iommu->domains);
1269 kfree(iommu->domain_ids);
1271 g_iommus[iommu->seq_id] = NULL;
1273 /* if all iommus are freed, free g_iommus */
1274 for (i = 0; i < g_num_of_iommus; i++) {
1275 if (g_iommus[i])
1276 break;
1279 if (i == g_num_of_iommus)
1280 kfree(g_iommus);
1282 /* free context mapping */
1283 free_context_table(iommu);
1286 static struct dmar_domain * iommu_alloc_domain(struct intel_iommu *iommu)
1288 unsigned long num;
1289 unsigned long ndomains;
1290 struct dmar_domain *domain;
1291 unsigned long flags;
1293 domain = alloc_domain_mem();
1294 if (!domain)
1295 return NULL;
1297 ndomains = cap_ndoms(iommu->cap);
1299 spin_lock_irqsave(&iommu->lock, flags);
1300 num = find_first_zero_bit(iommu->domain_ids, ndomains);
1301 if (num >= ndomains) {
1302 spin_unlock_irqrestore(&iommu->lock, flags);
1303 free_domain_mem(domain);
1304 printk(KERN_ERR "IOMMU: no free domain ids\n");
1305 return NULL;
1308 set_bit(num, iommu->domain_ids);
1309 domain->id = num;
1310 memset(&domain->iommu_bmp, 0, sizeof(unsigned long));
1311 set_bit(iommu->seq_id, &domain->iommu_bmp);
1312 domain->flags = 0;
1313 iommu->domains[num] = domain;
1314 spin_unlock_irqrestore(&iommu->lock, flags);
1316 return domain;
1319 static void iommu_free_domain(struct dmar_domain *domain)
1321 unsigned long flags;
1322 struct intel_iommu *iommu;
1324 iommu = domain_get_iommu(domain);
1326 spin_lock_irqsave(&iommu->lock, flags);
1327 clear_bit(domain->id, iommu->domain_ids);
1328 spin_unlock_irqrestore(&iommu->lock, flags);
1331 static struct iova_domain reserved_iova_list;
1332 static struct lock_class_key reserved_alloc_key;
1333 static struct lock_class_key reserved_rbtree_key;
1335 static void dmar_init_reserved_ranges(void)
1337 struct pci_dev *pdev = NULL;
1338 struct iova *iova;
1339 int i;
1340 u64 addr, size;
1342 init_iova_domain(&reserved_iova_list, DMA_32BIT_PFN);
1344 lockdep_set_class(&reserved_iova_list.iova_alloc_lock,
1345 &reserved_alloc_key);
1346 lockdep_set_class(&reserved_iova_list.iova_rbtree_lock,
1347 &reserved_rbtree_key);
1349 /* IOAPIC ranges shouldn't be accessed by DMA */
1350 iova = reserve_iova(&reserved_iova_list, IOVA_PFN(IOAPIC_RANGE_START),
1351 IOVA_PFN(IOAPIC_RANGE_END));
1352 if (!iova)
1353 printk(KERN_ERR "Reserve IOAPIC range failed\n");
1355 /* Reserve all PCI MMIO to avoid peer-to-peer access */
1356 for_each_pci_dev(pdev) {
1357 struct resource *r;
1359 for (i = 0; i < PCI_NUM_RESOURCES; i++) {
1360 r = &pdev->resource[i];
1361 if (!r->flags || !(r->flags & IORESOURCE_MEM))
1362 continue;
1363 addr = r->start;
1364 addr &= PAGE_MASK;
1365 size = r->end - addr;
1366 size = PAGE_ALIGN(size);
1367 iova = reserve_iova(&reserved_iova_list, IOVA_PFN(addr),
1368 IOVA_PFN(size + addr) - 1);
1369 if (!iova)
1370 printk(KERN_ERR "Reserve iova failed\n");
1376 static void domain_reserve_special_ranges(struct dmar_domain *domain)
1378 copy_reserved_iova(&reserved_iova_list, &domain->iovad);
1381 static inline int guestwidth_to_adjustwidth(int gaw)
1383 int agaw;
1384 int r = (gaw - 12) % 9;
1386 if (r == 0)
1387 agaw = gaw;
1388 else
1389 agaw = gaw + 9 - r;
1390 if (agaw > 64)
1391 agaw = 64;
1392 return agaw;
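/*
 * Example (illustrative): gaw == 48 gives r == (48 - 12) % 9 == 0, so
 * the adjusted width stays 48; gaw == 36 gives r == 6 and is rounded
 * up to 39 so it lands on a page-table level boundary.
 */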
1395 static int domain_init(struct dmar_domain *domain, int guest_width)
1397 struct intel_iommu *iommu;
1398 int adjust_width, agaw;
1399 unsigned long sagaw;
1401 init_iova_domain(&domain->iovad, DMA_32BIT_PFN);
1402 spin_lock_init(&domain->mapping_lock);
1403 spin_lock_init(&domain->iommu_lock);
1405 domain_reserve_special_ranges(domain);
1407 /* calculate AGAW */
1408 iommu = domain_get_iommu(domain);
1409 if (guest_width > cap_mgaw(iommu->cap))
1410 guest_width = cap_mgaw(iommu->cap);
1411 domain->gaw = guest_width;
1412 adjust_width = guestwidth_to_adjustwidth(guest_width);
1413 agaw = width_to_agaw(adjust_width);
1414 sagaw = cap_sagaw(iommu->cap);
1415 if (!test_bit(agaw, &sagaw)) {
1416 /* hardware doesn't support it, choose a bigger one */
1417 pr_debug("IOMMU: hardware doesn't support agaw %d\n", agaw);
1418 agaw = find_next_bit(&sagaw, 5, agaw);
1419 if (agaw >= 5)
1420 return -ENODEV;
1422 domain->agaw = agaw;
1423 INIT_LIST_HEAD(&domain->devices);
1425 if (ecap_coherent(iommu->ecap))
1426 domain->iommu_coherency = 1;
1427 else
1428 domain->iommu_coherency = 0;
1430 domain->iommu_count = 1;
1432 /* always allocate the top pgd */
1433 domain->pgd = (struct dma_pte *)alloc_pgtable_page();
1434 if (!domain->pgd)
1435 return -ENOMEM;
1436 __iommu_flush_cache(iommu, domain->pgd, PAGE_SIZE);
1437 return 0;
1440 static void domain_exit(struct dmar_domain *domain)
1442 u64 end;
1444 /* Domain 0 is reserved, so don't process it */
1445 if (!domain)
1446 return;
1448 domain_remove_dev_info(domain);
1449 /* destroy iovas */
1450 put_iova_domain(&domain->iovad);
1451 end = DOMAIN_MAX_ADDR(domain->gaw);
1452 end = end & (~PAGE_MASK);
1454 /* clear ptes */
1455 dma_pte_clear_range(domain, 0, end);
1457 /* free page tables */
1458 dma_pte_free_pagetable(domain, 0, end);
1460 iommu_free_domain(domain);
1461 free_domain_mem(domain);
1464 static int domain_context_mapping_one(struct dmar_domain *domain,
1465 u8 bus, u8 devfn)
1467 struct context_entry *context;
1468 unsigned long flags;
1469 struct intel_iommu *iommu;
1470 struct dma_pte *pgd;
1471 unsigned long num;
1472 unsigned long ndomains;
1473 int id;
1474 int agaw;
1476 pr_debug("Set context mapping for %02x:%02x.%d\n",
1477 bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
1478 BUG_ON(!domain->pgd);
1480 iommu = device_to_iommu(bus, devfn);
1481 if (!iommu)
1482 return -ENODEV;
1484 context = device_to_context_entry(iommu, bus, devfn);
1485 if (!context)
1486 return -ENOMEM;
1487 spin_lock_irqsave(&iommu->lock, flags);
1488 if (context_present(context)) {
1489 spin_unlock_irqrestore(&iommu->lock, flags);
1490 return 0;
1493 id = domain->id;
1494 pgd = domain->pgd;
1496 if (domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE) {
1497 int found = 0;
1499 /* find an available domain id for this device in iommu */
1500 ndomains = cap_ndoms(iommu->cap);
1501 num = find_first_bit(iommu->domain_ids, ndomains);
1502 for (; num < ndomains; ) {
1503 if (iommu->domains[num] == domain) {
1504 id = num;
1505 found = 1;
1506 break;
1508 num = find_next_bit(iommu->domain_ids,
1509 cap_ndoms(iommu->cap), num+1);
1512 if (found == 0) {
1513 num = find_first_zero_bit(iommu->domain_ids, ndomains);
1514 if (num >= ndomains) {
1515 spin_unlock_irqrestore(&iommu->lock, flags);
1516 printk(KERN_ERR "IOMMU: no free domain ids\n");
1517 return -EFAULT;
1520 set_bit(num, iommu->domain_ids);
1521 iommu->domains[num] = domain;
1522 id = num;
1525 /* Skip top levels of page tables for
1526 * an iommu which has a smaller agaw than the domain's.
1528 for (agaw = domain->agaw; agaw != iommu->agaw; agaw--) {
1529 pgd = phys_to_virt(dma_pte_addr(pgd));
1530 if (!dma_pte_present(pgd)) {
1531 spin_unlock_irqrestore(&iommu->lock, flags);
1532 return -ENOMEM;
1537 context_set_domain_id(context, id);
1538 context_set_address_width(context, iommu->agaw);
1539 context_set_address_root(context, virt_to_phys(pgd));
1540 context_set_translation_type(context, CONTEXT_TT_MULTI_LEVEL);
1541 context_set_fault_enable(context);
1542 context_set_present(context);
1543 domain_flush_cache(domain, context, sizeof(*context));
1545 /* it's a non-present to present mapping */
1546 if (iommu->flush.flush_context(iommu, domain->id,
1547 (((u16)bus) << 8) | devfn, DMA_CCMD_MASK_NOBIT,
1548 DMA_CCMD_DEVICE_INVL, 1))
1549 iommu_flush_write_buffer(iommu);
1550 else
1551 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_DSI_FLUSH, 0);
1553 spin_unlock_irqrestore(&iommu->lock, flags);
1555 spin_lock_irqsave(&domain->iommu_lock, flags);
1556 if (!test_and_set_bit(iommu->seq_id, &domain->iommu_bmp)) {
1557 domain->iommu_count++;
1558 domain_update_iommu_coherency(domain);
1560 spin_unlock_irqrestore(&domain->iommu_lock, flags);
1561 return 0;
1564 static int
1565 domain_context_mapping(struct dmar_domain *domain, struct pci_dev *pdev)
1567 int ret;
1568 struct pci_dev *tmp, *parent;
1570 ret = domain_context_mapping_one(domain, pdev->bus->number,
1571 pdev->devfn);
1572 if (ret)
1573 return ret;
1575 /* dependent device mapping */
1576 tmp = pci_find_upstream_pcie_bridge(pdev);
1577 if (!tmp)
1578 return 0;
1579 /* Secondary interface's bus number and devfn 0 */
1580 parent = pdev->bus->self;
1581 while (parent != tmp) {
1582 ret = domain_context_mapping_one(domain, parent->bus->number,
1583 parent->devfn);
1584 if (ret)
1585 return ret;
1586 parent = parent->bus->self;
1588 if (tmp->is_pcie) /* this is a PCIE-to-PCI bridge */
1589 return domain_context_mapping_one(domain,
1590 tmp->subordinate->number, 0);
1591 else /* this is a legacy PCI bridge */
1592 return domain_context_mapping_one(domain,
1593 tmp->bus->number, tmp->devfn);
1596 static int domain_context_mapped(struct pci_dev *pdev)
1598 int ret;
1599 struct pci_dev *tmp, *parent;
1600 struct intel_iommu *iommu;
1602 iommu = device_to_iommu(pdev->bus->number, pdev->devfn);
1603 if (!iommu)
1604 return -ENODEV;
1606 ret = device_context_mapped(iommu,
1607 pdev->bus->number, pdev->devfn);
1608 if (!ret)
1609 return ret;
1610 /* dependent device mapping */
1611 tmp = pci_find_upstream_pcie_bridge(pdev);
1612 if (!tmp)
1613 return ret;
1614 /* Secondary interface's bus number and devfn 0 */
1615 parent = pdev->bus->self;
1616 while (parent != tmp) {
1617 ret = device_context_mapped(iommu, parent->bus->number,
1618 parent->devfn);
1619 if (!ret)
1620 return ret;
1621 parent = parent->bus->self;
1623 if (tmp->is_pcie)
1624 return device_context_mapped(iommu,
1625 tmp->subordinate->number, 0);
1626 else
1627 return device_context_mapped(iommu,
1628 tmp->bus->number, tmp->devfn);
1631 static int
1632 domain_page_mapping(struct dmar_domain *domain, dma_addr_t iova,
1633 u64 hpa, size_t size, int prot)
1635 u64 start_pfn, end_pfn;
1636 struct dma_pte *pte;
1637 int index;
1638 int addr_width = agaw_to_width(domain->agaw);
1640 hpa &= (((u64)1) << addr_width) - 1;
1642 if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0)
1643 return -EINVAL;
1644 iova &= PAGE_MASK;
1645 start_pfn = ((u64)hpa) >> VTD_PAGE_SHIFT;
1646 end_pfn = (VTD_PAGE_ALIGN(((u64)hpa) + size)) >> VTD_PAGE_SHIFT;
1647 index = 0;
1648 while (start_pfn < end_pfn) {
1649 pte = addr_to_dma_pte(domain, iova + VTD_PAGE_SIZE * index);
1650 if (!pte)
1651 return -ENOMEM;
1652 /* We don't need lock here, nobody else
1653 * touches the iova range
1655 BUG_ON(dma_pte_addr(pte));
1656 dma_set_pte_addr(pte, start_pfn << VTD_PAGE_SHIFT);
1657 dma_set_pte_prot(pte, prot);
1658 domain_flush_cache(domain, pte, sizeof(*pte));
1659 start_pfn++;
1660 index++;
1662 return 0;
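/*
 * Illustrative usage (see iommu_prepare_identity_map() below): calling
 * domain_page_mapping() with iova == hpa and DMA_PTE_READ|DMA_PTE_WRITE
 * sets up an identity mapping, one pte per 4KB page of the range.
 */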
1665 static void iommu_detach_dev(struct intel_iommu *iommu, u8 bus, u8 devfn)
1667 if (!iommu)
1668 return;
1670 clear_context_table(iommu, bus, devfn);
1671 iommu->flush.flush_context(iommu, 0, 0, 0,
1672 DMA_CCMD_GLOBAL_INVL, 0);
1673 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
1674 DMA_TLB_GLOBAL_FLUSH, 0);
1677 static void domain_remove_dev_info(struct dmar_domain *domain)
1679 struct device_domain_info *info;
1680 unsigned long flags;
1681 struct intel_iommu *iommu;
1683 spin_lock_irqsave(&device_domain_lock, flags);
1684 while (!list_empty(&domain->devices)) {
1685 info = list_entry(domain->devices.next,
1686 struct device_domain_info, link);
1687 list_del(&info->link);
1688 list_del(&info->global);
1689 if (info->dev)
1690 info->dev->dev.archdata.iommu = NULL;
1691 spin_unlock_irqrestore(&device_domain_lock, flags);
1693 iommu = device_to_iommu(info->bus, info->devfn);
1694 iommu_detach_dev(iommu, info->bus, info->devfn);
1695 free_devinfo_mem(info);
1697 spin_lock_irqsave(&device_domain_lock, flags);
1699 spin_unlock_irqrestore(&device_domain_lock, flags);
1703 * find_domain
1704 * Note: we use struct pci_dev->dev.archdata.iommu to store the domain info
1706 static struct dmar_domain *
1707 find_domain(struct pci_dev *pdev)
1709 struct device_domain_info *info;
1711 /* No lock here, assumes no domain exit in normal case */
1712 info = pdev->dev.archdata.iommu;
1713 if (info)
1714 return info->domain;
1715 return NULL;
1718 /* domain is initialized */
1719 static struct dmar_domain *get_domain_for_dev(struct pci_dev *pdev, int gaw)
1721 struct dmar_domain *domain, *found = NULL;
1722 struct intel_iommu *iommu;
1723 struct dmar_drhd_unit *drhd;
1724 struct device_domain_info *info, *tmp;
1725 struct pci_dev *dev_tmp;
1726 unsigned long flags;
1727 int bus = 0, devfn = 0;
1729 domain = find_domain(pdev);
1730 if (domain)
1731 return domain;
1733 dev_tmp = pci_find_upstream_pcie_bridge(pdev);
1734 if (dev_tmp) {
1735 if (dev_tmp->is_pcie) {
1736 bus = dev_tmp->subordinate->number;
1737 devfn = 0;
1738 } else {
1739 bus = dev_tmp->bus->number;
1740 devfn = dev_tmp->devfn;
1742 spin_lock_irqsave(&device_domain_lock, flags);
1743 list_for_each_entry(info, &device_domain_list, global) {
1744 if (info->bus == bus && info->devfn == devfn) {
1745 found = info->domain;
1746 break;
1749 spin_unlock_irqrestore(&device_domain_lock, flags);
1750 /* pcie-pci bridge already has a domain, use it */
1751 if (found) {
1752 domain = found;
1753 goto found_domain;
1757 /* Allocate new domain for the device */
1758 drhd = dmar_find_matched_drhd_unit(pdev);
1759 if (!drhd) {
1760 printk(KERN_ERR "IOMMU: can't find DMAR for device %s\n",
1761 pci_name(pdev));
1762 return NULL;
1764 iommu = drhd->iommu;
1766 domain = iommu_alloc_domain(iommu);
1767 if (!domain)
1768 goto error;
1770 if (domain_init(domain, gaw)) {
1771 domain_exit(domain);
1772 goto error;
1775 /* register pcie-to-pci device */
1776 if (dev_tmp) {
1777 info = alloc_devinfo_mem();
1778 if (!info) {
1779 domain_exit(domain);
1780 goto error;
1782 info->bus = bus;
1783 info->devfn = devfn;
1784 info->dev = NULL;
1785 info->domain = domain;
1786 /* This domain is shared by devices under p2p bridge */
1787 domain->flags |= DOMAIN_FLAG_P2P_MULTIPLE_DEVICES;
1789 /* pcie-to-pci bridge already has a domain, use it */
1790 found = NULL;
1791 spin_lock_irqsave(&device_domain_lock, flags);
1792 list_for_each_entry(tmp, &device_domain_list, global) {
1793 if (tmp->bus == bus && tmp->devfn == devfn) {
1794 found = tmp->domain;
1795 break;
1798 if (found) {
1799 free_devinfo_mem(info);
1800 domain_exit(domain);
1801 domain = found;
1802 } else {
1803 list_add(&info->link, &domain->devices);
1804 list_add(&info->global, &device_domain_list);
1806 spin_unlock_irqrestore(&device_domain_lock, flags);
1809 found_domain:
1810 info = alloc_devinfo_mem();
1811 if (!info)
1812 goto error;
1813 info->bus = pdev->bus->number;
1814 info->devfn = pdev->devfn;
1815 info->dev = pdev;
1816 info->domain = domain;
1817 spin_lock_irqsave(&device_domain_lock, flags);
1818 /* somebody is fast */
1819 found = find_domain(pdev);
1820 if (found != NULL) {
1821 spin_unlock_irqrestore(&device_domain_lock, flags);
1822 if (found != domain) {
1823 domain_exit(domain);
1824 domain = found;
1826 free_devinfo_mem(info);
1827 return domain;
1829 list_add(&info->link, &domain->devices);
1830 list_add(&info->global, &device_domain_list);
1831 pdev->dev.archdata.iommu = info;
1832 spin_unlock_irqrestore(&device_domain_lock, flags);
1833 return domain;
1834 error:
1835 /* recheck it here, maybe others set it */
1836 return find_domain(pdev);
1839 static int iommu_prepare_identity_map(struct pci_dev *pdev,
1840 unsigned long long start,
1841 unsigned long long end)
1843 struct dmar_domain *domain;
1844 unsigned long size;
1845 unsigned long long base;
1846 int ret;
1848 printk(KERN_INFO
1849 "IOMMU: Setting identity map for device %s [0x%Lx - 0x%Lx]\n",
1850 pci_name(pdev), start, end);
1851 /* page table init */
1852 domain = get_domain_for_dev(pdev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
1853 if (!domain)
1854 return -ENOMEM;
1856 /* The address might not be aligned */
1857 base = start & PAGE_MASK;
1858 size = end - base;
1859 size = PAGE_ALIGN(size);
1860 if (!reserve_iova(&domain->iovad, IOVA_PFN(base),
1861 IOVA_PFN(base + size) - 1)) {
1862 printk(KERN_ERR "IOMMU: reserve iova failed\n");
1863 ret = -ENOMEM;
1864 goto error;
1867 pr_debug("Mapping reserved region %lx@%llx for %s\n",
1868 size, base, pci_name(pdev));
1870 * RMRR range might overlap with the physical memory range,
1871 * clear it first
1873 dma_pte_clear_range(domain, base, base + size);
1875 ret = domain_page_mapping(domain, base, base, size,
1876 DMA_PTE_READ|DMA_PTE_WRITE);
1877 if (ret)
1878 goto error;
1880 /* context entry init */
1881 ret = domain_context_mapping(domain, pdev);
1882 if (!ret)
1883 return 0;
1884 error:
1885 domain_exit(domain);
1886 return ret;
1890 static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit *rmrr,
1891 struct pci_dev *pdev)
1893 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
1894 return 0;
1895 return iommu_prepare_identity_map(pdev, rmrr->base_address,
1896 rmrr->end_address + 1);
1899 #ifdef CONFIG_DMAR_GFX_WA
1900 struct iommu_prepare_data {
1901 struct pci_dev *pdev;
1902 int ret;
1905 static int __init iommu_prepare_work_fn(unsigned long start_pfn,
1906 unsigned long end_pfn, void *datax)
1908 struct iommu_prepare_data *data;
1910 data = (struct iommu_prepare_data *)datax;
1912 data->ret = iommu_prepare_identity_map(data->pdev,
1913 start_pfn<<PAGE_SHIFT, end_pfn<<PAGE_SHIFT);
1914 return data->ret;
1918 static int __init iommu_prepare_with_active_regions(struct pci_dev *pdev)
1920 int nid;
1921 struct iommu_prepare_data data;
1923 data.pdev = pdev;
1924 data.ret = 0;
1926 for_each_online_node(nid) {
1927 work_with_active_regions(nid, iommu_prepare_work_fn, &data);
1928 if (data.ret)
1929 return data.ret;
1931 return data.ret;
1934 static void __init iommu_prepare_gfx_mapping(void)
1936 struct pci_dev *pdev = NULL;
1937 int ret;
1939 for_each_pci_dev(pdev) {
1940 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO ||
1941 !IS_GFX_DEVICE(pdev))
1942 continue;
1943 printk(KERN_INFO "IOMMU: gfx device %s 1-1 mapping\n",
1944 pci_name(pdev));
1945 ret = iommu_prepare_with_active_regions(pdev);
1946 if (ret)
1947 printk(KERN_ERR "IOMMU: mapping reserved region failed\n");
1950 #else /* !CONFIG_DMAR_GFX_WA */
1951 static inline void iommu_prepare_gfx_mapping(void)
1953 return;
1955 #endif
1957 #ifdef CONFIG_DMAR_FLOPPY_WA
1958 static inline void iommu_prepare_isa(void)
1960 struct pci_dev *pdev;
1961 int ret;
1963 pdev = pci_get_class(PCI_CLASS_BRIDGE_ISA << 8, NULL);
1964 if (!pdev)
1965 return;
1967 printk(KERN_INFO "IOMMU: Prepare 0-16M unity mapping for LPC\n");
1968 ret = iommu_prepare_identity_map(pdev, 0, 16*1024*1024);
1970 if (ret)
1971 printk("IOMMU: Failed to create 0-16M identity map, "
1972 "floppy might not work\n");
1975 #else
1976 static inline void iommu_prepare_isa(void)
1978 return;
1980 #endif /* !CONFIG_DMAR_FLPY_WA */
1982 static int __init init_dmars(void)
1984 struct dmar_drhd_unit *drhd;
1985 struct dmar_rmrr_unit *rmrr;
1986 struct pci_dev *pdev;
1987 struct intel_iommu *iommu;
1988 int i, ret, unit = 0;
1991 * for each drhd
1992 * allocate root
1993 * initialize and program root entry to not present
1994 * endfor
1996 for_each_drhd_unit(drhd) {
1997 g_num_of_iommus++;
1999 * lock not needed as this is only incremented in the single
2000 * threaded kernel __init code path; all other accesses are
2001 * read only
2005 g_iommus = kcalloc(g_num_of_iommus, sizeof(struct intel_iommu *),
2006 GFP_KERNEL);
2007 if (!g_iommus) {
2008 printk(KERN_ERR "Allocating global iommu array failed\n");
2009 ret = -ENOMEM;
2010 goto error;
2013 deferred_flush = kzalloc(g_num_of_iommus *
2014 sizeof(struct deferred_flush_tables), GFP_KERNEL);
2015 if (!deferred_flush) {
2016 kfree(g_iommus);
2017 ret = -ENOMEM;
2018 goto error;
2021 for_each_drhd_unit(drhd) {
2022 if (drhd->ignored)
2023 continue;
2025 iommu = drhd->iommu;
2026 g_iommus[iommu->seq_id] = iommu;
2028 ret = iommu_init_domains(iommu);
2029 if (ret)
2030 goto error;
2033 * TBD:
2034 * we could share the same root & context tables
2035 * among all IOMMUs. Need to split it later.
2037 ret = iommu_alloc_root_entry(iommu);
2038 if (ret) {
2039 printk(KERN_ERR "IOMMU: allocate root entry failed\n");
2040 goto error;
2044 for_each_drhd_unit(drhd) {
2045 if (drhd->ignored)
2046 continue;
2048 iommu = drhd->iommu;
2049 if (dmar_enable_qi(iommu)) {
2051 * Queued Invalidate not enabled, use Register Based
2052 * Invalidate
2054 iommu->flush.flush_context = __iommu_flush_context;
2055 iommu->flush.flush_iotlb = __iommu_flush_iotlb;
2056 printk(KERN_INFO "IOMMU 0x%Lx: using Register based "
2057 "invalidation\n",
2058 (unsigned long long)drhd->reg_base_addr);
2059 } else {
2060 iommu->flush.flush_context = qi_flush_context;
2061 iommu->flush.flush_iotlb = qi_flush_iotlb;
2062 printk(KERN_INFO "IOMMU 0x%Lx: using Queued "
2063 "invalidation\n",
2064 (unsigned long long)drhd->reg_base_addr);
2069 * For each rmrr
2070 * for each dev attached to rmrr
2071 * do
2072 * locate drhd for dev, alloc domain for dev
2073 * allocate free domain
2074 * allocate page table entries for rmrr
2075 * if context not allocated for bus
2076 * allocate and init context
2077 * set present in root table for this bus
2078 * init context with domain, translation etc
2079 * endfor
2080 * endfor
2082 for_each_rmrr_units(rmrr) {
2083 for (i = 0; i < rmrr->devices_cnt; i++) {
2084 pdev = rmrr->devices[i];
2085 /* some BIOSes list non-existent devices in the DMAR table */
2086 if (!pdev)
2087 continue;
2088 ret = iommu_prepare_rmrr_dev(rmrr, pdev);
2089 if (ret)
2090 printk(KERN_ERR
2091 "IOMMU: mapping reserved region failed\n");
2095 iommu_prepare_gfx_mapping();
2097 iommu_prepare_isa();
2100 * for each drhd
2101 * enable fault log
2102 * global invalidate context cache
2103 * global invalidate iotlb
2104 * enable translation
2106 for_each_drhd_unit(drhd) {
2107 if (drhd->ignored)
2108 continue;
2109 iommu = drhd->iommu;
2110 sprintf (iommu->name, "dmar%d", unit++);
2112 iommu_flush_write_buffer(iommu);
2114 ret = dmar_set_interrupt(iommu);
2115 if (ret)
2116 goto error;
2118 iommu_set_root_entry(iommu);
2120 iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL,
2122 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH,
2124 iommu_disable_protect_mem_regions(iommu);
2126 ret = iommu_enable_translation(iommu);
2127 if (ret)
2128 goto error;
2131 return 0;
2132 error:
2133 for_each_drhd_unit(drhd) {
2134 if (drhd->ignored)
2135 continue;
2136 iommu = drhd->iommu;
2137 free_iommu(iommu);
2139 kfree(g_iommus);
2140 return ret;
2143 static inline u64 aligned_size(u64 host_addr, size_t size)
2145 u64 addr;
2146 addr = (host_addr & (~PAGE_MASK)) + size;
2147 return PAGE_ALIGN(addr);
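/*
 * Example (illustrative): host_addr == 0x1234, size == 0x100 gives
 * (0x234 + 0x100) == 0x334, which PAGE_ALIGN() rounds up to 0x1000,
 * i.e. a full page is mapped even for a sub-page request.
 */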
2150 struct iova *
2151 iommu_alloc_iova(struct dmar_domain *domain, size_t size, u64 end)
2153 struct iova *piova;
2155 /* Make sure it's in range */
2156 end = min_t(u64, DOMAIN_MAX_ADDR(domain->gaw), end);
2157 if (!size || (IOVA_START_ADDR + size > end))
2158 return NULL;
2160 piova = alloc_iova(&domain->iovad,
2161 size >> PAGE_SHIFT, IOVA_PFN(end), 1);
2162 return piova;
2165 static struct iova *
2166 __intel_alloc_iova(struct device *dev, struct dmar_domain *domain,
2167 size_t size, u64 dma_mask)
2169 struct pci_dev *pdev = to_pci_dev(dev);
2170 struct iova *iova = NULL;
2172 if (dma_mask <= DMA_32BIT_MASK || dmar_forcedac)
2173 iova = iommu_alloc_iova(domain, size, dma_mask);
2174 else {
2175 /*
2176 * First try to allocate an I/O virtual address below DMA_32BIT_MASK;
2177 * if that fails, fall back to allocating from the full range allowed
2178 * by the device's DMA mask.
2179 */
2180 iova = iommu_alloc_iova(domain, size, DMA_32BIT_MASK);
2181 if (!iova)
2182 iova = iommu_alloc_iova(domain, size, dma_mask);
2185 if (!iova) {
2186 printk(KERN_ERR "Allocating iova for %s failed\n", pci_name(pdev));
2187 return NULL;
2190 return iova;
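/*
 * Note (added for clarity, not in the original source): for a device whose
 * DMA mask allows addressing above 4GiB, __intel_alloc_iova() still tries
 * the 32-bit IOVA space first and only then falls back to the device's full
 * mask, so dual-address-cycle transactions are avoided until the low space
 * is exhausted. Setting dmar_forcedac (the intel_iommu=forcedac boot option)
 * skips that preference and allocates straight from the full DMA mask.
 */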
2193 static struct dmar_domain *
2194 get_valid_domain_for_dev(struct pci_dev *pdev)
2196 struct dmar_domain *domain;
2197 int ret;
2199 domain = get_domain_for_dev(pdev,
2200 DEFAULT_DOMAIN_ADDRESS_WIDTH);
2201 if (!domain) {
2202 printk(KERN_ERR
2203 "Allocating domain for %s failed", pci_name(pdev));
2204 return NULL;
2207 /* make sure context mapping is ok */
2208 if (unlikely(!domain_context_mapped(pdev))) {
2209 ret = domain_context_mapping(domain, pdev);
2210 if (ret) {
2211 printk(KERN_ERR
2212 "Domain context map for %s failed",
2213 pci_name(pdev));
2214 return NULL;
2218 return domain;
2221 static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr,
2222 size_t size, int dir, u64 dma_mask)
2224 struct pci_dev *pdev = to_pci_dev(hwdev);
2225 struct dmar_domain *domain;
2226 phys_addr_t start_paddr;
2227 struct iova *iova;
2228 int prot = 0;
2229 int ret;
2230 struct intel_iommu *iommu;
2232 BUG_ON(dir == DMA_NONE);
2233 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
2234 return paddr;
2236 domain = get_valid_domain_for_dev(pdev);
2237 if (!domain)
2238 return 0;
2240 iommu = domain_get_iommu(domain);
2241 size = aligned_size((u64)paddr, size);
2243 iova = __intel_alloc_iova(hwdev, domain, size, pdev->dma_mask);
2244 if (!iova)
2245 goto error;
2247 start_paddr = (phys_addr_t)iova->pfn_lo << PAGE_SHIFT;
2249 /*
2250 * Check if the DMAR hardware supports zero-length reads on write-only
2251 * mappings.
2252 */
2253 if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
2254 !cap_zlr(iommu->cap))
2255 prot |= DMA_PTE_READ;
2256 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
2257 prot |= DMA_PTE_WRITE;
2258 /*
2259 * paddr to (paddr + size) may cover only part of a page, but we should
2260 * map the whole page. Note: if two parts of one page are mapped
2261 * separately, we may end up with two guest addresses mapping to the
2262 * same host paddr, but this is not a big problem.
2263 */
2264 ret = domain_page_mapping(domain, start_paddr,
2265 ((u64)paddr) & PAGE_MASK, size, prot);
2266 if (ret)
2267 goto error;
2269 /* it's a non-present to present mapping */
2270 ret = iommu_flush_iotlb_psi(iommu, domain->id,
2271 start_paddr, size >> VTD_PAGE_SHIFT, 1);
2272 if (ret)
2273 iommu_flush_write_buffer(iommu);
2275 return start_paddr + ((u64)paddr & (~PAGE_MASK));
2277 error:
2278 if (iova)
2279 __free_iova(&domain->iovad, iova);
2280 printk(KERN_ERR "Device %s request: %zx@%llx dir %d --- failed\n",
2281 pci_name(pdev), size, (unsigned long long)paddr, dir);
2282 return 0;
2285 dma_addr_t intel_map_single(struct device *hwdev, phys_addr_t paddr,
2286 size_t size, int dir)
2288 return __intel_map_single(hwdev, paddr, size, dir,
2289 to_pci_dev(hwdev)->dma_mask);
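/*
 * Usage sketch (illustrative only; "buf", "len" and the error label are
 * placeholders, not part of this file): a driver reaches intel_map_single()
 * through the generic DMA API once intel_dma_ops is installed as dma_ops:
 *
 *	dma_addr_t dma;
 *
 *	dma = dma_map_single(&pdev->dev, buf, len, DMA_TO_DEVICE);
 *	if (dma_mapping_error(&pdev->dev, dma))
 *		goto err;
 *	...
 *	dma_unmap_single(&pdev->dev, dma, len, DMA_TO_DEVICE);
 */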
2292 static void flush_unmaps(void)
2294 int i, j;
2296 timer_on = 0;
2298 /* just flush them all */
2299 for (i = 0; i < g_num_of_iommus; i++) {
2300 struct intel_iommu *iommu = g_iommus[i];
2301 if (!iommu)
2302 continue;
2304 if (deferred_flush[i].next) {
2305 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
2306 DMA_TLB_GLOBAL_FLUSH, 0);
2307 for (j = 0; j < deferred_flush[i].next; j++) {
2308 __free_iova(&deferred_flush[i].domain[j]->iovad,
2309 deferred_flush[i].iova[j]);
2311 deferred_flush[i].next = 0;
2315 list_size = 0;
2318 static void flush_unmaps_timeout(unsigned long data)
2320 unsigned long flags;
2322 spin_lock_irqsave(&async_umap_flush_lock, flags);
2323 flush_unmaps();
2324 spin_unlock_irqrestore(&async_umap_flush_lock, flags);
2327 static void add_unmap(struct dmar_domain *dom, struct iova *iova)
2329 unsigned long flags;
2330 int next, iommu_id;
2331 struct intel_iommu *iommu;
2333 spin_lock_irqsave(&async_umap_flush_lock, flags);
2334 if (list_size == HIGH_WATER_MARK)
2335 flush_unmaps();
2337 iommu = domain_get_iommu(dom);
2338 iommu_id = iommu->seq_id;
2340 next = deferred_flush[iommu_id].next;
2341 deferred_flush[iommu_id].domain[next] = dom;
2342 deferred_flush[iommu_id].iova[next] = iova;
2343 deferred_flush[iommu_id].next++;
2345 if (!timer_on) {
2346 mod_timer(&unmap_timer, jiffies + msecs_to_jiffies(10));
2347 timer_on = 1;
2349 list_size++;
2350 spin_unlock_irqrestore(&async_umap_flush_lock, flags);
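/*
 * Note (added for clarity): add_unmap() and flush_unmaps() implement the
 * non-strict unmap path. Freed IOVAs are queued per IOMMU in
 * deferred_flush[] and released in batches, either when HIGH_WATER_MARK
 * entries have accumulated or when the 10ms unmap_timer fires, so one
 * global IOTLB flush covers many unmaps at the cost of a short window in
 * which stale translations remain usable by the device.
 */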
2353 void intel_unmap_single(struct device *dev, dma_addr_t dev_addr, size_t size,
2354 int dir)
2356 struct pci_dev *pdev = to_pci_dev(dev);
2357 struct dmar_domain *domain;
2358 unsigned long start_addr;
2359 struct iova *iova;
2360 struct intel_iommu *iommu;
2362 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
2363 return;
2364 domain = find_domain(pdev);
2365 BUG_ON(!domain);
2367 iommu = domain_get_iommu(domain);
2369 iova = find_iova(&domain->iovad, IOVA_PFN(dev_addr));
2370 if (!iova)
2371 return;
2373 start_addr = iova->pfn_lo << PAGE_SHIFT;
2374 size = aligned_size((u64)dev_addr, size);
2376 pr_debug("Device %s unmapping: %zx@%llx\n",
2377 pci_name(pdev), size, (unsigned long long)start_addr);
2379 /* clear the whole page */
2380 dma_pte_clear_range(domain, start_addr, start_addr + size);
2381 /* free page tables */
2382 dma_pte_free_pagetable(domain, start_addr, start_addr + size);
2383 if (intel_iommu_strict) {
2384 if (iommu_flush_iotlb_psi(iommu,
2385 domain->id, start_addr, size >> VTD_PAGE_SHIFT, 0))
2386 iommu_flush_write_buffer(iommu);
2387 /* free iova */
2388 __free_iova(&domain->iovad, iova);
2389 } else {
2390 add_unmap(domain, iova);
2391 /*
2392 * queue up the release of the unmap to save the roughly 1/6th of
2393 * the CPU time otherwise spent in the iotlb flush operation...
2394 */
2398 void *intel_alloc_coherent(struct device *hwdev, size_t size,
2399 dma_addr_t *dma_handle, gfp_t flags)
2401 void *vaddr;
2402 int order;
2404 size = PAGE_ALIGN(size);
2405 order = get_order(size);
2406 flags &= ~(GFP_DMA | GFP_DMA32);
2408 vaddr = (void *)__get_free_pages(flags, order);
2409 if (!vaddr)
2410 return NULL;
2411 memset(vaddr, 0, size);
2413 *dma_handle = __intel_map_single(hwdev, virt_to_bus(vaddr), size,
2414 DMA_BIDIRECTIONAL,
2415 hwdev->coherent_dma_mask);
2416 if (*dma_handle)
2417 return vaddr;
2418 free_pages((unsigned long)vaddr, order);
2419 return NULL;
2422 void intel_free_coherent(struct device *hwdev, size_t size, void *vaddr,
2423 dma_addr_t dma_handle)
2425 int order;
2427 size = PAGE_ALIGN(size);
2428 order = get_order(size);
2430 intel_unmap_single(hwdev, dma_handle, size, DMA_BIDIRECTIONAL);
2431 free_pages((unsigned long)vaddr, order);
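/*
 * Usage sketch (illustrative; "ring" and "ring_dma" are placeholder names):
 * intel_alloc_coherent()/intel_free_coherent() are reached through the
 * generic coherent DMA API:
 *
 *	dma_addr_t ring_dma;
 *	void *ring;
 *
 *	ring = dma_alloc_coherent(&pdev->dev, PAGE_SIZE, &ring_dma, GFP_KERNEL);
 *	if (!ring)
 *		return -ENOMEM;
 *	...
 *	dma_free_coherent(&pdev->dev, PAGE_SIZE, ring, ring_dma);
 */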
2434 #define SG_ENT_VIRT_ADDRESS(sg) (sg_virt((sg)))
2436 void intel_unmap_sg(struct device *hwdev, struct scatterlist *sglist,
2437 int nelems, int dir)
2439 int i;
2440 struct pci_dev *pdev = to_pci_dev(hwdev);
2441 struct dmar_domain *domain;
2442 unsigned long start_addr;
2443 struct iova *iova;
2444 size_t size = 0;
2445 void *addr;
2446 struct scatterlist *sg;
2447 struct intel_iommu *iommu;
2449 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
2450 return;
2452 domain = find_domain(pdev);
2453 BUG_ON(!domain);
2455 iommu = domain_get_iommu(domain);
2457 iova = find_iova(&domain->iovad, IOVA_PFN(sglist[0].dma_address));
2458 if (!iova)
2459 return;
2460 for_each_sg(sglist, sg, nelems, i) {
2461 addr = SG_ENT_VIRT_ADDRESS(sg);
2462 size += aligned_size((u64)addr, sg->length);
2465 start_addr = iova->pfn_lo << PAGE_SHIFT;
2467 /* clear the whole page */
2468 dma_pte_clear_range(domain, start_addr, start_addr + size);
2469 /* free page tables */
2470 dma_pte_free_pagetable(domain, start_addr, start_addr + size);
2472 if (iommu_flush_iotlb_psi(iommu, domain->id, start_addr,
2473 size >> VTD_PAGE_SHIFT, 0))
2474 iommu_flush_write_buffer(iommu);
2476 /* free iova */
2477 __free_iova(&domain->iovad, iova);
2480 static int intel_nontranslate_map_sg(struct device *hddev,
2481 struct scatterlist *sglist, int nelems, int dir)
2483 int i;
2484 struct scatterlist *sg;
2486 for_each_sg(sglist, sg, nelems, i) {
2487 BUG_ON(!sg_page(sg));
2488 sg->dma_address = virt_to_bus(SG_ENT_VIRT_ADDRESS(sg));
2489 sg->dma_length = sg->length;
2491 return nelems;
2494 int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int nelems,
2495 int dir)
2497 void *addr;
2498 int i;
2499 struct pci_dev *pdev = to_pci_dev(hwdev);
2500 struct dmar_domain *domain;
2501 size_t size = 0;
2502 int prot = 0;
2503 size_t offset = 0;
2504 struct iova *iova = NULL;
2505 int ret;
2506 struct scatterlist *sg;
2507 unsigned long start_addr;
2508 struct intel_iommu *iommu;
2510 BUG_ON(dir == DMA_NONE);
2511 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
2512 return intel_nontranslate_map_sg(hwdev, sglist, nelems, dir);
2514 domain = get_valid_domain_for_dev(pdev);
2515 if (!domain)
2516 return 0;
2518 iommu = domain_get_iommu(domain);
2520 for_each_sg(sglist, sg, nelems, i) {
2521 addr = SG_ENT_VIRT_ADDRESS(sg);
2522 addr = (void *)virt_to_phys(addr);
2523 size += aligned_size((u64)addr, sg->length);
2526 iova = __intel_alloc_iova(hwdev, domain, size, pdev->dma_mask);
2527 if (!iova) {
2528 sglist->dma_length = 0;
2529 return 0;
2532 /*
2533 * Check if the DMAR hardware supports zero-length reads on write-only
2534 * mappings.
2535 */
2536 if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
2537 !cap_zlr(iommu->cap))
2538 prot |= DMA_PTE_READ;
2539 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
2540 prot |= DMA_PTE_WRITE;
2542 start_addr = iova->pfn_lo << PAGE_SHIFT;
2543 offset = 0;
2544 for_each_sg(sglist, sg, nelems, i) {
2545 addr = SG_ENT_VIRT_ADDRESS(sg);
2546 addr = (void *)virt_to_phys(addr);
2547 size = aligned_size((u64)addr, sg->length);
2548 ret = domain_page_mapping(domain, start_addr + offset,
2549 ((u64)addr) & PAGE_MASK,
2550 size, prot);
2551 if (ret) {
2552 /* clear the page */
2553 dma_pte_clear_range(domain, start_addr,
2554 start_addr + offset);
2555 /* free page tables */
2556 dma_pte_free_pagetable(domain, start_addr,
2557 start_addr + offset);
2558 /* free iova */
2559 __free_iova(&domain->iovad, iova);
2560 return 0;
2562 sg->dma_address = start_addr + offset +
2563 ((u64)addr & (~PAGE_MASK));
2564 sg->dma_length = sg->length;
2565 offset += size;
2568 /* it's a non-present to present mapping */
2569 if (iommu_flush_iotlb_psi(iommu, domain->id,
2570 start_addr, offset >> VTD_PAGE_SHIFT, 1))
2571 iommu_flush_write_buffer(iommu);
2572 return nelems;
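/*
 * Usage sketch (illustrative; "table", "nents" and program_hw_descriptor()
 * are placeholders): intel_map_sg() sits behind dma_map_sg(), which hands
 * the driver a bus address and length for each mapped element:
 *
 *	int i, mapped;
 *	struct scatterlist *sg;
 *
 *	mapped = dma_map_sg(&pdev->dev, table, nents, DMA_FROM_DEVICE);
 *	if (!mapped)
 *		return -ENOMEM;
 *	for_each_sg(table, sg, mapped, i)
 *		program_hw_descriptor(sg_dma_address(sg), sg_dma_len(sg));
 *	...
 *	dma_unmap_sg(&pdev->dev, table, nents, DMA_FROM_DEVICE);
 */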
2575 static struct dma_mapping_ops intel_dma_ops = {
2576 .alloc_coherent = intel_alloc_coherent,
2577 .free_coherent = intel_free_coherent,
2578 .map_single = intel_map_single,
2579 .unmap_single = intel_unmap_single,
2580 .map_sg = intel_map_sg,
2581 .unmap_sg = intel_unmap_sg,
2582 };
2584 static inline int iommu_domain_cache_init(void)
2586 int ret = 0;
2588 iommu_domain_cache = kmem_cache_create("iommu_domain",
2589 sizeof(struct dmar_domain),
2590 0,
2591 SLAB_HWCACHE_ALIGN,
2593 NULL);
2594 if (!iommu_domain_cache) {
2595 printk(KERN_ERR "Couldn't create iommu_domain cache\n");
2596 ret = -ENOMEM;
2599 return ret;
2602 static inline int iommu_devinfo_cache_init(void)
2604 int ret = 0;
2606 iommu_devinfo_cache = kmem_cache_create("iommu_devinfo",
2607 sizeof(struct device_domain_info),
2608 0,
2609 SLAB_HWCACHE_ALIGN,
2610 NULL);
2611 if (!iommu_devinfo_cache) {
2612 printk(KERN_ERR "Couldn't create devinfo cache\n");
2613 ret = -ENOMEM;
2616 return ret;
2619 static inline int iommu_iova_cache_init(void)
2621 int ret = 0;
2623 iommu_iova_cache = kmem_cache_create("iommu_iova",
2624 sizeof(struct iova),
2625 0,
2626 SLAB_HWCACHE_ALIGN,
2627 NULL);
2628 if (!iommu_iova_cache) {
2629 printk(KERN_ERR "Couldn't create iova cache\n");
2630 ret = -ENOMEM;
2633 return ret;
2636 static int __init iommu_init_mempool(void)
2638 int ret;
2639 ret = iommu_iova_cache_init();
2640 if (ret)
2641 return ret;
2643 ret = iommu_domain_cache_init();
2644 if (ret)
2645 goto domain_error;
2647 ret = iommu_devinfo_cache_init();
2648 if (!ret)
2649 return ret;
2651 kmem_cache_destroy(iommu_domain_cache);
2652 domain_error:
2653 kmem_cache_destroy(iommu_iova_cache);
2655 return -ENOMEM;
2658 static void __init iommu_exit_mempool(void)
2660 kmem_cache_destroy(iommu_devinfo_cache);
2661 kmem_cache_destroy(iommu_domain_cache);
2662 kmem_cache_destroy(iommu_iova_cache);
2666 static void __init init_no_remapping_devices(void)
2668 struct dmar_drhd_unit *drhd;
2670 for_each_drhd_unit(drhd) {
2671 if (!drhd->include_all) {
2672 int i;
2673 for (i = 0; i < drhd->devices_cnt; i++)
2674 if (drhd->devices[i] != NULL)
2675 break;
2676 /* ignore DMAR unit if no pci devices exist */
2677 if (i == drhd->devices_cnt)
2678 drhd->ignored = 1;
2682 if (dmar_map_gfx)
2683 return;
2685 for_each_drhd_unit(drhd) {
2686 int i;
2687 if (drhd->ignored || drhd->include_all)
2688 continue;
2690 for (i = 0; i < drhd->devices_cnt; i++)
2691 if (drhd->devices[i] &&
2692 !IS_GFX_DEVICE(drhd->devices[i]))
2693 break;
2695 if (i < drhd->devices_cnt)
2696 continue;
2698 /* bypass IOMMU if it is just for gfx devices */
2699 drhd->ignored = 1;
2700 for (i = 0; i < drhd->devices_cnt; i++) {
2701 if (!drhd->devices[i])
2702 continue;
2703 drhd->devices[i]->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
2708 int __init intel_iommu_init(void)
2710 int ret = 0;
2712 if (dmar_table_init())
2713 return -ENODEV;
2715 if (dmar_dev_scope_init())
2716 return -ENODEV;
2718 /*
2719 * Check the need for DMA-remapping initialization now.
2720 * The initialization above is also used by interrupt remapping.
2721 */
2722 if (no_iommu || swiotlb || dmar_disabled)
2723 return -ENODEV;
2725 iommu_init_mempool();
2726 dmar_init_reserved_ranges();
2728 init_no_remapping_devices();
2730 ret = init_dmars();
2731 if (ret) {
2732 printk(KERN_ERR "IOMMU: dmar init failed\n");
2733 put_iova_domain(&reserved_iova_list);
2734 iommu_exit_mempool();
2735 return ret;
2737 printk(KERN_INFO
2738 "PCI-DMA: Intel(R) Virtualization Technology for Directed I/O\n");
2740 init_timer(&unmap_timer);
2741 force_iommu = 1;
2742 dma_ops = &intel_dma_ops;
2744 register_iommu(&intel_iommu_ops);
2746 return 0;
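/*
 * Note (added for clarity): intel_iommu_init() only switches the kernel to
 * DMA remapping when the DMAR table parses, device scopes resolve, and
 * neither no_iommu, swiotlb nor dmar_disabled is set. On success it starts
 * the deferred-unmap timer, points dma_ops at intel_dma_ops and registers
 * intel_iommu_ops with the generic IOMMU layer, so both the DMA API and the
 * domain API implemented below go through this driver from then on.
 */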
2749 static int vm_domain_add_dev_info(struct dmar_domain *domain,
2750 struct pci_dev *pdev)
2752 struct device_domain_info *info;
2753 unsigned long flags;
2755 info = alloc_devinfo_mem();
2756 if (!info)
2757 return -ENOMEM;
2759 info->bus = pdev->bus->number;
2760 info->devfn = pdev->devfn;
2761 info->dev = pdev;
2762 info->domain = domain;
2764 spin_lock_irqsave(&device_domain_lock, flags);
2765 list_add(&info->link, &domain->devices);
2766 list_add(&info->global, &device_domain_list);
2767 pdev->dev.archdata.iommu = info;
2768 spin_unlock_irqrestore(&device_domain_lock, flags);
2770 return 0;
2773 static void vm_domain_remove_one_dev_info(struct dmar_domain *domain,
2774 struct pci_dev *pdev)
2776 struct device_domain_info *info;
2777 struct intel_iommu *iommu;
2778 unsigned long flags;
2779 int found = 0;
2780 struct list_head *entry, *tmp;
2782 iommu = device_to_iommu(pdev->bus->number, pdev->devfn);
2783 if (!iommu)
2784 return;
2786 spin_lock_irqsave(&device_domain_lock, flags);
2787 list_for_each_safe(entry, tmp, &domain->devices) {
2788 info = list_entry(entry, struct device_domain_info, link);
2789 if (info->bus == pdev->bus->number &&
2790 info->devfn == pdev->devfn) {
2791 list_del(&info->link);
2792 list_del(&info->global);
2793 if (info->dev)
2794 info->dev->dev.archdata.iommu = NULL;
2795 spin_unlock_irqrestore(&device_domain_lock, flags);
2797 iommu_detach_dev(iommu, info->bus, info->devfn);
2798 free_devinfo_mem(info);
2800 spin_lock_irqsave(&device_domain_lock, flags);
2802 if (found)
2803 break;
2804 else
2805 continue;
2808 /* if there are no other devices under the same iommu owned by
2809 * this domain, clear this iommu in iommu_bmp and update the
2810 * iommu count and coherency
2811 */
2812 if (device_to_iommu(info->bus, info->devfn) == iommu)
2813 found = 1;
2816 if (found == 0) {
2817 unsigned long tmp_flags;
2818 spin_lock_irqsave(&domain->iommu_lock, tmp_flags);
2819 clear_bit(iommu->seq_id, &domain->iommu_bmp);
2820 domain->iommu_count--;
2821 domain_update_iommu_coherency(domain);
2822 spin_unlock_irqrestore(&domain->iommu_lock, tmp_flags);
2825 spin_unlock_irqrestore(&device_domain_lock, flags);
2828 static void vm_domain_remove_all_dev_info(struct dmar_domain *domain)
2830 struct device_domain_info *info;
2831 struct intel_iommu *iommu;
2832 unsigned long flags1, flags2;
2834 spin_lock_irqsave(&device_domain_lock, flags1);
2835 while (!list_empty(&domain->devices)) {
2836 info = list_entry(domain->devices.next,
2837 struct device_domain_info, link);
2838 list_del(&info->link);
2839 list_del(&info->global);
2840 if (info->dev)
2841 info->dev->dev.archdata.iommu = NULL;
2843 spin_unlock_irqrestore(&device_domain_lock, flags1);
2845 iommu = device_to_iommu(info->bus, info->devfn);
2846 iommu_detach_dev(iommu, info->bus, info->devfn);
2848 /* clear this iommu in iommu_bmp, update iommu count
2849 * and coherency
2850 */
2851 spin_lock_irqsave(&domain->iommu_lock, flags2);
2852 if (test_and_clear_bit(iommu->seq_id,
2853 &domain->iommu_bmp)) {
2854 domain->iommu_count--;
2855 domain_update_iommu_coherency(domain);
2857 spin_unlock_irqrestore(&domain->iommu_lock, flags2);
2859 free_devinfo_mem(info);
2860 spin_lock_irqsave(&device_domain_lock, flags1);
2862 spin_unlock_irqrestore(&device_domain_lock, flags1);
2865 /* domain id for virtual machine domains; it won't be set in context entries */
2866 static unsigned long vm_domid;
2868 static int vm_domain_min_agaw(struct dmar_domain *domain)
2870 int i;
2871 int min_agaw = domain->agaw;
2873 i = find_first_bit(&domain->iommu_bmp, g_num_of_iommus);
2874 for (; i < g_num_of_iommus; ) {
2875 if (min_agaw > g_iommus[i]->agaw)
2876 min_agaw = g_iommus[i]->agaw;
2878 i = find_next_bit(&domain->iommu_bmp, g_num_of_iommus, i+1);
2881 return min_agaw;
2884 static struct dmar_domain *iommu_alloc_vm_domain(void)
2886 struct dmar_domain *domain;
2888 domain = alloc_domain_mem();
2889 if (!domain)
2890 return NULL;
2892 domain->id = vm_domid++;
2893 memset(&domain->iommu_bmp, 0, sizeof(unsigned long));
2894 domain->flags = DOMAIN_FLAG_VIRTUAL_MACHINE;
2896 return domain;
2899 static int vm_domain_init(struct dmar_domain *domain, int guest_width)
2901 int adjust_width;
2903 init_iova_domain(&domain->iovad, DMA_32BIT_PFN);
2904 spin_lock_init(&domain->mapping_lock);
2905 spin_lock_init(&domain->iommu_lock);
2907 domain_reserve_special_ranges(domain);
2909 /* calculate AGAW */
2910 domain->gaw = guest_width;
2911 adjust_width = guestwidth_to_adjustwidth(guest_width);
2912 domain->agaw = width_to_agaw(adjust_width);
2914 INIT_LIST_HEAD(&domain->devices);
2916 domain->iommu_count = 0;
2917 domain->iommu_coherency = 0;
2918 domain->max_addr = 0;
2920 /* always allocate the top pgd */
2921 domain->pgd = (struct dma_pte *)alloc_pgtable_page();
2922 if (!domain->pgd)
2923 return -ENOMEM;
2924 domain_flush_cache(domain, domain->pgd, PAGE_SIZE);
2925 return 0;
2928 static void iommu_free_vm_domain(struct dmar_domain *domain)
2930 unsigned long flags;
2931 struct dmar_drhd_unit *drhd;
2932 struct intel_iommu *iommu;
2933 unsigned long i;
2934 unsigned long ndomains;
2936 for_each_drhd_unit(drhd) {
2937 if (drhd->ignored)
2938 continue;
2939 iommu = drhd->iommu;
2941 ndomains = cap_ndoms(iommu->cap);
2942 i = find_first_bit(iommu->domain_ids, ndomains);
2943 for (; i < ndomains; ) {
2944 if (iommu->domains[i] == domain) {
2945 spin_lock_irqsave(&iommu->lock, flags);
2946 clear_bit(i, iommu->domain_ids);
2947 iommu->domains[i] = NULL;
2948 spin_unlock_irqrestore(&iommu->lock, flags);
2949 break;
2951 i = find_next_bit(iommu->domain_ids, ndomains, i+1);
2956 static void vm_domain_exit(struct dmar_domain *domain)
2958 u64 end;
2960 /* Domain 0 is reserved, so don't process it */
2961 if (!domain)
2962 return;
2964 vm_domain_remove_all_dev_info(domain);
2965 /* destroy iovas */
2966 put_iova_domain(&domain->iovad);
2967 end = DOMAIN_MAX_ADDR(domain->gaw);
2968 end = end & (~VTD_PAGE_MASK);
2970 /* clear ptes */
2971 dma_pte_clear_range(domain, 0, end);
2973 /* free page tables */
2974 dma_pte_free_pagetable(domain, 0, end);
2976 iommu_free_vm_domain(domain);
2977 free_domain_mem(domain);
2980 static int intel_iommu_domain_init(struct iommu_domain *domain)
2982 struct dmar_domain *dmar_domain;
2984 dmar_domain = iommu_alloc_vm_domain();
2985 if (!dmar_domain) {
2986 printk(KERN_ERR
2987 "intel_iommu_domain_init: dmar_domain == NULL\n");
2988 return -ENOMEM;
2990 if (vm_domain_init(dmar_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
2991 printk(KERN_ERR
2992 "intel_iommu_domain_init() failed\n");
2993 vm_domain_exit(dmar_domain);
2994 return -ENOMEM;
2996 domain->priv = dmar_domain;
2998 return 0;
3001 static void intel_iommu_domain_destroy(struct iommu_domain *domain)
3003 struct dmar_domain *dmar_domain = domain->priv;
3005 domain->priv = NULL;
3006 vm_domain_exit(dmar_domain);
3009 static int intel_iommu_attach_device(struct iommu_domain *domain,
3010 struct device *dev)
3012 struct dmar_domain *dmar_domain = domain->priv;
3013 struct pci_dev *pdev = to_pci_dev(dev);
3014 struct intel_iommu *iommu;
3015 int addr_width;
3016 u64 end;
3017 int ret;
3019 /* normally pdev is not mapped */
3020 if (unlikely(domain_context_mapped(pdev))) {
3021 struct dmar_domain *old_domain;
3023 old_domain = find_domain(pdev);
3024 if (old_domain) {
3025 if (dmar_domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE)
3026 vm_domain_remove_one_dev_info(old_domain, pdev);
3027 else
3028 domain_remove_dev_info(old_domain);
3032 iommu = device_to_iommu(pdev->bus->number, pdev->devfn);
3033 if (!iommu)
3034 return -ENODEV;
3036 /* check if this iommu agaw is sufficient for max mapped address */
3037 addr_width = agaw_to_width(iommu->agaw);
3038 end = DOMAIN_MAX_ADDR(addr_width);
3039 end = end & VTD_PAGE_MASK;
3040 if (end < dmar_domain->max_addr) {
3041 printk(KERN_ERR "%s: iommu agaw (%d) is not "
3042 "sufficient for the mapped address (%llx)\n",
3043 __func__, iommu->agaw, dmar_domain->max_addr);
3044 return -EFAULT;
3047 ret = domain_context_mapping(dmar_domain, pdev);
3048 if (ret)
3049 return ret;
3051 ret = vm_domain_add_dev_info(dmar_domain, pdev);
3052 return ret;
3055 static void intel_iommu_detach_device(struct iommu_domain *domain,
3056 struct device *dev)
3058 struct dmar_domain *dmar_domain = domain->priv;
3059 struct pci_dev *pdev = to_pci_dev(dev);
3061 vm_domain_remove_one_dev_info(dmar_domain, pdev);
3064 static int intel_iommu_map_range(struct iommu_domain *domain,
3065 unsigned long iova, phys_addr_t hpa,
3066 size_t size, int iommu_prot)
3068 struct dmar_domain *dmar_domain = domain->priv;
3069 u64 max_addr;
3070 int addr_width;
3071 int prot = 0;
3072 int ret;
3074 if (iommu_prot & IOMMU_READ)
3075 prot |= DMA_PTE_READ;
3076 if (iommu_prot & IOMMU_WRITE)
3077 prot |= DMA_PTE_WRITE;
3079 max_addr = (iova & VTD_PAGE_MASK) + VTD_PAGE_ALIGN(size);
3080 if (dmar_domain->max_addr < max_addr) {
3081 int min_agaw;
3082 u64 end;
3084 /* check if minimum agaw is sufficient for mapped address */
3085 min_agaw = vm_domain_min_agaw(dmar_domain);
3086 addr_width = agaw_to_width(min_agaw);
3087 end = DOMAIN_MAX_ADDR(addr_width);
3088 end = end & VTD_PAGE_MASK;
3089 if (end < max_addr) {
3090 printk(KERN_ERR "%s: iommu agaw (%d) is not "
3091 "sufficient for the mapped address (%llx)\n",
3092 __func__, min_agaw, max_addr);
3093 return -EFAULT;
3095 dmar_domain->max_addr = max_addr;
3098 ret = domain_page_mapping(dmar_domain, iova, hpa, size, prot);
3099 return ret;
3102 static void intel_iommu_unmap_range(struct iommu_domain *domain,
3103 unsigned long iova, size_t size)
3105 struct dmar_domain *dmar_domain = domain->priv;
3106 dma_addr_t base;
3108 /* The address might not be aligned */
3109 base = iova & VTD_PAGE_MASK;
3110 size = VTD_PAGE_ALIGN(size);
3111 dma_pte_clear_range(dmar_domain, base, base + size);
3113 if (dmar_domain->max_addr == base + size)
3114 dmar_domain->max_addr = base;
3117 static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
3118 unsigned long iova)
3120 struct dmar_domain *dmar_domain = domain->priv;
3121 struct dma_pte *pte;
3122 u64 phys = 0;
3124 pte = addr_to_dma_pte(dmar_domain, iova);
3125 if (pte)
3126 phys = dma_pte_addr(pte);
3128 return phys;
3131 static struct iommu_ops intel_iommu_ops = {
3132 .domain_init = intel_iommu_domain_init,
3133 .domain_destroy = intel_iommu_domain_destroy,
3134 .attach_dev = intel_iommu_attach_device,
3135 .detach_dev = intel_iommu_detach_device,
3136 .map = intel_iommu_map_range,
3137 .unmap = intel_iommu_unmap_range,
3138 .iova_to_phys = intel_iommu_iova_to_phys,
3139 };
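/*
 * Usage sketch (illustrative; error handling trimmed, and iova/paddr/size
 * are placeholders): these ops back the generic IOMMU API of this kernel
 * generation, which a caller would drive roughly as follows:
 *
 *	struct iommu_domain *dom;
 *
 *	dom = iommu_domain_alloc();			(-> intel_iommu_domain_init)
 *	iommu_attach_device(dom, &pdev->dev);		(-> intel_iommu_attach_device)
 *	iommu_map_range(dom, iova, paddr, size, IOMMU_READ | IOMMU_WRITE);
 *	...
 *	iommu_unmap_range(dom, iova, size);
 *	iommu_detach_device(dom, &pdev->dev);
 *	iommu_domain_free(dom);
 */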