MOXA linux-2.6.x / linux-2.6.9-uc0 from sdlinux-moxaart.tgz
[linux-2.6.9-moxart.git] / arch/x86_64/kernel/pci-gart.c
/*
 * Dynamic DMA mapping support for AMD Hammer.
 *
 * Use the integrated AGP GART in the Hammer northbridge as an IOMMU for PCI.
 * This allows the use of PCI devices that only support 32bit addresses on
 * systems with more than 4GB.
 *
 * See Documentation/DMA-mapping.txt for the interface specification.
 *
 * Copyright 2002 Andi Kleen, SuSE Labs.
 */
#include <linux/config.h>
#include <linux/types.h>
#include <linux/ctype.h>
#include <linux/agp_backend.h>
#include <linux/init.h>
#include <linux/mm.h>
#include <linux/string.h>
#include <linux/spinlock.h>
#include <linux/pci.h>
#include <linux/module.h>
#include <linux/topology.h>
#include <linux/interrupt.h>
#include <asm/atomic.h>
#include <asm/io.h>
#include <asm/mtrr.h>
#include <asm/bitops.h>
#include <asm/pgtable.h>
#include <asm/proto.h>
#include <asm/cacheflush.h>
#include <asm/kdebug.h>
dma_addr_t bad_dma_address;

unsigned long iommu_bus_base;		/* GART remapping area (physical) */
static unsigned long iommu_size;	/* size of remapping area bytes */
static unsigned long iommu_pages;	/* .. and in pages */

u32 *iommu_gatt_base;			/* Remapping table */

int no_iommu;
static int no_agp;
#ifdef CONFIG_IOMMU_DEBUG
int panic_on_overflow = 1;
int force_iommu = 1;
#else
int panic_on_overflow = 0;
int force_iommu = 0;
#endif
int iommu_merge = 1;
int iommu_sac_force = 0;
/* If this is disabled the IOMMU will use an optimized flushing strategy
   of only flushing when a mapping is reused. With it true the GART is flushed
   for every mapping. Problem is that doing the lazy flush seems to trigger
   bugs with some popular PCI cards, in particular 3ware (but it has also been
   seen with Qlogic at least). */
int iommu_fullflush = 1;

/* This tells the BIO block layer to assume merging. Default to off
   because we cannot guarantee merging later. */
int iommu_bio_merge = 0;
#define MAX_NB 8

/* Allocation bitmap for the remapping area */
static spinlock_t iommu_bitmap_lock = SPIN_LOCK_UNLOCKED;
static unsigned long *iommu_gart_bitmap; /* guarded by iommu_bitmap_lock */

static u32 gart_unmapped_entry;
#define GPTE_VALID    1
#define GPTE_COHERENT 2
#define GPTE_ENCODE(x) \
	(((x) & 0xfffff000) | (((x) >> 32) << 4) | GPTE_VALID | GPTE_COHERENT)
#define GPTE_DECODE(x) (((x) & 0xfffff000) | (((u64)(x) & 0xff0) << 28))
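
/*
 * Illustrative worked example of the GATT entry packing above: for the
 * 40 bit physical address 0x1234567000,
 *   GPTE_ENCODE(0x1234567000) == 0x34567123
 * i.e. entry bits 12-31 carry phys bits 12-31, entry bits 4-11 carry
 * phys bits 32-39, and bits 0/1 are GPTE_VALID|GPTE_COHERENT.
 *   GPTE_DECODE(0x34567123) == 0x1234567000 (the flag bits are masked off).
 */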
#define to_pages(addr,size) \
	(round_up(((addr) & ~PAGE_MASK) + (size), PAGE_SIZE) >> PAGE_SHIFT)
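
/*
 * Illustrative example (4K pages): a 0x200 byte buffer starting at page
 * offset 0xf00 straddles a page boundary, so
 *   to_pages(0xf00, 0x200) == round_up(0x1100, 0x1000) >> 12 == 2
 * GART pages are needed even though the buffer is smaller than one page.
 */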
#define for_all_nb(dev) \
	dev = NULL;	\
	while ((dev = pci_find_device(PCI_VENDOR_ID_AMD, 0x1103, dev))!=NULL)\
	     if (dev->bus->number == 0 && \
		    (PCI_SLOT(dev->devfn) >= 24) && (PCI_SLOT(dev->devfn) <= 31))

static struct pci_dev *northbridges[MAX_NB];
static u32 northbridge_flush_word[MAX_NB];

#define EMERGENCY_PAGES 32 /* = 128KB */

#ifdef CONFIG_AGP
#define AGPEXTERN extern
#else
#define AGPEXTERN
#endif

/* backdoor interface to AGP driver */
AGPEXTERN int agp_memory_reserved;
AGPEXTERN __u32 *agp_gatt_table;

static unsigned long next_bit;	/* protected by iommu_bitmap_lock */
static int need_flush;		/* global flush state. set for each gart wrap */
static dma_addr_t dma_map_area(struct device *dev, unsigned long phys_mem,
			       size_t size, int dir, int do_panic);
/* Dummy device used for NULL arguments (normally ISA). A smaller DMA mask
   would probably be better, but this is bug-to-bug compatible with i386. */
static struct device fallback_dev = {
	.bus_id = "fallback device",
	.coherent_dma_mask = 0xffffffff,
	.dma_mask = &fallback_dev.coherent_dma_mask,
};
static unsigned long alloc_iommu(int size)
{
	unsigned long offset, flags;

	spin_lock_irqsave(&iommu_bitmap_lock, flags);
	offset = find_next_zero_string(iommu_gart_bitmap,next_bit,iommu_pages,size);
	if (offset == -1) {
		need_flush = 1;
		offset = find_next_zero_string(iommu_gart_bitmap,0,next_bit,size);
	}
	if (offset != -1) {
		set_bit_string(iommu_gart_bitmap, offset, size);
		next_bit = offset+size;
		if (next_bit >= iommu_pages) {
			next_bit = 0;
			need_flush = 1;
		}
	}
	if (iommu_fullflush)
		need_flush = 1;
	spin_unlock_irqrestore(&iommu_bitmap_lock, flags);
	return offset;
}
static void free_iommu(unsigned long offset, int size)
{
	unsigned long flags;
	if (size == 1) {
		clear_bit(offset, iommu_gart_bitmap);
		return;
	}
	spin_lock_irqsave(&iommu_bitmap_lock, flags);
	__clear_bit_string(iommu_gart_bitmap, offset, size);
	spin_unlock_irqrestore(&iommu_bitmap_lock, flags);
}
/*
 * Use global flush state to avoid races with multiple flushers.
 */
static void flush_gart(struct device *dev)
{
	unsigned long flags;
	int flushed = 0;
	int i, max;

	spin_lock_irqsave(&iommu_bitmap_lock, flags);
	if (need_flush) {
		max = 0;
		for (i = 0; i < MAX_NB; i++) {
			if (!northbridges[i])
				continue;
			pci_write_config_dword(northbridges[i], 0x9c,
					       northbridge_flush_word[i] | 1);
			flushed++;
			max = i;
		}
		for (i = 0; i <= max; i++) {
			u32 w;
			if (!northbridges[i])
				continue;
			/* Make sure the hardware actually executed the flush. */
			do {
				pci_read_config_dword(northbridges[i], 0x9c, &w);
			} while (w & 1);
		}
		if (!flushed)
			printk("nothing to flush?\n");
		need_flush = 0;
	}
	spin_unlock_irqrestore(&iommu_bitmap_lock, flags);
}
/* Allocate DMA memory on node near device */
noinline
static void *dma_alloc_pages(struct device *dev, unsigned gfp, unsigned order)
{
	struct page *page;
	int node;
	if (dev->bus == &pci_bus_type) {
		cpumask_t mask;
		mask = pcibus_to_cpumask(to_pci_dev(dev)->bus->number);
		node = cpu_to_node(first_cpu(mask));
	} else
		node = numa_node_id();
	page = alloc_pages_node(node, gfp, order);
	return page ? page_address(page) : NULL;
}
/*
 * Allocate memory for a coherent mapping.
 */
void *
dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle,
		   unsigned gfp)
{
	void *memory;
	unsigned long dma_mask = 0;
	u64 bus;

	if (!dev)
		dev = &fallback_dev;
	dma_mask = dev->coherent_dma_mask;
	if (dma_mask == 0)
		dma_mask = 0xffffffff;

	/* Kludge to make it bug-to-bug compatible with i386. i386
	   uses the normal dma_mask for alloc_coherent. */
	dma_mask &= *dev->dma_mask;

again:
	memory = dma_alloc_pages(dev, gfp, get_order(size));
	if (memory == NULL)
		return NULL;

	{
		int high, mmu;
		bus = virt_to_bus(memory);
		high = (bus + size) >= dma_mask;
		mmu = high;
		if (force_iommu && !(gfp & GFP_DMA))
			mmu = 1;
		if (no_iommu || dma_mask < 0xffffffffUL) {
			if (high) {
				if (!(gfp & GFP_DMA)) {
					gfp |= GFP_DMA;
					goto again;
				}
				goto free;
			}
			mmu = 0;
		}
		memset(memory, 0, size);
		if (!mmu) {
			*dma_handle = virt_to_bus(memory);
			return memory;
		}
	}

	*dma_handle = dma_map_area(dev, bus, size, PCI_DMA_BIDIRECTIONAL, 0);
	if (*dma_handle == bad_dma_address)
		goto error;
	flush_gart(dev);
	return memory;

error:
	if (panic_on_overflow)
		panic("dma_alloc_coherent: IOMMU overflow by %lu bytes\n", size);
free:
	free_pages((unsigned long)memory, get_order(size));
	/* XXX Could use the swiotlb pool here too */
	return NULL;
}
/*
 * Unmap coherent memory.
 * The caller must ensure that the device has finished accessing the mapping.
 */
void dma_free_coherent(struct device *dev, size_t size,
			 void *vaddr, dma_addr_t bus)
{
	dma_unmap_single(dev, bus, size, 0);
	free_pages((unsigned long)vaddr, get_order(size));
}
#ifdef CONFIG_IOMMU_LEAK

#define SET_LEAK(x) if (iommu_leak_tab) \
			iommu_leak_tab[x] = __builtin_return_address(0);
#define CLEAR_LEAK(x) if (iommu_leak_tab) \
		iommu_leak_tab[x] = NULL;

/* Debugging aid for drivers that don't free their IOMMU tables */
static void **iommu_leak_tab;
static int leak_trace;
int iommu_leak_pages = 20;
void dump_leak(void)
{
	int i;
	static int dump;
	if (dump || !iommu_leak_tab) return;
	dump = 1;
	show_stack(NULL,NULL);
	/* Very crude. dump some from the end of the table too */
	printk("Dumping %d pages from end of IOMMU:\n", iommu_leak_pages);
	for (i = 0; i < iommu_leak_pages; i+=2) {
		printk("%lu: ", iommu_pages-i);
		printk_address((unsigned long) iommu_leak_tab[iommu_pages-i]);
		printk("%c", (i+1)%2 == 0 ? '\n' : ' ');
	}
	printk("\n");
}
#else
#define SET_LEAK(x)
#define CLEAR_LEAK(x)
#endif
static void iommu_full(struct device *dev, size_t size, int dir, int do_panic)
{
	/*
	 * Ran out of IOMMU space for this operation. This is very bad.
	 * Unfortunately the drivers cannot handle this operation properly.
	 * Return some non mapped prereserved space in the aperture and
	 * let the Northbridge deal with it. This will result in garbage
	 * in the IO operation. When the size exceeds the prereserved space
	 * memory corruption will occur or random memory will be DMAed
	 * out. Hopefully no network devices use single mappings that big.
	 */

	printk(KERN_ERR
	       "PCI-DMA: Out of IOMMU space for %lu bytes at device %s\n",
	       size, dev->bus_id);

	if (size > PAGE_SIZE*EMERGENCY_PAGES && do_panic) {
		if (dir == PCI_DMA_FROMDEVICE || dir == PCI_DMA_BIDIRECTIONAL)
			panic("PCI-DMA: Memory would be corrupted\n");
		if (dir == PCI_DMA_TODEVICE || dir == PCI_DMA_BIDIRECTIONAL)
			panic("PCI-DMA: Random memory would be DMAed\n");
	}

#ifdef CONFIG_IOMMU_LEAK
	dump_leak();
#endif
}
static inline int need_iommu(struct device *dev, unsigned long addr, size_t size)
{
	u64 mask = *dev->dma_mask;
	int high = addr + size >= mask;
	int mmu = high;
	if (force_iommu)
		mmu = 1;
	if (no_iommu) {
		if (high)
			panic("PCI-DMA: high address but no IOMMU.\n");
		mmu = 0;
	}
	return mmu;
}
static inline int nonforced_iommu(struct device *dev, unsigned long addr, size_t size)
{
	u64 mask = *dev->dma_mask;
	int high = addr + size >= mask;
	int mmu = high;
	if (no_iommu) {
		if (high)
			panic("PCI-DMA: high address but no IOMMU.\n");
		mmu = 0;
	}
	return mmu;
}
/* Map a single continuous physical area into the IOMMU.
 * Caller needs to check if the iommu is needed and flush.
 */
static dma_addr_t dma_map_area(struct device *dev, unsigned long phys_mem,
				size_t size, int dir, int do_panic)
{
	unsigned long npages = to_pages(phys_mem, size);
	unsigned long iommu_page = alloc_iommu(npages);
	int i;
	if (iommu_page == -1) {
		if (!nonforced_iommu(dev, phys_mem, size))
			return phys_mem;
		if (panic_on_overflow)
			panic("dma_map_area overflow %lu bytes\n", size);
		iommu_full(dev, size, dir, do_panic);
		return bad_dma_address;
	}

	for (i = 0; i < npages; i++) {
		iommu_gatt_base[iommu_page + i] = GPTE_ENCODE(phys_mem);
		SET_LEAK(iommu_page + i);
		phys_mem += PAGE_SIZE;
	}
	return iommu_bus_base + iommu_page*PAGE_SIZE + (phys_mem & ~PAGE_MASK);
}
/* Map a single area into the IOMMU */
dma_addr_t dma_map_single(struct device *dev, void *addr, size_t size, int dir)
{
	unsigned long phys_mem, bus;

	BUG_ON(dir == DMA_NONE);

	if (swiotlb)
		return swiotlb_map_single(dev,addr,size,dir);
	if (!dev)
		dev = &fallback_dev;

	phys_mem = virt_to_phys(addr);
	if (!need_iommu(dev, phys_mem, size))
		return phys_mem;

	bus = dma_map_area(dev, phys_mem, size, dir, 1);
	flush_gart(dev);
	return bus;
}
/* Fallback for dma_map_sg in case of overflow */
static int dma_map_sg_nonforce(struct device *dev, struct scatterlist *sg,
			       int nents, int dir)
{
	int i;

#ifdef CONFIG_IOMMU_DEBUG
	printk(KERN_DEBUG "dma_map_sg overflow\n");
#endif

	for (i = 0; i < nents; i++ ) {
		struct scatterlist *s = &sg[i];
		unsigned long addr = page_to_phys(s->page) + s->offset;
		if (nonforced_iommu(dev, addr, s->length)) {
			addr = dma_map_area(dev, addr, s->length, dir, 0);
			if (addr == bad_dma_address) {
				if (i > 0)
					dma_unmap_sg(dev, sg, i, dir);
				nents = 0;
				sg[0].dma_length = 0;
				break;
			}
		}
		s->dma_address = addr;
		s->dma_length = s->length;
	}
	flush_gart(dev);
	return nents;
}
/* Map multiple scatterlist entries continuous into the first. */
static int __dma_map_cont(struct scatterlist *sg, int start, int stopat,
			  struct scatterlist *sout, unsigned long pages)
{
	unsigned long iommu_start = alloc_iommu(pages);
	unsigned long iommu_page = iommu_start;
	int i;

	if (iommu_start == -1)
		return -1;

	for (i = start; i < stopat; i++) {
		struct scatterlist *s = &sg[i];
		unsigned long pages, addr;
		unsigned long phys_addr = s->dma_address;

		BUG_ON(i > start && s->offset);
		if (i == start) {
			*sout = *s;
			sout->dma_address = iommu_bus_base;
			sout->dma_address += iommu_page*PAGE_SIZE + s->offset;
			sout->dma_length = s->length;
		} else {
			sout->dma_length += s->length;
		}

		addr = phys_addr;
		pages = to_pages(s->offset, s->length);
		while (pages--) {
			iommu_gatt_base[iommu_page] = GPTE_ENCODE(addr);
			SET_LEAK(iommu_page);
			addr += PAGE_SIZE;
			iommu_page++;
		}
	}
	BUG_ON(iommu_page - iommu_start != pages);
	return 0;
}
static inline int dma_map_cont(struct scatterlist *sg, int start, int stopat,
			       struct scatterlist *sout,
			       unsigned long pages, int need)
{
	if (!need) {
		BUG_ON(stopat - start != 1);
		*sout = sg[start];
		sout->dma_length = sg[start].length;
		return 0;
	}
	return __dma_map_cont(sg, start, stopat, sout, pages);
}

/*
 * DMA map all entries in a scatterlist.
 * Merge chunks that have page aligned sizes into a continuous mapping.
 */
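/*
 * Illustrative example of the merge rule used below: two 4K chunks where
 * the first ends exactly on a page boundary and the second starts at
 * offset 0 are emitted as a single 8K dma segment (one output entry),
 * provided iommu_merge is set and both chunks need the IOMMU. A chunk
 * with a nonzero offset, or a predecessor that does not end on a page
 * boundary, starts a new segment instead.
 */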
int dma_map_sg(struct device *dev, struct scatterlist *sg, int nents, int dir)
{
	int i;
	int out;
	int start;
	unsigned long pages = 0;
	int need = 0, nextneed;

	BUG_ON(dir == DMA_NONE);
	if (nents == 0)
		return 0;

	if (swiotlb)
		return swiotlb_map_sg(dev,sg,nents,dir);
	if (!dev)
		dev = &fallback_dev;

	out = 0;
	start = 0;
	for (i = 0; i < nents; i++) {
		struct scatterlist *s = &sg[i];
		dma_addr_t addr = page_to_phys(s->page) + s->offset;
		s->dma_address = addr;
		BUG_ON(s->length == 0);

		nextneed = need_iommu(dev, addr, s->length);

		/* Handle the previous not yet processed entries */
		if (i > start) {
			struct scatterlist *ps = &sg[i-1];
			/* Can only merge when the last chunk ends on a page
			   boundary and the new one doesn't have an offset. */
			if (!iommu_merge || !nextneed || !need || s->offset ||
			    (ps->offset + ps->length) % PAGE_SIZE) {
				if (dma_map_cont(sg, start, i, sg+out, pages,
						 need) < 0)
					goto error;
				out++;
				pages = 0;
				start = i;
			}
		}

		need = nextneed;
		pages += to_pages(s->offset, s->length);
	}
	if (dma_map_cont(sg, start, i, sg+out, pages, need) < 0)
		goto error;
	out++;
	flush_gart(dev);
	if (out < nents)
		sg[out].dma_length = 0;
	return out;

error:
	flush_gart(NULL);
	dma_unmap_sg(dev, sg, nents, dir);
	/* When it was forced try again unforced */
	if (force_iommu)
		return dma_map_sg_nonforce(dev, sg, nents, dir);
	if (panic_on_overflow)
		panic("dma_map_sg: overflow on %lu pages\n", pages);
	iommu_full(dev, pages << PAGE_SHIFT, dir, 0);
	for (i = 0; i < nents; i++)
		sg[i].dma_address = bad_dma_address;
	return 0;
}
/*
 * Free a DMA mapping.
 */
void dma_unmap_single(struct device *dev, dma_addr_t dma_addr,
		      size_t size, int direction)
{
	unsigned long iommu_page;
	int npages;
	int i;

	if (swiotlb) {
		swiotlb_unmap_single(dev,dma_addr,size,direction);
		return;
	}

	if (dma_addr < iommu_bus_base + EMERGENCY_PAGES*PAGE_SIZE ||
	    dma_addr >= iommu_bus_base + iommu_size)
		return;
	iommu_page = (dma_addr - iommu_bus_base)>>PAGE_SHIFT;
	npages = to_pages(dma_addr, size);
	for (i = 0; i < npages; i++) {
		iommu_gatt_base[iommu_page + i] = gart_unmapped_entry;
		CLEAR_LEAK(iommu_page + i);
	}
	free_iommu(iommu_page, npages);
}
/*
 * Wrapper for pci_unmap_single working with scatterlists.
 */
void dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nents, int dir)
{
	int i;
	if (swiotlb) {
		swiotlb_unmap_sg(dev,sg,nents,dir);
		return;
	}
	for (i = 0; i < nents; i++) {
		struct scatterlist *s = &sg[i];
		if (!s->dma_length || !s->length)
			break;
		dma_unmap_single(dev, s->dma_address, s->dma_length, dir);
	}
}
int dma_supported(struct device *dev, u64 mask)
{
	/* Copied from i386. Doesn't make much sense, because it will
	   only work for pci_alloc_coherent.
	   The caller just has to use GFP_DMA in this case. */
	if (mask < 0x00ffffff)
		return 0;

	/* Tell the device to use SAC when IOMMU force is on.
	   This allows the driver to use cheaper accesses in some cases.

	   Problem with this is that if we overflow the IOMMU area
	   and return DAC as fallback address the device may not handle it correctly.

	   As a special case some controllers have a 39bit address mode
	   that is as efficient as 32bit (aic79xx). Don't force SAC for these.
	   Assume all masks <= 40 bits are of this type. Normally this doesn't
	   make any difference, but gives more gentle handling of IOMMU overflow. */
	if (iommu_sac_force && (mask >= 0xffffffffffULL)) {
		printk(KERN_INFO "%s: Force SAC with mask %Lx\n", dev->bus_id,mask);
		return 0;
	}

	return 1;
}
int dma_get_cache_alignment(void)
{
	return boot_cpu_data.x86_clflush_size;
}

EXPORT_SYMBOL(dma_unmap_sg);
EXPORT_SYMBOL(dma_map_sg);
EXPORT_SYMBOL(dma_map_single);
EXPORT_SYMBOL(dma_unmap_single);
EXPORT_SYMBOL(dma_supported);
EXPORT_SYMBOL(no_iommu);
EXPORT_SYMBOL(force_iommu);
EXPORT_SYMBOL(bad_dma_address);
EXPORT_SYMBOL(iommu_bio_merge);
EXPORT_SYMBOL(iommu_sac_force);
EXPORT_SYMBOL(dma_get_cache_alignment);
EXPORT_SYMBOL(dma_alloc_coherent);
EXPORT_SYMBOL(dma_free_coherent);
static __init unsigned long check_iommu_size(unsigned long aper, u64 aper_size)
{
	unsigned long a;
	if (!iommu_size) {
		iommu_size = aper_size;
		if (!no_agp)
			iommu_size /= 2;
	}

	a = aper + iommu_size;
	iommu_size -= round_up(a, LARGE_PAGE_SIZE) - a;

	if (iommu_size < 64*1024*1024)
		printk(KERN_WARNING
		       "PCI-DMA: Warning: Small IOMMU %luMB. Consider increasing the AGP aperture in BIOS\n",iommu_size>>20);

	return iommu_size;
}
static __init unsigned read_aperture(struct pci_dev *dev, u32 *size)
{
	unsigned aper_size = 0, aper_base_32;
	u64 aper_base;
	unsigned aper_order;

	pci_read_config_dword(dev, 0x94, &aper_base_32);
	pci_read_config_dword(dev, 0x90, &aper_order);
	aper_order = (aper_order >> 1) & 7;

	aper_base = aper_base_32 & 0x7fff;
	aper_base <<= 25;

	aper_size = (32 * 1024 * 1024) << aper_order;
	if (aper_base + aper_size >= 0xffffffff || !aper_size)
		aper_base = 0;

	*size = aper_size;
	return aper_base;
}
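
/*
 * Illustrative example of the decoding above: if the order field (bits 1-3
 * of the aperture control register) reads 3, the aperture size is
 * (32MB << 3) = 256MB. The 15 bit base field shifted left by 25 gives a
 * 32MB aligned physical base; bases that would place the aperture at or
 * above 4GB are rejected by setting aper_base back to 0.
 */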
/*
 * Private Northbridge GATT initialization in case we cannot use the
 * AGP driver for some reason.
 */
static __init int init_k8_gatt(struct agp_kern_info *info)
{
	struct pci_dev *dev;
	void *gatt;
	unsigned aper_base, new_aper_base;
	unsigned aper_size, gatt_size, new_aper_size;

	aper_size = aper_base = info->aper_size = 0;
	for_all_nb(dev) {
		new_aper_base = read_aperture(dev, &new_aper_size);
		if (!new_aper_base)
			goto nommu;

		if (!aper_base) {
			aper_size = new_aper_size;
			aper_base = new_aper_base;
		}
		if (aper_size != new_aper_size || aper_base != new_aper_base)
			goto nommu;
	}
	if (!aper_base)
		goto nommu;
	info->aper_base = aper_base;
	info->aper_size = aper_size>>20;

	gatt_size = (aper_size >> PAGE_SHIFT) * sizeof(u32);
	gatt = (void *)__get_free_pages(GFP_KERNEL, get_order(gatt_size));
	if (!gatt)
		panic("Cannot allocate GATT table");
	memset(gatt, 0, gatt_size);
	agp_gatt_table = gatt;

	for_all_nb(dev) {
		u32 ctl;
		u32 gatt_reg;

		gatt_reg = __pa(gatt) >> 12;
		gatt_reg <<= 4;
		pci_write_config_dword(dev, 0x98, gatt_reg);
		pci_read_config_dword(dev, 0x90, &ctl);

		ctl |= 1;
		ctl &= ~((1<<4) | (1<<5));

		pci_write_config_dword(dev, 0x90, ctl);
	}
	flush_gart(NULL);

	printk("PCI-DMA: aperture base @ %x size %u KB\n",aper_base, aper_size>>10);
	return 0;

 nommu:
	/* Should not happen anymore */
	printk(KERN_ERR "PCI-DMA: More than 4GB of RAM and no IOMMU\n"
	       KERN_ERR "PCI-DMA: 32bit PCI IO may malfunction.");
	return -1;
}
extern int agp_amd64_init(void);

static int __init pci_iommu_init(void)
{
	struct agp_kern_info info;
	unsigned long aper_size;
	unsigned long iommu_start;
	struct pci_dev *dev;
	unsigned long scratch;
	long i;

#ifndef CONFIG_AGP_AMD64
	no_agp = 1;
#else
	/* Makefile puts PCI initialization via subsys_initcall first. */
	/* Add other K8 AGP bridge drivers here */
	no_agp = no_agp ||
		(agp_amd64_init() < 0) ||
		(agp_copy_info(&info) < 0);
#endif

	if (swiotlb) {
		no_iommu = 1;
		printk(KERN_INFO "PCI-DMA: Using software bounce buffering for IO (SWIOTLB)\n");
		return -1;
	}

	if (no_iommu || (!force_iommu && end_pfn < 0xffffffff>>PAGE_SHIFT) ||
	    !iommu_aperture) {
		printk(KERN_INFO "PCI-DMA: Disabling IOMMU.\n");
		no_iommu = 1;
		return -1;
	}

	if (no_agp) {
		int err = -1;
		printk(KERN_INFO "PCI-DMA: Disabling AGP.\n");
		no_agp = 1;
		if (force_iommu || end_pfn >= 0xffffffff>>PAGE_SHIFT)
			err = init_k8_gatt(&info);
		if (err < 0) {
			printk(KERN_INFO "PCI-DMA: Disabling IOMMU.\n");
			no_iommu = 1;
			return -1;
		}
	}

	aper_size = info.aper_size * 1024 * 1024;
	iommu_size = check_iommu_size(info.aper_base, aper_size);
	iommu_pages = iommu_size >> PAGE_SHIFT;

	iommu_gart_bitmap = (void*)__get_free_pages(GFP_KERNEL,
						    get_order(iommu_pages/8));
	if (!iommu_gart_bitmap)
		panic("Cannot allocate iommu bitmap\n");
	memset(iommu_gart_bitmap, 0, iommu_pages/8);

#ifdef CONFIG_IOMMU_LEAK
	if (leak_trace) {
		iommu_leak_tab = (void *)__get_free_pages(GFP_KERNEL,
				  get_order(iommu_pages*sizeof(void *)));
		if (iommu_leak_tab)
			memset(iommu_leak_tab, 0, iommu_pages * 8);
		else
			printk("PCI-DMA: Cannot allocate leak trace area\n");
	}
#endif

	/*
	 * Out of IOMMU space handling.
	 * Reserve some invalid pages at the beginning of the GART.
	 */
	set_bit_string(iommu_gart_bitmap, 0, EMERGENCY_PAGES);

	agp_memory_reserved = iommu_size;
	printk(KERN_INFO
	       "PCI-DMA: Reserving %luMB of IOMMU area in the AGP aperture\n",
	       iommu_size>>20);

	iommu_start = aper_size - iommu_size;
	iommu_bus_base = info.aper_base + iommu_start;
	bad_dma_address = iommu_bus_base;
	iommu_gatt_base = agp_gatt_table + (iommu_start>>PAGE_SHIFT);

	/*
	 * Unmap the IOMMU part of the GART. The alias of the page is
	 * always mapped with cache enabled and there is no full cache
	 * coherency across the GART remapping. The unmapping avoids
	 * automatic prefetches from the CPU allocating cache lines in
	 * there. All CPU accesses are done via the direct mapping to
	 * the backing memory. The GART address is only used by PCI
	 * devices.
	 */
	clear_kernel_mapping((unsigned long)__va(iommu_bus_base), iommu_size);

	/*
	 * Try to work around a bug (thanks to BenH):
	 * Set unmapped entries to a scratch page instead of 0.
	 * Any prefetches that hit unmapped entries won't get a bus abort
	 * then.
	 */
	scratch = get_zeroed_page(GFP_KERNEL);
	if (!scratch)
		panic("Cannot allocate iommu scratch page");
	gart_unmapped_entry = GPTE_ENCODE(__pa(scratch));
	for (i = EMERGENCY_PAGES; i < iommu_pages; i++)
		iommu_gatt_base[i] = gart_unmapped_entry;

	for_all_nb(dev) {
		u32 flag;
		int cpu = PCI_SLOT(dev->devfn) - 24;
		if (cpu >= MAX_NB)
			continue;
		northbridges[cpu] = dev;
		pci_read_config_dword(dev, 0x9c, &flag); /* cache flush word */
		northbridge_flush_word[cpu] = flag;
	}

	flush_gart(NULL);

	return 0;
}
/* Must execute after PCI subsystem */
fs_initcall(pci_iommu_init);
/* iommu=[size][,noagp][,off][,force][,noforce][,leak][,memaper[=order]][,merge]
         [,forcesac][,fullflush][,nomerge][,biomerge]
   size		set size of iommu (in bytes)
   noagp	don't initialize the AGP driver and use full aperture.
   off		don't use the IOMMU
   leak		turn on simple iommu leak tracing (only when CONFIG_IOMMU_LEAK is on)
   memaper[=order] allocate its own aperture over RAM with size 32MB^order.
   noforce	don't force IOMMU usage. Default.
   force	Force IOMMU.
   merge	Do lazy merging. This may improve performance on some block devices.
		Implies force (experimental)
   biomerge	Do merging at the BIO layer. This is more efficient than merge,
		but should only be done with very big IOMMUs. Implies merge,force.
   nomerge	Don't do SG merging.
   forcesac	Force SAC mode for masks <40 bits (experimental)
   fullflush	Flush IOMMU on each allocation (default)
   nofullflush	Don't use IOMMU fullflush
   allowed	override the iommu-off workarounds for specific chipsets.
   soft		Use software bounce buffering (default for Intel machines)
   noaperture	Don't touch the aperture for AGP.
*/
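/*
 * Illustrative usage (kernel command line): "iommu=force,fullflush" forces
 * all DMA through the GART and flushes it on every new mapping, while
 * "iommu=soft" falls back to swiotlb bounce buffering and "iommu=off"
 * disables the IOMMU entirely. Options are comma separated, as parsed below.
 */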
__init int iommu_setup(char *p)
{
	int arg;

	while (*p) {
		if (!strncmp(p,"noagp",5))
			no_agp = 1;
		if (!strncmp(p,"off",3))
			no_iommu = 1;
		if (!strncmp(p,"force",5)) {
			force_iommu = 1;
			iommu_aperture_allowed = 1;
		}
		if (!strncmp(p,"allowed",7))
			iommu_aperture_allowed = 1;
		if (!strncmp(p,"noforce",7)) {
			iommu_merge = 0;
			force_iommu = 0;
		}
		if (!strncmp(p, "memaper", 7)) {
			fallback_aper_force = 1;
			p += 7;
			if (*p == '=') {
				++p;
				if (get_option(&p, &arg))
					fallback_aper_order = arg;
			}
		}
		if (!strncmp(p, "biomerge",8)) {
			iommu_bio_merge = 4096;
			iommu_merge = 1;
			force_iommu = 1;
		}
		if (!strncmp(p, "panic",5))
			panic_on_overflow = 1;
		if (!strncmp(p, "nopanic",7))
			panic_on_overflow = 0;
		if (!strncmp(p, "merge",5)) {
			iommu_merge = 1;
			force_iommu = 1;
		}
		if (!strncmp(p, "nomerge",7))
			iommu_merge = 0;
		if (!strncmp(p, "forcesac",8))
			iommu_sac_force = 1;
		if (!strncmp(p, "fullflush",8))
			iommu_fullflush = 1;
		if (!strncmp(p, "nofullflush",11))
			iommu_fullflush = 0;
		if (!strncmp(p, "soft",4))
			swiotlb = 1;
		if (!strncmp(p, "noaperture",10))
			fix_aperture = 0;
#ifdef CONFIG_IOMMU_LEAK
		if (!strncmp(p,"leak",4)) {
			leak_trace = 1;
			p += 4;
			if (*p == '=') ++p;
			if (isdigit(*p) && get_option(&p, &arg))
				iommu_leak_pages = arg;
		} else
#endif
		if (isdigit(*p) && get_option(&p, &arg))
			iommu_size = arg;
		p += strcspn(p, ",");
		if (*p == ',')
			++p;
	}
	return 1;
}