1 /* pci_sun4v.c: SUN4V specific PCI controller support.
3 * Copyright (C) 2006, 2007 David S. Miller (davem@davemloft.net)
6 #include <linux/kernel.h>
7 #include <linux/types.h>
9 #include <linux/init.h>
10 #include <linux/slab.h>
11 #include <linux/interrupt.h>
12 #include <linux/percpu.h>
13 #include <linux/irq.h>
14 #include <linux/msi.h>
17 #include <asm/iommu.h>
20 #include <asm/pstate.h>
21 #include <asm/oplib.h>
22 #include <asm/hypervisor.h>
26 #include "iommu_common.h"
28 #include "pci_sun4v.h"
30 #define PGLIST_NENTS (PAGE_SIZE / sizeof(u64))
32 struct pci_iommu_batch
{
33 struct pci_dev
*pdev
; /* Device mapping is for. */
34 unsigned long prot
; /* IOMMU page protections */
35 unsigned long entry
; /* Index into IOTSB. */
36 u64
*pglist
; /* List of physical pages */
37 unsigned long npages
; /* Number of pages in list. */
40 static DEFINE_PER_CPU(struct pci_iommu_batch
, pci_iommu_batch
);
42 /* Interrupts must be disabled. */
43 static inline void pci_iommu_batch_start(struct pci_dev
*pdev
, unsigned long prot
, unsigned long entry
)
45 struct pci_iommu_batch
*p
= &__get_cpu_var(pci_iommu_batch
);
53 /* Interrupts must be disabled. */
54 static long pci_iommu_batch_flush(struct pci_iommu_batch
*p
)
56 struct pci_pbm_info
*pbm
= p
->pdev
->dev
.archdata
.host_controller
;
57 unsigned long devhandle
= pbm
->devhandle
;
58 unsigned long prot
= p
->prot
;
59 unsigned long entry
= p
->entry
;
60 u64
*pglist
= p
->pglist
;
61 unsigned long npages
= p
->npages
;
66 num
= pci_sun4v_iommu_map(devhandle
, HV_PCI_TSBID(0, entry
),
67 npages
, prot
, __pa(pglist
));
68 if (unlikely(num
< 0)) {
69 if (printk_ratelimit())
70 printk("pci_iommu_batch_flush: IOMMU map of "
71 "[%08lx:%08lx:%lx:%lx:%lx] failed with "
73 devhandle
, HV_PCI_TSBID(0, entry
),
74 npages
, prot
, __pa(pglist
), num
);
89 /* Interrupts must be disabled. */
90 static inline long pci_iommu_batch_add(u64 phys_page
)
92 struct pci_iommu_batch
*p
= &__get_cpu_var(pci_iommu_batch
);
94 BUG_ON(p
->npages
>= PGLIST_NENTS
);
96 p
->pglist
[p
->npages
++] = phys_page
;
97 if (p
->npages
== PGLIST_NENTS
)
98 return pci_iommu_batch_flush(p
);
103 /* Interrupts must be disabled. */
104 static inline long pci_iommu_batch_end(void)
106 struct pci_iommu_batch
*p
= &__get_cpu_var(pci_iommu_batch
);
108 BUG_ON(p
->npages
>= PGLIST_NENTS
);
110 return pci_iommu_batch_flush(p
);
113 static long pci_arena_alloc(struct pci_iommu_arena
*arena
, unsigned long npages
)
115 unsigned long n
, i
, start
, end
, limit
;
118 limit
= arena
->limit
;
123 n
= find_next_zero_bit(arena
->map
, limit
, start
);
125 if (unlikely(end
>= limit
)) {
126 if (likely(pass
< 1)) {
132 /* Scanned the whole thing, give up. */
137 for (i
= n
; i
< end
; i
++) {
138 if (test_bit(i
, arena
->map
)) {
144 for (i
= n
; i
< end
; i
++)
145 __set_bit(i
, arena
->map
);
152 static void pci_arena_free(struct pci_iommu_arena
*arena
, unsigned long base
, unsigned long npages
)
156 for (i
= base
; i
< (base
+ npages
); i
++)
157 __clear_bit(i
, arena
->map
);
160 static void *pci_4v_alloc_consistent(struct pci_dev
*pdev
, size_t size
, dma_addr_t
*dma_addrp
, gfp_t gfp
)
162 struct pci_iommu
*iommu
;
163 unsigned long flags
, order
, first_page
, npages
, n
;
167 size
= IO_PAGE_ALIGN(size
);
168 order
= get_order(size
);
169 if (unlikely(order
>= MAX_ORDER
))
172 npages
= size
>> IO_PAGE_SHIFT
;
174 first_page
= __get_free_pages(gfp
, order
);
175 if (unlikely(first_page
== 0UL))
178 memset((char *)first_page
, 0, PAGE_SIZE
<< order
);
180 iommu
= pdev
->dev
.archdata
.iommu
;
182 spin_lock_irqsave(&iommu
->lock
, flags
);
183 entry
= pci_arena_alloc(&iommu
->arena
, npages
);
184 spin_unlock_irqrestore(&iommu
->lock
, flags
);
186 if (unlikely(entry
< 0L))
187 goto arena_alloc_fail
;
189 *dma_addrp
= (iommu
->page_table_map_base
+
190 (entry
<< IO_PAGE_SHIFT
));
191 ret
= (void *) first_page
;
192 first_page
= __pa(first_page
);
194 local_irq_save(flags
);
196 pci_iommu_batch_start(pdev
,
197 (HV_PCI_MAP_ATTR_READ
|
198 HV_PCI_MAP_ATTR_WRITE
),
201 for (n
= 0; n
< npages
; n
++) {
202 long err
= pci_iommu_batch_add(first_page
+ (n
* PAGE_SIZE
));
203 if (unlikely(err
< 0L))
207 if (unlikely(pci_iommu_batch_end() < 0L))
210 local_irq_restore(flags
);
215 /* Interrupts are disabled. */
216 spin_lock(&iommu
->lock
);
217 pci_arena_free(&iommu
->arena
, entry
, npages
);
218 spin_unlock_irqrestore(&iommu
->lock
, flags
);
221 free_pages(first_page
, order
);
225 static void pci_4v_free_consistent(struct pci_dev
*pdev
, size_t size
, void *cpu
, dma_addr_t dvma
)
227 struct pci_pbm_info
*pbm
;
228 struct pci_iommu
*iommu
;
229 unsigned long flags
, order
, npages
, entry
;
232 npages
= IO_PAGE_ALIGN(size
) >> IO_PAGE_SHIFT
;
233 iommu
= pdev
->dev
.archdata
.iommu
;
234 pbm
= pdev
->dev
.archdata
.host_controller
;
235 devhandle
= pbm
->devhandle
;
236 entry
= ((dvma
- iommu
->page_table_map_base
) >> IO_PAGE_SHIFT
);
238 spin_lock_irqsave(&iommu
->lock
, flags
);
240 pci_arena_free(&iommu
->arena
, entry
, npages
);
245 num
= pci_sun4v_iommu_demap(devhandle
, HV_PCI_TSBID(0, entry
),
249 } while (npages
!= 0);
251 spin_unlock_irqrestore(&iommu
->lock
, flags
);
253 order
= get_order(size
);
255 free_pages((unsigned long)cpu
, order
);
258 static dma_addr_t
pci_4v_map_single(struct pci_dev
*pdev
, void *ptr
, size_t sz
, int direction
)
260 struct pci_iommu
*iommu
;
261 unsigned long flags
, npages
, oaddr
;
262 unsigned long i
, base_paddr
;
267 iommu
= pdev
->dev
.archdata
.iommu
;
269 if (unlikely(direction
== PCI_DMA_NONE
))
272 oaddr
= (unsigned long)ptr
;
273 npages
= IO_PAGE_ALIGN(oaddr
+ sz
) - (oaddr
& IO_PAGE_MASK
);
274 npages
>>= IO_PAGE_SHIFT
;
276 spin_lock_irqsave(&iommu
->lock
, flags
);
277 entry
= pci_arena_alloc(&iommu
->arena
, npages
);
278 spin_unlock_irqrestore(&iommu
->lock
, flags
);
280 if (unlikely(entry
< 0L))
283 bus_addr
= (iommu
->page_table_map_base
+
284 (entry
<< IO_PAGE_SHIFT
));
285 ret
= bus_addr
| (oaddr
& ~IO_PAGE_MASK
);
286 base_paddr
= __pa(oaddr
& IO_PAGE_MASK
);
287 prot
= HV_PCI_MAP_ATTR_READ
;
288 if (direction
!= PCI_DMA_TODEVICE
)
289 prot
|= HV_PCI_MAP_ATTR_WRITE
;
291 local_irq_save(flags
);
293 pci_iommu_batch_start(pdev
, prot
, entry
);
295 for (i
= 0; i
< npages
; i
++, base_paddr
+= IO_PAGE_SIZE
) {
296 long err
= pci_iommu_batch_add(base_paddr
);
297 if (unlikely(err
< 0L))
300 if (unlikely(pci_iommu_batch_end() < 0L))
303 local_irq_restore(flags
);
308 if (printk_ratelimit())
310 return PCI_DMA_ERROR_CODE
;
313 /* Interrupts are disabled. */
314 spin_lock(&iommu
->lock
);
315 pci_arena_free(&iommu
->arena
, entry
, npages
);
316 spin_unlock_irqrestore(&iommu
->lock
, flags
);
318 return PCI_DMA_ERROR_CODE
;
321 static void pci_4v_unmap_single(struct pci_dev
*pdev
, dma_addr_t bus_addr
, size_t sz
, int direction
)
323 struct pci_pbm_info
*pbm
;
324 struct pci_iommu
*iommu
;
325 unsigned long flags
, npages
;
329 if (unlikely(direction
== PCI_DMA_NONE
)) {
330 if (printk_ratelimit())
335 iommu
= pdev
->dev
.archdata
.iommu
;
336 pbm
= pdev
->dev
.archdata
.host_controller
;
337 devhandle
= pbm
->devhandle
;
339 npages
= IO_PAGE_ALIGN(bus_addr
+ sz
) - (bus_addr
& IO_PAGE_MASK
);
340 npages
>>= IO_PAGE_SHIFT
;
341 bus_addr
&= IO_PAGE_MASK
;
343 spin_lock_irqsave(&iommu
->lock
, flags
);
345 entry
= (bus_addr
- iommu
->page_table_map_base
) >> IO_PAGE_SHIFT
;
346 pci_arena_free(&iommu
->arena
, entry
, npages
);
351 num
= pci_sun4v_iommu_demap(devhandle
, HV_PCI_TSBID(0, entry
),
355 } while (npages
!= 0);
357 spin_unlock_irqrestore(&iommu
->lock
, flags
);
360 #define SG_ENT_PHYS_ADDRESS(SG) \
361 (__pa(page_address((SG)->page)) + (SG)->offset)
363 static inline long fill_sg(long entry
, struct pci_dev
*pdev
,
364 struct scatterlist
*sg
,
365 int nused
, int nelems
, unsigned long prot
)
367 struct scatterlist
*dma_sg
= sg
;
368 struct scatterlist
*sg_end
= sg
+ nelems
;
372 local_irq_save(flags
);
374 pci_iommu_batch_start(pdev
, prot
, entry
);
376 for (i
= 0; i
< nused
; i
++) {
377 unsigned long pteval
= ~0UL;
380 dma_npages
= ((dma_sg
->dma_address
& (IO_PAGE_SIZE
- 1UL)) +
382 ((IO_PAGE_SIZE
- 1UL))) >> IO_PAGE_SHIFT
;
384 unsigned long offset
;
387 /* If we are here, we know we have at least one
388 * more page to map. So walk forward until we
389 * hit a page crossing, and begin creating new
390 * mappings from that spot.
395 tmp
= SG_ENT_PHYS_ADDRESS(sg
);
397 if (((tmp
^ pteval
) >> IO_PAGE_SHIFT
) != 0UL) {
398 pteval
= tmp
& IO_PAGE_MASK
;
399 offset
= tmp
& (IO_PAGE_SIZE
- 1UL);
402 if (((tmp
^ (tmp
+ len
- 1UL)) >> IO_PAGE_SHIFT
) != 0UL) {
403 pteval
= (tmp
+ IO_PAGE_SIZE
) & IO_PAGE_MASK
;
405 len
-= (IO_PAGE_SIZE
- (tmp
& (IO_PAGE_SIZE
- 1UL)));
411 pteval
= (pteval
& IOPTE_PAGE
);
415 err
= pci_iommu_batch_add(pteval
);
416 if (unlikely(err
< 0L))
417 goto iommu_map_failed
;
419 pteval
+= IO_PAGE_SIZE
;
420 len
-= (IO_PAGE_SIZE
- offset
);
425 pteval
= (pteval
& IOPTE_PAGE
) + len
;
428 /* Skip over any tail mappings we've fully mapped,
429 * adjusting pteval along the way. Stop when we
430 * detect a page crossing event.
432 while (sg
< sg_end
&&
433 (pteval
<< (64 - IO_PAGE_SHIFT
)) != 0UL &&
434 (pteval
== SG_ENT_PHYS_ADDRESS(sg
)) &&
436 (SG_ENT_PHYS_ADDRESS(sg
) + sg
->length
- 1UL)) >> IO_PAGE_SHIFT
) == 0UL) {
437 pteval
+= sg
->length
;
440 if ((pteval
<< (64 - IO_PAGE_SHIFT
)) == 0UL)
442 } while (dma_npages
!= 0);
446 if (unlikely(pci_iommu_batch_end() < 0L))
447 goto iommu_map_failed
;
449 local_irq_restore(flags
);
453 local_irq_restore(flags
);
457 static int pci_4v_map_sg(struct pci_dev
*pdev
, struct scatterlist
*sglist
, int nelems
, int direction
)
459 struct pci_iommu
*iommu
;
460 unsigned long flags
, npages
, prot
;
462 struct scatterlist
*sgtmp
;
466 /* Fast path single entry scatterlists. */
468 sglist
->dma_address
=
469 pci_4v_map_single(pdev
,
470 (page_address(sglist
->page
) + sglist
->offset
),
471 sglist
->length
, direction
);
472 if (unlikely(sglist
->dma_address
== PCI_DMA_ERROR_CODE
))
474 sglist
->dma_length
= sglist
->length
;
478 iommu
= pdev
->dev
.archdata
.iommu
;
480 if (unlikely(direction
== PCI_DMA_NONE
))
483 /* Step 1: Prepare scatter list. */
484 npages
= prepare_sg(sglist
, nelems
);
486 /* Step 2: Allocate a cluster and context, if necessary. */
487 spin_lock_irqsave(&iommu
->lock
, flags
);
488 entry
= pci_arena_alloc(&iommu
->arena
, npages
);
489 spin_unlock_irqrestore(&iommu
->lock
, flags
);
491 if (unlikely(entry
< 0L))
494 dma_base
= iommu
->page_table_map_base
+
495 (entry
<< IO_PAGE_SHIFT
);
497 /* Step 3: Normalize DMA addresses. */
501 while (used
&& sgtmp
->dma_length
) {
502 sgtmp
->dma_address
+= dma_base
;
506 used
= nelems
- used
;
508 /* Step 4: Create the mappings. */
509 prot
= HV_PCI_MAP_ATTR_READ
;
510 if (direction
!= PCI_DMA_TODEVICE
)
511 prot
|= HV_PCI_MAP_ATTR_WRITE
;
513 err
= fill_sg(entry
, pdev
, sglist
, used
, nelems
, prot
);
514 if (unlikely(err
< 0L))
515 goto iommu_map_failed
;
520 if (printk_ratelimit())
525 spin_lock_irqsave(&iommu
->lock
, flags
);
526 pci_arena_free(&iommu
->arena
, entry
, npages
);
527 spin_unlock_irqrestore(&iommu
->lock
, flags
);
532 static void pci_4v_unmap_sg(struct pci_dev
*pdev
, struct scatterlist
*sglist
, int nelems
, int direction
)
534 struct pci_pbm_info
*pbm
;
535 struct pci_iommu
*iommu
;
536 unsigned long flags
, i
, npages
;
538 u32 devhandle
, bus_addr
;
540 if (unlikely(direction
== PCI_DMA_NONE
)) {
541 if (printk_ratelimit())
545 iommu
= pdev
->dev
.archdata
.iommu
;
546 pbm
= pdev
->dev
.archdata
.host_controller
;
547 devhandle
= pbm
->devhandle
;
549 bus_addr
= sglist
->dma_address
& IO_PAGE_MASK
;
551 for (i
= 1; i
< nelems
; i
++)
552 if (sglist
[i
].dma_length
== 0)
555 npages
= (IO_PAGE_ALIGN(sglist
[i
].dma_address
+ sglist
[i
].dma_length
) -
556 bus_addr
) >> IO_PAGE_SHIFT
;
558 entry
= ((bus_addr
- iommu
->page_table_map_base
) >> IO_PAGE_SHIFT
);
560 spin_lock_irqsave(&iommu
->lock
, flags
);
562 pci_arena_free(&iommu
->arena
, entry
, npages
);
567 num
= pci_sun4v_iommu_demap(devhandle
, HV_PCI_TSBID(0, entry
),
571 } while (npages
!= 0);
573 spin_unlock_irqrestore(&iommu
->lock
, flags
);
576 static void pci_4v_dma_sync_single_for_cpu(struct pci_dev
*pdev
, dma_addr_t bus_addr
, size_t sz
, int direction
)
578 /* Nothing to do... */
/* dma_sync_sg_for_cpu hook for the sun4v IOMMU ops table.
 * Intentionally empty: this implementation performs no work.
 */
static void pci_4v_dma_sync_sg_for_cpu(struct pci_dev *pdev, struct scatterlist *sglist, int nelems, int direction)
{
	/* Nothing to do... */
}
586 struct pci_iommu_ops pci_sun4v_iommu_ops
= {
587 .alloc_consistent
= pci_4v_alloc_consistent
,
588 .free_consistent
= pci_4v_free_consistent
,
589 .map_single
= pci_4v_map_single
,
590 .unmap_single
= pci_4v_unmap_single
,
591 .map_sg
= pci_4v_map_sg
,
592 .unmap_sg
= pci_4v_unmap_sg
,
593 .dma_sync_single_for_cpu
= pci_4v_dma_sync_single_for_cpu
,
594 .dma_sync_sg_for_cpu
= pci_4v_dma_sync_sg_for_cpu
,
597 static inline int pci_sun4v_out_of_range(struct pci_pbm_info
*pbm
, unsigned int bus
, unsigned int device
, unsigned int func
)
599 if (bus
< pbm
->pci_first_busno
||
600 bus
> pbm
->pci_last_busno
)
605 static int pci_sun4v_read_pci_cfg(struct pci_bus
*bus_dev
, unsigned int devfn
,
606 int where
, int size
, u32
*value
)
608 struct pci_pbm_info
*pbm
= bus_dev
->sysdata
;
609 u32 devhandle
= pbm
->devhandle
;
610 unsigned int bus
= bus_dev
->number
;
611 unsigned int device
= PCI_SLOT(devfn
);
612 unsigned int func
= PCI_FUNC(devfn
);
615 if (pci_sun4v_out_of_range(pbm
, bus
, device
, func
)) {
618 ret
= pci_sun4v_config_get(devhandle
,
619 HV_PCI_DEVICE_BUILD(bus
, device
, func
),
622 printk("rcfg: [%x:%x:%x:%d]=[%lx]\n",
623 devhandle
, HV_PCI_DEVICE_BUILD(bus
, device
, func
),
632 *value
= ret
& 0xffff;
635 *value
= ret
& 0xffffffff;
640 return PCIBIOS_SUCCESSFUL
;
643 static int pci_sun4v_write_pci_cfg(struct pci_bus
*bus_dev
, unsigned int devfn
,
644 int where
, int size
, u32 value
)
646 struct pci_pbm_info
*pbm
= bus_dev
->sysdata
;
647 u32 devhandle
= pbm
->devhandle
;
648 unsigned int bus
= bus_dev
->number
;
649 unsigned int device
= PCI_SLOT(devfn
);
650 unsigned int func
= PCI_FUNC(devfn
);
653 if (pci_sun4v_out_of_range(pbm
, bus
, device
, func
)) {
656 ret
= pci_sun4v_config_put(devhandle
,
657 HV_PCI_DEVICE_BUILD(bus
, device
, func
),
660 printk("wcfg: [%x:%x:%x:%d] v[%x] == [%lx]\n",
661 devhandle
, HV_PCI_DEVICE_BUILD(bus
, device
, func
),
662 where
, size
, value
, ret
);
665 return PCIBIOS_SUCCESSFUL
;
668 static struct pci_ops pci_sun4v_ops
= {
669 .read
= pci_sun4v_read_pci_cfg
,
670 .write
= pci_sun4v_write_pci_cfg
,
674 static void pbm_scan_bus(struct pci_controller_info
*p
,
675 struct pci_pbm_info
*pbm
)
677 pbm
->pci_bus
= pci_scan_one_pbm(pbm
);
680 static void pci_sun4v_scan_bus(struct pci_controller_info
*p
)
682 struct property
*prop
;
683 struct device_node
*dp
;
685 if ((dp
= p
->pbm_A
.prom_node
) != NULL
) {
686 prop
= of_find_property(dp
, "66mhz-capable", NULL
);
687 p
->pbm_A
.is_66mhz_capable
= (prop
!= NULL
);
689 pbm_scan_bus(p
, &p
->pbm_A
);
691 if ((dp
= p
->pbm_B
.prom_node
) != NULL
) {
692 prop
= of_find_property(dp
, "66mhz-capable", NULL
);
693 p
->pbm_B
.is_66mhz_capable
= (prop
!= NULL
);
695 pbm_scan_bus(p
, &p
->pbm_B
);
698 /* XXX register error interrupt handlers XXX */
701 static unsigned long probe_existing_entries(struct pci_pbm_info
*pbm
,
702 struct pci_iommu
*iommu
)
704 struct pci_iommu_arena
*arena
= &iommu
->arena
;
705 unsigned long i
, cnt
= 0;
708 devhandle
= pbm
->devhandle
;
709 for (i
= 0; i
< arena
->limit
; i
++) {
710 unsigned long ret
, io_attrs
, ra
;
712 ret
= pci_sun4v_iommu_getmap(devhandle
,
716 if (page_in_phys_avail(ra
)) {
717 pci_sun4v_iommu_demap(devhandle
,
718 HV_PCI_TSBID(0, i
), 1);
721 __set_bit(i
, arena
->map
);
729 static void pci_sun4v_iommu_init(struct pci_pbm_info
*pbm
)
731 struct pci_iommu
*iommu
= pbm
->iommu
;
732 struct property
*prop
;
733 unsigned long num_tsb_entries
, sz
;
734 u32 vdma
[2], dma_mask
, dma_offset
;
737 prop
= of_find_property(pbm
->prom_node
, "virtual-dma", NULL
);
739 u32
*val
= prop
->value
;
744 /* No property, use default values. */
745 vdma
[0] = 0x80000000;
746 vdma
[1] = 0x80000000;
752 dma_mask
|= 0x1fffffff;
757 dma_mask
|= 0x3fffffff;
762 dma_mask
|= 0x7fffffff;
767 prom_printf("PCI-SUN4V: strange virtual-dma size.\n");
771 tsbsize
*= (8 * 1024);
773 num_tsb_entries
= tsbsize
/ sizeof(iopte_t
);
775 dma_offset
= vdma
[0];
777 /* Setup initial software IOMMU state. */
778 spin_lock_init(&iommu
->lock
);
779 iommu
->ctx_lowest_free
= 1;
780 iommu
->page_table_map_base
= dma_offset
;
781 iommu
->dma_addr_mask
= dma_mask
;
783 /* Allocate and initialize the free area map. */
784 sz
= num_tsb_entries
/ 8;
785 sz
= (sz
+ 7UL) & ~7UL;
786 iommu
->arena
.map
= kzalloc(sz
, GFP_KERNEL
);
787 if (!iommu
->arena
.map
) {
788 prom_printf("PCI_IOMMU: Error, kmalloc(arena.map) failed.\n");
791 iommu
->arena
.limit
= num_tsb_entries
;
793 sz
= probe_existing_entries(pbm
, iommu
);
795 printk("%s: Imported %lu TSB entries from OBP\n",
799 static void pci_sun4v_get_bus_range(struct pci_pbm_info
*pbm
)
801 struct property
*prop
;
802 unsigned int *busrange
;
804 prop
= of_find_property(pbm
->prom_node
, "bus-range", NULL
);
806 busrange
= prop
->value
;
808 pbm
->pci_first_busno
= busrange
[0];
809 pbm
->pci_last_busno
= busrange
[1];
813 #ifdef CONFIG_PCI_MSI
814 struct pci_sun4v_msiq_entry
{
816 #define MSIQ_VERSION_MASK 0xffffffff00000000UL
817 #define MSIQ_VERSION_SHIFT 32
818 #define MSIQ_TYPE_MASK 0x00000000000000ffUL
819 #define MSIQ_TYPE_SHIFT 0
820 #define MSIQ_TYPE_NONE 0x00
821 #define MSIQ_TYPE_MSG 0x01
822 #define MSIQ_TYPE_MSI32 0x02
823 #define MSIQ_TYPE_MSI64 0x03
824 #define MSIQ_TYPE_INTX 0x08
825 #define MSIQ_TYPE_NONE2 0xff
830 u64 req_id
; /* bus/device/func */
831 #define MSIQ_REQID_BUS_MASK 0xff00UL
832 #define MSIQ_REQID_BUS_SHIFT 8
833 #define MSIQ_REQID_DEVICE_MASK 0x00f8UL
834 #define MSIQ_REQID_DEVICE_SHIFT 3
835 #define MSIQ_REQID_FUNC_MASK 0x0007UL
836 #define MSIQ_REQID_FUNC_SHIFT 0
840 /* The format of this value is message type dependent.
841 * For MSI bits 15:0 are the data from the MSI packet.
842 * For MSI-X bits 31:0 are the data from the MSI packet.
843 * For MSG, the message code and message routing code where:
844 * bits 39:32 is the bus/device/fn of the msg target-id
845 * bits 18:16 is the message routing code
846 * bits 7:0 is the message code
847 * For INTx the low order 2-bits are:
858 /* For now this just runs as a pre-handler for the real interrupt handler.
859 * So we just walk through the queue and ACK all the entries, update the
860 * head pointer, and return.
862 * In the longer term it would be nice to do something more integrated
863 * wherein we can pass in some of this MSI info to the drivers. This
864 * would be most useful for PCIe fabric error messages, although we could
865 * invoke those directly from the loop here in order to pass the info around.
867 static void pci_sun4v_msi_prehandler(unsigned int ino
, void *data1
, void *data2
)
869 struct pci_pbm_info
*pbm
= data1
;
870 struct pci_sun4v_msiq_entry
*base
, *ep
;
871 unsigned long msiqid
, orig_head
, head
, type
, err
;
873 msiqid
= (unsigned long) data2
;
876 err
= pci_sun4v_msiq_gethead(pbm
->devhandle
, msiqid
, &head
);
880 if (unlikely(head
>= (pbm
->msiq_ent_count
* sizeof(struct pci_sun4v_msiq_entry
))))
883 head
/= sizeof(struct pci_sun4v_msiq_entry
);
885 base
= (pbm
->msi_queues
+ ((msiqid
- pbm
->msiq_first
) *
886 (pbm
->msiq_ent_count
*
887 sizeof(struct pci_sun4v_msiq_entry
))));
889 while ((ep
->version_type
& MSIQ_TYPE_MASK
) != 0) {
890 type
= (ep
->version_type
& MSIQ_TYPE_MASK
) >> MSIQ_TYPE_SHIFT
;
891 if (unlikely(type
!= MSIQ_TYPE_MSI32
&&
892 type
!= MSIQ_TYPE_MSI64
))
895 pci_sun4v_msi_setstate(pbm
->devhandle
,
896 ep
->msi_data
/* msi_num */,
899 /* Clear the entry. */
900 ep
->version_type
&= ~MSIQ_TYPE_MASK
;
902 /* Go to next entry in ring. */
904 if (head
>= pbm
->msiq_ent_count
)
909 if (likely(head
!= orig_head
)) {
910 /* ACK entries by updating head pointer. */
911 head
*= sizeof(struct pci_sun4v_msiq_entry
);
912 err
= pci_sun4v_msiq_sethead(pbm
->devhandle
, msiqid
, head
);
919 printk(KERN_EMERG
"MSI: Hypervisor set head gives error %lu\n", err
);
923 printk(KERN_EMERG
"MSI: Hypervisor get head gives error %lu\n", err
);
926 printk(KERN_EMERG
"MSI: devhandle[%x] msiqid[%lx] head[%lu]\n",
927 pbm
->devhandle
, msiqid
, head
);
931 printk(KERN_EMERG
"MSI: Hypervisor gives bad offset %lx max(%lx)\n",
932 head
, pbm
->msiq_ent_count
* sizeof(struct pci_sun4v_msiq_entry
));
936 printk(KERN_EMERG
"MSI: Entry has bad type %lx\n", type
);
940 static int msi_bitmap_alloc(struct pci_pbm_info
*pbm
)
942 unsigned long size
, bits_per_ulong
;
944 bits_per_ulong
= sizeof(unsigned long) * 8;
945 size
= (pbm
->msi_num
+ (bits_per_ulong
- 1)) & ~(bits_per_ulong
- 1);
947 BUG_ON(size
% sizeof(unsigned long));
949 pbm
->msi_bitmap
= kzalloc(size
, GFP_KERNEL
);
950 if (!pbm
->msi_bitmap
)
956 static void msi_bitmap_free(struct pci_pbm_info
*pbm
)
958 kfree(pbm
->msi_bitmap
);
959 pbm
->msi_bitmap
= NULL
;
962 static int msi_queue_alloc(struct pci_pbm_info
*pbm
)
964 unsigned long q_size
, alloc_size
, pages
, order
;
967 q_size
= pbm
->msiq_ent_count
* sizeof(struct pci_sun4v_msiq_entry
);
968 alloc_size
= (pbm
->msiq_num
* q_size
);
969 order
= get_order(alloc_size
);
970 pages
= __get_free_pages(GFP_KERNEL
| __GFP_COMP
, order
);
972 printk(KERN_ERR
"MSI: Cannot allocate MSI queues (o=%lu).\n",
976 memset((char *)pages
, 0, PAGE_SIZE
<< order
);
977 pbm
->msi_queues
= (void *) pages
;
979 for (i
= 0; i
< pbm
->msiq_num
; i
++) {
980 unsigned long err
, base
= __pa(pages
+ (i
* q_size
));
981 unsigned long ret1
, ret2
;
983 err
= pci_sun4v_msiq_conf(pbm
->devhandle
,
985 base
, pbm
->msiq_ent_count
);
987 printk(KERN_ERR
"MSI: msiq register fails (err=%lu)\n",
992 err
= pci_sun4v_msiq_info(pbm
->devhandle
,
996 printk(KERN_ERR
"MSI: Cannot read msiq (err=%lu)\n",
1000 if (ret1
!= base
|| ret2
!= pbm
->msiq_ent_count
) {
1001 printk(KERN_ERR
"MSI: Bogus qconf "
1002 "expected[%lx:%x] got[%lx:%lx]\n",
1003 base
, pbm
->msiq_ent_count
,
1012 free_pages(pages
, order
);
1016 static void pci_sun4v_msi_init(struct pci_pbm_info
*pbm
)
1021 val
= of_get_property(pbm
->prom_node
, "#msi-eqs", &len
);
1022 if (!val
|| len
!= 4)
1024 pbm
->msiq_num
= *val
;
1025 if (pbm
->msiq_num
) {
1026 const struct msiq_prop
{
1031 const struct msi_range_prop
{
1035 const struct addr_range_prop
{
1044 val
= of_get_property(pbm
->prom_node
, "msi-eq-size", &len
);
1045 if (!val
|| len
!= 4)
1048 pbm
->msiq_ent_count
= *val
;
1050 mqp
= of_get_property(pbm
->prom_node
,
1051 "msi-eq-to-devino", &len
);
1052 if (!mqp
|| len
!= sizeof(struct msiq_prop
))
1055 pbm
->msiq_first
= mqp
->first_msiq
;
1056 pbm
->msiq_first_devino
= mqp
->first_devino
;
1058 val
= of_get_property(pbm
->prom_node
, "#msi", &len
);
1059 if (!val
|| len
!= 4)
1061 pbm
->msi_num
= *val
;
1063 mrng
= of_get_property(pbm
->prom_node
, "msi-ranges", &len
);
1064 if (!mrng
|| len
!= sizeof(struct msi_range_prop
))
1066 pbm
->msi_first
= mrng
->first_msi
;
1068 val
= of_get_property(pbm
->prom_node
, "msi-data-mask", &len
);
1069 if (!val
|| len
!= 4)
1071 pbm
->msi_data_mask
= *val
;
1073 val
= of_get_property(pbm
->prom_node
, "msix-data-width", &len
);
1074 if (!val
|| len
!= 4)
1076 pbm
->msix_data_width
= *val
;
1078 arng
= of_get_property(pbm
->prom_node
, "msi-address-ranges",
1080 if (!arng
|| len
!= sizeof(struct addr_range_prop
))
1082 pbm
->msi32_start
= ((u64
)arng
->msi32_high
<< 32) |
1083 (u64
) arng
->msi32_low
;
1084 pbm
->msi64_start
= ((u64
)arng
->msi64_high
<< 32) |
1085 (u64
) arng
->msi64_low
;
1086 pbm
->msi32_len
= arng
->msi32_len
;
1087 pbm
->msi64_len
= arng
->msi64_len
;
1089 if (msi_bitmap_alloc(pbm
))
1092 if (msi_queue_alloc(pbm
)) {
1093 msi_bitmap_free(pbm
);
1097 printk(KERN_INFO
"%s: MSI Queue first[%u] num[%u] count[%u] "
1100 pbm
->msiq_first
, pbm
->msiq_num
,
1101 pbm
->msiq_ent_count
,
1102 pbm
->msiq_first_devino
);
1103 printk(KERN_INFO
"%s: MSI first[%u] num[%u] mask[0x%x] "
1106 pbm
->msi_first
, pbm
->msi_num
, pbm
->msi_data_mask
,
1107 pbm
->msix_data_width
);
1108 printk(KERN_INFO
"%s: MSI addr32[0x%lx:0x%x] "
1109 "addr64[0x%lx:0x%x]\n",
1111 pbm
->msi32_start
, pbm
->msi32_len
,
1112 pbm
->msi64_start
, pbm
->msi64_len
);
1113 printk(KERN_INFO
"%s: MSI queues at RA [%p]\n",
1122 printk(KERN_INFO
"%s: No MSI support.\n", pbm
->name
);
1125 static int alloc_msi(struct pci_pbm_info
*pbm
)
1129 for (i
= 0; i
< pbm
->msi_num
; i
++) {
1130 if (!test_and_set_bit(i
, pbm
->msi_bitmap
))
1131 return i
+ pbm
->msi_first
;
1137 static void free_msi(struct pci_pbm_info
*pbm
, int msi_num
)
1139 msi_num
-= pbm
->msi_first
;
1140 clear_bit(msi_num
, pbm
->msi_bitmap
);
1143 static int pci_sun4v_setup_msi_irq(unsigned int *virt_irq_p
,
1144 struct pci_dev
*pdev
,
1145 struct msi_desc
*entry
)
1147 struct pci_pbm_info
*pbm
= pdev
->dev
.archdata
.host_controller
;
1148 unsigned long devino
, msiqid
;
1154 msi_num
= alloc_msi(pbm
);
1158 devino
= sun4v_build_msi(pbm
->devhandle
, virt_irq_p
,
1159 pbm
->msiq_first_devino
,
1160 (pbm
->msiq_first_devino
+
1166 set_irq_msi(*virt_irq_p
, entry
);
1168 msiqid
= ((devino
- pbm
->msiq_first_devino
) +
1172 if (pci_sun4v_msiq_setstate(pbm
->devhandle
, msiqid
, HV_MSIQSTATE_IDLE
))
1176 if (pci_sun4v_msiq_setvalid(pbm
->devhandle
, msiqid
, HV_MSIQ_VALID
))
1179 if (pci_sun4v_msi_setmsiq(pbm
->devhandle
,
1181 (entry
->msi_attrib
.is_64
?
1182 HV_MSITYPE_MSI64
: HV_MSITYPE_MSI32
)))
1185 if (pci_sun4v_msi_setstate(pbm
->devhandle
, msi_num
, HV_MSISTATE_IDLE
))
1188 if (pci_sun4v_msi_setvalid(pbm
->devhandle
, msi_num
, HV_MSIVALID_VALID
))
1191 pdev
->dev
.archdata
.msi_num
= msi_num
;
1193 if (entry
->msi_attrib
.is_64
) {
1194 msg
.address_hi
= pbm
->msi64_start
>> 32;
1195 msg
.address_lo
= pbm
->msi64_start
& 0xffffffff;
1198 msg
.address_lo
= pbm
->msi32_start
;
1201 write_msi_msg(*virt_irq_p
, &msg
);
1203 irq_install_pre_handler(*virt_irq_p
,
1204 pci_sun4v_msi_prehandler
,
1205 pbm
, (void *) msiqid
);
1210 free_msi(pbm
, msi_num
);
1211 sun4v_destroy_msi(*virt_irq_p
);
1217 static void pci_sun4v_teardown_msi_irq(unsigned int virt_irq
,
1218 struct pci_dev
*pdev
)
1220 struct pci_pbm_info
*pbm
= pdev
->dev
.archdata
.host_controller
;
1221 unsigned long msiqid
, err
;
1222 unsigned int msi_num
;
1224 msi_num
= pdev
->dev
.archdata
.msi_num
;
1225 err
= pci_sun4v_msi_getmsiq(pbm
->devhandle
, msi_num
, &msiqid
);
1227 printk(KERN_ERR
"%s: getmsiq gives error %lu\n",
1232 pci_sun4v_msi_setvalid(pbm
->devhandle
, msi_num
, HV_MSIVALID_INVALID
);
1233 pci_sun4v_msiq_setvalid(pbm
->devhandle
, msiqid
, HV_MSIQ_INVALID
);
1235 free_msi(pbm
, msi_num
);
1237 /* The sun4v_destroy_msi() will liberate the devino and thus the MSIQ
1240 sun4v_destroy_msi(virt_irq
);
1242 #else /* CONFIG_PCI_MSI */
/* Stub used when CONFIG_PCI_MSI is not set: there is nothing to set up. */
static void pci_sun4v_msi_init(struct pci_pbm_info *pbm)
{
}
1246 #endif /* !(CONFIG_PCI_MSI) */
1248 static void pci_sun4v_pbm_init(struct pci_controller_info
*p
, struct device_node
*dp
, u32 devhandle
)
1250 struct pci_pbm_info
*pbm
;
1252 if (devhandle
& 0x40)
1258 pbm
->prom_node
= dp
;
1259 pbm
->pci_first_slot
= 1;
1261 pbm
->devhandle
= devhandle
;
1263 pbm
->name
= dp
->full_name
;
1265 printk("%s: SUN4V PCI Bus Module\n", pbm
->name
);
1267 pci_determine_mem_io_space(pbm
);
1269 pci_sun4v_get_bus_range(pbm
);
1270 pci_sun4v_iommu_init(pbm
);
1271 pci_sun4v_msi_init(pbm
);
1274 void sun4v_pci_init(struct device_node
*dp
, char *model_name
)
1276 struct pci_controller_info
*p
;
1277 struct pci_iommu
*iommu
;
1278 struct property
*prop
;
1279 struct linux_prom64_registers
*regs
;
1283 prop
= of_find_property(dp
, "reg", NULL
);
1286 devhandle
= (regs
->phys_addr
>> 32UL) & 0x0fffffff;
1288 for (p
= pci_controller_root
; p
; p
= p
->next
) {
1289 struct pci_pbm_info
*pbm
;
1291 if (p
->pbm_A
.prom_node
&& p
->pbm_B
.prom_node
)
1294 pbm
= (p
->pbm_A
.prom_node
?
1298 if (pbm
->devhandle
== (devhandle
^ 0x40)) {
1299 pci_sun4v_pbm_init(p
, dp
, devhandle
);
1304 for_each_possible_cpu(i
) {
1305 unsigned long page
= get_zeroed_page(GFP_ATOMIC
);
1308 goto fatal_memory_error
;
1310 per_cpu(pci_iommu_batch
, i
).pglist
= (u64
*) page
;
1313 p
= kzalloc(sizeof(struct pci_controller_info
), GFP_ATOMIC
);
1315 goto fatal_memory_error
;
1317 iommu
= kzalloc(sizeof(struct pci_iommu
), GFP_ATOMIC
);
1319 goto fatal_memory_error
;
1321 p
->pbm_A
.iommu
= iommu
;
1323 iommu
= kzalloc(sizeof(struct pci_iommu
), GFP_ATOMIC
);
1325 goto fatal_memory_error
;
1327 p
->pbm_B
.iommu
= iommu
;
1329 p
->next
= pci_controller_root
;
1330 pci_controller_root
= p
;
1332 p
->index
= pci_num_controllers
++;
1333 p
->pbms_same_domain
= 0;
1335 p
->scan_bus
= pci_sun4v_scan_bus
;
1336 #ifdef CONFIG_PCI_MSI
1337 p
->setup_msi_irq
= pci_sun4v_setup_msi_irq
;
1338 p
->teardown_msi_irq
= pci_sun4v_teardown_msi_irq
;
1340 p
->pci_ops
= &pci_sun4v_ops
;
1342 /* Like PSYCHO and SCHIZO we have a 2GB aligned area
1345 pci_memspace_mask
= 0x7fffffffUL
;
1347 pci_sun4v_pbm_init(p
, dp
, devhandle
);
1351 prom_printf("SUN4V_PCI: Fatal memory allocation error.\n");