/* pci_sun4v.c: SUN4V specific PCI controller support.
 *
 * Copyright (C) 2006, 2007 David S. Miller (davem@davemloft.net)
 */
#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/pci.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/interrupt.h>
#include <linux/percpu.h>
#include <linux/irq.h>
#include <linux/msi.h>

#include <asm/iommu.h>
#include <asm/irq.h>
#include <asm/pstate.h>
#include <asm/oplib.h>
#include <asm/hypervisor.h>
#include <asm/prom.h>

#include "pci_impl.h"
#include "iommu_common.h"

#include "pci_sun4v.h"
#define PGLIST_NENTS	(PAGE_SIZE / sizeof(u64))

struct iommu_batch {
        struct pci_dev  *pdev;          /* Device mapping is for.       */
        unsigned long   prot;           /* IOMMU page protections       */
        unsigned long   entry;          /* Index into IOTSB.            */
        u64             *pglist;        /* List of physical pages       */
        unsigned long   npages;         /* Number of pages in list.     */
};

static DEFINE_PER_CPU(struct iommu_batch, pci_iommu_batch);
/* Interrupts must be disabled.  */
static inline void pci_iommu_batch_start(struct pci_dev *pdev, unsigned long prot, unsigned long entry)
{
        struct iommu_batch *p = &__get_cpu_var(pci_iommu_batch);

        p->pdev   = pdev;
        p->prot   = prot;
        p->entry  = entry;
        p->npages = 0;
}
/* Interrupts must be disabled.  */
static long pci_iommu_batch_flush(struct iommu_batch *p)
{
        struct pci_pbm_info *pbm = p->pdev->dev.archdata.host_controller;
        unsigned long devhandle = pbm->devhandle;
        unsigned long prot = p->prot;
        unsigned long entry = p->entry;
        u64 *pglist = p->pglist;
        unsigned long npages = p->npages;

        while (npages != 0) {
                long num;

                num = pci_sun4v_iommu_map(devhandle, HV_PCI_TSBID(0, entry),
                                          npages, prot, __pa(pglist));
                if (unlikely(num < 0)) {
                        if (printk_ratelimit())
                                printk("pci_iommu_batch_flush: IOMMU map of "
                                       "[%08lx:%08lx:%lx:%lx:%lx] failed with "
                                       "status %ld\n",
                                       devhandle, HV_PCI_TSBID(0, entry),
                                       npages, prot, __pa(pglist), num);
                        return -1;
                }

                entry += num;
                npages -= num;
                pglist += num;
        }

        p->entry = entry;
        p->npages = 0;

        return 0;
}
/* Interrupts must be disabled.  */
static inline long pci_iommu_batch_add(u64 phys_page)
{
        struct iommu_batch *p = &__get_cpu_var(pci_iommu_batch);

        BUG_ON(p->npages >= PGLIST_NENTS);

        p->pglist[p->npages++] = phys_page;
        if (p->npages == PGLIST_NENTS)
                return pci_iommu_batch_flush(p);

        return 0;
}
/* Interrupts must be disabled.  */
static inline long pci_iommu_batch_end(void)
{
        struct iommu_batch *p = &__get_cpu_var(pci_iommu_batch);

        BUG_ON(p->npages >= PGLIST_NENTS);

        return pci_iommu_batch_flush(p);
}
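
/* Illustrative sketch, not part of the original file: the three batch
 * routines above are meant to be called in a start/add.../end sequence
 * with local interrupts disabled. The hypothetical helper below shows
 * that calling convention for a physically contiguous range; the real
 * callers (pci_4v_alloc_consistent, pci_4v_map_single) appear later.
 */
static long pci_iommu_batch_map_linear(struct pci_dev *pdev,
                                       unsigned long prot,
                                       unsigned long entry,
                                       u64 first_phys,
                                       unsigned long npages)
{
        unsigned long flags, n;
        long err = 0;

        local_irq_save(flags);
        pci_iommu_batch_start(pdev, prot, entry);
        for (n = 0; n < npages; n++) {
                /* Queue one IO page; flushes to the hypervisor when full. */
                err = pci_iommu_batch_add(first_phys + (n * IO_PAGE_SIZE));
                if (unlikely(err < 0L))
                        break;
        }
        if (err >= 0L)
                err = pci_iommu_batch_end(); /* flush any remainder */
        local_irq_restore(flags);

        return err;
}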
static long pci_arena_alloc(struct iommu_arena *arena, unsigned long npages)
{
        unsigned long n, i, start, end, limit;
        int pass;

        limit = arena->limit;
        start = arena->hint;
        pass = 0;

again:
        n = find_next_zero_bit(arena->map, limit, start);
        end = n + npages;
        if (unlikely(end >= limit)) {
                if (likely(pass < 1)) {
                        limit = start;
                        start = 0;
                        pass++;
                        goto again;
                } else {
                        /* Scanned the whole thing, give up. */
                        return -1;
                }
        }

        for (i = n; i < end; i++) {
                if (test_bit(i, arena->map)) {
                        start = i + 1;
                        goto again;
                }
        }

        for (i = n; i < end; i++)
                __set_bit(i, arena->map);

        arena->hint = end;

        return n;
}
static void pci_arena_free(struct iommu_arena *arena, unsigned long base, unsigned long npages)
{
        unsigned long i;

        for (i = base; i < (base + npages); i++)
                __clear_bit(i, arena->map);
}
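
/* Illustrative sketch, not part of the original file: pci_arena_alloc()
 * and pci_arena_free() implement a simple bitmap allocator over TSB
 * entries and must run under iommu->lock. This hypothetical wrapper
 * shows the locking pattern the real callers below follow.
 */
static long pci_arena_alloc_locked(struct iommu *iommu, unsigned long npages)
{
        unsigned long flags;
        long entry;

        spin_lock_irqsave(&iommu->lock, flags);
        entry = pci_arena_alloc(&iommu->arena, npages);
        spin_unlock_irqrestore(&iommu->lock, flags);

        return entry; /* -1 when no run of npages free entries exists */
}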
static void *pci_4v_alloc_consistent(struct pci_dev *pdev, size_t size, dma_addr_t *dma_addrp, gfp_t gfp)
{
        struct iommu *iommu;
        unsigned long flags, order, first_page, npages, n;
        void *ret;
        long entry;

        size = IO_PAGE_ALIGN(size);
        order = get_order(size);
        if (unlikely(order >= MAX_ORDER))
                return NULL;

        npages = size >> IO_PAGE_SHIFT;

        first_page = __get_free_pages(gfp, order);
        if (unlikely(first_page == 0UL))
                return NULL;

        memset((char *)first_page, 0, PAGE_SIZE << order);

        iommu = pdev->dev.archdata.iommu;

        spin_lock_irqsave(&iommu->lock, flags);
        entry = pci_arena_alloc(&iommu->arena, npages);
        spin_unlock_irqrestore(&iommu->lock, flags);

        if (unlikely(entry < 0L))
                goto arena_alloc_fail;

        *dma_addrp = (iommu->page_table_map_base +
                      (entry << IO_PAGE_SHIFT));
        ret = (void *) first_page;
        first_page = __pa(first_page);

        local_irq_save(flags);

        pci_iommu_batch_start(pdev,
                              (HV_PCI_MAP_ATTR_READ |
                               HV_PCI_MAP_ATTR_WRITE),
                              entry);

        for (n = 0; n < npages; n++) {
                long err = pci_iommu_batch_add(first_page + (n * PAGE_SIZE));
                if (unlikely(err < 0L))
                        goto iommu_map_fail;
        }

        if (unlikely(pci_iommu_batch_end() < 0L))
                goto iommu_map_fail;

        local_irq_restore(flags);

        return ret;

iommu_map_fail:
        /* Interrupts are disabled. */
        spin_lock(&iommu->lock);
        pci_arena_free(&iommu->arena, entry, npages);
        spin_unlock_irqrestore(&iommu->lock, flags);

        /* first_page was converted to a physical address above; convert
         * back before handing it to free_pages().
         */
        first_page = (unsigned long) __va(first_page);

arena_alloc_fail:
        free_pages(first_page, order);
        return NULL;
}
static void pci_4v_free_consistent(struct pci_dev *pdev, size_t size, void *cpu, dma_addr_t dvma)
{
        struct pci_pbm_info *pbm;
        struct iommu *iommu;
        unsigned long flags, order, npages, entry;
        u32 devhandle;

        npages = IO_PAGE_ALIGN(size) >> IO_PAGE_SHIFT;
        iommu = pdev->dev.archdata.iommu;
        pbm = pdev->dev.archdata.host_controller;
        devhandle = pbm->devhandle;
        entry = ((dvma - iommu->page_table_map_base) >> IO_PAGE_SHIFT);

        spin_lock_irqsave(&iommu->lock, flags);

        pci_arena_free(&iommu->arena, entry, npages);

        do {
                unsigned long num;

                num = pci_sun4v_iommu_demap(devhandle, HV_PCI_TSBID(0, entry),
                                            npages);
                entry += num;
                npages -= num;
        } while (npages != 0);

        spin_unlock_irqrestore(&iommu->lock, flags);

        order = get_order(size);
        if (order < 10)
                free_pages((unsigned long)cpu, order);
}
static dma_addr_t pci_4v_map_single(struct pci_dev *pdev, void *ptr, size_t sz, int direction)
{
        struct iommu *iommu;
        unsigned long flags, npages, oaddr;
        unsigned long i, base_paddr;
        u32 bus_addr, ret;
        unsigned long prot;
        long entry;

        iommu = pdev->dev.archdata.iommu;

        if (unlikely(direction == PCI_DMA_NONE))
                goto bad;

        oaddr = (unsigned long)ptr;
        npages = IO_PAGE_ALIGN(oaddr + sz) - (oaddr & IO_PAGE_MASK);
        npages >>= IO_PAGE_SHIFT;

        spin_lock_irqsave(&iommu->lock, flags);
        entry = pci_arena_alloc(&iommu->arena, npages);
        spin_unlock_irqrestore(&iommu->lock, flags);

        if (unlikely(entry < 0L))
                goto bad;

        bus_addr = (iommu->page_table_map_base +
                    (entry << IO_PAGE_SHIFT));
        ret = bus_addr | (oaddr & ~IO_PAGE_MASK);
        base_paddr = __pa(oaddr & IO_PAGE_MASK);
        prot = HV_PCI_MAP_ATTR_READ;
        if (direction != PCI_DMA_TODEVICE)
                prot |= HV_PCI_MAP_ATTR_WRITE;

        local_irq_save(flags);

        pci_iommu_batch_start(pdev, prot, entry);

        for (i = 0; i < npages; i++, base_paddr += IO_PAGE_SIZE) {
                long err = pci_iommu_batch_add(base_paddr);
                if (unlikely(err < 0L))
                        goto iommu_map_fail;
        }
        if (unlikely(pci_iommu_batch_end() < 0L))
                goto iommu_map_fail;

        local_irq_restore(flags);

        return ret;

bad:
        if (printk_ratelimit())
                WARN_ON(1);
        return PCI_DMA_ERROR_CODE;

iommu_map_fail:
        /* Interrupts are disabled. */
        spin_lock(&iommu->lock);
        pci_arena_free(&iommu->arena, entry, npages);
        spin_unlock_irqrestore(&iommu->lock, flags);

        return PCI_DMA_ERROR_CODE;
}
static void pci_4v_unmap_single(struct pci_dev *pdev, dma_addr_t bus_addr, size_t sz, int direction)
{
        struct pci_pbm_info *pbm;
        struct iommu *iommu;
        unsigned long flags, npages;
        long entry;
        u32 devhandle;

        if (unlikely(direction == PCI_DMA_NONE)) {
                if (printk_ratelimit())
                        WARN_ON(1);
                return;
        }

        iommu = pdev->dev.archdata.iommu;
        pbm = pdev->dev.archdata.host_controller;
        devhandle = pbm->devhandle;

        npages = IO_PAGE_ALIGN(bus_addr + sz) - (bus_addr & IO_PAGE_MASK);
        npages >>= IO_PAGE_SHIFT;
        bus_addr &= IO_PAGE_MASK;

        spin_lock_irqsave(&iommu->lock, flags);

        entry = (bus_addr - iommu->page_table_map_base) >> IO_PAGE_SHIFT;
        pci_arena_free(&iommu->arena, entry, npages);

        do {
                unsigned long num;

                num = pci_sun4v_iommu_demap(devhandle, HV_PCI_TSBID(0, entry),
                                            npages);
                entry += num;
                npages -= num;
        } while (npages != 0);

        spin_unlock_irqrestore(&iommu->lock, flags);
}
#define SG_ENT_PHYS_ADDRESS(SG)	\
        (__pa(page_address((SG)->page)) + (SG)->offset)

static inline long fill_sg(long entry, struct pci_dev *pdev,
                           struct scatterlist *sg,
                           int nused, int nelems, unsigned long prot)
{
        struct scatterlist *dma_sg = sg;
        struct scatterlist *sg_end = sg + nelems;
        unsigned long flags;
        int i;

        local_irq_save(flags);

        pci_iommu_batch_start(pdev, prot, entry);

        for (i = 0; i < nused; i++) {
                unsigned long pteval = ~0UL;
                u32 dma_npages;

                dma_npages = ((dma_sg->dma_address & (IO_PAGE_SIZE - 1UL)) +
                              dma_sg->dma_length +
                              ((IO_PAGE_SIZE - 1UL))) >> IO_PAGE_SHIFT;
                do {
                        unsigned long offset;
                        signed int len;

                        /* If we are here, we know we have at least one
                         * more page to map.  So walk forward until we
                         * hit a page crossing, and begin creating new
                         * mappings from that spot.
                         */
                        for (;;) {
                                unsigned long tmp;

                                tmp = SG_ENT_PHYS_ADDRESS(sg);
                                len = sg->length;
                                if (((tmp ^ pteval) >> IO_PAGE_SHIFT) != 0UL) {
                                        pteval = tmp & IO_PAGE_MASK;
                                        offset = tmp & (IO_PAGE_SIZE - 1UL);
                                        break;
                                }
                                if (((tmp ^ (tmp + len - 1UL)) >> IO_PAGE_SHIFT) != 0UL) {
                                        pteval = (tmp + IO_PAGE_SIZE) & IO_PAGE_MASK;
                                        offset = 0UL;
                                        len -= (IO_PAGE_SIZE - (tmp & (IO_PAGE_SIZE - 1UL)));
                                        break;
                                }
                                sg++;
                        }

                        pteval = (pteval & IOPTE_PAGE);
                        while (len > 0) {
                                long err;

                                err = pci_iommu_batch_add(pteval);
                                if (unlikely(err < 0L))
                                        goto iommu_map_failed;

                                pteval += IO_PAGE_SIZE;
                                len -= (IO_PAGE_SIZE - offset);
                                offset = 0;
                                dma_npages--;
                        }

                        pteval = (pteval & IOPTE_PAGE) + len;
                        sg++;

                        /* Skip over any tail mappings we've fully mapped,
                         * adjusting pteval along the way.  Stop when we
                         * detect a page crossing event.
                         */
                        while (sg < sg_end &&
                               (pteval << (64 - IO_PAGE_SHIFT)) != 0UL &&
                               (pteval == SG_ENT_PHYS_ADDRESS(sg)) &&
                               ((pteval ^
                                 (SG_ENT_PHYS_ADDRESS(sg) + sg->length - 1UL)) >> IO_PAGE_SHIFT) == 0UL) {
                                pteval += sg->length;
                                sg++;
                        }
                        if ((pteval << (64 - IO_PAGE_SHIFT)) == 0UL)
                                pteval = ~0UL;
                } while (dma_npages != 0);
                dma_sg++;
        }

        if (unlikely(pci_iommu_batch_end() < 0L))
                goto iommu_map_failed;

        local_irq_restore(flags);
        return 0;

iommu_map_failed:
        local_irq_restore(flags);
        return -1;
}
static int pci_4v_map_sg(struct pci_dev *pdev, struct scatterlist *sglist, int nelems, int direction)
{
        struct iommu *iommu;
        unsigned long flags, npages, prot;
        u32 dma_base;
        struct scatterlist *sgtmp;
        long entry, err;
        int used;

        /* Fast path single entry scatterlists. */
        if (nelems == 1) {
                sglist->dma_address =
                        pci_4v_map_single(pdev,
                                          (page_address(sglist->page) + sglist->offset),
                                          sglist->length, direction);
                if (unlikely(sglist->dma_address == PCI_DMA_ERROR_CODE))
                        return 0;
                sglist->dma_length = sglist->length;
                return 1;
        }

        iommu = pdev->dev.archdata.iommu;

        if (unlikely(direction == PCI_DMA_NONE))
                goto bad;

        /* Step 1: Prepare scatter list. */
        npages = prepare_sg(sglist, nelems);

        /* Step 2: Allocate a cluster and context, if necessary. */
        spin_lock_irqsave(&iommu->lock, flags);
        entry = pci_arena_alloc(&iommu->arena, npages);
        spin_unlock_irqrestore(&iommu->lock, flags);

        if (unlikely(entry < 0L))
                goto bad;

        dma_base = iommu->page_table_map_base +
                (entry << IO_PAGE_SHIFT);

        /* Step 3: Normalize DMA addresses. */
        used = nelems;

        sgtmp = sglist;
        while (used && sgtmp->dma_length) {
                sgtmp->dma_address += dma_base;
                sgtmp++;
                used--;
        }
        used = nelems - used;

        /* Step 4: Create the mappings. */
        prot = HV_PCI_MAP_ATTR_READ;
        if (direction != PCI_DMA_TODEVICE)
                prot |= HV_PCI_MAP_ATTR_WRITE;

        err = fill_sg(entry, pdev, sglist, used, nelems, prot);
        if (unlikely(err < 0L))
                goto iommu_map_failed;

        return used;

bad:
        if (printk_ratelimit())
                WARN_ON(1);
        return 0;

iommu_map_failed:
        spin_lock_irqsave(&iommu->lock, flags);
        pci_arena_free(&iommu->arena, entry, npages);
        spin_unlock_irqrestore(&iommu->lock, flags);

        return 0;
}
static void pci_4v_unmap_sg(struct pci_dev *pdev, struct scatterlist *sglist, int nelems, int direction)
{
        struct pci_pbm_info *pbm;
        struct iommu *iommu;
        unsigned long flags, i, npages;
        long entry;
        u32 devhandle, bus_addr;

        if (unlikely(direction == PCI_DMA_NONE)) {
                if (printk_ratelimit())
                        WARN_ON(1);
        }

        iommu = pdev->dev.archdata.iommu;
        pbm = pdev->dev.archdata.host_controller;
        devhandle = pbm->devhandle;

        bus_addr = sglist->dma_address & IO_PAGE_MASK;

        for (i = 1; i < nelems; i++)
                if (sglist[i].dma_length == 0)
                        break;
        i--;
        npages = (IO_PAGE_ALIGN(sglist[i].dma_address + sglist[i].dma_length) -
                  bus_addr) >> IO_PAGE_SHIFT;

        entry = ((bus_addr - iommu->page_table_map_base) >> IO_PAGE_SHIFT);

        spin_lock_irqsave(&iommu->lock, flags);

        pci_arena_free(&iommu->arena, entry, npages);

        do {
                unsigned long num;

                num = pci_sun4v_iommu_demap(devhandle, HV_PCI_TSBID(0, entry),
                                            npages);
                entry += num;
                npages -= num;
        } while (npages != 0);

        spin_unlock_irqrestore(&iommu->lock, flags);
}
static void pci_4v_dma_sync_single_for_cpu(struct pci_dev *pdev, dma_addr_t bus_addr, size_t sz, int direction)
{
        /* Nothing to do... */
}

static void pci_4v_dma_sync_sg_for_cpu(struct pci_dev *pdev, struct scatterlist *sglist, int nelems, int direction)
{
        /* Nothing to do... */
}
const struct pci_iommu_ops pci_sun4v_iommu_ops = {
        .alloc_consistent               = pci_4v_alloc_consistent,
        .free_consistent                = pci_4v_free_consistent,
        .map_single                     = pci_4v_map_single,
        .unmap_single                   = pci_4v_unmap_single,
        .map_sg                         = pci_4v_map_sg,
        .unmap_sg                       = pci_4v_unmap_sg,
        .dma_sync_single_for_cpu        = pci_4v_dma_sync_single_for_cpu,
        .dma_sync_sg_for_cpu            = pci_4v_dma_sync_sg_for_cpu,
};
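
/* Illustrative note, not part of the original file: this ops table is the
 * SUN4V backend for the generic sparc64 PCI DMA wrappers. Assuming the
 * pci_iommu_ops pointer used elsewhere in this tree is set to
 * &pci_sun4v_iommu_ops on SUN4V machines, a driver-level call such as
 * pci_alloc_consistent(pdev, size, &dma) dispatches to
 * pci_4v_alloc_consistent() above through this table.
 */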
static void pci_sun4v_scan_bus(struct pci_pbm_info *pbm)
{
        struct property *prop;
        struct device_node *dp;

        dp = pbm->prom_node;
        prop = of_find_property(dp, "66mhz-capable", NULL);
        pbm->is_66mhz_capable = (prop != NULL);
        pbm->pci_bus = pci_scan_one_pbm(pbm);

        /* XXX register error interrupt handlers XXX */
}
static unsigned long probe_existing_entries(struct pci_pbm_info *pbm,
                                            struct iommu *iommu)
{
        struct iommu_arena *arena = &iommu->arena;
        unsigned long i, cnt = 0;
        u32 devhandle;

        devhandle = pbm->devhandle;
        for (i = 0; i < arena->limit; i++) {
                unsigned long ret, io_attrs, ra;

                ret = pci_sun4v_iommu_getmap(devhandle,
                                             HV_PCI_TSBID(0, i),
                                             &io_attrs, &ra);
                if (ret == HV_EOK) {
                        if (page_in_phys_avail(ra)) {
                                pci_sun4v_iommu_demap(devhandle,
                                                      HV_PCI_TSBID(0, i), 1);
                        } else {
                                cnt++;
                                __set_bit(i, arena->map);
                        }
                }
        }

        return cnt;
}
static void pci_sun4v_iommu_init(struct pci_pbm_info *pbm)
{
        struct iommu *iommu = pbm->iommu;
        struct property *prop;
        unsigned long num_tsb_entries, sz;
        u32 vdma[2], dma_mask, dma_offset;
        int tsbsize;

        prop = of_find_property(pbm->prom_node, "virtual-dma", NULL);
        if (prop) {
                u32 *val = prop->value;

                vdma[0] = val[0];
                vdma[1] = val[1];
        } else {
                /* No property, use default values. */
                vdma[0] = 0x80000000;
                vdma[1] = 0x80000000;
        }

        dma_mask = vdma[0];
        switch (vdma[1]) {
                case 0x20000000:
                        dma_mask |= 0x1fffffff;
                        tsbsize = 64;
                        break;

                case 0x40000000:
                        dma_mask |= 0x3fffffff;
                        tsbsize = 128;
                        break;

                case 0x80000000:
                        dma_mask |= 0x7fffffff;
                        tsbsize = 256;
                        break;

                default:
                        prom_printf("PCI-SUN4V: strange virtual-dma size.\n");
                        prom_halt();
        };

        tsbsize *= (8 * 1024);

        num_tsb_entries = tsbsize / sizeof(iopte_t);

        dma_offset = vdma[0];

        /* Setup initial software IOMMU state. */
        spin_lock_init(&iommu->lock);
        iommu->ctx_lowest_free = 1;
        iommu->page_table_map_base = dma_offset;
        iommu->dma_addr_mask = dma_mask;

        /* Allocate and initialize the free area map. */
        sz = num_tsb_entries / 8;
        sz = (sz + 7UL) & ~7UL;
        iommu->arena.map = kzalloc(sz, GFP_KERNEL);
        if (!iommu->arena.map) {
                prom_printf("PCI_IOMMU: Error, kmalloc(arena.map) failed.\n");
                prom_halt();
        }
        iommu->arena.limit = num_tsb_entries;

        sz = probe_existing_entries(pbm, iommu);
        if (sz)
                printk("%s: Imported %lu TSB entries from OBP\n",
                       pbm->name, sz);
}
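
/* Worked example, not part of the original file: with the default
 * "virtual-dma" values vdma[0] = vdma[1] = 0x80000000 above, the switch
 * picks dma_mask = 0x80000000 | 0x7fffffff = 0xffffffff and tsbsize = 256,
 * so the TSB is 256 * 8K = 2MB, giving num_tsb_entries = 2MB / 8 = 256K
 * IOTTEs; at one 8K IO page each, that maps the full 2GB DMA window
 * starting at virtual DMA address 0x80000000.
 */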
#ifdef CONFIG_PCI_MSI
struct pci_sun4v_msiq_entry {
        u64             version_type;
#define MSIQ_VERSION_MASK               0xffffffff00000000UL
#define MSIQ_VERSION_SHIFT              32
#define MSIQ_TYPE_MASK                  0x00000000000000ffUL
#define MSIQ_TYPE_SHIFT                 0
#define MSIQ_TYPE_NONE                  0x00
#define MSIQ_TYPE_MSG                   0x01
#define MSIQ_TYPE_MSI32                 0x02
#define MSIQ_TYPE_MSI64                 0x03
#define MSIQ_TYPE_INTX                  0x08
#define MSIQ_TYPE_NONE2                 0xff

        u64             intx_sysino;    /* INTX only */
        u64             reserved1;
        u64             stick;
        u64             req_id;         /* bus/device/func */
#define MSIQ_REQID_BUS_MASK             0xff00UL
#define MSIQ_REQID_BUS_SHIFT            8
#define MSIQ_REQID_DEVICE_MASK          0x00f8UL
#define MSIQ_REQID_DEVICE_SHIFT         3
#define MSIQ_REQID_FUNC_MASK            0x0007UL
#define MSIQ_REQID_FUNC_SHIFT           0

        u64             msi_address;

        /* The format of this value is message type dependent.
         * For MSI bits 15:0 are the data from the MSI packet.
         * For MSI-X bits 31:0 are the data from the MSI packet.
         * For MSG, the message code and message routing code where:
         *      bits 39:32 is the bus/device/fn of the msg target-id
         *      bits 18:16 is the message routing code
         *      bits 7:0 is the message code
         * For INTx the low order 2-bits are:
         *      00 - INTA
         *      01 - INTB
         *      10 - INTC
         *      11 - INTD
         */
        u64             msi_data;

        u64             reserved2;
};
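
/* Illustrative sketch, not part of the original file: hypothetical helpers
 * showing how the MSIQ_REQID_* masks above pull the requester's
 * bus/device/function out of a queue entry.
 */
static inline unsigned long msiq_reqid_bus(const struct pci_sun4v_msiq_entry *ep)
{
        return (ep->req_id & MSIQ_REQID_BUS_MASK) >> MSIQ_REQID_BUS_SHIFT;
}

static inline unsigned long msiq_reqid_device(const struct pci_sun4v_msiq_entry *ep)
{
        return (ep->req_id & MSIQ_REQID_DEVICE_MASK) >> MSIQ_REQID_DEVICE_SHIFT;
}

static inline unsigned long msiq_reqid_func(const struct pci_sun4v_msiq_entry *ep)
{
        return (ep->req_id & MSIQ_REQID_FUNC_MASK) >> MSIQ_REQID_FUNC_SHIFT;
}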
/* For now this just runs as a pre-handler for the real interrupt handler.
 * So we just walk through the queue and ACK all the entries, update the
 * head pointer, and return.
 *
 * In the longer term it would be nice to do something more integrated
 * wherein we can pass in some of this MSI info to the drivers.  This
 * would be most useful for PCIe fabric error messages, although we could
 * invoke those directly from the loop here in order to pass the info around.
 */
static void pci_sun4v_msi_prehandler(unsigned int ino, void *data1, void *data2)
{
        struct pci_pbm_info *pbm = data1;
        struct pci_sun4v_msiq_entry *base, *ep;
        unsigned long msiqid, orig_head, head, type, err;

        msiqid = (unsigned long) data2;

        head = 0xdeadbeef;
        err = pci_sun4v_msiq_gethead(pbm->devhandle, msiqid, &head);
        if (unlikely(err))
                goto hv_error_get;

        if (unlikely(head >= (pbm->msiq_ent_count * sizeof(struct pci_sun4v_msiq_entry))))
                goto bad_offset;

        head /= sizeof(struct pci_sun4v_msiq_entry);
        orig_head = head;
        base = (pbm->msi_queues + ((msiqid - pbm->msiq_first) *
                                   (pbm->msiq_ent_count *
                                    sizeof(struct pci_sun4v_msiq_entry))));
        ep = &base[head];
        while ((ep->version_type & MSIQ_TYPE_MASK) != 0) {
                type = (ep->version_type & MSIQ_TYPE_MASK) >> MSIQ_TYPE_SHIFT;
                if (unlikely(type != MSIQ_TYPE_MSI32 &&
                             type != MSIQ_TYPE_MSI64))
                        goto bad_type;

                pci_sun4v_msi_setstate(pbm->devhandle,
                                       ep->msi_data /* msi_num */,
                                       HV_MSISTATE_IDLE);

                /* Clear the entry. */
                ep->version_type &= ~MSIQ_TYPE_MASK;

                /* Go to next entry in ring. */
                head++;
                if (head >= pbm->msiq_ent_count)
                        head = 0;
                ep = &base[head];
        }

        if (likely(head != orig_head)) {
                /* ACK entries by updating head pointer. */
                head *= sizeof(struct pci_sun4v_msiq_entry);
                err = pci_sun4v_msiq_sethead(pbm->devhandle, msiqid, head);
                if (unlikely(err))
                        goto hv_error_set;
        }
        return;

hv_error_set:
        printk(KERN_EMERG "MSI: Hypervisor set head gives error %lu\n", err);
        goto hv_error_cont;

hv_error_get:
        printk(KERN_EMERG "MSI: Hypervisor get head gives error %lu\n", err);

hv_error_cont:
        printk(KERN_EMERG "MSI: devhandle[%x] msiqid[%lx] head[%lu]\n",
               pbm->devhandle, msiqid, head);
        return;

bad_offset:
        printk(KERN_EMERG "MSI: Hypervisor gives bad offset %lx max(%lx)\n",
               head, pbm->msiq_ent_count * sizeof(struct pci_sun4v_msiq_entry));
        return;

bad_type:
        printk(KERN_EMERG "MSI: Entry has bad type %lx\n", type);
        return;
}
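
/* Illustrative sketch, not part of the original file: the head bookkeeping
 * in the prehandler above. The hypervisor reports the queue head as a byte
 * offset while the walk works in entry units and wraps at msiq_ent_count,
 * so the two conversions reduce to these hypothetical helpers.
 */
static inline unsigned long msiq_head_to_index(unsigned long head_bytes)
{
        return head_bytes / sizeof(struct pci_sun4v_msiq_entry);
}

static inline unsigned long msiq_next_index(struct pci_pbm_info *pbm,
                                            unsigned long idx)
{
        return (idx + 1 >= pbm->msiq_ent_count) ? 0 : idx + 1;
}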
static int msi_bitmap_alloc(struct pci_pbm_info *pbm)
{
        unsigned long size, bits_per_ulong;

        bits_per_ulong = sizeof(unsigned long) * 8;
        size = (pbm->msi_num + (bits_per_ulong - 1)) & ~(bits_per_ulong - 1);
        size /= 8;
        BUG_ON(size % sizeof(unsigned long));

        pbm->msi_bitmap = kzalloc(size, GFP_KERNEL);
        if (!pbm->msi_bitmap)
                return -ENOMEM;

        return 0;
}
static void msi_bitmap_free(struct pci_pbm_info *pbm)
{
        kfree(pbm->msi_bitmap);
        pbm->msi_bitmap = NULL;
}
static int msi_queue_alloc(struct pci_pbm_info *pbm)
{
        unsigned long q_size, alloc_size, pages, order;
        int i;

        q_size = pbm->msiq_ent_count * sizeof(struct pci_sun4v_msiq_entry);
        alloc_size = (pbm->msiq_num * q_size);
        order = get_order(alloc_size);
        pages = __get_free_pages(GFP_KERNEL | __GFP_COMP, order);
        if (pages == 0UL) {
                printk(KERN_ERR "MSI: Cannot allocate MSI queues (o=%lu).\n",
                       order);
                return -ENOMEM;
        }
        memset((char *)pages, 0, PAGE_SIZE << order);
        pbm->msi_queues = (void *) pages;

        for (i = 0; i < pbm->msiq_num; i++) {
                unsigned long err, base = __pa(pages + (i * q_size));
                unsigned long ret1, ret2;

                err = pci_sun4v_msiq_conf(pbm->devhandle,
                                          pbm->msiq_first + i,
                                          base, pbm->msiq_ent_count);
                if (err) {
                        printk(KERN_ERR "MSI: msiq register fails (err=%lu)\n",
                               err);
                        goto h_error;
                }

                err = pci_sun4v_msiq_info(pbm->devhandle,
                                          pbm->msiq_first + i,
                                          &ret1, &ret2);
                if (err) {
                        printk(KERN_ERR "MSI: Cannot read msiq (err=%lu)\n",
                               err);
                        goto h_error;
                }
                if (ret1 != base || ret2 != pbm->msiq_ent_count) {
                        printk(KERN_ERR "MSI: Bogus qconf "
                               "expected[%lx:%x] got[%lx:%lx]\n",
                               base, pbm->msiq_ent_count,
                               ret1, ret2);
                        goto h_error;
                }
        }

        return 0;

h_error:
        free_pages(pages, order);
        return -EINVAL;
}
static int alloc_msi(struct pci_pbm_info *pbm)
{
        int i;

        for (i = 0; i < pbm->msi_num; i++) {
                if (!test_and_set_bit(i, pbm->msi_bitmap))
                        return i + pbm->msi_first;
        }

        return -ENOENT;
}

static void free_msi(struct pci_pbm_info *pbm, int msi_num)
{
        msi_num -= pbm->msi_first;
        clear_bit(msi_num, pbm->msi_bitmap);
}
static int pci_sun4v_setup_msi_irq(unsigned int *virt_irq_p,
                                   struct pci_dev *pdev,
                                   struct msi_desc *entry)
{
        struct pci_pbm_info *pbm = pdev->dev.archdata.host_controller;
        unsigned long devino, msiqid;
        struct msi_msg msg;
        int msi_num, err;

        *virt_irq_p = 0;

        msi_num = alloc_msi(pbm);
        if (msi_num < 0)
                return msi_num;

        devino = sun4v_build_msi(pbm->devhandle, virt_irq_p,
                                 pbm->msiq_first_devino,
                                 (pbm->msiq_first_devino +
                                  pbm->msiq_num));
        err = -ENOMEM;
        if (!devino)
                goto out_err;

        msiqid = ((devino - pbm->msiq_first_devino) +
                  pbm->msiq_first);

        err = -EINVAL;
        if (pci_sun4v_msiq_setstate(pbm->devhandle, msiqid, HV_MSIQSTATE_IDLE))
                goto out_err;

        if (pci_sun4v_msiq_setvalid(pbm->devhandle, msiqid, HV_MSIQ_VALID))
                goto out_err;

        if (pci_sun4v_msi_setmsiq(pbm->devhandle,
                                  msi_num, msiqid,
                                  (entry->msi_attrib.is_64 ?
                                   HV_MSITYPE_MSI64 : HV_MSITYPE_MSI32)))
                goto out_err;

        if (pci_sun4v_msi_setstate(pbm->devhandle, msi_num, HV_MSISTATE_IDLE))
                goto out_err;

        if (pci_sun4v_msi_setvalid(pbm->devhandle, msi_num, HV_MSIVALID_VALID))
                goto out_err;

        pdev->dev.archdata.msi_num = msi_num;

        if (entry->msi_attrib.is_64) {
                msg.address_hi = pbm->msi64_start >> 32;
                msg.address_lo = pbm->msi64_start & 0xffffffff;
        } else {
                msg.address_hi = 0;
                msg.address_lo = pbm->msi32_start;
        }
        msg.data = msi_num;

        set_irq_msi(*virt_irq_p, entry);
        write_msi_msg(*virt_irq_p, &msg);

        irq_install_pre_handler(*virt_irq_p,
                                pci_sun4v_msi_prehandler,
                                pbm, (void *) msiqid);

        return 0;

out_err:
        free_msi(pbm, msi_num);
        sun4v_destroy_msi(*virt_irq_p);
        *virt_irq_p = 0;
        return err;
}
static void pci_sun4v_teardown_msi_irq(unsigned int virt_irq,
                                       struct pci_dev *pdev)
{
        struct pci_pbm_info *pbm = pdev->dev.archdata.host_controller;
        unsigned long msiqid, err;
        unsigned int msi_num;

        msi_num = pdev->dev.archdata.msi_num;
        err = pci_sun4v_msi_getmsiq(pbm->devhandle, msi_num, &msiqid);
        if (err) {
                printk(KERN_ERR "%s: getmsiq gives error %lu\n",
                       pbm->name, err);
                return;
        }

        pci_sun4v_msi_setvalid(pbm->devhandle, msi_num, HV_MSIVALID_INVALID);
        pci_sun4v_msiq_setvalid(pbm->devhandle, msiqid, HV_MSIQ_INVALID);

        free_msi(pbm, msi_num);

        /* The sun4v_destroy_msi() will liberate the devino and thus the MSIQ
         * allocation.
         */
        sun4v_destroy_msi(virt_irq);
}
static void pci_sun4v_msi_init(struct pci_pbm_info *pbm)
{
        const u32 *val;
        int len;

        val = of_get_property(pbm->prom_node, "#msi-eqs", &len);
        if (!val || len != 4)
                goto no_msi;
        pbm->msiq_num = *val;
        if (pbm->msiq_num) {
                const struct msiq_prop {
                        u32 first_msiq;
                        u32 num_msiq;
                        u32 first_devino;
                } *mqp;
                const struct msi_range_prop {
                        u32 first_msi;
                        u32 num_msi;
                } *mrng;
                const struct addr_range_prop {
                        u32 msi32_high;
                        u32 msi32_low;
                        u32 msi32_len;
                        u32 msi64_high;
                        u32 msi64_low;
                        u32 msi64_len;
                } *arng;

                val = of_get_property(pbm->prom_node, "msi-eq-size", &len);
                if (!val || len != 4)
                        goto no_msi;

                pbm->msiq_ent_count = *val;

                mqp = of_get_property(pbm->prom_node,
                                      "msi-eq-to-devino", &len);
                if (!mqp || len != sizeof(struct msiq_prop))
                        goto no_msi;

                pbm->msiq_first = mqp->first_msiq;
                pbm->msiq_first_devino = mqp->first_devino;

                val = of_get_property(pbm->prom_node, "#msi", &len);
                if (!val || len != 4)
                        goto no_msi;
                pbm->msi_num = *val;

                mrng = of_get_property(pbm->prom_node, "msi-ranges", &len);
                if (!mrng || len != sizeof(struct msi_range_prop))
                        goto no_msi;
                pbm->msi_first = mrng->first_msi;

                val = of_get_property(pbm->prom_node, "msi-data-mask", &len);
                if (!val || len != 4)
                        goto no_msi;
                pbm->msi_data_mask = *val;

                val = of_get_property(pbm->prom_node, "msix-data-width", &len);
                if (!val || len != 4)
                        goto no_msi;
                pbm->msix_data_width = *val;

                arng = of_get_property(pbm->prom_node, "msi-address-ranges",
                                       &len);
                if (!arng || len != sizeof(struct addr_range_prop))
                        goto no_msi;
                pbm->msi32_start = ((u64)arng->msi32_high << 32) |
                        (u64) arng->msi32_low;
                pbm->msi64_start = ((u64)arng->msi64_high << 32) |
                        (u64) arng->msi64_low;
                pbm->msi32_len = arng->msi32_len;
                pbm->msi64_len = arng->msi64_len;

                if (msi_bitmap_alloc(pbm))
                        goto no_msi;

                if (msi_queue_alloc(pbm)) {
                        msi_bitmap_free(pbm);
                        goto no_msi;
                }

                printk(KERN_INFO "%s: MSI Queue first[%u] num[%u] count[%u] "
                       "devino[0x%x]\n",
                       pbm->name,
                       pbm->msiq_first, pbm->msiq_num,
                       pbm->msiq_ent_count,
                       pbm->msiq_first_devino);
                printk(KERN_INFO "%s: MSI first[%u] num[%u] mask[0x%x] "
                       "width[%u]\n",
                       pbm->name,
                       pbm->msi_first, pbm->msi_num, pbm->msi_data_mask,
                       pbm->msix_data_width);
                printk(KERN_INFO "%s: MSI addr32[0x%lx:0x%x] "
                       "addr64[0x%lx:0x%x]\n",
                       pbm->name,
                       pbm->msi32_start, pbm->msi32_len,
                       pbm->msi64_start, pbm->msi64_len);
                printk(KERN_INFO "%s: MSI queues at RA [%p]\n",
                       pbm->name,
                       pbm->msi_queues);

                pbm->setup_msi_irq = pci_sun4v_setup_msi_irq;
                pbm->teardown_msi_irq = pci_sun4v_teardown_msi_irq;
        }

        return;

no_msi:
        pbm->msiq_num = 0;
        printk(KERN_INFO "%s: No MSI support.\n", pbm->name);
}
#else /* CONFIG_PCI_MSI */
static void pci_sun4v_msi_init(struct pci_pbm_info *pbm)
{
}
#endif /* !(CONFIG_PCI_MSI) */
static void pci_sun4v_pbm_init(struct pci_controller_info *p, struct device_node *dp, u32 devhandle)
{
        struct pci_pbm_info *pbm;

        if (devhandle & 0x40)
                pbm = &p->pbm_B;
        else
                pbm = &p->pbm_A;

        pbm->next = pci_pbm_root;
        pci_pbm_root = pbm;

        pbm->scan_bus = pci_sun4v_scan_bus;
        pbm->pci_ops = &sun4v_pci_ops;
        pbm->config_space_reg_bits = 12;

        pbm->index = pci_num_pbms++;

        pbm->parent = p;
        pbm->prom_node = dp;

        pbm->devhandle = devhandle;

        pbm->name = dp->full_name;

        printk("%s: SUN4V PCI Bus Module\n", pbm->name);

        pci_determine_mem_io_space(pbm);

        pci_get_pbm_props(pbm);
        pci_sun4v_iommu_init(pbm);
        pci_sun4v_msi_init(pbm);
}
void sun4v_pci_init(struct device_node *dp, char *model_name)
{
        struct pci_controller_info *p;
        struct pci_pbm_info *pbm;
        struct iommu *iommu;
        struct property *prop;
        struct linux_prom64_registers *regs;
        u32 devhandle;
        int i;

        prop = of_find_property(dp, "reg", NULL);
        regs = prop->value;

        devhandle = (regs->phys_addr >> 32UL) & 0x0fffffff;

        for (pbm = pci_pbm_root; pbm; pbm = pbm->next) {
                if (pbm->devhandle == (devhandle ^ 0x40)) {
                        pci_sun4v_pbm_init(pbm->parent, dp, devhandle);
                        return;
                }
        }

        for_each_possible_cpu(i) {
                unsigned long page = get_zeroed_page(GFP_ATOMIC);

                if (!page)
                        goto fatal_memory_error;

                per_cpu(pci_iommu_batch, i).pglist = (u64 *) page;
        }

        p = kzalloc(sizeof(struct pci_controller_info), GFP_ATOMIC);
        if (!p)
                goto fatal_memory_error;

        iommu = kzalloc(sizeof(struct iommu), GFP_ATOMIC);
        if (!iommu)
                goto fatal_memory_error;

        p->pbm_A.iommu = iommu;

        iommu = kzalloc(sizeof(struct iommu), GFP_ATOMIC);
        if (!iommu)
                goto fatal_memory_error;

        p->pbm_B.iommu = iommu;

        /* Like PSYCHO and SCHIZO we have a 2GB aligned area
         * for memory space.
         */
        pci_memspace_mask = 0x7fffffffUL;

        pci_sun4v_pbm_init(p, dp, devhandle);
        return;

fatal_memory_error:
        prom_printf("SUN4V_PCI: Fatal memory allocation error.\n");
        prom_halt();
}