2 * Copyright (c) 2007, 2008 QLogic Corporation. All rights reserved.
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
33 #include <linux/types.h>
34 #include <linux/device.h>
35 #include <linux/dmapool.h>
36 #include <linux/slab.h>
37 #include <linux/list.h>
38 #include <linux/highmem.h>
40 #include <linux/uio.h>
41 #include <linux/rbtree.h>
42 #include <linux/spinlock.h>
43 #include <linux/delay.h>
45 #include "ipath_kernel.h"
46 #include "ipath_user_sdma.h"
48 /* minimum size of header */
49 #define IPATH_USER_SDMA_MIN_HEADER_LENGTH 64
50 /* expected size of headers (for dma_pool) */
51 #define IPATH_USER_SDMA_EXP_HEADER_LENGTH 64
52 /* length mask in PBC (lower 11 bits) */
53 #define IPATH_PBC_LENGTH_MASK ((1 << 11) - 1)
55 struct ipath_user_sdma_pkt
{
56 u8 naddr
; /* dimension of addr (1..3) ... */
57 u32 counter
; /* sdma pkts queued counter for this entry */
58 u64 added
; /* global descq number of entries */
61 u32 offset
; /* offset for kvaddr, addr */
62 u32 length
; /* length in page */
63 u8 put_page
; /* should we put_page? */
64 u8 dma_mapped
; /* is page dma_mapped? */
65 struct page
*page
; /* may be NULL (coherent mem) */
66 void *kvaddr
; /* FIXME: only for pio hack */
68 } addr
[4]; /* max pages, any more and we coalesce */
69 struct list_head list
; /* list element */
72 struct ipath_user_sdma_queue
{
74 * pkts sent to dma engine are queued on this
75 * list head. the type of the elements of this
76 * list are struct ipath_user_sdma_pkt...
78 struct list_head sent
;
80 /* headers with expected length are allocated from here... */
81 char header_cache_name
[64];
82 struct dma_pool
*header_cache
;
84 /* packets are allocated from the slab cache... */
85 char pkt_slab_name
[64];
86 struct kmem_cache
*pkt_slab
;
88 /* as packets go on the queued queue, they are counted... */
93 struct rb_root dma_pages_root
;
95 /* protect everything above... */
/*
 * Allocate and initialise a user SDMA queue.
 *
 * The queue is kmalloc()ed with GFP_KERNEL, its sent_counter is zeroed,
 * and its sent list and lock are initialised.  A slab cache for packets
 * and a dma_pool for expected-length headers are then created, each named
 * after (unit, port, sport), and dma_pages_root is set to RB_ROOT.
 *
 * NOTE(review): the embedded line numbers skip (104-108, 118-122, 126,
 * 128, 130-131, 133-136, 138-144), so the allocation checks, the remaining
 * create() arguments, the unwind labels and the return statement are not
 * visible in this extract.  The kmem_cache_destroy() call near the end is
 * presumably the unwind for a failed dma_pool_create() -- confirm.
 */
99 struct ipath_user_sdma_queue
*
100 ipath_user_sdma_queue_create(struct device
*dev
, int unit
, int port
, int sport
)
102 struct ipath_user_sdma_queue
*pq
=
103 kmalloc(sizeof(struct ipath_user_sdma_queue
), GFP_KERNEL
);
109 pq
->sent_counter
= 0;
110 INIT_LIST_HEAD(&pq
->sent
);
112 mutex_init(&pq
->lock
);
/* packet slab cache, named per unit/port/subport */
114 snprintf(pq
->pkt_slab_name
, sizeof(pq
->pkt_slab_name
),
115 "ipath-user-sdma-pkts-%u-%02u.%02u", unit
, port
, sport
);
116 pq
->pkt_slab
= kmem_cache_create(pq
->pkt_slab_name
,
117 sizeof(struct ipath_user_sdma_pkt
),
/* header dma_pool, same naming scheme */
123 snprintf(pq
->header_cache_name
, sizeof(pq
->header_cache_name
),
124 "ipath-user-sdma-headers-%u-%02u.%02u", unit
, port
, sport
);
125 pq
->header_cache
= dma_pool_create(pq
->header_cache_name
,
127 IPATH_USER_SDMA_EXP_HEADER_LENGTH
,
129 if (!pq
->header_cache
)
132 pq
->dma_pages_root
= RB_ROOT
;
137 kmem_cache_destroy(pq
->pkt_slab
);
146 static void ipath_user_sdma_init_frag(struct ipath_user_sdma_pkt
*pkt
,
147 int i
, size_t offset
, size_t len
,
148 int put_page
, int dma_mapped
,
150 void *kvaddr
, dma_addr_t dma_addr
)
152 pkt
->addr
[i
].offset
= offset
;
153 pkt
->addr
[i
].length
= len
;
154 pkt
->addr
[i
].put_page
= put_page
;
155 pkt
->addr
[i
].dma_mapped
= dma_mapped
;
156 pkt
->addr
[i
].page
= page
;
157 pkt
->addr
[i
].kvaddr
= kvaddr
;
158 pkt
->addr
[i
].addr
= dma_addr
;
/*
 * Record the packet counter in @pkt and describe the header as fragment 0
 * through ipath_user_sdma_init_frag(), passing 0 for put_page.
 *
 * NOTE(review): embedded lines 164, 166-167 and 170-171 are missing from
 * this extract; `page` is used below, so it is presumably a parameter
 * declared on the missing line 164 -- confirm against the pristine file.
 */
161 static void ipath_user_sdma_init_header(struct ipath_user_sdma_pkt
*pkt
,
162 u32 counter
, size_t offset
,
163 size_t len
, int dma_mapped
,
165 void *kvaddr
, dma_addr_t dma_addr
)
168 pkt
->counter
= counter
;
/* the header always occupies fragment slot 0 */
169 ipath_user_sdma_init_frag(pkt
, 0, offset
, len
, 0, dma_mapped
, page
,
/*
 * Coalesce the payload into one freshly allocated page: each iovec element
 * is copied from user space back-to-back into the page, the filled part is
 * DMA-mapped, and the result is recorded as fragment 1 of @pkt.
 *
 * NOTE(review): many lines are missing from this extract (embedded numbers
 * skip 178, 180-192, 194-195, 198-202, 205-206, 208, 210-213, 215-225), so
 * the declarations, the error handling and the return value are not
 * visible here.
 */
173 /* we've too many pages in the iovec, coalesce to a single page */
174 static int ipath_user_sdma_coalesce(const struct ipath_devdata
*dd
,
175 struct ipath_user_sdma_pkt
*pkt
,
176 const struct iovec
*iov
,
177 unsigned long niov
) {
179 struct page
*page
= alloc_page(GFP_KERNEL
);
/* copy every element; mpage advances and len accumulates the total */
193 for (i
= 0; i
< niov
; i
++) {
196 cfur
= copy_from_user(mpage
,
197 iov
[i
].iov_base
, iov
[i
].iov_len
);
203 mpage
+= iov
[i
].iov_len
;
204 len
+= iov
[i
].iov_len
;
/* map only the len bytes that were filled, starting at page offset 0 */
207 dma_addr
= dma_map_page(&dd
->pcidev
->dev
, page
, 0, len
,
209 if (dma_mapping_error(&dd
->pcidev
->dev
, dma_addr
)) {
/* fragment 1: offset 0, put_page == 0, dma_mapped == 1 */
214 ipath_user_sdma_init_frag(pkt
, 1, 0, len
, 0, 1, page
, mpage_save
,
227 /* how many pages in this iovec element? */
228 static int ipath_user_sdma_num_pages(const struct iovec
*iov
)
230 const unsigned long addr
= (unsigned long) iov
->iov_base
;
231 const unsigned long len
= iov
->iov_len
;
232 const unsigned long spage
= addr
& PAGE_MASK
;
233 const unsigned long epage
= (addr
+ len
- 1) & PAGE_MASK
;
235 return 1 + ((epage
- spage
) >> PAGE_SHIFT
);
238 /* truncate length to page boundry */
239 static int ipath_user_sdma_page_length(unsigned long addr
, unsigned long len
)
241 const unsigned long offset
= addr
& ~PAGE_MASK
;
243 return ((offset
+ len
) > PAGE_SIZE
) ? (PAGE_SIZE
- offset
) : len
;
/*
 * Release the resources behind fragment i of @pkt.
 *
 * With a page attached, the visible code kunmap()s it when kvaddr is set
 * and then either put_page()s it (put_page flag set) or __free_page()s it.
 * Without a page but with kvaddr set, the buffer is handed back to
 * pq->header_cache with dma_pool_free().
 *
 * NOTE(review): embedded lines 249-252, 255-259, 262, 265 and 271-272 are
 * missing from this extract, so the last parameter, the action taken when
 * dma_mapped is set, and the keyword joining put_page() and __free_page()
 * (presumably an else) are not visible here.
 */
246 static void ipath_user_sdma_free_pkt_frag(struct device
*dev
,
247 struct ipath_user_sdma_queue
*pq
,
248 struct ipath_user_sdma_pkt
*pkt
,
253 if (pkt
->addr
[i
].page
) {
254 if (pkt
->addr
[i
].dma_mapped
)
260 if (pkt
->addr
[i
].kvaddr
)
261 kunmap(pkt
->addr
[i
].page
);
263 if (pkt
->addr
[i
].put_page
)
264 put_page(pkt
->addr
[i
].page
);
266 __free_page(pkt
->addr
[i
].page
);
267 } else if (pkt
->addr
[i
].kvaddr
)
268 /* free coherent mem from cache... */
269 dma_pool_free(pq
->header_cache
,
270 pkt
->addr
[i
].kvaddr
, pkt
->addr
[i
].addr
);
/*
 * Pin the user pages that back [addr, addr + tlen) with get_user_pages(),
 * DMA-map each one towards the device, and append one fragment per page
 * to @pkt at slot pkt->naddr (put_page == 1, dma_mapped == 1, kvaddr from
 * kmap()).
 *
 * NOTE(review): pages[] holds two entries while npages is passed straight
 * to get_user_pages() -- confirm callers never pass npages > 2.
 * NOTE(review): many embedded lines are missing (277, 279-281, 284-287,
 * 289-294, 297, 303, 305-308, 311-320); the error unwinding, the per-page
 * advance of addr/tlen and the return value are not visible here.
 */
273 /* return number of pages pinned... */
274 static int ipath_user_sdma_pin_pages(const struct ipath_devdata
*dd
,
275 struct ipath_user_sdma_pkt
*pkt
,
276 unsigned long addr
, int tlen
, int npages
)
278 struct page
*pages
[2];
282 ret
= get_user_pages(current
, current
->mm
, addr
,
283 npages
, 0, 1, pages
, NULL
);
288 for (i
= 0; i
< ret
; i
++)
295 for (j
= 0; j
< npages
; j
++) {
296 /* map the pages... */
298 ipath_user_sdma_page_length(addr
, tlen
);
299 dma_addr_t dma_addr
=
300 dma_map_page(&dd
->pcidev
->dev
,
301 pages
[j
], 0, flen
, DMA_TO_DEVICE
);
/* offset of addr inside its page */
302 unsigned long fofs
= addr
& ~PAGE_MASK
;
304 if (dma_mapping_error(&dd
->pcidev
->dev
, dma_addr
)) {
309 ipath_user_sdma_init_frag(pkt
, pkt
->naddr
, fofs
, flen
, 1, 1,
310 pages
[j
], kmap(pages
[j
]),
/*
 * Pin the payload of @pkt: for each iovec element, compute how many pages
 * it spans and pin/map them with ipath_user_sdma_pin_pages().
 *
 * The trailing loop frees every fragment recorded so far in @pkt; it is
 * presumably the error path taken when pinning fails -- the labels and
 * gotos that select it are not visible in this extract.
 *
 * NOTE(review): embedded lines 326-330, 334, 337-344 and 347-350 are
 * missing, including the last parameter, the local declarations and the
 * return statement.
 */
322 static int ipath_user_sdma_pin_pkt(const struct ipath_devdata
*dd
,
323 struct ipath_user_sdma_queue
*pq
,
324 struct ipath_user_sdma_pkt
*pkt
,
325 const struct iovec
*iov
,
331 for (idx
= 0; idx
< niov
; idx
++) {
332 const int npages
= ipath_user_sdma_num_pages(iov
+ idx
);
333 const unsigned long addr
= (unsigned long) iov
[idx
].iov_base
;
335 ret
= ipath_user_sdma_pin_pages(dd
, pkt
,
336 addr
, iov
[idx
].iov_len
,
/* release every fragment currently recorded in the packet */
345 for (idx
= 0; idx
< pkt
->naddr
; idx
++)
346 ipath_user_sdma_free_pkt_frag(&dd
->pcidev
->dev
, pq
, pkt
, idx
);
352 static int ipath_user_sdma_init_payload(const struct ipath_devdata
*dd
,
353 struct ipath_user_sdma_queue
*pq
,
354 struct ipath_user_sdma_pkt
*pkt
,
355 const struct iovec
*iov
,
356 unsigned long niov
, int npages
)
360 if (npages
>= ARRAY_SIZE(pkt
->addr
))
361 ret
= ipath_user_sdma_coalesce(dd
, pkt
, iov
, niov
);
363 ret
= ipath_user_sdma_pin_pkt(dd
, pq
, pkt
, iov
, niov
);
368 /* free a packet list -- return counter value of last packet */
369 static void ipath_user_sdma_free_pkt_list(struct device
*dev
,
370 struct ipath_user_sdma_queue
*pq
,
371 struct list_head
*list
)
373 struct ipath_user_sdma_pkt
*pkt
, *pkt_next
;
375 list_for_each_entry_safe(pkt
, pkt_next
, list
, list
) {
378 for (i
= 0; i
< pkt
->naddr
; i
++)
379 ipath_user_sdma_free_pkt_frag(dev
, pq
, pkt
, i
);
381 kmem_cache_free(pq
->pkt_slab
, pkt
);
/*
 * Build packets from the caller's iovec array and append them to @list.
 *
 * Per packet, the visible code: allocates a packet from pq->pkt_slab,
 * validates the header element (length between the minimum header length
 * and PAGE_SIZE, length and address 4-byte aligned), takes header storage
 * from pq->header_cache when the length equals the expected header length
 * (an alloc_page() call is also visible, presumably for other lengths),
 * copies the header from user space, reads the dword count from the PBC,
 * walks following iovec elements while checking their alignment, maps the
 * header page, then initialises header and payload and queues the packet.
 * The calls at the end (dma_unmap_page, dma_pool_free, kmem_cache_free,
 * free_pkt_list) are presumably the error unwind -- labels not visible.
 *
 * NOTE(review): the original comment below says the return value is the
 * "number of bytes total", but the visible caller multiplies the result by
 * a per-packet descriptor estimate -- confirm the real return semantics.
 * NOTE(review): a large share of the embedded line numbers is missing
 * from this extract, including the tail of the parameter list, most local
 * declarations, all goto/label lines and the return statement.
 */
386 * copy headers, coalesce etc -- pq->lock must be held
388 * we queue all the packets to list, returning the
389 * number of bytes total. list must be empty initially,
390 * as, if there is an error we clean it...
392 static int ipath_user_sdma_queue_pkts(const struct ipath_devdata
*dd
,
393 struct ipath_user_sdma_queue
*pq
,
394 struct list_head
*list
,
395 const struct iovec
*iov
,
399 unsigned long idx
= 0;
402 struct page
*page
= NULL
;
405 struct ipath_user_sdma_pkt
*pkt
= NULL
;
/* start numbering from the queue-wide counter */
408 u32 counter
= pq
->counter
;
411 while (idx
< niov
&& npkts
< maxpkts
) {
412 const unsigned long addr
= (unsigned long) iov
[idx
].iov_base
;
413 const unsigned long idx_save
= idx
;
421 len
= iov
[idx
].iov_len
;
425 pkt
= kmem_cache_alloc(pq
->pkt_slab
, GFP_KERNEL
);
/* header sanity: size bounds and 4-byte alignment of length and address */
431 if (len
< IPATH_USER_SDMA_MIN_HEADER_LENGTH
||
432 len
> PAGE_SIZE
|| len
& 3 || addr
& 3) {
/* expected-length headers are taken from the dma_pool */
437 if (len
== IPATH_USER_SDMA_EXP_HEADER_LENGTH
)
438 pbc
= dma_pool_alloc(pq
->header_cache
, GFP_KERNEL
,
444 page
= alloc_page(GFP_KERNEL
);
452 cfur
= copy_from_user(pbc
, iov
[idx
].iov_base
, len
);
459 * this assignment is a bit strange. it's because the
460 * the pbc counts the number of 32 bit words in the full
461 * packet _except_ the first word of the pbc itself...
466 * pktnw computation yields the number of 32 bit words
467 * that the caller has indicated in the PBC. note that
468 * this is one less than the total number of words that
469 * goes to the send DMA engine as the first 32 bit word
470 * of the PBC itself is not counted. Armed with this count,
471 * we can verify that the packet is consistent with the
474 pktnw
= le32_to_cpu(*pbc
) & IPATH_PBC_LENGTH_MASK
;
475 if (pktnw
< pktnwc
|| pktnw
> pktnwc
+ (PAGE_SIZE
>> 2)) {
482 while (pktnwc
< pktnw
&& idx
< niov
) {
483 const size_t slen
= iov
[idx
].iov_len
;
484 const unsigned long faddr
=
485 (unsigned long) iov
[idx
].iov_base
;
487 if (slen
& 3 || faddr
& 3 || !slen
||
494 if ((faddr
& PAGE_MASK
) !=
495 ((faddr
+ slen
- 1) & PAGE_MASK
))
503 if (pktnwc
!= pktnw
) {
509 dma_addr
= dma_map_page(&dd
->pcidev
->dev
,
510 page
, 0, len
, DMA_TO_DEVICE
);
511 if (dma_mapping_error(&dd
->pcidev
->dev
, dma_addr
)) {
519 ipath_user_sdma_init_header(pkt
, counter
, 0, len
, dma_mapped
,
520 page
, pbc
, dma_addr
);
523 ret
= ipath_user_sdma_init_payload(dd
, pq
, pkt
,
533 list_add_tail(&pkt
->list
, list
);
541 dma_unmap_page(&dd
->pcidev
->dev
, dma_addr
, len
, DMA_TO_DEVICE
);
547 dma_pool_free(pq
->header_cache
, pbc
, dma_addr
);
549 kmem_cache_free(pq
->pkt_slab
, pkt
);
551 ipath_user_sdma_free_pkt_list(&dd
->pcidev
->dev
, pq
, list
);
556 static void ipath_user_sdma_set_complete_counter(struct ipath_user_sdma_queue
*pq
,
559 pq
->sent_counter
= c
;
/*
 * Retire packets from pq->sent.
 *
 * Each packet on the sent list is compared against the device-wide
 * "removed" descriptor count (descd = removed - pkt->added) and moved to a
 * local free list.  If anything was moved, the counter of the last moved
 * packet is remembered, the free list is released, and that counter is
 * published through ipath_user_sdma_set_complete_counter().
 *
 * Note that pkt_prev is used as the lookahead cursor of
 * list_for_each_entry_safe(), i.e. it holds the next entry, not the
 * previous one.
 *
 * NOTE(review): embedded lines 565, 569-570, 572, 575-578, 580, 582-584,
 * 586-587, 591 and 594-597 are missing; the test applied to descd, the
 * declaration of counter and the return value are not visible here.
 */
562 /* try to clean out queue -- needs pq->lock */
563 static int ipath_user_sdma_queue_clean(const struct ipath_devdata
*dd
,
564 struct ipath_user_sdma_queue
*pq
)
566 struct list_head free_list
;
567 struct ipath_user_sdma_pkt
*pkt
;
568 struct ipath_user_sdma_pkt
*pkt_prev
;
571 INIT_LIST_HEAD(&free_list
);
573 list_for_each_entry_safe(pkt
, pkt_prev
, &pq
->sent
, list
) {
574 s64 descd
= dd
->ipath_sdma_descq_removed
- pkt
->added
;
579 list_move_tail(&pkt
->list
, &free_list
);
581 /* one more packet cleaned */
585 if (!list_empty(&free_list
)) {
/* the last entry moved carries the newest completed counter */
588 pkt
= list_entry(free_list
.prev
,
589 struct ipath_user_sdma_pkt
, list
);
590 counter
= pkt
->counter
;
592 ipath_user_sdma_free_pkt_list(&dd
->pcidev
->dev
, pq
, &free_list
);
593 ipath_user_sdma_set_complete_counter(pq
, counter
);
/*
 * Tear down a user SDMA queue: destroy the packet slab cache and the
 * header dma_pool created for @pq.
 *
 * NOTE(review): embedded lines 600-603 and 606-607 are missing from this
 * extract; any NULL check on @pq and the freeing of @pq itself are not
 * visible here -- confirm against the pristine file.
 */
599 void ipath_user_sdma_queue_destroy(struct ipath_user_sdma_queue
*pq
)
604 kmem_cache_destroy(pq
->pkt_slab
);
605 dma_pool_destroy(pq
->header_cache
);
609 /* clean descriptor queue, returns > 0 if some elements cleaned */
610 static int ipath_user_sdma_hwqueue_clean(struct ipath_devdata
*dd
)
615 spin_lock_irqsave(&dd
->ipath_sdma_lock
, flags
);
616 ret
= ipath_sdma_make_progress(dd
);
617 spin_unlock_irqrestore(&dd
->ipath_sdma_lock
, flags
);
/*
 * Drain the queue before it is torn down.
 *
 * Up to 100 rounds: under pq->lock, stop caring once pq->sent is empty,
 * otherwise clean the hardware queue and retire finished packets.  If
 * packets are still on pq->sent afterwards, a message is logged and the
 * remaining packets are spliced onto a local list and freed by force.
 *
 * NOTE(review): embedded lines 625-630, 635-636, 640-642, 645 and 652-653
 * are missing from this extract, so how the loop exits early and whether
 * it waits between rounds is not visible here.
 */
622 /* we're in close, drain packets so that we can cleanup successfully... */
623 void ipath_user_sdma_queue_drain(struct ipath_devdata
*dd
,
624 struct ipath_user_sdma_queue
*pq
)
631 for (i
= 0; i
< 100; i
++) {
632 mutex_lock(&pq
->lock
);
633 if (list_empty(&pq
->sent
)) {
634 mutex_unlock(&pq
->lock
);
637 ipath_user_sdma_hwqueue_clean(dd
);
638 ipath_user_sdma_queue_clean(dd
, pq
);
639 mutex_unlock(&pq
->lock
);
/* NOTE(review): this emptiness check runs without pq->lock held */
643 if (!list_empty(&pq
->sent
)) {
644 struct list_head free_list
;
646 printk(KERN_INFO
"drain: lists not empty: forcing!\n");
647 INIT_LIST_HEAD(&free_list
);
648 mutex_lock(&pq
->lock
);
/* detach everything from pq->sent in one step, then free it */
649 list_splice_init(&pq
->sent
, &free_list
);
650 ipath_user_sdma_free_pkt_list(&dd
->pcidev
->dev
, pq
, &free_list
);
651 mutex_unlock(&pq
->lock
);
655 static inline __le64
ipath_sdma_make_desc0(struct ipath_devdata
*dd
,
656 u64 addr
, u64 dwlen
, u64 dwoffset
)
658 return cpu_to_le64(/* SDmaPhyAddr[31:0] */
659 ((addr
& 0xfffffffcULL
) << 32) |
660 /* SDmaGeneration[1:0] */
661 ((dd
->ipath_sdma_generation
& 3ULL) << 30) |
662 /* SDmaDwordCount[10:0] */
663 ((dwlen
& 0x7ffULL
) << 16) |
664 /* SDmaBufOffset[12:2] */
665 (dwoffset
& 0x7ffULL
));
668 static inline __le64
ipath_sdma_make_first_desc0(__le64 descq
)
670 return descq
| __constant_cpu_to_le64(1ULL << 12);
673 static inline __le64
ipath_sdma_make_last_desc0(__le64 descq
)
675 /* last */ /* dma head */
676 return descq
| __constant_cpu_to_le64(1ULL << 11 | 1ULL << 13);
679 static inline __le64
ipath_sdma_make_desc1(u64 addr
)
681 /* SDmaPhyAddr[47:32] */
682 return cpu_to_le64(addr
>> 32);
/*
 * Write the descriptor for fragment @idx of @pkt into slot @tail of the
 * device descriptor queue.
 *
 * The DMA address is the fragment address plus its offset, and the length
 * is converted from bytes to dwords.  Quadword 0 is built with
 * ipath_sdma_make_desc0() and gets the "last" marking when @idx is the
 * final fragment; quadword 1 carries the upper address bits.
 *
 * NOTE(review): embedded lines 688, 692-694, 696, 698, 702-703 and 705 are
 * missing from this extract, so the condition guarding the "first"
 * marking and the store of quadword 0 are not visible here.
 */
685 static void ipath_user_sdma_send_frag(struct ipath_devdata
*dd
,
686 struct ipath_user_sdma_pkt
*pkt
, int idx
,
687 unsigned ofs
, u16 tail
)
689 const u64 addr
= (u64
) pkt
->addr
[idx
].addr
+
690 (u64
) pkt
->addr
[idx
].offset
;
/* fragment length in 32-bit words */
691 const u64 dwlen
= (u64
) pkt
->addr
[idx
].length
/ 4;
695 descqp
= &dd
->ipath_sdma_descq
[tail
].qw
[0];
697 descq0
= ipath_sdma_make_desc0(dd
, addr
, dwlen
, ofs
);
699 descq0
= ipath_sdma_make_first_desc0(descq0
);
700 if (idx
== pkt
->naddr
- 1)
701 descq0
= ipath_sdma_make_last_desc0(descq0
);
704 descqp
[1] = ipath_sdma_make_desc1(addr
);
/*
 * Move packets from @pktlist onto the hardware descriptor queue.
 *
 * After checking that the list is non-empty and the link is active, the
 * function takes dd->ipath_sdma_lock (irqsave), checks the SDMA status
 * for an abort condition, and then, for each packet that still fits in
 * the free descriptor count: writes one descriptor per fragment, wraps
 * the tail (bumping the generation) at the end of the ring, compares the
 * accumulated size against dd->ipath_ibmaxlen, sets bit 14 in every
 * descriptor of a packet that has reached IPATH_SMALLBUF_DWORDS, updates
 * the added-descriptor accounting and moves the packet to pq->sent.
 * Finally the chip tail register is written if the tail moved.
 *
 * NOTE(review): many embedded lines are missing (711-715, 717-718,
 * 720-721, 723, 725-728, 733-737, 740, 744, 746, 748-750, 754-758, 762,
 * 768-771, 775-778, 781, 784-786, 788-790); local declarations, the early
 * return values, the initial values of ofs/dtail and the final return
 * statement are not visible here.
 */
707 /* pq->lock must be held, get packets on the wire... */
708 static int ipath_user_sdma_push_pkts(struct ipath_devdata
*dd
,
709 struct ipath_user_sdma_queue
*pq
,
710 struct list_head
*pktlist
)
716 if (list_empty(pktlist
))
719 if (unlikely(!(dd
->ipath_flags
& IPATH_LINKACTIVE
)))
722 spin_lock_irqsave(&dd
->ipath_sdma_lock
, flags
);
724 if (unlikely(dd
->ipath_sdma_status
& IPATH_SDMA_ABORT_MASK
)) {
729 tail
= dd
->ipath_sdma_descq_tail
;
730 while (!list_empty(pktlist
)) {
731 struct ipath_user_sdma_pkt
*pkt
=
732 list_entry(pktlist
->next
, struct ipath_user_sdma_pkt
,
/* stop when the packet needs more descriptors than are free */
738 if (pkt
->naddr
> ipath_sdma_descq_freecnt(dd
))
739 goto unlock_check_tail
;
741 for (i
= 0; i
< pkt
->naddr
; i
++) {
742 ipath_user_sdma_send_frag(dd
, pkt
, i
, ofs
, tail
);
/* ofs counts dwords */
743 ofs
+= pkt
->addr
[i
].length
>> 2;
/* ring wrap: a new pass over the ring gets a new generation */
745 if (++tail
== dd
->ipath_sdma_descq_cnt
) {
747 ++dd
->ipath_sdma_generation
;
751 if ((ofs
<<2) > dd
->ipath_ibmaxlen
) {
752 ipath_dbg("packet size %X > ibmax %X, fail\n",
753 ofs
<<2, dd
->ipath_ibmaxlen
);
759 * if the packet is >= 2KB mtu equivalent, we have to use
760 * the large buffers, and have to mark each descriptor as
761 * part of a large buffer packet.
763 if (ofs
>= IPATH_SMALLBUF_DWORDS
) {
764 for (i
= 0; i
< pkt
->naddr
; i
++) {
765 dd
->ipath_sdma_descq
[dtail
].qw
[0] |=
766 __constant_cpu_to_le64(1ULL << 14);
767 if (++dtail
== dd
->ipath_sdma_descq_cnt
)
/* remember the running added count at which this packet was queued */
772 dd
->ipath_sdma_descq_added
+= pkt
->naddr
;
773 pkt
->added
= dd
->ipath_sdma_descq_added
;
774 list_move_tail(&pkt
->list
, &pq
->sent
);
779 /* advance the tail on the chip if necessary */
780 if (dd
->ipath_sdma_descq_tail
!= tail
) {
782 ipath_write_kreg(dd
, dd
->ipath_kregs
->kr_senddmatail
, tail
);
783 dd
->ipath_sdma_descq_tail
= tail
;
787 spin_unlock_irqrestore(&dd
->ipath_sdma_lock
, flags
);
/*
 * Queue the packets described by @iov and push them to the hardware.
 *
 * Under pq->lock: retire finished work if descriptors are outstanding,
 * build packets with ipath_user_sdma_queue_pkts() while holding the
 * mmap_sem of the current process for writing, lazily clean the hardware
 * queue when free descriptors look scarce, and push the packets.  Any
 * packets left on the local list at the end are freed.
 *
 * Returns a negative error code, otherwise npkts.
 *
 * The down_write()/up_write() arguments had been mis-decoded in this copy
 * (the "&curren" prefix of "&current" was turned into a currency sign);
 * they are restored to &current->mm->mmap_sem.
 *
 * NOTE(review): many embedded lines are missing from this extract
 * (795-797, 799-800, 802, 804, 808-812, 816-823, 826, 830, 834-835,
 * 837-842, 844-849, 853, 855), including the last parameter, the local
 * declarations, the loop header and all goto/label lines.
 */
792 int ipath_user_sdma_writev(struct ipath_devdata
*dd
,
793 struct ipath_user_sdma_queue
*pq
,
794 const struct iovec
*iov
,
798 struct list_head list
;
801 INIT_LIST_HEAD(&list
);
803 mutex_lock(&pq
->lock
);
805 if (dd
->ipath_sdma_descq_added
!= dd
->ipath_sdma_descq_removed
) {
806 ipath_user_sdma_hwqueue_clean(dd
);
807 ipath_user_sdma_queue_clean(dd
, pq
);
813 down_write(&current
->mm
->mmap_sem
);
814 ret
= ipath_user_sdma_queue_pkts(dd
, pq
, &list
, iov
, dim
, mxp
);
815 up_write(&current
->mm
->mmap_sem
);
824 /* force packets onto the sdma hw queue... */
825 if (!list_empty(&list
)) {
827 * lazily clean hw queue. the 4 is a guess of about
828 * how many sdma descriptors a packet will take (it
829 * doesn't have to be perfect).
831 if (ipath_sdma_descq_freecnt(dd
) < ret
* 4) {
832 ipath_user_sdma_hwqueue_clean(dd
);
833 ipath_user_sdma_queue_clean(dd
, pq
);
836 ret
= ipath_user_sdma_push_pkts(dd
, pq
, &list
);
843 if (!list_empty(&list
))
850 if (!list_empty(&list
))
851 ipath_user_sdma_free_pkt_list(&dd
->pcidev
->dev
, pq
, &list
);
852 mutex_unlock(&pq
->lock
);
854 return (ret
< 0) ? ret
: npkts
;
857 int ipath_user_sdma_make_progress(struct ipath_devdata
*dd
,
858 struct ipath_user_sdma_queue
*pq
)
862 mutex_lock(&pq
->lock
);
863 ipath_user_sdma_hwqueue_clean(dd
);
864 ret
= ipath_user_sdma_queue_clean(dd
, pq
);
865 mutex_unlock(&pq
->lock
);
870 u32
ipath_user_sdma_complete_counter(const struct ipath_user_sdma_queue
*pq
)
872 return pq
->sent_counter
;
875 u32
ipath_user_sdma_inflight_counter(struct ipath_user_sdma_queue
*pq
)