/*
 * Copyright (c) 2005-2007 Chelsio, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/if_vlan.h>
#include <linux/tcp.h>
#include <linux/dma-mapping.h>
#include "firmware_exports.h"

#define SGE_RX_SM_BUF_SIZE 1536
#define SGE_RX_COPY_THRES  256

#define SGE_RX_DROP_THRES 16

/*
 * Period of the Tx buffer reclaim timer.  This timer does not need to run
 * frequently as Tx buffers are usually reclaimed by new Tx packets.
 */
#define TX_RECLAIM_PERIOD (HZ / 4)

/* WR size in bytes */
#define WR_LEN (WR_FLITS * 8)
/*
 * Types of Tx queues in each queue set.  Order here matters, do not change.
 */
enum { TXQ_ETH, TXQ_OFLD, TXQ_CTRL };

/* Values for sge_txq.flags */
enum {
        TXQ_RUNNING = 1 << 0,           /* fetch engine is running */
        TXQ_LAST_PKT_DB = 1 << 1,       /* last packet rang the doorbell */
};

struct tx_desc {
        u64 flit[TX_DESC_FLITS];
};
struct tx_sw_desc {             /* SW state per Tx descriptor */
        struct sk_buff *skb;
};

struct rx_sw_desc {             /* SW state per Rx descriptor */
        struct sk_buff *skb;
        DECLARE_PCI_UNMAP_ADDR(dma_addr);
};

struct rsp_desc {               /* response queue descriptor */
        struct rss_header rss_hdr;
        __be32 flags;
        __be32 len_cq;
        u8 imm_data[47];
        u8 intr_gen;
};

struct unmap_info {             /* packet unmapping info, overlays skb->cb */
        int sflit;              /* start flit of first SGL entry in Tx descriptor */
        u16 fragidx;            /* first page fragment in current Tx descriptor */
        u16 addr_idx;           /* buffer index of first SGL entry in descriptor */
        u32 len;                /* mapped length of skb main body */
};
/*
 * Maps a number of flits to the number of Tx descriptors that can hold them.
 * The formula is
 *
 *      desc = 1 + (flits - 2) / (WR_FLITS - 1).
 *
 * HW allows up to 4 descriptors to be combined into a WR.
 */
static u8 flit_desc_map[] = {
        0,
#if SGE_NUM_GENBITS == 1
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
        3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4
#elif SGE_NUM_GENBITS == 2
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
        3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
#else
# error "SGE_NUM_GENBITS must be 1 or 2"
#endif
};
static inline struct sge_qset *fl_to_qset(const struct sge_fl *q, int qidx)
{
        return container_of(q, struct sge_qset, fl[qidx]);
}

static inline struct sge_qset *rspq_to_qset(const struct sge_rspq *q)
{
        return container_of(q, struct sge_qset, rspq);
}

static inline struct sge_qset *txq_to_qset(const struct sge_txq *q, int qidx)
{
        return container_of(q, struct sge_qset, txq[qidx]);
}
/**
 *	refill_rspq - replenish an SGE response queue
 *	@adapter: the adapter
 *	@q: the response queue to replenish
 *	@credits: how many new responses to make available
 *
 *	Replenishes a response queue by making the supplied number of responses
 *	available to the chip.
 */
static inline void refill_rspq(struct adapter *adapter,
                               const struct sge_rspq *q, unsigned int credits)
{
        t3_write_reg(adapter, A_SG_RSPQ_CREDIT_RETURN,
                     V_RSPQ(q->cntxt_id) | V_CREDITS(credits));
}
/**
 *	need_skb_unmap - does the platform need unmapping of sk_buffs?
 *
 *	Returns true if the platform needs sk_buff unmapping.  The compiler
 *	optimizes away unnecessary code if this returns true.
 */
static inline int need_skb_unmap(void)
{
        /*
         * This structure is used to tell if the platform needs buffer
         * unmapping by checking if DECLARE_PCI_UNMAP_ADDR defines anything.
         */
        struct dummy {
                DECLARE_PCI_UNMAP_ADDR(addr);
        };

        return sizeof(struct dummy) != 0;
}
/**
 *	unmap_skb - unmap a packet main body and its page fragments
 *	@skb: the packet
 *	@q: the Tx queue containing Tx descriptors for the packet
 *	@cidx: index of Tx descriptor
 *	@pdev: the PCI device
 *
 *	Unmap the main body of an sk_buff and its page fragments, if any.
 *	Because of the fairly complicated structure of our SGLs and the desire
 *	to conserve space for metadata, we keep the information necessary to
 *	unmap an sk_buff partly in the sk_buff itself (in its cb), and partly
 *	in the Tx descriptors (the physical addresses of the various data
 *	buffers).  The send functions initialize the state in skb->cb so we
 *	can unmap the buffers held in the first Tx descriptor here, and we
 *	have enough information at this point to update the state for the next
 *	Tx descriptor.
 */
static inline void unmap_skb(struct sk_buff *skb, struct sge_txq *q,
                             unsigned int cidx, struct pci_dev *pdev)
{
        const struct sg_ent *sgp;
        struct unmap_info *ui = (struct unmap_info *)skb->cb;
        int nfrags, frag_idx, curflit, j = ui->addr_idx;

        sgp = (struct sg_ent *)&q->desc[cidx].flit[ui->sflit];

        if (ui->len) {
                pci_unmap_single(pdev, be64_to_cpu(sgp->addr[0]), ui->len,
                                 PCI_DMA_TODEVICE);
                ui->len = 0;    /* so we know for next descriptor for this skb */
                j = 1;
        }

        frag_idx = ui->fragidx;
        curflit = ui->sflit + 1 + j;
        nfrags = skb_shinfo(skb)->nr_frags;

        while (frag_idx < nfrags && curflit < WR_FLITS) {
                pci_unmap_page(pdev, be64_to_cpu(sgp->addr[j]),
                               skb_shinfo(skb)->frags[frag_idx].size,
                               PCI_DMA_TODEVICE);
                j ^= 1;
                if (j == 0) {
                        sgp++;
                        curflit++;
                }
                curflit++;
                frag_idx++;
        }

        if (frag_idx < nfrags) {        /* SGL continues into next Tx descriptor */
                ui->fragidx = frag_idx;
                ui->addr_idx = j;
                ui->sflit = curflit - WR_FLITS - j;     /* sflit can be -1 */
        }
}
/**
 *	free_tx_desc - reclaims Tx descriptors and their buffers
 *	@adapter: the adapter
 *	@q: the Tx queue to reclaim descriptors from
 *	@n: the number of descriptors to reclaim
 *
 *	Reclaims Tx descriptors from an SGE Tx queue and frees the associated
 *	Tx buffers.  Called with the Tx queue lock held.
 */
static void free_tx_desc(struct adapter *adapter, struct sge_txq *q,
                         unsigned int n)
{
        struct tx_sw_desc *d;
        struct pci_dev *pdev = adapter->pdev;
        unsigned int cidx = q->cidx;

        d = &q->sdesc[cidx];
        while (n--) {
                if (d->skb) {   /* an SGL is present */
                        if (need_skb_unmap())
                                unmap_skb(d->skb, q, cidx, pdev);
                        if (d->skb->priority == cidx)
                                kfree_skb(d->skb);
                }
                ++d;
                if (++cidx == q->size) {
                        cidx = 0;
                        d = q->sdesc;
                }
        }
        q->cidx = cidx;
}
/**
 *	reclaim_completed_tx - reclaims completed Tx descriptors
 *	@adapter: the adapter
 *	@q: the Tx queue to reclaim completed descriptors from
 *
 *	Reclaims Tx descriptors that the SGE has indicated it has processed,
 *	and frees the associated buffers if possible.  Called with the Tx
 *	queue's lock held.
 */
static inline void reclaim_completed_tx(struct adapter *adapter,
                                        struct sge_txq *q)
{
        unsigned int reclaim = q->processed - q->cleaned;

        if (reclaim) {
                free_tx_desc(adapter, q, reclaim);
                q->cleaned += reclaim;
                q->in_use -= reclaim;
        }
}
/**
 *	should_restart_tx - are there enough resources to restart a Tx queue?
 *	@q: the Tx queue
 *
 *	Checks if there are enough descriptors to restart a suspended Tx queue.
 */
static inline int should_restart_tx(const struct sge_txq *q)
{
        unsigned int r = q->processed - q->cleaned;

        return q->in_use - r < (q->size >> 1);
}
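/*
 * Illustrative sketch, not part of the driver: a standalone version of the
 * restart test above, guarded by a hypothetical CXGB3_SGE_EXAMPLES macro so
 * it never affects the real build.  A suspended queue is worth restarting
 * once fewer than half of its descriptors would remain busy after reclaiming
 * everything the hardware has already processed.
 */
#ifdef CXGB3_SGE_EXAMPLES
static int example_should_restart(unsigned int size, unsigned int in_use,
                                  unsigned int processed, unsigned int cleaned)
{
        unsigned int reclaimable = processed - cleaned;

        return in_use - reclaimable < (size >> 1);
}
#endif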
/**
 *	free_rx_bufs - free the Rx buffers on an SGE free list
 *	@pdev: the PCI device associated with the adapter
 *	@rxq: the SGE free list to clean up
 *
 *	Release the buffers on an SGE free-buffer Rx queue.  HW fetching from
 *	this queue should be stopped before calling this function.
 */
static void free_rx_bufs(struct pci_dev *pdev, struct sge_fl *q)
{
        unsigned int cidx = q->cidx;

        while (q->credits--) {
                struct rx_sw_desc *d = &q->sdesc[cidx];

                pci_unmap_single(pdev, pci_unmap_addr(d, dma_addr),
                                 q->buf_size, PCI_DMA_FROMDEVICE);
                kfree_skb(d->skb);
                d->skb = NULL;
                if (++cidx == q->size)
                        cidx = 0;
        }
}
/**
 *	add_one_rx_buf - add a packet buffer to a free-buffer list
 *	@skb: the buffer to add
 *	@len: the buffer length
 *	@d: the HW Rx descriptor to write
 *	@sd: the SW Rx descriptor to write
 *	@gen: the generation bit value
 *	@pdev: the PCI device associated with the adapter
 *
 *	Add a buffer of the given length to the supplied HW and SW Rx
 *	descriptors.
 */
static inline void add_one_rx_buf(struct sk_buff *skb, unsigned int len,
                                  struct rx_desc *d, struct rx_sw_desc *sd,
                                  unsigned int gen, struct pci_dev *pdev)
{
        dma_addr_t mapping;

        sd->skb = skb;
        mapping = pci_map_single(pdev, skb->data, len, PCI_DMA_FROMDEVICE);
        pci_unmap_addr_set(sd, dma_addr, mapping);

        d->addr_lo = cpu_to_be32(mapping);
        d->addr_hi = cpu_to_be32((u64) mapping >> 32);
        wmb();
        d->len_gen = cpu_to_be32(V_FLD_GEN1(gen));
        d->gen2 = cpu_to_be32(V_FLD_GEN2(gen));
}
/**
 *	refill_fl - refill an SGE free-buffer list
 *	@adapter: the adapter
 *	@q: the free-list to refill
 *	@n: the number of new buffers to allocate
 *	@gfp: the gfp flags for allocating new buffers
 *
 *	(Re)populate an SGE free-buffer list with up to @n new packet buffers,
 *	allocated with the supplied gfp flags.  The caller must assure that
 *	@n does not exceed the queue's capacity.
 */
static void refill_fl(struct adapter *adap, struct sge_fl *q, int n, gfp_t gfp)
{
        struct rx_sw_desc *sd = &q->sdesc[q->pidx];
        struct rx_desc *d = &q->desc[q->pidx];

        while (n--) {
                struct sk_buff *skb = alloc_skb(q->buf_size, gfp);

                if (!skb)
                        break;

                add_one_rx_buf(skb, q->buf_size, d, sd, q->gen, adap->pdev);
                d++;
                sd++;
                if (++q->pidx == q->size) {
                        q->pidx = 0;
                        q->gen ^= 1;
                        sd = q->sdesc;
                        d = q->desc;
                }
                q->credits++;
        }

        t3_write_reg(adap, A_SG_KDOORBELL, V_EGRCNTX(q->cntxt_id));
}
static inline void __refill_fl(struct adapter *adap, struct sge_fl *fl)
{
        refill_fl(adap, fl, min(16U, fl->size - fl->credits), GFP_ATOMIC);
}
/**
 *	recycle_rx_buf - recycle a receive buffer
 *	@adapter: the adapter
 *	@q: the SGE free list
 *	@idx: index of buffer to recycle
 *
 *	Recycles the specified buffer on the given free list by adding it at
 *	the next available slot on the list.
 */
static void recycle_rx_buf(struct adapter *adap, struct sge_fl *q,
                           unsigned int idx)
{
        struct rx_desc *from = &q->desc[idx];
        struct rx_desc *to = &q->desc[q->pidx];

        q->sdesc[q->pidx] = q->sdesc[idx];
        to->addr_lo = from->addr_lo;    /* already big endian */
        to->addr_hi = from->addr_hi;    /* likewise */
        wmb();
        to->len_gen = cpu_to_be32(V_FLD_GEN1(q->gen));
        to->gen2 = cpu_to_be32(V_FLD_GEN2(q->gen));
        q->credits++;

        if (++q->pidx == q->size) {
                q->pidx = 0;
                q->gen ^= 1;
        }
        t3_write_reg(adap, A_SG_KDOORBELL, V_EGRCNTX(q->cntxt_id));
}
/**
 *	alloc_ring - allocate resources for an SGE descriptor ring
 *	@pdev: the PCI device
 *	@nelem: the number of descriptors
 *	@elem_size: the size of each descriptor
 *	@sw_size: the size of the SW state associated with each ring element
 *	@phys: the physical address of the allocated ring
 *	@metadata: address of the array holding the SW state for the ring
 *
 *	Allocates resources for an SGE descriptor ring, such as Tx queues,
 *	free buffer lists, or response queues.  Each SGE ring requires
 *	space for its HW descriptors plus, optionally, space for the SW state
 *	associated with each HW entry (the metadata).  The function returns
 *	three values: the virtual address for the HW ring (the return value
 *	of the function), the physical address of the HW ring, and the address
 *	of the SW ring.
 */
static void *alloc_ring(struct pci_dev *pdev, size_t nelem, size_t elem_size,
                        size_t sw_size, dma_addr_t *phys, void *metadata)
{
        size_t len = nelem * elem_size;
        void *s = NULL;
        void *p = dma_alloc_coherent(&pdev->dev, len, phys, GFP_KERNEL);

        if (!p)
                return NULL;
        if (sw_size) {
                s = kcalloc(nelem, sw_size, GFP_KERNEL);

                if (!s) {
                        dma_free_coherent(&pdev->dev, len, p, *phys);
                        return NULL;
                }
        }
        if (metadata)
                *(void **)metadata = s;
        memset(p, 0, len);
        return p;
}
/**
 *	free_qset - free the resources of an SGE queue set
 *	@adapter: the adapter owning the queue set
 *	@q: the queue set
 *
 *	Release the HW and SW resources associated with an SGE queue set, such
 *	as HW contexts, packet buffers, and descriptor rings.  Traffic to the
 *	queue set must be quiesced prior to calling this.
 */
void t3_free_qset(struct adapter *adapter, struct sge_qset *q)
{
        int i;
        struct pci_dev *pdev = adapter->pdev;

        if (q->tx_reclaim_timer.function)
                del_timer_sync(&q->tx_reclaim_timer);

        for (i = 0; i < SGE_RXQ_PER_SET; ++i)
                if (q->fl[i].desc) {
                        spin_lock(&adapter->sge.reg_lock);
                        t3_sge_disable_fl(adapter, q->fl[i].cntxt_id);
                        spin_unlock(&adapter->sge.reg_lock);
                        free_rx_bufs(pdev, &q->fl[i]);
                        kfree(q->fl[i].sdesc);
                        dma_free_coherent(&pdev->dev,
                                          q->fl[i].size *
                                          sizeof(struct rx_desc), q->fl[i].desc,
                                          q->fl[i].phys_addr);
                }

        for (i = 0; i < SGE_TXQ_PER_SET; ++i)
                if (q->txq[i].desc) {
                        spin_lock(&adapter->sge.reg_lock);
                        t3_sge_enable_ecntxt(adapter, q->txq[i].cntxt_id, 0);
                        spin_unlock(&adapter->sge.reg_lock);
                        if (q->txq[i].sdesc) {
                                free_tx_desc(adapter, &q->txq[i],
                                             q->txq[i].in_use);
                                kfree(q->txq[i].sdesc);
                        }
                        dma_free_coherent(&pdev->dev,
                                          q->txq[i].size *
                                          sizeof(struct tx_desc),
                                          q->txq[i].desc, q->txq[i].phys_addr);
                        __skb_queue_purge(&q->txq[i].sendq);
                }

        if (q->rspq.desc) {
                spin_lock(&adapter->sge.reg_lock);
                t3_sge_disable_rspcntxt(adapter, q->rspq.cntxt_id);
                spin_unlock(&adapter->sge.reg_lock);
                dma_free_coherent(&pdev->dev,
                                  q->rspq.size * sizeof(struct rsp_desc),
                                  q->rspq.desc, q->rspq.phys_addr);
        }

        if (q->netdev)
                q->netdev->atalk_ptr = NULL;

        memset(q, 0, sizeof(*q));
}
/**
 *	init_qset_cntxt - initialize an SGE queue set context info
 *	@qs: the queue set
 *	@id: the queue set id
 *
 *	Initializes the TIDs and context ids for the queues of a queue set.
 */
static void init_qset_cntxt(struct sge_qset *qs, unsigned int id)
{
        qs->rspq.cntxt_id = id;
        qs->fl[0].cntxt_id = 2 * id;
        qs->fl[1].cntxt_id = 2 * id + 1;
        qs->txq[TXQ_ETH].cntxt_id = FW_TUNNEL_SGEEC_START + id;
        qs->txq[TXQ_ETH].token = FW_TUNNEL_TID_START + id;
        qs->txq[TXQ_OFLD].cntxt_id = FW_OFLD_SGEEC_START + id;
        qs->txq[TXQ_CTRL].cntxt_id = FW_CTRL_SGEEC_START + id;
        qs->txq[TXQ_CTRL].token = FW_CTRL_TID_START + id;
}
/**
 *	sgl_len - calculates the size of an SGL of the given capacity
 *	@n: the number of SGL entries
 *
 *	Calculates the number of flits needed for a scatter/gather list that
 *	can hold the given number of entries.
 */
static inline unsigned int sgl_len(unsigned int n)
{
        /* alternatively: 3 * (n / 2) + 2 * (n & 1) */
        return (3 * n) / 2 + (n & 1);
}
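/*
 * Illustrative sketch, not part of the driver (guarded by a hypothetical
 * CXGB3_SGE_EXAMPLES macro): each sg_ent packs two address/length pairs into
 * three 8-byte flits, so sgl_len(1) == 2, sgl_len(2) == 3, sgl_len(3) == 5,
 * and an odd entry count always costs one extra flit.
 */
#ifdef CXGB3_SGE_EXAMPLES
static unsigned int example_sgl_bytes(unsigned int nentries)
{
        unsigned int flits = (3 * nentries) / 2 + (nentries & 1);

        return flits * 8;       /* e.g. 2 entries -> 3 flits -> 24 bytes */
}
#endif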
/**
 *	flits_to_desc - returns the num of Tx descriptors for the given flits
 *	@n: the number of flits
 *
 *	Calculates the number of Tx descriptors needed for the supplied number
 *	of flits.
 */
static inline unsigned int flits_to_desc(unsigned int n)
{
        BUG_ON(n >= ARRAY_SIZE(flit_desc_map));
        return flit_desc_map[n];
}
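/*
 * Illustrative sketch, not part of the driver: the flit_desc_map table above
 * encodes desc = 1 + (flits - 2) / (WR_FLITS - 1), capped at 4 descriptors
 * per WR.  The helper below recomputes that formula directly; WR_FLITS comes
 * from the driver headers and the guard macro is hypothetical.
 */
#ifdef CXGB3_SGE_EXAMPLES
static unsigned int example_flits_to_desc(unsigned int flits)
{
        if (flits <= 2)
                return 1;                       /* header alone fits in one descriptor */
        return 1 + (flits - 2) / (WR_FLITS - 1);        /* same formula the table encodes */
}
#endif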
/**
 *	get_packet - return the next ingress packet buffer from a free list
 *	@adap: the adapter that received the packet
 *	@fl: the SGE free list holding the packet
 *	@len: the packet length including any SGE padding
 *	@drop_thres: # of remaining buffers before we start dropping packets
 *
 *	Get the next packet from a free list and complete setup of the
 *	sk_buff.  If the packet is small we make a copy and recycle the
 *	original buffer, otherwise we use the original buffer itself.  If a
 *	positive drop threshold is supplied packets are dropped and their
 *	buffers recycled if (a) the number of remaining buffers is under the
 *	threshold and the packet is too big to copy, or (b) the packet should
 *	be copied but there is no memory for the copy.
 */
static struct sk_buff *get_packet(struct adapter *adap, struct sge_fl *fl,
                                  unsigned int len, unsigned int drop_thres)
{
        struct sk_buff *skb = NULL;
        struct rx_sw_desc *sd = &fl->sdesc[fl->cidx];

        prefetch(sd->skb->data);

        if (len <= SGE_RX_COPY_THRES) {
                skb = alloc_skb(len, GFP_ATOMIC);
                if (likely(skb != NULL)) {
                        __skb_put(skb, len);
                        pci_dma_sync_single_for_cpu(adap->pdev,
                                                    pci_unmap_addr(sd, dma_addr),
                                                    len, PCI_DMA_FROMDEVICE);
                        memcpy(skb->data, sd->skb->data, len);
                        pci_dma_sync_single_for_device(adap->pdev,
                                                       pci_unmap_addr(sd, dma_addr),
                                                       len, PCI_DMA_FROMDEVICE);
                } else if (!drop_thres)
                        goto use_orig_buf;
              recycle:
                recycle_rx_buf(adap, fl, fl->cidx);
                return skb;
        }

        if (unlikely(fl->credits < drop_thres))
                goto recycle;

      use_orig_buf:
        pci_unmap_single(adap->pdev, pci_unmap_addr(sd, dma_addr),
                         fl->buf_size, PCI_DMA_FROMDEVICE);
        skb = sd->skb;
        skb_put(skb, len);
        __refill_fl(adap, fl);
        return skb;
}
/**
 *	get_imm_packet - return the next ingress packet buffer from a response
 *	@resp: the response descriptor containing the packet data
 *
 *	Return a packet containing the immediate data of the given response.
 */
static inline struct sk_buff *get_imm_packet(const struct rsp_desc *resp)
{
        struct sk_buff *skb = alloc_skb(IMMED_PKT_SIZE, GFP_ATOMIC);

        if (skb) {
                __skb_put(skb, IMMED_PKT_SIZE);
                memcpy(skb->data, resp->imm_data, IMMED_PKT_SIZE);
        }
        return skb;
}
/**
 *	calc_tx_descs - calculate the number of Tx descriptors for a packet
 *	@skb: the packet
 *
 *	Returns the number of Tx descriptors needed for the given Ethernet
 *	packet.  Ethernet packets require addition of WR and CPL headers.
 */
static inline unsigned int calc_tx_descs(const struct sk_buff *skb)
{
        unsigned int flits;

        if (skb->len <= WR_LEN - sizeof(struct cpl_tx_pkt))
                return 1;

        flits = sgl_len(skb_shinfo(skb)->nr_frags + 1) + 2;
        if (skb_shinfo(skb)->gso_size)
                flits++;
        return flits_to_desc(flits);
}
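/*
 * Illustrative sketch, not part of the driver: descriptor sizing for a
 * typical non-GSO Ethernet frame with a linear area plus two page fragments.
 * Such a packet needs sgl_len(2 + 1) = 5 SGL flits plus 2 flits of WR/CPL
 * header, i.e. 7 flits, which flits_to_desc() maps to a single descriptor.
 * The guard macro is hypothetical.
 */
#ifdef CXGB3_SGE_EXAMPLES
static unsigned int example_eth_descs(unsigned int nr_frags, int gso)
{
        unsigned int flits = sgl_len(nr_frags + 1) + 2; /* +1 for the linear area */

        if (gso)
                flits++;                                /* LSO CPL takes one more flit */
        return flits_to_desc(flits);
}
#endif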
/**
 *	make_sgl - populate a scatter/gather list for a packet
 *	@skb: the packet
 *	@sgp: the SGL to populate
 *	@start: start address of skb main body data to include in the SGL
 *	@len: length of skb main body data to include in the SGL
 *	@pdev: the PCI device
 *
 *	Generates a scatter/gather list for the buffers that make up a packet
 *	and returns the SGL size in 8-byte words.  The caller must size the SGL
 *	appropriately.
 */
static inline unsigned int make_sgl(const struct sk_buff *skb,
                                    struct sg_ent *sgp, unsigned char *start,
                                    unsigned int len, struct pci_dev *pdev)
{
        dma_addr_t mapping;
        unsigned int i, j = 0, nfrags;

        if (len) {
                mapping = pci_map_single(pdev, start, len, PCI_DMA_TODEVICE);
                sgp->len[0] = cpu_to_be32(len);
                sgp->addr[0] = cpu_to_be64(mapping);
                j = 1;
        }

        nfrags = skb_shinfo(skb)->nr_frags;
        for (i = 0; i < nfrags; i++) {
                skb_frag_t *frag = &skb_shinfo(skb)->frags[i];

                mapping = pci_map_page(pdev, frag->page, frag->page_offset,
                                       frag->size, PCI_DMA_TODEVICE);
                sgp->len[j] = cpu_to_be32(frag->size);
                sgp->addr[j] = cpu_to_be64(mapping);
                j ^= 1;
                if (j == 0)
                        ++sgp;
        }
        if (j)
                sgp->len[j] = 0;
        return ((nfrags + (len != 0)) * 3) / 2 + j;
}
/**
 *	check_ring_tx_db - check and potentially ring a Tx queue's doorbell
 *	@adap: the adapter
 *	@q: the Tx queue
 *
 *	Ring the doorbell if a Tx queue is asleep.  There is a natural race,
 *	where the HW is going to sleep just after we checked, however,
 *	then the interrupt handler will detect the outstanding TX packet
 *	and ring the doorbell for us.
 *
 *	When GTS is disabled we unconditionally ring the doorbell.
 */
static inline void check_ring_tx_db(struct adapter *adap, struct sge_txq *q)
{
#if USE_GTS
        clear_bit(TXQ_LAST_PKT_DB, &q->flags);
        if (test_and_set_bit(TXQ_RUNNING, &q->flags) == 0) {
                set_bit(TXQ_LAST_PKT_DB, &q->flags);
                t3_write_reg(adap, A_SG_KDOORBELL,
                             F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
        }
#else
        wmb();                  /* write descriptors before telling HW */
        t3_write_reg(adap, A_SG_KDOORBELL,
                     F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
#endif
}
static inline void wr_gen2(struct tx_desc *d, unsigned int gen)
{
#if SGE_NUM_GENBITS == 2
        d->flit[TX_DESC_FLITS - 1] = cpu_to_be64(gen);
#endif
}
/**
 *	write_wr_hdr_sgl - write a WR header and, optionally, SGL
 *	@ndesc: number of Tx descriptors spanned by the SGL
 *	@skb: the packet corresponding to the WR
 *	@d: first Tx descriptor to be written
 *	@pidx: index of above descriptors
 *	@q: the SGE Tx queue
 *	@sgl: the SGL
 *	@flits: number of flits to the start of the SGL in the first descriptor
 *	@sgl_flits: the SGL size in flits
 *	@gen: the Tx descriptor generation
 *	@wr_hi: top 32 bits of WR header based on WR type (big endian)
 *	@wr_lo: low 32 bits of WR header based on WR type (big endian)
 *
 *	Write a work request header and an associated SGL.  If the SGL is
 *	small enough to fit into one Tx descriptor it has already been written
 *	and we just need to write the WR header.  Otherwise we distribute the
 *	SGL across the number of descriptors it spans.
 */
static void write_wr_hdr_sgl(unsigned int ndesc, struct sk_buff *skb,
                             struct tx_desc *d, unsigned int pidx,
                             const struct sge_txq *q,
                             const struct sg_ent *sgl,
                             unsigned int flits, unsigned int sgl_flits,
                             unsigned int gen, unsigned int wr_hi,
                             unsigned int wr_lo)
{
        struct work_request_hdr *wrp = (struct work_request_hdr *)d;
        struct tx_sw_desc *sd = &q->sdesc[pidx];

        sd->skb = skb;
        if (need_skb_unmap()) {
                struct unmap_info *ui = (struct unmap_info *)skb->cb;

                ui->fragidx = 0;
                ui->addr_idx = 0;
                ui->sflit = flits;
        }

        if (likely(ndesc == 1)) {
                skb->priority = pidx;
                wrp->wr_hi = htonl(F_WR_SOP | F_WR_EOP | V_WR_DATATYPE(1) |
                                   V_WR_SGLSFLT(flits)) | wr_hi;
                wmb();
                wrp->wr_lo = htonl(V_WR_LEN(flits + sgl_flits) |
                                   V_WR_GEN(gen)) | wr_lo;
                wr_gen2(d, gen);
        } else {
                unsigned int ogen = gen;
                const u64 *fp = (const u64 *)sgl;
                struct work_request_hdr *wp = wrp;

                wrp->wr_hi = htonl(F_WR_SOP | V_WR_DATATYPE(1) |
                                   V_WR_SGLSFLT(flits)) | wr_hi;

                while (sgl_flits) {
                        unsigned int avail = WR_FLITS - flits;

                        if (avail > sgl_flits)
                                avail = sgl_flits;
                        memcpy(&d->flit[flits], fp, avail * sizeof(*fp));
                        sgl_flits -= avail;
                        ndesc--;
                        if (!sgl_flits)
                                break;

                        fp += avail;
                        d++;
                        sd++;
                        if (++pidx == q->size) {
                                pidx = 0;
                                gen ^= 1;
                                d = q->desc;
                                sd = q->sdesc;
                        }

                        sd->skb = skb;
                        wrp = (struct work_request_hdr *)d;
                        wrp->wr_hi = htonl(V_WR_DATATYPE(1) |
                                           V_WR_SGLSFLT(1)) | wr_hi;
                        wrp->wr_lo = htonl(V_WR_LEN(min(WR_FLITS,
                                                        sgl_flits + 1)) |
                                           V_WR_GEN(gen)) | wr_lo;
                        wr_gen2(d, gen);
                        flits = 1;
                }
                skb->priority = pidx;
                wrp->wr_hi |= htonl(F_WR_EOP);
                wmb();
                wp->wr_lo = htonl(V_WR_LEN(WR_FLITS) | V_WR_GEN(ogen)) | wr_lo;
                wr_gen2((struct tx_desc *)wp, ogen);
                WARN_ON(ndesc != 0);
        }
}
/**
 *	write_tx_pkt_wr - write a TX_PKT work request
 *	@adap: the adapter
 *	@skb: the packet to send
 *	@pi: the egress interface
 *	@pidx: index of the first Tx descriptor to write
 *	@gen: the generation value to use
 *	@q: the Tx queue
 *	@ndesc: number of descriptors the packet will occupy
 *	@compl: the value of the COMPL bit to use
 *
 *	Generate a TX_PKT work request to send the supplied packet.
 */
static void write_tx_pkt_wr(struct adapter *adap, struct sk_buff *skb,
                            const struct port_info *pi,
                            unsigned int pidx, unsigned int gen,
                            struct sge_txq *q, unsigned int ndesc,
                            unsigned int compl)
{
        unsigned int flits, sgl_flits, cntrl, tso_info;
        struct sg_ent *sgp, sgl[MAX_SKB_FRAGS / 2 + 1];
        struct tx_desc *d = &q->desc[pidx];
        struct cpl_tx_pkt *cpl = (struct cpl_tx_pkt *)d;

        cpl->len = htonl(skb->len | 0x80000000);
        cntrl = V_TXPKT_INTF(pi->port_id);

        if (vlan_tx_tag_present(skb) && pi->vlan_grp)
                cntrl |= F_TXPKT_VLAN_VLD | V_TXPKT_VLAN(vlan_tx_tag_get(skb));

        tso_info = V_LSO_MSS(skb_shinfo(skb)->gso_size);
        if (tso_info) {
                int eth_type;
                struct cpl_tx_pkt_lso *hdr = (struct cpl_tx_pkt_lso *)cpl;

                cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT_LSO);
                hdr->cntrl = htonl(cntrl);
                eth_type = skb->nh.raw - skb->data == ETH_HLEN ?
                    CPL_ETH_II : CPL_ETH_II_VLAN;
                tso_info |= V_LSO_ETH_TYPE(eth_type) |
                    V_LSO_IPHDR_WORDS(skb->nh.iph->ihl) |
                    V_LSO_TCPHDR_WORDS(skb->h.th->doff);
                hdr->lso_info = htonl(tso_info);
                flits = 3;
        } else {
                cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT);
                cntrl |= F_TXPKT_IPCSUM_DIS;    /* SW calculates IP csum */
                cntrl |= V_TXPKT_L4CSUM_DIS(skb->ip_summed != CHECKSUM_PARTIAL);
                cpl->cntrl = htonl(cntrl);

                if (skb->len <= WR_LEN - sizeof(*cpl)) {
                        q->sdesc[pidx].skb = NULL;
                        if (!skb->data_len)
                                memcpy(&d->flit[2], skb->data, skb->len);
                        else
                                skb_copy_bits(skb, 0, &d->flit[2], skb->len);

                        flits = (skb->len + 7) / 8 + 2;
                        cpl->wr.wr_hi = htonl(V_WR_BCNTLFLT(skb->len & 7) |
                                              V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT)
                                              | F_WR_SOP | F_WR_EOP | compl);
                        wmb();
                        cpl->wr.wr_lo = htonl(V_WR_LEN(flits) | V_WR_GEN(gen) |
                                              V_WR_TID(q->token));
                        wr_gen2(d, gen);
                        kfree_skb(skb);
                        return;
                }

                flits = 2;
        }

        sgp = ndesc == 1 ? (struct sg_ent *)&d->flit[flits] : sgl;
        sgl_flits = make_sgl(skb, sgp, skb->data, skb_headlen(skb), adap->pdev);
        if (need_skb_unmap())
                ((struct unmap_info *)skb->cb)->len = skb_headlen(skb);

        write_wr_hdr_sgl(ndesc, skb, d, pidx, q, sgl, flits, sgl_flits, gen,
                         htonl(V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | compl),
                         htonl(V_WR_TID(q->token)));
}
/**
 *	eth_xmit - add a packet to the Ethernet Tx queue
 *	@skb: the packet
 *	@dev: the egress net device
 *
 *	Add a packet to an SGE Tx queue.  Runs with softirqs disabled.
 */
int t3_eth_xmit(struct sk_buff *skb, struct net_device *dev)
{
        unsigned int ndesc, pidx, credits, gen, compl;
        const struct port_info *pi = netdev_priv(dev);
        struct adapter *adap = dev->priv;
        struct sge_qset *qs = dev2qset(dev);
        struct sge_txq *q = &qs->txq[TXQ_ETH];

        /*
         * The chip min packet length is 9 octets but play safe and reject
         * anything shorter than an Ethernet header.
         */
        if (unlikely(skb->len < ETH_HLEN)) {
                dev_kfree_skb(skb);
                return NETDEV_TX_OK;
        }

        spin_lock(&q->lock);
        reclaim_completed_tx(adap, q);

        credits = q->size - q->in_use;
        ndesc = calc_tx_descs(skb);

        if (unlikely(credits < ndesc)) {
                if (!netif_queue_stopped(dev)) {
                        netif_stop_queue(dev);
                        set_bit(TXQ_ETH, &qs->txq_stopped);
                        dev_err(&adap->pdev->dev,
                                "%s: Tx ring %u full while queue awake!\n",
                                dev->name, q->cntxt_id & 7);
                }
                spin_unlock(&q->lock);
                return NETDEV_TX_BUSY;
        }

        q->in_use += ndesc;
        if (unlikely(credits - ndesc < q->stop_thres)) {
                netif_stop_queue(dev);
                set_bit(TXQ_ETH, &qs->txq_stopped);

                if (should_restart_tx(q) &&
                    test_and_clear_bit(TXQ_ETH, &qs->txq_stopped)) {
                        q->restarts++;
                        netif_wake_queue(dev);
                }
        }

        gen = q->gen;
        q->unacked += ndesc;
        compl = (q->unacked & 8) << (S_WR_COMPL - 3);
        q->unacked &= 7;
        pidx = q->pidx;
        q->pidx += ndesc;
        if (q->pidx >= q->size) {
                q->pidx -= q->size;
                q->gen ^= 1;
        }

        /* update port statistics */
        if (skb->ip_summed == CHECKSUM_COMPLETE)
                qs->port_stats[SGE_PSTAT_TX_CSUM]++;
        if (skb_shinfo(skb)->gso_size)
                qs->port_stats[SGE_PSTAT_TSO]++;
        if (vlan_tx_tag_present(skb) && pi->vlan_grp)
                qs->port_stats[SGE_PSTAT_VLANINS]++;

        dev->trans_start = jiffies;
        spin_unlock(&q->lock);

        /*
         * We do not use Tx completion interrupts to free DMAd Tx packets.
         * This is good for performance but means that we rely on new Tx
         * packets arriving to run the destructors of completed packets,
         * which open up space in their sockets' send queues.  Sometimes
         * we do not get such new packets causing Tx to stall.  A single
         * UDP transmitter is a good example of this situation.  We have
         * a clean up timer that periodically reclaims completed packets
         * but it doesn't run often enough (nor do we want it to) to prevent
         * lengthy stalls.  A solution to this problem is to run the
         * destructor early, after the packet is queued but before it's DMAd.
         * A cons is that we lie to socket memory accounting, but the amount
         * of extra memory is reasonable (limited by the number of Tx
         * descriptors), the packets do actually get freed quickly by new
         * packets almost always, and for protocols like TCP that wait for
         * acks to really free up the data the extra memory is even less.
         * On the positive side we run the destructors on the sending CPU
         * rather than on a potentially different completing CPU, usually a
         * good thing.  We also run them without holding our Tx queue lock,
         * unlike what reclaim_completed_tx() would otherwise do.
         *
         * Run the destructor before telling the DMA engine about the packet
         * to make sure it doesn't complete and get freed prematurely.
         */
        if (likely(!skb_shared(skb)))
                skb_orphan(skb);

        write_tx_pkt_wr(adap, skb, pi, pidx, gen, q, ndesc, compl);
        check_ring_tx_db(adap, q);
        return NETDEV_TX_OK;
}
/**
 *	write_imm - write a packet into a Tx descriptor as immediate data
 *	@d: the Tx descriptor to write
 *	@skb: the packet
 *	@len: the length of packet data to write as immediate data
 *	@gen: the generation bit value to write
 *
 *	Writes a packet as immediate data into a Tx descriptor.  The packet
 *	contains a work request at its beginning.  We must write the packet
 *	carefully so the SGE doesn't read it accidentally before it's written
 *	in its entirety.
 */
static inline void write_imm(struct tx_desc *d, struct sk_buff *skb,
                             unsigned int len, unsigned int gen)
{
        struct work_request_hdr *from = (struct work_request_hdr *)skb->data;
        struct work_request_hdr *to = (struct work_request_hdr *)d;

        memcpy(&to[1], &from[1], len - sizeof(*from));
        to->wr_hi = from->wr_hi | htonl(F_WR_SOP | F_WR_EOP |
                                        V_WR_BCNTLFLT(len & 7));
        wmb();
        to->wr_lo = from->wr_lo | htonl(V_WR_GEN(gen) |
                                        V_WR_LEN((len + 7) / 8));
        wr_gen2(d, gen);
        kfree_skb(skb);
}
/**
 *	check_desc_avail - check descriptor availability on a send queue
 *	@adap: the adapter
 *	@q: the send queue
 *	@skb: the packet needing the descriptors
 *	@ndesc: the number of Tx descriptors needed
 *	@qid: the Tx queue number in its queue set (TXQ_OFLD or TXQ_CTRL)
 *
 *	Checks if the requested number of Tx descriptors is available on an
 *	SGE send queue.  If the queue is already suspended or not enough
 *	descriptors are available the packet is queued for later transmission.
 *	Must be called with the Tx queue locked.
 *
 *	Returns 0 if enough descriptors are available, 1 if there aren't
 *	enough descriptors and the packet has been queued, and 2 if the caller
 *	needs to retry because there weren't enough descriptors at the
 *	beginning of the call but some freed up in the mean time.
 */
static inline int check_desc_avail(struct adapter *adap, struct sge_txq *q,
                                   struct sk_buff *skb, unsigned int ndesc,
                                   unsigned int qid)
{
        if (unlikely(!skb_queue_empty(&q->sendq))) {
              addq_exit:__skb_queue_tail(&q->sendq, skb);
                return 1;
        }
        if (unlikely(q->size - q->in_use < ndesc)) {
                struct sge_qset *qs = txq_to_qset(q, qid);

                set_bit(qid, &qs->txq_stopped);
                smp_mb__after_clear_bit();

                if (should_restart_tx(q) &&
                    test_and_clear_bit(qid, &qs->txq_stopped))
                        return 2;

                goto addq_exit;
        }
        return 0;
}
/**
 *	reclaim_completed_tx_imm - reclaim completed control-queue Tx descs
 *	@q: the SGE control Tx queue
 *
 *	This is a variant of reclaim_completed_tx() that is used for Tx queues
 *	that send only immediate data (presently just the control queues) and
 *	thus do not have any sk_buffs to release.
 */
static inline void reclaim_completed_tx_imm(struct sge_txq *q)
{
        unsigned int reclaim = q->processed - q->cleaned;

        q->in_use -= reclaim;
        q->cleaned += reclaim;
}
static inline int immediate(const struct sk_buff *skb)
{
        return skb->len <= WR_LEN && !skb->data_len;
}
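/*
 * Illustrative sketch, not part of the driver: the immediate-data test above
 * in standalone form.  A packet can be written directly into a Tx descriptor
 * only if its total length fits in one WR and it has no paged data (i.e.
 * data_len, the non-linear part, is zero).  Guard macro is hypothetical.
 */
#ifdef CXGB3_SGE_EXAMPLES
static int example_fits_as_immediate(unsigned int len, unsigned int data_len)
{
        return len <= WR_LEN && !data_len;
}
#endif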
/**
 *	ctrl_xmit - send a packet through an SGE control Tx queue
 *	@adap: the adapter
 *	@q: the control queue
 *	@skb: the packet
 *
 *	Send a packet through an SGE control Tx queue.  Packets sent through
 *	a control queue must fit entirely as immediate data in a single Tx
 *	descriptor and have no page fragments.
 */
static int ctrl_xmit(struct adapter *adap, struct sge_txq *q,
                     struct sk_buff *skb)
{
        int ret;
        struct work_request_hdr *wrp = (struct work_request_hdr *)skb->data;

        if (unlikely(!immediate(skb))) {
                WARN_ON(1);
                kfree_skb(skb);
                return NET_XMIT_SUCCESS;
        }

        wrp->wr_hi |= htonl(F_WR_SOP | F_WR_EOP);
        wrp->wr_lo = htonl(V_WR_TID(q->token));

        spin_lock(&q->lock);
      again:reclaim_completed_tx_imm(q);

        ret = check_desc_avail(adap, q, skb, 1, TXQ_CTRL);
        if (unlikely(ret)) {
                if (ret == 1) {
                        spin_unlock(&q->lock);
                        return NET_XMIT_CN;
                }
                goto again;
        }

        write_imm(&q->desc[q->pidx], skb, skb->len, q->gen);

        q->in_use++;
        if (++q->pidx >= q->size) {
                q->pidx = 0;
                q->gen ^= 1;
        }
        spin_unlock(&q->lock);
        wmb();
        t3_write_reg(adap, A_SG_KDOORBELL,
                     F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
        return NET_XMIT_SUCCESS;
}
/**
 *	restart_ctrlq - restart a suspended control queue
 *	@qs: the queue set containing the control queue
 *
 *	Resumes transmission on a suspended Tx control queue.
 */
static void restart_ctrlq(unsigned long data)
{
        struct sk_buff *skb;
        struct sge_qset *qs = (struct sge_qset *)data;
        struct sge_txq *q = &qs->txq[TXQ_CTRL];
        struct adapter *adap = qs->netdev->priv;

        spin_lock(&q->lock);
      again:reclaim_completed_tx_imm(q);

        while (q->in_use < q->size && (skb = __skb_dequeue(&q->sendq)) != NULL) {

                write_imm(&q->desc[q->pidx], skb, skb->len, q->gen);

                if (++q->pidx >= q->size) {
                        q->pidx = 0;
                        q->gen ^= 1;
                }
                q->in_use++;
        }

        if (!skb_queue_empty(&q->sendq)) {
                set_bit(TXQ_CTRL, &qs->txq_stopped);
                smp_mb__after_clear_bit();

                if (should_restart_tx(q) &&
                    test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped))
                        goto again;
        }

        spin_unlock(&q->lock);
        t3_write_reg(adap, A_SG_KDOORBELL,
                     F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
}
/*
 * Send a management message through control queue 0
 */
int t3_mgmt_tx(struct adapter *adap, struct sk_buff *skb)
{
        return ctrl_xmit(adap, &adap->sge.qs[0].txq[TXQ_CTRL], skb);
}
/**
 *	write_ofld_wr - write an offload work request
 *	@adap: the adapter
 *	@skb: the packet to send
 *	@q: the Tx queue
 *	@pidx: index of the first Tx descriptor to write
 *	@gen: the generation value to use
 *	@ndesc: number of descriptors the packet will occupy
 *
 *	Write an offload work request to send the supplied packet.  The packet
 *	data already carry the work request with most fields populated.
 */
static void write_ofld_wr(struct adapter *adap, struct sk_buff *skb,
                          struct sge_txq *q, unsigned int pidx,
                          unsigned int gen, unsigned int ndesc)
{
        unsigned int sgl_flits, flits;
        struct work_request_hdr *from;
        struct sg_ent *sgp, sgl[MAX_SKB_FRAGS / 2 + 1];
        struct tx_desc *d = &q->desc[pidx];

        if (immediate(skb)) {
                q->sdesc[pidx].skb = NULL;
                write_imm(d, skb, skb->len, gen);
                return;
        }

        /* Only TX_DATA builds SGLs */

        from = (struct work_request_hdr *)skb->data;
        memcpy(&d->flit[1], &from[1], skb->h.raw - skb->data - sizeof(*from));

        flits = (skb->h.raw - skb->data) / 8;
        sgp = ndesc == 1 ? (struct sg_ent *)&d->flit[flits] : sgl;
        sgl_flits = make_sgl(skb, sgp, skb->h.raw, skb->tail - skb->h.raw,
                             adap->pdev);
        if (need_skb_unmap())
                ((struct unmap_info *)skb->cb)->len = skb->tail - skb->h.raw;

        write_wr_hdr_sgl(ndesc, skb, d, pidx, q, sgl, flits, sgl_flits,
                         gen, from->wr_hi, from->wr_lo);
}
/**
 *	calc_tx_descs_ofld - calculate # of Tx descriptors for an offload packet
 *	@skb: the packet
 *
 *	Returns the number of Tx descriptors needed for the given offload
 *	packet.  These packets are already fully constructed.
 */
static inline unsigned int calc_tx_descs_ofld(const struct sk_buff *skb)
{
        unsigned int flits, cnt = skb_shinfo(skb)->nr_frags;

        if (skb->len <= WR_LEN && cnt == 0)
                return 1;       /* packet fits as immediate data */

        flits = (skb->h.raw - skb->data) / 8;   /* headers */
        if (skb->tail != skb->h.raw)
                cnt++;
        return flits_to_desc(flits + sgl_len(cnt));
}
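/*
 * Illustrative sketch, not part of the driver: offload descriptor sizing in
 * standalone form.  hdr_len stands in for the pre-built WR/CPL headers at the
 * front of the packet (skb->h.raw - skb->data above) and is assumed to be a
 * multiple of 8; the guard macro is hypothetical.
 */
#ifdef CXGB3_SGE_EXAMPLES
static unsigned int example_ofld_descs(unsigned int len, unsigned int hdr_len,
                                       unsigned int nr_frags, int has_tail_data)
{
        unsigned int flits;

        if (len <= WR_LEN && nr_frags == 0)
                return 1;                       /* fits as immediate data */

        flits = hdr_len / 8;                    /* header flits */
        if (has_tail_data)
                nr_frags++;                     /* linear payload is one more SGL entry */
        return flits_to_desc(flits + sgl_len(nr_frags));
}
#endif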
/**
 *	ofld_xmit - send a packet through an offload queue
 *	@adap: the adapter
 *	@q: the Tx offload queue
 *	@skb: the packet
 *
 *	Send an offload packet through an SGE offload queue.
 */
static int ofld_xmit(struct adapter *adap, struct sge_txq *q,
                     struct sk_buff *skb)
{
        int ret;
        unsigned int ndesc = calc_tx_descs_ofld(skb), pidx, gen;

        spin_lock(&q->lock);
      again:reclaim_completed_tx(adap, q);

        ret = check_desc_avail(adap, q, skb, ndesc, TXQ_OFLD);
        if (unlikely(ret)) {
                if (ret == 1) {
                        skb->priority = ndesc;  /* save for restart */
                        spin_unlock(&q->lock);
                        return NET_XMIT_CN;
                }
                goto again;
        }

        gen = q->gen;
        q->in_use += ndesc;
        pidx = q->pidx;
        q->pidx += ndesc;
        if (q->pidx >= q->size) {
                q->pidx -= q->size;
                q->gen ^= 1;
        }
        spin_unlock(&q->lock);

        write_ofld_wr(adap, skb, q, pidx, gen, ndesc);
        check_ring_tx_db(adap, q);
        return NET_XMIT_SUCCESS;
}
/**
 *	restart_offloadq - restart a suspended offload queue
 *	@qs: the queue set containing the offload queue
 *
 *	Resumes transmission on a suspended Tx offload queue.
 */
static void restart_offloadq(unsigned long data)
{
        struct sk_buff *skb;
        struct sge_qset *qs = (struct sge_qset *)data;
        struct sge_txq *q = &qs->txq[TXQ_OFLD];
        struct adapter *adap = qs->netdev->priv;

        spin_lock(&q->lock);
      again:reclaim_completed_tx(adap, q);

        while ((skb = skb_peek(&q->sendq)) != NULL) {
                unsigned int gen, pidx;
                unsigned int ndesc = skb->priority;

                if (unlikely(q->size - q->in_use < ndesc)) {
                        set_bit(TXQ_OFLD, &qs->txq_stopped);
                        smp_mb__after_clear_bit();

                        if (should_restart_tx(q) &&
                            test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped))
                                goto again;
                        break;
                }

                gen = q->gen;
                q->in_use += ndesc;
                pidx = q->pidx;
                q->pidx += ndesc;
                if (q->pidx >= q->size) {
                        q->pidx -= q->size;
                        q->gen ^= 1;
                }
                __skb_unlink(skb, &q->sendq);
                spin_unlock(&q->lock);

                write_ofld_wr(adap, skb, q, pidx, gen, ndesc);
                spin_lock(&q->lock);
        }
        spin_unlock(&q->lock);

#if USE_GTS
        set_bit(TXQ_RUNNING, &q->flags);
        set_bit(TXQ_LAST_PKT_DB, &q->flags);
#endif
        t3_write_reg(adap, A_SG_KDOORBELL,
                     F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
}
/**
 *	queue_set - return the queue set a packet should use
 *	@skb: the packet
 *
 *	Maps a packet to the SGE queue set it should use.  The desired queue
 *	set is carried in bits 1-3 in the packet's priority.
 */
static inline int queue_set(const struct sk_buff *skb)
{
        return skb->priority >> 1;
}

/**
 *	is_ctrl_pkt - return whether an offload packet is a control packet
 *	@skb: the packet
 *
 *	Determines whether an offload packet should use an OFLD or a CTRL
 *	Tx queue.  This is indicated by bit 0 in the packet's priority.
 */
static inline int is_ctrl_pkt(const struct sk_buff *skb)
{
        return skb->priority & 1;
}
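/*
 * Illustrative sketch, not part of the driver: how t3_offload_tx() below
 * decodes skb->priority.  Bit 0 selects the control queue, bits 1-3 select
 * the queue set; e.g. priority 5 (0b101) means queue set 2, control queue.
 * Guard macro is hypothetical.
 */
#ifdef CXGB3_SGE_EXAMPLES
static void example_decode_priority(unsigned int priority,
                                    unsigned int *qset, int *is_ctrl)
{
        *qset = priority >> 1;          /* queue set index from bits 1-3 */
        *is_ctrl = priority & 1;        /* bit 0: CTRL vs. OFLD queue */
}
#endif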
/**
 *	t3_offload_tx - send an offload packet
 *	@tdev: the offload device to send to
 *	@skb: the packet
 *
 *	Sends an offload packet.  We use the packet priority to select the
 *	appropriate Tx queue as follows: bit 0 indicates whether the packet
 *	should be sent as regular or control, bits 1-3 select the queue set.
 */
int t3_offload_tx(struct t3cdev *tdev, struct sk_buff *skb)
{
        struct adapter *adap = tdev2adap(tdev);
        struct sge_qset *qs = &adap->sge.qs[queue_set(skb)];

        if (unlikely(is_ctrl_pkt(skb)))
                return ctrl_xmit(adap, &qs->txq[TXQ_CTRL], skb);

        return ofld_xmit(adap, &qs->txq[TXQ_OFLD], skb);
}
/**
 *	offload_enqueue - add an offload packet to an SGE offload receive queue
 *	@q: the SGE response queue
 *	@skb: the packet
 *
 *	Add a new offload packet to an SGE response queue's offload packet
 *	queue.  If the packet is the first on the queue it schedules the RX
 *	softirq to process the queue.
 */
static inline void offload_enqueue(struct sge_rspq *q, struct sk_buff *skb)
{
        skb->next = skb->prev = NULL;
        if (q->rx_tail)
                q->rx_tail->next = skb;
        else {
                struct sge_qset *qs = rspq_to_qset(q);

                if (__netif_rx_schedule_prep(qs->netdev))
                        __netif_rx_schedule(qs->netdev);
                q->rx_head = skb;
        }
        q->rx_tail = skb;
}
/**
 *	deliver_partial_bundle - deliver a (partial) bundle of Rx offload pkts
 *	@tdev: the offload device that will be receiving the packets
 *	@q: the SGE response queue that assembled the bundle
 *	@skbs: the partial bundle
 *	@n: the number of packets in the bundle
 *
 *	Delivers a (partial) bundle of Rx offload packets to an offload device.
 */
static inline void deliver_partial_bundle(struct t3cdev *tdev,
                                          struct sge_rspq *q,
                                          struct sk_buff *skbs[], int n)
{
        if (n) {
                q->offload_bundles++;
                tdev->recv(tdev, skbs, n);
        }
}
/**
 *	ofld_poll - NAPI handler for offload packets in interrupt mode
 *	@dev: the network device doing the polling
 *	@budget: polling budget
 *
 *	The NAPI handler for offload packets when a response queue is serviced
 *	by the hard interrupt handler, i.e., when it's operating in non-polling
 *	mode.  Creates small packet batches and sends them through the offload
 *	receive handler.  Batches need to be of modest size as we do prefetches
 *	on the packets in each.
 */
static int ofld_poll(struct net_device *dev, int *budget)
{
        struct adapter *adapter = dev->priv;
        struct sge_qset *qs = dev2qset(dev);
        struct sge_rspq *q = &qs->rspq;
        int work_done, limit = min(*budget, dev->quota), avail = limit;

        while (avail) {
                struct sk_buff *head, *tail, *skbs[RX_BUNDLE_SIZE];
                int ngathered;

                spin_lock_irq(&q->lock);
                head = q->rx_head;
                if (!head) {
                        work_done = limit - avail;
                        *budget -= work_done;
                        dev->quota -= work_done;
                        __netif_rx_complete(dev);
                        spin_unlock_irq(&q->lock);
                        return 0;
                }

                tail = q->rx_tail;
                q->rx_head = q->rx_tail = NULL;
                spin_unlock_irq(&q->lock);

                for (ngathered = 0; avail && head; avail--) {
                        prefetch(head->data);
                        skbs[ngathered] = head;
                        head = head->next;
                        skbs[ngathered]->next = NULL;
                        if (++ngathered == RX_BUNDLE_SIZE) {
                                q->offload_bundles++;
                                adapter->tdev.recv(&adapter->tdev, skbs,
                                                   ngathered);
                                ngathered = 0;
                        }
                }
                if (head) {     /* splice remaining packets back onto Rx queue */
                        spin_lock_irq(&q->lock);
                        tail->next = q->rx_head;
                        if (!q->rx_head)
                                q->rx_tail = tail;
                        q->rx_head = head;
                        spin_unlock_irq(&q->lock);
                }
                deliver_partial_bundle(&adapter->tdev, q, skbs, ngathered);
        }
        work_done = limit - avail;
        *budget -= work_done;
        dev->quota -= work_done;
        return 1;
}
/**
 *	rx_offload - process a received offload packet
 *	@tdev: the offload device receiving the packet
 *	@rq: the response queue that received the packet
 *	@skb: the packet
 *	@rx_gather: a gather list of packets if we are building a bundle
 *	@gather_idx: index of the next available slot in the bundle
 *
 *	Process an ingress offload packet and add it to the offload ingress
 *	queue.  Returns the index of the next available slot in the bundle.
 */
static inline int rx_offload(struct t3cdev *tdev, struct sge_rspq *rq,
                             struct sk_buff *skb, struct sk_buff *rx_gather[],
                             unsigned int gather_idx)
{
        skb->mac.raw = skb->nh.raw = skb->h.raw = skb->data;

        if (rq->polling) {
                rx_gather[gather_idx++] = skb;
                if (gather_idx == RX_BUNDLE_SIZE) {
                        tdev->recv(tdev, rx_gather, RX_BUNDLE_SIZE);
                        gather_idx = 0;
                        rq->offload_bundles++;
                }
        } else
                offload_enqueue(rq, skb);

        return gather_idx;
}
/**
 *	restart_tx - check whether to restart suspended Tx queues
 *	@qs: the queue set to resume
 *
 *	Restarts suspended Tx queues of an SGE queue set if they have enough
 *	free resources to resume operation.
 */
static void restart_tx(struct sge_qset *qs)
{
        if (test_bit(TXQ_ETH, &qs->txq_stopped) &&
            should_restart_tx(&qs->txq[TXQ_ETH]) &&
            test_and_clear_bit(TXQ_ETH, &qs->txq_stopped)) {
                qs->txq[TXQ_ETH].restarts++;
                if (netif_running(qs->netdev))
                        netif_wake_queue(qs->netdev);
        }

        if (test_bit(TXQ_OFLD, &qs->txq_stopped) &&
            should_restart_tx(&qs->txq[TXQ_OFLD]) &&
            test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped)) {
                qs->txq[TXQ_OFLD].restarts++;
                tasklet_schedule(&qs->txq[TXQ_OFLD].qresume_tsk);
        }
        if (test_bit(TXQ_CTRL, &qs->txq_stopped) &&
            should_restart_tx(&qs->txq[TXQ_CTRL]) &&
            test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped)) {
                qs->txq[TXQ_CTRL].restarts++;
                tasklet_schedule(&qs->txq[TXQ_CTRL].qresume_tsk);
        }
}
/**
 *	rx_eth - process an ingress ethernet packet
 *	@adap: the adapter
 *	@rq: the response queue that received the packet
 *	@skb: the packet
 *	@pad: amount of padding at the start of the buffer
 *
 *	Process an ingress ethernet packet and deliver it to the stack.
 *	The padding is 2 if the packet was delivered in an Rx buffer and 0
 *	if it was immediate data in a response.
 */
static void rx_eth(struct adapter *adap, struct sge_rspq *rq,
                   struct sk_buff *skb, int pad)
{
        struct cpl_rx_pkt *p = (struct cpl_rx_pkt *)(skb->data + pad);
        struct port_info *pi;

        skb_pull(skb, sizeof(*p) + pad);
        skb->dev = adap->port[p->iff];
        skb->dev->last_rx = jiffies;
        skb->protocol = eth_type_trans(skb, skb->dev);
        pi = netdev_priv(skb->dev);
        if (pi->rx_csum_offload && p->csum_valid && p->csum == 0xffff &&
            !p->fragment) {
                rspq_to_qset(rq)->port_stats[SGE_PSTAT_RX_CSUM_GOOD]++;
                skb->ip_summed = CHECKSUM_UNNECESSARY;
        } else
                skb->ip_summed = CHECKSUM_NONE;

        if (unlikely(p->vlan_valid)) {
                struct vlan_group *grp = pi->vlan_grp;

                rspq_to_qset(rq)->port_stats[SGE_PSTAT_VLANEX]++;
                if (likely(grp))
                        __vlan_hwaccel_rx(skb, grp, ntohs(p->vlan),
                                          rq->polling);
                else
                        dev_kfree_skb_any(skb);
        } else if (rq->polling)
                netif_receive_skb(skb);
        else
                netif_rx(skb);
}
/**
 *	handle_rsp_cntrl_info - handles control information in a response
 *	@qs: the queue set corresponding to the response
 *	@flags: the response control flags
 *
 *	Handles the control information of an SGE response, such as GTS
 *	indications and completion credits for the queue set's Tx queues.
 *	HW coalesces credits, we don't do any extra SW coalescing.
 */
static inline void handle_rsp_cntrl_info(struct sge_qset *qs, u32 flags)
{
        unsigned int credits;

#if USE_GTS
        if (flags & F_RSPD_TXQ0_GTS)
                clear_bit(TXQ_RUNNING, &qs->txq[TXQ_ETH].flags);
#endif

        credits = G_RSPD_TXQ0_CR(flags);
        if (credits)
                qs->txq[TXQ_ETH].processed += credits;

        credits = G_RSPD_TXQ2_CR(flags);
        if (credits)
                qs->txq[TXQ_CTRL].processed += credits;

#if USE_GTS
        if (flags & F_RSPD_TXQ1_GTS)
                clear_bit(TXQ_RUNNING, &qs->txq[TXQ_OFLD].flags);
#endif
        credits = G_RSPD_TXQ1_CR(flags);
        if (credits)
                qs->txq[TXQ_OFLD].processed += credits;
}
/**
 *	check_ring_db - check if we need to ring any doorbells
 *	@adapter: the adapter
 *	@qs: the queue set whose Tx queues are to be examined
 *	@sleeping: indicates which Tx queue sent GTS
 *
 *	Checks if some of a queue set's Tx queues need to ring their doorbells
 *	to resume transmission after idling while they still have unprocessed
 *	descriptors.
 */
static void check_ring_db(struct adapter *adap, struct sge_qset *qs,
                          unsigned int sleeping)
{
        if (sleeping & F_RSPD_TXQ0_GTS) {
                struct sge_txq *txq = &qs->txq[TXQ_ETH];

                if (txq->cleaned + txq->in_use != txq->processed &&
                    !test_and_set_bit(TXQ_LAST_PKT_DB, &txq->flags)) {
                        set_bit(TXQ_RUNNING, &txq->flags);
                        t3_write_reg(adap, A_SG_KDOORBELL, F_SELEGRCNTX |
                                     V_EGRCNTX(txq->cntxt_id));
                }
        }

        if (sleeping & F_RSPD_TXQ1_GTS) {
                struct sge_txq *txq = &qs->txq[TXQ_OFLD];

                if (txq->cleaned + txq->in_use != txq->processed &&
                    !test_and_set_bit(TXQ_LAST_PKT_DB, &txq->flags)) {
                        set_bit(TXQ_RUNNING, &txq->flags);
                        t3_write_reg(adap, A_SG_KDOORBELL, F_SELEGRCNTX |
                                     V_EGRCNTX(txq->cntxt_id));
                }
        }
}
/**
 *	is_new_response - check if a response is newly written
 *	@r: the response descriptor
 *	@q: the response queue
 *
 *	Returns true if a response descriptor contains a yet unprocessed
 *	response.
 */
static inline int is_new_response(const struct rsp_desc *r,
                                  const struct sge_rspq *q)
{
        return (r->intr_gen & F_RSPD_GEN2) == q->gen;
}

#define RSPD_GTS_MASK  (F_RSPD_TXQ0_GTS | F_RSPD_TXQ1_GTS)
#define RSPD_CTRL_MASK (RSPD_GTS_MASK | \
                        V_RSPD_TXQ0_CR(M_RSPD_TXQ0_CR) | \
                        V_RSPD_TXQ1_CR(M_RSPD_TXQ1_CR) | \
                        V_RSPD_TXQ2_CR(M_RSPD_TXQ2_CR))

/* How long to delay the next interrupt in case of memory shortage, in 0.1us. */
#define NOMEM_INTR_DELAY 2500
/**
 *	process_responses - process responses from an SGE response queue
 *	@adap: the adapter
 *	@qs: the queue set to which the response queue belongs
 *	@budget: how many responses can be processed in this round
 *
 *	Process responses from an SGE response queue up to the supplied budget.
 *	Responses include received packets as well as credits and other events
 *	for the queues that belong to the response queue's queue set.
 *	A negative budget is effectively unlimited.
 *
 *	Additionally choose the interrupt holdoff time for the next interrupt
 *	on this queue.  If the system is under memory shortage use a fairly
 *	long delay to help recovery.
 */
static int process_responses(struct adapter *adap, struct sge_qset *qs,
                             int budget)
{
        struct sge_rspq *q = &qs->rspq;
        struct rsp_desc *r = &q->desc[q->cidx];
        int budget_left = budget;
        unsigned int sleeping = 0;
        struct sk_buff *offload_skbs[RX_BUNDLE_SIZE];
        int ngathered = 0;

        q->next_holdoff = q->holdoff_tmr;

        while (likely(budget_left && is_new_response(r, q))) {
                int eth, ethpad = 0;
                struct sk_buff *skb = NULL;
                u32 len, flags = ntohl(r->flags);
                u32 rss_hi = *(const u32 *)r, rss_lo = r->rss_hdr.rss_hash_val;

                eth = r->rss_hdr.opcode == CPL_RX_PKT;

                if (unlikely(flags & F_RSPD_ASYNC_NOTIF)) {
                        skb = alloc_skb(AN_PKT_SIZE, GFP_ATOMIC);
                        if (!skb)
                                goto no_mem;

                        memcpy(__skb_put(skb, AN_PKT_SIZE), r, AN_PKT_SIZE);
                        skb->data[0] = CPL_ASYNC_NOTIF;
                        rss_hi = htonl(CPL_ASYNC_NOTIF << 24);
                } else if (flags & F_RSPD_IMM_DATA_VALID) {
                        skb = get_imm_packet(r);
                        if (unlikely(!skb)) {
no_mem:
                                q->next_holdoff = NOMEM_INTR_DELAY;
                                /* consume one credit since we tried */
                                q->credits++;
                                budget_left--;
                                break;
                        }
                } else if ((len = ntohl(r->len_cq)) != 0) {
                        struct sge_fl *fl;

                        fl = (len & F_RSPD_FLQ) ? &qs->fl[1] : &qs->fl[0];
                        fl->credits--;
                        skb = get_packet(adap, fl, G_RSPD_LEN(len),
                                         eth ? SGE_RX_DROP_THRES : 0);
                        if (!skb)
                                q->rx_drops++;
                        else if (r->rss_hdr.opcode == CPL_TRACE_PKT)
                                __skb_pull(skb, 2);
                        ethpad = 2;
                        if (++fl->cidx == fl->size)
                                fl->cidx = 0;
                } else
                        q->pure_rsps++;

                if (flags & RSPD_CTRL_MASK) {
                        sleeping |= flags & RSPD_GTS_MASK;
                        handle_rsp_cntrl_info(qs, flags);
                }

                r++;
                if (unlikely(++q->cidx == q->size)) {
                        q->cidx = 0;
                        q->gen ^= 1;
                        r = q->desc;
                }
                prefetch(r);

                if (++q->credits >= (q->size / 4)) {
                        refill_rspq(adap, q, q->credits);
                        q->credits = 0;
                }

                if (likely(skb != NULL)) {
                        if (eth)
                                rx_eth(adap, q, skb, ethpad);
                        else {
                                /* Preserve the RSS info in csum & priority */
                                skb->csum = rss_hi;
                                skb->priority = rss_lo;
                                ngathered = rx_offload(&adap->tdev, q, skb,
                                                       offload_skbs, ngathered);
                        }
                }

                --budget_left;
        }

        deliver_partial_bundle(&adap->tdev, q, offload_skbs, ngathered);
        if (sleeping)
                check_ring_db(adap, qs, sleeping);

        smp_mb();               /* commit Tx queue .processed updates */
        if (unlikely(qs->txq_stopped != 0))
                restart_tx(qs);

        budget -= budget_left;
        return budget;
}
static inline int is_pure_response(const struct rsp_desc *r)
{
        u32 n = ntohl(r->flags) & (F_RSPD_ASYNC_NOTIF | F_RSPD_IMM_DATA_VALID);

        return (n | r->len_cq) == 0;
}
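/*
 * Illustrative sketch, not part of the driver: the pure-response test above
 * in standalone form.  A response is "pure" (carries no packet data) when it
 * has no async-notification flag, no immediate data, and a zero length/CQ
 * field.  The guard macro is hypothetical; flags and len_cq are taken in
 * host order here for clarity, unlike the big-endian descriptor fields.
 */
#ifdef CXGB3_SGE_EXAMPLES
static int example_is_pure(u32 flags, u32 len_cq)
{
        u32 n = flags & (F_RSPD_ASYNC_NOTIF | F_RSPD_IMM_DATA_VALID);

        return (n | len_cq) == 0;
}
#endif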
/**
 *	napi_rx_handler - the NAPI handler for Rx processing
 *	@dev: the net device
 *	@budget: how many packets we can process in this round
 *
 *	Handler for new data events when using NAPI.
 */
static int napi_rx_handler(struct net_device *dev, int *budget)
{
        struct adapter *adap = dev->priv;
        struct sge_qset *qs = dev2qset(dev);
        int effective_budget = min(*budget, dev->quota);

        int work_done = process_responses(adap, qs, effective_budget);
        *budget -= work_done;
        dev->quota -= work_done;

        if (work_done >= effective_budget)
                return 1;

        netif_rx_complete(dev);

        /*
         * Because we don't atomically flush the following write it is
         * possible that in very rare cases it can reach the device in a way
         * that races with a new response being written plus an error interrupt
         * causing the NAPI interrupt handler below to return unhandled status
         * to the OS.  To protect against this would require flushing the write
         * and doing both the write and the flush with interrupts off.  Way too
         * expensive and unjustifiable given the rarity of the race.
         *
         * The race cannot happen at all with MSI-X.
         */
        t3_write_reg(adap, A_SG_GTS, V_RSPQ(qs->rspq.cntxt_id) |
                     V_NEWTIMER(qs->rspq.next_holdoff) |
                     V_NEWINDEX(qs->rspq.cidx));
        return 0;
}
/*
 * Returns true if the device is already scheduled for polling.
 */
static inline int napi_is_scheduled(struct net_device *dev)
{
        return test_bit(__LINK_STATE_RX_SCHED, &dev->state);
}
/**
 *	process_pure_responses - process pure responses from a response queue
 *	@adap: the adapter
 *	@qs: the queue set owning the response queue
 *	@r: the first pure response to process
 *
 *	A simpler version of process_responses() that handles only pure (i.e.,
 *	non data-carrying) responses.  Such responses are too light-weight to
 *	justify calling a softirq under NAPI, so we handle them specially in
 *	the interrupt handler.  The function is called with a pointer to a
 *	response, which the caller must ensure is a valid pure response.
 *
 *	Returns 1 if it encounters a valid data-carrying response, 0 otherwise.
 */
static int process_pure_responses(struct adapter *adap, struct sge_qset *qs,
                                  struct rsp_desc *r)
{
        struct sge_rspq *q = &qs->rspq;
        unsigned int sleeping = 0;

        do {
                u32 flags = ntohl(r->flags);

                r++;
                if (unlikely(++q->cidx == q->size)) {
                        q->cidx = 0;
                        q->gen ^= 1;
                        r = q->desc;
                }
                prefetch(r);

                if (flags & RSPD_CTRL_MASK) {
                        sleeping |= flags & RSPD_GTS_MASK;
                        handle_rsp_cntrl_info(qs, flags);
                }

                q->pure_rsps++;
                if (++q->credits >= (q->size / 4)) {
                        refill_rspq(adap, q, q->credits);
                        q->credits = 0;
                }
        } while (is_new_response(r, q) && is_pure_response(r));

        if (sleeping)
                check_ring_db(adap, qs, sleeping);

        smp_mb();               /* commit Tx queue .processed updates */
        if (unlikely(qs->txq_stopped != 0))
                restart_tx(qs);

        return is_new_response(r, q);
}
/**
 *	handle_responses - decide what to do with new responses in NAPI mode
 *	@adap: the adapter
 *	@q: the response queue
 *
 *	This is used by the NAPI interrupt handlers to decide what to do with
 *	new SGE responses.  If there are no new responses it returns -1.  If
 *	there are new responses and they are pure (i.e., non-data carrying)
 *	it handles them straight in hard interrupt context as they are very
 *	cheap and don't deliver any packets.  Finally, if there are any data
 *	signaling responses it schedules the NAPI handler.  Returns 1 if it
 *	schedules NAPI, 0 if all new responses were pure.
 *
 *	The caller must ascertain NAPI is not already running.
 */
static inline int handle_responses(struct adapter *adap, struct sge_rspq *q)
{
        struct sge_qset *qs = rspq_to_qset(q);
        struct rsp_desc *r = &q->desc[q->cidx];

        if (!is_new_response(r, q))
                return -1;
        if (is_pure_response(r) && process_pure_responses(adap, qs, r) == 0) {
                t3_write_reg(adap, A_SG_GTS, V_RSPQ(q->cntxt_id) |
                             V_NEWTIMER(q->holdoff_tmr) | V_NEWINDEX(q->cidx));
                return 0;
        }
        if (likely(__netif_rx_schedule_prep(qs->netdev)))
                __netif_rx_schedule(qs->netdev);
        return 1;
}
/*
 * The MSI-X interrupt handler for an SGE response queue for the non-NAPI case
 * (i.e., response queue serviced in hard interrupt).
 */
irqreturn_t t3_sge_intr_msix(int irq, void *cookie)
{
        struct sge_qset *qs = cookie;
        struct adapter *adap = qs->netdev->priv;
        struct sge_rspq *q = &qs->rspq;

        spin_lock(&q->lock);
        if (process_responses(adap, qs, -1) == 0)
                q->unhandled_irqs++;
        t3_write_reg(adap, A_SG_GTS, V_RSPQ(q->cntxt_id) |
                     V_NEWTIMER(q->next_holdoff) | V_NEWINDEX(q->cidx));
        spin_unlock(&q->lock);
        return IRQ_HANDLED;
}
/*
 * The MSI-X interrupt handler for an SGE response queue for the NAPI case
 * (i.e., response queue serviced by NAPI polling).
 */
irqreturn_t t3_sge_intr_msix_napi(int irq, void *cookie)
{
        struct sge_qset *qs = cookie;
        struct adapter *adap = qs->netdev->priv;
        struct sge_rspq *q = &qs->rspq;

        spin_lock(&q->lock);
        BUG_ON(napi_is_scheduled(qs->netdev));

        if (handle_responses(adap, q) < 0)
                q->unhandled_irqs++;
        spin_unlock(&q->lock);
        return IRQ_HANDLED;
}
/*
 * The non-NAPI MSI interrupt handler.  This needs to handle data events from
 * SGE response queues as well as error and other async events as they all use
 * the same MSI vector.  We use one SGE response queue per port in this mode
 * and protect all response queues with queue 0's lock.
 */
static irqreturn_t t3_intr_msi(int irq, void *cookie)
{
        int new_packets = 0;
        struct adapter *adap = cookie;
        struct sge_rspq *q = &adap->sge.qs[0].rspq;

        spin_lock(&q->lock);

        if (process_responses(adap, &adap->sge.qs[0], -1)) {
                t3_write_reg(adap, A_SG_GTS, V_RSPQ(q->cntxt_id) |
                             V_NEWTIMER(q->next_holdoff) | V_NEWINDEX(q->cidx));
                new_packets = 1;
        }

        if (adap->params.nports == 2 &&
            process_responses(adap, &adap->sge.qs[1], -1)) {
                struct sge_rspq *q1 = &adap->sge.qs[1].rspq;

                t3_write_reg(adap, A_SG_GTS, V_RSPQ(q1->cntxt_id) |
                             V_NEWTIMER(q1->next_holdoff) |
                             V_NEWINDEX(q1->cidx));
                new_packets = 1;
        }

        if (!new_packets && t3_slow_intr_handler(adap) == 0)
                q->unhandled_irqs++;

        spin_unlock(&q->lock);
        return IRQ_HANDLED;
}
static int rspq_check_napi(struct net_device *dev, struct sge_rspq *q)
{
        if (!napi_is_scheduled(dev) && is_new_response(&q->desc[q->cidx], q)) {
                if (likely(__netif_rx_schedule_prep(dev)))
                        __netif_rx_schedule(dev);
                return 1;
        }
        return 0;
}
/*
 * The MSI interrupt handler for the NAPI case (i.e., response queues serviced
 * by NAPI polling).  Handles data events from SGE response queues as well as
 * error and other async events as they all use the same MSI vector.  We use
 * one SGE response queue per port in this mode and protect all response
 * queues with queue 0's lock.
 */
irqreturn_t t3_intr_msi_napi(int irq, void *cookie)
{
	int new_packets;
	struct adapter *adap = cookie;
	struct sge_rspq *q = &adap->sge.qs[0].rspq;

	spin_lock(&q->lock);

	new_packets = rspq_check_napi(adap->sge.qs[0].netdev, q);
	if (adap->params.nports == 2)
		new_packets += rspq_check_napi(adap->sge.qs[1].netdev,
					       &adap->sge.qs[1].rspq);
	if (!new_packets && t3_slow_intr_handler(adap) == 0)
		q->unhandled_irqs++;

	spin_unlock(&q->lock);
	return IRQ_HANDLED;
}
/*
 * A helper function that processes responses and issues GTS.
 */
static inline int process_responses_gts(struct adapter *adap,
					struct sge_rspq *rq)
{
	int work;

	work = process_responses(adap, rspq_to_qset(rq), -1);
	t3_write_reg(adap, A_SG_GTS, V_RSPQ(rq->cntxt_id) |
		     V_NEWTIMER(rq->next_holdoff) | V_NEWINDEX(rq->cidx));
	return work;
}
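
/*
 * Note: the A_SG_GTS write above (and the similar writes in the interrupt
 * handlers) reports the response queue's new consumer index to the hardware
 * and selects the holdoff timer to use next; the target queue is identified
 * by its context id via V_RSPQ().
 */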
/*
 * The legacy INTx interrupt handler.  This needs to handle data events from
 * SGE response queues as well as error and other async events as they all use
 * the same interrupt pin.  We use one SGE response queue per port in this mode
 * and protect all response queues with queue 0's lock.
 */
static irqreturn_t t3_intr(int irq, void *cookie)
{
	int work_done, w0, w1;
	struct adapter *adap = cookie;
	struct sge_rspq *q0 = &adap->sge.qs[0].rspq;
	struct sge_rspq *q1 = &adap->sge.qs[1].rspq;

	spin_lock(&q0->lock);

	w0 = is_new_response(&q0->desc[q0->cidx], q0);
	w1 = adap->params.nports == 2 &&
	    is_new_response(&q1->desc[q1->cidx], q1);

	if (likely(w0 | w1)) {
		t3_write_reg(adap, A_PL_CLI, 0);
		t3_read_reg(adap, A_PL_CLI);	/* flush */

		if (likely(w0))
			process_responses_gts(adap, q0);

		if (w1)
			process_responses_gts(adap, q1);

		work_done = w0 | w1;
	} else
		work_done = t3_slow_intr_handler(adap);

	spin_unlock(&q0->lock);
	return IRQ_RETVAL(work_done != 0);
}
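
/*
 * Unlike the handler above, which must peek at each response queue's
 * descriptors to discover pending work, the T3B handlers below read the
 * A_SG_DATA_INTR summary register: bit 0 and bit 1 indicate new responses on
 * queue 0 and queue 1 respectively, and F_ERRINTR flags async error events.
 */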
/*
 * Interrupt handler for legacy INTx interrupts for T3B-based cards.
 * Handles data events from SGE response queues as well as error and other
 * async events as they all use the same interrupt pin.  We use one SGE
 * response queue per port in this mode and protect all response queues with
 * queue 0's lock.
 */
static irqreturn_t t3b_intr(int irq, void *cookie)
{
	u32 map;
	struct adapter *adap = cookie;
	struct sge_rspq *q0 = &adap->sge.qs[0].rspq;

	t3_write_reg(adap, A_PL_CLI, 0);
	map = t3_read_reg(adap, A_SG_DATA_INTR);

	if (unlikely(!map))	/* shared interrupt, most likely */
		return IRQ_NONE;

	spin_lock(&q0->lock);

	if (unlikely(map & F_ERRINTR))
		t3_slow_intr_handler(adap);

	if (likely(map & 1))
		process_responses_gts(adap, q0);

	if (map & 2)
		process_responses_gts(adap, &adap->sge.qs[1].rspq);

	spin_unlock(&q0->lock);
	return IRQ_HANDLED;
}
/*
 * NAPI interrupt handler for legacy INTx interrupts for T3B-based cards.
 * Handles data events from SGE response queues as well as error and other
 * async events as they all use the same interrupt pin.  We use one SGE
 * response queue per port in this mode and protect all response queues with
 * queue 0's lock.
 */
static irqreturn_t t3b_intr_napi(int irq, void *cookie)
{
	u32 map;
	struct net_device *dev;
	struct adapter *adap = cookie;
	struct sge_rspq *q0 = &adap->sge.qs[0].rspq;

	t3_write_reg(adap, A_PL_CLI, 0);
	map = t3_read_reg(adap, A_SG_DATA_INTR);

	if (unlikely(!map))	/* shared interrupt, most likely */
		return IRQ_NONE;

	spin_lock(&q0->lock);

	if (unlikely(map & F_ERRINTR))
		t3_slow_intr_handler(adap);

	if (likely(map & 1)) {
		dev = adap->sge.qs[0].netdev;

		if (likely(__netif_rx_schedule_prep(dev)))
			__netif_rx_schedule(dev);
	}
	if (map & 2) {
		dev = adap->sge.qs[1].netdev;

		if (likely(__netif_rx_schedule_prep(dev)))
			__netif_rx_schedule(dev);
	}

	spin_unlock(&q0->lock);
	return IRQ_HANDLED;
}
/**
 *	t3_intr_handler - select the top-level interrupt handler
 *	@adap: the adapter
 *	@polling: whether using NAPI to service response queues
 *
 *	Selects the top-level interrupt handler based on the type of interrupts
 *	(MSI-X, MSI, or legacy) and whether NAPI will be used to service the
 *	response queues.
 */
intr_handler_t t3_intr_handler(struct adapter *adap, int polling)
{
	if (adap->flags & USING_MSIX)
		return polling ? t3_sge_intr_msix_napi : t3_sge_intr_msix;
	if (adap->flags & USING_MSI)
		return polling ? t3_intr_msi_napi : t3_intr_msi;
	if (adap->params.rev > 0)
		return polling ? t3b_intr_napi : t3b_intr;
	return t3_intr;
}
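
/*
 * Illustrative only (not part of this file): the handler selected above is
 * what the driver's top level would typically pass to request_irq(), e.g.
 *
 *	err = request_irq(adap->pdev->irq, t3_intr_handler(adap, polling),
 *			  IRQF_SHARED, adap->name, adap);
 *
 * The exact flags and cookie are the caller's choice; note that the MSI-X
 * handlers expect a queue set, not the adapter, as their cookie.
 */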
/**
 *	t3_sge_err_intr_handler - SGE async event interrupt handler
 *	@adapter: the adapter
 *
 *	Interrupt handler for SGE asynchronous (non-data) events.
 */
void t3_sge_err_intr_handler(struct adapter *adapter)
{
	unsigned int v, status = t3_read_reg(adapter, A_SG_INT_CAUSE);

	if (status & F_RSPQCREDITOVERFOW)
		CH_ALERT(adapter, "SGE response queue credit overflow\n");

	if (status & F_RSPQDISABLED) {
		v = t3_read_reg(adapter, A_SG_RSPQ_FL_STATUS);

		CH_ALERT(adapter,
			 "packet delivered to disabled response queue "
			 "(0x%x)\n", (v >> S_RSPQ0DISABLED) & 0xff);
	}

	t3_write_reg(adapter, A_SG_INT_CAUSE, status);
	if (status & (F_RSPQCREDITOVERFOW | F_RSPQDISABLED))
		t3_fatal_err(adapter);
}
/**
 *	sge_timer_cb - perform periodic maintenance of an SGE qset
 *	@data: the SGE queue set to maintain
 *
 *	Runs periodically from a timer to perform maintenance of an SGE queue
 *	set.  It performs two tasks:
 *
 *	a) Cleans up any completed Tx descriptors that may still be pending.
 *	Normal descriptor cleanup happens when new packets are added to a Tx
 *	queue so this timer is relatively infrequent and does any cleanup only
 *	if the Tx queue has not seen any new packets in a while.  We make a
 *	best effort attempt to reclaim descriptors, in that we don't wait
 *	around if we cannot get a queue's lock (which most likely is because
 *	someone else is queueing new packets and so will also handle the clean
 *	up).  Since control queues use immediate data exclusively we don't
 *	bother cleaning them up here.
 *
 *	b) Replenishes Rx queues that have run out due to memory shortage.
 *	Normally new Rx buffers are added when existing ones are consumed but
 *	when out of memory a queue can become empty.  We try to add only a few
 *	buffers here, the queue will be replenished fully as these new buffers
 *	are used up if memory shortage has subsided.
 */
static void sge_timer_cb(unsigned long data)
{
	spinlock_t *lock;
	struct sge_qset *qs = (struct sge_qset *)data;
	struct adapter *adap = qs->netdev->priv;

	if (spin_trylock(&qs->txq[TXQ_ETH].lock)) {
		reclaim_completed_tx(adap, &qs->txq[TXQ_ETH]);
		spin_unlock(&qs->txq[TXQ_ETH].lock);
	}
	if (spin_trylock(&qs->txq[TXQ_OFLD].lock)) {
		reclaim_completed_tx(adap, &qs->txq[TXQ_OFLD]);
		spin_unlock(&qs->txq[TXQ_OFLD].lock);
	}
	lock = (adap->flags & USING_MSIX) ? &qs->rspq.lock :
	    &adap->sge.qs[0].rspq.lock;
	if (spin_trylock_irq(lock)) {
		if (!napi_is_scheduled(qs->netdev)) {
			if (qs->fl[0].credits < qs->fl[0].size)
				__refill_fl(adap, &qs->fl[0]);
			if (qs->fl[1].credits < qs->fl[1].size)
				__refill_fl(adap, &qs->fl[1]);
		}
		spin_unlock_irq(lock);
	}
	mod_timer(&qs->tx_reclaim_timer, jiffies + TX_RECLAIM_PERIOD);
}
/**
 *	t3_update_qset_coalesce - update coalescing settings for a queue set
 *	@qs: the SGE queue set
 *	@p: new queue set parameters
 *
 *	Update the coalescing settings for an SGE queue set.  Nothing is done
 *	if the queue set is not initialized yet.
 */
void t3_update_qset_coalesce(struct sge_qset *qs, const struct qset_params *p)
{
	if (!qs->netdev)
		return;

	qs->rspq.holdoff_tmr = max(p->coalesce_usecs * 10, 1U);/* can't be 0 */
	qs->rspq.polling = p->polling;
	qs->netdev->poll = p->polling ? napi_rx_handler : ofld_poll;
}
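
/*
 * The factor of 10 above converts microseconds into SGE timer ticks:
 * t3_sge_init() below programs A_SG_TIMER_TICK to core_ticks_per_usec() / 10,
 * which makes each holdoff unit roughly 100 ns, so a holdoff of
 * coalesce_usecs microseconds corresponds to coalesce_usecs * 10 ticks.
 */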
/**
 *	t3_sge_alloc_qset - initialize an SGE queue set
 *	@adapter: the adapter
 *	@id: the queue set id
 *	@nports: how many Ethernet ports will be using this queue set
 *	@irq_vec_idx: the IRQ vector index for response queue interrupts
 *	@p: configuration parameters for this queue set
 *	@ntxq: number of Tx queues for the queue set
 *	@netdev: net device associated with this queue set
 *
 *	Allocate resources and initialize an SGE queue set.  A queue set
 *	comprises a response queue, two Rx free-buffer queues, and up to 3
 *	Tx queues.  The Tx queues are assigned roles in the order Ethernet
 *	queue, offload queue, and control queue.
 */
int t3_sge_alloc_qset(struct adapter *adapter, unsigned int id, int nports,
		      int irq_vec_idx, const struct qset_params *p,
		      int ntxq, struct net_device *netdev)
{
	int i, ret = -ENOMEM;
	struct sge_qset *q = &adapter->sge.qs[id];

	init_qset_cntxt(q, id);
	init_timer(&q->tx_reclaim_timer);
	q->tx_reclaim_timer.data = (unsigned long)q;
	q->tx_reclaim_timer.function = sge_timer_cb;

	q->fl[0].desc = alloc_ring(adapter->pdev, p->fl_size,
				   sizeof(struct rx_desc),
				   sizeof(struct rx_sw_desc),
				   &q->fl[0].phys_addr, &q->fl[0].sdesc);
	if (!q->fl[0].desc)
		goto err;

	q->fl[1].desc = alloc_ring(adapter->pdev, p->jumbo_size,
				   sizeof(struct rx_desc),
				   sizeof(struct rx_sw_desc),
				   &q->fl[1].phys_addr, &q->fl[1].sdesc);
	if (!q->fl[1].desc)
		goto err;

	q->rspq.desc = alloc_ring(adapter->pdev, p->rspq_size,
				  sizeof(struct rsp_desc), 0,
				  &q->rspq.phys_addr, NULL);
	if (!q->rspq.desc)
		goto err;

	for (i = 0; i < ntxq; ++i) {
		/*
		 * The control queue always uses immediate data so does not
		 * need to keep track of any sk_buffs.
		 */
		size_t sz = i == TXQ_CTRL ? 0 : sizeof(struct tx_sw_desc);

		q->txq[i].desc = alloc_ring(adapter->pdev, p->txq_size[i],
					    sizeof(struct tx_desc), sz,
					    &q->txq[i].phys_addr,
					    &q->txq[i].sdesc);
		if (!q->txq[i].desc)
			goto err;

		q->txq[i].gen = 1;
		q->txq[i].size = p->txq_size[i];
		spin_lock_init(&q->txq[i].lock);
		skb_queue_head_init(&q->txq[i].sendq);
	}

	tasklet_init(&q->txq[TXQ_OFLD].qresume_tsk, restart_offloadq,
		     (unsigned long)q);
	tasklet_init(&q->txq[TXQ_CTRL].qresume_tsk, restart_ctrlq,
		     (unsigned long)q);

	q->fl[0].gen = q->fl[1].gen = 1;
	q->fl[0].size = p->fl_size;
	q->fl[1].size = p->jumbo_size;

	q->rspq.gen = 1;
	q->rspq.size = p->rspq_size;
	spin_lock_init(&q->rspq.lock);

	q->txq[TXQ_ETH].stop_thres = nports *
	    flits_to_desc(sgl_len(MAX_SKB_FRAGS + 1) + 3);

	if (ntxq == 1) {
		q->fl[0].buf_size = SGE_RX_SM_BUF_SIZE + 2 +
		    sizeof(struct cpl_rx_pkt);
		q->fl[1].buf_size = MAX_FRAME_SIZE + 2 +
		    sizeof(struct cpl_rx_pkt);
	} else {
		q->fl[0].buf_size = SGE_RX_SM_BUF_SIZE +
		    sizeof(struct cpl_rx_data);
		q->fl[1].buf_size = (16 * 1024) -
		    SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
	}

	spin_lock(&adapter->sge.reg_lock);

	/* FL threshold comparison uses < */
	ret = t3_sge_init_rspcntxt(adapter, q->rspq.cntxt_id, irq_vec_idx,
				   q->rspq.phys_addr, q->rspq.size,
				   q->fl[0].buf_size, 1, 0);
	if (ret)
		goto err_unlock;

	for (i = 0; i < SGE_RXQ_PER_SET; ++i) {
		ret = t3_sge_init_flcntxt(adapter, q->fl[i].cntxt_id, 0,
					  q->fl[i].phys_addr, q->fl[i].size,
					  q->fl[i].buf_size, p->cong_thres, 1,
					  0);
		if (ret)
			goto err_unlock;
	}

	ret = t3_sge_init_ecntxt(adapter, q->txq[TXQ_ETH].cntxt_id, USE_GTS,
				 SGE_CNTXT_ETH, id, q->txq[TXQ_ETH].phys_addr,
				 q->txq[TXQ_ETH].size, q->txq[TXQ_ETH].token,
				 1, 0);
	if (ret)
		goto err_unlock;

	if (ntxq > 1) {
		ret = t3_sge_init_ecntxt(adapter, q->txq[TXQ_OFLD].cntxt_id,
					 USE_GTS, SGE_CNTXT_OFLD, id,
					 q->txq[TXQ_OFLD].phys_addr,
					 q->txq[TXQ_OFLD].size, 0, 1, 0);
		if (ret)
			goto err_unlock;
	}

	if (ntxq > 2) {
		ret = t3_sge_init_ecntxt(adapter, q->txq[TXQ_CTRL].cntxt_id, 0,
					 SGE_CNTXT_CTRL, id,
					 q->txq[TXQ_CTRL].phys_addr,
					 q->txq[TXQ_CTRL].size,
					 q->txq[TXQ_CTRL].token, 1, 0);
		if (ret)
			goto err_unlock;
	}

	spin_unlock(&adapter->sge.reg_lock);
	q->netdev = netdev;
	t3_update_qset_coalesce(q, p);

	/*
	 * We use atalk_ptr as a backpointer to a qset.  In case a device is
	 * associated with multiple queue sets only the first one sets
	 * atalk_ptr.
	 */
	if (netdev->atalk_ptr == NULL)
		netdev->atalk_ptr = q;

	refill_fl(adapter, &q->fl[0], q->fl[0].size, GFP_KERNEL);
	refill_fl(adapter, &q->fl[1], q->fl[1].size, GFP_KERNEL);
	refill_rspq(adapter, &q->rspq, q->rspq.size - 1);

	t3_write_reg(adapter, A_SG_GTS, V_RSPQ(q->rspq.cntxt_id) |
		     V_NEWTIMER(q->rspq.holdoff_tmr));

	mod_timer(&q->tx_reclaim_timer, jiffies + TX_RECLAIM_PERIOD);
	return 0;

err_unlock:
	spin_unlock(&adapter->sge.reg_lock);
err:
	t3_free_qset(adapter, q);
	return ret;
}
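
/*
 * Illustrative call only (not taken verbatim from the driver): the top level
 * typically allocates one queue set per port once interrupts are configured,
 * along the lines of
 *
 *	err = t3_sge_alloc_qset(adap, i, 1,
 *				(adap->flags & USING_MSIX) ? i + 1 : irq_idx,
 *				&adap->params.sge.qset[i], ntxq, dev);
 *
 * and releases everything with t3_free_sge_resources() on failure.
 */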
/**
 *	t3_free_sge_resources - free SGE resources
 *	@adap: the adapter
 *
 *	Frees resources used by the SGE queue sets.
 */
void t3_free_sge_resources(struct adapter *adap)
{
	int i;

	for (i = 0; i < SGE_QSETS; ++i)
		t3_free_qset(adap, &adap->sge.qs[i]);
}
/**
 *	t3_sge_start - enable SGE
 *	@adap: the adapter
 *
 *	Enables the SGE for DMAs.  This is the last step in starting packet
 *	transfers.
 */
void t3_sge_start(struct adapter *adap)
{
	t3_set_reg_field(adap, A_SG_CONTROL, F_GLOBALENABLE, F_GLOBALENABLE);
}
/**
 *	t3_sge_stop - disable SGE operation
 *	@adap: the adapter
 *
 *	Disables the DMA engine.  This can be called in emergencies (e.g.,
 *	from error interrupts) or from normal process context.  In the latter
 *	case it also disables any pending queue restart tasklets.  Note that
 *	if it is called in interrupt context it cannot disable the restart
 *	tasklets as it cannot wait, however the tasklets will have no effect
 *	since the doorbells are disabled and the driver will call this again
 *	later from process context, at which time the tasklets will be stopped
 *	if they are still running.
 */
void t3_sge_stop(struct adapter *adap)
{
	t3_set_reg_field(adap, A_SG_CONTROL, F_GLOBALENABLE, 0);
	if (!in_interrupt()) {
		int i;

		for (i = 0; i < SGE_QSETS; ++i) {
			struct sge_qset *qs = &adap->sge.qs[i];

			tasklet_kill(&qs->txq[TXQ_OFLD].qresume_tsk);
			tasklet_kill(&qs->txq[TXQ_CTRL].qresume_tsk);
		}
	}
}
/**
 *	t3_sge_init - initialize SGE
 *	@adap: the adapter
 *	@p: the SGE parameters
 *
 *	Performs SGE initialization needed every time after a chip reset.
 *	We do not initialize any of the queue sets here, instead the driver
 *	top-level must request those individually.  We also do not enable DMA
 *	here, that should be done after the queues have been set up.
 */
void t3_sge_init(struct adapter *adap, struct sge_params *p)
{
	unsigned int ctrl, ups = ffs(pci_resource_len(adap->pdev, 2) >> 12);

	ctrl = F_DROPPKT | V_PKTSHIFT(2) | F_FLMODE | F_AVOIDCQOVFL |
	    F_CQCRDTCTRL |
	    V_HOSTPAGESIZE(PAGE_SHIFT - 11) | F_BIGENDIANINGRESS |
	    V_USERSPACESIZE(ups ? ups - 1 : 0) | F_ISCSICOALESCING;
#if SGE_NUM_GENBITS == 1
	ctrl |= F_EGRGENCTRL;
#endif
	if (adap->params.rev > 0) {
		if (!(adap->flags & (USING_MSIX | USING_MSI)))
			ctrl |= F_ONEINTMULTQ | F_OPTONEINTMULTQ;
		ctrl |= F_CQCRDTCTRL | F_AVOIDCQOVFL;
	}
	t3_write_reg(adap, A_SG_CONTROL, ctrl);
	t3_write_reg(adap, A_SG_EGR_RCQ_DRB_THRSH, V_HIRCQDRBTHRSH(512) |
		     V_LORCQDRBTHRSH(512));
	t3_write_reg(adap, A_SG_TIMER_TICK, core_ticks_per_usec(adap) / 10);
	t3_write_reg(adap, A_SG_CMDQ_CREDIT_TH, V_THRESHOLD(32) |
		     V_TIMEOUT(200 * core_ticks_per_usec(adap)));
	t3_write_reg(adap, A_SG_HI_DRB_HI_THRSH, 1000);
	t3_write_reg(adap, A_SG_HI_DRB_LO_THRSH, 256);
	t3_write_reg(adap, A_SG_LO_DRB_HI_THRSH, 1000);
	t3_write_reg(adap, A_SG_LO_DRB_LO_THRSH, 256);
	t3_write_reg(adap, A_SG_OCO_BASE, V_BASE1(0xfff));
	t3_write_reg(adap, A_SG_DRB_PRI_THRESH, 63 * 1024);
}
/**
 *	t3_sge_prep - one-time SGE initialization
 *	@adap: the associated adapter
 *	@p: SGE parameters
 *
 *	Performs one-time initialization of SGE SW state.  Includes determining
 *	defaults for the assorted SGE parameters, which admins can change until
 *	they are used to initialize the SGE.
 */
void __devinit t3_sge_prep(struct adapter *adap, struct sge_params *p)
{
	int i;

	p->max_pkt_size = (16 * 1024) - sizeof(struct cpl_rx_data) -
	    SKB_DATA_ALIGN(sizeof(struct skb_shared_info));

	for (i = 0; i < SGE_QSETS; ++i) {
		struct qset_params *q = p->qset + i;

		q->polling = adap->params.rev > 0;
		q->coalesce_usecs = 5;
		q->rspq_size = 1024;
		q->fl_size = 1024;
		q->jumbo_size = 512;
		q->txq_size[TXQ_ETH] = 1024;
		q->txq_size[TXQ_OFLD] = 1024;
		q->txq_size[TXQ_CTRL] = 256;
		q->cong_thres = 0;
	}

	spin_lock_init(&adap->sge.reg_lock);
}
/**
 *	t3_get_desc - dump an SGE descriptor for debugging purposes
 *	@qs: the queue set
 *	@qnum: identifies the specific queue (0..2: Tx, 3:response, 4..5: Rx)
 *	@idx: the descriptor index in the queue
 *	@data: where to dump the descriptor contents
 *
 *	Dumps the contents of a HW descriptor of an SGE queue.  Returns the
 *	size of the descriptor.
 */
int t3_get_desc(const struct sge_qset *qs, unsigned int qnum, unsigned int idx,
		unsigned char *data)
{
	if (qnum >= 6)
		return -EINVAL;

	if (qnum < 3) {
		if (!qs->txq[qnum].desc || idx >= qs->txq[qnum].size)
			return -EINVAL;
		memcpy(data, &qs->txq[qnum].desc[idx], sizeof(struct tx_desc));
		return sizeof(struct tx_desc);
	}

	if (qnum == 3) {
		if (!qs->rspq.desc || idx >= qs->rspq.size)
			return -EINVAL;
		memcpy(data, &qs->rspq.desc[idx], sizeof(struct rsp_desc));
		return sizeof(struct rsp_desc);
	}

	qnum -= 4;
	if (!qs->fl[qnum].desc || idx >= qs->fl[qnum].size)
		return -EINVAL;
	memcpy(data, &qs->fl[qnum].desc[idx], sizeof(struct rx_desc));
	return sizeof(struct rx_desc);
}