cxgb3 - Fix dma mapping error path
[linux-2.6/mini2440.git] / drivers/net/cxgb3/sge.c
blob 0741deb86ca6a5901cf2b01c1d44469832a05c9d
1 /*
2 * Copyright (c) 2005-2007 Chelsio, Inc. All rights reserved.
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
32 #include <linux/skbuff.h>
33 #include <linux/netdevice.h>
34 #include <linux/etherdevice.h>
35 #include <linux/if_vlan.h>
36 #include <linux/ip.h>
37 #include <linux/tcp.h>
38 #include <linux/dma-mapping.h>
39 #include "common.h"
40 #include "regs.h"
41 #include "sge_defs.h"
42 #include "t3_cpl.h"
43 #include "firmware_exports.h"
45 #define USE_GTS 0
47 #define SGE_RX_SM_BUF_SIZE 1536
49 #define SGE_RX_COPY_THRES 256
50 #define SGE_RX_PULL_LEN 128
53 * Page chunk size for FL0 buffers if FL0 is to be populated with page chunks.
54 * It must be a divisor of PAGE_SIZE. If set to 0 FL0 will use sk_buffs
55 * directly.
57 #define FL0_PG_CHUNK_SIZE 2048
59 #define SGE_RX_DROP_THRES 16
62 * Period of the Tx buffer reclaim timer. This timer does not need to run
63 * frequently as Tx buffers are usually reclaimed by new Tx packets.
65 #define TX_RECLAIM_PERIOD (HZ / 4)
67 /* WR size in bytes */
68 #define WR_LEN (WR_FLITS * 8)
71 * Types of Tx queues in each queue set. Order here matters, do not change.
73 enum { TXQ_ETH, TXQ_OFLD, TXQ_CTRL };
75 /* Values for sge_txq.flags */
76 enum {
77 TXQ_RUNNING = 1 << 0, /* fetch engine is running */
78 TXQ_LAST_PKT_DB = 1 << 1, /* last packet rang the doorbell */
81 struct tx_desc {
82 __be64 flit[TX_DESC_FLITS];
85 struct rx_desc {
86 __be32 addr_lo;
87 __be32 len_gen;
88 __be32 gen2;
89 __be32 addr_hi;
92 struct tx_sw_desc { /* SW state per Tx descriptor */
93 struct sk_buff *skb;
94 u8 eop; /* set if last descriptor for packet */
95 u8 addr_idx; /* buffer index of first SGL entry in descriptor */
96 u8 fragidx; /* first page fragment associated with descriptor */
97 s8 sflit; /* start flit of first SGL entry in descriptor */
100 struct rx_sw_desc { /* SW state per Rx descriptor */
101 union {
102 struct sk_buff *skb;
103 struct fl_pg_chunk pg_chunk;
105 DECLARE_PCI_UNMAP_ADDR(dma_addr);
108 struct rsp_desc { /* response queue descriptor */
109 struct rss_header rss_hdr;
110 __be32 flags;
111 __be32 len_cq;
112 u8 imm_data[47];
113 u8 intr_gen;
117 * Holds unmapping information for Tx packets that need deferred unmapping.
118 * This structure lives at skb->head and must be allocated by callers.
120 struct deferred_unmap_info {
121 struct pci_dev *pdev;
122 dma_addr_t addr[MAX_SKB_FRAGS + 1];
126 * Maps a number of flits to the number of Tx descriptors that can hold them.
127 * The formula is
129 * desc = 1 + (flits - 2) / (WR_FLITS - 1).
131 * HW allows up to 4 descriptors to be combined into a WR.
133 static u8 flit_desc_map[] = {
135 #if SGE_NUM_GENBITS == 1
136 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
137 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
138 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
139 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4
140 #elif SGE_NUM_GENBITS == 2
141 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
142 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
143 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
144 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
145 #else
146 # error "SGE_NUM_GENBITS must be 1 or 2"
147 #endif
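/*
 * Worked example for the mapping above (assuming WR_FLITS == 16, which is
 * what the SGE_NUM_GENBITS == 1 table implies): a WR of 20 flits needs
 * 1 + (20 - 2) / (16 - 1) = 2 Tx descriptors, matching the value 2 the
 * table stores for 20 flits.  The table simply precomputes that division
 * so flits_to_desc() below is a single array lookup.
 */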
150 static inline struct sge_qset *fl_to_qset(const struct sge_fl *q, int qidx)
152 return container_of(q, struct sge_qset, fl[qidx]);
155 static inline struct sge_qset *rspq_to_qset(const struct sge_rspq *q)
157 return container_of(q, struct sge_qset, rspq);
160 static inline struct sge_qset *txq_to_qset(const struct sge_txq *q, int qidx)
162 return container_of(q, struct sge_qset, txq[qidx]);
166 * refill_rspq - replenish an SGE response queue
167 * @adapter: the adapter
168 * @q: the response queue to replenish
169 * @credits: how many new responses to make available
171 * Replenishes a response queue by making the supplied number of responses
172 * available to HW.
174 static inline void refill_rspq(struct adapter *adapter,
175 const struct sge_rspq *q, unsigned int credits)
177 rmb();
178 t3_write_reg(adapter, A_SG_RSPQ_CREDIT_RETURN,
179 V_RSPQ(q->cntxt_id) | V_CREDITS(credits));
183 * need_skb_unmap - does the platform need unmapping of sk_buffs?
185 * Returns true if the platform needs sk_buff unmapping. The compiler
186 * optimizes away unnecessary code if this returns true.
188 static inline int need_skb_unmap(void)
191 * This structure is used to tell if the platform needs buffer
192 * unmapping by checking if DECLARE_PCI_UNMAP_ADDR defines anything.
194 struct dummy {
195 DECLARE_PCI_UNMAP_ADDR(addr);
198 return sizeof(struct dummy) != 0;
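/*
 * How the sizeof trick above works (a sketch; the exact expansion is
 * per-architecture): on platforms that must save DMA unmap state,
 * DECLARE_PCI_UNMAP_ADDR(addr) expands to roughly
 *
 *	dma_addr_t addr;
 *
 * so sizeof(struct dummy) != 0 and the unmap paths are kept.  On
 * platforms where unmapping is a no-op it expands to nothing, the empty
 * struct has size 0 under gcc, need_skb_unmap() becomes a compile-time
 * constant 0, and the guarded unmap code is eliminated.
 */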
202 * unmap_skb - unmap a packet main body and its page fragments
203 * @skb: the packet
204 * @q: the Tx queue containing Tx descriptors for the packet
205 * @cidx: index of Tx descriptor
206 * @pdev: the PCI device
208 * Unmap the main body of an sk_buff and its page fragments, if any.
209 * Because of the fairly complicated structure of our SGLs and the desire
210 * to conserve space for metadata, the information necessary to unmap an
211 * sk_buff is spread across the sk_buff itself (buffer lengths), the HW Tx
212 * descriptors (the physical addresses of the various data buffers), and
213 * the SW descriptor state (assorted indices). The send functions
214 * initialize the indices for the first packet descriptor so we can unmap
215 * the buffers held in the first Tx descriptor here, and we have enough
216 * information at this point to set the state for the next Tx descriptor.
218 * Note that it is possible to clean up the first descriptor of a packet
219 * before the send routines have written the next descriptors, but this
220 * race does not cause any problem. We just end up writing the unmapping
221 * info for the descriptor first.
223 static inline void unmap_skb(struct sk_buff *skb, struct sge_txq *q,
224 unsigned int cidx, struct pci_dev *pdev)
226 const struct sg_ent *sgp;
227 struct tx_sw_desc *d = &q->sdesc[cidx];
228 int nfrags, frag_idx, curflit, j = d->addr_idx;
230 sgp = (struct sg_ent *)&q->desc[cidx].flit[d->sflit];
231 frag_idx = d->fragidx;
233 if (frag_idx == 0 && skb_headlen(skb)) {
234 pci_unmap_single(pdev, be64_to_cpu(sgp->addr[0]),
235 skb_headlen(skb), PCI_DMA_TODEVICE);
236 j = 1;
239 curflit = d->sflit + 1 + j;
240 nfrags = skb_shinfo(skb)->nr_frags;
242 while (frag_idx < nfrags && curflit < WR_FLITS) {
243 pci_unmap_page(pdev, be64_to_cpu(sgp->addr[j]),
244 skb_shinfo(skb)->frags[frag_idx].size,
245 PCI_DMA_TODEVICE);
246 j ^= 1;
247 if (j == 0) {
248 sgp++;
249 curflit++;
251 curflit++;
252 frag_idx++;
255 if (frag_idx < nfrags) { /* SGL continues into next Tx descriptor */
256 d = cidx + 1 == q->size ? q->sdesc : d + 1;
257 d->fragidx = frag_idx;
258 d->addr_idx = j;
259 d->sflit = curflit - WR_FLITS - j; /* sflit can be -1 */
264 * free_tx_desc - reclaims Tx descriptors and their buffers
265 * @adapter: the adapter
266 * @q: the Tx queue to reclaim descriptors from
267 * @n: the number of descriptors to reclaim
269 * Reclaims Tx descriptors from an SGE Tx queue and frees the associated
270 * Tx buffers. Called with the Tx queue lock held.
272 static void free_tx_desc(struct adapter *adapter, struct sge_txq *q,
273 unsigned int n)
275 struct tx_sw_desc *d;
276 struct pci_dev *pdev = adapter->pdev;
277 unsigned int cidx = q->cidx;
279 const int need_unmap = need_skb_unmap() &&
280 q->cntxt_id >= FW_TUNNEL_SGEEC_START;
282 d = &q->sdesc[cidx];
283 while (n--) {
284 if (d->skb) { /* an SGL is present */
285 if (need_unmap)
286 unmap_skb(d->skb, q, cidx, pdev);
287 if (d->eop)
288 kfree_skb(d->skb);
290 ++d;
291 if (++cidx == q->size) {
292 cidx = 0;
293 d = q->sdesc;
296 q->cidx = cidx;
300 * reclaim_completed_tx - reclaims completed Tx descriptors
301 * @adapter: the adapter
302 * @q: the Tx queue to reclaim completed descriptors from
304 * Reclaims Tx descriptors that the SGE has indicated it has processed,
305 * and frees the associated buffers if possible. Called with the Tx
306 * queue's lock held.
308 static inline void reclaim_completed_tx(struct adapter *adapter,
309 struct sge_txq *q)
311 unsigned int reclaim = q->processed - q->cleaned;
313 if (reclaim) {
314 free_tx_desc(adapter, q, reclaim);
315 q->cleaned += reclaim;
316 q->in_use -= reclaim;
321 * should_restart_tx - are there enough resources to restart a Tx queue?
322 * @q: the Tx queue
324 * Checks if there are enough descriptors to restart a suspended Tx queue.
326 static inline int should_restart_tx(const struct sge_txq *q)
328 unsigned int r = q->processed - q->cleaned;
330 return q->in_use - r < (q->size >> 1);
334 * free_rx_bufs - free the Rx buffers on an SGE free list
335 * @pdev: the PCI device associated with the adapter
336 * @q: the SGE free list to clean up
338 * Release the buffers on an SGE free-buffer Rx queue. HW fetching from
339 * this queue should be stopped before calling this function.
341 static void free_rx_bufs(struct pci_dev *pdev, struct sge_fl *q)
343 unsigned int cidx = q->cidx;
345 while (q->credits--) {
346 struct rx_sw_desc *d = &q->sdesc[cidx];
348 pci_unmap_single(pdev, pci_unmap_addr(d, dma_addr),
349 q->buf_size, PCI_DMA_FROMDEVICE);
350 if (q->use_pages) {
351 put_page(d->pg_chunk.page);
352 d->pg_chunk.page = NULL;
353 } else {
354 kfree_skb(d->skb);
355 d->skb = NULL;
357 if (++cidx == q->size)
358 cidx = 0;
361 if (q->pg_chunk.page) {
362 __free_page(q->pg_chunk.page);
363 q->pg_chunk.page = NULL;
368 * add_one_rx_buf - add a packet buffer to a free-buffer list
369 * @va: buffer start VA
370 * @len: the buffer length
371 * @d: the HW Rx descriptor to write
372 * @sd: the SW Rx descriptor to write
373 * @gen: the generation bit value
374 * @pdev: the PCI device associated with the adapter
376 * Add a buffer of the given length to the supplied HW and SW Rx
377 * descriptors.
379 static inline int add_one_rx_buf(void *va, unsigned int len,
380 struct rx_desc *d, struct rx_sw_desc *sd,
381 unsigned int gen, struct pci_dev *pdev)
383 dma_addr_t mapping;
385 mapping = pci_map_single(pdev, va, len, PCI_DMA_FROMDEVICE);
386 if (unlikely(pci_dma_mapping_error(mapping)))
387 return -ENOMEM;
389 pci_unmap_addr_set(sd, dma_addr, mapping);
391 d->addr_lo = cpu_to_be32(mapping);
392 d->addr_hi = cpu_to_be32((u64) mapping >> 32);
393 wmb();
394 d->len_gen = cpu_to_be32(V_FLD_GEN1(gen));
395 d->gen2 = cpu_to_be32(V_FLD_GEN2(gen));
396 return 0;
399 static int alloc_pg_chunk(struct sge_fl *q, struct rx_sw_desc *sd, gfp_t gfp)
401 if (!q->pg_chunk.page) {
402 q->pg_chunk.page = alloc_page(gfp);
403 if (unlikely(!q->pg_chunk.page))
404 return -ENOMEM;
405 q->pg_chunk.va = page_address(q->pg_chunk.page);
406 q->pg_chunk.offset = 0;
408 sd->pg_chunk = q->pg_chunk;
410 q->pg_chunk.offset += q->buf_size;
411 if (q->pg_chunk.offset == PAGE_SIZE)
412 q->pg_chunk.page = NULL;
413 else {
414 q->pg_chunk.va += q->buf_size;
415 get_page(q->pg_chunk.page);
417 return 0;
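/*
 * Refcount sketch for the chunking above: every chunk handed to a SW Rx
 * descriptor owns one page reference.  While the FL keeps the page around
 * for further carving it takes an extra reference with get_page(); once
 * the last chunk is taken the FL simply forgets the page, transferring
 * its reference to that chunk.  The page is finally freed when the last
 * user calls put_page() (see free_rx_bufs() above).
 */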
421 * refill_fl - refill an SGE free-buffer list
422 * @adapter: the adapter
423 * @q: the free-list to refill
424 * @n: the number of new buffers to allocate
425 * @gfp: the gfp flags for allocating new buffers
427 * (Re)populate an SGE free-buffer list with up to @n new packet buffers,
428 * allocated with the supplied gfp flags. The caller must assure that
429 * @n does not exceed the queue's capacity.
431 static int refill_fl(struct adapter *adap, struct sge_fl *q, int n, gfp_t gfp)
433 void *buf_start;
434 struct rx_sw_desc *sd = &q->sdesc[q->pidx];
435 struct rx_desc *d = &q->desc[q->pidx];
436 unsigned int count = 0;
438 while (n--) {
439 int err;
441 if (q->use_pages) {
442 if (unlikely(alloc_pg_chunk(q, sd, gfp))) {
443 nomem: q->alloc_failed++;
444 break;
446 buf_start = sd->pg_chunk.va;
447 } else {
448 struct sk_buff *skb = alloc_skb(q->buf_size, gfp);
450 if (!skb)
451 goto nomem;
453 sd->skb = skb;
454 buf_start = skb->data;
457 err = add_one_rx_buf(buf_start, q->buf_size, d, sd, q->gen,
458 adap->pdev);
459 if (unlikely(err)) {
460 if (!q->use_pages) {
461 kfree_skb(sd->skb);
462 sd->skb = NULL;
464 break;
467 d++;
468 sd++;
469 if (++q->pidx == q->size) {
470 q->pidx = 0;
471 q->gen ^= 1;
472 sd = q->sdesc;
473 d = q->desc;
475 q->credits++;
476 count++;
478 wmb();
479 if (likely(count))
480 t3_write_reg(adap, A_SG_KDOORBELL, V_EGRCNTX(q->cntxt_id));
482 return count;
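/*
 * Note on the DMA-mapping error path above: if pci_map_single() fails in
 * add_one_rx_buf(), refill_fl() frees the skb it just allocated (in the
 * sk_buff case), stops refilling, and rings the doorbell only for the
 * buffers it actually posted.  Because it returns that count, a caller
 * doing the initial fill can check that the ring really got populated;
 * an illustrative sketch (not necessarily the exact code used elsewhere
 * in this driver):
 *
 *	if (!refill_fl(adap, &q->fl[0], q->fl[0].size, GFP_KERNEL)) {
 *		dev_err(&adap->pdev->dev, "failed to fill free list 0\n");
 *		return -ENOMEM;
 *	}
 */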
485 static inline void __refill_fl(struct adapter *adap, struct sge_fl *fl)
487 refill_fl(adap, fl, min(16U, fl->size - fl->credits), GFP_ATOMIC);
491 * recycle_rx_buf - recycle a receive buffer
492 * @adapter: the adapter
493 * @q: the SGE free list
494 * @idx: index of buffer to recycle
496 * Recycles the specified buffer on the given free list by adding it at
497 * the next available slot on the list.
499 static void recycle_rx_buf(struct adapter *adap, struct sge_fl *q,
500 unsigned int idx)
502 struct rx_desc *from = &q->desc[idx];
503 struct rx_desc *to = &q->desc[q->pidx];
505 q->sdesc[q->pidx] = q->sdesc[idx];
506 to->addr_lo = from->addr_lo; /* already big endian */
507 to->addr_hi = from->addr_hi; /* likewise */
508 wmb();
509 to->len_gen = cpu_to_be32(V_FLD_GEN1(q->gen));
510 to->gen2 = cpu_to_be32(V_FLD_GEN2(q->gen));
511 q->credits++;
513 if (++q->pidx == q->size) {
514 q->pidx = 0;
515 q->gen ^= 1;
517 t3_write_reg(adap, A_SG_KDOORBELL, V_EGRCNTX(q->cntxt_id));
521 * alloc_ring - allocate resources for an SGE descriptor ring
522 * @pdev: the PCI device
523 * @nelem: the number of descriptors
524 * @elem_size: the size of each descriptor
525 * @sw_size: the size of the SW state associated with each ring element
526 * @phys: the physical address of the allocated ring
527 * @metadata: address of the array holding the SW state for the ring
529 * Allocates resources for an SGE descriptor ring, such as Tx queues,
530 * free buffer lists, or response queues. Each SGE ring requires
531 * space for its HW descriptors plus, optionally, space for the SW state
532 * associated with each HW entry (the metadata). The function returns
533 * three values: the virtual address for the HW ring (the return value
534 * of the function), the physical address of the HW ring, and the address
535 * of the SW ring.
537 static void *alloc_ring(struct pci_dev *pdev, size_t nelem, size_t elem_size,
538 size_t sw_size, dma_addr_t * phys, void *metadata)
540 size_t len = nelem * elem_size;
541 void *s = NULL;
542 void *p = dma_alloc_coherent(&pdev->dev, len, phys, GFP_KERNEL);
544 if (!p)
545 return NULL;
546 if (sw_size) {
547 s = kcalloc(nelem, sw_size, GFP_KERNEL);
549 if (!s) {
550 dma_free_coherent(&pdev->dev, len, p, *phys);
551 return NULL;
554 if (metadata)
555 *(void **)metadata = s;
556 memset(p, 0, len);
557 return p;
561 * t3_reset_qset - reset a sge qset
562 * @q: the queue set
564 * Reset the qset structure.
565 * The NAPI structure is preserved in the event of
566 * the qset's reincarnation, for example during EEH recovery.
568 static void t3_reset_qset(struct sge_qset *q)
570 if (q->adap &&
571 !(q->adap->flags & NAPI_INIT)) {
572 memset(q, 0, sizeof(*q));
573 return;
576 q->adap = NULL;
577 memset(&q->rspq, 0, sizeof(q->rspq));
578 memset(q->fl, 0, sizeof(struct sge_fl) * SGE_RXQ_PER_SET);
579 memset(q->txq, 0, sizeof(struct sge_txq) * SGE_TXQ_PER_SET);
580 q->txq_stopped = 0;
581 memset(&q->tx_reclaim_timer, 0, sizeof(q->tx_reclaim_timer));
586 * free_qset - free the resources of an SGE queue set
587 * @adapter: the adapter owning the queue set
588 * @q: the queue set
590 * Release the HW and SW resources associated with an SGE queue set, such
591 * as HW contexts, packet buffers, and descriptor rings. Traffic to the
592 * queue set must be quiesced prior to calling this.
594 static void t3_free_qset(struct adapter *adapter, struct sge_qset *q)
596 int i;
597 struct pci_dev *pdev = adapter->pdev;
599 if (q->tx_reclaim_timer.function)
600 del_timer_sync(&q->tx_reclaim_timer);
602 for (i = 0; i < SGE_RXQ_PER_SET; ++i)
603 if (q->fl[i].desc) {
604 spin_lock_irq(&adapter->sge.reg_lock);
605 t3_sge_disable_fl(adapter, q->fl[i].cntxt_id);
606 spin_unlock_irq(&adapter->sge.reg_lock);
607 free_rx_bufs(pdev, &q->fl[i]);
608 kfree(q->fl[i].sdesc);
609 dma_free_coherent(&pdev->dev,
610 q->fl[i].size *
611 sizeof(struct rx_desc), q->fl[i].desc,
612 q->fl[i].phys_addr);
615 for (i = 0; i < SGE_TXQ_PER_SET; ++i)
616 if (q->txq[i].desc) {
617 spin_lock_irq(&adapter->sge.reg_lock);
618 t3_sge_enable_ecntxt(adapter, q->txq[i].cntxt_id, 0);
619 spin_unlock_irq(&adapter->sge.reg_lock);
620 if (q->txq[i].sdesc) {
621 free_tx_desc(adapter, &q->txq[i],
622 q->txq[i].in_use);
623 kfree(q->txq[i].sdesc);
625 dma_free_coherent(&pdev->dev,
626 q->txq[i].size *
627 sizeof(struct tx_desc),
628 q->txq[i].desc, q->txq[i].phys_addr);
629 __skb_queue_purge(&q->txq[i].sendq);
632 if (q->rspq.desc) {
633 spin_lock_irq(&adapter->sge.reg_lock);
634 t3_sge_disable_rspcntxt(adapter, q->rspq.cntxt_id);
635 spin_unlock_irq(&adapter->sge.reg_lock);
636 dma_free_coherent(&pdev->dev,
637 q->rspq.size * sizeof(struct rsp_desc),
638 q->rspq.desc, q->rspq.phys_addr);
641 t3_reset_qset(q);
645 * init_qset_cntxt - initialize an SGE queue set context info
646 * @qs: the queue set
647 * @id: the queue set id
649 * Initializes the TIDs and context ids for the queues of a queue set.
651 static void init_qset_cntxt(struct sge_qset *qs, unsigned int id)
653 qs->rspq.cntxt_id = id;
654 qs->fl[0].cntxt_id = 2 * id;
655 qs->fl[1].cntxt_id = 2 * id + 1;
656 qs->txq[TXQ_ETH].cntxt_id = FW_TUNNEL_SGEEC_START + id;
657 qs->txq[TXQ_ETH].token = FW_TUNNEL_TID_START + id;
658 qs->txq[TXQ_OFLD].cntxt_id = FW_OFLD_SGEEC_START + id;
659 qs->txq[TXQ_CTRL].cntxt_id = FW_CTRL_SGEEC_START + id;
660 qs->txq[TXQ_CTRL].token = FW_CTRL_TID_START + id;
664 * sgl_len - calculates the size of an SGL of the given capacity
665 * @n: the number of SGL entries
667 * Calculates the number of flits needed for a scatter/gather list that
668 * can hold the given number of entries.
670 static inline unsigned int sgl_len(unsigned int n)
672 /* alternatively: 3 * (n / 2) + 2 * (n & 1) */
673 return (3 * n) / 2 + (n & 1);
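/*
 * Arithmetic behind the formula above: a struct sg_ent packs two 32-bit
 * lengths and two 64-bit addresses into 3 flits (24 bytes), and an odd
 * final entry still needs its length flit plus one address flit (2 flits).
 * For example, n = 3 entries take (3 * 3) / 2 + 1 = 5 flits.
 */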
677 * flits_to_desc - returns the num of Tx descriptors for the given flits
678 * @n: the number of flits
680 * Calculates the number of Tx descriptors needed for the supplied number
681 * of flits.
683 static inline unsigned int flits_to_desc(unsigned int n)
685 BUG_ON(n >= ARRAY_SIZE(flit_desc_map));
686 return flit_desc_map[n];
690 * get_packet - return the next ingress packet buffer from a free list
691 * @adap: the adapter that received the packet
692 * @fl: the SGE free list holding the packet
693 * @len: the packet length including any SGE padding
694 * @drop_thres: # of remaining buffers before we start dropping packets
696 * Get the next packet from a free list and complete setup of the
697 * sk_buff. If the packet is small we make a copy and recycle the
698 * original buffer, otherwise we use the original buffer itself. If a
699 * positive drop threshold is supplied packets are dropped and their
700 * buffers recycled if (a) the number of remaining buffers is under the
701 * threshold and the packet is too big to copy, or (b) the packet should
702 * be copied but there is no memory for the copy.
704 static struct sk_buff *get_packet(struct adapter *adap, struct sge_fl *fl,
705 unsigned int len, unsigned int drop_thres)
707 struct sk_buff *skb = NULL;
708 struct rx_sw_desc *sd = &fl->sdesc[fl->cidx];
710 prefetch(sd->skb->data);
711 fl->credits--;
713 if (len <= SGE_RX_COPY_THRES) {
714 skb = alloc_skb(len, GFP_ATOMIC);
715 if (likely(skb != NULL)) {
716 __skb_put(skb, len);
717 pci_dma_sync_single_for_cpu(adap->pdev,
718 pci_unmap_addr(sd, dma_addr), len,
719 PCI_DMA_FROMDEVICE);
720 memcpy(skb->data, sd->skb->data, len);
721 pci_dma_sync_single_for_device(adap->pdev,
722 pci_unmap_addr(sd, dma_addr), len,
723 PCI_DMA_FROMDEVICE);
724 } else if (!drop_thres)
725 goto use_orig_buf;
726 recycle:
727 recycle_rx_buf(adap, fl, fl->cidx);
728 return skb;
731 if (unlikely(fl->credits < drop_thres))
732 goto recycle;
734 use_orig_buf:
735 pci_unmap_single(adap->pdev, pci_unmap_addr(sd, dma_addr),
736 fl->buf_size, PCI_DMA_FROMDEVICE);
737 skb = sd->skb;
738 skb_put(skb, len);
739 __refill_fl(adap, fl);
740 return skb;
744 * get_packet_pg - return the next ingress packet buffer from a free list
745 * @adap: the adapter that received the packet
746 * @fl: the SGE free list holding the packet
747 * @len: the packet length including any SGE padding
748 * @drop_thres: # of remaining buffers before we start dropping packets
750 * Get the next packet from a free list populated with page chunks.
751 * If the packet is small we make a copy and recycle the original buffer,
752 * otherwise we attach the original buffer as a page fragment to a fresh
753 * sk_buff. If a positive drop threshold is supplied packets are dropped
754 * and their buffers recycled if (a) the number of remaining buffers is
755 * under the threshold and the packet is too big to copy, or (b) there's
756 * no system memory.
758 * Note: this function is similar to @get_packet but deals with Rx buffers
759 * that are page chunks rather than sk_buffs.
761 static struct sk_buff *get_packet_pg(struct adapter *adap, struct sge_fl *fl,
762 unsigned int len, unsigned int drop_thres)
764 struct sk_buff *skb = NULL;
765 struct rx_sw_desc *sd = &fl->sdesc[fl->cidx];
767 if (len <= SGE_RX_COPY_THRES) {
768 skb = alloc_skb(len, GFP_ATOMIC);
769 if (likely(skb != NULL)) {
770 __skb_put(skb, len);
771 pci_dma_sync_single_for_cpu(adap->pdev,
772 pci_unmap_addr(sd, dma_addr), len,
773 PCI_DMA_FROMDEVICE);
774 memcpy(skb->data, sd->pg_chunk.va, len);
775 pci_dma_sync_single_for_device(adap->pdev,
776 pci_unmap_addr(sd, dma_addr), len,
777 PCI_DMA_FROMDEVICE);
778 } else if (!drop_thres)
779 return NULL;
780 recycle:
781 fl->credits--;
782 recycle_rx_buf(adap, fl, fl->cidx);
783 return skb;
786 if (unlikely(fl->credits <= drop_thres))
787 goto recycle;
789 skb = alloc_skb(SGE_RX_PULL_LEN, GFP_ATOMIC);
790 if (unlikely(!skb)) {
791 if (!drop_thres)
792 return NULL;
793 goto recycle;
796 pci_unmap_single(adap->pdev, pci_unmap_addr(sd, dma_addr),
797 fl->buf_size, PCI_DMA_FROMDEVICE);
798 __skb_put(skb, SGE_RX_PULL_LEN);
799 memcpy(skb->data, sd->pg_chunk.va, SGE_RX_PULL_LEN);
800 skb_fill_page_desc(skb, 0, sd->pg_chunk.page,
801 sd->pg_chunk.offset + SGE_RX_PULL_LEN,
802 len - SGE_RX_PULL_LEN);
803 skb->len = len;
804 skb->data_len = len - SGE_RX_PULL_LEN;
805 skb->truesize += skb->data_len;
807 fl->credits--;
809 * We do not refill FLs here, we let the caller do it to overlap a
810 * prefetch.
812 return skb;
816 * get_imm_packet - return the next ingress packet buffer from a response
817 * @resp: the response descriptor containing the packet data
819 * Return a packet containing the immediate data of the given response.
821 static inline struct sk_buff *get_imm_packet(const struct rsp_desc *resp)
823 struct sk_buff *skb = alloc_skb(IMMED_PKT_SIZE, GFP_ATOMIC);
825 if (skb) {
826 __skb_put(skb, IMMED_PKT_SIZE);
827 skb_copy_to_linear_data(skb, resp->imm_data, IMMED_PKT_SIZE);
829 return skb;
833 * calc_tx_descs - calculate the number of Tx descriptors for a packet
834 * @skb: the packet
836 * Returns the number of Tx descriptors needed for the given Ethernet
837 * packet. Ethernet packets require addition of WR and CPL headers.
839 static inline unsigned int calc_tx_descs(const struct sk_buff *skb)
841 unsigned int flits;
843 if (skb->len <= WR_LEN - sizeof(struct cpl_tx_pkt))
844 return 1;
846 flits = sgl_len(skb_shinfo(skb)->nr_frags + 1) + 2;
847 if (skb_shinfo(skb)->gso_size)
848 flits++;
849 return flits_to_desc(flits);
853 * make_sgl - populate a scatter/gather list for a packet
854 * @skb: the packet
855 * @sgp: the SGL to populate
856 * @start: start address of skb main body data to include in the SGL
857 * @len: length of skb main body data to include in the SGL
858 * @pdev: the PCI device
860 * Generates a scatter/gather list for the buffers that make up a packet
861 * and returns the SGL size in 8-byte words. The caller must size the SGL
862 * appropriately.
864 static inline unsigned int make_sgl(const struct sk_buff *skb,
865 struct sg_ent *sgp, unsigned char *start,
866 unsigned int len, struct pci_dev *pdev)
868 dma_addr_t mapping;
869 unsigned int i, j = 0, nfrags;
871 if (len) {
872 mapping = pci_map_single(pdev, start, len, PCI_DMA_TODEVICE);
873 sgp->len[0] = cpu_to_be32(len);
874 sgp->addr[0] = cpu_to_be64(mapping);
875 j = 1;
878 nfrags = skb_shinfo(skb)->nr_frags;
879 for (i = 0; i < nfrags; i++) {
880 skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
882 mapping = pci_map_page(pdev, frag->page, frag->page_offset,
883 frag->size, PCI_DMA_TODEVICE);
884 sgp->len[j] = cpu_to_be32(frag->size);
885 sgp->addr[j] = cpu_to_be64(mapping);
886 j ^= 1;
887 if (j == 0)
888 ++sgp;
890 if (j)
891 sgp->len[j] = 0;
892 return ((nfrags + (len != 0)) * 3) / 2 + j;
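/*
 * Example of the value returned above (arithmetic only): a packet with a
 * mapped linear part (len != 0) and two page fragments fills three SGL
 * slots, so this returns ((2 + 1) * 3) / 2 + 1 = 5 flits, agreeing with
 * sgl_len(3).  j toggles between the two halves of a struct sg_ent, so
 * the "+ j" term accounts for a half-used final entry.
 */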
896 * check_ring_tx_db - check and potentially ring a Tx queue's doorbell
897 * @adap: the adapter
898 * @q: the Tx queue
900 * Ring the doorbell if a Tx queue is asleep. There is a natural race,
901 * where the HW may go to sleep just after we checked; in that case
902 * the interrupt handler will detect the outstanding TX packet
903 * and ring the doorbell for us.
905 * When GTS is disabled we unconditionally ring the doorbell.
907 static inline void check_ring_tx_db(struct adapter *adap, struct sge_txq *q)
909 #if USE_GTS
910 clear_bit(TXQ_LAST_PKT_DB, &q->flags);
911 if (test_and_set_bit(TXQ_RUNNING, &q->flags) == 0) {
912 set_bit(TXQ_LAST_PKT_DB, &q->flags);
913 t3_write_reg(adap, A_SG_KDOORBELL,
914 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
916 #else
917 wmb(); /* write descriptors before telling HW */
918 t3_write_reg(adap, A_SG_KDOORBELL,
919 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
920 #endif
923 static inline void wr_gen2(struct tx_desc *d, unsigned int gen)
925 #if SGE_NUM_GENBITS == 2
926 d->flit[TX_DESC_FLITS - 1] = cpu_to_be64(gen);
927 #endif
931 * write_wr_hdr_sgl - write a WR header and, optionally, SGL
932 * @ndesc: number of Tx descriptors spanned by the SGL
933 * @skb: the packet corresponding to the WR
934 * @d: first Tx descriptor to be written
935 * @pidx: index of above descriptors
936 * @q: the SGE Tx queue
937 * @sgl: the SGL
938 * @flits: number of flits to the start of the SGL in the first descriptor
939 * @sgl_flits: the SGL size in flits
940 * @gen: the Tx descriptor generation
941 * @wr_hi: top 32 bits of WR header based on WR type (big endian)
942 * @wr_lo: low 32 bits of WR header based on WR type (big endian)
944 * Write a work request header and an associated SGL. If the SGL is
945 * small enough to fit into one Tx descriptor it has already been written
946 * and we just need to write the WR header. Otherwise we distribute the
947 * SGL across the number of descriptors it spans.
949 static void write_wr_hdr_sgl(unsigned int ndesc, struct sk_buff *skb,
950 struct tx_desc *d, unsigned int pidx,
951 const struct sge_txq *q,
952 const struct sg_ent *sgl,
953 unsigned int flits, unsigned int sgl_flits,
954 unsigned int gen, __be32 wr_hi,
955 __be32 wr_lo)
957 struct work_request_hdr *wrp = (struct work_request_hdr *)d;
958 struct tx_sw_desc *sd = &q->sdesc[pidx];
960 sd->skb = skb;
961 if (need_skb_unmap()) {
962 sd->fragidx = 0;
963 sd->addr_idx = 0;
964 sd->sflit = flits;
967 if (likely(ndesc == 1)) {
968 sd->eop = 1;
969 wrp->wr_hi = htonl(F_WR_SOP | F_WR_EOP | V_WR_DATATYPE(1) |
970 V_WR_SGLSFLT(flits)) | wr_hi;
971 wmb();
972 wrp->wr_lo = htonl(V_WR_LEN(flits + sgl_flits) |
973 V_WR_GEN(gen)) | wr_lo;
974 wr_gen2(d, gen);
975 } else {
976 unsigned int ogen = gen;
977 const u64 *fp = (const u64 *)sgl;
978 struct work_request_hdr *wp = wrp;
980 wrp->wr_hi = htonl(F_WR_SOP | V_WR_DATATYPE(1) |
981 V_WR_SGLSFLT(flits)) | wr_hi;
983 while (sgl_flits) {
984 unsigned int avail = WR_FLITS - flits;
986 if (avail > sgl_flits)
987 avail = sgl_flits;
988 memcpy(&d->flit[flits], fp, avail * sizeof(*fp));
989 sgl_flits -= avail;
990 ndesc--;
991 if (!sgl_flits)
992 break;
994 fp += avail;
995 d++;
996 sd->eop = 0;
997 sd++;
998 if (++pidx == q->size) {
999 pidx = 0;
1000 gen ^= 1;
1001 d = q->desc;
1002 sd = q->sdesc;
1005 sd->skb = skb;
1006 wrp = (struct work_request_hdr *)d;
1007 wrp->wr_hi = htonl(V_WR_DATATYPE(1) |
1008 V_WR_SGLSFLT(1)) | wr_hi;
1009 wrp->wr_lo = htonl(V_WR_LEN(min(WR_FLITS,
1010 sgl_flits + 1)) |
1011 V_WR_GEN(gen)) | wr_lo;
1012 wr_gen2(d, gen);
1013 flits = 1;
1015 sd->eop = 1;
1016 wrp->wr_hi |= htonl(F_WR_EOP);
1017 wmb();
1018 wp->wr_lo = htonl(V_WR_LEN(WR_FLITS) | V_WR_GEN(ogen)) | wr_lo;
1019 wr_gen2((struct tx_desc *)wp, ogen);
1020 WARN_ON(ndesc != 0);
1025 * write_tx_pkt_wr - write a TX_PKT work request
1026 * @adap: the adapter
1027 * @skb: the packet to send
1028 * @pi: the egress interface
1029 * @pidx: index of the first Tx descriptor to write
1030 * @gen: the generation value to use
1031 * @q: the Tx queue
1032 * @ndesc: number of descriptors the packet will occupy
1033 * @compl: the value of the COMPL bit to use
1035 * Generate a TX_PKT work request to send the supplied packet.
1037 static void write_tx_pkt_wr(struct adapter *adap, struct sk_buff *skb,
1038 const struct port_info *pi,
1039 unsigned int pidx, unsigned int gen,
1040 struct sge_txq *q, unsigned int ndesc,
1041 unsigned int compl)
1043 unsigned int flits, sgl_flits, cntrl, tso_info;
1044 struct sg_ent *sgp, sgl[MAX_SKB_FRAGS / 2 + 1];
1045 struct tx_desc *d = &q->desc[pidx];
1046 struct cpl_tx_pkt *cpl = (struct cpl_tx_pkt *)d;
1048 cpl->len = htonl(skb->len | 0x80000000);
1049 cntrl = V_TXPKT_INTF(pi->port_id);
1051 if (vlan_tx_tag_present(skb) && pi->vlan_grp)
1052 cntrl |= F_TXPKT_VLAN_VLD | V_TXPKT_VLAN(vlan_tx_tag_get(skb));
1054 tso_info = V_LSO_MSS(skb_shinfo(skb)->gso_size);
1055 if (tso_info) {
1056 int eth_type;
1057 struct cpl_tx_pkt_lso *hdr = (struct cpl_tx_pkt_lso *)cpl;
1059 d->flit[2] = 0;
1060 cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT_LSO);
1061 hdr->cntrl = htonl(cntrl);
1062 eth_type = skb_network_offset(skb) == ETH_HLEN ?
1063 CPL_ETH_II : CPL_ETH_II_VLAN;
1064 tso_info |= V_LSO_ETH_TYPE(eth_type) |
1065 V_LSO_IPHDR_WORDS(ip_hdr(skb)->ihl) |
1066 V_LSO_TCPHDR_WORDS(tcp_hdr(skb)->doff);
1067 hdr->lso_info = htonl(tso_info);
1068 flits = 3;
1069 } else {
1070 cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT);
1071 cntrl |= F_TXPKT_IPCSUM_DIS; /* SW calculates IP csum */
1072 cntrl |= V_TXPKT_L4CSUM_DIS(skb->ip_summed != CHECKSUM_PARTIAL);
1073 cpl->cntrl = htonl(cntrl);
1075 if (skb->len <= WR_LEN - sizeof(*cpl)) {
1076 q->sdesc[pidx].skb = NULL;
1077 if (!skb->data_len)
1078 skb_copy_from_linear_data(skb, &d->flit[2],
1079 skb->len);
1080 else
1081 skb_copy_bits(skb, 0, &d->flit[2], skb->len);
1083 flits = (skb->len + 7) / 8 + 2;
1084 cpl->wr.wr_hi = htonl(V_WR_BCNTLFLT(skb->len & 7) |
1085 V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT)
1086 | F_WR_SOP | F_WR_EOP | compl);
1087 wmb();
1088 cpl->wr.wr_lo = htonl(V_WR_LEN(flits) | V_WR_GEN(gen) |
1089 V_WR_TID(q->token));
1090 wr_gen2(d, gen);
1091 kfree_skb(skb);
1092 return;
1095 flits = 2;
1098 sgp = ndesc == 1 ? (struct sg_ent *)&d->flit[flits] : sgl;
1099 sgl_flits = make_sgl(skb, sgp, skb->data, skb_headlen(skb), adap->pdev);
1101 write_wr_hdr_sgl(ndesc, skb, d, pidx, q, sgl, flits, sgl_flits, gen,
1102 htonl(V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | compl),
1103 htonl(V_WR_TID(q->token)));
1106 static inline void t3_stop_queue(struct net_device *dev, struct sge_qset *qs,
1107 struct sge_txq *q)
1109 netif_stop_queue(dev);
1110 set_bit(TXQ_ETH, &qs->txq_stopped);
1111 q->stops++;
1115 * t3_eth_xmit - add a packet to the Ethernet Tx queue
1116 * @skb: the packet
1117 * @dev: the egress net device
1119 * Add a packet to an SGE Tx queue. Runs with softirqs disabled.
1121 int t3_eth_xmit(struct sk_buff *skb, struct net_device *dev)
1123 unsigned int ndesc, pidx, credits, gen, compl;
1124 const struct port_info *pi = netdev_priv(dev);
1125 struct adapter *adap = pi->adapter;
1126 struct sge_qset *qs = pi->qs;
1127 struct sge_txq *q = &qs->txq[TXQ_ETH];
1130 * The chip min packet length is 9 octets but play safe and reject
1131 * anything shorter than an Ethernet header.
1133 if (unlikely(skb->len < ETH_HLEN)) {
1134 dev_kfree_skb(skb);
1135 return NETDEV_TX_OK;
1138 spin_lock(&q->lock);
1139 reclaim_completed_tx(adap, q);
1141 credits = q->size - q->in_use;
1142 ndesc = calc_tx_descs(skb);
1144 if (unlikely(credits < ndesc)) {
1145 t3_stop_queue(dev, qs, q);
1146 dev_err(&adap->pdev->dev,
1147 "%s: Tx ring %u full while queue awake!\n",
1148 dev->name, q->cntxt_id & 7);
1149 spin_unlock(&q->lock);
1150 return NETDEV_TX_BUSY;
1153 q->in_use += ndesc;
1154 if (unlikely(credits - ndesc < q->stop_thres)) {
1155 t3_stop_queue(dev, qs, q);
1157 if (should_restart_tx(q) &&
1158 test_and_clear_bit(TXQ_ETH, &qs->txq_stopped)) {
1159 q->restarts++;
1160 netif_wake_queue(dev);
1164 gen = q->gen;
1165 q->unacked += ndesc;
1166 compl = (q->unacked & 8) << (S_WR_COMPL - 3);
1167 q->unacked &= 7;
1168 pidx = q->pidx;
1169 q->pidx += ndesc;
1170 if (q->pidx >= q->size) {
1171 q->pidx -= q->size;
1172 q->gen ^= 1;
1175 /* update port statistics */
1176 if (skb->ip_summed == CHECKSUM_COMPLETE)
1177 qs->port_stats[SGE_PSTAT_TX_CSUM]++;
1178 if (skb_shinfo(skb)->gso_size)
1179 qs->port_stats[SGE_PSTAT_TSO]++;
1180 if (vlan_tx_tag_present(skb) && pi->vlan_grp)
1181 qs->port_stats[SGE_PSTAT_VLANINS]++;
1183 dev->trans_start = jiffies;
1184 spin_unlock(&q->lock);
1187 * We do not use Tx completion interrupts to free DMAd Tx packets.
1188 * This is good for performance but means that we rely on new Tx
1189 * packets arriving to run the destructors of completed packets,
1190 * which open up space in their sockets' send queues. Sometimes
1191 * we do not get such new packets causing Tx to stall. A single
1192 * UDP transmitter is a good example of this situation. We have
1193 * a clean up timer that periodically reclaims completed packets
1194 * but it doesn't run often enough (nor do we want it to) to prevent
1195 * lengthy stalls. A solution to this problem is to run the
1196 * destructor early, after the packet is queued but before it's DMAd.
1197 * A downside is that we lie to socket memory accounting, but the amount
1198 * of extra memory is reasonable (limited by the number of Tx
1199 * descriptors), the packets do actually get freed quickly by new
1200 * packets almost always, and for protocols like TCP that wait for
1201 * acks to really free up the data, the extra memory is even less.
1202 * On the positive side we run the destructors on the sending CPU
1203 * rather than on a potentially different completing CPU, usually a
1204 * good thing. We also run them without holding our Tx queue lock,
1205 * unlike what reclaim_completed_tx() would otherwise do.
1207 * Run the destructor before telling the DMA engine about the packet
1208 * to make sure it doesn't complete and get freed prematurely.
1210 if (likely(!skb_shared(skb)))
1211 skb_orphan(skb);
1213 write_tx_pkt_wr(adap, skb, pi, pidx, gen, q, ndesc, compl);
1214 check_ring_tx_db(adap, q);
1215 return NETDEV_TX_OK;
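/*
 * Usage sketch (an assumption about the surrounding driver, not code from
 * this file): in the 2.6.2x netdev model the main driver installs this
 * routine as the port's transmit hook when the interface is set up,
 * along the lines of
 *
 *	netdev->hard_start_xmit = t3_eth_xmit;
 *
 * so every skb queued on the interface lands here with softirqs disabled,
 * as the comment above notes.
 */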
1219 * write_imm - write a packet into a Tx descriptor as immediate data
1220 * @d: the Tx descriptor to write
1221 * @skb: the packet
1222 * @len: the length of packet data to write as immediate data
1223 * @gen: the generation bit value to write
1225 * Writes a packet as immediate data into a Tx descriptor. The packet
1226 * contains a work request at its beginning. We must write the packet
1227 * carefully so the SGE doesn't read it accidentally before it's written
1228 * in its entirety.
1230 static inline void write_imm(struct tx_desc *d, struct sk_buff *skb,
1231 unsigned int len, unsigned int gen)
1233 struct work_request_hdr *from = (struct work_request_hdr *)skb->data;
1234 struct work_request_hdr *to = (struct work_request_hdr *)d;
1236 if (likely(!skb->data_len))
1237 memcpy(&to[1], &from[1], len - sizeof(*from));
1238 else
1239 skb_copy_bits(skb, sizeof(*from), &to[1], len - sizeof(*from));
1241 to->wr_hi = from->wr_hi | htonl(F_WR_SOP | F_WR_EOP |
1242 V_WR_BCNTLFLT(len & 7));
1243 wmb();
1244 to->wr_lo = from->wr_lo | htonl(V_WR_GEN(gen) |
1245 V_WR_LEN((len + 7) / 8));
1246 wr_gen2(d, gen);
1247 kfree_skb(skb);
1251 * check_desc_avail - check descriptor availability on a send queue
1252 * @adap: the adapter
1253 * @q: the send queue
1254 * @skb: the packet needing the descriptors
1255 * @ndesc: the number of Tx descriptors needed
1256 * @qid: the Tx queue number in its queue set (TXQ_OFLD or TXQ_CTRL)
1258 * Checks if the requested number of Tx descriptors is available on an
1259 * SGE send queue. If the queue is already suspended or not enough
1260 * descriptors are available the packet is queued for later transmission.
1261 * Must be called with the Tx queue locked.
1263 * Returns 0 if enough descriptors are available, 1 if there aren't
1264 * enough descriptors and the packet has been queued, and 2 if the caller
1265 * needs to retry because there weren't enough descriptors at the
1266 * beginning of the call but some freed up in the mean time.
1268 static inline int check_desc_avail(struct adapter *adap, struct sge_txq *q,
1269 struct sk_buff *skb, unsigned int ndesc,
1270 unsigned int qid)
1272 if (unlikely(!skb_queue_empty(&q->sendq))) {
1273 addq_exit:__skb_queue_tail(&q->sendq, skb);
1274 return 1;
1276 if (unlikely(q->size - q->in_use < ndesc)) {
1277 struct sge_qset *qs = txq_to_qset(q, qid);
1279 set_bit(qid, &qs->txq_stopped);
1280 smp_mb__after_clear_bit();
1282 if (should_restart_tx(q) &&
1283 test_and_clear_bit(qid, &qs->txq_stopped))
1284 return 2;
1286 q->stops++;
1287 goto addq_exit;
1289 return 0;
1293 * reclaim_completed_tx_imm - reclaim completed control-queue Tx descs
1294 * @q: the SGE control Tx queue
1296 * This is a variant of reclaim_completed_tx() that is used for Tx queues
1297 * that send only immediate data (presently just the control queues) and
1298 * thus do not have any sk_buffs to release.
1300 static inline void reclaim_completed_tx_imm(struct sge_txq *q)
1302 unsigned int reclaim = q->processed - q->cleaned;
1304 q->in_use -= reclaim;
1305 q->cleaned += reclaim;
1308 static inline int immediate(const struct sk_buff *skb)
1310 return skb->len <= WR_LEN;
1314 * ctrl_xmit - send a packet through an SGE control Tx queue
1315 * @adap: the adapter
1316 * @q: the control queue
1317 * @skb: the packet
1319 * Send a packet through an SGE control Tx queue. Packets sent through
1320 * a control queue must fit entirely as immediate data in a single Tx
1321 * descriptor and have no page fragments.
1323 static int ctrl_xmit(struct adapter *adap, struct sge_txq *q,
1324 struct sk_buff *skb)
1326 int ret;
1327 struct work_request_hdr *wrp = (struct work_request_hdr *)skb->data;
1329 if (unlikely(!immediate(skb))) {
1330 WARN_ON(1);
1331 dev_kfree_skb(skb);
1332 return NET_XMIT_SUCCESS;
1335 wrp->wr_hi |= htonl(F_WR_SOP | F_WR_EOP);
1336 wrp->wr_lo = htonl(V_WR_TID(q->token));
1338 spin_lock(&q->lock);
1339 again:reclaim_completed_tx_imm(q);
1341 ret = check_desc_avail(adap, q, skb, 1, TXQ_CTRL);
1342 if (unlikely(ret)) {
1343 if (ret == 1) {
1344 spin_unlock(&q->lock);
1345 return NET_XMIT_CN;
1347 goto again;
1350 write_imm(&q->desc[q->pidx], skb, skb->len, q->gen);
1352 q->in_use++;
1353 if (++q->pidx >= q->size) {
1354 q->pidx = 0;
1355 q->gen ^= 1;
1357 spin_unlock(&q->lock);
1358 wmb();
1359 t3_write_reg(adap, A_SG_KDOORBELL,
1360 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
1361 return NET_XMIT_SUCCESS;
1365 * restart_ctrlq - restart a suspended control queue
1366 * @qs: the queue set containing the control queue
1368 * Resumes transmission on a suspended Tx control queue.
1370 static void restart_ctrlq(unsigned long data)
1372 struct sk_buff *skb;
1373 struct sge_qset *qs = (struct sge_qset *)data;
1374 struct sge_txq *q = &qs->txq[TXQ_CTRL];
1376 spin_lock(&q->lock);
1377 again:reclaim_completed_tx_imm(q);
1379 while (q->in_use < q->size &&
1380 (skb = __skb_dequeue(&q->sendq)) != NULL) {
1382 write_imm(&q->desc[q->pidx], skb, skb->len, q->gen);
1384 if (++q->pidx >= q->size) {
1385 q->pidx = 0;
1386 q->gen ^= 1;
1388 q->in_use++;
1391 if (!skb_queue_empty(&q->sendq)) {
1392 set_bit(TXQ_CTRL, &qs->txq_stopped);
1393 smp_mb__after_clear_bit();
1395 if (should_restart_tx(q) &&
1396 test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped))
1397 goto again;
1398 q->stops++;
1401 spin_unlock(&q->lock);
1402 wmb();
1403 t3_write_reg(qs->adap, A_SG_KDOORBELL,
1404 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
1408 * Send a management message through control queue 0
1410 int t3_mgmt_tx(struct adapter *adap, struct sk_buff *skb)
1412 int ret;
1413 local_bh_disable();
1414 ret = ctrl_xmit(adap, &adap->sge.qs[0].txq[TXQ_CTRL], skb);
1415 local_bh_enable();
1417 return ret;
1421 * deferred_unmap_destructor - unmap a packet when it is freed
1422 * @skb: the packet
1424 * This is the packet destructor used for Tx packets that need to remain
1425 * mapped until they are freed rather than until their Tx descriptors are
1426 * freed.
1428 static void deferred_unmap_destructor(struct sk_buff *skb)
1430 int i;
1431 const dma_addr_t *p;
1432 const struct skb_shared_info *si;
1433 const struct deferred_unmap_info *dui;
1435 dui = (struct deferred_unmap_info *)skb->head;
1436 p = dui->addr;
1438 if (skb->tail - skb->transport_header)
1439 pci_unmap_single(dui->pdev, *p++,
1440 skb->tail - skb->transport_header,
1441 PCI_DMA_TODEVICE);
1443 si = skb_shinfo(skb);
1444 for (i = 0; i < si->nr_frags; i++)
1445 pci_unmap_page(dui->pdev, *p++, si->frags[i].size,
1446 PCI_DMA_TODEVICE);
1449 static void setup_deferred_unmapping(struct sk_buff *skb, struct pci_dev *pdev,
1450 const struct sg_ent *sgl, int sgl_flits)
1452 dma_addr_t *p;
1453 struct deferred_unmap_info *dui;
1455 dui = (struct deferred_unmap_info *)skb->head;
1456 dui->pdev = pdev;
1457 for (p = dui->addr; sgl_flits >= 3; sgl++, sgl_flits -= 3) {
1458 *p++ = be64_to_cpu(sgl->addr[0]);
1459 *p++ = be64_to_cpu(sgl->addr[1]);
1461 if (sgl_flits)
1462 *p = be64_to_cpu(sgl->addr[0]);
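/*
 * The loop above walks the SGL one struct sg_ent (3 flits) at a time and
 * stores the CPU-endian DMA addresses in the deferred_unmap_info living
 * at skb->head, so deferred_unmap_destructor() can undo the mappings
 * without consulting the Tx descriptors, which may already have been
 * recycled by the time the skb is finally freed.
 */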
1466 * write_ofld_wr - write an offload work request
1467 * @adap: the adapter
1468 * @skb: the packet to send
1469 * @q: the Tx queue
1470 * @pidx: index of the first Tx descriptor to write
1471 * @gen: the generation value to use
1472 * @ndesc: number of descriptors the packet will occupy
1474 * Write an offload work request to send the supplied packet. The packet
1475 * data already carry the work request with most fields populated.
1477 static void write_ofld_wr(struct adapter *adap, struct sk_buff *skb,
1478 struct sge_txq *q, unsigned int pidx,
1479 unsigned int gen, unsigned int ndesc)
1481 unsigned int sgl_flits, flits;
1482 struct work_request_hdr *from;
1483 struct sg_ent *sgp, sgl[MAX_SKB_FRAGS / 2 + 1];
1484 struct tx_desc *d = &q->desc[pidx];
1486 if (immediate(skb)) {
1487 q->sdesc[pidx].skb = NULL;
1488 write_imm(d, skb, skb->len, gen);
1489 return;
1492 /* Only TX_DATA builds SGLs */
1494 from = (struct work_request_hdr *)skb->data;
1495 memcpy(&d->flit[1], &from[1],
1496 skb_transport_offset(skb) - sizeof(*from));
1498 flits = skb_transport_offset(skb) / 8;
1499 sgp = ndesc == 1 ? (struct sg_ent *)&d->flit[flits] : sgl;
1500 sgl_flits = make_sgl(skb, sgp, skb_transport_header(skb),
1501 skb->tail - skb->transport_header,
1502 adap->pdev);
1503 if (need_skb_unmap()) {
1504 setup_deferred_unmapping(skb, adap->pdev, sgp, sgl_flits);
1505 skb->destructor = deferred_unmap_destructor;
1508 write_wr_hdr_sgl(ndesc, skb, d, pidx, q, sgl, flits, sgl_flits,
1509 gen, from->wr_hi, from->wr_lo);
1513 * calc_tx_descs_ofld - calculate # of Tx descriptors for an offload packet
1514 * @skb: the packet
1516 * Returns the number of Tx descriptors needed for the given offload
1517 * packet. These packets are already fully constructed.
1519 static inline unsigned int calc_tx_descs_ofld(const struct sk_buff *skb)
1521 unsigned int flits, cnt;
1523 if (skb->len <= WR_LEN)
1524 return 1; /* packet fits as immediate data */
1526 flits = skb_transport_offset(skb) / 8; /* headers */
1527 cnt = skb_shinfo(skb)->nr_frags;
1528 if (skb->tail != skb->transport_header)
1529 cnt++;
1530 return flits_to_desc(flits + sgl_len(cnt));
1534 * ofld_xmit - send a packet through an offload queue
1535 * @adap: the adapter
1536 * @q: the Tx offload queue
1537 * @skb: the packet
1539 * Send an offload packet through an SGE offload queue.
1541 static int ofld_xmit(struct adapter *adap, struct sge_txq *q,
1542 struct sk_buff *skb)
1544 int ret;
1545 unsigned int ndesc = calc_tx_descs_ofld(skb), pidx, gen;
1547 spin_lock(&q->lock);
1548 again:reclaim_completed_tx(adap, q);
1550 ret = check_desc_avail(adap, q, skb, ndesc, TXQ_OFLD);
1551 if (unlikely(ret)) {
1552 if (ret == 1) {
1553 skb->priority = ndesc; /* save for restart */
1554 spin_unlock(&q->lock);
1555 return NET_XMIT_CN;
1557 goto again;
1560 gen = q->gen;
1561 q->in_use += ndesc;
1562 pidx = q->pidx;
1563 q->pidx += ndesc;
1564 if (q->pidx >= q->size) {
1565 q->pidx -= q->size;
1566 q->gen ^= 1;
1568 spin_unlock(&q->lock);
1570 write_ofld_wr(adap, skb, q, pidx, gen, ndesc);
1571 check_ring_tx_db(adap, q);
1572 return NET_XMIT_SUCCESS;
1576 * restart_offloadq - restart a suspended offload queue
1577 * @qs: the queue set containing the offload queue
1579 * Resumes transmission on a suspended Tx offload queue.
1581 static void restart_offloadq(unsigned long data)
1583 struct sk_buff *skb;
1584 struct sge_qset *qs = (struct sge_qset *)data;
1585 struct sge_txq *q = &qs->txq[TXQ_OFLD];
1586 const struct port_info *pi = netdev_priv(qs->netdev);
1587 struct adapter *adap = pi->adapter;
1589 spin_lock(&q->lock);
1590 again:reclaim_completed_tx(adap, q);
1592 while ((skb = skb_peek(&q->sendq)) != NULL) {
1593 unsigned int gen, pidx;
1594 unsigned int ndesc = skb->priority;
1596 if (unlikely(q->size - q->in_use < ndesc)) {
1597 set_bit(TXQ_OFLD, &qs->txq_stopped);
1598 smp_mb__after_clear_bit();
1600 if (should_restart_tx(q) &&
1601 test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped))
1602 goto again;
1603 q->stops++;
1604 break;
1607 gen = q->gen;
1608 q->in_use += ndesc;
1609 pidx = q->pidx;
1610 q->pidx += ndesc;
1611 if (q->pidx >= q->size) {
1612 q->pidx -= q->size;
1613 q->gen ^= 1;
1615 __skb_unlink(skb, &q->sendq);
1616 spin_unlock(&q->lock);
1618 write_ofld_wr(adap, skb, q, pidx, gen, ndesc);
1619 spin_lock(&q->lock);
1621 spin_unlock(&q->lock);
1623 #if USE_GTS
1624 set_bit(TXQ_RUNNING, &q->flags);
1625 set_bit(TXQ_LAST_PKT_DB, &q->flags);
1626 #endif
1627 wmb();
1628 t3_write_reg(adap, A_SG_KDOORBELL,
1629 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
1633 * queue_set - return the queue set a packet should use
1634 * @skb: the packet
1636 * Maps a packet to the SGE queue set it should use. The desired queue
1637 * set is carried in bits 1-3 in the packet's priority.
1639 static inline int queue_set(const struct sk_buff *skb)
1641 return skb->priority >> 1;
1645 * is_ctrl_pkt - return whether an offload packet is a control packet
1646 * @skb: the packet
1648 * Determines whether an offload packet should use an OFLD or a CTRL
1649 * Tx queue. This is indicated by bit 0 in the packet's priority.
1651 static inline int is_ctrl_pkt(const struct sk_buff *skb)
1653 return skb->priority & 1;
1657 * t3_offload_tx - send an offload packet
1658 * @tdev: the offload device to send to
1659 * @skb: the packet
1661 * Sends an offload packet. We use the packet priority to select the
1662 * appropriate Tx queue as follows: bit 0 indicates whether the packet
1663 * should be sent as regular or control, bits 1-3 select the queue set.
1665 int t3_offload_tx(struct t3cdev *tdev, struct sk_buff *skb)
1667 struct adapter *adap = tdev2adap(tdev);
1668 struct sge_qset *qs = &adap->sge.qs[queue_set(skb)];
1670 if (unlikely(is_ctrl_pkt(skb)))
1671 return ctrl_xmit(adap, &qs->txq[TXQ_CTRL], skb);
1673 return ofld_xmit(adap, &qs->txq[TXQ_OFLD], skb);
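/*
 * Encoding sketch for callers (derived from the comments above; the real
 * call sites live in the offload modules, and qset_idx/use_ctrl below are
 * placeholder names):
 *
 *	skb->priority = (qset_idx << 1) | use_ctrl;
 *	t3_offload_tx(tdev, skb);
 *
 * Bit 0 picks the CTRL vs. OFLD queue and bits 1-3 pick the queue set,
 * matching is_ctrl_pkt() and queue_set() above.
 */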
1677 * offload_enqueue - add an offload packet to an SGE offload receive queue
1678 * @q: the SGE response queue
1679 * @skb: the packet
1681 * Add a new offload packet to an SGE response queue's offload packet
1682 * queue. If the packet is the first on the queue it schedules the RX
1683 * softirq to process the queue.
1685 static inline void offload_enqueue(struct sge_rspq *q, struct sk_buff *skb)
1687 skb->next = skb->prev = NULL;
1688 if (q->rx_tail)
1689 q->rx_tail->next = skb;
1690 else {
1691 struct sge_qset *qs = rspq_to_qset(q);
1693 napi_schedule(&qs->napi);
1694 q->rx_head = skb;
1696 q->rx_tail = skb;
1700 * deliver_partial_bundle - deliver a (partial) bundle of Rx offload pkts
1701 * @tdev: the offload device that will be receiving the packets
1702 * @q: the SGE response queue that assembled the bundle
1703 * @skbs: the partial bundle
1704 * @n: the number of packets in the bundle
1706 * Delivers a (partial) bundle of Rx offload packets to an offload device.
1708 static inline void deliver_partial_bundle(struct t3cdev *tdev,
1709 struct sge_rspq *q,
1710 struct sk_buff *skbs[], int n)
1712 if (n) {
1713 q->offload_bundles++;
1714 tdev->recv(tdev, skbs, n);
1719 * ofld_poll - NAPI handler for offload packets in interrupt mode
1720 * @napi: the NAPI structure doing the polling
1721 * @budget: polling budget
1723 * The NAPI handler for offload packets when a response queue is serviced
1724 * by the hard interrupt handler, i.e., when it's operating in non-polling
1725 * mode. Creates small packet batches and sends them through the offload
1726 * receive handler. Batches need to be of modest size as we do prefetches
1727 * on the packets in each.
1729 static int ofld_poll(struct napi_struct *napi, int budget)
1731 struct sge_qset *qs = container_of(napi, struct sge_qset, napi);
1732 struct sge_rspq *q = &qs->rspq;
1733 struct adapter *adapter = qs->adap;
1734 int work_done = 0;
1736 while (work_done < budget) {
1737 struct sk_buff *head, *tail, *skbs[RX_BUNDLE_SIZE];
1738 int ngathered;
1740 spin_lock_irq(&q->lock);
1741 head = q->rx_head;
1742 if (!head) {
1743 napi_complete(napi);
1744 spin_unlock_irq(&q->lock);
1745 return work_done;
1748 tail = q->rx_tail;
1749 q->rx_head = q->rx_tail = NULL;
1750 spin_unlock_irq(&q->lock);
1752 for (ngathered = 0; work_done < budget && head; work_done++) {
1753 prefetch(head->data);
1754 skbs[ngathered] = head;
1755 head = head->next;
1756 skbs[ngathered]->next = NULL;
1757 if (++ngathered == RX_BUNDLE_SIZE) {
1758 q->offload_bundles++;
1759 adapter->tdev.recv(&adapter->tdev, skbs,
1760 ngathered);
1761 ngathered = 0;
1764 if (head) { /* splice remaining packets back onto Rx queue */
1765 spin_lock_irq(&q->lock);
1766 tail->next = q->rx_head;
1767 if (!q->rx_head)
1768 q->rx_tail = tail;
1769 q->rx_head = head;
1770 spin_unlock_irq(&q->lock);
1772 deliver_partial_bundle(&adapter->tdev, q, skbs, ngathered);
1775 return work_done;
1779 * rx_offload - process a received offload packet
1780 * @tdev: the offload device receiving the packet
1781 * @rq: the response queue that received the packet
1782 * @skb: the packet
1783 * @rx_gather: a gather list of packets if we are building a bundle
1784 * @gather_idx: index of the next available slot in the bundle
1786 * Process an ingress offload packet and add it to the offload ingress
1787 * queue. Returns the index of the next available slot in the bundle.
1789 static inline int rx_offload(struct t3cdev *tdev, struct sge_rspq *rq,
1790 struct sk_buff *skb, struct sk_buff *rx_gather[],
1791 unsigned int gather_idx)
1793 skb_reset_mac_header(skb);
1794 skb_reset_network_header(skb);
1795 skb_reset_transport_header(skb);
1797 if (rq->polling) {
1798 rx_gather[gather_idx++] = skb;
1799 if (gather_idx == RX_BUNDLE_SIZE) {
1800 tdev->recv(tdev, rx_gather, RX_BUNDLE_SIZE);
1801 gather_idx = 0;
1802 rq->offload_bundles++;
1804 } else
1805 offload_enqueue(rq, skb);
1807 return gather_idx;
1811 * restart_tx - check whether to restart suspended Tx queues
1812 * @qs: the queue set to resume
1814 * Restarts suspended Tx queues of an SGE queue set if they have enough
1815 * free resources to resume operation.
1817 static void restart_tx(struct sge_qset *qs)
1819 if (test_bit(TXQ_ETH, &qs->txq_stopped) &&
1820 should_restart_tx(&qs->txq[TXQ_ETH]) &&
1821 test_and_clear_bit(TXQ_ETH, &qs->txq_stopped)) {
1822 qs->txq[TXQ_ETH].restarts++;
1823 if (netif_running(qs->netdev))
1824 netif_wake_queue(qs->netdev);
1827 if (test_bit(TXQ_OFLD, &qs->txq_stopped) &&
1828 should_restart_tx(&qs->txq[TXQ_OFLD]) &&
1829 test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped)) {
1830 qs->txq[TXQ_OFLD].restarts++;
1831 tasklet_schedule(&qs->txq[TXQ_OFLD].qresume_tsk);
1833 if (test_bit(TXQ_CTRL, &qs->txq_stopped) &&
1834 should_restart_tx(&qs->txq[TXQ_CTRL]) &&
1835 test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped)) {
1836 qs->txq[TXQ_CTRL].restarts++;
1837 tasklet_schedule(&qs->txq[TXQ_CTRL].qresume_tsk);
1842 * rx_eth - process an ingress ethernet packet
1843 * @adap: the adapter
1844 * @rq: the response queue that received the packet
1845 * @skb: the packet
1846 * @pad: amount of padding at the start of the buffer
1848 * Process an ingress ethernet packet and deliver it to the stack.
1849 * The padding is 2 if the packet was delivered in an Rx buffer and 0
1850 * if it was immediate data in a response.
1852 static void rx_eth(struct adapter *adap, struct sge_rspq *rq,
1853 struct sk_buff *skb, int pad)
1855 struct cpl_rx_pkt *p = (struct cpl_rx_pkt *)(skb->data + pad);
1856 struct port_info *pi;
1858 skb_pull(skb, sizeof(*p) + pad);
1859 skb->protocol = eth_type_trans(skb, adap->port[p->iff]);
1860 skb->dev->last_rx = jiffies;
1861 pi = netdev_priv(skb->dev);
1862 if (pi->rx_csum_offload && p->csum_valid && p->csum == htons(0xffff) &&
1863 !p->fragment) {
1864 rspq_to_qset(rq)->port_stats[SGE_PSTAT_RX_CSUM_GOOD]++;
1865 skb->ip_summed = CHECKSUM_UNNECESSARY;
1866 } else
1867 skb->ip_summed = CHECKSUM_NONE;
1869 if (unlikely(p->vlan_valid)) {
1870 struct vlan_group *grp = pi->vlan_grp;
1872 rspq_to_qset(rq)->port_stats[SGE_PSTAT_VLANEX]++;
1873 if (likely(grp))
1874 __vlan_hwaccel_rx(skb, grp, ntohs(p->vlan),
1875 rq->polling);
1876 else
1877 dev_kfree_skb_any(skb);
1878 } else if (rq->polling)
1879 netif_receive_skb(skb);
1880 else
1881 netif_rx(skb);
1885 * handle_rsp_cntrl_info - handles control information in a response
1886 * @qs: the queue set corresponding to the response
1887 * @flags: the response control flags
1889 * Handles the control information of an SGE response, such as GTS
1890 * indications and completion credits for the queue set's Tx queues.
1891 * HW coalesces credits, we don't do any extra SW coalescing.
1893 static inline void handle_rsp_cntrl_info(struct sge_qset *qs, u32 flags)
1895 unsigned int credits;
1897 #if USE_GTS
1898 if (flags & F_RSPD_TXQ0_GTS)
1899 clear_bit(TXQ_RUNNING, &qs->txq[TXQ_ETH].flags);
1900 #endif
1902 credits = G_RSPD_TXQ0_CR(flags);
1903 if (credits)
1904 qs->txq[TXQ_ETH].processed += credits;
1906 credits = G_RSPD_TXQ2_CR(flags);
1907 if (credits)
1908 qs->txq[TXQ_CTRL].processed += credits;
1910 # if USE_GTS
1911 if (flags & F_RSPD_TXQ1_GTS)
1912 clear_bit(TXQ_RUNNING, &qs->txq[TXQ_OFLD].flags);
1913 # endif
1914 credits = G_RSPD_TXQ1_CR(flags);
1915 if (credits)
1916 qs->txq[TXQ_OFLD].processed += credits;
1920 * check_ring_db - check if we need to ring any doorbells
1921 * @adapter: the adapter
1922 * @qs: the queue set whose Tx queues are to be examined
1923 * @sleeping: indicates which Tx queue sent GTS
1925 * Checks if some of a queue set's Tx queues need to ring their doorbells
1926 * to resume transmission after idling while they still have unprocessed
1927 * descriptors.
1929 static void check_ring_db(struct adapter *adap, struct sge_qset *qs,
1930 unsigned int sleeping)
1932 if (sleeping & F_RSPD_TXQ0_GTS) {
1933 struct sge_txq *txq = &qs->txq[TXQ_ETH];
1935 if (txq->cleaned + txq->in_use != txq->processed &&
1936 !test_and_set_bit(TXQ_LAST_PKT_DB, &txq->flags)) {
1937 set_bit(TXQ_RUNNING, &txq->flags);
1938 t3_write_reg(adap, A_SG_KDOORBELL, F_SELEGRCNTX |
1939 V_EGRCNTX(txq->cntxt_id));
1943 if (sleeping & F_RSPD_TXQ1_GTS) {
1944 struct sge_txq *txq = &qs->txq[TXQ_OFLD];
1946 if (txq->cleaned + txq->in_use != txq->processed &&
1947 !test_and_set_bit(TXQ_LAST_PKT_DB, &txq->flags)) {
1948 set_bit(TXQ_RUNNING, &txq->flags);
1949 t3_write_reg(adap, A_SG_KDOORBELL, F_SELEGRCNTX |
1950 V_EGRCNTX(txq->cntxt_id));
1956 * is_new_response - check if a response is newly written
1957 * @r: the response descriptor
1958 * @q: the response queue
1960 * Returns true if a response descriptor contains a yet unprocessed
1961 * response.
1963 static inline int is_new_response(const struct rsp_desc *r,
1964 const struct sge_rspq *q)
1966 return (r->intr_gen & F_RSPD_GEN2) == q->gen;
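/*
 * Sketch of the consumer side of the generation-bit protocol that
 * is_new_response() checks: when cidx wraps past the end of the ring,
 * q->gen is flipped, so stale descriptors from the previous pass (whose
 * GEN2 bit still carries the old value) stop looking new.  Hypothetical
 * helper mirroring what process_responses() does inline below.
 */
static inline void rspq_advance(struct sge_rspq *q)
{
	if (unlikely(++q->cidx == q->size)) {
		q->cidx = 0;
		q->gen ^= 1;
	}
}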
1969 #define RSPD_GTS_MASK (F_RSPD_TXQ0_GTS | F_RSPD_TXQ1_GTS)
1970 #define RSPD_CTRL_MASK (RSPD_GTS_MASK | \
1971 V_RSPD_TXQ0_CR(M_RSPD_TXQ0_CR) | \
1972 V_RSPD_TXQ1_CR(M_RSPD_TXQ1_CR) | \
1973 V_RSPD_TXQ2_CR(M_RSPD_TXQ2_CR))
1975 /* How long to delay the next interrupt in case of memory shortage, in 0.1us. */
1976 #define NOMEM_INTR_DELAY 2500
1979 * process_responses - process responses from an SGE response queue
1980 * @adap: the adapter
1981 * @qs: the queue set to which the response queue belongs
1982 * @budget: how many responses can be processed in this round
1984 * Process responses from an SGE response queue up to the supplied budget.
1985 * Responses include received packets as well as credits and other events
1986 * for the queues that belong to the response queue's queue set.
1987 * A negative budget is effectively unlimited.
1989 * Additionally choose the interrupt holdoff time for the next interrupt
1990 * on this queue. If the system is under memory shortage, use a fairly
1991 * long delay to help recovery.
1993 static int process_responses(struct adapter *adap, struct sge_qset *qs,
1994 int budget)
1996 struct sge_rspq *q = &qs->rspq;
1997 struct rsp_desc *r = &q->desc[q->cidx];
1998 int budget_left = budget;
1999 unsigned int sleeping = 0;
2000 struct sk_buff *offload_skbs[RX_BUNDLE_SIZE];
2001 int ngathered = 0;
2003 q->next_holdoff = q->holdoff_tmr;
2005 while (likely(budget_left && is_new_response(r, q))) {
2006 int eth, ethpad = 2;
2007 struct sk_buff *skb = NULL;
2008 u32 len, flags = ntohl(r->flags);
2009 __be32 rss_hi = *(const __be32 *)r, rss_lo = r->rss_hdr.rss_hash_val;
2011 eth = r->rss_hdr.opcode == CPL_RX_PKT;
2013 if (unlikely(flags & F_RSPD_ASYNC_NOTIF)) {
2014 skb = alloc_skb(AN_PKT_SIZE, GFP_ATOMIC);
2015 if (!skb)
2016 goto no_mem;
2018 memcpy(__skb_put(skb, AN_PKT_SIZE), r, AN_PKT_SIZE);
2019 skb->data[0] = CPL_ASYNC_NOTIF;
2020 rss_hi = htonl(CPL_ASYNC_NOTIF << 24);
2021 q->async_notif++;
2022 } else if (flags & F_RSPD_IMM_DATA_VALID) {
2023 skb = get_imm_packet(r);
2024 if (unlikely(!skb)) {
2025 no_mem:
2026 q->next_holdoff = NOMEM_INTR_DELAY;
2027 q->nomem++;
2028 /* consume one credit since we tried */
2029 budget_left--;
2030 break;
2032 q->imm_data++;
2033 ethpad = 0;
2034 } else if ((len = ntohl(r->len_cq)) != 0) {
2035 struct sge_fl *fl;
2037 fl = (len & F_RSPD_FLQ) ? &qs->fl[1] : &qs->fl[0];
2038 if (fl->use_pages) {
2039 void *addr = fl->sdesc[fl->cidx].pg_chunk.va;
2041 prefetch(addr);
2042 #if L1_CACHE_BYTES < 128
2043 prefetch(addr + L1_CACHE_BYTES);
2044 #endif
2045 __refill_fl(adap, fl);
2047 skb = get_packet_pg(adap, fl, G_RSPD_LEN(len),
2048 eth ? SGE_RX_DROP_THRES : 0);
2049 } else
2050 skb = get_packet(adap, fl, G_RSPD_LEN(len),
2051 eth ? SGE_RX_DROP_THRES : 0);
2052 if (unlikely(!skb)) {
2053 if (!eth)
2054 goto no_mem;
2055 q->rx_drops++;
2056 } else if (unlikely(r->rss_hdr.opcode == CPL_TRACE_PKT))
2057 __skb_pull(skb, 2);
2059 if (++fl->cidx == fl->size)
2060 fl->cidx = 0;
2061 } else
2062 q->pure_rsps++;
2064 if (flags & RSPD_CTRL_MASK) {
2065 sleeping |= flags & RSPD_GTS_MASK;
2066 handle_rsp_cntrl_info(qs, flags);
2069 r++;
2070 if (unlikely(++q->cidx == q->size)) {
2071 q->cidx = 0;
2072 q->gen ^= 1;
2073 r = q->desc;
2075 prefetch(r);
2077 if (++q->credits >= (q->size / 4)) {
2078 refill_rspq(adap, q, q->credits);
2079 q->credits = 0;
2082 if (likely(skb != NULL)) {
2083 if (eth)
2084 rx_eth(adap, q, skb, ethpad);
2085 else {
2086 q->offload_pkts++;
2087 /* Preserve the RSS info in csum & priority */
2088 skb->csum = rss_hi;
2089 skb->priority = rss_lo;
2090 ngathered = rx_offload(&adap->tdev, q, skb,
2091 offload_skbs,
2092 ngathered);
2095 --budget_left;
2098 deliver_partial_bundle(&adap->tdev, q, offload_skbs, ngathered);
2099 if (sleeping)
2100 check_ring_db(adap, qs, sleeping);
2102 smp_mb(); /* commit Tx queue .processed updates */
2103 if (unlikely(qs->txq_stopped != 0))
2104 restart_tx(qs);
2106 budget -= budget_left;
2107 return budget;
2110 static inline int is_pure_response(const struct rsp_desc *r)
2112 u32 n = ntohl(r->flags) & (F_RSPD_ASYNC_NOTIF | F_RSPD_IMM_DATA_VALID);
2114 return (n | r->len_cq) == 0;
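/*
 * Equivalent, more literal form of the check above, for illustration
 * only: r->len_cq can be tested without a byte swap because the
 * comparison is against zero, which is the same in any byte order.
 */
static inline int is_pure_response_verbose(const struct rsp_desc *r)
{
	u32 flags = ntohl(r->flags);

	return !(flags & (F_RSPD_ASYNC_NOTIF | F_RSPD_IMM_DATA_VALID)) &&
	       ntohl(r->len_cq) == 0;
}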
2118 * napi_rx_handler - the NAPI handler for Rx processing
2119 * @napi: the napi instance
2120 * @budget: how many packets we can process in this round
2122 * Handler for new data events when using NAPI.
2124 static int napi_rx_handler(struct napi_struct *napi, int budget)
2126 struct sge_qset *qs = container_of(napi, struct sge_qset, napi);
2127 struct adapter *adap = qs->adap;
2128 int work_done = process_responses(adap, qs, budget);
2130 if (likely(work_done < budget)) {
2131 napi_complete(napi);
2134 * Because we don't atomically flush the following
2135 * write it is possible that in very rare cases it can
2136 * reach the device in a way that races with a new
2137 * response being written plus an error interrupt
2138 * causing the NAPI interrupt handler below to return
2139 * unhandled status to the OS. To protect against
2140 * this would require flushing the write and doing
2141 * both the write and the flush with interrupts off.
2142 * Way too expensive and unjustifiable given the
2143 * rarity of the race.
2145 * The race cannot happen at all with MSI-X.
2147 t3_write_reg(adap, A_SG_GTS, V_RSPQ(qs->rspq.cntxt_id) |
2148 V_NEWTIMER(qs->rspq.next_holdoff) |
2149 V_NEWINDEX(qs->rspq.cidx));
2151 return work_done;
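/*
 * Sketch only: napi_rx_handler() follows the usual NAPI contract (return
 * the amount of work done, call napi_complete() when it is below the
 * budget).  Registering the handler with the stack happens outside this
 * file; assuming a NAPI weight of 64, it would look roughly like the
 * hypothetical helper below.
 */
static inline void example_register_qset_napi(struct net_device *dev,
					      struct sge_qset *qs)
{
	netif_napi_add(dev, &qs->napi, napi_rx_handler, 64);
}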
2155 * Returns true if the device is already scheduled for polling.
2157 static inline int napi_is_scheduled(struct napi_struct *napi)
2159 return test_bit(NAPI_STATE_SCHED, &napi->state);
2163 * process_pure_responses - process pure responses from a response queue
2164 * @adap: the adapter
2165 * @qs: the queue set owning the response queue
2166 * @r: the first pure response to process
2168 * A simpler version of process_responses() that handles only pure (i.e.,
2169 * non data-carrying) responses. Such responses are too lightweight to
2170 * justify calling a softirq under NAPI, so we handle them specially in
2171 * the interrupt handler. The function is called with a pointer to a
2172 * response, which the caller must ensure is a valid pure response.
2174 * Returns 1 if it encounters a valid data-carrying response, 0 otherwise.
2176 static int process_pure_responses(struct adapter *adap, struct sge_qset *qs,
2177 struct rsp_desc *r)
2179 struct sge_rspq *q = &qs->rspq;
2180 unsigned int sleeping = 0;
2182 do {
2183 u32 flags = ntohl(r->flags);
2185 r++;
2186 if (unlikely(++q->cidx == q->size)) {
2187 q->cidx = 0;
2188 q->gen ^= 1;
2189 r = q->desc;
2191 prefetch(r);
2193 if (flags & RSPD_CTRL_MASK) {
2194 sleeping |= flags & RSPD_GTS_MASK;
2195 handle_rsp_cntrl_info(qs, flags);
2198 q->pure_rsps++;
2199 if (++q->credits >= (q->size / 4)) {
2200 refill_rspq(adap, q, q->credits);
2201 q->credits = 0;
2203 } while (is_new_response(r, q) && is_pure_response(r));
2205 if (sleeping)
2206 check_ring_db(adap, qs, sleeping);
2208 smp_mb(); /* commit Tx queue .processed updates */
2209 if (unlikely(qs->txq_stopped != 0))
2210 restart_tx(qs);
2212 return is_new_response(r, q);
2216 * handle_responses - decide what to do with new responses in NAPI mode
2217 * @adap: the adapter
2218 * @q: the response queue
2220 * This is used by the NAPI interrupt handlers to decide what to do with
2221 * new SGE responses. If there are no new responses it returns -1. If
2222 * there are new responses and they are pure (i.e., non-data carrying)
2223 * it handles them straight in hard interrupt context as they are very
2224 * cheap and don't deliver any packets. Finally, if there are any data
2225 * signaling responses it schedules the NAPI handler. Returns 1 if it
2226 * schedules NAPI, 0 if all new responses were pure.
2228 * The caller must ascertain NAPI is not already running.
2230 static inline int handle_responses(struct adapter *adap, struct sge_rspq *q)
2232 struct sge_qset *qs = rspq_to_qset(q);
2233 struct rsp_desc *r = &q->desc[q->cidx];
2235 if (!is_new_response(r, q))
2236 return -1;
2237 if (is_pure_response(r) && process_pure_responses(adap, qs, r) == 0) {
2238 t3_write_reg(adap, A_SG_GTS, V_RSPQ(q->cntxt_id) |
2239 V_NEWTIMER(q->holdoff_tmr) | V_NEWINDEX(q->cidx));
2240 return 0;
2242 napi_schedule(&qs->napi);
2243 return 1;
2247 * The MSI-X interrupt handler for an SGE response queue for the non-NAPI case
2248 * (i.e., response queue serviced in hard interrupt).
2250 irqreturn_t t3_sge_intr_msix(int irq, void *cookie)
2252 struct sge_qset *qs = cookie;
2253 struct adapter *adap = qs->adap;
2254 struct sge_rspq *q = &qs->rspq;
2256 spin_lock(&q->lock);
2257 if (process_responses(adap, qs, -1) == 0)
2258 q->unhandled_irqs++;
2259 t3_write_reg(adap, A_SG_GTS, V_RSPQ(q->cntxt_id) |
2260 V_NEWTIMER(q->next_holdoff) | V_NEWINDEX(q->cidx));
2261 spin_unlock(&q->lock);
2262 return IRQ_HANDLED;
2266 * The MSI-X interrupt handler for an SGE response queue for the NAPI case
2267 * (i.e., response queue serviced by NAPI polling).
2269 static irqreturn_t t3_sge_intr_msix_napi(int irq, void *cookie)
2271 struct sge_qset *qs = cookie;
2272 struct sge_rspq *q = &qs->rspq;
2274 spin_lock(&q->lock);
2276 if (handle_responses(qs->adap, q) < 0)
2277 q->unhandled_irqs++;
2278 spin_unlock(&q->lock);
2279 return IRQ_HANDLED;
2283 * The non-NAPI MSI interrupt handler. This needs to handle data events from
2284 * SGE response queues as well as error and other async events as they all use
2285 * the same MSI vector. We use one SGE response queue per port in this mode
2286 * and protect all response queues with queue 0's lock.
2288 static irqreturn_t t3_intr_msi(int irq, void *cookie)
2290 int new_packets = 0;
2291 struct adapter *adap = cookie;
2292 struct sge_rspq *q = &adap->sge.qs[0].rspq;
2294 spin_lock(&q->lock);
2296 if (process_responses(adap, &adap->sge.qs[0], -1)) {
2297 t3_write_reg(adap, A_SG_GTS, V_RSPQ(q->cntxt_id) |
2298 V_NEWTIMER(q->next_holdoff) | V_NEWINDEX(q->cidx));
2299 new_packets = 1;
2302 if (adap->params.nports == 2 &&
2303 process_responses(adap, &adap->sge.qs[1], -1)) {
2304 struct sge_rspq *q1 = &adap->sge.qs[1].rspq;
2306 t3_write_reg(adap, A_SG_GTS, V_RSPQ(q1->cntxt_id) |
2307 V_NEWTIMER(q1->next_holdoff) |
2308 V_NEWINDEX(q1->cidx));
2309 new_packets = 1;
2312 if (!new_packets && t3_slow_intr_handler(adap) == 0)
2313 q->unhandled_irqs++;
2315 spin_unlock(&q->lock);
2316 return IRQ_HANDLED;
2319 static int rspq_check_napi(struct sge_qset *qs)
2321 struct sge_rspq *q = &qs->rspq;
2323 if (!napi_is_scheduled(&qs->napi) &&
2324 is_new_response(&q->desc[q->cidx], q)) {
2325 napi_schedule(&qs->napi);
2326 return 1;
2328 return 0;
2332 * The MSI interrupt handler for the NAPI case (i.e., response queues serviced
2333 * by NAPI polling). Handles data events from SGE response queues as well as
2334 * error and other async events as they all use the same MSI vector. We use
2335 * one SGE response queue per port in this mode and protect all response
2336 * queues with queue 0's lock.
2338 static irqreturn_t t3_intr_msi_napi(int irq, void *cookie)
2340 int new_packets;
2341 struct adapter *adap = cookie;
2342 struct sge_rspq *q = &adap->sge.qs[0].rspq;
2344 spin_lock(&q->lock);
2346 new_packets = rspq_check_napi(&adap->sge.qs[0]);
2347 if (adap->params.nports == 2)
2348 new_packets += rspq_check_napi(&adap->sge.qs[1]);
2349 if (!new_packets && t3_slow_intr_handler(adap) == 0)
2350 q->unhandled_irqs++;
2352 spin_unlock(&q->lock);
2353 return IRQ_HANDLED;
2357 * A helper function that processes responses and issues GTS.
2359 static inline int process_responses_gts(struct adapter *adap,
2360 struct sge_rspq *rq)
2362 int work;
2364 work = process_responses(adap, rspq_to_qset(rq), -1);
2365 t3_write_reg(adap, A_SG_GTS, V_RSPQ(rq->cntxt_id) |
2366 V_NEWTIMER(rq->next_holdoff) | V_NEWINDEX(rq->cidx));
2367 return work;
2371 * The legacy INTx interrupt handler. This needs to handle data events from
2372 * SGE response queues as well as error and other async events as they all use
2373 * the same interrupt pin. We use one SGE response queue per port in this mode
2374 * and protect all response queues with queue 0's lock.
2376 static irqreturn_t t3_intr(int irq, void *cookie)
2378 int work_done, w0, w1;
2379 struct adapter *adap = cookie;
2380 struct sge_rspq *q0 = &adap->sge.qs[0].rspq;
2381 struct sge_rspq *q1 = &adap->sge.qs[1].rspq;
2383 spin_lock(&q0->lock);
2385 w0 = is_new_response(&q0->desc[q0->cidx], q0);
2386 w1 = adap->params.nports == 2 &&
2387 is_new_response(&q1->desc[q1->cidx], q1);
2389 if (likely(w0 | w1)) {
2390 t3_write_reg(adap, A_PL_CLI, 0);
2391 t3_read_reg(adap, A_PL_CLI); /* flush */
2393 if (likely(w0))
2394 process_responses_gts(adap, q0);
2396 if (w1)
2397 process_responses_gts(adap, q1);
2399 work_done = w0 | w1;
2400 } else
2401 work_done = t3_slow_intr_handler(adap);
2403 spin_unlock(&q0->lock);
2404 return IRQ_RETVAL(work_done != 0);
2408 * Interrupt handler for legacy INTx interrupts for T3B-based cards.
2409 * Handles data events from SGE response queues as well as error and other
2410 * async events as they all use the same interrupt pin. We use one SGE
2411 * response queue per port in this mode and protect all response queues with
2412 * queue 0's lock.
2414 static irqreturn_t t3b_intr(int irq, void *cookie)
2416 u32 map;
2417 struct adapter *adap = cookie;
2418 struct sge_rspq *q0 = &adap->sge.qs[0].rspq;
2420 t3_write_reg(adap, A_PL_CLI, 0);
2421 map = t3_read_reg(adap, A_SG_DATA_INTR);
2423 if (unlikely(!map)) /* shared interrupt, most likely */
2424 return IRQ_NONE;
2426 spin_lock(&q0->lock);
2428 if (unlikely(map & F_ERRINTR))
2429 t3_slow_intr_handler(adap);
2431 if (likely(map & 1))
2432 process_responses_gts(adap, q0);
2434 if (map & 2)
2435 process_responses_gts(adap, &adap->sge.qs[1].rspq);
2437 spin_unlock(&q0->lock);
2438 return IRQ_HANDLED;
2442 * NAPI interrupt handler for legacy INTx interrupts for T3B-based cards.
2443 * Handles data events from SGE response queues as well as error and other
2444 * async events as they all use the same interrupt pin. We use one SGE
2445 * response queue per port in this mode and protect all response queues with
2446 * queue 0's lock.
2448 static irqreturn_t t3b_intr_napi(int irq, void *cookie)
2450 u32 map;
2451 struct adapter *adap = cookie;
2452 struct sge_qset *qs0 = &adap->sge.qs[0];
2453 struct sge_rspq *q0 = &qs0->rspq;
2455 t3_write_reg(adap, A_PL_CLI, 0);
2456 map = t3_read_reg(adap, A_SG_DATA_INTR);
2458 if (unlikely(!map)) /* shared interrupt, most likely */
2459 return IRQ_NONE;
2461 spin_lock(&q0->lock);
2463 if (unlikely(map & F_ERRINTR))
2464 t3_slow_intr_handler(adap);
2466 if (likely(map & 1))
2467 napi_schedule(&qs0->napi);
2469 if (map & 2)
2470 napi_schedule(&adap->sge.qs[1].napi);
2472 spin_unlock(&q0->lock);
2473 return IRQ_HANDLED;
2477 * t3_intr_handler - select the top-level interrupt handler
2478 * @adap: the adapter
2479 * @polling: whether using NAPI to service response queues
2481 * Selects the top-level interrupt handler based on the type of interrupts
2482 * (MSI-X, MSI, or legacy) and whether NAPI will be used to service the
2483 * response queues.
2485 irq_handler_t t3_intr_handler(struct adapter *adap, int polling)
2487 if (adap->flags & USING_MSIX)
2488 return polling ? t3_sge_intr_msix_napi : t3_sge_intr_msix;
2489 if (adap->flags & USING_MSI)
2490 return polling ? t3_intr_msi_napi : t3_intr_msi;
2491 if (adap->params.rev > 0)
2492 return polling ? t3b_intr_napi : t3b_intr;
2493 return t3_intr;
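/*
 * Illustrative sketch, not part of this file: the caller (cxgb3_main.c)
 * is expected to hand the handler selected above to request_irq(), with
 * the adapter (or, for MSI-X, a queue set) as the cookie the handlers
 * expect.  The name string here is a placeholder and the flags choice is
 * an assumption: legacy INTx lines are typically shared, MSI is not.
 */
static inline int example_request_line_irq(struct adapter *adap, int polling)
{
	return request_irq(adap->pdev->irq, t3_intr_handler(adap, polling),
			   (adap->flags & USING_MSI) ? 0 : IRQF_SHARED,
			   "cxgb3-example", adap);
}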
2496 #define SGE_PARERR (F_CPPARITYERROR | F_OCPARITYERROR | F_RCPARITYERROR | \
2497 F_IRPARITYERROR | V_ITPARITYERROR(M_ITPARITYERROR) | \
2498 V_FLPARITYERROR(M_FLPARITYERROR) | F_LODRBPARITYERROR | \
2499 F_HIDRBPARITYERROR | F_LORCQPARITYERROR | \
2500 F_HIRCQPARITYERROR)
2501 #define SGE_FRAMINGERR (F_UC_REQ_FRAMINGERROR | F_R_REQ_FRAMINGERROR)
2502 #define SGE_FATALERR (SGE_PARERR | SGE_FRAMINGERR | F_RSPQCREDITOVERFOW | \
2503 F_RSPQDISABLED)
2506 * t3_sge_err_intr_handler - SGE async event interrupt handler
2507 * @adapter: the adapter
2509 * Interrupt handler for SGE asynchronous (non-data) events.
2511 void t3_sge_err_intr_handler(struct adapter *adapter)
2513 unsigned int v, status = t3_read_reg(adapter, A_SG_INT_CAUSE);
2515 if (status & SGE_PARERR)
2516 CH_ALERT(adapter, "SGE parity error (0x%x)\n",
2517 status & SGE_PARERR);
2518 if (status & SGE_FRAMINGERR)
2519 CH_ALERT(adapter, "SGE framing error (0x%x)\n",
2520 status & SGE_FRAMINGERR);
2522 if (status & F_RSPQCREDITOVERFOW)
2523 CH_ALERT(adapter, "SGE response queue credit overflow\n");
2525 if (status & F_RSPQDISABLED) {
2526 v = t3_read_reg(adapter, A_SG_RSPQ_FL_STATUS);
2528 CH_ALERT(adapter,
2529 "packet delivered to disabled response queue "
2530 "(0x%x)\n", (v >> S_RSPQ0DISABLED) & 0xff);
2533 if (status & (F_HIPIODRBDROPERR | F_LOPIODRBDROPERR))
2534 CH_ALERT(adapter, "SGE dropped %s priority doorbell\n",
2535 status & F_HIPIODRBDROPERR ? "high" : "low");
2537 t3_write_reg(adapter, A_SG_INT_CAUSE, status);
2538 if (status & SGE_FATALERR)
2539 t3_fatal_err(adapter);
2543 * sge_timer_cb - perform periodic maintenance of an SGE qset
2544 * @data: the SGE queue set to maintain
2546 * Runs periodically from a timer to perform maintenance of an SGE queue
2547 * set. It performs two tasks:
2549 * a) Cleans up any completed Tx descriptors that may still be pending.
2550 * Normal descriptor cleanup happens when new packets are added to a Tx
2551 * queue so this timer is relatively infrequent and does any cleanup only
2552 * if the Tx queue has not seen any new packets in a while. We make a
2553 * best effort attempt to reclaim descriptors, in that we don't wait
2554 * around if we cannot get a queue's lock (which most likely is because
2555 * someone else is queueing new packets and so will also handle the clean
2556 * up). Since control queues use immediate data exclusively we don't
2557 * bother cleaning them up here.
2559 * b) Replenishes Rx queues that have run out due to memory shortage.
2560 * Normally new Rx buffers are added when existing ones are consumed but
2561 * when out of memory a queue can become empty. We try to add only a few
2562 * buffers here, the queue will be replenished fully as these new buffers
2563 * are used up if memory shortage has subsided.
2565 static void sge_timer_cb(unsigned long data)
2567 spinlock_t *lock;
2568 struct sge_qset *qs = (struct sge_qset *)data;
2569 struct adapter *adap = qs->adap;
2571 if (spin_trylock(&qs->txq[TXQ_ETH].lock)) {
2572 reclaim_completed_tx(adap, &qs->txq[TXQ_ETH]);
2573 spin_unlock(&qs->txq[TXQ_ETH].lock);
2575 if (spin_trylock(&qs->txq[TXQ_OFLD].lock)) {
2576 reclaim_completed_tx(adap, &qs->txq[TXQ_OFLD]);
2577 spin_unlock(&qs->txq[TXQ_OFLD].lock);
2579 lock = (adap->flags & USING_MSIX) ? &qs->rspq.lock :
2580 &adap->sge.qs[0].rspq.lock;
2581 if (spin_trylock_irq(lock)) {
2582 if (!napi_is_scheduled(&qs->napi)) {
2583 u32 status = t3_read_reg(adap, A_SG_RSPQ_FL_STATUS);
2585 if (qs->fl[0].credits < qs->fl[0].size)
2586 __refill_fl(adap, &qs->fl[0]);
2587 if (qs->fl[1].credits < qs->fl[1].size)
2588 __refill_fl(adap, &qs->fl[1]);
2590 if (status & (1 << qs->rspq.cntxt_id)) {
2591 qs->rspq.starved++;
2592 if (qs->rspq.credits) {
2593 refill_rspq(adap, &qs->rspq, 1);
2594 qs->rspq.credits--;
2595 qs->rspq.restarted++;
2596 t3_write_reg(adap, A_SG_RSPQ_FL_STATUS,
2597 1 << qs->rspq.cntxt_id);
2601 spin_unlock_irq(lock);
2603 mod_timer(&qs->tx_reclaim_timer, jiffies + TX_RECLAIM_PERIOD);
2607 * t3_update_qset_coalesce - update coalescing settings for a queue set
2608 * @qs: the SGE queue set
2609 * @p: new queue set parameters
2611 * Update the coalescing settings for an SGE queue set. Nothing is done
2612 * if the queue set is not initialized yet.
2614 void t3_update_qset_coalesce(struct sge_qset *qs, const struct qset_params *p)
2616 qs->rspq.holdoff_tmr = max(p->coalesce_usecs * 10, 1U);/* can't be 0 */
2617 qs->rspq.polling = p->polling;
2618 qs->napi.poll = p->polling ? napi_rx_handler : ofld_poll;
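/*
 * Illustrative only: the holdoff timer is kept in units of 0.1us, hence
 * the "* 10" above (and NOMEM_INTR_DELAY of 2500 meaning 250us).  A
 * hypothetical ethtool-style readback would simply invert the
 * conversion.
 */
static inline unsigned int qset_coalesce_usecs(const struct sge_qset *qs)
{
	return qs->rspq.holdoff_tmr / 10;
}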
2622 * t3_sge_alloc_qset - initialize an SGE queue set
2623 * @adapter: the adapter
2624 * @id: the queue set id
2625 * @nports: how many Ethernet ports will be using this queue set
2626 * @irq_vec_idx: the IRQ vector index for response queue interrupts
2627 * @p: configuration parameters for this queue set
2628 * @ntxq: number of Tx queues for the queue set
2629 * @netdev: net device associated with this queue set
2631 * Allocate resources and initialize an SGE queue set. A queue set
2632 * comprises a response queue, two Rx free-buffer queues, and up to 3
2633 * Tx queues. The Tx queues are assigned roles in the order Ethernet
2634 * queue, offload queue, and control queue.
2636 int t3_sge_alloc_qset(struct adapter *adapter, unsigned int id, int nports,
2637 int irq_vec_idx, const struct qset_params *p,
2638 int ntxq, struct net_device *dev)
2640 int i, avail, ret = -ENOMEM;
2641 struct sge_qset *q = &adapter->sge.qs[id];
2643 init_qset_cntxt(q, id);
2644 init_timer(&q->tx_reclaim_timer);
2645 q->tx_reclaim_timer.data = (unsigned long)q;
2646 q->tx_reclaim_timer.function = sge_timer_cb;
2648 q->fl[0].desc = alloc_ring(adapter->pdev, p->fl_size,
2649 sizeof(struct rx_desc),
2650 sizeof(struct rx_sw_desc),
2651 &q->fl[0].phys_addr, &q->fl[0].sdesc);
2652 if (!q->fl[0].desc)
2653 goto err;
2655 q->fl[1].desc = alloc_ring(adapter->pdev, p->jumbo_size,
2656 sizeof(struct rx_desc),
2657 sizeof(struct rx_sw_desc),
2658 &q->fl[1].phys_addr, &q->fl[1].sdesc);
2659 if (!q->fl[1].desc)
2660 goto err;
2662 q->rspq.desc = alloc_ring(adapter->pdev, p->rspq_size,
2663 sizeof(struct rsp_desc), 0,
2664 &q->rspq.phys_addr, NULL);
2665 if (!q->rspq.desc)
2666 goto err;
2668 for (i = 0; i < ntxq; ++i) {
2670 * The control queue always uses immediate data so does not
2671 * need to keep track of any sk_buffs.
2673 size_t sz = i == TXQ_CTRL ? 0 : sizeof(struct tx_sw_desc);
2675 q->txq[i].desc = alloc_ring(adapter->pdev, p->txq_size[i],
2676 sizeof(struct tx_desc), sz,
2677 &q->txq[i].phys_addr,
2678 &q->txq[i].sdesc);
2679 if (!q->txq[i].desc)
2680 goto err;
2682 q->txq[i].gen = 1;
2683 q->txq[i].size = p->txq_size[i];
2684 spin_lock_init(&q->txq[i].lock);
2685 skb_queue_head_init(&q->txq[i].sendq);
2688 tasklet_init(&q->txq[TXQ_OFLD].qresume_tsk, restart_offloadq,
2689 (unsigned long)q);
2690 tasklet_init(&q->txq[TXQ_CTRL].qresume_tsk, restart_ctrlq,
2691 (unsigned long)q);
2693 q->fl[0].gen = q->fl[1].gen = 1;
2694 q->fl[0].size = p->fl_size;
2695 q->fl[1].size = p->jumbo_size;
2697 q->rspq.gen = 1;
2698 q->rspq.size = p->rspq_size;
2699 spin_lock_init(&q->rspq.lock);
2701 q->txq[TXQ_ETH].stop_thres = nports *
2702 flits_to_desc(sgl_len(MAX_SKB_FRAGS + 1) + 3);
2704 #if FL0_PG_CHUNK_SIZE > 0
2705 q->fl[0].buf_size = FL0_PG_CHUNK_SIZE;
2706 #else
2707 q->fl[0].buf_size = SGE_RX_SM_BUF_SIZE + sizeof(struct cpl_rx_data);
2708 #endif
2709 q->fl[0].use_pages = FL0_PG_CHUNK_SIZE > 0;
2710 q->fl[1].buf_size = is_offload(adapter) ?
2711 (16 * 1024) - SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) :
2712 MAX_FRAME_SIZE + 2 + sizeof(struct cpl_rx_pkt);
2714 spin_lock_irq(&adapter->sge.reg_lock);
2716 /* FL threshold comparison uses < */
2717 ret = t3_sge_init_rspcntxt(adapter, q->rspq.cntxt_id, irq_vec_idx,
2718 q->rspq.phys_addr, q->rspq.size,
2719 q->fl[0].buf_size, 1, 0);
2720 if (ret)
2721 goto err_unlock;
2723 for (i = 0; i < SGE_RXQ_PER_SET; ++i) {
2724 ret = t3_sge_init_flcntxt(adapter, q->fl[i].cntxt_id, 0,
2725 q->fl[i].phys_addr, q->fl[i].size,
2726 q->fl[i].buf_size, p->cong_thres, 1,
2728 if (ret)
2729 goto err_unlock;
2732 ret = t3_sge_init_ecntxt(adapter, q->txq[TXQ_ETH].cntxt_id, USE_GTS,
2733 SGE_CNTXT_ETH, id, q->txq[TXQ_ETH].phys_addr,
2734 q->txq[TXQ_ETH].size, q->txq[TXQ_ETH].token,
2735 1, 0);
2736 if (ret)
2737 goto err_unlock;
2739 if (ntxq > 1) {
2740 ret = t3_sge_init_ecntxt(adapter, q->txq[TXQ_OFLD].cntxt_id,
2741 USE_GTS, SGE_CNTXT_OFLD, id,
2742 q->txq[TXQ_OFLD].phys_addr,
2743 q->txq[TXQ_OFLD].size, 0, 1, 0);
2744 if (ret)
2745 goto err_unlock;
2748 if (ntxq > 2) {
2749 ret = t3_sge_init_ecntxt(adapter, q->txq[TXQ_CTRL].cntxt_id, 0,
2750 SGE_CNTXT_CTRL, id,
2751 q->txq[TXQ_CTRL].phys_addr,
2752 q->txq[TXQ_CTRL].size,
2753 q->txq[TXQ_CTRL].token, 1, 0);
2754 if (ret)
2755 goto err_unlock;
2758 spin_unlock_irq(&adapter->sge.reg_lock);
2760 q->adap = adapter;
2761 q->netdev = dev;
2762 t3_update_qset_coalesce(q, p);
2763 avail = refill_fl(adapter, &q->fl[0], q->fl[0].size, GFP_KERNEL);
2764 if (!avail) {
2765 CH_ALERT(adapter, "free list queue 0 initialization failed\n");
2766 goto err;
2768 if (avail < q->fl[0].size)
2769 CH_WARN(adapter, "free list queue 0 enabled with %d credits\n",
2770 avail);
2772 avail = refill_fl(adapter, &q->fl[1], q->fl[1].size, GFP_KERNEL);
2773 if (avail < q->fl[1].size)
2774 CH_WARN(adapter, "free list queue 1 enabled with %d credits\n",
2775 avail);
2776 refill_rspq(adapter, &q->rspq, q->rspq.size - 1);
2778 t3_write_reg(adapter, A_SG_GTS, V_RSPQ(q->rspq.cntxt_id) |
2779 V_NEWTIMER(q->rspq.holdoff_tmr));
2781 mod_timer(&q->tx_reclaim_timer, jiffies + TX_RECLAIM_PERIOD);
2782 return 0;
2784 err_unlock:
2785 spin_unlock_irq(&adapter->sge.reg_lock);
2786 err:
2787 t3_free_qset(adapter, q);
2788 return ret;
2792 * t3_free_sge_resources - free SGE resources
2793 * @adap: the adapter
2795 * Frees resources used by the SGE queue sets.
2797 void t3_free_sge_resources(struct adapter *adap)
2799 int i;
2801 for (i = 0; i < SGE_QSETS; ++i)
2802 t3_free_qset(adap, &adap->sge.qs[i]);
2806 * t3_sge_start - enable SGE
2807 * @adap: the adapter
2809 * Enables the SGE for DMAs. This is the last step in starting packet
2810 * transfers.
2812 void t3_sge_start(struct adapter *adap)
2814 t3_set_reg_field(adap, A_SG_CONTROL, F_GLOBALENABLE, F_GLOBALENABLE);
2818 * t3_sge_stop - disable SGE operation
2819 * @adap: the adapter
2821 * Disables the DMA engine. This can be called in emergencies (e.g.,
2822 * from error interrupts) or from normal process context. In the latter
2823 * case it also disables any pending queue restart tasklets. Note that
2824 * if it is called in interrupt context it cannot disable the restart
2825 * tasklets as it cannot wait; however, the tasklets will have no effect
2826 * since the doorbells are disabled and the driver will call this again
2827 * later from process context, at which time the tasklets will be stopped
2828 * if they are still running.
2830 void t3_sge_stop(struct adapter *adap)
2832 t3_set_reg_field(adap, A_SG_CONTROL, F_GLOBALENABLE, 0);
2833 if (!in_interrupt()) {
2834 int i;
2836 for (i = 0; i < SGE_QSETS; ++i) {
2837 struct sge_qset *qs = &adap->sge.qs[i];
2839 tasklet_kill(&qs->txq[TXQ_OFLD].qresume_tsk);
2840 tasklet_kill(&qs->txq[TXQ_CTRL].qresume_tsk);
2846 * t3_sge_init - initialize SGE
2847 * @adap: the adapter
2848 * @p: the SGE parameters
2850 * Performs SGE initialization needed every time after a chip reset.
2851 * We do not initialize any of the queue sets here, instead the driver
2852 * top-level must request those individually. We also do not enable DMA
2853 * here, that should be done after the queues have been set up.
2855 void t3_sge_init(struct adapter *adap, struct sge_params *p)
2857 unsigned int ctrl, ups = ffs(pci_resource_len(adap->pdev, 2) >> 12);
2859 ctrl = F_DROPPKT | V_PKTSHIFT(2) | F_FLMODE | F_AVOIDCQOVFL |
2860 F_CQCRDTCTRL | F_CONGMODE | F_TNLFLMODE | F_FATLPERREN |
2861 V_HOSTPAGESIZE(PAGE_SHIFT - 11) | F_BIGENDIANINGRESS |
2862 V_USERSPACESIZE(ups ? ups - 1 : 0) | F_ISCSICOALESCING;
2863 #if SGE_NUM_GENBITS == 1
2864 ctrl |= F_EGRGENCTRL;
2865 #endif
2866 if (adap->params.rev > 0) {
2867 if (!(adap->flags & (USING_MSIX | USING_MSI)))
2868 ctrl |= F_ONEINTMULTQ | F_OPTONEINTMULTQ;
2870 t3_write_reg(adap, A_SG_CONTROL, ctrl);
2871 t3_write_reg(adap, A_SG_EGR_RCQ_DRB_THRSH, V_HIRCQDRBTHRSH(512) |
2872 V_LORCQDRBTHRSH(512));
2873 t3_write_reg(adap, A_SG_TIMER_TICK, core_ticks_per_usec(adap) / 10);
2874 t3_write_reg(adap, A_SG_CMDQ_CREDIT_TH, V_THRESHOLD(32) |
2875 V_TIMEOUT(200 * core_ticks_per_usec(adap)));
2876 t3_write_reg(adap, A_SG_HI_DRB_HI_THRSH,
2877 adap->params.rev < T3_REV_C ? 1000 : 500);
2878 t3_write_reg(adap, A_SG_HI_DRB_LO_THRSH, 256);
2879 t3_write_reg(adap, A_SG_LO_DRB_HI_THRSH, 1000);
2880 t3_write_reg(adap, A_SG_LO_DRB_LO_THRSH, 256);
2881 t3_write_reg(adap, A_SG_OCO_BASE, V_BASE1(0xfff));
2882 t3_write_reg(adap, A_SG_DRB_PRI_THRESH, 63 * 1024);
2886 * t3_sge_prep - one-time SGE initialization
2887 * @adap: the associated adapter
2888 * @p: SGE parameters
2890 * Performs one-time initialization of SGE SW state. Includes determining
2891 * defaults for the assorted SGE parameters, which admins can change until
2892 * they are used to initialize the SGE.
2894 void t3_sge_prep(struct adapter *adap, struct sge_params *p)
2896 int i;
2898 p->max_pkt_size = (16 * 1024) - sizeof(struct cpl_rx_data) -
2899 SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
2901 for (i = 0; i < SGE_QSETS; ++i) {
2902 struct qset_params *q = p->qset + i;
2904 q->polling = adap->params.rev > 0;
2905 q->coalesce_usecs = 5;
2906 q->rspq_size = 1024;
2907 q->fl_size = 1024;
2908 q->jumbo_size = 512;
2909 q->txq_size[TXQ_ETH] = 1024;
2910 q->txq_size[TXQ_OFLD] = 1024;
2911 q->txq_size[TXQ_CTRL] = 256;
2912 q->cong_thres = 0;
2915 spin_lock_init(&adap->sge.reg_lock);
2919 * t3_get_desc - dump an SGE descriptor for debugging purposes
2920 * @qs: the queue set
2921 * @qnum: identifies the specific queue (0..2: Tx, 3: response, 4..5: Rx)
2922 * @idx: the descriptor index in the queue
2923 * @data: where to dump the descriptor contents
2925 * Dumps the contents of a HW descriptor of an SGE queue. Returns the
2926 * size of the descriptor.
2928 int t3_get_desc(const struct sge_qset *qs, unsigned int qnum, unsigned int idx,
2929 unsigned char *data)
2931 if (qnum >= 6)
2932 return -EINVAL;
2934 if (qnum < 3) {
2935 if (!qs->txq[qnum].desc || idx >= qs->txq[qnum].size)
2936 return -EINVAL;
2937 memcpy(data, &qs->txq[qnum].desc[idx], sizeof(struct tx_desc));
2938 return sizeof(struct tx_desc);
2941 if (qnum == 3) {
2942 if (!qs->rspq.desc || idx >= qs->rspq.size)
2943 return -EINVAL;
2944 memcpy(data, &qs->rspq.desc[idx], sizeof(struct rsp_desc));
2945 return sizeof(struct rsp_desc);
2948 qnum -= 4;
2949 if (!qs->fl[qnum].desc || idx >= qs->fl[qnum].size)
2950 return -EINVAL;
2951 memcpy(data, &qs->fl[qnum].desc[idx], sizeof(struct rx_desc));
2952 return sizeof(struct rx_desc);
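/*
 * Hypothetical caller, shown only to illustrate the queue numbering
 * documented above: queue number 3 selects the response queue, so the
 * buffer must hold at least sizeof(struct rsp_desc) bytes here; the
 * return value is the descriptor size or -EINVAL.
 */
static inline int example_dump_rspq_desc(const struct sge_qset *qs,
					 unsigned int idx, unsigned char *buf)
{
	return t3_get_desc(qs, 3, idx, buf);
}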