/****************************************************************************
 * Driver for Solarflare Solarstorm network controllers and boards
 * Copyright 2005-2006 Fen Systems Ltd.
 * Copyright 2005-2008 Solarflare Communications Inc.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 as published
 * by the Free Software Foundation, incorporated herein by reference.
 ****************************************************************************/
#include <linux/pci.h>
#include <linux/tcp.h>
#include <linux/if_ether.h>
#include <linux/highmem.h>
#include "net_driver.h"
#include "tx.h"
#include "efx.h"
#include "falcon.h"
#include "workarounds.h"
/*
 * TX descriptor ring full threshold
 *
 * The tx_queue descriptor ring fill-level must fall below this value
 * before we restart the netif queue
 */
#define EFX_NETDEV_TX_THRESHOLD(_tx_queue)		\
	(_tx_queue->efx->type->txd_ring_mask / 2u)
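
/* For example, with a 4096-entry descriptor ring (txd_ring_mask 0xfff, used
 * here purely as an illustrative value) the threshold works out to 2047, so
 * a stopped queue is only restarted once it is less than roughly half full.
 */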
/* We want to be able to nest calls to netif_stop_queue(), since each
 * channel can have an individual stop on the queue.
 */
void efx_stop_queue(struct efx_nic *efx)
{
	spin_lock_bh(&efx->netif_stop_lock);
	EFX_TRACE(efx, "stop TX queue\n");

	atomic_inc(&efx->netif_stop_count);
	netif_stop_queue(efx->net_dev);

	spin_unlock_bh(&efx->netif_stop_lock);
}

/* Wake netif's TX queue
 * We want to be able to nest calls to netif_stop_queue(), since each
 * channel can have an individual stop on the queue.
 */
inline void efx_wake_queue(struct efx_nic *efx)
{
	local_bh_disable();
	if (atomic_dec_and_lock(&efx->netif_stop_count,
				&efx->netif_stop_lock)) {
		EFX_TRACE(efx, "waking TX queue\n");
		netif_wake_queue(efx->net_dev);
		spin_unlock(&efx->netif_stop_lock);
	}
	local_bh_enable();
}
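
/* Illustration of the nesting: if two channels each call efx_stop_queue(),
 * netif_stop_count ends up at 2 and the first efx_wake_queue() only
 * decrements it; atomic_dec_and_lock() succeeds solely on the 1 -> 0
 * transition, so the netif queue is woken again only when every stop has
 * been matched by a wake.
 */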
static inline void efx_dequeue_buffer(struct efx_tx_queue *tx_queue,
				      struct efx_tx_buffer *buffer)
{
	if (buffer->unmap_len) {
		struct pci_dev *pci_dev = tx_queue->efx->pci_dev;
		if (buffer->unmap_single)
			pci_unmap_single(pci_dev, buffer->unmap_addr,
					 buffer->unmap_len, PCI_DMA_TODEVICE);
		else
			pci_unmap_page(pci_dev, buffer->unmap_addr,
				       buffer->unmap_len, PCI_DMA_TODEVICE);
		buffer->unmap_len = 0;
		buffer->unmap_single = 0;
	}

	if (buffer->skb) {
		dev_kfree_skb_any((struct sk_buff *) buffer->skb);
		buffer->skb = NULL;
		EFX_TRACE(tx_queue->efx, "TX queue %d transmission id %x "
			  "complete\n", tx_queue->queue, tx_queue->read_count);
	}
}
/*
 * Add a socket buffer to a TX queue
 *
 * This maps all fragments of a socket buffer for DMA and adds them to
 * the TX queue.  The queue's insert pointer will be incremented by
 * the number of fragments in the socket buffer.
 *
 * If any DMA mapping fails, any mapped fragments will be unmapped and
 * the queue's insert pointer restored to its original value.
 *
 * Returns NETDEV_TX_OK or NETDEV_TX_BUSY
 * You must hold netif_tx_lock() to call this function.
 */
static inline int efx_enqueue_skb(struct efx_tx_queue *tx_queue,
				  const struct sk_buff *skb)
{
	struct efx_nic *efx = tx_queue->efx;
	struct pci_dev *pci_dev = efx->pci_dev;
	struct efx_tx_buffer *buffer;
	skb_frag_t *fragment;
	struct page *page;
	int page_offset;
	unsigned int len, unmap_len = 0, fill_level, insert_ptr, misalign;
	dma_addr_t dma_addr, unmap_addr = 0;
	unsigned int dma_len;
	unsigned unmap_single;
	int q_space, i = 0;
	int rc = NETDEV_TX_OK;

	EFX_BUG_ON_PARANOID(tx_queue->write_count != tx_queue->insert_count);

	/* Get size of the initial fragment */
	len = skb_headlen(skb);

	fill_level = tx_queue->insert_count - tx_queue->old_read_count;
	q_space = efx->type->txd_ring_mask - 1 - fill_level;

	/* Map for DMA.  Use pci_map_single rather than pci_map_page
	 * since this is more efficient on machines with sparse
	 * memory. */
	unmap_single = 1;
	dma_addr = pci_map_single(pci_dev, skb->data, len, PCI_DMA_TODEVICE);

	/* Process all fragments */
	while (1) {
		if (unlikely(pci_dma_mapping_error(dma_addr)))
			goto pci_err;

		/* Store fields for marking in the per-fragment final
		 * descriptor */
		unmap_len = len;
		unmap_addr = dma_addr;

		/* Add to TX queue, splitting across DMA boundaries */
		do {
			if (unlikely(q_space-- <= 0)) {
				/* It might be that completions have
				 * happened since the xmit path last
				 * checked.  Update the xmit path's
				 * copy of read_count. */
				++tx_queue->stopped;
				/* This memory barrier protects the
				 * change of stopped from the access
				 * of read_count. */
				smp_mb();
				tx_queue->old_read_count =
					*(volatile unsigned *)
					&tx_queue->read_count;
				fill_level = (tx_queue->insert_count
					      - tx_queue->old_read_count);
				q_space = (efx->type->txd_ring_mask - 1 -
					   fill_level);
				if (unlikely(q_space-- <= 0))
					goto stop;
				smp_mb();
				--tx_queue->stopped;
			}

			insert_ptr = (tx_queue->insert_count &
				      efx->type->txd_ring_mask);
			buffer = &tx_queue->buffer[insert_ptr];
			EFX_BUG_ON_PARANOID(buffer->skb);
			EFX_BUG_ON_PARANOID(buffer->len);
			EFX_BUG_ON_PARANOID(buffer->continuation != 1);
			EFX_BUG_ON_PARANOID(buffer->unmap_len);

			/* Length of this descriptor: up to the next
			 * DMA boundary */
			dma_len = (((~dma_addr) & efx->type->tx_dma_mask) + 1);
			if (likely(dma_len > len))
				dma_len = len;

			misalign = (unsigned)dma_addr & efx->type->bug5391_mask;
			if (misalign && dma_len + misalign > 512)
				dma_len = 512 - misalign;

			/* Fill out per descriptor fields */
			buffer->len = dma_len;
			buffer->dma_addr = dma_addr;
			len -= dma_len;
			dma_addr += dma_len;
			++tx_queue->insert_count;
		} while (len);

		/* Transfer ownership of the unmapping to the final buffer */
		buffer->unmap_addr = unmap_addr;
		buffer->unmap_single = unmap_single;
		buffer->unmap_len = unmap_len;
		unmap_len = 0;

		/* Get address and size of next fragment */
		if (i >= skb_shinfo(skb)->nr_frags)
			break;
		fragment = &skb_shinfo(skb)->frags[i];
		len = fragment->size;
		page = fragment->page;
		page_offset = fragment->page_offset;
		i++;

		/* Map for DMA */
		unmap_single = 0;
		dma_addr = pci_map_page(pci_dev, page, page_offset, len,
					PCI_DMA_TODEVICE);
	}

	/* Transfer ownership of the skb to the final buffer */
	buffer->skb = skb;
	buffer->continuation = 0;

	/* Pass off to hardware */
	falcon_push_buffers(tx_queue);

	return NETDEV_TX_OK;

 pci_err:
	EFX_ERR_RL(efx, " TX queue %d could not map skb with %d bytes %d "
		   "fragments for DMA\n", tx_queue->queue, skb->len,
		   skb_shinfo(skb)->nr_frags + 1);

	/* Mark the packet as transmitted, and free the SKB ourselves */
	dev_kfree_skb_any((struct sk_buff *)skb);
	goto unwind;

 stop:
	rc = NETDEV_TX_BUSY;

	if (tx_queue->stopped == 1)
		efx_stop_queue(efx);

 unwind:
	/* Work backwards until we hit the original insert pointer value */
	while (tx_queue->insert_count != tx_queue->write_count) {
		--tx_queue->insert_count;
		insert_ptr = tx_queue->insert_count & efx->type->txd_ring_mask;
		buffer = &tx_queue->buffer[insert_ptr];
		efx_dequeue_buffer(tx_queue, buffer);
		buffer->len = 0;
	}

	/* Free the fragment we were mid-way through pushing */
	if (unmap_len)
		pci_unmap_page(pci_dev, unmap_addr, unmap_len,
			       PCI_DMA_TODEVICE);

	return rc;
}
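
/* Worked example of the DMA-boundary split above (illustrative values only):
 * with tx_dma_mask 0xfff, a fragment mapped at a dma_addr ending in 0xf80
 * gives dma_len = ((~dma_addr) & 0xfff) + 1 = 0x80, so the first descriptor
 * carries the 128 bytes up to the 4KB boundary and the do/while loop emits
 * further descriptors for the remainder.
 */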
/* Remove packets from the TX queue
 *
 * This removes packets from the TX queue, up to and including the
 * specified index.
 */
static inline void efx_dequeue_buffers(struct efx_tx_queue *tx_queue,
					unsigned int index)
{
	struct efx_nic *efx = tx_queue->efx;
	unsigned int stop_index, read_ptr;
	unsigned int mask = tx_queue->efx->type->txd_ring_mask;

	stop_index = (index + 1) & mask;
	read_ptr = tx_queue->read_count & mask;

	while (read_ptr != stop_index) {
		struct efx_tx_buffer *buffer = &tx_queue->buffer[read_ptr];
		if (unlikely(buffer->len == 0)) {
			EFX_ERR(tx_queue->efx, "TX queue %d spurious TX "
				"completion id %x\n", tx_queue->queue,
				read_ptr);
			efx_schedule_reset(efx, RESET_TYPE_TX_SKIP);
			return;
		}

		efx_dequeue_buffer(tx_queue, buffer);
		buffer->continuation = 1;
		buffer->len = 0;

		++tx_queue->read_count;
		read_ptr = tx_queue->read_count & mask;
	}
}
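
/* Illustration of the ring arithmetic (assuming a 4096-entry ring, mask
 * 0xfff): a completion for index 5 with read_count at 4094 walks read_ptr
 * through 4094, 4095, 0, ... 5 and then stops at stop_index 6, i.e. the
 * (index + 1) & mask calculation handles wrap-around.
 */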
/* Initiate a packet transmission on the specified TX queue.
 * Note that returning anything other than NETDEV_TX_OK will cause the
 * OS to free the skb.
 *
 * This function is split out from efx_hard_start_xmit to allow the
 * loopback test to direct packets via specific TX queues.  It is
 * therefore a non-static inline, so as not to penalise performance
 * for non-loopback transmissions.
 *
 * Context: netif_tx_lock held
 */
inline int efx_xmit(struct efx_nic *efx,
		    struct efx_tx_queue *tx_queue, struct sk_buff *skb)
{
	int rc;

	/* Map fragments for DMA and add to TX queue */
	rc = efx_enqueue_skb(tx_queue, skb);
	if (unlikely(rc != NETDEV_TX_OK))
		goto out;

	/* Update last TX timer */
	efx->net_dev->trans_start = jiffies;

 out:
	return rc;
}
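
/* Usage sketch (illustrative only, not code from this driver): the loopback
 * self-test mentioned above could push a prepared skb down a chosen queue
 * directly, e.g.
 *
 *	rc = efx_xmit(efx, &efx->tx_queue[queue_index], skb);
 *
 * where queue_index is a hypothetical queue selector; normal traffic instead
 * enters through efx_hard_start_xmit() below.
 */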
/* Initiate a packet transmission.  We use one channel per CPU
 * (sharing when we have more CPUs than channels).  On Falcon, the TX
 * completion events will be directed back to the CPU that transmitted
 * the packet, which should be cache-efficient.
 *
 * Context: non-blocking.
 * Note that returning anything other than NETDEV_TX_OK will cause the
 * OS to free the skb.
 */
int efx_hard_start_xmit(struct sk_buff *skb, struct net_device *net_dev)
{
	struct efx_nic *efx = net_dev->priv;
	return efx_xmit(efx, &efx->tx_queue[0], skb);
}
void efx_xmit_done(struct efx_tx_queue *tx_queue, unsigned int index)
{
	unsigned fill_level;
	struct efx_nic *efx = tx_queue->efx;

	EFX_BUG_ON_PARANOID(index > efx->type->txd_ring_mask);

	efx_dequeue_buffers(tx_queue, index);

	/* See if we need to restart the netif queue.  This barrier
	 * separates the update of read_count from the test of
	 * stopped. */
	smp_mb();
	if (unlikely(tx_queue->stopped)) {
		fill_level = tx_queue->insert_count - tx_queue->read_count;
		if (fill_level < EFX_NETDEV_TX_THRESHOLD(tx_queue)) {
			EFX_BUG_ON_PARANOID(!NET_DEV_REGISTERED(efx));

			/* Do this under netif_tx_lock(), to avoid racing
			 * with efx_xmit(). */
			netif_tx_lock(efx->net_dev);
			if (tx_queue->stopped) {
				tx_queue->stopped = 0;
				efx_wake_queue(efx);
			}
			netif_tx_unlock(efx->net_dev);
		}
	}
}
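
/* Note on the barriers (an interpretation of the comments above and in
 * efx_enqueue_skb()): the smp_mb() here orders the read_count update before
 * the test of stopped, while the xmit path orders its increment of stopped
 * before re-reading read_count, so at least one side observes the other and
 * a stopped queue cannot be missed by both paths.
 */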
int efx_probe_tx_queue(struct efx_tx_queue *tx_queue)
{
	struct efx_nic *efx = tx_queue->efx;
	unsigned int txq_size;
	int i, rc;

	EFX_LOG(efx, "creating TX queue %d\n", tx_queue->queue);

	/* Allocate software ring */
	txq_size = (efx->type->txd_ring_mask + 1) * sizeof(*tx_queue->buffer);
	tx_queue->buffer = kzalloc(txq_size, GFP_KERNEL);
	if (!tx_queue->buffer) {
		rc = -ENOMEM;
		goto fail1;
	}
	for (i = 0; i <= efx->type->txd_ring_mask; ++i)
		tx_queue->buffer[i].continuation = 1;

	/* Allocate hardware ring */
	rc = falcon_probe_tx(tx_queue);
	if (rc)
		goto fail2;

	return 0;

 fail2:
	kfree(tx_queue->buffer);
	tx_queue->buffer = NULL;
 fail1:
	return rc;
}
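
/* For example (illustrative ring size only): with txd_ring_mask 0xfff the
 * software ring allocated above holds 4096 struct efx_tx_buffer entries,
 * one per hardware descriptor, each starting life with continuation == 1.
 */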
int efx_init_tx_queue(struct efx_tx_queue *tx_queue)
{
	EFX_LOG(tx_queue->efx, "initialising TX queue %d\n", tx_queue->queue);

	tx_queue->insert_count = 0;
	tx_queue->write_count = 0;
	tx_queue->read_count = 0;
	tx_queue->old_read_count = 0;
	BUG_ON(tx_queue->stopped);

	/* Set up TX descriptor ring */
	return falcon_init_tx(tx_queue);
}
void efx_release_tx_buffers(struct efx_tx_queue *tx_queue)
{
	struct efx_tx_buffer *buffer;

	if (!tx_queue->buffer)
		return;

	/* Free any buffers left in the ring */
	while (tx_queue->read_count != tx_queue->write_count) {
		buffer = &tx_queue->buffer[tx_queue->read_count &
					   tx_queue->efx->type->txd_ring_mask];
		efx_dequeue_buffer(tx_queue, buffer);
		buffer->continuation = 1;
		buffer->len = 0;

		++tx_queue->read_count;
	}
}
void efx_fini_tx_queue(struct efx_tx_queue *tx_queue)
{
	EFX_LOG(tx_queue->efx, "shutting down TX queue %d\n", tx_queue->queue);

	/* Flush TX queue, remove descriptor ring */
	falcon_fini_tx(tx_queue);

	efx_release_tx_buffers(tx_queue);

	/* Release queue's stop on port, if any */
	if (tx_queue->stopped) {
		tx_queue->stopped = 0;
		efx_wake_queue(tx_queue->efx);
	}
}
void efx_remove_tx_queue(struct efx_tx_queue *tx_queue)
{
	EFX_LOG(tx_queue->efx, "destroying TX queue %d\n", tx_queue->queue);
	falcon_remove_tx(tx_queue);

	kfree(tx_queue->buffer);
	tx_queue->buffer = NULL;
}