/****************************************************************************
 * Driver for Solarflare Solarstorm network controllers and boards
 * Copyright 2005-2006 Fen Systems Ltd.
 * Copyright 2005-2008 Solarflare Communications Inc.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 as published
 * by the Free Software Foundation, incorporated herein by reference.
 ****************************************************************************/

#include <linux/pci.h>
#include <linux/tcp.h>
#include <linux/ip.h>
#include <linux/in.h>
#include <linux/if_ether.h>
#include <linux/highmem.h>
#include "net_driver.h"
#include "tx.h"
#include "efx.h"
#include "falcon.h"
#include "workarounds.h"

/*
 * TX descriptor ring full threshold
 *
 * The tx_queue descriptor ring fill-level must fall below this value
 * before we restart the netif queue
 */
#define EFX_NETDEV_TX_THRESHOLD(_tx_queue) \
        (_tx_queue->efx->type->txd_ring_mask / 2u)
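
/* Illustrative arithmetic, not from the original source: with a
 * txd_ring_mask of 0xFFF (a 4096-entry ring) the threshold evaluates
 * to 2047, so a stopped queue is only restarted once the fill level
 * falls below half the ring.
 */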

/* We want to be able to nest calls to netif_stop_queue(), since each
 * channel can have an individual stop on the queue.
 */
void efx_stop_queue(struct efx_nic *efx)
{
        spin_lock_bh(&efx->netif_stop_lock);
        EFX_TRACE(efx, "stop TX queue\n");

        atomic_inc(&efx->netif_stop_count);
        netif_stop_queue(efx->net_dev);

        spin_unlock_bh(&efx->netif_stop_lock);
}

/* Wake netif's TX queue
 * We want to be able to nest calls to netif_stop_queue(), since each
 * channel can have an individual stop on the queue.
 */
inline void efx_wake_queue(struct efx_nic *efx)
{
        local_bh_disable();
        if (atomic_dec_and_lock(&efx->netif_stop_count,
                                &efx->netif_stop_lock)) {
                EFX_TRACE(efx, "waking TX queue\n");
                netif_wake_queue(efx->net_dev);
                spin_unlock(&efx->netif_stop_lock);
        }
        local_bh_enable();
}
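
/* Behavioural sketch (hypothetical sequence, not driver code): if two
 * channels each call efx_stop_queue(), netif_stop_count reaches 2 and
 * the netif queue stays stopped; only the second efx_wake_queue()
 * drops the count to zero, which is exactly the transition
 * atomic_dec_and_lock() detects under netif_stop_lock.
 */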

static inline void efx_dequeue_buffer(struct efx_tx_queue *tx_queue,
                                      struct efx_tx_buffer *buffer)
{
        if (buffer->unmap_len) {
                struct pci_dev *pci_dev = tx_queue->efx->pci_dev;
                if (buffer->unmap_single)
                        pci_unmap_single(pci_dev, buffer->unmap_addr,
                                         buffer->unmap_len, PCI_DMA_TODEVICE);
                else
                        pci_unmap_page(pci_dev, buffer->unmap_addr,
                                       buffer->unmap_len, PCI_DMA_TODEVICE);
                buffer->unmap_len = 0;
                buffer->unmap_single = 0;
        }

        if (buffer->skb) {
                dev_kfree_skb_any((struct sk_buff *) buffer->skb);
                buffer->skb = NULL;
                EFX_TRACE(tx_queue->efx, "TX queue %d transmission id %x "
                          "complete\n", tx_queue->queue, tx_queue->read_count);
        }
}

/**
 * struct efx_tso_header - a DMA mapped buffer for packet headers
 * @next: Linked list of free ones.
 *	The list is protected by the TX queue lock.
 * @unmap_len: Length to unmap for an oversize buffer, or 0.
 * @dma_addr: The DMA address of the header below.
 *
 * This controls the memory used for a TSO header.  Use TSOH_DATA()
 * to find the packet header data.  Use TSOH_SIZE() to calculate the
 * total size required for a given packet header length.  TSO headers
 * in the free list are exactly %TSOH_STD_SIZE bytes in size.
 */
struct efx_tso_header {
        union {
                struct efx_tso_header *next;
                size_t unmap_len;
        };
        dma_addr_t dma_addr;
};
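
/* Usage sketch (summarising the code below, not additional driver
 * logic): a header on the free list is reached via @next and has
 * unmap_len == 0; an oversize header from efx_tsoh_heap_alloc()
 * records its mapped length in unmap_len so efx_tsoh_heap_free() can
 * unmap and kfree() it.  The union is safe because a header is never
 * simultaneously free and in flight.
 */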

static int efx_enqueue_skb_tso(struct efx_tx_queue *tx_queue,
                               const struct sk_buff *skb);
static void efx_fini_tso(struct efx_tx_queue *tx_queue);
static void efx_tsoh_heap_free(struct efx_tx_queue *tx_queue,
                               struct efx_tso_header *tsoh);

static inline void efx_tsoh_free(struct efx_tx_queue *tx_queue,
                                 struct efx_tx_buffer *buffer)
{
        if (buffer->tsoh) {
                if (likely(!buffer->tsoh->unmap_len)) {
                        buffer->tsoh->next = tx_queue->tso_headers_free;
                        tx_queue->tso_headers_free = buffer->tsoh;
                } else {
                        efx_tsoh_heap_free(tx_queue, buffer->tsoh);
                }
                buffer->tsoh = NULL;
        }
}

/*
 * Add a socket buffer to a TX queue
 *
 * This maps all fragments of a socket buffer for DMA and adds them to
 * the TX queue.  The queue's insert pointer will be incremented by
 * the number of fragments in the socket buffer.
 *
 * If any DMA mapping fails, any mapped fragments will be unmapped,
 * and the queue's insert pointer will be restored to its original value.
 *
 * Returns NETDEV_TX_OK or NETDEV_TX_BUSY
 * You must hold netif_tx_lock() to call this function.
 */
static inline int efx_enqueue_skb(struct efx_tx_queue *tx_queue,
                                  const struct sk_buff *skb)
{
        struct efx_nic *efx = tx_queue->efx;
        struct pci_dev *pci_dev = efx->pci_dev;
        struct efx_tx_buffer *buffer;
        skb_frag_t *fragment;
        struct page *page;
        int page_offset;
        unsigned int len, unmap_len = 0, fill_level, insert_ptr, misalign;
        dma_addr_t dma_addr, unmap_addr = 0;
        unsigned int dma_len;
        unsigned unmap_single;
        int q_space, i = 0;
        int rc = NETDEV_TX_OK;

        EFX_BUG_ON_PARANOID(tx_queue->write_count != tx_queue->insert_count);

        if (skb_shinfo((struct sk_buff *)skb)->gso_size)
                return efx_enqueue_skb_tso(tx_queue, skb);

        /* Get size of the initial fragment */
        len = skb_headlen(skb);

        fill_level = tx_queue->insert_count - tx_queue->old_read_count;
        q_space = efx->type->txd_ring_mask - 1 - fill_level;

        /* Map for DMA.  Use pci_map_single rather than pci_map_page
         * since this is more efficient on machines with sparse
         * memory.
         */
        unmap_single = 1;
        dma_addr = pci_map_single(pci_dev, skb->data, len, PCI_DMA_TODEVICE);

        /* Process all fragments */
        while (1) {
                if (unlikely(pci_dma_mapping_error(pci_dev, dma_addr)))
                        goto pci_err;

                /* Store fields for marking in the per-fragment final
                 * descriptor */
                unmap_len = len;
                unmap_addr = dma_addr;

                /* Add to TX queue, splitting across DMA boundaries */
                do {
                        if (unlikely(q_space-- <= 0)) {
                                /* It might be that completions have
                                 * happened since the xmit path last
                                 * checked.  Update the xmit path's
                                 * copy of read_count.
                                 */
                                ++tx_queue->stopped;
                                /* This memory barrier protects the
                                 * change of stopped from the access
                                 * of read_count. */
                                smp_mb();
                                tx_queue->old_read_count =
                                        *(volatile unsigned *)
                                        &tx_queue->read_count;
                                fill_level = (tx_queue->insert_count
                                              - tx_queue->old_read_count);
                                q_space = (efx->type->txd_ring_mask - 1 -
                                           fill_level);
                                if (unlikely(q_space-- <= 0))
                                        goto stop;
                                smp_mb();
                                --tx_queue->stopped;
                        }

                        insert_ptr = (tx_queue->insert_count &
                                      efx->type->txd_ring_mask);
                        buffer = &tx_queue->buffer[insert_ptr];
                        efx_tsoh_free(tx_queue, buffer);
                        EFX_BUG_ON_PARANOID(buffer->tsoh);
                        EFX_BUG_ON_PARANOID(buffer->skb);
                        EFX_BUG_ON_PARANOID(buffer->len);
                        EFX_BUG_ON_PARANOID(buffer->continuation != 1);
                        EFX_BUG_ON_PARANOID(buffer->unmap_len);

                        dma_len = (((~dma_addr) & efx->type->tx_dma_mask) + 1);
                        if (likely(dma_len > len))
                                dma_len = len;

                        misalign = (unsigned)dma_addr & efx->type->bug5391_mask;
                        if (misalign && dma_len + misalign > 512)
                                dma_len = 512 - misalign;
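
                        /* Worked example (hypothetical values): with
                         * tx_dma_mask == 0xFFF and dma_addr == 0x12FFE,
                         * (~dma_addr & 0xFFF) + 1 == 2, so only two bytes
                         * fit before the 4KB boundary and the remainder of
                         * the fragment spills into further descriptors.
                         */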

                        /* Fill out per descriptor fields */
                        buffer->len = dma_len;
                        buffer->dma_addr = dma_addr;
                        len -= dma_len;
                        dma_addr += dma_len;
                        ++tx_queue->insert_count;
                } while (len);

                /* Transfer ownership of the unmapping to the final buffer */
                buffer->unmap_addr = unmap_addr;
                buffer->unmap_single = unmap_single;
                buffer->unmap_len = unmap_len;
                unmap_len = 0;

                /* Get address and size of next fragment */
                if (i >= skb_shinfo(skb)->nr_frags)
                        break;
                fragment = &skb_shinfo(skb)->frags[i];
                len = fragment->size;
                page = fragment->page;
                page_offset = fragment->page_offset;
                i++;
                /* Map for DMA */
                unmap_single = 0;
                dma_addr = pci_map_page(pci_dev, page, page_offset, len,
                                        PCI_DMA_TODEVICE);
        }

        /* Transfer ownership of the skb to the final buffer */
        buffer->skb = skb;
        buffer->continuation = 0;

        /* Pass off to hardware */
        falcon_push_buffers(tx_queue);

        return NETDEV_TX_OK;

 pci_err:
        EFX_ERR_RL(efx, "TX queue %d could not map skb with %d bytes %d "
                   "fragments for DMA\n", tx_queue->queue, skb->len,
                   skb_shinfo(skb)->nr_frags + 1);

        /* Mark the packet as transmitted, and free the SKB ourselves */
        dev_kfree_skb_any((struct sk_buff *)skb);
        goto unwind;

 stop:
        rc = NETDEV_TX_BUSY;

        if (tx_queue->stopped == 1)
                efx_stop_queue(efx);

 unwind:
        /* Work backwards until we hit the original insert pointer value */
        while (tx_queue->insert_count != tx_queue->write_count) {
                --tx_queue->insert_count;
                insert_ptr = tx_queue->insert_count & efx->type->txd_ring_mask;
                buffer = &tx_queue->buffer[insert_ptr];
                efx_dequeue_buffer(tx_queue, buffer);
                buffer->len = 0;
        }

        /* Free the fragment we were mid-way through pushing */
        if (unmap_len) {
                if (unmap_single)
                        pci_unmap_single(pci_dev, unmap_addr, unmap_len,
                                         PCI_DMA_TODEVICE);
                else
                        pci_unmap_page(pci_dev, unmap_addr, unmap_len,
                                       PCI_DMA_TODEVICE);
        }

        return rc;
}

/* Remove packets from the TX queue
 *
 * This removes packets from the TX queue, up to and including the
 * specified index.
 */
static inline void efx_dequeue_buffers(struct efx_tx_queue *tx_queue,
                                       unsigned int index)
{
        struct efx_nic *efx = tx_queue->efx;
        unsigned int stop_index, read_ptr;
        unsigned int mask = tx_queue->efx->type->txd_ring_mask;

        stop_index = (index + 1) & mask;
        read_ptr = tx_queue->read_count & mask;

        while (read_ptr != stop_index) {
                struct efx_tx_buffer *buffer = &tx_queue->buffer[read_ptr];
                if (unlikely(buffer->len == 0)) {
                        EFX_ERR(tx_queue->efx, "TX queue %d spurious TX "
                                "completion id %x\n", tx_queue->queue,
                                read_ptr);
                        efx_schedule_reset(efx, RESET_TYPE_TX_SKIP);
                        return;
                }

                efx_dequeue_buffer(tx_queue, buffer);
                buffer->continuation = 1;
                buffer->len = 0;

                ++tx_queue->read_count;
                read_ptr = tx_queue->read_count & mask;
        }
}
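
/* Wrap-around sketch (hypothetical mask 0xFFF): a completion event for
 * index 2 while read_count sits at 0xFFE releases entries 0xFFE, 0xFFF,
 * 0, 1 and 2, because stop_index == (2 + 1) & 0xFFF == 3.
 */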

/* Initiate a packet transmission on the specified TX queue.
 * Note that returning anything other than NETDEV_TX_OK will cause the
 * OS to free the skb.
 *
 * This function is split out from efx_hard_start_xmit to allow the
 * loopback test to direct packets via specific TX queues.  It is
 * therefore a non-static inline, so as not to penalise performance
 * for non-loopback transmissions.
 *
 * Context: netif_tx_lock held
 */
inline int efx_xmit(struct efx_nic *efx,
                    struct efx_tx_queue *tx_queue, struct sk_buff *skb)
{
        int rc;

        /* Map fragments for DMA and add to TX queue */
        rc = efx_enqueue_skb(tx_queue, skb);
        if (unlikely(rc != NETDEV_TX_OK))
                goto out;

        /* Update last TX timer */
        efx->net_dev->trans_start = jiffies;

 out:
        return rc;
}

/* Initiate a packet transmission.  We use one channel per CPU
 * (sharing when we have more CPUs than channels).  On Falcon, the TX
 * completion events will be directed back to the CPU that transmitted
 * the packet, which should be cache-efficient.
 *
 * Context: non-blocking.
 * Note that returning anything other than NETDEV_TX_OK will cause the
 * OS to free the skb.
 */
int efx_hard_start_xmit(struct sk_buff *skb, struct net_device *net_dev)
{
        struct efx_nic *efx = netdev_priv(net_dev);
        struct efx_tx_queue *tx_queue;

        if (likely(skb->ip_summed == CHECKSUM_PARTIAL))
                tx_queue = &efx->tx_queue[EFX_TX_QUEUE_OFFLOAD_CSUM];
        else
                tx_queue = &efx->tx_queue[EFX_TX_QUEUE_NO_CSUM];

        return efx_xmit(efx, tx_queue, skb);
}

void efx_xmit_done(struct efx_tx_queue *tx_queue, unsigned int index)
{
        unsigned fill_level;
        struct efx_nic *efx = tx_queue->efx;

        EFX_BUG_ON_PARANOID(index > efx->type->txd_ring_mask);

        efx_dequeue_buffers(tx_queue, index);

        /* See if we need to restart the netif queue.  This barrier
         * separates the update of read_count from the test of
         * stopped. */
        smp_mb();
        if (unlikely(tx_queue->stopped)) {
                fill_level = tx_queue->insert_count - tx_queue->read_count;
                if (fill_level < EFX_NETDEV_TX_THRESHOLD(tx_queue)) {
                        EFX_BUG_ON_PARANOID(!efx_dev_registered(efx));

                        /* Do this under netif_tx_lock(), to avoid racing
                         * with efx_xmit(). */
                        netif_tx_lock(efx->net_dev);
                        if (tx_queue->stopped) {
                                tx_queue->stopped = 0;
                                efx_wake_queue(efx);
                        }
                        netif_tx_unlock(efx->net_dev);
                }
        }
}

int efx_probe_tx_queue(struct efx_tx_queue *tx_queue)
{
        struct efx_nic *efx = tx_queue->efx;
        unsigned int txq_size;
        int i, rc;

        EFX_LOG(efx, "creating TX queue %d\n", tx_queue->queue);

        /* Allocate software ring */
        txq_size = (efx->type->txd_ring_mask + 1) * sizeof(*tx_queue->buffer);
        tx_queue->buffer = kzalloc(txq_size, GFP_KERNEL);
        if (!tx_queue->buffer)
                return -ENOMEM;
        for (i = 0; i <= efx->type->txd_ring_mask; ++i)
                tx_queue->buffer[i].continuation = 1;

        /* Allocate hardware ring */
        rc = falcon_probe_tx(tx_queue);
        if (rc)
                goto fail;

        return 0;

 fail:
        kfree(tx_queue->buffer);
        tx_queue->buffer = NULL;
        return rc;
}

int efx_init_tx_queue(struct efx_tx_queue *tx_queue)
{
        EFX_LOG(tx_queue->efx, "initialising TX queue %d\n", tx_queue->queue);

        tx_queue->insert_count = 0;
        tx_queue->write_count = 0;
        tx_queue->read_count = 0;
        tx_queue->old_read_count = 0;
        BUG_ON(tx_queue->stopped);

        /* Set up TX descriptor ring */
        return falcon_init_tx(tx_queue);
}

void efx_release_tx_buffers(struct efx_tx_queue *tx_queue)
{
        struct efx_tx_buffer *buffer;

        if (!tx_queue->buffer)
                return;

        /* Free any buffers left in the ring */
        while (tx_queue->read_count != tx_queue->write_count) {
                buffer = &tx_queue->buffer[tx_queue->read_count &
                                           tx_queue->efx->type->txd_ring_mask];
                efx_dequeue_buffer(tx_queue, buffer);
                buffer->continuation = 1;
                buffer->len = 0;

                ++tx_queue->read_count;
        }
}

void efx_fini_tx_queue(struct efx_tx_queue *tx_queue)
{
        EFX_LOG(tx_queue->efx, "shutting down TX queue %d\n", tx_queue->queue);

        /* Flush TX queue, remove descriptor ring */
        falcon_fini_tx(tx_queue);

        efx_release_tx_buffers(tx_queue);

        /* Free up TSO header cache */
        efx_fini_tso(tx_queue);

        /* Release queue's stop on port, if any */
        if (tx_queue->stopped) {
                tx_queue->stopped = 0;
                efx_wake_queue(tx_queue->efx);
        }
}

void efx_remove_tx_queue(struct efx_tx_queue *tx_queue)
{
        EFX_LOG(tx_queue->efx, "destroying TX queue %d\n", tx_queue->queue);
        falcon_remove_tx(tx_queue);

        kfree(tx_queue->buffer);
        tx_queue->buffer = NULL;
}

/* Efx TCP segmentation acceleration.
 *
 * Why?  Because by doing it here in the driver we can go significantly
 * faster than the GSO.
 *
 * Requires TX checksum offload support.
 */

/* Number of bytes inserted at the start of a TSO header buffer,
 * similar to NET_IP_ALIGN.
 */
#if defined(__i386__) || defined(__x86_64__)
#define TSOH_OFFSET 0
#else
#define TSOH_OFFSET NET_IP_ALIGN
#endif

#define TSOH_BUFFER(tsoh) ((u8 *)(tsoh + 1) + TSOH_OFFSET)

/* Total size of struct efx_tso_header, buffer and padding */
#define TSOH_SIZE(hdr_len) \
        (sizeof(struct efx_tso_header) + TSOH_OFFSET + hdr_len)

/* Size of blocks on free list.  Larger blocks must be allocated from
 * the heap.
 */
#define TSOH_STD_SIZE 128
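
/* Illustrative sizing (assuming a 64-bit build where
 * sizeof(struct efx_tso_header) == 16 and TSOH_OFFSET == 0): a typical
 * 54-byte Ethernet/IPv4/TCP header gives TSOH_SIZE(54) == 70, well
 * under TSOH_STD_SIZE, so standard headers come from the page-based
 * free list and only unusually long headers hit efx_tsoh_heap_alloc().
 */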

#define PTR_DIFF(p1, p2)  ((u8 *)(p1) - (u8 *)(p2))
#define ETH_HDR_LEN(skb)  (skb_network_header(skb) - (skb)->data)
#define SKB_TCP_OFF(skb)  PTR_DIFF(tcp_hdr(skb), (skb)->data)
#define SKB_IPV4_OFF(skb) PTR_DIFF(ip_hdr(skb), (skb)->data)
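
/* Worked example (hypothetical frame): for an untagged Ethernet frame
 * carrying IPv4/TCP with no IP options, ETH_HDR_LEN(skb) == 14,
 * SKB_IPV4_OFF(skb) == 14 and SKB_TCP_OFF(skb) == 34, so a 20-byte TCP
 * header ends 54 bytes into the packet.
 */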

/**
 * struct tso_state - TSO state for an SKB
 * @remaining_len: Bytes of data we've yet to segment
 * @seqnum: Current sequence number
 * @packet_space: Remaining space in current packet
 * @ifc: Input fragment cursor.
 *	Where we are in the current fragment of the incoming SKB.  These
 *	values get updated in place when we split a fragment over
 *	multiple packets.
 * @p: Parameters.
 *	These values are set once at the start of the TSO send and do
 *	not get changed as the routine progresses.
 *
 * The state used during segmentation.  It is put into this data structure
 * just to make it easy to pass into inline functions.
 */
struct tso_state {
        unsigned remaining_len;
        unsigned seqnum;
        unsigned packet_space;

        struct {
                /* DMA address of current position */
                dma_addr_t dma_addr;
                /* Remaining length */
                unsigned int len;
                /* DMA address and length of the whole fragment */
                unsigned int unmap_len;
                dma_addr_t unmap_addr;
                unsigned int unmap_single;
        } ifc;

        struct {
                /* The number of bytes of header */
                unsigned int header_length;

                /* The number of bytes to put in each outgoing segment. */
                int full_packet_size;

                /* Current IPv4 ID, host endian. */
                unsigned ipv4_id;
        } p;
};

/*
 * Verify that our various assumptions about sk_buffs and the conditions
 * under which TSO will be attempted hold true.
 */
static inline void efx_tso_check_safe(const struct sk_buff *skb)
{
        EFX_BUG_ON_PARANOID(skb->protocol != htons(ETH_P_IP));
        EFX_BUG_ON_PARANOID(((struct ethhdr *)skb->data)->h_proto !=
                            skb->protocol);
        EFX_BUG_ON_PARANOID(ip_hdr(skb)->protocol != IPPROTO_TCP);
        EFX_BUG_ON_PARANOID((PTR_DIFF(tcp_hdr(skb), skb->data)
                             + (tcp_hdr(skb)->doff << 2u)) >
                            skb_headlen(skb));
}

/*
 * Allocate a page worth of efx_tso_header structures, and string them
 * into the tx_queue->tso_headers_free linked list.  Return 0 or -ENOMEM.
 */
static int efx_tsoh_block_alloc(struct efx_tx_queue *tx_queue)
{
        struct pci_dev *pci_dev = tx_queue->efx->pci_dev;
        struct efx_tso_header *tsoh;
        dma_addr_t dma_addr;
        u8 *base_kva, *kva;

        base_kva = pci_alloc_consistent(pci_dev, PAGE_SIZE, &dma_addr);
        if (base_kva == NULL) {
                EFX_ERR(tx_queue->efx, "Unable to allocate page for TSO"
                        " headers\n");
                return -ENOMEM;
        }

        /* pci_alloc_consistent() allocates pages. */
        EFX_BUG_ON_PARANOID(dma_addr & (PAGE_SIZE - 1u));

        for (kva = base_kva; kva < base_kva + PAGE_SIZE; kva += TSOH_STD_SIZE) {
                tsoh = (struct efx_tso_header *)kva;
                tsoh->dma_addr = dma_addr + (TSOH_BUFFER(tsoh) - base_kva);
                tsoh->next = tx_queue->tso_headers_free;
                tx_queue->tso_headers_free = tsoh;
        }

        return 0;
}

/* Free up a TSO header, and all others in the same page. */
static void efx_tsoh_block_free(struct efx_tx_queue *tx_queue,
                                struct efx_tso_header *tsoh,
                                struct pci_dev *pci_dev)
{
        struct efx_tso_header **p;
        unsigned long base_kva;
        dma_addr_t base_dma;

        base_kva = (unsigned long)tsoh & PAGE_MASK;
        base_dma = tsoh->dma_addr & PAGE_MASK;

        p = &tx_queue->tso_headers_free;
        while (*p != NULL) {
                if (((unsigned long)*p & PAGE_MASK) == base_kva)
                        *p = (*p)->next;
                else
                        p = &(*p)->next;
        }

        pci_free_consistent(pci_dev, PAGE_SIZE, (void *)base_kva, base_dma);
}

static struct efx_tso_header *
efx_tsoh_heap_alloc(struct efx_tx_queue *tx_queue, size_t header_len)
{
        struct efx_tso_header *tsoh;

        tsoh = kmalloc(TSOH_SIZE(header_len), GFP_ATOMIC | GFP_DMA);
        if (unlikely(!tsoh))
                return NULL;

        tsoh->dma_addr = pci_map_single(tx_queue->efx->pci_dev,
                                        TSOH_BUFFER(tsoh), header_len,
                                        PCI_DMA_TODEVICE);
        if (unlikely(pci_dma_mapping_error(tx_queue->efx->pci_dev,
                                           tsoh->dma_addr))) {
                kfree(tsoh);
                return NULL;
        }

        tsoh->unmap_len = header_len;
        return tsoh;
}

static void
efx_tsoh_heap_free(struct efx_tx_queue *tx_queue, struct efx_tso_header *tsoh)
{
        pci_unmap_single(tx_queue->efx->pci_dev,
                         tsoh->dma_addr, tsoh->unmap_len,
                         PCI_DMA_TODEVICE);
        kfree(tsoh);
}

/**
 * efx_tx_queue_insert - push descriptors onto the TX queue
 * @tx_queue: Efx TX queue
 * @dma_addr: DMA address of fragment
 * @len: Length of fragment
 * @final_buffer: The final buffer inserted into the queue
 *
 * Push descriptors onto the TX queue.  Return 0 on success or 1 if
 * @tx_queue full.
 */
static int efx_tx_queue_insert(struct efx_tx_queue *tx_queue,
                               dma_addr_t dma_addr, unsigned len,
                               struct efx_tx_buffer **final_buffer)
{
        struct efx_tx_buffer *buffer;
        struct efx_nic *efx = tx_queue->efx;
        unsigned dma_len, fill_level, insert_ptr, misalign;
        int q_space;

        EFX_BUG_ON_PARANOID(len <= 0);

        fill_level = tx_queue->insert_count - tx_queue->old_read_count;
        /* -1 as there is no way to represent all descriptors used */
        q_space = efx->type->txd_ring_mask - 1 - fill_level;

        while (1) {
                if (unlikely(q_space-- <= 0)) {
                        /* It might be that completions have happened
                         * since the xmit path last checked.  Update
                         * the xmit path's copy of read_count.
                         */
                        ++tx_queue->stopped;
                        /* This memory barrier protects the change of
                         * stopped from the access of read_count. */
                        smp_mb();
                        tx_queue->old_read_count =
                                *(volatile unsigned *)&tx_queue->read_count;
                        fill_level = (tx_queue->insert_count
                                      - tx_queue->old_read_count);
                        q_space = efx->type->txd_ring_mask - 1 - fill_level;
                        if (unlikely(q_space-- <= 0)) {
                                *final_buffer = NULL;
                                return 1;
                        }
                        smp_mb();
                        --tx_queue->stopped;
                }

                insert_ptr = tx_queue->insert_count & efx->type->txd_ring_mask;
                buffer = &tx_queue->buffer[insert_ptr];
                ++tx_queue->insert_count;

                EFX_BUG_ON_PARANOID(tx_queue->insert_count -
                                    tx_queue->read_count >
                                    efx->type->txd_ring_mask);

                efx_tsoh_free(tx_queue, buffer);
                EFX_BUG_ON_PARANOID(buffer->len);
                EFX_BUG_ON_PARANOID(buffer->unmap_len);
                EFX_BUG_ON_PARANOID(buffer->skb);
                EFX_BUG_ON_PARANOID(buffer->continuation != 1);
                EFX_BUG_ON_PARANOID(buffer->tsoh);

                buffer->dma_addr = dma_addr;

                /* Ensure we do not cross a boundary unsupported by H/W */
                dma_len = (~dma_addr & efx->type->tx_dma_mask) + 1;

                misalign = (unsigned)dma_addr & efx->type->bug5391_mask;
                if (misalign && dma_len + misalign > 512)
                        dma_len = 512 - misalign;

                /* If there is enough space to send then do so */
                if (dma_len >= len)
                        break;

                buffer->len = dma_len; /* Don't set the other members */
                dma_addr += dma_len;
                len -= dma_len;
        }

        EFX_BUG_ON_PARANOID(!len);
        buffer->len = len;
        *final_buffer = buffer;
        return 0;
}

/*
 * Put a TSO header into the TX queue.
 *
 * This is special-cased because we know that it is small enough to fit in
 * a single fragment, and we know it doesn't cross a page boundary.  It
 * also allows us to not worry about end-of-packet etc.
 */
static inline void efx_tso_put_header(struct efx_tx_queue *tx_queue,
                                      struct efx_tso_header *tsoh, unsigned len)
{
        struct efx_tx_buffer *buffer;

        buffer = &tx_queue->buffer[tx_queue->insert_count &
                                   tx_queue->efx->type->txd_ring_mask];
        efx_tsoh_free(tx_queue, buffer);
        EFX_BUG_ON_PARANOID(buffer->len);
        EFX_BUG_ON_PARANOID(buffer->unmap_len);
        EFX_BUG_ON_PARANOID(buffer->skb);
        EFX_BUG_ON_PARANOID(buffer->continuation != 1);
        EFX_BUG_ON_PARANOID(buffer->tsoh);
        buffer->len = len;
        buffer->dma_addr = tsoh->dma_addr;
        buffer->tsoh = tsoh;

        ++tx_queue->insert_count;
}

/* Remove descriptors put into a tx_queue. */
static void efx_enqueue_unwind(struct efx_tx_queue *tx_queue)
{
        struct efx_tx_buffer *buffer;

        /* Work backwards until we hit the original insert pointer value */
        while (tx_queue->insert_count != tx_queue->write_count) {
                --tx_queue->insert_count;
                buffer = &tx_queue->buffer[tx_queue->insert_count &
                                           tx_queue->efx->type->txd_ring_mask];
                efx_tsoh_free(tx_queue, buffer);
                EFX_BUG_ON_PARANOID(buffer->skb);
                buffer->len = 0;
                buffer->continuation = 1;
                if (buffer->unmap_len) {
                        if (buffer->unmap_single)
                                pci_unmap_single(tx_queue->efx->pci_dev,
                                                 buffer->unmap_addr,
                                                 buffer->unmap_len,
                                                 PCI_DMA_TODEVICE);
                        else
                                pci_unmap_page(tx_queue->efx->pci_dev,
                                               buffer->unmap_addr,
                                               buffer->unmap_len,
                                               PCI_DMA_TODEVICE);
                        buffer->unmap_len = 0;
                }
        }
}

/* Parse the SKB header and initialise state. */
static inline void tso_start(struct tso_state *st, const struct sk_buff *skb)
{
        /* All ethernet/IP/TCP headers combined size is TCP header size
         * plus offset of TCP header relative to start of packet.
         */
        st->p.header_length = ((tcp_hdr(skb)->doff << 2u)
                               + PTR_DIFF(tcp_hdr(skb), skb->data));
        st->p.full_packet_size = (st->p.header_length
                                  + skb_shinfo(skb)->gso_size);

        st->p.ipv4_id = ntohs(ip_hdr(skb)->id);
        st->seqnum = ntohl(tcp_hdr(skb)->seq);

        EFX_BUG_ON_PARANOID(tcp_hdr(skb)->urg);
        EFX_BUG_ON_PARANOID(tcp_hdr(skb)->syn);
        EFX_BUG_ON_PARANOID(tcp_hdr(skb)->rst);

        st->packet_space = st->p.full_packet_size;
        st->remaining_len = skb->len - st->p.header_length;
        st->ifc.unmap_len = 0;
        st->ifc.unmap_single = 0;
}
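
/* Worked example (hypothetical values): with a 54-byte
 * Ethernet/IPv4/TCP header and gso_size == 1448, full_packet_size is
 * 1502, and a 4354-byte payload is segmented as 1448 + 1448 + 1458
 * bytes, each segment preceded by a freshly built 54-byte header.
 */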

static inline int tso_get_fragment(struct tso_state *st, struct efx_nic *efx,
                                   skb_frag_t *frag)
{
        st->ifc.unmap_addr = pci_map_page(efx->pci_dev, frag->page,
                                          frag->page_offset, frag->size,
                                          PCI_DMA_TODEVICE);
        if (likely(!pci_dma_mapping_error(efx->pci_dev, st->ifc.unmap_addr))) {
                st->ifc.unmap_single = 0;
                st->ifc.unmap_len = frag->size;
                st->ifc.len = frag->size;
                st->ifc.dma_addr = st->ifc.unmap_addr;
                return 0;
        }
        return -ENOMEM;
}

static inline int
tso_get_head_fragment(struct tso_state *st, struct efx_nic *efx,
                      const struct sk_buff *skb)
{
        int hl = st->p.header_length;
        int len = skb_headlen(skb) - hl;

        st->ifc.unmap_addr = pci_map_single(efx->pci_dev, skb->data + hl,
                                            len, PCI_DMA_TODEVICE);
        if (likely(!pci_dma_mapping_error(efx->pci_dev, st->ifc.unmap_addr))) {
                st->ifc.unmap_single = 1;
                st->ifc.unmap_len = len;
                st->ifc.len = len;
                st->ifc.dma_addr = st->ifc.unmap_addr;
                return 0;
        }
        return -ENOMEM;
}

/**
 * tso_fill_packet_with_fragment - form descriptors for the current fragment
 * @tx_queue: Efx TX queue
 * @skb: Socket buffer
 * @st: TSO state
 *
 * Form descriptors for the current fragment, until we reach the end
 * of fragment or end-of-packet.  Return 0 on success, 1 if not enough
 * space in @tx_queue.
 */
static inline int tso_fill_packet_with_fragment(struct efx_tx_queue *tx_queue,
                                                const struct sk_buff *skb,
                                                struct tso_state *st)
{
        struct efx_tx_buffer *buffer;
        int n, end_of_packet, rc;

        if (st->ifc.len == 0)
                return 0;
        if (st->packet_space == 0)
                return 0;

        EFX_BUG_ON_PARANOID(st->ifc.len <= 0);
        EFX_BUG_ON_PARANOID(st->packet_space <= 0);

        n = min(st->ifc.len, st->packet_space);

        st->packet_space -= n;
        st->remaining_len -= n;
        st->ifc.len -= n;

        rc = efx_tx_queue_insert(tx_queue, st->ifc.dma_addr, n, &buffer);
        if (likely(rc == 0)) {
                if (st->remaining_len == 0)
                        /* Transfer ownership of the skb */
                        buffer->skb = skb;

                end_of_packet = st->remaining_len == 0 || st->packet_space == 0;
                buffer->continuation = !end_of_packet;

                if (st->ifc.len == 0) {
                        /* Transfer ownership of the pci mapping */
                        buffer->unmap_len = st->ifc.unmap_len;
                        buffer->unmap_single = st->ifc.unmap_single;
                        st->ifc.unmap_len = 0;
                }
        }

        st->ifc.dma_addr += n;
        return rc;
}
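
/* Example flow (hypothetical values): a 4096-byte fragment offered to
 * a packet with packet_space == 1448 consumes n == 1448 bytes, leaving
 * ifc.len == 2648 to carry over into the next packet once
 * tso_start_new_packet() has reset packet_space.
 */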

/**
 * tso_start_new_packet - generate a new header and prepare for the new packet
 * @tx_queue: Efx TX queue
 * @skb: Socket buffer
 * @st: TSO state
 *
 * Generate a new header and prepare for the new packet.  Return 0 on
 * success, or -1 if failed to alloc header.
 */
static inline int tso_start_new_packet(struct efx_tx_queue *tx_queue,
                                       const struct sk_buff *skb,
                                       struct tso_state *st)
{
        struct efx_tso_header *tsoh;
        struct iphdr *tsoh_iph;
        struct tcphdr *tsoh_th;
        unsigned ip_length;
        u8 *header;

        /* Allocate a DMA-mapped header buffer. */
        if (likely(TSOH_SIZE(st->p.header_length) <= TSOH_STD_SIZE)) {
                if (tx_queue->tso_headers_free == NULL) {
                        if (efx_tsoh_block_alloc(tx_queue))
                                return -1;
                }
                EFX_BUG_ON_PARANOID(!tx_queue->tso_headers_free);
                tsoh = tx_queue->tso_headers_free;
                tx_queue->tso_headers_free = tsoh->next;
                tsoh->unmap_len = 0;
        } else {
                tx_queue->tso_long_headers++;
                tsoh = efx_tsoh_heap_alloc(tx_queue, st->p.header_length);
                if (unlikely(!tsoh))
                        return -1;
        }

        header = TSOH_BUFFER(tsoh);
        tsoh_th = (struct tcphdr *)(header + SKB_TCP_OFF(skb));
        tsoh_iph = (struct iphdr *)(header + SKB_IPV4_OFF(skb));

        /* Copy and update the headers. */
        memcpy(header, skb->data, st->p.header_length);

        tsoh_th->seq = htonl(st->seqnum);
        st->seqnum += skb_shinfo(skb)->gso_size;
        if (st->remaining_len > skb_shinfo(skb)->gso_size) {
                /* This packet will not finish the TSO burst. */
                ip_length = st->p.full_packet_size - ETH_HDR_LEN(skb);
                tsoh_th->fin = 0;
                tsoh_th->psh = 0;
        } else {
                /* This packet will be the last in the TSO burst. */
                ip_length = (st->p.header_length - ETH_HDR_LEN(skb)
                             + st->remaining_len);
                tsoh_th->fin = tcp_hdr(skb)->fin;
                tsoh_th->psh = tcp_hdr(skb)->psh;
        }
        tsoh_iph->tot_len = htons(ip_length);

        /* Linux leaves suitable gaps in the IP ID space for us to fill. */
        tsoh_iph->id = htons(st->p.ipv4_id);
        st->p.ipv4_id++;

        st->packet_space = skb_shinfo(skb)->gso_size;
        ++tx_queue->tso_packets;

        /* Form a descriptor for this header. */
        efx_tso_put_header(tx_queue, tsoh, st->p.header_length);

        return 0;
}
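
/* Sketch of the resulting IP lengths (hypothetical 54-byte header,
 * gso_size 1448): every non-final segment gets tot_len == 1502 - 14 ==
 * 1488, while the final segment's tot_len shrinks to cover only the
 * leftover payload; the IP ID increments by one per segment, filling
 * the gap the stack left for us.
 */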

/**
 * efx_enqueue_skb_tso - segment and transmit a TSO socket buffer
 * @tx_queue: Efx TX queue
 * @skb: Socket buffer
 *
 * Context: You must hold netif_tx_lock() to call this function.
 *
 * Add socket buffer @skb to @tx_queue, doing TSO or return != 0 if
 * @skb was not enqueued.  In all cases @skb is consumed.  Return
 * %NETDEV_TX_OK or %NETDEV_TX_BUSY.
 */
static int efx_enqueue_skb_tso(struct efx_tx_queue *tx_queue,
                               const struct sk_buff *skb)
{
        struct efx_nic *efx = tx_queue->efx;
        int frag_i, rc, rc2 = NETDEV_TX_OK;
        struct tso_state state;

        /* Verify TSO is safe - these checks should never fail. */
        efx_tso_check_safe(skb);

        EFX_BUG_ON_PARANOID(tx_queue->write_count != tx_queue->insert_count);

        tso_start(&state, skb);

        /* Assume that skb header area contains exactly the headers, and
         * all payload is in the frag list.
         */
        if (skb_headlen(skb) == state.p.header_length) {
                /* Grab the first payload fragment. */
                EFX_BUG_ON_PARANOID(skb_shinfo(skb)->nr_frags < 1);
                frag_i = 0;
                rc = tso_get_fragment(&state, efx,
                                      skb_shinfo(skb)->frags + frag_i);
                if (rc)
                        goto mem_err;
        } else {
                rc = tso_get_head_fragment(&state, efx, skb);
                if (rc)
                        goto mem_err;
                frag_i = -1;
        }

        if (tso_start_new_packet(tx_queue, skb, &state) < 0)
                goto mem_err;

        while (1) {
                rc = tso_fill_packet_with_fragment(tx_queue, skb, &state);
                if (unlikely(rc))
                        goto stop;

                /* Move onto the next fragment? */
                if (state.ifc.len == 0) {
                        if (++frag_i >= skb_shinfo(skb)->nr_frags)
                                /* End of payload reached. */
                                break;
                        rc = tso_get_fragment(&state, efx,
                                              skb_shinfo(skb)->frags + frag_i);
                        if (rc)
                                goto mem_err;
                }

                /* Start at new packet? */
                if (state.packet_space == 0 &&
                    tso_start_new_packet(tx_queue, skb, &state) < 0)
                        goto mem_err;
        }

        /* Pass off to hardware */
        falcon_push_buffers(tx_queue);

        tx_queue->tso_bursts++;
        return NETDEV_TX_OK;

 mem_err:
        EFX_ERR(efx, "Out of memory for TSO headers, or PCI mapping error\n");
        dev_kfree_skb_any((struct sk_buff *)skb);
        goto unwind;

 stop:
        rc2 = NETDEV_TX_BUSY;

        /* Stop the queue if it wasn't stopped before. */
        if (tx_queue->stopped == 1)
                efx_stop_queue(efx);

 unwind:
        /* Free the DMA mapping we were in the process of writing out */
        if (state.ifc.unmap_len) {
                if (state.ifc.unmap_single)
                        pci_unmap_single(efx->pci_dev, state.ifc.unmap_addr,
                                         state.ifc.unmap_len,
                                         PCI_DMA_TODEVICE);
                else
                        pci_unmap_page(efx->pci_dev, state.ifc.unmap_addr,
                                       state.ifc.unmap_len,
                                       PCI_DMA_TODEVICE);
        }

        efx_enqueue_unwind(tx_queue);
        return rc2;
}

/*
 * Free up all TSO data structures associated with tx_queue.  This
 * routine should be called only once the tx_queue is both empty and
 * will no longer be used.
 */
static void efx_fini_tso(struct efx_tx_queue *tx_queue)
{
        unsigned i;

        if (tx_queue->buffer) {
                for (i = 0; i <= tx_queue->efx->type->txd_ring_mask; ++i)
                        efx_tsoh_free(tx_queue, &tx_queue->buffer[i]);
        }

        while (tx_queue->tso_headers_free != NULL)
                efx_tsoh_block_free(tx_queue, tx_queue->tso_headers_free,
                                    tx_queue->efx->pci_dev);
}