/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2010-2013, by Broadcom, Inc.
 * All Rights Reserved.
 */

/*
 * Copyright (c) 2002, 2010, Oracle and/or its affiliates.
 * All rights reserved.
 */

#include "bge_impl.h"
/*
 * The transmit-side code uses an allocation process which is similar
 * to some theme park roller-coaster rides, where riders sit in cars
 * that can go individually, but work better in a train.
 *
 * 1)	RESERVE a place - this doesn't refer to any specific car or
 *	seat, just that you will get a ride.  The attempt to RESERVE a
 *	place can fail if all spaces in all cars are already committed.
 *
 * 2)	Prepare yourself; this may take an arbitrary (but not unbounded)
 *	time, and you can back out at this stage, in which case you must
 *	give up (RENOUNCE) your place.
 *
 * 3)	CLAIM your space - a specific car (the next sequentially
 *	numbered one) is allocated at this stage, and is guaranteed
 *	to be part of the next train to depart.  Once you've done
 *	this, you can't back out, nor wait for any external event
 *	or resource.
 *
 * 4)	Occupy your car - when all CLAIMED cars are OCCUPIED, they
 *	all depart together as a single train!
 *
 * 5)	At the end of the ride, you climb out of the car and RENOUNCE
 *	your right to it, so that it can be recycled for another rider.
 *
 * For each rider, these have to occur in this order, but the riders
 * don't have to stay in the same order at each stage.  In particular,
 * they may overtake each other between RESERVING a place and CLAIMING
 * it, or between CLAIMING and OCCUPYING a space.
 *
 * Once a car is CLAIMED, the train currently being assembled can't go
 * without that car (this guarantees that the cars in a single train
 * make up a consecutively-numbered set).  Therefore, when any train
 * leaves, we know there can't be any riders in transit between CLAIMING
 * and OCCUPYING their cars.  There can be some who have RESERVED but
 * not yet CLAIMED their places.  That's OK, though, because they'll go
 * into the next train.
 */
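
/*
 * Illustrative sketch only (never compiled): one sender's trip through
 * the five stages, written against the bge_atomic_*() helpers that the
 * code below uses for stages 1 and 5.  The bge_atomic_claim() call is
 * an assumption here, standing in for however a specific slot is taken.
 */
#if 0
	if (!bge_atomic_reserve(&srp->tx_free, 1))	/* 1: RESERVE	*/
		return (B_FALSE);	/* all places already committed	*/
	/* 2: prepare; bge_atomic_renounce(&srp->tx_free, 1) backs out	*/
	slot = bge_atomic_claim(&srp->tx_next, srp->desc.nslots); /* 3	*/
	/* 4: OCCUPY - fill srp->sw_sbds[slot]; the train departs once	*/
	/*    every CLAIMED slot has been filled			*/
	/* 5: after transmission, recycling RENOUNCEs the place:	*/
	bge_atomic_renounce(&srp->tx_free, 1);
#endif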

#define	BGE_DBG		BGE_DBG_SEND	/* debug flag for this code	*/

/*
 * ========== Send-side recycle routines ==========
 */

/*
 * Recycle all the completed buffers in the specified send ring up to
 * (but not including) the consumer index in the status block.
 *
 * This function must advance (srp->tc_next) AND adjust (srp->tx_free)
 * to account for the packets it has recycled.
 *
 * This is a trivial version that just does that and nothing more, but
 * it suffices while there's only one method for sending messages (by
 * copying) and that method doesn't need any special per-buffer action
 * for recycling.
 */
static boolean_t bge_recycle_ring(bge_t *bgep, send_ring_t *srp);
#pragma	inline(bge_recycle_ring)

static boolean_t
bge_recycle_ring(bge_t *bgep, send_ring_t *srp)
{
	sw_sbd_t *ssbdp;
	bge_queue_item_t *buf_item;
	bge_queue_item_t *buf_item_head;
	bge_queue_item_t *buf_item_tail;
	bge_queue_t *txbuf_queue;
	uint64_t slot;
	uint64_t n;

	ASSERT(mutex_owned(srp->tc_lock));

	/*
	 * We're about to release one or more places :-)
	 * These ASSERTions check that our invariants still hold:
	 *	there must always be at least one free place
	 *	at this point, there must be at least one place NOT free
	 *	we're not about to free more places than were claimed!
	 */
	
	ASSERT(srp->tx_free <= srp->desc.nslots);

	buf_item_head = buf_item_tail = NULL;
	for (n = 0, slot = srp->tc_next; slot != *srp->cons_index_p;
	    slot = NEXT(slot, srp->desc.nslots)) {
		ssbdp = &srp->sw_sbds[slot];
		ASSERT(ssbdp->pbuf != NULL);
		buf_item = ssbdp->pbuf;
		if (buf_item_head == NULL)
			buf_item_head = buf_item_tail = buf_item;
		else {
			buf_item_tail->next = buf_item;
			buf_item_tail = buf_item;
		}
		ssbdp->pbuf = NULL;
		++n;
	}
	if (n == 0)
		return (B_FALSE);

	/*
	 * Reset the watchdog count: to 0 if all buffers are
	 * now free, or to 1 if some are still outstanding.
	 * Note: non-synchronised access here means we may get
	 * the "wrong" answer, but only in a harmless fashion
	 * (i.e. we deactivate the watchdog because all buffers
	 * are apparently free, even though another thread may
	 * have claimed one before we leave here; in this case
	 * the watchdog will restart on the next send() call).
	 */
	bgep->watchdog = (slot == srp->tx_next) ? 0 : 1;

	/*
	 * Update recycle index and free tx BD number
	 */
	srp->tc_next = slot;
	ASSERT(srp->tx_free + n <= srp->desc.nslots);
	bge_atomic_renounce(&srp->tx_free, n);

	/*
	 * Return tx buffers to buffer push queue
	 */
	txbuf_queue = srp->txbuf_push_queue;
	mutex_enter(txbuf_queue->lock);
	buf_item_tail->next = txbuf_queue->head;
	txbuf_queue->head = buf_item_head;
	txbuf_queue->count += n;
	mutex_exit(txbuf_queue->lock);
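
	/*
	 * Note: recycled buffers land on the push queue, while senders
	 * take buffers from the separate pop queue (see bge_get_txbuf()
	 * below), so recyclers and senders don't contend on one lock.
	 */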

	/*
	 * Check if we need to exchange the tx buffer push and pop queues
	 */
	if ((srp->txbuf_pop_queue->count < srp->tx_buffers_low) &&
	    (srp->txbuf_pop_queue->count < txbuf_queue->count)) {
		srp->txbuf_push_queue = srp->txbuf_pop_queue;
		srp->txbuf_pop_queue = txbuf_queue;
	}

	if (srp->tx_flow != 0 || bgep->tx_resched_needed)
		ddi_trigger_softintr(bgep->drain_id);

	return (B_TRUE);
}

/*
 * Recycle all returned slots in all rings.
 *
 * To give priority to low-numbered rings, whenever we have recycled any
 * slots in any ring except 0, we restart scanning again from ring 0.
 * Thus, for example, if rings 0, 3, and 10 are carrying traffic, the
 * pattern of recycles might go 0, 3, 10, 3, 0, 10, 0:
 *
 *	0	found some - recycle them
 *	3	found some - recycle them	and restart scan
 *	10	found some - recycle them	and restart scan
 *	3	found some more - recycle them	and restart scan
 *	0	found some more - recycle them
 *	10	found some more - recycle them	and restart scan
 *	0	found some more - recycle them
 *
 * The routine returns only when a complete scan has been performed
 * without finding any slots to recycle.
 *
 * Note: the expression (BGE_SEND_RINGS_USED > 1) yields a compile-time
 * constant and allows the compiler to optimise away the outer do-loop
 * if only one send ring is being used.
 */
boolean_t bge_recycle(bge_t *bgep, bge_status_t *bsp);
#pragma	no_inline(bge_recycle)

boolean_t
bge_recycle(bge_t *bgep, bge_status_t *bsp)
{
	send_ring_t *srp;
	uint64_t ring;
	uint64_t tx_rings = bgep->chipid.tx_rings;
	boolean_t tx_done = B_FALSE;

restart:
	ring = 0;
	srp = &bgep->send[ring];
	do {
		/*
		 * For each ring, (srp->cons_index_p) points to the
		 * proper index within the status block (which has
		 * already been sync'd by the caller).
		 */
		ASSERT(srp->cons_index_p == SEND_INDEX_P(bsp, ring));

		if (*srp->cons_index_p == srp->tc_next)
			continue;	/* no slots to recycle	*/
		if (mutex_tryenter(srp->tc_lock) == 0)
			continue;	/* already in process	*/
		tx_done |= bge_recycle_ring(bgep, srp);
		mutex_exit(srp->tc_lock);

		/*
		 * Restart from ring 0, if we're not on ring 0 already.
		 * As the h/w selects send BDs purely by priority, and
		 * available BDs on the higher-priority ring are always
		 * selected first, the driver should stay consistent
		 * with the h/w and give lower-numbered rings higher
		 * priority.
		 */
		if (tx_rings > 1 && ring > 0)
			goto restart;

		/*
		 * Loop over all rings (if there *are* multiple rings)
		 */
	} while (++srp, ++ring < tx_rings);

	return (tx_done);
}

/*
 * ========== Send-side transmit routines ==========
 */

#define	TCP_CKSUM_OFFSET	16
#define	UDP_CKSUM_OFFSET	6
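
/*
 * These are the offsets of the checksum field within the TCP and UDP
 * headers: bytes 16-17 of a TCP header and bytes 6-7 of a UDP header.
 */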

static void
bge_pseudo_cksum(uint8_t *buf)
{
	uint32_t cksum;
	uint8_t proto;
	uint16_t iphl;

	/*
	 * Point it to the ip header.
	 */
	buf += sizeof (struct ether_header);

	/*
	 * Calculate the pseudo-header checksum: the IP total length
	 * (bytes 2-3) minus the IP header length gives the TCP/UDP
	 * length; add in the protocol number (byte 9) and the source
	 * and destination addresses (bytes 12-19).
	 */
	iphl = 4 * (buf[0] & 0xF);
	cksum = (((uint16_t)buf[2])<<8) + buf[3] - iphl;
	cksum += proto = buf[9];
	cksum += (((uint16_t)buf[12])<<8) + buf[13];
	cksum += (((uint16_t)buf[14])<<8) + buf[15];
	cksum += (((uint16_t)buf[16])<<8) + buf[17];
	cksum += (((uint16_t)buf[18])<<8) + buf[19];
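	/*
	 * Fold the 32-bit sum into 16 bits; folding twice handles any
	 * carry generated by the first fold.
	 */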
	cksum = (cksum>>16) + (cksum & 0xFFFF);
	cksum = (cksum>>16) + (cksum & 0xFFFF);

	/*
	 * Point it to the TCP/UDP header, and
	 * update the checksum field.
	 */
	buf += iphl + ((proto == IPPROTO_TCP) ?
	    TCP_CKSUM_OFFSET : UDP_CKSUM_OFFSET);

	/*
	 * There is a real possibility that this pointer cast is a
	 * problem.  It should be fixed when we know the code better.
	 * E_BAD_PTR_CAST_ALIGN is added to make it temporarily clean.
	 */
	*(uint16_t *)buf = htons((uint16_t)cksum);
}
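
/*
 * Take a tx buffer from the pop queue; if it's empty, fall back to the
 * push queue, and if both are empty, try to grow the tx buffer array
 * (up to tx_array_max).
 */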
static bge_queue_item_t *
bge_get_txbuf(bge_t *bgep, send_ring_t *srp)
{
	bge_queue_item_t *txbuf_item;
	bge_queue_t *txbuf_queue;

	txbuf_queue = srp->txbuf_pop_queue;
	mutex_enter(txbuf_queue->lock);
	if (txbuf_queue->count == 0) {
		mutex_exit(txbuf_queue->lock);
		txbuf_queue = srp->txbuf_push_queue;
		mutex_enter(txbuf_queue->lock);
		if (txbuf_queue->count == 0) {
			mutex_exit(txbuf_queue->lock);
			/* Try to allocate more tx buffers */
			if (srp->tx_array < srp->tx_array_max) {
				mutex_enter(srp->tx_lock);
				txbuf_item = bge_alloc_txbuf_array(bgep, srp);
				mutex_exit(srp->tx_lock);
			} else
				txbuf_item = NULL;
			return (txbuf_item);
		}
	}

	txbuf_item = txbuf_queue->head;
	txbuf_queue->head = (bge_queue_item_t *)txbuf_item->next;
	txbuf_queue->count--;
	mutex_exit(txbuf_queue->lock);
	txbuf_item->next = NULL;

	return (txbuf_item);
}

/*
 * Send a message by copying it into a preallocated (and premapped) buffer
 */
static void bge_send_copy(bge_t *bgep, sw_txbuf_t *txbuf, mblk_t *mp);
#pragma	inline(bge_send_copy)

static void
bge_send_copy(bge_t *bgep, sw_txbuf_t *txbuf, mblk_t *mp)
{
	mblk_t *bp;
	uint32_t mblen;
	char *pbuf;

	txbuf->copy_len = 0;
	pbuf = DMA_VPTR(txbuf->buf);
	for (bp = mp; bp != NULL; bp = bp->b_cont) {
		if ((mblen = MBLKL(bp)) == 0)
			continue;
		ASSERT(txbuf->copy_len + mblen <=
		    bgep->chipid.snd_buff_size);
		bcopy(bp->b_rptr, pbuf, mblen);
		pbuf += mblen;
		txbuf->copy_len += mblen;
	}
}

/*
 * Fill the Tx buffer descriptors and trigger the h/w transmission
 */
static void
bge_send_serial(bge_t *bgep, send_ring_t *srp)
{
	uint64_t txfill_next;
	uint64_t tx_next;
	uint32_t count;
	sw_sbd_t *ssbdp;
	send_pkt_t *pktp;
	bge_status_t *bsp;
	sw_txbuf_t *txbuf;
	bge_sbd_t *hw_sbd_p;
	bge_queue_item_t *txbuf_item;

	/*
	 * Try to hold the tx lock:
	 * If we are in an interrupt context, use mutex_enter() to
	 * ensure quick response for tx in interrupt context;
	 * Otherwise, use mutex_tryenter() to serialize this h/w tx
	 * BD filling and transmission triggering task.
	 */
	if (servicing_interrupt() != 0)
		mutex_enter(srp->tx_lock);
	else if (mutex_tryenter(srp->tx_lock) == 0)
		return;		/* already in process	*/

start_tx:
	bsp = DMA_VPTR(bgep->status_block);
	txfill_next = srp->txfill_next;
	tx_next = srp->tx_next;
	for (count = 0; count < bgep->param_drain_max; ++count) {
		pktp = &srp->pktp[txfill_next];
		if (!pktp->tx_ready) {
			if (count == 0)
				srp->tx_block++;
			break;
		}

		/*
		 * If there are not enough BDs: try to recycle more
		 */
		if (srp->tx_free <= 1)
			(void) bge_recycle(bgep, bsp);

		/*
		 * Reserve the required BDs: 1 is enough
		 */
		if (!bge_atomic_reserve(&srp->tx_free, 1)) {
			srp->tx_nobd++;
			break;
		}

		/*
		 * Go straight to claiming our already-reserved places
		 */
		ASSERT(pktp->txbuf_item != NULL);
		txbuf_item = pktp->txbuf_item;
		pktp->txbuf_item = NULL;
		pktp->tx_ready = B_FALSE;

		txbuf = txbuf_item->item;
		ASSERT(txbuf->copy_len != 0);
		(void) ddi_dma_sync(txbuf->buf.dma_hdl, 0,
		    txbuf->copy_len, DDI_DMA_SYNC_FORDEV);

		ssbdp = &srp->sw_sbds[tx_next];
		ASSERT(ssbdp->pbuf == NULL);
		ssbdp->pbuf = txbuf_item;

		/*
		 * Set up the hardware send buffer descriptor
		 */
		hw_sbd_p = DMA_VPTR(ssbdp->desc);
		hw_sbd_p->flags = 0;
		hw_sbd_p->host_buf_addr = txbuf->buf.cookie.dmac_laddress;
		hw_sbd_p->len = txbuf->copy_len;
		if (pktp->vlan_tci != 0) {
			hw_sbd_p->vlan_tci = pktp->vlan_tci;
			hw_sbd_p->host_buf_addr += VLAN_TAGSZ;
			hw_sbd_p->flags |= SBD_FLAG_VLAN_TAG;
		}
		if (pktp->pflags & HCK_IPV4_HDRCKSUM)
			hw_sbd_p->flags |= SBD_FLAG_IP_CKSUM;
		if (pktp->pflags & HCK_FULLCKSUM)
			hw_sbd_p->flags |= SBD_FLAG_TCP_UDP_CKSUM;
		if (!(bgep->chipid.flags & CHIP_FLAG_NO_JUMBO) &&
		    (DEVICE_5717_SERIES_CHIPSETS(bgep) ||
		    DEVICE_5725_SERIES_CHIPSETS(bgep)) &&
		    (txbuf->copy_len > ETHERMAX))
			hw_sbd_p->flags |= SBD_FLAG_JMB_PKT;
		hw_sbd_p->flags |= SBD_FLAG_PACKET_END;

		txfill_next = NEXT(txfill_next, BGE_SEND_BUF_MAX);
		tx_next = NEXT(tx_next, srp->desc.nslots);
	}

	/*
	 * Trigger h/w to start transmission.
	 */
	if (count != 0) {
		bge_atomic_sub64(&srp->tx_flow, count);
		srp->txfill_next = txfill_next;
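
		/*
		 * If the newly filled descriptors wrap past the end of
		 * the ring, sync the stretch up to the last slot first,
		 * then sync the remainder from slot 0.
		 */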
		if (srp->tx_next > tx_next) {
			(void) ddi_dma_sync(ssbdp->desc.dma_hdl, 0,
			    (srp->desc.nslots - srp->tx_next) *
			    sizeof (bge_sbd_t),
			    DDI_DMA_SYNC_FORDEV);
			count -= srp->desc.nslots - srp->tx_next;
			ssbdp = &srp->sw_sbds[0];
		}
		(void) ddi_dma_sync(ssbdp->desc.dma_hdl, 0,
		    count * sizeof (bge_sbd_t), DDI_DMA_SYNC_FORDEV);
		bge_mbx_put(bgep, srp->chip_mbx_reg, tx_next);
		srp->tx_next = tx_next;
		atomic_or_32(&bgep->watchdog, 1);

		if (srp->tx_flow != 0 && srp->tx_free > 1)
			goto start_tx;
	}

	mutex_exit(srp->tx_lock);
}

mblk_t *
bge_ring_tx(void *arg, mblk_t *mp)
{
	send_ring_t *srp = arg;
	bge_t *bgep = srp->bgep;
	struct ether_vlan_header *ehp;
	bge_queue_item_t *txbuf_item;
	sw_txbuf_t *txbuf;
	send_pkt_t *pktp;
	uint64_t pkt_slot;
	uint16_t vlan_tci;
	uint32_t pflags;
	char *pbuf;

	ASSERT(mp->b_next == NULL);

	/*
	 * Get a s/w tx buffer first
	 */
	txbuf_item = bge_get_txbuf(bgep, srp);
	if (txbuf_item == NULL) {
		/* no tx buffer available */
		srp->tx_nobuf++;
		bgep->tx_resched_needed = B_TRUE;
		bge_send_serial(bgep, srp);
		return (mp);
	}

	/*
	 * Copy all mp fragments to the pkt buffer
	 */
	txbuf = txbuf_item->item;
	bge_send_copy(bgep, txbuf, mp);

	/*
	 * Determine if the packet is VLAN tagged.
	 */
	ASSERT(txbuf->copy_len >= sizeof (struct ether_header));
	pbuf = DMA_VPTR(txbuf->buf);

	ehp = (void *)pbuf;
	vlan_tci = 0;
	if (ehp->ether_tpid == htons(ETHERTYPE_VLAN)) {
		/*
		 * Strip the vlan tag: slide the two MAC addresses up
		 * by VLAN_TAGSZ so they overwrite the tag; the untagged
		 * frame then starts VLAN_TAGSZ bytes into the buffer.
		 */
		vlan_tci = ntohs(ehp->ether_tci);
		pbuf = memmove(pbuf + VLAN_TAGSZ, pbuf, 2 * ETHERADDRL);
		txbuf->copy_len -= VLAN_TAGSZ;
	}

	/*
	 * Retrieve checksum offloading info.
	 */
	mac_hcksum_get(mp, NULL, NULL, NULL, NULL, &pflags);

	/*
	 * Calculate pseudo checksum if needed.
	 */
	if ((pflags & HCK_FULLCKSUM) &&
	    (bgep->chipid.flags & CHIP_FLAG_PARTIAL_CSUM))
		bge_pseudo_cksum((uint8_t *)pbuf);

	/*
	 * Packet buffer is ready to send: get and fill pkt info
	 */
	pkt_slot = bge_atomic_next(&srp->txpkt_next, BGE_SEND_BUF_MAX);
	pktp = &srp->pktp[pkt_slot];
	ASSERT(pktp->txbuf_item == NULL);
	pktp->txbuf_item = txbuf_item;
	pktp->vlan_tci = vlan_tci;
	pktp->pflags = pflags;
	atomic_inc_64(&srp->tx_flow);
	ASSERT(pktp->tx_ready == B_FALSE);
	pktp->tx_ready = B_TRUE;

	/*
	 * Fill the h/w bd and trigger the h/w to start transmission
	 */
	bge_send_serial(bgep, srp);

	srp->pushed_bytes += MBLKL(mp);

	/*
	 * We've copied the contents, the message can be freed right away
	 */
	freemsg(mp);
	return (NULL);
}

static mblk_t *
bge_send(bge_t *bgep, mblk_t *mp)
{
	send_ring_t *ring;

	ring = &bgep->send[0];	/* ring 0 */

	return (bge_ring_tx(ring, mp));
}

uint_t
bge_send_drain(caddr_t arg)
{
	uint_t ring = 0;	/* use ring 0 */
	bge_t *bgep;
	send_ring_t *srp;

	bgep = (void *)arg;
	BGE_TRACE(("bge_send_drain($%p)", (void *)bgep));

	srp = &bgep->send[ring];
	bge_send_serial(bgep, srp);

	if (bgep->tx_resched_needed &&
	    (srp->tx_flow < srp->tx_buffers_low) &&
	    (bgep->bge_mac_state == BGE_MAC_STARTED)) {
		mac_tx_update(bgep->mh);
		bgep->tx_resched_needed = B_FALSE;
	}

	return (DDI_INTR_CLAIMED);
}

/*
 * bge_m_tx() - send a chain of packets
 */
mblk_t *
bge_m_tx(void *arg, mblk_t *mp)
{
	bge_t *bgep = arg;		/* private device info	*/
	mblk_t *next;

	BGE_TRACE(("bge_m_tx($%p, $%p)", arg, (void *)mp));

	ASSERT(mp != NULL);
	ASSERT(bgep->bge_mac_state == BGE_MAC_STARTED);

	rw_enter(bgep->errlock, RW_READER);
	if ((bgep->bge_chip_state != BGE_CHIP_RUNNING) ||
	    !(bgep->param_link_up)) {
		BGE_DEBUG(("bge_m_tx: chip not running or link down"));
		freemsgchain(mp);
		mp = NULL;
	}

	while (mp != NULL) {
		next = mp->b_next;
		mp->b_next = NULL;

		if ((mp = bge_send(bgep, mp)) != NULL) {
			mp->b_next = next;
			break;
		}

		mp = next;
	}

	rw_exit(bgep->errlock);

	return (mp);
}