4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
23 * Copyright 2007-2009 Myricom, Inc. All rights reserved.
24 * Use is subject to license terms.
/* Revision identification string for this file (expanded Id keyword). */
27 static const char __idstring
[] =
28 "@(#)$Id: myri10ge_lro.c,v 1.7 2009-06-29 13:47:22 gallatin Exp $";
30 #include "myri10ge_var.h"
/* Mask applied to ip_off together with IP_MF when rejecting fragments. */
32 #define IP_OFFMASK 0x1fff
/* Kind and length bytes expected in the TCP timestamp option word. */
33 #define TCPOPT_TIMESTAMP 8
34 #define TCPOLEN_TIMESTAMP 10
/*
 * The only non-zero TCP option length accepted by the receive path:
 * two NOP bytes followed by the 10-byte timestamp option.
 */
35 #define TCPOLEN_TSTAMP_APPA 12
39 * Assume len is a multiple of 4. Note that "raw" must be
40 * suitably aligned. In practice, it will always enter aligned on
41 * at least a 4-byte boundary, due to the alignment of our rx buffers.
/*
 * Checksum helper taking a pointer to 16-bit words and a byte length.
 * The statements that accumulate into csum are not visible in this
 * view; only the final reduction and return are shown below.
 * Returns the reduced value truncated to 16 bits.
 */
44 myri10ge_csum_generic(uint16_t *raw
, int len
)
/*
 * Fold the bits above bit 15 back into the low 16 bits. The fold is
 * performed twice so that a carry produced by the first addition is
 * folded in as well.
 */
55 csum
= (csum
>> 16) + (csum
& 0xffff);
56 csum
= (csum
>> 16) + (csum
& 0xffff);
/* the result is not complemented here; callers do that themselves */
57 return ((uint16_t)csum
);
/*
 * Add the three values a, b and c and reduce the sum to 16 bits.
 * In this file it is called with the IP source address, the IP
 * destination address and htons(TCP length + IPPROTO_TCP), i.e. the
 * fields of the TCP pseudo header.
 * Returns the folded sum, not complemented.
 */
61 myri10ge_in_pseudo(unsigned int a
, unsigned int b
,
/* the addition is widened to 64 bits so the three-way sum cannot wrap */
66 csum
= (uint64_t)a
+ b
+ c
;
/*
 * Fold the upper bits into the low 16 bits, twice.
 * NOTE(review): if csum is 64 bits wide (its declaration is not visible
 * here), two folds can still leave a value of 0x10000 or more, e.g. for
 * a sum of 0x1ffff0000, and the cast below would then drop the carry.
 * Confirm whether a third fold is needed.
 */
67 csum
= (csum
>> 16) + (csum
& 0xffff);
68 csum
= (csum
>> 16) + (csum
& 0xffff);
69 return ((uint16_t)csum
);
/*
 * Finish the LRO entry "lro" of slice "ss": bring the headers of the
 * head mblk up to date with the state accumulated in the entry, mark
 * the chain as checksum-verified, and append it to the list "mbl".
 * Several statements of this function (local declarations, some
 * assignment targets, closing braces) are not visible in this view.
 */
73 myri10ge_lro_flush(struct myri10ge_slice_state
*ss
, struct lro_entry
*lro
,
74 struct myri10ge_mblk_list
*mbl
)
79 uint32_t tcplen
, tcp_csum
;
/* the header rewrite below is entered only when append_cnt is non-zero */
81 if (lro
->append_cnt
) {
83 * incorporate the new len into the ip header and
84 * re-calculate the checksum
/* lro->len includes the Ethernet header, ip_len must not */
87 ip
->ip_len
= htons(lro
->len
- ETHERNET_HEADER_SIZE
);
90 myri10ge_csum_generic((uint16_t *)ip
, sizeof (*ip
));
91 /* incorporate the latest ack into the tcp header */
/* the TCP header is taken to start directly after the fixed-size IP header */
92 tcp
= (struct tcphdr
*)(ip
+ 1);
/* ack_seq and window are copied without byte-order conversion */
93 tcp
->th_ack
= lro
->ack_seq
;
94 tcp
->th_win
= lro
->window
;
95 tcp
->th_flags
= lro
->flags
;
96 /* incorporate latest timestamp into the tcp header */
/*
 * ts_ptr addresses the 32-bit words that follow the TCP header.
 * Word 1 receives tsval converted with htonl(); word 2 receives tsecr
 * unchanged. Word 0 is left as it is.
 */
98 ts_ptr
= (uint32_t *)(tcp
+ 1);
99 ts_ptr
[1] = htonl(lro
->tsval
);
100 ts_ptr
[2] = lro
->tsecr
;
103 * update checksum in tcp header by re-calculating the
104 * tcp pseudoheader checksum, and adding it to the checksum
105 * of the tcp payload data
/* tcplen = TCP header plus payload bytes of the aggregated packet */
108 tcplen
= lro
->len
- sizeof (*ip
) - ETHERNET_HEADER_SIZE
;
/* start from the stored payload checksum, then add pseudo header and TCP header sums */
109 tcp_csum
= lro
->data_csum
;
110 tcp_csum
+= myri10ge_in_pseudo(ip
->ip_src
.s_addr
,
111 ip
->ip_dst
.s_addr
, htons(tcplen
+ IPPROTO_TCP
));
112 tcp_csum
+= myri10ge_csum_generic((uint16_t *)tcp
,
/* fold the 32-bit accumulator to 16 bits, twice to absorb the carry */
114 tcp_csum
= (tcp_csum
& 0xffff) + (tcp_csum
>> 16);
115 tcp_csum
= (tcp_csum
& 0xffff) + (tcp_csum
>> 16);
/* the stored checksum is the bitwise complement of the folded sum */
116 tcp
->th_sum
= 0xffff ^ tcp_csum
;
/* flag the head mblk as having valid IPv4 header and full checksums */
119 mac_hcksum_set(lro
->m_head
, 0, 0, 0,
120 0, HCK_IPV4_HDRCKSUM_OK
| HCK_FULLCKSUM_OK
);
/* account for the appended segments, then queue the chain on mbl */
122 mbl
->cnt
+= lro
->append_cnt
;
123 myri10ge_mbl_append(ss
, mbl
, lro
->m_head
);
124 MYRI10GE_SLICE_STAT_INC(lro_flushed
);
/* append_cnt + 1: the appended segments plus the head segment */
125 MYRI10GE_SLICE_STAT_ADD(lro_queued
, lro
->append_cnt
+ 1);
/*
 * Link the entry in front of the current free-list head. The statement
 * that makes it the new head is presumably on a line not visible here.
 */
129 lro
->next
= ss
->lro_free
;
/*
 * Try to aggregate the received frame "m_head" into an LRO entry of
 * slice "ss". "csum" is the checksum value supplied with the frame and
 * "mbl" is the list that receives any entry flushed along the way.
 *
 * The frame is accepted only if it is IPv4/TCP with no IP options, is
 * not a fragment, carries no TCP flags other than ACK/PUSH, and has
 * either no TCP options or exactly the NOP/NOP/timestamp layout.
 *
 * The return statements and several other lines of this function are
 * not visible in this view, so return values are not documented here.
 */
134 myri10ge_lro_rx(struct myri10ge_slice_state
*ss
, mblk_t
*m_head
,
135 uint32_t csum
, struct myri10ge_mblk_list
*mbl
)
137 struct ether_header
*eh
;
141 struct lro_entry
*lro
, *curr
;
142 int hlen
, ip_len
, tcp_hdr_len
, tcp_data_len
;
/* tot_len: number of bytes present in this mblk */
144 int tot_len
= MBLKL(m_head
);
145 uint32_t seq
, tmp_csum
;
/* headers are parsed in place, starting at the mblk read pointer */
147 eh
= (struct ether_header
*)(void *)m_head
->b_rptr
;
148 if (eh
->ether_type
!= htons(ETHERTYPE_IP
))
/* the IP header is taken to follow the Ethernet header directly */
150 ip
= (struct ip
*)(void *)(eh
+ 1);
151 if (ip
->ip_p
!= IPPROTO_TCP
)
154 /* ensure there are no options */
155 if ((ip
->ip_hl
<< 2) != sizeof (*ip
))
158 /* .. and the packet is not fragmented */
159 if (ip
->ip_off
& htons(IP_MF
|IP_OFFMASK
))
162 /* verify that the IP header checksum is correct */
/* a valid header sums to 0xffff, so XOR with 0xffff must give zero */
163 tmp_csum
= myri10ge_csum_generic((uint16_t *)ip
, sizeof (*ip
));
164 if (unlikely((tmp_csum
^ 0xffff) != 0)) {
165 MYRI10GE_SLICE_STAT_INC(lro_bad_csum
);
169 /* find the TCP header */
170 tcp
= (struct tcphdr
*)(ip
+ 1);
172 /* ensure no bits set besides ack or psh */
173 if ((tcp
->th_flags
& ~(TH_ACK
| TH_PUSH
)) != 0)
177 * check for timestamps. Since the only option we handle are
178 * timestamps, we only have to handle the simple case of
/* opt_bytes: TCP header length from th_off minus the fixed header size */
182 opt_bytes
= (tcp
->th_off
<< 2) - sizeof (*tcp
);
183 tcp_hdr_len
= sizeof (*tcp
) + opt_bytes
;
/* ts_ptr addresses the first 32-bit word after the fixed TCP header */
184 ts_ptr
= (uint32_t *)(tcp
+ 1);
185 if (opt_bytes
!= 0) {
/*
 * Options are acceptable only if they are exactly 12 bytes long and
 * the first word is NOP, NOP, timestamp kind, timestamp length.
 */
186 if (unlikely(opt_bytes
!= TCPOLEN_TSTAMP_APPA
) ||
187 (*ts_ptr
!= ntohl(TCPOPT_NOP
<<24|TCPOPT_NOP
<<16|
188 TCPOPT_TIMESTAMP
<<8|TCPOLEN_TIMESTAMP
)))
192 ip_len
= ntohs(ip
->ip_len
);
/* payload bytes = IP total length minus TCP header and IP header */
193 tcp_data_len
= ip_len
- (tcp
->th_off
<< 2) - sizeof (*ip
);
196 * If frame is padded beyond the end of the IP packet,
197 * then we must trim the extra bytes off the end.
/* trim: bytes in the mblk beyond the end of the IP packet */
199 trim
= tot_len
- (ip_len
+ ETHERNET_HEADER_SIZE
);
202 /* truncated packet */
205 m_head
->b_wptr
-= trim
;
209 /* Verify TCP checksum */
/* csum is narrowed to 16 bits and byte-swapped before being combined */
210 csum
= ntohs((uint16_t)csum
);
211 tmp_csum
= csum
+ myri10ge_in_pseudo(ip
->ip_src
.s_addr
,
212 ip
->ip_dst
.s_addr
, htons(tcp_hdr_len
+ tcp_data_len
+ IPPROTO_TCP
));
213 tmp_csum
= (tmp_csum
& 0xffff) + (tmp_csum
>> 16);
214 tmp_csum
= (tmp_csum
& 0xffff) + (tmp_csum
>> 16);
/* csum plus the pseudo header sum must fold to 0xffff for a valid segment */
215 if (tmp_csum
!= 0xffff) {
216 MYRI10GE_SLICE_STAT_INC(lro_bad_csum
);
/*
 * hlen works out to Ethernet header + IP header + TCP header, i.e. the
 * number of bytes in front of the TCP payload.
 */
220 hlen
= ip_len
+ ETHERNET_HEADER_SIZE
- tcp_data_len
;
221 seq
= ntohl(tcp
->th_seq
);
/* look for an active entry with the same ports and addresses */
223 for (lro
= ss
->lro_active
; lro
!= NULL
; lro
= lro
->next
) {
224 if (lro
->source_port
== tcp
->th_sport
&&
225 lro
->dest_port
== tcp
->th_dport
&&
226 lro
->source_ip
== ip
->ip_src
.s_addr
&&
227 lro
->dest_ip
== ip
->ip_dst
.s_addr
) {
228 /* Try to append it */
230 if (unlikely(seq
!= lro
->next_seq
)) {
231 /* out of order packet */
/*
 * Unlink the entry from the active list (head case, or walk to its
 * predecessor) and flush what has been aggregated so far.
 */
232 if (ss
->lro_active
== lro
) {
233 ss
->lro_active
= lro
->next
;
235 curr
= ss
->lro_active
;
236 while (curr
->next
!= lro
)
238 curr
->next
= lro
->next
;
240 myri10ge_lro_flush(ss
, lro
, mbl
);
245 uint32_t tsval
= ntohl(*(ts_ptr
+ 1));
246 /* make sure timestamp values are increasing */
/*
 * NOTE(review): this is a plain ">" comparison, so it does not
 * account for the timestamp value wrapping around; confirm that
 * is acceptable. A zero echo-reply word is also rejected.
 */
247 if (unlikely(lro
->tsval
> tsval
||
248 *(ts_ptr
+ 2) == 0)) {
/* tsecr is kept exactly as read from the packet, without conversion */
252 lro
->tsecr
= *(ts_ptr
+ 2);
/* advance the expected sequence number and remember the latest ack/window */
255 lro
->next_seq
+= tcp_data_len
;
256 lro
->ack_seq
= tcp
->th_ack
;
257 lro
->window
= tcp
->th_win
;
/* flags are OR-ed in, so a flag seen on any segment is kept */
258 lro
->flags
|= tcp
->th_flags
;
260 if (tcp_data_len
== 0) {
265 * subtract off the checksum of the tcp header
266 * from the hardware checksum, and add it to
267 * the stored tcp data checksum. Byteswap
268 * the checksum if the total length so far is
271 tmp_csum
= myri10ge_csum_generic((uint16_t *)tcp
,
/* adding the complement of the header sum subtracts it in one's complement */
273 csum
= csum
+ (tmp_csum
^ 0xffff);
274 csum
= (csum
& 0xffff) + (csum
>> 16);
275 csum
= (csum
& 0xffff) + (csum
>> 16);
276 if (lro
->len
& 0x1) {
277 /* Odd number of bytes so far, flip bytes */
278 csum
= ((csum
<< 8) | (csum
>> 8)) & 0xffff;
/* merge this segment's payload sum into the running payload checksum */
280 csum
= csum
+ lro
->data_csum
;
281 csum
= (csum
& 0xffff) + (csum
>> 16);
282 csum
= (csum
& 0xffff) + (csum
>> 16);
283 lro
->data_csum
= csum
;
285 lro
->len
+= tcp_data_len
;
288 * adjust mblk so that rptr points to
289 * the first byte of the payload
291 m_head
->b_rptr
+= hlen
;
292 /* append mbuf chain */
293 lro
->m_tail
->b_cont
= m_head
;
294 /* advance the last pointer */
295 lro
->m_tail
= m_head
;
296 /* flush packet if required */
/*
 * Flush when the aggregate is within one MTU of 65535 bytes, or
 * when append_cnt + 1 has reached myri10ge_lro_max_aggr.
 */
297 if (lro
->len
> (65535 - myri10ge_mtu
) ||
298 (lro
->append_cnt
+ 1) == myri10ge_lro_max_aggr
) {
299 if (ss
->lro_active
== lro
) {
300 ss
->lro_active
= lro
->next
;
302 curr
= ss
->lro_active
;
303 while (curr
->next
!= lro
)
305 curr
->next
= lro
->next
;
307 myri10ge_lro_flush(ss
, lro
, mbl
);
/* the free list is checked for emptiness before a new entry is started */
313 if (ss
->lro_free
== NULL
)
316 /* start a new chain */
/*
 * Advance the free-list head past the entry and push the entry onto
 * the front of the active list. The statement that assigns lro is not
 * visible in this view.
 */
318 ss
->lro_free
= lro
->next
;
319 lro
->next
= ss
->lro_active
;
320 ss
->lro_active
= lro
;
/* record the connection identity used for matching later segments */
321 lro
->source_port
= tcp
->th_sport
;
322 lro
->dest_port
= tcp
->th_dport
;
323 lro
->source_ip
= ip
->ip_src
.s_addr
;
324 lro
->dest_ip
= ip
->ip_dst
.s_addr
;
/* next_seq is kept in host order; seq came from ntohl() above */
325 lro
->next_seq
= seq
+ tcp_data_len
;
326 lro
->mss
= (uint16_t)tcp_data_len
;
327 lro
->ack_seq
= tcp
->th_ack
;
328 lro
->window
= tcp
->th_win
;
329 lro
->flags
= tcp
->th_flags
;
332 * save the checksum of just the TCP payload by
333 * subtracting off the checksum of the TCP header from
334 * the entire hardware checksum
335 * Since IP header checksum is correct, checksum over
336 * the IP header is -0. Substracting -0 is unnecessary.
338 tmp_csum
= myri10ge_csum_generic((uint16_t *)tcp
, tcp_hdr_len
);
339 csum
= csum
+ (tmp_csum
^ 0xffff);
340 csum
= (csum
& 0xffff) + (csum
>> 16);
341 csum
= (csum
& 0xffff) + (csum
>> 16);
342 lro
->data_csum
= csum
;
345 /* record timestamp if it is present */
/* tsval is stored in host order, tsecr exactly as read from the packet */
348 lro
->tsval
= ntohl(*(ts_ptr
+ 1));
349 lro
->tsecr
= *(ts_ptr
+ 2);
/* the new entry's chain starts and ends with this mblk */
352 lro
->m_head
= m_head
;
353 lro
->m_tail
= m_head
;
358 * This file uses MyriGE driver indentation.