4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
23 * Copyright 2007-2009 Myricom, Inc. All rights reserved.
24 * Use is subject to license terms.
27 #include "myri10ge_var.h"
/* Combined with IP_MF and tested against ip_off in myri10ge_lro_rx(). */
29 #define IP_OFFMASK 0x1fff
/*
 * Option kind and option length that myri10ge_lro_rx() packs, after two
 * TCPOPT_NOP bytes, into the word it compares with the first TCP option
 * word.
 */
30 #define TCPOPT_TIMESTAMP 8
31 #define TCPOLEN_TIMESTAMP 10
/* Non-zero TCP option byte counts are compared with this in myri10ge_lro_rx(). */
32 #define TCPOLEN_TSTAMP_APPA 12
36 * Assume len is a multiple of 4. Note that "raw" must be
37 * suitably aligned. In practice, it will always enter aligned on
38 * at least a 4-byte boundary, due to the alignment of our rx buffers.
/*
 * myri10ge_csum_generic: checksum helper over "len" bytes starting at
 * "raw" (callers in this file pass sizeof (*ip) and the TCP header length).
 * The result is returned truncated to 16 bits.
 *
 * NOTE(review): the return type, the declaration of csum and the loop that
 * accumulates the data into csum are on lines not visible in this excerpt;
 * only the final carry folding and the return are shown.
 */
41 myri10ge_csum_generic(uint16_t *raw
, int len
)
/*
 * Add the upper 16 bits of csum into the lower 16 bits.  The fold is
 * done twice so that a carry produced by the first fold is folded in too.
 */
52 csum
= (csum
>> 16) + (csum
& 0xffff);
53 csum
= (csum
>> 16) + (csum
& 0xffff);
/* Only the low 16 bits are handed back to the caller. */
54 return ((uint16_t)csum
);
/*
 * myri10ge_in_pseudo: add three values and reduce the sum to 16 bits.
 * The callers in this file pass the IP source address, the IP destination
 * address and htons(TCP length + IPPROTO_TCP).
 *
 * NOTE(review): the return type, the declaration of the third operand "c"
 * and the declaration of csum are on lines not visible in this excerpt.
 */
58 myri10ge_in_pseudo(unsigned int a
, unsigned int b
,
/* The cast makes the additions below 64-bit arithmetic. */
63 csum
= (uint64_t)a
+ b
+ c
;
/* Fold the bits above bit 15 back into the low 16 bits, twice. */
64 csum
= (csum
>> 16) + (csum
& 0xffff);
65 csum
= (csum
>> 16) + (csum
& 0xffff);
/* Only the low 16 bits are returned. */
66 return ((uint16_t)csum
);
/*
 * myri10ge_lro_flush: finish the LRO entry "lro" and pass its mblk chain
 * (lro->m_head) to the list "mbl" through myri10ge_mbl_append().
 *
 * When lro->append_cnt is non-zero the headers at the front of the chain
 * are rewritten first: ip_len is set from lro->len, the TCP ack, window and
 * flags are taken from the values saved in lro, the timestamp words are
 * refreshed, and th_sum is rebuilt from lro->data_csum, the pseudo-header
 * sum and the checksum of the TCP header.
 *
 * The slice statistics lro_flushed and lro_queued are updated, and
 * lro->next is pointed at ss->lro_free.
 *
 * NOTE(review): the return type, several local declarations (ip, tcp,
 * ts_ptr), braces and some statements are on lines not visible in this
 * excerpt, so the exact nesting of the statements below cannot be
 * confirmed from here.
 */
70 myri10ge_lro_flush(struct myri10ge_slice_state
*ss
, struct lro_entry
*lro
,
71 struct myri10ge_mblk_list
*mbl
)
76 uint32_t tcplen
, tcp_csum
;
78 if (lro
->append_cnt
) {
80 * incorporate the new len into the ip header and
81 * re-calculate the checksum
/* lro->len includes the Ethernet header, which ip_len must not count. */
84 ip
->ip_len
= htons(lro
->len
- ETHERNET_HEADER_SIZE
);
/*
 * Checksum over the IP header.
 * NOTE(review): the line that consumes this result is not visible here.
 */
87 myri10ge_csum_generic((uint16_t *)ip
, sizeof (*ip
));
88 /* incorporate the latest ack into the tcp header */
89 tcp
= (struct tcphdr
*)(ip
+ 1);
90 tcp
->th_ack
= lro
->ack_seq
;
91 tcp
->th_win
= lro
->window
;
92 tcp
->th_flags
= lro
->flags
;
93 /* incorporate latest timestamp into the tcp header */
/*
 * myri10ge_lro_rx() stores lro->tsval through ntohl() and lro->tsecr
 * unconverted, so only tsval is converted back here.
 * NOTE(review): any condition guarding this update is not visible here.
 */
95 ts_ptr
= (uint32_t *)(tcp
+ 1);
96 ts_ptr
[1] = htonl(lro
->tsval
);
97 ts_ptr
[2] = lro
->tsecr
;
100 * update checksum in tcp header by re-calculating the
101 * tcp pseudoheader checksum, and adding it to the checksum
102 * of the tcp payload data
/* tcplen = aggregated length minus the IP and Ethernet headers. */
105 tcplen
= lro
->len
- sizeof (*ip
) - ETHERNET_HEADER_SIZE
;
106 tcp_csum
= lro
->data_csum
;
107 tcp_csum
+= myri10ge_in_pseudo(ip
->ip_src
.s_addr
,
108 ip
->ip_dst
.s_addr
, htons(tcplen
+ IPPROTO_TCP
));
/* NOTE(review): the length argument of this call is on a line not visible here. */
109 tcp_csum
+= myri10ge_csum_generic((uint16_t *)tcp
,
/* Fold the carries into 16 bits, then store the one's complement. */
111 tcp_csum
= (tcp_csum
& 0xffff) + (tcp_csum
>> 16);
112 tcp_csum
= (tcp_csum
& 0xffff) + (tcp_csum
>> 16);
113 tcp
->th_sum
= 0xffff ^ tcp_csum
;
/*
 * Pass the HCK_IPV4_HDRCKSUM_OK and HCK_FULLCKSUM_OK flags to
 * mac_hcksum_set() for the head mblk.
 */
116 mac_hcksum_set(lro
->m_head
, 0, 0, 0,
117 0, HCK_IPV4_HDRCKSUM_OK
| HCK_FULLCKSUM_OK
);
/* Account for the appended frames, then hand the chain to mbl. */
119 mbl
->cnt
+= lro
->append_cnt
;
120 myri10ge_mbl_append(ss
, mbl
, lro
->m_head
);
121 MYRI10GE_SLICE_STAT_INC(lro_flushed
);
122 MYRI10GE_SLICE_STAT_ADD(lro_queued
, lro
->append_cnt
+ 1);
/*
 * Link the entry in front of the current free list.
 * NOTE(review): the update of ss->lro_free itself is not visible here.
 */
126 lro
->next
= ss
->lro_free
;
/*
 * myri10ge_lro_rx: try to merge the received frame "m_head" into an LRO
 * entry of slice "ss".  "csum" is the checksum value supplied for the frame
 * and "mbl" is the list handed to myri10ge_lro_flush() when an entry has to
 * be flushed.
 *
 * Checks visible below, in order: the ethertype is IP, the protocol is TCP,
 * the IP header has no options, the packet is not fragmented, the IP header
 * checksum verifies, no TCP flags other than ACK/PSH are set, TCP options
 * are either absent or match the NOP/NOP/timestamp layout, and the TCP
 * checksum verifies.
 *
 * The active list is then searched for an entry with the same ports and
 * addresses.  A match is either flushed (out-of-order sequence number) or
 * extended with this frame's payload; the extended entry is flushed when it
 * grows too large or reaches myri10ge_lro_max_aggr frames.  The remaining
 * code takes an entry from ss->lro_free, puts it at the head of
 * ss->lro_active and initialises it from this frame.
 *
 * NOTE(review): the return type, the return statements and their values,
 * several declarations (ip, tcp, ts_ptr, opt_bytes, trim) and a number of
 * braces/branches are on lines not visible in this excerpt.
 */
131 myri10ge_lro_rx(struct myri10ge_slice_state
*ss
, mblk_t
*m_head
,
132 uint32_t csum
, struct myri10ge_mblk_list
*mbl
)
134 struct ether_header
*eh
;
138 struct lro_entry
*lro
, *curr
;
139 int hlen
, ip_len
, tcp_hdr_len
, tcp_data_len
;
141 int tot_len
= MBLKL(m_head
);
142 uint32_t seq
, tmp_csum
;
/* The Ethernet header is read from the start of the mblk data. */
144 eh
= (struct ether_header
*)(void *)m_head
->b_rptr
;
145 if (eh
->ether_type
!= htons(ETHERTYPE_IP
))
/* The IP header follows the Ethernet header directly. */
147 ip
= (struct ip
*)(void *)(eh
+ 1);
148 if (ip
->ip_p
!= IPPROTO_TCP
)
151 /* ensure there are no options */
152 if ((ip
->ip_hl
<< 2) != sizeof (*ip
))
155 /* .. and the packet is not fragmented */
156 if (ip
->ip_off
& htons(IP_MF
|IP_OFFMASK
))
159 /* verify that the IP header checksum is correct */
160 tmp_csum
= myri10ge_csum_generic((uint16_t *)ip
, sizeof (*ip
));
161 if (unlikely((tmp_csum
^ 0xffff) != 0)) {
162 MYRI10GE_SLICE_STAT_INC(lro_bad_csum
);
166 /* find the TCP header */
167 tcp
= (struct tcphdr
*)(ip
+ 1);
169 /* ensure no bits set besides ack or psh */
170 if ((tcp
->th_flags
& ~(TH_ACK
| TH_PUSH
)) != 0)
174 * check for timestamps. Since the only option we handle are
175 * timestamps, we only have to handle the simple case of
/* th_off counts 32-bit words; anything beyond the fixed header is options. */
179 opt_bytes
= (tcp
->th_off
<< 2) - sizeof (*tcp
);
180 tcp_hdr_len
= sizeof (*tcp
) + opt_bytes
;
181 ts_ptr
= (uint32_t *)(tcp
+ 1);
182 if (opt_bytes
!= 0) {
183 if (unlikely(opt_bytes
!= TCPOLEN_TSTAMP_APPA
) ||
184 (*ts_ptr
!= ntohl(TCPOPT_NOP
<<24|TCPOPT_NOP
<<16|
185 TCPOPT_TIMESTAMP
<<8|TCPOLEN_TIMESTAMP
)))
/* Payload bytes = IP total length minus the TCP and IP header lengths. */
189 ip_len
= ntohs(ip
->ip_len
);
190 tcp_data_len
= ip_len
- (tcp
->th_off
<< 2) - sizeof (*ip
);
193 * If frame is padded beyond the end of the IP packet,
194 * then we must trim the extra bytes off the end.
/* trim = bytes in the mblk beyond the Ethernet header plus IP packet. */
196 trim
= tot_len
- (ip_len
+ ETHERNET_HEADER_SIZE
);
199 /* truncated packet */
202 m_head
->b_wptr
-= trim
;
206 /* Verify TCP checksum */
/*
 * csum is passed through ntohs() and added to the pseudo-header sum;
 * after folding, any result other than 0xffff is counted as a bad
 * checksum.
 */
207 csum
= ntohs((uint16_t)csum
);
208 tmp_csum
= csum
+ myri10ge_in_pseudo(ip
->ip_src
.s_addr
,
209 ip
->ip_dst
.s_addr
, htons(tcp_hdr_len
+ tcp_data_len
+ IPPROTO_TCP
));
210 tmp_csum
= (tmp_csum
& 0xffff) + (tmp_csum
>> 16);
211 tmp_csum
= (tmp_csum
& 0xffff) + (tmp_csum
>> 16);
212 if (tmp_csum
!= 0xffff) {
213 MYRI10GE_SLICE_STAT_INC(lro_bad_csum
);
/* hlen = every byte that precedes the TCP payload in the frame. */
217 hlen
= ip_len
+ ETHERNET_HEADER_SIZE
- tcp_data_len
;
218 seq
= ntohl(tcp
->th_seq
);
/* Search the active list for an entry with the same ports and addresses. */
220 for (lro
= ss
->lro_active
; lro
!= NULL
; lro
= lro
->next
) {
221 if (lro
->source_port
== tcp
->th_sport
&&
222 lro
->dest_port
== tcp
->th_dport
&&
223 lro
->source_ip
== ip
->ip_src
.s_addr
&&
224 lro
->dest_ip
== ip
->ip_dst
.s_addr
) {
225 /* Try to append it */
227 if (unlikely(seq
!= lro
->next_seq
)) {
228 /* out of order packet */
/* Unlink lro from the singly linked active list, then flush it. */
229 if (ss
->lro_active
== lro
) {
230 ss
->lro_active
= lro
->next
;
232 curr
= ss
->lro_active
;
233 while (curr
->next
!= lro
)
235 curr
->next
= lro
->next
;
237 myri10ge_lro_flush(ss
, lro
, mbl
);
242 uint32_t tsval
= ntohl(*(ts_ptr
+ 1));
243 /* make sure timestamp values are increasing */
244 if (unlikely(lro
->tsval
> tsval
||
245 *(ts_ptr
+ 2) == 0)) {
249 lro
->tsecr
= *(ts_ptr
+ 2);
/* Save the latest sequence, ack, window and flag state in the entry. */
252 lro
->next_seq
+= tcp_data_len
;
253 lro
->ack_seq
= tcp
->th_ack
;
254 lro
->window
= tcp
->th_win
;
255 lro
->flags
|= tcp
->th_flags
;
257 if (tcp_data_len
== 0) {
262 * subtract off the checksum of the tcp header
263 * from the hardware checksum, and add it to
264 * the stored tcp data checksum. Byteswap
265 * the checksum if the total length so far is
/* NOTE(review): the length argument of this call is on a line not visible here. */
268 tmp_csum
= myri10ge_csum_generic((uint16_t *)tcp
,
/* Adding the complement (tmp_csum ^ 0xffff) is the subtraction described above. */
270 csum
= csum
+ (tmp_csum
^ 0xffff);
271 csum
= (csum
& 0xffff) + (csum
>> 16);
272 csum
= (csum
& 0xffff) + (csum
>> 16);
273 if (lro
->len
& 0x1) {
274 /* Odd number of bytes so far, flip bytes */
275 csum
= ((csum
<< 8) | (csum
>> 8)) & 0xffff;
/* Merge this frame's payload checksum into the running data checksum. */
277 csum
= csum
+ lro
->data_csum
;
278 csum
= (csum
& 0xffff) + (csum
>> 16);
279 csum
= (csum
& 0xffff) + (csum
>> 16);
280 lro
->data_csum
= csum
;
282 lro
->len
+= tcp_data_len
;
285 * adjust mblk so that rptr points to
286 * the first byte of the payload
288 m_head
->b_rptr
+= hlen
;
289 /* append mbuf chain */
290 lro
->m_tail
->b_cont
= m_head
;
291 /* advance the last pointer */
292 lro
->m_tail
= m_head
;
293 /* flush packet if required */
/*
 * Flush when another myri10ge_mtu bytes could push the length past
 * 65535, or when the aggregation count limit is reached.
 */
294 if (lro
->len
> (65535 - myri10ge_mtu
) ||
295 (lro
->append_cnt
+ 1) == myri10ge_lro_max_aggr
) {
/* Same unlink-then-flush sequence as in the out-of-order case. */
296 if (ss
->lro_active
== lro
) {
297 ss
->lro_active
= lro
->next
;
299 curr
= ss
->lro_active
;
300 while (curr
->next
!= lro
)
302 curr
->next
= lro
->next
;
304 myri10ge_lro_flush(ss
, lro
, mbl
);
/*
 * A new entry comes from ss->lro_free.
 * NOTE(review): the statement executed when the free list is empty, and
 * the assignment of lro from the free list, are not visible here.
 */
310 if (ss
->lro_free
== NULL
)
313 /* start a new chain */
/* Move lro from the free list to the head of the active list. */
315 ss
->lro_free
= lro
->next
;
316 lro
->next
= ss
->lro_active
;
317 ss
->lro_active
= lro
;
/* Record the connection identity and current TCP state from this frame. */
318 lro
->source_port
= tcp
->th_sport
;
319 lro
->dest_port
= tcp
->th_dport
;
320 lro
->source_ip
= ip
->ip_src
.s_addr
;
321 lro
->dest_ip
= ip
->ip_dst
.s_addr
;
322 lro
->next_seq
= seq
+ tcp_data_len
;
323 lro
->mss
= (uint16_t)tcp_data_len
;
324 lro
->ack_seq
= tcp
->th_ack
;
325 lro
->window
= tcp
->th_win
;
326 lro
->flags
= tcp
->th_flags
;
329 * save the checksum of just the TCP payload by
330 * subtracting off the checksum of the TCP header from
331 * the entire hardware checksum
332 * Since IP header checksum is correct, checksum over
333 * the IP header is -0. Substracting -0 is unnecessary.
335 tmp_csum
= myri10ge_csum_generic((uint16_t *)tcp
, tcp_hdr_len
);
336 csum
= csum
+ (tmp_csum
^ 0xffff);
337 csum
= (csum
& 0xffff) + (csum
>> 16);
338 csum
= (csum
& 0xffff) + (csum
>> 16);
339 lro
->data_csum
= csum
;
342 /* record timestamp if it is present */
/* tsval is kept converted by ntohl(); tsecr is kept exactly as read. */
345 lro
->tsval
= ntohl(*(ts_ptr
+ 1));
346 lro
->tsecr
= *(ts_ptr
+ 2);
/* The chain initially consists of this frame only. */
349 lro
->m_head
= m_head
;
350 lro
->m_tail
= m_head
;
355 * This file uses MyriGE driver indentation.