Merge commit '720b16875295d57e0e6a4e0ec32db4d47412f896'
[unleashed.git] / kernel / drivers / net / myri10ge / myri10ge_lro.c
blob2c98e7d6905417fa48e989729e286577e3cbc560
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2007-2009 Myricom, Inc.  All rights reserved.
 * Use is subject to license terms.
 */
27 #include "myri10ge_var.h"
/*
 * Mask for the fragment-offset bits of ip_off; tested together with
 * IP_MF to reject fragmented packets.
 */
#define IP_OFFMASK 0x1fff
/* TCP option kind byte of the timestamp option. */
#define TCPOPT_TIMESTAMP 8
/* Length byte carried inside the timestamp option itself. */
#define TCPOLEN_TIMESTAMP 10
/*
 * Total TCP option bytes accepted by the LRO path: the timestamp option
 * preceded by two NOP pad bytes (presumably named after the layout in
 * Appendix A of the TCP timestamps RFC -- confirm).
 */
#define TCPOLEN_TSTAMP_APPA 12
/*
 * Add up the 16-bit words of a buffer and fold the 32-bit total into
 * 16 bits with end-around carry.  The result is NOT complemented.
 *
 * "len" is a byte count and is assumed to be a multiple of 4; two words
 * are consumed per iteration.  "raw" must be suitably aligned.  In
 * practice it will always enter aligned on at least a 4-byte boundary,
 * due to the alignment of our rx buffers.
 */
uint16_t
myri10ge_csum_generic(uint16_t *raw, int len)
{
	uint32_t sum = 0;
	int remaining;

	for (remaining = len; remaining > 0; remaining -= 4) {
		sum += raw[0];
		sum += raw[1];
		raw += 2;
	}

	/* fold twice so that a carry out of the first fold is absorbed */
	sum = (sum & 0xffff) + (sum >> 16);
	sum = (sum & 0xffff) + (sum >> 16);

	return ((uint16_t)sum);
}
/*
 * Add three 32-bit quantities and fold the total into a 16-bit sum with
 * end-around carry (not complemented).  Callers in this file pass the
 * IP source address, the IP destination address and
 * htons(tcp length + protocol) to obtain the TCP pseudo-header sum.
 *
 * The 64-bit total of three 32-bit addends can be up to 34 bits wide,
 * and two folds are not always enough to absorb every carry (e.g.
 * 0xffffffff + 0xffffffff + 1 leaves 0x10000 after two folds, which
 * would be truncated to 0).  Fold until nothing remains above bit 15.
 */
static uint16_t
myri10ge_in_pseudo(unsigned int a, unsigned int b,
    unsigned int c)
{
	uint64_t csum;

	csum = (uint64_t)a + b + c;
	while ((csum >> 16) != 0)
		csum = (csum >> 16) + (csum & 0xffff);
	return ((uint16_t)csum);
}
69 void
70 myri10ge_lro_flush(struct myri10ge_slice_state *ss, struct lro_entry *lro,
71 struct myri10ge_mblk_list *mbl)
73 struct ip *ip;
74 struct tcphdr *tcp;
75 uint32_t *ts_ptr;
76 uint32_t tcplen, tcp_csum;
78 if (lro->append_cnt) {
80 * incorporate the new len into the ip header and
81 * re-calculate the checksum
83 ip = lro->ip;
84 ip->ip_len = htons(lro->len - ETHERNET_HEADER_SIZE);
85 ip->ip_sum = 0;
86 ip->ip_sum = 0xffff ^
87 myri10ge_csum_generic((uint16_t *)ip, sizeof (*ip));
88 /* incorporate the latest ack into the tcp header */
89 tcp = (struct tcphdr *)(ip + 1);
90 tcp->th_ack = lro->ack_seq;
91 tcp->th_win = lro->window;
92 tcp->th_flags = lro->flags;
93 /* incorporate latest timestamp into the tcp header */
94 if (lro->timestamp) {
95 ts_ptr = (uint32_t *)(tcp + 1);
96 ts_ptr[1] = htonl(lro->tsval);
97 ts_ptr[2] = lro->tsecr;
100 * update checksum in tcp header by re-calculating the
101 * tcp pseudoheader checksum, and adding it to the checksum
102 * of the tcp payload data
104 tcp->th_sum = 0;
105 tcplen = lro->len - sizeof (*ip) - ETHERNET_HEADER_SIZE;
106 tcp_csum = lro->data_csum;
107 tcp_csum += myri10ge_in_pseudo(ip->ip_src.s_addr,
108 ip->ip_dst.s_addr, htons(tcplen + IPPROTO_TCP));
109 tcp_csum += myri10ge_csum_generic((uint16_t *)tcp,
110 tcp->th_off << 2);
111 tcp_csum = (tcp_csum & 0xffff) + (tcp_csum >> 16);
112 tcp_csum = (tcp_csum & 0xffff) + (tcp_csum >> 16);
113 tcp->th_sum = 0xffff ^ tcp_csum;
116 mac_hcksum_set(lro->m_head, 0, 0, 0,
117 0, HCK_IPV4_HDRCKSUM_OK | HCK_FULLCKSUM_OK);
119 mbl->cnt += lro->append_cnt;
120 myri10ge_mbl_append(ss, mbl, lro->m_head);
121 MYRI10GE_SLICE_STAT_INC(lro_flushed);
122 MYRI10GE_SLICE_STAT_ADD(lro_queued, lro->append_cnt + 1);
123 lro->m_head = NULL;
124 lro->timestamp = 0;
125 lro->append_cnt = 0;
126 lro->next = ss->lro_free;
127 ss->lro_free = lro;
131 myri10ge_lro_rx(struct myri10ge_slice_state *ss, mblk_t *m_head,
132 uint32_t csum, struct myri10ge_mblk_list *mbl)
134 struct ether_header *eh;
135 struct ip *ip;
136 struct tcphdr *tcp;
137 uint32_t *ts_ptr;
138 struct lro_entry *lro, *curr;
139 int hlen, ip_len, tcp_hdr_len, tcp_data_len;
140 int opt_bytes, trim;
141 int tot_len = MBLKL(m_head);
142 uint32_t seq, tmp_csum;
144 eh = (struct ether_header *)(void *)m_head->b_rptr;
145 if (eh->ether_type != htons(ETHERTYPE_IP))
146 return (EINVAL);
147 ip = (struct ip *)(void *)(eh + 1);
148 if (ip->ip_p != IPPROTO_TCP)
149 return (EINVAL);
151 /* ensure there are no options */
152 if ((ip->ip_hl << 2) != sizeof (*ip))
153 return (EINVAL);
155 /* .. and the packet is not fragmented */
156 if (ip->ip_off & htons(IP_MF|IP_OFFMASK))
157 return (EINVAL);
159 /* verify that the IP header checksum is correct */
160 tmp_csum = myri10ge_csum_generic((uint16_t *)ip, sizeof (*ip));
161 if (unlikely((tmp_csum ^ 0xffff) != 0)) {
162 MYRI10GE_SLICE_STAT_INC(lro_bad_csum);
163 return (EINVAL);
166 /* find the TCP header */
167 tcp = (struct tcphdr *)(ip + 1);
169 /* ensure no bits set besides ack or psh */
170 if ((tcp->th_flags & ~(TH_ACK | TH_PUSH)) != 0)
171 return (EINVAL);
174 * check for timestamps. Since the only option we handle are
175 * timestamps, we only have to handle the simple case of
176 * aligned timestamps
179 opt_bytes = (tcp->th_off << 2) - sizeof (*tcp);
180 tcp_hdr_len = sizeof (*tcp) + opt_bytes;
181 ts_ptr = (uint32_t *)(tcp + 1);
182 if (opt_bytes != 0) {
183 if (unlikely(opt_bytes != TCPOLEN_TSTAMP_APPA) ||
184 (*ts_ptr != ntohl(TCPOPT_NOP<<24|TCPOPT_NOP<<16|
185 TCPOPT_TIMESTAMP<<8|TCPOLEN_TIMESTAMP)))
186 return (EINVAL);
189 ip_len = ntohs(ip->ip_len);
190 tcp_data_len = ip_len - (tcp->th_off << 2) - sizeof (*ip);
193 * If frame is padded beyond the end of the IP packet,
194 * then we must trim the extra bytes off the end.
196 trim = tot_len - (ip_len + ETHERNET_HEADER_SIZE);
197 if (trim != 0) {
198 if (trim < 0) {
199 /* truncated packet */
200 return (EINVAL);
202 m_head->b_wptr -= trim;
203 tot_len -= trim;
206 /* Verify TCP checksum */
207 csum = ntohs((uint16_t)csum);
208 tmp_csum = csum + myri10ge_in_pseudo(ip->ip_src.s_addr,
209 ip->ip_dst.s_addr, htons(tcp_hdr_len + tcp_data_len + IPPROTO_TCP));
210 tmp_csum = (tmp_csum & 0xffff) + (tmp_csum >> 16);
211 tmp_csum = (tmp_csum & 0xffff) + (tmp_csum >> 16);
212 if (tmp_csum != 0xffff) {
213 MYRI10GE_SLICE_STAT_INC(lro_bad_csum);
214 return (EINVAL);
217 hlen = ip_len + ETHERNET_HEADER_SIZE - tcp_data_len;
218 seq = ntohl(tcp->th_seq);
220 for (lro = ss->lro_active; lro != NULL; lro = lro->next) {
221 if (lro->source_port == tcp->th_sport &&
222 lro->dest_port == tcp->th_dport &&
223 lro->source_ip == ip->ip_src.s_addr &&
224 lro->dest_ip == ip->ip_dst.s_addr) {
225 /* Try to append it */
227 if (unlikely(seq != lro->next_seq)) {
228 /* out of order packet */
229 if (ss->lro_active == lro) {
230 ss->lro_active = lro->next;
231 } else {
232 curr = ss->lro_active;
233 while (curr->next != lro)
234 curr = curr->next;
235 curr->next = lro->next;
237 myri10ge_lro_flush(ss, lro, mbl);
238 return (EINVAL);
241 if (opt_bytes) {
242 uint32_t tsval = ntohl(*(ts_ptr + 1));
243 /* make sure timestamp values are increasing */
244 if (unlikely(lro->tsval > tsval ||
245 *(ts_ptr + 2) == 0)) {
246 return (-8);
248 lro->tsval = tsval;
249 lro->tsecr = *(ts_ptr + 2);
252 lro->next_seq += tcp_data_len;
253 lro->ack_seq = tcp->th_ack;
254 lro->window = tcp->th_win;
255 lro->flags |= tcp->th_flags;
256 lro->append_cnt++;
257 if (tcp_data_len == 0) {
258 freeb(m_head);
259 return (0);
262 * subtract off the checksum of the tcp header
263 * from the hardware checksum, and add it to
264 * the stored tcp data checksum. Byteswap
265 * the checksum if the total length so far is
266 * odd
268 tmp_csum = myri10ge_csum_generic((uint16_t *)tcp,
269 tcp_hdr_len);
270 csum = csum + (tmp_csum ^ 0xffff);
271 csum = (csum & 0xffff) + (csum >> 16);
272 csum = (csum & 0xffff) + (csum >> 16);
273 if (lro->len & 0x1) {
274 /* Odd number of bytes so far, flip bytes */
275 csum = ((csum << 8) | (csum >> 8)) & 0xffff;
277 csum = csum + lro->data_csum;
278 csum = (csum & 0xffff) + (csum >> 16);
279 csum = (csum & 0xffff) + (csum >> 16);
280 lro->data_csum = csum;
282 lro->len += tcp_data_len;
285 * adjust mblk so that rptr points to
286 * the first byte of the payload
288 m_head->b_rptr += hlen;
289 /* append mbuf chain */
290 lro->m_tail->b_cont = m_head;
291 /* advance the last pointer */
292 lro->m_tail = m_head;
293 /* flush packet if required */
294 if (lro->len > (65535 - myri10ge_mtu) ||
295 (lro->append_cnt + 1) == myri10ge_lro_max_aggr) {
296 if (ss->lro_active == lro) {
297 ss->lro_active = lro->next;
298 } else {
299 curr = ss->lro_active;
300 while (curr->next != lro)
301 curr = curr->next;
302 curr->next = lro->next;
304 myri10ge_lro_flush(ss, lro, mbl);
306 return (0);
310 if (ss->lro_free == NULL)
311 return (ENOMEM);
313 /* start a new chain */
314 lro = ss->lro_free;
315 ss->lro_free = lro->next;
316 lro->next = ss->lro_active;
317 ss->lro_active = lro;
318 lro->source_port = tcp->th_sport;
319 lro->dest_port = tcp->th_dport;
320 lro->source_ip = ip->ip_src.s_addr;
321 lro->dest_ip = ip->ip_dst.s_addr;
322 lro->next_seq = seq + tcp_data_len;
323 lro->mss = (uint16_t)tcp_data_len;
324 lro->ack_seq = tcp->th_ack;
325 lro->window = tcp->th_win;
326 lro->flags = tcp->th_flags;
329 * save the checksum of just the TCP payload by
330 * subtracting off the checksum of the TCP header from
331 * the entire hardware checksum
332 * Since IP header checksum is correct, checksum over
333 * the IP header is -0. Substracting -0 is unnecessary.
335 tmp_csum = myri10ge_csum_generic((uint16_t *)tcp, tcp_hdr_len);
336 csum = csum + (tmp_csum ^ 0xffff);
337 csum = (csum & 0xffff) + (csum >> 16);
338 csum = (csum & 0xffff) + (csum >> 16);
339 lro->data_csum = csum;
340 lro->ip = ip;
342 /* record timestamp if it is present */
343 if (opt_bytes) {
344 lro->timestamp = 1;
345 lro->tsval = ntohl(*(ts_ptr + 1));
346 lro->tsecr = *(ts_ptr + 2);
348 lro->len = tot_len;
349 lro->m_head = m_head;
350 lro->m_tail = m_head;
351 return (0);
/*
 *  This file uses MyriGE driver indentation.
 *
 * Local Variables:
 * c-file-style:"sun"
 * tab-width:8
 * End:
 */