Remove inclusion of <sys/cdefs.h> from kernel .c files.
[dragonfly.git] / sys / dev / netif / mxge / mxge_lro.c
blobfe928781bd629351af631423eae6eb08e06184e3
1 /******************************************************************************
3 Copyright (c) 2007-2008, Myricom Inc.
4 All rights reserved.
6 Redistribution and use in source and binary forms, with or without
7 modification, are permitted provided that the following conditions are met:
9 1. Redistributions of source code must retain the above copyright notice,
10 this list of conditions and the following disclaimer.
12 2. Neither the name of the Myricom Inc, nor the names of its
13 contributors may be used to endorse or promote products derived from
14 this software without specific prior written permission.
16 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 POSSIBILITY OF SUCH DAMAGE.
28 $FreeBSD: src/sys/dev/mxge/mxge_lro.c,v 1.8 2009/06/23 17:42:06 gallatin Exp $
30 ***************************************************************************/
32 #include <sys/param.h>
33 #include <sys/systm.h>
34 #include <sys/endian.h>
35 #include <sys/mbuf.h>
36 #include <sys/kernel.h>
37 #include <sys/socket.h>
38 #include <sys/sysctl.h>
39 #include <sys/bus.h>
41 #include <net/if.h>
42 #include <net/ethernet.h>
43 #include <net/if_media.h>
45 #include <netinet/in_systm.h>
46 #include <netinet/in.h>
47 #include <netinet/ip.h>
48 #include <netinet/tcp.h>
50 #include <machine/bus.h>
51 #include <machine/in_cksum.h>
53 #include <dev/netif/mxge/mxge_mcp.h>
54 #include <dev/netif/mxge/if_mxge_var.h>
56 #include "opt_inet.h"
58 #ifdef INET
/*
 * Sum `len` bytes starting at `raw` as 16-bit words using one's-complement
 * (end-around carry) addition and return the folded 16-bit result.
 *
 * The words are added exactly as they sit in memory (no byte swapping) and
 * the result is NOT complemented; callers XOR it with 0xffff themselves.
 *
 * `len` is a byte count and is expected to be even; the callers in this
 * file pass header lengths that are multiples of 4.  Only whole 16-bit
 * words inside `len` are read, so a length that is not a multiple of 4
 * never causes a read past the end of the buffer (a trailing odd byte,
 * if any, is ignored).  A non-positive `len` yields 0.
 */
static uint16_t
mxge_csum_generic(const uint16_t *raw, int len)
{
	uint32_t csum = 0;

	for (; len >= 2; len -= 2) {
		csum += *raw++;
	}

	/* fold the carries back in twice so the result fits in 16 bits */
	csum = (csum >> 16) + (csum & 0xffff);
	csum = (csum >> 16) + (csum & 0xffff);
	return (uint16_t)csum;
}
79 void
80 mxge_lro_flush(struct mxge_slice_state *ss, struct lro_entry *lro)
82 mxge_softc_t *mgp = ss->sc;
83 struct ifnet *ifp;
84 struct ip *ip;
85 struct tcphdr *tcp;
86 uint32_t *ts_ptr;
87 uint32_t tcplen, tcp_csum;
89 if (lro->append_cnt) {
90 /* incorporate the new len into the ip header and
91 * re-calculate the checksum */
92 ip = lro->ip;
93 ip->ip_len = htons(lro->len - ETHER_HDR_LEN);
94 ip->ip_sum = 0;
95 ip->ip_sum = 0xffff ^
96 mxge_csum_generic((uint16_t*)ip,
97 sizeof (*ip));
99 lro->m_head->m_pkthdr.csum_flags = CSUM_IP_CHECKED |
100 CSUM_IP_VALID | CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
101 lro->m_head->m_pkthdr.csum_data = 0xffff;
102 lro->m_head->m_pkthdr.len = lro->len;
104 /* incorporate the latest ack into the tcp header */
105 tcp = (struct tcphdr *) (ip + 1);
106 tcp->th_ack = lro->ack_seq;
107 tcp->th_win = lro->window;
108 /* incorporate latest timestamp into the tcp header */
109 if (lro->timestamp) {
110 ts_ptr = (uint32_t *)(tcp + 1);
111 ts_ptr[1] = htonl(lro->tsval);
112 ts_ptr[2] = lro->tsecr;
115 * update checksum in tcp header by re-calculating the
116 * tcp pseudoheader checksum, and adding it to the checksum
117 * of the tcp payload data
119 tcp->th_sum = 0;
120 tcplen = lro->len - sizeof(*ip) - ETHER_HDR_LEN;
121 tcp_csum = lro->data_csum;
122 tcp_csum += in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
123 htons(tcplen + IPPROTO_TCP));
124 tcp_csum += mxge_csum_generic((uint16_t*)tcp,
125 tcp->th_off << 2);
126 tcp_csum = (tcp_csum & 0xffff) + (tcp_csum >> 16);
127 tcp_csum = (tcp_csum & 0xffff) + (tcp_csum >> 16);
128 #if 0
129 IOLog("pseudo = 0x%x, generic = 0x%x, sum = %x\n",
130 in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
131 htons(tcplen + IPPROTO_TCP)),
132 mxge_csum_generic((uint16_t*)tcp,
133 tcp->th_off << 2),
134 htons(0xffff ^ tcp_csum));
135 #endif
136 tcp->th_sum = 0xffff ^ tcp_csum;
138 ifp = mgp->ifp;
139 (*ifp->if_input)(mgp->ifp, lro->m_head);
140 ss->lro_queued += lro->append_cnt + 1;
141 ss->lro_flushed++;
142 lro->m_head = NULL;
143 lro->timestamp = 0;
144 lro->append_cnt = 0;
145 SLIST_INSERT_HEAD(&ss->lro_free, lro, next);
149 mxge_lro_rx(struct mxge_slice_state *ss, struct mbuf *m_head, uint32_t csum)
151 struct ether_header *eh;
152 struct ip *ip;
153 struct tcphdr *tcp;
154 uint32_t *ts_ptr;
155 struct mbuf *m_nxt, *m_tail;
156 struct lro_entry *lro;
157 int hlen, ip_len, tcp_hdr_len, tcp_data_len, tot_len;
158 int opt_bytes, trim;
159 uint32_t seq, tmp_csum, device_mtu;
161 eh = mtod(m_head, struct ether_header *);
162 if (eh->ether_type != htons(ETHERTYPE_IP))
163 return 1;
164 ip = (struct ip *) (eh + 1);
165 if (ip->ip_p != IPPROTO_TCP)
166 return 1;
168 /* ensure there are no options */
169 if ((ip->ip_hl << 2) != sizeof (*ip))
170 return -1;
172 /* .. and the packet is not fragmented */
173 if (ip->ip_off & htons(IP_MF|IP_OFFMASK))
174 return -1;
176 /* verify that the IP header checksum is correct */
177 tmp_csum = mxge_csum_generic((uint16_t *)ip, sizeof (*ip));
178 if (__predict_false((tmp_csum ^ 0xffff) != 0)) {
179 ss->lro_bad_csum++;
180 return -1;
183 /* find the TCP header */
184 tcp = (struct tcphdr *) (ip + 1);
186 /* ensure no bits set besides ack or psh */
187 if ((tcp->th_flags & ~(TH_ACK | TH_PUSH)) != 0)
188 return -1;
190 /* check for timestamps. Since the only option we handle are
191 timestamps, we only have to handle the simple case of
192 aligned timestamps */
194 opt_bytes = (tcp->th_off << 2) - sizeof (*tcp);
195 tcp_hdr_len = sizeof (*tcp) + opt_bytes;
196 ts_ptr = (uint32_t *)(tcp + 1);
197 if (opt_bytes != 0) {
198 if (__predict_false(opt_bytes != TCPOLEN_TSTAMP_APPA) ||
199 (*ts_ptr != ntohl(TCPOPT_NOP<<24|TCPOPT_NOP<<16|TCPOPT_TIMESTAMP<<8|TCPOLEN_TIMESTAMP)))
200 return -1;
203 ip_len = ntohs(ip->ip_len);
204 tcp_data_len = ip_len - (tcp->th_off << 2) - sizeof (*ip);
208 * If frame is padded beyond the end of the IP packet,
209 * then we must trim the extra bytes off the end.
211 tot_len = m_head->m_pkthdr.len;
212 trim = tot_len - (ip_len + ETHER_HDR_LEN);
213 if (trim != 0) {
214 if (trim < 0) {
215 /* truncated packet */
216 return -1;
218 m_adj(m_head, -trim);
219 tot_len = m_head->m_pkthdr.len;
222 m_nxt = m_head;
223 m_tail = NULL; /* -Wuninitialized */
224 while (m_nxt != NULL) {
225 m_tail = m_nxt;
226 m_nxt = m_tail->m_next;
229 hlen = ip_len + ETHER_HDR_LEN - tcp_data_len;
230 seq = ntohl(tcp->th_seq);
232 SLIST_FOREACH(lro, &ss->lro_active, next) {
233 if (lro->source_port == tcp->th_sport &&
234 lro->dest_port == tcp->th_dport &&
235 lro->source_ip == ip->ip_src.s_addr &&
236 lro->dest_ip == ip->ip_dst.s_addr) {
237 /* Try to append it */
239 if (__predict_false(seq != lro->next_seq)) {
240 /* out of order packet */
241 SLIST_REMOVE(&ss->lro_active, lro,
242 lro_entry, next);
243 mxge_lro_flush(ss, lro);
244 return -1;
247 if (opt_bytes) {
248 uint32_t tsval = ntohl(*(ts_ptr + 1));
249 /* make sure timestamp values are increasing */
250 if (__predict_false(lro->tsval > tsval ||
251 *(ts_ptr + 2) == 0)) {
252 return -1;
254 lro->tsval = tsval;
255 lro->tsecr = *(ts_ptr + 2);
258 lro->next_seq += tcp_data_len;
259 lro->ack_seq = tcp->th_ack;
260 lro->window = tcp->th_win;
261 lro->append_cnt++;
262 if (tcp_data_len == 0) {
263 m_freem(m_head);
264 return 0;
266 /* subtract off the checksum of the tcp header
267 * from the hardware checksum, and add it to the
268 * stored tcp data checksum. Byteswap the checksum
269 * if the total length so far is odd
271 tmp_csum = mxge_csum_generic((uint16_t*)tcp,
272 tcp_hdr_len);
273 csum = csum + (tmp_csum ^ 0xffff);
274 csum = (csum & 0xffff) + (csum >> 16);
275 csum = (csum & 0xffff) + (csum >> 16);
276 if (lro->len & 0x1) {
277 /* Odd number of bytes so far, flip bytes */
278 csum = ((csum << 8) | (csum >> 8)) & 0xffff;
280 csum = csum + lro->data_csum;
281 csum = (csum & 0xffff) + (csum >> 16);
282 csum = (csum & 0xffff) + (csum >> 16);
283 lro->data_csum = csum;
285 lro->len += tcp_data_len;
287 /* adjust mbuf so that m->m_data points to
288 the first byte of the payload */
289 m_adj(m_head, hlen);
290 /* append mbuf chain */
291 lro->m_tail->m_next = m_head;
292 /* advance the last pointer */
293 lro->m_tail = m_tail;
294 /* flush packet if required */
295 device_mtu = ss->sc->ifp->if_mtu;
296 if (lro->len > (65535 - device_mtu)) {
297 SLIST_REMOVE(&ss->lro_active, lro,
298 lro_entry, next);
299 mxge_lro_flush(ss, lro);
301 return 0;
305 if (SLIST_EMPTY(&ss->lro_free))
306 return -1;
308 /* start a new chain */
309 lro = SLIST_FIRST(&ss->lro_free);
310 SLIST_REMOVE_HEAD(&ss->lro_free, next);
311 SLIST_INSERT_HEAD(&ss->lro_active, lro, next);
312 lro->source_port = tcp->th_sport;
313 lro->dest_port = tcp->th_dport;
314 lro->source_ip = ip->ip_src.s_addr;
315 lro->dest_ip = ip->ip_dst.s_addr;
316 lro->next_seq = seq + tcp_data_len;
317 lro->mss = tcp_data_len;
318 lro->ack_seq = tcp->th_ack;
319 lro->window = tcp->th_win;
321 /* save the checksum of just the TCP payload by
322 * subtracting off the checksum of the TCP header from
323 * the entire hardware checksum
324 * Since IP header checksum is correct, checksum over
325 * the IP header is -0. Substracting -0 is unnecessary.
327 tmp_csum = mxge_csum_generic((uint16_t*)tcp, tcp_hdr_len);
328 csum = csum + (tmp_csum ^ 0xffff);
329 csum = (csum & 0xffff) + (csum >> 16);
330 csum = (csum & 0xffff) + (csum >> 16);
331 lro->data_csum = csum;
333 lro->ip = ip;
334 /* record timestamp if it is present */
335 if (opt_bytes) {
336 lro->timestamp = 1;
337 lro->tsval = ntohl(*(ts_ptr + 1));
338 lro->tsecr = *(ts_ptr + 2);
340 lro->len = tot_len;
341 lro->m_head = m_head;
342 lro->m_tail = m_tail;
343 return 0;
346 #endif /* INET */
348 This file uses Myri10GE driver indentation.
350 Local Variables:
351 c-file-style:"linux"
352 tab-width:8
353 End: