get mxge to build, stage 1/many
[dragonfly.git] / sys / dev / netif / mxge / mxge_lro.c
blob784803cf2f61fa0313b62439cb9b7839947cd7f3
1 /******************************************************************************
3 Copyright (c) 2007-2008, Myricom Inc.
4 All rights reserved.
6 Redistribution and use in source and binary forms, with or without
7 modification, are permitted provided that the following conditions are met:
9 1. Redistributions of source code must retain the above copyright notice,
10 this list of conditions and the following disclaimer.
12 2. Neither the name of the Myricom Inc, nor the names of its
13 contributors may be used to endorse or promote products derived from
14 this software without specific prior written permission.
16 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 POSSIBILITY OF SUCH DAMAGE.
28 ***************************************************************************/
30 #include <sys/cdefs.h>
31 /*__FBSDID("$FreeBSD: src/sys/dev/mxge/mxge_lro.c,v 1.8 2009/06/23 17:42:06 gallatin Exp $");*/
33 #include <sys/param.h>
34 #include <sys/systm.h>
35 #include <sys/endian.h>
36 #include <sys/mbuf.h>
37 #include <sys/kernel.h>
38 #include <sys/socket.h>
39 #include <sys/sysctl.h>
40 #include <sys/bus.h>
42 #include <net/if.h>
43 #include <net/ethernet.h>
44 #include <net/if_media.h>
46 #include <netinet/in_systm.h>
47 #include <netinet/in.h>
48 #include <netinet/ip.h>
49 #include <netinet/tcp.h>
51 #include <machine/bus.h>
52 #include <machine/in_cksum.h>
54 #include <dev/mxge/mxge_mcp.h>
55 #include <dev/mxge/if_mxge_var.h>
57 #include "opt_inet.h"
59 #ifdef INET
/*
 * Compute the 16-bit ones'-complement sum of a buffer, folding the carries
 * back into the low 16 bits.
 *
 * raw: start of the data; it is read as 16-bit words in memory order.
 * len: length in bytes.  The callers in this file pass header lengths
 *      that are multiples of 4.  Only whole 16-bit words that lie within
 *      len are read, so the buffer is never over-read; a trailing odd
 *      byte, if any, is ignored.
 *
 * Returns the folded sum.  The result is NOT complemented: a region that
 * already contains a correct checksum sums to 0xffff.
 */
static uint16_t
mxge_csum_generic(const uint16_t *raw, int len)
{
	uint32_t csum = 0;

	for (; len >= 2; len -= 2) {
		csum += *raw;
		raw++;
	}

	/*
	 * The first fold leaves at most 0x1fffe, so a second fold always
	 * brings the value down to 16 bits.
	 */
	csum = (csum >> 16) + (csum & 0xffff);
	csum = (csum >> 16) + (csum & 0xffff);
	return (uint16_t)csum;
}
80 void
81 mxge_lro_flush(struct mxge_slice_state *ss, struct lro_entry *lro)
83 mxge_softc_t *mgp = ss->sc;
84 struct ifnet *ifp;
85 struct ip *ip;
86 struct tcphdr *tcp;
87 uint32_t *ts_ptr;
88 uint32_t tcplen, tcp_csum;
90 if (lro->append_cnt) {
91 /* incorporate the new len into the ip header and
92 * re-calculate the checksum */
93 ip = lro->ip;
94 ip->ip_len = htons(lro->len - ETHER_HDR_LEN);
95 ip->ip_sum = 0;
96 ip->ip_sum = 0xffff ^
97 mxge_csum_generic((uint16_t*)ip,
98 sizeof (*ip));
100 lro->m_head->m_pkthdr.csum_flags = CSUM_IP_CHECKED |
101 CSUM_IP_VALID | CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
102 lro->m_head->m_pkthdr.csum_data = 0xffff;
103 lro->m_head->m_pkthdr.len = lro->len;
105 /* incorporate the latest ack into the tcp header */
106 tcp = (struct tcphdr *) (ip + 1);
107 tcp->th_ack = lro->ack_seq;
108 tcp->th_win = lro->window;
109 /* incorporate latest timestamp into the tcp header */
110 if (lro->timestamp) {
111 ts_ptr = (uint32_t *)(tcp + 1);
112 ts_ptr[1] = htonl(lro->tsval);
113 ts_ptr[2] = lro->tsecr;
116 * update checksum in tcp header by re-calculating the
117 * tcp pseudoheader checksum, and adding it to the checksum
118 * of the tcp payload data
120 tcp->th_sum = 0;
121 tcplen = lro->len - sizeof(*ip) - ETHER_HDR_LEN;
122 tcp_csum = lro->data_csum;
123 tcp_csum += in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
124 htons(tcplen + IPPROTO_TCP));
125 tcp_csum += mxge_csum_generic((uint16_t*)tcp,
126 tcp->th_off << 2);
127 tcp_csum = (tcp_csum & 0xffff) + (tcp_csum >> 16);
128 tcp_csum = (tcp_csum & 0xffff) + (tcp_csum >> 16);
129 #if 0
130 IOLog("pseudo = 0x%x, generic = 0x%x, sum = %x\n",
131 in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
132 htons(tcplen + IPPROTO_TCP)),
133 mxge_csum_generic((uint16_t*)tcp,
134 tcp->th_off << 2),
135 htons(0xffff ^ tcp_csum));
136 #endif
137 tcp->th_sum = 0xffff ^ tcp_csum;
139 ifp = mgp->ifp;
140 (*ifp->if_input)(mgp->ifp, lro->m_head);
141 ss->lro_queued += lro->append_cnt + 1;
142 ss->lro_flushed++;
143 lro->m_head = NULL;
144 lro->timestamp = 0;
145 lro->append_cnt = 0;
146 SLIST_INSERT_HEAD(&ss->lro_free, lro, next);
150 mxge_lro_rx(struct mxge_slice_state *ss, struct mbuf *m_head, uint32_t csum)
152 struct ether_header *eh;
153 struct ip *ip;
154 struct tcphdr *tcp;
155 uint32_t *ts_ptr;
156 struct mbuf *m_nxt, *m_tail;
157 struct lro_entry *lro;
158 int hlen, ip_len, tcp_hdr_len, tcp_data_len, tot_len;
159 int opt_bytes, trim;
160 uint32_t seq, tmp_csum, device_mtu;
162 eh = mtod(m_head, struct ether_header *);
163 if (eh->ether_type != htons(ETHERTYPE_IP))
164 return 1;
165 ip = (struct ip *) (eh + 1);
166 if (ip->ip_p != IPPROTO_TCP)
167 return 1;
169 /* ensure there are no options */
170 if ((ip->ip_hl << 2) != sizeof (*ip))
171 return -1;
173 /* .. and the packet is not fragmented */
174 if (ip->ip_off & htons(IP_MF|IP_OFFMASK))
175 return -1;
177 /* verify that the IP header checksum is correct */
178 tmp_csum = mxge_csum_generic((uint16_t *)ip, sizeof (*ip));
179 if (__predict_false((tmp_csum ^ 0xffff) != 0)) {
180 ss->lro_bad_csum++;
181 return -1;
184 /* find the TCP header */
185 tcp = (struct tcphdr *) (ip + 1);
187 /* ensure no bits set besides ack or psh */
188 if ((tcp->th_flags & ~(TH_ACK | TH_PUSH)) != 0)
189 return -1;
191 /* check for timestamps. Since the only option we handle are
192 timestamps, we only have to handle the simple case of
193 aligned timestamps */
195 opt_bytes = (tcp->th_off << 2) - sizeof (*tcp);
196 tcp_hdr_len = sizeof (*tcp) + opt_bytes;
197 ts_ptr = (uint32_t *)(tcp + 1);
198 if (opt_bytes != 0) {
199 if (__predict_false(opt_bytes != TCPOLEN_TSTAMP_APPA) ||
200 (*ts_ptr != ntohl(TCPOPT_NOP<<24|TCPOPT_NOP<<16|TCPOPT_TIMESTAMP<<8|TCPOLEN_TIMESTAMP)))
201 return -1;
204 ip_len = ntohs(ip->ip_len);
205 tcp_data_len = ip_len - (tcp->th_off << 2) - sizeof (*ip);
209 * If frame is padded beyond the end of the IP packet,
210 * then we must trim the extra bytes off the end.
212 tot_len = m_head->m_pkthdr.len;
213 trim = tot_len - (ip_len + ETHER_HDR_LEN);
214 if (trim != 0) {
215 if (trim < 0) {
216 /* truncated packet */
217 return -1;
219 m_adj(m_head, -trim);
220 tot_len = m_head->m_pkthdr.len;
223 m_nxt = m_head;
224 m_tail = NULL; /* -Wuninitialized */
225 while (m_nxt != NULL) {
226 m_tail = m_nxt;
227 m_nxt = m_tail->m_next;
230 hlen = ip_len + ETHER_HDR_LEN - tcp_data_len;
231 seq = ntohl(tcp->th_seq);
233 SLIST_FOREACH(lro, &ss->lro_active, next) {
234 if (lro->source_port == tcp->th_sport &&
235 lro->dest_port == tcp->th_dport &&
236 lro->source_ip == ip->ip_src.s_addr &&
237 lro->dest_ip == ip->ip_dst.s_addr) {
238 /* Try to append it */
240 if (__predict_false(seq != lro->next_seq)) {
241 /* out of order packet */
242 SLIST_REMOVE(&ss->lro_active, lro,
243 lro_entry, next);
244 mxge_lro_flush(ss, lro);
245 return -1;
248 if (opt_bytes) {
249 uint32_t tsval = ntohl(*(ts_ptr + 1));
250 /* make sure timestamp values are increasing */
251 if (__predict_false(lro->tsval > tsval ||
252 *(ts_ptr + 2) == 0)) {
253 return -1;
255 lro->tsval = tsval;
256 lro->tsecr = *(ts_ptr + 2);
259 lro->next_seq += tcp_data_len;
260 lro->ack_seq = tcp->th_ack;
261 lro->window = tcp->th_win;
262 lro->append_cnt++;
263 if (tcp_data_len == 0) {
264 m_freem(m_head);
265 return 0;
267 /* subtract off the checksum of the tcp header
268 * from the hardware checksum, and add it to the
269 * stored tcp data checksum. Byteswap the checksum
270 * if the total length so far is odd
272 tmp_csum = mxge_csum_generic((uint16_t*)tcp,
273 tcp_hdr_len);
274 csum = csum + (tmp_csum ^ 0xffff);
275 csum = (csum & 0xffff) + (csum >> 16);
276 csum = (csum & 0xffff) + (csum >> 16);
277 if (lro->len & 0x1) {
278 /* Odd number of bytes so far, flip bytes */
279 csum = ((csum << 8) | (csum >> 8)) & 0xffff;
281 csum = csum + lro->data_csum;
282 csum = (csum & 0xffff) + (csum >> 16);
283 csum = (csum & 0xffff) + (csum >> 16);
284 lro->data_csum = csum;
286 lro->len += tcp_data_len;
288 /* adjust mbuf so that m->m_data points to
289 the first byte of the payload */
290 m_adj(m_head, hlen);
291 /* append mbuf chain */
292 lro->m_tail->m_next = m_head;
293 /* advance the last pointer */
294 lro->m_tail = m_tail;
295 /* flush packet if required */
296 device_mtu = ss->sc->ifp->if_mtu;
297 if (lro->len > (65535 - device_mtu)) {
298 SLIST_REMOVE(&ss->lro_active, lro,
299 lro_entry, next);
300 mxge_lro_flush(ss, lro);
302 return 0;
306 if (SLIST_EMPTY(&ss->lro_free))
307 return -1;
309 /* start a new chain */
310 lro = SLIST_FIRST(&ss->lro_free);
311 SLIST_REMOVE_HEAD(&ss->lro_free, next);
312 SLIST_INSERT_HEAD(&ss->lro_active, lro, next);
313 lro->source_port = tcp->th_sport;
314 lro->dest_port = tcp->th_dport;
315 lro->source_ip = ip->ip_src.s_addr;
316 lro->dest_ip = ip->ip_dst.s_addr;
317 lro->next_seq = seq + tcp_data_len;
318 lro->mss = tcp_data_len;
319 lro->ack_seq = tcp->th_ack;
320 lro->window = tcp->th_win;
322 /* save the checksum of just the TCP payload by
323 * subtracting off the checksum of the TCP header from
324 * the entire hardware checksum
325 * Since IP header checksum is correct, checksum over
326 * the IP header is -0. Substracting -0 is unnecessary.
328 tmp_csum = mxge_csum_generic((uint16_t*)tcp, tcp_hdr_len);
329 csum = csum + (tmp_csum ^ 0xffff);
330 csum = (csum & 0xffff) + (csum >> 16);
331 csum = (csum & 0xffff) + (csum >> 16);
332 lro->data_csum = csum;
334 lro->ip = ip;
335 /* record timestamp if it is present */
336 if (opt_bytes) {
337 lro->timestamp = 1;
338 lro->tsval = ntohl(*(ts_ptr + 1));
339 lro->tsecr = *(ts_ptr + 2);
341 lro->len = tot_len;
342 lro->m_head = m_head;
343 lro->m_tail = m_tail;
344 return 0;
347 #endif /* INET */
/*
  This file uses Myri10GE driver indentation.

  Local Variables:
  c-file-style:"linux"
  tab-width:8
  End:
*/