/*
 *  linux/net/ipv4/inet_lro.c
 *
 *  Large Receive Offload (ipv4 / tcp)
 *
 *  (C) Copyright IBM Corp. 2007
 *
 *  Authors:
 *       Jan-Bernd Themann <themann@de.ibm.com>
 *       Christoph Raisch <raisch@de.ibm.com>
 *
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2, or (at your option)
 * any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */
29 #include <linux/module.h>
30 #include <linux/if_vlan.h>
31 #include <linux/inet_lro.h>
33 MODULE_LICENSE("GPL");
34 MODULE_AUTHOR("Jan-Bernd Themann <themann@de.ibm.com>");
35 MODULE_DESCRIPTION("Large Receive Offload (ipv4 / tcp)");
/*
 * Header length helpers.  doff and ihl count 32-bit words, so shifting
 * left by two yields bytes.  Arguments are parenthesized so that any
 * pointer expression (not only a plain identifier) can be passed.
 */
#define TCP_HDR_LEN(tcph) ((tcph)->doff << 2)
#define IP_HDR_LEN(iph) ((iph)->ihl << 2)

/* TCP payload bytes: IP total length minus both header lengths. */
#define TCP_PAYLOAD_LENGTH(iph, tcph) \
	(ntohs((iph)->tot_len) - IP_HDR_LEN(iph) - TCP_HDR_LEN(tcph))

/* Header lengths (in 32-bit words) that are accepted for aggregation. */
#define IPH_LEN_WO_OPTIONS 5
#define TCPH_LEN_WO_OPTIONS 5
#define TCPH_LEN_W_TIMESTAMP 8

/* Default number of header bytes copied into the skb's linear area. */
#define LRO_MAX_PG_HLEN 64

/*
 * Increment one counter in the manager's statistics.  Wrapped in
 * do { } while (0) so the macro behaves as a single statement, e.g. as
 * the body of an if that is followed by an else.
 */
#define LRO_INC_STATS(lro_mgr, attr) \
	do { (lro_mgr)->stats.attr++; } while (0)
51 * Basic tcp checks whether packet is suitable for LRO
54 static int lro_tcp_ip_check(struct iphdr
*iph
, struct tcphdr
*tcph
,
55 int len
, struct net_lro_desc
*lro_desc
)
57 /* check ip header: don't aggregate padded frames */
58 if (ntohs(iph
->tot_len
) != len
)
61 if (TCP_PAYLOAD_LENGTH(iph
, tcph
) == 0)
64 if (iph
->ihl
!= IPH_LEN_WO_OPTIONS
)
67 if (tcph
->cwr
|| tcph
->ece
|| tcph
->urg
|| !tcph
->ack
||
68 tcph
->rst
|| tcph
->syn
|| tcph
->fin
)
71 if (INET_ECN_is_ce(ipv4_get_dsfield(iph
)))
74 if (tcph
->doff
!= TCPH_LEN_WO_OPTIONS
&&
75 tcph
->doff
!= TCPH_LEN_W_TIMESTAMP
)
78 /* check tcp options (only timestamp allowed) */
79 if (tcph
->doff
== TCPH_LEN_W_TIMESTAMP
) {
80 __be32
*topt
= (__be32
*)(tcph
+ 1);
82 if (*topt
!= htonl((TCPOPT_NOP
<< 24) | (TCPOPT_NOP
<< 16)
83 | (TCPOPT_TIMESTAMP
<< 8)
87 /* timestamp should be in right order */
89 if (lro_desc
&& after(ntohl(lro_desc
->tcp_rcv_tsval
),
93 /* timestamp reply should not be zero */
102 static void lro_update_tcp_ip_header(struct net_lro_desc
*lro_desc
)
104 struct iphdr
*iph
= lro_desc
->iph
;
105 struct tcphdr
*tcph
= lro_desc
->tcph
;
109 tcph
->ack_seq
= lro_desc
->tcp_ack
;
110 tcph
->window
= lro_desc
->tcp_window
;
112 if (lro_desc
->tcp_saw_tstamp
) {
113 p
= (__be32
*)(tcph
+ 1);
114 *(p
+2) = lro_desc
->tcp_rcv_tsecr
;
117 iph
->tot_len
= htons(lro_desc
->ip_tot_len
);
120 iph
->check
= ip_fast_csum((u8
*)lro_desc
->iph
, iph
->ihl
);
123 tcp_hdr_csum
= csum_partial(tcph
, TCP_HDR_LEN(tcph
), 0);
124 lro_desc
->data_csum
= csum_add(lro_desc
->data_csum
, tcp_hdr_csum
);
125 tcph
->check
= csum_tcpudp_magic(iph
->saddr
, iph
->daddr
,
126 lro_desc
->ip_tot_len
-
127 IP_HDR_LEN(iph
), IPPROTO_TCP
,
128 lro_desc
->data_csum
);
131 static __wsum
lro_tcp_data_csum(struct iphdr
*iph
, struct tcphdr
*tcph
, int len
)
135 __wsum tcp_ps_hdr_csum
;
137 tcp_csum
= ~csum_unfold(tcph
->check
);
138 tcp_hdr_csum
= csum_partial(tcph
, TCP_HDR_LEN(tcph
), tcp_csum
);
140 tcp_ps_hdr_csum
= csum_tcpudp_nofold(iph
->saddr
, iph
->daddr
,
141 len
+ TCP_HDR_LEN(tcph
),
144 return csum_sub(csum_sub(tcp_csum
, tcp_hdr_csum
),
148 static void lro_init_desc(struct net_lro_desc
*lro_desc
, struct sk_buff
*skb
,
149 struct iphdr
*iph
, struct tcphdr
*tcph
,
150 u16 vlan_tag
, struct vlan_group
*vgrp
)
154 u32 tcp_data_len
= TCP_PAYLOAD_LENGTH(iph
, tcph
);
156 nr_frags
= skb_shinfo(skb
)->nr_frags
;
157 lro_desc
->parent
= skb
;
158 lro_desc
->next_frag
= &(skb_shinfo(skb
)->frags
[nr_frags
]);
160 lro_desc
->tcph
= tcph
;
161 lro_desc
->tcp_next_seq
= ntohl(tcph
->seq
) + tcp_data_len
;
162 lro_desc
->tcp_ack
= tcph
->ack_seq
;
163 lro_desc
->tcp_window
= tcph
->window
;
165 lro_desc
->pkt_aggr_cnt
= 1;
166 lro_desc
->ip_tot_len
= ntohs(iph
->tot_len
);
168 if (tcph
->doff
== 8) {
169 ptr
= (__be32
*)(tcph
+1);
170 lro_desc
->tcp_saw_tstamp
= 1;
171 lro_desc
->tcp_rcv_tsval
= *(ptr
+1);
172 lro_desc
->tcp_rcv_tsecr
= *(ptr
+2);
175 lro_desc
->mss
= tcp_data_len
;
176 lro_desc
->vgrp
= vgrp
;
177 lro_desc
->vlan_tag
= vlan_tag
;
178 lro_desc
->active
= 1;
180 lro_desc
->data_csum
= lro_tcp_data_csum(iph
, tcph
,
184 static inline void lro_clear_desc(struct net_lro_desc
*lro_desc
)
186 memset(lro_desc
, 0, sizeof(struct net_lro_desc
));
189 static void lro_add_common(struct net_lro_desc
*lro_desc
, struct iphdr
*iph
,
190 struct tcphdr
*tcph
, int tcp_data_len
)
192 struct sk_buff
*parent
= lro_desc
->parent
;
195 lro_desc
->pkt_aggr_cnt
++;
196 lro_desc
->ip_tot_len
+= tcp_data_len
;
197 lro_desc
->tcp_next_seq
+= tcp_data_len
;
198 lro_desc
->tcp_window
= tcph
->window
;
199 lro_desc
->tcp_ack
= tcph
->ack_seq
;
201 /* don't update tcp_rcv_tsval, would not work with PAWS */
202 if (lro_desc
->tcp_saw_tstamp
) {
203 topt
= (__be32
*) (tcph
+ 1);
204 lro_desc
->tcp_rcv_tsecr
= *(topt
+ 2);
207 lro_desc
->data_csum
= csum_block_add(lro_desc
->data_csum
,
208 lro_tcp_data_csum(iph
, tcph
,
212 parent
->len
+= tcp_data_len
;
213 parent
->data_len
+= tcp_data_len
;
214 if (tcp_data_len
> lro_desc
->mss
)
215 lro_desc
->mss
= tcp_data_len
;
218 static void lro_add_packet(struct net_lro_desc
*lro_desc
, struct sk_buff
*skb
,
219 struct iphdr
*iph
, struct tcphdr
*tcph
)
221 struct sk_buff
*parent
= lro_desc
->parent
;
222 int tcp_data_len
= TCP_PAYLOAD_LENGTH(iph
, tcph
);
224 lro_add_common(lro_desc
, iph
, tcph
, tcp_data_len
);
226 skb_pull(skb
, (skb
->len
- tcp_data_len
));
227 parent
->truesize
+= skb
->truesize
;
229 if (lro_desc
->last_skb
)
230 lro_desc
->last_skb
->next
= skb
;
232 skb_shinfo(parent
)->frag_list
= skb
;
234 lro_desc
->last_skb
= skb
;
237 static void lro_add_frags(struct net_lro_desc
*lro_desc
,
238 int len
, int hlen
, int truesize
,
239 struct skb_frag_struct
*skb_frags
,
240 struct iphdr
*iph
, struct tcphdr
*tcph
)
242 struct sk_buff
*skb
= lro_desc
->parent
;
243 int tcp_data_len
= TCP_PAYLOAD_LENGTH(iph
, tcph
);
245 lro_add_common(lro_desc
, iph
, tcph
, tcp_data_len
);
247 skb
->truesize
+= truesize
;
249 skb_frags
[0].page_offset
+= hlen
;
250 skb_frags
[0].size
-= hlen
;
252 while (tcp_data_len
> 0) {
253 *(lro_desc
->next_frag
) = *skb_frags
;
254 tcp_data_len
-= skb_frags
->size
;
255 lro_desc
->next_frag
++;
257 skb_shinfo(skb
)->nr_frags
++;
261 static int lro_check_tcp_conn(struct net_lro_desc
*lro_desc
,
265 if ((lro_desc
->iph
->saddr
!= iph
->saddr
) ||
266 (lro_desc
->iph
->daddr
!= iph
->daddr
) ||
267 (lro_desc
->tcph
->source
!= tcph
->source
) ||
268 (lro_desc
->tcph
->dest
!= tcph
->dest
))
273 static struct net_lro_desc
*lro_get_desc(struct net_lro_mgr
*lro_mgr
,
274 struct net_lro_desc
*lro_arr
,
278 struct net_lro_desc
*lro_desc
= NULL
;
279 struct net_lro_desc
*tmp
;
280 int max_desc
= lro_mgr
->max_desc
;
283 for (i
= 0; i
< max_desc
; i
++) {
286 if (!lro_check_tcp_conn(tmp
, iph
, tcph
)) {
292 for (i
= 0; i
< max_desc
; i
++) {
293 if (!lro_arr
[i
].active
) {
294 lro_desc
= &lro_arr
[i
];
299 LRO_INC_STATS(lro_mgr
, no_desc
);
304 static void lro_flush(struct net_lro_mgr
*lro_mgr
,
305 struct net_lro_desc
*lro_desc
)
307 if (lro_desc
->pkt_aggr_cnt
> 1)
308 lro_update_tcp_ip_header(lro_desc
);
310 skb_shinfo(lro_desc
->parent
)->gso_size
= lro_desc
->mss
;
312 if (lro_desc
->vgrp
) {
313 if (lro_mgr
->features
& LRO_F_NAPI
)
314 vlan_hwaccel_receive_skb(lro_desc
->parent
,
318 vlan_hwaccel_rx(lro_desc
->parent
,
323 if (lro_mgr
->features
& LRO_F_NAPI
)
324 netif_receive_skb(lro_desc
->parent
);
326 netif_rx(lro_desc
->parent
);
329 LRO_INC_STATS(lro_mgr
, flushed
);
330 lro_clear_desc(lro_desc
);
333 static int __lro_proc_skb(struct net_lro_mgr
*lro_mgr
, struct sk_buff
*skb
,
334 struct vlan_group
*vgrp
, u16 vlan_tag
, void *priv
)
336 struct net_lro_desc
*lro_desc
;
340 int vlan_hdr_len
= 0;
342 if (!lro_mgr
->get_skb_header
||
343 lro_mgr
->get_skb_header(skb
, (void *)&iph
, (void *)&tcph
,
347 if (!(flags
& LRO_IPV4
) || !(flags
& LRO_TCP
))
350 lro_desc
= lro_get_desc(lro_mgr
, lro_mgr
->lro_arr
, iph
, tcph
);
354 if ((skb
->protocol
== htons(ETH_P_8021Q
)) &&
355 !(lro_mgr
->features
& LRO_F_EXTRACT_VLAN_ID
))
356 vlan_hdr_len
= VLAN_HLEN
;
358 if (!lro_desc
->active
) { /* start new lro session */
359 if (lro_tcp_ip_check(iph
, tcph
, skb
->len
- vlan_hdr_len
, NULL
))
362 skb
->ip_summed
= lro_mgr
->ip_summed_aggr
;
363 lro_init_desc(lro_desc
, skb
, iph
, tcph
, vlan_tag
, vgrp
);
364 LRO_INC_STATS(lro_mgr
, aggregated
);
368 if (lro_desc
->tcp_next_seq
!= ntohl(tcph
->seq
))
371 if (lro_tcp_ip_check(iph
, tcph
, skb
->len
, lro_desc
))
374 lro_add_packet(lro_desc
, skb
, iph
, tcph
);
375 LRO_INC_STATS(lro_mgr
, aggregated
);
377 if ((lro_desc
->pkt_aggr_cnt
>= lro_mgr
->max_aggr
) ||
378 lro_desc
->parent
->len
> (0xFFFF - lro_mgr
->dev
->mtu
))
379 lro_flush(lro_mgr
, lro_desc
);
383 out2
: /* send aggregated SKBs to stack */
384 lro_flush(lro_mgr
, lro_desc
);
391 static struct sk_buff
*lro_gen_skb(struct net_lro_mgr
*lro_mgr
,
392 struct skb_frag_struct
*frags
,
393 int len
, int true_size
,
395 int hlen
, __wsum sum
,
399 struct skb_frag_struct
*skb_frags
;
401 int hdr_len
= min(len
, hlen
);
403 skb
= netdev_alloc_skb(lro_mgr
->dev
, hlen
+ lro_mgr
->frag_align_pad
);
407 skb_reserve(skb
, lro_mgr
->frag_align_pad
);
409 skb
->data_len
= len
- hdr_len
;
410 skb
->truesize
+= true_size
;
411 skb
->tail
+= hdr_len
;
413 memcpy(skb
->data
, mac_hdr
, hdr_len
);
415 skb_frags
= skb_shinfo(skb
)->frags
;
416 while (data_len
> 0) {
418 data_len
-= frags
->size
;
421 skb_shinfo(skb
)->nr_frags
++;
424 skb_shinfo(skb
)->frags
[0].page_offset
+= hdr_len
;
425 skb_shinfo(skb
)->frags
[0].size
-= hdr_len
;
427 skb
->ip_summed
= ip_summed
;
429 skb
->protocol
= eth_type_trans(skb
, lro_mgr
->dev
);
433 static struct sk_buff
*__lro_proc_segment(struct net_lro_mgr
*lro_mgr
,
434 struct skb_frag_struct
*frags
,
435 int len
, int true_size
,
436 struct vlan_group
*vgrp
,
437 u16 vlan_tag
, void *priv
, __wsum sum
)
439 struct net_lro_desc
*lro_desc
;
446 int hdr_len
= LRO_MAX_PG_HLEN
;
447 int vlan_hdr_len
= 0;
449 if (!lro_mgr
->get_frag_header
||
450 lro_mgr
->get_frag_header(frags
, (void *)&mac_hdr
, (void *)&iph
,
451 (void *)&tcph
, &flags
, priv
)) {
452 mac_hdr
= page_address(frags
->page
) + frags
->page_offset
;
456 if (!(flags
& LRO_IPV4
) || !(flags
& LRO_TCP
))
459 hdr_len
= (int)((void *)(tcph
) + TCP_HDR_LEN(tcph
) - mac_hdr
);
460 mac_hdr_len
= (int)((void *)(iph
) - mac_hdr
);
462 lro_desc
= lro_get_desc(lro_mgr
, lro_mgr
->lro_arr
, iph
, tcph
);
466 if (!lro_desc
->active
) { /* start new lro session */
467 if (lro_tcp_ip_check(iph
, tcph
, len
- mac_hdr_len
, NULL
))
470 skb
= lro_gen_skb(lro_mgr
, frags
, len
, true_size
, mac_hdr
,
471 hdr_len
, 0, lro_mgr
->ip_summed_aggr
);
475 if ((skb
->protocol
== htons(ETH_P_8021Q
)) &&
476 !(lro_mgr
->features
& LRO_F_EXTRACT_VLAN_ID
))
477 vlan_hdr_len
= VLAN_HLEN
;
479 iph
= (void *)(skb
->data
+ vlan_hdr_len
);
480 tcph
= (void *)((u8
*)skb
->data
+ vlan_hdr_len
483 lro_init_desc(lro_desc
, skb
, iph
, tcph
, 0, NULL
);
484 LRO_INC_STATS(lro_mgr
, aggregated
);
488 if (lro_desc
->tcp_next_seq
!= ntohl(tcph
->seq
))
491 if (lro_tcp_ip_check(iph
, tcph
, len
- mac_hdr_len
, lro_desc
))
494 lro_add_frags(lro_desc
, len
, hdr_len
, true_size
, frags
, iph
, tcph
);
495 LRO_INC_STATS(lro_mgr
, aggregated
);
497 if ((skb_shinfo(lro_desc
->parent
)->nr_frags
>= lro_mgr
->max_aggr
) ||
498 lro_desc
->parent
->len
> (0xFFFF - lro_mgr
->dev
->mtu
))
499 lro_flush(lro_mgr
, lro_desc
);
503 out2
: /* send aggregated packets to the stack */
504 lro_flush(lro_mgr
, lro_desc
);
506 out1
: /* Original packet has to be posted to the stack */
507 skb
= lro_gen_skb(lro_mgr
, frags
, len
, true_size
, mac_hdr
,
508 hdr_len
, sum
, lro_mgr
->ip_summed
);
513 void lro_receive_skb(struct net_lro_mgr
*lro_mgr
,
517 if (__lro_proc_skb(lro_mgr
, skb
, NULL
, 0, priv
)) {
518 if (lro_mgr
->features
& LRO_F_NAPI
)
519 netif_receive_skb(skb
);
524 EXPORT_SYMBOL(lro_receive_skb
);
526 void lro_vlan_hwaccel_receive_skb(struct net_lro_mgr
*lro_mgr
,
528 struct vlan_group
*vgrp
,
532 if (__lro_proc_skb(lro_mgr
, skb
, vgrp
, vlan_tag
, priv
)) {
533 if (lro_mgr
->features
& LRO_F_NAPI
)
534 vlan_hwaccel_receive_skb(skb
, vgrp
, vlan_tag
);
536 vlan_hwaccel_rx(skb
, vgrp
, vlan_tag
);
539 EXPORT_SYMBOL(lro_vlan_hwaccel_receive_skb
);
541 void lro_receive_frags(struct net_lro_mgr
*lro_mgr
,
542 struct skb_frag_struct
*frags
,
543 int len
, int true_size
, void *priv
, __wsum sum
)
547 skb
= __lro_proc_segment(lro_mgr
, frags
, len
, true_size
, NULL
, 0,
552 if (lro_mgr
->features
& LRO_F_NAPI
)
553 netif_receive_skb(skb
);
557 EXPORT_SYMBOL(lro_receive_frags
);
559 void lro_vlan_hwaccel_receive_frags(struct net_lro_mgr
*lro_mgr
,
560 struct skb_frag_struct
*frags
,
561 int len
, int true_size
,
562 struct vlan_group
*vgrp
,
563 u16 vlan_tag
, void *priv
, __wsum sum
)
567 skb
= __lro_proc_segment(lro_mgr
, frags
, len
, true_size
, vgrp
,
568 vlan_tag
, priv
, sum
);
572 if (lro_mgr
->features
& LRO_F_NAPI
)
573 vlan_hwaccel_receive_skb(skb
, vgrp
, vlan_tag
);
575 vlan_hwaccel_rx(skb
, vgrp
, vlan_tag
);
577 EXPORT_SYMBOL(lro_vlan_hwaccel_receive_frags
);
579 void lro_flush_all(struct net_lro_mgr
*lro_mgr
)
582 struct net_lro_desc
*lro_desc
= lro_mgr
->lro_arr
;
584 for (i
= 0; i
< lro_mgr
->max_desc
; i
++) {
585 if (lro_desc
[i
].active
)
586 lro_flush(lro_mgr
, &lro_desc
[i
]);
589 EXPORT_SYMBOL(lro_flush_all
);
591 void lro_flush_pkt(struct net_lro_mgr
*lro_mgr
,
592 struct iphdr
*iph
, struct tcphdr
*tcph
)
594 struct net_lro_desc
*lro_desc
;
596 lro_desc
= lro_get_desc(lro_mgr
, lro_mgr
->lro_arr
, iph
, tcph
);
597 if (lro_desc
->active
)
598 lro_flush(lro_mgr
, lro_desc
);
600 EXPORT_SYMBOL(lro_flush_pkt
);