/*
 *	Linux INET6 implementation
 *
 *	Pedro Roque	<roque@di.fc.ul.pt>
 *
 *	$Id: tcp_ipv6.c,v 1.122 2000/03/25 01:52:11 davem Exp $
 *
 *	Based on:
 *	linux/net/ipv4/tcp.c
 *	linux/net/ipv4/tcp_input.c
 *	linux/net/ipv4/tcp_output.c
 *
 *	Fixes:
 *	Hideaki YOSHIFUJI	:	sin6_scope_id support
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 */
#define __NO_VERSION__
#include <linux/module.h>
#include <linux/config.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/sched.h>
#include <linux/in6.h>
#include <linux/netdevice.h>
#include <linux/init.h>
#include <linux/ipsec.h>
#include <linux/ipv6.h>
#include <linux/icmpv6.h>
#include <linux/random.h>

#include <net/ndisc.h>
#include <net/transp_v6.h>
#include <net/addrconf.h>
#include <net/ip6_route.h>

#include <asm/uaccess.h>
static void tcp_v6_send_reset(struct sk_buff *skb);
static void tcp_v6_or_send_ack(struct sk_buff *skb, struct open_request *req);
static void tcp_v6_send_check(struct sock *sk, struct tcphdr *th, int len,
			      struct sk_buff *skb);

static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);
static int tcp_v6_xmit(struct sk_buff *skb);

static struct tcp_func ipv6_mapped;
static struct tcp_func ipv6_specific;
/* I have no idea if this is a good hash for v6 or not. -DaveM */
static __inline__ int tcp_v6_hashfn(struct in6_addr *laddr, u16 lport,
				    struct in6_addr *faddr, u16 fport)
{
	int hashent = (lport ^ fport);

	hashent ^= (laddr->s6_addr32[3] ^ faddr->s6_addr32[3]);
	hashent ^= hashent>>16;
	hashent ^= hashent>>8;
	return (hashent & (tcp_ehash_size - 1));
}
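
/*
 * Illustration only, not part of the build: a minimal userspace sketch
 * of the XOR-fold above.  demo_ehash_size and demo_hashfn are
 * hypothetical names; only the folding steps mirror tcp_v6_hashfn.
 * The low 32 bits of each address are mixed with the ports, then the
 * word is folded onto itself so every input bit can influence the
 * masked result (the mask assumes a power-of-two table size).
 */
#if 0
#include <stdint.h>
#include <stdio.h>

static const unsigned demo_ehash_size = 256;	/* must be a power of two */

static unsigned demo_hashfn(uint32_t laddr_lo, uint16_t lport,
			    uint32_t faddr_lo, uint16_t fport)
{
	unsigned h = lport ^ fport;

	h ^= laddr_lo ^ faddr_lo;	/* s6_addr32[3] of each address */
	h ^= h >> 16;
	h ^= h >> 8;
	return h & (demo_ehash_size - 1);
}

int main(void)
{
	printf("%u\n", demo_hashfn(0x00000001, 80, 0x20010db8, 40000));
	return 0;
}
#endif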
static __inline__ int tcp_v6_sk_hashfn(struct sock *sk)
{
	struct in6_addr *laddr = &sk->net_pinfo.af_inet6.rcv_saddr;
	struct in6_addr *faddr = &sk->net_pinfo.af_inet6.daddr;
	__u16 lport = sk->num;
	__u16 fport = sk->dport;
	return tcp_v6_hashfn(laddr, lport, faddr, fport);
}
/* Grrr, addr_type already calculated by caller, but I don't want
 * to add some silly "cookie" argument to this method just for that.
 * But it doesn't matter, the recalculation is in the rarest path
 * this function ever takes.
 */
static int tcp_v6_get_port(struct sock *sk, unsigned short snum)
{
	struct tcp_bind_hashbucket *head;
	struct tcp_bind_bucket *tb;
	int ret;

	local_bh_disable();
	if (snum == 0) {
		int low = sysctl_local_port_range[0];
		int high = sysctl_local_port_range[1];
		int remaining = (high - low) + 1;
		int rover;

		spin_lock(&tcp_portalloc_lock);
		rover = tcp_port_rover;
		do {	rover++;
			if ((rover < low) || (rover > high))
				rover = low;
			head = &tcp_bhash[tcp_bhashfn(rover)];
			spin_lock(&head->lock);
			for (tb = head->chain; tb; tb = tb->next)
				if (tb->port == rover)
					goto next;
			break;
		next:
			spin_unlock(&head->lock);
		} while (--remaining > 0);
		tcp_port_rover = rover;
		spin_unlock(&tcp_portalloc_lock);

		/* Exhausted local port range during search? */
		ret = 1;
		if (remaining <= 0)
			goto fail;

		/* OK, here is the one we will use. */
		snum = rover;
	} else {
		head = &tcp_bhash[tcp_bhashfn(snum)];
		spin_lock(&head->lock);
		for (tb = head->chain; tb != NULL; tb = tb->next)
			if (tb->port == snum)
				break;
	}
	if (tb != NULL && tb->owners != NULL) {
		if (tb->fastreuse != 0 && sk->reuse != 0 && sk->state != TCP_LISTEN) {
			goto success;
		} else {
			struct sock *sk2 = tb->owners;
			int sk_reuse = sk->reuse;
			int addr_type = ipv6_addr_type(&sk->net_pinfo.af_inet6.rcv_saddr);

			/* We must walk the whole port owner list in this case. -DaveM */
			for( ; sk2 != NULL; sk2 = sk2->bind_next) {
				if (sk != sk2 &&
				    sk->bound_dev_if == sk2->bound_dev_if) {
					if (!sk_reuse	||
					    !sk2->reuse	||
					    sk2->state == TCP_LISTEN) {
						/* NOTE: IPv6 tw buckets have a different format */
						if (!sk2->rcv_saddr	||
						    addr_type == IPV6_ADDR_ANY ||
						    !ipv6_addr_cmp(&sk->net_pinfo.af_inet6.rcv_saddr,
								   sk2->state != TCP_TIME_WAIT ?
								   &sk2->net_pinfo.af_inet6.rcv_saddr :
								   &((struct tcp_tw_bucket *)sk)->v6_rcv_saddr))
							break;
					}
				}
			}
			/* If we found a conflict, fail. */
			ret = 1;
			if (sk2 != NULL)
				goto fail_unlock;
		}
	}
	ret = 1;
	if (tb == NULL &&
	    (tb = tcp_bucket_create(head, snum)) == NULL)
		goto fail_unlock;
	if (tb->owners == NULL) {
		if (sk->reuse && sk->state != TCP_LISTEN)
			tb->fastreuse = 1;
		else
			tb->fastreuse = 0;
	} else if (tb->fastreuse &&
		   ((sk->reuse == 0) || (sk->state == TCP_LISTEN)))
		tb->fastreuse = 0;

success:
	if (sk->prev == NULL) {
		if ((sk->bind_next = tb->owners) != NULL)
			tb->owners->bind_pprev = &sk->bind_next;
		tb->owners = sk;
		sk->bind_pprev = &tb->owners;
		sk->prev = (struct sock *) tb;
	} else {
		BUG_TRAP(sk->prev == (struct sock *) tb);
	}
	ret = 0;

fail_unlock:
	spin_unlock(&head->lock);
fail:
	local_bh_enable();
	return ret;
}
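
/*
 * Illustration only: a userspace sketch of the rover search above,
 * assuming a boolean "in use" table stands in for the bind-hash walk.
 * demo_pick_port, demo_in_use and demo_rover are hypothetical names.
 * The rover remembers where the previous search stopped, so
 * consecutive allocations do not rescan the whole range from LOW.
 */
#if 0
#include <stdio.h>

enum { LOW = 1024, HIGH = 4999 };

static int demo_in_use[HIGH + 1];
static int demo_rover = LOW;

static int demo_pick_port(void)
{
	int remaining = (HIGH - LOW) + 1;
	int rover = demo_rover;

	do {
		rover++;
		if ((rover < LOW) || (rover > HIGH))
			rover = LOW;
		if (!demo_in_use[rover])
			break;			/* found a free port */
	} while (--remaining > 0);
	demo_rover = rover;

	if (remaining <= 0)
		return -1;			/* range exhausted */
	demo_in_use[rover] = 1;
	return rover;
}

int main(void)
{
	printf("%d %d\n", demo_pick_port(), demo_pick_port());
	return 0;
}
#endif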
static __inline__ void __tcp_v6_hash(struct sock *sk)
{
	struct sock **skp;
	rwlock_t *lock;

	BUG_TRAP(sk->pprev==NULL);

	if(sk->state == TCP_LISTEN) {
		skp = &tcp_listening_hash[tcp_sk_listen_hashfn(sk)];
		lock = &tcp_lhash_lock;
	} else {
		skp = &tcp_ehash[(sk->hashent = tcp_v6_sk_hashfn(sk))].chain;
		lock = &tcp_ehash[sk->hashent].lock;
	}

	write_lock(lock);

	if((sk->next = *skp) != NULL)
		(*skp)->pprev = &sk->next;
	*skp = sk;
	sk->pprev = skp;
	sock_prot_inc_use(sk->prot);
	write_unlock(lock);
}
static void tcp_v6_hash(struct sock *sk)
{
	if(sk->state != TCP_CLOSE) {
		if (sk->tp_pinfo.af_tcp.af_specific == &ipv6_mapped) {
			tcp_prot.hash(sk);
			return;
		}
		local_bh_disable();
		__tcp_v6_hash(sk);
		local_bh_enable();
	}
}
static struct sock *tcp_v6_lookup_listener(struct in6_addr *daddr, unsigned short hnum, int dif)
{
	struct sock *sk;
	struct sock *result = NULL;
	int score, hiscore = 0;

	read_lock(&tcp_lhash_lock);
	sk = tcp_listening_hash[tcp_lhashfn(hnum)];
	for(; sk; sk = sk->next) {
		if((sk->num == hnum) && (sk->family == PF_INET6)) {
			struct ipv6_pinfo *np = &sk->net_pinfo.af_inet6;

			score = 1;
			if(!ipv6_addr_any(&np->rcv_saddr)) {
				if(ipv6_addr_cmp(&np->rcv_saddr, daddr))
					continue;
				score++;
			}
			if (sk->bound_dev_if) {
				if (sk->bound_dev_if != dif)
					continue;
				score++;
			}
			if (score > hiscore) {
				hiscore = score;
				result = sk;
			}
		}
	}
	if (result)
		sock_hold(result);
	read_unlock(&tcp_lhash_lock);
	return result;
}
/* Sockets in TCP_CLOSE state are _always_ taken out of the hash, so
 * we need not check it for TCP lookups anymore, thanks Alexey. -DaveM
 *
 * The sockhash lock must be held as a reader here.
 */
static inline struct sock *__tcp_v6_lookup_established(struct in6_addr *saddr, u16 sport,
						       struct in6_addr *daddr, u16 hnum,
						       int dif)
{
	struct tcp_ehash_bucket *head;
	struct sock *sk;
	__u32 ports = TCP_COMBINED_PORTS(sport, hnum);
	int hash;

	/* Optimize here for direct hit, only listening connections can
	 * have wildcards anyways.
	 */
	hash = tcp_v6_hashfn(daddr, hnum, saddr, sport);
	head = &tcp_ehash[hash];
	read_lock(&head->lock);
	for(sk = head->chain; sk; sk = sk->next) {
		/* For IPV6 do the cheaper port and family tests first. */
		if(TCP_IPV6_MATCH(sk, saddr, daddr, ports, dif))
			goto hit; /* You sunk my battleship! */
	}
	/* Must check for a TIME_WAIT'er before going to listener hash. */
	for(sk = (head + tcp_ehash_size)->chain; sk; sk = sk->next) {
		if(*((__u32 *)&(sk->dport)) == ports	&&
		   sk->family == PF_INET6) {
			struct tcp_tw_bucket *tw = (struct tcp_tw_bucket *)sk;
			if(!ipv6_addr_cmp(&tw->v6_daddr, saddr)	&&
			   !ipv6_addr_cmp(&tw->v6_rcv_saddr, daddr) &&
			   (!sk->bound_dev_if || sk->bound_dev_if == dif))
				goto hit;
		}
	}
	read_unlock(&head->lock);
	return NULL;

hit:
	sock_hold(sk);
	read_unlock(&head->lock);
	return sk;
}
static inline struct sock *__tcp_v6_lookup(struct in6_addr *saddr, u16 sport,
					   struct in6_addr *daddr, u16 hnum,
					   int dif)
{
	struct sock *sk;

	sk = __tcp_v6_lookup_established(saddr, sport, daddr, hnum, dif);

	if (sk)
		return sk;

	return tcp_v6_lookup_listener(daddr, hnum, dif);
}
#define tcp_v6_lookup(sa, sp, da, dp, dif) \
({	struct sock *___sk; \
	local_bh_disable(); \
	___sk = __tcp_v6_lookup((sa),(sp),(da),ntohs(dp),(dif)); \
	local_bh_enable(); \
	___sk; \
})
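
/*
 * Illustration only: the demux order that __tcp_v6_lookup() and the
 * macro above implement, as a hedged userspace sketch (all demo_*
 * names are hypothetical).  An exact four-tuple match in the
 * established half of the hash is tried first, then the TIME_WAIT half
 * of the same bucket (offset by tcp_ehash_size), and only on a
 * complete miss the listener table with its wildcard scoring.
 */
#if 0
#include <stdio.h>

struct demo_sock { const char *kind; };

static struct demo_sock *demo_lookup_established(void) { return 0; }
static struct demo_sock *demo_lookup_timewait(void)    { return 0; }
static struct demo_sock *demo_lookup_listener(void)
{
	static struct demo_sock listener = { "listener" };
	return &listener;
}

static struct demo_sock *demo_lookup(void)
{
	struct demo_sock *sk = demo_lookup_established();

	if (!sk)
		sk = demo_lookup_timewait();
	if (!sk)
		sk = demo_lookup_listener();
	return sk;
}

int main(void)
{
	printf("%s\n", demo_lookup()->kind);
	return 0;
}
#endif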
/*
 *	Open request hash tables.
 */

static __inline__ unsigned tcp_v6_synq_hash(struct in6_addr *raddr, u16 rport)
{
	unsigned h = raddr->s6_addr32[3] ^ rport;

	h ^= h>>16;
	h ^= h>>8;
	return h&(TCP_SYNQ_HSIZE-1);
}
static struct open_request *tcp_v6_search_req(struct tcp_opt *tp,
					      struct ipv6hdr *ip6h,
					      struct tcphdr *th,
					      int iif,
					      struct open_request ***prevp)
{
	struct tcp_listen_opt *lopt = tp->listen_opt;
	struct open_request *req, **prev;
	__u16 rport = th->source;

	for (prev = &lopt->syn_table[tcp_v6_synq_hash(&ip6h->saddr, rport)];
	     (req = *prev) != NULL;
	     prev = &req->dl_next) {
		if (req->rmt_port == rport &&
		    req->class->family == AF_INET6 &&
		    !ipv6_addr_cmp(&req->af.v6_req.rmt_addr, &ip6h->saddr) &&
		    !ipv6_addr_cmp(&req->af.v6_req.loc_addr, &ip6h->daddr) &&
		    (!req->af.v6_req.iif || req->af.v6_req.iif == iif)) {
			BUG_TRAP(req->sk == NULL);
			*prevp = prev;
			return req;
		}
	}

	return NULL;
}
static __inline__ u16 tcp_v6_check(struct tcphdr *th, int len,
				   struct in6_addr *saddr,
				   struct in6_addr *daddr,
				   unsigned long base)
{
	return csum_ipv6_magic(saddr, daddr, len, IPPROTO_TCP, base);
}
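
/*
 * Illustration only: what csum_ipv6_magic computes, as a standalone
 * userspace routine (demo_csum_ipv6 and demo_sum16 are hypothetical
 * names).  The TCP checksum over IPv6 covers a pseudo-header of source
 * address, destination address, upper-layer packet length and next
 * header value (RFC 2460, section 8.1), followed by the TCP header and
 * payload, using the usual 16-bit one's-complement sum.  It assumes the
 * checksum field inside the TCP header is zero while summing.
 */
#if 0
#include <stdint.h>
#include <stddef.h>
#include <arpa/inet.h>

static uint32_t demo_sum16(const void *data, size_t len, uint32_t sum)
{
	const uint8_t *p = data;

	while (len > 1) {
		sum += (p[0] << 8) | p[1];	/* big-endian 16-bit words */
		p += 2;
		len -= 2;
	}
	if (len)
		sum += p[0] << 8;		/* odd trailing byte */
	return sum;
}

static uint16_t demo_csum_ipv6(const struct in6_addr *saddr,
			       const struct in6_addr *daddr,
			       uint32_t len, uint8_t proto,
			       const void *th, size_t thlen)
{
	uint32_t sum = 0;

	sum = demo_sum16(saddr, 16, sum);	/* pseudo-header: src */
	sum = demo_sum16(daddr, 16, sum);	/* pseudo-header: dst */
	sum += len >> 16;			/* pseudo-header: length */
	sum += len & 0xffff;
	sum += proto;				/* next header (6 = TCP) */
	sum = demo_sum16(th, thlen, sum);	/* TCP header + payload */
	while (sum >> 16)			/* fold the carries */
		sum = (sum & 0xffff) + (sum >> 16);
	return (uint16_t)~sum;
}
#endif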
static __u32 tcp_v6_init_sequence(struct sock *sk, struct sk_buff *skb)
{
	if (skb->protocol == __constant_htons(ETH_P_IPV6)) {
		return secure_tcpv6_sequence_number(skb->nh.ipv6h->daddr.s6_addr32,
						    skb->nh.ipv6h->saddr.s6_addr32,
						    skb->h.th->dest,
						    skb->h.th->source);
	}

	return secure_tcp_sequence_number(skb->nh.iph->daddr,
					  skb->nh.iph->saddr,
					  skb->h.th->dest,
					  skb->h.th->source);
}
static int tcp_v6_check_established(struct sock *sk)
{
	struct in6_addr *daddr = &sk->net_pinfo.af_inet6.rcv_saddr;
	struct in6_addr *saddr = &sk->net_pinfo.af_inet6.daddr;
	int dif = sk->bound_dev_if;
	u32 ports = TCP_COMBINED_PORTS(sk->dport, sk->num);
	int hash = tcp_v6_hashfn(daddr, sk->num, saddr, sk->dport);
	struct tcp_ehash_bucket *head = &tcp_ehash[hash];
	struct sock *sk2, **skp;
	struct tcp_tw_bucket *tw;

	write_lock(&head->lock);

	for(skp = &(head + tcp_ehash_size)->chain; (sk2=*skp)!=NULL; skp = &sk2->next) {
		tw = (struct tcp_tw_bucket*)sk2;

		if(*((__u32 *)&(sk2->dport)) == ports		&&
		   sk2->family == PF_INET6			&&
		   !ipv6_addr_cmp(&tw->v6_daddr, saddr)		&&
		   !ipv6_addr_cmp(&tw->v6_rcv_saddr, daddr)	&&
		   sk2->bound_dev_if == sk->bound_dev_if) {
			struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);

			if (tw->substate == TCP_TIME_WAIT &&
			    sysctl_tcp_tw_recycle && tw->ts_recent_stamp) {
				/* See comment in tcp_ipv4.c */
				if ((tp->write_seq = tw->snd_nxt + 2) == 0)
					tp->write_seq = 1;
				tp->ts_recent = tw->ts_recent;
				tp->ts_recent_stamp = tw->ts_recent_stamp;
				sock_hold(sk2);
				skp = &head->chain;
				goto unique;
			} else
				goto not_unique;
		}
	}
	tw = NULL;

	for(skp = &head->chain; (sk2=*skp)!=NULL; skp = &sk2->next) {
		if(TCP_IPV6_MATCH(sk, saddr, daddr, ports, dif))
			goto not_unique;
	}

unique:
	BUG_TRAP(sk->pprev==NULL);
	if ((sk->next = *skp) != NULL)
		(*skp)->pprev = &sk->next;

	*skp = sk;
	sk->pprev = skp;
	sk->hashent = hash;
	sock_prot_inc_use(sk->prot);
	write_unlock_bh(&head->lock);

	if (tw) {
		/* Silly. Should hash-dance instead... */
		local_bh_disable();
		tcp_tw_deschedule(tw);
		tcp_timewait_kill(tw);
		NET_INC_STATS_BH(TimeWaitRecycled);
		local_bh_enable();

		tcp_tw_put(tw);
	}
	return 0;

not_unique:
	write_unlock_bh(&head->lock);
	return -EADDRNOTAVAIL;
}
static int tcp_v6_hash_connecting(struct sock *sk)
{
	unsigned short snum = sk->num;
	struct tcp_bind_hashbucket *head = &tcp_bhash[tcp_bhashfn(snum)];
	struct tcp_bind_bucket *tb = head->chain;

	spin_lock_bh(&head->lock);

	if (tb->owners == sk && sk->bind_next == NULL) {
		__tcp_v6_hash(sk);
		spin_unlock_bh(&head->lock);
		return 0;
	} else {
		spin_unlock_bh(&head->lock);
		return tcp_v6_check_established(sk);
	}
}
static __inline__ int tcp_v6_iif(struct sk_buff *skb)
{
	struct inet6_skb_parm *opt = (struct inet6_skb_parm *) skb->cb;
	return opt->iif;
}
static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
			  int addr_len)
{
	struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
	struct ipv6_pinfo *np = &sk->net_pinfo.af_inet6;
	struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;
	struct in6_addr *saddr = NULL;
	struct in6_addr saddr_buf;
	struct flowi fl;
	struct dst_entry *dst;
	struct sk_buff *buff;
	int addr_type;
	int err;

	if (addr_len < SIN6_LEN_RFC2133)
		return -EINVAL;

	if (usin->sin6_family != AF_INET6)
		return(-EAFNOSUPPORT);

	fl.fl6_flowlabel = 0;
	if (np->sndflow) {
		fl.fl6_flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK;
		if (fl.fl6_flowlabel&IPV6_FLOWLABEL_MASK) {
			struct ip6_flowlabel *flowlabel;
			flowlabel = fl6_sock_lookup(sk, fl.fl6_flowlabel);
			if (flowlabel == NULL)
				return -EINVAL;
			ipv6_addr_copy(&usin->sin6_addr, &flowlabel->dst);
			fl6_sock_release(flowlabel);
		}
	}

	/*
	 *	connect() to INADDR_ANY means loopback (BSD'ism).
	 */

	if(ipv6_addr_any(&usin->sin6_addr))
		usin->sin6_addr.s6_addr[15] = 0x1;

	addr_type = ipv6_addr_type(&usin->sin6_addr);

	if(addr_type & IPV6_ADDR_MULTICAST)
		return -ENETUNREACH;

	if (addr_type&IPV6_ADDR_LINKLOCAL) {
		if (addr_len >= sizeof(struct sockaddr_in6) &&
		    usin->sin6_scope_id) {
			/* If interface is set while binding, indices
			 * must coincide.
			 */
			if (sk->bound_dev_if &&
			    sk->bound_dev_if != usin->sin6_scope_id)
				return -EINVAL;

			sk->bound_dev_if = usin->sin6_scope_id;
		}

		/* Connect to link-local address requires an interface */
		if (sk->bound_dev_if == 0)
			return -EINVAL;
	}

	if (tp->ts_recent_stamp && ipv6_addr_cmp(&np->daddr, &usin->sin6_addr)) {
		tp->ts_recent = 0;
		tp->ts_recent_stamp = 0;
		tp->write_seq = 0;
	}

	ipv6_addr_copy(&np->daddr, &usin->sin6_addr);
	np->flow_label = fl.fl6_flowlabel;

	/*
	 *	TCP over IPv4
	 */

	if (addr_type == IPV6_ADDR_MAPPED) {
		u32 exthdrlen = tp->ext_header_len;
		struct sockaddr_in sin;

		SOCK_DEBUG(sk, "connect: ipv4 mapped\n");

		sin.sin_family = AF_INET;
		sin.sin_port = usin->sin6_port;
		sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];

		sk->tp_pinfo.af_tcp.af_specific = &ipv6_mapped;
		sk->backlog_rcv = tcp_v4_do_rcv;

		err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin));

		if (err) {
			tp->ext_header_len = exthdrlen;
			sk->tp_pinfo.af_tcp.af_specific = &ipv6_specific;
			sk->backlog_rcv = tcp_v6_do_rcv;
		} else {
			ipv6_addr_set(&np->saddr, 0, 0, __constant_htonl(0x0000FFFF),
				      sk->saddr);
			ipv6_addr_set(&np->rcv_saddr, 0, 0, __constant_htonl(0x0000FFFF),
				      sk->rcv_saddr);
		}

		return err;
	}

	if (!ipv6_addr_any(&np->rcv_saddr))
		saddr = &np->rcv_saddr;

	fl.proto = IPPROTO_TCP;
	fl.fl6_dst = &np->daddr;
	fl.fl6_src = saddr;
	fl.oif = sk->bound_dev_if;
	fl.uli_u.ports.dport = usin->sin6_port;
	fl.uli_u.ports.sport = sk->sport;

	if (np->opt && np->opt->srcrt) {
		struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt;
		fl.nl_u.ip6_u.daddr = rt0->addr;
	}

	dst = ip6_route_output(sk, &fl);

	if ((err = dst->error) != 0) {
		dst_release(dst);
		return err;
	}

	ip6_dst_store(sk, dst, NULL);

	if (saddr == NULL) {
		err = ipv6_get_saddr(dst, &np->daddr, &saddr_buf);
		if (err)
			return err;

		saddr = &saddr_buf;
	}

	/* set the source address */
	ipv6_addr_copy(&np->rcv_saddr, saddr);
	ipv6_addr_copy(&np->saddr, saddr);

	tp->ext_header_len = 0;
	if (np->opt)
		tp->ext_header_len = np->opt->opt_flen + np->opt->opt_nflen;
	tp->mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);

	buff = sock_wmalloc(sk, MAX_TCP_HEADER + 15, 0, GFP_KERNEL);
	if (buff == NULL)
		return -ENOBUFS;

	sk->dport = usin->sin6_port;

	tp->write_seq = secure_tcpv6_sequence_number(np->saddr.s6_addr32,
						     np->daddr.s6_addr32,
						     sk->sport, sk->dport);

	err = tcp_connect(sk, buff);

	return err;
}
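
/*
 * Illustration only: what the sin6_scope_id handling above means for a
 * caller.  A hedged userspace sketch; "eth0" and the fe80:: address are
 * placeholders.  A link-local destination is only well defined per
 * interface, so the scope id must name one (and must agree with any
 * interface the socket was already bound to, or the kernel path above
 * returns an error).
 */
#if 0
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <arpa/inet.h>
#include <net/if.h>
#include <sys/socket.h>

int main(void)
{
	struct sockaddr_in6 peer;
	int fd = socket(AF_INET6, SOCK_STREAM, 0);

	if (fd < 0)
		return 1;

	memset(&peer, 0, sizeof(peer));
	peer.sin6_family = AF_INET6;
	peer.sin6_port = htons(80);
	inet_pton(AF_INET6, "fe80::1", &peer.sin6_addr);	/* placeholder */
	peer.sin6_scope_id = if_nametoindex("eth0");		/* placeholder */

	if (connect(fd, (struct sockaddr *)&peer, sizeof(peer)) < 0)
		perror("connect");
	close(fd);
	return 0;
}
#endif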
void tcp_v6_err(struct sk_buff *skb, struct ipv6hdr *hdr,
		struct inet6_skb_parm *opt,
		int type, int code, unsigned char *header, __u32 info)
{
	struct in6_addr *saddr = &hdr->saddr;
	struct in6_addr *daddr = &hdr->daddr;
	struct tcphdr *th = (struct tcphdr *)header;
	struct ipv6_pinfo *np;
	struct sock *sk;
	struct tcp_opt *tp;
	__u32 seq;
	int err;

	if (header + 8 > skb->tail)
		return;

	sk = tcp_v6_lookup(daddr, th->dest, saddr, th->source, skb->dev->ifindex);

	if (sk == NULL) {
		ICMP6_INC_STATS_BH(Icmp6InErrors);
		return;
	}

	if (sk->state == TCP_TIME_WAIT) {
		tcp_tw_put((struct tcp_tw_bucket *)sk);
		return;
	}

	bh_lock_sock(sk);
	if (sk->lock.users)
		NET_INC_STATS_BH(LockDroppedIcmps);

	if (sk->state == TCP_CLOSE)
		goto out;

	tp = &sk->tp_pinfo.af_tcp;
	seq = ntohl(th->seq);
	if (sk->state != TCP_LISTEN && !between(seq, tp->snd_una, tp->snd_nxt)) {
		NET_INC_STATS_BH(OutOfWindowIcmps);
		goto out;
	}

	np = &sk->net_pinfo.af_inet6;

	if (type == ICMPV6_PKT_TOOBIG) {
		struct dst_entry *dst = NULL;

		if (sk->lock.users)
			goto out;
		if ((1<<sk->state)&(TCPF_LISTEN|TCPF_CLOSE))
			goto out;

		/* icmp should have updated the destination cache entry */
		dst = __sk_dst_check(sk, np->dst_cookie);

		if (dst == NULL) {
			struct flowi fl;

			/* BUGGG_FUTURE: Again, it is not clear how
			   to handle rthdr case. Ignore this complexity
			   for now.
			 */
			fl.proto = IPPROTO_TCP;
			fl.nl_u.ip6_u.daddr = &np->daddr;
			fl.nl_u.ip6_u.saddr = &np->saddr;
			fl.oif = sk->bound_dev_if;
			fl.uli_u.ports.dport = sk->dport;
			fl.uli_u.ports.sport = sk->sport;

			dst = ip6_route_output(sk, &fl);
		} else
			dst_hold(dst);

		if (dst->error) {
			sk->err_soft = -dst->error;
		} else if (tp->pmtu_cookie > dst->pmtu) {
			tcp_sync_mss(sk, dst->pmtu);
			tcp_simple_retransmit(sk);
		} /* else let the usual retransmit timer handle it */

		dst_release(dst);
		goto out;
	}

	icmpv6_err_convert(type, code, &err);

	/* Might be for an open_request */
	switch (sk->state) {
		struct open_request *req, **prev;
		struct ipv6hdr hd;
	case TCP_LISTEN:
		if (sk->lock.users)
			goto out;

		/* Grrrr - fix this later. */
		ipv6_addr_copy(&hd.saddr, saddr);
		ipv6_addr_copy(&hd.daddr, daddr);
		req = tcp_v6_search_req(tp, &hd, th, tcp_v6_iif(skb), &prev);
		if (!req)
			goto out;

		/* ICMPs are not backlogged, hence we cannot get
		 * an established socket here.
		 */
		BUG_TRAP(req->sk == NULL);

		if (seq != req->snt_isn) {
			NET_INC_STATS_BH(OutOfWindowIcmps);
			goto out;
		}

		tcp_synq_drop(sk, req, prev);
		goto out;

	case TCP_SYN_SENT:
	case TCP_SYN_RECV:  /* Cannot happen.
			       It can if SYNs are crossed. --ANK */
		if (sk->lock.users == 0) {
			TCP_INC_STATS_BH(TcpAttemptFails);
			sk->err = err;
			sk->error_report(sk);	/* Wake people up to see the error (see connect in sock.c) */

			tcp_done(sk);
		} else {
			sk->err_soft = err;
		}
		goto out;
	}

	if (sk->lock.users == 0 && np->recverr) {
		sk->err = err;
		sk->error_report(sk);
	} else {
		sk->err_soft = err;
	}

out:
	bh_unlock_sock(sk);
	sock_put(sk);
}
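
/*
 * Illustration only: the window test used above.  A hedged sketch of a
 * wraparound-safe "is seq within [snd_una, snd_nxt]" check done in
 * modular 32-bit arithmetic (demo_between is a hypothetical name; the
 * kernel's between() is equivalent in spirit).  ICMP errors quoting a
 * sequence number outside the unacknowledged window are counted and
 * dropped rather than acted on.
 */
#if 0
#include <stdint.h>
#include <assert.h>

static int demo_between(uint32_t seq, uint32_t una, uint32_t nxt)
{
	/* Distances from una wrap correctly even across 2^32. */
	return (uint32_t)(nxt - una) >= (uint32_t)(seq - una);
}

int main(void)
{
	assert(demo_between(10, 5, 20));		/* inside           */
	assert(!demo_between(30, 5, 20));		/* beyond snd_nxt   */
	assert(demo_between(3, 0xfffffff0u, 8));	/* across the wrap  */
	return 0;
}
#endif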
static int tcp_v6_send_synack(struct sock *sk, struct open_request *req,
			      struct dst_entry *dst)
{
	struct sk_buff * skb;
	struct ipv6_txoptions *opt = NULL;
	struct flowi fl;
	int err = -1;

	fl.proto = IPPROTO_TCP;
	fl.nl_u.ip6_u.daddr = &req->af.v6_req.rmt_addr;
	fl.nl_u.ip6_u.saddr = &req->af.v6_req.loc_addr;
	fl.fl6_flowlabel = 0;
	fl.oif = req->af.v6_req.iif;
	fl.uli_u.ports.dport = req->rmt_port;
	fl.uli_u.ports.sport = sk->sport;

	if (dst == NULL) {
		opt = sk->net_pinfo.af_inet6.opt;
		if (opt == NULL &&
		    sk->net_pinfo.af_inet6.rxopt.bits.srcrt == 2 &&
		    req->af.v6_req.pktopts) {
			struct sk_buff *pktopts = req->af.v6_req.pktopts;
			struct inet6_skb_parm *rxopt = (struct inet6_skb_parm *)pktopts->cb;
			if (rxopt->srcrt)
				opt = ipv6_invert_rthdr(sk, (struct ipv6_rt_hdr *)(pktopts->nh.raw + rxopt->srcrt));
		}

		if (opt && opt->srcrt) {
			struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt;
			fl.nl_u.ip6_u.daddr = rt0->addr;
		}

		dst = ip6_route_output(sk, &fl);
		if (dst->error)
			goto done;
	}

	skb = tcp_make_synack(sk, dst, req);
	if (skb) {
		struct tcphdr *th = skb->h.th;

		th->check = tcp_v6_check(th, skb->len,
					 &req->af.v6_req.loc_addr, &req->af.v6_req.rmt_addr,
					 csum_partial((char *)th, skb->len, skb->csum));

		fl.nl_u.ip6_u.daddr = &req->af.v6_req.rmt_addr;
		err = ip6_xmit(sk, skb, &fl, opt);
		if (err == NET_XMIT_CN)
			err = 0;
	}

done:
	dst_release(dst);
	if (opt && opt != sk->net_pinfo.af_inet6.opt)
		sock_kfree_s(sk, opt, opt->tot_len);
	return err;
}
static void tcp_v6_or_free(struct open_request *req)
{
	if (req->af.v6_req.pktopts)
		kfree_skb(req->af.v6_req.pktopts);
}

static struct or_calltable or_ipv6 = {
	AF_INET6,
	tcp_v6_send_synack,
	tcp_v6_or_send_ack,
	tcp_v6_or_free,
	tcp_v6_send_reset
};
static int ipv6_opt_accepted(struct sock *sk, struct sk_buff *skb)
{
	struct inet6_skb_parm *opt = (struct inet6_skb_parm *)skb->cb;

	if (sk->net_pinfo.af_inet6.rxopt.all) {
		if ((opt->hop && sk->net_pinfo.af_inet6.rxopt.bits.hopopts) ||
		    ((IPV6_FLOWINFO_MASK&*(u32 *)skb->nh.raw) &&
		     sk->net_pinfo.af_inet6.rxopt.bits.rxflow) ||
		    (opt->srcrt && sk->net_pinfo.af_inet6.rxopt.bits.srcrt) ||
		    ((opt->dst1 || opt->dst0) && sk->net_pinfo.af_inet6.rxopt.bits.dstopts))
			return 1;
	}
	return 0;
}
static void tcp_v6_send_check(struct sock *sk, struct tcphdr *th, int len,
			      struct sk_buff *skb)
{
	struct ipv6_pinfo *np = &sk->net_pinfo.af_inet6;

	th->check = csum_ipv6_magic(&np->saddr, &np->daddr, len, IPPROTO_TCP,
				    csum_partial((char *)th, th->doff<<2,
						 skb->csum));
}
static void tcp_v6_send_reset(struct sk_buff *skb)
{
	struct tcphdr *th = skb->h.th, *t1;
	struct sk_buff *buff;
	struct flowi fl;

	if (th->rst)
		return;

	if (ipv6_addr_is_multicast(&skb->nh.ipv6h->daddr))
		return;

	/*
	 * We need to grab some memory, and put together an RST,
	 * and then put it into the queue to be sent.
	 */

	buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr), GFP_ATOMIC);
	if (buff == NULL)
		return;

	skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr));

	t1 = (struct tcphdr *) skb_push(buff, sizeof(struct tcphdr));

	/* Swap the send and the receive. */
	memset(t1, 0, sizeof(*t1));
	t1->dest = th->source;
	t1->source = th->dest;
	t1->doff = sizeof(*t1)/4;
	t1->rst = 1;

	if(th->ack) {
		t1->seq = th->ack_seq;
	} else {
		t1->ack = 1;
		t1->ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin
				    + skb->len - (th->doff<<2));
	}

	buff->csum = csum_partial((char *)t1, sizeof(*t1), 0);

	fl.nl_u.ip6_u.daddr = &skb->nh.ipv6h->saddr;
	fl.nl_u.ip6_u.saddr = &skb->nh.ipv6h->daddr;
	fl.fl6_flowlabel = 0;

	t1->check = csum_ipv6_magic(fl.nl_u.ip6_u.saddr,
				    fl.nl_u.ip6_u.daddr,
				    sizeof(*t1), IPPROTO_TCP,
				    buff->csum);

	fl.proto = IPPROTO_TCP;
	fl.oif = tcp_v6_iif(skb);
	fl.uli_u.ports.dport = t1->dest;
	fl.uli_u.ports.sport = t1->source;

	/* sk = NULL, but it is safe for now. RST socket required. */
	buff->dst = ip6_route_output(NULL, &fl);

	if (buff->dst->error == 0) {
		ip6_xmit(NULL, buff, &fl, NULL);
		TCP_INC_STATS_BH(TcpOutSegs);
		TCP_INC_STATS_BH(TcpOutRsts);
		return;
	}

	kfree_skb(buff);
}
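
/*
 * Illustration only: the RST sequence arithmetic used above, as a
 * hedged sketch (demo_rst_fields is a hypothetical name).  If the
 * offending segment carried an ACK, the reset claims that acknowledged
 * sequence number; otherwise it acknowledges everything the segment
 * consumed, counting SYN and FIN as one sequence number each.
 */
#if 0
#include <stdint.h>

struct demo_rst {
	uint32_t seq, ack_seq;
	int ack;
};

static struct demo_rst demo_rst_fields(uint32_t seq, uint32_t ack_seq,
				       int has_ack, int syn, int fin,
				       uint32_t payload_len)
{
	struct demo_rst r = { 0, 0, 0 };

	if (has_ack) {
		r.seq = ack_seq;		/* take over the peer's ACK */
	} else {
		r.ack = 1;
		r.ack_seq = seq + syn + fin + payload_len;
	}
	return r;
}

int main(void)
{
	/* Pure SYN, seq 1000, no payload: the RST acks 1001. */
	struct demo_rst r = demo_rst_fields(1000, 0, 0, 1, 0, 0);
	return !(r.ack && r.ack_seq == 1001);
}
#endif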
static void tcp_v6_send_ack(struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 ts)
{
	struct tcphdr *th = skb->h.th, *t1;
	struct sk_buff *buff;
	struct flowi fl;
	int tot_len = sizeof(struct tcphdr);

	if (ts)
		tot_len += 3*4;

	buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr), GFP_ATOMIC);
	if (buff == NULL)
		return;

	skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr));

	t1 = (struct tcphdr *) skb_push(buff, tot_len);

	/* Swap the send and the receive. */
	memset(t1, 0, sizeof(*t1));
	t1->dest = th->source;
	t1->source = th->dest;
	t1->doff = tot_len/4;
	t1->seq = htonl(seq);
	t1->ack_seq = htonl(ack);
	t1->ack = 1;
	t1->window = htons(win);

	if (ts) {
		u32 *ptr = (u32 *)(t1 + 1);
		*ptr++ = __constant_htonl((TCPOPT_NOP << 24) |
					  (TCPOPT_NOP << 16) |
					  (TCPOPT_TIMESTAMP << 8) |
					  TCPOLEN_TIMESTAMP);
		*ptr++ = htonl(tcp_time_stamp);
		*ptr = htonl(ts);
	}

	buff->csum = csum_partial((char *)t1, tot_len, 0);

	fl.nl_u.ip6_u.daddr = &skb->nh.ipv6h->saddr;
	fl.nl_u.ip6_u.saddr = &skb->nh.ipv6h->daddr;
	fl.fl6_flowlabel = 0;

	t1->check = csum_ipv6_magic(fl.nl_u.ip6_u.saddr,
				    fl.nl_u.ip6_u.daddr,
				    tot_len, IPPROTO_TCP,
				    buff->csum);

	fl.proto = IPPROTO_TCP;
	fl.oif = tcp_v6_iif(skb);
	fl.uli_u.ports.dport = t1->dest;
	fl.uli_u.ports.sport = t1->source;

	buff->dst = ip6_route_output(NULL, &fl);

	if (buff->dst->error == 0) {
		ip6_xmit(NULL, buff, &fl, NULL);
		TCP_INC_STATS_BH(TcpOutSegs);
		return;
	}

	kfree_skb(buff);
}
static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
{
	struct tcp_tw_bucket *tw = (struct tcp_tw_bucket *)sk;

	tcp_v6_send_ack(skb, tw->snd_nxt, tw->rcv_nxt,
			tw->rcv_wnd >> tw->rcv_wscale, tw->ts_recent);
}
static void tcp_v6_or_send_ack(struct sk_buff *skb, struct open_request *req)
{
	tcp_v6_send_ack(skb, req->snt_isn+1, req->rcv_isn+1, req->rcv_wnd, req->ts_recent);
}
static struct sock *tcp_v6_hnd_req(struct sock *sk, struct sk_buff *skb)
{
	struct open_request *req, **prev;
	struct tcphdr *th = skb->h.th;
	struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
	struct sock *nsk;

	/* Find possible connection requests. */
	req = tcp_v6_search_req(tp, skb->nh.ipv6h, th, tcp_v6_iif(skb), &prev);
	if (req)
		return tcp_check_req(sk, skb, req, prev);

	if (tp->accept_queue) {
		nsk = __tcp_v6_lookup_established(&skb->nh.ipv6h->saddr,
						  th->source,
						  &skb->nh.ipv6h->daddr,
						  ntohs(th->dest),
						  tcp_v6_iif(skb));
		if (nsk) {
			if (nsk->state != TCP_TIME_WAIT) {
				bh_lock_sock(nsk);
				return nsk;
			}
			tcp_tw_put((struct tcp_tw_bucket *)sk);
			return NULL;
		}
	}

#if 0 /*def CONFIG_SYN_COOKIES*/
	if (!th->rst && (th->syn || th->ack))
		sk = cookie_v6_check(sk, skb, &(IPCB(skb)->opt));
#endif
	return sk;
}
static void tcp_v6_synq_add(struct sock *sk, struct open_request *req)
{
	struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;
	struct tcp_listen_opt *lopt = tp->listen_opt;
	unsigned h = tcp_v6_synq_hash(&req->af.v6_req.rmt_addr, req->rmt_port);

	req->sk = NULL;
	req->expires = jiffies + TCP_TIMEOUT_INIT;
	req->retrans = 0;
	req->index = h;
	req->dl_next = lopt->syn_table[h];

	write_lock(&tp->syn_wait_lock);
	lopt->syn_table[h] = req;
	write_unlock(&tp->syn_wait_lock);

	tcp_synq_added(sk);
}
/* FIXME: this is substantially similar to the ipv4 code.
 * Can some kind of merge be done? -- erics
 */
static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
{
	struct tcp_opt tp;
	struct open_request *req = NULL;
	__u32 isn = TCP_SKB_CB(skb)->when;

	if (skb->protocol == __constant_htons(ETH_P_IP))
		return tcp_v4_conn_request(sk, skb);

	/* FIXME: do the same check for anycast */
	if (ipv6_addr_is_multicast(&skb->nh.ipv6h->daddr))
		goto drop;

	/*
	 *	There are no SYN attacks on IPv6, yet...
	 */
	if (tcp_synq_is_full(sk) && !isn) {
		if (net_ratelimit())
			printk(KERN_INFO "TCPv6: dropping request, synflood is possible\n");
		goto drop;
	}

	if (tcp_acceptq_is_full(sk) && tcp_synq_young(sk) > 1)
		goto drop;

	req = tcp_openreq_alloc();
	if (req == NULL)
		goto drop;

	tp.tstamp_ok = tp.sack_ok = tp.wscale_ok = tp.snd_wscale = 0;
	tp.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
	tp.user_mss = sk->tp_pinfo.af_tcp.user_mss;

	tcp_parse_options(NULL, skb->h.th, &tp, 0);

	tcp_openreq_init(req, &tp, skb);

	req->class = &or_ipv6;
	ipv6_addr_copy(&req->af.v6_req.rmt_addr, &skb->nh.ipv6h->saddr);
	ipv6_addr_copy(&req->af.v6_req.loc_addr, &skb->nh.ipv6h->daddr);
	req->af.v6_req.pktopts = NULL;
	if (ipv6_opt_accepted(sk, skb) ||
	    sk->net_pinfo.af_inet6.rxopt.bits.rxinfo ||
	    sk->net_pinfo.af_inet6.rxopt.bits.rxhlim) {
		atomic_inc(&skb->users);
		req->af.v6_req.pktopts = skb;
	}
	req->af.v6_req.iif = sk->bound_dev_if;

	/* So that link locals have meaning */
	if (!sk->bound_dev_if && ipv6_addr_type(&req->af.v6_req.rmt_addr)&IPV6_ADDR_LINKLOCAL)
		req->af.v6_req.iif = tcp_v6_iif(skb);

	if (isn == 0)
		isn = tcp_v6_init_sequence(sk, skb);

	req->snt_isn = isn;

	if (tcp_v6_send_synack(sk, req, NULL))
		goto drop_and_free;

	tcp_v6_synq_add(sk, req);

	return 0;

drop_and_free:
	tcp_openreq_free(req);
drop:
	TCP_INC_STATS_BH(TcpAttemptFails);
	return 0; /* don't send reset */
}
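
/*
 * Illustration only: the two admission checks above, as a hedged
 * sketch (demo_should_drop is a hypothetical name).  A full SYN queue
 * drops new requests outright (no SYN cookies on this path); a full
 * accept queue drops them only while more than one "young" entry --
 * one that has not yet been retransmitted to -- is still pending,
 * since an old queue suggests the listener is not draining anyway.
 */
#if 0
static int demo_should_drop(int synq_is_full, int acceptq_is_full,
			    int synq_young)
{
	if (synq_is_full)
		return 1;
	if (acceptq_is_full && synq_young > 1)
		return 1;
	return 0;
}
#endif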
static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
					  struct open_request *req,
					  struct dst_entry *dst)
{
	struct ipv6_pinfo *np;
	struct flowi fl;
	struct tcp_opt *newtp;
	struct sock *newsk;
	struct ipv6_txoptions *opt;

	if (skb->protocol == __constant_htons(ETH_P_IP)) {
		/*
		 *	v6 mapped
		 */

		newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst);

		if (newsk == NULL)
			return NULL;

		np = &newsk->net_pinfo.af_inet6;

		ipv6_addr_set(&np->daddr, 0, 0, __constant_htonl(0x0000FFFF),
			      newsk->daddr);

		ipv6_addr_set(&np->saddr, 0, 0, __constant_htonl(0x0000FFFF),
			      newsk->saddr);

		ipv6_addr_copy(&np->rcv_saddr, &np->saddr);

		newsk->tp_pinfo.af_tcp.af_specific = &ipv6_mapped;
		newsk->backlog_rcv = tcp_v4_do_rcv;
		newsk->net_pinfo.af_inet6.pktoptions = NULL;
		newsk->net_pinfo.af_inet6.opt = NULL;
		newsk->net_pinfo.af_inet6.mcast_oif = tcp_v6_iif(skb);
		newsk->net_pinfo.af_inet6.mcast_hops = skb->nh.ipv6h->hop_limit;

		/* Charge newly allocated IPv6 socket. Though it is mapped,
		 * it is IPv6 inside.
		 */
#ifdef INET_REFCNT_DEBUG
		atomic_inc(&inet6_sock_nr);
#endif

		/* This is a tricky place. Until this moment IPv4 tcp
		   worked with IPv6 af_tcp.af_specific.
		   Sync it now.
		 */
		tcp_sync_mss(newsk, newsk->tp_pinfo.af_tcp.pmtu_cookie);

		return newsk;
	}

	opt = sk->net_pinfo.af_inet6.opt;

	if (tcp_acceptq_is_full(sk))
		goto out_overflow;

	if (sk->net_pinfo.af_inet6.rxopt.bits.srcrt == 2 &&
	    opt == NULL && req->af.v6_req.pktopts) {
		struct inet6_skb_parm *rxopt = (struct inet6_skb_parm *)req->af.v6_req.pktopts->cb;
		if (rxopt->srcrt)
			opt = ipv6_invert_rthdr(sk, (struct ipv6_rt_hdr *)(req->af.v6_req.pktopts->nh.raw + rxopt->srcrt));
	}

	if (dst == NULL) {
		fl.proto = IPPROTO_TCP;
		fl.nl_u.ip6_u.daddr = &req->af.v6_req.rmt_addr;
		if (opt && opt->srcrt) {
			struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt;
			fl.nl_u.ip6_u.daddr = rt0->addr;
		}
		fl.nl_u.ip6_u.saddr = &req->af.v6_req.loc_addr;
		fl.fl6_flowlabel = 0;
		fl.oif = sk->bound_dev_if;
		fl.uli_u.ports.dport = req->rmt_port;
		fl.uli_u.ports.sport = sk->sport;

		dst = ip6_route_output(sk, &fl);
		if (dst->error)
			goto out;
	}

	newsk = tcp_create_openreq_child(sk, req, skb);
	if (newsk == NULL)
		goto out;

	/* Charge newly allocated IPv6 socket */
#ifdef INET_REFCNT_DEBUG
	atomic_inc(&inet6_sock_nr);
#endif

	ip6_dst_store(newsk, dst, NULL);

	newtp = &(newsk->tp_pinfo.af_tcp);

	np = &newsk->net_pinfo.af_inet6;
	ipv6_addr_copy(&np->daddr, &req->af.v6_req.rmt_addr);
	ipv6_addr_copy(&np->saddr, &req->af.v6_req.loc_addr);
	ipv6_addr_copy(&np->rcv_saddr, &req->af.v6_req.loc_addr);
	newsk->bound_dev_if = req->af.v6_req.iif;

	/* Now IPv6 options...

	   First: no IPv4 options.
	 */
	newsk->protinfo.af_inet.opt = NULL;

	/* Clone RX bits */
	np->rxopt.all = sk->net_pinfo.af_inet6.rxopt.all;

	/* Clone pktoptions received with SYN */
	np->pktoptions = NULL;
	if (req->af.v6_req.pktopts) {
		np->pktoptions = skb_clone(req->af.v6_req.pktopts, GFP_ATOMIC);
		kfree_skb(req->af.v6_req.pktopts);
		req->af.v6_req.pktopts = NULL;
		if (np->pktoptions)
			skb_set_owner_r(np->pktoptions, newsk);
	}
	np->mcast_oif = tcp_v6_iif(skb);
	np->mcast_hops = skb->nh.ipv6h->hop_limit;

	/* Clone native IPv6 options from the listening socket (if any)

	   Yes, keeping a reference count would be much more clever,
	   but we do one more thing here: reattach optmem
	   to newsk.
	 */
	if (opt) {
		np->opt = ipv6_dup_options(newsk, opt);
		if (opt != sk->net_pinfo.af_inet6.opt)
			sock_kfree_s(sk, opt, opt->tot_len);
	}

	newtp->ext_header_len = 0;
	if (np->opt)
		newtp->ext_header_len = np->opt->opt_nflen + np->opt->opt_flen;

	tcp_sync_mss(newsk, dst->pmtu);
	tcp_initialize_rcv_mss(newsk);
	newtp->advmss = dst->advmss;

	tcp_init_buffer_space(newsk);

	newsk->daddr	= LOOPBACK4_IPV6;
	newsk->saddr	= LOOPBACK4_IPV6;
	newsk->rcv_saddr= LOOPBACK4_IPV6;

	__tcp_v6_hash(newsk);
	tcp_inherit_port(sk, newsk);

	return newsk;

out_overflow:
	NET_INC_STATS_BH(ListenOverflows);
out:
	NET_INC_STATS_BH(ListenDrops);
	if (opt && opt != sk->net_pinfo.af_inet6.opt)
		sock_kfree_s(sk, opt, opt->tot_len);
	dst_release(dst);
	return NULL;
}
static int tcp_v6_checksum_init(struct sk_buff *skb)
{
	if (skb->ip_summed == CHECKSUM_HW) {
		if (tcp_v6_check(skb->h.th, skb->len, &skb->nh.ipv6h->saddr,
				 &skb->nh.ipv6h->daddr, skb->csum)) {
			NETDEBUG(printk(KERN_DEBUG "hw tcp v6 csum failed\n"));
			return -1;
		}
		skb->ip_summed = CHECKSUM_UNNECESSARY;
	} else if (skb->ip_summed != CHECKSUM_UNNECESSARY) {
		if (skb->len <= 68) {
			if (tcp_v6_check(skb->h.th, skb->len, &skb->nh.ipv6h->saddr,
					 &skb->nh.ipv6h->daddr,
					 csum_partial((char *)skb->h.th, skb->len, 0)))
				return -1;
			skb->ip_summed = CHECKSUM_UNNECESSARY;
		} else {
			skb->csum = ~tcp_v6_check(skb->h.th, skb->len, &skb->nh.ipv6h->saddr,
						  &skb->nh.ipv6h->daddr, 0);
		}
	}
	return 0;
}
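
/*
 * Illustration only: the three ip_summed cases above as one decision
 * function, in a hedged sketch (all demo_* names are hypothetical).
 * Hardware-verified frames are either accepted or rejected right away;
 * short software-checksummed frames are verified up front; longer ones
 * keep a partial sum so tcp_checksum_complete() can finish the
 * verification later, e.g. while copying to user space.
 */
#if 0
enum demo_summed { DEMO_NONE, DEMO_HW, DEMO_UNNECESSARY };

enum demo_verdict { DEMO_OK_NOW, DEMO_DEFER, DEMO_BAD };

static enum demo_verdict demo_csum_plan(enum demo_summed s,
					int hw_sum_ok, int sw_sum_ok,
					int short_pkt)
{
	if (s == DEMO_UNNECESSARY)
		return DEMO_OK_NOW;			/* already verified */
	if (s == DEMO_HW)
		return hw_sum_ok ? DEMO_OK_NOW : DEMO_BAD;
	if (short_pkt)
		return sw_sum_ok ? DEMO_OK_NOW : DEMO_BAD;
	return DEMO_DEFER;				/* verify later */
}
#endif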
/* The socket must have its spinlock held when we get
 * here.
 *
 * We have a potential double-lock case here, so even when
 * doing backlog processing we use the BH locking scheme.
 * This is because we cannot sleep with the original spinlock
 * held.
 */
static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
{
	int users = 0;
#ifdef CONFIG_FILTER
	struct sk_filter *filter;
#endif

	/* Imagine: socket is IPv6. IPv4 packet arrives,
	   goes to IPv4 receive handler and backlogged.
	   From backlog it always goes here. Kerboom...
	   Fortunately, tcp_rcv_established and rcv_established
	   handle them correctly, but that is not the case with
	   tcp_v6_hnd_req and tcp_v6_send_reset(). --ANK
	 */

	if (skb->protocol == __constant_htons(ETH_P_IP))
		return tcp_v4_do_rcv(sk, skb);

#ifdef CONFIG_FILTER
	filter = sk->filter;
	if (filter && sk_filter(skb, filter))
		goto discard;
#endif /* CONFIG_FILTER */

	/*
	 *	socket locking is here for SMP purposes as backlog rcv
	 *	is currently called with bh processing disabled.
	 */

	IP6_INC_STATS_BH(Ip6InDelivers);

	/* Do Stevens' IPV6_PKTOPTIONS.

	   Yes, guys, it is the only place in our code where we
	   can do it without affecting IPv4.
	   The rest of the code is protocol independent,
	   and I do not like the idea of uglifying IPv4.

	   Actually, all the idea behind IPV6_PKTOPTIONS
	   looks not very well thought out. For now we latch
	   options, received in the last packet, enqueued
	   by tcp. Feel free to propose better solution.
	 */
	if (sk->net_pinfo.af_inet6.rxopt.all) {
		users = atomic_read(&skb->users);
		atomic_inc(&skb->users);
	}

	if (sk->state == TCP_ESTABLISHED) { /* Fast path */
		TCP_CHECK_TIMER(sk);
		if (tcp_rcv_established(sk, skb, skb->h.th, skb->len))
			goto reset;
		TCP_CHECK_TIMER(sk);
		if (users)
			goto ipv6_pktoptions;
		return 0;
	}

	if (tcp_checksum_complete(skb))
		goto csum_err;

	if (sk->state == TCP_LISTEN) {
		struct sock *nsk = tcp_v6_hnd_req(sk, skb);
		if (!nsk)
			goto discard;

		/*
		 * Queue it on the new socket if the new socket is active,
		 * otherwise we just shortcircuit this and continue with
		 * the new socket..
		 */
		if (nsk != sk) {
			if (tcp_child_process(sk, nsk, skb))
				goto reset;
			if (users)
				kfree_skb(skb);
			return 0;
		}
	}

	TCP_CHECK_TIMER(sk);
	if (tcp_rcv_state_process(sk, skb, skb->h.th, skb->len))
		goto reset;
	TCP_CHECK_TIMER(sk);
	if (users)
		goto ipv6_pktoptions;
	return 0;

reset:
	tcp_v6_send_reset(skb);
discard:
	if (users)
		kfree_skb(skb);
	kfree_skb(skb);
	return 0;
csum_err:
	TCP_INC_STATS_BH(TcpInErrs);
	goto discard;

ipv6_pktoptions:
	/* Do you ask, what is it?

	   1. skb was enqueued by tcp.
	   2. skb is added to tail of read queue, rather than out of order.
	   3. socket is not in passive state.
	   4. Finally, it really contains options, which user wants to receive.
	 */
	if (atomic_read(&skb->users) > users &&
	    TCP_SKB_CB(skb)->end_seq == sk->tp_pinfo.af_tcp.rcv_nxt &&
	    !((1<<sk->state)&(TCPF_CLOSE|TCPF_LISTEN))) {
		if (sk->net_pinfo.af_inet6.rxopt.bits.rxinfo)
			sk->net_pinfo.af_inet6.mcast_oif = tcp_v6_iif(skb);
		if (sk->net_pinfo.af_inet6.rxopt.bits.rxhlim)
			sk->net_pinfo.af_inet6.mcast_hops = skb->nh.ipv6h->hop_limit;
		if (ipv6_opt_accepted(sk, skb)) {
			struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
			kfree_skb(skb);
			skb = NULL;
			if (skb2) {
				skb_set_owner_r(skb2, sk);
				skb = xchg(&sk->net_pinfo.af_inet6.pktoptions, skb2);
			}
		} else {
			kfree_skb(skb);
			skb = xchg(&sk->net_pinfo.af_inet6.pktoptions, NULL);
		}
	}

	if (skb)
		kfree_skb(skb);
	return 0;
}
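
/*
 * Illustration only: the xchg() latch used above, re-done with C11
 * atomics in a hedged userspace sketch (demo_latch_replace and
 * demo_latched are hypothetical names).  Swapping the new buffer in
 * and the old pointer out in one atomic step means there is never a
 * moment when the latch holds a buffer someone else may also free:
 * each caller frees exactly what it got back.
 */
#if 0
#include <stdatomic.h>
#include <stdlib.h>

static _Atomic(void *) demo_latched;

static void demo_latch_replace(void *newbuf)
{
	void *old = atomic_exchange(&demo_latched, newbuf);

	free(old);	/* free(NULL) is a no-op */
}

int main(void)
{
	demo_latch_replace(malloc(16));	/* latch a buffer */
	demo_latch_replace(0);		/* retire it */
	return 0;
}
#endif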
int tcp_v6_rcv(struct sk_buff *skb, unsigned long len)
{
	struct tcphdr *th;
	struct sock *sk;
	struct in6_addr *saddr = &skb->nh.ipv6h->saddr;
	struct in6_addr *daddr = &skb->nh.ipv6h->daddr;
	int ret;

	if (skb->pkt_type != PACKET_HOST)
		goto discard_it;

	/*
	 *	Pull up the IP header.
	 */

	__skb_pull(skb, skb->h.raw - skb->data);

	/*
	 *	Count it even if it's bad.
	 */

	TCP_INC_STATS_BH(TcpInSegs);

	if (len < sizeof(struct tcphdr))
		goto bad_packet;

	th = skb->h.th;

	if (tcp_v6_checksum_init(skb) < 0)
		goto bad_packet;

	TCP_SKB_CB(skb)->seq = ntohl(th->seq);
	TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
				    len - th->doff*4);
	TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
	TCP_SKB_CB(skb)->when = 0;

	sk = __tcp_v6_lookup(saddr, th->source, daddr, ntohs(th->dest), tcp_v6_iif(skb));

	if (!sk)
		goto no_tcp_socket;

process:
	if(!ipsec_sk_policy(sk,skb))
		goto discard_and_relse;
	if(sk->state == TCP_TIME_WAIT)
		goto do_time_wait;

	bh_lock_sock(sk);
	ret = 0;
	if (!sk->lock.users) {
		if (!tcp_prequeue(sk, skb))
			ret = tcp_v6_do_rcv(sk, skb);
	} else
		sk_add_backlog(sk, skb);
	bh_unlock_sock(sk);

	sock_put(sk);
	return ret;

no_tcp_socket:
	if (tcp_checksum_complete(skb)) {
bad_packet:
		TCP_INC_STATS_BH(TcpInErrs);
	} else {
		tcp_v6_send_reset(skb);
	}

discard_it:
	kfree_skb(skb);
	return 0;

discard_and_relse:
	sock_put(sk);
	goto discard_it;

do_time_wait:
	if (tcp_checksum_complete(skb)) {
		TCP_INC_STATS_BH(TcpInErrs);
		goto discard_and_relse;
	}

	switch(tcp_timewait_state_process((struct tcp_tw_bucket *)sk,
					  skb, th, skb->len)) {
	case TCP_TW_SYN:
	{
		struct sock *sk2;

		sk2 = tcp_v6_lookup_listener(&skb->nh.ipv6h->daddr, ntohs(th->dest), tcp_v6_iif(skb));
		if (sk2 != NULL) {
			tcp_tw_deschedule((struct tcp_tw_bucket *)sk);
			tcp_timewait_kill((struct tcp_tw_bucket *)sk);
			tcp_tw_put((struct tcp_tw_bucket *)sk);
			sk = sk2;
			goto process;
		}
		/* Fall through to ACK */
	}
	case TCP_TW_ACK:
		tcp_v6_timewait_ack(sk, skb);
		break;
	case TCP_TW_RST:
		goto no_tcp_socket;
	case TCP_TW_SUCCESS:
		;
	}
	goto discard_it;
}
static int tcp_v6_rebuild_header(struct sock *sk)
{
	int err;
	struct dst_entry *dst;
	struct ipv6_pinfo *np = &sk->net_pinfo.af_inet6;

	dst = __sk_dst_check(sk, np->dst_cookie);

	if (dst == NULL) {
		struct flowi fl;

		fl.proto = IPPROTO_TCP;
		fl.nl_u.ip6_u.daddr = &np->daddr;
		fl.nl_u.ip6_u.saddr = &np->saddr;
		fl.fl6_flowlabel = np->flow_label;
		fl.oif = sk->bound_dev_if;
		fl.uli_u.ports.dport = sk->dport;
		fl.uli_u.ports.sport = sk->sport;

		if (np->opt && np->opt->srcrt) {
			struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt;
			fl.nl_u.ip6_u.daddr = rt0->addr;
		}

		dst = ip6_route_output(sk, &fl);

		if (dst->error) {
			err = dst->error;
			dst_release(dst);
			return err;
		}

		ip6_dst_store(sk, dst, NULL);
	}

	return 0;
}
static int tcp_v6_xmit(struct sk_buff *skb)
{
	struct sock *sk = skb->sk;
	struct ipv6_pinfo * np = &sk->net_pinfo.af_inet6;
	struct flowi fl;
	struct dst_entry *dst;

	fl.proto = IPPROTO_TCP;
	fl.fl6_dst = &np->daddr;
	fl.fl6_src = &np->saddr;
	fl.fl6_flowlabel = np->flow_label;
	fl.oif = sk->bound_dev_if;
	fl.uli_u.ports.sport = sk->sport;
	fl.uli_u.ports.dport = sk->dport;

	if (np->opt && np->opt->srcrt) {
		struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt;
		fl.nl_u.ip6_u.daddr = rt0->addr;
	}

	dst = __sk_dst_check(sk, np->dst_cookie);

	if (dst == NULL) {
		dst = ip6_route_output(sk, &fl);

		if (dst->error) {
			sk->err_soft = -dst->error;
			dst_release(dst);
			return -sk->err_soft;
		}

		ip6_dst_store(sk, dst, NULL);
	}

	skb->dst = dst_clone(dst);

	/* Restore final destination back after routing done */
	fl.nl_u.ip6_u.daddr = &np->daddr;

	return ip6_xmit(sk, skb, &fl, np->opt);
}
static void v6_addr2sockaddr(struct sock *sk, struct sockaddr * uaddr)
{
	struct ipv6_pinfo * np = &sk->net_pinfo.af_inet6;
	struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) uaddr;

	sin6->sin6_family = AF_INET6;
	memcpy(&sin6->sin6_addr, &np->daddr, sizeof(struct in6_addr));
	sin6->sin6_port = sk->dport;
	/* We do not store received flowlabel for TCP */
	sin6->sin6_flowinfo = 0;
	sin6->sin6_scope_id = 0;
	if (sk->bound_dev_if && ipv6_addr_type(&sin6->sin6_addr)&IPV6_ADDR_LINKLOCAL)
		sin6->sin6_scope_id = sk->bound_dev_if;
}
static int tcp_v6_remember_stamp(struct sock *sk)
{
	/* Alas, not yet... */
	return 0;
}
static struct tcp_func ipv6_specific = {
	tcp_v6_xmit,
	tcp_v6_send_check,
	tcp_v6_rebuild_header,
	tcp_v6_conn_request,
	tcp_v6_syn_recv_sock,
	tcp_v6_hash_connecting,
	tcp_v6_remember_stamp,
	sizeof(struct ipv6hdr),

	ipv6_setsockopt,
	ipv6_getsockopt,
	v6_addr2sockaddr,
	sizeof(struct sockaddr_in6)
};

/*
 *	TCP over IPv4 via INET6 API
 */

static struct tcp_func ipv6_mapped = {
	ip_queue_xmit,
	tcp_v4_send_check,
	tcp_v4_rebuild_header,
	tcp_v6_conn_request,
	tcp_v6_syn_recv_sock,
	tcp_v4_hash_connecting,
	tcp_v4_remember_stamp,
	sizeof(struct iphdr),

	ipv6_setsockopt,
	ipv6_getsockopt,
	v6_addr2sockaddr,
	sizeof(struct sockaddr_in6)
};
/* NOTE: A lot of things set to zero explicitly by call to
 *       sk_alloc() so need not be done here.
 */
static int tcp_v6_init_sock(struct sock *sk)
{
	struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);

	skb_queue_head_init(&tp->out_of_order_queue);
	tcp_init_xmit_timers(sk);
	tcp_prequeue_init(tp);

	tp->rto = TCP_TIMEOUT_INIT;
	tp->mdev = TCP_TIMEOUT_INIT;

	/* So many TCP implementations out there (incorrectly) count the
	 * initial SYN frame in their delayed-ACK and congestion control
	 * algorithms that we must have the following bandaid to talk
	 * efficiently to them.  -DaveM
	 */
	tp->snd_cwnd = 2;

	/* See draft-stevens-tcpca-spec-01 for discussion of the
	 * initialization of these values.
	 */
	tp->snd_ssthresh = 0x7fffffff;
	tp->snd_cwnd_clamp = ~0;
	tp->mss_cache = 536;

	sk->state = TCP_CLOSE;

	sk->tp_pinfo.af_tcp.af_specific = &ipv6_specific;

	sk->write_space = tcp_write_space;

	return 0;
}
static int tcp_v6_destroy_sock(struct sock *sk)
{
	struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);

	tcp_clear_xmit_timers(sk);

	/* Clean up the write buffer. */
	__skb_queue_purge(&sk->write_queue);

	/* Cleans up our, hopefully empty, out_of_order_queue. */
	__skb_queue_purge(&tp->out_of_order_queue);

	/* Clean the prequeue; it really should be empty. */
	__skb_queue_purge(&tp->ucopy.prequeue);

	/* Clean up a referenced TCP bind bucket. */
	if(sk->prev != NULL)
		tcp_put_port(sk);

	return inet6_destroy_sock(sk);
}
/* Proc filesystem TCPv6 sock list dumping. */
static void get_openreq6(struct sock *sk, struct open_request *req, char *tmpbuf, int i)
{
	struct in6_addr *dest, *src;
	int ttd = req->expires - jiffies;

	if (ttd < 0)
		ttd = 0;

	src = &req->af.v6_req.loc_addr;
	dest = &req->af.v6_req.rmt_addr;
	sprintf(tmpbuf,
		"%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
		"%02X %08X:%08X %02X:%08X %08X %5d %8d %d %d %p",
		i,
		src->s6_addr32[0], src->s6_addr32[1],
		src->s6_addr32[2], src->s6_addr32[3],
		ntohs(sk->sport),
		dest->s6_addr32[0], dest->s6_addr32[1],
		dest->s6_addr32[2], dest->s6_addr32[3],
		ntohs(req->rmt_port),
		TCP_SYN_RECV,
		0,0, /* could print option size, but that is af dependent. */
		1,   /* timers active (only the expire timer) */
		ttd,
		req->retrans,
		sk->socket ? sk->socket->inode->i_uid : 0,
		0,  /* non standard timer */
		0, /* open_requests have no inode */
		atomic_read(&sk->refcnt), req);
}
static void get_tcp6_sock(struct sock *sp, char *tmpbuf, int i)
{
	struct in6_addr *dest, *src;
	__u16 destp, srcp;
	int timer_active;
	unsigned long timer_expires;
	struct tcp_opt *tp = &sp->tp_pinfo.af_tcp;

	dest = &sp->net_pinfo.af_inet6.daddr;
	src = &sp->net_pinfo.af_inet6.rcv_saddr;
	destp = ntohs(sp->dport);
	srcp = ntohs(sp->sport);
	timer_active = 0;
	timer_expires = (unsigned) -1;
	if (tp->retransmit_timer.prev != NULL && tp->retransmit_timer.expires < timer_expires) {
		timer_active = 1;
		timer_expires = tp->retransmit_timer.expires;
	} else if (tp->probe_timer.prev != NULL && tp->probe_timer.expires < timer_expires) {
		timer_active = 4;
		timer_expires = tp->probe_timer.expires;
	}
	if (sp->timer.prev != NULL && sp->timer.expires < timer_expires) {
		timer_active = 2;
		timer_expires = sp->timer.expires;
	}
	if(timer_active == 0)
		timer_expires = jiffies;

	sprintf(tmpbuf,
		"%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
		"%02X %08X:%08X %02X:%08lX %08X %5d %8d %ld %d %p %u %u %u %u",
		i,
		src->s6_addr32[0], src->s6_addr32[1],
		src->s6_addr32[2], src->s6_addr32[3], srcp,
		dest->s6_addr32[0], dest->s6_addr32[1],
		dest->s6_addr32[2], dest->s6_addr32[3], destp,
		sp->state,
		tp->write_seq-tp->snd_una, tp->rcv_nxt-tp->copied_seq,
		timer_active, timer_expires-jiffies,
		tp->retransmits,
		sp->socket ? sp->socket->inode->i_uid : 0,
		tp->probes_out,
		sp->socket ? sp->socket->inode->i_ino : 0,
		atomic_read(&sp->refcnt), sp,
		tp->rto, tp->ack.ato, tp->ack.quick, tp->ack.pingpong);
}
static void get_timewait6_sock(struct tcp_tw_bucket *tw, char *tmpbuf, int i)
{
	struct in6_addr *dest, *src;
	__u16 destp, srcp;
	int ttd = tw->ttd - jiffies;

	if (ttd < 0)
		ttd = 0;

	dest = &tw->v6_daddr;
	src  = &tw->v6_rcv_saddr;
	destp = ntohs(tw->dport);
	srcp  = ntohs(tw->sport);

	sprintf(tmpbuf,
		"%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
		"%02X %08X:%08X %02X:%08X %08X %5d %8d %d %d %p",
		i,
		src->s6_addr32[0], src->s6_addr32[1],
		src->s6_addr32[2], src->s6_addr32[3], srcp,
		dest->s6_addr32[0], dest->s6_addr32[1],
		dest->s6_addr32[2], dest->s6_addr32[3], destp,
		tw->state, 0, 0,
		3, ttd, 0, 0, 0, 0,
		atomic_read(&tw->refcnt), tw);
}
#define LINE_LEN 190
#define LINE_FMT "%-190s\n"

int tcp6_get_info(char *buffer, char **start, off_t offset, int length)
{
	int len = 0, num = 0, i;
	off_t begin, pos = 0;
	char tmpbuf[LINE_LEN+2];

	if(offset < LINE_LEN+1)
		len += sprintf(buffer, LINE_FMT,
			       "  sl  "						/*  6 */
			       "local_address                         "		/* 38 */
			       "remote_address                        "		/* 38 */
			       "st tx_queue rx_queue tr tm->when retrnsmt"	/* 41 */
			       "   uid  timeout inode");			/* 21 */

	pos = LINE_LEN+1;

	/* First, walk listening socket table. */
	tcp_listen_lock();
	for(i = 0; i < TCP_LHTABLE_SIZE; i++) {
		struct sock *sk = tcp_listening_hash[i];
		struct tcp_listen_opt *lopt;
		int k;

		for (sk = tcp_listening_hash[i]; sk; sk = sk->next, num++) {
			struct open_request *req;
			struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);

			if (sk->family != PF_INET6)
				continue;
			pos += LINE_LEN+1;
			if (pos >= offset) {
				get_tcp6_sock(sk, tmpbuf, num);
				len += sprintf(buffer+len, LINE_FMT, tmpbuf);
				if (len >= length) {
					tcp_listen_unlock();
					goto out_no_bh;
				}
			}

			read_lock_bh(&tp->syn_wait_lock);
			lopt = tp->listen_opt;
			if (lopt && lopt->qlen != 0) {
				for (k=0; k<TCP_SYNQ_HSIZE; k++) {
					for (req = lopt->syn_table[k]; req; req = req->dl_next, num++) {
						if (req->class->family != PF_INET6)
							continue;
						pos += LINE_LEN+1;
						if (pos < offset)
							continue;
						get_openreq6(sk, req, tmpbuf, num);
						len += sprintf(buffer+len, LINE_FMT, tmpbuf);
						if (len >= length) {
							read_unlock_bh(&tp->syn_wait_lock);
							tcp_listen_unlock();
							goto out_no_bh;
						}
					}
				}
			}
			read_unlock_bh(&tp->syn_wait_lock);

			/* Completed requests are in normal socket hash table */
		}
	}
	tcp_listen_unlock();

	local_bh_disable();

	/* Next, walk established hash chain. */
	for (i = 0; i < tcp_ehash_size; i++) {
		struct tcp_ehash_bucket *head = &tcp_ehash[i];
		struct sock *sk;
		struct tcp_tw_bucket *tw;

		read_lock(&head->lock);
		for(sk = head->chain; sk; sk = sk->next, num++) {
			if (sk->family != PF_INET6)
				continue;
			pos += LINE_LEN+1;
			if (pos < offset)
				continue;
			get_tcp6_sock(sk, tmpbuf, num);
			len += sprintf(buffer+len, LINE_FMT, tmpbuf);
			if (len >= length) {
				read_unlock(&head->lock);
				goto out;
			}
		}
		for (tw = (struct tcp_tw_bucket *)tcp_ehash[i+tcp_ehash_size].chain;
		     tw != NULL;
		     tw = (struct tcp_tw_bucket *)tw->next, num++) {
			if (tw->family != PF_INET6)
				continue;
			pos += LINE_LEN+1;
			if (pos < offset)
				continue;
			get_timewait6_sock(tw, tmpbuf, num);
			len += sprintf(buffer+len, LINE_FMT, tmpbuf);
			if (len >= length) {
				read_unlock(&head->lock);
				goto out;
			}
		}
		read_unlock(&head->lock);
	}

out:
	local_bh_enable();
out_no_bh:

	begin = len - (pos - offset);
	*start = buffer + begin;
	len -= begin;
	if(len > length)
		len = length;
	if (len < 0)
		len = 0;
	return len;
}
struct proto tcpv6_prot = {
	tcp_close,			/* close */
	tcp_v6_connect,			/* connect */
	tcp_disconnect,			/* disconnect */
	tcp_accept,			/* accept */
	tcp_ioctl,			/* ioctl */
	tcp_v6_init_sock,		/* init */
	tcp_v6_destroy_sock,		/* destroy */
	tcp_shutdown,			/* shutdown */
	tcp_setsockopt,			/* setsockopt */
	tcp_getsockopt,			/* getsockopt */
	tcp_sendmsg,			/* sendmsg */
	tcp_recvmsg,			/* recvmsg */
	NULL,				/* bind */
	tcp_v6_do_rcv,			/* backlog_rcv */
	tcp_v6_hash,			/* hash */
	tcp_unhash,			/* unhash */
	tcp_v6_get_port,		/* get_port */
	"TCPv6",			/* name */
};

static struct inet6_protocol tcpv6_protocol =
{
	tcp_v6_rcv,			/* TCP handler */
	tcp_v6_err,			/* TCP error control */
	NULL,				/* next */
	IPPROTO_TCP,			/* protocol ID */
	0,				/* copy */
	NULL,				/* data */
	"TCPv6"				/* name */
};

void __init tcpv6_init(void)
{
	/* register inet6 protocol */
	inet6_add_protocol(&tcpv6_protocol);
}