[IPV6]: Generalise tcp_v6_search_req & tcp_v6_synq_add
net/ipv6/tcp_ipv6.c

/*
 *	TCP over IPv6
 *	Linux INET6 implementation
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *
 *	$Id: tcp_ipv6.c,v 1.144 2002/02/01 22:01:04 davem Exp $
 *
 *	Based on:
 *	linux/net/ipv4/tcp.c
 *	linux/net/ipv4/tcp_input.c
 *	linux/net/ipv4/tcp_output.c
 *
 *	Fixes:
 *	Hideaki YOSHIFUJI	:	sin6_scope_id support
 *	YOSHIFUJI Hideaki @USAGI and:	Support IPV6_V6ONLY socket option, which
 *	Alexey Kuznetsov		allow both IPv4 and IPv6 sockets to bind
 *					a single port at the same time.
 *	YOSHIFUJI Hideaki @USAGI:	convert /proc/net/tcp6 to seq_file.
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 */

#include <linux/module.h>
#include <linux/config.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/jiffies.h>
#include <linux/in.h>
#include <linux/in6.h>
#include <linux/netdevice.h>
#include <linux/init.h>
#include <linux/jhash.h>
#include <linux/ipsec.h>
#include <linux/times.h>

#include <linux/ipv6.h>
#include <linux/icmpv6.h>
#include <linux/random.h>

#include <net/tcp.h>
#include <net/ndisc.h>
#include <net/inet6_hashtables.h>
#include <net/inet6_connection_sock.h>
#include <net/ipv6.h>
#include <net/transp_v6.h>
#include <net/addrconf.h>
#include <net/ip6_route.h>
#include <net/ip6_checksum.h>
#include <net/inet_ecn.h>
#include <net/protocol.h>
#include <net/xfrm.h>
#include <net/snmp.h>
#include <net/dsfield.h>

#include <asm/uaccess.h>

#include <linux/proc_fs.h>
#include <linux/seq_file.h>

static void	tcp_v6_send_reset(struct sk_buff *skb);
static void	tcp_v6_reqsk_send_ack(struct sk_buff *skb, struct request_sock *req);
static void	tcp_v6_send_check(struct sock *sk, struct tcphdr *th, int len,
				  struct sk_buff *skb);

static int	tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);
static int	tcp_v6_xmit(struct sk_buff *skb, int ipfragok);

static struct tcp_func ipv6_mapped;
static struct tcp_func ipv6_specific;

int inet6_csk_bind_conflict(const struct sock *sk,
			    const struct inet_bind_bucket *tb)
{
	const struct sock *sk2;
	const struct hlist_node *node;

	/* We must walk the whole port owner list in this case. -DaveM */
	sk_for_each_bound(sk2, node, &tb->owners) {
		if (sk != sk2 &&
		    (!sk->sk_bound_dev_if ||
		     !sk2->sk_bound_dev_if ||
		     sk->sk_bound_dev_if == sk2->sk_bound_dev_if) &&
		    (!sk->sk_reuse || !sk2->sk_reuse ||
		     sk2->sk_state == TCP_LISTEN) &&
		    ipv6_rcv_saddr_equal(sk, sk2))
			break;
	}

	return node != NULL;
}

static int tcp_v6_get_port(struct sock *sk, unsigned short snum)
{
	return inet_csk_get_port(&tcp_hashinfo, sk, snum,
				 inet6_csk_bind_conflict);
}

static void tcp_v6_hash(struct sock *sk)
{
	if (sk->sk_state != TCP_CLOSE) {
		struct tcp_sock *tp = tcp_sk(sk);

		if (tp->af_specific == &ipv6_mapped) {
			tcp_prot.hash(sk);
			return;
		}
		local_bh_disable();
		__inet6_hash(&tcp_hashinfo, sk);
		local_bh_enable();
	}
}

static __inline__ u16 tcp_v6_check(struct tcphdr *th, int len,
				   struct in6_addr *saddr,
				   struct in6_addr *daddr,
				   unsigned long base)
{
	return csum_ipv6_magic(saddr, daddr, len, IPPROTO_TCP, base);
}

static __u32 tcp_v6_init_sequence(struct sock *sk, struct sk_buff *skb)
{
	if (skb->protocol == htons(ETH_P_IPV6)) {
		return secure_tcpv6_sequence_number(skb->nh.ipv6h->daddr.s6_addr32,
						    skb->nh.ipv6h->saddr.s6_addr32,
						    skb->h.th->dest,
						    skb->h.th->source);
	} else {
		return secure_tcp_sequence_number(skb->nh.iph->daddr,
						  skb->nh.iph->saddr,
						  skb->h.th->dest,
						  skb->h.th->source);
	}
}

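/*
 * Check that the (saddr, sport, daddr, dport, dif) tuple this socket wants
 * to use is unique in the established hash.  The TIME-WAIT chain (kept in
 * the twin half of the ehash table) is checked first: an entry there may be
 * recycled when timestamps permit (the sysctl_tcp_tw_reuse path).  On
 * success the socket is inserted into the established hash while the bucket
 * write lock is still held, closing the race with concurrent connects.
 */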
static int __tcp_v6_check_established(struct sock *sk, const __u16 lport,
				      struct inet_timewait_sock **twp)
{
	struct inet_sock *inet = inet_sk(sk);
	const struct ipv6_pinfo *np = inet6_sk(sk);
	const struct in6_addr *daddr = &np->rcv_saddr;
	const struct in6_addr *saddr = &np->daddr;
	const int dif = sk->sk_bound_dev_if;
	const u32 ports = INET_COMBINED_PORTS(inet->dport, lport);
	unsigned int hash = inet6_ehashfn(daddr, inet->num, saddr, inet->dport);
	struct inet_ehash_bucket *head = inet_ehash_bucket(&tcp_hashinfo, hash);
	struct sock *sk2;
	const struct hlist_node *node;
	struct inet_timewait_sock *tw;

	prefetch(head->chain.first);
	write_lock(&head->lock);

	/* Check TIME-WAIT sockets first. */
	sk_for_each(sk2, node, &(head + tcp_hashinfo.ehash_size)->chain) {
		const struct tcp6_timewait_sock *tcp6tw = tcp6_twsk(sk2);

		tw = inet_twsk(sk2);

		if (*((__u32 *)&(tw->tw_dport))	== ports	&&
		    sk2->sk_family		== PF_INET6	&&
		    ipv6_addr_equal(&tcp6tw->tw_v6_daddr, saddr)	&&
		    ipv6_addr_equal(&tcp6tw->tw_v6_rcv_saddr, daddr)	&&
		    sk2->sk_bound_dev_if == sk->sk_bound_dev_if) {
			const struct tcp_timewait_sock *tcptw = tcp_twsk(sk2);
			struct tcp_sock *tp = tcp_sk(sk);

			if (tcptw->tw_ts_recent_stamp &&
			    (!twp ||
			     (sysctl_tcp_tw_reuse &&
			      xtime.tv_sec - tcptw->tw_ts_recent_stamp > 1))) {
				/* See comment in tcp_ipv4.c */
				tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2;
				if (!tp->write_seq)
					tp->write_seq = 1;
				tp->rx_opt.ts_recent	   = tcptw->tw_ts_recent;
				tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp;
				sock_hold(sk2);
				goto unique;
			} else
				goto not_unique;
		}
	}
	tw = NULL;

	/* And established part... */
	sk_for_each(sk2, node, &head->chain) {
		if (INET6_MATCH(sk2, hash, saddr, daddr, ports, dif))
			goto not_unique;
	}

unique:
	BUG_TRAP(sk_unhashed(sk));
	__sk_add_node(sk, &head->chain);
	sk->sk_hash = hash;
	sock_prot_inc_use(sk->sk_prot);
	write_unlock(&head->lock);

	if (twp) {
		*twp = tw;
		NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);
	} else if (tw) {
		/* Silly. Should hash-dance instead... */
		inet_twsk_deschedule(tw, &tcp_death_row);
		NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);

		inet_twsk_put(tw);
	}
	return 0;

not_unique:
	write_unlock(&head->lock);
	return -EADDRNOTAVAIL;
}

static inline u32 tcpv6_port_offset(const struct sock *sk)
{
	const struct inet_sock *inet = inet_sk(sk);
	const struct ipv6_pinfo *np = inet6_sk(sk);

	return secure_tcpv6_port_ephemeral(np->rcv_saddr.s6_addr32,
					   np->daddr.s6_addr32,
					   inet->dport);
}

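/*
 * Bind the socket to a local port for an outgoing connection.  For an
 * ephemeral port the search starts at a per-destination offset
 * (tcpv6_port_offset) plus a rolling hint, so successive connects do not
 * all hammer the same bind buckets.  An existing bucket is reusable only
 * if every owner got it this same way (tb->fastreuse < 0), and then only
 * if __tcp_v6_check_established() confirms the resulting 4-tuple is
 * unique; a TIME-WAIT socket displaced by the check is killed once the
 * new binding is in place.
 */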
static int tcp_v6_hash_connect(struct sock *sk)
{
	unsigned short snum = inet_sk(sk)->num;
	struct inet_bind_hashbucket *head;
	struct inet_bind_bucket *tb;
	int ret;

	if (!snum) {
		int low = sysctl_local_port_range[0];
		int high = sysctl_local_port_range[1];
		int range = high - low;
		int i;
		int port;
		static u32 hint;
		u32 offset = hint + tcpv6_port_offset(sk);
		struct hlist_node *node;
		struct inet_timewait_sock *tw = NULL;

		local_bh_disable();
		for (i = 1; i <= range; i++) {
			port = low + (i + offset) % range;
			head = &tcp_hashinfo.bhash[inet_bhashfn(port, tcp_hashinfo.bhash_size)];
			spin_lock(&head->lock);

			/* Does not bother with rcv_saddr checks,
			 * because the established check is already
			 * unique enough.
			 */
			inet_bind_bucket_for_each(tb, node, &head->chain) {
				if (tb->port == port) {
					BUG_TRAP(!hlist_empty(&tb->owners));
					if (tb->fastreuse >= 0)
						goto next_port;
					if (!__tcp_v6_check_established(sk,
									port,
									&tw))
						goto ok;
					goto next_port;
				}
			}

			tb = inet_bind_bucket_create(tcp_hashinfo.bind_bucket_cachep, head, port);
			if (!tb) {
				spin_unlock(&head->lock);
				break;
			}
			tb->fastreuse = -1;
			goto ok;

		next_port:
			spin_unlock(&head->lock);
		}
		local_bh_enable();

		return -EADDRNOTAVAIL;

ok:
		hint += i;

		/* Head lock still held and bh's disabled */
		inet_bind_hash(sk, tb, port);
		if (sk_unhashed(sk)) {
			inet_sk(sk)->sport = htons(port);
			__inet6_hash(&tcp_hashinfo, sk);
		}
		spin_unlock(&head->lock);

		if (tw) {
			inet_twsk_deschedule(tw, &tcp_death_row);
			inet_twsk_put(tw);
		}

		ret = 0;
		goto out;
	}

	head = &tcp_hashinfo.bhash[inet_bhashfn(snum, tcp_hashinfo.bhash_size)];
	tb   = inet_csk(sk)->icsk_bind_hash;
	spin_lock_bh(&head->lock);

	if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) {
		__inet6_hash(&tcp_hashinfo, sk);
		spin_unlock_bh(&head->lock);
		return 0;
	} else {
		spin_unlock(&head->lock);
		/* No definite answer... Walk to established hash table */
		ret = __tcp_v6_check_established(sk, snum, NULL);
out:
		local_bh_enable();
		return ret;
	}
}

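/*
 * Active open.  Resolves flow labels and link-local scope ids, falls back
 * to tcp_v4_connect() for v4-mapped destinations, routes the flow
 * (honouring a routing header in np->opt, with the real destination
 * restored after the lookup), then picks a local port with
 * tcp_v6_hash_connect() before sending the SYN via tcp_connect().
 */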
static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
			  int addr_len)
{
	struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
	struct inet_sock *inet = inet_sk(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	struct in6_addr *saddr = NULL, *final_p = NULL, final;
	struct flowi fl;
	struct dst_entry *dst;
	int addr_type;
	int err;

	if (addr_len < SIN6_LEN_RFC2133)
		return -EINVAL;

	if (usin->sin6_family != AF_INET6)
		return -EAFNOSUPPORT;

	memset(&fl, 0, sizeof(fl));

	if (np->sndflow) {
		fl.fl6_flowlabel = usin->sin6_flowinfo & IPV6_FLOWINFO_MASK;
		IP6_ECN_flow_init(fl.fl6_flowlabel);
		if (fl.fl6_flowlabel & IPV6_FLOWLABEL_MASK) {
			struct ip6_flowlabel *flowlabel;
			flowlabel = fl6_sock_lookup(sk, fl.fl6_flowlabel);
			if (flowlabel == NULL)
				return -EINVAL;
			ipv6_addr_copy(&usin->sin6_addr, &flowlabel->dst);
			fl6_sock_release(flowlabel);
		}
	}

	/*
	 * connect() to INADDR_ANY means loopback (BSD'ism).
	 */
	if (ipv6_addr_any(&usin->sin6_addr))
		usin->sin6_addr.s6_addr[15] = 0x1;

	addr_type = ipv6_addr_type(&usin->sin6_addr);

	if (addr_type & IPV6_ADDR_MULTICAST)
		return -ENETUNREACH;

	if (addr_type & IPV6_ADDR_LINKLOCAL) {
		if (addr_len >= sizeof(struct sockaddr_in6) &&
		    usin->sin6_scope_id) {
			/* If interface is set while binding, indices
			 * must coincide.
			 */
			if (sk->sk_bound_dev_if &&
			    sk->sk_bound_dev_if != usin->sin6_scope_id)
				return -EINVAL;

			sk->sk_bound_dev_if = usin->sin6_scope_id;
		}

		/* Connect to link-local address requires an interface */
		if (!sk->sk_bound_dev_if)
			return -EINVAL;
	}

	if (tp->rx_opt.ts_recent_stamp &&
	    !ipv6_addr_equal(&np->daddr, &usin->sin6_addr)) {
		tp->rx_opt.ts_recent = 0;
		tp->rx_opt.ts_recent_stamp = 0;
		tp->write_seq = 0;
	}

	ipv6_addr_copy(&np->daddr, &usin->sin6_addr);
	np->flow_label = fl.fl6_flowlabel;

	/*
	 *	TCP over IPv4
	 */
	if (addr_type == IPV6_ADDR_MAPPED) {
		u32 exthdrlen = tp->ext_header_len;
		struct sockaddr_in sin;

		SOCK_DEBUG(sk, "connect: ipv4 mapped\n");

		if (__ipv6_only_sock(sk))
			return -ENETUNREACH;

		sin.sin_family = AF_INET;
		sin.sin_port = usin->sin6_port;
		sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];

		tp->af_specific = &ipv6_mapped;
		sk->sk_backlog_rcv = tcp_v4_do_rcv;

		err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin));

		if (err) {
			tp->ext_header_len = exthdrlen;
			tp->af_specific = &ipv6_specific;
			sk->sk_backlog_rcv = tcp_v6_do_rcv;
			goto failure;
		} else {
			ipv6_addr_set(&np->saddr, 0, 0, htonl(0x0000FFFF),
				      inet->saddr);
			ipv6_addr_set(&np->rcv_saddr, 0, 0, htonl(0x0000FFFF),
				      inet->rcv_saddr);
		}

		return err;
	}

	if (!ipv6_addr_any(&np->rcv_saddr))
		saddr = &np->rcv_saddr;

	fl.proto = IPPROTO_TCP;
	ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
	ipv6_addr_copy(&fl.fl6_src,
		       (saddr ? saddr : &np->saddr));
	fl.oif = sk->sk_bound_dev_if;
	fl.fl_ip_dport = usin->sin6_port;
	fl.fl_ip_sport = inet->sport;

	if (np->opt && np->opt->srcrt) {
		struct rt0_hdr *rt0 = (struct rt0_hdr *)np->opt->srcrt;
		ipv6_addr_copy(&final, &fl.fl6_dst);
		ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
		final_p = &final;
	}

	err = ip6_dst_lookup(sk, &dst, &fl);
	if (err)
		goto failure;
	if (final_p)
		ipv6_addr_copy(&fl.fl6_dst, final_p);

	if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0)
		goto failure;

	if (saddr == NULL) {
		saddr = &fl.fl6_src;
		ipv6_addr_copy(&np->rcv_saddr, saddr);
	}

	/* set the source address */
	ipv6_addr_copy(&np->saddr, saddr);
	inet->rcv_saddr = LOOPBACK4_IPV6;

	ip6_dst_store(sk, dst, NULL);
	sk->sk_route_caps = dst->dev->features &
			    ~(NETIF_F_IP_CSUM | NETIF_F_TSO);

	tp->ext_header_len = 0;
	if (np->opt)
		tp->ext_header_len = np->opt->opt_flen + np->opt->opt_nflen;

	tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);

	inet->dport = usin->sin6_port;

	tcp_set_state(sk, TCP_SYN_SENT);
	err = tcp_v6_hash_connect(sk);
	if (err)
		goto late_failure;

	if (!tp->write_seq)
		tp->write_seq = secure_tcpv6_sequence_number(np->saddr.s6_addr32,
							     np->daddr.s6_addr32,
							     inet->sport,
							     inet->dport);

	err = tcp_connect(sk);
	if (err)
		goto late_failure;

	return 0;

late_failure:
	tcp_set_state(sk, TCP_CLOSE);
	__sk_dst_reset(sk);
failure:
	inet->dport = 0;
	sk->sk_route_caps = 0;
	return err;
}

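/*
 * ICMPv6 error handler.  PKT_TOOBIG updates the path MTU and triggers
 * tcp_simple_retransmit(); other errors are converted with
 * icmpv6_err_convert() and either reported to the user or, for a pending
 * request_sock found via inet6_csk_search_req(), used to drop the
 * half-open connection.
 */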
static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
		       int type, int code, int offset, __u32 info)
{
	struct ipv6hdr *hdr = (struct ipv6hdr *)skb->data;
	const struct tcphdr *th = (struct tcphdr *)(skb->data + offset);
	struct ipv6_pinfo *np;
	struct sock *sk;
	int err;
	struct tcp_sock *tp;
	__u32 seq;

	sk = inet6_lookup(&tcp_hashinfo, &hdr->daddr, th->dest, &hdr->saddr,
			  th->source, skb->dev->ifindex);

	if (sk == NULL) {
		ICMP6_INC_STATS_BH(__in6_dev_get(skb->dev), ICMP6_MIB_INERRORS);
		return;
	}

	if (sk->sk_state == TCP_TIME_WAIT) {
		inet_twsk_put((struct inet_timewait_sock *)sk);
		return;
	}

	bh_lock_sock(sk);
	if (sock_owned_by_user(sk))
		NET_INC_STATS_BH(LINUX_MIB_LOCKDROPPEDICMPS);

	if (sk->sk_state == TCP_CLOSE)
		goto out;

	tp = tcp_sk(sk);
	seq = ntohl(th->seq);
	if (sk->sk_state != TCP_LISTEN &&
	    !between(seq, tp->snd_una, tp->snd_nxt)) {
		NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS);
		goto out;
	}

	np = inet6_sk(sk);

	if (type == ICMPV6_PKT_TOOBIG) {
		struct dst_entry *dst = NULL;

		if (sock_owned_by_user(sk))
			goto out;
		if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
			goto out;

		/* icmp should have updated the destination cache entry */
		dst = __sk_dst_check(sk, np->dst_cookie);

		if (dst == NULL) {
			struct inet_sock *inet = inet_sk(sk);
			struct flowi fl;

			/* BUGGG_FUTURE: Again, it is not clear how
			   to handle rthdr case. Ignore this complexity
			   for now.
			 */
			memset(&fl, 0, sizeof(fl));
			fl.proto = IPPROTO_TCP;
			ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
			ipv6_addr_copy(&fl.fl6_src, &np->saddr);
			fl.oif = sk->sk_bound_dev_if;
			fl.fl_ip_dport = inet->dport;
			fl.fl_ip_sport = inet->sport;

			if ((err = ip6_dst_lookup(sk, &dst, &fl))) {
				sk->sk_err_soft = -err;
				goto out;
			}

			if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) {
				sk->sk_err_soft = -err;
				goto out;
			}
		} else
			dst_hold(dst);

		if (tp->pmtu_cookie > dst_mtu(dst)) {
			tcp_sync_mss(sk, dst_mtu(dst));
			tcp_simple_retransmit(sk);
		} /* else let the usual retransmit timer handle it */
		dst_release(dst);
		goto out;
	}

	icmpv6_err_convert(type, code, &err);

	/* Might be for a request_sock */
	switch (sk->sk_state) {
		struct request_sock *req, **prev;
	case TCP_LISTEN:
		if (sock_owned_by_user(sk))
			goto out;

		req = inet6_csk_search_req(sk, &prev, th->dest, &hdr->daddr,
					   &hdr->saddr, inet6_iif(skb));
		if (!req)
			goto out;

		/* ICMPs are not backlogged, hence we cannot get
		 * an established socket here.
		 */
		BUG_TRAP(req->sk == NULL);

		if (seq != tcp_rsk(req)->snt_isn) {
			NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS);
			goto out;
		}

		inet_csk_reqsk_queue_drop(sk, req, prev);
		goto out;

	case TCP_SYN_SENT:
	case TCP_SYN_RECV:  /* Cannot happen.
			       It can, if SYNs are crossed. --ANK */
		if (!sock_owned_by_user(sk)) {
			TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS);
			sk->sk_err = err;
			sk->sk_error_report(sk);	/* Wake people up to see the error (see connect in sock.c) */

			tcp_done(sk);
		} else
			sk->sk_err_soft = err;
		goto out;
	}

	if (!sock_owned_by_user(sk) && np->recverr) {
		sk->sk_err = err;
		sk->sk_error_report(sk);
	} else
		sk->sk_err_soft = err;

out:
	bh_unlock_sock(sk);
	sock_put(sk);
}

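/*
 * Build and send a SYN-ACK for a queued connection request.  If no route
 * was supplied, one is looked up from the request itself, inverting a
 * routing header saved from the SYN when np->rxopt.bits.osrcrt == 2 asks
 * for it; any routing-header first hop temporarily replaces the flow
 * destination for the lookup.
 */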
static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req,
			      struct dst_entry *dst)
{
	struct tcp6_request_sock *treq = tcp6_rsk(req);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct sk_buff *skb;
	struct ipv6_txoptions *opt = NULL;
	struct in6_addr *final_p = NULL, final;
	struct flowi fl;
	int err = -1;

	memset(&fl, 0, sizeof(fl));
	fl.proto = IPPROTO_TCP;
	ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr);
	ipv6_addr_copy(&fl.fl6_src, &treq->loc_addr);
	fl.fl6_flowlabel = 0;
	fl.oif = treq->iif;
	fl.fl_ip_dport = inet_rsk(req)->rmt_port;
	fl.fl_ip_sport = inet_sk(sk)->sport;

	if (dst == NULL) {
		opt = np->opt;
		if (opt == NULL &&
		    np->rxopt.bits.osrcrt == 2 &&
		    treq->pktopts) {
			struct sk_buff *pktopts = treq->pktopts;
			struct inet6_skb_parm *rxopt = IP6CB(pktopts);
			if (rxopt->srcrt)
				opt = ipv6_invert_rthdr(sk, (struct ipv6_rt_hdr *)(pktopts->nh.raw + rxopt->srcrt));
		}

		if (opt && opt->srcrt) {
			struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt;
			ipv6_addr_copy(&final, &fl.fl6_dst);
			ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
			final_p = &final;
		}

		err = ip6_dst_lookup(sk, &dst, &fl);
		if (err)
			goto done;
		if (final_p)
			ipv6_addr_copy(&fl.fl6_dst, final_p);
		if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0)
			goto done;
	}

	skb = tcp_make_synack(sk, dst, req);
	if (skb) {
		struct tcphdr *th = skb->h.th;

		th->check = tcp_v6_check(th, skb->len,
					 &treq->loc_addr, &treq->rmt_addr,
					 csum_partial((char *)th, skb->len, skb->csum));

		ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr);
		err = ip6_xmit(sk, skb, &fl, opt, 0);
		if (err == NET_XMIT_CN)
			err = 0;
	}

done:
	if (opt && opt != np->opt)
		sock_kfree_s(sk, opt, opt->tot_len);
	return err;
}

static void tcp_v6_reqsk_destructor(struct request_sock *req)
{
	if (tcp6_rsk(req)->pktopts)
		kfree_skb(tcp6_rsk(req)->pktopts);
}

static struct request_sock_ops tcp6_request_sock_ops = {
	.family		=	AF_INET6,
	.obj_size	=	sizeof(struct tcp6_request_sock),
	.rtx_syn_ack	=	tcp_v6_send_synack,
	.send_ack	=	tcp_v6_reqsk_send_ack,
	.destructor	=	tcp_v6_reqsk_destructor,
	.send_reset	=	tcp_v6_send_reset
};

static int ipv6_opt_accepted(struct sock *sk, struct sk_buff *skb)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct inet6_skb_parm *opt = IP6CB(skb);

	if (np->rxopt.all) {
		if ((opt->hop && (np->rxopt.bits.hopopts || np->rxopt.bits.ohopopts)) ||
		    ((IPV6_FLOWINFO_MASK & *(u32 *)skb->nh.raw) && np->rxopt.bits.rxflow) ||
		    (opt->srcrt && (np->rxopt.bits.srcrt || np->rxopt.bits.osrcrt)) ||
		    ((opt->dst1 || opt->dst0) && (np->rxopt.bits.dstopts || np->rxopt.bits.odstopts)))
			return 1;
	}
	return 0;
}

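/*
 * Fill in the TCP checksum.  With CHECKSUM_HW the hardware finishes the
 * job: only the pseudo-header sum is stored and skb->csum records the
 * offset of the checksum field; otherwise the full sum over the header
 * and payload is computed here.
 */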
static void tcp_v6_send_check(struct sock *sk, struct tcphdr *th, int len,
			      struct sk_buff *skb)
{
	struct ipv6_pinfo *np = inet6_sk(sk);

	if (skb->ip_summed == CHECKSUM_HW) {
		th->check = ~csum_ipv6_magic(&np->saddr, &np->daddr, len, IPPROTO_TCP, 0);
		skb->csum = offsetof(struct tcphdr, check);
	} else {
		th->check = csum_ipv6_magic(&np->saddr, &np->daddr, len, IPPROTO_TCP,
					    csum_partial((char *)th, th->doff << 2,
							 skb->csum));
	}
}

static void tcp_v6_send_reset(struct sk_buff *skb)
{
	struct tcphdr *th = skb->h.th, *t1;
	struct sk_buff *buff;
	struct flowi fl;

	if (th->rst)
		return;

	if (!ipv6_unicast_destination(skb))
		return;

	/*
	 * We need to grab some memory, and put together an RST,
	 * and then put it into the queue to be sent.
	 */
	buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + sizeof(struct tcphdr),
			 GFP_ATOMIC);
	if (buff == NULL)
		return;

	skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + sizeof(struct tcphdr));

	t1 = (struct tcphdr *) skb_push(buff, sizeof(struct tcphdr));

	/* Swap the send and the receive. */
	memset(t1, 0, sizeof(*t1));
	t1->dest = th->source;
	t1->source = th->dest;
	t1->doff = sizeof(*t1) / 4;
	t1->rst = 1;

	if (th->ack) {
		t1->seq = th->ack_seq;
	} else {
		t1->ack = 1;
		t1->ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin
				    + skb->len - (th->doff << 2));
	}

	buff->csum = csum_partial((char *)t1, sizeof(*t1), 0);

	memset(&fl, 0, sizeof(fl));
	ipv6_addr_copy(&fl.fl6_dst, &skb->nh.ipv6h->saddr);
	ipv6_addr_copy(&fl.fl6_src, &skb->nh.ipv6h->daddr);

	t1->check = csum_ipv6_magic(&fl.fl6_src, &fl.fl6_dst,
				    sizeof(*t1), IPPROTO_TCP,
				    buff->csum);

	fl.proto = IPPROTO_TCP;
	fl.oif = inet6_iif(skb);
	fl.fl_ip_dport = t1->dest;
	fl.fl_ip_sport = t1->source;

	/* sk = NULL, but it is safe for now. RST socket required. */
	if (!ip6_dst_lookup(NULL, &buff->dst, &fl)) {
		if (xfrm_lookup(&buff->dst, &fl, NULL, 0) >= 0) {
			ip6_xmit(NULL, buff, &fl, NULL, 0);
			TCP_INC_STATS_BH(TCP_MIB_OUTSEGS);
			TCP_INC_STATS_BH(TCP_MIB_OUTRSTS);
			return;
		}
	}

	kfree_skb(buff);
}

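/*
 * Send a bare ACK without an attached socket, optionally carrying a TCP
 * timestamp option; used for TIME-WAIT and request_sock ACKs below.
 */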
static void tcp_v6_send_ack(struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 ts)
{
	struct tcphdr *th = skb->h.th, *t1;
	struct sk_buff *buff;
	struct flowi fl;
	int tot_len = sizeof(struct tcphdr);

	if (ts)
		tot_len += 3 * 4;

	buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + tot_len,
			 GFP_ATOMIC);
	if (buff == NULL)
		return;

	skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + tot_len);

	t1 = (struct tcphdr *) skb_push(buff, tot_len);

	/* Swap the send and the receive. */
	memset(t1, 0, sizeof(*t1));
	t1->dest = th->source;
	t1->source = th->dest;
	t1->doff = tot_len / 4;
	t1->seq = htonl(seq);
	t1->ack_seq = htonl(ack);
	t1->ack = 1;
	t1->window = htons(win);

	if (ts) {
		u32 *ptr = (u32 *)(t1 + 1);
		*ptr++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
			       (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
		*ptr++ = htonl(tcp_time_stamp);
		*ptr = htonl(ts);
	}

	buff->csum = csum_partial((char *)t1, tot_len, 0);

	memset(&fl, 0, sizeof(fl));
	ipv6_addr_copy(&fl.fl6_dst, &skb->nh.ipv6h->saddr);
	ipv6_addr_copy(&fl.fl6_src, &skb->nh.ipv6h->daddr);

	t1->check = csum_ipv6_magic(&fl.fl6_src, &fl.fl6_dst,
				    tot_len, IPPROTO_TCP,
				    buff->csum);

	fl.proto = IPPROTO_TCP;
	fl.oif = inet6_iif(skb);
	fl.fl_ip_dport = t1->dest;
	fl.fl_ip_sport = t1->source;

	if (!ip6_dst_lookup(NULL, &buff->dst, &fl)) {
		if (xfrm_lookup(&buff->dst, &fl, NULL, 0) >= 0) {
			ip6_xmit(NULL, buff, &fl, NULL, 0);
			TCP_INC_STATS_BH(TCP_MIB_OUTSEGS);
			return;
		}
	}

	kfree_skb(buff);
}

static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
{
	struct inet_timewait_sock *tw = inet_twsk(sk);
	const struct tcp_timewait_sock *tcptw = tcp_twsk(sk);

	tcp_v6_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
			tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
			tcptw->tw_ts_recent);

	inet_twsk_put(tw);
}

static void tcp_v6_reqsk_send_ack(struct sk_buff *skb, struct request_sock *req)
{
	tcp_v6_send_ack(skb, tcp_rsk(req)->snt_isn + 1, tcp_rsk(req)->rcv_isn + 1, req->rcv_wnd, req->ts_recent);
}

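/*
 * For a segment arriving on a listening socket, look for a matching
 * half-open request first (via the generalised inet6_csk_search_req(),
 * which replaces the old tcp_v6_search_req()), then for an already
 * established child; otherwise keep processing on the listener itself.
 */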
static struct sock *tcp_v6_hnd_req(struct sock *sk, struct sk_buff *skb)
{
	struct request_sock *req, **prev;
	const struct tcphdr *th = skb->h.th;
	struct sock *nsk;

	/* Find possible connection requests. */
	req = inet6_csk_search_req(sk, &prev, th->source,
				   &skb->nh.ipv6h->saddr,
				   &skb->nh.ipv6h->daddr, inet6_iif(skb));
	if (req)
		return tcp_check_req(sk, skb, req, prev);

	nsk = __inet6_lookup_established(&tcp_hashinfo, &skb->nh.ipv6h->saddr,
					 th->source, &skb->nh.ipv6h->daddr,
					 ntohs(th->dest), inet6_iif(skb));

	if (nsk) {
		if (nsk->sk_state != TCP_TIME_WAIT) {
			bh_lock_sock(nsk);
			return nsk;
		}
		inet_twsk_put((struct inet_timewait_sock *)nsk);
		return NULL;
	}

#if 0 /*def CONFIG_SYN_COOKIES*/
	if (!th->rst && !th->syn && th->ack)
		sk = cookie_v6_check(sk, skb, &(IPCB(skb)->opt));
#endif
	return sk;
}

/* FIXME: this is substantially similar to the ipv4 code.
 * Can some kind of merge be done? -- erics
 */
static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
{
	struct tcp6_request_sock *treq;
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct tcp_options_received tmp_opt;
	struct tcp_sock *tp = tcp_sk(sk);
	struct request_sock *req = NULL;
	__u32 isn = TCP_SKB_CB(skb)->when;

	if (skb->protocol == htons(ETH_P_IP))
		return tcp_v4_conn_request(sk, skb);

	if (!ipv6_unicast_destination(skb))
		goto drop;

	/*
	 *	There are no SYN attacks on IPv6, yet...
	 */
	if (inet_csk_reqsk_queue_is_full(sk) && !isn) {
		if (net_ratelimit())
			printk(KERN_INFO "TCPv6: dropping request, synflood is possible\n");
		goto drop;
	}

	if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1)
		goto drop;

	req = reqsk_alloc(&tcp6_request_sock_ops);
	if (req == NULL)
		goto drop;

	tcp_clear_options(&tmp_opt);
	tmp_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
	tmp_opt.user_mss = tp->rx_opt.user_mss;

	tcp_parse_options(skb, &tmp_opt, 0);

	tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
	tcp_openreq_init(req, &tmp_opt, skb);

	treq = tcp6_rsk(req);
	ipv6_addr_copy(&treq->rmt_addr, &skb->nh.ipv6h->saddr);
	ipv6_addr_copy(&treq->loc_addr, &skb->nh.ipv6h->daddr);
	TCP_ECN_create_request(req, skb->h.th);
	treq->pktopts = NULL;
	if (ipv6_opt_accepted(sk, skb) ||
	    np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo ||
	    np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) {
		atomic_inc(&skb->users);
		treq->pktopts = skb;
	}
	treq->iif = sk->sk_bound_dev_if;

	/* So that link locals have meaning */
	if (!sk->sk_bound_dev_if &&
	    ipv6_addr_type(&treq->rmt_addr) & IPV6_ADDR_LINKLOCAL)
		treq->iif = inet6_iif(skb);

	if (isn == 0)
		isn = tcp_v6_init_sequence(sk, skb);

	tcp_rsk(req)->snt_isn = isn;

	if (tcp_v6_send_synack(sk, req, NULL))
		goto drop;

	inet6_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
	return 0;

drop:
	if (req)
		reqsk_free(req);

	TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS);
	return 0; /* don't send reset */
}

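/*
 * Create the child socket once the three-way handshake completes.  The
 * v4-mapped case delegates to tcp_v4_syn_recv_sock() and then redoes the
 * IPv6 bookkeeping; the native case routes the reply, clones the
 * listener's options and the pktoptions saved from the SYN, and hashes
 * the new socket into the established table.
 */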
static struct sock *tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
					 struct request_sock *req,
					 struct dst_entry *dst)
{
	struct tcp6_request_sock *treq = tcp6_rsk(req);
	struct ipv6_pinfo *newnp, *np = inet6_sk(sk);
	struct tcp6_sock *newtcp6sk;
	struct inet_sock *newinet;
	struct tcp_sock *newtp;
	struct sock *newsk;
	struct ipv6_txoptions *opt;

	if (skb->protocol == htons(ETH_P_IP)) {
		/*
		 *	v6 mapped
		 */
		newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst);

		if (newsk == NULL)
			return NULL;

		newtcp6sk = (struct tcp6_sock *)newsk;
		inet_sk(newsk)->pinet6 = &newtcp6sk->inet6;

		newinet = inet_sk(newsk);
		newnp = inet6_sk(newsk);
		newtp = tcp_sk(newsk);

		memcpy(newnp, np, sizeof(struct ipv6_pinfo));

		ipv6_addr_set(&newnp->daddr, 0, 0, htonl(0x0000FFFF),
			      newinet->daddr);

		ipv6_addr_set(&newnp->saddr, 0, 0, htonl(0x0000FFFF),
			      newinet->saddr);

		ipv6_addr_copy(&newnp->rcv_saddr, &newnp->saddr);

		newtp->af_specific = &ipv6_mapped;
		newsk->sk_backlog_rcv = tcp_v4_do_rcv;
		newnp->pktoptions  = NULL;
		newnp->opt	   = NULL;
		newnp->mcast_oif   = inet6_iif(skb);
		newnp->mcast_hops  = skb->nh.ipv6h->hop_limit;

		/*
		 * No need to charge this sock to the relevant IPv6 refcnt debug socks count
		 * here, tcp_create_openreq_child now does this for us, see the comment in
		 * that function for the gory details. -acme
		 */

		/* It is tricky place. Until this moment IPv4 tcp
		   worked with IPv6 af_tcp.af_specific.
		   Sync it now.
		 */
		tcp_sync_mss(newsk, newtp->pmtu_cookie);

		return newsk;
	}

	opt = np->opt;

	if (sk_acceptq_is_full(sk))
		goto out_overflow;

	if (np->rxopt.bits.osrcrt == 2 &&
	    opt == NULL && treq->pktopts) {
		struct inet6_skb_parm *rxopt = IP6CB(treq->pktopts);
		if (rxopt->srcrt)
			opt = ipv6_invert_rthdr(sk, (struct ipv6_rt_hdr *)(treq->pktopts->nh.raw + rxopt->srcrt));
	}

	if (dst == NULL) {
		struct in6_addr *final_p = NULL, final;
		struct flowi fl;

		memset(&fl, 0, sizeof(fl));
		fl.proto = IPPROTO_TCP;
		ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr);
		if (opt && opt->srcrt) {
			struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt;
			ipv6_addr_copy(&final, &fl.fl6_dst);
			ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
			final_p = &final;
		}
		ipv6_addr_copy(&fl.fl6_src, &treq->loc_addr);
		fl.oif = sk->sk_bound_dev_if;
		fl.fl_ip_dport = inet_rsk(req)->rmt_port;
		fl.fl_ip_sport = inet_sk(sk)->sport;

		if (ip6_dst_lookup(sk, &dst, &fl))
			goto out;

		if (final_p)
			ipv6_addr_copy(&fl.fl6_dst, final_p);

		if ((xfrm_lookup(&dst, &fl, sk, 0)) < 0)
			goto out;
	}

	newsk = tcp_create_openreq_child(sk, req, skb);
	if (newsk == NULL)
		goto out;

	/*
	 * No need to charge this sock to the relevant IPv6 refcnt debug socks
	 * count here, tcp_create_openreq_child now does this for us, see the
	 * comment in that function for the gory details. -acme
	 */

	ip6_dst_store(newsk, dst, NULL);
	newsk->sk_route_caps = dst->dev->features &
			       ~(NETIF_F_IP_CSUM | NETIF_F_TSO);

	newtcp6sk = (struct tcp6_sock *)newsk;
	inet_sk(newsk)->pinet6 = &newtcp6sk->inet6;

	newtp = tcp_sk(newsk);
	newinet = inet_sk(newsk);
	newnp = inet6_sk(newsk);

	memcpy(newnp, np, sizeof(struct ipv6_pinfo));

	ipv6_addr_copy(&newnp->daddr, &treq->rmt_addr);
	ipv6_addr_copy(&newnp->saddr, &treq->loc_addr);
	ipv6_addr_copy(&newnp->rcv_saddr, &treq->loc_addr);
	newsk->sk_bound_dev_if = treq->iif;

	/* Now IPv6 options...

	   First: no IPv4 options.
	 */
	newinet->opt = NULL;

	/* Clone RX bits */
	newnp->rxopt.all = np->rxopt.all;

	/* Clone pktoptions received with SYN */
	newnp->pktoptions = NULL;
	if (treq->pktopts != NULL) {
		newnp->pktoptions = skb_clone(treq->pktopts, GFP_ATOMIC);
		kfree_skb(treq->pktopts);
		treq->pktopts = NULL;
		if (newnp->pktoptions)
			skb_set_owner_r(newnp->pktoptions, newsk);
	}
	newnp->opt	  = NULL;
	newnp->mcast_oif  = inet6_iif(skb);
	newnp->mcast_hops = skb->nh.ipv6h->hop_limit;

	/* Clone native IPv6 options from listening socket (if any)

	   Yes, keeping reference count would be much more clever,
	   but we do one more thing here: reattach optmem
	   to newsk.
	 */
	if (opt) {
		newnp->opt = ipv6_dup_options(newsk, opt);
		if (opt != np->opt)
			sock_kfree_s(sk, opt, opt->tot_len);
	}

	newtp->ext_header_len = 0;
	if (newnp->opt)
		newtp->ext_header_len = newnp->opt->opt_nflen +
					newnp->opt->opt_flen;

	tcp_sync_mss(newsk, dst_mtu(dst));
	newtp->advmss = dst_metric(dst, RTAX_ADVMSS);
	tcp_initialize_rcv_mss(newsk);

	newinet->daddr = newinet->saddr = newinet->rcv_saddr = LOOPBACK4_IPV6;

	__inet6_hash(&tcp_hashinfo, newsk);
	inet_inherit_port(&tcp_hashinfo, sk, newsk);

	return newsk;

out_overflow:
	NET_INC_STATS_BH(LINUX_MIB_LISTENOVERFLOWS);
out:
	NET_INC_STATS_BH(LINUX_MIB_LISTENDROPS);
	if (opt && opt != np->opt)
		sock_kfree_s(sk, opt, opt->tot_len);
	dst_release(dst);
	return NULL;
}

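/*
 * Validate the checksum of an incoming segment.  Hardware-summed packets
 * are verified against the pseudo-header here; for the rest, the
 * pseudo-header sum is stored in skb->csum, short packets (<= 76 bytes)
 * are checked immediately, and longer ones are left for
 * tcp_checksum_complete() once the data is actually needed.
 */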
static int tcp_v6_checksum_init(struct sk_buff *skb)
{
	if (skb->ip_summed == CHECKSUM_HW) {
		if (!tcp_v6_check(skb->h.th, skb->len, &skb->nh.ipv6h->saddr,
				  &skb->nh.ipv6h->daddr, skb->csum)) {
			skb->ip_summed = CHECKSUM_UNNECESSARY;
			return 0;
		}
	}

	skb->csum = ~tcp_v6_check(skb->h.th, skb->len, &skb->nh.ipv6h->saddr,
				  &skb->nh.ipv6h->daddr, 0);

	if (skb->len <= 76) {
		return __skb_checksum_complete(skb);
	}
	return 0;
}

/* The socket must have its spinlock held when we get
 * here.
 *
 * We have a potential double-lock case here, so even when
 * doing backlog processing we use the BH locking scheme.
 * This is because we cannot sleep with the original spinlock
 * held.
 */
static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct tcp_sock *tp;
	struct sk_buff *opt_skb = NULL;

	/* Imagine: socket is IPv6. IPv4 packet arrives,
	   goes to IPv4 receive handler and backlogged.
	   From backlog it always goes here. Kerboom...
	   Fortunately, tcp_rcv_established and rcv_established
	   handle them correctly, but it is not the case with
	   tcp_v6_hnd_req and tcp_v6_send_reset().   --ANK
	 */

	if (skb->protocol == htons(ETH_P_IP))
		return tcp_v4_do_rcv(sk, skb);

	if (sk_filter(sk, skb, 0))
		goto discard;

	/*
	 *	socket locking is here for SMP purposes as backlog rcv
	 *	is currently called with bh processing disabled.
	 */

	/* Do Stevens' IPV6_PKTOPTIONS.

	   Yes, guys, it is the only place in our code, where we
	   may make it not affecting IPv4.
	   The rest of code is protocol independent,
	   and I do not like idea to uglify IPv4.

	   Actually, all the idea behind IPV6_PKTOPTIONS
	   looks not very well thought. For now we latch
	   options, received in the last packet, enqueued
	   by tcp. Feel free to propose better solution.
	                                       --ANK (980728)
	 */
	if (np->rxopt.all)
		opt_skb = skb_clone(skb, GFP_ATOMIC);

	if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
		TCP_CHECK_TIMER(sk);
		if (tcp_rcv_established(sk, skb, skb->h.th, skb->len))
			goto reset;
		TCP_CHECK_TIMER(sk);
		if (opt_skb)
			goto ipv6_pktoptions;
		return 0;
	}

	if (skb->len < (skb->h.th->doff << 2) || tcp_checksum_complete(skb))
		goto csum_err;

	if (sk->sk_state == TCP_LISTEN) {
		struct sock *nsk = tcp_v6_hnd_req(sk, skb);
		if (!nsk)
			goto discard;

		/*
		 * Queue it on the new socket if the new socket is active,
		 * otherwise we just shortcircuit this and continue with
		 * the new socket..
		 */
		if (nsk != sk) {
			if (tcp_child_process(sk, nsk, skb))
				goto reset;
			if (opt_skb)
				__kfree_skb(opt_skb);
			return 0;
		}
	}

	TCP_CHECK_TIMER(sk);
	if (tcp_rcv_state_process(sk, skb, skb->h.th, skb->len))
		goto reset;
	TCP_CHECK_TIMER(sk);
	if (opt_skb)
		goto ipv6_pktoptions;
	return 0;

reset:
	tcp_v6_send_reset(skb);
discard:
	if (opt_skb)
		__kfree_skb(opt_skb);
	kfree_skb(skb);
	return 0;
csum_err:
	TCP_INC_STATS_BH(TCP_MIB_INERRS);
	goto discard;


ipv6_pktoptions:
	/* Do you ask, what is it?

	   1. skb was enqueued by tcp.
	   2. skb is added to tail of read queue, rather than out of order.
	   3. socket is not in passive state.
	   4. Finally, it really contains options, which user wants to receive.
	 */
	tp = tcp_sk(sk);
	if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt &&
	    !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
		if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo)
			np->mcast_oif = inet6_iif(opt_skb);
		if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim)
			np->mcast_hops = opt_skb->nh.ipv6h->hop_limit;
		if (ipv6_opt_accepted(sk, opt_skb)) {
			skb_set_owner_r(opt_skb, sk);
			opt_skb = xchg(&np->pktoptions, opt_skb);
		} else {
			__kfree_skb(opt_skb);
			opt_skb = xchg(&np->pktoptions, NULL);
		}
	}

	if (opt_skb)
		kfree_skb(opt_skb);
	return 0;
}

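/*
 * Main receive path, called from the inet6 protocol handler.  Performs
 * header and checksum sanity checks, demultiplexes to a socket with
 * __inet6_lookup(), and hands the segment to tcp_v6_do_rcv() directly,
 * via the prequeue, or via the socket backlog depending on who owns the
 * socket lock.
 */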
static int tcp_v6_rcv(struct sk_buff **pskb, unsigned int *nhoffp)
{
	struct sk_buff *skb = *pskb;
	struct tcphdr *th;
	struct sock *sk;
	int ret;

	if (skb->pkt_type != PACKET_HOST)
		goto discard_it;

	/*
	 *	Count it even if it's bad.
	 */
	TCP_INC_STATS_BH(TCP_MIB_INSEGS);

	if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
		goto discard_it;

	th = skb->h.th;

	if (th->doff < sizeof(struct tcphdr) / 4)
		goto bad_packet;
	if (!pskb_may_pull(skb, th->doff * 4))
		goto discard_it;

	if ((skb->ip_summed != CHECKSUM_UNNECESSARY &&
	     tcp_v6_checksum_init(skb)))
		goto bad_packet;

	th = skb->h.th;
	TCP_SKB_CB(skb)->seq = ntohl(th->seq);
	TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
				    skb->len - th->doff * 4);
	TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
	TCP_SKB_CB(skb)->when = 0;
	TCP_SKB_CB(skb)->flags = ipv6_get_dsfield(skb->nh.ipv6h);
	TCP_SKB_CB(skb)->sacked = 0;

	sk = __inet6_lookup(&tcp_hashinfo, &skb->nh.ipv6h->saddr, th->source,
			    &skb->nh.ipv6h->daddr, ntohs(th->dest),
			    inet6_iif(skb));

	if (!sk)
		goto no_tcp_socket;

process:
	if (sk->sk_state == TCP_TIME_WAIT)
		goto do_time_wait;

	if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
		goto discard_and_relse;

	if (sk_filter(sk, skb, 0))
		goto discard_and_relse;

	skb->dev = NULL;

	bh_lock_sock(sk);
	ret = 0;
	if (!sock_owned_by_user(sk)) {
		if (!tcp_prequeue(sk, skb))
			ret = tcp_v6_do_rcv(sk, skb);
	} else
		sk_add_backlog(sk, skb);
	bh_unlock_sock(sk);

	sock_put(sk);
	return ret ? -1 : 0;

no_tcp_socket:
	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
		goto discard_it;

	if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {
bad_packet:
		TCP_INC_STATS_BH(TCP_MIB_INERRS);
	} else {
		tcp_v6_send_reset(skb);
	}

discard_it:

	/*
	 *	Discard frame
	 */

	kfree_skb(skb);
	return 0;

discard_and_relse:
	sock_put(sk);
	goto discard_it;

do_time_wait:
	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
		inet_twsk_put((struct inet_timewait_sock *)sk);
		goto discard_it;
	}

	if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {
		TCP_INC_STATS_BH(TCP_MIB_INERRS);
		inet_twsk_put((struct inet_timewait_sock *)sk);
		goto discard_it;
	}

	switch (tcp_timewait_state_process((struct inet_timewait_sock *)sk,
					   skb, th)) {
	case TCP_TW_SYN:
	{
		struct sock *sk2;

		sk2 = inet6_lookup_listener(&tcp_hashinfo,
					    &skb->nh.ipv6h->daddr,
					    ntohs(th->dest), inet6_iif(skb));
		if (sk2 != NULL) {
			struct inet_timewait_sock *tw = inet_twsk(sk);
			inet_twsk_deschedule(tw, &tcp_death_row);
			inet_twsk_put(tw);
			sk = sk2;
			goto process;
		}
		/* Fall through to ACK */
	}
	case TCP_TW_ACK:
		tcp_v6_timewait_ack(sk, skb);
		break;
	case TCP_TW_RST:
		goto no_tcp_socket;
	case TCP_TW_SUCCESS:;
	}
	goto discard_it;
}

static int tcp_v6_rebuild_header(struct sock *sk)
{
	int err;
	struct dst_entry *dst;
	struct ipv6_pinfo *np = inet6_sk(sk);

	dst = __sk_dst_check(sk, np->dst_cookie);

	if (dst == NULL) {
		struct inet_sock *inet = inet_sk(sk);
		struct in6_addr *final_p = NULL, final;
		struct flowi fl;

		memset(&fl, 0, sizeof(fl));
		fl.proto = IPPROTO_TCP;
		ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
		ipv6_addr_copy(&fl.fl6_src, &np->saddr);
		fl.fl6_flowlabel = np->flow_label;
		fl.oif = sk->sk_bound_dev_if;
		fl.fl_ip_dport = inet->dport;
		fl.fl_ip_sport = inet->sport;

		if (np->opt && np->opt->srcrt) {
			struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt;
			ipv6_addr_copy(&final, &fl.fl6_dst);
			ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
			final_p = &final;
		}

		err = ip6_dst_lookup(sk, &dst, &fl);
		if (err) {
			sk->sk_route_caps = 0;
			return err;
		}
		if (final_p)
			ipv6_addr_copy(&fl.fl6_dst, final_p);

		if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) {
			sk->sk_err_soft = -err;
			return err;
		}

		ip6_dst_store(sk, dst, NULL);
		sk->sk_route_caps = dst->dev->features &
				    ~(NETIF_F_IP_CSUM | NETIF_F_TSO);
	}

	return 0;
}

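/*
 * Transmit one segment for an established socket: revalidate or rebuild
 * the cached route (swapping in a routing-header first hop when present,
 * and restoring the real destination afterwards), then push the packet
 * out through ip6_xmit() with the socket's IPv6 options.
 */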
static int tcp_v6_xmit(struct sk_buff *skb, int ipfragok)
{
	struct sock *sk = skb->sk;
	struct inet_sock *inet = inet_sk(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct flowi fl;
	struct dst_entry *dst;
	struct in6_addr *final_p = NULL, final;

	memset(&fl, 0, sizeof(fl));
	fl.proto = IPPROTO_TCP;
	ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
	ipv6_addr_copy(&fl.fl6_src, &np->saddr);
	fl.fl6_flowlabel = np->flow_label;
	IP6_ECN_flow_xmit(sk, fl.fl6_flowlabel);
	fl.oif = sk->sk_bound_dev_if;
	fl.fl_ip_sport = inet->sport;
	fl.fl_ip_dport = inet->dport;

	if (np->opt && np->opt->srcrt) {
		struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt;
		ipv6_addr_copy(&final, &fl.fl6_dst);
		ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
		final_p = &final;
	}

	dst = __sk_dst_check(sk, np->dst_cookie);

	if (dst == NULL) {
		int err = ip6_dst_lookup(sk, &dst, &fl);

		if (err) {
			sk->sk_err_soft = -err;
			return err;
		}

		if (final_p)
			ipv6_addr_copy(&fl.fl6_dst, final_p);

		if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) {
			sk->sk_route_caps = 0;
			return err;
		}

		ip6_dst_store(sk, dst, NULL);
		sk->sk_route_caps = dst->dev->features &
				    ~(NETIF_F_IP_CSUM | NETIF_F_TSO);
	}

	skb->dst = dst_clone(dst);

	/* Restore final destination back after routing done */
	ipv6_addr_copy(&fl.fl6_dst, &np->daddr);

	return ip6_xmit(sk, skb, &fl, np->opt, 0);
}

static void v6_addr2sockaddr(struct sock *sk, struct sockaddr *uaddr)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) uaddr;

	sin6->sin6_family = AF_INET6;
	ipv6_addr_copy(&sin6->sin6_addr, &np->daddr);
	sin6->sin6_port = inet_sk(sk)->dport;
	/* We do not store received flowlabel for TCP */
	sin6->sin6_flowinfo = 0;
	sin6->sin6_scope_id = 0;
	if (sk->sk_bound_dev_if &&
	    ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL)
		sin6->sin6_scope_id = sk->sk_bound_dev_if;
}

static int tcp_v6_remember_stamp(struct sock *sk)
{
	/* Alas, not yet... */
	return 0;
}

static struct tcp_func ipv6_specific = {
	.queue_xmit	=	tcp_v6_xmit,
	.send_check	=	tcp_v6_send_check,
	.rebuild_header	=	tcp_v6_rebuild_header,
	.conn_request	=	tcp_v6_conn_request,
	.syn_recv_sock	=	tcp_v6_syn_recv_sock,
	.remember_stamp	=	tcp_v6_remember_stamp,
	.net_header_len	=	sizeof(struct ipv6hdr),

	.setsockopt	=	ipv6_setsockopt,
	.getsockopt	=	ipv6_getsockopt,
	.addr2sockaddr	=	v6_addr2sockaddr,
	.sockaddr_len	=	sizeof(struct sockaddr_in6)
};

/*
 *	TCP over IPv4 via INET6 API
 */

static struct tcp_func ipv6_mapped = {
	.queue_xmit	=	ip_queue_xmit,
	.send_check	=	tcp_v4_send_check,
	.rebuild_header	=	inet_sk_rebuild_header,
	.conn_request	=	tcp_v6_conn_request,
	.syn_recv_sock	=	tcp_v6_syn_recv_sock,
	.remember_stamp	=	tcp_v4_remember_stamp,
	.net_header_len	=	sizeof(struct iphdr),

	.setsockopt	=	ipv6_setsockopt,
	.getsockopt	=	ipv6_getsockopt,
	.addr2sockaddr	=	v6_addr2sockaddr,
	.sockaddr_len	=	sizeof(struct sockaddr_in6)
};

/* NOTE: A lot of things set to zero explicitly by call to
 *       sk_alloc() so need not be done here.
 */
static int tcp_v6_init_sock(struct sock *sk)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct tcp_sock *tp = tcp_sk(sk);

	skb_queue_head_init(&tp->out_of_order_queue);
	tcp_init_xmit_timers(sk);
	tcp_prequeue_init(tp);

	icsk->icsk_rto = TCP_TIMEOUT_INIT;
	tp->mdev = TCP_TIMEOUT_INIT;

	/* So many TCP implementations out there (incorrectly) count the
	 * initial SYN frame in their delayed-ACK and congestion control
	 * algorithms that we must have the following bandaid to talk
	 * efficiently to them.  -DaveM
	 */
	tp->snd_cwnd = 2;

	/* See draft-stevens-tcpca-spec-01 for discussion of the
	 * initialization of these values.
	 */
	tp->snd_ssthresh = 0x7fffffff;
	tp->snd_cwnd_clamp = ~0;
	tp->mss_cache = 536;

	tp->reordering = sysctl_tcp_reordering;

	sk->sk_state = TCP_CLOSE;

	tp->af_specific = &ipv6_specific;
	icsk->icsk_ca_ops = &tcp_init_congestion_ops;
	sk->sk_write_space = sk_stream_write_space;
	sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);

	sk->sk_sndbuf = sysctl_tcp_wmem[1];
	sk->sk_rcvbuf = sysctl_tcp_rmem[1];

	atomic_inc(&tcp_sockets_allocated);

	return 0;
}

static int tcp_v6_destroy_sock(struct sock *sk)
{
	tcp_v4_destroy_sock(sk);
	return inet6_destroy_sock(sk);
}

/* Proc filesystem TCPv6 sock list dumping. */
static void get_openreq6(struct seq_file *seq,
			 struct sock *sk, struct request_sock *req, int i, int uid)
{
	struct in6_addr *dest, *src;
	int ttd = req->expires - jiffies;

	if (ttd < 0)
		ttd = 0;

	src = &tcp6_rsk(req)->loc_addr;
	dest = &tcp6_rsk(req)->rmt_addr;
	seq_printf(seq,
		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
		   "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %p\n",
		   i,
		   src->s6_addr32[0], src->s6_addr32[1],
		   src->s6_addr32[2], src->s6_addr32[3],
		   ntohs(inet_sk(sk)->sport),
		   dest->s6_addr32[0], dest->s6_addr32[1],
		   dest->s6_addr32[2], dest->s6_addr32[3],
		   ntohs(inet_rsk(req)->rmt_port),
		   TCP_SYN_RECV,
		   0, 0, /* could print option size, but that is af dependent. */
		   1,    /* timers active (only the expire timer) */
		   jiffies_to_clock_t(ttd),
		   req->retrans,
		   uid,
		   0,    /* non standard timer */
		   0,    /* open_requests have no inode */
		   0, req);
}

static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
{
	struct in6_addr *dest, *src;
	__u16 destp, srcp;
	int timer_active;
	unsigned long timer_expires;
	struct inet_sock *inet = inet_sk(sp);
	struct tcp_sock *tp = tcp_sk(sp);
	const struct inet_connection_sock *icsk = inet_csk(sp);
	struct ipv6_pinfo *np = inet6_sk(sp);

	dest  = &np->daddr;
	src   = &np->rcv_saddr;
	destp = ntohs(inet->dport);
	srcp  = ntohs(inet->sport);

	if (icsk->icsk_pending == ICSK_TIME_RETRANS) {
		timer_active	= 1;
		timer_expires	= icsk->icsk_timeout;
	} else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
		timer_active	= 4;
		timer_expires	= icsk->icsk_timeout;
	} else if (timer_pending(&sp->sk_timer)) {
		timer_active	= 2;
		timer_expires	= sp->sk_timer.expires;
	} else {
		timer_active	= 0;
		timer_expires	= jiffies;
	}

	seq_printf(seq,
		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
		   "%02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p %u %u %u %u %d\n",
		   i,
		   src->s6_addr32[0], src->s6_addr32[1],
		   src->s6_addr32[2], src->s6_addr32[3], srcp,
		   dest->s6_addr32[0], dest->s6_addr32[1],
		   dest->s6_addr32[2], dest->s6_addr32[3], destp,
		   sp->sk_state,
		   tp->write_seq - tp->snd_una, tp->rcv_nxt - tp->copied_seq,
		   timer_active,
		   jiffies_to_clock_t(timer_expires - jiffies),
		   icsk->icsk_retransmits,
		   sock_i_uid(sp),
		   icsk->icsk_probes_out,
		   sock_i_ino(sp),
		   atomic_read(&sp->sk_refcnt), sp,
		   icsk->icsk_rto,
		   icsk->icsk_ack.ato,
		   (icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong,
		   tp->snd_cwnd, tp->snd_ssthresh >= 0xFFFF ? -1 : tp->snd_ssthresh
		   );
}

static void get_timewait6_sock(struct seq_file *seq,
			       struct inet_timewait_sock *tw, int i)
{
	struct in6_addr *dest, *src;
	__u16 destp, srcp;
	struct tcp6_timewait_sock *tcp6tw = tcp6_twsk((struct sock *)tw);
	int ttd = tw->tw_ttd - jiffies;

	if (ttd < 0)
		ttd = 0;

	dest  = &tcp6tw->tw_v6_daddr;
	src   = &tcp6tw->tw_v6_rcv_saddr;
	destp = ntohs(tw->tw_dport);
	srcp  = ntohs(tw->tw_sport);

	seq_printf(seq,
		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
		   "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %p\n",
		   i,
		   src->s6_addr32[0], src->s6_addr32[1],
		   src->s6_addr32[2], src->s6_addr32[3], srcp,
		   dest->s6_addr32[0], dest->s6_addr32[1],
		   dest->s6_addr32[2], dest->s6_addr32[3], destp,
		   tw->tw_substate, 0, 0,
		   3, jiffies_to_clock_t(ttd), 0, 0, 0, 0,
		   atomic_read(&tw->tw_refcnt), tw);
}

#ifdef CONFIG_PROC_FS
static int tcp6_seq_show(struct seq_file *seq, void *v)
{
	struct tcp_iter_state *st;

	if (v == SEQ_START_TOKEN) {
		seq_puts(seq,
			 "  sl  "
			 "local_address                         "
			 "remote_address                        "
			 "st tx_queue rx_queue tr tm->when retrnsmt"
			 "   uid  timeout inode\n");
		goto out;
	}
	st = seq->private;

	switch (st->state) {
	case TCP_SEQ_STATE_LISTENING:
	case TCP_SEQ_STATE_ESTABLISHED:
		get_tcp6_sock(seq, v, st->num);
		break;
	case TCP_SEQ_STATE_OPENREQ:
		get_openreq6(seq, st->syn_wait_sk, v, st->num, st->uid);
		break;
	case TCP_SEQ_STATE_TIME_WAIT:
		get_timewait6_sock(seq, v, st->num);
		break;
	}
out:
	return 0;
}

static struct file_operations tcp6_seq_fops;
static struct tcp_seq_afinfo tcp6_seq_afinfo = {
	.owner		= THIS_MODULE,
	.name		= "tcp6",
	.family		= AF_INET6,
	.seq_show	= tcp6_seq_show,
	.seq_fops	= &tcp6_seq_fops,
};

int __init tcp6_proc_init(void)
{
	return tcp_proc_register(&tcp6_seq_afinfo);
}

void tcp6_proc_exit(void)
{
	tcp_proc_unregister(&tcp6_seq_afinfo);
}
#endif

struct proto tcpv6_prot = {
	.name			= "TCPv6",
	.owner			= THIS_MODULE,
	.close			= tcp_close,
	.connect		= tcp_v6_connect,
	.disconnect		= tcp_disconnect,
	.accept			= inet_csk_accept,
	.ioctl			= tcp_ioctl,
	.init			= tcp_v6_init_sock,
	.destroy		= tcp_v6_destroy_sock,
	.shutdown		= tcp_shutdown,
	.setsockopt		= tcp_setsockopt,
	.getsockopt		= tcp_getsockopt,
	.sendmsg		= tcp_sendmsg,
	.recvmsg		= tcp_recvmsg,
	.backlog_rcv		= tcp_v6_do_rcv,
	.hash			= tcp_v6_hash,
	.unhash			= tcp_unhash,
	.get_port		= tcp_v6_get_port,
	.enter_memory_pressure	= tcp_enter_memory_pressure,
	.sockets_allocated	= &tcp_sockets_allocated,
	.memory_allocated	= &tcp_memory_allocated,
	.memory_pressure	= &tcp_memory_pressure,
	.orphan_count		= &tcp_orphan_count,
	.sysctl_mem		= sysctl_tcp_mem,
	.sysctl_wmem		= sysctl_tcp_wmem,
	.sysctl_rmem		= sysctl_tcp_rmem,
	.max_header		= MAX_TCP_HEADER,
	.obj_size		= sizeof(struct tcp6_sock),
	.twsk_obj_size		= sizeof(struct tcp6_timewait_sock),
	.rsk_prot		= &tcp6_request_sock_ops,
};

static struct inet6_protocol tcpv6_protocol = {
	.handler	=	tcp_v6_rcv,
	.err_handler	=	tcp_v6_err,
	.flags		=	INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
};

static struct inet_protosw tcpv6_protosw = {
	.type		=	SOCK_STREAM,
	.protocol	=	IPPROTO_TCP,
	.prot		=	&tcpv6_prot,
	.ops		=	&inet6_stream_ops,
	.capability	=	-1,
	.no_check	=	0,
	.flags		=	INET_PROTOSW_PERMANENT,
};

void __init tcpv6_init(void)
{
	/* register inet6 protocol */
	if (inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP) < 0)
		printk(KERN_ERR "tcpv6_init: Could not register protocol\n");
	inet6_register_protosw(&tcpv6_protosw);
}