/*
 *	TCP over IPv6
 *	Linux INET6 implementation
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *
 *	$Id: tcp_ipv6.c,v 1.144 2002/02/01 22:01:04 davem Exp $
 *
 *	Based on:
 *	linux/net/ipv4/tcp.c
 *	linux/net/ipv4/tcp_input.c
 *	linux/net/ipv4/tcp_output.c
 *
 *	Fixes:
 *	Hideaki YOSHIFUJI	:	sin6_scope_id support
 *	YOSHIFUJI Hideaki @USAGI and:	Support IPV6_V6ONLY socket option, which
 *	Alexey Kuznetsov		allows both IPv4 and IPv6 sockets to bind
 *					a single port at the same time.
 *	YOSHIFUJI Hideaki @USAGI:	convert /proc/net/tcp6 to seq_file.
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 */
#include <linux/module.h>
#include <linux/config.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/jiffies.h>
#include <linux/in.h>
#include <linux/in6.h>
#include <linux/netdevice.h>
#include <linux/init.h>
#include <linux/jhash.h>
#include <linux/ipsec.h>
#include <linux/times.h>

#include <linux/ipv6.h>
#include <linux/icmpv6.h>
#include <linux/random.h>

#include <net/tcp.h>
#include <net/ndisc.h>
#include <net/inet6_hashtables.h>
#include <net/inet6_connection_sock.h>
#include <net/ipv6.h>
#include <net/transp_v6.h>
#include <net/addrconf.h>
#include <net/ip6_route.h>
#include <net/ip6_checksum.h>
#include <net/inet_ecn.h>
#include <net/protocol.h>
#include <net/xfrm.h>
#include <net/snmp.h>
#include <net/dsfield.h>

#include <asm/uaccess.h>

#include <linux/proc_fs.h>
#include <linux/seq_file.h>
static void	tcp_v6_send_reset(struct sk_buff *skb);
static void	tcp_v6_reqsk_send_ack(struct sk_buff *skb, struct request_sock *req);
static void	tcp_v6_send_check(struct sock *sk, int len,
				  struct sk_buff *skb);

static int	tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);

static struct inet_connection_sock_af_ops ipv6_mapped;
static struct inet_connection_sock_af_ops ipv6_specific;
static int tcp_v6_get_port(struct sock *sk, unsigned short snum)
{
	return inet_csk_get_port(&tcp_hashinfo, sk, snum,
				 inet6_csk_bind_conflict);
}
static void tcp_v6_hash(struct sock *sk)
{
	if (sk->sk_state != TCP_CLOSE) {
		if (inet_csk(sk)->icsk_af_ops == &ipv6_mapped) {
			tcp_prot.hash(sk);
			return;
		}
		local_bh_disable();
		__inet6_hash(&tcp_hashinfo, sk);
		local_bh_enable();
	}
}
static __inline__ u16 tcp_v6_check(struct tcphdr *th, int len,
				   struct in6_addr *saddr,
				   struct in6_addr *daddr,
				   unsigned long base)
{
	return csum_ipv6_magic(saddr, daddr, len, IPPROTO_TCP, base);
}
static __u32 tcp_v6_init_sequence(struct sock *sk, struct sk_buff *skb)
{
	if (skb->protocol == htons(ETH_P_IPV6)) {
		return secure_tcpv6_sequence_number(skb->nh.ipv6h->daddr.s6_addr32,
						    skb->nh.ipv6h->saddr.s6_addr32,
						    skb->h.th->dest,
						    skb->h.th->source);
	} else {
		return secure_tcp_sequence_number(skb->nh.iph->daddr,
						  skb->nh.iph->saddr,
						  skb->h.th->dest,
						  skb->h.th->source);
	}
}
static int __tcp_v6_check_established(struct sock *sk, const __u16 lport,
				      struct inet_timewait_sock **twp)
{
	struct inet_sock *inet = inet_sk(sk);
	const struct ipv6_pinfo *np = inet6_sk(sk);
	const struct in6_addr *daddr = &np->rcv_saddr;
	const struct in6_addr *saddr = &np->daddr;
	const int dif = sk->sk_bound_dev_if;
	const u32 ports = INET_COMBINED_PORTS(inet->dport, lport);
	unsigned int hash = inet6_ehashfn(daddr, inet->num, saddr, inet->dport);
	struct inet_ehash_bucket *head = inet_ehash_bucket(&tcp_hashinfo, hash);
	struct sock *sk2;
	const struct hlist_node *node;
	struct inet_timewait_sock *tw;

	prefetch(head->chain.first);
	write_lock(&head->lock);

	/* Check TIME-WAIT sockets first. */
	sk_for_each(sk2, node, &(head + tcp_hashinfo.ehash_size)->chain) {
		const struct inet6_timewait_sock *tw6 = inet6_twsk(sk2);

		tw = inet_twsk(sk2);

		if (*((__u32 *)&(tw->tw_dport))	== ports	&&
		    sk2->sk_family		== PF_INET6	&&
		    ipv6_addr_equal(&tw6->tw_v6_daddr, saddr)	&&
		    ipv6_addr_equal(&tw6->tw_v6_rcv_saddr, daddr) &&
		    sk2->sk_bound_dev_if == sk->sk_bound_dev_if) {
			const struct tcp_timewait_sock *tcptw = tcp_twsk(sk2);
			struct tcp_sock *tp = tcp_sk(sk);

			if (tcptw->tw_ts_recent_stamp &&
			    (!twp ||
			     (sysctl_tcp_tw_reuse &&
			      xtime.tv_sec - tcptw->tw_ts_recent_stamp > 1))) {
				/* See comment in tcp_ipv4.c */
				tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2;
				if (!tp->write_seq)
					tp->write_seq = 1;
				tp->rx_opt.ts_recent = tcptw->tw_ts_recent;
				tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp;
				sock_hold(sk2);
				goto unique;
			} else
				goto not_unique;
		}
	}
	tw = NULL;

	/* And established part... */
	sk_for_each(sk2, node, &head->chain) {
		if (INET6_MATCH(sk2, hash, saddr, daddr, ports, dif))
			goto not_unique;
	}

unique:
	BUG_TRAP(sk_unhashed(sk));
	__sk_add_node(sk, &head->chain);
	sk->sk_hash = hash;
	sock_prot_inc_use(sk->sk_prot);
	write_unlock(&head->lock);

	if (twp) {
		*twp = tw;
		NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);
	} else if (tw) {
		/* Silly. Should hash-dance instead... */
		inet_twsk_deschedule(tw, &tcp_death_row);
		NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);

		inet_twsk_put(tw);
	}
	return 0;

not_unique:
	write_unlock(&head->lock);
	return -EADDRNOTAVAIL;
}
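/*
 * Ephemeral port selection for connect(): the search starts at an
 * offset derived from a keyed hash of the connection addresses plus a
 * static hint that advances after every successful bind, so concurrent
 * connects do not all probe the bind hash from the same starting port.
 */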
static inline u32 tcpv6_port_offset(const struct sock *sk)
{
	const struct inet_sock *inet = inet_sk(sk);
	const struct ipv6_pinfo *np = inet6_sk(sk);

	return secure_tcpv6_port_ephemeral(np->rcv_saddr.s6_addr32,
					   np->daddr.s6_addr32,
					   inet->dport);
}
static int tcp_v6_hash_connect(struct sock *sk)
{
	unsigned short snum = inet_sk(sk)->num;
	struct inet_bind_hashbucket *head;
	struct inet_bind_bucket *tb;
	int ret;

	if (!snum) {
		int low = sysctl_local_port_range[0];
		int high = sysctl_local_port_range[1];
		int range = high - low;
		int i;
		int port;
		static u32 hint;
		u32 offset = hint + tcpv6_port_offset(sk);
		struct hlist_node *node;
		struct inet_timewait_sock *tw = NULL;

		local_bh_disable();
		for (i = 1; i <= range; i++) {
			port = low + (i + offset) % range;
			head = &tcp_hashinfo.bhash[inet_bhashfn(port, tcp_hashinfo.bhash_size)];
			spin_lock(&head->lock);

			/* Does not bother with rcv_saddr checks,
			 * because the established check is already
			 * unique enough.
			 */
			inet_bind_bucket_for_each(tb, node, &head->chain) {
				if (tb->port == port) {
					BUG_TRAP(!hlist_empty(&tb->owners));
					if (tb->fastreuse >= 0)
						goto next_port;
					if (!__tcp_v6_check_established(sk,
									port,
									&tw))
						goto ok;
					goto next_port;
				}
			}

			tb = inet_bind_bucket_create(tcp_hashinfo.bind_bucket_cachep, head, port);
			if (!tb) {
				spin_unlock(&head->lock);
				break;
			}
			tb->fastreuse = -1;
			goto ok;

		next_port:
			spin_unlock(&head->lock);
		}
		local_bh_enable();

		return -EADDRNOTAVAIL;

ok:
		hint += i;

		/* Head lock still held and bh's disabled */
		inet_bind_hash(sk, tb, port);
		if (sk_unhashed(sk)) {
			inet_sk(sk)->sport = htons(port);
			__inet6_hash(&tcp_hashinfo, sk);
		}
		spin_unlock(&head->lock);

		if (tw) {
			inet_twsk_deschedule(tw, &tcp_death_row);
			inet_twsk_put(tw);
		}

		ret = 0;
		goto out;
	}

	head = &tcp_hashinfo.bhash[inet_bhashfn(snum, tcp_hashinfo.bhash_size)];
	tb = inet_csk(sk)->icsk_bind_hash;
	spin_lock_bh(&head->lock);

	if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) {
		__inet6_hash(&tcp_hashinfo, sk);
		spin_unlock_bh(&head->lock);
		return 0;
	} else {
		spin_unlock(&head->lock);
		/* No definite answer... Walk to established hash table */
		ret = __tcp_v6_check_established(sk, snum, NULL);
out:
		local_bh_enable();
		return ret;
	}
}
static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
			  int addr_len)
{
	struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
	struct inet_sock *inet = inet_sk(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	struct in6_addr *saddr = NULL, *final_p = NULL, final;
	struct flowi fl;
	struct dst_entry *dst;
	int addr_type;
	int err;

	if (addr_len < SIN6_LEN_RFC2133)
		return -EINVAL;

	if (usin->sin6_family != AF_INET6)
		return -EAFNOSUPPORT;

	memset(&fl, 0, sizeof(fl));

	if (np->sndflow) {
		fl.fl6_flowlabel = usin->sin6_flowinfo & IPV6_FLOWINFO_MASK;
		IP6_ECN_flow_init(fl.fl6_flowlabel);
		if (fl.fl6_flowlabel & IPV6_FLOWLABEL_MASK) {
			struct ip6_flowlabel *flowlabel;
			flowlabel = fl6_sock_lookup(sk, fl.fl6_flowlabel);
			if (flowlabel == NULL)
				return -EINVAL;
			ipv6_addr_copy(&usin->sin6_addr, &flowlabel->dst);
			fl6_sock_release(flowlabel);
		}
	}

	/*
	 *	connect() to INADDR_ANY means loopback (BSD'ism).
	 */

	if (ipv6_addr_any(&usin->sin6_addr))
		usin->sin6_addr.s6_addr[15] = 0x1;

	addr_type = ipv6_addr_type(&usin->sin6_addr);

	if (addr_type & IPV6_ADDR_MULTICAST)
		return -ENETUNREACH;

	if (addr_type & IPV6_ADDR_LINKLOCAL) {
		if (addr_len >= sizeof(struct sockaddr_in6) &&
		    usin->sin6_scope_id) {
			/* If interface is set while binding, indices
			 * must coincide.
			 */
			if (sk->sk_bound_dev_if &&
			    sk->sk_bound_dev_if != usin->sin6_scope_id)
				return -EINVAL;

			sk->sk_bound_dev_if = usin->sin6_scope_id;
		}

		/* Connect to link-local address requires an interface */
		if (!sk->sk_bound_dev_if)
			return -EINVAL;
	}

	if (tp->rx_opt.ts_recent_stamp &&
	    !ipv6_addr_equal(&np->daddr, &usin->sin6_addr)) {
		tp->rx_opt.ts_recent = 0;
		tp->rx_opt.ts_recent_stamp = 0;
		tp->write_seq = 0;
	}

	ipv6_addr_copy(&np->daddr, &usin->sin6_addr);
	np->flow_label = fl.fl6_flowlabel;

	/*
	 *	TCP over IPv4
	 */

	if (addr_type == IPV6_ADDR_MAPPED) {
		u32 exthdrlen = tp->ext_header_len;
		struct sockaddr_in sin;

		SOCK_DEBUG(sk, "connect: ipv4 mapped\n");

		if (__ipv6_only_sock(sk))
			return -ENETUNREACH;

		sin.sin_family = AF_INET;
		sin.sin_port = usin->sin6_port;
		sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];

		inet_csk(sk)->icsk_af_ops = &ipv6_mapped;
		sk->sk_backlog_rcv = tcp_v4_do_rcv;

		err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin));

		if (err) {
			tp->ext_header_len = exthdrlen;
			inet_csk(sk)->icsk_af_ops = &ipv6_specific;
			sk->sk_backlog_rcv = tcp_v6_do_rcv;
			goto failure;
		} else {
			ipv6_addr_set(&np->saddr, 0, 0, htonl(0x0000FFFF),
				      inet->saddr);
			ipv6_addr_set(&np->rcv_saddr, 0, 0, htonl(0x0000FFFF),
				      inet->rcv_saddr);
		}

		return err;
	}

	if (!ipv6_addr_any(&np->rcv_saddr))
		saddr = &np->rcv_saddr;

	fl.proto = IPPROTO_TCP;
	ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
	ipv6_addr_copy(&fl.fl6_src,
		       (saddr ? saddr : &np->saddr));
	fl.oif = sk->sk_bound_dev_if;
	fl.fl_ip_dport = usin->sin6_port;
	fl.fl_ip_sport = inet->sport;

	if (np->opt && np->opt->srcrt) {
		struct rt0_hdr *rt0 = (struct rt0_hdr *)np->opt->srcrt;
		ipv6_addr_copy(&final, &fl.fl6_dst);
		ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
		final_p = &final;
	}

	err = ip6_dst_lookup(sk, &dst, &fl);
	if (err)
		goto failure;
	if (final_p)
		ipv6_addr_copy(&fl.fl6_dst, final_p);

	if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0)
		goto failure;

	if (saddr == NULL) {
		saddr = &fl.fl6_src;
		ipv6_addr_copy(&np->rcv_saddr, saddr);
	}

	/* set the source address */
	ipv6_addr_copy(&np->saddr, saddr);
	inet->rcv_saddr = LOOPBACK4_IPV6;

	ip6_dst_store(sk, dst, NULL);
	sk->sk_route_caps = dst->dev->features &
		~(NETIF_F_IP_CSUM | NETIF_F_TSO);

	tp->ext_header_len = 0;
	if (np->opt)
		tp->ext_header_len = np->opt->opt_flen + np->opt->opt_nflen;

	tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);

	inet->dport = usin->sin6_port;

	tcp_set_state(sk, TCP_SYN_SENT);
	err = tcp_v6_hash_connect(sk);
	if (err)
		goto late_failure;

	if (!tp->write_seq)
		tp->write_seq = secure_tcpv6_sequence_number(np->saddr.s6_addr32,
							     np->daddr.s6_addr32,
							     inet->sport,
							     inet->dport);

	err = tcp_connect(sk);
	if (err)
		goto late_failure;

	return 0;

late_failure:
	tcp_set_state(sk, TCP_CLOSE);
	__sk_dst_reset(sk);
failure:
	inet->dport = 0;
	sk->sk_route_caps = 0;
	return err;
}
static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
		       int type, int code, int offset, __u32 info)
{
	struct ipv6hdr *hdr = (struct ipv6hdr *)skb->data;
	const struct tcphdr *th = (struct tcphdr *)(skb->data + offset);
	struct ipv6_pinfo *np;
	struct sock *sk;
	int err;
	struct tcp_sock *tp;
	__u32 seq;

	sk = inet6_lookup(&tcp_hashinfo, &hdr->daddr, th->dest, &hdr->saddr,
			  th->source, skb->dev->ifindex);

	if (sk == NULL) {
		ICMP6_INC_STATS_BH(__in6_dev_get(skb->dev), ICMP6_MIB_INERRORS);
		return;
	}

	if (sk->sk_state == TCP_TIME_WAIT) {
		inet_twsk_put((struct inet_timewait_sock *)sk);
		return;
	}

	bh_lock_sock(sk);
	if (sock_owned_by_user(sk))
		NET_INC_STATS_BH(LINUX_MIB_LOCKDROPPEDICMPS);

	if (sk->sk_state == TCP_CLOSE)
		goto out;

	tp = tcp_sk(sk);
	seq = ntohl(th->seq);
	if (sk->sk_state != TCP_LISTEN &&
	    !between(seq, tp->snd_una, tp->snd_nxt)) {
		NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS);
		goto out;
	}

	np = inet6_sk(sk);

	if (type == ICMPV6_PKT_TOOBIG) {
		struct dst_entry *dst = NULL;

		if (sock_owned_by_user(sk))
			goto out;
		if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
			goto out;

		/* icmp should have updated the destination cache entry */
		dst = __sk_dst_check(sk, np->dst_cookie);

		if (dst == NULL) {
			struct inet_sock *inet = inet_sk(sk);
			struct flowi fl;

			/* BUGGG_FUTURE: Again, it is not clear how
			   to handle the rthdr case. Ignore this complexity
			   for now.
			 */
			memset(&fl, 0, sizeof(fl));
			fl.proto = IPPROTO_TCP;
			ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
			ipv6_addr_copy(&fl.fl6_src, &np->saddr);
			fl.oif = sk->sk_bound_dev_if;
			fl.fl_ip_dport = inet->dport;
			fl.fl_ip_sport = inet->sport;

			if ((err = ip6_dst_lookup(sk, &dst, &fl))) {
				sk->sk_err_soft = -err;
				goto out;
			}

			if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) {
				sk->sk_err_soft = -err;
				goto out;
			}

		} else
			dst_hold(dst);

		if (tp->pmtu_cookie > dst_mtu(dst)) {
			tcp_sync_mss(sk, dst_mtu(dst));
			tcp_simple_retransmit(sk);
		} /* else let the usual retransmit timer handle it */
		dst_release(dst);
		goto out;
	}

	icmpv6_err_convert(type, code, &err);

	/* Might be for a request_sock */
	switch (sk->sk_state) {
		struct request_sock *req, **prev;
	case TCP_LISTEN:
		if (sock_owned_by_user(sk))
			goto out;

		req = inet6_csk_search_req(sk, &prev, th->dest, &hdr->daddr,
					   &hdr->saddr, inet6_iif(skb));
		if (!req)
			goto out;

		/* ICMPs are not backlogged, hence we cannot get
		 * an established socket here.
		 */
		BUG_TRAP(req->sk == NULL);

		if (seq != tcp_rsk(req)->snt_isn) {
			NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS);
			goto out;
		}

		inet_csk_reqsk_queue_drop(sk, req, prev);
		goto out;

	case TCP_SYN_SENT:
	case TCP_SYN_RECV:  /* Cannot happen.
			       It can, if SYNs are crossed. --ANK */
		if (!sock_owned_by_user(sk)) {
			TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS);
			sk->sk_err = err;
			sk->sk_error_report(sk);	/* Wake people up to see the error (see connect in sock.c) */

			tcp_done(sk);
		} else
			sk->sk_err_soft = err;
		goto out;
	}

	if (!sock_owned_by_user(sk) && np->recverr) {
		sk->sk_err = err;
		sk->sk_error_report(sk);
	} else
		sk->sk_err_soft = err;

out:
	bh_unlock_sock(sk);
	sock_put(sk);
}
static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req,
			      struct dst_entry *dst)
{
	struct inet6_request_sock *treq = inet6_rsk(req);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct sk_buff *skb;
	struct ipv6_txoptions *opt = NULL;
	struct in6_addr *final_p = NULL, final;
	struct flowi fl;
	int err = -1;

	memset(&fl, 0, sizeof(fl));
	fl.proto = IPPROTO_TCP;
	ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr);
	ipv6_addr_copy(&fl.fl6_src, &treq->loc_addr);
	fl.fl6_flowlabel = 0;
	fl.oif = treq->iif;
	fl.fl_ip_dport = inet_rsk(req)->rmt_port;
	fl.fl_ip_sport = inet_sk(sk)->sport;

	if (dst == NULL) {
		opt = np->opt;
		if (opt == NULL &&
		    np->rxopt.bits.osrcrt == 2 &&
		    treq->pktopts) {
			struct sk_buff *pktopts = treq->pktopts;
			struct inet6_skb_parm *rxopt = IP6CB(pktopts);
			if (rxopt->srcrt)
				opt = ipv6_invert_rthdr(sk, (struct ipv6_rt_hdr *)(pktopts->nh.raw + rxopt->srcrt));
		}

		if (opt && opt->srcrt) {
			struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt;
			ipv6_addr_copy(&final, &fl.fl6_dst);
			ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
			final_p = &final;
		}

		err = ip6_dst_lookup(sk, &dst, &fl);
		if (err)
			goto done;
		if (final_p)
			ipv6_addr_copy(&fl.fl6_dst, final_p);
		if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0)
			goto done;
	}

	skb = tcp_make_synack(sk, dst, req);
	if (skb) {
		struct tcphdr *th = skb->h.th;

		th->check = tcp_v6_check(th, skb->len,
					 &treq->loc_addr, &treq->rmt_addr,
					 csum_partial((char *)th, skb->len, skb->csum));

		ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr);
		err = ip6_xmit(sk, skb, &fl, opt, 0);
		if (err == NET_XMIT_CN)
			err = 0;
	}

done:
	if (opt && opt != np->opt)
		sock_kfree_s(sk, opt, opt->tot_len);
	return err;
}
static void tcp_v6_reqsk_destructor(struct request_sock *req)
{
	if (inet6_rsk(req)->pktopts)
		kfree_skb(inet6_rsk(req)->pktopts);
}
static struct request_sock_ops tcp6_request_sock_ops = {
	.family		=	AF_INET6,
	.obj_size	=	sizeof(struct tcp6_request_sock),
	.rtx_syn_ack	=	tcp_v6_send_synack,
	.send_ack	=	tcp_v6_reqsk_send_ack,
	.destructor	=	tcp_v6_reqsk_destructor,
	.send_reset	=	tcp_v6_send_reset
};
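/*
 * Transmit checksum: with CHECKSUM_HW the IPv6 pseudo-header sum is
 * seeded into th->check and skb->csum records the offset of the
 * checksum field, leaving the payload summation to the NIC; otherwise
 * the full checksum is computed here in software.
 */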
static void tcp_v6_send_check(struct sock *sk, int len, struct sk_buff *skb)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct tcphdr *th = skb->h.th;

	if (skb->ip_summed == CHECKSUM_HW) {
		th->check = ~csum_ipv6_magic(&np->saddr, &np->daddr, len, IPPROTO_TCP, 0);
		skb->csum = offsetof(struct tcphdr, check);
	} else {
		th->check = csum_ipv6_magic(&np->saddr, &np->daddr, len, IPPROTO_TCP,
					    csum_partial((char *)th, th->doff << 2,
							 skb->csum));
	}
}
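/*
 * Build and send a RST for a bad segment.  Per RFC 793, if the
 * offending segment carried an ACK the reset reuses its ack_seq as our
 * sequence number; otherwise we send sequence 0 together with an ACK
 * that covers the offending segment.
 */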
static void tcp_v6_send_reset(struct sk_buff *skb)
{
	struct tcphdr *th = skb->h.th, *t1;
	struct sk_buff *buff;
	struct flowi fl;

	if (th->rst)
		return;

	if (!ipv6_unicast_destination(skb))
		return;

	/*
	 * We need to grab some memory, and put together an RST,
	 * and then put it into the queue to be sent.
	 */

	buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + sizeof(struct tcphdr),
			 GFP_ATOMIC);
	if (buff == NULL)
		return;

	skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + sizeof(struct tcphdr));

	t1 = (struct tcphdr *) skb_push(buff, sizeof(struct tcphdr));

	/* Swap the send and the receive. */
	memset(t1, 0, sizeof(*t1));
	t1->dest = th->source;
	t1->source = th->dest;
	t1->doff = sizeof(*t1)/4;
	t1->rst = 1;

	if (th->ack) {
		t1->seq = th->ack_seq;
	} else {
		t1->ack = 1;
		t1->ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin
				    + skb->len - (th->doff << 2));
	}

	buff->csum = csum_partial((char *)t1, sizeof(*t1), 0);

	memset(&fl, 0, sizeof(fl));
	ipv6_addr_copy(&fl.fl6_dst, &skb->nh.ipv6h->saddr);
	ipv6_addr_copy(&fl.fl6_src, &skb->nh.ipv6h->daddr);

	t1->check = csum_ipv6_magic(&fl.fl6_src, &fl.fl6_dst,
				    sizeof(*t1), IPPROTO_TCP,
				    buff->csum);

	fl.proto = IPPROTO_TCP;
	fl.oif = inet6_iif(skb);
	fl.fl_ip_dport = t1->dest;
	fl.fl_ip_sport = t1->source;

	/* sk = NULL, but it is safe for now. RST socket required. */
	if (!ip6_dst_lookup(NULL, &buff->dst, &fl)) {

		if (xfrm_lookup(&buff->dst, &fl, NULL, 0) >= 0) {
			ip6_xmit(NULL, buff, &fl, NULL, 0);
			TCP_INC_STATS_BH(TCP_MIB_OUTSEGS);
			TCP_INC_STATS_BH(TCP_MIB_OUTRSTS);
			return;
		}
	}

	kfree_skb(buff);
}
static void tcp_v6_send_ack(struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 ts)
{
	struct tcphdr *th = skb->h.th, *t1;
	struct sk_buff *buff;
	struct flowi fl;
	int tot_len = sizeof(struct tcphdr);

	if (ts)
		tot_len += 3*4;

	buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + tot_len,
			 GFP_ATOMIC);
	if (buff == NULL)
		return;

	skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + tot_len);

	t1 = (struct tcphdr *) skb_push(buff, tot_len);

	/* Swap the send and the receive. */
	memset(t1, 0, sizeof(*t1));
	t1->dest = th->source;
	t1->source = th->dest;
	t1->doff = tot_len/4;
	t1->seq = htonl(seq);
	t1->ack_seq = htonl(ack);
	t1->ack = 1;
	t1->window = htons(win);

	if (ts) {
		u32 *ptr = (u32 *)(t1 + 1);
		*ptr++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
			       (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
		*ptr++ = htonl(tcp_time_stamp);
		*ptr = htonl(ts);
	}

	buff->csum = csum_partial((char *)t1, tot_len, 0);

	memset(&fl, 0, sizeof(fl));
	ipv6_addr_copy(&fl.fl6_dst, &skb->nh.ipv6h->saddr);
	ipv6_addr_copy(&fl.fl6_src, &skb->nh.ipv6h->daddr);

	t1->check = csum_ipv6_magic(&fl.fl6_src, &fl.fl6_dst,
				    tot_len, IPPROTO_TCP,
				    buff->csum);

	fl.proto = IPPROTO_TCP;
	fl.oif = inet6_iif(skb);
	fl.fl_ip_dport = t1->dest;
	fl.fl_ip_sport = t1->source;

	if (!ip6_dst_lookup(NULL, &buff->dst, &fl)) {
		if (xfrm_lookup(&buff->dst, &fl, NULL, 0) >= 0) {
			ip6_xmit(NULL, buff, &fl, NULL, 0);
			TCP_INC_STATS_BH(TCP_MIB_OUTSEGS);
			return;
		}
	}

	kfree_skb(buff);
}
static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
{
	struct inet_timewait_sock *tw = inet_twsk(sk);
	const struct tcp_timewait_sock *tcptw = tcp_twsk(sk);

	tcp_v6_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
			tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
			tcptw->tw_ts_recent);

	inet_twsk_put(tw);
}
static void tcp_v6_reqsk_send_ack(struct sk_buff *skb, struct request_sock *req)
{
	tcp_v6_send_ack(skb, tcp_rsk(req)->snt_isn + 1, tcp_rsk(req)->rcv_isn + 1, req->rcv_wnd, req->ts_recent);
}
static struct sock *tcp_v6_hnd_req(struct sock *sk, struct sk_buff *skb)
{
	struct request_sock *req, **prev;
	const struct tcphdr *th = skb->h.th;
	struct sock *nsk;

	/* Find possible connection requests. */
	req = inet6_csk_search_req(sk, &prev, th->source,
				   &skb->nh.ipv6h->saddr,
				   &skb->nh.ipv6h->daddr, inet6_iif(skb));
	if (req)
		return tcp_check_req(sk, skb, req, prev);

	nsk = __inet6_lookup_established(&tcp_hashinfo, &skb->nh.ipv6h->saddr,
					 th->source, &skb->nh.ipv6h->daddr,
					 ntohs(th->dest), inet6_iif(skb));

	if (nsk) {
		if (nsk->sk_state != TCP_TIME_WAIT) {
			bh_lock_sock(nsk);
			return nsk;
		}
		inet_twsk_put((struct inet_timewait_sock *)nsk);
		return NULL;
	}

#if 0 /*def CONFIG_SYN_COOKIES*/
	if (!th->rst && !th->syn && th->ack)
		sk = cookie_v6_check(sk, skb, &(IPCB(skb)->opt));
#endif
	return sk;
}
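/*
 * tcp_v6_hnd_req() matches an incoming segment on a listening socket
 * against the pending connection-request queue and the established
 * table before falling back to the listener itself.  Note that with
 * SYN cookies compiled out (the #if 0 block above), a bare ACK that
 * does not match any request cannot recover a dropped request.
 */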
/* FIXME: this is substantially similar to the ipv4 code.
 * Can some kind of merge be done? -- erics
 */
static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
{
	struct inet6_request_sock *treq;
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct tcp_options_received tmp_opt;
	struct tcp_sock *tp = tcp_sk(sk);
	struct request_sock *req = NULL;
	__u32 isn = TCP_SKB_CB(skb)->when;

	if (skb->protocol == htons(ETH_P_IP))
		return tcp_v4_conn_request(sk, skb);

	if (!ipv6_unicast_destination(skb))
		goto drop;

	/*
	 *	There are no SYN attacks on IPv6, yet...
	 */
	if (inet_csk_reqsk_queue_is_full(sk) && !isn) {
		if (net_ratelimit())
			printk(KERN_INFO "TCPv6: dropping request, synflood is possible\n");
		goto drop;
	}

	if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1)
		goto drop;

	req = inet6_reqsk_alloc(&tcp6_request_sock_ops);
	if (req == NULL)
		goto drop;

	tcp_clear_options(&tmp_opt);
	tmp_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
	tmp_opt.user_mss = tp->rx_opt.user_mss;

	tcp_parse_options(skb, &tmp_opt, 0);

	tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
	tcp_openreq_init(req, &tmp_opt, skb);

	treq = inet6_rsk(req);
	ipv6_addr_copy(&treq->rmt_addr, &skb->nh.ipv6h->saddr);
	ipv6_addr_copy(&treq->loc_addr, &skb->nh.ipv6h->daddr);
	TCP_ECN_create_request(req, skb->h.th);
	treq->pktopts = NULL;
	if (ipv6_opt_accepted(sk, skb) ||
	    np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo ||
	    np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) {
		atomic_inc(&skb->users);
		treq->pktopts = skb;
	}
	treq->iif = sk->sk_bound_dev_if;

	/* So that link locals have meaning */
	if (!sk->sk_bound_dev_if &&
	    ipv6_addr_type(&treq->rmt_addr) & IPV6_ADDR_LINKLOCAL)
		treq->iif = inet6_iif(skb);

	if (isn == 0)
		isn = tcp_v6_init_sequence(sk, skb);

	tcp_rsk(req)->snt_isn = isn;

	if (tcp_v6_send_synack(sk, req, NULL))
		goto drop;

	inet6_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
	return 0;

drop:
	if (req)
		reqsk_free(req);

	TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS);
	return 0; /* don't send reset */
}
static struct sock *tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
					 struct request_sock *req,
					 struct dst_entry *dst)
{
	struct inet6_request_sock *treq = inet6_rsk(req);
	struct ipv6_pinfo *newnp, *np = inet6_sk(sk);
	struct tcp6_sock *newtcp6sk;
	struct inet_sock *newinet;
	struct tcp_sock *newtp;
	struct sock *newsk;
	struct ipv6_txoptions *opt;

	if (skb->protocol == htons(ETH_P_IP)) {
		/*
		 *	v6 mapped
		 */

		newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst);

		if (newsk == NULL)
			return NULL;

		newtcp6sk = (struct tcp6_sock *)newsk;
		inet_sk(newsk)->pinet6 = &newtcp6sk->inet6;

		newinet = inet_sk(newsk);
		newnp = inet6_sk(newsk);
		newtp = tcp_sk(newsk);

		memcpy(newnp, np, sizeof(struct ipv6_pinfo));

		ipv6_addr_set(&newnp->daddr, 0, 0, htonl(0x0000FFFF),
			      newinet->daddr);

		ipv6_addr_set(&newnp->saddr, 0, 0, htonl(0x0000FFFF),
			      newinet->saddr);

		ipv6_addr_copy(&newnp->rcv_saddr, &newnp->saddr);

		inet_csk(newsk)->icsk_af_ops = &ipv6_mapped;
		newsk->sk_backlog_rcv = tcp_v4_do_rcv;
		newnp->pktoptions  = NULL;
		newnp->opt	   = NULL;
		newnp->mcast_oif   = inet6_iif(skb);
		newnp->mcast_hops  = skb->nh.ipv6h->hop_limit;

		/*
		 * No need to charge this sock to the relevant IPv6 refcnt debug socks count
		 * here, tcp_create_openreq_child now does this for us, see the comment in
		 * that function for the gory details. -acme
		 */

		/* This is a tricky place. Until this moment IPv4 tcp
		   worked with the IPv6 icsk.icsk_af_ops.
		   Sync it now.
		 */
		tcp_sync_mss(newsk, newtp->pmtu_cookie);

		return newsk;
	}

	opt = np->opt;

	if (sk_acceptq_is_full(sk))
		goto out_overflow;

	if (np->rxopt.bits.osrcrt == 2 &&
	    opt == NULL && treq->pktopts) {
		struct inet6_skb_parm *rxopt = IP6CB(treq->pktopts);
		if (rxopt->srcrt)
			opt = ipv6_invert_rthdr(sk, (struct ipv6_rt_hdr *)(treq->pktopts->nh.raw + rxopt->srcrt));
	}

	if (dst == NULL) {
		struct in6_addr *final_p = NULL, final;
		struct flowi fl;

		memset(&fl, 0, sizeof(fl));
		fl.proto = IPPROTO_TCP;
		ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr);
		if (opt && opt->srcrt) {
			struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt;
			ipv6_addr_copy(&final, &fl.fl6_dst);
			ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
			final_p = &final;
		}
		ipv6_addr_copy(&fl.fl6_src, &treq->loc_addr);
		fl.oif = sk->sk_bound_dev_if;
		fl.fl_ip_dport = inet_rsk(req)->rmt_port;
		fl.fl_ip_sport = inet_sk(sk)->sport;

		if (ip6_dst_lookup(sk, &dst, &fl))
			goto out;

		if (final_p)
			ipv6_addr_copy(&fl.fl6_dst, final_p);

		if ((xfrm_lookup(&dst, &fl, sk, 0)) < 0)
			goto out;
	}

	newsk = tcp_create_openreq_child(sk, req, skb);
	if (newsk == NULL)
		goto out;

	/*
	 * No need to charge this sock to the relevant IPv6 refcnt debug socks
	 * count here, tcp_create_openreq_child now does this for us, see the
	 * comment in that function for the gory details. -acme
	 */

	ip6_dst_store(newsk, dst, NULL);
	newsk->sk_route_caps = dst->dev->features &
		~(NETIF_F_IP_CSUM | NETIF_F_TSO);

	newtcp6sk = (struct tcp6_sock *)newsk;
	inet_sk(newsk)->pinet6 = &newtcp6sk->inet6;

	newtp = tcp_sk(newsk);
	newinet = inet_sk(newsk);
	newnp = inet6_sk(newsk);

	memcpy(newnp, np, sizeof(struct ipv6_pinfo));

	ipv6_addr_copy(&newnp->daddr, &treq->rmt_addr);
	ipv6_addr_copy(&newnp->saddr, &treq->loc_addr);
	ipv6_addr_copy(&newnp->rcv_saddr, &treq->loc_addr);
	newsk->sk_bound_dev_if = treq->iif;

	/* Now IPv6 options...

	   First: no IPv4 options.
	 */
	newinet->opt = NULL;

	/* Clone RX bits */
	newnp->rxopt.all = np->rxopt.all;

	/* Clone pktoptions received with SYN */
	newnp->pktoptions = NULL;
	if (treq->pktopts != NULL) {
		newnp->pktoptions = skb_clone(treq->pktopts, GFP_ATOMIC);
		kfree_skb(treq->pktopts);
		treq->pktopts = NULL;
		if (newnp->pktoptions)
			skb_set_owner_r(newnp->pktoptions, newsk);
	}
	newnp->opt	  = NULL;
	newnp->mcast_oif  = inet6_iif(skb);
	newnp->mcast_hops = skb->nh.ipv6h->hop_limit;

	/* Clone native IPv6 options from the listening socket (if any).

	   Yes, keeping a reference count would be much more clever,
	   but we do one more thing here: reattach optmem to newsk.
	 */
	if (opt) {
		newnp->opt = ipv6_dup_options(newsk, opt);
		if (opt != np->opt)
			sock_kfree_s(sk, opt, opt->tot_len);
	}

	newtp->ext_header_len = 0;
	if (newnp->opt)
		newtp->ext_header_len = newnp->opt->opt_nflen +
					newnp->opt->opt_flen;

	tcp_sync_mss(newsk, dst_mtu(dst));
	newtp->advmss = dst_metric(dst, RTAX_ADVMSS);
	tcp_initialize_rcv_mss(newsk);

	newinet->daddr = newinet->saddr = newinet->rcv_saddr = LOOPBACK4_IPV6;

	__inet6_hash(&tcp_hashinfo, newsk);
	inet_inherit_port(&tcp_hashinfo, sk, newsk);

	return newsk;

out_overflow:
	NET_INC_STATS_BH(LINUX_MIB_LISTENOVERFLOWS);
out:
	NET_INC_STATS_BH(LINUX_MIB_LISTENDROPS);
	if (opt && opt != np->opt)
		sock_kfree_s(sk, opt, opt->tot_len);
	dst_release(dst);
	return NULL;
}
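/*
 * Receive checksum: when the NIC has already summed the packet
 * (CHECKSUM_HW), the result is validated against the IPv6 pseudo-header
 * here.  Otherwise the pseudo-header sum is seeded into skb->csum and
 * short packets (<= 76 bytes) are verified immediately, while longer
 * ones are checked later via tcp_checksum_complete().
 */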
static int tcp_v6_checksum_init(struct sk_buff *skb)
{
	if (skb->ip_summed == CHECKSUM_HW) {
		if (!tcp_v6_check(skb->h.th, skb->len, &skb->nh.ipv6h->saddr,
				  &skb->nh.ipv6h->daddr, skb->csum)) {
			skb->ip_summed = CHECKSUM_UNNECESSARY;
			return 0;
		}
	}

	skb->csum = ~tcp_v6_check(skb->h.th, skb->len, &skb->nh.ipv6h->saddr,
				  &skb->nh.ipv6h->daddr, 0);

	if (skb->len <= 76) {
		return __skb_checksum_complete(skb);
	}
	return 0;
}
/* The socket must have its spinlock held when we get
 * here.
 *
 * We have a potential double-lock case here, so even when
 * doing backlog processing we use the BH locking scheme.
 * This is because we cannot sleep with the original spinlock
 * held.
 */
static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct tcp_sock *tp;
	struct sk_buff *opt_skb = NULL;

	/* Imagine: socket is IPv6. IPv4 packet arrives,
	   goes to IPv4 receive handler and backlogged.
	   From backlog it always goes here. Kerboom...
	   Fortunately, tcp_rcv_established and rcv_established
	   handle them correctly, but it is not the case with
	   tcp_v6_hnd_req and tcp_v6_send_reset().   --ANK
	 */

	if (skb->protocol == htons(ETH_P_IP))
		return tcp_v4_do_rcv(sk, skb);

	if (sk_filter(sk, skb, 0))
		goto discard;

	/*
	 *	socket locking is here for SMP purposes as backlog rcv
	 *	is currently called with bh processing disabled.
	 */

	/* Do Stevens' IPV6_PKTOPTIONS.

	   Yes, guys, it is the only place in our code, where we
	   may make it not affecting IPv4.
	   The rest of the code is protocol independent,
	   and I do not like the idea of uglifying IPv4.

	   Actually, the whole idea behind IPV6_PKTOPTIONS
	   looks not very well thought out. For now we latch
	   options, received in the last packet, enqueued
	   by tcp. Feel free to propose better solution.
					       --ANK (980728)
	 */
	if (np->rxopt.all)
		opt_skb = skb_clone(skb, GFP_ATOMIC);

	if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
		TCP_CHECK_TIMER(sk);
		if (tcp_rcv_established(sk, skb, skb->h.th, skb->len))
			goto reset;
		TCP_CHECK_TIMER(sk);
		if (opt_skb)
			goto ipv6_pktoptions;
		return 0;
	}

	if (skb->len < (skb->h.th->doff << 2) || tcp_checksum_complete(skb))
		goto csum_err;

	if (sk->sk_state == TCP_LISTEN) {
		struct sock *nsk = tcp_v6_hnd_req(sk, skb);
		if (!nsk)
			goto discard;

		/*
		 * Queue it on the new socket if the new socket is active,
		 * otherwise we just shortcircuit this and continue with
		 * the new socket..
		 */
		if (nsk != sk) {
			if (tcp_child_process(sk, nsk, skb))
				goto reset;
			if (opt_skb)
				__kfree_skb(opt_skb);
			return 0;
		}
	}

	TCP_CHECK_TIMER(sk);
	if (tcp_rcv_state_process(sk, skb, skb->h.th, skb->len))
		goto reset;
	TCP_CHECK_TIMER(sk);
	if (opt_skb)
		goto ipv6_pktoptions;
	return 0;

reset:
	tcp_v6_send_reset(skb);
discard:
	if (opt_skb)
		__kfree_skb(opt_skb);
	kfree_skb(skb);
	return 0;
csum_err:
	TCP_INC_STATS_BH(TCP_MIB_INERRS);
	goto discard;


ipv6_pktoptions:
	/* Do you ask, what is it?

	   1. skb was enqueued by tcp.
	   2. skb is added to tail of read queue, rather than out of order.
	   3. socket is not in passive state.
	   4. Finally, it really contains options, which user wants to receive.
	 */
	tp = tcp_sk(sk);
	if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt &&
	    !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
		if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo)
			np->mcast_oif = inet6_iif(opt_skb);
		if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim)
			np->mcast_hops = opt_skb->nh.ipv6h->hop_limit;
		if (ipv6_opt_accepted(sk, opt_skb)) {
			skb_set_owner_r(opt_skb, sk);
			opt_skb = xchg(&np->pktoptions, opt_skb);
		} else {
			__kfree_skb(opt_skb);
			opt_skb = xchg(&np->pktoptions, NULL);
		}
	}

	if (opt_skb)
		kfree_skb(opt_skb);
	return 0;
}
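/*
 * tcp_v6_rcv() is the protocol entry point: it validates the TCP
 * header and checksum, fills in the skb control block, looks the
 * segment up in the socket tables, and then either processes it
 * directly, puts it on the prequeue, or backlogs it when the socket is
 * owned by user context.
 */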
static int tcp_v6_rcv(struct sk_buff **pskb, unsigned int *nhoffp)
{
	struct sk_buff *skb = *pskb;
	struct tcphdr *th;
	struct sock *sk;
	int ret;

	if (skb->pkt_type != PACKET_HOST)
		goto discard_it;

	/*
	 *	Count it even if it's bad.
	 */
	TCP_INC_STATS_BH(TCP_MIB_INSEGS);

	if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
		goto discard_it;

	th = skb->h.th;

	if (th->doff < sizeof(struct tcphdr)/4)
		goto bad_packet;
	if (!pskb_may_pull(skb, th->doff*4))
		goto discard_it;

	if (skb->ip_summed != CHECKSUM_UNNECESSARY &&
	    tcp_v6_checksum_init(skb))
		goto bad_packet;

	th = skb->h.th;
	TCP_SKB_CB(skb)->seq = ntohl(th->seq);
	TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
				    skb->len - th->doff*4);
	TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
	TCP_SKB_CB(skb)->when = 0;
	TCP_SKB_CB(skb)->flags = ipv6_get_dsfield(skb->nh.ipv6h);
	TCP_SKB_CB(skb)->sacked = 0;

	sk = __inet6_lookup(&tcp_hashinfo, &skb->nh.ipv6h->saddr, th->source,
			    &skb->nh.ipv6h->daddr, ntohs(th->dest),
			    inet6_iif(skb));

	if (!sk)
		goto no_tcp_socket;

process:
	if (sk->sk_state == TCP_TIME_WAIT)
		goto do_time_wait;

	if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
		goto discard_and_relse;

	if (sk_filter(sk, skb, 0))
		goto discard_and_relse;

	skb->dev = NULL;

	bh_lock_sock(sk);
	ret = 0;
	if (!sock_owned_by_user(sk)) {
		if (!tcp_prequeue(sk, skb))
			ret = tcp_v6_do_rcv(sk, skb);
	} else
		sk_add_backlog(sk, skb);
	bh_unlock_sock(sk);

	sock_put(sk);
	return ret ? -1 : 0;

no_tcp_socket:
	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
		goto discard_it;

	if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {
bad_packet:
		TCP_INC_STATS_BH(TCP_MIB_INERRS);
	} else {
		tcp_v6_send_reset(skb);
	}

discard_it:

	/*
	 *	Discard frame
	 */

	kfree_skb(skb);
	return 0;

discard_and_relse:
	sock_put(sk);
	goto discard_it;

do_time_wait:
	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
		inet_twsk_put((struct inet_timewait_sock *)sk);
		goto discard_it;
	}

	if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {
		TCP_INC_STATS_BH(TCP_MIB_INERRS);
		inet_twsk_put((struct inet_timewait_sock *)sk);
		goto discard_it;
	}

	switch (tcp_timewait_state_process((struct inet_timewait_sock *)sk,
					   skb, th)) {
	case TCP_TW_SYN:
	{
		struct sock *sk2;

		sk2 = inet6_lookup_listener(&tcp_hashinfo,
					    &skb->nh.ipv6h->daddr,
					    ntohs(th->dest), inet6_iif(skb));
		if (sk2 != NULL) {
			struct inet_timewait_sock *tw = inet_twsk(sk);
			inet_twsk_deschedule(tw, &tcp_death_row);
			inet_twsk_put(tw);
			sk = sk2;
			goto process;
		}
		/* Fall through to ACK */
	}
	case TCP_TW_ACK:
		tcp_v6_timewait_ack(sk, skb);
		break;
	case TCP_TW_RST:
		goto no_tcp_socket;
	case TCP_TW_SUCCESS:;
	}
	goto discard_it;
}
static int tcp_v6_remember_stamp(struct sock *sk)
{
	/* Alas, not yet... */
	return 0;
}
static struct inet_connection_sock_af_ops ipv6_specific = {
	.queue_xmit	=	inet6_csk_xmit,
	.send_check	=	tcp_v6_send_check,
	.rebuild_header	=	inet6_sk_rebuild_header,
	.conn_request	=	tcp_v6_conn_request,
	.syn_recv_sock	=	tcp_v6_syn_recv_sock,
	.remember_stamp	=	tcp_v6_remember_stamp,
	.net_header_len	=	sizeof(struct ipv6hdr),

	.setsockopt	=	ipv6_setsockopt,
	.getsockopt	=	ipv6_getsockopt,
	.addr2sockaddr	=	inet6_csk_addr2sockaddr,
	.sockaddr_len	=	sizeof(struct sockaddr_in6)
};
/*
 *	TCP over IPv4 via INET6 API
 */

static struct inet_connection_sock_af_ops ipv6_mapped = {
	.queue_xmit	=	ip_queue_xmit,
	.send_check	=	tcp_v4_send_check,
	.rebuild_header	=	inet_sk_rebuild_header,
	.conn_request	=	tcp_v6_conn_request,
	.syn_recv_sock	=	tcp_v6_syn_recv_sock,
	.remember_stamp	=	tcp_v4_remember_stamp,
	.net_header_len	=	sizeof(struct iphdr),

	.setsockopt	=	ipv6_setsockopt,
	.getsockopt	=	ipv6_getsockopt,
	.addr2sockaddr	=	inet6_csk_addr2sockaddr,
	.sockaddr_len	=	sizeof(struct sockaddr_in6)
};
/* NOTE: A lot of things are set to zero explicitly by the call to
 *       sk_alloc(), so they need not be done here.
 */
static int tcp_v6_init_sock(struct sock *sk)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct tcp_sock *tp = tcp_sk(sk);

	skb_queue_head_init(&tp->out_of_order_queue);
	tcp_init_xmit_timers(sk);
	tcp_prequeue_init(tp);

	icsk->icsk_rto = TCP_TIMEOUT_INIT;
	tp->mdev = TCP_TIMEOUT_INIT;

	/* So many TCP implementations out there (incorrectly) count the
	 * initial SYN frame in their delayed-ACK and congestion control
	 * algorithms that we must have the following bandaid to talk
	 * efficiently to them.  -DaveM
	 */
	tp->snd_cwnd = 2;

	/* See draft-stevens-tcpca-spec-01 for discussion of the
	 * initialization of these values.
	 */
	tp->snd_ssthresh = 0x7fffffff;
	tp->snd_cwnd_clamp = ~0;
	tp->mss_cache = 536;

	tp->reordering = sysctl_tcp_reordering;

	sk->sk_state = TCP_CLOSE;

	icsk->icsk_af_ops = &ipv6_specific;
	icsk->icsk_ca_ops = &tcp_init_congestion_ops;
	sk->sk_write_space = sk_stream_write_space;
	sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);

	sk->sk_sndbuf = sysctl_tcp_wmem[1];
	sk->sk_rcvbuf = sysctl_tcp_rmem[1];

	atomic_inc(&tcp_sockets_allocated);

	return 0;
}
static int tcp_v6_destroy_sock(struct sock *sk)
{
	tcp_v4_destroy_sock(sk);
	return inet6_destroy_sock(sk);
}
/* Proc filesystem TCPv6 sock list dumping. */
static void get_openreq6(struct seq_file *seq,
			 struct sock *sk, struct request_sock *req, int i, int uid)
{
	int ttd = req->expires - jiffies;
	struct in6_addr *src = &inet6_rsk(req)->loc_addr;
	struct in6_addr *dest = &inet6_rsk(req)->rmt_addr;

	if (ttd < 0)
		ttd = 0;

	seq_printf(seq,
		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
		   "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %p\n",
		   i,
		   src->s6_addr32[0], src->s6_addr32[1],
		   src->s6_addr32[2], src->s6_addr32[3],
		   ntohs(inet_sk(sk)->sport),
		   dest->s6_addr32[0], dest->s6_addr32[1],
		   dest->s6_addr32[2], dest->s6_addr32[3],
		   ntohs(inet_rsk(req)->rmt_port),
		   TCP_SYN_RECV,
		   0, 0, /* could print option size, but that is af dependent. */
		   1,    /* timers active (only the expire timer) */
		   jiffies_to_clock_t(ttd),
		   req->retrans,
		   uid,
		   0,  /* non standard timer */
		   0,  /* open_requests have no inode */
		   0, req);
}
static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
{
	struct in6_addr *dest, *src;
	__u16 destp, srcp;
	int timer_active;
	unsigned long timer_expires;
	struct inet_sock *inet = inet_sk(sp);
	struct tcp_sock *tp = tcp_sk(sp);
	const struct inet_connection_sock *icsk = inet_csk(sp);
	struct ipv6_pinfo *np = inet6_sk(sp);

	dest  = &np->daddr;
	src   = &np->rcv_saddr;
	destp = ntohs(inet->dport);
	srcp  = ntohs(inet->sport);

	if (icsk->icsk_pending == ICSK_TIME_RETRANS) {
		timer_active	= 1;
		timer_expires	= icsk->icsk_timeout;
	} else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
		timer_active	= 4;
		timer_expires	= icsk->icsk_timeout;
	} else if (timer_pending(&sp->sk_timer)) {
		timer_active	= 2;
		timer_expires	= sp->sk_timer.expires;
	} else {
		timer_active	= 0;
		timer_expires	= jiffies;
	}

	seq_printf(seq,
		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
		   "%02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p %u %u %u %u %d\n",
		   i,
		   src->s6_addr32[0], src->s6_addr32[1],
		   src->s6_addr32[2], src->s6_addr32[3], srcp,
		   dest->s6_addr32[0], dest->s6_addr32[1],
		   dest->s6_addr32[2], dest->s6_addr32[3], destp,
		   sp->sk_state,
		   tp->write_seq - tp->snd_una, tp->rcv_nxt - tp->copied_seq,
		   timer_active,
		   jiffies_to_clock_t(timer_expires - jiffies),
		   icsk->icsk_retransmits,
		   sock_i_uid(sp),
		   icsk->icsk_probes_out,
		   sock_i_ino(sp),
		   atomic_read(&sp->sk_refcnt), sp,
		   icsk->icsk_rto,
		   icsk->icsk_ack.ato,
		   (icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong,
		   tp->snd_cwnd, tp->snd_ssthresh >= 0xFFFF ? -1 : tp->snd_ssthresh
		   );
}
static void get_timewait6_sock(struct seq_file *seq,
			       struct inet_timewait_sock *tw, int i)
{
	struct in6_addr *dest, *src;
	__u16 destp, srcp;
	struct inet6_timewait_sock *tw6 = inet6_twsk((struct sock *)tw);
	int ttd = tw->tw_ttd - jiffies;

	if (ttd < 0)
		ttd = 0;

	dest = &tw6->tw_v6_daddr;
	src  = &tw6->tw_v6_rcv_saddr;
	destp = ntohs(tw->tw_dport);
	srcp  = ntohs(tw->tw_sport);

	seq_printf(seq,
		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
		   "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %p\n",
		   i,
		   src->s6_addr32[0], src->s6_addr32[1],
		   src->s6_addr32[2], src->s6_addr32[3], srcp,
		   dest->s6_addr32[0], dest->s6_addr32[1],
		   dest->s6_addr32[2], dest->s6_addr32[3], destp,
		   tw->tw_substate, 0, 0,
		   3, jiffies_to_clock_t(ttd), 0, 0, 0, 0,
		   atomic_read(&tw->tw_refcnt), tw);
}
#ifdef CONFIG_PROC_FS
static int tcp6_seq_show(struct seq_file *seq, void *v)
{
	struct tcp_iter_state *st;

	if (v == SEQ_START_TOKEN) {
		seq_puts(seq,
			 "  sl  "
			 "local_address                         "
			 "remote_address                        "
			 "st tx_queue rx_queue tr tm->when retrnsmt"
			 "   uid  timeout inode\n");
		goto out;
	}
	st = seq->private;

	switch (st->state) {
	case TCP_SEQ_STATE_LISTENING:
	case TCP_SEQ_STATE_ESTABLISHED:
		get_tcp6_sock(seq, v, st->num);
		break;
	case TCP_SEQ_STATE_OPENREQ:
		get_openreq6(seq, st->syn_wait_sk, v, st->num, st->uid);
		break;
	case TCP_SEQ_STATE_TIME_WAIT:
		get_timewait6_sock(seq, v, st->num);
		break;
	}
out:
	return 0;
}
static struct file_operations tcp6_seq_fops;
static struct tcp_seq_afinfo tcp6_seq_afinfo = {
	.owner		= THIS_MODULE,
	.name		= "tcp6",
	.family		= AF_INET6,
	.seq_show	= tcp6_seq_show,
	.seq_fops	= &tcp6_seq_fops,
};

int __init tcp6_proc_init(void)
{
	return tcp_proc_register(&tcp6_seq_afinfo);
}

void tcp6_proc_exit(void)
{
	tcp_proc_unregister(&tcp6_seq_afinfo);
}
#endif
struct proto tcpv6_prot = {
	.name			= "TCPv6",
	.owner			= THIS_MODULE,
	.close			= tcp_close,
	.connect		= tcp_v6_connect,
	.disconnect		= tcp_disconnect,
	.accept			= inet_csk_accept,
	.ioctl			= tcp_ioctl,
	.init			= tcp_v6_init_sock,
	.destroy		= tcp_v6_destroy_sock,
	.shutdown		= tcp_shutdown,
	.setsockopt		= tcp_setsockopt,
	.getsockopt		= tcp_getsockopt,
	.sendmsg		= tcp_sendmsg,
	.recvmsg		= tcp_recvmsg,
	.backlog_rcv		= tcp_v6_do_rcv,
	.hash			= tcp_v6_hash,
	.unhash			= tcp_unhash,
	.get_port		= tcp_v6_get_port,
	.enter_memory_pressure	= tcp_enter_memory_pressure,
	.sockets_allocated	= &tcp_sockets_allocated,
	.memory_allocated	= &tcp_memory_allocated,
	.memory_pressure	= &tcp_memory_pressure,
	.orphan_count		= &tcp_orphan_count,
	.sysctl_mem		= sysctl_tcp_mem,
	.sysctl_wmem		= sysctl_tcp_wmem,
	.sysctl_rmem		= sysctl_tcp_rmem,
	.max_header		= MAX_TCP_HEADER,
	.obj_size		= sizeof(struct tcp6_sock),
	.twsk_obj_size		= sizeof(struct tcp6_timewait_sock),
	.rsk_prot		= &tcp6_request_sock_ops,
};
static struct inet6_protocol tcpv6_protocol = {
	.handler	=	tcp_v6_rcv,
	.err_handler	=	tcp_v6_err,
	.flags		=	INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
};

static struct inet_protosw tcpv6_protosw = {
	.type		=	SOCK_STREAM,
	.protocol	=	IPPROTO_TCP,
	.prot		=	&tcpv6_prot,
	.ops		=	&inet6_stream_ops,
	.capability	=	-1,
	.no_check	=	0,
	.flags		=	INET_PROTOSW_PERMANENT,
};
void __init tcpv6_init(void)
{
	/* register inet6 protocol */
	if (inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP) < 0)
		printk(KERN_ERR "tcpv6_init: Could not register protocol\n");
	inet6_register_protosw(&tcpv6_protosw);
}