/*
 *	TCP over IPv6
 *	Linux INET6 implementation
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *
 *	Based on:
 *	linux/net/ipv4/tcp.c
 *	linux/net/ipv4/tcp_input.c
 *	linux/net/ipv4/tcp_output.c
 *
 *	Fixes:
 *	Hideaki YOSHIFUJI	:	sin6_scope_id support
 *	YOSHIFUJI Hideaki @USAGI and:	Support IPV6_V6ONLY socket option, which
 *	Alexey Kuznetsov		allow both IPv4 and IPv6 sockets to bind
 *					a single port at the same time.
 *	YOSHIFUJI Hideaki @USAGI:	convert /proc/net/tcp6 to seq_file.
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 */
#include <linux/bottom_half.h>
#include <linux/module.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/jiffies.h>
#include <linux/in.h>
#include <linux/in6.h>
#include <linux/netdevice.h>
#include <linux/init.h>
#include <linux/jhash.h>
#include <linux/ipsec.h>
#include <linux/times.h>
#include <linux/slab.h>

#include <linux/ipv6.h>
#include <linux/icmpv6.h>
#include <linux/random.h>

#include <net/tcp.h>
#include <net/ndisc.h>
#include <net/inet6_hashtables.h>
#include <net/inet6_connection_sock.h>
#include <net/ipv6.h>
#include <net/transp_v6.h>
#include <net/addrconf.h>
#include <net/ip6_route.h>
#include <net/ip6_checksum.h>
#include <net/inet_ecn.h>
#include <net/protocol.h>
#include <net/xfrm.h>
#include <net/snmp.h>
#include <net/dsfield.h>
#include <net/timewait_sock.h>
#include <net/netdma.h>
#include <net/inet_common.h>
#include <net/secure_seq.h>

#include <asm/uaccess.h>

#include <linux/proc_fs.h>
#include <linux/seq_file.h>

#include <linux/crypto.h>
#include <linux/scatterlist.h>
static void	tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb);
static void	tcp_v6_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
				      struct request_sock *req);

static int	tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);
static void	__tcp_v6_send_check(struct sk_buff *skb,
				    const struct in6_addr *saddr,
				    const struct in6_addr *daddr);

static const struct inet_connection_sock_af_ops ipv6_mapped;
static const struct inet_connection_sock_af_ops ipv6_specific;
#ifdef CONFIG_TCP_MD5SIG
static const struct tcp_sock_af_ops tcp_sock_ipv6_specific;
static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific;
#else
static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(struct sock *sk,
						   const struct in6_addr *addr)
{
	return NULL;
}
#endif
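/* Hash a socket into the TCP binding tables.  A v4-mapped socket was
 * already set up by the IPv4 code, so it is re-routed to tcp_prot's
 * hash; native IPv6 sockets go into the inet6 hash with BHs disabled,
 * since the hash chains are also modified from softirq context.
 */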
static void tcp_v6_hash(struct sock *sk)
{
	if (sk->sk_state != TCP_CLOSE) {
		if (inet_csk(sk)->icsk_af_ops == &ipv6_mapped) {
			tcp_prot.hash(sk);
			return;
		}
		local_bh_disable();
		__inet6_hash(sk, NULL);
		local_bh_enable();
	}
}
static __inline__ __sum16 tcp_v6_check(int len,
				       const struct in6_addr *saddr,
				       const struct in6_addr *daddr,
				       __wsum base)
{
	return csum_ipv6_magic(saddr, daddr, len, IPPROTO_TCP, base);
}

static __u32 tcp_v6_init_sequence(struct sk_buff *skb)
{
	return secure_tcpv6_sequence_number(ipv6_hdr(skb)->daddr.s6_addr32,
					    ipv6_hdr(skb)->saddr.s6_addr32,
					    tcp_hdr(skb)->dest,
					    tcp_hdr(skb)->source);
}
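/* Initial sequence numbers are derived from a keyed hash over the
 * address/port 4-tuple, so they are hard to predict off-path.
 * tcp_v6_connect() below resolves the route first, hands v4-mapped
 * destinations off to tcp_v4_connect(), and only then picks the ISN
 * and sends the SYN.
 */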
static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
			  int addr_len)
{
	struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
	struct inet_sock *inet = inet_sk(sk);
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	struct in6_addr *saddr = NULL, *final_p, final;
	struct rt6_info *rt;
	struct flowi6 fl6;
	struct dst_entry *dst;
	int addr_type;
	int err;

	if (addr_len < SIN6_LEN_RFC2133)
		return -EINVAL;

	if (usin->sin6_family != AF_INET6)
		return -EAFNOSUPPORT;

	memset(&fl6, 0, sizeof(fl6));

	if (np->sndflow) {
		fl6.flowlabel = usin->sin6_flowinfo & IPV6_FLOWINFO_MASK;
		IP6_ECN_flow_init(fl6.flowlabel);
		if (fl6.flowlabel & IPV6_FLOWLABEL_MASK) {
			struct ip6_flowlabel *flowlabel;
			flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
			if (flowlabel == NULL)
				return -EINVAL;
			ipv6_addr_copy(&usin->sin6_addr, &flowlabel->dst);
			fl6_sock_release(flowlabel);
		}
	}

	/*
	 *	connect() to INADDR_ANY means loopback (BSD'ism).
	 */

	if (ipv6_addr_any(&usin->sin6_addr))
		usin->sin6_addr.s6_addr[15] = 0x1;

	addr_type = ipv6_addr_type(&usin->sin6_addr);

	if (addr_type & IPV6_ADDR_MULTICAST)
		return -ENETUNREACH;

	if (addr_type & IPV6_ADDR_LINKLOCAL) {
		if (addr_len >= sizeof(struct sockaddr_in6) &&
		    usin->sin6_scope_id) {
			/* If interface is set while binding, indices
			 * must coincide.
			 */
			if (sk->sk_bound_dev_if &&
			    sk->sk_bound_dev_if != usin->sin6_scope_id)
				return -EINVAL;

			sk->sk_bound_dev_if = usin->sin6_scope_id;
		}

		/* Connect to link-local address requires an interface */
		if (!sk->sk_bound_dev_if)
			return -EINVAL;
	}

	if (tp->rx_opt.ts_recent_stamp &&
	    !ipv6_addr_equal(&np->daddr, &usin->sin6_addr)) {
		tp->rx_opt.ts_recent = 0;
		tp->rx_opt.ts_recent_stamp = 0;
		tp->write_seq = 0;
	}

	ipv6_addr_copy(&np->daddr, &usin->sin6_addr);
	np->flow_label = fl6.flowlabel;

	/*
	 *	TCP over IPv4
	 */

	if (addr_type == IPV6_ADDR_MAPPED) {
		u32 exthdrlen = icsk->icsk_ext_hdr_len;
		struct sockaddr_in sin;

		SOCK_DEBUG(sk, "connect: ipv4 mapped\n");

		if (__ipv6_only_sock(sk))
			return -ENETUNREACH;

		sin.sin_family = AF_INET;
		sin.sin_port = usin->sin6_port;
		sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];

		icsk->icsk_af_ops = &ipv6_mapped;
		sk->sk_backlog_rcv = tcp_v4_do_rcv;
#ifdef CONFIG_TCP_MD5SIG
		tp->af_specific = &tcp_sock_ipv6_mapped_specific;
#endif

		err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin));

		if (err) {
			icsk->icsk_ext_hdr_len = exthdrlen;
			icsk->icsk_af_ops = &ipv6_specific;
			sk->sk_backlog_rcv = tcp_v6_do_rcv;
#ifdef CONFIG_TCP_MD5SIG
			tp->af_specific = &tcp_sock_ipv6_specific;
#endif
			goto failure;
		} else {
			ipv6_addr_set_v4mapped(inet->inet_saddr, &np->saddr);
			ipv6_addr_set_v4mapped(inet->inet_rcv_saddr,
					       &np->rcv_saddr);
		}

		return err;
	}

	if (!ipv6_addr_any(&np->rcv_saddr))
		saddr = &np->rcv_saddr;

	fl6.flowi6_proto = IPPROTO_TCP;
	ipv6_addr_copy(&fl6.daddr, &np->daddr);
	ipv6_addr_copy(&fl6.saddr,
		       (saddr ? saddr : &np->saddr));
	fl6.flowi6_oif = sk->sk_bound_dev_if;
	fl6.flowi6_mark = sk->sk_mark;
	fl6.fl6_dport = usin->sin6_port;
	fl6.fl6_sport = inet->inet_sport;

	final_p = fl6_update_dst(&fl6, np->opt, &final);

	security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));

	dst = ip6_dst_lookup_flow(sk, &fl6, final_p, true);
	if (IS_ERR(dst)) {
		err = PTR_ERR(dst);
		goto failure;
	}

	if (saddr == NULL) {
		saddr = &fl6.saddr;
		ipv6_addr_copy(&np->rcv_saddr, saddr);
	}

	/* set the source address */
	ipv6_addr_copy(&np->saddr, saddr);
	inet->inet_rcv_saddr = LOOPBACK4_IPV6;

	sk->sk_gso_type = SKB_GSO_TCPV6;
	__ip6_dst_store(sk, dst, NULL, NULL);

	rt = (struct rt6_info *) dst;
	if (tcp_death_row.sysctl_tw_recycle &&
	    !tp->rx_opt.ts_recent_stamp &&
	    ipv6_addr_equal(&rt->rt6i_dst.addr, &np->daddr)) {
		struct inet_peer *peer = rt6_get_peer(rt);
		/*
		 * VJ's idea. We save last timestamp seen from
		 * the destination in peer table, when entering state
		 * TIME-WAIT, and initialize rx_opt.ts_recent from it,
		 * when trying new connection.
		 */
		if (peer) {
			inet_peer_refcheck(peer);
			if ((u32)get_seconds() - peer->tcp_ts_stamp <= TCP_PAWS_MSL) {
				tp->rx_opt.ts_recent_stamp = peer->tcp_ts_stamp;
				tp->rx_opt.ts_recent = peer->tcp_ts;
			}
		}
	}

	icsk->icsk_ext_hdr_len = 0;
	if (np->opt)
		icsk->icsk_ext_hdr_len = (np->opt->opt_flen +
					  np->opt->opt_nflen);

	tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);

	inet->inet_dport = usin->sin6_port;

	tcp_set_state(sk, TCP_SYN_SENT);
	err = inet6_hash_connect(&tcp_death_row, sk);
	if (err)
		goto late_failure;

	if (!tp->write_seq)
		tp->write_seq = secure_tcpv6_sequence_number(np->saddr.s6_addr32,
							     np->daddr.s6_addr32,
							     inet->inet_sport,
							     inet->inet_dport);

	err = tcp_connect(sk);
	if (err)
		goto late_failure;

	return 0;

late_failure:
	tcp_set_state(sk, TCP_CLOSE);
	__sk_dst_reset(sk);
failure:
	inet->inet_dport = 0;
	sk->sk_route_caps = 0;
	return err;
}
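/* ICMPv6 error handler.  Locates the socket the quoted TCP segment
 * belongs to, handles PMTUD (ICMPV6_PKT_TOOBIG) by syncing the MSS to
 * the new path MTU and retransmitting, and maps other errors onto
 * sk_err or sk_err_soft depending on whether the socket is currently
 * owned by user context.
 */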
static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
		u8 type, u8 code, int offset, __be32 info)
{
	const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data;
	const struct tcphdr *th = (struct tcphdr *)(skb->data + offset);
	struct ipv6_pinfo *np;
	struct sock *sk;
	int err;
	struct tcp_sock *tp;
	__u32 seq;
	struct net *net = dev_net(skb->dev);

	sk = inet6_lookup(net, &tcp_hashinfo, &hdr->daddr,
			th->dest, &hdr->saddr, th->source, skb->dev->ifindex);

	if (sk == NULL) {
		ICMP6_INC_STATS_BH(net, __in6_dev_get(skb->dev),
				   ICMP6_MIB_INERRORS);
		return;
	}

	if (sk->sk_state == TCP_TIME_WAIT) {
		inet_twsk_put(inet_twsk(sk));
		return;
	}

	bh_lock_sock(sk);
	if (sock_owned_by_user(sk))
		NET_INC_STATS_BH(net, LINUX_MIB_LOCKDROPPEDICMPS);

	if (sk->sk_state == TCP_CLOSE)
		goto out;

	if (ipv6_hdr(skb)->hop_limit < inet6_sk(sk)->min_hopcount) {
		NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP);
		goto out;
	}

	tp = tcp_sk(sk);
	seq = ntohl(th->seq);
	if (sk->sk_state != TCP_LISTEN &&
	    !between(seq, tp->snd_una, tp->snd_nxt)) {
		NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
		goto out;
	}

	np = inet6_sk(sk);

	if (type == ICMPV6_PKT_TOOBIG) {
		struct dst_entry *dst;

		if (sock_owned_by_user(sk))
			goto out;
		if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
			goto out;

		/* icmp should have updated the destination cache entry */
		dst = __sk_dst_check(sk, np->dst_cookie);

		if (dst == NULL) {
			struct inet_sock *inet = inet_sk(sk);
			struct flowi6 fl6;

			/* BUGGG_FUTURE: Again, it is not clear how
			   to handle rthdr case. Ignore this complexity
			   for now.
			 */
			memset(&fl6, 0, sizeof(fl6));
			fl6.flowi6_proto = IPPROTO_TCP;
			ipv6_addr_copy(&fl6.daddr, &np->daddr);
			ipv6_addr_copy(&fl6.saddr, &np->saddr);
			fl6.flowi6_oif = sk->sk_bound_dev_if;
			fl6.flowi6_mark = sk->sk_mark;
			fl6.fl6_dport = inet->inet_dport;
			fl6.fl6_sport = inet->inet_sport;
			security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));

			dst = ip6_dst_lookup_flow(sk, &fl6, NULL, false);
			if (IS_ERR(dst)) {
				sk->sk_err_soft = -PTR_ERR(dst);
				goto out;
			}

		} else
			dst_hold(dst);

		if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) {
			tcp_sync_mss(sk, dst_mtu(dst));
			tcp_simple_retransmit(sk);
		} /* else let the usual retransmit timer handle it */
		dst_release(dst);
		goto out;
	}

	icmpv6_err_convert(type, code, &err);

	/* Might be for a request_sock */
	switch (sk->sk_state) {
		struct request_sock *req, **prev;
	case TCP_LISTEN:
		if (sock_owned_by_user(sk))
			goto out;

		req = inet6_csk_search_req(sk, &prev, th->dest, &hdr->daddr,
					   &hdr->saddr, inet6_iif(skb));
		if (!req)
			goto out;

		/* ICMPs are not backlogged, hence we cannot get
		 * an established socket here.
		 */
		WARN_ON(req->sk != NULL);

		if (seq != tcp_rsk(req)->snt_isn) {
			NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
			goto out;
		}

		inet_csk_reqsk_queue_drop(sk, req, prev);
		goto out;

	case TCP_SYN_SENT:
	case TCP_SYN_RECV:  /* Cannot happen.
			       It can, if SYNs are crossed. --ANK */
		if (!sock_owned_by_user(sk)) {
			sk->sk_err = err;
			sk->sk_error_report(sk);	/* Wake people up to see the error (see connect in sock.c) */

			tcp_done(sk);
		} else
			sk->sk_err_soft = err;
		goto out;
	}

	if (!sock_owned_by_user(sk) && np->recverr) {
		sk->sk_err = err;
		sk->sk_error_report(sk);
	} else
		sk->sk_err_soft = err;

out:
	bh_unlock_sock(sk);
	sock_put(sk);
}
static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req,
			      struct request_values *rvp)
{
	struct inet6_request_sock *treq = inet6_rsk(req);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct sk_buff *skb;
	struct ipv6_txoptions *opt = NULL;
	struct in6_addr *final_p, final;
	struct flowi6 fl6;
	struct dst_entry *dst;
	int err;

	memset(&fl6, 0, sizeof(fl6));
	fl6.flowi6_proto = IPPROTO_TCP;
	ipv6_addr_copy(&fl6.daddr, &treq->rmt_addr);
	ipv6_addr_copy(&fl6.saddr, &treq->loc_addr);
	fl6.flowlabel = 0;
	fl6.flowi6_oif = treq->iif;
	fl6.flowi6_mark = sk->sk_mark;
	fl6.fl6_dport = inet_rsk(req)->rmt_port;
	fl6.fl6_sport = inet_rsk(req)->loc_port;
	security_req_classify_flow(req, flowi6_to_flowi(&fl6));

	opt = np->opt;
	final_p = fl6_update_dst(&fl6, opt, &final);

	dst = ip6_dst_lookup_flow(sk, &fl6, final_p, false);
	if (IS_ERR(dst)) {
		err = PTR_ERR(dst);
		dst = NULL;
		goto done;
	}
	skb = tcp_make_synack(sk, dst, req, rvp);
	err = -ENOMEM;
	if (skb) {
		__tcp_v6_send_check(skb, &treq->loc_addr, &treq->rmt_addr);

		ipv6_addr_copy(&fl6.daddr, &treq->rmt_addr);
		err = ip6_xmit(sk, skb, &fl6, opt);
		err = net_xmit_eval(err);
	}

done:
	if (opt && opt != np->opt)
		sock_kfree_s(sk, opt, opt->tot_len);
	dst_release(dst);
	return err;
}
static int tcp_v6_rtx_synack(struct sock *sk, struct request_sock *req,
			     struct request_values *rvp)
{
	TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS);
	return tcp_v6_send_synack(sk, req, rvp);
}

static void tcp_v6_reqsk_destructor(struct request_sock *req)
{
	kfree_skb(inet6_rsk(req)->pktopts);
}
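/* TCP-MD5 (RFC 2385) support.  Per-socket keys live in a flat array
 * inside tp->md5sig_info that is searched linearly and grown one entry
 * at a time; that is adequate for the handful of keys a BGP speaker
 * typically configures, not for large key sets.
 */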
#ifdef CONFIG_TCP_MD5SIG
static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(struct sock *sk,
						   const struct in6_addr *addr)
{
	struct tcp_sock *tp = tcp_sk(sk);
	int i;

	BUG_ON(tp == NULL);

	if (!tp->md5sig_info || !tp->md5sig_info->entries6)
		return NULL;

	for (i = 0; i < tp->md5sig_info->entries6; i++) {
		if (ipv6_addr_equal(&tp->md5sig_info->keys6[i].addr, addr))
			return &tp->md5sig_info->keys6[i].base;
	}
	return NULL;
}

static struct tcp_md5sig_key *tcp_v6_md5_lookup(struct sock *sk,
						struct sock *addr_sk)
{
	return tcp_v6_md5_do_lookup(sk, &inet6_sk(addr_sk)->daddr);
}

static struct tcp_md5sig_key *tcp_v6_reqsk_md5_lookup(struct sock *sk,
						      struct request_sock *req)
{
	return tcp_v6_md5_do_lookup(sk, &inet6_rsk(req)->rmt_addr);
}

static int tcp_v6_md5_do_add(struct sock *sk, const struct in6_addr *peer,
			     char *newkey, u8 newkeylen)
{
	/* Add key to the list */
	struct tcp_md5sig_key *key;
	struct tcp_sock *tp = tcp_sk(sk);
	struct tcp6_md5sig_key *keys;

	key = tcp_v6_md5_do_lookup(sk, peer);
	if (key) {
		/* modify existing entry - just update that one */
		kfree(key->key);
		key->key = newkey;
		key->keylen = newkeylen;
	} else {
		/* reallocate new list if current one is full. */
		if (!tp->md5sig_info) {
			tp->md5sig_info = kzalloc(sizeof(*tp->md5sig_info), GFP_ATOMIC);
			if (!tp->md5sig_info) {
				kfree(newkey);
				return -ENOMEM;
			}
			sk_nocaps_add(sk, NETIF_F_GSO_MASK);
		}
		if (tcp_alloc_md5sig_pool(sk) == NULL) {
			kfree(newkey);
			return -ENOMEM;
		}
		if (tp->md5sig_info->alloced6 == tp->md5sig_info->entries6) {
			keys = kmalloc((sizeof (tp->md5sig_info->keys6[0]) *
				       (tp->md5sig_info->entries6 + 1)), GFP_ATOMIC);

			if (!keys) {
				tcp_free_md5sig_pool();
				kfree(newkey);
				return -ENOMEM;
			}

			if (tp->md5sig_info->entries6)
				memmove(keys, tp->md5sig_info->keys6,
					(sizeof (tp->md5sig_info->keys6[0]) *
					 tp->md5sig_info->entries6));

			kfree(tp->md5sig_info->keys6);
			tp->md5sig_info->keys6 = keys;
			tp->md5sig_info->alloced6++;
		}

		ipv6_addr_copy(&tp->md5sig_info->keys6[tp->md5sig_info->entries6].addr,
			       peer);
		tp->md5sig_info->keys6[tp->md5sig_info->entries6].base.key = newkey;
		tp->md5sig_info->keys6[tp->md5sig_info->entries6].base.keylen = newkeylen;

		tp->md5sig_info->entries6++;
	}
	return 0;
}
static int tcp_v6_md5_add_func(struct sock *sk, struct sock *addr_sk,
			       u8 *newkey, __u8 newkeylen)
{
	return tcp_v6_md5_do_add(sk, &inet6_sk(addr_sk)->daddr,
				 newkey, newkeylen);
}

static int tcp_v6_md5_do_del(struct sock *sk, const struct in6_addr *peer)
{
	struct tcp_sock *tp = tcp_sk(sk);
	int i;

	for (i = 0; i < tp->md5sig_info->entries6; i++) {
		if (ipv6_addr_equal(&tp->md5sig_info->keys6[i].addr, peer)) {
			/* Free the key */
			kfree(tp->md5sig_info->keys6[i].base.key);
			tp->md5sig_info->entries6--;

			if (tp->md5sig_info->entries6 == 0) {
				kfree(tp->md5sig_info->keys6);
				tp->md5sig_info->keys6 = NULL;
				tp->md5sig_info->alloced6 = 0;
			} else {
				/* shrink the database */
				if (tp->md5sig_info->entries6 != i)
					memmove(&tp->md5sig_info->keys6[i],
						&tp->md5sig_info->keys6[i+1],
						(tp->md5sig_info->entries6 - i)
						* sizeof (tp->md5sig_info->keys6[0]));
			}
			tcp_free_md5sig_pool();
			return 0;
		}
	}
	return -ENOENT;
}
static void tcp_v6_clear_md5_list(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);
	int i;

	if (tp->md5sig_info->entries6) {
		for (i = 0; i < tp->md5sig_info->entries6; i++)
			kfree(tp->md5sig_info->keys6[i].base.key);
		tp->md5sig_info->entries6 = 0;
		tcp_free_md5sig_pool();
	}

	kfree(tp->md5sig_info->keys6);
	tp->md5sig_info->keys6 = NULL;
	tp->md5sig_info->alloced6 = 0;

	if (tp->md5sig_info->entries4) {
		for (i = 0; i < tp->md5sig_info->entries4; i++)
			kfree(tp->md5sig_info->keys4[i].base.key);
		tp->md5sig_info->entries4 = 0;
		tcp_free_md5sig_pool();
	}

	kfree(tp->md5sig_info->keys4);
	tp->md5sig_info->keys4 = NULL;
	tp->md5sig_info->alloced4 = 0;
}
static int tcp_v6_parse_md5_keys(struct sock *sk, char __user *optval,
				 int optlen)
{
	struct tcp_md5sig cmd;
	struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&cmd.tcpm_addr;
	u8 *newkey;

	if (optlen < sizeof(cmd))
		return -EINVAL;

	if (copy_from_user(&cmd, optval, sizeof(cmd)))
		return -EFAULT;

	if (sin6->sin6_family != AF_INET6)
		return -EINVAL;

	if (!cmd.tcpm_keylen) {
		if (!tcp_sk(sk)->md5sig_info)
			return -ENOENT;
		if (ipv6_addr_v4mapped(&sin6->sin6_addr))
			return tcp_v4_md5_do_del(sk, sin6->sin6_addr.s6_addr32[3]);
		return tcp_v6_md5_do_del(sk, &sin6->sin6_addr);
	}

	if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
		return -EINVAL;

	if (!tcp_sk(sk)->md5sig_info) {
		struct tcp_sock *tp = tcp_sk(sk);
		struct tcp_md5sig_info *p;

		p = kzalloc(sizeof(struct tcp_md5sig_info), GFP_KERNEL);
		if (!p)
			return -ENOMEM;

		tp->md5sig_info = p;
		sk_nocaps_add(sk, NETIF_F_GSO_MASK);
	}

	newkey = kmemdup(cmd.tcpm_key, cmd.tcpm_keylen, GFP_KERNEL);
	if (!newkey)
		return -ENOMEM;
	if (ipv6_addr_v4mapped(&sin6->sin6_addr)) {
		return tcp_v4_md5_do_add(sk, sin6->sin6_addr.s6_addr32[3],
					 newkey, cmd.tcpm_keylen);
	}
	return tcp_v6_md5_do_add(sk, &sin6->sin6_addr, newkey, cmd.tcpm_keylen);
}
static int tcp_v6_md5_hash_pseudoheader(struct tcp_md5sig_pool *hp,
					const struct in6_addr *daddr,
					const struct in6_addr *saddr, int nbytes)
{
	struct tcp6_pseudohdr *bp;
	struct scatterlist sg;

	bp = &hp->md5_blk.ip6;
	/* 1. TCP pseudo-header (RFC2460) */
	ipv6_addr_copy(&bp->saddr, saddr);
	ipv6_addr_copy(&bp->daddr, daddr);
	bp->protocol = cpu_to_be32(IPPROTO_TCP);
	bp->len = cpu_to_be32(nbytes);

	sg_init_one(&sg, bp, sizeof(*bp));
	return crypto_hash_update(&hp->md5_desc, &sg, sizeof(*bp));
}
static int tcp_v6_md5_hash_hdr(char *md5_hash, struct tcp_md5sig_key *key,
			       const struct in6_addr *daddr, struct in6_addr *saddr,
			       struct tcphdr *th)
{
	struct tcp_md5sig_pool *hp;
	struct hash_desc *desc;

	hp = tcp_get_md5sig_pool();
	if (!hp)
		goto clear_hash_noput;
	desc = &hp->md5_desc;

	if (crypto_hash_init(desc))
		goto clear_hash;
	if (tcp_v6_md5_hash_pseudoheader(hp, daddr, saddr, th->doff << 2))
		goto clear_hash;
	if (tcp_md5_hash_header(hp, th))
		goto clear_hash;
	if (tcp_md5_hash_key(hp, key))
		goto clear_hash;
	if (crypto_hash_final(desc, md5_hash))
		goto clear_hash;

	tcp_put_md5sig_pool();
	return 0;

clear_hash:
	tcp_put_md5sig_pool();
clear_hash_noput:
	memset(md5_hash, 0, 16);
	return 1;
}
static int tcp_v6_md5_hash_skb(char *md5_hash, struct tcp_md5sig_key *key,
			       struct sock *sk, struct request_sock *req,
			       struct sk_buff *skb)
{
	const struct in6_addr *saddr, *daddr;
	struct tcp_md5sig_pool *hp;
	struct hash_desc *desc;
	struct tcphdr *th = tcp_hdr(skb);

	if (sk) {
		saddr = &inet6_sk(sk)->saddr;
		daddr = &inet6_sk(sk)->daddr;
	} else if (req) {
		saddr = &inet6_rsk(req)->loc_addr;
		daddr = &inet6_rsk(req)->rmt_addr;
	} else {
		const struct ipv6hdr *ip6h = ipv6_hdr(skb);
		saddr = &ip6h->saddr;
		daddr = &ip6h->daddr;
	}

	hp = tcp_get_md5sig_pool();
	if (!hp)
		goto clear_hash_noput;
	desc = &hp->md5_desc;

	if (crypto_hash_init(desc))
		goto clear_hash;

	if (tcp_v6_md5_hash_pseudoheader(hp, daddr, saddr, skb->len))
		goto clear_hash;
	if (tcp_md5_hash_header(hp, th))
		goto clear_hash;
	if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
		goto clear_hash;
	if (tcp_md5_hash_key(hp, key))
		goto clear_hash;
	if (crypto_hash_final(desc, md5_hash))
		goto clear_hash;

	tcp_put_md5sig_pool();
	return 0;

clear_hash:
	tcp_put_md5sig_pool();
clear_hash_noput:
	memset(md5_hash, 0, 16);
	return 1;
}
static int tcp_v6_inbound_md5_hash(struct sock *sk, struct sk_buff *skb)
{
	__u8 *hash_location = NULL;
	struct tcp_md5sig_key *hash_expected;
	const struct ipv6hdr *ip6h = ipv6_hdr(skb);
	struct tcphdr *th = tcp_hdr(skb);
	int genhash;
	u8 newhash[16];

	hash_expected = tcp_v6_md5_do_lookup(sk, &ip6h->saddr);
	hash_location = tcp_parse_md5sig_option(th);

	/* We've parsed the options - do we have a hash? */
	if (!hash_expected && !hash_location)
		return 0;

	if (hash_expected && !hash_location) {
		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND);
		return 1;
	}

	if (!hash_expected && hash_location) {
		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED);
		return 1;
	}

	/* check the signature */
	genhash = tcp_v6_md5_hash_skb(newhash,
				      hash_expected,
				      NULL, NULL, skb);

	if (genhash || memcmp(hash_location, newhash, 16) != 0) {
		if (net_ratelimit()) {
			printk(KERN_INFO "MD5 Hash %s for [%pI6c]:%u->[%pI6c]:%u\n",
			       genhash ? "failed" : "mismatch",
			       &ip6h->saddr, ntohs(th->source),
			       &ip6h->daddr, ntohs(th->dest));
		}
		return 1;
	}
	return 0;
}
#endif
struct request_sock_ops tcp6_request_sock_ops __read_mostly = {
	.family		=	AF_INET6,
	.obj_size	=	sizeof(struct tcp6_request_sock),
	.rtx_syn_ack	=	tcp_v6_rtx_synack,
	.send_ack	=	tcp_v6_reqsk_send_ack,
	.destructor	=	tcp_v6_reqsk_destructor,
	.send_reset	=	tcp_v6_send_reset,
	.syn_ack_timeout =	tcp_syn_ack_timeout,
};

#ifdef CONFIG_TCP_MD5SIG
static const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = {
	.md5_lookup	=	tcp_v6_reqsk_md5_lookup,
	.calc_md5_hash	=	tcp_v6_md5_hash_skb,
};
#endif
static void __tcp_v6_send_check(struct sk_buff *skb,
				const struct in6_addr *saddr, const struct in6_addr *daddr)
{
	struct tcphdr *th = tcp_hdr(skb);

	if (skb->ip_summed == CHECKSUM_PARTIAL) {
		th->check = ~tcp_v6_check(skb->len, saddr, daddr, 0);
		skb->csum_start = skb_transport_header(skb) - skb->head;
		skb->csum_offset = offsetof(struct tcphdr, check);
	} else {
		th->check = tcp_v6_check(skb->len, saddr, daddr,
					 csum_partial(th, th->doff << 2,
						      skb->csum));
	}
}

static void tcp_v6_send_check(struct sock *sk, struct sk_buff *skb)
{
	struct ipv6_pinfo *np = inet6_sk(sk);

	__tcp_v6_send_check(skb, &np->saddr, &np->daddr);
}

static int tcp_v6_gso_send_check(struct sk_buff *skb)
{
	const struct ipv6hdr *ipv6h;
	struct tcphdr *th;

	if (!pskb_may_pull(skb, sizeof(*th)))
		return -EINVAL;

	ipv6h = ipv6_hdr(skb);
	th = tcp_hdr(skb);

	th->check = 0;
	skb->ip_summed = CHECKSUM_PARTIAL;
	__tcp_v6_send_check(skb, &ipv6h->saddr, &ipv6h->daddr);
	return 0;
}
static struct sk_buff **tcp6_gro_receive(struct sk_buff **head,
					 struct sk_buff *skb)
{
	const struct ipv6hdr *iph = skb_gro_network_header(skb);

	switch (skb->ip_summed) {
	case CHECKSUM_COMPLETE:
		if (!tcp_v6_check(skb_gro_len(skb), &iph->saddr, &iph->daddr,
				  skb->csum)) {
			skb->ip_summed = CHECKSUM_UNNECESSARY;
			break;
		}

		/* fall through */
	case CHECKSUM_NONE:
		NAPI_GRO_CB(skb)->flush = 1;
		return NULL;
	}

	return tcp_gro_receive(head, skb);
}

static int tcp6_gro_complete(struct sk_buff *skb)
{
	const struct ipv6hdr *iph = ipv6_hdr(skb);
	struct tcphdr *th = tcp_hdr(skb);

	th->check = ~tcp_v6_check(skb->len - skb_transport_offset(skb),
				  &iph->saddr, &iph->daddr, 0);
	skb_shinfo(skb)->gso_type = SKB_GSO_TCPV6;

	return tcp_gro_complete(skb);
}
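/* Build and send an unattached control segment (pure ACK or RST) in
 * reply to an incoming skb.  Addresses and ports are taken from the
 * received segment and swapped, and the reply is emitted through the
 * per-namespace control socket, so no established socket is needed.
 */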
static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack, u32 win,
				 u32 ts, struct tcp_md5sig_key *key, int rst)
{
	struct tcphdr *th = tcp_hdr(skb), *t1;
	struct sk_buff *buff;
	struct flowi6 fl6;
	struct net *net = dev_net(skb_dst(skb)->dev);
	struct sock *ctl_sk = net->ipv6.tcp_sk;
	unsigned int tot_len = sizeof(struct tcphdr);
	struct dst_entry *dst;
	__be32 *topt;

	if (ts)
		tot_len += TCPOLEN_TSTAMP_ALIGNED;
#ifdef CONFIG_TCP_MD5SIG
	if (key)
		tot_len += TCPOLEN_MD5SIG_ALIGNED;
#endif

	buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + tot_len,
			 GFP_ATOMIC);
	if (buff == NULL)
		return;

	skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + tot_len);

	t1 = (struct tcphdr *) skb_push(buff, tot_len);
	skb_reset_transport_header(buff);

	/* Swap the send and the receive. */
	memset(t1, 0, sizeof(*t1));
	t1->dest = th->source;
	t1->source = th->dest;
	t1->doff = tot_len / 4;
	t1->seq = htonl(seq);
	t1->ack_seq = htonl(ack);
	t1->ack = !rst || !th->ack;
	t1->rst = rst;
	t1->window = htons(win);

	topt = (__be32 *)(t1 + 1);

	if (ts) {
		*topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
				(TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
		*topt++ = htonl(tcp_time_stamp);
		*topt++ = htonl(ts);
	}

#ifdef CONFIG_TCP_MD5SIG
	if (key) {
		*topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
				(TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG);
		tcp_v6_md5_hash_hdr((__u8 *)topt, key,
				    &ipv6_hdr(skb)->saddr,
				    &ipv6_hdr(skb)->daddr, t1);
	}
#endif

	memset(&fl6, 0, sizeof(fl6));
	ipv6_addr_copy(&fl6.daddr, &ipv6_hdr(skb)->saddr);
	ipv6_addr_copy(&fl6.saddr, &ipv6_hdr(skb)->daddr);

	buff->ip_summed = CHECKSUM_PARTIAL;
	buff->csum = 0;

	__tcp_v6_send_check(buff, &fl6.saddr, &fl6.daddr);

	fl6.flowi6_proto = IPPROTO_TCP;
	fl6.flowi6_oif = inet6_iif(skb);
	fl6.fl6_dport = t1->dest;
	fl6.fl6_sport = t1->source;
	security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));

	/* Pass a socket to ip6_dst_lookup even if it is for RST.
	 * The underlying function will use it to retrieve the network
	 * namespace.
	 */
	dst = ip6_dst_lookup_flow(ctl_sk, &fl6, NULL, false);
	if (!IS_ERR(dst)) {
		skb_dst_set(buff, dst);
		ip6_xmit(ctl_sk, buff, &fl6, NULL);
		TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
		if (rst)
			TCP_INC_STATS_BH(net, TCP_MIB_OUTRSTS);
		return;
	}

	kfree_skb(buff);
}
static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb)
{
	struct tcphdr *th = tcp_hdr(skb);
	u32 seq = 0, ack_seq = 0;
	struct tcp_md5sig_key *key = NULL;

	if (th->rst)
		return;

	if (!ipv6_unicast_destination(skb))
		return;

#ifdef CONFIG_TCP_MD5SIG
	if (sk)
		key = tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->daddr);
#endif

	if (th->ack)
		seq = ntohl(th->ack_seq);
	else
		ack_seq = ntohl(th->seq) + th->syn + th->fin + skb->len -
			  (th->doff << 2);

	tcp_v6_send_response(skb, seq, ack_seq, 0, 0, key, 1);
}

static void tcp_v6_send_ack(struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 ts,
			    struct tcp_md5sig_key *key)
{
	tcp_v6_send_response(skb, seq, ack, win, ts, key, 0);
}

static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
{
	struct inet_timewait_sock *tw = inet_twsk(sk);
	struct tcp_timewait_sock *tcptw = tcp_twsk(sk);

	tcp_v6_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
			tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
			tcptw->tw_ts_recent, tcp_twsk_md5_key(tcptw));

	inet_twsk_put(tw);
}

static void tcp_v6_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
				  struct request_sock *req)
{
	tcp_v6_send_ack(skb, tcp_rsk(req)->snt_isn + 1, tcp_rsk(req)->rcv_isn + 1,
			req->rcv_wnd, req->ts_recent,
			tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->daddr));
}
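/* For a segment arriving on a listener: returns a child created via
 * tcp_check_req() when a pending request matches, an already
 * established socket (locked), NULL when a TIME_WAIT entry swallowed
 * it, or the listener itself (possibly after a syncookie check).
 */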
static struct sock *tcp_v6_hnd_req(struct sock *sk, struct sk_buff *skb)
{
	struct request_sock *req, **prev;
	const struct tcphdr *th = tcp_hdr(skb);
	struct sock *nsk;

	/* Find possible connection requests. */
	req = inet6_csk_search_req(sk, &prev, th->source,
				   &ipv6_hdr(skb)->saddr,
				   &ipv6_hdr(skb)->daddr, inet6_iif(skb));
	if (req)
		return tcp_check_req(sk, skb, req, prev);

	nsk = __inet6_lookup_established(sock_net(sk), &tcp_hashinfo,
			&ipv6_hdr(skb)->saddr, th->source,
			&ipv6_hdr(skb)->daddr, ntohs(th->dest), inet6_iif(skb));

	if (nsk) {
		if (nsk->sk_state != TCP_TIME_WAIT) {
			bh_lock_sock(nsk);
			return nsk;
		}
		inet_twsk_put(inet_twsk(nsk));
		return NULL;
	}

#ifdef CONFIG_SYN_COOKIES
	if (!th->syn)
		sk = cookie_v6_check(sk, skb);
#endif
	return sk;
}
/* FIXME: this is substantially similar to the ipv4 code.
 * Can some kind of merge be done? -- erics
 */
static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
{
	struct tcp_extend_values tmp_ext;
	struct tcp_options_received tmp_opt;
	u8 *hash_location;
	struct request_sock *req;
	struct inet6_request_sock *treq;
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	__u32 isn = TCP_SKB_CB(skb)->when;
	struct dst_entry *dst = NULL;
	int want_cookie = 0;

	if (skb->protocol == htons(ETH_P_IP))
		return tcp_v4_conn_request(sk, skb);

	if (!ipv6_unicast_destination(skb))
		goto drop;

	if (inet_csk_reqsk_queue_is_full(sk) && !isn) {
		want_cookie = tcp_syn_flood_action(sk, skb, "TCPv6");
		if (!want_cookie)
			goto drop;
	}

	if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1)
		goto drop;

	req = inet6_reqsk_alloc(&tcp6_request_sock_ops);
	if (req == NULL)
		goto drop;

#ifdef CONFIG_TCP_MD5SIG
	tcp_rsk(req)->af_specific = &tcp_request_sock_ipv6_ops;
#endif

	tcp_clear_options(&tmp_opt);
	tmp_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
	tmp_opt.user_mss = tp->rx_opt.user_mss;
	tcp_parse_options(skb, &tmp_opt, &hash_location, 0);

	if (tmp_opt.cookie_plus > 0 &&
	    tmp_opt.saw_tstamp &&
	    !tp->rx_opt.cookie_out_never &&
	    (sysctl_tcp_cookie_size > 0 ||
	     (tp->cookie_values != NULL &&
	      tp->cookie_values->cookie_desired > 0))) {
		u8 *c;
		u32 *d;
		u32 *mess = &tmp_ext.cookie_bakery[COOKIE_DIGEST_WORDS];
		int l = tmp_opt.cookie_plus - TCPOLEN_COOKIE_BASE;

		if (tcp_cookie_generator(&tmp_ext.cookie_bakery[0]) != 0)
			goto drop_and_free;

		/* Secret recipe starts with IP addresses */
		d = (__force u32 *)&ipv6_hdr(skb)->daddr.s6_addr32[0];
		*mess++ ^= *d++;
		*mess++ ^= *d++;
		*mess++ ^= *d++;
		*mess++ ^= *d++;
		d = (__force u32 *)&ipv6_hdr(skb)->saddr.s6_addr32[0];
		*mess++ ^= *d++;
		*mess++ ^= *d++;
		*mess++ ^= *d++;
		*mess++ ^= *d++;

		/* plus variable length Initiator Cookie */
		c = (u8 *)mess;
		while (l-- > 0)
			*c++ ^= *hash_location++;

		want_cookie = 0;	/* not our kind of cookie */
		tmp_ext.cookie_out_never = 0; /* false */
		tmp_ext.cookie_plus = tmp_opt.cookie_plus;
	} else if (!tp->rx_opt.cookie_in_always) {
		/* redundant indications, but ensure initialization. */
		tmp_ext.cookie_out_never = 1; /* true */
		tmp_ext.cookie_plus = 0;
	} else {
		goto drop_and_free;
	}
	tmp_ext.cookie_in_always = tp->rx_opt.cookie_in_always;

	if (want_cookie && !tmp_opt.saw_tstamp)
		tcp_clear_options(&tmp_opt);

	tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
	tcp_openreq_init(req, &tmp_opt, skb);

	treq = inet6_rsk(req);
	ipv6_addr_copy(&treq->rmt_addr, &ipv6_hdr(skb)->saddr);
	ipv6_addr_copy(&treq->loc_addr, &ipv6_hdr(skb)->daddr);
	if (!want_cookie || tmp_opt.tstamp_ok)
		TCP_ECN_create_request(req, tcp_hdr(skb));

	if (!isn) {
		struct inet_peer *peer = NULL;

		if (ipv6_opt_accepted(sk, skb) ||
		    np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo ||
		    np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) {
			atomic_inc(&skb->users);
			treq->pktopts = skb;
		}
		treq->iif = sk->sk_bound_dev_if;

		/* So that link locals have meaning */
		if (!sk->sk_bound_dev_if &&
		    ipv6_addr_type(&treq->rmt_addr) & IPV6_ADDR_LINKLOCAL)
			treq->iif = inet6_iif(skb);

		if (want_cookie) {
			isn = cookie_v6_init_sequence(sk, skb, &req->mss);
			req->cookie_ts = tmp_opt.tstamp_ok;
			goto have_isn;
		}

		/* VJ's idea. We save last timestamp seen
		 * from the destination in peer table, when entering
		 * state TIME-WAIT, and check against it before
		 * accepting new connection request.
		 *
		 * If "isn" is not zero, this request hit alive
		 * timewait bucket, so that all the necessary checks
		 * are made in the function processing timewait state.
		 */
		if (tmp_opt.saw_tstamp &&
		    tcp_death_row.sysctl_tw_recycle &&
		    (dst = inet6_csk_route_req(sk, req)) != NULL &&
		    (peer = rt6_get_peer((struct rt6_info *)dst)) != NULL &&
		    ipv6_addr_equal((struct in6_addr *)peer->daddr.addr.a6,
				    &treq->rmt_addr)) {
			inet_peer_refcheck(peer);
			if ((u32)get_seconds() - peer->tcp_ts_stamp < TCP_PAWS_MSL &&
			    (s32)(peer->tcp_ts - req->ts_recent) >
							TCP_PAWS_WINDOW) {
				NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSPASSIVEREJECTED);
				goto drop_and_release;
			}
		}
		/* Kill the following clause, if you dislike this way. */
		else if (!sysctl_tcp_syncookies &&
			 (sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) <
			  (sysctl_max_syn_backlog >> 2)) &&
			 (!peer || !peer->tcp_ts_stamp) &&
			 (!dst || !dst_metric(dst, RTAX_RTT))) {
			/* Without syncookies last quarter of
			 * backlog is filled with destinations,
			 * proven to be alive.
			 * It means that we continue to communicate
			 * to destinations, already remembered
			 * to the moment of synflood.
			 */
			LIMIT_NETDEBUG(KERN_DEBUG "TCP: drop open request from %pI6/%u\n",
				       &treq->rmt_addr, ntohs(tcp_hdr(skb)->source));
			goto drop_and_release;
		}

		isn = tcp_v6_init_sequence(skb);
	}
have_isn:
	tcp_rsk(req)->snt_isn = isn;
	tcp_rsk(req)->snt_synack = tcp_time_stamp;

	security_inet_conn_request(sk, skb, req);

	if (tcp_v6_send_synack(sk, req,
			       (struct request_values *)&tmp_ext) ||
	    want_cookie)
		goto drop_and_free;

	inet6_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
	return 0;

drop_and_release:
	dst_release(dst);
drop_and_free:
	reqsk_free(req);
drop:
	return 0; /* don't send reset */
}
static struct sock *tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
					 struct request_sock *req,
					 struct dst_entry *dst)
{
	struct inet6_request_sock *treq;
	struct ipv6_pinfo *newnp, *np = inet6_sk(sk);
	struct tcp6_sock *newtcp6sk;
	struct inet_sock *newinet;
	struct tcp_sock *newtp;
	struct sock *newsk;
	struct ipv6_txoptions *opt;
#ifdef CONFIG_TCP_MD5SIG
	struct tcp_md5sig_key *key;
#endif

	if (skb->protocol == htons(ETH_P_IP)) {
		/*
		 *	v6 mapped
		 */

		newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst);

		if (newsk == NULL)
			return NULL;

		newtcp6sk = (struct tcp6_sock *)newsk;
		inet_sk(newsk)->pinet6 = &newtcp6sk->inet6;

		newinet = inet_sk(newsk);
		newnp = inet6_sk(newsk);
		newtp = tcp_sk(newsk);

		memcpy(newnp, np, sizeof(struct ipv6_pinfo));

		ipv6_addr_set_v4mapped(newinet->inet_daddr, &newnp->daddr);

		ipv6_addr_set_v4mapped(newinet->inet_saddr, &newnp->saddr);

		ipv6_addr_copy(&newnp->rcv_saddr, &newnp->saddr);

		inet_csk(newsk)->icsk_af_ops = &ipv6_mapped;
		newsk->sk_backlog_rcv = tcp_v4_do_rcv;
#ifdef CONFIG_TCP_MD5SIG
		newtp->af_specific = &tcp_sock_ipv6_mapped_specific;
#endif

		newnp->pktoptions  = NULL;
		newnp->opt	   = NULL;
		newnp->mcast_oif   = inet6_iif(skb);
		newnp->mcast_hops  = ipv6_hdr(skb)->hop_limit;

		/*
		 * No need to charge this sock to the relevant IPv6 refcnt debug socks count
		 * here, tcp_create_openreq_child now does this for us, see the comment in
		 * that function for the gory details. -acme
		 */

		/* It is tricky place. Until this moment IPv4 tcp
		   worked with IPv6 icsk.icsk_af_ops.
		   Sync it now.
		 */
		tcp_sync_mss(newsk, inet_csk(newsk)->icsk_pmtu_cookie);

		return newsk;
	}

	treq = inet6_rsk(req);
	opt = np->opt;

	if (sk_acceptq_is_full(sk))
		goto out_overflow;

	if (!dst) {
		dst = inet6_csk_route_req(sk, req);
		if (!dst)
			goto out;
	}

	newsk = tcp_create_openreq_child(sk, req, skb);
	if (newsk == NULL)
		goto out_nonewsk;

	/*
	 * No need to charge this sock to the relevant IPv6 refcnt debug socks
	 * count here, tcp_create_openreq_child now does this for us, see the
	 * comment in that function for the gory details. -acme
	 */

	newsk->sk_gso_type = SKB_GSO_TCPV6;
	__ip6_dst_store(newsk, dst, NULL, NULL);

	newtcp6sk = (struct tcp6_sock *)newsk;
	inet_sk(newsk)->pinet6 = &newtcp6sk->inet6;

	newtp = tcp_sk(newsk);
	newinet = inet_sk(newsk);
	newnp = inet6_sk(newsk);

	memcpy(newnp, np, sizeof(struct ipv6_pinfo));

	ipv6_addr_copy(&newnp->daddr, &treq->rmt_addr);
	ipv6_addr_copy(&newnp->saddr, &treq->loc_addr);
	ipv6_addr_copy(&newnp->rcv_saddr, &treq->loc_addr);
	newsk->sk_bound_dev_if = treq->iif;

	/* Now IPv6 options...

	   First: no IPv4 options.
	 */
	newinet->inet_opt = NULL;
	newnp->ipv6_fl_list = NULL;

	/* Clone RX bits */
	newnp->rxopt.all = np->rxopt.all;

	/* Clone pktoptions received with SYN */
	newnp->pktoptions = NULL;
	if (treq->pktopts != NULL) {
		newnp->pktoptions = skb_clone(treq->pktopts, GFP_ATOMIC);
		kfree_skb(treq->pktopts);
		treq->pktopts = NULL;
		if (newnp->pktoptions)
			skb_set_owner_r(newnp->pktoptions, newsk);
	}
	newnp->opt	  = NULL;
	newnp->mcast_oif  = inet6_iif(skb);
	newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;

	/* Clone native IPv6 options from listening socket (if any)

	   Yes, keeping reference count would be much more clever,
	   but we do one more thing here: reattach optmem
	   to newsk.
	 */
	if (opt) {
		newnp->opt = ipv6_dup_options(newsk, opt);
		if (opt != np->opt)
			sock_kfree_s(sk, opt, opt->tot_len);
	}

	inet_csk(newsk)->icsk_ext_hdr_len = 0;
	if (newnp->opt)
		inet_csk(newsk)->icsk_ext_hdr_len = (newnp->opt->opt_nflen +
						     newnp->opt->opt_flen);

	tcp_mtup_init(newsk);
	tcp_sync_mss(newsk, dst_mtu(dst));
	newtp->advmss = dst_metric_advmss(dst);
	tcp_initialize_rcv_mss(newsk);
	if (tcp_rsk(req)->snt_synack)
		tcp_valid_rtt_meas(newsk,
		    tcp_time_stamp - tcp_rsk(req)->snt_synack);
	newtp->total_retrans = req->retrans;

	newinet->inet_daddr = newinet->inet_saddr = LOOPBACK4_IPV6;
	newinet->inet_rcv_saddr = LOOPBACK4_IPV6;

#ifdef CONFIG_TCP_MD5SIG
	/* Copy over the MD5 key from the original socket */
	if ((key = tcp_v6_md5_do_lookup(sk, &newnp->daddr)) != NULL) {
		/* We're using one, so create a matching key
		 * on the newsk structure. If we fail to get
		 * memory, then we end up not copying the key
		 * across. Shucks.
		 */
		char *newkey = kmemdup(key->key, key->keylen, GFP_ATOMIC);
		if (newkey != NULL)
			tcp_v6_md5_do_add(newsk, &newnp->daddr,
					  newkey, key->keylen);
	}
#endif

	if (__inet_inherit_port(sk, newsk) < 0) {
		sock_put(newsk);
		goto out;
	}
	__inet6_hash(newsk, NULL);

	return newsk;

out_overflow:
	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
out_nonewsk:
	if (opt && opt != np->opt)
		sock_kfree_s(sk, opt, opt->tot_len);
	dst_release(dst);
out:
	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
	return NULL;
}
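/* Validate or seed the checksum of an incoming segment.  A verified
 * CHECKSUM_COMPLETE sum is accepted as-is; otherwise the pseudo-header
 * sum is stored in skb->csum so that short packets can be fully
 * checked here and longer ones later, when the data is copied.
 */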
static __sum16 tcp_v6_checksum_init(struct sk_buff *skb)
{
	if (skb->ip_summed == CHECKSUM_COMPLETE) {
		if (!tcp_v6_check(skb->len, &ipv6_hdr(skb)->saddr,
				  &ipv6_hdr(skb)->daddr, skb->csum)) {
			skb->ip_summed = CHECKSUM_UNNECESSARY;
			return 0;
		}
	}

	skb->csum = ~csum_unfold(tcp_v6_check(skb->len,
					      &ipv6_hdr(skb)->saddr,
					      &ipv6_hdr(skb)->daddr, 0));

	if (skb->len <= 76) {
		return __skb_checksum_complete(skb);
	}
	return 0;
}
/* The socket must have its spinlock held when we get
 * here.
 *
 * We have a potential double-lock case here, so even when
 * doing backlog processing we use the BH locking scheme.
 * This is because we cannot sleep with the original spinlock
 * held.
 */
static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct tcp_sock *tp;
	struct sk_buff *opt_skb = NULL;

	/* Imagine: socket is IPv6. IPv4 packet arrives,
	   goes to IPv4 receive handler and backlogged.
	   From backlog it always goes here. Kerboom...
	   Fortunately, tcp_rcv_established and rcv_established
	   handle them correctly, but it is not the case with
	   tcp_v6_hnd_req and tcp_v6_send_reset().   --ANK
	 */

	if (skb->protocol == htons(ETH_P_IP))
		return tcp_v4_do_rcv(sk, skb);

#ifdef CONFIG_TCP_MD5SIG
	if (tcp_v6_inbound_md5_hash(sk, skb))
		goto discard;
#endif

	if (sk_filter(sk, skb))
		goto discard;

	/*
	 *	socket locking is here for SMP purposes as backlog rcv
	 *	is currently called with bh processing disabled.
	 */

	/* Do Stevens' IPV6_PKTOPTIONS.

	   Yes, guys, it is the only place in our code, where we
	   may make it not affecting IPv4.
	   The rest of code is protocol independent,
	   and I do not like idea to uglify IPv4.

	   Actually, all the idea behind IPV6_PKTOPTIONS
	   looks not very well thought out. For now we latch
	   options, received in the last packet, enqueued
	   by tcp. Feel free to propose better solution.
					       --ANK (980728)
	 */
	if (np->rxopt.all)
		opt_skb = skb_clone(skb, GFP_ATOMIC);

	if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
		sock_rps_save_rxhash(sk, skb->rxhash);
		if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len))
			goto reset;
		if (opt_skb)
			goto ipv6_pktoptions;
		return 0;
	}

	if (skb->len < tcp_hdrlen(skb) || tcp_checksum_complete(skb))
		goto csum_err;

	if (sk->sk_state == TCP_LISTEN) {
		struct sock *nsk = tcp_v6_hnd_req(sk, skb);
		if (!nsk)
			goto discard;

		/*
		 * Queue it on the new socket if the new socket is active,
		 * otherwise we just shortcircuit this and continue with
		 * the new socket..
		 */
		if (nsk != sk) {
			sock_rps_save_rxhash(nsk, skb->rxhash);
			if (tcp_child_process(sk, nsk, skb))
				goto reset;
			if (opt_skb)
				__kfree_skb(opt_skb);
			return 0;
		}
	} else
		sock_rps_save_rxhash(sk, skb->rxhash);

	if (tcp_rcv_state_process(sk, skb, tcp_hdr(skb), skb->len))
		goto reset;
	if (opt_skb)
		goto ipv6_pktoptions;
	return 0;

reset:
	tcp_v6_send_reset(sk, skb);
discard:
	if (opt_skb)
		__kfree_skb(opt_skb);
	kfree_skb(skb);
	return 0;
csum_err:
	TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS);
	goto discard;


ipv6_pktoptions:
	/* Do you ask, what is it?

	   1. skb was enqueued by tcp.
	   2. skb is added to tail of read queue, rather than out of order.
	   3. socket is not in passive state.
	   4. Finally, it really contains options, which user wants to receive.
	 */
	tp = tcp_sk(sk);
	if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt &&
	    !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
		if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo)
			np->mcast_oif = inet6_iif(opt_skb);
		if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim)
			np->mcast_hops = ipv6_hdr(opt_skb)->hop_limit;
		if (ipv6_opt_accepted(sk, opt_skb)) {
			skb_set_owner_r(opt_skb, sk);
			opt_skb = xchg(&np->pktoptions, opt_skb);
		} else {
			__kfree_skb(opt_skb);
			opt_skb = xchg(&np->pktoptions, NULL);
		}
	}

	kfree_skb(opt_skb);
	return 0;
}
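/* Main receive routine, called for every TCP segment handed up by the
 * IPv6 stack: validates the header and checksum, looks up the owning
 * socket, and either processes the segment directly, queues it to the
 * prequeue, or appends it to the backlog when the socket is in use.
 */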
static int tcp_v6_rcv(struct sk_buff *skb)
{
	struct tcphdr *th;
	const struct ipv6hdr *hdr;
	struct sock *sk;
	int ret;
	struct net *net = dev_net(skb->dev);

	if (skb->pkt_type != PACKET_HOST)
		goto discard_it;

	/*
	 *	Count it even if it's bad.
	 */
	TCP_INC_STATS_BH(net, TCP_MIB_INSEGS);

	if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
		goto discard_it;

	th = tcp_hdr(skb);

	if (th->doff < sizeof(struct tcphdr)/4)
		goto bad_packet;
	if (!pskb_may_pull(skb, th->doff*4))
		goto discard_it;

	if (!skb_csum_unnecessary(skb) && tcp_v6_checksum_init(skb))
		goto bad_packet;

	th = tcp_hdr(skb);
	hdr = ipv6_hdr(skb);
	TCP_SKB_CB(skb)->seq = ntohl(th->seq);
	TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
				    skb->len - th->doff*4);
	TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
	TCP_SKB_CB(skb)->when = 0;
	TCP_SKB_CB(skb)->flags = ipv6_get_dsfield(hdr);
	TCP_SKB_CB(skb)->sacked = 0;

	sk = __inet6_lookup_skb(&tcp_hashinfo, skb, th->source, th->dest);
	if (!sk)
		goto no_tcp_socket;

process:
	if (sk->sk_state == TCP_TIME_WAIT)
		goto do_time_wait;

	if (hdr->hop_limit < inet6_sk(sk)->min_hopcount) {
		NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP);
		goto discard_and_relse;
	}

	if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
		goto discard_and_relse;

	if (sk_filter(sk, skb))
		goto discard_and_relse;

	skb->dev = NULL;

	bh_lock_sock_nested(sk);
	ret = 0;
	if (!sock_owned_by_user(sk)) {
#ifdef CONFIG_NET_DMA
		struct tcp_sock *tp = tcp_sk(sk);
		if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list)
			tp->ucopy.dma_chan = dma_find_channel(DMA_MEMCPY);
		if (tp->ucopy.dma_chan)
			ret = tcp_v6_do_rcv(sk, skb);
		else
#endif
		{
			if (!tcp_prequeue(sk, skb))
				ret = tcp_v6_do_rcv(sk, skb);
		}
	} else if (unlikely(sk_add_backlog(sk, skb))) {
		bh_unlock_sock(sk);
		NET_INC_STATS_BH(net, LINUX_MIB_TCPBACKLOGDROP);
		goto discard_and_relse;
	}
	bh_unlock_sock(sk);

	sock_put(sk);
	return ret ? -1 : 0;

no_tcp_socket:
	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
		goto discard_it;

	if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) {
bad_packet:
		TCP_INC_STATS_BH(net, TCP_MIB_INERRS);
	} else {
		tcp_v6_send_reset(NULL, skb);
	}

discard_it:
	/*
	 *	Discard frame
	 */
	kfree_skb(skb);
	return 0;

discard_and_relse:
	sock_put(sk);
	goto discard_it;

do_time_wait:
	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
		inet_twsk_put(inet_twsk(sk));
		goto discard_it;
	}

	if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) {
		TCP_INC_STATS_BH(net, TCP_MIB_INERRS);
		inet_twsk_put(inet_twsk(sk));
		goto discard_it;
	}

	switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
	case TCP_TW_SYN:
	{
		struct sock *sk2;

		sk2 = inet6_lookup_listener(dev_net(skb->dev), &tcp_hashinfo,
					    &ipv6_hdr(skb)->daddr,
					    ntohs(th->dest), inet6_iif(skb));
		if (sk2 != NULL) {
			struct inet_timewait_sock *tw = inet_twsk(sk);
			inet_twsk_deschedule(tw, &tcp_death_row);
			inet_twsk_put(tw);
			sk = sk2;
			goto process;
		}
		/* Fall through to ACK */
	}
	case TCP_TW_ACK:
		tcp_v6_timewait_ack(sk, skb);
		break;
	case TCP_TW_RST:
		goto no_tcp_socket;
	case TCP_TW_SUCCESS:;
	}
	goto discard_it;
}
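/* inet_peer lookups for timestamp recycling: reuse the peer cached on
 * the route when it still matches the destination, otherwise take (and
 * tell the caller to release) a reference from the global peer pool.
 */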
static struct inet_peer *tcp_v6_get_peer(struct sock *sk, bool *release_it)
{
	struct rt6_info *rt = (struct rt6_info *) __sk_dst_get(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct inet_peer *peer;

	if (!rt ||
	    !ipv6_addr_equal(&np->daddr, &rt->rt6i_dst.addr)) {
		peer = inet_getpeer_v6(&np->daddr, 1);
		*release_it = true;
	} else {
		if (!rt->rt6i_peer)
			rt6_bind_peer(rt, 1);
		peer = rt->rt6i_peer;
		*release_it = false;
	}

	return peer;
}

static void *tcp_v6_tw_get_peer(struct sock *sk)
{
	struct inet6_timewait_sock *tw6 = inet6_twsk(sk);
	struct inet_timewait_sock *tw = inet_twsk(sk);

	if (tw->tw_family == AF_INET)
		return tcp_v4_tw_get_peer(sk);

	return inet_getpeer_v6(&tw6->tw_v6_daddr, 1);
}
static struct timewait_sock_ops tcp6_timewait_sock_ops = {
	.twsk_obj_size	= sizeof(struct tcp6_timewait_sock),
	.twsk_unique	= tcp_twsk_unique,
	.twsk_destructor = tcp_twsk_destructor,
	.twsk_getpeer	= tcp_v6_tw_get_peer,
};

static const struct inet_connection_sock_af_ops ipv6_specific = {
	.queue_xmit	   = inet6_csk_xmit,
	.send_check	   = tcp_v6_send_check,
	.rebuild_header	   = inet6_sk_rebuild_header,
	.conn_request	   = tcp_v6_conn_request,
	.syn_recv_sock	   = tcp_v6_syn_recv_sock,
	.get_peer	   = tcp_v6_get_peer,
	.net_header_len	   = sizeof(struct ipv6hdr),
	.setsockopt	   = ipv6_setsockopt,
	.getsockopt	   = ipv6_getsockopt,
	.addr2sockaddr	   = inet6_csk_addr2sockaddr,
	.sockaddr_len	   = sizeof(struct sockaddr_in6),
	.bind_conflict	   = inet6_csk_bind_conflict,
#ifdef CONFIG_COMPAT
	.compat_setsockopt = compat_ipv6_setsockopt,
	.compat_getsockopt = compat_ipv6_getsockopt,
#endif
};

#ifdef CONFIG_TCP_MD5SIG
static const struct tcp_sock_af_ops tcp_sock_ipv6_specific = {
	.md5_lookup	=	tcp_v6_md5_lookup,
	.calc_md5_hash	=	tcp_v6_md5_hash_skb,
	.md5_add	=	tcp_v6_md5_add_func,
	.md5_parse	=	tcp_v6_parse_md5_keys,
};
#endif

/*
 *	TCP over IPv4 via INET6 API
 */

static const struct inet_connection_sock_af_ops ipv6_mapped = {
	.queue_xmit	   = ip_queue_xmit,
	.send_check	   = tcp_v4_send_check,
	.rebuild_header	   = inet_sk_rebuild_header,
	.conn_request	   = tcp_v6_conn_request,
	.syn_recv_sock	   = tcp_v6_syn_recv_sock,
	.get_peer	   = tcp_v4_get_peer,
	.net_header_len	   = sizeof(struct iphdr),
	.setsockopt	   = ipv6_setsockopt,
	.getsockopt	   = ipv6_getsockopt,
	.addr2sockaddr	   = inet6_csk_addr2sockaddr,
	.sockaddr_len	   = sizeof(struct sockaddr_in6),
	.bind_conflict	   = inet6_csk_bind_conflict,
#ifdef CONFIG_COMPAT
	.compat_setsockopt = compat_ipv6_setsockopt,
	.compat_getsockopt = compat_ipv6_getsockopt,
#endif
};

#ifdef CONFIG_TCP_MD5SIG
static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific = {
	.md5_lookup	=	tcp_v4_md5_lookup,
	.calc_md5_hash	=	tcp_v4_md5_hash_skb,
	.md5_add	=	tcp_v6_md5_add_func,
	.md5_parse	=	tcp_v6_parse_md5_keys,
};
#endif
/* NOTE: A lot of things set to zero explicitly by call to
 *       sk_alloc() so need not be done here.
 */
static int tcp_v6_init_sock(struct sock *sk)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct tcp_sock *tp = tcp_sk(sk);

	skb_queue_head_init(&tp->out_of_order_queue);
	tcp_init_xmit_timers(sk);
	tcp_prequeue_init(tp);

	icsk->icsk_rto = TCP_TIMEOUT_INIT;
	tp->mdev = TCP_TIMEOUT_INIT;

	/* So many TCP implementations out there (incorrectly) count the
	 * initial SYN frame in their delayed-ACK and congestion control
	 * algorithms that we must have the following bandaid to talk
	 * efficiently to them.  -DaveM
	 */
	tp->snd_cwnd = 2;

	/* See draft-stevens-tcpca-spec-01 for discussion of the
	 * initialization of these values.
	 */
	tp->snd_ssthresh = TCP_INFINITE_SSTHRESH;
	tp->snd_cwnd_clamp = ~0;
	tp->mss_cache = TCP_MSS_DEFAULT;

	tp->reordering = sysctl_tcp_reordering;

	sk->sk_state = TCP_CLOSE;

	icsk->icsk_af_ops = &ipv6_specific;
	icsk->icsk_ca_ops = &tcp_init_congestion_ops;
	icsk->icsk_sync_mss = tcp_sync_mss;
	sk->sk_write_space = sk_stream_write_space;
	sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);

#ifdef CONFIG_TCP_MD5SIG
	tp->af_specific = &tcp_sock_ipv6_specific;
#endif

	/* TCP Cookie Transactions */
	if (sysctl_tcp_cookie_size > 0) {
		/* Default, cookies without s_data_payload. */
		tp->cookie_values =
			kzalloc(sizeof(*tp->cookie_values),
				sk->sk_allocation);
		if (tp->cookie_values != NULL)
			kref_init(&tp->cookie_values->kref);
	}
	/* Presumed zeroed, in order of appearance:
	 *	cookie_in_always, cookie_out_never,
	 *	s_data_constant, s_data_in, s_data_out
	 */
	sk->sk_sndbuf = sysctl_tcp_wmem[1];
	sk->sk_rcvbuf = sysctl_tcp_rmem[1];

	local_bh_disable();
	percpu_counter_inc(&tcp_sockets_allocated);
	local_bh_enable();

	return 0;
}
static void tcp_v6_destroy_sock(struct sock *sk)
{
#ifdef CONFIG_TCP_MD5SIG
	/* Clean up the MD5 key list */
	if (tcp_sk(sk)->md5sig_info)
		tcp_v6_clear_md5_list(sk);
#endif
	tcp_v4_destroy_sock(sk);
	inet6_destroy_sock(sk);
}
#ifdef CONFIG_PROC_FS
/* Proc filesystem TCPv6 sock list dumping. */
static void get_openreq6(struct seq_file *seq,
			 struct sock *sk, struct request_sock *req, int i, int uid)
{
	int ttd = req->expires - jiffies;
	const struct in6_addr *src = &inet6_rsk(req)->loc_addr;
	const struct in6_addr *dest = &inet6_rsk(req)->rmt_addr;

	if (ttd < 0)
		ttd = 0;

	seq_printf(seq,
		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
		   "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK\n",
		   i,
		   src->s6_addr32[0], src->s6_addr32[1],
		   src->s6_addr32[2], src->s6_addr32[3],
		   ntohs(inet_rsk(req)->loc_port),
		   dest->s6_addr32[0], dest->s6_addr32[1],
		   dest->s6_addr32[2], dest->s6_addr32[3],
		   ntohs(inet_rsk(req)->rmt_port),
		   TCP_SYN_RECV,
		   0, 0, /* could print option size, but that is af dependent. */
		   1,    /* timers active (only the expire timer) */
		   jiffies_to_clock_t(ttd),
		   req->retrans,
		   uid,
		   0,  /* non standard timer */
		   0,  /* open_requests have no inode */
		   0, req);
}
static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
{
	const struct in6_addr *dest, *src;
	__u16 destp, srcp;
	int timer_active;
	unsigned long timer_expires;
	struct inet_sock *inet = inet_sk(sp);
	struct tcp_sock *tp = tcp_sk(sp);
	const struct inet_connection_sock *icsk = inet_csk(sp);
	struct ipv6_pinfo *np = inet6_sk(sp);

	dest  = &np->daddr;
	src   = &np->rcv_saddr;
	destp = ntohs(inet->inet_dport);
	srcp  = ntohs(inet->inet_sport);

	if (icsk->icsk_pending == ICSK_TIME_RETRANS) {
		timer_active	= 1;
		timer_expires	= icsk->icsk_timeout;
	} else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
		timer_active	= 4;
		timer_expires	= icsk->icsk_timeout;
	} else if (timer_pending(&sp->sk_timer)) {
		timer_active	= 2;
		timer_expires	= sp->sk_timer.expires;
	} else {
		timer_active	= 0;
		timer_expires	= jiffies;
	}

	seq_printf(seq,
		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
		   "%02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %pK %lu %lu %u %u %d\n",
		   i,
		   src->s6_addr32[0], src->s6_addr32[1],
		   src->s6_addr32[2], src->s6_addr32[3], srcp,
		   dest->s6_addr32[0], dest->s6_addr32[1],
		   dest->s6_addr32[2], dest->s6_addr32[3], destp,
		   sp->sk_state,
		   tp->write_seq-tp->snd_una,
		   (sp->sk_state == TCP_LISTEN) ? sp->sk_ack_backlog : (tp->rcv_nxt - tp->copied_seq),
		   timer_active,
		   jiffies_to_clock_t(timer_expires - jiffies),
		   icsk->icsk_retransmits,
		   sock_i_uid(sp),
		   icsk->icsk_probes_out,
		   sock_i_ino(sp),
		   atomic_read(&sp->sk_refcnt), sp,
		   jiffies_to_clock_t(icsk->icsk_rto),
		   jiffies_to_clock_t(icsk->icsk_ack.ato),
		   (icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong,
		   tp->snd_cwnd,
		   tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh
		   );
}
static void get_timewait6_sock(struct seq_file *seq,
			       struct inet_timewait_sock *tw, int i)
{
	const struct in6_addr *dest, *src;
	__u16 destp, srcp;
	struct inet6_timewait_sock *tw6 = inet6_twsk((struct sock *)tw);
	int ttd = tw->tw_ttd - jiffies;

	if (ttd < 0)
		ttd = 0;

	dest = &tw6->tw_v6_daddr;
	src  = &tw6->tw_v6_rcv_saddr;
	destp = ntohs(tw->tw_dport);
	srcp  = ntohs(tw->tw_sport);

	seq_printf(seq,
		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
		   "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK\n",
		   i,
		   src->s6_addr32[0], src->s6_addr32[1],
		   src->s6_addr32[2], src->s6_addr32[3], srcp,
		   dest->s6_addr32[0], dest->s6_addr32[1],
		   dest->s6_addr32[2], dest->s6_addr32[3], destp,
		   tw->tw_substate, 0, 0,
		   3, jiffies_to_clock_t(ttd), 0, 0, 0, 0,
		   atomic_read(&tw->tw_refcnt), tw);
}
static int tcp6_seq_show(struct seq_file *seq, void *v)
{
	struct tcp_iter_state *st;

	if (v == SEQ_START_TOKEN) {
		seq_puts(seq,
			 "  sl  "
			 "local_address                         "
			 "remote_address                        "
			 "st tx_queue rx_queue tr tm->when retrnsmt"
			 "   uid  timeout inode\n");
		goto out;
	}
	st = seq->private;

	switch (st->state) {
	case TCP_SEQ_STATE_LISTENING:
	case TCP_SEQ_STATE_ESTABLISHED:
		get_tcp6_sock(seq, v, st->num);
		break;
	case TCP_SEQ_STATE_OPENREQ:
		get_openreq6(seq, st->syn_wait_sk, v, st->num, st->uid);
		break;
	case TCP_SEQ_STATE_TIME_WAIT:
		get_timewait6_sock(seq, v, st->num);
		break;
	}
out:
	return 0;
}

static struct tcp_seq_afinfo tcp6_seq_afinfo = {
	.name		= "tcp6",
	.family		= AF_INET6,
	.seq_fops	= {
		.owner		= THIS_MODULE,
	},
	.seq_ops	= {
		.show		= tcp6_seq_show,
	},
};

int __net_init tcp6_proc_init(struct net *net)
{
	return tcp_proc_register(net, &tcp6_seq_afinfo);
}

void tcp6_proc_exit(struct net *net)
{
	tcp_proc_unregister(net, &tcp6_seq_afinfo);
}
#endif
struct proto tcpv6_prot = {
	.name			= "TCPv6",
	.owner			= THIS_MODULE,
	.close			= tcp_close,
	.connect		= tcp_v6_connect,
	.disconnect		= tcp_disconnect,
	.accept			= inet_csk_accept,
	.ioctl			= tcp_ioctl,
	.init			= tcp_v6_init_sock,
	.destroy		= tcp_v6_destroy_sock,
	.shutdown		= tcp_shutdown,
	.setsockopt		= tcp_setsockopt,
	.getsockopt		= tcp_getsockopt,
	.recvmsg		= tcp_recvmsg,
	.sendmsg		= tcp_sendmsg,
	.sendpage		= tcp_sendpage,
	.backlog_rcv		= tcp_v6_do_rcv,
	.hash			= tcp_v6_hash,
	.unhash			= inet_unhash,
	.get_port		= inet_csk_get_port,
	.enter_memory_pressure	= tcp_enter_memory_pressure,
	.sockets_allocated	= &tcp_sockets_allocated,
	.memory_allocated	= &tcp_memory_allocated,
	.memory_pressure	= &tcp_memory_pressure,
	.orphan_count		= &tcp_orphan_count,
	.sysctl_mem		= sysctl_tcp_mem,
	.sysctl_wmem		= sysctl_tcp_wmem,
	.sysctl_rmem		= sysctl_tcp_rmem,
	.max_header		= MAX_TCP_HEADER,
	.obj_size		= sizeof(struct tcp6_sock),
	.slab_flags		= SLAB_DESTROY_BY_RCU,
	.twsk_prot		= &tcp6_timewait_sock_ops,
	.rsk_prot		= &tcp6_request_sock_ops,
	.h.hashinfo		= &tcp_hashinfo,
	.no_autobind		= true,
#ifdef CONFIG_COMPAT
	.compat_setsockopt	= compat_tcp_setsockopt,
	.compat_getsockopt	= compat_tcp_getsockopt,
#endif
};
static const struct inet6_protocol tcpv6_protocol = {
	.handler	=	tcp_v6_rcv,
	.err_handler	=	tcp_v6_err,
	.gso_send_check	=	tcp_v6_gso_send_check,
	.gso_segment	=	tcp_tso_segment,
	.gro_receive	=	tcp6_gro_receive,
	.gro_complete	=	tcp6_gro_complete,
	.flags		=	INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
};

static struct inet_protosw tcpv6_protosw = {
	.type		=	SOCK_STREAM,
	.protocol	=	IPPROTO_TCP,
	.prot		=	&tcpv6_prot,
	.ops		=	&inet6_stream_ops,
	.no_check	=	0,
	.flags		=	INET_PROTOSW_PERMANENT |
				INET_PROTOSW_ICSK,
};

static int __net_init tcpv6_net_init(struct net *net)
{
	return inet_ctl_sock_create(&net->ipv6.tcp_sk, PF_INET6,
				    SOCK_RAW, IPPROTO_TCP, net);
}

static void __net_exit tcpv6_net_exit(struct net *net)
{
	inet_ctl_sock_destroy(net->ipv6.tcp_sk);
}

static void __net_exit tcpv6_net_exit_batch(struct list_head *net_exit_list)
{
	inet_twsk_purge(&tcp_hashinfo, &tcp_death_row, AF_INET6);
}

static struct pernet_operations tcpv6_net_ops = {
	.init	    = tcpv6_net_init,
	.exit	    = tcpv6_net_exit,
	.exit_batch = tcpv6_net_exit_batch,
};
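/* Module init/exit.  Registration order is protocol handler, protosw,
 * pernet ops; the error labels below unwind in exactly the reverse
 * order, so a partial failure leaves nothing registered.
 */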
int __init tcpv6_init(void)
{
	int ret;

	ret = inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP);
	if (ret)
		goto out;

	/* register inet6 protocol */
	ret = inet6_register_protosw(&tcpv6_protosw);
	if (ret)
		goto out_tcpv6_protocol;

	ret = register_pernet_subsys(&tcpv6_net_ops);
	if (ret)
		goto out_tcpv6_protosw;
out:
	return ret;

out_tcpv6_protosw:
	inet6_unregister_protosw(&tcpv6_protosw);
out_tcpv6_protocol:
	inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
	goto out;
}

void tcpv6_exit(void)
{
	unregister_pernet_subsys(&tcpv6_net_ops);
	inet6_unregister_protosw(&tcpv6_protosw);
	inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
}