net/ipv6/tcp_ipv6.c

   1 /*
   2  *      TCP over IPv6
   3  *      Linux INET6 implementation
   4  *
   5  *      Authors:
   6  *      Pedro Roque             <roque@di.fc.ul.pt>
   7  *
   8  *      $Id: tcp_ipv6.c,v 1.144 2002/02/01 22:01:04 davem Exp $
   9  *
  10  *      Based on:
  11  *      linux/net/ipv4/tcp.c
  12  *      linux/net/ipv4/tcp_input.c
  13  *      linux/net/ipv4/tcp_output.c
  14  *
  15  *      Fixes:
  16  *      Hideaki YOSHIFUJI       :       sin6_scope_id support
  17  *      YOSHIFUJI Hideaki @USAGI and:   Support IPV6_V6ONLY socket option, which
  18  *      Alexey Kuznetsov                allow both IPv4 and IPv6 sockets to bind
  19  *                                      a single port at the same time.
  20  *      YOSHIFUJI Hideaki @USAGI:       convert /proc/net/tcp6 to seq_file.
  21  *
  22  *      This program is free software; you can redistribute it and/or
  23  *      modify it under the terms of the GNU General Public License
  24  *      as published by the Free Software Foundation; either version
  25  *      2 of the License, or (at your option) any later version.
  26  */
  27
  28 #include <linux/module.h>
  29 #include <linux/config.h>
  30 #include <linux/errno.h>
  31 #include <linux/types.h>
  32 #include <linux/socket.h>
  33 #include <linux/sockios.h>
  34 #include <linux/net.h>
  35 #include <linux/jiffies.h>
  36 #include <linux/in.h>
  37 #include <linux/in6.h>
  38 #include <linux/netdevice.h>
  39 #include <linux/init.h>
  40 #include <linux/jhash.h>
  41 #include <linux/ipsec.h>
  42 #include <linux/times.h>
  43
  44 #include <linux/ipv6.h>
  45 #include <linux/icmpv6.h>
  46 #include <linux/random.h>
  47
  48 #include <net/tcp.h>
  49 #include <net/ndisc.h>
  50 #include <net/inet6_hashtables.h>
  51 #include <net/ipv6.h>
  52 #include <net/transp_v6.h>
  53 #include <net/addrconf.h>
  54 #include <net/ip6_route.h>
  55 #include <net/ip6_checksum.h>
  56 #include <net/inet_ecn.h>
  57 #include <net/protocol.h>
  58 #include <net/xfrm.h>
  59 #include <net/addrconf.h>
  60 #include <net/snmp.h>
  61 #include <net/dsfield.h>
  62
  63 #include <asm/uaccess.h>
  64
  65 #include <linux/proc_fs.h>
  66 #include <linux/seq_file.h>
  67
  68 static void     tcp_v6_send_reset(struct sk_buff *skb);
  69 static void     tcp_v6_reqsk_send_ack(struct sk_buff *skb, struct request_sock *req);
  70 static void     tcp_v6_send_check(struct sock *sk, struct tcphdr *th, int len,
  71                                   struct sk_buff *skb);
  72
  73 static int      tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);
  74 static int      tcp_v6_xmit(struct sk_buff *skb, int ipfragok);
  75
  76 static struct tcp_func ipv6_mapped;
  77 static struct tcp_func ipv6_specific;
  78
  79 static inline int tcp_v6_bind_conflict(const struct sock *sk,
  80                                        const struct inet_bind_bucket *tb)
  81 {
  82         const struct sock *sk2;
  83         const struct hlist_node *node;
  84
  85         /* We must walk the whole port owner list in this case. -DaveM */
  86         sk_for_each_bound(sk2, node, &tb->owners) {
  87                 if (sk != sk2 &&
  88                     (!sk->sk_bound_dev_if ||
  89                      !sk2->sk_bound_dev_if ||
  90                      sk->sk_bound_dev_if == sk2->sk_bound_dev_if) &&
  91                     (!sk->sk_reuse || !sk2->sk_reuse ||
  92                      sk2->sk_state == TCP_LISTEN) &&
  93                      ipv6_rcv_saddr_equal(sk, sk2))
  94                         break;
  95         }
  96
  97         return node != NULL;
  98 }
  99
 100 /* Grrr, addr_type already calculated by caller, but I don't want
 101  * to add some silly "cookie" argument to this method just for that.
 102  * But it doesn't matter, the recalculation is in the rarest path
 103  * this function ever takes.
 104  */
 105 static int tcp_v6_get_port(struct sock *sk, unsigned short snum)
 106 {
 107         struct inet_bind_hashbucket *head;
 108         struct inet_bind_bucket *tb;
 109         struct hlist_node *node;
 110         int ret;
 111
 112         local_bh_disable();
 113         if (snum == 0) {
 114                 int low = sysctl_local_port_range[0];
 115                 int high = sysctl_local_port_range[1];
 116                 int remaining = (high - low) + 1;
 117                 int rover;
 118
 119                 spin_lock(&tcp_hashinfo.portalloc_lock);
 120                 if (tcp_hashinfo.port_rover < low)
 121                         rover = low;
 122                 else
 123                         rover = tcp_hashinfo.port_rover;
 124                 do {    rover++;
 125                         if (rover > high)
 126                                 rover = low;
 127                         head = &tcp_hashinfo.bhash[inet_bhashfn(rover, tcp_hashinfo.bhash_size)];
 128                         spin_lock(&head->lock);
 129                         inet_bind_bucket_for_each(tb, node, &head->chain)
 130                                 if (tb->port == rover)
 131                                         goto next;
 132                         break;
 133                 next:
 134                         spin_unlock(&head->lock);
 135                 } while (--remaining > 0);
 136                 tcp_hashinfo.port_rover = rover;
 137                 spin_unlock(&tcp_hashinfo.portalloc_lock);
 138
 139                 /* Exhausted local port range during search?  It is not
 140                  * possible for us to be holding one of the bind hash
 141                  * locks if this test triggers, because if 'remaining'
 142                  * drops to zero, we broke out of the do/while loop at
 143                  * the top level, not from the 'break;' statement.
 144                  */
 145                 ret = 1;
 146                 if (unlikely(remaining <= 0))
 147                         goto fail;
 148
 149                 /* OK, here is the one we will use. */
 150                 snum = rover;
 151         } else {
 152                 head = &tcp_hashinfo.bhash[inet_bhashfn(snum, tcp_hashinfo.bhash_size)];
 153                 spin_lock(&head->lock);
 154                 inet_bind_bucket_for_each(tb, node, &head->chain)
 155                         if (tb->port == snum)
 156                                 goto tb_found;
 157         }
 158         tb = NULL;
 159         goto tb_not_found;
 160 tb_found:
 161         if (tb && !hlist_empty(&tb->owners)) {
 162                 if (tb->fastreuse > 0 && sk->sk_reuse &&
 163                     sk->sk_state != TCP_LISTEN) {
 164                         goto success;
 165                 } else {
 166                         ret = 1;
 167                         if (tcp_v6_bind_conflict(sk, tb))
 168                                 goto fail_unlock;
 169                 }
 170         }
 171 tb_not_found:
 172         ret = 1;
 173         if (tb == NULL) {
 174                 tb = inet_bind_bucket_create(tcp_hashinfo.bind_bucket_cachep, head, snum);
 175                 if (tb == NULL)
 176                         goto fail_unlock;
 177         }
 178         if (hlist_empty(&tb->owners)) {
 179                 if (sk->sk_reuse && sk->sk_state != TCP_LISTEN)
 180                         tb->fastreuse = 1;
 181                 else
 182                         tb->fastreuse = 0;
 183         } else if (tb->fastreuse &&
 184                    (!sk->sk_reuse || sk->sk_state == TCP_LISTEN))
 185                 tb->fastreuse = 0;
 186
 187 success:
 188         if (!inet_csk(sk)->icsk_bind_hash)
 189                 inet_bind_hash(sk, tb, snum);
 190         BUG_TRAP(inet_csk(sk)->icsk_bind_hash == tb);
 191         ret = 0;
 192
 193 fail_unlock:
 194         spin_unlock(&head->lock);
 195 fail:
 196         local_bh_enable();
 197         return ret;
 198 }
 199
 200 static __inline__ void __tcp_v6_hash(struct sock *sk)
 201 {
 202         struct hlist_head *list;
 203         rwlock_t *lock;
 204
 205         BUG_TRAP(sk_unhashed(sk));
 206
 207         if (sk->sk_state == TCP_LISTEN) {
 208                 list = &tcp_hashinfo.listening_hash[inet_sk_listen_hashfn(sk)];
 209                 lock = &tcp_hashinfo.lhash_lock;
 210                 inet_listen_wlock(&tcp_hashinfo);
 211         } else {
 212                 sk->sk_hashent = inet6_sk_ehashfn(sk, tcp_hashinfo.ehash_size);
 213                 list = &tcp_hashinfo.ehash[sk->sk_hashent].chain;
 214                 lock = &tcp_hashinfo.ehash[sk->sk_hashent].lock;
 215                 write_lock(lock);
 216         }
 217
 218         __sk_add_node(sk, list);
 219         sock_prot_inc_use(sk->sk_prot);
 220         write_unlock(lock);
 221 }
 222
 223
 224 static void tcp_v6_hash(struct sock *sk)
 225 {
 226         if (sk->sk_state != TCP_CLOSE) {
 227                 struct tcp_sock *tp = tcp_sk(sk);
 228
 229                 if (tp->af_specific == &ipv6_mapped) {
 230                         tcp_prot.hash(sk);
 231                         return;
 232                 }
 233                 local_bh_disable();
 234                 __tcp_v6_hash(sk);
 235                 local_bh_enable();
 236         }
 237 }
 238
 239 /*
 240  * Open request hash tables.
 241  */
 242
 243 static u32 tcp_v6_synq_hash(const struct in6_addr *raddr, const u16 rport, const u32 rnd)
 244 {
 245         u32 a, b, c;
 246
 247         a = raddr->s6_addr32[0];
 248         b = raddr->s6_addr32[1];
 249         c = raddr->s6_addr32[2];
 250
 251         a += JHASH_GOLDEN_RATIO;
 252         b += JHASH_GOLDEN_RATIO;
 253         c += rnd;
 254         __jhash_mix(a, b, c);
 255
 256         a += raddr->s6_addr32[3];
 257         b += (u32) rport;
 258         __jhash_mix(a, b, c);
 259
 260         return c & (TCP_SYNQ_HSIZE - 1);
 261 }
 262
 263 static struct request_sock *tcp_v6_search_req(const struct sock *sk,
 264                                               struct request_sock ***prevp,
 265                                               __u16 rport,
 266                                               struct in6_addr *raddr,
 267                                               struct in6_addr *laddr,
 268                                               int iif)
 269 {
 270         const struct inet_connection_sock *icsk = inet_csk(sk);
 271         struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt;
 272         struct request_sock *req, **prev;
 273
 274         for (prev = &lopt->syn_table[tcp_v6_synq_hash(raddr, rport, lopt->hash_rnd)];
 275              (req = *prev) != NULL;
 276              prev = &req->dl_next) {
 277                 const struct tcp6_request_sock *treq = tcp6_rsk(req);
 278
 279                 if (inet_rsk(req)->rmt_port == rport &&
 280                     req->rsk_ops->family == AF_INET6 &&
 281                     ipv6_addr_equal(&treq->rmt_addr, raddr) &&
 282                     ipv6_addr_equal(&treq->loc_addr, laddr) &&
 283                     (!treq->iif || treq->iif == iif)) {
 284                         BUG_TRAP(req->sk == NULL);
 285                         *prevp = prev;
 286                         return req;
 287                 }
 288         }
 289
 290         return NULL;
 291 }
 292
 293 static __inline__ u16 tcp_v6_check(struct tcphdr *th, int len,
 294                                    struct in6_addr *saddr,
 295                                    struct in6_addr *daddr,
 296                                    unsigned long base)
 297 {
 298         return csum_ipv6_magic(saddr, daddr, len, IPPROTO_TCP, base);
 299 }
 300
 301 static __u32 tcp_v6_init_sequence(struct sock *sk, struct sk_buff *skb)
 302 {
 303         if (skb->protocol == htons(ETH_P_IPV6)) {
 304                 return secure_tcpv6_sequence_number(skb->nh.ipv6h->daddr.s6_addr32,
 305                                                     skb->nh.ipv6h->saddr.s6_addr32,
 306                                                     skb->h.th->dest,
 307                                                     skb->h.th->source);
 308         } else {
 309                 return secure_tcp_sequence_number(skb->nh.iph->daddr,
 310                                                   skb->nh.iph->saddr,
 311                                                   skb->h.th->dest,
 312                                                   skb->h.th->source);
 313         }
 314 }
 315
 316 static int __tcp_v6_check_established(struct sock *sk, const __u16 lport,
 317                                       struct inet_timewait_sock **twp)
 318 {
 319         struct inet_sock *inet = inet_sk(sk);
 320         const struct ipv6_pinfo *np = inet6_sk(sk);
 321         const struct in6_addr *daddr = &np->rcv_saddr;
 322         const struct in6_addr *saddr = &np->daddr;
 323         const int dif = sk->sk_bound_dev_if;
 324         const u32 ports = INET_COMBINED_PORTS(inet->dport, lport);
 325         const int hash = inet6_ehashfn(daddr, inet->num, saddr, inet->dport,
 326                                        tcp_hashinfo.ehash_size);
 327         struct inet_ehash_bucket *head = &tcp_hashinfo.ehash[hash];
 328         struct sock *sk2;
 329         const struct hlist_node *node;
 330         struct inet_timewait_sock *tw;
 331
 332         write_lock(&head->lock);
 333
 334         /* Check TIME-WAIT sockets first. */
 335         sk_for_each(sk2, node, &(head + tcp_hashinfo.ehash_size)->chain) {
 336                 const struct tcp6_timewait_sock *tcp6tw = tcp6_twsk(sk2);
 337
 338                 tw = inet_twsk(sk2);
 339
 340                 if(*((__u32 *)&(tw->tw_dport))  == ports        &&
 341                    sk2->sk_family               == PF_INET6     &&
 342                    ipv6_addr_equal(&tcp6tw->tw_v6_daddr, saddr) &&
 343                    ipv6_addr_equal(&tcp6tw->tw_v6_rcv_saddr, daddr)     &&
 344                    sk2->sk_bound_dev_if == sk->sk_bound_dev_if) {
 345                         const struct tcp_timewait_sock *tcptw = tcp_twsk(sk2);
 346                         struct tcp_sock *tp = tcp_sk(sk);
 347
 348                         if (tcptw->tw_ts_recent_stamp &&
 349                             (!twp ||
 350                              (sysctl_tcp_tw_reuse &&
 351                               xtime.tv_sec - tcptw->tw_ts_recent_stamp > 1))) {
 352                                 /* See comment in tcp_ipv4.c */
 353                                 tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2;
 354                                 if (!tp->write_seq)
 355                                         tp->write_seq = 1;
 356                                 tp->rx_opt.ts_recent       = tcptw->tw_ts_recent;
 357                                 tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp;
 358                                 sock_hold(sk2);
 359                                 goto unique;
 360                         } else
 361                                 goto not_unique;
 362                 }
 363         }
 364         tw = NULL;
 365
 366         /* And established part... */
 367         sk_for_each(sk2, node, &head->chain) {
 368                 if (INET6_MATCH(sk2, saddr, daddr, ports, dif))
 369                         goto not_unique;
 370         }
 371
 372 unique:
 373         BUG_TRAP(sk_unhashed(sk));
 374         __sk_add_node(sk, &head->chain);
 375         sk->sk_hashent = hash;
 376         sock_prot_inc_use(sk->sk_prot);
 377         write_unlock(&head->lock);
 378
 379         if (twp) {
 380                 *twp = tw;
 381                 NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);
 382         } else if (tw) {
 383                 /* Silly. Should hash-dance instead... */
 384                 inet_twsk_deschedule(tw, &tcp_death_row);
 385                 NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);
 386
 387                 inet_twsk_put(tw);
 388         }
 389         return 0;
 390
 391 not_unique:
 392         write_unlock(&head->lock);
 393         return -EADDRNOTAVAIL;
 394 }
 395
 396 static inline u32 tcpv6_port_offset(const struct sock *sk)
 397 {
 398         const struct inet_sock *inet = inet_sk(sk);
 399         const struct ipv6_pinfo *np = inet6_sk(sk);
 400
 401         return secure_tcpv6_port_ephemeral(np->rcv_saddr.s6_addr32,
 402                                            np->daddr.s6_addr32,
 403                                            inet->dport);
 404 }
 405
 406 static int tcp_v6_hash_connect(struct sock *sk)
 407 {
 408         unsigned short snum = inet_sk(sk)->num;
 409         struct inet_bind_hashbucket *head;
 410         struct inet_bind_bucket *tb;
 411         int ret;
 412
 413         if (!snum) {
 414                 int low = sysctl_local_port_range[0];
 415                 int high = sysctl_local_port_range[1];
 416                 int range = high - low;
 417                 int i;
 418                 int port;
 419                 static u32 hint;
 420                 u32 offset = hint + tcpv6_port_offset(sk);
 421                 struct hlist_node *node;
 422                 struct inet_timewait_sock *tw = NULL;
 423
 424                 local_bh_disable();
 425                 for (i = 1; i <= range; i++) {
 426                         port = low + (i + offset) % range;
 427                         head = &tcp_hashinfo.bhash[inet_bhashfn(port, tcp_hashinfo.bhash_size)];
 428                         spin_lock(&head->lock);
 429
 430                         /* Does not bother with rcv_saddr checks,
 431                          * because the established check is already
 432                          * unique enough.
 433                          */
 434                         inet_bind_bucket_for_each(tb, node, &head->chain) {
 435                                 if (tb->port == port) {
 436                                         BUG_TRAP(!hlist_empty(&tb->owners));
 437                                         if (tb->fastreuse >= 0)
 438                                                 goto next_port;
 439                                         if (!__tcp_v6_check_established(sk,
 440                                                                         port,
 441                                                                         &tw))
 442                                                 goto ok;
 443                                         goto next_port;
 444                                 }
 445                         }
 446
 447                         tb = inet_bind_bucket_create(tcp_hashinfo.bind_bucket_cachep, head, port);
 448                         if (!tb) {
 449                                 spin_unlock(&head->lock);
 450                                 break;
 451                         }
 452                         tb->fastreuse = -1;
 453                         goto ok;
 454
 455                 next_port:
 456                         spin_unlock(&head->lock);
 457                 }
 458                 local_bh_enable();
 459
 460                 return -EADDRNOTAVAIL;
 461
 462 ok:
 463                 hint += i;
 464
 465                 /* Head lock still held and bh's disabled */
 466                 inet_bind_hash(sk, tb, port);
 467                 if (sk_unhashed(sk)) {
 468                         inet_sk(sk)->sport = htons(port);
 469                         __tcp_v6_hash(sk);
 470                 }
 471                 spin_unlock(&head->lock);
 472
 473                 if (tw) {
 474                         inet_twsk_deschedule(tw, &tcp_death_row);
 475                         inet_twsk_put(tw);
 476                 }
 477
 478                 ret = 0;
 479                 goto out;
 480         }
 481
 482         head = &tcp_hashinfo.bhash[inet_bhashfn(snum, tcp_hashinfo.bhash_size)];
 483         tb   = inet_csk(sk)->icsk_bind_hash;
 484         spin_lock_bh(&head->lock);
 485
 486         if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) {
 487                 __tcp_v6_hash(sk);
 488                 spin_unlock_bh(&head->lock);
 489                 return 0;
 490         } else {
 491                 spin_unlock(&head->lock);
 492                 /* No definite answer... Walk to established hash table */
 493                 ret = __tcp_v6_check_established(sk, snum, NULL);
 494 out:
 495                 local_bh_enable();
 496                 return ret;
 497         }
 498 }
 499
 500 static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
 501                           int addr_len)
 502 {
 503         struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
 504         struct inet_sock *inet = inet_sk(sk);
 505         struct ipv6_pinfo *np = inet6_sk(sk);
 506         struct tcp_sock *tp = tcp_sk(sk);
 507         struct in6_addr *saddr = NULL, *final_p = NULL, final;
 508         struct flowi fl;
 509         struct dst_entry *dst;
 510         int addr_type;
 511         int err;
 512
 513         if (addr_len < SIN6_LEN_RFC2133)
 514                 return -EINVAL;
 515
 516         if (usin->sin6_family != AF_INET6)
 517                 return(-EAFNOSUPPORT);
 518
 519         memset(&fl, 0, sizeof(fl));
 520
 521         if (np->sndflow) {
 522                 fl.fl6_flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK;
 523                 IP6_ECN_flow_init(fl.fl6_flowlabel);
 524                 if (fl.fl6_flowlabel&IPV6_FLOWLABEL_MASK) {
 525                         struct ip6_flowlabel *flowlabel;
 526                         flowlabel = fl6_sock_lookup(sk, fl.fl6_flowlabel);
 527                         if (flowlabel == NULL)
 528                                 return -EINVAL;
 529                         ipv6_addr_copy(&usin->sin6_addr, &flowlabel->dst);
 530                         fl6_sock_release(flowlabel);
 531                 }
 532         }
 533
 534         /*
 535          *      connect() to INADDR_ANY means loopback (BSD'ism).
 536          */
 537
 538         if(ipv6_addr_any(&usin->sin6_addr))
 539                 usin->sin6_addr.s6_addr[15] = 0x1;
 540
 541         addr_type = ipv6_addr_type(&usin->sin6_addr);
 542
 543         if(addr_type & IPV6_ADDR_MULTICAST)
 544                 return -ENETUNREACH;
 545
 546         if (addr_type&IPV6_ADDR_LINKLOCAL) {
 547                 if (addr_len >= sizeof(struct sockaddr_in6) &&
 548                     usin->sin6_scope_id) {
 549                         /* If interface is set while binding, indices
 550                          * must coincide.
 551                          */
 552                         if (sk->sk_bound_dev_if &&
 553                             sk->sk_bound_dev_if != usin->sin6_scope_id)
 554                                 return -EINVAL;
 555
 556                         sk->sk_bound_dev_if = usin->sin6_scope_id;
 557                 }
 558
 559                 /* Connect to link-local address requires an interface */
 560                 if (!sk->sk_bound_dev_if)
 561                         return -EINVAL;
 562         }
 563
 564         if (tp->rx_opt.ts_recent_stamp &&
 565             !ipv6_addr_equal(&np->daddr, &usin->sin6_addr)) {
 566                 tp->rx_opt.ts_recent = 0;
 567                 tp->rx_opt.ts_recent_stamp = 0;
 568                 tp->write_seq = 0;
 569         }
 570
 571         ipv6_addr_copy(&np->daddr, &usin->sin6_addr);
 572         np->flow_label = fl.fl6_flowlabel;
 573
 574         /*
 575          *      TCP over IPv4
 576          */
 577
 578         if (addr_type == IPV6_ADDR_MAPPED) {
 579                 u32 exthdrlen = tp->ext_header_len;
 580                 struct sockaddr_in sin;
 581
 582                 SOCK_DEBUG(sk, "connect: ipv4 mapped\n");
 583
 584                 if (__ipv6_only_sock(sk))
 585                         return -ENETUNREACH;
 586
 587                 sin.sin_family = AF_INET;
 588                 sin.sin_port = usin->sin6_port;
 589                 sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];
 590
 591                 tp->af_specific = &ipv6_mapped;
 592                 sk->sk_backlog_rcv = tcp_v4_do_rcv;
 593
 594                 err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin));
 595
 596                 if (err) {
 597                         tp->ext_header_len = exthdrlen;
 598                         tp->af_specific = &ipv6_specific;
 599                         sk->sk_backlog_rcv = tcp_v6_do_rcv;
 600                         goto failure;
 601                 } else {
 602                         ipv6_addr_set(&np->saddr, 0, 0, htonl(0x0000FFFF),
 603                                       inet->saddr);
 604                         ipv6_addr_set(&np->rcv_saddr, 0, 0, htonl(0x0000FFFF),
 605                                       inet->rcv_saddr);
 606                 }
 607
 608                 return err;
 609         }
 610
 611         if (!ipv6_addr_any(&np->rcv_saddr))
 612                 saddr = &np->rcv_saddr;
 613
 614         fl.proto = IPPROTO_TCP;
 615         ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
 616         ipv6_addr_copy(&fl.fl6_src,
 617                        (saddr ? saddr : &np->saddr));
 618         fl.oif = sk->sk_bound_dev_if;
 619         fl.fl_ip_dport = usin->sin6_port;
 620         fl.fl_ip_sport = inet->sport;
 621
 622         if (np->opt && np->opt->srcrt) {
 623                 struct rt0_hdr *rt0 = (struct rt0_hdr *)np->opt->srcrt;
 624                 ipv6_addr_copy(&final, &fl.fl6_dst);
 625                 ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
 626                 final_p = &final;
 627         }
 628
 629         err = ip6_dst_lookup(sk, &dst, &fl);
 630         if (err)
 631                 goto failure;
 632         if (final_p)
 633                 ipv6_addr_copy(&fl.fl6_dst, final_p);
 634
 635         if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) {
 636                 dst_release(dst);
 637                 goto failure;
 638         }
 639
 640         if (saddr == NULL) {
 641                 saddr = &fl.fl6_src;
 642                 ipv6_addr_copy(&np->rcv_saddr, saddr);
 643         }
 644
 645         /* set the source address */
 646         ipv6_addr_copy(&np->saddr, saddr);
 647         inet->rcv_saddr = LOOPBACK4_IPV6;
 648
 649         ip6_dst_store(sk, dst, NULL);
 650         sk->sk_route_caps = dst->dev->features &
 651                 ~(NETIF_F_IP_CSUM | NETIF_F_TSO);
 652
 653         tp->ext_header_len = 0;
 654         if (np->opt)
 655                 tp->ext_header_len = np->opt->opt_flen + np->opt->opt_nflen;
 656
 657         tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
 658
 659         inet->dport = usin->sin6_port;
 660
 661         tcp_set_state(sk, TCP_SYN_SENT);
 662         err = tcp_v6_hash_connect(sk);
 663         if (err)
 664                 goto late_failure;
 665
 666         if (!tp->write_seq)
 667                 tp->write_seq = secure_tcpv6_sequence_number(np->saddr.s6_addr32,
 668                                                              np->daddr.s6_addr32,
 669                                                              inet->sport,
 670                                                              inet->dport);
 671
 672         err = tcp_connect(sk);
 673         if (err)
 674                 goto late_failure;
 675
 676         return 0;
 677
 678 late_failure:
 679         tcp_set_state(sk, TCP_CLOSE);
 680         __sk_dst_reset(sk);
 681 failure:
 682         inet->dport = 0;
 683         sk->sk_route_caps = 0;
 684         return err;
 685 }
 686
 687 static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 688                 int type, int code, int offset, __u32 info)
 689 {
 690         struct ipv6hdr *hdr = (struct ipv6hdr*)skb->data;
 691         const struct tcphdr *th = (struct tcphdr *)(skb->data+offset);
 692         struct ipv6_pinfo *np;
 693         struct sock *sk;
 694         int err;
 695         struct tcp_sock *tp;
 696         __u32 seq;
 697
 698         sk = inet6_lookup(&tcp_hashinfo, &hdr->daddr, th->dest, &hdr->saddr,
 699                           th->source, skb->dev->ifindex);
 700
 701         if (sk == NULL) {
 702                 ICMP6_INC_STATS_BH(__in6_dev_get(skb->dev), ICMP6_MIB_INERRORS);
 703                 return;
 704         }
 705
 706         if (sk->sk_state == TCP_TIME_WAIT) {
 707                 inet_twsk_put((struct inet_timewait_sock *)sk);
 708                 return;
 709         }
 710
 711         bh_lock_sock(sk);
 712         if (sock_owned_by_user(sk))
 713                 NET_INC_STATS_BH(LINUX_MIB_LOCKDROPPEDICMPS);
 714
 715         if (sk->sk_state == TCP_CLOSE)
 716                 goto out;
 717
 718         tp = tcp_sk(sk);
 719         seq = ntohl(th->seq);
 720         if (sk->sk_state != TCP_LISTEN &&
 721             !between(seq, tp->snd_una, tp->snd_nxt)) {
 722                 NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS);
 723                 goto out;
 724         }
 725
 726         np = inet6_sk(sk);
 727
 728         if (type == ICMPV6_PKT_TOOBIG) {
 729                 struct dst_entry *dst = NULL;
 730
 731                 if (sock_owned_by_user(sk))
 732                         goto out;
 733                 if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
 734                         goto out;
 735
 736                 /* icmp should have updated the destination cache entry */
 737                 dst = __sk_dst_check(sk, np->dst_cookie);
 738
 739                 if (dst == NULL) {
 740                         struct inet_sock *inet = inet_sk(sk);
 741                         struct flowi fl;
 742
 743                         /* BUGGG_FUTURE: Again, it is not clear how
 744                            to handle rthdr case. Ignore this complexity
 745                            for now.
 746                          */
 747                         memset(&fl, 0, sizeof(fl));
 748                         fl.proto = IPPROTO_TCP;
 749                         ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
 750                         ipv6_addr_copy(&fl.fl6_src, &np->saddr);
 751                         fl.oif = sk->sk_bound_dev_if;
 752                         fl.fl_ip_dport = inet->dport;
 753                         fl.fl_ip_sport = inet->sport;
 754
 755                         if ((err = ip6_dst_lookup(sk, &dst, &fl))) {
 756                                 sk->sk_err_soft = -err;
 757                                 goto out;
 758                         }
 759
 760                         if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) {
 761                                 sk->sk_err_soft = -err;
 762                                 goto out;
 763                         }
 764
 765                 } else
 766                         dst_hold(dst);
 767
 768                 if (tp->pmtu_cookie > dst_mtu(dst)) {
 769                         tcp_sync_mss(sk, dst_mtu(dst));
 770                         tcp_simple_retransmit(sk);
 771                 } /* else let the usual retransmit timer handle it */
 772                 dst_release(dst);
 773                 goto out;
 774         }
 775
 776         icmpv6_err_convert(type, code, &err);
 777
 778         /* Might be for an request_sock */
 779         switch (sk->sk_state) {
 780                 struct request_sock *req, **prev;
 781         case TCP_LISTEN:
 782                 if (sock_owned_by_user(sk))
 783                         goto out;
 784
 785                 req = tcp_v6_search_req(sk, &prev, th->dest, &hdr->daddr,
 786                                         &hdr->saddr, inet6_iif(skb));
 787                 if (!req)
 788                         goto out;
 789
 790                 /* ICMPs are not backlogged, hence we cannot get
 791                  * an established socket here.
 792                  */
 793                 BUG_TRAP(req->sk == NULL);
 794
 795                 if (seq != tcp_rsk(req)->snt_isn) {
 796                         NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS);
 797                         goto out;
 798                 }
 799
 800                 inet_csk_reqsk_queue_drop(sk, req, prev);
 801                 goto out;
 802
 803         case TCP_SYN_SENT:
 804         case TCP_SYN_RECV:  /* Cannot happen.
 805                                It can, it SYNs are crossed. --ANK */
 806                 if (!sock_owned_by_user(sk)) {
 807                         TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS);
 808                         sk->sk_err = err;
 809                         sk->sk_error_report(sk);                /* Wake people up to see the error (see connect in sock.c) */
 810
 811                         tcp_done(sk);
 812                 } else
 813                         sk->sk_err_soft = err;
 814                 goto out;
 815         }
 816
 817         if (!sock_owned_by_user(sk) && np->recverr) {
 818                 sk->sk_err = err;
 819                 sk->sk_error_report(sk);
 820         } else
 821                 sk->sk_err_soft = err;
 822
 823 out:
 824         bh_unlock_sock(sk);
 825         sock_put(sk);
 826 }
 827
 828
 829 static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req,
 830                               struct dst_entry *dst)
 831 {
 832         struct tcp6_request_sock *treq = tcp6_rsk(req);
 833         struct ipv6_pinfo *np = inet6_sk(sk);
 834         struct sk_buff * skb;
 835         struct ipv6_txoptions *opt = NULL;
 836         struct in6_addr * final_p = NULL, final;
 837         struct flowi fl;
 838         int err = -1;
 839
 840         memset(&fl, 0, sizeof(fl));
 841         fl.proto = IPPROTO_TCP;
 842         ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr);
 843         ipv6_addr_copy(&fl.fl6_src, &treq->loc_addr);
 844         fl.fl6_flowlabel = 0;
 845         fl.oif = treq->iif;
 846         fl.fl_ip_dport = inet_rsk(req)->rmt_port;
 847         fl.fl_ip_sport = inet_sk(sk)->sport;
 848
 849         if (dst == NULL) {
 850                 opt = np->opt;
 851                 if (opt == NULL &&
 852                     np->rxopt.bits.srcrt == 2 &&
 853                     treq->pktopts) {
 854                         struct sk_buff *pktopts = treq->pktopts;
 855                         struct inet6_skb_parm *rxopt = IP6CB(pktopts);
 856                         if (rxopt->srcrt)
 857                                 opt = ipv6_invert_rthdr(sk, (struct ipv6_rt_hdr*)(pktopts->nh.raw + rxopt->srcrt));
 858                 }
 859
 860                 if (opt && opt->srcrt) {
 861                         struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt;
 862                         ipv6_addr_copy(&final, &fl.fl6_dst);
 863                         ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
 864                         final_p = &final;
 865                 }
 866
 867                 err = ip6_dst_lookup(sk, &dst, &fl);
 868                 if (err)
 869                         goto done;
 870                 if (final_p)
 871                         ipv6_addr_copy(&fl.fl6_dst, final_p);
 872                 if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0)
 873                         goto done;
 874         }
 875
 876         skb = tcp_make_synack(sk, dst, req);
 877         if (skb) {
 878                 struct tcphdr *th = skb->h.th;
 879
 880                 th->check = tcp_v6_check(th, skb->len,
 881                                          &treq->loc_addr, &treq->rmt_addr,
 882                                          csum_partial((char *)th, skb->len, skb->csum));
 883
 884                 ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr);
 885                 err = ip6_xmit(sk, skb, &fl, opt, 0);
 886                 if (err == NET_XMIT_CN)
 887                         err = 0;
 888         }
 889
 890 done:
 891         dst_release(dst);
 892         if (opt && opt != np->opt)
 893                 sock_kfree_s(sk, opt, opt->tot_len);
 894         return err;
 895 }
 896
 897 static void tcp_v6_reqsk_destructor(struct request_sock *req)
 898 {
 899         if (tcp6_rsk(req)->pktopts)
 900                 kfree_skb(tcp6_rsk(req)->pktopts);
 901 }
 902
 903 static struct request_sock_ops tcp6_request_sock_ops = {
 904         .family         =       AF_INET6,
 905         .obj_size       =       sizeof(struct tcp6_request_sock),
 906         .rtx_syn_ack    =       tcp_v6_send_synack,
 907         .send_ack       =       tcp_v6_reqsk_send_ack,
 908         .destructor     =       tcp_v6_reqsk_destructor,
 909         .send_reset     =       tcp_v6_send_reset
 910 };
 911
 912 static int ipv6_opt_accepted(struct sock *sk, struct sk_buff *skb)
 913 {
 914         struct ipv6_pinfo *np = inet6_sk(sk);
 915         struct inet6_skb_parm *opt = IP6CB(skb);
 916
 917         if (np->rxopt.all) {
 918                 if ((opt->hop && np->rxopt.bits.hopopts) ||
 919                     ((IPV6_FLOWINFO_MASK&*(u32*)skb->nh.raw) &&
 920                      np->rxopt.bits.rxflow) ||
 921                     (opt->srcrt && np->rxopt.bits.srcrt) ||
 922                     ((opt->dst1 || opt->dst0) && np->rxopt.bits.dstopts))
 923                         return 1;
 924         }
 925         return 0;
 926 }
 927
 928
 929 static void tcp_v6_send_check(struct sock *sk, struct tcphdr *th, int len,
 930                               struct sk_buff *skb)
 931 {
 932         struct ipv6_pinfo *np = inet6_sk(sk);
 933
 934         if (skb->ip_summed == CHECKSUM_HW) {
 935                 th->check = ~csum_ipv6_magic(&np->saddr, &np->daddr, len, IPPROTO_TCP,  0);
 936                 skb->csum = offsetof(struct tcphdr, check);
 937         } else {
 938                 th->check = csum_ipv6_magic(&np->saddr, &np->daddr, len, IPPROTO_TCP,
 939                                             csum_partial((char *)th, th->doff<<2,
 940                                                          skb->csum));
 941         }
 942 }
 943
 944
 945 static void tcp_v6_send_reset(struct sk_buff *skb)
 946 {
 947         struct tcphdr *th = skb->h.th, *t1;
 948         struct sk_buff *buff;
 949         struct flowi fl;
 950
 951         if (th->rst)
 952                 return;
 953
 954         if (!ipv6_unicast_destination(skb))
 955                 return;
 956
 957         /*
 958          * We need to grab some memory, and put together an RST,
 959          * and then put it into the queue to be sent.
 960          */
 961
 962         buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + sizeof(struct tcphdr),
 963                          GFP_ATOMIC);
 964         if (buff == NULL)
 965                 return;
 966
 967         skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + sizeof(struct tcphdr));
 968
 969         t1 = (struct tcphdr *) skb_push(buff,sizeof(struct tcphdr));
 970
 971         /* Swap the send and the receive. */
 972         memset(t1, 0, sizeof(*t1));
 973         t1->dest = th->source;
 974         t1->source = th->dest;
 975         t1->doff = sizeof(*t1)/4;
 976         t1->rst = 1;
 977
 978         if(th->ack) {
 979                 t1->seq = th->ack_seq;
 980         } else {
 981                 t1->ack = 1;
 982                 t1->ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin
 983                                     + skb->len - (th->doff<<2));
 984         }
 985
 986         buff->csum = csum_partial((char *)t1, sizeof(*t1), 0);
 987
 988         memset(&fl, 0, sizeof(fl));
 989         ipv6_addr_copy(&fl.fl6_dst, &skb->nh.ipv6h->saddr);
 990         ipv6_addr_copy(&fl.fl6_src, &skb->nh.ipv6h->daddr);
 991
 992         t1->check = csum_ipv6_magic(&fl.fl6_src, &fl.fl6_dst,
 993                                     sizeof(*t1), IPPROTO_TCP,
 994                                     buff->csum);
 995
 996         fl.proto = IPPROTO_TCP;
 997         fl.oif = inet6_iif(skb);
 998         fl.fl_ip_dport = t1->dest;
 999         fl.fl_ip_sport = t1->source;
1000
1001         /* sk = NULL, but it is safe for now. RST socket required. */
1002         if (!ip6_dst_lookup(NULL, &buff->dst, &fl)) {
1003
1004                 if ((xfrm_lookup(&buff->dst, &fl, NULL, 0)) < 0) {
1005                         dst_release(buff->dst);
1006                         return;
1007                 }
1008
1009                 ip6_xmit(NULL, buff, &fl, NULL, 0);
1010                 TCP_INC_STATS_BH(TCP_MIB_OUTSEGS);
1011                 TCP_INC_STATS_BH(TCP_MIB_OUTRSTS);
1012                 return;
1013         }
1014
1015         kfree_skb(buff);
1016 }
1017
1018 static void tcp_v6_send_ack(struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 ts)
1019 {
1020         struct tcphdr *th = skb->h.th, *t1;
1021         struct sk_buff *buff;
1022         struct flowi fl;
1023         int tot_len = sizeof(struct tcphdr);
1024
1025         if (ts)
1026                 tot_len += 3*4;
1027
1028         buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + tot_len,
1029                          GFP_ATOMIC);
1030         if (buff == NULL)
1031                 return;
1032
1033         skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + tot_len);
1034
1035         t1 = (struct tcphdr *) skb_push(buff,tot_len);
1036
1037         /* Swap the send and the receive. */
1038         memset(t1, 0, sizeof(*t1));
1039         t1->dest = th->source;
1040         t1->source = th->dest;
1041         t1->doff = tot_len/4;
1042         t1->seq = htonl(seq);
1043         t1->ack_seq = htonl(ack);
1044         t1->ack = 1;
1045         t1->window = htons(win);
1046
1047         if (ts) {
1048                 u32 *ptr = (u32*)(t1 + 1);
1049                 *ptr++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
1050                                (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
1051                 *ptr++ = htonl(tcp_time_stamp);
1052                 *ptr = htonl(ts);
1053         }
1054
1055         buff->csum = csum_partial((char *)t1, tot_len, 0);
1056
1057         memset(&fl, 0, sizeof(fl));
1058         ipv6_addr_copy(&fl.fl6_dst, &skb->nh.ipv6h->saddr);
1059         ipv6_addr_copy(&fl.fl6_src, &skb->nh.ipv6h->daddr);
1060
1061         t1->check = csum_ipv6_magic(&fl.fl6_src, &fl.fl6_dst,
1062                                     tot_len, IPPROTO_TCP,
1063                                     buff->csum);
1064
1065         fl.proto = IPPROTO_TCP;
1066         fl.oif = inet6_iif(skb);
1067         fl.fl_ip_dport = t1->dest;
1068         fl.fl_ip_sport = t1->source;
1069
1070         if (!ip6_dst_lookup(NULL, &buff->dst, &fl)) {
1071                 if ((xfrm_lookup(&buff->dst, &fl, NULL, 0)) < 0) {
1072                         dst_release(buff->dst);
1073                         return;
1074                 }
1075                 ip6_xmit(NULL, buff, &fl, NULL, 0);
1076                 TCP_INC_STATS_BH(TCP_MIB_OUTSEGS);
1077                 return;
1078         }
1079
1080         kfree_skb(buff);
1081 }
1082
1083 static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
1084 {
1085         struct inet_timewait_sock *tw = inet_twsk(sk);
1086         const struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
1087
1088         tcp_v6_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
1089                         tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
1090                         tcptw->tw_ts_recent);
1091
1092         inet_twsk_put(tw);
1093 }
1094
1095 static void tcp_v6_reqsk_send_ack(struct sk_buff *skb, struct request_sock *req)
1096 {
1097         tcp_v6_send_ack(skb, tcp_rsk(req)->snt_isn + 1, tcp_rsk(req)->rcv_isn + 1, req->rcv_wnd, req->ts_recent);
1098 }
1099
1100
1101 static struct sock *tcp_v6_hnd_req(struct sock *sk,struct sk_buff *skb)
1102 {
1103         struct request_sock *req, **prev;
1104         const struct tcphdr *th = skb->h.th;
1105         struct sock *nsk;
1106
1107         /* Find possible connection requests. */
1108         req = tcp_v6_search_req(sk, &prev, th->source, &skb->nh.ipv6h->saddr,
1109                                 &skb->nh.ipv6h->daddr, inet6_iif(skb));
1110         if (req)
1111                 return tcp_check_req(sk, skb, req, prev);
1112
1113         nsk = __inet6_lookup_established(&tcp_hashinfo, &skb->nh.ipv6h->saddr,
1114                                          th->source, &skb->nh.ipv6h->daddr,
1115                                          ntohs(th->dest), inet6_iif(skb));
1116
1117         if (nsk) {
1118                 if (nsk->sk_state != TCP_TIME_WAIT) {
1119                         bh_lock_sock(nsk);
1120                         return nsk;
1121                 }
1122                 inet_twsk_put((struct inet_timewait_sock *)nsk);
1123                 return NULL;
1124         }
1125
1126 #if 0 /*def CONFIG_SYN_COOKIES*/
1127         if (!th->rst && !th->syn && th->ack)
1128                 sk = cookie_v6_check(sk, skb, &(IPCB(skb)->opt));
1129 #endif
1130         return sk;
1131 }
1132
1133 static void tcp_v6_synq_add(struct sock *sk, struct request_sock *req)
1134 {
1135         struct inet_connection_sock *icsk = inet_csk(sk);
1136         struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt;
1137         const u32 h = tcp_v6_synq_hash(&tcp6_rsk(req)->rmt_addr, inet_rsk(req)->rmt_port, lopt->hash_rnd);
1138
1139         reqsk_queue_hash_req(&icsk->icsk_accept_queue, h, req, TCP_TIMEOUT_INIT);
1140         inet_csk_reqsk_queue_added(sk, TCP_TIMEOUT_INIT);
1141 }
1142
1143
1144 /* FIXME: this is substantially similar to the ipv4 code.
1145  * Can some kind of merge be done? -- erics
1146  */
1147 static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
1148 {
1149         struct tcp6_request_sock *treq;
1150         struct ipv6_pinfo *np = inet6_sk(sk);
1151         struct tcp_options_received tmp_opt;
1152         struct tcp_sock *tp = tcp_sk(sk);
1153         struct request_sock *req = NULL;
1154         __u32 isn = TCP_SKB_CB(skb)->when;
1155
1156         if (skb->protocol == htons(ETH_P_IP))
1157                 return tcp_v4_conn_request(sk, skb);
1158
1159         if (!ipv6_unicast_destination(skb))
1160                 goto drop;
1161
1162         /*
1163          *      There are no SYN attacks on IPv6, yet...
1164          */
1165         if (inet_csk_reqsk_queue_is_full(sk) && !isn) {
1166                 if (net_ratelimit())
1167                         printk(KERN_INFO "TCPv6: dropping request, synflood is possible\n");
1168                 goto drop;
1169         }
1170
1171         if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1)
1172                 goto drop;
1173
1174         req = reqsk_alloc(&tcp6_request_sock_ops);
1175         if (req == NULL)
1176                 goto drop;
1177
1178         tcp_clear_options(&tmp_opt);
1179         tmp_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
1180         tmp_opt.user_mss = tp->rx_opt.user_mss;
1181
1182         tcp_parse_options(skb, &tmp_opt, 0);
1183
1184         tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
1185         tcp_openreq_init(req, &tmp_opt, skb);
1186
1187         treq = tcp6_rsk(req);
1188         ipv6_addr_copy(&treq->rmt_addr, &skb->nh.ipv6h->saddr);
1189         ipv6_addr_copy(&treq->loc_addr, &skb->nh.ipv6h->daddr);
1190         TCP_ECN_create_request(req, skb->h.th);
1191         treq->pktopts = NULL;
1192         if (ipv6_opt_accepted(sk, skb) ||
1193             np->rxopt.bits.rxinfo ||
1194             np->rxopt.bits.rxhlim) {
1195                 atomic_inc(&skb->users);
1196                 treq->pktopts = skb;
1197         }
1198         treq->iif = sk->sk_bound_dev_if;
1199
1200         /* So that link locals have meaning */
1201         if (!sk->sk_bound_dev_if &&
1202             ipv6_addr_type(&treq->rmt_addr) & IPV6_ADDR_LINKLOCAL)
1203                 treq->iif = inet6_iif(skb);
1204
1205         if (isn == 0)
1206                 isn = tcp_v6_init_sequence(sk,skb);
1207
1208         tcp_rsk(req)->snt_isn = isn;
1209
1210         if (tcp_v6_send_synack(sk, req, NULL))
1211                 goto drop;
1212
1213         tcp_v6_synq_add(sk, req);
1214
1215         return 0;
1216
1217 drop:
1218         if (req)
1219                 reqsk_free(req);
1220
1221         TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS);
1222         return 0; /* don't send reset */
1223 }
1224
1225 static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
1226                                           struct request_sock *req,
1227                                           struct dst_entry *dst)
1228 {
1229         struct tcp6_request_sock *treq = tcp6_rsk(req);
1230         struct ipv6_pinfo *newnp, *np = inet6_sk(sk);
1231         struct tcp6_sock *newtcp6sk;
1232         struct inet_sock *newinet;
1233         struct tcp_sock *newtp;
1234         struct sock *newsk;
1235         struct ipv6_txoptions *opt;
1236
1237         if (skb->protocol == htons(ETH_P_IP)) {
1238                 /*
1239                  *      v6 mapped
1240                  */
1241
1242                 newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst);
1243
1244                 if (newsk == NULL)
1245                         return NULL;
1246
1247                 newtcp6sk = (struct tcp6_sock *)newsk;
1248                 inet_sk(newsk)->pinet6 = &newtcp6sk->inet6;
1249
1250                 newinet = inet_sk(newsk);
1251                 newnp = inet6_sk(newsk);
1252                 newtp = tcp_sk(newsk);
1253
1254                 memcpy(newnp, np, sizeof(struct ipv6_pinfo));
1255
1256                 ipv6_addr_set(&newnp->daddr, 0, 0, htonl(0x0000FFFF),
1257                               newinet->daddr);
1258
1259                 ipv6_addr_set(&newnp->saddr, 0, 0, htonl(0x0000FFFF),
1260                               newinet->saddr);
1261
1262                 ipv6_addr_copy(&newnp->rcv_saddr, &newnp->saddr);
1263
1264                 newtp->af_specific = &ipv6_mapped;
1265                 newsk->sk_backlog_rcv = tcp_v4_do_rcv;
1266                 newnp->pktoptions  = NULL;
1267                 newnp->opt         = NULL;
1268                 newnp->mcast_oif   = inet6_iif(skb);
1269                 newnp->mcast_hops  = skb->nh.ipv6h->hop_limit;
1270
1271                 /*
1272                  * No need to charge this sock to the relevant IPv6 refcnt debug socks count
1273                  * here, tcp_create_openreq_child now does this for us, see the comment in
1274                  * that function for the gory details. -acme
1275                  */
1276
1277                 /* It is tricky place. Until this moment IPv4 tcp
1278                    worked with IPv6 af_tcp.af_specific.
1279                    Sync it now.
1280                  */
1281                 tcp_sync_mss(newsk, newtp->pmtu_cookie);
1282
1283                 return newsk;
1284         }
1285
1286         opt = np->opt;
1287
1288         if (sk_acceptq_is_full(sk))
1289                 goto out_overflow;
1290
1291         if (np->rxopt.bits.srcrt == 2 &&
1292             opt == NULL && treq->pktopts) {
1293                 struct inet6_skb_parm *rxopt = IP6CB(treq->pktopts);
1294                 if (rxopt->srcrt)
1295                         opt = ipv6_invert_rthdr(sk, (struct ipv6_rt_hdr *)(treq->pktopts->nh.raw + rxopt->srcrt));
1296         }
1297
1298         if (dst == NULL) {
1299                 struct in6_addr *final_p = NULL, final;
1300                 struct flowi fl;
1301
1302                 memset(&fl, 0, sizeof(fl));
1303                 fl.proto = IPPROTO_TCP;
1304                 ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr);
1305                 if (opt && opt->srcrt) {
1306                         struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt;
1307                         ipv6_addr_copy(&final, &fl.fl6_dst);
1308                         ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
1309                         final_p = &final;
1310                 }
1311                 ipv6_addr_copy(&fl.fl6_src, &treq->loc_addr);
1312                 fl.oif = sk->sk_bound_dev_if;
1313                 fl.fl_ip_dport = inet_rsk(req)->rmt_port;
1314                 fl.fl_ip_sport = inet_sk(sk)->sport;
1315
1316                 if (ip6_dst_lookup(sk, &dst, &fl))
1317                         goto out;
1318
1319                 if (final_p)
1320                         ipv6_addr_copy(&fl.fl6_dst, final_p);
1321
1322                 if ((xfrm_lookup(&dst, &fl, sk, 0)) < 0)
1323                         goto out;
1324         }
1325
1326         newsk = tcp_create_openreq_child(sk, req, skb);
1327         if (newsk == NULL)
1328                 goto out;
1329
1330         /*
1331          * No need to charge this sock to the relevant IPv6 refcnt debug socks
1332          * count here, tcp_create_openreq_child now does this for us, see the
1333          * comment in that function for the gory details. -acme
1334          */
1335
1336         ip6_dst_store(newsk, dst, NULL);
1337         newsk->sk_route_caps = dst->dev->features &
1338                 ~(NETIF_F_IP_CSUM | NETIF_F_TSO);
1339
1340         newtcp6sk = (struct tcp6_sock *)newsk;
1341         inet_sk(newsk)->pinet6 = &newtcp6sk->inet6;
1342
1343         newtp = tcp_sk(newsk);
1344         newinet = inet_sk(newsk);
1345         newnp = inet6_sk(newsk);
1346
1347         memcpy(newnp, np, sizeof(struct ipv6_pinfo));
1348
1349         ipv6_addr_copy(&newnp->daddr, &treq->rmt_addr);
1350         ipv6_addr_copy(&newnp->saddr, &treq->loc_addr);
1351         ipv6_addr_copy(&newnp->rcv_saddr, &treq->loc_addr);
1352         newsk->sk_bound_dev_if = treq->iif;
1353
1354         /* Now IPv6 options...
1355
1356            First: no IPv4 options.
1357          */
1358         newinet->opt = NULL;
1359
1360         /* Clone RX bits */
1361         newnp->rxopt.all = np->rxopt.all;
1362
1363         /* Clone pktoptions received with SYN */
1364         newnp->pktoptions = NULL;
1365         if (treq->pktopts != NULL) {
1366                 newnp->pktoptions = skb_clone(treq->pktopts, GFP_ATOMIC);
1367                 kfree_skb(treq->pktopts);
1368                 treq->pktopts = NULL;
1369                 if (newnp->pktoptions)
1370                         skb_set_owner_r(newnp->pktoptions, newsk);
1371         }
1372         newnp->opt        = NULL;
1373         newnp->mcast_oif  = inet6_iif(skb);
1374         newnp->mcast_hops = skb->nh.ipv6h->hop_limit;
1375
1376         /* Clone native IPv6 options from listening socket (if any)
1377
1378            Yes, keeping reference count would be much more clever,
1379            but we make one more one thing there: reattach optmem
1380            to newsk.
1381          */
1382         if (opt) {
1383                 newnp->opt = ipv6_dup_options(newsk, opt);
1384                 if (opt != np->opt)
1385                         sock_kfree_s(sk, opt, opt->tot_len);
1386         }
1387
1388         newtp->ext_header_len = 0;
1389         if (newnp->opt)
1390                 newtp->ext_header_len = newnp->opt->opt_nflen +
1391                                         newnp->opt->opt_flen;
1392
1393         tcp_sync_mss(newsk, dst_mtu(dst));
1394         newtp->advmss = dst_metric(dst, RTAX_ADVMSS);
1395         tcp_initialize_rcv_mss(newsk);
1396
1397         newinet->daddr = newinet->saddr = newinet->rcv_saddr = LOOPBACK4_IPV6;
1398
1399         __tcp_v6_hash(newsk);
1400         inet_inherit_port(&tcp_hashinfo, sk, newsk);
1401
1402         return newsk;
1403
1404 out_overflow:
1405         NET_INC_STATS_BH(LINUX_MIB_LISTENOVERFLOWS);
1406 out:
1407         NET_INC_STATS_BH(LINUX_MIB_LISTENDROPS);
1408         if (opt && opt != np->opt)
1409                 sock_kfree_s(sk, opt, opt->tot_len);
1410         dst_release(dst);
1411         return NULL;
1412 }
1413
1414 static int tcp_v6_checksum_init(struct sk_buff *skb)
1415 {
1416         if (skb->ip_summed == CHECKSUM_HW) {
1417                 skb->ip_summed = CHECKSUM_UNNECESSARY;
1418                 if (!tcp_v6_check(skb->h.th,skb->len,&skb->nh.ipv6h->saddr,
1419                                   &skb->nh.ipv6h->daddr,skb->csum))
1420                         return 0;
1421                 LIMIT_NETDEBUG(KERN_DEBUG "hw tcp v6 csum failed\n");
1422         }
1423         if (skb->len <= 76) {
1424                 if (tcp_v6_check(skb->h.th,skb->len,&skb->nh.ipv6h->saddr,
1425                                  &skb->nh.ipv6h->daddr,skb_checksum(skb, 0, skb->len, 0)))
1426                         return -1;
1427                 skb->ip_summed = CHECKSUM_UNNECESSARY;
1428         } else {
1429                 skb->csum = ~tcp_v6_check(skb->h.th,skb->len,&skb->nh.ipv6h->saddr,
1430                                           &skb->nh.ipv6h->daddr,0);
1431         }
1432         return 0;
1433 }
1434
1435 /* The socket must have it's spinlock held when we get
1436  * here.
1437  *
1438  * We have a potential double-lock case here, so even when
1439  * doing backlog processing we use the BH locking scheme.
1440  * This is because we cannot sleep with the original spinlock
1441  * held.
1442  */
1443 static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
1444 {
1445         struct ipv6_pinfo *np = inet6_sk(sk);
1446         struct tcp_sock *tp;
1447         struct sk_buff *opt_skb = NULL;
1448
1449         /* Imagine: socket is IPv6. IPv4 packet arrives,
1450            goes to IPv4 receive handler and backlogged.
1451            From backlog it always goes here. Kerboom...
1452            Fortunately, tcp_rcv_established and rcv_established
1453            handle them correctly, but it is not case with
1454            tcp_v6_hnd_req and tcp_v6_send_reset().   --ANK
1455          */
1456
1457         if (skb->protocol == htons(ETH_P_IP))
1458                 return tcp_v4_do_rcv(sk, skb);
1459
1460         if (sk_filter(sk, skb, 0))
1461                 goto discard;
1462
1463         /*
1464          *      socket locking is here for SMP purposes as backlog rcv
1465          *      is currently called with bh processing disabled.
1466          */
1467
1468         /* Do Stevens' IPV6_PKTOPTIONS.
1469
1470            Yes, guys, it is the only place in our code, where we
1471            may make it not affecting IPv4.
1472            The rest of code is protocol independent,
1473            and I do not like idea to uglify IPv4.
1474
1475            Actually, all the idea behind IPV6_PKTOPTIONS
1476            looks not very well thought. For now we latch
1477            options, received in the last packet, enqueued
1478            by tcp. Feel free to propose better solution.
1479                                                --ANK (980728)
1480          */
1481         if (np->rxopt.all)
1482                 opt_skb = skb_clone(skb, GFP_ATOMIC);
1483
1484         if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
1485                 TCP_CHECK_TIMER(sk);
1486                 if (tcp_rcv_established(sk, skb, skb->h.th, skb->len))
1487                         goto reset;
1488                 TCP_CHECK_TIMER(sk);
1489                 if (opt_skb)
1490                         goto ipv6_pktoptions;
1491                 return 0;
1492         }
1493
1494         if (skb->len < (skb->h.th->doff<<2) || tcp_checksum_complete(skb))
1495                 goto csum_err;
1496
1497         if (sk->sk_state == TCP_LISTEN) {
1498                 struct sock *nsk = tcp_v6_hnd_req(sk, skb);
1499                 if (!nsk)
1500                         goto discard;
1501
1502                 /*
1503                  * Queue it on the new socket if the new socket is active,
1504                  * otherwise we just shortcircuit this and continue with
1505                  * the new socket..
1506                  */
1507                 if(nsk != sk) {
1508                         if (tcp_child_process(sk, nsk, skb))
1509                                 goto reset;
1510                         if (opt_skb)
1511                                 __kfree_skb(opt_skb);
1512                         return 0;
1513                 }
1514         }
1515
1516         TCP_CHECK_TIMER(sk);
1517         if (tcp_rcv_state_process(sk, skb, skb->h.th, skb->len))
1518                 goto reset;
1519         TCP_CHECK_TIMER(sk);
1520         if (opt_skb)
1521                 goto ipv6_pktoptions;
1522         return 0;
1523
1524 reset:
1525         tcp_v6_send_reset(skb);
1526 discard:
1527         if (opt_skb)
1528                 __kfree_skb(opt_skb);
1529         kfree_skb(skb);
1530         return 0;
1531 csum_err:
1532         TCP_INC_STATS_BH(TCP_MIB_INERRS);
1533         goto discard;
1534
1535
1536 ipv6_pktoptions:
1537         /* Do you ask, what is it?
1538
1539            1. skb was enqueued by tcp.
1540            2. skb is added to tail of read queue, rather than out of order.
1541            3. socket is not in passive state.
1542            4. Finally, it really contains options, which user wants to receive.
1543          */
1544         tp = tcp_sk(sk);
1545         if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt &&
1546             !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
1547                 if (np->rxopt.bits.rxinfo)
1548                         np->mcast_oif = inet6_iif(opt_skb);
1549                 if (np->rxopt.bits.rxhlim)
1550                         np->mcast_hops = opt_skb->nh.ipv6h->hop_limit;
1551                 if (ipv6_opt_accepted(sk, opt_skb)) {
1552                         skb_set_owner_r(opt_skb, sk);
1553                         opt_skb = xchg(&np->pktoptions, opt_skb);
1554                 } else {
1555                         __kfree_skb(opt_skb);
1556                         opt_skb = xchg(&np->pktoptions, NULL);
1557                 }
1558         }
1559
1560         if (opt_skb)
1561                 kfree_skb(opt_skb);
1562         return 0;
1563 }
1564
1565 static int tcp_v6_rcv(struct sk_buff **pskb, unsigned int *nhoffp)
1566 {
1567         struct sk_buff *skb = *pskb;
1568         struct tcphdr *th;
1569         struct sock *sk;
1570         int ret;
1571
1572         if (skb->pkt_type != PACKET_HOST)
1573                 goto discard_it;
1574
1575         /*
1576          *      Count it even if it's bad.
1577          */
1578         TCP_INC_STATS_BH(TCP_MIB_INSEGS);
1579
1580         if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
1581                 goto discard_it;
1582
1583         th = skb->h.th;
1584
1585         if (th->doff < sizeof(struct tcphdr)/4)
1586                 goto bad_packet;
1587         if (!pskb_may_pull(skb, th->doff*4))
1588                 goto discard_it;
1589
1590         if ((skb->ip_summed != CHECKSUM_UNNECESSARY &&
1591              tcp_v6_checksum_init(skb) < 0))
1592                 goto bad_packet;
1593
1594         th = skb->h.th;
1595         TCP_SKB_CB(skb)->seq = ntohl(th->seq);
1596         TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
1597                                     skb->len - th->doff*4);
1598         TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
1599         TCP_SKB_CB(skb)->when = 0;
1600         TCP_SKB_CB(skb)->flags = ipv6_get_dsfield(skb->nh.ipv6h);
1601         TCP_SKB_CB(skb)->sacked = 0;
1602
1603         sk = __inet6_lookup(&tcp_hashinfo, &skb->nh.ipv6h->saddr, th->source,
1604                             &skb->nh.ipv6h->daddr, ntohs(th->dest),
1605                             inet6_iif(skb));
1606
1607         if (!sk)
1608                 goto no_tcp_socket;
1609
1610 process:
1611         if (sk->sk_state == TCP_TIME_WAIT)
1612                 goto do_time_wait;
1613
1614         if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
1615                 goto discard_and_relse;
1616
1617         if (sk_filter(sk, skb, 0))
1618                 goto discard_and_relse;
1619
1620         skb->dev = NULL;
1621
1622         bh_lock_sock(sk);
1623         ret = 0;
1624         if (!sock_owned_by_user(sk)) {
1625                 if (!tcp_prequeue(sk, skb))
1626                         ret = tcp_v6_do_rcv(sk, skb);
1627         } else
1628                 sk_add_backlog(sk, skb);
1629         bh_unlock_sock(sk);
1630
1631         sock_put(sk);
1632         return ret ? -1 : 0;
1633
1634 no_tcp_socket:
1635         if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
1636                 goto discard_it;
1637
1638         if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) {
1639 bad_packet:
1640                 TCP_INC_STATS_BH(TCP_MIB_INERRS);
1641         } else {
1642                 tcp_v6_send_reset(skb);
1643         }
1644
1645 discard_it:
1646
1647         /*
1648          *      Discard frame
1649          */
1650
1651         kfree_skb(skb);
1652         return 0;
1653
1654 discard_and_relse:
1655         sock_put(sk);
1656         goto discard_it;
1657
1658 do_time_wait:
1659         if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
1660                 inet_twsk_put((struct inet_timewait_sock *)sk);
1661                 goto discard_it;
1662         }
1663
1664         if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) {
1665                 TCP_INC_STATS_BH(TCP_MIB_INERRS);
1666                 inet_twsk_put((struct inet_timewait_sock *)sk);
1667                 goto discard_it;
1668         }
1669
1670         switch (tcp_timewait_state_process((struct inet_timewait_sock *)sk,
1671                                            skb, th)) {
1672         case TCP_TW_SYN:
1673         {
1674                 struct sock *sk2;
1675
1676                 sk2 = inet6_lookup_listener(&tcp_hashinfo,
1677                                             &skb->nh.ipv6h->daddr,
1678                                             ntohs(th->dest), inet6_iif(skb));
1679                 if (sk2 != NULL) {
1680                         struct inet_timewait_sock *tw = inet_twsk(sk);
1681                         inet_twsk_deschedule(tw, &tcp_death_row);
1682                         inet_twsk_put(tw);
1683                         sk = sk2;
1684                         goto process;
1685                 }
1686                 /* Fall through to ACK */
1687         }
1688         case TCP_TW_ACK:
1689                 tcp_v6_timewait_ack(sk, skb);
1690                 break;
1691         case TCP_TW_RST:
1692                 goto no_tcp_socket;
1693         case TCP_TW_SUCCESS:;
1694         }
1695         goto discard_it;
1696 }
1697
1698 static int tcp_v6_rebuild_header(struct sock *sk)
1699 {
1700         int err;
1701         struct dst_entry *dst;
1702         struct ipv6_pinfo *np = inet6_sk(sk);
1703
1704         dst = __sk_dst_check(sk, np->dst_cookie);
1705
1706         if (dst == NULL) {
1707                 struct inet_sock *inet = inet_sk(sk);
1708                 struct in6_addr *final_p = NULL, final;
1709                 struct flowi fl;
1710
1711                 memset(&fl, 0, sizeof(fl));
1712                 fl.proto = IPPROTO_TCP;
1713                 ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
1714                 ipv6_addr_copy(&fl.fl6_src, &np->saddr);
1715                 fl.fl6_flowlabel = np->flow_label;
1716                 fl.oif = sk->sk_bound_dev_if;
1717                 fl.fl_ip_dport = inet->dport;
1718                 fl.fl_ip_sport = inet->sport;
1719
1720                 if (np->opt && np->opt->srcrt) {
1721                         struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt;
1722                         ipv6_addr_copy(&final, &fl.fl6_dst);
1723                         ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
1724                         final_p = &final;
1725                 }
1726
1727                 err = ip6_dst_lookup(sk, &dst, &fl);
1728                 if (err) {
1729                         sk->sk_route_caps = 0;
1730                         return err;
1731                 }
1732                 if (final_p)
1733                         ipv6_addr_copy(&fl.fl6_dst, final_p);
1734
1735                 if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) {
1736                         sk->sk_err_soft = -err;
1737                         dst_release(dst);
1738                         return err;
1739                 }
1740
1741                 ip6_dst_store(sk, dst, NULL);
1742                 sk->sk_route_caps = dst->dev->features &
1743                         ~(NETIF_F_IP_CSUM | NETIF_F_TSO);
1744         }
1745
1746         return 0;
1747 }
1748
1749 static int tcp_v6_xmit(struct sk_buff *skb, int ipfragok)
1750 {
1751         struct sock *sk = skb->sk;
1752         struct inet_sock *inet = inet_sk(sk);
1753         struct ipv6_pinfo *np = inet6_sk(sk);
1754         struct flowi fl;
1755         struct dst_entry *dst;
1756         struct in6_addr *final_p = NULL, final;
1757
1758         memset(&fl, 0, sizeof(fl));
1759         fl.proto = IPPROTO_TCP;
1760         ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
1761         ipv6_addr_copy(&fl.fl6_src, &np->saddr);
1762         fl.fl6_flowlabel = np->flow_label;
1763         IP6_ECN_flow_xmit(sk, fl.fl6_flowlabel);
1764         fl.oif = sk->sk_bound_dev_if;
1765         fl.fl_ip_sport = inet->sport;
1766         fl.fl_ip_dport = inet->dport;
1767
1768         if (np->opt && np->opt->srcrt) {
1769                 struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt;
1770                 ipv6_addr_copy(&final, &fl.fl6_dst);
1771                 ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
1772                 final_p = &final;
1773         }
1774
1775         dst = __sk_dst_check(sk, np->dst_cookie);
1776
1777         if (dst == NULL) {
1778                 int err = ip6_dst_lookup(sk, &dst, &fl);
1779
1780                 if (err) {
1781                         sk->sk_err_soft = -err;
1782                         return err;
1783                 }
1784
1785                 if (final_p)
1786                         ipv6_addr_copy(&fl.fl6_dst, final_p);
1787
1788                 if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) {
1789                         sk->sk_route_caps = 0;
1790                         dst_release(dst);
1791                         return err;
1792                 }
1793
1794                 ip6_dst_store(sk, dst, NULL);
1795                 sk->sk_route_caps = dst->dev->features &
1796                         ~(NETIF_F_IP_CSUM | NETIF_F_TSO);
1797         }
1798
1799         skb->dst = dst_clone(dst);
1800
1801         /* Restore final destination back after routing done */
1802         ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
1803
1804         return ip6_xmit(sk, skb, &fl, np->opt, 0);
1805 }
1806
1807 static void v6_addr2sockaddr(struct sock *sk, struct sockaddr * uaddr)
1808 {
1809         struct ipv6_pinfo *np = inet6_sk(sk);
1810         struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) uaddr;
1811
1812         sin6->sin6_family = AF_INET6;
1813         ipv6_addr_copy(&sin6->sin6_addr, &np->daddr);
1814         sin6->sin6_port = inet_sk(sk)->dport;
1815         /* We do not store received flowlabel for TCP */
1816         sin6->sin6_flowinfo = 0;
1817         sin6->sin6_scope_id = 0;
1818         if (sk->sk_bound_dev_if &&
1819             ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL)
1820                 sin6->sin6_scope_id = sk->sk_bound_dev_if;
1821 }
1822
1823 static int tcp_v6_remember_stamp(struct sock *sk)
1824 {
1825         /* Alas, not yet... */
1826         return 0;
1827 }
1828
1829 static struct tcp_func ipv6_specific = {
1830         .queue_xmit     =       tcp_v6_xmit,
1831         .send_check     =       tcp_v6_send_check,
1832         .rebuild_header =       tcp_v6_rebuild_header,
1833         .conn_request   =       tcp_v6_conn_request,
1834         .syn_recv_sock  =       tcp_v6_syn_recv_sock,
1835         .remember_stamp =       tcp_v6_remember_stamp,
1836         .net_header_len =       sizeof(struct ipv6hdr),
1837
1838         .setsockopt     =       ipv6_setsockopt,
1839         .getsockopt     =       ipv6_getsockopt,
1840         .addr2sockaddr  =       v6_addr2sockaddr,
1841         .sockaddr_len   =       sizeof(struct sockaddr_in6)
1842 };
1843
1844 /*
1845  *      TCP over IPv4 via INET6 API
1846  */
1847
1848 static struct tcp_func ipv6_mapped = {
1849         .queue_xmit     =       ip_queue_xmit,
1850         .send_check     =       tcp_v4_send_check,
1851         .rebuild_header =       inet_sk_rebuild_header,
1852         .conn_request   =       tcp_v6_conn_request,
1853         .syn_recv_sock  =       tcp_v6_syn_recv_sock,
1854         .remember_stamp =       tcp_v4_remember_stamp,
1855         .net_header_len =       sizeof(struct iphdr),
1856
1857         .setsockopt     =       ipv6_setsockopt,
1858         .getsockopt     =       ipv6_getsockopt,
1859         .addr2sockaddr  =       v6_addr2sockaddr,
1860         .sockaddr_len   =       sizeof(struct sockaddr_in6)
1861 };
1862
1863
1864
1865 /* NOTE: A lot of things set to zero explicitly by call to
1866  *       sk_alloc() so need not be done here.
1867  */
1868 static int tcp_v6_init_sock(struct sock *sk)
1869 {
1870         struct inet_connection_sock *icsk = inet_csk(sk);
1871         struct tcp_sock *tp = tcp_sk(sk);
1872
1873         skb_queue_head_init(&tp->out_of_order_queue);
1874         tcp_init_xmit_timers(sk);
1875         tcp_prequeue_init(tp);
1876
1877         icsk->icsk_rto = TCP_TIMEOUT_INIT;
1878         tp->mdev = TCP_TIMEOUT_INIT;
1879
1880         /* So many TCP implementations out there (incorrectly) count the
1881          * initial SYN frame in their delayed-ACK and congestion control
1882          * algorithms that we must have the following bandaid to talk
1883          * efficiently to them.  -DaveM
1884          */
1885         tp->snd_cwnd = 2;
1886
1887         /* See draft-stevens-tcpca-spec-01 for discussion of the
1888          * initialization of these values.
1889          */
1890         tp->snd_ssthresh = 0x7fffffff;
1891         tp->snd_cwnd_clamp = ~0;
1892         tp->mss_cache = 536;
1893
1894         tp->reordering = sysctl_tcp_reordering;
1895
1896         sk->sk_state = TCP_CLOSE;
1897
1898         tp->af_specific = &ipv6_specific;
1899         icsk->icsk_ca_ops = &tcp_init_congestion_ops;
1900         sk->sk_write_space = sk_stream_write_space;
1901         sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);
1902
1903         sk->sk_sndbuf = sysctl_tcp_wmem[1];
1904         sk->sk_rcvbuf = sysctl_tcp_rmem[1];
1905
1906         atomic_inc(&tcp_sockets_allocated);
1907
1908         return 0;
1909 }
1910
1911 static int tcp_v6_destroy_sock(struct sock *sk)
1912 {
1913         tcp_v4_destroy_sock(sk);
1914         return inet6_destroy_sock(sk);
1915 }
1916
1917 /* Proc filesystem TCPv6 sock list dumping. */
1918 static void get_openreq6(struct seq_file *seq,
1919                          struct sock *sk, struct request_sock *req, int i, int uid)
1920 {
1921         struct in6_addr *dest, *src;
1922         int ttd = req->expires - jiffies;
1923
1924         if (ttd < 0)
1925                 ttd = 0;
1926
1927         src = &tcp6_rsk(req)->loc_addr;
1928         dest = &tcp6_rsk(req)->rmt_addr;
1929         seq_printf(seq,
1930                    "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1931                    "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %p\n",
1932                    i,
1933                    src->s6_addr32[0], src->s6_addr32[1],
1934                    src->s6_addr32[2], src->s6_addr32[3],
1935                    ntohs(inet_sk(sk)->sport),
1936                    dest->s6_addr32[0], dest->s6_addr32[1],
1937                    dest->s6_addr32[2], dest->s6_addr32[3],
1938                    ntohs(inet_rsk(req)->rmt_port),
1939                    TCP_SYN_RECV,
1940                    0,0, /* could print option size, but that is af dependent. */
1941                    1,   /* timers active (only the expire timer) */
1942                    jiffies_to_clock_t(ttd),
1943                    req->retrans,
1944                    uid,
1945                    0,  /* non standard timer */
1946                    0, /* open_requests have no inode */
1947                    0, req);
1948 }
1949
1950 static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
1951 {
1952         struct in6_addr *dest, *src;
1953         __u16 destp, srcp;
1954         int timer_active;
1955         unsigned long timer_expires;
1956         struct inet_sock *inet = inet_sk(sp);
1957         struct tcp_sock *tp = tcp_sk(sp);
1958         const struct inet_connection_sock *icsk = inet_csk(sp);
1959         struct ipv6_pinfo *np = inet6_sk(sp);
1960
1961         dest  = &np->daddr;
1962         src   = &np->rcv_saddr;
1963         destp = ntohs(inet->dport);
1964         srcp  = ntohs(inet->sport);
1965
1966         if (icsk->icsk_pending == ICSK_TIME_RETRANS) {
1967                 timer_active    = 1;
1968                 timer_expires   = icsk->icsk_timeout;
1969         } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
1970                 timer_active    = 4;
1971                 timer_expires   = icsk->icsk_timeout;
1972         } else if (timer_pending(&sp->sk_timer)) {
1973                 timer_active    = 2;
1974                 timer_expires   = sp->sk_timer.expires;
1975         } else {
1976                 timer_active    = 0;
1977                 timer_expires = jiffies;
1978         }
1979
1980         seq_printf(seq,
1981                    "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1982                    "%02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p %u %u %u %u %d\n",
1983                    i,
1984                    src->s6_addr32[0], src->s6_addr32[1],
1985                    src->s6_addr32[2], src->s6_addr32[3], srcp,
1986                    dest->s6_addr32[0], dest->s6_addr32[1],
1987                    dest->s6_addr32[2], dest->s6_addr32[3], destp,
1988                    sp->sk_state,
1989                    tp->write_seq-tp->snd_una, tp->rcv_nxt-tp->copied_seq,
1990                    timer_active,
1991                    jiffies_to_clock_t(timer_expires - jiffies),
1992                    icsk->icsk_retransmits,
1993                    sock_i_uid(sp),
1994                    icsk->icsk_probes_out,
1995                    sock_i_ino(sp),
1996                    atomic_read(&sp->sk_refcnt), sp,
1997                    icsk->icsk_rto,
1998                    icsk->icsk_ack.ato,
1999                    (icsk->icsk_ack.quick << 1 ) | icsk->icsk_ack.pingpong,
2000                    tp->snd_cwnd, tp->snd_ssthresh>=0xFFFF?-1:tp->snd_ssthresh
2001                    );
2002 }
2003
2004 static void get_timewait6_sock(struct seq_file *seq,
2005                                struct inet_timewait_sock *tw, int i)
2006 {
2007         struct in6_addr *dest, *src;
2008         __u16 destp, srcp;
2009         struct tcp6_timewait_sock *tcp6tw = tcp6_twsk((struct sock *)tw);
2010         int ttd = tw->tw_ttd - jiffies;
2011
2012         if (ttd < 0)
2013                 ttd = 0;
2014
2015         dest = &tcp6tw->tw_v6_daddr;
2016         src  = &tcp6tw->tw_v6_rcv_saddr;
2017         destp = ntohs(tw->tw_dport);
2018         srcp  = ntohs(tw->tw_sport);
2019
2020         seq_printf(seq,
2021                    "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
2022                    "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %p\n",
2023                    i,
2024                    src->s6_addr32[0], src->s6_addr32[1],
2025                    src->s6_addr32[2], src->s6_addr32[3], srcp,
2026                    dest->s6_addr32[0], dest->s6_addr32[1],
2027                    dest->s6_addr32[2], dest->s6_addr32[3], destp,
2028                    tw->tw_substate, 0, 0,
2029                    3, jiffies_to_clock_t(ttd), 0, 0, 0, 0,
2030                    atomic_read(&tw->tw_refcnt), tw);
2031 }
2032
2033 #ifdef CONFIG_PROC_FS
2034 static int tcp6_seq_show(struct seq_file *seq, void *v)
2035 {
2036         struct tcp_iter_state *st;
2037
2038         if (v == SEQ_START_TOKEN) {
2039                 seq_puts(seq,
2040                          "  sl  "
2041                          "local_address                         "
2042                          "remote_address                        "
2043                          "st tx_queue rx_queue tr tm->when retrnsmt"
2044                          "   uid  timeout inode\n");
2045                 goto out;
2046         }
2047         st = seq->private;
2048
2049         switch (st->state) {
2050         case TCP_SEQ_STATE_LISTENING:
2051         case TCP_SEQ_STATE_ESTABLISHED:
2052                 get_tcp6_sock(seq, v, st->num);
2053                 break;
2054         case TCP_SEQ_STATE_OPENREQ:
2055                 get_openreq6(seq, st->syn_wait_sk, v, st->num, st->uid);
2056                 break;
2057         case TCP_SEQ_STATE_TIME_WAIT:
2058                 get_timewait6_sock(seq, v, st->num);
2059                 break;
2060         }
2061 out:
2062         return 0;
2063 }
2064
2065 static struct file_operations tcp6_seq_fops;
2066 static struct tcp_seq_afinfo tcp6_seq_afinfo = {
2067         .owner          = THIS_MODULE,
2068         .name           = "tcp6",
2069         .family         = AF_INET6,
2070         .seq_show       = tcp6_seq_show,
2071         .seq_fops       = &tcp6_seq_fops,
2072 };
2073
2074 int __init tcp6_proc_init(void)
2075 {
2076         return tcp_proc_register(&tcp6_seq_afinfo);
2077 }
2078
2079 void tcp6_proc_exit(void)
2080 {
2081         tcp_proc_unregister(&tcp6_seq_afinfo);
2082 }
2083 #endif
2084
2085 struct proto tcpv6_prot = {
2086         .name                   = "TCPv6",
2087         .owner                  = THIS_MODULE,
2088         .close                  = tcp_close,
2089         .connect                = tcp_v6_connect,
2090         .disconnect             = tcp_disconnect,
2091         .accept                 = inet_csk_accept,
2092         .ioctl                  = tcp_ioctl,
2093         .init                   = tcp_v6_init_sock,
2094         .destroy                = tcp_v6_destroy_sock,
2095         .shutdown               = tcp_shutdown,
2096         .setsockopt             = tcp_setsockopt,
2097         .getsockopt             = tcp_getsockopt,
2098         .sendmsg                = tcp_sendmsg,
2099         .recvmsg                = tcp_recvmsg,
2100         .backlog_rcv            = tcp_v6_do_rcv,
2101         .hash                   = tcp_v6_hash,
2102         .unhash                 = tcp_unhash,
2103         .get_port               = tcp_v6_get_port,
2104         .enter_memory_pressure  = tcp_enter_memory_pressure,
2105         .sockets_allocated      = &tcp_sockets_allocated,
2106         .memory_allocated       = &tcp_memory_allocated,
2107         .memory_pressure        = &tcp_memory_pressure,
2108         .orphan_count           = &tcp_orphan_count,
2109         .sysctl_mem             = sysctl_tcp_mem,
2110         .sysctl_wmem            = sysctl_tcp_wmem,
2111         .sysctl_rmem            = sysctl_tcp_rmem,
2112         .max_header             = MAX_TCP_HEADER,
2113         .obj_size               = sizeof(struct tcp6_sock),
2114         .twsk_obj_size          = sizeof(struct tcp6_timewait_sock),
2115         .rsk_prot               = &tcp6_request_sock_ops,
2116 };
2117
2118 static struct inet6_protocol tcpv6_protocol = {
2119         .handler        =       tcp_v6_rcv,
2120         .err_handler    =       tcp_v6_err,
2121         .flags          =       INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
2122 };
2123
2124 static struct inet_protosw tcpv6_protosw = {
2125         .type           =       SOCK_STREAM,
2126         .protocol       =       IPPROTO_TCP,
2127         .prot           =       &tcpv6_prot,
2128         .ops            =       &inet6_stream_ops,
2129         .capability     =       -1,
2130         .no_check       =       0,
2131         .flags          =       INET_PROTOSW_PERMANENT,
2132 };
2133
2134 void __init tcpv6_init(void)
2135 {
2136         /* register inet6 protocol */
2137         if (inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP) < 0)
2138                 printk(KERN_ERR "tcpv6_init: Could not register protocol\n");
2139         inet6_register_protosw(&tcpv6_protosw);
2140 }