[TCP]: Move to new TSO segmenting scheme.
[linux-2.6/linux-acpi-2.6/ibm-acpi-2.6.git] / net / ipv6 / tcp_ipv6.c
blob f6e288dc116ede93c2f755075c641303ca4bca47
1 /*
2 * TCP over IPv6
3 * Linux INET6 implementation
5 * Authors:
6 * Pedro Roque <roque@di.fc.ul.pt>
8 * $Id: tcp_ipv6.c,v 1.144 2002/02/01 22:01:04 davem Exp $
10 * Based on:
11 * linux/net/ipv4/tcp.c
12 * linux/net/ipv4/tcp_input.c
13 * linux/net/ipv4/tcp_output.c
15 * Fixes:
16 * Hideaki YOSHIFUJI : sin6_scope_id support
17 * YOSHIFUJI Hideaki @USAGI and: Support IPV6_V6ONLY socket option, which
18 * Alexey Kuznetsov allow both IPv4 and IPv6 sockets to bind
19 * a single port at the same time.
20 * YOSHIFUJI Hideaki @USAGI: convert /proc/net/tcp6 to seq_file.
22 * This program is free software; you can redistribute it and/or
23 * modify it under the terms of the GNU General Public License
24 * as published by the Free Software Foundation; either version
25 * 2 of the License, or (at your option) any later version.
28 #include <linux/module.h>
29 #include <linux/config.h>
30 #include <linux/errno.h>
31 #include <linux/types.h>
32 #include <linux/socket.h>
33 #include <linux/sockios.h>
34 #include <linux/net.h>
35 #include <linux/jiffies.h>
36 #include <linux/in.h>
37 #include <linux/in6.h>
38 #include <linux/netdevice.h>
39 #include <linux/init.h>
40 #include <linux/jhash.h>
41 #include <linux/ipsec.h>
42 #include <linux/times.h>
44 #include <linux/ipv6.h>
45 #include <linux/icmpv6.h>
46 #include <linux/random.h>
48 #include <net/tcp.h>
49 #include <net/ndisc.h>
50 #include <net/ipv6.h>
51 #include <net/transp_v6.h>
52 #include <net/addrconf.h>
53 #include <net/ip6_route.h>
54 #include <net/ip6_checksum.h>
55 #include <net/inet_ecn.h>
56 #include <net/protocol.h>
57 #include <net/xfrm.h>
58 #include <net/addrconf.h>
59 #include <net/snmp.h>
60 #include <net/dsfield.h>
62 #include <asm/uaccess.h>
64 #include <linux/proc_fs.h>
65 #include <linux/seq_file.h>
67 static void tcp_v6_send_reset(struct sk_buff *skb);
68 static void tcp_v6_reqsk_send_ack(struct sk_buff *skb, struct request_sock *req);
69 static void tcp_v6_send_check(struct sock *sk, struct tcphdr *th, int len,
70 struct sk_buff *skb);
72 static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);
73 static int tcp_v6_xmit(struct sk_buff *skb, int ipfragok);
75 static struct tcp_func ipv6_mapped;
76 static struct tcp_func ipv6_specific;
78 /* I have no idea if this is a good hash for v6 or not. -DaveM */
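/* (What it does do: fold the local and foreign ports together with the
 *  low 32 bits of each address, mix the result down, and mask it by
 *  tcp_ehash_size, expected to be a power of two, to get the
 *  established-table bucket index.)
 */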
79 static __inline__ int tcp_v6_hashfn(struct in6_addr *laddr, u16 lport,
80 struct in6_addr *faddr, u16 fport)
82 int hashent = (lport ^ fport);
84 hashent ^= (laddr->s6_addr32[3] ^ faddr->s6_addr32[3]);
85 hashent ^= hashent>>16;
86 hashent ^= hashent>>8;
87 return (hashent & (tcp_ehash_size - 1));
90 static __inline__ int tcp_v6_sk_hashfn(struct sock *sk)
92 struct inet_sock *inet = inet_sk(sk);
93 struct ipv6_pinfo *np = inet6_sk(sk);
94 struct in6_addr *laddr = &np->rcv_saddr;
95 struct in6_addr *faddr = &np->daddr;
96 __u16 lport = inet->num;
97 __u16 fport = inet->dport;
98 return tcp_v6_hashfn(laddr, lport, faddr, fport);
101 static inline int tcp_v6_bind_conflict(struct sock *sk,
102 struct tcp_bind_bucket *tb)
104 struct sock *sk2;
105 struct hlist_node *node;
107 /* We must walk the whole port owner list in this case. -DaveM */
108 sk_for_each_bound(sk2, node, &tb->owners) {
109 if (sk != sk2 &&
110 (!sk->sk_bound_dev_if ||
111 !sk2->sk_bound_dev_if ||
112 sk->sk_bound_dev_if == sk2->sk_bound_dev_if) &&
113 (!sk->sk_reuse || !sk2->sk_reuse ||
114 sk2->sk_state == TCP_LISTEN) &&
115 ipv6_rcv_saddr_equal(sk, sk2))
116 break;
119 return node != NULL;
122 /* Grrr, addr_type already calculated by caller, but I don't want
123 * to add some silly "cookie" argument to this method just for that.
124 * But it doesn't matter, the recalculation is in the rarest path
125 * this function ever takes.
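/* Port selection, roughly: with snum == 0 we advance the global
 * tcp_port_rover over the sysctl local port range under
 * tcp_portalloc_lock until we find a port with no bind bucket on its
 * chain; with an explicit snum we look up (or create) the bucket and
 * run the bind-conflict walk unless the fastreuse fast path applies.
 */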
127 static int tcp_v6_get_port(struct sock *sk, unsigned short snum)
129 struct tcp_bind_hashbucket *head;
130 struct tcp_bind_bucket *tb;
131 struct hlist_node *node;
132 int ret;
134 local_bh_disable();
135 if (snum == 0) {
136 int low = sysctl_local_port_range[0];
137 int high = sysctl_local_port_range[1];
138 int remaining = (high - low) + 1;
139 int rover;
141 spin_lock(&tcp_portalloc_lock);
142 if (tcp_port_rover < low)
143 rover = low;
144 else
145 rover = tcp_port_rover;
146 do { rover++;
147 if (rover > high)
148 rover = low;
149 head = &tcp_bhash[tcp_bhashfn(rover)];
150 spin_lock(&head->lock);
151 tb_for_each(tb, node, &head->chain)
152 if (tb->port == rover)
153 goto next;
154 break;
155 next:
156 spin_unlock(&head->lock);
157 } while (--remaining > 0);
158 tcp_port_rover = rover;
159 spin_unlock(&tcp_portalloc_lock);
161 /* Exhausted local port range during search? */
162 ret = 1;
163 if (remaining <= 0)
164 goto fail;
166 /* OK, here is the one we will use. */
167 snum = rover;
168 } else {
169 head = &tcp_bhash[tcp_bhashfn(snum)];
170 spin_lock(&head->lock);
171 tb_for_each(tb, node, &head->chain)
172 if (tb->port == snum)
173 goto tb_found;
175 tb = NULL;
176 goto tb_not_found;
177 tb_found:
178 if (tb && !hlist_empty(&tb->owners)) {
179 if (tb->fastreuse > 0 && sk->sk_reuse &&
180 sk->sk_state != TCP_LISTEN) {
181 goto success;
182 } else {
183 ret = 1;
184 if (tcp_v6_bind_conflict(sk, tb))
185 goto fail_unlock;
188 tb_not_found:
189 ret = 1;
190 if (!tb && (tb = tcp_bucket_create(head, snum)) == NULL)
191 goto fail_unlock;
192 if (hlist_empty(&tb->owners)) {
193 if (sk->sk_reuse && sk->sk_state != TCP_LISTEN)
194 tb->fastreuse = 1;
195 else
196 tb->fastreuse = 0;
197 } else if (tb->fastreuse &&
198 (!sk->sk_reuse || sk->sk_state == TCP_LISTEN))
199 tb->fastreuse = 0;
201 success:
202 if (!tcp_sk(sk)->bind_hash)
203 tcp_bind_hash(sk, tb, snum);
204 BUG_TRAP(tcp_sk(sk)->bind_hash == tb);
205 ret = 0;
207 fail_unlock:
208 spin_unlock(&head->lock);
209 fail:
210 local_bh_enable();
211 return ret;
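/* Socket hashing: listening sockets are chained into tcp_listening_hash
 * under tcp_lhash_lock, established sockets go into the tcp_ehash bucket
 * chosen by tcp_v6_sk_hashfn().  v4-mapped sockets are handed back to
 * the IPv4 hash routine in tcp_v6_hash() below.
 */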
214 static __inline__ void __tcp_v6_hash(struct sock *sk)
216 struct hlist_head *list;
217 rwlock_t *lock;
219 BUG_TRAP(sk_unhashed(sk));
221 if (sk->sk_state == TCP_LISTEN) {
222 list = &tcp_listening_hash[tcp_sk_listen_hashfn(sk)];
223 lock = &tcp_lhash_lock;
224 tcp_listen_wlock();
225 } else {
226 sk->sk_hashent = tcp_v6_sk_hashfn(sk);
227 list = &tcp_ehash[sk->sk_hashent].chain;
228 lock = &tcp_ehash[sk->sk_hashent].lock;
229 write_lock(lock);
232 __sk_add_node(sk, list);
233 sock_prot_inc_use(sk->sk_prot);
234 write_unlock(lock);
238 static void tcp_v6_hash(struct sock *sk)
240 if (sk->sk_state != TCP_CLOSE) {
241 struct tcp_sock *tp = tcp_sk(sk);
243 if (tp->af_specific == &ipv6_mapped) {
244 tcp_prot.hash(sk);
245 return;
247 local_bh_disable();
248 __tcp_v6_hash(sk);
249 local_bh_enable();
253 static struct sock *tcp_v6_lookup_listener(struct in6_addr *daddr, unsigned short hnum, int dif)
255 struct sock *sk;
256 struct hlist_node *node;
257 struct sock *result = NULL;
258 int score, hiscore;
260 hiscore=0;
261 read_lock(&tcp_lhash_lock);
262 sk_for_each(sk, node, &tcp_listening_hash[tcp_lhashfn(hnum)]) {
263 if (inet_sk(sk)->num == hnum && sk->sk_family == PF_INET6) {
264 struct ipv6_pinfo *np = inet6_sk(sk);
266 score = 1;
267 if (!ipv6_addr_any(&np->rcv_saddr)) {
268 if (!ipv6_addr_equal(&np->rcv_saddr, daddr))
269 continue;
270 score++;
272 if (sk->sk_bound_dev_if) {
273 if (sk->sk_bound_dev_if != dif)
274 continue;
275 score++;
277 if (score == 3) {
278 result = sk;
279 break;
281 if (score > hiscore) {
282 hiscore = score;
283 result = sk;
287 if (result)
288 sock_hold(result);
289 read_unlock(&tcp_lhash_lock);
290 return result;
293 /* Sockets in TCP_CLOSE state are _always_ taken out of the hash, so
294 * we need not check it for TCP lookups anymore, thanks Alexey. -DaveM
296 * The sockhash lock must be held as a reader here.
299 static inline struct sock *__tcp_v6_lookup_established(struct in6_addr *saddr, u16 sport,
300 struct in6_addr *daddr, u16 hnum,
301 int dif)
303 struct tcp_ehash_bucket *head;
304 struct sock *sk;
305 struct hlist_node *node;
306 __u32 ports = TCP_COMBINED_PORTS(sport, hnum);
307 int hash;
309 /* Optimize here for direct hit, only listening connections can
310 * have wildcards anyway.
312 hash = tcp_v6_hashfn(daddr, hnum, saddr, sport);
313 head = &tcp_ehash[hash];
314 read_lock(&head->lock);
315 sk_for_each(sk, node, &head->chain) {
316 /* For IPV6 do the cheaper port and family tests first. */
317 if(TCP_IPV6_MATCH(sk, saddr, daddr, ports, dif))
318 goto hit; /* You sunk my battleship! */
320 /* Must check for a TIME_WAIT'er before going to listener hash. */
321 sk_for_each(sk, node, &(head + tcp_ehash_size)->chain) {
322 /* FIXME: acme: check this... */
323 struct tcp_tw_bucket *tw = (struct tcp_tw_bucket *)sk;
325 if(*((__u32 *)&(tw->tw_dport)) == ports &&
326 sk->sk_family == PF_INET6) {
327 if(ipv6_addr_equal(&tw->tw_v6_daddr, saddr) &&
328 ipv6_addr_equal(&tw->tw_v6_rcv_saddr, daddr) &&
329 (!sk->sk_bound_dev_if || sk->sk_bound_dev_if == dif))
330 goto hit;
333 read_unlock(&head->lock);
334 return NULL;
336 hit:
337 sock_hold(sk);
338 read_unlock(&head->lock);
339 return sk;
343 static inline struct sock *__tcp_v6_lookup(struct in6_addr *saddr, u16 sport,
344 struct in6_addr *daddr, u16 hnum,
345 int dif)
347 struct sock *sk;
349 sk = __tcp_v6_lookup_established(saddr, sport, daddr, hnum, dif);
351 if (sk)
352 return sk;
354 return tcp_v6_lookup_listener(daddr, hnum, dif);
357 inline struct sock *tcp_v6_lookup(struct in6_addr *saddr, u16 sport,
358 struct in6_addr *daddr, u16 dport,
359 int dif)
361 struct sock *sk;
363 local_bh_disable();
364 sk = __tcp_v6_lookup(saddr, sport, daddr, ntohs(dport), dif);
365 local_bh_enable();
367 return sk;
370 EXPORT_SYMBOL_GPL(tcp_v6_lookup);
374 * Open request hash tables.
377 static u32 tcp_v6_synq_hash(struct in6_addr *raddr, u16 rport, u32 rnd)
379 u32 a, b, c;
381 a = raddr->s6_addr32[0];
382 b = raddr->s6_addr32[1];
383 c = raddr->s6_addr32[2];
385 a += JHASH_GOLDEN_RATIO;
386 b += JHASH_GOLDEN_RATIO;
387 c += rnd;
388 __jhash_mix(a, b, c);
390 a += raddr->s6_addr32[3];
391 b += (u32) rport;
392 __jhash_mix(a, b, c);
394 return c & (TCP_SYNQ_HSIZE - 1);
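/* Scan the SYN-queue bucket for this peer and return the pending
 * request_sock matching ports, addresses and (when bound) the inbound
 * interface; *prevp is set so the caller can unlink the entry later.
 */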
397 static struct request_sock *tcp_v6_search_req(struct tcp_sock *tp,
398 struct request_sock ***prevp,
399 __u16 rport,
400 struct in6_addr *raddr,
401 struct in6_addr *laddr,
402 int iif)
404 struct listen_sock *lopt = tp->accept_queue.listen_opt;
405 struct request_sock *req, **prev;
407 for (prev = &lopt->syn_table[tcp_v6_synq_hash(raddr, rport, lopt->hash_rnd)];
408 (req = *prev) != NULL;
409 prev = &req->dl_next) {
410 const struct tcp6_request_sock *treq = tcp6_rsk(req);
412 if (inet_rsk(req)->rmt_port == rport &&
413 req->rsk_ops->family == AF_INET6 &&
414 ipv6_addr_equal(&treq->rmt_addr, raddr) &&
415 ipv6_addr_equal(&treq->loc_addr, laddr) &&
416 (!treq->iif || treq->iif == iif)) {
417 BUG_TRAP(req->sk == NULL);
418 *prevp = prev;
419 return req;
423 return NULL;
426 static __inline__ u16 tcp_v6_check(struct tcphdr *th, int len,
427 struct in6_addr *saddr,
428 struct in6_addr *daddr,
429 unsigned long base)
431 return csum_ipv6_magic(saddr, daddr, len, IPPROTO_TCP, base);
434 static __u32 tcp_v6_init_sequence(struct sock *sk, struct sk_buff *skb)
436 if (skb->protocol == htons(ETH_P_IPV6)) {
437 return secure_tcpv6_sequence_number(skb->nh.ipv6h->daddr.s6_addr32,
438 skb->nh.ipv6h->saddr.s6_addr32,
439 skb->h.th->dest,
440 skb->h.th->source);
441 } else {
442 return secure_tcp_sequence_number(skb->nh.iph->daddr,
443 skb->nh.iph->saddr,
444 skb->h.th->dest,
445 skb->h.th->source);
449 static int __tcp_v6_check_established(struct sock *sk, __u16 lport,
450 struct tcp_tw_bucket **twp)
452 struct inet_sock *inet = inet_sk(sk);
453 struct ipv6_pinfo *np = inet6_sk(sk);
454 struct in6_addr *daddr = &np->rcv_saddr;
455 struct in6_addr *saddr = &np->daddr;
456 int dif = sk->sk_bound_dev_if;
457 u32 ports = TCP_COMBINED_PORTS(inet->dport, lport);
458 int hash = tcp_v6_hashfn(daddr, inet->num, saddr, inet->dport);
459 struct tcp_ehash_bucket *head = &tcp_ehash[hash];
460 struct sock *sk2;
461 struct hlist_node *node;
462 struct tcp_tw_bucket *tw;
464 write_lock(&head->lock);
466 /* Check TIME-WAIT sockets first. */
467 sk_for_each(sk2, node, &(head + tcp_ehash_size)->chain) {
468 tw = (struct tcp_tw_bucket*)sk2;
470 if(*((__u32 *)&(tw->tw_dport)) == ports &&
471 sk2->sk_family == PF_INET6 &&
472 ipv6_addr_equal(&tw->tw_v6_daddr, saddr) &&
473 ipv6_addr_equal(&tw->tw_v6_rcv_saddr, daddr) &&
474 sk2->sk_bound_dev_if == sk->sk_bound_dev_if) {
475 struct tcp_sock *tp = tcp_sk(sk);
477 if (tw->tw_ts_recent_stamp &&
478 (!twp || (sysctl_tcp_tw_reuse &&
479 xtime.tv_sec -
480 tw->tw_ts_recent_stamp > 1))) {
481 /* See comment in tcp_ipv4.c */
482 tp->write_seq = tw->tw_snd_nxt + 65535 + 2;
483 if (!tp->write_seq)
484 tp->write_seq = 1;
485 tp->rx_opt.ts_recent = tw->tw_ts_recent;
486 tp->rx_opt.ts_recent_stamp = tw->tw_ts_recent_stamp;
487 sock_hold(sk2);
488 goto unique;
489 } else
490 goto not_unique;
493 tw = NULL;
495 /* And established part... */
496 sk_for_each(sk2, node, &head->chain) {
497 if(TCP_IPV6_MATCH(sk2, saddr, daddr, ports, dif))
498 goto not_unique;
501 unique:
502 BUG_TRAP(sk_unhashed(sk));
503 __sk_add_node(sk, &head->chain);
504 sk->sk_hashent = hash;
505 sock_prot_inc_use(sk->sk_prot);
506 write_unlock(&head->lock);
508 if (twp) {
509 *twp = tw;
510 NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);
511 } else if (tw) {
512 /* Silly. Should hash-dance instead... */
513 tcp_tw_deschedule(tw);
514 NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);
516 tcp_tw_put(tw);
518 return 0;
520 not_unique:
521 write_unlock(&head->lock);
522 return -EADDRNOTAVAIL;
525 static inline u32 tcpv6_port_offset(const struct sock *sk)
527 const struct inet_sock *inet = inet_sk(sk);
528 const struct ipv6_pinfo *np = inet6_sk(sk);
530 return secure_tcpv6_port_ephemeral(np->rcv_saddr.s6_addr32,
531 np->daddr.s6_addr32,
532 inet->dport);
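/* Ephemeral port selection for connect(), in outline: start from a
 * per-destination offset (hint + tcpv6_port_offset()), walk the local
 * port range, and take a port either when a fresh bind bucket can be
 * created (marked fastreuse = -1, i.e. connect-only) or when the
 * established-table check succeeds, possibly recycling a TIME-WAIT
 * socket found there.
 */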
535 static int tcp_v6_hash_connect(struct sock *sk)
537 unsigned short snum = inet_sk(sk)->num;
538 struct tcp_bind_hashbucket *head;
539 struct tcp_bind_bucket *tb;
540 int ret;
542 if (!snum) {
543 int low = sysctl_local_port_range[0];
544 int high = sysctl_local_port_range[1];
545 int range = high - low;
546 int i;
547 int port;
548 static u32 hint;
549 u32 offset = hint + tcpv6_port_offset(sk);
550 struct hlist_node *node;
551 struct tcp_tw_bucket *tw = NULL;
553 local_bh_disable();
554 for (i = 1; i <= range; i++) {
555 port = low + (i + offset) % range;
556 head = &tcp_bhash[tcp_bhashfn(port)];
557 spin_lock(&head->lock);
559 /* Does not bother with rcv_saddr checks,
560 * because the established check is already
561 * unique enough.
563 tb_for_each(tb, node, &head->chain) {
564 if (tb->port == port) {
565 BUG_TRAP(!hlist_empty(&tb->owners));
566 if (tb->fastreuse >= 0)
567 goto next_port;
568 if (!__tcp_v6_check_established(sk,
569 port,
570 &tw))
571 goto ok;
572 goto next_port;
576 tb = tcp_bucket_create(head, port);
577 if (!tb) {
578 spin_unlock(&head->lock);
579 break;
581 tb->fastreuse = -1;
582 goto ok;
584 next_port:
585 spin_unlock(&head->lock);
587 local_bh_enable();
589 return -EADDRNOTAVAIL;
592 hint += i;
594 /* Head lock still held and bh's disabled */
595 tcp_bind_hash(sk, tb, port);
596 if (sk_unhashed(sk)) {
597 inet_sk(sk)->sport = htons(port);
598 __tcp_v6_hash(sk);
600 spin_unlock(&head->lock);
602 if (tw) {
603 tcp_tw_deschedule(tw);
604 tcp_tw_put(tw);
607 ret = 0;
608 goto out;
611 head = &tcp_bhash[tcp_bhashfn(snum)];
612 tb = tcp_sk(sk)->bind_hash;
613 spin_lock_bh(&head->lock);
615 if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) {
616 __tcp_v6_hash(sk);
617 spin_unlock_bh(&head->lock);
618 return 0;
619 } else {
620 spin_unlock(&head->lock);
621 /* No definite answer... Walk to established hash table */
622 ret = __tcp_v6_check_established(sk, snum, NULL);
623 out:
624 local_bh_enable();
625 return ret;
629 static __inline__ int tcp_v6_iif(struct sk_buff *skb)
631 return IP6CB(skb)->iif;
634 static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
635 int addr_len)
637 struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
638 struct inet_sock *inet = inet_sk(sk);
639 struct ipv6_pinfo *np = inet6_sk(sk);
640 struct tcp_sock *tp = tcp_sk(sk);
641 struct in6_addr *saddr = NULL, *final_p = NULL, final;
642 struct flowi fl;
643 struct dst_entry *dst;
644 int addr_type;
645 int err;
647 if (addr_len < SIN6_LEN_RFC2133)
648 return -EINVAL;
650 if (usin->sin6_family != AF_INET6)
651 return(-EAFNOSUPPORT);
653 memset(&fl, 0, sizeof(fl));
655 if (np->sndflow) {
656 fl.fl6_flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK;
657 IP6_ECN_flow_init(fl.fl6_flowlabel);
658 if (fl.fl6_flowlabel&IPV6_FLOWLABEL_MASK) {
659 struct ip6_flowlabel *flowlabel;
660 flowlabel = fl6_sock_lookup(sk, fl.fl6_flowlabel);
661 if (flowlabel == NULL)
662 return -EINVAL;
663 ipv6_addr_copy(&usin->sin6_addr, &flowlabel->dst);
664 fl6_sock_release(flowlabel);
669 * connect() to INADDR_ANY means loopback (BSD'ism).
672 if(ipv6_addr_any(&usin->sin6_addr))
673 usin->sin6_addr.s6_addr[15] = 0x1;
675 addr_type = ipv6_addr_type(&usin->sin6_addr);
677 if(addr_type & IPV6_ADDR_MULTICAST)
678 return -ENETUNREACH;
680 if (addr_type&IPV6_ADDR_LINKLOCAL) {
681 if (addr_len >= sizeof(struct sockaddr_in6) &&
682 usin->sin6_scope_id) {
683 /* If interface is set while binding, indices
684 * must coincide.
686 if (sk->sk_bound_dev_if &&
687 sk->sk_bound_dev_if != usin->sin6_scope_id)
688 return -EINVAL;
690 sk->sk_bound_dev_if = usin->sin6_scope_id;
693 /* Connect to link-local address requires an interface */
694 if (!sk->sk_bound_dev_if)
695 return -EINVAL;
698 if (tp->rx_opt.ts_recent_stamp &&
699 !ipv6_addr_equal(&np->daddr, &usin->sin6_addr)) {
700 tp->rx_opt.ts_recent = 0;
701 tp->rx_opt.ts_recent_stamp = 0;
702 tp->write_seq = 0;
705 ipv6_addr_copy(&np->daddr, &usin->sin6_addr);
706 np->flow_label = fl.fl6_flowlabel;
709 * TCP over IPv4
712 if (addr_type == IPV6_ADDR_MAPPED) {
713 u32 exthdrlen = tp->ext_header_len;
714 struct sockaddr_in sin;
716 SOCK_DEBUG(sk, "connect: ipv4 mapped\n");
718 if (__ipv6_only_sock(sk))
719 return -ENETUNREACH;
721 sin.sin_family = AF_INET;
722 sin.sin_port = usin->sin6_port;
723 sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];
725 tp->af_specific = &ipv6_mapped;
726 sk->sk_backlog_rcv = tcp_v4_do_rcv;
728 err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin));
730 if (err) {
731 tp->ext_header_len = exthdrlen;
732 tp->af_specific = &ipv6_specific;
733 sk->sk_backlog_rcv = tcp_v6_do_rcv;
734 goto failure;
735 } else {
736 ipv6_addr_set(&np->saddr, 0, 0, htonl(0x0000FFFF),
737 inet->saddr);
738 ipv6_addr_set(&np->rcv_saddr, 0, 0, htonl(0x0000FFFF),
739 inet->rcv_saddr);
742 return err;
745 if (!ipv6_addr_any(&np->rcv_saddr))
746 saddr = &np->rcv_saddr;
748 fl.proto = IPPROTO_TCP;
749 ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
750 ipv6_addr_copy(&fl.fl6_src,
751 (saddr ? saddr : &np->saddr));
752 fl.oif = sk->sk_bound_dev_if;
753 fl.fl_ip_dport = usin->sin6_port;
754 fl.fl_ip_sport = inet->sport;
756 if (np->opt && np->opt->srcrt) {
757 struct rt0_hdr *rt0 = (struct rt0_hdr *)np->opt->srcrt;
758 ipv6_addr_copy(&final, &fl.fl6_dst);
759 ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
760 final_p = &final;
763 err = ip6_dst_lookup(sk, &dst, &fl);
764 if (err)
765 goto failure;
766 if (final_p)
767 ipv6_addr_copy(&fl.fl6_dst, final_p);
769 if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) {
770 dst_release(dst);
771 goto failure;
774 if (saddr == NULL) {
775 saddr = &fl.fl6_src;
776 ipv6_addr_copy(&np->rcv_saddr, saddr);
779 /* set the source address */
780 ipv6_addr_copy(&np->saddr, saddr);
781 inet->rcv_saddr = LOOPBACK4_IPV6;
783 ip6_dst_store(sk, dst, NULL);
784 sk->sk_route_caps = dst->dev->features &
785 ~(NETIF_F_IP_CSUM | NETIF_F_TSO);
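/* In this kernel NETIF_F_IP_CSUM and NETIF_F_TSO cover IPv4 only, so
 * they are masked out of the route capabilities whenever an IPv6
 * socket stores a new dst; the same mask appears at the other
 * ip6_dst_store() call sites in this file.
 */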
787 tp->ext_header_len = 0;
788 if (np->opt)
789 tp->ext_header_len = np->opt->opt_flen + np->opt->opt_nflen;
791 tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
793 inet->dport = usin->sin6_port;
795 tcp_set_state(sk, TCP_SYN_SENT);
796 err = tcp_v6_hash_connect(sk);
797 if (err)
798 goto late_failure;
800 if (!tp->write_seq)
801 tp->write_seq = secure_tcpv6_sequence_number(np->saddr.s6_addr32,
802 np->daddr.s6_addr32,
803 inet->sport,
804 inet->dport);
806 err = tcp_connect(sk);
807 if (err)
808 goto late_failure;
810 return 0;
812 late_failure:
813 tcp_set_state(sk, TCP_CLOSE);
814 __sk_dst_reset(sk);
815 failure:
816 inet->dport = 0;
817 sk->sk_route_caps = 0;
818 return err;
821 static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
822 int type, int code, int offset, __u32 info)
824 struct ipv6hdr *hdr = (struct ipv6hdr*)skb->data;
825 struct tcphdr *th = (struct tcphdr *)(skb->data+offset);
826 struct ipv6_pinfo *np;
827 struct sock *sk;
828 int err;
829 struct tcp_sock *tp;
830 __u32 seq;
832 sk = tcp_v6_lookup(&hdr->daddr, th->dest, &hdr->saddr, th->source, skb->dev->ifindex);
834 if (sk == NULL) {
835 ICMP6_INC_STATS_BH(__in6_dev_get(skb->dev), ICMP6_MIB_INERRORS);
836 return;
839 if (sk->sk_state == TCP_TIME_WAIT) {
840 tcp_tw_put((struct tcp_tw_bucket*)sk);
841 return;
844 bh_lock_sock(sk);
845 if (sock_owned_by_user(sk))
846 NET_INC_STATS_BH(LINUX_MIB_LOCKDROPPEDICMPS);
848 if (sk->sk_state == TCP_CLOSE)
849 goto out;
851 tp = tcp_sk(sk);
852 seq = ntohl(th->seq);
853 if (sk->sk_state != TCP_LISTEN &&
854 !between(seq, tp->snd_una, tp->snd_nxt)) {
855 NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS);
856 goto out;
859 np = inet6_sk(sk);
861 if (type == ICMPV6_PKT_TOOBIG) {
862 struct dst_entry *dst = NULL;
864 if (sock_owned_by_user(sk))
865 goto out;
866 if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
867 goto out;
869 /* icmp should have updated the destination cache entry */
870 dst = __sk_dst_check(sk, np->dst_cookie);
872 if (dst == NULL) {
873 struct inet_sock *inet = inet_sk(sk);
874 struct flowi fl;
876 /* BUGGG_FUTURE: Again, it is not clear how
877 to handle rthdr case. Ignore this complexity
878 for now.
880 memset(&fl, 0, sizeof(fl));
881 fl.proto = IPPROTO_TCP;
882 ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
883 ipv6_addr_copy(&fl.fl6_src, &np->saddr);
884 fl.oif = sk->sk_bound_dev_if;
885 fl.fl_ip_dport = inet->dport;
886 fl.fl_ip_sport = inet->sport;
888 if ((err = ip6_dst_lookup(sk, &dst, &fl))) {
889 sk->sk_err_soft = -err;
890 goto out;
893 if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) {
894 sk->sk_err_soft = -err;
895 goto out;
898 } else
899 dst_hold(dst);
901 if (tp->pmtu_cookie > dst_mtu(dst)) {
902 tcp_sync_mss(sk, dst_mtu(dst));
903 tcp_simple_retransmit(sk);
904 } /* else let the usual retransmit timer handle it */
905 dst_release(dst);
906 goto out;
909 icmpv6_err_convert(type, code, &err);
911 /* Might be for a request_sock */
912 switch (sk->sk_state) {
913 struct request_sock *req, **prev;
914 case TCP_LISTEN:
915 if (sock_owned_by_user(sk))
916 goto out;
918 req = tcp_v6_search_req(tp, &prev, th->dest, &hdr->daddr,
919 &hdr->saddr, tcp_v6_iif(skb));
920 if (!req)
921 goto out;
923 /* ICMPs are not backlogged, hence we cannot get
924 * an established socket here.
926 BUG_TRAP(req->sk == NULL);
928 if (seq != tcp_rsk(req)->snt_isn) {
929 NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS);
930 goto out;
933 tcp_synq_drop(sk, req, prev);
934 goto out;
936 case TCP_SYN_SENT:
937 case TCP_SYN_RECV: /* Cannot happen.
938 It can, if SYNs are crossed. --ANK */
939 if (!sock_owned_by_user(sk)) {
940 TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS);
941 sk->sk_err = err;
942 sk->sk_error_report(sk); /* Wake people up to see the error (see connect in sock.c) */
944 tcp_done(sk);
945 } else
946 sk->sk_err_soft = err;
947 goto out;
950 if (!sock_owned_by_user(sk) && np->recverr) {
951 sk->sk_err = err;
952 sk->sk_error_report(sk);
953 } else
954 sk->sk_err_soft = err;
956 out:
957 bh_unlock_sock(sk);
958 sock_put(sk);
962 static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req,
963 struct dst_entry *dst)
965 struct tcp6_request_sock *treq = tcp6_rsk(req);
966 struct ipv6_pinfo *np = inet6_sk(sk);
967 struct sk_buff * skb;
968 struct ipv6_txoptions *opt = NULL;
969 struct in6_addr * final_p = NULL, final;
970 struct flowi fl;
971 int err = -1;
973 memset(&fl, 0, sizeof(fl));
974 fl.proto = IPPROTO_TCP;
975 ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr);
976 ipv6_addr_copy(&fl.fl6_src, &treq->loc_addr);
977 fl.fl6_flowlabel = 0;
978 fl.oif = treq->iif;
979 fl.fl_ip_dport = inet_rsk(req)->rmt_port;
980 fl.fl_ip_sport = inet_sk(sk)->sport;
982 if (dst == NULL) {
983 opt = np->opt;
984 if (opt == NULL &&
985 np->rxopt.bits.srcrt == 2 &&
986 treq->pktopts) {
987 struct sk_buff *pktopts = treq->pktopts;
988 struct inet6_skb_parm *rxopt = IP6CB(pktopts);
989 if (rxopt->srcrt)
990 opt = ipv6_invert_rthdr(sk, (struct ipv6_rt_hdr*)(pktopts->nh.raw + rxopt->srcrt));
993 if (opt && opt->srcrt) {
994 struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt;
995 ipv6_addr_copy(&final, &fl.fl6_dst);
996 ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
997 final_p = &final;
1000 err = ip6_dst_lookup(sk, &dst, &fl);
1001 if (err)
1002 goto done;
1003 if (final_p)
1004 ipv6_addr_copy(&fl.fl6_dst, final_p);
1005 if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0)
1006 goto done;
1009 skb = tcp_make_synack(sk, dst, req);
1010 if (skb) {
1011 struct tcphdr *th = skb->h.th;
1013 th->check = tcp_v6_check(th, skb->len,
1014 &treq->loc_addr, &treq->rmt_addr,
1015 csum_partial((char *)th, skb->len, skb->csum));
1017 ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr);
1018 err = ip6_xmit(sk, skb, &fl, opt, 0);
1019 if (err == NET_XMIT_CN)
1020 err = 0;
1023 done:
1024 dst_release(dst);
1025 if (opt && opt != np->opt)
1026 sock_kfree_s(sk, opt, opt->tot_len);
1027 return err;
1030 static void tcp_v6_reqsk_destructor(struct request_sock *req)
1032 if (tcp6_rsk(req)->pktopts)
1033 kfree_skb(tcp6_rsk(req)->pktopts);
1036 static struct request_sock_ops tcp6_request_sock_ops = {
1037 .family = AF_INET6,
1038 .obj_size = sizeof(struct tcp6_request_sock),
1039 .rtx_syn_ack = tcp_v6_send_synack,
1040 .send_ack = tcp_v6_reqsk_send_ack,
1041 .destructor = tcp_v6_reqsk_destructor,
1042 .send_reset = tcp_v6_send_reset
1045 static int ipv6_opt_accepted(struct sock *sk, struct sk_buff *skb)
1047 struct ipv6_pinfo *np = inet6_sk(sk);
1048 struct inet6_skb_parm *opt = IP6CB(skb);
1050 if (np->rxopt.all) {
1051 if ((opt->hop && np->rxopt.bits.hopopts) ||
1052 ((IPV6_FLOWINFO_MASK&*(u32*)skb->nh.raw) &&
1053 np->rxopt.bits.rxflow) ||
1054 (opt->srcrt && np->rxopt.bits.srcrt) ||
1055 ((opt->dst1 || opt->dst0) && np->rxopt.bits.dstopts))
1056 return 1;
1058 return 0;
1062 static void tcp_v6_send_check(struct sock *sk, struct tcphdr *th, int len,
1063 struct sk_buff *skb)
1065 struct ipv6_pinfo *np = inet6_sk(sk);
1067 if (skb->ip_summed == CHECKSUM_HW) {
1068 th->check = ~csum_ipv6_magic(&np->saddr, &np->daddr, len, IPPROTO_TCP, 0);
1069 skb->csum = offsetof(struct tcphdr, check);
1070 } else {
1071 th->check = csum_ipv6_magic(&np->saddr, &np->daddr, len, IPPROTO_TCP,
1072 csum_partial((char *)th, th->doff<<2,
1073 skb->csum));
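/* Send a RST in reply to a bad segment: the reply swaps the addresses
 * and ports taken from the offending skb, picks seq/ack from the
 * incoming header, is checksummed by hand and routed with a fresh flow
 * lookup, since there may be no socket to borrow a dst from.
 */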
1078 static void tcp_v6_send_reset(struct sk_buff *skb)
1080 struct tcphdr *th = skb->h.th, *t1;
1081 struct sk_buff *buff;
1082 struct flowi fl;
1084 if (th->rst)
1085 return;
1087 if (!ipv6_unicast_destination(skb))
1088 return;
1091 * We need to grab some memory, and put together an RST,
1092 * and then put it into the queue to be sent.
1095 buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + sizeof(struct tcphdr),
1096 GFP_ATOMIC);
1097 if (buff == NULL)
1098 return;
1100 skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + sizeof(struct tcphdr));
1102 t1 = (struct tcphdr *) skb_push(buff,sizeof(struct tcphdr));
1104 /* Swap the send and the receive. */
1105 memset(t1, 0, sizeof(*t1));
1106 t1->dest = th->source;
1107 t1->source = th->dest;
1108 t1->doff = sizeof(*t1)/4;
1109 t1->rst = 1;
1111 if(th->ack) {
1112 t1->seq = th->ack_seq;
1113 } else {
1114 t1->ack = 1;
1115 t1->ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin
1116 + skb->len - (th->doff<<2));
1119 buff->csum = csum_partial((char *)t1, sizeof(*t1), 0);
1121 memset(&fl, 0, sizeof(fl));
1122 ipv6_addr_copy(&fl.fl6_dst, &skb->nh.ipv6h->saddr);
1123 ipv6_addr_copy(&fl.fl6_src, &skb->nh.ipv6h->daddr);
1125 t1->check = csum_ipv6_magic(&fl.fl6_src, &fl.fl6_dst,
1126 sizeof(*t1), IPPROTO_TCP,
1127 buff->csum);
1129 fl.proto = IPPROTO_TCP;
1130 fl.oif = tcp_v6_iif(skb);
1131 fl.fl_ip_dport = t1->dest;
1132 fl.fl_ip_sport = t1->source;
1134 /* sk = NULL, but it is safe for now. RST socket required. */
1135 if (!ip6_dst_lookup(NULL, &buff->dst, &fl)) {
1137 if ((xfrm_lookup(&buff->dst, &fl, NULL, 0)) < 0) {
1138 dst_release(buff->dst);
1139 return;
1142 ip6_xmit(NULL, buff, &fl, NULL, 0);
1143 TCP_INC_STATS_BH(TCP_MIB_OUTSEGS);
1144 TCP_INC_STATS_BH(TCP_MIB_OUTRSTS);
1145 return;
1148 kfree_skb(buff);
1151 static void tcp_v6_send_ack(struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 ts)
1153 struct tcphdr *th = skb->h.th, *t1;
1154 struct sk_buff *buff;
1155 struct flowi fl;
1156 int tot_len = sizeof(struct tcphdr);
1158 if (ts)
1159 tot_len += 3*4;
1161 buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + tot_len,
1162 GFP_ATOMIC);
1163 if (buff == NULL)
1164 return;
1166 skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + tot_len);
1168 t1 = (struct tcphdr *) skb_push(buff,tot_len);
1170 /* Swap the send and the receive. */
1171 memset(t1, 0, sizeof(*t1));
1172 t1->dest = th->source;
1173 t1->source = th->dest;
1174 t1->doff = tot_len/4;
1175 t1->seq = htonl(seq);
1176 t1->ack_seq = htonl(ack);
1177 t1->ack = 1;
1178 t1->window = htons(win);
1180 if (ts) {
1181 u32 *ptr = (u32*)(t1 + 1);
1182 *ptr++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
1183 (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
1184 *ptr++ = htonl(tcp_time_stamp);
1185 *ptr = htonl(ts);
1188 buff->csum = csum_partial((char *)t1, tot_len, 0);
1190 memset(&fl, 0, sizeof(fl));
1191 ipv6_addr_copy(&fl.fl6_dst, &skb->nh.ipv6h->saddr);
1192 ipv6_addr_copy(&fl.fl6_src, &skb->nh.ipv6h->daddr);
1194 t1->check = csum_ipv6_magic(&fl.fl6_src, &fl.fl6_dst,
1195 tot_len, IPPROTO_TCP,
1196 buff->csum);
1198 fl.proto = IPPROTO_TCP;
1199 fl.oif = tcp_v6_iif(skb);
1200 fl.fl_ip_dport = t1->dest;
1201 fl.fl_ip_sport = t1->source;
1203 if (!ip6_dst_lookup(NULL, &buff->dst, &fl)) {
1204 if ((xfrm_lookup(&buff->dst, &fl, NULL, 0)) < 0) {
1205 dst_release(buff->dst);
1206 return;
1208 ip6_xmit(NULL, buff, &fl, NULL, 0);
1209 TCP_INC_STATS_BH(TCP_MIB_OUTSEGS);
1210 return;
1213 kfree_skb(buff);
1216 static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
1218 struct tcp_tw_bucket *tw = (struct tcp_tw_bucket *)sk;
1220 tcp_v6_send_ack(skb, tw->tw_snd_nxt, tw->tw_rcv_nxt,
1221 tw->tw_rcv_wnd >> tw->tw_rcv_wscale, tw->tw_ts_recent);
1223 tcp_tw_put(tw);
1226 static void tcp_v6_reqsk_send_ack(struct sk_buff *skb, struct request_sock *req)
1228 tcp_v6_send_ack(skb, tcp_rsk(req)->snt_isn + 1, tcp_rsk(req)->rcv_isn + 1, req->rcv_wnd, req->ts_recent);
1232 static struct sock *tcp_v6_hnd_req(struct sock *sk,struct sk_buff *skb)
1234 struct request_sock *req, **prev;
1235 struct tcphdr *th = skb->h.th;
1236 struct tcp_sock *tp = tcp_sk(sk);
1237 struct sock *nsk;
1239 /* Find possible connection requests. */
1240 req = tcp_v6_search_req(tp, &prev, th->source, &skb->nh.ipv6h->saddr,
1241 &skb->nh.ipv6h->daddr, tcp_v6_iif(skb));
1242 if (req)
1243 return tcp_check_req(sk, skb, req, prev);
1245 nsk = __tcp_v6_lookup_established(&skb->nh.ipv6h->saddr,
1246 th->source,
1247 &skb->nh.ipv6h->daddr,
1248 ntohs(th->dest),
1249 tcp_v6_iif(skb));
1251 if (nsk) {
1252 if (nsk->sk_state != TCP_TIME_WAIT) {
1253 bh_lock_sock(nsk);
1254 return nsk;
1256 tcp_tw_put((struct tcp_tw_bucket*)nsk);
1257 return NULL;
1260 #if 0 /*def CONFIG_SYN_COOKIES*/
1261 if (!th->rst && !th->syn && th->ack)
1262 sk = cookie_v6_check(sk, skb, &(IPCB(skb)->opt));
1263 #endif
1264 return sk;
1267 static void tcp_v6_synq_add(struct sock *sk, struct request_sock *req)
1269 struct tcp_sock *tp = tcp_sk(sk);
1270 struct listen_sock *lopt = tp->accept_queue.listen_opt;
1271 u32 h = tcp_v6_synq_hash(&tcp6_rsk(req)->rmt_addr, inet_rsk(req)->rmt_port, lopt->hash_rnd);
1273 reqsk_queue_hash_req(&tp->accept_queue, h, req, TCP_TIMEOUT_INIT);
1274 tcp_synq_added(sk);
1278 /* FIXME: this is substantially similar to the ipv4 code.
1279 * Can some kind of merge be done? -- erics
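/* Handle an incoming SYN: v4-mapped traffic is passed to
 * tcp_v4_conn_request(); otherwise allocate a request_sock, parse the
 * TCP options, record the peer addresses and inbound interface, send
 * the SYNACK and park the request on the listen socket's SYN queue.
 */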
1281 static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
1283 struct tcp6_request_sock *treq;
1284 struct ipv6_pinfo *np = inet6_sk(sk);
1285 struct tcp_options_received tmp_opt;
1286 struct tcp_sock *tp = tcp_sk(sk);
1287 struct request_sock *req = NULL;
1288 __u32 isn = TCP_SKB_CB(skb)->when;
1290 if (skb->protocol == htons(ETH_P_IP))
1291 return tcp_v4_conn_request(sk, skb);
1293 if (!ipv6_unicast_destination(skb))
1294 goto drop;
1297 * There are no SYN attacks on IPv6, yet...
1299 if (tcp_synq_is_full(sk) && !isn) {
1300 if (net_ratelimit())
1301 printk(KERN_INFO "TCPv6: dropping request, synflood is possible\n");
1302 goto drop;
1305 if (sk_acceptq_is_full(sk) && tcp_synq_young(sk) > 1)
1306 goto drop;
1308 req = reqsk_alloc(&tcp6_request_sock_ops);
1309 if (req == NULL)
1310 goto drop;
1312 tcp_clear_options(&tmp_opt);
1313 tmp_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
1314 tmp_opt.user_mss = tp->rx_opt.user_mss;
1316 tcp_parse_options(skb, &tmp_opt, 0);
1318 tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
1319 tcp_openreq_init(req, &tmp_opt, skb);
1321 treq = tcp6_rsk(req);
1322 ipv6_addr_copy(&treq->rmt_addr, &skb->nh.ipv6h->saddr);
1323 ipv6_addr_copy(&treq->loc_addr, &skb->nh.ipv6h->daddr);
1324 TCP_ECN_create_request(req, skb->h.th);
1325 treq->pktopts = NULL;
1326 if (ipv6_opt_accepted(sk, skb) ||
1327 np->rxopt.bits.rxinfo ||
1328 np->rxopt.bits.rxhlim) {
1329 atomic_inc(&skb->users);
1330 treq->pktopts = skb;
1332 treq->iif = sk->sk_bound_dev_if;
1334 /* So that link locals have meaning */
1335 if (!sk->sk_bound_dev_if &&
1336 ipv6_addr_type(&treq->rmt_addr) & IPV6_ADDR_LINKLOCAL)
1337 treq->iif = tcp_v6_iif(skb);
1339 if (isn == 0)
1340 isn = tcp_v6_init_sequence(sk,skb);
1342 tcp_rsk(req)->snt_isn = isn;
1344 if (tcp_v6_send_synack(sk, req, NULL))
1345 goto drop;
1347 tcp_v6_synq_add(sk, req);
1349 return 0;
1351 drop:
1352 if (req)
1353 reqsk_free(req);
1355 TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS);
1356 return 0; /* don't send reset */
1359 static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
1360 struct request_sock *req,
1361 struct dst_entry *dst)
1363 struct tcp6_request_sock *treq = tcp6_rsk(req);
1364 struct ipv6_pinfo *newnp, *np = inet6_sk(sk);
1365 struct tcp6_sock *newtcp6sk;
1366 struct inet_sock *newinet;
1367 struct tcp_sock *newtp;
1368 struct sock *newsk;
1369 struct ipv6_txoptions *opt;
1371 if (skb->protocol == htons(ETH_P_IP)) {
1373 * v6 mapped
1376 newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst);
1378 if (newsk == NULL)
1379 return NULL;
1381 newtcp6sk = (struct tcp6_sock *)newsk;
1382 inet_sk(newsk)->pinet6 = &newtcp6sk->inet6;
1384 newinet = inet_sk(newsk);
1385 newnp = inet6_sk(newsk);
1386 newtp = tcp_sk(newsk);
1388 memcpy(newnp, np, sizeof(struct ipv6_pinfo));
1390 ipv6_addr_set(&newnp->daddr, 0, 0, htonl(0x0000FFFF),
1391 newinet->daddr);
1393 ipv6_addr_set(&newnp->saddr, 0, 0, htonl(0x0000FFFF),
1394 newinet->saddr);
1396 ipv6_addr_copy(&newnp->rcv_saddr, &newnp->saddr);
1398 newtp->af_specific = &ipv6_mapped;
1399 newsk->sk_backlog_rcv = tcp_v4_do_rcv;
1400 newnp->pktoptions = NULL;
1401 newnp->opt = NULL;
1402 newnp->mcast_oif = tcp_v6_iif(skb);
1403 newnp->mcast_hops = skb->nh.ipv6h->hop_limit;
1405 /* Charge newly allocated IPv6 socket. Though it is mapped,
1406 * it is still IPv6.
1408 #ifdef INET_REFCNT_DEBUG
1409 atomic_inc(&inet6_sock_nr);
1410 #endif
1412 /* This is a tricky place. Until this moment IPv4 tcp
1413 worked with IPv6 af_tcp.af_specific.
1414 Sync it now.
1416 tcp_sync_mss(newsk, newtp->pmtu_cookie);
1418 return newsk;
1421 opt = np->opt;
1423 if (sk_acceptq_is_full(sk))
1424 goto out_overflow;
1426 if (np->rxopt.bits.srcrt == 2 &&
1427 opt == NULL && treq->pktopts) {
1428 struct inet6_skb_parm *rxopt = IP6CB(treq->pktopts);
1429 if (rxopt->srcrt)
1430 opt = ipv6_invert_rthdr(sk, (struct ipv6_rt_hdr *)(treq->pktopts->nh.raw + rxopt->srcrt));
1433 if (dst == NULL) {
1434 struct in6_addr *final_p = NULL, final;
1435 struct flowi fl;
1437 memset(&fl, 0, sizeof(fl));
1438 fl.proto = IPPROTO_TCP;
1439 ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr);
1440 if (opt && opt->srcrt) {
1441 struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt;
1442 ipv6_addr_copy(&final, &fl.fl6_dst);
1443 ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
1444 final_p = &final;
1446 ipv6_addr_copy(&fl.fl6_src, &treq->loc_addr);
1447 fl.oif = sk->sk_bound_dev_if;
1448 fl.fl_ip_dport = inet_rsk(req)->rmt_port;
1449 fl.fl_ip_sport = inet_sk(sk)->sport;
1451 if (ip6_dst_lookup(sk, &dst, &fl))
1452 goto out;
1454 if (final_p)
1455 ipv6_addr_copy(&fl.fl6_dst, final_p);
1457 if ((xfrm_lookup(&dst, &fl, sk, 0)) < 0)
1458 goto out;
1461 newsk = tcp_create_openreq_child(sk, req, skb);
1462 if (newsk == NULL)
1463 goto out;
1465 /* Charge newly allocated IPv6 socket */
1466 #ifdef INET_REFCNT_DEBUG
1467 atomic_inc(&inet6_sock_nr);
1468 #endif
1470 ip6_dst_store(newsk, dst, NULL);
1471 newsk->sk_route_caps = dst->dev->features &
1472 ~(NETIF_F_IP_CSUM | NETIF_F_TSO);
1474 newtcp6sk = (struct tcp6_sock *)newsk;
1475 inet_sk(newsk)->pinet6 = &newtcp6sk->inet6;
1477 newtp = tcp_sk(newsk);
1478 newinet = inet_sk(newsk);
1479 newnp = inet6_sk(newsk);
1481 memcpy(newnp, np, sizeof(struct ipv6_pinfo));
1483 ipv6_addr_copy(&newnp->daddr, &treq->rmt_addr);
1484 ipv6_addr_copy(&newnp->saddr, &treq->loc_addr);
1485 ipv6_addr_copy(&newnp->rcv_saddr, &treq->loc_addr);
1486 newsk->sk_bound_dev_if = treq->iif;
1488 /* Now IPv6 options...
1490 First: no IPv4 options.
1492 newinet->opt = NULL;
1494 /* Clone RX bits */
1495 newnp->rxopt.all = np->rxopt.all;
1497 /* Clone pktoptions received with SYN */
1498 newnp->pktoptions = NULL;
1499 if (treq->pktopts != NULL) {
1500 newnp->pktoptions = skb_clone(treq->pktopts, GFP_ATOMIC);
1501 kfree_skb(treq->pktopts);
1502 treq->pktopts = NULL;
1503 if (newnp->pktoptions)
1504 skb_set_owner_r(newnp->pktoptions, newsk);
1506 newnp->opt = NULL;
1507 newnp->mcast_oif = tcp_v6_iif(skb);
1508 newnp->mcast_hops = skb->nh.ipv6h->hop_limit;
1510 /* Clone native IPv6 options from listening socket (if any)
1512 Yes, keeping a reference count would be much more clever,
1513 but we do one more thing here: reattach optmem
1514 to newsk.
1516 if (opt) {
1517 newnp->opt = ipv6_dup_options(newsk, opt);
1518 if (opt != np->opt)
1519 sock_kfree_s(sk, opt, opt->tot_len);
1522 newtp->ext_header_len = 0;
1523 if (newnp->opt)
1524 newtp->ext_header_len = newnp->opt->opt_nflen +
1525 newnp->opt->opt_flen;
1527 tcp_sync_mss(newsk, dst_mtu(dst));
1528 newtp->advmss = dst_metric(dst, RTAX_ADVMSS);
1529 tcp_initialize_rcv_mss(newsk);
1531 newinet->daddr = newinet->saddr = newinet->rcv_saddr = LOOPBACK4_IPV6;
1533 __tcp_v6_hash(newsk);
1534 tcp_inherit_port(sk, newsk);
1536 return newsk;
1538 out_overflow:
1539 NET_INC_STATS_BH(LINUX_MIB_LISTENOVERFLOWS);
1540 out:
1541 NET_INC_STATS_BH(LINUX_MIB_LISTENDROPS);
1542 if (opt && opt != np->opt)
1543 sock_kfree_s(sk, opt, opt->tot_len);
1544 dst_release(dst);
1545 return NULL;
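/* Receive checksum setup: trust a correct hardware checksum, verify
 * short packets (<= 76 bytes) in full right away, and leave longer
 * packets with a pseudo-header partial sum in skb->csum for completion
 * later in the receive path.
 */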
1548 static int tcp_v6_checksum_init(struct sk_buff *skb)
1550 if (skb->ip_summed == CHECKSUM_HW) {
1551 skb->ip_summed = CHECKSUM_UNNECESSARY;
1552 if (!tcp_v6_check(skb->h.th,skb->len,&skb->nh.ipv6h->saddr,
1553 &skb->nh.ipv6h->daddr,skb->csum))
1554 return 0;
1555 LIMIT_NETDEBUG(printk(KERN_DEBUG "hw tcp v6 csum failed\n"));
1557 if (skb->len <= 76) {
1558 if (tcp_v6_check(skb->h.th,skb->len,&skb->nh.ipv6h->saddr,
1559 &skb->nh.ipv6h->daddr,skb_checksum(skb, 0, skb->len, 0)))
1560 return -1;
1561 skb->ip_summed = CHECKSUM_UNNECESSARY;
1562 } else {
1563 skb->csum = ~tcp_v6_check(skb->h.th,skb->len,&skb->nh.ipv6h->saddr,
1564 &skb->nh.ipv6h->daddr,0);
1566 return 0;
1569 /* The socket must have its spinlock held when we get
1570 * here.
1572 * We have a potential double-lock case here, so even when
1573 * doing backlog processing we use the BH locking scheme.
1574 * This is because we cannot sleep with the original spinlock
1575 * held.
1577 static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
1579 struct ipv6_pinfo *np = inet6_sk(sk);
1580 struct tcp_sock *tp;
1581 struct sk_buff *opt_skb = NULL;
1583 /* Imagine: socket is IPv6. IPv4 packet arrives,
1584 goes to the IPv4 receive handler and is backlogged.
1585 From backlog it always goes here. Kerboom...
1586 Fortunately, tcp_rcv_established and rcv_established
1587 handle them correctly, but that is not the case with
1588 tcp_v6_hnd_req and tcp_v6_send_reset(). --ANK
1591 if (skb->protocol == htons(ETH_P_IP))
1592 return tcp_v4_do_rcv(sk, skb);
1594 if (sk_filter(sk, skb, 0))
1595 goto discard;
1598 * socket locking is here for SMP purposes as backlog rcv
1599 * is currently called with bh processing disabled.
1602 /* Do Stevens' IPV6_PKTOPTIONS.
1604 Yes, guys, it is the only place in our code where we
1605 can do this without affecting IPv4.
1606 The rest of the code is protocol independent,
1607 and I do not like the idea of uglifying IPv4.
1609 Actually, the whole idea behind IPV6_PKTOPTIONS
1610 does not look very well thought out. For now we latch
1611 options, received in the last packet, enqueued
1612 by tcp. Feel free to propose better solution.
1613 --ANK (980728)
1615 if (np->rxopt.all)
1616 opt_skb = skb_clone(skb, GFP_ATOMIC);
1618 if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
1619 TCP_CHECK_TIMER(sk);
1620 if (tcp_rcv_established(sk, skb, skb->h.th, skb->len))
1621 goto reset;
1622 TCP_CHECK_TIMER(sk);
1623 if (opt_skb)
1624 goto ipv6_pktoptions;
1625 return 0;
1628 if (skb->len < (skb->h.th->doff<<2) || tcp_checksum_complete(skb))
1629 goto csum_err;
1631 if (sk->sk_state == TCP_LISTEN) {
1632 struct sock *nsk = tcp_v6_hnd_req(sk, skb);
1633 if (!nsk)
1634 goto discard;
1637 * Queue it on the new socket if the new socket is active,
1638 * otherwise we just short-circuit this and continue with
1639 * the new socket.
1641 if(nsk != sk) {
1642 if (tcp_child_process(sk, nsk, skb))
1643 goto reset;
1644 if (opt_skb)
1645 __kfree_skb(opt_skb);
1646 return 0;
1650 TCP_CHECK_TIMER(sk);
1651 if (tcp_rcv_state_process(sk, skb, skb->h.th, skb->len))
1652 goto reset;
1653 TCP_CHECK_TIMER(sk);
1654 if (opt_skb)
1655 goto ipv6_pktoptions;
1656 return 0;
1658 reset:
1659 tcp_v6_send_reset(skb);
1660 discard:
1661 if (opt_skb)
1662 __kfree_skb(opt_skb);
1663 kfree_skb(skb);
1664 return 0;
1665 csum_err:
1666 TCP_INC_STATS_BH(TCP_MIB_INERRS);
1667 goto discard;
1670 ipv6_pktoptions:
1671 /* You may ask: what is this for?
1673 1. skb was enqueued by tcp.
1674 2. skb was added to the tail of the read queue, not out of order.
1675 3. The socket is not in a passive state.
1676 4. Finally, it really contains options which the user wants to receive.
1678 tp = tcp_sk(sk);
1679 if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt &&
1680 !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
1681 if (np->rxopt.bits.rxinfo)
1682 np->mcast_oif = tcp_v6_iif(opt_skb);
1683 if (np->rxopt.bits.rxhlim)
1684 np->mcast_hops = opt_skb->nh.ipv6h->hop_limit;
1685 if (ipv6_opt_accepted(sk, opt_skb)) {
1686 skb_set_owner_r(opt_skb, sk);
1687 opt_skb = xchg(&np->pktoptions, opt_skb);
1688 } else {
1689 __kfree_skb(opt_skb);
1690 opt_skb = xchg(&np->pktoptions, NULL);
1694 if (opt_skb)
1695 kfree_skb(opt_skb);
1696 return 0;
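/* Main receive entry point for IPv6 TCP: validate the header and
 * checksum, fill in the TCP control block, look the socket up in the
 * established and then listening tables, and either process the
 * segment directly, prequeue it, or backlog it when the socket is
 * owned by user context.  TIME-WAIT sockets get their own handling at
 * do_time_wait below.
 */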
1699 static int tcp_v6_rcv(struct sk_buff **pskb, unsigned int *nhoffp)
1701 struct sk_buff *skb = *pskb;
1702 struct tcphdr *th;
1703 struct sock *sk;
1704 int ret;
1706 if (skb->pkt_type != PACKET_HOST)
1707 goto discard_it;
1710 * Count it even if it's bad.
1712 TCP_INC_STATS_BH(TCP_MIB_INSEGS);
1714 if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
1715 goto discard_it;
1717 th = skb->h.th;
1719 if (th->doff < sizeof(struct tcphdr)/4)
1720 goto bad_packet;
1721 if (!pskb_may_pull(skb, th->doff*4))
1722 goto discard_it;
1724 if ((skb->ip_summed != CHECKSUM_UNNECESSARY &&
1725 tcp_v6_checksum_init(skb) < 0))
1726 goto bad_packet;
1728 th = skb->h.th;
1729 TCP_SKB_CB(skb)->seq = ntohl(th->seq);
1730 TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
1731 skb->len - th->doff*4);
1732 TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
1733 TCP_SKB_CB(skb)->when = 0;
1734 TCP_SKB_CB(skb)->flags = ipv6_get_dsfield(skb->nh.ipv6h);
1735 TCP_SKB_CB(skb)->sacked = 0;
1737 sk = __tcp_v6_lookup(&skb->nh.ipv6h->saddr, th->source,
1738 &skb->nh.ipv6h->daddr, ntohs(th->dest), tcp_v6_iif(skb));
1740 if (!sk)
1741 goto no_tcp_socket;
1743 process:
1744 if (sk->sk_state == TCP_TIME_WAIT)
1745 goto do_time_wait;
1747 if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
1748 goto discard_and_relse;
1750 if (sk_filter(sk, skb, 0))
1751 goto discard_and_relse;
1753 skb->dev = NULL;
1755 bh_lock_sock(sk);
1756 ret = 0;
1757 if (!sock_owned_by_user(sk)) {
1758 if (!tcp_prequeue(sk, skb))
1759 ret = tcp_v6_do_rcv(sk, skb);
1760 } else
1761 sk_add_backlog(sk, skb);
1762 bh_unlock_sock(sk);
1764 sock_put(sk);
1765 return ret ? -1 : 0;
1767 no_tcp_socket:
1768 if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
1769 goto discard_it;
1771 if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) {
1772 bad_packet:
1773 TCP_INC_STATS_BH(TCP_MIB_INERRS);
1774 } else {
1775 tcp_v6_send_reset(skb);
1778 discard_it:
1781 * Discard frame
1784 kfree_skb(skb);
1785 return 0;
1787 discard_and_relse:
1788 sock_put(sk);
1789 goto discard_it;
1791 do_time_wait:
1792 if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
1793 tcp_tw_put((struct tcp_tw_bucket *) sk);
1794 goto discard_it;
1797 if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) {
1798 TCP_INC_STATS_BH(TCP_MIB_INERRS);
1799 tcp_tw_put((struct tcp_tw_bucket *) sk);
1800 goto discard_it;
1803 switch(tcp_timewait_state_process((struct tcp_tw_bucket *)sk,
1804 skb, th, skb->len)) {
1805 case TCP_TW_SYN:
1807 struct sock *sk2;
1809 sk2 = tcp_v6_lookup_listener(&skb->nh.ipv6h->daddr, ntohs(th->dest), tcp_v6_iif(skb));
1810 if (sk2 != NULL) {
1811 tcp_tw_deschedule((struct tcp_tw_bucket *)sk);
1812 tcp_tw_put((struct tcp_tw_bucket *)sk);
1813 sk = sk2;
1814 goto process;
1816 /* Fall through to ACK */
1818 case TCP_TW_ACK:
1819 tcp_v6_timewait_ack(sk, skb);
1820 break;
1821 case TCP_TW_RST:
1822 goto no_tcp_socket;
1823 case TCP_TW_SUCCESS:;
1825 goto discard_it;
1828 static int tcp_v6_rebuild_header(struct sock *sk)
1830 int err;
1831 struct dst_entry *dst;
1832 struct ipv6_pinfo *np = inet6_sk(sk);
1834 dst = __sk_dst_check(sk, np->dst_cookie);
1836 if (dst == NULL) {
1837 struct inet_sock *inet = inet_sk(sk);
1838 struct in6_addr *final_p = NULL, final;
1839 struct flowi fl;
1841 memset(&fl, 0, sizeof(fl));
1842 fl.proto = IPPROTO_TCP;
1843 ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
1844 ipv6_addr_copy(&fl.fl6_src, &np->saddr);
1845 fl.fl6_flowlabel = np->flow_label;
1846 fl.oif = sk->sk_bound_dev_if;
1847 fl.fl_ip_dport = inet->dport;
1848 fl.fl_ip_sport = inet->sport;
1850 if (np->opt && np->opt->srcrt) {
1851 struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt;
1852 ipv6_addr_copy(&final, &fl.fl6_dst);
1853 ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
1854 final_p = &final;
1857 err = ip6_dst_lookup(sk, &dst, &fl);
1858 if (err) {
1859 sk->sk_route_caps = 0;
1860 return err;
1862 if (final_p)
1863 ipv6_addr_copy(&fl.fl6_dst, final_p);
1865 if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) {
1866 sk->sk_err_soft = -err;
1867 dst_release(dst);
1868 return err;
1871 ip6_dst_store(sk, dst, NULL);
1872 sk->sk_route_caps = dst->dev->features &
1873 ~(NETIF_F_IP_CSUM | NETIF_F_TSO);
1876 return 0;
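/* Transmit hook used by the generic TCP output path: reuse or rebuild
 * the cached route for the flow (honouring a routing header's final
 * hop, if any) and hand the skb to ip6_xmit() together with the
 * socket's IPv6 options.
 */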
1879 static int tcp_v6_xmit(struct sk_buff *skb, int ipfragok)
1881 struct sock *sk = skb->sk;
1882 struct inet_sock *inet = inet_sk(sk);
1883 struct ipv6_pinfo *np = inet6_sk(sk);
1884 struct flowi fl;
1885 struct dst_entry *dst;
1886 struct in6_addr *final_p = NULL, final;
1888 memset(&fl, 0, sizeof(fl));
1889 fl.proto = IPPROTO_TCP;
1890 ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
1891 ipv6_addr_copy(&fl.fl6_src, &np->saddr);
1892 fl.fl6_flowlabel = np->flow_label;
1893 IP6_ECN_flow_xmit(sk, fl.fl6_flowlabel);
1894 fl.oif = sk->sk_bound_dev_if;
1895 fl.fl_ip_sport = inet->sport;
1896 fl.fl_ip_dport = inet->dport;
1898 if (np->opt && np->opt->srcrt) {
1899 struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt;
1900 ipv6_addr_copy(&final, &fl.fl6_dst);
1901 ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
1902 final_p = &final;
1905 dst = __sk_dst_check(sk, np->dst_cookie);
1907 if (dst == NULL) {
1908 int err = ip6_dst_lookup(sk, &dst, &fl);
1910 if (err) {
1911 sk->sk_err_soft = -err;
1912 return err;
1915 if (final_p)
1916 ipv6_addr_copy(&fl.fl6_dst, final_p);
1918 if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) {
1919 sk->sk_route_caps = 0;
1920 dst_release(dst);
1921 return err;
1924 ip6_dst_store(sk, dst, NULL);
1925 sk->sk_route_caps = dst->dev->features &
1926 ~(NETIF_F_IP_CSUM | NETIF_F_TSO);
1929 skb->dst = dst_clone(dst);
1931 /* Restore final destination back after routing done */
1932 ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
1934 return ip6_xmit(sk, skb, &fl, np->opt, 0);
1937 static void v6_addr2sockaddr(struct sock *sk, struct sockaddr * uaddr)
1939 struct ipv6_pinfo *np = inet6_sk(sk);
1940 struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) uaddr;
1942 sin6->sin6_family = AF_INET6;
1943 ipv6_addr_copy(&sin6->sin6_addr, &np->daddr);
1944 sin6->sin6_port = inet_sk(sk)->dport;
1945 /* We do not store received flowlabel for TCP */
1946 sin6->sin6_flowinfo = 0;
1947 sin6->sin6_scope_id = 0;
1948 if (sk->sk_bound_dev_if &&
1949 ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL)
1950 sin6->sin6_scope_id = sk->sk_bound_dev_if;
1953 static int tcp_v6_remember_stamp(struct sock *sk)
1955 /* Alas, not yet... */
1956 return 0;
1959 static struct tcp_func ipv6_specific = {
1960 .queue_xmit = tcp_v6_xmit,
1961 .send_check = tcp_v6_send_check,
1962 .rebuild_header = tcp_v6_rebuild_header,
1963 .conn_request = tcp_v6_conn_request,
1964 .syn_recv_sock = tcp_v6_syn_recv_sock,
1965 .remember_stamp = tcp_v6_remember_stamp,
1966 .net_header_len = sizeof(struct ipv6hdr),
1968 .setsockopt = ipv6_setsockopt,
1969 .getsockopt = ipv6_getsockopt,
1970 .addr2sockaddr = v6_addr2sockaddr,
1971 .sockaddr_len = sizeof(struct sockaddr_in6)
1975 * TCP over IPv4 via INET6 API
1978 static struct tcp_func ipv6_mapped = {
1979 .queue_xmit = ip_queue_xmit,
1980 .send_check = tcp_v4_send_check,
1981 .rebuild_header = tcp_v4_rebuild_header,
1982 .conn_request = tcp_v6_conn_request,
1983 .syn_recv_sock = tcp_v6_syn_recv_sock,
1984 .remember_stamp = tcp_v4_remember_stamp,
1985 .net_header_len = sizeof(struct iphdr),
1987 .setsockopt = ipv6_setsockopt,
1988 .getsockopt = ipv6_getsockopt,
1989 .addr2sockaddr = v6_addr2sockaddr,
1990 .sockaddr_len = sizeof(struct sockaddr_in6)
1995 /* NOTE: A lot of things are set to zero explicitly by the call to
1996 * sk_alloc(), so they need not be done here.
1998 static int tcp_v6_init_sock(struct sock *sk)
2000 struct tcp_sock *tp = tcp_sk(sk);
2002 skb_queue_head_init(&tp->out_of_order_queue);
2003 tcp_init_xmit_timers(sk);
2004 tcp_prequeue_init(tp);
2006 tp->rto = TCP_TIMEOUT_INIT;
2007 tp->mdev = TCP_TIMEOUT_INIT;
2009 /* So many TCP implementations out there (incorrectly) count the
2010 * initial SYN frame in their delayed-ACK and congestion control
2011 * algorithms that we must have the following bandaid to talk
2012 * efficiently to them. -DaveM
2014 tp->snd_cwnd = 2;
2016 /* See draft-stevens-tcpca-spec-01 for discussion of the
2017 * initialization of these values.
2019 tp->snd_ssthresh = 0x7fffffff;
2020 tp->snd_cwnd_clamp = ~0;
2021 tp->mss_cache = 536;
2023 tp->reordering = sysctl_tcp_reordering;
2025 sk->sk_state = TCP_CLOSE;
2027 tp->af_specific = &ipv6_specific;
2028 tp->ca_ops = &tcp_init_congestion_ops;
2029 sk->sk_write_space = sk_stream_write_space;
2030 sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);
2032 sk->sk_sndbuf = sysctl_tcp_wmem[1];
2033 sk->sk_rcvbuf = sysctl_tcp_rmem[1];
2035 atomic_inc(&tcp_sockets_allocated);
2037 return 0;
2040 static int tcp_v6_destroy_sock(struct sock *sk)
2042 extern int tcp_v4_destroy_sock(struct sock *sk);
2044 tcp_v4_destroy_sock(sk);
2045 return inet6_destroy_sock(sk);
2048 /* Proc filesystem TCPv6 sock list dumping. */
2049 static void get_openreq6(struct seq_file *seq,
2050 struct sock *sk, struct request_sock *req, int i, int uid)
2052 struct in6_addr *dest, *src;
2053 int ttd = req->expires - jiffies;
2055 if (ttd < 0)
2056 ttd = 0;
2058 src = &tcp6_rsk(req)->loc_addr;
2059 dest = &tcp6_rsk(req)->rmt_addr;
2060 seq_printf(seq,
2061 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
2062 "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %p\n",
2064 src->s6_addr32[0], src->s6_addr32[1],
2065 src->s6_addr32[2], src->s6_addr32[3],
2066 ntohs(inet_sk(sk)->sport),
2067 dest->s6_addr32[0], dest->s6_addr32[1],
2068 dest->s6_addr32[2], dest->s6_addr32[3],
2069 ntohs(inet_rsk(req)->rmt_port),
2070 TCP_SYN_RECV,
2071 0,0, /* could print option size, but that is af dependent. */
2072 1, /* timers active (only the expire timer) */
2073 jiffies_to_clock_t(ttd),
2074 req->retrans,
2075 uid,
2076 0, /* non standard timer */
2077 0, /* open_requests have no inode */
2078 0, req);
2081 static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
2083 struct in6_addr *dest, *src;
2084 __u16 destp, srcp;
2085 int timer_active;
2086 unsigned long timer_expires;
2087 struct inet_sock *inet = inet_sk(sp);
2088 struct tcp_sock *tp = tcp_sk(sp);
2089 struct ipv6_pinfo *np = inet6_sk(sp);
2091 dest = &np->daddr;
2092 src = &np->rcv_saddr;
2093 destp = ntohs(inet->dport);
2094 srcp = ntohs(inet->sport);
2095 if (tp->pending == TCP_TIME_RETRANS) {
2096 timer_active = 1;
2097 timer_expires = tp->timeout;
2098 } else if (tp->pending == TCP_TIME_PROBE0) {
2099 timer_active = 4;
2100 timer_expires = tp->timeout;
2101 } else if (timer_pending(&sp->sk_timer)) {
2102 timer_active = 2;
2103 timer_expires = sp->sk_timer.expires;
2104 } else {
2105 timer_active = 0;
2106 timer_expires = jiffies;
2109 seq_printf(seq,
2110 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
2111 "%02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p %u %u %u %u %d\n",
2113 src->s6_addr32[0], src->s6_addr32[1],
2114 src->s6_addr32[2], src->s6_addr32[3], srcp,
2115 dest->s6_addr32[0], dest->s6_addr32[1],
2116 dest->s6_addr32[2], dest->s6_addr32[3], destp,
2117 sp->sk_state,
2118 tp->write_seq-tp->snd_una, tp->rcv_nxt-tp->copied_seq,
2119 timer_active,
2120 jiffies_to_clock_t(timer_expires - jiffies),
2121 tp->retransmits,
2122 sock_i_uid(sp),
2123 tp->probes_out,
2124 sock_i_ino(sp),
2125 atomic_read(&sp->sk_refcnt), sp,
2126 tp->rto, tp->ack.ato, (tp->ack.quick<<1)|tp->ack.pingpong,
2127 tp->snd_cwnd, tp->snd_ssthresh>=0xFFFF?-1:tp->snd_ssthresh
2131 static void get_timewait6_sock(struct seq_file *seq,
2132 struct tcp_tw_bucket *tw, int i)
2134 struct in6_addr *dest, *src;
2135 __u16 destp, srcp;
2136 int ttd = tw->tw_ttd - jiffies;
2138 if (ttd < 0)
2139 ttd = 0;
2141 dest = &tw->tw_v6_daddr;
2142 src = &tw->tw_v6_rcv_saddr;
2143 destp = ntohs(tw->tw_dport);
2144 srcp = ntohs(tw->tw_sport);
2146 seq_printf(seq,
2147 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
2148 "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %p\n",
2150 src->s6_addr32[0], src->s6_addr32[1],
2151 src->s6_addr32[2], src->s6_addr32[3], srcp,
2152 dest->s6_addr32[0], dest->s6_addr32[1],
2153 dest->s6_addr32[2], dest->s6_addr32[3], destp,
2154 tw->tw_substate, 0, 0,
2155 3, jiffies_to_clock_t(ttd), 0, 0, 0, 0,
2156 atomic_read(&tw->tw_refcnt), tw);
2159 #ifdef CONFIG_PROC_FS
2160 static int tcp6_seq_show(struct seq_file *seq, void *v)
2162 struct tcp_iter_state *st;
2164 if (v == SEQ_START_TOKEN) {
2165 seq_puts(seq,
2166 " sl "
2167 "local_address "
2168 "remote_address "
2169 "st tx_queue rx_queue tr tm->when retrnsmt"
2170 " uid timeout inode\n");
2171 goto out;
2173 st = seq->private;
2175 switch (st->state) {
2176 case TCP_SEQ_STATE_LISTENING:
2177 case TCP_SEQ_STATE_ESTABLISHED:
2178 get_tcp6_sock(seq, v, st->num);
2179 break;
2180 case TCP_SEQ_STATE_OPENREQ:
2181 get_openreq6(seq, st->syn_wait_sk, v, st->num, st->uid);
2182 break;
2183 case TCP_SEQ_STATE_TIME_WAIT:
2184 get_timewait6_sock(seq, v, st->num);
2185 break;
2187 out:
2188 return 0;
2191 static struct file_operations tcp6_seq_fops;
2192 static struct tcp_seq_afinfo tcp6_seq_afinfo = {
2193 .owner = THIS_MODULE,
2194 .name = "tcp6",
2195 .family = AF_INET6,
2196 .seq_show = tcp6_seq_show,
2197 .seq_fops = &tcp6_seq_fops,
2200 int __init tcp6_proc_init(void)
2202 return tcp_proc_register(&tcp6_seq_afinfo);
2205 void tcp6_proc_exit(void)
2207 tcp_proc_unregister(&tcp6_seq_afinfo);
2209 #endif
2211 struct proto tcpv6_prot = {
2212 .name = "TCPv6",
2213 .owner = THIS_MODULE,
2214 .close = tcp_close,
2215 .connect = tcp_v6_connect,
2216 .disconnect = tcp_disconnect,
2217 .accept = tcp_accept,
2218 .ioctl = tcp_ioctl,
2219 .init = tcp_v6_init_sock,
2220 .destroy = tcp_v6_destroy_sock,
2221 .shutdown = tcp_shutdown,
2222 .setsockopt = tcp_setsockopt,
2223 .getsockopt = tcp_getsockopt,
2224 .sendmsg = tcp_sendmsg,
2225 .recvmsg = tcp_recvmsg,
2226 .backlog_rcv = tcp_v6_do_rcv,
2227 .hash = tcp_v6_hash,
2228 .unhash = tcp_unhash,
2229 .get_port = tcp_v6_get_port,
2230 .enter_memory_pressure = tcp_enter_memory_pressure,
2231 .sockets_allocated = &tcp_sockets_allocated,
2232 .memory_allocated = &tcp_memory_allocated,
2233 .memory_pressure = &tcp_memory_pressure,
2234 .sysctl_mem = sysctl_tcp_mem,
2235 .sysctl_wmem = sysctl_tcp_wmem,
2236 .sysctl_rmem = sysctl_tcp_rmem,
2237 .max_header = MAX_TCP_HEADER,
2238 .obj_size = sizeof(struct tcp6_sock),
2239 .rsk_prot = &tcp6_request_sock_ops,
2242 static struct inet6_protocol tcpv6_protocol = {
2243 .handler = tcp_v6_rcv,
2244 .err_handler = tcp_v6_err,
2245 .flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
2248 extern struct proto_ops inet6_stream_ops;
2250 static struct inet_protosw tcpv6_protosw = {
2251 .type = SOCK_STREAM,
2252 .protocol = IPPROTO_TCP,
2253 .prot = &tcpv6_prot,
2254 .ops = &inet6_stream_ops,
2255 .capability = -1,
2256 .no_check = 0,
2257 .flags = INET_PROTOSW_PERMANENT,
2260 void __init tcpv6_init(void)
2262 /* register inet6 protocol */
2263 if (inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP) < 0)
2264 printk(KERN_ERR "tcpv6_init: Could not register protocol\n");
2265 inet6_register_protosw(&tcpv6_protosw);