net/ipv4/tcp_ipv4.c

   1 /*
   2  * INET         An implementation of the TCP/IP protocol suite for the LINUX
   3  *              operating system.  INET is implemented using the  BSD Socket
   4  *              interface as the means of communication with the user level.
   5  *
   6  *              Implementation of the Transmission Control Protocol(TCP).
   7  *
   8  *              IPv4 specific functions
   9  *
  10  *
  11  *              code split from:
  12  *              linux/ipv4/tcp.c
  13  *              linux/ipv4/tcp_input.c
  14  *              linux/ipv4/tcp_output.c
  15  *
  16  *              See tcp.c for author information
  17  *
  18  *      This program is free software; you can redistribute it and/or
  19  *      modify it under the terms of the GNU General Public License
  20  *      as published by the Free Software Foundation; either version
  21  *      2 of the License, or (at your option) any later version.
  22  */
  23
  24 /*
  25  * Changes:
  26  *              David S. Miller :       New socket lookup architecture.
  27  *                                      This code is dedicated to John Dyson.
  28  *              David S. Miller :       Change semantics of established hash,
  29  *                                      half is devoted to TIME_WAIT sockets
  30  *                                      and the rest go in the other half.
  31  *              Andi Kleen :            Add support for syncookies and fixed
  32  *                                      some bugs: ip options weren't passed to
  33  *                                      the TCP layer, missed a check for an
  34  *                                      ACK bit.
  35  *              Andi Kleen :            Implemented fast path mtu discovery.
  36  *                                      Fixed many serious bugs in the
  37  *                                      request_sock handling and moved
  38  *                                      most of it into the af independent code.
  39  *                                      Added tail drop and some other bugfixes.
  40  *                                      Added new listen semantics.
  41  *              Mike McLagan    :       Routing by source
  42  *      Juan Jose Ciarlante:            ip_dynaddr bits
  43  *              Andi Kleen:             various fixes.
  44  *      Vitaly E. Lavrov        :       Transparent proxy revived after year
  45  *                                      coma.
  46  *      Andi Kleen              :       Fix new listen.
  47  *      Andi Kleen              :       Fix accept error reporting.
  48  *      YOSHIFUJI Hideaki @USAGI and:   Support IPV6_V6ONLY socket option, which
  49  *      Alexey Kuznetsov                allow both IPv4 and IPv6 sockets to bind
  50  *                                      a single port at the same time.
  51  */
  52
  53
  54 #include <linux/bottom_half.h>
  55 #include <linux/types.h>
  56 #include <linux/fcntl.h>
  57 #include <linux/module.h>
  58 #include <linux/random.h>
  59 #include <linux/cache.h>
  60 #include <linux/jhash.h>
  61 #include <linux/init.h>
  62 #include <linux/times.h>
  63
  64 #include <net/net_namespace.h>
  65 #include <net/icmp.h>
  66 #include <net/inet_hashtables.h>
  67 #include <net/tcp.h>
  68 #include <net/transp_v6.h>
  69 #include <net/ipv6.h>
  70 #include <net/inet_common.h>
  71 #include <net/timewait_sock.h>
  72 #include <net/xfrm.h>
  73 #include <net/netdma.h>
  74
  75 #include <linux/inet.h>
  76 #include <linux/ipv6.h>
  77 #include <linux/stddef.h>
  78 #include <linux/proc_fs.h>
  79 #include <linux/seq_file.h>
  80
  81 #include <linux/crypto.h>
  82 #include <linux/scatterlist.h>
  83
   84 int sysctl_tcp_tw_reuse __read_mostly;     /* read by tcp_twsk_unique(): when non-zero, lets a new connection take over a TIME-WAIT bucket whose last timestamp is more than one second old */
   85 int sysctl_tcp_low_latency __read_mostly;  /* NOTE(review): not referenced in this part of the file; presumably a tunable consumed elsewhere -- confirm */
  86
  87
   88 #ifdef CONFIG_TCP_MD5SIG /* forward declarations: both helpers are used by the reply paths below before their definitions */
   89 static struct tcp_md5sig_key *tcp_v4_md5_do_lookup(struct sock *sk,
   90                                                    __be32 addr);
   91 static int tcp_v4_md5_hash_hdr(char *md5_hash, struct tcp_md5sig_key *key,
   92                                __be32 daddr, __be32 saddr, struct tcphdr *th);
   93 #else /* without MD5 support the lookup is a stub, so callers need no #ifdef of their own */
   94 static inline
   95 struct tcp_md5sig_key *tcp_v4_md5_do_lookup(struct sock *sk, __be32 addr)
   96 {
   97         return NULL; /* no key can ever be configured in this build */
   98 }
   99 #endif
 100
  101 struct inet_hashinfo tcp_hashinfo; /* socket lookup tables for TCP; handed to inet_lookup() by tcp_v4_err() */
 102
 103 static inline __u32 tcp_v4_init_sequence(struct sk_buff *skb)
 104 {
 105         return secure_tcp_sequence_number(ip_hdr(skb)->daddr,
 106                                           ip_hdr(skb)->saddr,
 107                                           tcp_hdr(skb)->dest,
 108                                           tcp_hdr(skb)->source);
 109 }
 110
 111 int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
 112 {
 113         const struct tcp_timewait_sock *tcptw = tcp_twsk(sktw);
 114         struct tcp_sock *tp = tcp_sk(sk);
 115
 116         /* With PAWS, it is safe from the viewpoint
 117            of data integrity. Even without PAWS it is safe provided sequence
 118            spaces do not overlap i.e. at data rates <= 80Mbit/sec.
 119
 120            Actually, the idea is close to VJ's one, only timestamp cache is
 121            held not per host, but per port pair and TW bucket is used as state
 122            holder.
 123
 124            If TW bucket has been already destroyed we fall back to VJ's scheme
 125            and use initial timestamp retrieved from peer table.
 126          */
 127         if (tcptw->tw_ts_recent_stamp &&
 128             (twp == NULL || (sysctl_tcp_tw_reuse &&
 129                              get_seconds() - tcptw->tw_ts_recent_stamp > 1))) {
 130                 tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2;
 131                 if (tp->write_seq == 0)
 132                         tp->write_seq = 1;
 133                 tp->rx_opt.ts_recent       = tcptw->tw_ts_recent;
 134                 tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp;
 135                 sock_hold(sktw);
 136                 return 1;
 137         }
 138
 139         return 0;
 140 }
 141
 142 EXPORT_SYMBOL_GPL(tcp_twsk_unique);
 143
 144 /* This will initiate an outgoing connection. */
 145 int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 146 {
 147         struct inet_sock *inet = inet_sk(sk);
 148         struct tcp_sock *tp = tcp_sk(sk);
 149         struct sockaddr_in *usin = (struct sockaddr_in *)uaddr;
 150         struct rtable *rt;
 151         __be32 daddr, nexthop;
 152         int tmp;
 153         int err;
 154
 155         if (addr_len < sizeof(struct sockaddr_in))
 156                 return -EINVAL;
 157
 158         if (usin->sin_family != AF_INET)
 159                 return -EAFNOSUPPORT;
 160
 161         nexthop = daddr = usin->sin_addr.s_addr;
 162         if (inet->opt && inet->opt->srr) {
 163                 if (!daddr)
 164                         return -EINVAL;
 165                 nexthop = inet->opt->faddr;
 166         }
 167
 168         tmp = ip_route_connect(&rt, nexthop, inet->saddr,
 169                                RT_CONN_FLAGS(sk), sk->sk_bound_dev_if,
 170                                IPPROTO_TCP,
 171                                inet->sport, usin->sin_port, sk, 1);
 172         if (tmp < 0) {
 173                 if (tmp == -ENETUNREACH)
 174                         IP_INC_STATS_BH(sock_net(sk), IPSTATS_MIB_OUTNOROUTES);
 175                 return tmp;
 176         }
 177
 178         if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) {
 179                 ip_rt_put(rt);
 180                 return -ENETUNREACH;
 181         }
 182
 183         if (!inet->opt || !inet->opt->srr)
 184                 daddr = rt->rt_dst;
 185
 186         if (!inet->saddr)
 187                 inet->saddr = rt->rt_src;
 188         inet->rcv_saddr = inet->saddr;
 189
 190         if (tp->rx_opt.ts_recent_stamp && inet->daddr != daddr) {
 191                 /* Reset inherited state */
 192                 tp->rx_opt.ts_recent       = 0;
 193                 tp->rx_opt.ts_recent_stamp = 0;
 194                 tp->write_seq              = 0;
 195         }
 196
 197         if (tcp_death_row.sysctl_tw_recycle &&
 198             !tp->rx_opt.ts_recent_stamp && rt->rt_dst == daddr) {
 199                 struct inet_peer *peer = rt_get_peer(rt);
 200                 /*
 201                  * VJ's idea. We save last timestamp seen from
 202                  * the destination in peer table, when entering state
 203                  * TIME-WAIT * and initialize rx_opt.ts_recent from it,
 204                  * when trying new connection.
 205                  */
 206                 if (peer != NULL &&
 207                     peer->tcp_ts_stamp + TCP_PAWS_MSL >= get_seconds()) {
 208                         tp->rx_opt.ts_recent_stamp = peer->tcp_ts_stamp;
 209                         tp->rx_opt.ts_recent = peer->tcp_ts;
 210                 }
 211         }
 212
 213         inet->dport = usin->sin_port;
 214         inet->daddr = daddr;
 215
 216         inet_csk(sk)->icsk_ext_hdr_len = 0;
 217         if (inet->opt)
 218                 inet_csk(sk)->icsk_ext_hdr_len = inet->opt->optlen;
 219
 220         tp->rx_opt.mss_clamp = 536;
 221
 222         /* Socket identity is still unknown (sport may be zero).
 223          * However we set state to SYN-SENT and not releasing socket
 224          * lock select source port, enter ourselves into the hash tables and
 225          * complete initialization after this.
 226          */
 227         tcp_set_state(sk, TCP_SYN_SENT);
 228         err = inet_hash_connect(&tcp_death_row, sk);
 229         if (err)
 230                 goto failure;
 231
 232         err = ip_route_newports(&rt, IPPROTO_TCP,
 233                                 inet->sport, inet->dport, sk);
 234         if (err)
 235                 goto failure;
 236
 237         /* OK, now commit destination to socket.  */
 238         sk->sk_gso_type = SKB_GSO_TCPV4;
 239         sk_setup_caps(sk, &rt->u.dst);
 240
 241         if (!tp->write_seq)
 242                 tp->write_seq = secure_tcp_sequence_number(inet->saddr,
 243                                                            inet->daddr,
 244                                                            inet->sport,
 245                                                            usin->sin_port);
 246
 247         inet->id = tp->write_seq ^ jiffies;
 248
 249         err = tcp_connect(sk);
 250         rt = NULL;
 251         if (err)
 252                 goto failure;
 253
 254         return 0;
 255
 256 failure:
 257         /*
 258          * This unhashes the socket and releases the local port,
 259          * if necessary.
 260          */
 261         tcp_set_state(sk, TCP_CLOSE);
 262         ip_rt_put(rt);
 263         sk->sk_route_caps = 0;
 264         inet->dport = 0;
 265         return err;
 266 }
 267
 268 /*
 269  * This routine does path mtu discovery as defined in RFC1191.
 270  */
 271 static void do_pmtu_discovery(struct sock *sk, struct iphdr *iph, u32 mtu)
 272 {
 273         struct dst_entry *dst;
 274         struct inet_sock *inet = inet_sk(sk);
 275
 276         /* We are not interested in TCP_LISTEN and open_requests (SYN-ACKs
 277          * send out by Linux are always <576bytes so they should go through
 278          * unfragmented).
 279          */
 280         if (sk->sk_state == TCP_LISTEN)
 281                 return;
 282
 283         /* We don't check in the destentry if pmtu discovery is forbidden
 284          * on this route. We just assume that no packet_to_big packets
 285          * are send back when pmtu discovery is not active.
 286          * There is a small race when the user changes this flag in the
 287          * route, but I think that's acceptable.
 288          */
 289         if ((dst = __sk_dst_check(sk, 0)) == NULL)
 290                 return;
 291
 292         dst->ops->update_pmtu(dst, mtu);
 293
 294         /* Something is about to be wrong... Remember soft error
 295          * for the case, if this connection will not able to recover.
 296          */
 297         if (mtu < dst_mtu(dst) && ip_dont_fragment(sk, dst))
 298                 sk->sk_err_soft = EMSGSIZE;
 299
 300         mtu = dst_mtu(dst);
 301
 302         if (inet->pmtudisc != IP_PMTUDISC_DONT &&
 303             inet_csk(sk)->icsk_pmtu_cookie > mtu) {
 304                 tcp_sync_mss(sk, mtu);
 305
 306                 /* Resend the TCP packet because it's
 307                  * clear that the old packet has been
 308                  * dropped. This is the new "fast" path mtu
 309                  * discovery.
 310                  */
 311                 tcp_simple_retransmit(sk);
 312         } /* else let the usual retransmit timer handle it */
 313 }
 314
 315 /*
 316  * This routine is called by the ICMP module when it gets some
 317  * sort of error condition.  If err < 0 then the socket should
 318  * be closed and the error returned to the user.  If err > 0
 319  * it's just the icmp type << 8 | icmp code.  After adjustment
 320  * header points to the first 8 bytes of the tcp header.  We need
 321  * to find the appropriate port.
 322  *
 323  * The locking strategy used here is very "optimistic". When
 324  * someone else accesses the socket the ICMP is just dropped
 325  * and for some paths there is no check at all.
 326  * A more general error queue to queue errors for later handling
 327  * is probably better.
 328  *
 329  */
 330
 331 void tcp_v4_err(struct sk_buff *skb, u32 info)
 332 {
 333         struct iphdr *iph = (struct iphdr *)skb->data;
 334         struct tcphdr *th = (struct tcphdr *)(skb->data + (iph->ihl << 2));
 335         struct tcp_sock *tp;
 336         struct inet_sock *inet;
 337         const int type = icmp_hdr(skb)->type;
 338         const int code = icmp_hdr(skb)->code;
 339         struct sock *sk;
 340         __u32 seq;
 341         int err;
 342         struct net *net = dev_net(skb->dev);
 343
 344         if (skb->len < (iph->ihl << 2) + 8) {
 345                 ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
 346                 return;
 347         }
 348
 349         sk = inet_lookup(net, &tcp_hashinfo, iph->daddr, th->dest,
 350                         iph->saddr, th->source, inet_iif(skb));
 351         if (!sk) {
 352                 ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
 353                 return;
 354         }
 355         if (sk->sk_state == TCP_TIME_WAIT) {
 356                 inet_twsk_put(inet_twsk(sk));
 357                 return;
 358         }
 359
 360         bh_lock_sock(sk);
 361         /* If too many ICMPs get dropped on busy
 362          * servers this needs to be solved differently.
 363          */
 364         if (sock_owned_by_user(sk))
 365                 NET_INC_STATS_BH(net, LINUX_MIB_LOCKDROPPEDICMPS);
 366
 367         if (sk->sk_state == TCP_CLOSE)
 368                 goto out;
 369
 370         tp = tcp_sk(sk);
 371         seq = ntohl(th->seq);
 372         if (sk->sk_state != TCP_LISTEN &&
 373             !between(seq, tp->snd_una, tp->snd_nxt)) {
 374                 NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
 375                 goto out;
 376         }
 377
 378         switch (type) {
 379         case ICMP_SOURCE_QUENCH:
 380                 /* Just silently ignore these. */
 381                 goto out;
 382         case ICMP_PARAMETERPROB:
 383                 err = EPROTO;
 384                 break;
 385         case ICMP_DEST_UNREACH:
 386                 if (code > NR_ICMP_UNREACH)
 387                         goto out;
 388
 389                 if (code == ICMP_FRAG_NEEDED) { /* PMTU discovery (RFC1191) */
 390                         if (!sock_owned_by_user(sk))
 391                                 do_pmtu_discovery(sk, iph, info);
 392                         goto out;
 393                 }
 394
 395                 err = icmp_err_convert[code].errno;
 396                 break;
 397         case ICMP_TIME_EXCEEDED:
 398                 err = EHOSTUNREACH;
 399                 break;
 400         default:
 401                 goto out;
 402         }
 403
 404         switch (sk->sk_state) {
 405                 struct request_sock *req, **prev;
 406         case TCP_LISTEN:
 407                 if (sock_owned_by_user(sk))
 408                         goto out;
 409
 410                 req = inet_csk_search_req(sk, &prev, th->dest,
 411                                           iph->daddr, iph->saddr);
 412                 if (!req)
 413                         goto out;
 414
 415                 /* ICMPs are not backlogged, hence we cannot get
 416                    an established socket here.
 417                  */
 418                 WARN_ON(req->sk);
 419
 420                 if (seq != tcp_rsk(req)->snt_isn) {
 421                         NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
 422                         goto out;
 423                 }
 424
 425                 /*
 426                  * Still in SYN_RECV, just remove it silently.
 427                  * There is no good way to pass the error to the newly
 428                  * created socket, and POSIX does not want network
 429                  * errors returned from accept().
 430                  */
 431                 inet_csk_reqsk_queue_drop(sk, req, prev);
 432                 goto out;
 433
 434         case TCP_SYN_SENT:
 435         case TCP_SYN_RECV:  /* Cannot happen.
 436                                It can f.e. if SYNs crossed.
 437                              */
 438                 if (!sock_owned_by_user(sk)) {
 439                         sk->sk_err = err;
 440
 441                         sk->sk_error_report(sk);
 442
 443                         tcp_done(sk);
 444                 } else {
 445                         sk->sk_err_soft = err;
 446                 }
 447                 goto out;
 448         }
 449
 450         /* If we've already connected we will keep trying
 451          * until we time out, or the user gives up.
 452          *
 453          * rfc1122 4.2.3.9 allows to consider as hard errors
 454          * only PROTO_UNREACH and PORT_UNREACH (well, FRAG_FAILED too,
 455          * but it is obsoleted by pmtu discovery).
 456          *
 457          * Note, that in modern internet, where routing is unreliable
 458          * and in each dark corner broken firewalls sit, sending random
 459          * errors ordered by their masters even this two messages finally lose
 460          * their original sense (even Linux sends invalid PORT_UNREACHs)
 461          *
 462          * Now we are in compliance with RFCs.
 463          *                                                      --ANK (980905)
 464          */
 465
 466         inet = inet_sk(sk);
 467         if (!sock_owned_by_user(sk) && inet->recverr) {
 468                 sk->sk_err = err;
 469                 sk->sk_error_report(sk);
 470         } else  { /* Only an error on timeout */
 471                 sk->sk_err_soft = err;
 472         }
 473
 474 out:
 475         bh_unlock_sock(sk);
 476         sock_put(sk);
 477 }
 478
 479 /* This routine computes an IPv4 TCP checksum. */
 480 void tcp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb)
 481 {
 482         struct inet_sock *inet = inet_sk(sk);
 483         struct tcphdr *th = tcp_hdr(skb);
 484
 485         if (skb->ip_summed == CHECKSUM_PARTIAL) {
 486                 th->check = ~tcp_v4_check(len, inet->saddr,
 487                                           inet->daddr, 0);
 488                 skb->csum_start = skb_transport_header(skb) - skb->head;
 489                 skb->csum_offset = offsetof(struct tcphdr, check);
 490         } else {
 491                 th->check = tcp_v4_check(len, inet->saddr, inet->daddr,
 492                                          csum_partial(th,
 493                                                       th->doff << 2,
 494                                                       skb->csum));
 495         }
 496 }
 497
 498 int tcp_v4_gso_send_check(struct sk_buff *skb)
 499 {
 500         const struct iphdr *iph;
 501         struct tcphdr *th;
 502
 503         if (!pskb_may_pull(skb, sizeof(*th)))
 504                 return -EINVAL;
 505
 506         iph = ip_hdr(skb);
 507         th = tcp_hdr(skb);
 508
 509         th->check = 0;
 510         th->check = ~tcp_v4_check(skb->len, iph->saddr, iph->daddr, 0);
 511         skb->csum_start = skb_transport_header(skb) - skb->head;
 512         skb->csum_offset = offsetof(struct tcphdr, check);
 513         skb->ip_summed = CHECKSUM_PARTIAL;
 514         return 0;
 515 }
 516
 517 /*
 518  *      This routine will send an RST to the other tcp.
 519  *
 520  *      Someone asks: why I NEVER use socket parameters (TOS, TTL etc.)
 521  *                    for reset.
 522  *      Answer: if a packet caused RST, it is not for a socket
 523  *              existing in our system, if it is matched to a socket,
 524  *              it is just duplicate segment or bug in other side's TCP.
 525  *              So that we build reply only basing on parameters
 526  *              arrived with segment.
 527  *      Exception: precedence violation. We do not implement it in any case.
 528  */
 529
 530 static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
 531 {
 532         struct tcphdr *th = tcp_hdr(skb);
 533         struct {
 534                 struct tcphdr th;
 535 #ifdef CONFIG_TCP_MD5SIG
 536                 __be32 opt[(TCPOLEN_MD5SIG_ALIGNED >> 2)];
 537 #endif
 538         } rep;
 539         struct ip_reply_arg arg;
 540 #ifdef CONFIG_TCP_MD5SIG
 541         struct tcp_md5sig_key *key;
 542 #endif
 543         struct net *net;
 544
 545         /* Never send a reset in response to a reset. */
 546         if (th->rst)
 547                 return;
 548
 549         if (skb->rtable->rt_type != RTN_LOCAL)
 550                 return;
 551
 552         /* Swap the send and the receive. */
 553         memset(&rep, 0, sizeof(rep));
 554         rep.th.dest   = th->source;
 555         rep.th.source = th->dest;
 556         rep.th.doff   = sizeof(struct tcphdr) / 4;
 557         rep.th.rst    = 1;
 558
 559         if (th->ack) {
 560                 rep.th.seq = th->ack_seq;
 561         } else {
 562                 rep.th.ack = 1;
 563                 rep.th.ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin +
 564                                        skb->len - (th->doff << 2));
 565         }
 566
 567         memset(&arg, 0, sizeof(arg));
 568         arg.iov[0].iov_base = (unsigned char *)&rep;
 569         arg.iov[0].iov_len  = sizeof(rep.th);
 570
 571 #ifdef CONFIG_TCP_MD5SIG
 572         key = sk ? tcp_v4_md5_do_lookup(sk, ip_hdr(skb)->daddr) : NULL;
 573         if (key) {
 574                 rep.opt[0] = htonl((TCPOPT_NOP << 24) |
 575                                    (TCPOPT_NOP << 16) |
 576                                    (TCPOPT_MD5SIG << 8) |
 577                                    TCPOLEN_MD5SIG);
 578                 /* Update length and the length the header thinks exists */
 579                 arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
 580                 rep.th.doff = arg.iov[0].iov_len / 4;
 581
 582                 tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[1],
 583                                      key, ip_hdr(skb)->saddr,
 584                                      ip_hdr(skb)->daddr, &rep.th);
 585         }
 586 #endif
 587         arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
 588                                       ip_hdr(skb)->saddr, /* XXX */
 589                                       arg.iov[0].iov_len, IPPROTO_TCP, 0);
 590         arg.csumoffset = offsetof(struct tcphdr, check) / 2;
 591         arg.flags = (sk && inet_sk(sk)->transparent) ? IP_REPLY_ARG_NOSRCCHECK : 0;
 592
 593         net = dev_net(skb->dst->dev);
 594         ip_send_reply(net->ipv4.tcp_sock, skb,
 595                       &arg, arg.iov[0].iov_len);
 596
 597         TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
 598         TCP_INC_STATS_BH(net, TCP_MIB_OUTRSTS);
 599 }
 600
 601 /* The code following below sending ACKs in SYN-RECV and TIME-WAIT states
 602    outside socket context is ugly, certainly. What can I do?
 603  */
 604
 605 static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack,
 606                             u32 win, u32 ts, int oif,
 607                             struct tcp_md5sig_key *key,
 608                             int reply_flags)
 609 {
 610         struct tcphdr *th = tcp_hdr(skb);
 611         struct {
 612                 struct tcphdr th;
 613                 __be32 opt[(TCPOLEN_TSTAMP_ALIGNED >> 2)
 614 #ifdef CONFIG_TCP_MD5SIG
 615                            + (TCPOLEN_MD5SIG_ALIGNED >> 2)
 616 #endif
 617                         ];
 618         } rep;
 619         struct ip_reply_arg arg;
 620         struct net *net = dev_net(skb->dst->dev);
 621
 622         memset(&rep.th, 0, sizeof(struct tcphdr));
 623         memset(&arg, 0, sizeof(arg));
 624
 625         arg.iov[0].iov_base = (unsigned char *)&rep;
 626         arg.iov[0].iov_len  = sizeof(rep.th);
 627         if (ts) {
 628                 rep.opt[0] = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
 629                                    (TCPOPT_TIMESTAMP << 8) |
 630                                    TCPOLEN_TIMESTAMP);
 631                 rep.opt[1] = htonl(tcp_time_stamp);
 632                 rep.opt[2] = htonl(ts);
 633                 arg.iov[0].iov_len += TCPOLEN_TSTAMP_ALIGNED;
 634         }
 635
 636         /* Swap the send and the receive. */
 637         rep.th.dest    = th->source;
 638         rep.th.source  = th->dest;
 639         rep.th.doff    = arg.iov[0].iov_len / 4;
 640         rep.th.seq     = htonl(seq);
 641         rep.th.ack_seq = htonl(ack);
 642         rep.th.ack     = 1;
 643         rep.th.window  = htons(win);
 644
 645 #ifdef CONFIG_TCP_MD5SIG
 646         if (key) {
 647                 int offset = (ts) ? 3 : 0;
 648
 649                 rep.opt[offset++] = htonl((TCPOPT_NOP << 24) |
 650                                           (TCPOPT_NOP << 16) |
 651                                           (TCPOPT_MD5SIG << 8) |
 652                                           TCPOLEN_MD5SIG);
 653                 arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
 654                 rep.th.doff = arg.iov[0].iov_len/4;
 655
 656                 tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[offset],
 657                                     key, ip_hdr(skb)->saddr,
 658                                     ip_hdr(skb)->daddr, &rep.th);
 659         }
 660 #endif
 661         arg.flags = reply_flags;
 662         arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
 663                                       ip_hdr(skb)->saddr, /* XXX */
 664                                       arg.iov[0].iov_len, IPPROTO_TCP, 0);
 665         arg.csumoffset = offsetof(struct tcphdr, check) / 2;
 666         if (oif)
 667                 arg.bound_dev_if = oif;
 668
 669         ip_send_reply(net->ipv4.tcp_sock, skb,
 670                       &arg, arg.iov[0].iov_len);
 671
 672         TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
 673 }
 674
 675 static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
 676 {
 677         struct inet_timewait_sock *tw = inet_twsk(sk);
 678         struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
 679
 680         tcp_v4_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
 681                         tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
 682                         tcptw->tw_ts_recent,
 683                         tw->tw_bound_dev_if,
 684                         tcp_twsk_md5_key(tcptw),
 685                         tw->tw_transparent ? IP_REPLY_ARG_NOSRCCHECK : 0
 686                         );
 687
 688         inet_twsk_put(tw);
 689 }
 690
 691 static void tcp_v4_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
 692                                   struct request_sock *req)
 693 {
 694         tcp_v4_send_ack(skb, tcp_rsk(req)->snt_isn + 1,
 695                         tcp_rsk(req)->rcv_isn + 1, req->rcv_wnd,
 696                         req->ts_recent,
 697                         0,
 698                         tcp_v4_md5_do_lookup(sk, ip_hdr(skb)->daddr),
 699                         inet_rsk(req)->no_srccheck ? IP_REPLY_ARG_NOSRCCHECK : 0);
 700 }
 701
 702 /*
 703  *      Send a SYN-ACK after having received a SYN.
 704  *      This still operates on a request_sock only, not on a big
 705  *      socket.
 706  */
 707 static int __tcp_v4_send_synack(struct sock *sk, struct request_sock *req,
 708                                 struct dst_entry *dst)
 709 {
 710         const struct inet_request_sock *ireq = inet_rsk(req);
 711         int err = -1;
 712         struct sk_buff * skb;
 713
 714         /* First, grab a route. */
 715         if (!dst && (dst = inet_csk_route_req(sk, req)) == NULL)
 716                 return -1;
 717
 718         skb = tcp_make_synack(sk, dst, req);
 719
 720         if (skb) {
 721                 struct tcphdr *th = tcp_hdr(skb);
 722
 723                 th->check = tcp_v4_check(skb->len,
 724                                          ireq->loc_addr,
 725                                          ireq->rmt_addr,
 726                                          csum_partial(th, skb->len,
 727                                                       skb->csum));
 728
 729                 err = ip_build_and_send_pkt(skb, sk, ireq->loc_addr,
 730                                             ireq->rmt_addr,
 731                                             ireq->opt);
 732                 err = net_xmit_eval(err);
 733         }
 734
 735         dst_release(dst);
 736         return err;
 737 }
 738
 739 static int tcp_v4_send_synack(struct sock *sk, struct request_sock *req)
 740 {
 741         return __tcp_v4_send_synack(sk, req, NULL);
 742 }
 743
 744 /*
 745  *      IPv4 request_sock destructor.
 746  */
 747 static void tcp_v4_reqsk_destructor(struct request_sock *req)
 748 {
 749         kfree(inet_rsk(req)->opt);
 750 }
 751
 752 #ifdef CONFIG_SYN_COOKIES
 753 static void syn_flood_warning(struct sk_buff *skb)
 754 {
 755         static unsigned long warntime;
 756
 757         if (time_after(jiffies, (warntime + HZ * 60))) {
 758                 warntime = jiffies;
 759                 printk(KERN_INFO
 760                        "possible SYN flooding on port %d. Sending cookies.\n",
 761                        ntohs(tcp_hdr(skb)->dest));
 762         }
 763 }
 764 #endif
 765
 766 /*
 767  * Save and compile IPv4 options into the request_sock if needed.
 768  */
 769 static struct ip_options *tcp_v4_save_options(struct sock *sk,
 770                                               struct sk_buff *skb)
 771 {
 772         struct ip_options *opt = &(IPCB(skb)->opt);
 773         struct ip_options *dopt = NULL;
 774
 775         if (opt && opt->optlen) {
 776                 int opt_size = optlength(opt);
 777                 dopt = kmalloc(opt_size, GFP_ATOMIC);
 778                 if (dopt) {
 779                         if (ip_options_echo(dopt, skb)) {
 780                                 kfree(dopt);
 781                                 dopt = NULL;
 782                         }
 783                 }
 784         }
 785         return dopt;
 786 }
 787
 788 #ifdef CONFIG_TCP_MD5SIG
 789 /*
 790  * RFC2385 MD5 checksumming requires a mapping of
 791  * IP address->MD5 Key.
 792  * We need to maintain these in the sk structure.
 793  */
 794
 795 /* Find the Key structure for an address.  */
 796 static struct tcp_md5sig_key *
 797                         tcp_v4_md5_do_lookup(struct sock *sk, __be32 addr)
 798 {
 799         struct tcp_sock *tp = tcp_sk(sk);
 800         int i;
 801
 802         if (!tp->md5sig_info || !tp->md5sig_info->entries4)
 803                 return NULL;
 804         for (i = 0; i < tp->md5sig_info->entries4; i++) {
 805                 if (tp->md5sig_info->keys4[i].addr == addr)
 806                         return &tp->md5sig_info->keys4[i].base;
 807         }
 808         return NULL;
 809 }
 810
 811 struct tcp_md5sig_key *tcp_v4_md5_lookup(struct sock *sk,
 812                                          struct sock *addr_sk)
 813 {
 814         return tcp_v4_md5_do_lookup(sk, inet_sk(addr_sk)->daddr);
 815 }
 816
 817 EXPORT_SYMBOL(tcp_v4_md5_lookup);
 818
 819 static struct tcp_md5sig_key *tcp_v4_reqsk_md5_lookup(struct sock *sk,
 820                                                       struct request_sock *req)
 821 {
 822         return tcp_v4_md5_do_lookup(sk, inet_rsk(req)->rmt_addr);
 823 }
 824
 825 /* This can be called on a newly created socket, from other files */
 826 int tcp_v4_md5_do_add(struct sock *sk, __be32 addr,
 827                       u8 *newkey, u8 newkeylen)
 828 {
 829         /* Add Key to the list */
 830         struct tcp_md5sig_key *key;
 831         struct tcp_sock *tp = tcp_sk(sk);
 832         struct tcp4_md5sig_key *keys;
 833
 834         key = tcp_v4_md5_do_lookup(sk, addr);
 835         if (key) {
 836                 /* Pre-existing entry - just update that one. */
 837                 kfree(key->key);
 838                 key->key = newkey;
 839                 key->keylen = newkeylen;
 840         } else {
 841                 struct tcp_md5sig_info *md5sig;
 842
 843                 if (!tp->md5sig_info) {
 844                         tp->md5sig_info = kzalloc(sizeof(*tp->md5sig_info),
 845                                                   GFP_ATOMIC);
 846                         if (!tp->md5sig_info) {
 847                                 kfree(newkey);
 848                                 return -ENOMEM;
 849                         }
 850                         sk->sk_route_caps &= ~NETIF_F_GSO_MASK;
 851                 }
 852                 if (tcp_alloc_md5sig_pool() == NULL) {
 853                         kfree(newkey);
 854                         return -ENOMEM;
 855                 }
 856                 md5sig = tp->md5sig_info;
 857
 858                 if (md5sig->alloced4 == md5sig->entries4) {
 859                         keys = kmalloc((sizeof(*keys) *
 860                                         (md5sig->entries4 + 1)), GFP_ATOMIC);
 861                         if (!keys) {
 862                                 kfree(newkey);
 863                                 tcp_free_md5sig_pool();
 864                                 return -ENOMEM;
 865                         }
 866
 867                         if (md5sig->entries4)
 868                                 memcpy(keys, md5sig->keys4,
 869                                        sizeof(*keys) * md5sig->entries4);
 870
 871                         /* Free old key list, and reference new one */
 872                         kfree(md5sig->keys4);
 873                         md5sig->keys4 = keys;
 874                         md5sig->alloced4++;
 875                 }
 876                 md5sig->entries4++;
 877                 md5sig->keys4[md5sig->entries4 - 1].addr        = addr;
 878                 md5sig->keys4[md5sig->entries4 - 1].base.key    = newkey;
 879                 md5sig->keys4[md5sig->entries4 - 1].base.keylen = newkeylen;
 880         }
 881         return 0;
 882 }
 883
 884 EXPORT_SYMBOL(tcp_v4_md5_do_add);
 885
 886 static int tcp_v4_md5_add_func(struct sock *sk, struct sock *addr_sk,
 887                                u8 *newkey, u8 newkeylen)
 888 {
 889         return tcp_v4_md5_do_add(sk, inet_sk(addr_sk)->daddr,
 890                                  newkey, newkeylen);
 891 }
 892
 893 int tcp_v4_md5_do_del(struct sock *sk, __be32 addr)
 894 {
 895         struct tcp_sock *tp = tcp_sk(sk);
 896         int i;
 897
 898         for (i = 0; i < tp->md5sig_info->entries4; i++) {
 899                 if (tp->md5sig_info->keys4[i].addr == addr) {
 900                         /* Free the key */
 901                         kfree(tp->md5sig_info->keys4[i].base.key);
 902                         tp->md5sig_info->entries4--;
 903
 904                         if (tp->md5sig_info->entries4 == 0) {
 905                                 kfree(tp->md5sig_info->keys4);
 906                                 tp->md5sig_info->keys4 = NULL;
 907                                 tp->md5sig_info->alloced4 = 0;
 908                         } else if (tp->md5sig_info->entries4 != i) {
 909                                 /* Need to do some manipulation */
 910                                 memmove(&tp->md5sig_info->keys4[i],
 911                                         &tp->md5sig_info->keys4[i+1],
 912                                         (tp->md5sig_info->entries4 - i) *
 913                                          sizeof(struct tcp4_md5sig_key));
 914                         }
 915                         tcp_free_md5sig_pool();
 916                         return 0;
 917                 }
 918         }
 919         return -ENOENT;
 920 }
 921
 922 EXPORT_SYMBOL(tcp_v4_md5_do_del);
 923
 924 static void tcp_v4_clear_md5_list(struct sock *sk)
 925 {
 926         struct tcp_sock *tp = tcp_sk(sk);
 927
 928         /* Free each key, then the set of key keys,
 929          * the crypto element, and then decrement our
 930          * hold on the last resort crypto.
 931          */
 932         if (tp->md5sig_info->entries4) {
 933                 int i;
 934                 for (i = 0; i < tp->md5sig_info->entries4; i++)
 935                         kfree(tp->md5sig_info->keys4[i].base.key);
 936                 tp->md5sig_info->entries4 = 0;
 937                 tcp_free_md5sig_pool();
 938         }
 939         if (tp->md5sig_info->keys4) {
 940                 kfree(tp->md5sig_info->keys4);
 941                 tp->md5sig_info->keys4 = NULL;
 942                 tp->md5sig_info->alloced4  = 0;
 943         }
 944 }
 945
 946 static int tcp_v4_parse_md5_keys(struct sock *sk, char __user *optval,
 947                                  int optlen)
 948 {
 949         struct tcp_md5sig cmd;
 950         struct sockaddr_in *sin = (struct sockaddr_in *)&cmd.tcpm_addr;
 951         u8 *newkey;
 952
 953         if (optlen < sizeof(cmd))
 954                 return -EINVAL;
 955
 956         if (copy_from_user(&cmd, optval, sizeof(cmd)))
 957                 return -EFAULT;
 958
 959         if (sin->sin_family != AF_INET)
 960                 return -EINVAL;
 961
 962         if (!cmd.tcpm_key || !cmd.tcpm_keylen) {
 963                 if (!tcp_sk(sk)->md5sig_info)
 964                         return -ENOENT;
 965                 return tcp_v4_md5_do_del(sk, sin->sin_addr.s_addr);
 966         }
 967
 968         if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
 969                 return -EINVAL;
 970
 971         if (!tcp_sk(sk)->md5sig_info) {
 972                 struct tcp_sock *tp = tcp_sk(sk);
 973                 struct tcp_md5sig_info *p = kzalloc(sizeof(*p), GFP_KERNEL);
 974
 975                 if (!p)
 976                         return -EINVAL;
 977
 978                 tp->md5sig_info = p;
 979                 sk->sk_route_caps &= ~NETIF_F_GSO_MASK;
 980         }
 981
 982         newkey = kmemdup(cmd.tcpm_key, cmd.tcpm_keylen, GFP_KERNEL);
 983         if (!newkey)
 984                 return -ENOMEM;
 985         return tcp_v4_md5_do_add(sk, sin->sin_addr.s_addr,
 986                                  newkey, cmd.tcpm_keylen);
 987 }
 988
 989 static int tcp_v4_md5_hash_pseudoheader(struct tcp_md5sig_pool *hp,
 990                                         __be32 daddr, __be32 saddr, int nbytes)
 991 {
 992         struct tcp4_pseudohdr *bp;
 993         struct scatterlist sg;
 994
 995         bp = &hp->md5_blk.ip4;
 996
 997         /*
 998          * 1. the TCP pseudo-header (in the order: source IP address,
 999          * destination IP address, zero-padded protocol number, and
1000          * segment length)
1001          */
1002         bp->saddr = saddr;
1003         bp->daddr = daddr;
1004         bp->pad = 0;
1005         bp->protocol = IPPROTO_TCP;
1006         bp->len = cpu_to_be16(nbytes);
1007
1008         sg_init_one(&sg, bp, sizeof(*bp));
1009         return crypto_hash_update(&hp->md5_desc, &sg, sizeof(*bp));
1010 }
1011
1012 static int tcp_v4_md5_hash_hdr(char *md5_hash, struct tcp_md5sig_key *key,
1013                                __be32 daddr, __be32 saddr, struct tcphdr *th)
1014 {
1015         struct tcp_md5sig_pool *hp;
1016         struct hash_desc *desc;
1017
1018         hp = tcp_get_md5sig_pool();
1019         if (!hp)
1020                 goto clear_hash_noput;
1021         desc = &hp->md5_desc;
1022
1023         if (crypto_hash_init(desc))
1024                 goto clear_hash;
1025         if (tcp_v4_md5_hash_pseudoheader(hp, daddr, saddr, th->doff << 2))
1026                 goto clear_hash;
1027         if (tcp_md5_hash_header(hp, th))
1028                 goto clear_hash;
1029         if (tcp_md5_hash_key(hp, key))
1030                 goto clear_hash;
1031         if (crypto_hash_final(desc, md5_hash))
1032                 goto clear_hash;
1033
1034         tcp_put_md5sig_pool();
1035         return 0;
1036
1037 clear_hash:
1038         tcp_put_md5sig_pool();
1039 clear_hash_noput:
1040         memset(md5_hash, 0, 16);
1041         return 1;
1042 }
1043
1044 int tcp_v4_md5_hash_skb(char *md5_hash, struct tcp_md5sig_key *key,
1045                         struct sock *sk, struct request_sock *req,
1046                         struct sk_buff *skb)
1047 {
1048         struct tcp_md5sig_pool *hp;
1049         struct hash_desc *desc;
1050         struct tcphdr *th = tcp_hdr(skb);
1051         __be32 saddr, daddr;
1052
1053         if (sk) {
1054                 saddr = inet_sk(sk)->saddr;
1055                 daddr = inet_sk(sk)->daddr;
1056         } else if (req) {
1057                 saddr = inet_rsk(req)->loc_addr;
1058                 daddr = inet_rsk(req)->rmt_addr;
1059         } else {
1060                 const struct iphdr *iph = ip_hdr(skb);
1061                 saddr = iph->saddr;
1062                 daddr = iph->daddr;
1063         }
1064
1065         hp = tcp_get_md5sig_pool();
1066         if (!hp)
1067                 goto clear_hash_noput;
1068         desc = &hp->md5_desc;
1069
1070         if (crypto_hash_init(desc))
1071                 goto clear_hash;
1072
1073         if (tcp_v4_md5_hash_pseudoheader(hp, daddr, saddr, skb->len))
1074                 goto clear_hash;
1075         if (tcp_md5_hash_header(hp, th))
1076                 goto clear_hash;
1077         if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
1078                 goto clear_hash;
1079         if (tcp_md5_hash_key(hp, key))
1080                 goto clear_hash;
1081         if (crypto_hash_final(desc, md5_hash))
1082                 goto clear_hash;
1083
1084         tcp_put_md5sig_pool();
1085         return 0;
1086
1087 clear_hash:
1088         tcp_put_md5sig_pool();
1089 clear_hash_noput:
1090         memset(md5_hash, 0, 16);
1091         return 1;
1092 }
1093
1094 EXPORT_SYMBOL(tcp_v4_md5_hash_skb);
1095
1096 static int tcp_v4_inbound_md5_hash(struct sock *sk, struct sk_buff *skb)
1097 {
1098         /*
1099          * This gets called for each TCP segment that arrives
1100          * so we want to be efficient.
1101          * We have 3 drop cases:
1102          * o No MD5 hash and one expected.
1103          * o MD5 hash and we're not expecting one.
1104          * o MD5 hash and its wrong.
1105          */
1106         __u8 *hash_location = NULL;
1107         struct tcp_md5sig_key *hash_expected;
1108         const struct iphdr *iph = ip_hdr(skb);
1109         struct tcphdr *th = tcp_hdr(skb);
1110         int genhash;
1111         unsigned char newhash[16];
1112
1113         hash_expected = tcp_v4_md5_do_lookup(sk, iph->saddr);
1114         hash_location = tcp_parse_md5sig_option(th);
1115
1116         /* We've parsed the options - do we have a hash? */
1117         if (!hash_expected && !hash_location)
1118                 return 0;
1119
1120         if (hash_expected && !hash_location) {
1121                 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND);
1122                 return 1;
1123         }
1124
1125         if (!hash_expected && hash_location) {
1126                 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED);
1127                 return 1;
1128         }
1129
1130         /* Okay, so this is hash_expected and hash_location -
1131          * so we need to calculate the checksum.
1132          */
1133         genhash = tcp_v4_md5_hash_skb(newhash,
1134                                       hash_expected,
1135                                       NULL, NULL, skb);
1136
1137         if (genhash || memcmp(hash_location, newhash, 16) != 0) {
1138                 if (net_ratelimit()) {
1139                         printk(KERN_INFO "MD5 Hash failed for (%pI4, %d)->(%pI4, %d)%s\n",
1140                                &iph->saddr, ntohs(th->source),
1141                                &iph->daddr, ntohs(th->dest),
1142                                genhash ? " tcp_v4_calc_md5_hash failed" : "");
1143                 }
1144                 return 1;
1145         }
1146         return 0;
1147 }
1148
1149 #endif
1150
1151 struct request_sock_ops tcp_request_sock_ops __read_mostly = {
1152         .family         =       PF_INET,
1153         .obj_size       =       sizeof(struct tcp_request_sock),
1154         .rtx_syn_ack    =       tcp_v4_send_synack,
1155         .send_ack       =       tcp_v4_reqsk_send_ack,
1156         .destructor     =       tcp_v4_reqsk_destructor,
1157         .send_reset     =       tcp_v4_send_reset,
1158 };
1159
#ifdef CONFIG_TCP_MD5SIG
/* IPv4-specific request_sock hooks; only the MD5 key lookup is set. */
static struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = {
        .md5_lookup = tcp_v4_reqsk_md5_lookup,
};
#endif
1165
1166 static struct timewait_sock_ops tcp_timewait_sock_ops = {
1167         .twsk_obj_size  = sizeof(struct tcp_timewait_sock),
1168         .twsk_unique    = tcp_twsk_unique,
1169         .twsk_destructor= tcp_twsk_destructor,
1170 };
1171
1172 int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
1173 {
1174         struct inet_request_sock *ireq;
1175         struct tcp_options_received tmp_opt;
1176         struct request_sock *req;
1177         __be32 saddr = ip_hdr(skb)->saddr;
1178         __be32 daddr = ip_hdr(skb)->daddr;
1179         __u32 isn = TCP_SKB_CB(skb)->when;
1180         struct dst_entry *dst = NULL;
1181 #ifdef CONFIG_SYN_COOKIES
1182         int want_cookie = 0;
1183 #else
1184 #define want_cookie 0 /* Argh, why doesn't gcc optimize this :( */
1185 #endif
1186
1187         /* Never answer to SYNs send to broadcast or multicast */
1188         if (skb->rtable->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))
1189                 goto drop;
1190
1191         /* TW buckets are converted to open requests without
1192          * limitations, they conserve resources and peer is
1193          * evidently real one.
1194          */
1195         if (inet_csk_reqsk_queue_is_full(sk) && !isn) {
1196 #ifdef CONFIG_SYN_COOKIES
1197                 if (sysctl_tcp_syncookies) {
1198                         want_cookie = 1;
1199                 } else
1200 #endif
1201                 goto drop;
1202         }
1203
1204         /* Accept backlog is full. If we have already queued enough
1205          * of warm entries in syn queue, drop request. It is better than
1206          * clogging syn queue with openreqs with exponentially increasing
1207          * timeout.
1208          */
1209         if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1)
1210                 goto drop;
1211
1212         req = inet_reqsk_alloc(&tcp_request_sock_ops);
1213         if (!req)
1214                 goto drop;
1215
1216 #ifdef CONFIG_TCP_MD5SIG
1217         tcp_rsk(req)->af_specific = &tcp_request_sock_ipv4_ops;
1218 #endif
1219
1220         tcp_clear_options(&tmp_opt);
1221         tmp_opt.mss_clamp = 536;
1222         tmp_opt.user_mss  = tcp_sk(sk)->rx_opt.user_mss;
1223
1224         tcp_parse_options(skb, &tmp_opt, 0);
1225
1226         if (want_cookie && !tmp_opt.saw_tstamp)
1227                 tcp_clear_options(&tmp_opt);
1228
1229         if (tmp_opt.saw_tstamp && !tmp_opt.rcv_tsval) {
1230                 /* Some OSes (unknown ones, but I see them on web server, which
1231                  * contains information interesting only for windows'
1232                  * users) do not send their stamp in SYN. It is easy case.
1233                  * We simply do not advertise TS support.
1234                  */
1235                 tmp_opt.saw_tstamp = 0;
1236                 tmp_opt.tstamp_ok  = 0;
1237         }
1238         tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
1239
1240         tcp_openreq_init(req, &tmp_opt, skb);
1241
1242         ireq = inet_rsk(req);
1243         ireq->loc_addr = daddr;
1244         ireq->rmt_addr = saddr;
1245         ireq->no_srccheck = inet_sk(sk)->transparent;
1246         ireq->opt = tcp_v4_save_options(sk, skb);
1247
1248         if (security_inet_conn_request(sk, skb, req))
1249                 goto drop_and_free;
1250
1251         if (!want_cookie)
1252                 TCP_ECN_create_request(req, tcp_hdr(skb));
1253
1254         if (want_cookie) {
1255 #ifdef CONFIG_SYN_COOKIES
1256                 syn_flood_warning(skb);
1257                 req->cookie_ts = tmp_opt.tstamp_ok;
1258 #endif
1259                 isn = cookie_v4_init_sequence(sk, skb, &req->mss);
1260         } else if (!isn) {
1261                 struct inet_peer *peer = NULL;
1262
1263                 /* VJ's idea. We save last timestamp seen
1264                  * from the destination in peer table, when entering
1265                  * state TIME-WAIT, and check against it before
1266                  * accepting new connection request.
1267                  *
1268                  * If "isn" is not zero, this request hit alive
1269                  * timewait bucket, so that all the necessary checks
1270                  * are made in the function processing timewait state.
1271                  */
1272                 if (tmp_opt.saw_tstamp &&
1273                     tcp_death_row.sysctl_tw_recycle &&
1274                     (dst = inet_csk_route_req(sk, req)) != NULL &&
1275                     (peer = rt_get_peer((struct rtable *)dst)) != NULL &&
1276                     peer->v4daddr == saddr) {
1277                         if (get_seconds() < peer->tcp_ts_stamp + TCP_PAWS_MSL &&
1278                             (s32)(peer->tcp_ts - req->ts_recent) >
1279                                                         TCP_PAWS_WINDOW) {
1280                                 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSPASSIVEREJECTED);
1281                                 goto drop_and_release;
1282                         }
1283                 }
1284                 /* Kill the following clause, if you dislike this way. */
1285                 else if (!sysctl_tcp_syncookies &&
1286                          (sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) <
1287                           (sysctl_max_syn_backlog >> 2)) &&
1288                          (!peer || !peer->tcp_ts_stamp) &&
1289                          (!dst || !dst_metric(dst, RTAX_RTT))) {
1290                         /* Without syncookies last quarter of
1291                          * backlog is filled with destinations,
1292                          * proven to be alive.
1293                          * It means that we continue to communicate
1294                          * to destinations, already remembered
1295                          * to the moment of synflood.
1296                          */
1297                         LIMIT_NETDEBUG(KERN_DEBUG "TCP: drop open request from %pI4/%u\n",
1298                                        &saddr, ntohs(tcp_hdr(skb)->source));
1299                         goto drop_and_release;
1300                 }
1301
1302                 isn = tcp_v4_init_sequence(skb);
1303         }
1304         tcp_rsk(req)->snt_isn = isn;
1305
1306         if (__tcp_v4_send_synack(sk, req, dst) || want_cookie)
1307                 goto drop_and_free;
1308
1309         inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
1310         return 0;
1311
1312 drop_and_release:
1313         dst_release(dst);
1314 drop_and_free:
1315         reqsk_free(req);
1316 drop:
1317         return 0;
1318 }
1319
1320
1321 /*
1322  * The three way handshake has completed - we got a valid synack -
1323  * now create the new socket.
1324  */
1325 struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
1326                                   struct request_sock *req,
1327                                   struct dst_entry *dst)
1328 {
1329         struct inet_request_sock *ireq;
1330         struct inet_sock *newinet;
1331         struct tcp_sock *newtp;
1332         struct sock *newsk;
1333 #ifdef CONFIG_TCP_MD5SIG
1334         struct tcp_md5sig_key *key;
1335 #endif
1336
1337         if (sk_acceptq_is_full(sk))
1338                 goto exit_overflow;
1339
1340         if (!dst && (dst = inet_csk_route_req(sk, req)) == NULL)
1341                 goto exit;
1342
1343         newsk = tcp_create_openreq_child(sk, req, skb);
1344         if (!newsk)
1345                 goto exit;
1346
1347         newsk->sk_gso_type = SKB_GSO_TCPV4;
1348         sk_setup_caps(newsk, dst);
1349
1350         newtp                 = tcp_sk(newsk);
1351         newinet               = inet_sk(newsk);
1352         ireq                  = inet_rsk(req);
1353         newinet->daddr        = ireq->rmt_addr;
1354         newinet->rcv_saddr    = ireq->loc_addr;
1355         newinet->saddr        = ireq->loc_addr;
1356         newinet->opt          = ireq->opt;
1357         ireq->opt             = NULL;
1358         newinet->mc_index     = inet_iif(skb);
1359         newinet->mc_ttl       = ip_hdr(skb)->ttl;
1360         inet_csk(newsk)->icsk_ext_hdr_len = 0;
1361         if (newinet->opt)
1362                 inet_csk(newsk)->icsk_ext_hdr_len = newinet->opt->optlen;
1363         newinet->id = newtp->write_seq ^ jiffies;
1364
1365         tcp_mtup_init(newsk);
1366         tcp_sync_mss(newsk, dst_mtu(dst));
1367         newtp->advmss = dst_metric(dst, RTAX_ADVMSS);
1368         if (tcp_sk(sk)->rx_opt.user_mss &&
1369             tcp_sk(sk)->rx_opt.user_mss < newtp->advmss)
1370                 newtp->advmss = tcp_sk(sk)->rx_opt.user_mss;
1371
1372         tcp_initialize_rcv_mss(newsk);
1373
1374 #ifdef CONFIG_TCP_MD5SIG
1375         /* Copy over the MD5 key from the original socket */
1376         if ((key = tcp_v4_md5_do_lookup(sk, newinet->daddr)) != NULL) {
1377                 /*
1378                  * We're using one, so create a matching key
1379                  * on the newsk structure. If we fail to get
1380                  * memory, then we end up not copying the key
1381                  * across. Shucks.
1382                  */
1383                 char *newkey = kmemdup(key->key, key->keylen, GFP_ATOMIC);
1384                 if (newkey != NULL)
1385                         tcp_v4_md5_do_add(newsk, inet_sk(sk)->daddr,
1386                                           newkey, key->keylen);
1387                 newsk->sk_route_caps &= ~NETIF_F_GSO_MASK;
1388         }
1389 #endif
1390
1391         __inet_hash_nolisten(newsk);
1392         __inet_inherit_port(sk, newsk);
1393
1394         return newsk;
1395
1396 exit_overflow:
1397         NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
1398 exit:
1399         NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
1400         dst_release(dst);
1401         return NULL;
1402 }
1403
1404 static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
1405 {
1406         struct tcphdr *th = tcp_hdr(skb);
1407         const struct iphdr *iph = ip_hdr(skb);
1408         struct sock *nsk;
1409         struct request_sock **prev;
1410         /* Find possible connection requests. */
1411         struct request_sock *req = inet_csk_search_req(sk, &prev, th->source,
1412                                                        iph->saddr, iph->daddr);
1413         if (req)
1414                 return tcp_check_req(sk, skb, req, prev);
1415
1416         nsk = inet_lookup_established(sock_net(sk), &tcp_hashinfo, iph->saddr,
1417                         th->source, iph->daddr, th->dest, inet_iif(skb));
1418
1419         if (nsk) {
1420                 if (nsk->sk_state != TCP_TIME_WAIT) {
1421                         bh_lock_sock(nsk);
1422                         return nsk;
1423                 }
1424                 inet_twsk_put(inet_twsk(nsk));
1425                 return NULL;
1426         }
1427
1428 #ifdef CONFIG_SYN_COOKIES
1429         if (!th->rst && !th->syn && th->ack)
1430                 sk = cookie_v4_check(sk, skb, &(IPCB(skb)->opt));
1431 #endif
1432         return sk;
1433 }
1434
1435 static __sum16 tcp_v4_checksum_init(struct sk_buff *skb)
1436 {
1437         const struct iphdr *iph = ip_hdr(skb);
1438
1439         if (skb->ip_summed == CHECKSUM_COMPLETE) {
1440                 if (!tcp_v4_check(skb->len, iph->saddr,
1441                                   iph->daddr, skb->csum)) {
1442                         skb->ip_summed = CHECKSUM_UNNECESSARY;
1443                         return 0;
1444                 }
1445         }
1446
1447         skb->csum = csum_tcpudp_nofold(iph->saddr, iph->daddr,
1448                                        skb->len, IPPROTO_TCP, 0);
1449
1450         if (skb->len <= 76) {
1451                 return __skb_checksum_complete(skb);
1452         }
1453         return 0;
1454 }
1455
1456
1457 /* The socket must have it's spinlock held when we get
1458  * here.
1459  *
1460  * We have a potential double-lock case here, so even when
1461  * doing backlog processing we use the BH locking scheme.
1462  * This is because we cannot sleep with the original spinlock
1463  * held.
1464  */
1465 int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
1466 {
1467         struct sock *rsk;
1468 #ifdef CONFIG_TCP_MD5SIG
1469         /*
1470          * We really want to reject the packet as early as possible
1471          * if:
1472          *  o We're expecting an MD5'd packet and this is no MD5 tcp option
1473          *  o There is an MD5 option and we're not expecting one
1474          */
1475         if (tcp_v4_inbound_md5_hash(sk, skb))
1476                 goto discard;
1477 #endif
1478
1479         if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
1480                 TCP_CHECK_TIMER(sk);
1481                 if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len)) {
1482                         rsk = sk;
1483                         goto reset;
1484                 }
1485                 TCP_CHECK_TIMER(sk);
1486                 return 0;
1487         }
1488
1489         if (skb->len < tcp_hdrlen(skb) || tcp_checksum_complete(skb))
1490                 goto csum_err;
1491
1492         if (sk->sk_state == TCP_LISTEN) {
1493                 struct sock *nsk = tcp_v4_hnd_req(sk, skb);
1494                 if (!nsk)
1495                         goto discard;
1496
1497                 if (nsk != sk) {
1498                         if (tcp_child_process(sk, nsk, skb)) {
1499                                 rsk = nsk;
1500                                 goto reset;
1501                         }
1502                         return 0;
1503                 }
1504         }
1505
1506         TCP_CHECK_TIMER(sk);
1507         if (tcp_rcv_state_process(sk, skb, tcp_hdr(skb), skb->len)) {
1508                 rsk = sk;
1509                 goto reset;
1510         }
1511         TCP_CHECK_TIMER(sk);
1512         return 0;
1513
1514 reset:
1515         tcp_v4_send_reset(rsk, skb);
1516 discard:
1517         kfree_skb(skb);
1518         /* Be careful here. If this function gets more complicated and
1519          * gcc suffers from register pressure on the x86, sk (in %ebx)
1520          * might be destroyed here. This current version compiles correctly,
1521          * but you have been warned.
1522          */
1523         return 0;
1524
1525 csum_err:
1526         TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS);
1527         goto discard;
1528 }
1529
1530 /*
1531  *      From tcp_input.c
1532  */
1533
1534 int tcp_v4_rcv(struct sk_buff *skb)
1535 {
1536         const struct iphdr *iph;
1537         struct tcphdr *th;
1538         struct sock *sk;
1539         int ret;
1540         struct net *net = dev_net(skb->dev);
1541
1542         if (skb->pkt_type != PACKET_HOST)
1543                 goto discard_it;
1544
1545         /* Count it even if it's bad */
1546         TCP_INC_STATS_BH(net, TCP_MIB_INSEGS);
1547
1548         if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
1549                 goto discard_it;
1550
1551         th = tcp_hdr(skb);
1552
1553         if (th->doff < sizeof(struct tcphdr) / 4)
1554                 goto bad_packet;
1555         if (!pskb_may_pull(skb, th->doff * 4))
1556                 goto discard_it;
1557
1558         /* An explanation is required here, I think.
1559          * Packet length and doff are validated by header prediction,
1560          * provided case of th->doff==0 is eliminated.
1561          * So, we defer the checks. */
1562         if (!skb_csum_unnecessary(skb) && tcp_v4_checksum_init(skb))
1563                 goto bad_packet;
1564
1565         th = tcp_hdr(skb);
1566         iph = ip_hdr(skb);
1567         TCP_SKB_CB(skb)->seq = ntohl(th->seq);
1568         TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
1569                                     skb->len - th->doff * 4);
1570         TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
1571         TCP_SKB_CB(skb)->when    = 0;
1572         TCP_SKB_CB(skb)->flags   = iph->tos;
1573         TCP_SKB_CB(skb)->sacked  = 0;
1574
1575         sk = __inet_lookup_skb(&tcp_hashinfo, skb, th->source, th->dest);
1576         if (!sk)
1577                 goto no_tcp_socket;
1578
1579 process:
1580         if (sk->sk_state == TCP_TIME_WAIT)
1581                 goto do_time_wait;
1582
1583         if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
1584                 goto discard_and_relse;
1585         nf_reset(skb);
1586
1587         if (sk_filter(sk, skb))
1588                 goto discard_and_relse;
1589
1590         skb->dev = NULL;
1591
1592         bh_lock_sock_nested(sk);
1593         ret = 0;
1594         if (!sock_owned_by_user(sk)) {
1595 #ifdef CONFIG_NET_DMA
1596                 struct tcp_sock *tp = tcp_sk(sk);
1597                 if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list)
1598                         tp->ucopy.dma_chan = dma_find_channel(DMA_MEMCPY);
1599                 if (tp->ucopy.dma_chan)
1600                         ret = tcp_v4_do_rcv(sk, skb);
1601                 else
1602 #endif
1603                 {
1604                         if (!tcp_prequeue(sk, skb))
1605                         ret = tcp_v4_do_rcv(sk, skb);
1606                 }
1607         } else
1608                 sk_add_backlog(sk, skb);
1609         bh_unlock_sock(sk);
1610
1611         sock_put(sk);
1612
1613         return ret;
1614
1615 no_tcp_socket:
1616         if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
1617                 goto discard_it;
1618
1619         if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {
1620 bad_packet:
1621                 TCP_INC_STATS_BH(net, TCP_MIB_INERRS);
1622         } else {
1623                 tcp_v4_send_reset(NULL, skb);
1624         }
1625
1626 discard_it:
1627         /* Discard frame. */
1628         kfree_skb(skb);
1629         return 0;
1630
1631 discard_and_relse:
1632         sock_put(sk);
1633         goto discard_it;
1634
1635 do_time_wait:
1636         if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
1637                 inet_twsk_put(inet_twsk(sk));
1638                 goto discard_it;
1639         }
1640
1641         if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {
1642                 TCP_INC_STATS_BH(net, TCP_MIB_INERRS);
1643                 inet_twsk_put(inet_twsk(sk));
1644                 goto discard_it;
1645         }
1646         switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
1647         case TCP_TW_SYN: {
1648                 struct sock *sk2 = inet_lookup_listener(dev_net(skb->dev),
1649                                                         &tcp_hashinfo,
1650                                                         iph->daddr, th->dest,
1651                                                         inet_iif(skb));
1652                 if (sk2) {
1653                         inet_twsk_deschedule(inet_twsk(sk), &tcp_death_row);
1654                         inet_twsk_put(inet_twsk(sk));
1655                         sk = sk2;
1656                         goto process;
1657                 }
1658                 /* Fall through to ACK */
1659         }
1660         case TCP_TW_ACK:
1661                 tcp_v4_timewait_ack(sk, skb);
1662                 break;
1663         case TCP_TW_RST:
1664                 goto no_tcp_socket;
1665         case TCP_TW_SUCCESS:;
1666         }
1667         goto discard_it;
1668 }
1669
1670 /* VJ's idea. Save last timestamp seen from this destination
1671  * and hold it at least for normal timewait interval to use for duplicate
1672  * segment detection in subsequent connections, before they enter synchronized
1673  * state.
1674  */
1675
1676 int tcp_v4_remember_stamp(struct sock *sk)
1677 {
1678         struct inet_sock *inet = inet_sk(sk);
1679         struct tcp_sock *tp = tcp_sk(sk);
1680         struct rtable *rt = (struct rtable *)__sk_dst_get(sk);
1681         struct inet_peer *peer = NULL;
1682         int release_it = 0;
1683
1684         if (!rt || rt->rt_dst != inet->daddr) {
1685                 peer = inet_getpeer(inet->daddr, 1);
1686                 release_it = 1;
1687         } else {
1688                 if (!rt->peer)
1689                         rt_bind_peer(rt, 1);
1690                 peer = rt->peer;
1691         }
1692
1693         if (peer) {
1694                 if ((s32)(peer->tcp_ts - tp->rx_opt.ts_recent) <= 0 ||
1695                     (peer->tcp_ts_stamp + TCP_PAWS_MSL < get_seconds() &&
1696                      peer->tcp_ts_stamp <= tp->rx_opt.ts_recent_stamp)) {
1697                         peer->tcp_ts_stamp = tp->rx_opt.ts_recent_stamp;
1698                         peer->tcp_ts = tp->rx_opt.ts_recent;
1699                 }
1700                 if (release_it)
1701                         inet_putpeer(peer);
1702                 return 1;
1703         }
1704
1705         return 0;
1706 }
1707
1708 int tcp_v4_tw_remember_stamp(struct inet_timewait_sock *tw)
1709 {
1710         struct inet_peer *peer = inet_getpeer(tw->tw_daddr, 1);
1711
1712         if (peer) {
1713                 const struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw);
1714
1715                 if ((s32)(peer->tcp_ts - tcptw->tw_ts_recent) <= 0 ||
1716                     (peer->tcp_ts_stamp + TCP_PAWS_MSL < get_seconds() &&
1717                      peer->tcp_ts_stamp <= tcptw->tw_ts_recent_stamp)) {
1718                         peer->tcp_ts_stamp = tcptw->tw_ts_recent_stamp;
1719                         peer->tcp_ts       = tcptw->tw_ts_recent;
1720                 }
1721                 inet_putpeer(peer);
1722                 return 1;
1723         }
1724
1725         return 0;
1726 }
1727
1728 struct inet_connection_sock_af_ops ipv4_specific = {
1729         .queue_xmit        = ip_queue_xmit,
1730         .send_check        = tcp_v4_send_check,
1731         .rebuild_header    = inet_sk_rebuild_header,
1732         .conn_request      = tcp_v4_conn_request,
1733         .syn_recv_sock     = tcp_v4_syn_recv_sock,
1734         .remember_stamp    = tcp_v4_remember_stamp,
1735         .net_header_len    = sizeof(struct iphdr),
1736         .setsockopt        = ip_setsockopt,
1737         .getsockopt        = ip_getsockopt,
1738         .addr2sockaddr     = inet_csk_addr2sockaddr,
1739         .sockaddr_len      = sizeof(struct sockaddr_in),
1740         .bind_conflict     = inet_csk_bind_conflict,
1741 #ifdef CONFIG_COMPAT
1742         .compat_setsockopt = compat_ip_setsockopt,
1743         .compat_getsockopt = compat_ip_getsockopt,
1744 #endif
1745 };
1746
#ifdef CONFIG_TCP_MD5SIG
/* IPv4 MD5 signature hooks, installed as tp->af_specific in tcp_v4_init_sock(). */
static struct tcp_sock_af_ops tcp_sock_ipv4_specific = {
        .md5_lookup     = tcp_v4_md5_lookup,
        .md5_add        = tcp_v4_md5_add_func,
        .md5_parse      = tcp_v4_parse_md5_keys,
        .calc_md5_hash  = tcp_v4_md5_hash_skb,
};
#endif
1755
1756 /* NOTE: A lot of things set to zero explicitly by call to
1757  *       sk_alloc() so need not be done here.
1758  */
1759 static int tcp_v4_init_sock(struct sock *sk)
1760 {
1761         struct inet_connection_sock *icsk = inet_csk(sk);
1762         struct tcp_sock *tp = tcp_sk(sk);
1763
1764         skb_queue_head_init(&tp->out_of_order_queue);
1765         tcp_init_xmit_timers(sk);
1766         tcp_prequeue_init(tp);
1767
1768         icsk->icsk_rto = TCP_TIMEOUT_INIT;
1769         tp->mdev = TCP_TIMEOUT_INIT;
1770
1771         /* So many TCP implementations out there (incorrectly) count the
1772          * initial SYN frame in their delayed-ACK and congestion control
1773          * algorithms that we must have the following bandaid to talk
1774          * efficiently to them.  -DaveM
1775          */
1776         tp->snd_cwnd = 2;
1777
1778         /* See draft-stevens-tcpca-spec-01 for discussion of the
1779          * initialization of these values.
1780          */
1781         tp->snd_ssthresh = 0x7fffffff;  /* Infinity */
1782         tp->snd_cwnd_clamp = ~0;
1783         tp->mss_cache = 536;
1784
1785         tp->reordering = sysctl_tcp_reordering;
1786         icsk->icsk_ca_ops = &tcp_init_congestion_ops;
1787
1788         sk->sk_state = TCP_CLOSE;
1789
1790         sk->sk_write_space = sk_stream_write_space;
1791         sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);
1792
1793         icsk->icsk_af_ops = &ipv4_specific;
1794         icsk->icsk_sync_mss = tcp_sync_mss;
1795 #ifdef CONFIG_TCP_MD5SIG
1796         tp->af_specific = &tcp_sock_ipv4_specific;
1797 #endif
1798
1799         sk->sk_sndbuf = sysctl_tcp_wmem[1];
1800         sk->sk_rcvbuf = sysctl_tcp_rmem[1];
1801
1802         local_bh_disable();
1803         percpu_counter_inc(&tcp_sockets_allocated);
1804         local_bh_enable();
1805
1806         return 0;
1807 }
1808
1809 void tcp_v4_destroy_sock(struct sock *sk)
1810 {
1811         struct tcp_sock *tp = tcp_sk(sk);
1812
1813         tcp_clear_xmit_timers(sk);
1814
1815         tcp_cleanup_congestion_control(sk);
1816
1817         /* Cleanup up the write buffer. */
1818         tcp_write_queue_purge(sk);
1819
1820         /* Cleans up our, hopefully empty, out_of_order_queue. */
1821         __skb_queue_purge(&tp->out_of_order_queue);
1822
1823 #ifdef CONFIG_TCP_MD5SIG
1824         /* Clean up the MD5 key list, if any */
1825         if (tp->md5sig_info) {
1826                 tcp_v4_clear_md5_list(sk);
1827                 kfree(tp->md5sig_info);
1828                 tp->md5sig_info = NULL;
1829         }
1830 #endif
1831
1832 #ifdef CONFIG_NET_DMA
1833         /* Cleans up our sk_async_wait_queue */
1834         __skb_queue_purge(&sk->sk_async_wait_queue);
1835 #endif
1836
1837         /* Clean prequeue, it must be empty really */
1838         __skb_queue_purge(&tp->ucopy.prequeue);
1839
1840         /* Clean up a referenced TCP bind bucket. */
1841         if (inet_csk(sk)->icsk_bind_hash)
1842                 inet_put_port(sk);
1843
1844         /*
1845          * If sendmsg cached page exists, toss it.
1846          */
1847         if (sk->sk_sndmsg_page) {
1848                 __free_page(sk->sk_sndmsg_page);
1849                 sk->sk_sndmsg_page = NULL;
1850         }
1851
1852         percpu_counter_dec(&tcp_sockets_allocated);
1853 }
1854
1855 EXPORT_SYMBOL(tcp_v4_destroy_sock);
1856
1857 #ifdef CONFIG_PROC_FS
1858 /* Proc filesystem TCP sock list dumping. */
1859
1860 static inline struct inet_timewait_sock *tw_head(struct hlist_nulls_head *head)
1861 {
1862         return hlist_nulls_empty(head) ? NULL :
1863                 list_entry(head->first, struct inet_timewait_sock, tw_node);
1864 }
1865
1866 static inline struct inet_timewait_sock *tw_next(struct inet_timewait_sock *tw)
1867 {
1868         return !is_a_nulls(tw->tw_node.next) ?
1869                 hlist_nulls_entry(tw->tw_node.next, typeof(*tw), tw_node) : NULL;
1870 }
1871
1872 static void *listening_get_next(struct seq_file *seq, void *cur)
1873 {
1874         struct inet_connection_sock *icsk;
1875         struct hlist_nulls_node *node;
1876         struct sock *sk = cur;
1877         struct inet_listen_hashbucket *ilb;
1878         struct tcp_iter_state *st = seq->private;
1879         struct net *net = seq_file_net(seq);
1880
1881         if (!sk) {
1882                 st->bucket = 0;
1883                 ilb = &tcp_hashinfo.listening_hash[0];
1884                 spin_lock_bh(&ilb->lock);
1885                 sk = sk_nulls_head(&ilb->head);
1886                 goto get_sk;
1887         }
1888         ilb = &tcp_hashinfo.listening_hash[st->bucket];
1889         ++st->num;
1890
1891         if (st->state == TCP_SEQ_STATE_OPENREQ) {
1892                 struct request_sock *req = cur;
1893
1894                 icsk = inet_csk(st->syn_wait_sk);
1895                 req = req->dl_next;
1896                 while (1) {
1897                         while (req) {
1898                                 if (req->rsk_ops->family == st->family) {
1899                                         cur = req;
1900                                         goto out;
1901                                 }
1902                                 req = req->dl_next;
1903                         }
1904                         if (++st->sbucket >= icsk->icsk_accept_queue.listen_opt->nr_table_entries)
1905                                 break;
1906 get_req:
1907                         req = icsk->icsk_accept_queue.listen_opt->syn_table[st->sbucket];
1908                 }
1909                 sk        = sk_next(st->syn_wait_sk);
1910                 st->state = TCP_SEQ_STATE_LISTENING;
1911                 read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
1912         } else {
1913                 icsk = inet_csk(sk);
1914                 read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
1915                 if (reqsk_queue_len(&icsk->icsk_accept_queue))
1916                         goto start_req;
1917                 read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
1918                 sk = sk_next(sk);
1919         }
1920 get_sk:
1921         sk_nulls_for_each_from(sk, node) {
1922                 if (sk->sk_family == st->family && net_eq(sock_net(sk), net)) {
1923                         cur = sk;
1924                         goto out;
1925                 }
1926                 icsk = inet_csk(sk);
1927                 read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
1928                 if (reqsk_queue_len(&icsk->icsk_accept_queue)) {
1929 start_req:
1930                         st->uid         = sock_i_uid(sk);
1931                         st->syn_wait_sk = sk;
1932                         st->state       = TCP_SEQ_STATE_OPENREQ;
1933                         st->sbucket     = 0;
1934                         goto get_req;
1935                 }
1936                 read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
1937         }
1938         spin_unlock_bh(&ilb->lock);
1939         if (++st->bucket < INET_LHTABLE_SIZE) {
1940                 ilb = &tcp_hashinfo.listening_hash[st->bucket];
1941                 spin_lock_bh(&ilb->lock);
1942                 sk = sk_nulls_head(&ilb->head);
1943                 goto get_sk;
1944         }
1945         cur = NULL;
1946 out:
1947         return cur;
1948 }
1949
1950 static void *listening_get_idx(struct seq_file *seq, loff_t *pos)
1951 {
1952         void *rc = listening_get_next(seq, NULL);
1953
1954         while (rc && *pos) {
1955                 rc = listening_get_next(seq, rc);
1956                 --*pos;
1957         }
1958         return rc;
1959 }
1960
1961 static inline int empty_bucket(struct tcp_iter_state *st)
1962 {
1963         return hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].chain) &&
1964                 hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].twchain);
1965 }
1966
1967 static void *established_get_first(struct seq_file *seq)
1968 {
1969         struct tcp_iter_state *st = seq->private;
1970         struct net *net = seq_file_net(seq);
1971         void *rc = NULL;
1972
1973         for (st->bucket = 0; st->bucket < tcp_hashinfo.ehash_size; ++st->bucket) {
1974                 struct sock *sk;
1975                 struct hlist_nulls_node *node;
1976                 struct inet_timewait_sock *tw;
1977                 spinlock_t *lock = inet_ehash_lockp(&tcp_hashinfo, st->bucket);
1978
1979                 /* Lockless fast path for the common case of empty buckets */
1980                 if (empty_bucket(st))
1981                         continue;
1982
1983                 spin_lock_bh(lock);
1984                 sk_nulls_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) {
1985                         if (sk->sk_family != st->family ||
1986                             !net_eq(sock_net(sk), net)) {
1987                                 continue;
1988                         }
1989                         rc = sk;
1990                         goto out;
1991                 }
1992                 st->state = TCP_SEQ_STATE_TIME_WAIT;
1993                 inet_twsk_for_each(tw, node,
1994                                    &tcp_hashinfo.ehash[st->bucket].twchain) {
1995                         if (tw->tw_family != st->family ||
1996                             !net_eq(twsk_net(tw), net)) {
1997                                 continue;
1998                         }
1999                         rc = tw;
2000                         goto out;
2001                 }
2002                 spin_unlock_bh(lock);
2003                 st->state = TCP_SEQ_STATE_ESTABLISHED;
2004         }
2005 out:
2006         return rc;
2007 }
2008
2009 static void *established_get_next(struct seq_file *seq, void *cur)
2010 {
2011         struct sock *sk = cur;
2012         struct inet_timewait_sock *tw;
2013         struct hlist_nulls_node *node;
2014         struct tcp_iter_state *st = seq->private;
2015         struct net *net = seq_file_net(seq);
2016
2017         ++st->num;
2018
2019         if (st->state == TCP_SEQ_STATE_TIME_WAIT) {
2020                 tw = cur;
2021                 tw = tw_next(tw);
2022 get_tw:
2023                 while (tw && (tw->tw_family != st->family || !net_eq(twsk_net(tw), net))) {
2024                         tw = tw_next(tw);
2025                 }
2026                 if (tw) {
2027                         cur = tw;
2028                         goto out;
2029                 }
2030                 spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
2031                 st->state = TCP_SEQ_STATE_ESTABLISHED;
2032
2033                 /* Look for next non empty bucket */
2034                 while (++st->bucket < tcp_hashinfo.ehash_size &&
2035                                 empty_bucket(st))
2036                         ;
2037                 if (st->bucket >= tcp_hashinfo.ehash_size)
2038                         return NULL;
2039
2040                 spin_lock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
2041                 sk = sk_nulls_head(&tcp_hashinfo.ehash[st->bucket].chain);
2042         } else
2043                 sk = sk_nulls_next(sk);
2044
2045         sk_nulls_for_each_from(sk, node) {
2046                 if (sk->sk_family == st->family && net_eq(sock_net(sk), net))
2047                         goto found;
2048         }
2049
2050         st->state = TCP_SEQ_STATE_TIME_WAIT;
2051         tw = tw_head(&tcp_hashinfo.ehash[st->bucket].twchain);
2052         goto get_tw;
2053 found:
2054         cur = sk;
2055 out:
2056         return cur;
2057 }
2058
2059 static void *established_get_idx(struct seq_file *seq, loff_t pos)
2060 {
2061         void *rc = established_get_first(seq);
2062
2063         while (rc && pos) {
2064                 rc = established_get_next(seq, rc);
2065                 --pos;
2066         }
2067         return rc;
2068 }
2069
2070 static void *tcp_get_idx(struct seq_file *seq, loff_t pos)
2071 {
2072         void *rc;
2073         struct tcp_iter_state *st = seq->private;
2074
2075         st->state = TCP_SEQ_STATE_LISTENING;
2076         rc        = listening_get_idx(seq, &pos);
2077
2078         if (!rc) {
2079                 st->state = TCP_SEQ_STATE_ESTABLISHED;
2080                 rc        = established_get_idx(seq, pos);
2081         }
2082
2083         return rc;
2084 }
2085
2086 static void *tcp_seq_start(struct seq_file *seq, loff_t *pos)
2087 {
2088         struct tcp_iter_state *st = seq->private;
2089         st->state = TCP_SEQ_STATE_LISTENING;
2090         st->num = 0;
2091         return *pos ? tcp_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
2092 }
2093
2094 static void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2095 {
2096         void *rc = NULL;
2097         struct tcp_iter_state *st;
2098
2099         if (v == SEQ_START_TOKEN) {
2100                 rc = tcp_get_idx(seq, 0);
2101                 goto out;
2102         }
2103         st = seq->private;
2104
2105         switch (st->state) {
2106         case TCP_SEQ_STATE_OPENREQ:
2107         case TCP_SEQ_STATE_LISTENING:
2108                 rc = listening_get_next(seq, v);
2109                 if (!rc) {
2110                         st->state = TCP_SEQ_STATE_ESTABLISHED;
2111                         rc        = established_get_first(seq);
2112                 }
2113                 break;
2114         case TCP_SEQ_STATE_ESTABLISHED:
2115         case TCP_SEQ_STATE_TIME_WAIT:
2116                 rc = established_get_next(seq, v);
2117                 break;
2118         }
2119 out:
2120         ++*pos;
2121         return rc;
2122 }
2123
2124 static void tcp_seq_stop(struct seq_file *seq, void *v)
2125 {
2126         struct tcp_iter_state *st = seq->private;
2127
2128         switch (st->state) {
2129         case TCP_SEQ_STATE_OPENREQ:
2130                 if (v) {
2131                         struct inet_connection_sock *icsk = inet_csk(st->syn_wait_sk);
2132                         read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
2133                 }
2134         case TCP_SEQ_STATE_LISTENING:
2135                 if (v != SEQ_START_TOKEN)
2136                         spin_unlock_bh(&tcp_hashinfo.listening_hash[st->bucket].lock);
2137                 break;
2138         case TCP_SEQ_STATE_TIME_WAIT:
2139         case TCP_SEQ_STATE_ESTABLISHED:
2140                 if (v)
2141                         spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
2142                 break;
2143         }
2144 }
2145
2146 static int tcp_seq_open(struct inode *inode, struct file *file)
2147 {
2148         struct tcp_seq_afinfo *afinfo = PDE(inode)->data;
2149         struct tcp_iter_state *s;
2150         int err;
2151
2152         err = seq_open_net(inode, file, &afinfo->seq_ops,
2153                           sizeof(struct tcp_iter_state));
2154         if (err < 0)
2155                 return err;
2156
2157         s = ((struct seq_file *)file->private_data)->private;
2158         s->family               = afinfo->family;
2159         return 0;
2160 }
2161
2162 int tcp_proc_register(struct net *net, struct tcp_seq_afinfo *afinfo)
2163 {
2164         int rc = 0;
2165         struct proc_dir_entry *p;
2166
2167         afinfo->seq_fops.open           = tcp_seq_open;
2168         afinfo->seq_fops.read           = seq_read;
2169         afinfo->seq_fops.llseek         = seq_lseek;
2170         afinfo->seq_fops.release        = seq_release_net;
2171
2172         afinfo->seq_ops.start           = tcp_seq_start;
2173         afinfo->seq_ops.next            = tcp_seq_next;
2174         afinfo->seq_ops.stop            = tcp_seq_stop;
2175
2176         p = proc_create_data(afinfo->name, S_IRUGO, net->proc_net,
2177                              &afinfo->seq_fops, afinfo);
2178         if (!p)
2179                 rc = -ENOMEM;
2180         return rc;
2181 }
2182
2183 void tcp_proc_unregister(struct net *net, struct tcp_seq_afinfo *afinfo)
2184 {
2185         proc_net_remove(net, afinfo->name);
2186 }
2187
2188 static void get_openreq4(struct sock *sk, struct request_sock *req,
2189                          struct seq_file *f, int i, int uid, int *len)
2190 {
2191         const struct inet_request_sock *ireq = inet_rsk(req);
2192         int ttd = req->expires - jiffies;
2193
2194         seq_printf(f, "%4d: %08X:%04X %08X:%04X"
2195                 " %02X %08X:%08X %02X:%08lX %08X %5d %8d %u %d %p%n",
2196                 i,
2197                 ireq->loc_addr,
2198                 ntohs(inet_sk(sk)->sport),
2199                 ireq->rmt_addr,
2200                 ntohs(ireq->rmt_port),
2201                 TCP_SYN_RECV,
2202                 0, 0, /* could print option size, but that is af dependent. */
2203                 1,    /* timers active (only the expire timer) */
2204                 jiffies_to_clock_t(ttd),
2205                 req->retrans,
2206                 uid,
2207                 0,  /* non standard timer */
2208                 0, /* open_requests have no inode */
2209                 atomic_read(&sk->sk_refcnt),
2210                 req,
2211                 len);
2212 }
2213
2214 static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i, int *len)
2215 {
2216         int timer_active;
2217         unsigned long timer_expires;
2218         struct tcp_sock *tp = tcp_sk(sk);
2219         const struct inet_connection_sock *icsk = inet_csk(sk);
2220         struct inet_sock *inet = inet_sk(sk);
2221         __be32 dest = inet->daddr;
2222         __be32 src = inet->rcv_saddr;
2223         __u16 destp = ntohs(inet->dport);
2224         __u16 srcp = ntohs(inet->sport);
2225
2226         if (icsk->icsk_pending == ICSK_TIME_RETRANS) {
2227                 timer_active    = 1;
2228                 timer_expires   = icsk->icsk_timeout;
2229         } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
2230                 timer_active    = 4;
2231                 timer_expires   = icsk->icsk_timeout;
2232         } else if (timer_pending(&sk->sk_timer)) {
2233                 timer_active    = 2;
2234                 timer_expires   = sk->sk_timer.expires;
2235         } else {
2236                 timer_active    = 0;
2237                 timer_expires = jiffies;
2238         }
2239
2240         seq_printf(f, "%4d: %08X:%04X %08X:%04X %02X %08X:%08X %02X:%08lX "
2241                         "%08X %5d %8d %lu %d %p %lu %lu %u %u %d%n",
2242                 i, src, srcp, dest, destp, sk->sk_state,
2243                 tp->write_seq - tp->snd_una,
2244                 sk->sk_state == TCP_LISTEN ? sk->sk_ack_backlog :
2245                                              (tp->rcv_nxt - tp->copied_seq),
2246                 timer_active,
2247                 jiffies_to_clock_t(timer_expires - jiffies),
2248                 icsk->icsk_retransmits,
2249                 sock_i_uid(sk),
2250                 icsk->icsk_probes_out,
2251                 sock_i_ino(sk),
2252                 atomic_read(&sk->sk_refcnt), sk,
2253                 jiffies_to_clock_t(icsk->icsk_rto),
2254                 jiffies_to_clock_t(icsk->icsk_ack.ato),
2255                 (icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong,
2256                 tp->snd_cwnd,
2257                 tp->snd_ssthresh >= 0xFFFF ? -1 : tp->snd_ssthresh,
2258                 len);
2259 }
2260
2261 static void get_timewait4_sock(struct inet_timewait_sock *tw,
2262                                struct seq_file *f, int i, int *len)
2263 {
2264         __be32 dest, src;
2265         __u16 destp, srcp;
2266         int ttd = tw->tw_ttd - jiffies;
2267
2268         if (ttd < 0)
2269                 ttd = 0;
2270
2271         dest  = tw->tw_daddr;
2272         src   = tw->tw_rcv_saddr;
2273         destp = ntohs(tw->tw_dport);
2274         srcp  = ntohs(tw->tw_sport);
2275
2276         seq_printf(f, "%4d: %08X:%04X %08X:%04X"
2277                 " %02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %p%n",
2278                 i, src, srcp, dest, destp, tw->tw_substate, 0, 0,
2279                 3, jiffies_to_clock_t(ttd), 0, 0, 0, 0,
2280                 atomic_read(&tw->tw_refcnt), tw, len);
2281 }
2282
2283 #define TMPSZ 150
2284
2285 static int tcp4_seq_show(struct seq_file *seq, void *v)
2286 {
2287         struct tcp_iter_state *st;
2288         int len;
2289
2290         if (v == SEQ_START_TOKEN) {
2291                 seq_printf(seq, "%-*s\n", TMPSZ - 1,
2292                            "  sl  local_address rem_address   st tx_queue "
2293                            "rx_queue tr tm->when retrnsmt   uid  timeout "
2294                            "inode");
2295                 goto out;
2296         }
2297         st = seq->private;
2298
2299         switch (st->state) {
2300         case TCP_SEQ_STATE_LISTENING:
2301         case TCP_SEQ_STATE_ESTABLISHED:
2302                 get_tcp4_sock(v, seq, st->num, &len);
2303                 break;
2304         case TCP_SEQ_STATE_OPENREQ:
2305                 get_openreq4(st->syn_wait_sk, v, seq, st->num, st->uid, &len);
2306                 break;
2307         case TCP_SEQ_STATE_TIME_WAIT:
2308                 get_timewait4_sock(v, seq, st->num, &len);
2309                 break;
2310         }
2311         seq_printf(seq, "%*s\n", TMPSZ - 1 - len, "");
2312 out:
2313         return 0;
2314 }
2315
2316 static struct tcp_seq_afinfo tcp4_seq_afinfo = {
2317         .name           = "tcp",
2318         .family         = AF_INET,
2319         .seq_fops       = {
2320                 .owner          = THIS_MODULE,
2321         },
2322         .seq_ops        = {
2323                 .show           = tcp4_seq_show,
2324         },
2325 };
2326
2327 static int tcp4_proc_init_net(struct net *net)
2328 {
2329         return tcp_proc_register(net, &tcp4_seq_afinfo);
2330 }
2331
2332 static void tcp4_proc_exit_net(struct net *net)
2333 {
2334         tcp_proc_unregister(net, &tcp4_seq_afinfo);
2335 }
2336
2337 static struct pernet_operations tcp4_net_ops = {
2338         .init = tcp4_proc_init_net,
2339         .exit = tcp4_proc_exit_net,
2340 };
2341
2342 int __init tcp4_proc_init(void)
2343 {
2344         return register_pernet_subsys(&tcp4_net_ops);
2345 }
2346
2347 void tcp4_proc_exit(void)
2348 {
2349         unregister_pernet_subsys(&tcp4_net_ops);
2350 }
2351 #endif /* CONFIG_PROC_FS */
2352
2353 struct sk_buff **tcp4_gro_receive(struct sk_buff **head, struct sk_buff *skb)
2354 {
2355         struct iphdr *iph = ip_hdr(skb);
2356
2357         switch (skb->ip_summed) {
2358         case CHECKSUM_COMPLETE:
2359                 if (!tcp_v4_check(skb->len, iph->saddr, iph->daddr,
2360                                   skb->csum)) {
2361                         skb->ip_summed = CHECKSUM_UNNECESSARY;
2362                         break;
2363                 }
2364
2365                 /* fall through */
2366         case CHECKSUM_NONE:
2367                 NAPI_GRO_CB(skb)->flush = 1;
2368                 return NULL;
2369         }
2370
2371         return tcp_gro_receive(head, skb);
2372 }
2373 EXPORT_SYMBOL(tcp4_gro_receive);
2374
2375 int tcp4_gro_complete(struct sk_buff *skb)
2376 {
2377         struct iphdr *iph = ip_hdr(skb);
2378         struct tcphdr *th = tcp_hdr(skb);
2379
2380         th->check = ~tcp_v4_check(skb->len - skb_transport_offset(skb),
2381                                   iph->saddr, iph->daddr, 0);
2382         skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
2383
2384         return tcp_gro_complete(skb);
2385 }
2386 EXPORT_SYMBOL(tcp4_gro_complete);
2387
2388 struct proto tcp_prot = {
2389         .name                   = "TCP",
2390         .owner                  = THIS_MODULE,
2391         .close                  = tcp_close,
2392         .connect                = tcp_v4_connect,
2393         .disconnect             = tcp_disconnect,
2394         .accept                 = inet_csk_accept,
2395         .ioctl                  = tcp_ioctl,
2396         .init                   = tcp_v4_init_sock,
2397         .destroy                = tcp_v4_destroy_sock,
2398         .shutdown               = tcp_shutdown,
2399         .setsockopt             = tcp_setsockopt,
2400         .getsockopt             = tcp_getsockopt,
2401         .recvmsg                = tcp_recvmsg,
2402         .backlog_rcv            = tcp_v4_do_rcv,
2403         .hash                   = inet_hash,
2404         .unhash                 = inet_unhash,
2405         .get_port               = inet_csk_get_port,
2406         .enter_memory_pressure  = tcp_enter_memory_pressure,
2407         .sockets_allocated      = &tcp_sockets_allocated,
2408         .orphan_count           = &tcp_orphan_count,
2409         .memory_allocated       = &tcp_memory_allocated,
2410         .memory_pressure        = &tcp_memory_pressure,
2411         .sysctl_mem             = sysctl_tcp_mem,
2412         .sysctl_wmem            = sysctl_tcp_wmem,
2413         .sysctl_rmem            = sysctl_tcp_rmem,
2414         .max_header             = MAX_TCP_HEADER,
2415         .obj_size               = sizeof(struct tcp_sock),
2416         .slab_flags             = SLAB_DESTROY_BY_RCU,
2417         .twsk_prot              = &tcp_timewait_sock_ops,
2418         .rsk_prot               = &tcp_request_sock_ops,
2419         .h.hashinfo             = &tcp_hashinfo,
2420 #ifdef CONFIG_COMPAT
2421         .compat_setsockopt      = compat_tcp_setsockopt,
2422         .compat_getsockopt      = compat_tcp_getsockopt,
2423 #endif
2424 };
2425
2426
2427 static int __net_init tcp_sk_init(struct net *net)
2428 {
2429         return inet_ctl_sock_create(&net->ipv4.tcp_sock,
2430                                     PF_INET, SOCK_RAW, IPPROTO_TCP, net);
2431 }
2432
2433 static void __net_exit tcp_sk_exit(struct net *net)
2434 {
2435         inet_ctl_sock_destroy(net->ipv4.tcp_sock);
2436         inet_twsk_purge(net, &tcp_hashinfo, &tcp_death_row, AF_INET);
2437 }
2438
2439 static struct pernet_operations __net_initdata tcp_sk_ops = {
2440        .init = tcp_sk_init,
2441        .exit = tcp_sk_exit,
2442 };
2443
2444 void __init tcp_v4_init(void)
2445 {
2446         inet_hashinfo_init(&tcp_hashinfo);
2447         if (register_pernet_subsys(&tcp_sk_ops))
2448                 panic("Failed to create the TCP control socket.\n");
2449 }
2450
2451 EXPORT_SYMBOL(ipv4_specific);
2452 EXPORT_SYMBOL(tcp_hashinfo);
2453 EXPORT_SYMBOL(tcp_prot);
2454 EXPORT_SYMBOL(tcp_v4_conn_request);
2455 EXPORT_SYMBOL(tcp_v4_connect);
2456 EXPORT_SYMBOL(tcp_v4_do_rcv);
2457 EXPORT_SYMBOL(tcp_v4_remember_stamp);
2458 EXPORT_SYMBOL(tcp_v4_send_check);
2459 EXPORT_SYMBOL(tcp_v4_syn_recv_sock);
2460
2461 #ifdef CONFIG_PROC_FS
2462 EXPORT_SYMBOL(tcp_proc_register);
2463 EXPORT_SYMBOL(tcp_proc_unregister);
2464 #endif
2465 EXPORT_SYMBOL(sysctl_tcp_low_latency);
2466