release/src-rt-6.x.4708/linux/linux-2.6.36/net/ipv4/tcp_ipv4.c
1 /* Modified by Broadcom Corp. Portions Copyright (c) Broadcom Corp, 2012. */
2 /*
3 * INET An implementation of the TCP/IP protocol suite for the LINUX
4 * operating system. INET is implemented using the BSD Socket
5 * interface as the means of communication with the user level.
7 * Implementation of the Transmission Control Protocol(TCP).
9 * IPv4 specific functions
12 * code split from:
13 * linux/ipv4/tcp.c
14 * linux/ipv4/tcp_input.c
15 * linux/ipv4/tcp_output.c
17 * See tcp.c for author information
19 * This program is free software; you can redistribute it and/or
20 * modify it under the terms of the GNU General Public License
21 * as published by the Free Software Foundation; either version
22 * 2 of the License, or (at your option) any later version.
26 * Changes:
27 * David S. Miller : New socket lookup architecture.
28 * This code is dedicated to John Dyson.
29 * David S. Miller : Change semantics of established hash,
30 * half is devoted to TIME_WAIT sockets
31 * and the rest go in the other half.
32 * Andi Kleen : Add support for syncookies and fixed
33 * some bugs: ip options weren't passed to
34 * the TCP layer, missed a check for an
35 * ACK bit.
36 * Andi Kleen : Implemented fast path mtu discovery.
37 * Fixed many serious bugs in the
38 * request_sock handling and moved
39 * most of it into the af independent code.
40 * Added tail drop and some other bugfixes.
41 * Added new listen semantics.
42 * Mike McLagan : Routing by source
43 * Juan Jose Ciarlante: ip_dynaddr bits
44 * Andi Kleen: various fixes.
45 * Vitaly E. Lavrov : Transparent proxy revived after year
46 * coma.
47 * Andi Kleen : Fix new listen.
48 * Andi Kleen : Fix accept error reporting.
49 * YOSHIFUJI Hideaki @USAGI and: Support IPV6_V6ONLY socket option, which
50 * Alexey Kuznetsov allow both IPv4 and IPv6 sockets to bind
51 * a single port at the same time.
55 #include <linux/bottom_half.h>
56 #include <linux/types.h>
57 #include <linux/fcntl.h>
58 #include <linux/module.h>
59 #include <linux/random.h>
60 #include <linux/cache.h>
61 #include <linux/jhash.h>
62 #include <linux/init.h>
63 #include <linux/times.h>
64 #include <linux/slab.h>
66 #include <net/net_namespace.h>
67 #include <net/icmp.h>
68 #include <net/inet_hashtables.h>
69 #include <net/tcp.h>
70 #include <net/transp_v6.h>
71 #include <net/ipv6.h>
72 #include <net/inet_common.h>
73 #include <net/timewait_sock.h>
74 #include <net/xfrm.h>
75 #include <net/netdma.h>
77 #include <linux/inet.h>
78 #include <linux/ipv6.h>
79 #include <linux/stddef.h>
80 #include <linux/proc_fs.h>
81 #include <linux/seq_file.h>
83 #include <linux/crypto.h>
84 #include <linux/scatterlist.h>
86 #include <typedefs.h>
87 #include <bcmdefs.h>
89 int sysctl_tcp_tw_reuse __read_mostly;
90 int sysctl_tcp_low_latency __read_mostly;
91 EXPORT_SYMBOL(sysctl_tcp_low_latency);
94 #ifdef CONFIG_TCP_MD5SIG
95 static struct tcp_md5sig_key *tcp_v4_md5_do_lookup(struct sock *sk,
96 __be32 addr);
97 static int tcp_v4_md5_hash_hdr(char *md5_hash, struct tcp_md5sig_key *key,
98 __be32 daddr, __be32 saddr, struct tcphdr *th);
99 #else
100 static inline
101 struct tcp_md5sig_key *tcp_v4_md5_do_lookup(struct sock *sk, __be32 addr)
103 return NULL;
105 #endif
107 struct inet_hashinfo tcp_hashinfo;
108 EXPORT_SYMBOL(tcp_hashinfo);
110 static inline __u32 tcp_v4_init_sequence(struct sk_buff *skb)
112 return secure_tcp_sequence_number(ip_hdr(skb)->daddr,
113 ip_hdr(skb)->saddr,
114 tcp_hdr(skb)->dest,
115 tcp_hdr(skb)->source);
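/* The ISN is a keyed hash of the connection 4-tuple with a clock
 * component mixed in (cf. RFC 1948), so it is hard to predict off-path
 * but still advances over time for a given address/port pair.
 */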
118 int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
120 const struct tcp_timewait_sock *tcptw = tcp_twsk(sktw);
121 struct tcp_sock *tp = tcp_sk(sk);
123 /* With PAWS, it is safe from the viewpoint
124 of data integrity. Even without PAWS it is safe provided sequence
125 spaces do not overlap i.e. at data rates <= 80Mbit/sec.
127 Actually, the idea is close to VJ's one, only the timestamp cache is
128 held not per host but per port pair, and the TW bucket is used as the
129 state holder.
131 If the TW bucket has already been destroyed we fall back to VJ's scheme
132 and use the initial timestamp retrieved from the peer table.
134 if (tcptw->tw_ts_recent_stamp &&
135 (twp == NULL || (sysctl_tcp_tw_reuse &&
136 get_seconds() - tcptw->tw_ts_recent_stamp > 1))) {
137 tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2;
138 if (tp->write_seq == 0)
139 tp->write_seq = 1;
140 tp->rx_opt.ts_recent = tcptw->tw_ts_recent;
141 tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp;
142 sock_hold(sktw);
143 return 1;
146 return 0;
148 EXPORT_SYMBOL_GPL(tcp_twsk_unique);
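/* Summary of the reuse rule above: a TIME-WAIT port pair may be taken
 * over for a new outgoing connection when the caller passed no tw bucket
 * pointer (twp == NULL), or when sysctl_tcp_tw_reuse is set and the last
 * timestamp seen from the peer is more than a second old. The new
 * write_seq starts at tw_snd_nxt + 65535 + 2, past anything the old
 * incarnation could have had in flight, and ts_recent is inherited so
 * PAWS keeps protecting against stray old segments.
 */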
150 /* This will initiate an outgoing connection. */
151 int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
153 struct inet_sock *inet = inet_sk(sk);
154 struct tcp_sock *tp = tcp_sk(sk);
155 struct sockaddr_in *usin = (struct sockaddr_in *)uaddr;
156 struct rtable *rt;
157 __be32 daddr, nexthop;
158 int tmp;
159 int err;
161 if (addr_len < sizeof(struct sockaddr_in))
162 return -EINVAL;
164 if (usin->sin_family != AF_INET)
165 return -EAFNOSUPPORT;
167 nexthop = daddr = usin->sin_addr.s_addr;
168 if (inet->opt && inet->opt->srr) {
169 if (!daddr)
170 return -EINVAL;
171 nexthop = inet->opt->faddr;
174 tmp = ip_route_connect(&rt, nexthop, inet->inet_saddr,
175 RT_CONN_FLAGS(sk), sk->sk_bound_dev_if,
176 IPPROTO_TCP,
177 inet->inet_sport, usin->sin_port, sk, 1);
178 if (tmp < 0) {
179 if (tmp == -ENETUNREACH)
180 IP_INC_STATS_BH(sock_net(sk), IPSTATS_MIB_OUTNOROUTES);
181 return tmp;
184 if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) {
185 ip_rt_put(rt);
186 return -ENETUNREACH;
189 if (!inet->opt || !inet->opt->srr)
190 daddr = rt->rt_dst;
192 if (!inet->inet_saddr)
193 inet->inet_saddr = rt->rt_src;
194 inet->inet_rcv_saddr = inet->inet_saddr;
196 if (tp->rx_opt.ts_recent_stamp && inet->inet_daddr != daddr) {
197 /* Reset inherited state */
198 tp->rx_opt.ts_recent = 0;
199 tp->rx_opt.ts_recent_stamp = 0;
200 tp->write_seq = 0;
203 if (tcp_death_row.sysctl_tw_recycle &&
204 !tp->rx_opt.ts_recent_stamp && rt->rt_dst == daddr) {
205 struct inet_peer *peer = rt_get_peer(rt);
207 * VJ's idea. We save the last timestamp seen from
208 * the destination in the peer table when entering
209 * TIME-WAIT state, and initialize rx_opt.ts_recent from it
210 * when trying a new connection.
212 if (peer) {
213 inet_peer_refcheck(peer);
214 if ((u32)get_seconds() - peer->tcp_ts_stamp <= TCP_PAWS_MSL) {
215 tp->rx_opt.ts_recent_stamp = peer->tcp_ts_stamp;
216 tp->rx_opt.ts_recent = peer->tcp_ts;
221 inet->inet_dport = usin->sin_port;
222 inet->inet_daddr = daddr;
224 inet_csk(sk)->icsk_ext_hdr_len = 0;
225 if (inet->opt)
226 inet_csk(sk)->icsk_ext_hdr_len = inet->opt->optlen;
228 tp->rx_opt.mss_clamp = TCP_MSS_DEFAULT;
230 /* Socket identity is still unknown (sport may be zero).
231 * However we set the state to SYN-SENT and, without releasing the socket
232 * lock, select a source port, enter ourselves into the hash tables and
233 * complete initialization after this.
235 tcp_set_state(sk, TCP_SYN_SENT);
236 err = inet_hash_connect(&tcp_death_row, sk);
237 if (err)
238 goto failure;
240 err = ip_route_newports(&rt, IPPROTO_TCP,
241 inet->inet_sport, inet->inet_dport, sk);
242 if (err)
243 goto failure;
245 /* OK, now commit destination to socket. */
246 sk->sk_gso_type = SKB_GSO_TCPV4;
247 sk_setup_caps(sk, &rt->dst);
249 if (!tp->write_seq)
250 tp->write_seq = secure_tcp_sequence_number(inet->inet_saddr,
251 inet->inet_daddr,
252 inet->inet_sport,
253 usin->sin_port);
255 inet->inet_id = tp->write_seq ^ jiffies;
257 err = tcp_connect(sk);
258 rt = NULL;
259 if (err)
260 goto failure;
262 return 0;
264 failure:
266 * This unhashes the socket and releases the local port,
267 * if necessary.
269 tcp_set_state(sk, TCP_CLOSE);
270 ip_rt_put(rt);
271 sk->sk_route_caps = 0;
272 inet->inet_dport = 0;
273 return err;
275 EXPORT_SYMBOL(tcp_v4_connect);
278 * This routine does path mtu discovery as defined in RFC1191.
280 static void do_pmtu_discovery(struct sock *sk, struct iphdr *iph, u32 mtu)
282 struct dst_entry *dst;
283 struct inet_sock *inet = inet_sk(sk);
285 /* We are not interested in TCP_LISTEN and open_requests (SYN-ACKs
286 * sent out by Linux are always < 576 bytes, so they should go through
287 * unfragmented).
289 if (sk->sk_state == TCP_LISTEN)
290 return;
292 /* We don't check in the dst_entry if pmtu discovery is forbidden
293 * on this route. We just assume that no packet-too-big packets
294 * are sent back when pmtu discovery is not active.
295 * There is a small race when the user changes this flag in the
296 * route, but I think that's acceptable.
298 if ((dst = __sk_dst_check(sk, 0)) == NULL)
299 return;
301 dst->ops->update_pmtu(dst, mtu);
303 /* Something is about to go wrong... Remember the soft error
304 * for the case that this connection will not be able to recover.
306 if (mtu < dst_mtu(dst) && ip_dont_fragment(sk, dst))
307 sk->sk_err_soft = EMSGSIZE;
309 mtu = dst_mtu(dst);
311 if (inet->pmtudisc != IP_PMTUDISC_DONT &&
312 inet_csk(sk)->icsk_pmtu_cookie > mtu) {
313 tcp_sync_mss(sk, mtu);
315 /* Resend the TCP packet because it's
316 * clear that the old packet has been
317 * dropped. This is the new "fast" path mtu
318 * discovery.
320 tcp_simple_retransmit(sk);
321 } /* else let the usual retransmit timer handle it */
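/* In short: when the ICMP-reported MTU undercuts the cached path MTU,
 * tcp_sync_mss() shrinks the cached MSS and tcp_simple_retransmit()
 * resends the now-oversized segments immediately instead of waiting
 * for the retransmission timer to expire.
 */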
325 * This routine is called by the ICMP module when it gets some
326 * sort of error condition. If err < 0 then the socket should
327 * be closed and the error returned to the user. If err > 0
328 * it's just the icmp type << 8 | icmp code. After adjustment
329 * header points to the first 8 bytes of the tcp header. We need
330 * to find the appropriate port.
332 * The locking strategy used here is very "optimistic". When
333 * someone else accesses the socket the ICMP is just dropped
334 * and for some paths there is no check at all.
335 * A more general error queue to queue errors for later handling
336 * is probably better.
340 void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
342 struct iphdr *iph = (struct iphdr *)icmp_skb->data;
343 struct tcphdr *th = (struct tcphdr *)(icmp_skb->data + (iph->ihl << 2));
344 struct inet_connection_sock *icsk;
345 struct tcp_sock *tp;
346 struct inet_sock *inet;
347 const int type = icmp_hdr(icmp_skb)->type;
348 const int code = icmp_hdr(icmp_skb)->code;
349 struct sock *sk;
350 struct sk_buff *skb;
351 __u32 seq;
352 __u32 remaining;
353 int err;
354 struct net *net = dev_net(icmp_skb->dev);
356 if (icmp_skb->len < (iph->ihl << 2) + 8) {
357 ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
358 return;
361 sk = inet_lookup(net, &tcp_hashinfo, iph->daddr, th->dest,
362 iph->saddr, th->source, inet_iif(icmp_skb));
363 if (!sk) {
364 ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
365 return;
367 if (sk->sk_state == TCP_TIME_WAIT) {
368 inet_twsk_put(inet_twsk(sk));
369 return;
372 bh_lock_sock(sk);
373 /* If too many ICMPs get dropped on busy
374 * servers this needs to be solved differently.
376 if (sock_owned_by_user(sk))
377 NET_INC_STATS_BH(net, LINUX_MIB_LOCKDROPPEDICMPS);
379 if (sk->sk_state == TCP_CLOSE)
380 goto out;
382 if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) {
383 NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP);
384 goto out;
387 icsk = inet_csk(sk);
388 tp = tcp_sk(sk);
389 seq = ntohl(th->seq);
390 if (sk->sk_state != TCP_LISTEN &&
391 !between(seq, tp->snd_una, tp->snd_nxt)) {
392 NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
393 goto out;
396 switch (type) {
397 case ICMP_SOURCE_QUENCH:
398 /* Just silently ignore these. */
399 goto out;
400 case ICMP_PARAMETERPROB:
401 err = EPROTO;
402 break;
403 case ICMP_DEST_UNREACH:
404 if (code > NR_ICMP_UNREACH)
405 goto out;
407 if (code == ICMP_FRAG_NEEDED) { /* PMTU discovery (RFC1191) */
408 if (!sock_owned_by_user(sk))
409 do_pmtu_discovery(sk, iph, info);
410 goto out;
413 err = icmp_err_convert[code].errno;
414 /* check if icmp_skb allows revert of backoff
415 * (see draft-zimmermann-tcp-lcd) */
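/* Rationale: a net/host unreachable ICMP that matches the oldest
 * unacknowledged segment while we are in exponential backoff hints at
 * a transient connectivity disruption rather than congestion, so one
 * backoff step is undone below and the retransmission is either
 * rearmed for the remaining time or fired immediately.
 */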
416 if (code != ICMP_NET_UNREACH && code != ICMP_HOST_UNREACH)
417 break;
418 if (seq != tp->snd_una || !icsk->icsk_retransmits ||
419 !icsk->icsk_backoff)
420 break;
422 if (sock_owned_by_user(sk))
423 break;
425 icsk->icsk_backoff--;
426 inet_csk(sk)->icsk_rto = __tcp_set_rto(tp) <<
427 icsk->icsk_backoff;
428 tcp_bound_rto(sk);
430 skb = tcp_write_queue_head(sk);
431 BUG_ON(!skb);
433 remaining = icsk->icsk_rto - min(icsk->icsk_rto,
434 tcp_time_stamp - TCP_SKB_CB(skb)->when);
436 if (remaining) {
437 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
438 remaining, TCP_RTO_MAX);
439 } else {
440 /* RTO revert clocked out retransmission.
441 * Will retransmit now */
442 tcp_retransmit_timer(sk);
445 break;
446 case ICMP_TIME_EXCEEDED:
447 err = EHOSTUNREACH;
448 break;
449 default:
450 goto out;
453 switch (sk->sk_state) {
454 struct request_sock *req, **prev;
455 case TCP_LISTEN:
456 if (sock_owned_by_user(sk))
457 goto out;
459 req = inet_csk_search_req(sk, &prev, th->dest,
460 iph->daddr, iph->saddr);
461 if (!req)
462 goto out;
464 /* ICMPs are not backlogged, hence we cannot get
465 an established socket here.
467 WARN_ON(req->sk);
469 if (seq != tcp_rsk(req)->snt_isn) {
470 NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
471 goto out;
475 * Still in SYN_RECV, just remove it silently.
476 * There is no good way to pass the error to the newly
477 * created socket, and POSIX does not want network
478 * errors returned from accept().
480 inet_csk_reqsk_queue_drop(sk, req, prev);
481 goto out;
483 case TCP_SYN_SENT:
484 case TCP_SYN_RECV: /* Cannot happen normally.
485 It can, e.g., if SYNs crossed.
487 if (!sock_owned_by_user(sk)) {
488 sk->sk_err = err;
490 sk->sk_error_report(sk);
492 tcp_done(sk);
493 } else {
494 sk->sk_err_soft = err;
496 goto out;
499 /* If we've already connected we will keep trying
500 * until we time out, or the user gives up.
502 * RFC 1122 4.2.3.9 allows considering only PROTO_UNREACH and
503 * PORT_UNREACH as hard errors (well, FRAG_FAILED too,
504 * but it is obsoleted by pmtu discovery).
506 * Note that in the modern internet, where routing is unreliable
507 * and broken firewalls sit in every dark corner sending random
508 * errors ordered by their masters, even these two messages finally lose
509 * their original sense (even Linux sends invalid PORT_UNREACHs).
511 * Now we are in compliance with RFCs.
512 * --ANK (980905)
515 inet = inet_sk(sk);
516 if (!sock_owned_by_user(sk) && inet->recverr) {
517 sk->sk_err = err;
518 sk->sk_error_report(sk);
519 } else { /* Only an error on timeout */
520 sk->sk_err_soft = err;
523 out:
524 bh_unlock_sock(sk);
525 sock_put(sk);
528 static void __tcp_v4_send_check(struct sk_buff *skb,
529 __be32 saddr, __be32 daddr)
531 struct tcphdr *th = tcp_hdr(skb);
533 if (skb->ip_summed == CHECKSUM_PARTIAL) {
534 th->check = ~tcp_v4_check(skb->len, saddr, daddr, 0);
535 skb->csum_start = skb_transport_header(skb) - skb->head;
536 skb->csum_offset = offsetof(struct tcphdr, check);
537 } else {
538 th->check = tcp_v4_check(skb->len, saddr, daddr,
539 csum_partial(th,
540 th->doff << 2,
541 skb->csum));
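/* With checksum offload (CHECKSUM_PARTIAL) only the pseudo-header sum
 * is stored in th->check; csum_start/csum_offset tell the device (or
 * skb_checksum_help()) where to finish the job. Otherwise the full
 * checksum over header and payload is computed here in software.
 */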
545 /* This routine computes an IPv4 TCP checksum. */
546 void tcp_v4_send_check(struct sock *sk, struct sk_buff *skb)
548 struct inet_sock *inet = inet_sk(sk);
550 __tcp_v4_send_check(skb, inet->inet_saddr, inet->inet_daddr);
552 EXPORT_SYMBOL(tcp_v4_send_check);
554 int tcp_v4_gso_send_check(struct sk_buff *skb)
556 const struct iphdr *iph;
557 struct tcphdr *th;
559 if (!pskb_may_pull(skb, sizeof(*th)))
560 return -EINVAL;
562 iph = ip_hdr(skb);
563 th = tcp_hdr(skb);
565 th->check = 0;
566 skb->ip_summed = CHECKSUM_PARTIAL;
567 __tcp_v4_send_check(skb, iph->saddr, iph->daddr);
568 return 0;
572 * This routine will send an RST to the other tcp.
574 * Someone asks: why do I NEVER use socket parameters (TOS, TTL etc.)
575 * for the reset?
576 * Answer: if a packet caused an RST, it is not for a socket
577 * existing in our system; if it is matched to a socket,
578 * it is just a duplicate segment or a bug in the other side's TCP.
579 * So we build the reply based only on the parameters
580 * that arrived with the segment.
581 * Exception: precedence violation. We do not implement it in any case.
584 static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
586 struct tcphdr *th = tcp_hdr(skb);
587 struct {
588 struct tcphdr th;
589 #ifdef CONFIG_TCP_MD5SIG
590 __be32 opt[(TCPOLEN_MD5SIG_ALIGNED >> 2)];
591 #endif
592 } rep;
593 struct ip_reply_arg arg;
594 #ifdef CONFIG_TCP_MD5SIG
595 struct tcp_md5sig_key *key;
596 #endif
597 struct net *net;
599 /* Never send a reset in response to a reset. */
600 if (th->rst)
601 return;
603 if (skb_rtable(skb)->rt_type != RTN_LOCAL)
604 return;
606 /* Swap the send and the receive. */
607 memset(&rep, 0, sizeof(rep));
608 rep.th.dest = th->source;
609 rep.th.source = th->dest;
610 rep.th.doff = sizeof(struct tcphdr) / 4;
611 rep.th.rst = 1;
613 if (th->ack) {
614 rep.th.seq = th->ack_seq;
615 } else {
616 rep.th.ack = 1;
617 rep.th.ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin +
618 skb->len - (th->doff << 2));
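/* Sequence selection follows RFC 793 reset generation: if the incoming
 * segment carried an ACK, the RST takes its sequence number from that
 * ack field and needs no ACK of its own; otherwise seq stays 0 and the
 * RST acks everything the segment occupied (payload plus SYN and FIN).
 */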
621 memset(&arg, 0, sizeof(arg));
622 arg.iov[0].iov_base = (unsigned char *)&rep;
623 arg.iov[0].iov_len = sizeof(rep.th);
625 #ifdef CONFIG_TCP_MD5SIG
626 key = sk ? tcp_v4_md5_do_lookup(sk, ip_hdr(skb)->daddr) : NULL;
627 if (key) {
628 rep.opt[0] = htonl((TCPOPT_NOP << 24) |
629 (TCPOPT_NOP << 16) |
630 (TCPOPT_MD5SIG << 8) |
631 TCPOLEN_MD5SIG);
632 /* Update length and the length the header thinks exists */
633 arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
634 rep.th.doff = arg.iov[0].iov_len / 4;
636 tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[1],
637 key, ip_hdr(skb)->saddr,
638 ip_hdr(skb)->daddr, &rep.th);
640 #endif
641 arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
642 ip_hdr(skb)->saddr,
643 arg.iov[0].iov_len, IPPROTO_TCP, 0);
644 arg.csumoffset = offsetof(struct tcphdr, check) / 2;
645 arg.flags = (sk && inet_sk(sk)->transparent) ? IP_REPLY_ARG_NOSRCCHECK : 0;
647 net = dev_net(skb_dst(skb)->dev);
648 ip_send_reply(net->ipv4.tcp_sock, skb,
649 &arg, arg.iov[0].iov_len);
651 TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
652 TCP_INC_STATS_BH(net, TCP_MIB_OUTRSTS);
655 /* The code following below sending ACKs in SYN-RECV and TIME-WAIT states
656 outside socket context is ugly, certainly. What can I do?
659 static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack,
660 u32 win, u32 ts, int oif,
661 struct tcp_md5sig_key *key,
662 int reply_flags)
664 struct tcphdr *th = tcp_hdr(skb);
665 struct {
666 struct tcphdr th;
667 __be32 opt[(TCPOLEN_TSTAMP_ALIGNED >> 2)
668 #ifdef CONFIG_TCP_MD5SIG
669 + (TCPOLEN_MD5SIG_ALIGNED >> 2)
670 #endif
672 } rep;
673 struct ip_reply_arg arg;
674 struct net *net = dev_net(skb_dst(skb)->dev);
676 memset(&rep.th, 0, sizeof(struct tcphdr));
677 memset(&arg, 0, sizeof(arg));
679 arg.iov[0].iov_base = (unsigned char *)&rep;
680 arg.iov[0].iov_len = sizeof(rep.th);
681 if (ts) {
682 rep.opt[0] = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
683 (TCPOPT_TIMESTAMP << 8) |
684 TCPOLEN_TIMESTAMP);
685 rep.opt[1] = htonl(tcp_time_stamp);
686 rep.opt[2] = htonl(ts);
687 arg.iov[0].iov_len += TCPOLEN_TSTAMP_ALIGNED;
690 /* Swap the send and the receive. */
691 rep.th.dest = th->source;
692 rep.th.source = th->dest;
693 rep.th.doff = arg.iov[0].iov_len / 4;
694 rep.th.seq = htonl(seq);
695 rep.th.ack_seq = htonl(ack);
696 rep.th.ack = 1;
697 rep.th.window = htons(win);
699 #ifdef CONFIG_TCP_MD5SIG
700 if (key) {
701 int offset = (ts) ? 3 : 0;
703 rep.opt[offset++] = htonl((TCPOPT_NOP << 24) |
704 (TCPOPT_NOP << 16) |
705 (TCPOPT_MD5SIG << 8) |
706 TCPOLEN_MD5SIG);
707 arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
708 rep.th.doff = arg.iov[0].iov_len/4;
710 tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[offset],
711 key, ip_hdr(skb)->saddr,
712 ip_hdr(skb)->daddr, &rep.th);
714 #endif
715 arg.flags = reply_flags;
716 arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
717 ip_hdr(skb)->saddr,
718 arg.iov[0].iov_len, IPPROTO_TCP, 0);
719 arg.csumoffset = offsetof(struct tcphdr, check) / 2;
720 if (oif)
721 arg.bound_dev_if = oif;
723 ip_send_reply(net->ipv4.tcp_sock, skb,
724 &arg, arg.iov[0].iov_len);
726 TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
729 static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
731 struct inet_timewait_sock *tw = inet_twsk(sk);
732 struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
734 tcp_v4_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
735 tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
736 tcptw->tw_ts_recent,
737 tw->tw_bound_dev_if,
738 tcp_twsk_md5_key(tcptw),
739 tw->tw_transparent ? IP_REPLY_ARG_NOSRCCHECK : 0
742 inet_twsk_put(tw);
745 static void tcp_v4_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
746 struct request_sock *req)
748 tcp_v4_send_ack(skb, tcp_rsk(req)->snt_isn + 1,
749 tcp_rsk(req)->rcv_isn + 1, req->rcv_wnd,
750 req->ts_recent,
752 tcp_v4_md5_do_lookup(sk, ip_hdr(skb)->daddr),
753 inet_rsk(req)->no_srccheck ? IP_REPLY_ARG_NOSRCCHECK : 0);
757 * Send a SYN-ACK after having received a SYN.
758 * This still operates on a request_sock only, not on a big
759 * socket.
761 static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst,
762 struct request_sock *req,
763 struct request_values *rvp)
765 const struct inet_request_sock *ireq = inet_rsk(req);
766 int err = -1;
767 struct sk_buff * skb;
769 /* First, grab a route. */
770 if (!dst && (dst = inet_csk_route_req(sk, req)) == NULL)
771 return -1;
773 skb = tcp_make_synack(sk, dst, req, rvp);
775 if (skb) {
776 __tcp_v4_send_check(skb, ireq->loc_addr, ireq->rmt_addr);
778 err = ip_build_and_send_pkt(skb, sk, ireq->loc_addr,
779 ireq->rmt_addr,
780 ireq->opt);
781 err = net_xmit_eval(err);
784 dst_release(dst);
785 return err;
788 static int tcp_v4_rtx_synack(struct sock *sk, struct request_sock *req,
789 struct request_values *rvp)
791 TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS);
792 return tcp_v4_send_synack(sk, NULL, req, rvp);
796 * IPv4 request_sock destructor.
798 static void tcp_v4_reqsk_destructor(struct request_sock *req)
800 kfree(inet_rsk(req)->opt);
803 static void syn_flood_warning(const struct sk_buff *skb)
805 const char *msg;
807 #ifdef CONFIG_SYN_COOKIES
808 if (sysctl_tcp_syncookies)
809 msg = "Sending cookies";
810 else
811 #endif
812 msg = "Dropping request";
814 pr_info("TCP: Possible SYN flooding on port %d. %s.\n",
815 ntohs(tcp_hdr(skb)->dest), msg);
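/* With CONFIG_SYN_COOKIES the listener still answers when the request
 * queue overflows: cookie_v4_init_sequence() encodes the connection
 * parameters into the SYN-ACK's sequence number and cookie_v4_check()
 * later rebuilds the request from the returning ACK, so no per-request
 * state has to be kept while under flood.
 */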
819 * Save and compile IPv4 options into the request_sock if needed.
821 static struct ip_options *tcp_v4_save_options(struct sock *sk,
822 struct sk_buff *skb)
824 struct ip_options *opt = &(IPCB(skb)->opt);
825 struct ip_options *dopt = NULL;
827 if (opt && opt->optlen) {
828 int opt_size = optlength(opt);
829 dopt = kmalloc(opt_size, GFP_ATOMIC);
830 if (dopt) {
831 if (ip_options_echo(dopt, skb)) {
832 kfree(dopt);
833 dopt = NULL;
837 return dopt;
840 #ifdef CONFIG_TCP_MD5SIG
842 * RFC2385 MD5 checksumming requires a mapping of
843 * IP address->MD5 Key.
844 * We need to maintain these in the sk structure.
847 /* Find the Key structure for an address. */
848 static struct tcp_md5sig_key *
849 tcp_v4_md5_do_lookup(struct sock *sk, __be32 addr)
851 struct tcp_sock *tp = tcp_sk(sk);
852 int i;
854 if (!tp->md5sig_info || !tp->md5sig_info->entries4)
855 return NULL;
856 for (i = 0; i < tp->md5sig_info->entries4; i++) {
857 if (tp->md5sig_info->keys4[i].addr == addr)
858 return &tp->md5sig_info->keys4[i].base;
860 return NULL;
863 struct tcp_md5sig_key *tcp_v4_md5_lookup(struct sock *sk,
864 struct sock *addr_sk)
866 return tcp_v4_md5_do_lookup(sk, inet_sk(addr_sk)->inet_daddr);
868 EXPORT_SYMBOL(tcp_v4_md5_lookup);
870 static struct tcp_md5sig_key *tcp_v4_reqsk_md5_lookup(struct sock *sk,
871 struct request_sock *req)
873 return tcp_v4_md5_do_lookup(sk, inet_rsk(req)->rmt_addr);
876 /* This can be called on a newly created socket, from other files */
877 int tcp_v4_md5_do_add(struct sock *sk, __be32 addr,
878 u8 *newkey, u8 newkeylen)
880 /* Add Key to the list */
881 struct tcp_md5sig_key *key;
882 struct tcp_sock *tp = tcp_sk(sk);
883 struct tcp4_md5sig_key *keys;
885 key = tcp_v4_md5_do_lookup(sk, addr);
886 if (key) {
887 /* Pre-existing entry - just update that one. */
888 kfree(key->key);
889 key->key = newkey;
890 key->keylen = newkeylen;
891 } else {
892 struct tcp_md5sig_info *md5sig;
894 if (!tp->md5sig_info) {
895 tp->md5sig_info = kzalloc(sizeof(*tp->md5sig_info),
896 GFP_ATOMIC);
897 if (!tp->md5sig_info) {
898 kfree(newkey);
899 return -ENOMEM;
901 sk_nocaps_add(sk, NETIF_F_GSO_MASK);
903 if (tcp_alloc_md5sig_pool(sk) == NULL) {
904 kfree(newkey);
905 return -ENOMEM;
907 md5sig = tp->md5sig_info;
909 if (md5sig->alloced4 == md5sig->entries4) {
910 keys = kmalloc((sizeof(*keys) *
911 (md5sig->entries4 + 1)), GFP_ATOMIC);
912 if (!keys) {
913 kfree(newkey);
914 tcp_free_md5sig_pool();
915 return -ENOMEM;
918 if (md5sig->entries4)
919 memcpy(keys, md5sig->keys4,
920 sizeof(*keys) * md5sig->entries4);
922 /* Free old key list, and reference new one */
923 kfree(md5sig->keys4);
924 md5sig->keys4 = keys;
925 md5sig->alloced4++;
927 md5sig->entries4++;
928 md5sig->keys4[md5sig->entries4 - 1].addr = addr;
929 md5sig->keys4[md5sig->entries4 - 1].base.key = newkey;
930 md5sig->keys4[md5sig->entries4 - 1].base.keylen = newkeylen;
932 return 0;
934 EXPORT_SYMBOL(tcp_v4_md5_do_add);
936 static int tcp_v4_md5_add_func(struct sock *sk, struct sock *addr_sk,
937 u8 *newkey, u8 newkeylen)
939 return tcp_v4_md5_do_add(sk, inet_sk(addr_sk)->inet_daddr,
940 newkey, newkeylen);
943 int tcp_v4_md5_do_del(struct sock *sk, __be32 addr)
945 struct tcp_sock *tp = tcp_sk(sk);
946 int i;
948 for (i = 0; i < tp->md5sig_info->entries4; i++) {
949 if (tp->md5sig_info->keys4[i].addr == addr) {
950 /* Free the key */
951 kfree(tp->md5sig_info->keys4[i].base.key);
952 tp->md5sig_info->entries4--;
954 if (tp->md5sig_info->entries4 == 0) {
955 kfree(tp->md5sig_info->keys4);
956 tp->md5sig_info->keys4 = NULL;
957 tp->md5sig_info->alloced4 = 0;
958 } else if (tp->md5sig_info->entries4 != i) {
959 /* Need to do some manipulation */
960 memmove(&tp->md5sig_info->keys4[i],
961 &tp->md5sig_info->keys4[i+1],
962 (tp->md5sig_info->entries4 - i) *
963 sizeof(struct tcp4_md5sig_key));
965 tcp_free_md5sig_pool();
966 return 0;
969 return -ENOENT;
971 EXPORT_SYMBOL(tcp_v4_md5_do_del);
973 static void tcp_v4_clear_md5_list(struct sock *sk)
975 struct tcp_sock *tp = tcp_sk(sk);
977 /* Free each key, then the set of keys,
978 * the crypto element, and then decrement our
979 * hold on the last resort crypto.
981 if (tp->md5sig_info->entries4) {
982 int i;
983 for (i = 0; i < tp->md5sig_info->entries4; i++)
984 kfree(tp->md5sig_info->keys4[i].base.key);
985 tp->md5sig_info->entries4 = 0;
986 tcp_free_md5sig_pool();
988 if (tp->md5sig_info->keys4) {
989 kfree(tp->md5sig_info->keys4);
990 tp->md5sig_info->keys4 = NULL;
991 tp->md5sig_info->alloced4 = 0;
995 static int tcp_v4_parse_md5_keys(struct sock *sk, char __user *optval,
996 int optlen)
998 struct tcp_md5sig cmd;
999 struct sockaddr_in *sin = (struct sockaddr_in *)&cmd.tcpm_addr;
1000 u8 *newkey;
1002 if (optlen < sizeof(cmd))
1003 return -EINVAL;
1005 if (copy_from_user(&cmd, optval, sizeof(cmd)))
1006 return -EFAULT;
1008 if (sin->sin_family != AF_INET)
1009 return -EINVAL;
1011 if (!cmd.tcpm_key || !cmd.tcpm_keylen) {
1012 if (!tcp_sk(sk)->md5sig_info)
1013 return -ENOENT;
1014 return tcp_v4_md5_do_del(sk, sin->sin_addr.s_addr);
1017 if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
1018 return -EINVAL;
1020 if (!tcp_sk(sk)->md5sig_info) {
1021 struct tcp_sock *tp = tcp_sk(sk);
1022 struct tcp_md5sig_info *p;
1024 p = kzalloc(sizeof(*p), sk->sk_allocation);
1025 if (!p)
1026 return -EINVAL;
1028 tp->md5sig_info = p;
1029 sk_nocaps_add(sk, NETIF_F_GSO_MASK);
1032 newkey = kmemdup(cmd.tcpm_key, cmd.tcpm_keylen, sk->sk_allocation);
1033 if (!newkey)
1034 return -ENOMEM;
1035 return tcp_v4_md5_do_add(sk, sin->sin_addr.s_addr,
1036 newkey, cmd.tcpm_keylen);
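/* Userspace installs a per-peer key with the TCP_MD5SIG socket option
 * before connect() or listen(); a minimal sketch (error handling and
 * includes omitted, addresses are examples):
 *
 *	struct tcp_md5sig md5;
 *	struct sockaddr_in *peer = (struct sockaddr_in *)&md5.tcpm_addr;
 *
 *	memset(&md5, 0, sizeof(md5));
 *	peer->sin_family = AF_INET;
 *	peer->sin_addr.s_addr = inet_addr("192.0.2.1");
 *	md5.tcpm_keylen = 6;
 *	memcpy(md5.tcpm_key, "secret", 6);
 *	setsockopt(fd, IPPROTO_TCP, TCP_MD5SIG, &md5, sizeof(md5));
 *
 * tcp_v4_parse_md5_keys() above handles that request; a zero
 * tcpm_keylen deletes the entry for the given address instead.
 */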
1039 static int tcp_v4_md5_hash_pseudoheader(struct tcp_md5sig_pool *hp,
1040 __be32 daddr, __be32 saddr, int nbytes)
1042 struct tcp4_pseudohdr *bp;
1043 struct scatterlist sg;
1045 bp = &hp->md5_blk.ip4;
1048 * 1. the TCP pseudo-header (in the order: source IP address,
1049 * destination IP address, zero-padded protocol number, and
1050 * segment length)
1052 bp->saddr = saddr;
1053 bp->daddr = daddr;
1054 bp->pad = 0;
1055 bp->protocol = IPPROTO_TCP;
1056 bp->len = cpu_to_be16(nbytes);
1058 sg_init_one(&sg, bp, sizeof(*bp));
1059 return crypto_hash_update(&hp->md5_desc, &sg, sizeof(*bp));
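/* RFC 2385 digests, in this order: the pseudo-header above, the TCP
 * header with its checksum field zeroed, the segment payload (none for
 * the bare replies built by tcp_v4_md5_hash_hdr()), and finally the
 * key itself.
 */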
1062 static int tcp_v4_md5_hash_hdr(char *md5_hash, struct tcp_md5sig_key *key,
1063 __be32 daddr, __be32 saddr, struct tcphdr *th)
1065 struct tcp_md5sig_pool *hp;
1066 struct hash_desc *desc;
1068 hp = tcp_get_md5sig_pool();
1069 if (!hp)
1070 goto clear_hash_noput;
1071 desc = &hp->md5_desc;
1073 if (crypto_hash_init(desc))
1074 goto clear_hash;
1075 if (tcp_v4_md5_hash_pseudoheader(hp, daddr, saddr, th->doff << 2))
1076 goto clear_hash;
1077 if (tcp_md5_hash_header(hp, th))
1078 goto clear_hash;
1079 if (tcp_md5_hash_key(hp, key))
1080 goto clear_hash;
1081 if (crypto_hash_final(desc, md5_hash))
1082 goto clear_hash;
1084 tcp_put_md5sig_pool();
1085 return 0;
1087 clear_hash:
1088 tcp_put_md5sig_pool();
1089 clear_hash_noput:
1090 memset(md5_hash, 0, 16);
1091 return 1;
1094 int tcp_v4_md5_hash_skb(char *md5_hash, struct tcp_md5sig_key *key,
1095 struct sock *sk, struct request_sock *req,
1096 struct sk_buff *skb)
1098 struct tcp_md5sig_pool *hp;
1099 struct hash_desc *desc;
1100 struct tcphdr *th = tcp_hdr(skb);
1101 __be32 saddr, daddr;
1103 if (sk) {
1104 saddr = inet_sk(sk)->inet_saddr;
1105 daddr = inet_sk(sk)->inet_daddr;
1106 } else if (req) {
1107 saddr = inet_rsk(req)->loc_addr;
1108 daddr = inet_rsk(req)->rmt_addr;
1109 } else {
1110 const struct iphdr *iph = ip_hdr(skb);
1111 saddr = iph->saddr;
1112 daddr = iph->daddr;
1115 hp = tcp_get_md5sig_pool();
1116 if (!hp)
1117 goto clear_hash_noput;
1118 desc = &hp->md5_desc;
1120 if (crypto_hash_init(desc))
1121 goto clear_hash;
1123 if (tcp_v4_md5_hash_pseudoheader(hp, daddr, saddr, skb->len))
1124 goto clear_hash;
1125 if (tcp_md5_hash_header(hp, th))
1126 goto clear_hash;
1127 if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
1128 goto clear_hash;
1129 if (tcp_md5_hash_key(hp, key))
1130 goto clear_hash;
1131 if (crypto_hash_final(desc, md5_hash))
1132 goto clear_hash;
1134 tcp_put_md5sig_pool();
1135 return 0;
1137 clear_hash:
1138 tcp_put_md5sig_pool();
1139 clear_hash_noput:
1140 memset(md5_hash, 0, 16);
1141 return 1;
1143 EXPORT_SYMBOL(tcp_v4_md5_hash_skb);
1145 static int tcp_v4_inbound_md5_hash(struct sock *sk, struct sk_buff *skb)
1148 * This gets called for each TCP segment that arrives
1149 * so we want to be efficient.
1150 * We have 3 drop cases:
1151 * o No MD5 hash and one expected.
1152 * o MD5 hash and we're not expecting one.
1153 * o MD5 hash and it's wrong.
1155 __u8 *hash_location = NULL;
1156 struct tcp_md5sig_key *hash_expected;
1157 const struct iphdr *iph = ip_hdr(skb);
1158 struct tcphdr *th = tcp_hdr(skb);
1159 int genhash;
1160 unsigned char newhash[16];
1162 hash_expected = tcp_v4_md5_do_lookup(sk, iph->saddr);
1163 hash_location = tcp_parse_md5sig_option(th);
1165 /* We've parsed the options - do we have a hash? */
1166 if (!hash_expected && !hash_location)
1167 return 0;
1169 if (hash_expected && !hash_location) {
1170 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND);
1171 return 1;
1174 if (!hash_expected && hash_location) {
1175 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED);
1176 return 1;
1179 /* Okay, so this is hash_expected and hash_location -
1180 * so we need to calculate the checksum.
1182 genhash = tcp_v4_md5_hash_skb(newhash,
1183 hash_expected,
1184 NULL, NULL, skb);
1186 if (genhash || memcmp(hash_location, newhash, 16) != 0) {
1187 if (net_ratelimit()) {
1188 printk(KERN_INFO "MD5 Hash failed for (%pI4, %d)->(%pI4, %d)%s\n",
1189 &iph->saddr, ntohs(th->source),
1190 &iph->daddr, ntohs(th->dest),
1191 genhash ? " tcp_v4_calc_md5_hash failed" : "");
1193 return 1;
1195 return 0;
1198 #endif
1200 struct request_sock_ops tcp_request_sock_ops __read_mostly = {
1201 .family = PF_INET,
1202 .obj_size = sizeof(struct tcp_request_sock),
1203 .rtx_syn_ack = tcp_v4_rtx_synack,
1204 .send_ack = tcp_v4_reqsk_send_ack,
1205 .destructor = tcp_v4_reqsk_destructor,
1206 .send_reset = tcp_v4_send_reset,
1207 .syn_ack_timeout = tcp_syn_ack_timeout,
1210 #ifdef CONFIG_TCP_MD5SIG
1211 static const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = {
1212 .md5_lookup = tcp_v4_reqsk_md5_lookup,
1213 .calc_md5_hash = tcp_v4_md5_hash_skb,
1215 #endif
1217 static struct timewait_sock_ops tcp_timewait_sock_ops = {
1218 .twsk_obj_size = sizeof(struct tcp_timewait_sock),
1219 .twsk_unique = tcp_twsk_unique,
1220 .twsk_destructor= tcp_twsk_destructor,
1223 int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
1225 struct tcp_extend_values tmp_ext;
1226 struct tcp_options_received tmp_opt;
1227 u8 *hash_location;
1228 struct request_sock *req;
1229 struct inet_request_sock *ireq;
1230 struct tcp_sock *tp = tcp_sk(sk);
1231 struct dst_entry *dst = NULL;
1232 __be32 saddr = ip_hdr(skb)->saddr;
1233 __be32 daddr = ip_hdr(skb)->daddr;
1234 __u32 isn = TCP_SKB_CB(skb)->when;
1235 #ifdef CONFIG_SYN_COOKIES
1236 int want_cookie = 0;
1237 #else
1238 #define want_cookie 0 /* Argh, why doesn't gcc optimize this :( */
1239 #endif
1241 /* Never answer SYNs sent to broadcast or multicast */
1242 if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))
1243 goto drop;
1245 /* TW buckets are converted to open requests without
1246 * limitations: they conserve resources and the peer is
1247 * evidently a real one.
1249 if (inet_csk_reqsk_queue_is_full(sk) && !isn) {
1250 if (net_ratelimit())
1251 syn_flood_warning(skb);
1252 #ifdef CONFIG_SYN_COOKIES
1253 if (sysctl_tcp_syncookies) {
1254 want_cookie = 1;
1255 } else
1256 #endif
1257 goto drop;
1260 /* Accept backlog is full. If we have already queued enough
1261 * warm entries in the syn queue, drop the request. It is better than
1262 * clogging the syn queue with openreqs with exponentially increasing
1263 * timeout.
1265 if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1)
1266 goto drop;
1268 req = inet_reqsk_alloc(&tcp_request_sock_ops);
1269 if (!req)
1270 goto drop;
1272 #ifdef CONFIG_TCP_MD5SIG
1273 tcp_rsk(req)->af_specific = &tcp_request_sock_ipv4_ops;
1274 #endif
1276 tcp_clear_options(&tmp_opt);
1277 tmp_opt.mss_clamp = TCP_MSS_DEFAULT;
1278 tmp_opt.user_mss = tp->rx_opt.user_mss;
1279 tcp_parse_options(skb, &tmp_opt, &hash_location, 0);
1281 if (tmp_opt.cookie_plus > 0 &&
1282 tmp_opt.saw_tstamp &&
1283 !tp->rx_opt.cookie_out_never &&
1284 (sysctl_tcp_cookie_size > 0 ||
1285 (tp->cookie_values != NULL &&
1286 tp->cookie_values->cookie_desired > 0))) {
1287 u8 *c;
1288 u32 *mess = &tmp_ext.cookie_bakery[COOKIE_DIGEST_WORDS];
1289 int l = tmp_opt.cookie_plus - TCPOLEN_COOKIE_BASE;
1291 if (tcp_cookie_generator(&tmp_ext.cookie_bakery[0]) != 0)
1292 goto drop_and_release;
1294 /* Secret recipe starts with IP addresses */
1295 *mess++ ^= (__force u32)daddr;
1296 *mess++ ^= (__force u32)saddr;
1298 /* plus variable length Initiator Cookie */
1299 c = (u8 *)mess;
1300 while (l-- > 0)
1301 *c++ ^= *hash_location++;
1303 #ifdef CONFIG_SYN_COOKIES
1304 want_cookie = 0; /* not our kind of cookie */
1305 #endif
1306 tmp_ext.cookie_out_never = 0; /* false */
1307 tmp_ext.cookie_plus = tmp_opt.cookie_plus;
1308 } else if (!tp->rx_opt.cookie_in_always) {
1309 /* redundant indications, but ensure initialization. */
1310 tmp_ext.cookie_out_never = 1; /* true */
1311 tmp_ext.cookie_plus = 0;
1312 } else {
1313 goto drop_and_release;
1315 tmp_ext.cookie_in_always = tp->rx_opt.cookie_in_always;
1317 if (want_cookie && !tmp_opt.saw_tstamp)
1318 tcp_clear_options(&tmp_opt);
1320 tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
1321 tcp_openreq_init(req, &tmp_opt, skb);
1323 ireq = inet_rsk(req);
1324 ireq->loc_addr = daddr;
1325 ireq->rmt_addr = saddr;
1326 ireq->no_srccheck = inet_sk(sk)->transparent;
1327 ireq->opt = tcp_v4_save_options(sk, skb);
1329 if (security_inet_conn_request(sk, skb, req))
1330 goto drop_and_free;
1332 if (!want_cookie || tmp_opt.tstamp_ok)
1333 TCP_ECN_create_request(req, tcp_hdr(skb));
1335 if (want_cookie) {
1336 isn = cookie_v4_init_sequence(sk, skb, &req->mss);
1337 req->cookie_ts = tmp_opt.tstamp_ok;
1338 } else if (!isn) {
1339 struct inet_peer *peer = NULL;
1341 /* VJ's idea. We save last timestamp seen
1342 * from the destination in peer table, when entering
1343 * state TIME-WAIT, and check against it before
1344 * accepting new connection request.
1346 * If "isn" is not zero, this request hit alive
1347 * timewait bucket, so that all the necessary checks
1348 * are made in the function processing timewait state.
1350 if (tmp_opt.saw_tstamp &&
1351 tcp_death_row.sysctl_tw_recycle &&
1352 (dst = inet_csk_route_req(sk, req)) != NULL &&
1353 (peer = rt_get_peer((struct rtable *)dst)) != NULL &&
1354 peer->v4daddr == saddr) {
1355 inet_peer_refcheck(peer);
1356 if ((u32)get_seconds() - peer->tcp_ts_stamp < TCP_PAWS_MSL &&
1357 (s32)(peer->tcp_ts - req->ts_recent) >
1358 TCP_PAWS_WINDOW) {
1359 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSPASSIVEREJECTED);
1360 goto drop_and_release;
1363 /* Kill the following clause, if you dislike this way. */
1364 else if (!sysctl_tcp_syncookies &&
1365 (sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) <
1366 (sysctl_max_syn_backlog >> 2)) &&
1367 (!peer || !peer->tcp_ts_stamp) &&
1368 (!dst || !dst_metric(dst, RTAX_RTT))) {
1369 /* Without syncookies the last quarter of the
1370 * backlog is filled with destinations
1371 * proven to be alive.
1372 * It means that we continue to communicate
1373 * with destinations already remembered
1374 * at the moment of the synflood.
1376 LIMIT_NETDEBUG(KERN_DEBUG "TCP: drop open request from %pI4/%u\n",
1377 &saddr, ntohs(tcp_hdr(skb)->source));
1378 goto drop_and_release;
1381 isn = tcp_v4_init_sequence(skb);
1383 tcp_rsk(req)->snt_isn = isn;
1385 if (tcp_v4_send_synack(sk, dst, req,
1386 (struct request_values *)&tmp_ext) ||
1387 want_cookie)
1388 goto drop_and_free;
1390 inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
1391 return 0;
1393 drop_and_release:
1394 dst_release(dst);
1395 drop_and_free:
1396 reqsk_free(req);
1397 drop:
1398 return 0;
1400 EXPORT_SYMBOL(tcp_v4_conn_request);
1404 * The three way handshake has completed - we got a valid synack -
1405 * now create the new socket.
1407 struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
1408 struct request_sock *req,
1409 struct dst_entry *dst)
1411 struct inet_request_sock *ireq;
1412 struct inet_sock *newinet;
1413 struct tcp_sock *newtp;
1414 struct sock *newsk;
1415 #ifdef CONFIG_TCP_MD5SIG
1416 struct tcp_md5sig_key *key;
1417 #endif
1419 if (sk_acceptq_is_full(sk))
1420 goto exit_overflow;
1422 if (!dst && (dst = inet_csk_route_req(sk, req)) == NULL)
1423 goto exit;
1425 newsk = tcp_create_openreq_child(sk, req, skb);
1426 if (!newsk)
1427 goto exit;
1429 newsk->sk_gso_type = SKB_GSO_TCPV4;
1430 sk_setup_caps(newsk, dst);
1432 newtp = tcp_sk(newsk);
1433 newinet = inet_sk(newsk);
1434 ireq = inet_rsk(req);
1435 newinet->inet_daddr = ireq->rmt_addr;
1436 newinet->inet_rcv_saddr = ireq->loc_addr;
1437 newinet->inet_saddr = ireq->loc_addr;
1438 newinet->opt = ireq->opt;
1439 ireq->opt = NULL;
1440 newinet->mc_index = inet_iif(skb);
1441 newinet->mc_ttl = ip_hdr(skb)->ttl;
1442 inet_csk(newsk)->icsk_ext_hdr_len = 0;
1443 if (newinet->opt)
1444 inet_csk(newsk)->icsk_ext_hdr_len = newinet->opt->optlen;
1445 newinet->inet_id = newtp->write_seq ^ jiffies;
1447 tcp_mtup_init(newsk);
1448 tcp_sync_mss(newsk, dst_mtu(dst));
1449 newtp->advmss = dst_metric(dst, RTAX_ADVMSS);
1450 if (tcp_sk(sk)->rx_opt.user_mss &&
1451 tcp_sk(sk)->rx_opt.user_mss < newtp->advmss)
1452 newtp->advmss = tcp_sk(sk)->rx_opt.user_mss;
1454 tcp_initialize_rcv_mss(newsk);
1456 #ifdef CONFIG_TCP_MD5SIG
1457 /* Copy over the MD5 key from the original socket */
1458 key = tcp_v4_md5_do_lookup(sk, newinet->inet_daddr);
1459 if (key != NULL) {
1461 * We're using one, so create a matching key
1462 * on the newsk structure. If we fail to get
1463 * memory, then we end up not copying the key
1464 * across. Shucks.
1466 char *newkey = kmemdup(key->key, key->keylen, GFP_ATOMIC);
1467 if (newkey != NULL)
1468 tcp_v4_md5_do_add(newsk, newinet->inet_daddr,
1469 newkey, key->keylen);
1470 sk_nocaps_add(newsk, NETIF_F_GSO_MASK);
1472 #endif
1474 __inet_hash_nolisten(newsk, NULL);
1475 __inet_inherit_port(sk, newsk);
1477 return newsk;
1479 exit_overflow:
1480 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
1481 exit:
1482 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
1483 dst_release(dst);
1484 return NULL;
1486 EXPORT_SYMBOL(tcp_v4_syn_recv_sock);
1488 static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
1490 struct tcphdr *th = tcp_hdr(skb);
1491 const struct iphdr *iph = ip_hdr(skb);
1492 struct sock *nsk;
1493 struct request_sock **prev;
1494 /* Find possible connection requests. */
1495 struct request_sock *req = inet_csk_search_req(sk, &prev, th->source,
1496 iph->saddr, iph->daddr);
1497 if (req)
1498 return tcp_check_req(sk, skb, req, prev);
1500 nsk = inet_lookup_established(sock_net(sk), &tcp_hashinfo, iph->saddr,
1501 th->source, iph->daddr, th->dest, inet_iif(skb));
1503 if (nsk) {
1504 if (nsk->sk_state != TCP_TIME_WAIT) {
1505 bh_lock_sock(nsk);
1506 return nsk;
1508 inet_twsk_put(inet_twsk(nsk));
1509 return NULL;
1512 #ifdef CONFIG_SYN_COOKIES
1513 if (!th->syn)
1514 sk = cookie_v4_check(sk, skb, &(IPCB(skb)->opt));
1515 #endif
1516 return sk;
1519 static __sum16 tcp_v4_checksum_init(struct sk_buff *skb)
1521 const struct iphdr *iph = ip_hdr(skb);
1523 if (skb->ip_summed == CHECKSUM_COMPLETE) {
1524 if (!tcp_v4_check(skb->len, iph->saddr,
1525 iph->daddr, skb->csum)) {
1526 skb->ip_summed = CHECKSUM_UNNECESSARY;
1527 return 0;
1531 skb->csum = csum_tcpudp_nofold(iph->saddr, iph->daddr,
1532 skb->len, IPPROTO_TCP, 0);
1534 if (skb->len <= 76) {
1535 return __skb_checksum_complete(skb);
1537 return 0;
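/* CHECKSUM_COMPLETE means the device already summed the whole packet,
 * so it is verified against the pseudo-header right here. Otherwise
 * skb->csum is seeded with the pseudo-header sum; short segments
 * (<= 76 bytes) are checked immediately, longer ones are left for
 * later, when the data is copied or checksummed anyway.
 */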
1541 /* The socket must have its spinlock held when we get
1542 * here.
1544 * We have a potential double-lock case here, so even when
1545 * doing backlog processing we use the BH locking scheme.
1546 * This is because we cannot sleep with the original spinlock
1547 * held.
1549 int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
1551 struct sock *rsk;
1552 #ifdef CONFIG_TCP_MD5SIG
1554 * We really want to reject the packet as early as possible
1555 * if:
1556 * o We're expecting an MD5'd packet and there is no MD5 TCP option
1557 * o There is an MD5 option and we're not expecting one
1559 if (tcp_v4_inbound_md5_hash(sk, skb))
1560 goto discard;
1561 #endif
1563 if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
1564 sock_rps_save_rxhash(sk, skb->rxhash);
1565 TCP_CHECK_TIMER(sk);
1566 if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len)) {
1567 rsk = sk;
1568 goto reset;
1570 TCP_CHECK_TIMER(sk);
1571 return 0;
1574 if (skb->len < tcp_hdrlen(skb) || tcp_checksum_complete(skb))
1575 goto csum_err;
1577 if (sk->sk_state == TCP_LISTEN) {
1578 struct sock *nsk = tcp_v4_hnd_req(sk, skb);
1579 if (!nsk)
1580 goto discard;
1582 if (nsk != sk) {
1583 if (tcp_child_process(sk, nsk, skb)) {
1584 rsk = nsk;
1585 goto reset;
1587 return 0;
1589 } else
1590 sock_rps_save_rxhash(sk, skb->rxhash);
1593 TCP_CHECK_TIMER(sk);
1594 if (tcp_rcv_state_process(sk, skb, tcp_hdr(skb), skb->len)) {
1595 rsk = sk;
1596 goto reset;
1598 TCP_CHECK_TIMER(sk);
1599 return 0;
1601 reset:
1602 tcp_v4_send_reset(rsk, skb);
1603 discard:
1604 kfree_skb(skb);
1605 /* Be careful here. If this function gets more complicated and
1606 * gcc suffers from register pressure on the x86, sk (in %ebx)
1607 * might be destroyed here. This current version compiles correctly,
1608 * but you have been warned.
1610 return 0;
1612 csum_err:
1613 TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS);
1614 goto discard;
1616 EXPORT_SYMBOL(tcp_v4_do_rcv);
1619 * From tcp_input.c
1622 int BCMFASTPATH_HOST tcp_v4_rcv(struct sk_buff *skb)
1624 const struct iphdr *iph;
1625 struct tcphdr *th;
1626 struct sock *sk;
1627 int ret;
1628 struct net *net = dev_net(skb->dev);
1630 if (skb->pkt_type != PACKET_HOST)
1631 goto discard_it;
1633 /* Count it even if it's bad */
1634 TCP_INC_STATS_BH(net, TCP_MIB_INSEGS);
1636 if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
1637 goto discard_it;
1639 th = tcp_hdr(skb);
1641 if (th->doff < sizeof(struct tcphdr) / 4)
1642 goto bad_packet;
1643 if (!pskb_may_pull(skb, th->doff * 4))
1644 goto discard_it;
1646 /* An explanation is required here, I think.
1647 * Packet length and doff are validated by header prediction,
1648 * provided case of th->doff==0 is eliminated.
1649 * So, we defer the checks. */
1650 if (!skb_csum_unnecessary(skb) && tcp_v4_checksum_init(skb))
1651 goto bad_packet;
1653 th = tcp_hdr(skb);
1654 iph = ip_hdr(skb);
1655 TCP_SKB_CB(skb)->seq = ntohl(th->seq);
1656 TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
1657 skb->len - th->doff * 4);
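/* end_seq counts the payload plus one for SYN and one for FIN, since
 * each of those flags consumes a unit of sequence space. */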
1658 TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
1659 TCP_SKB_CB(skb)->when = 0;
1660 TCP_SKB_CB(skb)->flags = iph->tos;
1661 TCP_SKB_CB(skb)->sacked = 0;
1663 sk = __inet_lookup_skb(&tcp_hashinfo, skb, th->source, th->dest);
1664 if (!sk)
1665 goto no_tcp_socket;
1667 process:
1668 if (sk->sk_state == TCP_TIME_WAIT)
1669 goto do_time_wait;
1671 if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) {
1672 NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP);
1673 goto discard_and_relse;
1676 if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
1677 goto discard_and_relse;
1678 nf_reset(skb);
1680 if (sk_filter(sk, skb))
1681 goto discard_and_relse;
1683 skb->dev = NULL;
1685 bh_lock_sock_nested(sk);
1686 ret = 0;
1687 if (!sock_owned_by_user(sk)) {
1688 #ifdef CONFIG_NET_DMA
1689 struct tcp_sock *tp = tcp_sk(sk);
1690 if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list)
1691 tp->ucopy.dma_chan = dma_find_channel(DMA_MEMCPY);
1692 if (tp->ucopy.dma_chan)
1693 ret = tcp_v4_do_rcv(sk, skb);
1694 else
1695 #endif
1697 if (!tcp_prequeue(sk, skb))
1698 ret = tcp_v4_do_rcv(sk, skb);
1700 } else if (unlikely(sk_add_backlog(sk, skb))) {
1701 bh_unlock_sock(sk);
1702 NET_INC_STATS_BH(net, LINUX_MIB_TCPBACKLOGDROP);
1703 goto discard_and_relse;
1705 bh_unlock_sock(sk);
1707 sock_put(sk);
1709 return ret;
1711 no_tcp_socket:
1712 if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
1713 goto discard_it;
1715 if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {
1716 bad_packet:
1717 TCP_INC_STATS_BH(net, TCP_MIB_INERRS);
1718 } else {
1719 tcp_v4_send_reset(NULL, skb);
1722 discard_it:
1723 /* Discard frame. */
1724 kfree_skb(skb);
1725 return 0;
1727 discard_and_relse:
1728 sock_put(sk);
1729 goto discard_it;
1731 do_time_wait:
1732 if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
1733 inet_twsk_put(inet_twsk(sk));
1734 goto discard_it;
1737 if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {
1738 TCP_INC_STATS_BH(net, TCP_MIB_INERRS);
1739 inet_twsk_put(inet_twsk(sk));
1740 goto discard_it;
1742 switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
1743 case TCP_TW_SYN: {
1744 struct sock *sk2 = inet_lookup_listener(dev_net(skb->dev),
1745 &tcp_hashinfo,
1746 iph->daddr, th->dest,
1747 inet_iif(skb));
1748 if (sk2) {
1749 inet_twsk_deschedule(inet_twsk(sk), &tcp_death_row);
1750 inet_twsk_put(inet_twsk(sk));
1751 sk = sk2;
1752 goto process;
1754 /* Fall through to ACK */
1756 case TCP_TW_ACK:
1757 tcp_v4_timewait_ack(sk, skb);
1758 break;
1759 case TCP_TW_RST:
1760 goto no_tcp_socket;
1761 case TCP_TW_SUCCESS:;
1763 goto discard_it;
1766 /* VJ's idea. Save last timestamp seen from this destination
1767 * and hold it at least for normal timewait interval to use for duplicate
1768 * segment detection in subsequent connections, before they enter synchronized
1769 * state.
1772 int tcp_v4_remember_stamp(struct sock *sk)
1774 struct inet_sock *inet = inet_sk(sk);
1775 struct tcp_sock *tp = tcp_sk(sk);
1776 struct rtable *rt = (struct rtable *)__sk_dst_get(sk);
1777 struct inet_peer *peer = NULL;
1778 int release_it = 0;
1780 if (!rt || rt->rt_dst != inet->inet_daddr) {
1781 peer = inet_getpeer(inet->inet_daddr, 1);
1782 release_it = 1;
1783 } else {
1784 if (!rt->peer)
1785 rt_bind_peer(rt, 1);
1786 peer = rt->peer;
1789 if (peer) {
1790 if ((s32)(peer->tcp_ts - tp->rx_opt.ts_recent) <= 0 ||
1791 ((u32)get_seconds() - peer->tcp_ts_stamp > TCP_PAWS_MSL &&
1792 peer->tcp_ts_stamp <= (u32)tp->rx_opt.ts_recent_stamp)) {
1793 peer->tcp_ts_stamp = (u32)tp->rx_opt.ts_recent_stamp;
1794 peer->tcp_ts = tp->rx_opt.ts_recent;
1796 if (release_it)
1797 inet_putpeer(peer);
1798 return 1;
1801 return 0;
1803 EXPORT_SYMBOL(tcp_v4_remember_stamp);
1805 int tcp_v4_tw_remember_stamp(struct inet_timewait_sock *tw)
1807 struct inet_peer *peer = inet_getpeer(tw->tw_daddr, 1);
1809 if (peer) {
1810 const struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw);
1812 if ((s32)(peer->tcp_ts - tcptw->tw_ts_recent) <= 0 ||
1813 ((u32)get_seconds() - peer->tcp_ts_stamp > TCP_PAWS_MSL &&
1814 peer->tcp_ts_stamp <= (u32)tcptw->tw_ts_recent_stamp)) {
1815 peer->tcp_ts_stamp = (u32)tcptw->tw_ts_recent_stamp;
1816 peer->tcp_ts = tcptw->tw_ts_recent;
1818 inet_putpeer(peer);
1819 return 1;
1822 return 0;
1825 const struct inet_connection_sock_af_ops ipv4_specific = {
1826 .queue_xmit = ip_queue_xmit,
1827 .send_check = tcp_v4_send_check,
1828 .rebuild_header = inet_sk_rebuild_header,
1829 .conn_request = tcp_v4_conn_request,
1830 .syn_recv_sock = tcp_v4_syn_recv_sock,
1831 .remember_stamp = tcp_v4_remember_stamp,
1832 .net_header_len = sizeof(struct iphdr),
1833 .setsockopt = ip_setsockopt,
1834 .getsockopt = ip_getsockopt,
1835 .addr2sockaddr = inet_csk_addr2sockaddr,
1836 .sockaddr_len = sizeof(struct sockaddr_in),
1837 .bind_conflict = inet_csk_bind_conflict,
1838 #ifdef CONFIG_COMPAT
1839 .compat_setsockopt = compat_ip_setsockopt,
1840 .compat_getsockopt = compat_ip_getsockopt,
1841 #endif
1843 EXPORT_SYMBOL(ipv4_specific);
1845 #ifdef CONFIG_TCP_MD5SIG
1846 static const struct tcp_sock_af_ops tcp_sock_ipv4_specific = {
1847 .md5_lookup = tcp_v4_md5_lookup,
1848 .calc_md5_hash = tcp_v4_md5_hash_skb,
1849 .md5_add = tcp_v4_md5_add_func,
1850 .md5_parse = tcp_v4_parse_md5_keys,
1852 #endif
1854 /* NOTE: A lot of things are set to zero explicitly by the call to
1855 * sk_alloc(), so they need not be done here.
1857 static int tcp_v4_init_sock(struct sock *sk)
1859 struct inet_connection_sock *icsk = inet_csk(sk);
1860 struct tcp_sock *tp = tcp_sk(sk);
1862 skb_queue_head_init(&tp->out_of_order_queue);
1863 tcp_init_xmit_timers(sk);
1864 tcp_prequeue_init(tp);
1866 icsk->icsk_rto = TCP_TIMEOUT_INIT;
1867 tp->mdev = TCP_TIMEOUT_INIT;
1869 /* So many TCP implementations out there (incorrectly) count the
1870 * initial SYN frame in their delayed-ACK and congestion control
1871 * algorithms that we must have the following bandaid to talk
1872 * efficiently to them. -DaveM
1874 tp->snd_cwnd = 2;
1876 /* See draft-stevens-tcpca-spec-01 for discussion of the
1877 * initialization of these values.
1879 tp->snd_ssthresh = TCP_INFINITE_SSTHRESH;
1880 tp->snd_cwnd_clamp = ~0;
1881 tp->mss_cache = TCP_MSS_DEFAULT;
1883 tp->reordering = sysctl_tcp_reordering;
1884 icsk->icsk_ca_ops = &tcp_init_congestion_ops;
1886 sk->sk_state = TCP_CLOSE;
1888 sk->sk_write_space = sk_stream_write_space;
1889 sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);
1891 icsk->icsk_af_ops = &ipv4_specific;
1892 icsk->icsk_sync_mss = tcp_sync_mss;
1893 #ifdef CONFIG_TCP_MD5SIG
1894 tp->af_specific = &tcp_sock_ipv4_specific;
1895 #endif
1897 /* TCP Cookie Transactions */
1898 if (sysctl_tcp_cookie_size > 0) {
1899 /* Default, cookies without s_data_payload. */
1900 tp->cookie_values =
1901 kzalloc(sizeof(*tp->cookie_values),
1902 sk->sk_allocation);
1903 if (tp->cookie_values != NULL)
1904 kref_init(&tp->cookie_values->kref);
1906 /* Presumed zeroed, in order of appearance:
1907 * cookie_in_always, cookie_out_never,
1908 * s_data_constant, s_data_in, s_data_out
1910 sk->sk_sndbuf = sysctl_tcp_wmem[1];
1911 sk->sk_rcvbuf = sysctl_tcp_rmem[1];
1913 local_bh_disable();
1914 percpu_counter_inc(&tcp_sockets_allocated);
1915 local_bh_enable();
1917 return 0;
1920 void tcp_v4_destroy_sock(struct sock *sk)
1922 struct tcp_sock *tp = tcp_sk(sk);
1924 tcp_clear_xmit_timers(sk);
1926 tcp_cleanup_congestion_control(sk);
1928 /* Clean up the write buffer. */
1929 tcp_write_queue_purge(sk);
1931 /* Cleans up our, hopefully empty, out_of_order_queue. */
1932 __skb_queue_purge(&tp->out_of_order_queue);
1934 #ifdef CONFIG_TCP_MD5SIG
1935 /* Clean up the MD5 key list, if any */
1936 if (tp->md5sig_info) {
1937 tcp_v4_clear_md5_list(sk);
1938 kfree(tp->md5sig_info);
1939 tp->md5sig_info = NULL;
1941 #endif
1943 #ifdef CONFIG_NET_DMA
1944 /* Cleans up our sk_async_wait_queue */
1945 __skb_queue_purge(&sk->sk_async_wait_queue);
1946 #endif
1948 /* Clean the prequeue; it must really be empty */
1949 __skb_queue_purge(&tp->ucopy.prequeue);
1951 /* Clean up a referenced TCP bind bucket. */
1952 if (inet_csk(sk)->icsk_bind_hash)
1953 inet_put_port(sk);
1956 * If sendmsg cached page exists, toss it.
1958 if (sk->sk_sndmsg_page) {
1959 __free_page(sk->sk_sndmsg_page);
1960 sk->sk_sndmsg_page = NULL;
1963 /* TCP Cookie Transactions */
1964 if (tp->cookie_values != NULL) {
1965 kref_put(&tp->cookie_values->kref,
1966 tcp_cookie_values_release);
1967 tp->cookie_values = NULL;
1970 percpu_counter_dec(&tcp_sockets_allocated);
1972 EXPORT_SYMBOL(tcp_v4_destroy_sock);
1974 #ifdef CONFIG_PROC_FS
1975 /* Proc filesystem TCP sock list dumping. */
1977 static inline struct inet_timewait_sock *tw_head(struct hlist_nulls_head *head)
1979 return hlist_nulls_empty(head) ? NULL :
1980 list_entry(head->first, struct inet_timewait_sock, tw_node);
1983 static inline struct inet_timewait_sock *tw_next(struct inet_timewait_sock *tw)
1985 return !is_a_nulls(tw->tw_node.next) ?
1986 hlist_nulls_entry(tw->tw_node.next, typeof(*tw), tw_node) : NULL;
1990 * Get the next listener socket following cur. If cur is NULL, get the first socket
1991 * starting from bucket given in st->bucket; when st->bucket is zero the
1992 * very first socket in the hash table is returned.
1994 static void *listening_get_next(struct seq_file *seq, void *cur)
1996 struct inet_connection_sock *icsk;
1997 struct hlist_nulls_node *node;
1998 struct sock *sk = cur;
1999 struct inet_listen_hashbucket *ilb;
2000 struct tcp_iter_state *st = seq->private;
2001 struct net *net = seq_file_net(seq);
2003 if (!sk) {
2004 ilb = &tcp_hashinfo.listening_hash[st->bucket];
2005 spin_lock_bh(&ilb->lock);
2006 sk = sk_nulls_head(&ilb->head);
2007 st->offset = 0;
2008 goto get_sk;
2010 ilb = &tcp_hashinfo.listening_hash[st->bucket];
2011 ++st->num;
2012 ++st->offset;
2014 if (st->state == TCP_SEQ_STATE_OPENREQ) {
2015 struct request_sock *req = cur;
2017 icsk = inet_csk(st->syn_wait_sk);
2018 req = req->dl_next;
2019 while (1) {
2020 while (req) {
2021 if (req->rsk_ops->family == st->family) {
2022 cur = req;
2023 goto out;
2025 req = req->dl_next;
2027 st->offset = 0;
2028 if (++st->sbucket >= icsk->icsk_accept_queue.listen_opt->nr_table_entries)
2029 break;
2030 get_req:
2031 req = icsk->icsk_accept_queue.listen_opt->syn_table[st->sbucket];
2033 sk = sk_next(st->syn_wait_sk);
2034 st->state = TCP_SEQ_STATE_LISTENING;
2035 read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
2036 } else {
2037 icsk = inet_csk(sk);
2038 read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
2039 if (reqsk_queue_len(&icsk->icsk_accept_queue))
2040 goto start_req;
2041 read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
2042 sk = sk_next(sk);
2044 get_sk:
2045 sk_nulls_for_each_from(sk, node) {
2046 if (sk->sk_family == st->family && net_eq(sock_net(sk), net)) {
2047 cur = sk;
2048 goto out;
2050 icsk = inet_csk(sk);
2051 read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
2052 if (reqsk_queue_len(&icsk->icsk_accept_queue)) {
2053 start_req:
2054 st->uid = sock_i_uid(sk);
2055 st->syn_wait_sk = sk;
2056 st->state = TCP_SEQ_STATE_OPENREQ;
2057 st->sbucket = 0;
2058 goto get_req;
2060 read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
2062 spin_unlock_bh(&ilb->lock);
2063 st->offset = 0;
2064 if (++st->bucket < INET_LHTABLE_SIZE) {
2065 ilb = &tcp_hashinfo.listening_hash[st->bucket];
2066 spin_lock_bh(&ilb->lock);
2067 sk = sk_nulls_head(&ilb->head);
2068 goto get_sk;
2070 cur = NULL;
2071 out:
2072 return cur;
static void *listening_get_idx(struct seq_file *seq, loff_t *pos)
{
	struct tcp_iter_state *st = seq->private;
	void *rc;

	st->bucket = 0;
	st->offset = 0;
	rc = listening_get_next(seq, NULL);

	while (rc && *pos) {
		rc = listening_get_next(seq, rc);
		--*pos;
	}
	return rc;
}
static inline int empty_bucket(struct tcp_iter_state *st)
{
	return hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].chain) &&
		hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].twchain);
}
/*
 * Get first established socket starting from bucket given in st->bucket.
 * If st->bucket is zero, the very first socket in the hash is returned.
 */
static void *established_get_first(struct seq_file *seq)
{
	struct tcp_iter_state *st = seq->private;
	struct net *net = seq_file_net(seq);
	void *rc = NULL;

	st->offset = 0;
	for (; st->bucket <= tcp_hashinfo.ehash_mask; ++st->bucket) {
		struct sock *sk;
		struct hlist_nulls_node *node;
		struct inet_timewait_sock *tw;
		spinlock_t *lock = inet_ehash_lockp(&tcp_hashinfo, st->bucket);

		/* Lockless fast path for the common case of empty buckets */
		if (empty_bucket(st))
			continue;

		spin_lock_bh(lock);
		sk_nulls_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) {
			if (sk->sk_family != st->family ||
			    !net_eq(sock_net(sk), net)) {
				continue;
			}
			rc = sk;
			goto out;
		}
		st->state = TCP_SEQ_STATE_TIME_WAIT;
		inet_twsk_for_each(tw, node,
				   &tcp_hashinfo.ehash[st->bucket].twchain) {
			if (tw->tw_family != st->family ||
			    !net_eq(twsk_net(tw), net)) {
				continue;
			}
			rc = tw;
			goto out;
		}
		spin_unlock_bh(lock);
		st->state = TCP_SEQ_STATE_ESTABLISHED;
	}
out:
	return rc;
}
static void *established_get_next(struct seq_file *seq, void *cur)
{
	struct sock *sk = cur;
	struct inet_timewait_sock *tw;
	struct hlist_nulls_node *node;
	struct tcp_iter_state *st = seq->private;
	struct net *net = seq_file_net(seq);

	++st->num;
	++st->offset;

	if (st->state == TCP_SEQ_STATE_TIME_WAIT) {
		tw = cur;
		tw = tw_next(tw);
get_tw:
		while (tw && (tw->tw_family != st->family || !net_eq(twsk_net(tw), net))) {
			tw = tw_next(tw);
		}
		if (tw) {
			cur = tw;
			goto out;
		}
		spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
		st->state = TCP_SEQ_STATE_ESTABLISHED;

		/* Look for the next non-empty bucket */
		st->offset = 0;
		while (++st->bucket <= tcp_hashinfo.ehash_mask &&
		       empty_bucket(st))
			;
		if (st->bucket > tcp_hashinfo.ehash_mask)
			return NULL;

		spin_lock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
		sk = sk_nulls_head(&tcp_hashinfo.ehash[st->bucket].chain);
	} else
		sk = sk_nulls_next(sk);

	sk_nulls_for_each_from(sk, node) {
		if (sk->sk_family == st->family && net_eq(sock_net(sk), net))
			goto found;
	}

	st->state = TCP_SEQ_STATE_TIME_WAIT;
	tw = tw_head(&tcp_hashinfo.ehash[st->bucket].twchain);
	goto get_tw;
found:
	cur = sk;
out:
	return cur;
}
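/*
 * Each established-hash bucket is scanned in two phases: the regular chain
 * of live sockets first, then the twchain of TIME_WAIT sockets, with
 * st->state recording which phase the iterator is in so that
 * tcp4_seq_show() picks the matching formatting helper.
 */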
static void *established_get_idx(struct seq_file *seq, loff_t pos)
{
	struct tcp_iter_state *st = seq->private;
	void *rc;

	st->bucket = 0;
	rc = established_get_first(seq);

	while (rc && pos) {
		rc = established_get_next(seq, rc);
		--pos;
	}
	return rc;
}
static void *tcp_get_idx(struct seq_file *seq, loff_t pos)
{
	void *rc;
	struct tcp_iter_state *st = seq->private;

	st->state = TCP_SEQ_STATE_LISTENING;
	rc = listening_get_idx(seq, &pos);

	if (!rc) {
		st->state = TCP_SEQ_STATE_ESTABLISHED;
		rc = established_get_idx(seq, pos);
	}

	return rc;
}
static void *tcp_seek_last_pos(struct seq_file *seq)
{
	struct tcp_iter_state *st = seq->private;
	int offset = st->offset;
	int orig_num = st->num;
	void *rc = NULL;

	switch (st->state) {
	case TCP_SEQ_STATE_OPENREQ:
	case TCP_SEQ_STATE_LISTENING:
		if (st->bucket >= INET_LHTABLE_SIZE)
			break;
		st->state = TCP_SEQ_STATE_LISTENING;
		rc = listening_get_next(seq, NULL);
		while (offset-- && rc)
			rc = listening_get_next(seq, rc);
		if (rc)
			break;
		st->bucket = 0;
		/* Fallthrough */
	case TCP_SEQ_STATE_ESTABLISHED:
	case TCP_SEQ_STATE_TIME_WAIT:
		st->state = TCP_SEQ_STATE_ESTABLISHED;
		if (st->bucket > tcp_hashinfo.ehash_mask)
			break;
		rc = established_get_first(seq);
		while (offset-- && rc)
			rc = established_get_next(seq, rc);
	}

	st->num = orig_num;

	return rc;
}
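/*
 * tcp_seek_last_pos() lets a chunked read of /proc/net/tcp resume from the
 * bucket and offset remembered in the iterator state instead of rescanning
 * the hash tables from the beginning; tcp_seq_start() only uses it when the
 * requested *pos matches the position recorded by the previous read.
 */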
static void *tcp_seq_start(struct seq_file *seq, loff_t *pos)
{
	struct tcp_iter_state *st = seq->private;
	void *rc;

	if (*pos && *pos == st->last_pos) {
		rc = tcp_seek_last_pos(seq);
		if (rc)
			goto out;
	}

	st->state = TCP_SEQ_STATE_LISTENING;
	st->num = 0;
	st->bucket = 0;
	st->offset = 0;
	rc = *pos ? tcp_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;

out:
	st->last_pos = *pos;
	return rc;
}
static void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct tcp_iter_state *st = seq->private;
	void *rc = NULL;

	if (v == SEQ_START_TOKEN) {
		rc = tcp_get_idx(seq, 0);
		goto out;
	}

	switch (st->state) {
	case TCP_SEQ_STATE_OPENREQ:
	case TCP_SEQ_STATE_LISTENING:
		rc = listening_get_next(seq, v);
		if (!rc) {
			st->state = TCP_SEQ_STATE_ESTABLISHED;
			st->bucket = 0;
			st->offset = 0;
			rc = established_get_first(seq);
		}
		break;
	case TCP_SEQ_STATE_ESTABLISHED:
	case TCP_SEQ_STATE_TIME_WAIT:
		rc = established_get_next(seq, v);
		break;
	}
out:
	++*pos;
	st->last_pos = *pos;
	return rc;
}
static void tcp_seq_stop(struct seq_file *seq, void *v)
{
	struct tcp_iter_state *st = seq->private;

	switch (st->state) {
	case TCP_SEQ_STATE_OPENREQ:
		if (v) {
			struct inet_connection_sock *icsk = inet_csk(st->syn_wait_sk);
			read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
		}
		/* fall through: the listening bucket lock is still held */
	case TCP_SEQ_STATE_LISTENING:
		if (v != SEQ_START_TOKEN)
			spin_unlock_bh(&tcp_hashinfo.listening_hash[st->bucket].lock);
		break;
	case TCP_SEQ_STATE_TIME_WAIT:
	case TCP_SEQ_STATE_ESTABLISHED:
		if (v)
			spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
		break;
	}
}
static int tcp_seq_open(struct inode *inode, struct file *file)
{
	struct tcp_seq_afinfo *afinfo = PDE(inode)->data;
	struct tcp_iter_state *s;
	int err;

	err = seq_open_net(inode, file, &afinfo->seq_ops,
			   sizeof(struct tcp_iter_state));
	if (err < 0)
		return err;

	s = ((struct seq_file *)file->private_data)->private;
	s->family = afinfo->family;
	s->last_pos = 0;
	return 0;
}
int tcp_proc_register(struct net *net, struct tcp_seq_afinfo *afinfo)
{
	int rc = 0;
	struct proc_dir_entry *p;

	afinfo->seq_fops.open		= tcp_seq_open;
	afinfo->seq_fops.read		= seq_read;
	afinfo->seq_fops.llseek		= seq_lseek;
	afinfo->seq_fops.release	= seq_release_net;

	afinfo->seq_ops.start		= tcp_seq_start;
	afinfo->seq_ops.next		= tcp_seq_next;
	afinfo->seq_ops.stop		= tcp_seq_stop;

	p = proc_create_data(afinfo->name, S_IRUGO, net->proc_net,
			     &afinfo->seq_fops, afinfo);
	if (!p)
		rc = -ENOMEM;
	return rc;
}
EXPORT_SYMBOL(tcp_proc_register);
void tcp_proc_unregister(struct net *net, struct tcp_seq_afinfo *afinfo)
{
	proc_net_remove(net, afinfo->name);
}
EXPORT_SYMBOL(tcp_proc_unregister);
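/*
 * Sketch of how a caller hooks an additional dump into this infrastructure
 * (the "tcp_example" name and my_show_fn are hypothetical, for illustration
 * only; tcp4_seq_afinfo further below is the real AF_INET instance, and
 * tcp_ipv6.c registers a matching tcp6 one):
 *
 *	static struct tcp_seq_afinfo my_afinfo = {
 *		.name	 = "tcp_example",
 *		.family	 = AF_INET,
 *		.seq_ops = { .show = my_show_fn },
 *	};
 *
 *	err = tcp_proc_register(net, &my_afinfo);
 *	...
 *	tcp_proc_unregister(net, &my_afinfo);
 */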
static void get_openreq4(struct sock *sk, struct request_sock *req,
			 struct seq_file *f, int i, int uid, int *len)
{
	const struct inet_request_sock *ireq = inet_rsk(req);
	int ttd = req->expires - jiffies;

	seq_printf(f, "%4d: %08X:%04X %08X:%04X"
		" %02X %08X:%08X %02X:%08lX %08X %5d %8d %u %d %p%n",
		i,
		ireq->loc_addr,
		ntohs(inet_sk(sk)->inet_sport),
		ireq->rmt_addr,
		ntohs(ireq->rmt_port),
		TCP_SYN_RECV,
		0, 0, /* could print option size, but that is af dependent. */
		1,    /* timers active (only the expire timer) */
		jiffies_to_clock_t(ttd),
		req->retrans,
		uid,
		0,  /* non-standard timer */
		0,  /* open_requests have no inode */
		atomic_read(&sk->sk_refcnt),
		req,
		len);
}
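/*
 * In the /proc/net/tcp lines emitted by these helpers, the timer ("tr")
 * field follows the usual convention: 0 = no timer pending, 1 = retransmit
 * timer, 2 = another timer such as keepalive (sk_timer), 3 = TIME_WAIT
 * socket (see get_timewait4_sock()), 4 = zero window probe timer.
 */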
static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i, int *len)
{
	int timer_active;
	unsigned long timer_expires;
	struct tcp_sock *tp = tcp_sk(sk);
	const struct inet_connection_sock *icsk = inet_csk(sk);
	struct inet_sock *inet = inet_sk(sk);
	__be32 dest = inet->inet_daddr;
	__be32 src = inet->inet_rcv_saddr;
	__u16 destp = ntohs(inet->inet_dport);
	__u16 srcp = ntohs(inet->inet_sport);
	int rx_queue;

	if (icsk->icsk_pending == ICSK_TIME_RETRANS) {
		timer_active = 1;
		timer_expires = icsk->icsk_timeout;
	} else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
		timer_active = 4;
		timer_expires = icsk->icsk_timeout;
	} else if (timer_pending(&sk->sk_timer)) {
		timer_active = 2;
		timer_expires = sk->sk_timer.expires;
	} else {
		timer_active = 0;
		timer_expires = jiffies;
	}

	if (sk->sk_state == TCP_LISTEN)
		rx_queue = sk->sk_ack_backlog;
	else
		/*
		 * Because we don't lock the socket, we might find a
		 * transient negative value.
		 */
		rx_queue = max_t(int, tp->rcv_nxt - tp->copied_seq, 0);

	seq_printf(f, "%4d: %08X:%04X %08X:%04X %02X %08X:%08X %02X:%08lX "
			"%08X %5d %8d %lu %d %p %lu %lu %u %u %d%n",
		i, src, srcp, dest, destp, sk->sk_state,
		tp->write_seq - tp->snd_una,
		rx_queue,
		timer_active,
		jiffies_to_clock_t(timer_expires - jiffies),
		icsk->icsk_retransmits,
		sock_i_uid(sk),
		icsk->icsk_probes_out,
		sock_i_ino(sk),
		atomic_read(&sk->sk_refcnt), sk,
		jiffies_to_clock_t(icsk->icsk_rto),
		jiffies_to_clock_t(icsk->icsk_ack.ato),
		(icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong,
		tp->snd_cwnd,
		tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh,
		len);
}
static void get_timewait4_sock(struct inet_timewait_sock *tw,
			       struct seq_file *f, int i, int *len)
{
	__be32 dest, src;
	__u16 destp, srcp;
	int ttd = tw->tw_ttd - jiffies;

	if (ttd < 0)
		ttd = 0;

	dest  = tw->tw_daddr;
	src   = tw->tw_rcv_saddr;
	destp = ntohs(tw->tw_dport);
	srcp  = ntohs(tw->tw_sport);

	seq_printf(f, "%4d: %08X:%04X %08X:%04X"
		" %02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %p%n",
		i, src, srcp, dest, destp, tw->tw_substate, 0, 0,
		3, jiffies_to_clock_t(ttd), 0, 0, 0, 0,
		atomic_read(&tw->tw_refcnt), tw, len);
}
#define TMPSZ 150

static int tcp4_seq_show(struct seq_file *seq, void *v)
{
	struct tcp_iter_state *st;
	int len;

	if (v == SEQ_START_TOKEN) {
		seq_printf(seq, "%-*s\n", TMPSZ - 1,
			   "  sl  local_address rem_address   st tx_queue "
			   "rx_queue tr tm->when retrnsmt   uid  timeout "
			   "inode");
		goto out;
	}
	st = seq->private;

	switch (st->state) {
	case TCP_SEQ_STATE_LISTENING:
	case TCP_SEQ_STATE_ESTABLISHED:
		get_tcp4_sock(v, seq, st->num, &len);
		break;
	case TCP_SEQ_STATE_OPENREQ:
		get_openreq4(st->syn_wait_sk, v, seq, st->num, st->uid, &len);
		break;
	case TCP_SEQ_STATE_TIME_WAIT:
		get_timewait4_sock(v, seq, st->num, &len);
		break;
	}
	seq_printf(seq, "%*s\n", TMPSZ - 1 - len, "");
out:
	return 0;
}
static struct tcp_seq_afinfo tcp4_seq_afinfo = {
	.name		= "tcp",
	.family		= AF_INET,
	.seq_fops	= {
		.owner		= THIS_MODULE,
	},
	.seq_ops	= {
		.show		= tcp4_seq_show,
	},
};
static int __net_init tcp4_proc_init_net(struct net *net)
{
	return tcp_proc_register(net, &tcp4_seq_afinfo);
}

static void __net_exit tcp4_proc_exit_net(struct net *net)
{
	tcp_proc_unregister(net, &tcp4_seq_afinfo);
}

static struct pernet_operations tcp4_net_ops = {
	.init = tcp4_proc_init_net,
	.exit = tcp4_proc_exit_net,
};

int __init tcp4_proc_init(void)
{
	return register_pernet_subsys(&tcp4_net_ops);
}

void tcp4_proc_exit(void)
{
	unregister_pernet_subsys(&tcp4_net_ops);
}
#endif /* CONFIG_PROC_FS */

#ifdef CONFIG_INET_GRO
extern atomic_t gro_timer_init;
#endif
struct sk_buff **tcp4_gro_receive(struct sk_buff **head, struct sk_buff *skb)
{
	struct iphdr *iph;

#ifdef CONFIG_INET_GRO
	if (atomic_read(&gro_timer_init))
		return tcp_gro_receive(head, skb);
#else
	/* We don't support hw-checksum. Skip this part to do real TCP merge */
	iph = skb_gro_network_header(skb);
	switch (skb->ip_summed) {
	case CHECKSUM_COMPLETE:
		if (!tcp_v4_check(skb_gro_len(skb), iph->saddr, iph->daddr,
				  skb->csum)) {
			skb->ip_summed = CHECKSUM_UNNECESSARY;
			break;
		}

		/* fall through */
	case CHECKSUM_NONE:
		NAPI_GRO_CB(skb)->flush = 1;
		return NULL;
	}

	return tcp_gro_receive(head, skb);
#endif /* CONFIG_INET_GRO */
}
EXPORT_SYMBOL(tcp4_gro_receive);
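/*
 * Once GRO has merged a train of segments, tcp4_gro_complete() below seeds
 * the TCP checksum with the IPv4 pseudo-header (leaving the payload sum to
 * be finished later) and marks the aggregate as SKB_GSO_TCPV4 so it can be
 * resegmented on output.
 */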
int BCMFASTPATH_HOST tcp4_gro_complete(struct sk_buff *skb)
{
	struct iphdr *iph = ip_hdr(skb);
	struct tcphdr *th = tcp_hdr(skb);

	th->check = ~tcp_v4_check(skb->len - skb_transport_offset(skb),
				  iph->saddr, iph->daddr, 0);
	skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;

	return tcp_gro_complete(skb);
}
EXPORT_SYMBOL(tcp4_gro_complete);
struct proto tcp_prot = {
	.name			= "TCP",
	.owner			= THIS_MODULE,
	.close			= tcp_close,
	.connect		= tcp_v4_connect,
	.disconnect		= tcp_disconnect,
	.accept			= inet_csk_accept,
	.ioctl			= tcp_ioctl,
	.init			= tcp_v4_init_sock,
	.destroy		= tcp_v4_destroy_sock,
	.shutdown		= tcp_shutdown,
	.setsockopt		= tcp_setsockopt,
	.getsockopt		= tcp_getsockopt,
	.recvmsg		= tcp_recvmsg,
	.sendmsg		= tcp_sendmsg,
	.sendpage		= tcp_sendpage,
	.backlog_rcv		= tcp_v4_do_rcv,
	.hash			= inet_hash,
	.unhash			= inet_unhash,
	.get_port		= inet_csk_get_port,
	.enter_memory_pressure	= tcp_enter_memory_pressure,
	.sockets_allocated	= &tcp_sockets_allocated,
	.orphan_count		= &tcp_orphan_count,
	.memory_allocated	= &tcp_memory_allocated,
	.memory_pressure	= &tcp_memory_pressure,
	.sysctl_mem		= sysctl_tcp_mem,
	.sysctl_wmem		= sysctl_tcp_wmem,
	.sysctl_rmem		= sysctl_tcp_rmem,
	.max_header		= MAX_TCP_HEADER,
	.obj_size		= sizeof(struct tcp_sock),
	.slab_flags		= SLAB_DESTROY_BY_RCU,
	.twsk_prot		= &tcp_timewait_sock_ops,
	.rsk_prot		= &tcp_request_sock_ops,
	.h.hashinfo		= &tcp_hashinfo,
	.no_autobind		= true,
#ifdef CONFIG_COMPAT
	.compat_setsockopt	= compat_tcp_setsockopt,
	.compat_getsockopt	= compat_tcp_getsockopt,
#endif
};
EXPORT_SYMBOL(tcp_prot);
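/*
 * tcp_prot itself is not registered here: inet_init() in af_inet.c calls
 * proto_register() on it and wires it to the SOCK_STREAM/IPPROTO_TCP entry
 * of the inetsw table.
 */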
static int __net_init tcp_sk_init(struct net *net)
{
	return inet_ctl_sock_create(&net->ipv4.tcp_sock,
				    PF_INET, SOCK_RAW, IPPROTO_TCP, net);
}

static void __net_exit tcp_sk_exit(struct net *net)
{
	inet_ctl_sock_destroy(net->ipv4.tcp_sock);
}

static void __net_exit tcp_sk_exit_batch(struct list_head *net_exit_list)
{
	inet_twsk_purge(&tcp_hashinfo, &tcp_death_row, AF_INET);
}

static struct pernet_operations __net_initdata tcp_sk_ops = {
	.init	    = tcp_sk_init,
	.exit	    = tcp_sk_exit,
	.exit_batch = tcp_sk_exit_batch,
};

void __init tcp_v4_init(void)
{
	inet_hashinfo_init(&tcp_hashinfo);
	if (register_pernet_subsys(&tcp_sk_ops))
		panic("Failed to create the TCP control socket.\n");