net/ipv4/tcp_ipv4.c

   1 /*
   2  * INET         An implementation of the TCP/IP protocol suite for the LINUX
   3  *              operating system.  INET is implemented using the  BSD Socket
   4  *              interface as the means of communication with the user level.
   5  *
   6  *              Implementation of the Transmission Control Protocol(TCP).
   7  *
   8  *              IPv4 specific functions
   9  *
  10  *
  11  *              code split from:
  12  *              linux/ipv4/tcp.c
  13  *              linux/ipv4/tcp_input.c
  14  *              linux/ipv4/tcp_output.c
  15  *
  16  *              See tcp.c for author information
  17  *
  18  *      This program is free software; you can redistribute it and/or
  19  *      modify it under the terms of the GNU General Public License
  20  *      as published by the Free Software Foundation; either version
  21  *      2 of the License, or (at your option) any later version.
  22  */
  23
  24 /*
  25  * Changes:
  26  *              David S. Miller :       New socket lookup architecture.
  27  *                                      This code is dedicated to John Dyson.
  28  *              David S. Miller :       Change semantics of established hash,
  29  *                                      half is devoted to TIME_WAIT sockets
  30  *                                      and the rest go in the other half.
  31  *              Andi Kleen :            Add support for syncookies and fixed
  32  *                                      some bugs: ip options weren't passed to
  33  *                                      the TCP layer, missed a check for an
  34  *                                      ACK bit.
  35  *              Andi Kleen :            Implemented fast path mtu discovery.
  36  *                                      Fixed many serious bugs in the
  37  *                                      request_sock handling and moved
  38  *                                      most of it into the af independent code.
  39  *                                      Added tail drop and some other bugfixes.
  40  *                                      Added new listen semantics.
  41  *              Mike McLagan    :       Routing by source
  42  *      Juan Jose Ciarlante:            ip_dynaddr bits
  43  *              Andi Kleen:             various fixes.
  44  *      Vitaly E. Lavrov        :       Transparent proxy revived after year
  45  *                                      coma.
  46  *      Andi Kleen              :       Fix new listen.
  47  *      Andi Kleen              :       Fix accept error reporting.
  48  *      YOSHIFUJI Hideaki @USAGI and:   Support IPV6_V6ONLY socket option, which
  49  *      Alexey Kuznetsov                allow both IPv4 and IPv6 sockets to bind
  50  *                                      a single port at the same time.
  51  */
  52
  53
  54 #include <linux/bottom_half.h>
  55 #include <linux/types.h>
  56 #include <linux/fcntl.h>
  57 #include <linux/module.h>
  58 #include <linux/random.h>
  59 #include <linux/cache.h>
  60 #include <linux/jhash.h>
  61 #include <linux/init.h>
  62 #include <linux/times.h>
  63 #include <linux/slab.h>
  64
  65 #include <net/net_namespace.h>
  66 #include <net/icmp.h>
  67 #include <net/inet_hashtables.h>
  68 #include <net/tcp.h>
  69 #include <net/transp_v6.h>
  70 #include <net/ipv6.h>
  71 #include <net/inet_common.h>
  72 #include <net/timewait_sock.h>
  73 #include <net/xfrm.h>
  74 #include <net/netdma.h>
  75 #include <net/secure_seq.h>
  76 #include <net/tcp_memcontrol.h>
  77
  78 #include <linux/inet.h>
  79 #include <linux/ipv6.h>
  80 #include <linux/stddef.h>
  81 #include <linux/proc_fs.h>
  82 #include <linux/seq_file.h>
  83
  84 #include <linux/crypto.h>
  85 #include <linux/scatterlist.h>
  86
  /* Read by tcp_twsk_unique(): when the caller passes a non-NULL twp, a
   * TIME-WAIT bucket may only be taken over if this is non-zero (and the
   * bucket's recorded timestamp is more than one second old).
   */
  87 int sysctl_tcp_tw_reuse __read_mostly;
  /* Exported tunable; not referenced in the part of the file shown here. */
  88 int sysctl_tcp_low_latency __read_mostly;
  89 EXPORT_SYMBOL(sysctl_tcp_low_latency);
  90
  91
  92 #ifdef CONFIG_TCP_MD5SIG
  /* Forward declaration so that tcp_v4_send_reset() and tcp_v4_send_ack()
   * below can sign the header-only replies they build.
   */
  93 static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
  94                                __be32 daddr, __be32 saddr, const struct tcphdr *th);
  95 #endif
  96
  /* TCP socket lookup state; handed to inet_lookup() and
   * __inet_lookup_listener() further down.
   */
  97 struct inet_hashinfo tcp_hashinfo;
  98 EXPORT_SYMBOL(tcp_hashinfo);
  99
 /* Return the value secure_tcp_sequence_number() produces for the addresses
  * and ports carried in @skb, passed in the order destination address,
  * source address, destination port, source port.
  */
 100 static inline __u32 tcp_v4_init_sequence(const struct sk_buff *skb)
 101 {
 102         const struct iphdr *iph = ip_hdr(skb);
 103         const struct tcphdr *th = tcp_hdr(skb);

 104         return secure_tcp_sequence_number(iph->daddr, iph->saddr,
 105                                           th->dest, th->source);
 106 }
 107
 /*
  * Decide whether the connecting socket @sk may take over the address/port
  * pair still occupied by the TIME-WAIT bucket @sktw.
  *
  * With PAWS this is safe from the viewpoint of data integrity.  Even
  * without PAWS it is safe provided the sequence spaces do not overlap,
  * i.e. at data rates <= 80Mbit/sec.
  *
  * The idea is close to VJ's, except that the timestamp cache is kept per
  * port pair rather than per host, with the TW bucket acting as the state
  * holder.  If the TW bucket has already been destroyed we fall back to
  * VJ's scheme and use the initial timestamp retrieved from the peer table.
  *
  * Returns 1, with a reference taken on @sktw, when the bucket may be
  * reused; returns 0 and leaves @sk untouched otherwise.
  */
 108 int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
 109 {
 110         const struct tcp_timewait_sock *tw = tcp_twsk(sktw);
 111         struct tcp_sock *newtp = tcp_sk(sk);
 112
 113         /* The bucket never recorded a peer timestamp: no reuse. */
 114         if (!tw->tw_ts_recent_stamp)
 115                 return 0;
 116
 117         /* A non-NULL twp additionally requires sysctl_tcp_tw_reuse and a
 118          * recorded timestamp that is more than one second old.
 119          */
 120         if (twp != NULL &&
 121             !(sysctl_tcp_tw_reuse &&
 122               get_seconds() - tw->tw_ts_recent_stamp > 1))
 123                 return 0;
 124
 125         /* Start above the old connection's snd_nxt; zero is avoided
 126          * because tcp_v4_connect() treats write_seq == 0 as "unset".
 127          */
 128         newtp->write_seq = tw->tw_snd_nxt + 65535 + 2;
 129         if (newtp->write_seq == 0)
 130                 newtp->write_seq = 1;
 131
 132         /* Inherit the timestamp state seen on the old connection. */
 133         newtp->rx_opt.ts_recent_stamp = tw->tw_ts_recent_stamp;
 134         newtp->rx_opt.ts_recent       = tw->tw_ts_recent;
 135         sock_hold(sktw);
 136         return 1;
 137 }
 138 EXPORT_SYMBOL_GPL(tcp_twsk_unique);
 139
 140 /* This will initiate an outgoing connection.
      *
      * Validates the IPv4 destination in @uaddr, resolves a route to it (via
      * the first hop of the socket's source-route option, if it has one),
      * fills in the socket's source and destination identity, moves the
      * socket to SYN-SENT, runs inet_hash_connect() and finally calls
      * tcp_connect().
      *
      * Returns 0 on success.  Fails with -EINVAL for a short @addr_len or for
      * a zero destination combined with source routing, -EAFNOSUPPORT for a
      * family other than AF_INET, -ENETUNREACH for a multicast/broadcast
      * route, or with whatever error the routing, hashing or tcp_connect()
      * step reported.  Errors after the state change go through the
      * "failure" label, which returns the socket to TCP_CLOSE and clears the
      * destination port.
      */
 141 int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 142 {
 143         struct sockaddr_in *usin = (struct sockaddr_in *)uaddr;
 144         struct inet_sock *inet = inet_sk(sk);
 145         struct tcp_sock *tp = tcp_sk(sk);
 146         __be16 orig_sport, orig_dport;
 147         __be32 daddr, nexthop;
 148         struct flowi4 *fl4;
 149         struct rtable *rt;
 150         int err;
 151         struct ip_options_rcu *inet_opt;
 152
 153         if (addr_len < sizeof(struct sockaddr_in))
 154                 return -EINVAL;
 155
 156         if (usin->sin_family != AF_INET)
 157                 return -EAFNOSUPPORT;
 158
             /* By default we route straight to the requested address; a
              * source-route option replaces the next hop with its first
              * address.  The lockdep condition on the options pointer is
              * sock_owned_by_user(sk).
              */
 159         nexthop = daddr = usin->sin_addr.s_addr;
 160         inet_opt = rcu_dereference_protected(inet->inet_opt,
 161                                              sock_owned_by_user(sk));
 162         if (inet_opt && inet_opt->opt.srr) {
 163                 if (!daddr)
 164                         return -EINVAL;
 165                 nexthop = inet_opt->opt.faddr;
 166         }
 167
             /* Remember the ports used for this lookup so that
              * ip_route_newports() below can compare them with the final ones.
              */
 168         orig_sport = inet->inet_sport;
 169         orig_dport = usin->sin_port;
 170         fl4 = &inet->cork.fl.u.ip4;
 171         rt = ip_route_connect(fl4, nexthop, inet->inet_saddr,
 172                               RT_CONN_FLAGS(sk), sk->sk_bound_dev_if,
 173                               IPPROTO_TCP,
 174                               orig_sport, orig_dport, sk, true);
 175         if (IS_ERR(rt)) {
 176                 err = PTR_ERR(rt);
 177                 if (err == -ENETUNREACH)
 178                         IP_INC_STATS_BH(sock_net(sk), IPSTATS_MIB_OUTNOROUTES);
 179                 return err;
 180         }
 181
             /* Multicast and broadcast routes are refused. */
 182         if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) {
 183                 ip_rt_put(rt);
 184                 return -ENETUNREACH;
 185         }
 186
             /* Without source routing, adopt the destination the route lookup
              * left in the flow.
              */
 187         if (!inet_opt || !inet_opt->opt.srr)
 188                 daddr = fl4->daddr;
 189
             /* An unset source address is taken from the flow as well. */
 190         if (!inet->inet_saddr)
 191                 inet->inet_saddr = fl4->saddr;
 192         inet->inet_rcv_saddr = inet->inet_saddr;
 193
 194         if (tp->rx_opt.ts_recent_stamp && inet->inet_daddr != daddr) {
 195                 /* Reset inherited state */
 196                 tp->rx_opt.ts_recent       = 0;
 197                 tp->rx_opt.ts_recent_stamp = 0;
 198                 tp->write_seq              = 0;
 199         }
 200
 201         if (tcp_death_row.sysctl_tw_recycle &&
 202             !tp->rx_opt.ts_recent_stamp && fl4->daddr == daddr) {
 203                 struct inet_peer *peer = rt_get_peer(rt, fl4->daddr);
 204                 /*
 205                  * VJ's idea. We save last timestamp seen from
 206                  * the destination in peer table, when entering state
 207                  * TIME-WAIT and initialize rx_opt.ts_recent from it,
 208                  * when trying new connection.
 209                  */
 210                 if (peer) {
 211                         inet_peer_refcheck(peer);
                             /* Only use a stamp no older than TCP_PAWS_MSL. */
 212                         if ((u32)get_seconds() - peer->tcp_ts_stamp <= TCP_PAWS_MSL) {
 213                                 tp->rx_opt.ts_recent_stamp = peer->tcp_ts_stamp;
 214                                 tp->rx_opt.ts_recent = peer->tcp_ts;
 215                         }
 216                 }
 217         }
 218
 219         inet->inet_dport = usin->sin_port;
 220         inet->inet_daddr = daddr;
 221
             /* Extension header length is the IP option length, if any. */
 222         inet_csk(sk)->icsk_ext_hdr_len = 0;
 223         if (inet_opt)
 224                 inet_csk(sk)->icsk_ext_hdr_len = inet_opt->opt.optlen;
 225
 226         tp->rx_opt.mss_clamp = TCP_MSS_DEFAULT;
 227
 228         /* Socket identity is still unknown (sport may be zero).
 229          * However we set state to SYN-SENT and not releasing socket
 230          * lock select source port, enter ourselves into the hash tables and
 231          * complete initialization after this.
 232          */
 233         tcp_set_state(sk, TCP_SYN_SENT);
 234         err = inet_hash_connect(&tcp_death_row, sk);
 235         if (err)
 236                 goto failure;
 237
 238         rt = ip_route_newports(fl4, rt, orig_sport, orig_dport,
 239                                inet->inet_sport, inet->inet_dport, sk);
 240         if (IS_ERR(rt)) {
 241                 err = PTR_ERR(rt);
                     /* rt holds an error pointer here; clear it so the
                      * failure path does not pass it to ip_rt_put().
                      */
 242                 rt = NULL;
 243                 goto failure;
 244         }
 245         /* OK, now commit destination to socket.  */
 246         sk->sk_gso_type = SKB_GSO_TCPV4;
 247         sk_setup_caps(sk, &rt->dst);
 248
             /* write_seq may already have been set by tcp_twsk_unique(); pick
              * a fresh one only if it is still zero.
              */
 249         if (!tp->write_seq)
 250                 tp->write_seq = secure_tcp_sequence_number(inet->inet_saddr,
 251                                                            inet->inet_daddr,
 252                                                            inet->inet_sport,
 253                                                            usin->sin_port);
 254
 255         inet->inet_id = tp->write_seq ^ jiffies;
 256
 257         err = tcp_connect(sk);
             /* Presumably the route now belongs to the socket (handed over by
              * sk_setup_caps() above), so the failure path must not put it
              * again -- confirm against sk_setup_caps().
              */
 258         rt = NULL;
 259         if (err)
 260                 goto failure;
 261
 262         return 0;
 263
 264 failure:
 265         /*
 266          * This unhashes the socket and releases the local port,
 267          * if necessary.
 268          */
 269         tcp_set_state(sk, TCP_CLOSE);
 270         ip_rt_put(rt);
 271         sk->sk_route_caps = 0;
 272         inet->inet_dport = 0;
 273         return err;
 274 }
 275 EXPORT_SYMBOL(tcp_v4_connect);
 276
 277 /*
 278  * This routine does path mtu discovery as defined in RFC1191.
      *
      * @sk:  socket the ICMP error was matched to.
      * @iph: IP header quoted in the ICMP error; not used by this function.
      * @mtu: MTU value reported for the path.
      *
      * Updates the cached route's PMTU and, if the socket's MSS cookie is
      * larger than the resulting route MTU (and PMTU discovery is not
      * disabled on the socket), resyncs the MSS and retransmits right away.
 279  */
 280 static void do_pmtu_discovery(struct sock *sk, const struct iphdr *iph, u32 mtu)
 281 {
 282         struct dst_entry *dst;
 283         struct inet_sock *inet = inet_sk(sk);
 284
 285         /* We are not interested in TCP_LISTEN and open_requests (SYN-ACKs
 286          * send out by Linux are always <576bytes so they should go through
 287          * unfragmented).
 288          */
 289         if (sk->sk_state == TCP_LISTEN)
 290                 return;
 291
 292         /* We don't check in the destentry if pmtu discovery is forbidden
 293          * on this route. We just assume that no packet_to_big packets
 294          * are send back when pmtu discovery is not active.
 295          * There is a small race when the user changes this flag in the
 296          * route, but I think that's acceptable.
 297          */
 298         if ((dst = __sk_dst_check(sk, 0)) == NULL)
 299                 return;
 300
             /* Let the route record the reported MTU; dst_mtu() is re-read
              * afterwards, so everything below works on the route's own value.
              */
 301         dst->ops->update_pmtu(dst, mtu);
 302
 303         /* Something is about to be wrong... Remember soft error
 304          * for the case, if this connection will not able to recover.
 305          */
 306         if (mtu < dst_mtu(dst) && ip_dont_fragment(sk, dst))
 307                 sk->sk_err_soft = EMSGSIZE;
 308
 309         mtu = dst_mtu(dst);
 310
 311         if (inet->pmtudisc != IP_PMTUDISC_DONT &&
 312             inet_csk(sk)->icsk_pmtu_cookie > mtu) {
 313                 tcp_sync_mss(sk, mtu);
 314
 315                 /* Resend the TCP packet because it's
 316                  * clear that the old packet has been
 317                  * dropped. This is the new "fast" path mtu
 318                  * discovery.
 319                  */
 320                 tcp_simple_retransmit(sk);
 321         } /* else let the usual retransmit timer handle it */
 322 }
 323
 324 /*
 325  * This routine is called by the ICMP module when it gets some
 326  * sort of error condition.  If err < 0 then the socket should
 327  * be closed and the error returned to the user.  If err > 0
 328  * it's just the icmp type << 8 | icmp code.  After adjustment
 329  * header points to the first 8 bytes of the tcp header.  We need
 330  * to find the appropriate port.
 331  *
 332  * The locking strategy used here is very "optimistic". When
 333  * someone else accesses the socket the ICMP is just dropped
 334  * and for some paths there is no check at all.
 335  * A more general error queue to queue errors for later handling
 336  * is probably better.
 337  *
      * @icmp_skb: the ICMP error; its data starts at the IP header of the
      *            packet the error refers to.
      * @info:     handed to do_pmtu_discovery() as the MTU for
      *            ICMP_FRAG_NEEDED; not used for any other error.
      *
 338  */
 339
 340 void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
 341 {
             /* iph/th are the IP and TCP headers embedded in the ICMP payload. */
 342         const struct iphdr *iph = (const struct iphdr *)icmp_skb->data;
 343         struct tcphdr *th = (struct tcphdr *)(icmp_skb->data + (iph->ihl << 2));
 344         struct inet_connection_sock *icsk;
 345         struct tcp_sock *tp;
 346         struct inet_sock *inet;
 347         const int type = icmp_hdr(icmp_skb)->type;
 348         const int code = icmp_hdr(icmp_skb)->code;
 349         struct sock *sk;
 350         struct sk_buff *skb;
 351         __u32 seq;
 352         __u32 remaining;
 353         int err;
 354         struct net *net = dev_net(icmp_skb->dev);
 355
             /* We need the embedded IP header plus the first 8 bytes of the TCP
              * header (ports and sequence number) before th may be read.
              */
 356         if (icmp_skb->len < (iph->ihl << 2) + 8) {
 357                 ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
 358                 return;
 359         }
 360
             /* Find the socket the embedded segment belongs to. */
 361         sk = inet_lookup(net, &tcp_hashinfo, iph->daddr, th->dest,
 362                         iph->saddr, th->source, inet_iif(icmp_skb));
 363         if (!sk) {
 364                 ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
 365                 return;
 366         }
             /* Nothing to report on a TIME-WAIT bucket: release it and leave. */
 367         if (sk->sk_state == TCP_TIME_WAIT) {
 368                 inet_twsk_put(inet_twsk(sk));
 369                 return;
 370         }
 371
 372         bh_lock_sock(sk);
 373         /* If too many ICMPs get dropped on busy
 374          * servers this needs to be solved differently.
              *
              * Note that this only counts the event; the branches below that
              * modify the socket test sock_owned_by_user() again themselves.
 375          */
 376         if (sock_owned_by_user(sk))
 377                 NET_INC_STATS_BH(net, LINUX_MIB_LOCKDROPPEDICMPS);
 378
 379         if (sk->sk_state == TCP_CLOSE)
 380                 goto out;
 381
             /* Drop errors whose embedded TTL is below the socket's minimum. */
 382         if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) {
 383                 NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP);
 384                 goto out;
 385         }
 386
 387         icsk = inet_csk(sk);
 388         tp = tcp_sk(sk);
 389         seq = ntohl(th->seq);
             /* Except for listeners, the embedded sequence number must lie
              * between snd_una and snd_nxt.
              */
 390         if (sk->sk_state != TCP_LISTEN &&
 391             !between(seq, tp->snd_una, tp->snd_nxt)) {
 392                 NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
 393                 goto out;
 394         }
 395
             /* Translate the ICMP type/code into an errno in err, or finish
              * early for the types that need no error reporting.
              */
 396         switch (type) {
 397         case ICMP_SOURCE_QUENCH:
 398                 /* Just silently ignore these. */
 399                 goto out;
 400         case ICMP_PARAMETERPROB:
 401                 err = EPROTO;
 402                 break;
 403         case ICMP_DEST_UNREACH:
 404                 if (code > NR_ICMP_UNREACH)
 405                         goto out;
 406
 407                 if (code == ICMP_FRAG_NEEDED) { /* PMTU discovery (RFC1191) */
 408                         if (!sock_owned_by_user(sk))
 409                                 do_pmtu_discovery(sk, iph, info);
 410                         goto out;
 411                 }
 412
 413                 err = icmp_err_convert[code].errno;
 414                 /* check if icmp_skb allows revert of backoff
 415                  * (see draft-zimmermann-tcp-lcd) */
 416                 if (code != ICMP_NET_UNREACH && code != ICMP_HOST_UNREACH)
 417                         break;
                     /* Only when the error refers to the segment at snd_una and
                      * we are currently retransmitting with a backed-off timer.
                      */
 418                 if (seq != tp->snd_una  || !icsk->icsk_retransmits ||
 419                     !icsk->icsk_backoff)
 420                         break;
 421
 422                 if (sock_owned_by_user(sk))
 423                         break;
 424
                     /* Undo one backoff step and recompute the RTO from it. */
 425                 icsk->icsk_backoff--;
 426                 inet_csk(sk)->icsk_rto = (tp->srtt ? __tcp_set_rto(tp) :
 427                         TCP_TIMEOUT_INIT) << icsk->icsk_backoff;
 428                 tcp_bound_rto(sk);
 429
 430                 skb = tcp_write_queue_head(sk);
 431                 BUG_ON(!skb);
 432
                     /* Part of the new RTO not yet elapsed since the head skb's
                      * ->when stamp (presumably its last transmit time --
                      * confirm); clamped at zero by the min().
                      */
 433                 remaining = icsk->icsk_rto - min(icsk->icsk_rto,
 434                                 tcp_time_stamp - TCP_SKB_CB(skb)->when);
 435
 436                 if (remaining) {
 437                         inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
 438                                                   remaining, TCP_RTO_MAX);
 439                 } else {
 440                         /* RTO revert clocked out retransmission.
 441                          * Will retransmit now */
 442                         tcp_retransmit_timer(sk);
 443                 }
 444
 445                 break;
 446         case ICMP_TIME_EXCEEDED:
 447                 err = EHOSTUNREACH;
 448                 break;
 449         default:
 450                 goto out;
 451         }
 452
             /* Connections that are not yet established are handled here;
              * every other state falls through to the code after the switch.
              */
 453         switch (sk->sk_state) {
 454                 struct request_sock *req, **prev;
 455         case TCP_LISTEN:
 456                 if (sock_owned_by_user(sk))
 457                         goto out;
 458
 459                 req = inet_csk_search_req(sk, &prev, th->dest,
 460                                           iph->daddr, iph->saddr);
 461                 if (!req)
 462                         goto out;
 463
 464                 /* ICMPs are not backlogged, hence we cannot get
 465                    an established socket here.
 466                  */
 467                 WARN_ON(req->sk);
 468
                     /* The error must quote the ISN we sent for this request. */
 469                 if (seq != tcp_rsk(req)->snt_isn) {
 470                         NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
 471                         goto out;
 472                 }
 473
 474                 /*
 475                  * Still in SYN_RECV, just remove it silently.
 476                  * There is no good way to pass the error to the newly
 477                  * created socket, and POSIX does not want network
 478                  * errors returned from accept().
 479                  */
 480                 inet_csk_reqsk_queue_drop(sk, req, prev);
 481                 goto out;
 482
 483         case TCP_SYN_SENT:
 484         case TCP_SYN_RECV:  /* Cannot happen.
 485                                It can f.e. if SYNs crossed.
 486                              */
                     /* Hard error if we own the socket here, otherwise only a
                      * soft error is recorded.
                      */
 487                 if (!sock_owned_by_user(sk)) {
 488                         sk->sk_err = err;
 489
 490                         sk->sk_error_report(sk);
 491
 492                         tcp_done(sk);
 493                 } else {
 494                         sk->sk_err_soft = err;
 495                 }
 496                 goto out;
 497         }
 498
 499         /* If we've already connected we will keep trying
 500          * until we time out, or the user gives up.
 501          *
 502          * rfc1122 4.2.3.9 allows to consider as hard errors
 503          * only PROTO_UNREACH and PORT_UNREACH (well, FRAG_FAILED too,
 504          * but it is obsoleted by pmtu discovery).
 505          *
 506          * Note, that in modern internet, where routing is unreliable
 507          * and in each dark corner broken firewalls sit, sending random
 508          * errors ordered by their masters even this two messages finally lose
 509          * their original sense (even Linux sends invalid PORT_UNREACHs)
 510          *
 511          * Now we are in compliance with RFCs.
 512          *                                                      --ANK (980905)
 513          */
 514
 515         inet = inet_sk(sk);
 516         if (!sock_owned_by_user(sk) && inet->recverr) {
 517                 sk->sk_err = err;
 518                 sk->sk_error_report(sk);
 519         } else  { /* Only an error on timeout */
 520                 sk->sk_err_soft = err;
 521         }
 522
 523 out:
 524         bh_unlock_sock(sk);
 525         sock_put(sk);
 526 }
 527
 /*
  * Fill in the TCP checksum field of @skb for the address pair
  * @saddr/@daddr.
  *
  * Unless skb->ip_summed is CHECKSUM_PARTIAL the complete checksum is
  * stored: the TCP header bytes (doff * 4) are summed on top of skb->csum
  * and combined with the pseudo-header by tcp_v4_check().
  *
  * For CHECKSUM_PARTIAL only the complemented pseudo-header checksum is
  * stored, and csum_start/csum_offset are pointed at the TCP header's check
  * field (presumably so the sum can be completed later -- confirm).
  */
 528 static void __tcp_v4_send_check(struct sk_buff *skb,
 529                                 __be32 saddr, __be32 daddr)
 530 {
 531         struct tcphdr *hdr = tcp_hdr(skb);
 532
 533         if (skb->ip_summed != CHECKSUM_PARTIAL) {
 534                 hdr->check = tcp_v4_check(skb->len, saddr, daddr,
 535                                           csum_partial(hdr, hdr->doff << 2,
 536                                                        skb->csum));
 537                 return;
 538         }
 539
 540         hdr->check = ~tcp_v4_check(skb->len, saddr, daddr, 0);
 541         skb->csum_start = skb_transport_header(skb) - skb->head;
 542         skb->csum_offset = offsetof(struct tcphdr, check);
 543 }
 544
 545 /* Checksum @skb using the source/destination addresses stored in @sk. */
 546 void tcp_v4_send_check(struct sock *sk, struct sk_buff *skb)
 547 {
 548         __tcp_v4_send_check(skb,
 549                             inet_sk(sk)->inet_saddr,
 550                             inet_sk(sk)->inet_daddr);
 551 }
 552 EXPORT_SYMBOL(tcp_v4_send_check);
 553
 /*
  * Reset the TCP checksum of @skb to the CHECKSUM_PARTIAL form: the check
  * field is zeroed, ip_summed is forced to CHECKSUM_PARTIAL and
  * __tcp_v4_send_check() is run with the addresses from the packet's own
  * IP header.
  *
  * Returns 0, or -EINVAL when pskb_may_pull() cannot provide a full
  * struct tcphdr.
  */
 554 int tcp_v4_gso_send_check(struct sk_buff *skb)
 555 {
 556         const struct iphdr *ip4;
 557
 558         if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
 559                 return -EINVAL;
 560
 561         /* Header pointers are fetched only after pskb_may_pull() has
 562          * succeeded (presumably it may move packet data -- confirm).
 563          */
 564         ip4 = ip_hdr(skb);
 565         tcp_hdr(skb)->check = 0;
 566         skb->ip_summed = CHECKSUM_PARTIAL;
 567         __tcp_v4_send_check(skb, ip4->saddr, ip4->daddr);
 568         return 0;
 569 }
 570
 571 /*
 572  *      This routine will send an RST to the other tcp.
 573  *
 574  *      Someone asks: why I NEVER use socket parameters (TOS, TTL etc.)
 575  *                    for reset.
 576  *      Answer: if a packet caused RST, it is not for a socket
 577  *              existing in our system, if it is matched to a socket,
 578  *              it is just duplicate segment or bug in other side's TCP.
 579  *              So that we build reply only basing on parameters
 580  *              arrived with segment.
 581  *      Exception: precedence violation. We do not implement it in any case.
      *
      *      @sk:  socket the segment was matched to, or NULL if there is none.
      *      @skb: the offending segment; the reset is built from its headers.
      *
      *      Nothing is sent if @skb itself carries RST, if its route type is
      *      not RTN_LOCAL, or (with CONFIG_TCP_MD5SIG, no socket and an MD5
      *      option in @skb) if no listener/key is found or the signature does
      *      not verify.
 582  */
 583
 584 static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
 585 {
 586         const struct tcphdr *th = tcp_hdr(skb);
             /* Reply image: a bare TCP header, optionally followed by room for
              * one aligned MD5 signature option.
              */
 587         struct {
 588                 struct tcphdr th;
 589 #ifdef CONFIG_TCP_MD5SIG
 590                 __be32 opt[(TCPOLEN_MD5SIG_ALIGNED >> 2)];
 591 #endif
 592         } rep;
 593         struct ip_reply_arg arg;
 594 #ifdef CONFIG_TCP_MD5SIG
 595         struct tcp_md5sig_key *key;
 596         const __u8 *hash_location = NULL;
 597         unsigned char newhash[16];
 598         int genhash;
 599         struct sock *sk1 = NULL;
 600 #endif
 601         struct net *net;
 602
 603         /* Never send a reset in response to a reset. */
 604         if (th->rst)
 605                 return;
 606
 607         if (skb_rtable(skb)->rt_type != RTN_LOCAL)
 608                 return;
 609
 610         /* Swap the send and the receive. */
 611         memset(&rep, 0, sizeof(rep));
 612         rep.th.dest   = th->source;
 613         rep.th.source = th->dest;
 614         rep.th.doff   = sizeof(struct tcphdr) / 4;
 615         rep.th.rst    = 1;
 616
             /* If the segment carried an ACK, the reset takes its sequence
              * number from that ACK.  Otherwise the reset itself ACKs
              * everything the segment occupied: payload length plus one for
              * each of SYN and FIN.
              */
 617         if (th->ack) {
 618                 rep.th.seq = th->ack_seq;
 619         } else {
 620                 rep.th.ack = 1;
 621                 rep.th.ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin +
 622                                        skb->len - (th->doff << 2));
 623         }
 624
 625         memset(&arg, 0, sizeof(arg));
 626         arg.iov[0].iov_base = (unsigned char *)&rep;
 627         arg.iov[0].iov_len  = sizeof(rep.th);
 628
 629 #ifdef CONFIG_TCP_MD5SIG
 630         hash_location = tcp_parse_md5sig_option(th);
 631         if (!sk && hash_location) {
 632                 /*
 633                  * active side is lost. Try to find listening socket through
 634                  * source port, and then find md5 key through listening socket.
 635                  * we are not loose security here:
 636                  * Incoming packet is checked with md5 hash with finding key,
 637                  * no RST generated if md5 hash doesn't match.
 638                  */
 639                 sk1 = __inet_lookup_listener(dev_net(skb_dst(skb)->dev),
 640                                              &tcp_hashinfo, ip_hdr(skb)->daddr,
 641                                              ntohs(th->source), inet_iif(skb));
 642                 /* don't send rst if it can't find key */
 643                 if (!sk1)
 644                         return;
                     /* From here on sk1 is set, so every exit goes through
                      * release_sk1, which pairs this rcu_read_lock() and drops
                      * sk1 with sock_put().
                      */
 645                 rcu_read_lock();
 646                 key = tcp_md5_do_lookup(sk1, (union tcp_md5_addr *)
 647                                         &ip_hdr(skb)->saddr, AF_INET);
 648                 if (!key)
 649                         goto release_sk1;
 650
                     /* Verify the incoming segment's signature with that key. */
 651                 genhash = tcp_v4_md5_hash_skb(newhash, key, NULL, NULL, skb);
 652                 if (genhash || memcmp(hash_location, newhash, 16) != 0)
 653                         goto release_sk1;
 654         } else {
                     /* With a socket, use its key for the sender's address. */
 655                 key = sk ? tcp_md5_do_lookup(sk, (union tcp_md5_addr *)
 656                                              &ip_hdr(skb)->saddr,
 657                                              AF_INET) : NULL;
 658         }
 659
 660         if (key) {
 661                 rep.opt[0] = htonl((TCPOPT_NOP << 24) |
 662                                    (TCPOPT_NOP << 16) |
 663                                    (TCPOPT_MD5SIG << 8) |
 664                                    TCPOLEN_MD5SIG);
 665                 /* Update length and the length the header thinks exists */
 666                 arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
 667                 rep.th.doff = arg.iov[0].iov_len / 4;
 668
                     /* Sign the reply: its destination is the incoming source
                      * address and vice versa.
                      */
 669                 tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[1],
 670                                      key, ip_hdr(skb)->saddr,
 671                                      ip_hdr(skb)->daddr, &rep.th);
 672         }
 673 #endif
             /* Pseudo-header checksum seed for the reply. */
 674         arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
 675                                       ip_hdr(skb)->saddr, /* XXX */
 676                                       arg.iov[0].iov_len, IPPROTO_TCP, 0);
             /* csumoffset is the check field's offset in 16-bit words. */
 677         arg.csumoffset = offsetof(struct tcphdr, check) / 2;
 678         arg.flags = (sk && inet_sk(sk)->transparent) ? IP_REPLY_ARG_NOSRCCHECK : 0;
 679         /* When socket is gone, all binding information is lost.
 680          * routing might fail in this case. using iif for oif to
 681          * make sure we can deliver it
 682          */
 683         arg.bound_dev_if = sk ? sk->sk_bound_dev_if : inet_iif(skb);
 684
 685         net = dev_net(skb_dst(skb)->dev);
 686         arg.tos = ip_hdr(skb)->tos;
 687         ip_send_reply(net->ipv4.tcp_sock, skb, ip_hdr(skb)->saddr,
 688                       &arg, arg.iov[0].iov_len);
 689
 690         TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
 691         TCP_INC_STATS_BH(net, TCP_MIB_OUTRSTS);
 692
 693 #ifdef CONFIG_TCP_MD5SIG
 694 release_sk1:
             /* sk1 is non-NULL only on the listener-lookup path above. */
 695         if (sk1) {
 696                 rcu_read_unlock();
 697                 sock_put(sk1);
 698         }
 699 #endif
 700 }
 701
 702 /* The code following below sending ACKs in SYN-RECV and TIME-WAIT states
 703    outside socket context is ugly, certainly. What can I do?
 704  */
 705
     /*
      * Build and send a bare ACK in reply to @skb.
      *
      * @skb:         segment being answered; ports and addresses are swapped.
      * @seq, @ack:   sequence and acknowledgment numbers, host byte order.
      * @win:         window value placed in the header (truncated by htons()).
      * @ts:          if non-zero, a timestamp option is added echoing this value.
      * @oif:         if non-zero, used as arg.bound_dev_if for the reply.
      * @key:         if non-NULL (and CONFIG_TCP_MD5SIG), the reply is signed.
      * @reply_flags: copied to arg.flags.
      * @tos:         copied to arg.tos.
      */
 706 static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack,
 707                             u32 win, u32 ts, int oif,
 708                             struct tcp_md5sig_key *key,
 709                             int reply_flags, u8 tos)
 710 {
 711         const struct tcphdr *th = tcp_hdr(skb);
             /* Reply image: TCP header plus room for an aligned timestamp
              * option and, with MD5 support, an aligned signature option.
              */
 712         struct {
 713                 struct tcphdr th;
 714                 __be32 opt[(TCPOLEN_TSTAMP_ALIGNED >> 2)
 715 #ifdef CONFIG_TCP_MD5SIG
 716                            + (TCPOLEN_MD5SIG_ALIGNED >> 2)
 717 #endif
 718                         ];
 719         } rep;
 720         struct ip_reply_arg arg;
 721         struct net *net = dev_net(skb_dst(skb)->dev);
 722
             /* Only the header is cleared; option words are written explicitly
              * below for every option that is counted in iov_len.
              */
 723         memset(&rep.th, 0, sizeof(struct tcphdr));
 724         memset(&arg, 0, sizeof(arg));
 725
 726         arg.iov[0].iov_base = (unsigned char *)&rep;
 727         arg.iov[0].iov_len  = sizeof(rep.th);
 728         if (ts) {
                     /* NOP, NOP, TIMESTAMP: our clock, then the echoed value. */
 729                 rep.opt[0] = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
 730                                    (TCPOPT_TIMESTAMP << 8) |
 731                                    TCPOLEN_TIMESTAMP);
 732                 rep.opt[1] = htonl(tcp_time_stamp);
 733                 rep.opt[2] = htonl(ts);
 734                 arg.iov[0].iov_len += TCPOLEN_TSTAMP_ALIGNED;
 735         }
 736
 737         /* Swap the send and the receive. */
 738         rep.th.dest    = th->source;
 739         rep.th.source  = th->dest;
 740         rep.th.doff    = arg.iov[0].iov_len / 4;
 741         rep.th.seq     = htonl(seq);
 742         rep.th.ack_seq = htonl(ack);
 743         rep.th.ack     = 1;
 744         rep.th.window  = htons(win);
 745
 746 #ifdef CONFIG_TCP_MD5SIG
 747         if (key) {
                     /* The MD5 option starts after the three timestamp words
                      * when a timestamp option was written, else at word 0.
                      */
 748                 int offset = (ts) ? 3 : 0;
 749
 750                 rep.opt[offset++] = htonl((TCPOPT_NOP << 24) |
 751                                           (TCPOPT_NOP << 16) |
 752                                           (TCPOPT_MD5SIG << 8) |
 753                                           TCPOLEN_MD5SIG);
 754                 arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
 755                 rep.th.doff = arg.iov[0].iov_len/4;
 756
                     /* Sign the reply: its destination is the incoming source
                      * address and vice versa.
                      */
 757                 tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[offset],
 758                                     key, ip_hdr(skb)->saddr,
 759                                     ip_hdr(skb)->daddr, &rep.th);
 760         }
 761 #endif
 762         arg.flags = reply_flags;
             /* Pseudo-header checksum seed for the reply. */
 763         arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
 764                                       ip_hdr(skb)->saddr, /* XXX */
 765                                       arg.iov[0].iov_len, IPPROTO_TCP, 0);
             /* csumoffset is the check field's offset in 16-bit words. */
 766         arg.csumoffset = offsetof(struct tcphdr, check) / 2;
 767         if (oif)
 768                 arg.bound_dev_if = oif;
 769         arg.tos = tos;
 770         ip_send_reply(net->ipv4.tcp_sock, skb, ip_hdr(skb)->saddr,
 771                       &arg, arg.iov[0].iov_len);
 772
 773         TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
 774 }
 775
 776 static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
 777 {
 778         struct inet_timewait_sock *tw = inet_twsk(sk);
 779         struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
 780
 781         tcp_v4_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
 782                         tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
 783                         tcptw->tw_ts_recent,
 784                         tw->tw_bound_dev_if,
 785                         tcp_twsk_md5_key(tcptw),
 786                         tw->tw_transparent ? IP_REPLY_ARG_NOSRCCHECK : 0,
 787                         tw->tw_tos
 788                         );
 789
 790         inet_twsk_put(tw);
 791 }
 792
 793 static void tcp_v4_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
 794                                   struct request_sock *req)
 795 {
 796         tcp_v4_send_ack(skb, tcp_rsk(req)->snt_isn + 1,
 797                         tcp_rsk(req)->rcv_isn + 1, req->rcv_wnd,
 798                         req->ts_recent,
 799                         0,
 800                         tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&ip_hdr(skb)->daddr,
 801                                           AF_INET),
 802                         inet_rsk(req)->no_srccheck ? IP_REPLY_ARG_NOSRCCHECK : 0,
 803                         ip_hdr(skb)->tos);
 804 }
 805
 806 /*
 807  *      Send a SYN-ACK after having received a SYN.
 808  *      This still operates on a request_sock only, not on a big
 809  *      socket.
 810  */
 811 static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst,
 812                               struct request_sock *req,
 813                               struct request_values *rvp)
 814 {
 815         const struct inet_request_sock *ireq = inet_rsk(req);
 816         struct flowi4 fl4;
 817         int err = -1;
 818         struct sk_buff * skb;
 819
 820         /* First, grab a route. */
 821         if (!dst && (dst = inet_csk_route_req(sk, &fl4, req)) == NULL)
 822                 return -1;
 823
 824         skb = tcp_make_synack(sk, dst, req, rvp);
 825
 826         if (skb) {
 827                 __tcp_v4_send_check(skb, ireq->loc_addr, ireq->rmt_addr);
 828
 829                 err = ip_build_and_send_pkt(skb, sk, ireq->loc_addr,
 830                                             ireq->rmt_addr,
 831                                             ireq->opt);
 832                 err = net_xmit_eval(err);
 833         }
 834
 835         dst_release(dst);
 836         return err;
 837 }
 838
 839 static int tcp_v4_rtx_synack(struct sock *sk, struct request_sock *req,
 840                               struct request_values *rvp)
 841 {
 842         TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS);
 843         return tcp_v4_send_synack(sk, NULL, req, rvp);
 844 }
 845
 846 /*
 847  *      IPv4 request_sock destructor.
 848  */
 849 static void tcp_v4_reqsk_destructor(struct request_sock *req)
 850 {
 851         kfree(inet_rsk(req)->opt);
 852 }
 853
 854 /*
 855  * Return 1 if a syncookie should be sent
 856  */
 857 int tcp_syn_flood_action(struct sock *sk,
 858                          const struct sk_buff *skb,
 859                          const char *proto)
 860 {
 861         const char *msg = "Dropping request";
 862         int want_cookie = 0;
 863         struct listen_sock *lopt;
 864
 865
 866
 867 #ifdef CONFIG_SYN_COOKIES
 868         if (sysctl_tcp_syncookies) {
 869                 msg = "Sending cookies";
 870                 want_cookie = 1;
 871                 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPREQQFULLDOCOOKIES);
 872         } else
 873 #endif
 874                 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPREQQFULLDROP);
 875
 876         lopt = inet_csk(sk)->icsk_accept_queue.listen_opt;
 877         if (!lopt->synflood_warned) {
 878                 lopt->synflood_warned = 1;
 879                 pr_info("%s: Possible SYN flooding on port %d. %s. "
 880                         " Check SNMP counters.\n",
 881                         proto, ntohs(tcp_hdr(skb)->dest), msg);
 882         }
 883         return want_cookie;
 884 }
 885 EXPORT_SYMBOL(tcp_syn_flood_action);
 886
 887 /*
 888  * Save and compile IPv4 options into the request_sock if needed.
 889  */
 890 static struct ip_options_rcu *tcp_v4_save_options(struct sock *sk,
 891                                                   struct sk_buff *skb)
 892 {
 893         const struct ip_options *opt = &(IPCB(skb)->opt);
 894         struct ip_options_rcu *dopt = NULL;
 895
 896         if (opt && opt->optlen) {
 897                 int opt_size = sizeof(*dopt) + opt->optlen;
 898
 899                 dopt = kmalloc(opt_size, GFP_ATOMIC);
 900                 if (dopt) {
 901                         if (ip_options_echo(&dopt->opt, skb)) {
 902                                 kfree(dopt);
 903                                 dopt = NULL;
 904                         }
 905                 }
 906         }
 907         return dopt;
 908 }
 909
 910 #ifdef CONFIG_TCP_MD5SIG
 911 /*
 912  * RFC2385 MD5 checksumming requires a mapping of
 913  * IP address->MD5 Key.
 914  * We need to maintain these in the sk structure.
 915  */
 916
 917 /* Find the Key structure for an address.  */
 918 struct tcp_md5sig_key *tcp_md5_do_lookup(struct sock *sk,
 919                                          const union tcp_md5_addr *addr,
 920                                          int family)
 921 {
 922         struct tcp_sock *tp = tcp_sk(sk);
 923         struct tcp_md5sig_key *key;
 924         struct hlist_node *pos;
 925         unsigned int size = sizeof(struct in_addr);
 926         struct tcp_md5sig_info *md5sig;
 927
 928         /* caller either holds rcu_read_lock() or socket lock */
 929         md5sig = rcu_dereference_check(tp->md5sig_info,
 930                                        sock_owned_by_user(sk) ||
 931                                        lockdep_is_held(&sk->sk_lock.slock));
 932         if (!md5sig)
 933                 return NULL;
 934 #if IS_ENABLED(CONFIG_IPV6)
 935         if (family == AF_INET6)
 936                 size = sizeof(struct in6_addr);
 937 #endif
 938         hlist_for_each_entry_rcu(key, pos, &md5sig->head, node) {
 939                 if (key->family != family)
 940                         continue;
 941                 if (!memcmp(&key->addr, addr, size))
 942                         return key;
 943         }
 944         return NULL;
 945 }
 946 EXPORT_SYMBOL(tcp_md5_do_lookup);
 947
 948 struct tcp_md5sig_key *tcp_v4_md5_lookup(struct sock *sk,
 949                                          struct sock *addr_sk)
 950 {
 951         union tcp_md5_addr *addr;
 952
 953         addr = (union tcp_md5_addr *)&inet_sk(addr_sk)->inet_daddr;
 954         return tcp_md5_do_lookup(sk, addr, AF_INET);
 955 }
 956 EXPORT_SYMBOL(tcp_v4_md5_lookup);
 957
 958 static struct tcp_md5sig_key *tcp_v4_reqsk_md5_lookup(struct sock *sk,
 959                                                       struct request_sock *req)
 960 {
 961         union tcp_md5_addr *addr;
 962
 963         addr = (union tcp_md5_addr *)&inet_rsk(req)->rmt_addr;
 964         return tcp_md5_do_lookup(sk, addr, AF_INET);
 965 }
 966
 967 /* This can be called on a newly created socket, from other files */
 968 int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr,
 969                    int family, const u8 *newkey, u8 newkeylen, gfp_t gfp)
 970 {
 971         /* Add Key to the list */
 972         struct tcp_md5sig_key *key;
 973         struct tcp_sock *tp = tcp_sk(sk);
 974         struct tcp_md5sig_info *md5sig;
 975
 976         key = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&addr, AF_INET);
 977         if (key) {
 978                 /* Pre-existing entry - just update that one. */
 979                 memcpy(key->key, newkey, newkeylen);
 980                 key->keylen = newkeylen;
 981                 return 0;
 982         }
 983
 984         md5sig = rcu_dereference_protected(tp->md5sig_info,
 985                                            sock_owned_by_user(sk));
 986         if (!md5sig) {
 987                 md5sig = kmalloc(sizeof(*md5sig), gfp);
 988                 if (!md5sig)
 989                         return -ENOMEM;
 990
 991                 sk_nocaps_add(sk, NETIF_F_GSO_MASK);
 992                 INIT_HLIST_HEAD(&md5sig->head);
 993                 rcu_assign_pointer(tp->md5sig_info, md5sig);
 994         }
 995
 996         key = sock_kmalloc(sk, sizeof(*key), gfp);
 997         if (!key)
 998                 return -ENOMEM;
 999         if (hlist_empty(&md5sig->head) && !tcp_alloc_md5sig_pool(sk)) {
1000                 sock_kfree_s(sk, key, sizeof(*key));
1001                 return -ENOMEM;
1002         }
1003
1004         memcpy(key->key, newkey, newkeylen);
1005         key->keylen = newkeylen;
1006         key->family = family;
1007         memcpy(&key->addr, addr,
1008                (family == AF_INET6) ? sizeof(struct in6_addr) :
1009                                       sizeof(struct in_addr));
1010         hlist_add_head_rcu(&key->node, &md5sig->head);
1011         return 0;
1012 }
1013 EXPORT_SYMBOL(tcp_md5_do_add);
1014
1015 int tcp_md5_do_del(struct sock *sk, const union tcp_md5_addr *addr, int family)
1016 {
1017         struct tcp_sock *tp = tcp_sk(sk);
1018         struct tcp_md5sig_key *key;
1019         struct tcp_md5sig_info *md5sig;
1020
1021         key = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&addr, AF_INET);
1022         if (!key)
1023                 return -ENOENT;
1024         hlist_del_rcu(&key->node);
1025         atomic_sub(sizeof(*key), &sk->sk_omem_alloc);
1026         kfree_rcu(key, rcu);
1027         md5sig = rcu_dereference_protected(tp->md5sig_info,
1028                                            sock_owned_by_user(sk));
1029         if (hlist_empty(&md5sig->head))
1030                 tcp_free_md5sig_pool();
1031         return 0;
1032 }
1033 EXPORT_SYMBOL(tcp_md5_do_del);
1034
1035 void tcp_clear_md5_list(struct sock *sk)
1036 {
1037         struct tcp_sock *tp = tcp_sk(sk);
1038         struct tcp_md5sig_key *key;
1039         struct hlist_node *pos, *n;
1040         struct tcp_md5sig_info *md5sig;
1041
1042         md5sig = rcu_dereference_protected(tp->md5sig_info, 1);
1043
1044         if (!hlist_empty(&md5sig->head))
1045                 tcp_free_md5sig_pool();
1046         hlist_for_each_entry_safe(key, pos, n, &md5sig->head, node) {
1047                 hlist_del_rcu(&key->node);
1048                 atomic_sub(sizeof(*key), &sk->sk_omem_alloc);
1049                 kfree_rcu(key, rcu);
1050         }
1051 }
1052
1053 static int tcp_v4_parse_md5_keys(struct sock *sk, char __user *optval,
1054                                  int optlen)
1055 {
1056         struct tcp_md5sig cmd;
1057         struct sockaddr_in *sin = (struct sockaddr_in *)&cmd.tcpm_addr;
1058
1059         if (optlen < sizeof(cmd))
1060                 return -EINVAL;
1061
1062         if (copy_from_user(&cmd, optval, sizeof(cmd)))
1063                 return -EFAULT;
1064
1065         if (sin->sin_family != AF_INET)
1066                 return -EINVAL;
1067
1068         if (!cmd.tcpm_key || !cmd.tcpm_keylen)
1069                 return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin->sin_addr.s_addr,
1070                                       AF_INET);
1071
1072         if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
1073                 return -EINVAL;
1074
1075         return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin->sin_addr.s_addr,
1076                               AF_INET, cmd.tcpm_key, cmd.tcpm_keylen,
1077                               GFP_KERNEL);
1078 }
1079
1080 static int tcp_v4_md5_hash_pseudoheader(struct tcp_md5sig_pool *hp,
1081                                         __be32 daddr, __be32 saddr, int nbytes)
1082 {
1083         struct tcp4_pseudohdr *bp;
1084         struct scatterlist sg;
1085
1086         bp = &hp->md5_blk.ip4;
1087
1088         /*
1089          * 1. the TCP pseudo-header (in the order: source IP address,
1090          * destination IP address, zero-padded protocol number, and
1091          * segment length)
1092          */
1093         bp->saddr = saddr;
1094         bp->daddr = daddr;
1095         bp->pad = 0;
1096         bp->protocol = IPPROTO_TCP;
1097         bp->len = cpu_to_be16(nbytes);
1098
1099         sg_init_one(&sg, bp, sizeof(*bp));
1100         return crypto_hash_update(&hp->md5_desc, &sg, sizeof(*bp));
1101 }
1102
1103 static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
1104                                __be32 daddr, __be32 saddr, const struct tcphdr *th)
1105 {
1106         struct tcp_md5sig_pool *hp;
1107         struct hash_desc *desc;
1108
1109         hp = tcp_get_md5sig_pool();
1110         if (!hp)
1111                 goto clear_hash_noput;
1112         desc = &hp->md5_desc;
1113
1114         if (crypto_hash_init(desc))
1115                 goto clear_hash;
1116         if (tcp_v4_md5_hash_pseudoheader(hp, daddr, saddr, th->doff << 2))
1117                 goto clear_hash;
1118         if (tcp_md5_hash_header(hp, th))
1119                 goto clear_hash;
1120         if (tcp_md5_hash_key(hp, key))
1121                 goto clear_hash;
1122         if (crypto_hash_final(desc, md5_hash))
1123                 goto clear_hash;
1124
1125         tcp_put_md5sig_pool();
1126         return 0;
1127
1128 clear_hash:
1129         tcp_put_md5sig_pool();
1130 clear_hash_noput:
1131         memset(md5_hash, 0, 16);
1132         return 1;
1133 }
1134
1135 int tcp_v4_md5_hash_skb(char *md5_hash, struct tcp_md5sig_key *key,
1136                         const struct sock *sk, const struct request_sock *req,
1137                         const struct sk_buff *skb)
1138 {
1139         struct tcp_md5sig_pool *hp;
1140         struct hash_desc *desc;
1141         const struct tcphdr *th = tcp_hdr(skb);
1142         __be32 saddr, daddr;
1143
1144         if (sk) {
1145                 saddr = inet_sk(sk)->inet_saddr;
1146                 daddr = inet_sk(sk)->inet_daddr;
1147         } else if (req) {
1148                 saddr = inet_rsk(req)->loc_addr;
1149                 daddr = inet_rsk(req)->rmt_addr;
1150         } else {
1151                 const struct iphdr *iph = ip_hdr(skb);
1152                 saddr = iph->saddr;
1153                 daddr = iph->daddr;
1154         }
1155
1156         hp = tcp_get_md5sig_pool();
1157         if (!hp)
1158                 goto clear_hash_noput;
1159         desc = &hp->md5_desc;
1160
1161         if (crypto_hash_init(desc))
1162                 goto clear_hash;
1163
1164         if (tcp_v4_md5_hash_pseudoheader(hp, daddr, saddr, skb->len))
1165                 goto clear_hash;
1166         if (tcp_md5_hash_header(hp, th))
1167                 goto clear_hash;
1168         if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
1169                 goto clear_hash;
1170         if (tcp_md5_hash_key(hp, key))
1171                 goto clear_hash;
1172         if (crypto_hash_final(desc, md5_hash))
1173                 goto clear_hash;
1174
1175         tcp_put_md5sig_pool();
1176         return 0;
1177
1178 clear_hash:
1179         tcp_put_md5sig_pool();
1180 clear_hash_noput:
1181         memset(md5_hash, 0, 16);
1182         return 1;
1183 }
1184 EXPORT_SYMBOL(tcp_v4_md5_hash_skb);
1185
1186 static int tcp_v4_inbound_md5_hash(struct sock *sk, const struct sk_buff *skb)
1187 {
1188         /*
1189          * This gets called for each TCP segment that arrives
1190          * so we want to be efficient.
1191          * We have 3 drop cases:
1192          * o No MD5 hash and one expected.
1193          * o MD5 hash and we're not expecting one.
1194          * o MD5 hash and its wrong.
1195          */
1196         const __u8 *hash_location = NULL;
1197         struct tcp_md5sig_key *hash_expected;
1198         const struct iphdr *iph = ip_hdr(skb);
1199         const struct tcphdr *th = tcp_hdr(skb);
1200         int genhash;
1201         unsigned char newhash[16];
1202
1203         hash_expected = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&iph->saddr,
1204                                           AF_INET);
1205         hash_location = tcp_parse_md5sig_option(th);
1206
1207         /* We've parsed the options - do we have a hash? */
1208         if (!hash_expected && !hash_location)
1209                 return 0;
1210
1211         if (hash_expected && !hash_location) {
1212                 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND);
1213                 return 1;
1214         }
1215
1216         if (!hash_expected && hash_location) {
1217                 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED);
1218                 return 1;
1219         }
1220
1221         /* Okay, so this is hash_expected and hash_location -
1222          * so we need to calculate the checksum.
1223          */
1224         genhash = tcp_v4_md5_hash_skb(newhash,
1225                                       hash_expected,
1226                                       NULL, NULL, skb);
1227
1228         if (genhash || memcmp(hash_location, newhash, 16) != 0) {
1229                 if (net_ratelimit()) {
1230                         pr_info("MD5 Hash failed for (%pI4, %d)->(%pI4, %d)%s\n",
1231                                 &iph->saddr, ntohs(th->source),
1232                                 &iph->daddr, ntohs(th->dest),
1233                                 genhash ? " tcp_v4_calc_md5_hash failed" : "");
1234                 }
1235                 return 1;
1236         }
1237         return 0;
1238 }
1239
1240 #endif
1241
1242 struct request_sock_ops tcp_request_sock_ops __read_mostly = {
1243         .family         =       PF_INET,
1244         .obj_size       =       sizeof(struct tcp_request_sock),
1245         .rtx_syn_ack    =       tcp_v4_rtx_synack,
1246         .send_ack       =       tcp_v4_reqsk_send_ack,
1247         .destructor     =       tcp_v4_reqsk_destructor,
1248         .send_reset     =       tcp_v4_send_reset,
1249         .syn_ack_timeout =      tcp_syn_ack_timeout,
1250 };
1251
1252 #ifdef CONFIG_TCP_MD5SIG
1253 static const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = {
1254         .md5_lookup     =       tcp_v4_reqsk_md5_lookup,
1255         .calc_md5_hash  =       tcp_v4_md5_hash_skb,
1256 };
1257 #endif
1258
     /*
      * Process an incoming SYN for @sk: allocate a request_sock, record
      * the parsed TCP options and the addresses of @skb in it, choose the
      * initial sequence number, send the SYN-ACK and add the request to
      * the request queue.
      *
      * When a syncookie is being sent (want_cookie) the request is freed
      * again right after the SYN-ACK has been sent instead of being queued.
      *
      * Always returns 0; a refused request is simply dropped.
      */
1259 int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
1260 {
1261         struct tcp_extend_values tmp_ext;
1262         struct tcp_options_received tmp_opt;
1263         const u8 *hash_location;
1264         struct request_sock *req;
1265         struct inet_request_sock *ireq;
1266         struct tcp_sock *tp = tcp_sk(sk);
1267         struct dst_entry *dst = NULL;
1268         __be32 saddr = ip_hdr(skb)->saddr;
1269         __be32 daddr = ip_hdr(skb)->daddr;
1270         __u32 isn = TCP_SKB_CB(skb)->when;
1271         int want_cookie = 0;
1272
1273         /* Never answer to SYNs sent to broadcast or multicast */
1274         if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))
1275                 goto drop;
1276
1277         /* TW buckets are converted to open requests without
1278          * limitations, they conserve resources and peer is
1279          * evidently real one.
1280          */
1281         if (inet_csk_reqsk_queue_is_full(sk) && !isn) {
1282                 want_cookie = tcp_syn_flood_action(sk, skb, "TCP");
1283                 if (!want_cookie)
1284                         goto drop;
1285         }
1286
1287         /* Accept backlog is full. If we have already queued enough
1288          * of warm entries in syn queue, drop request. It is better than
1289          * clogging syn queue with openreqs with exponentially increasing
1290          * timeout.
1291          */
1292         if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1)
1293                 goto drop;
1294
1295         req = inet_reqsk_alloc(&tcp_request_sock_ops);
1296         if (!req)
1297                 goto drop;
1298
1299 #ifdef CONFIG_TCP_MD5SIG
1300         tcp_rsk(req)->af_specific = &tcp_request_sock_ipv4_ops;
1301 #endif
1302
1303         tcp_clear_options(&tmp_opt);
1304         tmp_opt.mss_clamp = TCP_MSS_DEFAULT;
1305         tmp_opt.user_mss  = tp->rx_opt.user_mss;
1306         tcp_parse_options(skb, &tmp_opt, &hash_location, 0);
1307
1308         if (tmp_opt.cookie_plus > 0 &&
1309             tmp_opt.saw_tstamp &&
1310             !tp->rx_opt.cookie_out_never &&
1311             (sysctl_tcp_cookie_size > 0 ||
1312              (tp->cookie_values != NULL &&
1313               tp->cookie_values->cookie_desired > 0))) {
1314                 u8 *c;
1315                 u32 *mess = &tmp_ext.cookie_bakery[COOKIE_DIGEST_WORDS];
1316                 int l = tmp_opt.cookie_plus - TCPOLEN_COOKIE_BASE;
1317
1318                 if (tcp_cookie_generator(&tmp_ext.cookie_bakery[0]) != 0)
1319                         goto drop_and_release;
1320
1321                 /* Secret recipe starts with IP addresses */
1322                 *mess++ ^= (__force u32)daddr;
1323                 *mess++ ^= (__force u32)saddr;
1324
1325                 /* plus variable length Initiator Cookie */
1326                 c = (u8 *)mess;
1327                 while (l-- > 0)
1328                         *c++ ^= *hash_location++;
1329
1330                 want_cookie = 0;        /* not our kind of cookie */
1331                 tmp_ext.cookie_out_never = 0; /* false */
1332                 tmp_ext.cookie_plus = tmp_opt.cookie_plus;
1333         } else if (!tp->rx_opt.cookie_in_always) {
1334                 /* redundant indications, but ensure initialization. */
1335                 tmp_ext.cookie_out_never = 1; /* true */
1336                 tmp_ext.cookie_plus = 0;
1337         } else {
1338                 goto drop_and_release;
1339         }
1340         tmp_ext.cookie_in_always = tp->rx_opt.cookie_in_always;
1341
1342         if (want_cookie && !tmp_opt.saw_tstamp)
1343                 tcp_clear_options(&tmp_opt);
1344
1345         tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
1346         tcp_openreq_init(req, &tmp_opt, skb);
1347
1348         ireq = inet_rsk(req);
1349         ireq->loc_addr = daddr;
1350         ireq->rmt_addr = saddr;
1351         ireq->no_srccheck = inet_sk(sk)->transparent;
1352         ireq->opt = tcp_v4_save_options(sk, skb);
1353
1354         if (security_inet_conn_request(sk, skb, req))
1355                 goto drop_and_free;
1356
1357         if (!want_cookie || tmp_opt.tstamp_ok)
1358                 TCP_ECN_create_request(req, tcp_hdr(skb));
1359
1360         if (want_cookie) {
1361                 isn = cookie_v4_init_sequence(sk, skb, &req->mss);
1362                 req->cookie_ts = tmp_opt.tstamp_ok;
1363         } else if (!isn) {
1364                 struct inet_peer *peer = NULL;
1365                 struct flowi4 fl4;
1366
1367                 /* VJ's idea. We save last timestamp seen
1368                  * from the destination in peer table, when entering
1369                  * state TIME-WAIT, and check against it before
1370                  * accepting new connection request.
1371                  *
1372                  * If "isn" is not zero, this request hit alive
1373                  * timewait bucket, so that all the necessary checks
1374                  * are made in the function processing timewait state.
1375                  */
                     /* Note: dst and peer are assigned inside this condition;
                      * the "else if" below reads whatever was set before the
                      * condition short-circuited.
                      */
1376                 if (tmp_opt.saw_tstamp &&
1377                     tcp_death_row.sysctl_tw_recycle &&
1378                     (dst = inet_csk_route_req(sk, &fl4, req)) != NULL &&
1379                     fl4.daddr == saddr &&
1380                     (peer = rt_get_peer((struct rtable *)dst, fl4.daddr)) != NULL) {
1381                         inet_peer_refcheck(peer);
1382                         if ((u32)get_seconds() - peer->tcp_ts_stamp < TCP_PAWS_MSL &&
1383                             (s32)(peer->tcp_ts - req->ts_recent) >
1384                                                         TCP_PAWS_WINDOW) {
1385                                 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSPASSIVEREJECTED);
1386                                 goto drop_and_release;
1387                         }
1388                 }
1389                 /* Kill the following clause, if you dislike this way. */
1390                 else if (!sysctl_tcp_syncookies &&
1391                          (sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) <
1392                           (sysctl_max_syn_backlog >> 2)) &&
1393                          (!peer || !peer->tcp_ts_stamp) &&
1394                          (!dst || !dst_metric(dst, RTAX_RTT))) {
1395                         /* Without syncookies last quarter of
1396                          * backlog is filled with destinations,
1397                          * proven to be alive.
1398                          * It means that we continue to communicate
1399                          * to destinations, already remembered
1400                          * to the moment of synflood.
1401                          */
1402                         LIMIT_NETDEBUG(KERN_DEBUG "TCP: drop open request from %pI4/%u\n",
1403                                        &saddr, ntohs(tcp_hdr(skb)->source));
1404                         goto drop_and_release;
1405                 }
1406
1407                 isn = tcp_v4_init_sequence(skb);
1408         }
1409         tcp_rsk(req)->snt_isn = isn;
1410         tcp_rsk(req)->snt_synack = tcp_time_stamp;
1411
             /* tcp_v4_send_synack() drops the dst reference itself, so the
              * failure path below skips dst_release(). With want_cookie the
              * request is freed here even though the SYN-ACK was sent.
              */
1412         if (tcp_v4_send_synack(sk, dst, req,
1413                                (struct request_values *)&tmp_ext) ||
1414             want_cookie)
1415                 goto drop_and_free;
1416
1417         inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
1418         return 0;
1419
1420 drop_and_release:
1421         dst_release(dst);
1422 drop_and_free:
1423         reqsk_free(req);
1424 drop:
1425         return 0;
1426 }
1427 EXPORT_SYMBOL(tcp_v4_conn_request);
1428
1429
1430 /*
1431  * The three way handshake has completed - we got a valid synack -
1432  * now create the new socket.
1433  */
1434 struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
1435                                   struct request_sock *req,
1436                                   struct dst_entry *dst)
1437 {
1438         struct inet_request_sock *ireq;
1439         struct inet_sock *newinet;
1440         struct tcp_sock *newtp;
1441         struct sock *newsk;
1442 #ifdef CONFIG_TCP_MD5SIG
1443         struct tcp_md5sig_key *key;
1444 #endif
1445         struct ip_options_rcu *inet_opt;
1446
1447         if (sk_acceptq_is_full(sk))
1448                 goto exit_overflow;
1449
1450         newsk = tcp_create_openreq_child(sk, req, skb);
1451         if (!newsk)
1452                 goto exit_nonewsk;
1453
1454         newsk->sk_gso_type = SKB_GSO_TCPV4;
1455
1456         newtp                 = tcp_sk(newsk);
1457         newinet               = inet_sk(newsk);
1458         ireq                  = inet_rsk(req);
1459         newinet->inet_daddr   = ireq->rmt_addr;
1460         newinet->inet_rcv_saddr = ireq->loc_addr;
1461         newinet->inet_saddr           = ireq->loc_addr;
1462         inet_opt              = ireq->opt;
1463         rcu_assign_pointer(newinet->inet_opt, inet_opt);
1464         ireq->opt             = NULL;
1465         newinet->mc_index     = inet_iif(skb);
1466         newinet->mc_ttl       = ip_hdr(skb)->ttl;
1467         newinet->rcv_tos      = ip_hdr(skb)->tos;
1468         inet_csk(newsk)->icsk_ext_hdr_len = 0;
1469         if (inet_opt)
1470                 inet_csk(newsk)->icsk_ext_hdr_len = inet_opt->opt.optlen;
1471         newinet->inet_id = newtp->write_seq ^ jiffies;
1472
1473         if (!dst && (dst = inet_csk_route_child_sock(sk, newsk, req)) == NULL)
1474                 goto put_and_exit;
1475
1476         sk_setup_caps(newsk, dst);
1477
1478         tcp_mtup_init(newsk);
1479         tcp_sync_mss(newsk, dst_mtu(dst));
1480         newtp->advmss = dst_metric_advmss(dst);
1481         if (tcp_sk(sk)->rx_opt.user_mss &&
1482             tcp_sk(sk)->rx_opt.user_mss < newtp->advmss)
1483                 newtp->advmss = tcp_sk(sk)->rx_opt.user_mss;
1484
1485         tcp_initialize_rcv_mss(newsk);
1486         if (tcp_rsk(req)->snt_synack)
1487                 tcp_valid_rtt_meas(newsk,
1488                     tcp_time_stamp - tcp_rsk(req)->snt_synack);
1489         newtp->total_retrans = req->retrans;
1490
1491 #ifdef CONFIG_TCP_MD5SIG
1492         /* Copy over the MD5 key from the original socket */
1493         key = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&newinet->inet_daddr,
1494                                 AF_INET);
1495         if (key != NULL) {
1496                 /*
1497                  * We're using one, so create a matching key
1498                  * on the newsk structure. If we fail to get
1499                  * memory, then we end up not copying the key
1500                  * across. Shucks.
1501                  */
1502                 tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newinet->inet_daddr,
1503                                AF_INET, key->key, key->keylen, GFP_ATOMIC);
1504                 sk_nocaps_add(newsk, NETIF_F_GSO_MASK);
1505         }
1506 #endif
1507
1508         if (__inet_inherit_port(sk, newsk) < 0)
1509                 goto put_and_exit;
1510         __inet_hash_nolisten(newsk, NULL);
1511
1512         return newsk;
1513
1514 exit_overflow:
1515         NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
1516 exit_nonewsk:
1517         dst_release(dst);
1518 exit:
1519         NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
1520         return NULL;
1521 put_and_exit:
1522         tcp_clear_xmit_timers(newsk);
1523         tcp_cleanup_congestion_control(newsk);
1524         bh_unlock_sock(newsk);
1525         sock_put(newsk);
1526         goto exit;
1527 }
1528 EXPORT_SYMBOL(tcp_v4_syn_recv_sock);
1529
1530 static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
1531 {
1532         struct tcphdr *th = tcp_hdr(skb);
1533         const struct iphdr *iph = ip_hdr(skb);
1534         struct sock *nsk;
1535         struct request_sock **prev;
1536         /* Find possible connection requests. */
1537         struct request_sock *req = inet_csk_search_req(sk, &prev, th->source,
1538                                                        iph->saddr, iph->daddr);
1539         if (req)
1540                 return tcp_check_req(sk, skb, req, prev);
1541
1542         nsk = inet_lookup_established(sock_net(sk), &tcp_hashinfo, iph->saddr,
1543                         th->source, iph->daddr, th->dest, inet_iif(skb));
1544
1545         if (nsk) {
1546                 if (nsk->sk_state != TCP_TIME_WAIT) {
1547                         bh_lock_sock(nsk);
1548                         return nsk;
1549                 }
1550                 inet_twsk_put(inet_twsk(nsk));
1551                 return NULL;
1552         }
1553
1554 #ifdef CONFIG_SYN_COOKIES
1555         if (!th->syn)
1556                 sk = cookie_v4_check(sk, skb, &(IPCB(skb)->opt));
1557 #endif
1558         return sk;
1559 }
1560
1561 static __sum16 tcp_v4_checksum_init(struct sk_buff *skb)
1562 {
1563         const struct iphdr *iph = ip_hdr(skb);
1564
1565         if (skb->ip_summed == CHECKSUM_COMPLETE) {
1566                 if (!tcp_v4_check(skb->len, iph->saddr,
1567                                   iph->daddr, skb->csum)) {
1568                         skb->ip_summed = CHECKSUM_UNNECESSARY;
1569                         return 0;
1570                 }
1571         }
1572
1573         skb->csum = csum_tcpudp_nofold(iph->saddr, iph->daddr,
1574                                        skb->len, IPPROTO_TCP, 0);
1575
1576         if (skb->len <= 76) {
1577                 return __skb_checksum_complete(skb);
1578         }
1579         return 0;
1580 }
1581
1582
1583 /* The socket must have it's spinlock held when we get
1584  * here.
1585  *
1586  * We have a potential double-lock case here, so even when
1587  * doing backlog processing we use the BH locking scheme.
1588  * This is because we cannot sleep with the original spinlock
1589  * held.
1590  */
1591 int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
1592 {
1593         struct sock *rsk;
1594 #ifdef CONFIG_TCP_MD5SIG
1595         /*
1596          * We really want to reject the packet as early as possible
1597          * if:
1598          *  o We're expecting an MD5'd packet and this is no MD5 tcp option
1599          *  o There is an MD5 option and we're not expecting one
1600          */
1601         if (tcp_v4_inbound_md5_hash(sk, skb))
1602                 goto discard;
1603 #endif
1604
1605         if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
1606                 sock_rps_save_rxhash(sk, skb);
1607                 if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len)) {
1608                         rsk = sk;
1609                         goto reset;
1610                 }
1611                 return 0;
1612         }
1613
1614         if (skb->len < tcp_hdrlen(skb) || tcp_checksum_complete(skb))
1615                 goto csum_err;
1616
1617         if (sk->sk_state == TCP_LISTEN) {
1618                 struct sock *nsk = tcp_v4_hnd_req(sk, skb);
1619                 if (!nsk)
1620                         goto discard;
1621
1622                 if (nsk != sk) {
1623                         sock_rps_save_rxhash(nsk, skb);
1624                         if (tcp_child_process(sk, nsk, skb)) {
1625                                 rsk = nsk;
1626                                 goto reset;
1627                         }
1628                         return 0;
1629                 }
1630         } else
1631                 sock_rps_save_rxhash(sk, skb);
1632
1633         if (tcp_rcv_state_process(sk, skb, tcp_hdr(skb), skb->len)) {
1634                 rsk = sk;
1635                 goto reset;
1636         }
1637         return 0;
1638
1639 reset:
1640         tcp_v4_send_reset(rsk, skb);
1641 discard:
1642         kfree_skb(skb);
1643         /* Be careful here. If this function gets more complicated and
1644          * gcc suffers from register pressure on the x86, sk (in %ebx)
1645          * might be destroyed here. This current version compiles correctly,
1646          * but you have been warned.
1647          */
1648         return 0;
1649
1650 csum_err:
1651         TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS);
1652         goto discard;
1653 }
1654 EXPORT_SYMBOL(tcp_v4_do_rcv);
1655
1656 /*
1657  *      From tcp_input.c
1658  */
1659
/*
 * Receive entry point for a TCP segment carried over IPv4.
 *
 * Steps, in order:
 *   - drop anything not addressed to this host (pkt_type != PACKET_HOST);
 *   - make sure the full TCP header (th->doff words) is pulled and start
 *     checksum validation;
 *   - fill in the TCP control block of the skb (seq, end_seq, ack_seq, ...);
 *   - look up the owning socket;
 *   - hand the segment to the socket: processed immediately, queued on the
 *     prequeue, or put on the backlog when the socket is owned by a user
 *     context.
 *
 * Segments with no matching socket get a reset (unless malformed), and
 * TIME_WAIT sockets are handled separately under do_time_wait.
 *
 * Returns the value of tcp_v4_do_rcv() when the segment was processed
 * directly, 0 otherwise. The skb is consumed on every path.
 */
int tcp_v4_rcv(struct sk_buff *skb)
{
        const struct iphdr *iph;
        const struct tcphdr *th;
        struct sock *sk;
        int ret;
        struct net *net = dev_net(skb->dev);

        if (skb->pkt_type != PACKET_HOST)
                goto discard_it;

        /* Count it even if it's bad */
        TCP_INC_STATS_BH(net, TCP_MIB_INSEGS);

        if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
                goto discard_it;

        th = tcp_hdr(skb);

        /* Data offset smaller than the fixed header is malformed. */
        if (th->doff < sizeof(struct tcphdr) / 4)
                goto bad_packet;
        if (!pskb_may_pull(skb, th->doff * 4))
                goto discard_it;

        /* An explanation is required here, I think.
         * Packet length and doff are validated by header prediction,
         * provided case of th->doff==0 is eliminated.
         * So, we defer the checks. */
        if (!skb_csum_unnecessary(skb) && tcp_v4_checksum_init(skb))
                goto bad_packet;

        /* NOTE(review): th is re-read here, presumably because the pulls
         * above may have moved the header data -- confirm.
         */
        th = tcp_hdr(skb);
        iph = ip_hdr(skb);
        TCP_SKB_CB(skb)->seq = ntohl(th->seq);
        /* SYN and FIN each consume one sequence number in addition to the
         * payload length (skb->len minus the TCP header).
         */
        TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
                                    skb->len - th->doff * 4);
        TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
        TCP_SKB_CB(skb)->when    = 0;
        TCP_SKB_CB(skb)->ip_dsfield = ipv4_get_dsfield(iph);
        TCP_SKB_CB(skb)->sacked  = 0;

        sk = __inet_lookup_skb(&tcp_hashinfo, skb, th->source, th->dest);
        if (!sk)
                goto no_tcp_socket;

        /* Also re-entered from do_time_wait when a SYN hits a TIME_WAIT
         * socket and a listener is found to take over.
         */
process:
        if (sk->sk_state == TCP_TIME_WAIT)
                goto do_time_wait;

        if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) {
                NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP);
                goto discard_and_relse;
        }

        if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
                goto discard_and_relse;
        nf_reset(skb);

        if (sk_filter(sk, skb))
                goto discard_and_relse;

        skb->dev = NULL;

        bh_lock_sock_nested(sk);
        ret = 0;
        if (!sock_owned_by_user(sk)) {
#ifdef CONFIG_NET_DMA
                struct tcp_sock *tp = tcp_sk(sk);
                if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list)
                        tp->ucopy.dma_chan = dma_find_channel(DMA_MEMCPY);
                /* With a DMA channel available the prequeue is bypassed. */
                if (tp->ucopy.dma_chan)
                        ret = tcp_v4_do_rcv(sk, skb);
                else
#endif
                {
                        /* Process now unless the prequeue took the skb. */
                        if (!tcp_prequeue(sk, skb))
                                ret = tcp_v4_do_rcv(sk, skb);
                }
        } else if (unlikely(sk_add_backlog(sk, skb))) {
                /* Backlog refused the skb: unlock, count the drop, free. */
                bh_unlock_sock(sk);
                NET_INC_STATS_BH(net, LINUX_MIB_TCPBACKLOGDROP);
                goto discard_and_relse;
        }
        bh_unlock_sock(sk);

        sock_put(sk);

        return ret;

no_tcp_socket:
        if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
                goto discard_it;

        /* bad_packet is also a jump target from the header checks above:
         * malformed segments are only counted, well-formed ones with no
         * socket are answered with a reset.
         */
        if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {
bad_packet:
                TCP_INC_STATS_BH(net, TCP_MIB_INERRS);
        } else {
                tcp_v4_send_reset(NULL, skb);
        }

discard_it:
        /* Discard frame. */
        kfree_skb(skb);
        return 0;

discard_and_relse:
        sock_put(sk);
        goto discard_it;

do_time_wait:
        /* sk is a TIME_WAIT socket here; its reference is released with
         * inet_twsk_put() rather than sock_put().
         */
        if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
                inet_twsk_put(inet_twsk(sk));
                goto discard_it;
        }

        if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {
                TCP_INC_STATS_BH(net, TCP_MIB_INERRS);
                inet_twsk_put(inet_twsk(sk));
                goto discard_it;
        }
        switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
        case TCP_TW_SYN: {
                /* A listener for this address/port takes over: retire the
                 * TIME_WAIT socket and process the SYN on the listener.
                 */
                struct sock *sk2 = inet_lookup_listener(dev_net(skb->dev),
                                                        &tcp_hashinfo,
                                                        iph->daddr, th->dest,
                                                        inet_iif(skb));
                if (sk2) {
                        inet_twsk_deschedule(inet_twsk(sk), &tcp_death_row);
                        inet_twsk_put(inet_twsk(sk));
                        sk = sk2;
                        goto process;
                }
                /* Fall through to ACK */
        }
        case TCP_TW_ACK:
                tcp_v4_timewait_ack(sk, skb);
                break;
        case TCP_TW_RST:
                goto no_tcp_socket;
        case TCP_TW_SUCCESS:;
        }
        goto discard_it;
}
1803
1804 struct inet_peer *tcp_v4_get_peer(struct sock *sk, bool *release_it)
1805 {
1806         struct rtable *rt = (struct rtable *) __sk_dst_get(sk);
1807         struct inet_sock *inet = inet_sk(sk);
1808         struct inet_peer *peer;
1809
1810         if (!rt ||
1811             inet->cork.fl.u.ip4.daddr != inet->inet_daddr) {
1812                 peer = inet_getpeer_v4(inet->inet_daddr, 1);
1813                 *release_it = true;
1814         } else {
1815                 if (!rt->peer)
1816                         rt_bind_peer(rt, inet->inet_daddr, 1);
1817                 peer = rt->peer;
1818                 *release_it = false;
1819         }
1820
1821         return peer;
1822 }
1823 EXPORT_SYMBOL(tcp_v4_get_peer);
1824
1825 void *tcp_v4_tw_get_peer(struct sock *sk)
1826 {
1827         const struct inet_timewait_sock *tw = inet_twsk(sk);
1828
1829         return inet_getpeer_v4(tw->tw_daddr, 1);
1830 }
1831 EXPORT_SYMBOL(tcp_v4_tw_get_peer);
1832
1833 static struct timewait_sock_ops tcp_timewait_sock_ops = {
1834         .twsk_obj_size  = sizeof(struct tcp_timewait_sock),
1835         .twsk_unique    = tcp_twsk_unique,
1836         .twsk_destructor= tcp_twsk_destructor,
1837         .twsk_getpeer   = tcp_v4_tw_get_peer,
1838 };
1839
1840 const struct inet_connection_sock_af_ops ipv4_specific = {
1841         .queue_xmit        = ip_queue_xmit,
1842         .send_check        = tcp_v4_send_check,
1843         .rebuild_header    = inet_sk_rebuild_header,
1844         .conn_request      = tcp_v4_conn_request,
1845         .syn_recv_sock     = tcp_v4_syn_recv_sock,
1846         .get_peer          = tcp_v4_get_peer,
1847         .net_header_len    = sizeof(struct iphdr),
1848         .setsockopt        = ip_setsockopt,
1849         .getsockopt        = ip_getsockopt,
1850         .addr2sockaddr     = inet_csk_addr2sockaddr,
1851         .sockaddr_len      = sizeof(struct sockaddr_in),
1852         .bind_conflict     = inet_csk_bind_conflict,
1853 #ifdef CONFIG_COMPAT
1854         .compat_setsockopt = compat_ip_setsockopt,
1855         .compat_getsockopt = compat_ip_getsockopt,
1856 #endif
1857 };
1858 EXPORT_SYMBOL(ipv4_specific);
1859
1860 #ifdef CONFIG_TCP_MD5SIG
1861 static const struct tcp_sock_af_ops tcp_sock_ipv4_specific = {
1862         .md5_lookup             = tcp_v4_md5_lookup,
1863         .calc_md5_hash          = tcp_v4_md5_hash_skb,
1864         .md5_parse              = tcp_v4_parse_md5_keys,
1865 };
1866 #endif
1867
1868 /* NOTE: A lot of things set to zero explicitly by call to
1869  *       sk_alloc() so need not be done here.
1870  */
1871 static int tcp_v4_init_sock(struct sock *sk)
1872 {
1873         struct inet_connection_sock *icsk = inet_csk(sk);
1874         struct tcp_sock *tp = tcp_sk(sk);
1875
1876         skb_queue_head_init(&tp->out_of_order_queue);
1877         tcp_init_xmit_timers(sk);
1878         tcp_prequeue_init(tp);
1879
1880         icsk->icsk_rto = TCP_TIMEOUT_INIT;
1881         tp->mdev = TCP_TIMEOUT_INIT;
1882
1883         /* So many TCP implementations out there (incorrectly) count the
1884          * initial SYN frame in their delayed-ACK and congestion control
1885          * algorithms that we must have the following bandaid to talk
1886          * efficiently to them.  -DaveM
1887          */
1888         tp->snd_cwnd = TCP_INIT_CWND;
1889
1890         /* See draft-stevens-tcpca-spec-01 for discussion of the
1891          * initialization of these values.
1892          */
1893         tp->snd_ssthresh = TCP_INFINITE_SSTHRESH;
1894         tp->snd_cwnd_clamp = ~0;
1895         tp->mss_cache = TCP_MSS_DEFAULT;
1896
1897         tp->reordering = sysctl_tcp_reordering;
1898         icsk->icsk_ca_ops = &tcp_init_congestion_ops;
1899
1900         sk->sk_state = TCP_CLOSE;
1901
1902         sk->sk_write_space = sk_stream_write_space;
1903         sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);
1904
1905         icsk->icsk_af_ops = &ipv4_specific;
1906         icsk->icsk_sync_mss = tcp_sync_mss;
1907 #ifdef CONFIG_TCP_MD5SIG
1908         tp->af_specific = &tcp_sock_ipv4_specific;
1909 #endif
1910
1911         /* TCP Cookie Transactions */
1912         if (sysctl_tcp_cookie_size > 0) {
1913                 /* Default, cookies without s_data_payload. */
1914                 tp->cookie_values =
1915                         kzalloc(sizeof(*tp->cookie_values),
1916                                 sk->sk_allocation);
1917                 if (tp->cookie_values != NULL)
1918                         kref_init(&tp->cookie_values->kref);
1919         }
1920         /* Presumed zeroed, in order of appearance:
1921          *      cookie_in_always, cookie_out_never,
1922          *      s_data_constant, s_data_in, s_data_out
1923          */
1924         sk->sk_sndbuf = sysctl_tcp_wmem[1];
1925         sk->sk_rcvbuf = sysctl_tcp_rmem[1];
1926
1927         local_bh_disable();
1928         sock_update_memcg(sk);
1929         sk_sockets_allocated_inc(sk);
1930         local_bh_enable();
1931
1932         return 0;
1933 }
1934
/*
 * Release the TCP specific resources held by socket @sk: timers,
 * congestion control state, queued skbs, MD5 keys, the bind bucket,
 * the cached sendmsg page and the cookie values, then undo the
 * socket accounting done at init time.
 */
void tcp_v4_destroy_sock(struct sock *sk)
{
        struct tcp_sock *tp = tcp_sk(sk);

        tcp_clear_xmit_timers(sk);

        tcp_cleanup_congestion_control(sk);

        /* Clean up the write buffer. */
        tcp_write_queue_purge(sk);

        /* Clean up our, hopefully empty, out_of_order_queue. */
        __skb_queue_purge(&tp->out_of_order_queue);

#ifdef CONFIG_TCP_MD5SIG
        /* Clean up the MD5 key list, if any */
        if (tp->md5sig_info) {
                tcp_clear_md5_list(sk);
                kfree_rcu(tp->md5sig_info, rcu);
                tp->md5sig_info = NULL;
        }
#endif

#ifdef CONFIG_NET_DMA
        /* Clean up our sk_async_wait_queue */
        __skb_queue_purge(&sk->sk_async_wait_queue);
#endif

        /* Clean the prequeue; it really should be empty by now */
        __skb_queue_purge(&tp->ucopy.prequeue);

        /* Clean up a referenced TCP bind bucket. */
        if (inet_csk(sk)->icsk_bind_hash)
                inet_put_port(sk);

        /*
         * If sendmsg cached page exists, toss it.
         */
        if (sk->sk_sndmsg_page) {
                __free_page(sk->sk_sndmsg_page);
                sk->sk_sndmsg_page = NULL;
        }

        /* TCP Cookie Transactions */
        if (tp->cookie_values != NULL) {
                kref_put(&tp->cookie_values->kref,
                         tcp_cookie_values_release);
                tp->cookie_values = NULL;
        }

        /* Mirror of the accounting done in tcp_v4_init_sock(). */
        sk_sockets_allocated_dec(sk);
        sock_release_memcg(sk);
}
1988 EXPORT_SYMBOL(tcp_v4_destroy_sock);
1989
1990 #ifdef CONFIG_PROC_FS
1991 /* Proc filesystem TCP sock list dumping. */
1992
1993 static inline struct inet_timewait_sock *tw_head(struct hlist_nulls_head *head)
1994 {
1995         return hlist_nulls_empty(head) ? NULL :
1996                 list_entry(head->first, struct inet_timewait_sock, tw_node);
1997 }
1998
1999 static inline struct inet_timewait_sock *tw_next(struct inet_timewait_sock *tw)
2000 {
2001         return !is_a_nulls(tw->tw_node.next) ?
2002                 hlist_nulls_entry(tw->tw_node.next, typeof(*tw), tw_node) : NULL;
2003 }
2004
/*
 * Get the next listener socket (or pending open request) following cur.
 * If cur is NULL, get the first socket starting from the bucket given in
 * st->bucket; when st->bucket is zero the very first socket in the hash
 * table is returned.
 *
 * Depending on st->state the returned pointer is either a struct sock
 * (TCP_SEQ_STATE_LISTENING) or a struct request_sock
 * (TCP_SEQ_STATE_OPENREQ).
 *
 * Locking on return:
 *   - non-NULL result: the lock of listening_hash[st->bucket] is held;
 *     in TCP_SEQ_STATE_OPENREQ the syn_wait_lock of st->syn_wait_sk is
 *     additionally read-held;
 *   - NULL result: no lock is held.
 */
static void *listening_get_next(struct seq_file *seq, void *cur)
{
        struct inet_connection_sock *icsk;
        struct hlist_nulls_node *node;
        struct sock *sk = cur;
        struct inet_listen_hashbucket *ilb;
        struct tcp_iter_state *st = seq->private;
        struct net *net = seq_file_net(seq);

        if (!sk) {
                /* Fresh start: lock the current bucket and scan from its head. */
                ilb = &tcp_hashinfo.listening_hash[st->bucket];
                spin_lock_bh(&ilb->lock);
                sk = sk_nulls_head(&ilb->head);
                st->offset = 0;
                goto get_sk;
        }
        /* Continuing: the bucket lock is still held from the previous call. */
        ilb = &tcp_hashinfo.listening_hash[st->bucket];
        ++st->num;
        ++st->offset;

        if (st->state == TCP_SEQ_STATE_OPENREQ) {
                /* cur is an open request of st->syn_wait_sk. */
                struct request_sock *req = cur;

                icsk = inet_csk(st->syn_wait_sk);
                req = req->dl_next;
                while (1) {
                        /* Rest of the current SYN table chain. */
                        while (req) {
                                if (req->rsk_ops->family == st->family) {
                                        cur = req;
                                        goto out;
                                }
                                req = req->dl_next;
                        }
                        if (++st->sbucket >= icsk->icsk_accept_queue.listen_opt->nr_table_entries)
                                break;
                        /* Also entered from start_req with st->sbucket == 0. */
get_req:
                        req = icsk->icsk_accept_queue.listen_opt->syn_table[st->sbucket];
                }
                /* SYN table exhausted: move on to the next listener. */
                sk        = sk_nulls_next(st->syn_wait_sk);
                st->state = TCP_SEQ_STATE_LISTENING;
                read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
        } else {
                /* cur is a listener: walk its open requests first, if any. */
                icsk = inet_csk(sk);
                read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
                if (reqsk_queue_len(&icsk->icsk_accept_queue))
                        goto start_req;
                read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
                sk = sk_nulls_next(sk);
        }
get_sk:
        sk_nulls_for_each_from(sk, node) {
                if (!net_eq(sock_net(sk), net))
                        continue;
                if (sk->sk_family == st->family) {
                        cur = sk;
                        goto out;
                }
                /* Listener of another family: its open requests may still
                 * match st->family.
                 */
                icsk = inet_csk(sk);
                read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
                if (reqsk_queue_len(&icsk->icsk_accept_queue)) {
start_req:
                        /* syn_wait_lock stays read-held while in OPENREQ state. */
                        st->uid         = sock_i_uid(sk);
                        st->syn_wait_sk = sk;
                        st->state       = TCP_SEQ_STATE_OPENREQ;
                        st->sbucket     = 0;
                        goto get_req;
                }
                read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
        }
        /* Bucket exhausted: unlock it and try the next one. */
        spin_unlock_bh(&ilb->lock);
        st->offset = 0;
        if (++st->bucket < INET_LHTABLE_SIZE) {
                ilb = &tcp_hashinfo.listening_hash[st->bucket];
                spin_lock_bh(&ilb->lock);
                sk = sk_nulls_head(&ilb->head);
                goto get_sk;
        }
        cur = NULL;
out:
        return cur;
}
2091
2092 static void *listening_get_idx(struct seq_file *seq, loff_t *pos)
2093 {
2094         struct tcp_iter_state *st = seq->private;
2095         void *rc;
2096
2097         st->bucket = 0;
2098         st->offset = 0;
2099         rc = listening_get_next(seq, NULL);
2100
2101         while (rc && *pos) {
2102                 rc = listening_get_next(seq, rc);
2103                 --*pos;
2104         }
2105         return rc;
2106 }
2107
2108 static inline int empty_bucket(struct tcp_iter_state *st)
2109 {
2110         return hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].chain) &&
2111                 hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].twchain);
2112 }
2113
2114 /*
2115  * Get first established socket starting from bucket given in st->bucket.
2116  * If st->bucket is zero, the very first socket in the hash is returned.
2117  */
2118 static void *established_get_first(struct seq_file *seq)
2119 {
2120         struct tcp_iter_state *st = seq->private;
2121         struct net *net = seq_file_net(seq);
2122         void *rc = NULL;
2123
2124         st->offset = 0;
2125         for (; st->bucket <= tcp_hashinfo.ehash_mask; ++st->bucket) {
2126                 struct sock *sk;
2127                 struct hlist_nulls_node *node;
2128                 struct inet_timewait_sock *tw;
2129                 spinlock_t *lock = inet_ehash_lockp(&tcp_hashinfo, st->bucket);
2130
2131                 /* Lockless fast path for the common case of empty buckets */
2132                 if (empty_bucket(st))
2133                         continue;
2134
2135                 spin_lock_bh(lock);
2136                 sk_nulls_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) {
2137                         if (sk->sk_family != st->family ||
2138                             !net_eq(sock_net(sk), net)) {
2139                                 continue;
2140                         }
2141                         rc = sk;
2142                         goto out;
2143                 }
2144                 st->state = TCP_SEQ_STATE_TIME_WAIT;
2145                 inet_twsk_for_each(tw, node,
2146                                    &tcp_hashinfo.ehash[st->bucket].twchain) {
2147                         if (tw->tw_family != st->family ||
2148                             !net_eq(twsk_net(tw), net)) {
2149                                 continue;
2150                         }
2151                         rc = tw;
2152                         goto out;
2153                 }
2154                 spin_unlock_bh(lock);
2155                 st->state = TCP_SEQ_STATE_ESTABLISHED;
2156         }
2157 out:
2158         return rc;
2159 }
2160
/*
 * Advance the established/TIME_WAIT walk by one entry past cur.
 *
 * cur is a struct sock when st->state is TCP_SEQ_STATE_ESTABLISHED and a
 * struct inet_timewait_sock when it is TCP_SEQ_STATE_TIME_WAIT. The lock
 * of ehash bucket st->bucket is held on entry.
 *
 * Returns the next matching entry (st->family, seq_file's namespace) with
 * the lock of its bucket held, or NULL with no lock held when the table
 * is exhausted.
 */
static void *established_get_next(struct seq_file *seq, void *cur)
{
        struct sock *sk = cur;
        struct inet_timewait_sock *tw;
        struct hlist_nulls_node *node;
        struct tcp_iter_state *st = seq->private;
        struct net *net = seq_file_net(seq);

        ++st->num;
        ++st->offset;

        if (st->state == TCP_SEQ_STATE_TIME_WAIT) {
                tw = cur;
                tw = tw_next(tw);
                /* Also entered from below, once the main chain of the
                 * current bucket has been exhausted.
                 */
get_tw:
                while (tw && (tw->tw_family != st->family || !net_eq(twsk_net(tw), net))) {
                        tw = tw_next(tw);
                }
                if (tw) {
                        cur = tw;
                        goto out;
                }
                /* Both chains of this bucket are done: drop its lock. */
                spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
                st->state = TCP_SEQ_STATE_ESTABLISHED;

                /* Look for next non empty bucket */
                st->offset = 0;
                while (++st->bucket <= tcp_hashinfo.ehash_mask &&
                                empty_bucket(st))
                        ;
                if (st->bucket > tcp_hashinfo.ehash_mask)
                        return NULL;

                spin_lock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
                sk = sk_nulls_head(&tcp_hashinfo.ehash[st->bucket].chain);
        } else
                sk = sk_nulls_next(sk);

        sk_nulls_for_each_from(sk, node) {
                if (sk->sk_family == st->family && net_eq(sock_net(sk), net))
                        goto found;
        }

        /* Main chain exhausted: continue with this bucket's TIME_WAIT chain. */
        st->state = TCP_SEQ_STATE_TIME_WAIT;
        tw = tw_head(&tcp_hashinfo.ehash[st->bucket].twchain);
        goto get_tw;
found:
        cur = sk;
out:
        return cur;
}
2212
2213 static void *established_get_idx(struct seq_file *seq, loff_t pos)
2214 {
2215         struct tcp_iter_state *st = seq->private;
2216         void *rc;
2217
2218         st->bucket = 0;
2219         rc = established_get_first(seq);
2220
2221         while (rc && pos) {
2222                 rc = established_get_next(seq, rc);
2223                 --pos;
2224         }
2225         return rc;
2226 }
2227
2228 static void *tcp_get_idx(struct seq_file *seq, loff_t pos)
2229 {
2230         void *rc;
2231         struct tcp_iter_state *st = seq->private;
2232
2233         st->state = TCP_SEQ_STATE_LISTENING;
2234         rc        = listening_get_idx(seq, &pos);
2235
2236         if (!rc) {
2237                 st->state = TCP_SEQ_STATE_ESTABLISHED;
2238                 rc        = established_get_idx(seq, pos);
2239         }
2240
2241         return rc;
2242 }
2243
/*
 * Try to resume the walk at the place recorded in the iterator state
 * (st->state, st->bucket, st->offset) instead of rescanning from the
 * beginning.
 *
 * The recorded bucket is re-entered and st->offset entries are skipped.
 * If the listening table runs out while skipping, the search continues in
 * the established table from bucket 0 with whatever is left of the offset
 * counter. st->num is restored afterwards because the *_get_next()
 * helpers increment it while skipping.
 *
 * Returns the entry found, or NULL when the recorded position no longer
 * exists.
 */
static void *tcp_seek_last_pos(struct seq_file *seq)
{
        struct tcp_iter_state *st = seq->private;
        int offset = st->offset;
        int orig_num = st->num;
        void *rc = NULL;

        switch (st->state) {
        case TCP_SEQ_STATE_OPENREQ:
        case TCP_SEQ_STATE_LISTENING:
                if (st->bucket >= INET_LHTABLE_SIZE)
                        break;
                /* Restart from the head of the recorded listening bucket. */
                st->state = TCP_SEQ_STATE_LISTENING;
                rc = listening_get_next(seq, NULL);
                while (offset-- && rc)
                        rc = listening_get_next(seq, rc);
                if (rc)
                        break;
                st->bucket = 0;
                /* Fallthrough */
        case TCP_SEQ_STATE_ESTABLISHED:
        case TCP_SEQ_STATE_TIME_WAIT:
                st->state = TCP_SEQ_STATE_ESTABLISHED;
                if (st->bucket > tcp_hashinfo.ehash_mask)
                        break;
                rc = established_get_first(seq);
                while (offset-- && rc)
                        rc = established_get_next(seq, rc);
        }

        st->num = orig_num;

        return rc;
}
2278
2279 static void *tcp_seq_start(struct seq_file *seq, loff_t *pos)
2280 {
2281         struct tcp_iter_state *st = seq->private;
2282         void *rc;
2283
2284         if (*pos && *pos == st->last_pos) {
2285                 rc = tcp_seek_last_pos(seq);
2286                 if (rc)
2287                         goto out;
2288         }
2289
2290         st->state = TCP_SEQ_STATE_LISTENING;
2291         st->num = 0;
2292         st->bucket = 0;
2293         st->offset = 0;
2294         rc = *pos ? tcp_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
2295
2296 out:
2297         st->last_pos = *pos;
2298         return rc;
2299 }
2300
2301 static void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2302 {
2303         struct tcp_iter_state *st = seq->private;
2304         void *rc = NULL;
2305
2306         if (v == SEQ_START_TOKEN) {
2307                 rc = tcp_get_idx(seq, 0);
2308                 goto out;
2309         }
2310
2311         switch (st->state) {
2312         case TCP_SEQ_STATE_OPENREQ:
2313         case TCP_SEQ_STATE_LISTENING:
2314                 rc = listening_get_next(seq, v);
2315                 if (!rc) {
2316                         st->state = TCP_SEQ_STATE_ESTABLISHED;
2317                         st->bucket = 0;
2318                         st->offset = 0;
2319                         rc        = established_get_first(seq);
2320                 }
2321                 break;
2322         case TCP_SEQ_STATE_ESTABLISHED:
2323         case TCP_SEQ_STATE_TIME_WAIT:
2324                 rc = established_get_next(seq, v);
2325                 break;
2326         }
2327 out:
2328         ++*pos;
2329         st->last_pos = *pos;
2330         return rc;
2331 }
2332
/*
 * seq_file ->stop handler: release whatever locks the iterator still
 * holds for the entry @v, as determined by st->state.
 */
static void tcp_seq_stop(struct seq_file *seq, void *v)
{
        struct tcp_iter_state *st = seq->private;

        switch (st->state) {
        case TCP_SEQ_STATE_OPENREQ:
                if (v) {
                        struct inet_connection_sock *icsk = inet_csk(st->syn_wait_sk);
                        read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
                }
                /* Fallthrough: the listening bucket lock is held as well. */
        case TCP_SEQ_STATE_LISTENING:
                if (v != SEQ_START_TOKEN)
                        spin_unlock_bh(&tcp_hashinfo.listening_hash[st->bucket].lock);
                break;
        case TCP_SEQ_STATE_TIME_WAIT:
        case TCP_SEQ_STATE_ESTABLISHED:
                if (v)
                        spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
                break;
        }
}
2354
2355 int tcp_seq_open(struct inode *inode, struct file *file)
2356 {
2357         struct tcp_seq_afinfo *afinfo = PDE(inode)->data;
2358         struct tcp_iter_state *s;
2359         int err;
2360
2361         err = seq_open_net(inode, file, &afinfo->seq_ops,
2362                           sizeof(struct tcp_iter_state));
2363         if (err < 0)
2364                 return err;
2365
2366         s = ((struct seq_file *)file->private_data)->private;
2367         s->family               = afinfo->family;
2368         s->last_pos             = 0;
2369         return 0;
2370 }
2371 EXPORT_SYMBOL(tcp_seq_open);
2372
2373 int tcp_proc_register(struct net *net, struct tcp_seq_afinfo *afinfo)
2374 {
2375         int rc = 0;
2376         struct proc_dir_entry *p;
2377
2378         afinfo->seq_ops.start           = tcp_seq_start;
2379         afinfo->seq_ops.next            = tcp_seq_next;
2380         afinfo->seq_ops.stop            = tcp_seq_stop;
2381
2382         p = proc_create_data(afinfo->name, S_IRUGO, net->proc_net,
2383                              afinfo->seq_fops, afinfo);
2384         if (!p)
2385                 rc = -ENOMEM;
2386         return rc;
2387 }
2388 EXPORT_SYMBOL(tcp_proc_register);
2389
/*
 * Counterpart of tcp_proc_register(): remove the proc entry registered
 * under afinfo->name from namespace @net.
 */
void tcp_proc_unregister(struct net *net, struct tcp_seq_afinfo *afinfo)
{
        proc_net_remove(net, afinfo->name);
}
2394 EXPORT_SYMBOL(tcp_proc_unregister);
2395
/*
 * Print one line describing the pending connection request @req of
 * listening socket @sk.
 *
 * @f:   seq_file to print to
 * @i:   entry number shown in the first column
 * @uid: value shown in the uid column
 * @len: receives the number of characters written (via %n)
 *
 * The state column is always TCP_SYN_RECV; the queue-size columns, the
 * probe column and the inode column are printed as 0.
 */
static void get_openreq4(const struct sock *sk, const struct request_sock *req,
                         struct seq_file *f, int i, int uid, int *len)
{
        const struct inet_request_sock *ireq = inet_rsk(req);
        /* Jiffies left until the request expires. */
        int ttd = req->expires - jiffies;

        seq_printf(f, "%4d: %08X:%04X %08X:%04X"
                " %02X %08X:%08X %02X:%08lX %08X %5d %8d %u %d %pK%n",
                i,
                ireq->loc_addr,
                ntohs(inet_sk(sk)->inet_sport),
                ireq->rmt_addr,
                ntohs(ireq->rmt_port),
                TCP_SYN_RECV,
                0, 0, /* could print option size, but that is af dependent. */
                1,    /* timers active (only the expire timer) */
                jiffies_to_clock_t(ttd),
                req->retrans,
                uid,
                0,  /* non standard timer */
                0, /* open_requests have no inode */
                atomic_read(&sk->sk_refcnt),
                req,
                len);
}
2421
/*
 * Print one line describing the TCP socket @sk.
 *
 * @f:   seq_file to print to
 * @i:   entry number shown in the first column
 * @len: receives the number of characters written (via %n)
 *
 * The socket is not locked, so the values printed are a best-effort
 * snapshot.
 */
static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i, int *len)
{
        int timer_active;
        unsigned long timer_expires;
        const struct tcp_sock *tp = tcp_sk(sk);
        const struct inet_connection_sock *icsk = inet_csk(sk);
        const struct inet_sock *inet = inet_sk(sk);
        __be32 dest = inet->inet_daddr;
        __be32 src = inet->inet_rcv_saddr;
        __u16 destp = ntohs(inet->inet_dport);
        __u16 srcp = ntohs(inet->inet_sport);
        int rx_queue;

        /*
         * Timer column: 1 = retransmit timer pending, 4 = zero-window probe
         * timer pending, 2 = sk_timer pending, 0 = none of these.
         */
        if (icsk->icsk_pending == ICSK_TIME_RETRANS) {
                timer_active    = 1;
                timer_expires   = icsk->icsk_timeout;
        } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
                timer_active    = 4;
                timer_expires   = icsk->icsk_timeout;
        } else if (timer_pending(&sk->sk_timer)) {
                timer_active    = 2;
                timer_expires   = sk->sk_timer.expires;
        } else {
                timer_active    = 0;
                /* Makes the remaining time printed below come out as 0. */
                timer_expires = jiffies;
        }

        /* For listeners the rx_queue column shows the accept backlog. */
        if (sk->sk_state == TCP_LISTEN)
                rx_queue = sk->sk_ack_backlog;
        else
                /*
                 * Because we don't lock the socket, we might find a
                 * transient negative value; clamp it to 0.
                 */
                rx_queue = max_t(int, tp->rcv_nxt - tp->copied_seq, 0);

        seq_printf(f, "%4d: %08X:%04X %08X:%04X %02X %08X:%08X %02X:%08lX "
                        "%08X %5d %8d %lu %d %pK %lu %lu %u %u %d%n",
                i, src, srcp, dest, destp, sk->sk_state,
                tp->write_seq - tp->snd_una,
                rx_queue,
                timer_active,
                jiffies_to_clock_t(timer_expires - jiffies),
                icsk->icsk_retransmits,
                sock_i_uid(sk),
                icsk->icsk_probes_out,
                sock_i_ino(sk),
                atomic_read(&sk->sk_refcnt), sk,
                jiffies_to_clock_t(icsk->icsk_rto),
                jiffies_to_clock_t(icsk->icsk_ack.ato),
                (icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong,
                tp->snd_cwnd,
                /* -1 is printed for ssthresh while in initial slow start. */
                tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh,
                len);
}
2476
2477 static void get_timewait4_sock(const struct inet_timewait_sock *tw,
2478                                struct seq_file *f, int i, int *len)
2479 {
2480         __be32 dest, src;
2481         __u16 destp, srcp;
2482         int ttd = tw->tw_ttd - jiffies;
2483
2484         if (ttd < 0)
2485                 ttd = 0;
2486
2487         dest  = tw->tw_daddr;
2488         src   = tw->tw_rcv_saddr;
2489         destp = ntohs(tw->tw_dport);
2490         srcp  = ntohs(tw->tw_sport);
2491
2492         seq_printf(f, "%4d: %08X:%04X %08X:%04X"
2493                 " %02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK%n",
2494                 i, src, srcp, dest, destp, tw->tw_substate, 0, 0,
2495                 3, jiffies_to_clock_t(ttd), 0, 0, 0, 0,
2496                 atomic_read(&tw->tw_refcnt), tw, len);
2497 }
2498
2499 #define TMPSZ 150
2500
2501 static int tcp4_seq_show(struct seq_file *seq, void *v)
2502 {
2503         struct tcp_iter_state *st;
2504         int len;
2505
2506         if (v == SEQ_START_TOKEN) {
2507                 seq_printf(seq, "%-*s\n", TMPSZ - 1,
2508                            "  sl  local_address rem_address   st tx_queue "
2509                            "rx_queue tr tm->when retrnsmt   uid  timeout "
2510                            "inode");
2511                 goto out;
2512         }
2513         st = seq->private;
2514
2515         switch (st->state) {
2516         case TCP_SEQ_STATE_LISTENING:
2517         case TCP_SEQ_STATE_ESTABLISHED:
2518                 get_tcp4_sock(v, seq, st->num, &len);
2519                 break;
2520         case TCP_SEQ_STATE_OPENREQ:
2521                 get_openreq4(st->syn_wait_sk, v, seq, st->num, st->uid, &len);
2522                 break;
2523         case TCP_SEQ_STATE_TIME_WAIT:
2524                 get_timewait4_sock(v, seq, st->num, &len);
2525                 break;
2526         }
2527         seq_printf(seq, "%*s\n", TMPSZ - 1 - len, "");
2528 out:
2529         return 0;
2530 }
2531
2532 static const struct file_operations tcp_afinfo_seq_fops = {
2533         .owner   = THIS_MODULE,
2534         .open    = tcp_seq_open,
2535         .read    = seq_read,
2536         .llseek  = seq_lseek,
2537         .release = seq_release_net
2538 };
2539
2540 static struct tcp_seq_afinfo tcp4_seq_afinfo = {
2541         .name           = "tcp",
2542         .family         = AF_INET,
2543         .seq_fops       = &tcp_afinfo_seq_fops,
2544         .seq_ops        = {
2545                 .show           = tcp4_seq_show,
2546         },
2547 };
2548
2549 static int __net_init tcp4_proc_init_net(struct net *net)
2550 {
2551         return tcp_proc_register(net, &tcp4_seq_afinfo);
2552 }
2553
2554 static void __net_exit tcp4_proc_exit_net(struct net *net)
2555 {
2556         tcp_proc_unregister(net, &tcp4_seq_afinfo);
2557 }
2558
2559 static struct pernet_operations tcp4_net_ops = {
2560         .init = tcp4_proc_init_net,
2561         .exit = tcp4_proc_exit_net,
2562 };
2563
2564 int __init tcp4_proc_init(void)
2565 {
2566         return register_pernet_subsys(&tcp4_net_ops);
2567 }
2568
2569 void tcp4_proc_exit(void)
2570 {
2571         unregister_pernet_subsys(&tcp4_net_ops);
2572 }
2573 #endif /* CONFIG_PROC_FS */
2574
2575 struct sk_buff **tcp4_gro_receive(struct sk_buff **head, struct sk_buff *skb)
2576 {
2577         const struct iphdr *iph = skb_gro_network_header(skb);
2578
2579         switch (skb->ip_summed) {
2580         case CHECKSUM_COMPLETE:
2581                 if (!tcp_v4_check(skb_gro_len(skb), iph->saddr, iph->daddr,
2582                                   skb->csum)) {
2583                         skb->ip_summed = CHECKSUM_UNNECESSARY;
2584                         break;
2585                 }
2586
2587                 /* fall through */
2588         case CHECKSUM_NONE:
2589                 NAPI_GRO_CB(skb)->flush = 1;
2590                 return NULL;
2591         }
2592
2593         return tcp_gro_receive(head, skb);
2594 }
2595
2596 int tcp4_gro_complete(struct sk_buff *skb)
2597 {
2598         const struct iphdr *iph = ip_hdr(skb);
2599         struct tcphdr *th = tcp_hdr(skb);
2600
2601         th->check = ~tcp_v4_check(skb->len - skb_transport_offset(skb),
2602                                   iph->saddr, iph->daddr, 0);
2603         skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
2604
2605         return tcp_gro_complete(skb);
2606 }
2607
2608 struct proto tcp_prot = {
2609         .name                   = "TCP",
2610         .owner                  = THIS_MODULE,
2611         .close                  = tcp_close,
2612         .connect                = tcp_v4_connect,
2613         .disconnect             = tcp_disconnect,
2614         .accept                 = inet_csk_accept,
2615         .ioctl                  = tcp_ioctl,
2616         .init                   = tcp_v4_init_sock,
2617         .destroy                = tcp_v4_destroy_sock,
2618         .shutdown               = tcp_shutdown,
2619         .setsockopt             = tcp_setsockopt,
2620         .getsockopt             = tcp_getsockopt,
2621         .recvmsg                = tcp_recvmsg,
2622         .sendmsg                = tcp_sendmsg,
2623         .sendpage               = tcp_sendpage,
2624         .backlog_rcv            = tcp_v4_do_rcv,
2625         .hash                   = inet_hash,
2626         .unhash                 = inet_unhash,
2627         .get_port               = inet_csk_get_port,
2628         .enter_memory_pressure  = tcp_enter_memory_pressure,
2629         .sockets_allocated      = &tcp_sockets_allocated,
2630         .orphan_count           = &tcp_orphan_count,
2631         .memory_allocated       = &tcp_memory_allocated,
2632         .memory_pressure        = &tcp_memory_pressure,
2633         .sysctl_wmem            = sysctl_tcp_wmem,
2634         .sysctl_rmem            = sysctl_tcp_rmem,
2635         .max_header             = MAX_TCP_HEADER,
2636         .obj_size               = sizeof(struct tcp_sock),
2637         .slab_flags             = SLAB_DESTROY_BY_RCU,
2638         .twsk_prot              = &tcp_timewait_sock_ops,
2639         .rsk_prot               = &tcp_request_sock_ops,
2640         .h.hashinfo             = &tcp_hashinfo,
2641         .no_autobind            = true,
2642 #ifdef CONFIG_COMPAT
2643         .compat_setsockopt      = compat_tcp_setsockopt,
2644         .compat_getsockopt      = compat_tcp_getsockopt,
2645 #endif
2646 #ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM
2647         .init_cgroup            = tcp_init_cgroup,
2648         .destroy_cgroup         = tcp_destroy_cgroup,
2649         .proto_cgroup           = tcp_proto_cgroup,
2650 #endif
2651 };
2652 EXPORT_SYMBOL(tcp_prot);
2653
2654 static int __net_init tcp_sk_init(struct net *net)
2655 {
2656         return inet_ctl_sock_create(&net->ipv4.tcp_sock,
2657                                     PF_INET, SOCK_RAW, IPPROTO_TCP, net);
2658 }
2659
2660 static void __net_exit tcp_sk_exit(struct net *net)
2661 {
2662         inet_ctl_sock_destroy(net->ipv4.tcp_sock);
2663 }
2664
2665 static void __net_exit tcp_sk_exit_batch(struct list_head *net_exit_list)
2666 {
2667         inet_twsk_purge(&tcp_hashinfo, &tcp_death_row, AF_INET);
2668 }
2669
2670 static struct pernet_operations __net_initdata tcp_sk_ops = {
2671        .init       = tcp_sk_init,
2672        .exit       = tcp_sk_exit,
2673        .exit_batch = tcp_sk_exit_batch,
2674 };
2675
2676 void __init tcp_v4_init(void)
2677 {
2678         inet_hashinfo_init(&tcp_hashinfo);
2679         if (register_pernet_subsys(&tcp_sk_ops))
2680                 panic("Failed to create the TCP control socket.\n");
2681 }