/*
 * INET         An implementation of the TCP/IP protocol suite for the LINUX
 *              operating system.  INET is implemented using the BSD Socket
 *              interface as the means of communication with the user level.
 *
 *              Implementation of the Transmission Control Protocol(TCP).
 *
 * Version:     $Id: tcp_timer.c,v 1.88 2002/02/01 22:01:04 davem Exp $
 *
 * Authors:     Ross Biro, <bir7@leland.Stanford.Edu>
 *              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
 *              Mark Evans, <evansmp@uhura.aston.ac.uk>
 *              Corey Minyard <wf-rch!minyard@relay.EU.net>
 *              Florian La Roche, <flla@stud.uni-sb.de>
 *              Charles Hedrick, <hedrick@klinzhai.rutgers.edu>
 *              Linus Torvalds, <torvalds@cs.helsinki.fi>
 *              Alan Cox, <gw4pts@gw4pts.ampr.org>
 *              Matthew Dillon, <dillon@apollo.west.oic.com>
 *              Arnt Gulbrandsen, <agulbra@nvg.unit.no>
 *              Jorge Cwik, <jorge@laser.satlink.net>
 */
#include <linux/module.h>
#include <net/tcp.h>
int sysctl_tcp_syn_retries = TCP_SYN_RETRIES;
int sysctl_tcp_synack_retries = TCP_SYNACK_RETRIES;
int sysctl_tcp_keepalive_time = TCP_KEEPALIVE_TIME;
int sysctl_tcp_keepalive_probes = TCP_KEEPALIVE_PROBES;
int sysctl_tcp_keepalive_intvl = TCP_KEEPALIVE_INTVL;
int sysctl_tcp_retries1 = TCP_RETR1;
int sysctl_tcp_retries2 = TCP_RETR2;
int sysctl_tcp_orphan_retries;
static void tcp_write_timer(unsigned long);
static void tcp_delack_timer(unsigned long);
static void tcp_keepalive_timer(unsigned long data);

const char timer_bug_msg[] = KERN_DEBUG "tcpbug: unknown timer value\n";
/*
 * Using different timers for retransmit, delayed acks and probes.
 * We may wish to use just one timer maintaining a list of expire jiffies
 * to optimize.
 */
void tcp_init_xmit_timers(struct sock *sk)
{
        struct tcp_opt *tp = tcp_sk(sk);

        init_timer(&tp->retransmit_timer);
        tp->retransmit_timer.function = &tcp_write_timer;
        tp->retransmit_timer.data = (unsigned long)sk;
        tp->pending = 0;

        init_timer(&tp->delack_timer);
        tp->delack_timer.function = &tcp_delack_timer;
        tp->delack_timer.data = (unsigned long)sk;
        tp->ack.pending = 0;

        init_timer(&sk->sk_timer);
        sk->sk_timer.function = &tcp_keepalive_timer;
        sk->sk_timer.data = (unsigned long)sk;
}
void tcp_clear_xmit_timers(struct sock *sk)
{
        struct tcp_opt *tp = tcp_sk(sk);

        tp->pending = 0;
        sk_stop_timer(sk, &tp->retransmit_timer);

        tp->ack.pending = 0;
        tp->ack.blocked = 0;
        sk_stop_timer(sk, &tp->delack_timer);

        sk_stop_timer(sk, &sk->sk_timer);
}
static void tcp_write_err(struct sock *sk)
{
        sk->sk_err = sk->sk_err_soft ? : ETIMEDOUT;
        sk->sk_error_report(sk);

        tcp_done(sk);
        NET_INC_STATS_BH(LINUX_MIB_TCPABORTONTIMEOUT);
}
/* Do not allow orphaned sockets to eat all our resources.
 * This is a direct violation of the TCP specs, but it is required
 * to prevent DoS attacks. It is called when a retransmission timeout
 * or zero probe timeout occurs on an orphaned socket.
 *
 * The criterion is still not confirmed experimentally and may change.
 * We kill the socket if:
 * 1. The number of orphaned sockets exceeds an administratively
 *    configured limit.
 * 2. We are under strong memory pressure.
 */
static int tcp_out_of_resources(struct sock *sk, int do_reset)
{
        struct tcp_opt *tp = tcp_sk(sk);
        int orphans = atomic_read(&tcp_orphan_count);

        /* If the peer does not open its window for a long time, or did not
         * transmit anything for a long time, penalize it. */
        if ((s32)(tcp_time_stamp - tp->lsndtime) > 2*TCP_RTO_MAX || !do_reset)
                orphans <<= 1;

        /* If some dubious ICMP arrived, penalize even more. */
        if (sk->sk_err_soft)
                orphans <<= 1;

        if (orphans >= sysctl_tcp_max_orphans ||
            (sk->sk_wmem_queued > SOCK_MIN_SNDBUF &&
             atomic_read(&tcp_memory_allocated) > sysctl_tcp_mem[2])) {
                if (net_ratelimit())
                        printk(KERN_INFO "Out of socket memory\n");

                /* Catch exceptional cases, when connection requires reset.
                 *      1. Last segment was sent recently. */
                if ((s32)(tcp_time_stamp - tp->lsndtime) <= TCP_TIMEWAIT_LEN ||
                    /*  2. Window is closed. */
                    (!tp->snd_wnd && !tcp_get_pcount(&tp->packets_out)))
                        do_reset = 1;
                if (do_reset)
                        tcp_send_active_reset(sk, GFP_ATOMIC);
                tcp_done(sk);
                NET_INC_STATS_BH(LINUX_MIB_TCPABORTONMEMORY);
                return 1;
        }
        return 0;
}
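
/* Editor's note, a worked sketch of the penalty above: each "orphans <<= 1"
 * halves the effective sysctl_tcp_max_orphans limit for this socket. A
 * stale or non-resettable orphan that also has a pending soft error is
 * compared against the limit at four times its real count, i.e. such
 * sockets are culled once the system holds a quarter of the configured
 * maximum.
 */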
/* Calculate the maximal number of retries on an orphaned socket. */
static int tcp_orphan_retries(struct sock *sk, int alive)
{
        int retries = sysctl_tcp_orphan_retries; /* May be zero. */

        /* We know from an ICMP that something is wrong. */
        if (sk->sk_err_soft && !alive)
                retries = 0;

        /* However, if the socket sent something recently, select some safe
         * number of retries. 8 corresponds to >100 seconds with a minimal
         * RTO of 200 msec. */
        if (retries == 0 && alive)
                retries = 8;
        return retries;
}
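
/* Editor's note, checking the arithmetic in the comment above (a sketch,
 * assuming the minimal RTO of 200 msec it cites): with exponential backoff
 * the waits before each of the 8 retries, plus the final wait, sum to
 *
 *      0.2 * (1 + 2 + 4 + ... + 2^8) = 0.2 * (2^9 - 1) ~= 102 sec,
 *
 * hence ">100 seconds".
 */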
/* A write timeout has occurred. Process the after effects. */
static int tcp_write_timeout(struct sock *sk)
{
        struct tcp_opt *tp = tcp_sk(sk);
        int retry_until;

        if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) {
                if (tp->retransmits)
                        dst_negative_advice(&sk->sk_dst_cache);
                retry_until = tp->syn_retries ? : sysctl_tcp_syn_retries;
        } else {
                if (tp->retransmits >= sysctl_tcp_retries1) {
                        /* NOTE. draft-ietf-tcpimpl-pmtud-01.txt requires pmtu
                           black hole detection. :-(

                           This is the place to implement it. It is not
                           implemented, and I do not want to implement it. It
                           is disgusting and does not work in any case. Let me
                           cite the same draft, which requires us to implement
                           this:

   "The one security concern raised by this memo is that ICMP black holes
   are often caused by over-zealous security administrators who block
   all ICMP messages.  It is vitally important that those who design and
   deploy security systems understand the impact of strict filtering on
   upper-layer protocols.  The safest web site in the world is worthless
   if most TCP implementations cannot transfer data from it.  It would
   be far nicer to have all of the black holes fixed rather than fixing
   all of the TCP implementations."

                           Golden words :-).
                         */

                        dst_negative_advice(&sk->sk_dst_cache);
                }

                retry_until = sysctl_tcp_retries2;
                if (sock_flag(sk, SOCK_DEAD)) {
                        int alive = (tp->rto < TCP_RTO_MAX);

                        retry_until = tcp_orphan_retries(sk, alive);

                        if (tcp_out_of_resources(sk, alive || tp->retransmits < retry_until))
                                return 1;
                }
        }

        if (tp->retransmits >= retry_until) {
                /* Has it gone just too far? */
                tcp_write_err(sk);
                return 1;
        }
        return 0;
}
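
/* Editor's note, a rough sketch of the resulting timeout, assuming the
 * 2.6.9 default sysctl_tcp_retries2 = TCP_RETR2 = 15: with the RTO
 * doubling in tcp_retransmit_timer() and clamping at TCP_RTO_MAX
 * (120 sec), 15 retransmissions give up after roughly 13 to 30 minutes,
 * depending on the initial RTO.
 */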
static void tcp_delack_timer(unsigned long data)
{
        struct sock *sk = (struct sock *)data;
        struct tcp_opt *tp = tcp_sk(sk);

        bh_lock_sock(sk);
        if (sock_owned_by_user(sk)) {
                /* Try again later. */
                tp->ack.blocked = 1;
                NET_INC_STATS_BH(LINUX_MIB_DELAYEDACKLOCKED);
                sk_reset_timer(sk, &tp->delack_timer, jiffies + TCP_DELACK_MIN);
                goto out_unlock;
        }

        sk_stream_mem_reclaim(sk);

        if (sk->sk_state == TCP_CLOSE || !(tp->ack.pending & TCP_ACK_TIMER))
                goto out;

        if (time_after(tp->ack.timeout, jiffies)) {
                sk_reset_timer(sk, &tp->delack_timer, tp->ack.timeout);
                goto out;
        }
        tp->ack.pending &= ~TCP_ACK_TIMER;

        if (skb_queue_len(&tp->ucopy.prequeue)) {
                struct sk_buff *skb;

                NET_ADD_STATS_BH(LINUX_MIB_TCPSCHEDULERFAILED,
                                 skb_queue_len(&tp->ucopy.prequeue));

                while ((skb = __skb_dequeue(&tp->ucopy.prequeue)) != NULL)
                        sk->sk_backlog_rcv(sk, skb);

                tp->ucopy.memory = 0;
        }

        if (tcp_ack_scheduled(tp)) {
                if (!tp->ack.pingpong) {
                        /* Delayed ACK missed: inflate ATO. */
                        tp->ack.ato = min(tp->ack.ato << 1, tp->rto);
                } else {
                        /* Delayed ACK missed: leave pingpong mode and
                         * deflate ATO.
                         */
                        tp->ack.pingpong = 0;
                        tp->ack.ato = TCP_ATO_MIN;
                }
                tcp_send_ack(sk);
                NET_INC_STATS_BH(LINUX_MIB_DELAYEDACKS);
        }
        TCP_CHECK_TIMER(sk);

out:
        if (tcp_memory_pressure)
                sk_stream_mem_reclaim(sk);
out_unlock:
        bh_unlock_sock(sk);
        sock_put(sk);
}
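
/* Editor's note on the ATO handling above: outside pingpong (interactive)
 * mode a missed delayed ACK doubles tp->ack.ato, capped at the RTO;
 * in pingpong mode the miss instead leaves interactive mode and resets
 * the ACK timeout to TCP_ATO_MIN.
 */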
static void tcp_probe_timer(struct sock *sk)
{
        struct tcp_opt *tp = tcp_sk(sk);
        int max_probes;

        if (tcp_get_pcount(&tp->packets_out) || !sk->sk_send_head) {
                tp->probes_out = 0;
                return;
        }

        /* *WARNING* RFC 1122 forbids this
         *
         * It doesn't AFAIK, because we kill the retransmit timer -AK
         *
         * FIXME: We ought not to do it, Solaris 2.5 actually has fixing
         * this behaviour in Solaris down as a bug fix. [AC]
         *
         * Let me explain. probes_out is zeroed by incoming ACKs even if
         * they advertise a zero window. Hence, the connection is killed
         * only if we received no ACKs for the normal connection timeout.
         * It is not killed merely because the window stays zero for some
         * time; the window may be zero until armageddon and even later.
         * We are in full accordance with the RFCs, except that the probe
         * timer combines both the retransmission timeout and the probe
         * timeout in one bottle. --ANK
         */
        max_probes = sysctl_tcp_retries2;

        if (sock_flag(sk, SOCK_DEAD)) {
                int alive = ((tp->rto << tp->backoff) < TCP_RTO_MAX);

                max_probes = tcp_orphan_retries(sk, alive);

                if (tcp_out_of_resources(sk, alive || tp->probes_out <= max_probes))
                        return;
        }

        if (tp->probes_out > max_probes) {
                tcp_write_err(sk);
        } else {
                /* Only send another probe if we didn't close things up. */
                tcp_send_probe0(sk);
        }
}
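
/* Editor's note: tcp_send_probe0() backs off via tp->backoff much like a
 * retransmission does, so under the default sysctl_tcp_retries2 an
 * unanswered zero-window probe sequence is aborted by tcp_write_err() on
 * roughly the same timescale as an ordinary retransmission timeout.
 */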
/*
 *      The TCP retransmit timer.
 */

static void tcp_retransmit_timer(struct sock *sk)
{
        struct tcp_opt *tp = tcp_sk(sk);

        if (!tcp_get_pcount(&tp->packets_out))
                goto out;

        BUG_TRAP(!skb_queue_empty(&sk->sk_write_queue));

        if (!tp->snd_wnd && !sock_flag(sk, SOCK_DEAD) &&
            !((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV))) {
                /* The receiver has dastardly shrunk the window. Our
                 * retransmits become zero-window probes, but we should not
                 * time out this connection. If the socket is an orphan,
                 * time it out; we cannot allow such beasts to hang
                 * infinitely.
                 */
#ifdef TCP_DEBUG
                if (net_ratelimit()) {
                        struct inet_opt *inet = inet_sk(sk);
                        printk(KERN_DEBUG "TCP: Treason uncloaked! Peer %u.%u.%u.%u:%u/%u shrinks window %u:%u. Repaired.\n",
                               NIPQUAD(inet->daddr), htons(inet->dport),
                               inet->num, tp->snd_una, tp->snd_nxt);
                }
#endif
                if (tcp_time_stamp - tp->rcv_tstamp > TCP_RTO_MAX) {
                        tcp_write_err(sk);
                        goto out;
                }
                tcp_enter_loss(sk, 0);
                tcp_retransmit_skb(sk, skb_peek(&sk->sk_write_queue));
                __sk_dst_reset(sk);
                goto out_reset_timer;
        }

        if (tcp_write_timeout(sk))
                goto out;
        if (tp->retransmits == 0) {
                if (tp->ca_state == TCP_CA_Disorder || tp->ca_state == TCP_CA_Recovery) {
                        if (tp->sack_ok) {
                                if (tp->ca_state == TCP_CA_Recovery)
                                        NET_INC_STATS_BH(LINUX_MIB_TCPSACKRECOVERYFAIL);
                                else
                                        NET_INC_STATS_BH(LINUX_MIB_TCPSACKFAILURES);
                        } else {
                                if (tp->ca_state == TCP_CA_Recovery)
                                        NET_INC_STATS_BH(LINUX_MIB_TCPRENORECOVERYFAIL);
                                else
                                        NET_INC_STATS_BH(LINUX_MIB_TCPRENOFAILURES);
                        }
                } else if (tp->ca_state == TCP_CA_Loss) {
                        NET_INC_STATS_BH(LINUX_MIB_TCPLOSSFAILURES);
                } else {
                        NET_INC_STATS_BH(LINUX_MIB_TCPTIMEOUTS);
                }
        }

        if (tcp_use_frto(sk)) {
                tcp_enter_frto(sk);
        } else {
                tcp_enter_loss(sk, 0);
        }

        if (tcp_retransmit_skb(sk, skb_peek(&sk->sk_write_queue)) > 0) {
                /* Retransmission failed because of local congestion,
                 * do not backoff.
                 */
                if (!tp->retransmits)
                        tp->retransmits = 1;
                tcp_reset_xmit_timer(sk, TCP_TIME_RETRANS,
                                     min(tp->rto, TCP_RESOURCE_PROBE_INTERVAL));
                goto out;
        }
        /* Increase the timeout each time we retransmit. Note that
         * we do not increase the rtt estimate. rto is initialized
         * from rtt, but increases here. Jacobson (SIGCOMM 88) suggests
         * that doubling rto each time is the least we can get away with.
         * In KA9Q, Karn uses this for the first few times, and then
         * goes to quadratic. netBSD doubles, but only goes up to *64,
         * and clamps at 1 to 64 sec afterwards. Note that 120 sec is
         * defined in the protocol as the maximum possible RTT. I guess
         * we'll have to use something other than TCP to talk to the
         * University of Mars.
         *
         * PAWS allows us longer timeouts and large windows, so once
         * implemented ftp to mars will work nicely. We will have to fix
         * the 120 second clamps though!
         */
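        /* Editor's note, the doubling below worked through (a sketch,
         * assuming an initial rto of TCP_RTO_MIN = HZ/5, i.e. 200 msec):
         *
         *      0.2s, 0.4s, 0.8s, ..., 51.2s, 102.4s,
         *
         * after which min(tp->rto << 1, TCP_RTO_MAX) clamps every further
         * timeout at TCP_RTO_MAX = 120 sec.
         */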
        tp->backoff++;
        tp->retransmits++;

out_reset_timer:
        tp->rto = min(tp->rto << 1, TCP_RTO_MAX);
        tcp_reset_xmit_timer(sk, TCP_TIME_RETRANS, tp->rto);
        if (tp->retransmits > sysctl_tcp_retries1)
                __sk_dst_reset(sk);

out:;
}
static void tcp_write_timer(unsigned long data)
{
        struct sock *sk = (struct sock *)data;
        struct tcp_opt *tp = tcp_sk(sk);
        int event;

        bh_lock_sock(sk);
        if (sock_owned_by_user(sk)) {
                /* Try again later */
                sk_reset_timer(sk, &tp->retransmit_timer, jiffies + (HZ / 20));
                goto out_unlock;
        }

        if (sk->sk_state == TCP_CLOSE || !tp->pending)
                goto out;

        if (time_after(tp->timeout, jiffies)) {
                sk_reset_timer(sk, &tp->retransmit_timer, tp->timeout);
                goto out;
        }

        event = tp->pending;
        tp->pending = 0;

        switch (event) {
        case TCP_TIME_RETRANS:
                tcp_retransmit_timer(sk);
                break;
        case TCP_TIME_PROBE0:
                tcp_probe_timer(sk);
                break;
        }
        TCP_CHECK_TIMER(sk);

out:
        sk_stream_mem_reclaim(sk);
out_unlock:
        bh_unlock_sock(sk);
        sock_put(sk);
}
/*
 *      Timer for listening sockets
 */

static void tcp_synack_timer(struct sock *sk)
{
        struct tcp_opt *tp = tcp_sk(sk);
        struct tcp_listen_opt *lopt = tp->listen_opt;
        int max_retries = tp->syn_retries ? : sysctl_tcp_synack_retries;
        int thresh = max_retries;
        unsigned long now = jiffies;
        struct open_request **reqp, *req;
        int i, budget;

        if (lopt == NULL || lopt->qlen == 0)
                return;
        /* Normally all the openreqs are young and become mature
         * (i.e. converted to an established socket) within the first
         * timeout. If a synack was not acknowledged for 3 seconds, it
         * means one of the following things: the synack was lost, the ack
         * was lost, the rtt is high, or nobody planned to ack (i.e. a
         * synflood). When the server is a bit loaded, the queue is
         * populated with old open requests, reducing the effective size
         * of the queue. When the server is well loaded, the queue size
         * reduces to zero after several minutes of work. This is not a
         * synflood, it is normal operation. The solution is to prune
         * entries that are too old, overriding the normal timeout, when
         * the situation becomes dangerous.
         *
         * Essentially, we reserve half of the room for young embryos,
         * and we abort old ones without pity if they are about to clog
         * our table.
         */
        if (lopt->qlen >> (lopt->max_qlen_log - 1)) {
                int young = (lopt->qlen_young << 1);

                while (thresh > 2) {
                        if (lopt->qlen < young)
                                break;
                        thresh--;
                        young <<= 1;
                }
        }
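        /* Editor's note, a worked example of the loop above, assuming
         * max_qlen_log = 9 (queue limit 512) and the default
         * sysctl_tcp_synack_retries = 5: a queue of qlen = 256 holding
         * qlen_young = 32 fresh requests passes the half-full test and
         * decrements thresh for young = 64, 128 and 256, leaving
         * thresh = 2, so old requests get only two SYNACK
         * retransmissions before being dropped.
         */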
        if (tp->defer_accept)
                max_retries = tp->defer_accept;
        budget = 2 * (TCP_SYNQ_HSIZE / (TCP_TIMEOUT_INIT / TCP_SYNQ_INTERVAL));
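        /* Editor's note on the budget arithmetic, assuming the 2.6.9
         * defaults TCP_SYNQ_HSIZE = 512, TCP_TIMEOUT_INIT = 3*HZ and
         * TCP_SYNQ_INTERVAL = HZ/5: budget = 2 * (512 / 15) = 68 hash
         * buckets per run, so the whole table is swept in about eight
         * 200 msec intervals, comfortably inside the 3 second initial
         * retransmission timeout.
         */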
        i = lopt->clock_hand;

        do {
                reqp = &lopt->syn_table[i];
                while ((req = *reqp) != NULL) {
                        if (time_after_eq(now, req->expires)) {
                                if ((req->retrans < thresh ||
                                     (req->acked && req->retrans < max_retries))
                                    && !req->class->rtx_syn_ack(sk, req, NULL)) {
                                        unsigned long timeo;

                                        if (req->retrans++ == 0)
                                                lopt->qlen_young--;
                                        timeo = min((TCP_TIMEOUT_INIT << req->retrans),
                                                    TCP_RTO_MAX);
                                        req->expires = now + timeo;
                                        reqp = &req->dl_next;
                                        continue;
                                }

                                /* Drop this request */
                                write_lock(&tp->syn_wait_lock);
                                *reqp = req->dl_next;
                                write_unlock(&tp->syn_wait_lock);
                                lopt->qlen--;
                                if (req->retrans == 0)
                                        lopt->qlen_young--;
                                tcp_openreq_free(req);
                                continue;
                        }
                        reqp = &req->dl_next;
                }

                i = (i + 1) & (TCP_SYNQ_HSIZE - 1);

        } while (--budget > 0);

        lopt->clock_hand = i;

        if (lopt->qlen)
                tcp_reset_keepalive_timer(sk, TCP_SYNQ_INTERVAL);
}
void tcp_delete_keepalive_timer(struct sock *sk)
{
        sk_stop_timer(sk, &sk->sk_timer);
}

void tcp_reset_keepalive_timer(struct sock *sk, unsigned long len)
{
        sk_reset_timer(sk, &sk->sk_timer, jiffies + len);
}

void tcp_set_keepalive(struct sock *sk, int val)
{
        if ((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))
                return;

        if (val && !sock_flag(sk, SOCK_KEEPOPEN))
                tcp_reset_keepalive_timer(sk, keepalive_time_when(tcp_sk(sk)));
        else if (!val)
                tcp_delete_keepalive_timer(sk);
}
static void tcp_keepalive_timer(unsigned long data)
{
        struct sock *sk = (struct sock *)data;
        struct tcp_opt *tp = tcp_sk(sk);
        __u32 elapsed;

        /* Only process if socket is not in use. */
        bh_lock_sock(sk);
        if (sock_owned_by_user(sk)) {
                /* Try again later. */
                tcp_reset_keepalive_timer(sk, HZ/20);
                goto out;
        }

        if (sk->sk_state == TCP_LISTEN) {
                tcp_synack_timer(sk);
                goto out;
        }

        if (sk->sk_state == TCP_FIN_WAIT2 && sock_flag(sk, SOCK_DEAD)) {
                if (tp->linger2 >= 0) {
                        int tmo = tcp_fin_time(tp) - TCP_TIMEWAIT_LEN;

                        if (tmo > 0) {
                                tcp_time_wait(sk, TCP_FIN_WAIT2, tmo);
                                goto out;
                        }
                }
                tcp_send_active_reset(sk, GFP_ATOMIC);
                goto death;
        }

        if (!sock_flag(sk, SOCK_KEEPOPEN) || sk->sk_state == TCP_CLOSE)
                goto out;

        elapsed = keepalive_time_when(tp);

        /* It is alive without keepalive 8) */
        if (tcp_get_pcount(&tp->packets_out) || sk->sk_send_head)
                goto resched;

        elapsed = tcp_time_stamp - tp->rcv_tstamp;

        if (elapsed >= keepalive_time_when(tp)) {
                if ((!tp->keepalive_probes && tp->probes_out >= sysctl_tcp_keepalive_probes) ||
                    (tp->keepalive_probes && tp->probes_out >= tp->keepalive_probes)) {
                        tcp_send_active_reset(sk, GFP_ATOMIC);
                        tcp_write_err(sk);
                        goto out;
                }
                if (tcp_write_wakeup(sk) <= 0) {
                        tp->probes_out++;
                        elapsed = keepalive_intvl_when(tp);
                } else {
                        /* If keepalive was lost due to local congestion,
                         * try harder.
                         */
                        elapsed = TCP_RESOURCE_PROBE_INTERVAL;
                }
        } else {
                /* It is tp->rcv_tstamp + keepalive_time_when(tp) */
                elapsed = keepalive_time_when(tp) - elapsed;
        }

        TCP_CHECK_TIMER(sk);
        sk_stream_mem_reclaim(sk);

resched:
        tcp_reset_keepalive_timer(sk, elapsed);
        goto out;

death:
        tcp_done(sk);

out:
        bh_unlock_sock(sk);
        sock_put(sk);
}
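
/* Editor's note, the default keepalive timeline implied above (a sketch,
 * assuming TCP_KEEPALIVE_TIME = 7200 sec, TCP_KEEPALIVE_INTVL = 75 sec
 * and TCP_KEEPALIVE_PROBES = 9): an idle connection is probed after two
 * hours, then every 75 seconds; a peer that answers none of the 9 probes
 * is reset roughly 7200 + 9*75 ~= 7875 sec (about 2h11m) after its last
 * segment.
 */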
EXPORT_SYMBOL(tcp_clear_xmit_timers);
EXPORT_SYMBOL(tcp_delete_keepalive_timer);
EXPORT_SYMBOL(tcp_init_xmit_timers);
EXPORT_SYMBOL(tcp_reset_keepalive_timer);