net/ipv4/tcp_timer.c

   1 /*
   2  * INET         An implementation of the TCP/IP protocol suite for the LINUX
   3  *              operating system.  INET is implemented using the  BSD Socket
   4  *              interface as the means of communication with the user level.
   5  *
   6  *              Implementation of the Transmission Control Protocol(TCP).
   7  *
   8  * Version:     $Id: tcp_timer.c,v 1.68 1999/09/07 02:31:43 davem Exp $
   9  *
  10  * Authors:     Ross Biro, <bir7@leland.Stanford.Edu>
  11  *              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
  12  *              Mark Evans, <evansmp@uhura.aston.ac.uk>
  13  *              Corey Minyard <wf-rch!minyard@relay.EU.net>
  14  *              Florian La Roche, <flla@stud.uni-sb.de>
  15  *              Charles Hedrick, <hedrick@klinzhai.rutgers.edu>
  16  *              Linus Torvalds, <torvalds@cs.helsinki.fi>
  17  *              Alan Cox, <gw4pts@gw4pts.ampr.org>
  18  *              Matthew Dillon, <dillon@apollo.west.oic.com>
  19  *              Arnt Gulbrandsen, <agulbra@nvg.unit.no>
  20  *              Jorge Cwik, <jorge@laser.satlink.net>
  21  */
  22
  23 #include <net/tcp.h>
  24
  25 int sysctl_tcp_syn_retries = TCP_SYN_RETRIES;
  26 int sysctl_tcp_keepalive_time = TCP_KEEPALIVE_TIME;
  27 int sysctl_tcp_keepalive_probes = TCP_KEEPALIVE_PROBES;
  28 int sysctl_tcp_keepalive_intvl = TCP_KEEPALIVE_INTVL;
  29 int sysctl_tcp_retries1 = TCP_RETR1;
  30 int sysctl_tcp_retries2 = TCP_RETR2;
  31
  32
  33 static void tcp_sltimer_handler(unsigned long);
  34 static void tcp_syn_recv_timer(unsigned long);
  35 static void tcp_twkill(unsigned long);
  36
  37 struct timer_list       tcp_slow_timer = {
  38         NULL, NULL,
  39         0, 0,
  40         tcp_sltimer_handler,
  41 };
  42
  43
  44 struct tcp_sl_timer tcp_slt_array[TCP_SLT_MAX] = {
  45         {ATOMIC_INIT(0), TCP_SYNACK_PERIOD, 0, tcp_syn_recv_timer},/* SYNACK    */
  46         {ATOMIC_INIT(0), TCP_TWKILL_PERIOD, 0, tcp_twkill}         /* TWKILL    */
  47 };
  48
  49 const char timer_bug_msg[] = KERN_DEBUG "tcpbug: unknown timer value\n";
  50
  51 /*
  52  * Using different timers for retransmit, delayed acks and probes
  53  * We may wish use just one timer maintaining a list of expire jiffies
  54  * to optimize.
  55  */
  56
  57 void tcp_init_xmit_timers(struct sock *sk)
  58 {
  59         init_timer(&sk->tp_pinfo.af_tcp.retransmit_timer);
  60         sk->tp_pinfo.af_tcp.retransmit_timer.function=&tcp_retransmit_timer;
  61         sk->tp_pinfo.af_tcp.retransmit_timer.data = (unsigned long) sk;
  62
  63         init_timer(&sk->tp_pinfo.af_tcp.delack_timer);
  64         sk->tp_pinfo.af_tcp.delack_timer.function=&tcp_delack_timer;
  65         sk->tp_pinfo.af_tcp.delack_timer.data = (unsigned long) sk;
  66
  67         init_timer(&sk->tp_pinfo.af_tcp.probe_timer);
  68         sk->tp_pinfo.af_tcp.probe_timer.function=&tcp_probe_timer;
  69         sk->tp_pinfo.af_tcp.probe_timer.data = (unsigned long) sk;
  70 }
  71
  72 /*
  73  *      Reset the retransmission timer
  74  */
  75
  76 void tcp_reset_xmit_timer(struct sock *sk, int what, unsigned long when)
  77 {
  78         struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;
  79
  80         spin_lock_bh(&sk->timer_lock);
  81         switch (what) {
  82         case TIME_RETRANS:
  83                 /* When seting the transmit timer the probe timer
  84                  * should not be set.
  85                  * The delayed ack timer can be set if we are changing the
  86                  * retransmit timer when removing acked frames.
  87                  */
  88                 if(tp->probe_timer.prev && del_timer(&tp->probe_timer))
  89                         __sock_put(sk);
  90                 if (!tp->retransmit_timer.prev || !del_timer(&tp->retransmit_timer))
  91                         sock_hold(sk);
  92                 if (when > 120*HZ) {
  93                         printk(KERN_DEBUG "reset_xmit_timer sk=%p when=0x%lx, caller=%p\n", sk, when, NET_CALLER(sk));
  94                         when = 120*HZ;
  95                 }
  96                 mod_timer(&tp->retransmit_timer, jiffies+when);
  97                 break;
  98
  99         case TIME_DACK:
 100                 if (!tp->delack_timer.prev || !del_timer(&tp->delack_timer))
 101                         sock_hold(sk);
 102                 mod_timer(&tp->delack_timer, jiffies+when);
 103                 break;
 104
 105         case TIME_PROBE0:
 106                 if (!tp->probe_timer.prev || !del_timer(&tp->probe_timer))
 107                         sock_hold(sk);
 108                 mod_timer(&tp->probe_timer, jiffies+when);
 109                 break;
 110
 111         case TIME_WRITE:
 112                 printk(KERN_DEBUG "bug: tcp_reset_xmit_timer TIME_WRITE\n");
 113                 break;
 114
 115         default:
 116                 printk(KERN_DEBUG "bug: unknown timer value\n");
 117         };
 118         spin_unlock_bh(&sk->timer_lock);
 119 }
 120
 121 void tcp_clear_xmit_timers(struct sock *sk)
 122 {
 123         struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;
 124
 125         spin_lock_bh(&sk->timer_lock);
 126         if(tp->retransmit_timer.prev && del_timer(&tp->retransmit_timer))
 127                 __sock_put(sk);
 128         if(tp->delack_timer.prev && del_timer(&tp->delack_timer))
 129                 __sock_put(sk);
 130         if(tp->probe_timer.prev && del_timer(&tp->probe_timer))
 131                 __sock_put(sk);
 132         if(sk->timer.prev && del_timer(&sk->timer))
 133                 __sock_put(sk);
 134         spin_unlock_bh(&sk->timer_lock);
 135 }
 136
 137 static void tcp_write_err(struct sock *sk, int force)
 138 {
 139         sk->err = sk->err_soft ? sk->err_soft : ETIMEDOUT;
 140         sk->error_report(sk);
 141
 142         tcp_clear_xmit_timers(sk);
 143
 144         /* Do not time wait the socket. It is timed out and, hence,
 145          * idle for 120*HZ. "force" argument is ignored, delete
 146          * it eventually.
 147          */
 148
 149         /* Clean up time. */
 150         tcp_set_state(sk, TCP_CLOSE);
 151         tcp_done(sk);
 152 }
 153
 154 /* A write timeout has occurred. Process the after effects. */
 155 static void tcp_write_timeout(struct sock *sk)
 156 {
 157         struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
 158
 159         /* Look for a 'soft' timeout. */
 160         if ((sk->state == TCP_ESTABLISHED &&
 161              tp->retransmits && (tp->retransmits % TCP_QUICK_TRIES) == 0) ||
 162             (sk->state != TCP_ESTABLISHED && tp->retransmits > sysctl_tcp_retries1)) {
 163                 /* NOTE. draft-ietf-tcpimpl-pmtud-01.txt requires pmtu black
 164                    hole detection. :-(
 165
 166                    It is place to make it. It is not made. I do not want
 167                    to make it. It is disguisting. It does not work in any
 168                    case. Let me to cite the same draft, which requires for
 169                    us to implement this:
 170
 171    "The one security concern raised by this memo is that ICMP black holes
 172    are often caused by over-zealous security administrators who block
 173    all ICMP messages.  It is vitally important that those who design and
 174    deploy security systems understand the impact of strict filtering on
 175    upper-layer protocols.  The safest web site in the world is worthless
 176    if most TCP implementations cannot transfer data from it.  It would
 177    be far nicer to have all of the black holes fixed rather than fixing
 178    all of the TCP implementations."
 179
 180                    Golden words :-).
 181                  */
 182
 183                 dst_negative_advice(&sk->dst_cache);
 184         }
 185
 186         /* Have we tried to SYN too many times (repent repent 8)) */
 187         if (sk->state == TCP_SYN_SENT &&
 188             ((!tp->syn_retries && tp->retransmits > sysctl_tcp_syn_retries) ||
 189               (tp->syn_retries && tp->retransmits > tp->syn_retries))) {
 190                 tcp_write_err(sk, 1);
 191                 /* Don't FIN, we got nothing back */
 192         } else if (tp->retransmits > sysctl_tcp_retries2) {
 193                 /* Has it gone just too far? */
 194                 tcp_write_err(sk, 0);
 195         }
 196 }
 197
 198 void tcp_delack_timer(unsigned long data)
 199 {
 200         struct sock *sk = (struct sock*)data;
 201
 202         bh_lock_sock(sk);
 203         if (sk->lock.users) {
 204                 /* Try again later. */
 205                 tcp_reset_xmit_timer(sk, TIME_DACK, HZ/5);
 206                 goto out_unlock;
 207         }
 208
 209         if(!sk->zapped &&
 210            sk->tp_pinfo.af_tcp.delayed_acks &&
 211            sk->state != TCP_CLOSE)
 212                 tcp_send_ack(sk);
 213
 214 out_unlock:
 215         bh_unlock_sock(sk);
 216         sock_put(sk);
 217 }
 218
 219 void tcp_probe_timer(unsigned long data)
 220 {
 221         struct sock *sk = (struct sock*)data;
 222         struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;
 223
 224         if(sk->zapped)
 225                 goto out;
 226
 227         bh_lock_sock(sk);
 228         if (sk->lock.users) {
 229                 /* Try again later. */
 230                 tcp_reset_xmit_timer(sk, TIME_PROBE0, HZ/5);
 231                 goto out_unlock;
 232         }
 233
 234         /* *WARNING* RFC 1122 forbids this
 235          *
 236          * It doesn't AFAIK, because we kill the retransmit timer -AK
 237          *
 238          * FIXME: We ought not to do it, Solaris 2.5 actually has fixing
 239          * this behaviour in Solaris down as a bug fix. [AC]
 240          *
 241          * Let me to explain. probes_out is zeroed by incoming ACKs
 242          * even if they advertise zero window. Hence, connection is killed only
 243          * if we received no ACKs for normal connection timeout. It is not killed
 244          * only because window stays zero for some time, window may be zero
 245          * until armageddon and even later. We are in full accordance
 246          * with RFCs, only probe timer combines both retransmission timeout
 247          * and probe timeout in one bottle.                             --ANK
 248          */
 249         if (tp->probes_out > sysctl_tcp_retries2) {
 250                 tcp_write_err(sk, 0);
 251         } else {
 252                 /* Only send another probe if we didn't close things up. */
 253                 tcp_send_probe0(sk);
 254         }
 255 out_unlock:
 256         bh_unlock_sock(sk);
 257 out:
 258         sock_put(sk);
 259 }
 260
 261
 262 /* Kill off TIME_WAIT sockets once their lifetime has expired. */
 263 int tcp_tw_death_row_slot = 0;
 264 static struct tcp_tw_bucket *tcp_tw_death_row[TCP_TWKILL_SLOTS] =
 265         { NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL };
 266 static spinlock_t tw_death_lock = SPIN_LOCK_UNLOCKED;
 267
 268
 269 static void tcp_twkill(unsigned long data)
 270 {
 271         struct tcp_tw_bucket *tw;
 272         int killed = 0;
 273
 274         /* The death-row tw chains are only ever touched
 275          * in BH context so no BH disabling (for now) is needed.
 276          */
 277         spin_lock(&tw_death_lock);
 278         tw = tcp_tw_death_row[tcp_tw_death_row_slot];
 279         tcp_tw_death_row[tcp_tw_death_row_slot] = NULL;
 280         tcp_tw_death_row_slot =
 281           ((tcp_tw_death_row_slot + 1) & (TCP_TWKILL_SLOTS - 1));
 282         spin_unlock(&tw_death_lock);
 283
 284         while(tw != NULL) {
 285                 struct tcp_tw_bucket *next = tw->next_death;
 286
 287                 tcp_timewait_kill(tw);
 288                 tcp_tw_put(tw);
 289                 killed++;
 290                 tw = next;
 291         }
 292         if(killed != 0) {
 293                 struct tcp_sl_timer *slt = (struct tcp_sl_timer *)data;
 294                 atomic_sub(killed, &slt->count);
 295         }
 296 }
 297
 298 /* These are always called from BH context.  See callers in
 299  * tcp_input.c to verify this.
 300  */
 301 void tcp_tw_schedule(struct tcp_tw_bucket *tw)
 302 {
 303         struct tcp_tw_bucket **tpp;
 304         int slot;
 305
 306         spin_lock(&tw_death_lock);
 307         slot = (tcp_tw_death_row_slot - 1) & (TCP_TWKILL_SLOTS - 1);
 308         tpp = &tcp_tw_death_row[slot];
 309         if((tw->next_death = *tpp) != NULL)
 310                 (*tpp)->pprev_death = &tw->next_death;
 311         *tpp = tw;
 312         tw->pprev_death = tpp;
 313
 314         tw->death_slot = slot;
 315         atomic_inc(&tw->refcnt);
 316         spin_unlock(&tw_death_lock);
 317
 318         tcp_inc_slow_timer(TCP_SLT_TWKILL);
 319 }
 320
 321 /* Happens rarely if at all, no care about scalability here. */
 322 void tcp_tw_reschedule(struct tcp_tw_bucket *tw)
 323 {
 324         struct tcp_tw_bucket **tpp;
 325         int slot;
 326
 327         spin_lock(&tw_death_lock);
 328         if (tw->pprev_death) {
 329                 if(tw->next_death)
 330                         tw->next_death->pprev_death = tw->pprev_death;
 331                 *tw->pprev_death = tw->next_death;
 332                 tw->pprev_death = NULL;
 333         } else
 334                 atomic_inc(&tw->refcnt);
 335
 336         slot = (tcp_tw_death_row_slot - 1) & (TCP_TWKILL_SLOTS - 1);
 337         tpp = &tcp_tw_death_row[slot];
 338         if((tw->next_death = *tpp) != NULL)
 339                 (*tpp)->pprev_death = &tw->next_death;
 340         *tpp = tw;
 341         tw->pprev_death = tpp;
 342
 343         tw->death_slot = slot;
 344         spin_unlock(&tw_death_lock);
 345
 346         /* Timer was incremented when we first entered the table. */
 347 }
 348
 349 /* This is for handling early-kills of TIME_WAIT sockets. */
 350 void tcp_tw_deschedule(struct tcp_tw_bucket *tw)
 351 {
 352         spin_lock(&tw_death_lock);
 353         if (tw->pprev_death) {
 354                 if(tw->next_death)
 355                         tw->next_death->pprev_death = tw->pprev_death;
 356                 *tw->pprev_death = tw->next_death;
 357                 tw->pprev_death = NULL;
 358                 tcp_tw_put(tw);
 359         }
 360         spin_unlock(&tw_death_lock);
 361
 362         tcp_dec_slow_timer(TCP_SLT_TWKILL);
 363 }
 364
 365
 366 /*
 367  *      The TCP retransmit timer.
 368  *
 369  *      1.      An initial rtt timeout on the probe0 should cause what we can
 370  *              of the first write queue buffer to be split and sent.
 371  *      2.      On a 'major timeout' as defined by RFC1122 we do not report
 372  *              ETIMEDOUT if we know an additional 'soft' error caused this.
 373  *              tcp_err saves a 'soft error' for us.
 374  */
 375
 376 void tcp_retransmit_timer(unsigned long data)
 377 {
 378         struct sock *sk = (struct sock*)data;
 379         struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;
 380
 381         /* We are reset. We will send no more retransmits. */
 382         if(sk->zapped)
 383                 goto out;
 384
 385         bh_lock_sock(sk);
 386         if (sk->lock.users) {
 387                 /* Try again later */
 388                 tcp_reset_xmit_timer(sk, TIME_RETRANS, HZ/20);
 389                 goto out_unlock;
 390         }
 391
 392         /* Clear delay ack timer. */
 393         tcp_clear_xmit_timer(sk, TIME_DACK);
 394
 395         /* RFC 2018, clear all 'sacked' flags in retransmission queue,
 396          * the sender may have dropped out of order frames and we must
 397          * send them out should this timer fire on us.
 398          */
 399         if(tp->sack_ok) {
 400                 struct sk_buff *skb = skb_peek(&sk->write_queue);
 401
 402                 while((skb != NULL) &&
 403                       (skb != tp->send_head) &&
 404                       (skb != (struct sk_buff *)&sk->write_queue)) {
 405                         TCP_SKB_CB(skb)->sacked &=
 406                                 ~(TCPCB_SACKED_ACKED | TCPCB_SACKED_RETRANS);
 407                         skb = skb->next;
 408                 }
 409         }
 410
 411         /* Retransmission. */
 412         tp->retrans_head = NULL;
 413         tp->rexmt_done = 0;
 414         tp->fackets_out = 0;
 415         tp->retrans_out = 0;
 416         if (tp->retransmits == 0) {
 417                 /* Remember window where we lost:
 418                  * "one half of the current window but at least 2 segments"
 419                  *
 420                  * Here "current window" means the effective one, which
 421                  * means it must be an accurate representation of our current
 422                  * sending rate _and_ the snd_wnd.
 423                  */
 424                 tp->snd_ssthresh = tcp_recalc_ssthresh(tp);
 425                 tp->snd_cwnd_cnt = 0;
 426                 tp->snd_cwnd = 1;
 427         }
 428
 429         tp->retransmits++;
 430
 431         tp->dup_acks = 0;
 432         tp->high_seq = tp->snd_nxt;
 433         tcp_retransmit_skb(sk, skb_peek(&sk->write_queue));
 434
 435         /* Increase the timeout each time we retransmit.  Note that
 436          * we do not increase the rtt estimate.  rto is initialized
 437          * from rtt, but increases here.  Jacobson (SIGCOMM 88) suggests
 438          * that doubling rto each time is the least we can get away with.
 439          * In KA9Q, Karn uses this for the first few times, and then
 440          * goes to quadratic.  netBSD doubles, but only goes up to *64,
 441          * and clamps at 1 to 64 sec afterwards.  Note that 120 sec is
 442          * defined in the protocol as the maximum possible RTT.  I guess
 443          * we'll have to use something other than TCP to talk to the
 444          * University of Mars.
 445          *
 446          * PAWS allows us longer timeouts and large windows, so once
 447          * implemented ftp to mars will work nicely. We will have to fix
 448          * the 120 second clamps though!
 449          */
 450         tp->backoff++;
 451         tp->rto = min(tp->rto << 1, 120*HZ);
 452         tcp_reset_xmit_timer(sk, TIME_RETRANS, tp->rto);
 453
 454         tcp_write_timeout(sk);
 455
 456 out_unlock:
 457         bh_unlock_sock(sk);
 458 out:
 459         sock_put(sk);
 460 }
 461
 462 /*
 463  *      Slow timer for SYN-RECV sockets
 464  */
 465
 466 static void tcp_do_syn_queue(struct sock *sk, struct tcp_opt *tp, unsigned long now)
 467 {
 468         struct open_request *prev, *req;
 469
 470         prev = (struct open_request *) &tp->syn_wait_queue;
 471         for(req = tp->syn_wait_queue; req; ) {
 472                 struct open_request *next = req->dl_next;
 473
 474                 if (!req->sk && (long)(now - req->expires) >= 0) {
 475                         tcp_synq_unlink(tp, req, prev);
 476                         if(req->retrans >= sysctl_tcp_retries1) {
 477                                 (*req->class->destructor)(req);
 478                                 tcp_dec_slow_timer(TCP_SLT_SYNACK);
 479                                 tp->syn_backlog--;
 480                                 tcp_openreq_free(req);
 481                                 if (! tp->syn_wait_queue)
 482                                         break;
 483                         } else {
 484                                 unsigned long timeo;
 485                                 struct open_request *rp;
 486
 487                                 (*req->class->rtx_syn_ack)(sk, req);
 488                                 req->retrans++;
 489                                 timeo = min((TCP_TIMEOUT_INIT << req->retrans),
 490                                             (120 * HZ));
 491                                 req->expires = now + timeo;
 492                                 rp = prev->dl_next;
 493                                 tcp_synq_queue(tp, req);
 494                                 if(rp != prev->dl_next)
 495                                         prev = prev->dl_next;
 496                         }
 497                 } else
 498                         prev = req;
 499                 req = next;
 500         }
 501 }
 502
 503 /* This now scales very nicely. -DaveM */
 504 static void tcp_syn_recv_timer(unsigned long data)
 505 {
 506         struct sock *sk;
 507         unsigned long now = jiffies;
 508         int i;
 509
 510         read_lock(&tcp_lhash_lock);
 511         for(i = 0; i < TCP_LHTABLE_SIZE; i++) {
 512                 sk = tcp_listening_hash[i];
 513                 while(sk) {
 514                         struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;
 515
 516                         /* TCP_LISTEN is implied. */
 517                         bh_lock_sock(sk);
 518                         if (!sk->lock.users && tp->syn_wait_queue)
 519                                 tcp_do_syn_queue(sk, tp, now);
 520                         bh_unlock_sock(sk);
 521                         sk = sk->next;
 522                 }
 523         }
 524         read_unlock(&tcp_lhash_lock);
 525 }
 526
 527 void tcp_sltimer_handler(unsigned long data)
 528 {
 529         struct tcp_sl_timer *slt = tcp_slt_array;
 530         unsigned long next = ~0UL;
 531         unsigned long now = jiffies;
 532         int i;
 533
 534         for (i=0; i < TCP_SLT_MAX; i++, slt++) {
 535                 if (atomic_read(&slt->count)) {
 536                         long trigger;
 537
 538                         trigger = slt->period - ((long)(now - slt->last));
 539
 540                         if (trigger <= 0) {
 541                                 (*slt->handler)((unsigned long) slt);
 542                                 slt->last = now;
 543                                 trigger = slt->period;
 544                         }
 545
 546                         /* Only reschedule if some events remain. */
 547                         if (atomic_read(&slt->count))
 548                                 next = min(next, trigger);
 549                 }
 550         }
 551         if (next != ~0UL)
 552                 mod_timer(&tcp_slow_timer, (now + next));
 553 }
 554
 555 /* __tcp_inc_slow_timer is called when an slow timer is started
 556  * first time (slt->count was 0). There is race condition between
 557  * timer creation and deletion and if we do not force adding timer here,
 558  * we might lose timer. We could avoid it with global spinlock, but
 559  * it is apparently overkill, so that we restart timer ALWAYS when
 560  * this function is entered, it guarantees that timer will not lost.
 561  */
 562
 563 void __tcp_inc_slow_timer(struct tcp_sl_timer *slt)
 564 {
 565         unsigned long now = jiffies;
 566         unsigned long when;
 567
 568         slt->last = now;
 569
 570         when = now + slt->period;
 571
 572         if (tcp_slow_timer.prev &&
 573             (long)(tcp_slow_timer.expires - when) < 0)
 574                 when = tcp_slow_timer.expires;
 575
 576         mod_timer(&tcp_slow_timer, when);
 577 }
 578
 579 void tcp_delete_keepalive_timer (struct sock *sk)
 580 {
 581         spin_lock_bh(&sk->timer_lock);
 582         if (sk->timer.prev && del_timer (&sk->timer))
 583                 __sock_put(sk);
 584         spin_unlock_bh(&sk->timer_lock);
 585 }
 586
 587 void tcp_reset_keepalive_timer (struct sock *sk, unsigned long len)
 588 {
 589         spin_lock_bh(&sk->timer_lock);
 590         if(!sk->timer.prev || !del_timer(&sk->timer))
 591                 sock_hold(sk);
 592         mod_timer(&sk->timer, jiffies+len);
 593         spin_unlock_bh(&sk->timer_lock);
 594 }
 595
 596 void tcp_set_keepalive(struct sock *sk, int val)
 597 {
 598         if (val && !sk->keepopen)
 599                 tcp_reset_keepalive_timer(sk, keepalive_time_when(&sk->tp_pinfo.af_tcp));
 600         else if (!val)
 601                 tcp_delete_keepalive_timer(sk);
 602 }
 603
 604
 605 void tcp_keepalive_timer (unsigned long data)
 606 {
 607         struct sock *sk = (struct sock *) data;
 608         struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;
 609         __u32 elapsed;
 610
 611         /* Only process if socket is not in use. */
 612         bh_lock_sock(sk);
 613         if (sk->lock.users) {
 614                 /* Try again later. */
 615                 tcp_reset_keepalive_timer (sk, HZ/20);
 616                 goto out;
 617         }
 618
 619         if (sk->state == TCP_FIN_WAIT2 && sk->dead)
 620                 goto death;
 621
 622         if (!sk->keepopen)
 623                 goto out;
 624
 625         elapsed = keepalive_time_when(tp);
 626         if (!((1<<sk->state) & (TCPF_ESTABLISHED|TCPF_CLOSE_WAIT|TCPF_FIN_WAIT2)))
 627                 goto resched;
 628
 629         elapsed = tcp_time_stamp - tp->rcv_tstamp;
 630
 631         if (elapsed >= keepalive_time_when(tp)) {
 632                 if ((!tp->keepalive_probes && tp->probes_out >= sysctl_tcp_keepalive_probes) ||
 633                      (tp->keepalive_probes && tp->probes_out >= tp->keepalive_probes)) {
 634                         tcp_send_active_reset(sk, GFP_ATOMIC);
 635                         tcp_write_err(sk, 1);
 636                         goto out;
 637                 }
 638                 tp->probes_out++;
 639                 tp->pending = TIME_KEEPOPEN;
 640                 tcp_write_wakeup(sk);
 641                 elapsed = keepalive_intvl_when(tp);
 642         } else {
 643                 /* It is tp->rcv_tstamp + keepalive_time_when(tp) */
 644                 if (keepalive_time_when(tp) > elapsed)
 645                         elapsed = keepalive_time_when(tp) - elapsed;
 646                 else
 647                         elapsed = 0;
 648         }
 649
 650 resched:
 651         tcp_reset_keepalive_timer (sk, elapsed);
 652         goto out;
 653
 654 death:
 655         tcp_set_state(sk, TCP_CLOSE);
 656         tcp_clear_xmit_timers(sk);
 657         tcp_done(sk);
 658
 659 out:
 660         bh_unlock_sock(sk);
 661         sock_put(sk);
 662 }