From 3d0a255c417cf2e7b69e770de43f195b0eeffacb Mon Sep 17 00:00:00 2001 From: Garrett D'Amore Date: Wed, 17 Aug 2011 16:31:10 -0700 Subject: [PATCH] 1361 Add support for socket options TCP_KEEPCNT, TCP_KEEPIDLE, TCP_KEEPINTVL Reviewed by: Pavan Reviewed by: Dan McDonald Reviewed by: Garrett D'Amore Approved by: Garrett D'Amore --- usr/src/man/man7p/tcp.7p | 25 +++++--- usr/src/uts/common/inet/tcp.h | 18 +++++- usr/src/uts/common/inet/tcp/tcp.c | 3 + usr/src/uts/common/inet/tcp/tcp_opt_data.c | 100 +++++++++++++++++++++++++++++ usr/src/uts/common/inet/tcp/tcp_timers.c | 10 ++- usr/src/uts/common/netinet/tcp.h | 4 ++ 6 files changed, 150 insertions(+), 10 deletions(-) diff --git a/usr/src/man/man7p/tcp.7p b/usr/src/man/man7p/tcp.7p index 578bc2d474..5e1a6e8a8f 100644 --- a/usr/src/man/man7p/tcp.7p +++ b/usr/src/man/man7p/tcp.7p @@ -1,5 +1,6 @@ '\" te .\" Copyright (c) 2006, Sun Microsystems, Inc. All Rights Reserved. +.\" Copyright (c) 2011 Nexenta Systems, Inc. All rights reserved. .\" Copyright 1989 AT&T .\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License. .\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing. See the License for the specific language governing permissions and limitations under the License. @@ -344,6 +345,14 @@ integer in milliseconds. The value zero indicates that TCP should never time out and abort the connection when probing. The system default is controlled by the TCP ndd parameter tcp_keepalive_abort_interval. The default is eight minutes. +.sp +.LP +Socket options TCP_KEEPIDLE, TCP_KEEPCNT and TCP_KEEPINTVL are also supported +for compatibility with other Unix flavors. TCP_KEEPIDLE option specifies the +interval in seconds for sending out the first keep-alive probe. 
TCP_KEEPCNT +specifies the number of keep-alive probes to be sent before aborting the +connection in the event of no response from peer. TCP_KEEPINTVL specifies the +interval in seconds between successive keep-alive probes. .SH SEE ALSO .sp .LP @@ -385,7 +394,7 @@ A socket operation may fail if: \fB\fBEISCONN\fR\fR .ad .RS 17n -.rt +.rt A \fBconnect()\fR operation was attempted on a socket on which a \fBconnect()\fR operation had already been performed. .RE @@ -397,7 +406,7 @@ A \fBconnect()\fR operation was attempted on a socket on which a \fB\fBETIMEDOUT\fR\fR .ad .RS 17n -.rt +.rt A connection was dropped due to excessive retransmissions. .RE @@ -408,7 +417,7 @@ A connection was dropped due to excessive retransmissions. \fB\fBECONNRESET\fR\fR .ad .RS 17n -.rt +.rt The remote peer forced the connection to be closed (usually because the remote machine has lost state information about the connection due to a crash). .RE @@ -420,7 +429,7 @@ machine has lost state information about the connection due to a crash). \fB\fBECONNREFUSED\fR\fR .ad .RS 17n -.rt +.rt The remote peer actively refused connection establishment (usually because no process is listening to the port). .RE @@ -432,7 +441,7 @@ process is listening to the port). \fB\fBEADDRINUSE\fR\fR .ad .RS 17n -.rt +.rt A \fBbind()\fR operation was attempted on a socket with a network address/port pair that has already been bound to another socket. .RE @@ -444,7 +453,7 @@ pair that has already been bound to another socket. \fB\fBEADDRNOTAVAIL\fR\fR .ad .RS 17n -.rt +.rt A \fBbind()\fR operation was attempted on a socket with a network address for which no network interface exists. .RE @@ -456,7 +465,7 @@ which no network interface exists. \fB\fBEACCES\fR\fR .ad .RS 17n -.rt +.rt A \fBbind()\fR operation was attempted with a "reserved" port number and the effective user \fBID\fR of the process was not the privileged user. .RE @@ -468,7 +477,7 @@ effective user \fBID\fR of the process was not the privileged user. 
\fB\fBENOBUFS\fR\fR .ad .RS 17n -.rt +.rt The system ran out of memory for internal data structures. .RE diff --git a/usr/src/uts/common/inet/tcp.h b/usr/src/uts/common/inet/tcp.h index 460f183884..d95f2559c6 100644 --- a/usr/src/uts/common/inet/tcp.h +++ b/usr/src/uts/common/inet/tcp.h @@ -21,6 +21,7 @@ /* * Copyright (c) 1991, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2011, Joyent, Inc. All rights reserved. + * Copyright (c) 2011 Nexenta Systems, Inc. All rights reserved. */ /* Copyright (c) 1990 Mentat Inc. */ @@ -334,11 +335,26 @@ typedef struct tcp_s { } tcp_conn; uint32_t tcp_syn_rcvd_timeout; /* How many SYN_RCVD timeout in q0 */ - /* TCP Keepalive Timer members */ + /* + * TCP Keepalive Timer members. + * All keepalive timer intervals are in milliseconds. + */ int32_t tcp_ka_last_intrvl; /* Last probe interval */ timeout_id_t tcp_ka_tid; /* Keepalive timer ID */ uint32_t tcp_ka_interval; /* Keepalive interval */ + + /* + * TCP connection is terminated if we don't hear back from the peer + * for tcp_ka_abort_thres milliseconds after the first keepalive probe. + * tcp_ka_rinterval is the interval in milliseconds between successive + * keepalive probes. tcp_ka_cnt is the number of keepalive probes to + * be sent before terminating the connection, if we don't hear back from + * peer. + * tcp_ka_abort_thres = tcp_ka_rinterval * tcp_ka_cnt + */ + uint32_t tcp_ka_rinterval; /* keepalive retransmit interval */ uint32_t tcp_ka_abort_thres; /* Keepalive abort threshold */ + uint32_t tcp_ka_cnt; /* count of keepalive probes */ int32_t tcp_client_errno; /* How the client screwed up */ diff --git a/usr/src/uts/common/inet/tcp/tcp.c b/usr/src/uts/common/inet/tcp/tcp.c index 8d3dacf35b..0734468ea0 100644 --- a/usr/src/uts/common/inet/tcp/tcp.c +++ b/usr/src/uts/common/inet/tcp/tcp.c @@ -22,6 +22,7 @@ /* * Copyright (c) 1991, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2011, Joyent Inc. All rights reserved. 
+ * Copyright (c) 2011 Nexenta Systems, Inc. All rights reserved. */ /* Copyright (c) 1990 Mentat Inc. */ @@ -2354,6 +2355,8 @@ tcp_init_values(tcp_t *tcp, tcp_t *parent) tcp->tcp_ka_interval = tcps->tcps_keepalive_interval; tcp->tcp_ka_abort_thres = tcps->tcps_keepalive_abort_interval; + tcp->tcp_ka_cnt = 0; + tcp->tcp_ka_rinterval = 0; /* * Default value of tcp_init_cwnd is 0, so no need to set here diff --git a/usr/src/uts/common/inet/tcp/tcp_opt_data.c b/usr/src/uts/common/inet/tcp/tcp_opt_data.c index c1614463c2..960c3d8902 100644 --- a/usr/src/uts/common/inet/tcp/tcp_opt_data.c +++ b/usr/src/uts/common/inet/tcp/tcp_opt_data.c @@ -20,6 +20,7 @@ */ /* * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2011 Nexenta Systems, Inc. All rights reserved. */ #include @@ -117,6 +118,12 @@ opdes_t tcp_opt_arr[] = { { TCP_KEEPALIVE_THRESHOLD, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 }, +{ TCP_KEEPIDLE, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 }, + +{ TCP_KEEPCNT, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 }, + +{ TCP_KEEPINTVL, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 }, + { TCP_KEEPALIVE_ABORT_THRESHOLD, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 }, @@ -403,6 +410,25 @@ tcp_opt_get(conn_t *connp, int level, int name, uchar_t *ptr) case TCP_KEEPALIVE_THRESHOLD: *i1 = tcp->tcp_ka_interval; return (sizeof (int)); + + /* + * TCP_KEEPIDLE expects value in seconds, but + * tcp_ka_interval is in milliseconds. + */ + case TCP_KEEPIDLE: + *i1 = tcp->tcp_ka_interval / 1000; + return (sizeof (int)); + case TCP_KEEPCNT: + *i1 = tcp->tcp_ka_cnt; + return (sizeof (int)); + + /* + * TCP_KEEPINTVL expects value in seconds, but + * tcp_ka_rinterval is in milliseconds. 
+ */ + case TCP_KEEPINTVL: + *i1 = tcp->tcp_ka_rinterval / 1000; + return (sizeof (int)); case TCP_KEEPALIVE_ABORT_THRESHOLD: *i1 = tcp->tcp_ka_abort_thres; return (sizeof (int)); @@ -682,6 +708,18 @@ tcp_opt_set(conn_t *connp, uint_t optset_context, int level, int name, } tcp->tcp_init_cwnd = val; break; + + /* + * TCP_KEEPIDLE is in seconds but TCP_KEEPALIVE_THRESHOLD + * is in milliseconds. TCP_KEEPIDLE is introduced for + * compatibility with other Unix flavors. + * We can fall through TCP_KEEPALIVE_THRESHOLD logic after + * converting the input to milliseconds. + */ + case TCP_KEEPIDLE: + *i1 *= 1000; + /* fall through */ + case TCP_KEEPALIVE_THRESHOLD: if (checkonly) break; @@ -708,6 +746,66 @@ tcp_opt_set(conn_t *connp, uint_t optset_context, int level, int name, } } break; + + /* + * tcp_ka_abort_thres = tcp_ka_rinterval * tcp_ka_cnt. + * So setting TCP_KEEPCNT or TCP_KEEPINTVL can affect all the + * three members - tcp_ka_abort_thres, tcp_ka_rinterval and + * tcp_ka_cnt. + */ + case TCP_KEEPCNT: + if (checkonly) + break; + + if (*i1 == 0) { + return (EINVAL); + } else if (tcp->tcp_ka_rinterval == 0) { + if ((tcp->tcp_ka_abort_thres / *i1) < + tcp->tcp_rto_min || + (tcp->tcp_ka_abort_thres / *i1) > + tcp->tcp_rto_max) + return (EINVAL); + + tcp->tcp_ka_rinterval = + tcp->tcp_ka_abort_thres / *i1; + } else { + if ((*i1 * tcp->tcp_ka_rinterval) < + tcps->tcps_keepalive_abort_interval_low || + (*i1 * tcp->tcp_ka_rinterval) > + tcps->tcps_keepalive_abort_interval_high) + return (EINVAL); + tcp->tcp_ka_abort_thres = + (*i1 * tcp->tcp_ka_rinterval); + } + tcp->tcp_ka_cnt = *i1; + break; + case TCP_KEEPINTVL: + /* + * TCP_KEEPINTVL is specified in seconds, but + * tcp_ka_rinterval is in milliseconds. 
+ */ + + if (checkonly) + break; + + if ((*i1 * 1000) < tcp->tcp_rto_min || + (*i1 * 1000) > tcp->tcp_rto_max) + return (EINVAL); + + if (tcp->tcp_ka_cnt == 0) { + tcp->tcp_ka_cnt = + tcp->tcp_ka_abort_thres / (*i1 * 1000); + } else { + if ((*i1 * tcp->tcp_ka_cnt * 1000) < + tcps->tcps_keepalive_abort_interval_low || + (*i1 * tcp->tcp_ka_cnt * 1000) > + tcps->tcps_keepalive_abort_interval_high) + return (EINVAL); + tcp->tcp_ka_abort_thres = + (*i1 * tcp->tcp_ka_cnt * 1000); + } + tcp->tcp_ka_rinterval = *i1 * 1000; + break; case TCP_KEEPALIVE_ABORT_THRESHOLD: if (!checkonly) { if (*i1 < @@ -718,6 +816,8 @@ tcp_opt_set(conn_t *connp, uint_t optset_context, int level, int name, return (EINVAL); } tcp->tcp_ka_abort_thres = *i1; + tcp->tcp_ka_cnt = 0; + tcp->tcp_ka_rinterval = 0; } break; case TCP_CORK: diff --git a/usr/src/uts/common/inet/tcp/tcp_timers.c b/usr/src/uts/common/inet/tcp/tcp_timers.c index c883be8cfd..90e1c9178c 100644 --- a/usr/src/uts/common/inet/tcp/tcp_timers.c +++ b/usr/src/uts/common/inet/tcp/tcp_timers.c @@ -21,6 +21,7 @@ /* * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2011 Nexenta Systems, Inc. All rights reserved. */ #include @@ -390,6 +391,11 @@ tcp_timers_stop(tcp_t *tcp) * (tcp_ka_interval + tcp_ka_abort_thres) we have not heard anything, * kill the connection unless the keepalive abort threshold is 0. In * that case, we will probe "forever." + * If tcp_ka_cnt and tcp_ka_rinterval are non-zero, then we do not follow + * the exponential backoff, but send probes tcp_ka_cnt times in regular + * intervals of tcp_ka_rinterval milliseconds until we hear back from peer. + * Kill the connection if we don't hear back from peer after tcp_ka_cnt + * probes are sent. 
*/ void tcp_keepalive_timer(void *arg) @@ -455,7 +461,9 @@ tcp_keepalive_timer(void *arg) if (mp != NULL) { tcp_send_data(tcp, mp); TCPS_BUMP_MIB(tcps, tcpTimKeepaliveProbe); - if (tcp->tcp_ka_last_intrvl != 0) { + if (tcp->tcp_ka_rinterval) { + firetime = tcp->tcp_ka_rinterval; + } else if (tcp->tcp_ka_last_intrvl != 0) { int max; /* * We should probe again at least diff --git a/usr/src/uts/common/netinet/tcp.h b/usr/src/uts/common/netinet/tcp.h index 9a08545ab7..f6c2fc160b 100644 --- a/usr/src/uts/common/netinet/tcp.h +++ b/usr/src/uts/common/netinet/tcp.h @@ -21,6 +21,7 @@ /* * Copyright (c) 1991, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2011 Nexenta Systems, Inc. All rights reserved. */ /* @@ -125,6 +126,9 @@ struct tcphdr { /* gap for expansion of ``standard'' options */ #define TCP_ANONPRIVBIND 0x20 /* for internal use only */ #define TCP_EXCLBIND 0x21 /* for internal use only */ +#define TCP_KEEPIDLE 0x22 +#define TCP_KEEPCNT 0x23 +#define TCP_KEEPINTVL 0x24 #ifdef __cplusplus } -- 2.11.4.GIT