From 915cb3b310e9b3a87ab2b352b22f57c23b34f41d Mon Sep 17 00:00:00 2001 From: Lauri Tirkkonen Date: Wed, 26 Apr 2017 12:27:14 +0300 Subject: [PATCH] tcp: remove _strong_iss tunable Instead, use random ISS on incoming connections and follow RFC6528 for outgoing connections. This logic is borrowed from OpenBSD -- the initial sequence numbers don't need to be increasing across connections except for the case where the same connection-id tuple is still in TIME_WAIT on the listener, so RFC6528 is only necessary for outgoing connections. The implementation is not that pretty: we use the listener tcp's tcp_iss to pass the new ISS along to tcp_input_listener from the SYN received while in TIME_WAIT. Previously this used to modify the tcp stack's tcps_iss_incr_extra value though, so perhaps it's at least not worse. --- include/inet/tcp_impl.h | 79 +++++++++++++++----------------------- kernel/net/tcp/tcp.c | 26 ++----------- kernel/net/tcp/tcp_input.c | 11 ++++++ kernel/net/tcp/tcp_time_wait.c | 71 +++++++--------------------------- kernel/net/tcp/tcp_tunables.c | 4 -- usr/src/cmd/svc/milestone/net-init | 11 ------ 6 files changed, 60 insertions(+), 142 deletions(-) diff --git a/include/inet/tcp_impl.h b/include/inet/tcp_impl.h index 7d72e1c619..289b3c390a 100644 --- a/include/inet/tcp_impl.h +++ b/include/inet/tcp_impl.h @@ -208,25 +208,11 @@ typedef struct tcp_squeue_priv_s { } tcp_squeue_priv_t; /* - * Parameters for TCP Initial Send Sequence number (ISS) generation. When - * tcp_strong_iss is set to 1, which is the default, the ISS is calculated - * by adding three components: a time component which grows by 1 every 4096 - * nanoseconds (versus every 4 microseconds suggested by RFC 793, page 27); - * a per-connection component which grows by 125000 for every new connection; - * and an "extra" component that grows by a random amount centered - * approximately on 64000. 
This causes the ISS generator to cycle every - * 4.89 hours if no TCP connections are made, and faster if connections are - * made. - * - * When tcp_strong_iss is set to 0, ISS is calculated by adding two - * components: a time component which grows by 250000 every second; and - * a per-connection component which grows by 125000 for every new connections. - * - * A third method, when tcp_strong_iss is set to 2, for generating ISS is - * prescribed by Steve Bellovin. This involves adding time, the 125000 per - * connection, and a one-way hash (MD5) of the connection ID <sport, dport, - * src, dst>, a "truly" random (per RFC 1750) number, and a console-entered - * password. + * Parameters for TCP Initial Send Sequence number (ISS) generation. The ISS + * for outgoing connections is calculated as proposed in RFC 6528 section 3, + * ie. by adding two components: a time component which grows by 1 every 4096 + * nanoseconds and a one-way hash (MD5) of <connection ID, secret> + * where secret is a true random number (RFC 4086). */ #define ISS_INCR 250000 #define ISS_NSEC_SHT 12 @@ -542,37 +528,36 @@ extern uint32_t tcp_early_abort; #define tcps_fin_wait_2_flush_interval_low \ tcps_propinfo_tbl[33].prop_min_uval #define tcps_max_buf tcps_propinfo_tbl[34].prop_cur_uval -#define tcps_strong_iss tcps_propinfo_tbl[35].prop_cur_uval -#define tcps_rtt_updates tcps_propinfo_tbl[36].prop_cur_uval -#define tcps_wscale_always tcps_propinfo_tbl[37].prop_cur_bval -#define tcps_tstamp_always tcps_propinfo_tbl[38].prop_cur_bval -#define tcps_tstamp_if_wscale tcps_propinfo_tbl[39].prop_cur_bval -#define tcps_rexmit_interval_extra tcps_propinfo_tbl[40].prop_cur_uval -#define tcps_deferred_acks_max tcps_propinfo_tbl[41].prop_cur_uval -#define tcps_slow_start_after_idle tcps_propinfo_tbl[42].prop_cur_uval -#define tcps_slow_start_initial tcps_propinfo_tbl[43].prop_cur_uval -#define tcps_sack_permitted tcps_propinfo_tbl[44].prop_cur_uval -#define tcps_ipv6_hoplimit tcps_propinfo_tbl[45].prop_cur_uval -#define tcps_mss_def_ipv6
tcps_propinfo_tbl[46].prop_cur_uval -#define tcps_mss_max_ipv6 tcps_propinfo_tbl[47].prop_cur_uval -#define tcps_rev_src_routes tcps_propinfo_tbl[48].prop_cur_bval -#define tcps_local_dack_interval tcps_propinfo_tbl[49].prop_cur_uval -#define tcps_local_dacks_max tcps_propinfo_tbl[50].prop_cur_uval -#define tcps_ecn_permitted tcps_propinfo_tbl[51].prop_cur_uval -#define tcps_rst_sent_rate_enabled tcps_propinfo_tbl[52].prop_cur_bval -#define tcps_rst_sent_rate tcps_propinfo_tbl[53].prop_cur_uval -#define tcps_push_timer_interval tcps_propinfo_tbl[54].prop_cur_uval -#define tcps_use_smss_as_mss_opt tcps_propinfo_tbl[55].prop_cur_bval +#define tcps_rtt_updates tcps_propinfo_tbl[35].prop_cur_uval +#define tcps_wscale_always tcps_propinfo_tbl[36].prop_cur_bval +#define tcps_tstamp_always tcps_propinfo_tbl[37].prop_cur_bval +#define tcps_tstamp_if_wscale tcps_propinfo_tbl[38].prop_cur_bval +#define tcps_rexmit_interval_extra tcps_propinfo_tbl[39].prop_cur_uval +#define tcps_deferred_acks_max tcps_propinfo_tbl[40].prop_cur_uval +#define tcps_slow_start_after_idle tcps_propinfo_tbl[41].prop_cur_uval +#define tcps_slow_start_initial tcps_propinfo_tbl[42].prop_cur_uval +#define tcps_sack_permitted tcps_propinfo_tbl[43].prop_cur_uval +#define tcps_ipv6_hoplimit tcps_propinfo_tbl[44].prop_cur_uval +#define tcps_mss_def_ipv6 tcps_propinfo_tbl[45].prop_cur_uval +#define tcps_mss_max_ipv6 tcps_propinfo_tbl[46].prop_cur_uval +#define tcps_rev_src_routes tcps_propinfo_tbl[47].prop_cur_bval +#define tcps_local_dack_interval tcps_propinfo_tbl[48].prop_cur_uval +#define tcps_local_dacks_max tcps_propinfo_tbl[49].prop_cur_uval +#define tcps_ecn_permitted tcps_propinfo_tbl[50].prop_cur_uval +#define tcps_rst_sent_rate_enabled tcps_propinfo_tbl[51].prop_cur_bval +#define tcps_rst_sent_rate tcps_propinfo_tbl[52].prop_cur_uval +#define tcps_push_timer_interval tcps_propinfo_tbl[53].prop_cur_uval +#define tcps_use_smss_as_mss_opt tcps_propinfo_tbl[54].prop_cur_bval #define 
tcps_keepalive_abort_interval_high \ - tcps_propinfo_tbl[56].prop_max_uval + tcps_propinfo_tbl[55].prop_max_uval #define tcps_keepalive_abort_interval \ - tcps_propinfo_tbl[56].prop_cur_uval + tcps_propinfo_tbl[55].prop_cur_uval #define tcps_keepalive_abort_interval_low \ - tcps_propinfo_tbl[56].prop_min_uval -#define tcps_wroff_xtra tcps_propinfo_tbl[57].prop_cur_uval -#define tcps_dev_flow_ctl tcps_propinfo_tbl[58].prop_cur_bval -#define tcps_reass_timeout tcps_propinfo_tbl[59].prop_cur_uval -#define tcps_iss_incr tcps_propinfo_tbl[64].prop_cur_uval + tcps_propinfo_tbl[55].prop_min_uval +#define tcps_wroff_xtra tcps_propinfo_tbl[56].prop_cur_uval +#define tcps_dev_flow_ctl tcps_propinfo_tbl[57].prop_cur_bval +#define tcps_reass_timeout tcps_propinfo_tbl[58].prop_cur_uval +#define tcps_iss_incr tcps_propinfo_tbl[63].prop_cur_uval extern struct qinit tcp_rinitv4, tcp_rinitv6; extern boolean_t do_tcp_fusion; diff --git a/kernel/net/tcp/tcp.c b/kernel/net/tcp/tcp.c index 7715863298..90e1198908 100644 --- a/kernel/net/tcp/tcp.c +++ b/kernel/net/tcp/tcp.c @@ -3828,11 +3828,6 @@ tcp_stack_fini(netstackid_t stackid, void *arg) kmem_free(tcps, sizeof (*tcps)); } -/* - * Generate ISS, taking into account NDD changes may happen halfway through. - * (If the iss is not zero, set it.) 
- */ - static void tcp_iss_init(tcp_t *tcp) { @@ -3842,10 +3837,10 @@ tcp_iss_init(tcp_t *tcp) tcp_stack_t *tcps = tcp->tcp_tcps; conn_t *connp = tcp->tcp_connp; - tcps->tcps_iss_incr_extra += (tcps->tcps_iss_incr >> 1); - tcp->tcp_iss = tcps->tcps_iss_incr_extra; - switch (tcps->tcps_strong_iss) { - case 2: + /* tcp_iss may already have been set in tcp_input_listener */ + if (!tcp->tcp_iss) { + tcps->tcps_iss_incr_extra += (tcps->tcps_iss_incr >> 1); + tcp->tcp_iss = tcps->tcps_iss_incr_extra; context = tcps->tcps_iss_key; arg.ports = connp->conn_ports; arg.src = connp->conn_laddr_v6; @@ -3853,19 +3848,6 @@ tcp_iss_init(tcp_t *tcp) MD5Update(&context, (uchar_t *)&arg, sizeof (arg)); MD5Final((uchar_t *)answer, &context); tcp->tcp_iss += answer[0] ^ answer[1] ^ answer[2] ^ answer[3]; - /* - * Now that we've hashed into a unique per-connection sequence - * space, add a random increment per strong_iss == 1. So I - * guess we'll have to... - */ - /* FALLTHRU */ - case 1: - tcp->tcp_iss += (gethrtime() >> ISS_NSEC_SHT) + tcp_random(); - break; - default: - tcp->tcp_iss += (uint32_t)gethrestime_sec() * - tcps->tcps_iss_incr; - break; } tcp->tcp_valid_bits = TCP_ISS_VALID; tcp->tcp_fss = tcp->tcp_iss - 1; diff --git a/kernel/net/tcp/tcp_input.c b/kernel/net/tcp/tcp_input.c index d747fb5034..6620501ad9 100644 --- a/kernel/net/tcp/tcp_input.c +++ b/kernel/net/tcp/tcp_input.c @@ -1444,6 +1444,17 @@ tcp_input_listener(void *arg, mblk_t *mp, void *arg2, ip_recv_attr_t *ira) * the listener. */ tcp_init_values(eager, listener); + /* + * If listener's tcp_iss is set, the new connection replaces an old + * TIME_WAIT connection and should use the ISS set in tcp_time_wait.c. 
+ */ + if (listener->tcp_iss) { + eager->tcp_iss = listener->tcp_iss; + listener->tcp_iss = 0; + } else { + random_get_pseudo_bytes(&eager->tcp_iss, + sizeof(eager->tcp_iss)); + } ASSERT((econnp->conn_ixa->ixa_flags & (IXAF_SET_ULP_CKSUM | IXAF_VERIFY_SOURCE | diff --git a/kernel/net/tcp/tcp_time_wait.c b/kernel/net/tcp/tcp_time_wait.c index 56f3505486..404ffd92ec 100644 --- a/kernel/net/tcp/tcp_time_wait.c +++ b/kernel/net/tcp/tcp_time_wait.c @@ -677,65 +677,8 @@ tcp_time_wait_processing(tcp_t *tcp, mblk_t *mp, uint32_t seg_seq, } if ((flags & TH_SYN) && gap > 0 && rgap < 0) { - /* - * Make sure that when we accept the connection, pick - * an ISS greater than (tcp_snxt + tcp_iss_incr/2) for the - * old connection. - * - * The next ISS generated is equal to tcp_iss_incr_extra - * + tcp_iss_incr/2 + other components depending on the - * value of tcp_strong_iss. We pre-calculate the new - * ISS here and compare with tcp_snxt to determine if - * we need to make adjustment to tcp_iss_incr_extra. - * - * The above calculation is ugly and is a - * waste of CPU cycles... - */ - uint32_t new_iss = tcps->tcps_iss_incr_extra; - int32_t adj; ip_stack_t *ipst = tcps->tcps_netstack->netstack_ip; - - switch (tcps->tcps_strong_iss) { - case 2: { - /* Add time and MD5 components. */ - uint32_t answer[4]; - struct { - uint32_t ports; - in6_addr_t src; - in6_addr_t dst; - } arg; - MD5_CTX context; - - context = tcps->tcps_iss_key; - arg.ports = connp->conn_ports; - /* We use MAPPED addresses in tcp_iss_init */ - arg.src = connp->conn_laddr_v6; - arg.dst = connp->conn_faddr_v6; - MD5Update(&context, (uchar_t *)&arg, - sizeof (arg)); - MD5Final((uchar_t *)answer, &context); - answer[0] ^= answer[1] ^ answer[2] ^ answer[3]; - new_iss += (gethrtime() >> ISS_NSEC_SHT) + answer[0]; - break; - } - case 1: - /* Add time component and min random (i.e. 1). */ - new_iss += (gethrtime() >> ISS_NSEC_SHT) + 1; - break; - default: - /* Add only time component. 
*/ - new_iss += (uint32_t)gethrestime_sec() * - tcps->tcps_iss_incr; - break; - } - if ((adj = (int32_t)(tcp->tcp_snxt - new_iss)) > 0) { - /* - * New ISS not guaranteed to be tcp_iss_incr/2 - * ahead of the current tcp_snxt, so add the - * difference to tcp_iss_incr_extra. - */ - tcps->tcps_iss_incr_extra += adj; - } + uint32_t snxt = tcp->tcp_snxt; /* * If tcp_clean_death() can not perform the task now, * drop the SYN packet and let the other side re-xmit. @@ -746,6 +689,18 @@ tcp_time_wait_processing(tcp_t *tcp, mblk_t *mp, uint32_t seg_seq, goto done; nconnp = ipcl_classify(mp, ira, ipst); if (nconnp != NULL) { + /* + * Make sure that when we accept the connection, the + * new ISS is greater than tcp_snxt by at least 32768, + * but clear the MSB so that SEQ_LT(tcp_snxt, iss). + */ + uint32_t new_iss; + random_get_pseudo_bytes(&new_iss, sizeof(new_iss)); + new_iss &= 0x7fffffff; + new_iss |= 0x8000; + new_iss += snxt; + /* tcp_input_listener will copy this to the new tcp */ + nconnp->conn_tcp->tcp_iss = new_iss; TCP_STAT(tcps, tcp_time_wait_syn_success); /* Drops ref on nconnp */ tcp_reinput(nconnp, mp, ira, ipst); diff --git a/kernel/net/tcp/tcp_tunables.c b/kernel/net/tcp/tcp_tunables.c index aad11cbea9..d7289b53be 100644 --- a/kernel/net/tcp/tcp_tunables.c +++ b/kernel/net/tcp/tcp_tunables.c @@ -378,10 +378,6 @@ mod_prop_info_t tcp_propinfo_tbl[] = { mod_set_uint32, mod_get_uint32, {8192, ULP_MAX_BUF, 1024*1024}, {1024*1024} }, - { "_strong_iss", MOD_PROTO_TCP, - mod_set_uint32, mod_get_uint32, - {0, 2, 2}, {2} }, - { "_rtt_updates", MOD_PROTO_TCP, mod_set_uint32, mod_get_uint32, {0, 65536, 20}, {20} }, diff --git a/usr/src/cmd/svc/milestone/net-init b/usr/src/cmd/svc/milestone/net-init index 05e0f6910c..513b1a1346 100644 --- a/usr/src/cmd/svc/milestone/net-init +++ b/usr/src/cmd/svc/milestone/net-init @@ -69,16 +69,5 @@ if [ -f /usr/sbin/sdpadm -a -f /etc/sdp.conf ]; then fi fi -# -# Set TCP ISS generation. 
By default the ISS generation is -# time + random()-delta. This might not be strong enough for some users. -# See /etc/default/inetinit for settings and further info on TCP_STRONG_ISS. -# If not set, use TCP's internal default setting. -# -[ -f /etc/default/inetinit ] && . /etc/default/inetinit -if [ -n $TCP_STRONG_ISS ]; then - /usr/sbin/ipadm set-prop -p _strong_iss=$TCP_STRONG_ISS tcp -fi - # Clear exit status. exit $SMF_EXIT_OK -- 2.11.4.GIT