tcp: remove _strong_iss tunable
[unleashed.git] / kernel / net / tcp / tcp_tunables.c
blobd7289b53be136614629b086fe217264e7e075eac
1 /*
2 * CDDL HEADER START
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
19 * CDDL HEADER END
22 * Copyright (c) 1991, 2010, Oracle and/or its affiliates. All rights reserved.
23 * Copyright 2016 Joyent, Inc.
24 * Copyright 2013 Nexenta Systems, Inc. All rights reserved.
25 * Copyright (c) 2013 by Delphix. All rights reserved.
27 /* Copyright (c) 1990 Mentat Inc. */
29 #include <inet/ip.h>
30 #include <inet/tcp_impl.h>
31 #include <sys/multidata.h>
32 #include <sys/sunddi.h>
/*
 * Largest MSS per address family: the maximum IP datagram size (64k - 1)
 * less the fixed IP header and the TCP header.
 */
#define	TCP_MSS_MAX_IPV4 (IP_MAXPACKET - sizeof (ipha_t) - sizeof (tcpha_t))
#define	TCP_MSS_MAX_IPV6 (IP_MAXPACKET - sizeof (ip6_t) - sizeof (tcpha_t))

/* Overall maximum of the two values above. */
#define	TCP_MSS_MAX	TCP_MSS_MAX_IPV4
42 * returns the current list of listener limit configuration.
44 /* ARGSUSED */
45 static int
46 tcp_listener_conf_get(netstack_t *stack, mod_prop_info_t *pinfo,
47 const char *ifname, void *val, uint_t psize, uint_t flags)
49 tcp_stack_t *tcps = stack->netstack_tcp;
50 tcp_listener_t *tl;
51 char *pval = val;
52 size_t nbytes = 0, tbytes = 0;
53 uint_t size;
54 int err = 0;
56 bzero(pval, psize);
57 size = psize;
59 if (flags & (MOD_PROP_DEFAULT|MOD_PROP_PERM|MOD_PROP_POSSIBLE))
60 return (0);
62 mutex_enter(&tcps->tcps_listener_conf_lock);
63 for (tl = list_head(&tcps->tcps_listener_conf); tl != NULL;
64 tl = list_next(&tcps->tcps_listener_conf, tl)) {
65 if (psize == size)
66 nbytes = snprintf(pval, size, "%d:%d", tl->tl_port,
67 tl->tl_ratio);
68 else
69 nbytes = snprintf(pval, size, ",%d:%d", tl->tl_port,
70 tl->tl_ratio);
71 size -= nbytes;
72 pval += nbytes;
73 tbytes += nbytes;
74 if (tbytes >= psize) {
75 /* Buffer overflow, stop copying information */
76 err = ENOBUFS;
77 break;
81 mutex_exit(&tcps->tcps_listener_conf_lock);
82 return (err);
86 * add a new listener limit configuration.
88 /* ARGSUSED */
89 static int
90 tcp_listener_conf_add(netstack_t *stack, cred_t *cr, mod_prop_info_t *pinfo,
91 const char *ifname, const void* pval, uint_t flags)
93 tcp_listener_t *new_tl;
94 tcp_listener_t *tl;
95 long lport;
96 long ratio;
97 char *colon;
98 tcp_stack_t *tcps = stack->netstack_tcp;
100 if (flags & MOD_PROP_DEFAULT)
101 return (ENOTSUP);
103 if (ddi_strtol(pval, &colon, 10, &lport) != 0 || lport <= 0 ||
104 lport > USHRT_MAX || *colon != ':') {
105 return (EINVAL);
107 if (ddi_strtol(colon + 1, NULL, 10, &ratio) != 0 || ratio <= 0)
108 return (EINVAL);
110 mutex_enter(&tcps->tcps_listener_conf_lock);
111 for (tl = list_head(&tcps->tcps_listener_conf); tl != NULL;
112 tl = list_next(&tcps->tcps_listener_conf, tl)) {
113 /* There is an existing entry, so update its ratio value. */
114 if (tl->tl_port == lport) {
115 tl->tl_ratio = ratio;
116 mutex_exit(&tcps->tcps_listener_conf_lock);
117 return (0);
121 if ((new_tl = kmem_alloc(sizeof (tcp_listener_t), KM_NOSLEEP)) ==
122 NULL) {
123 mutex_exit(&tcps->tcps_listener_conf_lock);
124 return (ENOMEM);
127 new_tl->tl_port = lport;
128 new_tl->tl_ratio = ratio;
129 list_insert_tail(&tcps->tcps_listener_conf, new_tl);
130 mutex_exit(&tcps->tcps_listener_conf_lock);
131 return (0);
135 * remove a listener limit configuration.
137 /* ARGSUSED */
138 static int
139 tcp_listener_conf_del(netstack_t *stack, cred_t *cr, mod_prop_info_t *pinfo,
140 const char *ifname, const void* pval, uint_t flags)
142 tcp_listener_t *tl;
143 long lport;
144 tcp_stack_t *tcps = stack->netstack_tcp;
146 if (flags & MOD_PROP_DEFAULT)
147 return (ENOTSUP);
149 if (ddi_strtol(pval, NULL, 10, &lport) != 0 || lport <= 0 ||
150 lport > USHRT_MAX) {
151 return (EINVAL);
153 mutex_enter(&tcps->tcps_listener_conf_lock);
154 for (tl = list_head(&tcps->tcps_listener_conf); tl != NULL;
155 tl = list_next(&tcps->tcps_listener_conf, tl)) {
156 if (tl->tl_port == lport) {
157 list_remove(&tcps->tcps_listener_conf, tl);
158 mutex_exit(&tcps->tcps_listener_conf_lock);
159 kmem_free(tl, sizeof (tcp_listener_t));
160 return (0);
163 mutex_exit(&tcps->tcps_listener_conf_lock);
164 return (ESRCH);
167 static int
168 tcp_set_buf_prop(netstack_t *stack, cred_t *cr, mod_prop_info_t *pinfo,
169 const char *ifname, const void *pval, uint_t flags)
171 return (mod_set_buf_prop(stack->netstack_tcp->tcps_propinfo_tbl, stack,
172 cr, pinfo, ifname, pval, flags));
175 static int
176 tcp_get_buf_prop(netstack_t *stack, mod_prop_info_t *pinfo, const char *ifname,
177 void *val, uint_t psize, uint_t flags)
179 return (mod_get_buf_prop(stack->netstack_tcp->tcps_propinfo_tbl, stack,
180 pinfo, ifname, val, psize, flags));
184 * Special checkers for smallest/largest anonymous port so they don't
185 * ever happen to be (largest < smallest).
187 /* ARGSUSED */
188 static int
189 tcp_smallest_anon_set(netstack_t *stack, cred_t *cr, mod_prop_info_t *pinfo,
190 const char *ifname, const void *pval, uint_t flags)
192 unsigned long new_value;
193 tcp_stack_t *tcps = stack->netstack_tcp;
194 int err;
196 if ((err = mod_uint32_value(pval, pinfo, flags, &new_value)) != 0)
197 return (err);
198 /* mod_uint32_value() + pinfo guarantees we're in TCP port range. */
199 if ((uint32_t)new_value > tcps->tcps_largest_anon_port)
200 return (ERANGE);
201 pinfo->prop_cur_uval = (uint32_t)new_value;
202 return (0);
205 /* ARGSUSED */
206 static int
207 tcp_largest_anon_set(netstack_t *stack, cred_t *cr, mod_prop_info_t *pinfo,
208 const char *ifname, const void *pval, uint_t flags)
210 unsigned long new_value;
211 tcp_stack_t *tcps = stack->netstack_tcp;
212 int err;
214 if ((err = mod_uint32_value(pval, pinfo, flags, &new_value)) != 0)
215 return (err);
216 /* mod_uint32_value() + pinfo guarantees we're in TCP port range. */
217 if ((uint32_t)new_value < tcps->tcps_smallest_anon_port)
218 return (ERANGE);
219 pinfo->prop_cur_uval = (uint32_t)new_value;
220 return (0);
224 * All of these are alterable, within the min/max values given, at run time.
226 * Note: All those tunables which do not start with "_" are Committed and
227 * therefore are public. See PSARC 2010/080.
229 mod_prop_info_t tcp_propinfo_tbl[] = {
230 /* tunable - 0 */
231 { "_time_wait_interval", MOD_PROTO_TCP,
232 mod_set_uint32, mod_get_uint32,
233 {1*SECONDS, TCP_TIME_WAIT_MAX, 1*MINUTES}, {1*MINUTES} },
235 { "_conn_req_max_q", MOD_PROTO_TCP,
236 mod_set_uint32, mod_get_uint32,
237 {1, UINT32_MAX, 128}, {128} },
239 { "_conn_req_max_q0", MOD_PROTO_TCP,
240 mod_set_uint32, mod_get_uint32,
241 {0, UINT32_MAX, 1024}, {1024} },
243 { "_conn_req_min", MOD_PROTO_TCP,
244 mod_set_uint32, mod_get_uint32,
245 {1, 1024, 1}, {1} },
247 { "_conn_grace_period", MOD_PROTO_TCP,
248 mod_set_uint32, mod_get_uint32,
249 {0*MS, 20*SECONDS, 0*MS}, {0*MS} },
251 { "_cwnd_max", MOD_PROTO_TCP,
252 mod_set_uint32, mod_get_uint32,
253 {128, ULP_MAX_BUF, 1024*1024}, {1024*1024} },
255 { "_debug", MOD_PROTO_TCP,
256 mod_set_uint32, mod_get_uint32,
257 {0, 10, 0}, {0} },
259 { "smallest_nonpriv_port", MOD_PROTO_TCP,
260 mod_set_uint32, mod_get_uint32,
261 {1024, (32*1024), 1024}, {1024} },
263 { "_ip_abort_cinterval", MOD_PROTO_TCP,
264 mod_set_uint32, mod_get_uint32,
265 {1*SECONDS, UINT32_MAX, 3*MINUTES}, {3*MINUTES} },
267 { "_ip_abort_linterval", MOD_PROTO_TCP,
268 mod_set_uint32, mod_get_uint32,
269 {1*SECONDS, UINT32_MAX, 3*MINUTES}, {3*MINUTES} },
271 /* tunable - 10 */
272 { "_ip_abort_interval", MOD_PROTO_TCP,
273 mod_set_uint32, mod_get_uint32,
274 {500*MS, UINT32_MAX, 5*MINUTES}, {5*MINUTES} },
276 { "_ip_notify_cinterval", MOD_PROTO_TCP,
277 mod_set_uint32, mod_get_uint32,
278 {1*SECONDS, UINT32_MAX, 10*SECONDS},
279 {10*SECONDS} },
281 { "_ip_notify_interval", MOD_PROTO_TCP,
282 mod_set_uint32, mod_get_uint32,
283 {500*MS, UINT32_MAX, 10*SECONDS}, {10*SECONDS} },
285 { "_ipv4_ttl", MOD_PROTO_TCP,
286 mod_set_uint32, mod_get_uint32,
287 {1, 255, 64}, {64} },
289 { "_keepalive_interval", MOD_PROTO_TCP,
290 mod_set_uint32, mod_get_uint32,
291 {1*SECONDS, 10*DAYS, 2*HOURS}, {2*HOURS} },
293 { "_maxpsz_multiplier", MOD_PROTO_TCP,
294 mod_set_uint32, mod_get_uint32,
295 {0, 100, 10}, {10} },
297 { "_mss_def_ipv4", MOD_PROTO_TCP,
298 mod_set_uint32, mod_get_uint32,
299 {1, TCP_MSS_MAX_IPV4, 536}, {536} },
301 { "_mss_max_ipv4", MOD_PROTO_TCP,
302 mod_set_uint32, mod_get_uint32,
303 {1, TCP_MSS_MAX_IPV4, TCP_MSS_MAX_IPV4},
304 {TCP_MSS_MAX_IPV4} },
306 { "_mss_min", MOD_PROTO_TCP,
307 mod_set_uint32, mod_get_uint32,
308 {1, TCP_MSS_MAX, 108}, {108} },
310 { "_naglim_def", MOD_PROTO_TCP,
311 mod_set_uint32, mod_get_uint32,
312 {1, (64*1024)-1, (4*1024)-1}, {(4*1024)-1} },
314 /* tunable - 20 */
315 { "_rexmit_interval_initial", MOD_PROTO_TCP,
316 mod_set_uint32, mod_get_uint32,
317 {1*MS, 20*SECONDS, 1*SECONDS}, {1*SECONDS} },
319 { "_rexmit_interval_max", MOD_PROTO_TCP,
320 mod_set_uint32, mod_get_uint32,
321 {1*MS, 2*HOURS, 60*SECONDS}, {60*SECONDS} },
323 { "_rexmit_interval_min", MOD_PROTO_TCP,
324 mod_set_uint32, mod_get_uint32,
325 {1*MS, 2*HOURS, 400*MS}, {400*MS} },
327 { "_deferred_ack_interval", MOD_PROTO_TCP,
328 mod_set_uint32, mod_get_uint32,
329 {1*MS, 1*MINUTES, 100*MS}, {100*MS} },
331 { "_snd_lowat_fraction", MOD_PROTO_TCP,
332 mod_set_uint32, mod_get_uint32,
333 {0, 16, 10}, {10} },
335 { "dupack_fast_retrans", MOD_PROTO_TCP,
336 mod_set_uint32, mod_get_uint32,
337 {1, 10000, 3}, {3} },
339 { "_ignore_path_mtu", MOD_PROTO_TCP,
340 mod_set_boolean, mod_get_boolean,
341 {B_FALSE}, {B_FALSE} },
343 { "smallest_anon_port", MOD_PROTO_TCP,
344 tcp_smallest_anon_set, mod_get_uint32,
345 {1024, ULP_MAX_PORT, 32*1024}, {32*1024} },
347 { "largest_anon_port", MOD_PROTO_TCP,
348 tcp_largest_anon_set, mod_get_uint32,
349 {1024, ULP_MAX_PORT, ULP_MAX_PORT},
350 {ULP_MAX_PORT} },
352 { "send_buf", MOD_PROTO_TCP,
353 tcp_set_buf_prop, tcp_get_buf_prop,
354 {TCP_XMIT_LOWATER, ULP_MAX_BUF, TCP_XMIT_HIWATER},
355 {TCP_XMIT_HIWATER} },
357 /* tunable - 30 */
358 { "_xmit_lowat", MOD_PROTO_TCP,
359 mod_set_uint32, mod_get_uint32,
360 {TCP_XMIT_LOWATER, ULP_MAX_BUF, TCP_XMIT_LOWATER},
361 {TCP_XMIT_LOWATER} },
363 { "recv_buf", MOD_PROTO_TCP,
364 tcp_set_buf_prop, tcp_get_buf_prop,
365 {TCP_RECV_LOWATER, ULP_MAX_BUF, TCP_RECV_HIWATER},
366 {TCP_RECV_HIWATER} },
368 { "_recv_hiwat_minmss", MOD_PROTO_TCP,
369 mod_set_uint32, mod_get_uint32,
370 {1, 65536, 4}, {4} },
372 { "_fin_wait_2_flush_interval", MOD_PROTO_TCP,
373 mod_set_uint32, mod_get_uint32,
374 {1*SECONDS, 2*HOURS, 60*SECONDS},
375 {60*SECONDS} },
377 { "max_buf", MOD_PROTO_TCP,
378 mod_set_uint32, mod_get_uint32,
379 {8192, ULP_MAX_BUF, 1024*1024}, {1024*1024} },
381 { "_rtt_updates", MOD_PROTO_TCP,
382 mod_set_uint32, mod_get_uint32,
383 {0, 65536, 20}, {20} },
385 { "_wscale_always", MOD_PROTO_TCP,
386 mod_set_boolean, mod_get_boolean,
387 {B_TRUE}, {B_TRUE} },
389 { "_tstamp_always", MOD_PROTO_TCP,
390 mod_set_boolean, mod_get_boolean,
391 {B_FALSE}, {B_FALSE} },
393 { "_tstamp_if_wscale", MOD_PROTO_TCP,
394 mod_set_boolean, mod_get_boolean,
395 {B_TRUE}, {B_TRUE} },
397 /* tunable - 40 */
398 { "_rexmit_interval_extra", MOD_PROTO_TCP,
399 mod_set_uint32, mod_get_uint32,
400 {0*MS, 2*HOURS, 0*MS}, {0*MS} },
402 { "_deferred_acks_max", MOD_PROTO_TCP,
403 mod_set_uint32, mod_get_uint32,
404 {0, 16, 2}, {2} },
406 { "_slow_start_after_idle", MOD_PROTO_TCP,
407 mod_set_uint32, mod_get_uint32,
408 {0, 16384, 0}, {0} },
410 { "_slow_start_initial", MOD_PROTO_TCP,
411 mod_set_uint32, mod_get_uint32,
412 {0, 16, 0}, {0} },
414 { "sack", MOD_PROTO_TCP,
415 mod_set_uint32, mod_get_uint32,
416 {0, 2, 2}, {2} },
418 { "_ipv6_hoplimit", MOD_PROTO_TCP,
419 mod_set_uint32, mod_get_uint32,
420 {0, IPV6_MAX_HOPS, IPV6_DEFAULT_HOPS},
421 {IPV6_DEFAULT_HOPS} },
423 { "_mss_def_ipv6", MOD_PROTO_TCP,
424 mod_set_uint32, mod_get_uint32,
425 {1, TCP_MSS_MAX_IPV6, 1220}, {1220} },
427 { "_mss_max_ipv6", MOD_PROTO_TCP,
428 mod_set_uint32, mod_get_uint32,
429 {1, TCP_MSS_MAX_IPV6, TCP_MSS_MAX_IPV6},
430 {TCP_MSS_MAX_IPV6} },
432 { "_rev_src_routes", MOD_PROTO_TCP,
433 mod_set_boolean, mod_get_boolean,
434 {B_FALSE}, {B_FALSE} },
436 { "_local_dack_interval", MOD_PROTO_TCP,
437 mod_set_uint32, mod_get_uint32,
438 {10*MS, 500*MS, 50*MS}, {50*MS} },
440 /* tunable - 50 */
441 { "_local_dacks_max", MOD_PROTO_TCP,
442 mod_set_uint32, mod_get_uint32,
443 {0, 16, 8}, {8} },
445 { "ecn", MOD_PROTO_TCP,
446 mod_set_uint32, mod_get_uint32,
447 {0, 2, 1}, {1} },
449 { "_rst_sent_rate_enabled", MOD_PROTO_TCP,
450 mod_set_boolean, mod_get_boolean,
451 {B_TRUE}, {B_TRUE} },
453 { "_rst_sent_rate", MOD_PROTO_TCP,
454 mod_set_uint32, mod_get_uint32,
455 {0, UINT32_MAX, 40}, {40} },
457 { "_push_timer_interval", MOD_PROTO_TCP,
458 mod_set_uint32, mod_get_uint32,
459 {0, 100*MS, 50*MS}, {50*MS} },
461 { "_use_smss_as_mss_opt", MOD_PROTO_TCP,
462 mod_set_boolean, mod_get_boolean,
463 {B_FALSE}, {B_FALSE} },
465 { "_keepalive_abort_interval", MOD_PROTO_TCP,
466 mod_set_uint32, mod_get_uint32,
467 {0, UINT32_MAX, 8*MINUTES}, {8*MINUTES} },
470 * tcp_wroff_xtra is the extra space in front of TCP/IP header for link
471 * layer header. It has to be a multiple of 8.
473 { "_wroff_xtra", MOD_PROTO_TCP,
474 mod_set_aligned, mod_get_uint32,
475 {0, 256, 32}, {32} },
477 { "_dev_flow_ctl", MOD_PROTO_TCP,
478 mod_set_boolean, mod_get_boolean,
479 {B_FALSE}, {B_FALSE} },
481 { "_reass_timeout", MOD_PROTO_TCP,
482 mod_set_uint32, mod_get_uint32,
483 {0, UINT32_MAX, 100*SECONDS}, {100*SECONDS} },
485 /* tunable - 60 */
486 { "extra_priv_ports", MOD_PROTO_TCP,
487 mod_set_extra_privports, mod_get_extra_privports,
488 {1, ULP_MAX_PORT, 0}, {0} },
490 { "_listener_limit_conf", MOD_PROTO_TCP,
491 NULL, tcp_listener_conf_get, {0}, {0} },
493 { "_listener_limit_conf_add", MOD_PROTO_TCP,
494 tcp_listener_conf_add, NULL, {0}, {0} },
496 { "_listener_limit_conf_del", MOD_PROTO_TCP,
497 tcp_listener_conf_del, NULL, {0}, {0} },
499 { "_iss_incr", MOD_PROTO_TCP,
500 mod_set_uint32, mod_get_uint32,
501 {1, ISS_INCR, ISS_INCR},
502 {ISS_INCR} },
504 { "?", MOD_PROTO_TCP, NULL, mod_get_allprop, {0}, {0} },
506 { NULL, 0, NULL, NULL, {0}, {0} }
509 int tcp_propinfo_count = A_CNT(tcp_propinfo_tbl);