4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
22 * Copyright (c) 1991, 2010, Oracle and/or its affiliates. All rights reserved.
23 * Copyright 2016 Joyent, Inc.
24 * Copyright 2013 Nexenta Systems, Inc. All rights reserved.
25 * Copyright (c) 2013 by Delphix. All rights reserved.
27 /* Copyright (c) 1990 Mentat Inc. */
30 #include <inet/tcp_impl.h>
31 #include <sys/multidata.h>
32 #include <sys/sunddi.h>
/*
 * Largest TCP segment payload that fits in a maximum-size IP datagram
 * (IP_MAXPACKET, 64k - 1) once the fixed IP header and the TCP header have
 * been subtracted.
 */
#define	TCP_MSS_MAX_IPV4 (IP_MAXPACKET - sizeof (ipha_t) - sizeof (tcpha_t))
#define	TCP_MSS_MAX_IPV6 (IP_MAXPACKET - sizeof (ip6_t) - sizeof (tcpha_t))

/*
 * Overall upper bound: the larger of the two limits above (presumably the
 * IPv4 one because its fixed header is the smaller of the two).
 */
#define	TCP_MSS_MAX	TCP_MSS_MAX_IPV4
42 * Set the RFC 1948 pass phrase
46 tcp_set_1948phrase(netstack_t
*stack
, cred_t
*cr
, mod_prop_info_t
*pinfo
,
47 const char *ifname
, const void* pr_val
, uint_t flags
)
49 if (flags
& MOD_PROP_DEFAULT
)
53 * Basically, value contains a new pass phrase. Pass it along!
55 tcp_iss_key_init((uint8_t *)pr_val
, strlen(pr_val
),
61 * returns the current list of listener limit configuration.
65 tcp_listener_conf_get(netstack_t
*stack
, mod_prop_info_t
*pinfo
,
66 const char *ifname
, void *val
, uint_t psize
, uint_t flags
)
68 tcp_stack_t
*tcps
= stack
->netstack_tcp
;
71 size_t nbytes
= 0, tbytes
= 0;
78 if (flags
& (MOD_PROP_DEFAULT
|MOD_PROP_PERM
|MOD_PROP_POSSIBLE
))
81 mutex_enter(&tcps
->tcps_listener_conf_lock
);
82 for (tl
= list_head(&tcps
->tcps_listener_conf
); tl
!= NULL
;
83 tl
= list_next(&tcps
->tcps_listener_conf
, tl
)) {
85 nbytes
= snprintf(pval
, size
, "%d:%d", tl
->tl_port
,
88 nbytes
= snprintf(pval
, size
, ",%d:%d", tl
->tl_port
,
93 if (tbytes
>= psize
) {
94 /* Buffer overflow, stop copying information */
100 mutex_exit(&tcps
->tcps_listener_conf_lock
);
105 * add a new listener limit configuration.
109 tcp_listener_conf_add(netstack_t
*stack
, cred_t
*cr
, mod_prop_info_t
*pinfo
,
110 const char *ifname
, const void* pval
, uint_t flags
)
112 tcp_listener_t
*new_tl
;
117 tcp_stack_t
*tcps
= stack
->netstack_tcp
;
119 if (flags
& MOD_PROP_DEFAULT
)
122 if (ddi_strtol(pval
, &colon
, 10, &lport
) != 0 || lport
<= 0 ||
123 lport
> USHRT_MAX
|| *colon
!= ':') {
126 if (ddi_strtol(colon
+ 1, NULL
, 10, &ratio
) != 0 || ratio
<= 0)
129 mutex_enter(&tcps
->tcps_listener_conf_lock
);
130 for (tl
= list_head(&tcps
->tcps_listener_conf
); tl
!= NULL
;
131 tl
= list_next(&tcps
->tcps_listener_conf
, tl
)) {
132 /* There is an existing entry, so update its ratio value. */
133 if (tl
->tl_port
== lport
) {
134 tl
->tl_ratio
= ratio
;
135 mutex_exit(&tcps
->tcps_listener_conf_lock
);
140 if ((new_tl
= kmem_alloc(sizeof (tcp_listener_t
), KM_NOSLEEP
)) ==
142 mutex_exit(&tcps
->tcps_listener_conf_lock
);
146 new_tl
->tl_port
= lport
;
147 new_tl
->tl_ratio
= ratio
;
148 list_insert_tail(&tcps
->tcps_listener_conf
, new_tl
);
149 mutex_exit(&tcps
->tcps_listener_conf_lock
);
154 * remove a listener limit configuration.
158 tcp_listener_conf_del(netstack_t
*stack
, cred_t
*cr
, mod_prop_info_t
*pinfo
,
159 const char *ifname
, const void* pval
, uint_t flags
)
163 tcp_stack_t
*tcps
= stack
->netstack_tcp
;
165 if (flags
& MOD_PROP_DEFAULT
)
168 if (ddi_strtol(pval
, NULL
, 10, &lport
) != 0 || lport
<= 0 ||
172 mutex_enter(&tcps
->tcps_listener_conf_lock
);
173 for (tl
= list_head(&tcps
->tcps_listener_conf
); tl
!= NULL
;
174 tl
= list_next(&tcps
->tcps_listener_conf
, tl
)) {
175 if (tl
->tl_port
== lport
) {
176 list_remove(&tcps
->tcps_listener_conf
, tl
);
177 mutex_exit(&tcps
->tcps_listener_conf_lock
);
178 kmem_free(tl
, sizeof (tcp_listener_t
));
182 mutex_exit(&tcps
->tcps_listener_conf_lock
);
187 tcp_set_buf_prop(netstack_t
*stack
, cred_t
*cr
, mod_prop_info_t
*pinfo
,
188 const char *ifname
, const void *pval
, uint_t flags
)
190 return (mod_set_buf_prop(stack
->netstack_tcp
->tcps_propinfo_tbl
, stack
,
191 cr
, pinfo
, ifname
, pval
, flags
));
195 tcp_get_buf_prop(netstack_t
*stack
, mod_prop_info_t
*pinfo
, const char *ifname
,
196 void *val
, uint_t psize
, uint_t flags
)
198 return (mod_get_buf_prop(stack
->netstack_tcp
->tcps_propinfo_tbl
, stack
,
199 pinfo
, ifname
, val
, psize
, flags
));
203 * Special checkers for smallest/largest anonymous port so they don't
204 * ever happen to be (largest < smallest).
208 tcp_smallest_anon_set(netstack_t
*stack
, cred_t
*cr
, mod_prop_info_t
*pinfo
,
209 const char *ifname
, const void *pval
, uint_t flags
)
211 unsigned long new_value
;
212 tcp_stack_t
*tcps
= stack
->netstack_tcp
;
215 if ((err
= mod_uint32_value(pval
, pinfo
, flags
, &new_value
)) != 0)
217 /* mod_uint32_value() + pinfo guarantees we're in TCP port range. */
218 if ((uint32_t)new_value
> tcps
->tcps_largest_anon_port
)
220 pinfo
->prop_cur_uval
= (uint32_t)new_value
;
226 tcp_largest_anon_set(netstack_t
*stack
, cred_t
*cr
, mod_prop_info_t
*pinfo
,
227 const char *ifname
, const void *pval
, uint_t flags
)
229 unsigned long new_value
;
230 tcp_stack_t
*tcps
= stack
->netstack_tcp
;
233 if ((err
= mod_uint32_value(pval
, pinfo
, flags
, &new_value
)) != 0)
235 /* mod_uint32_value() + pinfo guarantees we're in TCP port range. */
236 if ((uint32_t)new_value
< tcps
->tcps_smallest_anon_port
)
238 pinfo
->prop_cur_uval
= (uint32_t)new_value
;
243 * All of these are alterable, within the min/max values given, at run time.
245 * Note: All those tunables which do not start with "_" are Committed and
246 * therefore are public. See PSARC 2010/080.
/*
 * TCP property table.  Each entry lists, in order: the property name, the
 * protocol tag (MOD_PROTO_TCP for every named entry), the set callback, the
 * get callback, and two brace-enclosed initializer groups.  A NULL callback
 * presumably means the property cannot be set (or read) through this table:
 * e.g. "_1948_phrase" has no getter and "_listener_limit_conf" has no
 * setter.  The "?" entry has only a getter, mod_get_allprop, and the entry
 * whose name is NULL closes the table.
 *
 * NOTE(review): the first group looks like {min, max, default} (or a lone
 * boolean default) and the second like the current value -- inferred from
 * the values only; confirm against the mod_prop_info_t definition.
 *
 * NOTE(review): the stray leading numbers and the one-token-per-line layout
 * are artifacts of how this text was captured.  Several entries have also
 * lost some or all of their value initializers; each one is flagged where
 * it occurs and must be restored from the canonical source.
 */
248 mod_prop_info_t tcp_propinfo_tbl
[] = {
250 { "_time_wait_interval", MOD_PROTO_TCP
,
251 mod_set_uint32
, mod_get_uint32
,
252 {1*SECONDS
, TCP_TIME_WAIT_MAX
, 1*MINUTES
}, {1*MINUTES
} },
254 { "_conn_req_max_q", MOD_PROTO_TCP
,
255 mod_set_uint32
, mod_get_uint32
,
256 {1, UINT32_MAX
, 128}, {128} },
258 { "_conn_req_max_q0", MOD_PROTO_TCP
,
259 mod_set_uint32
, mod_get_uint32
,
260 {0, UINT32_MAX
, 1024}, {1024} },
262 { "_conn_req_min", MOD_PROTO_TCP
,
263 mod_set_uint32
, mod_get_uint32
,
/* NOTE(review): value initializers for "_conn_req_min" are missing here. */
266 { "_conn_grace_period", MOD_PROTO_TCP
,
267 mod_set_uint32
, mod_get_uint32
,
268 {0*MS
, 20*SECONDS
, 0*MS
}, {0*MS
} },
270 { "_cwnd_max", MOD_PROTO_TCP
,
271 mod_set_uint32
, mod_get_uint32
,
272 {128, ULP_MAX_BUF
, 1024*1024}, {1024*1024} },
274 { "_debug", MOD_PROTO_TCP
,
275 mod_set_uint32
, mod_get_uint32
,
/* NOTE(review): value initializers for "_debug" are missing here. */
278 { "smallest_nonpriv_port", MOD_PROTO_TCP
,
279 mod_set_uint32
, mod_get_uint32
,
280 {1024, (32*1024), 1024}, {1024} },
282 { "_ip_abort_cinterval", MOD_PROTO_TCP
,
283 mod_set_uint32
, mod_get_uint32
,
284 {1*SECONDS
, UINT32_MAX
, 3*MINUTES
}, {3*MINUTES
} },
286 { "_ip_abort_linterval", MOD_PROTO_TCP
,
287 mod_set_uint32
, mod_get_uint32
,
288 {1*SECONDS
, UINT32_MAX
, 3*MINUTES
}, {3*MINUTES
} },
291 { "_ip_abort_interval", MOD_PROTO_TCP
,
292 mod_set_uint32
, mod_get_uint32
,
293 {500*MS
, UINT32_MAX
, 5*MINUTES
}, {5*MINUTES
} },
295 { "_ip_notify_cinterval", MOD_PROTO_TCP
,
296 mod_set_uint32
, mod_get_uint32
,
297 {1*SECONDS
, UINT32_MAX
, 10*SECONDS
},
/* NOTE(review): second initializer group for "_ip_notify_cinterval" is missing here. */
300 { "_ip_notify_interval", MOD_PROTO_TCP
,
301 mod_set_uint32
, mod_get_uint32
,
302 {500*MS
, UINT32_MAX
, 10*SECONDS
}, {10*SECONDS
} },
304 { "_ipv4_ttl", MOD_PROTO_TCP
,
305 mod_set_uint32
, mod_get_uint32
,
306 {1, 255, 64}, {64} },
308 { "_keepalive_interval", MOD_PROTO_TCP
,
309 mod_set_uint32
, mod_get_uint32
,
310 {1*SECONDS
, 10*DAYS
, 2*HOURS
}, {2*HOURS
} },
312 { "_maxpsz_multiplier", MOD_PROTO_TCP
,
313 mod_set_uint32
, mod_get_uint32
,
314 {0, 100, 10}, {10} },
316 { "_mss_def_ipv4", MOD_PROTO_TCP
,
317 mod_set_uint32
, mod_get_uint32
,
318 {1, TCP_MSS_MAX_IPV4
, 536}, {536} },
320 { "_mss_max_ipv4", MOD_PROTO_TCP
,
321 mod_set_uint32
, mod_get_uint32
,
322 {1, TCP_MSS_MAX_IPV4
, TCP_MSS_MAX_IPV4
},
323 {TCP_MSS_MAX_IPV4
} },
325 { "_mss_min", MOD_PROTO_TCP
,
326 mod_set_uint32
, mod_get_uint32
,
327 {1, TCP_MSS_MAX
, 108}, {108} },
329 { "_naglim_def", MOD_PROTO_TCP
,
330 mod_set_uint32
, mod_get_uint32
,
331 {1, (64*1024)-1, (4*1024)-1}, {(4*1024)-1} },
334 { "_rexmit_interval_initial", MOD_PROTO_TCP
,
335 mod_set_uint32
, mod_get_uint32
,
336 {1*MS
, 20*SECONDS
, 1*SECONDS
}, {1*SECONDS
} },
338 { "_rexmit_interval_max", MOD_PROTO_TCP
,
339 mod_set_uint32
, mod_get_uint32
,
340 {1*MS
, 2*HOURS
, 60*SECONDS
}, {60*SECONDS
} },
342 { "_rexmit_interval_min", MOD_PROTO_TCP
,
343 mod_set_uint32
, mod_get_uint32
,
344 {1*MS
, 2*HOURS
, 400*MS
}, {400*MS
} },
346 { "_deferred_ack_interval", MOD_PROTO_TCP
,
347 mod_set_uint32
, mod_get_uint32
,
348 {1*MS
, 1*MINUTES
, 100*MS
}, {100*MS
} },
350 { "_snd_lowat_fraction", MOD_PROTO_TCP
,
351 mod_set_uint32
, mod_get_uint32
,
/* NOTE(review): value initializers for "_snd_lowat_fraction" are missing here. */
354 { "_dupack_fast_retransmit", MOD_PROTO_TCP
,
355 mod_set_uint32
, mod_get_uint32
,
356 {1, 10000, 3}, {3} },
358 { "_ignore_path_mtu", MOD_PROTO_TCP
,
359 mod_set_boolean
, mod_get_boolean
,
360 {B_FALSE
}, {B_FALSE
} },
362 { "smallest_anon_port", MOD_PROTO_TCP
,
363 tcp_smallest_anon_set
, mod_get_uint32
,
364 {1024, ULP_MAX_PORT
, 32*1024}, {32*1024} },
366 { "largest_anon_port", MOD_PROTO_TCP
,
367 tcp_largest_anon_set
, mod_get_uint32
,
368 {1024, ULP_MAX_PORT
, ULP_MAX_PORT
},
/* NOTE(review): second initializer group for "largest_anon_port" is missing here. */
371 { "send_buf", MOD_PROTO_TCP
,
372 tcp_set_buf_prop
, tcp_get_buf_prop
,
373 {TCP_XMIT_LOWATER
, ULP_MAX_BUF
, TCP_XMIT_HIWATER
},
374 {TCP_XMIT_HIWATER
} },
377 { "_xmit_lowat", MOD_PROTO_TCP
,
378 mod_set_uint32
, mod_get_uint32
,
379 {TCP_XMIT_LOWATER
, ULP_MAX_BUF
, TCP_XMIT_LOWATER
},
380 {TCP_XMIT_LOWATER
} },
382 { "recv_buf", MOD_PROTO_TCP
,
383 tcp_set_buf_prop
, tcp_get_buf_prop
,
384 {TCP_RECV_LOWATER
, ULP_MAX_BUF
, TCP_RECV_HIWATER
},
385 {TCP_RECV_HIWATER
} },
387 { "_recv_hiwat_minmss", MOD_PROTO_TCP
,
388 mod_set_uint32
, mod_get_uint32
,
389 {1, 65536, 4}, {4} },
391 { "_fin_wait_2_flush_interval", MOD_PROTO_TCP
,
392 mod_set_uint32
, mod_get_uint32
,
393 {1*SECONDS
, 2*HOURS
, 60*SECONDS
},
/* NOTE(review): second initializer group for "_fin_wait_2_flush_interval" is missing here. */
396 { "max_buf", MOD_PROTO_TCP
,
397 mod_set_uint32
, mod_get_uint32
,
398 {8192, ULP_MAX_BUF
, 1024*1024}, {1024*1024} },
400 { "_strong_iss", MOD_PROTO_TCP
,
401 mod_set_uint32
, mod_get_uint32
,
/* NOTE(review): value initializers for "_strong_iss" are missing here. */
404 { "_rtt_updates", MOD_PROTO_TCP
,
405 mod_set_uint32
, mod_get_uint32
,
406 {0, 65536, 20}, {20} },
408 { "_wscale_always", MOD_PROTO_TCP
,
409 mod_set_boolean
, mod_get_boolean
,
410 {B_TRUE
}, {B_TRUE
} },
412 { "_tstamp_always", MOD_PROTO_TCP
,
413 mod_set_boolean
, mod_get_boolean
,
414 {B_FALSE
}, {B_FALSE
} },
416 { "_tstamp_if_wscale", MOD_PROTO_TCP
,
417 mod_set_boolean
, mod_get_boolean
,
418 {B_TRUE
}, {B_TRUE
} },
421 { "_rexmit_interval_extra", MOD_PROTO_TCP
,
422 mod_set_uint32
, mod_get_uint32
,
423 {0*MS
, 2*HOURS
, 0*MS
}, {0*MS
} },
425 { "_deferred_acks_max", MOD_PROTO_TCP
,
426 mod_set_uint32
, mod_get_uint32
,
/* NOTE(review): value initializers for "_deferred_acks_max" are missing here. */
429 { "_slow_start_after_idle", MOD_PROTO_TCP
,
430 mod_set_uint32
, mod_get_uint32
,
431 {0, 16384, 0}, {0} },
433 { "_slow_start_initial", MOD_PROTO_TCP
,
434 mod_set_uint32
, mod_get_uint32
,
/* NOTE(review): value initializers for "_slow_start_initial" are missing here. */
437 { "sack", MOD_PROTO_TCP
,
438 mod_set_uint32
, mod_get_uint32
,
/* NOTE(review): value initializers for "sack" are missing here. */
441 { "_ipv6_hoplimit", MOD_PROTO_TCP
,
442 mod_set_uint32
, mod_get_uint32
,
443 {0, IPV6_MAX_HOPS
, IPV6_DEFAULT_HOPS
},
444 {IPV6_DEFAULT_HOPS
} },
446 { "_mss_def_ipv6", MOD_PROTO_TCP
,
447 mod_set_uint32
, mod_get_uint32
,
448 {1, TCP_MSS_MAX_IPV6
, 1220}, {1220} },
450 { "_mss_max_ipv6", MOD_PROTO_TCP
,
451 mod_set_uint32
, mod_get_uint32
,
452 {1, TCP_MSS_MAX_IPV6
, TCP_MSS_MAX_IPV6
},
453 {TCP_MSS_MAX_IPV6
} },
455 { "_rev_src_routes", MOD_PROTO_TCP
,
456 mod_set_boolean
, mod_get_boolean
,
457 {B_FALSE
}, {B_FALSE
} },
459 { "_local_dack_interval", MOD_PROTO_TCP
,
460 mod_set_uint32
, mod_get_uint32
,
461 {10*MS
, 500*MS
, 50*MS
}, {50*MS
} },
464 { "_local_dacks_max", MOD_PROTO_TCP
,
465 mod_set_uint32
, mod_get_uint32
,
/* NOTE(review): value initializers for "_local_dacks_max" are missing here. */
468 { "ecn", MOD_PROTO_TCP
,
469 mod_set_uint32
, mod_get_uint32
,
/* NOTE(review): value initializers for "ecn" are missing here. */
472 { "_rst_sent_rate_enabled", MOD_PROTO_TCP
,
473 mod_set_boolean
, mod_get_boolean
,
474 {B_TRUE
}, {B_TRUE
} },
476 { "_rst_sent_rate", MOD_PROTO_TCP
,
477 mod_set_uint32
, mod_get_uint32
,
478 {0, UINT32_MAX
, 40}, {40} },
480 { "_push_timer_interval", MOD_PROTO_TCP
,
481 mod_set_uint32
, mod_get_uint32
,
482 {0, 100*MS
, 50*MS
}, {50*MS
} },
484 { "_use_smss_as_mss_opt", MOD_PROTO_TCP
,
485 mod_set_boolean
, mod_get_boolean
,
486 {B_FALSE
}, {B_FALSE
} },
488 { "_keepalive_abort_interval", MOD_PROTO_TCP
,
489 mod_set_uint32
, mod_get_uint32
,
490 {0, UINT32_MAX
, 8*MINUTES
}, {8*MINUTES
} },
493 * tcp_wroff_xtra is the extra space in front of TCP/IP header for link
494 * layer header. It has to be a multiple of 8.
496 { "_wroff_xtra", MOD_PROTO_TCP
,
497 mod_set_aligned
, mod_get_uint32
,
498 {0, 256, 32}, {32} },
500 { "_dev_flow_ctl", MOD_PROTO_TCP
,
501 mod_set_boolean
, mod_get_boolean
,
502 {B_FALSE
}, {B_FALSE
} },
504 { "_reass_timeout", MOD_PROTO_TCP
,
505 mod_set_uint32
, mod_get_uint32
,
506 {0, UINT32_MAX
, 100*SECONDS
}, {100*SECONDS
} },
509 { "extra_priv_ports", MOD_PROTO_TCP
,
510 mod_set_extra_privports
, mod_get_extra_privports
,
511 {1, ULP_MAX_PORT
, 0}, {0} },
513 { "_1948_phrase", MOD_PROTO_TCP
,
514 tcp_set_1948phrase
, NULL
, {0}, {0} },
516 { "_listener_limit_conf", MOD_PROTO_TCP
,
517 NULL
, tcp_listener_conf_get
, {0}, {0} },
519 { "_listener_limit_conf_add", MOD_PROTO_TCP
,
520 tcp_listener_conf_add
, NULL
, {0}, {0} },
522 { "_listener_limit_conf_del", MOD_PROTO_TCP
,
523 tcp_listener_conf_del
, NULL
, {0}, {0} },
525 { "_iss_incr", MOD_PROTO_TCP
,
526 mod_set_uint32
, mod_get_uint32
,
527 {1, ISS_INCR
, ISS_INCR
},
/* NOTE(review): second initializer group for "_iss_incr" is missing here. */
530 { "?", MOD_PROTO_TCP
, NULL
, mod_get_allprop
, {0}, {0} },
532 { NULL
, 0, NULL
, NULL
, {0}, {0} }
535 int tcp_propinfo_count
= A_CNT(tcp_propinfo_tbl
);