/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
 * Copyright 2016 Joyent, Inc.
 */

/*
 * This file contains functions related to TCP time wait processing.  Also
 * refer to the time wait handling comments in tcp_impl.h.
 */
#include <sys/types.h>
#include <sys/strsun.h>
#include <sys/squeue_impl.h>
#include <sys/squeue.h>
#include <sys/callo.h>

#include <inet/common.h>
#include <inet/ip.h>
#include <inet/tcp.h>
#include <inet/tcp_impl.h>
#include <inet/tcp_cluster.h>
static void tcp_time_wait_purge(tcp_t *, tcp_squeue_priv_t *);
#define	TW_BUCKET(t)						\
	(((t) / MSEC_TO_TICK(TCP_TIME_WAIT_DELAY)) % TCP_TIME_WAIT_BUCKETS)

#define	TW_BUCKET_NEXT(b)	(((b) + 1) % TCP_TIME_WAIT_BUCKETS)
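
/*
 * Illustrative sketch (not part of the build): how the TW_BUCKET macros hash
 * an expiration timestamp onto the timing wheel.  The constants below are
 * hypothetical stand-ins for MSEC_TO_TICK(TCP_TIME_WAIT_DELAY) and
 * TCP_TIME_WAIT_BUCKETS, which are defined in tcp_impl.h.  Wrapped in #if 0
 * so it never enters the build.
 */
#if 0
#include <stdio.h>
#include <stdint.h>

#define	EX_DELAY_TICKS	100	/* stand-in for MSEC_TO_TICK(...DELAY) */
#define	EX_BUCKETS	16	/* stand-in for TCP_TIME_WAIT_BUCKETS */
#define	EX_BUCKET(t)	(((t) / EX_DELAY_TICKS) % EX_BUCKETS)
#define	EX_BUCKET_NEXT(b)	(((b) + 1) % EX_BUCKETS)

int
main(void)
{
	int64_t expire = 12345;

	/* Timestamps one interval apart land in adjacent buckets. */
	printf("%d %d\n", (int)EX_BUCKET(expire),
	    (int)EX_BUCKET(expire + EX_DELAY_TICKS));	/* 11 12 */

	/* The wheel wraps: the bucket after the last is bucket 0. */
	printf("%d\n", (int)EX_BUCKET_NEXT(EX_BUCKETS - 1));	/* 0 */
	return (0);
}
#endif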
/*
 * Remove a connection from the list of detached TIME_WAIT connections.
 * It returns B_FALSE if it can't remove the connection from the list
 * as the connection has already been removed from the list due to an
 * earlier call to tcp_time_wait_remove(); otherwise it returns B_TRUE.
 */
boolean_t
tcp_time_wait_remove(tcp_t *tcp, tcp_squeue_priv_t *tsp)
{
	boolean_t locked = B_FALSE;

	if (tsp == NULL) {
		tsp = *((tcp_squeue_priv_t **)
		    squeue_getprivate(tcp->tcp_connp->conn_sqp,
		    SQPRIVATE_TCP));
		mutex_enter(&tsp->tcp_time_wait_lock);
		locked = B_TRUE;
	} else {
		ASSERT(MUTEX_HELD(&tsp->tcp_time_wait_lock));
	}

	/* 0 means that the tcp_t has not been added to the time wait list. */
	if (tcp->tcp_time_wait_expire == 0) {
		ASSERT(tcp->tcp_time_wait_next == NULL);
		ASSERT(tcp->tcp_time_wait_prev == NULL);
		if (locked)
			mutex_exit(&tsp->tcp_time_wait_lock);
		return (B_FALSE);
	}
	ASSERT(TCP_IS_DETACHED(tcp));
	ASSERT(tcp->tcp_state == TCPS_TIME_WAIT);
	ASSERT(tsp->tcp_time_wait_cnt > 0);

	if (tcp->tcp_time_wait_next != NULL) {
		tcp->tcp_time_wait_next->tcp_time_wait_prev =
		    tcp->tcp_time_wait_prev;
	}
	if (tcp->tcp_time_wait_prev != NULL) {
		tcp->tcp_time_wait_prev->tcp_time_wait_next =
		    tcp->tcp_time_wait_next;
	} else {
		unsigned int bucket;

		bucket = TW_BUCKET(tcp->tcp_time_wait_expire);
		ASSERT(tsp->tcp_time_wait_bucket[bucket] == tcp);
		tsp->tcp_time_wait_bucket[bucket] = tcp->tcp_time_wait_next;
	}
	tcp->tcp_time_wait_next = NULL;
	tcp->tcp_time_wait_prev = NULL;
	tcp->tcp_time_wait_expire = 0;
	tsp->tcp_time_wait_cnt--;

	if (locked)
		mutex_exit(&tsp->tcp_time_wait_lock);
	return (B_TRUE);
}
/* Constants used for fast checking of a localhost address */
#if defined(_BIG_ENDIAN)
#define	IPv4_LOCALHOST	0x7f000000U
#define	IPv4_LH_MASK	0xffffff00U
#else
#define	IPv4_LOCALHOST	0x0000007fU
#define	IPv4_LH_MASK	0x00ffffffU
#endif

#define	IS_LOCAL_HOST(x)	( \
	((x)->tcp_connp->conn_ipversion == IPV4_VERSION && \
	((x)->tcp_connp->conn_laddr_v4 & IPv4_LH_MASK) == IPv4_LOCALHOST) || \
	((x)->tcp_connp->conn_ipversion == IPV6_VERSION && \
	IN6_IS_ADDR_LOOPBACK(&(x)->tcp_connp->conn_laddr_v6)))
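
/*
 * Illustrative sketch (not part of the build): why IPv4_LOCALHOST and
 * IPv4_LH_MASK come in endian-specific pairs.  conn_laddr_v4 is kept in
 * network byte order, so on a little-endian host the leading 127 octet of
 * 127.0.0.x occupies the low-order byte of the word.  A minimal userland
 * demonstration, assuming a little-endian machine when _BIG_ENDIAN is
 * undefined.
 */
#if 0
#include <stdio.h>
#include <stdint.h>
#include <arpa/inet.h>

int
main(void)
{
	uint32_t laddr = (uint32_t)inet_addr("127.0.0.1"); /* net order */
#if defined(_BIG_ENDIAN)
	uint32_t lh = 0x7f000000U, mask = 0xffffff00U;
#else
	uint32_t lh = 0x0000007fU, mask = 0x00ffffffU;
#endif
	printf("loopback? %s\n", (laddr & mask) == lh ? "yes" : "no");
	return (0);
}
#endif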
/*
 * Add a connection to the list of detached TIME_WAIT connections
 * and set its time to expire.
 */
void
tcp_time_wait_append(tcp_t *tcp)
{
	tcp_stack_t *tcps = tcp->tcp_tcps;
	squeue_t *sqp = tcp->tcp_connp->conn_sqp;
	tcp_squeue_priv_t *tsp =
	    *((tcp_squeue_priv_t **)squeue_getprivate(sqp, SQPRIVATE_TCP));
	int64_t now, schedule;
	unsigned int bucket;

	tcp_timers_stop(tcp);

	/* Freed above */
	ASSERT(tcp->tcp_timer_tid == 0);
	ASSERT(tcp->tcp_ack_tid == 0);

	/* must have happened at the time of detaching the tcp */
	ASSERT(TCP_IS_DETACHED(tcp));
	ASSERT(tcp->tcp_state == TCPS_TIME_WAIT);
	ASSERT(tcp->tcp_ptpahn == NULL);
	ASSERT(tcp->tcp_flow_stopped == 0);
	ASSERT(tcp->tcp_time_wait_next == NULL);
	ASSERT(tcp->tcp_time_wait_prev == NULL);
	ASSERT(tcp->tcp_time_wait_expire == 0);
	ASSERT(tcp->tcp_listener == NULL);

	TCP_DBGSTAT(tcps, tcp_time_wait);
	mutex_enter(&tsp->tcp_time_wait_lock);

	/*
	 * Immediately expire loopback connections.  Since there is no worry
	 * about packets on the local host showing up after a long network
	 * delay, this is safe and allows much higher rates of connection
	 * churn for applications operating locally.
	 *
	 * This typically bypasses the tcp_free_list fast path due to squeue
	 * re-entry for the loopback close operation.
	 */
	if (tcp->tcp_loopback) {
		tcp_time_wait_purge(tcp, tsp);
		mutex_exit(&tsp->tcp_time_wait_lock);
		return;
	}

	/*
	 * In order to reap TIME_WAITs reliably, we should use a source of
	 * time that is not adjustable by the user.  While it would be more
	 * accurate to grab this timestamp before (potentially) sleeping on
	 * the tcp_time_wait_lock, doing so complicates bucket addressing
	 * later.
	 */
	now = ddi_get_lbolt64();

	/*
	 * Each squeue uses an arbitrary time offset when scheduling
	 * expiration timers.  This prevents the bucketing from forcing
	 * tcp_time_wait_collector to run in lockstep across squeues.
	 *
	 * This offset is (re)initialized when a new TIME_WAIT connection is
	 * added to a squeue which has no connections waiting to expire.
	 */
	if (tsp->tcp_time_wait_tid == 0) {
		ASSERT(tsp->tcp_time_wait_cnt == 0);
		tsp->tcp_time_wait_offset =
		    now % MSEC_TO_TICK(TCP_TIME_WAIT_DELAY);
	}
	now -= tsp->tcp_time_wait_offset;

	/*
	 * Use the netstack-defined timeout, rounded up to the minimum
	 * time_wait_collector interval.
	 */
	schedule = now + MSEC_TO_TICK(tcps->tcps_time_wait_interval);
	tcp->tcp_time_wait_expire = schedule;

	/*
	 * Append the connection into the appropriate bucket.
	 */
	bucket = TW_BUCKET(tcp->tcp_time_wait_expire);
	tcp->tcp_time_wait_next = tsp->tcp_time_wait_bucket[bucket];
	tsp->tcp_time_wait_bucket[bucket] = tcp;
	if (tcp->tcp_time_wait_next != NULL) {
		ASSERT(tcp->tcp_time_wait_next->tcp_time_wait_prev == NULL);
		tcp->tcp_time_wait_next->tcp_time_wait_prev = tcp;
	}
	tsp->tcp_time_wait_cnt++;

	/*
	 * Round delay up to the nearest bucket boundary.
	 */
	schedule += MSEC_TO_TICK(TCP_TIME_WAIT_DELAY);
	schedule -= schedule % MSEC_TO_TICK(TCP_TIME_WAIT_DELAY);

	/*
	 * The newly inserted entry may require a tighter schedule for the
	 * expiration timer.
	 */
	if (schedule < tsp->tcp_time_wait_schedule) {
		callout_id_t old_tid = tsp->tcp_time_wait_tid;

		tsp->tcp_time_wait_schedule = schedule;
		tsp->tcp_time_wait_tid =
		    timeout_generic(CALLOUT_NORMAL,
		    tcp_time_wait_collector, sqp,
		    TICK_TO_NSEC(schedule - now),
		    CALLOUT_TCP_RESOLUTION, CALLOUT_FLAG_ROUNDUP);

		/*
		 * It is possible for the timer to fire before the untimeout
		 * action is able to complete.  In that case, the exclusion
		 * offered by the tcp_time_wait_collector_active flag will
		 * prevent multiple collector threads from processing records
		 * simultaneously from the same squeue.
		 */
		mutex_exit(&tsp->tcp_time_wait_lock);
		(void) untimeout_default(old_tid, 0);
		return;
	}

	/*
	 * Start a fresh timer if none exists.
	 */
	if (tsp->tcp_time_wait_schedule == 0) {
		ASSERT(tsp->tcp_time_wait_tid == 0);

		tsp->tcp_time_wait_schedule = schedule;
		tsp->tcp_time_wait_tid =
		    timeout_generic(CALLOUT_NORMAL,
		    tcp_time_wait_collector, sqp,
		    TICK_TO_NSEC(schedule - now),
		    CALLOUT_TCP_RESOLUTION, CALLOUT_FLAG_ROUNDUP);
	}
	mutex_exit(&tsp->tcp_time_wait_lock);
}
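
/*
 * Illustrative sketch (not part of the build): the bucket-boundary round-up
 * applied to schedule above.  Adding one full interval and then shaving off
 * the remainder moves any mid-bucket timestamp to the next boundary, so the
 * collector only fires once every connection in the bucket has expired.  The
 * interval value is a hypothetical stand-in for
 * MSEC_TO_TICK(TCP_TIME_WAIT_DELAY).
 */
#if 0
#include <stdio.h>
#include <stdint.h>

int
main(void)
{
	const int64_t interval = 100;
	int64_t schedule = 1234;	/* expiry inside a bucket */

	schedule += interval;
	schedule -= schedule % interval;
	printf("%lld\n", (long long)schedule);	/* 1300 */
	return (0);
}
#endif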
/*
 * Wrapper to call tcp_close_detached() via squeue to clean up TIME-WAIT
 * tcp_t.  Used in tcp_time_wait_collector().
 */
/* ARGSUSED */
static void
tcp_timewait_close(void *arg, mblk_t *mp, void *arg2, ip_recv_attr_t *dummy)
{
	conn_t *connp = (conn_t *)arg;
	tcp_t *tcp = connp->conn_tcp;

	ASSERT(tcp != NULL);
	if (tcp->tcp_state == TCPS_CLOSED) {
		return;
	}

	ASSERT((connp->conn_family == AF_INET &&
	    connp->conn_ipversion == IPV4_VERSION) ||
	    (connp->conn_family == AF_INET6 &&
	    (connp->conn_ipversion == IPV4_VERSION ||
	    connp->conn_ipversion == IPV6_VERSION)));
	ASSERT(!tcp->tcp_listener);
	ASSERT(TCP_IS_DETACHED(tcp));

	/*
	 * Because they have no upstream client to rebind or tcp_close()
	 * them later, we axe the connection here and now.
	 */
	tcp_close_detached(tcp);
}
static void
tcp_time_wait_purge(tcp_t *tcp, tcp_squeue_priv_t *tsp)
{
	mblk_t *mp;
	conn_t *connp = tcp->tcp_connp;
	kmutex_t *lock;

	ASSERT(MUTEX_HELD(&tsp->tcp_time_wait_lock));
	ASSERT(connp->conn_fanout != NULL);

	lock = &connp->conn_fanout->connf_lock;

	/*
	 * This is essentially a TIME_WAIT reclaim fast path optimization for
	 * performance where the connection is checked under the fanout lock
	 * (so that no one else can get access to the conn_t) to verify that
	 * the refcnt is 2 (one each for TCP and the classifier hash list).
	 * If that is the case and clustering callbacks are not enabled, the
	 * conn can be removed under the fanout lock, avoiding clean-up under
	 * the squeue.
	 *
	 * This optimization is forgone when clustering is enabled since the
	 * clustering callback must be made before setting the CONDEMNED flag
	 * and after dropping all locks.
	 *
	 * See the comments in tcp_closei_local for additional information
	 * regarding the refcnt logic.
	 */
	if (mutex_tryenter(lock)) {
		mutex_enter(&connp->conn_lock);
		if (connp->conn_ref == 2 && cl_inet_disconnect == NULL) {
			ipcl_hash_remove_locked(connp, connp->conn_fanout);
			/*
			 * Set the CONDEMNED flag now itself so that the
			 * refcnt cannot increase due to any walker.
			 */
			connp->conn_state_flags |= CONN_CONDEMNED;
			mutex_exit(&connp->conn_lock);
			mutex_exit(lock);

			if (tsp->tcp_free_list_cnt < tcp_free_list_max_cnt) {
				/*
				 * Add to head of tcp_free_list
				 */
				tcp_cleanup(tcp);
				ASSERT(connp->conn_latch == NULL);
				ASSERT(connp->conn_policy == NULL);
				ASSERT(tcp->tcp_tcps == NULL);
				ASSERT(connp->conn_netstack == NULL);

				tcp->tcp_time_wait_next = tsp->tcp_free_list;
				tcp->tcp_in_free_list = B_TRUE;
				tsp->tcp_free_list = tcp;
				tsp->tcp_free_list_cnt++;
			} else {
				/*
				 * Do not add to tcp_free_list
				 */
				tcp_bind_hash_remove(tcp);
				ixa_cleanup(tcp->tcp_connp->conn_ixa);
				tcp_ipsec_cleanup(tcp);
				CONN_DEC_REF(tcp->tcp_connp);
			}

			/*
			 * With the fast-path complete, we can bail.
			 */
			return;
		}

		/*
		 * Fall back to slow path.
		 */
		CONN_INC_REF_LOCKED(connp);
		mutex_exit(&connp->conn_lock);
		mutex_exit(lock);
	} else {
		CONN_INC_REF(connp);
	}

	/*
	 * We can reuse the closemp here since conn has detached (otherwise we
	 * wouldn't even be in time_wait list). It is safe to change
	 * tcp_closemp_used without taking a lock as no other thread can
	 * concurrently access it at this point in the connection lifecycle.
	 */
	if (tcp->tcp_closemp.b_prev == NULL) {
		tcp->tcp_closemp_used = B_TRUE;
	} else {
		cmn_err(CE_PANIC,
		    "tcp_timewait_collector: concurrent use of tcp_closemp: "
		    "connp %p tcp %p\n", (void *)connp, (void *)tcp);
	}

	TCP_DEBUG_GETPCSTACK(tcp->tcmp_stk, 15);
	mp = &tcp->tcp_closemp;
	mutex_exit(&tsp->tcp_time_wait_lock);
	SQUEUE_ENTER_ONE(connp->conn_sqp, mp, tcp_timewait_close, connp, NULL,
	    SQ_FILL, SQTAG_TCP_TIMEWAIT);
	mutex_enter(&tsp->tcp_time_wait_lock);
}
/*
 * Purge any tcp_t instances associated with this squeue which have expired
 * from the TIME_WAIT state.
 */
void
tcp_time_wait_collector(void *arg)
{
	tcp_t *tcp;
	int64_t now, sched_active, sched_cur, sched_new;
	unsigned int idx;

	squeue_t *sqp = (squeue_t *)arg;
	tcp_squeue_priv_t *tsp =
	    *((tcp_squeue_priv_t **)squeue_getprivate(sqp, SQPRIVATE_TCP));

	mutex_enter(&tsp->tcp_time_wait_lock);

	/*
	 * Because of timer scheduling complexity and the fact that the
	 * tcp_time_wait_lock is dropped during tcp_time_wait_purge, it is
	 * possible for multiple tcp_time_wait_collector threads to run
	 * against the same squeue.  This flag is used to exclude other
	 * collectors from the squeue during execution.
	 */
	if (tsp->tcp_time_wait_collector_active) {
		mutex_exit(&tsp->tcp_time_wait_lock);
		return;
	}
	tsp->tcp_time_wait_collector_active = B_TRUE;

	/*
	 * After its assignment here, the value of sched_active must not be
	 * altered as it is used to validate the state of the
	 * tcp_time_wait_collector callout schedule for this squeue.
	 *
	 * The same does not hold true of sched_cur, which holds the
	 * timestamp of the bucket undergoing processing.  While it is
	 * initially equal to sched_active, certain conditions below can walk
	 * it forward, triggering the retry loop.
	 */
	sched_cur = sched_active = tsp->tcp_time_wait_schedule;

	/*
	 * Purge the free list if necessary
	 */
	if (tsp->tcp_free_list != NULL) {
		TCP_G_STAT(tcp_freelist_cleanup);
		while ((tcp = tsp->tcp_free_list) != NULL) {
			tsp->tcp_free_list = tcp->tcp_time_wait_next;
			tcp->tcp_time_wait_next = NULL;
			tsp->tcp_free_list_cnt--;
			ASSERT(tcp->tcp_tcps == NULL);
			CONN_DEC_REF(tcp->tcp_connp);
		}
		ASSERT(tsp->tcp_free_list_cnt == 0);
	}

	/*
	 * If there are no connections pending, clear timer-related state to
	 * be reinitialized by the next caller.
	 */
	if (tsp->tcp_time_wait_cnt == 0) {
		tsp->tcp_time_wait_offset = 0;
		tsp->tcp_time_wait_schedule = 0;
		tsp->tcp_time_wait_tid = 0;
		tsp->tcp_time_wait_collector_active = B_FALSE;
		mutex_exit(&tsp->tcp_time_wait_lock);
		return;
	}

retry:
	/*
	 * Grab the bucket which we were scheduled to cleanse.
	 */
	idx = TW_BUCKET(sched_cur - 1);
	now = ddi_get_lbolt64() - tsp->tcp_time_wait_offset;
	tcp = tsp->tcp_time_wait_bucket[idx];

	while (tcp != NULL) {
		/*
		 * Since the bucket count is sized to prevent wrap-around
		 * during typical operation and timers are scheduled to
		 * process buckets with only expired connections, there is
		 * only one reason to encounter a connection expiring in the
		 * future: The tcp_time_wait_collector thread has been so
		 * delayed in its processing that connections have wrapped
		 * around the timing wheel into this bucket.
		 *
		 * In that case, the remaining entries in the bucket can be
		 * ignored since, being appended sequentially, they should
		 * all expire in the future.
		 */
		if (now < tcp->tcp_time_wait_expire) {
			break;
		}

		/*
		 * Pull the connection out of the bucket.
		 */
		VERIFY(tcp_time_wait_remove(tcp, tsp));

		/*
		 * Purge the connection.
		 *
		 * While tcp_time_wait_lock will be temporarily dropped as
		 * part of the process, there is no risk of the timer being
		 * (re)scheduled while the collector is running since a value
		 * corresponding to the past is left in
		 * tcp_time_wait_schedule.
		 */
		tcp_time_wait_purge(tcp, tsp);

		/*
		 * Because tcp_time_wait_remove clears the tcp_time_wait_next
		 * field, the next item must be grabbed directly from the
		 * bucket itself.
		 */
		tcp = tsp->tcp_time_wait_bucket[idx];
	}

	if (tsp->tcp_time_wait_cnt == 0) {
		/*
		 * There is not a need for the collector to schedule a new
		 * timer if no pending items remain.  The timer state can be
		 * cleared only if it was untouched while the collector
		 * dropped its locks during tcp_time_wait_purge.
		 */
		if (tsp->tcp_time_wait_schedule == sched_active) {
			tsp->tcp_time_wait_offset = 0;
			tsp->tcp_time_wait_schedule = 0;
			tsp->tcp_time_wait_tid = 0;
		}
		tsp->tcp_time_wait_collector_active = B_FALSE;
		mutex_exit(&tsp->tcp_time_wait_lock);
		return;
	} else {
		unsigned int nidx;

		/*
		 * Locate the next bucket containing entries.
		 */
		sched_new = sched_cur + MSEC_TO_TICK(TCP_TIME_WAIT_DELAY);
		nidx = TW_BUCKET_NEXT(idx);
		while (tsp->tcp_time_wait_bucket[nidx] == NULL) {
			nidx = TW_BUCKET_NEXT(nidx);
			sched_new += MSEC_TO_TICK(TCP_TIME_WAIT_DELAY);
		}
		ASSERT(tsp->tcp_time_wait_bucket[nidx] != NULL);
	}

	/*
	 * It is possible that the system is under such dire load that
	 * between the timer scheduling and TIME_WAIT processing delay,
	 * execution overran the interval allocated to this bucket.
	 */
	now = ddi_get_lbolt64() - tsp->tcp_time_wait_offset;
	if (sched_new <= now) {
		/*
		 * Attempt to right the situation by immediately performing a
		 * purge on the next bucket.  This loop will continue as
		 * needed until the schedule can be pushed out ahead of the
		 * clock.
		 */
		sched_cur = sched_new;
		DTRACE_PROBE3(tcp__time__wait__overrun,
		    tcp_squeue_priv_t *, tsp, int64_t, sched_new,
		    int64_t, now);
		goto retry;
	}

	/*
	 * Another thread may have snuck in to reschedule the timer while
	 * locks were dropped during tcp_time_wait_purge.  Defer to the
	 * running timer if that is the case.
	 */
	if (tsp->tcp_time_wait_schedule != sched_active) {
		tsp->tcp_time_wait_collector_active = B_FALSE;
		mutex_exit(&tsp->tcp_time_wait_lock);
		return;
	}

	/*
	 * Schedule the next timer.
	 */
	tsp->tcp_time_wait_schedule = sched_new;
	tsp->tcp_time_wait_tid =
	    timeout_generic(CALLOUT_NORMAL,
	    tcp_time_wait_collector, sqp,
	    TICK_TO_NSEC(sched_new - now),
	    CALLOUT_TCP_RESOLUTION, CALLOUT_FLAG_ROUNDUP);
	tsp->tcp_time_wait_collector_active = B_FALSE;
	mutex_exit(&tsp->tcp_time_wait_lock);
}
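
/*
 * Illustrative sketch (not part of the build): the collector's overrun-retry
 * structure in miniature.  When the next deadline is already in the past by
 * the time a bucket has been purged, the loop advances straight to the next
 * bucket instead of rearming a timer that would fire late.  This standalone
 * model uses counts in place of tcp_t lists and hypothetical constants.
 */
#if 0
#include <stdio.h>
#include <stdint.h>

#define	EX_BUCKETS	4
#define	EX_INTERVAL	100

int
main(void)
{
	int buckets[EX_BUCKETS] = { 3, 1, 2, 1 };
	int64_t sched_cur = 100;	/* deadline of the first bucket */
	int64_t now = 350;		/* the collector woke up late */

	for (;;) {
		unsigned int idx =
		    (unsigned int)((sched_cur - 1) / EX_INTERVAL) % EX_BUCKETS;
		int64_t sched_new;

		printf("purge bucket %u (%d conns)\n", idx, buckets[idx]);
		buckets[idx] = 0;

		sched_new = sched_cur + EX_INTERVAL;
		if (sched_new > now) {
			printf("rearm timer for %lld\n",
			    (long long)sched_new);
			break;
		}
		/* Overrun: process the next bucket without sleeping. */
		sched_cur = sched_new;
	}
	return (0);
}
#endif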
/*
 * tcp_time_wait_processing() handles processing of incoming packets when
 * the tcp_t is in the TIME_WAIT state.
 *
 * A TIME_WAIT tcp_t that has an associated open TCP end point (not in
 * detached state) is never put on the time wait list.
 */
void
tcp_time_wait_processing(tcp_t *tcp, mblk_t *mp, uint32_t seg_seq,
    uint32_t seg_ack, int seg_len, tcpha_t *tcpha, ip_recv_attr_t *ira)
{
	int32_t		bytes_acked;
	int32_t		gap;
	int32_t		rgap;
	tcp_opt_t	tcpopt;
	uint_t		flags;
	uint32_t	new_swnd = 0;
	conn_t		*nconnp;
	conn_t		*connp = tcp->tcp_connp;
	tcp_stack_t	*tcps = tcp->tcp_tcps;

	BUMP_LOCAL(tcp->tcp_ibsegs);
	DTRACE_PROBE2(tcp__trace__recv, mblk_t *, mp, tcp_t *, tcp);

	flags = (unsigned int)tcpha->tha_flags & 0xFF;
	new_swnd = ntohs(tcpha->tha_win) <<
	    ((tcpha->tha_flags & TH_SYN) ? 0 : tcp->tcp_snd_ws);

	if (tcp->tcp_snd_ts_ok && !(tcpha->tha_flags & TH_RST)) {
		int options;

		if (tcp->tcp_snd_sack_ok)
			tcpopt.tcp = tcp;
		else
			tcpopt.tcp = NULL;

		options = tcp_parse_options(tcpha, &tcpopt);

		if (!(options & TCP_OPT_TSTAMP_PRESENT)) {
			DTRACE_TCP1(droppedtimestamp, tcp_t *, tcp);
			goto done;
		} else if (!tcp_paws_check(tcp, &tcpopt)) {
			tcp_xmit_ctl(NULL, tcp, tcp->tcp_snxt, tcp->tcp_rnxt,
			    TH_ACK);
			goto done;
		}
	}

	gap = seg_seq - tcp->tcp_rnxt;
	rgap = tcp->tcp_rwnd - (gap + seg_len);
	if (gap < 0) {
		TCPS_BUMP_MIB(tcps, tcpInDataDupSegs);
		TCPS_UPDATE_MIB(tcps, tcpInDataDupBytes,
		    (seg_len > -gap ? -gap : seg_len));
		seg_len += gap;
		if (seg_len < 0 || (seg_len == 0 && !(flags & TH_FIN))) {
			if (flags & TH_RST) {
				goto done;
			}
			if ((flags & TH_FIN) && seg_len == -1) {
				/*
				 * When TCP receives a duplicate FIN in
				 * TIME_WAIT state, restart the 2 MSL timer.
				 * See page 73 in RFC 793. Make sure this TCP
				 * is already on the TIME_WAIT list. If not,
				 * just restart the timer.
				 */
				if (TCP_IS_DETACHED(tcp)) {
					if (tcp_time_wait_remove(tcp, NULL) ==
					    B_TRUE) {
						tcp_time_wait_append(tcp);
					}
				} else {
					TCP_TIMER_RESTART(tcp,
					    tcps->tcps_time_wait_interval);
				}
				tcp_xmit_ctl(NULL, tcp, tcp->tcp_snxt,
				    tcp->tcp_rnxt, TH_ACK);
				goto done;
			}
			flags |= TH_ACK_NEEDED;
			seg_len = 0;
			goto process_ack;
		}

		/* Fix seg_seq, and chew the gap off the front. */
		seg_seq = tcp->tcp_rnxt;
	}

	if ((flags & TH_SYN) && gap > 0 && rgap < 0) {
		/*
		 * Make sure that when we accept the connection, pick
		 * an ISS greater than (tcp_snxt + tcp_iss_incr/2) for the
		 * old connection.
		 *
		 * The next ISS generated is equal to tcp_iss_incr_extra
		 * + tcp_iss_incr/2 + other components depending on the
		 * value of tcp_strong_iss.  We pre-calculate the new
		 * ISS here and compare with tcp_snxt to determine if
		 * we need to make adjustment to tcp_iss_incr_extra.
		 *
		 * The above calculation is ugly and is a
		 * waste of CPU cycles...
		 */
		uint32_t new_iss = tcps->tcps_iss_incr_extra;
		int32_t adj;
		ip_stack_t *ipst = tcps->tcps_netstack->netstack_ip;

		switch (tcps->tcps_strong_iss) {
		case 2: {
			/* Add time and MD5 components. */
			uint32_t answer[4];
			struct {
				uint32_t ports;
				in6_addr_t src;
				in6_addr_t dst;
			} arg;
			MD5_CTX context;

			mutex_enter(&tcps->tcps_iss_key_lock);
			context = tcps->tcps_iss_key;
			mutex_exit(&tcps->tcps_iss_key_lock);
			arg.ports = connp->conn_ports;
			/* We use MAPPED addresses in tcp_iss_init */
			arg.src = connp->conn_laddr_v6;
			arg.dst = connp->conn_faddr_v6;
			MD5Update(&context, (uchar_t *)&arg, sizeof (arg));
			MD5Final((uchar_t *)answer, &context);
			answer[0] ^= answer[1] ^ answer[2] ^ answer[3];
			new_iss += (gethrtime() >> ISS_NSEC_SHT) + answer[0];
			break;
		}
		case 1:
			/* Add time component and min random (i.e. 1). */
			new_iss += (gethrtime() >> ISS_NSEC_SHT) + 1;
			break;
		default:
			/* Add only time component. */
			new_iss += (uint32_t)gethrestime_sec() *
			    tcps->tcps_iss_incr;
			break;
		}
		if ((adj = (int32_t)(tcp->tcp_snxt - new_iss)) > 0) {
			/*
			 * New ISS not guaranteed to be tcp_iss_incr/2
			 * ahead of the current tcp_snxt, so add the
			 * difference to tcp_iss_incr_extra.
			 */
			tcps->tcps_iss_incr_extra += adj;
		}
		/*
		 * If tcp_clean_death() can not perform the task now,
		 * drop the SYN packet and let the other side re-xmit.
		 * Otherwise pass the SYN packet back in, since the
		 * old tcp state has been cleaned up or freed.
		 */
		if (tcp_clean_death(tcp, 0) == -1)
			goto done;
		nconnp = ipcl_classify(mp, ira, ipst);
		if (nconnp != NULL) {
			TCP_STAT(tcps, tcp_time_wait_syn_success);
			/* Drops ref on nconnp */
			tcp_reinput(nconnp, mp, ira, ipst);
			return;
		}
		goto done;
	}

	/*
	 * rgap is the amount of stuff received out of window.  A negative
	 * value is the amount out of window.
	 */
	if (rgap < 0) {
		TCPS_BUMP_MIB(tcps, tcpInDataPastWinSegs);
		TCPS_UPDATE_MIB(tcps, tcpInDataPastWinBytes, -rgap);
		/* Fix seg_len and make sure there is something left. */
		seg_len += rgap;
		if (seg_len <= 0) {
			if (flags & TH_RST) {
				goto done;
			}
			flags |= TH_ACK_NEEDED;
			seg_len = 0;
			goto process_ack;
		}
	}

	/*
	 * Check whether we can update tcp_ts_recent. This test is from RFC
	 * 7323, section 5.3.
	 */
	if (tcp->tcp_snd_ts_ok && !(flags & TH_RST) &&
	    TSTMP_GEQ(tcpopt.tcp_opt_ts_val, tcp->tcp_ts_recent) &&
	    SEQ_LEQ(seg_seq, tcp->tcp_rack)) {
		tcp->tcp_ts_recent = tcpopt.tcp_opt_ts_val;
		tcp->tcp_last_rcv_lbolt = ddi_get_lbolt64();
	}

	if (seg_seq != tcp->tcp_rnxt && seg_len > 0) {
		/* Always ack out of order packets */
		flags |= TH_ACK_NEEDED;
		seg_len = 0;
	} else if (seg_len > 0) {
		TCPS_BUMP_MIB(tcps, tcpInClosed);
		TCPS_BUMP_MIB(tcps, tcpInDataInorderSegs);
		TCPS_UPDATE_MIB(tcps, tcpInDataInorderBytes, seg_len);
	}
	if (flags & TH_RST) {
		(void) tcp_clean_death(tcp, 0);
		goto done;
	}
	if (flags & TH_SYN) {
		tcp_xmit_ctl("TH_SYN", tcp, seg_ack, seg_seq + 1,
		    TH_RST|TH_ACK);
		/*
		 * Do not delete the TCP structure if it is in
		 * TIME_WAIT state. Refer to RFC 1122, 4.2.2.13.
		 */
		goto done;
	}
process_ack:
	if (flags & TH_ACK) {
		bytes_acked = (int)(seg_ack - tcp->tcp_suna);
		if (bytes_acked <= 0) {
			if (bytes_acked == 0 && seg_len == 0 &&
			    new_swnd == tcp->tcp_swnd)
				TCPS_BUMP_MIB(tcps, tcpInDupAck);
		} else {
			/* Acks something not sent */
			flags |= TH_ACK_NEEDED;
		}
	}
	if (flags & TH_ACK_NEEDED) {
		/*
		 * Time to send an ack for some reason.
		 */
		tcp_xmit_ctl(NULL, tcp, tcp->tcp_snxt,
		    tcp->tcp_rnxt, TH_ACK);
	}
done:
	freemsg(mp);
}
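
/*
 * Illustrative sketch (not part of the build): the gap/rgap arithmetic used
 * above.  TCP sequence numbers wrap, so differences are computed in unsigned
 * 32-bit space and reinterpreted as signed: gap < 0 means the segment begins
 * before tcp_rnxt (duplicate data) and rgap < 0 means it extends past the
 * receive window.  Values here are hypothetical.
 */
#if 0
#include <stdio.h>
#include <stdint.h>

int
main(void)
{
	uint32_t rnxt = 5000;		/* next expected sequence number */
	uint32_t rwnd = 1000;		/* receive window */
	uint32_t seg_seq = 4900;	/* segment starts 100 bytes early */
	int seg_len = 300;

	int32_t gap = (int32_t)(seg_seq - rnxt);
	int32_t rgap = (int32_t)(rwnd - (gap + seg_len));

	printf("gap=%d rgap=%d\n", gap, rgap);	/* gap=-100 rgap=800 */
	return (0);
}
#endif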