4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
23 * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
26 #include <sys/types.h>
27 #include <sys/systm.h>
28 #include <sys/stream.h>
29 #include <sys/cmn_err.h>
30 #define _SUN_TPI_VERSION 2
31 #include <sys/tihdr.h>
32 #include <sys/socket.h>
33 #include <sys/stropts.h>
34 #include <sys/strsun.h>
35 #include <sys/strsubr.h>
36 #include <sys/socketvar.h>
37 #include <inet/common.h>
40 #include <inet/ip_ire.h>
42 #include <inet/sctp_ip.h>
43 #include <inet/ipclassifier.h>
48 * A message can expire before it gets to the transmit list (i.e. it is still
49 * in the unsent list - unchunked), after it gets to the transmit list, but
50 * before transmission has actually started, or after transmission has begun.
51 * Accordingly, we check for the status of a message in sctp_chunkify() when
52 * the message is being transferred from the unsent list to the transmit list;
53 * in sctp_get_msg_to_send(), when we get the next chunk from the transmit
54 * list and in sctp_rexmit() when we get the next chunk to be (re)transmitted.
55 * When we nuke a message in sctp_chunkify(), all we need to do is take it
56 * out of the unsent list and update sctp_unsent; when a message is deemed
57 * timed-out in sctp_get_msg_to_send() we can just take it out of the transmit
58 * list, update sctp_unsent IFF transmission for the message has not yet begun
59 * (i.e. !SCTP_CHUNK_ISSENT(meta->b_cont)). However, if transmission for the
60 * message has started, then we cannot just take it out of the list, we need
61 * to send Forward TSN chunk to the peer so that the peer can clear its
62 * fragment list for this message. However, we cannot just send the Forward
63 * TSN in sctp_get_msg_to_send() because there might be unacked chunks for
64 * messages preceding this abandoned message. So, we send a Forward TSN
65 * IFF all messages prior to this abandoned message have been SACK'd; if not,
66 * we defer sending the Forward TSN to sctp_cumack(), which will check for
67 * this condition and send the Forward TSN via sctp_check_abandoned_msg(). In
68 * sctp_rexmit() when we check for retransmissions, we need to determine if
69 * the advanced peer ack point can be moved ahead, and if so, send a Forward
70 * TSN to the peer instead of retransmitting the chunk. Note that when
71 * we send a Forward TSN for a message, there may be yet unsent chunks for
72 * this message; we need to mark all such chunks as abandoned, so that
73 * sctp_cumack() can take the message out of the transmit list, additionally
74 * sctp_unsent needs to be adjusted. Whenever sctp_unsent is updated (i.e.
75 * decremented when a message/chunk is deemed abandoned), sockfs needs to
76 * be notified so that it can adjust its idea of the queued message.
79 #include <inet/sctp/sctp_impl.h>
/*
 * kmem cache backing the sctp_ftsn_set_t entries used for PR-SCTP.
 * It is created in sctp_ftsn_sets_init(), destroyed in
 * sctp_ftsn_sets_fini(), and entries are returned to it by
 * sctp_free_ftsn_set().
 */
81 static struct kmem_cache
*sctp_kmem_ftsn_set_cache
;
/*
 * Forward declaration. The definition further down names the three int
 * arguments mss, firstseg_len and bytes_to_send, in that order.
 */
82 static mblk_t
*sctp_chunkify(sctp_t
*, int, int, int);
/*
 * Forward declaration of the debug chain checker; the arguments are the
 * head and tail of the mblk chain. All callers visible in this file invoke
 * it from inside ASSERT().
 */
85 static boolean_t
sctp_verify_chain(mblk_t
*, mblk_t
*);
89 * Called to allocate a header mblk when sending data to SCTP.
90 * Data will follow in b_cont of this mblk.
/*
 * Builds a T_UNITDATA_REQ header in a newly allocated mblk.
 *
 *   name, nlen     - destination address bytes; copied immediately after
 *                    the fixed struct T_unitdata_req.
 *   control, clen  - option (ancillary) bytes; copied at OPT_offset, i.e.
 *                    after the address rounded up by _TPI_ALIGN_TOPT().
 *
 * On the path shown, b_wptr is advanced past the options and the data
 * block type is set to M_PROTO.
 *
 * NOTE(review): this listing has gaps. The trailing parameter(s), the
 * return type, the handling of a failed allocation and the statements
 * guarding the two bcopy() calls are not visible here - confirm against
 * the full source before relying on them.
 */
93 sctp_alloc_hdr(const char *name
, int nlen
, const char *control
, int clen
,
97 struct T_unitdata_req
*tudr
;
/* Space for the fixed header, the aligned address and the options. */
101 size
= sizeof (*tudr
) + _TPI_ALIGN_TOPT(nlen
) + clen
;
/*
 * Never allocate less than a sctp_msg_hdr_t: sctp_sendmsg() later
 * overwrites this same mblk with a sctp_msg_hdr_t and asserts that
 * MBLKSIZE() is large enough for it.
 */
102 size
= MAX(size
, sizeof (sctp_msg_hdr_t
));
/* SCTP_CAN_BLOCK selects allocb_wait(); the other call is plain allocb(). */
103 if (flags
& SCTP_CAN_BLOCK
) {
104 mp
= allocb_wait(size
, BPRI_MED
, 0, &error
);
106 mp
= allocb(size
, BPRI_MED
);
/* Fill in the TPI primitive at the start of the new mblk. */
109 tudr
= (struct T_unitdata_req
*)mp
->b_rptr
;
110 tudr
->PRIM_type
= T_UNITDATA_REQ
;
111 tudr
->DEST_length
= nlen
;
/* The address sits directly behind the fixed header ... */
112 tudr
->DEST_offset
= sizeof (*tudr
);
113 tudr
->OPT_length
= clen
;
/* ... and the options follow the address, at a TPI-aligned offset. */
114 tudr
->OPT_offset
= (t_scalar_t
)(sizeof (*tudr
) +
115 _TPI_ALIGN_TOPT(nlen
));
/* tudr + 1 is the first byte after the fixed header (== DEST_offset). */
117 bcopy(name
, tudr
+ 1, nlen
);
119 bcopy(control
, (char *)tudr
+ tudr
->OPT_offset
, clen
);
/* OPT_offset + clen is the total number of bytes written above. */
120 mp
->b_wptr
+= (tudr
->OPT_offset
+ clen
);
121 mp
->b_datap
->db_type
= M_PROTO
;
128 sctp_sendmsg(sctp_t
*sctp
, mblk_t
*mp
, int flags
)
130 sctp_faddr_t
*fp
= NULL
;
131 struct T_unitdata_req
*tudr
;
136 uint16_t sid
= sctp
->sctp_def_stream
;
137 uint32_t ppid
= sctp
->sctp_def_ppid
;
138 uint32_t context
= sctp
->sctp_def_context
;
139 uint16_t msg_flags
= sctp
->sctp_def_flags
;
140 sctp_msg_hdr_t
*sctp_msg_hdr
;
141 uint32_t msg_len
= 0;
142 uint32_t timetolive
= sctp
->sctp_def_timetolive
;
143 conn_t
*connp
= sctp
->sctp_connp
;
145 ASSERT(DB_TYPE(mproto
) == M_PROTO
);
148 ASSERT(mp
== NULL
|| DB_TYPE(mp
) == M_DATA
);
150 tudr
= (struct T_unitdata_req
*)mproto
->b_rptr
;
151 ASSERT(tudr
->PRIM_type
== T_UNITDATA_REQ
);
153 /* Get destination address, if specified */
154 if (tudr
->DEST_length
> 0) {
158 sin
= (struct sockaddr_in
*)
159 (mproto
->b_rptr
+ tudr
->DEST_offset
);
160 switch (sin
->sin_family
) {
162 if (tudr
->DEST_length
< sizeof (*sin
)) {
165 IN6_IPADDR_TO_V4MAPPED(sin
->sin_addr
.s_addr
, &tmpaddr
);
169 if (tudr
->DEST_length
< sizeof (*sin6
)) {
172 sin6
= (struct sockaddr_in6
*)
173 (mproto
->b_rptr
+ tudr
->DEST_offset
);
174 addr
= &sin6
->sin6_addr
;
177 return (EAFNOSUPPORT
);
179 fp
= sctp_lookup_faddr(sctp
, addr
);
184 /* Ancillary Data? */
185 if (tudr
->OPT_length
> 0) {
186 struct cmsghdr
*cmsg
;
188 struct sctp_sndrcvinfo
*sndrcv
;
190 cmsg
= (struct cmsghdr
*)(mproto
->b_rptr
+ tudr
->OPT_offset
);
191 cend
= ((char *)cmsg
+ tudr
->OPT_length
);
192 ASSERT(cend
<= (char *)mproto
->b_wptr
);
195 if ((char *)(cmsg
+ 1) > cend
||
196 ((char *)cmsg
+ cmsg
->cmsg_len
) > cend
) {
199 if ((cmsg
->cmsg_level
== IPPROTO_SCTP
) &&
200 (cmsg
->cmsg_type
== SCTP_SNDRCV
)) {
202 (sizeof (*sndrcv
) + sizeof (*cmsg
))) {
205 sndrcv
= (struct sctp_sndrcvinfo
*)(cmsg
+ 1);
206 sid
= sndrcv
->sinfo_stream
;
207 msg_flags
= sndrcv
->sinfo_flags
;
208 ppid
= sndrcv
->sinfo_ppid
;
209 context
= sndrcv
->sinfo_context
;
210 timetolive
= sndrcv
->sinfo_timetolive
;
213 if (cmsg
->cmsg_len
> 0)
214 cmsg
= CMSG_NEXT(cmsg
);
219 if (msg_flags
& MSG_ABORT
) {
220 if (mp
&& mp
->b_cont
) {
221 mblk_t
*pump
= msgpullup(mp
, -1);
230 sctp_user_abort(sctp
, mp
);
239 /* Reject any new data requests if we are shutting down */
240 if (sctp
->sctp_state
> SCTPS_ESTABLISHED
||
241 (sctp
->sctp_connp
->conn_state_flags
& CONN_CLOSING
)) {
246 /* Re-use the mproto to store relevant info. */
247 ASSERT(MBLKSIZE(mproto
) >= sizeof (*sctp_msg_hdr
));
249 mproto
->b_rptr
= mproto
->b_datap
->db_base
;
250 mproto
->b_wptr
= mproto
->b_rptr
+ sizeof (*sctp_msg_hdr
);
252 sctp_msg_hdr
= (sctp_msg_hdr_t
*)mproto
->b_rptr
;
253 bzero(sctp_msg_hdr
, sizeof (*sctp_msg_hdr
));
254 sctp_msg_hdr
->smh_context
= context
;
255 sctp_msg_hdr
->smh_sid
= sid
;
256 sctp_msg_hdr
->smh_ppid
= ppid
;
257 sctp_msg_hdr
->smh_flags
= msg_flags
;
258 sctp_msg_hdr
->smh_ttl
= MSEC_TO_TICK(timetolive
);
259 sctp_msg_hdr
->smh_tob
= ddi_get_lbolt64();
260 for (; mp
!= NULL
; mp
= mp
->b_cont
)
261 msg_len
+= MBLKL(mp
);
262 sctp_msg_hdr
->smh_msglen
= msg_len
;
264 /* User requested specific destination */
265 SCTP_SET_CHUNK_DEST(mproto
, fp
);
267 if (sctp
->sctp_state
>= SCTPS_COOKIE_ECHOED
&&
268 sid
>= sctp
->sctp_num_ostr
) {
269 /* Send sendfail event */
270 sctp_sendfail_event(sctp
, dupmsg(mproto
), SCTP_ERR_BAD_SID
,
278 sctp_sendfail_event(sctp
, dupmsg(mproto
),
279 SCTP_ERR_NO_USR_DATA
, B_FALSE
);
284 /* Add it to the unsent list */
285 if (sctp
->sctp_xmit_unsent
== NULL
) {
286 sctp
->sctp_xmit_unsent
= sctp
->sctp_xmit_unsent_tail
= mproto
;
288 sctp
->sctp_xmit_unsent_tail
->b_next
= mproto
;
289 sctp
->sctp_xmit_unsent_tail
= mproto
;
291 sctp
->sctp_unsent
+= msg_len
;
292 BUMP_LOCAL(sctp
->sctp_msgcount
);
294 * Notify sockfs if the tx queue is full.
296 if (SCTP_TXQ_LEN(sctp
) >= connp
->conn_sndbuf
) {
297 sctp
->sctp_txq_full
= 1;
298 sctp
->sctp_ulp_txq_full(sctp
->sctp_ulpd
, B_TRUE
);
300 if (sctp
->sctp_state
== SCTPS_ESTABLISHED
)
301 sctp_output(sctp
, UINT_MAX
);
312 * While there are messages on sctp_xmit_unsent, detach each one. For each:
313 * allocate space for the chunk header, fill in the data chunk, and fill in
314 * the chunk header. Then append it to sctp_xmit_tail.
315 * Return after appending as many bytes as required (bytes_to_send).
316 * We also return if we've appended one or more chunks, and find a subsequent
317 * unsent message is too big to fit in the segment.
320 sctp_chunkify(sctp_t
*sctp
, int mss
, int firstseg_len
, int bytes_to_send
)
326 mblk_t
*chunk_tail
= NULL
;
329 sctp_data_hdr_t
*sdc
;
330 mblk_t
*mdblk
= sctp
->sctp_xmit_unsent
;
334 sctp_msg_hdr_t
*msg_hdr
;
335 sctp_stack_t
*sctps
= sctp
->sctp_sctps
;
336 sctp_msg_hdr_t
*next_msg_hdr
;
338 int remaining_len
= mss
- firstseg_len
;
340 ASSERT(remaining_len
>= 0);
342 fp
= SCTP_CHUNK_DEST(mdblk
);
344 fp
= sctp
->sctp_current
;
346 xtralen
= sctp
->sctp_hdr_len
+ sctps
->sctps_wroff_xtra
+
349 xtralen
= sctp
->sctp_hdr6_len
+ sctps
->sctps_wroff_xtra
+
351 count
= chunksize
= remaining_len
- sizeof (*sdc
);
353 next_msg_hdr
= (sctp_msg_hdr_t
*)sctp
->sctp_xmit_unsent
->b_rptr
;
354 nextlen
= next_msg_hdr
->smh_msglen
;
356 * Will the entire next message fit in the current packet ?
357 * if not, leave it on the unsent list.
359 if ((firstseg_len
!= 0) && (nextlen
> remaining_len
))
362 chunk_mp
= mdblk
->b_cont
;
365 * If this is partially chunked, we ignore the next one for now and
366 * use the one already present. For the unchunked bits, we use the
367 * length of the last chunk.
369 if (SCTP_IS_MSG_CHUNKED(mdblk
)) {
372 ASSERT(chunk_mp
->b_next
!= NULL
);
373 mdblk
->b_cont
= chunk_mp
->b_next
;
374 chunk_mp
->b_next
= NULL
;
375 SCTP_MSG_CLEAR_CHUNKED(mdblk
);
377 while (mp
->b_next
!= NULL
)
379 chunk_len
= ntohs(((sctp_data_hdr_t
*)mp
->b_rptr
)->sdh_len
);
380 if (fp
->sf_pmss
- chunk_len
> sizeof (*sdc
))
381 count
= chunksize
= fp
->sf_pmss
- chunk_len
;
383 count
= chunksize
= fp
->sf_pmss
;
384 count
= chunksize
= count
- sizeof (*sdc
);
386 msg_hdr
= (sctp_msg_hdr_t
*)mdblk
->b_rptr
;
387 if (SCTP_MSG_TO_BE_ABANDONED(mdblk
, msg_hdr
, sctp
)) {
388 sctp
->sctp_xmit_unsent
= mdblk
->b_next
;
389 if (sctp
->sctp_xmit_unsent
== NULL
)
390 sctp
->sctp_xmit_unsent_tail
= NULL
;
391 ASSERT(sctp
->sctp_unsent
>= msg_hdr
->smh_msglen
);
392 sctp
->sctp_unsent
-= msg_hdr
->smh_msglen
;
393 mdblk
->b_next
= NULL
;
394 BUMP_LOCAL(sctp
->sctp_prsctpdrop
);
396 * Update ULP the amount of queued data, which is
397 * sent-unack'ed + unsent.
399 if (!SCTP_IS_DETACHED(sctp
))
400 SCTP_TXQ_UPDATE(sctp
);
401 sctp_sendfail_event(sctp
, mdblk
, 0, B_FALSE
);
404 mdblk
->b_cont
= NULL
;
406 msg_hdr
= (sctp_msg_hdr_t
*)mdblk
->b_rptr
;
408 chunk_head
= chunk_mp
;
411 /* Skip as many mblk's as we need */
412 while (chunk_mp
!= NULL
&& ((count
- MBLKL(chunk_mp
)) >= 0)) {
413 count
-= MBLKL(chunk_mp
);
414 chunk_tail
= chunk_mp
;
415 chunk_mp
= chunk_mp
->b_cont
;
417 /* Split the chain, if needed */
418 if (chunk_mp
!= NULL
) {
420 mblk_t
*split_mp
= dupb(chunk_mp
);
422 if (split_mp
== NULL
) {
423 if (mdblk
->b_cont
== NULL
) {
424 mdblk
->b_cont
= chunk_head
;
426 SCTP_MSG_SET_CHUNKED(mdblk
);
427 ASSERT(chunk_head
->b_next
== NULL
);
428 chunk_head
->b_next
= mdblk
->b_cont
;
429 mdblk
->b_cont
= chunk_head
;
431 return (sctp
->sctp_xmit_tail
);
433 if (chunk_tail
!= NULL
) {
434 chunk_tail
->b_cont
= split_mp
;
435 chunk_tail
= chunk_tail
->b_cont
;
437 chunk_head
= chunk_tail
= split_mp
;
439 chunk_tail
->b_wptr
= chunk_tail
->b_rptr
+ count
;
440 chunk_mp
->b_rptr
= chunk_tail
->b_wptr
;
442 } else if (chunk_tail
== NULL
) {
445 chunk_tail
->b_cont
= NULL
;
448 /* Alloc chunk hdr, if needed */
449 if (DB_REF(chunk_head
) > 1 ||
450 ((intptr_t)chunk_head
->b_rptr
) & (SCTP_ALIGN
- 1) ||
451 MBLKHEAD(chunk_head
) < sizeof (*sdc
)) {
452 if ((chunk_hdr
= allocb(xtralen
, BPRI_MED
)) == NULL
) {
453 if (mdblk
->b_cont
== NULL
) {
454 if (chunk_mp
!= NULL
)
455 linkb(chunk_head
, chunk_mp
);
456 mdblk
->b_cont
= chunk_head
;
458 SCTP_MSG_SET_CHUNKED(mdblk
);
459 if (chunk_mp
!= NULL
)
460 linkb(chunk_head
, chunk_mp
);
461 ASSERT(chunk_head
->b_next
== NULL
);
462 chunk_head
->b_next
= mdblk
->b_cont
;
463 mdblk
->b_cont
= chunk_head
;
465 return (sctp
->sctp_xmit_tail
);
467 chunk_hdr
->b_rptr
+= xtralen
- sizeof (*sdc
);
468 chunk_hdr
->b_wptr
= chunk_hdr
->b_rptr
+ sizeof (*sdc
);
469 chunk_hdr
->b_cont
= chunk_head
;
471 chunk_hdr
= chunk_head
;
472 chunk_hdr
->b_rptr
-= sizeof (*sdc
);
474 ASSERT(chunk_hdr
->b_datap
->db_ref
== 1);
475 sdc
= (sctp_data_hdr_t
*)chunk_hdr
->b_rptr
;
476 sdc
->sdh_id
= CHUNK_DATA
;
478 sdc
->sdh_len
= htons(sizeof (*sdc
) + chunksize
- count
);
479 ASSERT(sdc
->sdh_len
);
480 sdc
->sdh_sid
= htons(msg_hdr
->smh_sid
);
482 * We defer assigning the SSN just before sending the chunk, else
483 * if we drop the chunk in sctp_get_msg_to_send(), we would need
484 * to send a Forward TSN to let the peer know. Some more comments
485 * about this in sctp_impl.h for SCTP_CHUNK_SENT.
487 sdc
->sdh_payload_id
= msg_hdr
->smh_ppid
;
489 if (mdblk
->b_cont
== NULL
) {
490 mdblk
->b_cont
= chunk_hdr
;
491 SCTP_DATA_SET_BBIT(sdc
);
494 while (mp
->b_next
!= NULL
)
496 mp
->b_next
= chunk_hdr
;
499 bytes_to_send
-= (chunksize
- count
);
500 if (chunk_mp
!= NULL
) {
502 count
= chunksize
= fp
->sf_pmss
- sizeof (*sdc
);
505 SCTP_DATA_SET_EBIT(sdc
);
506 sctp
->sctp_xmit_unsent
= mdblk
->b_next
;
507 if (mdblk
->b_next
== NULL
) {
508 sctp
->sctp_xmit_unsent_tail
= NULL
;
510 mdblk
->b_next
= NULL
;
512 if (sctp
->sctp_xmit_tail
== NULL
) {
513 sctp
->sctp_xmit_head
= sctp
->sctp_xmit_tail
= mdblk
;
515 mp
= sctp
->sctp_xmit_tail
;
516 while (mp
->b_next
!= NULL
)
522 if (bytes_to_send
> 0 && sctp
->sctp_xmit_unsent
!= NULL
) {
523 mdblk
= sctp
->sctp_xmit_unsent
;
524 fp1
= SCTP_CHUNK_DEST(mdblk
);
526 fp1
= sctp
->sctp_current
;
528 size_t len
= MBLKL(mdblk
->b_cont
);
530 ((len
> fp
->sf_pmss
- sizeof (*sdc
)) ||
532 count
-= sizeof (*sdc
);
533 count
= chunksize
= count
- (count
& 0x3);
535 count
= chunksize
= fp
->sf_pmss
-
540 xtralen
= sctp
->sctp_hdr_len
;
542 xtralen
= sctp
->sctp_hdr6_len
;
543 xtralen
+= sctps
->sctps_wroff_xtra
+ sizeof (*sdc
);
544 count
= chunksize
= fp1
->sf_pmss
- sizeof (*sdc
);
549 return (sctp
->sctp_xmit_tail
);
/*
 * Tears down the chunk list of a message. ump is the message's meta mblk;
 * the walk starts at ump->b_cont and clears the b_next/b_prev links of
 * every mblk it visits. ump itself must already be unlinked on the b_prev
 * side (see the ASSERT).
 *
 * NOTE(review): the statement that sets nmp and the calls that actually
 * release the mblks are not visible in this listing. nmp is presumably the
 * successor saved before the links are cleared - confirm against the full
 * source.
 */
553 sctp_free_msg(mblk_t
*ump
)
557 for (mp
= ump
->b_cont
; mp
; mp
= nmp
) {
/* Detach this mblk from its neighbours. */
559 mp
->b_next
= mp
->b_prev
= NULL
;
562 ASSERT(!ump
->b_prev
);
568 sctp_add_proto_hdr(sctp_t
*sctp
, sctp_faddr_t
*fp
, mblk_t
*mp
, int sacklen
,
573 int isv4
= fp
->sf_isv4
;
574 sctp_stack_t
*sctps
= sctp
->sctp_sctps
;
580 hdrlen
= sctp
->sctp_hdr_len
;
581 hdr
= sctp
->sctp_iphc
;
583 hdrlen
= sctp
->sctp_hdr6_len
;
584 hdr
= sctp
->sctp_iphc6
;
587 * A reject|blackhole could mean that the address is 'down'. Similarly,
588 * it is possible that the address went down, we tried to send a
589 * heartbeat and ended up setting fp->sf_saddr as unspec because we
590 * didn't have any usable source address. In either case
591 * sctp_get_dest() will try to find an IRE, if available, and set
592 * the source address, if needed. If we still don't have any
593 * usable source address, fp->sf_state will be SCTP_FADDRS_UNREACH and
594 * we return EHOSTUNREACH.
596 ASSERT(fp
->sf_ixa
->ixa_ire
!= NULL
);
597 if ((fp
->sf_ixa
->ixa_ire
->ire_flags
& (RTF_REJECT
|RTF_BLACKHOLE
)) ||
598 SCTP_IS_ADDR_UNSPEC(fp
->sf_isv4
, fp
->sf_saddr
)) {
599 sctp_get_dest(sctp
, fp
);
600 if (fp
->sf_state
== SCTP_FADDRS_UNREACH
) {
602 *error
= EHOSTUNREACH
;
606 /* Copy in IP header. */
607 if ((mp
->b_rptr
- mp
->b_datap
->db_base
) <
608 (sctps
->sctps_wroff_xtra
+ hdrlen
+ sacklen
) || DB_REF(mp
) > 2) {
612 * This can happen if IP headers are adjusted after
613 * data was moved into chunks, or during retransmission,
614 * or when things like snoop are running.
616 nmp
= allocb(sctps
->sctps_wroff_xtra
+ hdrlen
+ sacklen
,
623 nmp
->b_rptr
+= sctps
->sctps_wroff_xtra
;
624 nmp
->b_wptr
= nmp
->b_rptr
+ hdrlen
+ sacklen
;
628 mp
->b_rptr
-= (hdrlen
+ sacklen
);
630 bcopy(hdr
, mp
->b_rptr
, hdrlen
);
632 sctp_fill_sack(sctp
, mp
->b_rptr
+ hdrlen
, sacklen
);
634 if (fp
!= sctp
->sctp_current
) {
635 /* change addresses in header */
637 ipha_t
*iph
= (ipha_t
*)mp
->b_rptr
;
639 IN6_V4MAPPED_TO_IPADDR(&fp
->sf_faddr
, iph
->ipha_dst
);
640 if (!IN6_IS_ADDR_V4MAPPED_ANY(&fp
->sf_saddr
)) {
641 IN6_V4MAPPED_TO_IPADDR(&fp
->sf_saddr
,
643 } else if (sctp
->sctp_bound_to_all
) {
644 iph
->ipha_src
= INADDR_ANY
;
647 ip6_t
*ip6h
= (ip6_t
*)mp
->b_rptr
;
649 ip6h
->ip6_dst
= fp
->sf_faddr
;
650 if (!IN6_IS_ADDR_UNSPECIFIED(&fp
->sf_saddr
)) {
651 ip6h
->ip6_src
= fp
->sf_saddr
;
652 } else if (sctp
->sctp_bound_to_all
) {
653 ip6h
->ip6_src
= ipv6_all_zeros
;
661 * SCTP requires every chunk to be padded so that the total length
662 * is a multiple of SCTP_ALIGN. This function returns a mblk with
663 * the specified pad length.
666 sctp_get_padding(sctp_t
*sctp
, int pad
)
670 ASSERT(pad
< SCTP_ALIGN
);
671 ASSERT(sctp
->sctp_pad_mp
!= NULL
);
672 if ((fill
= dupb(sctp
->sctp_pad_mp
)) != NULL
) {
678 * The memory saving path of reusing the sctp_pad_mp
679 * can fail, possibly because it has been dupb()'ed too
680 * many times (DBLK_REFMAX). Use the memory consuming
681 * path of allocating the pad mblk.
683 if ((fill
= allocb(SCTP_ALIGN
, BPRI_MED
)) != NULL
) {
684 /* Zero it out. SCTP_ALIGN is sizeof (int32_t) */
685 *(int32_t *)fill
->b_rptr
= 0;
692 sctp_find_fast_rexmit_mblks(sctp_t
*sctp
, int *total
, sctp_faddr_t
**fp
)
695 mblk_t
*start_mp
= NULL
;
696 mblk_t
*end_mp
= NULL
;
699 sctp_data_hdr_t
*sdh
;
702 sctp_msg_hdr_t
*msg_hdr
;
703 sctp_faddr_t
*old_fp
= NULL
;
704 sctp_faddr_t
*chunk_fp
;
705 sctp_stack_t
*sctps
= sctp
->sctp_sctps
;
707 for (meta
= sctp
->sctp_xmit_head
; meta
!= NULL
; meta
= meta
->b_next
) {
708 msg_hdr
= (sctp_msg_hdr_t
*)meta
->b_rptr
;
709 if (SCTP_IS_MSG_ABANDONED(meta
) ||
710 SCTP_MSG_TO_BE_ABANDONED(meta
, msg_hdr
, sctp
)) {
713 for (mp
= meta
->b_cont
; mp
!= NULL
; mp
= mp
->b_next
) {
714 if (SCTP_CHUNK_WANT_REXMIT(mp
)) {
716 * Use the same peer address to do fast
717 * retransmission. If the original peer
718 * address is dead, switch to the current
719 * one. Record the old one so that we
720 * will pick the chunks sent to the old
721 * one for fast retransmission.
723 chunk_fp
= SCTP_CHUNK_DEST(mp
);
726 if ((*fp
)->sf_state
!=
729 *fp
= sctp
->sctp_current
;
731 } else if (old_fp
== NULL
&& *fp
!= chunk_fp
) {
733 } else if (old_fp
!= NULL
&&
734 old_fp
!= chunk_fp
) {
738 sdh
= (sctp_data_hdr_t
*)mp
->b_rptr
;
739 msglen
= ntohs(sdh
->sdh_len
);
740 if ((extra
= msglen
& (SCTP_ALIGN
- 1)) != 0) {
741 extra
= SCTP_ALIGN
- extra
;
745 * We still return at least the first message
746 * even if that message cannot fit in as
747 * PMTU may have changed.
749 if (*total
+ msglen
+ extra
>
750 (*fp
)->sf_pmss
&& start_mp
!= NULL
) {
753 if ((nmp
= dupmsg(mp
)) == NULL
)
756 fill
= sctp_get_padding(sctp
, extra
);
763 SCTPS_BUMP_MIB(sctps
, sctpOutFastRetrans
);
764 BUMP_LOCAL(sctp
->sctp_rxtchunks
);
765 SCTP_CHUNK_CLEAR_REXMIT(mp
);
766 if (start_mp
== NULL
) {
772 *total
+= msglen
+ extra
;
773 dprint(2, ("sctp_find_fast_rexmit_mblks: "
774 "tsn %x\n", sdh
->sdh_tsn
));
778 /* Clear the flag as there is no more message to be fast rexmitted. */
779 sctp
->sctp_chk_fast_rexmit
= B_FALSE
;
783 /* A debug function just to make sure that a mblk chain is not broken */
/*
 * Debug-only consistency check on an mblk chain running from head to tail.
 * It is declared static boolean_t at the top of the file, and every caller
 * visible in this file wraps it in ASSERT().
 *
 * NOTE(review): only the NULL test on head/tail is visible in this listing;
 * the value returned for that case and the chain walk itself are not shown.
 */
786 sctp_verify_chain(mblk_t
*head
, mblk_t
*tail
)
790 if (head
== NULL
|| tail
== NULL
)
802 * Gets the next unsent chunk to transmit. Messages that are abandoned are
803 * skipped. A message can be abandoned if it has a non-zero timetolive and
804 * transmission has not yet started or if it is a partially reliable
805 * message and its time is up (assuming we are PR-SCTP aware).
806 * We only return a chunk if it will fit entirely in the current packet.
807 * 'cansend' is used to determine if we need to try and chunkify messages from
808 * the unsent list, if any, and also as an input to sctp_chunkify() if so.
810 * firstseg_len indicates the space already used, cansend represents remaining
811 * space in the window, ((sf_pmss - firstseg_len) can therefore reasonably
812 * be used to compute the cansend arg).
815 sctp_get_msg_to_send(sctp_t
*sctp
, mblk_t
**mp
, mblk_t
*meta
, int *error
,
816 int32_t firstseg_len
, uint32_t cansend
, sctp_faddr_t
*fp
)
819 sctp_msg_hdr_t
*msg_hdr
;
823 ASSERT(error
!= NULL
&& mp
!= NULL
);
826 ASSERT(sctp
->sctp_current
!= NULL
);
829 while (meta
!= NULL
) {
830 tmp_meta
= meta
->b_next
;
831 msg_hdr
= (sctp_msg_hdr_t
*)meta
->b_rptr
;
833 if (SCTP_IS_MSG_ABANDONED(meta
))
835 if (!SCTP_MSG_TO_BE_ABANDONED(meta
, msg_hdr
, sctp
)) {
836 while (mp1
!= NULL
) {
837 if (SCTP_CHUNK_CANSEND(mp1
)) {
840 ASSERT(sctp_verify_chain(
841 sctp
->sctp_xmit_head
, meta
));
850 * If we come here and the first chunk is sent, then
851 * we are PR-SCTP aware, in which case if the cumulative
852 * TSN has moved up to or beyond the first chunk (which
853 * means all the previous messages have been cumulative
854 * SACK'd), then we send a Forward TSN with the last
855 * chunk that was sent in this message. If we can't send
856 * a Forward TSN because previous non-abandoned messages
857 * have not been acked then we will defer the Forward TSN
858 * to sctp_rexmit() or sctp_cumack().
860 if (SCTP_CHUNK_ISSENT(mp1
)) {
861 *error
= sctp_check_abandoned_msg(sctp
, meta
);
864 ASSERT(sctp_verify_chain(sctp
->sctp_xmit_head
,
865 sctp
->sctp_xmit_tail
));
871 BUMP_LOCAL(sctp
->sctp_prsctpdrop
);
872 ASSERT(sctp
->sctp_unsent
>= msg_hdr
->smh_msglen
);
873 if (meta
->b_prev
== NULL
) {
874 ASSERT(sctp
->sctp_xmit_head
== meta
);
875 sctp
->sctp_xmit_head
= tmp_meta
;
876 if (sctp
->sctp_xmit_tail
== meta
)
877 sctp
->sctp_xmit_tail
= tmp_meta
;
879 if (tmp_meta
!= NULL
)
880 tmp_meta
->b_prev
= NULL
;
881 } else if (meta
->b_next
== NULL
) {
882 if (sctp
->sctp_xmit_tail
== meta
)
883 sctp
->sctp_xmit_tail
= meta
->b_prev
;
884 meta
->b_prev
->b_next
= NULL
;
887 meta
->b_prev
->b_next
= tmp_meta
;
888 tmp_meta
->b_prev
= meta
->b_prev
;
889 if (sctp
->sctp_xmit_tail
== meta
)
890 sctp
->sctp_xmit_tail
= tmp_meta
;
894 sctp
->sctp_unsent
-= msg_hdr
->smh_msglen
;
896 * Update ULP the amount of queued data, which is
897 * sent-unack'ed + unsent.
899 if (!SCTP_IS_DETACHED(sctp
))
900 SCTP_TXQ_UPDATE(sctp
);
901 sctp_sendfail_event(sctp
, meta
, 0, B_TRUE
);
905 /* chunkify, if needed */
906 if (cansend
> 0 && sctp
->sctp_xmit_unsent
!= NULL
) {
907 ASSERT(sctp
->sctp_unsent
> 0);
909 fp
= SCTP_CHUNK_DEST(sctp
->sctp_xmit_unsent
);
910 if (fp
== NULL
|| fp
->sf_state
!= SCTP_FADDRS_ALIVE
)
911 fp
= sctp
->sctp_current
;
914 * If user specified destination, try to honor that.
916 fp1
= SCTP_CHUNK_DEST(sctp
->sctp_xmit_unsent
);
917 if (fp1
!= NULL
&& fp1
->sf_state
== SCTP_FADDRS_ALIVE
&&
922 meta
= sctp_chunkify(sctp
, fp
->sf_pmss
, firstseg_len
, cansend
);
926 * sctp_chunkify() won't advance sctp_xmit_tail if it adds
927 * new chunk(s) to the tail, so we need to skip the
928 * sctp_xmit_tail, which would have already been processed.
929 * This could happen when there is unacked chunks, but
930 * nothing new to send.
931 * When sctp_chunkify() is called when the transmit queue
932 * is empty then we need to start from sctp_xmit_tail.
934 if (SCTP_CHUNK_ISSENT(sctp
->sctp_xmit_tail
->b_cont
)) {
936 mp1
= sctp
->sctp_xmit_tail
->b_cont
;
937 while (mp1
!= NULL
) {
938 ASSERT(!SCTP_CHUNK_CANSEND(mp1
));
942 if ((meta
= sctp
->sctp_xmit_tail
->b_next
) == NULL
)
949 ASSERT(sctp_verify_chain(sctp
->sctp_xmit_head
, sctp
->sctp_xmit_tail
));
/*
 * Fast retransmission. Gathers the chunks selected by
 * sctp_find_fast_rexmit_mblks(), prepends the IP/SCTP header via
 * sctp_add_proto_hdr() (sacklen 0, no error out-parameter) and passes the
 * resulting packet to conn_ip_output(). The transmit list must not be
 * empty on entry (see the ASSERT).
 *
 * NOTE(review): this listing has gaps; the conditions guarding the two
 * SCTP_KSTAT() bumps and the early exits after them are not visible here.
 */
955 sctp_fast_rexmit(sctp_t
*sctp
)
/*
 * fp starts out NULL and is passed by address below;
 * sctp_find_fast_rexmit_mblks() stores the chosen peer address through it.
 */
959 sctp_faddr_t
*fp
= NULL
;
960 sctp_stack_t
*sctps
= sctp
->sctp_sctps
;
962 ASSERT(sctp
->sctp_xmit_head
!= NULL
);
/* pktlen is accumulated by the callee as chunks (plus padding) are added. */
963 mp
= sctp_find_fast_rexmit_mblks(sctp
, &pktlen
, &fp
);
/* Counter for the "nothing found" case - presumably mp == NULL; confirm. */
965 SCTP_KSTAT(sctps
, sctp_fr_not_found
);
968 if ((head
= sctp_add_proto_hdr(sctp
, fp
, mp
, 0, NULL
)) == NULL
) {
/* Header could not be added; account for the failure. */
970 SCTP_KSTAT(sctps
, sctp_fr_add_hdr
);
/*
 * The packet is larger than the path MSS on an IPv4 path: zero the
 * fragment offset/flags word. sctp_output() performs the same store and
 * documents it as clearing the DF bit.
 */
973 if ((pktlen
> fp
->sf_pmss
) && fp
->sf_isv4
) {
974 ipha_t
*iph
= (ipha_t
*)head
->b_rptr
;
976 iph
->ipha_fragment_offset_and_flags
= 0;
979 sctp_set_iplen(sctp
, head
, fp
->sf_ixa
);
/* The return value of conn_ip_output() is deliberately ignored. */
980 (void) conn_ip_output(head
, fp
->sf_ixa
);
981 BUMP_LOCAL(sctp
->sctp_opkts
);
/* Stamp both the association and the peer address with the current lbolt. */
982 sctp
->sctp_active
= fp
->sf_lastactive
= ddi_get_lbolt64();
986 sctp_output(sctp_t
*sctp
, uint_t num_pkt
)
991 mblk_t
*meta
= sctp
->sctp_xmit_tail
;
1001 int64_t now
= LBOLT_FASTPATH64
;
1004 sctp_data_hdr_t
*sdc
;
1006 boolean_t notsent
= B_TRUE
;
1007 sctp_stack_t
*sctps
= sctp
->sctp_sctps
;
1010 if (sctp
->sctp_ftsn
== sctp
->sctp_lastacked
+ 1) {
1013 /* send a SACK chunk */
1014 sacklen
= sizeof (sctp_chunk_hdr_t
) +
1015 sizeof (sctp_sack_chunk_t
) +
1016 (sizeof (sctp_sack_frag_t
) * sctp
->sctp_sack_gaps
);
1017 lfp
= sctp
->sctp_lastdata
;
1018 ASSERT(lfp
!= NULL
);
1019 if (lfp
->sf_state
!= SCTP_FADDRS_ALIVE
)
1020 lfp
= sctp
->sctp_current
;
1023 cansend
= sctp
->sctp_frwnd
;
1024 if (sctp
->sctp_unsent
< cansend
)
1025 cansend
= sctp
->sctp_unsent
;
1028 * Start persist timer if unable to send or when
1029 * trying to send into a zero window. This timer
1030 * ensures the blocked send attempt is retried.
1032 if ((cansend
< sctp
->sctp_current
->sf_pmss
/ 2) &&
1033 (sctp
->sctp_unacked
!= 0) &&
1034 (sctp
->sctp_unacked
< sctp
->sctp_current
->sf_pmss
) &&
1035 !sctp
->sctp_ndelay
||
1036 (cansend
== 0 && sctp
->sctp_unacked
== 0 &&
1037 sctp
->sctp_unsent
!= 0)) {
1039 fp
= sctp
->sctp_current
;
1044 while (cansend
> 0 && num_pkt
-- != 0) {
1048 * Find first segment eligible for transmit.
1050 while (mp
!= NULL
) {
1051 if (SCTP_CHUNK_CANSEND(mp
))
1056 meta
= sctp_get_msg_to_send(sctp
, &mp
,
1057 meta
== NULL
? NULL
: meta
->b_next
, &error
, sacklen
,
1059 if (error
!= 0 || meta
== NULL
) {
1061 fp
= sctp
->sctp_current
;
1064 sctp
->sctp_xmit_tail
= meta
;
1067 sdc
= (sctp_data_hdr_t
*)mp
->b_rptr
;
1068 seglen
= ntohs(sdc
->sdh_len
);
1069 xtralen
= sizeof (*sdc
);
1070 chunklen
= seglen
- xtralen
;
1075 if (chunklen
> cansend
) {
1077 fp
= SCTP_CHUNK_DEST(meta
);
1078 if (fp
== NULL
|| fp
->sf_state
!= SCTP_FADDRS_ALIVE
)
1079 fp
= sctp
->sctp_current
;
1082 if ((extra
= seglen
& (SCTP_ALIGN
- 1)) != 0)
1083 extra
= SCTP_ALIGN
- extra
;
1086 * Pick destination address, and check cwnd.
1088 if (sacklen
> 0 && (seglen
+ extra
<= lfp
->sf_cwnd
-
1090 (seglen
+ sacklen
+ extra
<= lfp
->sf_pmss
)) {
1092 * Only include SACK chunk if it can be bundled
1093 * with a data chunk, and sent to sctp_lastdata.
1095 pathmax
= lfp
->sf_cwnd
- lfp
->sf_suna
;
1098 if ((nmp
= dupmsg(mp
)) == NULL
) {
1102 SCTP_CHUNK_CLEAR_FLAGS(nmp
);
1103 head
= sctp_add_proto_hdr(sctp
, fp
, nmp
, sacklen
,
1107 * If none of the source addresses are
1108 * available (i.e error == EHOSTUNREACH),
1109 * pretend we have sent the data. We will
1110 * eventually time out trying to retransmit
1111 * the data if the interface never comes up.
1112 * If we have already sent some stuff (i.e.,
1113 * notsent is B_FALSE) then we are fine, else
1114 * just mark this packet as sent.
1116 if (notsent
&& error
== EHOSTUNREACH
) {
1117 SCTP_CHUNK_SENT(sctp
, mp
, sdc
,
1118 fp
, chunklen
, meta
);
1121 SCTP_KSTAT(sctps
, sctp_output_failed
);
1128 fp
= SCTP_CHUNK_DEST(meta
);
1129 if (fp
== NULL
|| fp
->sf_state
!= SCTP_FADDRS_ALIVE
)
1130 fp
= sctp
->sctp_current
;
1132 * If we haven't sent data to this destination for
1133 * a while, do slow start again.
1135 if (now
- fp
->sf_lastactive
> fp
->sf_rto
) {
1136 SET_CWND(fp
, fp
->sf_pmss
,
1137 sctps
->sctps_slow_start_after_idle
);
1140 pathmax
= fp
->sf_cwnd
- fp
->sf_suna
;
1141 if (seglen
+ extra
> pathmax
) {
1145 if ((nmp
= dupmsg(mp
)) == NULL
) {
1149 SCTP_CHUNK_CLEAR_FLAGS(nmp
);
1150 head
= sctp_add_proto_hdr(sctp
, fp
, nmp
, 0, &error
);
1153 * If none of the source addresses are
1154 * available (i.e error == EHOSTUNREACH),
1155 * pretend we have sent the data. We will
1156 * eventually time out trying to retransmit
1157 * the data if the interface never comes up.
1158 * If we have already sent some stuff (i.e.,
1159 * notsent is B_FALSE) then we are fine, else
1160 * just mark this packet as sent.
1162 if (notsent
&& error
== EHOSTUNREACH
) {
1163 SCTP_CHUNK_SENT(sctp
, mp
, sdc
,
1164 fp
, chunklen
, meta
);
1167 SCTP_KSTAT(sctps
, sctp_output_failed
);
1171 fp
->sf_lastactive
= now
;
1172 if (pathmax
> fp
->sf_pmss
)
1173 pathmax
= fp
->sf_pmss
;
1174 SCTP_CHUNK_SENT(sctp
, mp
, sdc
, fp
, chunklen
, meta
);
1178 * Use this chunk to measure RTT?
1179 * Must not be a retransmission of an earlier chunk,
1180 * ensure the tsn is current.
1182 tsn
= ntohl(sdc
->sdh_tsn
);
1183 if (sctp
->sctp_out_time
== 0 && tsn
== (sctp
->sctp_ltsn
- 1)) {
1184 sctp
->sctp_out_time
= now
;
1185 sctp
->sctp_rtt_tsn
= tsn
;
1188 fill
= sctp_get_padding(sctp
, extra
);
1198 * Bundle chunks. We linkb() the chunks together to send
1199 * downstream in a single packet.
1200 * Partial chunks MUST NOT be bundled with full chunks, so we
1201 * rely on sctp_get_msg_to_send() to only return messages that
1202 * will fit entirely in the current packet.
1204 while (seglen
< pathmax
) {
1206 int32_t new_xtralen
;
1208 while (mp
!= NULL
) {
1209 if (SCTP_CHUNK_CANSEND(mp
))
1214 meta
= sctp_get_msg_to_send(sctp
, &mp
,
1215 meta
->b_next
, &error
, seglen
,
1216 (seglen
- xtralen
) >= cansend
? 0 :
1217 cansend
- seglen
, fp
);
1220 /* If no more eligible chunks, cease bundling */
1223 sctp
->sctp_xmit_tail
= meta
;
1226 if (!SCTP_CHUNK_ISSENT(mp
) && SCTP_CHUNK_DEST(meta
) &&
1227 fp
!= SCTP_CHUNK_DEST(meta
)) {
1230 sdc
= (sctp_data_hdr_t
*)mp
->b_rptr
;
1231 chunklen
= ntohs(sdc
->sdh_len
);
1232 if ((extra
= chunklen
& (SCTP_ALIGN
- 1)) != 0)
1233 extra
= SCTP_ALIGN
- extra
;
1235 new_len
= seglen
+ chunklen
;
1236 new_xtralen
= xtralen
+ sizeof (*sdc
);
1237 chunklen
-= sizeof (*sdc
);
1239 if (new_len
- new_xtralen
> cansend
||
1240 new_len
+ extra
> pathmax
) {
1243 if ((nmp
= dupmsg(mp
)) == NULL
)
1246 fill
= sctp_get_padding(sctp
, extra
);
1257 xtralen
= new_xtralen
;
1258 SCTP_CHUNK_CLEAR_FLAGS(nmp
);
1259 SCTP_CHUNK_SENT(sctp
, mp
, sdc
, fp
, chunklen
, meta
);
1263 if ((seglen
> fp
->sf_pmss
) && fp
->sf_isv4
) {
1264 ipha_t
*iph
= (ipha_t
*)head
->b_rptr
;
1267 * Path MTU is different from what we thought it would
1268 * be when we created chunks, or IP headers have grown.
1269 * Need to clear the DF bit.
1271 iph
->ipha_fragment_offset_and_flags
= 0;
1274 ASSERT(cansend
>= seglen
- pad
- xtralen
);
1275 cansend
-= (seglen
- pad
- xtralen
);
1276 dprint(2, ("sctp_output: Sending packet %d bytes, tsn %x "
1277 "ssn %d to %p (rwnd %d, cansend %d, lastack_rxd %x)\n",
1278 seglen
- xtralen
, ntohl(sdc
->sdh_tsn
),
1279 ntohs(sdc
->sdh_ssn
), (void *)fp
, sctp
->sctp_frwnd
,
1280 cansend
, sctp
->sctp_lastack_rxd
));
1281 sctp_set_iplen(sctp
, head
, fp
->sf_ixa
);
1282 (void) conn_ip_output(head
, fp
->sf_ixa
);
1283 BUMP_LOCAL(sctp
->sctp_opkts
);
1284 /* arm rto timer (if not set) */
1285 if (!fp
->sf_timer_running
)
1286 SCTP_FADDR_TIMER_RESTART(sctp
, fp
, fp
->sf_rto
);
1289 sctp
->sctp_active
= now
;
1292 /* arm persist timer (if rto timer not set) */
1293 if (!fp
->sf_timer_running
)
1294 SCTP_FADDR_TIMER_RESTART(sctp
, fp
, fp
->sf_rto
);
1300 * The following two functions initialize and destroy the cache
1301 * associated with the sets used for PR-SCTP.
1304 sctp_ftsn_sets_init(void)
1306 sctp_kmem_ftsn_set_cache
= kmem_cache_create("sctp_ftsn_set_cache",
1307 sizeof (sctp_ftsn_set_t
), 0, NULL
, NULL
, NULL
, NULL
,
1312 sctp_ftsn_sets_fini(void)
1314 kmem_cache_destroy(sctp_kmem_ftsn_set_cache
);
1318 /* Free PR-SCTP sets */
1320 sctp_free_ftsn_set(sctp_ftsn_set_t
*s
)
1327 kmem_cache_free(sctp_kmem_ftsn_set_cache
, s
);
1333 * Given a message meta block, meta, this routine creates or modifies
1334 * the set that will be used to generate a Forward TSN chunk. If the
1335 * entry for stream id, sid, for this message already exists, the
1336 * sequence number, ssn, is updated if it is greater than the existing
1337 * one. If an entry for this sid does not exist, one is created if
1338 * the size does not exceed fp->sf_pmss. We return false in case
1342 sctp_add_ftsn_set(sctp_ftsn_set_t
**s
, sctp_faddr_t
*fp
, mblk_t
*meta
,
1343 uint_t
*nsets
, uint32_t *slen
)
1346 sctp_msg_hdr_t
*msg_hdr
= (sctp_msg_hdr_t
*)meta
->b_rptr
;
1347 uint16_t sid
= htons(msg_hdr
->smh_sid
);
1348 /* msg_hdr->smh_ssn is already in NBO */
1349 uint16_t ssn
= msg_hdr
->smh_ssn
;
1351 ASSERT(s
!= NULL
&& nsets
!= NULL
);
1352 ASSERT((*nsets
== 0 && *s
== NULL
) || (*nsets
> 0 && *s
!= NULL
));
1355 ASSERT((*slen
+ sizeof (uint32_t)) <= fp
->sf_pmss
);
1356 *s
= kmem_cache_alloc(sctp_kmem_ftsn_set_cache
, KM_NOSLEEP
);
1359 (*s
)->ftsn_entries
.ftsn_sid
= sid
;
1360 (*s
)->ftsn_entries
.ftsn_ssn
= ssn
;
1363 *slen
+= sizeof (uint32_t);
1366 for (p
= *s
; p
->next
!= NULL
; p
= p
->next
) {
1367 if (p
->ftsn_entries
.ftsn_sid
== sid
) {
1368 if (SSN_GT(ssn
, p
->ftsn_entries
.ftsn_ssn
))
1369 p
->ftsn_entries
.ftsn_ssn
= ssn
;
1374 if (p
->ftsn_entries
.ftsn_sid
== sid
) {
1375 if (SSN_GT(ssn
, p
->ftsn_entries
.ftsn_ssn
))
1376 p
->ftsn_entries
.ftsn_ssn
= ssn
;
1378 if ((*slen
+ sizeof (uint32_t)) > fp
->sf_pmss
)
1380 p
->next
= kmem_cache_alloc(sctp_kmem_ftsn_set_cache
,
1382 if (p
->next
== NULL
)
1385 p
->ftsn_entries
.ftsn_sid
= sid
;
1386 p
->ftsn_entries
.ftsn_ssn
= ssn
;
1389 *slen
+= sizeof (uint32_t);
1395 * Given a set of stream id - sequence number pairs, this routine creates
1396 * a Forward TSN chunk. The cumulative TSN (advanced peer ack point)
1397 * for the chunk is obtained from sctp->sctp_adv_pap. The caller
1398 * will add the IP/SCTP header.
1401 sctp_make_ftsn_chunk(sctp_t
*sctp
, sctp_faddr_t
*fp
, sctp_ftsn_set_t
*sets
,
1402 uint_t nsets
, uint32_t seglen
)
1405 sctp_chunk_hdr_t
*ch_hdr
;
1409 ftsn_entry_t
*ftsn_entry
;
1410 sctp_stack_t
*sctps
= sctp
->sctp_sctps
;
1412 seglen
+= sizeof (sctp_chunk_hdr_t
);
1414 xtralen
= sctp
->sctp_hdr_len
+ sctps
->sctps_wroff_xtra
;
1416 xtralen
= sctp
->sctp_hdr6_len
+ sctps
->sctps_wroff_xtra
;
1417 ftsn_mp
= allocb(xtralen
+ seglen
, BPRI_MED
);
1418 if (ftsn_mp
== NULL
)
1420 ftsn_mp
->b_rptr
+= xtralen
;
1421 ftsn_mp
->b_wptr
= ftsn_mp
->b_rptr
+ seglen
;
1423 ch_hdr
= (sctp_chunk_hdr_t
*)ftsn_mp
->b_rptr
;
1424 ch_hdr
->sch_id
= CHUNK_FORWARD_TSN
;
1425 ch_hdr
->sch_flags
= 0;
1427 * The cast here should not be an issue since seglen is
1428 * the length of the Forward TSN chunk.
1430 schlen
= (uint16_t)seglen
;
1431 U16_TO_ABE16(schlen
, &(ch_hdr
->sch_len
));
1433 advtsn
= (uint32_t *)(ch_hdr
+ 1);
1434 U32_TO_ABE32(sctp
->sctp_adv_pap
, advtsn
);
1435 ftsn_entry
= (ftsn_entry_t
*)(advtsn
+ 1);
1437 ASSERT((uchar_t
*)&ftsn_entry
[1] <= ftsn_mp
->b_wptr
);
1438 ftsn_entry
->ftsn_sid
= sets
->ftsn_entries
.ftsn_sid
;
1439 ftsn_entry
->ftsn_ssn
= sets
->ftsn_entries
.ftsn_ssn
;
1448 * Given a starting message, the routine steps through all the
1449 * messages whose TSN is less than sctp->sctp_adv_pap and creates
1450 * ftsn sets. The ftsn sets are then used to create a Forward TSN
1451 * chunk. All the messages, that have chunks that are included in the
1452 * ftsn sets, are flagged abandoned. If a message is partially sent
1453 * and is deemed abandoned, all remaining unsent chunks are marked
1454 * abandoned and are deducted from sctp_unsent.
1457 sctp_make_ftsns(sctp_t
*sctp
, mblk_t
*meta
, mblk_t
*mp
, mblk_t
**nmp
,
1458 sctp_faddr_t
*fp
, uint32_t *seglen
)
1461 mblk_t
*mp_head
= mp
;
1462 mblk_t
*meta_head
= meta
;
1464 sctp_ftsn_set_t
*sets
= NULL
;
1467 sctp_data_hdr_t
*sdc
;
1469 uint32_t adv_pap
= sctp
->sctp_adv_pap
;
1470 uint32_t unsent
= 0;
1472 sctp_stack_t
*sctps
= sctp
->sctp_sctps
;
1474 *seglen
= sizeof (uint32_t);
1476 sdc
= (sctp_data_hdr_t
*)mp1
->b_rptr
;
1477 while (meta
!= NULL
&&
1478 SEQ_GEQ(sctp
->sctp_adv_pap
, ntohl(sdc
->sdh_tsn
))) {
1480 * Skip adding FTSN sets for un-ordered messages as they do
1483 ubit
= SCTP_DATA_GET_UBIT(sdc
);
1485 !sctp_add_ftsn_set(&sets
, fp
, meta
, &nsets
, seglen
)) {
1487 sctp
->sctp_adv_pap
= adv_pap
;
1490 while (mp1
!= NULL
&& SCTP_CHUNK_ISSENT(mp1
)) {
1491 sdc
= (sctp_data_hdr_t
*)mp1
->b_rptr
;
1492 adv_pap
= ntohl(sdc
->sdh_tsn
);
1495 meta
= meta
->b_next
;
1498 if (!SCTP_CHUNK_ISSENT(mp1
))
1500 sdc
= (sctp_data_hdr_t
*)mp1
->b_rptr
;
1505 * Can't compare with sets == NULL, since we don't add any
1506 * sets for un-ordered messages.
1508 if (meta
== meta_head
)
1510 *nmp
= sctp_make_ftsn_chunk(sctp
, fp
, sets
, nsets
, *seglen
);
1511 sctp_free_ftsn_set(sets
);
1514 if (sctp
->sctp_ftsn
== sctp
->sctp_lastacked
+ 1) {
1517 sacklen
= sizeof (sctp_chunk_hdr_t
) +
1518 sizeof (sctp_sack_chunk_t
) +
1519 (sizeof (sctp_sack_frag_t
) * sctp
->sctp_sack_gaps
);
1520 if (*seglen
+ sacklen
> sctp
->sctp_lastdata
->sf_pmss
) {
1521 /* piggybacked SACK doesn't fit */
1524 fp
= sctp
->sctp_lastdata
;
1527 head
= sctp_add_proto_hdr(sctp
, fp
, *nmp
, sacklen
, NULL
);
1531 SCTP_KSTAT(sctps
, sctp_send_ftsn_failed
);
1538 * XXXNeed to optimise this, the reason it is done here is so
1539 * that we don't have to undo in case of failure.
1542 sdc
= (sctp_data_hdr_t
*)mp1
->b_rptr
;
1543 while (meta_head
!= NULL
&&
1544 SEQ_GEQ(sctp
->sctp_adv_pap
, ntohl(sdc
->sdh_tsn
))) {
1545 if (!SCTP_IS_MSG_ABANDONED(meta_head
))
1546 SCTP_MSG_SET_ABANDONED(meta_head
);
1547 while (mp1
!= NULL
&& SCTP_CHUNK_ISSENT(mp1
)) {
1548 sdc
= (sctp_data_hdr_t
*)mp1
->b_rptr
;
1549 if (!SCTP_CHUNK_ISACKED(mp1
)) {
1550 clen
= ntohs(sdc
->sdh_len
) - sizeof (*sdc
);
1551 SCTP_CHUNK_SENT(sctp
, mp1
, sdc
, fp
, clen
,
1556 while (mp1
!= NULL
) {
1557 sdc
= (sctp_data_hdr_t
*)mp1
->b_rptr
;
1558 if (!SCTP_CHUNK_ABANDONED(mp1
)) {
1559 ASSERT(!SCTP_CHUNK_ISSENT(mp1
));
1560 unsent
+= ntohs(sdc
->sdh_len
) - sizeof (*sdc
);
1561 SCTP_ABANDON_CHUNK(mp1
);
1565 meta_head
= meta_head
->b_next
;
1566 if (meta_head
!= NULL
) {
1567 mp1
= meta_head
->b_cont
;
1568 if (!SCTP_CHUNK_ISSENT(mp1
))
1570 sdc
= (sctp_data_hdr_t
*)mp1
->b_rptr
;
1574 ASSERT(sctp
->sctp_unsent
>= unsent
);
1575 sctp
->sctp_unsent
-= unsent
;
1577 * Update ULP the amount of queued data, which is
1578 * sent-unack'ed + unsent.
1580 if (!SCTP_IS_DETACHED(sctp
))
1581 SCTP_TXQ_UPDATE(sctp
);
1586 * This function steps through messages starting at meta and checks if
1587 * the message is abandoned. It stops when it hits an unsent chunk or
1588 * a message that has all its chunk acked. This is the only place
1589 * where the sctp_adv_pap is moved forward to indicate abandoned
1593 sctp_check_adv_ack_pt(sctp_t
*sctp
, mblk_t
*meta
, mblk_t
*mp
)
1595 uint32_t tsn
= sctp
->sctp_adv_pap
;
1596 sctp_data_hdr_t
*sdc
;
1597 sctp_msg_hdr_t
*msg_hdr
;
1600 sdc
= (sctp_data_hdr_t
*)mp
->b_rptr
;
1601 ASSERT(SEQ_GT(ntohl(sdc
->sdh_tsn
), sctp
->sctp_lastack_rxd
));
1602 msg_hdr
= (sctp_msg_hdr_t
*)meta
->b_rptr
;
1603 if (!SCTP_IS_MSG_ABANDONED(meta
) &&
1604 !SCTP_MSG_TO_BE_ABANDONED(meta
, msg_hdr
, sctp
)) {
1607 while (meta
!= NULL
) {
1608 while (mp
!= NULL
&& SCTP_CHUNK_ISSENT(mp
)) {
1609 sdc
= (sctp_data_hdr_t
*)mp
->b_rptr
;
1610 tsn
= ntohl(sdc
->sdh_tsn
);
1616 * We continue checking for successive messages only if there
1617 * is a chunk marked for retransmission. Else, we might
1618 * end up sending FTSN prematurely for chunks that have been
1619 * sent, but not yet acked.
1621 if ((meta
= meta
->b_next
) != NULL
) {
1622 msg_hdr
= (sctp_msg_hdr_t
*)meta
->b_rptr
;
1623 if (!SCTP_IS_MSG_ABANDONED(meta
) &&
1624 !SCTP_MSG_TO_BE_ABANDONED(meta
, msg_hdr
, sctp
)) {
1627 for (mp
= meta
->b_cont
; mp
!= NULL
; mp
= mp
->b_next
) {
1628 if (!SCTP_CHUNK_ISSENT(mp
)) {
1629 sctp
->sctp_adv_pap
= tsn
;
1632 if (SCTP_CHUNK_WANT_REXMIT(mp
))
1639 sctp
->sctp_adv_pap
= tsn
;
1644 * Determine if we should bundle a data chunk with the chunk being
1645 * retransmitted. We bundle if
1647 * - the chunk is sent to the same destination and unack'ed.
1651 * - the chunk is unsent, i.e. new data.
/*
 * Evaluates to non-zero when chunk mp is not abandoned and at least one of
 * the following holds:
 *   (a) SCTP_CHUNK_ISSENT(mp) is true, SCTP_CHUNK_DEST(mp) equals fp and
 *       SCTP_CHUNK_ISACKED(mp) is false; or
 *   (b) the REXMIT/SENT bits of mp->b_flag are anything other than
 *       "SENT set, REXMIT clear" (i.e. SENT is clear or REXMIT is set).
 * The mp argument is expanded several times, so callers should pass an
 * expression without side effects.
 */
1653 #define SCTP_CHUNK_RX_CANBUNDLE(mp, fp) \
1654 (!SCTP_CHUNK_ABANDONED((mp)) && \
1655 ((SCTP_CHUNK_ISSENT((mp)) && (SCTP_CHUNK_DEST(mp) == (fp) && \
1656 !SCTP_CHUNK_ISACKED(mp))) || \
1657 (((mp)->b_flag & (SCTP_CHUNK_FLAG_REXMIT|SCTP_CHUNK_FLAG_SENT)) != \
1658 SCTP_CHUNK_FLAG_SENT)))
1661 * Retransmit first segment which hasn't been acked with cumtsn or send
1662 * a Forward TSN chunk, if appropriate.
1665 sctp_rexmit(sctp_t
*sctp
, sctp_faddr_t
*oldfp
)
1670 mblk_t
*meta
= sctp
->sctp_xmit_head
;
1672 uint32_t seglen
= 0;
1676 sctp_data_hdr_t
*sdc
;
1678 uint32_t adv_pap
= sctp
->sctp_adv_pap
;
1679 boolean_t do_ftsn
= B_FALSE
;
1680 boolean_t ftsn_check
= B_TRUE
;
1681 uint32_t first_ua_tsn
;
1682 sctp_msg_hdr_t
*mhdr
;
1683 sctp_stack_t
*sctps
= sctp
->sctp_sctps
;
1686 while (meta
!= NULL
) {
1687 for (mp
= meta
->b_cont
; mp
!= NULL
; mp
= mp
->b_next
) {
1690 if (!SCTP_CHUNK_ISSENT(mp
))
1693 * We break in the following cases -
1695 * if the advanced peer ack point includes the next
1696 * chunk to be retransmitted - possibly the Forward
1699 * if we are PRSCTP aware and the next chunk to be
1700 * retransmitted is now abandoned
1702 * if the next chunk to be retransmitted is for
1703 * the dest on which the timer went off. (this
1704 * message is not abandoned).
1706 * We check for Forward TSN only for the first
1707 * eligible chunk to be retransmitted. The reason
1708 * being if the first eligible chunk is skipped (say
1709 * it was sent to a destination other than oldfp)
1710 * then we cannot advance the cum TSN via Forward
1713 * Also, ftsn_check is B_TRUE only for the first
1714 * eligible chunk, it will be B_FALSE for all
1715 * subsequent candidate messages for retransmission.
1717 sdc
= (sctp_data_hdr_t
*)mp
->b_rptr
;
1718 tsn
= ntohl(sdc
->sdh_tsn
);
1719 if (SEQ_GT(tsn
, sctp
->sctp_lastack_rxd
)) {
1720 if (sctp
->sctp_prsctp_aware
&& ftsn_check
) {
1721 if (SEQ_GEQ(sctp
->sctp_adv_pap
, tsn
)) {
1722 ASSERT(sctp
->sctp_prsctp_aware
);
1726 sctp_check_adv_ack_pt(sctp
,
1728 if (SEQ_GT(sctp
->sctp_adv_pap
,
1734 ftsn_check
= B_FALSE
;
1736 if (SCTP_CHUNK_DEST(mp
) == oldfp
)
1740 meta
= meta
->b_next
;
1741 if (meta
!= NULL
&& sctp
->sctp_prsctp_aware
) {
1742 mhdr
= (sctp_msg_hdr_t
*)meta
->b_rptr
;
1744 while (meta
!= NULL
&& (SCTP_IS_MSG_ABANDONED(meta
) ||
1745 SCTP_MSG_TO_BE_ABANDONED(meta
, mhdr
, sctp
))) {
1746 meta
= meta
->b_next
;
1752 * Retransmit fired for a destination which didn't have
1753 * any unacked data pending.
1755 if (sctp
->sctp_unacked
== 0 && sctp
->sctp_unsent
!= 0) {
1757 * Send a window probe. Inflate frwnd to allow
1758 * sending one segment.
1760 if (sctp
->sctp_frwnd
< (oldfp
->sf_pmss
- sizeof (*sdc
)))
1761 sctp
->sctp_frwnd
= oldfp
->sf_pmss
- sizeof (*sdc
);
1763 /* next TSN to send */
1764 sctp
->sctp_rxt_nxttsn
= sctp
->sctp_ltsn
;
1767 * The above sctp_frwnd adjustment is coarse. The "changed"
1768 * sctp_frwnd may allow us to send more than 1 packet. So
1769 * tell sctp_output() to send only 1 packet.
1771 sctp_output(sctp
, 1);
1774 sctp
->sctp_rxt_maxtsn
= sctp
->sctp_ltsn
- 1;
1775 ASSERT(sctp
->sctp_rxt_maxtsn
>= sctp
->sctp_rxt_nxttsn
);
1776 sctp
->sctp_zero_win_probe
= B_TRUE
;
1777 SCTPS_BUMP_MIB(sctps
, sctpOutWinProbe
);
1782 * After a time out, assume that everything has left the network. So
1783 * we can clear rxt_unacked for the original peer address.
1785 oldfp
->sf_rxt_unacked
= 0;
1788 * If we were probing for zero window, don't adjust retransmission
1789 * variables, but the timer is still backed off.
1791 if (sctp
->sctp_zero_win_probe
) {
1796 * Get the Zero Win Probe for retransmission, sctp_rxt_nxttsn
1797 * and sctp_rxt_maxtsn will specify the ZWP packet.
1800 if (oldfp
->sf_state
!= SCTP_FADDRS_ALIVE
)
1801 fp
= sctp_rotate_faddr(sctp
, oldfp
);
1802 pkt
= sctp_rexmit_packet(sctp
, &meta
, &mp
, fp
, &pkt_len
);
1804 ASSERT(pkt_len
<= fp
->sf_pmss
);
1805 sctp_set_iplen(sctp
, pkt
, fp
->sf_ixa
);
1806 (void) conn_ip_output(pkt
, fp
->sf_ixa
);
1807 BUMP_LOCAL(sctp
->sctp_opkts
);
1809 SCTP_KSTAT(sctps
, sctp_ss_rexmit_failed
);
1813 * The strikes will be cleared by sctp_faddr_alive() when the
1814 * other side sends us an ack.
1816 oldfp
->sf_strikes
++;
1817 sctp
->sctp_strikes
++;
1819 SCTP_CALC_RXT(sctp
, oldfp
, sctp
->sctp_rto_max
);
1820 if (oldfp
!= fp
&& oldfp
->sf_suna
!= 0)
1821 SCTP_FADDR_TIMER_RESTART(sctp
, oldfp
, fp
->sf_rto
);
1822 SCTP_FADDR_TIMER_RESTART(sctp
, fp
, fp
->sf_rto
);
1823 SCTPS_BUMP_MIB(sctps
, sctpOutWinProbe
);
1828 * Enter slowstart for this destination
1830 oldfp
->sf_ssthresh
= oldfp
->sf_cwnd
/ 2;
1831 if (oldfp
->sf_ssthresh
< 2 * oldfp
->sf_pmss
)
1832 oldfp
->sf_ssthresh
= 2 * oldfp
->sf_pmss
;
1833 oldfp
->sf_cwnd
= oldfp
->sf_pmss
;
1835 fp
= sctp_rotate_faddr(sctp
, oldfp
);
1837 sdc
= (sctp_data_hdr_t
*)mp
->b_rptr
;
1839 first_ua_tsn
= ntohl(sdc
->sdh_tsn
);
1841 sctp_make_ftsns(sctp
, meta
, mp
, &nmp
, fp
, &seglen
);
1843 sctp
->sctp_adv_pap
= adv_pap
;
1848 * Move to the next unabandoned chunk. XXXCheck if meta will
1849 * always be marked abandoned.
1851 while (meta
!= NULL
&& SCTP_IS_MSG_ABANDONED(meta
))
1852 meta
= meta
->b_next
;
1859 seglen
= ntohs(sdc
->sdh_len
);
1860 chunklen
= seglen
- sizeof (*sdc
);
1861 if ((extra
= seglen
& (SCTP_ALIGN
- 1)) != 0)
1862 extra
= SCTP_ALIGN
- extra
;
1864 /* Find out if we need to piggyback SACK. */
1865 if (sctp
->sctp_ftsn
== sctp
->sctp_lastacked
+ 1) {
1868 sacklen
= sizeof (sctp_chunk_hdr_t
) +
1869 sizeof (sctp_sack_chunk_t
) +
1870 (sizeof (sctp_sack_frag_t
) * sctp
->sctp_sack_gaps
);
1871 if (seglen
+ sacklen
> sctp
->sctp_lastdata
->sf_pmss
) {
1872 /* piggybacked SACK doesn't fit */
1876 * OK, we have room to send SACK back. But we
1877 * should send it back to the last fp where we
1878 * receive data from, unless sctp_lastdata equals
1879 * oldfp, then we should probably not send it
1880 * back to that fp. Also we should check that
1883 if (sctp
->sctp_lastdata
!= oldfp
&&
1884 sctp
->sctp_lastdata
->sf_state
==
1885 SCTP_FADDRS_ALIVE
) {
1886 fp
= sctp
->sctp_lastdata
;
1892 * Cancel RTT measurement if the retransmitted TSN is before the
1893 * TSN used for timimg.
1895 if (sctp
->sctp_out_time
!= 0 &&
1896 SEQ_GEQ(sctp
->sctp_rtt_tsn
, sdc
->sdh_tsn
)) {
1897 sctp
->sctp_out_time
= 0;
1899 /* Clear the counter as the RTT calculation may be off. */
1900 fp
->sf_rtt_updates
= 0;
1901 oldfp
->sf_rtt_updates
= 0;
1904 * After a timeout, we should change the current faddr so that
1905 * new chunks will be sent to the alternate address.
1907 sctp_set_faddr_current(sctp
, fp
);
1913 fill
= sctp_get_padding(sctp
, extra
);
1922 SCTP_CHUNK_CLEAR_FLAGS(nmp
);
1923 head
= sctp_add_proto_hdr(sctp
, fp
, nmp
, sacklen
, NULL
);
1926 SCTP_KSTAT(sctps
, sctp_rexmit_failed
);
1931 SCTP_CHUNK_SENT(sctp
, mp
, sdc
, fp
, chunklen
, meta
);
1936 /* We can at least and at most send 1 packet at timeout. */
1937 while (seglen
< fp
->sf_pmss
) {
1940 /* Go through the list to find more chunks to be bundled. */
1941 while (mp
!= NULL
) {
1942 /* Check if the chunk can be bundled. */
1943 if (SCTP_CHUNK_RX_CANBUNDLE(mp
, oldfp
))
1947 /* Go to the next message. */
1949 for (meta
= meta
->b_next
; meta
!= NULL
;
1950 meta
= meta
->b_next
) {
1951 mhdr
= (sctp_msg_hdr_t
*)meta
->b_rptr
;
1953 if (SCTP_IS_MSG_ABANDONED(meta
) ||
1954 SCTP_MSG_TO_BE_ABANDONED(meta
, mhdr
,
1963 * Check if there is a new message which potentially
1964 * could be bundled with this retransmission.
1966 meta
= sctp_get_msg_to_send(sctp
, &mp
, NULL
, &error
,
1967 seglen
, fp
->sf_pmss
- seglen
, NULL
);
1968 if (error
!= 0 || meta
== NULL
) {
1969 /* No more chunk to be bundled. */
1976 sdc
= (sctp_data_hdr_t
*)mp
->b_rptr
;
1977 new_len
= ntohs(sdc
->sdh_len
);
1978 chunklen
= new_len
- sizeof (*sdc
);
1980 if ((extra
= new_len
& (SCTP_ALIGN
- 1)) != 0)
1981 extra
= SCTP_ALIGN
- extra
;
1982 if ((new_len
= seglen
+ new_len
+ extra
) > fp
->sf_pmss
)
1984 if ((nmp
= dupmsg(mp
)) == NULL
)
1988 fill
= sctp_get_padding(sctp
, extra
);
1998 SCTP_CHUNK_CLEAR_FLAGS(nmp
);
1999 SCTP_CHUNK_SENT(sctp
, mp
, sdc
, fp
, chunklen
, meta
);
2005 if ((seglen
> fp
->sf_pmss
) && fp
->sf_isv4
) {
2006 ipha_t
*iph
= (ipha_t
*)head
->b_rptr
;
2009 * Path MTU is different from what we thought it would
2010 * be when we created chunks, or IP headers have grown.
2011 * Need to clear the DF bit.
2013 iph
->ipha_fragment_offset_and_flags
= 0;
2015 fp
->sf_rxt_unacked
+= seglen
;
2017 dprint(2, ("sctp_rexmit: Sending packet %d bytes, tsn %x "
2018 "ssn %d to %p (rwnd %d, lastack_rxd %x)\n",
2019 seglen
, ntohl(sdc
->sdh_tsn
), ntohs(sdc
->sdh_ssn
),
2020 (void *)fp
, sctp
->sctp_frwnd
, sctp
->sctp_lastack_rxd
));
2022 sctp
->sctp_rexmitting
= B_TRUE
;
2023 sctp
->sctp_rxt_nxttsn
= first_ua_tsn
;
2024 sctp
->sctp_rxt_maxtsn
= sctp
->sctp_ltsn
- 1;
2025 sctp_set_iplen(sctp
, head
, fp
->sf_ixa
);
2026 (void) conn_ip_output(head
, fp
->sf_ixa
);
2027 BUMP_LOCAL(sctp
->sctp_opkts
);
2030 * Restart the oldfp timer with exponential backoff and
2031 * the new fp timer for the retransmitted chunks.
2034 oldfp
->sf_strikes
++;
2035 sctp
->sctp_strikes
++;
2036 SCTP_CALC_RXT(sctp
, oldfp
, sctp
->sctp_rto_max
);
2038 * If there is still some data in the oldfp, restart the
2039 * retransmission timer. If there is no data, the heartbeat will
2040 * continue to run so it will do its job in checking the reachability
2043 if (oldfp
!= fp
&& oldfp
->sf_suna
!= 0)
2044 SCTP_FADDR_TIMER_RESTART(sctp
, oldfp
, oldfp
->sf_rto
);
2047 * Should we restart the timer of the new fp? If there is
2048 * outstanding data to the new fp, the timer should be
2049 * running already. So restarting it means that the timer
2050 * will fire later for those outstanding data. But if
2051 * we don't restart it, the timer will fire too early for the
2052 * just retransmitted chunks to the new fp. The reason is that we
2053 * don't keep a timestamp on when a chunk is retransmitted.
2054 * So when the timer fires, it will just search for the
2055 * chunk with the earliest TSN sent to new fp. This probably
2056 * is the chunk we just retransmitted. So for now, let's
2057 * be conservative and restart the timer of the new fp.
2059 SCTP_FADDR_TIMER_RESTART(sctp
, fp
, fp
->sf_rto
);
2061 sctp
->sctp_active
= ddi_get_lbolt64();
2065 * This function is called by sctp_ss_rexmit() to create a packet
2066 * to be retransmitted to the given fp. The given meta and mp
2067 * parameters are respectively the sctp_msg_hdr_t and the mblk of the
2068 * first chunk to be retransmitted. This is also called when we want
2069 * to retransmit a zero window probe from sctp_rexmit() or when we
2070 * want to retransmit the zero window probe after the window has
2071 * opened from sctp_got_sack().
2074 sctp_rexmit_packet(sctp_t
*sctp
, mblk_t
**meta
, mblk_t
**mp
, sctp_faddr_t
*fp
,
2077 uint32_t seglen
= 0;
2083 sctp_data_hdr_t
*sdc
;
2084 sctp_msg_hdr_t
*mhdr
;
2086 sdc
= (sctp_data_hdr_t
*)(*mp
)->b_rptr
;
2087 seglen
= ntohs(sdc
->sdh_len
);
2088 chunklen
= seglen
- sizeof (*sdc
);
2089 if ((extra
= seglen
& (SCTP_ALIGN
- 1)) != 0)
2090 extra
= SCTP_ALIGN
- extra
;
2096 fill
= sctp_get_padding(sctp
, extra
);
2105 SCTP_CHUNK_CLEAR_FLAGS(nmp
);
2106 head
= sctp_add_proto_hdr(sctp
, fp
, nmp
, 0, NULL
);
2111 SCTP_CHUNK_SENT(sctp
, *mp
, sdc
, fp
, chunklen
, *meta
);
2113 * Don't update the TSN if we are doing a Zero Win Probe.
2115 if (!sctp
->sctp_zero_win_probe
)
2116 sctp
->sctp_rxt_nxttsn
= ntohl(sdc
->sdh_tsn
);
2117 *mp
= (*mp
)->b_next
;
2120 while (seglen
< fp
->sf_pmss
) {
2124 * Go through the list to find more chunks to be bundled.
2125 * We should only retransmit sent but unack'ed chunks. Since
2126 * they were sent before, the peer's receive window should
2127 * be able to receive them.
2129 while (*mp
!= NULL
) {
2130 /* Check if the chunk can be bundled. */
2131 if (SCTP_CHUNK_ISSENT(*mp
) && !SCTP_CHUNK_ISACKED(*mp
))
2133 *mp
= (*mp
)->b_next
;
2135 /* Go to the next message. */
2137 for (*meta
= (*meta
)->b_next
; *meta
!= NULL
;
2138 *meta
= (*meta
)->b_next
) {
2139 mhdr
= (sctp_msg_hdr_t
*)(*meta
)->b_rptr
;
2141 if (SCTP_IS_MSG_ABANDONED(*meta
) ||
2142 SCTP_MSG_TO_BE_ABANDONED(*meta
, mhdr
,
2147 *mp
= (*meta
)->b_cont
;
2150 /* No more chunk to be bundled. */
2154 sdc
= (sctp_data_hdr_t
*)(*mp
)->b_rptr
;
2155 /* Don't bundle chunks beyond sctp_rxt_maxtsn. */
2156 if (SEQ_GT(ntohl(sdc
->sdh_tsn
), sctp
->sctp_rxt_maxtsn
))
2158 new_len
= ntohs(sdc
->sdh_len
);
2159 chunklen
= new_len
- sizeof (*sdc
);
2161 if ((extra
= new_len
& (SCTP_ALIGN
- 1)) != 0)
2162 extra
= SCTP_ALIGN
- extra
;
2163 if ((new_len
= seglen
+ new_len
+ extra
) > fp
->sf_pmss
)
2165 if ((nmp
= dupmsg(*mp
)) == NULL
)
2169 fill
= sctp_get_padding(sctp
, extra
);
2179 SCTP_CHUNK_CLEAR_FLAGS(nmp
);
2180 SCTP_CHUNK_SENT(sctp
, *mp
, sdc
, fp
, chunklen
, *meta
);
2182 * Don't update the TSN if we are doing a Zero Win Probe.
2184 if (!sctp
->sctp_zero_win_probe
)
2185 sctp
->sctp_rxt_nxttsn
= ntohl(sdc
->sdh_tsn
);
2188 *mp
= (*mp
)->b_next
;
2190 *packet_len
= seglen
;
2191 fp
->sf_rxt_unacked
+= seglen
;
2196 * sctp_ss_rexmit() is called when we get a SACK after a timeout which
2197 * advances the cum_tsn but the cum_tsn is still less than what we have sent
2198 * (sctp_rxt_maxtsn) at the time of the timeout. This SACK is a "partial"
2199 * SACK. We retransmit unacked chunks without having to wait for another
2200 * timeout. The rationale is that the SACK should not be "partial" if all the
2201 * lost chunks have been retransmitted. Since the SACK is "partial,"
2202 * the chunks between the cum_tsn and the sctp_rxt_maxtsn should still
2203 * be missing. It is better for us to retransmit them now instead
2204 * of waiting for a timeout.
2207 sctp_ss_rexmit(sctp_t
*sctp
)
2215 sctp_data_hdr_t
*sdc
;
2217 sctp_stack_t
*sctps
= sctp
->sctp_sctps
;
2219 ASSERT(!sctp
->sctp_zero_win_probe
);
2222 * If the last cum ack is smaller than what we have just
2223 * retransmitted, simply return.
2225 if (SEQ_GEQ(sctp
->sctp_lastack_rxd
, sctp
->sctp_rxt_nxttsn
))
2226 sctp
->sctp_rxt_nxttsn
= sctp
->sctp_lastack_rxd
+ 1;
2229 ASSERT(SEQ_LEQ(sctp
->sctp_rxt_nxttsn
, sctp
->sctp_rxt_maxtsn
));
2232 * After a timer fires, sctp_current should be set to the new
2233 * fp where the retransmitted chunks are sent.
2235 fp
= sctp
->sctp_current
;
2238 * Since we are retransmitting, we only need to use cwnd to determine
2239 * how much we can send as we were allowed (by peer's receive window)
2240 * to send those retransmitted chunks previously when they are first
2241 * sent. If we record how much we have retransmitted but
2242 * unacknowledged using rxt_unacked, then the amount we can now send
2243 * is equal to cwnd minus rxt_unacked.
2245 * The field rxt_unacked is incremented when we retransmit a packet
2246 * and decremented when we got a SACK acknowledging something. And
2247 * it is reset when the retransmission timer fires as we assume that
2248 * all packets have left the network after a timeout. If this
2249 * assumption is not true, it means that after a timeout, we can
2250 * get a SACK acknowledging more than rxt_unacked (its value only
2251 * contains what is retransmitted when the timer fires). So
2252 * rxt_unacked will become very big (it is an unsigned int so going
2253 * negative means that the value is huge). This is the reason we
2254 * always send at least 1 MSS bytes.
2256 * The reason why we do not have an accurate count is that we
2257 * only know how many packets are outstanding (using the TSN numbers).
2258 * But we do not know how many bytes those packets contain. To
2259 * have an accurate count, we need to walk through the send list.
2260 * As it is not really important to have an accurate count during
2261 * retransmission, we skip this walk to save some time. This should
2262 * not make the retransmission too aggressive to cause congestion.
2264 if (fp
->sf_cwnd
<= fp
->sf_rxt_unacked
)
2265 tot_wnd
= fp
->sf_pmss
;
2267 tot_wnd
= fp
->sf_cwnd
- fp
->sf_rxt_unacked
;
2269 /* Find the first unack'ed chunk */
2270 for (meta
= sctp
->sctp_xmit_head
; meta
!= NULL
; meta
= meta
->b_next
) {
2271 sctp_msg_hdr_t
*mhdr
= (sctp_msg_hdr_t
*)meta
->b_rptr
;
2273 if (SCTP_IS_MSG_ABANDONED(meta
) ||
2274 SCTP_MSG_TO_BE_ABANDONED(meta
, mhdr
, sctp
)) {
2278 for (mp
= meta
->b_cont
; mp
!= NULL
; mp
= mp
->b_next
) {
2279 /* Again, this may not be possible */
2280 if (!SCTP_CHUNK_ISSENT(mp
))
2282 sdc
= (sctp_data_hdr_t
*)mp
->b_rptr
;
2283 if (ntohl(sdc
->sdh_tsn
) == sctp
->sctp_rxt_nxttsn
)
2288 /* Everything is abandoned... */
2292 if (!fp
->sf_timer_running
)
2293 SCTP_FADDR_TIMER_RESTART(sctp
, fp
, fp
->sf_rto
);
2294 pkt
= sctp_rexmit_packet(sctp
, &meta
, &mp
, fp
, &pkt_len
);
2296 SCTP_KSTAT(sctps
, sctp_ss_rexmit_failed
);
2299 if ((pkt_len
> fp
->sf_pmss
) && fp
->sf_isv4
) {
2300 ipha_t
*iph
= (ipha_t
*)pkt
->b_rptr
;
2303 * Path MTU is different from what we thought it would
2304 * be when we created chunks, or IP headers have grown.
2305 * Need to clear the DF bit.
2307 iph
->ipha_fragment_offset_and_flags
= 0;
2309 sctp_set_iplen(sctp
, pkt
, fp
->sf_ixa
);
2310 (void) conn_ip_output(pkt
, fp
->sf_ixa
);
2311 BUMP_LOCAL(sctp
->sctp_opkts
);
2313 /* Check and see if there is more chunk to be retransmitted. */
2314 if (tot_wnd
<= pkt_len
|| tot_wnd
- pkt_len
< fp
->sf_pmss
||
2318 meta
= meta
->b_next
;
2322 /* Retransmit another packet if the window allows. */
2323 for (tot_wnd
-= pkt_len
, burst
= sctps
->sctps_maxburst
- 1;
2324 meta
!= NULL
&& burst
> 0; meta
= meta
->b_next
, burst
--) {
2327 for (; mp
!= NULL
; mp
= mp
->b_next
) {
2328 /* Again, this may not be possible */
2329 if (!SCTP_CHUNK_ISSENT(mp
))
2331 if (!SCTP_CHUNK_ISACKED(mp
))