Merge commit 'ea01a15a654b9e1c7b37d958f4d1911882ed7781'
[unleashed.git] / kernel / net / sctp / sctp_output.c
blobb4308b826a752b11436ddcdfd33fe219b6e7a182
1 /*
2 * CDDL HEADER START
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
19 * CDDL HEADER END
23 * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
26 #include <sys/types.h>
27 #include <sys/systm.h>
28 #include <sys/stream.h>
29 #include <sys/cmn_err.h>
30 #define _SUN_TPI_VERSION 2
31 #include <sys/tihdr.h>
32 #include <sys/socket.h>
33 #include <sys/stropts.h>
34 #include <sys/strsun.h>
35 #include <sys/strsubr.h>
36 #include <sys/socketvar.h>
37 #include <inet/common.h>
38 #include <inet/mi.h>
39 #include <inet/ip.h>
40 #include <inet/ip_ire.h>
41 #include <inet/ip6.h>
42 #include <inet/sctp_ip.h>
43 #include <inet/ipclassifier.h>
/*
 * PR-SCTP comments.
 *
 * A message can expire before it gets to the transmit list (i.e. it is still
 * in the unsent list - unchunked), after it gets to the transmit list, but
 * before transmission has actually started, or after transmission has begun.
 * Accordingly, we check for the status of a message in sctp_chunkify() when
 * the message is being transferred from the unsent list to the transmit list;
 * in sctp_get_msg_to_send(), when we get the next chunk from the transmit
 * list and in sctp_rexmit() when we get the next chunk to be (re)transmitted.
 * When we nuke a message in sctp_chunkify(), all we need to do is take it
 * out of the unsent list and update sctp_unsent; when a message is deemed
 * timed-out in sctp_get_msg_to_send() we can just take it out of the transmit
 * list, update sctp_unsent IFF transmission for the message has not yet begun
 * (i.e. !SCTP_CHUNK_ISSENT(meta->b_cont)). However, if transmission for the
 * message has started, then we cannot just take it out of the list, we need
 * to send a Forward TSN chunk to the peer so that the peer can clear its
 * fragment list for this message. However, we cannot just send the Forward
 * TSN in sctp_get_msg_to_send() because there might be unacked chunks for
 * messages preceding this abandoned message. So, we send a Forward TSN
 * IFF all messages prior to this abandoned message have been SACKed; if not,
 * we defer sending the Forward TSN to sctp_cumack(), which will check for
 * this condition and send the Forward TSN via sctp_check_abandoned_msg(). In
 * sctp_rexmit() when we check for retransmissions, we need to determine if
 * the advanced peer ack point can be moved ahead, and if so, send a Forward
 * TSN to the peer instead of retransmitting the chunk. Note that when
 * we send a Forward TSN for a message, there may be yet unsent chunks for
 * this message; we need to mark all such chunks as abandoned, so that
 * sctp_cumack() can take the message out of the transmit list; additionally
 * sctp_unsent needs to be adjusted. Whenever sctp_unsent is updated (i.e.
 * decremented when a message/chunk is deemed abandoned), sockfs needs to
 * be notified so that it can adjust its idea of the queued message.
 */
79 #include <inet/sctp/sctp_impl.h>
81 static struct kmem_cache *sctp_kmem_ftsn_set_cache;
82 static mblk_t *sctp_chunkify(sctp_t *, int, int, int);
84 #ifdef DEBUG
85 static boolean_t sctp_verify_chain(mblk_t *, mblk_t *);
86 #endif
89 * Called to allocate a header mblk when sending data to SCTP.
90 * Data will follow in b_cont of this mblk.
92 mblk_t *
93 sctp_alloc_hdr(const char *name, int nlen, const char *control, int clen,
94 int flags)
96 mblk_t *mp;
97 struct T_unitdata_req *tudr;
98 size_t size;
99 int error;
101 size = sizeof (*tudr) + _TPI_ALIGN_TOPT(nlen) + clen;
102 size = MAX(size, sizeof (sctp_msg_hdr_t));
103 if (flags & SCTP_CAN_BLOCK) {
104 mp = allocb_wait(size, BPRI_MED, 0, &error);
105 } else {
106 mp = allocb(size, BPRI_MED);
108 if (mp) {
109 tudr = (struct T_unitdata_req *)mp->b_rptr;
110 tudr->PRIM_type = T_UNITDATA_REQ;
111 tudr->DEST_length = nlen;
112 tudr->DEST_offset = sizeof (*tudr);
113 tudr->OPT_length = clen;
114 tudr->OPT_offset = (t_scalar_t)(sizeof (*tudr) +
115 _TPI_ALIGN_TOPT(nlen));
116 if (nlen > 0)
117 bcopy(name, tudr + 1, nlen);
118 if (clen > 0)
119 bcopy(control, (char *)tudr + tudr->OPT_offset, clen);
120 mp->b_wptr += (tudr ->OPT_offset + clen);
121 mp->b_datap->db_type = M_PROTO;
123 return (mp);
126 /*ARGSUSED2*/
128 sctp_sendmsg(sctp_t *sctp, mblk_t *mp, int flags)
130 sctp_faddr_t *fp = NULL;
131 struct T_unitdata_req *tudr;
132 int error = 0;
133 mblk_t *mproto = mp;
134 in6_addr_t *addr;
135 in6_addr_t tmpaddr;
136 uint16_t sid = sctp->sctp_def_stream;
137 uint32_t ppid = sctp->sctp_def_ppid;
138 uint32_t context = sctp->sctp_def_context;
139 uint16_t msg_flags = sctp->sctp_def_flags;
140 sctp_msg_hdr_t *sctp_msg_hdr;
141 uint32_t msg_len = 0;
142 uint32_t timetolive = sctp->sctp_def_timetolive;
143 conn_t *connp = sctp->sctp_connp;
145 ASSERT(DB_TYPE(mproto) == M_PROTO);
147 mp = mp->b_cont;
148 ASSERT(mp == NULL || DB_TYPE(mp) == M_DATA);
150 tudr = (struct T_unitdata_req *)mproto->b_rptr;
151 ASSERT(tudr->PRIM_type == T_UNITDATA_REQ);
153 /* Get destination address, if specified */
154 if (tudr->DEST_length > 0) {
155 sin_t *sin;
156 sin6_t *sin6;
158 sin = (struct sockaddr_in *)
159 (mproto->b_rptr + tudr->DEST_offset);
160 switch (sin->sin_family) {
161 case AF_INET:
162 if (tudr->DEST_length < sizeof (*sin)) {
163 return (EINVAL);
165 IN6_IPADDR_TO_V4MAPPED(sin->sin_addr.s_addr, &tmpaddr);
166 addr = &tmpaddr;
167 break;
168 case AF_INET6:
169 if (tudr->DEST_length < sizeof (*sin6)) {
170 return (EINVAL);
172 sin6 = (struct sockaddr_in6 *)
173 (mproto->b_rptr + tudr->DEST_offset);
174 addr = &sin6->sin6_addr;
175 break;
176 default:
177 return (EAFNOSUPPORT);
179 fp = sctp_lookup_faddr(sctp, addr);
180 if (fp == NULL) {
181 return (EINVAL);
184 /* Ancillary Data? */
185 if (tudr->OPT_length > 0) {
186 struct cmsghdr *cmsg;
187 char *cend;
188 struct sctp_sndrcvinfo *sndrcv;
190 cmsg = (struct cmsghdr *)(mproto->b_rptr + tudr->OPT_offset);
191 cend = ((char *)cmsg + tudr->OPT_length);
192 ASSERT(cend <= (char *)mproto->b_wptr);
194 for (;;) {
195 if ((char *)(cmsg + 1) > cend ||
196 ((char *)cmsg + cmsg->cmsg_len) > cend) {
197 break;
199 if ((cmsg->cmsg_level == IPPROTO_SCTP) &&
200 (cmsg->cmsg_type == SCTP_SNDRCV)) {
201 if (cmsg->cmsg_len <
202 (sizeof (*sndrcv) + sizeof (*cmsg))) {
203 return (EINVAL);
205 sndrcv = (struct sctp_sndrcvinfo *)(cmsg + 1);
206 sid = sndrcv->sinfo_stream;
207 msg_flags = sndrcv->sinfo_flags;
208 ppid = sndrcv->sinfo_ppid;
209 context = sndrcv->sinfo_context;
210 timetolive = sndrcv->sinfo_timetolive;
211 break;
213 if (cmsg->cmsg_len > 0)
214 cmsg = CMSG_NEXT(cmsg);
215 else
216 break;
219 if (msg_flags & MSG_ABORT) {
220 if (mp && mp->b_cont) {
221 mblk_t *pump = msgpullup(mp, -1);
222 if (!pump) {
223 return (ENOMEM);
225 freemsg(mp);
226 mp = pump;
227 mproto->b_cont = mp;
229 RUN_SCTP(sctp);
230 sctp_user_abort(sctp, mp);
231 freemsg(mproto);
232 goto done2;
234 if (mp == NULL)
235 goto done;
237 RUN_SCTP(sctp);
239 /* Reject any new data requests if we are shutting down */
240 if (sctp->sctp_state > SCTPS_ESTABLISHED ||
241 (sctp->sctp_connp->conn_state_flags & CONN_CLOSING)) {
242 error = EPIPE;
243 goto unlock_done;
246 /* Re-use the mproto to store relevant info. */
247 ASSERT(MBLKSIZE(mproto) >= sizeof (*sctp_msg_hdr));
249 mproto->b_rptr = mproto->b_datap->db_base;
250 mproto->b_wptr = mproto->b_rptr + sizeof (*sctp_msg_hdr);
252 sctp_msg_hdr = (sctp_msg_hdr_t *)mproto->b_rptr;
253 bzero(sctp_msg_hdr, sizeof (*sctp_msg_hdr));
254 sctp_msg_hdr->smh_context = context;
255 sctp_msg_hdr->smh_sid = sid;
256 sctp_msg_hdr->smh_ppid = ppid;
257 sctp_msg_hdr->smh_flags = msg_flags;
258 sctp_msg_hdr->smh_ttl = MSEC_TO_TICK(timetolive);
259 sctp_msg_hdr->smh_tob = ddi_get_lbolt64();
260 for (; mp != NULL; mp = mp->b_cont)
261 msg_len += MBLKL(mp);
262 sctp_msg_hdr->smh_msglen = msg_len;
264 /* User requested specific destination */
265 SCTP_SET_CHUNK_DEST(mproto, fp);
267 if (sctp->sctp_state >= SCTPS_COOKIE_ECHOED &&
268 sid >= sctp->sctp_num_ostr) {
269 /* Send sendfail event */
270 sctp_sendfail_event(sctp, dupmsg(mproto), SCTP_ERR_BAD_SID,
271 B_FALSE);
272 error = EINVAL;
273 goto unlock_done;
276 /* no data */
277 if (msg_len == 0) {
278 sctp_sendfail_event(sctp, dupmsg(mproto),
279 SCTP_ERR_NO_USR_DATA, B_FALSE);
280 error = EINVAL;
281 goto unlock_done;
284 /* Add it to the unsent list */
285 if (sctp->sctp_xmit_unsent == NULL) {
286 sctp->sctp_xmit_unsent = sctp->sctp_xmit_unsent_tail = mproto;
287 } else {
288 sctp->sctp_xmit_unsent_tail->b_next = mproto;
289 sctp->sctp_xmit_unsent_tail = mproto;
291 sctp->sctp_unsent += msg_len;
292 BUMP_LOCAL(sctp->sctp_msgcount);
294 * Notify sockfs if the tx queue is full.
296 if (SCTP_TXQ_LEN(sctp) >= connp->conn_sndbuf) {
297 sctp->sctp_txq_full = 1;
298 sctp->sctp_ulp_txq_full(sctp->sctp_ulpd, B_TRUE);
300 if (sctp->sctp_state == SCTPS_ESTABLISHED)
301 sctp_output(sctp, UINT_MAX);
302 done2:
303 WAKE_SCTP(sctp);
304 return (0);
305 unlock_done:
306 WAKE_SCTP(sctp);
307 done:
308 return (error);
312 * While there are messages on sctp_xmit_unsent, detach each one. For each:
313 * allocate space for the chunk header, fill in the data chunk, and fill in
314 * the chunk header. Then append it to sctp_xmit_tail.
315 * Return after appending as many bytes as required (bytes_to_send).
316 * We also return if we've appended one or more chunks, and find a subsequent
317 * unsent message is too big to fit in the segment.
319 mblk_t *
320 sctp_chunkify(sctp_t *sctp, int mss, int firstseg_len, int bytes_to_send)
322 mblk_t *mp;
323 mblk_t *chunk_mp;
324 mblk_t *chunk_head;
325 mblk_t *chunk_hdr;
326 mblk_t *chunk_tail = NULL;
327 int count;
328 int chunksize;
329 sctp_data_hdr_t *sdc;
330 mblk_t *mdblk = sctp->sctp_xmit_unsent;
331 sctp_faddr_t *fp;
332 sctp_faddr_t *fp1;
333 size_t xtralen;
334 sctp_msg_hdr_t *msg_hdr;
335 sctp_stack_t *sctps = sctp->sctp_sctps;
336 sctp_msg_hdr_t *next_msg_hdr;
337 size_t nextlen;
338 int remaining_len = mss - firstseg_len;
340 ASSERT(remaining_len >= 0);
342 fp = SCTP_CHUNK_DEST(mdblk);
343 if (fp == NULL)
344 fp = sctp->sctp_current;
345 if (fp->sf_isv4)
346 xtralen = sctp->sctp_hdr_len + sctps->sctps_wroff_xtra +
347 sizeof (*sdc);
348 else
349 xtralen = sctp->sctp_hdr6_len + sctps->sctps_wroff_xtra +
350 sizeof (*sdc);
351 count = chunksize = remaining_len - sizeof (*sdc);
352 nextmsg:
353 next_msg_hdr = (sctp_msg_hdr_t *)sctp->sctp_xmit_unsent->b_rptr;
354 nextlen = next_msg_hdr->smh_msglen;
356 * Will the entire next message fit in the current packet ?
357 * if not, leave it on the unsent list.
359 if ((firstseg_len != 0) && (nextlen > remaining_len))
360 return (NULL);
362 chunk_mp = mdblk->b_cont;
365 * If this partially chunked, we ignore the next one for now and
366 * use the one already present. For the unchunked bits, we use the
367 * length of the last chunk.
369 if (SCTP_IS_MSG_CHUNKED(mdblk)) {
370 int chunk_len;
372 ASSERT(chunk_mp->b_next != NULL);
373 mdblk->b_cont = chunk_mp->b_next;
374 chunk_mp->b_next = NULL;
375 SCTP_MSG_CLEAR_CHUNKED(mdblk);
376 mp = mdblk->b_cont;
377 while (mp->b_next != NULL)
378 mp = mp->b_next;
379 chunk_len = ntohs(((sctp_data_hdr_t *)mp->b_rptr)->sdh_len);
380 if (fp->sf_pmss - chunk_len > sizeof (*sdc))
381 count = chunksize = fp->sf_pmss - chunk_len;
382 else
383 count = chunksize = fp->sf_pmss;
384 count = chunksize = count - sizeof (*sdc);
385 } else {
386 msg_hdr = (sctp_msg_hdr_t *)mdblk->b_rptr;
387 if (SCTP_MSG_TO_BE_ABANDONED(mdblk, msg_hdr, sctp)) {
388 sctp->sctp_xmit_unsent = mdblk->b_next;
389 if (sctp->sctp_xmit_unsent == NULL)
390 sctp->sctp_xmit_unsent_tail = NULL;
391 ASSERT(sctp->sctp_unsent >= msg_hdr->smh_msglen);
392 sctp->sctp_unsent -= msg_hdr->smh_msglen;
393 mdblk->b_next = NULL;
394 BUMP_LOCAL(sctp->sctp_prsctpdrop);
396 * Update ULP the amount of queued data, which is
397 * sent-unack'ed + unsent.
399 if (!SCTP_IS_DETACHED(sctp))
400 SCTP_TXQ_UPDATE(sctp);
401 sctp_sendfail_event(sctp, mdblk, 0, B_FALSE);
402 goto try_next;
404 mdblk->b_cont = NULL;
406 msg_hdr = (sctp_msg_hdr_t *)mdblk->b_rptr;
407 nextchunk:
408 chunk_head = chunk_mp;
409 chunk_tail = NULL;
411 /* Skip as many mblk's as we need */
412 while (chunk_mp != NULL && ((count - MBLKL(chunk_mp)) >= 0)) {
413 count -= MBLKL(chunk_mp);
414 chunk_tail = chunk_mp;
415 chunk_mp = chunk_mp->b_cont;
417 /* Split the chain, if needed */
418 if (chunk_mp != NULL) {
419 if (count > 0) {
420 mblk_t *split_mp = dupb(chunk_mp);
422 if (split_mp == NULL) {
423 if (mdblk->b_cont == NULL) {
424 mdblk->b_cont = chunk_head;
425 } else {
426 SCTP_MSG_SET_CHUNKED(mdblk);
427 ASSERT(chunk_head->b_next == NULL);
428 chunk_head->b_next = mdblk->b_cont;
429 mdblk->b_cont = chunk_head;
431 return (sctp->sctp_xmit_tail);
433 if (chunk_tail != NULL) {
434 chunk_tail->b_cont = split_mp;
435 chunk_tail = chunk_tail->b_cont;
436 } else {
437 chunk_head = chunk_tail = split_mp;
439 chunk_tail->b_wptr = chunk_tail->b_rptr + count;
440 chunk_mp->b_rptr = chunk_tail->b_wptr;
441 count = 0;
442 } else if (chunk_tail == NULL) {
443 goto next;
444 } else {
445 chunk_tail->b_cont = NULL;
448 /* Alloc chunk hdr, if needed */
449 if (DB_REF(chunk_head) > 1 ||
450 ((intptr_t)chunk_head->b_rptr) & (SCTP_ALIGN - 1) ||
451 MBLKHEAD(chunk_head) < sizeof (*sdc)) {
452 if ((chunk_hdr = allocb(xtralen, BPRI_MED)) == NULL) {
453 if (mdblk->b_cont == NULL) {
454 if (chunk_mp != NULL)
455 linkb(chunk_head, chunk_mp);
456 mdblk->b_cont = chunk_head;
457 } else {
458 SCTP_MSG_SET_CHUNKED(mdblk);
459 if (chunk_mp != NULL)
460 linkb(chunk_head, chunk_mp);
461 ASSERT(chunk_head->b_next == NULL);
462 chunk_head->b_next = mdblk->b_cont;
463 mdblk->b_cont = chunk_head;
465 return (sctp->sctp_xmit_tail);
467 chunk_hdr->b_rptr += xtralen - sizeof (*sdc);
468 chunk_hdr->b_wptr = chunk_hdr->b_rptr + sizeof (*sdc);
469 chunk_hdr->b_cont = chunk_head;
470 } else {
471 chunk_hdr = chunk_head;
472 chunk_hdr->b_rptr -= sizeof (*sdc);
474 ASSERT(chunk_hdr->b_datap->db_ref == 1);
475 sdc = (sctp_data_hdr_t *)chunk_hdr->b_rptr;
476 sdc->sdh_id = CHUNK_DATA;
477 sdc->sdh_flags = 0;
478 sdc->sdh_len = htons(sizeof (*sdc) + chunksize - count);
479 ASSERT(sdc->sdh_len);
480 sdc->sdh_sid = htons(msg_hdr->smh_sid);
482 * We defer assigning the SSN just before sending the chunk, else
483 * if we drop the chunk in sctp_get_msg_to_send(), we would need
484 * to send a Forward TSN to let the peer know. Some more comments
485 * about this in sctp_impl.h for SCTP_CHUNK_SENT.
487 sdc->sdh_payload_id = msg_hdr->smh_ppid;
489 if (mdblk->b_cont == NULL) {
490 mdblk->b_cont = chunk_hdr;
491 SCTP_DATA_SET_BBIT(sdc);
492 } else {
493 mp = mdblk->b_cont;
494 while (mp->b_next != NULL)
495 mp = mp->b_next;
496 mp->b_next = chunk_hdr;
499 bytes_to_send -= (chunksize - count);
500 if (chunk_mp != NULL) {
501 next:
502 count = chunksize = fp->sf_pmss - sizeof (*sdc);
503 goto nextchunk;
505 SCTP_DATA_SET_EBIT(sdc);
506 sctp->sctp_xmit_unsent = mdblk->b_next;
507 if (mdblk->b_next == NULL) {
508 sctp->sctp_xmit_unsent_tail = NULL;
510 mdblk->b_next = NULL;
512 if (sctp->sctp_xmit_tail == NULL) {
513 sctp->sctp_xmit_head = sctp->sctp_xmit_tail = mdblk;
514 } else {
515 mp = sctp->sctp_xmit_tail;
516 while (mp->b_next != NULL)
517 mp = mp->b_next;
518 mp->b_next = mdblk;
519 mdblk->b_prev = mp;
521 try_next:
522 if (bytes_to_send > 0 && sctp->sctp_xmit_unsent != NULL) {
523 mdblk = sctp->sctp_xmit_unsent;
524 fp1 = SCTP_CHUNK_DEST(mdblk);
525 if (fp1 == NULL)
526 fp1 = sctp->sctp_current;
527 if (fp == fp1) {
528 size_t len = MBLKL(mdblk->b_cont);
529 if ((count > 0) &&
530 ((len > fp->sf_pmss - sizeof (*sdc)) ||
531 (len <= count))) {
532 count -= sizeof (*sdc);
533 count = chunksize = count - (count & 0x3);
534 } else {
535 count = chunksize = fp->sf_pmss -
536 sizeof (*sdc);
538 } else {
539 if (fp1->sf_isv4)
540 xtralen = sctp->sctp_hdr_len;
541 else
542 xtralen = sctp->sctp_hdr6_len;
543 xtralen += sctps->sctps_wroff_xtra + sizeof (*sdc);
544 count = chunksize = fp1->sf_pmss - sizeof (*sdc);
545 fp = fp1;
547 goto nextmsg;
549 return (sctp->sctp_xmit_tail);
552 void
553 sctp_free_msg(mblk_t *ump)
555 mblk_t *mp, *nmp;
557 for (mp = ump->b_cont; mp; mp = nmp) {
558 nmp = mp->b_next;
559 mp->b_next = mp->b_prev = NULL;
560 freemsg(mp);
562 ASSERT(!ump->b_prev);
563 ump->b_next = NULL;
564 freeb(ump);
567 mblk_t *
568 sctp_add_proto_hdr(sctp_t *sctp, sctp_faddr_t *fp, mblk_t *mp, int sacklen,
569 int *error)
571 int hdrlen;
572 uchar_t *hdr;
573 int isv4 = fp->sf_isv4;
574 sctp_stack_t *sctps = sctp->sctp_sctps;
576 if (error != NULL)
577 *error = 0;
579 if (isv4) {
580 hdrlen = sctp->sctp_hdr_len;
581 hdr = sctp->sctp_iphc;
582 } else {
583 hdrlen = sctp->sctp_hdr6_len;
584 hdr = sctp->sctp_iphc6;
587 * A reject|blackhole could mean that the address is 'down'. Similarly,
588 * it is possible that the address went down, we tried to send an
589 * heartbeat and ended up setting fp->sf_saddr as unspec because we
590 * didn't have any usable source address. In either case
591 * sctp_get_dest() will try find an IRE, if available, and set
592 * the source address, if needed. If we still don't have any
593 * usable source address, fp->sf_state will be SCTP_FADDRS_UNREACH and
594 * we return EHOSTUNREACH.
596 ASSERT(fp->sf_ixa->ixa_ire != NULL);
597 if ((fp->sf_ixa->ixa_ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE)) ||
598 SCTP_IS_ADDR_UNSPEC(fp->sf_isv4, fp->sf_saddr)) {
599 sctp_get_dest(sctp, fp);
600 if (fp->sf_state == SCTP_FADDRS_UNREACH) {
601 if (error != NULL)
602 *error = EHOSTUNREACH;
603 return (NULL);
606 /* Copy in IP header. */
607 if ((mp->b_rptr - mp->b_datap->db_base) <
608 (sctps->sctps_wroff_xtra + hdrlen + sacklen) || DB_REF(mp) > 2) {
609 mblk_t *nmp;
612 * This can happen if IP headers are adjusted after
613 * data was moved into chunks, or during retransmission,
614 * or things like snoop is running.
616 nmp = allocb(sctps->sctps_wroff_xtra + hdrlen + sacklen,
617 BPRI_MED);
618 if (nmp == NULL) {
619 if (error != NULL)
620 *error = ENOMEM;
621 return (NULL);
623 nmp->b_rptr += sctps->sctps_wroff_xtra;
624 nmp->b_wptr = nmp->b_rptr + hdrlen + sacklen;
625 nmp->b_cont = mp;
626 mp = nmp;
627 } else {
628 mp->b_rptr -= (hdrlen + sacklen);
630 bcopy(hdr, mp->b_rptr, hdrlen);
631 if (sacklen) {
632 sctp_fill_sack(sctp, mp->b_rptr + hdrlen, sacklen);
634 if (fp != sctp->sctp_current) {
635 /* change addresses in header */
636 if (isv4) {
637 ipha_t *iph = (ipha_t *)mp->b_rptr;
639 IN6_V4MAPPED_TO_IPADDR(&fp->sf_faddr, iph->ipha_dst);
640 if (!IN6_IS_ADDR_V4MAPPED_ANY(&fp->sf_saddr)) {
641 IN6_V4MAPPED_TO_IPADDR(&fp->sf_saddr,
642 iph->ipha_src);
643 } else if (sctp->sctp_bound_to_all) {
644 iph->ipha_src = INADDR_ANY;
646 } else {
647 ip6_t *ip6h = (ip6_t *)mp->b_rptr;
649 ip6h->ip6_dst = fp->sf_faddr;
650 if (!IN6_IS_ADDR_UNSPECIFIED(&fp->sf_saddr)) {
651 ip6h->ip6_src = fp->sf_saddr;
652 } else if (sctp->sctp_bound_to_all) {
653 ip6h->ip6_src = ipv6_all_zeros;
657 return (mp);
661 * SCTP requires every chunk to be padded so that the total length
662 * is a multiple of SCTP_ALIGN. This function returns a mblk with
663 * the specified pad length.
665 static mblk_t *
666 sctp_get_padding(sctp_t *sctp, int pad)
668 mblk_t *fill;
670 ASSERT(pad < SCTP_ALIGN);
671 ASSERT(sctp->sctp_pad_mp != NULL);
672 if ((fill = dupb(sctp->sctp_pad_mp)) != NULL) {
673 fill->b_wptr += pad;
674 return (fill);
678 * The memory saving path of reusing the sctp_pad_mp
679 * fails may be because it has been dupb() too
680 * many times (DBLK_REFMAX). Use the memory consuming
681 * path of allocating the pad mblk.
683 if ((fill = allocb(SCTP_ALIGN, BPRI_MED)) != NULL) {
684 /* Zero it out. SCTP_ALIGN is sizeof (int32_t) */
685 *(int32_t *)fill->b_rptr = 0;
686 fill->b_wptr += pad;
688 return (fill);
691 static mblk_t *
692 sctp_find_fast_rexmit_mblks(sctp_t *sctp, int *total, sctp_faddr_t **fp)
694 mblk_t *meta;
695 mblk_t *start_mp = NULL;
696 mblk_t *end_mp = NULL;
697 mblk_t *mp, *nmp;
698 mblk_t *fill;
699 sctp_data_hdr_t *sdh;
700 int msglen;
701 int extra;
702 sctp_msg_hdr_t *msg_hdr;
703 sctp_faddr_t *old_fp = NULL;
704 sctp_faddr_t *chunk_fp;
705 sctp_stack_t *sctps = sctp->sctp_sctps;
707 for (meta = sctp->sctp_xmit_head; meta != NULL; meta = meta->b_next) {
708 msg_hdr = (sctp_msg_hdr_t *)meta->b_rptr;
709 if (SCTP_IS_MSG_ABANDONED(meta) ||
710 SCTP_MSG_TO_BE_ABANDONED(meta, msg_hdr, sctp)) {
711 continue;
713 for (mp = meta->b_cont; mp != NULL; mp = mp->b_next) {
714 if (SCTP_CHUNK_WANT_REXMIT(mp)) {
716 * Use the same peer address to do fast
717 * retransmission. If the original peer
718 * address is dead, switch to the current
719 * one. Record the old one so that we
720 * will pick the chunks sent to the old
721 * one for fast retransmission.
723 chunk_fp = SCTP_CHUNK_DEST(mp);
724 if (*fp == NULL) {
725 *fp = chunk_fp;
726 if ((*fp)->sf_state !=
727 SCTP_FADDRS_ALIVE) {
728 old_fp = *fp;
729 *fp = sctp->sctp_current;
731 } else if (old_fp == NULL && *fp != chunk_fp) {
732 continue;
733 } else if (old_fp != NULL &&
734 old_fp != chunk_fp) {
735 continue;
738 sdh = (sctp_data_hdr_t *)mp->b_rptr;
739 msglen = ntohs(sdh->sdh_len);
740 if ((extra = msglen & (SCTP_ALIGN - 1)) != 0) {
741 extra = SCTP_ALIGN - extra;
745 * We still return at least the first message
746 * even if that message cannot fit in as
747 * PMTU may have changed.
749 if (*total + msglen + extra >
750 (*fp)->sf_pmss && start_mp != NULL) {
751 return (start_mp);
753 if ((nmp = dupmsg(mp)) == NULL)
754 return (start_mp);
755 if (extra > 0) {
756 fill = sctp_get_padding(sctp, extra);
757 if (fill != NULL) {
758 linkb(nmp, fill);
759 } else {
760 return (start_mp);
763 SCTPS_BUMP_MIB(sctps, sctpOutFastRetrans);
764 BUMP_LOCAL(sctp->sctp_rxtchunks);
765 SCTP_CHUNK_CLEAR_REXMIT(mp);
766 if (start_mp == NULL) {
767 start_mp = nmp;
768 } else {
769 linkb(end_mp, nmp);
771 end_mp = nmp;
772 *total += msglen + extra;
773 dprint(2, ("sctp_find_fast_rexmit_mblks: "
774 "tsn %x\n", sdh->sdh_tsn));
778 /* Clear the flag as there is no more message to be fast rexmitted. */
779 sctp->sctp_chk_fast_rexmit = B_FALSE;
780 return (start_mp);
/*
 * Debug-only sanity check that a b_next chain is not broken: returns B_TRUE
 * if 'tail' can be reached from 'head', or if either pointer is NULL (there
 * is nothing to verify), and B_FALSE otherwise.
 */
#ifdef DEBUG
static boolean_t
sctp_verify_chain(mblk_t *head, mblk_t *tail)
{
	mblk_t *cur;

	if (head == NULL || tail == NULL)
		return (B_TRUE);

	for (cur = head; cur != NULL; cur = cur->b_next) {
		if (cur == tail)
			return (B_TRUE);
	}
	return (B_FALSE);
}
#endif
802 * Gets the next unsent chunk to transmit. Messages that are abandoned are
803 * skipped. A message can be abandoned if it has a non-zero timetolive and
804 * transmission has not yet started or if it is a partially reliable
805 * message and its time is up (assuming we are PR-SCTP aware).
806 * We only return a chunk if it will fit entirely in the current packet.
807 * 'cansend' is used to determine if need to try and chunkify messages from
808 * the unsent list, if any, and also as an input to sctp_chunkify() if so.
810 * firstseg_len indicates the space already used, cansend represents remaining
811 * space in the window, ((sf_pmss - firstseg_len) can therefore reasonably
812 * be used to compute the cansend arg).
814 mblk_t *
815 sctp_get_msg_to_send(sctp_t *sctp, mblk_t **mp, mblk_t *meta, int *error,
816 int32_t firstseg_len, uint32_t cansend, sctp_faddr_t *fp)
818 mblk_t *mp1;
819 sctp_msg_hdr_t *msg_hdr;
820 mblk_t *tmp_meta;
821 sctp_faddr_t *fp1;
823 ASSERT(error != NULL && mp != NULL);
824 *error = 0;
826 ASSERT(sctp->sctp_current != NULL);
828 chunkified:
829 while (meta != NULL) {
830 tmp_meta = meta->b_next;
831 msg_hdr = (sctp_msg_hdr_t *)meta->b_rptr;
832 mp1 = meta->b_cont;
833 if (SCTP_IS_MSG_ABANDONED(meta))
834 goto next_msg;
835 if (!SCTP_MSG_TO_BE_ABANDONED(meta, msg_hdr, sctp)) {
836 while (mp1 != NULL) {
837 if (SCTP_CHUNK_CANSEND(mp1)) {
838 *mp = mp1;
839 #ifdef DEBUG
840 ASSERT(sctp_verify_chain(
841 sctp->sctp_xmit_head, meta));
842 #endif
843 return (meta);
845 mp1 = mp1->b_next;
847 goto next_msg;
850 * If we come here and the first chunk is sent, then we
851 * we are PR-SCTP aware, in which case if the cumulative
852 * TSN has moved upto or beyond the first chunk (which
853 * means all the previous messages have been cumulative
854 * SACK'd), then we send a Forward TSN with the last
855 * chunk that was sent in this message. If we can't send
856 * a Forward TSN because previous non-abandoned messages
857 * have not been acked then we will defer the Forward TSN
858 * to sctp_rexmit() or sctp_cumack().
860 if (SCTP_CHUNK_ISSENT(mp1)) {
861 *error = sctp_check_abandoned_msg(sctp, meta);
862 if (*error != 0) {
863 #ifdef DEBUG
864 ASSERT(sctp_verify_chain(sctp->sctp_xmit_head,
865 sctp->sctp_xmit_tail));
866 #endif
867 return (NULL);
869 goto next_msg;
871 BUMP_LOCAL(sctp->sctp_prsctpdrop);
872 ASSERT(sctp->sctp_unsent >= msg_hdr->smh_msglen);
873 if (meta->b_prev == NULL) {
874 ASSERT(sctp->sctp_xmit_head == meta);
875 sctp->sctp_xmit_head = tmp_meta;
876 if (sctp->sctp_xmit_tail == meta)
877 sctp->sctp_xmit_tail = tmp_meta;
878 meta->b_next = NULL;
879 if (tmp_meta != NULL)
880 tmp_meta->b_prev = NULL;
881 } else if (meta->b_next == NULL) {
882 if (sctp->sctp_xmit_tail == meta)
883 sctp->sctp_xmit_tail = meta->b_prev;
884 meta->b_prev->b_next = NULL;
885 meta->b_prev = NULL;
886 } else {
887 meta->b_prev->b_next = tmp_meta;
888 tmp_meta->b_prev = meta->b_prev;
889 if (sctp->sctp_xmit_tail == meta)
890 sctp->sctp_xmit_tail = tmp_meta;
891 meta->b_prev = NULL;
892 meta->b_next = NULL;
894 sctp->sctp_unsent -= msg_hdr->smh_msglen;
896 * Update ULP the amount of queued data, which is
897 * sent-unack'ed + unsent.
899 if (!SCTP_IS_DETACHED(sctp))
900 SCTP_TXQ_UPDATE(sctp);
901 sctp_sendfail_event(sctp, meta, 0, B_TRUE);
902 next_msg:
903 meta = tmp_meta;
905 /* chunkify, if needed */
906 if (cansend > 0 && sctp->sctp_xmit_unsent != NULL) {
907 ASSERT(sctp->sctp_unsent > 0);
908 if (fp == NULL) {
909 fp = SCTP_CHUNK_DEST(sctp->sctp_xmit_unsent);
910 if (fp == NULL || fp->sf_state != SCTP_FADDRS_ALIVE)
911 fp = sctp->sctp_current;
912 } else {
914 * If user specified destination, try to honor that.
916 fp1 = SCTP_CHUNK_DEST(sctp->sctp_xmit_unsent);
917 if (fp1 != NULL && fp1->sf_state == SCTP_FADDRS_ALIVE &&
918 fp1 != fp) {
919 goto chunk_done;
922 meta = sctp_chunkify(sctp, fp->sf_pmss, firstseg_len, cansend);
923 if (meta == NULL)
924 goto chunk_done;
926 * sctp_chunkify() won't advance sctp_xmit_tail if it adds
927 * new chunk(s) to the tail, so we need to skip the
928 * sctp_xmit_tail, which would have already been processed.
929 * This could happen when there is unacked chunks, but
930 * nothing new to send.
931 * When sctp_chunkify() is called when the transmit queue
932 * is empty then we need to start from sctp_xmit_tail.
934 if (SCTP_CHUNK_ISSENT(sctp->sctp_xmit_tail->b_cont)) {
935 #ifdef DEBUG
936 mp1 = sctp->sctp_xmit_tail->b_cont;
937 while (mp1 != NULL) {
938 ASSERT(!SCTP_CHUNK_CANSEND(mp1));
939 mp1 = mp1->b_next;
941 #endif
942 if ((meta = sctp->sctp_xmit_tail->b_next) == NULL)
943 goto chunk_done;
945 goto chunkified;
947 chunk_done:
948 #ifdef DEBUG
949 ASSERT(sctp_verify_chain(sctp->sctp_xmit_head, sctp->sctp_xmit_tail));
950 #endif
951 return (NULL);
954 void
955 sctp_fast_rexmit(sctp_t *sctp)
957 mblk_t *mp, *head;
958 int pktlen = 0;
959 sctp_faddr_t *fp = NULL;
960 sctp_stack_t *sctps = sctp->sctp_sctps;
962 ASSERT(sctp->sctp_xmit_head != NULL);
963 mp = sctp_find_fast_rexmit_mblks(sctp, &pktlen, &fp);
964 if (mp == NULL) {
965 SCTP_KSTAT(sctps, sctp_fr_not_found);
966 return;
968 if ((head = sctp_add_proto_hdr(sctp, fp, mp, 0, NULL)) == NULL) {
969 freemsg(mp);
970 SCTP_KSTAT(sctps, sctp_fr_add_hdr);
971 return;
973 if ((pktlen > fp->sf_pmss) && fp->sf_isv4) {
974 ipha_t *iph = (ipha_t *)head->b_rptr;
976 iph->ipha_fragment_offset_and_flags = 0;
979 sctp_set_iplen(sctp, head, fp->sf_ixa);
980 (void) conn_ip_output(head, fp->sf_ixa);
981 BUMP_LOCAL(sctp->sctp_opkts);
982 sctp->sctp_active = fp->sf_lastactive = ddi_get_lbolt64();
985 void
986 sctp_output(sctp_t *sctp, uint_t num_pkt)
988 mblk_t *mp = NULL;
989 mblk_t *nmp;
990 mblk_t *head;
991 mblk_t *meta = sctp->sctp_xmit_tail;
992 mblk_t *fill = NULL;
993 uint16_t chunklen;
994 uint32_t cansend;
995 int32_t seglen;
996 int32_t xtralen;
997 int32_t sacklen;
998 int32_t pad = 0;
999 int32_t pathmax;
1000 int extra;
1001 int64_t now = LBOLT_FASTPATH64;
1002 sctp_faddr_t *fp;
1003 sctp_faddr_t *lfp;
1004 sctp_data_hdr_t *sdc;
1005 int error;
1006 boolean_t notsent = B_TRUE;
1007 sctp_stack_t *sctps = sctp->sctp_sctps;
1008 uint32_t tsn;
1010 if (sctp->sctp_ftsn == sctp->sctp_lastacked + 1) {
1011 sacklen = 0;
1012 } else {
1013 /* send a SACK chunk */
1014 sacklen = sizeof (sctp_chunk_hdr_t) +
1015 sizeof (sctp_sack_chunk_t) +
1016 (sizeof (sctp_sack_frag_t) * sctp->sctp_sack_gaps);
1017 lfp = sctp->sctp_lastdata;
1018 ASSERT(lfp != NULL);
1019 if (lfp->sf_state != SCTP_FADDRS_ALIVE)
1020 lfp = sctp->sctp_current;
1023 cansend = sctp->sctp_frwnd;
1024 if (sctp->sctp_unsent < cansend)
1025 cansend = sctp->sctp_unsent;
1028 * Start persist timer if unable to send or when
1029 * trying to send into a zero window. This timer
1030 * ensures the blocked send attempt is retried.
1032 if ((cansend < sctp->sctp_current->sf_pmss / 2) &&
1033 (sctp->sctp_unacked != 0) &&
1034 (sctp->sctp_unacked < sctp->sctp_current->sf_pmss) &&
1035 !sctp->sctp_ndelay ||
1036 (cansend == 0 && sctp->sctp_unacked == 0 &&
1037 sctp->sctp_unsent != 0)) {
1038 head = NULL;
1039 fp = sctp->sctp_current;
1040 goto unsent_data;
1042 if (meta != NULL)
1043 mp = meta->b_cont;
1044 while (cansend > 0 && num_pkt-- != 0) {
1045 pad = 0;
1048 * Find first segment eligible for transmit.
1050 while (mp != NULL) {
1051 if (SCTP_CHUNK_CANSEND(mp))
1052 break;
1053 mp = mp->b_next;
1055 if (mp == NULL) {
1056 meta = sctp_get_msg_to_send(sctp, &mp,
1057 meta == NULL ? NULL : meta->b_next, &error, sacklen,
1058 cansend, NULL);
1059 if (error != 0 || meta == NULL) {
1060 head = NULL;
1061 fp = sctp->sctp_current;
1062 goto unsent_data;
1064 sctp->sctp_xmit_tail = meta;
1067 sdc = (sctp_data_hdr_t *)mp->b_rptr;
1068 seglen = ntohs(sdc->sdh_len);
1069 xtralen = sizeof (*sdc);
1070 chunklen = seglen - xtralen;
1073 * Check rwnd.
1075 if (chunklen > cansend) {
1076 head = NULL;
1077 fp = SCTP_CHUNK_DEST(meta);
1078 if (fp == NULL || fp->sf_state != SCTP_FADDRS_ALIVE)
1079 fp = sctp->sctp_current;
1080 goto unsent_data;
1082 if ((extra = seglen & (SCTP_ALIGN - 1)) != 0)
1083 extra = SCTP_ALIGN - extra;
1086 * Pick destination address, and check cwnd.
1088 if (sacklen > 0 && (seglen + extra <= lfp->sf_cwnd -
1089 lfp->sf_suna) &&
1090 (seglen + sacklen + extra <= lfp->sf_pmss)) {
1092 * Only include SACK chunk if it can be bundled
1093 * with a data chunk, and sent to sctp_lastdata.
1095 pathmax = lfp->sf_cwnd - lfp->sf_suna;
1097 fp = lfp;
1098 if ((nmp = dupmsg(mp)) == NULL) {
1099 head = NULL;
1100 goto unsent_data;
1102 SCTP_CHUNK_CLEAR_FLAGS(nmp);
1103 head = sctp_add_proto_hdr(sctp, fp, nmp, sacklen,
1104 &error);
1105 if (head == NULL) {
1107 * If none of the source addresses are
1108 * available (i.e error == EHOSTUNREACH),
1109 * pretend we have sent the data. We will
1110 * eventually time out trying to retramsmit
1111 * the data if the interface never comes up.
1112 * If we have already sent some stuff (i.e.,
1113 * notsent is B_FALSE) then we are fine, else
1114 * just mark this packet as sent.
1116 if (notsent && error == EHOSTUNREACH) {
1117 SCTP_CHUNK_SENT(sctp, mp, sdc,
1118 fp, chunklen, meta);
1120 freemsg(nmp);
1121 SCTP_KSTAT(sctps, sctp_output_failed);
1122 goto unsent_data;
1124 seglen += sacklen;
1125 xtralen += sacklen;
1126 sacklen = 0;
1127 } else {
1128 fp = SCTP_CHUNK_DEST(meta);
1129 if (fp == NULL || fp->sf_state != SCTP_FADDRS_ALIVE)
1130 fp = sctp->sctp_current;
1132 * If we haven't sent data to this destination for
1133 * a while, do slow start again.
1135 if (now - fp->sf_lastactive > fp->sf_rto) {
1136 SET_CWND(fp, fp->sf_pmss,
1137 sctps->sctps_slow_start_after_idle);
1140 pathmax = fp->sf_cwnd - fp->sf_suna;
1141 if (seglen + extra > pathmax) {
1142 head = NULL;
1143 goto unsent_data;
1145 if ((nmp = dupmsg(mp)) == NULL) {
1146 head = NULL;
1147 goto unsent_data;
1149 SCTP_CHUNK_CLEAR_FLAGS(nmp);
1150 head = sctp_add_proto_hdr(sctp, fp, nmp, 0, &error);
1151 if (head == NULL) {
1153 * If none of the source addresses are
1154 * available (i.e error == EHOSTUNREACH),
1155 * pretend we have sent the data. We will
1156 * eventually time out trying to retramsmit
1157 * the data if the interface never comes up.
1158 * If we have already sent some stuff (i.e.,
1159 * notsent is B_FALSE) then we are fine, else
1160 * just mark this packet as sent.
1162 if (notsent && error == EHOSTUNREACH) {
1163 SCTP_CHUNK_SENT(sctp, mp, sdc,
1164 fp, chunklen, meta);
1166 freemsg(nmp);
1167 SCTP_KSTAT(sctps, sctp_output_failed);
1168 goto unsent_data;
1171 fp->sf_lastactive = now;
1172 if (pathmax > fp->sf_pmss)
1173 pathmax = fp->sf_pmss;
1174 SCTP_CHUNK_SENT(sctp, mp, sdc, fp, chunklen, meta);
1175 mp = mp->b_next;
1178 * Use this chunk to measure RTT?
1179 * Must not be a retransmision of an earlier chunk,
1180 * ensure the tsn is current.
1182 tsn = ntohl(sdc->sdh_tsn);
1183 if (sctp->sctp_out_time == 0 && tsn == (sctp->sctp_ltsn - 1)) {
1184 sctp->sctp_out_time = now;
1185 sctp->sctp_rtt_tsn = tsn;
1187 if (extra > 0) {
1188 fill = sctp_get_padding(sctp, extra);
1189 if (fill != NULL) {
1190 linkb(head, fill);
1191 pad = extra;
1192 seglen += extra;
1193 } else {
1194 goto unsent_data;
1198 * Bundle chunks. We linkb() the chunks together to send
1199 * downstream in a single packet.
1200 * Partial chunks MUST NOT be bundled with full chunks, so we
1201 * rely on sctp_get_msg_to_send() to only return messages that
1202 * will fit entirely in the current packet.
1204 while (seglen < pathmax) {
1205 int32_t new_len;
1206 int32_t new_xtralen;
1208 while (mp != NULL) {
1209 if (SCTP_CHUNK_CANSEND(mp))
1210 break;
1211 mp = mp->b_next;
1213 if (mp == NULL) {
1214 meta = sctp_get_msg_to_send(sctp, &mp,
1215 meta->b_next, &error, seglen,
1216 (seglen - xtralen) >= cansend ? 0 :
1217 cansend - seglen, fp);
1218 if (error != 0)
1219 break;
1220 /* If no more eligible chunks, cease bundling */
1221 if (meta == NULL)
1222 break;
1223 sctp->sctp_xmit_tail = meta;
1225 ASSERT(mp != NULL);
1226 if (!SCTP_CHUNK_ISSENT(mp) && SCTP_CHUNK_DEST(meta) &&
1227 fp != SCTP_CHUNK_DEST(meta)) {
1228 break;
1230 sdc = (sctp_data_hdr_t *)mp->b_rptr;
1231 chunklen = ntohs(sdc->sdh_len);
1232 if ((extra = chunklen & (SCTP_ALIGN - 1)) != 0)
1233 extra = SCTP_ALIGN - extra;
1235 new_len = seglen + chunklen;
1236 new_xtralen = xtralen + sizeof (*sdc);
1237 chunklen -= sizeof (*sdc);
1239 if (new_len - new_xtralen > cansend ||
1240 new_len + extra > pathmax) {
1241 break;
1243 if ((nmp = dupmsg(mp)) == NULL)
1244 break;
1245 if (extra > 0) {
1246 fill = sctp_get_padding(sctp, extra);
1247 if (fill != NULL) {
1248 pad += extra;
1249 new_len += extra;
1250 linkb(nmp, fill);
1251 } else {
1252 freemsg(nmp);
1253 break;
1256 seglen = new_len;
1257 xtralen = new_xtralen;
1258 SCTP_CHUNK_CLEAR_FLAGS(nmp);
1259 SCTP_CHUNK_SENT(sctp, mp, sdc, fp, chunklen, meta);
1260 linkb(head, nmp);
1261 mp = mp->b_next;
1263 if ((seglen > fp->sf_pmss) && fp->sf_isv4) {
1264 ipha_t *iph = (ipha_t *)head->b_rptr;
1267 * Path MTU is different from what we thought it would
1268 * be when we created chunks, or IP headers have grown.
1269 * Need to clear the DF bit.
1271 iph->ipha_fragment_offset_and_flags = 0;
1273 /* xmit segment */
1274 ASSERT(cansend >= seglen - pad - xtralen);
1275 cansend -= (seglen - pad - xtralen);
1276 dprint(2, ("sctp_output: Sending packet %d bytes, tsn %x "
1277 "ssn %d to %p (rwnd %d, cansend %d, lastack_rxd %x)\n",
1278 seglen - xtralen, ntohl(sdc->sdh_tsn),
1279 ntohs(sdc->sdh_ssn), (void *)fp, sctp->sctp_frwnd,
1280 cansend, sctp->sctp_lastack_rxd));
1281 sctp_set_iplen(sctp, head, fp->sf_ixa);
1282 (void) conn_ip_output(head, fp->sf_ixa);
1283 BUMP_LOCAL(sctp->sctp_opkts);
1284 /* arm rto timer (if not set) */
1285 if (!fp->sf_timer_running)
1286 SCTP_FADDR_TIMER_RESTART(sctp, fp, fp->sf_rto);
1287 notsent = B_FALSE;
1289 sctp->sctp_active = now;
1290 return;
1291 unsent_data:
1292 /* arm persist timer (if rto timer not set) */
1293 if (!fp->sf_timer_running)
1294 SCTP_FADDR_TIMER_RESTART(sctp, fp, fp->sf_rto);
1295 if (head != NULL)
1296 freemsg(head);
1300 * The following two functions initialize and destroy the cache
1301 * associated with the sets used for PR-SCTP.
1303 void
1304 sctp_ftsn_sets_init(void)
1306 sctp_kmem_ftsn_set_cache = kmem_cache_create("sctp_ftsn_set_cache",
1307 sizeof (sctp_ftsn_set_t), 0, NULL, NULL, NULL, NULL,
1308 NULL, 0);
1311 void
1312 sctp_ftsn_sets_fini(void)
1314 kmem_cache_destroy(sctp_kmem_ftsn_set_cache);
1318 /* Free PR-SCTP sets */
1319 void
1320 sctp_free_ftsn_set(sctp_ftsn_set_t *s)
1322 sctp_ftsn_set_t *p;
1324 while (s != NULL) {
1325 p = s->next;
1326 s->next = NULL;
1327 kmem_cache_free(sctp_kmem_ftsn_set_cache, s);
1328 s = p;
1333 * Given a message meta block, meta, this routine creates or modifies
1334 * the set that will be used to generate a Forward TSN chunk. If the
1335 * entry for stream id, sid, for this message already exists, the
1336 * sequence number, ssn, is updated if it is greater than the existing
1337 * one. If an entry for this sid does not exist, one is created if
1338 * the size does not exceed fp->sf_pmss. We return false in case
1339 * or an error.
1341 boolean_t
1342 sctp_add_ftsn_set(sctp_ftsn_set_t **s, sctp_faddr_t *fp, mblk_t *meta,
1343 uint_t *nsets, uint32_t *slen)
1345 sctp_ftsn_set_t *p;
1346 sctp_msg_hdr_t *msg_hdr = (sctp_msg_hdr_t *)meta->b_rptr;
1347 uint16_t sid = htons(msg_hdr->smh_sid);
1348 /* msg_hdr->smh_ssn is already in NBO */
1349 uint16_t ssn = msg_hdr->smh_ssn;
1351 ASSERT(s != NULL && nsets != NULL);
1352 ASSERT((*nsets == 0 && *s == NULL) || (*nsets > 0 && *s != NULL));
1354 if (*s == NULL) {
1355 ASSERT((*slen + sizeof (uint32_t)) <= fp->sf_pmss);
1356 *s = kmem_cache_alloc(sctp_kmem_ftsn_set_cache, KM_NOSLEEP);
1357 if (*s == NULL)
1358 return (B_FALSE);
1359 (*s)->ftsn_entries.ftsn_sid = sid;
1360 (*s)->ftsn_entries.ftsn_ssn = ssn;
1361 (*s)->next = NULL;
1362 *nsets = 1;
1363 *slen += sizeof (uint32_t);
1364 return (B_TRUE);
1366 for (p = *s; p->next != NULL; p = p->next) {
1367 if (p->ftsn_entries.ftsn_sid == sid) {
1368 if (SSN_GT(ssn, p->ftsn_entries.ftsn_ssn))
1369 p->ftsn_entries.ftsn_ssn = ssn;
1370 return (B_TRUE);
1373 /* the last one */
1374 if (p->ftsn_entries.ftsn_sid == sid) {
1375 if (SSN_GT(ssn, p->ftsn_entries.ftsn_ssn))
1376 p->ftsn_entries.ftsn_ssn = ssn;
1377 } else {
1378 if ((*slen + sizeof (uint32_t)) > fp->sf_pmss)
1379 return (B_FALSE);
1380 p->next = kmem_cache_alloc(sctp_kmem_ftsn_set_cache,
1381 KM_NOSLEEP);
1382 if (p->next == NULL)
1383 return (B_FALSE);
1384 p = p->next;
1385 p->ftsn_entries.ftsn_sid = sid;
1386 p->ftsn_entries.ftsn_ssn = ssn;
1387 p->next = NULL;
1388 (*nsets)++;
1389 *slen += sizeof (uint32_t);
1391 return (B_TRUE);
1395 * Given a set of stream id - sequence number pairs, this routing creates
1396 * a Forward TSN chunk. The cumulative TSN (advanced peer ack point)
1397 * for the chunk is obtained from sctp->sctp_adv_pap. The caller
1398 * will add the IP/SCTP header.
1400 mblk_t *
1401 sctp_make_ftsn_chunk(sctp_t *sctp, sctp_faddr_t *fp, sctp_ftsn_set_t *sets,
1402 uint_t nsets, uint32_t seglen)
1404 mblk_t *ftsn_mp;
1405 sctp_chunk_hdr_t *ch_hdr;
1406 uint32_t *advtsn;
1407 uint16_t schlen;
1408 size_t xtralen;
1409 ftsn_entry_t *ftsn_entry;
1410 sctp_stack_t *sctps = sctp->sctp_sctps;
1412 seglen += sizeof (sctp_chunk_hdr_t);
1413 if (fp->sf_isv4)
1414 xtralen = sctp->sctp_hdr_len + sctps->sctps_wroff_xtra;
1415 else
1416 xtralen = sctp->sctp_hdr6_len + sctps->sctps_wroff_xtra;
1417 ftsn_mp = allocb(xtralen + seglen, BPRI_MED);
1418 if (ftsn_mp == NULL)
1419 return (NULL);
1420 ftsn_mp->b_rptr += xtralen;
1421 ftsn_mp->b_wptr = ftsn_mp->b_rptr + seglen;
1423 ch_hdr = (sctp_chunk_hdr_t *)ftsn_mp->b_rptr;
1424 ch_hdr->sch_id = CHUNK_FORWARD_TSN;
1425 ch_hdr->sch_flags = 0;
1427 * The cast here should not be an issue since seglen is
1428 * the length of the Forward TSN chunk.
1430 schlen = (uint16_t)seglen;
1431 U16_TO_ABE16(schlen, &(ch_hdr->sch_len));
1433 advtsn = (uint32_t *)(ch_hdr + 1);
1434 U32_TO_ABE32(sctp->sctp_adv_pap, advtsn);
1435 ftsn_entry = (ftsn_entry_t *)(advtsn + 1);
1436 while (nsets > 0) {
1437 ASSERT((uchar_t *)&ftsn_entry[1] <= ftsn_mp->b_wptr);
1438 ftsn_entry->ftsn_sid = sets->ftsn_entries.ftsn_sid;
1439 ftsn_entry->ftsn_ssn = sets->ftsn_entries.ftsn_ssn;
1440 ftsn_entry++;
1441 sets = sets->next;
1442 nsets--;
1444 return (ftsn_mp);
1448 * Given a starting message, the routine steps through all the
1449 * messages whose TSN is less than sctp->sctp_adv_pap and creates
1450 * ftsn sets. The ftsn sets is then used to create an Forward TSN
1451 * chunk. All the messages, that have chunks that are included in the
1452 * ftsn sets, are flagged abandonded. If a message is partially sent
1453 * and is deemed abandoned, all remaining unsent chunks are marked
1454 * abandoned and are deducted from sctp_unsent.
1456 void
1457 sctp_make_ftsns(sctp_t *sctp, mblk_t *meta, mblk_t *mp, mblk_t **nmp,
1458 sctp_faddr_t *fp, uint32_t *seglen)
1460 mblk_t *mp1 = mp;
1461 mblk_t *mp_head = mp;
1462 mblk_t *meta_head = meta;
1463 mblk_t *head;
1464 sctp_ftsn_set_t *sets = NULL;
1465 uint_t nsets = 0;
1466 uint16_t clen;
1467 sctp_data_hdr_t *sdc;
1468 uint32_t sacklen;
1469 uint32_t adv_pap = sctp->sctp_adv_pap;
1470 uint32_t unsent = 0;
1471 boolean_t ubit;
1472 sctp_stack_t *sctps = sctp->sctp_sctps;
1474 *seglen = sizeof (uint32_t);
1476 sdc = (sctp_data_hdr_t *)mp1->b_rptr;
1477 while (meta != NULL &&
1478 SEQ_GEQ(sctp->sctp_adv_pap, ntohl(sdc->sdh_tsn))) {
1480 * Skip adding FTSN sets for un-ordered messages as they do
1481 * not have SSNs.
1483 ubit = SCTP_DATA_GET_UBIT(sdc);
1484 if (!ubit &&
1485 !sctp_add_ftsn_set(&sets, fp, meta, &nsets, seglen)) {
1486 meta = NULL;
1487 sctp->sctp_adv_pap = adv_pap;
1488 goto ftsn_done;
1490 while (mp1 != NULL && SCTP_CHUNK_ISSENT(mp1)) {
1491 sdc = (sctp_data_hdr_t *)mp1->b_rptr;
1492 adv_pap = ntohl(sdc->sdh_tsn);
1493 mp1 = mp1->b_next;
1495 meta = meta->b_next;
1496 if (meta != NULL) {
1497 mp1 = meta->b_cont;
1498 if (!SCTP_CHUNK_ISSENT(mp1))
1499 break;
1500 sdc = (sctp_data_hdr_t *)mp1->b_rptr;
1503 ftsn_done:
1505 * Can't compare with sets == NULL, since we don't add any
1506 * sets for un-ordered messages.
1508 if (meta == meta_head)
1509 return;
1510 *nmp = sctp_make_ftsn_chunk(sctp, fp, sets, nsets, *seglen);
1511 sctp_free_ftsn_set(sets);
1512 if (*nmp == NULL)
1513 return;
1514 if (sctp->sctp_ftsn == sctp->sctp_lastacked + 1) {
1515 sacklen = 0;
1516 } else {
1517 sacklen = sizeof (sctp_chunk_hdr_t) +
1518 sizeof (sctp_sack_chunk_t) +
1519 (sizeof (sctp_sack_frag_t) * sctp->sctp_sack_gaps);
1520 if (*seglen + sacklen > sctp->sctp_lastdata->sf_pmss) {
1521 /* piggybacked SACK doesn't fit */
1522 sacklen = 0;
1523 } else {
1524 fp = sctp->sctp_lastdata;
1527 head = sctp_add_proto_hdr(sctp, fp, *nmp, sacklen, NULL);
1528 if (head == NULL) {
1529 freemsg(*nmp);
1530 *nmp = NULL;
1531 SCTP_KSTAT(sctps, sctp_send_ftsn_failed);
1532 return;
1534 *seglen += sacklen;
1535 *nmp = head;
1538 * XXXNeed to optimise this, the reason it is done here is so
1539 * that we don't have to undo in case of failure.
1541 mp1 = mp_head;
1542 sdc = (sctp_data_hdr_t *)mp1->b_rptr;
1543 while (meta_head != NULL &&
1544 SEQ_GEQ(sctp->sctp_adv_pap, ntohl(sdc->sdh_tsn))) {
1545 if (!SCTP_IS_MSG_ABANDONED(meta_head))
1546 SCTP_MSG_SET_ABANDONED(meta_head);
1547 while (mp1 != NULL && SCTP_CHUNK_ISSENT(mp1)) {
1548 sdc = (sctp_data_hdr_t *)mp1->b_rptr;
1549 if (!SCTP_CHUNK_ISACKED(mp1)) {
1550 clen = ntohs(sdc->sdh_len) - sizeof (*sdc);
1551 SCTP_CHUNK_SENT(sctp, mp1, sdc, fp, clen,
1552 meta_head);
1554 mp1 = mp1->b_next;
1556 while (mp1 != NULL) {
1557 sdc = (sctp_data_hdr_t *)mp1->b_rptr;
1558 if (!SCTP_CHUNK_ABANDONED(mp1)) {
1559 ASSERT(!SCTP_CHUNK_ISSENT(mp1));
1560 unsent += ntohs(sdc->sdh_len) - sizeof (*sdc);
1561 SCTP_ABANDON_CHUNK(mp1);
1563 mp1 = mp1->b_next;
1565 meta_head = meta_head->b_next;
1566 if (meta_head != NULL) {
1567 mp1 = meta_head->b_cont;
1568 if (!SCTP_CHUNK_ISSENT(mp1))
1569 break;
1570 sdc = (sctp_data_hdr_t *)mp1->b_rptr;
1573 if (unsent > 0) {
1574 ASSERT(sctp->sctp_unsent >= unsent);
1575 sctp->sctp_unsent -= unsent;
1577 * Update ULP the amount of queued data, which is
1578 * sent-unack'ed + unsent.
1580 if (!SCTP_IS_DETACHED(sctp))
1581 SCTP_TXQ_UPDATE(sctp);
1586 * This function steps through messages starting at meta and checks if
1587 * the message is abandoned. It stops when it hits an unsent chunk or
1588 * a message that has all its chunk acked. This is the only place
1589 * where the sctp_adv_pap is moved forward to indicated abandoned
1590 * messages.
1592 void
1593 sctp_check_adv_ack_pt(sctp_t *sctp, mblk_t *meta, mblk_t *mp)
1595 uint32_t tsn = sctp->sctp_adv_pap;
1596 sctp_data_hdr_t *sdc;
1597 sctp_msg_hdr_t *msg_hdr;
1599 ASSERT(mp != NULL);
1600 sdc = (sctp_data_hdr_t *)mp->b_rptr;
1601 ASSERT(SEQ_GT(ntohl(sdc->sdh_tsn), sctp->sctp_lastack_rxd));
1602 msg_hdr = (sctp_msg_hdr_t *)meta->b_rptr;
1603 if (!SCTP_IS_MSG_ABANDONED(meta) &&
1604 !SCTP_MSG_TO_BE_ABANDONED(meta, msg_hdr, sctp)) {
1605 return;
1607 while (meta != NULL) {
1608 while (mp != NULL && SCTP_CHUNK_ISSENT(mp)) {
1609 sdc = (sctp_data_hdr_t *)mp->b_rptr;
1610 tsn = ntohl(sdc->sdh_tsn);
1611 mp = mp->b_next;
1613 if (mp != NULL)
1614 break;
1616 * We continue checking for successive messages only if there
1617 * is a chunk marked for retransmission. Else, we might
1618 * end up sending FTSN prematurely for chunks that have been
1619 * sent, but not yet acked.
1621 if ((meta = meta->b_next) != NULL) {
1622 msg_hdr = (sctp_msg_hdr_t *)meta->b_rptr;
1623 if (!SCTP_IS_MSG_ABANDONED(meta) &&
1624 !SCTP_MSG_TO_BE_ABANDONED(meta, msg_hdr, sctp)) {
1625 break;
1627 for (mp = meta->b_cont; mp != NULL; mp = mp->b_next) {
1628 if (!SCTP_CHUNK_ISSENT(mp)) {
1629 sctp->sctp_adv_pap = tsn;
1630 return;
1632 if (SCTP_CHUNK_WANT_REXMIT(mp))
1633 break;
1635 if (mp == NULL)
1636 break;
1639 sctp->sctp_adv_pap = tsn;
/*
 * Determine if data chunk mp should be bundled with the chunk being
 * retransmitted.  An abandoned chunk is never bundled.  Otherwise we
 * bundle if
 *
 *	- the chunk was sent to destination fp and is still unack'ed,
 *
 * OR
 *
 *	- the SENT/REXMIT bits of the chunk's b_flag are anything other
 *	  than "SENT alone", e.g. the chunk is unsent (new data).
 */
#define	SCTP_CHUNK_RX_CANBUNDLE(mp, fp)					\
	(!SCTP_CHUNK_ABANDONED((mp)) &&					\
	((SCTP_CHUNK_ISSENT((mp)) &&					\
	(SCTP_CHUNK_DEST((mp)) == (fp) && !SCTP_CHUNK_ISACKED((mp)))) || \
	(((mp)->b_flag &						\
	(SCTP_CHUNK_FLAG_REXMIT | SCTP_CHUNK_FLAG_SENT)) !=		\
	SCTP_CHUNK_FLAG_SENT)))
1661 * Retransmit first segment which hasn't been acked with cumtsn or send
1662 * a Forward TSN chunk, if appropriate.
1664 void
1665 sctp_rexmit(sctp_t *sctp, sctp_faddr_t *oldfp)
1667 mblk_t *mp;
1668 mblk_t *nmp = NULL;
1669 mblk_t *head;
1670 mblk_t *meta = sctp->sctp_xmit_head;
1671 mblk_t *fill;
1672 uint32_t seglen = 0;
1673 uint32_t sacklen;
1674 uint16_t chunklen;
1675 int extra;
1676 sctp_data_hdr_t *sdc;
1677 sctp_faddr_t *fp;
1678 uint32_t adv_pap = sctp->sctp_adv_pap;
1679 boolean_t do_ftsn = B_FALSE;
1680 boolean_t ftsn_check = B_TRUE;
1681 uint32_t first_ua_tsn;
1682 sctp_msg_hdr_t *mhdr;
1683 sctp_stack_t *sctps = sctp->sctp_sctps;
1684 int error;
1686 while (meta != NULL) {
1687 for (mp = meta->b_cont; mp != NULL; mp = mp->b_next) {
1688 uint32_t tsn;
1690 if (!SCTP_CHUNK_ISSENT(mp))
1691 goto window_probe;
1693 * We break in the following cases -
1695 * if the advanced peer ack point includes the next
1696 * chunk to be retransmited - possibly the Forward
1697 * TSN was lost.
1699 * if we are PRSCTP aware and the next chunk to be
1700 * retransmitted is now abandoned
1702 * if the next chunk to be retransmitted is for
1703 * the dest on which the timer went off. (this
1704 * message is not abandoned).
1706 * We check for Forward TSN only for the first
1707 * eligible chunk to be retransmitted. The reason
1708 * being if the first eligible chunk is skipped (say
1709 * it was sent to a destination other than oldfp)
1710 * then we cannot advance the cum TSN via Forward
1711 * TSN chunk.
1713 * Also, ftsn_check is B_TRUE only for the first
1714 * eligible chunk, it will be B_FALSE for all
1715 * subsequent candidate messages for retransmission.
1717 sdc = (sctp_data_hdr_t *)mp->b_rptr;
1718 tsn = ntohl(sdc->sdh_tsn);
1719 if (SEQ_GT(tsn, sctp->sctp_lastack_rxd)) {
1720 if (sctp->sctp_prsctp_aware && ftsn_check) {
1721 if (SEQ_GEQ(sctp->sctp_adv_pap, tsn)) {
1722 ASSERT(sctp->sctp_prsctp_aware);
1723 do_ftsn = B_TRUE;
1724 goto out;
1725 } else {
1726 sctp_check_adv_ack_pt(sctp,
1727 meta, mp);
1728 if (SEQ_GT(sctp->sctp_adv_pap,
1729 adv_pap)) {
1730 do_ftsn = B_TRUE;
1731 goto out;
1734 ftsn_check = B_FALSE;
1736 if (SCTP_CHUNK_DEST(mp) == oldfp)
1737 goto out;
1740 meta = meta->b_next;
1741 if (meta != NULL && sctp->sctp_prsctp_aware) {
1742 mhdr = (sctp_msg_hdr_t *)meta->b_rptr;
1744 while (meta != NULL && (SCTP_IS_MSG_ABANDONED(meta) ||
1745 SCTP_MSG_TO_BE_ABANDONED(meta, mhdr, sctp))) {
1746 meta = meta->b_next;
1750 window_probe:
1752 * Retransmit fired for a destination which didn't have
1753 * any unacked data pending.
1755 if (sctp->sctp_unacked == 0 && sctp->sctp_unsent != 0) {
1757 * Send a window probe. Inflate frwnd to allow
1758 * sending one segment.
1760 if (sctp->sctp_frwnd < (oldfp->sf_pmss - sizeof (*sdc)))
1761 sctp->sctp_frwnd = oldfp->sf_pmss - sizeof (*sdc);
1763 /* next TSN to send */
1764 sctp->sctp_rxt_nxttsn = sctp->sctp_ltsn;
1767 * The above sctp_frwnd adjustment is coarse. The "changed"
1768 * sctp_frwnd may allow us to send more than 1 packet. So
1769 * tell sctp_output() to send only 1 packet.
1771 sctp_output(sctp, 1);
1773 /* Last sent TSN */
1774 sctp->sctp_rxt_maxtsn = sctp->sctp_ltsn - 1;
1775 ASSERT(sctp->sctp_rxt_maxtsn >= sctp->sctp_rxt_nxttsn);
1776 sctp->sctp_zero_win_probe = B_TRUE;
1777 SCTPS_BUMP_MIB(sctps, sctpOutWinProbe);
1779 return;
1780 out:
1782 * After a time out, assume that everything has left the network. So
1783 * we can clear rxt_unacked for the original peer address.
1785 oldfp->sf_rxt_unacked = 0;
1788 * If we were probing for zero window, don't adjust retransmission
1789 * variables, but the timer is still backed off.
1791 if (sctp->sctp_zero_win_probe) {
1792 mblk_t *pkt;
1793 uint_t pkt_len;
1796 * Get the Zero Win Probe for retrasmission, sctp_rxt_nxttsn
1797 * and sctp_rxt_maxtsn will specify the ZWP packet.
1799 fp = oldfp;
1800 if (oldfp->sf_state != SCTP_FADDRS_ALIVE)
1801 fp = sctp_rotate_faddr(sctp, oldfp);
1802 pkt = sctp_rexmit_packet(sctp, &meta, &mp, fp, &pkt_len);
1803 if (pkt != NULL) {
1804 ASSERT(pkt_len <= fp->sf_pmss);
1805 sctp_set_iplen(sctp, pkt, fp->sf_ixa);
1806 (void) conn_ip_output(pkt, fp->sf_ixa);
1807 BUMP_LOCAL(sctp->sctp_opkts);
1808 } else {
1809 SCTP_KSTAT(sctps, sctp_ss_rexmit_failed);
1813 * The strikes will be clear by sctp_faddr_alive() when the
1814 * other side sends us an ack.
1816 oldfp->sf_strikes++;
1817 sctp->sctp_strikes++;
1819 SCTP_CALC_RXT(sctp, oldfp, sctp->sctp_rto_max);
1820 if (oldfp != fp && oldfp->sf_suna != 0)
1821 SCTP_FADDR_TIMER_RESTART(sctp, oldfp, fp->sf_rto);
1822 SCTP_FADDR_TIMER_RESTART(sctp, fp, fp->sf_rto);
1823 SCTPS_BUMP_MIB(sctps, sctpOutWinProbe);
1824 return;
1828 * Enter slowstart for this destination
1830 oldfp->sf_ssthresh = oldfp->sf_cwnd / 2;
1831 if (oldfp->sf_ssthresh < 2 * oldfp->sf_pmss)
1832 oldfp->sf_ssthresh = 2 * oldfp->sf_pmss;
1833 oldfp->sf_cwnd = oldfp->sf_pmss;
1834 oldfp->sf_pba = 0;
1835 fp = sctp_rotate_faddr(sctp, oldfp);
1836 ASSERT(fp != NULL);
1837 sdc = (sctp_data_hdr_t *)mp->b_rptr;
1839 first_ua_tsn = ntohl(sdc->sdh_tsn);
1840 if (do_ftsn) {
1841 sctp_make_ftsns(sctp, meta, mp, &nmp, fp, &seglen);
1842 if (nmp == NULL) {
1843 sctp->sctp_adv_pap = adv_pap;
1844 goto restart_timer;
1846 head = nmp;
1848 * Move to the next unabandoned chunk. XXXCheck if meta will
1849 * always be marked abandoned.
1851 while (meta != NULL && SCTP_IS_MSG_ABANDONED(meta))
1852 meta = meta->b_next;
1853 if (meta != NULL)
1854 mp = mp->b_cont;
1855 else
1856 mp = NULL;
1857 goto try_bundle;
1859 seglen = ntohs(sdc->sdh_len);
1860 chunklen = seglen - sizeof (*sdc);
1861 if ((extra = seglen & (SCTP_ALIGN - 1)) != 0)
1862 extra = SCTP_ALIGN - extra;
1864 /* Find out if we need to piggyback SACK. */
1865 if (sctp->sctp_ftsn == sctp->sctp_lastacked + 1) {
1866 sacklen = 0;
1867 } else {
1868 sacklen = sizeof (sctp_chunk_hdr_t) +
1869 sizeof (sctp_sack_chunk_t) +
1870 (sizeof (sctp_sack_frag_t) * sctp->sctp_sack_gaps);
1871 if (seglen + sacklen > sctp->sctp_lastdata->sf_pmss) {
1872 /* piggybacked SACK doesn't fit */
1873 sacklen = 0;
1874 } else {
1876 * OK, we have room to send SACK back. But we
1877 * should send it back to the last fp where we
1878 * receive data from, unless sctp_lastdata equals
1879 * oldfp, then we should probably not send it
1880 * back to that fp. Also we should check that
1881 * the fp is alive.
1883 if (sctp->sctp_lastdata != oldfp &&
1884 sctp->sctp_lastdata->sf_state ==
1885 SCTP_FADDRS_ALIVE) {
1886 fp = sctp->sctp_lastdata;
1892 * Cancel RTT measurement if the retransmitted TSN is before the
1893 * TSN used for timimg.
1895 if (sctp->sctp_out_time != 0 &&
1896 SEQ_GEQ(sctp->sctp_rtt_tsn, sdc->sdh_tsn)) {
1897 sctp->sctp_out_time = 0;
1899 /* Clear the counter as the RTT calculation may be off. */
1900 fp->sf_rtt_updates = 0;
1901 oldfp->sf_rtt_updates = 0;
1904 * After a timeout, we should change the current faddr so that
1905 * new chunks will be sent to the alternate address.
1907 sctp_set_faddr_current(sctp, fp);
1909 nmp = dupmsg(mp);
1910 if (nmp == NULL)
1911 goto restart_timer;
1912 if (extra > 0) {
1913 fill = sctp_get_padding(sctp, extra);
1914 if (fill != NULL) {
1915 linkb(nmp, fill);
1916 seglen += extra;
1917 } else {
1918 freemsg(nmp);
1919 goto restart_timer;
1922 SCTP_CHUNK_CLEAR_FLAGS(nmp);
1923 head = sctp_add_proto_hdr(sctp, fp, nmp, sacklen, NULL);
1924 if (head == NULL) {
1925 freemsg(nmp);
1926 SCTP_KSTAT(sctps, sctp_rexmit_failed);
1927 goto restart_timer;
1929 seglen += sacklen;
1931 SCTP_CHUNK_SENT(sctp, mp, sdc, fp, chunklen, meta);
1933 mp = mp->b_next;
1935 try_bundle:
1936 /* We can at least and at most send 1 packet at timeout. */
1937 while (seglen < fp->sf_pmss) {
1938 int32_t new_len;
1940 /* Go through the list to find more chunks to be bundled. */
1941 while (mp != NULL) {
1942 /* Check if the chunk can be bundled. */
1943 if (SCTP_CHUNK_RX_CANBUNDLE(mp, oldfp))
1944 break;
1945 mp = mp->b_next;
1947 /* Go to the next message. */
1948 if (mp == NULL) {
1949 for (meta = meta->b_next; meta != NULL;
1950 meta = meta->b_next) {
1951 mhdr = (sctp_msg_hdr_t *)meta->b_rptr;
1953 if (SCTP_IS_MSG_ABANDONED(meta) ||
1954 SCTP_MSG_TO_BE_ABANDONED(meta, mhdr,
1955 sctp)) {
1956 continue;
1959 mp = meta->b_cont;
1960 goto try_bundle;
1963 * Check if there is a new message which potentially
1964 * could be bundled with this retransmission.
1966 meta = sctp_get_msg_to_send(sctp, &mp, NULL, &error,
1967 seglen, fp->sf_pmss - seglen, NULL);
1968 if (error != 0 || meta == NULL) {
1969 /* No more chunk to be bundled. */
1970 break;
1971 } else {
1972 goto try_bundle;
1976 sdc = (sctp_data_hdr_t *)mp->b_rptr;
1977 new_len = ntohs(sdc->sdh_len);
1978 chunklen = new_len - sizeof (*sdc);
1980 if ((extra = new_len & (SCTP_ALIGN - 1)) != 0)
1981 extra = SCTP_ALIGN - extra;
1982 if ((new_len = seglen + new_len + extra) > fp->sf_pmss)
1983 break;
1984 if ((nmp = dupmsg(mp)) == NULL)
1985 break;
1987 if (extra > 0) {
1988 fill = sctp_get_padding(sctp, extra);
1989 if (fill != NULL) {
1990 linkb(nmp, fill);
1991 } else {
1992 freemsg(nmp);
1993 break;
1996 linkb(head, nmp);
1998 SCTP_CHUNK_CLEAR_FLAGS(nmp);
1999 SCTP_CHUNK_SENT(sctp, mp, sdc, fp, chunklen, meta);
2001 seglen = new_len;
2002 mp = mp->b_next;
2004 done_bundle:
2005 if ((seglen > fp->sf_pmss) && fp->sf_isv4) {
2006 ipha_t *iph = (ipha_t *)head->b_rptr;
2009 * Path MTU is different from path we thought it would
2010 * be when we created chunks, or IP headers have grown.
2011 * Need to clear the DF bit.
2013 iph->ipha_fragment_offset_and_flags = 0;
2015 fp->sf_rxt_unacked += seglen;
2017 dprint(2, ("sctp_rexmit: Sending packet %d bytes, tsn %x "
2018 "ssn %d to %p (rwnd %d, lastack_rxd %x)\n",
2019 seglen, ntohl(sdc->sdh_tsn), ntohs(sdc->sdh_ssn),
2020 (void *)fp, sctp->sctp_frwnd, sctp->sctp_lastack_rxd));
2022 sctp->sctp_rexmitting = B_TRUE;
2023 sctp->sctp_rxt_nxttsn = first_ua_tsn;
2024 sctp->sctp_rxt_maxtsn = sctp->sctp_ltsn - 1;
2025 sctp_set_iplen(sctp, head, fp->sf_ixa);
2026 (void) conn_ip_output(head, fp->sf_ixa);
2027 BUMP_LOCAL(sctp->sctp_opkts);
2030 * Restart the oldfp timer with exponential backoff and
2031 * the new fp timer for the retransmitted chunks.
2033 restart_timer:
2034 oldfp->sf_strikes++;
2035 sctp->sctp_strikes++;
2036 SCTP_CALC_RXT(sctp, oldfp, sctp->sctp_rto_max);
2038 * If there is still some data in the oldfp, restart the
2039 * retransmission timer. If there is no data, the heartbeat will
2040 * continue to run so it will do its job in checking the reachability
2041 * of the oldfp.
2043 if (oldfp != fp && oldfp->sf_suna != 0)
2044 SCTP_FADDR_TIMER_RESTART(sctp, oldfp, oldfp->sf_rto);
2047 * Should we restart the timer of the new fp? If there is
2048 * outstanding data to the new fp, the timer should be
2049 * running already. So restarting it means that the timer
2050 * will fire later for those outstanding data. But if
2051 * we don't restart it, the timer will fire too early for the
2052 * just retransmitted chunks to the new fp. The reason is that we
2053 * don't keep a timestamp on when a chunk is retransmitted.
2054 * So when the timer fires, it will just search for the
2055 * chunk with the earliest TSN sent to new fp. This probably
2056 * is the chunk we just retransmitted. So for now, let's
2057 * be conservative and restart the timer of the new fp.
2059 SCTP_FADDR_TIMER_RESTART(sctp, fp, fp->sf_rto);
2061 sctp->sctp_active = ddi_get_lbolt64();
2065 * This function is called by sctp_ss_rexmit() to create a packet
2066 * to be retransmitted to the given fp. The given meta and mp
2067 * parameters are respectively the sctp_msg_hdr_t and the mblk of the
2068 * first chunk to be retransmitted. This is also called when we want
2069 * to retransmit a zero window probe from sctp_rexmit() or when we
2070 * want to retransmit the zero window probe after the window has
2071 * opened from sctp_got_sack().
2073 mblk_t *
2074 sctp_rexmit_packet(sctp_t *sctp, mblk_t **meta, mblk_t **mp, sctp_faddr_t *fp,
2075 uint_t *packet_len)
2077 uint32_t seglen = 0;
2078 uint16_t chunklen;
2079 int extra;
2080 mblk_t *nmp;
2081 mblk_t *head;
2082 mblk_t *fill;
2083 sctp_data_hdr_t *sdc;
2084 sctp_msg_hdr_t *mhdr;
2086 sdc = (sctp_data_hdr_t *)(*mp)->b_rptr;
2087 seglen = ntohs(sdc->sdh_len);
2088 chunklen = seglen - sizeof (*sdc);
2089 if ((extra = seglen & (SCTP_ALIGN - 1)) != 0)
2090 extra = SCTP_ALIGN - extra;
2092 nmp = dupmsg(*mp);
2093 if (nmp == NULL)
2094 return (NULL);
2095 if (extra > 0) {
2096 fill = sctp_get_padding(sctp, extra);
2097 if (fill != NULL) {
2098 linkb(nmp, fill);
2099 seglen += extra;
2100 } else {
2101 freemsg(nmp);
2102 return (NULL);
2105 SCTP_CHUNK_CLEAR_FLAGS(nmp);
2106 head = sctp_add_proto_hdr(sctp, fp, nmp, 0, NULL);
2107 if (head == NULL) {
2108 freemsg(nmp);
2109 return (NULL);
2111 SCTP_CHUNK_SENT(sctp, *mp, sdc, fp, chunklen, *meta);
2113 * Don't update the TSN if we are doing a Zero Win Probe.
2115 if (!sctp->sctp_zero_win_probe)
2116 sctp->sctp_rxt_nxttsn = ntohl(sdc->sdh_tsn);
2117 *mp = (*mp)->b_next;
2119 try_bundle:
2120 while (seglen < fp->sf_pmss) {
2121 int32_t new_len;
2124 * Go through the list to find more chunks to be bundled.
2125 * We should only retransmit sent by unack'ed chunks. Since
2126 * they were sent before, the peer's receive window should
2127 * be able to receive them.
2129 while (*mp != NULL) {
2130 /* Check if the chunk can be bundled. */
2131 if (SCTP_CHUNK_ISSENT(*mp) && !SCTP_CHUNK_ISACKED(*mp))
2132 break;
2133 *mp = (*mp)->b_next;
2135 /* Go to the next message. */
2136 if (*mp == NULL) {
2137 for (*meta = (*meta)->b_next; *meta != NULL;
2138 *meta = (*meta)->b_next) {
2139 mhdr = (sctp_msg_hdr_t *)(*meta)->b_rptr;
2141 if (SCTP_IS_MSG_ABANDONED(*meta) ||
2142 SCTP_MSG_TO_BE_ABANDONED(*meta, mhdr,
2143 sctp)) {
2144 continue;
2147 *mp = (*meta)->b_cont;
2148 goto try_bundle;
2150 /* No more chunk to be bundled. */
2151 break;
2154 sdc = (sctp_data_hdr_t *)(*mp)->b_rptr;
2155 /* Don't bundle chunks beyond sctp_rxt_maxtsn. */
2156 if (SEQ_GT(ntohl(sdc->sdh_tsn), sctp->sctp_rxt_maxtsn))
2157 break;
2158 new_len = ntohs(sdc->sdh_len);
2159 chunklen = new_len - sizeof (*sdc);
2161 if ((extra = new_len & (SCTP_ALIGN - 1)) != 0)
2162 extra = SCTP_ALIGN - extra;
2163 if ((new_len = seglen + new_len + extra) > fp->sf_pmss)
2164 break;
2165 if ((nmp = dupmsg(*mp)) == NULL)
2166 break;
2168 if (extra > 0) {
2169 fill = sctp_get_padding(sctp, extra);
2170 if (fill != NULL) {
2171 linkb(nmp, fill);
2172 } else {
2173 freemsg(nmp);
2174 break;
2177 linkb(head, nmp);
2179 SCTP_CHUNK_CLEAR_FLAGS(nmp);
2180 SCTP_CHUNK_SENT(sctp, *mp, sdc, fp, chunklen, *meta);
2182 * Don't update the TSN if we are doing a Zero Win Probe.
2184 if (!sctp->sctp_zero_win_probe)
2185 sctp->sctp_rxt_nxttsn = ntohl(sdc->sdh_tsn);
2187 seglen = new_len;
2188 *mp = (*mp)->b_next;
2190 *packet_len = seglen;
2191 fp->sf_rxt_unacked += seglen;
2192 return (head);
2196 * sctp_ss_rexmit() is called when we get a SACK after a timeout which
2197 * advances the cum_tsn but the cum_tsn is still less than what we have sent
2198 * (sctp_rxt_maxtsn) at the time of the timeout. This SACK is a "partial"
2199 * SACK. We retransmit unacked chunks without having to wait for another
2200 * timeout. The rationale is that the SACK should not be "partial" if all the
2201 * lost chunks have been retransmitted. Since the SACK is "partial,"
2202 * the chunks between the cum_tsn and the sctp_rxt_maxtsn should still
2203 * be missing. It is better for us to retransmit them now instead
2204 * of waiting for a timeout.
2206 void
2207 sctp_ss_rexmit(sctp_t *sctp)
2209 mblk_t *meta;
2210 mblk_t *mp;
2211 mblk_t *pkt;
2212 sctp_faddr_t *fp;
2213 uint_t pkt_len;
2214 uint32_t tot_wnd;
2215 sctp_data_hdr_t *sdc;
2216 int burst;
2217 sctp_stack_t *sctps = sctp->sctp_sctps;
2219 ASSERT(!sctp->sctp_zero_win_probe);
2222 * If the last cum ack is smaller than what we have just
2223 * retransmitted, simply return.
2225 if (SEQ_GEQ(sctp->sctp_lastack_rxd, sctp->sctp_rxt_nxttsn))
2226 sctp->sctp_rxt_nxttsn = sctp->sctp_lastack_rxd + 1;
2227 else
2228 return;
2229 ASSERT(SEQ_LEQ(sctp->sctp_rxt_nxttsn, sctp->sctp_rxt_maxtsn));
2232 * After a timer fires, sctp_current should be set to the new
2233 * fp where the retransmitted chunks are sent.
2235 fp = sctp->sctp_current;
2238 * Since we are retransmitting, we only need to use cwnd to determine
2239 * how much we can send as we were allowed (by peer's receive window)
2240 * to send those retransmitted chunks previously when they were first
2241 * sent. If we record how much we have retransmitted but
2242 * unacknowledged using rxt_unacked, then the amount we can now send
2243 * is equal to cwnd minus rxt_unacked.
2245 * The field rxt_unacked is incremented when we retransmit a packet
2246 * and decremented when we get a SACK acknowledging something. And
2247 * it is reset when the retransmission timer fires as we assume that
2248 * all packets have left the network after a timeout. If this
2249 * assumption is not true, it means that after a timeout, we can
2250 * get a SACK acknowledging more than rxt_unacked (its value only
2251 * contains what is retransmitted when the timer fires). So
2252 * rxt_unacked will become very big (it is an unsigned int so going
2253 * negative means that the value is huge). This is the reason we
2254 * always send at least 1 MSS bytes.
2256 * The reason why we do not have an accurate count is that we
2257 * only know how many packets are outstanding (using the TSN numbers).
2258 * But we do not know how many bytes those packets contain. To
2259 * have an accurate count, we need to walk through the send list.
2260 * As it is not really important to have an accurate count during
2261 * retransmission, we skip this walk to save some time. This should
2262 * not make the retransmission too aggressive to cause congestion.
2264 if (fp->sf_cwnd <= fp->sf_rxt_unacked)
2265 tot_wnd = fp->sf_pmss;
2266 else
2267 tot_wnd = fp->sf_cwnd - fp->sf_rxt_unacked;
2269 /* Find the first unack'ed chunk */
2270 for (meta = sctp->sctp_xmit_head; meta != NULL; meta = meta->b_next) {
2271 sctp_msg_hdr_t *mhdr = (sctp_msg_hdr_t *)meta->b_rptr;
2273 if (SCTP_IS_MSG_ABANDONED(meta) ||
2274 SCTP_MSG_TO_BE_ABANDONED(meta, mhdr, sctp)) {
2275 continue;
2278 for (mp = meta->b_cont; mp != NULL; mp = mp->b_next) {
2279 /* Again, this may not be possible */
2280 if (!SCTP_CHUNK_ISSENT(mp))
2281 return;
2282 sdc = (sctp_data_hdr_t *)mp->b_rptr;
2283 if (ntohl(sdc->sdh_tsn) == sctp->sctp_rxt_nxttsn)
2284 goto found_msg;
2288 /* Everything is abandoned... */
2289 return;
2291 found_msg:
2292 if (!fp->sf_timer_running)
2293 SCTP_FADDR_TIMER_RESTART(sctp, fp, fp->sf_rto);
2294 pkt = sctp_rexmit_packet(sctp, &meta, &mp, fp, &pkt_len);
2295 if (pkt == NULL) {
2296 SCTP_KSTAT(sctps, sctp_ss_rexmit_failed);
2297 return;
2299 if ((pkt_len > fp->sf_pmss) && fp->sf_isv4) {
2300 ipha_t *iph = (ipha_t *)pkt->b_rptr;
2303 * Path MTU is different from path we thought it would
2304 * be when we created chunks, or IP headers have grown.
2305 * Need to clear the DF bit.
2307 iph->ipha_fragment_offset_and_flags = 0;
2309 sctp_set_iplen(sctp, pkt, fp->sf_ixa);
2310 (void) conn_ip_output(pkt, fp->sf_ixa);
2311 BUMP_LOCAL(sctp->sctp_opkts);
2313 /* Check and see if there are more chunks to be retransmitted. */
2314 if (tot_wnd <= pkt_len || tot_wnd - pkt_len < fp->sf_pmss ||
2315 meta == NULL)
2316 return;
2317 if (mp == NULL)
2318 meta = meta->b_next;
2319 if (meta == NULL)
2320 return;
2322 /* Retransmit another packet if the window allows. */
2323 for (tot_wnd -= pkt_len, burst = sctps->sctps_maxburst - 1;
2324 meta != NULL && burst > 0; meta = meta->b_next, burst--) {
2325 if (mp == NULL)
2326 mp = meta->b_cont;
2327 for (; mp != NULL; mp = mp->b_next) {
2328 /* Again, this may not be possible */
2329 if (!SCTP_CHUNK_ISSENT(mp))
2330 return;
2331 if (!SCTP_CHUNK_ISACKED(mp))
2332 goto found_msg;