Merge commit 'ea01a15a654b9e1c7b37d958f4d1911882ed7781'
[unleashed.git] / kernel / net / sctp / sctp_common.c
blob0ad2dbb9acaaedb6c88bcc37e6a621e3bb747cdb
1 /*
2 * CDDL HEADER START
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
19 * CDDL HEADER END
23 * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
26 #include <sys/types.h>
27 #include <sys/systm.h>
28 #include <sys/stream.h>
29 #include <sys/strsubr.h>
30 #include <sys/ddi.h>
31 #include <sys/sunddi.h>
32 #include <sys/kmem.h>
33 #include <sys/socket.h>
34 #include <sys/random.h>
36 #include <netinet/in.h>
37 #include <netinet/ip6.h>
38 #include <netinet/sctp.h>
40 #include <inet/common.h>
41 #include <inet/ip.h>
42 #include <inet/ip6.h>
43 #include <inet/ip_ire.h>
44 #include <inet/ip_if.h>
45 #include <inet/ip_ndp.h>
46 #include <inet/mib2.h>
47 #include <inet/nd.h>
48 #include <inet/optcom.h>
49 #include <inet/sctp_ip.h>
50 #include <inet/ipclassifier.h>
52 #include <inet/sctp/sctp_impl.h>
53 #include <inet/sctp/sctp_addr.h>
54 #include <inet/sctp/sctp_asconf.h>
/* Kmem cache from which the sctp_faddr_t entries in this file are allocated and freed. */
56 static struct kmem_cache *sctp_kmem_faddr_cache;
/* Forward declaration; called from sctp_add_faddr() to initialize a new peer address. */
57 static void sctp_init_faddr(sctp_t *, sctp_faddr_t *, in6_addr_t *, mblk_t *);
59 /* Set the source address. Refer to comments in sctp_get_dest(). */
60 void
61 sctp_set_saddr(sctp_t *sctp, sctp_faddr_t *fp)
63 boolean_t v6 = !fp->sf_isv4;
64 boolean_t addr_set;
66 fp->sf_saddr = sctp_get_valid_addr(sctp, v6, &addr_set);
68 * If there is no source address avaialble, mark this peer address
69 * as unreachable for now. When the heartbeat timer fires, it will
70 * call sctp_get_dest() to re-check if there is any source address
71 * available.
73 if (!addr_set)
74 fp->sf_state = SCTP_FADDRS_UNREACH;
78 * Call this function to get information about a peer addr fp.
80 * Uses ip_attr_connect to avoid explicit use of ire and source address
81 * selection.
83 void
84 sctp_get_dest(sctp_t *sctp, sctp_faddr_t *fp)
86 in6_addr_t laddr;
87 in6_addr_t nexthop;
88 sctp_saddr_ipif_t *sp;
89 int hdrlen;
90 sctp_stack_t *sctps = sctp->sctp_sctps;
91 conn_t *connp = sctp->sctp_connp;
92 iulp_t uinfo;
93 uint_t pmtu;
94 int error;
95 uint32_t flags = IPDF_VERIFY_DST | IPDF_IPSEC |
96 IPDF_SELECT_SRC | IPDF_UNIQUE_DCE;
99 * Tell sctp_make_mp it needs to call us again should we not
100 * complete and set the saddr.
102 fp->sf_saddr = ipv6_all_zeros;
105 * If this addr is not reachable, mark it as unconfirmed for now, the
106 * state will be changed back to unreachable later in this function
107 * if it is still the case.
109 if (fp->sf_state == SCTP_FADDRS_UNREACH) {
110 fp->sf_state = SCTP_FADDRS_UNCONFIRMED;
114 * Socket is connected - enable PMTU discovery.
116 if (!sctps->sctps_ignore_path_mtu)
117 fp->sf_ixa->ixa_flags |= IXAF_PMTU_DISCOVERY;
119 ip_attr_nexthop(&connp->conn_xmit_ipp, fp->sf_ixa, &fp->sf_faddr,
120 &nexthop);
122 laddr = fp->sf_saddr;
123 error = ip_attr_connect(connp, fp->sf_ixa, &laddr, &fp->sf_faddr,
124 &nexthop, connp->conn_fport, &laddr, &uinfo, flags);
126 if (error != 0) {
127 dprint(3, ("sctp_get_dest: no ire for %x:%x:%x:%x\n",
128 SCTP_PRINTADDR(fp->sf_faddr)));
130 * It is tempting to just leave the src addr
131 * unspecified and let IP figure it out, but we
132 * *cannot* do this, since IP may choose a src addr
133 * that is not part of this association... unless
134 * this sctp has bound to all addrs. So if the dest
135 * lookup fails, try to find one in our src addr
136 * list, unless the sctp has bound to all addrs, in
137 * which case we change the src addr to unspec.
139 * Note that if this is a v6 endpoint but it does
140 * not have any v4 address at this point (e.g. may
141 * have been deleted), sctp_get_valid_addr() will
142 * return mapped INADDR_ANY. In this case, this
143 * address should be marked not reachable so that
144 * it won't be used to send data.
146 sctp_set_saddr(sctp, fp);
147 if (fp->sf_state == SCTP_FADDRS_UNREACH)
148 return;
149 goto check_current;
151 ASSERT(fp->sf_ixa->ixa_ire != NULL);
152 ASSERT(!(fp->sf_ixa->ixa_ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE)));
154 if (!sctp->sctp_loopback)
155 sctp->sctp_loopback = uinfo.iulp_loopback;
157 /* Make sure the laddr is part of this association */
158 if ((sp = sctp_saddr_lookup(sctp, &laddr, 0)) != NULL &&
159 !sp->saddr_ipif_dontsrc) {
160 if (sp->saddr_ipif_unconfirmed == 1)
161 sp->saddr_ipif_unconfirmed = 0;
162 /* We did IPsec policy lookup for laddr already */
163 fp->sf_saddr = laddr;
164 } else {
165 dprint(2, ("sctp_get_dest: src addr is not part of assoc "
166 "%x:%x:%x:%x\n", SCTP_PRINTADDR(laddr)));
169 * Set the src to the first saddr and hope for the best.
170 * Note that this case should very seldomly
171 * happen. One scenario this can happen is an app
172 * explicitly bind() to an address. But that address is
173 * not the preferred source address to send to the peer.
175 sctp_set_saddr(sctp, fp);
176 if (fp->sf_state == SCTP_FADDRS_UNREACH) {
177 return;
182 * Pull out RTO information for this faddr and use it if we don't
183 * have any yet.
185 if (fp->sf_srtt == -1 && uinfo.iulp_rtt != 0) {
186 /* The cached value is in ms. */
187 fp->sf_srtt = MSEC_TO_TICK(uinfo.iulp_rtt);
188 fp->sf_rttvar = MSEC_TO_TICK(uinfo.iulp_rtt_sd);
189 fp->sf_rto = 3 * fp->sf_srtt;
191 /* Bound the RTO by configured min and max values */
192 if (fp->sf_rto < sctp->sctp_rto_min) {
193 fp->sf_rto = sctp->sctp_rto_min;
195 if (fp->sf_rto > sctp->sctp_rto_max) {
196 fp->sf_rto = sctp->sctp_rto_max;
198 SCTP_MAX_RTO(sctp, fp);
200 pmtu = uinfo.iulp_mtu;
203 * Record the MTU for this faddr. If the MTU for this faddr has
204 * changed, check if the assc MTU will also change.
206 if (fp->sf_isv4) {
207 hdrlen = sctp->sctp_hdr_len;
208 } else {
209 hdrlen = sctp->sctp_hdr6_len;
211 if ((fp->sf_pmss + hdrlen) != pmtu) {
212 /* Make sure that sf_pmss is a multiple of SCTP_ALIGN. */
213 fp->sf_pmss = (pmtu - hdrlen) & ~(SCTP_ALIGN - 1);
214 if (fp->sf_cwnd < (fp->sf_pmss * 2)) {
215 SET_CWND(fp, fp->sf_pmss,
216 sctps->sctps_slow_start_initial);
220 check_current:
221 if (fp == sctp->sctp_current)
222 sctp_set_faddr_current(sctp, fp);
225 void
226 sctp_update_dce(sctp_t *sctp)
228 sctp_faddr_t *fp;
229 sctp_stack_t *sctps = sctp->sctp_sctps;
230 iulp_t uinfo;
231 ip_stack_t *ipst = sctps->sctps_netstack->netstack_ip;
232 uint_t ifindex;
234 for (fp = sctp->sctp_faddrs; fp != NULL; fp = fp->sf_next) {
235 bzero(&uinfo, sizeof (uinfo));
237 * Only record the PMTU for this faddr if we actually have
238 * done discovery. This prevents initialized default from
239 * clobbering any real info that IP may have.
241 if (fp->sf_pmtu_discovered) {
242 if (fp->sf_isv4) {
243 uinfo.iulp_mtu = fp->sf_pmss +
244 sctp->sctp_hdr_len;
245 } else {
246 uinfo.iulp_mtu = fp->sf_pmss +
247 sctp->sctp_hdr6_len;
250 if (sctps->sctps_rtt_updates != 0 &&
251 fp->sf_rtt_updates >= sctps->sctps_rtt_updates) {
253 * dce_update_uinfo() merges these values with the
254 * old values.
256 uinfo.iulp_rtt = TICK_TO_MSEC(fp->sf_srtt);
257 uinfo.iulp_rtt_sd = TICK_TO_MSEC(fp->sf_rttvar);
258 fp->sf_rtt_updates = 0;
260 ifindex = 0;
261 if (IN6_IS_ADDR_LINKSCOPE(&fp->sf_faddr)) {
263 * If we are going to create a DCE we'd better have
264 * an ifindex
266 if (fp->sf_ixa->ixa_nce != NULL) {
267 ifindex = fp->sf_ixa->ixa_nce->nce_common->
268 ncec_ill->ill_phyint->phyint_ifindex;
269 } else {
270 continue;
274 (void) dce_update_uinfo(&fp->sf_faddr, ifindex, &uinfo, ipst);
279 * The sender must later set the total length in the IP header.
281 mblk_t *
282 sctp_make_mp(sctp_t *sctp, sctp_faddr_t *fp, int trailer)
284 mblk_t *mp;
285 size_t ipsctplen;
286 int isv4;
287 sctp_stack_t *sctps = sctp->sctp_sctps;
288 boolean_t src_changed = B_FALSE;
290 ASSERT(fp != NULL);
291 isv4 = fp->sf_isv4;
293 if (SCTP_IS_ADDR_UNSPEC(isv4, fp->sf_saddr) ||
294 (fp->sf_ixa->ixa_ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE))) {
295 /* Need to pick a source */
296 sctp_get_dest(sctp, fp);
298 * Although we still may not get an IRE, the source address
299 * may be changed in sctp_get_ire(). Set src_changed to
300 * true so that the source address is copied again.
302 src_changed = B_TRUE;
305 /* There is no suitable source address to use, return. */
306 if (fp->sf_state == SCTP_FADDRS_UNREACH)
307 return (NULL);
309 ASSERT(fp->sf_ixa->ixa_ire != NULL);
310 ASSERT(!SCTP_IS_ADDR_UNSPEC(isv4, fp->sf_saddr));
312 if (isv4) {
313 ipsctplen = sctp->sctp_hdr_len;
314 } else {
315 ipsctplen = sctp->sctp_hdr6_len;
318 mp = allocb(ipsctplen + sctps->sctps_wroff_xtra + trailer, BPRI_MED);
319 if (mp == NULL) {
320 ip1dbg(("sctp_make_mp: error making mp..\n"));
321 return (NULL);
323 mp->b_rptr += sctps->sctps_wroff_xtra;
324 mp->b_wptr = mp->b_rptr + ipsctplen;
326 ASSERT(OK_32PTR(mp->b_wptr));
328 if (isv4) {
329 ipha_t *iph = (ipha_t *)mp->b_rptr;
331 bcopy(sctp->sctp_iphc, mp->b_rptr, ipsctplen);
332 if (fp != sctp->sctp_current || src_changed) {
333 /* Fix the source and destination addresses. */
334 IN6_V4MAPPED_TO_IPADDR(&fp->sf_faddr, iph->ipha_dst);
335 IN6_V4MAPPED_TO_IPADDR(&fp->sf_saddr, iph->ipha_src);
337 /* set or clear the don't fragment bit */
338 if (fp->sf_df) {
339 iph->ipha_fragment_offset_and_flags = htons(IPH_DF);
340 } else {
341 iph->ipha_fragment_offset_and_flags = 0;
343 } else {
344 bcopy(sctp->sctp_iphc6, mp->b_rptr, ipsctplen);
345 if (fp != sctp->sctp_current || src_changed) {
346 /* Fix the source and destination addresses. */
347 ((ip6_t *)(mp->b_rptr))->ip6_dst = fp->sf_faddr;
348 ((ip6_t *)(mp->b_rptr))->ip6_src = fp->sf_saddr;
351 ASSERT(sctp->sctp_connp != NULL);
352 return (mp);
356 * Notify upper layers about preferred write offset, write size.
358 void
359 sctp_set_ulp_prop(sctp_t *sctp)
361 int hdrlen;
362 struct sock_proto_props sopp;
364 sctp_stack_t *sctps = sctp->sctp_sctps;
366 if (sctp->sctp_current->sf_isv4) {
367 hdrlen = sctp->sctp_hdr_len;
368 } else {
369 hdrlen = sctp->sctp_hdr6_len;
371 ASSERT(sctp->sctp_ulpd);
373 sctp->sctp_connp->conn_wroff = sctps->sctps_wroff_xtra + hdrlen +
374 sizeof (sctp_data_hdr_t);
376 ASSERT(sctp->sctp_current->sf_pmss == sctp->sctp_mss);
377 bzero(&sopp, sizeof (sopp));
378 sopp.sopp_flags = SOCKOPT_MAXBLK|SOCKOPT_WROFF;
379 sopp.sopp_wroff = sctp->sctp_connp->conn_wroff;
380 sopp.sopp_maxblk = sctp->sctp_mss - sizeof (sctp_data_hdr_t);
381 sctp->sctp_ulp_prop(sctp->sctp_ulpd, &sopp);
385 * Set the lengths in the packet and the transmit attributes.
387 void
388 sctp_set_iplen(sctp_t *sctp, mblk_t *mp, ip_xmit_attr_t *ixa)
390 uint16_t sum = 0;
391 ipha_t *iph;
392 ip6_t *ip6h;
393 mblk_t *pmp = mp;
394 boolean_t isv4;
396 isv4 = (IPH_HDR_VERSION(mp->b_rptr) == IPV4_VERSION);
397 for (; pmp; pmp = pmp->b_cont)
398 sum += pmp->b_wptr - pmp->b_rptr;
400 ixa->ixa_pktlen = sum;
401 if (isv4) {
402 iph = (ipha_t *)mp->b_rptr;
403 iph->ipha_length = htons(sum);
404 ixa->ixa_ip_hdr_length = sctp->sctp_ip_hdr_len;
405 } else {
406 ip6h = (ip6_t *)mp->b_rptr;
407 ip6h->ip6_plen = htons(sum - IPV6_HDR_LEN);
408 ixa->ixa_ip_hdr_length = sctp->sctp_ip_hdr6_len;
413 sctp_compare_faddrsets(sctp_faddr_t *a1, sctp_faddr_t *a2)
415 int na1 = 0;
416 int overlap = 0;
417 int equal = 1;
418 int onematch;
419 sctp_faddr_t *fp1, *fp2;
421 for (fp1 = a1; fp1; fp1 = fp1->sf_next) {
422 onematch = 0;
423 for (fp2 = a2; fp2; fp2 = fp2->sf_next) {
424 if (IN6_ARE_ADDR_EQUAL(&fp1->sf_faddr,
425 &fp2->sf_faddr)) {
426 overlap++;
427 onematch = 1;
428 break;
430 if (!onematch) {
431 equal = 0;
434 na1++;
437 if (equal) {
438 return (SCTP_ADDR_EQUAL);
440 if (overlap == na1) {
441 return (SCTP_ADDR_SUBSET);
443 if (overlap) {
444 return (SCTP_ADDR_OVERLAP);
446 return (SCTP_ADDR_DISJOINT);
450 * Returns 0 on success, ENOMEM on memory allocation failure, EHOSTUNREACH
451 * if the connection credentials fail remote host accreditation.
452 * If sleep is true, this function should never fail for a memory allocation
453 * failure. The boolean parameter "first" decides whether the newly created
454 * faddr structure should be added at the beginning of the list or at the end.
456 * Note: caller must hold conn fanout lock.
459 sctp_add_faddr(sctp_t *sctp, in6_addr_t *addr, int sleep, boolean_t first)
461 sctp_faddr_t *faddr;
462 mblk_t *timer_mp;
463 int err;
464 conn_t *connp = sctp->sctp_connp;
466 if ((faddr = kmem_cache_alloc(sctp_kmem_faddr_cache, sleep)) == NULL)
467 return (ENOMEM);
468 bzero(faddr, sizeof (*faddr));
469 timer_mp = sctp_timer_alloc((sctp), sctp_rexmit_timer, sleep);
470 if (timer_mp == NULL) {
471 kmem_cache_free(sctp_kmem_faddr_cache, faddr);
472 return (ENOMEM);
474 ((sctpt_t *)(timer_mp->b_rptr))->sctpt_faddr = faddr;
476 /* Start with any options set on the conn */
477 faddr->sf_ixa = conn_get_ixa_exclusive(connp);
478 if (faddr->sf_ixa == NULL) {
479 freemsg(timer_mp);
480 kmem_cache_free(sctp_kmem_faddr_cache, faddr);
481 return (ENOMEM);
483 faddr->sf_ixa->ixa_notify_cookie = connp->conn_sctp;
485 sctp_init_faddr(sctp, faddr, addr, timer_mp);
486 ASSERT(faddr->sf_ixa->ixa_cred != NULL);
488 /* ip_attr_connect didn't allow broadcats/multicast dest */
489 ASSERT(faddr->sf_next == NULL);
491 if (sctp->sctp_faddrs == NULL) {
492 ASSERT(sctp->sctp_lastfaddr == NULL);
493 /* only element on list; first and last are same */
494 sctp->sctp_faddrs = sctp->sctp_lastfaddr = faddr;
495 } else if (first) {
496 ASSERT(sctp->sctp_lastfaddr != NULL);
497 faddr->sf_next = sctp->sctp_faddrs;
498 sctp->sctp_faddrs = faddr;
499 } else {
500 sctp->sctp_lastfaddr->sf_next = faddr;
501 sctp->sctp_lastfaddr = faddr;
503 sctp->sctp_nfaddrs++;
505 return (0);
508 sctp_faddr_t *
509 sctp_lookup_faddr(sctp_t *sctp, in6_addr_t *addr)
511 sctp_faddr_t *fp;
513 for (fp = sctp->sctp_faddrs; fp != NULL; fp = fp->sf_next) {
514 if (IN6_ARE_ADDR_EQUAL(&fp->sf_faddr, addr))
515 break;
518 return (fp);
521 sctp_faddr_t *
522 sctp_lookup_faddr_nosctp(sctp_faddr_t *fp, in6_addr_t *addr)
524 for (; fp; fp = fp->sf_next) {
525 if (IN6_ARE_ADDR_EQUAL(&fp->sf_faddr, addr)) {
526 break;
530 return (fp);
534 * To change the currently used peer address to the specified one.
536 void
537 sctp_set_faddr_current(sctp_t *sctp, sctp_faddr_t *fp)
539 /* Now setup the composite header. */
540 if (fp->sf_isv4) {
541 IN6_V4MAPPED_TO_IPADDR(&fp->sf_faddr,
542 sctp->sctp_ipha->ipha_dst);
543 IN6_V4MAPPED_TO_IPADDR(&fp->sf_saddr,
544 sctp->sctp_ipha->ipha_src);
545 /* update don't fragment bit */
546 if (fp->sf_df) {
547 sctp->sctp_ipha->ipha_fragment_offset_and_flags =
548 htons(IPH_DF);
549 } else {
550 sctp->sctp_ipha->ipha_fragment_offset_and_flags = 0;
552 } else {
553 sctp->sctp_ip6h->ip6_dst = fp->sf_faddr;
554 sctp->sctp_ip6h->ip6_src = fp->sf_saddr;
557 sctp->sctp_current = fp;
558 sctp->sctp_mss = fp->sf_pmss;
560 /* Update the uppper layer for the change. */
561 if (!SCTP_IS_DETACHED(sctp))
562 sctp_set_ulp_prop(sctp);
565 void
566 sctp_redo_faddr_srcs(sctp_t *sctp)
568 sctp_faddr_t *fp;
570 for (fp = sctp->sctp_faddrs; fp != NULL; fp = fp->sf_next) {
571 sctp_get_dest(sctp, fp);
575 void
576 sctp_faddr_alive(sctp_t *sctp, sctp_faddr_t *fp)
578 int64_t now = LBOLT_FASTPATH64;
581 * If we are under memory pressure, we abort association waiting
582 * in zero window probing state for too long. We do this by not
583 * resetting sctp_strikes. So if sctp_zero_win_probe continues
584 * while under memory pressure, this association will eventually
585 * time out.
587 if (!sctp->sctp_zero_win_probe || !sctp->sctp_sctps->sctps_reclaim) {
588 sctp->sctp_strikes = 0;
590 fp->sf_strikes = 0;
591 fp->sf_lastactive = now;
592 fp->sf_hb_expiry = now + SET_HB_INTVL(fp);
593 fp->sf_hb_pending = B_FALSE;
594 if (fp->sf_state != SCTP_FADDRS_ALIVE) {
595 fp->sf_state = SCTP_FADDRS_ALIVE;
596 sctp_intf_event(sctp, fp->sf_faddr, SCTP_ADDR_AVAILABLE, 0);
597 /* Should have a full IRE now */
598 sctp_get_dest(sctp, fp);
601 * If this is the primary, switch back to it now. And
602 * we probably want to reset the source addr used to reach
603 * it.
604 * Note that if we didn't find a source in sctp_get_dest
605 * then we'd be unreachable at this point in time.
607 if (fp == sctp->sctp_primary &&
608 fp->sf_state != SCTP_FADDRS_UNREACH) {
609 sctp_set_faddr_current(sctp, fp);
610 return;
616 * Return B_TRUE if there is still an active peer address with zero strikes;
617 * otherwise rturn B_FALSE.
619 boolean_t
620 sctp_is_a_faddr_clean(sctp_t *sctp)
622 sctp_faddr_t *fp;
624 for (fp = sctp->sctp_faddrs; fp; fp = fp->sf_next) {
625 if (fp->sf_state == SCTP_FADDRS_ALIVE && fp->sf_strikes == 0) {
626 return (B_TRUE);
630 return (B_FALSE);
634 * Returns 0 if there is at leave one other active faddr, -1 if there
635 * are none. If there are none left, faddr_dead() will start killing the
636 * association.
637 * If the downed faddr was the current faddr, a new current faddr
638 * will be chosen.
641 sctp_faddr_dead(sctp_t *sctp, sctp_faddr_t *fp, int newstate)
643 sctp_faddr_t *ofp;
644 sctp_stack_t *sctps = sctp->sctp_sctps;
646 if (fp->sf_state == SCTP_FADDRS_ALIVE) {
647 sctp_intf_event(sctp, fp->sf_faddr, SCTP_ADDR_UNREACHABLE, 0);
649 fp->sf_state = newstate;
651 dprint(1, ("sctp_faddr_dead: %x:%x:%x:%x down (state=%d)\n",
652 SCTP_PRINTADDR(fp->sf_faddr), newstate));
654 if (fp == sctp->sctp_current) {
655 /* Current faddr down; need to switch it */
656 sctp->sctp_current = NULL;
659 /* Find next alive faddr */
660 ofp = fp;
661 for (fp = fp->sf_next; fp != NULL; fp = fp->sf_next) {
662 if (fp->sf_state == SCTP_FADDRS_ALIVE) {
663 break;
667 if (fp == NULL) {
668 /* Continue from beginning of list */
669 for (fp = sctp->sctp_faddrs; fp != ofp; fp = fp->sf_next) {
670 if (fp->sf_state == SCTP_FADDRS_ALIVE) {
671 break;
677 * Find a new fp, so if the current faddr is dead, use the new fp
678 * as the current one.
680 if (fp != ofp) {
681 if (sctp->sctp_current == NULL) {
682 dprint(1, ("sctp_faddr_dead: failover->%x:%x:%x:%x\n",
683 SCTP_PRINTADDR(fp->sf_faddr)));
685 * Note that we don't need to reset the source addr
686 * of the new fp.
688 sctp_set_faddr_current(sctp, fp);
690 return (0);
694 /* All faddrs are down; kill the association */
695 dprint(1, ("sctp_faddr_dead: all faddrs down, killing assoc\n"));
696 SCTPS_BUMP_MIB(sctps, sctpAborted);
697 sctp_assoc_event(sctp, sctp->sctp_state < SCTPS_ESTABLISHED ?
698 SCTP_CANT_STR_ASSOC : SCTP_COMM_LOST, 0, NULL);
699 sctp_clean_death(sctp, sctp->sctp_client_errno ?
700 sctp->sctp_client_errno : ETIMEDOUT);
702 return (-1);
705 sctp_faddr_t *
706 sctp_rotate_faddr(sctp_t *sctp, sctp_faddr_t *ofp)
708 sctp_faddr_t *nfp = NULL;
709 sctp_faddr_t *saved_fp = NULL;
710 int min_strikes;
712 if (ofp == NULL) {
713 ofp = sctp->sctp_current;
715 /* Nothing to do */
716 if (sctp->sctp_nfaddrs < 2)
717 return (ofp);
720 * Find the next live peer address with zero strikes. In case
721 * there is none, find the one with the lowest number of strikes.
723 min_strikes = ofp->sf_strikes;
724 nfp = ofp->sf_next;
725 while (nfp != ofp) {
726 /* If reached end of list, continue scan from the head */
727 if (nfp == NULL) {
728 nfp = sctp->sctp_faddrs;
729 continue;
731 if (nfp->sf_state == SCTP_FADDRS_ALIVE) {
732 if (nfp->sf_strikes == 0)
733 break;
734 if (nfp->sf_strikes < min_strikes) {
735 min_strikes = nfp->sf_strikes;
736 saved_fp = nfp;
739 nfp = nfp->sf_next;
741 /* If reached the old address, there is no zero strike path */
742 if (nfp == ofp)
743 nfp = NULL;
746 * If there is a peer address with zero strikes we use that, if not
747 * return a peer address with fewer strikes than the one last used,
748 * if neither exist we may as well stay with the old one.
750 if (nfp != NULL)
751 return (nfp);
752 if (saved_fp != NULL)
753 return (saved_fp);
754 return (ofp);
757 void
758 sctp_unlink_faddr(sctp_t *sctp, sctp_faddr_t *fp)
760 sctp_faddr_t *fpp;
762 if (!sctp->sctp_faddrs) {
763 return;
766 if (fp->sf_timer_mp != NULL) {
767 sctp_timer_free(fp->sf_timer_mp);
768 fp->sf_timer_mp = NULL;
769 fp->sf_timer_running = 0;
771 if (fp->sf_rc_timer_mp != NULL) {
772 sctp_timer_free(fp->sf_rc_timer_mp);
773 fp->sf_rc_timer_mp = NULL;
774 fp->sf_rc_timer_running = 0;
776 if (fp->sf_ixa != NULL) {
777 ixa_refrele(fp->sf_ixa);
778 fp->sf_ixa = NULL;
781 if (fp == sctp->sctp_faddrs) {
782 goto gotit;
785 for (fpp = sctp->sctp_faddrs; fpp->sf_next != fp; fpp = fpp->sf_next)
788 gotit:
789 ASSERT(sctp->sctp_conn_tfp != NULL);
790 mutex_enter(&sctp->sctp_conn_tfp->tf_lock);
791 if (fp == sctp->sctp_faddrs) {
792 sctp->sctp_faddrs = fp->sf_next;
793 } else {
794 fpp->sf_next = fp->sf_next;
796 mutex_exit(&sctp->sctp_conn_tfp->tf_lock);
797 kmem_cache_free(sctp_kmem_faddr_cache, fp);
798 sctp->sctp_nfaddrs--;
801 void
802 sctp_zap_faddrs(sctp_t *sctp, int caller_holds_lock)
804 sctp_faddr_t *fp, *fpn;
806 if (sctp->sctp_faddrs == NULL) {
807 ASSERT(sctp->sctp_lastfaddr == NULL);
808 return;
811 ASSERT(sctp->sctp_lastfaddr != NULL);
812 sctp->sctp_lastfaddr = NULL;
813 sctp->sctp_current = NULL;
814 sctp->sctp_primary = NULL;
816 sctp_free_faddr_timers(sctp);
818 if (sctp->sctp_conn_tfp != NULL && !caller_holds_lock) {
819 /* in conn fanout; need to hold lock */
820 mutex_enter(&sctp->sctp_conn_tfp->tf_lock);
823 for (fp = sctp->sctp_faddrs; fp; fp = fpn) {
824 fpn = fp->sf_next;
825 if (fp->sf_ixa != NULL) {
826 ixa_refrele(fp->sf_ixa);
827 fp->sf_ixa = NULL;
829 kmem_cache_free(sctp_kmem_faddr_cache, fp);
830 sctp->sctp_nfaddrs--;
833 sctp->sctp_faddrs = NULL;
834 ASSERT(sctp->sctp_nfaddrs == 0);
835 if (sctp->sctp_conn_tfp != NULL && !caller_holds_lock) {
836 mutex_exit(&sctp->sctp_conn_tfp->tf_lock);
841 void
842 sctp_zap_addrs(sctp_t *sctp)
844 sctp_zap_faddrs(sctp, 0);
845 sctp_free_saddrs(sctp);
849 * Build two SCTP header templates; one for IPv4 and one for IPv6.
850 * Store them in sctp_iphc and sctp_iphc6 respectively (and related fields).
851 * There are no IP addresses in the templates, but the port numbers and
852 * verifier are field in from the conn_t and sctp_t.
854 * Returns failure if can't allocate memory, or if there is a problem
855 * with a routing header/option.
857 * We allocate space for the minimum sctp header (sctp_hdr_t).
859 * We massage an routing option/header. There is no checksum implication
860 * for a routing header for sctp.
862 * Caller needs to update conn_wroff if desired.
865 sctp_build_hdrs(sctp_t *sctp, int sleep)
867 conn_t *connp = sctp->sctp_connp;
868 ip_pkt_t *ipp = &connp->conn_xmit_ipp;
869 uint_t ip_hdr_length;
870 uchar_t *hdrs;
871 uint_t hdrs_len;
872 uint_t ulp_hdr_length = sizeof (sctp_hdr_t);
873 ipha_t *ipha;
874 ip6_t *ip6h;
875 sctp_hdr_t *sctph;
876 in6_addr_t v6src, v6dst;
877 ipaddr_t v4src, v4dst;
879 v4src = connp->conn_saddr_v4;
880 v4dst = connp->conn_faddr_v4;
881 v6src = connp->conn_saddr_v6;
882 v6dst = connp->conn_faddr_v6;
884 /* First do IPv4 header */
885 ip_hdr_length = ip_total_hdrs_len_v4(ipp);
887 /* In case of IP options it can be too much */
888 if (ip_hdr_length > IP_MAX_HDR_LENGTH) {
889 /* Preserves existing TX errno for this */
890 return (EHOSTUNREACH);
892 hdrs_len = ip_hdr_length + ulp_hdr_length;
893 ASSERT(hdrs_len != 0);
895 if (hdrs_len != sctp->sctp_iphc_len) {
896 /* Allocate new before we free any old */
897 hdrs = kmem_alloc(hdrs_len, sleep);
898 if (hdrs == NULL)
899 return (ENOMEM);
901 if (sctp->sctp_iphc != NULL)
902 kmem_free(sctp->sctp_iphc, sctp->sctp_iphc_len);
903 sctp->sctp_iphc = hdrs;
904 sctp->sctp_iphc_len = hdrs_len;
905 } else {
906 hdrs = sctp->sctp_iphc;
908 sctp->sctp_hdr_len = sctp->sctp_iphc_len;
909 sctp->sctp_ip_hdr_len = ip_hdr_length;
911 sctph = (sctp_hdr_t *)(hdrs + ip_hdr_length);
912 sctp->sctp_sctph = sctph;
913 sctph->sh_sport = connp->conn_lport;
914 sctph->sh_dport = connp->conn_fport;
915 sctph->sh_verf = sctp->sctp_fvtag;
916 sctph->sh_chksum = 0;
918 ipha = (ipha_t *)hdrs;
919 sctp->sctp_ipha = ipha;
921 ipha->ipha_src = v4src;
922 ipha->ipha_dst = v4dst;
923 ip_build_hdrs_v4(hdrs, ip_hdr_length, ipp, connp->conn_proto);
924 ipha->ipha_length = htons(hdrs_len);
925 ipha->ipha_fragment_offset_and_flags = 0;
927 if (ipp->ipp_fields & IPPF_IPV4_OPTIONS)
928 (void) ip_massage_options(ipha, connp->conn_netstack);
930 /* Now IPv6 */
931 ip_hdr_length = ip_total_hdrs_len_v6(ipp);
932 hdrs_len = ip_hdr_length + ulp_hdr_length;
933 ASSERT(hdrs_len != 0);
935 if (hdrs_len != sctp->sctp_iphc6_len) {
936 /* Allocate new before we free any old */
937 hdrs = kmem_alloc(hdrs_len, sleep);
938 if (hdrs == NULL)
939 return (ENOMEM);
941 if (sctp->sctp_iphc6 != NULL)
942 kmem_free(sctp->sctp_iphc6, sctp->sctp_iphc6_len);
943 sctp->sctp_iphc6 = hdrs;
944 sctp->sctp_iphc6_len = hdrs_len;
945 } else {
946 hdrs = sctp->sctp_iphc6;
948 sctp->sctp_hdr6_len = sctp->sctp_iphc6_len;
949 sctp->sctp_ip_hdr6_len = ip_hdr_length;
951 sctph = (sctp_hdr_t *)(hdrs + ip_hdr_length);
952 sctp->sctp_sctph6 = sctph;
953 sctph->sh_sport = connp->conn_lport;
954 sctph->sh_dport = connp->conn_fport;
955 sctph->sh_verf = sctp->sctp_fvtag;
956 sctph->sh_chksum = 0;
958 ip6h = (ip6_t *)hdrs;
959 sctp->sctp_ip6h = ip6h;
961 ip6h->ip6_src = v6src;
962 ip6h->ip6_dst = v6dst;
963 ip_build_hdrs_v6(hdrs, ip_hdr_length, ipp, connp->conn_proto,
964 connp->conn_flowinfo);
965 ip6h->ip6_plen = htons(hdrs_len - IPV6_HDR_LEN);
967 if (ipp->ipp_fields & IPPF_RTHDR) {
968 uint8_t *end;
969 ip6_rthdr_t *rth;
971 end = (uint8_t *)ip6h + ip_hdr_length;
972 rth = ip_find_rthdr_v6(ip6h, end);
973 if (rth != NULL) {
974 (void) ip_massage_options_v6(ip6h, rth,
975 connp->conn_netstack);
979 * Verify that the first hop isn't a mapped address.
980 * Routers along the path need to do this verification
981 * for subsequent hops.
983 if (IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_dst))
984 return (EADDRNOTAVAIL);
986 return (0);
990 * XXX implement more sophisticated logic
993 sctp_set_hdraddrs(sctp_t *sctp)
995 sctp_faddr_t *fp;
996 int gotv4 = 0;
997 int gotv6 = 0;
998 conn_t *connp = sctp->sctp_connp;
1000 ASSERT(sctp->sctp_faddrs != NULL);
1001 ASSERT(sctp->sctp_nsaddrs > 0);
1003 /* Set up using the primary first */
1004 connp->conn_faddr_v6 = sctp->sctp_primary->sf_faddr;
1005 /* saddr may be unspec; make_mp() will handle this */
1006 connp->conn_saddr_v6 = sctp->sctp_primary->sf_saddr;
1007 connp->conn_laddr_v6 = connp->conn_saddr_v6;
1008 if (IN6_IS_ADDR_V4MAPPED(&sctp->sctp_primary->sf_faddr)) {
1009 gotv4 = 1;
1010 if (connp->conn_family == AF_INET) {
1011 goto done;
1013 } else {
1014 gotv6 = 1;
1017 for (fp = sctp->sctp_faddrs; fp; fp = fp->sf_next) {
1018 if (!gotv4 && IN6_IS_ADDR_V4MAPPED(&fp->sf_faddr)) {
1019 gotv4 = 1;
1020 if (connp->conn_family == AF_INET || gotv6) {
1021 break;
1023 } else if (!gotv6 && !IN6_IS_ADDR_V4MAPPED(&fp->sf_faddr)) {
1024 gotv6 = 1;
1025 if (gotv4)
1026 break;
1030 done:
1031 if (!gotv4 && !gotv6)
1032 return (EACCES);
1034 return (0);
1038 * got_errchunk is set B_TRUE only if called from validate_init_params(), when
1039 * an ERROR chunk is already prepended the size of which needs updating for
1040 * additional unrecognized parameters. Other callers either prepend the ERROR
1041 * chunk with the correct size after calling this function, or they are calling
1042 * to add an invalid parameter to an INIT_ACK chunk, in that case no ERROR chunk
1043 * exists, the CAUSE blocks go into the INIT_ACK directly.
1045 * *errmp will be non-NULL both when adding an additional CAUSE block to an
1046 * existing prepended COOKIE ERROR chunk (processing params of an INIT_ACK),
1047 * and when adding unrecognized parameters after the first, to an INIT_ACK
1048 * (processing params of an INIT chunk).
1050 void
1051 sctp_add_unrec_parm(sctp_parm_hdr_t *uph, mblk_t **errmp,
1052 boolean_t got_errchunk)
1054 mblk_t *mp;
1055 sctp_parm_hdr_t *ph;
1056 size_t len;
1057 int pad;
1058 sctp_chunk_hdr_t *ecp;
1060 len = sizeof (*ph) + ntohs(uph->sph_len);
1061 if ((pad = len % SCTP_ALIGN) != 0) {
1062 pad = SCTP_ALIGN - pad;
1063 len += pad;
1065 mp = allocb(len, BPRI_MED);
1066 if (mp == NULL) {
1067 return;
1070 ph = (sctp_parm_hdr_t *)(mp->b_rptr);
1071 ph->sph_type = htons(PARM_UNRECOGNIZED);
1072 ph->sph_len = htons(len - pad);
1074 /* copy in the unrecognized parameter */
1075 bcopy(uph, ph + 1, ntohs(uph->sph_len));
1077 if (pad != 0)
1078 bzero((mp->b_rptr + len - pad), pad);
1080 mp->b_wptr = mp->b_rptr + len;
1081 if (*errmp != NULL) {
1083 * Update total length if an ERROR chunk, then link
1084 * this CAUSE block to the possible chain of CAUSE
1085 * blocks attached to the ERROR chunk or INIT_ACK
1086 * being created.
1088 if (got_errchunk) {
1089 /* ERROR chunk already prepended */
1090 ecp = (sctp_chunk_hdr_t *)((*errmp)->b_rptr);
1091 ecp->sch_len = htons(ntohs(ecp->sch_len) + len);
1093 linkb(*errmp, mp);
1094 } else {
1095 *errmp = mp;
1100 * o Bounds checking
1101 * o Updates remaining
1102 * o Checks alignment
1104 sctp_parm_hdr_t *
1105 sctp_next_parm(sctp_parm_hdr_t *current, ssize_t *remaining)
1107 int pad;
1108 uint16_t len;
1110 len = ntohs(current->sph_len);
1111 *remaining -= len;
1112 if (*remaining < sizeof (*current) || len < sizeof (*current)) {
1113 return (NULL);
1115 if ((pad = len & (SCTP_ALIGN - 1)) != 0) {
1116 pad = SCTP_ALIGN - pad;
1117 *remaining -= pad;
1119 /*LINTED pointer cast may result in improper alignment*/
1120 current = (sctp_parm_hdr_t *)((char *)current + len + pad);
1121 return (current);
1125 * Sets the address parameters given in the INIT chunk into sctp's
1126 * faddrs; if psctp is non-NULL, copies psctp's saddrs. If there are
1127 * no address parameters in the INIT chunk, a single faddr is created
1128 * from the ip hdr at the beginning of pkt.
1129 * If there already are existing addresses hanging from sctp, merge
1130 * them in, if the old info contains addresses which are not present
1131 * in this new info, get rid of them, and clean the pointers if there's
1132 * messages which have this as their target address.
1134 * We also re-adjust the source address list here since the list may
1135 * contain more than what is actually part of the association. If
1136 * we get here from sctp_send_cookie_echo(), we are on the active
1137 * side and psctp will be NULL and ich will be the INIT-ACK chunk.
1138 * If we get here from sctp_accept_comm(), ich will be the INIT chunk
1139 * and psctp will be the listening endpoint.
1141 * INIT processing: When processing the INIT we inherit the src address
1142 * list from the listener. For a loopback or linklocal association, we
1143 * delete the list and just take the address from the IP header (since
1144 * that's how we created the INIT-ACK). Additionally, for loopback we
1145 * ignore the address params in the INIT. For determining which address
1146 * types were sent in the INIT-ACK we follow the same logic as in
1147 * creating the INIT-ACK. We delete addresses of the type that are not
1148 * supported by the peer.
1150 * INIT-ACK processing: When processing the INIT-ACK since we had not
1151 * included addr params for loopback or linklocal addresses when creating
1152 * the INIT, we just use the address from the IP header. Further, for
1153 * loopback we ignore the addr param list. We mark addresses of the
1154 * type not supported by the peer as unconfirmed.
1156 * In case of INIT processing we look for supported address types in the
1157 * supported address param, if present. In both cases the address type in
1158 * the IP header is supported as well as types for addresses in the param
1159 * list, if any.
1161 * Once we have the supported address types sctp_check_saddr() runs through
1162 * the source address list and deletes or marks as unconfirmed address of
1163 * types not supported by the peer.
1165 * Returns 0 on success, sys errno on failure
1168 sctp_get_addrparams(sctp_t *sctp, sctp_t *psctp, mblk_t *pkt,
1169 sctp_chunk_hdr_t *ich, uint_t *sctp_options)
1171 sctp_init_chunk_t *init;
1172 ipha_t *iph;
1173 ip6_t *ip6h;
1174 in6_addr_t hdrsaddr[1];
1175 in6_addr_t hdrdaddr[1];
1176 sctp_parm_hdr_t *ph;
1177 ssize_t remaining;
1178 int isv4;
1179 int err;
1180 sctp_faddr_t *fp;
1181 int supp_af = 0;
1182 boolean_t check_saddr = B_TRUE;
1183 conn_t *connp = sctp->sctp_connp;
1185 if (sctp_options != NULL)
1186 *sctp_options = 0;
1188 /* extract the address from the IP header */
1189 isv4 = (IPH_HDR_VERSION(pkt->b_rptr) == IPV4_VERSION);
1190 if (isv4) {
1191 iph = (ipha_t *)pkt->b_rptr;
1192 IN6_IPADDR_TO_V4MAPPED(iph->ipha_src, hdrsaddr);
1193 IN6_IPADDR_TO_V4MAPPED(iph->ipha_dst, hdrdaddr);
1194 supp_af |= PARM_SUPP_V4;
1195 } else {
1196 ip6h = (ip6_t *)pkt->b_rptr;
1197 hdrsaddr[0] = ip6h->ip6_src;
1198 hdrdaddr[0] = ip6h->ip6_dst;
1199 supp_af |= PARM_SUPP_V6;
1203 * Unfortunately, we can't delay this because adding an faddr
1204 * looks for the presence of the source address (from the ire
1205 * for the faddr) in the source address list. We could have
1206 * delayed this if, say, this was a loopback/linklocal connection.
1207 * Now, we just end up nuking this list and taking the addr from
1208 * the IP header for loopback/linklocal.
1210 if (psctp != NULL && psctp->sctp_nsaddrs > 0) {
1211 ASSERT(sctp->sctp_nsaddrs == 0);
1213 err = sctp_dup_saddrs(psctp, sctp, KM_NOSLEEP);
1214 if (err != 0)
1215 return (err);
1218 * We will add the faddr before parsing the address list as this
1219 * might be a loopback connection and we would not have to
1220 * go through the list.
1222 * Make sure the header's addr is in the list
1224 fp = sctp_lookup_faddr(sctp, hdrsaddr);
1225 if (fp == NULL) {
1226 /* not included; add it now */
1227 err = sctp_add_faddr(sctp, hdrsaddr, KM_NOSLEEP, B_TRUE);
1228 if (err != 0)
1229 return (err);
1231 /* sctp_faddrs will be the hdr addr */
1232 fp = sctp->sctp_faddrs;
1234 /* make the header addr the primary */
1236 sctp->sctp_primary = fp;
1237 sctp->sctp_current = fp;
1238 sctp->sctp_mss = fp->sf_pmss;
1240 /* For loopback connections & linklocal get address from the header */
1241 if (sctp->sctp_loopback || sctp->sctp_linklocal) {
1242 if (sctp->sctp_nsaddrs != 0)
1243 sctp_free_saddrs(sctp);
1244 if ((err = sctp_saddr_add_addr(sctp, hdrdaddr, 0)) != 0)
1245 return (err);
1246 /* For loopback ignore address list */
1247 if (sctp->sctp_loopback)
1248 return (0);
1249 check_saddr = B_FALSE;
1252 /* Walk the params in the INIT [ACK], pulling out addr params */
1253 remaining = ntohs(ich->sch_len) - sizeof (*ich) -
1254 sizeof (sctp_init_chunk_t);
1255 if (remaining < sizeof (*ph)) {
1256 if (check_saddr) {
1257 sctp_check_saddr(sctp, supp_af, psctp == NULL ?
1258 B_FALSE : B_TRUE, hdrdaddr);
1260 ASSERT(sctp_saddr_lookup(sctp, hdrdaddr, 0) != NULL);
1261 return (0);
1264 init = (sctp_init_chunk_t *)(ich + 1);
1265 ph = (sctp_parm_hdr_t *)(init + 1);
1267 /* params will have already been byteordered when validating */
1268 while (ph != NULL) {
1269 if (ph->sph_type == htons(PARM_SUPP_ADDRS)) {
1270 int plen;
1271 uint16_t *p;
1272 uint16_t addrtype;
1274 ASSERT(psctp != NULL);
1275 plen = ntohs(ph->sph_len);
1276 p = (uint16_t *)(ph + 1);
1277 while (plen > 0) {
1278 addrtype = ntohs(*p);
1279 switch (addrtype) {
1280 case PARM_ADDR6:
1281 supp_af |= PARM_SUPP_V6;
1282 break;
1283 case PARM_ADDR4:
1284 supp_af |= PARM_SUPP_V4;
1285 break;
1286 default:
1287 break;
1289 p++;
1290 plen -= sizeof (*p);
1292 } else if (ph->sph_type == htons(PARM_ADDR4)) {
1293 if (remaining >= PARM_ADDR4_LEN) {
1294 in6_addr_t addr;
1295 ipaddr_t ta;
1297 supp_af |= PARM_SUPP_V4;
1299 * Screen out broad/multicasts & loopback.
1300 * If the endpoint only accepts v6 address,
1301 * go to the next one.
1303 * Subnet broadcast check is done in
1304 * sctp_add_faddr(). If the address is
1305 * a broadcast address, it won't be added.
1307 bcopy(ph + 1, &ta, sizeof (ta));
1308 if (ta == 0 ||
1309 ta == INADDR_BROADCAST ||
1310 ta == htonl(INADDR_LOOPBACK) ||
1311 CLASSD(ta) || connp->conn_ipv6_v6only) {
1312 goto next;
1314 IN6_INADDR_TO_V4MAPPED((struct in_addr *)
1315 (ph + 1), &addr);
1317 /* Check for duplicate. */
1318 if (sctp_lookup_faddr(sctp, &addr) != NULL)
1319 goto next;
1321 /* OK, add it to the faddr set */
1322 err = sctp_add_faddr(sctp, &addr, KM_NOSLEEP,
1323 B_FALSE);
1324 /* Something is wrong... Try the next one. */
1325 if (err != 0)
1326 goto next;
1328 } else if (ph->sph_type == htons(PARM_ADDR6) &&
1329 connp->conn_family == AF_INET6) {
1330 /* An v4 socket should not take v6 addresses. */
1331 if (remaining >= PARM_ADDR6_LEN) {
1332 in6_addr_t *addr6;
1334 supp_af |= PARM_SUPP_V6;
1335 addr6 = (in6_addr_t *)(ph + 1);
1337 * Screen out link locals, mcast, loopback
1338 * and bogus v6 address.
1340 if (IN6_IS_ADDR_LINKLOCAL(addr6) ||
1341 IN6_IS_ADDR_MULTICAST(addr6) ||
1342 IN6_IS_ADDR_LOOPBACK(addr6) ||
1343 IN6_IS_ADDR_V4MAPPED(addr6)) {
1344 goto next;
1346 /* Check for duplicate. */
1347 if (sctp_lookup_faddr(sctp, addr6) != NULL)
1348 goto next;
1350 err = sctp_add_faddr(sctp,
1351 (in6_addr_t *)(ph + 1), KM_NOSLEEP,
1352 B_FALSE);
1353 /* Something is wrong... Try the next one. */
1354 if (err != 0)
1355 goto next;
1357 } else if (ph->sph_type == htons(PARM_FORWARD_TSN)) {
1358 if (sctp_options != NULL)
1359 *sctp_options |= SCTP_PRSCTP_OPTION;
1360 } /* else; skip */
1362 next:
1363 ph = sctp_next_parm(ph, &remaining);
1365 if (check_saddr) {
1366 sctp_check_saddr(sctp, supp_af, psctp == NULL ? B_FALSE :
1367 B_TRUE, hdrdaddr);
1369 ASSERT(sctp_saddr_lookup(sctp, hdrdaddr, 0) != NULL);
1370 return (0);
1374 * Returns 0 if the check failed and the restart should be refused,
1375 * 1 if the check succeeded.
1378 sctp_secure_restart_check(mblk_t *pkt, sctp_chunk_hdr_t *ich, uint32_t ports,
1379 int sleep, sctp_stack_t *sctps, ip_recv_attr_t *ira)
1381 sctp_faddr_t *fp, *fphead = NULL;
1382 sctp_parm_hdr_t *ph;
1383 ssize_t remaining;
1384 int isv4;
1385 ipha_t *iph;
1386 ip6_t *ip6h;
1387 in6_addr_t hdraddr[1];
1388 int retval = 0;
1389 sctp_tf_t *tf;
1390 sctp_t *sctp;
1391 int compres;
1392 sctp_init_chunk_t *init;
1393 int nadded = 0;
1395 /* extract the address from the IP header */
1396 isv4 = (IPH_HDR_VERSION(pkt->b_rptr) == IPV4_VERSION);
1397 if (isv4) {
1398 iph = (ipha_t *)pkt->b_rptr;
1399 IN6_IPADDR_TO_V4MAPPED(iph->ipha_src, hdraddr);
1400 } else {
1401 ip6h = (ip6_t *)pkt->b_rptr;
1402 hdraddr[0] = ip6h->ip6_src;
1405 /* Walk the params in the INIT [ACK], pulling out addr params */
1406 remaining = ntohs(ich->sch_len) - sizeof (*ich) -
1407 sizeof (sctp_init_chunk_t);
1408 if (remaining < sizeof (*ph)) {
1409 /* no parameters; restart OK */
1410 return (1);
1412 init = (sctp_init_chunk_t *)(ich + 1);
1413 ph = (sctp_parm_hdr_t *)(init + 1);
1415 while (ph != NULL) {
1416 sctp_faddr_t *fpa = NULL;
1418 /* params will have already been byteordered when validating */
1419 if (ph->sph_type == htons(PARM_ADDR4)) {
1420 if (remaining >= PARM_ADDR4_LEN) {
1421 in6_addr_t addr;
1422 IN6_INADDR_TO_V4MAPPED((struct in_addr *)
1423 (ph + 1), &addr);
1424 fpa = kmem_cache_alloc(sctp_kmem_faddr_cache,
1425 sleep);
1426 if (fpa == NULL) {
1427 goto done;
1429 bzero(fpa, sizeof (*fpa));
1430 fpa->sf_faddr = addr;
1431 fpa->sf_next = NULL;
1433 } else if (ph->sph_type == htons(PARM_ADDR6)) {
1434 if (remaining >= PARM_ADDR6_LEN) {
1435 fpa = kmem_cache_alloc(sctp_kmem_faddr_cache,
1436 sleep);
1437 if (fpa == NULL) {
1438 goto done;
1440 bzero(fpa, sizeof (*fpa));
1441 bcopy(ph + 1, &fpa->sf_faddr,
1442 sizeof (fpa->sf_faddr));
1443 fpa->sf_next = NULL;
1446 /* link in the new addr, if it was an addr param */
1447 if (fpa != NULL) {
1448 if (fphead == NULL) {
1449 fphead = fpa;
1450 } else {
1451 fpa->sf_next = fphead;
1452 fphead = fpa;
1456 ph = sctp_next_parm(ph, &remaining);
1459 if (fphead == NULL) {
1460 /* no addr parameters; restart OK */
1461 return (1);
1465 * got at least one; make sure the header's addr is
1466 * in the list
1468 fp = sctp_lookup_faddr_nosctp(fphead, hdraddr);
1469 if (fp == NULL) {
1470 /* not included; add it now */
1471 fp = kmem_cache_alloc(sctp_kmem_faddr_cache, sleep);
1472 if (fp == NULL) {
1473 goto done;
1475 bzero(fp, sizeof (*fp));
1476 fp->sf_faddr = *hdraddr;
1477 fp->sf_next = fphead;
1478 fphead = fp;
1482 * Now, we can finally do the check: For each sctp instance
1483 * on the hash line for ports, compare its faddr set against
1484 * the new one. If the new one is a strict subset of any
1485 * existing sctp's faddrs, the restart is OK. However, if there
1486 * is an overlap, this could be an attack, so return failure.
1487 * If all sctp's faddrs are disjoint, this is a legitimate new
1488 * association.
1490 tf = &(sctps->sctps_conn_fanout[SCTP_CONN_HASH(sctps, ports)]);
1491 mutex_enter(&tf->tf_lock);
1493 for (sctp = tf->tf_sctp; sctp; sctp = sctp->sctp_conn_hash_next) {
1494 if (ports != sctp->sctp_connp->conn_ports) {
1495 continue;
1497 compres = sctp_compare_faddrsets(fphead, sctp->sctp_faddrs);
1498 if (compres <= SCTP_ADDR_SUBSET) {
1499 retval = 1;
1500 mutex_exit(&tf->tf_lock);
1501 goto done;
1503 if (compres == SCTP_ADDR_OVERLAP) {
1504 dprint(1,
1505 ("new assoc from %x:%x:%x:%x overlaps with %p\n",
1506 SCTP_PRINTADDR(*hdraddr), (void *)sctp));
1508 * While we still hold the lock, we need to
1509 * figure out which addresses have been
1510 * added so we can include them in the abort
1511 * we will send back. Since these faddrs will
1512 * never be used, we overload the rto field
1513 * here, setting it to 0 if the address was
1514 * not added, 1 if it was added.
1516 for (fp = fphead; fp; fp = fp->sf_next) {
1517 if (sctp_lookup_faddr(sctp, &fp->sf_faddr)) {
1518 fp->sf_rto = 0;
1519 } else {
1520 fp->sf_rto = 1;
1521 nadded++;
1524 mutex_exit(&tf->tf_lock);
1525 goto done;
1528 mutex_exit(&tf->tf_lock);
1530 /* All faddrs are disjoint; legit new association */
1531 retval = 1;
1533 done:
1534 /* If are attempted adds, send back an abort listing the addrs */
1535 if (nadded > 0) {
1536 void *dtail;
1537 size_t dlen;
1539 dtail = kmem_alloc(PARM_ADDR6_LEN * nadded, KM_NOSLEEP);
1540 if (dtail == NULL) {
1541 goto cleanup;
1544 ph = dtail;
1545 dlen = 0;
1546 for (fp = fphead; fp; fp = fp->sf_next) {
1547 if (fp->sf_rto == 0) {
1548 continue;
1550 if (IN6_IS_ADDR_V4MAPPED(&fp->sf_faddr)) {
1551 ipaddr_t addr4;
1553 ph->sph_type = htons(PARM_ADDR4);
1554 ph->sph_len = htons(PARM_ADDR4_LEN);
1555 IN6_V4MAPPED_TO_IPADDR(&fp->sf_faddr, addr4);
1556 ph++;
1557 bcopy(&addr4, ph, sizeof (addr4));
1558 ph = (sctp_parm_hdr_t *)
1559 ((char *)ph + sizeof (addr4));
1560 dlen += PARM_ADDR4_LEN;
1561 } else {
1562 ph->sph_type = htons(PARM_ADDR6);
1563 ph->sph_len = htons(PARM_ADDR6_LEN);
1564 ph++;
1565 bcopy(&fp->sf_faddr, ph, sizeof (fp->sf_faddr));
1566 ph = (sctp_parm_hdr_t *)
1567 ((char *)ph + sizeof (fp->sf_faddr));
1568 dlen += PARM_ADDR6_LEN;
1572 /* Send off the abort */
1573 sctp_send_abort(sctp, sctp_init2vtag(ich),
1574 SCTP_ERR_RESTART_NEW_ADDRS, dtail, dlen, pkt, 0, B_TRUE,
1575 ira);
1577 kmem_free(dtail, PARM_ADDR6_LEN * nadded);
1580 cleanup:
1581 /* Clean up */
1582 if (fphead) {
1583 sctp_faddr_t *fpn;
1584 for (fp = fphead; fp; fp = fpn) {
1585 fpn = fp->sf_next;
1586 if (fp->sf_ixa != NULL) {
1587 ixa_refrele(fp->sf_ixa);
1588 fp->sf_ixa = NULL;
1590 kmem_cache_free(sctp_kmem_faddr_cache, fp);
1594 return (retval);
1598 * Reset any state related to transmitted chunks.
1600 void
1601 sctp_congest_reset(sctp_t *sctp)
1603 sctp_faddr_t *fp;
1604 sctp_stack_t *sctps = sctp->sctp_sctps;
1605 mblk_t *mp;
1607 for (fp = sctp->sctp_faddrs; fp != NULL; fp = fp->sf_next) {
1608 fp->sf_ssthresh = sctps->sctps_initial_mtu;
1609 SET_CWND(fp, fp->sf_pmss, sctps->sctps_slow_start_initial);
1610 fp->sf_suna = 0;
1611 fp->sf_pba = 0;
1614 * Clean up the transmit list as well since we have reset accounting
1615 * on all the fps. Send event upstream, if required.
1617 while ((mp = sctp->sctp_xmit_head) != NULL) {
1618 sctp->sctp_xmit_head = mp->b_next;
1619 mp->b_next = NULL;
1620 if (sctp->sctp_xmit_head != NULL)
1621 sctp->sctp_xmit_head->b_prev = NULL;
1622 sctp_sendfail_event(sctp, mp, 0, B_TRUE);
1624 sctp->sctp_xmit_head = NULL;
1625 sctp->sctp_xmit_tail = NULL;
1626 sctp->sctp_xmit_unacked = NULL;
1628 sctp->sctp_unacked = 0;
1630 * Any control message as well. We will clean-up this list as well.
1631 * This contains any pending ASCONF request that we have queued/sent.
1632 * If we do get an ACK we will just drop it. However, given that
1633 * we are restarting chances are we aren't going to get any.
1635 if (sctp->sctp_cxmit_list != NULL)
1636 sctp_asconf_free_cxmit(sctp, NULL);
1637 sctp->sctp_cxmit_list = NULL;
1638 sctp->sctp_cchunk_pend = 0;
1640 sctp->sctp_rexmitting = B_FALSE;
1641 sctp->sctp_rxt_nxttsn = 0;
1642 sctp->sctp_rxt_maxtsn = 0;
1644 sctp->sctp_zero_win_probe = B_FALSE;
1647 static void
1648 sctp_init_faddr(sctp_t *sctp, sctp_faddr_t *fp, in6_addr_t *addr,
1649 mblk_t *timer_mp)
1651 sctp_stack_t *sctps = sctp->sctp_sctps;
1653 ASSERT(fp->sf_ixa != NULL);
1655 bcopy(addr, &fp->sf_faddr, sizeof (*addr));
1656 if (IN6_IS_ADDR_V4MAPPED(addr)) {
1657 fp->sf_isv4 = 1;
1658 /* Make sure that sf_pmss is a multiple of SCTP_ALIGN. */
1659 fp->sf_pmss =
1660 (sctps->sctps_initial_mtu - sctp->sctp_hdr_len) &
1661 ~(SCTP_ALIGN - 1);
1662 fp->sf_ixa->ixa_flags |= IXAF_IS_IPV4;
1663 } else {
1664 fp->sf_isv4 = 0;
1665 fp->sf_pmss =
1666 (sctps->sctps_initial_mtu - sctp->sctp_hdr6_len) &
1667 ~(SCTP_ALIGN - 1);
1668 fp->sf_ixa->ixa_flags &= ~IXAF_IS_IPV4;
1670 fp->sf_cwnd = sctps->sctps_slow_start_initial * fp->sf_pmss;
1671 fp->sf_rto = MIN(sctp->sctp_rto_initial, sctp->sctp_rto_max_init);
1672 SCTP_MAX_RTO(sctp, fp);
1673 fp->sf_srtt = -1;
1674 fp->sf_rtt_updates = 0;
1675 fp->sf_strikes = 0;
1676 fp->sf_max_retr = sctp->sctp_pp_max_rxt;
1677 /* Mark it as not confirmed. */
1678 fp->sf_state = SCTP_FADDRS_UNCONFIRMED;
1679 fp->sf_hb_interval = sctp->sctp_hb_interval;
1680 fp->sf_ssthresh = sctps->sctps_initial_ssthresh;
1681 fp->sf_suna = 0;
1682 fp->sf_pba = 0;
1683 fp->sf_acked = 0;
1684 fp->sf_lastactive = fp->sf_hb_expiry = ddi_get_lbolt64();
1685 fp->sf_timer_mp = timer_mp;
1686 fp->sf_hb_pending = B_FALSE;
1687 fp->sf_hb_enabled = B_TRUE;
1688 fp->sf_df = 1;
1689 fp->sf_pmtu_discovered = 0;
1690 fp->sf_next = NULL;
1691 fp->sf_T3expire = 0;
1692 (void) random_get_pseudo_bytes((uint8_t *)&fp->sf_hb_secret,
1693 sizeof (fp->sf_hb_secret));
1694 fp->sf_rxt_unacked = 0;
1696 sctp_get_dest(sctp, fp);
1699 /*ARGSUSED*/
1700 static int
1701 faddr_constructor(void *buf, void *arg, int flags)
1703 sctp_faddr_t *fp = buf;
1705 fp->sf_timer_mp = NULL;
1706 fp->sf_timer_running = 0;
1708 fp->sf_rc_timer_mp = NULL;
1709 fp->sf_rc_timer_running = 0;
1711 return (0);
1714 /*ARGSUSED*/
1715 static void
1716 faddr_destructor(void *buf, void *arg)
1718 sctp_faddr_t *fp = buf;
1720 ASSERT(fp->sf_timer_mp == NULL);
1721 ASSERT(fp->sf_timer_running == 0);
1723 ASSERT(fp->sf_rc_timer_mp == NULL);
1724 ASSERT(fp->sf_rc_timer_running == 0);
1727 void
1728 sctp_faddr_init(void)
1730 sctp_kmem_faddr_cache = kmem_cache_create("sctp_faddr_cache",
1731 sizeof (sctp_faddr_t), 0, faddr_constructor, faddr_destructor,
1732 NULL, NULL, NULL, 0);
1735 void
1736 sctp_faddr_fini(void)
1738 kmem_cache_destroy(sctp_kmem_faddr_cache);