7888 installboot: print version info of the file
[unleashed.git] / kernel / net / ip / ip_multi.c
bloba5ba0ecc0a3a0103f95278e2b3cc3b6218bc3691
1 /*
2 * CDDL HEADER START
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
19 * CDDL HEADER END
22 * Copyright (c) 1991, 2010, Oracle and/or its affiliates. All rights reserved.
23 * Copyright (c) 1990 Mentat Inc.
26 #include <sys/types.h>
27 #include <sys/stream.h>
28 #include <sys/dlpi.h>
29 #include <sys/stropts.h>
30 #include <sys/strsun.h>
31 #include <sys/ddi.h>
32 #include <sys/cmn_err.h>
33 #include <sys/sdt.h>
34 #include <sys/zone.h>
36 #include <sys/param.h>
37 #include <sys/socket.h>
38 #include <sys/sockio.h>
39 #include <net/if.h>
40 #include <sys/systm.h>
41 #include <sys/strsubr.h>
42 #include <net/route.h>
43 #include <netinet/in.h>
44 #include <net/if_dl.h>
45 #include <netinet/ip6.h>
46 #include <netinet/icmp6.h>
48 #include <inet/common.h>
49 #include <inet/mi.h>
50 #include <inet/nd.h>
51 #include <inet/arp.h>
52 #include <inet/ip.h>
53 #include <inet/ip6.h>
54 #include <inet/ip_if.h>
55 #include <inet/ip_ndp.h>
56 #include <inet/ip_multi.h>
57 #include <inet/ipclassifier.h>
58 #include <inet/ipsec_impl.h>
59 #include <inet/sctp_ip.h>
60 #include <inet/ip_listutils.h>
61 #include <inet/udp_impl.h>
/* igmpv3/mldv2 source filter manipulation */
static void	ilm_bld_flists(conn_t *conn, void *arg);
static void	ilm_gen_filter(ilm_t *ilm, mcast_record_t *fmode,
    slist_t *flist);

/* ilm (per-ill membership state) creation and teardown */
static ilm_t	*ilm_add(ill_t *ill, const in6_addr_t *group,
    ilg_stat_t ilgstat, mcast_record_t ilg_fmode, slist_t *ilg_flist,
    zoneid_t zoneid);
static void	ilm_delete(ilm_t *ilm);
static int	ilm_numentries(ill_t *, const in6_addr_t *);

/*
 * Join/leave split into a *_serial half (takes ill_mcast_lock) and an
 * *_impl half (assumes the lock is held).
 */
static ilm_t	*ip_addmulti_serial(const in6_addr_t *, ill_t *, zoneid_t,
    ilg_stat_t, mcast_record_t, slist_t *, int *);
static ilm_t	*ip_addmulti_impl(const in6_addr_t *, ill_t *,
    zoneid_t, ilg_stat_t, mcast_record_t, slist_t *, int *);
static int	ip_delmulti_serial(ilm_t *, boolean_t, boolean_t);
static int	ip_delmulti_impl(ilm_t *, boolean_t, boolean_t);

/* driver (DLPI) multicast enable/disable */
static int	ip_ll_multireq(ill_t *ill, const in6_addr_t *group,
    t_uscalar_t);

/* ilg (per-conn membership state) manipulation */
static ilg_t	*ilg_lookup(conn_t *, const in6_addr_t *, ipaddr_t ifaddr,
    uint_t ifindex);
static int	ilg_add(conn_t *connp, const in6_addr_t *group,
    ipaddr_t ifaddr, uint_t ifindex, ill_t *ill, mcast_record_t fmode,
    const in6_addr_t *v6src);
static void	ilg_delete(conn_t *connp, ilg_t *ilg, const in6_addr_t *src);

static mblk_t	*ill_create_dl(ill_t *ill, uint32_t dl_primitive,
    uint32_t *addr_lenp, uint32_t *addr_offp);
static int	ip_opt_delete_group_excl(conn_t *connp,
    const in6_addr_t *v6group, ipaddr_t ifaddr, uint_t ifindex,
    mcast_record_t fmode, const in6_addr_t *v6src);

static ilm_t	*ilm_lookup(ill_t *, const in6_addr_t *, zoneid_t);

static int	ip_msfilter_ill(conn_t *, mblk_t *, const ip_ioctl_cmd_t *,
    ill_t **);

static void	ilg_check_detach(conn_t *, ill_t *);
static void	ilg_check_reattach(conn_t *, ill_t *);
/*
 * MT notes:
 *
 * Multicast joins operate on both the ilg and ilm structures. Multiple
 * threads operating on a conn (socket) trying to do multicast joins
 * need to synchronize when operating on the ilg. Multiple threads
 * potentially operating on different conns (socket endpoints) trying to
 * do multicast joins could eventually end up trying to manipulate the
 * ilm simultaneously and need to synchronize on the access to the ilm.
 * The access and lookup of the ilm, as well as other ill multicast state,
 * is under ill_mcast_lock.
 * The modifications and lookup of ilg entries is serialized using the
 * conn_ilg_lock rwlock. An ilg will not be freed until ilg_refcnt drops
 * to zero.
 *
 * In some cases we hold ill_mcast_lock and then acquire conn_ilg_lock, but
 * never the other way around.
 *
 * An ilm is an IP data structure used to track multicast join/leave.
 * An ilm is associated with a <multicast group, ipif> tuple in IPv4 and
 * with just <multicast group> in IPv6. ilm_refcnt is the number of ilg's
 * referencing the ilm.
 * The modifications and lookup of ilm entries is serialized using the
 * ill_mcast_lock rwlock; that lock handles all the igmp/mld modifications
 * of the ilm state.
 * ilms are created / destroyed only as writer. ilms
 * are not passed around. The datapath (anything outside of this file
 * and igmp.c) uses functions that do not return ilms - just the number
 * of members. So we don't need a dynamic refcount of the number
 * of threads holding reference to an ilm.
 *
 * In the cases where we serially access the ilg and ilm, which happens when
 * we handle the applications requests to join or leave groups and sources,
 * we use the ill_mcast_serializer mutex to ensure that a multithreaded
 * application which does concurrent joins and/or leaves on the same group on
 * the same socket always results in a consistent order for the ilg and ilm
 * modifications.
 *
 * When a multicast operation results in needing to send a message to
 * the driver (to join/leave a L2 multicast address), we use ill_dlpi_queue()
 * which serializes the DLPI requests. The IGMP/MLD code uses
 * ill_mcast_queue() to send IGMP/MLD IP packets so that it need not drop
 * the lock just to send a packet.
 */
/*
 * Allocate and zero `number' contiguous instances of `structure' via
 * mi_zalloc(); evaluates to NULL on allocation failure.
 */
#define	GETSTRUCT(structure, number)	\
	((structure *)mi_zalloc(sizeof (structure) * (number)))
/*
 * Take an additional reference on `ilg'.
 *
 * Caller must ensure that the ilg has not been condemned.
 * The condemned flag is only set in ilg_delete under conn_ilg_lock.
 *
 * The caller must hold conn_ilg_lock as writer.
 */
static void
ilg_refhold(ilg_t *ilg)
{
	ASSERT(ilg->ilg_refcnt != 0);
	ASSERT(!ilg->ilg_condemned);
	ASSERT(RW_WRITE_HELD(&ilg->ilg_connp->conn_ilg_lock));

	ilg->ilg_refcnt++;
}
/*
 * Final teardown of an ilg once its last reference is gone: unlink it
 * from the conn's conn_ilg list and free it.  The asserts verify that
 * the ilg has already been disassociated from its ill/ilm/filter and
 * marked condemned before we get here.
 */
static void
ilg_inactive(ilg_t *ilg)
{
	ASSERT(ilg->ilg_ill == NULL);
	ASSERT(ilg->ilg_ilm == NULL);
	ASSERT(ilg->ilg_filter == NULL);
	ASSERT(ilg->ilg_condemned);

	/* Unlink from list */
	*ilg->ilg_ptpn = ilg->ilg_next;
	if (ilg->ilg_next != NULL)
		ilg->ilg_next->ilg_ptpn = ilg->ilg_ptpn;
	ilg->ilg_next = NULL;
	ilg->ilg_ptpn = NULL;

	ilg->ilg_connp = NULL;
	kmem_free(ilg, sizeof (*ilg));
}
/*
 * Drop a reference on `ilg'; frees it via ilg_inactive() when the last
 * reference goes away.
 *
 * The caller must hold conn_ilg_lock as writer.
 */
static void
ilg_refrele(ilg_t *ilg)
{
	ASSERT(RW_WRITE_HELD(&ilg->ilg_connp->conn_ilg_lock));
	ASSERT(ilg->ilg_refcnt != 0);
	if (--ilg->ilg_refcnt == 0)
		ilg_inactive(ilg);
}
198 * Acquire reference on ilg and drop reference on held_ilg.
199 * In the case when held_ilg is the same as ilg we already have
200 * a reference, but the held_ilg might be condemned. In that case
201 * we avoid the ilg_refhold/rele so that we can assert in ire_refhold
202 * that the ilg isn't condemned.
204 static void
205 ilg_transfer_hold(ilg_t *held_ilg, ilg_t *ilg)
207 if (held_ilg == ilg)
208 return;
210 ilg_refhold(ilg);
211 if (held_ilg != NULL)
212 ilg_refrele(held_ilg);
/*
 * Allocate a new ilg_t and link it into connp->conn_ilg (at the head).
 * Returns NULL on failure, in which case `*errp' will be
 * filled in with the reason (EINVAL if the conn is closing, ENOMEM on
 * allocation failure).  The new ilg starts with ilg_refcnt == 1.
 *
 * Assumes connp->conn_ilg_lock is held as writer.
 */
static ilg_t *
conn_ilg_alloc(conn_t *connp, int *errp)
{
	ilg_t *ilg;

	ASSERT(RW_WRITE_HELD(&connp->conn_ilg_lock));

	/*
	 * If CONN_CLOSING is set, conn_ilg cleanup has begun and we must not
	 * create any ilgs.
	 */
	if (connp->conn_state_flags & CONN_CLOSING) {
		*errp = EINVAL;
		return (NULL);
	}

	/* KM_NOSLEEP: may be called in contexts where sleeping is unsafe */
	ilg = kmem_zalloc(sizeof (ilg_t), KM_NOSLEEP);
	if (ilg == NULL) {
		*errp = ENOMEM;
		return (NULL);
	}

	ilg->ilg_refcnt = 1;

	/* Insert at head */
	if (connp->conn_ilg != NULL)
		connp->conn_ilg->ilg_ptpn = &ilg->ilg_next;
	ilg->ilg_next = connp->conn_ilg;
	ilg->ilg_ptpn = &connp->conn_ilg;
	connp->conn_ilg = ilg;

	ilg->ilg_connp = connp;
	return (ilg);
}
/*
 * Accumulator used by ilm_bld_flists()/ilm_gen_filter() while walking all
 * conns to merge their per-socket source filters into one interface filter.
 */
typedef struct ilm_fbld_s {
	ilm_t		*fbld_ilm;	/* the ilm whose filter is rebuilt */
	int		fbld_in_cnt;	/* # of INCLUDE-mode ilgs seen */
	int		fbld_ex_cnt;	/* # of EXCLUDE-mode ilgs seen */
	slist_t		fbld_in;	/* union of the include lists */
	slist_t		fbld_ex;	/* intersection of the exclude lists */
	boolean_t	fbld_in_overflow; /* include union overflowed slist */
} ilm_fbld_t;
/*
 * ipcl_walk() callback: merge this conn's matching ilg filter state into
 * the ilm_fbld_t accumulator passed in `arg'.
 *
 * Caller must hold ill_mcast_lock.
 */
static void
ilm_bld_flists(conn_t *connp, void *arg)
{
	ilg_t *ilg;
	ilm_fbld_t *fbld = (ilm_fbld_t *)(arg);
	ilm_t *ilm = fbld->fbld_ilm;
	in6_addr_t *v6group = &ilm->ilm_v6addr;

	if (connp->conn_ilg == NULL)
		return;

	/*
	 * Since we can't break out of the ipcl_walk once started, we still
	 * have to look at every conn. But if we've already found one
	 * (EXCLUDE, NULL) list, there's no need to keep checking individual
	 * ilgs--that will be our state.
	 */
	if (fbld->fbld_ex_cnt > 0 && fbld->fbld_ex.sl_numsrc == 0)
		return;

	/*
	 * Check this conn's ilgs to see if any are interested in our
	 * ilm (group, interface match). If so, update the master
	 * include and exclude lists we're building in the fbld struct
	 * with this ilg's filter info.
	 *
	 * Note that the caller has already serialized on the ill we care
	 * about.
	 */
	ASSERT(MUTEX_HELD(&ilm->ilm_ill->ill_mcast_serializer));

	rw_enter(&connp->conn_ilg_lock, RW_READER);
	for (ilg = connp->conn_ilg; ilg != NULL; ilg = ilg->ilg_next) {
		if (ilg->ilg_condemned)
			continue;

		/*
		 * Since we are under the ill_mcast_serializer we know
		 * that any ilg+ilm operations on this ilm have either
		 * not started or completed, except for the last ilg
		 * (the one that caused us to be called) which doesn't
		 * have ilg_ilm set yet. Hence we compare using ilg_ill
		 * and the address.
		 */
		if ((ilg->ilg_ill == ilm->ilm_ill) &&
		    IN6_ARE_ADDR_EQUAL(&ilg->ilg_v6group, v6group)) {
			if (ilg->ilg_fmode == MODE_IS_INCLUDE) {
				fbld->fbld_in_cnt++;
				if (!fbld->fbld_in_overflow)
					l_union_in_a(&fbld->fbld_in,
					    ilg->ilg_filter,
					    &fbld->fbld_in_overflow);
			} else {
				fbld->fbld_ex_cnt++;
				/*
				 * On the first exclude list, don't try to do
				 * an intersection, as the master exclude list
				 * is intentionally empty. If the master list
				 * is still empty on later iterations, that
				 * means we have at least one ilg with an empty
				 * exclude list, so that should be reflected
				 * when we take the intersection.
				 */
				if (fbld->fbld_ex_cnt == 1) {
					if (ilg->ilg_filter != NULL)
						l_copy(ilg->ilg_filter,
						    &fbld->fbld_ex);
				} else {
					l_intersection_in_a(&fbld->fbld_ex,
					    ilg->ilg_filter);
				}
			}
			/* there will only be one match, so break now. */
			break;
		}
	}
	rw_exit(&connp->conn_ilg_lock);
}
/*
 * Recompute the interface-level filter (*fmode, flist) for `ilm' by
 * walking every conn in the stack and merging the interested ilgs'
 * filter state (RFC 3376/3810 style merge: union of includes,
 * intersection of excludes).
 *
 * Caller must hold ill_mcast_lock.
 */
static void
ilm_gen_filter(ilm_t *ilm, mcast_record_t *fmode, slist_t *flist)
{
	ilm_fbld_t fbld;
	ip_stack_t *ipst = ilm->ilm_ipst;

	fbld.fbld_ilm = ilm;
	fbld.fbld_in_cnt = fbld.fbld_ex_cnt = 0;
	fbld.fbld_in.sl_numsrc = fbld.fbld_ex.sl_numsrc = 0;
	fbld.fbld_in_overflow = B_FALSE;

	/* first, construct our master include and exclude lists */
	ipcl_walk(ilm_bld_flists, (caddr_t)&fbld, ipst);

	/* now use those master lists to generate the interface filter */

	/* if include list overflowed, filter is (EXCLUDE, NULL) */
	if (fbld.fbld_in_overflow) {
		*fmode = MODE_IS_EXCLUDE;
		flist->sl_numsrc = 0;
		return;
	}

	/* if nobody interested, interface filter is (INCLUDE, NULL) */
	if (fbld.fbld_in_cnt == 0 && fbld.fbld_ex_cnt == 0) {
		*fmode = MODE_IS_INCLUDE;
		flist->sl_numsrc = 0;
		return;
	}

	/*
	 * If there are no exclude lists, then the interface filter
	 * is INCLUDE, with its filter list equal to fbld_in. A single
	 * exclude list makes the interface filter EXCLUDE, with its
	 * filter list equal to (fbld_ex - fbld_in).
	 */
	if (fbld.fbld_ex_cnt == 0) {
		*fmode = MODE_IS_INCLUDE;
		l_copy(&fbld.fbld_in, flist);
	} else {
		*fmode = MODE_IS_EXCLUDE;
		l_difference(&fbld.fbld_ex, &fbld.fbld_in, flist);
	}
}
/*
 * Account for a new join on an existing ilm and recompute its filter
 * state, sending an IGMP/MLD state-change report if the state changed.
 * Bumps ilm_refcnt unless this is an ILGSTAT_CHANGE (filter change on an
 * existing membership).  Returns 0 or ENOMEM.
 *
 * Caller must hold ill_mcast_lock.
 */
static int
ilm_update_add(ilm_t *ilm, ilg_stat_t ilgstat, slist_t *ilg_flist)
{
	mcast_record_t fmode;
	slist_t *flist;
	boolean_t fdefault;
	char buf[INET6_ADDRSTRLEN];
	ill_t *ill = ilm->ilm_ill;

	/*
	 * There are several cases where the ilm's filter state
	 * defaults to (EXCLUDE, NULL):
	 *	- we've had previous joins without associated ilgs
	 *	- this join has no associated ilg
	 *	- the ilg's filter state is (EXCLUDE, NULL)
	 */
	fdefault = (ilm->ilm_no_ilg_cnt > 0) ||
	    (ilgstat == ILGSTAT_NONE) || SLIST_IS_EMPTY(ilg_flist);

	/* attempt mallocs (if needed) before doing anything else */
	if ((flist = l_alloc()) == NULL)
		return (ENOMEM);
	if (!fdefault && ilm->ilm_filter == NULL) {
		ilm->ilm_filter = l_alloc();
		if (ilm->ilm_filter == NULL) {
			l_free(flist);
			return (ENOMEM);
		}
	}

	if (ilgstat != ILGSTAT_CHANGE)
		ilm->ilm_refcnt++;

	if (ilgstat == ILGSTAT_NONE)
		ilm->ilm_no_ilg_cnt++;

	/*
	 * Determine new filter state. If it's not the default
	 * (EXCLUDE, NULL), we must walk the conn list to find
	 * any ilgs interested in this group, and re-build the
	 * ilm filter.
	 */
	if (fdefault) {
		fmode = MODE_IS_EXCLUDE;
		flist->sl_numsrc = 0;
	} else {
		ilm_gen_filter(ilm, &fmode, flist);
	}

	/* make sure state actually changed; nothing to do if not. */
	if ((ilm->ilm_fmode == fmode) &&
	    !lists_are_different(ilm->ilm_filter, flist)) {
		l_free(flist);
		return (0);
	}

	/* send the state change report */
	if (!IS_LOOPBACK(ill)) {
		if (ill->ill_isv6)
			mld_statechange(ilm, fmode, flist);
		else
			igmp_statechange(ilm, fmode, flist);
	}

	/* update the ilm state */
	ilm->ilm_fmode = fmode;
	if (flist->sl_numsrc > 0)
		l_copy(flist, ilm->ilm_filter);
	else
		CLEAR_SLIST(ilm->ilm_filter);

	ip1dbg(("ilm_update: new if filter mode %d, group %s\n", ilm->ilm_fmode,
	    inet_ntop(AF_INET6, &ilm->ilm_v6addr, buf, sizeof (buf))));

	l_free(flist);
	return (0);
}
/*
 * Recompute the filter state of an ilm that still has members after a
 * leave, sending an IGMP/MLD state-change report if the state changed.
 * Returns 0 or ENOMEM; an allocation failure for the new filter list is
 * handled by falling back to (EXCLUDE, NULL) rather than failing.
 *
 * Caller must hold ill_mcast_lock.
 */
static int
ilm_update_del(ilm_t *ilm)
{
	mcast_record_t fmode;
	slist_t *flist;
	ill_t *ill = ilm->ilm_ill;

	ip1dbg(("ilm_update_del: still %d left; updating state\n",
	    ilm->ilm_refcnt));

	if ((flist = l_alloc()) == NULL)
		return (ENOMEM);

	/*
	 * If present, the ilg in question has already either been
	 * updated or removed from our list; so all we need to do
	 * now is walk the list to update the ilm filter state.
	 *
	 * Skip the list walk if we have any no-ilg joins, which
	 * cause the filter state to revert to (EXCLUDE, NULL).
	 */
	if (ilm->ilm_no_ilg_cnt != 0) {
		fmode = MODE_IS_EXCLUDE;
		flist->sl_numsrc = 0;
	} else {
		ilm_gen_filter(ilm, &fmode, flist);
	}

	/* check to see if state needs to be updated */
	if ((ilm->ilm_fmode == fmode) &&
	    (!lists_are_different(ilm->ilm_filter, flist))) {
		l_free(flist);
		return (0);
	}

	if (!IS_LOOPBACK(ill)) {
		if (ill->ill_isv6)
			mld_statechange(ilm, fmode, flist);
		else
			igmp_statechange(ilm, fmode, flist);
	}

	ilm->ilm_fmode = fmode;
	if (flist->sl_numsrc > 0) {
		if (ilm->ilm_filter == NULL) {
			ilm->ilm_filter = l_alloc();
			if (ilm->ilm_filter == NULL) {
				char buf[INET6_ADDRSTRLEN];
				ip1dbg(("ilm_update_del: failed to alloc ilm "
				    "filter; no source filtering for %s on %s",
				    inet_ntop(AF_INET6, &ilm->ilm_v6addr,
				    buf, sizeof (buf)), ill->ill_name));
				ilm->ilm_fmode = MODE_IS_EXCLUDE;
				l_free(flist);
				return (0);
			}
		}
		l_copy(flist, ilm->ilm_filter);
	} else {
		CLEAR_SLIST(ilm->ilm_filter);
	}

	l_free(flist);
	return (0);
}
/*
 * Create/update the ilm for the group/ill. Used by other parts of IP to
 * do the ILGSTAT_NONE (no ilg), MODE_IS_EXCLUDE, with no slist join.
 * Returns with a refhold on the ilm.
 *
 * The unspecified address means all multicast addresses, in both the
 * case of IPv4 and IPv6.
 *
 * The caller should have already mapped an IPMP under ill to the upper.
 */
ilm_t *
ip_addmulti(const in6_addr_t *v6group, ill_t *ill, zoneid_t zoneid,
    int *errorp)
{
	ilm_t *ilm;

	/* Acquire serializer to keep assert in ilm_bld_flists happy */
	mutex_enter(&ill->ill_mcast_serializer);
	ilm = ip_addmulti_serial(v6group, ill, zoneid, ILGSTAT_NONE,
	    MODE_IS_EXCLUDE, NULL, errorp);
	mutex_exit(&ill->ill_mcast_serializer);
	/*
	 * Now that all locks have been dropped, we can send any
	 * deferred/queued DLPI or IP packets
	 */
	ill_mcast_send_queued(ill);
	ill_dlpi_send_queued(ill);
	return (ilm);
}
/*
 * Create/update the ilm for the group/ill. If ILGSTAT_CHANGE is not set
 * then this returns with a refhold on the ilm.
 *
 * Internal routine which assumes the caller has already acquired
 * ill_mcast_serializer. It is the caller's responsibility to send out
 * queued DLPI/multicast packets after all locks are dropped.
 *
 * The unspecified address means all multicast addresses, in both the
 * case of IPv4 and IPv6.
 *
 * ilgstat tells us if there's an ilg associated with this join,
 * and if so, if it's a new ilg or a change to an existing one.
 * ilg_fmode and ilg_flist give us the current filter state of
 * the ilg (and will be EXCLUDE {NULL} in the case of no ilg).
 *
 * The caller should have already mapped an IPMP under ill to the upper.
 */
static ilm_t *
ip_addmulti_serial(const in6_addr_t *v6group, ill_t *ill, zoneid_t zoneid,
    ilg_stat_t ilgstat, mcast_record_t ilg_fmode, slist_t *ilg_flist,
    int *errorp)
{
	ilm_t *ilm;

	ASSERT(MUTEX_HELD(&ill->ill_mcast_serializer));

	/* Sanity-check the group address for the ill's address family */
	if (ill->ill_isv6) {
		if (!IN6_IS_ADDR_MULTICAST(v6group) &&
		    !IN6_IS_ADDR_UNSPECIFIED(v6group)) {
			*errorp = EINVAL;
			return (NULL);
		}
	} else {
		if (IN6_IS_ADDR_V4MAPPED(v6group)) {
			ipaddr_t v4group;

			IN6_V4MAPPED_TO_IPADDR(v6group, v4group);
			ASSERT(!IS_UNDER_IPMP(ill));
			if (!CLASSD(v4group)) {
				*errorp = EINVAL;
				return (NULL);
			}
		} else if (!IN6_IS_ADDR_UNSPECIFIED(v6group)) {
			*errorp = EINVAL;
			return (NULL);
		}
	}

	if (IS_UNDER_IPMP(ill)) {
		*errorp = EINVAL;
		return (NULL);
	}

	rw_enter(&ill->ill_mcast_lock, RW_WRITER);
	/*
	 * We do the equivalent of a lookup by checking after we get the lock
	 * This is needed since the ill could have been condemned after
	 * we looked it up, and we need to check condemned after we hold
	 * ill_mcast_lock to synchronize with the unplumb code.
	 */
	if (ill->ill_state_flags & ILL_CONDEMNED) {
		rw_exit(&ill->ill_mcast_lock);
		*errorp = ENXIO;
		return (NULL);
	}
	ilm = ip_addmulti_impl(v6group, ill, zoneid, ilgstat, ilg_fmode,
	    ilg_flist, errorp);
	rw_exit(&ill->ill_mcast_lock);

	/* (Re)arm the IGMP/MLD report timers now that state may have changed */
	ill_mcast_timer_start(ill->ill_ipst);
	return (ilm);
}
/*
 * Guts of the join: either update an existing ilm or create a new one,
 * telling the driver to start reception (allmulti or a specific L2
 * address) when this is the first membership for the group on the ill.
 *
 * Caller must hold ill_mcast_lock as writer.
 */
static ilm_t *
ip_addmulti_impl(const in6_addr_t *v6group, ill_t *ill, zoneid_t zoneid,
    ilg_stat_t ilgstat, mcast_record_t ilg_fmode, slist_t *ilg_flist,
    int *errorp)
{
	ilm_t	*ilm;
	int	ret = 0;

	ASSERT(RW_WRITE_HELD(&ill->ill_mcast_lock));
	*errorp = 0;

	/*
	 * An ilm is uniquely identified by the tuple of (group, ill) where
	 * `group' is the multicast group address, and `ill' is the interface
	 * on which it is currently joined.
	 */
	ilm = ilm_lookup(ill, v6group, zoneid);
	if (ilm != NULL) {
		/* ilm_update_add bumps ilm_refcnt unless ILGSTAT_CHANGE */
		ret = ilm_update_add(ilm, ilgstat, ilg_flist);
		if (ret == 0)
			return (ilm);

		*errorp = ret;
		return (NULL);
	}

	/*
	 * The callers checks on the ilg and the ilg+ilm consistency under
	 * ill_mcast_serializer ensures that we can not have ILGSTAT_CHANGE
	 * and no ilm.
	 */
	ASSERT(ilgstat != ILGSTAT_CHANGE);
	ilm = ilm_add(ill, v6group, ilgstat, ilg_fmode, ilg_flist, zoneid);
	if (ilm == NULL) {
		*errorp = ENOMEM;
		return (NULL);
	}

	if (IN6_IS_ADDR_UNSPECIFIED(v6group)) {
		/*
		 * If we have more than one we should not tell the driver
		 * to join this time.
		 */
		if (ilm_numentries(ill, v6group) == 1) {
			ret = ill_join_allmulti(ill);
		}
	} else {
		if (!IS_LOOPBACK(ill)) {
			if (ill->ill_isv6)
				mld_joingroup(ilm);
			else
				igmp_joingroup(ilm);
		}

		/*
		 * If we have more than one we should not tell the driver
		 * to join this time.
		 */
		if (ilm_numentries(ill, v6group) == 1) {
			ret = ip_ll_multireq(ill, v6group, DL_ENABMULTI_REQ);
		}
	}

	if (ret != 0) {
		if (ret == ENETDOWN) {
			char buf[INET6_ADDRSTRLEN];

			ip0dbg(("ip_addmulti: ENETDOWN for %s on %s",
			    inet_ntop(AF_INET6, &ilm->ilm_v6addr,
			    buf, sizeof (buf)), ill->ill_name));
		}
		/* Undo the ilm_add() above on driver failure */
		ilm_delete(ilm);
		*errorp = ret;
		return (NULL);
	} else {
		return (ilm);
	}
}
731 * Looks up the list of multicast physical addresses this interface
732 * listens to. Add to the list if not present already.
734 boolean_t
735 ip_mphysaddr_add(ill_t *ill, uchar_t *hw_addr)
737 multiphysaddr_t *mpa = NULL;
738 int hw_addr_length = ill->ill_phys_addr_length;
740 mutex_enter(&ill->ill_lock);
741 for (mpa = ill->ill_mphysaddr_list; mpa != NULL; mpa = mpa->mpa_next) {
742 if (bcmp(hw_addr, &(mpa->mpa_addr[0]), hw_addr_length) == 0) {
743 mpa->mpa_refcnt++;
744 mutex_exit(&ill->ill_lock);
745 return (B_FALSE);
749 mpa = kmem_zalloc(sizeof (multiphysaddr_t), KM_NOSLEEP);
750 if (mpa == NULL) {
752 * We risk not having the multiphysadd structure. At this
753 * point we can't fail. We can't afford to not send a
754 * DL_ENABMULTI_REQ also. It is better than pre-allocating
755 * the structure and having the code to track it also.
757 ip0dbg(("ip_mphysaddr_add: ENOMEM. Some multicast apps"
758 " may have issues. hw_addr: %p ill_name: %s\n",
759 (void *)hw_addr, ill->ill_name));
760 mutex_exit(&ill->ill_lock);
761 return (B_TRUE);
763 bcopy(hw_addr, &(mpa->mpa_addr[0]), hw_addr_length);
764 mpa->mpa_refcnt = 1;
765 mpa->mpa_next = ill->ill_mphysaddr_list;
766 ill->ill_mphysaddr_list = mpa;
767 mutex_exit(&ill->ill_lock);
768 return (B_TRUE);
772 * Look up hw_addr from the list of physical multicast addresses this interface
773 * listens to.
774 * Remove the entry if the refcnt is 0
776 boolean_t
777 ip_mphysaddr_del(ill_t *ill, uchar_t *hw_addr)
779 multiphysaddr_t *mpap = NULL, **mpapp = NULL;
780 int hw_addr_length = ill->ill_phys_addr_length;
781 boolean_t ret = B_FALSE;
783 mutex_enter(&ill->ill_lock);
784 for (mpapp = &ill->ill_mphysaddr_list; (mpap = *mpapp) != NULL;
785 mpapp = &(mpap->mpa_next)) {
786 if (bcmp(hw_addr, &(mpap->mpa_addr[0]), hw_addr_length) == 0)
787 break;
789 if (mpap == NULL) {
791 * Should be coming here only when there was a memory
792 * exhaustion and we were not able to allocate
793 * a multiphysaddr_t. We still send a DL_DISABMULTI_REQ down.
796 ip0dbg(("ip_mphysaddr_del: No entry for this addr. Some "
797 "multicast apps might have had issues. hw_addr: %p "
798 " ill_name: %s\n", (void *)hw_addr, ill->ill_name));
799 ret = B_TRUE;
800 } else if (--mpap->mpa_refcnt == 0) {
801 *mpapp = mpap->mpa_next;
802 kmem_free(mpap, sizeof (multiphysaddr_t));
803 ret = B_TRUE;
805 mutex_exit(&ill->ill_lock);
806 return (ret);
/*
 * Send a multicast request to the driver for enabling or disabling
 * multicast reception for v6groupp address. The caller has already
 * checked whether it is appropriate to send one or not.
 *
 * For IPMP we switch to the cast_ill since it has the right hardware
 * information.
 */
static int
ip_ll_send_multireq(ill_t *ill, const in6_addr_t *v6groupp, t_uscalar_t prim)
{
	mblk_t	*mp;
	uint32_t addrlen, addroff;
	ill_t *release_ill = NULL;
	uchar_t *cp;
	int	err = 0;

	ASSERT(RW_LOCK_HELD(&ill->ill_mcast_lock));

	if (IS_IPMP(ill)) {
		/* On the upper IPMP ill. */
		release_ill = ipmp_illgrp_hold_cast_ill(ill->ill_grp);
		if (release_ill == NULL) {
			/*
			 * Avoid sending it down to the ipmpstub.
			 * We will be called again once the members of the
			 * group are in place
			 */
			ip1dbg(("ip_ll_send_multireq: no cast_ill for %s %d\n",
			    ill->ill_name, ill->ill_isv6));
			return (0);
		}
		ill = release_ill;
	}
	/* Create a DL_ENABMULTI_REQ or DL_DISABMULTI_REQ message. */
	mp = ill_create_dl(ill, prim, &addrlen, &addroff);
	if (mp == NULL) {
		err = ENOMEM;
		goto done;
	}

	/* Map the IP multicast group to its L2 address in the message */
	mp = ndp_mcastreq(ill, v6groupp, addrlen, addroff, mp);
	if (mp == NULL) {
		ip0dbg(("null from ndp_mcastreq(ill %s)\n", ill->ill_name));
		err = ENOMEM;
		goto done;
	}
	cp = mp->b_rptr;

	/*
	 * Maintain the per-ill physical-address refcounts; skip the
	 * driver request entirely when this enable/disable does not
	 * change the set of L2 addresses actually in use.
	 */
	switch (((union DL_primitives *)cp)->dl_primitive) {
	case DL_ENABMULTI_REQ:
		cp += ((dl_enabmulti_req_t *)cp)->dl_addr_offset;
		if (!ip_mphysaddr_add(ill, cp)) {
			freemsg(mp);
			err = 0;
			goto done;
		}
		mutex_enter(&ill->ill_lock);
		/* Track the state if this is the first enabmulti */
		if (ill->ill_dlpi_multicast_state == IDS_UNKNOWN)
			ill->ill_dlpi_multicast_state = IDS_INPROGRESS;
		mutex_exit(&ill->ill_lock);
		break;
	case DL_DISABMULTI_REQ:
		cp += ((dl_disabmulti_req_t *)cp)->dl_addr_offset;
		if (!ip_mphysaddr_del(ill, cp)) {
			freemsg(mp);
			err = 0;
			goto done;
		}
	}
	ill_dlpi_queue(ill, mp);
done:
	if (release_ill != NULL)
		ill_refrele(release_ill);
	return (err);
}
888 * Send a multicast request to the driver for enabling multicast
889 * membership for v6group if appropriate.
891 static int
892 ip_ll_multireq(ill_t *ill, const in6_addr_t *v6groupp, t_uscalar_t prim)
894 if (ill->ill_net_type != IRE_IF_RESOLVER ||
895 ill->ill_ipif->ipif_flags & IPIF_POINTOPOINT) {
896 ip1dbg(("ip_ll_multireq: not resolver\n"));
897 return (0); /* Must be IRE_IF_NORESOLVER */
900 if (ill->ill_phyint->phyint_flags & PHYI_MULTI_BCAST) {
901 ip1dbg(("ip_ll_multireq: MULTI_BCAST\n"));
902 return (0);
904 return (ip_ll_send_multireq(ill, v6groupp, prim));
/*
 * Delete the ilm. Used by other parts of IP for the case of no_ilg/leaving
 * being true.  Takes the serializer itself and flushes any queued
 * DLPI/multicast packets once all locks are dropped.
 */
int
ip_delmulti(ilm_t *ilm)
{
	ill_t *ill = ilm->ilm_ill;
	int error;

	/* Acquire serializer to keep assert in ilm_bld_flists happy */
	mutex_enter(&ill->ill_mcast_serializer);
	error = ip_delmulti_serial(ilm, B_TRUE, B_TRUE);
	mutex_exit(&ill->ill_mcast_serializer);
	/*
	 * Now that all locks have been dropped, we can send any
	 * deferred/queued DLPI or IP packets
	 */
	ill_mcast_send_queued(ill);
	ill_dlpi_send_queued(ill);
	return (error);
}
/*
 * Delete the ilm.
 * Assumes ill_mcast_serializer is held by the caller.
 * Caller must send out queued dlpi/multicast packets after dropping
 * all locks.
 */
static int
ip_delmulti_serial(ilm_t *ilm, boolean_t no_ilg, boolean_t leaving)
{
	ill_t *ill = ilm->ilm_ill;
	int ret;

	ASSERT(MUTEX_HELD(&ill->ill_mcast_serializer));
	ASSERT(!(IS_UNDER_IPMP(ill)));

	rw_enter(&ill->ill_mcast_lock, RW_WRITER);
	ret = ip_delmulti_impl(ilm, no_ilg, leaving);
	rw_exit(&ill->ill_mcast_lock);
	/* (Re)arm the IGMP/MLD report timers now that state may have changed */
	ill_mcast_timer_start(ill->ill_ipst);
	return (ret);
}
/*
 * Guts of the leave: drop the requested counters on the ilm; if members
 * remain, just recompute the filter state, otherwise delete the ilm and
 * tell the driver to stop reception when this was the last membership
 * for the group on the ill.
 *
 * Caller must hold ill_mcast_lock as writer.
 */
static int
ip_delmulti_impl(ilm_t *ilm, boolean_t no_ilg, boolean_t leaving)
{
	ill_t *ill = ilm->ilm_ill;
	int error;
	in6_addr_t v6group;

	ASSERT(RW_WRITE_HELD(&ill->ill_mcast_lock));

	/* Update counters */
	if (no_ilg)
		ilm->ilm_no_ilg_cnt--;

	if (leaving)
		ilm->ilm_refcnt--;

	if (ilm->ilm_refcnt > 0)
		return (ilm_update_del(ilm));

	/* Save the group; the ilm is freed by ilm_delete() below */
	v6group = ilm->ilm_v6addr;

	if (IN6_IS_ADDR_UNSPECIFIED(&ilm->ilm_v6addr)) {
		ilm_delete(ilm);
		/*
		 * If we have some left then we should not tell the driver
		 * to leave.
		 */
		if (ilm_numentries(ill, &v6group) != 0)
			return (0);

		ill_leave_allmulti(ill);

		return (0);
	}

	if (!IS_LOOPBACK(ill)) {
		if (ill->ill_isv6)
			mld_leavegroup(ilm);
		else
			igmp_leavegroup(ilm);
	}

	ilm_delete(ilm);
	/*
	 * If we have some left then we should not tell the driver
	 * to leave.
	 */
	if (ilm_numentries(ill, &v6group) != 0)
		return (0);

	error = ip_ll_multireq(ill, &v6group, DL_DISABMULTI_REQ);
	/* We ignore the case when ill_dl_up is not set */
	if (error == ENETDOWN) {
		char buf[INET6_ADDRSTRLEN];

		ip0dbg(("ip_delmulti: ENETDOWN for %s on %s",
		    inet_ntop(AF_INET6, &v6group, buf, sizeof (buf)),
		    ill->ill_name));
	}
	return (error);
}
/*
 * Make the driver pass up all multicast packets.
 * Returns 0, ENETDOWN (interface not bound yet) or ENOMEM.
 */
int
ill_join_allmulti(ill_t *ill)
{
	mblk_t		*promiscon_mp, *promiscoff_mp = NULL;
	uint32_t	addrlen, addroff;
	ill_t		*release_ill = NULL;

	ASSERT(RW_WRITE_HELD(&ill->ill_mcast_lock));

	if (IS_LOOPBACK(ill))
		return (0);

	if (!ill->ill_dl_up) {
		/*
		 * Nobody there. All multicast addresses will be re-joined
		 * when we get the DL_BIND_ACK bringing the interface up.
		 */
		return (ENETDOWN);
	}

	if (IS_IPMP(ill)) {
		/* On the upper IPMP ill. */
		release_ill = ipmp_illgrp_hold_cast_ill(ill->ill_grp);
		if (release_ill == NULL) {
			/*
			 * Avoid sending it down to the ipmpstub.
			 * We will be called again once the members of the
			 * group are in place
			 */
			ip1dbg(("ill_join_allmulti: no cast_ill for %s %d\n",
			    ill->ill_name, ill->ill_isv6));
			return (0);
		}
		ill = release_ill;
		if (!ill->ill_dl_up) {
			ill_refrele(ill);
			return (ENETDOWN);
		}
	}

	/*
	 * Create a DL_PROMISCON_REQ message and send it directly to the DLPI
	 * provider. We don't need to do this for certain media types for
	 * which we never need to turn promiscuous mode on. While we're here,
	 * pre-allocate a DL_PROMISCOFF_REQ message to make sure that
	 * ill_leave_allmulti() will not fail due to low memory conditions.
	 */
	if ((ill->ill_net_type == IRE_IF_RESOLVER) &&
	    !(ill->ill_phyint->phyint_flags & PHYI_MULTI_BCAST)) {
		promiscon_mp = ill_create_dl(ill, DL_PROMISCON_REQ,
		    &addrlen, &addroff);
		if (ill->ill_promiscoff_mp == NULL)
			promiscoff_mp = ill_create_dl(ill, DL_PROMISCOFF_REQ,
			    &addrlen, &addroff);
		if (promiscon_mp == NULL ||
		    (ill->ill_promiscoff_mp == NULL && promiscoff_mp == NULL)) {
			freemsg(promiscon_mp);
			freemsg(promiscoff_mp);
			if (release_ill != NULL)
				ill_refrele(release_ill);
			return (ENOMEM);
		}
		/* Stash the pre-allocated PROMISCOFF for the later leave */
		if (ill->ill_promiscoff_mp == NULL)
			ill->ill_promiscoff_mp = promiscoff_mp;
		ill_dlpi_queue(ill, promiscon_mp);
	}
	if (release_ill != NULL)
		ill_refrele(release_ill);
	return (0);
}
/*
 * Make the driver stop passing up all multicast packets by queueing the
 * DL_PROMISCOFF_REQ pre-allocated in ill_join_allmulti().
 */
void
ill_leave_allmulti(ill_t *ill)
{
	mblk_t	*promiscoff_mp;
	ill_t	*release_ill = NULL;

	ASSERT(RW_WRITE_HELD(&ill->ill_mcast_lock));

	if (IS_LOOPBACK(ill))
		return;

	if (!ill->ill_dl_up) {
		/*
		 * Nobody there. All multicast addresses will be re-joined
		 * when we get the DL_BIND_ACK bringing the interface up.
		 */
		return;
	}

	if (IS_IPMP(ill)) {
		/* On the upper IPMP ill. */
		release_ill = ipmp_illgrp_hold_cast_ill(ill->ill_grp);
		if (release_ill == NULL) {
			/*
			 * Avoid sending it down to the ipmpstub.
			 * We will be called again once the members of the
			 * group are in place
			 */
			ip1dbg(("ill_leave_allmulti: no cast_ill on %s %d\n",
			    ill->ill_name, ill->ill_isv6));
			return;
		}
		ill = release_ill;
		if (!ill->ill_dl_up)
			goto done;
	}

	/*
	 * In the case of IPMP and ill_dl_up not being set when we joined
	 * we didn't allocate a promiscoff_mp. In that case we have
	 * nothing to do when we leave.
	 * Ditto for PHYI_MULTI_BCAST
	 */
	promiscoff_mp = ill->ill_promiscoff_mp;
	if (promiscoff_mp != NULL) {
		ill->ill_promiscoff_mp = NULL;
		ill_dlpi_queue(ill, promiscoff_mp);
	}
done:
	if (release_ill != NULL)
		ill_refrele(release_ill);
}
1146 ip_join_allmulti(uint_t ifindex, boolean_t isv6, ip_stack_t *ipst)
1148 ill_t *ill;
1149 int ret;
1150 ilm_t *ilm;
1152 ill = ill_lookup_on_ifindex(ifindex, isv6, ipst);
1153 if (ill == NULL)
1154 return (ENODEV);
1157 * The ip_addmulti() function doesn't allow IPMP underlying interfaces
1158 * to join allmulti since only the nominated underlying interface in
1159 * the group should receive multicast. We silently succeed to avoid
1160 * having to teach IPobs (currently the only caller of this routine)
1161 * to ignore failures in this case.
1163 if (IS_UNDER_IPMP(ill)) {
1164 ill_refrele(ill);
1165 return (0);
1167 mutex_enter(&ill->ill_lock);
1168 if (ill->ill_ipallmulti_cnt > 0) {
1169 /* Already joined */
1170 ASSERT(ill->ill_ipallmulti_ilm != NULL);
1171 ill->ill_ipallmulti_cnt++;
1172 mutex_exit(&ill->ill_lock);
1173 goto done;
1175 mutex_exit(&ill->ill_lock);
1177 ilm = ip_addmulti(&ipv6_all_zeros, ill, ill->ill_zoneid, &ret);
1178 if (ilm == NULL) {
1179 ASSERT(ret != 0);
1180 ill_refrele(ill);
1181 return (ret);
1184 mutex_enter(&ill->ill_lock);
1185 if (ill->ill_ipallmulti_cnt > 0) {
1186 /* Another thread added it concurrently */
1187 (void) ip_delmulti(ilm);
1188 mutex_exit(&ill->ill_lock);
1189 goto done;
1191 ASSERT(ill->ill_ipallmulti_ilm == NULL);
1192 ill->ill_ipallmulti_ilm = ilm;
1193 ill->ill_ipallmulti_cnt++;
1194 mutex_exit(&ill->ill_lock);
1195 done:
1196 ill_refrele(ill);
1197 return (0);
1201 ip_leave_allmulti(uint_t ifindex, boolean_t isv6, ip_stack_t *ipst)
1203 ill_t *ill;
1204 ilm_t *ilm;
1206 ill = ill_lookup_on_ifindex(ifindex, isv6, ipst);
1207 if (ill == NULL)
1208 return (ENODEV);
1210 if (IS_UNDER_IPMP(ill)) {
1211 ill_refrele(ill);
1212 return (0);
1215 mutex_enter(&ill->ill_lock);
1216 if (ill->ill_ipallmulti_cnt == 0) {
1217 /* ip_purge_allmulti could have removed them all */
1218 mutex_exit(&ill->ill_lock);
1219 goto done;
1221 ill->ill_ipallmulti_cnt--;
1222 if (ill->ill_ipallmulti_cnt == 0) {
1223 /* Last one */
1224 ilm = ill->ill_ipallmulti_ilm;
1225 ill->ill_ipallmulti_ilm = NULL;
1226 } else {
1227 ilm = NULL;
1229 mutex_exit(&ill->ill_lock);
1230 if (ilm != NULL)
1231 (void) ip_delmulti(ilm);
1233 done:
1234 ill_refrele(ill);
1235 return (0);
1239 * Delete the allmulti memberships that were added as part of
1240 * ip_join_allmulti().
1242 void
1243 ip_purge_allmulti(ill_t *ill)
1245 ilm_t *ilm;
1247 ASSERT(IAM_WRITER_ILL(ill));
1249 mutex_enter(&ill->ill_lock);
1250 ilm = ill->ill_ipallmulti_ilm;
1251 ill->ill_ipallmulti_ilm = NULL;
1252 ill->ill_ipallmulti_cnt = 0;
1253 mutex_exit(&ill->ill_lock);
1255 if (ilm != NULL)
1256 (void) ip_delmulti(ilm);
1260 * Create a dlpi message with room for phys+sap. Later
1261 * we will strip the sap for those primitives which
1262 * only need a physical address.
1264 static mblk_t *
1265 ill_create_dl(ill_t *ill, uint32_t dl_primitive,
1266 uint32_t *addr_lenp, uint32_t *addr_offp)
1268 mblk_t *mp;
1269 uint32_t hw_addr_length;
1270 char *cp;
1271 uint32_t offset;
1272 uint32_t length;
1273 uint32_t size;
1275 *addr_lenp = *addr_offp = 0;
1277 hw_addr_length = ill->ill_phys_addr_length;
1278 if (!hw_addr_length) {
1279 ip0dbg(("ip_create_dl: hw addr length = 0\n"));
1280 return (NULL);
1283 switch (dl_primitive) {
1284 case DL_ENABMULTI_REQ:
1285 length = sizeof (dl_enabmulti_req_t);
1286 size = length + hw_addr_length;
1287 break;
1288 case DL_DISABMULTI_REQ:
1289 length = sizeof (dl_disabmulti_req_t);
1290 size = length + hw_addr_length;
1291 break;
1292 case DL_PROMISCON_REQ:
1293 case DL_PROMISCOFF_REQ:
1294 size = length = sizeof (dl_promiscon_req_t);
1295 break;
1296 default:
1297 return (NULL);
1299 mp = allocb(size, BPRI_HI);
1300 if (!mp)
1301 return (NULL);
1302 mp->b_wptr += size;
1303 mp->b_datap->db_type = M_PROTO;
1305 cp = (char *)mp->b_rptr;
1306 offset = length;
1308 switch (dl_primitive) {
1309 case DL_ENABMULTI_REQ: {
1310 dl_enabmulti_req_t *dl = (dl_enabmulti_req_t *)cp;
1312 dl->dl_primitive = dl_primitive;
1313 dl->dl_addr_offset = offset;
1314 *addr_lenp = dl->dl_addr_length = hw_addr_length;
1315 *addr_offp = offset;
1316 break;
1318 case DL_DISABMULTI_REQ: {
1319 dl_disabmulti_req_t *dl = (dl_disabmulti_req_t *)cp;
1321 dl->dl_primitive = dl_primitive;
1322 dl->dl_addr_offset = offset;
1323 *addr_lenp = dl->dl_addr_length = hw_addr_length;
1324 *addr_offp = offset;
1325 break;
1327 case DL_PROMISCON_REQ:
1328 case DL_PROMISCOFF_REQ: {
1329 dl_promiscon_req_t *dl = (dl_promiscon_req_t *)cp;
1331 dl->dl_primitive = dl_primitive;
1332 dl->dl_level = DL_PROMISC_MULTI;
1333 break;
1336 ip1dbg(("ill_create_dl: addr_len %d, addr_off %d\n",
1337 *addr_lenp, *addr_offp));
1338 return (mp);
1342 * Rejoin any groups for which we have ilms.
1344 * This is only needed for IPMP when the cast_ill changes since that
1345 * change is invisible to the ilm. Other interface changes are handled
1346 * by conn_update_ill.
1348 void
1349 ill_recover_multicast(ill_t *ill)
1351 ilm_t *ilm;
1352 char addrbuf[INET6_ADDRSTRLEN];
1354 ill->ill_need_recover_multicast = 0;
1356 rw_enter(&ill->ill_mcast_lock, RW_WRITER);
1357 for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) {
1359 * If we have more then one ilm for the group (e.g., with
1360 * different zoneid) then we should not tell the driver
1361 * to join unless this is the first ilm for the group.
1363 if (ilm_numentries(ill, &ilm->ilm_v6addr) > 1 &&
1364 ilm_lookup(ill, &ilm->ilm_v6addr, ALL_ZONES) != ilm) {
1365 continue;
1368 ip1dbg(("ill_recover_multicast: %s\n", inet_ntop(AF_INET6,
1369 &ilm->ilm_v6addr, addrbuf, sizeof (addrbuf))));
1371 if (IN6_IS_ADDR_UNSPECIFIED(&ilm->ilm_v6addr)) {
1372 (void) ill_join_allmulti(ill);
1373 } else {
1374 if (ill->ill_isv6)
1375 mld_joingroup(ilm);
1376 else
1377 igmp_joingroup(ilm);
1379 (void) ip_ll_multireq(ill, &ilm->ilm_v6addr,
1380 DL_ENABMULTI_REQ);
1383 rw_exit(&ill->ill_mcast_lock);
1384 /* Send any deferred/queued DLPI or IP packets */
1385 ill_mcast_send_queued(ill);
1386 ill_dlpi_send_queued(ill);
1387 ill_mcast_timer_start(ill->ill_ipst);
1391 * The opposite of ill_recover_multicast() -- leaves all multicast groups
1392 * that were explicitly joined.
1394 * This is only needed for IPMP when the cast_ill changes since that
1395 * change is invisible to the ilm. Other interface changes are handled
1396 * by conn_update_ill.
1398 void
1399 ill_leave_multicast(ill_t *ill)
1401 ilm_t *ilm;
1402 char addrbuf[INET6_ADDRSTRLEN];
1404 ill->ill_need_recover_multicast = 1;
1406 rw_enter(&ill->ill_mcast_lock, RW_WRITER);
1407 for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) {
1409 * If we have more then one ilm for the group (e.g., with
1410 * different zoneid) then we should not tell the driver
1411 * to leave unless this is the first ilm for the group.
1413 if (ilm_numentries(ill, &ilm->ilm_v6addr) > 1 &&
1414 ilm_lookup(ill, &ilm->ilm_v6addr, ALL_ZONES) != ilm) {
1415 continue;
1418 ip1dbg(("ill_leave_multicast: %s\n", inet_ntop(AF_INET6,
1419 &ilm->ilm_v6addr, addrbuf, sizeof (addrbuf))));
1421 if (IN6_IS_ADDR_UNSPECIFIED(&ilm->ilm_v6addr)) {
1422 ill_leave_allmulti(ill);
1423 } else {
1424 if (ill->ill_isv6)
1425 mld_leavegroup(ilm);
1426 else
1427 igmp_leavegroup(ilm);
1429 (void) ip_ll_multireq(ill, &ilm->ilm_v6addr,
1430 DL_DISABMULTI_REQ);
1433 rw_exit(&ill->ill_mcast_lock);
1434 /* Send any deferred/queued DLPI or IP packets */
1435 ill_mcast_send_queued(ill);
1436 ill_dlpi_send_queued(ill);
1437 ill_mcast_timer_start(ill->ill_ipst);
1441 * Interface used by IP input/output.
1442 * Returns true if there is a member on the ill for any zoneid.
1444 boolean_t
1445 ill_hasmembers_v6(ill_t *ill, const in6_addr_t *v6group)
1447 ilm_t *ilm;
1449 rw_enter(&ill->ill_mcast_lock, RW_READER);
1450 ilm = ilm_lookup(ill, v6group, ALL_ZONES);
1451 rw_exit(&ill->ill_mcast_lock);
1452 return (ilm != NULL);
1456 * Interface used by IP input/output.
1457 * Returns true if there is a member on the ill for any zoneid.
1459 * The group and source can't be INADDR_ANY here so no need to translate to
1460 * the unspecified IPv6 address.
1462 boolean_t
1463 ill_hasmembers_v4(ill_t *ill, ipaddr_t group)
1465 in6_addr_t v6group;
1467 IN6_IPADDR_TO_V4MAPPED(group, &v6group);
1468 return (ill_hasmembers_v6(ill, &v6group));
1472 * Interface used by IP input/output.
1473 * Returns true if there is a member on the ill for any zoneid except skipzone.
1475 boolean_t
1476 ill_hasmembers_otherzones_v6(ill_t *ill, const in6_addr_t *v6group,
1477 zoneid_t skipzone)
1479 ilm_t *ilm;
1481 rw_enter(&ill->ill_mcast_lock, RW_READER);
1482 for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) {
1483 if (IN6_ARE_ADDR_EQUAL(&ilm->ilm_v6addr, v6group) &&
1484 ilm->ilm_zoneid != skipzone) {
1485 rw_exit(&ill->ill_mcast_lock);
1486 return (B_TRUE);
1489 rw_exit(&ill->ill_mcast_lock);
1490 return (B_FALSE);
1494 * Interface used by IP input/output.
1495 * Returns true if there is a member on the ill for any zoneid except skipzone.
1497 * The group and source can't be INADDR_ANY here so no need to translate to
1498 * the unspecified IPv6 address.
1500 boolean_t
1501 ill_hasmembers_otherzones_v4(ill_t *ill, ipaddr_t group, zoneid_t skipzone)
1503 in6_addr_t v6group;
1505 IN6_IPADDR_TO_V4MAPPED(group, &v6group);
1506 return (ill_hasmembers_otherzones_v6(ill, &v6group, skipzone));
1510 * Interface used by IP input.
1511 * Returns the next numerically larger zoneid that has a member. If none exist
1512 * then returns -1 (ALL_ZONES).
1513 * The normal usage is for the caller to start with a -1 zoneid (ALL_ZONES)
1514 * to find the first zoneid which has a member, and then pass that in for
1515 * subsequent calls until ALL_ZONES is returned.
1517 * The implementation of ill_hasmembers_nextzone() assumes the ilms
1518 * are sorted by zoneid for efficiency.
1520 zoneid_t
1521 ill_hasmembers_nextzone_v6(ill_t *ill, const in6_addr_t *v6group,
1522 zoneid_t zoneid)
1524 ilm_t *ilm;
1526 rw_enter(&ill->ill_mcast_lock, RW_READER);
1527 for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) {
1528 if (IN6_ARE_ADDR_EQUAL(&ilm->ilm_v6addr, v6group) &&
1529 ilm->ilm_zoneid > zoneid) {
1530 zoneid = ilm->ilm_zoneid;
1531 rw_exit(&ill->ill_mcast_lock);
1532 return (zoneid);
1535 rw_exit(&ill->ill_mcast_lock);
1536 return (ALL_ZONES);
1540 * Interface used by IP input.
1541 * Returns the next numerically larger zoneid that has a member. If none exist
1542 * then returns -1 (ALL_ZONES).
1544 * The group and source can't be INADDR_ANY here so no need to translate to
1545 * the unspecified IPv6 address.
1547 zoneid_t
1548 ill_hasmembers_nextzone_v4(ill_t *ill, ipaddr_t group, zoneid_t zoneid)
1550 in6_addr_t v6group;
1552 IN6_IPADDR_TO_V4MAPPED(group, &v6group);
1554 return (ill_hasmembers_nextzone_v6(ill, &v6group, zoneid));
1558 * Find an ilm matching the ill, group, and zoneid.
1560 static ilm_t *
1561 ilm_lookup(ill_t *ill, const in6_addr_t *v6group, zoneid_t zoneid)
1563 ilm_t *ilm;
1565 ASSERT(RW_LOCK_HELD(&ill->ill_mcast_lock));
1567 for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) {
1568 if (!IN6_ARE_ADDR_EQUAL(&ilm->ilm_v6addr, v6group))
1569 continue;
1570 if (zoneid != ALL_ZONES && zoneid != ilm->ilm_zoneid)
1571 continue;
1573 ASSERT(ilm->ilm_ill == ill);
1574 return (ilm);
1576 return (NULL);
1580 * How many members on this ill?
1581 * Since each shared-IP zone has a separate ilm for the same group/ill
1582 * we can have several.
1584 static int
1585 ilm_numentries(ill_t *ill, const in6_addr_t *v6group)
1587 ilm_t *ilm;
1588 int i = 0;
1590 ASSERT(RW_LOCK_HELD(&ill->ill_mcast_lock));
1591 for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) {
1592 if (IN6_ARE_ADDR_EQUAL(&ilm->ilm_v6addr, v6group)) {
1593 i++;
1596 return (i);
1599 /* Caller guarantees that the group is not already on the list */
1600 static ilm_t *
1601 ilm_add(ill_t *ill, const in6_addr_t *v6group, ilg_stat_t ilgstat,
1602 mcast_record_t ilg_fmode, slist_t *ilg_flist, zoneid_t zoneid)
1604 ilm_t *ilm;
1605 ilm_t *ilm_cur;
1606 ilm_t **ilm_ptpn;
1608 ASSERT(RW_WRITE_HELD(&ill->ill_mcast_lock));
1609 ilm = GETSTRUCT(ilm_t, 1);
1610 if (ilm == NULL)
1611 return (NULL);
1612 if (ilgstat != ILGSTAT_NONE && !SLIST_IS_EMPTY(ilg_flist)) {
1613 ilm->ilm_filter = l_alloc();
1614 if (ilm->ilm_filter == NULL) {
1615 mi_free(ilm);
1616 return (NULL);
1619 ilm->ilm_v6addr = *v6group;
1620 ilm->ilm_refcnt = 1;
1621 ilm->ilm_zoneid = zoneid;
1622 ilm->ilm_timer = INFINITY;
1623 ilm->ilm_rtx.rtx_timer = INFINITY;
1625 ilm->ilm_ill = ill;
1626 DTRACE_PROBE3(ill__incr__cnt, (ill_t *), ill,
1627 (char *), "ilm", (void *), ilm);
1628 ill->ill_ilm_cnt++;
1630 ASSERT(ill->ill_ipst);
1631 ilm->ilm_ipst = ill->ill_ipst; /* No netstack_hold */
1633 /* The ill/ipif could have just been marked as condemned */
1636 * To make ill_hasmembers_nextzone_v6 work we keep the list
1637 * sorted by zoneid.
1639 ilm_cur = ill->ill_ilm;
1640 ilm_ptpn = &ill->ill_ilm;
1641 while (ilm_cur != NULL && ilm_cur->ilm_zoneid < ilm->ilm_zoneid) {
1642 ilm_ptpn = &ilm_cur->ilm_next;
1643 ilm_cur = ilm_cur->ilm_next;
1645 ilm->ilm_next = ilm_cur;
1646 *ilm_ptpn = ilm;
1649 * If we have an associated ilg, use its filter state; if not,
1650 * default to (EXCLUDE, NULL) and set no_ilg_cnt to track this.
1652 if (ilgstat != ILGSTAT_NONE) {
1653 if (!SLIST_IS_EMPTY(ilg_flist))
1654 l_copy(ilg_flist, ilm->ilm_filter);
1655 ilm->ilm_fmode = ilg_fmode;
1656 } else {
1657 ilm->ilm_no_ilg_cnt = 1;
1658 ilm->ilm_fmode = MODE_IS_EXCLUDE;
1661 return (ilm);
1664 void
1665 ilm_inactive(ilm_t *ilm)
1667 FREE_SLIST(ilm->ilm_filter);
1668 FREE_SLIST(ilm->ilm_pendsrcs);
1669 FREE_SLIST(ilm->ilm_rtx.rtx_allow);
1670 FREE_SLIST(ilm->ilm_rtx.rtx_block);
1671 ilm->ilm_ipst = NULL;
1672 mi_free((char *)ilm);
1676 * Unlink ilm and free it.
1678 static void
1679 ilm_delete(ilm_t *ilm)
1681 ill_t *ill = ilm->ilm_ill;
1682 ilm_t **ilmp;
1683 boolean_t need_wakeup;
1686 * Delete under lock protection so that readers don't stumble
1687 * on bad ilm_next
1689 ASSERT(RW_WRITE_HELD(&ill->ill_mcast_lock));
1691 for (ilmp = &ill->ill_ilm; *ilmp != ilm; ilmp = &(*ilmp)->ilm_next)
1694 *ilmp = ilm->ilm_next;
1696 mutex_enter(&ill->ill_lock);
1698 * if we are the last reference to the ill, we may need to wakeup any
1699 * pending FREE or unplumb operations. This is because conn_update_ill
1700 * bails if there is a ilg_delete_all in progress.
1702 need_wakeup = B_FALSE;
1703 DTRACE_PROBE3(ill__decr__cnt, (ill_t *), ill,
1704 (char *), "ilm", (void *), ilm);
1705 ASSERT(ill->ill_ilm_cnt > 0);
1706 ill->ill_ilm_cnt--;
1707 if (ILL_FREE_OK(ill))
1708 need_wakeup = B_TRUE;
1710 ilm_inactive(ilm); /* frees this ilm */
1712 if (need_wakeup) {
1713 /* drops ill lock */
1714 ipif_ill_refrele_tail(ill);
1715 } else {
1716 mutex_exit(&ill->ill_lock);
1721 * Lookup an ill based on the group, ifindex, ifaddr, and zoneid.
1722 * Applies to both IPv4 and IPv6, although ifaddr is only used with
1723 * IPv4.
1724 * Returns an error for IS_UNDER_IPMP and VNI interfaces.
1725 * On error it sets *errorp.
1727 static ill_t *
1728 ill_mcast_lookup(const in6_addr_t *group, ipaddr_t ifaddr, uint_t ifindex,
1729 zoneid_t zoneid, ip_stack_t *ipst, int *errorp)
1731 ill_t *ill;
1732 ipaddr_t v4group;
1734 if (IN6_IS_ADDR_V4MAPPED(group)) {
1735 IN6_V4MAPPED_TO_IPADDR(group, v4group);
1737 if (ifindex != 0) {
1738 ill = ill_lookup_on_ifindex_zoneid(ifindex, zoneid,
1739 B_FALSE, ipst);
1740 } else if (ifaddr != INADDR_ANY) {
1741 ipif_t *ipif;
1743 ipif = ipif_lookup_addr(ifaddr, NULL, zoneid, ipst);
1744 if (ipif == NULL) {
1745 ill = NULL;
1746 } else {
1747 ill = ipif->ipif_ill;
1748 ill_refhold(ill);
1749 ipif_refrele(ipif);
1751 } else {
1752 ill = ill_lookup_group_v4(v4group, zoneid, ipst, NULL);
1754 } else {
1755 if (ifindex != 0) {
1756 ill = ill_lookup_on_ifindex_zoneid(ifindex, zoneid,
1757 B_TRUE, ipst);
1758 } else {
1759 ill = ill_lookup_group_v6(group, zoneid, ipst, NULL);
1762 if (ill == NULL) {
1763 if (ifindex != 0)
1764 *errorp = ENXIO;
1765 else
1766 *errorp = EADDRNOTAVAIL;
1767 return (NULL);
1769 /* operation not supported on the virtual network interface */
1770 if (IS_UNDER_IPMP(ill) || IS_VNI(ill)) {
1771 ill_refrele(ill);
1772 *errorp = EINVAL;
1773 return (NULL);
1775 return (ill);
1779 * Looks up the appropriate ill given an interface index (or interface address)
1780 * and multicast group. On success, returns 0, with *illpp pointing to the
1781 * found struct. On failure, returns an errno and *illpp is set to NULL.
1783 * Returns an error for IS_UNDER_IPMP and VNI interfaces.
1785 * Handles both IPv4 and IPv6. The ifaddr argument only applies in the
1786 * case of IPv4.
1789 ip_opt_check(conn_t *connp, const in6_addr_t *v6group,
1790 const in6_addr_t *v6src, ipaddr_t ifaddr, uint_t ifindex, ill_t **illpp)
1792 boolean_t src_unspec;
1793 ill_t *ill = NULL;
1794 ip_stack_t *ipst = connp->conn_netstack->netstack_ip;
1795 int error = 0;
1797 *illpp = NULL;
1799 src_unspec = IN6_IS_ADDR_UNSPECIFIED(v6src);
1801 if (IN6_IS_ADDR_V4MAPPED(v6group)) {
1802 ipaddr_t v4group;
1803 ipaddr_t v4src;
1805 if (!IN6_IS_ADDR_V4MAPPED(v6src) && !src_unspec)
1806 return (EINVAL);
1807 IN6_V4MAPPED_TO_IPADDR(v6group, v4group);
1808 if (src_unspec) {
1809 v4src = INADDR_ANY;
1810 } else {
1811 IN6_V4MAPPED_TO_IPADDR(v6src, v4src);
1813 if (!CLASSD(v4group) || CLASSD(v4src))
1814 return (EINVAL);
1815 } else {
1816 if (IN6_IS_ADDR_V4MAPPED(v6src) && !src_unspec)
1817 return (EINVAL);
1818 if (!IN6_IS_ADDR_MULTICAST(v6group) ||
1819 IN6_IS_ADDR_MULTICAST(v6src)) {
1820 return (EINVAL);
1824 ill = ill_mcast_lookup(v6group, ifaddr, ifindex, IPCL_ZONEID(connp),
1825 ipst, &error);
1826 *illpp = ill;
1827 return (error);
1830 static int
1831 ip_get_srcfilter(conn_t *connp, struct group_filter *gf,
1832 struct ip_msfilter *imsf, const struct in6_addr *group, boolean_t issin6)
1834 ilg_t *ilg;
1835 int i, numsrc, fmode, outsrcs;
1836 struct sockaddr_in *sin;
1837 struct sockaddr_in6 *sin6;
1838 struct in_addr *addrp;
1839 slist_t *fp;
1840 boolean_t is_v4only_api;
1841 ipaddr_t ifaddr;
1842 uint_t ifindex;
1844 if (gf == NULL) {
1845 ASSERT(imsf != NULL);
1846 ASSERT(!issin6);
1847 is_v4only_api = B_TRUE;
1848 outsrcs = imsf->imsf_numsrc;
1849 ifaddr = imsf->imsf_interface.s_addr;
1850 ifindex = 0;
1851 } else {
1852 ASSERT(imsf == NULL);
1853 is_v4only_api = B_FALSE;
1854 outsrcs = gf->gf_numsrc;
1855 ifaddr = INADDR_ANY;
1856 ifindex = gf->gf_interface;
1859 /* No need to use ill_mcast_serializer for the reader */
1860 rw_enter(&connp->conn_ilg_lock, RW_READER);
1861 ilg = ilg_lookup(connp, group, ifaddr, ifindex);
1862 if (ilg == NULL) {
1863 rw_exit(&connp->conn_ilg_lock);
1864 return (EADDRNOTAVAIL);
1868 * In the kernel, we use the state definitions MODE_IS_[IN|EX]CLUDE
1869 * to identify the filter mode; but the API uses MCAST_[IN|EX]CLUDE.
1870 * So we need to translate here.
1872 fmode = (ilg->ilg_fmode == MODE_IS_INCLUDE) ?
1873 MCAST_INCLUDE : MCAST_EXCLUDE;
1874 if ((fp = ilg->ilg_filter) == NULL) {
1875 numsrc = 0;
1876 } else {
1877 for (i = 0; i < outsrcs; i++) {
1878 if (i == fp->sl_numsrc)
1879 break;
1880 if (issin6) {
1881 sin6 = (struct sockaddr_in6 *)&gf->gf_slist[i];
1882 sin6->sin6_family = AF_INET6;
1883 sin6->sin6_addr = fp->sl_addr[i];
1884 } else {
1885 if (is_v4only_api) {
1886 addrp = &imsf->imsf_slist[i];
1887 } else {
1888 sin = (struct sockaddr_in *)
1889 &gf->gf_slist[i];
1890 sin->sin_family = AF_INET;
1891 addrp = &sin->sin_addr;
1893 IN6_V4MAPPED_TO_INADDR(&fp->sl_addr[i], addrp);
1896 numsrc = fp->sl_numsrc;
1899 if (is_v4only_api) {
1900 imsf->imsf_numsrc = numsrc;
1901 imsf->imsf_fmode = fmode;
1902 } else {
1903 gf->gf_numsrc = numsrc;
1904 gf->gf_fmode = fmode;
1907 rw_exit(&connp->conn_ilg_lock);
1909 return (0);
1913 * Common for IPv4 and IPv6.
1915 static int
1916 ip_set_srcfilter(conn_t *connp, struct group_filter *gf,
1917 struct ip_msfilter *imsf, const struct in6_addr *group, ill_t *ill,
1918 boolean_t issin6)
1920 ilg_t *ilg;
1921 int i, err, infmode, new_fmode;
1922 uint_t insrcs;
1923 struct sockaddr_in *sin;
1924 struct sockaddr_in6 *sin6;
1925 struct in_addr *addrp;
1926 slist_t *orig_filter = NULL;
1927 slist_t *new_filter = NULL;
1928 mcast_record_t orig_fmode;
1929 boolean_t leave_group, is_v4only_api;
1930 ilg_stat_t ilgstat;
1931 ilm_t *ilm;
1932 ipaddr_t ifaddr;
1933 uint_t ifindex;
1935 if (gf == NULL) {
1936 ASSERT(imsf != NULL);
1937 ASSERT(!issin6);
1938 is_v4only_api = B_TRUE;
1939 insrcs = imsf->imsf_numsrc;
1940 infmode = imsf->imsf_fmode;
1941 ifaddr = imsf->imsf_interface.s_addr;
1942 ifindex = 0;
1943 } else {
1944 ASSERT(imsf == NULL);
1945 is_v4only_api = B_FALSE;
1946 insrcs = gf->gf_numsrc;
1947 infmode = gf->gf_fmode;
1948 ifaddr = INADDR_ANY;
1949 ifindex = gf->gf_interface;
1952 /* Make sure we can handle the source list */
1953 if (insrcs > MAX_FILTER_SIZE)
1954 return (ENOBUFS);
1957 * setting the filter to (INCLUDE, NULL) is treated
1958 * as a request to leave the group.
1960 leave_group = (infmode == MCAST_INCLUDE && insrcs == 0);
1962 mutex_enter(&ill->ill_mcast_serializer);
1963 rw_enter(&connp->conn_ilg_lock, RW_WRITER);
1964 ilg = ilg_lookup(connp, group, ifaddr, ifindex);
1965 if (ilg == NULL) {
1967 * if the request was actually to leave, and we
1968 * didn't find an ilg, there's nothing to do.
1970 if (leave_group) {
1971 rw_exit(&connp->conn_ilg_lock);
1972 mutex_exit(&ill->ill_mcast_serializer);
1973 return (0);
1975 ilg = conn_ilg_alloc(connp, &err);
1976 if (ilg == NULL) {
1977 rw_exit(&connp->conn_ilg_lock);
1978 mutex_exit(&ill->ill_mcast_serializer);
1979 return (err);
1981 ilgstat = ILGSTAT_NEW;
1982 ilg->ilg_v6group = *group;
1983 ilg->ilg_ill = ill;
1984 ilg->ilg_ifaddr = ifaddr;
1985 ilg->ilg_ifindex = ifindex;
1986 } else if (leave_group) {
1988 * Make sure we have the correct serializer. The ill argument
1989 * might not match ilg_ill.
1991 ilg_refhold(ilg);
1992 mutex_exit(&ill->ill_mcast_serializer);
1993 ill = ilg->ilg_ill;
1994 rw_exit(&connp->conn_ilg_lock);
1996 mutex_enter(&ill->ill_mcast_serializer);
1997 rw_enter(&connp->conn_ilg_lock, RW_WRITER);
1998 ilm = ilg->ilg_ilm;
1999 ilg->ilg_ilm = NULL;
2000 ilg_delete(connp, ilg, NULL);
2001 ilg_refrele(ilg);
2002 rw_exit(&connp->conn_ilg_lock);
2003 if (ilm != NULL)
2004 (void) ip_delmulti_serial(ilm, B_FALSE, B_TRUE);
2005 mutex_exit(&ill->ill_mcast_serializer);
2007 * Now that all locks have been dropped, we can send any
2008 * deferred/queued DLPI or IP packets
2010 ill_mcast_send_queued(ill);
2011 ill_dlpi_send_queued(ill);
2012 return (0);
2013 } else {
2014 ilgstat = ILGSTAT_CHANGE;
2015 /* Preserve existing state in case ip_addmulti() fails */
2016 orig_fmode = ilg->ilg_fmode;
2017 if (ilg->ilg_filter == NULL) {
2018 orig_filter = NULL;
2019 } else {
2020 orig_filter = l_alloc_copy(ilg->ilg_filter);
2021 if (orig_filter == NULL) {
2022 rw_exit(&connp->conn_ilg_lock);
2023 mutex_exit(&ill->ill_mcast_serializer);
2024 return (ENOMEM);
2030 * Alloc buffer to copy new state into (see below) before
2031 * we make any changes, so we can bail if it fails.
2033 if ((new_filter = l_alloc()) == NULL) {
2034 rw_exit(&connp->conn_ilg_lock);
2035 err = ENOMEM;
2036 goto free_and_exit;
2039 if (insrcs == 0) {
2040 CLEAR_SLIST(ilg->ilg_filter);
2041 } else {
2042 slist_t *fp;
2043 if (ilg->ilg_filter == NULL) {
2044 fp = l_alloc();
2045 if (fp == NULL) {
2046 if (ilgstat == ILGSTAT_NEW)
2047 ilg_delete(connp, ilg, NULL);
2048 rw_exit(&connp->conn_ilg_lock);
2049 err = ENOMEM;
2050 goto free_and_exit;
2052 } else {
2053 fp = ilg->ilg_filter;
2055 for (i = 0; i < insrcs; i++) {
2056 if (issin6) {
2057 sin6 = (struct sockaddr_in6 *)&gf->gf_slist[i];
2058 fp->sl_addr[i] = sin6->sin6_addr;
2059 } else {
2060 if (is_v4only_api) {
2061 addrp = &imsf->imsf_slist[i];
2062 } else {
2063 sin = (struct sockaddr_in *)
2064 &gf->gf_slist[i];
2065 addrp = &sin->sin_addr;
2067 IN6_INADDR_TO_V4MAPPED(addrp, &fp->sl_addr[i]);
2070 fp->sl_numsrc = insrcs;
2071 ilg->ilg_filter = fp;
2074 * In the kernel, we use the state definitions MODE_IS_[IN|EX]CLUDE
2075 * to identify the filter mode; but the API uses MCAST_[IN|EX]CLUDE.
2076 * So we need to translate here.
2078 ilg->ilg_fmode = (infmode == MCAST_INCLUDE) ?
2079 MODE_IS_INCLUDE : MODE_IS_EXCLUDE;
2082 * Save copy of ilg's filter state to pass to other functions,
2083 * so we can release conn_ilg_lock now.
2085 new_fmode = ilg->ilg_fmode;
2086 l_copy(ilg->ilg_filter, new_filter);
2088 rw_exit(&connp->conn_ilg_lock);
2091 * Now update the ill. We wait to do this until after the ilg
2092 * has been updated because we need to update the src filter
2093 * info for the ill, which involves looking at the status of
2094 * all the ilgs associated with this group/interface pair.
2096 ilm = ip_addmulti_serial(group, ill, connp->conn_zoneid, ilgstat,
2097 new_fmode, new_filter, &err);
2099 rw_enter(&connp->conn_ilg_lock, RW_WRITER);
2101 * Must look up the ilg again since we've not been holding
2102 * conn_ilg_lock. The ilg could have disappeared due to an unplumb
2103 * having called conn_update_ill, which can run once we dropped the
2104 * conn_ilg_lock above.
2106 ilg = ilg_lookup(connp, group, ifaddr, ifindex);
2107 if (ilg == NULL) {
2108 rw_exit(&connp->conn_ilg_lock);
2109 if (ilm != NULL) {
2110 (void) ip_delmulti_serial(ilm, B_FALSE,
2111 (ilgstat == ILGSTAT_NEW));
2113 err = ENXIO;
2114 goto free_and_exit;
2117 if (ilm != NULL) {
2118 if (ilg->ilg_ill == NULL) {
2119 /* some other thread is re-attaching this. */
2120 rw_exit(&connp->conn_ilg_lock);
2121 (void) ip_delmulti_serial(ilm, B_FALSE,
2122 (ilgstat == ILGSTAT_NEW));
2123 err = 0;
2124 goto free_and_exit;
2126 /* Succeeded. Update the ilg to point at the ilm */
2127 if (ilgstat == ILGSTAT_NEW) {
2128 if (ilg->ilg_ilm == NULL) {
2129 ilg->ilg_ilm = ilm;
2130 ilm->ilm_ifaddr = ifaddr; /* For netstat */
2131 } else {
2132 /* some other thread is re-attaching this. */
2133 rw_exit(&connp->conn_ilg_lock);
2134 (void) ip_delmulti_serial(ilm, B_FALSE, B_TRUE);
2135 err = 0;
2136 goto free_and_exit;
2138 } else {
2140 * ip_addmulti didn't get a held ilm for
2141 * ILGSTAT_CHANGE; ilm_refcnt was unchanged.
2143 ASSERT(ilg->ilg_ilm == ilm);
2145 } else {
2146 ASSERT(err != 0);
2148 * Failed to allocate the ilm.
2149 * Restore the original filter state, or delete the
2150 * newly-created ilg.
2151 * If ENETDOWN just clear ill_ilg since so that we
2152 * will rejoin when the ill comes back; don't report ENETDOWN
2153 * to application.
2155 if (ilgstat == ILGSTAT_NEW) {
2156 if (err == ENETDOWN) {
2157 ilg->ilg_ill = NULL;
2158 err = 0;
2159 } else {
2160 ilg_delete(connp, ilg, NULL);
2162 } else {
2163 ilg->ilg_fmode = orig_fmode;
2164 if (SLIST_IS_EMPTY(orig_filter)) {
2165 CLEAR_SLIST(ilg->ilg_filter);
2166 } else {
2168 * We didn't free the filter, even if we
2169 * were trying to make the source list empty;
2170 * so if orig_filter isn't empty, the ilg
2171 * must still have a filter alloc'd.
2173 l_copy(orig_filter, ilg->ilg_filter);
2177 rw_exit(&connp->conn_ilg_lock);
2179 free_and_exit:
2180 mutex_exit(&ill->ill_mcast_serializer);
2181 ill_mcast_send_queued(ill);
2182 ill_dlpi_send_queued(ill);
2183 l_free(orig_filter);
2184 l_free(new_filter);
2186 return (err);
2190 * Process the SIOC[GS]MSFILTER and SIOC[GS]IPMSFILTER ioctls.
2192 /* ARGSUSED */
2194 ip_sioctl_msfilter(ipif_t *ipif, sin_t *dummy_sin, queue_t *q, mblk_t *mp,
2195 ip_ioctl_cmd_t *ipip, void *ifreq)
2197 struct iocblk *iocp = (struct iocblk *)mp->b_rptr;
2198 /* existence verified in ip_wput_nondata() */
2199 mblk_t *data_mp = mp->b_cont->b_cont;
2200 int datalen, err, cmd, minsize;
2201 uint_t expsize = 0;
2202 conn_t *connp;
2203 boolean_t isv6, is_v4only_api, getcmd;
2204 struct sockaddr_in *gsin;
2205 struct sockaddr_in6 *gsin6;
2206 ipaddr_t v4group;
2207 in6_addr_t v6group;
2208 struct group_filter *gf = NULL;
2209 struct ip_msfilter *imsf = NULL;
2210 mblk_t *ndp;
2211 ill_t *ill;
2213 connp = Q_TO_CONN(q);
2214 err = ip_msfilter_ill(connp, mp, ipip, &ill);
2215 if (err != 0)
2216 return (err);
2218 if (data_mp->b_cont != NULL) {
2219 if ((ndp = msgpullup(data_mp, -1)) == NULL)
2220 return (ENOMEM);
2221 freemsg(data_mp);
2222 data_mp = ndp;
2223 mp->b_cont->b_cont = data_mp;
2226 cmd = iocp->ioc_cmd;
2227 getcmd = (cmd == SIOCGIPMSFILTER || cmd == SIOCGMSFILTER);
2228 is_v4only_api = (cmd == SIOCGIPMSFILTER || cmd == SIOCSIPMSFILTER);
2229 minsize = (is_v4only_api) ? IP_MSFILTER_SIZE(0) : GROUP_FILTER_SIZE(0);
2230 datalen = MBLKL(data_mp);
2232 if (datalen < minsize)
2233 return (EINVAL);
2236 * now we know we have at least have the initial structure,
2237 * but need to check for the source list array.
2239 if (is_v4only_api) {
2240 imsf = (struct ip_msfilter *)data_mp->b_rptr;
2241 isv6 = B_FALSE;
2242 expsize = IP_MSFILTER_SIZE(imsf->imsf_numsrc);
2243 } else {
2244 gf = (struct group_filter *)data_mp->b_rptr;
2245 if (gf->gf_group.ss_family == AF_INET6) {
2246 gsin6 = (struct sockaddr_in6 *)&gf->gf_group;
2247 isv6 = !(IN6_IS_ADDR_V4MAPPED(&gsin6->sin6_addr));
2248 } else {
2249 isv6 = B_FALSE;
2251 expsize = GROUP_FILTER_SIZE(gf->gf_numsrc);
2253 if (datalen < expsize)
2254 return (EINVAL);
2256 if (isv6) {
2257 gsin6 = (struct sockaddr_in6 *)&gf->gf_group;
2258 v6group = gsin6->sin6_addr;
2259 if (getcmd) {
2260 err = ip_get_srcfilter(connp, gf, NULL, &v6group,
2261 B_TRUE);
2262 } else {
2263 err = ip_set_srcfilter(connp, gf, NULL, &v6group, ill,
2264 B_TRUE);
2266 } else {
2267 boolean_t issin6 = B_FALSE;
2268 if (is_v4only_api) {
2269 v4group = (ipaddr_t)imsf->imsf_multiaddr.s_addr;
2270 IN6_IPADDR_TO_V4MAPPED(v4group, &v6group);
2271 } else {
2272 if (gf->gf_group.ss_family == AF_INET) {
2273 gsin = (struct sockaddr_in *)&gf->gf_group;
2274 v4group = (ipaddr_t)gsin->sin_addr.s_addr;
2275 IN6_IPADDR_TO_V4MAPPED(v4group, &v6group);
2276 } else {
2277 gsin6 = (struct sockaddr_in6 *)&gf->gf_group;
2278 IN6_V4MAPPED_TO_IPADDR(&gsin6->sin6_addr,
2279 v4group);
2280 issin6 = B_TRUE;
2284 * INADDR_ANY is represented as the IPv6 unspecifed addr.
2286 if (v4group == INADDR_ANY)
2287 v6group = ipv6_all_zeros;
2288 else
2289 IN6_IPADDR_TO_V4MAPPED(v4group, &v6group);
2291 if (getcmd) {
2292 err = ip_get_srcfilter(connp, gf, imsf, &v6group,
2293 issin6);
2294 } else {
2295 err = ip_set_srcfilter(connp, gf, imsf, &v6group, ill,
2296 issin6);
2299 ill_refrele(ill);
2301 return (err);
2305 * Determine the ill for the SIOC*MSFILTER ioctls
2307 * Returns an error for IS_UNDER_IPMP interfaces.
2309 * Finds the ill based on information in the ioctl headers.
2311 static int
2312 ip_msfilter_ill(conn_t *connp, mblk_t *mp, const ip_ioctl_cmd_t *ipip,
2313 ill_t **illp)
2315 int cmd = ipip->ipi_cmd;
2316 int err = 0;
2317 ill_t *ill;
2318 /* caller has verified this mblk exists */
2319 char *dbuf = (char *)mp->b_cont->b_cont->b_rptr;
2320 struct ip_msfilter *imsf;
2321 struct group_filter *gf;
2322 ipaddr_t v4addr, v4group;
2323 in6_addr_t v6group;
2324 uint32_t index;
2325 ip_stack_t *ipst;
2327 ipst = connp->conn_netstack->netstack_ip;
2329 *illp = NULL;
2331 /* don't allow multicast operations on a tcp conn */
2332 if (IPCL_IS_TCP(connp))
2333 return (ENOPROTOOPT);
2335 if (cmd == SIOCSIPMSFILTER || cmd == SIOCGIPMSFILTER) {
2336 /* don't allow v4-specific ioctls on v6 socket */
2337 if (connp->conn_family == AF_INET6)
2338 return (EAFNOSUPPORT);
2340 imsf = (struct ip_msfilter *)dbuf;
2341 v4addr = imsf->imsf_interface.s_addr;
2342 v4group = imsf->imsf_multiaddr.s_addr;
2343 IN6_IPADDR_TO_V4MAPPED(v4group, &v6group);
2344 ill = ill_mcast_lookup(&v6group, v4addr, 0, IPCL_ZONEID(connp),
2345 ipst, &err);
2346 if (ill == NULL && v4addr != INADDR_ANY)
2347 err = ENXIO;
2348 } else {
2349 gf = (struct group_filter *)dbuf;
2350 index = gf->gf_interface;
2351 if (gf->gf_group.ss_family == AF_INET6) {
2352 struct sockaddr_in6 *sin6;
2354 sin6 = (struct sockaddr_in6 *)&gf->gf_group;
2355 v6group = sin6->sin6_addr;
2356 } else if (gf->gf_group.ss_family == AF_INET) {
2357 struct sockaddr_in *sin;
2359 sin = (struct sockaddr_in *)&gf->gf_group;
2360 v4group = sin->sin_addr.s_addr;
2361 IN6_IPADDR_TO_V4MAPPED(v4group, &v6group);
2362 } else {
2363 return (EAFNOSUPPORT);
2365 ill = ill_mcast_lookup(&v6group, INADDR_ANY, index,
2366 IPCL_ZONEID(connp), ipst, &err);
2368 *illp = ill;
2369 return (err);
2373 * The structures used for the SIOC*MSFILTER ioctls usually must be copied
2374 * in in two stages, as the first copyin tells us the size of the attached
2375 * source buffer. This function is called by ip_wput_nondata() after the
2376 * first copyin has completed; it figures out how big the second stage
2377 * needs to be, and kicks it off.
2379 * In some cases (numsrc < 2), the second copyin is not needed as the
2380 * first one gets a complete structure containing 1 source addr.
2382 * The function returns 0 if a second copyin has been started (i.e. there's
2383 * no more work to be done right now), or 1 if the second copyin is not
2384 * needed and ip_wput_nondata() can continue its processing.
2387 ip_copyin_msfilter(queue_t *q, mblk_t *mp)
2389 struct iocblk *iocp = (struct iocblk *)mp->b_rptr;
2390 int cmd = iocp->ioc_cmd;
2391 /* validity of this checked in ip_wput_nondata() */
2392 mblk_t *mp1 = mp->b_cont->b_cont;
2393 int copysize = 0;
2394 int offset;
2396 if (cmd == SIOCSMSFILTER || cmd == SIOCGMSFILTER) {
2397 struct group_filter *gf = (struct group_filter *)mp1->b_rptr;
2398 if (gf->gf_numsrc >= 2) {
2399 offset = sizeof (struct group_filter);
2400 copysize = GROUP_FILTER_SIZE(gf->gf_numsrc) - offset;
2402 } else {
2403 struct ip_msfilter *imsf = (struct ip_msfilter *)mp1->b_rptr;
2404 if (imsf->imsf_numsrc >= 2) {
2405 offset = sizeof (struct ip_msfilter);
2406 copysize = IP_MSFILTER_SIZE(imsf->imsf_numsrc) - offset;
2409 if (copysize > 0) {
2410 mi_copyin_n(q, mp, offset, copysize);
2411 return (0);
2413 return (1);
2417 * Handle the following optmgmt:
2418 * IP_ADD_MEMBERSHIP must not have joined already
2419 * IPV6_JOIN_GROUP must not have joined already
2420 * MCAST_JOIN_GROUP must not have joined already
2421 * IP_BLOCK_SOURCE must have joined already
2422 * MCAST_BLOCK_SOURCE must have joined already
2423 * IP_JOIN_SOURCE_GROUP may have joined already
2424 * MCAST_JOIN_SOURCE_GROUP may have joined already
2426 * fmode and src parameters may be used to determine which option is
2427 * being set, as follows (IPV6_JOIN_GROUP and MCAST_JOIN_GROUP options
2428 * are functionally equivalent):
2429 * opt fmode v6src
2430 * IP_ADD_MEMBERSHIP MODE_IS_EXCLUDE unspecified
2431 * IPV6_JOIN_GROUP MODE_IS_EXCLUDE unspecified
2432 * MCAST_JOIN_GROUP MODE_IS_EXCLUDE unspecified
2433 * IP_BLOCK_SOURCE MODE_IS_EXCLUDE IPv4-mapped addr
2434 * MCAST_BLOCK_SOURCE MODE_IS_EXCLUDE v6 addr
2435 * IP_JOIN_SOURCE_GROUP MODE_IS_INCLUDE IPv4-mapped addr
2436 * MCAST_JOIN_SOURCE_GROUP MODE_IS_INCLUDE v6 addr
2438 * Changing the filter mode is not allowed; if a matching ilg already
2439 * exists and fmode != ilg->ilg_fmode, EINVAL is returned.
2441 * Verifies that there is a source address of appropriate scope for
2442 * the group; if not, EADDRNOTAVAIL is returned.
2444 * The interface to be used may be identified by an IPv4 address or by an
2445 * interface index.
2447 * Handles IPv4-mapped IPv6 multicast addresses by associating them
2448 * with the IPv4 address. Assumes that if v6group is v4-mapped,
2449 * v6src is also v4-mapped.
2452 ip_opt_add_group(conn_t *connp, boolean_t checkonly,
2453 const in6_addr_t *v6group, ipaddr_t ifaddr, uint_t ifindex,
2454 mcast_record_t fmode, const in6_addr_t *v6src)
2456 ill_t *ill;
2457 char buf[INET6_ADDRSTRLEN];
2458 int err;
2460 err = ip_opt_check(connp, v6group, v6src, ifaddr, ifindex, &ill);
2461 if (err != 0) {
2462 ip1dbg(("ip_opt_add_group: no ill for group %s/"
2463 "index %d\n", inet_ntop(AF_INET6, v6group, buf,
2464 sizeof (buf)), ifindex));
2465 return (err);
2468 if (checkonly) {
2470 * do not do operation, just pretend to - new T_CHECK
2471 * semantics. The error return case above if encountered
2472 * considered a good enough "check" here.
2474 ill_refrele(ill);
2475 return (0);
2477 mutex_enter(&ill->ill_mcast_serializer);
2479 * Multicast groups may not be joined on interfaces that are either
2480 * already underlying interfaces in an IPMP group, or in the process
2481 * of joining the IPMP group. The latter condition is enforced by
2482 * checking the value of ill->ill_grp_pending under the
2483 * ill_mcast_serializer lock. We cannot serialize the
2484 * ill_grp_pending check on the ill_g_lock across ilg_add() because
2485 * ill_mcast_send_queued -> ip_output_simple -> ill_lookup_on_ifindex
2486 * will take the ill_g_lock itself. Instead, we hold the
2487 * ill_mcast_serializer.
2489 if (ill->ill_grp_pending || IS_UNDER_IPMP(ill)) {
2490 DTRACE_PROBE2(group__add__on__under, ill_t *, ill,
2491 in6_addr_t *, v6group);
2492 mutex_exit(&ill->ill_mcast_serializer);
2493 ill_refrele(ill);
2494 return (EADDRNOTAVAIL);
2496 err = ilg_add(connp, v6group, ifaddr, ifindex, ill, fmode, v6src);
2497 mutex_exit(&ill->ill_mcast_serializer);
2499 * We have done an addmulti_impl and/or delmulti_impl.
2500 * All locks have been dropped, we can send any
2501 * deferred/queued DLPI or IP packets
2503 ill_mcast_send_queued(ill);
2504 ill_dlpi_send_queued(ill);
2505 ill_refrele(ill);
2506 return (err);
2510 * Common for IPv6 and IPv4.
2511 * Here we handle ilgs that are still attached to their original ill
2512 * (the one ifaddr/ifindex points at), as well as detached ones.
2513 * The detached ones might have been attached to some other ill.
2515 static int
2516 ip_opt_delete_group_excl(conn_t *connp, const in6_addr_t *v6group,
2517 ipaddr_t ifaddr, uint_t ifindex, mcast_record_t fmode,
2518 const in6_addr_t *v6src)
2520 ilg_t *ilg;
2521 boolean_t leaving;
2522 ilm_t *ilm;
2523 ill_t *ill;
2524 int err = 0;
	/*
	 * NOTE(review): we restart from here whenever the ilg is found
	 * condemned after reacquiring conn_ilg_lock below.
	 */
2526 retry:
2527 rw_enter(&connp->conn_ilg_lock, RW_WRITER);
2528 ilg = ilg_lookup(connp, v6group, ifaddr, ifindex);
2529 if (ilg == NULL) {
2530 rw_exit(&connp->conn_ilg_lock);
2532 * Since we didn't have any ilg we now do the error checks
2533 * to determine the best errno.
2535 err = ip_opt_check(connp, v6group, v6src, ifaddr, ifindex,
2536 &ill);
2537 if (ill != NULL) {
2538 /* The only error was a missing ilg for the group */
2539 ill_refrele(ill);
2540 err = EADDRNOTAVAIL;
2542 return (err);
2545 /* If the ilg is attached then we serialize using that ill */
2546 ill = ilg->ilg_ill;
2547 if (ill != NULL) {
2548 /* Prevent the ill and ilg from being freed */
2549 ill_refhold(ill);
2550 ilg_refhold(ilg);
2551 rw_exit(&connp->conn_ilg_lock);
	/*
	 * NOTE(review): lock order here is ill_mcast_serializer before
	 * conn_ilg_lock — conn_ilg_lock is dropped and retaken to honor it,
	 * hence the condemned re-check and retry below.
	 */
2552 mutex_enter(&ill->ill_mcast_serializer);
2553 rw_enter(&connp->conn_ilg_lock, RW_WRITER);
2554 if (ilg->ilg_condemned) {
2555 /* Disappeared */
2556 ilg_refrele(ilg);
2557 rw_exit(&connp->conn_ilg_lock);
2558 mutex_exit(&ill->ill_mcast_serializer);
2559 ill_refrele(ill);
2560 goto retry;
2565 * Decide if we're actually deleting the ilg or just removing a
2566 * source filter address; if just removing an addr, make sure we
2567 * aren't trying to change the filter mode, and that the addr is
2568 * actually in our filter list already. If we're removing the
2569 * last src in an include list, just delete the ilg.
2571 if (IN6_IS_ADDR_UNSPECIFIED(v6src)) {
2572 leaving = B_TRUE;
2573 } else {
2574 if (fmode != ilg->ilg_fmode)
2575 err = EINVAL;
2576 else if (ilg->ilg_filter == NULL ||
2577 !list_has_addr(ilg->ilg_filter, v6src))
2578 err = EADDRNOTAVAIL;
2579 if (err != 0) {
2580 if (ill != NULL)
2581 ilg_refrele(ilg);
2582 rw_exit(&connp->conn_ilg_lock);
2583 goto done;
2585 if (fmode == MODE_IS_INCLUDE &&
2586 ilg->ilg_filter->sl_numsrc == 1) {
2587 leaving = B_TRUE;
2588 v6src = NULL;
2589 } else {
2590 leaving = B_FALSE;
	/*
	 * NOTE(review): when leaving, detach the ilm from the ilg while the
	 * lock is still held; the ip_delmulti_serial() below runs unlocked.
	 */
2593 ilm = ilg->ilg_ilm;
2594 if (leaving)
2595 ilg->ilg_ilm = NULL;
2597 ilg_delete(connp, ilg, v6src);
2598 if (ill != NULL)
2599 ilg_refrele(ilg);
2600 rw_exit(&connp->conn_ilg_lock);
2602 if (ilm != NULL) {
2603 ASSERT(ill != NULL);
2604 (void) ip_delmulti_serial(ilm, B_FALSE, leaving);
2606 done:
2607 if (ill != NULL) {
2608 mutex_exit(&ill->ill_mcast_serializer);
2610 * Now that all locks have been dropped, we can
2611 * send any deferred/queued DLPI or IP packets
2613 ill_mcast_send_queued(ill);
2614 ill_dlpi_send_queued(ill);
2615 ill_refrele(ill);
2617 return (err);
2621 * Handle the following optmgmt:
2622 * IP_DROP_MEMBERSHIP will leave
2623 * IPV6_LEAVE_GROUP will leave
2624 * MCAST_LEAVE_GROUP will leave
2625 * IP_UNBLOCK_SOURCE will not leave
2626 * MCAST_UNBLOCK_SOURCE will not leave
2627 * IP_LEAVE_SOURCE_GROUP may leave (if leaving last source)
2628 * MCAST_LEAVE_SOURCE_GROUP may leave (if leaving last source)
2630 * fmode and src parameters may be used to determine which option is
2631 * being set, as follows:
2632 * opt fmode v6src
2633 * IP_DROP_MEMBERSHIP MODE_IS_INCLUDE unspecified
2634 * IPV6_LEAVE_GROUP MODE_IS_INCLUDE unspecified
2635 * MCAST_LEAVE_GROUP MODE_IS_INCLUDE unspecified
2636 * IP_UNBLOCK_SOURCE MODE_IS_EXCLUDE IPv4-mapped addr
2637 * MCAST_UNBLOCK_SOURCE MODE_IS_EXCLUDE v6 addr
2638 * IP_LEAVE_SOURCE_GROUP MODE_IS_INCLUDE IPv4-mapped addr
2639 * MCAST_LEAVE_SOURCE_GROUP MODE_IS_INCLUDE v6 addr
2641 * Changing the filter mode is not allowed; if a matching ilg already
2642 * exists and fmode != ilg->ilg_fmode, EINVAL is returned.
2644 * The interface to be used may be identified by an IPv4 address or by an
2645 * interface index.
2647 * Handles IPv4-mapped IPv6 multicast addresses by associating them
2648 * with the IPv4 address. Assumes that if v6group is v4-mapped,
2649 * v6src is also v4-mapped.
2652 ip_opt_delete_group(conn_t *connp, boolean_t checkonly,
2653 const in6_addr_t *v6group, ipaddr_t ifaddr, uint_t ifindex,
2654 mcast_record_t fmode, const in6_addr_t *v6src)
2658 * In the normal case below we don't check for the ill existing.
2659 * Instead we look for an existing ilg in _excl.
2660 * If checkonly we sanity check the arguments
2662 if (checkonly) {
2663 ill_t *ill;
2664 int err;
2666 err = ip_opt_check(connp, v6group, v6src, ifaddr, ifindex,
2667 &ill);
2669 * do not do operation, just pretend to - new T_CHECK semantics.
2670 * ip_opt_check is considered a good enough "check" here.
2672 if (ill != NULL)
2673 ill_refrele(ill);
2674 return (err);
2676 return (ip_opt_delete_group_excl(connp, v6group, ifaddr, ifindex,
2677 fmode, v6src));
2681 * Group mgmt for upper conn that passes things down
2682 * to the interface multicast list (and DLPI)
2683 * These routines can handle new style options that specify an interface name
2684 * as opposed to an interface address (needed for general handling of
2685 * unnumbered interfaces.)
2689 * Add a group to an upper conn group data structure and pass things down
2690 * to the interface multicast list (and DLPI)
2691 * Common for IPv4 and IPv6; for IPv4 we can have an ifaddr.
	/*
	 * NOTE(review): caller must hold ill->ill_mcast_serializer (asserted
	 * below).  Returns 0 on success or EADDRNOTAVAIL/EADDRINUSE/EINVAL/
	 * ENOBUFS/ENOMEM/ENXIO per the paths below.
	 */
2693 static int
2694 ilg_add(conn_t *connp, const in6_addr_t *v6group, ipaddr_t ifaddr,
2695 uint_t ifindex, ill_t *ill, mcast_record_t fmode, const in6_addr_t *v6src)
2697 int error = 0;
2698 ilg_t *ilg;
2699 ilg_stat_t ilgstat;
2700 slist_t *new_filter = NULL;
2701 int new_fmode;
2702 ilm_t *ilm;
2704 if (!(ill->ill_flags & ILLF_MULTICAST))
2705 return (EADDRNOTAVAIL);
2707 /* conn_ilg_lock protects the ilg list. */
2708 ASSERT(MUTEX_HELD(&ill->ill_mcast_serializer));
2709 rw_enter(&connp->conn_ilg_lock, RW_WRITER);
2710 ilg = ilg_lookup(connp, v6group, ifaddr, ifindex);
2713 * Depending on the option we're handling, may or may not be okay
2714 * if group has already been added. Figure out our rules based
2715 * on fmode and src params. Also make sure there's enough room
2716 * in the filter if we're adding a source to an existing filter.
2718 if (IN6_IS_ADDR_UNSPECIFIED(v6src)) {
2719 /* we're joining for all sources, must not have joined */
2720 if (ilg != NULL)
2721 error = EADDRINUSE;
2722 } else {
2723 if (fmode == MODE_IS_EXCLUDE) {
2724 /* (excl {addr}) => block source, must have joined */
2725 if (ilg == NULL)
2726 error = EADDRNOTAVAIL;
2728 /* (incl {addr}) => join source, may have joined */
2730 if (ilg != NULL &&
2731 SLIST_CNT(ilg->ilg_filter) == MAX_FILTER_SIZE)
2732 error = ENOBUFS;
2734 if (error != 0) {
2735 rw_exit(&connp->conn_ilg_lock);
2736 return (error);
2740 * Alloc buffer to copy new state into (see below) before
2741 * we make any changes, so we can bail if it fails.
2743 if ((new_filter = l_alloc()) == NULL) {
2744 rw_exit(&connp->conn_ilg_lock);
2745 return (ENOMEM);
2748 if (ilg == NULL) {
2749 if ((ilg = conn_ilg_alloc(connp, &error)) == NULL) {
2750 rw_exit(&connp->conn_ilg_lock);
2751 l_free(new_filter);
2752 return (error);
2754 ilg->ilg_ifindex = ifindex;
2755 ilg->ilg_ifaddr = ifaddr;
2756 if (!IN6_IS_ADDR_UNSPECIFIED(v6src)) {
2757 ilg->ilg_filter = l_alloc();
2758 if (ilg->ilg_filter == NULL) {
2759 ilg_delete(connp, ilg, NULL);
2760 rw_exit(&connp->conn_ilg_lock);
2761 l_free(new_filter);
2762 return (ENOMEM);
2764 ilg->ilg_filter->sl_numsrc = 1;
2765 ilg->ilg_filter->sl_addr[0] = *v6src;
2767 ilgstat = ILGSTAT_NEW;
2768 ilg->ilg_v6group = *v6group;
2769 ilg->ilg_fmode = fmode;
2770 ilg->ilg_ill = ill;
2771 } else {
2772 int index;
2774 if (ilg->ilg_fmode != fmode || IN6_IS_ADDR_UNSPECIFIED(v6src)) {
2775 rw_exit(&connp->conn_ilg_lock);
2776 l_free(new_filter);
2777 return (EINVAL);
2779 if (ilg->ilg_filter == NULL) {
2780 ilg->ilg_filter = l_alloc();
2781 if (ilg->ilg_filter == NULL) {
2782 rw_exit(&connp->conn_ilg_lock);
2783 l_free(new_filter);
2784 return (ENOMEM);
2787 if (list_has_addr(ilg->ilg_filter, v6src)) {
2788 rw_exit(&connp->conn_ilg_lock);
2789 l_free(new_filter);
2790 return (EADDRNOTAVAIL);
2792 ilgstat = ILGSTAT_CHANGE;
2793 index = ilg->ilg_filter->sl_numsrc++;
2794 ilg->ilg_filter->sl_addr[index] = *v6src;
2798 * Save copy of ilg's filter state to pass to other functions,
2799 * so we can release conn_ilg_lock now.
2801 new_fmode = ilg->ilg_fmode;
2802 l_copy(ilg->ilg_filter, new_filter);
2804 rw_exit(&connp->conn_ilg_lock);
2807 * Now update the ill. We wait to do this until after the ilg
2808 * has been updated because we need to update the src filter
2809 * info for the ill, which involves looking at the status of
2810 * all the ilgs associated with this group/interface pair.
2812 ilm = ip_addmulti_serial(v6group, ill, connp->conn_zoneid, ilgstat,
2813 new_fmode, new_filter, &error);
2815 rw_enter(&connp->conn_ilg_lock, RW_WRITER);
2817 * Must look up the ilg again since we've not been holding
2818 * conn_ilg_lock. The ilg could have disappeared due to an unplumb
2819 * having called conn_update_ill, which can run once we dropped the
2820 * conn_ilg_lock above.
2822 ilg = ilg_lookup(connp, v6group, ifaddr, ifindex);
2823 if (ilg == NULL) {
2824 rw_exit(&connp->conn_ilg_lock);
	/*
	 * NOTE(review): the ilg vanished while unlocked — undo the ilm we
	 * just created and report ENXIO to the caller.
	 */
2825 if (ilm != NULL) {
2826 (void) ip_delmulti_serial(ilm, B_FALSE,
2827 (ilgstat == ILGSTAT_NEW));
2829 error = ENXIO;
2830 goto free_and_exit;
2832 if (ilm != NULL) {
2833 if (ilg->ilg_ill == NULL) {
2834 /* some other thread is re-attaching this. */
2835 rw_exit(&connp->conn_ilg_lock);
2836 (void) ip_delmulti_serial(ilm, B_FALSE,
2837 (ilgstat == ILGSTAT_NEW));
2838 error = 0;
2839 goto free_and_exit;
2841 /* Succeeded. Update the ilg to point at the ilm */
2842 if (ilgstat == ILGSTAT_NEW) {
2843 if (ilg->ilg_ilm == NULL) {
2844 ilg->ilg_ilm = ilm;
2845 ilm->ilm_ifaddr = ifaddr; /* For netstat */
2846 } else {
2847 /* some other thread is re-attaching this. */
2848 rw_exit(&connp->conn_ilg_lock);
2849 (void) ip_delmulti_serial(ilm, B_FALSE, B_TRUE);
2850 error = 0;
2851 goto free_and_exit;
2853 } else {
2855 * ip_addmulti didn't get a held ilm for
2856 * ILGSTAT_CHANGE; ilm_refcnt was unchanged.
2858 ASSERT(ilg->ilg_ilm == ilm);
2860 } else {
2861 ASSERT(error != 0);
2863 * Failed to allocate the ilm.
2864 * Need to undo what we did before calling ip_addmulti()
2865 * If ENETDOWN just clear ill_ilg since so that we
2866 * will rejoin when the ill comes back; don't report ENETDOWN
2867 * to application.
2869 if (ilgstat == ILGSTAT_NEW && error == ENETDOWN) {
2870 ilg->ilg_ill = NULL;
2871 error = 0;
2872 } else {
2873 in6_addr_t delsrc =
2874 (ilgstat == ILGSTAT_NEW) ? ipv6_all_zeros : *v6src;
2876 ilg_delete(connp, ilg, &delsrc);
2879 rw_exit(&connp->conn_ilg_lock);
	/* Common exit: the scratch filter copy is freed on every path. */
2881 free_and_exit:
2882 l_free(new_filter);
2883 return (error);
2887 * Find an IPv4 ilg matching group, ill and source.
2888 * The group and source can't be INADDR_ANY here so no need to translate to
2889 * the unspecified IPv6 address.
2891 boolean_t
2892 conn_hasmembers_ill_withsrc_v4(conn_t *connp, ipaddr_t group, ipaddr_t src,
2893 ill_t *ill)
2895 in6_addr_t v6group, v6src;
2896 int i;
2897 boolean_t isinlist;
2898 ilg_t *ilg;
2900 rw_enter(&connp->conn_ilg_lock, RW_READER);
2901 IN6_IPADDR_TO_V4MAPPED(group, &v6group);
2902 for (ilg = connp->conn_ilg; ilg != NULL; ilg = ilg->ilg_next) {
2903 if (ilg->ilg_condemned)
2904 continue;
2906 /* ilg_ill could be NULL if an add is in progress */
2907 if (ilg->ilg_ill != ill)
2908 continue;
2910 /* The callers use upper ill for IPMP */
2911 ASSERT(!IS_UNDER_IPMP(ill));
2912 if (IN6_ARE_ADDR_EQUAL(&ilg->ilg_v6group, &v6group)) {
2913 if (SLIST_IS_EMPTY(ilg->ilg_filter)) {
2914 /* no source filter, so this is a match */
2915 rw_exit(&connp->conn_ilg_lock);
2916 return (B_TRUE);
2918 break;
2921 if (ilg == NULL) {
2922 rw_exit(&connp->conn_ilg_lock);
2923 return (B_FALSE);
2927 * we have an ilg with matching ill and group; but
2928 * the ilg has a source list that we must check.
2930 IN6_IPADDR_TO_V4MAPPED(src, &v6src);
2931 isinlist = B_FALSE;
2932 for (i = 0; i < ilg->ilg_filter->sl_numsrc; i++) {
2933 if (IN6_ARE_ADDR_EQUAL(&v6src, &ilg->ilg_filter->sl_addr[i])) {
2934 isinlist = B_TRUE;
2935 break;
2939 if ((isinlist && ilg->ilg_fmode == MODE_IS_INCLUDE) ||
2940 (!isinlist && ilg->ilg_fmode == MODE_IS_EXCLUDE)) {
2941 rw_exit(&connp->conn_ilg_lock);
2942 return (B_TRUE);
2944 rw_exit(&connp->conn_ilg_lock);
2945 return (B_FALSE);
2949 * Find an IPv6 ilg matching group, ill, and source
2951 boolean_t
2952 conn_hasmembers_ill_withsrc_v6(conn_t *connp, const in6_addr_t *v6group,
2953 const in6_addr_t *v6src, ill_t *ill)
2955 int i;
2956 boolean_t isinlist;
2957 ilg_t *ilg;
2959 rw_enter(&connp->conn_ilg_lock, RW_READER);
2960 for (ilg = connp->conn_ilg; ilg != NULL; ilg = ilg->ilg_next) {
2961 if (ilg->ilg_condemned)
2962 continue;
2964 /* ilg_ill could be NULL if an add is in progress */
2965 if (ilg->ilg_ill != ill)
2966 continue;
2968 /* The callers use upper ill for IPMP */
2969 ASSERT(!IS_UNDER_IPMP(ill));
2970 if (IN6_ARE_ADDR_EQUAL(&ilg->ilg_v6group, v6group)) {
2971 if (SLIST_IS_EMPTY(ilg->ilg_filter)) {
2972 /* no source filter, so this is a match */
2973 rw_exit(&connp->conn_ilg_lock);
2974 return (B_TRUE);
2976 break;
2979 if (ilg == NULL) {
2980 rw_exit(&connp->conn_ilg_lock);
2981 return (B_FALSE);
2985 * we have an ilg with matching ill and group; but
2986 * the ilg has a source list that we must check.
2988 isinlist = B_FALSE;
2989 for (i = 0; i < ilg->ilg_filter->sl_numsrc; i++) {
2990 if (IN6_ARE_ADDR_EQUAL(v6src, &ilg->ilg_filter->sl_addr[i])) {
2991 isinlist = B_TRUE;
2992 break;
2996 if ((isinlist && ilg->ilg_fmode == MODE_IS_INCLUDE) ||
2997 (!isinlist && ilg->ilg_fmode == MODE_IS_EXCLUDE)) {
2998 rw_exit(&connp->conn_ilg_lock);
2999 return (B_TRUE);
3001 rw_exit(&connp->conn_ilg_lock);
3002 return (B_FALSE);
3006 * Find an ilg matching group and ifaddr/ifindex.
3007 * We check both ifaddr and ifindex even though at most one of them
3008 * will be non-zero; that way we always find the right one.
3010 static ilg_t *
3011 ilg_lookup(conn_t *connp, const in6_addr_t *v6group, ipaddr_t ifaddr,
3012 uint_t ifindex)
3014 ilg_t *ilg;
3016 ASSERT(RW_LOCK_HELD(&connp->conn_ilg_lock));
3018 for (ilg = connp->conn_ilg; ilg != NULL; ilg = ilg->ilg_next) {
3019 if (ilg->ilg_condemned)
3020 continue;
3022 if (ilg->ilg_ifaddr == ifaddr &&
3023 ilg->ilg_ifindex == ifindex &&
3024 IN6_ARE_ADDR_EQUAL(&ilg->ilg_v6group, v6group))
3025 return (ilg);
3027 return (NULL);
3031 * If a source address is passed in (src != NULL and src is not
3032 * unspecified), remove the specified src addr from the given ilg's
3033 * filter list, else delete the ilg.
	/*
	 * NOTE(review): caller must hold conn_ilg_lock as writer (asserted
	 * below); the ilg must still be on the conn's list and not already
	 * condemned.
	 */
3035 static void
3036 ilg_delete(conn_t *connp, ilg_t *ilg, const in6_addr_t *src)
3038 ASSERT(RW_WRITE_HELD(&connp->conn_ilg_lock));
3039 ASSERT(ilg->ilg_ptpn != NULL);
3040 ASSERT(!ilg->ilg_condemned);
3042 if (src == NULL || IN6_IS_ADDR_UNSPECIFIED(src)) {
3043 FREE_SLIST(ilg->ilg_filter);
3044 ilg->ilg_filter = NULL;
3046 ASSERT(ilg->ilg_ilm == NULL);
3047 ilg->ilg_ill = NULL;
3048 ilg->ilg_condemned = B_TRUE;
3050 /* ilg_inactive will unlink from the list */
	/* NOTE(review): drops the list's reference on the ilg. */
3051 ilg_refrele(ilg);
3052 } else {
3053 l_remove(ilg->ilg_filter, src);
3058 * Called from conn close. No new ilg can be added or removed
3059 * because CONN_CLOSING has been set by ip_close. ilg_add / ilg_delete
3060 * will return error if conn has started closing.
3062 * We handle locking as follows.
3063 * Under conn_ilg_lock we get the first ilg. As we drop the conn_ilg_lock to
3064 * proceed with the ilm part of the delete we hold a reference on both the ill
3065 * and the ilg. This doesn't prevent changes to the ilg, but prevents it from
3066 * being deleted.
3068 * Since the ilg_add code path uses two locks (conn_ilg_lock for the ilg part,
3069 * and ill_mcast_lock for the ip_addmulti part) we can run at a point between
3070 * the two. At that point ilg_ill is set, but ilg_ilm hasn't yet been set. In
3071 * that case we delete the ilg here, which makes ilg_add discover that the ilg
3072 * has disappeared when ip_addmulti returns, so it will discard the ilm it just
3073 * added.
3075 void
3076 ilg_delete_all(conn_t *connp)
3078 ilg_t *ilg, *next_ilg, *held_ilg;
3079 ilm_t *ilm;
3080 ill_t *ill;
3081 boolean_t need_refrele;
3084 * Can not run if there is a conn_update_ill already running.
3085 * Wait for it to complete. Caller should have already set CONN_CLOSING
3086 * which prevents any new threads to run in conn_update_ill.
3088 mutex_enter(&connp->conn_lock);
3089 ASSERT(connp->conn_state_flags & CONN_CLOSING);
3090 while (connp->conn_state_flags & CONN_UPDATE_ILL)
3091 cv_wait(&connp->conn_cv, &connp->conn_lock);
3092 mutex_exit(&connp->conn_lock);
3094 rw_enter(&connp->conn_ilg_lock, RW_WRITER);
3095 ilg = connp->conn_ilg;
3096 held_ilg = NULL;
	/*
	 * NOTE(review): held_ilg carries a reference on the previously-held
	 * ilg across lock drops; it is released when a new one is held and
	 * finally after the loop.
	 */
3097 while (ilg != NULL) {
3098 if (ilg->ilg_condemned) {
3099 ilg = ilg->ilg_next;
3100 continue;
3102 /* If the ilg is detached then no need to serialize */
3103 if (ilg->ilg_ilm == NULL) {
3104 next_ilg = ilg->ilg_next;
3105 ilg_delete(connp, ilg, NULL);
3106 ilg = next_ilg;
3107 continue;
3109 ill = ilg->ilg_ilm->ilm_ill;
3112 * In order to serialize on the ill we try to enter
3113 * and if that fails we unlock and relock and then
3114 * check that we still have an ilm.
3116 need_refrele = B_FALSE;
3117 if (!mutex_tryenter(&ill->ill_mcast_serializer)) {
3118 ill_refhold(ill);
3119 need_refrele = B_TRUE;
3120 ilg_refhold(ilg);
3121 if (held_ilg != NULL)
3122 ilg_refrele(held_ilg);
3123 held_ilg = ilg;
3124 rw_exit(&connp->conn_ilg_lock);
3125 mutex_enter(&ill->ill_mcast_serializer);
3126 rw_enter(&connp->conn_ilg_lock, RW_WRITER);
3127 if (ilg->ilg_condemned) {
3128 ilg = ilg->ilg_next;
3129 goto next;
3132 ilm = ilg->ilg_ilm;
3133 ilg->ilg_ilm = NULL;
3134 next_ilg = ilg->ilg_next;
3135 ilg_delete(connp, ilg, NULL);
3136 ilg = next_ilg;
3137 rw_exit(&connp->conn_ilg_lock);
3139 if (ilm != NULL)
3140 (void) ip_delmulti_serial(ilm, B_FALSE, B_TRUE);
3142 next:
3143 mutex_exit(&ill->ill_mcast_serializer);
3145 * Now that all locks have been dropped, we can send any
3146 * deferred/queued DLPI or IP packets
3148 ill_mcast_send_queued(ill);
3149 ill_dlpi_send_queued(ill);
3150 if (need_refrele) {
3151 /* Drop ill reference while we hold no locks */
3152 ill_refrele(ill);
3154 rw_enter(&connp->conn_ilg_lock, RW_WRITER);
3156 if (held_ilg != NULL)
3157 ilg_refrele(held_ilg);
3158 rw_exit(&connp->conn_ilg_lock);
3162 * Attach the ilg to an ilm on the ill. If it fails we leave ilg_ill as NULL so
3163 * that a subsequent attempt can attach it. Drops and reacquires conn_ilg_lock.
	/*
	 * NOTE(review): caller must hold conn_ilg_lock as writer (asserted
	 * below); the lock is still held — though possibly dropped and
	 * retaken — on every return path.
	 */
3165 static void
3166 ilg_attach(conn_t *connp, ilg_t *ilg, ill_t *ill)
3168 ilg_stat_t ilgstat;
3169 slist_t *new_filter;
3170 int new_fmode;
3171 in6_addr_t v6group;
3172 ipaddr_t ifaddr;
3173 uint_t ifindex;
3174 ilm_t *ilm;
3175 int error = 0;
3177 ASSERT(RW_WRITE_HELD(&connp->conn_ilg_lock));
3179 * Alloc buffer to copy new state into (see below) before
3180 * we make any changes, so we can bail if it fails.
3182 if ((new_filter = l_alloc()) == NULL)
3183 return;
3186 * Save copy of ilg's filter state to pass to other functions, so
3187 * we can release conn_ilg_lock now.
3188 * Set ilg_ill so that an unplumb can find us.
3190 new_fmode = ilg->ilg_fmode;
3191 l_copy(ilg->ilg_filter, new_filter);
3192 v6group = ilg->ilg_v6group;
3193 ifaddr = ilg->ilg_ifaddr;
3194 ifindex = ilg->ilg_ifindex;
3195 ilgstat = ILGSTAT_NEW;
3197 ilg->ilg_ill = ill;
3198 ASSERT(ilg->ilg_ilm == NULL);
3199 rw_exit(&connp->conn_ilg_lock);
3201 ilm = ip_addmulti_serial(&v6group, ill, connp->conn_zoneid, ilgstat,
3202 new_fmode, new_filter, &error);
3203 l_free(new_filter);
3205 rw_enter(&connp->conn_ilg_lock, RW_WRITER);
3207 * Must look up the ilg again since we've not been holding
3208 * conn_ilg_lock. The ilg could have disappeared due to an unplumb
3209 * having called conn_update_ill, which can run once we dropped the
3210 * conn_ilg_lock above. Alternatively, the ilg could have been attached
3211 * when the lock was dropped
3213 ilg = ilg_lookup(connp, &v6group, ifaddr, ifindex);
3214 if (ilg == NULL || ilg->ilg_ilm != NULL) {
	/*
	 * NOTE(review): ilg vanished or was attached by someone else —
	 * discard the ilm we just created.
	 */
3215 if (ilm != NULL) {
3216 rw_exit(&connp->conn_ilg_lock);
3217 (void) ip_delmulti_serial(ilm, B_FALSE,
3218 (ilgstat == ILGSTAT_NEW));
3219 rw_enter(&connp->conn_ilg_lock, RW_WRITER);
3221 return;
3223 if (ilm == NULL) {
	/* Leave ilg_ill NULL so a later attempt can retry the attach. */
3224 ilg->ilg_ill = NULL;
3225 return;
3227 ilg->ilg_ilm = ilm;
3228 ilm->ilm_ifaddr = ifaddr; /* For netstat */
3232 * Called when an ill is unplumbed to make sure that there are no
3233 * dangling conn references to that ill. In that case ill is non-NULL and
3234 * we make sure we remove all references to it.
3235 * Also called when we should revisit the ilg_ill used for multicast
3236 * memberships, in which case ill is NULL.
3238 * conn is held by caller.
3240 * Note that ipcl_walk only walks conns that are not yet condemned.
3241 * condemned conns can't be refheld. For this reason, conn must become clean
3242 * first, i.e. it must not refer to any ill/ire and then only set
3243 * condemned flag.
3245 * We leave ixa_multicast_ifindex in place. We prefer dropping
3246 * packets instead of sending them out the wrong interface.
3248 * We keep the ilg around in a detached state (with ilg_ill and ilg_ilm being
3249 * NULL) so that the application can leave it later. Also, if ilg_ifaddr and
3250 * ilg_ifindex are zero, indicating that the system should pick the interface,
3251 * then we attempt to reselect the ill and join on it.
3253 * Locking notes:
3254 * Under conn_ilg_lock we get the first ilg. As we drop the conn_ilg_lock to
3255 * proceed with the ilm part of the delete we hold a reference on both the ill
3256 * and the ilg. This doesn't prevent changes to the ilg, but prevents it from
3257 * being deleted.
3259 * Note: if this function is called when new ill/ipif's arrive or change status
3260 * (SIOCSLIFINDEX, SIOCSLIFADDR) then we will attempt to attach any ilgs with
3261 * a NULL ilg_ill to an ill/ilm.
3263 static void
3264 conn_update_ill(conn_t *connp, caddr_t arg)
3266 ill_t *ill = (ill_t *)arg;
3269 * We have to prevent ip_close/ilg_delete_all from running at
3270 * the same time. ip_close sets CONN_CLOSING before doing the ilg_delete
3271 * all, and we set CONN_UPDATE_ILL. That ensures that only one of
3272 * ilg_delete_all and conn_update_ill run at a time for a given conn.
3273 * If ilg_delete_all got here first, then we have nothing to do.
3275 mutex_enter(&connp->conn_lock);
3276 if (connp->conn_state_flags & (CONN_CLOSING|CONN_UPDATE_ILL)) {
3277 /* Caller has to wait for ill_ilm_cnt to drop to zero */
3278 mutex_exit(&connp->conn_lock);
3279 return;
3281 connp->conn_state_flags |= CONN_UPDATE_ILL;
3282 mutex_exit(&connp->conn_lock);
3284 if (ill != NULL)
3285 ilg_check_detach(connp, ill);
3287 ilg_check_reattach(connp, ill);
3289 /* Do we need to wake up a thread in ilg_delete_all? */
3290 mutex_enter(&connp->conn_lock);
3291 connp->conn_state_flags &= ~CONN_UPDATE_ILL;
3292 if (connp->conn_state_flags & CONN_CLOSING)
3293 cv_broadcast(&connp->conn_cv);
3294 mutex_exit(&connp->conn_lock);
3297 /* Detach from an ill that is going away */
3298 static void
3299 ilg_check_detach(conn_t *connp, ill_t *ill)
3301 char group_buf[INET6_ADDRSTRLEN];
3302 ilg_t *ilg, *held_ilg;
3303 ilm_t *ilm;
3305 mutex_enter(&ill->ill_mcast_serializer);
3306 rw_enter(&connp->conn_ilg_lock, RW_WRITER);
3307 held_ilg = NULL;
3308 for (ilg = connp->conn_ilg; ilg != NULL; ilg = ilg->ilg_next) {
3309 if (ilg->ilg_condemned)
3310 continue;
3312 if (ilg->ilg_ill != ill)
3313 continue;
3315 /* Detach from current ill */
3316 ip1dbg(("ilg_check_detach: detach %s on %s\n",
3317 inet_ntop(AF_INET6, &ilg->ilg_v6group,
3318 group_buf, sizeof (group_buf)),
3319 ilg->ilg_ill->ill_name));
3321 /* Detach this ilg from the ill/ilm */
3322 ilm = ilg->ilg_ilm;
3323 ilg->ilg_ilm = NULL;
3324 ilg->ilg_ill = NULL;
3325 if (ilm == NULL)
3326 continue;
3328 /* Prevent ilg from disappearing */
	/*
	 * NOTE(review): ilg_transfer_hold releases the previous held ilg
	 * and holds this one, keeping it alive while conn_ilg_lock is
	 * dropped for ip_delmulti_serial() below.
	 */
3329 ilg_transfer_hold(held_ilg, ilg);
3330 held_ilg = ilg;
3331 rw_exit(&connp->conn_ilg_lock);
3333 (void) ip_delmulti_serial(ilm, B_FALSE, B_TRUE);
3334 rw_enter(&connp->conn_ilg_lock, RW_WRITER);
3336 if (held_ilg != NULL)
3337 ilg_refrele(held_ilg);
3338 rw_exit(&connp->conn_ilg_lock);
3339 mutex_exit(&ill->ill_mcast_serializer);
3341 * Now that all locks have been dropped, we can send any
3342 * deferred/queued DLPI or IP packets
3344 ill_mcast_send_queued(ill);
3345 ill_dlpi_send_queued(ill);
3349 * Check if there is a place to attach the conn_ilgs. We do this for both
3350 * detached ilgs and attached ones, since for the latter there could be
3351 * a better ill to attach them to. oill is non-null if we just detached from
3352 * that ill.
3354 static void
3355 ilg_check_reattach(conn_t *connp, ill_t *oill)
3357 ill_t *ill;
3358 char group_buf[INET6_ADDRSTRLEN];
3359 ilg_t *ilg, *held_ilg;
3360 ilm_t *ilm;
3361 zoneid_t zoneid = IPCL_ZONEID(connp);
3362 int error;
3363 ip_stack_t *ipst = connp->conn_netstack->netstack_ip;
3365 rw_enter(&connp->conn_ilg_lock, RW_WRITER);
3366 held_ilg = NULL;
3367 for (ilg = connp->conn_ilg; ilg != NULL; ilg = ilg->ilg_next) {
3368 if (ilg->ilg_condemned)
3369 continue;
3371 /* Check if the conn_ill matches what we would pick now */
3372 ill = ill_mcast_lookup(&ilg->ilg_v6group, ilg->ilg_ifaddr,
3373 ilg->ilg_ifindex, zoneid, ipst, &error);
3376 * Make sure the ill is usable for multicast and that
3377 * we can send the DL_ADDMULTI_REQ before we create an
3378 * ilm.
3380 if (ill != NULL &&
3381 (!(ill->ill_flags & ILLF_MULTICAST) || !ill->ill_dl_up)) {
3382 /* Drop locks across ill_refrele */
3383 ilg_transfer_hold(held_ilg, ilg);
3384 held_ilg = ilg;
3385 rw_exit(&connp->conn_ilg_lock);
3386 ill_refrele(ill);
3387 ill = NULL;
3388 rw_enter(&connp->conn_ilg_lock, RW_WRITER);
3389 /* Note that ilg could have become condemned */
3393 * Is the ill unchanged, even if both are NULL?
3394 * Did we just detach from that ill?
3396 if (ill == ilg->ilg_ill || (ill != NULL && ill == oill)) {
3397 if (ill != NULL) {
3398 /* Drop locks across ill_refrele */
3399 ilg_transfer_hold(held_ilg, ilg);
3400 held_ilg = ilg;
3401 rw_exit(&connp->conn_ilg_lock);
3402 ill_refrele(ill);
3403 rw_enter(&connp->conn_ilg_lock, RW_WRITER);
3405 continue;
3408 /* Something changed; detach from old first if needed */
3409 if (ilg->ilg_ill != NULL) {
3410 ill_t *ill2 = ilg->ilg_ill;
3411 boolean_t need_refrele = B_FALSE;
3414 * In order to serialize on the ill we try to enter
3415 * and if that fails we unlock and relock.
3417 if (!mutex_tryenter(&ill2->ill_mcast_serializer)) {
3418 ill_refhold(ill2);
3419 need_refrele = B_TRUE;
3420 ilg_transfer_hold(held_ilg, ilg);
3421 held_ilg = ilg;
3422 rw_exit(&connp->conn_ilg_lock);
3423 mutex_enter(&ill2->ill_mcast_serializer);
3424 rw_enter(&connp->conn_ilg_lock, RW_WRITER);
3425 /* Note that ilg could have become condemned */
3428 * Check that nobody else re-attached the ilg while we
3429 * dropped the lock.
3431 if (ilg->ilg_ill == ill2) {
3432 ASSERT(!ilg->ilg_condemned);
3433 /* Detach from current ill */
3434 ip1dbg(("conn_check_reattach: detach %s/%s\n",
3435 inet_ntop(AF_INET6, &ilg->ilg_v6group,
3436 group_buf, sizeof (group_buf)),
3437 ill2->ill_name));
3439 ilm = ilg->ilg_ilm;
3440 ilg->ilg_ilm = NULL;
3441 ilg->ilg_ill = NULL;
3442 } else {
3443 ilm = NULL;
3445 ilg_transfer_hold(held_ilg, ilg);
3446 held_ilg = ilg;
3447 rw_exit(&connp->conn_ilg_lock);
3448 if (ilm != NULL)
3449 (void) ip_delmulti_serial(ilm, B_FALSE, B_TRUE);
3450 mutex_exit(&ill2->ill_mcast_serializer);
3452 * Now that all locks have been dropped, we can send any
3453 * deferred/queued DLPI or IP packets
3455 ill_mcast_send_queued(ill2);
3456 ill_dlpi_send_queued(ill2);
3457 if (need_refrele) {
3458 /* Drop ill reference while we hold no locks */
3459 ill_refrele(ill2);
3461 rw_enter(&connp->conn_ilg_lock, RW_WRITER);
3463 * While we dropped conn_ilg_lock some other thread
3464 * could have attached this ilg, thus we check again.
3466 if (ilg->ilg_ill != NULL) {
3467 if (ill != NULL) {
3468 /* Drop locks across ill_refrele */
3469 ilg_transfer_hold(held_ilg, ilg);
3470 held_ilg = ilg;
3471 rw_exit(&connp->conn_ilg_lock);
3472 ill_refrele(ill);
3473 rw_enter(&connp->conn_ilg_lock,
3474 RW_WRITER);
3476 continue;
3479 if (ill != NULL) {
3481 * In order to serialize on the ill we try to enter
3482 * and if that fails we unlock and relock.
3484 if (!mutex_tryenter(&ill->ill_mcast_serializer)) {
3485 /* Already have a refhold on ill */
3486 ilg_transfer_hold(held_ilg, ilg);
3487 held_ilg = ilg;
3488 rw_exit(&connp->conn_ilg_lock);
3489 mutex_enter(&ill->ill_mcast_serializer);
3490 rw_enter(&connp->conn_ilg_lock, RW_WRITER);
3491 /* Note that ilg could have become condemned */
3493 ilg_transfer_hold(held_ilg, ilg);
3494 held_ilg = ilg;
3496 * Check that nobody else attached the ilg and that
3497 * it wasn't condemned while we dropped the lock.
3499 if (ilg->ilg_ill == NULL && !ilg->ilg_condemned) {
3501 * Attach to the new ill. Can fail in which
3502 * case ilg_ill will remain NULL. ilg_attach
3503 * drops and reacquires conn_ilg_lock.
3505 ip1dbg(("conn_check_reattach: attach %s/%s\n",
3506 inet_ntop(AF_INET6, &ilg->ilg_v6group,
3507 group_buf, sizeof (group_buf)),
3508 ill->ill_name));
3509 ilg_attach(connp, ilg, ill);
3510 ASSERT(RW_WRITE_HELD(&connp->conn_ilg_lock));
3512 /* Drop locks across ill_refrele */
3513 rw_exit(&connp->conn_ilg_lock);
3514 mutex_exit(&ill->ill_mcast_serializer);
3516 * Now that all locks have been
3517 * dropped, we can send any
3518 * deferred/queued DLPI or IP packets
3520 ill_mcast_send_queued(ill);
3521 ill_dlpi_send_queued(ill);
3522 ill_refrele(ill);
3523 rw_enter(&connp->conn_ilg_lock, RW_WRITER);
3526 if (held_ilg != NULL)
3527 ilg_refrele(held_ilg);
3528 rw_exit(&connp->conn_ilg_lock);
3532 * Called when an ill is unplumbed to make sure that there are no
3533 * dangling conn references to that ill. In that case ill is non-NULL and
3534 * we make sure we remove all references to it.
3535 * Also called when we should revisit the ilg_ill used for multicast
3536 * memberships, in which case ill is NULL.
3538 void
3539 update_conn_ill(ill_t *ill, ip_stack_t *ipst)
3541 ipcl_walk(conn_update_ill, (caddr_t)ill, ipst);