/*
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 */

/*
 * Copyright (c) 1991, 2010, Oracle and/or its affiliates. All rights reserved.
 */

/* Copyright (c) 1990 Mentat Inc. */

/*
 * Internet Group Management Protocol (IGMP) routines.
 * Multicast Listener Discovery Protocol (MLD) routines.
 *
 * Written by Steve Deering, Stanford, May 1988.
 * Modified by Rosen Sharma, Stanford, Aug 1994.
 * Modified by Bill Fenner, Xerox PARC, Feb. 1995.
 */
#include <sys/types.h>
#include <sys/stream.h>
#include <sys/stropts.h>
#include <sys/strlog.h>
#include <sys/strsun.h>
#include <sys/systm.h>
#include <sys/sunddi.h>
#include <sys/cmn_err.h>
#include <sys/atomic.h>
#include <sys/callb.h>
#include <sys/param.h>
#include <sys/socket.h>
#include <inet/ipclassifier.h>
#include <net/route.h>
#include <netinet/in.h>
#include <netinet/igmp_var.h>
#include <netinet/ip6.h>
#include <netinet/icmp6.h>
#include <inet/ipsec_impl.h>
#include <inet/common.h>
#include <inet/tunables.h>
#include <inet/ip_multi.h>
#include <inet/ip_listutils.h>
#include <netinet/igmp.h>
#include <inet/ip_ndp.h>
#include <inet/ip_if.h>
static uint_t	igmp_query_in(ipha_t *ipha, igmpa_t *igmpa, ill_t *ill);
static uint_t	igmpv3_query_in(igmp3qa_t *igmp3qa, ill_t *ill, int igmplen);
static uint_t	mld_query_in(mld_hdr_t *mldh, ill_t *ill);
static uint_t	mldv2_query_in(mld2q_t *mld2q, ill_t *ill, int mldlen);
static void	igmp_sendpkt(ilm_t *ilm, uchar_t type, ipaddr_t addr);
static void	mld_sendpkt(ilm_t *ilm, uchar_t type, const in6_addr_t *v6addr);
static void	igmpv3_sendrpt(ill_t *ill, mrec_t *reclist);
static void	mldv2_sendrpt(ill_t *ill, mrec_t *reclist);
static mrec_t	*mcast_bldmrec(mcast_record_t type, in6_addr_t *grp,
    slist_t *srclist, mrec_t *next);
static void	mcast_init_rtx(ill_t *ill, rtx_state_t *rtxp,
    mcast_record_t rtype, slist_t *flist);
static mrec_t	*mcast_merge_rtx(ilm_t *ilm, mrec_t *rp, slist_t *flist);
/*
 * Macros used to do timer len conversions.  Timer values are always
 * stored and passed to the timer functions as milliseconds; but the
 * default values and values from the wire may not be.
 *
 * And yes, it's obscure, but decisecond is easier to abbreviate than
 * "tenths of a second".
 */
#define	DSEC_TO_MSEC(dsec)	((dsec) * 100)
#define	SEC_TO_MSEC(sec)	((sec) * 1000)
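/*
 * Worked example: an IGMPv2 Max Response Time of 100 deciseconds taken off
 * the wire becomes DSEC_TO_MSEC(100) == 10000 ms (10 seconds) before it is
 * handed to the timer routines below.
 */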
/*
 * A running timer (scheduled thru timeout) can be cancelled if another
 * timer with a shorter timeout value is scheduled before it has timed
 * out.  When the shorter timer expires, the original timer is updated
 * to account for the time elapsed while the shorter timer ran; but this
 * does not take into account the amount of time already spent in timeout
 * state before being preempted by the shorter timer, that is the time
 * interval between time scheduled to time cancelled.  This can cause
 * delays in sending out multicast membership reports.  To resolve this
 * problem, wallclock time (absolute time) is used instead of deltas
 * (relative time) to track timers.
 *
 * The MACRO below gets the lbolt value, used for proper timer scheduling
 * and firing, so that multicast membership reports are sent on time.
 * The timer does not fire at exactly the time it was scheduled to fire;
 * a difference of a few milliseconds is observed.  An offset is used
 * to take care of the difference.
 */
#define	CURRENT_MSTIME	((uint_t)TICK_TO_MSEC(ddi_get_lbolt()))
#define	CURRENT_OFFSET	(999)
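/*
 * Note on usage: the timer fields below are kept as absolute millisecond
 * values derived from lbolt.  A relative delay is converted by adding
 * CURRENT_MSTIME (e.g. "ilm->ilm_timer += current"), and a timer is treated
 * as expired once it falls within CURRENT_OFFSET ms of the current time.
 */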
/*
 * The first multicast join will trigger the igmp timers / mld timers.
 * The unit for next is milliseconds.
 */
void
igmp_start_timers(unsigned next, ip_stack_t *ipst)
{
    int		time_left;
    int		ret;
    timeout_id_t	id;

    ASSERT(next != 0 && next != INFINITY);

    mutex_enter(&ipst->ips_igmp_timer_lock);

    if (ipst->ips_igmp_timer_setter_active) {
        /*
         * Serialize timer setters, one at a time. If the
         * timer is currently being set by someone,
         * just record the next time when it has to be
         * invoked and return. The current setter will
         * take care of it.
         */
        ipst->ips_igmp_time_to_next =
            MIN(ipst->ips_igmp_time_to_next, next);
        mutex_exit(&ipst->ips_igmp_timer_lock);
        return;
    } else {
        ipst->ips_igmp_timer_setter_active = B_TRUE;
    }
    if (ipst->ips_igmp_timeout_id == 0) {
        /*
         * The timer is inactive. We need to start a timer if we haven't
         * been asked to quiesce.
         */
        ipst->ips_igmp_time_to_next = next;
        if (ipst->ips_igmp_timer_quiesce != B_TRUE) {
            ipst->ips_igmp_timeout_id =
                timeout(igmp_timeout_handler, (void *)ipst,
                MSEC_TO_TICK(ipst->ips_igmp_time_to_next));
            ipst->ips_igmp_timer_scheduled_last = ddi_get_lbolt();
        }
        ipst->ips_igmp_timer_setter_active = B_FALSE;
        mutex_exit(&ipst->ips_igmp_timer_lock);
        return;
    }

    /*
     * The timer was scheduled sometime back for firing in
     * 'igmp_time_to_next' ms and is active. We need to
     * reschedule the timeout if the new 'next' will happen
     * earlier than the currently scheduled timeout.
     */
    time_left = ipst->ips_igmp_timer_scheduled_last +
        MSEC_TO_TICK(ipst->ips_igmp_time_to_next) - ddi_get_lbolt();
    if (time_left < MSEC_TO_TICK(next)) {
        ipst->ips_igmp_timer_setter_active = B_FALSE;
        mutex_exit(&ipst->ips_igmp_timer_lock);
        return;
    }
    id = ipst->ips_igmp_timeout_id;

    mutex_exit(&ipst->ips_igmp_timer_lock);
    ret = untimeout(id);
    mutex_enter(&ipst->ips_igmp_timer_lock);
    /*
     * The timeout was cancelled, or the timeout handler
     * completed, while we were blocked in the untimeout.
     * No other thread could have set the timer meanwhile
     * since we serialized all the timer setters. Thus
     * no timer is currently active nor executing nor will
     * any timer fire in the future. We start the timer now
     * if needed.
     */
    if (ret == -1) {
        ASSERT(ipst->ips_igmp_timeout_id == 0);
    } else {
        ASSERT(ipst->ips_igmp_timeout_id != 0);
        ipst->ips_igmp_timeout_id = 0;
    }
    if (ipst->ips_igmp_time_to_next != 0 &&
        ipst->ips_igmp_timer_quiesce != B_TRUE) {
        ipst->ips_igmp_time_to_next =
            MIN(ipst->ips_igmp_time_to_next, next);
        ipst->ips_igmp_timeout_id = timeout(igmp_timeout_handler,
            (void *)ipst, MSEC_TO_TICK(ipst->ips_igmp_time_to_next));
        ipst->ips_igmp_timer_scheduled_last = ddi_get_lbolt();
    }
    ipst->ips_igmp_timer_setter_active = B_FALSE;
    mutex_exit(&ipst->ips_igmp_timer_lock);
}
/*
 * The unit for next is milliseconds.
 */
void
mld_start_timers(unsigned next, ip_stack_t *ipst)
{
    int		time_left;
    int		ret;
    timeout_id_t	id;

    ASSERT(next != 0 && next != INFINITY);

    mutex_enter(&ipst->ips_mld_timer_lock);
    if (ipst->ips_mld_timer_setter_active) {
        /*
         * Serialize timer setters, one at a time. If the
         * timer is currently being set by someone,
         * just record the next time when it has to be
         * invoked and return. The current setter will
         * take care of it.
         */
        ipst->ips_mld_time_to_next =
            MIN(ipst->ips_mld_time_to_next, next);
        mutex_exit(&ipst->ips_mld_timer_lock);
        return;
    } else {
        ipst->ips_mld_timer_setter_active = B_TRUE;
    }
    if (ipst->ips_mld_timeout_id == 0) {
        /*
         * The timer is inactive. We need to start a timer, if we
         * haven't been asked to quiesce.
         */
        ipst->ips_mld_time_to_next = next;
        if (ipst->ips_mld_timer_quiesce != B_TRUE) {
            ipst->ips_mld_timeout_id = timeout(mld_timeout_handler,
                (void *)ipst,
                MSEC_TO_TICK(ipst->ips_mld_time_to_next));
            ipst->ips_mld_timer_scheduled_last = ddi_get_lbolt();
        }
        ipst->ips_mld_timer_setter_active = B_FALSE;
        mutex_exit(&ipst->ips_mld_timer_lock);
        return;
    }

    /*
     * The timer was scheduled sometime back for firing in
     * 'mld_time_to_next' ms and is active. We need to
     * reschedule the timeout if the new 'next' will happen
     * earlier than the currently scheduled timeout.
     */
    time_left = ipst->ips_mld_timer_scheduled_last +
        MSEC_TO_TICK(ipst->ips_mld_time_to_next) - ddi_get_lbolt();
    if (time_left < MSEC_TO_TICK(next)) {
        ipst->ips_mld_timer_setter_active = B_FALSE;
        mutex_exit(&ipst->ips_mld_timer_lock);
        return;
    }
    id = ipst->ips_mld_timeout_id;

    mutex_exit(&ipst->ips_mld_timer_lock);
    ret = untimeout(id);
    mutex_enter(&ipst->ips_mld_timer_lock);
    /*
     * The timeout was cancelled, or the timeout handler
     * completed, while we were blocked in the untimeout.
     * No other thread could have set the timer meanwhile
     * since we serialized all the timer setters. Thus
     * no timer is currently active nor executing nor will
     * any timer fire in the future. We start the timer now
     * if needed.
     */
    if (ret == -1) {
        ASSERT(ipst->ips_mld_timeout_id == 0);
    } else {
        ASSERT(ipst->ips_mld_timeout_id != 0);
        ipst->ips_mld_timeout_id = 0;
    }
    if (ipst->ips_mld_time_to_next != 0 &&
        ipst->ips_mld_timer_quiesce == B_FALSE) {
        ipst->ips_mld_time_to_next =
            MIN(ipst->ips_mld_time_to_next, next);
        ipst->ips_mld_timeout_id = timeout(mld_timeout_handler,
            (void *)ipst, MSEC_TO_TICK(ipst->ips_mld_time_to_next));
        ipst->ips_mld_timer_scheduled_last = ddi_get_lbolt();
    }
    ipst->ips_mld_timer_setter_active = B_FALSE;
    mutex_exit(&ipst->ips_mld_timer_lock);
}
/*
 * Return NULL for a bad packet that is discarded here.
 * Return mp if the message is OK and should be handed to "raw" receivers.
 * Callers of igmp_input() may need to reinitialize variables that were copied
 * from the mblk as this calls pullupmsg().
 */
mblk_t *
igmp_input(mblk_t *mp, ip_recv_attr_t *ira)
{
    igmpa_t	*igmpa;
    ipha_t	*ipha = (ipha_t *)(mp->b_rptr);
    int		iphlen, igmplen, mblklen;
    ilm_t	*ilm;
    uint32_t	src, dst;
    uint32_t	group;
    in6_addr_t	v6group;
    uint_t	next;
    ipif_t	*ipif;
    ill_t	*ill = ira->ira_ill;
    ip_stack_t	*ipst = ill->ill_ipst;

    ASSERT(!ill->ill_isv6);
    ++ipst->ips_igmpstat.igps_rcv_total;

    mblklen = MBLKL(mp);
    iphlen = ira->ira_ip_hdr_length;
    if (mblklen < 1 || mblklen < iphlen) {
        ++ipst->ips_igmpstat.igps_rcv_tooshort;
        goto bad_pkt;
    }
    igmplen = ira->ira_pktlen - iphlen;
    /*
     * Since msg sizes are more variable with v3, just pullup the
     * whole message now.
     */
    if (MBLKL(mp) < (igmplen + iphlen)) {
        mblk_t *mp1;

        if ((mp1 = msgpullup(mp, -1)) == NULL) {
            ++ipst->ips_igmpstat.igps_rcv_tooshort;
            goto bad_pkt;
        }
        freemsg(mp);
        mp = mp1;
        ipha = (ipha_t *)(mp->b_rptr);
    }

    /*
     * Validate length
     */
    if (igmplen < IGMP_MINLEN) {
        ++ipst->ips_igmpstat.igps_rcv_tooshort;
        goto bad_pkt;
    }

    igmpa = (igmpa_t *)(&mp->b_rptr[iphlen]);
    src = ipha->ipha_src;
    dst = ipha->ipha_dst;
    (void) mi_strlog(ill->ill_rq, 1, SL_TRACE,
        "igmp_input: src 0x%x, dst 0x%x on %s\n",
        (int)ntohl(src), (int)ntohl(dst),
        ill->ill_name);

    switch (igmpa->igmpa_type) {
    case IGMP_MEMBERSHIP_QUERY:
        /*
         * packet length differentiates between v1/v2 and v3
         * v1/v2 should be exactly 8 octets long; v3 is >= 12
         */
        if ((igmplen == IGMP_MINLEN) ||
            (ipst->ips_igmp_max_version <= IGMP_V2_ROUTER)) {
            next = igmp_query_in(ipha, igmpa, ill);
        } else if (igmplen >= IGMP_V3_QUERY_MINLEN) {
            next = igmpv3_query_in((igmp3qa_t *)igmpa, ill,
                igmplen);
        } else {
            ++ipst->ips_igmpstat.igps_rcv_tooshort;
            goto bad_pkt;
        }

        if (next != INFINITY)
            igmp_start_timers(next, ipst);
        break;

    case IGMP_V1_MEMBERSHIP_REPORT:
    case IGMP_V2_MEMBERSHIP_REPORT:
        /*
         * For fast leave to work, we have to know that we are the
         * last person to send a report for this group. Reports
         * generated by us are looped back since we could potentially
         * be a multicast router, so discard reports sourced by me.
         */
        mutex_enter(&ill->ill_lock);
        for (ipif = ill->ill_ipif; ipif != NULL;
            ipif = ipif->ipif_next) {
            if (ipif->ipif_lcl_addr == src) {
                (void) mi_strlog(ill->ill_rq, 1, SL_TRACE,
                    "igmp_input: we are only "
                    "member src 0x%x\n", (int)ntohl(src));
                mutex_exit(&ill->ill_lock);
                return (mp);
            }
        }
        mutex_exit(&ill->ill_lock);

        ++ipst->ips_igmpstat.igps_rcv_reports;
        group = igmpa->igmpa_group;
        if (!CLASSD(group)) {
            ++ipst->ips_igmpstat.igps_rcv_badreports;
            goto bad_pkt;
        }

        /*
         * KLUDGE: if the IP source address of the report has an
         * unspecified (i.e., zero) subnet number, as is allowed for
         * a booting host, replace it with the correct subnet number
         * so that a process-level multicast routing daemon can
         * determine which subnet it arrived from.  This is necessary
         * to compensate for the lack of any way for a process to
         * determine the arrival interface of an incoming packet.
         *
         * Requires that a copy of *this* message is passed up
         * to the raw interface, which is done by our caller.
         */
        if ((src & htonl(0xFF000000U)) == 0) {	/* Minimum net mask */
            /* Pick the first ipif on this ill */
            mutex_enter(&ill->ill_lock);
            src = ill->ill_ipif->ipif_subnet;
            mutex_exit(&ill->ill_lock);
            ip1dbg(("igmp_input: changed src to 0x%x\n",
                (int)ntohl(src)));
            ipha->ipha_src = src;
        }

        /*
         * If our ill has ILMs that belong to the group being
         * reported, and we are a 'Delaying Member' in the RFC
         * terminology, stop our timer for that group and 'clear
         * flag' i.e. mark as IGMP_OTHERMEMBER.
         */
        rw_enter(&ill->ill_mcast_lock, RW_WRITER);
        IN6_IPADDR_TO_V4MAPPED(group, &v6group);
        for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) {
            if (!IN6_ARE_ADDR_EQUAL(&ilm->ilm_v6addr, &v6group))
                continue;

            ++ipst->ips_igmpstat.igps_rcv_ourreports;
            ilm->ilm_timer = INFINITY;
            ilm->ilm_state = IGMP_OTHERMEMBER;
        }
        rw_exit(&ill->ill_mcast_lock);
        ill_mcast_timer_start(ill->ill_ipst);
        break;

    case IGMP_V3_MEMBERSHIP_REPORT:
        /*
         * Currently nothing to do here; IGMP router is not
         * implemented in ip, and v3 hosts don't pay attention
         * to membership reports.
         */
        break;
    }
    /*
     * Pass all valid IGMP packets up to any process(es) listening
     * on a raw IGMP socket. Do not free the packet.
     */
    return (mp);

bad_pkt:
    freemsg(mp);
    return (NULL);
}
static uint_t
igmp_query_in(ipha_t *ipha, igmpa_t *igmpa, ill_t *ill)
{
    ilm_t	*ilm;
    int		timer;
    uint_t	next, current;
    uint32_t	group;
    ip_stack_t	*ipst;

    ipst = ill->ill_ipst;
    ++ipst->ips_igmpstat.igps_rcv_queries;

    rw_enter(&ill->ill_mcast_lock, RW_WRITER);
    /*
     * In the IGMPv2 specification, there are 3 states and a flag.
     *
     * In Non-Member state, we simply don't have a membership record.
     * In Delaying Member state, our timer is running (ilm->ilm_timer
     * < INFINITY).  In Idle Member state, our timer is not running
     * (ilm->ilm_timer == INFINITY).
     *
     * The flag is ilm->ilm_state, it is set to IGMP_OTHERMEMBER if
     * we have heard a report from another member, or IGMP_IREPORTEDLAST
     * if I sent the last report.
     */
    if ((igmpa->igmpa_code == 0) ||
        (ipst->ips_igmp_max_version == IGMP_V1_ROUTER)) {
        /*
         * Query from an old router.
         * Remember that the querier on this interface is old,
         * and set the timer to the value in RFC 1112.
         */
        ill->ill_mcast_v1_time = 0;
        ill->ill_mcast_v1_tset = 1;
        if (ill->ill_mcast_type != IGMP_V1_ROUTER) {
            ip1dbg(("Received IGMPv1 Query on %s, switching mode "
                "to IGMP_V1_ROUTER\n", ill->ill_name));
            atomic_inc_16(&ill->ill_ifptr->illif_mcast_v1);
            ill->ill_mcast_type = IGMP_V1_ROUTER;
        }

        timer = SEC_TO_MSEC(IGMP_MAX_HOST_REPORT_DELAY);

        if (ipha->ipha_dst != htonl(INADDR_ALLHOSTS_GROUP) ||
            igmpa->igmpa_group != 0) {
            ++ipst->ips_igmpstat.igps_rcv_badqueries;
            rw_exit(&ill->ill_mcast_lock);
            ill_mcast_timer_start(ill->ill_ipst);
            return (0);
        }
    } else {
        /*
         * Query from a new router.
         * Simply do a validity check.
         */
        group = igmpa->igmpa_group;
        if (group != 0 && (!CLASSD(group))) {
            ++ipst->ips_igmpstat.igps_rcv_badqueries;
            rw_exit(&ill->ill_mcast_lock);
            ill_mcast_timer_start(ill->ill_ipst);
            return (0);
        }

        /*
         * Switch interface state to v2 on receipt of a v2 query
         * ONLY IF current state is v3.  Let things be if current
         * state is v1 but do reset the v2-querier-present timer.
         */
        if (ill->ill_mcast_type == IGMP_V3_ROUTER) {
            ip1dbg(("Received IGMPv2 Query on %s, switching mode "
                "to IGMP_V2_ROUTER", ill->ill_name));
            atomic_inc_16(&ill->ill_ifptr->illif_mcast_v2);
            ill->ill_mcast_type = IGMP_V2_ROUTER;
        }
        ill->ill_mcast_v2_time = 0;
        ill->ill_mcast_v2_tset = 1;

        timer = DSEC_TO_MSEC((int)igmpa->igmpa_code);
    }

    (void) mi_strlog(ill->ill_rq, 1, SL_TRACE,
        "igmp_input: TIMER = igmp_code %d igmp_type 0x%x",
        (int)ntohs(igmpa->igmpa_code),
        (int)ntohs(igmpa->igmpa_type));

    /*
     * -Start the timers in all of our membership records
     *  for the physical interface on which the query
     *  arrived, excluding those that belong to the "all
     *  hosts" group (224.0.0.1).
     *
     * -Restart any timer that is already running but has
     *  a value longer than the requested timeout.
     *
     * -Use the value specified in the query message as
     *  the maximum timeout.
     */
    next = (unsigned)INFINITY;

    current = CURRENT_MSTIME;
    for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) {
        /*
         * A multicast router joins INADDR_ANY address
         * to enable promiscuous reception of all
         * mcasts from the interface. This INADDR_ANY
         * is stored in the ilm_v6addr as V6 unspec addr.
         */
        if (!IN6_IS_ADDR_V4MAPPED(&ilm->ilm_v6addr))
            continue;
        if (ilm->ilm_addr == htonl(INADDR_ANY))
            continue;
        if (ilm->ilm_addr != htonl(INADDR_ALLHOSTS_GROUP) &&
            (igmpa->igmpa_group == 0) ||
            (igmpa->igmpa_group == ilm->ilm_addr)) {
            if (ilm->ilm_timer > timer) {
                MCAST_RANDOM_DELAY(ilm->ilm_timer, timer);
                if (ilm->ilm_timer < next)
                    next = ilm->ilm_timer;
                ilm->ilm_timer += current;
            }
        }
    }
    rw_exit(&ill->ill_mcast_lock);
    /*
     * No packets have been sent above - no
     * ill_mcast_send_queued is needed.
     */
    ill_mcast_timer_start(ill->ill_ipst);

    return (next);
}
static uint_t
igmpv3_query_in(igmp3qa_t *igmp3qa, ill_t *ill, int igmplen)
{
    uint_t	i, next, mrd, qqi, timer, delay, numsrc;
    uint_t	current;
    ilm_t	*ilm;
    ipaddr_t	*src_array;
    uint8_t	qrv;
    ip_stack_t	*ipst;

    ipst = ill->ill_ipst;
    /* make sure numsrc matches packet size */
    numsrc = ntohs(igmp3qa->igmp3qa_numsrc);
    if (igmplen < IGMP_V3_QUERY_MINLEN + (numsrc * sizeof (ipaddr_t))) {
        ++ipst->ips_igmpstat.igps_rcv_tooshort;
        return (0);
    }
    src_array = (ipaddr_t *)&igmp3qa[1];

    ++ipst->ips_igmpstat.igps_rcv_queries;

    rw_enter(&ill->ill_mcast_lock, RW_WRITER);

    if ((mrd = (uint_t)igmp3qa->igmp3qa_mxrc) >= IGMP_V3_MAXRT_FPMIN) {
        uint_t hdrval, mant, exp;

        hdrval = (uint_t)igmp3qa->igmp3qa_mxrc;
        mant = hdrval & IGMP_V3_MAXRT_MANT_MASK;
        exp = (hdrval & IGMP_V3_MAXRT_EXP_MASK) >> 4;
        mrd = (mant | 0x10) << (exp + 3);
    }
    if (mrd == 0)
        mrd = MCAST_DEF_QUERY_RESP_INTERVAL;
    timer = DSEC_TO_MSEC(mrd);
    MCAST_RANDOM_DELAY(delay, timer);
    next = (unsigned)INFINITY;
    current = CURRENT_MSTIME;

    if ((qrv = igmp3qa->igmp3qa_sqrv & IGMP_V3_RV_MASK) == 0)
        ill->ill_mcast_rv = MCAST_DEF_ROBUSTNESS;
    else
        ill->ill_mcast_rv = qrv;

    if ((qqi = (uint_t)igmp3qa->igmp3qa_qqic) >= IGMP_V3_QQI_FPMIN) {
        uint_t hdrval, mant, exp;

        hdrval = (uint_t)igmp3qa->igmp3qa_qqic;
        mant = hdrval & IGMP_V3_QQI_MANT_MASK;
        exp = (hdrval & IGMP_V3_QQI_EXP_MASK) >> 4;
        qqi = (mant | 0x10) << (exp + 3);
    }
    ill->ill_mcast_qi = (qqi == 0) ? MCAST_DEF_QUERY_INTERVAL : qqi;
    /*
     * If we have a pending general query response that's scheduled
     * sooner than the delay we calculated for this response, then
     * no action is required (RFC 3376 section 5.2 rule 1).
     */
    if (ill->ill_global_timer < (current + delay)) {
        rw_exit(&ill->ill_mcast_lock);
        ill_mcast_timer_start(ill->ill_ipst);
        return (next);
    }

    /*
     * Now take action depending upon query type:
     * general, group specific, or group/source specific.
     */
    if ((numsrc == 0) && (igmp3qa->igmp3qa_group == INADDR_ANY)) {
        /*
         * general query
         * We know global timer is either not running or is
         * greater than our calculated delay, so reset it to
         * our delay (random value in range [0, response time]).
         */
        ill->ill_global_timer = current + delay;
        next = delay;
    } else {
        /* group or group/source specific query */
        for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) {
            if (!IN6_IS_ADDR_V4MAPPED(&ilm->ilm_v6addr) ||
                (ilm->ilm_addr == htonl(INADDR_ANY)) ||
                (ilm->ilm_addr == htonl(INADDR_ALLHOSTS_GROUP)) ||
                (igmp3qa->igmp3qa_group != ilm->ilm_addr))
                continue;
            /*
             * If the query is group specific or we have a
             * pending group specific query, the response is
             * group specific (pending sources list should be
             * empty).  Otherwise, need to update the pending
             * sources list for the group and source specific
             * response.
             */
            if (numsrc == 0 || (ilm->ilm_timer < INFINITY &&
                SLIST_IS_EMPTY(ilm->ilm_pendsrcs))) {
group_query:
                FREE_SLIST(ilm->ilm_pendsrcs);
                ilm->ilm_pendsrcs = NULL;
            } else {
                boolean_t overflow;
                slist_t *pktl;

                if (numsrc > MAX_FILTER_SIZE ||
                    (ilm->ilm_pendsrcs == NULL &&
                    (ilm->ilm_pendsrcs = l_alloc()) == NULL)) {
                    /*
                     * We've been sent more sources than
                     * we can deal with; or we can't deal
                     * with a source list at all. Revert
                     * to a group specific query.
                     */
                    goto group_query;
                }
                if ((pktl = l_alloc()) == NULL)
                    goto group_query;
                pktl->sl_numsrc = numsrc;
                for (i = 0; i < numsrc; i++)
                    IN6_IPADDR_TO_V4MAPPED(src_array[i],
                        &(pktl->sl_addr[i]));
                l_union_in_a(ilm->ilm_pendsrcs, pktl,
                    &overflow);
                l_free(pktl);
                if (overflow)
                    goto group_query;
            }

            ilm->ilm_timer = (ilm->ilm_timer == INFINITY) ?
                INFINITY : (ilm->ilm_timer - current);
            /* choose soonest timer */
            ilm->ilm_timer = MIN(ilm->ilm_timer, delay);
            if (ilm->ilm_timer < next)
                next = ilm->ilm_timer;
            ilm->ilm_timer += current;
        }
    }
    rw_exit(&ill->ill_mcast_lock);
    /*
     * No packets have been sent above - no
     * ill_mcast_send_queued is needed.
     */
    ill_mcast_timer_start(ill->ill_ipst);

    return (next);
}
/*
 * Caller holds ill_mcast_lock. We queue the packet using ill_mcast_queue
 * and it gets sent after the lock is dropped.
 */
void
igmp_joingroup(ilm_t *ilm)
{
    uint_t	timer = INFINITY;
    ill_t	*ill;
    ip_stack_t	*ipst = ilm->ilm_ipst;

    ill = ilm->ilm_ill;

    ASSERT(!ill->ill_isv6);
    ASSERT(RW_WRITE_HELD(&ill->ill_mcast_lock));

    if (ilm->ilm_addr == htonl(INADDR_ALLHOSTS_GROUP)) {
        ilm->ilm_rtx.rtx_timer = INFINITY;
        ilm->ilm_state = IGMP_OTHERMEMBER;
    } else {
        ip1dbg(("Querier mode %d, sending report, group %x\n",
            ill->ill_mcast_type, htonl(ilm->ilm_addr)));
        if (ill->ill_mcast_type == IGMP_V1_ROUTER) {
            igmp_sendpkt(ilm, IGMP_V1_MEMBERSHIP_REPORT, 0);
        } else if (ill->ill_mcast_type == IGMP_V2_ROUTER) {
            igmp_sendpkt(ilm, IGMP_V2_MEMBERSHIP_REPORT, 0);
        } else if (ill->ill_mcast_type == IGMP_V3_ROUTER) {
            mrec_t *rp;
            mcast_record_t rtype;
            /*
             * The possible state changes we need to handle here:
             *   Old State    New State    Report
             *
             *   INCLUDE(0)   INCLUDE(X)   ALLOW(X),BLOCK(0)
             *   INCLUDE(0)   EXCLUDE(X)   TO_EX(X)
             *
             * No need to send the BLOCK(0) report; ALLOW(X)
             * is enough.
             */
            rtype = (ilm->ilm_fmode == MODE_IS_INCLUDE) ?
                ALLOW_NEW_SOURCES : CHANGE_TO_EXCLUDE;
            rp = mcast_bldmrec(rtype, &ilm->ilm_v6addr,
                ilm->ilm_filter, NULL);
            igmpv3_sendrpt(ill, rp);
            /*
             * Set up retransmission state.  Timer is set below,
             * for both v3 and older versions.
             */
            mcast_init_rtx(ill, &ilm->ilm_rtx, rtype,
                ilm->ilm_filter);
        }

        /* Set the ilm timer value */
        ilm->ilm_rtx.rtx_cnt = ill->ill_mcast_rv;
        MCAST_RANDOM_DELAY(ilm->ilm_rtx.rtx_timer,
            SEC_TO_MSEC(IGMP_MAX_HOST_REPORT_DELAY));
        timer = ilm->ilm_rtx.rtx_timer;
        ilm->ilm_rtx.rtx_timer += CURRENT_MSTIME;
        ilm->ilm_state = IGMP_IREPORTEDLAST;

        /*
         * We are holding ill_mcast_lock here and the timeout
         * handler (igmp_timeout_handler_per_ill) acquires that
         * lock. Hence we can't call igmp_start_timers since it could
         * deadlock in untimeout().
         * Instead the thread which drops ill_mcast_lock will have
         * to call ill_mcast_timer_start().
         */
        mutex_enter(&ipst->ips_igmp_timer_lock);
        ipst->ips_igmp_deferred_next = MIN(timer,
            ipst->ips_igmp_deferred_next);
        mutex_exit(&ipst->ips_igmp_timer_lock);
    }

    (void) mi_strlog(ilm->ilm_ill->ill_rq, 1, SL_TRACE,
        "igmp_joingroup: multicast_type %d timer %d",
        (ilm->ilm_ill->ill_mcast_type),
        (int)ntohl(timer));
}
/*
 * Caller holds ill_mcast_lock. We queue the packet using ill_mcast_queue
 * and it gets sent after the lock is dropped.
 */
void
mld_joingroup(ilm_t *ilm)
{
    uint_t	timer = INFINITY;
    ill_t	*ill;
    ip_stack_t	*ipst = ilm->ilm_ipst;

    ill = ilm->ilm_ill;

    ASSERT(ill->ill_isv6);

    ASSERT(RW_WRITE_HELD(&ill->ill_mcast_lock));

    if (IN6_ARE_ADDR_EQUAL(&ipv6_all_hosts_mcast, &ilm->ilm_v6addr)) {
        ilm->ilm_rtx.rtx_timer = INFINITY;
        ilm->ilm_state = IGMP_OTHERMEMBER;
    } else {
        if (ill->ill_mcast_type == MLD_V1_ROUTER) {
            mld_sendpkt(ilm, MLD_LISTENER_REPORT, NULL);
        } else {
            mrec_t *rp;
            mcast_record_t rtype;
            /*
             * The possible state changes we need to handle here:
             *   Old State    New State    Report
             *
             *   INCLUDE(0)   INCLUDE(X)   ALLOW(X),BLOCK(0)
             *   INCLUDE(0)   EXCLUDE(X)   TO_EX(X)
             *
             * No need to send the BLOCK(0) report; ALLOW(X)
             * is enough.
             */
            rtype = (ilm->ilm_fmode == MODE_IS_INCLUDE) ?
                ALLOW_NEW_SOURCES : CHANGE_TO_EXCLUDE;
            rp = mcast_bldmrec(rtype, &ilm->ilm_v6addr,
                ilm->ilm_filter, NULL);
            mldv2_sendrpt(ill, rp);
            /*
             * Set up retransmission state.  Timer is set below,
             * for both v2 and v1.
             */
            mcast_init_rtx(ill, &ilm->ilm_rtx, rtype,
                ilm->ilm_filter);
        }

        /* Set the ilm timer value */
        ASSERT(ill->ill_mcast_type != MLD_V2_ROUTER ||
            ilm->ilm_rtx.rtx_cnt > 0);

        ilm->ilm_rtx.rtx_cnt = ill->ill_mcast_rv;
        MCAST_RANDOM_DELAY(ilm->ilm_rtx.rtx_timer,
            SEC_TO_MSEC(ICMP6_MAX_HOST_REPORT_DELAY));
        timer = ilm->ilm_rtx.rtx_timer;
        ilm->ilm_rtx.rtx_timer += CURRENT_MSTIME;
        ilm->ilm_state = IGMP_IREPORTEDLAST;

        /*
         * We are holding ill_mcast_lock here and the timeout
         * handler (mld_timeout_handler_per_ill) acquires that
         * lock. Hence we can't call mld_start_timers since it could
         * deadlock in untimeout().
         * Instead the thread which drops ill_mcast_lock will have
         * to call ill_mcast_timer_start().
         */
        mutex_enter(&ipst->ips_mld_timer_lock);
        ipst->ips_mld_deferred_next = MIN(timer,
            ipst->ips_mld_deferred_next);
        mutex_exit(&ipst->ips_mld_timer_lock);
    }

    (void) mi_strlog(ilm->ilm_ill->ill_rq, 1, SL_TRACE,
        "mld_joingroup: multicast_type %d timer %d",
        (ilm->ilm_ill->ill_mcast_type),
        (int)ntohl(timer));
}
/*
 * Caller holds ill_mcast_lock. We queue the packet using ill_mcast_queue
 * and it gets sent after the lock is dropped.
 */
void
igmp_leavegroup(ilm_t *ilm)
{
    ill_t *ill = ilm->ilm_ill;

    ASSERT(!ill->ill_isv6);

    ASSERT(RW_WRITE_HELD(&ill->ill_mcast_lock));
    if (ilm->ilm_state == IGMP_IREPORTEDLAST &&
        ill->ill_mcast_type == IGMP_V2_ROUTER &&
        (ilm->ilm_addr != htonl(INADDR_ALLHOSTS_GROUP))) {
        igmp_sendpkt(ilm, IGMP_V2_LEAVE_GROUP,
            (htonl(INADDR_ALLRTRS_GROUP)));
        return;
    }
    if ((ill->ill_mcast_type == IGMP_V3_ROUTER) &&
        (ilm->ilm_addr != htonl(INADDR_ALLHOSTS_GROUP))) {
        mrec_t *rp;
        /*
         * The possible state changes we need to handle here:
         *   Old State    New State    Report
         *
         *   INCLUDE(X)   INCLUDE(0)   ALLOW(0),BLOCK(X)
         *   EXCLUDE(X)   INCLUDE(0)   TO_IN(0)
         *
         * No need to send the ALLOW(0) report; BLOCK(X) is enough
         */
        if (ilm->ilm_fmode == MODE_IS_INCLUDE) {
            rp = mcast_bldmrec(BLOCK_OLD_SOURCES, &ilm->ilm_v6addr,
                ilm->ilm_filter, NULL);
        } else {
            rp = mcast_bldmrec(CHANGE_TO_INCLUDE, &ilm->ilm_v6addr,
                NULL, NULL);
        }
        igmpv3_sendrpt(ill, rp);
        return;
    }
}
/*
 * Caller holds ill_mcast_lock. We queue the packet using ill_mcast_queue
 * and it gets sent after the lock is dropped.
 */
void
mld_leavegroup(ilm_t *ilm)
{
    ill_t *ill = ilm->ilm_ill;

    ASSERT(ill->ill_isv6);

    ASSERT(RW_WRITE_HELD(&ill->ill_mcast_lock));
    if (ilm->ilm_state == IGMP_IREPORTEDLAST &&
        ill->ill_mcast_type == MLD_V1_ROUTER &&
        (!IN6_ARE_ADDR_EQUAL(&ipv6_all_hosts_mcast, &ilm->ilm_v6addr))) {
        mld_sendpkt(ilm, MLD_LISTENER_REDUCTION, &ipv6_all_rtrs_mcast);
        return;
    }
    if ((ill->ill_mcast_type == MLD_V2_ROUTER) &&
        (!IN6_ARE_ADDR_EQUAL(&ipv6_all_hosts_mcast, &ilm->ilm_v6addr))) {
        mrec_t *rp;
        /*
         * The possible state changes we need to handle here:
         *   Old State    New State    Report
         *
         *   INCLUDE(X)   INCLUDE(0)   ALLOW(0),BLOCK(X)
         *   EXCLUDE(X)   INCLUDE(0)   TO_IN(0)
         *
         * No need to send the ALLOW(0) report; BLOCK(X) is enough
         */
        if (ilm->ilm_fmode == MODE_IS_INCLUDE) {
            rp = mcast_bldmrec(BLOCK_OLD_SOURCES, &ilm->ilm_v6addr,
                ilm->ilm_filter, NULL);
        } else {
            rp = mcast_bldmrec(CHANGE_TO_INCLUDE, &ilm->ilm_v6addr,
                NULL, NULL);
        }
        mldv2_sendrpt(ill, rp);
        return;
    }
}
/*
 * Caller holds ill_mcast_lock. We queue the packet using ill_mcast_queue
 * and it gets sent after the lock is dropped.
 */
void
igmp_statechange(ilm_t *ilm, mcast_record_t fmode, slist_t *flist)
{
    ill_t	*ill;
    mrec_t	*rp = NULL;
    ip_stack_t	*ipst = ilm->ilm_ipst;

    ASSERT(ilm != NULL);

    /* state change reports should only be sent if the router is v3 */
    if (ilm->ilm_ill->ill_mcast_type != IGMP_V3_ROUTER)
        return;

    ill = ilm->ilm_ill;
    ASSERT(RW_WRITE_HELD(&ill->ill_mcast_lock));

    /*
     * Compare existing (old) state with the new state and prepare
     * State Change Report, according to the rules in RFC 3376:
     *
     *   Old State    New State    State Change Report
     *
     *   INCLUDE(A)   INCLUDE(B)   ALLOW(B-A),BLOCK(A-B)
     *   EXCLUDE(A)   EXCLUDE(B)   ALLOW(A-B),BLOCK(B-A)
     *   INCLUDE(A)   EXCLUDE(B)   TO_EX(B)
     *   EXCLUDE(A)   INCLUDE(B)   TO_IN(B)
     */
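    /*
     * Worked example of the first rule: moving from INCLUDE{S1} to
     * INCLUDE{S1,S2} yields ALLOW(S2) (the B-A part), while moving from
     * INCLUDE{S1,S2} to INCLUDE{S1} yields BLOCK(S2) (the A-B part).
     */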
    if (ilm->ilm_fmode == fmode) {
        slist_t	*a_minus_b = NULL, *b_minus_a = NULL;
        slist_t	*allow, *block;

        if (((a_minus_b = l_alloc()) == NULL) ||
            ((b_minus_a = l_alloc()) == NULL)) {
            l_free(a_minus_b);
            if (ilm->ilm_fmode == MODE_IS_INCLUDE)
                goto send_to_ex;
            else
                goto send_to_in;
        }
        l_difference(ilm->ilm_filter, flist, a_minus_b);
        l_difference(flist, ilm->ilm_filter, b_minus_a);
        if (ilm->ilm_fmode == MODE_IS_INCLUDE) {
            allow = b_minus_a;
            block = a_minus_b;
        } else {
            allow = a_minus_b;
            block = b_minus_a;
        }
        if (!SLIST_IS_EMPTY(allow))
            rp = mcast_bldmrec(ALLOW_NEW_SOURCES, &ilm->ilm_v6addr,
                allow, rp);
        if (!SLIST_IS_EMPTY(block))
            rp = mcast_bldmrec(BLOCK_OLD_SOURCES, &ilm->ilm_v6addr,
                block, rp);
        l_free(a_minus_b);
        l_free(b_minus_a);
    } else if (ilm->ilm_fmode == MODE_IS_INCLUDE) {
send_to_ex:
        rp = mcast_bldmrec(CHANGE_TO_EXCLUDE, &ilm->ilm_v6addr, flist,
            NULL);
    } else {
send_to_in:
        rp = mcast_bldmrec(CHANGE_TO_INCLUDE, &ilm->ilm_v6addr, flist,
            NULL);
    }

    /*
     * Need to set up retransmission state; merge the new info with the
     * current state (which may be null). If the timer is not currently
     * running, the caller will start it when dropping ill_mcast_lock.
     */
    rp = mcast_merge_rtx(ilm, rp, flist);
    if (ilm->ilm_rtx.rtx_timer == INFINITY) {
        ilm->ilm_rtx.rtx_cnt = ill->ill_mcast_rv;
        MCAST_RANDOM_DELAY(ilm->ilm_rtx.rtx_timer,
            SEC_TO_MSEC(IGMP_MAX_HOST_REPORT_DELAY));
        mutex_enter(&ipst->ips_igmp_timer_lock);
        ipst->ips_igmp_deferred_next = MIN(ipst->ips_igmp_deferred_next,
            ilm->ilm_rtx.rtx_timer);
        ilm->ilm_rtx.rtx_timer += CURRENT_MSTIME;
        mutex_exit(&ipst->ips_igmp_timer_lock);
    }

    igmpv3_sendrpt(ill, rp);
}
/*
 * Caller holds ill_mcast_lock. We queue the packet using ill_mcast_queue
 * and it gets sent after the lock is dropped.
 */
void
mld_statechange(ilm_t *ilm, mcast_record_t fmode, slist_t *flist)
{
    ill_t	*ill;
    mrec_t	*rp = NULL;
    ip_stack_t	*ipst = ilm->ilm_ipst;

    ASSERT(ilm != NULL);

    ill = ilm->ilm_ill;
    ASSERT(RW_WRITE_HELD(&ill->ill_mcast_lock));

    /* only need to send if we have an mldv2-capable router */
    if (ill->ill_mcast_type != MLD_V2_ROUTER) {
        return;
    }

    /*
     * Compare existing (old) state with the new state passed in
     * and send appropriate MLDv2 State Change Report.
     *
     *   Old State    New State    State Change Report
     *
     *   INCLUDE(A)   INCLUDE(B)   ALLOW(B-A),BLOCK(A-B)
     *   EXCLUDE(A)   EXCLUDE(B)   ALLOW(A-B),BLOCK(B-A)
     *   INCLUDE(A)   EXCLUDE(B)   TO_EX(B)
     *   EXCLUDE(A)   INCLUDE(B)   TO_IN(B)
     */
    if (ilm->ilm_fmode == fmode) {
        slist_t	*a_minus_b = NULL, *b_minus_a = NULL;
        slist_t	*allow, *block;

        if (((a_minus_b = l_alloc()) == NULL) ||
            ((b_minus_a = l_alloc()) == NULL)) {
            l_free(a_minus_b);
            if (ilm->ilm_fmode == MODE_IS_INCLUDE)
                goto send_to_ex;
            else
                goto send_to_in;
        }
        l_difference(ilm->ilm_filter, flist, a_minus_b);
        l_difference(flist, ilm->ilm_filter, b_minus_a);
        if (ilm->ilm_fmode == MODE_IS_INCLUDE) {
            allow = b_minus_a;
            block = a_minus_b;
        } else {
            allow = a_minus_b;
            block = b_minus_a;
        }
        if (!SLIST_IS_EMPTY(allow))
            rp = mcast_bldmrec(ALLOW_NEW_SOURCES, &ilm->ilm_v6addr,
                allow, rp);
        if (!SLIST_IS_EMPTY(block))
            rp = mcast_bldmrec(BLOCK_OLD_SOURCES, &ilm->ilm_v6addr,
                block, rp);
        l_free(a_minus_b);
        l_free(b_minus_a);
    } else if (ilm->ilm_fmode == MODE_IS_INCLUDE) {
send_to_ex:
        rp = mcast_bldmrec(CHANGE_TO_EXCLUDE, &ilm->ilm_v6addr, flist,
            NULL);
    } else {
send_to_in:
        rp = mcast_bldmrec(CHANGE_TO_INCLUDE, &ilm->ilm_v6addr, flist,
            NULL);
    }

    /*
     * Need to set up retransmission state; merge the new info with the
     * current state (which may be null). If the timer is not currently
     * running, the caller will start it when dropping ill_mcast_lock.
     */
    rp = mcast_merge_rtx(ilm, rp, flist);
    ASSERT(ilm->ilm_rtx.rtx_cnt > 0);
    if (ilm->ilm_rtx.rtx_timer == INFINITY) {
        ilm->ilm_rtx.rtx_cnt = ill->ill_mcast_rv;
        MCAST_RANDOM_DELAY(ilm->ilm_rtx.rtx_timer,
            SEC_TO_MSEC(ICMP6_MAX_HOST_REPORT_DELAY));
        mutex_enter(&ipst->ips_mld_timer_lock);
        ipst->ips_mld_deferred_next =
            MIN(ipst->ips_mld_deferred_next, ilm->ilm_rtx.rtx_timer);
        ilm->ilm_rtx.rtx_timer += CURRENT_MSTIME;
        mutex_exit(&ipst->ips_mld_timer_lock);
    }

    mldv2_sendrpt(ill, rp);
}
static uint_t
igmp_timeout_handler_per_ill(ill_t *ill)
{
    uint_t	next = INFINITY, current;
    ilm_t	*ilm;
    mrec_t	*rp = NULL;
    mrec_t	*rtxrp = NULL;
    rtx_state_t	*rtxp;
    mcast_record_t	rtype;

    rw_enter(&ill->ill_mcast_lock, RW_WRITER);

    current = CURRENT_MSTIME;
    /* First check the global timer on this interface */
    if (ill->ill_global_timer == INFINITY)
        goto per_ilm_timer;
    if (ill->ill_global_timer <= (current + CURRENT_OFFSET)) {
        ill->ill_global_timer = INFINITY;
        /*
         * Send report for each group on this interface.
         * Since we just set the global timer (received a v3 general
         * query), need to skip the all hosts addr (224.0.0.1), per
         * RFC 3376 section 5.
         */
        for (ilm = ill->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) {
            if (ilm->ilm_addr == htonl(INADDR_ALLHOSTS_GROUP))
                continue;
            rp = mcast_bldmrec(ilm->ilm_fmode, &ilm->ilm_v6addr,
                ilm->ilm_filter, rp);
            /*
             * Since we're sending a report on this group, okay
             * to delete pending group-specific timers.  Note
             * that group-specific retransmit timers still need
             * to be checked in the per_ilm_timer for-loop.
             */
            ilm->ilm_timer = INFINITY;
            ilm->ilm_state = IGMP_IREPORTEDLAST;
            FREE_SLIST(ilm->ilm_pendsrcs);
            ilm->ilm_pendsrcs = NULL;
        }
        igmpv3_sendrpt(ill, rp);
        rp = NULL;
    } else {
        if ((ill->ill_global_timer - current) < next)
            next = ill->ill_global_timer - current;
    }

per_ilm_timer:
    for (ilm = ill->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) {
        if (ilm->ilm_timer == INFINITY)
            goto per_ilm_rtxtimer;

        if (ilm->ilm_timer > (current + CURRENT_OFFSET)) {
            if ((ilm->ilm_timer - current) < next)
                next = ilm->ilm_timer - current;

            (void) mi_strlog(ill->ill_rq, 1, SL_TRACE,
                "igmp_timo_hlr 2: ilm_timr %d "
                "typ %d nxt %d",
                (int)ntohl(ilm->ilm_timer - current),
                (ill->ill_mcast_type), next);

            goto per_ilm_rtxtimer;
        }

        /* the timer has expired, need to take action */
        ilm->ilm_timer = INFINITY;
        ilm->ilm_state = IGMP_IREPORTEDLAST;
        if (ill->ill_mcast_type == IGMP_V1_ROUTER) {
            igmp_sendpkt(ilm, IGMP_V1_MEMBERSHIP_REPORT, 0);
        } else if (ill->ill_mcast_type == IGMP_V2_ROUTER) {
            igmp_sendpkt(ilm, IGMP_V2_MEMBERSHIP_REPORT, 0);
        } else {
            slist_t *rsp;

            if (!SLIST_IS_EMPTY(ilm->ilm_pendsrcs) &&
                (rsp = l_alloc()) != NULL) {
                /*
                 * Contents of reply depend on pending
                 * requested source list.
                 */
                if (ilm->ilm_fmode == MODE_IS_INCLUDE) {
                    l_intersection(ilm->ilm_filter,
                        ilm->ilm_pendsrcs, rsp);
                } else {
                    l_difference(ilm->ilm_pendsrcs,
                        ilm->ilm_filter, rsp);
                }
                FREE_SLIST(ilm->ilm_pendsrcs);
                ilm->ilm_pendsrcs = NULL;
                if (!SLIST_IS_EMPTY(rsp))
                    rp = mcast_bldmrec(MODE_IS_INCLUDE,
                        &ilm->ilm_v6addr, rsp, rp);
                FREE_SLIST(rsp);
            } else {
                /*
                 * Either the pending request is just group-
                 * specific, or we couldn't get the resources
                 * (rsp) to build a source-specific reply.
                 */
                rp = mcast_bldmrec(ilm->ilm_fmode,
                    &ilm->ilm_v6addr, ilm->ilm_filter, rp);
            }
            igmpv3_sendrpt(ill, rp);
            rp = NULL;
        }

per_ilm_rtxtimer:
        rtxp = &ilm->ilm_rtx;

        if (rtxp->rtx_timer == INFINITY)
            continue;
        if (rtxp->rtx_timer > (current + CURRENT_OFFSET)) {
            if ((rtxp->rtx_timer - current) < next)
                next = rtxp->rtx_timer - current;
            continue;
        }

        rtxp->rtx_timer = INFINITY;
        ilm->ilm_state = IGMP_IREPORTEDLAST;
        if (ill->ill_mcast_type == IGMP_V1_ROUTER) {
            igmp_sendpkt(ilm, IGMP_V1_MEMBERSHIP_REPORT, 0);
            continue;
        }
        if (ill->ill_mcast_type == IGMP_V2_ROUTER) {
            igmp_sendpkt(ilm, IGMP_V2_MEMBERSHIP_REPORT, 0);
            continue;
        }

        /*
         * The retransmit timer has popped, and our router is
         * IGMPv3.  We have to delve into the retransmit state
         * stored in the ilm.
         *
         * Decrement the retransmit count.  If the fmode rtx
         * count is active, decrement it, and send a filter
         * mode change report with the ilm's source list.
         * Otherwise, send a source list change report with
         * the current retransmit lists.
         */
        ASSERT(rtxp->rtx_cnt > 0);
        ASSERT(rtxp->rtx_cnt >= rtxp->rtx_fmode_cnt);
        rtxp->rtx_cnt--;
        if (rtxp->rtx_fmode_cnt > 0) {
            rtxp->rtx_fmode_cnt--;
            rtype = (ilm->ilm_fmode == MODE_IS_INCLUDE) ?
                CHANGE_TO_INCLUDE : CHANGE_TO_EXCLUDE;
            rtxrp = mcast_bldmrec(rtype, &ilm->ilm_v6addr,
                ilm->ilm_filter, rtxrp);
        } else {
            rtxrp = mcast_bldmrec(ALLOW_NEW_SOURCES,
                &ilm->ilm_v6addr, rtxp->rtx_allow, rtxrp);
            rtxrp = mcast_bldmrec(BLOCK_OLD_SOURCES,
                &ilm->ilm_v6addr, rtxp->rtx_block, rtxrp);
        }
        if (rtxp->rtx_cnt > 0) {
            MCAST_RANDOM_DELAY(rtxp->rtx_timer,
                SEC_TO_MSEC(IGMP_MAX_HOST_REPORT_DELAY));
            if (rtxp->rtx_timer < next)
                next = rtxp->rtx_timer;
            rtxp->rtx_timer += current;
        } else {
            ASSERT(rtxp->rtx_timer == INFINITY);
            CLEAR_SLIST(rtxp->rtx_allow);
            CLEAR_SLIST(rtxp->rtx_block);
        }
        igmpv3_sendrpt(ill, rtxrp);
        rtxrp = NULL;
    }

    rw_exit(&ill->ill_mcast_lock);
    /* Send any deferred/queued IP packets */
    ill_mcast_send_queued(ill);
    /* Defer ill_mcast_timer_start() until the caller is done */

    return (next);
}
/*
 * igmp_timeout_handler:
 * Called when there are timeout events, every next * TIMEOUT_INTERVAL (tick).
 * Returns number of ticks to next event (or 0 if none).
 *
 * As part of multicast join and leave igmp we may need to send out an
 * igmp request. The igmp related state variables in the ilm are protected
 * by ill_mcast_lock. A single global igmp timer is used to track igmp timeouts.
 * igmp_timer_lock protects the global igmp_timeout_id. igmp_start_timers
 * starts the igmp timer if needed. It serializes multiple threads trying to
 * simultaneously start the timer using the igmp_timer_setter_active flag.
 *
 * igmp_input() receives igmp queries and responds to the queries
 * in a delayed fashion by posting a timer i.e. it calls igmp_start_timers().
 * Later the igmp timer fires, and the timeout handler igmp_timeout_handler()
 * performs the action exclusively after acquiring ill_mcast_lock.
 *
 * The igmp_slowtimo() function is called thru another timer.
 * igmp_slowtimeout_lock protects the igmp_slowtimeout_id.
 */
void
igmp_timeout_handler(void *arg)
{
    ill_t	*ill;
    uint_t	global_next = INFINITY;
    uint_t	next;
    ill_walk_context_t ctx;
    ip_stack_t	*ipst = arg;

    ASSERT(arg != NULL);
    mutex_enter(&ipst->ips_igmp_timer_lock);
    ASSERT(ipst->ips_igmp_timeout_id != 0);
    ipst->ips_igmp_timeout_id = 0;
    ipst->ips_igmp_timer_scheduled_last = 0;
    ipst->ips_igmp_time_to_next = 0;
    mutex_exit(&ipst->ips_igmp_timer_lock);

    rw_enter(&ipst->ips_ill_g_lock, RW_READER);
    ill = ILL_START_WALK_V4(&ctx, ipst);
    for (; ill != NULL; ill = ill_next(&ctx, ill)) {
        ASSERT(!ill->ill_isv6);
        /* Make sure the ill isn't going away. */
        if (!ill_check_and_refhold(ill))
            continue;
        rw_exit(&ipst->ips_ill_g_lock);
        next = igmp_timeout_handler_per_ill(ill);
        if (next < global_next)
            global_next = next;
        ill_refrele(ill);
        rw_enter(&ipst->ips_ill_g_lock, RW_READER);
    }
    rw_exit(&ipst->ips_ill_g_lock);
    if (global_next != INFINITY)
        igmp_start_timers(global_next, ipst);
}
/*
 * mld_timeout_handler:
 * Called when there are timeout events, every next (tick).
 * Returns number of ticks to next event (or 0 if none).
 */
static uint_t
mld_timeout_handler_per_ill(ill_t *ill)
{
    ilm_t	*ilm;
    uint_t	next = INFINITY, current;
    mrec_t	*rp = NULL;
    mrec_t	*rtxrp = NULL;
    rtx_state_t	*rtxp;
    mcast_record_t	rtype;

    rw_enter(&ill->ill_mcast_lock, RW_WRITER);

    current = CURRENT_MSTIME;
    /*
     * First check the global timer on this interface; the global timer
     * is not used for MLDv1, so if it's set we can assume we're v2.
     */
    if (ill->ill_global_timer == INFINITY)
        goto per_ilm_timer;
    if (ill->ill_global_timer <= (current + CURRENT_OFFSET)) {
        ill->ill_global_timer = INFINITY;
        /*
         * Send report for each group on this interface.
         * Since we just set the global timer (received a v2 general
         * query), need to skip the all hosts addr (ff02::1), per
         * RFC 3810 section 6.
         */
        for (ilm = ill->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) {
            if (IN6_ARE_ADDR_EQUAL(&ilm->ilm_v6addr,
                &ipv6_all_hosts_mcast))
                continue;
            rp = mcast_bldmrec(ilm->ilm_fmode, &ilm->ilm_v6addr,
                ilm->ilm_filter, rp);
            /*
             * Since we're sending a report on this group, okay
             * to delete pending group-specific timers.  Note
             * that group-specific retransmit timers still need
             * to be checked in the per_ilm_timer for-loop.
             */
            ilm->ilm_timer = INFINITY;
            ilm->ilm_state = IGMP_IREPORTEDLAST;
            FREE_SLIST(ilm->ilm_pendsrcs);
            ilm->ilm_pendsrcs = NULL;
        }
        mldv2_sendrpt(ill, rp);
        rp = NULL;
    } else {
        if ((ill->ill_global_timer - current) < next)
            next = ill->ill_global_timer - current;
    }

per_ilm_timer:
    for (ilm = ill->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) {
        if (ilm->ilm_timer == INFINITY)
            goto per_ilm_rtxtimer;

        if (ilm->ilm_timer > (current + CURRENT_OFFSET)) {
            if ((ilm->ilm_timer - current) < next)
                next = ilm->ilm_timer - current;

            (void) mi_strlog(ill->ill_rq, 1, SL_TRACE,
                "igmp_timo_hlr 2: ilm_timr"
                " %d typ %d nxt %d",
                (int)ntohl(ilm->ilm_timer - current),
                (ill->ill_mcast_type), next);

            goto per_ilm_rtxtimer;
        }

        /* the timer has expired, need to take action */
        ilm->ilm_timer = INFINITY;
        ilm->ilm_state = IGMP_IREPORTEDLAST;
        if (ill->ill_mcast_type == MLD_V1_ROUTER) {
            mld_sendpkt(ilm, MLD_LISTENER_REPORT, NULL);
        } else {
            slist_t *rsp;

            if (!SLIST_IS_EMPTY(ilm->ilm_pendsrcs) &&
                (rsp = l_alloc()) != NULL) {
                /*
                 * Contents of reply depend on pending
                 * requested source list.
                 */
                if (ilm->ilm_fmode == MODE_IS_INCLUDE) {
                    l_intersection(ilm->ilm_filter,
                        ilm->ilm_pendsrcs, rsp);
                } else {
                    l_difference(ilm->ilm_pendsrcs,
                        ilm->ilm_filter, rsp);
                }
                FREE_SLIST(ilm->ilm_pendsrcs);
                ilm->ilm_pendsrcs = NULL;
                if (!SLIST_IS_EMPTY(rsp))
                    rp = mcast_bldmrec(MODE_IS_INCLUDE,
                        &ilm->ilm_v6addr, rsp, rp);
                FREE_SLIST(rsp);
            } else {
                rp = mcast_bldmrec(ilm->ilm_fmode,
                    &ilm->ilm_v6addr, ilm->ilm_filter, rp);
            }
        }

per_ilm_rtxtimer:
        rtxp = &ilm->ilm_rtx;

        if (rtxp->rtx_timer == INFINITY)
            continue;
        if (rtxp->rtx_timer > (current + CURRENT_OFFSET)) {
            if ((rtxp->rtx_timer - current) < next)
                next = rtxp->rtx_timer - current;
            continue;
        }

        rtxp->rtx_timer = INFINITY;
        ilm->ilm_state = IGMP_IREPORTEDLAST;
        if (ill->ill_mcast_type == MLD_V1_ROUTER) {
            mld_sendpkt(ilm, MLD_LISTENER_REPORT, NULL);
            continue;
        }

        /*
         * The retransmit timer has popped, and our router is
         * MLDv2.  We have to delve into the retransmit state
         * stored in the ilm.
         *
         * Decrement the retransmit count.  If the fmode rtx
         * count is active, decrement it, and send a filter
         * mode change report with the ilm's source list.
         * Otherwise, send a source list change report with
         * the current retransmit lists.
         */
        ASSERT(rtxp->rtx_cnt > 0);
        ASSERT(rtxp->rtx_cnt >= rtxp->rtx_fmode_cnt);
        rtxp->rtx_cnt--;
        if (rtxp->rtx_fmode_cnt > 0) {
            rtxp->rtx_fmode_cnt--;
            rtype = (ilm->ilm_fmode == MODE_IS_INCLUDE) ?
                CHANGE_TO_INCLUDE : CHANGE_TO_EXCLUDE;
            rtxrp = mcast_bldmrec(rtype, &ilm->ilm_v6addr,
                ilm->ilm_filter, rtxrp);
        } else {
            rtxrp = mcast_bldmrec(ALLOW_NEW_SOURCES,
                &ilm->ilm_v6addr, rtxp->rtx_allow, rtxrp);
            rtxrp = mcast_bldmrec(BLOCK_OLD_SOURCES,
                &ilm->ilm_v6addr, rtxp->rtx_block, rtxrp);
        }
        if (rtxp->rtx_cnt > 0) {
            MCAST_RANDOM_DELAY(rtxp->rtx_timer,
                SEC_TO_MSEC(ICMP6_MAX_HOST_REPORT_DELAY));
            if (rtxp->rtx_timer < next)
                next = rtxp->rtx_timer;
            rtxp->rtx_timer += current;
        } else {
            ASSERT(rtxp->rtx_timer == INFINITY);
            CLEAR_SLIST(rtxp->rtx_allow);
            CLEAR_SLIST(rtxp->rtx_block);
        }
    }

    if (ill->ill_mcast_type == MLD_V2_ROUTER) {
        mldv2_sendrpt(ill, rp);
        mldv2_sendrpt(ill, rtxrp);
    }

    rw_exit(&ill->ill_mcast_lock);
    /* Send any deferred/queued IP packets */
    ill_mcast_send_queued(ill);
    /* Defer ill_mcast_timer_start() until the caller is done */

    return (next);
}
/*
 * mld_timeout_handler:
 * Called when there are timeout events, every next * TIMEOUT_INTERVAL (tick).
 * Returns number of ticks to next event (or 0 if none).
 * MT issues are same as igmp_timeout_handler.
 */
void
mld_timeout_handler(void *arg)
{
    ill_t	*ill;
    uint_t	global_next = INFINITY;
    uint_t	next;
    ill_walk_context_t ctx;
    ip_stack_t	*ipst = arg;

    ASSERT(arg != NULL);
    mutex_enter(&ipst->ips_mld_timer_lock);
    ASSERT(ipst->ips_mld_timeout_id != 0);
    ipst->ips_mld_timeout_id = 0;
    ipst->ips_mld_timer_scheduled_last = 0;
    ipst->ips_mld_time_to_next = 0;
    mutex_exit(&ipst->ips_mld_timer_lock);

    rw_enter(&ipst->ips_ill_g_lock, RW_READER);
    ill = ILL_START_WALK_V6(&ctx, ipst);
    for (; ill != NULL; ill = ill_next(&ctx, ill)) {
        ASSERT(ill->ill_isv6);
        /* Make sure the ill isn't going away. */
        if (!ill_check_and_refhold(ill))
            continue;
        rw_exit(&ipst->ips_ill_g_lock);
        next = mld_timeout_handler_per_ill(ill);
        if (next < global_next)
            global_next = next;
        ill_refrele(ill);
        rw_enter(&ipst->ips_ill_g_lock, RW_READER);
    }
    rw_exit(&ipst->ips_ill_g_lock);
    if (global_next != INFINITY)
        mld_start_timers(global_next, ipst);
}
/*
 * Calculate the Older Version Querier Present timeout value, in number
 * of slowtimo intervals, for the given ill.
 */
#define	OVQP(ill) \
	((1000 * (((ill)->ill_mcast_rv * (ill)->ill_mcast_qi) \
	+ MCAST_QUERY_RESP_INTERVAL)) / MCAST_SLOWTIMO_INTERVAL)
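/*
 * Rough worked example: with the RFC 3376/3810 defaults (robustness
 * variable 2, query interval 125 seconds, query response interval
 * 10 seconds), OVQP evaluates to about 260 seconds' worth of slowtimo
 * intervals before an older-version querier is aged out.
 */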
/*
 * - Resets to newer version if we didn't hear from the older version router
 *   in IGMP_AGE_THRESHOLD seconds.
 * - Resets slowtimeout.
 * Check for ips_igmp_max_version ensures that we don't revert to a higher
 * IGMP version than configured.
 */
void
igmp_slowtimo(void *arg)
{
    ill_t	*ill;
    ill_if_t	*ifp;
    avl_tree_t	*avl_tree;
    ip_stack_t	*ipst = (ip_stack_t *)arg;

    ASSERT(arg != NULL);

    /*
     * The ill_if_t list is circular, hence the odd loop parameters.
     *
     * We can't use the ILL_START_WALK and ill_next() wrappers for this
     * walk, as we need to check the illif_mcast_* fields in the ill_if_t
     * structure (allowing us to skip if none of the instances have timers
     * running).
     */
    rw_enter(&ipst->ips_ill_g_lock, RW_READER);
    for (ifp = IP_V4_ILL_G_LIST(ipst);
        ifp != (ill_if_t *)&IP_V4_ILL_G_LIST(ipst);
        ifp = ifp->illif_next) {
        /*
         * illif_mcast_v[12] are set using atomics. If an ill hears
         * a V1 or V2 query now and we miss seeing the count now,
         * we will see it the next time igmp_slowtimo is called.
         */
        if (ifp->illif_mcast_v1 == 0 && ifp->illif_mcast_v2 == 0)
            continue;

        avl_tree = &ifp->illif_avl_by_ppa;
        for (ill = avl_first(avl_tree); ill != NULL;
            ill = avl_walk(avl_tree, ill, AVL_AFTER)) {
            /* Make sure the ill isn't going away. */
            if (!ill_check_and_refhold(ill))
                continue;
            rw_exit(&ipst->ips_ill_g_lock);
            rw_enter(&ill->ill_mcast_lock, RW_WRITER);
            if (ill->ill_mcast_v1_tset == 1)
                ill->ill_mcast_v1_time++;
            if (ill->ill_mcast_v2_tset == 1)
                ill->ill_mcast_v2_time++;
            if ((ill->ill_mcast_type == IGMP_V1_ROUTER) &&
                (ipst->ips_igmp_max_version >= IGMP_V2_ROUTER) &&
                (ill->ill_mcast_v1_time >= OVQP(ill))) {
                if ((ill->ill_mcast_v2_tset > 0) ||
                    (ipst->ips_igmp_max_version ==
                    IGMP_V2_ROUTER)) {
                    ip1dbg(("V1 query timer "
                        "expired on %s; switching "
                        "mode to IGMP_V2\n",
                        ill->ill_name));
                    ill->ill_mcast_type =
                        IGMP_V2_ROUTER;
                } else {
                    ip1dbg(("V1 query timer "
                        "expired on %s; switching "
                        "mode to IGMP_V3\n",
                        ill->ill_name));
                    ill->ill_mcast_type =
                        IGMP_V3_ROUTER;
                }
                ill->ill_mcast_v1_time = 0;
                ill->ill_mcast_v1_tset = 0;
                atomic_dec_16(&ifp->illif_mcast_v1);
            }
            if ((ill->ill_mcast_type == IGMP_V2_ROUTER) &&
                (ipst->ips_igmp_max_version >= IGMP_V3_ROUTER) &&
                (ill->ill_mcast_v2_time >= OVQP(ill))) {
                ip1dbg(("V2 query timer expired on "
                    "%s; switching mode to IGMP_V3\n",
                    ill->ill_name));
                ill->ill_mcast_type = IGMP_V3_ROUTER;
                ill->ill_mcast_v2_time = 0;
                ill->ill_mcast_v2_tset = 0;
                atomic_dec_16(&ifp->illif_mcast_v2);
            }
            rw_exit(&ill->ill_mcast_lock);
            ill_refrele(ill);
            rw_enter(&ipst->ips_ill_g_lock, RW_READER);
        }
    }
    rw_exit(&ipst->ips_ill_g_lock);
    ill_mcast_timer_start(ipst);
    mutex_enter(&ipst->ips_igmp_slowtimeout_lock);
    if (ipst->ips_igmp_slowtimeout_quiesce != B_TRUE) {
        ipst->ips_igmp_slowtimeout_id = timeout(igmp_slowtimo,
            (void *)ipst, MSEC_TO_TICK(MCAST_SLOWTIMO_INTERVAL));
    } else {
        ipst->ips_igmp_slowtimeout_id = 0;
    }
    mutex_exit(&ipst->ips_igmp_slowtimeout_lock);
}
/*
 * - Resets to newer version if we didn't hear from the older version router
 *   in MLD_AGE_THRESHOLD seconds.
 * - Restarts slowtimeout.
 * Check for ips_mld_max_version ensures that we don't revert to a higher
 * MLD version than configured.
 */
void
mld_slowtimo(void *arg)
{
    ill_t	*ill;
    ill_if_t	*ifp;
    avl_tree_t	*avl_tree;
    ip_stack_t	*ipst = (ip_stack_t *)arg;

    ASSERT(arg != NULL);
    /* See comments in igmp_slowtimo() above... */
    rw_enter(&ipst->ips_ill_g_lock, RW_READER);
    for (ifp = IP_V6_ILL_G_LIST(ipst);
        ifp != (ill_if_t *)&IP_V6_ILL_G_LIST(ipst);
        ifp = ifp->illif_next) {
        if (ifp->illif_mcast_v1 == 0)
            continue;

        avl_tree = &ifp->illif_avl_by_ppa;
        for (ill = avl_first(avl_tree); ill != NULL;
            ill = avl_walk(avl_tree, ill, AVL_AFTER)) {
            /* Make sure the ill isn't going away. */
            if (!ill_check_and_refhold(ill))
                continue;
            rw_exit(&ipst->ips_ill_g_lock);
            rw_enter(&ill->ill_mcast_lock, RW_WRITER);
            if (ill->ill_mcast_v1_tset == 1)
                ill->ill_mcast_v1_time++;
            if ((ill->ill_mcast_type == MLD_V1_ROUTER) &&
                (ipst->ips_mld_max_version >= MLD_V2_ROUTER) &&
                (ill->ill_mcast_v1_time >= OVQP(ill))) {
                ip1dbg(("MLD query timer expired on"
                    " %s; switching mode to MLD_V2\n",
                    ill->ill_name));
                ill->ill_mcast_type = MLD_V2_ROUTER;
                ill->ill_mcast_v1_time = 0;
                ill->ill_mcast_v1_tset = 0;
                atomic_dec_16(&ifp->illif_mcast_v1);
            }
            rw_exit(&ill->ill_mcast_lock);
            ill_refrele(ill);
            rw_enter(&ipst->ips_ill_g_lock, RW_READER);
        }
    }
    rw_exit(&ipst->ips_ill_g_lock);
    ill_mcast_timer_start(ipst);
    mutex_enter(&ipst->ips_mld_slowtimeout_lock);
    if (ipst->ips_mld_slowtimeout_quiesce != B_TRUE) {
        ipst->ips_mld_slowtimeout_id = timeout(mld_slowtimo,
            (void *)ipst, MSEC_TO_TICK(MCAST_SLOWTIMO_INTERVAL));
    } else {
        ipst->ips_mld_slowtimeout_id = 0;
    }
    mutex_exit(&ipst->ips_mld_slowtimeout_lock);
}
/*
 * This will send to ip_output_simple just like icmp_inbound.
 */
static void
igmp_sendpkt(ilm_t *ilm, uchar_t type, ipaddr_t addr)
{
    mblk_t	*mp;
    igmpa_t	*igmpa;
    uint8_t	*rtralert;
    ipha_t	*ipha;
    int		hdrlen = sizeof (ipha_t) + RTRALERT_LEN;
    size_t	size = hdrlen + sizeof (igmpa_t);
    ill_t	*ill = ilm->ilm_ill;
    ip_stack_t	*ipst = ill->ill_ipst;

    ASSERT(RW_LOCK_HELD(&ill->ill_mcast_lock));

    mp = allocb(size, BPRI_HI);
    if (mp == NULL)
        return;
    mp->b_wptr = mp->b_rptr + size;

    ipha = (ipha_t *)mp->b_rptr;
    rtralert = (uint8_t *)&(ipha[1]);
    igmpa = (igmpa_t *)&(rtralert[RTRALERT_LEN]);
    igmpa->igmpa_type = type;
    igmpa->igmpa_code = 0;
    igmpa->igmpa_group = ilm->ilm_addr;
    igmpa->igmpa_cksum = 0;
    igmpa->igmpa_cksum = IP_CSUM(mp, hdrlen, 0);

    rtralert[0] = IPOPT_COPY | IPOPT_RTRALERT;
    rtralert[1] = RTRALERT_LEN;
    rtralert[2] = 0;
    rtralert[3] = 0;

    ipha->ipha_version_and_hdr_length = (IP_VERSION << 4)
        | (IP_SIMPLE_HDR_LENGTH_IN_WORDS + RTRALERT_LEN_IN_WORDS);
    ipha->ipha_type_of_service = 0;
    ipha->ipha_length = htons(size);
    ipha->ipha_ident = 0;
    ipha->ipha_fragment_offset_and_flags = 0;
    ipha->ipha_ttl = IGMP_TTL;
    ipha->ipha_protocol = IPPROTO_IGMP;
    ipha->ipha_hdr_checksum = 0;
    ipha->ipha_dst = addr ? addr : igmpa->igmpa_group;
    ipha->ipha_src = INADDR_ANY;

    ill_mcast_queue(ill, mp);

    ++ipst->ips_igmpstat.igps_snd_reports;
}
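
/*
 * The resulting packet is a 20-byte IPv4 header, a 4-byte Router Alert
 * option and an 8-byte IGMP header.  The destination defaults to the group
 * being reported unless the caller overrides it via 'addr' (e.g. leave
 * messages above are addressed to the all-routers group, 224.0.0.2).
 */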
/*
 * Sends an IGMP_V3_MEMBERSHIP_REPORT message out the ill.
 * The report will contain one group record
 * for each element of reclist.  If this causes packet length to
 * exceed ill->ill_mc_mtu, multiple reports are sent.
 * reclist is assumed to be made up of buffers allocated by mcast_bldmrec(),
 * and those buffers are freed here.
 */
static void
igmpv3_sendrpt(ill_t *ill, mrec_t *reclist)
{
    igmp3ra_t	*igmp3ra;
    grphdra_t	*grphdr;
    mblk_t	*mp;
    ipha_t	*ipha;
    uint8_t	*rtralert;
    ipaddr_t	*src_array;
    int		i, j, numrec, more_src_cnt;
    size_t	hdrsize, size, rsize;
    mrec_t	*rp, *cur_reclist;
    mrec_t	*next_reclist = reclist;
    boolean_t	morepkts;
    ip_stack_t	*ipst = ill->ill_ipst;

    ASSERT(RW_LOCK_HELD(&ill->ill_mcast_lock));

    /* if there aren't any records, there's nothing to send */
    if (reclist == NULL)
        return;

    hdrsize = sizeof (ipha_t) + RTRALERT_LEN;
nextpkt:
    size = hdrsize + sizeof (igmp3ra_t);
    morepkts = B_FALSE;
    more_src_cnt = 0;
    cur_reclist = next_reclist;
    numrec = 0;
    for (rp = cur_reclist; rp != NULL; rp = rp->mrec_next) {
        rsize = sizeof (grphdra_t) +
            (rp->mrec_srcs.sl_numsrc * sizeof (ipaddr_t));
        if (size + rsize > ill->ill_mc_mtu) {
            if (rp == cur_reclist) {
                /*
                 * If the first mrec we looked at is too big
                 * to fit in a single packet (i.e the source
                 * list is too big), we must either truncate
                 * the list (if TO_EX or IS_EX), or send
                 * multiple reports for the same group (all
                 * other types).
                 */
                int srcspace, srcsperpkt;

                srcspace = ill->ill_mc_mtu - (size +
                    sizeof (grphdra_t));

                /*
                 * Skip if there's not even enough room in
                 * a single packet to send something useful.
                 */
                if (srcspace <= sizeof (ipaddr_t))
                    continue;

                srcsperpkt = srcspace / sizeof (ipaddr_t);
                /*
                 * Increment size and numrec, because we will
                 * be sending a record for the mrec we're
                 * looking at now.
                 */
                size += sizeof (grphdra_t) +
                    (srcsperpkt * sizeof (ipaddr_t));
                numrec++;
                if (rp->mrec_type == MODE_IS_EXCLUDE ||
                    rp->mrec_type == CHANGE_TO_EXCLUDE) {
                    rp->mrec_srcs.sl_numsrc = srcsperpkt;
                    if (rp->mrec_next == NULL) {
                        /* no more packets to send */
                        break;
                    } else {
                        /*
                         * more packets, but we're
                         * done with this mrec.
                         */
                        next_reclist = rp->mrec_next;
                    }
                } else {
                    more_src_cnt = rp->mrec_srcs.sl_numsrc
                        - srcsperpkt;
                    rp->mrec_srcs.sl_numsrc = srcsperpkt;
                    /*
                     * We'll fix up this mrec (remove the
                     * srcs we've already sent) before
                     * returning to nextpkt above.
                     */
                    next_reclist = rp;
                }
            } else {
                next_reclist = rp;
            }
            morepkts = B_TRUE;
            break;
        }
        size += rsize;
        numrec++;
    }

    mp = allocb(size, BPRI_HI);
    if (mp == NULL)
        goto free_reclist;
    bzero((char *)mp->b_rptr, size);
    mp->b_wptr = (uchar_t *)(mp->b_rptr + size);

    ipha = (ipha_t *)mp->b_rptr;
    rtralert = (uint8_t *)&(ipha[1]);
    igmp3ra = (igmp3ra_t *)&(rtralert[RTRALERT_LEN]);
    grphdr = (grphdra_t *)&(igmp3ra[1]);
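
    /*
     * At this point the buffer is laid out as:
     *   [ IPv4 header | Router Alert | igmp3ra report header |
     *     group record 1 | group record 2 | ... ]
     * grphdr advances through the group records as they are filled in
     * below.
     */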
    rp = cur_reclist;
    for (i = 0; i < numrec; i++) {
        grphdr->grphdra_type = rp->mrec_type;
        grphdr->grphdra_numsrc = htons(rp->mrec_srcs.sl_numsrc);
        grphdr->grphdra_group = V4_PART_OF_V6(rp->mrec_group);
        src_array = (ipaddr_t *)&(grphdr[1]);

        for (j = 0; j < rp->mrec_srcs.sl_numsrc; j++)
            src_array[j] = V4_PART_OF_V6(rp->mrec_srcs.sl_addr[j]);

        grphdr = (grphdra_t *)&(src_array[j]);
        rp = rp->mrec_next;
    }

    igmp3ra->igmp3ra_type = IGMP_V3_MEMBERSHIP_REPORT;
    igmp3ra->igmp3ra_numrec = htons(numrec);
    igmp3ra->igmp3ra_cksum = IP_CSUM(mp, hdrsize, 0);

    rtralert[0] = IPOPT_COPY | IPOPT_RTRALERT;
    rtralert[1] = RTRALERT_LEN;
    rtralert[2] = 0;
    rtralert[3] = 0;

    ipha->ipha_version_and_hdr_length = IP_VERSION << 4
        | (IP_SIMPLE_HDR_LENGTH_IN_WORDS + RTRALERT_LEN_IN_WORDS);
    ipha->ipha_type_of_service = IPTOS_PREC_INTERNETCONTROL;
    ipha->ipha_length = htons(size);
    ipha->ipha_ttl = IGMP_TTL;
    ipha->ipha_protocol = IPPROTO_IGMP;
    ipha->ipha_dst = htonl(INADDR_ALLRPTS_GROUP);
    ipha->ipha_src = INADDR_ANY;

    ill_mcast_queue(ill, mp);

    ++ipst->ips_igmpstat.igps_snd_reports;

    if (morepkts) {
        if (more_src_cnt > 0) {
            int index, mvsize;
            slist_t *sl = &next_reclist->mrec_srcs;

            index = sl->sl_numsrc;
            mvsize = more_src_cnt * sizeof (in6_addr_t);
            (void) memmove(&sl->sl_addr[0], &sl->sl_addr[index],
                mvsize);
            sl->sl_numsrc = more_src_cnt;
        }
        goto nextpkt;
    }

free_reclist:
    while (reclist != NULL) {
        rp = reclist->mrec_next;
        mi_free(reclist);
        reclist = rp;
    }
}
/*
 * Return NULL for a bad packet that is discarded here.
 * Return mp if the message is OK and should be handed to "raw" receivers.
 * Callers of mld_input() may need to reinitialize variables that were copied
 * from the mblk as this calls pullupmsg().
 */
mblk_t *
mld_input(mblk_t *mp, ip_recv_attr_t *ira)
{
	ip6_t		*ip6h = (ip6_t *)(mp->b_rptr);
	mld_hdr_t	*mldh;
	ilm_t		*ilm;
	ipif_t		*ipif;
	uint16_t	hdr_length, exthdr_length;
	in6_addr_t	*v6group_ptr;
	uint_t		next;
	int		mldlen;
	ill_t		*ill = ira->ira_ill;
	ip_stack_t	*ipst = ill->ill_ipst;

	BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInGroupMembTotal);

	/* Make sure the src address of the packet is link-local */
	if (!(IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_src))) {
		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors);
		freemsg(mp);
		return (NULL);
	}

	if (ip6h->ip6_hlim != 1) {
		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpBadHoplimit);
		freemsg(mp);
		return (NULL);
	}

	/* Get to the icmp header part */
	hdr_length = ira->ira_ip_hdr_length;
	exthdr_length = hdr_length - IPV6_HDR_LEN;

	mldlen = ntohs(ip6h->ip6_plen) - exthdr_length;

	/* An MLD packet must at least be 24 octets to be valid */
	if (mldlen < MLD_MINLEN) {
		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors);
		freemsg(mp);
		return (NULL);
	}

	mldh = (mld_hdr_t *)(&mp->b_rptr[hdr_length]);

	switch (mldh->mld_type) {
	case MLD_LISTENER_QUERY:
		/*
		 * packet length differentiates between v1 and v2.  v1
		 * query should be exactly 24 octets long; v2 is >= 28.
		 */
		if ((mldlen == MLD_MINLEN) ||
		    (ipst->ips_mld_max_version < MLD_V2_ROUTER)) {
			next = mld_query_in(mldh, ill);
		} else if (mldlen >= MLD_V2_QUERY_MINLEN) {
			next = mldv2_query_in((mld2q_t *)mldh, ill, mldlen);
		} else {
			BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors);
			freemsg(mp);
			return (NULL);
		}

		if (next != INFINITY)
			mld_start_timers(next, ipst);
		break;

	case MLD_LISTENER_REPORT:
		/*
		 * For fast leave to work, we have to know that we are the
		 * last person to send a report for this group.  Reports
		 * generated by us are looped back since we could potentially
		 * be a multicast router, so discard reports sourced by me.
		 */
		mutex_enter(&ill->ill_lock);
		for (ipif = ill->ill_ipif; ipif != NULL;
		    ipif = ipif->ipif_next) {
			if (IN6_ARE_ADDR_EQUAL(&ipif->ipif_v6lcl_addr,
			    &ip6h->ip6_src)) {
				if (ip_debug > 1) {
					char buf1[INET6_ADDRSTRLEN];

					(void) mi_strlog(ill->ill_rq,
					    1, SL_TRACE,
					    "mld_input: we are only "
					    "member src %s\n",
					    inet_ntop(AF_INET6, &ip6h->ip6_src,
					    buf1, sizeof (buf1)));
				}
				mutex_exit(&ill->ill_lock);
				return (mp);
			}
		}
		mutex_exit(&ill->ill_lock);
		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInGroupMembResponses);

		v6group_ptr = &mldh->mld_addr;
		if (!IN6_IS_ADDR_MULTICAST(v6group_ptr)) {
			BUMP_MIB(ill->ill_icmp6_mib,
			    ipv6IfIcmpInGroupMembBadReports);
			freemsg(mp);
			return (NULL);
		}

		/*
		 * If we belong to the group being reported, and we are a
		 * 'Delaying member' per the RFC terminology, stop our timer
		 * for that group and 'clear flag' i.e. mark ilm_state as
		 * IGMP_OTHERMEMBER. With zones, there can be multiple group
		 * membership entries for the same group address (one per zone)
		 * so we need to walk the ill_ilm list.
		 */
		rw_enter(&ill->ill_mcast_lock, RW_WRITER);
		for (ilm = ill->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) {
			if (!IN6_ARE_ADDR_EQUAL(&ilm->ilm_v6addr, v6group_ptr))
				continue;
			BUMP_MIB(ill->ill_icmp6_mib,
			    ipv6IfIcmpInGroupMembOurReports);

			ilm->ilm_timer = INFINITY;
			ilm->ilm_state = IGMP_OTHERMEMBER;
		}
		rw_exit(&ill->ill_mcast_lock);
		/*
		 * No packets have been sent above - no
		 * ill_mcast_send_queued is needed.
		 */
		ill_mcast_timer_start(ill->ill_ipst);
		break;

	case MLD_LISTENER_REDUCTION:
		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInGroupMembReductions);
		break;
	}
	return (mp);
}
/*
 * Handles an MLDv1 Listener Query.  Returns 0 on error, or the appropriate
 * (non-zero, unsigned) timer value to be set on success.
 */
static uint_t
mld_query_in(mld_hdr_t *mldh, ill_t *ill)
{
	ilm_t	*ilm;
	int	timer;
	uint_t	next, current;
	in6_addr_t *v6group;

	BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInGroupMembQueries);

	/*
	 * In the MLD specification, there are 3 states and a flag.
	 *
	 * In Non-Listener state, we simply don't have a membership record.
	 * In Delaying state, our timer is running (ilm->ilm_timer < INFINITY)
	 * In Idle Member state, our timer is not running (ilm->ilm_timer ==
	 * INFINITY)
	 *
	 * The flag is ilm->ilm_state, it is set to IGMP_OTHERMEMBER if
	 * we have heard a report from another member, or IGMP_IREPORTEDLAST
	 * if I sent the last report.
	 */
	v6group = &mldh->mld_addr;
	if (!(IN6_IS_ADDR_UNSPECIFIED(v6group)) &&
	    ((!IN6_IS_ADDR_MULTICAST(v6group)))) {
		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInGroupMembBadQueries);
		return (0);
	}

	/* Need to do compatibility mode checking */
	rw_enter(&ill->ill_mcast_lock, RW_WRITER);
	ill->ill_mcast_v1_time = 0;
	ill->ill_mcast_v1_tset = 1;
	if (ill->ill_mcast_type == MLD_V2_ROUTER) {
		ip1dbg(("Received MLDv1 Query on %s, switching mode to "
		    "MLD_V1_ROUTER\n", ill->ill_name));
		atomic_inc_16(&ill->ill_ifptr->illif_mcast_v1);
		ill->ill_mcast_type = MLD_V1_ROUTER;
	}

	timer = (int)ntohs(mldh->mld_maxdelay);
	if (ip_debug > 1) {
		(void) mi_strlog(ill->ill_rq, 1, SL_TRACE,
		    "mld_input: TIMER = mld_maxdelay %d mld_type 0x%x",
		    timer, (int)mldh->mld_type);
	}

	/*
	 * -Start the timers in all of our membership records for
	 * the physical interface on which the query arrived,
	 * excluding:
	 * 1. those that belong to the "all hosts" group,
	 * 2. those with 0 scope, or 1 node-local scope.
	 *
	 * -Restart any timer that is already running but has a value
	 * longer than the requested timeout.
	 *
	 * -Use the value specified in the query message as the
	 * maximum timeout.
	 */
	next = (unsigned)INFINITY;

	current = CURRENT_MSTIME;
	for (ilm = ill->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) {
		ASSERT(!IN6_IS_ADDR_V4MAPPED(&ilm->ilm_v6addr));

		if (IN6_IS_ADDR_UNSPECIFIED(&ilm->ilm_v6addr) ||
		    IN6_IS_ADDR_MC_NODELOCAL(&ilm->ilm_v6addr) ||
		    IN6_IS_ADDR_MC_RESERVED(&ilm->ilm_v6addr))
			continue;
		if ((!IN6_ARE_ADDR_EQUAL(&ilm->ilm_v6addr,
		    &ipv6_all_hosts_mcast)) &&
		    (IN6_IS_ADDR_UNSPECIFIED(v6group)) ||
		    (IN6_ARE_ADDR_EQUAL(v6group, &ilm->ilm_v6addr))) {
			if (timer == 0) {
				/* Respond immediately */
				ilm->ilm_timer = INFINITY;
				ilm->ilm_state = IGMP_IREPORTEDLAST;
				mld_sendpkt(ilm, MLD_LISTENER_REPORT, NULL);
				continue;
			}
			if (ilm->ilm_timer > timer) {
				MCAST_RANDOM_DELAY(ilm->ilm_timer, timer);
				if (ilm->ilm_timer < next)
					next = ilm->ilm_timer;
				ilm->ilm_timer += current;
			}
		}
	}
	rw_exit(&ill->ill_mcast_lock);
	/* Send any deferred/queued IP packets */
	ill_mcast_send_queued(ill);
	ill_mcast_timer_start(ill->ill_ipst);

	return (next);
}
/*
 * Handles an MLDv2 Listener Query.  On error, returns 0; on success,
 * returns the appropriate (non-zero, unsigned) timer value (which may
 * be INFINITY) to be set.
 */
static uint_t
mldv2_query_in(mld2q_t *mld2q, ill_t *ill, int mldlen)
{
	ilm_t	*ilm;
	in6_addr_t *v6group, *src_array;
	uint_t	next, numsrc, i, mrd, delay, qqi, current;
	uint8_t	qrv;

	v6group = &mld2q->mld2q_addr;
	numsrc = ntohs(mld2q->mld2q_numsrc);

	/* make sure numsrc matches packet size */
	if (mldlen < MLD_V2_QUERY_MINLEN + (numsrc * sizeof (in6_addr_t))) {
		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors);
		return (0);
	}
	src_array = (in6_addr_t *)&mld2q[1];

	BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInGroupMembQueries);

	/* extract Maximum Response Delay from code in header */
	mrd = ntohs(mld2q->mld2q_mxrc);
	if (mrd >= MLD_V2_MAXRT_FPMIN) {
		uint_t hdrval, mant, exp;

		hdrval = mrd;
		mant = hdrval & MLD_V2_MAXRT_MANT_MASK;
		exp = (hdrval & MLD_V2_MAXRT_EXP_MASK) >> 12;
		mrd = (mant | 0x1000) << (exp + 3);
	}
	if (mrd == 0)
		mrd = DSEC_TO_MSEC(MCAST_DEF_QUERY_RESP_INTERVAL);
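	/*
	 * Illustrative note, not part of the original source: with the
	 * RFC 3810 fixed-point encoding assumed above (exponent in bits
	 * 12-14, 12-bit mantissa), a Maximum Response Code of 0x9234
	 * decodes as exp = 1 and mant = 0x234, so
	 * mrd = (0x234 | 0x1000) << (1 + 3) = 0x12340 = 74560 ms.
	 */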
	MCAST_RANDOM_DELAY(delay, mrd);
	next = (unsigned)INFINITY;
	current = CURRENT_MSTIME;

	if ((qrv = mld2q->mld2q_sqrv & MLD_V2_RV_MASK) == 0)
		ill->ill_mcast_rv = MCAST_DEF_ROBUSTNESS;
	else
		ill->ill_mcast_rv = qrv;

	if ((qqi = (uint_t)mld2q->mld2q_qqic) >= MLD_V2_QQI_FPMIN) {
		uint_t mant, exp;

		mant = qqi & MLD_V2_QQI_MANT_MASK;
		exp = (qqi & MLD_V2_QQI_EXP_MASK) >> 12;
		qqi = (mant | 0x10) << (exp + 3);
	}
	ill->ill_mcast_qi = (qqi == 0) ? MCAST_DEF_QUERY_INTERVAL : qqi;

	/*
	 * If we have a pending general query response that's scheduled
	 * sooner than the delay we calculated for this response, then
	 * no action is required (MLDv2 draft section 6.2 rule 1)
	 */
	rw_enter(&ill->ill_mcast_lock, RW_WRITER);
	if (ill->ill_global_timer < (current + delay)) {
		rw_exit(&ill->ill_mcast_lock);
		return (next);
	}

	/*
	 * Now take action depending on query type: general,
	 * group specific, or group/source specific.
	 */
	if ((numsrc == 0) && IN6_IS_ADDR_UNSPECIFIED(v6group)) {
		/*
		 * general query
		 * We know global timer is either not running or is
		 * greater than our calculated delay, so reset it to
		 * our delay (random value in range [0, response time])
		 */
		ill->ill_global_timer = current + delay;
		next = delay;
	} else {
		/* group or group/source specific query */
		for (ilm = ill->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) {
			if (IN6_IS_ADDR_UNSPECIFIED(&ilm->ilm_v6addr) ||
			    IN6_IS_ADDR_MC_NODELOCAL(&ilm->ilm_v6addr) ||
			    IN6_IS_ADDR_MC_RESERVED(&ilm->ilm_v6addr) ||
			    !IN6_ARE_ADDR_EQUAL(v6group, &ilm->ilm_v6addr))
				continue;

			/*
			 * If the query is group specific or we have a
			 * pending group specific query, the response is
			 * group specific (pending sources list should be
			 * empty).  Otherwise, need to update the pending
			 * sources list for the group and source specific
			 * response.
			 */
			if (numsrc == 0 || (ilm->ilm_timer < INFINITY &&
			    SLIST_IS_EMPTY(ilm->ilm_pendsrcs))) {
group_query:
				FREE_SLIST(ilm->ilm_pendsrcs);
				ilm->ilm_pendsrcs = NULL;
			} else {
				slist_t *pktl;
				boolean_t overflow;

				if (numsrc > MAX_FILTER_SIZE ||
				    (ilm->ilm_pendsrcs == NULL &&
				    (ilm->ilm_pendsrcs = l_alloc()) == NULL)) {
					/*
					 * We've been sent more sources than
					 * we can deal with; or we can't deal
					 * with a source list at all. Revert
					 * to a group specific query.
					 */
					goto group_query;
				}
				if ((pktl = l_alloc()) == NULL)
					goto group_query;
				pktl->sl_numsrc = numsrc;
				for (i = 0; i < numsrc; i++)
					pktl->sl_addr[i] = src_array[i];
				l_union_in_a(ilm->ilm_pendsrcs, pktl,
				    &overflow);
				l_free(pktl);
				if (overflow)
					goto group_query;
			}

			ilm->ilm_timer = (ilm->ilm_timer == INFINITY) ?
			    INFINITY : (ilm->ilm_timer - current);
			/* set timer to soonest value */
			ilm->ilm_timer = MIN(ilm->ilm_timer, delay);
			if (ilm->ilm_timer < next)
				next = ilm->ilm_timer;
			ilm->ilm_timer += current;
		}
	}
	rw_exit(&ill->ill_mcast_lock);
	/*
	 * No packets have been sent above - no
	 * ill_mcast_send_queued is needed.
	 */
	ill_mcast_timer_start(ill->ill_ipst);

	return (next);
}
/*
 * Send MLDv1 response packet with hoplimit 1
 */
static void
mld_sendpkt(ilm_t *ilm, uchar_t type, const in6_addr_t *v6addr)
{
	mblk_t		*mp;
	mld_hdr_t	*mldh;
	ip6_t		*ip6h;
	struct ip6_hbh	*ip6hbh;
	struct ip6_opt_router	*ip6router;
	size_t		size = IPV6_HDR_LEN + sizeof (mld_hdr_t);
	ill_t		*ill = ilm->ilm_ill;

	ASSERT(RW_LOCK_HELD(&ill->ill_mcast_lock));

	/*
	 * We need to place a router alert option in this packet.  The length
	 * of the options must be a multiple of 8.  The hbh option header is 2
	 * bytes followed by the 4 byte router alert option.  That leaves
	 * 2 bytes of pad for a total of 8 bytes.
	 */
	const int	router_alert_length = 8;
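	/*
	 * Illustrative layout, not part of the original source; under the
	 * assumptions in the comment above, the 8 option bytes end up as
	 *	[ next header | hdr ext len | alert type | alert len |
	 *	  alert value (2 bytes) | 2 pad bytes (zero) ]
	 * with the pad bytes supplied by the bzero() of the packet below.
	 */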
	ASSERT(ill->ill_isv6);

	size += router_alert_length;
	mp = allocb(size, BPRI_HI);
	if (mp == NULL)
		return;
	bzero(mp->b_rptr, size);
	mp->b_wptr = mp->b_rptr + size;

	ip6h = (ip6_t *)mp->b_rptr;
	ip6hbh = (struct ip6_hbh *)&ip6h[1];
	ip6router = (struct ip6_opt_router *)&ip6hbh[1];
	/*
	 * A zero is a pad option of length 1.  The bzero of the whole packet
	 * above will pad between ip6router and mld.
	 */
	mldh = (mld_hdr_t *)((uint8_t *)ip6hbh + router_alert_length);

	mldh->mld_type = type;
	mldh->mld_addr = ilm->ilm_v6addr;

	ip6router->ip6or_type = IP6OPT_ROUTER_ALERT;
	ip6router->ip6or_len = 2;
	ip6router->ip6or_value[0] = 0;
	ip6router->ip6or_value[1] = IP6_ALERT_MLD;

	ip6hbh->ip6h_nxt = IPPROTO_ICMPV6;
	ip6hbh->ip6h_len = 0;

	ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW;
	ip6h->ip6_plen = htons(sizeof (*mldh) + router_alert_length);
	ip6h->ip6_nxt = IPPROTO_HOPOPTS;
	ip6h->ip6_hops = MLD_HOP_LIMIT;
	if (v6addr == NULL)
		ip6h->ip6_dst = ilm->ilm_v6addr;
	else
		ip6h->ip6_dst = *v6addr;

	ip6h->ip6_src = ipv6_all_zeros;
	/*
	 * Prepare for checksum by putting icmp length in the icmp
	 * checksum field. The checksum is calculated in ip_output.
	 */
	mldh->mld_cksum = htons(sizeof (*mldh));

	ill_mcast_queue(ill, mp);
}
/*
 * Sends an MLD_V2_LISTENER_REPORT message out the passed-in ill.  The
 * report will contain one multicast address record for each element of
 * reclist.  If this causes packet length to exceed ill->ill_mc_mtu,
 * multiple reports are sent.  reclist is assumed to be made up of
 * buffers allocated by mcast_bldmrec(), and those buffers are freed here.
 */
static void
mldv2_sendrpt(ill_t *ill, mrec_t *reclist)
{
	mblk_t		*mp;
	mld2r_t		*mld2r;
	mld2mar_t	*mld2mar;
	in6_addr_t	*srcarray;
	ip6_t		*ip6h;
	ip6_hbh_t	*ip6hbh;
	struct ip6_opt_router	*ip6router;
	size_t		size, optlen, padlen, icmpsize, rsize;
	int		i, numrec, more_src_cnt;
	mrec_t		*rp, *cur_reclist;
	mrec_t		*next_reclist = reclist;
	boolean_t	morepkts;

	/* If there aren't any records, there's nothing to send */
	if (reclist == NULL)
		return;

	ASSERT(ill->ill_isv6);
	ASSERT(RW_LOCK_HELD(&ill->ill_mcast_lock));

	/*
	 * Total option length (optlen + padlen) must be a multiple of
	 * 8 bytes.  We assume here that optlen <= 8, so the total option
	 * length will be 8.  Assert this in case anything ever changes.
	 */
	optlen = sizeof (ip6_hbh_t) + sizeof (struct ip6_opt_router);
	ASSERT(optlen <= 8);
	padlen = 8 - optlen;
nextpkt:
	icmpsize = sizeof (mld2r_t);
	size = IPV6_HDR_LEN + optlen + padlen + icmpsize;
	morepkts = B_FALSE;
	more_src_cnt = 0;
	for (rp = cur_reclist = next_reclist, numrec = 0; rp != NULL;
	    rp = rp->mrec_next, numrec++) {
		rsize = sizeof (mld2mar_t) +
		    (rp->mrec_srcs.sl_numsrc * sizeof (in6_addr_t));
		if (size + rsize > ill->ill_mc_mtu) {
			if (rp == cur_reclist) {
				/*
				 * If the first mrec we looked at is too big
				 * to fit in a single packet (i.e the source
				 * list is too big), we must either truncate
				 * the list (if TO_EX or IS_EX), or send
				 * multiple reports for the same group (all
				 * other types).
				 */
				int srcspace, srcsperpkt;
				srcspace = ill->ill_mc_mtu -
				    (size + sizeof (mld2mar_t));

				/*
				 * Skip if there's not even enough room in
				 * a single packet to send something useful.
				 */
				if (srcspace <= sizeof (in6_addr_t))
					continue;

				srcsperpkt = srcspace / sizeof (in6_addr_t);
				/*
				 * Increment icmpsize and size, because we
				 * will be sending a record for the mrec
				 * we're looking at now.
				 */
				rsize = sizeof (mld2mar_t) +
				    (srcsperpkt * sizeof (in6_addr_t));
				icmpsize += rsize;
				size += rsize;
				if (rp->mrec_type == MODE_IS_EXCLUDE ||
				    rp->mrec_type == CHANGE_TO_EXCLUDE) {
					rp->mrec_srcs.sl_numsrc = srcsperpkt;
					if (rp->mrec_next == NULL) {
						/* no more packets to send */
						break;
					} else {
						/*
						 * more packets, but we're
						 * done with this mrec.
						 */
						next_reclist = rp->mrec_next;
					}
				} else {
					more_src_cnt = rp->mrec_srcs.sl_numsrc
					    - srcsperpkt;
					rp->mrec_srcs.sl_numsrc = srcsperpkt;
					/*
					 * We'll fix up this mrec (remove the
					 * srcs we've already sent) before
					 * returning to nextpkt above.
					 */
					next_reclist = rp;
				}
			} else {
				next_reclist = rp;
			}
			morepkts = B_TRUE;
			break;
		}
		icmpsize += rsize;
		size += rsize;
	}

	mp = allocb(size, BPRI_HI);
	if (mp == NULL)
		goto free_reclist;
	bzero(mp->b_rptr, size);
	mp->b_wptr = mp->b_rptr + size;

	ip6h = (ip6_t *)mp->b_rptr;
	ip6hbh = (ip6_hbh_t *)&(ip6h[1]);
	ip6router = (struct ip6_opt_router *)&(ip6hbh[1]);
	mld2r = (mld2r_t *)((uint8_t *)ip6hbh + optlen + padlen);
	mld2mar = (mld2mar_t *)&(mld2r[1]);

	ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW;
	ip6h->ip6_plen = htons(optlen + padlen + icmpsize);
	ip6h->ip6_nxt = IPPROTO_HOPOPTS;
	ip6h->ip6_hops = MLD_HOP_LIMIT;
	ip6h->ip6_dst = ipv6_all_v2rtrs_mcast;
	ip6h->ip6_src = ipv6_all_zeros;

	ip6hbh->ip6h_nxt = IPPROTO_ICMPV6;
	/*
	 * ip6h_len is the number of 8-byte words, not including the first
	 * 8 bytes; we've assumed optlen + padlen == 8 bytes; hence len = 0.
	 */
	ip6hbh->ip6h_len = 0;

	ip6router->ip6or_type = IP6OPT_ROUTER_ALERT;
	ip6router->ip6or_len = 2;
	ip6router->ip6or_value[0] = 0;
	ip6router->ip6or_value[1] = IP6_ALERT_MLD;

	mld2r->mld2r_type = MLD_V2_LISTENER_REPORT;
	mld2r->mld2r_nummar = htons(numrec);
	/*
	 * Prepare for the checksum by putting icmp length in the icmp
	 * checksum field. The checksum is calculated in ip_output_simple.
	 */
	mld2r->mld2r_cksum = htons(icmpsize);

	for (rp = cur_reclist; rp != NULL; rp = rp->mrec_next) {
		mld2mar->mld2mar_type = rp->mrec_type;
		mld2mar->mld2mar_auxlen = 0;
		mld2mar->mld2mar_numsrc = htons(rp->mrec_srcs.sl_numsrc);
		mld2mar->mld2mar_group = rp->mrec_group;
		srcarray = (in6_addr_t *)&(mld2mar[1]);

		for (i = 0; i < rp->mrec_srcs.sl_numsrc; i++)
			srcarray[i] = rp->mrec_srcs.sl_addr[i];

		mld2mar = (mld2mar_t *)&(srcarray[i]);
	}

	ill_mcast_queue(ill, mp);

	if (morepkts) {
		if (more_src_cnt > 0) {
			int index, mvsize;
			slist_t *sl = &next_reclist->mrec_srcs;
			index = sl->sl_numsrc;
			mvsize = more_src_cnt * sizeof (in6_addr_t);
			(void) memmove(&sl->sl_addr[0], &sl->sl_addr[index],
			    mvsize);
			sl->sl_numsrc = more_src_cnt;
		}
		goto nextpkt;
	}

free_reclist:
	while (reclist != NULL) {
		rp = reclist->mrec_next;
		mi_free(reclist);
		reclist = rp;
	}
}
static mrec_t *
mcast_bldmrec(mcast_record_t type, in6_addr_t *grp, slist_t *srclist,
    mrec_t *next)
{
	mrec_t *rp;
	int i;

	if ((type == ALLOW_NEW_SOURCES || type == BLOCK_OLD_SOURCES) &&
	    SLIST_IS_EMPTY(srclist))
		return (next);

	rp = (mrec_t *)mi_alloc(sizeof (mrec_t), BPRI_HI);
	if (rp == NULL)
		return (next);

	rp->mrec_next = next;
	rp->mrec_type = type;
	rp->mrec_auxlen = 0;
	rp->mrec_group = *grp;
	if (srclist == NULL) {
		rp->mrec_srcs.sl_numsrc = 0;
	} else {
		rp->mrec_srcs.sl_numsrc = srclist->sl_numsrc;
		for (i = 0; i < srclist->sl_numsrc; i++)
			rp->mrec_srcs.sl_addr[i] = srclist->sl_addr[i];
	}

	return (rp);
}
/*
 * Set up initial retransmit state.  If memory cannot be allocated for
 * the source lists, simply create as much state as is possible; memory
 * allocation failures are considered one type of transient error that
 * the retransmissions are designed to overcome (and if they aren't
 * transient, there are bigger problems than failing to notify the
 * router about multicast group membership state changes).
 */
static void
mcast_init_rtx(ill_t *ill, rtx_state_t *rtxp, mcast_record_t rtype,
    slist_t *flist)
{
	/*
	 * There are only three possibilities for rtype:
	 *	New join, transition from INCLUDE {} to INCLUDE {flist}
	 *	  => rtype is ALLOW_NEW_SOURCES
	 *	New join, transition from INCLUDE {} to EXCLUDE {flist}
	 *	  => rtype is CHANGE_TO_EXCLUDE
	 *	State change that involves a filter mode change
	 *	  => rtype is either CHANGE_TO_INCLUDE or CHANGE_TO_EXCLUDE
	 */
	ASSERT(rtype == CHANGE_TO_EXCLUDE || rtype == CHANGE_TO_INCLUDE ||
	    rtype == ALLOW_NEW_SOURCES);

	rtxp->rtx_cnt = ill->ill_mcast_rv;

	switch (rtype) {
	case CHANGE_TO_EXCLUDE:
		rtxp->rtx_fmode_cnt = ill->ill_mcast_rv;
		CLEAR_SLIST(rtxp->rtx_allow);
		COPY_SLIST(flist, rtxp->rtx_block);
		break;
	case ALLOW_NEW_SOURCES:
	case CHANGE_TO_INCLUDE:
		rtxp->rtx_fmode_cnt =
		    rtype == ALLOW_NEW_SOURCES ? 0 : ill->ill_mcast_rv;
		CLEAR_SLIST(rtxp->rtx_block);
		COPY_SLIST(flist, rtxp->rtx_allow);
		break;
	}
}
/*
 * The basic strategy here, as extrapolated from RFC 3810 section 6.1 and
 * RFC 3376 section 5.1, covers three cases:
 * * The current state change is a filter mode change
 *	Set filter mode retransmit counter; set retransmit allow or
 *	block list to new source list as appropriate, and clear the
 *	retransmit list that was not set; send TO_IN or TO_EX with
 *	new source list.
 * * The current state change is a source list change, but the filter
 *   mode retransmit counter is > 0
 *	Decrement filter mode retransmit counter; set retransmit
 *	allow or block list to new source list as appropriate,
 *	and clear the retransmit list that was not set; send TO_IN
 *	or TO_EX with new source list.
 * * The current state change is a source list change, and the filter
 *   mode retransmit counter is 0.
 *	Merge existing rtx allow and block lists with new state:
 *	  rtx_allow = (new allow + rtx_allow) - new block
 *	  rtx_block = (new block + rtx_block) - new allow
 *	Send ALLOW and BLOCK records for new retransmit lists;
 *	decrement retransmit counter.
 *
 * As is the case for mcast_init_rtx(), memory allocation failures are
 * acceptable; we just create as much state as we can.
 */
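/*
 * Illustrative example, not part of the original code: if the current
 * retransmit state is rtx_allow = {S1, S2} and rtx_block = {S3}, and the
 * new change reports are mrec_allow = {S3} and mrec_block = {S2}, then
 * under the third case above
 *	new_allow = {S3} + ({S1, S2} - {S2}) = {S1, S3}
 *	new_block = {S2} + ({S3} - {S3}) = {S2}
 * so S3 moves from the block list to the allow list, S2 moves the other
 * way, and S1 continues to be retransmitted as an ALLOW source.
 */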
static mrec_t *
mcast_merge_rtx(ilm_t *ilm, mrec_t *mreclist, slist_t *flist)
{
	ill_t *ill = ilm->ilm_ill;
	rtx_state_t *rtxp = &ilm->ilm_rtx;
	mcast_record_t txtype;
	mrec_t *rp, *rpnext, *rtnmrec;
	boolean_t ovf;

	if (mreclist == NULL)
		return (mreclist);

	/*
	 * A filter mode change is indicated by a single mrec, which is
	 * either TO_IN or TO_EX.  In this case, we just need to set new
	 * retransmit state as if this were an initial join.  There is
	 * no change to the mrec list.
	 */
	if (mreclist->mrec_type == CHANGE_TO_INCLUDE ||
	    mreclist->mrec_type == CHANGE_TO_EXCLUDE) {
		mcast_init_rtx(ill, rtxp, mreclist->mrec_type,
		    &mreclist->mrec_srcs);
		return (mreclist);
	}

	/*
	 * Only the source list has changed
	 */
	rtxp->rtx_cnt = ill->ill_mcast_rv;
	if (rtxp->rtx_fmode_cnt > 0) {
		/* but we're still sending filter mode change reports */
		rtxp->rtx_fmode_cnt--;
		if (ilm->ilm_fmode == MODE_IS_INCLUDE) {
			CLEAR_SLIST(rtxp->rtx_block);
			COPY_SLIST(flist, rtxp->rtx_allow);
			txtype = CHANGE_TO_INCLUDE;
		} else {
			CLEAR_SLIST(rtxp->rtx_allow);
			COPY_SLIST(flist, rtxp->rtx_block);
			txtype = CHANGE_TO_EXCLUDE;
		}
		/* overwrite first mrec with new info */
		mreclist->mrec_type = txtype;
		l_copy(flist, &mreclist->mrec_srcs);
		/* then free any remaining mrecs */
		for (rp = mreclist->mrec_next; rp != NULL; rp = rpnext) {
			rpnext = rp->mrec_next;
			mi_free(rp);
		}
		mreclist->mrec_next = NULL;
		rtnmrec = mreclist;
	} else {
		mrec_t *allow_mrec, *block_mrec;
		/*
		 * Just send the source change reports; but we need to
		 * recalculate the ALLOW and BLOCK lists based on previous
		 * state and new changes.
		 */
		allow_mrec = block_mrec = NULL;
		for (rp = mreclist; rp != NULL; rp = rp->mrec_next) {
			ASSERT(rp->mrec_type == ALLOW_NEW_SOURCES ||
			    rp->mrec_type == BLOCK_OLD_SOURCES);
			if (rp->mrec_type == ALLOW_NEW_SOURCES)
				allow_mrec = rp;
			else
				block_mrec = rp;
		}
		/*
		 * Perform calculations:
		 *   new_allow = mrec_allow + (rtx_allow - mrec_block)
		 *   new_block = mrec_block + (rtx_block - mrec_allow)
		 *
		 * Each calc requires two steps, for example:
		 *   rtx_allow = rtx_allow - mrec_block;
		 *   new_allow = mrec_allow + rtx_allow;
		 *
		 * Store results in mrec lists, and then copy into rtx lists.
		 * We do it in this order in case the rtx list hasn't been
		 * alloc'd yet; if it hasn't and our alloc fails, that's okay,
		 * Overflows are also okay.
		 */
		if (block_mrec != NULL) {
			l_difference_in_a(rtxp->rtx_allow,
			    &block_mrec->mrec_srcs);
		}
		if (allow_mrec != NULL) {
			l_difference_in_a(rtxp->rtx_block,
			    &allow_mrec->mrec_srcs);
			l_union_in_a(&allow_mrec->mrec_srcs, rtxp->rtx_allow,
			    &ovf);
		}
		if (block_mrec != NULL) {
			l_union_in_a(&block_mrec->mrec_srcs, rtxp->rtx_block,
			    &ovf);
			COPY_SLIST(&block_mrec->mrec_srcs, rtxp->rtx_block);

			rtnmrec = mcast_bldmrec(BLOCK_OLD_SOURCES,
			    &ilm->ilm_v6addr, rtxp->rtx_block, allow_mrec);
		}
		if (allow_mrec != NULL) {
			COPY_SLIST(&allow_mrec->mrec_srcs, rtxp->rtx_allow);

			rtnmrec = mcast_bldmrec(ALLOW_NEW_SOURCES,
			    &ilm->ilm_v6addr, rtxp->rtx_allow, block_mrec);