/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 1991, 2010, Oracle and/or its affiliates. All rights reserved.
 */
/* Copyright (c) 1990 Mentat Inc. */

/*
 * Internet Group Management Protocol (IGMP) routines.
 * Multicast Listener Discovery Protocol (MLD) routines.
 *
 * Written by Steve Deering, Stanford, May 1988.
 * Modified by Rosen Sharma, Stanford, Aug 1994.
 * Modified by Bill Fenner, Xerox PARC, Feb. 1995.
 *
 * MULTICAST 3.5.1.1
 */
#include <sys/types.h>
#include <sys/stream.h>
#include <sys/stropts.h>
#include <sys/strlog.h>
#include <sys/strsun.h>
#include <sys/systm.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/cmn_err.h>
#include <sys/atomic.h>
#include <sys/zone.h>
#include <sys/callb.h>
#include <sys/param.h>
#include <sys/socket.h>
#include <inet/ipclassifier.h>
#include <net/if.h>
#include <net/route.h>
#include <netinet/in.h>
#include <netinet/igmp_var.h>
#include <netinet/ip6.h>
#include <netinet/icmp6.h>
#include <inet/ipsec_impl.h>

#include <inet/common.h>
#include <inet/mi.h>
#include <inet/nd.h>
#include <inet/tunables.h>
#include <inet/ip.h>
#include <inet/ip6.h>
#include <inet/ip_multi.h>
#include <inet/ip_listutils.h>

#include <netinet/igmp.h>
#include <inet/ip_ndp.h>
#include <inet/ip_if.h>
static uint_t igmp_query_in(ipha_t *ipha, igmpa_t *igmpa, ill_t *ill);
static uint_t igmpv3_query_in(igmp3qa_t *igmp3qa, ill_t *ill, int igmplen);
static uint_t mld_query_in(mld_hdr_t *mldh, ill_t *ill);
static uint_t mldv2_query_in(mld2q_t *mld2q, ill_t *ill, int mldlen);
static void igmp_sendpkt(ilm_t *ilm, uchar_t type, ipaddr_t addr);
static void mld_sendpkt(ilm_t *ilm, uchar_t type, const in6_addr_t *v6addr);
static void igmpv3_sendrpt(ill_t *ill, mrec_t *reclist);
static void mldv2_sendrpt(ill_t *ill, mrec_t *reclist);
static mrec_t *mcast_bldmrec(mcast_record_t type, in6_addr_t *grp,
    slist_t *srclist, mrec_t *next);
static void mcast_init_rtx(ill_t *ill, rtx_state_t *rtxp,
    mcast_record_t rtype, slist_t *flist);
static mrec_t *mcast_merge_rtx(ilm_t *ilm, mrec_t *rp, slist_t *flist);
/*
 * Macros used to do timer length conversions.  Timer values are always
 * stored and passed to the timer functions as milliseconds; but the
 * default values and values from the wire may not be.
 *
 * And yes, it's obscure, but decisecond is easier to abbreviate than
 * "tenths of a second".
 */
#define	DSEC_TO_MSEC(dsec)	((dsec) * 100)
#define	SEC_TO_MSEC(sec)	((sec) * 1000)
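
/*
 * For illustration only (not part of the protocol code): an IGMPv2
 * Max Response Time arrives on the wire in tenths of a second, so a
 * query carrying igmpa_code == 100 converts as DSEC_TO_MSEC(100) ==
 * 10000, i.e. a 10-second response window, while the RFC 1112 default
 * of IGMP_MAX_HOST_REPORT_DELAY (10 s) converts as SEC_TO_MSEC(10).
 */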
/*
 * A running timer (scheduled thru timeout) can be cancelled if another
 * timer with a shorter timeout value is scheduled before it has timed
 * out.  When the shorter timer expires, the original timer is updated
 * to account for the time elapsed while the shorter timer ran; but this
 * does not take into account the amount of time already spent in timeout
 * state before being preempted by the shorter timer, that is, the
 * interval between the time it was scheduled and the time it was
 * cancelled.  This can cause delays in sending out multicast membership
 * reports.  To resolve this problem, wallclock time (absolute time) is
 * used instead of deltas (relative time) to track timers.
 *
 * The macro below gets the lbolt value, used for proper timer scheduling
 * and firing, so that multicast membership reports are sent on time.
 * The timer does not fire exactly at the time it was scheduled to;
 * a difference of a few milliseconds is observed, and an offset is used
 * to take care of the difference.
 */

#define	CURRENT_MSTIME	((uint_t)TICK_TO_MSEC(ddi_get_lbolt()))
#define	CURRENT_OFFSET	(999)
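
/*
 * A sketch of the absolute-time convention (illustrative only): a
 * timer that should fire "delay" ms from now is stored as an absolute
 * value,
 *
 *	ilm->ilm_timer = CURRENT_MSTIME + delay;
 *
 * and the timeout handlers below treat it as expired once
 *
 *	ilm->ilm_timer <= CURRENT_MSTIME + CURRENT_OFFSET,
 *
 * so a firing that lands up to 999 ms early still counts as on time.
 */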
/*
 * The first multicast join will trigger the igmp timers / mld timers.
 * The unit for next is milliseconds.
 */
void
igmp_start_timers(unsigned next, ip_stack_t *ipst)
{
    int time_left;
    int ret;
    timeout_id_t id;

    ASSERT(next != 0 && next != INFINITY);

    mutex_enter(&ipst->ips_igmp_timer_lock);

    if (ipst->ips_igmp_timer_setter_active) {
        /*
         * Serialize timer setters, one at a time. If the
         * timer is currently being set by someone,
         * just record the next time when it has to be
         * invoked and return. The current setter will
         * take care.
         */
        ipst->ips_igmp_time_to_next =
            MIN(ipst->ips_igmp_time_to_next, next);
        mutex_exit(&ipst->ips_igmp_timer_lock);
        return;
    } else {
        ipst->ips_igmp_timer_setter_active = B_TRUE;
    }
    if (ipst->ips_igmp_timeout_id == 0) {
        /*
         * The timer is inactive. We need to start a timer if we haven't
         * been asked to quiesce.
         */
        ipst->ips_igmp_time_to_next = next;
        if (ipst->ips_igmp_timer_quiesce != B_TRUE) {
            ipst->ips_igmp_timeout_id =
                timeout(igmp_timeout_handler, (void *)ipst,
                MSEC_TO_TICK(ipst->ips_igmp_time_to_next));
            ipst->ips_igmp_timer_scheduled_last = ddi_get_lbolt();
        }
        ipst->ips_igmp_timer_setter_active = B_FALSE;
        mutex_exit(&ipst->ips_igmp_timer_lock);
        return;
    }

    /*
     * The timer was scheduled sometime back for firing in
     * 'igmp_time_to_next' ms and is active. We need to
     * reschedule the timeout if the new 'next' will happen
     * earlier than the currently scheduled timeout.
     */
    time_left = ipst->ips_igmp_timer_scheduled_last +
        MSEC_TO_TICK(ipst->ips_igmp_time_to_next) - ddi_get_lbolt();
    if (time_left < MSEC_TO_TICK(next)) {
        ipst->ips_igmp_timer_setter_active = B_FALSE;
        mutex_exit(&ipst->ips_igmp_timer_lock);
        return;
    }
    id = ipst->ips_igmp_timeout_id;

    mutex_exit(&ipst->ips_igmp_timer_lock);
    ret = untimeout(id);
    mutex_enter(&ipst->ips_igmp_timer_lock);
    /*
     * The timeout was cancelled, or the timeout handler
     * completed, while we were blocked in the untimeout.
     * No other thread could have set the timer meanwhile
     * since we serialized all the timer setters. Thus
     * no timer is currently active nor executing nor will
     * any timer fire in the future. We start the timer now
     * if needed.
     */
    if (ret == -1) {
        ASSERT(ipst->ips_igmp_timeout_id == 0);
    } else {
        ASSERT(ipst->ips_igmp_timeout_id != 0);
        ipst->ips_igmp_timeout_id = 0;
    }
    if (ipst->ips_igmp_time_to_next != 0 &&
        ipst->ips_igmp_timer_quiesce != B_TRUE) {
        ipst->ips_igmp_time_to_next =
            MIN(ipst->ips_igmp_time_to_next, next);
        ipst->ips_igmp_timeout_id = timeout(igmp_timeout_handler,
            (void *)ipst, MSEC_TO_TICK(ipst->ips_igmp_time_to_next));
        ipst->ips_igmp_timer_scheduled_last = ddi_get_lbolt();
    }
    ipst->ips_igmp_timer_setter_active = B_FALSE;
    mutex_exit(&ipst->ips_igmp_timer_lock);
}
/*
 * mld_start_timers:
 * The unit for next is milliseconds.
 */
void
mld_start_timers(unsigned next, ip_stack_t *ipst)
{
    int time_left;
    int ret;
    timeout_id_t id;

    ASSERT(next != 0 && next != INFINITY);

    mutex_enter(&ipst->ips_mld_timer_lock);
    if (ipst->ips_mld_timer_setter_active) {
        /*
         * Serialize timer setters, one at a time. If the
         * timer is currently being set by someone,
         * just record the next time when it has to be
         * invoked and return. The current setter will
         * take care.
         */
        ipst->ips_mld_time_to_next =
            MIN(ipst->ips_mld_time_to_next, next);
        mutex_exit(&ipst->ips_mld_timer_lock);
        return;
    } else {
        ipst->ips_mld_timer_setter_active = B_TRUE;
    }
    if (ipst->ips_mld_timeout_id == 0) {
        /*
         * The timer is inactive. We need to start a timer, if we
         * haven't been asked to quiesce.
         */
        ipst->ips_mld_time_to_next = next;
        if (ipst->ips_mld_timer_quiesce != B_TRUE) {
            ipst->ips_mld_timeout_id = timeout(mld_timeout_handler,
                (void *)ipst,
                MSEC_TO_TICK(ipst->ips_mld_time_to_next));
            ipst->ips_mld_timer_scheduled_last = ddi_get_lbolt();
        }
        ipst->ips_mld_timer_setter_active = B_FALSE;
        mutex_exit(&ipst->ips_mld_timer_lock);
        return;
    }

    /*
     * The timer was scheduled sometime back for firing in
     * 'mld_time_to_next' ms and is active. We need to
     * reschedule the timeout if the new 'next' will happen
     * earlier than the currently scheduled timeout.
     */
    time_left = ipst->ips_mld_timer_scheduled_last +
        MSEC_TO_TICK(ipst->ips_mld_time_to_next) - ddi_get_lbolt();
    if (time_left < MSEC_TO_TICK(next)) {
        ipst->ips_mld_timer_setter_active = B_FALSE;
        mutex_exit(&ipst->ips_mld_timer_lock);
        return;
    }
    id = ipst->ips_mld_timeout_id;

    mutex_exit(&ipst->ips_mld_timer_lock);
    ret = untimeout(id);
    mutex_enter(&ipst->ips_mld_timer_lock);
    /*
     * The timeout was cancelled, or the timeout handler
     * completed, while we were blocked in the untimeout.
     * No other thread could have set the timer meanwhile
     * since we serialized all the timer setters. Thus
     * no timer is currently active nor executing nor will
     * any timer fire in the future. We start the timer now
     * if needed.
     */
    if (ret == -1) {
        ASSERT(ipst->ips_mld_timeout_id == 0);
    } else {
        ASSERT(ipst->ips_mld_timeout_id != 0);
        ipst->ips_mld_timeout_id = 0;
    }
    if (ipst->ips_mld_time_to_next != 0 &&
        ipst->ips_mld_timer_quiesce == B_FALSE) {
        ipst->ips_mld_time_to_next =
            MIN(ipst->ips_mld_time_to_next, next);
        ipst->ips_mld_timeout_id = timeout(mld_timeout_handler,
            (void *)ipst, MSEC_TO_TICK(ipst->ips_mld_time_to_next));
        ipst->ips_mld_timer_scheduled_last = ddi_get_lbolt();
    }
    ipst->ips_mld_timer_setter_active = B_FALSE;
    mutex_exit(&ipst->ips_mld_timer_lock);
}
/*
 * igmp_input:
 * Return NULL for a bad packet that is discarded here.
 * Return mp if the message is OK and should be handed to "raw" receivers.
 * Callers of igmp_input() may need to reinitialize variables that were copied
 * from the mblk as this calls pullupmsg().
 */
mblk_t *
igmp_input(mblk_t *mp, ip_recv_attr_t *ira)
{
    igmpa_t *igmpa;
    ipha_t *ipha = (ipha_t *)(mp->b_rptr);
    int iphlen, igmplen, mblklen;
    ilm_t *ilm;
    uint32_t src, dst;
    uint32_t group;
    in6_addr_t v6group;
    uint_t next;
    ipif_t *ipif;
    ill_t *ill = ira->ira_ill;
    ip_stack_t *ipst = ill->ill_ipst;

    ASSERT(!ill->ill_isv6);
    ++ipst->ips_igmpstat.igps_rcv_total;

    mblklen = MBLKL(mp);
    iphlen = ira->ira_ip_hdr_length;
    if (mblklen < 1 || mblklen < iphlen) {
        ++ipst->ips_igmpstat.igps_rcv_tooshort;
        goto bad_pkt;
    }
    igmplen = ira->ira_pktlen - iphlen;
    /*
     * Since msg sizes are more variable with v3, just pullup the
     * whole thing now.
     */
    if (MBLKL(mp) < (igmplen + iphlen)) {
        mblk_t *mp1;
        if ((mp1 = msgpullup(mp, -1)) == NULL) {
            ++ipst->ips_igmpstat.igps_rcv_tooshort;
            goto bad_pkt;
        }
        freemsg(mp);
        mp = mp1;
        ipha = (ipha_t *)(mp->b_rptr);
    }

    /*
     * Validate lengths
     */
    if (igmplen < IGMP_MINLEN) {
        ++ipst->ips_igmpstat.igps_rcv_tooshort;
        goto bad_pkt;
    }

    igmpa = (igmpa_t *)(&mp->b_rptr[iphlen]);
    src = ipha->ipha_src;
    dst = ipha->ipha_dst;
    if (ip_debug > 1)
        (void) mi_strlog(ill->ill_rq, 1, SL_TRACE,
            "igmp_input: src 0x%x, dst 0x%x on %s\n",
            (int)ntohl(src), (int)ntohl(dst),
            ill->ill_name);

    switch (igmpa->igmpa_type) {
    case IGMP_MEMBERSHIP_QUERY:
        /*
         * packet length differentiates between v1/v2 and v3
         * v1/v2 should be exactly 8 octets long; v3 is >= 12
         */
        if ((igmplen == IGMP_MINLEN) ||
            (ipst->ips_igmp_max_version <= IGMP_V2_ROUTER)) {
            next = igmp_query_in(ipha, igmpa, ill);
        } else if (igmplen >= IGMP_V3_QUERY_MINLEN) {
            next = igmpv3_query_in((igmp3qa_t *)igmpa, ill,
                igmplen);
        } else {
            ++ipst->ips_igmpstat.igps_rcv_tooshort;
            goto bad_pkt;
        }
        if (next == 0)
            goto bad_pkt;

        if (next != INFINITY)
            igmp_start_timers(next, ipst);

        break;

    case IGMP_V1_MEMBERSHIP_REPORT:
    case IGMP_V2_MEMBERSHIP_REPORT:
        /*
         * For fast leave to work, we have to know that we are the
         * last person to send a report for this group. Reports
         * generated by us are looped back since we could potentially
         * be a multicast router, so discard reports sourced by me.
         */
        mutex_enter(&ill->ill_lock);
        for (ipif = ill->ill_ipif; ipif != NULL;
            ipif = ipif->ipif_next) {
            if (ipif->ipif_lcl_addr == src) {
                if (ip_debug > 1) {
                    (void) mi_strlog(ill->ill_rq,
                        1,
                        SL_TRACE,
                        "igmp_input: we are only "
                        "member src 0x%x\n",
                        (int)ntohl(src));
                }
                mutex_exit(&ill->ill_lock);
                return (mp);
            }
        }
        mutex_exit(&ill->ill_lock);

        ++ipst->ips_igmpstat.igps_rcv_reports;
        group = igmpa->igmpa_group;
        if (!CLASSD(group)) {
            ++ipst->ips_igmpstat.igps_rcv_badreports;
            goto bad_pkt;
        }

        /*
         * KLUDGE: if the IP source address of the report has an
         * unspecified (i.e., zero) subnet number, as is allowed for
         * a booting host, replace it with the correct subnet number
         * so that a process-level multicast routing daemon can
         * determine which subnet it arrived from. This is necessary
         * to compensate for the lack of any way for a process to
         * determine the arrival interface of an incoming packet.
         *
         * Requires that a copy of *this* message is passed up
         * to the raw interface, which is done by our caller.
         */
        if ((src & htonl(0xFF000000U)) == 0) { /* Minimum net mask */
            /* Pick the first ipif on this ill */
            mutex_enter(&ill->ill_lock);
            src = ill->ill_ipif->ipif_subnet;
            mutex_exit(&ill->ill_lock);
            ip1dbg(("igmp_input: changed src to 0x%x\n",
                (int)ntohl(src)));
            ipha->ipha_src = src;
        }
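
        /*
         * Illustrative note (not from the original source): under the
         * minimum classful net mask 0xFF000000 checked above, a report
         * sourced from, say, 0.0.0.5 (host bits only, as a booting
         * host may use) has a zero network part, so its source is
         * rewritten to this ill's subnet address (ipif_subnet), e.g. a
         * hypothetical 192.168.1.0, before being passed up.
         */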
        /*
         * If our ill has ILMs that belong to the group being
         * reported, and we are a 'Delaying Member' in the RFC
         * terminology, stop our timer for that group and 'clear
         * flag' i.e. mark as IGMP_OTHERMEMBER.
         */
        rw_enter(&ill->ill_mcast_lock, RW_WRITER);
        IN6_IPADDR_TO_V4MAPPED(group, &v6group);
        for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) {
            if (!IN6_ARE_ADDR_EQUAL(&ilm->ilm_v6addr, &v6group))
                continue;

            ++ipst->ips_igmpstat.igps_rcv_ourreports;
            ilm->ilm_timer = INFINITY;
            ilm->ilm_state = IGMP_OTHERMEMBER;
        } /* for */
        rw_exit(&ill->ill_mcast_lock);
        ill_mcast_timer_start(ill->ill_ipst);
        break;

    case IGMP_V3_MEMBERSHIP_REPORT:
        /*
         * Currently nothing to do here; IGMP router is not
         * implemented in ip, and v3 hosts don't pay attention
         * to membership reports.
         */
        break;
    }
    /*
     * Pass all valid IGMP packets up to any process(es) listening
     * on a raw IGMP socket. Do not free the packet.
     */
    return (mp);

bad_pkt:
    freemsg(mp);
    return (NULL);
}
static uint_t
igmp_query_in(ipha_t *ipha, igmpa_t *igmpa, ill_t *ill)
{
    ilm_t *ilm;
    int timer;
    uint_t next, current;
    ip_stack_t *ipst;

    ipst = ill->ill_ipst;
    ++ipst->ips_igmpstat.igps_rcv_queries;

    rw_enter(&ill->ill_mcast_lock, RW_WRITER);
    /*
     * In the IGMPv2 specification, there are 3 states and a flag.
     *
     * In Non-Member state, we simply don't have a membership record.
     * In Delaying Member state, our timer is running (ilm->ilm_timer
     * < INFINITY). In Idle Member state, our timer is not running
     * (ilm->ilm_timer == INFINITY).
     *
     * The flag is ilm->ilm_state, it is set to IGMP_OTHERMEMBER if
     * we have heard a report from another member, or IGMP_IREPORTEDLAST
     * if I sent the last report.
     */
    if ((igmpa->igmpa_code == 0) ||
        (ipst->ips_igmp_max_version == IGMP_V1_ROUTER)) {
        /*
         * Query from an old router.
         * Remember that the querier on this interface is old,
         * and set the timer to the value in RFC 1112.
         */
        ill->ill_mcast_v1_time = 0;
        ill->ill_mcast_v1_tset = 1;
        if (ill->ill_mcast_type != IGMP_V1_ROUTER) {
            ip1dbg(("Received IGMPv1 Query on %s, switching mode "
                "to IGMP_V1_ROUTER\n", ill->ill_name));
            atomic_inc_16(&ill->ill_ifptr->illif_mcast_v1);
            ill->ill_mcast_type = IGMP_V1_ROUTER;
        }

        timer = SEC_TO_MSEC(IGMP_MAX_HOST_REPORT_DELAY);

        if (ipha->ipha_dst != htonl(INADDR_ALLHOSTS_GROUP) ||
            igmpa->igmpa_group != 0) {
            ++ipst->ips_igmpstat.igps_rcv_badqueries;
            rw_exit(&ill->ill_mcast_lock);
            ill_mcast_timer_start(ill->ill_ipst);
            return (0);
        }

    } else {
        in_addr_t group;

        /*
         * Query from a new router.
         * Simply do a validity check.
         */
        group = igmpa->igmpa_group;
        if (group != 0 && (!CLASSD(group))) {
            ++ipst->ips_igmpstat.igps_rcv_badqueries;
            rw_exit(&ill->ill_mcast_lock);
            ill_mcast_timer_start(ill->ill_ipst);
            return (0);
        }

        /*
         * Switch interface state to v2 on receipt of a v2 query
         * ONLY IF current state is v3. Let things be if current
         * state is v1, but do reset the v2-querier-present timer.
         */
        if (ill->ill_mcast_type == IGMP_V3_ROUTER) {
            ip1dbg(("Received IGMPv2 Query on %s, switching mode "
                "to IGMP_V2_ROUTER", ill->ill_name));
            atomic_inc_16(&ill->ill_ifptr->illif_mcast_v2);
            ill->ill_mcast_type = IGMP_V2_ROUTER;
        }
        ill->ill_mcast_v2_time = 0;
        ill->ill_mcast_v2_tset = 1;

        timer = DSEC_TO_MSEC((int)igmpa->igmpa_code);
    }

    if (ip_debug > 1) {
        (void) mi_strlog(ill->ill_rq, 1, SL_TRACE,
            "igmp_input: TIMER = igmp_code %d igmp_type 0x%x",
            (int)ntohs(igmpa->igmpa_code),
            (int)ntohs(igmpa->igmpa_type));
    }

    /*
     * -Start the timers in all of our membership records
     *  for the physical interface on which the query
     *  arrived, excluding those that belong to the "all
     *  hosts" group (224.0.0.1).
     *
     * -Restart any timer that is already running but has
     *  a value longer than the requested timeout.
     *
     * -Use the value specified in the query message as
     *  the maximum timeout.
     */
    next = (unsigned)INFINITY;

    current = CURRENT_MSTIME;
    for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) {

        /*
         * A multicast router joins INADDR_ANY address
         * to enable promiscuous reception of all
         * mcasts from the interface. This INADDR_ANY
         * is stored in the ilm_v6addr as V6 unspec addr.
         */
        if (!IN6_IS_ADDR_V4MAPPED(&ilm->ilm_v6addr))
            continue;
        if (ilm->ilm_addr == htonl(INADDR_ANY))
            continue;
        if (ilm->ilm_addr != htonl(INADDR_ALLHOSTS_GROUP) &&
            (igmpa->igmpa_group == 0) ||
            (igmpa->igmpa_group == ilm->ilm_addr)) {
            if (ilm->ilm_timer > timer) {
                MCAST_RANDOM_DELAY(ilm->ilm_timer, timer);
                if (ilm->ilm_timer < next)
                    next = ilm->ilm_timer;
                ilm->ilm_timer += current;
            }
        }
    }
    rw_exit(&ill->ill_mcast_lock);
    /*
     * No packets have been sent above - no
     * ill_mcast_send_queued is needed.
     */
    ill_mcast_timer_start(ill->ill_ipst);

    return (next);
}
static uint_t
igmpv3_query_in(igmp3qa_t *igmp3qa, ill_t *ill, int igmplen)
{
    uint_t i, next, mrd, qqi, timer, delay, numsrc;
    uint_t current;
    ilm_t *ilm;
    ipaddr_t *src_array;
    uint8_t qrv;
    ip_stack_t *ipst;

    ipst = ill->ill_ipst;
    /* make sure numsrc matches packet size */
    numsrc = ntohs(igmp3qa->igmp3qa_numsrc);
    if (igmplen < IGMP_V3_QUERY_MINLEN + (numsrc * sizeof (ipaddr_t))) {
        ++ipst->ips_igmpstat.igps_rcv_tooshort;
        return (0);
    }
    src_array = (ipaddr_t *)&igmp3qa[1];

    ++ipst->ips_igmpstat.igps_rcv_queries;

    rw_enter(&ill->ill_mcast_lock, RW_WRITER);

    if ((mrd = (uint_t)igmp3qa->igmp3qa_mxrc) >= IGMP_V3_MAXRT_FPMIN) {
        uint_t hdrval, mant, exp;
        hdrval = (uint_t)igmp3qa->igmp3qa_mxrc;
        mant = hdrval & IGMP_V3_MAXRT_MANT_MASK;
        exp = (hdrval & IGMP_V3_MAXRT_EXP_MASK) >> 4;
        mrd = (mant | 0x10) << (exp + 3);
    }
    if (mrd == 0)
        mrd = MCAST_DEF_QUERY_RESP_INTERVAL;
    timer = DSEC_TO_MSEC(mrd);
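
    /*
     * Worked example of the fixed-point decode above (illustrative
     * only): a Max Resp Code of 0x8A has mant = 0x0A and exp = 0, so
     * mrd = (0x0A | 0x10) << (0 + 3) = 26 * 8 = 208 deciseconds, i.e.
     * timer = DSEC_TO_MSEC(208) = 20800 ms. The QQIC field handled
     * below uses the same mant/exp encoding (RFC 3376 sections 4.1.1
     * and 4.1.7).
     */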
    MCAST_RANDOM_DELAY(delay, timer);
    next = (unsigned)INFINITY;
    current = CURRENT_MSTIME;

    if ((qrv = igmp3qa->igmp3qa_sqrv & IGMP_V3_RV_MASK) == 0)
        ill->ill_mcast_rv = MCAST_DEF_ROBUSTNESS;
    else
        ill->ill_mcast_rv = qrv;

    if ((qqi = (uint_t)igmp3qa->igmp3qa_qqic) >= IGMP_V3_QQI_FPMIN) {
        uint_t hdrval, mant, exp;
        hdrval = (uint_t)igmp3qa->igmp3qa_qqic;
        mant = hdrval & IGMP_V3_QQI_MANT_MASK;
        exp = (hdrval & IGMP_V3_QQI_EXP_MASK) >> 4;
        qqi = (mant | 0x10) << (exp + 3);
    }
    ill->ill_mcast_qi = (qqi == 0) ? MCAST_DEF_QUERY_INTERVAL : qqi;

    /*
     * If we have a pending general query response that's scheduled
     * sooner than the delay we calculated for this response, then
     * no action is required (RFC 3376 section 5.2 rule 1)
     */
    if (ill->ill_global_timer < (current + delay)) {
        rw_exit(&ill->ill_mcast_lock);
        ill_mcast_timer_start(ill->ill_ipst);
        return (next);
    }

    /*
     * Now take action depending upon query type:
     * general, group specific, or group/source specific.
     */
    if ((numsrc == 0) && (igmp3qa->igmp3qa_group == INADDR_ANY)) {
        /*
         * general query
         * We know global timer is either not running or is
         * greater than our calculated delay, so reset it to
         * our delay (random value in range [0, response time]).
         */
        ill->ill_global_timer = current + delay;
        next = delay;
    } else {
        /* group or group/source specific query */
        for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) {
            if (!IN6_IS_ADDR_V4MAPPED(&ilm->ilm_v6addr) ||
                (ilm->ilm_addr == htonl(INADDR_ANY)) ||
                (ilm->ilm_addr == htonl(INADDR_ALLHOSTS_GROUP)) ||
                (igmp3qa->igmp3qa_group != ilm->ilm_addr))
                continue;
            /*
             * If the query is group specific or we have a
             * pending group specific query, the response is
             * group specific (pending sources list should be
             * empty). Otherwise, need to update the pending
             * sources list for the group and source specific
             * response.
             */
            if (numsrc == 0 || (ilm->ilm_timer < INFINITY &&
                SLIST_IS_EMPTY(ilm->ilm_pendsrcs))) {
group_query:
                FREE_SLIST(ilm->ilm_pendsrcs);
                ilm->ilm_pendsrcs = NULL;
            } else {
                boolean_t overflow;
                slist_t *pktl;
                if (numsrc > MAX_FILTER_SIZE ||
                    (ilm->ilm_pendsrcs == NULL &&
                    (ilm->ilm_pendsrcs = l_alloc()) == NULL)) {
                    /*
                     * We've been sent more sources than
                     * we can deal with; or we can't deal
                     * with a source list at all. Revert
                     * to a group specific query.
                     */
                    goto group_query;
                }
                if ((pktl = l_alloc()) == NULL)
                    goto group_query;
                pktl->sl_numsrc = numsrc;
                for (i = 0; i < numsrc; i++)
                    IN6_IPADDR_TO_V4MAPPED(src_array[i],
                        &(pktl->sl_addr[i]));
                l_union_in_a(ilm->ilm_pendsrcs, pktl,
                    &overflow);
                l_free(pktl);
                if (overflow)
                    goto group_query;
            }

            ilm->ilm_timer = (ilm->ilm_timer == INFINITY) ?
                INFINITY : (ilm->ilm_timer - current);
            /* choose soonest timer */
            ilm->ilm_timer = MIN(ilm->ilm_timer, delay);
            if (ilm->ilm_timer < next)
                next = ilm->ilm_timer;
            ilm->ilm_timer += current;
        }
    }
    rw_exit(&ill->ill_mcast_lock);
    /*
     * No packets have been sent above - no
     * ill_mcast_send_queued is needed.
     */
    ill_mcast_timer_start(ill->ill_ipst);

    return (next);
}
/*
 * Caller holds ill_mcast_lock. We queue the packet using ill_mcast_queue
 * and it gets sent after the lock is dropped.
 */
void
igmp_joingroup(ilm_t *ilm)
{
    uint_t timer;
    ill_t *ill;
    ip_stack_t *ipst = ilm->ilm_ipst;

    ill = ilm->ilm_ill;

    ASSERT(!ill->ill_isv6);
    ASSERT(RW_WRITE_HELD(&ill->ill_mcast_lock));

    if (ilm->ilm_addr == htonl(INADDR_ALLHOSTS_GROUP)) {
        ilm->ilm_rtx.rtx_timer = INFINITY;
        ilm->ilm_state = IGMP_OTHERMEMBER;
    } else {
        ip1dbg(("Querier mode %d, sending report, group %x\n",
            ill->ill_mcast_type, htonl(ilm->ilm_addr)));
        if (ill->ill_mcast_type == IGMP_V1_ROUTER) {
            igmp_sendpkt(ilm, IGMP_V1_MEMBERSHIP_REPORT, 0);
        } else if (ill->ill_mcast_type == IGMP_V2_ROUTER) {
            igmp_sendpkt(ilm, IGMP_V2_MEMBERSHIP_REPORT, 0);
        } else if (ill->ill_mcast_type == IGMP_V3_ROUTER) {
            mrec_t *rp;
            mcast_record_t rtype;
            /*
             * The possible state changes we need to handle here:
             *	Old State	New State	Report
             *
             *	INCLUDE(0)	INCLUDE(X)	ALLOW(X),BLOCK(0)
             *	INCLUDE(0)	EXCLUDE(X)	TO_EX(X)
             *
             * No need to send the BLOCK(0) report; ALLOW(X)
             * is enough.
             */
            rtype = (ilm->ilm_fmode == MODE_IS_INCLUDE) ?
                ALLOW_NEW_SOURCES : CHANGE_TO_EXCLUDE;
            rp = mcast_bldmrec(rtype, &ilm->ilm_v6addr,
                ilm->ilm_filter, NULL);
            igmpv3_sendrpt(ill, rp);
            /*
             * Set up retransmission state. Timer is set below,
             * for both v3 and older versions.
             */
            mcast_init_rtx(ill, &ilm->ilm_rtx, rtype,
                ilm->ilm_filter);
        }

        /* Set the ilm timer value */
        ilm->ilm_rtx.rtx_cnt = ill->ill_mcast_rv;
        MCAST_RANDOM_DELAY(ilm->ilm_rtx.rtx_timer,
            SEC_TO_MSEC(IGMP_MAX_HOST_REPORT_DELAY));
        timer = ilm->ilm_rtx.rtx_timer;
        ilm->ilm_rtx.rtx_timer += CURRENT_MSTIME;
        ilm->ilm_state = IGMP_IREPORTEDLAST;

        /*
         * We are holding ill_mcast_lock here and the timeout
         * handler (igmp_timeout_handler_per_ill) acquires that
         * lock. Hence we can't call igmp_start_timers since it could
         * deadlock in untimeout().
         * Instead the thread which drops ill_mcast_lock will have
         * to call ill_mcast_timer_start().
         */
        mutex_enter(&ipst->ips_igmp_timer_lock);
        ipst->ips_igmp_deferred_next = MIN(timer,
            ipst->ips_igmp_deferred_next);
        mutex_exit(&ipst->ips_igmp_timer_lock);
    }

    if (ip_debug > 1) {
        (void) mi_strlog(ilm->ilm_ill->ill_rq, 1, SL_TRACE,
            "igmp_joingroup: multicast_type %d timer %d",
            (ilm->ilm_ill->ill_mcast_type),
            (int)ntohl(timer));
    }
}
/*
 * Caller holds ill_mcast_lock. We queue the packet using ill_mcast_queue
 * and it gets sent after the lock is dropped.
 */
void
mld_joingroup(ilm_t *ilm)
{
    uint_t timer;
    ill_t *ill;
    ip_stack_t *ipst = ilm->ilm_ipst;

    ill = ilm->ilm_ill;

    ASSERT(ill->ill_isv6);

    ASSERT(RW_WRITE_HELD(&ill->ill_mcast_lock));

    if (IN6_ARE_ADDR_EQUAL(&ipv6_all_hosts_mcast, &ilm->ilm_v6addr)) {
        ilm->ilm_rtx.rtx_timer = INFINITY;
        ilm->ilm_state = IGMP_OTHERMEMBER;
    } else {
        if (ill->ill_mcast_type == MLD_V1_ROUTER) {
            mld_sendpkt(ilm, MLD_LISTENER_REPORT, NULL);
        } else {
            mrec_t *rp;
            mcast_record_t rtype;
            /*
             * The possible state changes we need to handle here:
             *	Old State	New State	Report
             *
             *	INCLUDE(0)	INCLUDE(X)	ALLOW(X),BLOCK(0)
             *	INCLUDE(0)	EXCLUDE(X)	TO_EX(X)
             *
             * No need to send the BLOCK(0) report; ALLOW(X)
             * is enough.
             */
            rtype = (ilm->ilm_fmode == MODE_IS_INCLUDE) ?
                ALLOW_NEW_SOURCES : CHANGE_TO_EXCLUDE;
            rp = mcast_bldmrec(rtype, &ilm->ilm_v6addr,
                ilm->ilm_filter, NULL);
            mldv2_sendrpt(ill, rp);
            /*
             * Set up retransmission state. Timer is set below,
             * for both v2 and v1.
             */
            mcast_init_rtx(ill, &ilm->ilm_rtx, rtype,
                ilm->ilm_filter);
        }

        /* Set the ilm timer value */
        ASSERT(ill->ill_mcast_type != MLD_V2_ROUTER ||
            ilm->ilm_rtx.rtx_cnt > 0);

        ilm->ilm_rtx.rtx_cnt = ill->ill_mcast_rv;
        MCAST_RANDOM_DELAY(ilm->ilm_rtx.rtx_timer,
            SEC_TO_MSEC(ICMP6_MAX_HOST_REPORT_DELAY));
        timer = ilm->ilm_rtx.rtx_timer;
        ilm->ilm_rtx.rtx_timer += CURRENT_MSTIME;
        ilm->ilm_state = IGMP_IREPORTEDLAST;

        /*
         * We are holding ill_mcast_lock here and the timeout
         * handler (mld_timeout_handler_per_ill) acquires that
         * lock. Hence we can't call mld_start_timers since it could
         * deadlock in untimeout().
         * Instead the thread which drops ill_mcast_lock will have
         * to call ill_mcast_timer_start().
         */
        mutex_enter(&ipst->ips_mld_timer_lock);
        ipst->ips_mld_deferred_next = MIN(timer,
            ipst->ips_mld_deferred_next);
        mutex_exit(&ipst->ips_mld_timer_lock);
    }

    if (ip_debug > 1) {
        (void) mi_strlog(ilm->ilm_ill->ill_rq, 1, SL_TRACE,
            "mld_joingroup: multicast_type %d timer %d",
            (ilm->ilm_ill->ill_mcast_type),
            (int)ntohl(timer));
    }
}
/*
 * Caller holds ill_mcast_lock. We queue the packet using ill_mcast_queue
 * and it gets sent after the lock is dropped.
 */
void
igmp_leavegroup(ilm_t *ilm)
{
    ill_t *ill = ilm->ilm_ill;

    ASSERT(!ill->ill_isv6);

    ASSERT(RW_WRITE_HELD(&ill->ill_mcast_lock));
    if (ilm->ilm_state == IGMP_IREPORTEDLAST &&
        ill->ill_mcast_type == IGMP_V2_ROUTER &&
        (ilm->ilm_addr != htonl(INADDR_ALLHOSTS_GROUP))) {
        igmp_sendpkt(ilm, IGMP_V2_LEAVE_GROUP,
            (htonl(INADDR_ALLRTRS_GROUP)));
        return;
    }
    if ((ill->ill_mcast_type == IGMP_V3_ROUTER) &&
        (ilm->ilm_addr != htonl(INADDR_ALLHOSTS_GROUP))) {
        mrec_t *rp;
        /*
         * The possible state changes we need to handle here:
         *	Old State	New State	Report
         *
         *	INCLUDE(X)	INCLUDE(0)	ALLOW(0),BLOCK(X)
         *	EXCLUDE(X)	INCLUDE(0)	TO_IN(0)
         *
         * No need to send the ALLOW(0) report; BLOCK(X) is enough.
         */
        if (ilm->ilm_fmode == MODE_IS_INCLUDE) {
            rp = mcast_bldmrec(BLOCK_OLD_SOURCES, &ilm->ilm_v6addr,
                ilm->ilm_filter, NULL);
        } else {
            rp = mcast_bldmrec(CHANGE_TO_INCLUDE, &ilm->ilm_v6addr,
                NULL, NULL);
        }
        igmpv3_sendrpt(ill, rp);
        return;
    }
}
/*
 * Caller holds ill_mcast_lock. We queue the packet using ill_mcast_queue
 * and it gets sent after the lock is dropped.
 */
void
mld_leavegroup(ilm_t *ilm)
{
    ill_t *ill = ilm->ilm_ill;

    ASSERT(ill->ill_isv6);

    ASSERT(RW_WRITE_HELD(&ill->ill_mcast_lock));
    if (ilm->ilm_state == IGMP_IREPORTEDLAST &&
        ill->ill_mcast_type == MLD_V1_ROUTER &&
        (!IN6_ARE_ADDR_EQUAL(&ipv6_all_hosts_mcast, &ilm->ilm_v6addr))) {
        mld_sendpkt(ilm, MLD_LISTENER_REDUCTION, &ipv6_all_rtrs_mcast);
        return;
    }
    if ((ill->ill_mcast_type == MLD_V2_ROUTER) &&
        (!IN6_ARE_ADDR_EQUAL(&ipv6_all_hosts_mcast, &ilm->ilm_v6addr))) {
        mrec_t *rp;
        /*
         * The possible state changes we need to handle here:
         *	Old State	New State	Report
         *
         *	INCLUDE(X)	INCLUDE(0)	ALLOW(0),BLOCK(X)
         *	EXCLUDE(X)	INCLUDE(0)	TO_IN(0)
         *
         * No need to send the ALLOW(0) report; BLOCK(X) is enough.
         */
        if (ilm->ilm_fmode == MODE_IS_INCLUDE) {
            rp = mcast_bldmrec(BLOCK_OLD_SOURCES, &ilm->ilm_v6addr,
                ilm->ilm_filter, NULL);
        } else {
            rp = mcast_bldmrec(CHANGE_TO_INCLUDE, &ilm->ilm_v6addr,
                NULL, NULL);
        }
        mldv2_sendrpt(ill, rp);
        return;
    }
}
/*
 * Caller holds ill_mcast_lock. We queue the packet using ill_mcast_queue
 * and it gets sent after the lock is dropped.
 */
void
igmp_statechange(ilm_t *ilm, mcast_record_t fmode, slist_t *flist)
{
    ill_t *ill;
    mrec_t *rp;
    ip_stack_t *ipst = ilm->ilm_ipst;

    ASSERT(ilm != NULL);

    /* state change reports should only be sent if the router is v3 */
    if (ilm->ilm_ill->ill_mcast_type != IGMP_V3_ROUTER)
        return;

    ill = ilm->ilm_ill;
    ASSERT(RW_WRITE_HELD(&ill->ill_mcast_lock));

    /*
     * Compare existing (old) state with the new state and prepare
     * State Change Report, according to the rules in RFC 3376:
     *
     *	Old State	New State	State Change Report
     *
     *	INCLUDE(A)	INCLUDE(B)	ALLOW(B-A),BLOCK(A-B)
     *	EXCLUDE(A)	EXCLUDE(B)	ALLOW(A-B),BLOCK(B-A)
     *	INCLUDE(A)	EXCLUDE(B)	TO_EX(B)
     *	EXCLUDE(A)	INCLUDE(B)	TO_IN(B)
     */
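    /*
     * A concrete reading of the table (illustrative, not from the
     * original source): staying in INCLUDE mode while moving from
     * A = {s1, s2} to B = {s2, s3} yields ALLOW(B-A) = ALLOW({s3})
     * and BLOCK(A-B) = BLOCK({s1}), which is exactly what the
     * l_difference() calls below compute into b_minus_a and
     * a_minus_b.
     */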
    if (ilm->ilm_fmode == fmode) {
        slist_t *a_minus_b = NULL, *b_minus_a = NULL;
        slist_t *allow, *block;
        if (((a_minus_b = l_alloc()) == NULL) ||
            ((b_minus_a = l_alloc()) == NULL)) {
            l_free(a_minus_b);
            if (ilm->ilm_fmode == MODE_IS_INCLUDE)
                goto send_to_ex;
            else
                goto send_to_in;
        }
        l_difference(ilm->ilm_filter, flist, a_minus_b);
        l_difference(flist, ilm->ilm_filter, b_minus_a);
        if (ilm->ilm_fmode == MODE_IS_INCLUDE) {
            allow = b_minus_a;
            block = a_minus_b;
        } else {
            allow = a_minus_b;
            block = b_minus_a;
        }
        rp = NULL;
        if (!SLIST_IS_EMPTY(allow))
            rp = mcast_bldmrec(ALLOW_NEW_SOURCES, &ilm->ilm_v6addr,
                allow, rp);
        if (!SLIST_IS_EMPTY(block))
            rp = mcast_bldmrec(BLOCK_OLD_SOURCES, &ilm->ilm_v6addr,
                block, rp);
        l_free(a_minus_b);
        l_free(b_minus_a);
    } else if (ilm->ilm_fmode == MODE_IS_INCLUDE) {
send_to_ex:
        rp = mcast_bldmrec(CHANGE_TO_EXCLUDE, &ilm->ilm_v6addr, flist,
            NULL);
    } else {
send_to_in:
        rp = mcast_bldmrec(CHANGE_TO_INCLUDE, &ilm->ilm_v6addr, flist,
            NULL);
    }

    /*
     * Need to set up retransmission state; merge the new info with the
     * current state (which may be null). If the timer is not currently
     * running, the caller will start it when dropping ill_mcast_lock.
     */
    rp = mcast_merge_rtx(ilm, rp, flist);
    if (ilm->ilm_rtx.rtx_timer == INFINITY) {
        ilm->ilm_rtx.rtx_cnt = ill->ill_mcast_rv;
        MCAST_RANDOM_DELAY(ilm->ilm_rtx.rtx_timer,
            SEC_TO_MSEC(IGMP_MAX_HOST_REPORT_DELAY));
        mutex_enter(&ipst->ips_igmp_timer_lock);
        ipst->ips_igmp_deferred_next = MIN(ipst->ips_igmp_deferred_next,
            ilm->ilm_rtx.rtx_timer);
        ilm->ilm_rtx.rtx_timer += CURRENT_MSTIME;
        mutex_exit(&ipst->ips_igmp_timer_lock);
    }

    igmpv3_sendrpt(ill, rp);
}
/*
 * Caller holds ill_mcast_lock. We queue the packet using ill_mcast_queue
 * and it gets sent after the lock is dropped.
 */
void
mld_statechange(ilm_t *ilm, mcast_record_t fmode, slist_t *flist)
{
    ill_t *ill;
    mrec_t *rp = NULL;
    ip_stack_t *ipst = ilm->ilm_ipst;

    ASSERT(ilm != NULL);

    ill = ilm->ilm_ill;
    ASSERT(RW_WRITE_HELD(&ill->ill_mcast_lock));

    /* only need to send if we have an mldv2-capable router */
    if (ill->ill_mcast_type != MLD_V2_ROUTER) {
        return;
    }

    /*
     * Compare existing (old) state with the new state passed in
     * and send appropriate MLDv2 State Change Report.
     *
     *	Old State	New State	State Change Report
     *
     *	INCLUDE(A)	INCLUDE(B)	ALLOW(B-A),BLOCK(A-B)
     *	EXCLUDE(A)	EXCLUDE(B)	ALLOW(A-B),BLOCK(B-A)
     *	INCLUDE(A)	EXCLUDE(B)	TO_EX(B)
     *	EXCLUDE(A)	INCLUDE(B)	TO_IN(B)
     */
    if (ilm->ilm_fmode == fmode) {
        slist_t *a_minus_b = NULL, *b_minus_a = NULL;
        slist_t *allow, *block;
        if (((a_minus_b = l_alloc()) == NULL) ||
            ((b_minus_a = l_alloc()) == NULL)) {
            l_free(a_minus_b);
            if (ilm->ilm_fmode == MODE_IS_INCLUDE)
                goto send_to_ex;
            else
                goto send_to_in;
        }
        l_difference(ilm->ilm_filter, flist, a_minus_b);
        l_difference(flist, ilm->ilm_filter, b_minus_a);
        if (ilm->ilm_fmode == MODE_IS_INCLUDE) {
            allow = b_minus_a;
            block = a_minus_b;
        } else {
            allow = a_minus_b;
            block = b_minus_a;
        }
        if (!SLIST_IS_EMPTY(allow))
            rp = mcast_bldmrec(ALLOW_NEW_SOURCES, &ilm->ilm_v6addr,
                allow, rp);
        if (!SLIST_IS_EMPTY(block))
            rp = mcast_bldmrec(BLOCK_OLD_SOURCES, &ilm->ilm_v6addr,
                block, rp);
        l_free(a_minus_b);
        l_free(b_minus_a);
    } else if (ilm->ilm_fmode == MODE_IS_INCLUDE) {
send_to_ex:
        rp = mcast_bldmrec(CHANGE_TO_EXCLUDE, &ilm->ilm_v6addr, flist,
            NULL);
    } else {
send_to_in:
        rp = mcast_bldmrec(CHANGE_TO_INCLUDE, &ilm->ilm_v6addr, flist,
            NULL);
    }

    /*
     * Need to set up retransmission state; merge the new info with the
     * current state (which may be null). If the timer is not currently
     * running, the caller will start it when dropping ill_mcast_lock.
     */
    rp = mcast_merge_rtx(ilm, rp, flist);
    ASSERT(ilm->ilm_rtx.rtx_cnt > 0);
    if (ilm->ilm_rtx.rtx_timer == INFINITY) {
        ilm->ilm_rtx.rtx_cnt = ill->ill_mcast_rv;
        MCAST_RANDOM_DELAY(ilm->ilm_rtx.rtx_timer,
            SEC_TO_MSEC(ICMP6_MAX_HOST_REPORT_DELAY));
        mutex_enter(&ipst->ips_mld_timer_lock);
        ipst->ips_mld_deferred_next =
            MIN(ipst->ips_mld_deferred_next, ilm->ilm_rtx.rtx_timer);
        ilm->ilm_rtx.rtx_timer += CURRENT_MSTIME;
        mutex_exit(&ipst->ips_mld_timer_lock);
    }

    mldv2_sendrpt(ill, rp);
}
uint_t
igmp_timeout_handler_per_ill(ill_t *ill)
{
    uint_t next = INFINITY, current;
    ilm_t *ilm;
    mrec_t *rp = NULL;
    mrec_t *rtxrp = NULL;
    rtx_state_t *rtxp;
    mcast_record_t rtype;

    rw_enter(&ill->ill_mcast_lock, RW_WRITER);

    current = CURRENT_MSTIME;
    /* First check the global timer on this interface */
    if (ill->ill_global_timer == INFINITY)
        goto per_ilm_timer;
    if (ill->ill_global_timer <= (current + CURRENT_OFFSET)) {
        ill->ill_global_timer = INFINITY;
        /*
         * Send report for each group on this interface.
         * Since we just set the global timer (received a v3 general
         * query), need to skip the all hosts addr (224.0.0.1), per
         * RFC 3376 section 5.
         */
        for (ilm = ill->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) {
            if (ilm->ilm_addr == htonl(INADDR_ALLHOSTS_GROUP))
                continue;
            rp = mcast_bldmrec(ilm->ilm_fmode, &ilm->ilm_v6addr,
                ilm->ilm_filter, rp);
            /*
             * Since we're sending a report on this group, okay
             * to delete pending group-specific timers. Note
             * that group-specific retransmit timers still need
             * to be checked in the per_ilm_timer for-loop.
             */
            ilm->ilm_timer = INFINITY;
            ilm->ilm_state = IGMP_IREPORTEDLAST;
            FREE_SLIST(ilm->ilm_pendsrcs);
            ilm->ilm_pendsrcs = NULL;
        }
        igmpv3_sendrpt(ill, rp);
        rp = NULL;
    } else {
        if ((ill->ill_global_timer - current) < next)
            next = ill->ill_global_timer - current;
    }

per_ilm_timer:
    for (ilm = ill->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) {
        if (ilm->ilm_timer == INFINITY)
            goto per_ilm_rtxtimer;

        if (ilm->ilm_timer > (current + CURRENT_OFFSET)) {
            if ((ilm->ilm_timer - current) < next)
                next = ilm->ilm_timer - current;

            if (ip_debug > 1) {
                (void) mi_strlog(ill->ill_rq, 1, SL_TRACE,
                    "igmp_timo_hlr 2: ilm_timr %d "
                    "typ %d nxt %d",
                    (int)ntohl(ilm->ilm_timer - current),
                    (ill->ill_mcast_type), next);
            }

            goto per_ilm_rtxtimer;
        }

        /* the timer has expired, need to take action */
        ilm->ilm_timer = INFINITY;
        ilm->ilm_state = IGMP_IREPORTEDLAST;
        if (ill->ill_mcast_type == IGMP_V1_ROUTER) {
            igmp_sendpkt(ilm, IGMP_V1_MEMBERSHIP_REPORT, 0);
        } else if (ill->ill_mcast_type == IGMP_V2_ROUTER) {
            igmp_sendpkt(ilm, IGMP_V2_MEMBERSHIP_REPORT, 0);
        } else {
            slist_t *rsp;
            if (!SLIST_IS_EMPTY(ilm->ilm_pendsrcs) &&
                (rsp = l_alloc()) != NULL) {
                /*
                 * Contents of reply depend on pending
                 * requested source list.
                 */
                if (ilm->ilm_fmode == MODE_IS_INCLUDE) {
                    l_intersection(ilm->ilm_filter,
                        ilm->ilm_pendsrcs, rsp);
                } else {
                    l_difference(ilm->ilm_pendsrcs,
                        ilm->ilm_filter, rsp);
                }
                FREE_SLIST(ilm->ilm_pendsrcs);
                ilm->ilm_pendsrcs = NULL;
                if (!SLIST_IS_EMPTY(rsp))
                    rp = mcast_bldmrec(MODE_IS_INCLUDE,
                        &ilm->ilm_v6addr, rsp, rp);
                FREE_SLIST(rsp);
            } else {
                /*
                 * Either the pending request is just group-
                 * specific, or we couldn't get the resources
                 * (rsp) to build a source-specific reply.
                 */
                rp = mcast_bldmrec(ilm->ilm_fmode,
                    &ilm->ilm_v6addr, ilm->ilm_filter, rp);
            }
            igmpv3_sendrpt(ill, rp);
            rp = NULL;
        }

per_ilm_rtxtimer:
        rtxp = &ilm->ilm_rtx;

        if (rtxp->rtx_timer == INFINITY)
            continue;
        if (rtxp->rtx_timer > (current + CURRENT_OFFSET)) {
            if ((rtxp->rtx_timer - current) < next)
                next = rtxp->rtx_timer - current;
            continue;
        }

        rtxp->rtx_timer = INFINITY;
        ilm->ilm_state = IGMP_IREPORTEDLAST;
        if (ill->ill_mcast_type == IGMP_V1_ROUTER) {
            igmp_sendpkt(ilm, IGMP_V1_MEMBERSHIP_REPORT, 0);
            continue;
        }
        if (ill->ill_mcast_type == IGMP_V2_ROUTER) {
            igmp_sendpkt(ilm, IGMP_V2_MEMBERSHIP_REPORT, 0);
            continue;
        }

        /*
         * The retransmit timer has popped, and our router is
         * IGMPv3. We have to delve into the retransmit state
         * stored in the ilm.
         *
         * Decrement the retransmit count. If the fmode rtx
         * count is active, decrement it, and send a filter
         * mode change report with the ilm's source list.
         * Otherwise, send a source list change report with
         * the current retransmit lists.
         */
        ASSERT(rtxp->rtx_cnt > 0);
        ASSERT(rtxp->rtx_cnt >= rtxp->rtx_fmode_cnt);
        rtxp->rtx_cnt--;
        if (rtxp->rtx_fmode_cnt > 0) {
            rtxp->rtx_fmode_cnt--;
            rtype = (ilm->ilm_fmode == MODE_IS_INCLUDE) ?
                CHANGE_TO_INCLUDE : CHANGE_TO_EXCLUDE;
            rtxrp = mcast_bldmrec(rtype, &ilm->ilm_v6addr,
                ilm->ilm_filter, rtxrp);
        } else {
            rtxrp = mcast_bldmrec(ALLOW_NEW_SOURCES,
                &ilm->ilm_v6addr, rtxp->rtx_allow, rtxrp);
            rtxrp = mcast_bldmrec(BLOCK_OLD_SOURCES,
                &ilm->ilm_v6addr, rtxp->rtx_block, rtxrp);
        }
        if (rtxp->rtx_cnt > 0) {
            MCAST_RANDOM_DELAY(rtxp->rtx_timer,
                SEC_TO_MSEC(IGMP_MAX_HOST_REPORT_DELAY));
            if (rtxp->rtx_timer < next)
                next = rtxp->rtx_timer;
            rtxp->rtx_timer += current;
        } else {
            ASSERT(rtxp->rtx_timer == INFINITY);
            CLEAR_SLIST(rtxp->rtx_allow);
            CLEAR_SLIST(rtxp->rtx_block);
        }
        igmpv3_sendrpt(ill, rtxrp);
        rtxrp = NULL;
    }

    rw_exit(&ill->ill_mcast_lock);
    /* Send any deferred/queued IP packets */
    ill_mcast_send_queued(ill);
    /* Defer ill_mcast_timer_start() until the caller is done */

    return (next);
}
/*
 * igmp_timeout_handler:
 * Called when there are timeout events, every next * TMEOUT_INTERVAL (tick).
 * Returns number of ticks to next event (or 0 if none).
 *
 * As part of multicast join and leave igmp we may need to send out an
 * igmp request. The igmp related state variables in the ilm are protected
 * by ill_mcast_lock. A single global igmp timer is used to track igmp timeouts.
 * igmp_timer_lock protects the global igmp_timeout_id. igmp_start_timers
 * starts the igmp timer if needed. It serializes multiple threads trying to
 * simultaneously start the timer using the igmp_timer_setter_active flag.
 *
 * igmp_input() receives igmp queries and responds to the queries
 * in a delayed fashion by posting a timer, i.e. it calls igmp_start_timers().
 * Later the igmp_timer fires, and the timeout handler igmp_timeout_handler()
 * performs the action exclusively after acquiring ill_mcast_lock.
 *
 * The igmp_slowtimo() function is called thru another timer.
 * igmp_slowtimeout_lock protects the igmp_slowtimeout_id.
 */
void
igmp_timeout_handler(void *arg)
{
    ill_t *ill;
    uint_t global_next = INFINITY;
    uint_t next;
    ill_walk_context_t ctx;
    ip_stack_t *ipst = arg;

    ASSERT(arg != NULL);
    mutex_enter(&ipst->ips_igmp_timer_lock);
    ASSERT(ipst->ips_igmp_timeout_id != 0);
    ipst->ips_igmp_timeout_id = 0;
    ipst->ips_igmp_timer_scheduled_last = 0;
    ipst->ips_igmp_time_to_next = 0;
    mutex_exit(&ipst->ips_igmp_timer_lock);

    rw_enter(&ipst->ips_ill_g_lock, RW_READER);
    ill = ILL_START_WALK_V4(&ctx, ipst);
    for (; ill != NULL; ill = ill_next(&ctx, ill)) {
        ASSERT(!ill->ill_isv6);
        /* Make sure the ill isn't going away. */
        if (!ill_check_and_refhold(ill))
            continue;
        rw_exit(&ipst->ips_ill_g_lock);
        next = igmp_timeout_handler_per_ill(ill);
        if (next < global_next)
            global_next = next;
        ill_refrele(ill);
        rw_enter(&ipst->ips_ill_g_lock, RW_READER);
    }
    rw_exit(&ipst->ips_ill_g_lock);
    if (global_next != INFINITY)
        igmp_start_timers(global_next, ipst);
}
/*
 * mld_timeout_handler_per_ill:
 * Called when there are timeout events, every next (tick).
 * Returns number of ticks to next event (or 0 if none).
 */
uint_t
mld_timeout_handler_per_ill(ill_t *ill)
{
    ilm_t *ilm;
    uint_t next = INFINITY, current;
    mrec_t *rp, *rtxrp;
    rtx_state_t *rtxp;
    mcast_record_t rtype;

    rw_enter(&ill->ill_mcast_lock, RW_WRITER);

    current = CURRENT_MSTIME;
    /*
     * First check the global timer on this interface; the global timer
     * is not used for MLDv1, so if it's set we can assume we're v2.
     */
    if (ill->ill_global_timer == INFINITY)
        goto per_ilm_timer;
    if (ill->ill_global_timer <= (current + CURRENT_OFFSET)) {
        ill->ill_global_timer = INFINITY;
        /*
         * Send report for each group on this interface.
         * Since we just set the global timer (received a v2 general
         * query), need to skip the all hosts addr (ff02::1), per
         * RFC 3810 section 6.
         */
        rp = NULL;
        for (ilm = ill->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) {
            if (IN6_ARE_ADDR_EQUAL(&ilm->ilm_v6addr,
                &ipv6_all_hosts_mcast))
                continue;
            rp = mcast_bldmrec(ilm->ilm_fmode, &ilm->ilm_v6addr,
                ilm->ilm_filter, rp);
            /*
             * Since we're sending a report on this group, okay
             * to delete pending group-specific timers. Note
             * that group-specific retransmit timers still need
             * to be checked in the per_ilm_timer for-loop.
             */
            ilm->ilm_timer = INFINITY;
            ilm->ilm_state = IGMP_IREPORTEDLAST;
            FREE_SLIST(ilm->ilm_pendsrcs);
            ilm->ilm_pendsrcs = NULL;
        }
        mldv2_sendrpt(ill, rp);
    } else {
        if ((ill->ill_global_timer - current) < next)
            next = ill->ill_global_timer - current;
    }

per_ilm_timer:
    rp = rtxrp = NULL;
    for (ilm = ill->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) {
        if (ilm->ilm_timer == INFINITY)
            goto per_ilm_rtxtimer;

        if (ilm->ilm_timer > (current + CURRENT_OFFSET)) {
            if ((ilm->ilm_timer - current) < next)
                next = ilm->ilm_timer - current;

            if (ip_debug > 1) {
                (void) mi_strlog(ill->ill_rq, 1, SL_TRACE,
                    "igmp_timo_hlr 2: ilm_timr"
                    " %d typ %d nxt %d",
                    (int)ntohl(ilm->ilm_timer - current),
                    (ill->ill_mcast_type), next);
            }

            goto per_ilm_rtxtimer;
        }

        /* the timer has expired, need to take action */
        ilm->ilm_timer = INFINITY;
        ilm->ilm_state = IGMP_IREPORTEDLAST;
        if (ill->ill_mcast_type == MLD_V1_ROUTER) {
            mld_sendpkt(ilm, MLD_LISTENER_REPORT, NULL);
        } else {
            slist_t *rsp;
            if (!SLIST_IS_EMPTY(ilm->ilm_pendsrcs) &&
                (rsp = l_alloc()) != NULL) {
                /*
                 * Contents of reply depend on pending
                 * requested source list.
                 */
                if (ilm->ilm_fmode == MODE_IS_INCLUDE) {
                    l_intersection(ilm->ilm_filter,
                        ilm->ilm_pendsrcs, rsp);
                } else {
                    l_difference(ilm->ilm_pendsrcs,
                        ilm->ilm_filter, rsp);
                }
                FREE_SLIST(ilm->ilm_pendsrcs);
                ilm->ilm_pendsrcs = NULL;
                if (!SLIST_IS_EMPTY(rsp))
                    rp = mcast_bldmrec(MODE_IS_INCLUDE,
                        &ilm->ilm_v6addr, rsp, rp);
                FREE_SLIST(rsp);
            } else {
                rp = mcast_bldmrec(ilm->ilm_fmode,
                    &ilm->ilm_v6addr, ilm->ilm_filter, rp);
            }
        }

per_ilm_rtxtimer:
        rtxp = &ilm->ilm_rtx;

        if (rtxp->rtx_timer == INFINITY)
            continue;
        if (rtxp->rtx_timer > (current + CURRENT_OFFSET)) {
            if ((rtxp->rtx_timer - current) < next)
                next = rtxp->rtx_timer - current;
            continue;
        }

        rtxp->rtx_timer = INFINITY;
        ilm->ilm_state = IGMP_IREPORTEDLAST;
        if (ill->ill_mcast_type == MLD_V1_ROUTER) {
            mld_sendpkt(ilm, MLD_LISTENER_REPORT, NULL);
            continue;
        }

        /*
         * The retransmit timer has popped, and our router is
         * MLDv2. We have to delve into the retransmit state
         * stored in the ilm.
         *
         * Decrement the retransmit count. If the fmode rtx
         * count is active, decrement it, and send a filter
         * mode change report with the ilm's source list.
         * Otherwise, send a source list change report with
         * the current retransmit lists.
         */
        ASSERT(rtxp->rtx_cnt > 0);
        ASSERT(rtxp->rtx_cnt >= rtxp->rtx_fmode_cnt);
        rtxp->rtx_cnt--;
        if (rtxp->rtx_fmode_cnt > 0) {
            rtxp->rtx_fmode_cnt--;
            rtype = (ilm->ilm_fmode == MODE_IS_INCLUDE) ?
                CHANGE_TO_INCLUDE : CHANGE_TO_EXCLUDE;
            rtxrp = mcast_bldmrec(rtype, &ilm->ilm_v6addr,
                ilm->ilm_filter, rtxrp);
        } else {
            rtxrp = mcast_bldmrec(ALLOW_NEW_SOURCES,
                &ilm->ilm_v6addr, rtxp->rtx_allow, rtxrp);
            rtxrp = mcast_bldmrec(BLOCK_OLD_SOURCES,
                &ilm->ilm_v6addr, rtxp->rtx_block, rtxrp);
        }
        if (rtxp->rtx_cnt > 0) {
            MCAST_RANDOM_DELAY(rtxp->rtx_timer,
                SEC_TO_MSEC(ICMP6_MAX_HOST_REPORT_DELAY));
            if (rtxp->rtx_timer < next)
                next = rtxp->rtx_timer;
            rtxp->rtx_timer += current;
        } else {
            ASSERT(rtxp->rtx_timer == INFINITY);
            CLEAR_SLIST(rtxp->rtx_allow);
            CLEAR_SLIST(rtxp->rtx_block);
        }
    }

    if (ill->ill_mcast_type == MLD_V2_ROUTER) {
        mldv2_sendrpt(ill, rp);
        mldv2_sendrpt(ill, rtxrp);
    }
    rw_exit(&ill->ill_mcast_lock);
    /* Send any deferred/queued IP packets */
    ill_mcast_send_queued(ill);
    /* Defer ill_mcast_timer_start() until the caller is done */

    return (next);
}
/*
 * mld_timeout_handler:
 * Called when there are timeout events, every next * TMEOUT_INTERVAL (tick).
 * Returns number of ticks to next event (or 0 if none).
 * MT issues are the same as for igmp_timeout_handler.
 */
void
mld_timeout_handler(void *arg)
{
    ill_t *ill;
    uint_t global_next = INFINITY;
    uint_t next;
    ill_walk_context_t ctx;
    ip_stack_t *ipst = arg;

    ASSERT(arg != NULL);
    mutex_enter(&ipst->ips_mld_timer_lock);
    ASSERT(ipst->ips_mld_timeout_id != 0);
    ipst->ips_mld_timeout_id = 0;
    ipst->ips_mld_timer_scheduled_last = 0;
    ipst->ips_mld_time_to_next = 0;
    mutex_exit(&ipst->ips_mld_timer_lock);

    rw_enter(&ipst->ips_ill_g_lock, RW_READER);
    ill = ILL_START_WALK_V6(&ctx, ipst);
    for (; ill != NULL; ill = ill_next(&ctx, ill)) {
        ASSERT(ill->ill_isv6);
        /* Make sure the ill isn't going away. */
        if (!ill_check_and_refhold(ill))
            continue;
        rw_exit(&ipst->ips_ill_g_lock);
        next = mld_timeout_handler_per_ill(ill);
        if (next < global_next)
            global_next = next;
        ill_refrele(ill);
        rw_enter(&ipst->ips_ill_g_lock, RW_READER);
    }
    rw_exit(&ipst->ips_ill_g_lock);
    if (global_next != INFINITY)
        mld_start_timers(global_next, ipst);
}
/*
 * Calculate the Older Version Querier Present timeout value, in number
 * of slowtimo intervals, for the given ill.
 */
#define	OVQP(ill) \
	((1000 * (((ill)->ill_mcast_rv * (ill)->ill_mcast_qi) \
	+ MCAST_QUERY_RESP_INTERVAL)) / MCAST_SLOWTIMO_INTERVAL)
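
/*
 * Sketch of the arithmetic (values assumed for illustration; the real
 * MCAST_* constants live in the ip headers): with the RFC 3376 defaults
 * of robustness rv = 2 and query interval qi = 125 s, and taking
 * MCAST_QUERY_RESP_INTERVAL = 10 and MCAST_SLOWTIMO_INTERVAL = 20000 ms,
 * OVQP = (1000 * (2 * 125 + 10)) / 20000 = 13 slowtimo intervals. This
 * is the "Older Version Querier Present" timeout of rv * qi + qri from
 * RFC 3376 section 8.12, expressed in slow-timeout ticks.
 */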
/*
 * igmp_slowtimo:
 * - Resets to a newer version if we didn't hear from the older version
 *   router in IGMP_AGE_THRESHOLD seconds.
 * - Resets slowtimeout.
 * The check for ips_igmp_max_version ensures that we don't revert to a
 * higher IGMP version than configured.
 */
void
igmp_slowtimo(void *arg)
{
    ill_t *ill;
    ill_if_t *ifp;
    avl_tree_t *avl_tree;
    ip_stack_t *ipst = (ip_stack_t *)arg;

    ASSERT(arg != NULL);

    /*
     * The ill_if_t list is circular, hence the odd loop parameters.
     *
     * We can't use the ILL_START_WALK and ill_next() wrappers for this
     * walk, as we need to check the illif_mcast_* fields in the ill_if_t
     * structure (allowing us to skip if none of the instances have timers
     * running).
     */
    rw_enter(&ipst->ips_ill_g_lock, RW_READER);
    for (ifp = IP_V4_ILL_G_LIST(ipst);
        ifp != (ill_if_t *)&IP_V4_ILL_G_LIST(ipst);
        ifp = ifp->illif_next) {
        /*
         * illif_mcast_v[12] are set using atomics. If an ill hears
         * a V1 or V2 query now and we miss seeing the count now,
         * we will see it the next time igmp_slowtimo is called.
         */
        if (ifp->illif_mcast_v1 == 0 && ifp->illif_mcast_v2 == 0)
            continue;

        avl_tree = &ifp->illif_avl_by_ppa;
        for (ill = avl_first(avl_tree); ill != NULL;
            ill = avl_walk(avl_tree, ill, AVL_AFTER)) {
            /* Make sure the ill isn't going away. */
            if (!ill_check_and_refhold(ill))
                continue;
            rw_exit(&ipst->ips_ill_g_lock);
            rw_enter(&ill->ill_mcast_lock, RW_WRITER);
            if (ill->ill_mcast_v1_tset == 1)
                ill->ill_mcast_v1_time++;
            if (ill->ill_mcast_v2_tset == 1)
                ill->ill_mcast_v2_time++;
            if ((ill->ill_mcast_type == IGMP_V1_ROUTER) &&
                (ipst->ips_igmp_max_version >= IGMP_V2_ROUTER) &&
                (ill->ill_mcast_v1_time >= OVQP(ill))) {
                if ((ill->ill_mcast_v2_tset > 0) ||
                    (ipst->ips_igmp_max_version ==
                    IGMP_V2_ROUTER)) {
                    ip1dbg(("V1 query timer "
                        "expired on %s; switching "
                        "mode to IGMP_V2\n",
                        ill->ill_name));
                    ill->ill_mcast_type =
                        IGMP_V2_ROUTER;
                } else {
                    ip1dbg(("V1 query timer "
                        "expired on %s; switching "
                        "mode to IGMP_V3\n",
                        ill->ill_name));
                    ill->ill_mcast_type =
                        IGMP_V3_ROUTER;
                }
                ill->ill_mcast_v1_time = 0;
                ill->ill_mcast_v1_tset = 0;
                atomic_dec_16(&ifp->illif_mcast_v1);
            }
            if ((ill->ill_mcast_type == IGMP_V2_ROUTER) &&
                (ipst->ips_igmp_max_version >= IGMP_V3_ROUTER) &&
                (ill->ill_mcast_v2_time >= OVQP(ill))) {
                ip1dbg(("V2 query timer expired on "
                    "%s; switching mode to IGMP_V3\n",
                    ill->ill_name));
                ill->ill_mcast_type = IGMP_V3_ROUTER;
                ill->ill_mcast_v2_time = 0;
                ill->ill_mcast_v2_tset = 0;
                atomic_dec_16(&ifp->illif_mcast_v2);
            }
            rw_exit(&ill->ill_mcast_lock);
            ill_refrele(ill);
            rw_enter(&ipst->ips_ill_g_lock, RW_READER);
        }
    }
    rw_exit(&ipst->ips_ill_g_lock);
    ill_mcast_timer_start(ipst);
    mutex_enter(&ipst->ips_igmp_slowtimeout_lock);
    if (ipst->ips_igmp_slowtimeout_quiesce != B_TRUE) {
        ipst->ips_igmp_slowtimeout_id = timeout(igmp_slowtimo,
            (void *)ipst, MSEC_TO_TICK(MCAST_SLOWTIMO_INTERVAL));
    } else {
        ipst->ips_igmp_slowtimeout_id = 0;
    }
    mutex_exit(&ipst->ips_igmp_slowtimeout_lock);
}
/*
 * mld_slowtimo:
 * - Resets to a newer version if we didn't hear from the older version
 *   router in MLD_AGE_THRESHOLD seconds.
 * - Restarts slowtimeout.
 * The check for ips_mld_max_version ensures that we don't revert to a
 * higher MLD version than configured.
 */
void
mld_slowtimo(void *arg)
{
    ill_t *ill;
    ill_if_t *ifp;
    avl_tree_t *avl_tree;
    ip_stack_t *ipst = (ip_stack_t *)arg;

    ASSERT(arg != NULL);
    /* See comments in igmp_slowtimo() above... */
    rw_enter(&ipst->ips_ill_g_lock, RW_READER);
    for (ifp = IP_V6_ILL_G_LIST(ipst);
        ifp != (ill_if_t *)&IP_V6_ILL_G_LIST(ipst);
        ifp = ifp->illif_next) {
        if (ifp->illif_mcast_v1 == 0)
            continue;

        avl_tree = &ifp->illif_avl_by_ppa;
        for (ill = avl_first(avl_tree); ill != NULL;
            ill = avl_walk(avl_tree, ill, AVL_AFTER)) {
            /* Make sure the ill isn't going away. */
            if (!ill_check_and_refhold(ill))
                continue;
            rw_exit(&ipst->ips_ill_g_lock);
            rw_enter(&ill->ill_mcast_lock, RW_WRITER);
            if (ill->ill_mcast_v1_tset == 1)
                ill->ill_mcast_v1_time++;
            if ((ill->ill_mcast_type == MLD_V1_ROUTER) &&
                (ipst->ips_mld_max_version >= MLD_V2_ROUTER) &&
                (ill->ill_mcast_v1_time >= OVQP(ill))) {
                ip1dbg(("MLD query timer expired on"
                    " %s; switching mode to MLD_V2\n",
                    ill->ill_name));
                ill->ill_mcast_type = MLD_V2_ROUTER;
                ill->ill_mcast_v1_time = 0;
                ill->ill_mcast_v1_tset = 0;
                atomic_dec_16(&ifp->illif_mcast_v1);
            }
            rw_exit(&ill->ill_mcast_lock);
            ill_refrele(ill);
            rw_enter(&ipst->ips_ill_g_lock, RW_READER);
        }
    }
    rw_exit(&ipst->ips_ill_g_lock);
    ill_mcast_timer_start(ipst);
    mutex_enter(&ipst->ips_mld_slowtimeout_lock);
    if (ipst->ips_mld_slowtimeout_quiesce != B_TRUE) {
        ipst->ips_mld_slowtimeout_id = timeout(mld_slowtimo,
            (void *)ipst, MSEC_TO_TICK(MCAST_SLOWTIMO_INTERVAL));
    } else {
        ipst->ips_mld_slowtimeout_id = 0;
    }
    mutex_exit(&ipst->ips_mld_slowtimeout_lock);
}
1823 * igmp_sendpkt:
1824 * This will send to ip_output_simple just like icmp_inbound.
static void
igmp_sendpkt(ilm_t *ilm, uchar_t type, ipaddr_t addr)
{
    mblk_t *mp;
    igmpa_t *igmpa;
    uint8_t *rtralert;
    ipha_t *ipha;
    int hdrlen = sizeof (ipha_t) + RTRALERT_LEN;
    size_t size = hdrlen + sizeof (igmpa_t);
    ill_t *ill = ilm->ilm_ill;
    ip_stack_t *ipst = ill->ill_ipst;

    ASSERT(RW_LOCK_HELD(&ill->ill_mcast_lock));

    mp = allocb(size, BPRI_HI);
    if (mp == NULL) {
        return;
    }
    mp->b_wptr = mp->b_rptr + size;

    ipha = (ipha_t *)mp->b_rptr;
    rtralert = (uint8_t *)&(ipha[1]);
    igmpa = (igmpa_t *)&(rtralert[RTRALERT_LEN]);
    igmpa->igmpa_type = type;
    igmpa->igmpa_code = 0;
    igmpa->igmpa_group = ilm->ilm_addr;
    igmpa->igmpa_cksum = 0;
    igmpa->igmpa_cksum = IP_CSUM(mp, hdrlen, 0);

    rtralert[0] = IPOPT_COPY | IPOPT_RTRALERT;
    rtralert[1] = RTRALERT_LEN;
    rtralert[2] = 0;
    rtralert[3] = 0;

    ipha->ipha_version_and_hdr_length = (IP_VERSION << 4)
        | (IP_SIMPLE_HDR_LENGTH_IN_WORDS + RTRALERT_LEN_IN_WORDS);
    ipha->ipha_type_of_service = 0;
    ipha->ipha_length = htons(size);
    ipha->ipha_ident = 0;
    ipha->ipha_fragment_offset_and_flags = 0;
    ipha->ipha_ttl = IGMP_TTL;
    ipha->ipha_protocol = IPPROTO_IGMP;
    ipha->ipha_hdr_checksum = 0;
    ipha->ipha_dst = addr ? addr : igmpa->igmpa_group;
    ipha->ipha_src = INADDR_ANY;

    ill_mcast_queue(ill, mp);

    ++ipst->ips_igmpstat.igps_snd_reports;
}
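
/*
 * For reference, the packet built above is 32 octets on the wire
 * (excluding the link-layer header):
 *
 *	20	IPv4 header
 *	 4	Router Alert option (0x94 0x04 0x00 0x00)
 *	 8	IGMP message (type, code, checksum, group)
 *
 * hdrlen (24) covers the IP header plus the option, which is why the
 * IGMP checksum is computed over mp starting at offset hdrlen, and why
 * the header length field advertises 5 + 1 = 6 words.
 */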

/*
 * Sends an IGMP_V3_MEMBERSHIP_REPORT message out the ill.
 * The report will contain one group record
 * for each element of reclist.  If this causes packet length to
 * exceed ill->ill_mc_mtu, multiple reports are sent.
 * reclist is assumed to be made up of buffers allocated by mcast_bldmrec(),
 * and those buffers are freed here.
 */
static void
igmpv3_sendrpt(ill_t *ill, mrec_t *reclist)
{
    igmp3ra_t *igmp3ra;
    grphdra_t *grphdr;
    mblk_t *mp;
    ipha_t *ipha;
    uint8_t *rtralert;
    ipaddr_t *src_array;
    int i, j, numrec, more_src_cnt;
    size_t hdrsize, size, rsize;
    mrec_t *rp, *cur_reclist;
    mrec_t *next_reclist = reclist;
    boolean_t morepkts;
    ip_stack_t *ipst = ill->ill_ipst;

    ASSERT(RW_LOCK_HELD(&ill->ill_mcast_lock));

    /* if there aren't any records, there's nothing to send */
    if (reclist == NULL)
        return;

    hdrsize = sizeof (ipha_t) + RTRALERT_LEN;
nextpkt:
    size = hdrsize + sizeof (igmp3ra_t);
    morepkts = B_FALSE;
    more_src_cnt = 0;
    cur_reclist = next_reclist;
    numrec = 0;
    for (rp = cur_reclist; rp != NULL; rp = rp->mrec_next) {
        rsize = sizeof (grphdra_t) +
            (rp->mrec_srcs.sl_numsrc * sizeof (ipaddr_t));
        if (size + rsize > ill->ill_mc_mtu) {
            if (rp == cur_reclist) {
                /*
                 * If the first mrec we looked at is too big
                 * to fit in a single packet (i.e., the source
                 * list is too big), we must either truncate
                 * the list (if TO_EX or IS_EX), or send
                 * multiple reports for the same group (all
                 * other types).
                 */
                int srcspace, srcsperpkt;
                srcspace = ill->ill_mc_mtu - (size +
                    sizeof (grphdra_t));

                /*
                 * Skip if there's not even enough room in
                 * a single packet to send something useful.
                 */
                if (srcspace <= sizeof (ipaddr_t))
                    continue;

                srcsperpkt = srcspace / sizeof (ipaddr_t);
                /*
                 * Increment size and numrec, because we will
                 * be sending a record for the mrec we're
                 * looking at now.
                 */
                size += sizeof (grphdra_t) +
                    (srcsperpkt * sizeof (ipaddr_t));
                numrec++;
                if (rp->mrec_type == MODE_IS_EXCLUDE ||
                    rp->mrec_type == CHANGE_TO_EXCLUDE) {
                    rp->mrec_srcs.sl_numsrc = srcsperpkt;
                    if (rp->mrec_next == NULL) {
                        /* no more packets to send */
                        break;
                    } else {
                        /*
                         * more packets, but we're
                         * done with this mrec.
                         */
                        next_reclist = rp->mrec_next;
                    }
                } else {
                    more_src_cnt = rp->mrec_srcs.sl_numsrc
                        - srcsperpkt;
                    rp->mrec_srcs.sl_numsrc = srcsperpkt;
                    /*
                     * We'll fix up this mrec (remove the
                     * srcs we've already sent) before
                     * returning to nextpkt above.
                     */
                    next_reclist = rp;
                }
            } else {
                next_reclist = rp;
            }
            morepkts = B_TRUE;
            break;
        }
        size += rsize;
        numrec++;
    }

    mp = allocb(size, BPRI_HI);
    if (mp == NULL) {
        goto free_reclist;
    }
    bzero((char *)mp->b_rptr, size);
    mp->b_wptr = (uchar_t *)(mp->b_rptr + size);

    ipha = (ipha_t *)mp->b_rptr;
    rtralert = (uint8_t *)&(ipha[1]);
    igmp3ra = (igmp3ra_t *)&(rtralert[RTRALERT_LEN]);
    grphdr = (grphdra_t *)&(igmp3ra[1]);

    rp = cur_reclist;
    for (i = 0; i < numrec; i++) {
        grphdr->grphdra_type = rp->mrec_type;
        grphdr->grphdra_numsrc = htons(rp->mrec_srcs.sl_numsrc);
        grphdr->grphdra_group = V4_PART_OF_V6(rp->mrec_group);
        src_array = (ipaddr_t *)&(grphdr[1]);

        for (j = 0; j < rp->mrec_srcs.sl_numsrc; j++)
            src_array[j] = V4_PART_OF_V6(rp->mrec_srcs.sl_addr[j]);

        grphdr = (grphdra_t *)&(src_array[j]);
        rp = rp->mrec_next;
    }

    igmp3ra->igmp3ra_type = IGMP_V3_MEMBERSHIP_REPORT;
    igmp3ra->igmp3ra_numrec = htons(numrec);
    igmp3ra->igmp3ra_cksum = IP_CSUM(mp, hdrsize, 0);

    rtralert[0] = IPOPT_COPY | IPOPT_RTRALERT;
    rtralert[1] = RTRALERT_LEN;
    rtralert[2] = 0;
    rtralert[3] = 0;

    ipha->ipha_version_and_hdr_length = IP_VERSION << 4
        | (IP_SIMPLE_HDR_LENGTH_IN_WORDS + RTRALERT_LEN_IN_WORDS);
    ipha->ipha_type_of_service = IPTOS_PREC_INTERNETCONTROL;
    ipha->ipha_length = htons(size);
    ipha->ipha_ttl = IGMP_TTL;
    ipha->ipha_protocol = IPPROTO_IGMP;
    ipha->ipha_dst = htonl(INADDR_ALLRPTS_GROUP);
    ipha->ipha_src = INADDR_ANY;

    ill_mcast_queue(ill, mp);

    ++ipst->ips_igmpstat.igps_snd_reports;

    if (morepkts) {
        if (more_src_cnt > 0) {
            int index, mvsize;
            slist_t *sl = &next_reclist->mrec_srcs;
            index = sl->sl_numsrc;
            mvsize = more_src_cnt * sizeof (in6_addr_t);
            (void) memmove(&sl->sl_addr[0], &sl->sl_addr[index],
                mvsize);
            sl->sl_numsrc = more_src_cnt;
        }
        goto nextpkt;
    }

free_reclist:
    while (reclist != NULL) {
        rp = reclist->mrec_next;
        mi_free(reclist);
        reclist = rp;
    }
}
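
/*
 * Worked example of the splitting logic above, assuming the usual
 * 8-byte igmp3ra_t and grphdra_t layouts: with ill_mc_mtu = 1500, size
 * starts at hdrsize (24) + 8 = 32.  A single group record carrying 400
 * sources needs 8 + 400 * 4 = 1608 octets and doesn't fit, so
 * srcspace = 1500 - (32 + 8) = 1460 and srcsperpkt = 1460 / 4 = 365:
 * the first report carries 365 sources, and the remaining 35 are moved
 * to the front of the slist by the memmove() above before jumping back
 * to nextpkt.
 */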

/*
 * mld_input:
 * Return NULL for a bad packet that is discarded here.
 * Return mp if the message is OK and should be handed to "raw" receivers.
 * Callers of mld_input() may need to reinitialize variables that were copied
 * from the mblk as this calls pullupmsg().
 */
mblk_t *
mld_input(mblk_t *mp, ip_recv_attr_t *ira)
{
    ip6_t *ip6h = (ip6_t *)(mp->b_rptr);
    mld_hdr_t *mldh;
    ilm_t *ilm;
    ipif_t *ipif;
    uint16_t hdr_length, exthdr_length;
    in6_addr_t *v6group_ptr;
    uint_t next;
    int mldlen;
    ill_t *ill = ira->ira_ill;
    ip_stack_t *ipst = ill->ill_ipst;

    BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInGroupMembTotal);

    /* Make sure the src address of the packet is link-local */
    if (!(IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_src))) {
        BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors);
        freemsg(mp);
        return (NULL);
    }

    if (ip6h->ip6_hlim != 1) {
        BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpBadHoplimit);
        freemsg(mp);
        return (NULL);
    }

    /* Get to the icmp header part */
    hdr_length = ira->ira_ip_hdr_length;
    exthdr_length = hdr_length - IPV6_HDR_LEN;

    mldlen = ntohs(ip6h->ip6_plen) - exthdr_length;

    /* An MLD packet must be at least 24 octets to be valid */
    if (mldlen < MLD_MINLEN) {
        BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors);
        freemsg(mp);
        return (NULL);
    }

    mldh = (mld_hdr_t *)(&mp->b_rptr[hdr_length]);

    switch (mldh->mld_type) {
    case MLD_LISTENER_QUERY:
        /*
         * Packet length differentiates between v1 and v2.  A v1
         * query should be exactly 24 octets long; v2 is >= 28.
         */
        if ((mldlen == MLD_MINLEN) ||
            (ipst->ips_mld_max_version < MLD_V2_ROUTER)) {
            next = mld_query_in(mldh, ill);
        } else if (mldlen >= MLD_V2_QUERY_MINLEN) {
            next = mldv2_query_in((mld2q_t *)mldh, ill, mldlen);
        } else {
            BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors);
            freemsg(mp);
            return (NULL);
        }
        if (next == 0) {
            return (mp);
        }

        if (next != INFINITY)
            mld_start_timers(next, ipst);
        break;

    case MLD_LISTENER_REPORT:
        /*
         * For fast leave to work, we have to know that we are the
         * last person to send a report for this group.  Reports
         * generated by us are looped back since we could potentially
         * be a multicast router, so discard reports sourced by me.
         */
        mutex_enter(&ill->ill_lock);
        for (ipif = ill->ill_ipif; ipif != NULL;
            ipif = ipif->ipif_next) {
            if (IN6_ARE_ADDR_EQUAL(&ipif->ipif_v6lcl_addr,
                &ip6h->ip6_src)) {
                if (ip_debug > 1) {
                    char buf1[INET6_ADDRSTRLEN];

                    (void) mi_strlog(ill->ill_rq, 1,
                        SL_TRACE,
                        "mld_input: we are only "
                        "member src %s\n",
                        inet_ntop(AF_INET6, &ip6h->ip6_src,
                        buf1, sizeof (buf1)));
                }
                mutex_exit(&ill->ill_lock);
                return (mp);
            }
        }
        mutex_exit(&ill->ill_lock);
        BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInGroupMembResponses);

        v6group_ptr = &mldh->mld_addr;
        if (!IN6_IS_ADDR_MULTICAST(v6group_ptr)) {
            BUMP_MIB(ill->ill_icmp6_mib,
                ipv6IfIcmpInGroupMembBadReports);
            freemsg(mp);
            return (NULL);
        }

        /*
         * If we belong to the group being reported, and we are a
         * 'Delaying member' per the RFC terminology, stop our timer
         * for that group and 'clear flag' i.e. mark ilm_state as
         * IGMP_OTHERMEMBER.  With zones, there can be multiple group
         * membership entries for the same group address (one per zone)
         * so we need to walk the ill_ilm list.
         */
        rw_enter(&ill->ill_mcast_lock, RW_WRITER);
        for (ilm = ill->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) {
            if (!IN6_ARE_ADDR_EQUAL(&ilm->ilm_v6addr, v6group_ptr))
                continue;
            BUMP_MIB(ill->ill_icmp6_mib,
                ipv6IfIcmpInGroupMembOurReports);

            ilm->ilm_timer = INFINITY;
            ilm->ilm_state = IGMP_OTHERMEMBER;
        }
        rw_exit(&ill->ill_mcast_lock);
        /*
         * No packets have been sent above - no
         * ill_mcast_send_queued is needed.
         */
        ill_mcast_timer_start(ill->ill_ipst);
        break;

    case MLD_LISTENER_REDUCTION:
        BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInGroupMembReductions);
        break;
    }
    return (mp);
}
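
/*
 * Concretely: with ips_mld_max_version >= MLD_V2_ROUTER, a 24-octet
 * query (MLD_MINLEN) takes the MLDv1 path, one of 28 octets or more
 * (MLD_V2_QUERY_MINLEN, i.e. the v1 header plus the v2 sqrv/qqic/numsrc
 * fields) takes the MLDv2 path, and lengths in between are counted as
 * errors and dropped.  Capping ips_mld_max_version below MLD_V2_ROUTER
 * forces every query through the MLDv1 handler regardless of size.
 */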

/*
 * Handles an MLDv1 Listener Query.  Returns 0 on error, or the appropriate
 * (non-zero, unsigned) timer value to be set on success.
 */
static uint_t
mld_query_in(mld_hdr_t *mldh, ill_t *ill)
{
    ilm_t *ilm;
    int timer;
    uint_t next, current;
    in6_addr_t *v6group;

    BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInGroupMembQueries);

    /*
     * In the MLD specification, there are 3 states and a flag.
     *
     * In Non-Listener state, we simply don't have a membership record.
     * In Delaying state, our timer is running (ilm->ilm_timer < INFINITY)
     * In Idle Member state, our timer is not running (ilm->ilm_timer ==
     * INFINITY)
     *
     * The flag is ilm->ilm_state, it is set to IGMP_OTHERMEMBER if
     * we have heard a report from another member, or IGMP_IREPORTEDLAST
     * if I sent the last report.
     */
    v6group = &mldh->mld_addr;
    if (!(IN6_IS_ADDR_UNSPECIFIED(v6group)) &&
        ((!IN6_IS_ADDR_MULTICAST(v6group)))) {
        BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInGroupMembBadQueries);
        return (0);
    }

    /* Need to do compatibility mode checking */
    rw_enter(&ill->ill_mcast_lock, RW_WRITER);
    ill->ill_mcast_v1_time = 0;
    ill->ill_mcast_v1_tset = 1;
    if (ill->ill_mcast_type == MLD_V2_ROUTER) {
        ip1dbg(("Received MLDv1 Query on %s, switching mode to "
            "MLD_V1_ROUTER\n", ill->ill_name));
        atomic_inc_16(&ill->ill_ifptr->illif_mcast_v1);
        ill->ill_mcast_type = MLD_V1_ROUTER;
    }

    timer = (int)ntohs(mldh->mld_maxdelay);
    if (ip_debug > 1) {
        (void) mi_strlog(ill->ill_rq, 1, SL_TRACE,
            "mld_input: TIMER = mld_maxdelay %d mld_type 0x%x",
            timer, (int)mldh->mld_type);
    }

    /*
     * - Start the timers in all of our membership records for
     *   the physical interface on which the query arrived,
     *   excluding:
     *   1. those that belong to the "all hosts" group, and
     *   2. those with 0 scope, or 1 node-local scope.
     *
     * - Restart any timer that is already running but has a value
     *   longer than the requested timeout.
     * - Use the value specified in the query message as the
     *   maximum timeout.
     */
    next = INFINITY;

    current = CURRENT_MSTIME;
    for (ilm = ill->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) {
        ASSERT(!IN6_IS_ADDR_V4MAPPED(&ilm->ilm_v6addr));

        if (IN6_IS_ADDR_UNSPECIFIED(&ilm->ilm_v6addr) ||
            IN6_IS_ADDR_MC_NODELOCAL(&ilm->ilm_v6addr) ||
            IN6_IS_ADDR_MC_RESERVED(&ilm->ilm_v6addr))
            continue;
        if (!IN6_ARE_ADDR_EQUAL(&ilm->ilm_v6addr,
            &ipv6_all_hosts_mcast) &&
            (IN6_IS_ADDR_UNSPECIFIED(v6group) ||
            IN6_ARE_ADDR_EQUAL(v6group, &ilm->ilm_v6addr))) {
            if (timer == 0) {
                /* Respond immediately */
                ilm->ilm_timer = INFINITY;
                ilm->ilm_state = IGMP_IREPORTEDLAST;
                mld_sendpkt(ilm, MLD_LISTENER_REPORT, NULL);
                break;
            }
            if (ilm->ilm_timer > timer) {
                MCAST_RANDOM_DELAY(ilm->ilm_timer, timer);
                if (ilm->ilm_timer < next)
                    next = ilm->ilm_timer;
                ilm->ilm_timer += current;
            }
            break;
        }
    }
    rw_exit(&ill->ill_mcast_lock);
    /* Send any deferred/queued IP packets */
    ill_mcast_send_queued(ill);
    ill_mcast_timer_start(ill->ill_ipst);

    return (next);
}
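
/*
 * Example of the v1 timer handling above, assuming MCAST_RANDOM_DELAY()
 * picks a uniform random value in [0, max]: a query with mld_maxdelay
 * of 10000 yields timer = 10000 ms, so any matching membership whose
 * report is scheduled further out than that is pulled in to fire at
 * CURRENT_MSTIME plus at most 10 seconds; the smallest such relative
 * delay is returned and ultimately handed to mld_start_timers() by
 * mld_input().  A maxdelay of 0 instead demands an immediate report.
 */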

/*
 * Handles an MLDv2 Listener Query.  On error, returns 0; on success,
 * returns the appropriate (non-zero, unsigned) timer value (which may
 * be INFINITY) to be set.
 */
static uint_t
mldv2_query_in(mld2q_t *mld2q, ill_t *ill, int mldlen)
{
    ilm_t *ilm;
    in6_addr_t *v6group, *src_array;
    uint_t next, numsrc, i, mrd, delay, qqi, current;
    uint8_t qrv;

    v6group = &mld2q->mld2q_addr;
    numsrc = ntohs(mld2q->mld2q_numsrc);

    /* make sure numsrc matches packet size */
    if (mldlen < MLD_V2_QUERY_MINLEN + (numsrc * sizeof (in6_addr_t))) {
        BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors);
        return (0);
    }
    src_array = (in6_addr_t *)&mld2q[1];

    BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInGroupMembQueries);

    /* extract Maximum Response Delay from code in header */
    mrd = ntohs(mld2q->mld2q_mxrc);
    if (mrd >= MLD_V2_MAXRT_FPMIN) {
        uint_t hdrval, mant, exp;
        hdrval = mrd;
        mant = hdrval & MLD_V2_MAXRT_MANT_MASK;
        exp = (hdrval & MLD_V2_MAXRT_EXP_MASK) >> 12;
        mrd = (mant | 0x1000) << (exp + 3);
    }
    if (mrd == 0)
        mrd = DSEC_TO_MSEC(MCAST_DEF_QUERY_RESP_INTERVAL);

    MCAST_RANDOM_DELAY(delay, mrd);
    next = (unsigned)INFINITY;
    current = CURRENT_MSTIME;

    if ((qrv = mld2q->mld2q_sqrv & MLD_V2_RV_MASK) == 0)
        ill->ill_mcast_rv = MCAST_DEF_ROBUSTNESS;
    else
        ill->ill_mcast_rv = qrv;

    if ((qqi = (uint_t)mld2q->mld2q_qqic) >= MLD_V2_QQI_FPMIN) {
        uint_t mant, exp;
        mant = qqi & MLD_V2_QQI_MANT_MASK;
        exp = (qqi & MLD_V2_QQI_EXP_MASK) >> 4;
        qqi = (mant | 0x10) << (exp + 3);
    }
    ill->ill_mcast_qi = (qqi == 0) ? MCAST_DEF_QUERY_INTERVAL : qqi;

    /*
     * If we have a pending general query response that's scheduled
     * sooner than the delay we calculated for this response, then
     * no action is required (MLDv2 draft section 6.2 rule 1)
     */
    rw_enter(&ill->ill_mcast_lock, RW_WRITER);
    if (ill->ill_global_timer < (current + delay)) {
        rw_exit(&ill->ill_mcast_lock);
        return (next);
    }

    /*
     * Now take action depending on query type: general,
     * group specific, or group/source specific.
     */
    if ((numsrc == 0) && IN6_IS_ADDR_UNSPECIFIED(v6group)) {
        /*
         * general query
         * We know global timer is either not running or is
         * greater than our calculated delay, so reset it to
         * our delay (random value in range [0, response time])
         */
        ill->ill_global_timer = current + delay;
        next = delay;
    } else {
        /* group or group/source specific query */
        for (ilm = ill->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) {
            if (IN6_IS_ADDR_UNSPECIFIED(&ilm->ilm_v6addr) ||
                IN6_IS_ADDR_MC_NODELOCAL(&ilm->ilm_v6addr) ||
                IN6_IS_ADDR_MC_RESERVED(&ilm->ilm_v6addr) ||
                !IN6_ARE_ADDR_EQUAL(v6group, &ilm->ilm_v6addr))
                continue;

            /*
             * If the query is group specific or we have a
             * pending group specific query, the response is
             * group specific (pending sources list should be
             * empty).  Otherwise, need to update the pending
             * sources list for the group and source specific
             * response.
             */
            if (numsrc == 0 || (ilm->ilm_timer < INFINITY &&
                SLIST_IS_EMPTY(ilm->ilm_pendsrcs))) {
group_query:
                FREE_SLIST(ilm->ilm_pendsrcs);
                ilm->ilm_pendsrcs = NULL;
            } else {
                boolean_t overflow;
                slist_t *pktl;
                if (numsrc > MAX_FILTER_SIZE ||
                    (ilm->ilm_pendsrcs == NULL &&
                    (ilm->ilm_pendsrcs = l_alloc()) == NULL)) {
                    /*
                     * We've been sent more sources than
                     * we can deal with; or we can't deal
                     * with a source list at all.  Revert
                     * to a group specific query.
                     */
                    goto group_query;
                }
                if ((pktl = l_alloc()) == NULL)
                    goto group_query;
                pktl->sl_numsrc = numsrc;
                for (i = 0; i < numsrc; i++)
                    pktl->sl_addr[i] = src_array[i];
                l_union_in_a(ilm->ilm_pendsrcs, pktl,
                    &overflow);
                l_free(pktl);
                if (overflow)
                    goto group_query;
            }
            ilm->ilm_timer = (ilm->ilm_timer == INFINITY) ?
                INFINITY : (ilm->ilm_timer - current);
            /* set timer to soonest value */
            ilm->ilm_timer = MIN(ilm->ilm_timer, delay);
            if (ilm->ilm_timer < next)
                next = ilm->ilm_timer;
            ilm->ilm_timer += current;
            break;
        }
    }
    rw_exit(&ill->ill_mcast_lock);
    /*
     * No packets have been sent above - no
     * ill_mcast_send_queued is needed.
     */
    ill_mcast_timer_start(ill->ill_ipst);

    return (next);
}
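
/*
 * Worked decode of the two fixed-point fields above, assuming the usual
 * RFC 3810 encodings (mant/exp masks of 0x0fff/0x7000 for the 16-bit
 * maxrc and 0x0f/0x70 for the 8-bit qqic):
 *
 *	mxrc = 0x9234: mant = 0x234, exp = 1,
 *	    mrd = (0x234 | 0x1000) << 4 = 0x12340 = 74560 ms
 *	qqic = 0x92:   mant = 0x2, exp = 1,
 *	    qqi = (0x2 | 0x10) << 4 = 0x120 = 288 s
 *
 * Values below the FPMIN thresholds (0x8000 and 0x80 respectively) are
 * taken literally.
 */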

/*
 * Send MLDv1 response packet with hoplimit 1
 */
static void
mld_sendpkt(ilm_t *ilm, uchar_t type, const in6_addr_t *v6addr)
{
    mblk_t *mp;
    mld_hdr_t *mldh;
    ip6_t *ip6h;
    ip6_hbh_t *ip6hbh;
    struct ip6_opt_router *ip6router;
    size_t size = IPV6_HDR_LEN + sizeof (mld_hdr_t);
    ill_t *ill = ilm->ilm_ill;

    ASSERT(RW_LOCK_HELD(&ill->ill_mcast_lock));

    /*
     * We need to place a router alert option in this packet.  The length
     * of the options must be a multiple of 8.  The hbh option header is 2
     * bytes followed by the 4 byte router alert option.  That leaves
     * 2 bytes of pad for a total of 8 bytes.
     */
    const int router_alert_length = 8;

    ASSERT(ill->ill_isv6);

    size += router_alert_length;
    mp = allocb(size, BPRI_HI);
    if (mp == NULL)
        return;
    bzero(mp->b_rptr, size);
    mp->b_wptr = mp->b_rptr + size;

    ip6h = (ip6_t *)mp->b_rptr;
    ip6hbh = (struct ip6_hbh *)&ip6h[1];
    ip6router = (struct ip6_opt_router *)&ip6hbh[1];
    /*
     * A zero is a pad option of length 1.  The bzero of the whole packet
     * above will pad between ip6router and mldh.
     */
    mldh = (mld_hdr_t *)((uint8_t *)ip6hbh + router_alert_length);

    mldh->mld_type = type;
    mldh->mld_addr = ilm->ilm_v6addr;

    ip6router->ip6or_type = IP6OPT_ROUTER_ALERT;
    ip6router->ip6or_len = 2;
    ip6router->ip6or_value[0] = 0;
    ip6router->ip6or_value[1] = IP6_ALERT_MLD;

    ip6hbh->ip6h_nxt = IPPROTO_ICMPV6;
    ip6hbh->ip6h_len = 0;

    ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW;
    ip6h->ip6_plen = htons(sizeof (*mldh) + router_alert_length);
    ip6h->ip6_nxt = IPPROTO_HOPOPTS;
    ip6h->ip6_hops = MLD_HOP_LIMIT;
    if (v6addr == NULL)
        ip6h->ip6_dst = ilm->ilm_v6addr;
    else
        ip6h->ip6_dst = *v6addr;

    ip6h->ip6_src = ipv6_all_zeros;
    /*
     * Prepare for checksum by putting icmp length in the icmp
     * checksum field.  The checksum is calculated in ip_output.
     */
    mldh->mld_cksum = htons(sizeof (*mldh));

    ill_mcast_queue(ill, mp);
}
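
/*
 * For reference, the resulting MLDv1 packet is 72 octets:
 *
 *	40	IPv6 header (ip6_plen = 8 + 24 = 32)
 *	 8	hop-by-hop header: nxt/len (2), router alert option (4),
 *		pad (2, left zero by the bzero above)
 *	24	MLD header: type/code/cksum (4), maxdelay/reserved (4),
 *		group address (16)
 */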

/*
 * Sends an MLD_V2_LISTENER_REPORT message out the passed-in ill.  The
 * report will contain one multicast address record for each element of
 * reclist.  If this causes packet length to exceed ill->ill_mc_mtu,
 * multiple reports are sent.  reclist is assumed to be made up of
 * buffers allocated by mcast_bldmrec(), and those buffers are freed here.
 */
static void
mldv2_sendrpt(ill_t *ill, mrec_t *reclist)
{
    mblk_t *mp;
    mld2r_t *mld2r;
    mld2mar_t *mld2mar;
    in6_addr_t *srcarray;
    ip6_t *ip6h;
    ip6_hbh_t *ip6hbh;
    struct ip6_opt_router *ip6router;
    size_t size, optlen, padlen, icmpsize, rsize;
    int i, j, numrec, more_src_cnt;
    mrec_t *rp, *cur_reclist;
    mrec_t *next_reclist = reclist;
    boolean_t morepkts;

    /* If there aren't any records, there's nothing to send */
    if (reclist == NULL)
        return;

    ASSERT(ill->ill_isv6);
    ASSERT(RW_LOCK_HELD(&ill->ill_mcast_lock));

    /*
     * Total option length (optlen + padlen) must be a multiple of
     * 8 bytes.  We assume here that optlen <= 8, so the total option
     * length will be 8.  Assert this in case anything ever changes.
     */
    optlen = sizeof (ip6_hbh_t) + sizeof (struct ip6_opt_router);
    ASSERT(optlen <= 8);
    padlen = 8 - optlen;
nextpkt:
    icmpsize = sizeof (mld2r_t);
    size = IPV6_HDR_LEN + optlen + padlen + icmpsize;
    morepkts = B_FALSE;
    more_src_cnt = 0;
    for (rp = cur_reclist = next_reclist, numrec = 0; rp != NULL;
        rp = rp->mrec_next, numrec++) {
        rsize = sizeof (mld2mar_t) +
            (rp->mrec_srcs.sl_numsrc * sizeof (in6_addr_t));
        if (size + rsize > ill->ill_mc_mtu) {
            if (rp == cur_reclist) {
                /*
                 * If the first mrec we looked at is too big
                 * to fit in a single packet (i.e., the source
                 * list is too big), we must either truncate
                 * the list (if TO_EX or IS_EX), or send
                 * multiple reports for the same group (all
                 * other types).
                 */
                int srcspace, srcsperpkt;
                srcspace = ill->ill_mc_mtu -
                    (size + sizeof (mld2mar_t));

                /*
                 * Skip if there's not even enough room in
                 * a single packet to send something useful.
                 */
                if (srcspace <= sizeof (in6_addr_t))
                    continue;

                srcsperpkt = srcspace / sizeof (in6_addr_t);
                /*
                 * Increment icmpsize, size and numrec, because
                 * we will be sending a record for the mrec
                 * we're looking at now.
                 */
                rsize = sizeof (mld2mar_t) +
                    (srcsperpkt * sizeof (in6_addr_t));
                icmpsize += rsize;
                size += rsize;
                numrec++;
                if (rp->mrec_type == MODE_IS_EXCLUDE ||
                    rp->mrec_type == CHANGE_TO_EXCLUDE) {
                    rp->mrec_srcs.sl_numsrc = srcsperpkt;
                    if (rp->mrec_next == NULL) {
                        /* no more packets to send */
                        break;
                    } else {
                        /*
                         * more packets, but we're
                         * done with this mrec.
                         */
                        next_reclist = rp->mrec_next;
                    }
                } else {
                    more_src_cnt = rp->mrec_srcs.sl_numsrc
                        - srcsperpkt;
                    rp->mrec_srcs.sl_numsrc = srcsperpkt;
                    /*
                     * We'll fix up this mrec (remove the
                     * srcs we've already sent) before
                     * returning to nextpkt above.
                     */
                    next_reclist = rp;
                }
            } else {
                next_reclist = rp;
            }
            morepkts = B_TRUE;
            break;
        }
        icmpsize += rsize;
        size += rsize;
    }

    mp = allocb(size, BPRI_HI);
    if (mp == NULL)
        goto free_reclist;
    bzero(mp->b_rptr, size);
    mp->b_wptr = mp->b_rptr + size;

    ip6h = (ip6_t *)mp->b_rptr;
    ip6hbh = (ip6_hbh_t *)&(ip6h[1]);
    ip6router = (struct ip6_opt_router *)&(ip6hbh[1]);
    mld2r = (mld2r_t *)((uint8_t *)ip6hbh + optlen + padlen);
    mld2mar = (mld2mar_t *)&(mld2r[1]);

    ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW;
    ip6h->ip6_plen = htons(optlen + padlen + icmpsize);
    ip6h->ip6_nxt = IPPROTO_HOPOPTS;
    ip6h->ip6_hops = MLD_HOP_LIMIT;
    ip6h->ip6_dst = ipv6_all_v2rtrs_mcast;
    ip6h->ip6_src = ipv6_all_zeros;

    ip6hbh->ip6h_nxt = IPPROTO_ICMPV6;
    /*
     * ip6h_len is the number of 8-byte words, not including the first
     * 8 bytes; we've assumed optlen + padlen == 8 bytes; hence len = 0.
     */
    ip6hbh->ip6h_len = 0;

    ip6router->ip6or_type = IP6OPT_ROUTER_ALERT;
    ip6router->ip6or_len = 2;
    ip6router->ip6or_value[0] = 0;
    ip6router->ip6or_value[1] = IP6_ALERT_MLD;

    mld2r->mld2r_type = MLD_V2_LISTENER_REPORT;
    mld2r->mld2r_nummar = htons(numrec);
    /*
     * Prepare for the checksum by putting icmp length in the icmp
     * checksum field.  The checksum is calculated in ip_output_simple.
     */
    mld2r->mld2r_cksum = htons(icmpsize);

    /*
     * Only write out the numrec records that were sized into this
     * packet; when morepkts is set, the chain continues past the last
     * record that fits, and the remainder is sent on the next pass.
     */
    rp = cur_reclist;
    for (i = 0; i < numrec; i++) {
        mld2mar->mld2mar_type = rp->mrec_type;
        mld2mar->mld2mar_auxlen = 0;
        mld2mar->mld2mar_numsrc = htons(rp->mrec_srcs.sl_numsrc);
        mld2mar->mld2mar_group = rp->mrec_group;
        srcarray = (in6_addr_t *)&(mld2mar[1]);

        for (j = 0; j < rp->mrec_srcs.sl_numsrc; j++)
            srcarray[j] = rp->mrec_srcs.sl_addr[j];

        mld2mar = (mld2mar_t *)&(srcarray[j]);
        rp = rp->mrec_next;
    }

    ill_mcast_queue(ill, mp);

    if (morepkts) {
        if (more_src_cnt > 0) {
            int index, mvsize;
            slist_t *sl = &next_reclist->mrec_srcs;
            index = sl->sl_numsrc;
            mvsize = more_src_cnt * sizeof (in6_addr_t);
            (void) memmove(&sl->sl_addr[0], &sl->sl_addr[index],
                mvsize);
            sl->sl_numsrc = more_src_cnt;
        }
        goto nextpkt;
    }

free_reclist:
    while (reclist != NULL) {
        rp = reclist->mrec_next;
        mi_free(reclist);
        reclist = rp;
    }
}
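
/*
 * The on-the-wire layout produced above, for a report carrying one
 * address record with no sources (sizes assume the usual 4 bytes of
 * mld2mar_t header fields plus the 16-byte group address):
 *
 *	40	IPv6 header (ip6_plen = 8 + 8 + 20 = 36)
 *	 8	hop-by-hop header + router alert + pad
 *	 8	MLDv2 report header (type/resv/cksum/resv/nummar)
 *	20	multicast address record (type/auxlen/numsrc + group)
 *
 * Each source address adds another 16 octets to its record.
 */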

static mrec_t *
mcast_bldmrec(mcast_record_t type, in6_addr_t *grp, slist_t *srclist,
    mrec_t *next)
{
    mrec_t *rp;
    int i;

    if ((type == ALLOW_NEW_SOURCES || type == BLOCK_OLD_SOURCES) &&
        SLIST_IS_EMPTY(srclist))
        return (next);

    rp = (mrec_t *)mi_alloc(sizeof (mrec_t), BPRI_HI);
    if (rp == NULL)
        return (next);

    rp->mrec_next = next;
    rp->mrec_type = type;
    rp->mrec_auxlen = 0;
    rp->mrec_group = *grp;
    if (srclist == NULL) {
        rp->mrec_srcs.sl_numsrc = 0;
    } else {
        rp->mrec_srcs.sl_numsrc = srclist->sl_numsrc;
        for (i = 0; i < srclist->sl_numsrc; i++)
            rp->mrec_srcs.sl_addr[i] = srclist->sl_addr[i];
    }

    return (rp);
}
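
/*
 * mcast_bldmrec() prepends to an existing chain, so a caller can build
 * a report list back to front.  A hypothetical caller assembling an
 * ALLOW/BLOCK pair for one group (with ill_mcast_lock held) might do:
 *
 *	mrec_t *list = NULL;
 *	list = mcast_bldmrec(BLOCK_OLD_SOURCES, &grp, blocked, list);
 *	list = mcast_bldmrec(ALLOW_NEW_SOURCES, &grp, allowed, list);
 *	mldv2_sendrpt(ill, list);	(consumes and frees the list)
 *
 * ALLOW/BLOCK records with empty source lists are elided (the chain is
 * returned unchanged), and an allocation failure likewise degrades to
 * "no record" rather than a hard error.
 */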

/*
 * Set up initial retransmit state.  If memory cannot be allocated for
 * the source lists, simply create as much state as is possible; memory
 * allocation failures are considered one type of transient error that
 * the retransmissions are designed to overcome (and if they aren't
 * transient, there are bigger problems than failing to notify the
 * router about multicast group membership state changes).
 */
static void
mcast_init_rtx(ill_t *ill, rtx_state_t *rtxp, mcast_record_t rtype,
    slist_t *flist)
{
    /*
     * There are only three possibilities for rtype:
     *	New join, transition from INCLUDE {} to INCLUDE {flist}
     *	  => rtype is ALLOW_NEW_SOURCES
     *	New join, transition from INCLUDE {} to EXCLUDE {flist}
     *	  => rtype is CHANGE_TO_EXCLUDE
     *	State change that involves a filter mode change
     *	  => rtype is either CHANGE_TO_INCLUDE or CHANGE_TO_EXCLUDE
     */
    ASSERT(rtype == CHANGE_TO_EXCLUDE || rtype == CHANGE_TO_INCLUDE ||
        rtype == ALLOW_NEW_SOURCES);

    rtxp->rtx_cnt = ill->ill_mcast_rv;

    switch (rtype) {
    case CHANGE_TO_EXCLUDE:
        rtxp->rtx_fmode_cnt = ill->ill_mcast_rv;
        CLEAR_SLIST(rtxp->rtx_allow);
        COPY_SLIST(flist, rtxp->rtx_block);
        break;
    case ALLOW_NEW_SOURCES:
    case CHANGE_TO_INCLUDE:
        rtxp->rtx_fmode_cnt =
            rtype == ALLOW_NEW_SOURCES ? 0 : ill->ill_mcast_rv;
        CLEAR_SLIST(rtxp->rtx_block);
        COPY_SLIST(flist, rtxp->rtx_allow);
        break;
    }
}
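
/*
 * Example: a join that moves group G from INCLUDE {} to EXCLUDE {S1} on
 * an ill whose robustness variable is 2 leaves rtx_cnt =
 * rtx_fmode_cnt = 2, rtx_allow cleared, and rtx_block = {S1}; the
 * retransmit timer then re-sends the TO_EX({S1}) report until the
 * counters drain, per the robustness-variable rule of RFC 3376/3810.
 * A plain ALLOW_NEW_SOURCES join leaves rtx_fmode_cnt at 0, since no
 * filter mode change needs to be re-advertised.
 */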

/*
 * The basic strategy here, as extrapolated from RFC 3810 section 6.1 and
 * RFC 3376 section 5.1, covers three cases:
 * * The current state change is a filter mode change
 *	Set filter mode retransmit counter; set retransmit allow or
 *	block list to new source list as appropriate, and clear the
 *	retransmit list that was not set; send TO_IN or TO_EX with
 *	new source list.
 * * The current state change is a source list change, but the filter
 *   mode retransmit counter is > 0
 *	Decrement filter mode retransmit counter; set retransmit
 *	allow or block list to new source list as appropriate,
 *	and clear the retransmit list that was not set; send TO_IN
 *	or TO_EX with new source list.
 * * The current state change is a source list change, and the filter
 *   mode retransmit counter is 0
 *	Merge existing rtx allow and block lists with new state:
 *	  rtx_allow = (new allow + rtx_allow) - new block
 *	  rtx_block = (new block + rtx_block) - new allow
 *	Send ALLOW and BLOCK records for new retransmit lists;
 *	decrement retransmit counter.
 *
 * As is the case for mcast_init_rtx(), memory allocation failures are
 * acceptable; we just create as much state as we can.
 */
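
/*
 * A worked instance of the merge case (sets are illustrative): suppose
 * the pending retransmit state is rtx_allow = {a, b}, rtx_block = {c},
 * and the new change reports ALLOW {c} and BLOCK {b}.  Then
 *	rtx_allow = ({c} + {a, b}) - {b} = {a, c}
 *	rtx_block = ({b} + {c}) - {c} = {b}
 * and the resulting report carries ALLOW({a, c}) and BLOCK({b}), i.e.
 * earlier not-yet-acknowledged changes are folded into the new ones
 * rather than being lost.
 */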
static mrec_t *
mcast_merge_rtx(ilm_t *ilm, mrec_t *mreclist, slist_t *flist)
{
    ill_t *ill;
    rtx_state_t *rtxp = &ilm->ilm_rtx;
    mcast_record_t txtype;
    mrec_t *rp, *rpnext, *rtnmrec;
    boolean_t ovf;

    ill = ilm->ilm_ill;

    if (mreclist == NULL)
        return (mreclist);

    /*
     * A filter mode change is indicated by a single mrec, which is
     * either TO_IN or TO_EX.  In this case, we just need to set new
     * retransmit state as if this were an initial join.  There is
     * no change to the mrec list.
     */
    if (mreclist->mrec_type == CHANGE_TO_INCLUDE ||
        mreclist->mrec_type == CHANGE_TO_EXCLUDE) {
        mcast_init_rtx(ill, rtxp, mreclist->mrec_type,
            &mreclist->mrec_srcs);
        return (mreclist);
    }

    /*
     * Only the source list has changed
     */
    rtxp->rtx_cnt = ill->ill_mcast_rv;
    if (rtxp->rtx_fmode_cnt > 0) {
        /* but we're still sending filter mode change reports */
        rtxp->rtx_fmode_cnt--;
        if (ilm->ilm_fmode == MODE_IS_INCLUDE) {
            CLEAR_SLIST(rtxp->rtx_block);
            COPY_SLIST(flist, rtxp->rtx_allow);
            txtype = CHANGE_TO_INCLUDE;
        } else {
            CLEAR_SLIST(rtxp->rtx_allow);
            COPY_SLIST(flist, rtxp->rtx_block);
            txtype = CHANGE_TO_EXCLUDE;
        }
        /* overwrite first mrec with new info */
        mreclist->mrec_type = txtype;
        l_copy(flist, &mreclist->mrec_srcs);
        /* then free any remaining mrecs */
        for (rp = mreclist->mrec_next; rp != NULL; rp = rpnext) {
            rpnext = rp->mrec_next;
            mi_free(rp);
        }
        mreclist->mrec_next = NULL;
        rtnmrec = mreclist;
    } else {
        mrec_t *allow_mrec, *block_mrec;
        /*
         * Just send the source change reports; but we need to
         * recalculate the ALLOW and BLOCK lists based on previous
         * state and new changes.
         */
        rtnmrec = mreclist;
        allow_mrec = block_mrec = NULL;
        for (rp = mreclist; rp != NULL; rp = rp->mrec_next) {
            ASSERT(rp->mrec_type == ALLOW_NEW_SOURCES ||
                rp->mrec_type == BLOCK_OLD_SOURCES);
            if (rp->mrec_type == ALLOW_NEW_SOURCES)
                allow_mrec = rp;
            else
                block_mrec = rp;
        }
        /*
         * Perform calculations:
         *   new_allow = mrec_allow + (rtx_allow - mrec_block)
         *   new_block = mrec_block + (rtx_block - mrec_allow)
         *
         * Each calc requires two steps, for example:
         *   rtx_allow = rtx_allow - mrec_block;
         *   new_allow = mrec_allow + rtx_allow;
         *
         * Store results in mrec lists, and then copy into rtx lists.
         * We do it in this order in case the rtx list hasn't been
         * alloc'd yet; if it hasn't and our alloc fails, that's okay.
         * Overflows are also okay.
         */
        if (block_mrec != NULL) {
            l_difference_in_a(rtxp->rtx_allow,
                &block_mrec->mrec_srcs);
        }
        if (allow_mrec != NULL) {
            l_difference_in_a(rtxp->rtx_block,
                &allow_mrec->mrec_srcs);
            l_union_in_a(&allow_mrec->mrec_srcs, rtxp->rtx_allow,
                &ovf);
        }
        if (block_mrec != NULL) {
            l_union_in_a(&block_mrec->mrec_srcs, rtxp->rtx_block,
                &ovf);
            COPY_SLIST(&block_mrec->mrec_srcs, rtxp->rtx_block);
        } else {
            rtnmrec = mcast_bldmrec(BLOCK_OLD_SOURCES,
                &ilm->ilm_v6addr, rtxp->rtx_block, allow_mrec);
        }
        if (allow_mrec != NULL) {
            COPY_SLIST(&allow_mrec->mrec_srcs, rtxp->rtx_allow);
        } else {
            rtnmrec = mcast_bldmrec(ALLOW_NEW_SOURCES,
                &ilm->ilm_v6addr, rtxp->rtx_allow, block_mrec);
        }
    }

    return (rtnmrec);