4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
22 * Copyright (c) 1991, 2010, Oracle and/or its affiliates. All rights reserved.
24 /* Copyright (c) 1990 Mentat Inc. */
27 * Copyright (c) 2018, Joyent, Inc.
28 * Copyright 2024 Oxide Computer Company
32 * Procedures for the kernel part of DVMRP,
33 * a Distance-Vector Multicast Routing Protocol.
35 * Written by David Waitzman, BBN Labs, August 1988.
36 * Modified by Steve Deering, Stanford, February 1989.
37 * Modified by Mark J. Steiglitz, Stanford, May, 1991
38 * Modified by Van Jacobson, LBL, January 1993
39 * Modified by Ajit Thyagarajan, PARC, August 1993
40 * Modified by Bill Fenner, PARC, April 1995
47 * - function pointer field in vif, void *vif_sendit()
50 #include <sys/types.h>
51 #include <sys/stream.h>
52 #include <sys/stropts.h>
53 #include <sys/strlog.h>
54 #include <sys/systm.h>
56 #include <sys/cmn_err.h>
59 #include <sys/param.h>
60 #include <sys/socket.h>
61 #include <sys/vtrace.h>
62 #include <sys/debug.h>
64 #include <sys/sockio.h>
65 #include <netinet/in.h>
66 #include <net/if_dl.h>
68 #include <inet/ipsec_impl.h>
69 #include <inet/common.h>
72 #include <inet/tunables.h>
73 #include <inet/mib2.h>
74 #include <netinet/ip6.h>
76 #include <inet/snmpcom.h>
78 #include <netinet/igmp.h>
79 #include <netinet/igmp_var.h>
80 #include <netinet/udp.h>
81 #include <netinet/ip_mroute.h>
82 #include <inet/ip_multi.h>
83 #include <inet/ip_ire.h>
84 #include <inet/ip_ndp.h>
85 #include <inet/ip_if.h>
86 #include <inet/ipclassifier.h>
88 #include <netinet/pim.h>
94 * There are three main data structures viftable, mfctable and tbftable that
95 * need to be protected against MT races.
97 * viftable is a fixed length array of vif structs. There is no lock to protect
98 * the whole array, instead each struct is protected by its own individual lock.
99 * The value of v_marks in conjunction with the value of v_refcnt determines the
100 * current state of a vif structure. One special state that needs mention
101 * is when the vif is marked VIF_MARK_NOTINUSE but refcnt != 0. This indicates
102 * that vif is being initialized.
103 * Each structure is freed when the refcnt goes down to zero. If a delete comes
104 * in when the refcnt is > 1, the vif structure is marked VIF_MARK_CONDEMNED
105 * which prevents the struct from further use. When the refcnt goes to zero
106 * the struct is freed and is marked VIF_MARK_NOTINUSE.
107 * vif struct stores a pointer to the ipif in v_ipif, to prevent ipif/ill
108 * from going away a refhold is put on the ipif before using it. see
109 * lock_good_vif() and unlock_good_vif().
111 * VIF_REFHOLD and VIF_REFRELE macros have been provided to manipulate refcnts
114 * tbftable is also a fixed length array of tbf structs and is only accessed
115 * via v_tbf. It is protected by its own lock tbf_lock.
118 * v_lock --> tbf_lock
119 * v_lock --> ill_lock
121 * mfctable is a fixed size hash table of mfc bucket structs (struct mfcb).
122 * Each mfc bucket struct (struct mfcb) maintains a refcnt for each walker,
123 * it also maintains a state. These fields are protected by a lock (mfcb_lock).
124 * mfc structs only maintain a state and have no refcnt. mfc_mutex is used to
125 * protect the struct elements.
127 * mfc structs are dynamically allocated and are singly linked
128 * at the head of the chain. When an mfc structure is to be deleted
129 * it is marked condemned and so is the state in the bucket struct.
130 * When the last walker of the hash bucket exits all the mfc structs
131 * marked condemned are freed.
134 * The bucket lock should be acquired before the mfc struct lock.
135 * MFCB_REFHOLD and MFCB_REFRELE macros are provided for locking
136 * operations on the bucket struct.
138 * last_encap_lock and numvifs_mutex should be acquired after
139 * acquiring vif or mfc locks. These locks protect some global variables.
141 * The statistics are not currently protected by a lock
142 * causing the stats to be approximate, not exact.
145 #define NO_VIF MAXVIFS /* from mrouted, no route for src */
149 * Upcall timeouts - BSD uses boolean_t mfc->expire and
150 * nexpire[MFCTBLSIZE], the number of times expire has been called.
151 * SunOS 5.x uses mfc->timeout for each mfc.
152 * Some Unixes are limited in the number of simultaneous timeouts
153 * that can be run, SunOS 5.x does not have this restriction.
157 * In BSD, EXPIRE_TIMEOUT is how often expire_upcalls() is called and
158 * UPCALL_EXPIRE is the number of timeouts before a particular upcall
159 * expires. Thus the time till expiration is EXPIRE_TIMEOUT * UPCALL_EXPIRE
161 #define EXPIRE_TIMEOUT (hz/4) /* 4x / second */
162 #define UPCALL_EXPIRE 6 /* number of timeouts */
165 * Hash function for a source, group entry
167 #define MFCHASH(a, g) MFCHASHMOD(((a) >> 20) ^ ((a) >> 10) ^ (a) ^ \
168 ((g) >> 20) ^ ((g) >> 10) ^ (g))
170 #define TBF_REPROCESS (hz / 100) /* 100x /second */
172 /* Identify PIM packet that came on a Register interface */
173 #define PIM_REGISTER_MARKER 0xffffffff
175 /* Function declarations */
176 static int add_mfc(struct mfcctl
*, ip_stack_t
*);
177 static int add_vif(struct vifctl
*, conn_t
*, ip_stack_t
*);
178 static int del_mfc(struct mfcctl
*, ip_stack_t
*);
179 static int del_vif(vifi_t
*, ip_stack_t
*);
180 static void del_vifp(struct vif
*);
181 static void encap_send(ipha_t
*, mblk_t
*, struct vif
*, ipaddr_t
);
182 static void expire_upcalls(void *);
183 static void fill_route(struct mfc
*, struct mfcctl
*, ip_stack_t
*);
184 static void free_queue(struct mfc
*);
185 static int get_assert(uchar_t
*, ip_stack_t
*);
186 static int get_lsg_cnt(struct sioc_lsg_req
*, ip_stack_t
*);
187 static int get_sg_cnt(struct sioc_sg_req
*, ip_stack_t
*);
188 static int get_version(uchar_t
*);
189 static int get_vif_cnt(struct sioc_vif_req
*, ip_stack_t
*);
190 static int ip_mdq(mblk_t
*, ipha_t
*, ill_t
*,
191 ipaddr_t
, struct mfc
*);
192 static int ip_mrouter_init(conn_t
*, uchar_t
*, int, ip_stack_t
*);
193 static void phyint_send(ipha_t
*, mblk_t
*, struct vif
*, ipaddr_t
);
194 static int register_mforward(mblk_t
*, ip_recv_attr_t
*);
195 static void register_send(ipha_t
*, mblk_t
*, struct vif
*, ipaddr_t
);
196 static int set_assert(int *, ip_stack_t
*);
199 * Token Bucket Filter functions
201 static int priority(struct vif
*, ipha_t
*);
202 static void tbf_control(struct vif
*, mblk_t
*, ipha_t
*);
203 static int tbf_dq_sel(struct vif
*, ipha_t
*);
204 static void tbf_process_q(struct vif
*);
205 static void tbf_queue(struct vif
*, mblk_t
*);
206 static void tbf_reprocess_q(void *);
207 static void tbf_send_packet(struct vif
*, mblk_t
*);
208 static void tbf_update_tokens(struct vif
*);
209 static void release_mfc(struct mfcb
*);
211 static boolean_t
is_mrouter_off(ip_stack_t
*);
213 * Encapsulation packets
218 /* prototype IP hdr for encapsulated packets */
219 static ipha_t multicast_encap_iphdr
= {
220 IP_SIMPLE_HDR_VERSION
,
222 sizeof (ipha_t
), /* total length */
225 ENCAP_TTL
, IPPROTO_ENCAP
,
230 * Rate limit for assert notification messages, in nsec.
232 #define ASSERT_MSG_TIME 3000000000
235 #define VIF_REFHOLD(vifp) { \
236 mutex_enter(&(vifp)->v_lock); \
237 (vifp)->v_refcnt++; \
238 mutex_exit(&(vifp)->v_lock); \
241 #define VIF_REFRELE_LOCKED(vifp) { \
242 (vifp)->v_refcnt--; \
243 if ((vifp)->v_refcnt == 0 && \
244 ((vifp)->v_marks & VIF_MARK_CONDEMNED)) { \
247 mutex_exit(&(vifp)->v_lock); \
251 #define VIF_REFRELE(vifp) { \
252 mutex_enter(&(vifp)->v_lock); \
253 (vifp)->v_refcnt--; \
254 if ((vifp)->v_refcnt == 0 && \
255 ((vifp)->v_marks & VIF_MARK_CONDEMNED)) { \
258 mutex_exit(&(vifp)->v_lock); \
262 #define MFCB_REFHOLD(mfcb) { \
263 mutex_enter(&(mfcb)->mfcb_lock); \
264 (mfcb)->mfcb_refcnt++; \
265 ASSERT((mfcb)->mfcb_refcnt != 0); \
266 mutex_exit(&(mfcb)->mfcb_lock); \
269 #define MFCB_REFRELE(mfcb) { \
270 mutex_enter(&(mfcb)->mfcb_lock); \
271 ASSERT((mfcb)->mfcb_refcnt != 0); \
272 if (--(mfcb)->mfcb_refcnt == 0 && \
273 ((mfcb)->mfcb_marks & MFCB_MARK_CONDEMNED)) { \
276 mutex_exit(&(mfcb)->mfcb_lock); \
281 * Find a route for a given origin IP address and multicast group address.
282 * Skip entries with pending upcalls.
283 * Type of service parameter to be added in the future!
285 #define MFCFIND(mfcbp, o, g, rt) { \
286 struct mfc *_mb_rt = NULL; \
288 _mb_rt = mfcbp->mfcb_mfc; \
290 if ((_mb_rt->mfc_origin.s_addr == o) && \
291 (_mb_rt->mfc_mcastgrp.s_addr == g) && \
292 (_mb_rt->mfc_rte == NULL) && \
293 (!(_mb_rt->mfc_marks & MFCB_MARK_CONDEMNED))) { \
297 _mb_rt = _mb_rt->mfc_next; \
302 * BSD uses timeval with sec and usec. In SunOS 5.x uniqtime() and gethrtime()
303 * are inefficient. We use gethrestime() which returns a timespec_t with
304 * sec and nsec, the resolution is machine dependent.
305 * The following 2 macros have been changed to use nsec instead of usec.
308 * Macros to compute elapsed time efficiently.
309 * Borrowed from Van Jacobson's scheduling code.
310 * Delta should be a hrtime_t.
312 #define TV_DELTA(a, b, delta) { \
315 delta = (a).tv_nsec - (b).tv_nsec; \
316 if ((xxs = (a).tv_sec - (b).tv_sec) != 0) { \
319 delta += 1000000000; \
322 delta += 1000000000; \
325 delta += (1000000000 * xxs); \
/*
 * TV_LT(a, b): nonzero when timespec a is strictly earlier than timespec b,
 * comparing tv_sec first and tv_nsec only when the seconds do not already
 * place a after b. Both arguments are evaluated more than once, so they
 * must be free of side effects.
 */
330 #define TV_LT(a, b) (((a).tv_nsec < (b).tv_nsec && \
331 (a).tv_sec <= (b).tv_sec) || (a).tv_sec < (b).tv_sec)
334 * Handle MRT setsockopt commands to modify the multicast routing tables.
337 ip_mrouter_set(int cmd
, conn_t
*connp
, int checkonly
, uchar_t
*data
,
340 ip_stack_t
*ipst
= connp
->conn_netstack
->netstack_ip
;
342 mutex_enter(&ipst
->ips_ip_g_mrouter_mutex
);
343 if (cmd
!= MRT_INIT
&& connp
!= ipst
->ips_ip_g_mrouter
) {
344 mutex_exit(&ipst
->ips_ip_g_mrouter_mutex
);
347 mutex_exit(&ipst
->ips_ip_g_mrouter_mutex
);
351 * do not do operation, just pretend to - new T_CHECK
352 * Note: Even routines further on can probably fail but
353 * this T_CHECK stuff is only to please XTI so it is not
354 * necessary to be perfect.
371 * make sure no command is issued after multicast routing has been
374 if (cmd
!= MRT_INIT
&& cmd
!= MRT_DONE
) {
375 if (is_mrouter_off(ipst
))
380 case MRT_INIT
: return (ip_mrouter_init(connp
, data
, datalen
, ipst
));
381 case MRT_DONE
: return (ip_mrouter_done(ipst
));
382 case MRT_ADD_VIF
: return (add_vif((struct vifctl
*)data
, connp
, ipst
));
383 case MRT_DEL_VIF
: return (del_vif((vifi_t
*)data
, ipst
));
384 case MRT_ADD_MFC
: return (add_mfc((struct mfcctl
*)data
, ipst
));
385 case MRT_DEL_MFC
: return (del_mfc((struct mfcctl
*)data
, ipst
));
386 case MRT_ASSERT
: return (set_assert((int *)data
, ipst
));
387 default: return (EOPNOTSUPP
);
392 * Handle MRT getsockopt commands
395 ip_mrouter_get(int cmd
, conn_t
*connp
, uchar_t
*data
)
397 ip_stack_t
*ipst
= connp
->conn_netstack
->netstack_ip
;
399 if (connp
!= ipst
->ips_ip_g_mrouter
)
403 case MRT_VERSION
: return (get_version((uchar_t
*)data
));
404 case MRT_ASSERT
: return (get_assert((uchar_t
*)data
, ipst
));
405 default: return (EOPNOTSUPP
);
410 * Handle ioctl commands to obtain information from the cache.
411 * Called with shared access to IP. These are read_only ioctls.
415 mrt_ioctl(ipif_t
*ipif
, sin_t
*sin
, queue_t
*q
, mblk_t
*mp
,
416 ip_ioctl_cmd_t
*ipip
, void *if_req
)
419 struct iocblk
*iocp
= (struct iocblk
*)mp
->b_rptr
;
420 conn_t
*connp
= Q_TO_CONN(q
);
421 ip_stack_t
*ipst
= connp
->conn_netstack
->netstack_ip
;
423 /* Existence verified in ip_wput_nondata */
424 mp1
= mp
->b_cont
->b_cont
;
426 switch (iocp
->ioc_cmd
) {
427 case (SIOCGETVIFCNT
):
428 return (get_vif_cnt((struct sioc_vif_req
*)mp1
->b_rptr
, ipst
));
430 return (get_sg_cnt((struct sioc_sg_req
*)mp1
->b_rptr
, ipst
));
431 case (SIOCGETLSGCNT
):
432 return (get_lsg_cnt((struct sioc_lsg_req
*)mp1
->b_rptr
, ipst
));
439 * Returns the packet, byte, rpf-failure count for the source, group provided.
442 get_sg_cnt(struct sioc_sg_req
*req
, ip_stack_t
*ipst
)
447 mfcbp
= &ipst
->ips_mfcs
[MFCHASH(req
->src
.s_addr
, req
->grp
.s_addr
)];
449 MFCFIND(mfcbp
, req
->src
.s_addr
, req
->grp
.s_addr
, rt
);
452 mutex_enter(&rt
->mfc_mutex
);
453 req
->pktcnt
= rt
->mfc_pkt_cnt
;
454 req
->bytecnt
= rt
->mfc_byte_cnt
;
455 req
->wrong_if
= rt
->mfc_wrong_if
;
456 mutex_exit(&rt
->mfc_mutex
);
458 req
->pktcnt
= req
->bytecnt
= req
->wrong_if
= 0xffffffffU
;
465 * Returns the packet, byte, rpf-failure count for the source, group provided.
466 * Uses larger counters and IPv6 addresses.
468 /* ARGSUSED XXX until implemented */
470 get_lsg_cnt(struct sioc_lsg_req
*req
, ip_stack_t
*ipst
)
472 /* XXX TODO SIOCGETLSGCNT */
477 * Returns the input and output packet and byte counts on the vif provided.
/*
 * get_vif_cnt: copy the packet and byte counters of the vif selected by
 * req->vifi out of ipst->ips_vifs[] into the request structure.
 *   req  - in: vifi; out: icount, ocount, ibytes, obytes.
 *   ipst - IP stack instance that owns the vif table.
 * An index at or beyond ipst->ips_numvifs is rejected; the statement taken
 * on that path is not visible in this excerpt.
 */
480 get_vif_cnt(struct sioc_vif_req
*req
, ip_stack_t
*ipst
)
482 vifi_t vifi
= req
->vifi
;
/* Bounds check against the current number of configured vifs. */
484 if (vifi
>= ipst
->ips_numvifs
)
488 * No locks here, an approximation is fine.
/* Input/output packet counts, then input/output byte counts. */
490 req
->icount
= ipst
->ips_vifs
[vifi
].v_pkt_in
;
491 req
->ocount
= ipst
->ips_vifs
[vifi
].v_pkt_out
;
492 req
->ibytes
= ipst
->ips_vifs
[vifi
].v_bytes_in
;
493 req
->obytes
= ipst
->ips_vifs
[vifi
].v_bytes_out
;
/*
 * get_version: report the multicast routing version to the caller.
 *   data - output buffer, written through as an int.
 * NOTE(review): the buffer is assumed to be large enough and suitably
 * aligned for an int; no size is passed in, so confirm at the call site.
 */
499 get_version(uchar_t
*data
)
501 int *v
= (int *)data
;
/* Hard-coded version value; the original author flagged it as suspect. */
503 *v
= 0x0305; /* XXX !!!! */
509 * Set PIM assert processing global.
/*
 * set_assert: store the PIM assert flag for this IP stack.
 *   i    - points to the requested value; only 0 and 1 pass the check below.
 *   ipst - IP stack instance whose ips_pim_assert is updated.
 * The statement executed when the value is out of range is not visible in
 * this excerpt.
 */
512 set_assert(int *i
, ip_stack_t
*ipst
)
/* Any value other than 0 or 1 takes the rejection path. */
514 if ((*i
!= 1) && (*i
!= 0))
517 ipst
->ips_pim_assert
= *i
;
523 * Get PIM assert processing global.
/*
 * get_assert: report the current PIM assert flag.
 *   data - output buffer, written through as an int.
 *   ipst - IP stack instance whose ips_pim_assert is read.
 * The value is read without taking any lock in the visible code.
 */
526 get_assert(uchar_t
*data
, ip_stack_t
*ipst
)
528 int *i
= (int *)data
;
530 *i
= ipst
->ips_pim_assert
;
536 * Enable multicast routing.
539 ip_mrouter_init(conn_t
*connp
, uchar_t
*data
, int datalen
, ip_stack_t
*ipst
)
543 if (data
== NULL
|| (datalen
!= sizeof (int)))
544 return (ENOPROTOOPT
);
548 return (ENOPROTOOPT
);
550 mutex_enter(&ipst
->ips_ip_g_mrouter_mutex
);
551 if (ipst
->ips_ip_g_mrouter
!= NULL
) {
552 mutex_exit(&ipst
->ips_ip_g_mrouter_mutex
);
557 * MRT_INIT should only be allowed for RAW sockets, but we double
560 if (!IPCL_IS_RAWIP(connp
)) {
561 mutex_exit(&ipst
->ips_ip_g_mrouter_mutex
);
565 ipst
->ips_ip_g_mrouter
= connp
;
566 connp
->conn_multi_router
= 1;
567 /* In order for tunnels to work we have to turn ip_g_forward on */
568 if (!WE_ARE_FORWARDING(ipst
)) {
569 if (ipst
->ips_ip_mrtdebug
> 1) {
570 (void) mi_strlog(connp
->conn_rq
, 1, SL_TRACE
,
571 "ip_mrouter_init: turning on forwarding");
573 ipst
->ips_saved_ip_forwarding
= ipst
->ips_ip_forwarding
;
574 ipst
->ips_ip_forwarding
= IP_FORWARD_ALWAYS
;
577 mutex_exit(&ipst
->ips_ip_g_mrouter_mutex
);
582 ip_mrouter_stack_init(ip_stack_t
*ipst
)
584 mutex_init(&ipst
->ips_ip_g_mrouter_mutex
, NULL
, MUTEX_DEFAULT
, NULL
);
586 ipst
->ips_vifs
= kmem_zalloc(sizeof (struct vif
) * (MAXVIFS
+1),
588 ipst
->ips_mrtstat
= kmem_zalloc(sizeof (struct mrtstat
), KM_SLEEP
);
591 * Includes all mfcs, including waiting upcalls.
592 * Multiple mfcs per bucket.
594 ipst
->ips_mfcs
= kmem_zalloc(sizeof (struct mfcb
) * MFCTBLSIZ
,
597 * Define the token bucket filter structures.
598 * tbftable -> each vif has one of these for storing info.
600 ipst
->ips_tbfs
= kmem_zalloc(sizeof (struct tbf
) * MAXVIFS
, KM_SLEEP
);
602 mutex_init(&ipst
->ips_last_encap_lock
, NULL
, MUTEX_DEFAULT
, NULL
);
604 ipst
->ips_mrtstat
->mrts_vifctlSize
= sizeof (struct vifctl
);
605 ipst
->ips_mrtstat
->mrts_mfcctlSize
= sizeof (struct mfcctl
);
609 * Disable multicast routing.
610 * Didn't use global timeout_val (BSD version), instead check the mfctable.
613 ip_mrouter_done(ip_stack_t
*ipst
)
620 mutex_enter(&ipst
->ips_ip_g_mrouter_mutex
);
621 if (ipst
->ips_ip_g_mrouter
== NULL
) {
622 mutex_exit(&ipst
->ips_ip_g_mrouter_mutex
);
626 mrouter
= ipst
->ips_ip_g_mrouter
;
628 if (ipst
->ips_saved_ip_forwarding
!= -1) {
629 if (ipst
->ips_ip_mrtdebug
> 1) {
630 (void) mi_strlog(mrouter
->conn_rq
, 1, SL_TRACE
,
631 "ip_mrouter_done: turning off forwarding");
633 ipst
->ips_ip_forwarding
= ipst
->ips_saved_ip_forwarding
;
634 ipst
->ips_saved_ip_forwarding
= -1;
638 * Always clear cache when vifs change.
639 * No need to get ipst->ips_last_encap_lock since we are running as
642 mutex_enter(&ipst
->ips_last_encap_lock
);
643 ipst
->ips_last_encap_src
= 0;
644 ipst
->ips_last_encap_vif
= NULL
;
645 mutex_exit(&ipst
->ips_last_encap_lock
);
646 mrouter
->conn_multi_router
= 0;
648 mutex_exit(&ipst
->ips_ip_g_mrouter_mutex
);
651 * For each phyint in use,
652 * disable promiscuous reception of all IP multicasts.
654 for (vifi
= 0; vifi
< MAXVIFS
; vifi
++) {
655 struct vif
*vifp
= ipst
->ips_vifs
+ vifi
;
657 mutex_enter(&vifp
->v_lock
);
659 * if the vif is active mark it condemned.
661 if (vifp
->v_marks
& VIF_MARK_GOOD
) {
662 ASSERT(vifp
->v_ipif
!= NULL
);
663 ipif_refhold(vifp
->v_ipif
);
665 if (!(vifp
->v_flags
& (VIFF_TUNNEL
| VIFF_REGISTER
))) {
666 ipif_t
*ipif
= vifp
->v_ipif
;
667 ilm_t
*ilm
= vifp
->v_ilm
;
670 vifp
->v_marks
&= ~VIF_MARK_GOOD
;
671 vifp
->v_marks
|= VIF_MARK_CONDEMNED
;
673 mutex_exit(&(vifp
)->v_lock
);
675 ill_t
*ill
= ipif
->ipif_ill
;
677 (void) ip_delmulti(ilm
);
678 ASSERT(ill
->ill_mrouter_cnt
> 0);
679 atomic_dec_32(&ill
->ill_mrouter_cnt
);
681 mutex_enter(&vifp
->v_lock
);
683 ipif_refrele(vifp
->v_ipif
);
685 * decreases the refcnt added in add_vif.
686 * and release v_lock.
688 VIF_REFRELE_LOCKED(vifp
);
690 mutex_exit(&vifp
->v_lock
);
695 mutex_enter(&ipst
->ips_numvifs_mutex
);
696 ipst
->ips_numvifs
= 0;
697 ipst
->ips_pim_assert
= 0;
698 ipst
->ips_reg_vif_num
= ALL_VIFS
;
699 mutex_exit(&ipst
->ips_numvifs_mutex
);
703 * Go through mfctable and stop any outstanding upcall
704 * timeouts remaining on mfcs.
706 for (i
= 0; i
< MFCTBLSIZ
; i
++) {
707 mutex_enter(&ipst
->ips_mfcs
[i
].mfcb_lock
);
708 ipst
->ips_mfcs
[i
].mfcb_refcnt
++;
709 ipst
->ips_mfcs
[i
].mfcb_marks
|= MFCB_MARK_CONDEMNED
;
710 mutex_exit(&ipst
->ips_mfcs
[i
].mfcb_lock
);
711 mfc_rt
= ipst
->ips_mfcs
[i
].mfcb_mfc
;
714 mutex_enter(&mfc_rt
->mfc_mutex
);
715 if (mfc_rt
->mfc_rte
!= NULL
) {
716 if (mfc_rt
->mfc_timeout_id
!= 0) {
718 * OK to drop the lock as we have
719 * a refcnt on the bucket. timeout
720 * can fire but it will see that
721 * mfc_timeout_id == 0 and not do
722 * anything. see expire_upcalls().
724 mfc_rt
->mfc_timeout_id
= 0;
725 mutex_exit(&mfc_rt
->mfc_mutex
);
727 mfc_rt
->mfc_timeout_id
);
728 mfc_rt
->mfc_timeout_id
= 0;
729 mutex_enter(&mfc_rt
->mfc_mutex
);
732 * all queued upcall packets
733 * and mblk will be freed in
739 mfc_rt
->mfc_marks
|= MFCB_MARK_CONDEMNED
;
741 mutex_exit(&mfc_rt
->mfc_mutex
);
742 mfc_rt
= mfc_rt
->mfc_next
;
744 MFCB_REFRELE(&ipst
->ips_mfcs
[i
]);
747 mutex_enter(&ipst
->ips_ip_g_mrouter_mutex
);
748 ipst
->ips_ip_g_mrouter
= NULL
;
749 mutex_exit(&ipst
->ips_ip_g_mrouter_mutex
);
754 ip_mrouter_stack_destroy(ip_stack_t
*ipst
)
760 for (i
= 0; i
< MFCTBLSIZ
; i
++) {
761 mfcbp
= &ipst
->ips_mfcs
[i
];
763 while ((rt
= mfcbp
->mfcb_mfc
) != NULL
) {
764 (void) printf("ip_mrouter_stack_destroy: free for %d\n",
767 mfcbp
->mfcb_mfc
= rt
->mfc_next
;
772 kmem_free(ipst
->ips_vifs
, sizeof (struct vif
) * (MAXVIFS
+1));
773 ipst
->ips_vifs
= NULL
;
774 kmem_free(ipst
->ips_mrtstat
, sizeof (struct mrtstat
));
775 ipst
->ips_mrtstat
= NULL
;
776 kmem_free(ipst
->ips_mfcs
, sizeof (struct mfcb
) * MFCTBLSIZ
);
777 ipst
->ips_mfcs
= NULL
;
778 kmem_free(ipst
->ips_tbfs
, sizeof (struct tbf
) * MAXVIFS
);
779 ipst
->ips_tbfs
= NULL
;
781 mutex_destroy(&ipst
->ips_last_encap_lock
);
782 mutex_destroy(&ipst
->ips_ip_g_mrouter_mutex
);
/*
 * is_mrouter_off: inspect, under ips_ip_g_mrouter_mutex, whether multicast
 * routing is currently disabled for this IP stack.
 *   ipst - IP stack instance to examine.
 * Two conditions are tested: no router conn is registered
 * (ips_ip_g_mrouter == NULL), or the registered conn has
 * conn_multi_router == 0. The mutex is dropped on every path.
 * NOTE(review): the return statements are not visible in this excerpt;
 * presumably B_TRUE for either "off" condition and B_FALSE otherwise.
 */
786 is_mrouter_off(ip_stack_t
*ipst
)
790 mutex_enter(&ipst
->ips_ip_g_mrouter_mutex
);
/* No multicast router conn registered at all. */
791 if (ipst
->ips_ip_g_mrouter
== NULL
) {
792 mutex_exit(&ipst
->ips_ip_g_mrouter_mutex
);
796 mrouter
= ipst
->ips_ip_g_mrouter
;
/* A conn is registered but its multicast-router flag has been cleared. */
797 if (mrouter
->conn_multi_router
== 0) {
798 mutex_exit(&ipst
->ips_ip_g_mrouter_mutex
);
801 mutex_exit(&ipst
->ips_ip_g_mrouter_mutex
);
/*
 * unlock_good_vif: release the reference on vifp->v_ipif. Per the locking
 * notes at the top of the file, this is the counterpart of lock_good_vif().
 *   vifp - vif whose v_ipif must be non-NULL (asserted).
 */
806 unlock_good_vif(struct vif
*vifp
)
808 ASSERT(vifp
->v_ipif
!= NULL
);
809 ipif_refrele(vifp
->v_ipif
);
/*
 * lock_good_vif: take a reference on the ipif behind a usable vif so the
 * ipif/ill cannot go away while the caller uses it.
 *   vifp - vif to pin.
 * Locks are taken in the order v_lock, then ill_lock, matching the lock
 * order documented at the top of the file. Both locks are released before
 * the function finishes.
 * NOTE(review): the return statements on the two failure paths, and original
 * line 831 just before the final mutex_exit, are not visible in this
 * excerpt.
 */
814 lock_good_vif(struct vif
*vifp
)
816 mutex_enter(&vifp
->v_lock
);
/* Only vifs marked VIF_MARK_GOOD may be pinned. */
817 if (!(vifp
->v_marks
& VIF_MARK_GOOD
)) {
818 mutex_exit(&vifp
->v_lock
);
822 ASSERT(vifp
->v_ipif
!= NULL
);
823 mutex_enter(&vifp
->v_ipif
->ipif_ill
->ill_lock
);
/* Back out, dropping both locks, if IPIF_CAN_LOOKUP rejects the ipif. */
824 if (!IPIF_CAN_LOOKUP(vifp
->v_ipif
)) {
825 mutex_exit(&vifp
->v_ipif
->ipif_ill
->ill_lock
);
826 mutex_exit(&vifp
->v_lock
);
/* ill_lock is held here, hence the _locked variant of the refhold. */
829 ipif_refhold_locked(vifp
->v_ipif
);
830 mutex_exit(&vifp
->v_ipif
->ipif_ill
->ill_lock
);
832 mutex_exit(&vifp
->v_lock
);
837 * Add a vif to the vif table.
840 add_vif(struct vifctl
*vifcp
, conn_t
*connp
, ip_stack_t
*ipst
)
842 struct vif
*vifp
= ipst
->ips_vifs
+ vifcp
->vifc_vifi
;
845 struct tbf
*v_tbf
= ipst
->ips_tbfs
+ vifcp
->vifc_vifi
;
846 conn_t
*mrouter
= ipst
->ips_ip_g_mrouter
;
850 ASSERT(connp
!= NULL
);
852 if (vifcp
->vifc_vifi
>= MAXVIFS
)
855 if (is_mrouter_off(ipst
))
858 mutex_enter(&vifp
->v_lock
);
860 * Viftable entry should be 0.
861 * if v_marks == 0 but v_refcnt != 0 means struct is being
864 * Also note that it is very unlikely that we will get a MRT_ADD_VIF
865 * request while the delete is in progress, mrouted only sends add
866 * requests when a new interface is added and the new interface cannot
867 * have the same vifi as an existing interface. We make sure that
868 * ill_delete will block till the vif is deleted by adding a refcnt
869 * to ipif in del_vif().
871 if (vifp
->v_lcl_addr
.s_addr
!= 0 ||
872 vifp
->v_marks
!= 0 ||
873 vifp
->v_refcnt
!= 0) {
874 mutex_exit(&vifp
->v_lock
);
878 /* Incoming vif should not be 0 */
879 if (vifcp
->vifc_lcl_addr
.s_addr
== 0) {
880 mutex_exit(&vifp
->v_lock
);
885 mutex_exit(&vifp
->v_lock
);
886 /* Find the interface with the local address */
887 ipif
= ipif_lookup_addr((ipaddr_t
)vifcp
->vifc_lcl_addr
.s_addr
, NULL
,
888 IPCL_ZONEID(connp
), ipst
);
891 return (EADDRNOTAVAIL
);
894 if (ipst
->ips_ip_mrtdebug
> 1) {
895 (void) mi_strlog(mrouter
->conn_rq
, 1, SL_TRACE
,
896 "add_vif: src 0x%x enter",
897 vifcp
->vifc_lcl_addr
.s_addr
);
900 mutex_enter(&vifp
->v_lock
);
902 * Always clear cache when vifs change.
903 * Needed to ensure that src isn't left over from before vif was added.
904 * No need to get last_encap_lock, since we are running as a writer.
907 mutex_enter(&ipst
->ips_last_encap_lock
);
908 ipst
->ips_last_encap_src
= 0;
909 ipst
->ips_last_encap_vif
= NULL
;
910 mutex_exit(&ipst
->ips_last_encap_lock
);
912 if (vifcp
->vifc_flags
& VIFF_TUNNEL
) {
913 if ((vifcp
->vifc_flags
& VIFF_SRCRT
) != 0) {
915 "add_vif: source route tunnels not supported\n");
916 VIF_REFRELE_LOCKED(vifp
);
920 vifp
->v_rmt_addr
= vifcp
->vifc_rmt_addr
;
923 /* Phyint or Register vif */
924 if (vifcp
->vifc_flags
& VIFF_REGISTER
) {
926 * Note: Since all IPPROTO_IP level options (including
927 * MRT_ADD_VIF) are done exclusively via
928 * ip_optmgmt_writer(), a lock is not necessary to
929 * protect reg_vif_num.
931 mutex_enter(&ipst
->ips_numvifs_mutex
);
932 if (ipst
->ips_reg_vif_num
== ALL_VIFS
) {
933 ipst
->ips_reg_vif_num
= vifcp
->vifc_vifi
;
934 mutex_exit(&ipst
->ips_numvifs_mutex
);
936 mutex_exit(&ipst
->ips_numvifs_mutex
);
937 VIF_REFRELE_LOCKED(vifp
);
943 /* Make sure the interface supports multicast */
944 if ((ipif
->ipif_ill
->ill_flags
& ILLF_MULTICAST
) == 0) {
945 VIF_REFRELE_LOCKED(vifp
);
947 if (vifcp
->vifc_flags
& VIFF_REGISTER
) {
948 mutex_enter(&ipst
->ips_numvifs_mutex
);
949 ipst
->ips_reg_vif_num
= ALL_VIFS
;
950 mutex_exit(&ipst
->ips_numvifs_mutex
);
954 /* Enable promiscuous reception of all IP mcasts from the if */
955 mutex_exit(&vifp
->v_lock
);
957 ill
= ipif
->ipif_ill
;
958 if (IS_UNDER_IPMP(ill
))
959 ill
= ipmp_ill_hold_ipmp_ill(ill
);
964 ilm
= ip_addmulti(&ipv6_all_zeros
, ill
,
965 ipif
->ipif_zoneid
, &error
);
967 atomic_inc_32(&ill
->ill_mrouter_cnt
);
968 if (IS_UNDER_IPMP(ipif
->ipif_ill
)) {
970 ill
= ipif
->ipif_ill
;
974 mutex_enter(&vifp
->v_lock
);
976 * since we released the lock lets make sure that
977 * ip_mrouter_done() has not been called.
979 if (ilm
== NULL
|| is_mrouter_off(ipst
)) {
981 (void) ip_delmulti(ilm
);
982 ASSERT(ill
->ill_mrouter_cnt
> 0);
983 atomic_dec_32(&ill
->ill_mrouter_cnt
);
985 if (vifcp
->vifc_flags
& VIFF_REGISTER
) {
986 mutex_enter(&ipst
->ips_numvifs_mutex
);
987 ipst
->ips_reg_vif_num
= ALL_VIFS
;
988 mutex_exit(&ipst
->ips_numvifs_mutex
);
990 VIF_REFRELE_LOCKED(vifp
);
992 return (error
?error
:EINVAL
);
996 /* Define parameters for the tbf structure */
998 gethrestime(&vifp
->v_tbf
->tbf_last_pkt_t
);
999 vifp
->v_tbf
->tbf_n_tok
= 0;
1000 vifp
->v_tbf
->tbf_q_len
= 0;
1001 vifp
->v_tbf
->tbf_max_q_len
= MAXQSIZE
;
1002 vifp
->v_tbf
->tbf_q
= vifp
->v_tbf
->tbf_t
= NULL
;
1004 vifp
->v_flags
= vifcp
->vifc_flags
;
1005 vifp
->v_threshold
= vifcp
->vifc_threshold
;
1006 vifp
->v_lcl_addr
= vifcp
->vifc_lcl_addr
;
1007 vifp
->v_ipif
= ipif
;
1009 /* Scaling up here, allows division by 1024 in critical code. */
1010 vifp
->v_rate_limit
= vifcp
->vifc_rate_limit
* (1024/1000);
1011 vifp
->v_timeout_id
= 0;
1012 /* initialize per vif pkt counters */
1014 vifp
->v_pkt_out
= 0;
1015 vifp
->v_bytes_in
= 0;
1016 vifp
->v_bytes_out
= 0;
1017 mutex_init(&vifp
->v_tbf
->tbf_lock
, NULL
, MUTEX_DEFAULT
, NULL
);
1019 /* Adjust numvifs up, if the vifi is higher than numvifs */
1020 mutex_enter(&ipst
->ips_numvifs_mutex
);
1021 if (ipst
->ips_numvifs
<= vifcp
->vifc_vifi
)
1022 ipst
->ips_numvifs
= vifcp
->vifc_vifi
+ 1;
1023 mutex_exit(&ipst
->ips_numvifs_mutex
);
1025 if (ipst
->ips_ip_mrtdebug
> 1) {
1026 (void) mi_strlog(mrouter
->conn_rq
, 1, SL_TRACE
,
1027 "add_vif: #%d, lcladdr %x, %s %x, thresh %x, rate %d",
1029 ntohl(vifcp
->vifc_lcl_addr
.s_addr
),
1030 (vifcp
->vifc_flags
& VIFF_TUNNEL
) ? "rmtaddr" : "mask",
1031 ntohl(vifcp
->vifc_rmt_addr
.s_addr
),
1032 vifcp
->vifc_threshold
, vifcp
->vifc_rate_limit
);
1035 vifp
->v_marks
= VIF_MARK_GOOD
;
1036 mutex_exit(&vifp
->v_lock
);
1041 /* Delete a vif from the vif table. */
1043 del_vifp(struct vif
*vifp
)
1045 struct tbf
*t
= vifp
->v_tbf
;
1048 ip_stack_t
*ipst
= vifp
->v_ipif
->ipif_ill
->ill_ipst
;
1049 conn_t
*mrouter
= ipst
->ips_ip_g_mrouter
;
1051 ASSERT(vifp
->v_marks
& VIF_MARK_CONDEMNED
);
1054 if (ipst
->ips_ip_mrtdebug
> 1) {
1055 (void) mi_strlog(mrouter
->conn_rq
, 1, SL_TRACE
,
1056 "del_vif: src 0x%x\n", vifp
->v_lcl_addr
.s_addr
);
1059 if (vifp
->v_timeout_id
!= 0) {
1060 (void) untimeout(vifp
->v_timeout_id
);
1061 vifp
->v_timeout_id
= 0;
1065 * Free packets queued at the interface.
1066 * Mrouted takes care of cleaning up mfcs - makes calls to del_mfc.
1068 mutex_enter(&t
->tbf_lock
);
1069 while (t
->tbf_q
!= NULL
) {
1071 t
->tbf_q
= t
->tbf_q
->b_next
;
1072 mp0
->b_prev
= mp0
->b_next
= NULL
;
1075 mutex_exit(&t
->tbf_lock
);
1078 * Always clear cache when vifs change.
1079 * No need to get last_encap_lock since we are running as a writer.
1081 mutex_enter(&ipst
->ips_last_encap_lock
);
1082 if (vifp
== ipst
->ips_last_encap_vif
) {
1083 ipst
->ips_last_encap_vif
= NULL
;
1084 ipst
->ips_last_encap_src
= 0;
1086 mutex_exit(&ipst
->ips_last_encap_lock
);
1088 mutex_destroy(&t
->tbf_lock
);
1090 bzero(vifp
->v_tbf
, sizeof (*(vifp
->v_tbf
)));
1092 /* Adjust numvifs down */
1093 mutex_enter(&ipst
->ips_numvifs_mutex
);
1094 for (vifi
= ipst
->ips_numvifs
; vifi
!= 0; vifi
--) /* vifi is unsigned */
1095 if (ipst
->ips_vifs
[vifi
- 1].v_lcl_addr
.s_addr
!= 0)
1097 ipst
->ips_numvifs
= vifi
;
1098 mutex_exit(&ipst
->ips_numvifs_mutex
);
1100 bzero(vifp
, sizeof (*vifp
));
1104 del_vif(vifi_t
*vifip
, ip_stack_t
*ipst
)
1106 struct vif
*vifp
= ipst
->ips_vifs
+ *vifip
;
1108 if (*vifip
>= ipst
->ips_numvifs
)
1111 mutex_enter(&vifp
->v_lock
);
1114 * Here we are not looking at the vif that is being initialized
1115 * i.e vifp->v_marks == 0 and refcnt > 0.
1117 if (vifp
->v_lcl_addr
.s_addr
== 0 ||
1118 !(vifp
->v_marks
& VIF_MARK_GOOD
)) {
1119 mutex_exit(&vifp
->v_lock
);
1120 return (EADDRNOTAVAIL
);
1123 /* Clear VIF_MARK_GOOD and set VIF_MARK_CONDEMNED. */
1124 vifp
->v_marks
&= ~VIF_MARK_GOOD
;
1125 vifp
->v_marks
|= VIF_MARK_CONDEMNED
;
1128 if (!(vifp
->v_flags
& (VIFF_TUNNEL
| VIFF_REGISTER
))) {
1129 ipif_t
*ipif
= vifp
->v_ipif
;
1130 ilm_t
*ilm
= vifp
->v_ilm
;
1134 ASSERT(ipif
!= NULL
);
1136 * should be OK to drop the lock as we
1137 * have marked this as CONDEMNED.
1139 mutex_exit(&(vifp
)->v_lock
);
1141 (void) ip_delmulti(ilm
);
1142 ASSERT(ipif
->ipif_ill
->ill_mrouter_cnt
> 0);
1143 atomic_dec_32(&ipif
->ipif_ill
->ill_mrouter_cnt
);
1145 mutex_enter(&(vifp
)->v_lock
);
1148 if (vifp
->v_flags
& VIFF_REGISTER
) {
1149 mutex_enter(&ipst
->ips_numvifs_mutex
);
1150 ipst
->ips_reg_vif_num
= ALL_VIFS
;
1151 mutex_exit(&ipst
->ips_numvifs_mutex
);
1155 * decreases the refcnt added in add_vif.
1157 VIF_REFRELE_LOCKED(vifp
);
1165 add_mfc(struct mfcctl
*mfccp
, ip_stack_t
*ipst
)
1172 conn_t
*mrouter
= ipst
->ips_ip_g_mrouter
;
1175 * The value of vifi is NO_VIF (==MAXVIFS) if Mrouted
1176 * did not have a real route for pkt.
1177 * We want this pkt without rt installed in the mfctable to prevent
1178 * multiple tries, so go ahead and put it in mfctable, it will
1179 * be discarded later in ip_mdq() because the child is NULL.
1182 /* Error checking, out of bounds? */
1183 if (mfccp
->mfcc_parent
> MAXVIFS
) {
1184 ip0dbg(("ADD_MFC: mfcc_parent out of range %d",
1185 (int)mfccp
->mfcc_parent
));
1189 if ((mfccp
->mfcc_parent
!= NO_VIF
) &&
1190 (ipst
->ips_vifs
[mfccp
->mfcc_parent
].v_ipif
== NULL
)) {
1191 ip0dbg(("ADD_MFC: NULL ipif for parent vif %d\n",
1192 (int)mfccp
->mfcc_parent
));
1196 if (is_mrouter_off(ipst
)) {
1200 mfcbp
= &ipst
->ips_mfcs
[MFCHASH(mfccp
->mfcc_origin
.s_addr
,
1201 mfccp
->mfcc_mcastgrp
.s_addr
)];
1202 MFCB_REFHOLD(mfcbp
);
1203 MFCFIND(mfcbp
, mfccp
->mfcc_origin
.s_addr
,
1204 mfccp
->mfcc_mcastgrp
.s_addr
, rt
);
1206 /* If an entry already exists, just update the fields */
1208 if (ipst
->ips_ip_mrtdebug
> 1) {
1209 (void) mi_strlog(mrouter
->conn_rq
, 1, SL_TRACE
,
1210 "add_mfc: update o %x grp %x parent %x",
1211 ntohl(mfccp
->mfcc_origin
.s_addr
),
1212 ntohl(mfccp
->mfcc_mcastgrp
.s_addr
),
1213 mfccp
->mfcc_parent
);
1215 mutex_enter(&rt
->mfc_mutex
);
1216 rt
->mfc_parent
= mfccp
->mfcc_parent
;
1218 mutex_enter(&ipst
->ips_numvifs_mutex
);
1219 for (i
= 0; i
< (int)ipst
->ips_numvifs
; i
++)
1220 rt
->mfc_ttls
[i
] = mfccp
->mfcc_ttls
[i
];
1221 mutex_exit(&ipst
->ips_numvifs_mutex
);
1222 mutex_exit(&rt
->mfc_mutex
);
1224 MFCB_REFRELE(mfcbp
);
1229 * Find the entry for which the upcall was made and update.
1231 for (rt
= mfcbp
->mfcb_mfc
, nstl
= 0; rt
; rt
= rt
->mfc_next
) {
1232 mutex_enter(&rt
->mfc_mutex
);
1233 if ((rt
->mfc_origin
.s_addr
== mfccp
->mfcc_origin
.s_addr
) &&
1234 (rt
->mfc_mcastgrp
.s_addr
== mfccp
->mfcc_mcastgrp
.s_addr
) &&
1235 (rt
->mfc_rte
!= NULL
) &&
1236 !(rt
->mfc_marks
& MFCB_MARK_CONDEMNED
)) {
1239 "add_mfc: %s o %x g %x p %x",
1240 "multiple kernel entries",
1241 ntohl(mfccp
->mfcc_origin
.s_addr
),
1242 ntohl(mfccp
->mfcc_mcastgrp
.s_addr
),
1243 mfccp
->mfcc_parent
);
1245 if (ipst
->ips_ip_mrtdebug
> 1) {
1246 (void) mi_strlog(mrouter
->conn_rq
, 1,
1248 "add_mfc: o %x g %x p %x",
1249 ntohl(mfccp
->mfcc_origin
.s_addr
),
1250 ntohl(mfccp
->mfcc_mcastgrp
.s_addr
),
1251 mfccp
->mfcc_parent
);
1253 fill_route(rt
, mfccp
, ipst
);
1256 * Prevent cleanup of cache entry.
1257 * Timer starts in ip_mforward.
1259 if (rt
->mfc_timeout_id
!= 0) {
1261 id
= rt
->mfc_timeout_id
;
1263 * setting id to zero will avoid this
1264 * entry from being cleaned up in
1265 * expire_up_calls().
1267 rt
->mfc_timeout_id
= 0;
1269 * dropping the lock is fine as we
1270 * have a refhold on the bucket.
1271 * so mfc cannot be freed.
1272 * The timeout can fire but it will see
1273 * that mfc_timeout_id == 0 and not cleanup.
1275 mutex_exit(&rt
->mfc_mutex
);
1276 (void) untimeout(id
);
1277 mutex_enter(&rt
->mfc_mutex
);
1281 * Send all pkts that are queued waiting for the upcall.
1282 * ip_mdq param tun set to 0 -
1283 * the return value of ip_mdq() isn't used here,
1284 * so value we send doesn't matter.
1286 while (rt
->mfc_rte
!= NULL
) {
1288 rt
->mfc_rte
= rte
->rte_next
;
1289 mutex_exit(&rt
->mfc_mutex
);
1290 (void) ip_mdq(rte
->mp
, (ipha_t
*)
1291 rte
->mp
->b_rptr
, rte
->ill
, 0, rt
);
1293 mi_free((char *)rte
);
1294 mutex_enter(&rt
->mfc_mutex
);
1297 mutex_exit(&rt
->mfc_mutex
);
1302 * It is possible that an entry is being inserted without an upcall
1305 mutex_enter(&(mfcbp
->mfcb_lock
));
1306 if (ipst
->ips_ip_mrtdebug
> 1) {
1307 (void) mi_strlog(mrouter
->conn_rq
, 1, SL_TRACE
,
1308 "add_mfc: no upcall o %x g %x p %x",
1309 ntohl(mfccp
->mfcc_origin
.s_addr
),
1310 ntohl(mfccp
->mfcc_mcastgrp
.s_addr
),
1311 mfccp
->mfcc_parent
);
1313 if (is_mrouter_off(ipst
)) {
1314 mutex_exit(&mfcbp
->mfcb_lock
);
1315 MFCB_REFRELE(mfcbp
);
1319 for (rt
= mfcbp
->mfcb_mfc
; rt
; rt
= rt
->mfc_next
) {
1321 mutex_enter(&rt
->mfc_mutex
);
1322 if ((rt
->mfc_origin
.s_addr
==
1323 mfccp
->mfcc_origin
.s_addr
) &&
1324 (rt
->mfc_mcastgrp
.s_addr
==
1325 mfccp
->mfcc_mcastgrp
.s_addr
) &&
1326 (!(rt
->mfc_marks
& MFCB_MARK_CONDEMNED
))) {
1327 fill_route(rt
, mfccp
, ipst
);
1328 mutex_exit(&rt
->mfc_mutex
);
1331 mutex_exit(&rt
->mfc_mutex
);
1334 /* No upcall, so make a new entry into mfctable */
1336 rt
= (struct mfc
*)mi_zalloc(sizeof (struct mfc
));
1338 ip1dbg(("add_mfc: out of memory\n"));
1339 mutex_exit(&mfcbp
->mfcb_lock
);
1340 MFCB_REFRELE(mfcbp
);
1344 /* Insert new entry at head of hash chain */
1345 mutex_enter(&rt
->mfc_mutex
);
1346 fill_route(rt
, mfccp
, ipst
);
1348 /* Link into table */
1349 rt
->mfc_next
= mfcbp
->mfcb_mfc
;
1350 mfcbp
->mfcb_mfc
= rt
;
1351 mutex_exit(&rt
->mfc_mutex
);
1353 mutex_exit(&mfcbp
->mfcb_lock
);
1356 MFCB_REFRELE(mfcbp
);
1361 * Fills in mfc structure from mrouted mfcctl.
1364 fill_route(struct mfc
*rt
, struct mfcctl
*mfccp
, ip_stack_t
*ipst
)
1368 rt
->mfc_origin
= mfccp
->mfcc_origin
;
1369 rt
->mfc_mcastgrp
= mfccp
->mfcc_mcastgrp
;
1370 rt
->mfc_parent
= mfccp
->mfcc_parent
;
1371 mutex_enter(&ipst
->ips_numvifs_mutex
);
1372 for (i
= 0; i
< (int)ipst
->ips_numvifs
; i
++) {
1373 rt
->mfc_ttls
[i
] = mfccp
->mfcc_ttls
[i
];
1375 mutex_exit(&ipst
->ips_numvifs_mutex
);
1376 /* Initialize pkt counters per src-grp */
1377 rt
->mfc_pkt_cnt
= 0;
1378 rt
->mfc_byte_cnt
= 0;
1379 rt
->mfc_wrong_if
= 0;
1380 rt
->mfc_last_assert
.tv_sec
= rt
->mfc_last_assert
.tv_nsec
= 0;
1385 free_queue(struct mfc
*mfcp
)
1387 struct rtdetq
*rte0
;
1390 * Drop all queued upcall packets.
1391 * Free the mbuf with the pkt.
1393 while ((rte0
= mfcp
->mfc_rte
) != NULL
) {
1394 mfcp
->mfc_rte
= rte0
->rte_next
;
1396 mi_free((char *)rte0
);
1400 * go thorugh the hash bucket and free all the entries marked condemned.
1403 release_mfc(struct mfcb
*mfcbp
)
1405 struct mfc
*current_mfcp
;
1406 struct mfc
*prev_mfcp
;
1408 prev_mfcp
= current_mfcp
= mfcbp
->mfcb_mfc
;
1410 while (current_mfcp
!= NULL
) {
1411 if (current_mfcp
->mfc_marks
& MFCB_MARK_CONDEMNED
) {
1412 if (current_mfcp
== mfcbp
->mfcb_mfc
) {
1413 mfcbp
->mfcb_mfc
= current_mfcp
->mfc_next
;
1414 free_queue(current_mfcp
);
1415 mi_free(current_mfcp
);
1416 prev_mfcp
= current_mfcp
= mfcbp
->mfcb_mfc
;
1419 ASSERT(prev_mfcp
!= NULL
);
1420 prev_mfcp
->mfc_next
= current_mfcp
->mfc_next
;
1421 free_queue(current_mfcp
);
1422 mi_free(current_mfcp
);
1423 current_mfcp
= NULL
;
1425 prev_mfcp
= current_mfcp
;
1428 current_mfcp
= prev_mfcp
->mfc_next
;
1431 mfcbp
->mfcb_marks
&= ~MFCB_MARK_CONDEMNED
;
1432 ASSERT(mfcbp
->mfcb_mfc
!= NULL
|| mfcbp
->mfcb_marks
== 0);
1436 * Delete an mfc entry.
1439 del_mfc(struct mfcctl
*mfccp
, ip_stack_t
*ipst
)
1441 struct in_addr origin
;
1442 struct in_addr mcastgrp
;
1445 conn_t
*mrouter
= ipst
->ips_ip_g_mrouter
;
1447 origin
= mfccp
->mfcc_origin
;
1448 mcastgrp
= mfccp
->mfcc_mcastgrp
;
1449 hash
= MFCHASH(origin
.s_addr
, mcastgrp
.s_addr
);
1451 if (ipst
->ips_ip_mrtdebug
> 1) {
1452 (void) mi_strlog(mrouter
->conn_rq
, 1, SL_TRACE
,
1453 "del_mfc: o %x g %x",
1454 ntohl(origin
.s_addr
),
1455 ntohl(mcastgrp
.s_addr
));
1458 MFCB_REFHOLD(&ipst
->ips_mfcs
[hash
]);
1460 /* Find mfc in mfctable, finds only entries without upcalls */
1461 for (rt
= ipst
->ips_mfcs
[hash
].mfcb_mfc
; rt
; rt
= rt
->mfc_next
) {
1462 mutex_enter(&rt
->mfc_mutex
);
1463 if (origin
.s_addr
== rt
->mfc_origin
.s_addr
&&
1464 mcastgrp
.s_addr
== rt
->mfc_mcastgrp
.s_addr
&&
1465 rt
->mfc_rte
== NULL
&&
1466 !(rt
->mfc_marks
& MFCB_MARK_CONDEMNED
))
1468 mutex_exit(&rt
->mfc_mutex
);
1472 * Return if there was an upcall (mfc_rte != NULL,
1473 * or rt not in mfctable.
1476 MFCB_REFRELE(&ipst
->ips_mfcs
[hash
]);
1477 return (EADDRNOTAVAIL
);
1482 * no need to hold lock as we have a reference.
1484 ipst
->ips_mfcs
[hash
].mfcb_marks
|= MFCB_MARK_CONDEMNED
;
1485 /* error checking */
1486 if (rt
->mfc_timeout_id
!= 0) {
1487 ip0dbg(("del_mfc: TIMEOUT NOT 0, rte not null"));
1489 * Its ok to drop the lock, the struct cannot be freed
1490 * since we have a ref on the hash bucket.
1492 rt
->mfc_timeout_id
= 0;
1493 mutex_exit(&rt
->mfc_mutex
);
1494 (void) untimeout(rt
->mfc_timeout_id
);
1495 mutex_enter(&rt
->mfc_mutex
);
1498 ASSERT(rt
->mfc_rte
== NULL
);
1502 * Delete the entry from the cache
1504 rt
->mfc_marks
|= MFCB_MARK_CONDEMNED
;
1505 mutex_exit(&rt
->mfc_mutex
);
1507 MFCB_REFRELE(&ipst
->ips_mfcs
[hash
]);
1512 #define TUNNEL_LEN 12 /* # bytes of IP option for tunnel encapsulation */
1515 * IP multicast forwarding function. This function assumes that the packet
1516 * pointed to by ipha has arrived on (or is about to be sent to) the interface
1517 * pointed to by "ill", and the packet is to be relayed to other networks
1518 * that have members of the packet's destination IP multicast group.
1520 * The packet is returned unscathed to the caller, unless it is
1521 * erroneous, in which case a -1 value tells the caller (IP)
1524 * Unlike BSD, SunOS 5.x needs to return to IP info about
1525 * whether pkt came in thru a tunnel, so it can be discarded, unless
1526 * it's IGMP. In BSD, the ifp is bogus for tunnels, so pkt won't try
1528 * Return values are 0 - pkt is okay and phyint
1529 * -1 - pkt is malformed and to be tossed
1530 * 1 - pkt came in on tunnel
1533 ip_mforward(mblk_t
*mp
, ip_recv_attr_t
*ira
)
1535 ipha_t
*ipha
= (ipha_t
*)mp
->b_rptr
;
1536 ill_t
*ill
= ira
->ira_ill
;
1538 ipaddr_t src
, dst
, tunnel_src
= 0;
1539 static int srctun
= 0;
1541 boolean_t pim_reg_packet
= B_FALSE
;
1543 ip_stack_t
*ipst
= ill
->ill_ipst
;
1544 conn_t
*mrouter
= ipst
->ips_ip_g_mrouter
;
1545 ill_t
*rill
= ira
->ira_rill
;
1547 ASSERT(ira
->ira_pktlen
== msgdsize(mp
));
1549 if (ipst
->ips_ip_mrtdebug
> 1) {
1550 (void) mi_strlog(mrouter
->conn_rq
, 1, SL_TRACE
,
1551 "ip_mforward: RECV ipha_src %x, ipha_dst %x, ill %s",
1552 ntohl(ipha
->ipha_src
), ntohl(ipha
->ipha_dst
),
1556 dst
= ipha
->ipha_dst
;
1557 if (ira
->ira_flags
& IRAF_PIM_REGISTER
)
1558 pim_reg_packet
= B_TRUE
;
1559 else if (ira
->ira_flags
& IRAF_MROUTE_TUNNEL_SET
)
1560 tunnel_src
= ira
->ira_mroute_tunnel
;
1563 * Don't forward a packet with time-to-live of zero or one,
1564 * or a packet destined to a local-only group.
1566 if (CLASSD(dst
) && (ipha
->ipha_ttl
<= 1 ||
1567 (ipaddr_t
)ntohl(dst
) <= INADDR_MAX_LOCAL_GROUP
)) {
1568 if (ipst
->ips_ip_mrtdebug
> 1) {
1569 (void) mi_strlog(mrouter
->conn_rq
, 1, SL_TRACE
,
1570 "ip_mforward: not forwarded ttl %d,"
1572 ipha
->ipha_ttl
, ntohl(dst
), ill
->ill_name
);
1574 if (tunnel_src
!= 0)
1580 if ((tunnel_src
!= 0) || pim_reg_packet
) {
1582 * Packet arrived over an encapsulated tunnel or via a PIM
1585 if (ipst
->ips_ip_mrtdebug
> 1) {
1586 if (tunnel_src
!= 0) {
1587 (void) mi_strlog(mrouter
->conn_rq
, 1,
1589 "ip_mforward: ill %s arrived via ENCAP TUN",
1591 } else if (pim_reg_packet
) {
1592 (void) mi_strlog(mrouter
->conn_rq
, 1,
1594 "ip_mforward: ill %s arrived via"
1599 } else if ((ipha
->ipha_version_and_hdr_length
& 0xf) <
1600 (uint_t
)(IP_SIMPLE_HDR_LENGTH
+ TUNNEL_LEN
) >> 2 ||
1601 ((uchar_t
*)(ipha
+ 1))[1] != IPOPT_LSRR
) {
1602 /* Packet arrived via a physical interface. */
1603 if (ipst
->ips_ip_mrtdebug
> 1) {
1604 (void) mi_strlog(mrouter
->conn_rq
, 1, SL_TRACE
,
1605 "ip_mforward: ill %s arrived via PHYINT",
1611 * Packet arrived through a SRCRT tunnel.
1612 * Source-route tunnels are no longer supported.
1613 * Error message printed every 1000 times.
1615 if ((srctun
++ % 1000) == 0) {
1617 "ip_mforward: received source-routed pkt from %x",
1618 ntohl(ipha
->ipha_src
));
1623 ipst
->ips_mrtstat
->mrts_fwd_in
++;
1624 src
= ipha
->ipha_src
;
1626 /* Find route in cache, return NULL if not there or upcalls q'ed. */
1629 * Lock the mfctable against changes made by ip_mforward.
1630 * Note that only add_mfc and del_mfc can remove entries and
1631 * they run with exclusive access to IP. So we do not need to
1632 * guard against the rt being deleted, so release lock after reading.
1635 if (is_mrouter_off(ipst
))
1638 mfcbp
= &ipst
->ips_mfcs
[MFCHASH(src
, dst
)];
1639 MFCB_REFHOLD(mfcbp
);
1640 MFCFIND(mfcbp
, src
, dst
, rt
);
1642 /* Entry exists, so forward if necessary */
1645 ipst
->ips_mrtstat
->mrts_mfc_hits
++;
1646 if (pim_reg_packet
) {
1647 ASSERT(ipst
->ips_reg_vif_num
!= ALL_VIFS
);
1648 ret
= ip_mdq(mp
, ipha
,
1649 ipst
->ips_vifs
[ipst
->ips_reg_vif_num
].
1653 ret
= ip_mdq(mp
, ipha
, ill
, tunnel_src
, rt
);
1656 MFCB_REFRELE(mfcbp
);
1660 * Don't forward if we don't have a cache entry. Mrouted will
1661 * always provide a cache entry in response to an upcall.
1665 * If we don't have a route for packet's origin, make a copy
1666 * of the packet and send message to routing daemon.
1668 struct mfc
*mfc_rt
= NULL
;
1670 mblk_t
*mp_copy
= NULL
;
1671 struct rtdetq
*rte
= NULL
;
1672 struct rtdetq
*rte_m
, *rte1
, *prev_rte
;
1675 boolean_t new_mfc
= B_FALSE
;
1676 ipst
->ips_mrtstat
->mrts_mfc_misses
++;
1677 /* BSD uses mrts_no_route++ */
1678 if (ipst
->ips_ip_mrtdebug
> 1) {
1679 (void) mi_strlog(mrouter
->conn_rq
, 1, SL_TRACE
,
1680 "ip_mforward: no rte ill %s src %x g %x misses %d",
1681 ill
->ill_name
, ntohl(src
), ntohl(dst
),
1682 (int)ipst
->ips_mrtstat
->mrts_mfc_misses
);
1685 * The order of the following code differs from the BSD code.
1686 * Pre-mc3.5, the BSD code was incorrect and SunOS 5.x
1687 * code works, so SunOS 5.x wasn't changed to conform to the
1691 /* Lock mfctable. */
1692 hash
= MFCHASH(src
, dst
);
1693 mutex_enter(&(ipst
->ips_mfcs
[hash
].mfcb_lock
));
1696 * If we are turning off mrouted return an error
1698 if (is_mrouter_off(ipst
)) {
1699 mutex_exit(&mfcbp
->mfcb_lock
);
1700 MFCB_REFRELE(mfcbp
);
1704 /* Is there an upcall waiting for this packet? */
1705 for (mfc_rt
= ipst
->ips_mfcs
[hash
].mfcb_mfc
; mfc_rt
;
1706 mfc_rt
= mfc_rt
->mfc_next
) {
1707 mutex_enter(&mfc_rt
->mfc_mutex
);
1708 if (ipst
->ips_ip_mrtdebug
> 1) {
1709 (void) mi_strlog(mrouter
->conn_rq
, 1,
1711 "ip_mforward: MFCTAB hash %d o 0x%x"
1713 hash
, ntohl(mfc_rt
->mfc_origin
.s_addr
),
1714 ntohl(mfc_rt
->mfc_mcastgrp
.s_addr
));
1716 /* There is an upcall */
1717 if ((src
== mfc_rt
->mfc_origin
.s_addr
) &&
1718 (dst
== mfc_rt
->mfc_mcastgrp
.s_addr
) &&
1719 (mfc_rt
->mfc_rte
!= NULL
) &&
1720 !(mfc_rt
->mfc_marks
& MFCB_MARK_CONDEMNED
)) {
1723 mutex_exit(&mfc_rt
->mfc_mutex
);
1725 /* No upcall, so make a new entry into mfctable */
1726 if (mfc_rt
== NULL
) {
1727 mfc_rt
= (struct mfc
*)mi_zalloc(sizeof (struct mfc
));
1728 if (mfc_rt
== NULL
) {
1729 ipst
->ips_mrtstat
->mrts_fwd_drop
++;
1730 ip1dbg(("ip_mforward: out of memory "
1731 "for mfc, mfc_rt\n"));
1736 /* TODO could copy header and dup rest */
1737 mp_copy
= copymsg(mp
);
1738 if (mp_copy
== NULL
) {
1739 ipst
->ips_mrtstat
->mrts_fwd_drop
++;
1740 ip1dbg(("ip_mforward: out of memory for "
1741 "mblk, mp_copy\n"));
1744 mutex_enter(&mfc_rt
->mfc_mutex
);
1746 /* Get resources for rte, whether first rte or not first. */
1747 /* Add this packet into rtdetq */
1748 rte
= (struct rtdetq
*)mi_zalloc(sizeof (struct rtdetq
));
1750 ipst
->ips_mrtstat
->mrts_fwd_drop
++;
1751 mutex_exit(&mfc_rt
->mfc_mutex
);
1752 ip1dbg(("ip_mforward: out of memory for"
1759 ipst
->ips_mrtstat
->mrts_fwd_drop
++;
1760 ip1dbg(("ip_mforward: out of memory for mblk, mp0\n"));
1761 mutex_exit(&mfc_rt
->mfc_mutex
);
1765 if (pim_reg_packet
) {
1766 ASSERT(ipst
->ips_reg_vif_num
!= ALL_VIFS
);
1768 ipst
->ips_vifs
[ipst
->ips_reg_vif_num
].
1773 rte
->rte_next
= NULL
;
1776 * Determine if upcall q (rtdetq) has overflowed.
1777 * mfc_rt->mfc_rte is null by mi_zalloc
1778 * if it is the first message.
1780 for (rte_m
= mfc_rt
->mfc_rte
, npkts
= 0; rte_m
;
1781 rte_m
= rte_m
->rte_next
)
1783 if (ipst
->ips_ip_mrtdebug
> 1) {
1784 (void) mi_strlog(mrouter
->conn_rq
, 1, SL_TRACE
,
1785 "ip_mforward: upcalls %d\n", npkts
);
1787 if (npkts
> MAX_UPQ
) {
1788 ipst
->ips_mrtstat
->mrts_upq_ovflw
++;
1789 mutex_exit(&mfc_rt
->mfc_mutex
);
1793 if (npkts
== 0) { /* first upcall */
1796 * Now finish installing the new mfc! Now that we have
1797 * resources! Insert new entry at head of hash chain.
1798 * Use src and dst which are ipaddr_t's.
1800 mfc_rt
->mfc_origin
.s_addr
= src
;
1801 mfc_rt
->mfc_mcastgrp
.s_addr
= dst
;
1803 mutex_enter(&ipst
->ips_numvifs_mutex
);
1804 for (i
= 0; i
< (int)ipst
->ips_numvifs
; i
++)
1805 mfc_rt
->mfc_ttls
[i
] = 0;
1806 mutex_exit(&ipst
->ips_numvifs_mutex
);
1807 mfc_rt
->mfc_parent
= ALL_VIFS
;
1809 /* Link into table */
1810 if (ipst
->ips_ip_mrtdebug
> 1) {
1811 (void) mi_strlog(mrouter
->conn_rq
, 1,
1813 "ip_mforward: NEW MFCTAB hash %d o 0x%x "
1815 ntohl(mfc_rt
->mfc_origin
.s_addr
),
1816 ntohl(mfc_rt
->mfc_mcastgrp
.s_addr
));
1818 mfc_rt
->mfc_next
= ipst
->ips_mfcs
[hash
].mfcb_mfc
;
1819 ipst
->ips_mfcs
[hash
].mfcb_mfc
= mfc_rt
;
1820 mfc_rt
->mfc_rte
= NULL
;
1823 /* Link in the upcall */
1825 if (mfc_rt
->mfc_rte
== NULL
)
1826 mfc_rt
->mfc_rte
= rte
;
1828 /* not the first upcall */
1829 prev_rte
= mfc_rt
->mfc_rte
;
1830 for (rte1
= mfc_rt
->mfc_rte
->rte_next
; rte1
;
1831 prev_rte
= rte1
, rte1
= rte1
->rte_next
)
1833 prev_rte
->rte_next
= rte
;
1837 * No upcalls waiting, this is first one, so send a message to
1838 * routing daemon to install a route into kernel table.
1842 /* ipha_protocol is 0, for upcall */
1843 ASSERT(mp_copy
!= NULL
);
1844 im
= (struct igmpmsg
*)mp_copy
->b_rptr
;
1845 im
->im_msgtype
= IGMPMSG_NOCACHE
;
1847 mutex_enter(&ipst
->ips_numvifs_mutex
);
1848 if (pim_reg_packet
) {
1849 im
->im_vif
= (uchar_t
)ipst
->ips_reg_vif_num
;
1850 mutex_exit(&ipst
->ips_numvifs_mutex
);
1853 * XXX do we need to hold locks here ?
1856 vifi
< ipst
->ips_numvifs
;
1858 if (ipst
->ips_vifs
[vifi
].v_ipif
== NULL
)
1860 if (ipst
->ips_vifs
[vifi
].
1861 v_ipif
->ipif_ill
== ill
) {
1862 im
->im_vif
= (uchar_t
)vifi
;
1866 mutex_exit(&ipst
->ips_numvifs_mutex
);
1867 ASSERT(vifi
< ipst
->ips_numvifs
);
1870 ipst
->ips_mrtstat
->mrts_upcalls
++;
1871 /* Timer to discard upcalls if mrouted is too slow */
1872 mfc_rt
->mfc_timeout_id
= timeout(expire_upcalls
,
1873 mfc_rt
, EXPIRE_TIMEOUT
* UPCALL_EXPIRE
);
1874 mutex_exit(&mfc_rt
->mfc_mutex
);
1875 mutex_exit(&(ipst
->ips_mfcs
[hash
].mfcb_lock
));
1877 ira
->ira_ill
= ira
->ira_rill
= NULL
;
1878 (mrouter
->conn_recv
)(mrouter
, mp_copy
, NULL
, ira
);
1880 ira
->ira_rill
= rill
;
1882 mutex_exit(&mfc_rt
->mfc_mutex
);
1883 mutex_exit(&(ipst
->ips_mfcs
[hash
].mfcb_lock
));
1884 BUMP_MIB(ill
->ill_ip_mib
, ipIfStatsInDiscards
);
1885 ip_drop_input("ip_mforward - upcall already waiting",
1890 MFCB_REFRELE(mfcbp
);
1891 if (tunnel_src
!= 0)
1896 mutex_exit(&(ipst
->ips_mfcs
[hash
].mfcb_lock
));
1897 MFCB_REFRELE(mfcbp
);
1898 if (mfc_rt
!= NULL
&& (new_mfc
== B_TRUE
))
1899 mi_free((char *)mfc_rt
);
1901 mi_free((char *)rte
);
1902 if (mp_copy
!= NULL
) {
1903 BUMP_MIB(ill
->ill_ip_mib
, ipIfStatsInDiscards
);
1904 ip_drop_input("ip_mforward error", mp_copy
, ill
);
1914 * Clean up the mfctable cache entry if upcall is not serviced.
1915 * SunOS 5.x has timeout per mfc, unlike BSD which has one timer.
1918 expire_upcalls(void *arg
)
1920 struct mfc
*mfc_rt
= arg
;
1922 struct mfc
*prev_mfc
, *mfc0
;
1926 if (mfc_rt
->mfc_rte
== NULL
|| mfc_rt
->mfc_rte
->ill
!= NULL
) {
1927 cmn_err(CE_WARN
, "expire_upcalls: no ILL\n");
1930 ipst
= mfc_rt
->mfc_rte
->ill
->ill_ipst
;
1931 mrouter
= ipst
->ips_ip_g_mrouter
;
1933 hash
= MFCHASH(mfc_rt
->mfc_origin
.s_addr
, mfc_rt
->mfc_mcastgrp
.s_addr
);
1934 if (ipst
->ips_ip_mrtdebug
> 1) {
1935 (void) mi_strlog(mrouter
->conn_rq
, 1, SL_TRACE
,
1936 "expire_upcalls: hash %d s %x g %x",
1937 hash
, ntohl(mfc_rt
->mfc_origin
.s_addr
),
1938 ntohl(mfc_rt
->mfc_mcastgrp
.s_addr
));
1940 MFCB_REFHOLD(&ipst
->ips_mfcs
[hash
]);
1941 mutex_enter(&mfc_rt
->mfc_mutex
);
1943 * if timeout has been set to zero, than the
1944 * entry has been filled, no need to delete it.
1946 if (mfc_rt
->mfc_timeout_id
== 0)
1948 ipst
->ips_mrtstat
->mrts_cache_cleanups
++;
1949 mfc_rt
->mfc_timeout_id
= 0;
1951 /* Determine entry to be cleaned up in cache table. */
1952 for (prev_mfc
= mfc0
= ipst
->ips_mfcs
[hash
].mfcb_mfc
; mfc0
;
1953 prev_mfc
= mfc0
, mfc0
= mfc0
->mfc_next
)
1957 /* del_mfc takes care of gone mfcs */
1958 ASSERT(prev_mfc
!= NULL
);
1959 ASSERT(mfc0
!= NULL
);
1962 * Delete the entry from the cache
1964 ipst
->ips_mfcs
[hash
].mfcb_marks
|= MFCB_MARK_CONDEMNED
;
1965 mfc_rt
->mfc_marks
|= MFCB_MARK_CONDEMNED
;
1968 * release_mfc will drop all queued upcall packets.
1969 * and will free the mbuf with the pkt, if, timing info.
1972 mutex_exit(&mfc_rt
->mfc_mutex
);
1973 MFCB_REFRELE(&ipst
->ips_mfcs
[hash
]);
1977 * Packet forwarding routine once entry in the cache is made.
1980 ip_mdq(mblk_t
*mp
, ipha_t
*ipha
, ill_t
*ill
, ipaddr_t tunnel_src
,
1985 ipaddr_t dst
= ipha
->ipha_dst
;
1986 size_t plen
= msgdsize(mp
);
1988 ip_stack_t
*ipst
= ill
->ill_ipst
;
1989 conn_t
*mrouter
= ipst
->ips_ip_g_mrouter
;
1990 ip_recv_attr_t iras
;
1992 if (ipst
->ips_ip_mrtdebug
> 1) {
1993 (void) mi_strlog(mrouter
->conn_rq
, 1, SL_TRACE
,
1994 "ip_mdq: SEND src %x, ipha_dst %x, ill %s",
1995 ntohl(ipha
->ipha_src
), ntohl(ipha
->ipha_dst
),
1999 /* Macro to send packet on vif */
2000 #define MC_SEND(ipha, mp, vifp, dst) { \
2001 if ((vifp)->v_flags & VIFF_TUNNEL) \
2002 encap_send((ipha), (mp), (vifp), (dst)); \
2003 else if ((vifp)->v_flags & VIFF_REGISTER) \
2004 register_send((ipha), (mp), (vifp), (dst)); \
2006 phyint_send((ipha), (mp), (vifp), (dst)); \
2009 vifi
= rt
->mfc_parent
;
2012 * The value of vifi is MAXVIFS if the pkt had no parent, i.e.,
2013 * Mrouted had no route.
2014 * We wanted the route installed in the mfctable to prevent multiple
2015 * tries, so it passed add_mfc(), but is discarded here. The v_ipif is
2016 * NULL so we don't want to check the ill. Still needed as of Mrouted
2019 if (vifi
== NO_VIF
) {
2020 ip1dbg(("ip_mdq: no route for origin ill %s, vifi is NO_VIF\n",
2022 if (ipst
->ips_ip_mrtdebug
> 1) {
2023 (void) mi_strlog(mrouter
->conn_rq
, 1, SL_TRACE
,
2024 "ip_mdq: vifi is NO_VIF ill = %s", ill
->ill_name
);
2026 return (-1); /* drop pkt */
2029 if (!lock_good_vif(&ipst
->ips_vifs
[vifi
]))
2032 * The MFC entries are not cleaned up when an ipif goes
2033 * away thus this code has to guard against an MFC referencing
2034 * an ipif that has been closed. Note: reset_mrt_vif_ipif
2035 * sets the v_ipif to NULL when the ipif disappears.
2037 ASSERT(ipst
->ips_vifs
[vifi
].v_ipif
!= NULL
);
2039 if (vifi
>= ipst
->ips_numvifs
) {
2040 cmn_err(CE_WARN
, "ip_mdq: illegal vifi %d numvifs "
2041 "%d ill %s viftable ill %s\n",
2042 (int)vifi
, (int)ipst
->ips_numvifs
, ill
->ill_name
,
2043 ipst
->ips_vifs
[vifi
].v_ipif
->ipif_ill
->ill_name
);
2044 unlock_good_vif(&ipst
->ips_vifs
[vifi
]);
2048 * Don't forward if it didn't arrive from the parent vif for its
2051 if ((ipst
->ips_vifs
[vifi
].v_ipif
->ipif_ill
!= ill
) ||
2052 (ipst
->ips_vifs
[vifi
].v_rmt_addr
.s_addr
!= tunnel_src
)) {
2053 /* Came in the wrong interface */
2054 ip1dbg(("ip_mdq: arrived wrong if, vifi %d "
2055 "numvifs %d ill %s viftable ill %s\n",
2056 (int)vifi
, (int)ipst
->ips_numvifs
, ill
->ill_name
,
2057 ipst
->ips_vifs
[vifi
].v_ipif
->ipif_ill
->ill_name
));
2058 if (ipst
->ips_ip_mrtdebug
> 1) {
2059 (void) mi_strlog(mrouter
->conn_rq
, 1, SL_TRACE
,
2060 "ip_mdq: arrived wrong if, vifi %d ill "
2061 "%s viftable ill %s\n",
2062 (int)vifi
, ill
->ill_name
,
2063 ipst
->ips_vifs
[vifi
].v_ipif
->ipif_ill
->ill_name
);
2065 ipst
->ips_mrtstat
->mrts_wrong_if
++;
2069 * If we are doing PIM assert processing and we are forwarding
2070 * packets on this interface, and it is a broadcast medium
2071 * interface (and not a tunnel), send a message to the routing.
2073 * We use the first ipif on the list, since it's all we have.
2074 * Chances are the ipif_flags are the same for ipifs on the ill.
2076 if (ipst
->ips_pim_assert
&& rt
->mfc_ttls
[vifi
] > 0 &&
2077 (ill
->ill_ipif
->ipif_flags
& IPIF_BROADCAST
) &&
2078 !(ipst
->ips_vifs
[vifi
].v_flags
& VIFF_TUNNEL
)) {
2082 /* TODO could copy header and dup rest */
2083 mp_copy
= copymsg(mp
);
2084 if (mp_copy
== NULL
) {
2085 ipst
->ips_mrtstat
->mrts_fwd_drop
++;
2086 ip1dbg(("ip_mdq: out of memory "
2087 "for mblk, mp_copy\n"));
2088 unlock_good_vif(&ipst
->ips_vifs
[vifi
]);
2092 im
= (struct igmpmsg
*)mp_copy
->b_rptr
;
2093 im
->im_msgtype
= IGMPMSG_WRONGVIF
;
2095 im
->im_vif
= (ushort_t
)vifi
;
2098 bzero(&iras
, sizeof (iras
));
2099 iras
.ira_flags
= IRAF_IS_IPV4
;
2100 iras
.ira_ip_hdr_length
=
2101 IPH_HDR_LENGTH(mp_copy
->b_rptr
);
2102 iras
.ira_pktlen
= msgdsize(mp_copy
);
2103 iras
.ira_ttl
= ipha
->ipha_ttl
;
2104 (mrouter
->conn_recv
)(mrouter
, mp_copy
, NULL
, &iras
);
2105 ASSERT(!(iras
.ira_flags
& IRAF_IPSEC_SECURE
));
2107 unlock_good_vif(&ipst
->ips_vifs
[vifi
]);
2108 if (tunnel_src
!= 0)
2114 * If I sourced this packet, it counts as output, else it was input.
2116 if (ipha
->ipha_src
== ipst
->ips_vifs
[vifi
].v_lcl_addr
.s_addr
) {
2117 ipst
->ips_vifs
[vifi
].v_pkt_out
++;
2118 ipst
->ips_vifs
[vifi
].v_bytes_out
+= plen
;
2120 ipst
->ips_vifs
[vifi
].v_pkt_in
++;
2121 ipst
->ips_vifs
[vifi
].v_bytes_in
+= plen
;
2123 mutex_enter(&rt
->mfc_mutex
);
2125 rt
->mfc_byte_cnt
+= plen
;
2126 mutex_exit(&rt
->mfc_mutex
);
2127 unlock_good_vif(&ipst
->ips_vifs
[vifi
]);
2129 * For each vif, decide if a copy of the packet should be forwarded.
2131 * - the vif threshold ttl is non-zero AND
2132 * - the pkt ttl exceeds the vif's threshold
2133 * A non-zero mfc_ttl indicates that the vif is part of
2134 * the output set for the mfc entry.
2136 mutex_enter(&ipst
->ips_numvifs_mutex
);
2137 num_of_vifs
= ipst
->ips_numvifs
;
2138 mutex_exit(&ipst
->ips_numvifs_mutex
);
2139 for (vifp
= ipst
->ips_vifs
, vifi
= 0;
2142 if (!lock_good_vif(vifp
))
2144 if ((rt
->mfc_ttls
[vifi
] > 0) &&
2145 (ipha
->ipha_ttl
> rt
->mfc_ttls
[vifi
])) {
2147 * lock_good_vif should not have succeeded if
2150 ASSERT(vifp
->v_ipif
!= NULL
);
2152 vifp
->v_bytes_out
+= plen
;
2153 MC_SEND(ipha
, mp
, vifp
, dst
);
2154 ipst
->ips_mrtstat
->mrts_fwd_out
++;
2156 unlock_good_vif(vifp
);
2158 if (tunnel_src
!= 0)
2165 * Send the packet on physical interface.
2166 * Caller assumes can continue to use mp on return.
2170 phyint_send(ipha_t
*ipha
, mblk_t
*mp
, struct vif
*vifp
, ipaddr_t dst
)
2173 ip_stack_t
*ipst
= vifp
->v_ipif
->ipif_ill
->ill_ipst
;
2174 conn_t
*mrouter
= ipst
->ips_ip_g_mrouter
;
2176 /* Make a new reference to the packet */
2177 mp_copy
= copymsg(mp
); /* TODO could copy header and dup rest */
2178 if (mp_copy
== NULL
) {
2179 ipst
->ips_mrtstat
->mrts_fwd_drop
++;
2180 ip1dbg(("phyint_send: out of memory for mblk, mp_copy\n"));
2183 if (vifp
->v_rate_limit
<= 0)
2184 tbf_send_packet(vifp
, mp_copy
);
2186 if (ipst
->ips_ip_mrtdebug
> 1) {
2187 (void) mi_strlog(mrouter
->conn_rq
, 1, SL_TRACE
,
2188 "phyint_send: tbf_contr rate %d "
2189 "vifp 0x%p mp 0x%p dst 0x%x",
2190 vifp
->v_rate_limit
, (void *)vifp
, (void *)mp
, dst
);
2192 tbf_control(vifp
, mp_copy
, (ipha_t
*)mp_copy
->b_rptr
);
2197 * Send the whole packet for REGISTER encapsulation to PIM daemon
2198 * Caller assumes it can continue to use mp on return.
2202 register_send(ipha_t
*ipha
, mblk_t
*mp
, struct vif
*vifp
, ipaddr_t dst
)
2207 ill_t
*ill
= vifp
->v_ipif
->ipif_ill
;
2208 ip_stack_t
*ipst
= ill
->ill_ipst
;
2209 conn_t
*mrouter
= ipst
->ips_ip_g_mrouter
;
2210 ip_recv_attr_t iras
;
2212 if (ipst
->ips_ip_mrtdebug
> 1) {
2213 (void) mi_strlog(mrouter
->conn_rq
, 1, SL_TRACE
,
2214 "register_send: src %x, dst %x\n",
2215 ntohl(ipha
->ipha_src
), ntohl(ipha
->ipha_dst
));
2219 * Copy the old packet & pullup its IP header into the new mblk_t so we
2220 * can modify it. Try to fill the new mblk_t since if we don't the
2221 * ethernet driver will.
2223 mp_copy
= allocb(sizeof (struct igmpmsg
) + sizeof (ipha_t
), BPRI_MED
);
2224 if (mp_copy
== NULL
) {
2225 ++ipst
->ips_mrtstat
->mrts_pim_nomemory
;
2226 if (ipst
->ips_ip_mrtdebug
> 3) {
2227 (void) mi_strlog(mrouter
->conn_rq
, 1, SL_TRACE
,
2228 "register_send: allocb failure.");
2234 * Bump write pointer to account for igmpmsg being added.
2236 mp_copy
->b_wptr
= mp_copy
->b_rptr
+ sizeof (struct igmpmsg
);
2239 * Chain packet to new mblk_t.
2241 if ((mp_copy
->b_cont
= copymsg(mp
)) == NULL
) {
2242 ++ipst
->ips_mrtstat
->mrts_pim_nomemory
;
2243 if (ipst
->ips_ip_mrtdebug
> 3) {
2244 (void) mi_strlog(mrouter
->conn_rq
, 1, SL_TRACE
,
2245 "register_send: copymsg failure.");
2252 * icmp_input() asserts that IP version field is set to an
2253 * appropriate version. Hence, the struct igmpmsg that this really
2254 * becomes, needs to have the correct IP version field.
2256 ipha_copy
= (ipha_t
*)mp_copy
->b_rptr
;
2257 *ipha_copy
= multicast_encap_iphdr
;
2260 * The kernel uses the struct igmpmsg header to encode the messages to
2261 * the multicast routing daemon. Fill in the fields in the header
2262 * starting with the message type which is IGMPMSG_WHOLEPKT
2264 im
= (struct igmpmsg
*)mp_copy
->b_rptr
;
2265 im
->im_msgtype
= IGMPMSG_WHOLEPKT
;
2266 im
->im_src
.s_addr
= ipha
->ipha_src
;
2267 im
->im_dst
.s_addr
= ipha
->ipha_dst
;
2270 * Must Be Zero. This is because the struct igmpmsg is really an IP
2271 * header with renamed fields and the multicast routing daemon uses
2272 * an ipha_protocol (aka im_mbz) of 0 to distinguish these messages.
2276 ++ipst
->ips_mrtstat
->mrts_upcalls
;
2277 if (IPCL_IS_NONSTR(mrouter
) ? mrouter
->conn_flow_cntrld
:
2278 !canputnext(mrouter
->conn_rq
)) {
2279 ++ipst
->ips_mrtstat
->mrts_pim_regsend_drops
;
2280 if (ipst
->ips_ip_mrtdebug
> 3) {
2281 (void) mi_strlog(mrouter
->conn_rq
, 1, SL_TRACE
,
2282 "register_send: register upcall failure.");
2284 BUMP_MIB(ill
->ill_ip_mib
, ipIfStatsInDiscards
);
2285 ip_drop_input("mrts_pim_regsend_drops", mp_copy
, ill
);
2289 bzero(&iras
, sizeof (iras
));
2290 iras
.ira_flags
= IRAF_IS_IPV4
;
2291 iras
.ira_ip_hdr_length
= sizeof (ipha_t
);
2292 iras
.ira_pktlen
= msgdsize(mp_copy
);
2293 iras
.ira_ttl
= ipha
->ipha_ttl
;
2294 (mrouter
->conn_recv
)(mrouter
, mp_copy
, NULL
, &iras
);
2295 ASSERT(!(iras
.ira_flags
& IRAF_IPSEC_SECURE
));
2300 * pim_validate_cksum handles verification of the checksum in the
2301 * pim header. For PIM Register packets, the checksum is calculated
2302 * across the PIM header only. For all other packets, the checksum
2303 * is for the PIM header and remainder of the packet.
2305 * returns: B_TRUE, if checksum is okay.
2306 * B_FALSE, if checksum is not valid.
2309 pim_validate_cksum(mblk_t
*mp
, ipha_t
*ip
, struct pim
*pimp
)
2313 if ((mp_dup
= dupmsg(mp
)) == NULL
)
2316 mp_dup
->b_rptr
+= IPH_HDR_LENGTH(ip
);
2317 if (pimp
->pim_type
== PIM_REGISTER
)
2318 mp_dup
->b_wptr
= mp_dup
->b_rptr
+ PIM_MINLEN
;
2319 if (IP_CSUM(mp_dup
, 0, 0)) {
2328 * Process PIM protocol packets i.e. IP Protocol 103.
2329 * Register messages are decapsulated and sent onto multicast forwarding.
2331 * Return NULL for a bad packet that is discarded here.
2332 * Return mp if the message is OK and should be handed to "raw" receivers.
2333 * Callers of pim_input() may need to reinitialize variables that were copied
2334 * from the mblk as this calls pullupmsg().
2337 pim_input(mblk_t
*mp
, ip_recv_attr_t
*ira
)
2340 int iplen
, pimlen
, iphlen
;
2341 struct pim
*pimp
; /* pointer to a pim struct */
2343 ill_t
*ill
= ira
->ira_ill
;
2344 ip_stack_t
*ipst
= ill
->ill_ipst
;
2345 conn_t
*mrouter
= ipst
->ips_ip_g_mrouter
;
2348 * Pullup the msg for PIM protocol processing.
2350 if (pullupmsg(mp
, -1) == 0) {
2351 ++ipst
->ips_mrtstat
->mrts_pim_nomemory
;
2352 BUMP_MIB(ill
->ill_ip_mib
, ipIfStatsInDiscards
);
2353 ip_drop_input("mrts_pim_nomemory", mp
, ill
);
2358 ip
= (ipha_t
*)mp
->b_rptr
;
2359 iplen
= ip
->ipha_length
;
2360 iphlen
= IPH_HDR_LENGTH(ip
);
2361 pimlen
= ntohs(iplen
) - iphlen
;
2366 if (pimlen
< PIM_MINLEN
) {
2367 ++ipst
->ips_mrtstat
->mrts_pim_malformed
;
2368 if (ipst
->ips_ip_mrtdebug
> 1) {
2369 (void) mi_strlog(mrouter
->conn_rq
, 1, SL_TRACE
,
2370 "pim_input: length not at least minlen");
2372 BUMP_MIB(ill
->ill_ip_mib
, ipIfStatsInDiscards
);
2373 ip_drop_input("mrts_pim_malformed", mp
, ill
);
2379 * Point to the PIM header.
2381 pimp
= (struct pim
*)((caddr_t
)ip
+ iphlen
);
2384 * Check the version number.
2386 if (pimp
->pim_vers
!= PIM_VERSION
) {
2387 ++ipst
->ips_mrtstat
->mrts_pim_badversion
;
2388 if (ipst
->ips_ip_mrtdebug
> 1) {
2389 (void) mi_strlog(mrouter
->conn_rq
, 1, SL_TRACE
,
2390 "pim_input: unknown version of PIM");
2392 BUMP_MIB(ill
->ill_ip_mib
, ipIfStatsInDiscards
);
2393 ip_drop_input("mrts_pim_badversion", mp
, ill
);
2399 * Validate the checksum
2401 if (!pim_validate_cksum(mp
, ip
, pimp
)) {
2402 ++ipst
->ips_mrtstat
->mrts_pim_rcv_badcsum
;
2403 if (ipst
->ips_ip_mrtdebug
> 1) {
2404 (void) mi_strlog(mrouter
->conn_rq
, 1, SL_TRACE
,
2405 "pim_input: invalid checksum");
2407 BUMP_MIB(ill
->ill_ip_mib
, ipIfStatsInDiscards
);
2408 ip_drop_input("pim_rcv_badcsum", mp
, ill
);
2413 if (pimp
->pim_type
!= PIM_REGISTER
)
2416 reghdr
= (uint32_t *)(pimp
+ 1);
2417 eip
= (ipha_t
*)(reghdr
+ 1);
2420 * check if the inner packet is destined to mcast group
2422 if (!CLASSD(eip
->ipha_dst
)) {
2423 ++ipst
->ips_mrtstat
->mrts_pim_badregisters
;
2424 if (ipst
->ips_ip_mrtdebug
> 1) {
2425 (void) mi_strlog(mrouter
->conn_rq
, 1, SL_TRACE
,
2426 "pim_input: Inner pkt not mcast .. !");
2428 BUMP_MIB(ill
->ill_ip_mib
, ipIfStatsInDiscards
);
2429 ip_drop_input("mrts_pim_badregisters", mp
, ill
);
2433 if (ipst
->ips_ip_mrtdebug
> 1) {
2434 (void) mi_strlog(mrouter
->conn_rq
, 1, SL_TRACE
,
2435 "register from %x, to %x, len %d",
2436 ntohl(eip
->ipha_src
),
2437 ntohl(eip
->ipha_dst
),
2438 ntohs(eip
->ipha_length
));
2441 * If the null register bit is not set, decapsulate
2442 * the packet before forwarding it.
2443 * Avoid this in no register vif
2445 if (!(ntohl(*reghdr
) & PIM_NULL_REGISTER
) &&
2446 ipst
->ips_reg_vif_num
!= ALL_VIFS
) {
2448 uint_t saved_pktlen
;
2450 /* Copy the message */
2451 if ((mp_copy
= copymsg(mp
)) == NULL
) {
2452 ++ipst
->ips_mrtstat
->mrts_pim_nomemory
;
2453 BUMP_MIB(ill
->ill_ip_mib
, ipIfStatsInDiscards
);
2454 ip_drop_input("mrts_pim_nomemory", mp
, ill
);
2460 * Decapsulate the packet and give it to
2461 * register_mforward.
2463 mp_copy
->b_rptr
+= iphlen
+ sizeof (pim_t
) + sizeof (*reghdr
);
2464 saved_pktlen
= ira
->ira_pktlen
;
2465 ira
->ira_pktlen
-= iphlen
+ sizeof (pim_t
) + sizeof (*reghdr
);
2466 if (register_mforward(mp_copy
, ira
) != 0) {
2467 /* register_mforward already called ip_drop_input */
2469 ira
->ira_pktlen
= saved_pktlen
;
2472 ira
->ira_pktlen
= saved_pktlen
;
2476 * Pass all valid PIM packets up to any process(es) listening on a raw
2477 * PIM socket. For Solaris it is done right after pim_input() is
2484 * PIM sparse mode hook. Called by pim_input after decapsulating
2485 * the packet. Loop back the packet, as if we have received it.
2486 * In pim_input() we have to check if the destination is a multicast address.
2489 register_mforward(mblk_t
*mp
, ip_recv_attr_t
*ira
)
2492 ipha_t
*ipha
= (ipha_t
*)mp
->b_rptr
;
2493 ill_t
*ill
= ira
->ira_ill
;
2494 ip_stack_t
*ipst
= ill
->ill_ipst
;
2495 conn_t
*mrouter
= ipst
->ips_ip_g_mrouter
;
2497 ASSERT(ipst
->ips_reg_vif_num
<= ipst
->ips_numvifs
);
2499 if (ipst
->ips_ip_mrtdebug
> 3) {
2500 (void) mi_strlog(mrouter
->conn_rq
, 1, SL_TRACE
,
2501 "register_mforward: src %x, dst %x\n",
2502 ntohl(ipha
->ipha_src
), ntohl(ipha
->ipha_dst
));
2505 * Need to pass in to ip_mforward() the information that the
2506 * packet has arrived on the register_vif. We mark it with
2507 * the IRAF_PIM_REGISTER attribute.
2508 * pim_input verified that the (inner) destination is multicast,
2509 * hence we skip the generic code in ip_input.
2511 ira
->ira_flags
|= IRAF_PIM_REGISTER
;
2512 ++ipst
->ips_mrtstat
->mrts_pim_regforwards
;
2514 if (!CLASSD(ipha
->ipha_dst
)) {
2515 ire
= ire_route_recursive_v4(ipha
->ipha_dst
, 0, NULL
, ALL_ZONES
,
2516 ira
->ira_tsl
, MATCH_IRE_SECATTR
, IRR_ALLOCATE
, 0, ipst
,
2519 ire
= ire_multicast(ill
);
2521 ASSERT(ire
!= NULL
);
2522 /* Normally this will return the IRE_MULTICAST */
2523 if (ire
->ire_flags
& (RTF_REJECT
|RTF_BLACKHOLE
)) {
2524 BUMP_MIB(ill
->ill_ip_mib
, ipIfStatsInDiscards
);
2525 ip_drop_input("mrts_pim RTF_REJECT", mp
, ill
);
2530 ASSERT(ire
->ire_type
& IRE_MULTICAST
);
2531 (*ire
->ire_recvfn
)(ire
, mp
, ipha
, ira
);
2538 * Send an encapsulated packet.
2539 * Caller assumes can continue to use mp when routine returns.
2543 encap_send(ipha_t
*ipha
, mblk_t
*mp
, struct vif
*vifp
, ipaddr_t dst
)
2548 ip_stack_t
*ipst
= vifp
->v_ipif
->ipif_ill
->ill_ipst
;
2549 conn_t
*mrouter
= ipst
->ips_ip_g_mrouter
;
2551 if (ipst
->ips_ip_mrtdebug
> 1) {
2552 (void) mi_strlog(mrouter
->conn_rq
, 1, SL_TRACE
,
2553 "encap_send: vif %ld enter",
2554 (ptrdiff_t)(vifp
- ipst
->ips_vifs
));
2556 len
= ntohs(ipha
->ipha_length
);
2559 * Copy the old packet & pullup it's IP header into the
2560 * new mbuf so we can modify it. Try to fill the new
2561 * mbuf since if we don't the ethernet driver will.
2563 mp_copy
= allocb(32 + sizeof (multicast_encap_iphdr
), BPRI_MED
);
2564 if (mp_copy
== NULL
)
2566 mp_copy
->b_rptr
+= 32;
2567 mp_copy
->b_wptr
= mp_copy
->b_rptr
+ sizeof (multicast_encap_iphdr
);
2568 if ((mp_copy
->b_cont
= copymsg(mp
)) == NULL
) {
2574 * Fill in the encapsulating IP header.
2575 * Remote tunnel dst in rmt_addr, from add_vif().
2577 ipha_copy
= (ipha_t
*)mp_copy
->b_rptr
;
2578 *ipha_copy
= multicast_encap_iphdr
;
2579 ASSERT((len
+ sizeof (ipha_t
)) <= IP_MAXPACKET
);
2580 ipha_copy
->ipha_length
= htons(len
+ sizeof (ipha_t
));
2581 ipha_copy
->ipha_src
= vifp
->v_lcl_addr
.s_addr
;
2582 ipha_copy
->ipha_dst
= vifp
->v_rmt_addr
.s_addr
;
2583 ASSERT(ipha_copy
->ipha_ident
== 0);
2585 /* Turn the encapsulated IP header back into a valid one. */
2586 ipha
= (ipha_t
*)mp_copy
->b_cont
->b_rptr
;
2588 ipha
->ipha_hdr_checksum
= 0;
2589 ipha
->ipha_hdr_checksum
= ip_csum_hdr(ipha
);
2591 ipha_copy
->ipha_ttl
= ipha
->ipha_ttl
;
2593 if (ipst
->ips_ip_mrtdebug
> 1) {
2594 (void) mi_strlog(mrouter
->conn_rq
, 1, SL_TRACE
,
2595 "encap_send: group 0x%x", ntohl(ipha
->ipha_dst
));
2597 if (vifp
->v_rate_limit
<= 0)
2598 tbf_send_packet(vifp
, mp_copy
);
2600 /* ipha is from the original header */
2601 tbf_control(vifp
, mp_copy
, ipha
);
2605 * De-encapsulate a packet and feed it back through IP input if it
2606 * matches one of our multicast tunnels.
2608 * This routine is called whenever IP gets a packet with prototype
2609 * IPPROTO_ENCAP and a local destination address and the packet didn't
2610 * match one of our configured IP-in-IP tunnels.
2613 ip_mroute_decap(mblk_t
*mp
, ip_recv_attr_t
*ira
)
2615 ipha_t
*ipha
= (ipha_t
*)mp
->b_rptr
;
2617 int hlen
= IPH_HDR_LENGTH(ipha
);
2622 ill_t
*ill
= ira
->ira_ill
;
2623 ip_stack_t
*ipst
= ill
->ill_ipst
;
2624 conn_t
*mrouter
= ipst
->ips_ip_g_mrouter
;
2626 /* Make sure we have all of the inner header */
2627 ipha_encap
= (ipha_t
*)((char *)ipha
+ hlen
);
2628 if (mp
->b_wptr
- mp
->b_rptr
< hlen
+ IP_SIMPLE_HDR_LENGTH
) {
2629 ipha
= ip_pullup(mp
, hlen
+ IP_SIMPLE_HDR_LENGTH
, ira
);
2631 ipst
->ips_mrtstat
->mrts_bad_tunnel
++;
2632 BUMP_MIB(ill
->ill_ip_mib
, ipIfStatsInDiscards
);
2633 ip_drop_input("ip_mroute_decap: too short", mp
, ill
);
2637 ipha_encap
= (ipha_t
*)((char *)ipha
+ hlen
);
2639 hlen_encap
= IPH_HDR_LENGTH(ipha_encap
);
2640 if (mp
->b_wptr
- mp
->b_rptr
< hlen
+ hlen_encap
) {
2641 ipha
= ip_pullup(mp
, hlen
+ hlen_encap
, ira
);
2643 ipst
->ips_mrtstat
->mrts_bad_tunnel
++;
2644 BUMP_MIB(ill
->ill_ip_mib
, ipIfStatsInDiscards
);
2645 ip_drop_input("ip_mroute_decap: too short", mp
, ill
);
2649 ipha_encap
= (ipha_t
*)((char *)ipha
+ hlen
);
2653 * Dump the packet if it's not to a multicast destination or if
2654 * we don't have an encapsulating tunnel with the source.
2655 * Note: This code assumes that the remote site IP address
2656 * uniquely identifies the tunnel (i.e., that this site has
2657 * at most one tunnel with the remote site).
2659 if (!CLASSD(ipha_encap
->ipha_dst
)) {
2660 ipst
->ips_mrtstat
->mrts_bad_tunnel
++;
2661 ip1dbg(("ip_mroute_decap: bad tunnel\n"));
2662 BUMP_MIB(ill
->ill_ip_mib
, ipIfStatsInDiscards
);
2663 ip_drop_input("mrts_bad_tunnel", mp
, ill
);
2667 src
= (ipaddr_t
)ipha
->ipha_src
;
2668 mutex_enter(&ipst
->ips_last_encap_lock
);
2669 if (src
!= ipst
->ips_last_encap_src
) {
2672 vifp
= ipst
->ips_vifs
;
2673 vife
= vifp
+ ipst
->ips_numvifs
;
2674 ipst
->ips_last_encap_src
= src
;
2675 ipst
->ips_last_encap_vif
= 0;
2676 for (; vifp
< vife
; ++vifp
) {
2677 if (!lock_good_vif(vifp
))
2679 if (vifp
->v_rmt_addr
.s_addr
== src
) {
2680 if (vifp
->v_flags
& VIFF_TUNNEL
)
2681 ipst
->ips_last_encap_vif
= vifp
;
2682 if (ipst
->ips_ip_mrtdebug
> 1) {
2683 (void) mi_strlog(mrouter
->conn_rq
,
2685 "ip_mroute_decap: good tun "
2687 (ptrdiff_t)(vifp
- ipst
->ips_vifs
),
2690 unlock_good_vif(vifp
);
2693 unlock_good_vif(vifp
);
2696 if ((vifp
= ipst
->ips_last_encap_vif
) == 0) {
2697 mutex_exit(&ipst
->ips_last_encap_lock
);
2698 ipst
->ips_mrtstat
->mrts_bad_tunnel
++;
2699 BUMP_MIB(ill
->ill_ip_mib
, ipIfStatsInDiscards
);
2700 ip_drop_input("mrts_bad_tunnel", mp
, ill
);
2702 ip1dbg(("ip_mroute_decap: vif %ld no tunnel with %x\n",
2703 (ptrdiff_t)(vifp
- ipst
->ips_vifs
), ntohl(src
)));
2706 mutex_exit(&ipst
->ips_last_encap_lock
);
2709 * Need to pass in the tunnel source to ip_mforward (so that it can
2710 * verify that the packet arrived over the correct vif.)
2712 ira
->ira_flags
|= IRAF_MROUTE_TUNNEL_SET
;
2713 ira
->ira_mroute_tunnel
= src
;
2715 ira
->ira_pktlen
-= hlen
;
2716 ira
->ira_ip_hdr_length
= hlen_encap
;
2719 * We don't redo any of the filtering in ill_input_full_v4 and we
2720 * have checked that all of ipha_encap and any IP options are
2721 * pulled up. Hence we call ire_recv_multicast_v4 directly.
2722 * However, we have to check for RSVP as in ip_input_full_v4
2723 * and if so we pass it to ire_recv_broadcast_v4 for local delivery
2726 if (ipha_encap
->ipha_protocol
== IPPROTO_RSVP
&&
2727 ipst
->ips_ipcl_proto_fanout_v4
[IPPROTO_RSVP
].connf_head
!= NULL
) {
2728 ire
= ire_route_recursive_v4(INADDR_BROADCAST
, 0, ill
,
2729 ALL_ZONES
, ira
->ira_tsl
, MATCH_IRE_ILL
|MATCH_IRE_SECATTR
,
2730 IRR_ALLOCATE
, 0, ipst
, NULL
, NULL
, NULL
);
2732 ire
= ire_multicast(ill
);
2734 ASSERT(ire
!= NULL
);
2735 /* Normally this will return the IRE_MULTICAST or IRE_BROADCAST */
2736 if (ire
->ire_flags
& (RTF_REJECT
|RTF_BLACKHOLE
)) {
2737 BUMP_MIB(ill
->ill_ip_mib
, ipIfStatsInDiscards
);
2738 ip_drop_input("ip_mroute_decap: RTF_REJECT", mp
, ill
);
2743 ire
->ire_ib_pkt_count
++;
2744 ASSERT(ire
->ire_type
& (IRE_MULTICAST
|IRE_BROADCAST
));
2745 (*ire
->ire_recvfn
)(ire
, mp
, ipha_encap
, ira
);
2750 * Remove all records with v_ipif == ipif. Called when an interface goes away
2751 * (stream closed). Called as writer.
2754 reset_mrt_vif_ipif(ipif_t
*ipif
)
2756 vifi_t vifi
, tmp_vifi
;
2758 ip_stack_t
*ipst
= ipif
->ipif_ill
->ill_ipst
;
2760 /* Can't check vifi >= 0 since vifi_t is unsigned! */
2762 mutex_enter(&ipst
->ips_numvifs_mutex
);
2763 num_of_vifs
= ipst
->ips_numvifs
;
2764 mutex_exit(&ipst
->ips_numvifs_mutex
);
2766 for (vifi
= num_of_vifs
; vifi
!= 0; vifi
--) {
2767 tmp_vifi
= vifi
- 1;
2768 if (ipst
->ips_vifs
[tmp_vifi
].v_ipif
== ipif
) {
2769 (void) del_vif(&tmp_vifi
, ipst
);
2774 /* Remove pending upcall msgs when ill goes away. Called by ill_delete. */
2776 reset_mrt_ill(ill_t
*ill
)
2781 ip_stack_t
*ipst
= ill
->ill_ipst
;
2782 conn_t
*mrouter
= ipst
->ips_ip_g_mrouter
;
2785 for (i
= 0; i
< MFCTBLSIZ
; i
++) {
2786 MFCB_REFHOLD(&ipst
->ips_mfcs
[i
]);
2787 if ((rt
= ipst
->ips_mfcs
[i
].mfcb_mfc
) != NULL
) {
2788 if (ipst
->ips_ip_mrtdebug
> 1) {
2789 (void) mi_strlog(mrouter
->conn_rq
, 1,
2791 "reset_mrt_ill: mfctable [%d]", i
);
2793 while (rt
!= NULL
) {
2794 mutex_enter(&rt
->mfc_mutex
);
2795 while ((rte
= rt
->mfc_rte
) != NULL
) {
2796 if (rte
->ill
== ill
&&
2797 (id
= rt
->mfc_timeout_id
) != 0) {
2799 * Its ok to drop the lock, the
2800 * struct cannot be freed since
2801 * we have a ref on the hash
2804 mutex_exit(&rt
->mfc_mutex
);
2805 (void) untimeout(id
);
2806 mutex_enter(&rt
->mfc_mutex
);
2808 if (rte
->ill
== ill
) {
2809 if (ipst
->ips_ip_mrtdebug
> 1) {
2814 "ill 0x%p", (void *)ill
);
2816 rt
->mfc_rte
= rte
->rte_next
;
2818 mi_free((char *)rte
);
2821 mutex_exit(&rt
->mfc_mutex
);
2825 MFCB_REFRELE(&ipst
->ips_mfcs
[i
]);
2830 * Token bucket filter module.
2831 * The ipha is for mcastgrp destination for phyint and encap.
2834 tbf_control(struct vif
*vifp
, mblk_t
*mp
, ipha_t
*ipha
)
2836 size_t p_len
= msgdsize(mp
);
2837 struct tbf
*t
= vifp
->v_tbf
;
2838 timeout_id_t id
= 0;
2839 ill_t
*ill
= vifp
->v_ipif
->ipif_ill
;
2840 ip_stack_t
*ipst
= ill
->ill_ipst
;
2841 conn_t
*mrouter
= ipst
->ips_ip_g_mrouter
;
2843 /* Drop if packet is too large */
2844 if (p_len
> MAX_BKT_SIZE
) {
2845 ipst
->ips_mrtstat
->mrts_pkt2large
++;
2846 BUMP_MIB(ill
->ill_ip_mib
, ipIfStatsOutDiscards
);
2847 ip_drop_output("tbf_control - too large", mp
, ill
);
2851 if (ipst
->ips_ip_mrtdebug
> 1) {
2852 (void) mi_strlog(mrouter
->conn_rq
, 1, SL_TRACE
,
2853 "tbf_ctrl: SEND vif %ld, qlen %d, ipha_dst 0x%x",
2854 (ptrdiff_t)(vifp
- ipst
->ips_vifs
), t
->tbf_q_len
,
2855 ntohl(ipha
->ipha_dst
));
2858 mutex_enter(&t
->tbf_lock
);
2860 tbf_update_tokens(vifp
);
2863 * If there are enough tokens,
2864 * and the queue is empty, send this packet out.
2866 if (ipst
->ips_ip_mrtdebug
> 1) {
2867 (void) mi_strlog(mrouter
->conn_rq
, 1, SL_TRACE
,
2868 "tbf_control: vif %ld, TOKENS %d, pkt len %lu, qlen %d",
2869 (ptrdiff_t)(vifp
- ipst
->ips_vifs
), t
->tbf_n_tok
, p_len
,
2872 /* No packets are queued */
2873 if (t
->tbf_q_len
== 0) {
2874 /* queue empty, send packet if enough tokens */
2875 if (p_len
<= t
->tbf_n_tok
) {
2876 t
->tbf_n_tok
-= p_len
;
2877 mutex_exit(&t
->tbf_lock
);
2878 tbf_send_packet(vifp
, mp
);
2881 /* Queue packet and timeout till later */
2882 tbf_queue(vifp
, mp
);
2883 ASSERT(vifp
->v_timeout_id
== 0);
2884 vifp
->v_timeout_id
= timeout(tbf_reprocess_q
, vifp
,
2887 } else if (t
->tbf_q_len
< t
->tbf_max_q_len
) {
2888 /* Finite queue length, so queue pkts and process queue */
2889 tbf_queue(vifp
, mp
);
2890 tbf_process_q(vifp
);
2892 /* Check that we have UDP header with IP header */
2893 size_t hdr_length
= IPH_HDR_LENGTH(ipha
) +
2894 sizeof (struct udphdr
);
2896 if ((mp
->b_wptr
- mp
->b_rptr
) < hdr_length
) {
2897 if (!pullupmsg(mp
, hdr_length
)) {
2898 BUMP_MIB(ill
->ill_ip_mib
,
2899 ipIfStatsOutDiscards
);
2900 ip_drop_output("tbf_control - pullup", mp
, ill
);
2902 ip1dbg(("tbf_ctl: couldn't pullup udp hdr, "
2903 "vif %ld src 0x%x dst 0x%x\n",
2904 (ptrdiff_t)(vifp
- ipst
->ips_vifs
),
2905 ntohl(ipha
->ipha_src
),
2906 ntohl(ipha
->ipha_dst
)));
2907 mutex_exit(&vifp
->v_tbf
->tbf_lock
);
2910 /* Have to reassign ipha after pullupmsg */
2911 ipha
= (ipha_t
*)mp
->b_rptr
;
2914 * Queue length too much,
2915 * try to selectively dq, or queue and process
2917 if (!tbf_dq_sel(vifp
, ipha
)) {
2918 ipst
->ips_mrtstat
->mrts_q_overflow
++;
2919 BUMP_MIB(ill
->ill_ip_mib
, ipIfStatsOutDiscards
);
2920 ip_drop_output("mrts_q_overflow", mp
, ill
);
2923 tbf_queue(vifp
, mp
);
2924 tbf_process_q(vifp
);
2927 if (t
->tbf_q_len
== 0) {
2928 id
= vifp
->v_timeout_id
;
2929 vifp
->v_timeout_id
= 0;
2931 mutex_exit(&vifp
->v_tbf
->tbf_lock
);
2933 (void) untimeout(id
);
2937 * Adds a packet to the tbf queue at the interface.
2938 * The ipha is for mcastgrp destination for phyint and encap.
2941 tbf_queue(struct vif
*vifp
, mblk_t
*mp
)
2943 struct tbf
*t
= vifp
->v_tbf
;
2944 ip_stack_t
*ipst
= vifp
->v_ipif
->ipif_ill
->ill_ipst
;
2945 conn_t
*mrouter
= ipst
->ips_ip_g_mrouter
;
2947 if (ipst
->ips_ip_mrtdebug
> 1) {
2948 (void) mi_strlog(mrouter
->conn_rq
, 1, SL_TRACE
,
2949 "tbf_queue: vif %ld", (ptrdiff_t)(vifp
- ipst
->ips_vifs
));
2951 ASSERT(MUTEX_HELD(&t
->tbf_lock
));
2953 if (t
->tbf_t
== NULL
) {
2954 /* Queue was empty */
2957 /* Insert at tail */
2958 t
->tbf_t
->b_next
= mp
;
2960 /* set new tail pointer */
2963 mp
->b_next
= mp
->b_prev
= NULL
;
2969 * Process the queue at the vif interface.
2970 * Drops the tbf_lock when sending packets.
2972 * NOTE : The caller should quntimeout if the queue length is 0.
2975 tbf_process_q(struct vif
*vifp
)
2978 struct tbf
*t
= vifp
->v_tbf
;
2980 ip_stack_t
*ipst
= vifp
->v_ipif
->ipif_ill
->ill_ipst
;
2981 conn_t
*mrouter
= ipst
->ips_ip_g_mrouter
;
2983 if (ipst
->ips_ip_mrtdebug
> 1) {
2984 (void) mi_strlog(mrouter
->conn_rq
, 1, SL_TRACE
,
2985 "tbf_process_q 1: vif %ld qlen = %d",
2986 (ptrdiff_t)(vifp
- ipst
->ips_vifs
), t
->tbf_q_len
);
2990 * Loop through the queue at the interface and send
2991 * as many packets as possible.
2993 ASSERT(MUTEX_HELD(&t
->tbf_lock
));
2995 while (t
->tbf_q_len
> 0) {
2997 len
= (size_t)msgdsize(mp
); /* length of ip pkt */
2999 /* Determine if the packet can be sent */
3000 if (len
<= t
->tbf_n_tok
) {
3002 * If so, reduce no. of tokens, dequeue the packet,
3005 t
->tbf_n_tok
-= len
;
3007 t
->tbf_q
= mp
->b_next
;
3008 if (--t
->tbf_q_len
== 0) {
3012 /* Exit mutex before sending packet, then re-enter */
3013 mutex_exit(&t
->tbf_lock
);
3014 tbf_send_packet(vifp
, mp
);
3015 mutex_enter(&t
->tbf_lock
);
3021 /* Called at tbf timeout to update tokens, process q and reset timer. */
3023 tbf_reprocess_q(void *arg
)
3025 struct vif
*vifp
= arg
;
3026 ip_stack_t
*ipst
= vifp
->v_ipif
->ipif_ill
->ill_ipst
;
3027 conn_t
*mrouter
= ipst
->ips_ip_g_mrouter
;
3029 mutex_enter(&vifp
->v_tbf
->tbf_lock
);
3030 vifp
->v_timeout_id
= 0;
3031 tbf_update_tokens(vifp
);
3033 tbf_process_q(vifp
);
3035 if (vifp
->v_tbf
->tbf_q_len
> 0) {
3036 vifp
->v_timeout_id
= timeout(tbf_reprocess_q
, vifp
,
3039 mutex_exit(&vifp
->v_tbf
->tbf_lock
);
3041 if (ipst
->ips_ip_mrtdebug
> 1) {
3042 (void) mi_strlog(mrouter
->conn_rq
, 1, SL_TRACE
,
3043 "tbf_reprcess_q: vif %ld timeout id = %p",
3044 (ptrdiff_t)(vifp
- ipst
->ips_vifs
), vifp
->v_timeout_id
);
3049 * Function that will selectively discard a member of the tbf queue,
3050 * based on the precedence value and the priority.
3052 * NOTE : The caller should quntimeout if the queue length is 0.
3055 tbf_dq_sel(struct vif
*vifp
, ipha_t
*ipha
)
3058 struct tbf
*t
= vifp
->v_tbf
;
3061 ill_t
*ill
= vifp
->v_ipif
->ipif_ill
;
3062 ip_stack_t
*ipst
= ill
->ill_ipst
;
3063 conn_t
*mrouter
= ipst
->ips_ip_g_mrouter
;
3065 if (ipst
->ips_ip_mrtdebug
> 1) {
3066 (void) mi_strlog(mrouter
->conn_rq
, 1, SL_TRACE
,
3067 "dq_sel: vif %ld dst 0x%x",
3068 (ptrdiff_t)(vifp
- ipst
->ips_vifs
), ntohl(ipha
->ipha_dst
));
3071 ASSERT(MUTEX_HELD(&t
->tbf_lock
));
3072 p
= priority(vifp
, ipha
);
3076 while ((mp
= *np
) != NULL
) {
3077 if (p
> (priority(vifp
, (ipha_t
*)mp
->b_rptr
))) {
3079 /* If removing the last packet, fix the tail pointer */
3082 mp
->b_prev
= mp
->b_next
= NULL
;
3083 BUMP_MIB(ill
->ill_ip_mib
, ipIfStatsOutDiscards
);
3084 ip_drop_output("tbf_dq_send", mp
, ill
);
3087 * It's impossible for the queue to be empty, but
3090 if (--t
->tbf_q_len
== 0) {
3093 ipst
->ips_mrtstat
->mrts_drop_sel
++;
3102 /* Sends packet, 2 cases - encap tunnel, phyint. */
3104 tbf_send_packet(struct vif
*vifp
, mblk_t
*mp
)
3106 ipif_t
*ipif
= vifp
->v_ipif
;
3107 ill_t
*ill
= ipif
->ipif_ill
;
3108 ip_stack_t
*ipst
= ill
->ill_ipst
;
3109 conn_t
*mrouter
= ipst
->ips_ip_g_mrouter
;
3112 ipha
= (ipha_t
*)mp
->b_rptr
;
3113 /* If encap tunnel options */
3114 if (vifp
->v_flags
& VIFF_TUNNEL
) {
3115 ip_xmit_attr_t ixas
;
3117 if (ipst
->ips_ip_mrtdebug
> 1) {
3118 (void) mi_strlog(mrouter
->conn_rq
, 1, SL_TRACE
,
3119 "tbf_send_packet: ENCAP tunnel vif %ld",
3120 (ptrdiff_t)(vifp
- ipst
->ips_vifs
));
3122 bzero(&ixas
, sizeof (ixas
));
3124 IXAF_IS_IPV4
| IXAF_NO_TTL_CHANGE
| IXAF_VERIFY_SOURCE
;
3125 ixas
.ixa_ipst
= ipst
;
3126 ixas
.ixa_ifindex
= 0;
3127 ixas
.ixa_cred
= kcred
;
3128 ixas
.ixa_cpid
= NOPID
;
3129 ixas
.ixa_tsl
= NULL
;
3130 ixas
.ixa_zoneid
= GLOBAL_ZONEID
; /* Multicast router in GZ */
3131 ixas
.ixa_pktlen
= ntohs(ipha
->ipha_length
);
3132 ixas
.ixa_ip_hdr_length
= IPH_HDR_LENGTH(ipha
);
3135 * Feed into ip_output_simple which will set the ident field
3136 * and checksum the encapsulating header.
3137 * BSD gets the cached route vifp->v_route from ip_output()
3138 * to speed up route table lookups. Not necessary in SunOS 5.x.
3139 * One could make multicast forwarding faster by putting an
3140 * ip_xmit_attr_t in each vif thereby caching the ire/nce.
3142 (void) ip_output_simple(mp
, &ixas
);
3148 /* Need to loop back to members on the outgoing interface. */
3150 ip_recv_attr_t iras
;
3153 bzero(&iras
, sizeof (iras
));
3154 iras
.ira_flags
= IRAF_IS_IPV4
;
3155 iras
.ira_ill
= iras
.ira_rill
= ill
;
3156 iras
.ira_ruifindex
= ill
->ill_phyint
->phyint_ifindex
;
3157 iras
.ira_zoneid
= GLOBAL_ZONEID
; /* Multicast router in GZ */
3158 iras
.ira_pktlen
= ntohs(ipha
->ipha_length
);
3159 iras
.ira_ip_hdr_length
= IPH_HDR_LENGTH(ipha
);
3161 dst
= ipha
->ipha_dst
;
3162 if (ill_hasmembers_v4(ill
, dst
)) {
3163 iras
.ira_flags
|= IRAF_LOOPBACK_COPY
;
3165 if (ipst
->ips_ip_mrtdebug
> 1) {
3166 (void) mi_strlog(mrouter
->conn_rq
, 1, SL_TRACE
,
3167 "tbf_send_pkt: phyint forward vif %ld dst = 0x%x",
3168 (ptrdiff_t)(vifp
- ipst
->ips_vifs
), ntohl(dst
));
3171 * Find an NCE which matches the nexthop.
3172 * For a pt-pt interface we use the other end of the pt-pt
3175 if (ipif
->ipif_flags
& IPIF_POINTOPOINT
) {
3176 dst
= ipif
->ipif_pp_dst_addr
;
3177 nce
= arp_nce_init(ill
, dst
, ill
->ill_net_type
);
3179 nce
= arp_nce_init(ill
, dst
, IRE_MULTICAST
);
3182 BUMP_MIB(ill
->ill_ip_mib
, ipIfStatsOutDiscards
);
3183 ip_drop_output("tbf_send_packet - no nce", mp
, ill
);
3189 * We don't remeber the incoming ill. Thus we
3190 * pretend the packet arrived on the outbound ill. This means
3191 * statistics for input errors will be increased on the wrong
3192 * ill but that isn't a big deal.
3194 ip_forward_xmit_v4(nce
, ill
, mp
, ipha
, &iras
, ill
->ill_mc_mtu
,
3196 ASSERT(!(iras
.ira_flags
& IRAF_IPSEC_SECURE
));
3203 * Determine the current time and then the elapsed time (between the last time
3204 * and time now). Update the no. of tokens in the bucket.
3207 tbf_update_tokens(struct vif
*vifp
)
3211 struct tbf
*t
= vifp
->v_tbf
;
3212 ip_stack_t
*ipst
= vifp
->v_ipif
->ipif_ill
->ill_ipst
;
3213 conn_t
*mrouter
= ipst
->ips_ip_g_mrouter
;
3215 ASSERT(MUTEX_HELD(&t
->tbf_lock
));
3217 /* Time in secs and nsecs, rate limit in kbits/sec */
3221 TV_DELTA(tp
, t
->tbf_last_pkt_t
, tm
);
3224 * This formula is actually
3225 * "time in seconds" * "bytes/second". Scaled for nsec.
3226 * (tm/1000000000) * (v_rate_limit * 1000 * (1000/1024) /8)
3228 * The (1000/1024) was introduced in add_vif to optimize
3229 * this divide into a shift.
3231 t
->tbf_n_tok
+= (tm
/1000) * vifp
->v_rate_limit
/ 1024 / 8;
3232 t
->tbf_last_pkt_t
= tp
;
3234 if (t
->tbf_n_tok
> MAX_BKT_SIZE
)
3235 t
->tbf_n_tok
= MAX_BKT_SIZE
;
3236 if (ipst
->ips_ip_mrtdebug
> 1) {
3237 (void) mi_strlog(mrouter
->conn_rq
, 1, SL_TRACE
,
3238 "tbf_update_tok: tm %lld tok %d vif %ld",
3239 tm
, t
->tbf_n_tok
, (ptrdiff_t)(vifp
- ipst
->ips_vifs
));
3244 * Priority currently is based on port nos.
3245 * Different forwarding mechanisms have different ways
3246 * of obtaining the port no. Hence, the vif must be
3247 * given along with the packet itself.
3251 priority(struct vif
*vifp
, ipha_t
*ipha
)
3254 ip_stack_t
*ipst
= vifp
->v_ipif
->ipif_ill
->ill_ipst
;
3255 conn_t
*mrouter
= ipst
->ips_ip_g_mrouter
;
3257 /* Temporary hack; may add general packet classifier some day */
3259 ASSERT(MUTEX_HELD(&vifp
->v_tbf
->tbf_lock
));
3262 * The UDP port space is divided up into four priority ranges:
3263 * [0, 16384) : unclassified - lowest priority
3264 * [16384, 32768) : audio - highest priority
3265 * [32768, 49152) : whiteboard - medium priority
3266 * [49152, 65536) : video - low priority
3269 if (ipha
->ipha_protocol
== IPPROTO_UDP
) {
3270 struct udphdr
*udp
=
3271 (struct udphdr
*)((char *)ipha
+ IPH_HDR_LENGTH(ipha
));
3272 switch (ntohs(udp
->uh_dport
) & 0xc000) {
3286 if (ipst
->ips_ip_mrtdebug
> 1) {
3287 (void) mi_strlog(mrouter
->conn_rq
, 1, SL_TRACE
,
3288 "priority: port %x prio %d\n",
3289 ntohs(udp
->uh_dport
), prio
);
3292 prio
= 50; /* default priority */
3297 * End of token bucket filter modifications
3303 * Produces data for netstat -M.
3306 ip_mroute_stats(mblk_t
*mp
, ip_stack_t
*ipst
)
3308 ipst
->ips_mrtstat
->mrts_vifctlSize
= sizeof (struct vifctl
);
3309 ipst
->ips_mrtstat
->mrts_mfcctlSize
= sizeof (struct mfcctl
);
3310 if (!snmp_append_data(mp
, (char *)ipst
->ips_mrtstat
,
3311 sizeof (struct mrtstat
))) {
3312 ip0dbg(("ip_mroute_stats: failed %ld bytes\n",
3313 (size_t)sizeof (struct mrtstat
)));
3320 * Sends info for SNMP's MIB.
3323 ip_mroute_vif(mblk_t
*mp
, ip_stack_t
*ipst
)
3328 mutex_enter(&ipst
->ips_numvifs_mutex
);
3329 for (vifi
= 0; vifi
< ipst
->ips_numvifs
; vifi
++) {
3330 if (ipst
->ips_vifs
[vifi
].v_lcl_addr
.s_addr
== 0)
3333 * No locks here, an approximation is fine.
3335 vi
.vifc_vifi
= vifi
;
3336 vi
.vifc_flags
= ipst
->ips_vifs
[vifi
].v_flags
;
3337 vi
.vifc_threshold
= ipst
->ips_vifs
[vifi
].v_threshold
;
3338 vi
.vifc_rate_limit
= ipst
->ips_vifs
[vifi
].v_rate_limit
;
3339 vi
.vifc_lcl_addr
= ipst
->ips_vifs
[vifi
].v_lcl_addr
;
3340 vi
.vifc_rmt_addr
= ipst
->ips_vifs
[vifi
].v_rmt_addr
;
3341 vi
.vifc_pkt_in
= ipst
->ips_vifs
[vifi
].v_pkt_in
;
3342 vi
.vifc_pkt_out
= ipst
->ips_vifs
[vifi
].v_pkt_out
;
3344 if (!snmp_append_data(mp
, (char *)&vi
, sizeof (vi
))) {
3345 ip0dbg(("ip_mroute_vif: failed %ld bytes\n",
3346 (size_t)sizeof (vi
)));
3347 mutex_exit(&ipst
->ips_numvifs_mutex
);
3351 mutex_exit(&ipst
->ips_numvifs_mutex
);
3356 * Called by ip_snmp_get to send up multicast routing table.
3359 ip_mroute_mrt(mblk_t
*mp
, ip_stack_t
*ipst
)
3366 * Make sure multicast has not been turned off.
3368 if (is_mrouter_off(ipst
))
3371 /* Loop over all hash buckets and their chains */
3372 for (i
= 0; i
< MFCTBLSIZ
; i
++) {
3373 MFCB_REFHOLD(&ipst
->ips_mfcs
[i
]);
3374 for (rt
= ipst
->ips_mfcs
[i
].mfcb_mfc
; rt
; rt
= rt
->mfc_next
) {
3375 mutex_enter(&rt
->mfc_mutex
);
3376 if (rt
->mfc_rte
!= NULL
||
3377 (rt
->mfc_marks
& MFCB_MARK_CONDEMNED
)) {
3378 mutex_exit(&rt
->mfc_mutex
);
3381 mfcc
.mfcc_origin
= rt
->mfc_origin
;
3382 mfcc
.mfcc_mcastgrp
= rt
->mfc_mcastgrp
;
3383 mfcc
.mfcc_parent
= rt
->mfc_parent
;
3384 mfcc
.mfcc_pkt_cnt
= rt
->mfc_pkt_cnt
;
3385 mutex_enter(&ipst
->ips_numvifs_mutex
);
3386 for (j
= 0; j
< (int)ipst
->ips_numvifs
; j
++)
3387 mfcc
.mfcc_ttls
[j
] = rt
->mfc_ttls
[j
];
3388 for (j
= (int)ipst
->ips_numvifs
; j
< MAXVIFS
; j
++)
3389 mfcc
.mfcc_ttls
[j
] = 0;
3390 mutex_exit(&ipst
->ips_numvifs_mutex
);
3392 mutex_exit(&rt
->mfc_mutex
);
3393 if (!snmp_append_data(mp
, (char *)&mfcc
,
3395 MFCB_REFRELE(&ipst
->ips_mfcs
[i
]);
3396 ip0dbg(("ip_mroute_mrt: failed %ld bytes\n",
3397 (size_t)sizeof (mfcc
)));
3401 MFCB_REFRELE(&ipst
->ips_mfcs
[i
]);