2 * IP multicast routing support for mrouted 3.6/3.8
4 * (c) 1995 Alan Cox, <alan@lxorguk.ukuu.org.uk>
5 * Linux Consultancy and Custom Driver Development
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation; either version
10 * 2 of the License, or (at your option) any later version.
13 * Michael Chastain : Incorrect size of copying.
14 * Alan Cox : Added the cache manager code
15 * Alan Cox : Fixed the clone/copy bug and device race.
16 * Mike McLagan : Routing by source
17 * Malcolm Beattie : Buffer handling fixes.
18 * Alexey Kuznetsov : Double buffer free and other fixes.
19 * SVR Anand : Fixed several multicast bugs and problems.
20 * Alexey Kuznetsov : Status, optimisations and more.
21 * Brad Parker : Better behaviour on mrouted upcall
23 * Carlos Picoto : PIMv1 Support
24 * Pavlin Ivanov Radoslavov: PIMv2 Registers must checksum only PIM header
25 * Relax this requirement to work with older peers.
29 #include <asm/system.h>
30 #include <asm/uaccess.h>
31 #include <linux/types.h>
32 #include <linux/capability.h>
33 #include <linux/errno.h>
34 #include <linux/timer.h>
36 #include <linux/kernel.h>
37 #include <linux/fcntl.h>
38 #include <linux/stat.h>
39 #include <linux/socket.h>
41 #include <linux/inet.h>
42 #include <linux/netdevice.h>
43 #include <linux/inetdevice.h>
44 #include <linux/igmp.h>
45 #include <linux/proc_fs.h>
46 #include <linux/seq_file.h>
47 #include <linux/mroute.h>
48 #include <linux/init.h>
49 #include <linux/if_ether.h>
50 #include <net/net_namespace.h>
52 #include <net/protocol.h>
53 #include <linux/skbuff.h>
54 #include <net/route.h>
59 #include <linux/notifier.h>
60 #include <linux/if_arp.h>
61 #include <linux/netfilter_ipv4.h>
63 #include <net/checksum.h>
64 #include <net/netlink.h>
66 #if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2)
67 #define CONFIG_IP_PIMSM 1
70 /* Big lock, protecting vif table, mrt cache and mroute socket state.
71 Note that the changes are semaphored via rtnl_lock.
74 static DEFINE_RWLOCK(mrt_lock
);
77 * Multicast router control variables
80 #define VIF_EXISTS(_net, _idx) ((_net)->ipv4.vif_table[_idx].dev != NULL)
82 static struct mfc_cache
*mfc_unres_queue
; /* Queue of unresolved entries */
84 /* Special spinlock for queue of unresolved entries */
85 static DEFINE_SPINLOCK(mfc_unres_lock
);
87 /* We return to original Alan's scheme. Hash table of resolved
88 entries is changed only in process context and protected
89 with weak lock mrt_lock. Queue of unresolved entries is protected
90 with strong spinlock mfc_unres_lock.
92 In this case data path is free of exclusive locks at all.
95 static struct kmem_cache
*mrt_cachep __read_mostly
;
97 static int ip_mr_forward(struct sk_buff
*skb
, struct mfc_cache
*cache
, int local
);
98 static int ipmr_cache_report(struct net
*net
,
99 struct sk_buff
*pkt
, vifi_t vifi
, int assert);
100 static int ipmr_fill_mroute(struct sk_buff
*skb
, struct mfc_cache
*c
, struct rtmsg
*rtm
);
102 #ifdef CONFIG_IP_PIMSM_V2
103 static struct net_protocol pim_protocol
;
106 static struct timer_list ipmr_expire_timer
;
108 /* Service routines creating virtual interfaces: DVMRP tunnels and PIMREG */
110 static void ipmr_del_tunnel(struct net_device
*dev
, struct vifctl
*v
)
112 struct net
*net
= dev_net(dev
);
116 dev
= __dev_get_by_name(net
, "tunl0");
118 const struct net_device_ops
*ops
= dev
->netdev_ops
;
120 struct ip_tunnel_parm p
;
122 memset(&p
, 0, sizeof(p
));
123 p
.iph
.daddr
= v
->vifc_rmt_addr
.s_addr
;
124 p
.iph
.saddr
= v
->vifc_lcl_addr
.s_addr
;
127 p
.iph
.protocol
= IPPROTO_IPIP
;
128 sprintf(p
.name
, "dvmrp%d", v
->vifc_vifi
);
129 ifr
.ifr_ifru
.ifru_data
= (__force
void __user
*)&p
;
131 if (ops
->ndo_do_ioctl
) {
132 mm_segment_t oldfs
= get_fs();
135 ops
->ndo_do_ioctl(dev
, &ifr
, SIOCDELTUNNEL
);
142 struct net_device
*ipmr_new_tunnel(struct net
*net
, struct vifctl
*v
)
144 struct net_device
*dev
;
146 dev
= __dev_get_by_name(net
, "tunl0");
149 const struct net_device_ops
*ops
= dev
->netdev_ops
;
152 struct ip_tunnel_parm p
;
153 struct in_device
*in_dev
;
155 memset(&p
, 0, sizeof(p
));
156 p
.iph
.daddr
= v
->vifc_rmt_addr
.s_addr
;
157 p
.iph
.saddr
= v
->vifc_lcl_addr
.s_addr
;
160 p
.iph
.protocol
= IPPROTO_IPIP
;
161 sprintf(p
.name
, "dvmrp%d", v
->vifc_vifi
);
162 ifr
.ifr_ifru
.ifru_data
= (__force
void __user
*)&p
;
164 if (ops
->ndo_do_ioctl
) {
165 mm_segment_t oldfs
= get_fs();
168 err
= ops
->ndo_do_ioctl(dev
, &ifr
, SIOCADDTUNNEL
);
176 (dev
= __dev_get_by_name(net
, p
.name
)) != NULL
) {
177 dev
->flags
|= IFF_MULTICAST
;
179 in_dev
= __in_dev_get_rtnl(dev
);
183 ipv4_devconf_setall(in_dev
);
184 IPV4_DEVCONF(in_dev
->cnf
, RP_FILTER
) = 0;
194 /* allow the register to be completed before unregistering. */
198 unregister_netdevice(dev
);
202 #ifdef CONFIG_IP_PIMSM
204 static int reg_vif_xmit(struct sk_buff
*skb
, struct net_device
*dev
)
206 struct net
*net
= dev_net(dev
);
208 read_lock(&mrt_lock
);
209 dev
->stats
.tx_bytes
+= skb
->len
;
210 dev
->stats
.tx_packets
++;
211 ipmr_cache_report(net
, skb
, net
->ipv4
.mroute_reg_vif_num
,
213 read_unlock(&mrt_lock
);
218 static const struct net_device_ops reg_vif_netdev_ops
= {
219 .ndo_start_xmit
= reg_vif_xmit
,
222 static void reg_vif_setup(struct net_device
*dev
)
224 dev
->type
= ARPHRD_PIMREG
;
225 dev
->mtu
= ETH_DATA_LEN
- sizeof(struct iphdr
) - 8;
226 dev
->flags
= IFF_NOARP
;
227 dev
->netdev_ops
= ®_vif_netdev_ops
,
228 dev
->destructor
= free_netdev
;
231 static struct net_device
*ipmr_reg_vif(void)
233 struct net_device
*dev
;
234 struct in_device
*in_dev
;
236 dev
= alloc_netdev(0, "pimreg", reg_vif_setup
);
241 if (register_netdevice(dev
)) {
248 if ((in_dev
= __in_dev_get_rcu(dev
)) == NULL
) {
253 ipv4_devconf_setall(in_dev
);
254 IPV4_DEVCONF(in_dev
->cnf
, RP_FILTER
) = 0;
265 /* allow the register to be completed before unregistering. */
269 unregister_netdevice(dev
);
276 * @notify: Set to 1, if the caller is a notifier_call
279 static int vif_delete(struct net
*net
, int vifi
, int notify
)
281 struct vif_device
*v
;
282 struct net_device
*dev
;
283 struct in_device
*in_dev
;
285 if (vifi
< 0 || vifi
>= net
->ipv4
.maxvif
)
286 return -EADDRNOTAVAIL
;
288 v
= &net
->ipv4
.vif_table
[vifi
];
290 write_lock_bh(&mrt_lock
);
295 write_unlock_bh(&mrt_lock
);
296 return -EADDRNOTAVAIL
;
299 #ifdef CONFIG_IP_PIMSM
300 if (vifi
== net
->ipv4
.mroute_reg_vif_num
)
301 net
->ipv4
.mroute_reg_vif_num
= -1;
304 if (vifi
+1 == net
->ipv4
.maxvif
) {
306 for (tmp
=vifi
-1; tmp
>=0; tmp
--) {
307 if (VIF_EXISTS(net
, tmp
))
310 net
->ipv4
.maxvif
= tmp
+1;
313 write_unlock_bh(&mrt_lock
);
315 dev_set_allmulti(dev
, -1);
317 if ((in_dev
= __in_dev_get_rtnl(dev
)) != NULL
) {
318 IPV4_DEVCONF(in_dev
->cnf
, MC_FORWARDING
)--;
319 ip_rt_multicast_event(in_dev
);
322 if (v
->flags
&(VIFF_TUNNEL
|VIFF_REGISTER
) && !notify
)
323 unregister_netdevice(dev
);
329 static inline void ipmr_cache_free(struct mfc_cache
*c
)
331 release_net(mfc_net(c
));
332 kmem_cache_free(mrt_cachep
, c
);
335 /* Destroy an unresolved cache entry, killing queued skbs
336 and reporting error to netlink readers.
339 static void ipmr_destroy_unres(struct mfc_cache
*c
)
343 struct net
*net
= mfc_net(c
);
345 atomic_dec(&net
->ipv4
.cache_resolve_queue_len
);
347 while ((skb
= skb_dequeue(&c
->mfc_un
.unres
.unresolved
))) {
348 if (ip_hdr(skb
)->version
== 0) {
349 struct nlmsghdr
*nlh
= (struct nlmsghdr
*)skb_pull(skb
, sizeof(struct iphdr
));
350 nlh
->nlmsg_type
= NLMSG_ERROR
;
351 nlh
->nlmsg_len
= NLMSG_LENGTH(sizeof(struct nlmsgerr
));
352 skb_trim(skb
, nlh
->nlmsg_len
);
354 e
->error
= -ETIMEDOUT
;
355 memset(&e
->msg
, 0, sizeof(e
->msg
));
357 rtnl_unicast(skb
, net
, NETLINK_CB(skb
).pid
);
366 /* Single timer process for all the unresolved queue. */
368 static void ipmr_expire_process(unsigned long dummy
)
371 unsigned long expires
;
372 struct mfc_cache
*c
, **cp
;
374 if (!spin_trylock(&mfc_unres_lock
)) {
375 mod_timer(&ipmr_expire_timer
, jiffies
+HZ
/10);
379 if (mfc_unres_queue
== NULL
)
384 cp
= &mfc_unres_queue
;
386 while ((c
=*cp
) != NULL
) {
387 if (time_after(c
->mfc_un
.unres
.expires
, now
)) {
388 unsigned long interval
= c
->mfc_un
.unres
.expires
- now
;
389 if (interval
< expires
)
397 ipmr_destroy_unres(c
);
400 if (mfc_unres_queue
!= NULL
)
401 mod_timer(&ipmr_expire_timer
, jiffies
+ expires
);
404 spin_unlock(&mfc_unres_lock
);
407 /* Fill oifs list. It is called under write locked mrt_lock. */
409 static void ipmr_update_thresholds(struct mfc_cache
*cache
, unsigned char *ttls
)
412 struct net
*net
= mfc_net(cache
);
414 cache
->mfc_un
.res
.minvif
= MAXVIFS
;
415 cache
->mfc_un
.res
.maxvif
= 0;
416 memset(cache
->mfc_un
.res
.ttls
, 255, MAXVIFS
);
418 for (vifi
= 0; vifi
< net
->ipv4
.maxvif
; vifi
++) {
419 if (VIF_EXISTS(net
, vifi
) &&
420 ttls
[vifi
] && ttls
[vifi
] < 255) {
421 cache
->mfc_un
.res
.ttls
[vifi
] = ttls
[vifi
];
422 if (cache
->mfc_un
.res
.minvif
> vifi
)
423 cache
->mfc_un
.res
.minvif
= vifi
;
424 if (cache
->mfc_un
.res
.maxvif
<= vifi
)
425 cache
->mfc_un
.res
.maxvif
= vifi
+ 1;
430 static int vif_add(struct net
*net
, struct vifctl
*vifc
, int mrtsock
)
432 int vifi
= vifc
->vifc_vifi
;
433 struct vif_device
*v
= &net
->ipv4
.vif_table
[vifi
];
434 struct net_device
*dev
;
435 struct in_device
*in_dev
;
439 if (VIF_EXISTS(net
, vifi
))
442 switch (vifc
->vifc_flags
) {
443 #ifdef CONFIG_IP_PIMSM
446 * Special Purpose VIF in PIM
447 * All the packets will be sent to the daemon
449 if (net
->ipv4
.mroute_reg_vif_num
>= 0)
451 dev
= ipmr_reg_vif();
454 err
= dev_set_allmulti(dev
, 1);
456 unregister_netdevice(dev
);
463 dev
= ipmr_new_tunnel(net
, vifc
);
466 err
= dev_set_allmulti(dev
, 1);
468 ipmr_del_tunnel(dev
, vifc
);
474 dev
= ip_dev_find(net
, vifc
->vifc_lcl_addr
.s_addr
);
476 return -EADDRNOTAVAIL
;
477 err
= dev_set_allmulti(dev
, 1);
487 if ((in_dev
= __in_dev_get_rtnl(dev
)) == NULL
)
488 return -EADDRNOTAVAIL
;
489 IPV4_DEVCONF(in_dev
->cnf
, MC_FORWARDING
)++;
490 ip_rt_multicast_event(in_dev
);
493 * Fill in the VIF structures
495 v
->rate_limit
= vifc
->vifc_rate_limit
;
496 v
->local
= vifc
->vifc_lcl_addr
.s_addr
;
497 v
->remote
= vifc
->vifc_rmt_addr
.s_addr
;
498 v
->flags
= vifc
->vifc_flags
;
500 v
->flags
|= VIFF_STATIC
;
501 v
->threshold
= vifc
->vifc_threshold
;
506 v
->link
= dev
->ifindex
;
507 if (v
->flags
&(VIFF_TUNNEL
|VIFF_REGISTER
))
508 v
->link
= dev
->iflink
;
510 /* And finish update writing critical data */
511 write_lock_bh(&mrt_lock
);
513 #ifdef CONFIG_IP_PIMSM
514 if (v
->flags
&VIFF_REGISTER
)
515 net
->ipv4
.mroute_reg_vif_num
= vifi
;
517 if (vifi
+1 > net
->ipv4
.maxvif
)
518 net
->ipv4
.maxvif
= vifi
+1;
519 write_unlock_bh(&mrt_lock
);
523 static struct mfc_cache
*ipmr_cache_find(struct net
*net
,
527 int line
= MFC_HASH(mcastgrp
, origin
);
530 for (c
= net
->ipv4
.mfc_cache_array
[line
]; c
; c
= c
->next
) {
531 if (c
->mfc_origin
==origin
&& c
->mfc_mcastgrp
==mcastgrp
)
538 * Allocate a multicast cache entry
540 static struct mfc_cache
*ipmr_cache_alloc(struct net
*net
)
542 struct mfc_cache
*c
= kmem_cache_zalloc(mrt_cachep
, GFP_KERNEL
);
545 c
->mfc_un
.res
.minvif
= MAXVIFS
;
550 static struct mfc_cache
*ipmr_cache_alloc_unres(struct net
*net
)
552 struct mfc_cache
*c
= kmem_cache_zalloc(mrt_cachep
, GFP_ATOMIC
);
555 skb_queue_head_init(&c
->mfc_un
.unres
.unresolved
);
556 c
->mfc_un
.unres
.expires
= jiffies
+ 10*HZ
;
562 * A cache entry has gone into a resolved state from queued
565 static void ipmr_cache_resolve(struct mfc_cache
*uc
, struct mfc_cache
*c
)
571 * Play the pending entries through our router
574 while ((skb
= __skb_dequeue(&uc
->mfc_un
.unres
.unresolved
))) {
575 if (ip_hdr(skb
)->version
== 0) {
576 struct nlmsghdr
*nlh
= (struct nlmsghdr
*)skb_pull(skb
, sizeof(struct iphdr
));
578 if (ipmr_fill_mroute(skb
, c
, NLMSG_DATA(nlh
)) > 0) {
579 nlh
->nlmsg_len
= (skb_tail_pointer(skb
) -
582 nlh
->nlmsg_type
= NLMSG_ERROR
;
583 nlh
->nlmsg_len
= NLMSG_LENGTH(sizeof(struct nlmsgerr
));
584 skb_trim(skb
, nlh
->nlmsg_len
);
586 e
->error
= -EMSGSIZE
;
587 memset(&e
->msg
, 0, sizeof(e
->msg
));
590 rtnl_unicast(skb
, mfc_net(c
), NETLINK_CB(skb
).pid
);
592 ip_mr_forward(skb
, c
, 0);
597 * Bounce a cache query up to mrouted. We could use netlink for this but mrouted
598 * expects the following bizarre scheme.
600 * Called under mrt_lock.
603 static int ipmr_cache_report(struct net
*net
,
604 struct sk_buff
*pkt
, vifi_t vifi
, int assert)
607 const int ihl
= ip_hdrlen(pkt
);
608 struct igmphdr
*igmp
;
612 #ifdef CONFIG_IP_PIMSM
613 if (assert == IGMPMSG_WHOLEPKT
)
614 skb
= skb_realloc_headroom(pkt
, sizeof(struct iphdr
));
617 skb
= alloc_skb(128, GFP_ATOMIC
);
622 #ifdef CONFIG_IP_PIMSM
623 if (assert == IGMPMSG_WHOLEPKT
) {
624 /* Ugly, but we have no choice with this interface.
625 Duplicate old header, fix ihl, length etc.
626 And all this only to mangle msg->im_msgtype and
627 to set msg->im_mbz to "mbz" :-)
629 skb_push(skb
, sizeof(struct iphdr
));
630 skb_reset_network_header(skb
);
631 skb_reset_transport_header(skb
);
632 msg
= (struct igmpmsg
*)skb_network_header(skb
);
633 memcpy(msg
, skb_network_header(pkt
), sizeof(struct iphdr
));
634 msg
->im_msgtype
= IGMPMSG_WHOLEPKT
;
636 msg
->im_vif
= net
->ipv4
.mroute_reg_vif_num
;
637 ip_hdr(skb
)->ihl
= sizeof(struct iphdr
) >> 2;
638 ip_hdr(skb
)->tot_len
= htons(ntohs(ip_hdr(pkt
)->tot_len
) +
639 sizeof(struct iphdr
));
648 skb
->network_header
= skb
->tail
;
650 skb_copy_to_linear_data(skb
, pkt
->data
, ihl
);
651 ip_hdr(skb
)->protocol
= 0; /* Flag to the kernel this is a route add */
652 msg
= (struct igmpmsg
*)skb_network_header(skb
);
654 skb
->dst
= dst_clone(pkt
->dst
);
660 igmp
=(struct igmphdr
*)skb_put(skb
, sizeof(struct igmphdr
));
662 msg
->im_msgtype
= assert;
664 ip_hdr(skb
)->tot_len
= htons(skb
->len
); /* Fix the length */
665 skb
->transport_header
= skb
->network_header
;
668 if (net
->ipv4
.mroute_sk
== NULL
) {
676 ret
= sock_queue_rcv_skb(net
->ipv4
.mroute_sk
, skb
);
679 printk(KERN_WARNING
"mroute: pending queue full, dropping entries.\n");
687 * Queue a packet for resolution. It gets locked cache entry!
691 ipmr_cache_unresolved(struct net
*net
, vifi_t vifi
, struct sk_buff
*skb
)
695 const struct iphdr
*iph
= ip_hdr(skb
);
697 spin_lock_bh(&mfc_unres_lock
);
698 for (c
=mfc_unres_queue
; c
; c
=c
->next
) {
699 if (net_eq(mfc_net(c
), net
) &&
700 c
->mfc_mcastgrp
== iph
->daddr
&&
701 c
->mfc_origin
== iph
->saddr
)
707 * Create a new entry if allowable
710 if (atomic_read(&net
->ipv4
.cache_resolve_queue_len
) >= 10 ||
711 (c
= ipmr_cache_alloc_unres(net
)) == NULL
) {
712 spin_unlock_bh(&mfc_unres_lock
);
719 * Fill in the new cache entry
722 c
->mfc_origin
= iph
->saddr
;
723 c
->mfc_mcastgrp
= iph
->daddr
;
726 * Reflect first query at mrouted.
728 err
= ipmr_cache_report(net
, skb
, vifi
, IGMPMSG_NOCACHE
);
730 /* If the report failed throw the cache entry
733 spin_unlock_bh(&mfc_unres_lock
);
740 atomic_inc(&net
->ipv4
.cache_resolve_queue_len
);
741 c
->next
= mfc_unres_queue
;
744 mod_timer(&ipmr_expire_timer
, c
->mfc_un
.unres
.expires
);
748 * See if we can append the packet
750 if (c
->mfc_un
.unres
.unresolved
.qlen
>3) {
754 skb_queue_tail(&c
->mfc_un
.unres
.unresolved
, skb
);
758 spin_unlock_bh(&mfc_unres_lock
);
763 * MFC cache manipulation by user space mroute daemon
766 static int ipmr_mfc_delete(struct net
*net
, struct mfcctl
*mfc
)
769 struct mfc_cache
*c
, **cp
;
771 line
= MFC_HASH(mfc
->mfcc_mcastgrp
.s_addr
, mfc
->mfcc_origin
.s_addr
);
773 for (cp
= &net
->ipv4
.mfc_cache_array
[line
];
774 (c
= *cp
) != NULL
; cp
= &c
->next
) {
775 if (c
->mfc_origin
== mfc
->mfcc_origin
.s_addr
&&
776 c
->mfc_mcastgrp
== mfc
->mfcc_mcastgrp
.s_addr
) {
777 write_lock_bh(&mrt_lock
);
779 write_unlock_bh(&mrt_lock
);
788 static int ipmr_mfc_add(struct net
*net
, struct mfcctl
*mfc
, int mrtsock
)
791 struct mfc_cache
*uc
, *c
, **cp
;
793 line
= MFC_HASH(mfc
->mfcc_mcastgrp
.s_addr
, mfc
->mfcc_origin
.s_addr
);
795 for (cp
= &net
->ipv4
.mfc_cache_array
[line
];
796 (c
= *cp
) != NULL
; cp
= &c
->next
) {
797 if (c
->mfc_origin
== mfc
->mfcc_origin
.s_addr
&&
798 c
->mfc_mcastgrp
== mfc
->mfcc_mcastgrp
.s_addr
)
803 write_lock_bh(&mrt_lock
);
804 c
->mfc_parent
= mfc
->mfcc_parent
;
805 ipmr_update_thresholds(c
, mfc
->mfcc_ttls
);
807 c
->mfc_flags
|= MFC_STATIC
;
808 write_unlock_bh(&mrt_lock
);
812 if (!ipv4_is_multicast(mfc
->mfcc_mcastgrp
.s_addr
))
815 c
= ipmr_cache_alloc(net
);
819 c
->mfc_origin
= mfc
->mfcc_origin
.s_addr
;
820 c
->mfc_mcastgrp
= mfc
->mfcc_mcastgrp
.s_addr
;
821 c
->mfc_parent
= mfc
->mfcc_parent
;
822 ipmr_update_thresholds(c
, mfc
->mfcc_ttls
);
824 c
->mfc_flags
|= MFC_STATIC
;
826 write_lock_bh(&mrt_lock
);
827 c
->next
= net
->ipv4
.mfc_cache_array
[line
];
828 net
->ipv4
.mfc_cache_array
[line
] = c
;
829 write_unlock_bh(&mrt_lock
);
832 * Check to see if we resolved a queued list. If so we
833 * need to send on the frames and tidy up.
835 spin_lock_bh(&mfc_unres_lock
);
836 for (cp
= &mfc_unres_queue
; (uc
=*cp
) != NULL
;
838 if (net_eq(mfc_net(uc
), net
) &&
839 uc
->mfc_origin
== c
->mfc_origin
&&
840 uc
->mfc_mcastgrp
== c
->mfc_mcastgrp
) {
842 atomic_dec(&net
->ipv4
.cache_resolve_queue_len
);
846 if (mfc_unres_queue
== NULL
)
847 del_timer(&ipmr_expire_timer
);
848 spin_unlock_bh(&mfc_unres_lock
);
851 ipmr_cache_resolve(uc
, c
);
858 * Close the multicast socket, and clear the vif tables etc
861 static void mroute_clean_tables(struct net
*net
)
866 * Shut down all active vif entries
868 for (i
= 0; i
< net
->ipv4
.maxvif
; i
++) {
869 if (!(net
->ipv4
.vif_table
[i
].flags
&VIFF_STATIC
))
870 vif_delete(net
, i
, 0);
876 for (i
=0; i
<MFC_LINES
; i
++) {
877 struct mfc_cache
*c
, **cp
;
879 cp
= &net
->ipv4
.mfc_cache_array
[i
];
880 while ((c
= *cp
) != NULL
) {
881 if (c
->mfc_flags
&MFC_STATIC
) {
885 write_lock_bh(&mrt_lock
);
887 write_unlock_bh(&mrt_lock
);
893 if (atomic_read(&net
->ipv4
.cache_resolve_queue_len
) != 0) {
894 struct mfc_cache
*c
, **cp
;
896 spin_lock_bh(&mfc_unres_lock
);
897 cp
= &mfc_unres_queue
;
898 while ((c
= *cp
) != NULL
) {
899 if (!net_eq(mfc_net(c
), net
)) {
905 ipmr_destroy_unres(c
);
907 spin_unlock_bh(&mfc_unres_lock
);
911 static void mrtsock_destruct(struct sock
*sk
)
913 struct net
*net
= sock_net(sk
);
916 if (sk
== net
->ipv4
.mroute_sk
) {
917 IPV4_DEVCONF_ALL(net
, MC_FORWARDING
)--;
919 write_lock_bh(&mrt_lock
);
920 net
->ipv4
.mroute_sk
= NULL
;
921 write_unlock_bh(&mrt_lock
);
923 mroute_clean_tables(net
);
929 * Socket options and virtual interface manipulation. The whole
930 * virtual interface system is a complete heap, but unfortunately
931 * that's how BSD mrouted happens to think. Maybe one day with a proper
932 * MOSPF/PIM router set up we can clean this up.
935 int ip_mroute_setsockopt(struct sock
*sk
, int optname
, char __user
*optval
, int optlen
)
940 struct net
*net
= sock_net(sk
);
942 if (optname
!= MRT_INIT
) {
943 if (sk
!= net
->ipv4
.mroute_sk
&& !capable(CAP_NET_ADMIN
))
949 if (sk
->sk_type
!= SOCK_RAW
||
950 inet_sk(sk
)->num
!= IPPROTO_IGMP
)
952 if (optlen
!= sizeof(int))
956 if (net
->ipv4
.mroute_sk
) {
961 ret
= ip_ra_control(sk
, 1, mrtsock_destruct
);
963 write_lock_bh(&mrt_lock
);
964 net
->ipv4
.mroute_sk
= sk
;
965 write_unlock_bh(&mrt_lock
);
967 IPV4_DEVCONF_ALL(net
, MC_FORWARDING
)++;
972 if (sk
!= net
->ipv4
.mroute_sk
)
974 return ip_ra_control(sk
, 0, NULL
);
977 if (optlen
!= sizeof(vif
))
979 if (copy_from_user(&vif
, optval
, sizeof(vif
)))
981 if (vif
.vifc_vifi
>= MAXVIFS
)
984 if (optname
== MRT_ADD_VIF
) {
985 ret
= vif_add(net
, &vif
, sk
== net
->ipv4
.mroute_sk
);
987 ret
= vif_delete(net
, vif
.vifc_vifi
, 0);
993 * Manipulate the forwarding caches. These live
994 * in a sort of kernel/user symbiosis.
998 if (optlen
!= sizeof(mfc
))
1000 if (copy_from_user(&mfc
, optval
, sizeof(mfc
)))
1003 if (optname
== MRT_DEL_MFC
)
1004 ret
= ipmr_mfc_delete(net
, &mfc
);
1006 ret
= ipmr_mfc_add(net
, &mfc
, sk
== net
->ipv4
.mroute_sk
);
1010 * Control PIM assert.
1015 if (get_user(v
,(int __user
*)optval
))
1017 net
->ipv4
.mroute_do_assert
= (v
) ? 1 : 0;
1020 #ifdef CONFIG_IP_PIMSM
1025 if (get_user(v
,(int __user
*)optval
))
1031 if (v
!= net
->ipv4
.mroute_do_pim
) {
1032 net
->ipv4
.mroute_do_pim
= v
;
1033 net
->ipv4
.mroute_do_assert
= v
;
1034 #ifdef CONFIG_IP_PIMSM_V2
1035 if (net
->ipv4
.mroute_do_pim
)
1036 ret
= inet_add_protocol(&pim_protocol
,
1039 ret
= inet_del_protocol(&pim_protocol
,
1050 * Spurious command, or MRT_VERSION which you cannot
1054 return -ENOPROTOOPT
;
1059 * Getsock opt support for the multicast routing system.
1062 int ip_mroute_getsockopt(struct sock
*sk
, int optname
, char __user
*optval
, int __user
*optlen
)
1066 struct net
*net
= sock_net(sk
);
1068 if (optname
!= MRT_VERSION
&&
1069 #ifdef CONFIG_IP_PIMSM
1072 optname
!=MRT_ASSERT
)
1073 return -ENOPROTOOPT
;
1075 if (get_user(olr
, optlen
))
1078 olr
= min_t(unsigned int, olr
, sizeof(int));
1082 if (put_user(olr
, optlen
))
1084 if (optname
== MRT_VERSION
)
1086 #ifdef CONFIG_IP_PIMSM
1087 else if (optname
== MRT_PIM
)
1088 val
= net
->ipv4
.mroute_do_pim
;
1091 val
= net
->ipv4
.mroute_do_assert
;
1092 if (copy_to_user(optval
, &val
, olr
))
1098 * The IP multicast ioctl support routines.
1101 int ipmr_ioctl(struct sock
*sk
, int cmd
, void __user
*arg
)
1103 struct sioc_sg_req sr
;
1104 struct sioc_vif_req vr
;
1105 struct vif_device
*vif
;
1106 struct mfc_cache
*c
;
1107 struct net
*net
= sock_net(sk
);
1111 if (copy_from_user(&vr
, arg
, sizeof(vr
)))
1113 if (vr
.vifi
>= net
->ipv4
.maxvif
)
1115 read_lock(&mrt_lock
);
1116 vif
= &net
->ipv4
.vif_table
[vr
.vifi
];
1117 if (VIF_EXISTS(net
, vr
.vifi
)) {
1118 vr
.icount
= vif
->pkt_in
;
1119 vr
.ocount
= vif
->pkt_out
;
1120 vr
.ibytes
= vif
->bytes_in
;
1121 vr
.obytes
= vif
->bytes_out
;
1122 read_unlock(&mrt_lock
);
1124 if (copy_to_user(arg
, &vr
, sizeof(vr
)))
1128 read_unlock(&mrt_lock
);
1129 return -EADDRNOTAVAIL
;
1131 if (copy_from_user(&sr
, arg
, sizeof(sr
)))
1134 read_lock(&mrt_lock
);
1135 c
= ipmr_cache_find(net
, sr
.src
.s_addr
, sr
.grp
.s_addr
);
1137 sr
.pktcnt
= c
->mfc_un
.res
.pkt
;
1138 sr
.bytecnt
= c
->mfc_un
.res
.bytes
;
1139 sr
.wrong_if
= c
->mfc_un
.res
.wrong_if
;
1140 read_unlock(&mrt_lock
);
1142 if (copy_to_user(arg
, &sr
, sizeof(sr
)))
1146 read_unlock(&mrt_lock
);
1147 return -EADDRNOTAVAIL
;
1149 return -ENOIOCTLCMD
;
1154 static int ipmr_device_event(struct notifier_block
*this, unsigned long event
, void *ptr
)
1156 struct net_device
*dev
= ptr
;
1157 struct net
*net
= dev_net(dev
);
1158 struct vif_device
*v
;
1161 if (!net_eq(dev_net(dev
), net
))
1164 if (event
!= NETDEV_UNREGISTER
)
1166 v
= &net
->ipv4
.vif_table
[0];
1167 for (ct
= 0; ct
< net
->ipv4
.maxvif
; ct
++, v
++) {
1169 vif_delete(net
, ct
, 1);
1175 static struct notifier_block ip_mr_notifier
= {
1176 .notifier_call
= ipmr_device_event
,
1180 * Encapsulate a packet by attaching a valid IPIP header to it.
1181 * This avoids tunnel drivers and other mess and gives us the speed so
1182 * important for multicast video.
1185 static void ip_encap(struct sk_buff
*skb
, __be32 saddr
, __be32 daddr
)
1188 struct iphdr
*old_iph
= ip_hdr(skb
);
1190 skb_push(skb
, sizeof(struct iphdr
));
1191 skb
->transport_header
= skb
->network_header
;
1192 skb_reset_network_header(skb
);
1196 iph
->tos
= old_iph
->tos
;
1197 iph
->ttl
= old_iph
->ttl
;
1201 iph
->protocol
= IPPROTO_IPIP
;
1203 iph
->tot_len
= htons(skb
->len
);
1204 ip_select_ident(iph
, skb
->dst
, NULL
);
1207 memset(&(IPCB(skb
)->opt
), 0, sizeof(IPCB(skb
)->opt
));
1211 static inline int ipmr_forward_finish(struct sk_buff
*skb
)
1213 struct ip_options
* opt
= &(IPCB(skb
)->opt
);
1215 IP_INC_STATS_BH(dev_net(skb
->dst
->dev
), IPSTATS_MIB_OUTFORWDATAGRAMS
);
1217 if (unlikely(opt
->optlen
))
1218 ip_forward_options(skb
);
1220 return dst_output(skb
);
1224 * Processing handlers for ipmr_forward
1227 static void ipmr_queue_xmit(struct sk_buff
*skb
, struct mfc_cache
*c
, int vifi
)
1229 struct net
*net
= mfc_net(c
);
1230 const struct iphdr
*iph
= ip_hdr(skb
);
1231 struct vif_device
*vif
= &net
->ipv4
.vif_table
[vifi
];
1232 struct net_device
*dev
;
1236 if (vif
->dev
== NULL
)
1239 #ifdef CONFIG_IP_PIMSM
1240 if (vif
->flags
& VIFF_REGISTER
) {
1242 vif
->bytes_out
+= skb
->len
;
1243 vif
->dev
->stats
.tx_bytes
+= skb
->len
;
1244 vif
->dev
->stats
.tx_packets
++;
1245 ipmr_cache_report(net
, skb
, vifi
, IGMPMSG_WHOLEPKT
);
1251 if (vif
->flags
&VIFF_TUNNEL
) {
1252 struct flowi fl
= { .oif
= vif
->link
,
1254 { .daddr
= vif
->remote
,
1255 .saddr
= vif
->local
,
1256 .tos
= RT_TOS(iph
->tos
) } },
1257 .proto
= IPPROTO_IPIP
};
1258 if (ip_route_output_key(net
, &rt
, &fl
))
1260 encap
= sizeof(struct iphdr
);
1262 struct flowi fl
= { .oif
= vif
->link
,
1264 { .daddr
= iph
->daddr
,
1265 .tos
= RT_TOS(iph
->tos
) } },
1266 .proto
= IPPROTO_IPIP
};
1267 if (ip_route_output_key(net
, &rt
, &fl
))
1271 dev
= rt
->u
.dst
.dev
;
1273 if (skb
->len
+encap
> dst_mtu(&rt
->u
.dst
) && (ntohs(iph
->frag_off
) & IP_DF
)) {
1274 /* Do not fragment multicasts. Alas, IPv4 does not
1275 allow to send ICMP, so that packets will disappear
1279 IP_INC_STATS_BH(dev_net(dev
), IPSTATS_MIB_FRAGFAILS
);
1284 encap
+= LL_RESERVED_SPACE(dev
) + rt
->u
.dst
.header_len
;
1286 if (skb_cow(skb
, encap
)) {
1292 vif
->bytes_out
+= skb
->len
;
1294 dst_release(skb
->dst
);
1295 skb
->dst
= &rt
->u
.dst
;
1296 ip_decrease_ttl(ip_hdr(skb
));
1298 /* FIXME: forward and output firewalls used to be called here.
1299 * What do we do with netfilter? -- RR */
1300 if (vif
->flags
& VIFF_TUNNEL
) {
1301 ip_encap(skb
, vif
->local
, vif
->remote
);
1302 /* FIXME: extra output firewall step used to be here. --RR */
1303 vif
->dev
->stats
.tx_packets
++;
1304 vif
->dev
->stats
.tx_bytes
+= skb
->len
;
1307 IPCB(skb
)->flags
|= IPSKB_FORWARDED
;
1310 * RFC1584 teaches, that DVMRP/PIM router must deliver packets locally
1311 * not only before forwarding, but after forwarding on all output
1312 * interfaces. It is clear, if mrouter runs a multicasting
1313 * program, it should receive packets not depending to what interface
1314 * program is joined.
1315 * If we will not make it, the program will have to join on all
1316 * interfaces. On the other hand, multihoming host (or router, but
1317 * not mrouter) cannot join to more than one interface - it will
1318 * result in receiving multiple packets.
1320 NF_HOOK(PF_INET
, NF_INET_FORWARD
, skb
, skb
->dev
, dev
,
1321 ipmr_forward_finish
);
1329 static int ipmr_find_vif(struct net_device
*dev
)
1331 struct net
*net
= dev_net(dev
);
1333 for (ct
= net
->ipv4
.maxvif
-1; ct
>= 0; ct
--) {
1334 if (net
->ipv4
.vif_table
[ct
].dev
== dev
)
1340 /* "local" means that we should preserve one skb (for local delivery) */
1342 static int ip_mr_forward(struct sk_buff
*skb
, struct mfc_cache
*cache
, int local
)
1346 struct net
*net
= mfc_net(cache
);
1348 vif
= cache
->mfc_parent
;
1349 cache
->mfc_un
.res
.pkt
++;
1350 cache
->mfc_un
.res
.bytes
+= skb
->len
;
1353 * Wrong interface: drop packet and (maybe) send PIM assert.
1355 if (net
->ipv4
.vif_table
[vif
].dev
!= skb
->dev
) {
1358 if (skb
->rtable
->fl
.iif
== 0) {
1359 /* It is our own packet, looped back.
1360 Very complicated situation...
1362 The best workaround until routing daemons will be
1363 fixed is not to redistribute packet, if it was
1364 send through wrong interface. It means, that
1365 multicast applications WILL NOT work for
1366 (S,G), which have default multicast route pointing
1367 to wrong oif. In any case, it is not a good
1368 idea to use multicasting applications on router.
1373 cache
->mfc_un
.res
.wrong_if
++;
1374 true_vifi
= ipmr_find_vif(skb
->dev
);
1376 if (true_vifi
>= 0 && net
->ipv4
.mroute_do_assert
&&
1377 /* pimsm uses asserts, when switching from RPT to SPT,
1378 so that we cannot check that packet arrived on an oif.
1379 It is bad, but otherwise we would need to move pretty
1380 large chunk of pimd to kernel. Ough... --ANK
1382 (net
->ipv4
.mroute_do_pim
||
1383 cache
->mfc_un
.res
.ttls
[true_vifi
] < 255) &&
1385 cache
->mfc_un
.res
.last_assert
+ MFC_ASSERT_THRESH
)) {
1386 cache
->mfc_un
.res
.last_assert
= jiffies
;
1387 ipmr_cache_report(net
, skb
, true_vifi
, IGMPMSG_WRONGVIF
);
1392 net
->ipv4
.vif_table
[vif
].pkt_in
++;
1393 net
->ipv4
.vif_table
[vif
].bytes_in
+= skb
->len
;
1398 for (ct
= cache
->mfc_un
.res
.maxvif
-1; ct
>= cache
->mfc_un
.res
.minvif
; ct
--) {
1399 if (ip_hdr(skb
)->ttl
> cache
->mfc_un
.res
.ttls
[ct
]) {
1401 struct sk_buff
*skb2
= skb_clone(skb
, GFP_ATOMIC
);
1403 ipmr_queue_xmit(skb2
, cache
, psend
);
1410 struct sk_buff
*skb2
= skb_clone(skb
, GFP_ATOMIC
);
1412 ipmr_queue_xmit(skb2
, cache
, psend
);
1414 ipmr_queue_xmit(skb
, cache
, psend
);
1427 * Multicast packets for forwarding arrive here
1430 int ip_mr_input(struct sk_buff
*skb
)
1432 struct mfc_cache
*cache
;
1433 struct net
*net
= dev_net(skb
->dev
);
1434 int local
= skb
->rtable
->rt_flags
&RTCF_LOCAL
;
1436 /* Packet is looped back after forward, it should not be
1437 forwarded second time, but still can be delivered locally.
1439 if (IPCB(skb
)->flags
&IPSKB_FORWARDED
)
1443 if (IPCB(skb
)->opt
.router_alert
) {
1444 if (ip_call_ra_chain(skb
))
1446 } else if (ip_hdr(skb
)->protocol
== IPPROTO_IGMP
){
1447 /* IGMPv1 (and broken IGMPv2 implementations sort of
1448 Cisco IOS <= 11.2(8)) do not put router alert
1449 option to IGMP packets destined to routable
1450 groups. It is very bad, because it means
1451 that we can forward NO IGMP messages.
1453 read_lock(&mrt_lock
);
1454 if (net
->ipv4
.mroute_sk
) {
1456 raw_rcv(net
->ipv4
.mroute_sk
, skb
);
1457 read_unlock(&mrt_lock
);
1460 read_unlock(&mrt_lock
);
1464 read_lock(&mrt_lock
);
1465 cache
= ipmr_cache_find(net
, ip_hdr(skb
)->saddr
, ip_hdr(skb
)->daddr
);
1468 * No usable cache entry
1470 if (cache
== NULL
) {
1474 struct sk_buff
*skb2
= skb_clone(skb
, GFP_ATOMIC
);
1475 ip_local_deliver(skb
);
1477 read_unlock(&mrt_lock
);
1483 vif
= ipmr_find_vif(skb
->dev
);
1485 int err
= ipmr_cache_unresolved(net
, vif
, skb
);
1486 read_unlock(&mrt_lock
);
1490 read_unlock(&mrt_lock
);
1495 ip_mr_forward(skb
, cache
, local
);
1497 read_unlock(&mrt_lock
);
1500 return ip_local_deliver(skb
);
1506 return ip_local_deliver(skb
);
1511 #ifdef CONFIG_IP_PIMSM
1512 static int __pim_rcv(struct sk_buff
*skb
, unsigned int pimlen
)
1514 struct net_device
*reg_dev
= NULL
;
1515 struct iphdr
*encap
;
1516 struct net
*net
= dev_net(skb
->dev
);
1518 encap
= (struct iphdr
*)(skb_transport_header(skb
) + pimlen
);
1521 a. packet is really destined to a multicast group
1522 b. packet is not a NULL-REGISTER
1523 c. packet is not truncated
1525 if (!ipv4_is_multicast(encap
->daddr
) ||
1526 encap
->tot_len
== 0 ||
1527 ntohs(encap
->tot_len
) + pimlen
> skb
->len
)
1530 read_lock(&mrt_lock
);
1531 if (net
->ipv4
.mroute_reg_vif_num
>= 0)
1532 reg_dev
= net
->ipv4
.vif_table
[net
->ipv4
.mroute_reg_vif_num
].dev
;
1535 read_unlock(&mrt_lock
);
1537 if (reg_dev
== NULL
)
1540 skb
->mac_header
= skb
->network_header
;
1541 skb_pull(skb
, (u8
*)encap
- skb
->data
);
1542 skb_reset_network_header(skb
);
1544 skb
->protocol
= htons(ETH_P_IP
);
1546 skb
->pkt_type
= PACKET_HOST
;
1547 dst_release(skb
->dst
);
1549 reg_dev
->stats
.rx_bytes
+= skb
->len
;
1550 reg_dev
->stats
.rx_packets
++;
#ifdef CONFIG_IP_PIMSM_V1
/*
 * Handle IGMP messages of PIMv1: only REGISTER messages are accepted,
 * and only while the userspace daemon has enabled PIM assert mode
 * (mroute_do_pim). The skb is consumed on every path.
 */
int pim_rcv_v1(struct sk_buff * skb)
{
	struct igmphdr *pim;
	struct net *net = dev_net(skb->dev);

	if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr)))
		goto drop;

	pim = igmp_hdr(skb);

	if (!net->ipv4.mroute_do_pim ||
	    pim->group != PIM_V1_VERSION || pim->code != PIM_V1_REGISTER)
		goto drop;

	/* __pim_rcv() returns non-zero when we must free the skb ourselves. */
	if (__pim_rcv(skb, sizeof(*pim))) {
drop:
		kfree_skb(skb);
	}
	return 0;
}
#endif
#ifdef CONFIG_IP_PIMSM_V2
/*
 * Handle PIMv2 REGISTER messages. Per the file changelog, the checksum
 * is accepted when either the PIM header alone or the whole packet
 * checksums to zero (relaxed to interoperate with older peers).
 * The skb is consumed on every path.
 */
static int pim_rcv(struct sk_buff * skb)
{
	struct pimreghdr *pim;

	if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr)))
		goto drop;

	pim = (struct pimreghdr *)skb_transport_header(skb);
	if (pim->type != ((PIM_VERSION<<4)|(PIM_REGISTER)) ||
	    (pim->flags&PIM_NULL_REGISTER) ||
	    (ip_compute_csum((void *)pim, sizeof(*pim)) != 0 &&
	     csum_fold(skb_checksum(skb, 0, skb->len, 0))))
		goto drop;

	/* __pim_rcv() returns non-zero when we must free the skb ourselves. */
	if (__pim_rcv(skb, sizeof(*pim))) {
drop:
		kfree_skb(skb);
	}
	return 0;
}
#endif
1610 ipmr_fill_mroute(struct sk_buff
*skb
, struct mfc_cache
*c
, struct rtmsg
*rtm
)
1613 struct rtnexthop
*nhp
;
1614 struct net
*net
= mfc_net(c
);
1615 struct net_device
*dev
= net
->ipv4
.vif_table
[c
->mfc_parent
].dev
;
1616 u8
*b
= skb_tail_pointer(skb
);
1617 struct rtattr
*mp_head
;
1620 RTA_PUT(skb
, RTA_IIF
, 4, &dev
->ifindex
);
1622 mp_head
= (struct rtattr
*)skb_put(skb
, RTA_LENGTH(0));
1624 for (ct
= c
->mfc_un
.res
.minvif
; ct
< c
->mfc_un
.res
.maxvif
; ct
++) {
1625 if (c
->mfc_un
.res
.ttls
[ct
] < 255) {
1626 if (skb_tailroom(skb
) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp
)) + 4))
1627 goto rtattr_failure
;
1628 nhp
= (struct rtnexthop
*)skb_put(skb
, RTA_ALIGN(sizeof(*nhp
)));
1629 nhp
->rtnh_flags
= 0;
1630 nhp
->rtnh_hops
= c
->mfc_un
.res
.ttls
[ct
];
1631 nhp
->rtnh_ifindex
= net
->ipv4
.vif_table
[ct
].dev
->ifindex
;
1632 nhp
->rtnh_len
= sizeof(*nhp
);
1635 mp_head
->rta_type
= RTA_MULTIPATH
;
1636 mp_head
->rta_len
= skb_tail_pointer(skb
) - (u8
*)mp_head
;
1637 rtm
->rtm_type
= RTN_MULTICAST
;
1645 int ipmr_get_route(struct net
*net
,
1646 struct sk_buff
*skb
, struct rtmsg
*rtm
, int nowait
)
1649 struct mfc_cache
*cache
;
1650 struct rtable
*rt
= skb
->rtable
;
1652 read_lock(&mrt_lock
);
1653 cache
= ipmr_cache_find(net
, rt
->rt_src
, rt
->rt_dst
);
1655 if (cache
== NULL
) {
1656 struct sk_buff
*skb2
;
1658 struct net_device
*dev
;
1662 read_unlock(&mrt_lock
);
1667 if (dev
== NULL
|| (vif
= ipmr_find_vif(dev
)) < 0) {
1668 read_unlock(&mrt_lock
);
1671 skb2
= skb_clone(skb
, GFP_ATOMIC
);
1673 read_unlock(&mrt_lock
);
1677 skb_push(skb2
, sizeof(struct iphdr
));
1678 skb_reset_network_header(skb2
);
1680 iph
->ihl
= sizeof(struct iphdr
) >> 2;
1681 iph
->saddr
= rt
->rt_src
;
1682 iph
->daddr
= rt
->rt_dst
;
1684 err
= ipmr_cache_unresolved(net
, vif
, skb2
);
1685 read_unlock(&mrt_lock
);
1689 if (!nowait
&& (rtm
->rtm_flags
&RTM_F_NOTIFY
))
1690 cache
->mfc_flags
|= MFC_NOTIFY
;
1691 err
= ipmr_fill_mroute(skb
, cache
, rtm
);
1692 read_unlock(&mrt_lock
);
1696 #ifdef CONFIG_PROC_FS
1698 * The /proc interfaces to multicast routing /proc/ip_mr_cache /proc/ip_mr_vif
1700 struct ipmr_vif_iter
{
1701 struct seq_net_private p
;
1705 static struct vif_device
*ipmr_vif_seq_idx(struct net
*net
,
1706 struct ipmr_vif_iter
*iter
,
1709 for (iter
->ct
= 0; iter
->ct
< net
->ipv4
.maxvif
; ++iter
->ct
) {
1710 if (!VIF_EXISTS(net
, iter
->ct
))
1713 return &net
->ipv4
.vif_table
[iter
->ct
];
1718 static void *ipmr_vif_seq_start(struct seq_file
*seq
, loff_t
*pos
)
1719 __acquires(mrt_lock
)
1721 struct net
*net
= seq_file_net(seq
);
1723 read_lock(&mrt_lock
);
1724 return *pos
? ipmr_vif_seq_idx(net
, seq
->private, *pos
- 1)
1728 static void *ipmr_vif_seq_next(struct seq_file
*seq
, void *v
, loff_t
*pos
)
1730 struct ipmr_vif_iter
*iter
= seq
->private;
1731 struct net
*net
= seq_file_net(seq
);
1734 if (v
== SEQ_START_TOKEN
)
1735 return ipmr_vif_seq_idx(net
, iter
, 0);
1737 while (++iter
->ct
< net
->ipv4
.maxvif
) {
1738 if (!VIF_EXISTS(net
, iter
->ct
))
1740 return &net
->ipv4
.vif_table
[iter
->ct
];
1745 static void ipmr_vif_seq_stop(struct seq_file
*seq
, void *v
)
1746 __releases(mrt_lock
)
1748 read_unlock(&mrt_lock
);
1751 static int ipmr_vif_seq_show(struct seq_file
*seq
, void *v
)
1753 struct net
*net
= seq_file_net(seq
);
1755 if (v
== SEQ_START_TOKEN
) {
1757 "Interface BytesIn PktsIn BytesOut PktsOut Flags Local Remote\n");
1759 const struct vif_device
*vif
= v
;
1760 const char *name
= vif
->dev
? vif
->dev
->name
: "none";
1763 "%2Zd %-10s %8ld %7ld %8ld %7ld %05X %08X %08X\n",
1764 vif
- net
->ipv4
.vif_table
,
1765 name
, vif
->bytes_in
, vif
->pkt_in
,
1766 vif
->bytes_out
, vif
->pkt_out
,
1767 vif
->flags
, vif
->local
, vif
->remote
);
1772 static const struct seq_operations ipmr_vif_seq_ops
= {
1773 .start
= ipmr_vif_seq_start
,
1774 .next
= ipmr_vif_seq_next
,
1775 .stop
= ipmr_vif_seq_stop
,
1776 .show
= ipmr_vif_seq_show
,
1779 static int ipmr_vif_open(struct inode
*inode
, struct file
*file
)
1781 return seq_open_net(inode
, file
, &ipmr_vif_seq_ops
,
1782 sizeof(struct ipmr_vif_iter
));
1785 static const struct file_operations ipmr_vif_fops
= {
1786 .owner
= THIS_MODULE
,
1787 .open
= ipmr_vif_open
,
1789 .llseek
= seq_lseek
,
1790 .release
= seq_release_net
,
1793 struct ipmr_mfc_iter
{
1794 struct seq_net_private p
;
1795 struct mfc_cache
**cache
;
1800 static struct mfc_cache
*ipmr_mfc_seq_idx(struct net
*net
,
1801 struct ipmr_mfc_iter
*it
, loff_t pos
)
1803 struct mfc_cache
*mfc
;
1805 it
->cache
= net
->ipv4
.mfc_cache_array
;
1806 read_lock(&mrt_lock
);
1807 for (it
->ct
= 0; it
->ct
< MFC_LINES
; it
->ct
++)
1808 for (mfc
= net
->ipv4
.mfc_cache_array
[it
->ct
];
1809 mfc
; mfc
= mfc
->next
)
1812 read_unlock(&mrt_lock
);
1814 it
->cache
= &mfc_unres_queue
;
1815 spin_lock_bh(&mfc_unres_lock
);
1816 for (mfc
= mfc_unres_queue
; mfc
; mfc
= mfc
->next
)
1817 if (net_eq(mfc_net(mfc
), net
) &&
1820 spin_unlock_bh(&mfc_unres_lock
);
1827 static void *ipmr_mfc_seq_start(struct seq_file
*seq
, loff_t
*pos
)
1829 struct ipmr_mfc_iter
*it
= seq
->private;
1830 struct net
*net
= seq_file_net(seq
);
1834 return *pos
? ipmr_mfc_seq_idx(net
, seq
->private, *pos
- 1)
1838 static void *ipmr_mfc_seq_next(struct seq_file
*seq
, void *v
, loff_t
*pos
)
1840 struct mfc_cache
*mfc
= v
;
1841 struct ipmr_mfc_iter
*it
= seq
->private;
1842 struct net
*net
= seq_file_net(seq
);
1846 if (v
== SEQ_START_TOKEN
)
1847 return ipmr_mfc_seq_idx(net
, seq
->private, 0);
1852 if (it
->cache
== &mfc_unres_queue
)
1855 BUG_ON(it
->cache
!= net
->ipv4
.mfc_cache_array
);
1857 while (++it
->ct
< MFC_LINES
) {
1858 mfc
= net
->ipv4
.mfc_cache_array
[it
->ct
];
1863 /* exhausted cache_array, show unresolved */
1864 read_unlock(&mrt_lock
);
1865 it
->cache
= &mfc_unres_queue
;
1868 spin_lock_bh(&mfc_unres_lock
);
1869 mfc
= mfc_unres_queue
;
1870 while (mfc
&& !net_eq(mfc_net(mfc
), net
))
1876 spin_unlock_bh(&mfc_unres_lock
);
1882 static void ipmr_mfc_seq_stop(struct seq_file
*seq
, void *v
)
1884 struct ipmr_mfc_iter
*it
= seq
->private;
1885 struct net
*net
= seq_file_net(seq
);
1887 if (it
->cache
== &mfc_unres_queue
)
1888 spin_unlock_bh(&mfc_unres_lock
);
1889 else if (it
->cache
== net
->ipv4
.mfc_cache_array
)
1890 read_unlock(&mrt_lock
);
1893 static int ipmr_mfc_seq_show(struct seq_file
*seq
, void *v
)
1896 struct net
*net
= seq_file_net(seq
);
1898 if (v
== SEQ_START_TOKEN
) {
1900 "Group Origin Iif Pkts Bytes Wrong Oifs\n");
1902 const struct mfc_cache
*mfc
= v
;
1903 const struct ipmr_mfc_iter
*it
= seq
->private;
1905 seq_printf(seq
, "%08lX %08lX %-3hd",
1906 (unsigned long) mfc
->mfc_mcastgrp
,
1907 (unsigned long) mfc
->mfc_origin
,
1910 if (it
->cache
!= &mfc_unres_queue
) {
1911 seq_printf(seq
, " %8lu %8lu %8lu",
1912 mfc
->mfc_un
.res
.pkt
,
1913 mfc
->mfc_un
.res
.bytes
,
1914 mfc
->mfc_un
.res
.wrong_if
);
1915 for (n
= mfc
->mfc_un
.res
.minvif
;
1916 n
< mfc
->mfc_un
.res
.maxvif
; n
++ ) {
1917 if (VIF_EXISTS(net
, n
) &&
1918 mfc
->mfc_un
.res
.ttls
[n
] < 255)
1921 n
, mfc
->mfc_un
.res
.ttls
[n
]);
1924 /* unresolved mfc_caches don't contain
1925 * pkt, bytes and wrong_if values
1927 seq_printf(seq
, " %8lu %8lu %8lu", 0ul, 0ul, 0ul);
1929 seq_putc(seq
, '\n');
1934 static const struct seq_operations ipmr_mfc_seq_ops
= {
1935 .start
= ipmr_mfc_seq_start
,
1936 .next
= ipmr_mfc_seq_next
,
1937 .stop
= ipmr_mfc_seq_stop
,
1938 .show
= ipmr_mfc_seq_show
,
1941 static int ipmr_mfc_open(struct inode
*inode
, struct file
*file
)
1943 return seq_open_net(inode
, file
, &ipmr_mfc_seq_ops
,
1944 sizeof(struct ipmr_mfc_iter
));
1947 static const struct file_operations ipmr_mfc_fops
= {
1948 .owner
= THIS_MODULE
,
1949 .open
= ipmr_mfc_open
,
1951 .llseek
= seq_lseek
,
1952 .release
= seq_release_net
,
#ifdef CONFIG_IP_PIMSM_V2
/* IP protocol handler for PIM (protocol 103), netns-aware.
 * NOTE(review): the member initializers were elided in the extracted
 * text; restored from mainline. */
static struct net_protocol pim_protocol = {
	.handler	=	pim_rcv,
	.netns_ok	=	1,
};
#endif
1964 * Setup for IP multicast routing
1966 static int __net_init
ipmr_net_init(struct net
*net
)
1970 net
->ipv4
.vif_table
= kcalloc(MAXVIFS
, sizeof(struct vif_device
),
1972 if (!net
->ipv4
.vif_table
) {
1977 /* Forwarding cache */
1978 net
->ipv4
.mfc_cache_array
= kcalloc(MFC_LINES
,
1979 sizeof(struct mfc_cache
*),
1981 if (!net
->ipv4
.mfc_cache_array
) {
1983 goto fail_mfc_cache
;
1986 #ifdef CONFIG_IP_PIMSM
1987 net
->ipv4
.mroute_reg_vif_num
= -1;
1990 #ifdef CONFIG_PROC_FS
1992 if (!proc_net_fops_create(net
, "ip_mr_vif", 0, &ipmr_vif_fops
))
1994 if (!proc_net_fops_create(net
, "ip_mr_cache", 0, &ipmr_mfc_fops
))
1995 goto proc_cache_fail
;
1999 #ifdef CONFIG_PROC_FS
2001 proc_net_remove(net
, "ip_mr_vif");
2003 kfree(net
->ipv4
.mfc_cache_array
);
2006 kfree(net
->ipv4
.vif_table
);
2011 static void __net_exit
ipmr_net_exit(struct net
*net
)
2013 #ifdef CONFIG_PROC_FS
2014 proc_net_remove(net
, "ip_mr_cache");
2015 proc_net_remove(net
, "ip_mr_vif");
2017 kfree(net
->ipv4
.mfc_cache_array
);
2018 kfree(net
->ipv4
.vif_table
);
2021 static struct pernet_operations ipmr_net_ops
= {
2022 .init
= ipmr_net_init
,
2023 .exit
= ipmr_net_exit
,
2026 int __init
ip_mr_init(void)
2030 mrt_cachep
= kmem_cache_create("ip_mrt_cache",
2031 sizeof(struct mfc_cache
),
2032 0, SLAB_HWCACHE_ALIGN
|SLAB_PANIC
,
2037 err
= register_pernet_subsys(&ipmr_net_ops
);
2039 goto reg_pernet_fail
;
2041 setup_timer(&ipmr_expire_timer
, ipmr_expire_process
, 0);
2042 err
= register_netdevice_notifier(&ip_mr_notifier
);
2044 goto reg_notif_fail
;
2048 del_timer(&ipmr_expire_timer
);
2049 unregister_pernet_subsys(&ipmr_net_ops
);
2051 kmem_cache_destroy(mrt_cachep
);