/*
 *	IP multicast routing support for mrouted 3.6/3.8
 *
 *	(c) 1995 Alan Cox, <alan@lxorguk.ukuu.org.uk>
 *	  Linux Consultancy and Custom Driver Development
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 *
 *	Fixes:
 *	Michael Chastain	:	Incorrect size of copying.
 *	Alan Cox		:	Added the cache manager code
 *	Alan Cox		:	Fixed the clone/copy bug and device race.
 *	Mike McLagan		:	Routing by source
 *	Malcolm Beattie		:	Buffer handling fixes.
 *	Alexey Kuznetsov	:	Double buffer free and other fixes.
 *	SVR Anand		:	Fixed several multicast bugs and problems.
 *	Alexey Kuznetsov	:	Status, optimisations and more.
 *	Brad Parker		:	Better behaviour on mrouted upcall
 *					overflow.
 *	Carlos Picoto		:	PIMv1 Support
 *	Pavlin Ivanov Radoslavov:	PIMv2 Registers must checksum only PIM header
 *					Relax this requirement to work with older peers.
 */
#include <asm/system.h>
#include <asm/uaccess.h>
#include <linux/types.h>
#include <linux/capability.h>
#include <linux/errno.h>
#include <linux/timer.h>
#include <linux/kernel.h>
#include <linux/fcntl.h>
#include <linux/stat.h>
#include <linux/socket.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/inetdevice.h>
#include <linux/igmp.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/mroute.h>
#include <linux/init.h>
#include <linux/if_ether.h>
#include <linux/slab.h>
#include <net/net_namespace.h>
#include <net/protocol.h>
#include <linux/skbuff.h>
#include <net/route.h>
#include <linux/notifier.h>
#include <linux/if_arp.h>
#include <linux/netfilter_ipv4.h>
#include <net/checksum.h>
#include <net/netlink.h>
#include <net/fib_rules.h>
#if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2)
#define CONFIG_IP_PIMSM	1
#endif
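/*
 * Per-table multicast routing state: the vif table, the hash of resolved
 * (S,G) entries, the queue of unresolved entries and the controlling
 * mroute socket all live in one struct mr_table instance.
 */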
struct mr_table {
	struct list_head	list;
#ifdef CONFIG_NET_NS
	struct net		*net;
#endif
	u32			id;
	struct sock		*mroute_sk;
	struct timer_list	ipmr_expire_timer;
	struct list_head	mfc_unres_queue;
	struct list_head	mfc_cache_array[MFC_LINES];
	struct vif_device	vif_table[MAXVIFS];
	int			maxvif;
	atomic_t		cache_resolve_queue_len;
	int			mroute_do_assert;
	int			mroute_do_pim;
#if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2)
	int			mroute_reg_vif_num;
#endif
};

struct ipmr_rule {
	struct fib_rule		common;
};

struct ipmr_result {
	struct mr_table		*mrt;
};
/* Big lock, protecting vif table, mrt cache and mroute socket state.
 * Note that the changes are semaphored via rtnl_lock.
 */

static DEFINE_RWLOCK(mrt_lock);

/*
 *	Multicast router control variables
 */

#define VIF_EXISTS(_mrt, _idx) ((_mrt)->vif_table[_idx].dev != NULL)

/* Special spinlock for queue of unresolved entries */
static DEFINE_SPINLOCK(mfc_unres_lock);

/* We return to original Alan's scheme. Hash table of resolved
 * entries is changed only in process context and protected
 * with weak lock mrt_lock. Queue of unresolved entries is protected
 * with strong spinlock mfc_unres_lock.
 *
 * In this case data path is free of exclusive locks at all.
 */
static struct kmem_cache *mrt_cachep __read_mostly;

static struct mr_table *ipmr_new_table(struct net *net, u32 id);
static int ip_mr_forward(struct net *net, struct mr_table *mrt,
			 struct sk_buff *skb, struct mfc_cache *cache,
			 int local);
static int ipmr_cache_report(struct mr_table *mrt,
			     struct sk_buff *pkt, vifi_t vifi, int assert);
static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
			      struct mfc_cache *c, struct rtmsg *rtm);
static void ipmr_expire_process(unsigned long arg);
#ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES
#define ipmr_for_each_table(mrt, net) \
	list_for_each_entry_rcu(mrt, &net->ipv4.mr_tables, list)

static struct mr_table *ipmr_get_table(struct net *net, u32 id)
{
	struct mr_table *mrt;

	ipmr_for_each_table(mrt, net) {
		if (mrt->id == id)
			return mrt;
	}
	return NULL;
}

static int ipmr_fib_lookup(struct net *net, struct flowi *flp,
			   struct mr_table **mrt)
{
	struct ipmr_result res;
	struct fib_lookup_arg arg = { .result = &res, };
	int err;

	err = fib_rules_lookup(net->ipv4.mr_rules_ops, flp, 0, &arg);
	if (err < 0)
		return err;
	*mrt = res.mrt;
	return 0;
}
static int ipmr_rule_action(struct fib_rule *rule, struct flowi *flp,
			    int flags, struct fib_lookup_arg *arg)
{
	struct ipmr_result *res = arg->result;
	struct mr_table *mrt;

	switch (rule->action) {
	case FR_ACT_TO_TBL:
		break;
	case FR_ACT_UNREACHABLE:
		return -ENETUNREACH;
	case FR_ACT_PROHIBIT:
		return -EACCES;
	case FR_ACT_BLACKHOLE:
	default:
		return -EINVAL;
	}

	mrt = ipmr_get_table(rule->fr_net, rule->table);
	if (mrt == NULL)
		return -EAGAIN;
	res->mrt = mrt;
	return 0;
}
static int ipmr_rule_match(struct fib_rule *rule, struct flowi *fl, int flags)
{
	return 1;
}

static const struct nla_policy ipmr_rule_policy[FRA_MAX + 1] = {
	FRA_GENERIC_POLICY,
};

static int ipmr_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
			       struct fib_rule_hdr *frh, struct nlattr **tb)
{
	return 0;
}

static int ipmr_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
			     struct nlattr **tb)
{
	return 1;
}

static int ipmr_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
			  struct fib_rule_hdr *frh)
{
	frh->dst_len = 0;
	frh->src_len = 0;
	frh->tos     = 0;
	return 0;
}
static const struct fib_rules_ops __net_initdata ipmr_rules_ops_template = {
	.family		= RTNL_FAMILY_IPMR,
	.rule_size	= sizeof(struct ipmr_rule),
	.addr_size	= sizeof(u32),
	.action		= ipmr_rule_action,
	.match		= ipmr_rule_match,
	.configure	= ipmr_rule_configure,
	.compare	= ipmr_rule_compare,
	.default_pref	= fib_default_rule_pref,
	.fill		= ipmr_rule_fill,
	.nlgroup	= RTNLGRP_IPV4_RULE,
	.policy		= ipmr_rule_policy,
	.owner		= THIS_MODULE,
};
static int __net_init ipmr_rules_init(struct net *net)
{
	struct fib_rules_ops *ops;
	struct mr_table *mrt;
	int err;

	ops = fib_rules_register(&ipmr_rules_ops_template, net);
	if (IS_ERR(ops))
		return PTR_ERR(ops);

	INIT_LIST_HEAD(&net->ipv4.mr_tables);

	mrt = ipmr_new_table(net, RT_TABLE_DEFAULT);
	if (mrt == NULL) {
		err = -ENOMEM;
		goto err1;
	}

	err = fib_default_rule_add(ops, 0x7fff, RT_TABLE_DEFAULT, 0);
	if (err < 0)
		goto err2;

	net->ipv4.mr_rules_ops = ops;
	return 0;

err2:
	kfree(mrt);
err1:
	fib_rules_unregister(ops);
	return err;
}
static void __net_exit ipmr_rules_exit(struct net *net)
{
	struct mr_table *mrt, *next;

	list_for_each_entry_safe(mrt, next, &net->ipv4.mr_tables, list) {
		list_del(&mrt->list);
		kfree(mrt);
	}
	fib_rules_unregister(net->ipv4.mr_rules_ops);
}
#else
#define ipmr_for_each_table(mrt, net) \
	for (mrt = net->ipv4.mrt; mrt; mrt = NULL)

static struct mr_table *ipmr_get_table(struct net *net, u32 id)
{
	return net->ipv4.mrt;
}

static int ipmr_fib_lookup(struct net *net, struct flowi *flp,
			   struct mr_table **mrt)
{
	*mrt = net->ipv4.mrt;
	return 0;
}

static int __net_init ipmr_rules_init(struct net *net)
{
	net->ipv4.mrt = ipmr_new_table(net, RT_TABLE_DEFAULT);
	return net->ipv4.mrt ? 0 : -ENOMEM;
}

static void __net_exit ipmr_rules_exit(struct net *net)
{
	kfree(net->ipv4.mrt);
}
#endif
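/*
 * With CONFIG_IP_MROUTE_MULTIPLE_TABLES the table used for a packet is
 * selected through the RTNL_FAMILY_IPMR fib-rules above; otherwise a
 * single default table (net->ipv4.mrt) is used for everything.
 */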
static struct mr_table *ipmr_new_table(struct net *net, u32 id)
{
	struct mr_table *mrt;
	unsigned int i;

	mrt = ipmr_get_table(net, id);
	if (mrt != NULL)
		return mrt;

	mrt = kzalloc(sizeof(*mrt), GFP_KERNEL);
	if (mrt == NULL)
		return NULL;
	write_pnet(&mrt->net, net);
	mrt->id = id;

	/* Forwarding cache */
	for (i = 0; i < MFC_LINES; i++)
		INIT_LIST_HEAD(&mrt->mfc_cache_array[i]);

	INIT_LIST_HEAD(&mrt->mfc_unres_queue);

	setup_timer(&mrt->ipmr_expire_timer, ipmr_expire_process,
		    (unsigned long)mrt);

#ifdef CONFIG_IP_PIMSM
	mrt->mroute_reg_vif_num = -1;
#endif
#ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES
	list_add_tail_rcu(&mrt->list, &net->ipv4.mr_tables);
#endif
	return mrt;
}
/* Service routines creating virtual interfaces: DVMRP tunnels and PIMREG */

static void ipmr_del_tunnel(struct net_device *dev, struct vifctl *v)
{
	struct net *net = dev_net(dev);

	dev_close(dev);

	dev = __dev_get_by_name(net, "tunl0");
	if (dev) {
		const struct net_device_ops *ops = dev->netdev_ops;
		struct ifreq ifr;
		struct ip_tunnel_parm p;

		memset(&p, 0, sizeof(p));
		p.iph.daddr = v->vifc_rmt_addr.s_addr;
		p.iph.saddr = v->vifc_lcl_addr.s_addr;
		p.iph.version = 4;
		p.iph.ihl = 5;
		p.iph.protocol = IPPROTO_IPIP;
		sprintf(p.name, "dvmrp%d", v->vifc_vifi);
		ifr.ifr_ifru.ifru_data = (__force void __user *)&p;

		if (ops->ndo_do_ioctl) {
			mm_segment_t oldfs = get_fs();

			set_fs(KERNEL_DS);
			ops->ndo_do_ioctl(dev, &ifr, SIOCDELTUNNEL);
			set_fs(oldfs);
		}
	}
}
static struct net_device *ipmr_new_tunnel(struct net *net, struct vifctl *v)
{
	struct net_device *dev;

	dev = __dev_get_by_name(net, "tunl0");

	if (dev) {
		const struct net_device_ops *ops = dev->netdev_ops;
		int err;
		struct ifreq ifr;
		struct ip_tunnel_parm p;
		struct in_device *in_dev;

		memset(&p, 0, sizeof(p));
		p.iph.daddr = v->vifc_rmt_addr.s_addr;
		p.iph.saddr = v->vifc_lcl_addr.s_addr;
		p.iph.version = 4;
		p.iph.ihl = 5;
		p.iph.protocol = IPPROTO_IPIP;
		sprintf(p.name, "dvmrp%d", v->vifc_vifi);
		ifr.ifr_ifru.ifru_data = (__force void __user *)&p;

		if (ops->ndo_do_ioctl) {
			mm_segment_t oldfs = get_fs();

			set_fs(KERNEL_DS);
			err = ops->ndo_do_ioctl(dev, &ifr, SIOCADDTUNNEL);
			set_fs(oldfs);
		} else
			err = -EOPNOTSUPP;

		dev = NULL;

		if (err == 0 &&
		    (dev = __dev_get_by_name(net, p.name)) != NULL) {
			dev->flags |= IFF_MULTICAST;

			in_dev = __in_dev_get_rtnl(dev);
			if (in_dev == NULL)
				goto failure;

			ipv4_devconf_setall(in_dev);
			IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0;

			if (dev_open(dev))
				goto failure;
			dev_hold(dev);
		}
	}
	return dev;

failure:
	/* allow the register to be completed before unregistering. */
	rtnl_unlock();
	rtnl_lock();

	unregister_netdevice(dev);
	return NULL;
}
#ifdef CONFIG_IP_PIMSM

static netdev_tx_t reg_vif_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct net *net = dev_net(dev);
	struct mr_table *mrt;
	struct flowi fl = {
		.oif		= dev->iflink,
		.iif		= skb->skb_iif,
		.mark		= skb->mark,
	};
	int err;

	err = ipmr_fib_lookup(net, &fl, &mrt);
	if (err < 0) {
		kfree_skb(skb);
		return err;
	}

	read_lock(&mrt_lock);
	dev->stats.tx_bytes += skb->len;
	dev->stats.tx_packets++;
	ipmr_cache_report(mrt, skb, mrt->mroute_reg_vif_num, IGMPMSG_WHOLEPKT);
	read_unlock(&mrt_lock);
	kfree_skb(skb);
	return NETDEV_TX_OK;
}

static const struct net_device_ops reg_vif_netdev_ops = {
	.ndo_start_xmit	= reg_vif_xmit,
};

static void reg_vif_setup(struct net_device *dev)
{
	dev->type		= ARPHRD_PIMREG;
	dev->mtu		= ETH_DATA_LEN - sizeof(struct iphdr) - 8;
	dev->flags		= IFF_NOARP;
	dev->netdev_ops		= &reg_vif_netdev_ops;
	dev->destructor		= free_netdev;
	dev->features		|= NETIF_F_NETNS_LOCAL;
}
static struct net_device *ipmr_reg_vif(struct net *net, struct mr_table *mrt)
{
	struct net_device *dev;
	struct in_device *in_dev;
	char name[IFNAMSIZ];

	if (mrt->id == RT_TABLE_DEFAULT)
		sprintf(name, "pimreg");
	else
		sprintf(name, "pimreg%u", mrt->id);

	dev = alloc_netdev(0, name, reg_vif_setup);
	if (dev == NULL)
		return NULL;

	dev_net_set(dev, net);

	if (register_netdevice(dev)) {
		free_netdev(dev);
		return NULL;
	}
	dev->iflink = 0;

	rcu_read_lock();
	if ((in_dev = __in_dev_get_rcu(dev)) == NULL) {
		rcu_read_unlock();
		goto failure;
	}

	ipv4_devconf_setall(in_dev);
	IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0;
	rcu_read_unlock();

	if (dev_open(dev))
		goto failure;

	dev_hold(dev);

	return dev;

failure:
	/* allow the register to be completed before unregistering. */
	rtnl_unlock();
	rtnl_lock();

	unregister_netdevice(dev);
	return NULL;
}
#endif
/*
 *	vif_delete - Delete a VIF entry
 *	@notify: Set to 1, if the caller is a notifier_call
 */

static int vif_delete(struct mr_table *mrt, int vifi, int notify,
		      struct list_head *head)
{
	struct vif_device *v;
	struct net_device *dev;
	struct in_device *in_dev;

	if (vifi < 0 || vifi >= mrt->maxvif)
		return -EADDRNOTAVAIL;

	v = &mrt->vif_table[vifi];

	write_lock_bh(&mrt_lock);
	dev = v->dev;
	v->dev = NULL;

	if (!dev) {
		write_unlock_bh(&mrt_lock);
		return -EADDRNOTAVAIL;
	}

#ifdef CONFIG_IP_PIMSM
	if (vifi == mrt->mroute_reg_vif_num)
		mrt->mroute_reg_vif_num = -1;
#endif

	if (vifi+1 == mrt->maxvif) {
		int tmp;
		for (tmp=vifi-1; tmp>=0; tmp--) {
			if (VIF_EXISTS(mrt, tmp))
				break;
		}
		mrt->maxvif = tmp+1;
	}

	write_unlock_bh(&mrt_lock);

	dev_set_allmulti(dev, -1);

	if ((in_dev = __in_dev_get_rtnl(dev)) != NULL) {
		IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)--;
		ip_rt_multicast_event(in_dev);
	}

	if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER) && !notify)
		unregister_netdevice_queue(dev, head);

	dev_put(dev);
	return 0;
}
static inline void ipmr_cache_free(struct mfc_cache *c)
{
	kmem_cache_free(mrt_cachep, c);
}
/* Destroy an unresolved cache entry, killing queued skbs
 * and reporting error to netlink readers.
 */

static void ipmr_destroy_unres(struct mr_table *mrt, struct mfc_cache *c)
{
	struct net *net = read_pnet(&mrt->net);
	struct sk_buff *skb;
	struct nlmsgerr *e;

	atomic_dec(&mrt->cache_resolve_queue_len);

	while ((skb = skb_dequeue(&c->mfc_un.unres.unresolved))) {
		if (ip_hdr(skb)->version == 0) {
			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));
			nlh->nlmsg_type = NLMSG_ERROR;
			nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
			skb_trim(skb, nlh->nlmsg_len);
			e = NLMSG_DATA(nlh);
			e->error = -ETIMEDOUT;
			memset(&e->msg, 0, sizeof(e->msg));

			rtnl_unicast(skb, net, NETLINK_CB(skb).pid);
		} else
			kfree_skb(skb);
	}

	ipmr_cache_free(c);
}
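/*
 * Unresolved entries sit on mfc_unres_queue for at most 10*HZ (see
 * ipmr_cache_alloc_unres() below) before the expire timer destroys
 * them via ipmr_destroy_unres().
 */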
/* Timer process for the unresolved queue. */

static void ipmr_expire_process(unsigned long arg)
{
	struct mr_table *mrt = (struct mr_table *)arg;
	unsigned long now = jiffies;
	unsigned long expires = 10*HZ;
	struct mfc_cache *c, *next;

	if (!spin_trylock(&mfc_unres_lock)) {
		mod_timer(&mrt->ipmr_expire_timer, jiffies+HZ/10);
		return;
	}

	if (list_empty(&mrt->mfc_unres_queue))
		goto out;

	list_for_each_entry_safe(c, next, &mrt->mfc_unres_queue, list) {
		if (time_after(c->mfc_un.unres.expires, now)) {
			unsigned long interval = c->mfc_un.unres.expires - now;
			if (interval < expires)
				expires = interval;
			continue;
		}

		list_del(&c->list);
		ipmr_destroy_unres(mrt, c);
	}

	if (!list_empty(&mrt->mfc_unres_queue))
		mod_timer(&mrt->ipmr_expire_timer, jiffies + expires);

out:
	spin_unlock(&mfc_unres_lock);
}
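/*
 * In mfc_un.res.ttls[] a value of 255 means "do not forward on this vif";
 * minvif/maxvif bound the range of vifs worth scanning on the fast path.
 */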
/* Fill oifs list. It is called under write locked mrt_lock. */

static void ipmr_update_thresholds(struct mr_table *mrt, struct mfc_cache *cache,
				   unsigned char *ttls)
{
	int vifi;

	cache->mfc_un.res.minvif = MAXVIFS;
	cache->mfc_un.res.maxvif = 0;
	memset(cache->mfc_un.res.ttls, 255, MAXVIFS);

	for (vifi = 0; vifi < mrt->maxvif; vifi++) {
		if (VIF_EXISTS(mrt, vifi) &&
		    ttls[vifi] && ttls[vifi] < 255) {
			cache->mfc_un.res.ttls[vifi] = ttls[vifi];
			if (cache->mfc_un.res.minvif > vifi)
				cache->mfc_un.res.minvif = vifi;
			if (cache->mfc_un.res.maxvif <= vifi)
				cache->mfc_un.res.maxvif = vifi + 1;
		}
	}
}
static int vif_add(struct net *net, struct mr_table *mrt,
		   struct vifctl *vifc, int mrtsock)
{
	int vifi = vifc->vifc_vifi;
	struct vif_device *v = &mrt->vif_table[vifi];
	struct net_device *dev;
	struct in_device *in_dev;
	int err;

	/* Is vif busy ? */
	if (VIF_EXISTS(mrt, vifi))
		return -EADDRINUSE;

	switch (vifc->vifc_flags) {
#ifdef CONFIG_IP_PIMSM
	case VIFF_REGISTER:
		/*
		 * Special Purpose VIF in PIM
		 * All the packets will be sent to the daemon
		 */
		if (mrt->mroute_reg_vif_num >= 0)
			return -EADDRINUSE;
		dev = ipmr_reg_vif(net, mrt);
		if (!dev)
			return -ENOBUFS;
		err = dev_set_allmulti(dev, 1);
		if (err) {
			unregister_netdevice(dev);
			dev_put(dev);
			return err;
		}
		break;
#endif
	case VIFF_TUNNEL:
		dev = ipmr_new_tunnel(net, vifc);
		if (!dev)
			return -ENOBUFS;
		err = dev_set_allmulti(dev, 1);
		if (err) {
			ipmr_del_tunnel(dev, vifc);
			dev_put(dev);
			return err;
		}
		break;

	case VIFF_USE_IFINDEX:
	case 0:
		if (vifc->vifc_flags == VIFF_USE_IFINDEX) {
			dev = dev_get_by_index(net, vifc->vifc_lcl_ifindex);
			if (dev && dev->ip_ptr == NULL) {
				dev_put(dev);
				return -EADDRNOTAVAIL;
			}
		} else
			dev = ip_dev_find(net, vifc->vifc_lcl_addr.s_addr);

		if (!dev)
			return -EADDRNOTAVAIL;
		err = dev_set_allmulti(dev, 1);
		if (err) {
			dev_put(dev);
			return err;
		}
		break;
	default:
		return -EINVAL;
	}

	if ((in_dev = __in_dev_get_rtnl(dev)) == NULL) {
		dev_put(dev);
		return -EADDRNOTAVAIL;
	}
	IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)++;
	ip_rt_multicast_event(in_dev);

	/*
	 *	Fill in the VIF structures
	 */
	v->rate_limit = vifc->vifc_rate_limit;
	v->local = vifc->vifc_lcl_addr.s_addr;
	v->remote = vifc->vifc_rmt_addr.s_addr;
	v->flags = vifc->vifc_flags;
	if (!mrtsock)
		v->flags |= VIFF_STATIC;
	v->threshold = vifc->vifc_threshold;
	v->bytes_in = 0;
	v->bytes_out = 0;
	v->pkt_in = 0;
	v->pkt_out = 0;
	v->link = dev->ifindex;
	if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER))
		v->link = dev->iflink;

	/* And finish update writing critical data */
	write_lock_bh(&mrt_lock);
	v->dev = dev;
#ifdef CONFIG_IP_PIMSM
	if (v->flags&VIFF_REGISTER)
		mrt->mroute_reg_vif_num = vifi;
#endif
	if (vifi+1 > mrt->maxvif)
		mrt->maxvif = vifi+1;
	write_unlock_bh(&mrt_lock);
	return 0;
}
static struct mfc_cache *ipmr_cache_find(struct mr_table *mrt,
					 __be32 origin,
					 __be32 mcastgrp)
{
	int line = MFC_HASH(mcastgrp, origin);
	struct mfc_cache *c;

	list_for_each_entry(c, &mrt->mfc_cache_array[line], list) {
		if (c->mfc_origin == origin && c->mfc_mcastgrp == mcastgrp)
			return c;
	}
	return NULL;
}
/*
 *	Allocate a multicast cache entry
 */
static struct mfc_cache *ipmr_cache_alloc(void)
{
	struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
	if (c == NULL)
		return NULL;
	c->mfc_un.res.minvif = MAXVIFS;
	return c;
}

static struct mfc_cache *ipmr_cache_alloc_unres(void)
{
	struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
	if (c == NULL)
		return NULL;
	skb_queue_head_init(&c->mfc_un.unres.unresolved);
	c->mfc_un.unres.expires = jiffies + 10*HZ;
	return c;
}
/*
 *	A cache entry has gone into a resolved state from queued
 */

static void ipmr_cache_resolve(struct net *net, struct mr_table *mrt,
			       struct mfc_cache *uc, struct mfc_cache *c)
{
	struct sk_buff *skb;
	struct nlmsgerr *e;

	/*
	 *	Play the pending entries through our router
	 */

	while ((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) {
		if (ip_hdr(skb)->version == 0) {
			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));

			if (__ipmr_fill_mroute(mrt, skb, c, NLMSG_DATA(nlh)) > 0) {
				nlh->nlmsg_len = (skb_tail_pointer(skb) -
						  (u8 *)nlh);
			} else {
				nlh->nlmsg_type = NLMSG_ERROR;
				nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
				skb_trim(skb, nlh->nlmsg_len);
				e = NLMSG_DATA(nlh);
				e->error = -EMSGSIZE;
				memset(&e->msg, 0, sizeof(e->msg));
			}

			rtnl_unicast(skb, net, NETLINK_CB(skb).pid);
		} else
			ip_mr_forward(net, mrt, skb, c, 0);
	}
}
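/*
 * Upcall format note: for IGMPMSG_NOCACHE/IGMPMSG_WRONGVIF the skb queued
 * by ipmr_cache_report() below reuses the original IP header as a struct
 * igmpmsg (im_msgtype and im_vif filled in, the protocol field zeroed as a
 * marker); for IGMPMSG_WHOLEPKT the whole packet follows a duplicated header.
 */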
/*
 *	Bounce a cache query up to mrouted. We could use netlink for this but mrouted
 *	expects the following bizarre scheme.
 *
 *	Called under mrt_lock.
 */

static int ipmr_cache_report(struct mr_table *mrt,
			     struct sk_buff *pkt, vifi_t vifi, int assert)
{
	struct sk_buff *skb;
	const int ihl = ip_hdrlen(pkt);
	struct igmphdr *igmp;
	struct igmpmsg *msg;
	int ret;

#ifdef CONFIG_IP_PIMSM
	if (assert == IGMPMSG_WHOLEPKT)
		skb = skb_realloc_headroom(pkt, sizeof(struct iphdr));
	else
#endif
		skb = alloc_skb(128, GFP_ATOMIC);

	if (!skb)
		return -ENOBUFS;

#ifdef CONFIG_IP_PIMSM
	if (assert == IGMPMSG_WHOLEPKT) {
		/* Ugly, but we have no choice with this interface.
		   Duplicate old header, fix ihl, length etc.
		   And all this only to mangle msg->im_msgtype and
		   to set msg->im_mbz to "mbz" :-)
		 */
		skb_push(skb, sizeof(struct iphdr));
		skb_reset_network_header(skb);
		skb_reset_transport_header(skb);
		msg = (struct igmpmsg *)skb_network_header(skb);
		memcpy(msg, skb_network_header(pkt), sizeof(struct iphdr));
		msg->im_msgtype = IGMPMSG_WHOLEPKT;
		msg->im_mbz = 0;
		msg->im_vif = mrt->mroute_reg_vif_num;
		ip_hdr(skb)->ihl = sizeof(struct iphdr) >> 2;
		ip_hdr(skb)->tot_len = htons(ntohs(ip_hdr(pkt)->tot_len) +
					     sizeof(struct iphdr));
	} else
#endif
	{

	/*
	 *	Copy the IP header
	 */

	skb->network_header = skb->tail;
	skb_put(skb, ihl);
	skb_copy_to_linear_data(skb, pkt->data, ihl);
	ip_hdr(skb)->protocol = 0;	/* Flag to the kernel this is a route add */
	msg = (struct igmpmsg *)skb_network_header(skb);
	msg->im_vif = vifi;
	skb_dst_set(skb, dst_clone(skb_dst(pkt)));

	/*
	 *	Add our header
	 */

	igmp = (struct igmphdr *)skb_put(skb, sizeof(struct igmphdr));
	igmp->type	=
	msg->im_msgtype = assert;
	igmp->code	= 0;
	ip_hdr(skb)->tot_len = htons(skb->len);		/* Fix the length */
	skb->transport_header = skb->network_header;
	}

	if (mrt->mroute_sk == NULL) {
		kfree_skb(skb);
		return -EINVAL;
	}

	/*
	 *	Deliver to mrouted
	 */
	ret = sock_queue_rcv_skb(mrt->mroute_sk, skb);
	if (ret < 0) {
		if (net_ratelimit())
			printk(KERN_WARNING "mroute: pending queue full, dropping entries.\n");
		kfree_skb(skb);
	}

	return ret;
}
/*
 *	Queue a packet for resolution. It gets locked cache entry!
 */

static int
ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi, struct sk_buff *skb)
{
	bool found = false;
	int err;
	struct mfc_cache *c;
	const struct iphdr *iph = ip_hdr(skb);

	spin_lock_bh(&mfc_unres_lock);
	list_for_each_entry(c, &mrt->mfc_unres_queue, list) {
		if (c->mfc_mcastgrp == iph->daddr &&
		    c->mfc_origin == iph->saddr) {
			found = true;
			break;
		}
	}

	if (!found) {
		/*
		 *	Create a new entry if allowable
		 */

		if (atomic_read(&mrt->cache_resolve_queue_len) >= 10 ||
		    (c = ipmr_cache_alloc_unres()) == NULL) {
			spin_unlock_bh(&mfc_unres_lock);

			kfree_skb(skb);
			return -ENOBUFS;
		}

		/*
		 *	Fill in the new cache entry
		 */
		c->mfc_parent	= -1;
		c->mfc_origin	= iph->saddr;
		c->mfc_mcastgrp	= iph->daddr;

		/*
		 *	Reflect first query at mrouted.
		 */
		err = ipmr_cache_report(mrt, skb, vifi, IGMPMSG_NOCACHE);
		if (err < 0) {
			/* If the report failed throw the cache entry
			   out - Brad Parker
			 */
			spin_unlock_bh(&mfc_unres_lock);

			ipmr_cache_free(c);
			kfree_skb(skb);
			return err;
		}

		atomic_inc(&mrt->cache_resolve_queue_len);
		list_add(&c->list, &mrt->mfc_unres_queue);

		if (atomic_read(&mrt->cache_resolve_queue_len) == 1)
			mod_timer(&mrt->ipmr_expire_timer, c->mfc_un.unres.expires);
	}

	/*
	 *	See if we can append the packet
	 */
	if (c->mfc_un.unres.unresolved.qlen > 3) {
		kfree_skb(skb);
		err = -ENOBUFS;
	} else {
		skb_queue_tail(&c->mfc_un.unres.unresolved, skb);
		err = 0;
	}

	spin_unlock_bh(&mfc_unres_lock);
	return err;
}
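/*
 * Note the limits above: at most 10 unresolved (S,G) entries per table and
 * at most 3 packets queued per unresolved entry while waiting for mrouted.
 */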
/*
 *	MFC cache manipulation by user space mroute daemon
 */

static int ipmr_mfc_delete(struct mr_table *mrt, struct mfcctl *mfc)
{
	int line;
	struct mfc_cache *c, *next;

	line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);

	list_for_each_entry_safe(c, next, &mrt->mfc_cache_array[line], list) {
		if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
		    c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) {
			write_lock_bh(&mrt_lock);
			list_del(&c->list);
			write_unlock_bh(&mrt_lock);

			ipmr_cache_free(c);
			return 0;
		}
	}
	return -ENOENT;
}
static int ipmr_mfc_add(struct net *net, struct mr_table *mrt,
			struct mfcctl *mfc, int mrtsock)
{
	bool found = false;
	int line;
	struct mfc_cache *uc, *c;

	if (mfc->mfcc_parent >= MAXVIFS)
		return -ENFILE;

	line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);

	list_for_each_entry(c, &mrt->mfc_cache_array[line], list) {
		if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
		    c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) {
			found = true;
			break;
		}
	}

	if (found) {
		write_lock_bh(&mrt_lock);
		c->mfc_parent = mfc->mfcc_parent;
		ipmr_update_thresholds(mrt, c, mfc->mfcc_ttls);
		if (!mrtsock)
			c->mfc_flags |= MFC_STATIC;
		write_unlock_bh(&mrt_lock);
		return 0;
	}

	if (!ipv4_is_multicast(mfc->mfcc_mcastgrp.s_addr))
		return -EINVAL;

	c = ipmr_cache_alloc();
	if (c == NULL)
		return -ENOMEM;

	c->mfc_origin = mfc->mfcc_origin.s_addr;
	c->mfc_mcastgrp = mfc->mfcc_mcastgrp.s_addr;
	c->mfc_parent = mfc->mfcc_parent;
	ipmr_update_thresholds(mrt, c, mfc->mfcc_ttls);
	if (!mrtsock)
		c->mfc_flags |= MFC_STATIC;

	write_lock_bh(&mrt_lock);
	list_add(&c->list, &mrt->mfc_cache_array[line]);
	write_unlock_bh(&mrt_lock);

	/*
	 *	Check to see if we resolved a queued list. If so we
	 *	need to send on the frames and tidy up.
	 */
	found = false;
	spin_lock_bh(&mfc_unres_lock);
	list_for_each_entry(uc, &mrt->mfc_unres_queue, list) {
		if (uc->mfc_origin == c->mfc_origin &&
		    uc->mfc_mcastgrp == c->mfc_mcastgrp) {
			list_del(&uc->list);
			atomic_dec(&mrt->cache_resolve_queue_len);
			found = true;
			break;
		}
	}
	if (list_empty(&mrt->mfc_unres_queue))
		del_timer(&mrt->ipmr_expire_timer);
	spin_unlock_bh(&mfc_unres_lock);

	if (found) {
		ipmr_cache_resolve(net, mrt, uc, c);
		ipmr_cache_free(uc);
	}
	return 0;
}
/*
 *	Close the multicast socket, and clear the vif tables etc
 */

static void mroute_clean_tables(struct mr_table *mrt)
{
	int i;
	LIST_HEAD(list);
	struct mfc_cache *c, *next;

	/*
	 *	Shut down all active vif entries
	 */
	for (i = 0; i < mrt->maxvif; i++) {
		if (!(mrt->vif_table[i].flags&VIFF_STATIC))
			vif_delete(mrt, i, 0, &list);
	}
	unregister_netdevice_many(&list);

	/*
	 *	Wipe the cache
	 */
	for (i = 0; i < MFC_LINES; i++) {
		list_for_each_entry_safe(c, next, &mrt->mfc_cache_array[i], list) {
			if (c->mfc_flags&MFC_STATIC)
				continue;
			write_lock_bh(&mrt_lock);
			list_del(&c->list);
			write_unlock_bh(&mrt_lock);

			ipmr_cache_free(c);
		}
	}

	if (atomic_read(&mrt->cache_resolve_queue_len) != 0) {
		spin_lock_bh(&mfc_unres_lock);
		list_for_each_entry_safe(c, next, &mrt->mfc_unres_queue, list) {
			list_del(&c->list);
			ipmr_destroy_unres(mrt, c);
		}
		spin_unlock_bh(&mfc_unres_lock);
	}
}
static void mrtsock_destruct(struct sock *sk)
{
	struct net *net = sock_net(sk);
	struct mr_table *mrt;

	rtnl_lock();
	ipmr_for_each_table(mrt, net) {
		if (sk == mrt->mroute_sk) {
			IPV4_DEVCONF_ALL(net, MC_FORWARDING)--;

			write_lock_bh(&mrt_lock);
			mrt->mroute_sk = NULL;
			write_unlock_bh(&mrt_lock);

			mroute_clean_tables(mrt);
		}
	}
	rtnl_unlock();
}
/*
 *	Socket options and virtual interface manipulation. The whole
 *	virtual interface system is a complete heap, but unfortunately
 *	that's how BSD mrouted happens to think. Maybe one day with a proper
 *	MOSPF/PIM router set up we can clean this up.
 */

int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsigned int optlen)
{
	int ret;
	struct vifctl vif;
	struct mfcctl mfc;
	struct net *net = sock_net(sk);
	struct mr_table *mrt;

	mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT);
	if (mrt == NULL)
		return -ENOENT;

	if (optname != MRT_INIT) {
		if (sk != mrt->mroute_sk && !capable(CAP_NET_ADMIN))
			return -EACCES;
	}

	switch (optname) {
	case MRT_INIT:
		if (sk->sk_type != SOCK_RAW ||
		    inet_sk(sk)->inet_num != IPPROTO_IGMP)
			return -EOPNOTSUPP;
		if (optlen != sizeof(int))
			return -ENOPROTOOPT;

		rtnl_lock();
		if (mrt->mroute_sk) {
			rtnl_unlock();
			return -EADDRINUSE;
		}

		ret = ip_ra_control(sk, 1, mrtsock_destruct);
		if (ret == 0) {
			write_lock_bh(&mrt_lock);
			mrt->mroute_sk = sk;
			write_unlock_bh(&mrt_lock);

			IPV4_DEVCONF_ALL(net, MC_FORWARDING)++;
		}
		rtnl_unlock();
		return ret;
	case MRT_DONE:
		if (sk != mrt->mroute_sk)
			return -EACCES;
		return ip_ra_control(sk, 0, NULL);
	case MRT_ADD_VIF:
	case MRT_DEL_VIF:
		if (optlen != sizeof(vif))
			return -EINVAL;
		if (copy_from_user(&vif, optval, sizeof(vif)))
			return -EFAULT;
		if (vif.vifc_vifi >= MAXVIFS)
			return -ENFILE;
		rtnl_lock();
		if (optname == MRT_ADD_VIF) {
			ret = vif_add(net, mrt, &vif, sk == mrt->mroute_sk);
		} else {
			ret = vif_delete(mrt, vif.vifc_vifi, 0, NULL);
		}
		rtnl_unlock();
		return ret;

		/*
		 *	Manipulate the forwarding caches. These live
		 *	in a sort of kernel/user symbiosis.
		 */
	case MRT_ADD_MFC:
	case MRT_DEL_MFC:
		if (optlen != sizeof(mfc))
			return -EINVAL;
		if (copy_from_user(&mfc, optval, sizeof(mfc)))
			return -EFAULT;
		rtnl_lock();
		if (optname == MRT_DEL_MFC)
			ret = ipmr_mfc_delete(mrt, &mfc);
		else
			ret = ipmr_mfc_add(net, mrt, &mfc, sk == mrt->mroute_sk);
		rtnl_unlock();
		return ret;
		/*
		 *	Control PIM assert.
		 */
	case MRT_ASSERT:
	{
		int v;
		if (get_user(v, (int __user *)optval))
			return -EFAULT;
		mrt->mroute_do_assert = (v) ? 1 : 0;
		return 0;
	}
#ifdef CONFIG_IP_PIMSM
	case MRT_PIM:
	{
		int v;

		if (get_user(v, (int __user *)optval))
			return -EFAULT;
		v = (v) ? 1 : 0;

		rtnl_lock();
		ret = 0;
		if (v != mrt->mroute_do_pim) {
			mrt->mroute_do_pim = v;
			mrt->mroute_do_assert = v;
		}
		rtnl_unlock();
		return ret;
	}
#endif
#ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES
	case MRT_TABLE:
	{
		u32 v;

		if (optlen != sizeof(u32))
			return -EINVAL;
		if (get_user(v, (u32 __user *)optval))
			return -EFAULT;
		if (sk == mrt->mroute_sk)
			return -EBUSY;

		rtnl_lock();
		ret = 0;
		if (!ipmr_new_table(net, v))
			ret = -ENOMEM;
		raw_sk(sk)->ipmr_table = v;
		rtnl_unlock();
		return ret;
	}
#endif
	/*
	 *	Spurious command, or MRT_VERSION which you cannot
	 *	set.
	 */
	default:
		return -ENOPROTOOPT;
	}
}
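/*
 * Userspace sketch (illustrative only, not part of this file): a routing
 * daemon typically enables multicast routing on a raw IGMP socket before
 * adding vifs and (S,G) entries with the options handled above:
 *
 *	int s = socket(AF_INET, SOCK_RAW, IPPROTO_IGMP);
 *	int one = 1;
 *	setsockopt(s, IPPROTO_IP, MRT_INIT, &one, sizeof(one));
 *	// then MRT_ADD_VIF with a struct vifctl and
 *	// MRT_ADD_MFC with a struct mfcctl per (S,G) route
 */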
/*
 *	Getsock opt support for the multicast routing system.
 */

int ip_mroute_getsockopt(struct sock *sk, int optname, char __user *optval, int __user *optlen)
{
	int olr;
	int val;
	struct net *net = sock_net(sk);
	struct mr_table *mrt;

	mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT);
	if (mrt == NULL)
		return -ENOENT;

	if (optname != MRT_VERSION &&
#ifdef CONFIG_IP_PIMSM
	    optname != MRT_PIM &&
#endif
	    optname != MRT_ASSERT)
		return -ENOPROTOOPT;

	if (get_user(olr, optlen))
		return -EFAULT;

	olr = min_t(unsigned int, olr, sizeof(int));
	if (olr < 0)
		return -EINVAL;

	if (put_user(olr, optlen))
		return -EFAULT;
	if (optname == MRT_VERSION)
		val = 0x0305;
#ifdef CONFIG_IP_PIMSM
	else if (optname == MRT_PIM)
		val = mrt->mroute_do_pim;
#endif
	else
		val = mrt->mroute_do_assert;
	if (copy_to_user(optval, &val, olr))
		return -EFAULT;
	return 0;
}
))
1378 * The IP multicast ioctl support routines.
1381 int ipmr_ioctl(struct sock
*sk
, int cmd
, void __user
*arg
)
1383 struct sioc_sg_req sr
;
1384 struct sioc_vif_req vr
;
1385 struct vif_device
*vif
;
1386 struct mfc_cache
*c
;
1387 struct net
*net
= sock_net(sk
);
1388 struct mr_table
*mrt
;
1390 mrt
= ipmr_get_table(net
, raw_sk(sk
)->ipmr_table
? : RT_TABLE_DEFAULT
);
1396 if (copy_from_user(&vr
, arg
, sizeof(vr
)))
1398 if (vr
.vifi
>= mrt
->maxvif
)
1400 read_lock(&mrt_lock
);
1401 vif
= &mrt
->vif_table
[vr
.vifi
];
1402 if (VIF_EXISTS(mrt
, vr
.vifi
)) {
1403 vr
.icount
= vif
->pkt_in
;
1404 vr
.ocount
= vif
->pkt_out
;
1405 vr
.ibytes
= vif
->bytes_in
;
1406 vr
.obytes
= vif
->bytes_out
;
1407 read_unlock(&mrt_lock
);
1409 if (copy_to_user(arg
, &vr
, sizeof(vr
)))
1413 read_unlock(&mrt_lock
);
1414 return -EADDRNOTAVAIL
;
1416 if (copy_from_user(&sr
, arg
, sizeof(sr
)))
1419 read_lock(&mrt_lock
);
1420 c
= ipmr_cache_find(mrt
, sr
.src
.s_addr
, sr
.grp
.s_addr
);
1422 sr
.pktcnt
= c
->mfc_un
.res
.pkt
;
1423 sr
.bytecnt
= c
->mfc_un
.res
.bytes
;
1424 sr
.wrong_if
= c
->mfc_un
.res
.wrong_if
;
1425 read_unlock(&mrt_lock
);
1427 if (copy_to_user(arg
, &sr
, sizeof(sr
)))
1431 read_unlock(&mrt_lock
);
1432 return -EADDRNOTAVAIL
;
1434 return -ENOIOCTLCMD
;
static int ipmr_device_event(struct notifier_block *this, unsigned long event, void *ptr)
{
	struct net_device *dev = ptr;
	struct net *net = dev_net(dev);
	struct mr_table *mrt;
	struct vif_device *v;
	int ct;
	LIST_HEAD(list);

	if (event != NETDEV_UNREGISTER)
		return NOTIFY_DONE;

	ipmr_for_each_table(mrt, net) {
		v = &mrt->vif_table[0];
		for (ct = 0; ct < mrt->maxvif; ct++, v++) {
			if (v->dev == dev)
				vif_delete(mrt, ct, 1, &list);
		}
	}
	unregister_netdevice_many(&list);
	return NOTIFY_DONE;
}

static struct notifier_block ip_mr_notifier = {
	.notifier_call = ipmr_device_event,
};
/*
 *	Encapsulate a packet by attaching a valid IPIP header to it.
 *	This avoids tunnel drivers and other mess and gives us the speed so
 *	important for multicast video.
 */

static void ip_encap(struct sk_buff *skb, __be32 saddr, __be32 daddr)
{
	struct iphdr *iph;
	struct iphdr *old_iph = ip_hdr(skb);

	skb_push(skb, sizeof(struct iphdr));
	skb->transport_header = skb->network_header;
	skb_reset_network_header(skb);
	iph = ip_hdr(skb);

	iph->version	=	4;
	iph->tos	=	old_iph->tos;
	iph->ttl	=	old_iph->ttl;
	iph->frag_off	=	0;
	iph->daddr	=	daddr;
	iph->saddr	=	saddr;
	iph->protocol	=	IPPROTO_IPIP;
	iph->ihl	=	5;
	iph->tot_len	=	htons(skb->len);
	ip_select_ident(iph, skb_dst(skb), NULL);
	ip_send_check(iph);

	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
}
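/*
 * ip_encap() is used by ipmr_queue_xmit() below for VIFF_TUNNEL vifs; the
 * outer saddr/daddr are the vif's local and remote tunnel endpoints and the
 * original multicast packet follows the new IPIP header unchanged.
 */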
static inline int ipmr_forward_finish(struct sk_buff *skb)
{
	struct ip_options *opt = &(IPCB(skb)->opt);

	IP_INC_STATS_BH(dev_net(skb_dst(skb)->dev), IPSTATS_MIB_OUTFORWDATAGRAMS);

	if (unlikely(opt->optlen))
		ip_forward_options(skb);

	return dst_output(skb);
}
/*
 *	Processing handlers for ipmr_forward
 */

static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt,
			    struct sk_buff *skb, struct mfc_cache *c, int vifi)
{
	const struct iphdr *iph = ip_hdr(skb);
	struct vif_device *vif = &mrt->vif_table[vifi];
	struct net_device *dev;
	struct rtable *rt;
	int    encap = 0;

	if (vif->dev == NULL)
		goto out_free;

#ifdef CONFIG_IP_PIMSM
	if (vif->flags & VIFF_REGISTER) {
		vif->pkt_out++;
		vif->bytes_out += skb->len;
		vif->dev->stats.tx_bytes += skb->len;
		vif->dev->stats.tx_packets++;
		ipmr_cache_report(mrt, skb, vifi, IGMPMSG_WHOLEPKT);
		goto out_free;
	}
#endif

	if (vif->flags&VIFF_TUNNEL) {
		struct flowi fl = { .oif = vif->link,
				    .nl_u = { .ip4_u =
					      { .daddr = vif->remote,
						.saddr = vif->local,
						.tos = RT_TOS(iph->tos) } },
				    .proto = IPPROTO_IPIP };
		if (ip_route_output_key(net, &rt, &fl))
			goto out_free;
		encap = sizeof(struct iphdr);
	} else {
		struct flowi fl = { .oif = vif->link,
				    .nl_u = { .ip4_u =
					      { .daddr = iph->daddr,
						.tos = RT_TOS(iph->tos) } },
				    .proto = IPPROTO_IPIP };
		if (ip_route_output_key(net, &rt, &fl))
			goto out_free;
	}

	dev = rt->u.dst.dev;

	if (skb->len+encap > dst_mtu(&rt->u.dst) && (ntohs(iph->frag_off) & IP_DF)) {
		/* Do not fragment multicasts. Alas, IPv4 does not
		   allow to send ICMP, so that packets will disappear
		   to blackhole.
		 */

		IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_FRAGFAILS);
		ip_rt_put(rt);
		goto out_free;
	}

	encap += LL_RESERVED_SPACE(dev) + rt->u.dst.header_len;

	if (skb_cow(skb, encap)) {
		ip_rt_put(rt);
		goto out_free;
	}

	vif->pkt_out++;
	vif->bytes_out += skb->len;

	skb_dst_drop(skb);
	skb_dst_set(skb, &rt->u.dst);
	ip_decrease_ttl(ip_hdr(skb));

	/* FIXME: forward and output firewalls used to be called here.
	 * What do we do with netfilter? -- RR */
	if (vif->flags & VIFF_TUNNEL) {
		ip_encap(skb, vif->local, vif->remote);
		/* FIXME: extra output firewall step used to be here. --RR */
		vif->dev->stats.tx_packets++;
		vif->dev->stats.tx_bytes += skb->len;
	}

	IPCB(skb)->flags |= IPSKB_FORWARDED;

	/*
	 * RFC1584 teaches, that DVMRP/PIM router must deliver packets locally
	 * not only before forwarding, but after forwarding on all output
	 * interfaces. It is clear, if mrouter runs a multicasting
	 * program, it should receive packets not depending to what interface
	 * program is joined.
	 * If we will not make it, the program will have to join on all
	 * interfaces. On the other hand, multihoming host (or router, but
	 * not mrouter) cannot join to more than one interface - it will
	 * result in receiving multiple packets.
	 */
	NF_HOOK(NFPROTO_IPV4, NF_INET_FORWARD, skb, skb->dev, dev,
		ipmr_forward_finish);
	return;

out_free:
	kfree_skb(skb);
}
static int ipmr_find_vif(struct mr_table *mrt, struct net_device *dev)
{
	int ct;

	for (ct = mrt->maxvif-1; ct >= 0; ct--) {
		if (mrt->vif_table[ct].dev == dev)
			break;
	}
	return ct;
}
/* "local" means that we should preserve one skb (for local delivery) */

static int ip_mr_forward(struct net *net, struct mr_table *mrt,
			 struct sk_buff *skb, struct mfc_cache *cache,
			 int local)
{
	int psend = -1;
	int vif, ct;

	vif = cache->mfc_parent;
	cache->mfc_un.res.pkt++;
	cache->mfc_un.res.bytes += skb->len;

	/*
	 * Wrong interface: drop packet and (maybe) send PIM assert.
	 */
	if (mrt->vif_table[vif].dev != skb->dev) {
		int true_vifi;

		if (skb_rtable(skb)->fl.iif == 0) {
			/* It is our own packet, looped back.
			   Very complicated situation...

			   The best workaround until routing daemons will be
			   fixed is not to redistribute packet, if it was
			   send through wrong interface. It means, that
			   multicast applications WILL NOT work for
			   (S,G), which have default multicast route pointing
			   to wrong oif. In any case, it is not a good
			   idea to use multicasting applications on router.
			 */
			goto dont_forward;
		}

		cache->mfc_un.res.wrong_if++;
		true_vifi = ipmr_find_vif(mrt, skb->dev);

		if (true_vifi >= 0 && mrt->mroute_do_assert &&
		    /* pimsm uses asserts, when switching from RPT to SPT,
		       so that we cannot check that packet arrived on an oif.
		       It is bad, but otherwise we would need to move pretty
		       large chunk of pimd to kernel. Ough... --ANK
		     */
		    (mrt->mroute_do_pim ||
		     cache->mfc_un.res.ttls[true_vifi] < 255) &&
		    time_after(jiffies,
			       cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) {
			cache->mfc_un.res.last_assert = jiffies;
			ipmr_cache_report(mrt, skb, true_vifi, IGMPMSG_WRONGVIF);
		}
		goto dont_forward;
	}

	mrt->vif_table[vif].pkt_in++;
	mrt->vif_table[vif].bytes_in += skb->len;

	/*
	 *	Forward the frame
	 */
	for (ct = cache->mfc_un.res.maxvif-1; ct >= cache->mfc_un.res.minvif; ct--) {
		if (ip_hdr(skb)->ttl > cache->mfc_un.res.ttls[ct]) {
			if (psend != -1) {
				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
				if (skb2)
					ipmr_queue_xmit(net, mrt, skb2, cache,
							psend);
			}
			psend = ct;
		}
	}
	if (psend != -1) {
		if (local) {
			struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
			if (skb2)
				ipmr_queue_xmit(net, mrt, skb2, cache, psend);
		} else {
			ipmr_queue_xmit(net, mrt, skb, cache, psend);
			return 0;
		}
	}

dont_forward:
	if (!local)
		kfree_skb(skb);
	return 0;
}
/*
 *	Multicast packets for forwarding arrive here
 */

int ip_mr_input(struct sk_buff *skb)
{
	struct mfc_cache *cache;
	struct net *net = dev_net(skb->dev);
	int local = skb_rtable(skb)->rt_flags & RTCF_LOCAL;
	struct mr_table *mrt;
	int err;

	/* Packet is looped back after forward, it should not be
	   forwarded second time, but still can be delivered locally.
	 */
	if (IPCB(skb)->flags&IPSKB_FORWARDED)
		goto dont_forward;

	err = ipmr_fib_lookup(net, &skb_rtable(skb)->fl, &mrt);
	if (err < 0) {
		kfree_skb(skb);
		return err;
	}

	if (!local) {
		if (IPCB(skb)->opt.router_alert) {
			if (ip_call_ra_chain(skb))
				return 0;
		} else if (ip_hdr(skb)->protocol == IPPROTO_IGMP) {
			/* IGMPv1 (and broken IGMPv2 implementations sort of
			   Cisco IOS <= 11.2(8)) do not put router alert
			   option to IGMP packets destined to routable
			   groups. It is very bad, because it means
			   that we can forward NO IGMP messages.
			 */
			read_lock(&mrt_lock);
			if (mrt->mroute_sk) {
				nf_reset(skb);
				raw_rcv(mrt->mroute_sk, skb);
				read_unlock(&mrt_lock);
				return 0;
			}
			read_unlock(&mrt_lock);
		}
	}

	read_lock(&mrt_lock);
	cache = ipmr_cache_find(mrt, ip_hdr(skb)->saddr, ip_hdr(skb)->daddr);

	/*
	 *	No usable cache entry
	 */
	if (cache == NULL) {
		int vif;

		if (local) {
			struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
			ip_local_deliver(skb);
			if (skb2 == NULL) {
				read_unlock(&mrt_lock);
				return -ENOBUFS;
			}
			skb = skb2;
		}

		vif = ipmr_find_vif(mrt, skb->dev);
		if (vif >= 0) {
			int err2 = ipmr_cache_unresolved(mrt, vif, skb);
			read_unlock(&mrt_lock);

			return err2;
		}
		read_unlock(&mrt_lock);
		kfree_skb(skb);
		return -ENODEV;
	}

	ip_mr_forward(net, mrt, skb, cache, local);

	read_unlock(&mrt_lock);

	if (local)
		return ip_local_deliver(skb);

	return 0;

dont_forward:
	if (local)
		return ip_local_deliver(skb);
	kfree_skb(skb);
	return 0;
}
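/*
 * PIM register decapsulation: __pim_rcv() strips the outer IP and PIM
 * register headers and re-injects the inner multicast packet as if it
 * had arrived on the pimreg device (see skb_tunnel_rx() below).
 */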
#ifdef CONFIG_IP_PIMSM
static int __pim_rcv(struct mr_table *mrt, struct sk_buff *skb,
		     unsigned int pimlen)
{
	struct net_device *reg_dev = NULL;
	struct iphdr *encap;

	encap = (struct iphdr *)(skb_transport_header(skb) + pimlen);
	/*
	   Check that:
	   a. packet is really destined to a multicast group
	   b. packet is not a NULL-REGISTER
	   c. packet is not truncated
	 */
	if (!ipv4_is_multicast(encap->daddr) ||
	    encap->tot_len == 0 ||
	    ntohs(encap->tot_len) + pimlen > skb->len)
		return 1;

	read_lock(&mrt_lock);
	if (mrt->mroute_reg_vif_num >= 0)
		reg_dev = mrt->vif_table[mrt->mroute_reg_vif_num].dev;
	if (reg_dev)
		dev_hold(reg_dev);
	read_unlock(&mrt_lock);

	if (reg_dev == NULL)
		return 1;

	skb->mac_header = skb->network_header;
	skb_pull(skb, (u8 *)encap - skb->data);
	skb_reset_network_header(skb);
	skb->protocol = htons(ETH_P_IP);
	skb->ip_summed = 0;
	skb->pkt_type = PACKET_HOST;

	skb_tunnel_rx(skb, reg_dev);

	netif_rx(skb);
	dev_put(reg_dev);

	return 0;
}
#endif
#ifdef CONFIG_IP_PIMSM_V1
/*
 * Handle IGMP messages of PIMv1
 */

int pim_rcv_v1(struct sk_buff *skb)
{
	struct igmphdr *pim;
	struct net *net = dev_net(skb->dev);
	struct mr_table *mrt;

	if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr)))
		goto drop;

	pim = igmp_hdr(skb);

	if (ipmr_fib_lookup(net, &skb_rtable(skb)->fl, &mrt) < 0)
		goto drop;

	if (!mrt->mroute_do_pim ||
	    pim->group != PIM_V1_VERSION || pim->code != PIM_V1_REGISTER)
		goto drop;

	if (__pim_rcv(mrt, skb, sizeof(*pim))) {
drop:
		kfree_skb(skb);
	}
	return 0;
}
#endif
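/*
 * In pim_rcv() below the register checksum is accepted if either the
 * checksum over the PIM header alone or over the whole packet verifies,
 * matching the "checksum only PIM header" relaxation credited in the
 * header comment above.
 */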
#ifdef CONFIG_IP_PIMSM_V2
static int pim_rcv(struct sk_buff *skb)
{
	struct pimreghdr *pim;
	struct net *net = dev_net(skb->dev);
	struct mr_table *mrt;

	if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr)))
		goto drop;

	pim = (struct pimreghdr *)skb_transport_header(skb);
	if (pim->type != ((PIM_VERSION<<4)|(PIM_REGISTER)) ||
	    (pim->flags&PIM_NULL_REGISTER) ||
	    (ip_compute_csum((void *)pim, sizeof(*pim)) != 0 &&
	     csum_fold(skb_checksum(skb, 0, skb->len, 0))))
		goto drop;

	if (ipmr_fib_lookup(net, &skb_rtable(skb)->fl, &mrt) < 0)
		goto drop;

	if (__pim_rcv(mrt, skb, sizeof(*pim))) {
drop:
		kfree_skb(skb);
	}
	return 0;
}
#endif
static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
			      struct mfc_cache *c, struct rtmsg *rtm)
{
	int ct;
	struct rtnexthop *nhp;
	u8 *b = skb_tail_pointer(skb);
	struct rtattr *mp_head;

	/* If cache is unresolved, don't try to parse IIF and OIF */
	if (c->mfc_parent >= MAXVIFS)
		return -ENOENT;

	if (VIF_EXISTS(mrt, c->mfc_parent))
		RTA_PUT(skb, RTA_IIF, 4, &mrt->vif_table[c->mfc_parent].dev->ifindex);

	mp_head = (struct rtattr *)skb_put(skb, RTA_LENGTH(0));

	for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
		if (VIF_EXISTS(mrt, ct) && c->mfc_un.res.ttls[ct] < 255) {
			if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4))
				goto rtattr_failure;
			nhp = (struct rtnexthop *)skb_put(skb, RTA_ALIGN(sizeof(*nhp)));
			nhp->rtnh_flags = 0;
			nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
			nhp->rtnh_ifindex = mrt->vif_table[ct].dev->ifindex;
			nhp->rtnh_len = sizeof(*nhp);
		}
	}
	mp_head->rta_type = RTA_MULTIPATH;
	mp_head->rta_len = skb_tail_pointer(skb) - (u8 *)mp_head;
	rtm->rtm_type = RTN_MULTICAST;
	return 1;

rtattr_failure:
	nlmsg_trim(skb, b);
	return -EMSGSIZE;
}
int ipmr_get_route(struct net *net,
		   struct sk_buff *skb, struct rtmsg *rtm, int nowait)
{
	int err;
	struct mr_table *mrt;
	struct mfc_cache *cache;
	struct rtable *rt = skb_rtable(skb);

	mrt = ipmr_get_table(net, RT_TABLE_DEFAULT);
	if (mrt == NULL)
		return -ENOENT;

	read_lock(&mrt_lock);
	cache = ipmr_cache_find(mrt, rt->rt_src, rt->rt_dst);

	if (cache == NULL) {
		struct sk_buff *skb2;
		struct iphdr *iph;
		struct net_device *dev;
		int vif;

		if (nowait) {
			read_unlock(&mrt_lock);
			return -EAGAIN;
		}

		dev = skb->dev;
		if (dev == NULL || (vif = ipmr_find_vif(mrt, dev)) < 0) {
			read_unlock(&mrt_lock);
			return -ENODEV;
		}
		skb2 = skb_clone(skb, GFP_ATOMIC);
		if (skb2 == NULL) {
			read_unlock(&mrt_lock);
			return -ENOMEM;
		}

		skb_push(skb2, sizeof(struct iphdr));
		skb_reset_network_header(skb2);
		iph = ip_hdr(skb2);
		iph->ihl = sizeof(struct iphdr) >> 2;
		iph->saddr = rt->rt_src;
		iph->daddr = rt->rt_dst;
		iph->version = 0;
		err = ipmr_cache_unresolved(mrt, vif, skb2);
		read_unlock(&mrt_lock);
		return err;
	}

	if (!nowait && (rtm->rtm_flags&RTM_F_NOTIFY))
		cache->mfc_flags |= MFC_NOTIFY;
	err = __ipmr_fill_mroute(mrt, skb, cache, rtm);
	read_unlock(&mrt_lock);
	return err;
}
static int ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
			    u32 pid, u32 seq, struct mfc_cache *c)
{
	struct nlmsghdr *nlh;
	struct rtmsg *rtm;

	nlh = nlmsg_put(skb, pid, seq, RTM_NEWROUTE, sizeof(*rtm), NLM_F_MULTI);
	if (nlh == NULL)
		return -EMSGSIZE;

	rtm = nlmsg_data(nlh);
	rtm->rtm_family   = RTNL_FAMILY_IPMR;
	rtm->rtm_dst_len  = 32;
	rtm->rtm_src_len  = 32;
	rtm->rtm_tos      = 0;
	rtm->rtm_table    = mrt->id;
	NLA_PUT_U32(skb, RTA_TABLE, mrt->id);
	rtm->rtm_type     = RTN_MULTICAST;
	rtm->rtm_scope    = RT_SCOPE_UNIVERSE;
	rtm->rtm_protocol = RTPROT_UNSPEC;
	rtm->rtm_flags    = 0;

	NLA_PUT_BE32(skb, RTA_SRC, c->mfc_origin);
	NLA_PUT_BE32(skb, RTA_DST, c->mfc_mcastgrp);

	if (__ipmr_fill_mroute(mrt, skb, c, rtm) < 0)
		goto nla_put_failure;

	return nlmsg_end(skb, nlh);

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}
static int ipmr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct net *net = sock_net(skb->sk);
	struct mr_table *mrt;
	struct mfc_cache *mfc;
	unsigned int t = 0, s_t;
	unsigned int h = 0, s_h;
	unsigned int e = 0, s_e;

	s_t = cb->args[0];
	s_h = cb->args[1];
	s_e = cb->args[2];

	read_lock(&mrt_lock);
	ipmr_for_each_table(mrt, net) {
		if (t < s_t)
			goto next_table;
		if (t > s_t)
			s_h = 0;
		for (h = s_h; h < MFC_LINES; h++) {
			list_for_each_entry(mfc, &mrt->mfc_cache_array[h], list) {
				if (e < s_e)
					goto next_entry;
				if (ipmr_fill_mroute(mrt, skb,
						     NETLINK_CB(cb->skb).pid,
						     cb->nlh->nlmsg_seq,
						     mfc) < 0)
					goto done;
next_entry:
				e++;
			}
			e = s_e = 0;
		}
		s_h = 0;
next_table:
		t++;
	}
done:
	read_unlock(&mrt_lock);

	cb->args[2] = e;
	cb->args[1] = h;
	cb->args[0] = t;

	return skb->len;
}
#ifdef CONFIG_PROC_FS
/*
 *	The /proc interfaces to multicast routing /proc/ip_mr_cache /proc/ip_mr_vif
 */
struct ipmr_vif_iter {
	struct seq_net_private p;
	struct mr_table *mrt;
	int ct;
};

static struct vif_device *ipmr_vif_seq_idx(struct net *net,
					   struct ipmr_vif_iter *iter,
					   loff_t pos)
{
	struct mr_table *mrt = iter->mrt;

	for (iter->ct = 0; iter->ct < mrt->maxvif; ++iter->ct) {
		if (!VIF_EXISTS(mrt, iter->ct))
			continue;
		if (pos-- == 0)
			return &mrt->vif_table[iter->ct];
	}
	return NULL;
}

static void *ipmr_vif_seq_start(struct seq_file *seq, loff_t *pos)
	__acquires(mrt_lock)
{
	struct ipmr_vif_iter *iter = seq->private;
	struct net *net = seq_file_net(seq);
	struct mr_table *mrt;

	mrt = ipmr_get_table(net, RT_TABLE_DEFAULT);
	if (mrt == NULL)
		return ERR_PTR(-ENOENT);

	iter->mrt = mrt;

	read_lock(&mrt_lock);
	return *pos ? ipmr_vif_seq_idx(net, seq->private, *pos - 1)
		: SEQ_START_TOKEN;
}

static void *ipmr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct ipmr_vif_iter *iter = seq->private;
	struct net *net = seq_file_net(seq);
	struct mr_table *mrt = iter->mrt;

	++*pos;
	if (v == SEQ_START_TOKEN)
		return ipmr_vif_seq_idx(net, iter, 0);

	while (++iter->ct < mrt->maxvif) {
		if (!VIF_EXISTS(mrt, iter->ct))
			continue;
		return &mrt->vif_table[iter->ct];
	}
	return NULL;
}

static void ipmr_vif_seq_stop(struct seq_file *seq, void *v)
	__releases(mrt_lock)
{
	read_unlock(&mrt_lock);
}

static int ipmr_vif_seq_show(struct seq_file *seq, void *v)
{
	struct ipmr_vif_iter *iter = seq->private;
	struct mr_table *mrt = iter->mrt;

	if (v == SEQ_START_TOKEN) {
		seq_puts(seq,
			 "Interface      BytesIn  PktsIn  BytesOut PktsOut Flags Local    Remote\n");
	} else {
		const struct vif_device *vif = v;
		const char *name = vif->dev ? vif->dev->name : "none";

		seq_printf(seq,
			   "%2Zd %-10s %8ld %7ld  %8ld %7ld %05X %08X %08X\n",
			   vif - mrt->vif_table,
			   name, vif->bytes_in, vif->pkt_in,
			   vif->bytes_out, vif->pkt_out,
			   vif->flags, vif->local, vif->remote);
	}
	return 0;
}

static const struct seq_operations ipmr_vif_seq_ops = {
	.start = ipmr_vif_seq_start,
	.next  = ipmr_vif_seq_next,
	.stop  = ipmr_vif_seq_stop,
	.show  = ipmr_vif_seq_show,
};

static int ipmr_vif_open(struct inode *inode, struct file *file)
{
	return seq_open_net(inode, file, &ipmr_vif_seq_ops,
			    sizeof(struct ipmr_vif_iter));
}

static const struct file_operations ipmr_vif_fops = {
	.owner	 = THIS_MODULE,
	.open	 = ipmr_vif_open,
	.read	 = seq_read,
	.llseek	 = seq_lseek,
	.release = seq_release_net,
};
struct ipmr_mfc_iter {
	struct seq_net_private p;
	struct mr_table *mrt;
	struct list_head *cache;
	int ct;
};

static struct mfc_cache *ipmr_mfc_seq_idx(struct net *net,
					  struct ipmr_mfc_iter *it, loff_t pos)
{
	struct mr_table *mrt = it->mrt;
	struct mfc_cache *mfc;

	read_lock(&mrt_lock);
	for (it->ct = 0; it->ct < MFC_LINES; it->ct++) {
		it->cache = &mrt->mfc_cache_array[it->ct];
		list_for_each_entry(mfc, it->cache, list)
			if (pos-- == 0)
				return mfc;
	}
	read_unlock(&mrt_lock);

	spin_lock_bh(&mfc_unres_lock);
	it->cache = &mrt->mfc_unres_queue;
	list_for_each_entry(mfc, it->cache, list)
		if (pos-- == 0)
			return mfc;
	spin_unlock_bh(&mfc_unres_lock);

	it->cache = NULL;
	return NULL;
}

static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
{
	struct ipmr_mfc_iter *it = seq->private;
	struct net *net = seq_file_net(seq);
	struct mr_table *mrt;

	mrt = ipmr_get_table(net, RT_TABLE_DEFAULT);
	if (mrt == NULL)
		return ERR_PTR(-ENOENT);

	it->mrt = mrt;
	it->cache = NULL;
	it->ct = 0;
	return *pos ? ipmr_mfc_seq_idx(net, seq->private, *pos - 1)
		: SEQ_START_TOKEN;
}

static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct mfc_cache *mfc = v;
	struct ipmr_mfc_iter *it = seq->private;
	struct net *net = seq_file_net(seq);
	struct mr_table *mrt = it->mrt;

	++*pos;

	if (v == SEQ_START_TOKEN)
		return ipmr_mfc_seq_idx(net, seq->private, 0);

	if (mfc->list.next != it->cache)
		return list_entry(mfc->list.next, struct mfc_cache, list);

	if (it->cache == &mrt->mfc_unres_queue)
		goto end_of_list;

	BUG_ON(it->cache != &mrt->mfc_cache_array[it->ct]);

	while (++it->ct < MFC_LINES) {
		it->cache = &mrt->mfc_cache_array[it->ct];
		if (list_empty(it->cache))
			continue;
		return list_first_entry(it->cache, struct mfc_cache, list);
	}

	/* exhausted cache_array, show unresolved */
	read_unlock(&mrt_lock);
	it->cache = &mrt->mfc_unres_queue;
	it->ct = 0;

	spin_lock_bh(&mfc_unres_lock);
	if (!list_empty(it->cache))
		return list_first_entry(it->cache, struct mfc_cache, list);

end_of_list:
	spin_unlock_bh(&mfc_unres_lock);
	it->cache = NULL;

	return NULL;
}

static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
{
	struct ipmr_mfc_iter *it = seq->private;
	struct mr_table *mrt = it->mrt;

	if (it->cache == &mrt->mfc_unres_queue)
		spin_unlock_bh(&mfc_unres_lock);
	else if (it->cache == &mrt->mfc_cache_array[it->ct])
		read_unlock(&mrt_lock);
}

static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
{
	int n;

	if (v == SEQ_START_TOKEN) {
		seq_puts(seq,
		 "Group    Origin   Iif     Pkts    Bytes    Wrong Oifs\n");
	} else {
		const struct mfc_cache *mfc = v;
		const struct ipmr_mfc_iter *it = seq->private;
		const struct mr_table *mrt = it->mrt;

		seq_printf(seq, "%08X %08X %-3hd",
			   (__force u32) mfc->mfc_mcastgrp,
			   (__force u32) mfc->mfc_origin,
			   mfc->mfc_parent);

		if (it->cache != &mrt->mfc_unres_queue) {
			seq_printf(seq, " %8lu %8lu %8lu",
				   mfc->mfc_un.res.pkt,
				   mfc->mfc_un.res.bytes,
				   mfc->mfc_un.res.wrong_if);
			for (n = mfc->mfc_un.res.minvif;
			     n < mfc->mfc_un.res.maxvif; n++) {
				if (VIF_EXISTS(mrt, n) &&
				    mfc->mfc_un.res.ttls[n] < 255)
					seq_printf(seq,
					   " %2d:%-3d",
					   n, mfc->mfc_un.res.ttls[n]);
			}
		} else {
			/* unresolved mfc_caches don't contain
			 * pkt, bytes and wrong_if values
			 */
			seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul);
		}
		seq_putc(seq, '\n');
	}
	return 0;
}

static const struct seq_operations ipmr_mfc_seq_ops = {
	.start = ipmr_mfc_seq_start,
	.next  = ipmr_mfc_seq_next,
	.stop  = ipmr_mfc_seq_stop,
	.show  = ipmr_mfc_seq_show,
};

static int ipmr_mfc_open(struct inode *inode, struct file *file)
{
	return seq_open_net(inode, file, &ipmr_mfc_seq_ops,
			    sizeof(struct ipmr_mfc_iter));
}

static const struct file_operations ipmr_mfc_fops = {
	.owner	 = THIS_MODULE,
	.open	 = ipmr_mfc_open,
	.read	 = seq_read,
	.llseek	 = seq_lseek,
	.release = seq_release_net,
};
#endif
#ifdef CONFIG_IP_PIMSM_V2
static const struct net_protocol pim_protocol = {
	.handler	=	pim_rcv,
	.netns_ok	=	1,
};
#endif
/*
 *	Setup for IP multicast routing
 */
static int __net_init ipmr_net_init(struct net *net)
{
	int err;

	err = ipmr_rules_init(net);
	if (err < 0)
		goto fail;

#ifdef CONFIG_PROC_FS
	err = -ENOMEM;
	if (!proc_net_fops_create(net, "ip_mr_vif", 0, &ipmr_vif_fops))
		goto proc_vif_fail;
	if (!proc_net_fops_create(net, "ip_mr_cache", 0, &ipmr_mfc_fops))
		goto proc_cache_fail;
#endif
	return 0;

#ifdef CONFIG_PROC_FS
proc_cache_fail:
	proc_net_remove(net, "ip_mr_vif");
proc_vif_fail:
	ipmr_rules_exit(net);
#endif
fail:
	return err;
}

static void __net_exit ipmr_net_exit(struct net *net)
{
#ifdef CONFIG_PROC_FS
	proc_net_remove(net, "ip_mr_cache");
	proc_net_remove(net, "ip_mr_vif");
#endif
	ipmr_rules_exit(net);
}

static struct pernet_operations ipmr_net_ops = {
	.init = ipmr_net_init,
	.exit = ipmr_net_exit,
};
int __init ip_mr_init(void)
{
	int err;

	mrt_cachep = kmem_cache_create("ip_mrt_cache",
				       sizeof(struct mfc_cache),
				       0, SLAB_HWCACHE_ALIGN|SLAB_PANIC,
				       NULL);
	if (!mrt_cachep)
		return -ENOMEM;

	err = register_pernet_subsys(&ipmr_net_ops);
	if (err)
		goto reg_pernet_fail;

	err = register_netdevice_notifier(&ip_mr_notifier);
	if (err)
		goto reg_notif_fail;
#ifdef CONFIG_IP_PIMSM_V2
	if (inet_add_protocol(&pim_protocol, IPPROTO_PIM) < 0) {
		printk(KERN_ERR "ip_mr_init: can't add PIM protocol\n");
		err = -EAGAIN;
		goto add_proto_fail;
	}
#endif
	rtnl_register(RTNL_FAMILY_IPMR, RTM_GETROUTE, NULL, ipmr_rtm_dumproute);
	return 0;

#ifdef CONFIG_IP_PIMSM_V2
add_proto_fail:
	unregister_netdevice_notifier(&ip_mr_notifier);
#endif
reg_notif_fail:
	unregister_pernet_subsys(&ipmr_net_ops);
reg_pernet_fail:
	kmem_cache_destroy(mrt_cachep);
	return err;
}