/*
 *	IP multicast routing support for mrouted 3.6/3.8
 *
 *	(c) 1995 Alan Cox, <alan@lxorguk.ukuu.org.uk>
 *	Linux Consultancy and Custom Driver Development
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 *
 *	Fixes:
 *	Michael Chastain	:	Incorrect size of copying.
 *	Alan Cox		:	Added the cache manager code
 *	Alan Cox		:	Fixed the clone/copy bug and device race.
 *	Mike McLagan		:	Routing by source
 *	Malcolm Beattie		:	Buffer handling fixes.
 *	Alexey Kuznetsov	:	Double buffer free and other fixes.
 *	SVR Anand		:	Fixed several multicast bugs and problems.
 *	Alexey Kuznetsov	:	Status, optimisations and more.
 *	Brad Parker		:	Better behaviour on mrouted upcall overflow.
 *	Carlos Picoto		:	PIMv1 Support
 *	Pavlin Ivanov Radoslavov:	PIMv2 Registers must checksum only PIM header.
 *					Relax this requirement to work with older peers.
 */
#include <asm/system.h>
#include <asm/uaccess.h>
#include <linux/types.h>
#include <linux/capability.h>
#include <linux/errno.h>
#include <linux/timer.h>
#include <linux/kernel.h>
#include <linux/fcntl.h>
#include <linux/stat.h>
#include <linux/socket.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/inetdevice.h>
#include <linux/igmp.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/mroute.h>
#include <linux/init.h>
#include <linux/if_ether.h>
#include <linux/slab.h>
#include <net/net_namespace.h>
#include <net/protocol.h>
#include <linux/skbuff.h>
#include <net/route.h>
#include <linux/notifier.h>
#include <linux/if_arp.h>
#include <linux/netfilter_ipv4.h>
#include <net/checksum.h>
#include <net/netlink.h>
#include <net/fib_rules.h>
#if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2)
#define CONFIG_IP_PIMSM	1
#endif
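/* CONFIG_IP_PIMSM is a derived convenience symbol: it is defined whenever
 * either PIM-SM version is configured, so the PIM register-vif code below
 * can be compiled once for both.
 */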
struct mr_table {
    struct list_head	list;
#ifdef CONFIG_NET_NS
    struct net		*net;
#endif
    u32			id;
    struct sock		*mroute_sk;
    struct timer_list	ipmr_expire_timer;
    struct list_head	mfc_unres_queue;
    struct list_head	mfc_cache_array[MFC_LINES];
    struct vif_device	vif_table[MAXVIFS];
    int			maxvif;
    atomic_t		cache_resolve_queue_len;
    int			mroute_do_assert;
    int			mroute_do_pim;
#if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2)
    int			mroute_reg_vif_num;
#endif
};

struct ipmr_rule {
    struct fib_rule	common;
};
/* Big lock, protecting vif table, mrt cache and mroute socket state.
   Note that the changes are semaphored via rtnl_lock.
 */

static DEFINE_RWLOCK(mrt_lock);

/*
 *	Multicast router control variables
 */

#define VIF_EXISTS(_mrt, _idx) ((_mrt)->vif_table[_idx].dev != NULL)
/* Special spinlock for queue of unresolved entries */
static DEFINE_SPINLOCK(mfc_unres_lock);

/* We return to original Alan's scheme. Hash table of resolved
   entries is changed only in process context and protected
   with weak lock mrt_lock. Queue of unresolved entries is protected
   with strong spinlock mfc_unres_lock.

   In this case data path is free of exclusive locks at all.
 */

static struct kmem_cache *mrt_cachep __read_mostly;
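/* mfc_cache entries come from this slab: resolved entries are allocated
 * with GFP_KERNEL from process context, unresolved ones with GFP_ATOMIC
 * from the packet path (see ipmr_cache_alloc()/ipmr_cache_alloc_unres()).
 */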
static struct mr_table *ipmr_new_table(struct net *net, u32 id);
static int ip_mr_forward(struct net *net, struct mr_table *mrt,
                         struct sk_buff *skb, struct mfc_cache *cache,
                         int local);
static int ipmr_cache_report(struct mr_table *mrt,
                             struct sk_buff *pkt, vifi_t vifi, int assert);
static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
                              struct mfc_cache *c, struct rtmsg *rtm);
static void ipmr_expire_process(unsigned long arg);
#ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES
#define ipmr_for_each_table(mrt, net) \
    list_for_each_entry_rcu(mrt, &net->ipv4.mr_tables, list)

static struct mr_table *ipmr_get_table(struct net *net, u32 id)
{
    struct mr_table *mrt;

    ipmr_for_each_table(mrt, net) {
        if (mrt->id == id)
            return mrt;
    }
    return NULL;
}
static int ipmr_fib_lookup(struct net *net, struct flowi *flp,
                           struct mr_table **mrt)
{
    struct ipmr_result res;
    struct fib_lookup_arg arg = { .result = &res, };
    int err;

    err = fib_rules_lookup(net->ipv4.mr_rules_ops, flp, 0, &arg);
    if (err < 0)
        return err;
    *mrt = res.mrt;
    return 0;
}
static int ipmr_rule_action(struct fib_rule *rule, struct flowi *flp,
                            int flags, struct fib_lookup_arg *arg)
{
    struct ipmr_result *res = arg->result;
    struct mr_table *mrt;

    switch (rule->action) {
    case FR_ACT_TO_TBL:
        break;
    case FR_ACT_UNREACHABLE:
        return -ENETUNREACH;
    case FR_ACT_PROHIBIT:
        return -EACCES;
    case FR_ACT_BLACKHOLE:
    default:
        return -EINVAL;
    }

    mrt = ipmr_get_table(rule->fr_net, rule->table);
    if (mrt == NULL)
        return -EAGAIN;
    res->mrt = mrt;
    return 0;
}
static int ipmr_rule_match(struct fib_rule *rule, struct flowi *fl, int flags)
{
    return 1;
}

static const struct nla_policy ipmr_rule_policy[FRA_MAX + 1] = {
    FRA_GENERIC_POLICY,
};
static int ipmr_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
                               struct fib_rule_hdr *frh, struct nlattr **tb)
{
    return 0;
}

static int ipmr_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
                             struct nlattr **tb)
{
    return 1;
}

static int ipmr_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
                          struct fib_rule_hdr *frh)
{
    frh->dst_len = 0;
    frh->src_len = 0;
    frh->tos     = 0;
    return 0;
}
static const struct fib_rules_ops __net_initdata ipmr_rules_ops_template = {
    .family		= RTNL_FAMILY_IPMR,
    .rule_size		= sizeof(struct ipmr_rule),
    .addr_size		= sizeof(u32),
    .action		= ipmr_rule_action,
    .match		= ipmr_rule_match,
    .configure		= ipmr_rule_configure,
    .compare		= ipmr_rule_compare,
    .default_pref	= fib_default_rule_pref,
    .fill		= ipmr_rule_fill,
    .nlgroup		= RTNLGRP_IPV4_RULE,
    .policy		= ipmr_rule_policy,
    .owner		= THIS_MODULE,
};
static int __net_init ipmr_rules_init(struct net *net)
{
    struct fib_rules_ops *ops;
    struct mr_table *mrt;
    int err;

    ops = fib_rules_register(&ipmr_rules_ops_template, net);
    if (IS_ERR(ops))
        return PTR_ERR(ops);

    INIT_LIST_HEAD(&net->ipv4.mr_tables);

    mrt = ipmr_new_table(net, RT_TABLE_DEFAULT);
    if (mrt == NULL) {
        err = -ENOMEM;
        goto err1;
    }

    err = fib_default_rule_add(ops, 0x7fff, RT_TABLE_DEFAULT, 0);
    if (err < 0)
        goto err2;

    net->ipv4.mr_rules_ops = ops;
    return 0;

err2:
    kfree(mrt);
err1:
    fib_rules_unregister(ops);
    return err;
}
static void __net_exit ipmr_rules_exit(struct net *net)
{
    struct mr_table *mrt, *next;

    list_for_each_entry_safe(mrt, next, &net->ipv4.mr_tables, list) {
        list_del(&mrt->list);
        kfree(mrt);
    }
    fib_rules_unregister(net->ipv4.mr_rules_ops);
}
#else
#define ipmr_for_each_table(mrt, net) \
    for (mrt = net->ipv4.mrt; mrt; mrt = NULL)

static struct mr_table *ipmr_get_table(struct net *net, u32 id)
{
    return net->ipv4.mrt;
}

static int ipmr_fib_lookup(struct net *net, struct flowi *flp,
                           struct mr_table **mrt)
{
    *mrt = net->ipv4.mrt;
    return 0;
}

static int __net_init ipmr_rules_init(struct net *net)
{
    net->ipv4.mrt = ipmr_new_table(net, RT_TABLE_DEFAULT);
    return net->ipv4.mrt ? 0 : -ENOMEM;
}

static void __net_exit ipmr_rules_exit(struct net *net)
{
    kfree(net->ipv4.mrt);
}
#endif
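/* Create the multicast routing table identified by @id for this netns, or
 * return the existing one. Called under RTNL from ipmr_rules_init() and
 * from the MRT_TABLE setsockopt path.
 */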
static struct mr_table *ipmr_new_table(struct net *net, u32 id)
{
    struct mr_table *mrt;
    unsigned int i;

    mrt = ipmr_get_table(net, id);
    if (mrt != NULL)
        return mrt;

    mrt = kzalloc(sizeof(*mrt), GFP_KERNEL);
    if (mrt == NULL)
        return NULL;
    write_pnet(&mrt->net, net);
    mrt->id = id;

    /* Forwarding cache */
    for (i = 0; i < MFC_LINES; i++)
        INIT_LIST_HEAD(&mrt->mfc_cache_array[i]);

    INIT_LIST_HEAD(&mrt->mfc_unres_queue);

    setup_timer(&mrt->ipmr_expire_timer, ipmr_expire_process,
                (unsigned long)mrt);

#ifdef CONFIG_IP_PIMSM
    mrt->mroute_reg_vif_num = -1;
#endif
#ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES
    list_add_tail_rcu(&mrt->list, &net->ipv4.mr_tables);
#endif
    return mrt;
}
/* Service routines creating virtual interfaces: DVMRP tunnels and PIMREG */

static void ipmr_del_tunnel(struct net_device *dev, struct vifctl *v)
{
    struct net *net = dev_net(dev);

    dev = __dev_get_by_name(net, "tunl0");
    if (dev) {
        const struct net_device_ops *ops = dev->netdev_ops;
        struct ifreq ifr;
        struct ip_tunnel_parm p;

        memset(&p, 0, sizeof(p));
        p.iph.daddr = v->vifc_rmt_addr.s_addr;
        p.iph.saddr = v->vifc_lcl_addr.s_addr;
        p.iph.version = 4;
        p.iph.ihl = 5;
        p.iph.protocol = IPPROTO_IPIP;
        sprintf(p.name, "dvmrp%d", v->vifc_vifi);
        ifr.ifr_ifru.ifru_data = (__force void __user *)&p;

        if (ops->ndo_do_ioctl) {
            mm_segment_t oldfs = get_fs();

            set_fs(KERNEL_DS);
            ops->ndo_do_ioctl(dev, &ifr, SIOCDELTUNNEL);
            set_fs(oldfs);
        }
    }
}
static
struct net_device *ipmr_new_tunnel(struct net *net, struct vifctl *v)
{
    struct net_device *dev;

    dev = __dev_get_by_name(net, "tunl0");

    if (dev) {
        const struct net_device_ops *ops = dev->netdev_ops;
        int err;
        struct ifreq ifr;
        struct ip_tunnel_parm p;
        struct in_device *in_dev;

        memset(&p, 0, sizeof(p));
        p.iph.daddr = v->vifc_rmt_addr.s_addr;
        p.iph.saddr = v->vifc_lcl_addr.s_addr;
        p.iph.version = 4;
        p.iph.ihl = 5;
        p.iph.protocol = IPPROTO_IPIP;
        sprintf(p.name, "dvmrp%d", v->vifc_vifi);
        ifr.ifr_ifru.ifru_data = (__force void __user *)&p;

        if (ops->ndo_do_ioctl) {
            mm_segment_t oldfs = get_fs();

            set_fs(KERNEL_DS);
            err = ops->ndo_do_ioctl(dev, &ifr, SIOCADDTUNNEL);
            set_fs(oldfs);
        } else
            err = -EOPNOTSUPP;

        dev = NULL;

        if (err == 0 &&
            (dev = __dev_get_by_name(net, p.name)) != NULL) {
            dev->flags |= IFF_MULTICAST;

            in_dev = __in_dev_get_rtnl(dev);
            if (in_dev == NULL)
                goto failure;

            ipv4_devconf_setall(in_dev);
            IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0;

            if (dev_open(dev))
                goto failure;
            dev_hold(dev);
        }
    }
    return dev;

failure:
    /* allow the register to be completed before unregistering. */
    rtnl_unlock();
    rtnl_lock();

    unregister_netdevice(dev);
    return NULL;
}
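/* The PIM "register" vif is a pseudo-device: anything transmitted on it is
 * not sent on the wire but bounced to the user-space daemon as an
 * IGMPMSG_WHOLEPKT upcall, which then builds the PIM Register message.
 */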
#ifdef CONFIG_IP_PIMSM

static netdev_tx_t reg_vif_xmit(struct sk_buff *skb, struct net_device *dev)
{
    struct net *net = dev_net(dev);
    struct mr_table *mrt;
    struct flowi fl = {
        .oif	= dev->ifindex,
        .iif	= skb->skb_iif,
        .mark	= skb->mark,
    };
    int err;

    err = ipmr_fib_lookup(net, &fl, &mrt);
    if (err < 0) {
        kfree_skb(skb);
        return err;
    }

    read_lock(&mrt_lock);
    dev->stats.tx_bytes += skb->len;
    dev->stats.tx_packets++;
    ipmr_cache_report(mrt, skb, mrt->mroute_reg_vif_num, IGMPMSG_WHOLEPKT);
    read_unlock(&mrt_lock);
    kfree_skb(skb);
    return NETDEV_TX_OK;
}

static const struct net_device_ops reg_vif_netdev_ops = {
    .ndo_start_xmit	= reg_vif_xmit,
};

static void reg_vif_setup(struct net_device *dev)
{
    dev->type		= ARPHRD_PIMREG;
    dev->mtu		= ETH_DATA_LEN - sizeof(struct iphdr) - 8;
    dev->flags		= IFF_NOARP;
    dev->netdev_ops	= &reg_vif_netdev_ops;
    dev->destructor	= free_netdev;
    dev->features	|= NETIF_F_NETNS_LOCAL;
}
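/* Allocate and register the "pimreg" device for a table (pimreg for the
 * default table, pimreg<id> otherwise) and mark it usable for multicast.
 */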
static struct net_device *ipmr_reg_vif(struct net *net, struct mr_table *mrt)
{
    struct net_device *dev;
    struct in_device *in_dev;
    char name[IFNAMSIZ];

    if (mrt->id == RT_TABLE_DEFAULT)
        sprintf(name, "pimreg");
    else
        sprintf(name, "pimreg%u", mrt->id);

    dev = alloc_netdev(0, name, reg_vif_setup);

    if (dev == NULL)
        return NULL;

    dev_net_set(dev, net);

    if (register_netdevice(dev)) {
        free_netdev(dev);
        return NULL;
    }
    dev->iflink = 0;

    rcu_read_lock();
    if ((in_dev = __in_dev_get_rcu(dev)) == NULL) {
        rcu_read_unlock();
        goto failure;
    }

    ipv4_devconf_setall(in_dev);
    IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0;
    rcu_read_unlock();

    if (dev_open(dev))
        goto failure;

    dev_hold(dev);

    return dev;

failure:
    /* allow the register to be completed before unregistering. */
    rtnl_unlock();
    rtnl_lock();

    unregister_netdevice(dev);
    return NULL;
}
#endif
/*
 *	Delete a VIF entry
 *	@notify: Set to 1, if the caller is a notifier_call
 */

static int vif_delete(struct mr_table *mrt, int vifi, int notify,
                      struct list_head *head)
{
    struct vif_device *v;
    struct net_device *dev;
    struct in_device *in_dev;

    if (vifi < 0 || vifi >= mrt->maxvif)
        return -EADDRNOTAVAIL;

    v = &mrt->vif_table[vifi];

    write_lock_bh(&mrt_lock);
    dev = v->dev;
    v->dev = NULL;

    if (!dev) {
        write_unlock_bh(&mrt_lock);
        return -EADDRNOTAVAIL;
    }

#ifdef CONFIG_IP_PIMSM
    if (vifi == mrt->mroute_reg_vif_num)
        mrt->mroute_reg_vif_num = -1;
#endif

    if (vifi + 1 == mrt->maxvif) {
        int tmp;
        for (tmp = vifi - 1; tmp >= 0; tmp--) {
            if (VIF_EXISTS(mrt, tmp))
                break;
        }
        mrt->maxvif = tmp + 1;
    }

    write_unlock_bh(&mrt_lock);

    dev_set_allmulti(dev, -1);

    if ((in_dev = __in_dev_get_rtnl(dev)) != NULL) {
        IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)--;
        ip_rt_multicast_event(in_dev);
    }

    if (v->flags & (VIFF_TUNNEL | VIFF_REGISTER) && !notify)
        unregister_netdevice_queue(dev, head);

    dev_put(dev);
    return 0;
}
static inline void ipmr_cache_free(struct mfc_cache *c)
{
    kmem_cache_free(mrt_cachep, c);
}
/* Destroy an unresolved cache entry, killing queued skbs
   and reporting error to netlink readers.
 */

static void ipmr_destroy_unres(struct mr_table *mrt, struct mfc_cache *c)
{
    struct net *net = read_pnet(&mrt->net);
    struct sk_buff *skb;
    struct nlmsgerr *e;

    atomic_dec(&mrt->cache_resolve_queue_len);

    while ((skb = skb_dequeue(&c->mfc_un.unres.unresolved))) {
        if (ip_hdr(skb)->version == 0) {
            struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));
            nlh->nlmsg_type = NLMSG_ERROR;
            nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
            skb_trim(skb, nlh->nlmsg_len);
            e = NLMSG_DATA(nlh);
            e->error = -ETIMEDOUT;
            memset(&e->msg, 0, sizeof(e->msg));

            rtnl_unicast(skb, net, NETLINK_CB(skb).pid);
        } else
            kfree_skb(skb);
    }

    ipmr_cache_free(c);
}
/* Timer process for the unresolved queue. */

static void ipmr_expire_process(unsigned long arg)
{
    struct mr_table *mrt = (struct mr_table *)arg;
    unsigned long now;
    unsigned long expires;
    struct mfc_cache *c, *next;

    if (!spin_trylock(&mfc_unres_lock)) {
        mod_timer(&mrt->ipmr_expire_timer, jiffies + HZ/10);
        return;
    }

    if (list_empty(&mrt->mfc_unres_queue))
        goto out;

    now = jiffies;
    expires = 10*HZ;

    list_for_each_entry_safe(c, next, &mrt->mfc_unres_queue, list) {
        if (time_after(c->mfc_un.unres.expires, now)) {
            unsigned long interval = c->mfc_un.unres.expires - now;
            if (interval < expires)
                expires = interval;
            continue;
        }

        list_del(&c->list);
        ipmr_destroy_unres(mrt, c);
    }

    if (!list_empty(&mrt->mfc_unres_queue))
        mod_timer(&mrt->ipmr_expire_timer, jiffies + expires);

out:
    spin_unlock(&mfc_unres_lock);
}
/* Fill oifs list. It is called under write locked mrt_lock. */

static void ipmr_update_thresholds(struct mr_table *mrt, struct mfc_cache *cache,
                                   unsigned char *ttls)
{
    int vifi;

    cache->mfc_un.res.minvif = MAXVIFS;
    cache->mfc_un.res.maxvif = 0;
    memset(cache->mfc_un.res.ttls, 255, MAXVIFS);

    for (vifi = 0; vifi < mrt->maxvif; vifi++) {
        if (VIF_EXISTS(mrt, vifi) &&
            ttls[vifi] && ttls[vifi] < 255) {
            cache->mfc_un.res.ttls[vifi] = ttls[vifi];
            if (cache->mfc_un.res.minvif > vifi)
                cache->mfc_un.res.minvif = vifi;
            if (cache->mfc_un.res.maxvif <= vifi)
                cache->mfc_un.res.maxvif = vifi + 1;
        }
    }
}
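/* Bring a new virtual interface into the table: create the underlying
 * device (pimreg or DVMRP tunnel) if needed, turn on allmulti and
 * MC_FORWARDING on it, then publish the vif_device under mrt_lock.
 */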
static int vif_add(struct net *net, struct mr_table *mrt,
                   struct vifctl *vifc, int mrtsock)
{
    int vifi = vifc->vifc_vifi;
    struct vif_device *v = &mrt->vif_table[vifi];
    struct net_device *dev;
    struct in_device *in_dev;
    int err;

    /* Is vif busy ? */
    if (VIF_EXISTS(mrt, vifi))
        return -EADDRINUSE;

    switch (vifc->vifc_flags) {
#ifdef CONFIG_IP_PIMSM
    case VIFF_REGISTER:
        /*
         * Special Purpose VIF in PIM
         * All the packets will be sent to the daemon
         */
        if (mrt->mroute_reg_vif_num >= 0)
            return -EADDRINUSE;
        dev = ipmr_reg_vif(net, mrt);
        if (!dev)
            return -ENOBUFS;
        err = dev_set_allmulti(dev, 1);
        if (err) {
            unregister_netdevice(dev);
            dev_put(dev);
            return err;
        }
        break;
#endif
    case VIFF_TUNNEL:
        dev = ipmr_new_tunnel(net, vifc);
        if (!dev)
            return -ENOBUFS;
        err = dev_set_allmulti(dev, 1);
        if (err) {
            ipmr_del_tunnel(dev, vifc);
            dev_put(dev);
            return err;
        }
        break;

    case VIFF_USE_IFINDEX:
    case 0:
        if (vifc->vifc_flags == VIFF_USE_IFINDEX) {
            dev = dev_get_by_index(net, vifc->vifc_lcl_ifindex);
            if (dev && dev->ip_ptr == NULL) {
                dev_put(dev);
                return -EADDRNOTAVAIL;
            }
        } else
            dev = ip_dev_find(net, vifc->vifc_lcl_addr.s_addr);

        if (!dev)
            return -EADDRNOTAVAIL;
        err = dev_set_allmulti(dev, 1);
        if (err) {
            dev_put(dev);
            return err;
        }
        break;
    default:
        return -EINVAL;
    }

    if ((in_dev = __in_dev_get_rtnl(dev)) == NULL) {
        dev_put(dev);
        return -EADDRNOTAVAIL;
    }
    IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)++;
    ip_rt_multicast_event(in_dev);

    /*
     *	Fill in the VIF structures
     */
    v->rate_limit = vifc->vifc_rate_limit;
    v->local = vifc->vifc_lcl_addr.s_addr;
    v->remote = vifc->vifc_rmt_addr.s_addr;
    v->flags = vifc->vifc_flags;
    if (!mrtsock)
        v->flags |= VIFF_STATIC;
    v->threshold = vifc->vifc_threshold;
    v->bytes_in = 0;
    v->bytes_out = 0;
    v->pkt_in = 0;
    v->pkt_out = 0;
    v->link = dev->ifindex;
    if (v->flags & (VIFF_TUNNEL | VIFF_REGISTER))
        v->link = dev->iflink;

    /* And finish update writing critical data */
    write_lock_bh(&mrt_lock);
    v->dev = dev;
#ifdef CONFIG_IP_PIMSM
    if (v->flags & VIFF_REGISTER)
        mrt->mroute_reg_vif_num = vifi;
#endif
    if (vifi + 1 > mrt->maxvif)
        mrt->maxvif = vifi + 1;
    write_unlock_bh(&mrt_lock);
    return 0;
}
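/* Hash lookup of a resolved (origin, group) entry; runs under mrt_lock. */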
static struct mfc_cache *ipmr_cache_find(struct mr_table *mrt,
                                         __be32 origin,
                                         __be32 mcastgrp)
{
    int line = MFC_HASH(mcastgrp, origin);
    struct mfc_cache *c;

    list_for_each_entry(c, &mrt->mfc_cache_array[line], list) {
        if (c->mfc_origin == origin && c->mfc_mcastgrp == mcastgrp)
            return c;
    }
    return NULL;
}
/*
 *	Allocate a multicast cache entry
 */
static struct mfc_cache *ipmr_cache_alloc(void)
{
    struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
    if (c == NULL)
        return NULL;
    c->mfc_un.res.minvif = MAXVIFS;
    return c;
}

static struct mfc_cache *ipmr_cache_alloc_unres(void)
{
    struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
    if (c == NULL)
        return NULL;
    skb_queue_head_init(&c->mfc_un.unres.unresolved);
    c->mfc_un.unres.expires = jiffies + 10*HZ;
    return c;
}
/*
 *	A cache entry has gone into a resolved state from queued
 */

static void ipmr_cache_resolve(struct net *net, struct mr_table *mrt,
                               struct mfc_cache *uc, struct mfc_cache *c)
{
    struct sk_buff *skb;
    struct nlmsgerr *e;

    /*
     *	Play the pending entries through our router
     */

    while ((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) {
        if (ip_hdr(skb)->version == 0) {
            struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));

            if (__ipmr_fill_mroute(mrt, skb, c, NLMSG_DATA(nlh)) > 0) {
                nlh->nlmsg_len = skb_tail_pointer(skb) -
                                 (u8 *)nlh;
            } else {
                nlh->nlmsg_type = NLMSG_ERROR;
                nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
                skb_trim(skb, nlh->nlmsg_len);
                e = NLMSG_DATA(nlh);
                e->error = -EMSGSIZE;
                memset(&e->msg, 0, sizeof(e->msg));
            }

            rtnl_unicast(skb, net, NETLINK_CB(skb).pid);
        } else
            ip_mr_forward(net, mrt, skb, c, 0);
    }
}
/*
 *	Bounce a cache query up to mrouted. We could use netlink for this but mrouted
 *	expects the following bizarre scheme.
 *
 *	Called under mrt_lock.
 */

static int ipmr_cache_report(struct mr_table *mrt,
                             struct sk_buff *pkt, vifi_t vifi, int assert)
{
    struct sk_buff *skb;
    const int ihl = ip_hdrlen(pkt);
    struct igmphdr *igmp;
    struct igmpmsg *msg;
    int ret;

#ifdef CONFIG_IP_PIMSM
    if (assert == IGMPMSG_WHOLEPKT)
        skb = skb_realloc_headroom(pkt, sizeof(struct iphdr));
    else
#endif
        skb = alloc_skb(128, GFP_ATOMIC);

    if (!skb)
        return -ENOBUFS;

#ifdef CONFIG_IP_PIMSM
    if (assert == IGMPMSG_WHOLEPKT) {
        /* Ugly, but we have no choice with this interface.
           Duplicate old header, fix ihl, length etc.
           And all this only to mangle msg->im_msgtype and
           to set msg->im_mbz to "mbz" :-)
         */
        skb_push(skb, sizeof(struct iphdr));
        skb_reset_network_header(skb);
        skb_reset_transport_header(skb);
        msg = (struct igmpmsg *)skb_network_header(skb);
        memcpy(msg, skb_network_header(pkt), sizeof(struct iphdr));
        msg->im_msgtype = IGMPMSG_WHOLEPKT;
        msg->im_mbz = 0;
        msg->im_vif = mrt->mroute_reg_vif_num;
        ip_hdr(skb)->ihl = sizeof(struct iphdr) >> 2;
        ip_hdr(skb)->tot_len = htons(ntohs(ip_hdr(pkt)->tot_len) +
                                     sizeof(struct iphdr));
    } else
#endif
    {
        /*
         *	Copy the IP header
         */
        skb->network_header = skb->tail;
        skb_put(skb, ihl);
        skb_copy_to_linear_data(skb, pkt->data, ihl);
        ip_hdr(skb)->protocol = 0;	/* Flag to the kernel this is a route add */
        msg = (struct igmpmsg *)skb_network_header(skb);
        msg->im_vif = vifi;
        skb_dst_set(skb, dst_clone(skb_dst(pkt)));

        /*
         *	Add our header
         */
        igmp = (struct igmphdr *)skb_put(skb, sizeof(struct igmphdr));
        igmp->type = msg->im_msgtype = assert;
        igmp->code = 0;
        ip_hdr(skb)->tot_len = htons(skb->len);	/* Fix the length */
        skb->transport_header = skb->network_header;
    }

    if (mrt->mroute_sk == NULL) {
        kfree_skb(skb);
        return -EINVAL;
    }

    /*
     *	Deliver to mrouted
     */
    ret = sock_queue_rcv_skb(mrt->mroute_sk, skb);
    if (ret < 0) {
        if (net_ratelimit())
            printk(KERN_WARNING "mroute: pending queue full, dropping entries.\n");
        kfree_skb(skb);
    }

    return ret;
}
/*
 *	Queue a packet for resolution. It gets locked cache entry!
 */

static int
ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi, struct sk_buff *skb)
{
    bool found = false;
    int err;
    struct mfc_cache *c;
    const struct iphdr *iph = ip_hdr(skb);

    spin_lock_bh(&mfc_unres_lock);
    list_for_each_entry(c, &mrt->mfc_unres_queue, list) {
        if (c->mfc_mcastgrp == iph->daddr &&
            c->mfc_origin == iph->saddr) {
            found = true;
            break;
        }
    }

    if (!found) {
        /*
         *	Create a new entry if allowable
         */
        if (atomic_read(&mrt->cache_resolve_queue_len) >= 10 ||
            (c = ipmr_cache_alloc_unres()) == NULL) {
            spin_unlock_bh(&mfc_unres_lock);

            kfree_skb(skb);
            return -ENOBUFS;
        }

        /*
         *	Fill in the new cache entry
         */
        c->mfc_parent = -1;
        c->mfc_origin = iph->saddr;
        c->mfc_mcastgrp = iph->daddr;

        /*
         *	Reflect first query at mrouted.
         */
        err = ipmr_cache_report(mrt, skb, vifi, IGMPMSG_NOCACHE);
        if (err < 0) {
            /* If the report failed throw the cache entry
               out - Brad Parker
             */
            spin_unlock_bh(&mfc_unres_lock);

            ipmr_cache_free(c);
            kfree_skb(skb);
            return err;
        }

        atomic_inc(&mrt->cache_resolve_queue_len);
        list_add(&c->list, &mrt->mfc_unres_queue);

        if (atomic_read(&mrt->cache_resolve_queue_len) == 1)
            mod_timer(&mrt->ipmr_expire_timer, c->mfc_un.unres.expires);
    }

    /*
     *	See if we can append the packet
     */
    if (c->mfc_un.unres.unresolved.qlen > 3) {
        kfree_skb(skb);
        err = -ENOBUFS;
    } else {
        skb_queue_tail(&c->mfc_un.unres.unresolved, skb);
        err = 0;
    }

    spin_unlock_bh(&mfc_unres_lock);
    return err;
}
/*
 *	MFC cache manipulation by user space mroute daemon
 */

static int ipmr_mfc_delete(struct mr_table *mrt, struct mfcctl *mfc)
{
    int line;
    struct mfc_cache *c, *next;

    line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);

    list_for_each_entry_safe(c, next, &mrt->mfc_cache_array[line], list) {
        if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
            c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) {
            write_lock_bh(&mrt_lock);
            list_del(&c->list);
            write_unlock_bh(&mrt_lock);

            ipmr_cache_free(c);
            return 0;
        }
    }
    return -ENOENT;
}
static int ipmr_mfc_add(struct net *net, struct mr_table *mrt,
                        struct mfcctl *mfc, int mrtsock)
{
    bool found = false;
    int line;
    struct mfc_cache *uc, *c;

    if (mfc->mfcc_parent >= MAXVIFS)
        return -ENFILE;

    line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);

    list_for_each_entry(c, &mrt->mfc_cache_array[line], list) {
        if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
            c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) {
            found = true;
            break;
        }
    }

    if (found) {
        write_lock_bh(&mrt_lock);
        c->mfc_parent = mfc->mfcc_parent;
        ipmr_update_thresholds(mrt, c, mfc->mfcc_ttls);
        if (!mrtsock)
            c->mfc_flags |= MFC_STATIC;
        write_unlock_bh(&mrt_lock);
        return 0;
    }

    if (!ipv4_is_multicast(mfc->mfcc_mcastgrp.s_addr))
        return -EINVAL;

    c = ipmr_cache_alloc();
    if (c == NULL)
        return -ENOMEM;

    c->mfc_origin = mfc->mfcc_origin.s_addr;
    c->mfc_mcastgrp = mfc->mfcc_mcastgrp.s_addr;
    c->mfc_parent = mfc->mfcc_parent;
    ipmr_update_thresholds(mrt, c, mfc->mfcc_ttls);
    if (!mrtsock)
        c->mfc_flags |= MFC_STATIC;

    write_lock_bh(&mrt_lock);
    list_add(&c->list, &mrt->mfc_cache_array[line]);
    write_unlock_bh(&mrt_lock);

    /*
     *	Check to see if we resolved a queued list. If so we
     *	need to send on the frames and tidy up.
     */
    found = false;
    spin_lock_bh(&mfc_unres_lock);
    list_for_each_entry(uc, &mrt->mfc_unres_queue, list) {
        if (uc->mfc_origin == c->mfc_origin &&
            uc->mfc_mcastgrp == c->mfc_mcastgrp) {
            list_del(&uc->list);
            atomic_dec(&mrt->cache_resolve_queue_len);
            found = true;
            break;
        }
    }
    if (list_empty(&mrt->mfc_unres_queue))
        del_timer(&mrt->ipmr_expire_timer);
    spin_unlock_bh(&mfc_unres_lock);

    if (found) {
        ipmr_cache_resolve(net, mrt, uc, c);
        ipmr_cache_free(uc);
    }
    return 0;
}
/*
 *	Close the multicast socket, and clear the vif tables etc
 */

static void mroute_clean_tables(struct mr_table *mrt)
{
    int i;
    LIST_HEAD(list);
    struct mfc_cache *c, *next;

    /*
     *	Shut down all active vif entries
     */
    for (i = 0; i < mrt->maxvif; i++) {
        if (!(mrt->vif_table[i].flags & VIFF_STATIC))
            vif_delete(mrt, i, 0, &list);
    }
    unregister_netdevice_many(&list);

    /*
     *	Wipe the cache
     */
    for (i = 0; i < MFC_LINES; i++) {
        list_for_each_entry_safe(c, next, &mrt->mfc_cache_array[i], list) {
            if (c->mfc_flags & MFC_STATIC)
                continue;
            write_lock_bh(&mrt_lock);
            list_del(&c->list);
            write_unlock_bh(&mrt_lock);

            ipmr_cache_free(c);
        }
    }

    if (atomic_read(&mrt->cache_resolve_queue_len) != 0) {
        spin_lock_bh(&mfc_unres_lock);
        list_for_each_entry_safe(c, next, &mrt->mfc_unres_queue, list) {
            list_del(&c->list);
            ipmr_destroy_unres(mrt, c);
        }
        spin_unlock_bh(&mfc_unres_lock);
    }
}
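/* Destructor installed via ip_ra_control(): runs when the mroute control
 * socket goes away, dropping MC_FORWARDING and flushing the table(s) the
 * socket owned.
 */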
static void mrtsock_destruct(struct sock *sk)
{
    struct net *net = sock_net(sk);
    struct mr_table *mrt;

    rtnl_lock();
    ipmr_for_each_table(mrt, net) {
        if (sk == mrt->mroute_sk) {
            IPV4_DEVCONF_ALL(net, MC_FORWARDING)--;

            write_lock_bh(&mrt_lock);
            mrt->mroute_sk = NULL;
            write_unlock_bh(&mrt_lock);

            mroute_clean_tables(mrt);
        }
    }
    rtnl_unlock();
}
/*
 *	Socket options and virtual interface manipulation. The whole
 *	virtual interface system is a complete heap, but unfortunately
 *	that's how BSD mrouted happens to think. Maybe one day with a proper
 *	MOSPF/PIM router set up we can clean this up.
 */

int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsigned int optlen)
{
    int ret;
    struct vifctl vif;
    struct mfcctl mfc;
    struct net *net = sock_net(sk);
    struct mr_table *mrt;

    mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT);
    if (mrt == NULL)
        return -ENOENT;

    if (optname != MRT_INIT) {
        if (sk != mrt->mroute_sk && !capable(CAP_NET_ADMIN))
            return -EACCES;
    }

    switch (optname) {
    case MRT_INIT:
        if (sk->sk_type != SOCK_RAW ||
            inet_sk(sk)->inet_num != IPPROTO_IGMP)
            return -EOPNOTSUPP;
        if (optlen != sizeof(int))
            return -ENOPROTOOPT;

        rtnl_lock();
        if (mrt->mroute_sk) {
            rtnl_unlock();
            return -EADDRINUSE;
        }

        ret = ip_ra_control(sk, 1, mrtsock_destruct);
        if (ret == 0) {
            write_lock_bh(&mrt_lock);
            mrt->mroute_sk = sk;
            write_unlock_bh(&mrt_lock);

            IPV4_DEVCONF_ALL(net, MC_FORWARDING)++;
        }
        rtnl_unlock();
        return ret;
    case MRT_DONE:
        if (sk != mrt->mroute_sk)
            return -EACCES;
        return ip_ra_control(sk, 0, NULL);
    case MRT_ADD_VIF:
    case MRT_DEL_VIF:
        if (optlen != sizeof(vif))
            return -EINVAL;
        if (copy_from_user(&vif, optval, sizeof(vif)))
            return -EFAULT;
        if (vif.vifc_vifi >= MAXVIFS)
            return -ENFILE;
        rtnl_lock();
        if (optname == MRT_ADD_VIF)
            ret = vif_add(net, mrt, &vif, sk == mrt->mroute_sk);
        else
            ret = vif_delete(mrt, vif.vifc_vifi, 0, NULL);
        rtnl_unlock();
        return ret;

        /*
         *	Manipulate the forwarding caches. These live
         *	in a sort of kernel/user symbiosis.
         */
    case MRT_ADD_MFC:
    case MRT_DEL_MFC:
        if (optlen != sizeof(mfc))
            return -EINVAL;
        if (copy_from_user(&mfc, optval, sizeof(mfc)))
            return -EFAULT;
        rtnl_lock();
        if (optname == MRT_DEL_MFC)
            ret = ipmr_mfc_delete(mrt, &mfc);
        else
            ret = ipmr_mfc_add(net, mrt, &mfc, sk == mrt->mroute_sk);
        rtnl_unlock();
        return ret;
        /*
         *	Control PIM assert.
         */
    case MRT_ASSERT:
    {
        int v;
        if (get_user(v, (int __user *)optval))
            return -EFAULT;
        mrt->mroute_do_assert = (v) ? 1 : 0;
        return 0;
    }
#ifdef CONFIG_IP_PIMSM
    case MRT_PIM:
    {
        int v;

        if (get_user(v, (int __user *)optval))
            return -EFAULT;
        v = (v) ? 1 : 0;

        rtnl_lock();
        ret = 0;
        if (v != mrt->mroute_do_pim) {
            mrt->mroute_do_pim = v;
            mrt->mroute_do_assert = v;
        }
        rtnl_unlock();
        return ret;
    }
#endif
#ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES
    case MRT_TABLE:
    {
        u32 v;

        if (optlen != sizeof(u32))
            return -EINVAL;
        if (get_user(v, (u32 __user *)optval))
            return -EFAULT;
        rtnl_lock();
        ret = 0;
        if (sk == mrt->mroute_sk) {
            ret = -EBUSY;
        } else {
            if (!ipmr_new_table(net, v))
                ret = -ENOMEM;
            raw_sk(sk)->ipmr_table = v;
        }
        rtnl_unlock();
        return ret;
    }
#endif
    /*
     *	Spurious command, or MRT_VERSION which you cannot
     *	set.
     */
    default:
        return -ENOPROTOOPT;
    }
}
/*
 *	Getsock opt support for the multicast routing system.
 */

int ip_mroute_getsockopt(struct sock *sk, int optname, char __user *optval, int __user *optlen)
{
    int olr;
    int val;
    struct net *net = sock_net(sk);
    struct mr_table *mrt;

    mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT);
    if (mrt == NULL)
        return -ENOENT;

    if (optname != MRT_VERSION &&
#ifdef CONFIG_IP_PIMSM
        optname != MRT_PIM &&
#endif
        optname != MRT_ASSERT)
        return -ENOPROTOOPT;

    if (get_user(olr, optlen))
        return -EFAULT;

    olr = min_t(unsigned int, olr, sizeof(int));
    if (olr < 0)
        return -EINVAL;

    if (put_user(olr, optlen))
        return -EFAULT;
    if (optname == MRT_VERSION)
        val = 0x0305;
#ifdef CONFIG_IP_PIMSM
    else if (optname == MRT_PIM)
        val = mrt->mroute_do_pim;
#endif
    else
        val = mrt->mroute_do_assert;
    if (copy_to_user(optval, &val, olr))
        return -EFAULT;
    return 0;
}
/*
 *	The IP multicast ioctl support routines.
 */

int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg)
{
    struct sioc_sg_req sr;
    struct sioc_vif_req vr;
    struct vif_device *vif;
    struct mfc_cache *c;
    struct net *net = sock_net(sk);
    struct mr_table *mrt;

    mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT);
    if (mrt == NULL)
        return -ENOENT;

    switch (cmd) {
    case SIOCGETVIFCNT:
        if (copy_from_user(&vr, arg, sizeof(vr)))
            return -EFAULT;
        if (vr.vifi >= mrt->maxvif)
            return -EINVAL;
        read_lock(&mrt_lock);
        vif = &mrt->vif_table[vr.vifi];
        if (VIF_EXISTS(mrt, vr.vifi)) {
            vr.icount = vif->pkt_in;
            vr.ocount = vif->pkt_out;
            vr.ibytes = vif->bytes_in;
            vr.obytes = vif->bytes_out;
            read_unlock(&mrt_lock);

            if (copy_to_user(arg, &vr, sizeof(vr)))
                return -EFAULT;
            return 0;
        }
        read_unlock(&mrt_lock);
        return -EADDRNOTAVAIL;
    case SIOCGETSGCNT:
        if (copy_from_user(&sr, arg, sizeof(sr)))
            return -EFAULT;

        read_lock(&mrt_lock);
        c = ipmr_cache_find(mrt, sr.src.s_addr, sr.grp.s_addr);
        if (c) {
            sr.pktcnt = c->mfc_un.res.pkt;
            sr.bytecnt = c->mfc_un.res.bytes;
            sr.wrong_if = c->mfc_un.res.wrong_if;
            read_unlock(&mrt_lock);

            if (copy_to_user(arg, &sr, sizeof(sr)))
                return -EFAULT;
            return 0;
        }
        read_unlock(&mrt_lock);
        return -EADDRNOTAVAIL;
    default:
        return -ENOIOCTLCMD;
    }
}
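/* Netdevice notifier: when a device is unregistered, drop every VIF that
 * was bound to it in every table and batch the device unregisters.
 */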
static int ipmr_device_event(struct notifier_block *this, unsigned long event, void *ptr)
{
    struct net_device *dev = ptr;
    struct net *net = dev_net(dev);
    struct mr_table *mrt;
    struct vif_device *v;
    int ct;
    LIST_HEAD(list);

    if (event != NETDEV_UNREGISTER)
        return NOTIFY_DONE;

    ipmr_for_each_table(mrt, net) {
        v = &mrt->vif_table[0];
        for (ct = 0; ct < mrt->maxvif; ct++, v++) {
            if (v->dev == dev)
                vif_delete(mrt, ct, 1, &list);
        }
    }
    unregister_netdevice_many(&list);
    return NOTIFY_DONE;
}

static struct notifier_block ip_mr_notifier = {
    .notifier_call = ipmr_device_event,
};
/*
 *	Encapsulate a packet by attaching a valid IPIP header to it.
 *	This avoids tunnel drivers and other mess and gives us the speed so
 *	important for multicast video.
 */

static void ip_encap(struct sk_buff *skb, __be32 saddr, __be32 daddr)
{
    struct iphdr *iph;
    struct iphdr *old_iph = ip_hdr(skb);

    skb_push(skb, sizeof(struct iphdr));
    skb->transport_header = skb->network_header;
    skb_reset_network_header(skb);
    iph = ip_hdr(skb);

    iph->version	= 4;
    iph->tos		= old_iph->tos;
    iph->ttl		= old_iph->ttl;
    iph->frag_off	= 0;
    iph->daddr		= daddr;
    iph->saddr		= saddr;
    iph->protocol	= IPPROTO_IPIP;
    iph->ihl		= 5;
    iph->tot_len	= htons(skb->len);
    ip_select_ident(iph, skb_dst(skb), NULL);
    ip_send_check(iph);

    memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
    nf_reset(skb);
}
static inline int ipmr_forward_finish(struct sk_buff *skb)
{
    struct ip_options *opt = &(IPCB(skb)->opt);

    IP_INC_STATS_BH(dev_net(skb_dst(skb)->dev), IPSTATS_MIB_OUTFORWDATAGRAMS);

    if (unlikely(opt->optlen))
        ip_forward_options(skb);

    return dst_output(skb);
}
/*
 *	Processing handlers for ipmr_forward
 */

static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt,
                            struct sk_buff *skb, struct mfc_cache *c, int vifi)
{
    const struct iphdr *iph = ip_hdr(skb);
    struct vif_device *vif = &mrt->vif_table[vifi];
    struct net_device *dev;
    struct rtable *rt;
    int encap = 0;

    if (vif->dev == NULL)
        goto out_free;

#ifdef CONFIG_IP_PIMSM
    if (vif->flags & VIFF_REGISTER) {
        vif->pkt_out++;
        vif->bytes_out += skb->len;
        vif->dev->stats.tx_bytes += skb->len;
        vif->dev->stats.tx_packets++;
        ipmr_cache_report(mrt, skb, vifi, IGMPMSG_WHOLEPKT);
        goto out_free;
    }
#endif

    if (vif->flags & VIFF_TUNNEL) {
        struct flowi fl = { .oif = vif->link,
                            .nl_u = { .ip4_u =
                                      { .daddr = vif->remote,
                                        .saddr = vif->local,
                                        .tos = RT_TOS(iph->tos) } },
                            .proto = IPPROTO_IPIP };
        if (ip_route_output_key(net, &rt, &fl))
            goto out_free;
        encap = sizeof(struct iphdr);
    } else {
        struct flowi fl = { .oif = vif->link,
                            .nl_u = { .ip4_u =
                                      { .daddr = iph->daddr,
                                        .tos = RT_TOS(iph->tos) } },
                            .proto = IPPROTO_IPIP };
        if (ip_route_output_key(net, &rt, &fl))
            goto out_free;
    }

    dev = rt->dst.dev;

    if (skb->len + encap > dst_mtu(&rt->dst) && (ntohs(iph->frag_off) & IP_DF)) {
        /* Do not fragment multicasts. Alas, IPv4 does not
           allow to send ICMP, so that packets will disappear
           into a black hole.
         */
        IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_FRAGFAILS);
        ip_rt_put(rt);
        goto out_free;
    }

    encap += LL_RESERVED_SPACE(dev) + rt->dst.header_len;

    if (skb_cow(skb, encap)) {
        ip_rt_put(rt);
        goto out_free;
    }

    vif->pkt_out++;
    vif->bytes_out += skb->len;

    skb_dst_drop(skb);
    skb_dst_set(skb, &rt->dst);
    ip_decrease_ttl(ip_hdr(skb));

    /* FIXME: forward and output firewalls used to be called here.
     * What do we do with netfilter? -- RR */
    if (vif->flags & VIFF_TUNNEL) {
        ip_encap(skb, vif->local, vif->remote);
        /* FIXME: extra output firewall step used to be here. --RR */
        vif->dev->stats.tx_packets++;
        vif->dev->stats.tx_bytes += skb->len;
    }

    IPCB(skb)->flags |= IPSKB_FORWARDED;

    /*
     * RFC1584 teaches that a DVMRP/PIM router must deliver packets locally
     * not only before forwarding, but also after forwarding on all output
     * interfaces. Clearly, if the mrouter runs a multicasting program, it
     * should receive packets regardless of which interface the program
     * joined on. If we did not do this, the program would have to join on
     * all interfaces. On the other hand, a multihoming host (or a router,
     * but not an mrouter) cannot join on more than one interface, since
     * that would result in receiving multiple copies of each packet.
     */
    NF_HOOK(NFPROTO_IPV4, NF_INET_FORWARD, skb, skb->dev, dev,
            ipmr_forward_finish);
    return;

out_free:
    kfree_skb(skb);
}
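/* Map a net_device back to its vif index in this table, or -1 if the
 * device is not a configured vif.
 */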
static int ipmr_find_vif(struct mr_table *mrt, struct net_device *dev)
{
    int ct;

    for (ct = mrt->maxvif - 1; ct >= 0; ct--) {
        if (mrt->vif_table[ct].dev == dev)
            return ct;
    }
    return -1;
}
/* "local" means that we should preserve one skb (for local delivery) */

static int ip_mr_forward(struct net *net, struct mr_table *mrt,
                         struct sk_buff *skb, struct mfc_cache *cache,
                         int local)
{
    int psend = -1;
    int vif, ct;

    vif = cache->mfc_parent;
    cache->mfc_un.res.pkt++;
    cache->mfc_un.res.bytes += skb->len;

    /*
     * Wrong interface: drop packet and (maybe) send PIM assert.
     */
    if (mrt->vif_table[vif].dev != skb->dev) {
        int true_vifi;

        if (skb_rtable(skb)->fl.iif == 0) {
            /* It is our own packet, looped back.
               Very complicated situation...

               The best workaround until routing daemons will be
               fixed is not to redistribute a packet if it was
               sent through the wrong interface. It means that
               multicast applications WILL NOT work for
               (S,G) entries whose default multicast route points
               to the wrong oif. In any case, it is not a good
               idea to use multicasting applications on a router.
             */
            goto dont_forward;
        }

        cache->mfc_un.res.wrong_if++;
        true_vifi = ipmr_find_vif(mrt, skb->dev);

        if (true_vifi >= 0 && mrt->mroute_do_assert &&
            /* pimsm uses asserts, when switching from RPT to SPT,
               so that we cannot check that packet arrived on an oif.
               It is bad, but otherwise we would need to move pretty
               large chunk of pimd to kernel. Ough... --ANK
             */
            (mrt->mroute_do_pim ||
             cache->mfc_un.res.ttls[true_vifi] < 255) &&
            time_after(jiffies,
                       cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) {
            cache->mfc_un.res.last_assert = jiffies;
            ipmr_cache_report(mrt, skb, true_vifi, IGMPMSG_WRONGVIF);
        }
        goto dont_forward;
    }

    mrt->vif_table[vif].pkt_in++;
    mrt->vif_table[vif].bytes_in += skb->len;

    /*
     *	Forward the frame
     */
    for (ct = cache->mfc_un.res.maxvif - 1; ct >= cache->mfc_un.res.minvif; ct--) {
        if (ip_hdr(skb)->ttl > cache->mfc_un.res.ttls[ct]) {
            if (psend != -1) {
                struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
                if (skb2)
                    ipmr_queue_xmit(net, mrt, skb2, cache,
                                    psend);
            }
            psend = ct;
        }
    }
    if (psend != -1) {
        if (local) {
            struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
            if (skb2)
                ipmr_queue_xmit(net, mrt, skb2, cache, psend);
        } else {
            ipmr_queue_xmit(net, mrt, skb, cache, psend);
            return 0;
        }
    }

dont_forward:
    if (!local)
        kfree_skb(skb);
    return 0;
}
/*
 *	Multicast packets for forwarding arrive here
 */

int ip_mr_input(struct sk_buff *skb)
{
    struct mfc_cache *cache;
    struct net *net = dev_net(skb->dev);
    int local = skb_rtable(skb)->rt_flags & RTCF_LOCAL;
    struct mr_table *mrt;
    int err;

    /* Packet is looped back after forward, it should not be
       forwarded second time, but still can be delivered locally.
     */
    if (IPCB(skb)->flags & IPSKB_FORWARDED)
        goto dont_forward;

    err = ipmr_fib_lookup(net, &skb_rtable(skb)->fl, &mrt);
    if (err < 0) {
        kfree_skb(skb);
        return err;
    }

    if (!local) {
        if (IPCB(skb)->opt.router_alert) {
            if (ip_call_ra_chain(skb))
                return 0;
        } else if (ip_hdr(skb)->protocol == IPPROTO_IGMP) {
            /* IGMPv1 (and broken IGMPv2 implementations, sort of
               Cisco IOS <= 11.2(8)) do not put the router alert
               option into IGMP packets destined to routable
               groups. It is very bad, because it means
               that we can forward NO IGMP messages.
             */
            read_lock(&mrt_lock);
            if (mrt->mroute_sk) {
                nf_reset(skb);
                raw_rcv(mrt->mroute_sk, skb);
                read_unlock(&mrt_lock);
                return 0;
            }
            read_unlock(&mrt_lock);
        }
    }

    read_lock(&mrt_lock);
    cache = ipmr_cache_find(mrt, ip_hdr(skb)->saddr, ip_hdr(skb)->daddr);

    /*
     *	No usable cache entry
     */
    if (cache == NULL) {
        int vif;

        if (local) {
            struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
            ip_local_deliver(skb);
            if (skb2 == NULL) {
                read_unlock(&mrt_lock);
                return -ENOBUFS;
            }
            skb = skb2;
        }

        vif = ipmr_find_vif(mrt, skb->dev);
        if (vif >= 0) {
            int err2 = ipmr_cache_unresolved(mrt, vif, skb);
            read_unlock(&mrt_lock);

            return err2;
        }
        read_unlock(&mrt_lock);
        kfree_skb(skb);
        return -ENODEV;
    }

    ip_mr_forward(net, mrt, skb, cache, local);

    read_unlock(&mrt_lock);

    if (local)
        return ip_local_deliver(skb);

    return 0;

dont_forward:
    if (local)
        return ip_local_deliver(skb);
    kfree_skb(skb);
    return 0;
}
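/* Common PIM Register decapsulation: sanity-check the inner IP header,
 * then strip the outer headers and re-inject the inner multicast packet
 * as if it had been received on the pimreg device.
 */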
#ifdef CONFIG_IP_PIMSM
static int __pim_rcv(struct mr_table *mrt, struct sk_buff *skb,
                     unsigned int pimlen)
{
    struct net_device *reg_dev = NULL;
    struct iphdr *encap;

    encap = (struct iphdr *)(skb_transport_header(skb) + pimlen);
    /*
       Check that:
       a. packet is really destined to a multicast group
       b. packet is not a NULL-REGISTER
       c. packet is not truncated
     */
    if (!ipv4_is_multicast(encap->daddr) ||
        encap->tot_len == 0 ||
        ntohs(encap->tot_len) + pimlen > skb->len)
        return 1;

    read_lock(&mrt_lock);
    if (mrt->mroute_reg_vif_num >= 0)
        reg_dev = mrt->vif_table[mrt->mroute_reg_vif_num].dev;
    if (reg_dev)
        dev_hold(reg_dev);
    read_unlock(&mrt_lock);

    if (reg_dev == NULL)
        return 1;

    skb->mac_header = skb->network_header;
    skb_pull(skb, (u8 *)encap - skb->data);
    skb_reset_network_header(skb);
    skb->protocol = htons(ETH_P_IP);
    skb->ip_summed = 0;
    skb->pkt_type = PACKET_HOST;

    skb_tunnel_rx(skb, reg_dev);

    netif_rx(skb);
    dev_put(reg_dev);

    return 0;
}
#endif
#ifdef CONFIG_IP_PIMSM_V1
/*
 * Handle IGMP messages of PIMv1
 */

int pim_rcv_v1(struct sk_buff *skb)
{
    struct igmphdr *pim;
    struct net *net = dev_net(skb->dev);
    struct mr_table *mrt;

    if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr)))
        goto drop;

    pim = igmp_hdr(skb);

    if (ipmr_fib_lookup(net, &skb_rtable(skb)->fl, &mrt) < 0)
        goto drop;
    if (!mrt->mroute_do_pim ||
        pim->group != PIM_V1_VERSION || pim->code != PIM_V1_REGISTER)
        goto drop;

    if (__pim_rcv(mrt, skb, sizeof(*pim))) {
drop:
        kfree_skb(skb);
    }
    return 0;
}
#endif
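/* Handle PIMv2 Register messages arriving as IPPROTO_PIM: verify the
 * type/flags and the checksum (over the PIM header only, or the whole
 * packet for older peers) before decapsulating.
 */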
#ifdef CONFIG_IP_PIMSM_V2
static int pim_rcv(struct sk_buff *skb)
{
    struct pimreghdr *pim;
    struct net *net = dev_net(skb->dev);
    struct mr_table *mrt;

    if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr)))
        goto drop;

    pim = (struct pimreghdr *)skb_transport_header(skb);
    if (pim->type != ((PIM_VERSION << 4) | (PIM_REGISTER)) ||
        (pim->flags & PIM_NULL_REGISTER) ||
        (ip_compute_csum((void *)pim, sizeof(*pim)) != 0 &&
         csum_fold(skb_checksum(skb, 0, skb->len, 0))))
        goto drop;

    if (ipmr_fib_lookup(net, &skb_rtable(skb)->fl, &mrt) < 0)
        goto drop;
    if (__pim_rcv(mrt, skb, sizeof(*pim))) {
drop:
        kfree_skb(skb);
    }
    return 0;
}
#endif
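/* Append the incoming interface (RTA_IIF) and an RTA_MULTIPATH attribute
 * (one rtnexthop per output vif, TTL threshold in rtnh_hops) describing
 * cache entry @c to an rtnetlink message.
 */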
static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
                              struct mfc_cache *c, struct rtmsg *rtm)
{
    int ct;
    struct rtnexthop *nhp;
    u8 *b = skb_tail_pointer(skb);
    struct rtattr *mp_head;

    /* If cache is unresolved, don't try to parse IIF and OIF */
    if (c->mfc_parent >= MAXVIFS)
        return -ENOENT;

    if (VIF_EXISTS(mrt, c->mfc_parent))
        RTA_PUT(skb, RTA_IIF, 4, &mrt->vif_table[c->mfc_parent].dev->ifindex);

    mp_head = (struct rtattr *)skb_put(skb, RTA_LENGTH(0));

    for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
        if (VIF_EXISTS(mrt, ct) && c->mfc_un.res.ttls[ct] < 255) {
            if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4))
                goto rtattr_failure;
            nhp = (struct rtnexthop *)skb_put(skb, RTA_ALIGN(sizeof(*nhp)));
            nhp->rtnh_flags = 0;
            nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
            nhp->rtnh_ifindex = mrt->vif_table[ct].dev->ifindex;
            nhp->rtnh_len = sizeof(*nhp);
        }
    }
    mp_head->rta_type = RTA_MULTIPATH;
    mp_head->rta_len = skb_tail_pointer(skb) - (u8 *)mp_head;
    rtm->rtm_type = RTN_MULTICAST;
    return 1;

rtattr_failure:
    nlmsg_trim(skb, b);
    return -EMSGSIZE;
}
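/* Resolve multicast routing information for an RTM_GETROUTE request made
 * against an existing rtable entry; if there is no cache entry yet, queue
 * the query to the daemon (unless @nowait).
 */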
int ipmr_get_route(struct net *net,
                   struct sk_buff *skb, struct rtmsg *rtm, int nowait)
{
    int err;
    struct mr_table *mrt;
    struct mfc_cache *cache;
    struct rtable *rt = skb_rtable(skb);

    mrt = ipmr_get_table(net, RT_TABLE_DEFAULT);
    if (mrt == NULL)
        return -ENOENT;

    read_lock(&mrt_lock);
    cache = ipmr_cache_find(mrt, rt->rt_src, rt->rt_dst);

    if (cache == NULL) {
        struct sk_buff *skb2;
        struct iphdr *iph;
        struct net_device *dev;
        int vif;

        if (nowait) {
            read_unlock(&mrt_lock);
            return -EAGAIN;
        }

        dev = skb->dev;
        if (dev == NULL || (vif = ipmr_find_vif(mrt, dev)) < 0) {
            read_unlock(&mrt_lock);
            return -ENODEV;
        }
        skb2 = skb_clone(skb, GFP_ATOMIC);
        if (!skb2) {
            read_unlock(&mrt_lock);
            return -ENOMEM;
        }

        skb_push(skb2, sizeof(struct iphdr));
        skb_reset_network_header(skb2);
        iph = ip_hdr(skb2);
        iph->ihl = sizeof(struct iphdr) >> 2;
        iph->saddr = rt->rt_src;
        iph->daddr = rt->rt_dst;
        iph->version = 0;
        err = ipmr_cache_unresolved(mrt, vif, skb2);
        read_unlock(&mrt_lock);
        return err;
    }

    if (!nowait && (rtm->rtm_flags & RTM_F_NOTIFY))
        cache->mfc_flags |= MFC_NOTIFY;
    err = __ipmr_fill_mroute(mrt, skb, cache, rtm);
    read_unlock(&mrt_lock);
    return err;
}
static int ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
                            u32 pid, u32 seq, struct mfc_cache *c)
{
    struct nlmsghdr *nlh;
    struct rtmsg *rtm;

    nlh = nlmsg_put(skb, pid, seq, RTM_NEWROUTE, sizeof(*rtm), NLM_F_MULTI);
    if (nlh == NULL)
        return -EMSGSIZE;

    rtm = nlmsg_data(nlh);
    rtm->rtm_family   = RTNL_FAMILY_IPMR;
    rtm->rtm_dst_len  = 32;
    rtm->rtm_src_len  = 32;
    rtm->rtm_tos      = 0;
    rtm->rtm_table    = mrt->id;
    NLA_PUT_U32(skb, RTA_TABLE, mrt->id);
    rtm->rtm_type     = RTN_MULTICAST;
    rtm->rtm_scope    = RT_SCOPE_UNIVERSE;
    rtm->rtm_protocol = RTPROT_UNSPEC;
    rtm->rtm_flags    = 0;

    NLA_PUT_BE32(skb, RTA_SRC, c->mfc_origin);
    NLA_PUT_BE32(skb, RTA_DST, c->mfc_mcastgrp);

    if (__ipmr_fill_mroute(mrt, skb, c, rtm) < 0)
        goto nla_put_failure;

    return nlmsg_end(skb, nlh);

nla_put_failure:
    nlmsg_cancel(skb, nlh);
    return -EMSGSIZE;
}
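/* Netlink dump of all MFC entries in all tables; cb->args[] carries the
 * (table, hash bucket, entry) position so the dump can be resumed.
 */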
static int ipmr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
{
    struct net *net = sock_net(skb->sk);
    struct mr_table *mrt;
    struct mfc_cache *mfc;
    unsigned int t = 0, s_t;
    unsigned int h = 0, s_h;
    unsigned int e = 0, s_e;

    s_t = cb->args[0];
    s_h = cb->args[1];
    s_e = cb->args[2];

    read_lock(&mrt_lock);
    ipmr_for_each_table(mrt, net) {
        if (t < s_t)
            goto next_table;
        if (t > s_t)
            s_h = 0;
        for (h = s_h; h < MFC_LINES; h++) {
            list_for_each_entry(mfc, &mrt->mfc_cache_array[h], list) {
                if (e < s_e)
                    goto next_entry;
                if (ipmr_fill_mroute(mrt, skb,
                                     NETLINK_CB(cb->skb).pid,
                                     cb->nlh->nlmsg_seq,
                                     mfc) < 0)
                    goto done;
next_entry:
                e++;
            }
            e = s_e = 0;
        }
        s_h = 0;
next_table:
        t++;
    }
done:
    read_unlock(&mrt_lock);

    cb->args[2] = e;
    cb->args[1] = h;
    cb->args[0] = t;

    return skb->len;
}
#ifdef CONFIG_PROC_FS
/*
 *	The /proc interfaces to multicast routing /proc/ip_mr_cache /proc/ip_mr_vif
 */
struct ipmr_vif_iter {
    struct seq_net_private p;
    struct mr_table *mrt;
    int ct;
};

static struct vif_device *ipmr_vif_seq_idx(struct net *net,
                                           struct ipmr_vif_iter *iter,
                                           loff_t pos)
{
    struct mr_table *mrt = iter->mrt;

    for (iter->ct = 0; iter->ct < mrt->maxvif; ++iter->ct) {
        if (!VIF_EXISTS(mrt, iter->ct))
            continue;
        if (pos-- == 0)
            return &mrt->vif_table[iter->ct];
    }
    return NULL;
}
*seq
, loff_t
*pos
)
2109 __acquires(mrt_lock
)
2111 struct ipmr_vif_iter
*iter
= seq
->private;
2112 struct net
*net
= seq_file_net(seq
);
2113 struct mr_table
*mrt
;
2115 mrt
= ipmr_get_table(net
, RT_TABLE_DEFAULT
);
2117 return ERR_PTR(-ENOENT
);
2121 read_lock(&mrt_lock
);
2122 return *pos
? ipmr_vif_seq_idx(net
, seq
->private, *pos
- 1)
2126 static void *ipmr_vif_seq_next(struct seq_file
*seq
, void *v
, loff_t
*pos
)
2128 struct ipmr_vif_iter
*iter
= seq
->private;
2129 struct net
*net
= seq_file_net(seq
);
2130 struct mr_table
*mrt
= iter
->mrt
;
2133 if (v
== SEQ_START_TOKEN
)
2134 return ipmr_vif_seq_idx(net
, iter
, 0);
2136 while (++iter
->ct
< mrt
->maxvif
) {
2137 if (!VIF_EXISTS(mrt
, iter
->ct
))
2139 return &mrt
->vif_table
[iter
->ct
];
2144 static void ipmr_vif_seq_stop(struct seq_file
*seq
, void *v
)
2145 __releases(mrt_lock
)
2147 read_unlock(&mrt_lock
);
static int ipmr_vif_seq_show(struct seq_file *seq, void *v)
{
    struct ipmr_vif_iter *iter = seq->private;
    struct mr_table *mrt = iter->mrt;

    if (v == SEQ_START_TOKEN) {
        seq_puts(seq,
                 "Interface      BytesIn  PktsIn  BytesOut PktsOut Flags Local    Remote\n");
    } else {
        const struct vif_device *vif = v;
        const char *name = vif->dev ? vif->dev->name : "none";

        seq_printf(seq,
                   "%2Zd %-10s %8ld %7ld  %8ld %7ld %05X %08X %08X\n",
                   vif - mrt->vif_table,
                   name, vif->bytes_in, vif->pkt_in,
                   vif->bytes_out, vif->pkt_out,
                   vif->flags, vif->local, vif->remote);
    }
    return 0;
}
static const struct seq_operations ipmr_vif_seq_ops = {
    .start = ipmr_vif_seq_start,
    .next  = ipmr_vif_seq_next,
    .stop  = ipmr_vif_seq_stop,
    .show  = ipmr_vif_seq_show,
};

static int ipmr_vif_open(struct inode *inode, struct file *file)
{
    return seq_open_net(inode, file, &ipmr_vif_seq_ops,
                        sizeof(struct ipmr_vif_iter));
}

static const struct file_operations ipmr_vif_fops = {
    .owner   = THIS_MODULE,
    .open    = ipmr_vif_open,
    .read    = seq_read,
    .llseek  = seq_lseek,
    .release = seq_release_net,
};
{
2194 struct seq_net_private p
;
2195 struct mr_table
*mrt
;
2196 struct list_head
*cache
;
2201 static struct mfc_cache
*ipmr_mfc_seq_idx(struct net
*net
,
2202 struct ipmr_mfc_iter
*it
, loff_t pos
)
2204 struct mr_table
*mrt
= it
->mrt
;
2205 struct mfc_cache
*mfc
;
2207 read_lock(&mrt_lock
);
2208 for (it
->ct
= 0; it
->ct
< MFC_LINES
; it
->ct
++) {
2209 it
->cache
= &mrt
->mfc_cache_array
[it
->ct
];
2210 list_for_each_entry(mfc
, it
->cache
, list
)
2214 read_unlock(&mrt_lock
);
2216 spin_lock_bh(&mfc_unres_lock
);
2217 it
->cache
= &mrt
->mfc_unres_queue
;
2218 list_for_each_entry(mfc
, it
->cache
, list
)
2221 spin_unlock_bh(&mfc_unres_lock
);
static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
{
    struct ipmr_mfc_iter *it = seq->private;
    struct net *net = seq_file_net(seq);
    struct mr_table *mrt;

    mrt = ipmr_get_table(net, RT_TABLE_DEFAULT);
    if (mrt == NULL)
        return ERR_PTR(-ENOENT);

    it->mrt = mrt;
    it->cache = NULL;
    it->ct = 0;
    return *pos ? ipmr_mfc_seq_idx(net, seq->private, *pos - 1)
                : SEQ_START_TOKEN;
}
static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
    struct mfc_cache *mfc = v;
    struct ipmr_mfc_iter *it = seq->private;
    struct net *net = seq_file_net(seq);
    struct mr_table *mrt = it->mrt;

    ++*pos;

    if (v == SEQ_START_TOKEN)
        return ipmr_mfc_seq_idx(net, seq->private, 0);

    if (mfc->list.next != it->cache)
        return list_entry(mfc->list.next, struct mfc_cache, list);

    if (it->cache == &mrt->mfc_unres_queue)
        goto end_of_list;

    BUG_ON(it->cache != &mrt->mfc_cache_array[it->ct]);

    while (++it->ct < MFC_LINES) {
        it->cache = &mrt->mfc_cache_array[it->ct];
        if (list_empty(it->cache))
            continue;
        return list_first_entry(it->cache, struct mfc_cache, list);
    }

    /* exhausted cache_array, show unresolved */
    read_unlock(&mrt_lock);
    it->cache = &mrt->mfc_unres_queue;
    it->ct = 0;

    spin_lock_bh(&mfc_unres_lock);
    if (!list_empty(it->cache))
        return list_first_entry(it->cache, struct mfc_cache, list);

end_of_list:
    spin_unlock_bh(&mfc_unres_lock);
    it->cache = NULL;

    return NULL;
}
static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
{
    struct ipmr_mfc_iter *it = seq->private;
    struct mr_table *mrt = it->mrt;

    if (it->cache == &mrt->mfc_unres_queue)
        spin_unlock_bh(&mfc_unres_lock);
    else if (it->cache == &mrt->mfc_cache_array[it->ct])
        read_unlock(&mrt_lock);
}
static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
{
    int n;

    if (v == SEQ_START_TOKEN) {
        seq_puts(seq,
                 "Group    Origin   Iif     Pkts    Bytes    Wrong Oifs\n");
    } else {
        const struct mfc_cache *mfc = v;
        const struct ipmr_mfc_iter *it = seq->private;
        const struct mr_table *mrt = it->mrt;

        seq_printf(seq, "%08X %08X %-3hd",
                   (__force u32) mfc->mfc_mcastgrp,
                   (__force u32) mfc->mfc_origin,
                   mfc->mfc_parent);

        if (it->cache != &mrt->mfc_unres_queue) {
            seq_printf(seq, " %8lu %8lu %8lu",
                       mfc->mfc_un.res.pkt,
                       mfc->mfc_un.res.bytes,
                       mfc->mfc_un.res.wrong_if);
            for (n = mfc->mfc_un.res.minvif;
                 n < mfc->mfc_un.res.maxvif; n++) {
                if (VIF_EXISTS(mrt, n) &&
                    mfc->mfc_un.res.ttls[n] < 255)
                    seq_printf(seq, " %2d:%-3d",
                               n, mfc->mfc_un.res.ttls[n]);
            }
        } else {
            /* unresolved mfc_caches don't contain
             * pkt, bytes and wrong_if values
             */
            seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul);
        }
        seq_putc(seq, '\n');
    }
    return 0;
}
static const struct seq_operations ipmr_mfc_seq_ops = {
    .start = ipmr_mfc_seq_start,
    .next  = ipmr_mfc_seq_next,
    .stop  = ipmr_mfc_seq_stop,
    .show  = ipmr_mfc_seq_show,
};

static int ipmr_mfc_open(struct inode *inode, struct file *file)
{
    return seq_open_net(inode, file, &ipmr_mfc_seq_ops,
                        sizeof(struct ipmr_mfc_iter));
}

static const struct file_operations ipmr_mfc_fops = {
    .owner   = THIS_MODULE,
    .open    = ipmr_mfc_open,
    .read    = seq_read,
    .llseek  = seq_lseek,
    .release = seq_release_net,
};
#endif
#ifdef CONFIG_IP_PIMSM_V2
static const struct net_protocol pim_protocol = {
    .handler	= pim_rcv,
    .netns_ok	= 1,
};
#endif
/*
 *	Setup for IP multicast routing
 */
static int __net_init ipmr_net_init(struct net *net)
{
    int err;

    err = ipmr_rules_init(net);
    if (err < 0)
        goto fail;

#ifdef CONFIG_PROC_FS
    err = -ENOMEM;
    if (!proc_net_fops_create(net, "ip_mr_vif", 0, &ipmr_vif_fops))
        goto proc_vif_fail;
    if (!proc_net_fops_create(net, "ip_mr_cache", 0, &ipmr_mfc_fops))
        goto proc_cache_fail;
#endif
    return 0;

#ifdef CONFIG_PROC_FS
proc_cache_fail:
    proc_net_remove(net, "ip_mr_vif");
proc_vif_fail:
    ipmr_rules_exit(net);
#endif
fail:
    return err;
}

static void __net_exit ipmr_net_exit(struct net *net)
{
#ifdef CONFIG_PROC_FS
    proc_net_remove(net, "ip_mr_cache");
    proc_net_remove(net, "ip_mr_vif");
#endif
    ipmr_rules_exit(net);
}
static struct pernet_operations ipmr_net_ops = {
    .init = ipmr_net_init,
    .exit = ipmr_net_exit,
};
int __init ip_mr_init(void)
{
    int err;

    mrt_cachep = kmem_cache_create("ip_mrt_cache",
                                   sizeof(struct mfc_cache),
                                   0, SLAB_HWCACHE_ALIGN | SLAB_PANIC,
                                   NULL);
    if (!mrt_cachep)
        return -ENOMEM;

    err = register_pernet_subsys(&ipmr_net_ops);
    if (err)
        goto reg_pernet_fail;

    err = register_netdevice_notifier(&ip_mr_notifier);
    if (err)
        goto reg_notif_fail;
#ifdef CONFIG_IP_PIMSM_V2
    if (inet_add_protocol(&pim_protocol, IPPROTO_PIM) < 0) {
        printk(KERN_ERR "ip_mr_init: can't add PIM protocol\n");
        err = -EAGAIN;
        goto add_proto_fail;
    }
#endif
    rtnl_register(RTNL_FAMILY_IPMR, RTM_GETROUTE, NULL, ipmr_rtm_dumproute);
    return 0;

#ifdef CONFIG_IP_PIMSM_V2
add_proto_fail:
    unregister_netdevice_notifier(&ip_mr_notifier);
#endif
reg_notif_fail:
    unregister_pernet_subsys(&ipmr_net_ops);
reg_pernet_fail:
    kmem_cache_destroy(mrt_cachep);
    return err;
}