/* release/src-rt-6.x.4708/linux/linux-2.6.36/net/ipv4/ipmr.c */
1 /*
2 * IP multicast routing support for mrouted 3.6/3.8
4 * (c) 1995 Alan Cox, <alan@lxorguk.ukuu.org.uk>
5 * Linux Consultancy and Custom Driver Development
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation; either version
10 * 2 of the License, or (at your option) any later version.
12 * Fixes:
13 * Michael Chastain : Incorrect size of copying.
14 * Alan Cox : Added the cache manager code
15 * Alan Cox : Fixed the clone/copy bug and device race.
16 * Mike McLagan : Routing by source
17 * Malcolm Beattie : Buffer handling fixes.
18 * Alexey Kuznetsov : Double buffer free and other fixes.
19 * SVR Anand : Fixed several multicast bugs and problems.
20 * Alexey Kuznetsov : Status, optimisations and more.
21 * Brad Parker : Better behaviour on mrouted upcall
22 * overflow.
23 * Carlos Picoto : PIMv1 Support
24 * Pavlin Ivanov Radoslavov: PIMv2 Registers must checksum only PIM header
25 * Relax this requirement to work with older peers.
29 #include <asm/system.h>
30 #include <asm/uaccess.h>
31 #include <linux/types.h>
32 #include <linux/capability.h>
33 #include <linux/errno.h>
34 #include <linux/timer.h>
35 #include <linux/mm.h>
36 #include <linux/kernel.h>
37 #include <linux/fcntl.h>
38 #include <linux/stat.h>
39 #include <linux/socket.h>
40 #include <linux/in.h>
41 #include <linux/inet.h>
42 #include <linux/netdevice.h>
43 #include <linux/inetdevice.h>
44 #include <linux/igmp.h>
45 #include <linux/proc_fs.h>
46 #include <linux/seq_file.h>
47 #include <linux/mroute.h>
48 #include <linux/init.h>
49 #include <linux/if_ether.h>
50 #include <linux/slab.h>
51 #include <net/net_namespace.h>
52 #include <net/ip.h>
53 #include <net/protocol.h>
54 #include <linux/skbuff.h>
55 #include <net/route.h>
56 #include <net/sock.h>
57 #include <net/icmp.h>
58 #include <net/udp.h>
59 #include <net/raw.h>
60 #include <linux/notifier.h>
61 #include <linux/if_arp.h>
62 #include <linux/netfilter_ipv4.h>
63 #include <net/ipip.h>
64 #include <net/checksum.h>
65 #include <net/netlink.h>
66 #include <net/fib_rules.h>
68 #if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2)
69 #define CONFIG_IP_PIMSM 1
70 #endif
72 struct mr_table {
73 struct list_head list;
74 #ifdef CONFIG_NET_NS
75 struct net *net;
76 #endif
77 u32 id;
78 struct sock *mroute_sk;
79 struct timer_list ipmr_expire_timer;
80 struct list_head mfc_unres_queue;
81 struct list_head mfc_cache_array[MFC_LINES];
82 struct vif_device vif_table[MAXVIFS];
83 int maxvif;
84 atomic_t cache_resolve_queue_len;
85 int mroute_do_assert;
86 int mroute_do_pim;
87 #if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2)
88 int mroute_reg_vif_num;
89 #endif
92 struct ipmr_rule {
93 struct fib_rule common;
96 struct ipmr_result {
97 struct mr_table *mrt;
100 /* Big lock, protecting vif table, mrt cache and mroute socket state.
101 Note that the changes are semaphored via rtnl_lock.
104 static DEFINE_RWLOCK(mrt_lock);
107 * Multicast router control variables
110 #define VIF_EXISTS(_mrt, _idx) ((_mrt)->vif_table[_idx].dev != NULL)
112 /* Special spinlock for queue of unresolved entries */
113 static DEFINE_SPINLOCK(mfc_unres_lock);
115 /* We return to Alan's original scheme. The hash table of resolved
116 entries is changed only in process context and protected
117 with the weak lock mrt_lock. The queue of unresolved entries is protected
118 with the strong spinlock mfc_unres_lock.
120 In this case the data path is entirely free of exclusive locks.
123 static struct kmem_cache *mrt_cachep __read_mostly;
125 static struct mr_table *ipmr_new_table(struct net *net, u32 id);
126 static int ip_mr_forward(struct net *net, struct mr_table *mrt,
127 struct sk_buff *skb, struct mfc_cache *cache,
128 int local);
129 static int ipmr_cache_report(struct mr_table *mrt,
130 struct sk_buff *pkt, vifi_t vifi, int assert);
131 static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
132 struct mfc_cache *c, struct rtmsg *rtm);
133 static void ipmr_expire_process(unsigned long arg);
135 #ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES
136 #define ipmr_for_each_table(mrt, net) \
137 list_for_each_entry_rcu(mrt, &net->ipv4.mr_tables, list)
139 static struct mr_table *ipmr_get_table(struct net *net, u32 id)
141 struct mr_table *mrt;
143 ipmr_for_each_table(mrt, net) {
144 if (mrt->id == id)
145 return mrt;
147 return NULL;
150 static int ipmr_fib_lookup(struct net *net, struct flowi *flp,
151 struct mr_table **mrt)
153 struct ipmr_result res;
154 struct fib_lookup_arg arg = { .result = &res, };
155 int err;
157 err = fib_rules_lookup(net->ipv4.mr_rules_ops, flp, 0, &arg);
158 if (err < 0)
159 return err;
160 *mrt = res.mrt;
161 return 0;
164 static int ipmr_rule_action(struct fib_rule *rule, struct flowi *flp,
165 int flags, struct fib_lookup_arg *arg)
167 struct ipmr_result *res = arg->result;
168 struct mr_table *mrt;
170 switch (rule->action) {
171 case FR_ACT_TO_TBL:
172 break;
173 case FR_ACT_UNREACHABLE:
174 return -ENETUNREACH;
175 case FR_ACT_PROHIBIT:
176 return -EACCES;
177 case FR_ACT_BLACKHOLE:
178 default:
179 return -EINVAL;
182 mrt = ipmr_get_table(rule->fr_net, rule->table);
183 if (mrt == NULL)
184 return -EAGAIN;
185 res->mrt = mrt;
186 return 0;
189 static int ipmr_rule_match(struct fib_rule *rule, struct flowi *fl, int flags)
191 return 1;
194 static const struct nla_policy ipmr_rule_policy[FRA_MAX + 1] = {
195 FRA_GENERIC_POLICY,
198 static int ipmr_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
199 struct fib_rule_hdr *frh, struct nlattr **tb)
201 return 0;
204 static int ipmr_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
205 struct nlattr **tb)
207 return 1;
210 static int ipmr_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
211 struct fib_rule_hdr *frh)
213 frh->dst_len = 0;
214 frh->src_len = 0;
215 frh->tos = 0;
216 return 0;
219 static const struct fib_rules_ops __net_initdata ipmr_rules_ops_template = {
220 .family = RTNL_FAMILY_IPMR,
221 .rule_size = sizeof(struct ipmr_rule),
222 .addr_size = sizeof(u32),
223 .action = ipmr_rule_action,
224 .match = ipmr_rule_match,
225 .configure = ipmr_rule_configure,
226 .compare = ipmr_rule_compare,
227 .default_pref = fib_default_rule_pref,
228 .fill = ipmr_rule_fill,
229 .nlgroup = RTNLGRP_IPV4_RULE,
230 .policy = ipmr_rule_policy,
231 .owner = THIS_MODULE,
234 static int __net_init ipmr_rules_init(struct net *net)
236 struct fib_rules_ops *ops;
237 struct mr_table *mrt;
238 int err;
240 ops = fib_rules_register(&ipmr_rules_ops_template, net);
241 if (IS_ERR(ops))
242 return PTR_ERR(ops);
244 INIT_LIST_HEAD(&net->ipv4.mr_tables);
246 mrt = ipmr_new_table(net, RT_TABLE_DEFAULT);
247 if (mrt == NULL) {
248 err = -ENOMEM;
249 goto err1;
252 err = fib_default_rule_add(ops, 0x7fff, RT_TABLE_DEFAULT, 0);
253 if (err < 0)
254 goto err2;
256 net->ipv4.mr_rules_ops = ops;
257 return 0;
259 err2:
260 kfree(mrt);
261 err1:
262 fib_rules_unregister(ops);
263 return err;
266 static void __net_exit ipmr_rules_exit(struct net *net)
268 struct mr_table *mrt, *next;
270 list_for_each_entry_safe(mrt, next, &net->ipv4.mr_tables, list) {
271 list_del(&mrt->list);
272 kfree(mrt);
274 fib_rules_unregister(net->ipv4.mr_rules_ops);
276 #else
277 #define ipmr_for_each_table(mrt, net) \
278 for (mrt = net->ipv4.mrt; mrt; mrt = NULL)
280 static struct mr_table *ipmr_get_table(struct net *net, u32 id)
282 return net->ipv4.mrt;
285 static int ipmr_fib_lookup(struct net *net, struct flowi *flp,
286 struct mr_table **mrt)
288 *mrt = net->ipv4.mrt;
289 return 0;
292 static int __net_init ipmr_rules_init(struct net *net)
294 net->ipv4.mrt = ipmr_new_table(net, RT_TABLE_DEFAULT);
295 return net->ipv4.mrt ? 0 : -ENOMEM;
298 static void __net_exit ipmr_rules_exit(struct net *net)
300 kfree(net->ipv4.mrt);
302 #endif
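/*
 * Table selection summary: with CONFIG_IP_MROUTE_MULTIPLE_TABLES the
 * per-netns tables hang off net->ipv4.mr_tables and lookups go through the
 * RTNL_FAMILY_IPMR fib rules (ipmr_fib_lookup above); without it there is
 * exactly one table, net->ipv4.mrt, and every lookup trivially returns it.
 */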
304 static struct mr_table *ipmr_new_table(struct net *net, u32 id)
306 struct mr_table *mrt;
307 unsigned int i;
309 mrt = ipmr_get_table(net, id);
310 if (mrt != NULL)
311 return mrt;
313 mrt = kzalloc(sizeof(*mrt), GFP_KERNEL);
314 if (mrt == NULL)
315 return NULL;
316 write_pnet(&mrt->net, net);
317 mrt->id = id;
319 /* Forwarding cache */
320 for (i = 0; i < MFC_LINES; i++)
321 INIT_LIST_HEAD(&mrt->mfc_cache_array[i]);
323 INIT_LIST_HEAD(&mrt->mfc_unres_queue);
325 setup_timer(&mrt->ipmr_expire_timer, ipmr_expire_process,
326 (unsigned long)mrt);
328 #ifdef CONFIG_IP_PIMSM
329 mrt->mroute_reg_vif_num = -1;
330 #endif
331 #ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES
332 list_add_tail_rcu(&mrt->list, &net->ipv4.mr_tables);
333 #endif
334 return mrt;
337 /* Service routines creating virtual interfaces: DVMRP tunnels and PIMREG */
339 static void ipmr_del_tunnel(struct net_device *dev, struct vifctl *v)
341 struct net *net = dev_net(dev);
343 dev_close(dev);
345 dev = __dev_get_by_name(net, "tunl0");
346 if (dev) {
347 const struct net_device_ops *ops = dev->netdev_ops;
348 struct ifreq ifr;
349 struct ip_tunnel_parm p;
351 memset(&p, 0, sizeof(p));
352 p.iph.daddr = v->vifc_rmt_addr.s_addr;
353 p.iph.saddr = v->vifc_lcl_addr.s_addr;
354 p.iph.version = 4;
355 p.iph.ihl = 5;
356 p.iph.protocol = IPPROTO_IPIP;
357 sprintf(p.name, "dvmrp%d", v->vifc_vifi);
358 ifr.ifr_ifru.ifru_data = (__force void __user *)&p;
360 if (ops->ndo_do_ioctl) {
361 mm_segment_t oldfs = get_fs();
363 set_fs(KERNEL_DS);
364 ops->ndo_do_ioctl(dev, &ifr, SIOCDELTUNNEL);
365 set_fs(oldfs);
370 static
371 struct net_device *ipmr_new_tunnel(struct net *net, struct vifctl *v)
373 struct net_device *dev;
375 dev = __dev_get_by_name(net, "tunl0");
377 if (dev) {
378 const struct net_device_ops *ops = dev->netdev_ops;
379 int err;
380 struct ifreq ifr;
381 struct ip_tunnel_parm p;
382 struct in_device *in_dev;
384 memset(&p, 0, sizeof(p));
385 p.iph.daddr = v->vifc_rmt_addr.s_addr;
386 p.iph.saddr = v->vifc_lcl_addr.s_addr;
387 p.iph.version = 4;
388 p.iph.ihl = 5;
389 p.iph.protocol = IPPROTO_IPIP;
390 sprintf(p.name, "dvmrp%d", v->vifc_vifi);
391 ifr.ifr_ifru.ifru_data = (__force void __user *)&p;
393 if (ops->ndo_do_ioctl) {
394 mm_segment_t oldfs = get_fs();
396 set_fs(KERNEL_DS);
397 err = ops->ndo_do_ioctl(dev, &ifr, SIOCADDTUNNEL);
398 set_fs(oldfs);
399 } else
400 err = -EOPNOTSUPP;
402 dev = NULL;
404 if (err == 0 &&
405 (dev = __dev_get_by_name(net, p.name)) != NULL) {
406 dev->flags |= IFF_MULTICAST;
408 in_dev = __in_dev_get_rtnl(dev);
409 if (in_dev == NULL)
410 goto failure;
412 ipv4_devconf_setall(in_dev);
413 IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0;
415 if (dev_open(dev))
416 goto failure;
417 dev_hold(dev);
420 return dev;
422 failure:
423 /* allow the register to be completed before unregistering. */
424 rtnl_unlock();
425 rtnl_lock();
427 unregister_netdevice(dev);
428 return NULL;
431 #ifdef CONFIG_IP_PIMSM
433 static netdev_tx_t reg_vif_xmit(struct sk_buff *skb, struct net_device *dev)
435 struct net *net = dev_net(dev);
436 struct mr_table *mrt;
437 struct flowi fl = {
438 .oif = dev->ifindex,
439 .iif = skb->skb_iif,
440 .mark = skb->mark,
442 int err;
444 err = ipmr_fib_lookup(net, &fl, &mrt);
445 if (err < 0) {
446 kfree_skb(skb);
447 return err;
450 read_lock(&mrt_lock);
451 dev->stats.tx_bytes += skb->len;
452 dev->stats.tx_packets++;
453 ipmr_cache_report(mrt, skb, mrt->mroute_reg_vif_num, IGMPMSG_WHOLEPKT);
454 read_unlock(&mrt_lock);
455 kfree_skb(skb);
456 return NETDEV_TX_OK;
459 static const struct net_device_ops reg_vif_netdev_ops = {
460 .ndo_start_xmit = reg_vif_xmit,
463 static void reg_vif_setup(struct net_device *dev)
465 dev->type = ARPHRD_PIMREG;
466 dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr) - 8;
467 dev->flags = IFF_NOARP;
468 dev->netdev_ops = &reg_vif_netdev_ops;
469 dev->destructor = free_netdev;
470 dev->features |= NETIF_F_NETNS_LOCAL;
473 static struct net_device *ipmr_reg_vif(struct net *net, struct mr_table *mrt)
475 struct net_device *dev;
476 struct in_device *in_dev;
477 char name[IFNAMSIZ];
479 if (mrt->id == RT_TABLE_DEFAULT)
480 sprintf(name, "pimreg");
481 else
482 sprintf(name, "pimreg%u", mrt->id);
484 dev = alloc_netdev(0, name, reg_vif_setup);
486 if (dev == NULL)
487 return NULL;
489 dev_net_set(dev, net);
491 if (register_netdevice(dev)) {
492 free_netdev(dev);
493 return NULL;
495 dev->iflink = 0;
497 rcu_read_lock();
498 if ((in_dev = __in_dev_get_rcu(dev)) == NULL) {
499 rcu_read_unlock();
500 goto failure;
503 ipv4_devconf_setall(in_dev);
504 IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0;
505 rcu_read_unlock();
507 if (dev_open(dev))
508 goto failure;
510 dev_hold(dev);
512 return dev;
514 failure:
515 /* allow the register to be completed before unregistering. */
516 rtnl_unlock();
517 rtnl_lock();
519 unregister_netdevice(dev);
520 return NULL;
522 #endif
525 * Delete a VIF entry
526 * @notify: Set to 1, if the caller is a notifier_call
529 static int vif_delete(struct mr_table *mrt, int vifi, int notify,
530 struct list_head *head)
532 struct vif_device *v;
533 struct net_device *dev;
534 struct in_device *in_dev;
536 if (vifi < 0 || vifi >= mrt->maxvif)
537 return -EADDRNOTAVAIL;
539 v = &mrt->vif_table[vifi];
541 write_lock_bh(&mrt_lock);
542 dev = v->dev;
543 v->dev = NULL;
545 if (!dev) {
546 write_unlock_bh(&mrt_lock);
547 return -EADDRNOTAVAIL;
550 #ifdef CONFIG_IP_PIMSM
551 if (vifi == mrt->mroute_reg_vif_num)
552 mrt->mroute_reg_vif_num = -1;
553 #endif
555 if (vifi+1 == mrt->maxvif) {
556 int tmp;
557 for (tmp=vifi-1; tmp>=0; tmp--) {
558 if (VIF_EXISTS(mrt, tmp))
559 break;
561 mrt->maxvif = tmp+1;
564 write_unlock_bh(&mrt_lock);
566 dev_set_allmulti(dev, -1);
568 if ((in_dev = __in_dev_get_rtnl(dev)) != NULL) {
569 IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)--;
570 ip_rt_multicast_event(in_dev);
573 if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER) && !notify)
574 unregister_netdevice_queue(dev, head);
576 dev_put(dev);
577 return 0;
580 static inline void ipmr_cache_free(struct mfc_cache *c)
582 kmem_cache_free(mrt_cachep, c);
585 /* Destroy an unresolved cache entry, killing queued skbs
586 and reporting error to netlink readers.
589 static void ipmr_destroy_unres(struct mr_table *mrt, struct mfc_cache *c)
591 struct net *net = read_pnet(&mrt->net);
592 struct sk_buff *skb;
593 struct nlmsgerr *e;
595 atomic_dec(&mrt->cache_resolve_queue_len);
597 while ((skb = skb_dequeue(&c->mfc_un.unres.unresolved))) {
598 if (ip_hdr(skb)->version == 0) {
599 struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));
600 nlh->nlmsg_type = NLMSG_ERROR;
601 nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
602 skb_trim(skb, nlh->nlmsg_len);
603 e = NLMSG_DATA(nlh);
604 e->error = -ETIMEDOUT;
605 memset(&e->msg, 0, sizeof(e->msg));
607 rtnl_unicast(skb, net, NETLINK_CB(skb).pid);
608 } else
609 kfree_skb(skb);
612 ipmr_cache_free(c);
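/*
 * Note the iph->version == 0 test above: skbs with a zeroed version field
 * are not real packets but pending netlink route queries created by
 * ipmr_get_route(), so they are answered with an NLMSG_ERROR of -ETIMEDOUT
 * instead of being silently freed.
 */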
616 /* Timer process for the unresolved queue. */
618 static void ipmr_expire_process(unsigned long arg)
620 struct mr_table *mrt = (struct mr_table *)arg;
621 unsigned long now;
622 unsigned long expires;
623 struct mfc_cache *c, *next;
625 if (!spin_trylock(&mfc_unres_lock)) {
626 mod_timer(&mrt->ipmr_expire_timer, jiffies+HZ/10);
627 return;
630 if (list_empty(&mrt->mfc_unres_queue))
631 goto out;
633 now = jiffies;
634 expires = 10*HZ;
636 list_for_each_entry_safe(c, next, &mrt->mfc_unres_queue, list) {
637 if (time_after(c->mfc_un.unres.expires, now)) {
638 unsigned long interval = c->mfc_un.unres.expires - now;
639 if (interval < expires)
640 expires = interval;
641 continue;
644 list_del(&c->list);
645 ipmr_destroy_unres(mrt, c);
648 if (!list_empty(&mrt->mfc_unres_queue))
649 mod_timer(&mrt->ipmr_expire_timer, jiffies + expires);
651 out:
652 spin_unlock(&mfc_unres_lock);
655 /* Fill oifs list. It is called under write locked mrt_lock. */
657 static void ipmr_update_thresholds(struct mr_table *mrt, struct mfc_cache *cache,
658 unsigned char *ttls)
660 int vifi;
662 cache->mfc_un.res.minvif = MAXVIFS;
663 cache->mfc_un.res.maxvif = 0;
664 memset(cache->mfc_un.res.ttls, 255, MAXVIFS);
666 for (vifi = 0; vifi < mrt->maxvif; vifi++) {
667 if (VIF_EXISTS(mrt, vifi) &&
668 ttls[vifi] && ttls[vifi] < 255) {
669 cache->mfc_un.res.ttls[vifi] = ttls[vifi];
670 if (cache->mfc_un.res.minvif > vifi)
671 cache->mfc_un.res.minvif = vifi;
672 if (cache->mfc_un.res.maxvif <= vifi)
673 cache->mfc_un.res.maxvif = vifi + 1;
678 static int vif_add(struct net *net, struct mr_table *mrt,
679 struct vifctl *vifc, int mrtsock)
681 int vifi = vifc->vifc_vifi;
682 struct vif_device *v = &mrt->vif_table[vifi];
683 struct net_device *dev;
684 struct in_device *in_dev;
685 int err;
687 /* Is vif busy ? */
688 if (VIF_EXISTS(mrt, vifi))
689 return -EADDRINUSE;
691 switch (vifc->vifc_flags) {
692 #ifdef CONFIG_IP_PIMSM
693 case VIFF_REGISTER:
695 * Special Purpose VIF in PIM
696 * All the packets will be sent to the daemon
698 if (mrt->mroute_reg_vif_num >= 0)
699 return -EADDRINUSE;
700 dev = ipmr_reg_vif(net, mrt);
701 if (!dev)
702 return -ENOBUFS;
703 err = dev_set_allmulti(dev, 1);
704 if (err) {
705 unregister_netdevice(dev);
706 dev_put(dev);
707 return err;
709 break;
710 #endif
711 case VIFF_TUNNEL:
712 dev = ipmr_new_tunnel(net, vifc);
713 if (!dev)
714 return -ENOBUFS;
715 err = dev_set_allmulti(dev, 1);
716 if (err) {
717 ipmr_del_tunnel(dev, vifc);
718 dev_put(dev);
719 return err;
721 break;
723 case VIFF_USE_IFINDEX:
724 case 0:
725 if (vifc->vifc_flags == VIFF_USE_IFINDEX) {
726 dev = dev_get_by_index(net, vifc->vifc_lcl_ifindex);
727 if (dev && dev->ip_ptr == NULL) {
728 dev_put(dev);
729 return -EADDRNOTAVAIL;
731 } else
732 dev = ip_dev_find(net, vifc->vifc_lcl_addr.s_addr);
734 if (!dev)
735 return -EADDRNOTAVAIL;
736 err = dev_set_allmulti(dev, 1);
737 if (err) {
738 dev_put(dev);
739 return err;
741 break;
742 default:
743 return -EINVAL;
746 if ((in_dev = __in_dev_get_rtnl(dev)) == NULL) {
747 dev_put(dev);
748 return -EADDRNOTAVAIL;
750 IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)++;
751 ip_rt_multicast_event(in_dev);
754 * Fill in the VIF structures
756 v->rate_limit = vifc->vifc_rate_limit;
757 v->local = vifc->vifc_lcl_addr.s_addr;
758 v->remote = vifc->vifc_rmt_addr.s_addr;
759 v->flags = vifc->vifc_flags;
760 if (!mrtsock)
761 v->flags |= VIFF_STATIC;
762 v->threshold = vifc->vifc_threshold;
763 v->bytes_in = 0;
764 v->bytes_out = 0;
765 v->pkt_in = 0;
766 v->pkt_out = 0;
767 v->link = dev->ifindex;
768 if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER))
769 v->link = dev->iflink;
771 /* And finish update writing critical data */
772 write_lock_bh(&mrt_lock);
773 v->dev = dev;
774 #ifdef CONFIG_IP_PIMSM
775 if (v->flags&VIFF_REGISTER)
776 mrt->mroute_reg_vif_num = vifi;
777 #endif
778 if (vifi+1 > mrt->maxvif)
779 mrt->maxvif = vifi+1;
780 write_unlock_bh(&mrt_lock);
781 return 0;
784 static struct mfc_cache *ipmr_cache_find(struct mr_table *mrt,
785 __be32 origin,
786 __be32 mcastgrp)
788 int line = MFC_HASH(mcastgrp, origin);
789 struct mfc_cache *c;
791 list_for_each_entry(c, &mrt->mfc_cache_array[line], list) {
792 if (c->mfc_origin == origin && c->mfc_mcastgrp == mcastgrp)
793 return c;
795 return NULL;
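/*
 * Resolved entries are hashed by (mcastgrp, origin) with MFC_HASH() into the
 * MFC_LINES buckets of mfc_cache_array[]; readers walk a bucket under
 * read_lock(&mrt_lock), writers take write_lock_bh(&mrt_lock).
 */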
799 * Allocate a multicast cache entry
801 static struct mfc_cache *ipmr_cache_alloc(void)
803 struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
804 if (c == NULL)
805 return NULL;
806 c->mfc_un.res.minvif = MAXVIFS;
807 return c;
810 static struct mfc_cache *ipmr_cache_alloc_unres(void)
812 struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
813 if (c == NULL)
814 return NULL;
815 skb_queue_head_init(&c->mfc_un.unres.unresolved);
816 c->mfc_un.unres.expires = jiffies + 10*HZ;
817 return c;
821 * A cache entry has gone into a resolved state from queued
824 static void ipmr_cache_resolve(struct net *net, struct mr_table *mrt,
825 struct mfc_cache *uc, struct mfc_cache *c)
827 struct sk_buff *skb;
828 struct nlmsgerr *e;
831 * Play the pending entries through our router
834 while ((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) {
835 if (ip_hdr(skb)->version == 0) {
836 struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));
838 if (__ipmr_fill_mroute(mrt, skb, c, NLMSG_DATA(nlh)) > 0) {
839 nlh->nlmsg_len = (skb_tail_pointer(skb) -
840 (u8 *)nlh);
841 } else {
842 nlh->nlmsg_type = NLMSG_ERROR;
843 nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
844 skb_trim(skb, nlh->nlmsg_len);
845 e = NLMSG_DATA(nlh);
846 e->error = -EMSGSIZE;
847 memset(&e->msg, 0, sizeof(e->msg));
850 rtnl_unicast(skb, net, NETLINK_CB(skb).pid);
851 } else
852 ip_mr_forward(net, mrt, skb, c, 0);
857 * Bounce a cache query up to mrouted. We could use netlink for this but mrouted
858 * expects the following bizarre scheme.
860 * Called under mrt_lock.
863 static int ipmr_cache_report(struct mr_table *mrt,
864 struct sk_buff *pkt, vifi_t vifi, int assert)
866 struct sk_buff *skb;
867 const int ihl = ip_hdrlen(pkt);
868 struct igmphdr *igmp;
869 struct igmpmsg *msg;
870 int ret;
872 #ifdef CONFIG_IP_PIMSM
873 if (assert == IGMPMSG_WHOLEPKT)
874 skb = skb_realloc_headroom(pkt, sizeof(struct iphdr));
875 else
876 #endif
877 skb = alloc_skb(128, GFP_ATOMIC);
879 if (!skb)
880 return -ENOBUFS;
882 #ifdef CONFIG_IP_PIMSM
883 if (assert == IGMPMSG_WHOLEPKT) {
884 /* Ugly, but we have no choice with this interface.
885 Duplicate old header, fix ihl, length etc.
886 And all this only to mangle msg->im_msgtype and
887 to set msg->im_mbz to "mbz" :-)
889 skb_push(skb, sizeof(struct iphdr));
890 skb_reset_network_header(skb);
891 skb_reset_transport_header(skb);
892 msg = (struct igmpmsg *)skb_network_header(skb);
893 memcpy(msg, skb_network_header(pkt), sizeof(struct iphdr));
894 msg->im_msgtype = IGMPMSG_WHOLEPKT;
895 msg->im_mbz = 0;
896 msg->im_vif = mrt->mroute_reg_vif_num;
897 ip_hdr(skb)->ihl = sizeof(struct iphdr) >> 2;
898 ip_hdr(skb)->tot_len = htons(ntohs(ip_hdr(pkt)->tot_len) +
899 sizeof(struct iphdr));
900 } else
901 #endif
905 * Copy the IP header
908 skb->network_header = skb->tail;
909 skb_put(skb, ihl);
910 skb_copy_to_linear_data(skb, pkt->data, ihl);
911 ip_hdr(skb)->protocol = 0; /* Flag to the kernel this is a route add */
912 msg = (struct igmpmsg *)skb_network_header(skb);
913 msg->im_vif = vifi;
914 skb_dst_set(skb, dst_clone(skb_dst(pkt)));
917 * Add our header
920 igmp=(struct igmphdr *)skb_put(skb, sizeof(struct igmphdr));
921 igmp->type =
922 msg->im_msgtype = assert;
923 igmp->code = 0;
924 ip_hdr(skb)->tot_len = htons(skb->len); /* Fix the length */
925 skb->transport_header = skb->network_header;
928 if (mrt->mroute_sk == NULL) {
929 kfree_skb(skb);
930 return -EINVAL;
934 * Deliver to mrouted
936 ret = sock_queue_rcv_skb(mrt->mroute_sk, skb);
937 if (ret < 0) {
938 if (net_ratelimit())
939 printk(KERN_WARNING "mroute: pending queue full, dropping entries.\n");
940 kfree_skb(skb);
943 return ret;
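/*
 * Upcall format: the daemon does not receive a real IGMP packet here; it
 * receives a struct igmpmsg overlaid on a copy of the offending IP header,
 * with im_msgtype set to the assert code (IGMPMSG_NOCACHE, IGMPMSG_WRONGVIF
 * or IGMPMSG_WHOLEPKT) and im_vif set to the relevant vif, queued straight
 * on the mroute control socket's receive queue.
 */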
947 * Queue a packet for resolution. It gets locked cache entry!
950 static int
951 ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi, struct sk_buff *skb)
953 bool found = false;
954 int err;
955 struct mfc_cache *c;
956 const struct iphdr *iph = ip_hdr(skb);
958 spin_lock_bh(&mfc_unres_lock);
959 list_for_each_entry(c, &mrt->mfc_unres_queue, list) {
960 if (c->mfc_mcastgrp == iph->daddr &&
961 c->mfc_origin == iph->saddr) {
962 found = true;
963 break;
967 if (!found) {
969 * Create a new entry if allowable
972 if (atomic_read(&mrt->cache_resolve_queue_len) >= 10 ||
973 (c = ipmr_cache_alloc_unres()) == NULL) {
974 spin_unlock_bh(&mfc_unres_lock);
976 kfree_skb(skb);
977 return -ENOBUFS;
981 * Fill in the new cache entry
983 c->mfc_parent = -1;
984 c->mfc_origin = iph->saddr;
985 c->mfc_mcastgrp = iph->daddr;
988 * Reflect first query at mrouted.
990 err = ipmr_cache_report(mrt, skb, vifi, IGMPMSG_NOCACHE);
991 if (err < 0) {
992 /* If the report failed throw the cache entry
993 out - Brad Parker
995 spin_unlock_bh(&mfc_unres_lock);
997 ipmr_cache_free(c);
998 kfree_skb(skb);
999 return err;
1002 atomic_inc(&mrt->cache_resolve_queue_len);
1003 list_add(&c->list, &mrt->mfc_unres_queue);
1005 if (atomic_read(&mrt->cache_resolve_queue_len) == 1)
1006 mod_timer(&mrt->ipmr_expire_timer, c->mfc_un.unres.expires);
1010 * See if we can append the packet
1012 if (c->mfc_un.unres.unresolved.qlen>3) {
1013 kfree_skb(skb);
1014 err = -ENOBUFS;
1015 } else {
1016 skb_queue_tail(&c->mfc_un.unres.unresolved, skb);
1017 err = 0;
1020 spin_unlock_bh(&mfc_unres_lock);
1021 return err;
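/*
 * Resource limits enforced above: a table holds at most 10 unresolved
 * entries (cache_resolve_queue_len), each entry queues only a handful of
 * packets (anything past the qlen > 3 check is dropped), and entries that
 * stay unresolved for 10*HZ are reaped by ipmr_expire_process().
 */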
1025 * MFC cache manipulation by user space mroute daemon
1028 static int ipmr_mfc_delete(struct mr_table *mrt, struct mfcctl *mfc)
1030 int line;
1031 struct mfc_cache *c, *next;
1033 line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);
1035 list_for_each_entry_safe(c, next, &mrt->mfc_cache_array[line], list) {
1036 if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
1037 c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) {
1038 write_lock_bh(&mrt_lock);
1039 list_del(&c->list);
1040 write_unlock_bh(&mrt_lock);
1042 ipmr_cache_free(c);
1043 return 0;
1046 return -ENOENT;
1049 static int ipmr_mfc_add(struct net *net, struct mr_table *mrt,
1050 struct mfcctl *mfc, int mrtsock)
1052 bool found = false;
1053 int line;
1054 struct mfc_cache *uc, *c;
1056 if (mfc->mfcc_parent >= MAXVIFS)
1057 return -ENFILE;
1059 line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);
1061 list_for_each_entry(c, &mrt->mfc_cache_array[line], list) {
1062 if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
1063 c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) {
1064 found = true;
1065 break;
1069 if (found) {
1070 write_lock_bh(&mrt_lock);
1071 c->mfc_parent = mfc->mfcc_parent;
1072 ipmr_update_thresholds(mrt, c, mfc->mfcc_ttls);
1073 if (!mrtsock)
1074 c->mfc_flags |= MFC_STATIC;
1075 write_unlock_bh(&mrt_lock);
1076 return 0;
1079 if (!ipv4_is_multicast(mfc->mfcc_mcastgrp.s_addr))
1080 return -EINVAL;
1082 c = ipmr_cache_alloc();
1083 if (c == NULL)
1084 return -ENOMEM;
1086 c->mfc_origin = mfc->mfcc_origin.s_addr;
1087 c->mfc_mcastgrp = mfc->mfcc_mcastgrp.s_addr;
1088 c->mfc_parent = mfc->mfcc_parent;
1089 ipmr_update_thresholds(mrt, c, mfc->mfcc_ttls);
1090 if (!mrtsock)
1091 c->mfc_flags |= MFC_STATIC;
1093 write_lock_bh(&mrt_lock);
1094 list_add(&c->list, &mrt->mfc_cache_array[line]);
1095 write_unlock_bh(&mrt_lock);
1098 * Check to see if we resolved a queued list. If so we
1099 * need to send on the frames and tidy up.
1101 found = false;
1102 spin_lock_bh(&mfc_unres_lock);
1103 list_for_each_entry(uc, &mrt->mfc_unres_queue, list) {
1104 if (uc->mfc_origin == c->mfc_origin &&
1105 uc->mfc_mcastgrp == c->mfc_mcastgrp) {
1106 list_del(&uc->list);
1107 atomic_dec(&mrt->cache_resolve_queue_len);
1108 found = true;
1109 break;
1112 if (list_empty(&mrt->mfc_unres_queue))
1113 del_timer(&mrt->ipmr_expire_timer);
1114 spin_unlock_bh(&mfc_unres_lock);
1116 if (found) {
1117 ipmr_cache_resolve(net, mrt, uc, c);
1118 ipmr_cache_free(uc);
1120 return 0;
1124 * Close the multicast socket, and clear the vif tables etc
1127 static void mroute_clean_tables(struct mr_table *mrt)
1129 int i;
1130 LIST_HEAD(list);
1131 struct mfc_cache *c, *next;
1134 * Shut down all active vif entries
1136 for (i = 0; i < mrt->maxvif; i++) {
1137 if (!(mrt->vif_table[i].flags&VIFF_STATIC))
1138 vif_delete(mrt, i, 0, &list);
1140 unregister_netdevice_many(&list);
1143 * Wipe the cache
1145 for (i = 0; i < MFC_LINES; i++) {
1146 list_for_each_entry_safe(c, next, &mrt->mfc_cache_array[i], list) {
1147 if (c->mfc_flags&MFC_STATIC)
1148 continue;
1149 write_lock_bh(&mrt_lock);
1150 list_del(&c->list);
1151 write_unlock_bh(&mrt_lock);
1153 ipmr_cache_free(c);
1157 if (atomic_read(&mrt->cache_resolve_queue_len) != 0) {
1158 spin_lock_bh(&mfc_unres_lock);
1159 list_for_each_entry_safe(c, next, &mrt->mfc_unres_queue, list) {
1160 list_del(&c->list);
1161 ipmr_destroy_unres(mrt, c);
1163 spin_unlock_bh(&mfc_unres_lock);
1167 static void mrtsock_destruct(struct sock *sk)
1169 struct net *net = sock_net(sk);
1170 struct mr_table *mrt;
1172 rtnl_lock();
1173 ipmr_for_each_table(mrt, net) {
1174 if (sk == mrt->mroute_sk) {
1175 IPV4_DEVCONF_ALL(net, MC_FORWARDING)--;
1177 write_lock_bh(&mrt_lock);
1178 mrt->mroute_sk = NULL;
1179 write_unlock_bh(&mrt_lock);
1181 mroute_clean_tables(mrt);
1184 rtnl_unlock();
1188 * Socket options and virtual interface manipulation. The whole
1189 * virtual interface system is a complete heap, but unfortunately
1190 * that's how BSD mrouted happens to think. Maybe one day with a proper
1191 * MOSPF/PIM router set up we can clean this up.
1194 int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsigned int optlen)
1196 int ret;
1197 struct vifctl vif;
1198 struct mfcctl mfc;
1199 struct net *net = sock_net(sk);
1200 struct mr_table *mrt;
1202 mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT);
1203 if (mrt == NULL)
1204 return -ENOENT;
1206 if (optname != MRT_INIT) {
1207 if (sk != mrt->mroute_sk && !capable(CAP_NET_ADMIN))
1208 return -EACCES;
1211 switch (optname) {
1212 case MRT_INIT:
1213 if (sk->sk_type != SOCK_RAW ||
1214 inet_sk(sk)->inet_num != IPPROTO_IGMP)
1215 return -EOPNOTSUPP;
1216 if (optlen != sizeof(int))
1217 return -ENOPROTOOPT;
1219 rtnl_lock();
1220 if (mrt->mroute_sk) {
1221 rtnl_unlock();
1222 return -EADDRINUSE;
1225 ret = ip_ra_control(sk, 1, mrtsock_destruct);
1226 if (ret == 0) {
1227 write_lock_bh(&mrt_lock);
1228 mrt->mroute_sk = sk;
1229 write_unlock_bh(&mrt_lock);
1231 IPV4_DEVCONF_ALL(net, MC_FORWARDING)++;
1233 rtnl_unlock();
1234 return ret;
1235 case MRT_DONE:
1236 if (sk != mrt->mroute_sk)
1237 return -EACCES;
1238 return ip_ra_control(sk, 0, NULL);
1239 case MRT_ADD_VIF:
1240 case MRT_DEL_VIF:
1241 if (optlen != sizeof(vif))
1242 return -EINVAL;
1243 if (copy_from_user(&vif, optval, sizeof(vif)))
1244 return -EFAULT;
1245 if (vif.vifc_vifi >= MAXVIFS)
1246 return -ENFILE;
1247 rtnl_lock();
1248 if (optname == MRT_ADD_VIF) {
1249 ret = vif_add(net, mrt, &vif, sk == mrt->mroute_sk);
1250 } else {
1251 ret = vif_delete(mrt, vif.vifc_vifi, 0, NULL);
1253 rtnl_unlock();
1254 return ret;
1257 * Manipulate the forwarding caches. These live
1258 * in a sort of kernel/user symbiosis.
1260 case MRT_ADD_MFC:
1261 case MRT_DEL_MFC:
1262 if (optlen != sizeof(mfc))
1263 return -EINVAL;
1264 if (copy_from_user(&mfc, optval, sizeof(mfc)))
1265 return -EFAULT;
1266 rtnl_lock();
1267 if (optname == MRT_DEL_MFC)
1268 ret = ipmr_mfc_delete(mrt, &mfc);
1269 else
1270 ret = ipmr_mfc_add(net, mrt, &mfc, sk == mrt->mroute_sk);
1271 rtnl_unlock();
1272 return ret;
1274 * Control PIM assert.
1276 case MRT_ASSERT:
1278 int v;
1279 if (get_user(v,(int __user *)optval))
1280 return -EFAULT;
1281 mrt->mroute_do_assert = (v) ? 1 : 0;
1282 return 0;
1284 #ifdef CONFIG_IP_PIMSM
1285 case MRT_PIM:
1287 int v;
1289 if (get_user(v,(int __user *)optval))
1290 return -EFAULT;
1291 v = (v) ? 1 : 0;
1293 rtnl_lock();
1294 ret = 0;
1295 if (v != mrt->mroute_do_pim) {
1296 mrt->mroute_do_pim = v;
1297 mrt->mroute_do_assert = v;
1299 rtnl_unlock();
1300 return ret;
1302 #endif
1303 #ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES
1304 case MRT_TABLE:
1306 u32 v;
1308 if (optlen != sizeof(u32))
1309 return -EINVAL;
1310 if (get_user(v, (u32 __user *)optval))
1311 return -EFAULT;
1312 if (sk == mrt->mroute_sk)
1313 return -EBUSY;
1315 rtnl_lock();
1316 ret = 0;
1317 if (!ipmr_new_table(net, v))
1318 ret = -ENOMEM;
1319 raw_sk(sk)->ipmr_table = v;
1320 rtnl_unlock();
1321 return ret;
1323 #endif
1325 * Spurious command, or MRT_VERSION which you cannot
1326 * set.
1328 default:
1329 return -ENOPROTOOPT;
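/*
 * For reference, a minimal sketch of how a userspace daemon (mrouted/pimd
 * style) drives the interface above. It is illustrative only: the addresses
 * are placeholders, a second vif is assumed to have been added, and all
 * error handling is omitted.
 *
 *	int fd = socket(AF_INET, SOCK_RAW, IPPROTO_IGMP);
 *	int on = 1;
 *	setsockopt(fd, IPPROTO_IP, MRT_INIT, &on, sizeof(on));
 *
 *	struct vifctl vc = { .vifc_vifi = 0, .vifc_threshold = 1 };
 *	vc.vifc_lcl_addr.s_addr = inet_addr("192.0.2.1");
 *	setsockopt(fd, IPPROTO_IP, MRT_ADD_VIF, &vc, sizeof(vc));
 *	... further vifs (1, 2, ...) are added the same way ...
 *
 *	struct mfcctl mc = { .mfcc_parent = 0 };
 *	mc.mfcc_origin.s_addr   = inet_addr("192.0.2.10");
 *	mc.mfcc_mcastgrp.s_addr = inet_addr("233.252.0.1");
 *	mc.mfcc_ttls[1] = 1;     ... forward on vif 1, TTL threshold 1 ...
 *	setsockopt(fd, IPPROTO_IP, MRT_ADD_MFC, &mc, sizeof(mc));
 *
 *	Reading from fd then delivers struct igmpmsg upcalls (IGMPMSG_NOCACHE
 *	and friends); setsockopt(fd, IPPROTO_IP, MRT_DONE, &on, sizeof(on))
 *	shuts multicast routing down again.
 */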
1334 * Getsock opt support for the multicast routing system.
1337 int ip_mroute_getsockopt(struct sock *sk, int optname, char __user *optval, int __user *optlen)
1339 int olr;
1340 int val;
1341 struct net *net = sock_net(sk);
1342 struct mr_table *mrt;
1344 mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT);
1345 if (mrt == NULL)
1346 return -ENOENT;
1348 if (optname != MRT_VERSION &&
1349 #ifdef CONFIG_IP_PIMSM
1350 optname!=MRT_PIM &&
1351 #endif
1352 optname!=MRT_ASSERT)
1353 return -ENOPROTOOPT;
1355 if (get_user(olr, optlen))
1356 return -EFAULT;
1358 olr = min_t(unsigned int, olr, sizeof(int));
1359 if (olr < 0)
1360 return -EINVAL;
1362 if (put_user(olr, optlen))
1363 return -EFAULT;
1364 if (optname == MRT_VERSION)
1365 val = 0x0305;
1366 #ifdef CONFIG_IP_PIMSM
1367 else if (optname == MRT_PIM)
1368 val = mrt->mroute_do_pim;
1369 #endif
1370 else
1371 val = mrt->mroute_do_assert;
1372 if (copy_to_user(optval, &val, olr))
1373 return -EFAULT;
1374 return 0;
1378 * The IP multicast ioctl support routines.
1381 int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg)
1383 struct sioc_sg_req sr;
1384 struct sioc_vif_req vr;
1385 struct vif_device *vif;
1386 struct mfc_cache *c;
1387 struct net *net = sock_net(sk);
1388 struct mr_table *mrt;
1390 mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT);
1391 if (mrt == NULL)
1392 return -ENOENT;
1394 switch (cmd) {
1395 case SIOCGETVIFCNT:
1396 if (copy_from_user(&vr, arg, sizeof(vr)))
1397 return -EFAULT;
1398 if (vr.vifi >= mrt->maxvif)
1399 return -EINVAL;
1400 read_lock(&mrt_lock);
1401 vif = &mrt->vif_table[vr.vifi];
1402 if (VIF_EXISTS(mrt, vr.vifi)) {
1403 vr.icount = vif->pkt_in;
1404 vr.ocount = vif->pkt_out;
1405 vr.ibytes = vif->bytes_in;
1406 vr.obytes = vif->bytes_out;
1407 read_unlock(&mrt_lock);
1409 if (copy_to_user(arg, &vr, sizeof(vr)))
1410 return -EFAULT;
1411 return 0;
1413 read_unlock(&mrt_lock);
1414 return -EADDRNOTAVAIL;
1415 case SIOCGETSGCNT:
1416 if (copy_from_user(&sr, arg, sizeof(sr)))
1417 return -EFAULT;
1419 read_lock(&mrt_lock);
1420 c = ipmr_cache_find(mrt, sr.src.s_addr, sr.grp.s_addr);
1421 if (c) {
1422 sr.pktcnt = c->mfc_un.res.pkt;
1423 sr.bytecnt = c->mfc_un.res.bytes;
1424 sr.wrong_if = c->mfc_un.res.wrong_if;
1425 read_unlock(&mrt_lock);
1427 if (copy_to_user(arg, &sr, sizeof(sr)))
1428 return -EFAULT;
1429 return 0;
1431 read_unlock(&mrt_lock);
1432 return -EADDRNOTAVAIL;
1433 default:
1434 return -ENOIOCTLCMD;
1439 static int ipmr_device_event(struct notifier_block *this, unsigned long event, void *ptr)
1441 struct net_device *dev = ptr;
1442 struct net *net = dev_net(dev);
1443 struct mr_table *mrt;
1444 struct vif_device *v;
1445 int ct;
1446 LIST_HEAD(list);
1448 if (event != NETDEV_UNREGISTER)
1449 return NOTIFY_DONE;
1451 ipmr_for_each_table(mrt, net) {
1452 v = &mrt->vif_table[0];
1453 for (ct = 0; ct < mrt->maxvif; ct++, v++) {
1454 if (v->dev == dev)
1455 vif_delete(mrt, ct, 1, &list);
1458 unregister_netdevice_many(&list);
1459 return NOTIFY_DONE;
1463 static struct notifier_block ip_mr_notifier = {
1464 .notifier_call = ipmr_device_event,
1468 * Encapsulate a packet by attaching a valid IPIP header to it.
1469 * This avoids tunnel drivers and other mess and gives us the speed so
1470 * important for multicast video.
1473 static void ip_encap(struct sk_buff *skb, __be32 saddr, __be32 daddr)
1475 struct iphdr *iph;
1476 struct iphdr *old_iph = ip_hdr(skb);
1478 skb_push(skb, sizeof(struct iphdr));
1479 skb->transport_header = skb->network_header;
1480 skb_reset_network_header(skb);
1481 iph = ip_hdr(skb);
1483 iph->version = 4;
1484 iph->tos = old_iph->tos;
1485 iph->ttl = old_iph->ttl;
1486 iph->frag_off = 0;
1487 iph->daddr = daddr;
1488 iph->saddr = saddr;
1489 iph->protocol = IPPROTO_IPIP;
1490 iph->ihl = 5;
1491 iph->tot_len = htons(skb->len);
1492 ip_select_ident(iph, skb_dst(skb), NULL);
1493 ip_send_check(iph);
1495 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
1496 nf_reset(skb);
1499 static inline int ipmr_forward_finish(struct sk_buff *skb)
1501 struct ip_options * opt = &(IPCB(skb)->opt);
1503 IP_INC_STATS_BH(dev_net(skb_dst(skb)->dev), IPSTATS_MIB_OUTFORWDATAGRAMS);
1505 if (unlikely(opt->optlen))
1506 ip_forward_options(skb);
1508 return dst_output(skb);
1512 * Processing handlers for ipmr_forward
1515 static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt,
1516 struct sk_buff *skb, struct mfc_cache *c, int vifi)
1518 const struct iphdr *iph = ip_hdr(skb);
1519 struct vif_device *vif = &mrt->vif_table[vifi];
1520 struct net_device *dev;
1521 struct rtable *rt;
1522 int encap = 0;
1524 if (vif->dev == NULL)
1525 goto out_free;
1527 #ifdef CONFIG_IP_PIMSM
1528 if (vif->flags & VIFF_REGISTER) {
1529 vif->pkt_out++;
1530 vif->bytes_out += skb->len;
1531 vif->dev->stats.tx_bytes += skb->len;
1532 vif->dev->stats.tx_packets++;
1533 ipmr_cache_report(mrt, skb, vifi, IGMPMSG_WHOLEPKT);
1534 goto out_free;
1536 #endif
1538 if (vif->flags&VIFF_TUNNEL) {
1539 struct flowi fl = { .oif = vif->link,
1540 .nl_u = { .ip4_u =
1541 { .daddr = vif->remote,
1542 .saddr = vif->local,
1543 .tos = RT_TOS(iph->tos) } },
1544 .proto = IPPROTO_IPIP };
1545 if (ip_route_output_key(net, &rt, &fl))
1546 goto out_free;
1547 encap = sizeof(struct iphdr);
1548 } else {
1549 struct flowi fl = { .oif = vif->link,
1550 .nl_u = { .ip4_u =
1551 { .daddr = iph->daddr,
1552 .tos = RT_TOS(iph->tos) } },
1553 .proto = IPPROTO_IPIP };
1554 if (ip_route_output_key(net, &rt, &fl))
1555 goto out_free;
1558 dev = rt->dst.dev;
1560 if (skb->len+encap > dst_mtu(&rt->dst) && (ntohs(iph->frag_off) & IP_DF)) {
1561 /* Do not fragment multicasts. Alas, IPv4 does not
1562 allow to send ICMP, so that packets will disappear
1563 to blackhole.
1566 IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_FRAGFAILS);
1567 ip_rt_put(rt);
1568 goto out_free;
1571 encap += LL_RESERVED_SPACE(dev) + rt->dst.header_len;
1573 if (skb_cow(skb, encap)) {
1574 ip_rt_put(rt);
1575 goto out_free;
1578 vif->pkt_out++;
1579 vif->bytes_out += skb->len;
1581 skb_dst_drop(skb);
1582 skb_dst_set(skb, &rt->dst);
1583 ip_decrease_ttl(ip_hdr(skb));
1585 if (vif->flags & VIFF_TUNNEL) {
1586 ip_encap(skb, vif->local, vif->remote);
1587 vif->dev->stats.tx_packets++;
1588 vif->dev->stats.tx_bytes += skb->len;
1591 IPCB(skb)->flags |= IPSKB_FORWARDED;
1594 * RFC 1584 teaches that a DVMRP/PIM router must deliver packets locally
1595 * not only before forwarding, but also after forwarding on all output
1596 * interfaces. Clearly, if the mrouter runs a multicasting
1597 * program, that program should receive packets regardless of which
1598 * interface it joined on.
1599 * If we did not do this, the program would have to join on all
1600 * interfaces. On the other hand, a multihomed host (or router, but
1601 * not mrouter) cannot join on more than one interface - it would
1602 * result in receiving multiple packets.
1604 NF_HOOK(NFPROTO_IPV4, NF_INET_FORWARD, skb, skb->dev, dev,
1605 ipmr_forward_finish);
1606 return;
1608 out_free:
1609 kfree_skb(skb);
1612 static int ipmr_find_vif(struct mr_table *mrt, struct net_device *dev)
1614 int ct;
1616 for (ct = mrt->maxvif-1; ct >= 0; ct--) {
1617 if (mrt->vif_table[ct].dev == dev)
1618 break;
1620 return ct;
1623 /* "local" means that we should preserve one skb (for local delivery) */
1625 static int ip_mr_forward(struct net *net, struct mr_table *mrt,
1626 struct sk_buff *skb, struct mfc_cache *cache,
1627 int local)
1629 int psend = -1;
1630 int vif, ct;
1632 vif = cache->mfc_parent;
1633 cache->mfc_un.res.pkt++;
1634 cache->mfc_un.res.bytes += skb->len;
1637 * Wrong interface: drop packet and (maybe) send PIM assert.
1639 if (mrt->vif_table[vif].dev != skb->dev) {
1640 int true_vifi;
1642 if (skb_rtable(skb)->fl.iif == 0) {
1643 goto dont_forward;
1646 cache->mfc_un.res.wrong_if++;
1647 true_vifi = ipmr_find_vif(mrt, skb->dev);
1649 if (true_vifi >= 0 && mrt->mroute_do_assert &&
1650 /* PIM-SM uses asserts when switching from RPT to SPT,
1651 so we cannot check that the packet arrived on an oif.
1652 It is bad, but otherwise we would need to move a pretty
1653 large chunk of pimd into the kernel. Ough... --ANK
1655 (mrt->mroute_do_pim ||
1656 cache->mfc_un.res.ttls[true_vifi] < 255) &&
1657 time_after(jiffies,
1658 cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) {
1659 cache->mfc_un.res.last_assert = jiffies;
1660 ipmr_cache_report(mrt, skb, true_vifi, IGMPMSG_WRONGVIF);
1662 goto dont_forward;
1665 mrt->vif_table[vif].pkt_in++;
1666 mrt->vif_table[vif].bytes_in += skb->len;
1669 * Forward the frame
1671 for (ct = cache->mfc_un.res.maxvif-1; ct >= cache->mfc_un.res.minvif; ct--) {
1672 if (ip_hdr(skb)->ttl > cache->mfc_un.res.ttls[ct]) {
1673 if (psend != -1) {
1674 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
1675 if (skb2)
1676 ipmr_queue_xmit(net, mrt, skb2, cache,
1677 psend);
1679 psend = ct;
1682 if (psend != -1) {
1683 if (local) {
1684 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
1685 if (skb2)
1686 ipmr_queue_xmit(net, mrt, skb2, cache, psend);
1687 } else {
1688 ipmr_queue_xmit(net, mrt, skb, cache, psend);
1689 return 0;
1693 dont_forward:
1694 if (!local)
1695 kfree_skb(skb);
1696 return 0;
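/*
 * Forwarding summary: a packet is duplicated once per outgoing vif whose
 * configured TTL threshold is below the packet's TTL; clones are sent as
 * eligible vifs are found, and the last eligible vif gets the original skb
 * (or another clone, when a copy must also survive for local delivery).
 */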
1701 * Multicast packets for forwarding arrive here
1704 int ip_mr_input(struct sk_buff *skb)
1706 struct mfc_cache *cache;
1707 struct net *net = dev_net(skb->dev);
1708 int local = skb_rtable(skb)->rt_flags & RTCF_LOCAL;
1709 struct mr_table *mrt;
1710 int err;
1712 /* Packet is looped back after forwarding; it should not be
1713 forwarded a second time, but it can still be delivered locally.
1715 if (IPCB(skb)->flags&IPSKB_FORWARDED)
1716 goto dont_forward;
1718 err = ipmr_fib_lookup(net, &skb_rtable(skb)->fl, &mrt);
1719 if (err < 0) {
1720 kfree_skb(skb);
1721 return err;
1724 if (!local) {
1725 if (IPCB(skb)->opt.router_alert) {
1726 if (ip_call_ra_chain(skb))
1727 return 0;
1728 } else if (ip_hdr(skb)->protocol == IPPROTO_IGMP){
1729 /* IGMPv1 (and broken IGMPv2 implementations such as
1730 Cisco IOS <= 11.2(8)) do not put the router alert
1731 option in IGMP packets destined to routable
1732 groups. That is very bad, because it means
1733 that we can forward NO IGMP messages.
1735 read_lock(&mrt_lock);
1736 if (mrt->mroute_sk) {
1737 nf_reset(skb);
1738 raw_rcv(mrt->mroute_sk, skb);
1739 read_unlock(&mrt_lock);
1740 return 0;
1742 read_unlock(&mrt_lock);
1746 read_lock(&mrt_lock);
1747 cache = ipmr_cache_find(mrt, ip_hdr(skb)->saddr, ip_hdr(skb)->daddr);
1750 * No usable cache entry
1752 if (cache == NULL) {
1753 int vif;
1755 if (local) {
1756 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
1757 ip_local_deliver(skb);
1758 if (skb2 == NULL) {
1759 read_unlock(&mrt_lock);
1760 return -ENOBUFS;
1762 skb = skb2;
1765 vif = ipmr_find_vif(mrt, skb->dev);
1766 if (vif >= 0) {
1767 int err2 = ipmr_cache_unresolved(mrt, vif, skb);
1768 read_unlock(&mrt_lock);
1770 return err2;
1772 read_unlock(&mrt_lock);
1773 kfree_skb(skb);
1774 return -ENODEV;
1777 ip_mr_forward(net, mrt, skb, cache, local);
1779 read_unlock(&mrt_lock);
1781 if (local)
1782 return ip_local_deliver(skb);
1784 return 0;
1786 dont_forward:
1787 if (local)
1788 return ip_local_deliver(skb);
1789 kfree_skb(skb);
1790 return 0;
1793 #ifdef CONFIG_IP_PIMSM
1794 static int __pim_rcv(struct mr_table *mrt, struct sk_buff *skb,
1795 unsigned int pimlen)
1797 struct net_device *reg_dev = NULL;
1798 struct iphdr *encap;
1800 encap = (struct iphdr *)(skb_transport_header(skb) + pimlen);
1802 Check that:
1803 a. packet is really destined to a multicast group
1804 b. packet is not a NULL-REGISTER
1805 c. packet is not truncated
1807 if (!ipv4_is_multicast(encap->daddr) ||
1808 encap->tot_len == 0 ||
1809 ntohs(encap->tot_len) + pimlen > skb->len)
1810 return 1;
1812 read_lock(&mrt_lock);
1813 if (mrt->mroute_reg_vif_num >= 0)
1814 reg_dev = mrt->vif_table[mrt->mroute_reg_vif_num].dev;
1815 if (reg_dev)
1816 dev_hold(reg_dev);
1817 read_unlock(&mrt_lock);
1819 if (reg_dev == NULL)
1820 return 1;
1822 skb->mac_header = skb->network_header;
1823 skb_pull(skb, (u8*)encap - skb->data);
1824 skb_reset_network_header(skb);
1825 skb->protocol = htons(ETH_P_IP);
1826 skb->ip_summed = 0;
1827 skb->pkt_type = PACKET_HOST;
1829 skb_tunnel_rx(skb, reg_dev);
1831 netif_rx(skb);
1832 dev_put(reg_dev);
1834 return 0;
1836 #endif
1838 #ifdef CONFIG_IP_PIMSM_V1
1840 * Handle IGMP messages of PIMv1
1843 int pim_rcv_v1(struct sk_buff * skb)
1845 struct igmphdr *pim;
1846 struct net *net = dev_net(skb->dev);
1847 struct mr_table *mrt;
1849 if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr)))
1850 goto drop;
1852 pim = igmp_hdr(skb);
1854 if (ipmr_fib_lookup(net, &skb_rtable(skb)->fl, &mrt) < 0)
1855 goto drop;
1857 if (!mrt->mroute_do_pim ||
1858 pim->group != PIM_V1_VERSION || pim->code != PIM_V1_REGISTER)
1859 goto drop;
1861 if (__pim_rcv(mrt, skb, sizeof(*pim))) {
1862 drop:
1863 kfree_skb(skb);
1865 return 0;
1867 #endif
1869 #ifdef CONFIG_IP_PIMSM_V2
1870 static int pim_rcv(struct sk_buff * skb)
1872 struct pimreghdr *pim;
1873 struct net *net = dev_net(skb->dev);
1874 struct mr_table *mrt;
1876 if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr)))
1877 goto drop;
1879 pim = (struct pimreghdr *)skb_transport_header(skb);
1880 if (pim->type != ((PIM_VERSION<<4)|(PIM_REGISTER)) ||
1881 (pim->flags&PIM_NULL_REGISTER) ||
1882 (ip_compute_csum((void *)pim, sizeof(*pim)) != 0 &&
1883 csum_fold(skb_checksum(skb, 0, skb->len, 0))))
1884 goto drop;
1886 if (ipmr_fib_lookup(net, &skb_rtable(skb)->fl, &mrt) < 0)
1887 goto drop;
1889 if (__pim_rcv(mrt, skb, sizeof(*pim))) {
1890 drop:
1891 kfree_skb(skb);
1893 return 0;
1895 #endif
1897 static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
1898 struct mfc_cache *c, struct rtmsg *rtm)
1900 int ct;
1901 struct rtnexthop *nhp;
1902 u8 *b = skb_tail_pointer(skb);
1903 struct rtattr *mp_head;
1905 /* If cache is unresolved, don't try to parse IIF and OIF */
1906 if (c->mfc_parent >= MAXVIFS)
1907 return -ENOENT;
1909 if (VIF_EXISTS(mrt, c->mfc_parent))
1910 RTA_PUT(skb, RTA_IIF, 4, &mrt->vif_table[c->mfc_parent].dev->ifindex);
1912 mp_head = (struct rtattr *)skb_put(skb, RTA_LENGTH(0));
1914 for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
1915 if (VIF_EXISTS(mrt, ct) && c->mfc_un.res.ttls[ct] < 255) {
1916 if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4))
1917 goto rtattr_failure;
1918 nhp = (struct rtnexthop *)skb_put(skb, RTA_ALIGN(sizeof(*nhp)));
1919 nhp->rtnh_flags = 0;
1920 nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
1921 nhp->rtnh_ifindex = mrt->vif_table[ct].dev->ifindex;
1922 nhp->rtnh_len = sizeof(*nhp);
1925 mp_head->rta_type = RTA_MULTIPATH;
1926 mp_head->rta_len = skb_tail_pointer(skb) - (u8 *)mp_head;
1927 rtm->rtm_type = RTN_MULTICAST;
1928 return 1;
1930 rtattr_failure:
1931 nlmsg_trim(skb, b);
1932 return -EMSGSIZE;
1935 int ipmr_get_route(struct net *net,
1936 struct sk_buff *skb, struct rtmsg *rtm, int nowait)
1938 int err;
1939 struct mr_table *mrt;
1940 struct mfc_cache *cache;
1941 struct rtable *rt = skb_rtable(skb);
1943 mrt = ipmr_get_table(net, RT_TABLE_DEFAULT);
1944 if (mrt == NULL)
1945 return -ENOENT;
1947 read_lock(&mrt_lock);
1948 cache = ipmr_cache_find(mrt, rt->rt_src, rt->rt_dst);
1950 if (cache == NULL) {
1951 struct sk_buff *skb2;
1952 struct iphdr *iph;
1953 struct net_device *dev;
1954 int vif;
1956 if (nowait) {
1957 read_unlock(&mrt_lock);
1958 return -EAGAIN;
1961 dev = skb->dev;
1962 if (dev == NULL || (vif = ipmr_find_vif(mrt, dev)) < 0) {
1963 read_unlock(&mrt_lock);
1964 return -ENODEV;
1966 skb2 = skb_clone(skb, GFP_ATOMIC);
1967 if (!skb2) {
1968 read_unlock(&mrt_lock);
1969 return -ENOMEM;
1972 skb_push(skb2, sizeof(struct iphdr));
1973 skb_reset_network_header(skb2);
1974 iph = ip_hdr(skb2);
1975 iph->ihl = sizeof(struct iphdr) >> 2;
1976 iph->saddr = rt->rt_src;
1977 iph->daddr = rt->rt_dst;
1978 iph->version = 0;
1979 err = ipmr_cache_unresolved(mrt, vif, skb2);
1980 read_unlock(&mrt_lock);
1981 return err;
1984 if (!nowait && (rtm->rtm_flags&RTM_F_NOTIFY))
1985 cache->mfc_flags |= MFC_NOTIFY;
1986 err = __ipmr_fill_mroute(mrt, skb, cache, rtm);
1987 read_unlock(&mrt_lock);
1988 return err;
1991 static int ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
1992 u32 pid, u32 seq, struct mfc_cache *c)
1994 struct nlmsghdr *nlh;
1995 struct rtmsg *rtm;
1997 nlh = nlmsg_put(skb, pid, seq, RTM_NEWROUTE, sizeof(*rtm), NLM_F_MULTI);
1998 if (nlh == NULL)
1999 return -EMSGSIZE;
2001 rtm = nlmsg_data(nlh);
2002 rtm->rtm_family = RTNL_FAMILY_IPMR;
2003 rtm->rtm_dst_len = 32;
2004 rtm->rtm_src_len = 32;
2005 rtm->rtm_tos = 0;
2006 rtm->rtm_table = mrt->id;
2007 NLA_PUT_U32(skb, RTA_TABLE, mrt->id);
2008 rtm->rtm_type = RTN_MULTICAST;
2009 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2010 rtm->rtm_protocol = RTPROT_UNSPEC;
2011 rtm->rtm_flags = 0;
2013 NLA_PUT_BE32(skb, RTA_SRC, c->mfc_origin);
2014 NLA_PUT_BE32(skb, RTA_DST, c->mfc_mcastgrp);
2016 if (__ipmr_fill_mroute(mrt, skb, c, rtm) < 0)
2017 goto nla_put_failure;
2019 return nlmsg_end(skb, nlh);
2021 nla_put_failure:
2022 nlmsg_cancel(skb, nlh);
2023 return -EMSGSIZE;
2026 static int ipmr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
2028 struct net *net = sock_net(skb->sk);
2029 struct mr_table *mrt;
2030 struct mfc_cache *mfc;
2031 unsigned int t = 0, s_t;
2032 unsigned int h = 0, s_h;
2033 unsigned int e = 0, s_e;
2035 s_t = cb->args[0];
2036 s_h = cb->args[1];
2037 s_e = cb->args[2];
2039 read_lock(&mrt_lock);
2040 ipmr_for_each_table(mrt, net) {
2041 if (t < s_t)
2042 goto next_table;
2043 if (t > s_t)
2044 s_h = 0;
2045 for (h = s_h; h < MFC_LINES; h++) {
2046 list_for_each_entry(mfc, &mrt->mfc_cache_array[h], list) {
2047 if (e < s_e)
2048 goto next_entry;
2049 if (ipmr_fill_mroute(mrt, skb,
2050 NETLINK_CB(cb->skb).pid,
2051 cb->nlh->nlmsg_seq,
2052 mfc) < 0)
2053 goto done;
2054 next_entry:
2055 e++;
2057 e = s_e = 0;
2059 s_h = 0;
2060 next_table:
2061 t++;
2063 done:
2064 read_unlock(&mrt_lock);
2066 cb->args[2] = e;
2067 cb->args[1] = h;
2068 cb->args[0] = t;
2070 return skb->len;
2073 #ifdef CONFIG_PROC_FS
2075 * The /proc interfaces to multicast routing /proc/ip_mr_cache /proc/ip_mr_vif
2077 struct ipmr_vif_iter {
2078 struct seq_net_private p;
2079 struct mr_table *mrt;
2080 int ct;
2083 static struct vif_device *ipmr_vif_seq_idx(struct net *net,
2084 struct ipmr_vif_iter *iter,
2085 loff_t pos)
2087 struct mr_table *mrt = iter->mrt;
2089 for (iter->ct = 0; iter->ct < mrt->maxvif; ++iter->ct) {
2090 if (!VIF_EXISTS(mrt, iter->ct))
2091 continue;
2092 if (pos-- == 0)
2093 return &mrt->vif_table[iter->ct];
2095 return NULL;
2098 static void *ipmr_vif_seq_start(struct seq_file *seq, loff_t *pos)
2099 __acquires(mrt_lock)
2101 struct ipmr_vif_iter *iter = seq->private;
2102 struct net *net = seq_file_net(seq);
2103 struct mr_table *mrt;
2105 mrt = ipmr_get_table(net, RT_TABLE_DEFAULT);
2106 if (mrt == NULL)
2107 return ERR_PTR(-ENOENT);
2109 iter->mrt = mrt;
2111 read_lock(&mrt_lock);
2112 return *pos ? ipmr_vif_seq_idx(net, seq->private, *pos - 1)
2113 : SEQ_START_TOKEN;
2116 static void *ipmr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2118 struct ipmr_vif_iter *iter = seq->private;
2119 struct net *net = seq_file_net(seq);
2120 struct mr_table *mrt = iter->mrt;
2122 ++*pos;
2123 if (v == SEQ_START_TOKEN)
2124 return ipmr_vif_seq_idx(net, iter, 0);
2126 while (++iter->ct < mrt->maxvif) {
2127 if (!VIF_EXISTS(mrt, iter->ct))
2128 continue;
2129 return &mrt->vif_table[iter->ct];
2131 return NULL;
2134 static void ipmr_vif_seq_stop(struct seq_file *seq, void *v)
2135 __releases(mrt_lock)
2137 read_unlock(&mrt_lock);
2140 static int ipmr_vif_seq_show(struct seq_file *seq, void *v)
2142 struct ipmr_vif_iter *iter = seq->private;
2143 struct mr_table *mrt = iter->mrt;
2145 if (v == SEQ_START_TOKEN) {
2146 seq_puts(seq,
2147 "Interface BytesIn PktsIn BytesOut PktsOut Flags Local Remote\n");
2148 } else {
2149 const struct vif_device *vif = v;
2150 const char *name = vif->dev ? vif->dev->name : "none";
2152 seq_printf(seq,
2153 "%2Zd %-10s %8ld %7ld %8ld %7ld %05X %08X %08X\n",
2154 vif - mrt->vif_table,
2155 name, vif->bytes_in, vif->pkt_in,
2156 vif->bytes_out, vif->pkt_out,
2157 vif->flags, vif->local, vif->remote);
2159 return 0;
2162 static const struct seq_operations ipmr_vif_seq_ops = {
2163 .start = ipmr_vif_seq_start,
2164 .next = ipmr_vif_seq_next,
2165 .stop = ipmr_vif_seq_stop,
2166 .show = ipmr_vif_seq_show,
2169 static int ipmr_vif_open(struct inode *inode, struct file *file)
2171 return seq_open_net(inode, file, &ipmr_vif_seq_ops,
2172 sizeof(struct ipmr_vif_iter));
2175 static const struct file_operations ipmr_vif_fops = {
2176 .owner = THIS_MODULE,
2177 .open = ipmr_vif_open,
2178 .read = seq_read,
2179 .llseek = seq_lseek,
2180 .release = seq_release_net,
2183 struct ipmr_mfc_iter {
2184 struct seq_net_private p;
2185 struct mr_table *mrt;
2186 struct list_head *cache;
2187 int ct;
2191 static struct mfc_cache *ipmr_mfc_seq_idx(struct net *net,
2192 struct ipmr_mfc_iter *it, loff_t pos)
2194 struct mr_table *mrt = it->mrt;
2195 struct mfc_cache *mfc;
2197 read_lock(&mrt_lock);
2198 for (it->ct = 0; it->ct < MFC_LINES; it->ct++) {
2199 it->cache = &mrt->mfc_cache_array[it->ct];
2200 list_for_each_entry(mfc, it->cache, list)
2201 if (pos-- == 0)
2202 return mfc;
2204 read_unlock(&mrt_lock);
2206 spin_lock_bh(&mfc_unres_lock);
2207 it->cache = &mrt->mfc_unres_queue;
2208 list_for_each_entry(mfc, it->cache, list)
2209 if (pos-- == 0)
2210 return mfc;
2211 spin_unlock_bh(&mfc_unres_lock);
2213 it->cache = NULL;
2214 return NULL;
2218 static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
2220 struct ipmr_mfc_iter *it = seq->private;
2221 struct net *net = seq_file_net(seq);
2222 struct mr_table *mrt;
2224 mrt = ipmr_get_table(net, RT_TABLE_DEFAULT);
2225 if (mrt == NULL)
2226 return ERR_PTR(-ENOENT);
2228 it->mrt = mrt;
2229 it->cache = NULL;
2230 it->ct = 0;
2231 return *pos ? ipmr_mfc_seq_idx(net, seq->private, *pos - 1)
2232 : SEQ_START_TOKEN;
2235 static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2237 struct mfc_cache *mfc = v;
2238 struct ipmr_mfc_iter *it = seq->private;
2239 struct net *net = seq_file_net(seq);
2240 struct mr_table *mrt = it->mrt;
2242 ++*pos;
2244 if (v == SEQ_START_TOKEN)
2245 return ipmr_mfc_seq_idx(net, seq->private, 0);
2247 if (mfc->list.next != it->cache)
2248 return list_entry(mfc->list.next, struct mfc_cache, list);
2250 if (it->cache == &mrt->mfc_unres_queue)
2251 goto end_of_list;
2253 BUG_ON(it->cache != &mrt->mfc_cache_array[it->ct]);
2255 while (++it->ct < MFC_LINES) {
2256 it->cache = &mrt->mfc_cache_array[it->ct];
2257 if (list_empty(it->cache))
2258 continue;
2259 return list_first_entry(it->cache, struct mfc_cache, list);
2262 /* exhausted cache_array, show unresolved */
2263 read_unlock(&mrt_lock);
2264 it->cache = &mrt->mfc_unres_queue;
2265 it->ct = 0;
2267 spin_lock_bh(&mfc_unres_lock);
2268 if (!list_empty(it->cache))
2269 return list_first_entry(it->cache, struct mfc_cache, list);
2271 end_of_list:
2272 spin_unlock_bh(&mfc_unres_lock);
2273 it->cache = NULL;
2275 return NULL;
2278 static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
2280 struct ipmr_mfc_iter *it = seq->private;
2281 struct mr_table *mrt = it->mrt;
2283 if (it->cache == &mrt->mfc_unres_queue)
2284 spin_unlock_bh(&mfc_unres_lock);
2285 else if (it->cache == &mrt->mfc_cache_array[it->ct])
2286 read_unlock(&mrt_lock);
2289 static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
2291 int n;
2293 if (v == SEQ_START_TOKEN) {
2294 seq_puts(seq,
2295 "Group Origin Iif Pkts Bytes Wrong Oifs\n");
2296 } else {
2297 const struct mfc_cache *mfc = v;
2298 const struct ipmr_mfc_iter *it = seq->private;
2299 const struct mr_table *mrt = it->mrt;
2301 seq_printf(seq, "%08X %08X %-3hd",
2302 (__force u32) mfc->mfc_mcastgrp,
2303 (__force u32) mfc->mfc_origin,
2304 mfc->mfc_parent);
2306 if (it->cache != &mrt->mfc_unres_queue) {
2307 seq_printf(seq, " %8lu %8lu %8lu",
2308 mfc->mfc_un.res.pkt,
2309 mfc->mfc_un.res.bytes,
2310 mfc->mfc_un.res.wrong_if);
2311 for (n = mfc->mfc_un.res.minvif;
2312 n < mfc->mfc_un.res.maxvif; n++ ) {
2313 if (VIF_EXISTS(mrt, n) &&
2314 mfc->mfc_un.res.ttls[n] < 255)
2315 seq_printf(seq,
2316 " %2d:%-3d",
2317 n, mfc->mfc_un.res.ttls[n]);
2319 } else {
2320 /* unresolved mfc_caches don't contain
2321 * pkt, bytes and wrong_if values
2323 seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul);
2325 seq_putc(seq, '\n');
2327 return 0;
2330 static const struct seq_operations ipmr_mfc_seq_ops = {
2331 .start = ipmr_mfc_seq_start,
2332 .next = ipmr_mfc_seq_next,
2333 .stop = ipmr_mfc_seq_stop,
2334 .show = ipmr_mfc_seq_show,
2337 static int ipmr_mfc_open(struct inode *inode, struct file *file)
2339 return seq_open_net(inode, file, &ipmr_mfc_seq_ops,
2340 sizeof(struct ipmr_mfc_iter));
2343 static const struct file_operations ipmr_mfc_fops = {
2344 .owner = THIS_MODULE,
2345 .open = ipmr_mfc_open,
2346 .read = seq_read,
2347 .llseek = seq_lseek,
2348 .release = seq_release_net,
2350 #endif
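/*
 * The entries registered in ipmr_net_init() below appear as
 * /proc/net/ip_mr_vif and /proc/net/ip_mr_cache; the column layout is the
 * seq_printf() formats in ipmr_vif_seq_show() and ipmr_mfc_seq_show()
 * above (addresses printed as raw %08X hex).
 */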
2352 #ifdef CONFIG_IP_PIMSM_V2
2353 static const struct net_protocol pim_protocol = {
2354 .handler = pim_rcv,
2355 .netns_ok = 1,
2357 #endif
2361 * Setup for IP multicast routing
2363 static int __net_init ipmr_net_init(struct net *net)
2365 int err;
2367 err = ipmr_rules_init(net);
2368 if (err < 0)
2369 goto fail;
2371 #ifdef CONFIG_PROC_FS
2372 err = -ENOMEM;
2373 if (!proc_net_fops_create(net, "ip_mr_vif", 0, &ipmr_vif_fops))
2374 goto proc_vif_fail;
2375 if (!proc_net_fops_create(net, "ip_mr_cache", 0, &ipmr_mfc_fops))
2376 goto proc_cache_fail;
2377 #endif
2378 return 0;
2380 #ifdef CONFIG_PROC_FS
2381 proc_cache_fail:
2382 proc_net_remove(net, "ip_mr_vif");
2383 proc_vif_fail:
2384 ipmr_rules_exit(net);
2385 #endif
2386 fail:
2387 return err;
2390 static void __net_exit ipmr_net_exit(struct net *net)
2392 #ifdef CONFIG_PROC_FS
2393 proc_net_remove(net, "ip_mr_cache");
2394 proc_net_remove(net, "ip_mr_vif");
2395 #endif
2396 ipmr_rules_exit(net);
2399 static struct pernet_operations ipmr_net_ops = {
2400 .init = ipmr_net_init,
2401 .exit = ipmr_net_exit,
2404 int __init ip_mr_init(void)
2406 int err;
2408 mrt_cachep = kmem_cache_create("ip_mrt_cache",
2409 sizeof(struct mfc_cache),
2410 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC,
2411 NULL);
2412 if (!mrt_cachep)
2413 return -ENOMEM;
2415 err = register_pernet_subsys(&ipmr_net_ops);
2416 if (err)
2417 goto reg_pernet_fail;
2419 err = register_netdevice_notifier(&ip_mr_notifier);
2420 if (err)
2421 goto reg_notif_fail;
2422 #ifdef CONFIG_IP_PIMSM_V2
2423 if (inet_add_protocol(&pim_protocol, IPPROTO_PIM) < 0) {
2424 printk(KERN_ERR "ip_mr_init: can't add PIM protocol\n");
2425 err = -EAGAIN;
2426 goto add_proto_fail;
2428 #endif
2429 rtnl_register(RTNL_FAMILY_IPMR, RTM_GETROUTE, NULL, ipmr_rtm_dumproute);
2430 return 0;
2432 #ifdef CONFIG_IP_PIMSM_V2
2433 add_proto_fail:
2434 unregister_netdevice_notifier(&ip_mr_notifier);
2435 #endif
2436 reg_notif_fail:
2437 unregister_pernet_subsys(&ipmr_net_ops);
2438 reg_pernet_fail:
2439 kmem_cache_destroy(mrt_cachep);
2440 return err;