netns: ipmr: declare mroute_do_assert and mroute_do_pim per-namespace
/*
 *	IP multicast routing support for mrouted 3.6/3.8
 *
 *		(c) 1995 Alan Cox, <alan@lxorguk.ukuu.org.uk>
 *	  Linux Consultancy and Custom Driver Development
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 *
 *	Fixes:
 *	Michael Chastain	:	Incorrect size of copying.
 *	Alan Cox		:	Added the cache manager code
 *	Alan Cox		:	Fixed the clone/copy bug and device race.
 *	Mike McLagan		:	Routing by source
 *	Malcolm Beattie		:	Buffer handling fixes.
 *	Alexey Kuznetsov	:	Double buffer free and other fixes.
 *	SVR Anand		:	Fixed several multicast bugs and problems.
 *	Alexey Kuznetsov	:	Status, optimisations and more.
 *	Brad Parker		:	Better behaviour on mrouted upcall
 *					overflow.
 *	Carlos Picoto		:	PIMv1 Support
 *	Pavlin Ivanov Radoslavov:	PIMv2 Registers must checksum only PIM header
 *					Relax this requirement to work with older peers.
 *
 */
#include <asm/system.h>
#include <asm/uaccess.h>
#include <linux/types.h>
#include <linux/capability.h>
#include <linux/errno.h>
#include <linux/timer.h>
#include <linux/mm.h>
#include <linux/kernel.h>
#include <linux/fcntl.h>
#include <linux/stat.h>
#include <linux/socket.h>
#include <linux/in.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/inetdevice.h>
#include <linux/igmp.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/mroute.h>
#include <linux/init.h>
#include <linux/if_ether.h>
#include <net/net_namespace.h>
#include <net/ip.h>
#include <net/protocol.h>
#include <linux/skbuff.h>
#include <net/route.h>
#include <net/sock.h>
#include <net/icmp.h>
#include <net/udp.h>
#include <net/raw.h>
#include <linux/notifier.h>
#include <linux/if_arp.h>
#include <linux/netfilter_ipv4.h>
#include <net/ipip.h>
#include <net/checksum.h>
#include <net/netlink.h>
#if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2)
#define CONFIG_IP_PIMSM	1
#endif
/* Big lock, protecting vif table, mrt cache and mroute socket state.
   Note that the changes are semaphored via rtnl_lock.
 */

static DEFINE_RWLOCK(mrt_lock);

/*
 *	Multicast router control variables
 */

#define VIF_EXISTS(_net, _idx) ((_net)->ipv4.vif_table[_idx].dev != NULL)

static struct mfc_cache *mfc_unres_queue;		/* Queue of unresolved entries */

/* Special spinlock for queue of unresolved entries */
static DEFINE_SPINLOCK(mfc_unres_lock);
/* We return to original Alan's scheme. Hash table of resolved
   entries is changed only in process context and protected
   with weak lock mrt_lock. Queue of unresolved entries is protected
   with strong spinlock mfc_unres_lock.

   In this case data path is free of exclusive locks at all.
 */
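/*
 * Editorial sketch (not in the original source) of the discipline the
 * comment above implies; the calls are the ones used throughout this
 * file:
 *
 *	write_lock_bh(&mrt_lock);	// process context: table updates
 *	...modify vif_table / mfc_cache_array...
 *	write_unlock_bh(&mrt_lock);
 *
 *	read_lock(&mrt_lock);		// data path: lookups only
 *	...ipmr_cache_find(), ip_mr_forward()...
 *	read_unlock(&mrt_lock);
 *
 *	spin_lock_bh(&mfc_unres_lock);	// unresolved queue, both contexts
 *	...walk or splice mfc_unres_queue...
 *	spin_unlock_bh(&mfc_unres_lock);
 */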
static struct kmem_cache *mrt_cachep __read_mostly;

static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local);
static int ipmr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert);
static int ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm);

#ifdef CONFIG_IP_PIMSM_V2
static struct net_protocol pim_protocol;
#endif

static struct timer_list ipmr_expire_timer;
/* Service routines creating virtual interfaces: DVMRP tunnels and PIMREG */

static void ipmr_del_tunnel(struct net_device *dev, struct vifctl *v)
{
	dev_close(dev);

	dev = __dev_get_by_name(&init_net, "tunl0");
	if (dev) {
		const struct net_device_ops *ops = dev->netdev_ops;
		struct ifreq ifr;
		struct ip_tunnel_parm p;

		memset(&p, 0, sizeof(p));
		p.iph.daddr = v->vifc_rmt_addr.s_addr;
		p.iph.saddr = v->vifc_lcl_addr.s_addr;
		p.iph.version = 4;
		p.iph.ihl = 5;
		p.iph.protocol = IPPROTO_IPIP;
		sprintf(p.name, "dvmrp%d", v->vifc_vifi);
		ifr.ifr_ifru.ifru_data = (__force void __user *)&p;

		if (ops->ndo_do_ioctl) {
			mm_segment_t oldfs = get_fs();

			set_fs(KERNEL_DS);
			ops->ndo_do_ioctl(dev, &ifr, SIOCDELTUNNEL);
			set_fs(oldfs);
		}
	}
}
static
struct net_device *ipmr_new_tunnel(struct vifctl *v)
{
	struct net_device *dev;

	dev = __dev_get_by_name(&init_net, "tunl0");

	if (dev) {
		const struct net_device_ops *ops = dev->netdev_ops;
		int err;
		struct ifreq ifr;
		struct ip_tunnel_parm p;
		struct in_device *in_dev;

		memset(&p, 0, sizeof(p));
		p.iph.daddr = v->vifc_rmt_addr.s_addr;
		p.iph.saddr = v->vifc_lcl_addr.s_addr;
		p.iph.version = 4;
		p.iph.ihl = 5;
		p.iph.protocol = IPPROTO_IPIP;
		sprintf(p.name, "dvmrp%d", v->vifc_vifi);
		ifr.ifr_ifru.ifru_data = (__force void __user *)&p;

		if (ops->ndo_do_ioctl) {
			mm_segment_t oldfs = get_fs();

			set_fs(KERNEL_DS);
			err = ops->ndo_do_ioctl(dev, &ifr, SIOCADDTUNNEL);
			set_fs(oldfs);
		} else
			err = -EOPNOTSUPP;

		dev = NULL;

		if (err == 0 && (dev = __dev_get_by_name(&init_net, p.name)) != NULL) {
			dev->flags |= IFF_MULTICAST;

			in_dev = __in_dev_get_rtnl(dev);
			if (in_dev == NULL)
				goto failure;

			ipv4_devconf_setall(in_dev);
			IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0;

			if (dev_open(dev))
				goto failure;
			dev_hold(dev);
		}
	}
	return dev;

failure:
	/* allow the register to be completed before unregistering. */
	rtnl_unlock();
	rtnl_lock();

	unregister_netdevice(dev);
	return NULL;
}
#ifdef CONFIG_IP_PIMSM

static int reg_vif_num = -1;

static int reg_vif_xmit(struct sk_buff *skb, struct net_device *dev)
{
	read_lock(&mrt_lock);
	dev->stats.tx_bytes += skb->len;
	dev->stats.tx_packets++;
	ipmr_cache_report(skb, reg_vif_num, IGMPMSG_WHOLEPKT);
	read_unlock(&mrt_lock);
	kfree_skb(skb);
	return 0;
}

static const struct net_device_ops reg_vif_netdev_ops = {
	.ndo_start_xmit	= reg_vif_xmit,
};

static void reg_vif_setup(struct net_device *dev)
{
	dev->type		= ARPHRD_PIMREG;
	dev->mtu		= ETH_DATA_LEN - sizeof(struct iphdr) - 8;
	dev->flags		= IFF_NOARP;
	dev->netdev_ops		= &reg_vif_netdev_ops;
	dev->destructor		= free_netdev;
}

static struct net_device *ipmr_reg_vif(void)
{
	struct net_device *dev;
	struct in_device *in_dev;

	dev = alloc_netdev(0, "pimreg", reg_vif_setup);

	if (dev == NULL)
		return NULL;

	if (register_netdevice(dev)) {
		free_netdev(dev);
		return NULL;
	}
	dev->iflink = 0;

	rcu_read_lock();
	if ((in_dev = __in_dev_get_rcu(dev)) == NULL) {
		rcu_read_unlock();
		goto failure;
	}

	ipv4_devconf_setall(in_dev);
	IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0;
	rcu_read_unlock();

	if (dev_open(dev))
		goto failure;

	dev_hold(dev);

	return dev;

failure:
	/* allow the register to be completed before unregistering. */
	rtnl_unlock();
	rtnl_lock();

	unregister_netdevice(dev);
	return NULL;
}
#endif
/*
 *	Delete a VIF entry
 *	@notify: Set to 1, if the caller is a notifier_call
 */

static int vif_delete(int vifi, int notify)
{
	struct vif_device *v;
	struct net_device *dev;
	struct in_device *in_dev;

	if (vifi < 0 || vifi >= init_net.ipv4.maxvif)
		return -EADDRNOTAVAIL;

	v = &init_net.ipv4.vif_table[vifi];

	write_lock_bh(&mrt_lock);
	dev = v->dev;
	v->dev = NULL;

	if (!dev) {
		write_unlock_bh(&mrt_lock);
		return -EADDRNOTAVAIL;
	}

#ifdef CONFIG_IP_PIMSM
	if (vifi == reg_vif_num)
		reg_vif_num = -1;
#endif

	if (vifi+1 == init_net.ipv4.maxvif) {
		int tmp;
		for (tmp=vifi-1; tmp>=0; tmp--) {
			if (VIF_EXISTS(&init_net, tmp))
				break;
		}
		init_net.ipv4.maxvif = tmp+1;
	}

	write_unlock_bh(&mrt_lock);

	dev_set_allmulti(dev, -1);

	if ((in_dev = __in_dev_get_rtnl(dev)) != NULL) {
		IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)--;
		ip_rt_multicast_event(in_dev);
	}

	if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER) && !notify)
		unregister_netdevice(dev);

	dev_put(dev);
	return 0;
}
static inline void ipmr_cache_free(struct mfc_cache *c)
{
	release_net(mfc_net(c));
	kmem_cache_free(mrt_cachep, c);
}
/* Destroy an unresolved cache entry, killing queued skbs
   and reporting error to netlink readers.
 */

static void ipmr_destroy_unres(struct mfc_cache *c)
{
	struct sk_buff *skb;
	struct nlmsgerr *e;

	atomic_dec(&init_net.ipv4.cache_resolve_queue_len);

	while ((skb = skb_dequeue(&c->mfc_un.unres.unresolved))) {
		if (ip_hdr(skb)->version == 0) {
			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));
			nlh->nlmsg_type = NLMSG_ERROR;
			nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
			skb_trim(skb, nlh->nlmsg_len);
			e = NLMSG_DATA(nlh);
			e->error = -ETIMEDOUT;
			memset(&e->msg, 0, sizeof(e->msg));

			rtnl_unicast(skb, &init_net, NETLINK_CB(skb).pid);
		} else
			kfree_skb(skb);
	}

	ipmr_cache_free(c);
}
/* Single timer process for all the unresolved queue. */

static void ipmr_expire_process(unsigned long dummy)
{
	unsigned long now;
	unsigned long expires;
	struct mfc_cache *c, **cp;

	if (!spin_trylock(&mfc_unres_lock)) {
		mod_timer(&ipmr_expire_timer, jiffies+HZ/10);
		return;
	}

	if (mfc_unres_queue == NULL)
		goto out;

	now = jiffies;
	expires = 10*HZ;
	cp = &mfc_unres_queue;

	while ((c=*cp) != NULL) {
		if (time_after(c->mfc_un.unres.expires, now)) {
			unsigned long interval = c->mfc_un.unres.expires - now;
			if (interval < expires)
				expires = interval;
			cp = &c->next;
			continue;
		}

		*cp = c->next;

		ipmr_destroy_unres(c);
	}

	if (mfc_unres_queue != NULL)
		mod_timer(&ipmr_expire_timer, jiffies + expires);

out:
	spin_unlock(&mfc_unres_lock);
}
/* Fill oifs list. It is called under write locked mrt_lock. */

static void ipmr_update_thresholds(struct mfc_cache *cache, unsigned char *ttls)
{
	int vifi;

	cache->mfc_un.res.minvif = MAXVIFS;
	cache->mfc_un.res.maxvif = 0;
	memset(cache->mfc_un.res.ttls, 255, MAXVIFS);

	for (vifi = 0; vifi < init_net.ipv4.maxvif; vifi++) {
		if (VIF_EXISTS(&init_net, vifi) &&
		    ttls[vifi] && ttls[vifi] < 255) {
			cache->mfc_un.res.ttls[vifi] = ttls[vifi];
			if (cache->mfc_un.res.minvif > vifi)
				cache->mfc_un.res.minvif = vifi;
			if (cache->mfc_un.res.maxvif <= vifi)
				cache->mfc_un.res.maxvif = vifi + 1;
		}
	}
}
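/*
 * Editorial note (not in the original source): a ttls[] slot of 255
 * means "never forward on this vif"; any value 1..254 is a TTL
 * threshold, and ip_mr_forward() below only transmits on vif `ct` when
 * ip_hdr(skb)->ttl > cache->mfc_un.res.ttls[ct]. For example, a daemon
 * passing ttls = {1, 255, 64, ...} asks for forwarding on vif 0 for
 * any packet with TTL > 1, never on vif 1, and on vif 2 only for
 * packets with TTL > 64.
 */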
static int vif_add(struct vifctl *vifc, int mrtsock)
{
	int vifi = vifc->vifc_vifi;
	struct vif_device *v = &init_net.ipv4.vif_table[vifi];
	struct net_device *dev;
	struct in_device *in_dev;
	int err;

	/* Is vif busy ? */
	if (VIF_EXISTS(&init_net, vifi))
		return -EADDRINUSE;

	switch (vifc->vifc_flags) {
#ifdef CONFIG_IP_PIMSM
	case VIFF_REGISTER:
		/*
		 * Special Purpose VIF in PIM
		 * All the packets will be sent to the daemon
		 */
		if (reg_vif_num >= 0)
			return -EADDRINUSE;
		dev = ipmr_reg_vif();
		if (!dev)
			return -ENOBUFS;
		err = dev_set_allmulti(dev, 1);
		if (err) {
			unregister_netdevice(dev);
			dev_put(dev);
			return err;
		}
		break;
#endif
	case VIFF_TUNNEL:
		dev = ipmr_new_tunnel(vifc);
		if (!dev)
			return -ENOBUFS;
		err = dev_set_allmulti(dev, 1);
		if (err) {
			ipmr_del_tunnel(dev, vifc);
			dev_put(dev);
			return err;
		}
		break;
	case 0:
		dev = ip_dev_find(&init_net, vifc->vifc_lcl_addr.s_addr);
		if (!dev)
			return -EADDRNOTAVAIL;
		err = dev_set_allmulti(dev, 1);
		if (err) {
			dev_put(dev);
			return err;
		}
		break;
	default:
		return -EINVAL;
	}

	if ((in_dev = __in_dev_get_rtnl(dev)) == NULL)
		return -EADDRNOTAVAIL;
	IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)++;
	ip_rt_multicast_event(in_dev);

	/*
	 *	Fill in the VIF structures
	 */
	v->rate_limit = vifc->vifc_rate_limit;
	v->local = vifc->vifc_lcl_addr.s_addr;
	v->remote = vifc->vifc_rmt_addr.s_addr;
	v->flags = vifc->vifc_flags;
	if (!mrtsock)
		v->flags |= VIFF_STATIC;
	v->threshold = vifc->vifc_threshold;
	v->bytes_in = 0;
	v->bytes_out = 0;
	v->pkt_in = 0;
	v->pkt_out = 0;
	v->link = dev->ifindex;
	if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER))
		v->link = dev->iflink;

	/* And finish update writing critical data */
	write_lock_bh(&mrt_lock);
	v->dev = dev;
#ifdef CONFIG_IP_PIMSM
	if (v->flags&VIFF_REGISTER)
		reg_vif_num = vifi;
#endif
	if (vifi+1 > init_net.ipv4.maxvif)
		init_net.ipv4.maxvif = vifi+1;
	write_unlock_bh(&mrt_lock);
	return 0;
}
static struct mfc_cache *ipmr_cache_find(__be32 origin, __be32 mcastgrp)
{
	int line = MFC_HASH(mcastgrp, origin);
	struct mfc_cache *c;

	for (c = init_net.ipv4.mfc_cache_array[line]; c; c = c->next) {
		if (c->mfc_origin==origin && c->mfc_mcastgrp==mcastgrp)
			break;
	}
	return c;
}
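/*
 * Editorial note (not in the original source): MFC_HASH() comes from
 * <linux/mroute.h>; it folds the group and origin addresses down to an
 * index in [0, MFC_LINES), so every (S,G) pair maps to exactly one
 * hash chain in mfc_cache_array and the lookup above only has to walk
 * that single chain.
 */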
/*
 *	Allocate a multicast cache entry
 */
static struct mfc_cache *ipmr_cache_alloc(struct net *net)
{
	struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
	if (c == NULL)
		return NULL;
	c->mfc_un.res.minvif = MAXVIFS;
	mfc_net_set(c, net);
	return c;
}

static struct mfc_cache *ipmr_cache_alloc_unres(struct net *net)
{
	struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
	if (c == NULL)
		return NULL;
	skb_queue_head_init(&c->mfc_un.unres.unresolved);
	c->mfc_un.unres.expires = jiffies + 10*HZ;
	mfc_net_set(c, net);
	return c;
}
/*
 *	A cache entry has gone into a resolved state from queued
 */

static void ipmr_cache_resolve(struct mfc_cache *uc, struct mfc_cache *c)
{
	struct sk_buff *skb;
	struct nlmsgerr *e;

	/*
	 *	Play the pending entries through our router
	 */

	while ((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) {
		if (ip_hdr(skb)->version == 0) {
			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));

			if (ipmr_fill_mroute(skb, c, NLMSG_DATA(nlh)) > 0) {
				nlh->nlmsg_len = (skb_tail_pointer(skb) -
						  (u8 *)nlh);
			} else {
				nlh->nlmsg_type = NLMSG_ERROR;
				nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
				skb_trim(skb, nlh->nlmsg_len);
				e = NLMSG_DATA(nlh);
				e->error = -EMSGSIZE;
				memset(&e->msg, 0, sizeof(e->msg));
			}

			rtnl_unicast(skb, &init_net, NETLINK_CB(skb).pid);
		} else
			ip_mr_forward(skb, c, 0);
	}
}
/*
 *	Bounce a cache query up to mrouted. We could use netlink for this but mrouted
 *	expects the following bizarre scheme.
 *
 *	Called under mrt_lock.
 */

static int ipmr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert)
{
	struct sk_buff *skb;
	const int ihl = ip_hdrlen(pkt);
	struct igmphdr *igmp;
	struct igmpmsg *msg;
	int ret;

#ifdef CONFIG_IP_PIMSM
	if (assert == IGMPMSG_WHOLEPKT)
		skb = skb_realloc_headroom(pkt, sizeof(struct iphdr));
	else
#endif
		skb = alloc_skb(128, GFP_ATOMIC);

	if (!skb)
		return -ENOBUFS;

#ifdef CONFIG_IP_PIMSM
	if (assert == IGMPMSG_WHOLEPKT) {
		/* Ugly, but we have no choice with this interface.
		   Duplicate old header, fix ihl, length etc.
		   And all this only to mangle msg->im_msgtype and
		   to set msg->im_mbz to "mbz" :-)
		 */
		skb_push(skb, sizeof(struct iphdr));
		skb_reset_network_header(skb);
		skb_reset_transport_header(skb);
		msg = (struct igmpmsg *)skb_network_header(skb);
		memcpy(msg, skb_network_header(pkt), sizeof(struct iphdr));
		msg->im_msgtype = IGMPMSG_WHOLEPKT;
		msg->im_mbz = 0;
		msg->im_vif = reg_vif_num;
		ip_hdr(skb)->ihl = sizeof(struct iphdr) >> 2;
		ip_hdr(skb)->tot_len = htons(ntohs(ip_hdr(pkt)->tot_len) +
					     sizeof(struct iphdr));
	} else
#endif
	{

	/*
	 *	Copy the IP header
	 */

	skb->network_header = skb->tail;
	skb_put(skb, ihl);
	skb_copy_to_linear_data(skb, pkt->data, ihl);
	ip_hdr(skb)->protocol = 0;	/* Flag to the kernel this is a route add */
	msg = (struct igmpmsg *)skb_network_header(skb);
	msg->im_vif = vifi;
	skb->dst = dst_clone(pkt->dst);

	/*
	 *	Add our header
	 */

	igmp = (struct igmphdr *)skb_put(skb, sizeof(struct igmphdr));
	igmp->type =
	msg->im_msgtype = assert;
	igmp->code = 0;
	ip_hdr(skb)->tot_len = htons(skb->len);		/* Fix the length */
	skb->transport_header = skb->network_header;
	}

	if (init_net.ipv4.mroute_sk == NULL) {
		kfree_skb(skb);
		return -EINVAL;
	}

	/*
	 *	Deliver to mrouted
	 */
	ret = sock_queue_rcv_skb(init_net.ipv4.mroute_sk, skb);
	if (ret < 0) {
		if (net_ratelimit())
			printk(KERN_WARNING "mroute: pending queue full, dropping entries.\n");
		kfree_skb(skb);
	}

	return ret;
}
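/*
 * Editorial sketch (not in the original source): userspace receives the
 * report above as a struct igmpmsg on the raw IGMP socket it passed to
 * MRT_INIT. Roughly, for a daemon holding such a socket in `mrt_fd`
 * (an illustrative name):
 *
 *	char buf[2048];
 *	ssize_t n = read(mrt_fd, buf, sizeof(buf));
 *	struct igmpmsg *im = (struct igmpmsg *)buf;
 *	if (n >= (ssize_t)sizeof(*im) && im->im_mbz == 0) {
 *		// im->im_msgtype: IGMPMSG_NOCACHE, _WRONGVIF or _WHOLEPKT
 *		// im->im_vif:     arrival vif; im_src/im_dst: the (S,G)
 *	}
 *
 * im_mbz overlays the IP protocol field, which the kernel zeroed above,
 * so im_mbz == 0 is what distinguishes an upcall from a real IGMP packet.
 */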
/*
 *	Queue a packet for resolution. It gets locked cache entry!
 */

static int
ipmr_cache_unresolved(vifi_t vifi, struct sk_buff *skb)
{
	int err;
	struct mfc_cache *c;
	const struct iphdr *iph = ip_hdr(skb);

	spin_lock_bh(&mfc_unres_lock);
	for (c=mfc_unres_queue; c; c=c->next) {
		if (net_eq(mfc_net(c), &init_net) &&
		    c->mfc_mcastgrp == iph->daddr &&
		    c->mfc_origin == iph->saddr)
			break;
	}

	if (c == NULL) {
		/*
		 *	Create a new entry if allowable
		 */

		if (atomic_read(&init_net.ipv4.cache_resolve_queue_len) >= 10 ||
		    (c = ipmr_cache_alloc_unres(&init_net)) == NULL) {
			spin_unlock_bh(&mfc_unres_lock);

			kfree_skb(skb);
			return -ENOBUFS;
		}

		/*
		 *	Fill in the new cache entry
		 */
		c->mfc_parent	= -1;
		c->mfc_origin	= iph->saddr;
		c->mfc_mcastgrp	= iph->daddr;

		/*
		 *	Reflect first query at mrouted.
		 */
		if ((err = ipmr_cache_report(skb, vifi, IGMPMSG_NOCACHE))<0) {
			/* If the report failed throw the cache entry
			   out - Brad Parker
			 */
			spin_unlock_bh(&mfc_unres_lock);

			ipmr_cache_free(c);
			kfree_skb(skb);
			return err;
		}

		atomic_inc(&init_net.ipv4.cache_resolve_queue_len);
		c->next = mfc_unres_queue;
		mfc_unres_queue = c;

		mod_timer(&ipmr_expire_timer, c->mfc_un.unres.expires);
	}

	/*
	 *	See if we can append the packet
	 */
	if (c->mfc_un.unres.unresolved.qlen>3) {
		kfree_skb(skb);
		err = -ENOBUFS;
	} else {
		skb_queue_tail(&c->mfc_un.unres.unresolved, skb);
		err = 0;
	}

	spin_unlock_bh(&mfc_unres_lock);
	return err;
}
/*
 *	MFC cache manipulation by user space mroute daemon
 */

static int ipmr_mfc_delete(struct mfcctl *mfc)
{
	int line;
	struct mfc_cache *c, **cp;

	line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);

	for (cp = &init_net.ipv4.mfc_cache_array[line];
	     (c = *cp) != NULL; cp = &c->next) {
		if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
		    c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) {
			write_lock_bh(&mrt_lock);
			*cp = c->next;
			write_unlock_bh(&mrt_lock);

			ipmr_cache_free(c);
			return 0;
		}
	}
	return -ENOENT;
}
static int ipmr_mfc_add(struct mfcctl *mfc, int mrtsock)
{
	int line;
	struct mfc_cache *uc, *c, **cp;

	line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);

	for (cp = &init_net.ipv4.mfc_cache_array[line];
	     (c = *cp) != NULL; cp = &c->next) {
		if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
		    c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr)
			break;
	}

	if (c != NULL) {
		write_lock_bh(&mrt_lock);
		c->mfc_parent = mfc->mfcc_parent;
		ipmr_update_thresholds(c, mfc->mfcc_ttls);
		if (!mrtsock)
			c->mfc_flags |= MFC_STATIC;
		write_unlock_bh(&mrt_lock);
		return 0;
	}

	if (!ipv4_is_multicast(mfc->mfcc_mcastgrp.s_addr))
		return -EINVAL;

	c = ipmr_cache_alloc(&init_net);
	if (c == NULL)
		return -ENOMEM;

	c->mfc_origin = mfc->mfcc_origin.s_addr;
	c->mfc_mcastgrp = mfc->mfcc_mcastgrp.s_addr;
	c->mfc_parent = mfc->mfcc_parent;
	ipmr_update_thresholds(c, mfc->mfcc_ttls);
	if (!mrtsock)
		c->mfc_flags |= MFC_STATIC;

	write_lock_bh(&mrt_lock);
	c->next = init_net.ipv4.mfc_cache_array[line];
	init_net.ipv4.mfc_cache_array[line] = c;
	write_unlock_bh(&mrt_lock);

	/*
	 *	Check to see if we resolved a queued list. If so we
	 *	need to send on the frames and tidy up.
	 */
	spin_lock_bh(&mfc_unres_lock);
	for (cp = &mfc_unres_queue; (uc=*cp) != NULL;
	     cp = &uc->next) {
		if (net_eq(mfc_net(uc), &init_net) &&
		    uc->mfc_origin == c->mfc_origin &&
		    uc->mfc_mcastgrp == c->mfc_mcastgrp) {
			*cp = uc->next;
			atomic_dec(&init_net.ipv4.cache_resolve_queue_len);
			break;
		}
	}
	if (mfc_unres_queue == NULL)
		del_timer(&ipmr_expire_timer);
	spin_unlock_bh(&mfc_unres_lock);

	if (uc) {
		ipmr_cache_resolve(uc, c);
		ipmr_cache_free(uc);
	}
	return 0;
}
/*
 *	Close the multicast socket, and clear the vif tables etc
 */

static void mroute_clean_tables(struct sock *sk)
{
	int i;

	/*
	 *	Shut down all active vif entries
	 */
	for (i = 0; i < init_net.ipv4.maxvif; i++) {
		if (!(init_net.ipv4.vif_table[i].flags&VIFF_STATIC))
			vif_delete(i, 0);
	}

	/*
	 *	Wipe the cache
	 */
	for (i=0; i<MFC_LINES; i++) {
		struct mfc_cache *c, **cp;

		cp = &init_net.ipv4.mfc_cache_array[i];
		while ((c = *cp) != NULL) {
			if (c->mfc_flags&MFC_STATIC) {
				cp = &c->next;
				continue;
			}
			write_lock_bh(&mrt_lock);
			*cp = c->next;
			write_unlock_bh(&mrt_lock);

			ipmr_cache_free(c);
		}
	}

	if (atomic_read(&init_net.ipv4.cache_resolve_queue_len) != 0) {
		struct mfc_cache *c, **cp;

		spin_lock_bh(&mfc_unres_lock);
		cp = &mfc_unres_queue;
		while ((c = *cp) != NULL) {
			if (!net_eq(mfc_net(c), &init_net)) {
				cp = &c->next;
				continue;
			}
			*cp = c->next;

			ipmr_destroy_unres(c);
		}
		spin_unlock_bh(&mfc_unres_lock);
	}
}
static void mrtsock_destruct(struct sock *sk)
{
	rtnl_lock();
	if (sk == init_net.ipv4.mroute_sk) {
		IPV4_DEVCONF_ALL(sock_net(sk), MC_FORWARDING)--;

		write_lock_bh(&mrt_lock);
		init_net.ipv4.mroute_sk = NULL;
		write_unlock_bh(&mrt_lock);

		mroute_clean_tables(sk);
	}
	rtnl_unlock();
}
/*
 *	Socket options and virtual interface manipulation. The whole
 *	virtual interface system is a complete heap, but unfortunately
 *	that's how BSD mrouted happens to think. Maybe one day with a proper
 *	MOSPF/PIM router set up we can clean this up.
 */

int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, int optlen)
{
	int ret;
	struct vifctl vif;
	struct mfcctl mfc;

	if (optname != MRT_INIT) {
		if (sk != init_net.ipv4.mroute_sk && !capable(CAP_NET_ADMIN))
			return -EACCES;
	}

	switch (optname) {
	case MRT_INIT:
		if (sk->sk_type != SOCK_RAW ||
		    inet_sk(sk)->num != IPPROTO_IGMP)
			return -EOPNOTSUPP;
		if (optlen != sizeof(int))
			return -ENOPROTOOPT;

		rtnl_lock();
		if (init_net.ipv4.mroute_sk) {
			rtnl_unlock();
			return -EADDRINUSE;
		}

		ret = ip_ra_control(sk, 1, mrtsock_destruct);
		if (ret == 0) {
			write_lock_bh(&mrt_lock);
			init_net.ipv4.mroute_sk = sk;
			write_unlock_bh(&mrt_lock);

			IPV4_DEVCONF_ALL(sock_net(sk), MC_FORWARDING)++;
		}
		rtnl_unlock();
		return ret;
	case MRT_DONE:
		if (sk != init_net.ipv4.mroute_sk)
			return -EACCES;
		return ip_ra_control(sk, 0, NULL);
	case MRT_ADD_VIF:
	case MRT_DEL_VIF:
		if (optlen != sizeof(vif))
			return -EINVAL;
		if (copy_from_user(&vif, optval, sizeof(vif)))
			return -EFAULT;
		if (vif.vifc_vifi >= MAXVIFS)
			return -ENFILE;
		rtnl_lock();
		if (optname == MRT_ADD_VIF) {
			ret = vif_add(&vif, sk == init_net.ipv4.mroute_sk);
		} else {
			ret = vif_delete(vif.vifc_vifi, 0);
		}
		rtnl_unlock();
		return ret;

		/*
		 *	Manipulate the forwarding caches. These live
		 *	in a sort of kernel/user symbiosis.
		 */
	case MRT_ADD_MFC:
	case MRT_DEL_MFC:
		if (optlen != sizeof(mfc))
			return -EINVAL;
		if (copy_from_user(&mfc, optval, sizeof(mfc)))
			return -EFAULT;
		rtnl_lock();
		if (optname == MRT_DEL_MFC)
			ret = ipmr_mfc_delete(&mfc);
		else
			ret = ipmr_mfc_add(&mfc, sk == init_net.ipv4.mroute_sk);
		rtnl_unlock();
		return ret;
		/*
		 *	Control PIM assert.
		 */
	case MRT_ASSERT:
	{
		int v;
		if (get_user(v, (int __user *)optval))
			return -EFAULT;
		init_net.ipv4.mroute_do_assert = (v) ? 1 : 0;
		return 0;
	}
#ifdef CONFIG_IP_PIMSM
	case MRT_PIM:
	{
		int v;

		if (get_user(v, (int __user *)optval))
			return -EFAULT;
		v = (v) ? 1 : 0;

		rtnl_lock();
		ret = 0;
		if (v != init_net.ipv4.mroute_do_pim) {
			init_net.ipv4.mroute_do_pim = v;
			init_net.ipv4.mroute_do_assert = v;
#ifdef CONFIG_IP_PIMSM_V2
			if (init_net.ipv4.mroute_do_pim)
				ret = inet_add_protocol(&pim_protocol,
							IPPROTO_PIM);
			else
				ret = inet_del_protocol(&pim_protocol,
							IPPROTO_PIM);
			if (ret < 0)
				ret = -EAGAIN;
#endif
		}
		rtnl_unlock();
		return ret;
	}
#endif
	/*
	 *	Spurious command, or MRT_VERSION which you cannot
	 *	set.
	 */
	default:
		return -ENOPROTOOPT;
	}
}
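/*
 * Editorial sketch (not in the original source) of how a routing daemon
 * drives the interface above. The socket type and option names come
 * from <linux/mroute.h>; local_ip, src and grp are illustrative:
 *
 *	int one = 1, fd = socket(AF_INET, SOCK_RAW, IPPROTO_IGMP);
 *	setsockopt(fd, IPPROTO_IP, MRT_INIT, &one, sizeof(one));
 *
 *	struct vifctl vc = { .vifc_vifi = 0, .vifc_threshold = 1,
 *			     .vifc_lcl_addr.s_addr = local_ip };
 *	setsockopt(fd, IPPROTO_IP, MRT_ADD_VIF, &vc, sizeof(vc));
 *
 *	struct mfcctl mc = { .mfcc_origin.s_addr   = src,
 *			     .mfcc_mcastgrp.s_addr = grp,
 *			     .mfcc_parent = 0 };
 *	mc.mfcc_ttls[1] = 1;	// forward to vif 1, TTL threshold 1
 *	setsockopt(fd, IPPROTO_IP, MRT_ADD_MFC, &mc, sizeof(mc));
 *
 * Closing fd (or MRT_DONE) ends up in mrtsock_destruct(), which tears
 * the tables down via mroute_clean_tables().
 */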
/*
 *	Getsockopt support for the multicast routing system.
 */

int ip_mroute_getsockopt(struct sock *sk, int optname, char __user *optval, int __user *optlen)
{
	int olr;
	int val;

	if (optname != MRT_VERSION &&
#ifdef CONFIG_IP_PIMSM
	    optname != MRT_PIM &&
#endif
	    optname != MRT_ASSERT)
		return -ENOPROTOOPT;

	if (get_user(olr, optlen))
		return -EFAULT;

	olr = min_t(unsigned int, olr, sizeof(int));
	if (olr < 0)
		return -EINVAL;

	if (put_user(olr, optlen))
		return -EFAULT;
	if (optname == MRT_VERSION)
		val = 0x0305;
#ifdef CONFIG_IP_PIMSM
	else if (optname == MRT_PIM)
		val = init_net.ipv4.mroute_do_pim;
#endif
	else
		val = init_net.ipv4.mroute_do_assert;
	if (copy_to_user(optval, &val, olr))
		return -EFAULT;
	return 0;
}
/*
 *	The IP multicast ioctl support routines.
 */

int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg)
{
	struct sioc_sg_req sr;
	struct sioc_vif_req vr;
	struct vif_device *vif;
	struct mfc_cache *c;

	switch (cmd) {
	case SIOCGETVIFCNT:
		if (copy_from_user(&vr, arg, sizeof(vr)))
			return -EFAULT;
		if (vr.vifi >= init_net.ipv4.maxvif)
			return -EINVAL;
		read_lock(&mrt_lock);
		vif = &init_net.ipv4.vif_table[vr.vifi];
		if (VIF_EXISTS(&init_net, vr.vifi)) {
			vr.icount = vif->pkt_in;
			vr.ocount = vif->pkt_out;
			vr.ibytes = vif->bytes_in;
			vr.obytes = vif->bytes_out;
			read_unlock(&mrt_lock);

			if (copy_to_user(arg, &vr, sizeof(vr)))
				return -EFAULT;
			return 0;
		}
		read_unlock(&mrt_lock);
		return -EADDRNOTAVAIL;
	case SIOCGETSGCNT:
		if (copy_from_user(&sr, arg, sizeof(sr)))
			return -EFAULT;

		read_lock(&mrt_lock);
		c = ipmr_cache_find(sr.src.s_addr, sr.grp.s_addr);
		if (c) {
			sr.pktcnt = c->mfc_un.res.pkt;
			sr.bytecnt = c->mfc_un.res.bytes;
			sr.wrong_if = c->mfc_un.res.wrong_if;
			read_unlock(&mrt_lock);

			if (copy_to_user(arg, &sr, sizeof(sr)))
				return -EFAULT;
			return 0;
		}
		read_unlock(&mrt_lock);
		return -EADDRNOTAVAIL;
	default:
		return -ENOIOCTLCMD;
	}
}
static int ipmr_device_event(struct notifier_block *this, unsigned long event, void *ptr)
{
	struct net_device *dev = ptr;
	struct vif_device *v;
	int ct;

	if (!net_eq(dev_net(dev), &init_net))
		return NOTIFY_DONE;

	if (event != NETDEV_UNREGISTER)
		return NOTIFY_DONE;
	v = &init_net.ipv4.vif_table[0];
	for (ct = 0; ct < init_net.ipv4.maxvif; ct++, v++) {
		if (v->dev == dev)
			vif_delete(ct, 1);
	}
	return NOTIFY_DONE;
}

static struct notifier_block ip_mr_notifier = {
	.notifier_call = ipmr_device_event,
};
/*
 *	Encapsulate a packet by attaching a valid IPIP header to it.
 *	This avoids tunnel drivers and other mess and gives us the speed so
 *	important for multicast video.
 */

static void ip_encap(struct sk_buff *skb, __be32 saddr, __be32 daddr)
{
	struct iphdr *iph;
	struct iphdr *old_iph = ip_hdr(skb);

	skb_push(skb, sizeof(struct iphdr));
	skb->transport_header = skb->network_header;
	skb_reset_network_header(skb);
	iph = ip_hdr(skb);

	iph->version	=	4;
	iph->tos	=	old_iph->tos;
	iph->ttl	=	old_iph->ttl;
	iph->frag_off	=	0;
	iph->daddr	=	daddr;
	iph->saddr	=	saddr;
	iph->protocol	=	IPPROTO_IPIP;
	iph->ihl	=	5;
	iph->tot_len	=	htons(skb->len);
	ip_select_ident(iph, skb->dst, NULL);
	ip_send_check(iph);

	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
	nf_reset(skb);
}
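/*
 * Editorial note (not in the original source): after ip_encap() the
 * frame on the wire is
 *
 *	| outer IP (proto = IPIP, saddr/daddr = tunnel endpoints) |
 *	| original multicast IP header | payload |
 *
 * i.e. the same packet an ipip tunnel device would build, constructed
 * inline here to keep the tunnel driver off the multicast fast path.
 */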
static inline int ipmr_forward_finish(struct sk_buff *skb)
{
	struct ip_options *opt = &(IPCB(skb)->opt);

	IP_INC_STATS_BH(dev_net(skb->dst->dev), IPSTATS_MIB_OUTFORWDATAGRAMS);

	if (unlikely(opt->optlen))
		ip_forward_options(skb);

	return dst_output(skb);
}
/*
 *	Processing handlers for ipmr_forward
 */

static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, int vifi)
{
	const struct iphdr *iph = ip_hdr(skb);
	struct vif_device *vif = &init_net.ipv4.vif_table[vifi];
	struct net_device *dev;
	struct rtable *rt;
	int    encap = 0;

	if (vif->dev == NULL)
		goto out_free;

#ifdef CONFIG_IP_PIMSM
	if (vif->flags & VIFF_REGISTER) {
		vif->pkt_out++;
		vif->bytes_out += skb->len;
		vif->dev->stats.tx_bytes += skb->len;
		vif->dev->stats.tx_packets++;
		ipmr_cache_report(skb, vifi, IGMPMSG_WHOLEPKT);
		kfree_skb(skb);
		return;
	}
#endif

	if (vif->flags&VIFF_TUNNEL) {
		struct flowi fl = { .oif = vif->link,
				    .nl_u = { .ip4_u =
					      { .daddr = vif->remote,
						.saddr = vif->local,
						.tos = RT_TOS(iph->tos) } },
				    .proto = IPPROTO_IPIP };
		if (ip_route_output_key(&init_net, &rt, &fl))
			goto out_free;
		encap = sizeof(struct iphdr);
	} else {
		struct flowi fl = { .oif = vif->link,
				    .nl_u = { .ip4_u =
					      { .daddr = iph->daddr,
						.tos = RT_TOS(iph->tos) } },
				    .proto = IPPROTO_IPIP };
		if (ip_route_output_key(&init_net, &rt, &fl))
			goto out_free;
	}

	dev = rt->u.dst.dev;

	if (skb->len+encap > dst_mtu(&rt->u.dst) && (ntohs(iph->frag_off) & IP_DF)) {
		/* Do not fragment multicasts. Alas, IPv4 does not
		   allow us to send ICMP here, so such packets silently
		   disappear into a black hole.
		 */

		IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_FRAGFAILS);
		ip_rt_put(rt);
		goto out_free;
	}

	encap += LL_RESERVED_SPACE(dev) + rt->u.dst.header_len;

	if (skb_cow(skb, encap)) {
		ip_rt_put(rt);
		goto out_free;
	}

	vif->pkt_out++;
	vif->bytes_out += skb->len;

	dst_release(skb->dst);
	skb->dst = &rt->u.dst;
	ip_decrease_ttl(ip_hdr(skb));

	/* FIXME: forward and output firewalls used to be called here.
	 * What do we do with netfilter? -- RR */
	if (vif->flags & VIFF_TUNNEL) {
		ip_encap(skb, vif->local, vif->remote);
		/* FIXME: extra output firewall step used to be here. --RR */
		vif->dev->stats.tx_packets++;
		vif->dev->stats.tx_bytes += skb->len;
	}

	IPCB(skb)->flags |= IPSKB_FORWARDED;

	/*
	 * RFC 1584 teaches that a DVMRP/PIM router must deliver packets
	 * locally not only before forwarding, but after forwarding on all
	 * output interfaces: if the mrouter runs a multicast program, that
	 * program should receive packets regardless of which interface it
	 * joined on. Otherwise the program would have to join on every
	 * interface. A multihomed host (or router, but not mrouter), on the
	 * other hand, cannot join on more than one interface, as that would
	 * result in receiving multiple copies of each packet.
	 */
	NF_HOOK(PF_INET, NF_INET_FORWARD, skb, skb->dev, dev,
		ipmr_forward_finish);
	return;

out_free:
	kfree_skb(skb);
	return;
}
static int ipmr_find_vif(struct net_device *dev)
{
	int ct;
	for (ct = init_net.ipv4.maxvif-1; ct >= 0; ct--) {
		if (init_net.ipv4.vif_table[ct].dev == dev)
			break;
	}
	return ct;
}
1321 /* "local" means that we should preserve one skb (for local delivery) */
1323 static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local)
1325 int psend = -1;
1326 int vif, ct;
1328 vif = cache->mfc_parent;
1329 cache->mfc_un.res.pkt++;
1330 cache->mfc_un.res.bytes += skb->len;
1333 * Wrong interface: drop packet and (maybe) send PIM assert.
1335 if (init_net.ipv4.vif_table[vif].dev != skb->dev) {
1336 int true_vifi;
1338 if (skb->rtable->fl.iif == 0) {
1339 /* It is our own packet, looped back.
1340 Very complicated situation...
1342 The best workaround until routing daemons will be
1343 fixed is not to redistribute packet, if it was
1344 send through wrong interface. It means, that
1345 multicast applications WILL NOT work for
1346 (S,G), which have default multicast route pointing
1347 to wrong oif. In any case, it is not a good
1348 idea to use multicasting applications on router.
1350 goto dont_forward;
1353 cache->mfc_un.res.wrong_if++;
1354 true_vifi = ipmr_find_vif(skb->dev);
1356 if (true_vifi >= 0 && init_net.ipv4.mroute_do_assert &&
1357 /* pimsm uses asserts, when switching from RPT to SPT,
1358 so that we cannot check that packet arrived on an oif.
1359 It is bad, but otherwise we would need to move pretty
1360 large chunk of pimd to kernel. Ough... --ANK
1362 (init_net.ipv4.mroute_do_pim ||
1363 cache->mfc_un.res.ttls[true_vifi] < 255) &&
1364 time_after(jiffies,
1365 cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) {
1366 cache->mfc_un.res.last_assert = jiffies;
1367 ipmr_cache_report(skb, true_vifi, IGMPMSG_WRONGVIF);
1369 goto dont_forward;
1372 init_net.ipv4.vif_table[vif].pkt_in++;
1373 init_net.ipv4.vif_table[vif].bytes_in += skb->len;
1376 * Forward the frame
1378 for (ct = cache->mfc_un.res.maxvif-1; ct >= cache->mfc_un.res.minvif; ct--) {
1379 if (ip_hdr(skb)->ttl > cache->mfc_un.res.ttls[ct]) {
1380 if (psend != -1) {
1381 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
1382 if (skb2)
1383 ipmr_queue_xmit(skb2, cache, psend);
1385 psend = ct;
1388 if (psend != -1) {
1389 if (local) {
1390 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
1391 if (skb2)
1392 ipmr_queue_xmit(skb2, cache, psend);
1393 } else {
1394 ipmr_queue_xmit(skb, cache, psend);
1395 return 0;
1399 dont_forward:
1400 if (!local)
1401 kfree_skb(skb);
1402 return 0;
/*
 *	Multicast packets for forwarding arrive here
 */

int ip_mr_input(struct sk_buff *skb)
{
	struct mfc_cache *cache;
	int local = skb->rtable->rt_flags&RTCF_LOCAL;

	/* Packet is looped back after forwarding; it should not be
	   forwarded a second time, but it can still be delivered locally.
	 */
	if (IPCB(skb)->flags&IPSKB_FORWARDED)
		goto dont_forward;

	if (!local) {
		if (IPCB(skb)->opt.router_alert) {
			if (ip_call_ra_chain(skb))
				return 0;
		} else if (ip_hdr(skb)->protocol == IPPROTO_IGMP) {
			/* IGMPv1 (and broken IGMPv2 implementations such as
			   Cisco IOS <= 11.2(8)) do not put the router alert
			   option into IGMP packets destined to routable
			   groups. This is very bad, because it means we
			   would forward NO IGMP messages.
			 */
			read_lock(&mrt_lock);
			if (init_net.ipv4.mroute_sk) {
				nf_reset(skb);
				raw_rcv(init_net.ipv4.mroute_sk, skb);
				read_unlock(&mrt_lock);
				return 0;
			}
			read_unlock(&mrt_lock);
		}
	}

	read_lock(&mrt_lock);
	cache = ipmr_cache_find(ip_hdr(skb)->saddr, ip_hdr(skb)->daddr);

	/*
	 *	No usable cache entry
	 */
	if (cache == NULL) {
		int vif;

		if (local) {
			struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
			ip_local_deliver(skb);
			if (skb2 == NULL) {
				read_unlock(&mrt_lock);
				return -ENOBUFS;
			}
			skb = skb2;
		}

		vif = ipmr_find_vif(skb->dev);
		if (vif >= 0) {
			int err = ipmr_cache_unresolved(vif, skb);
			read_unlock(&mrt_lock);

			return err;
		}
		read_unlock(&mrt_lock);
		kfree_skb(skb);
		return -ENODEV;
	}

	ip_mr_forward(skb, cache, local);

	read_unlock(&mrt_lock);

	if (local)
		return ip_local_deliver(skb);

	return 0;

dont_forward:
	if (local)
		return ip_local_deliver(skb);
	kfree_skb(skb);
	return 0;
}
#ifdef CONFIG_IP_PIMSM
static int __pim_rcv(struct sk_buff *skb, unsigned int pimlen)
{
	struct net_device *reg_dev = NULL;
	struct iphdr *encap;

	encap = (struct iphdr *)(skb_transport_header(skb) + pimlen);
	/*
	   Check that:
	   a. packet is really destined to a multicast group
	   b. packet is not a NULL-REGISTER
	   c. packet is not truncated
	 */
	if (!ipv4_is_multicast(encap->daddr) ||
	    encap->tot_len == 0 ||
	    ntohs(encap->tot_len) + pimlen > skb->len)
		return 1;

	read_lock(&mrt_lock);
	if (reg_vif_num >= 0)
		reg_dev = init_net.ipv4.vif_table[reg_vif_num].dev;
	if (reg_dev)
		dev_hold(reg_dev);
	read_unlock(&mrt_lock);

	if (reg_dev == NULL)
		return 1;

	skb->mac_header = skb->network_header;
	skb_pull(skb, (u8 *)encap - skb->data);
	skb_reset_network_header(skb);
	skb->dev = reg_dev;
	skb->protocol = htons(ETH_P_IP);
	skb->ip_summed = 0;
	skb->pkt_type = PACKET_HOST;
	dst_release(skb->dst);
	skb->dst = NULL;
	reg_dev->stats.rx_bytes += skb->len;
	reg_dev->stats.rx_packets++;
	nf_reset(skb);
	netif_rx(skb);
	dev_put(reg_dev);

	return 0;
}
#endif
#ifdef CONFIG_IP_PIMSM_V1
/*
 * Handle IGMP messages of PIMv1
 */

int pim_rcv_v1(struct sk_buff *skb)
{
	struct igmphdr *pim;

	if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr)))
		goto drop;

	pim = igmp_hdr(skb);

	if (!init_net.ipv4.mroute_do_pim ||
	    pim->group != PIM_V1_VERSION || pim->code != PIM_V1_REGISTER)
		goto drop;

	if (__pim_rcv(skb, sizeof(*pim))) {
drop:
		kfree_skb(skb);
	}
	return 0;
}
#endif
#ifdef CONFIG_IP_PIMSM_V2
static int pim_rcv(struct sk_buff *skb)
{
	struct pimreghdr *pim;

	if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr)))
		goto drop;

	pim = (struct pimreghdr *)skb_transport_header(skb);
	if (pim->type != ((PIM_VERSION<<4)|(PIM_REGISTER)) ||
	    (pim->flags&PIM_NULL_REGISTER) ||
	    (ip_compute_csum((void *)pim, sizeof(*pim)) != 0 &&
	     csum_fold(skb_checksum(skb, 0, skb->len, 0))))
		goto drop;

	if (__pim_rcv(skb, sizeof(*pim))) {
drop:
		kfree_skb(skb);
	}
	return 0;
}
#endif
static int
ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm)
{
	int ct;
	struct rtnexthop *nhp;
	struct net_device *dev = init_net.ipv4.vif_table[c->mfc_parent].dev;
	u8 *b = skb_tail_pointer(skb);
	struct rtattr *mp_head;

	if (dev)
		RTA_PUT(skb, RTA_IIF, 4, &dev->ifindex);

	mp_head = (struct rtattr *)skb_put(skb, RTA_LENGTH(0));

	for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
		if (c->mfc_un.res.ttls[ct] < 255) {
			if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4))
				goto rtattr_failure;
			nhp = (struct rtnexthop *)skb_put(skb, RTA_ALIGN(sizeof(*nhp)));
			nhp->rtnh_flags = 0;
			nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
			nhp->rtnh_ifindex = init_net.ipv4.vif_table[ct].dev->ifindex;
			nhp->rtnh_len = sizeof(*nhp);
		}
	}
	mp_head->rta_type = RTA_MULTIPATH;
	mp_head->rta_len = skb_tail_pointer(skb) - (u8 *)mp_head;
	rtm->rtm_type = RTN_MULTICAST;
	return 1;

rtattr_failure:
	nlmsg_trim(skb, b);
	return -EMSGSIZE;
}
int ipmr_get_route(struct sk_buff *skb, struct rtmsg *rtm, int nowait)
{
	int err;
	struct mfc_cache *cache;
	struct rtable *rt = skb->rtable;

	read_lock(&mrt_lock);
	cache = ipmr_cache_find(rt->rt_src, rt->rt_dst);

	if (cache == NULL) {
		struct sk_buff *skb2;
		struct iphdr *iph;
		struct net_device *dev;
		int vif;

		if (nowait) {
			read_unlock(&mrt_lock);
			return -EAGAIN;
		}

		dev = skb->dev;
		if (dev == NULL || (vif = ipmr_find_vif(dev)) < 0) {
			read_unlock(&mrt_lock);
			return -ENODEV;
		}
		skb2 = skb_clone(skb, GFP_ATOMIC);
		if (!skb2) {
			read_unlock(&mrt_lock);
			return -ENOMEM;
		}

		skb_push(skb2, sizeof(struct iphdr));
		skb_reset_network_header(skb2);
		iph = ip_hdr(skb2);
		iph->ihl = sizeof(struct iphdr) >> 2;
		iph->saddr = rt->rt_src;
		iph->daddr = rt->rt_dst;
		iph->version = 0;
		err = ipmr_cache_unresolved(vif, skb2);
		read_unlock(&mrt_lock);
		return err;
	}

	if (!nowait && (rtm->rtm_flags&RTM_F_NOTIFY))
		cache->mfc_flags |= MFC_NOTIFY;
	err = ipmr_fill_mroute(skb, cache, rtm);
	read_unlock(&mrt_lock);
	return err;
}
#ifdef CONFIG_PROC_FS
/*
 *	The /proc interfaces to multicast routing /proc/ip_mr_cache /proc/ip_mr_vif
 */
struct ipmr_vif_iter {
	int ct;
};

static struct vif_device *ipmr_vif_seq_idx(struct ipmr_vif_iter *iter,
					   loff_t pos)
{
	for (iter->ct = 0; iter->ct < init_net.ipv4.maxvif; ++iter->ct) {
		if (!VIF_EXISTS(&init_net, iter->ct))
			continue;
		if (pos-- == 0)
			return &init_net.ipv4.vif_table[iter->ct];
	}
	return NULL;
}

static void *ipmr_vif_seq_start(struct seq_file *seq, loff_t *pos)
	__acquires(mrt_lock)
{
	read_lock(&mrt_lock);
	return *pos ? ipmr_vif_seq_idx(seq->private, *pos - 1)
		: SEQ_START_TOKEN;
}

static void *ipmr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct ipmr_vif_iter *iter = seq->private;

	++*pos;
	if (v == SEQ_START_TOKEN)
		return ipmr_vif_seq_idx(iter, 0);

	while (++iter->ct < init_net.ipv4.maxvif) {
		if (!VIF_EXISTS(&init_net, iter->ct))
			continue;
		return &init_net.ipv4.vif_table[iter->ct];
	}
	return NULL;
}

static void ipmr_vif_seq_stop(struct seq_file *seq, void *v)
	__releases(mrt_lock)
{
	read_unlock(&mrt_lock);
}

static int ipmr_vif_seq_show(struct seq_file *seq, void *v)
{
	if (v == SEQ_START_TOKEN) {
		seq_puts(seq,
			 "Interface      BytesIn  PktsIn  BytesOut PktsOut Flags Local    Remote\n");
	} else {
		const struct vif_device *vif = v;
		const char *name = vif->dev ? vif->dev->name : "none";

		seq_printf(seq,
			   "%2Zd %-10s %8ld %7ld  %8ld %7ld %05X %08X %08X\n",
			   vif - init_net.ipv4.vif_table,
			   name, vif->bytes_in, vif->pkt_in,
			   vif->bytes_out, vif->pkt_out,
			   vif->flags, vif->local, vif->remote);
	}
	return 0;
}

static const struct seq_operations ipmr_vif_seq_ops = {
	.start = ipmr_vif_seq_start,
	.next  = ipmr_vif_seq_next,
	.stop  = ipmr_vif_seq_stop,
	.show  = ipmr_vif_seq_show,
};

static int ipmr_vif_open(struct inode *inode, struct file *file)
{
	return seq_open_private(file, &ipmr_vif_seq_ops,
				sizeof(struct ipmr_vif_iter));
}

static const struct file_operations ipmr_vif_fops = {
	.owner	 = THIS_MODULE,
	.open    = ipmr_vif_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = seq_release_private,
};
struct ipmr_mfc_iter {
	struct mfc_cache **cache;
	int ct;
};


static struct mfc_cache *ipmr_mfc_seq_idx(struct ipmr_mfc_iter *it, loff_t pos)
{
	struct mfc_cache *mfc;

	it->cache = init_net.ipv4.mfc_cache_array;
	read_lock(&mrt_lock);
	for (it->ct = 0; it->ct < MFC_LINES; it->ct++)
		for (mfc = init_net.ipv4.mfc_cache_array[it->ct];
		     mfc; mfc = mfc->next)
			if (pos-- == 0)
				return mfc;
	read_unlock(&mrt_lock);

	it->cache = &mfc_unres_queue;
	spin_lock_bh(&mfc_unres_lock);
	for (mfc = mfc_unres_queue; mfc; mfc = mfc->next)
		if (pos-- == 0)
			return mfc;
	spin_unlock_bh(&mfc_unres_lock);

	it->cache = NULL;
	return NULL;
}


static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
{
	struct ipmr_mfc_iter *it = seq->private;
	it->cache = NULL;
	it->ct = 0;
	return *pos ? ipmr_mfc_seq_idx(seq->private, *pos - 1)
		: SEQ_START_TOKEN;
}

static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct mfc_cache *mfc = v;
	struct ipmr_mfc_iter *it = seq->private;

	++*pos;

	if (v == SEQ_START_TOKEN)
		return ipmr_mfc_seq_idx(seq->private, 0);

	if (mfc->next)
		return mfc->next;

	if (it->cache == &mfc_unres_queue)
		goto end_of_list;

	BUG_ON(it->cache != init_net.ipv4.mfc_cache_array);

	while (++it->ct < MFC_LINES) {
		mfc = init_net.ipv4.mfc_cache_array[it->ct];
		if (mfc)
			return mfc;
	}

	/* exhausted cache_array, show unresolved */
	read_unlock(&mrt_lock);
	it->cache = &mfc_unres_queue;
	it->ct = 0;

	spin_lock_bh(&mfc_unres_lock);
	mfc = mfc_unres_queue;
	if (mfc)
		return mfc;

end_of_list:
	spin_unlock_bh(&mfc_unres_lock);
	it->cache = NULL;

	return NULL;
}

static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
{
	struct ipmr_mfc_iter *it = seq->private;

	if (it->cache == &mfc_unres_queue)
		spin_unlock_bh(&mfc_unres_lock);
	else if (it->cache == init_net.ipv4.mfc_cache_array)
		read_unlock(&mrt_lock);
}
static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
{
	int n;

	if (v == SEQ_START_TOKEN) {
		seq_puts(seq,
			 "Group    Origin   Iif     Pkts    Bytes    Wrong Oifs\n");
	} else {
		const struct mfc_cache *mfc = v;
		const struct ipmr_mfc_iter *it = seq->private;

		seq_printf(seq, "%08lX %08lX %-3hd",
			   (unsigned long) mfc->mfc_mcastgrp,
			   (unsigned long) mfc->mfc_origin,
			   mfc->mfc_parent);

		if (it->cache != &mfc_unres_queue) {
			seq_printf(seq, " %8lu %8lu %8lu",
				   mfc->mfc_un.res.pkt,
				   mfc->mfc_un.res.bytes,
				   mfc->mfc_un.res.wrong_if);
			for (n = mfc->mfc_un.res.minvif;
			     n < mfc->mfc_un.res.maxvif; n++) {
				if (VIF_EXISTS(&init_net, n) &&
				    mfc->mfc_un.res.ttls[n] < 255)
					seq_printf(seq,
						   " %2d:%-3d",
						   n, mfc->mfc_un.res.ttls[n]);
			}
		} else {
			/* unresolved mfc_caches don't contain
			 * pkt, bytes and wrong_if values
			 */
			seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul);
		}
		seq_putc(seq, '\n');
	}
	return 0;
}

static const struct seq_operations ipmr_mfc_seq_ops = {
	.start = ipmr_mfc_seq_start,
	.next  = ipmr_mfc_seq_next,
	.stop  = ipmr_mfc_seq_stop,
	.show  = ipmr_mfc_seq_show,
};

static int ipmr_mfc_open(struct inode *inode, struct file *file)
{
	return seq_open_private(file, &ipmr_mfc_seq_ops,
				sizeof(struct ipmr_mfc_iter));
}

static const struct file_operations ipmr_mfc_fops = {
	.owner	 = THIS_MODULE,
	.open    = ipmr_mfc_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = seq_release_private,
};
#endif
#ifdef CONFIG_IP_PIMSM_V2
static struct net_protocol pim_protocol = {
	.handler	=	pim_rcv,
};
#endif
/*
 *	Setup for IP multicast routing
 */
static int __net_init ipmr_net_init(struct net *net)
{
	int err = 0;

	net->ipv4.vif_table = kcalloc(MAXVIFS, sizeof(struct vif_device),
				      GFP_KERNEL);
	if (!net->ipv4.vif_table) {
		err = -ENOMEM;
		goto fail;
	}

	/* Forwarding cache */
	net->ipv4.mfc_cache_array = kcalloc(MFC_LINES,
					    sizeof(struct mfc_cache *),
					    GFP_KERNEL);
	if (!net->ipv4.mfc_cache_array) {
		err = -ENOMEM;
		goto fail_mfc_cache;
	}
	return 0;

fail_mfc_cache:
	kfree(net->ipv4.vif_table);
fail:
	return err;
}

static void __net_exit ipmr_net_exit(struct net *net)
{
	kfree(net->ipv4.mfc_cache_array);
	kfree(net->ipv4.vif_table);
}

static struct pernet_operations ipmr_net_ops = {
	.init = ipmr_net_init,
	.exit = ipmr_net_exit,
};
int __init ip_mr_init(void)
{
	int err;

	mrt_cachep = kmem_cache_create("ip_mrt_cache",
				       sizeof(struct mfc_cache),
				       0, SLAB_HWCACHE_ALIGN|SLAB_PANIC,
				       NULL);
	if (!mrt_cachep)
		return -ENOMEM;

	err = register_pernet_subsys(&ipmr_net_ops);
	if (err)
		goto reg_pernet_fail;

	setup_timer(&ipmr_expire_timer, ipmr_expire_process, 0);
	err = register_netdevice_notifier(&ip_mr_notifier);
	if (err)
		goto reg_notif_fail;
#ifdef CONFIG_PROC_FS
	err = -ENOMEM;
	if (!proc_net_fops_create(&init_net, "ip_mr_vif", 0, &ipmr_vif_fops))
		goto proc_vif_fail;
	if (!proc_net_fops_create(&init_net, "ip_mr_cache", 0, &ipmr_mfc_fops))
		goto proc_cache_fail;
#endif
	return 0;

#ifdef CONFIG_PROC_FS
proc_cache_fail:
	proc_net_remove(&init_net, "ip_mr_vif");
proc_vif_fail:
	unregister_netdevice_notifier(&ip_mr_notifier);
#endif
reg_notif_fail:
	del_timer(&ipmr_expire_timer);
	unregister_pernet_subsys(&ipmr_net_ops);
reg_pernet_fail:
	kmem_cache_destroy(mrt_cachep);
	return err;
}