netns: ipmr: allocate mroute_socket per-namespace.
net/ipv4/ipmr.c
1 /*
2 * IP multicast routing support for mrouted 3.6/3.8
4 * (c) 1995 Alan Cox, <alan@lxorguk.ukuu.org.uk>
5 * Linux Consultancy and Custom Driver Development
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation; either version
10 * 2 of the License, or (at your option) any later version.
12 * Fixes:
13 * Michael Chastain : Incorrect size of copying.
14 * Alan Cox : Added the cache manager code
15 * Alan Cox : Fixed the clone/copy bug and device race.
16 * Mike McLagan : Routing by source
17 * Malcolm Beattie : Buffer handling fixes.
18 * Alexey Kuznetsov : Double buffer free and other fixes.
19 * SVR Anand : Fixed several multicast bugs and problems.
20 * Alexey Kuznetsov : Status, optimisations and more.
21 * Brad Parker : Better behaviour on mrouted upcall
22 * overflow.
23 * Carlos Picoto : PIMv1 Support
24 * Pavlin Ivanov Radoslavov: PIMv2 Registers must checksum only PIM header
25 * Relax this requirement to work with older peers.
29 #include <asm/system.h>
30 #include <asm/uaccess.h>
31 #include <linux/types.h>
32 #include <linux/capability.h>
33 #include <linux/errno.h>
34 #include <linux/timer.h>
35 #include <linux/mm.h>
36 #include <linux/kernel.h>
37 #include <linux/fcntl.h>
38 #include <linux/stat.h>
39 #include <linux/socket.h>
40 #include <linux/in.h>
41 #include <linux/inet.h>
42 #include <linux/netdevice.h>
43 #include <linux/inetdevice.h>
44 #include <linux/igmp.h>
45 #include <linux/proc_fs.h>
46 #include <linux/seq_file.h>
47 #include <linux/mroute.h>
48 #include <linux/init.h>
49 #include <linux/if_ether.h>
50 #include <net/net_namespace.h>
51 #include <net/ip.h>
52 #include <net/protocol.h>
53 #include <linux/skbuff.h>
54 #include <net/route.h>
55 #include <net/sock.h>
56 #include <net/icmp.h>
57 #include <net/udp.h>
58 #include <net/raw.h>
59 #include <linux/notifier.h>
60 #include <linux/if_arp.h>
61 #include <linux/netfilter_ipv4.h>
62 #include <net/ipip.h>
63 #include <net/checksum.h>
64 #include <net/netlink.h>
66 #if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2)
67 #define CONFIG_IP_PIMSM 1
68 #endif
70 /* Big lock, protecting vif table, mrt cache and mroute socket state.
71 Note that the changes are semaphored via rtnl_lock.
74 static DEFINE_RWLOCK(mrt_lock);
77 * Multicast router control variables
80 static struct vif_device vif_table[MAXVIFS]; /* Devices */
81 static int maxvif;
83 #define VIF_EXISTS(idx) (vif_table[idx].dev != NULL)
85 static int mroute_do_assert; /* Set in PIM assert */
86 static int mroute_do_pim;
88 static struct mfc_cache *mfc_cache_array[MFC_LINES]; /* Forwarding cache */
90 static struct mfc_cache *mfc_unres_queue; /* Queue of unresolved entries */
91 static atomic_t cache_resolve_queue_len; /* Size of unresolved */
93 /* Special spinlock for queue of unresolved entries */
94 static DEFINE_SPINLOCK(mfc_unres_lock);
96 /* We return to Alan's original scheme. The hash table of resolved
97 entries is changed only in process context and is protected
98 by the weak lock mrt_lock. The queue of unresolved entries is protected
99 by the strong spinlock mfc_unres_lock.
101 In this case the data path is entirely free of exclusive locks.
104 static struct kmem_cache *mrt_cachep __read_mostly;
106 static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local);
107 static int ipmr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert);
108 static int ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm);
110 #ifdef CONFIG_IP_PIMSM_V2
111 static struct net_protocol pim_protocol;
112 #endif
114 static struct timer_list ipmr_expire_timer;
116 /* Service routines creating virtual interfaces: DVMRP tunnels and PIMREG */
118 static void ipmr_del_tunnel(struct net_device *dev, struct vifctl *v)
120 dev_close(dev);
122 dev = __dev_get_by_name(&init_net, "tunl0");
123 if (dev) {
124 const struct net_device_ops *ops = dev->netdev_ops;
125 struct ifreq ifr;
126 struct ip_tunnel_parm p;
128 memset(&p, 0, sizeof(p));
129 p.iph.daddr = v->vifc_rmt_addr.s_addr;
130 p.iph.saddr = v->vifc_lcl_addr.s_addr;
131 p.iph.version = 4;
132 p.iph.ihl = 5;
133 p.iph.protocol = IPPROTO_IPIP;
134 sprintf(p.name, "dvmrp%d", v->vifc_vifi);
135 ifr.ifr_ifru.ifru_data = (__force void __user *)&p;
137 if (ops->ndo_do_ioctl) {
138 mm_segment_t oldfs = get_fs();
140 set_fs(KERNEL_DS);
141 ops->ndo_do_ioctl(dev, &ifr, SIOCDELTUNNEL);
142 set_fs(oldfs);
147 static
148 struct net_device *ipmr_new_tunnel(struct vifctl *v)
150 struct net_device *dev;
152 dev = __dev_get_by_name(&init_net, "tunl0");
154 if (dev) {
155 const struct net_device_ops *ops = dev->netdev_ops;
156 int err;
157 struct ifreq ifr;
158 struct ip_tunnel_parm p;
159 struct in_device *in_dev;
161 memset(&p, 0, sizeof(p));
162 p.iph.daddr = v->vifc_rmt_addr.s_addr;
163 p.iph.saddr = v->vifc_lcl_addr.s_addr;
164 p.iph.version = 4;
165 p.iph.ihl = 5;
166 p.iph.protocol = IPPROTO_IPIP;
167 sprintf(p.name, "dvmrp%d", v->vifc_vifi);
168 ifr.ifr_ifru.ifru_data = (__force void __user *)&p;
170 if (ops->ndo_do_ioctl) {
171 mm_segment_t oldfs = get_fs();
173 set_fs(KERNEL_DS);
174 err = ops->ndo_do_ioctl(dev, &ifr, SIOCADDTUNNEL);
175 set_fs(oldfs);
176 } else
177 err = -EOPNOTSUPP;
179 dev = NULL;
181 if (err == 0 && (dev = __dev_get_by_name(&init_net, p.name)) != NULL) {
182 dev->flags |= IFF_MULTICAST;
184 in_dev = __in_dev_get_rtnl(dev);
185 if (in_dev == NULL)
186 goto failure;
188 ipv4_devconf_setall(in_dev);
189 IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0;
191 if (dev_open(dev))
192 goto failure;
193 dev_hold(dev);
196 return dev;
198 failure:
199 /* allow the register to be completed before unregistering. */
200 rtnl_unlock();
201 rtnl_lock();
203 unregister_netdevice(dev);
204 return NULL;
207 #ifdef CONFIG_IP_PIMSM
209 static int reg_vif_num = -1;
211 static int reg_vif_xmit(struct sk_buff *skb, struct net_device *dev)
213 read_lock(&mrt_lock);
214 dev->stats.tx_bytes += skb->len;
215 dev->stats.tx_packets++;
216 ipmr_cache_report(skb, reg_vif_num, IGMPMSG_WHOLEPKT);
217 read_unlock(&mrt_lock);
218 kfree_skb(skb);
219 return 0;
222 static const struct net_device_ops reg_vif_netdev_ops = {
223 .ndo_start_xmit = reg_vif_xmit,
226 static void reg_vif_setup(struct net_device *dev)
228 dev->type = ARPHRD_PIMREG;
229 dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr) - 8;
230 dev->flags = IFF_NOARP;
231 dev->netdev_ops = &reg_vif_netdev_ops,
232 dev->destructor = free_netdev;
235 static struct net_device *ipmr_reg_vif(void)
237 struct net_device *dev;
238 struct in_device *in_dev;
240 dev = alloc_netdev(0, "pimreg", reg_vif_setup);
242 if (dev == NULL)
243 return NULL;
245 if (register_netdevice(dev)) {
246 free_netdev(dev);
247 return NULL;
249 dev->iflink = 0;
251 rcu_read_lock();
252 if ((in_dev = __in_dev_get_rcu(dev)) == NULL) {
253 rcu_read_unlock();
254 goto failure;
257 ipv4_devconf_setall(in_dev);
258 IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0;
259 rcu_read_unlock();
261 if (dev_open(dev))
262 goto failure;
264 dev_hold(dev);
266 return dev;
268 failure:
269 /* allow the register to be completed before unregistering. */
270 rtnl_unlock();
271 rtnl_lock();
273 unregister_netdevice(dev);
274 return NULL;
276 #endif
279 * Delete a VIF entry
280 * @notify: Set to 1, if the caller is a notifier_call
283 static int vif_delete(int vifi, int notify)
285 struct vif_device *v;
286 struct net_device *dev;
287 struct in_device *in_dev;
289 if (vifi < 0 || vifi >= maxvif)
290 return -EADDRNOTAVAIL;
292 v = &vif_table[vifi];
294 write_lock_bh(&mrt_lock);
295 dev = v->dev;
296 v->dev = NULL;
298 if (!dev) {
299 write_unlock_bh(&mrt_lock);
300 return -EADDRNOTAVAIL;
303 #ifdef CONFIG_IP_PIMSM
304 if (vifi == reg_vif_num)
305 reg_vif_num = -1;
306 #endif
308 if (vifi+1 == maxvif) {
309 int tmp;
310 for (tmp=vifi-1; tmp>=0; tmp--) {
311 if (VIF_EXISTS(tmp))
312 break;
314 maxvif = tmp+1;
317 write_unlock_bh(&mrt_lock);
319 dev_set_allmulti(dev, -1);
321 if ((in_dev = __in_dev_get_rtnl(dev)) != NULL) {
322 IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)--;
323 ip_rt_multicast_event(in_dev);
326 if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER) && !notify)
327 unregister_netdevice(dev);
329 dev_put(dev);
330 return 0;
333 /* Destroy an unresolved cache entry, killing queued skbs
334 and reporting error to netlink readers.
337 static void ipmr_destroy_unres(struct mfc_cache *c)
339 struct sk_buff *skb;
340 struct nlmsgerr *e;
342 atomic_dec(&cache_resolve_queue_len);
344 while ((skb = skb_dequeue(&c->mfc_un.unres.unresolved))) {
345 if (ip_hdr(skb)->version == 0) {
346 struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));
347 nlh->nlmsg_type = NLMSG_ERROR;
348 nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
349 skb_trim(skb, nlh->nlmsg_len);
350 e = NLMSG_DATA(nlh);
351 e->error = -ETIMEDOUT;
352 memset(&e->msg, 0, sizeof(e->msg));
354 rtnl_unicast(skb, &init_net, NETLINK_CB(skb).pid);
355 } else
356 kfree_skb(skb);
359 kmem_cache_free(mrt_cachep, c);
363 /* Single timer process for all the unresolved queue. */
365 static void ipmr_expire_process(unsigned long dummy)
367 unsigned long now;
368 unsigned long expires;
369 struct mfc_cache *c, **cp;
371 if (!spin_trylock(&mfc_unres_lock)) {
372 mod_timer(&ipmr_expire_timer, jiffies+HZ/10);
373 return;
376 if (atomic_read(&cache_resolve_queue_len) == 0)
377 goto out;
379 now = jiffies;
380 expires = 10*HZ;
381 cp = &mfc_unres_queue;
383 while ((c=*cp) != NULL) {
384 if (time_after(c->mfc_un.unres.expires, now)) {
385 unsigned long interval = c->mfc_un.unres.expires - now;
386 if (interval < expires)
387 expires = interval;
388 cp = &c->next;
389 continue;
392 *cp = c->next;
394 ipmr_destroy_unres(c);
397 if (atomic_read(&cache_resolve_queue_len))
398 mod_timer(&ipmr_expire_timer, jiffies + expires);
400 out:
401 spin_unlock(&mfc_unres_lock);
404 /* Fill oifs list. It is called under write locked mrt_lock. */
406 static void ipmr_update_thresholds(struct mfc_cache *cache, unsigned char *ttls)
408 int vifi;
410 cache->mfc_un.res.minvif = MAXVIFS;
411 cache->mfc_un.res.maxvif = 0;
412 memset(cache->mfc_un.res.ttls, 255, MAXVIFS);
414 for (vifi=0; vifi<maxvif; vifi++) {
415 if (VIF_EXISTS(vifi) && ttls[vifi] && ttls[vifi] < 255) {
416 cache->mfc_un.res.ttls[vifi] = ttls[vifi];
417 if (cache->mfc_un.res.minvif > vifi)
418 cache->mfc_un.res.minvif = vifi;
419 if (cache->mfc_un.res.maxvif <= vifi)
420 cache->mfc_un.res.maxvif = vifi + 1;
425 static int vif_add(struct vifctl *vifc, int mrtsock)
427 int vifi = vifc->vifc_vifi;
428 struct vif_device *v = &vif_table[vifi];
429 struct net_device *dev;
430 struct in_device *in_dev;
431 int err;
433 /* Is vif busy ? */
434 if (VIF_EXISTS(vifi))
435 return -EADDRINUSE;
437 switch (vifc->vifc_flags) {
438 #ifdef CONFIG_IP_PIMSM
439 case VIFF_REGISTER:
441 * Special Purpose VIF in PIM
442 * All the packets will be sent to the daemon
444 if (reg_vif_num >= 0)
445 return -EADDRINUSE;
446 dev = ipmr_reg_vif();
447 if (!dev)
448 return -ENOBUFS;
449 err = dev_set_allmulti(dev, 1);
450 if (err) {
451 unregister_netdevice(dev);
452 dev_put(dev);
453 return err;
455 break;
456 #endif
457 case VIFF_TUNNEL:
458 dev = ipmr_new_tunnel(vifc);
459 if (!dev)
460 return -ENOBUFS;
461 err = dev_set_allmulti(dev, 1);
462 if (err) {
463 ipmr_del_tunnel(dev, vifc);
464 dev_put(dev);
465 return err;
467 break;
468 case 0:
469 dev = ip_dev_find(&init_net, vifc->vifc_lcl_addr.s_addr);
470 if (!dev)
471 return -EADDRNOTAVAIL;
472 err = dev_set_allmulti(dev, 1);
473 if (err) {
474 dev_put(dev);
475 return err;
477 break;
478 default:
479 return -EINVAL;
482 if ((in_dev = __in_dev_get_rtnl(dev)) == NULL)
483 return -EADDRNOTAVAIL;
484 IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)++;
485 ip_rt_multicast_event(in_dev);
488 * Fill in the VIF structures
490 v->rate_limit = vifc->vifc_rate_limit;
491 v->local = vifc->vifc_lcl_addr.s_addr;
492 v->remote = vifc->vifc_rmt_addr.s_addr;
493 v->flags = vifc->vifc_flags;
494 if (!mrtsock)
495 v->flags |= VIFF_STATIC;
496 v->threshold = vifc->vifc_threshold;
497 v->bytes_in = 0;
498 v->bytes_out = 0;
499 v->pkt_in = 0;
500 v->pkt_out = 0;
501 v->link = dev->ifindex;
502 if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER))
503 v->link = dev->iflink;
505 /* And finish update writing critical data */
506 write_lock_bh(&mrt_lock);
507 v->dev = dev;
508 #ifdef CONFIG_IP_PIMSM
509 if (v->flags&VIFF_REGISTER)
510 reg_vif_num = vifi;
511 #endif
512 if (vifi+1 > maxvif)
513 maxvif = vifi+1;
514 write_unlock_bh(&mrt_lock);
515 return 0;
518 static struct mfc_cache *ipmr_cache_find(__be32 origin, __be32 mcastgrp)
520 int line = MFC_HASH(mcastgrp, origin);
521 struct mfc_cache *c;
523 for (c=mfc_cache_array[line]; c; c = c->next) {
524 if (c->mfc_origin==origin && c->mfc_mcastgrp==mcastgrp)
525 break;
527 return c;
531 * Allocate a multicast cache entry
533 static struct mfc_cache *ipmr_cache_alloc(void)
535 struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
536 if (c == NULL)
537 return NULL;
538 c->mfc_un.res.minvif = MAXVIFS;
539 return c;
542 static struct mfc_cache *ipmr_cache_alloc_unres(void)
544 struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
545 if (c == NULL)
546 return NULL;
547 skb_queue_head_init(&c->mfc_un.unres.unresolved);
548 c->mfc_un.unres.expires = jiffies + 10*HZ;
549 return c;
553 * A cache entry has gone into a resolved state from queued
556 static void ipmr_cache_resolve(struct mfc_cache *uc, struct mfc_cache *c)
558 struct sk_buff *skb;
559 struct nlmsgerr *e;
562 * Play the pending entries through our router
565 while ((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) {
566 if (ip_hdr(skb)->version == 0) {
567 struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));
569 if (ipmr_fill_mroute(skb, c, NLMSG_DATA(nlh)) > 0) {
570 nlh->nlmsg_len = (skb_tail_pointer(skb) -
571 (u8 *)nlh);
572 } else {
573 nlh->nlmsg_type = NLMSG_ERROR;
574 nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
575 skb_trim(skb, nlh->nlmsg_len);
576 e = NLMSG_DATA(nlh);
577 e->error = -EMSGSIZE;
578 memset(&e->msg, 0, sizeof(e->msg));
581 rtnl_unicast(skb, &init_net, NETLINK_CB(skb).pid);
582 } else
583 ip_mr_forward(skb, c, 0);
588 * Bounce a cache query up to mrouted. We could use netlink for this but mrouted
589 * expects the following bizarre scheme.
591 * Called under mrt_lock.
594 static int ipmr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert)
596 struct sk_buff *skb;
597 const int ihl = ip_hdrlen(pkt);
598 struct igmphdr *igmp;
599 struct igmpmsg *msg;
600 int ret;
602 #ifdef CONFIG_IP_PIMSM
603 if (assert == IGMPMSG_WHOLEPKT)
604 skb = skb_realloc_headroom(pkt, sizeof(struct iphdr));
605 else
606 #endif
607 skb = alloc_skb(128, GFP_ATOMIC);
609 if (!skb)
610 return -ENOBUFS;
612 #ifdef CONFIG_IP_PIMSM
613 if (assert == IGMPMSG_WHOLEPKT) {
614 /* Ugly, but we have no choice with this interface.
615 Duplicate old header, fix ihl, length etc.
616 And all this only to mangle msg->im_msgtype and
617 to set msg->im_mbz to "mbz" :-)
619 skb_push(skb, sizeof(struct iphdr));
620 skb_reset_network_header(skb);
621 skb_reset_transport_header(skb);
622 msg = (struct igmpmsg *)skb_network_header(skb);
623 memcpy(msg, skb_network_header(pkt), sizeof(struct iphdr));
624 msg->im_msgtype = IGMPMSG_WHOLEPKT;
625 msg->im_mbz = 0;
626 msg->im_vif = reg_vif_num;
627 ip_hdr(skb)->ihl = sizeof(struct iphdr) >> 2;
628 ip_hdr(skb)->tot_len = htons(ntohs(ip_hdr(pkt)->tot_len) +
629 sizeof(struct iphdr));
630 } else
631 #endif
635 * Copy the IP header
638 skb->network_header = skb->tail;
639 skb_put(skb, ihl);
640 skb_copy_to_linear_data(skb, pkt->data, ihl);
641 ip_hdr(skb)->protocol = 0; /* Flag to the kernel this is a route add */
642 msg = (struct igmpmsg *)skb_network_header(skb);
643 msg->im_vif = vifi;
644 skb->dst = dst_clone(pkt->dst);
647 * Add our header
650 igmp=(struct igmphdr *)skb_put(skb, sizeof(struct igmphdr));
651 igmp->type =
652 msg->im_msgtype = assert;
653 igmp->code = 0;
654 ip_hdr(skb)->tot_len = htons(skb->len); /* Fix the length */
655 skb->transport_header = skb->network_header;
658 if (init_net.ipv4.mroute_sk == NULL) {
659 kfree_skb(skb);
660 return -EINVAL;
664 * Deliver to mrouted
666 ret = sock_queue_rcv_skb(init_net.ipv4.mroute_sk, skb);
667 if (ret < 0) {
668 if (net_ratelimit())
669 printk(KERN_WARNING "mroute: pending queue full, dropping entries.\n");
670 kfree_skb(skb);
673 return ret;
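/*
 * Userspace side of the upcall above - an illustrative sketch only, not
 * part of this file.  The daemon that called MRT_INIT reads struct igmpmsg
 * reports (defined in <linux/mroute.h>) from its raw IGMP socket; the
 * buffer size and the "mrouter_fd" descriptor name are assumptions.
 *
 *	char buf[8192];
 *	ssize_t n = read(mrouter_fd, buf, sizeof(buf));
 *	struct igmpmsg *im = (struct igmpmsg *)buf;
 *
 *	if (n >= (ssize_t)sizeof(*im) && im->im_mbz == 0) {
 *		switch (im->im_msgtype) {
 *		case IGMPMSG_NOCACHE:	// kernel asks for an MFC entry (MRT_ADD_MFC)
 *		case IGMPMSG_WRONGVIF:	// packet arrived on an unexpected vif
 *		case IGMPMSG_WHOLEPKT:	// full packet handed up via the register vif
 *			break;
 *		}
 *	}
 */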
677 * Queue a packet for resolution. It gets a locked cache entry!
680 static int
681 ipmr_cache_unresolved(vifi_t vifi, struct sk_buff *skb)
683 int err;
684 struct mfc_cache *c;
685 const struct iphdr *iph = ip_hdr(skb);
687 spin_lock_bh(&mfc_unres_lock);
688 for (c=mfc_unres_queue; c; c=c->next) {
689 if (c->mfc_mcastgrp == iph->daddr &&
690 c->mfc_origin == iph->saddr)
691 break;
694 if (c == NULL) {
696 * Create a new entry if allowable
699 if (atomic_read(&cache_resolve_queue_len) >= 10 ||
700 (c=ipmr_cache_alloc_unres())==NULL) {
701 spin_unlock_bh(&mfc_unres_lock);
703 kfree_skb(skb);
704 return -ENOBUFS;
708 * Fill in the new cache entry
710 c->mfc_parent = -1;
711 c->mfc_origin = iph->saddr;
712 c->mfc_mcastgrp = iph->daddr;
715 * Reflect first query at mrouted.
717 if ((err = ipmr_cache_report(skb, vifi, IGMPMSG_NOCACHE))<0) {
718 /* If the report failed throw the cache entry
719 out - Brad Parker
721 spin_unlock_bh(&mfc_unres_lock);
723 kmem_cache_free(mrt_cachep, c);
724 kfree_skb(skb);
725 return err;
728 atomic_inc(&cache_resolve_queue_len);
729 c->next = mfc_unres_queue;
730 mfc_unres_queue = c;
732 mod_timer(&ipmr_expire_timer, c->mfc_un.unres.expires);
736 * See if we can append the packet
738 if (c->mfc_un.unres.unresolved.qlen>3) {
739 kfree_skb(skb);
740 err = -ENOBUFS;
741 } else {
742 skb_queue_tail(&c->mfc_un.unres.unresolved, skb);
743 err = 0;
746 spin_unlock_bh(&mfc_unres_lock);
747 return err;
751 * MFC cache manipulation by user space mroute daemon
754 static int ipmr_mfc_delete(struct mfcctl *mfc)
756 int line;
757 struct mfc_cache *c, **cp;
759 line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);
761 for (cp=&mfc_cache_array[line]; (c=*cp) != NULL; cp = &c->next) {
762 if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
763 c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) {
764 write_lock_bh(&mrt_lock);
765 *cp = c->next;
766 write_unlock_bh(&mrt_lock);
768 kmem_cache_free(mrt_cachep, c);
769 return 0;
772 return -ENOENT;
775 static int ipmr_mfc_add(struct mfcctl *mfc, int mrtsock)
777 int line;
778 struct mfc_cache *uc, *c, **cp;
780 line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);
782 for (cp=&mfc_cache_array[line]; (c=*cp) != NULL; cp = &c->next) {
783 if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
784 c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr)
785 break;
788 if (c != NULL) {
789 write_lock_bh(&mrt_lock);
790 c->mfc_parent = mfc->mfcc_parent;
791 ipmr_update_thresholds(c, mfc->mfcc_ttls);
792 if (!mrtsock)
793 c->mfc_flags |= MFC_STATIC;
794 write_unlock_bh(&mrt_lock);
795 return 0;
798 if (!ipv4_is_multicast(mfc->mfcc_mcastgrp.s_addr))
799 return -EINVAL;
801 c = ipmr_cache_alloc();
802 if (c == NULL)
803 return -ENOMEM;
805 c->mfc_origin = mfc->mfcc_origin.s_addr;
806 c->mfc_mcastgrp = mfc->mfcc_mcastgrp.s_addr;
807 c->mfc_parent = mfc->mfcc_parent;
808 ipmr_update_thresholds(c, mfc->mfcc_ttls);
809 if (!mrtsock)
810 c->mfc_flags |= MFC_STATIC;
812 write_lock_bh(&mrt_lock);
813 c->next = mfc_cache_array[line];
814 mfc_cache_array[line] = c;
815 write_unlock_bh(&mrt_lock);
818 * Check to see if we resolved a queued list. If so we
819 * need to send on the frames and tidy up.
821 spin_lock_bh(&mfc_unres_lock);
822 for (cp = &mfc_unres_queue; (uc=*cp) != NULL;
823 cp = &uc->next) {
824 if (uc->mfc_origin == c->mfc_origin &&
825 uc->mfc_mcastgrp == c->mfc_mcastgrp) {
826 *cp = uc->next;
827 if (atomic_dec_and_test(&cache_resolve_queue_len))
828 del_timer(&ipmr_expire_timer);
829 break;
832 spin_unlock_bh(&mfc_unres_lock);
834 if (uc) {
835 ipmr_cache_resolve(uc, c);
836 kmem_cache_free(mrt_cachep, uc);
838 return 0;
842 * Close the multicast socket, and clear the vif tables etc
845 static void mroute_clean_tables(struct sock *sk)
847 int i;
850 * Shut down all active vif entries
852 for (i=0; i<maxvif; i++) {
853 if (!(vif_table[i].flags&VIFF_STATIC))
854 vif_delete(i, 0);
858 * Wipe the cache
860 for (i=0; i<MFC_LINES; i++) {
861 struct mfc_cache *c, **cp;
863 cp = &mfc_cache_array[i];
864 while ((c = *cp) != NULL) {
865 if (c->mfc_flags&MFC_STATIC) {
866 cp = &c->next;
867 continue;
869 write_lock_bh(&mrt_lock);
870 *cp = c->next;
871 write_unlock_bh(&mrt_lock);
873 kmem_cache_free(mrt_cachep, c);
877 if (atomic_read(&cache_resolve_queue_len) != 0) {
878 struct mfc_cache *c;
880 spin_lock_bh(&mfc_unres_lock);
881 while (mfc_unres_queue != NULL) {
882 c = mfc_unres_queue;
883 mfc_unres_queue = c->next;
884 spin_unlock_bh(&mfc_unres_lock);
886 ipmr_destroy_unres(c);
888 spin_lock_bh(&mfc_unres_lock);
890 spin_unlock_bh(&mfc_unres_lock);
894 static void mrtsock_destruct(struct sock *sk)
896 rtnl_lock();
897 if (sk == init_net.ipv4.mroute_sk) {
898 IPV4_DEVCONF_ALL(sock_net(sk), MC_FORWARDING)--;
900 write_lock_bh(&mrt_lock);
901 init_net.ipv4.mroute_sk = NULL;
902 write_unlock_bh(&mrt_lock);
904 mroute_clean_tables(sk);
906 rtnl_unlock();
910 * Socket options and virtual interface manipulation. The whole
911 * virtual interface system is a complete heap, but unfortunately
912 * that's how BSD mrouted happens to think. Maybe one day with a proper
913 * MOSPF/PIM router set up we can clean this up.
916 int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, int optlen)
918 int ret;
919 struct vifctl vif;
920 struct mfcctl mfc;
922 if (optname != MRT_INIT) {
923 if (sk != init_net.ipv4.mroute_sk && !capable(CAP_NET_ADMIN))
924 return -EACCES;
927 switch (optname) {
928 case MRT_INIT:
929 if (sk->sk_type != SOCK_RAW ||
930 inet_sk(sk)->num != IPPROTO_IGMP)
931 return -EOPNOTSUPP;
932 if (optlen != sizeof(int))
933 return -ENOPROTOOPT;
935 rtnl_lock();
936 if (init_net.ipv4.mroute_sk) {
937 rtnl_unlock();
938 return -EADDRINUSE;
941 ret = ip_ra_control(sk, 1, mrtsock_destruct);
942 if (ret == 0) {
943 write_lock_bh(&mrt_lock);
944 init_net.ipv4.mroute_sk = sk;
945 write_unlock_bh(&mrt_lock);
947 IPV4_DEVCONF_ALL(sock_net(sk), MC_FORWARDING)++;
949 rtnl_unlock();
950 return ret;
951 case MRT_DONE:
952 if (sk != init_net.ipv4.mroute_sk)
953 return -EACCES;
954 return ip_ra_control(sk, 0, NULL);
955 case MRT_ADD_VIF:
956 case MRT_DEL_VIF:
957 if (optlen != sizeof(vif))
958 return -EINVAL;
959 if (copy_from_user(&vif, optval, sizeof(vif)))
960 return -EFAULT;
961 if (vif.vifc_vifi >= MAXVIFS)
962 return -ENFILE;
963 rtnl_lock();
964 if (optname == MRT_ADD_VIF) {
965 ret = vif_add(&vif, sk == init_net.ipv4.mroute_sk);
966 } else {
967 ret = vif_delete(vif.vifc_vifi, 0);
969 rtnl_unlock();
970 return ret;
973 * Manipulate the forwarding caches. These live
974 * in a sort of kernel/user symbiosis.
976 case MRT_ADD_MFC:
977 case MRT_DEL_MFC:
978 if (optlen != sizeof(mfc))
979 return -EINVAL;
980 if (copy_from_user(&mfc, optval, sizeof(mfc)))
981 return -EFAULT;
982 rtnl_lock();
983 if (optname == MRT_DEL_MFC)
984 ret = ipmr_mfc_delete(&mfc);
985 else
986 ret = ipmr_mfc_add(&mfc, sk == init_net.ipv4.mroute_sk);
987 rtnl_unlock();
988 return ret;
990 * Control PIM assert.
992 case MRT_ASSERT:
994 int v;
995 if (get_user(v,(int __user *)optval))
996 return -EFAULT;
997 mroute_do_assert=(v)?1:0;
998 return 0;
1000 #ifdef CONFIG_IP_PIMSM
1001 case MRT_PIM:
1003 int v;
1005 if (get_user(v,(int __user *)optval))
1006 return -EFAULT;
1007 v = (v) ? 1 : 0;
1009 rtnl_lock();
1010 ret = 0;
1011 if (v != mroute_do_pim) {
1012 mroute_do_pim = v;
1013 mroute_do_assert = v;
1014 #ifdef CONFIG_IP_PIMSM_V2
1015 if (mroute_do_pim)
1016 ret = inet_add_protocol(&pim_protocol,
1017 IPPROTO_PIM);
1018 else
1019 ret = inet_del_protocol(&pim_protocol,
1020 IPPROTO_PIM);
1021 if (ret < 0)
1022 ret = -EAGAIN;
1023 #endif
1025 rtnl_unlock();
1026 return ret;
1028 #endif
1030 * Spurious command, or MRT_VERSION which you cannot
1031 * set.
1033 default:
1034 return -ENOPROTOOPT;
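/*
 * Illustrative userspace sketch of how a daemon in the mrouted mould drives
 * the interface above - an assumption about typical usage, not code from
 * this file.  Structures and MRT_* constants come from <linux/mroute.h>;
 * "local_if_addr", "src" and "grp" are hypothetical addresses and error
 * handling is omitted.
 *
 *	int fd = socket(AF_INET, SOCK_RAW, IPPROTO_IGMP);
 *	int one = 1;
 *
 *	setsockopt(fd, IPPROTO_IP, MRT_INIT, &one, sizeof(one));
 *
 *	struct vifctl vc = {
 *		.vifc_vifi      = 0,
 *		.vifc_threshold = 1,
 *		.vifc_lcl_addr  = { .s_addr = local_if_addr },
 *	};
 *	setsockopt(fd, IPPROTO_IP, MRT_ADD_VIF, &vc, sizeof(vc));
 *	// ... a second vif (vifi 1) would be added the same way ...
 *
 *	struct mfcctl mc = {
 *		.mfcc_origin   = { .s_addr = src },
 *		.mfcc_mcastgrp = { .s_addr = grp },
 *		.mfcc_parent   = 0,
 *	};
 *	mc.mfcc_ttls[1] = 1;	// forward on vif 1 when the packet TTL exceeds 1
 *	setsockopt(fd, IPPROTO_IP, MRT_ADD_MFC, &mc, sizeof(mc));
 *
 *	setsockopt(fd, IPPROTO_IP, MRT_DONE, &one, sizeof(one));	// on shutdown
 */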
1039 * Getsockopt support for the multicast routing system.
1042 int ip_mroute_getsockopt(struct sock *sk, int optname, char __user *optval, int __user *optlen)
1044 int olr;
1045 int val;
1047 if (optname != MRT_VERSION &&
1048 #ifdef CONFIG_IP_PIMSM
1049 optname!=MRT_PIM &&
1050 #endif
1051 optname!=MRT_ASSERT)
1052 return -ENOPROTOOPT;
1054 if (get_user(olr, optlen))
1055 return -EFAULT;
1057 olr = min_t(unsigned int, olr, sizeof(int));
1058 if (olr < 0)
1059 return -EINVAL;
1061 if (put_user(olr, optlen))
1062 return -EFAULT;
1063 if (optname == MRT_VERSION)
1064 val = 0x0305;
1065 #ifdef CONFIG_IP_PIMSM
1066 else if (optname == MRT_PIM)
1067 val = mroute_do_pim;
1068 #endif
1069 else
1070 val = mroute_do_assert;
1071 if (copy_to_user(optval, &val, olr))
1072 return -EFAULT;
1073 return 0;
1077 * The IP multicast ioctl support routines.
1080 int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg)
1082 struct sioc_sg_req sr;
1083 struct sioc_vif_req vr;
1084 struct vif_device *vif;
1085 struct mfc_cache *c;
1087 switch (cmd) {
1088 case SIOCGETVIFCNT:
1089 if (copy_from_user(&vr, arg, sizeof(vr)))
1090 return -EFAULT;
1091 if (vr.vifi >= maxvif)
1092 return -EINVAL;
1093 read_lock(&mrt_lock);
1094 vif=&vif_table[vr.vifi];
1095 if (VIF_EXISTS(vr.vifi)) {
1096 vr.icount = vif->pkt_in;
1097 vr.ocount = vif->pkt_out;
1098 vr.ibytes = vif->bytes_in;
1099 vr.obytes = vif->bytes_out;
1100 read_unlock(&mrt_lock);
1102 if (copy_to_user(arg, &vr, sizeof(vr)))
1103 return -EFAULT;
1104 return 0;
1106 read_unlock(&mrt_lock);
1107 return -EADDRNOTAVAIL;
1108 case SIOCGETSGCNT:
1109 if (copy_from_user(&sr, arg, sizeof(sr)))
1110 return -EFAULT;
1112 read_lock(&mrt_lock);
1113 c = ipmr_cache_find(sr.src.s_addr, sr.grp.s_addr);
1114 if (c) {
1115 sr.pktcnt = c->mfc_un.res.pkt;
1116 sr.bytecnt = c->mfc_un.res.bytes;
1117 sr.wrong_if = c->mfc_un.res.wrong_if;
1118 read_unlock(&mrt_lock);
1120 if (copy_to_user(arg, &sr, sizeof(sr)))
1121 return -EFAULT;
1122 return 0;
1124 read_unlock(&mrt_lock);
1125 return -EADDRNOTAVAIL;
1126 default:
1127 return -ENOIOCTLCMD;
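/*
 * Illustrative userspace sketch (assumed usage, not code from this file):
 * polling the per-vif counters through the ioctl interface above, using the
 * same raw IGMP socket "fd" as in the MRT_INIT sketch earlier.
 *
 *	struct sioc_vif_req vr = { .vifi = 0 };
 *
 *	if (ioctl(fd, SIOCGETVIFCNT, &vr) == 0)
 *		printf("vif0: %lu/%lu pkts in/out, %lu/%lu bytes in/out\n",
 *		       vr.icount, vr.ocount, vr.ibytes, vr.obytes);
 */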
1132 static int ipmr_device_event(struct notifier_block *this, unsigned long event, void *ptr)
1134 struct net_device *dev = ptr;
1135 struct vif_device *v;
1136 int ct;
1138 if (!net_eq(dev_net(dev), &init_net))
1139 return NOTIFY_DONE;
1141 if (event != NETDEV_UNREGISTER)
1142 return NOTIFY_DONE;
1143 v=&vif_table[0];
1144 for (ct=0; ct<maxvif; ct++,v++) {
1145 if (v->dev == dev)
1146 vif_delete(ct, 1);
1148 return NOTIFY_DONE;
1152 static struct notifier_block ip_mr_notifier = {
1153 .notifier_call = ipmr_device_event,
1157 * Encapsulate a packet by attaching a valid IPIP header to it.
1158 * This avoids tunnel drivers and other mess and gives us the speed so
1159 * important for multicast video.
1162 static void ip_encap(struct sk_buff *skb, __be32 saddr, __be32 daddr)
1164 struct iphdr *iph;
1165 struct iphdr *old_iph = ip_hdr(skb);
1167 skb_push(skb, sizeof(struct iphdr));
1168 skb->transport_header = skb->network_header;
1169 skb_reset_network_header(skb);
1170 iph = ip_hdr(skb);
1172 iph->version = 4;
1173 iph->tos = old_iph->tos;
1174 iph->ttl = old_iph->ttl;
1175 iph->frag_off = 0;
1176 iph->daddr = daddr;
1177 iph->saddr = saddr;
1178 iph->protocol = IPPROTO_IPIP;
1179 iph->ihl = 5;
1180 iph->tot_len = htons(skb->len);
1181 ip_select_ident(iph, skb->dst, NULL);
1182 ip_send_check(iph);
1184 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
1185 nf_reset(skb);
1188 static inline int ipmr_forward_finish(struct sk_buff *skb)
1190 struct ip_options * opt = &(IPCB(skb)->opt);
1192 IP_INC_STATS_BH(dev_net(skb->dst->dev), IPSTATS_MIB_OUTFORWDATAGRAMS);
1194 if (unlikely(opt->optlen))
1195 ip_forward_options(skb);
1197 return dst_output(skb);
1201 * Processing handlers for ipmr_forward
1204 static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, int vifi)
1206 const struct iphdr *iph = ip_hdr(skb);
1207 struct vif_device *vif = &vif_table[vifi];
1208 struct net_device *dev;
1209 struct rtable *rt;
1210 int encap = 0;
1212 if (vif->dev == NULL)
1213 goto out_free;
1215 #ifdef CONFIG_IP_PIMSM
1216 if (vif->flags & VIFF_REGISTER) {
1217 vif->pkt_out++;
1218 vif->bytes_out += skb->len;
1219 vif->dev->stats.tx_bytes += skb->len;
1220 vif->dev->stats.tx_packets++;
1221 ipmr_cache_report(skb, vifi, IGMPMSG_WHOLEPKT);
1222 kfree_skb(skb);
1223 return;
1225 #endif
1227 if (vif->flags&VIFF_TUNNEL) {
1228 struct flowi fl = { .oif = vif->link,
1229 .nl_u = { .ip4_u =
1230 { .daddr = vif->remote,
1231 .saddr = vif->local,
1232 .tos = RT_TOS(iph->tos) } },
1233 .proto = IPPROTO_IPIP };
1234 if (ip_route_output_key(&init_net, &rt, &fl))
1235 goto out_free;
1236 encap = sizeof(struct iphdr);
1237 } else {
1238 struct flowi fl = { .oif = vif->link,
1239 .nl_u = { .ip4_u =
1240 { .daddr = iph->daddr,
1241 .tos = RT_TOS(iph->tos) } },
1242 .proto = IPPROTO_IPIP };
1243 if (ip_route_output_key(&init_net, &rt, &fl))
1244 goto out_free;
1247 dev = rt->u.dst.dev;
1249 if (skb->len+encap > dst_mtu(&rt->u.dst) && (ntohs(iph->frag_off) & IP_DF)) {
1250 /* Do not fragment multicasts. Alas, IPv4 does not
1251 allow us to send ICMP here, so the packets will disappear
1252 into a black hole.
1255 IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_FRAGFAILS);
1256 ip_rt_put(rt);
1257 goto out_free;
1260 encap += LL_RESERVED_SPACE(dev) + rt->u.dst.header_len;
1262 if (skb_cow(skb, encap)) {
1263 ip_rt_put(rt);
1264 goto out_free;
1267 vif->pkt_out++;
1268 vif->bytes_out += skb->len;
1270 dst_release(skb->dst);
1271 skb->dst = &rt->u.dst;
1272 ip_decrease_ttl(ip_hdr(skb));
1274 /* FIXME: forward and output firewalls used to be called here.
1275 * What do we do with netfilter? -- RR */
1276 if (vif->flags & VIFF_TUNNEL) {
1277 ip_encap(skb, vif->local, vif->remote);
1278 /* FIXME: extra output firewall step used to be here. --RR */
1279 vif->dev->stats.tx_packets++;
1280 vif->dev->stats.tx_bytes += skb->len;
1283 IPCB(skb)->flags |= IPSKB_FORWARDED;
1286 * RFC 1584 teaches that a DVMRP/PIM router must deliver packets locally
1287 * not only before forwarding, but also after forwarding on all output
1288 * interfaces. Clearly, if the mrouter runs a multicasting
1289 * program, that program should receive packets regardless of which
1290 * interface it is joined on.
1291 * If we do not do this, the program will have to join on all
1292 * interfaces. On the other hand, a multihomed host (or router, but
1293 * not an mrouter) cannot join on more than one interface - that would
1294 * result in it receiving multiple copies of each packet.
1296 NF_HOOK(PF_INET, NF_INET_FORWARD, skb, skb->dev, dev,
1297 ipmr_forward_finish);
1298 return;
1300 out_free:
1301 kfree_skb(skb);
1302 return;
1305 static int ipmr_find_vif(struct net_device *dev)
1307 int ct;
1308 for (ct=maxvif-1; ct>=0; ct--) {
1309 if (vif_table[ct].dev == dev)
1310 break;
1312 return ct;
1315 /* "local" means that we should preserve one skb (for local delivery) */
1317 static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local)
1319 int psend = -1;
1320 int vif, ct;
1322 vif = cache->mfc_parent;
1323 cache->mfc_un.res.pkt++;
1324 cache->mfc_un.res.bytes += skb->len;
1327 * Wrong interface: drop packet and (maybe) send PIM assert.
1329 if (vif_table[vif].dev != skb->dev) {
1330 int true_vifi;
1332 if (skb->rtable->fl.iif == 0) {
1333 /* It is our own packet, looped back.
1334 Very complicated situation...
1336 The best workaround until the routing daemons are
1337 fixed is not to redistribute a packet if it was
1338 sent through the wrong interface. This means that
1339 multicast applications WILL NOT work for
1340 (S,G) entries whose default multicast route points
1341 to the wrong oif. In any case, it is not a good
1342 idea to run multicasting applications on a router.
1344 goto dont_forward;
1347 cache->mfc_un.res.wrong_if++;
1348 true_vifi = ipmr_find_vif(skb->dev);
1350 if (true_vifi >= 0 && mroute_do_assert &&
1351 /* pimsm uses asserts when switching from RPT to SPT,
1352 so we cannot check that the packet arrived on an oif.
1353 It is bad, but otherwise we would need to move a pretty
1354 large chunk of pimd into the kernel. Ough... --ANK
1356 (mroute_do_pim || cache->mfc_un.res.ttls[true_vifi] < 255) &&
1357 time_after(jiffies,
1358 cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) {
1359 cache->mfc_un.res.last_assert = jiffies;
1360 ipmr_cache_report(skb, true_vifi, IGMPMSG_WRONGVIF);
1362 goto dont_forward;
1365 vif_table[vif].pkt_in++;
1366 vif_table[vif].bytes_in += skb->len;
1369 * Forward the frame
1371 for (ct = cache->mfc_un.res.maxvif-1; ct >= cache->mfc_un.res.minvif; ct--) {
1372 if (ip_hdr(skb)->ttl > cache->mfc_un.res.ttls[ct]) {
1373 if (psend != -1) {
1374 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
1375 if (skb2)
1376 ipmr_queue_xmit(skb2, cache, psend);
1378 psend = ct;
1381 if (psend != -1) {
1382 if (local) {
1383 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
1384 if (skb2)
1385 ipmr_queue_xmit(skb2, cache, psend);
1386 } else {
1387 ipmr_queue_xmit(skb, cache, psend);
1388 return 0;
1392 dont_forward:
1393 if (!local)
1394 kfree_skb(skb);
1395 return 0;
1400 * Multicast packets for forwarding arrive here
1403 int ip_mr_input(struct sk_buff *skb)
1405 struct mfc_cache *cache;
1406 int local = skb->rtable->rt_flags&RTCF_LOCAL;
1408 /* Packet is looped back after forwarding; it should not be
1409 forwarded a second time, but it can still be delivered locally.
1411 if (IPCB(skb)->flags&IPSKB_FORWARDED)
1412 goto dont_forward;
1414 if (!local) {
1415 if (IPCB(skb)->opt.router_alert) {
1416 if (ip_call_ra_chain(skb))
1417 return 0;
1418 } else if (ip_hdr(skb)->protocol == IPPROTO_IGMP){
1419 /* IGMPv1 (and broken IGMPv2 implementations such as
1420 Cisco IOS <= 11.2(8)) do not put the router alert
1421 option in IGMP packets destined to routable
1422 groups. That is very bad, because it means
1423 that we can forward NO IGMP messages.
1425 read_lock(&mrt_lock);
1426 if (init_net.ipv4.mroute_sk) {
1427 nf_reset(skb);
1428 raw_rcv(init_net.ipv4.mroute_sk, skb);
1429 read_unlock(&mrt_lock);
1430 return 0;
1432 read_unlock(&mrt_lock);
1436 read_lock(&mrt_lock);
1437 cache = ipmr_cache_find(ip_hdr(skb)->saddr, ip_hdr(skb)->daddr);
1440 * No usable cache entry
1442 if (cache == NULL) {
1443 int vif;
1445 if (local) {
1446 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
1447 ip_local_deliver(skb);
1448 if (skb2 == NULL) {
1449 read_unlock(&mrt_lock);
1450 return -ENOBUFS;
1452 skb = skb2;
1455 vif = ipmr_find_vif(skb->dev);
1456 if (vif >= 0) {
1457 int err = ipmr_cache_unresolved(vif, skb);
1458 read_unlock(&mrt_lock);
1460 return err;
1462 read_unlock(&mrt_lock);
1463 kfree_skb(skb);
1464 return -ENODEV;
1467 ip_mr_forward(skb, cache, local);
1469 read_unlock(&mrt_lock);
1471 if (local)
1472 return ip_local_deliver(skb);
1474 return 0;
1476 dont_forward:
1477 if (local)
1478 return ip_local_deliver(skb);
1479 kfree_skb(skb);
1480 return 0;
1483 #ifdef CONFIG_IP_PIMSM
1484 static int __pim_rcv(struct sk_buff *skb, unsigned int pimlen)
1486 struct net_device *reg_dev = NULL;
1487 struct iphdr *encap;
1489 encap = (struct iphdr *)(skb_transport_header(skb) + pimlen);
1491 Check that:
1492 a. packet is really destined to a multicast group
1493 b. packet is not a NULL-REGISTER
1494 c. packet is not truncated
1496 if (!ipv4_is_multicast(encap->daddr) ||
1497 encap->tot_len == 0 ||
1498 ntohs(encap->tot_len) + pimlen > skb->len)
1499 return 1;
1501 read_lock(&mrt_lock);
1502 if (reg_vif_num >= 0)
1503 reg_dev = vif_table[reg_vif_num].dev;
1504 if (reg_dev)
1505 dev_hold(reg_dev);
1506 read_unlock(&mrt_lock);
1508 if (reg_dev == NULL)
1509 return 1;
1511 skb->mac_header = skb->network_header;
1512 skb_pull(skb, (u8*)encap - skb->data);
1513 skb_reset_network_header(skb);
1514 skb->dev = reg_dev;
1515 skb->protocol = htons(ETH_P_IP);
1516 skb->ip_summed = 0;
1517 skb->pkt_type = PACKET_HOST;
1518 dst_release(skb->dst);
1519 skb->dst = NULL;
1520 reg_dev->stats.rx_bytes += skb->len;
1521 reg_dev->stats.rx_packets++;
1522 nf_reset(skb);
1523 netif_rx(skb);
1524 dev_put(reg_dev);
1526 return 0;
1528 #endif
1530 #ifdef CONFIG_IP_PIMSM_V1
1532 * Handle IGMP messages of PIMv1
1535 int pim_rcv_v1(struct sk_buff * skb)
1537 struct igmphdr *pim;
1539 if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr)))
1540 goto drop;
1542 pim = igmp_hdr(skb);
1544 if (!mroute_do_pim ||
1545 pim->group != PIM_V1_VERSION || pim->code != PIM_V1_REGISTER)
1546 goto drop;
1548 if (__pim_rcv(skb, sizeof(*pim))) {
1549 drop:
1550 kfree_skb(skb);
1552 return 0;
1554 #endif
1556 #ifdef CONFIG_IP_PIMSM_V2
1557 static int pim_rcv(struct sk_buff * skb)
1559 struct pimreghdr *pim;
1561 if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr)))
1562 goto drop;
1564 pim = (struct pimreghdr *)skb_transport_header(skb);
1565 if (pim->type != ((PIM_VERSION<<4)|(PIM_REGISTER)) ||
1566 (pim->flags&PIM_NULL_REGISTER) ||
1567 (ip_compute_csum((void *)pim, sizeof(*pim)) != 0 &&
1568 csum_fold(skb_checksum(skb, 0, skb->len, 0))))
1569 goto drop;
1571 if (__pim_rcv(skb, sizeof(*pim))) {
1572 drop:
1573 kfree_skb(skb);
1575 return 0;
1577 #endif
1579 static int
1580 ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm)
1582 int ct;
1583 struct rtnexthop *nhp;
1584 struct net_device *dev = vif_table[c->mfc_parent].dev;
1585 u8 *b = skb_tail_pointer(skb);
1586 struct rtattr *mp_head;
1588 if (dev)
1589 RTA_PUT(skb, RTA_IIF, 4, &dev->ifindex);
1591 mp_head = (struct rtattr *)skb_put(skb, RTA_LENGTH(0));
1593 for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
1594 if (c->mfc_un.res.ttls[ct] < 255) {
1595 if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4))
1596 goto rtattr_failure;
1597 nhp = (struct rtnexthop *)skb_put(skb, RTA_ALIGN(sizeof(*nhp)));
1598 nhp->rtnh_flags = 0;
1599 nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
1600 nhp->rtnh_ifindex = vif_table[ct].dev->ifindex;
1601 nhp->rtnh_len = sizeof(*nhp);
1604 mp_head->rta_type = RTA_MULTIPATH;
1605 mp_head->rta_len = skb_tail_pointer(skb) - (u8 *)mp_head;
1606 rtm->rtm_type = RTN_MULTICAST;
1607 return 1;
1609 rtattr_failure:
1610 nlmsg_trim(skb, b);
1611 return -EMSGSIZE;
1614 int ipmr_get_route(struct sk_buff *skb, struct rtmsg *rtm, int nowait)
1616 int err;
1617 struct mfc_cache *cache;
1618 struct rtable *rt = skb->rtable;
1620 read_lock(&mrt_lock);
1621 cache = ipmr_cache_find(rt->rt_src, rt->rt_dst);
1623 if (cache == NULL) {
1624 struct sk_buff *skb2;
1625 struct iphdr *iph;
1626 struct net_device *dev;
1627 int vif;
1629 if (nowait) {
1630 read_unlock(&mrt_lock);
1631 return -EAGAIN;
1634 dev = skb->dev;
1635 if (dev == NULL || (vif = ipmr_find_vif(dev)) < 0) {
1636 read_unlock(&mrt_lock);
1637 return -ENODEV;
1639 skb2 = skb_clone(skb, GFP_ATOMIC);
1640 if (!skb2) {
1641 read_unlock(&mrt_lock);
1642 return -ENOMEM;
1645 skb_push(skb2, sizeof(struct iphdr));
1646 skb_reset_network_header(skb2);
1647 iph = ip_hdr(skb2);
1648 iph->ihl = sizeof(struct iphdr) >> 2;
1649 iph->saddr = rt->rt_src;
1650 iph->daddr = rt->rt_dst;
1651 iph->version = 0;
1652 err = ipmr_cache_unresolved(vif, skb2);
1653 read_unlock(&mrt_lock);
1654 return err;
1657 if (!nowait && (rtm->rtm_flags&RTM_F_NOTIFY))
1658 cache->mfc_flags |= MFC_NOTIFY;
1659 err = ipmr_fill_mroute(skb, cache, rtm);
1660 read_unlock(&mrt_lock);
1661 return err;
1664 #ifdef CONFIG_PROC_FS
1666 * The /proc interfaces to multicast routing: /proc/net/ip_mr_cache and /proc/net/ip_mr_vif
1668 struct ipmr_vif_iter {
1669 int ct;
1672 static struct vif_device *ipmr_vif_seq_idx(struct ipmr_vif_iter *iter,
1673 loff_t pos)
1675 for (iter->ct = 0; iter->ct < maxvif; ++iter->ct) {
1676 if (!VIF_EXISTS(iter->ct))
1677 continue;
1678 if (pos-- == 0)
1679 return &vif_table[iter->ct];
1681 return NULL;
1684 static void *ipmr_vif_seq_start(struct seq_file *seq, loff_t *pos)
1685 __acquires(mrt_lock)
1687 read_lock(&mrt_lock);
1688 return *pos ? ipmr_vif_seq_idx(seq->private, *pos - 1)
1689 : SEQ_START_TOKEN;
1692 static void *ipmr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1694 struct ipmr_vif_iter *iter = seq->private;
1696 ++*pos;
1697 if (v == SEQ_START_TOKEN)
1698 return ipmr_vif_seq_idx(iter, 0);
1700 while (++iter->ct < maxvif) {
1701 if (!VIF_EXISTS(iter->ct))
1702 continue;
1703 return &vif_table[iter->ct];
1705 return NULL;
1708 static void ipmr_vif_seq_stop(struct seq_file *seq, void *v)
1709 __releases(mrt_lock)
1711 read_unlock(&mrt_lock);
1714 static int ipmr_vif_seq_show(struct seq_file *seq, void *v)
1716 if (v == SEQ_START_TOKEN) {
1717 seq_puts(seq,
1718 "Interface BytesIn PktsIn BytesOut PktsOut Flags Local Remote\n");
1719 } else {
1720 const struct vif_device *vif = v;
1721 const char *name = vif->dev ? vif->dev->name : "none";
1723 seq_printf(seq,
1724 "%2Zd %-10s %8ld %7ld %8ld %7ld %05X %08X %08X\n",
1725 vif - vif_table,
1726 name, vif->bytes_in, vif->pkt_in,
1727 vif->bytes_out, vif->pkt_out,
1728 vif->flags, vif->local, vif->remote);
1730 return 0;
1733 static const struct seq_operations ipmr_vif_seq_ops = {
1734 .start = ipmr_vif_seq_start,
1735 .next = ipmr_vif_seq_next,
1736 .stop = ipmr_vif_seq_stop,
1737 .show = ipmr_vif_seq_show,
1740 static int ipmr_vif_open(struct inode *inode, struct file *file)
1742 return seq_open_private(file, &ipmr_vif_seq_ops,
1743 sizeof(struct ipmr_vif_iter));
1746 static const struct file_operations ipmr_vif_fops = {
1747 .owner = THIS_MODULE,
1748 .open = ipmr_vif_open,
1749 .read = seq_read,
1750 .llseek = seq_lseek,
1751 .release = seq_release_private,
1754 struct ipmr_mfc_iter {
1755 struct mfc_cache **cache;
1756 int ct;
1760 static struct mfc_cache *ipmr_mfc_seq_idx(struct ipmr_mfc_iter *it, loff_t pos)
1762 struct mfc_cache *mfc;
1764 it->cache = mfc_cache_array;
1765 read_lock(&mrt_lock);
1766 for (it->ct = 0; it->ct < MFC_LINES; it->ct++)
1767 for (mfc = mfc_cache_array[it->ct]; mfc; mfc = mfc->next)
1768 if (pos-- == 0)
1769 return mfc;
1770 read_unlock(&mrt_lock);
1772 it->cache = &mfc_unres_queue;
1773 spin_lock_bh(&mfc_unres_lock);
1774 for (mfc = mfc_unres_queue; mfc; mfc = mfc->next)
1775 if (pos-- == 0)
1776 return mfc;
1777 spin_unlock_bh(&mfc_unres_lock);
1779 it->cache = NULL;
1780 return NULL;
1784 static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
1786 struct ipmr_mfc_iter *it = seq->private;
1787 it->cache = NULL;
1788 it->ct = 0;
1789 return *pos ? ipmr_mfc_seq_idx(seq->private, *pos - 1)
1790 : SEQ_START_TOKEN;
1793 static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1795 struct mfc_cache *mfc = v;
1796 struct ipmr_mfc_iter *it = seq->private;
1798 ++*pos;
1800 if (v == SEQ_START_TOKEN)
1801 return ipmr_mfc_seq_idx(seq->private, 0);
1803 if (mfc->next)
1804 return mfc->next;
1806 if (it->cache == &mfc_unres_queue)
1807 goto end_of_list;
1809 BUG_ON(it->cache != mfc_cache_array);
1811 while (++it->ct < MFC_LINES) {
1812 mfc = mfc_cache_array[it->ct];
1813 if (mfc)
1814 return mfc;
1817 /* exhausted cache_array, show unresolved */
1818 read_unlock(&mrt_lock);
1819 it->cache = &mfc_unres_queue;
1820 it->ct = 0;
1822 spin_lock_bh(&mfc_unres_lock);
1823 mfc = mfc_unres_queue;
1824 if (mfc)
1825 return mfc;
1827 end_of_list:
1828 spin_unlock_bh(&mfc_unres_lock);
1829 it->cache = NULL;
1831 return NULL;
1834 static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
1836 struct ipmr_mfc_iter *it = seq->private;
1838 if (it->cache == &mfc_unres_queue)
1839 spin_unlock_bh(&mfc_unres_lock);
1840 else if (it->cache == mfc_cache_array)
1841 read_unlock(&mrt_lock);
1844 static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
1846 int n;
1848 if (v == SEQ_START_TOKEN) {
1849 seq_puts(seq,
1850 "Group Origin Iif Pkts Bytes Wrong Oifs\n");
1851 } else {
1852 const struct mfc_cache *mfc = v;
1853 const struct ipmr_mfc_iter *it = seq->private;
1855 seq_printf(seq, "%08lX %08lX %-3hd",
1856 (unsigned long) mfc->mfc_mcastgrp,
1857 (unsigned long) mfc->mfc_origin,
1858 mfc->mfc_parent);
1860 if (it->cache != &mfc_unres_queue) {
1861 seq_printf(seq, " %8lu %8lu %8lu",
1862 mfc->mfc_un.res.pkt,
1863 mfc->mfc_un.res.bytes,
1864 mfc->mfc_un.res.wrong_if);
1865 for (n = mfc->mfc_un.res.minvif;
1866 n < mfc->mfc_un.res.maxvif; n++ ) {
1867 if (VIF_EXISTS(n)
1868 && mfc->mfc_un.res.ttls[n] < 255)
1869 seq_printf(seq,
1870 " %2d:%-3d",
1871 n, mfc->mfc_un.res.ttls[n]);
1873 } else {
1874 /* unresolved mfc_caches don't contain
1875 * pkt, bytes and wrong_if values
1877 seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul);
1879 seq_putc(seq, '\n');
1881 return 0;
1884 static const struct seq_operations ipmr_mfc_seq_ops = {
1885 .start = ipmr_mfc_seq_start,
1886 .next = ipmr_mfc_seq_next,
1887 .stop = ipmr_mfc_seq_stop,
1888 .show = ipmr_mfc_seq_show,
1891 static int ipmr_mfc_open(struct inode *inode, struct file *file)
1893 return seq_open_private(file, &ipmr_mfc_seq_ops,
1894 sizeof(struct ipmr_mfc_iter));
1897 static const struct file_operations ipmr_mfc_fops = {
1898 .owner = THIS_MODULE,
1899 .open = ipmr_mfc_open,
1900 .read = seq_read,
1901 .llseek = seq_lseek,
1902 .release = seq_release_private,
1904 #endif
1906 #ifdef CONFIG_IP_PIMSM_V2
1907 static struct net_protocol pim_protocol = {
1908 .handler = pim_rcv,
1910 #endif
1914 * Setup for IP multicast routing
1917 int __init ip_mr_init(void)
1919 int err;
1921 mrt_cachep = kmem_cache_create("ip_mrt_cache",
1922 sizeof(struct mfc_cache),
1923 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC,
1924 NULL);
1925 if (!mrt_cachep)
1926 return -ENOMEM;
1928 setup_timer(&ipmr_expire_timer, ipmr_expire_process, 0);
1929 err = register_netdevice_notifier(&ip_mr_notifier);
1930 if (err)
1931 goto reg_notif_fail;
1932 #ifdef CONFIG_PROC_FS
1933 err = -ENOMEM;
1934 if (!proc_net_fops_create(&init_net, "ip_mr_vif", 0, &ipmr_vif_fops))
1935 goto proc_vif_fail;
1936 if (!proc_net_fops_create(&init_net, "ip_mr_cache", 0, &ipmr_mfc_fops))
1937 goto proc_cache_fail;
1938 #endif
1939 return 0;
1940 #ifdef CONFIG_PROC_FS
1941 proc_cache_fail:
1942 proc_net_remove(&init_net, "ip_mr_vif");
1943 proc_vif_fail:
1944 unregister_netdevice_notifier(&ip_mr_notifier);
1945 #endif
1946 reg_notif_fail:
1947 del_timer(&ipmr_expire_timer);
1948 kmem_cache_destroy(mrt_cachep);
1949 return err;