sched: rt-bandwidth group disable fixes
[linux-2.6/mini2440.git] / net / ipv4 / ipmr.c
blobc519b8d30eee5c46f4547c3a014b3832d5a090ab
1 /*
2 * IP multicast routing support for mrouted 3.6/3.8
4 * (c) 1995 Alan Cox, <alan@redhat.com>
5 * Linux Consultancy and Custom Driver Development
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation; either version
10 * 2 of the License, or (at your option) any later version.
12 * Fixes:
13 * Michael Chastain : Incorrect size of copying.
14 * Alan Cox : Added the cache manager code
15 * Alan Cox : Fixed the clone/copy bug and device race.
16 * Mike McLagan : Routing by source
17 * Malcolm Beattie : Buffer handling fixes.
18 * Alexey Kuznetsov : Double buffer free and other fixes.
19 * SVR Anand : Fixed several multicast bugs and problems.
20 * Alexey Kuznetsov : Status, optimisations and more.
21 * Brad Parker : Better behaviour on mrouted upcall
22 * overflow.
23 * Carlos Picoto : PIMv1 Support
24 * Pavlin Ivanov Radoslavov: PIMv2 Registers must checksum only PIM header
25 * Relax this requirement to work with older peers.
29 #include <asm/system.h>
30 #include <asm/uaccess.h>
31 #include <linux/types.h>
32 #include <linux/capability.h>
33 #include <linux/errno.h>
34 #include <linux/timer.h>
35 #include <linux/mm.h>
36 #include <linux/kernel.h>
37 #include <linux/fcntl.h>
38 #include <linux/stat.h>
39 #include <linux/socket.h>
40 #include <linux/in.h>
41 #include <linux/inet.h>
42 #include <linux/netdevice.h>
43 #include <linux/inetdevice.h>
44 #include <linux/igmp.h>
45 #include <linux/proc_fs.h>
46 #include <linux/seq_file.h>
47 #include <linux/mroute.h>
48 #include <linux/init.h>
49 #include <linux/if_ether.h>
50 #include <net/net_namespace.h>
51 #include <net/ip.h>
52 #include <net/protocol.h>
53 #include <linux/skbuff.h>
54 #include <net/route.h>
55 #include <net/sock.h>
56 #include <net/icmp.h>
57 #include <net/udp.h>
58 #include <net/raw.h>
59 #include <linux/notifier.h>
60 #include <linux/if_arp.h>
61 #include <linux/netfilter_ipv4.h>
62 #include <net/ipip.h>
63 #include <net/checksum.h>
64 #include <net/netlink.h>
66 #if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2)
67 #define CONFIG_IP_PIMSM 1
68 #endif
70 static struct sock *mroute_socket;
73 /* Big lock, protecting vif table, mrt cache and mroute socket state.
74 Note that the changes are semaphored via rtnl_lock.
77 static DEFINE_RWLOCK(mrt_lock);
80 * Multicast router control variables
83 static struct vif_device vif_table[MAXVIFS]; /* Devices */
84 static int maxvif;
86 #define VIF_EXISTS(idx) (vif_table[idx].dev != NULL)
88 static int mroute_do_assert; /* Set in PIM assert */
89 static int mroute_do_pim;
91 static struct mfc_cache *mfc_cache_array[MFC_LINES]; /* Forwarding cache */
93 static struct mfc_cache *mfc_unres_queue; /* Queue of unresolved entries */
94 static atomic_t cache_resolve_queue_len; /* Size of unresolved */
96 /* Special spinlock for queue of unresolved entries */
97 static DEFINE_SPINLOCK(mfc_unres_lock);
99 /* We return to original Alan's scheme. Hash table of resolved
100 entries is changed only in process context and protected
101 with weak lock mrt_lock. Queue of unresolved entries is protected
102 with strong spinlock mfc_unres_lock.
104 In this case data path is free of exclusive locks at all.
107 static struct kmem_cache *mrt_cachep __read_mostly;
109 static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local);
110 static int ipmr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert);
111 static int ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm);
113 #ifdef CONFIG_IP_PIMSM_V2
114 static struct net_protocol pim_protocol;
115 #endif
117 static struct timer_list ipmr_expire_timer;
119 /* Service routines creating virtual interfaces: DVMRP tunnels and PIMREG */
121 static void ipmr_del_tunnel(struct net_device *dev, struct vifctl *v)
123 dev_close(dev);
125 dev = __dev_get_by_name(&init_net, "tunl0");
126 if (dev) {
127 struct ifreq ifr;
128 mm_segment_t oldfs;
129 struct ip_tunnel_parm p;
131 memset(&p, 0, sizeof(p));
132 p.iph.daddr = v->vifc_rmt_addr.s_addr;
133 p.iph.saddr = v->vifc_lcl_addr.s_addr;
134 p.iph.version = 4;
135 p.iph.ihl = 5;
136 p.iph.protocol = IPPROTO_IPIP;
137 sprintf(p.name, "dvmrp%d", v->vifc_vifi);
138 ifr.ifr_ifru.ifru_data = (__force void __user *)&p;
140 oldfs = get_fs(); set_fs(KERNEL_DS);
141 dev->do_ioctl(dev, &ifr, SIOCDELTUNNEL);
142 set_fs(oldfs);
146 static
147 struct net_device *ipmr_new_tunnel(struct vifctl *v)
149 struct net_device *dev;
151 dev = __dev_get_by_name(&init_net, "tunl0");
153 if (dev) {
154 int err;
155 struct ifreq ifr;
156 mm_segment_t oldfs;
157 struct ip_tunnel_parm p;
158 struct in_device *in_dev;
160 memset(&p, 0, sizeof(p));
161 p.iph.daddr = v->vifc_rmt_addr.s_addr;
162 p.iph.saddr = v->vifc_lcl_addr.s_addr;
163 p.iph.version = 4;
164 p.iph.ihl = 5;
165 p.iph.protocol = IPPROTO_IPIP;
166 sprintf(p.name, "dvmrp%d", v->vifc_vifi);
167 ifr.ifr_ifru.ifru_data = (__force void __user *)&p;
169 oldfs = get_fs(); set_fs(KERNEL_DS);
170 err = dev->do_ioctl(dev, &ifr, SIOCADDTUNNEL);
171 set_fs(oldfs);
173 dev = NULL;
175 if (err == 0 && (dev = __dev_get_by_name(&init_net, p.name)) != NULL) {
176 dev->flags |= IFF_MULTICAST;
178 in_dev = __in_dev_get_rtnl(dev);
179 if (in_dev == NULL)
180 goto failure;
182 ipv4_devconf_setall(in_dev);
183 IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0;
185 if (dev_open(dev))
186 goto failure;
187 dev_hold(dev);
190 return dev;
192 failure:
193 /* allow the register to be completed before unregistering. */
194 rtnl_unlock();
195 rtnl_lock();
197 unregister_netdevice(dev);
198 return NULL;
201 #ifdef CONFIG_IP_PIMSM
203 static int reg_vif_num = -1;
205 static int reg_vif_xmit(struct sk_buff *skb, struct net_device *dev)
207 read_lock(&mrt_lock);
208 dev->stats.tx_bytes += skb->len;
209 dev->stats.tx_packets++;
210 ipmr_cache_report(skb, reg_vif_num, IGMPMSG_WHOLEPKT);
211 read_unlock(&mrt_lock);
212 kfree_skb(skb);
213 return 0;
216 static void reg_vif_setup(struct net_device *dev)
218 dev->type = ARPHRD_PIMREG;
219 dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr) - 8;
220 dev->flags = IFF_NOARP;
221 dev->hard_start_xmit = reg_vif_xmit;
222 dev->destructor = free_netdev;
225 static struct net_device *ipmr_reg_vif(void)
227 struct net_device *dev;
228 struct in_device *in_dev;
230 dev = alloc_netdev(0, "pimreg", reg_vif_setup);
232 if (dev == NULL)
233 return NULL;
235 if (register_netdevice(dev)) {
236 free_netdev(dev);
237 return NULL;
239 dev->iflink = 0;
241 rcu_read_lock();
242 if ((in_dev = __in_dev_get_rcu(dev)) == NULL) {
243 rcu_read_unlock();
244 goto failure;
247 ipv4_devconf_setall(in_dev);
248 IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0;
249 rcu_read_unlock();
251 if (dev_open(dev))
252 goto failure;
254 dev_hold(dev);
256 return dev;
258 failure:
259 /* allow the register to be completed before unregistering. */
260 rtnl_unlock();
261 rtnl_lock();
263 unregister_netdevice(dev);
264 return NULL;
266 #endif
269 * Delete a VIF entry
270 * @notify: Set to 1, if the caller is a notifier_call
273 static int vif_delete(int vifi, int notify)
275 struct vif_device *v;
276 struct net_device *dev;
277 struct in_device *in_dev;
279 if (vifi < 0 || vifi >= maxvif)
280 return -EADDRNOTAVAIL;
282 v = &vif_table[vifi];
284 write_lock_bh(&mrt_lock);
285 dev = v->dev;
286 v->dev = NULL;
288 if (!dev) {
289 write_unlock_bh(&mrt_lock);
290 return -EADDRNOTAVAIL;
293 #ifdef CONFIG_IP_PIMSM
294 if (vifi == reg_vif_num)
295 reg_vif_num = -1;
296 #endif
298 if (vifi+1 == maxvif) {
299 int tmp;
300 for (tmp=vifi-1; tmp>=0; tmp--) {
301 if (VIF_EXISTS(tmp))
302 break;
304 maxvif = tmp+1;
307 write_unlock_bh(&mrt_lock);
309 dev_set_allmulti(dev, -1);
311 if ((in_dev = __in_dev_get_rtnl(dev)) != NULL) {
312 IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)--;
313 ip_rt_multicast_event(in_dev);
316 if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER) && !notify)
317 unregister_netdevice(dev);
319 dev_put(dev);
320 return 0;
323 /* Destroy an unresolved cache entry, killing queued skbs
324 and reporting error to netlink readers.
327 static void ipmr_destroy_unres(struct mfc_cache *c)
329 struct sk_buff *skb;
330 struct nlmsgerr *e;
332 atomic_dec(&cache_resolve_queue_len);
334 while ((skb=skb_dequeue(&c->mfc_un.unres.unresolved))) {
335 if (ip_hdr(skb)->version == 0) {
336 struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));
337 nlh->nlmsg_type = NLMSG_ERROR;
338 nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
339 skb_trim(skb, nlh->nlmsg_len);
340 e = NLMSG_DATA(nlh);
341 e->error = -ETIMEDOUT;
342 memset(&e->msg, 0, sizeof(e->msg));
344 rtnl_unicast(skb, &init_net, NETLINK_CB(skb).pid);
345 } else
346 kfree_skb(skb);
349 kmem_cache_free(mrt_cachep, c);
353 /* Single timer process for all the unresolved queue. */
355 static void ipmr_expire_process(unsigned long dummy)
357 unsigned long now;
358 unsigned long expires;
359 struct mfc_cache *c, **cp;
361 if (!spin_trylock(&mfc_unres_lock)) {
362 mod_timer(&ipmr_expire_timer, jiffies+HZ/10);
363 return;
366 if (atomic_read(&cache_resolve_queue_len) == 0)
367 goto out;
369 now = jiffies;
370 expires = 10*HZ;
371 cp = &mfc_unres_queue;
373 while ((c=*cp) != NULL) {
374 if (time_after(c->mfc_un.unres.expires, now)) {
375 unsigned long interval = c->mfc_un.unres.expires - now;
376 if (interval < expires)
377 expires = interval;
378 cp = &c->next;
379 continue;
382 *cp = c->next;
384 ipmr_destroy_unres(c);
387 if (atomic_read(&cache_resolve_queue_len))
388 mod_timer(&ipmr_expire_timer, jiffies + expires);
390 out:
391 spin_unlock(&mfc_unres_lock);
394 /* Fill oifs list. It is called under write locked mrt_lock. */
396 static void ipmr_update_thresholds(struct mfc_cache *cache, unsigned char *ttls)
398 int vifi;
400 cache->mfc_un.res.minvif = MAXVIFS;
401 cache->mfc_un.res.maxvif = 0;
402 memset(cache->mfc_un.res.ttls, 255, MAXVIFS);
404 for (vifi=0; vifi<maxvif; vifi++) {
405 if (VIF_EXISTS(vifi) && ttls[vifi] && ttls[vifi] < 255) {
406 cache->mfc_un.res.ttls[vifi] = ttls[vifi];
407 if (cache->mfc_un.res.minvif > vifi)
408 cache->mfc_un.res.minvif = vifi;
409 if (cache->mfc_un.res.maxvif <= vifi)
410 cache->mfc_un.res.maxvif = vifi + 1;
415 static int vif_add(struct vifctl *vifc, int mrtsock)
417 int vifi = vifc->vifc_vifi;
418 struct vif_device *v = &vif_table[vifi];
419 struct net_device *dev;
420 struct in_device *in_dev;
421 int err;
423 /* Is vif busy ? */
424 if (VIF_EXISTS(vifi))
425 return -EADDRINUSE;
427 switch (vifc->vifc_flags) {
428 #ifdef CONFIG_IP_PIMSM
429 case VIFF_REGISTER:
431 * Special Purpose VIF in PIM
432 * All the packets will be sent to the daemon
434 if (reg_vif_num >= 0)
435 return -EADDRINUSE;
436 dev = ipmr_reg_vif();
437 if (!dev)
438 return -ENOBUFS;
439 err = dev_set_allmulti(dev, 1);
440 if (err) {
441 unregister_netdevice(dev);
442 dev_put(dev);
443 return err;
445 break;
446 #endif
447 case VIFF_TUNNEL:
448 dev = ipmr_new_tunnel(vifc);
449 if (!dev)
450 return -ENOBUFS;
451 err = dev_set_allmulti(dev, 1);
452 if (err) {
453 ipmr_del_tunnel(dev, vifc);
454 dev_put(dev);
455 return err;
457 break;
458 case 0:
459 dev = ip_dev_find(&init_net, vifc->vifc_lcl_addr.s_addr);
460 if (!dev)
461 return -EADDRNOTAVAIL;
462 err = dev_set_allmulti(dev, 1);
463 if (err) {
464 dev_put(dev);
465 return err;
467 break;
468 default:
469 return -EINVAL;
472 if ((in_dev = __in_dev_get_rtnl(dev)) == NULL)
473 return -EADDRNOTAVAIL;
474 IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)++;
475 ip_rt_multicast_event(in_dev);
478 * Fill in the VIF structures
480 v->rate_limit=vifc->vifc_rate_limit;
481 v->local=vifc->vifc_lcl_addr.s_addr;
482 v->remote=vifc->vifc_rmt_addr.s_addr;
483 v->flags=vifc->vifc_flags;
484 if (!mrtsock)
485 v->flags |= VIFF_STATIC;
486 v->threshold=vifc->vifc_threshold;
487 v->bytes_in = 0;
488 v->bytes_out = 0;
489 v->pkt_in = 0;
490 v->pkt_out = 0;
491 v->link = dev->ifindex;
492 if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER))
493 v->link = dev->iflink;
495 /* And finish update writing critical data */
496 write_lock_bh(&mrt_lock);
497 v->dev=dev;
498 #ifdef CONFIG_IP_PIMSM
499 if (v->flags&VIFF_REGISTER)
500 reg_vif_num = vifi;
501 #endif
502 if (vifi+1 > maxvif)
503 maxvif = vifi+1;
504 write_unlock_bh(&mrt_lock);
505 return 0;
508 static struct mfc_cache *ipmr_cache_find(__be32 origin, __be32 mcastgrp)
510 int line=MFC_HASH(mcastgrp,origin);
511 struct mfc_cache *c;
513 for (c=mfc_cache_array[line]; c; c = c->next) {
514 if (c->mfc_origin==origin && c->mfc_mcastgrp==mcastgrp)
515 break;
517 return c;
521 * Allocate a multicast cache entry
523 static struct mfc_cache *ipmr_cache_alloc(void)
525 struct mfc_cache *c=kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
526 if (c==NULL)
527 return NULL;
528 c->mfc_un.res.minvif = MAXVIFS;
529 return c;
532 static struct mfc_cache *ipmr_cache_alloc_unres(void)
534 struct mfc_cache *c=kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
535 if (c==NULL)
536 return NULL;
537 skb_queue_head_init(&c->mfc_un.unres.unresolved);
538 c->mfc_un.unres.expires = jiffies + 10*HZ;
539 return c;
543 * A cache entry has gone into a resolved state from queued
546 static void ipmr_cache_resolve(struct mfc_cache *uc, struct mfc_cache *c)
548 struct sk_buff *skb;
549 struct nlmsgerr *e;
552 * Play the pending entries through our router
555 while ((skb=__skb_dequeue(&uc->mfc_un.unres.unresolved))) {
556 if (ip_hdr(skb)->version == 0) {
557 struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));
559 if (ipmr_fill_mroute(skb, c, NLMSG_DATA(nlh)) > 0) {
560 nlh->nlmsg_len = (skb_tail_pointer(skb) -
561 (u8 *)nlh);
562 } else {
563 nlh->nlmsg_type = NLMSG_ERROR;
564 nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
565 skb_trim(skb, nlh->nlmsg_len);
566 e = NLMSG_DATA(nlh);
567 e->error = -EMSGSIZE;
568 memset(&e->msg, 0, sizeof(e->msg));
571 rtnl_unicast(skb, &init_net, NETLINK_CB(skb).pid);
572 } else
573 ip_mr_forward(skb, c, 0);
578 * Bounce a cache query up to mrouted. We could use netlink for this but mrouted
579 * expects the following bizarre scheme.
581 * Called under mrt_lock.
584 static int ipmr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert)
586 struct sk_buff *skb;
587 const int ihl = ip_hdrlen(pkt);
588 struct igmphdr *igmp;
589 struct igmpmsg *msg;
590 int ret;
592 #ifdef CONFIG_IP_PIMSM
593 if (assert == IGMPMSG_WHOLEPKT)
594 skb = skb_realloc_headroom(pkt, sizeof(struct iphdr));
595 else
596 #endif
597 skb = alloc_skb(128, GFP_ATOMIC);
599 if (!skb)
600 return -ENOBUFS;
602 #ifdef CONFIG_IP_PIMSM
603 if (assert == IGMPMSG_WHOLEPKT) {
604 /* Ugly, but we have no choice with this interface.
605 Duplicate old header, fix ihl, length etc.
606 And all this only to mangle msg->im_msgtype and
607 to set msg->im_mbz to "mbz" :-)
609 skb_push(skb, sizeof(struct iphdr));
610 skb_reset_network_header(skb);
611 skb_reset_transport_header(skb);
612 msg = (struct igmpmsg *)skb_network_header(skb);
613 memcpy(msg, skb_network_header(pkt), sizeof(struct iphdr));
614 msg->im_msgtype = IGMPMSG_WHOLEPKT;
615 msg->im_mbz = 0;
616 msg->im_vif = reg_vif_num;
617 ip_hdr(skb)->ihl = sizeof(struct iphdr) >> 2;
618 ip_hdr(skb)->tot_len = htons(ntohs(ip_hdr(pkt)->tot_len) +
619 sizeof(struct iphdr));
620 } else
621 #endif
625 * Copy the IP header
628 skb->network_header = skb->tail;
629 skb_put(skb, ihl);
630 skb_copy_to_linear_data(skb, pkt->data, ihl);
631 ip_hdr(skb)->protocol = 0; /* Flag to the kernel this is a route add */
632 msg = (struct igmpmsg *)skb_network_header(skb);
633 msg->im_vif = vifi;
634 skb->dst = dst_clone(pkt->dst);
637 * Add our header
640 igmp=(struct igmphdr *)skb_put(skb,sizeof(struct igmphdr));
641 igmp->type =
642 msg->im_msgtype = assert;
643 igmp->code = 0;
644 ip_hdr(skb)->tot_len = htons(skb->len); /* Fix the length */
645 skb->transport_header = skb->network_header;
648 if (mroute_socket == NULL) {
649 kfree_skb(skb);
650 return -EINVAL;
654 * Deliver to mrouted
656 if ((ret=sock_queue_rcv_skb(mroute_socket,skb))<0) {
657 if (net_ratelimit())
658 printk(KERN_WARNING "mroute: pending queue full, dropping entries.\n");
659 kfree_skb(skb);
662 return ret;
666 * Queue a packet for resolution. It gets locked cache entry!
669 static int
670 ipmr_cache_unresolved(vifi_t vifi, struct sk_buff *skb)
672 int err;
673 struct mfc_cache *c;
674 const struct iphdr *iph = ip_hdr(skb);
676 spin_lock_bh(&mfc_unres_lock);
677 for (c=mfc_unres_queue; c; c=c->next) {
678 if (c->mfc_mcastgrp == iph->daddr &&
679 c->mfc_origin == iph->saddr)
680 break;
683 if (c == NULL) {
685 * Create a new entry if allowable
688 if (atomic_read(&cache_resolve_queue_len)>=10 ||
689 (c=ipmr_cache_alloc_unres())==NULL) {
690 spin_unlock_bh(&mfc_unres_lock);
692 kfree_skb(skb);
693 return -ENOBUFS;
697 * Fill in the new cache entry
699 c->mfc_parent = -1;
700 c->mfc_origin = iph->saddr;
701 c->mfc_mcastgrp = iph->daddr;
704 * Reflect first query at mrouted.
706 if ((err = ipmr_cache_report(skb, vifi, IGMPMSG_NOCACHE))<0) {
707 /* If the report failed throw the cache entry
708 out - Brad Parker
710 spin_unlock_bh(&mfc_unres_lock);
712 kmem_cache_free(mrt_cachep, c);
713 kfree_skb(skb);
714 return err;
717 atomic_inc(&cache_resolve_queue_len);
718 c->next = mfc_unres_queue;
719 mfc_unres_queue = c;
721 mod_timer(&ipmr_expire_timer, c->mfc_un.unres.expires);
725 * See if we can append the packet
727 if (c->mfc_un.unres.unresolved.qlen>3) {
728 kfree_skb(skb);
729 err = -ENOBUFS;
730 } else {
731 skb_queue_tail(&c->mfc_un.unres.unresolved,skb);
732 err = 0;
735 spin_unlock_bh(&mfc_unres_lock);
736 return err;
740 * MFC cache manipulation by user space mroute daemon
743 static int ipmr_mfc_delete(struct mfcctl *mfc)
745 int line;
746 struct mfc_cache *c, **cp;
748 line=MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);
750 for (cp=&mfc_cache_array[line]; (c=*cp) != NULL; cp = &c->next) {
751 if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
752 c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) {
753 write_lock_bh(&mrt_lock);
754 *cp = c->next;
755 write_unlock_bh(&mrt_lock);
757 kmem_cache_free(mrt_cachep, c);
758 return 0;
761 return -ENOENT;
764 static int ipmr_mfc_add(struct mfcctl *mfc, int mrtsock)
766 int line;
767 struct mfc_cache *uc, *c, **cp;
769 line=MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);
771 for (cp=&mfc_cache_array[line]; (c=*cp) != NULL; cp = &c->next) {
772 if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
773 c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr)
774 break;
777 if (c != NULL) {
778 write_lock_bh(&mrt_lock);
779 c->mfc_parent = mfc->mfcc_parent;
780 ipmr_update_thresholds(c, mfc->mfcc_ttls);
781 if (!mrtsock)
782 c->mfc_flags |= MFC_STATIC;
783 write_unlock_bh(&mrt_lock);
784 return 0;
787 if (!ipv4_is_multicast(mfc->mfcc_mcastgrp.s_addr))
788 return -EINVAL;
790 c=ipmr_cache_alloc();
791 if (c==NULL)
792 return -ENOMEM;
794 c->mfc_origin=mfc->mfcc_origin.s_addr;
795 c->mfc_mcastgrp=mfc->mfcc_mcastgrp.s_addr;
796 c->mfc_parent=mfc->mfcc_parent;
797 ipmr_update_thresholds(c, mfc->mfcc_ttls);
798 if (!mrtsock)
799 c->mfc_flags |= MFC_STATIC;
801 write_lock_bh(&mrt_lock);
802 c->next = mfc_cache_array[line];
803 mfc_cache_array[line] = c;
804 write_unlock_bh(&mrt_lock);
807 * Check to see if we resolved a queued list. If so we
808 * need to send on the frames and tidy up.
810 spin_lock_bh(&mfc_unres_lock);
811 for (cp = &mfc_unres_queue; (uc=*cp) != NULL;
812 cp = &uc->next) {
813 if (uc->mfc_origin == c->mfc_origin &&
814 uc->mfc_mcastgrp == c->mfc_mcastgrp) {
815 *cp = uc->next;
816 if (atomic_dec_and_test(&cache_resolve_queue_len))
817 del_timer(&ipmr_expire_timer);
818 break;
821 spin_unlock_bh(&mfc_unres_lock);
823 if (uc) {
824 ipmr_cache_resolve(uc, c);
825 kmem_cache_free(mrt_cachep, uc);
827 return 0;
831 * Close the multicast socket, and clear the vif tables etc
834 static void mroute_clean_tables(struct sock *sk)
836 int i;
839 * Shut down all active vif entries
841 for (i=0; i<maxvif; i++) {
842 if (!(vif_table[i].flags&VIFF_STATIC))
843 vif_delete(i, 0);
847 * Wipe the cache
849 for (i=0;i<MFC_LINES;i++) {
850 struct mfc_cache *c, **cp;
852 cp = &mfc_cache_array[i];
853 while ((c = *cp) != NULL) {
854 if (c->mfc_flags&MFC_STATIC) {
855 cp = &c->next;
856 continue;
858 write_lock_bh(&mrt_lock);
859 *cp = c->next;
860 write_unlock_bh(&mrt_lock);
862 kmem_cache_free(mrt_cachep, c);
866 if (atomic_read(&cache_resolve_queue_len) != 0) {
867 struct mfc_cache *c;
869 spin_lock_bh(&mfc_unres_lock);
870 while (mfc_unres_queue != NULL) {
871 c = mfc_unres_queue;
872 mfc_unres_queue = c->next;
873 spin_unlock_bh(&mfc_unres_lock);
875 ipmr_destroy_unres(c);
877 spin_lock_bh(&mfc_unres_lock);
879 spin_unlock_bh(&mfc_unres_lock);
883 static void mrtsock_destruct(struct sock *sk)
885 rtnl_lock();
886 if (sk == mroute_socket) {
887 IPV4_DEVCONF_ALL(sock_net(sk), MC_FORWARDING)--;
889 write_lock_bh(&mrt_lock);
890 mroute_socket=NULL;
891 write_unlock_bh(&mrt_lock);
893 mroute_clean_tables(sk);
895 rtnl_unlock();
899 * Socket options and virtual interface manipulation. The whole
900 * virtual interface system is a complete heap, but unfortunately
901 * that's how BSD mrouted happens to think. Maybe one day with a proper
902 * MOSPF/PIM router set up we can clean this up.
905 int ip_mroute_setsockopt(struct sock *sk,int optname,char __user *optval,int optlen)
907 int ret;
908 struct vifctl vif;
909 struct mfcctl mfc;
911 if (optname != MRT_INIT) {
912 if (sk != mroute_socket && !capable(CAP_NET_ADMIN))
913 return -EACCES;
916 switch (optname) {
917 case MRT_INIT:
918 if (sk->sk_type != SOCK_RAW ||
919 inet_sk(sk)->num != IPPROTO_IGMP)
920 return -EOPNOTSUPP;
921 if (optlen!=sizeof(int))
922 return -ENOPROTOOPT;
924 rtnl_lock();
925 if (mroute_socket) {
926 rtnl_unlock();
927 return -EADDRINUSE;
930 ret = ip_ra_control(sk, 1, mrtsock_destruct);
931 if (ret == 0) {
932 write_lock_bh(&mrt_lock);
933 mroute_socket=sk;
934 write_unlock_bh(&mrt_lock);
936 IPV4_DEVCONF_ALL(sock_net(sk), MC_FORWARDING)++;
938 rtnl_unlock();
939 return ret;
940 case MRT_DONE:
941 if (sk!=mroute_socket)
942 return -EACCES;
943 return ip_ra_control(sk, 0, NULL);
944 case MRT_ADD_VIF:
945 case MRT_DEL_VIF:
946 if (optlen!=sizeof(vif))
947 return -EINVAL;
948 if (copy_from_user(&vif,optval,sizeof(vif)))
949 return -EFAULT;
950 if (vif.vifc_vifi >= MAXVIFS)
951 return -ENFILE;
952 rtnl_lock();
953 if (optname==MRT_ADD_VIF) {
954 ret = vif_add(&vif, sk==mroute_socket);
955 } else {
956 ret = vif_delete(vif.vifc_vifi, 0);
958 rtnl_unlock();
959 return ret;
962 * Manipulate the forwarding caches. These live
963 * in a sort of kernel/user symbiosis.
965 case MRT_ADD_MFC:
966 case MRT_DEL_MFC:
967 if (optlen!=sizeof(mfc))
968 return -EINVAL;
969 if (copy_from_user(&mfc,optval, sizeof(mfc)))
970 return -EFAULT;
971 rtnl_lock();
972 if (optname==MRT_DEL_MFC)
973 ret = ipmr_mfc_delete(&mfc);
974 else
975 ret = ipmr_mfc_add(&mfc, sk==mroute_socket);
976 rtnl_unlock();
977 return ret;
979 * Control PIM assert.
981 case MRT_ASSERT:
983 int v;
984 if (get_user(v,(int __user *)optval))
985 return -EFAULT;
986 mroute_do_assert=(v)?1:0;
987 return 0;
989 #ifdef CONFIG_IP_PIMSM
990 case MRT_PIM:
992 int v;
994 if (get_user(v,(int __user *)optval))
995 return -EFAULT;
996 v = (v) ? 1 : 0;
998 rtnl_lock();
999 ret = 0;
1000 if (v != mroute_do_pim) {
1001 mroute_do_pim = v;
1002 mroute_do_assert = v;
1003 #ifdef CONFIG_IP_PIMSM_V2
1004 if (mroute_do_pim)
1005 ret = inet_add_protocol(&pim_protocol,
1006 IPPROTO_PIM);
1007 else
1008 ret = inet_del_protocol(&pim_protocol,
1009 IPPROTO_PIM);
1010 if (ret < 0)
1011 ret = -EAGAIN;
1012 #endif
1014 rtnl_unlock();
1015 return ret;
1017 #endif
1019 * Spurious command, or MRT_VERSION which you cannot
1020 * set.
1022 default:
1023 return -ENOPROTOOPT;
1028 * Getsock opt support for the multicast routing system.
1031 int ip_mroute_getsockopt(struct sock *sk,int optname,char __user *optval,int __user *optlen)
1033 int olr;
1034 int val;
1036 if (optname!=MRT_VERSION &&
1037 #ifdef CONFIG_IP_PIMSM
1038 optname!=MRT_PIM &&
1039 #endif
1040 optname!=MRT_ASSERT)
1041 return -ENOPROTOOPT;
1043 if (get_user(olr, optlen))
1044 return -EFAULT;
1046 olr = min_t(unsigned int, olr, sizeof(int));
1047 if (olr < 0)
1048 return -EINVAL;
1050 if (put_user(olr,optlen))
1051 return -EFAULT;
1052 if (optname==MRT_VERSION)
1053 val=0x0305;
1054 #ifdef CONFIG_IP_PIMSM
1055 else if (optname==MRT_PIM)
1056 val=mroute_do_pim;
1057 #endif
1058 else
1059 val=mroute_do_assert;
1060 if (copy_to_user(optval,&val,olr))
1061 return -EFAULT;
1062 return 0;
1066 * The IP multicast ioctl support routines.
1069 int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg)
1071 struct sioc_sg_req sr;
1072 struct sioc_vif_req vr;
1073 struct vif_device *vif;
1074 struct mfc_cache *c;
1076 switch (cmd) {
1077 case SIOCGETVIFCNT:
1078 if (copy_from_user(&vr,arg,sizeof(vr)))
1079 return -EFAULT;
1080 if (vr.vifi>=maxvif)
1081 return -EINVAL;
1082 read_lock(&mrt_lock);
1083 vif=&vif_table[vr.vifi];
1084 if (VIF_EXISTS(vr.vifi)) {
1085 vr.icount=vif->pkt_in;
1086 vr.ocount=vif->pkt_out;
1087 vr.ibytes=vif->bytes_in;
1088 vr.obytes=vif->bytes_out;
1089 read_unlock(&mrt_lock);
1091 if (copy_to_user(arg,&vr,sizeof(vr)))
1092 return -EFAULT;
1093 return 0;
1095 read_unlock(&mrt_lock);
1096 return -EADDRNOTAVAIL;
1097 case SIOCGETSGCNT:
1098 if (copy_from_user(&sr,arg,sizeof(sr)))
1099 return -EFAULT;
1101 read_lock(&mrt_lock);
1102 c = ipmr_cache_find(sr.src.s_addr, sr.grp.s_addr);
1103 if (c) {
1104 sr.pktcnt = c->mfc_un.res.pkt;
1105 sr.bytecnt = c->mfc_un.res.bytes;
1106 sr.wrong_if = c->mfc_un.res.wrong_if;
1107 read_unlock(&mrt_lock);
1109 if (copy_to_user(arg,&sr,sizeof(sr)))
1110 return -EFAULT;
1111 return 0;
1113 read_unlock(&mrt_lock);
1114 return -EADDRNOTAVAIL;
1115 default:
1116 return -ENOIOCTLCMD;
1121 static int ipmr_device_event(struct notifier_block *this, unsigned long event, void *ptr)
1123 struct net_device *dev = ptr;
1124 struct vif_device *v;
1125 int ct;
1127 if (!net_eq(dev_net(dev), &init_net))
1128 return NOTIFY_DONE;
1130 if (event != NETDEV_UNREGISTER)
1131 return NOTIFY_DONE;
1132 v=&vif_table[0];
1133 for (ct=0;ct<maxvif;ct++,v++) {
1134 if (v->dev==dev)
1135 vif_delete(ct, 1);
1137 return NOTIFY_DONE;
1141 static struct notifier_block ip_mr_notifier={
1142 .notifier_call = ipmr_device_event,
1146 * Encapsulate a packet by attaching a valid IPIP header to it.
1147 * This avoids tunnel drivers and other mess and gives us the speed so
1148 * important for multicast video.
1151 static void ip_encap(struct sk_buff *skb, __be32 saddr, __be32 daddr)
1153 struct iphdr *iph;
1154 struct iphdr *old_iph = ip_hdr(skb);
1156 skb_push(skb, sizeof(struct iphdr));
1157 skb->transport_header = skb->network_header;
1158 skb_reset_network_header(skb);
1159 iph = ip_hdr(skb);
1161 iph->version = 4;
1162 iph->tos = old_iph->tos;
1163 iph->ttl = old_iph->ttl;
1164 iph->frag_off = 0;
1165 iph->daddr = daddr;
1166 iph->saddr = saddr;
1167 iph->protocol = IPPROTO_IPIP;
1168 iph->ihl = 5;
1169 iph->tot_len = htons(skb->len);
1170 ip_select_ident(iph, skb->dst, NULL);
1171 ip_send_check(iph);
1173 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
1174 nf_reset(skb);
1177 static inline int ipmr_forward_finish(struct sk_buff *skb)
1179 struct ip_options * opt = &(IPCB(skb)->opt);
1181 IP_INC_STATS_BH(dev_net(skb->dst->dev), IPSTATS_MIB_OUTFORWDATAGRAMS);
1183 if (unlikely(opt->optlen))
1184 ip_forward_options(skb);
1186 return dst_output(skb);
1190 * Processing handlers for ipmr_forward
1193 static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, int vifi)
1195 const struct iphdr *iph = ip_hdr(skb);
1196 struct vif_device *vif = &vif_table[vifi];
1197 struct net_device *dev;
1198 struct rtable *rt;
1199 int encap = 0;
1201 if (vif->dev == NULL)
1202 goto out_free;
1204 #ifdef CONFIG_IP_PIMSM
1205 if (vif->flags & VIFF_REGISTER) {
1206 vif->pkt_out++;
1207 vif->bytes_out+=skb->len;
1208 vif->dev->stats.tx_bytes += skb->len;
1209 vif->dev->stats.tx_packets++;
1210 ipmr_cache_report(skb, vifi, IGMPMSG_WHOLEPKT);
1211 kfree_skb(skb);
1212 return;
1214 #endif
1216 if (vif->flags&VIFF_TUNNEL) {
1217 struct flowi fl = { .oif = vif->link,
1218 .nl_u = { .ip4_u =
1219 { .daddr = vif->remote,
1220 .saddr = vif->local,
1221 .tos = RT_TOS(iph->tos) } },
1222 .proto = IPPROTO_IPIP };
1223 if (ip_route_output_key(&init_net, &rt, &fl))
1224 goto out_free;
1225 encap = sizeof(struct iphdr);
1226 } else {
1227 struct flowi fl = { .oif = vif->link,
1228 .nl_u = { .ip4_u =
1229 { .daddr = iph->daddr,
1230 .tos = RT_TOS(iph->tos) } },
1231 .proto = IPPROTO_IPIP };
1232 if (ip_route_output_key(&init_net, &rt, &fl))
1233 goto out_free;
1236 dev = rt->u.dst.dev;
1238 if (skb->len+encap > dst_mtu(&rt->u.dst) && (ntohs(iph->frag_off) & IP_DF)) {
1239 /* Do not fragment multicasts. Alas, IPv4 does not
1240 allow to send ICMP, so that packets will disappear
1241 to blackhole.
1244 IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_FRAGFAILS);
1245 ip_rt_put(rt);
1246 goto out_free;
1249 encap += LL_RESERVED_SPACE(dev) + rt->u.dst.header_len;
1251 if (skb_cow(skb, encap)) {
1252 ip_rt_put(rt);
1253 goto out_free;
1256 vif->pkt_out++;
1257 vif->bytes_out+=skb->len;
1259 dst_release(skb->dst);
1260 skb->dst = &rt->u.dst;
1261 ip_decrease_ttl(ip_hdr(skb));
1263 /* FIXME: forward and output firewalls used to be called here.
1264 * What do we do with netfilter? -- RR */
1265 if (vif->flags & VIFF_TUNNEL) {
1266 ip_encap(skb, vif->local, vif->remote);
1267 /* FIXME: extra output firewall step used to be here. --RR */
1268 vif->dev->stats.tx_packets++;
1269 vif->dev->stats.tx_bytes += skb->len;
1272 IPCB(skb)->flags |= IPSKB_FORWARDED;
1275 * RFC1584 teaches, that DVMRP/PIM router must deliver packets locally
1276 * not only before forwarding, but after forwarding on all output
1277 * interfaces. It is clear, if mrouter runs a multicasting
1278 * program, it should receive packets not depending to what interface
1279 * program is joined.
1280 * If we will not make it, the program will have to join on all
1281 * interfaces. On the other hand, multihoming host (or router, but
1282 * not mrouter) cannot join to more than one interface - it will
1283 * result in receiving multiple packets.
1285 NF_HOOK(PF_INET, NF_INET_FORWARD, skb, skb->dev, dev,
1286 ipmr_forward_finish);
1287 return;
1289 out_free:
1290 kfree_skb(skb);
1291 return;
1294 static int ipmr_find_vif(struct net_device *dev)
1296 int ct;
1297 for (ct=maxvif-1; ct>=0; ct--) {
1298 if (vif_table[ct].dev == dev)
1299 break;
1301 return ct;
1304 /* "local" means that we should preserve one skb (for local delivery) */
1306 static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local)
1308 int psend = -1;
1309 int vif, ct;
1311 vif = cache->mfc_parent;
1312 cache->mfc_un.res.pkt++;
1313 cache->mfc_un.res.bytes += skb->len;
1316 * Wrong interface: drop packet and (maybe) send PIM assert.
1318 if (vif_table[vif].dev != skb->dev) {
1319 int true_vifi;
1321 if (skb->rtable->fl.iif == 0) {
1322 /* It is our own packet, looped back.
1323 Very complicated situation...
1325 The best workaround until routing daemons will be
1326 fixed is not to redistribute packet, if it was
1327 send through wrong interface. It means, that
1328 multicast applications WILL NOT work for
1329 (S,G), which have default multicast route pointing
1330 to wrong oif. In any case, it is not a good
1331 idea to use multicasting applications on router.
1333 goto dont_forward;
1336 cache->mfc_un.res.wrong_if++;
1337 true_vifi = ipmr_find_vif(skb->dev);
1339 if (true_vifi >= 0 && mroute_do_assert &&
1340 /* pimsm uses asserts, when switching from RPT to SPT,
1341 so that we cannot check that packet arrived on an oif.
1342 It is bad, but otherwise we would need to move pretty
1343 large chunk of pimd to kernel. Ough... --ANK
1345 (mroute_do_pim || cache->mfc_un.res.ttls[true_vifi] < 255) &&
1346 time_after(jiffies,
1347 cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) {
1348 cache->mfc_un.res.last_assert = jiffies;
1349 ipmr_cache_report(skb, true_vifi, IGMPMSG_WRONGVIF);
1351 goto dont_forward;
1354 vif_table[vif].pkt_in++;
1355 vif_table[vif].bytes_in+=skb->len;
1358 * Forward the frame
1360 for (ct = cache->mfc_un.res.maxvif-1; ct >= cache->mfc_un.res.minvif; ct--) {
1361 if (ip_hdr(skb)->ttl > cache->mfc_un.res.ttls[ct]) {
1362 if (psend != -1) {
1363 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
1364 if (skb2)
1365 ipmr_queue_xmit(skb2, cache, psend);
1367 psend=ct;
1370 if (psend != -1) {
1371 if (local) {
1372 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
1373 if (skb2)
1374 ipmr_queue_xmit(skb2, cache, psend);
1375 } else {
1376 ipmr_queue_xmit(skb, cache, psend);
1377 return 0;
1381 dont_forward:
1382 if (!local)
1383 kfree_skb(skb);
1384 return 0;
1389 * Multicast packets for forwarding arrive here
1392 int ip_mr_input(struct sk_buff *skb)
1394 struct mfc_cache *cache;
1395 int local = skb->rtable->rt_flags&RTCF_LOCAL;
1397 /* Packet is looped back after forward, it should not be
1398 forwarded second time, but still can be delivered locally.
1400 if (IPCB(skb)->flags&IPSKB_FORWARDED)
1401 goto dont_forward;
1403 if (!local) {
1404 if (IPCB(skb)->opt.router_alert) {
1405 if (ip_call_ra_chain(skb))
1406 return 0;
1407 } else if (ip_hdr(skb)->protocol == IPPROTO_IGMP){
1408 /* IGMPv1 (and broken IGMPv2 implementations sort of
1409 Cisco IOS <= 11.2(8)) do not put router alert
1410 option to IGMP packets destined to routable
1411 groups. It is very bad, because it means
1412 that we can forward NO IGMP messages.
1414 read_lock(&mrt_lock);
1415 if (mroute_socket) {
1416 nf_reset(skb);
1417 raw_rcv(mroute_socket, skb);
1418 read_unlock(&mrt_lock);
1419 return 0;
1421 read_unlock(&mrt_lock);
1425 read_lock(&mrt_lock);
1426 cache = ipmr_cache_find(ip_hdr(skb)->saddr, ip_hdr(skb)->daddr);
1429 * No usable cache entry
1431 if (cache==NULL) {
1432 int vif;
1434 if (local) {
1435 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
1436 ip_local_deliver(skb);
1437 if (skb2 == NULL) {
1438 read_unlock(&mrt_lock);
1439 return -ENOBUFS;
1441 skb = skb2;
1444 vif = ipmr_find_vif(skb->dev);
1445 if (vif >= 0) {
1446 int err = ipmr_cache_unresolved(vif, skb);
1447 read_unlock(&mrt_lock);
1449 return err;
1451 read_unlock(&mrt_lock);
1452 kfree_skb(skb);
1453 return -ENODEV;
1456 ip_mr_forward(skb, cache, local);
1458 read_unlock(&mrt_lock);
1460 if (local)
1461 return ip_local_deliver(skb);
1463 return 0;
1465 dont_forward:
1466 if (local)
1467 return ip_local_deliver(skb);
1468 kfree_skb(skb);
1469 return 0;
1472 #ifdef CONFIG_IP_PIMSM_V1
1474 * Handle IGMP messages of PIMv1
1477 int pim_rcv_v1(struct sk_buff * skb)
1479 struct igmphdr *pim;
1480 struct iphdr *encap;
1481 struct net_device *reg_dev = NULL;
1483 if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap)))
1484 goto drop;
1486 pim = igmp_hdr(skb);
1488 if (!mroute_do_pim ||
1489 skb->len < sizeof(*pim) + sizeof(*encap) ||
1490 pim->group != PIM_V1_VERSION || pim->code != PIM_V1_REGISTER)
1491 goto drop;
1493 encap = (struct iphdr *)(skb_transport_header(skb) +
1494 sizeof(struct igmphdr));
1496 Check that:
1497 a. packet is really destinted to a multicast group
1498 b. packet is not a NULL-REGISTER
1499 c. packet is not truncated
1501 if (!ipv4_is_multicast(encap->daddr) ||
1502 encap->tot_len == 0 ||
1503 ntohs(encap->tot_len) + sizeof(*pim) > skb->len)
1504 goto drop;
1506 read_lock(&mrt_lock);
1507 if (reg_vif_num >= 0)
1508 reg_dev = vif_table[reg_vif_num].dev;
1509 if (reg_dev)
1510 dev_hold(reg_dev);
1511 read_unlock(&mrt_lock);
1513 if (reg_dev == NULL)
1514 goto drop;
1516 skb->mac_header = skb->network_header;
1517 skb_pull(skb, (u8*)encap - skb->data);
1518 skb_reset_network_header(skb);
1519 skb->dev = reg_dev;
1520 skb->protocol = htons(ETH_P_IP);
1521 skb->ip_summed = 0;
1522 skb->pkt_type = PACKET_HOST;
1523 dst_release(skb->dst);
1524 skb->dst = NULL;
1525 reg_dev->stats.rx_bytes += skb->len;
1526 reg_dev->stats.rx_packets++;
1527 nf_reset(skb);
1528 netif_rx(skb);
1529 dev_put(reg_dev);
1530 return 0;
1531 drop:
1532 kfree_skb(skb);
1533 return 0;
1535 #endif
1537 #ifdef CONFIG_IP_PIMSM_V2
1538 static int pim_rcv(struct sk_buff * skb)
1540 struct pimreghdr *pim;
1541 struct iphdr *encap;
1542 struct net_device *reg_dev = NULL;
1544 if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap)))
1545 goto drop;
1547 pim = (struct pimreghdr *)skb_transport_header(skb);
1548 if (pim->type != ((PIM_VERSION<<4)|(PIM_REGISTER)) ||
1549 (pim->flags&PIM_NULL_REGISTER) ||
1550 (ip_compute_csum((void *)pim, sizeof(*pim)) != 0 &&
1551 csum_fold(skb_checksum(skb, 0, skb->len, 0))))
1552 goto drop;
1554 /* check if the inner packet is destined to mcast group */
1555 encap = (struct iphdr *)(skb_transport_header(skb) +
1556 sizeof(struct pimreghdr));
1557 if (!ipv4_is_multicast(encap->daddr) ||
1558 encap->tot_len == 0 ||
1559 ntohs(encap->tot_len) + sizeof(*pim) > skb->len)
1560 goto drop;
1562 read_lock(&mrt_lock);
1563 if (reg_vif_num >= 0)
1564 reg_dev = vif_table[reg_vif_num].dev;
1565 if (reg_dev)
1566 dev_hold(reg_dev);
1567 read_unlock(&mrt_lock);
1569 if (reg_dev == NULL)
1570 goto drop;
1572 skb->mac_header = skb->network_header;
1573 skb_pull(skb, (u8*)encap - skb->data);
1574 skb_reset_network_header(skb);
1575 skb->dev = reg_dev;
1576 skb->protocol = htons(ETH_P_IP);
1577 skb->ip_summed = 0;
1578 skb->pkt_type = PACKET_HOST;
1579 dst_release(skb->dst);
1580 reg_dev->stats.rx_bytes += skb->len;
1581 reg_dev->stats.rx_packets++;
1582 skb->dst = NULL;
1583 nf_reset(skb);
1584 netif_rx(skb);
1585 dev_put(reg_dev);
1586 return 0;
1587 drop:
1588 kfree_skb(skb);
1589 return 0;
1591 #endif
1593 static int
1594 ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm)
1596 int ct;
1597 struct rtnexthop *nhp;
1598 struct net_device *dev = vif_table[c->mfc_parent].dev;
1599 u8 *b = skb_tail_pointer(skb);
1600 struct rtattr *mp_head;
1602 if (dev)
1603 RTA_PUT(skb, RTA_IIF, 4, &dev->ifindex);
1605 mp_head = (struct rtattr*)skb_put(skb, RTA_LENGTH(0));
1607 for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
1608 if (c->mfc_un.res.ttls[ct] < 255) {
1609 if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4))
1610 goto rtattr_failure;
1611 nhp = (struct rtnexthop*)skb_put(skb, RTA_ALIGN(sizeof(*nhp)));
1612 nhp->rtnh_flags = 0;
1613 nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
1614 nhp->rtnh_ifindex = vif_table[ct].dev->ifindex;
1615 nhp->rtnh_len = sizeof(*nhp);
1618 mp_head->rta_type = RTA_MULTIPATH;
1619 mp_head->rta_len = skb_tail_pointer(skb) - (u8 *)mp_head;
1620 rtm->rtm_type = RTN_MULTICAST;
1621 return 1;
1623 rtattr_failure:
1624 nlmsg_trim(skb, b);
1625 return -EMSGSIZE;
1628 int ipmr_get_route(struct sk_buff *skb, struct rtmsg *rtm, int nowait)
1630 int err;
1631 struct mfc_cache *cache;
1632 struct rtable *rt = skb->rtable;
1634 read_lock(&mrt_lock);
1635 cache = ipmr_cache_find(rt->rt_src, rt->rt_dst);
1637 if (cache==NULL) {
1638 struct sk_buff *skb2;
1639 struct iphdr *iph;
1640 struct net_device *dev;
1641 int vif;
1643 if (nowait) {
1644 read_unlock(&mrt_lock);
1645 return -EAGAIN;
1648 dev = skb->dev;
1649 if (dev == NULL || (vif = ipmr_find_vif(dev)) < 0) {
1650 read_unlock(&mrt_lock);
1651 return -ENODEV;
1653 skb2 = skb_clone(skb, GFP_ATOMIC);
1654 if (!skb2) {
1655 read_unlock(&mrt_lock);
1656 return -ENOMEM;
1659 skb_push(skb2, sizeof(struct iphdr));
1660 skb_reset_network_header(skb2);
1661 iph = ip_hdr(skb2);
1662 iph->ihl = sizeof(struct iphdr) >> 2;
1663 iph->saddr = rt->rt_src;
1664 iph->daddr = rt->rt_dst;
1665 iph->version = 0;
1666 err = ipmr_cache_unresolved(vif, skb2);
1667 read_unlock(&mrt_lock);
1668 return err;
1671 if (!nowait && (rtm->rtm_flags&RTM_F_NOTIFY))
1672 cache->mfc_flags |= MFC_NOTIFY;
1673 err = ipmr_fill_mroute(skb, cache, rtm);
1674 read_unlock(&mrt_lock);
1675 return err;
1678 #ifdef CONFIG_PROC_FS
/*
 *	The /proc interfaces to multicast routing:
 *	ip_mr_cache and ip_mr_vif
 */

/* Per-open cursor of the vif dump. */
struct ipmr_vif_iter {
	int ct;		/* index into vif_table of the current entry */
};
1686 static struct vif_device *ipmr_vif_seq_idx(struct ipmr_vif_iter *iter,
1687 loff_t pos)
1689 for (iter->ct = 0; iter->ct < maxvif; ++iter->ct) {
1690 if (!VIF_EXISTS(iter->ct))
1691 continue;
1692 if (pos-- == 0)
1693 return &vif_table[iter->ct];
1695 return NULL;
1698 static void *ipmr_vif_seq_start(struct seq_file *seq, loff_t *pos)
1699 __acquires(mrt_lock)
1701 read_lock(&mrt_lock);
1702 return *pos ? ipmr_vif_seq_idx(seq->private, *pos - 1)
1703 : SEQ_START_TOKEN;
1706 static void *ipmr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1708 struct ipmr_vif_iter *iter = seq->private;
1710 ++*pos;
1711 if (v == SEQ_START_TOKEN)
1712 return ipmr_vif_seq_idx(iter, 0);
1714 while (++iter->ct < maxvif) {
1715 if (!VIF_EXISTS(iter->ct))
1716 continue;
1717 return &vif_table[iter->ct];
1719 return NULL;
1722 static void ipmr_vif_seq_stop(struct seq_file *seq, void *v)
1723 __releases(mrt_lock)
1725 read_unlock(&mrt_lock);
1728 static int ipmr_vif_seq_show(struct seq_file *seq, void *v)
1730 if (v == SEQ_START_TOKEN) {
1731 seq_puts(seq,
1732 "Interface BytesIn PktsIn BytesOut PktsOut Flags Local Remote\n");
1733 } else {
1734 const struct vif_device *vif = v;
1735 const char *name = vif->dev ? vif->dev->name : "none";
1737 seq_printf(seq,
1738 "%2Zd %-10s %8ld %7ld %8ld %7ld %05X %08X %08X\n",
1739 vif - vif_table,
1740 name, vif->bytes_in, vif->pkt_in,
1741 vif->bytes_out, vif->pkt_out,
1742 vif->flags, vif->local, vif->remote);
1744 return 0;
1747 static const struct seq_operations ipmr_vif_seq_ops = {
1748 .start = ipmr_vif_seq_start,
1749 .next = ipmr_vif_seq_next,
1750 .stop = ipmr_vif_seq_stop,
1751 .show = ipmr_vif_seq_show,
1754 static int ipmr_vif_open(struct inode *inode, struct file *file)
1756 return seq_open_private(file, &ipmr_vif_seq_ops,
1757 sizeof(struct ipmr_vif_iter));
1760 static const struct file_operations ipmr_vif_fops = {
1761 .owner = THIS_MODULE,
1762 .open = ipmr_vif_open,
1763 .read = seq_read,
1764 .llseek = seq_lseek,
1765 .release = seq_release_private,
/*
 *	Per-open cursor of the cache dump.
 *
 *	"cache" records which list is being walked - and therefore which lock
 *	is held: mfc_cache_array (mrt_lock), &mfc_unres_queue
 *	(mfc_unres_lock), or NULL when no lock is held.
 */
struct ipmr_mfc_iter {
	struct mfc_cache **cache;	/* list currently being walked */
	int ct;				/* hash line within mfc_cache_array */
};
1774 static struct mfc_cache *ipmr_mfc_seq_idx(struct ipmr_mfc_iter *it, loff_t pos)
1776 struct mfc_cache *mfc;
1778 it->cache = mfc_cache_array;
1779 read_lock(&mrt_lock);
1780 for (it->ct = 0; it->ct < MFC_LINES; it->ct++)
1781 for (mfc = mfc_cache_array[it->ct]; mfc; mfc = mfc->next)
1782 if (pos-- == 0)
1783 return mfc;
1784 read_unlock(&mrt_lock);
1786 it->cache = &mfc_unres_queue;
1787 spin_lock_bh(&mfc_unres_lock);
1788 for (mfc = mfc_unres_queue; mfc; mfc = mfc->next)
1789 if (pos-- == 0)
1790 return mfc;
1791 spin_unlock_bh(&mfc_unres_lock);
1793 it->cache = NULL;
1794 return NULL;
1798 static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
1800 struct ipmr_mfc_iter *it = seq->private;
1801 it->cache = NULL;
1802 it->ct = 0;
1803 return *pos ? ipmr_mfc_seq_idx(seq->private, *pos - 1)
1804 : SEQ_START_TOKEN;
1807 static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1809 struct mfc_cache *mfc = v;
1810 struct ipmr_mfc_iter *it = seq->private;
1812 ++*pos;
1814 if (v == SEQ_START_TOKEN)
1815 return ipmr_mfc_seq_idx(seq->private, 0);
1817 if (mfc->next)
1818 return mfc->next;
1820 if (it->cache == &mfc_unres_queue)
1821 goto end_of_list;
1823 BUG_ON(it->cache != mfc_cache_array);
1825 while (++it->ct < MFC_LINES) {
1826 mfc = mfc_cache_array[it->ct];
1827 if (mfc)
1828 return mfc;
1831 /* exhausted cache_array, show unresolved */
1832 read_unlock(&mrt_lock);
1833 it->cache = &mfc_unres_queue;
1834 it->ct = 0;
1836 spin_lock_bh(&mfc_unres_lock);
1837 mfc = mfc_unres_queue;
1838 if (mfc)
1839 return mfc;
1841 end_of_list:
1842 spin_unlock_bh(&mfc_unres_lock);
1843 it->cache = NULL;
1845 return NULL;
1848 static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
1850 struct ipmr_mfc_iter *it = seq->private;
1852 if (it->cache == &mfc_unres_queue)
1853 spin_unlock_bh(&mfc_unres_lock);
1854 else if (it->cache == mfc_cache_array)
1855 read_unlock(&mrt_lock);
1858 static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
1860 int n;
1862 if (v == SEQ_START_TOKEN) {
1863 seq_puts(seq,
1864 "Group Origin Iif Pkts Bytes Wrong Oifs\n");
1865 } else {
1866 const struct mfc_cache *mfc = v;
1867 const struct ipmr_mfc_iter *it = seq->private;
1869 seq_printf(seq, "%08lX %08lX %-3d %8ld %8ld %8ld",
1870 (unsigned long) mfc->mfc_mcastgrp,
1871 (unsigned long) mfc->mfc_origin,
1872 mfc->mfc_parent,
1873 mfc->mfc_un.res.pkt,
1874 mfc->mfc_un.res.bytes,
1875 mfc->mfc_un.res.wrong_if);
1877 if (it->cache != &mfc_unres_queue) {
1878 for (n = mfc->mfc_un.res.minvif;
1879 n < mfc->mfc_un.res.maxvif; n++ ) {
1880 if (VIF_EXISTS(n)
1881 && mfc->mfc_un.res.ttls[n] < 255)
1882 seq_printf(seq,
1883 " %2d:%-3d",
1884 n, mfc->mfc_un.res.ttls[n]);
1887 seq_putc(seq, '\n');
1889 return 0;
1892 static const struct seq_operations ipmr_mfc_seq_ops = {
1893 .start = ipmr_mfc_seq_start,
1894 .next = ipmr_mfc_seq_next,
1895 .stop = ipmr_mfc_seq_stop,
1896 .show = ipmr_mfc_seq_show,
1899 static int ipmr_mfc_open(struct inode *inode, struct file *file)
1901 return seq_open_private(file, &ipmr_mfc_seq_ops,
1902 sizeof(struct ipmr_mfc_iter));
1905 static const struct file_operations ipmr_mfc_fops = {
1906 .owner = THIS_MODULE,
1907 .open = ipmr_mfc_open,
1908 .read = seq_read,
1909 .llseek = seq_lseek,
1910 .release = seq_release_private,
1912 #endif
1914 #ifdef CONFIG_IP_PIMSM_V2
1915 static struct net_protocol pim_protocol = {
1916 .handler = pim_rcv,
1918 #endif
1922 * Setup for IP multicast routing
1925 int __init ip_mr_init(void)
1927 int err;
1929 mrt_cachep = kmem_cache_create("ip_mrt_cache",
1930 sizeof(struct mfc_cache),
1931 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC,
1932 NULL);
1933 if (!mrt_cachep)
1934 return -ENOMEM;
1936 setup_timer(&ipmr_expire_timer, ipmr_expire_process, 0);
1937 err = register_netdevice_notifier(&ip_mr_notifier);
1938 if (err)
1939 goto reg_notif_fail;
1940 #ifdef CONFIG_PROC_FS
1941 err = -ENOMEM;
1942 if (!proc_net_fops_create(&init_net, "ip_mr_vif", 0, &ipmr_vif_fops))
1943 goto proc_vif_fail;
1944 if (!proc_net_fops_create(&init_net, "ip_mr_cache", 0, &ipmr_mfc_fops))
1945 goto proc_cache_fail;
1946 #endif
1947 return 0;
1948 reg_notif_fail:
1949 kmem_cache_destroy(mrt_cachep);
1950 #ifdef CONFIG_PROC_FS
1951 proc_vif_fail:
1952 unregister_netdevice_notifier(&ip_mr_notifier);
1953 proc_cache_fail:
1954 proc_net_remove(&init_net, "ip_mr_vif");
1955 #endif
1956 return err;