/*
 *	IP multicast routing support for mrouted 3.6/3.8
 *
 *		(c) 1995 Alan Cox, <alan@redhat.com>
 *	  Linux Consultancy and Custom Driver Development
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 *
 *	Version: $Id: ipmr.c,v 1.65 2001/10/31 21:55:54 davem Exp $
 *
 *	Fixes:
 *	Michael Chastain	:	Incorrect size of copying.
 *	Alan Cox		:	Added the cache manager code
 *	Alan Cox		:	Fixed the clone/copy bug and device race.
 *	Mike McLagan		:	Routing by source
 *	Malcolm Beattie		:	Buffer handling fixes.
 *	Alexey Kuznetsov	:	Double buffer free and other fixes.
 *	SVR Anand		:	Fixed several multicast bugs and problems.
 *	Alexey Kuznetsov	:	Status, optimisations and more.
 *	Brad Parker		:	Better behaviour on mrouted upcall
 *					overflow.
 *	Carlos Picoto		:	PIMv1 Support
 *	Pavlin Ivanov Radoslavov:	PIMv2 Registers must checksum only PIM header
 *					Relax this requirement to work with older peers.
 */
#include <linux/config.h>
#include <asm/system.h>
#include <asm/uaccess.h>
#include <linux/types.h>
#include <linux/sched.h>
#include <linux/errno.h>
#include <linux/timer.h>
#include <linux/mm.h>
#include <linux/kernel.h>
#include <linux/fcntl.h>
#include <linux/stat.h>
#include <linux/socket.h>
#include <linux/in.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/inetdevice.h>
#include <linux/igmp.h>
#include <linux/proc_fs.h>
#include <linux/mroute.h>
#include <linux/init.h>
#include <net/ip.h>
#include <net/protocol.h>
#include <linux/skbuff.h>
#include <net/sock.h>
#include <net/icmp.h>
#include <net/udp.h>
#include <net/raw.h>
#include <linux/notifier.h>
#include <linux/if_arp.h>
#include <linux/netfilter_ipv4.h>
#include <net/ipip.h>
#include <net/checksum.h>

#if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2)
#define CONFIG_IP_PIMSM	1
#endif
static struct sock *mroute_socket;

/* Big lock, protecting vif table, mrt cache and mroute socket state.
   Note that the changes are semaphored via rtnl_lock.
 */

static rwlock_t mrt_lock = RW_LOCK_UNLOCKED;

/*
 *	Multicast router control variables
 */

static struct vif_device vif_table[MAXVIFS];	/* Devices		*/
static int maxvif;

#define VIF_EXISTS(idx) (vif_table[idx].dev != NULL)

static int mroute_do_assert;			/* Set in PIM assert	*/
static int mroute_do_pim;

static struct mfc_cache *mfc_cache_array[MFC_LINES];	/* Forwarding cache	*/

static struct mfc_cache *mfc_unres_queue;	/* Queue of unresolved entries */
static atomic_t cache_resolve_queue_len;	/* Size of unresolved	*/

/* Special spinlock for queue of unresolved entries */
static spinlock_t mfc_unres_lock = SPIN_LOCK_UNLOCKED;

/* We return to original Alan's scheme. Hash table of resolved
   entries is changed only in process context and protected
   with weak lock mrt_lock. Queue of unresolved entries is protected
   with strong spinlock mfc_unres_lock.

   In this case data path is free of exclusive locks at all.
 */
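
/* Concretely: the hot path in ip_mr_input()/ip_mr_forward() only ever
   takes read_lock(&mrt_lock), while vif_add(), vif_delete() and the MFC
   updaters run in process context under rtnl_lock plus a short
   write_lock_bh(&mrt_lock) window around the actual pointer updates. */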

static kmem_cache_t *mrt_cachep;

static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local);
static int ipmr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert);
static int ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm);

static struct inet_protocol pim_protocol;

static struct timer_list ipmr_expire_timer;

/* Service routines creating virtual interfaces: DVMRP tunnels and PIMREG */

static
struct net_device *ipmr_new_tunnel(struct vifctl *v)
{
	struct net_device *dev;

	dev = __dev_get_by_name("tunl0");

	if (dev) {
		int err;
		struct ifreq ifr;
		mm_segment_t oldfs;
		struct ip_tunnel_parm p;
		struct in_device *in_dev;

		memset(&p, 0, sizeof(p));
		p.iph.daddr = v->vifc_rmt_addr.s_addr;
		p.iph.saddr = v->vifc_lcl_addr.s_addr;
		p.iph.version = 4;
		p.iph.ihl = 5;
		p.iph.protocol = IPPROTO_IPIP;
		sprintf(p.name, "dvmrp%d", v->vifc_vifi);
		ifr.ifr_ifru.ifru_data = (void*)&p;

		oldfs = get_fs(); set_fs(KERNEL_DS);
		err = dev->do_ioctl(dev, &ifr, SIOCADDTUNNEL);
		set_fs(oldfs);
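		/* SIOCADDTUNNEL expects a userspace pointer in ifr_data;
		   the get_fs()/set_fs(KERNEL_DS) bracket above lets the
		   tunnel driver's copy_from_user() read our on-stack
		   ip_tunnel_parm instead. */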

		dev = NULL;

		if (err == 0 && (dev = __dev_get_by_name(p.name)) != NULL) {
			dev->flags |= IFF_MULTICAST;

			in_dev = __in_dev_get(dev);
			if (in_dev == NULL && (in_dev = inetdev_init(dev)) == NULL)
				goto failure;
			in_dev->cnf.rp_filter = 0;

			if (dev_open(dev))
				goto failure;
		}
	}
	return dev;

failure:
	/* allow the register to be completed before unregistering. */
	rtnl_unlock();
	rtnl_lock();

	unregister_netdevice(dev);
	return NULL;
}

#ifdef CONFIG_IP_PIMSM

static int reg_vif_num = -1;

static int reg_vif_xmit(struct sk_buff *skb, struct net_device *dev)
{
	read_lock(&mrt_lock);
	((struct net_device_stats*)dev->priv)->tx_bytes += skb->len;
	((struct net_device_stats*)dev->priv)->tx_packets++;
	ipmr_cache_report(skb, reg_vif_num, IGMPMSG_WHOLEPKT);
	read_unlock(&mrt_lock);
	kfree_skb(skb);
	return 0;
}

static struct net_device_stats *reg_vif_get_stats(struct net_device *dev)
{
	return (struct net_device_stats*)dev->priv;
}

static void reg_vif_setup(struct net_device *dev)
{
	dev->type		= ARPHRD_PIMREG;
	dev->mtu		= 1500 - sizeof(struct iphdr) - 8;
	dev->flags		= IFF_NOARP;
	dev->hard_start_xmit	= reg_vif_xmit;
	dev->get_stats		= reg_vif_get_stats;
	dev->destructor		= (void (*)(struct net_device *)) kfree;
}
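
/* The pimreg MTU above reserves room for the outer IP header plus the
   8-byte PIM register header that the daemon prepends when it re-sends
   a packet as a PIM REGISTER. */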

static struct net_device *ipmr_reg_vif(void)
{
	struct net_device *dev;
	struct in_device *in_dev;

	dev = alloc_netdev(sizeof(struct net_device_stats), "pimreg",
			   reg_vif_setup);

	if (dev == NULL)	/* alloc_netdev() can fail under memory pressure */
		return NULL;

	if (register_netdevice(dev)) {
		kfree(dev);
		return NULL;
	}
	dev->iflink = 0;

	if ((in_dev = inetdev_init(dev)) == NULL)
		goto failure;

	in_dev->cnf.rp_filter = 0;

	if (dev_open(dev))
		goto failure;

	return dev;

failure:
	/* allow the register to be completed before unregistering. */
	rtnl_unlock();
	rtnl_lock();

	unregister_netdevice(dev);
	return NULL;
}
#endif

/*
 *	Delete a VIF entry
 */

static int vif_delete(int vifi)
{
	struct vif_device *v;
	struct net_device *dev;
	struct in_device *in_dev;

	if (vifi < 0 || vifi >= maxvif)
		return -EADDRNOTAVAIL;

	v = &vif_table[vifi];

	write_lock_bh(&mrt_lock);
	dev = v->dev;
	v->dev = NULL;

	if (!dev) {
		write_unlock_bh(&mrt_lock);
		return -EADDRNOTAVAIL;
	}

#ifdef CONFIG_IP_PIMSM
	if (vifi == reg_vif_num)
		reg_vif_num = -1;
#endif

	if (vifi+1 == maxvif) {
		int tmp;
		for (tmp=vifi-1; tmp>=0; tmp--) {
			if (VIF_EXISTS(tmp))
				break;
		}
		maxvif = tmp+1;
	}
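	/* maxvif only shrinks here, and still under the write lock, so
	   readers walking vif_table[0..maxvif) under read_lock never see
	   a stale upper bound. */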
	write_unlock_bh(&mrt_lock);

	dev_set_allmulti(dev, -1);

	if ((in_dev = __in_dev_get(dev)) != NULL) {
		in_dev->cnf.mc_forwarding--;
		ip_rt_multicast_event(in_dev);
	}

	if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER))
		unregister_netdevice(dev);

	dev_put(dev);
	return 0;
}

/* Destroy an unresolved cache entry, killing queued skbs
   and reporting error to netlink readers.
 */

static void ipmr_destroy_unres(struct mfc_cache *c)
{
	struct sk_buff *skb;

	atomic_dec(&cache_resolve_queue_len);

	while((skb=skb_dequeue(&c->mfc_un.unres.unresolved))) {
		if (skb->nh.iph->version == 0) {
			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));
			nlh->nlmsg_type = NLMSG_ERROR;
			nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
			skb_trim(skb, nlh->nlmsg_len);
			((struct nlmsgerr*)NLMSG_DATA(nlh))->error = -ETIMEDOUT;
			netlink_unicast(rtnl, skb, NETLINK_CB(skb).dst_pid, MSG_DONTWAIT);
		} else
			kfree_skb(skb);
	}

	kmem_cache_free(mrt_cachep, c);
}

/* Single timer process for all the unresolved queue. */

static void ipmr_expire_process(unsigned long dummy)
{
	unsigned long now;
	unsigned long expires;
	struct mfc_cache *c, **cp;

	if (!spin_trylock(&mfc_unres_lock)) {
		mod_timer(&ipmr_expire_timer, jiffies+HZ/10);
		return;
	}

	if (atomic_read(&cache_resolve_queue_len) == 0)
		goto out;

	now = jiffies;
	expires = 10*HZ;
	cp = &mfc_unres_queue;

	while ((c=*cp) != NULL) {
		if (time_after(c->mfc_un.unres.expires, now)) {
			unsigned long interval = c->mfc_un.unres.expires - now;
			if (interval < expires)
				expires = interval;
			cp = &c->next;
			continue;
		}

		*cp = c->next;

		ipmr_destroy_unres(c);
	}

	if (atomic_read(&cache_resolve_queue_len))
		mod_timer(&ipmr_expire_timer, jiffies + expires);

out:
	spin_unlock(&mfc_unres_lock);
}

/* Fill oifs list. It is called under write locked mrt_lock. */

static void ipmr_update_thresholds(struct mfc_cache *cache, unsigned char *ttls)
{
	int vifi;

	cache->mfc_un.res.minvif = MAXVIFS;
	cache->mfc_un.res.maxvif = 0;
	memset(cache->mfc_un.res.ttls, 255, MAXVIFS);

	for (vifi=0; vifi<maxvif; vifi++) {
		if (VIF_EXISTS(vifi) && ttls[vifi] && ttls[vifi] < 255) {
			cache->mfc_un.res.ttls[vifi] = ttls[vifi];
			if (cache->mfc_un.res.minvif > vifi)
				cache->mfc_un.res.minvif = vifi;
			if (cache->mfc_un.res.maxvif <= vifi)
				cache->mfc_un.res.maxvif = vifi + 1;
		}
	}
}
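
/* Example: with ttls[] = {1, 0, 64}, vif 0 forwards packets arriving
   with TTL > 1 and vif 2 those with TTL > 64 (see ip_mr_forward()),
   vif 1 is excluded, and the entry gets minvif=0, maxvif=3. */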

static int vif_add(struct vifctl *vifc, int mrtsock)
{
	int vifi = vifc->vifc_vifi;
	struct vif_device *v = &vif_table[vifi];
	struct net_device *dev;
	struct in_device *in_dev;

	/* Is vif busy ? */
	if (VIF_EXISTS(vifi))
		return -EADDRINUSE;

	switch (vifc->vifc_flags) {
#ifdef CONFIG_IP_PIMSM
	case VIFF_REGISTER:
		/*
		 * Special Purpose VIF in PIM
		 * All the packets will be sent to the daemon
		 */
		if (reg_vif_num >= 0)
			return -EADDRINUSE;
		dev = ipmr_reg_vif();
		if (!dev)
			return -ENOBUFS;
		break;
#endif
	case VIFF_TUNNEL:
		dev = ipmr_new_tunnel(vifc);
		if (!dev)
			return -ENOBUFS;
		break;
	case 0:
		dev=ip_dev_find(vifc->vifc_lcl_addr.s_addr);
		if (!dev)
			return -EADDRNOTAVAIL;
		__dev_put(dev);
		break;
	default:
		return -EINVAL;
	}

	if ((in_dev = __in_dev_get(dev)) == NULL)
		return -EADDRNOTAVAIL;
	in_dev->cnf.mc_forwarding++;
	dev_set_allmulti(dev, +1);
	ip_rt_multicast_event(in_dev);

	/*
	 *	Fill in the VIF structures
	 */
	v->rate_limit=vifc->vifc_rate_limit;
	v->local=vifc->vifc_lcl_addr.s_addr;
	v->remote=vifc->vifc_rmt_addr.s_addr;
	v->flags=vifc->vifc_flags;
	if (!mrtsock)
		v->flags |= VIFF_STATIC;
	v->threshold=vifc->vifc_threshold;
	v->bytes_in = 0;
	v->bytes_out = 0;
	v->pkt_in = 0;
	v->pkt_out = 0;
	v->link = dev->ifindex;
	if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER))
		v->link = dev->iflink;

	/* And finish update writing critical data */
	write_lock_bh(&mrt_lock);
	dev_hold(dev);
	v->dev=dev;
#ifdef CONFIG_IP_PIMSM
	if (v->flags&VIFF_REGISTER)
		reg_vif_num = vifi;
#endif
	if (vifi+1 > maxvif)
		maxvif = vifi+1;
	write_unlock_bh(&mrt_lock);
	return 0;
}

static struct mfc_cache *ipmr_cache_find(__u32 origin, __u32 mcastgrp)
{
	int line=MFC_HASH(mcastgrp,origin);
	struct mfc_cache *c;

	for (c=mfc_cache_array[line]; c; c = c->next) {
		if (c->mfc_origin==origin && c->mfc_mcastgrp==mcastgrp)
			break;
	}
	return c;
}

/*
 *	Allocate a multicast cache entry
 */
static struct mfc_cache *ipmr_cache_alloc(void)
{
	struct mfc_cache *c=kmem_cache_alloc(mrt_cachep, GFP_KERNEL);
	if(c==NULL)
		return NULL;
	memset(c, 0, sizeof(*c));
	c->mfc_un.res.minvif = MAXVIFS;
	return c;
}

static struct mfc_cache *ipmr_cache_alloc_unres(void)
{
	struct mfc_cache *c=kmem_cache_alloc(mrt_cachep, GFP_ATOMIC);
	if(c==NULL)
		return NULL;
	memset(c, 0, sizeof(*c));
	skb_queue_head_init(&c->mfc_un.unres.unresolved);
	c->mfc_un.unres.expires = jiffies + 10*HZ;
	return c;
}
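
/* Unlike ipmr_cache_alloc(), this one runs in the packet receive path
   (softirq context via ipmr_cache_unresolved()), hence GFP_ATOMIC and
   the 10 second resolution deadline. */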

/*
 *	A cache entry has gone into a resolved state from queued
 */

static void ipmr_cache_resolve(struct mfc_cache *uc, struct mfc_cache *c)
{
	struct sk_buff *skb;

	/*
	 *	Play the pending entries through our router
	 */

	while((skb=__skb_dequeue(&uc->mfc_un.unres.unresolved))) {
		if (skb->nh.iph->version == 0) {
			int err;
			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));

			if (ipmr_fill_mroute(skb, c, NLMSG_DATA(nlh)) > 0) {
				nlh->nlmsg_len = skb->tail - (u8*)nlh;
			} else {
				nlh->nlmsg_type = NLMSG_ERROR;
				nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
				skb_trim(skb, nlh->nlmsg_len);
				((struct nlmsgerr*)NLMSG_DATA(nlh))->error = -EMSGSIZE;
			}
			err = netlink_unicast(rtnl, skb, NETLINK_CB(skb).dst_pid, MSG_DONTWAIT);
		} else
			ip_mr_forward(skb, c, 0);
	}
}

/*
 *	Bounce a cache query up to mrouted. We could use netlink for this but mrouted
 *	expects the following bizarre scheme.
 *
 *	Called under mrt_lock.
 */

static int ipmr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert)
{
	struct sk_buff *skb;
	int ihl = pkt->nh.iph->ihl<<2;
	struct igmphdr *igmp;
	struct igmpmsg *msg;
	int ret;

#ifdef CONFIG_IP_PIMSM
	if (assert == IGMPMSG_WHOLEPKT)
		skb = skb_realloc_headroom(pkt, sizeof(struct iphdr));
	else
#endif
		skb = alloc_skb(128, GFP_ATOMIC);

	if(!skb)
		return -ENOBUFS;

#ifdef CONFIG_IP_PIMSM
	if (assert == IGMPMSG_WHOLEPKT) {
		/* Ugly, but we have no choice with this interface.
		   Duplicate old header, fix ihl, length etc.
		   And all this only to mangle msg->im_msgtype and
		   to set msg->im_mbz to "mbz" :-)
		 */
		msg = (struct igmpmsg*)skb_push(skb, sizeof(struct iphdr));
		skb->nh.raw = skb->h.raw = (u8*)msg;
		memcpy(msg, pkt->nh.raw, sizeof(struct iphdr));
		msg->im_msgtype = IGMPMSG_WHOLEPKT;
		msg->im_mbz = 0;
		msg->im_vif = reg_vif_num;
		skb->nh.iph->ihl = sizeof(struct iphdr) >> 2;
		skb->nh.iph->tot_len = htons(ntohs(pkt->nh.iph->tot_len) + sizeof(struct iphdr));
	} else
#endif
	{

	/*
	 *	Copy the IP header
	 */

	skb->nh.iph = (struct iphdr *)skb_put(skb, ihl);
	memcpy(skb->data,pkt->data,ihl);
	skb->nh.iph->protocol = 0;	/* Flag to the kernel this is a route add */
	msg = (struct igmpmsg*)skb->nh.iph;
	msg->im_vif = vifi;
	skb->dst = dst_clone(pkt->dst);

	/*
	 *	Add our header
	 */

	igmp=(struct igmphdr *)skb_put(skb,sizeof(struct igmphdr));
	igmp->type	=
	msg->im_msgtype = assert;
	igmp->code	= 0;
	skb->nh.iph->tot_len=htons(skb->len);	/* Fix the length */
	skb->h.raw = skb->nh.raw;
	}

	if (mroute_socket == NULL) {
		kfree_skb(skb);
		return -EINVAL;
	}

	/*
	 *	Deliver to mrouted
	 */
	if ((ret=sock_queue_rcv_skb(mroute_socket,skb))<0) {
		if (net_ratelimit())
			printk(KERN_WARNING "mroute: pending queue full, dropping entries.\n");
		kfree_skb(skb);
	}

	return ret;
}
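
/* mrouted receives these upcalls as ordinary datagrams on its raw IGMP
   socket; im_msgtype distinguishes them from real IGMP (IGMPMSG_NOCACHE
   asks the daemon to install an MFC entry for the new (S,G) pair). */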

/*
 *	Queue a packet for resolution. It gets locked cache entry!
 */

static int
ipmr_cache_unresolved(vifi_t vifi, struct sk_buff *skb)
{
	int err;
	struct mfc_cache *c;

	spin_lock_bh(&mfc_unres_lock);
	for (c=mfc_unres_queue; c; c=c->next) {
		if (c->mfc_mcastgrp == skb->nh.iph->daddr &&
		    c->mfc_origin == skb->nh.iph->saddr)
			break;
	}

	if (c == NULL) {
		/*
		 *	Create a new entry if allowable
		 */

		if (atomic_read(&cache_resolve_queue_len)>=10 ||
		    (c=ipmr_cache_alloc_unres())==NULL) {
			spin_unlock_bh(&mfc_unres_lock);

			kfree_skb(skb);
			return -ENOBUFS;
		}

		/*
		 *	Fill in the new cache entry
		 */
		c->mfc_parent=-1;
		c->mfc_origin=skb->nh.iph->saddr;
		c->mfc_mcastgrp=skb->nh.iph->daddr;

		/*
		 *	Reflect first query at mrouted.
		 */
		if ((err = ipmr_cache_report(skb, vifi, IGMPMSG_NOCACHE))<0) {
			/* If the report failed throw the cache entry
			   out - Brad Parker
			 */
			spin_unlock_bh(&mfc_unres_lock);

			kmem_cache_free(mrt_cachep, c);
			kfree_skb(skb);
			return err;
		}

		atomic_inc(&cache_resolve_queue_len);
		c->next = mfc_unres_queue;
		mfc_unres_queue = c;

		mod_timer(&ipmr_expire_timer, c->mfc_un.unres.expires);
	}

	/*
	 *	See if we can append the packet
	 */
	if (c->mfc_un.unres.unresolved.qlen>3) {
		kfree_skb(skb);
		err = -ENOBUFS;
	} else {
		skb_queue_tail(&c->mfc_un.unres.unresolved,skb);
		err = 0;
	}

	spin_unlock_bh(&mfc_unres_lock);
	return err;
}

/*
 *	MFC cache manipulation by user space mroute daemon
 */

static int ipmr_mfc_delete(struct mfcctl *mfc)
{
	int line;
	struct mfc_cache *c, **cp;

	line=MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);

	for (cp=&mfc_cache_array[line]; (c=*cp) != NULL; cp = &c->next) {
		if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
		    c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) {
			write_lock_bh(&mrt_lock);
			*cp = c->next;
			write_unlock_bh(&mrt_lock);

			kmem_cache_free(mrt_cachep, c);
			return 0;
		}
	}
	return -ENOENT;
}

static int ipmr_mfc_add(struct mfcctl *mfc, int mrtsock)
{
	int line;
	struct mfc_cache *uc, *c, **cp;

	line=MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);

	for (cp=&mfc_cache_array[line]; (c=*cp) != NULL; cp = &c->next) {
		if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
		    c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr)
			break;
	}

	if (c != NULL) {
		write_lock_bh(&mrt_lock);
		c->mfc_parent = mfc->mfcc_parent;
		ipmr_update_thresholds(c, mfc->mfcc_ttls);
		if (!mrtsock)
			c->mfc_flags |= MFC_STATIC;
		write_unlock_bh(&mrt_lock);
		return 0;
	}

	if(!MULTICAST(mfc->mfcc_mcastgrp.s_addr))
		return -EINVAL;

	c=ipmr_cache_alloc();
	if (c==NULL)
		return -ENOMEM;

	c->mfc_origin=mfc->mfcc_origin.s_addr;
	c->mfc_mcastgrp=mfc->mfcc_mcastgrp.s_addr;
	c->mfc_parent=mfc->mfcc_parent;
	ipmr_update_thresholds(c, mfc->mfcc_ttls);
	if (!mrtsock)
		c->mfc_flags |= MFC_STATIC;

	write_lock_bh(&mrt_lock);
	c->next = mfc_cache_array[line];
	mfc_cache_array[line] = c;
	write_unlock_bh(&mrt_lock);

	/*
	 *	Check to see if we resolved a queued list. If so we
	 *	need to send on the frames and tidy up.
	 */
	spin_lock_bh(&mfc_unres_lock);
	for (cp = &mfc_unres_queue; (uc=*cp) != NULL;
	     cp = &uc->next) {
		if (uc->mfc_origin == c->mfc_origin &&
		    uc->mfc_mcastgrp == c->mfc_mcastgrp) {
			*cp = uc->next;
			if (atomic_dec_and_test(&cache_resolve_queue_len))
				del_timer(&ipmr_expire_timer);
			break;
		}
	}
	spin_unlock_bh(&mfc_unres_lock);

	if (uc) {
		ipmr_cache_resolve(uc, c);
		kmem_cache_free(mrt_cachep, uc);
	}
	return 0;
}
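
/* Userspace counterpart, as a rough sketch (mrouted/pimd style):

	struct mfcctl mc;
	memset(&mc, 0, sizeof(mc));
	mc.mfcc_origin = src;		// struct in_addr of the source
	mc.mfcc_mcastgrp = grp;		// group address
	mc.mfcc_parent = iif_vif;	// expected incoming vif
	mc.mfcc_ttls[oif_vif] = ttl_threshold;
	setsockopt(s, IPPROTO_IP, MRT_ADD_MFC, &mc, sizeof(mc));
 */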

/*
 *	Close the multicast socket, and clear the vif tables etc
 */

static void mroute_clean_tables(struct sock *sk)
{
	int i;

	/*
	 *	Shut down all active vif entries
	 */
	for(i=0; i<maxvif; i++) {
		if (!(vif_table[i].flags&VIFF_STATIC))
			vif_delete(i);
	}

	/*
	 *	Wipe the cache
	 */
	for (i=0;i<MFC_LINES;i++) {
		struct mfc_cache *c, **cp;

		cp = &mfc_cache_array[i];
		while ((c = *cp) != NULL) {
			if (c->mfc_flags&MFC_STATIC) {
				cp = &c->next;
				continue;
			}
			write_lock_bh(&mrt_lock);
			*cp = c->next;
			write_unlock_bh(&mrt_lock);

			kmem_cache_free(mrt_cachep, c);
		}
	}

	if (atomic_read(&cache_resolve_queue_len) != 0) {
		struct mfc_cache *c;

		spin_lock_bh(&mfc_unres_lock);
		while (mfc_unres_queue != NULL) {
			c = mfc_unres_queue;
			mfc_unres_queue = c->next;
			spin_unlock_bh(&mfc_unres_lock);

			ipmr_destroy_unres(c);

			spin_lock_bh(&mfc_unres_lock);
		}
		spin_unlock_bh(&mfc_unres_lock);
	}
}

static void mrtsock_destruct(struct sock *sk)
{
	rtnl_lock();
	if (sk == mroute_socket) {
		ipv4_devconf.mc_forwarding--;

		write_lock_bh(&mrt_lock);
		mroute_socket=NULL;
		write_unlock_bh(&mrt_lock);

		mroute_clean_tables(sk);
	}
	rtnl_unlock();
}

/*
 *	Socket options and virtual interface manipulation. The whole
 *	virtual interface system is a complete heap, but unfortunately
 *	that's how BSD mrouted happens to think. Maybe one day with a proper
 *	MOSPF/PIM router set up we can clean this up.
 */
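
/* A minimal userspace sketch of the expected call sequence (assuming a
   raw IGMP socket with CAP_NET_ADMIN):

	int s = socket(AF_INET, SOCK_RAW, IPPROTO_IGMP);
	int one = 1;
	setsockopt(s, IPPROTO_IP, MRT_INIT, &one, sizeof(one));
	... MRT_ADD_VIF / MRT_ADD_MFC as needed ...
	setsockopt(s, IPPROTO_IP, MRT_DONE, NULL, 0);
 */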

int ip_mroute_setsockopt(struct sock *sk,int optname,char *optval,int optlen)
{
	int ret;
	struct vifctl vif;
	struct mfcctl mfc;

	if(optname!=MRT_INIT)
	{
		if(sk!=mroute_socket && !capable(CAP_NET_ADMIN))
			return -EACCES;
	}

	switch(optname)
	{
		case MRT_INIT:
			if (sk->sk_type != SOCK_RAW ||
			    inet_sk(sk)->num != IPPROTO_IGMP)
				return -EOPNOTSUPP;
			if(optlen!=sizeof(int))
				return -ENOPROTOOPT;

			rtnl_lock();
			if (mroute_socket) {
				rtnl_unlock();
				return -EADDRINUSE;
			}

			ret = ip_ra_control(sk, 1, mrtsock_destruct);
			if (ret == 0) {
				write_lock_bh(&mrt_lock);
				mroute_socket=sk;
				write_unlock_bh(&mrt_lock);

				ipv4_devconf.mc_forwarding++;
			}
			rtnl_unlock();
			return ret;
		case MRT_DONE:
			if (sk!=mroute_socket)
				return -EACCES;
			return ip_ra_control(sk, 0, NULL);
		case MRT_ADD_VIF:
		case MRT_DEL_VIF:
			if(optlen!=sizeof(vif))
				return -EINVAL;
			if (copy_from_user(&vif,optval,sizeof(vif)))
				return -EFAULT;
			if(vif.vifc_vifi >= MAXVIFS)
				return -ENFILE;
			rtnl_lock();
			if (optname==MRT_ADD_VIF) {
				ret = vif_add(&vif, sk==mroute_socket);
			} else {
				ret = vif_delete(vif.vifc_vifi);
			}
			rtnl_unlock();
			return ret;

		/*
		 *	Manipulate the forwarding caches. These live
		 *	in a sort of kernel/user symbiosis.
		 */
		case MRT_ADD_MFC:
		case MRT_DEL_MFC:
			if(optlen!=sizeof(mfc))
				return -EINVAL;
			if (copy_from_user(&mfc,optval, sizeof(mfc)))
				return -EFAULT;
			rtnl_lock();
			if (optname==MRT_DEL_MFC)
				ret = ipmr_mfc_delete(&mfc);
			else
				ret = ipmr_mfc_add(&mfc, sk==mroute_socket);
			rtnl_unlock();
			return ret;
		/*
		 *	Control PIM assert.
		 */
		case MRT_ASSERT:
		{
			int v;
			if(get_user(v,(int *)optval))
				return -EFAULT;
			mroute_do_assert=(v)?1:0;
			return 0;
		}
#ifdef CONFIG_IP_PIMSM
		case MRT_PIM:
		{
			int v, ret;
			if(get_user(v,(int *)optval))
				return -EFAULT;
			v = (v)?1:0;
			rtnl_lock();
			ret = 0;
			if (v != mroute_do_pim) {
				mroute_do_pim = v;
				mroute_do_assert = v;
#ifdef CONFIG_IP_PIMSM_V2
				if (mroute_do_pim)
					ret = inet_add_protocol(&pim_protocol,
								IPPROTO_PIM);
				else
					ret = inet_del_protocol(&pim_protocol,
								IPPROTO_PIM);
				if (ret < 0)
					ret = -EAGAIN;
#endif
			}
			rtnl_unlock();
			return ret;
		}
#endif
		/*
		 *	Spurious command, or MRT_VERSION which you cannot
		 *	set.
		 */
		default:
			return -ENOPROTOOPT;
	}
}

/*
 *	Getsock opt support for the multicast routing system.
 */

int ip_mroute_getsockopt(struct sock *sk,int optname,char *optval,int *optlen)
{
	int olr;
	int val;

	if(optname!=MRT_VERSION &&
#ifdef CONFIG_IP_PIMSM
	   optname!=MRT_PIM &&
#endif
	   optname!=MRT_ASSERT)
		return -ENOPROTOOPT;

	if (get_user(olr, optlen))
		return -EFAULT;

	olr = min_t(unsigned int, olr, sizeof(int));
	if (olr < 0)
		return -EINVAL;

	if(put_user(olr,optlen))
		return -EFAULT;
	if(optname==MRT_VERSION)
		val=0x0305;
#ifdef CONFIG_IP_PIMSM
	else if(optname==MRT_PIM)
		val=mroute_do_pim;
#endif
	else
		val=mroute_do_assert;
	if(copy_to_user(optval,&val,olr))
		return -EFAULT;
	return 0;
}

/*
 *	The IP multicast ioctl support routines.
 */

int ipmr_ioctl(struct sock *sk, int cmd, unsigned long arg)
{
	struct sioc_sg_req sr;
	struct sioc_vif_req vr;
	struct vif_device *vif;
	struct mfc_cache *c;

	switch(cmd)
	{
		case SIOCGETVIFCNT:
			if (copy_from_user(&vr,(void *)arg,sizeof(vr)))
				return -EFAULT;
			if(vr.vifi>=maxvif)
				return -EINVAL;
			read_lock(&mrt_lock);
			vif=&vif_table[vr.vifi];
			if(VIF_EXISTS(vr.vifi)) {
				vr.icount=vif->pkt_in;
				vr.ocount=vif->pkt_out;
				vr.ibytes=vif->bytes_in;
				vr.obytes=vif->bytes_out;
				read_unlock(&mrt_lock);

				if (copy_to_user((void *)arg,&vr,sizeof(vr)))
					return -EFAULT;
				return 0;
			}
			read_unlock(&mrt_lock);
			return -EADDRNOTAVAIL;
		case SIOCGETSGCNT:
			if (copy_from_user(&sr,(void *)arg,sizeof(sr)))
				return -EFAULT;

			read_lock(&mrt_lock);
			c = ipmr_cache_find(sr.src.s_addr, sr.grp.s_addr);
			if (c) {
				sr.pktcnt = c->mfc_un.res.pkt;
				sr.bytecnt = c->mfc_un.res.bytes;
				sr.wrong_if = c->mfc_un.res.wrong_if;
				read_unlock(&mrt_lock);

				if (copy_to_user((void *)arg,&sr,sizeof(sr)))
					return -EFAULT;
				return 0;
			}
			read_unlock(&mrt_lock);
			return -EADDRNOTAVAIL;
		default:
			return -ENOIOCTLCMD;
	}
}

static int ipmr_device_event(struct notifier_block *this, unsigned long event, void *ptr)
{
	struct vif_device *v;
	int ct;
	if (event != NETDEV_UNREGISTER)
		return NOTIFY_DONE;
	v=&vif_table[0];
	for(ct=0;ct<maxvif;ct++,v++) {
		if (v->dev==ptr)
			vif_delete(ct);
	}
	return NOTIFY_DONE;
}

static struct notifier_block ip_mr_notifier={
	.notifier_call = ipmr_device_event,
};

/*
 *	Encapsulate a packet by attaching a valid IPIP header to it.
 *	This avoids tunnel drivers and other mess and gives us the speed so
 *	important for multicast video.
 */

static void ip_encap(struct sk_buff *skb, u32 saddr, u32 daddr)
{
	struct iphdr *iph = (struct iphdr *)skb_push(skb,sizeof(struct iphdr));

	iph->version	= 4;
	iph->tos	= skb->nh.iph->tos;
	iph->ttl	= skb->nh.iph->ttl;
	iph->frag_off	= 0;
	iph->daddr	= daddr;
	iph->saddr	= saddr;
	iph->protocol	= IPPROTO_IPIP;
	iph->ihl	= 5;
	iph->tot_len	= htons(skb->len);
	ip_select_ident(iph, skb->dst, NULL);
	ip_send_check(iph);

	skb->h.ipiph = skb->nh.iph;
	skb->nh.iph = iph;
#ifdef CONFIG_NETFILTER
	nf_conntrack_put(skb->nfct);
	skb->nfct = NULL;
#endif
}

static inline int ipmr_forward_finish(struct sk_buff *skb)
{
	struct dst_entry *dst = skb->dst;

	if (skb->len <= dst_pmtu(dst))
		return dst_output(skb);
	else
		return ip_fragment(skb, dst_output);
}

/*
 *	Processing handlers for ipmr_forward
 */

static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c,
			    int vifi, int last)
{
	struct iphdr *iph = skb->nh.iph;
	struct vif_device *vif = &vif_table[vifi];
	struct net_device *dev;
	struct rtable *rt;
	int encap = 0;
	struct sk_buff *skb2;

	if (vif->dev == NULL)
		return;

#ifdef CONFIG_IP_PIMSM
	if (vif->flags & VIFF_REGISTER) {
		vif->pkt_out++;
		vif->bytes_out+=skb->len;
		((struct net_device_stats*)vif->dev->priv)->tx_bytes += skb->len;
		((struct net_device_stats*)vif->dev->priv)->tx_packets++;
		ipmr_cache_report(skb, vifi, IGMPMSG_WHOLEPKT);
		return;
	}
#endif

	if (vif->flags&VIFF_TUNNEL) {
		struct flowi fl = { .oif = vif->link,
				    .nl_u = { .ip4_u =
					      { .daddr = vif->remote,
						.saddr = vif->local,
						.tos = RT_TOS(iph->tos) } },
				    .proto = IPPROTO_IPIP };
		if (ip_route_output_key(&rt, &fl))
			return;
		encap = sizeof(struct iphdr);
	} else {
		struct flowi fl = { .oif = vif->link,
				    .nl_u = { .ip4_u =
					      { .daddr = iph->daddr,
						.tos = RT_TOS(iph->tos) } },
				    .proto = IPPROTO_IPIP };
		if (ip_route_output_key(&rt, &fl))
			return;
	}

	dev = rt->u.dst.dev;

	if (skb->len+encap > dst_pmtu(&rt->u.dst) && (ntohs(iph->frag_off) & IP_DF)) {
		/* Do not fragment multicasts. Alas, IPv4 does not
		   allow sending ICMP here, so packets will simply
		   disappear into a black hole.
		 */
		IP_INC_STATS_BH(IpFragFails);
		ip_rt_put(rt);
		return;
	}

	encap += LL_RESERVED_SPACE(dev);

	if (skb_headroom(skb) < encap || skb_cloned(skb) || !last)
		skb2 = skb_realloc_headroom(skb, (encap + 15)&~15);
	else if (atomic_read(&skb->users) != 1)
		skb2 = skb_clone(skb, GFP_ATOMIC);
	else {
		atomic_inc(&skb->users);
		skb2 = skb;
	}

	if (skb2 == NULL) {
		ip_rt_put(rt);
		return;
	}

	vif->pkt_out++;
	vif->bytes_out+=skb->len;

	dst_release(skb2->dst);
	skb2->dst = &rt->u.dst;
	iph = skb2->nh.iph;
	ip_decrease_ttl(iph);

	/* FIXME: forward and output firewalls used to be called here.
	 * What do we do with netfilter? -- RR */
	if (vif->flags & VIFF_TUNNEL) {
		ip_encap(skb2, vif->local, vif->remote);
		/* FIXME: extra output firewall step used to be here. --RR */
		((struct ip_tunnel *)vif->dev->priv)->stat.tx_packets++;
		((struct ip_tunnel *)vif->dev->priv)->stat.tx_bytes+=skb2->len;
	}

	IPCB(skb2)->flags |= IPSKB_FORWARDED;

	/*
	 * RFC1584 teaches that a DVMRP/PIM router must deliver packets locally
	 * not only before forwarding, but also after forwarding on all output
	 * interfaces. Clearly, if the mrouter runs a multicasting
	 * program, it should receive packets regardless of which interface
	 * the program is joined on.
	 * If we do not do this, the program would have to join on all
	 * interfaces. On the other hand, a multihomed host (or router, but
	 * not mrouter) cannot join on more than one interface - it would
	 * result in receiving multiple packets.
	 */
	NF_HOOK(PF_INET, NF_IP_FORWARD, skb2, skb->dev, dev,
		ipmr_forward_finish);
}
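
/* Note the TTL threshold model used here and in ip_mr_forward(): a copy
   goes out on a vif only when the packet's TTL exceeds that vif's
   threshold, which is how mrouted implements administrative scoping. */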

static int ipmr_find_vif(struct net_device *dev)
{
	int ct;
	for (ct=maxvif-1; ct>=0; ct--) {
		if (vif_table[ct].dev == dev)
			break;
	}
	return ct;
}
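
/* Returns the vif index owning dev, or -1 if dev is not a multicast
   vif; callers must hold mrt_lock. */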

/* "local" means that we should preserve one skb (for local delivery) */

static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local)
{
	int psend = -1;
	int vif, ct;

	vif = cache->mfc_parent;
	cache->mfc_un.res.pkt++;
	cache->mfc_un.res.bytes += skb->len;

	/*
	 * Wrong interface: drop packet and (maybe) send PIM assert.
	 */
	if (vif_table[vif].dev != skb->dev) {
		int true_vifi;

		if (((struct rtable*)skb->dst)->fl.iif == 0) {
			/* It is our own packet, looped back.
			   Very complicated situation...

			   The best workaround until routing daemons are
			   fixed is not to redistribute a packet if it was
			   sent through the wrong interface. It means that
			   multicast applications WILL NOT work for
			   (S,G) entries whose default multicast route points
			   to a wrong oif. In any case, it is not a good
			   idea to run multicasting applications on a router.
			 */
			goto dont_forward;
		}

		cache->mfc_un.res.wrong_if++;
		true_vifi = ipmr_find_vif(skb->dev);

		if (true_vifi >= 0 && mroute_do_assert &&
		    /* PIM-SM uses asserts when switching from RPT to SPT,
		       so we cannot check that the packet arrived on an oif.
		       It is bad, but otherwise we would need to move a pretty
		       large chunk of pimd to kernel. Ough... --ANK
		     */
		    (mroute_do_pim || cache->mfc_un.res.ttls[true_vifi] < 255) &&
		    time_after(jiffies,
			       cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) {
			cache->mfc_un.res.last_assert = jiffies;
			ipmr_cache_report(skb, true_vifi, IGMPMSG_WRONGVIF);
		}
		goto dont_forward;
	}

	vif_table[vif].pkt_in++;
	vif_table[vif].bytes_in+=skb->len;

	/*
	 *	Forward the frame
	 */
	for (ct = cache->mfc_un.res.maxvif-1; ct >= cache->mfc_un.res.minvif; ct--) {
		if (skb->nh.iph->ttl > cache->mfc_un.res.ttls[ct]) {
			if (psend != -1)
				ipmr_queue_xmit(skb, cache, psend, 0);
			psend=ct;
		}
	}
	if (psend != -1)
		ipmr_queue_xmit(skb, cache, psend, !local);

dont_forward:
	if (!local)
		kfree_skb(skb);
	return 0;
}
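
/* The psend dance above delays each transmit by one loop iteration so
   that every copy but the last can be cloned, and the final copy can
   consume the original skb when the caller does not need it (!local). */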

/*
 *	Multicast packets for forwarding arrive here
 */

int ip_mr_input(struct sk_buff *skb)
{
	struct mfc_cache *cache;
	int local = ((struct rtable*)skb->dst)->rt_flags&RTCF_LOCAL;

	/* Packet is looped back after forward, it should not be
	   forwarded a second time, but still can be delivered locally.
	 */
	if (IPCB(skb)->flags&IPSKB_FORWARDED)
		goto dont_forward;

	if (!local) {
		if (IPCB(skb)->opt.router_alert) {
			if (ip_call_ra_chain(skb))
				return 0;
		} else if (skb->nh.iph->protocol == IPPROTO_IGMP){
			/* IGMPv1 (and broken IGMPv2 implementations such as
			   Cisco IOS <= 11.2(8)) do not put the router alert
			   option into IGMP packets destined to routable
			   groups. It is very bad, because it means
			   that we can forward NO IGMP messages.
			 */
			read_lock(&mrt_lock);
			if (mroute_socket) {
				raw_rcv(mroute_socket, skb);
				read_unlock(&mrt_lock);
				return 0;
			}
			read_unlock(&mrt_lock);
		}
	}

	read_lock(&mrt_lock);
	cache = ipmr_cache_find(skb->nh.iph->saddr, skb->nh.iph->daddr);

	/*
	 *	No usable cache entry
	 */
	if (cache==NULL) {
		int vif;

		if (local) {
			struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
			ip_local_deliver(skb);
			if (skb2 == NULL) {
				read_unlock(&mrt_lock);
				return -ENOBUFS;
			}
			skb = skb2;
		}

		vif = ipmr_find_vif(skb->dev);
		if (vif >= 0) {
			int err = ipmr_cache_unresolved(vif, skb);
			read_unlock(&mrt_lock);

			return err;
		}
		read_unlock(&mrt_lock);
		kfree_skb(skb);
		return -ENODEV;
	}

	ip_mr_forward(skb, cache, local);

	read_unlock(&mrt_lock);

	if (local)
		return ip_local_deliver(skb);

	return 0;

dont_forward:
	if (local)
		return ip_local_deliver(skb);
	kfree_skb(skb);
	return 0;
}

#ifdef CONFIG_IP_PIMSM_V1
/*
 * Handle IGMP messages of PIMv1
 */

int pim_rcv_v1(struct sk_buff * skb)
{
	struct igmphdr *pim;
	struct iphdr *encap;
	struct net_device *reg_dev = NULL;

	if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap)))
		goto drop;

	pim = (struct igmphdr*)skb->h.raw;

	if (!mroute_do_pim ||
	    skb->len < sizeof(*pim) + sizeof(*encap) ||
	    pim->group != PIM_V1_VERSION || pim->code != PIM_V1_REGISTER)
		goto drop;

	encap = (struct iphdr*)(skb->h.raw + sizeof(struct igmphdr));
	/*
	   Check that:
	   a. packet is really destined to a multicast group
	   b. packet is not a NULL-REGISTER
	   c. packet is not truncated
	 */
	if (!MULTICAST(encap->daddr) ||
	    encap->tot_len == 0 ||
	    ntohs(encap->tot_len) + sizeof(*pim) > skb->len)
		goto drop;
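	/* Layout being validated above: outer IP | 8-byte IGMP-format
	   register header | encapsulated multicast IP packet. */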

	read_lock(&mrt_lock);
	if (reg_vif_num >= 0)
		reg_dev = vif_table[reg_vif_num].dev;
	if (reg_dev)
		dev_hold(reg_dev);
	read_unlock(&mrt_lock);

	if (reg_dev == NULL)
		goto drop;

	skb->mac.raw = skb->nh.raw;
	skb_pull(skb, (u8*)encap - skb->data);
	skb->nh.iph = (struct iphdr *)skb->data;
	skb->dev = reg_dev;
	memset(&(IPCB(skb)->opt), 0, sizeof(struct ip_options));
	skb->protocol = htons(ETH_P_IP);
	skb->ip_summed = 0;
	skb->pkt_type = PACKET_HOST;
	dst_release(skb->dst);
	skb->dst = NULL;
	((struct net_device_stats*)reg_dev->priv)->rx_bytes += skb->len;
	((struct net_device_stats*)reg_dev->priv)->rx_packets++;
#ifdef CONFIG_NETFILTER
	nf_conntrack_put(skb->nfct);
	skb->nfct = NULL;
#endif
	netif_rx(skb);
	dev_put(reg_dev);
	return 0;
drop:
	kfree_skb(skb);
	return 0;
}
#endif

#ifdef CONFIG_IP_PIMSM_V2
static int pim_rcv(struct sk_buff * skb)
{
	struct pimreghdr *pim;
	struct iphdr *encap;
	struct net_device *reg_dev = NULL;

	if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap)))
		goto drop;

	pim = (struct pimreghdr*)skb->h.raw;
	if (pim->type != ((PIM_VERSION<<4)|(PIM_REGISTER)) ||
	    (pim->flags&PIM_NULL_REGISTER) ||
	    (ip_compute_csum((void *)pim, sizeof(*pim)) != 0 &&
	     (u16)csum_fold(skb_checksum(skb, 0, skb->len, 0))))
		goto drop;
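	/* The checksum test above accepts either a checksum over the
	   8-byte PIM header alone (as the spec requires for registers)
	   or over the whole packet, for older peers - see the changelog
	   entry at the top of this file. */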

	/* check if the inner packet is destined to mcast group */
	encap = (struct iphdr*)(skb->h.raw + sizeof(struct pimreghdr));
	if (!MULTICAST(encap->daddr) ||
	    encap->tot_len == 0 ||
	    ntohs(encap->tot_len) + sizeof(*pim) > skb->len)
		goto drop;

	read_lock(&mrt_lock);
	if (reg_vif_num >= 0)
		reg_dev = vif_table[reg_vif_num].dev;
	if (reg_dev)
		dev_hold(reg_dev);
	read_unlock(&mrt_lock);

	if (reg_dev == NULL)
		goto drop;

	skb->mac.raw = skb->nh.raw;
	skb_pull(skb, (u8*)encap - skb->data);
	skb->nh.iph = (struct iphdr *)skb->data;
	skb->dev = reg_dev;
	memset(&(IPCB(skb)->opt), 0, sizeof(struct ip_options));
	skb->protocol = htons(ETH_P_IP);
	skb->ip_summed = 0;
	skb->pkt_type = PACKET_HOST;
	dst_release(skb->dst);
	((struct net_device_stats*)reg_dev->priv)->rx_bytes += skb->len;
	((struct net_device_stats*)reg_dev->priv)->rx_packets++;
	skb->dst = NULL;
#ifdef CONFIG_NETFILTER
	nf_conntrack_put(skb->nfct);
	skb->nfct = NULL;
#endif
	netif_rx(skb);
	dev_put(reg_dev);
	return 0;
drop:
	kfree_skb(skb);
	return 0;
}
#endif

static int
ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm)
{
	int ct;
	struct rtnexthop *nhp;
	struct net_device *dev = vif_table[c->mfc_parent].dev;
	u8 *b = skb->tail;
	struct rtattr *mp_head;

	if (dev)
		RTA_PUT(skb, RTA_IIF, 4, &dev->ifindex);

	mp_head = (struct rtattr*)skb_put(skb, RTA_LENGTH(0));

	for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
		if (c->mfc_un.res.ttls[ct] < 255) {
			if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4))
				goto rtattr_failure;
			nhp = (struct rtnexthop*)skb_put(skb, RTA_ALIGN(sizeof(*nhp)));
			nhp->rtnh_flags = 0;
			nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
			nhp->rtnh_ifindex = vif_table[ct].dev->ifindex;
			nhp->rtnh_len = sizeof(*nhp);
		}
	}
	mp_head->rta_type = RTA_MULTIPATH;
	mp_head->rta_len = skb->tail - (u8*)mp_head;
	rtm->rtm_type = RTN_MULTICAST;
	return 1;

rtattr_failure:
	skb_trim(skb, b - skb->data);
	return -EMSGSIZE;
}
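
/* The dump reuses RTA_MULTIPATH: one rtnexthop per output vif, with
   rtnh_hops carrying the vif's TTL threshold rather than a hop count. */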

int ipmr_get_route(struct sk_buff *skb, struct rtmsg *rtm, int nowait)
{
	int err;
	struct mfc_cache *cache;
	struct rtable *rt = (struct rtable*)skb->dst;

	read_lock(&mrt_lock);
	cache = ipmr_cache_find(rt->rt_src, rt->rt_dst);

	if (cache==NULL) {
		struct net_device *dev;
		int vif;

		if (nowait) {
			read_unlock(&mrt_lock);
			return -EAGAIN;
		}

		dev = skb->dev;
		if (dev == NULL || (vif = ipmr_find_vif(dev)) < 0) {
			read_unlock(&mrt_lock);
			return -ENODEV;
		}
		skb->nh.raw = skb_push(skb, sizeof(struct iphdr));
		skb->nh.iph->ihl = sizeof(struct iphdr)>>2;
		skb->nh.iph->saddr = rt->rt_src;
		skb->nh.iph->daddr = rt->rt_dst;
		skb->nh.iph->version = 0;
		err = ipmr_cache_unresolved(vif, skb);
		read_unlock(&mrt_lock);
		return err;
	}

	if (!nowait && (rtm->rtm_flags&RTM_F_NOTIFY))
		cache->mfc_flags |= MFC_NOTIFY;
	err = ipmr_fill_mroute(skb, cache, rtm);
	read_unlock(&mrt_lock);
	return err;
}
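
/* The pseudo-header built above carries version 0, which is how queued
   netlink requests are later told apart from real packets in
   ipmr_cache_resolve() and ipmr_destroy_unres(). */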

#ifdef CONFIG_PROC_FS
/*
 *	The /proc interfaces to multicast routing /proc/ip_mr_cache /proc/ip_mr_vif
 */

static int ipmr_vif_info(char *buffer, char **start, off_t offset, int length)
{
	struct vif_device *vif;
	int len=0;
	off_t pos=0;
	off_t begin=0;
	int size;
	int ct;

	len += sprintf(buffer,
		       "Interface      BytesIn  PktsIn  BytesOut PktsOut Flags Local    Remote\n");
	pos=len;

	read_lock(&mrt_lock);
	for (ct=0;ct<maxvif;ct++)
	{
		char *name = "none";
		vif=&vif_table[ct];
		if(!VIF_EXISTS(ct))
			continue;
		if (vif->dev)
			name = vif->dev->name;
		size = sprintf(buffer+len, "%2d %-10s %8ld %7ld  %8ld %7ld %05X %08X %08X\n",
			       ct, name, vif->bytes_in, vif->pkt_in, vif->bytes_out, vif->pkt_out,
			       vif->flags, vif->local, vif->remote);
		len+=size;
		pos+=size;
		if(pos<offset)
		{
			len=0;
			begin=pos;
		}
		if(pos>offset+length)
			break;
	}
	read_unlock(&mrt_lock);

	*start=buffer+(offset-begin);
	len-=(offset-begin);
	if(len>length)
		len=length;
	if (len<0)
		len = 0;
	return len;
}

static int ipmr_mfc_info(char *buffer, char **start, off_t offset, int length)
{
	struct mfc_cache *mfc;
	int len=0;
	off_t pos=0;
	off_t begin=0;
	int size;
	int ct;

	len += sprintf(buffer,
		       "Group    Origin   Iif     Pkts    Bytes    Wrong Oifs\n");
	pos=len;

	read_lock(&mrt_lock);
	for (ct=0;ct<MFC_LINES;ct++)
	{
		for(mfc=mfc_cache_array[ct]; mfc; mfc=mfc->next)
		{
			int n;

			/*
			 *	Interface forwarding map
			 */
			size = sprintf(buffer+len, "%08lX %08lX %-3d %8ld %8ld %8ld",
				       (unsigned long)mfc->mfc_mcastgrp,
				       (unsigned long)mfc->mfc_origin,
				       mfc->mfc_parent,
				       mfc->mfc_un.res.pkt,
				       mfc->mfc_un.res.bytes,
				       mfc->mfc_un.res.wrong_if);
			for(n=mfc->mfc_un.res.minvif;n<mfc->mfc_un.res.maxvif;n++)
			{
				if(VIF_EXISTS(n) && mfc->mfc_un.res.ttls[n] < 255)
					size += sprintf(buffer+len+size, " %2d:%-3d", n, mfc->mfc_un.res.ttls[n]);
			}
			size += sprintf(buffer+len+size, "\n");
			len+=size;
			pos+=size;
			if(pos<offset)
			{
				len=0;
				begin=pos;
			}
			if(pos>offset+length)
				goto done;
		}
	}

	spin_lock_bh(&mfc_unres_lock);
	for(mfc=mfc_unres_queue; mfc; mfc=mfc->next) {
		size = sprintf(buffer+len, "%08lX %08lX %-3d %8ld %8ld %8ld\n",
			       (unsigned long)mfc->mfc_mcastgrp,
			       (unsigned long)mfc->mfc_origin,
			       -1,
			       (long)mfc->mfc_un.unres.unresolved.qlen,
			       0L, 0L);
		len+=size;
		pos+=size;
		if(pos<offset)
		{
			len=0;
			begin=pos;
		}
		if(pos>offset+length)
			break;
	}
	spin_unlock_bh(&mfc_unres_lock);

done:
	read_unlock(&mrt_lock);
	*start=buffer+(offset-begin);
	len-=(offset-begin);
	if(len>length)
		len=length;
	if (len < 0) {
		len = 0;
	}
	return len;
}
#endif

#ifdef CONFIG_IP_PIMSM_V2
static struct inet_protocol pim_protocol = {
	.handler	=	pim_rcv,
};
#endif

/*
 *	Setup for IP multicast routing
 */

void __init ip_mr_init(void)
{
	printk(KERN_INFO "Linux IP multicast router 0.06 plus PIM-SM\n");
	mrt_cachep = kmem_cache_create("ip_mrt_cache",
				       sizeof(struct mfc_cache),
				       0, SLAB_HWCACHE_ALIGN,
				       NULL, NULL);
	init_timer(&ipmr_expire_timer);
	ipmr_expire_timer.function=ipmr_expire_process;
	register_netdevice_notifier(&ip_mr_notifier);
#ifdef CONFIG_PROC_FS
	proc_net_create("ip_mr_vif",0,ipmr_vif_info);
	proc_net_create("ip_mr_cache",0,ipmr_mfc_info);
#endif
}