/*
 *	IP multicast routing support for mrouted 3.6/3.8
 *
 *		(c) 1995 Alan Cox, <alan@redhat.com>
 *	  Linux Consultancy and Custom Driver Development
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 *
 *	Version: $Id: ipmr.c,v 1.55 2000/11/28 13:13:27 davem Exp $
 *
 *	Fixes:
 *	Michael Chastain	:	Incorrect size of copying.
 *	Alan Cox		:	Added the cache manager code
 *	Alan Cox		:	Fixed the clone/copy bug and device race.
 *	Mike McLagan		:	Routing by source
 *	Malcolm Beattie		:	Buffer handling fixes.
 *	Alexey Kuznetsov	:	Double buffer free and other fixes.
 *	SVR Anand		:	Fixed several multicast bugs and problems.
 *	Alexey Kuznetsov	:	Status, optimisations and more.
 *	Brad Parker		:	Better behaviour on mrouted upcall
 *					overflow.
 *	Carlos Picoto		:	PIMv1 Support
 *	Pavlin Ivanov Radoslavov:	PIMv2 Registers must checksum only PIM header
 *					Relax this requirement to work with older peers.
 */
#include <linux/config.h>
#include <asm/system.h>
#include <asm/uaccess.h>
#include <linux/types.h>
#include <linux/sched.h>
#include <linux/errno.h>
#include <linux/timer.h>
#include <linux/mm.h>
#include <linux/kernel.h>
#include <linux/fcntl.h>
#include <linux/stat.h>
#include <linux/socket.h>
#include <linux/in.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/inetdevice.h>
#include <linux/igmp.h>
#include <linux/proc_fs.h>
#include <linux/mroute.h>
#include <linux/init.h>
#include <net/ip.h>
#include <net/protocol.h>
#include <linux/skbuff.h>
#include <net/sock.h>
#include <net/icmp.h>
#include <net/udp.h>
#include <net/raw.h>
#include <linux/notifier.h>
#include <linux/if_arp.h>
#include <linux/netfilter_ipv4.h>
#include <net/ipip.h>
#include <net/checksum.h>

#if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2)
#define CONFIG_IP_PIMSM	1
#endif
static struct sock *mroute_socket;

/* Big lock, protecting vif table, mrt cache and mroute socket state.
   Note that the changes are semaphored via rtnl_lock.
 */
static rwlock_t mrt_lock = RW_LOCK_UNLOCKED;

/*
 *	Multicast router control variables
 */

static struct vif_device vif_table[MAXVIFS];		/* Devices		*/
static int maxvif;

#define VIF_EXISTS(idx)	(vif_table[idx].dev != NULL)

int mroute_do_assert = 0;				/* Set in PIM assert	*/
int mroute_do_pim = 0;

static struct mfc_cache *mfc_cache_array[MFC_LINES];	/* Forwarding cache	*/

static struct mfc_cache *mfc_unres_queue;		/* Queue of unresolved entries */
atomic_t cache_resolve_queue_len;			/* Size of unresolved	*/

/* Special spinlock for queue of unresolved entries */
static spinlock_t mfc_unres_lock = SPIN_LOCK_UNLOCKED;

/* We return to original Alan's scheme. The hash table of resolved
   entries is changed only in process context and is protected with
   the weak lock mrt_lock. The queue of unresolved entries is protected
   with the strong spinlock mfc_unres_lock.

   This way the data path is entirely free of exclusive locks.
 */

kmem_cache_t *mrt_cachep;

static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local);
static int ipmr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert);
static int ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm);

extern struct inet_protocol pim_protocol;

static struct timer_list ipmr_expire_timer;
113 static struct timer_list ipmr_expire_timer;
115 /* Service routines creating virtual interfaces: DVMRP tunnels and PIMREG */
117 static
118 struct net_device *ipmr_new_tunnel(struct vifctl *v)
120 struct net_device *dev;
122 dev = __dev_get_by_name("tunl0");
124 if (dev) {
125 int err;
126 struct ifreq ifr;
127 mm_segment_t oldfs;
128 struct ip_tunnel_parm p;
129 struct in_device *in_dev;
131 memset(&p, 0, sizeof(p));
132 p.iph.daddr = v->vifc_rmt_addr.s_addr;
133 p.iph.saddr = v->vifc_lcl_addr.s_addr;
134 p.iph.version = 4;
135 p.iph.ihl = 5;
136 p.iph.protocol = IPPROTO_IPIP;
137 sprintf(p.name, "dvmrp%d", v->vifc_vifi);
138 ifr.ifr_ifru.ifru_data = (void*)&p;
140 oldfs = get_fs(); set_fs(KERNEL_DS);
141 err = dev->do_ioctl(dev, &ifr, SIOCADDTUNNEL);
142 set_fs(oldfs);
144 dev = NULL;
146 if (err == 0 && (dev = __dev_get_by_name(p.name)) != NULL) {
147 dev->flags |= IFF_MULTICAST;
149 in_dev = __in_dev_get(dev);
150 if (in_dev == NULL && (in_dev = inetdev_init(dev)) == NULL)
151 goto failure;
152 in_dev->cnf.rp_filter = 0;
154 if (dev_open(dev))
155 goto failure;
158 return dev;
160 failure:
161 unregister_netdevice(dev);
162 return NULL;
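/* The PIM register VIF is a software device with no link layer: anything the
 * stack transmits on it is handed whole to the daemon as an IGMPMSG_WHOLEPKT
 * upcall, which is how PIM-SM register encapsulation is done in user space.
 */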
#ifdef CONFIG_IP_PIMSM

static int reg_vif_num = -1;

static int reg_vif_xmit(struct sk_buff *skb, struct net_device *dev)
{
	read_lock(&mrt_lock);
	((struct net_device_stats*)dev->priv)->tx_bytes += skb->len;
	((struct net_device_stats*)dev->priv)->tx_packets++;
	ipmr_cache_report(skb, reg_vif_num, IGMPMSG_WHOLEPKT);
	read_unlock(&mrt_lock);
	kfree_skb(skb);
	return 0;
}

static struct net_device_stats *reg_vif_get_stats(struct net_device *dev)
{
	return (struct net_device_stats*)dev->priv;
}

static
struct net_device *ipmr_reg_vif(struct vifctl *v)
{
	struct net_device *dev;
	struct in_device *in_dev;
	int size;

	size = sizeof(*dev) + sizeof(struct net_device_stats);
	dev = kmalloc(size, GFP_KERNEL);
	if (!dev)
		return NULL;

	memset(dev, 0, size);

	dev->priv = dev + 1;

	strcpy(dev->name, "pimreg");

	dev->type		= ARPHRD_PIMREG;
	dev->mtu		= 1500 - sizeof(struct iphdr) - 8;
	dev->flags		= IFF_NOARP;
	dev->hard_start_xmit	= reg_vif_xmit;
	dev->get_stats		= reg_vif_get_stats;
	dev->features		|= NETIF_F_DYNALLOC;

	if (register_netdevice(dev)) {
		kfree(dev);
		return NULL;
	}
	dev->iflink = 0;

	if ((in_dev = inetdev_init(dev)) == NULL)
		goto failure;

	in_dev->cnf.rp_filter = 0;

	if (dev_open(dev))
		goto failure;

	return dev;

failure:
	unregister_netdevice(dev);
	return NULL;
}
#endif
/*
 *	Delete a VIF entry
 */

static int vif_delete(int vifi)
{
	struct vif_device *v;
	struct net_device *dev;
	struct in_device *in_dev;

	if (vifi < 0 || vifi >= maxvif)
		return -EADDRNOTAVAIL;

	v = &vif_table[vifi];

	write_lock_bh(&mrt_lock);
	dev = v->dev;
	v->dev = NULL;

	if (!dev) {
		write_unlock_bh(&mrt_lock);
		return -EADDRNOTAVAIL;
	}

#ifdef CONFIG_IP_PIMSM
	if (vifi == reg_vif_num)
		reg_vif_num = -1;
#endif

	if (vifi+1 == maxvif) {
		int tmp;
		for (tmp=vifi-1; tmp>=0; tmp--) {
			if (VIF_EXISTS(tmp))
				break;
		}
		maxvif = tmp+1;
	}

	write_unlock_bh(&mrt_lock);

	dev_set_allmulti(dev, -1);

	if ((in_dev = __in_dev_get(dev)) != NULL) {
		in_dev->cnf.mc_forwarding--;
		ip_rt_multicast_event(in_dev);
	}

	if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER))
		unregister_netdevice(dev);

	dev_put(dev);
	return 0;
}
/* Destroy an unresolved cache entry, killing queued skbs
   and reporting error to netlink readers.
 */

static void ipmr_destroy_unres(struct mfc_cache *c)
{
	struct sk_buff *skb;

	atomic_dec(&cache_resolve_queue_len);

	while((skb=skb_dequeue(&c->mfc_un.unres.unresolved))) {
#ifdef CONFIG_RTNETLINK
		if (skb->nh.iph->version == 0) {
			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));
			nlh->nlmsg_type = NLMSG_ERROR;
			nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
			skb_trim(skb, nlh->nlmsg_len);
			((struct nlmsgerr*)NLMSG_DATA(nlh))->error = -ETIMEDOUT;
			netlink_unicast(rtnl, skb, NETLINK_CB(skb).dst_pid, MSG_DONTWAIT);
		} else
#endif
			kfree_skb(skb);
	}

	kmem_cache_free(mrt_cachep, c);
}
/* Single timer process for all the unresolved queue. */

void ipmr_expire_process(unsigned long dummy)
{
	unsigned long now;
	unsigned long expires;
	struct mfc_cache *c, **cp;

	if (!spin_trylock(&mfc_unres_lock)) {
		mod_timer(&ipmr_expire_timer, jiffies+HZ/10);
		return;
	}

	if (atomic_read(&cache_resolve_queue_len) == 0)
		goto out;

	now = jiffies;
	expires = 10*HZ;
	cp = &mfc_unres_queue;

	while ((c=*cp) != NULL) {
		long interval = c->mfc_un.unres.expires - now;

		if (interval > 0) {
			if (interval < expires)
				expires = interval;
			cp = &c->next;
			continue;
		}

		*cp = c->next;

		ipmr_destroy_unres(c);
	}

	if (atomic_read(&cache_resolve_queue_len))
		mod_timer(&ipmr_expire_timer, jiffies + expires);

out:
	spin_unlock(&mfc_unres_lock);
}
/* Fill oifs list. It is called under write locked mrt_lock. */

static void ipmr_update_threshoulds(struct mfc_cache *cache, unsigned char *ttls)
{
	int vifi;

	cache->mfc_un.res.minvif = MAXVIFS;
	cache->mfc_un.res.maxvif = 0;
	memset(cache->mfc_un.res.ttls, 255, MAXVIFS);

	for (vifi=0; vifi<maxvif; vifi++) {
		if (VIF_EXISTS(vifi) && ttls[vifi] && ttls[vifi] < 255) {
			cache->mfc_un.res.ttls[vifi] = ttls[vifi];
			if (cache->mfc_un.res.minvif > vifi)
				cache->mfc_un.res.minvif = vifi;
			if (cache->mfc_un.res.maxvif <= vifi)
				cache->mfc_un.res.maxvif = vifi + 1;
		}
	}
}
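/* Add a VIF: create or look up the underlying device, switch it into
 * multicast forwarding mode, then publish the vif_device entry under
 * mrt_lock. Runs under rtnl_lock via ip_mroute_setsockopt().
 */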
static int vif_add(struct vifctl *vifc, int mrtsock)
{
	int vifi = vifc->vifc_vifi;
	struct vif_device *v = &vif_table[vifi];
	struct net_device *dev;
	struct in_device *in_dev;

	/* Is vif busy ? */
	if (VIF_EXISTS(vifi))
		return -EADDRINUSE;

	switch (vifc->vifc_flags) {
#ifdef CONFIG_IP_PIMSM
	case VIFF_REGISTER:
		/*
		 * Special Purpose VIF in PIM
		 * All the packets will be sent to the daemon
		 */
		if (reg_vif_num >= 0)
			return -EADDRINUSE;
		dev = ipmr_reg_vif(vifc);
		if (!dev)
			return -ENOBUFS;
		break;
#endif
	case VIFF_TUNNEL:
		dev = ipmr_new_tunnel(vifc);
		if (!dev)
			return -ENOBUFS;
		break;
	case 0:
		dev=ip_dev_find(vifc->vifc_lcl_addr.s_addr);
		if (!dev)
			return -EADDRNOTAVAIL;
		__dev_put(dev);
		break;
	default:
		return -EINVAL;
	}

	if ((in_dev = __in_dev_get(dev)) == NULL)
		return -EADDRNOTAVAIL;
	in_dev->cnf.mc_forwarding++;
	dev_set_allmulti(dev, +1);
	ip_rt_multicast_event(in_dev);

	/*
	 *	Fill in the VIF structures
	 */
	v->rate_limit=vifc->vifc_rate_limit;
	v->local=vifc->vifc_lcl_addr.s_addr;
	v->remote=vifc->vifc_rmt_addr.s_addr;
	v->flags=vifc->vifc_flags;
	if (!mrtsock)
		v->flags |= VIFF_STATIC;
	v->threshold=vifc->vifc_threshold;
	v->bytes_in = 0;
	v->bytes_out = 0;
	v->pkt_in = 0;
	v->pkt_out = 0;
	v->link = dev->ifindex;
	if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER))
		v->link = dev->iflink;

	/* And finish update writing critical data */
	write_lock_bh(&mrt_lock);
	dev_hold(dev);
	v->dev=dev;
#ifdef CONFIG_IP_PIMSM
	if (v->flags&VIFF_REGISTER)
		reg_vif_num = vifi;
#endif
	if (vifi+1 > maxvif)
		maxvif = vifi+1;
	write_unlock_bh(&mrt_lock);
	return 0;
}
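/* Look up (S,G) in the resolved cache; callers hold mrt_lock. */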
static struct mfc_cache *ipmr_cache_find(__u32 origin, __u32 mcastgrp)
{
	int line=MFC_HASH(mcastgrp,origin);
	struct mfc_cache *c;

	for (c=mfc_cache_array[line]; c; c = c->next) {
		if (c->mfc_origin==origin && c->mfc_mcastgrp==mcastgrp)
			break;
	}
	return c;
}

/*
 *	Allocate a multicast cache entry
 */
static struct mfc_cache *ipmr_cache_alloc(void)
{
	struct mfc_cache *c=kmem_cache_alloc(mrt_cachep, GFP_KERNEL);
	if(c==NULL)
		return NULL;
	memset(c, 0, sizeof(*c));
	c->mfc_un.res.minvif = MAXVIFS;
	return c;
}

static struct mfc_cache *ipmr_cache_alloc_unres(void)
{
	struct mfc_cache *c=kmem_cache_alloc(mrt_cachep, GFP_ATOMIC);
	if(c==NULL)
		return NULL;
	memset(c, 0, sizeof(*c));
	skb_queue_head_init(&c->mfc_un.unres.unresolved);
	c->mfc_un.unres.expires = jiffies + 10*HZ;
	return c;
}
/*
 *	A cache entry has gone into a resolved state from queued
 */

static void ipmr_cache_resolve(struct mfc_cache *uc, struct mfc_cache *c)
{
	struct sk_buff *skb;

	/*
	 *	Play the pending entries through our router
	 */

	while((skb=__skb_dequeue(&uc->mfc_un.unres.unresolved))) {
#ifdef CONFIG_RTNETLINK
		if (skb->nh.iph->version == 0) {
			int err;
			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));

			if (ipmr_fill_mroute(skb, c, NLMSG_DATA(nlh)) > 0) {
				nlh->nlmsg_len = skb->tail - (u8*)nlh;
			} else {
				nlh->nlmsg_type = NLMSG_ERROR;
				nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
				skb_trim(skb, nlh->nlmsg_len);
				((struct nlmsgerr*)NLMSG_DATA(nlh))->error = -EMSGSIZE;
			}
			err = netlink_unicast(rtnl, skb, NETLINK_CB(skb).dst_pid, MSG_DONTWAIT);
		} else
#endif
			ip_mr_forward(skb, c, 0);
	}
}
/*
 *	Bounce a cache query up to mrouted. We could use netlink for this but mrouted
 *	expects the following bizarre scheme.
 *
 *	Called under mrt_lock.
 */

static int ipmr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert)
{
	struct sk_buff *skb;
	int ihl = pkt->nh.iph->ihl<<2;
	struct igmphdr *igmp;
	struct igmpmsg *msg;
	int ret;

#ifdef CONFIG_IP_PIMSM
	if (assert == IGMPMSG_WHOLEPKT)
		skb = skb_realloc_headroom(pkt, sizeof(struct iphdr));
	else
#endif
		skb = alloc_skb(128, GFP_ATOMIC);

	if(!skb)
		return -ENOBUFS;

#ifdef CONFIG_IP_PIMSM
	if (assert == IGMPMSG_WHOLEPKT) {
		/* Ugly, but we have no choice with this interface.
		   Duplicate old header, fix ihl, length etc.
		   And all this only to mangle msg->im_msgtype and
		   to set msg->im_mbz to "mbz" :-)
		 */
		msg = (struct igmpmsg*)skb_push(skb, sizeof(struct iphdr));
		skb->nh.raw = skb->h.raw = (u8*)msg;
		memcpy(msg, pkt->nh.raw, sizeof(struct iphdr));
		msg->im_msgtype = IGMPMSG_WHOLEPKT;
		msg->im_mbz = 0;
		msg->im_vif = reg_vif_num;
		skb->nh.iph->ihl = sizeof(struct iphdr) >> 2;
		skb->nh.iph->tot_len = htons(ntohs(pkt->nh.iph->tot_len) + sizeof(struct iphdr));
	} else
#endif
	{
		/*
		 *	Copy the IP header
		 */

		skb->nh.iph = (struct iphdr *)skb_put(skb, ihl);
		memcpy(skb->data,pkt->data,ihl);
		skb->nh.iph->protocol = 0;	/* Flag to the kernel this is a route add */
		msg = (struct igmpmsg*)skb->nh.iph;
		msg->im_vif = vifi;
		skb->dst = dst_clone(pkt->dst);

		/*
		 *	Add our header
		 */

		igmp=(struct igmphdr *)skb_put(skb,sizeof(struct igmphdr));
		igmp->type	=
		msg->im_msgtype = assert;
		igmp->code	= 0;
		skb->nh.iph->tot_len=htons(skb->len);	/* Fix the length */
		skb->h.raw = skb->nh.raw;
	}

	if (mroute_socket == NULL) {
		kfree_skb(skb);
		return -EINVAL;
	}

	/*
	 *	Deliver to mrouted
	 */
	if ((ret=sock_queue_rcv_skb(mroute_socket,skb))<0) {
		if (net_ratelimit())
			printk(KERN_WARNING "mroute: pending queue full, dropping entries.\n");
		kfree_skb(skb);
	}

	return ret;
}
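/* Illustrative sketch (not part of this file): the daemon owning
 * mroute_socket reads these reports from its raw IGMP socket and can
 * dispatch on the message type roughly like
 *
 *	struct igmpmsg *m = (struct igmpmsg *)buf;
 *	switch (m->im_msgtype) {
 *	case IGMPMSG_NOCACHE:	resolve (S,G), then setsockopt(MRT_ADD_MFC)
 *	case IGMPMSG_WRONGVIF:	run PIM assert processing
 *	case IGMPMSG_WHOLEPKT:	PIM register-encapsulate the packet
 *	}
 */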
/*
 *	Queue a packet for resolution. It gets locked cache entry!
 */

static int
ipmr_cache_unresolved(vifi_t vifi, struct sk_buff *skb)
{
	int err;
	struct mfc_cache *c;

	spin_lock_bh(&mfc_unres_lock);
	for (c=mfc_unres_queue; c; c=c->next) {
		if (c->mfc_mcastgrp == skb->nh.iph->daddr &&
		    c->mfc_origin == skb->nh.iph->saddr)
			break;
	}

	if (c == NULL) {
		/*
		 *	Create a new entry if allowable
		 */

		if (atomic_read(&cache_resolve_queue_len)>=10 ||
		    (c=ipmr_cache_alloc_unres())==NULL) {
			spin_unlock_bh(&mfc_unres_lock);

			kfree_skb(skb);
			return -ENOBUFS;
		}

		/*
		 *	Fill in the new cache entry
		 */
		c->mfc_parent=-1;
		c->mfc_origin=skb->nh.iph->saddr;
		c->mfc_mcastgrp=skb->nh.iph->daddr;

		/*
		 *	Reflect first query at mrouted.
		 */
		if ((err = ipmr_cache_report(skb, vifi, IGMPMSG_NOCACHE))<0) {
			/* If the report failed throw the cache entry
			   out - Brad Parker
			 */
			spin_unlock_bh(&mfc_unres_lock);

			kmem_cache_free(mrt_cachep, c);
			kfree_skb(skb);
			return err;
		}

		atomic_inc(&cache_resolve_queue_len);
		c->next = mfc_unres_queue;
		mfc_unres_queue = c;

		mod_timer(&ipmr_expire_timer, c->mfc_un.unres.expires);
	}

	/*
	 *	See if we can append the packet
	 */
	if (c->mfc_un.unres.unresolved.qlen>3) {
		kfree_skb(skb);
		err = -ENOBUFS;
	} else {
		skb_queue_tail(&c->mfc_un.unres.unresolved,skb);
		err = 0;
	}

	spin_unlock_bh(&mfc_unres_lock);
	return err;
}
/*
 *	MFC cache manipulation by user space mroute daemon
 */

int ipmr_mfc_delete(struct mfcctl *mfc)
{
	int line;
	struct mfc_cache *c, **cp;

	line=MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);

	for (cp=&mfc_cache_array[line]; (c=*cp) != NULL; cp = &c->next) {
		if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
		    c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) {
			write_lock_bh(&mrt_lock);
			*cp = c->next;
			write_unlock_bh(&mrt_lock);

			kmem_cache_free(mrt_cachep, c);
			return 0;
		}
	}
	return -ENOENT;
}

int ipmr_mfc_add(struct mfcctl *mfc, int mrtsock)
{
	int line;
	struct mfc_cache *uc, *c, **cp;

	line=MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);

	for (cp=&mfc_cache_array[line]; (c=*cp) != NULL; cp = &c->next) {
		if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
		    c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr)
			break;
	}

	if (c != NULL) {
		write_lock_bh(&mrt_lock);
		c->mfc_parent = mfc->mfcc_parent;
		ipmr_update_threshoulds(c, mfc->mfcc_ttls);
		if (!mrtsock)
			c->mfc_flags |= MFC_STATIC;
		write_unlock_bh(&mrt_lock);
		return 0;
	}

	if(!MULTICAST(mfc->mfcc_mcastgrp.s_addr))
		return -EINVAL;

	c=ipmr_cache_alloc();
	if (c==NULL)
		return -ENOMEM;

	c->mfc_origin=mfc->mfcc_origin.s_addr;
	c->mfc_mcastgrp=mfc->mfcc_mcastgrp.s_addr;
	c->mfc_parent=mfc->mfcc_parent;
	ipmr_update_threshoulds(c, mfc->mfcc_ttls);
	if (!mrtsock)
		c->mfc_flags |= MFC_STATIC;

	write_lock_bh(&mrt_lock);
	c->next = mfc_cache_array[line];
	mfc_cache_array[line] = c;
	write_unlock_bh(&mrt_lock);

	/*
	 *	Check to see if we resolved a queued list. If so we
	 *	need to send on the frames and tidy up.
	 */
	spin_lock_bh(&mfc_unres_lock);
	for (cp = &mfc_unres_queue; (uc=*cp) != NULL;
	     cp = &uc->next) {
		if (uc->mfc_origin == c->mfc_origin &&
		    uc->mfc_mcastgrp == c->mfc_mcastgrp) {
			*cp = uc->next;
			if (atomic_dec_and_test(&cache_resolve_queue_len))
				del_timer(&ipmr_expire_timer);
			break;
		}
	}
	spin_unlock_bh(&mfc_unres_lock);

	if (uc) {
		ipmr_cache_resolve(uc, c);
		kmem_cache_free(mrt_cachep, uc);
	}
	return 0;
}
/*
 *	Close the multicast socket, and clear the vif tables etc
 */

static void mroute_clean_tables(struct sock *sk)
{
	int i;

	/*
	 *	Shut down all active vif entries
	 */
	for(i=0; i<maxvif; i++) {
		if (!(vif_table[i].flags&VIFF_STATIC))
			vif_delete(i);
	}

	/*
	 *	Wipe the cache
	 */
	for (i=0;i<MFC_LINES;i++) {
		struct mfc_cache *c, **cp;

		cp = &mfc_cache_array[i];
		while ((c = *cp) != NULL) {
			if (c->mfc_flags&MFC_STATIC) {
				cp = &c->next;
				continue;
			}
			write_lock_bh(&mrt_lock);
			*cp = c->next;
			write_unlock_bh(&mrt_lock);

			kmem_cache_free(mrt_cachep, c);
		}
	}

	if (atomic_read(&cache_resolve_queue_len) != 0) {
		struct mfc_cache *c;

		spin_lock_bh(&mfc_unres_lock);
		while (mfc_unres_queue != NULL) {
			c = mfc_unres_queue;
			mfc_unres_queue = c->next;
			spin_unlock_bh(&mfc_unres_lock);

			ipmr_destroy_unres(c);

			spin_lock_bh(&mfc_unres_lock);
		}
		spin_unlock_bh(&mfc_unres_lock);
	}
}

static void mrtsock_destruct(struct sock *sk)
{
	rtnl_lock();
	if (sk == mroute_socket) {
		ipv4_devconf.mc_forwarding--;

		write_lock_bh(&mrt_lock);
		mroute_socket=NULL;
		write_unlock_bh(&mrt_lock);

		mroute_clean_tables(sk);
	}
	rtnl_unlock();
}
/*
 *	Socket options and virtual interface manipulation. The whole
 *	virtual interface system is a complete heap, but unfortunately
 *	that's how BSD mrouted happens to think. Maybe one day with a proper
 *	MOSPF/PIM router set up we can clean this up.
 */

int ip_mroute_setsockopt(struct sock *sk,int optname,char *optval,int optlen)
{
	int ret;
	struct vifctl vif;
	struct mfcctl mfc;

	if(optname!=MRT_INIT)
	{
		if(sk!=mroute_socket && !capable(CAP_NET_ADMIN))
			return -EACCES;
	}

	switch(optname)
	{
		case MRT_INIT:
			if(sk->type!=SOCK_RAW || sk->num!=IPPROTO_IGMP)
				return -EOPNOTSUPP;
			if(optlen!=sizeof(int))
				return -ENOPROTOOPT;

			rtnl_lock();
			if (mroute_socket) {
				rtnl_unlock();
				return -EADDRINUSE;
			}

			ret = ip_ra_control(sk, 1, mrtsock_destruct);
			if (ret == 0) {
				write_lock_bh(&mrt_lock);
				mroute_socket=sk;
				write_unlock_bh(&mrt_lock);

				ipv4_devconf.mc_forwarding++;
			}
			rtnl_unlock();
			return ret;
		case MRT_DONE:
			if (sk!=mroute_socket)
				return -EACCES;
			return ip_ra_control(sk, 0, NULL);
		case MRT_ADD_VIF:
		case MRT_DEL_VIF:
			if(optlen!=sizeof(vif))
				return -EINVAL;
			if (copy_from_user(&vif,optval,sizeof(vif)))
				return -EFAULT;
			if(vif.vifc_vifi >= MAXVIFS)
				return -ENFILE;
			rtnl_lock();
			if (optname==MRT_ADD_VIF) {
				ret = vif_add(&vif, sk==mroute_socket);
			} else {
				ret = vif_delete(vif.vifc_vifi);
			}
			rtnl_unlock();
			return ret;

		/*
		 *	Manipulate the forwarding caches. These live
		 *	in a sort of kernel/user symbiosis.
		 */
		case MRT_ADD_MFC:
		case MRT_DEL_MFC:
			if(optlen!=sizeof(mfc))
				return -EINVAL;
			if (copy_from_user(&mfc,optval, sizeof(mfc)))
				return -EFAULT;
			rtnl_lock();
			if (optname==MRT_DEL_MFC)
				ret = ipmr_mfc_delete(&mfc);
			else
				ret = ipmr_mfc_add(&mfc, sk==mroute_socket);
			rtnl_unlock();
			return ret;

		/*
		 *	Control PIM assert.
		 */
		case MRT_ASSERT:
		{
			int v;
			if(get_user(v,(int *)optval))
				return -EFAULT;
			mroute_do_assert=(v)?1:0;
			return 0;
		}
#ifdef CONFIG_IP_PIMSM
		case MRT_PIM:
		{
			int v;
			if(get_user(v,(int *)optval))
				return -EFAULT;
			v = (v)?1:0;
			rtnl_lock();
			if (v != mroute_do_pim) {
				mroute_do_pim = v;
				mroute_do_assert = v;
#ifdef CONFIG_IP_PIMSM_V2
				if (mroute_do_pim)
					inet_add_protocol(&pim_protocol);
				else
					inet_del_protocol(&pim_protocol);
#endif
			}
			rtnl_unlock();
			return 0;
		}
#endif
		/*
		 *	Spurious command, or MRT_VERSION which you cannot
		 *	set.
		 */
		default:
			return -ENOPROTOOPT;
	}
}
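/* Illustrative userspace sketch (not part of this file): a routing daemon
 * typically brings the kernel half up like
 *
 *	int s = socket(AF_INET, SOCK_RAW, IPPROTO_IGMP);
 *	int one = 1;
 *	struct vifctl vc;
 *
 *	setsockopt(s, IPPROTO_IP, MRT_INIT, &one, sizeof(one));
 *	memset(&vc, 0, sizeof(vc));
 *	vc.vifc_vifi = 0;	(first VIF; fill in address, flags, threshold)
 *	setsockopt(s, IPPROTO_IP, MRT_ADD_VIF, &vc, sizeof(vc));
 *
 * and calls MRT_DONE (or simply closes the socket, which triggers
 * mrtsock_destruct()) on shutdown.
 */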
/*
 *	Getsock opt support for the multicast routing system.
 */

int ip_mroute_getsockopt(struct sock *sk,int optname,char *optval,int *optlen)
{
	int olr;
	int val;

	if(optname!=MRT_VERSION &&
#ifdef CONFIG_IP_PIMSM
	   optname!=MRT_PIM &&
#endif
	   optname!=MRT_ASSERT)
		return -ENOPROTOOPT;

	if(get_user(olr, optlen))
		return -EFAULT;

	olr=min(olr,sizeof(int));
	if(put_user(olr,optlen))
		return -EFAULT;
	if(optname==MRT_VERSION)
		val=0x0305;
#ifdef CONFIG_IP_PIMSM
	else if(optname==MRT_PIM)
		val=mroute_do_pim;
#endif
	else
		val=mroute_do_assert;
	if(copy_to_user(optval,&val,olr))
		return -EFAULT;
	return 0;
}
/*
 *	The IP multicast ioctl support routines.
 */

int ipmr_ioctl(struct sock *sk, int cmd, unsigned long arg)
{
	struct sioc_sg_req sr;
	struct sioc_vif_req vr;
	struct vif_device *vif;
	struct mfc_cache *c;

	switch(cmd)
	{
		case SIOCGETVIFCNT:
			if (copy_from_user(&vr,(void *)arg,sizeof(vr)))
				return -EFAULT;
			if(vr.vifi>=maxvif)
				return -EINVAL;
			read_lock(&mrt_lock);
			vif=&vif_table[vr.vifi];
			if(VIF_EXISTS(vr.vifi)) {
				vr.icount=vif->pkt_in;
				vr.ocount=vif->pkt_out;
				vr.ibytes=vif->bytes_in;
				vr.obytes=vif->bytes_out;
				read_unlock(&mrt_lock);

				if (copy_to_user((void *)arg,&vr,sizeof(vr)))
					return -EFAULT;
				return 0;
			}
			read_unlock(&mrt_lock);
			return -EADDRNOTAVAIL;
		case SIOCGETSGCNT:
			if (copy_from_user(&sr,(void *)arg,sizeof(sr)))
				return -EFAULT;

			read_lock(&mrt_lock);
			c = ipmr_cache_find(sr.src.s_addr, sr.grp.s_addr);
			if (c) {
				sr.pktcnt = c->mfc_un.res.pkt;
				sr.bytecnt = c->mfc_un.res.bytes;
				sr.wrong_if = c->mfc_un.res.wrong_if;
				read_unlock(&mrt_lock);

				if (copy_to_user((void *)arg,&sr,sizeof(sr)))
					return -EFAULT;
				return 0;
			}
			read_unlock(&mrt_lock);
			return -EADDRNOTAVAIL;
		default:
			return -ENOIOCTLCMD;
	}
}
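/* Tear down any VIF that references a device which is going away. */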
static int ipmr_device_event(struct notifier_block *this, unsigned long event, void *ptr)
{
	struct vif_device *v;
	int ct;
	if (event != NETDEV_UNREGISTER)
		return NOTIFY_DONE;
	v=&vif_table[0];
	for(ct=0;ct<maxvif;ct++,v++) {
		if (v->dev==ptr)
			vif_delete(ct);
	}
	return NOTIFY_DONE;
}

static struct notifier_block ip_mr_notifier={
	ipmr_device_event,
	NULL,
	0
};
/*
 *	Encapsulate a packet by attaching a valid IPIP header to it.
 *	This avoids tunnel drivers and other mess and gives us the speed so
 *	important for multicast video.
 */

static void ip_encap(struct sk_buff *skb, u32 saddr, u32 daddr)
{
	struct iphdr *iph = (struct iphdr *)skb_push(skb,sizeof(struct iphdr));

	iph->version	= 4;
	iph->tos	= skb->nh.iph->tos;
	iph->ttl	= skb->nh.iph->ttl;
	iph->frag_off	= 0;
	iph->daddr	= daddr;
	iph->saddr	= saddr;
	iph->protocol	= IPPROTO_IPIP;
	iph->ihl	= 5;
	iph->tot_len	= htons(skb->len);
	ip_select_ident(iph, skb->dst);
	ip_send_check(iph);

	skb->h.ipiph = skb->nh.iph;
	skb->nh.iph = iph;
#ifdef CONFIG_NETFILTER
	nf_conntrack_put(skb->nfct);
	skb->nfct = NULL;
#endif
}

static inline int ipmr_forward_finish(struct sk_buff *skb)
{
	struct dst_entry *dst = skb->dst;

	if (skb->len <= dst->pmtu)
		return dst->output(skb);
	else
		return ip_fragment(skb, dst->output);
}
/*
 *	Processing handlers for ipmr_forward
 */

static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c,
			    int vifi, int last)
{
	struct iphdr *iph = skb->nh.iph;
	struct vif_device *vif = &vif_table[vifi];
	struct net_device *dev;
	struct rtable *rt;
	int    encap = 0;
	struct sk_buff *skb2;

	if (vif->dev == NULL)
		return;

#ifdef CONFIG_IP_PIMSM
	if (vif->flags & VIFF_REGISTER) {
		vif->pkt_out++;
		vif->bytes_out+=skb->len;
		((struct net_device_stats*)vif->dev->priv)->tx_bytes += skb->len;
		((struct net_device_stats*)vif->dev->priv)->tx_packets++;
		ipmr_cache_report(skb, vifi, IGMPMSG_WHOLEPKT);
		return;
	}
#endif

	if (vif->flags&VIFF_TUNNEL) {
		if (ip_route_output(&rt, vif->remote, vif->local, RT_TOS(iph->tos), vif->link))
			return;
		encap = sizeof(struct iphdr);
	} else {
		if (ip_route_output(&rt, iph->daddr, 0, RT_TOS(iph->tos), vif->link))
			return;
	}

	dev = rt->u.dst.dev;

	if (skb->len+encap > rt->u.dst.pmtu && (ntohs(iph->frag_off) & IP_DF)) {
		/* Do not fragment multicasts. Alas, IPv4 gives us no way
		   to send ICMP here, so such packets simply disappear
		   into a black hole.
		 */

		IP_INC_STATS_BH(IpFragFails);
		ip_rt_put(rt);
		return;
	}

	encap += dev->hard_header_len;

	if (skb_headroom(skb) < encap || skb_cloned(skb) || !last)
		skb2 = skb_realloc_headroom(skb, (encap + 15)&~15);
	else if (atomic_read(&skb->users) != 1)
		skb2 = skb_clone(skb, GFP_ATOMIC);
	else {
		atomic_inc(&skb->users);
		skb2 = skb;
	}

	if (skb2 == NULL) {
		ip_rt_put(rt);
		return;
	}

	vif->pkt_out++;
	vif->bytes_out+=skb->len;

	dst_release(skb2->dst);
	skb2->dst = &rt->u.dst;
	iph = skb2->nh.iph;
	ip_decrease_ttl(iph);

	/* FIXME: forward and output firewalls used to be called here.
	 * What do we do with netfilter? -- RR */
	if (vif->flags & VIFF_TUNNEL) {
		ip_encap(skb2, vif->local, vif->remote);
		/* FIXME: extra output firewall step used to be here. --RR */
		((struct ip_tunnel *)vif->dev->priv)->stat.tx_packets++;
		((struct ip_tunnel *)vif->dev->priv)->stat.tx_bytes+=skb2->len;
	}

	IPCB(skb2)->flags |= IPSKB_FORWARDED;

	/*
	 * RFC 1584 teaches that a DVMRP/PIM router must deliver packets locally
	 * not only before forwarding, but also after forwarding on all output
	 * interfaces: if the mrouter runs a multicasting program, it should
	 * receive packets regardless of which interface the program joined on.
	 * If we did not do this, the program would have to join on all
	 * interfaces. On the other hand, a multihomed host (or router, but
	 * not mrouter) cannot join on more than one interface - that would
	 * result in receiving multiple copies.
	 */
	NF_HOOK(PF_INET, NF_IP_FORWARD, skb2, skb->dev, dev,
		ipmr_forward_finish);
}
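/* Find the VIF using device 'dev', or -1 if there is none. Called under
 * mrt_lock.
 */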
int ipmr_find_vif(struct net_device *dev)
{
	int ct;
	for (ct=maxvif-1; ct>=0; ct--) {
		if (vif_table[ct].dev == dev)
			break;
	}
	return ct;
}
/* "local" means that we should preserve one skb (for local delivery) */

int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local)
{
	int psend = -1;
	int vif, ct;

	vif = cache->mfc_parent;
	cache->mfc_un.res.pkt++;
	cache->mfc_un.res.bytes += skb->len;

	/*
	 * Wrong interface: drop packet and (maybe) send PIM assert.
	 */
	if (vif_table[vif].dev != skb->dev) {
		int true_vifi;

		if (((struct rtable*)skb->dst)->key.iif == 0) {
			/* It is our own packet, looped back.
			   Very complicated situation...

			   The best workaround until routing daemons are
			   fixed is not to redistribute a packet if it
			   arrived on the wrong interface. This means that
			   multicast applications WILL NOT work for (S,G)
			   entries whose default multicast route points to
			   the wrong oif. In any case, running multicast
			   applications on a router is not a good idea.
			 */
			goto dont_forward;
		}

		cache->mfc_un.res.wrong_if++;
		true_vifi = ipmr_find_vif(skb->dev);

		if (true_vifi >= 0 && mroute_do_assert &&
		    /* PIM-SM uses asserts when switching from RPT to SPT,
		       so we cannot check that the packet arrived on an oif.
		       That is bad, but otherwise we would have to move a
		       pretty large chunk of pimd into the kernel. Ough... --ANK
		     */
		    (mroute_do_pim || cache->mfc_un.res.ttls[true_vifi] < 255) &&
		    jiffies - cache->mfc_un.res.last_assert > MFC_ASSERT_THRESH) {
			cache->mfc_un.res.last_assert = jiffies;
			ipmr_cache_report(skb, true_vifi, IGMPMSG_WRONGVIF);
		}
		goto dont_forward;
	}

	vif_table[vif].pkt_in++;
	vif_table[vif].bytes_in+=skb->len;

	/*
	 *	Forward the frame
	 */
	for (ct = cache->mfc_un.res.maxvif-1; ct >= cache->mfc_un.res.minvif; ct--) {
		if (skb->nh.iph->ttl > cache->mfc_un.res.ttls[ct]) {
			if (psend != -1)
				ipmr_queue_xmit(skb, cache, psend, 0);
			psend=ct;
		}
	}
	if (psend != -1)
		ipmr_queue_xmit(skb, cache, psend, !local);

dont_forward:
	if (!local)
		kfree_skb(skb);
	return 0;
}
/*
 *	Multicast packets for forwarding arrive here
 */

int ip_mr_input(struct sk_buff *skb)
{
	struct mfc_cache *cache;
	int local = ((struct rtable*)skb->dst)->rt_flags&RTCF_LOCAL;

	/* Packet is looped back after forward, it must not be forwarded
	   a second time, but may still be delivered locally.
	 */
	if (IPCB(skb)->flags&IPSKB_FORWARDED)
		goto dont_forward;

	if (!local) {
		if (IPCB(skb)->opt.router_alert) {
			if (ip_call_ra_chain(skb))
				return 0;
		} else if (skb->nh.iph->protocol == IPPROTO_IGMP){
			/* IGMPv1 (and broken IGMPv2 implementations such as
			   Cisco IOS <= 11.2(8)) do not put the router alert
			   option into IGMP packets destined to routable
			   groups. That is very bad, because it means
			   that we can forward NO IGMP messages.
			 */
			read_lock(&mrt_lock);
			if (mroute_socket) {
				raw_rcv(mroute_socket, skb);
				read_unlock(&mrt_lock);
				return 0;
			}
			read_unlock(&mrt_lock);
		}
	}

	read_lock(&mrt_lock);
	cache = ipmr_cache_find(skb->nh.iph->saddr, skb->nh.iph->daddr);

	/*
	 *	No usable cache entry
	 */
	if (cache==NULL) {
		int vif;

		if (local) {
			struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
			ip_local_deliver(skb);
			if (skb2 == NULL) {
				read_unlock(&mrt_lock);
				return -ENOBUFS;
			}
			skb = skb2;
		}

		vif = ipmr_find_vif(skb->dev);
		if (vif >= 0) {
			int err = ipmr_cache_unresolved(vif, skb);
			read_unlock(&mrt_lock);

			return err;
		}
		read_unlock(&mrt_lock);
		kfree_skb(skb);
		return -ENODEV;
	}

	ip_mr_forward(skb, cache, local);

	read_unlock(&mrt_lock);

	if (local)
		return ip_local_deliver(skb);

	return 0;

dont_forward:
	if (local)
		return ip_local_deliver(skb);
	kfree_skb(skb);
	return 0;
}
#ifdef CONFIG_IP_PIMSM_V1
/*
 * Handle IGMP messages of PIMv1
 */

int pim_rcv_v1(struct sk_buff * skb, unsigned short len)
{
	struct igmphdr *pim = (struct igmphdr*)skb->h.raw;
	struct iphdr   *encap;
	struct net_device  *reg_dev = NULL;

	if (!mroute_do_pim ||
	    len < sizeof(*pim) + sizeof(*encap) ||
	    pim->group != PIM_V1_VERSION || pim->code != PIM_V1_REGISTER) {
		kfree_skb(skb);
		return -EINVAL;
	}

	encap = (struct iphdr*)(skb->h.raw + sizeof(struct igmphdr));
	/*
	   Check that:
	   a. packet is really destined to a multicast group
	   b. packet is not a NULL-REGISTER
	   c. packet is not truncated
	 */
	if (!MULTICAST(encap->daddr) ||
	    ntohs(encap->tot_len) == 0 ||
	    ntohs(encap->tot_len) + sizeof(*pim) > len) {
		kfree_skb(skb);
		return -EINVAL;
	}

	read_lock(&mrt_lock);
	if (reg_vif_num >= 0)
		reg_dev = vif_table[reg_vif_num].dev;
	if (reg_dev)
		dev_hold(reg_dev);
	read_unlock(&mrt_lock);

	if (reg_dev == NULL) {
		kfree_skb(skb);
		return -EINVAL;
	}

	skb->mac.raw = skb->nh.raw;
	skb_pull(skb, (u8*)encap - skb->data);
	skb->nh.iph = (struct iphdr *)skb->data;
	skb->dev = reg_dev;
	memset(&(IPCB(skb)->opt), 0, sizeof(struct ip_options));
	skb->protocol = __constant_htons(ETH_P_IP);
	skb->ip_summed = 0;
	skb->pkt_type = PACKET_HOST;
	dst_release(skb->dst);
	skb->dst = NULL;
	((struct net_device_stats*)reg_dev->priv)->rx_bytes += skb->len;
	((struct net_device_stats*)reg_dev->priv)->rx_packets++;
#ifdef CONFIG_NETFILTER
	nf_conntrack_put(skb->nfct);
	skb->nfct = NULL;
#endif
	netif_rx(skb);
	dev_put(reg_dev);
	return 0;
}
#endif
#ifdef CONFIG_IP_PIMSM_V2
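/*
 * Handle PIMv2 register messages
 */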
int pim_rcv(struct sk_buff * skb, unsigned short len)
{
	struct pimreghdr *pim = (struct pimreghdr*)skb->h.raw;
	struct iphdr   *encap;
	struct net_device  *reg_dev = NULL;

	if (len < sizeof(*pim) + sizeof(*encap) ||
	    pim->type != ((PIM_VERSION<<4)|(PIM_REGISTER)) ||
	    (pim->flags&PIM_NULL_REGISTER) ||
	    (ip_compute_csum((void *)pim, sizeof(*pim)) != 0 &&
	     ip_compute_csum((void *)pim, len))) {
		kfree_skb(skb);
		return -EINVAL;
	}

	/* check if the inner packet is destined to mcast group */
	encap = (struct iphdr*)(skb->h.raw + sizeof(struct pimreghdr));
	if (!MULTICAST(encap->daddr) ||
	    ntohs(encap->tot_len) == 0 ||
	    ntohs(encap->tot_len) + sizeof(*pim) > len) {
		kfree_skb(skb);
		return -EINVAL;
	}

	read_lock(&mrt_lock);
	if (reg_vif_num >= 0)
		reg_dev = vif_table[reg_vif_num].dev;
	if (reg_dev)
		dev_hold(reg_dev);
	read_unlock(&mrt_lock);

	if (reg_dev == NULL) {
		kfree_skb(skb);
		return -EINVAL;
	}

	skb->mac.raw = skb->nh.raw;
	skb_pull(skb, (u8*)encap - skb->data);
	skb->nh.iph = (struct iphdr *)skb->data;
	skb->dev = reg_dev;
	memset(&(IPCB(skb)->opt), 0, sizeof(struct ip_options));
	skb->protocol = __constant_htons(ETH_P_IP);
	skb->ip_summed = 0;
	skb->pkt_type = PACKET_HOST;
	dst_release(skb->dst);
	((struct net_device_stats*)reg_dev->priv)->rx_bytes += skb->len;
	((struct net_device_stats*)reg_dev->priv)->rx_packets++;
	skb->dst = NULL;
#ifdef CONFIG_NETFILTER
	nf_conntrack_put(skb->nfct);
	skb->nfct = NULL;
#endif
	netif_rx(skb);
	dev_put(reg_dev);
	return 0;
}
#endif

#ifdef CONFIG_RTNETLINK
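/* Encode the resolved (S,G) entry 'c' as rtnetlink attributes: RTA_IIF for
 * the parent interface plus an RTA_MULTIPATH nexthop list for the oifs.
 * Called under mrt_lock.
 */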
static int
ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm)
{
	int ct;
	struct rtnexthop *nhp;
	struct net_device *dev = vif_table[c->mfc_parent].dev;
	u8 *b = skb->tail;
	struct rtattr *mp_head;

	if (dev)
		RTA_PUT(skb, RTA_IIF, 4, &dev->ifindex);

	mp_head = (struct rtattr*)skb_put(skb, RTA_LENGTH(0));

	for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
		if (c->mfc_un.res.ttls[ct] < 255) {
			if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4))
				goto rtattr_failure;
			nhp = (struct rtnexthop*)skb_put(skb, RTA_ALIGN(sizeof(*nhp)));
			nhp->rtnh_flags = 0;
			nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
			nhp->rtnh_ifindex = vif_table[ct].dev->ifindex;
			nhp->rtnh_len = sizeof(*nhp);
		}
	}
	mp_head->rta_type = RTA_MULTIPATH;
	mp_head->rta_len = skb->tail - (u8*)mp_head;
	rtm->rtm_type = RTN_MULTICAST;
	return 1;

rtattr_failure:
	skb_trim(skb, b - skb->data);
	return -EMSGSIZE;
}
int ipmr_get_route(struct sk_buff *skb, struct rtmsg *rtm, int nowait)
{
	int err;
	struct mfc_cache *cache;
	struct rtable *rt = (struct rtable*)skb->dst;

	read_lock(&mrt_lock);
	cache = ipmr_cache_find(rt->rt_src, rt->rt_dst);

	if (cache==NULL) {
		struct net_device *dev;
		int vif;

		if (nowait) {
			read_unlock(&mrt_lock);
			return -EAGAIN;
		}

		dev = skb->dev;
		if (dev == NULL || (vif = ipmr_find_vif(dev)) < 0) {
			read_unlock(&mrt_lock);
			return -ENODEV;
		}
		skb->nh.raw = skb_push(skb, sizeof(struct iphdr));
		skb->nh.iph->ihl = sizeof(struct iphdr)>>2;
		skb->nh.iph->saddr = rt->rt_src;
		skb->nh.iph->daddr = rt->rt_dst;
		skb->nh.iph->version = 0;
		err = ipmr_cache_unresolved(vif, skb);
		read_unlock(&mrt_lock);
		return err;
	}

	if (!nowait && (rtm->rtm_flags&RTM_F_NOTIFY))
		cache->mfc_flags |= MFC_NOTIFY;
	err = ipmr_fill_mroute(skb, cache, rtm);
	read_unlock(&mrt_lock);
	return err;
}
#endif
#ifdef CONFIG_PROC_FS
/*
 *	The /proc interfaces to multicast routing /proc/ip_mr_cache /proc/ip_mr_vif
 */

static int ipmr_vif_info(char *buffer, char **start, off_t offset, int length)
{
	struct vif_device *vif;
	int len=0;
	off_t pos=0;
	off_t begin=0;
	int size;
	int ct;

	len += sprintf(buffer,
		 "Interface      BytesIn  PktsIn  BytesOut PktsOut Flags Local    Remote\n");
	pos=len;

	read_lock(&mrt_lock);
	for (ct=0;ct<maxvif;ct++)
	{
		char *name = "none";
		vif=&vif_table[ct];
		if(!VIF_EXISTS(ct))
			continue;
		if (vif->dev)
			name = vif->dev->name;
		size = sprintf(buffer+len, "%2d %-10s %8ld %7ld  %8ld %7ld %05X %08X %08X\n",
			ct, name, vif->bytes_in, vif->pkt_in, vif->bytes_out, vif->pkt_out,
			vif->flags, vif->local, vif->remote);
		len+=size;
		pos+=size;
		if(pos<offset)
		{
			len=0;
			begin=pos;
		}
		if(pos>offset+length)
			break;
	}
	read_unlock(&mrt_lock);

	*start=buffer+(offset-begin);
	len-=(offset-begin);
	if(len>length)
		len=length;
	if (len<0)
		len = 0;
	return len;
}
static int ipmr_mfc_info(char *buffer, char **start, off_t offset, int length)
{
	struct mfc_cache *mfc;
	int len=0;
	off_t pos=0;
	off_t begin=0;
	int size;
	int ct;

	len += sprintf(buffer,
		 "Group    Origin   Iif     Pkts    Bytes    Wrong Oifs\n");
	pos=len;

	read_lock(&mrt_lock);
	for (ct=0;ct<MFC_LINES;ct++)
	{
		for(mfc=mfc_cache_array[ct]; mfc; mfc=mfc->next)
		{
			int n;

			/*
			 *	Interface forwarding map
			 */
			size = sprintf(buffer+len, "%08lX %08lX %-3d %8ld %8ld %8ld",
				(unsigned long)mfc->mfc_mcastgrp,
				(unsigned long)mfc->mfc_origin,
				mfc->mfc_parent,
				mfc->mfc_un.res.pkt,
				mfc->mfc_un.res.bytes,
				mfc->mfc_un.res.wrong_if);
			for(n=mfc->mfc_un.res.minvif;n<mfc->mfc_un.res.maxvif;n++)
			{
				if(VIF_EXISTS(n) && mfc->mfc_un.res.ttls[n] < 255)
					size += sprintf(buffer+len+size, " %2d:%-3d", n, mfc->mfc_un.res.ttls[n]);
			}
			size += sprintf(buffer+len+size, "\n");
			len+=size;
			pos+=size;
			if(pos<offset)
			{
				len=0;
				begin=pos;
			}
			if(pos>offset+length)
				goto done;
		}
	}

	spin_lock_bh(&mfc_unres_lock);
	for(mfc=mfc_unres_queue; mfc; mfc=mfc->next) {
		size = sprintf(buffer+len, "%08lX %08lX %-3d %8ld %8ld %8ld\n",
			(unsigned long)mfc->mfc_mcastgrp,
			(unsigned long)mfc->mfc_origin,
			-1,
			(long)mfc->mfc_un.unres.unresolved.qlen,
			0L, 0L);
		len+=size;
		pos+=size;
		if(pos<offset)
		{
			len=0;
			begin=pos;
		}
		if(pos>offset+length)
			break;
	}
	spin_unlock_bh(&mfc_unres_lock);

done:
	read_unlock(&mrt_lock);
	*start=buffer+(offset-begin);
	len-=(offset-begin);
	if(len>length)
		len=length;
	if (len < 0) {
		len = 0;
	}
	return len;
}
#endif
#ifdef CONFIG_IP_PIMSM_V2
struct inet_protocol pim_protocol =
{
	pim_rcv,		/* PIM handler		*/
	NULL,			/* PIM error control	*/
	NULL,			/* next			*/
	IPPROTO_PIM,		/* protocol ID		*/
	0,			/* copy			*/
	NULL,			/* data			*/
	"PIM"			/* name			*/
};
#endif
/*
 *	Setup for IP multicast routing
 */

void __init ip_mr_init(void)
{
	printk(KERN_INFO "Linux IP multicast router 0.06 plus PIM-SM\n");
	mrt_cachep = kmem_cache_create("ip_mrt_cache",
				       sizeof(struct mfc_cache),
				       0, SLAB_HWCACHE_ALIGN,
				       NULL, NULL);
	init_timer(&ipmr_expire_timer);
	ipmr_expire_timer.function=ipmr_expire_process;
	register_netdevice_notifier(&ip_mr_notifier);
#ifdef CONFIG_PROC_FS
	proc_net_create("ip_mr_vif",0,ipmr_vif_info);
	proc_net_create("ip_mr_cache",0,ipmr_mfc_info);
#endif
}