1 /*
2 * IP multicast routing support for mrouted 3.6/3.8
4 * (c) 1995 Alan Cox, <alan@lxorguk.ukuu.org.uk>
5 * Linux Consultancy and Custom Driver Development
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation; either version
10 * 2 of the License, or (at your option) any later version.
12 * Fixes:
13 * Michael Chastain : Incorrect size of copying.
14 * Alan Cox : Added the cache manager code
15 * Alan Cox : Fixed the clone/copy bug and device race.
16 * Mike McLagan : Routing by source
17 * Malcolm Beattie : Buffer handling fixes.
18 * Alexey Kuznetsov : Double buffer free and other fixes.
19 * SVR Anand : Fixed several multicast bugs and problems.
20 * Alexey Kuznetsov : Status, optimisations and more.
21 * Brad Parker : Better behaviour on mrouted upcall
22 * overflow.
23 * Carlos Picoto : PIMv1 Support
24 * Pavlin Ivanov Radoslavov: PIMv2 Registers must checksum only PIM header
25 * Relax this requirement to work with older peers.
29 #include <asm/system.h>
30 #include <asm/uaccess.h>
31 #include <linux/types.h>
32 #include <linux/capability.h>
33 #include <linux/errno.h>
34 #include <linux/timer.h>
35 #include <linux/mm.h>
36 #include <linux/kernel.h>
37 #include <linux/fcntl.h>
38 #include <linux/stat.h>
39 #include <linux/socket.h>
40 #include <linux/in.h>
41 #include <linux/inet.h>
42 #include <linux/netdevice.h>
43 #include <linux/inetdevice.h>
44 #include <linux/igmp.h>
45 #include <linux/proc_fs.h>
46 #include <linux/seq_file.h>
47 #include <linux/mroute.h>
48 #include <linux/init.h>
49 #include <linux/if_ether.h>
50 #include <net/net_namespace.h>
51 #include <net/ip.h>
52 #include <net/protocol.h>
53 #include <linux/skbuff.h>
54 #include <net/route.h>
55 #include <net/sock.h>
56 #include <net/icmp.h>
57 #include <net/udp.h>
58 #include <net/raw.h>
59 #include <linux/notifier.h>
60 #include <linux/if_arp.h>
61 #include <linux/netfilter_ipv4.h>
62 #include <net/ipip.h>
63 #include <net/checksum.h>
64 #include <net/netlink.h>
66 #if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2)
67 #define CONFIG_IP_PIMSM 1
68 #endif
70 /* Big lock, protecting vif table, mrt cache and mroute socket state.
71 Note that the changes are semaphored via rtnl_lock.
74 static DEFINE_RWLOCK(mrt_lock);
77 * Multicast router control variables
80 #define VIF_EXISTS(_net, _idx) ((_net)->ipv4.vif_table[_idx].dev != NULL)
82 static struct mfc_cache *mfc_unres_queue; /* Queue of unresolved entries */
84 /* Special spinlock for queue of unresolved entries */
85 static DEFINE_SPINLOCK(mfc_unres_lock);
87 /* We revert to Alan's original scheme. The hash table of resolved
88 entries is changed only in process context and protected
89 by the weak lock mrt_lock. The queue of unresolved entries is protected
90 by the strong spinlock mfc_unres_lock.
92 This way the data path is entirely free of exclusive locks.
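/* A minimal sketch of the resulting pattern, as it is used throughout this
 * file (only names defined below are referenced, nothing new is introduced):
 *
 *	read_lock(&mrt_lock);                      (data path: lookups)
 *	c = ipmr_cache_find(net, saddr, daddr);
 *	read_unlock(&mrt_lock);
 *
 *	write_lock_bh(&mrt_lock);                  (process context: updates)
 *	c->next = net->ipv4.mfc_cache_array[line];
 *	net->ipv4.mfc_cache_array[line] = c;
 *	write_unlock_bh(&mrt_lock);
 *
 *	spin_lock_bh(&mfc_unres_lock);             (unresolved queue only)
 *	...
 *	spin_unlock_bh(&mfc_unres_lock);
 */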
95 static struct kmem_cache *mrt_cachep __read_mostly;
97 static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local);
98 static int ipmr_cache_report(struct net *net,
99 struct sk_buff *pkt, vifi_t vifi, int assert);
100 static int ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm);
102 static struct timer_list ipmr_expire_timer;
104 /* Service routines creating virtual interfaces: DVMRP tunnels and PIMREG */
106 static void ipmr_del_tunnel(struct net_device *dev, struct vifctl *v)
108 struct net *net = dev_net(dev);
110 dev_close(dev);
112 dev = __dev_get_by_name(net, "tunl0");
113 if (dev) {
114 const struct net_device_ops *ops = dev->netdev_ops;
115 struct ifreq ifr;
116 struct ip_tunnel_parm p;
118 memset(&p, 0, sizeof(p));
119 p.iph.daddr = v->vifc_rmt_addr.s_addr;
120 p.iph.saddr = v->vifc_lcl_addr.s_addr;
121 p.iph.version = 4;
122 p.iph.ihl = 5;
123 p.iph.protocol = IPPROTO_IPIP;
124 sprintf(p.name, "dvmrp%d", v->vifc_vifi);
125 ifr.ifr_ifru.ifru_data = (__force void __user *)&p;
127 if (ops->ndo_do_ioctl) {
128 mm_segment_t oldfs = get_fs();
130 set_fs(KERNEL_DS);
131 ops->ndo_do_ioctl(dev, &ifr, SIOCDELTUNNEL);
132 set_fs(oldfs);
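/* Create a DVMRP tunnel VIF: drive the generic "tunl0" device's
 * SIOCADDTUNNEL ioctl from kernel context (hence the set_fs(KERNEL_DS)
 * dance) to spawn a "dvmrp%d" IPIP device, then bring it up and hold a
 * reference.  Returns the new device, or NULL on failure.
 */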
137 static
138 struct net_device *ipmr_new_tunnel(struct net *net, struct vifctl *v)
140 struct net_device *dev;
142 dev = __dev_get_by_name(net, "tunl0");
144 if (dev) {
145 const struct net_device_ops *ops = dev->netdev_ops;
146 int err;
147 struct ifreq ifr;
148 struct ip_tunnel_parm p;
149 struct in_device *in_dev;
151 memset(&p, 0, sizeof(p));
152 p.iph.daddr = v->vifc_rmt_addr.s_addr;
153 p.iph.saddr = v->vifc_lcl_addr.s_addr;
154 p.iph.version = 4;
155 p.iph.ihl = 5;
156 p.iph.protocol = IPPROTO_IPIP;
157 sprintf(p.name, "dvmrp%d", v->vifc_vifi);
158 ifr.ifr_ifru.ifru_data = (__force void __user *)&p;
160 if (ops->ndo_do_ioctl) {
161 mm_segment_t oldfs = get_fs();
163 set_fs(KERNEL_DS);
164 err = ops->ndo_do_ioctl(dev, &ifr, SIOCADDTUNNEL);
165 set_fs(oldfs);
166 } else
167 err = -EOPNOTSUPP;
169 dev = NULL;
171 if (err == 0 &&
172 (dev = __dev_get_by_name(net, p.name)) != NULL) {
173 dev->flags |= IFF_MULTICAST;
175 in_dev = __in_dev_get_rtnl(dev);
176 if (in_dev == NULL)
177 goto failure;
179 ipv4_devconf_setall(in_dev);
180 IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0;
182 if (dev_open(dev))
183 goto failure;
184 dev_hold(dev);
187 return dev;
189 failure:
190 /* allow the register to be completed before unregistering. */
191 rtnl_unlock();
192 rtnl_lock();
194 unregister_netdevice(dev);
195 return NULL;
198 #ifdef CONFIG_IP_PIMSM
200 static netdev_tx_t reg_vif_xmit(struct sk_buff *skb, struct net_device *dev)
202 struct net *net = dev_net(dev);
204 read_lock(&mrt_lock);
205 dev->stats.tx_bytes += skb->len;
206 dev->stats.tx_packets++;
207 ipmr_cache_report(net, skb, net->ipv4.mroute_reg_vif_num,
208 IGMPMSG_WHOLEPKT);
209 read_unlock(&mrt_lock);
210 kfree_skb(skb);
211 return NETDEV_TX_OK;
214 static const struct net_device_ops reg_vif_netdev_ops = {
215 .ndo_start_xmit = reg_vif_xmit,
218 static void reg_vif_setup(struct net_device *dev)
220 dev->type = ARPHRD_PIMREG;
221 dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr) - 8;
222 dev->flags = IFF_NOARP;
223 dev->netdev_ops = &reg_vif_netdev_ops,
224 dev->destructor = free_netdev;
225 dev->features |= NETIF_F_NETNS_LOCAL;
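/* Allocate and register the "pimreg" pseudo-device used as the PIM
 * register VIF: packets transmitted on it are bounced back to the
 * userspace daemon as IGMPMSG_WHOLEPKT upcalls (see reg_vif_xmit).
 */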
228 static struct net_device *ipmr_reg_vif(struct net *net)
230 struct net_device *dev;
231 struct in_device *in_dev;
233 dev = alloc_netdev(0, "pimreg", reg_vif_setup);
235 if (dev == NULL)
236 return NULL;
238 dev_net_set(dev, net);
240 if (register_netdevice(dev)) {
241 free_netdev(dev);
242 return NULL;
244 dev->iflink = 0;
246 rcu_read_lock();
247 if ((in_dev = __in_dev_get_rcu(dev)) == NULL) {
248 rcu_read_unlock();
249 goto failure;
252 ipv4_devconf_setall(in_dev);
253 IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0;
254 rcu_read_unlock();
256 if (dev_open(dev))
257 goto failure;
259 dev_hold(dev);
261 return dev;
263 failure:
264 /* allow the register to be completed before unregistering. */
265 rtnl_unlock();
266 rtnl_lock();
268 unregister_netdevice(dev);
269 return NULL;
271 #endif
274 * Delete a VIF entry
275 * @notify: Set to 1, if the caller is a notifier_call
278 static int vif_delete(struct net *net, int vifi, int notify)
280 struct vif_device *v;
281 struct net_device *dev;
282 struct in_device *in_dev;
284 if (vifi < 0 || vifi >= net->ipv4.maxvif)
285 return -EADDRNOTAVAIL;
287 v = &net->ipv4.vif_table[vifi];
289 write_lock_bh(&mrt_lock);
290 dev = v->dev;
291 v->dev = NULL;
293 if (!dev) {
294 write_unlock_bh(&mrt_lock);
295 return -EADDRNOTAVAIL;
298 #ifdef CONFIG_IP_PIMSM
299 if (vifi == net->ipv4.mroute_reg_vif_num)
300 net->ipv4.mroute_reg_vif_num = -1;
301 #endif
303 if (vifi+1 == net->ipv4.maxvif) {
304 int tmp;
305 for (tmp=vifi-1; tmp>=0; tmp--) {
306 if (VIF_EXISTS(net, tmp))
307 break;
309 net->ipv4.maxvif = tmp+1;
312 write_unlock_bh(&mrt_lock);
314 dev_set_allmulti(dev, -1);
316 if ((in_dev = __in_dev_get_rtnl(dev)) != NULL) {
317 IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)--;
318 ip_rt_multicast_event(in_dev);
321 if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER) && !notify)
322 unregister_netdevice(dev);
324 dev_put(dev);
325 return 0;
328 static inline void ipmr_cache_free(struct mfc_cache *c)
330 release_net(mfc_net(c));
331 kmem_cache_free(mrt_cachep, c);
334 /* Destroy an unresolved cache entry, killing queued skbs
335 and reporting error to netlink readers.
338 static void ipmr_destroy_unres(struct mfc_cache *c)
340 struct sk_buff *skb;
341 struct nlmsgerr *e;
342 struct net *net = mfc_net(c);
344 atomic_dec(&net->ipv4.cache_resolve_queue_len);
346 while ((skb = skb_dequeue(&c->mfc_un.unres.unresolved))) {
347 if (ip_hdr(skb)->version == 0) {
348 struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));
349 nlh->nlmsg_type = NLMSG_ERROR;
350 nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
351 skb_trim(skb, nlh->nlmsg_len);
352 e = NLMSG_DATA(nlh);
353 e->error = -ETIMEDOUT;
354 memset(&e->msg, 0, sizeof(e->msg));
356 rtnl_unicast(skb, net, NETLINK_CB(skb).pid);
357 } else
358 kfree_skb(skb);
361 ipmr_cache_free(c);
365 /* Single timer process for all the unresolved queue. */
367 static void ipmr_expire_process(unsigned long dummy)
369 unsigned long now;
370 unsigned long expires;
371 struct mfc_cache *c, **cp;
373 if (!spin_trylock(&mfc_unres_lock)) {
374 mod_timer(&ipmr_expire_timer, jiffies+HZ/10);
375 return;
378 if (mfc_unres_queue == NULL)
379 goto out;
381 now = jiffies;
382 expires = 10*HZ;
383 cp = &mfc_unres_queue;
385 while ((c=*cp) != NULL) {
386 if (time_after(c->mfc_un.unres.expires, now)) {
387 unsigned long interval = c->mfc_un.unres.expires - now;
388 if (interval < expires)
389 expires = interval;
390 cp = &c->next;
391 continue;
394 *cp = c->next;
396 ipmr_destroy_unres(c);
399 if (mfc_unres_queue != NULL)
400 mod_timer(&ipmr_expire_timer, jiffies + expires);
402 out:
403 spin_unlock(&mfc_unres_lock);
406 /* Fill oifs list. It is called under write locked mrt_lock. */
408 static void ipmr_update_thresholds(struct mfc_cache *cache, unsigned char *ttls)
410 int vifi;
411 struct net *net = mfc_net(cache);
413 cache->mfc_un.res.minvif = MAXVIFS;
414 cache->mfc_un.res.maxvif = 0;
415 memset(cache->mfc_un.res.ttls, 255, MAXVIFS);
417 for (vifi = 0; vifi < net->ipv4.maxvif; vifi++) {
418 if (VIF_EXISTS(net, vifi) &&
419 ttls[vifi] && ttls[vifi] < 255) {
420 cache->mfc_un.res.ttls[vifi] = ttls[vifi];
421 if (cache->mfc_un.res.minvif > vifi)
422 cache->mfc_un.res.minvif = vifi;
423 if (cache->mfc_un.res.maxvif <= vifi)
424 cache->mfc_un.res.maxvif = vifi + 1;
429 static int vif_add(struct net *net, struct vifctl *vifc, int mrtsock)
431 int vifi = vifc->vifc_vifi;
432 struct vif_device *v = &net->ipv4.vif_table[vifi];
433 struct net_device *dev;
434 struct in_device *in_dev;
435 int err;
437 /* Is vif busy ? */
438 if (VIF_EXISTS(net, vifi))
439 return -EADDRINUSE;
441 switch (vifc->vifc_flags) {
442 #ifdef CONFIG_IP_PIMSM
443 case VIFF_REGISTER:
445 * Special Purpose VIF in PIM
446 * All the packets will be sent to the daemon
448 if (net->ipv4.mroute_reg_vif_num >= 0)
449 return -EADDRINUSE;
450 dev = ipmr_reg_vif(net);
451 if (!dev)
452 return -ENOBUFS;
453 err = dev_set_allmulti(dev, 1);
454 if (err) {
455 unregister_netdevice(dev);
456 dev_put(dev);
457 return err;
459 break;
460 #endif
461 case VIFF_TUNNEL:
462 dev = ipmr_new_tunnel(net, vifc);
463 if (!dev)
464 return -ENOBUFS;
465 err = dev_set_allmulti(dev, 1);
466 if (err) {
467 ipmr_del_tunnel(dev, vifc);
468 dev_put(dev);
469 return err;
471 break;
472 case 0:
473 dev = ip_dev_find(net, vifc->vifc_lcl_addr.s_addr);
474 if (!dev)
475 return -EADDRNOTAVAIL;
476 err = dev_set_allmulti(dev, 1);
477 if (err) {
478 dev_put(dev);
479 return err;
481 break;
482 default:
483 return -EINVAL;
486 if ((in_dev = __in_dev_get_rtnl(dev)) == NULL) {
487 dev_put(dev);
488 return -EADDRNOTAVAIL;
490 IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)++;
491 ip_rt_multicast_event(in_dev);
494 * Fill in the VIF structures
496 v->rate_limit = vifc->vifc_rate_limit;
497 v->local = vifc->vifc_lcl_addr.s_addr;
498 v->remote = vifc->vifc_rmt_addr.s_addr;
499 v->flags = vifc->vifc_flags;
500 if (!mrtsock)
501 v->flags |= VIFF_STATIC;
502 v->threshold = vifc->vifc_threshold;
503 v->bytes_in = 0;
504 v->bytes_out = 0;
505 v->pkt_in = 0;
506 v->pkt_out = 0;
507 v->link = dev->ifindex;
508 if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER))
509 v->link = dev->iflink;
511 /* And finish update writing critical data */
512 write_lock_bh(&mrt_lock);
513 v->dev = dev;
514 #ifdef CONFIG_IP_PIMSM
515 if (v->flags&VIFF_REGISTER)
516 net->ipv4.mroute_reg_vif_num = vifi;
517 #endif
518 if (vifi+1 > net->ipv4.maxvif)
519 net->ipv4.maxvif = vifi+1;
520 write_unlock_bh(&mrt_lock);
521 return 0;
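/* Look up a resolved (origin, group) entry in the MFC hash; the caller
 * must hold mrt_lock (read or write).
 */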
524 static struct mfc_cache *ipmr_cache_find(struct net *net,
525 __be32 origin,
526 __be32 mcastgrp)
528 int line = MFC_HASH(mcastgrp, origin);
529 struct mfc_cache *c;
531 for (c = net->ipv4.mfc_cache_array[line]; c; c = c->next) {
532 if (c->mfc_origin==origin && c->mfc_mcastgrp==mcastgrp)
533 break;
535 return c;
539 * Allocate a multicast cache entry
541 static struct mfc_cache *ipmr_cache_alloc(struct net *net)
543 struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
544 if (c == NULL)
545 return NULL;
546 c->mfc_un.res.minvif = MAXVIFS;
547 mfc_net_set(c, net);
548 return c;
551 static struct mfc_cache *ipmr_cache_alloc_unres(struct net *net)
553 struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
554 if (c == NULL)
555 return NULL;
556 skb_queue_head_init(&c->mfc_un.unres.unresolved);
557 c->mfc_un.unres.expires = jiffies + 10*HZ;
558 mfc_net_set(c, net);
559 return c;
563 * A cache entry has gone into a resolved state from queued
566 static void ipmr_cache_resolve(struct mfc_cache *uc, struct mfc_cache *c)
568 struct sk_buff *skb;
569 struct nlmsgerr *e;
572 * Play the pending entries through our router
575 while ((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) {
576 if (ip_hdr(skb)->version == 0) {
577 struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));
579 if (ipmr_fill_mroute(skb, c, NLMSG_DATA(nlh)) > 0) {
580 nlh->nlmsg_len = (skb_tail_pointer(skb) -
581 (u8 *)nlh);
582 } else {
583 nlh->nlmsg_type = NLMSG_ERROR;
584 nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
585 skb_trim(skb, nlh->nlmsg_len);
586 e = NLMSG_DATA(nlh);
587 e->error = -EMSGSIZE;
588 memset(&e->msg, 0, sizeof(e->msg));
591 rtnl_unicast(skb, mfc_net(c), NETLINK_CB(skb).pid);
592 } else
593 ip_mr_forward(skb, c, 0);
598 * Bounce a cache query up to mrouted. We could use netlink for this but mrouted
599 * expects the following bizarre scheme.
601 * Called under mrt_lock.
604 static int ipmr_cache_report(struct net *net,
605 struct sk_buff *pkt, vifi_t vifi, int assert)
607 struct sk_buff *skb;
608 const int ihl = ip_hdrlen(pkt);
609 struct igmphdr *igmp;
610 struct igmpmsg *msg;
611 int ret;
613 #ifdef CONFIG_IP_PIMSM
614 if (assert == IGMPMSG_WHOLEPKT)
615 skb = skb_realloc_headroom(pkt, sizeof(struct iphdr));
616 else
617 #endif
618 skb = alloc_skb(128, GFP_ATOMIC);
620 if (!skb)
621 return -ENOBUFS;
623 #ifdef CONFIG_IP_PIMSM
624 if (assert == IGMPMSG_WHOLEPKT) {
625 /* Ugly, but we have no choice with this interface.
626 Duplicate old header, fix ihl, length etc.
627 And all this only to mangle msg->im_msgtype and
628 to set msg->im_mbz to "mbz" :-)
630 skb_push(skb, sizeof(struct iphdr));
631 skb_reset_network_header(skb);
632 skb_reset_transport_header(skb);
633 msg = (struct igmpmsg *)skb_network_header(skb);
634 memcpy(msg, skb_network_header(pkt), sizeof(struct iphdr));
635 msg->im_msgtype = IGMPMSG_WHOLEPKT;
636 msg->im_mbz = 0;
637 msg->im_vif = net->ipv4.mroute_reg_vif_num;
638 ip_hdr(skb)->ihl = sizeof(struct iphdr) >> 2;
639 ip_hdr(skb)->tot_len = htons(ntohs(ip_hdr(pkt)->tot_len) +
640 sizeof(struct iphdr));
641 } else
642 #endif
646 * Copy the IP header
649 skb->network_header = skb->tail;
650 skb_put(skb, ihl);
651 skb_copy_to_linear_data(skb, pkt->data, ihl);
652 ip_hdr(skb)->protocol = 0; /* Flag to the kernel this is a route add */
653 msg = (struct igmpmsg *)skb_network_header(skb);
654 msg->im_vif = vifi;
655 skb_dst_set(skb, dst_clone(skb_dst(pkt)));
658 * Add our header
661 igmp=(struct igmphdr *)skb_put(skb, sizeof(struct igmphdr));
662 igmp->type =
663 msg->im_msgtype = assert;
664 igmp->code = 0;
665 ip_hdr(skb)->tot_len = htons(skb->len); /* Fix the length */
666 skb->transport_header = skb->network_header;
669 if (net->ipv4.mroute_sk == NULL) {
670 kfree_skb(skb);
671 return -EINVAL;
675 * Deliver to mrouted
677 ret = sock_queue_rcv_skb(net->ipv4.mroute_sk, skb);
678 if (ret < 0) {
679 if (net_ratelimit())
680 printk(KERN_WARNING "mroute: pending queue full, dropping entries.\n");
681 kfree_skb(skb);
684 return ret;
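/* A minimal sketch of the daemon side of this upcall, assuming a typical
 * mrouted-style setup where igmp_sock is the raw IGMP socket that issued
 * MRT_INIT; buffer size and the dispatch bodies are illustrative only:
 *
 *	char buf[1500];
 *	ssize_t n = recv(igmp_sock, buf, sizeof(buf), 0);
 *	struct igmpmsg *im = (struct igmpmsg *)buf;
 *	if (n >= (ssize_t)sizeof(*im) && im->im_mbz == 0) {
 *		switch (im->im_msgtype) {
 *		case IGMPMSG_NOCACHE:  ... install an MFC entry ...    break;
 *		case IGMPMSG_WRONGVIF: ... PIM assert processing ...    break;
 *		case IGMPMSG_WHOLEPKT: ... PIM register forwarding ...  break;
 *		}
 *	}
 */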
688 * Queue a packet for resolution. It gets locked cache entry!
691 static int
692 ipmr_cache_unresolved(struct net *net, vifi_t vifi, struct sk_buff *skb)
694 int err;
695 struct mfc_cache *c;
696 const struct iphdr *iph = ip_hdr(skb);
698 spin_lock_bh(&mfc_unres_lock);
699 for (c=mfc_unres_queue; c; c=c->next) {
700 if (net_eq(mfc_net(c), net) &&
701 c->mfc_mcastgrp == iph->daddr &&
702 c->mfc_origin == iph->saddr)
703 break;
706 if (c == NULL) {
708 * Create a new entry if allowable
711 if (atomic_read(&net->ipv4.cache_resolve_queue_len) >= 10 ||
712 (c = ipmr_cache_alloc_unres(net)) == NULL) {
713 spin_unlock_bh(&mfc_unres_lock);
715 kfree_skb(skb);
716 return -ENOBUFS;
720 * Fill in the new cache entry
722 c->mfc_parent = -1;
723 c->mfc_origin = iph->saddr;
724 c->mfc_mcastgrp = iph->daddr;
727 * Reflect first query at mrouted.
729 err = ipmr_cache_report(net, skb, vifi, IGMPMSG_NOCACHE);
730 if (err < 0) {
731 /* If the report failed throw the cache entry
732 out - Brad Parker
734 spin_unlock_bh(&mfc_unres_lock);
736 ipmr_cache_free(c);
737 kfree_skb(skb);
738 return err;
741 atomic_inc(&net->ipv4.cache_resolve_queue_len);
742 c->next = mfc_unres_queue;
743 mfc_unres_queue = c;
745 mod_timer(&ipmr_expire_timer, c->mfc_un.unres.expires);
749 * See if we can append the packet
751 if (c->mfc_un.unres.unresolved.qlen>3) {
752 kfree_skb(skb);
753 err = -ENOBUFS;
754 } else {
755 skb_queue_tail(&c->mfc_un.unres.unresolved, skb);
756 err = 0;
759 spin_unlock_bh(&mfc_unres_lock);
760 return err;
764 * MFC cache manipulation by user space mroute daemon
767 static int ipmr_mfc_delete(struct net *net, struct mfcctl *mfc)
769 int line;
770 struct mfc_cache *c, **cp;
772 line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);
774 for (cp = &net->ipv4.mfc_cache_array[line];
775 (c = *cp) != NULL; cp = &c->next) {
776 if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
777 c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) {
778 write_lock_bh(&mrt_lock);
779 *cp = c->next;
780 write_unlock_bh(&mrt_lock);
782 ipmr_cache_free(c);
783 return 0;
786 return -ENOENT;
789 static int ipmr_mfc_add(struct net *net, struct mfcctl *mfc, int mrtsock)
791 int line;
792 struct mfc_cache *uc, *c, **cp;
794 line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);
796 for (cp = &net->ipv4.mfc_cache_array[line];
797 (c = *cp) != NULL; cp = &c->next) {
798 if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
799 c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr)
800 break;
803 if (c != NULL) {
804 write_lock_bh(&mrt_lock);
805 c->mfc_parent = mfc->mfcc_parent;
806 ipmr_update_thresholds(c, mfc->mfcc_ttls);
807 if (!mrtsock)
808 c->mfc_flags |= MFC_STATIC;
809 write_unlock_bh(&mrt_lock);
810 return 0;
813 if (!ipv4_is_multicast(mfc->mfcc_mcastgrp.s_addr))
814 return -EINVAL;
816 c = ipmr_cache_alloc(net);
817 if (c == NULL)
818 return -ENOMEM;
820 c->mfc_origin = mfc->mfcc_origin.s_addr;
821 c->mfc_mcastgrp = mfc->mfcc_mcastgrp.s_addr;
822 c->mfc_parent = mfc->mfcc_parent;
823 ipmr_update_thresholds(c, mfc->mfcc_ttls);
824 if (!mrtsock)
825 c->mfc_flags |= MFC_STATIC;
827 write_lock_bh(&mrt_lock);
828 c->next = net->ipv4.mfc_cache_array[line];
829 net->ipv4.mfc_cache_array[line] = c;
830 write_unlock_bh(&mrt_lock);
833 * Check to see if we resolved a queued list. If so we
834 * need to send on the frames and tidy up.
836 spin_lock_bh(&mfc_unres_lock);
837 for (cp = &mfc_unres_queue; (uc=*cp) != NULL;
838 cp = &uc->next) {
839 if (net_eq(mfc_net(uc), net) &&
840 uc->mfc_origin == c->mfc_origin &&
841 uc->mfc_mcastgrp == c->mfc_mcastgrp) {
842 *cp = uc->next;
843 atomic_dec(&net->ipv4.cache_resolve_queue_len);
844 break;
847 if (mfc_unres_queue == NULL)
848 del_timer(&ipmr_expire_timer);
849 spin_unlock_bh(&mfc_unres_lock);
851 if (uc) {
852 ipmr_cache_resolve(uc, c);
853 ipmr_cache_free(uc);
855 return 0;
859 * Close the multicast socket, and clear the vif tables etc
862 static void mroute_clean_tables(struct net *net)
864 int i;
867 * Shut down all active vif entries
869 for (i = 0; i < net->ipv4.maxvif; i++) {
870 if (!(net->ipv4.vif_table[i].flags&VIFF_STATIC))
871 vif_delete(net, i, 0);
875 * Wipe the cache
877 for (i=0; i<MFC_LINES; i++) {
878 struct mfc_cache *c, **cp;
880 cp = &net->ipv4.mfc_cache_array[i];
881 while ((c = *cp) != NULL) {
882 if (c->mfc_flags&MFC_STATIC) {
883 cp = &c->next;
884 continue;
886 write_lock_bh(&mrt_lock);
887 *cp = c->next;
888 write_unlock_bh(&mrt_lock);
890 ipmr_cache_free(c);
894 if (atomic_read(&net->ipv4.cache_resolve_queue_len) != 0) {
895 struct mfc_cache *c, **cp;
897 spin_lock_bh(&mfc_unres_lock);
898 cp = &mfc_unres_queue;
899 while ((c = *cp) != NULL) {
900 if (!net_eq(mfc_net(c), net)) {
901 cp = &c->next;
902 continue;
904 *cp = c->next;
906 ipmr_destroy_unres(c);
908 spin_unlock_bh(&mfc_unres_lock);
912 static void mrtsock_destruct(struct sock *sk)
914 struct net *net = sock_net(sk);
916 rtnl_lock();
917 if (sk == net->ipv4.mroute_sk) {
918 IPV4_DEVCONF_ALL(net, MC_FORWARDING)--;
920 write_lock_bh(&mrt_lock);
921 net->ipv4.mroute_sk = NULL;
922 write_unlock_bh(&mrt_lock);
924 mroute_clean_tables(net);
926 rtnl_unlock();
930 * Socket options and virtual interface manipulation. The whole
931 * virtual interface system is a complete heap, but unfortunately
932 * that's how BSD mrouted happens to think. Maybe one day with a proper
933 * MOSPF/PIM router set up we can clean this up.
936 int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsigned int optlen)
938 int ret;
939 struct vifctl vif;
940 struct mfcctl mfc;
941 struct net *net = sock_net(sk);
943 if (optname != MRT_INIT) {
944 if (sk != net->ipv4.mroute_sk && !capable(CAP_NET_ADMIN))
945 return -EACCES;
948 switch (optname) {
949 case MRT_INIT:
950 if (sk->sk_type != SOCK_RAW ||
951 inet_sk(sk)->num != IPPROTO_IGMP)
952 return -EOPNOTSUPP;
953 if (optlen != sizeof(int))
954 return -ENOPROTOOPT;
956 rtnl_lock();
957 if (net->ipv4.mroute_sk) {
958 rtnl_unlock();
959 return -EADDRINUSE;
962 ret = ip_ra_control(sk, 1, mrtsock_destruct);
963 if (ret == 0) {
964 write_lock_bh(&mrt_lock);
965 net->ipv4.mroute_sk = sk;
966 write_unlock_bh(&mrt_lock);
968 IPV4_DEVCONF_ALL(net, MC_FORWARDING)++;
970 rtnl_unlock();
971 return ret;
972 case MRT_DONE:
973 if (sk != net->ipv4.mroute_sk)
974 return -EACCES;
975 return ip_ra_control(sk, 0, NULL);
976 case MRT_ADD_VIF:
977 case MRT_DEL_VIF:
978 if (optlen != sizeof(vif))
979 return -EINVAL;
980 if (copy_from_user(&vif, optval, sizeof(vif)))
981 return -EFAULT;
982 if (vif.vifc_vifi >= MAXVIFS)
983 return -ENFILE;
984 rtnl_lock();
985 if (optname == MRT_ADD_VIF) {
986 ret = vif_add(net, &vif, sk == net->ipv4.mroute_sk);
987 } else {
988 ret = vif_delete(net, vif.vifc_vifi, 0);
990 rtnl_unlock();
991 return ret;
994 * Manipulate the forwarding caches. These live
995 * in a sort of kernel/user symbiosis.
997 case MRT_ADD_MFC:
998 case MRT_DEL_MFC:
999 if (optlen != sizeof(mfc))
1000 return -EINVAL;
1001 if (copy_from_user(&mfc, optval, sizeof(mfc)))
1002 return -EFAULT;
1003 rtnl_lock();
1004 if (optname == MRT_DEL_MFC)
1005 ret = ipmr_mfc_delete(net, &mfc);
1006 else
1007 ret = ipmr_mfc_add(net, &mfc, sk == net->ipv4.mroute_sk);
1008 rtnl_unlock();
1009 return ret;
1011 * Control PIM assert.
1013 case MRT_ASSERT:
1015 int v;
1016 if (get_user(v,(int __user *)optval))
1017 return -EFAULT;
1018 net->ipv4.mroute_do_assert = (v) ? 1 : 0;
1019 return 0;
1021 #ifdef CONFIG_IP_PIMSM
1022 case MRT_PIM:
1024 int v;
1026 if (get_user(v,(int __user *)optval))
1027 return -EFAULT;
1028 v = (v) ? 1 : 0;
1030 rtnl_lock();
1031 ret = 0;
1032 if (v != net->ipv4.mroute_do_pim) {
1033 net->ipv4.mroute_do_pim = v;
1034 net->ipv4.mroute_do_assert = v;
1036 rtnl_unlock();
1037 return ret;
1039 #endif
1041 * Spurious command, or MRT_VERSION which you cannot
1042 * set.
1044 default:
1045 return -ENOPROTOOPT;
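/* A minimal userspace sketch of the call sequence this interface expects
 * (mrouted/pimd style); the variables local_if_addr, src and grp and the
 * two-VIF layout are assumptions for illustration only:
 *
 *	int s = socket(AF_INET, SOCK_RAW, IPPROTO_IGMP);
 *	int one = 1;
 *	setsockopt(s, IPPROTO_IP, MRT_INIT, &one, sizeof(one));
 *
 *	struct vifctl vc = { .vifc_vifi = 0, .vifc_threshold = 1 };
 *	vc.vifc_lcl_addr.s_addr = local_if_addr;
 *	setsockopt(s, IPPROTO_IP, MRT_ADD_VIF, &vc, sizeof(vc));
 *
 *	struct mfcctl mc = { .mfcc_parent = 0 };
 *	mc.mfcc_origin.s_addr   = src;
 *	mc.mfcc_mcastgrp.s_addr = grp;
 *	mc.mfcc_ttls[1] = 1;       // forward on VIF 1 (added the same way)
 *	setsockopt(s, IPPROTO_IP, MRT_ADD_MFC, &mc, sizeof(mc));
 *	...
 *	setsockopt(s, IPPROTO_IP, MRT_DONE, NULL, 0);
 */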
1050 * Getsock opt support for the multicast routing system.
1053 int ip_mroute_getsockopt(struct sock *sk, int optname, char __user *optval, int __user *optlen)
1055 int olr;
1056 int val;
1057 struct net *net = sock_net(sk);
1059 if (optname != MRT_VERSION &&
1060 #ifdef CONFIG_IP_PIMSM
1061 optname!=MRT_PIM &&
1062 #endif
1063 optname!=MRT_ASSERT)
1064 return -ENOPROTOOPT;
1066 if (get_user(olr, optlen))
1067 return -EFAULT;
1069 olr = min_t(unsigned int, olr, sizeof(int));
1070 if (olr < 0)
1071 return -EINVAL;
1073 if (put_user(olr, optlen))
1074 return -EFAULT;
1075 if (optname == MRT_VERSION)
1076 val = 0x0305;
1077 #ifdef CONFIG_IP_PIMSM
1078 else if (optname == MRT_PIM)
1079 val = net->ipv4.mroute_do_pim;
1080 #endif
1081 else
1082 val = net->ipv4.mroute_do_assert;
1083 if (copy_to_user(optval, &val, olr))
1084 return -EFAULT;
1085 return 0;
1089 * The IP multicast ioctl support routines.
1092 int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg)
1094 struct sioc_sg_req sr;
1095 struct sioc_vif_req vr;
1096 struct vif_device *vif;
1097 struct mfc_cache *c;
1098 struct net *net = sock_net(sk);
1100 switch (cmd) {
1101 case SIOCGETVIFCNT:
1102 if (copy_from_user(&vr, arg, sizeof(vr)))
1103 return -EFAULT;
1104 if (vr.vifi >= net->ipv4.maxvif)
1105 return -EINVAL;
1106 read_lock(&mrt_lock);
1107 vif = &net->ipv4.vif_table[vr.vifi];
1108 if (VIF_EXISTS(net, vr.vifi)) {
1109 vr.icount = vif->pkt_in;
1110 vr.ocount = vif->pkt_out;
1111 vr.ibytes = vif->bytes_in;
1112 vr.obytes = vif->bytes_out;
1113 read_unlock(&mrt_lock);
1115 if (copy_to_user(arg, &vr, sizeof(vr)))
1116 return -EFAULT;
1117 return 0;
1119 read_unlock(&mrt_lock);
1120 return -EADDRNOTAVAIL;
1121 case SIOCGETSGCNT:
1122 if (copy_from_user(&sr, arg, sizeof(sr)))
1123 return -EFAULT;
1125 read_lock(&mrt_lock);
1126 c = ipmr_cache_find(net, sr.src.s_addr, sr.grp.s_addr);
1127 if (c) {
1128 sr.pktcnt = c->mfc_un.res.pkt;
1129 sr.bytecnt = c->mfc_un.res.bytes;
1130 sr.wrong_if = c->mfc_un.res.wrong_if;
1131 read_unlock(&mrt_lock);
1133 if (copy_to_user(arg, &sr, sizeof(sr)))
1134 return -EFAULT;
1135 return 0;
1137 read_unlock(&mrt_lock);
1138 return -EADDRNOTAVAIL;
1139 default:
1140 return -ENOIOCTLCMD;
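/* A minimal sketch of querying these counters from userspace, assuming
 * "s" is the mroute socket and src/grp identify the flow of interest:
 *
 *	struct sioc_sg_req sg = { 0 };
 *	sg.src.s_addr = src;
 *	sg.grp.s_addr = grp;
 *	if (ioctl(s, SIOCGETSGCNT, &sg) == 0)
 *		printf("pkts %lu bytes %lu wrong_if %lu\n",
 *		       sg.pktcnt, sg.bytecnt, sg.wrong_if);
 */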
1145 static int ipmr_device_event(struct notifier_block *this, unsigned long event, void *ptr)
1147 struct net_device *dev = ptr;
1148 struct net *net = dev_net(dev);
1149 struct vif_device *v;
1150 int ct;
1152 if (!net_eq(dev_net(dev), net))
1153 return NOTIFY_DONE;
1155 if (event != NETDEV_UNREGISTER)
1156 return NOTIFY_DONE;
1157 v = &net->ipv4.vif_table[0];
1158 for (ct = 0; ct < net->ipv4.maxvif; ct++, v++) {
1159 if (v->dev == dev)
1160 vif_delete(net, ct, 1);
1162 return NOTIFY_DONE;
1166 static struct notifier_block ip_mr_notifier = {
1167 .notifier_call = ipmr_device_event,
1171 * Encapsulate a packet by attaching a valid IPIP header to it.
1172 * This avoids tunnel drivers and other mess and gives us the speed so
1173 * important for multicast video.
1176 static void ip_encap(struct sk_buff *skb, __be32 saddr, __be32 daddr)
1178 struct iphdr *iph;
1179 struct iphdr *old_iph = ip_hdr(skb);
1181 skb_push(skb, sizeof(struct iphdr));
1182 skb->transport_header = skb->network_header;
1183 skb_reset_network_header(skb);
1184 iph = ip_hdr(skb);
1186 iph->version = 4;
1187 iph->tos = old_iph->tos;
1188 iph->ttl = old_iph->ttl;
1189 iph->frag_off = 0;
1190 iph->daddr = daddr;
1191 iph->saddr = saddr;
1192 iph->protocol = IPPROTO_IPIP;
1193 iph->ihl = 5;
1194 iph->tot_len = htons(skb->len);
1195 ip_select_ident(iph, skb_dst(skb), NULL);
1196 ip_send_check(iph);
1198 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
1199 nf_reset(skb);
1202 static inline int ipmr_forward_finish(struct sk_buff *skb)
1204 struct ip_options * opt = &(IPCB(skb)->opt);
1206 IP_INC_STATS_BH(dev_net(skb_dst(skb)->dev), IPSTATS_MIB_OUTFORWDATAGRAMS);
1208 if (unlikely(opt->optlen))
1209 ip_forward_options(skb);
1211 return dst_output(skb);
1215 * Processing handlers for ipmr_forward
1218 static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, int vifi)
1220 struct net *net = mfc_net(c);
1221 const struct iphdr *iph = ip_hdr(skb);
1222 struct vif_device *vif = &net->ipv4.vif_table[vifi];
1223 struct net_device *dev;
1224 struct rtable *rt;
1225 int encap = 0;
1227 if (vif->dev == NULL)
1228 goto out_free;
1230 #ifdef CONFIG_IP_PIMSM
1231 if (vif->flags & VIFF_REGISTER) {
1232 vif->pkt_out++;
1233 vif->bytes_out += skb->len;
1234 vif->dev->stats.tx_bytes += skb->len;
1235 vif->dev->stats.tx_packets++;
1236 ipmr_cache_report(net, skb, vifi, IGMPMSG_WHOLEPKT);
1237 goto out_free;
1239 #endif
1241 if (vif->flags&VIFF_TUNNEL) {
1242 struct flowi fl = { .oif = vif->link,
1243 .nl_u = { .ip4_u =
1244 { .daddr = vif->remote,
1245 .saddr = vif->local,
1246 .tos = RT_TOS(iph->tos) } },
1247 .proto = IPPROTO_IPIP };
1248 if (ip_route_output_key(net, &rt, &fl))
1249 goto out_free;
1250 encap = sizeof(struct iphdr);
1251 } else {
1252 struct flowi fl = { .oif = vif->link,
1253 .nl_u = { .ip4_u =
1254 { .daddr = iph->daddr,
1255 .tos = RT_TOS(iph->tos) } },
1256 .proto = IPPROTO_IPIP };
1257 if (ip_route_output_key(net, &rt, &fl))
1258 goto out_free;
1261 dev = rt->u.dst.dev;
1263 if (skb->len+encap > dst_mtu(&rt->u.dst) && (ntohs(iph->frag_off) & IP_DF)) {
1264 /* Do not fragment multicasts. Alas, IPv4 does not
1265 allow us to send ICMP here, so such packets will
1266 disappear into a black hole.
1269 IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_FRAGFAILS);
1270 ip_rt_put(rt);
1271 goto out_free;
1274 encap += LL_RESERVED_SPACE(dev) + rt->u.dst.header_len;
1276 if (skb_cow(skb, encap)) {
1277 ip_rt_put(rt);
1278 goto out_free;
1281 vif->pkt_out++;
1282 vif->bytes_out += skb->len;
1284 skb_dst_drop(skb);
1285 skb_dst_set(skb, &rt->u.dst);
1286 ip_decrease_ttl(ip_hdr(skb));
1288 /* FIXME: forward and output firewalls used to be called here.
1289 * What do we do with netfilter? -- RR */
1290 if (vif->flags & VIFF_TUNNEL) {
1291 ip_encap(skb, vif->local, vif->remote);
1292 /* FIXME: extra output firewall step used to be here. --RR */
1293 vif->dev->stats.tx_packets++;
1294 vif->dev->stats.tx_bytes += skb->len;
1297 IPCB(skb)->flags |= IPSKB_FORWARDED;
1300 * RFC1584 teaches that a DVMRP/PIM router must deliver packets locally
1301 * not only before forwarding, but after forwarding on all output
1302 * interfaces. Clearly, if the mrouter runs a multicasting
1303 * program, that program should receive packets regardless of which
1304 * interface it has joined on.
1305 * If we do not do this, the program will have to join on all
1306 * interfaces. On the other hand, a multihoming host (or a router that
1307 * is not an mrouter) must not join on more than one interface - that
1308 * would result in receiving multiple copies of each packet.
1310 NF_HOOK(PF_INET, NF_INET_FORWARD, skb, skb->dev, dev,
1311 ipmr_forward_finish);
1312 return;
1314 out_free:
1315 kfree_skb(skb);
1316 return;
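/* Map a net_device back to its VIF index; returns -1 if the device is
 * not configured as a VIF.  Called under mrt_lock.
 */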
1319 static int ipmr_find_vif(struct net_device *dev)
1321 struct net *net = dev_net(dev);
1322 int ct;
1323 for (ct = net->ipv4.maxvif-1; ct >= 0; ct--) {
1324 if (net->ipv4.vif_table[ct].dev == dev)
1325 break;
1327 return ct;
1330 /* "local" means that we should preserve one skb (for local delivery) */
1332 static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local)
1334 int psend = -1;
1335 int vif, ct;
1336 struct net *net = mfc_net(cache);
1338 vif = cache->mfc_parent;
1339 cache->mfc_un.res.pkt++;
1340 cache->mfc_un.res.bytes += skb->len;
1343 * Wrong interface: drop packet and (maybe) send PIM assert.
1345 if (net->ipv4.vif_table[vif].dev != skb->dev) {
1346 int true_vifi;
1348 if (skb_rtable(skb)->fl.iif == 0) {
1349 /* It is our own packet, looped back.
1350 Very complicated situation...
1352 The best workaround, until routing daemons are
1353 fixed, is not to redistribute a packet if it was
1354 sent through the wrong interface. It means that
1355 multicast applications WILL NOT work for
1356 (S,G) pairs whose default multicast route points
1357 to the wrong oif. In any case, it is not a good
1358 idea to run multicasting applications on a router.
1360 goto dont_forward;
1363 cache->mfc_un.res.wrong_if++;
1364 true_vifi = ipmr_find_vif(skb->dev);
1366 if (true_vifi >= 0 && net->ipv4.mroute_do_assert &&
1367 /* pimsm uses asserts, when switching from RPT to SPT,
1368 so that we cannot check that packet arrived on an oif.
1369 It is bad, but otherwise we would need to move pretty
1370 large chunk of pimd to kernel. Ough... --ANK
1372 (net->ipv4.mroute_do_pim ||
1373 cache->mfc_un.res.ttls[true_vifi] < 255) &&
1374 time_after(jiffies,
1375 cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) {
1376 cache->mfc_un.res.last_assert = jiffies;
1377 ipmr_cache_report(net, skb, true_vifi, IGMPMSG_WRONGVIF);
1379 goto dont_forward;
1382 net->ipv4.vif_table[vif].pkt_in++;
1383 net->ipv4.vif_table[vif].bytes_in += skb->len;
1386 * Forward the frame
1388 for (ct = cache->mfc_un.res.maxvif-1; ct >= cache->mfc_un.res.minvif; ct--) {
1389 if (ip_hdr(skb)->ttl > cache->mfc_un.res.ttls[ct]) {
1390 if (psend != -1) {
1391 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
1392 if (skb2)
1393 ipmr_queue_xmit(skb2, cache, psend);
1395 psend = ct;
1398 if (psend != -1) {
1399 if (local) {
1400 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
1401 if (skb2)
1402 ipmr_queue_xmit(skb2, cache, psend);
1403 } else {
1404 ipmr_queue_xmit(skb, cache, psend);
1405 return 0;
1409 dont_forward:
1410 if (!local)
1411 kfree_skb(skb);
1412 return 0;
1417 * Multicast packets for forwarding arrive here
1420 int ip_mr_input(struct sk_buff *skb)
1422 struct mfc_cache *cache;
1423 struct net *net = dev_net(skb->dev);
1424 int local = skb_rtable(skb)->rt_flags & RTCF_LOCAL;
1426 /* Packet is looped back after forwarding; it should not be
1427 forwarded a second time, but it can still be delivered locally.
1429 if (IPCB(skb)->flags&IPSKB_FORWARDED)
1430 goto dont_forward;
1432 if (!local) {
1433 if (IPCB(skb)->opt.router_alert) {
1434 if (ip_call_ra_chain(skb))
1435 return 0;
1436 } else if (ip_hdr(skb)->protocol == IPPROTO_IGMP){
1437 /* IGMPv1 (and broken IGMPv2 implementations such as
1438 Cisco IOS <= 11.2(8)) do not put the router alert
1439 option in IGMP packets destined to routable
1440 groups. This is very bad, because it means
1441 that we can forward NO IGMP messages.
1443 read_lock(&mrt_lock);
1444 if (net->ipv4.mroute_sk) {
1445 nf_reset(skb);
1446 raw_rcv(net->ipv4.mroute_sk, skb);
1447 read_unlock(&mrt_lock);
1448 return 0;
1450 read_unlock(&mrt_lock);
1454 read_lock(&mrt_lock);
1455 cache = ipmr_cache_find(net, ip_hdr(skb)->saddr, ip_hdr(skb)->daddr);
1458 * No usable cache entry
1460 if (cache == NULL) {
1461 int vif;
1463 if (local) {
1464 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
1465 ip_local_deliver(skb);
1466 if (skb2 == NULL) {
1467 read_unlock(&mrt_lock);
1468 return -ENOBUFS;
1470 skb = skb2;
1473 vif = ipmr_find_vif(skb->dev);
1474 if (vif >= 0) {
1475 int err = ipmr_cache_unresolved(net, vif, skb);
1476 read_unlock(&mrt_lock);
1478 return err;
1480 read_unlock(&mrt_lock);
1481 kfree_skb(skb);
1482 return -ENODEV;
1485 ip_mr_forward(skb, cache, local);
1487 read_unlock(&mrt_lock);
1489 if (local)
1490 return ip_local_deliver(skb);
1492 return 0;
1494 dont_forward:
1495 if (local)
1496 return ip_local_deliver(skb);
1497 kfree_skb(skb);
1498 return 0;
1501 #ifdef CONFIG_IP_PIMSM
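/* Common PIMv1/PIMv2 REGISTER decapsulation: sanity-check the inner IP
 * header, then re-inject the encapsulated packet on the pimreg device so
 * it re-enters the input path as ordinary multicast.  Returns 0 if the
 * skb was consumed, 1 if the caller should drop it.
 */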
1502 static int __pim_rcv(struct sk_buff *skb, unsigned int pimlen)
1504 struct net_device *reg_dev = NULL;
1505 struct iphdr *encap;
1506 struct net *net = dev_net(skb->dev);
1508 encap = (struct iphdr *)(skb_transport_header(skb) + pimlen);
1510 Check that:
1511 a. packet is really destined to a multicast group
1512 b. packet is not a NULL-REGISTER
1513 c. packet is not truncated
1515 if (!ipv4_is_multicast(encap->daddr) ||
1516 encap->tot_len == 0 ||
1517 ntohs(encap->tot_len) + pimlen > skb->len)
1518 return 1;
1520 read_lock(&mrt_lock);
1521 if (net->ipv4.mroute_reg_vif_num >= 0)
1522 reg_dev = net->ipv4.vif_table[net->ipv4.mroute_reg_vif_num].dev;
1523 if (reg_dev)
1524 dev_hold(reg_dev);
1525 read_unlock(&mrt_lock);
1527 if (reg_dev == NULL)
1528 return 1;
1530 skb->mac_header = skb->network_header;
1531 skb_pull(skb, (u8*)encap - skb->data);
1532 skb_reset_network_header(skb);
1533 skb->dev = reg_dev;
1534 skb->protocol = htons(ETH_P_IP);
1535 skb->ip_summed = 0;
1536 skb->pkt_type = PACKET_HOST;
1537 skb_dst_drop(skb);
1538 reg_dev->stats.rx_bytes += skb->len;
1539 reg_dev->stats.rx_packets++;
1540 nf_reset(skb);
1541 netif_rx(skb);
1542 dev_put(reg_dev);
1544 return 0;
1546 #endif
1548 #ifdef CONFIG_IP_PIMSM_V1
1550 * Handle IGMP messages of PIMv1
1553 int pim_rcv_v1(struct sk_buff * skb)
1555 struct igmphdr *pim;
1556 struct net *net = dev_net(skb->dev);
1558 if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr)))
1559 goto drop;
1561 pim = igmp_hdr(skb);
1563 if (!net->ipv4.mroute_do_pim ||
1564 pim->group != PIM_V1_VERSION || pim->code != PIM_V1_REGISTER)
1565 goto drop;
1567 if (__pim_rcv(skb, sizeof(*pim))) {
1568 drop:
1569 kfree_skb(skb);
1571 return 0;
1573 #endif
1575 #ifdef CONFIG_IP_PIMSM_V2
1576 static int pim_rcv(struct sk_buff * skb)
1578 struct pimreghdr *pim;
1580 if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr)))
1581 goto drop;
1583 pim = (struct pimreghdr *)skb_transport_header(skb);
1584 if (pim->type != ((PIM_VERSION<<4)|(PIM_REGISTER)) ||
1585 (pim->flags&PIM_NULL_REGISTER) ||
1586 (ip_compute_csum((void *)pim, sizeof(*pim)) != 0 &&
1587 csum_fold(skb_checksum(skb, 0, skb->len, 0))))
1588 goto drop;
1590 if (__pim_rcv(skb, sizeof(*pim))) {
1591 drop:
1592 kfree_skb(skb);
1594 return 0;
1596 #endif
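/* Fill an rtnetlink reply for a resolved MFC entry: RTA_IIF for the
 * parent VIF's device plus an RTA_MULTIPATH nexthop list carrying one
 * hop (with its TTL threshold) per forwarding VIF.
 */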
1598 static int
1599 ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm)
1601 int ct;
1602 struct rtnexthop *nhp;
1603 struct net *net = mfc_net(c);
1604 struct net_device *dev = net->ipv4.vif_table[c->mfc_parent].dev;
1605 u8 *b = skb_tail_pointer(skb);
1606 struct rtattr *mp_head;
1608 if (dev)
1609 RTA_PUT(skb, RTA_IIF, 4, &dev->ifindex);
1611 mp_head = (struct rtattr *)skb_put(skb, RTA_LENGTH(0));
1613 for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
1614 if (c->mfc_un.res.ttls[ct] < 255) {
1615 if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4))
1616 goto rtattr_failure;
1617 nhp = (struct rtnexthop *)skb_put(skb, RTA_ALIGN(sizeof(*nhp)));
1618 nhp->rtnh_flags = 0;
1619 nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
1620 nhp->rtnh_ifindex = net->ipv4.vif_table[ct].dev->ifindex;
1621 nhp->rtnh_len = sizeof(*nhp);
1624 mp_head->rta_type = RTA_MULTIPATH;
1625 mp_head->rta_len = skb_tail_pointer(skb) - (u8 *)mp_head;
1626 rtm->rtm_type = RTN_MULTICAST;
1627 return 1;
1629 rtattr_failure:
1630 nlmsg_trim(skb, b);
1631 return -EMSGSIZE;
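/* Resolve the (rt_src, rt_dst) of an skb's route into MFC information
 * for an rtnetlink RTM_GETROUTE reply.  If no cache entry exists yet, a
 * copy of the skb is queued for resolution with a version-0 IP header as
 * the marker that ipmr_cache_resolve() and ipmr_destroy_unres() recognise
 * when they later answer the netlink request.
 */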
1634 int ipmr_get_route(struct net *net,
1635 struct sk_buff *skb, struct rtmsg *rtm, int nowait)
1637 int err;
1638 struct mfc_cache *cache;
1639 struct rtable *rt = skb_rtable(skb);
1641 read_lock(&mrt_lock);
1642 cache = ipmr_cache_find(net, rt->rt_src, rt->rt_dst);
1644 if (cache == NULL) {
1645 struct sk_buff *skb2;
1646 struct iphdr *iph;
1647 struct net_device *dev;
1648 int vif;
1650 if (nowait) {
1651 read_unlock(&mrt_lock);
1652 return -EAGAIN;
1655 dev = skb->dev;
1656 if (dev == NULL || (vif = ipmr_find_vif(dev)) < 0) {
1657 read_unlock(&mrt_lock);
1658 return -ENODEV;
1660 skb2 = skb_clone(skb, GFP_ATOMIC);
1661 if (!skb2) {
1662 read_unlock(&mrt_lock);
1663 return -ENOMEM;
1666 skb_push(skb2, sizeof(struct iphdr));
1667 skb_reset_network_header(skb2);
1668 iph = ip_hdr(skb2);
1669 iph->ihl = sizeof(struct iphdr) >> 2;
1670 iph->saddr = rt->rt_src;
1671 iph->daddr = rt->rt_dst;
1672 iph->version = 0;
1673 err = ipmr_cache_unresolved(net, vif, skb2);
1674 read_unlock(&mrt_lock);
1675 return err;
1678 if (!nowait && (rtm->rtm_flags&RTM_F_NOTIFY))
1679 cache->mfc_flags |= MFC_NOTIFY;
1680 err = ipmr_fill_mroute(skb, cache, rtm);
1681 read_unlock(&mrt_lock);
1682 return err;
1685 #ifdef CONFIG_PROC_FS
1687 * The /proc interfaces to multicast routing /proc/ip_mr_cache /proc/ip_mr_vif
1689 struct ipmr_vif_iter {
1690 struct seq_net_private p;
1691 int ct;
1694 static struct vif_device *ipmr_vif_seq_idx(struct net *net,
1695 struct ipmr_vif_iter *iter,
1696 loff_t pos)
1698 for (iter->ct = 0; iter->ct < net->ipv4.maxvif; ++iter->ct) {
1699 if (!VIF_EXISTS(net, iter->ct))
1700 continue;
1701 if (pos-- == 0)
1702 return &net->ipv4.vif_table[iter->ct];
1704 return NULL;
1707 static void *ipmr_vif_seq_start(struct seq_file *seq, loff_t *pos)
1708 __acquires(mrt_lock)
1710 struct net *net = seq_file_net(seq);
1712 read_lock(&mrt_lock);
1713 return *pos ? ipmr_vif_seq_idx(net, seq->private, *pos - 1)
1714 : SEQ_START_TOKEN;
1717 static void *ipmr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1719 struct ipmr_vif_iter *iter = seq->private;
1720 struct net *net = seq_file_net(seq);
1722 ++*pos;
1723 if (v == SEQ_START_TOKEN)
1724 return ipmr_vif_seq_idx(net, iter, 0);
1726 while (++iter->ct < net->ipv4.maxvif) {
1727 if (!VIF_EXISTS(net, iter->ct))
1728 continue;
1729 return &net->ipv4.vif_table[iter->ct];
1731 return NULL;
1734 static void ipmr_vif_seq_stop(struct seq_file *seq, void *v)
1735 __releases(mrt_lock)
1737 read_unlock(&mrt_lock);
1740 static int ipmr_vif_seq_show(struct seq_file *seq, void *v)
1742 struct net *net = seq_file_net(seq);
1744 if (v == SEQ_START_TOKEN) {
1745 seq_puts(seq,
1746 "Interface BytesIn PktsIn BytesOut PktsOut Flags Local Remote\n");
1747 } else {
1748 const struct vif_device *vif = v;
1749 const char *name = vif->dev ? vif->dev->name : "none";
1751 seq_printf(seq,
1752 "%2Zd %-10s %8ld %7ld %8ld %7ld %05X %08X %08X\n",
1753 vif - net->ipv4.vif_table,
1754 name, vif->bytes_in, vif->pkt_in,
1755 vif->bytes_out, vif->pkt_out,
1756 vif->flags, vif->local, vif->remote);
1758 return 0;
1761 static const struct seq_operations ipmr_vif_seq_ops = {
1762 .start = ipmr_vif_seq_start,
1763 .next = ipmr_vif_seq_next,
1764 .stop = ipmr_vif_seq_stop,
1765 .show = ipmr_vif_seq_show,
1768 static int ipmr_vif_open(struct inode *inode, struct file *file)
1770 return seq_open_net(inode, file, &ipmr_vif_seq_ops,
1771 sizeof(struct ipmr_vif_iter));
1774 static const struct file_operations ipmr_vif_fops = {
1775 .owner = THIS_MODULE,
1776 .open = ipmr_vif_open,
1777 .read = seq_read,
1778 .llseek = seq_lseek,
1779 .release = seq_release_net,
1782 struct ipmr_mfc_iter {
1783 struct seq_net_private p;
1784 struct mfc_cache **cache;
1785 int ct;
1789 static struct mfc_cache *ipmr_mfc_seq_idx(struct net *net,
1790 struct ipmr_mfc_iter *it, loff_t pos)
1792 struct mfc_cache *mfc;
1794 it->cache = net->ipv4.mfc_cache_array;
1795 read_lock(&mrt_lock);
1796 for (it->ct = 0; it->ct < MFC_LINES; it->ct++)
1797 for (mfc = net->ipv4.mfc_cache_array[it->ct];
1798 mfc; mfc = mfc->next)
1799 if (pos-- == 0)
1800 return mfc;
1801 read_unlock(&mrt_lock);
1803 it->cache = &mfc_unres_queue;
1804 spin_lock_bh(&mfc_unres_lock);
1805 for (mfc = mfc_unres_queue; mfc; mfc = mfc->next)
1806 if (net_eq(mfc_net(mfc), net) &&
1807 pos-- == 0)
1808 return mfc;
1809 spin_unlock_bh(&mfc_unres_lock);
1811 it->cache = NULL;
1812 return NULL;
1816 static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
1818 struct ipmr_mfc_iter *it = seq->private;
1819 struct net *net = seq_file_net(seq);
1821 it->cache = NULL;
1822 it->ct = 0;
1823 return *pos ? ipmr_mfc_seq_idx(net, seq->private, *pos - 1)
1824 : SEQ_START_TOKEN;
1827 static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1829 struct mfc_cache *mfc = v;
1830 struct ipmr_mfc_iter *it = seq->private;
1831 struct net *net = seq_file_net(seq);
1833 ++*pos;
1835 if (v == SEQ_START_TOKEN)
1836 return ipmr_mfc_seq_idx(net, seq->private, 0);
1838 if (mfc->next)
1839 return mfc->next;
1841 if (it->cache == &mfc_unres_queue)
1842 goto end_of_list;
1844 BUG_ON(it->cache != net->ipv4.mfc_cache_array);
1846 while (++it->ct < MFC_LINES) {
1847 mfc = net->ipv4.mfc_cache_array[it->ct];
1848 if (mfc)
1849 return mfc;
1852 /* exhausted cache_array, show unresolved */
1853 read_unlock(&mrt_lock);
1854 it->cache = &mfc_unres_queue;
1855 it->ct = 0;
1857 spin_lock_bh(&mfc_unres_lock);
1858 mfc = mfc_unres_queue;
1859 while (mfc && !net_eq(mfc_net(mfc), net))
1860 mfc = mfc->next;
1861 if (mfc)
1862 return mfc;
1864 end_of_list:
1865 spin_unlock_bh(&mfc_unres_lock);
1866 it->cache = NULL;
1868 return NULL;
1871 static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
1873 struct ipmr_mfc_iter *it = seq->private;
1874 struct net *net = seq_file_net(seq);
1876 if (it->cache == &mfc_unres_queue)
1877 spin_unlock_bh(&mfc_unres_lock);
1878 else if (it->cache == net->ipv4.mfc_cache_array)
1879 read_unlock(&mrt_lock);
1882 static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
1884 int n;
1885 struct net *net = seq_file_net(seq);
1887 if (v == SEQ_START_TOKEN) {
1888 seq_puts(seq,
1889 "Group Origin Iif Pkts Bytes Wrong Oifs\n");
1890 } else {
1891 const struct mfc_cache *mfc = v;
1892 const struct ipmr_mfc_iter *it = seq->private;
1894 seq_printf(seq, "%08lX %08lX %-3hd",
1895 (unsigned long) mfc->mfc_mcastgrp,
1896 (unsigned long) mfc->mfc_origin,
1897 mfc->mfc_parent);
1899 if (it->cache != &mfc_unres_queue) {
1900 seq_printf(seq, " %8lu %8lu %8lu",
1901 mfc->mfc_un.res.pkt,
1902 mfc->mfc_un.res.bytes,
1903 mfc->mfc_un.res.wrong_if);
1904 for (n = mfc->mfc_un.res.minvif;
1905 n < mfc->mfc_un.res.maxvif; n++ ) {
1906 if (VIF_EXISTS(net, n) &&
1907 mfc->mfc_un.res.ttls[n] < 255)
1908 seq_printf(seq,
1909 " %2d:%-3d",
1910 n, mfc->mfc_un.res.ttls[n]);
1912 } else {
1913 /* unresolved mfc_caches don't contain
1914 * pkt, bytes and wrong_if values
1916 seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul);
1918 seq_putc(seq, '\n');
1920 return 0;
1923 static const struct seq_operations ipmr_mfc_seq_ops = {
1924 .start = ipmr_mfc_seq_start,
1925 .next = ipmr_mfc_seq_next,
1926 .stop = ipmr_mfc_seq_stop,
1927 .show = ipmr_mfc_seq_show,
1930 static int ipmr_mfc_open(struct inode *inode, struct file *file)
1932 return seq_open_net(inode, file, &ipmr_mfc_seq_ops,
1933 sizeof(struct ipmr_mfc_iter));
1936 static const struct file_operations ipmr_mfc_fops = {
1937 .owner = THIS_MODULE,
1938 .open = ipmr_mfc_open,
1939 .read = seq_read,
1940 .llseek = seq_lseek,
1941 .release = seq_release_net,
1943 #endif
1945 #ifdef CONFIG_IP_PIMSM_V2
1946 static const struct net_protocol pim_protocol = {
1947 .handler = pim_rcv,
1948 .netns_ok = 1,
1950 #endif
1954 * Setup for IP multicast routing
1956 static int __net_init ipmr_net_init(struct net *net)
1958 int err = 0;
1960 net->ipv4.vif_table = kcalloc(MAXVIFS, sizeof(struct vif_device),
1961 GFP_KERNEL);
1962 if (!net->ipv4.vif_table) {
1963 err = -ENOMEM;
1964 goto fail;
1967 /* Forwarding cache */
1968 net->ipv4.mfc_cache_array = kcalloc(MFC_LINES,
1969 sizeof(struct mfc_cache *),
1970 GFP_KERNEL);
1971 if (!net->ipv4.mfc_cache_array) {
1972 err = -ENOMEM;
1973 goto fail_mfc_cache;
1976 #ifdef CONFIG_IP_PIMSM
1977 net->ipv4.mroute_reg_vif_num = -1;
1978 #endif
1980 #ifdef CONFIG_PROC_FS
1981 err = -ENOMEM;
1982 if (!proc_net_fops_create(net, "ip_mr_vif", 0, &ipmr_vif_fops))
1983 goto proc_vif_fail;
1984 if (!proc_net_fops_create(net, "ip_mr_cache", 0, &ipmr_mfc_fops))
1985 goto proc_cache_fail;
1986 #endif
1987 return 0;
1989 #ifdef CONFIG_PROC_FS
1990 proc_cache_fail:
1991 proc_net_remove(net, "ip_mr_vif");
1992 proc_vif_fail:
1993 kfree(net->ipv4.mfc_cache_array);
1994 #endif
1995 fail_mfc_cache:
1996 kfree(net->ipv4.vif_table);
1997 fail:
1998 return err;
2001 static void __net_exit ipmr_net_exit(struct net *net)
2003 #ifdef CONFIG_PROC_FS
2004 proc_net_remove(net, "ip_mr_cache");
2005 proc_net_remove(net, "ip_mr_vif");
2006 #endif
2007 kfree(net->ipv4.mfc_cache_array);
2008 kfree(net->ipv4.vif_table);
2011 static struct pernet_operations ipmr_net_ops = {
2012 .init = ipmr_net_init,
2013 .exit = ipmr_net_exit,
2016 int __init ip_mr_init(void)
2018 int err;
2020 mrt_cachep = kmem_cache_create("ip_mrt_cache",
2021 sizeof(struct mfc_cache),
2022 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC,
2023 NULL);
2024 if (!mrt_cachep)
2025 return -ENOMEM;
2027 err = register_pernet_subsys(&ipmr_net_ops);
2028 if (err)
2029 goto reg_pernet_fail;
2031 setup_timer(&ipmr_expire_timer, ipmr_expire_process, 0);
2032 err = register_netdevice_notifier(&ip_mr_notifier);
2033 if (err)
2034 goto reg_notif_fail;
2035 #ifdef CONFIG_IP_PIMSM_V2
2036 if (inet_add_protocol(&pim_protocol, IPPROTO_PIM) < 0) {
2037 printk(KERN_ERR "ip_mr_init: can't add PIM protocol\n");
2038 err = -EAGAIN;
2039 goto add_proto_fail;
2041 #endif
2042 return 0;
2044 #ifdef CONFIG_IP_PIMSM_V2
2045 add_proto_fail:
2046 unregister_netdevice_notifier(&ip_mr_notifier);
2047 #endif
2048 reg_notif_fail:
2049 del_timer(&ipmr_expire_timer);
2050 unregister_pernet_subsys(&ipmr_net_ops);
2051 reg_pernet_fail:
2052 kmem_cache_destroy(mrt_cachep);
2053 return err;