ipv6: ip6mr: move mroute data into separate structure
[linux-2.6/linux-acpi-2.6/ibm-acpi-2.6.git] / net / ipv6 / ip6mr.c
blob9419fceeed411d49d804c8cc6004587db79ab23c
1 /*
2 * Linux IPv6 multicast routing support for BSD pim6sd
3 * Based on net/ipv4/ipmr.c.
5 * (c) 2004 Mickael Hoerdt, <hoerdt@clarinet.u-strasbg.fr>
6 * LSIIT Laboratory, Strasbourg, France
7 * (c) 2004 Jean-Philippe Andriot, <jean-philippe.andriot@6WIND.com>
8 * 6WIND, Paris, France
9 * Copyright (C)2007,2008 USAGI/WIDE Project
10 * YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation; either version
15 * 2 of the License, or (at your option) any later version.
19 #include <asm/system.h>
20 #include <asm/uaccess.h>
21 #include <linux/types.h>
22 #include <linux/sched.h>
23 #include <linux/errno.h>
24 #include <linux/timer.h>
25 #include <linux/mm.h>
26 #include <linux/kernel.h>
27 #include <linux/fcntl.h>
28 #include <linux/stat.h>
29 #include <linux/socket.h>
30 #include <linux/inet.h>
31 #include <linux/netdevice.h>
32 #include <linux/inetdevice.h>
33 #include <linux/proc_fs.h>
34 #include <linux/seq_file.h>
35 #include <linux/init.h>
36 #include <linux/slab.h>
37 #include <net/protocol.h>
38 #include <linux/skbuff.h>
39 #include <net/sock.h>
40 #include <net/raw.h>
41 #include <linux/notifier.h>
42 #include <linux/if_arp.h>
43 #include <net/checksum.h>
44 #include <net/netlink.h>
46 #include <net/ipv6.h>
47 #include <net/ip6_route.h>
48 #include <linux/mroute6.h>
49 #include <linux/pim.h>
50 #include <net/addrconf.h>
51 #include <linux/netfilter_ipv6.h>
52 #include <net/ip6_checksum.h>
54 struct mr6_table {
55 #ifdef CONFIG_NET_NS
56 struct net *net;
57 #endif
58 struct sock *mroute6_sk;
59 struct timer_list ipmr_expire_timer;
60 struct list_head mfc6_unres_queue;
61 struct list_head mfc6_cache_array[MFC6_LINES];
62 struct mif_device vif6_table[MAXMIFS];
63 int maxvif;
64 atomic_t cache_resolve_queue_len;
65 int mroute_do_assert;
66 int mroute_do_pim;
67 #ifdef CONFIG_IPV6_PIMSM_V2
68 int mroute_reg_vif_num;
69 #endif
72 /* Big lock, protecting vif table, mrt cache and mroute socket state.
73 Note that the changes are semaphored via rtnl_lock.
76 static DEFINE_RWLOCK(mrt_lock);
79 * Multicast router control variables
82 #define MIF_EXISTS(_mrt, _idx) ((_mrt)->vif6_table[_idx].dev != NULL)
84 /* Special spinlock for queue of unresolved entries */
85 static DEFINE_SPINLOCK(mfc_unres_lock);
87 /* We return to original Alan's scheme. Hash table of resolved
88 entries is changed only in process context and protected
89 with weak lock mrt_lock. Queue of unresolved entries is protected
90 with strong spinlock mfc_unres_lock.
92 In this case data path is free of exclusive locks at all.
95 static struct kmem_cache *mrt_cachep __read_mostly;
97 static int ip6_mr_forward(struct net *net, struct mr6_table *mrt,
98 struct sk_buff *skb, struct mfc6_cache *cache);
99 static int ip6mr_cache_report(struct mr6_table *mrt, struct sk_buff *pkt,
100 mifi_t mifi, int assert);
101 static int ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
102 struct mfc6_cache *c, struct rtmsg *rtm);
103 static void mroute_clean_tables(struct mr6_table *mrt);
106 #ifdef CONFIG_PROC_FS
108 struct ipmr_mfc_iter {
109 struct seq_net_private p;
110 struct list_head *cache;
111 int ct;
115 static struct mfc6_cache *ipmr_mfc_seq_idx(struct net *net,
116 struct ipmr_mfc_iter *it, loff_t pos)
118 struct mr6_table *mrt = net->ipv6.mrt6;
119 struct mfc6_cache *mfc;
121 read_lock(&mrt_lock);
122 for (it->ct = 0; it->ct < MFC6_LINES; it->ct++) {
123 it->cache = &mrt->mfc6_cache_array[it->ct];
124 list_for_each_entry(mfc, it->cache, list)
125 if (pos-- == 0)
126 return mfc;
128 read_unlock(&mrt_lock);
130 spin_lock_bh(&mfc_unres_lock);
131 it->cache = &mrt->mfc6_unres_queue;
132 list_for_each_entry(mfc, it->cache, list)
133 if (pos-- == 0)
134 return mfc;
135 spin_unlock_bh(&mfc_unres_lock);
137 it->cache = NULL;
138 return NULL;
142 * The /proc interfaces to multicast routing /proc/ip6_mr_cache /proc/ip6_mr_vif
145 struct ipmr_vif_iter {
146 struct seq_net_private p;
147 int ct;
150 static struct mif_device *ip6mr_vif_seq_idx(struct net *net,
151 struct ipmr_vif_iter *iter,
152 loff_t pos)
154 struct mr6_table *mrt = net->ipv6.mrt6;
156 for (iter->ct = 0; iter->ct < mrt->maxvif; ++iter->ct) {
157 if (!MIF_EXISTS(mrt, iter->ct))
158 continue;
159 if (pos-- == 0)
160 return &mrt->vif6_table[iter->ct];
162 return NULL;
165 static void *ip6mr_vif_seq_start(struct seq_file *seq, loff_t *pos)
166 __acquires(mrt_lock)
168 struct net *net = seq_file_net(seq);
170 read_lock(&mrt_lock);
171 return *pos ? ip6mr_vif_seq_idx(net, seq->private, *pos - 1)
172 : SEQ_START_TOKEN;
175 static void *ip6mr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
177 struct ipmr_vif_iter *iter = seq->private;
178 struct net *net = seq_file_net(seq);
179 struct mr6_table *mrt = net->ipv6.mrt6;
181 ++*pos;
182 if (v == SEQ_START_TOKEN)
183 return ip6mr_vif_seq_idx(net, iter, 0);
185 while (++iter->ct < mrt->maxvif) {
186 if (!MIF_EXISTS(mrt, iter->ct))
187 continue;
188 return &mrt->vif6_table[iter->ct];
190 return NULL;
193 static void ip6mr_vif_seq_stop(struct seq_file *seq, void *v)
194 __releases(mrt_lock)
196 read_unlock(&mrt_lock);
199 static int ip6mr_vif_seq_show(struct seq_file *seq, void *v)
201 struct net *net = seq_file_net(seq);
202 struct mr6_table *mrt = net->ipv6.mrt6;
204 if (v == SEQ_START_TOKEN) {
205 seq_puts(seq,
206 "Interface BytesIn PktsIn BytesOut PktsOut Flags\n");
207 } else {
208 const struct mif_device *vif = v;
209 const char *name = vif->dev ? vif->dev->name : "none";
211 seq_printf(seq,
212 "%2td %-10s %8ld %7ld %8ld %7ld %05X\n",
213 vif - mrt->vif6_table,
214 name, vif->bytes_in, vif->pkt_in,
215 vif->bytes_out, vif->pkt_out,
216 vif->flags);
218 return 0;
221 static const struct seq_operations ip6mr_vif_seq_ops = {
222 .start = ip6mr_vif_seq_start,
223 .next = ip6mr_vif_seq_next,
224 .stop = ip6mr_vif_seq_stop,
225 .show = ip6mr_vif_seq_show,
228 static int ip6mr_vif_open(struct inode *inode, struct file *file)
230 return seq_open_net(inode, file, &ip6mr_vif_seq_ops,
231 sizeof(struct ipmr_vif_iter));
234 static const struct file_operations ip6mr_vif_fops = {
235 .owner = THIS_MODULE,
236 .open = ip6mr_vif_open,
237 .read = seq_read,
238 .llseek = seq_lseek,
239 .release = seq_release_net,
242 static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
244 struct net *net = seq_file_net(seq);
246 return *pos ? ipmr_mfc_seq_idx(net, seq->private, *pos - 1)
247 : SEQ_START_TOKEN;
250 static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
252 struct mfc6_cache *mfc = v;
253 struct ipmr_mfc_iter *it = seq->private;
254 struct net *net = seq_file_net(seq);
255 struct mr6_table *mrt = net->ipv6.mrt6;
257 ++*pos;
259 if (v == SEQ_START_TOKEN)
260 return ipmr_mfc_seq_idx(net, seq->private, 0);
262 if (mfc->list.next != it->cache)
263 return list_entry(mfc->list.next, struct mfc6_cache, list);
265 if (it->cache == &mrt->mfc6_unres_queue)
266 goto end_of_list;
268 BUG_ON(it->cache != &mrt->mfc6_cache_array[it->ct]);
270 while (++it->ct < MFC6_LINES) {
271 it->cache = &mrt->mfc6_cache_array[it->ct];
272 if (list_empty(it->cache))
273 continue;
274 return list_first_entry(it->cache, struct mfc6_cache, list);
277 /* exhausted cache_array, show unresolved */
278 read_unlock(&mrt_lock);
279 it->cache = &mrt->mfc6_unres_queue;
280 it->ct = 0;
282 spin_lock_bh(&mfc_unres_lock);
283 if (!list_empty(it->cache))
284 return list_first_entry(it->cache, struct mfc6_cache, list);
286 end_of_list:
287 spin_unlock_bh(&mfc_unres_lock);
288 it->cache = NULL;
290 return NULL;
293 static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
295 struct ipmr_mfc_iter *it = seq->private;
296 struct net *net = seq_file_net(seq);
297 struct mr6_table *mrt = net->ipv6.mrt6;
299 if (it->cache == &mrt->mfc6_unres_queue)
300 spin_unlock_bh(&mfc_unres_lock);
301 else if (it->cache == mrt->mfc6_cache_array)
302 read_unlock(&mrt_lock);
305 static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
307 int n;
308 struct net *net = seq_file_net(seq);
309 struct mr6_table *mrt = net->ipv6.mrt6;
311 if (v == SEQ_START_TOKEN) {
312 seq_puts(seq,
313 "Group "
314 "Origin "
315 "Iif Pkts Bytes Wrong Oifs\n");
316 } else {
317 const struct mfc6_cache *mfc = v;
318 const struct ipmr_mfc_iter *it = seq->private;
320 seq_printf(seq, "%pI6 %pI6 %-3hd",
321 &mfc->mf6c_mcastgrp, &mfc->mf6c_origin,
322 mfc->mf6c_parent);
324 if (it->cache != &mrt->mfc6_unres_queue) {
325 seq_printf(seq, " %8lu %8lu %8lu",
326 mfc->mfc_un.res.pkt,
327 mfc->mfc_un.res.bytes,
328 mfc->mfc_un.res.wrong_if);
329 for (n = mfc->mfc_un.res.minvif;
330 n < mfc->mfc_un.res.maxvif; n++) {
331 if (MIF_EXISTS(mrt, n) &&
332 mfc->mfc_un.res.ttls[n] < 255)
333 seq_printf(seq,
334 " %2d:%-3d",
335 n, mfc->mfc_un.res.ttls[n]);
337 } else {
338 /* unresolved mfc_caches don't contain
339 * pkt, bytes and wrong_if values
341 seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul);
343 seq_putc(seq, '\n');
345 return 0;
348 static const struct seq_operations ipmr_mfc_seq_ops = {
349 .start = ipmr_mfc_seq_start,
350 .next = ipmr_mfc_seq_next,
351 .stop = ipmr_mfc_seq_stop,
352 .show = ipmr_mfc_seq_show,
355 static int ipmr_mfc_open(struct inode *inode, struct file *file)
357 return seq_open_net(inode, file, &ipmr_mfc_seq_ops,
358 sizeof(struct ipmr_mfc_iter));
361 static const struct file_operations ip6mr_mfc_fops = {
362 .owner = THIS_MODULE,
363 .open = ipmr_mfc_open,
364 .read = seq_read,
365 .llseek = seq_lseek,
366 .release = seq_release_net,
368 #endif
370 #ifdef CONFIG_IPV6_PIMSM_V2
372 static int pim6_rcv(struct sk_buff *skb)
374 struct pimreghdr *pim;
375 struct ipv6hdr *encap;
376 struct net_device *reg_dev = NULL;
377 struct net *net = dev_net(skb->dev);
378 struct mr6_table *mrt = net->ipv6.mrt6;
379 int reg_vif_num = mrt->mroute_reg_vif_num;
381 if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap)))
382 goto drop;
384 pim = (struct pimreghdr *)skb_transport_header(skb);
385 if (pim->type != ((PIM_VERSION << 4) | PIM_REGISTER) ||
386 (pim->flags & PIM_NULL_REGISTER) ||
387 (csum_ipv6_magic(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr,
388 sizeof(*pim), IPPROTO_PIM,
389 csum_partial((void *)pim, sizeof(*pim), 0)) &&
390 csum_fold(skb_checksum(skb, 0, skb->len, 0))))
391 goto drop;
393 /* check if the inner packet is destined to mcast group */
394 encap = (struct ipv6hdr *)(skb_transport_header(skb) +
395 sizeof(*pim));
397 if (!ipv6_addr_is_multicast(&encap->daddr) ||
398 encap->payload_len == 0 ||
399 ntohs(encap->payload_len) + sizeof(*pim) > skb->len)
400 goto drop;
402 read_lock(&mrt_lock);
403 if (reg_vif_num >= 0)
404 reg_dev = mrt->vif6_table[reg_vif_num].dev;
405 if (reg_dev)
406 dev_hold(reg_dev);
407 read_unlock(&mrt_lock);
409 if (reg_dev == NULL)
410 goto drop;
412 skb->mac_header = skb->network_header;
413 skb_pull(skb, (u8 *)encap - skb->data);
414 skb_reset_network_header(skb);
415 skb->dev = reg_dev;
416 skb->protocol = htons(ETH_P_IPV6);
417 skb->ip_summed = 0;
418 skb->pkt_type = PACKET_HOST;
419 skb_dst_drop(skb);
420 reg_dev->stats.rx_bytes += skb->len;
421 reg_dev->stats.rx_packets++;
422 nf_reset(skb);
423 netif_rx(skb);
424 dev_put(reg_dev);
425 return 0;
426 drop:
427 kfree_skb(skb);
428 return 0;
431 static const struct inet6_protocol pim6_protocol = {
432 .handler = pim6_rcv,
435 /* Service routines creating virtual interfaces: PIMREG */
437 static netdev_tx_t reg_vif_xmit(struct sk_buff *skb,
438 struct net_device *dev)
440 struct net *net = dev_net(dev);
441 struct mr6_table *mrt = net->ipv6.mrt6;
443 read_lock(&mrt_lock);
444 dev->stats.tx_bytes += skb->len;
445 dev->stats.tx_packets++;
446 ip6mr_cache_report(mrt, skb, mrt->mroute_reg_vif_num, MRT6MSG_WHOLEPKT);
447 read_unlock(&mrt_lock);
448 kfree_skb(skb);
449 return NETDEV_TX_OK;
452 static const struct net_device_ops reg_vif_netdev_ops = {
453 .ndo_start_xmit = reg_vif_xmit,
456 static void reg_vif_setup(struct net_device *dev)
458 dev->type = ARPHRD_PIMREG;
459 dev->mtu = 1500 - sizeof(struct ipv6hdr) - 8;
460 dev->flags = IFF_NOARP;
461 dev->netdev_ops = &reg_vif_netdev_ops;
462 dev->destructor = free_netdev;
463 dev->features |= NETIF_F_NETNS_LOCAL;
466 static struct net_device *ip6mr_reg_vif(struct net *net)
468 struct net_device *dev;
470 dev = alloc_netdev(0, "pim6reg", reg_vif_setup);
471 if (dev == NULL)
472 return NULL;
474 dev_net_set(dev, net);
476 if (register_netdevice(dev)) {
477 free_netdev(dev);
478 return NULL;
480 dev->iflink = 0;
482 if (dev_open(dev))
483 goto failure;
485 dev_hold(dev);
486 return dev;
488 failure:
489 /* allow the register to be completed before unregistering. */
490 rtnl_unlock();
491 rtnl_lock();
493 unregister_netdevice(dev);
494 return NULL;
496 #endif
499 * Delete a VIF entry
502 static int mif6_delete(struct mr6_table *mrt, int vifi, struct list_head *head)
504 struct mif_device *v;
505 struct net_device *dev;
506 struct inet6_dev *in6_dev;
508 if (vifi < 0 || vifi >= mrt->maxvif)
509 return -EADDRNOTAVAIL;
511 v = &mrt->vif6_table[vifi];
513 write_lock_bh(&mrt_lock);
514 dev = v->dev;
515 v->dev = NULL;
517 if (!dev) {
518 write_unlock_bh(&mrt_lock);
519 return -EADDRNOTAVAIL;
522 #ifdef CONFIG_IPV6_PIMSM_V2
523 if (vifi == mrt->mroute_reg_vif_num)
524 mrt->mroute_reg_vif_num = -1;
525 #endif
527 if (vifi + 1 == mrt->maxvif) {
528 int tmp;
529 for (tmp = vifi - 1; tmp >= 0; tmp--) {
530 if (MIF_EXISTS(mrt, tmp))
531 break;
533 mrt->maxvif = tmp + 1;
536 write_unlock_bh(&mrt_lock);
538 dev_set_allmulti(dev, -1);
540 in6_dev = __in6_dev_get(dev);
541 if (in6_dev)
542 in6_dev->cnf.mc_forwarding--;
544 if (v->flags & MIFF_REGISTER)
545 unregister_netdevice_queue(dev, head);
547 dev_put(dev);
548 return 0;
551 static inline void ip6mr_cache_free(struct mfc6_cache *c)
553 kmem_cache_free(mrt_cachep, c);
556 /* Destroy an unresolved cache entry, killing queued skbs
557 and reporting error to netlink readers.
560 static void ip6mr_destroy_unres(struct mr6_table *mrt, struct mfc6_cache *c)
562 struct net *net = read_pnet(&mrt->net);
563 struct sk_buff *skb;
565 atomic_dec(&mrt->cache_resolve_queue_len);
567 while((skb = skb_dequeue(&c->mfc_un.unres.unresolved)) != NULL) {
568 if (ipv6_hdr(skb)->version == 0) {
569 struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct ipv6hdr));
570 nlh->nlmsg_type = NLMSG_ERROR;
571 nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
572 skb_trim(skb, nlh->nlmsg_len);
573 ((struct nlmsgerr *)NLMSG_DATA(nlh))->error = -ETIMEDOUT;
574 rtnl_unicast(skb, net, NETLINK_CB(skb).pid);
575 } else
576 kfree_skb(skb);
579 ip6mr_cache_free(c);
583 /* Timer process for all the unresolved queue. */
585 static void ipmr_do_expire_process(struct mr6_table *mrt)
587 unsigned long now = jiffies;
588 unsigned long expires = 10 * HZ;
589 struct mfc6_cache *c, *next;
591 list_for_each_entry_safe(c, next, &mrt->mfc6_unres_queue, list) {
592 if (time_after(c->mfc_un.unres.expires, now)) {
593 /* not yet... */
594 unsigned long interval = c->mfc_un.unres.expires - now;
595 if (interval < expires)
596 expires = interval;
597 continue;
600 list_del(&c->list);
601 ip6mr_destroy_unres(mrt, c);
604 if (!list_empty(&mrt->mfc6_unres_queue))
605 mod_timer(&mrt->ipmr_expire_timer, jiffies + expires);
608 static void ipmr_expire_process(unsigned long arg)
610 struct mr6_table *mrt = (struct mr6_table *)arg;
612 if (!spin_trylock(&mfc_unres_lock)) {
613 mod_timer(&mrt->ipmr_expire_timer, jiffies + 1);
614 return;
617 if (!list_empty(&mrt->mfc6_unres_queue))
618 ipmr_do_expire_process(mrt);
620 spin_unlock(&mfc_unres_lock);
623 /* Fill oifs list. It is called under write locked mrt_lock. */
625 static void ip6mr_update_thresholds(struct mr6_table *mrt, struct mfc6_cache *cache,
626 unsigned char *ttls)
628 int vifi;
630 cache->mfc_un.res.minvif = MAXMIFS;
631 cache->mfc_un.res.maxvif = 0;
632 memset(cache->mfc_un.res.ttls, 255, MAXMIFS);
634 for (vifi = 0; vifi < mrt->maxvif; vifi++) {
635 if (MIF_EXISTS(mrt, vifi) &&
636 ttls[vifi] && ttls[vifi] < 255) {
637 cache->mfc_un.res.ttls[vifi] = ttls[vifi];
638 if (cache->mfc_un.res.minvif > vifi)
639 cache->mfc_un.res.minvif = vifi;
640 if (cache->mfc_un.res.maxvif <= vifi)
641 cache->mfc_un.res.maxvif = vifi + 1;
646 static int mif6_add(struct net *net, struct mr6_table *mrt,
647 struct mif6ctl *vifc, int mrtsock)
649 int vifi = vifc->mif6c_mifi;
650 struct mif_device *v = &mrt->vif6_table[vifi];
651 struct net_device *dev;
652 struct inet6_dev *in6_dev;
653 int err;
655 /* Is vif busy ? */
656 if (MIF_EXISTS(mrt, vifi))
657 return -EADDRINUSE;
659 switch (vifc->mif6c_flags) {
660 #ifdef CONFIG_IPV6_PIMSM_V2
661 case MIFF_REGISTER:
663 * Special Purpose VIF in PIM
664 * All the packets will be sent to the daemon
666 if (mrt->mroute_reg_vif_num >= 0)
667 return -EADDRINUSE;
668 dev = ip6mr_reg_vif(net);
669 if (!dev)
670 return -ENOBUFS;
671 err = dev_set_allmulti(dev, 1);
672 if (err) {
673 unregister_netdevice(dev);
674 dev_put(dev);
675 return err;
677 break;
678 #endif
679 case 0:
680 dev = dev_get_by_index(net, vifc->mif6c_pifi);
681 if (!dev)
682 return -EADDRNOTAVAIL;
683 err = dev_set_allmulti(dev, 1);
684 if (err) {
685 dev_put(dev);
686 return err;
688 break;
689 default:
690 return -EINVAL;
693 in6_dev = __in6_dev_get(dev);
694 if (in6_dev)
695 in6_dev->cnf.mc_forwarding++;
698 * Fill in the VIF structures
700 v->rate_limit = vifc->vifc_rate_limit;
701 v->flags = vifc->mif6c_flags;
702 if (!mrtsock)
703 v->flags |= VIFF_STATIC;
704 v->threshold = vifc->vifc_threshold;
705 v->bytes_in = 0;
706 v->bytes_out = 0;
707 v->pkt_in = 0;
708 v->pkt_out = 0;
709 v->link = dev->ifindex;
710 if (v->flags & MIFF_REGISTER)
711 v->link = dev->iflink;
713 /* And finish update writing critical data */
714 write_lock_bh(&mrt_lock);
715 v->dev = dev;
716 #ifdef CONFIG_IPV6_PIMSM_V2
717 if (v->flags & MIFF_REGISTER)
718 mrt->mroute_reg_vif_num = vifi;
719 #endif
720 if (vifi + 1 > mrt->maxvif)
721 mrt->maxvif = vifi + 1;
722 write_unlock_bh(&mrt_lock);
723 return 0;
726 static struct mfc6_cache *ip6mr_cache_find(struct mr6_table *mrt,
727 struct in6_addr *origin,
728 struct in6_addr *mcastgrp)
730 int line = MFC6_HASH(mcastgrp, origin);
731 struct mfc6_cache *c;
733 list_for_each_entry(c, &mrt->mfc6_cache_array[line], list) {
734 if (ipv6_addr_equal(&c->mf6c_origin, origin) &&
735 ipv6_addr_equal(&c->mf6c_mcastgrp, mcastgrp))
736 return c;
738 return NULL;
742 * Allocate a multicast cache entry
744 static struct mfc6_cache *ip6mr_cache_alloc(void)
746 struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
747 if (c == NULL)
748 return NULL;
749 c->mfc_un.res.minvif = MAXMIFS;
750 return c;
753 static struct mfc6_cache *ip6mr_cache_alloc_unres(void)
755 struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
756 if (c == NULL)
757 return NULL;
758 skb_queue_head_init(&c->mfc_un.unres.unresolved);
759 c->mfc_un.unres.expires = jiffies + 10 * HZ;
760 return c;
764 * A cache entry has gone into a resolved state from queued
767 static void ip6mr_cache_resolve(struct net *net, struct mr6_table *mrt,
768 struct mfc6_cache *uc, struct mfc6_cache *c)
770 struct sk_buff *skb;
773 * Play the pending entries through our router
776 while((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) {
777 if (ipv6_hdr(skb)->version == 0) {
778 int err;
779 struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct ipv6hdr));
781 if (ip6mr_fill_mroute(mrt, skb, c, NLMSG_DATA(nlh)) > 0) {
782 nlh->nlmsg_len = skb_tail_pointer(skb) - (u8 *)nlh;
783 } else {
784 nlh->nlmsg_type = NLMSG_ERROR;
785 nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
786 skb_trim(skb, nlh->nlmsg_len);
787 ((struct nlmsgerr *)NLMSG_DATA(nlh))->error = -EMSGSIZE;
789 err = rtnl_unicast(skb, net, NETLINK_CB(skb).pid);
790 } else
791 ip6_mr_forward(net, mrt, skb, c);
796 * Bounce a cache query up to pim6sd. We could use netlink for this but pim6sd
797 * expects the following bizarre scheme.
799 * Called under mrt_lock.
802 static int ip6mr_cache_report(struct mr6_table *mrt, struct sk_buff *pkt,
803 mifi_t mifi, int assert)
805 struct sk_buff *skb;
806 struct mrt6msg *msg;
807 int ret;
809 #ifdef CONFIG_IPV6_PIMSM_V2
810 if (assert == MRT6MSG_WHOLEPKT)
811 skb = skb_realloc_headroom(pkt, -skb_network_offset(pkt)
812 +sizeof(*msg));
813 else
814 #endif
815 skb = alloc_skb(sizeof(struct ipv6hdr) + sizeof(*msg), GFP_ATOMIC);
817 if (!skb)
818 return -ENOBUFS;
820 /* I suppose that internal messages
821 * do not require checksums */
823 skb->ip_summed = CHECKSUM_UNNECESSARY;
825 #ifdef CONFIG_IPV6_PIMSM_V2
826 if (assert == MRT6MSG_WHOLEPKT) {
827 /* Ugly, but we have no choice with this interface.
828 Duplicate old header, fix length etc.
829 And all this only to mangle msg->im6_msgtype and
830 to set msg->im6_mbz to "mbz" :-)
832 skb_push(skb, -skb_network_offset(pkt));
834 skb_push(skb, sizeof(*msg));
835 skb_reset_transport_header(skb);
836 msg = (struct mrt6msg *)skb_transport_header(skb);
837 msg->im6_mbz = 0;
838 msg->im6_msgtype = MRT6MSG_WHOLEPKT;
839 msg->im6_mif = mrt->mroute_reg_vif_num;
840 msg->im6_pad = 0;
841 ipv6_addr_copy(&msg->im6_src, &ipv6_hdr(pkt)->saddr);
842 ipv6_addr_copy(&msg->im6_dst, &ipv6_hdr(pkt)->daddr);
844 skb->ip_summed = CHECKSUM_UNNECESSARY;
845 } else
846 #endif
849 * Copy the IP header
852 skb_put(skb, sizeof(struct ipv6hdr));
853 skb_reset_network_header(skb);
854 skb_copy_to_linear_data(skb, ipv6_hdr(pkt), sizeof(struct ipv6hdr));
857 * Add our header
859 skb_put(skb, sizeof(*msg));
860 skb_reset_transport_header(skb);
861 msg = (struct mrt6msg *)skb_transport_header(skb);
863 msg->im6_mbz = 0;
864 msg->im6_msgtype = assert;
865 msg->im6_mif = mifi;
866 msg->im6_pad = 0;
867 ipv6_addr_copy(&msg->im6_src, &ipv6_hdr(pkt)->saddr);
868 ipv6_addr_copy(&msg->im6_dst, &ipv6_hdr(pkt)->daddr);
870 skb_dst_set(skb, dst_clone(skb_dst(pkt)));
871 skb->ip_summed = CHECKSUM_UNNECESSARY;
874 if (mrt->mroute6_sk == NULL) {
875 kfree_skb(skb);
876 return -EINVAL;
880 * Deliver to user space multicast routing algorithms
882 ret = sock_queue_rcv_skb(mrt->mroute6_sk, skb);
883 if (ret < 0) {
884 if (net_ratelimit())
885 printk(KERN_WARNING "mroute6: pending queue full, dropping entries.\n");
886 kfree_skb(skb);
889 return ret;
893 * Queue a packet for resolution. It gets locked cache entry!
896 static int
897 ip6mr_cache_unresolved(struct mr6_table *mrt, mifi_t mifi, struct sk_buff *skb)
899 bool found = false;
900 int err;
901 struct mfc6_cache *c;
903 spin_lock_bh(&mfc_unres_lock);
904 list_for_each_entry(c, &mrt->mfc6_unres_queue, list) {
905 if (ipv6_addr_equal(&c->mf6c_mcastgrp, &ipv6_hdr(skb)->daddr) &&
906 ipv6_addr_equal(&c->mf6c_origin, &ipv6_hdr(skb)->saddr)) {
907 found = true;
908 break;
912 if (!found) {
914 * Create a new entry if allowable
917 if (atomic_read(&mrt->cache_resolve_queue_len) >= 10 ||
918 (c = ip6mr_cache_alloc_unres()) == NULL) {
919 spin_unlock_bh(&mfc_unres_lock);
921 kfree_skb(skb);
922 return -ENOBUFS;
926 * Fill in the new cache entry
928 c->mf6c_parent = -1;
929 c->mf6c_origin = ipv6_hdr(skb)->saddr;
930 c->mf6c_mcastgrp = ipv6_hdr(skb)->daddr;
933 * Reflect first query at pim6sd
935 err = ip6mr_cache_report(mrt, skb, mifi, MRT6MSG_NOCACHE);
936 if (err < 0) {
937 /* If the report failed throw the cache entry
938 out - Brad Parker
940 spin_unlock_bh(&mfc_unres_lock);
942 ip6mr_cache_free(c);
943 kfree_skb(skb);
944 return err;
947 atomic_inc(&mrt->cache_resolve_queue_len);
948 list_add(&c->list, &mrt->mfc6_unres_queue);
950 ipmr_do_expire_process(mrt);
954 * See if we can append the packet
956 if (c->mfc_un.unres.unresolved.qlen > 3) {
957 kfree_skb(skb);
958 err = -ENOBUFS;
959 } else {
960 skb_queue_tail(&c->mfc_un.unres.unresolved, skb);
961 err = 0;
964 spin_unlock_bh(&mfc_unres_lock);
965 return err;
969 * MFC6 cache manipulation by user space
972 static int ip6mr_mfc_delete(struct mr6_table *mrt, struct mf6cctl *mfc)
974 int line;
975 struct mfc6_cache *c, *next;
977 line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr);
979 list_for_each_entry_safe(c, next, &mrt->mfc6_cache_array[line], list) {
980 if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) &&
981 ipv6_addr_equal(&c->mf6c_mcastgrp, &mfc->mf6cc_mcastgrp.sin6_addr)) {
982 write_lock_bh(&mrt_lock);
983 list_del(&c->list);
984 write_unlock_bh(&mrt_lock);
986 ip6mr_cache_free(c);
987 return 0;
990 return -ENOENT;
993 static int ip6mr_device_event(struct notifier_block *this,
994 unsigned long event, void *ptr)
996 struct net_device *dev = ptr;
997 struct net *net = dev_net(dev);
998 struct mr6_table *mrt = net->ipv6.mrt6;
999 struct mif_device *v;
1000 int ct;
1001 LIST_HEAD(list);
1003 if (event != NETDEV_UNREGISTER)
1004 return NOTIFY_DONE;
1006 v = &mrt->vif6_table[0];
1007 for (ct = 0; ct < mrt->maxvif; ct++, v++) {
1008 if (v->dev == dev)
1009 mif6_delete(mrt, ct, &list);
1011 unregister_netdevice_many(&list);
1013 return NOTIFY_DONE;
1016 static struct notifier_block ip6_mr_notifier = {
1017 .notifier_call = ip6mr_device_event
1021 * Setup for IP multicast routing
1024 static int __net_init ip6mr_net_init(struct net *net)
1026 struct mr6_table *mrt;
1027 unsigned int i;
1028 int err = 0;
1030 mrt = kzalloc(sizeof(*mrt), GFP_KERNEL);
1031 if (mrt == NULL) {
1032 err = -ENOMEM;
1033 goto fail;
1036 write_pnet(&mrt->net, net);
1038 for (i = 0; i < MFC6_LINES; i++)
1039 INIT_LIST_HEAD(&mrt->mfc6_cache_array[i]);
1041 INIT_LIST_HEAD(&mrt->mfc6_unres_queue);
1043 setup_timer(&mrt->ipmr_expire_timer, ipmr_expire_process,
1044 (unsigned long)mrt);
1046 #ifdef CONFIG_IPV6_PIMSM_V2
1047 mrt->mroute_reg_vif_num = -1;
1048 #endif
1050 #ifdef CONFIG_PROC_FS
1051 err = -ENOMEM;
1052 if (!proc_net_fops_create(net, "ip6_mr_vif", 0, &ip6mr_vif_fops))
1053 goto proc_vif_fail;
1054 if (!proc_net_fops_create(net, "ip6_mr_cache", 0, &ip6mr_mfc_fops))
1055 goto proc_cache_fail;
1056 #endif
1058 net->ipv6.mrt6 = mrt;
1059 return 0;
1061 #ifdef CONFIG_PROC_FS
1062 proc_cache_fail:
1063 proc_net_remove(net, "ip6_mr_vif");
1064 proc_vif_fail:
1065 kfree(mrt);
1066 #endif
1067 fail:
1068 return err;
1071 static void __net_exit ip6mr_net_exit(struct net *net)
1073 struct mr6_table *mrt = net->ipv6.mrt6;
1075 #ifdef CONFIG_PROC_FS
1076 proc_net_remove(net, "ip6_mr_cache");
1077 proc_net_remove(net, "ip6_mr_vif");
1078 #endif
1079 del_timer(&mrt->ipmr_expire_timer);
1080 mroute_clean_tables(mrt);
1081 kfree(mrt);
1084 static struct pernet_operations ip6mr_net_ops = {
1085 .init = ip6mr_net_init,
1086 .exit = ip6mr_net_exit,
1089 int __init ip6_mr_init(void)
1091 int err;
1093 mrt_cachep = kmem_cache_create("ip6_mrt_cache",
1094 sizeof(struct mfc6_cache),
1095 0, SLAB_HWCACHE_ALIGN,
1096 NULL);
1097 if (!mrt_cachep)
1098 return -ENOMEM;
1100 err = register_pernet_subsys(&ip6mr_net_ops);
1101 if (err)
1102 goto reg_pernet_fail;
1104 err = register_netdevice_notifier(&ip6_mr_notifier);
1105 if (err)
1106 goto reg_notif_fail;
1107 #ifdef CONFIG_IPV6_PIMSM_V2
1108 if (inet6_add_protocol(&pim6_protocol, IPPROTO_PIM) < 0) {
1109 printk(KERN_ERR "ip6_mr_init: can't add PIM protocol\n");
1110 err = -EAGAIN;
1111 goto add_proto_fail;
1113 #endif
1114 return 0;
1115 #ifdef CONFIG_IPV6_PIMSM_V2
1116 add_proto_fail:
1117 unregister_netdevice_notifier(&ip6_mr_notifier);
1118 #endif
1119 reg_notif_fail:
1120 unregister_pernet_subsys(&ip6mr_net_ops);
1121 reg_pernet_fail:
1122 kmem_cache_destroy(mrt_cachep);
1123 return err;
1126 void ip6_mr_cleanup(void)
1128 unregister_netdevice_notifier(&ip6_mr_notifier);
1129 unregister_pernet_subsys(&ip6mr_net_ops);
1130 kmem_cache_destroy(mrt_cachep);
1133 static int ip6mr_mfc_add(struct net *net, struct mr6_table *mrt,
1134 struct mf6cctl *mfc, int mrtsock)
1136 bool found = false;
1137 int line;
1138 struct mfc6_cache *uc, *c;
1139 unsigned char ttls[MAXMIFS];
1140 int i;
1142 if (mfc->mf6cc_parent >= MAXMIFS)
1143 return -ENFILE;
1145 memset(ttls, 255, MAXMIFS);
1146 for (i = 0; i < MAXMIFS; i++) {
1147 if (IF_ISSET(i, &mfc->mf6cc_ifset))
1148 ttls[i] = 1;
1152 line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr);
1154 list_for_each_entry(c, &mrt->mfc6_cache_array[line], list) {
1155 if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) &&
1156 ipv6_addr_equal(&c->mf6c_mcastgrp, &mfc->mf6cc_mcastgrp.sin6_addr)) {
1157 found = true;
1158 break;
1162 if (found) {
1163 write_lock_bh(&mrt_lock);
1164 c->mf6c_parent = mfc->mf6cc_parent;
1165 ip6mr_update_thresholds(mrt, c, ttls);
1166 if (!mrtsock)
1167 c->mfc_flags |= MFC_STATIC;
1168 write_unlock_bh(&mrt_lock);
1169 return 0;
1172 if (!ipv6_addr_is_multicast(&mfc->mf6cc_mcastgrp.sin6_addr))
1173 return -EINVAL;
1175 c = ip6mr_cache_alloc();
1176 if (c == NULL)
1177 return -ENOMEM;
1179 c->mf6c_origin = mfc->mf6cc_origin.sin6_addr;
1180 c->mf6c_mcastgrp = mfc->mf6cc_mcastgrp.sin6_addr;
1181 c->mf6c_parent = mfc->mf6cc_parent;
1182 ip6mr_update_thresholds(mrt, c, ttls);
1183 if (!mrtsock)
1184 c->mfc_flags |= MFC_STATIC;
1186 write_lock_bh(&mrt_lock);
1187 list_add(&c->list, &mrt->mfc6_cache_array[line]);
1188 write_unlock_bh(&mrt_lock);
1191 * Check to see if we resolved a queued list. If so we
1192 * need to send on the frames and tidy up.
1194 found = false;
1195 spin_lock_bh(&mfc_unres_lock);
1196 list_for_each_entry(uc, &mrt->mfc6_unres_queue, list) {
1197 if (ipv6_addr_equal(&uc->mf6c_origin, &c->mf6c_origin) &&
1198 ipv6_addr_equal(&uc->mf6c_mcastgrp, &c->mf6c_mcastgrp)) {
1199 list_del(&uc->list);
1200 atomic_dec(&mrt->cache_resolve_queue_len);
1201 found = true;
1202 break;
1205 if (list_empty(&mrt->mfc6_unres_queue))
1206 del_timer(&mrt->ipmr_expire_timer);
1207 spin_unlock_bh(&mfc_unres_lock);
1209 if (found) {
1210 ip6mr_cache_resolve(net, mrt, uc, c);
1211 ip6mr_cache_free(uc);
1213 return 0;
1217 * Close the multicast socket, and clear the vif tables etc
1220 static void mroute_clean_tables(struct mr6_table *mrt)
1222 int i;
1223 LIST_HEAD(list);
1224 struct mfc6_cache *c, *next;
1227 * Shut down all active vif entries
1229 for (i = 0; i < mrt->maxvif; i++) {
1230 if (!(mrt->vif6_table[i].flags & VIFF_STATIC))
1231 mif6_delete(mrt, i, &list);
1233 unregister_netdevice_many(&list);
1236 * Wipe the cache
1238 for (i = 0; i < MFC6_LINES; i++) {
1239 list_for_each_entry_safe(c, next, &mrt->mfc6_cache_array[i], list) {
1240 if (c->mfc_flags & MFC_STATIC)
1241 continue;
1242 write_lock_bh(&mrt_lock);
1243 list_del(&c->list);
1244 write_unlock_bh(&mrt_lock);
1246 ip6mr_cache_free(c);
1250 if (atomic_read(&mrt->cache_resolve_queue_len) != 0) {
1251 spin_lock_bh(&mfc_unres_lock);
1252 list_for_each_entry_safe(c, next, &mrt->mfc6_unres_queue, list) {
1253 list_del(&c->list);
1254 ip6mr_destroy_unres(mrt, c);
1256 spin_unlock_bh(&mfc_unres_lock);
1260 static int ip6mr_sk_init(struct mr6_table *mrt, struct sock *sk)
1262 int err = 0;
1263 struct net *net = sock_net(sk);
1265 rtnl_lock();
1266 write_lock_bh(&mrt_lock);
1267 if (likely(mrt->mroute6_sk == NULL)) {
1268 mrt->mroute6_sk = sk;
1269 net->ipv6.devconf_all->mc_forwarding++;
1271 else
1272 err = -EADDRINUSE;
1273 write_unlock_bh(&mrt_lock);
1275 rtnl_unlock();
1277 return err;
1280 int ip6mr_sk_done(struct sock *sk)
1282 int err = 0;
1283 struct net *net = sock_net(sk);
1284 struct mr6_table *mrt = net->ipv6.mrt6;
1286 rtnl_lock();
1287 if (sk == mrt->mroute6_sk) {
1288 write_lock_bh(&mrt_lock);
1289 mrt->mroute6_sk = NULL;
1290 net->ipv6.devconf_all->mc_forwarding--;
1291 write_unlock_bh(&mrt_lock);
1293 mroute_clean_tables(mrt);
1294 } else
1295 err = -EACCES;
1296 rtnl_unlock();
1298 return err;
1301 struct sock *mroute6_socket(struct net *net)
1303 struct mr6_table *mrt = net->ipv6.mrt6;
1305 return mrt->mroute6_sk;
1309 * Socket options and virtual interface manipulation. The whole
1310 * virtual interface system is a complete heap, but unfortunately
1311 * that's how BSD mrouted happens to think. Maybe one day with a proper
1312 * MOSPF/PIM router set up we can clean this up.
1315 int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsigned int optlen)
1317 int ret;
1318 struct mif6ctl vif;
1319 struct mf6cctl mfc;
1320 mifi_t mifi;
1321 struct net *net = sock_net(sk);
1322 struct mr6_table *mrt = net->ipv6.mrt6;
1324 if (optname != MRT6_INIT) {
1325 if (sk != mrt->mroute6_sk && !capable(CAP_NET_ADMIN))
1326 return -EACCES;
1329 switch (optname) {
1330 case MRT6_INIT:
1331 if (sk->sk_type != SOCK_RAW ||
1332 inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
1333 return -EOPNOTSUPP;
1334 if (optlen < sizeof(int))
1335 return -EINVAL;
1337 return ip6mr_sk_init(mrt, sk);
1339 case MRT6_DONE:
1340 return ip6mr_sk_done(sk);
1342 case MRT6_ADD_MIF:
1343 if (optlen < sizeof(vif))
1344 return -EINVAL;
1345 if (copy_from_user(&vif, optval, sizeof(vif)))
1346 return -EFAULT;
1347 if (vif.mif6c_mifi >= MAXMIFS)
1348 return -ENFILE;
1349 rtnl_lock();
1350 ret = mif6_add(net, mrt, &vif, sk == mrt->mroute6_sk);
1351 rtnl_unlock();
1352 return ret;
1354 case MRT6_DEL_MIF:
1355 if (optlen < sizeof(mifi_t))
1356 return -EINVAL;
1357 if (copy_from_user(&mifi, optval, sizeof(mifi_t)))
1358 return -EFAULT;
1359 rtnl_lock();
1360 ret = mif6_delete(mrt, mifi, NULL);
1361 rtnl_unlock();
1362 return ret;
1365 * Manipulate the forwarding caches. These live
1366 * in a sort of kernel/user symbiosis.
1368 case MRT6_ADD_MFC:
1369 case MRT6_DEL_MFC:
1370 if (optlen < sizeof(mfc))
1371 return -EINVAL;
1372 if (copy_from_user(&mfc, optval, sizeof(mfc)))
1373 return -EFAULT;
1374 rtnl_lock();
1375 if (optname == MRT6_DEL_MFC)
1376 ret = ip6mr_mfc_delete(mrt, &mfc);
1377 else
1378 ret = ip6mr_mfc_add(net, mrt, &mfc, sk == mrt->mroute6_sk);
1379 rtnl_unlock();
1380 return ret;
1383 * Control PIM assert (to activate pim will activate assert)
1385 case MRT6_ASSERT:
1387 int v;
1388 if (get_user(v, (int __user *)optval))
1389 return -EFAULT;
1390 mrt->mroute_do_assert = !!v;
1391 return 0;
1394 #ifdef CONFIG_IPV6_PIMSM_V2
1395 case MRT6_PIM:
1397 int v;
1398 if (get_user(v, (int __user *)optval))
1399 return -EFAULT;
1400 v = !!v;
1401 rtnl_lock();
1402 ret = 0;
1403 if (v != mrt->mroute_do_pim) {
1404 mrt->mroute_do_pim = v;
1405 mrt->mroute_do_assert = v;
1407 rtnl_unlock();
1408 return ret;
1411 #endif
1413 * Spurious command, or MRT6_VERSION which you cannot
1414 * set.
1416 default:
1417 return -ENOPROTOOPT;
1422 * Getsock opt support for the multicast routing system.
1425 int ip6_mroute_getsockopt(struct sock *sk, int optname, char __user *optval,
1426 int __user *optlen)
1428 int olr;
1429 int val;
1430 struct net *net = sock_net(sk);
1431 struct mr6_table *mrt = net->ipv6.mrt6;
1433 switch (optname) {
1434 case MRT6_VERSION:
1435 val = 0x0305;
1436 break;
1437 #ifdef CONFIG_IPV6_PIMSM_V2
1438 case MRT6_PIM:
1439 val = mrt->mroute_do_pim;
1440 break;
1441 #endif
1442 case MRT6_ASSERT:
1443 val = mrt->mroute_do_assert;
1444 break;
1445 default:
1446 return -ENOPROTOOPT;
1449 if (get_user(olr, optlen))
1450 return -EFAULT;
1452 olr = min_t(int, olr, sizeof(int));
1453 if (olr < 0)
1454 return -EINVAL;
1456 if (put_user(olr, optlen))
1457 return -EFAULT;
1458 if (copy_to_user(optval, &val, olr))
1459 return -EFAULT;
1460 return 0;
1464 * The IP multicast ioctl support routines.
1467 int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg)
1469 struct sioc_sg_req6 sr;
1470 struct sioc_mif_req6 vr;
1471 struct mif_device *vif;
1472 struct mfc6_cache *c;
1473 struct net *net = sock_net(sk);
1474 struct mr6_table *mrt = net->ipv6.mrt6;
1476 switch (cmd) {
1477 case SIOCGETMIFCNT_IN6:
1478 if (copy_from_user(&vr, arg, sizeof(vr)))
1479 return -EFAULT;
1480 if (vr.mifi >= mrt->maxvif)
1481 return -EINVAL;
1482 read_lock(&mrt_lock);
1483 vif = &mrt->vif6_table[vr.mifi];
1484 if (MIF_EXISTS(mrt, vr.mifi)) {
1485 vr.icount = vif->pkt_in;
1486 vr.ocount = vif->pkt_out;
1487 vr.ibytes = vif->bytes_in;
1488 vr.obytes = vif->bytes_out;
1489 read_unlock(&mrt_lock);
1491 if (copy_to_user(arg, &vr, sizeof(vr)))
1492 return -EFAULT;
1493 return 0;
1495 read_unlock(&mrt_lock);
1496 return -EADDRNOTAVAIL;
1497 case SIOCGETSGCNT_IN6:
1498 if (copy_from_user(&sr, arg, sizeof(sr)))
1499 return -EFAULT;
1501 read_lock(&mrt_lock);
1502 c = ip6mr_cache_find(mrt, &sr.src.sin6_addr, &sr.grp.sin6_addr);
1503 if (c) {
1504 sr.pktcnt = c->mfc_un.res.pkt;
1505 sr.bytecnt = c->mfc_un.res.bytes;
1506 sr.wrong_if = c->mfc_un.res.wrong_if;
1507 read_unlock(&mrt_lock);
1509 if (copy_to_user(arg, &sr, sizeof(sr)))
1510 return -EFAULT;
1511 return 0;
1513 read_unlock(&mrt_lock);
1514 return -EADDRNOTAVAIL;
1515 default:
1516 return -ENOIOCTLCMD;
1521 static inline int ip6mr_forward2_finish(struct sk_buff *skb)
1523 IP6_INC_STATS_BH(dev_net(skb_dst(skb)->dev), ip6_dst_idev(skb_dst(skb)),
1524 IPSTATS_MIB_OUTFORWDATAGRAMS);
1525 return dst_output(skb);
1529 * Processing handlers for ip6mr_forward
1532 static int ip6mr_forward2(struct net *net, struct mr6_table *mrt,
1533 struct sk_buff *skb, struct mfc6_cache *c, int vifi)
1535 struct ipv6hdr *ipv6h;
1536 struct mif_device *vif = &mrt->vif6_table[vifi];
1537 struct net_device *dev;
1538 struct dst_entry *dst;
1539 struct flowi fl;
1541 if (vif->dev == NULL)
1542 goto out_free;
1544 #ifdef CONFIG_IPV6_PIMSM_V2
1545 if (vif->flags & MIFF_REGISTER) {
1546 vif->pkt_out++;
1547 vif->bytes_out += skb->len;
1548 vif->dev->stats.tx_bytes += skb->len;
1549 vif->dev->stats.tx_packets++;
1550 ip6mr_cache_report(mrt, skb, vifi, MRT6MSG_WHOLEPKT);
1551 goto out_free;
1553 #endif
1555 ipv6h = ipv6_hdr(skb);
1557 fl = (struct flowi) {
1558 .oif = vif->link,
1559 .nl_u = { .ip6_u =
1560 { .daddr = ipv6h->daddr, }
1564 dst = ip6_route_output(net, NULL, &fl);
1565 if (!dst)
1566 goto out_free;
1568 skb_dst_drop(skb);
1569 skb_dst_set(skb, dst);
1572 * RFC1584 teaches, that DVMRP/PIM router must deliver packets locally
1573 * not only before forwarding, but after forwarding on all output
1574 * interfaces. It is clear, if mrouter runs a multicasting
1575 * program, it should receive packets not depending to what interface
1576 * program is joined.
1577 * If we will not make it, the program will have to join on all
1578 * interfaces. On the other hand, multihoming host (or router, but
1579 * not mrouter) cannot join to more than one interface - it will
1580 * result in receiving multiple packets.
1582 dev = vif->dev;
1583 skb->dev = dev;
1584 vif->pkt_out++;
1585 vif->bytes_out += skb->len;
1587 /* We are about to write */
1588 /* XXX: extension headers? */
1589 if (skb_cow(skb, sizeof(*ipv6h) + LL_RESERVED_SPACE(dev)))
1590 goto out_free;
1592 ipv6h = ipv6_hdr(skb);
1593 ipv6h->hop_limit--;
1595 IP6CB(skb)->flags |= IP6SKB_FORWARDED;
1597 return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD, skb, skb->dev, dev,
1598 ip6mr_forward2_finish);
1600 out_free:
1601 kfree_skb(skb);
1602 return 0;
1605 static int ip6mr_find_vif(struct mr6_table *mrt, struct net_device *dev)
1607 int ct;
1609 for (ct = mrt->maxvif - 1; ct >= 0; ct--) {
1610 if (mrt->vif6_table[ct].dev == dev)
1611 break;
1613 return ct;
1616 static int ip6_mr_forward(struct net *net, struct mr6_table *mrt,
1617 struct sk_buff *skb, struct mfc6_cache *cache)
1619 int psend = -1;
1620 int vif, ct;
1622 vif = cache->mf6c_parent;
1623 cache->mfc_un.res.pkt++;
1624 cache->mfc_un.res.bytes += skb->len;
1627 * Wrong interface: drop packet and (maybe) send PIM assert.
1629 if (mrt->vif6_table[vif].dev != skb->dev) {
1630 int true_vifi;
1632 cache->mfc_un.res.wrong_if++;
1633 true_vifi = ip6mr_find_vif(mrt, skb->dev);
1635 if (true_vifi >= 0 && mrt->mroute_do_assert &&
1636 /* pimsm uses asserts, when switching from RPT to SPT,
1637 so that we cannot check that packet arrived on an oif.
1638 It is bad, but otherwise we would need to move pretty
1639 large chunk of pimd to kernel. Ough... --ANK
1641 (mrt->mroute_do_pim ||
1642 cache->mfc_un.res.ttls[true_vifi] < 255) &&
1643 time_after(jiffies,
1644 cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) {
1645 cache->mfc_un.res.last_assert = jiffies;
1646 ip6mr_cache_report(mrt, skb, true_vifi, MRT6MSG_WRONGMIF);
1648 goto dont_forward;
1651 mrt->vif6_table[vif].pkt_in++;
1652 mrt->vif6_table[vif].bytes_in += skb->len;
1655 * Forward the frame
1657 for (ct = cache->mfc_un.res.maxvif - 1; ct >= cache->mfc_un.res.minvif; ct--) {
1658 if (ipv6_hdr(skb)->hop_limit > cache->mfc_un.res.ttls[ct]) {
1659 if (psend != -1) {
1660 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
1661 if (skb2)
1662 ip6mr_forward2(net, mrt, skb2, cache, psend);
1664 psend = ct;
1667 if (psend != -1) {
1668 ip6mr_forward2(net, mrt, skb, cache, psend);
1669 return 0;
1672 dont_forward:
1673 kfree_skb(skb);
1674 return 0;
1679 * Multicast packets for forwarding arrive here
1682 int ip6_mr_input(struct sk_buff *skb)
1684 struct mfc6_cache *cache;
1685 struct net *net = dev_net(skb->dev);
1686 struct mr6_table *mrt = net->ipv6.mrt6;
1688 read_lock(&mrt_lock);
1689 cache = ip6mr_cache_find(mrt,
1690 &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr);
1693 * No usable cache entry
1695 if (cache == NULL) {
1696 int vif;
1698 vif = ip6mr_find_vif(mrt, skb->dev);
1699 if (vif >= 0) {
1700 int err = ip6mr_cache_unresolved(mrt, vif, skb);
1701 read_unlock(&mrt_lock);
1703 return err;
1705 read_unlock(&mrt_lock);
1706 kfree_skb(skb);
1707 return -ENODEV;
1710 ip6_mr_forward(net, mrt, skb, cache);
1712 read_unlock(&mrt_lock);
1714 return 0;
1718 static int
1719 ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
1720 struct mfc6_cache *c, struct rtmsg *rtm)
1722 int ct;
1723 struct rtnexthop *nhp;
1724 u8 *b = skb_tail_pointer(skb);
1725 struct rtattr *mp_head;
1727 /* If cache is unresolved, don't try to parse IIF and OIF */
1728 if (c->mf6c_parent > MAXMIFS)
1729 return -ENOENT;
1731 if (MIF_EXISTS(mrt, c->mf6c_parent))
1732 RTA_PUT(skb, RTA_IIF, 4, &mrt->vif6_table[c->mf6c_parent].dev->ifindex);
1734 mp_head = (struct rtattr *)skb_put(skb, RTA_LENGTH(0));
1736 for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
1737 if (MIF_EXISTS(mrt, ct) && c->mfc_un.res.ttls[ct] < 255) {
1738 if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4))
1739 goto rtattr_failure;
1740 nhp = (struct rtnexthop *)skb_put(skb, RTA_ALIGN(sizeof(*nhp)));
1741 nhp->rtnh_flags = 0;
1742 nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
1743 nhp->rtnh_ifindex = mrt->vif6_table[ct].dev->ifindex;
1744 nhp->rtnh_len = sizeof(*nhp);
1747 mp_head->rta_type = RTA_MULTIPATH;
1748 mp_head->rta_len = skb_tail_pointer(skb) - (u8 *)mp_head;
1749 rtm->rtm_type = RTN_MULTICAST;
1750 return 1;
1752 rtattr_failure:
1753 nlmsg_trim(skb, b);
1754 return -EMSGSIZE;
1757 int ip6mr_get_route(struct net *net,
1758 struct sk_buff *skb, struct rtmsg *rtm, int nowait)
1760 int err;
1761 struct mr6_table *mrt = net->ipv6.mrt6;
1762 struct mfc6_cache *cache;
1763 struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
1765 read_lock(&mrt_lock);
1766 cache = ip6mr_cache_find(mrt, &rt->rt6i_src.addr, &rt->rt6i_dst.addr);
1768 if (!cache) {
1769 struct sk_buff *skb2;
1770 struct ipv6hdr *iph;
1771 struct net_device *dev;
1772 int vif;
1774 if (nowait) {
1775 read_unlock(&mrt_lock);
1776 return -EAGAIN;
1779 dev = skb->dev;
1780 if (dev == NULL || (vif = ip6mr_find_vif(mrt, dev)) < 0) {
1781 read_unlock(&mrt_lock);
1782 return -ENODEV;
1785 /* really correct? */
1786 skb2 = alloc_skb(sizeof(struct ipv6hdr), GFP_ATOMIC);
1787 if (!skb2) {
1788 read_unlock(&mrt_lock);
1789 return -ENOMEM;
1792 skb_reset_transport_header(skb2);
1794 skb_put(skb2, sizeof(struct ipv6hdr));
1795 skb_reset_network_header(skb2);
1797 iph = ipv6_hdr(skb2);
1798 iph->version = 0;
1799 iph->priority = 0;
1800 iph->flow_lbl[0] = 0;
1801 iph->flow_lbl[1] = 0;
1802 iph->flow_lbl[2] = 0;
1803 iph->payload_len = 0;
1804 iph->nexthdr = IPPROTO_NONE;
1805 iph->hop_limit = 0;
1806 ipv6_addr_copy(&iph->saddr, &rt->rt6i_src.addr);
1807 ipv6_addr_copy(&iph->daddr, &rt->rt6i_dst.addr);
1809 err = ip6mr_cache_unresolved(mrt, vif, skb2);
1810 read_unlock(&mrt_lock);
1812 return err;
1815 if (!nowait && (rtm->rtm_flags&RTM_F_NOTIFY))
1816 cache->mfc_flags |= MFC_NOTIFY;
1818 err = ip6mr_fill_mroute(mrt, skb, cache, rtm);
1819 read_unlock(&mrt_lock);
1820 return err;