2 * Linux IPv6 multicast routing support for BSD pim6sd
3 * Based on net/ipv4/ipmr.c.
5 * (c) 2004 Mickael Hoerdt, <hoerdt@clarinet.u-strasbg.fr>
6 * LSIIT Laboratory, Strasbourg, France
7 * (c) 2004 Jean-Philippe Andriot, <jean-philippe.andriot@6WIND.com>
9 * Copyright (C)2007,2008 USAGI/WIDE Project
10 * YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation; either version
15 * 2 of the License, or (at your option) any later version.
19 #include <asm/system.h>
20 #include <asm/uaccess.h>
21 #include <linux/types.h>
22 #include <linux/sched.h>
23 #include <linux/errno.h>
24 #include <linux/timer.h>
26 #include <linux/kernel.h>
27 #include <linux/fcntl.h>
28 #include <linux/stat.h>
29 #include <linux/socket.h>
31 #include <linux/inet.h>
32 #include <linux/netdevice.h>
33 #include <linux/inetdevice.h>
34 #include <linux/igmp.h>
35 #include <linux/proc_fs.h>
36 #include <linux/seq_file.h>
37 #include <linux/mroute.h>
38 #include <linux/init.h>
40 #include <net/protocol.h>
41 #include <linux/skbuff.h>
46 #include <net/route.h>
47 #include <linux/notifier.h>
48 #include <linux/if_arp.h>
49 #include <linux/netfilter_ipv4.h>
51 #include <net/checksum.h>
52 #include <net/netlink.h>
55 #include <net/ip6_route.h>
56 #include <linux/mroute6.h>
57 #include <linux/pim.h>
58 #include <net/addrconf.h>
59 #include <linux/netfilter_ipv6.h>
61 struct sock
*mroute6_socket
;
64 /* Big lock, protecting vif table, mrt cache and mroute socket state.
65 Note that the changes are semaphored via rtnl_lock.
68 static DEFINE_RWLOCK(mrt_lock
);
71 * Multicast router control variables
74 static struct mif_device vif6_table
[MAXMIFS
]; /* Devices */
77 #define MIF_EXISTS(idx) (vif6_table[idx].dev != NULL)
79 static int mroute_do_assert
; /* Set in PIM assert */
80 #ifdef CONFIG_IPV6_PIMSM_V2
81 static int mroute_do_pim
;
83 #define mroute_do_pim 0
86 static struct mfc6_cache
*mfc6_cache_array
[MFC_LINES
]; /* Forwarding cache */
88 static struct mfc6_cache
*mfc_unres_queue
; /* Queue of unresolved entries */
89 static atomic_t cache_resolve_queue_len
; /* Size of unresolved */
91 /* Special spinlock for queue of unresolved entries */
92 static DEFINE_SPINLOCK(mfc_unres_lock
);
94 /* We return to original Alan's scheme. Hash table of resolved
95 entries is changed only in process context and protected
96 with weak lock mrt_lock. Queue of unresolved entries is protected
97 with strong spinlock mfc_unres_lock.
99 In this case data path is free of exclusive locks at all.
102 static struct kmem_cache
*mrt_cachep __read_mostly
;
104 static int ip6_mr_forward(struct sk_buff
*skb
, struct mfc6_cache
*cache
);
105 static int ip6mr_cache_report(struct sk_buff
*pkt
, vifi_t vifi
, int assert);
106 static int ip6mr_fill_mroute(struct sk_buff
*skb
, struct mfc6_cache
*c
, struct rtmsg
*rtm
);
108 #ifdef CONFIG_IPV6_PIMSM_V2
109 static struct inet6_protocol pim6_protocol
;
112 static struct timer_list ipmr_expire_timer
;
115 #ifdef CONFIG_PROC_FS
117 struct ipmr_mfc_iter
{
118 struct mfc6_cache
**cache
;
123 static struct mfc6_cache
*ipmr_mfc_seq_idx(struct ipmr_mfc_iter
*it
, loff_t pos
)
125 struct mfc6_cache
*mfc
;
127 it
->cache
= mfc6_cache_array
;
128 read_lock(&mrt_lock
);
129 for (it
->ct
= 0; it
->ct
< ARRAY_SIZE(mfc6_cache_array
); it
->ct
++)
130 for (mfc
= mfc6_cache_array
[it
->ct
]; mfc
; mfc
= mfc
->next
)
133 read_unlock(&mrt_lock
);
135 it
->cache
= &mfc_unres_queue
;
136 spin_lock_bh(&mfc_unres_lock
);
137 for (mfc
= mfc_unres_queue
; mfc
; mfc
= mfc
->next
)
140 spin_unlock_bh(&mfc_unres_lock
);
150 * The /proc interfaces to multicast routing /proc/ip6_mr_cache /proc/ip6_mr_vif
153 struct ipmr_vif_iter
{
157 static struct mif_device
*ip6mr_vif_seq_idx(struct ipmr_vif_iter
*iter
,
160 for (iter
->ct
= 0; iter
->ct
< maxvif
; ++iter
->ct
) {
161 if (!MIF_EXISTS(iter
->ct
))
164 return &vif6_table
[iter
->ct
];
169 static void *ip6mr_vif_seq_start(struct seq_file
*seq
, loff_t
*pos
)
172 read_lock(&mrt_lock
);
173 return (*pos
? ip6mr_vif_seq_idx(seq
->private, *pos
- 1)
177 static void *ip6mr_vif_seq_next(struct seq_file
*seq
, void *v
, loff_t
*pos
)
179 struct ipmr_vif_iter
*iter
= seq
->private;
182 if (v
== SEQ_START_TOKEN
)
183 return ip6mr_vif_seq_idx(iter
, 0);
185 while (++iter
->ct
< maxvif
) {
186 if (!MIF_EXISTS(iter
->ct
))
188 return &vif6_table
[iter
->ct
];
193 static void ip6mr_vif_seq_stop(struct seq_file
*seq
, void *v
)
196 read_unlock(&mrt_lock
);
199 static int ip6mr_vif_seq_show(struct seq_file
*seq
, void *v
)
201 if (v
== SEQ_START_TOKEN
) {
203 "Interface BytesIn PktsIn BytesOut PktsOut Flags\n");
205 const struct mif_device
*vif
= v
;
206 const char *name
= vif
->dev
? vif
->dev
->name
: "none";
209 "%2Zd %-10s %8ld %7ld %8ld %7ld %05X\n",
211 name
, vif
->bytes_in
, vif
->pkt_in
,
212 vif
->bytes_out
, vif
->pkt_out
,
218 static struct seq_operations ip6mr_vif_seq_ops
= {
219 .start
= ip6mr_vif_seq_start
,
220 .next
= ip6mr_vif_seq_next
,
221 .stop
= ip6mr_vif_seq_stop
,
222 .show
= ip6mr_vif_seq_show
,
225 static int ip6mr_vif_open(struct inode
*inode
, struct file
*file
)
227 return seq_open_private(file
, &ip6mr_vif_seq_ops
,
228 sizeof(struct ipmr_vif_iter
));
231 static struct file_operations ip6mr_vif_fops
= {
232 .owner
= THIS_MODULE
,
233 .open
= ip6mr_vif_open
,
236 .release
= seq_release
,
239 static void *ipmr_mfc_seq_start(struct seq_file
*seq
, loff_t
*pos
)
241 return (*pos
? ipmr_mfc_seq_idx(seq
->private, *pos
- 1)
245 static void *ipmr_mfc_seq_next(struct seq_file
*seq
, void *v
, loff_t
*pos
)
247 struct mfc6_cache
*mfc
= v
;
248 struct ipmr_mfc_iter
*it
= seq
->private;
252 if (v
== SEQ_START_TOKEN
)
253 return ipmr_mfc_seq_idx(seq
->private, 0);
258 if (it
->cache
== &mfc_unres_queue
)
261 BUG_ON(it
->cache
!= mfc6_cache_array
);
263 while (++it
->ct
< ARRAY_SIZE(mfc6_cache_array
)) {
264 mfc
= mfc6_cache_array
[it
->ct
];
269 /* exhausted cache_array, show unresolved */
270 read_unlock(&mrt_lock
);
271 it
->cache
= &mfc_unres_queue
;
274 spin_lock_bh(&mfc_unres_lock
);
275 mfc
= mfc_unres_queue
;
280 spin_unlock_bh(&mfc_unres_lock
);
286 static void ipmr_mfc_seq_stop(struct seq_file
*seq
, void *v
)
288 struct ipmr_mfc_iter
*it
= seq
->private;
290 if (it
->cache
== &mfc_unres_queue
)
291 spin_unlock_bh(&mfc_unres_lock
);
292 else if (it
->cache
== mfc6_cache_array
)
293 read_unlock(&mrt_lock
);
296 static int ipmr_mfc_seq_show(struct seq_file
*seq
, void *v
)
300 if (v
== SEQ_START_TOKEN
) {
304 "Iif Pkts Bytes Wrong Oifs\n");
306 const struct mfc6_cache
*mfc
= v
;
307 const struct ipmr_mfc_iter
*it
= seq
->private;
310 NIP6_FMT
" " NIP6_FMT
" %-3d %8ld %8ld %8ld",
311 NIP6(mfc
->mf6c_mcastgrp
), NIP6(mfc
->mf6c_origin
),
314 mfc
->mfc_un
.res
.bytes
,
315 mfc
->mfc_un
.res
.wrong_if
);
317 if (it
->cache
!= &mfc_unres_queue
) {
318 for (n
= mfc
->mfc_un
.res
.minvif
;
319 n
< mfc
->mfc_un
.res
.maxvif
; n
++) {
321 mfc
->mfc_un
.res
.ttls
[n
] < 255)
324 n
, mfc
->mfc_un
.res
.ttls
[n
]);
332 static struct seq_operations ipmr_mfc_seq_ops
= {
333 .start
= ipmr_mfc_seq_start
,
334 .next
= ipmr_mfc_seq_next
,
335 .stop
= ipmr_mfc_seq_stop
,
336 .show
= ipmr_mfc_seq_show
,
339 static int ipmr_mfc_open(struct inode
*inode
, struct file
*file
)
341 return seq_open_private(file
, &ipmr_mfc_seq_ops
,
342 sizeof(struct ipmr_mfc_iter
));
345 static struct file_operations ip6mr_mfc_fops
= {
346 .owner
= THIS_MODULE
,
347 .open
= ipmr_mfc_open
,
350 .release
= seq_release
,
354 #ifdef CONFIG_IPV6_PIMSM_V2
355 static int reg_vif_num
= -1;
357 static int pim6_rcv(struct sk_buff
*skb
)
359 struct pimreghdr
*pim
;
360 struct ipv6hdr
*encap
;
361 struct net_device
*reg_dev
= NULL
;
363 if (!pskb_may_pull(skb
, sizeof(*pim
) + sizeof(*encap
)))
366 pim
= (struct pimreghdr
*)skb_transport_header(skb
);
367 if (pim
->type
!= ((PIM_VERSION
<< 4) | PIM_REGISTER
) ||
368 (pim
->flags
& PIM_NULL_REGISTER
) ||
369 (ip_compute_csum((void *)pim
, sizeof(*pim
)) != 0 &&
370 (u16
)csum_fold(skb_checksum(skb
, 0, skb
->len
, 0))))
373 /* check if the inner packet is destined to mcast group */
374 encap
= (struct ipv6hdr
*)(skb_transport_header(skb
) +
377 if (!ipv6_addr_is_multicast(&encap
->daddr
) ||
378 encap
->payload_len
== 0 ||
379 ntohs(encap
->payload_len
) + sizeof(*pim
) > skb
->len
)
382 read_lock(&mrt_lock
);
383 if (reg_vif_num
>= 0)
384 reg_dev
= vif6_table
[reg_vif_num
].dev
;
387 read_unlock(&mrt_lock
);
392 skb
->mac_header
= skb
->network_header
;
393 skb_pull(skb
, (u8
*)encap
- skb
->data
);
394 skb_reset_network_header(skb
);
396 skb
->protocol
= htons(ETH_P_IP
);
398 skb
->pkt_type
= PACKET_HOST
;
399 dst_release(skb
->dst
);
400 ((struct net_device_stats
*)netdev_priv(reg_dev
))->rx_bytes
+= skb
->len
;
401 ((struct net_device_stats
*)netdev_priv(reg_dev
))->rx_packets
++;
412 static struct inet6_protocol pim6_protocol
= {
416 /* Service routines creating virtual interfaces: PIMREG */
418 static int reg_vif_xmit(struct sk_buff
*skb
, struct net_device
*dev
)
420 read_lock(&mrt_lock
);
421 ((struct net_device_stats
*)netdev_priv(dev
))->tx_bytes
+= skb
->len
;
422 ((struct net_device_stats
*)netdev_priv(dev
))->tx_packets
++;
423 ip6mr_cache_report(skb
, reg_vif_num
, MRT6MSG_WHOLEPKT
);
424 read_unlock(&mrt_lock
);
/*
 * Device statistics for pim6reg; the stats block is kept in the
 * device's private area (see alloc_netdev() in ip6mr_reg_vif()).
 */
static struct net_device_stats *reg_vif_get_stats(struct net_device *dev)
{
	return netdev_priv(dev);
}
434 static void reg_vif_setup(struct net_device
*dev
)
436 dev
->type
= ARPHRD_PIMREG
;
437 dev
->mtu
= 1500 - sizeof(struct ipv6hdr
) - 8;
438 dev
->flags
= IFF_NOARP
;
439 dev
->hard_start_xmit
= reg_vif_xmit
;
440 dev
->get_stats
= reg_vif_get_stats
;
441 dev
->destructor
= free_netdev
;
444 static struct net_device
*ip6mr_reg_vif(void)
446 struct net_device
*dev
;
447 struct inet6_dev
*in_dev
;
449 dev
= alloc_netdev(sizeof(struct net_device_stats
), "pim6reg",
455 if (register_netdevice(dev
)) {
461 in_dev
= ipv6_find_idev(dev
);
471 /* allow the register to be completed before unregistering. */
475 unregister_netdevice(dev
);
484 static int mif6_delete(int vifi
)
486 struct mif_device
*v
;
487 struct net_device
*dev
;
488 if (vifi
< 0 || vifi
>= maxvif
)
489 return -EADDRNOTAVAIL
;
491 v
= &vif6_table
[vifi
];
493 write_lock_bh(&mrt_lock
);
498 write_unlock_bh(&mrt_lock
);
499 return -EADDRNOTAVAIL
;
502 #ifdef CONFIG_IPV6_PIMSM_V2
503 if (vifi
== reg_vif_num
)
507 if (vifi
+ 1 == maxvif
) {
509 for (tmp
= vifi
- 1; tmp
>= 0; tmp
--) {
516 write_unlock_bh(&mrt_lock
);
518 dev_set_allmulti(dev
, -1);
520 if (v
->flags
& MIFF_REGISTER
)
521 unregister_netdevice(dev
);
527 /* Destroy an unresolved cache entry, killing queued skbs
528 and reporting error to netlink readers.
531 static void ip6mr_destroy_unres(struct mfc6_cache
*c
)
535 atomic_dec(&cache_resolve_queue_len
);
537 while((skb
= skb_dequeue(&c
->mfc_un
.unres
.unresolved
)) != NULL
) {
538 if (ipv6_hdr(skb
)->version
== 0) {
539 struct nlmsghdr
*nlh
= (struct nlmsghdr
*)skb_pull(skb
, sizeof(struct ipv6hdr
));
540 nlh
->nlmsg_type
= NLMSG_ERROR
;
541 nlh
->nlmsg_len
= NLMSG_LENGTH(sizeof(struct nlmsgerr
));
542 skb_trim(skb
, nlh
->nlmsg_len
);
543 ((struct nlmsgerr
*)NLMSG_DATA(nlh
))->error
= -ETIMEDOUT
;
544 rtnl_unicast(skb
, &init_net
, NETLINK_CB(skb
).pid
);
549 kmem_cache_free(mrt_cachep
, c
);
553 /* Single timer process for all the unresolved queue. */
555 static void ipmr_do_expire_process(unsigned long dummy
)
557 unsigned long now
= jiffies
;
558 unsigned long expires
= 10 * HZ
;
559 struct mfc6_cache
*c
, **cp
;
561 cp
= &mfc_unres_queue
;
563 while ((c
= *cp
) != NULL
) {
564 if (time_after(c
->mfc_un
.unres
.expires
, now
)) {
566 unsigned long interval
= c
->mfc_un
.unres
.expires
- now
;
567 if (interval
< expires
)
574 ip6mr_destroy_unres(c
);
577 if (atomic_read(&cache_resolve_queue_len
))
578 mod_timer(&ipmr_expire_timer
, jiffies
+ expires
);
581 static void ipmr_expire_process(unsigned long dummy
)
583 if (!spin_trylock(&mfc_unres_lock
)) {
584 mod_timer(&ipmr_expire_timer
, jiffies
+ 1);
588 if (atomic_read(&cache_resolve_queue_len
))
589 ipmr_do_expire_process(dummy
);
591 spin_unlock(&mfc_unres_lock
);
594 /* Fill oifs list. It is called under write locked mrt_lock. */
596 static void ip6mr_update_thresholds(struct mfc6_cache
*cache
, unsigned char *ttls
)
600 cache
->mfc_un
.res
.minvif
= MAXVIFS
;
601 cache
->mfc_un
.res
.maxvif
= 0;
602 memset(cache
->mfc_un
.res
.ttls
, 255, MAXVIFS
);
604 for (vifi
= 0; vifi
< maxvif
; vifi
++) {
605 if (MIF_EXISTS(vifi
) && ttls
[vifi
] && ttls
[vifi
] < 255) {
606 cache
->mfc_un
.res
.ttls
[vifi
] = ttls
[vifi
];
607 if (cache
->mfc_un
.res
.minvif
> vifi
)
608 cache
->mfc_un
.res
.minvif
= vifi
;
609 if (cache
->mfc_un
.res
.maxvif
<= vifi
)
610 cache
->mfc_un
.res
.maxvif
= vifi
+ 1;
615 static int mif6_add(struct mif6ctl
*vifc
, int mrtsock
)
617 int vifi
= vifc
->mif6c_mifi
;
618 struct mif_device
*v
= &vif6_table
[vifi
];
619 struct net_device
*dev
;
622 if (MIF_EXISTS(vifi
))
625 switch (vifc
->mif6c_flags
) {
626 #ifdef CONFIG_IPV6_PIMSM_V2
629 * Special Purpose VIF in PIM
630 * All the packets will be sent to the daemon
632 if (reg_vif_num
>= 0)
634 dev
= ip6mr_reg_vif();
640 dev
= dev_get_by_index(&init_net
, vifc
->mif6c_pifi
);
642 return -EADDRNOTAVAIL
;
649 dev_set_allmulti(dev
, 1);
652 * Fill in the VIF structures
654 v
->rate_limit
= vifc
->vifc_rate_limit
;
655 v
->flags
= vifc
->mif6c_flags
;
657 v
->flags
|= VIFF_STATIC
;
658 v
->threshold
= vifc
->vifc_threshold
;
663 v
->link
= dev
->ifindex
;
664 if (v
->flags
& MIFF_REGISTER
)
665 v
->link
= dev
->iflink
;
667 /* And finish update writing critical data */
668 write_lock_bh(&mrt_lock
);
671 #ifdef CONFIG_IPV6_PIMSM_V2
672 if (v
->flags
& MIFF_REGISTER
)
675 if (vifi
+ 1 > maxvif
)
677 write_unlock_bh(&mrt_lock
);
681 static struct mfc6_cache
*ip6mr_cache_find(struct in6_addr
*origin
, struct in6_addr
*mcastgrp
)
683 int line
= MFC6_HASH(mcastgrp
, origin
);
684 struct mfc6_cache
*c
;
686 for (c
= mfc6_cache_array
[line
]; c
; c
= c
->next
) {
687 if (ipv6_addr_equal(&c
->mf6c_origin
, origin
) &&
688 ipv6_addr_equal(&c
->mf6c_mcastgrp
, mcastgrp
))
695 * Allocate a multicast cache entry
697 static struct mfc6_cache
*ip6mr_cache_alloc(void)
699 struct mfc6_cache
*c
= kmem_cache_alloc(mrt_cachep
, GFP_KERNEL
);
702 memset(c
, 0, sizeof(*c
));
703 c
->mfc_un
.res
.minvif
= MAXVIFS
;
707 static struct mfc6_cache
*ip6mr_cache_alloc_unres(void)
709 struct mfc6_cache
*c
= kmem_cache_alloc(mrt_cachep
, GFP_ATOMIC
);
712 memset(c
, 0, sizeof(*c
));
713 skb_queue_head_init(&c
->mfc_un
.unres
.unresolved
);
714 c
->mfc_un
.unres
.expires
= jiffies
+ 10 * HZ
;
719 * A cache entry has gone into a resolved state from queued
722 static void ip6mr_cache_resolve(struct mfc6_cache
*uc
, struct mfc6_cache
*c
)
727 * Play the pending entries through our router
730 while((skb
= __skb_dequeue(&uc
->mfc_un
.unres
.unresolved
))) {
731 if (ipv6_hdr(skb
)->version
== 0) {
733 struct nlmsghdr
*nlh
= (struct nlmsghdr
*)skb_pull(skb
, sizeof(struct ipv6hdr
));
735 if (ip6mr_fill_mroute(skb
, c
, NLMSG_DATA(nlh
)) > 0) {
736 nlh
->nlmsg_len
= skb_tail_pointer(skb
) - (u8
*)nlh
;
738 nlh
->nlmsg_type
= NLMSG_ERROR
;
739 nlh
->nlmsg_len
= NLMSG_LENGTH(sizeof(struct nlmsgerr
));
740 skb_trim(skb
, nlh
->nlmsg_len
);
741 ((struct nlmsgerr
*)NLMSG_DATA(nlh
))->error
= -EMSGSIZE
;
743 err
= rtnl_unicast(skb
, &init_net
, NETLINK_CB(skb
).pid
);
745 ip6_mr_forward(skb
, c
);
750 * Bounce a cache query up to pim6sd. We could use netlink for this but pim6sd
751 * expects the following bizarre scheme.
753 * Called under mrt_lock.
756 static int ip6mr_cache_report(struct sk_buff
*pkt
, vifi_t vifi
, int assert)
762 #ifdef CONFIG_IPV6_PIMSM_V2
763 if (assert == MRT6MSG_WHOLEPKT
)
764 skb
= skb_realloc_headroom(pkt
, -skb_network_offset(pkt
)
768 skb
= alloc_skb(sizeof(struct ipv6hdr
) + sizeof(*msg
), GFP_ATOMIC
);
773 /* I suppose that internal messages
774 * do not require checksums */
776 skb
->ip_summed
= CHECKSUM_UNNECESSARY
;
778 #ifdef CONFIG_IPV6_PIMSM_V2
779 if (assert == MRT6MSG_WHOLEPKT
) {
780 /* Ugly, but we have no choice with this interface.
781 Duplicate old header, fix length etc.
782 And all this only to mangle msg->im6_msgtype and
783 to set msg->im6_mbz to "mbz" :-)
785 skb_push(skb
, -skb_network_offset(pkt
));
787 skb_push(skb
, sizeof(*msg
));
788 skb_reset_transport_header(skb
);
789 msg
= (struct mrt6msg
*)skb_transport_header(skb
);
791 msg
->im6_msgtype
= MRT6MSG_WHOLEPKT
;
792 msg
->im6_mif
= reg_vif_num
;
794 ipv6_addr_copy(&msg
->im6_src
, &ipv6_hdr(pkt
)->saddr
);
795 ipv6_addr_copy(&msg
->im6_dst
, &ipv6_hdr(pkt
)->daddr
);
797 skb
->ip_summed
= CHECKSUM_UNNECESSARY
;
805 skb_put(skb
, sizeof(struct ipv6hdr
));
806 skb_reset_network_header(skb
);
807 skb_copy_to_linear_data(skb
, ipv6_hdr(pkt
), sizeof(struct ipv6hdr
));
812 skb_put(skb
, sizeof(*msg
));
813 skb_reset_transport_header(skb
);
814 msg
= (struct mrt6msg
*)skb_transport_header(skb
);
817 msg
->im6_msgtype
= assert;
820 ipv6_addr_copy(&msg
->im6_src
, &ipv6_hdr(pkt
)->saddr
);
821 ipv6_addr_copy(&msg
->im6_dst
, &ipv6_hdr(pkt
)->daddr
);
823 skb
->dst
= dst_clone(pkt
->dst
);
824 skb
->ip_summed
= CHECKSUM_UNNECESSARY
;
826 skb_pull(skb
, sizeof(struct ipv6hdr
));
829 if (mroute6_socket
== NULL
) {
835 * Deliver to user space multicast routing algorithms
837 if ((ret
= sock_queue_rcv_skb(mroute6_socket
, skb
)) < 0) {
839 printk(KERN_WARNING
"mroute6: pending queue full, dropping entries.\n");
847 * Queue a packet for resolution. It gets locked cache entry!
851 ip6mr_cache_unresolved(vifi_t vifi
, struct sk_buff
*skb
)
854 struct mfc6_cache
*c
;
856 spin_lock_bh(&mfc_unres_lock
);
857 for (c
= mfc_unres_queue
; c
; c
= c
->next
) {
858 if (ipv6_addr_equal(&c
->mf6c_mcastgrp
, &ipv6_hdr(skb
)->daddr
) &&
859 ipv6_addr_equal(&c
->mf6c_origin
, &ipv6_hdr(skb
)->saddr
))
865 * Create a new entry if allowable
868 if (atomic_read(&cache_resolve_queue_len
) >= 10 ||
869 (c
= ip6mr_cache_alloc_unres()) == NULL
) {
870 spin_unlock_bh(&mfc_unres_lock
);
877 * Fill in the new cache entry
880 c
->mf6c_origin
= ipv6_hdr(skb
)->saddr
;
881 c
->mf6c_mcastgrp
= ipv6_hdr(skb
)->daddr
;
884 * Reflect first query at pim6sd
886 if ((err
= ip6mr_cache_report(skb
, vifi
, MRT6MSG_NOCACHE
)) < 0) {
887 /* If the report failed throw the cache entry
890 spin_unlock_bh(&mfc_unres_lock
);
892 kmem_cache_free(mrt_cachep
, c
);
897 atomic_inc(&cache_resolve_queue_len
);
898 c
->next
= mfc_unres_queue
;
901 ipmr_do_expire_process(1);
905 * See if we can append the packet
907 if (c
->mfc_un
.unres
.unresolved
.qlen
> 3) {
911 skb_queue_tail(&c
->mfc_un
.unres
.unresolved
, skb
);
915 spin_unlock_bh(&mfc_unres_lock
);
920 * MFC6 cache manipulation by user space
923 static int ip6mr_mfc_delete(struct mf6cctl
*mfc
)
926 struct mfc6_cache
*c
, **cp
;
928 line
= MFC6_HASH(&mfc
->mf6cc_mcastgrp
.sin6_addr
, &mfc
->mf6cc_origin
.sin6_addr
);
930 for (cp
= &mfc6_cache_array
[line
]; (c
= *cp
) != NULL
; cp
= &c
->next
) {
931 if (ipv6_addr_equal(&c
->mf6c_origin
, &mfc
->mf6cc_origin
.sin6_addr
) &&
932 ipv6_addr_equal(&c
->mf6c_mcastgrp
, &mfc
->mf6cc_mcastgrp
.sin6_addr
)) {
933 write_lock_bh(&mrt_lock
);
935 write_unlock_bh(&mrt_lock
);
937 kmem_cache_free(mrt_cachep
, c
);
944 static int ip6mr_device_event(struct notifier_block
*this,
945 unsigned long event
, void *ptr
)
947 struct net_device
*dev
= ptr
;
948 struct mif_device
*v
;
951 if (dev_net(dev
) != &init_net
)
954 if (event
!= NETDEV_UNREGISTER
)
958 for (ct
= 0; ct
< maxvif
; ct
++, v
++) {
965 static struct notifier_block ip6_mr_notifier
= {
966 .notifier_call
= ip6mr_device_event
970 * Setup for IP multicast routing
973 void __init
ip6_mr_init(void)
975 mrt_cachep
= kmem_cache_create("ip6_mrt_cache",
976 sizeof(struct mfc6_cache
),
977 0, SLAB_HWCACHE_ALIGN
,
980 panic("cannot allocate ip6_mrt_cache");
982 setup_timer(&ipmr_expire_timer
, ipmr_expire_process
, 0);
983 register_netdevice_notifier(&ip6_mr_notifier
);
984 #ifdef CONFIG_PROC_FS
985 proc_net_fops_create(&init_net
, "ip6_mr_vif", 0, &ip6mr_vif_fops
);
986 proc_net_fops_create(&init_net
, "ip6_mr_cache", 0, &ip6mr_mfc_fops
);
991 static int ip6mr_mfc_add(struct mf6cctl
*mfc
, int mrtsock
)
994 struct mfc6_cache
*uc
, *c
, **cp
;
995 unsigned char ttls
[MAXVIFS
];
998 memset(ttls
, 255, MAXVIFS
);
999 for (i
= 0; i
< MAXVIFS
; i
++) {
1000 if (IF_ISSET(i
, &mfc
->mf6cc_ifset
))
1005 line
= MFC6_HASH(&mfc
->mf6cc_mcastgrp
.sin6_addr
, &mfc
->mf6cc_origin
.sin6_addr
);
1007 for (cp
= &mfc6_cache_array
[line
]; (c
= *cp
) != NULL
; cp
= &c
->next
) {
1008 if (ipv6_addr_equal(&c
->mf6c_origin
, &mfc
->mf6cc_origin
.sin6_addr
) &&
1009 ipv6_addr_equal(&c
->mf6c_mcastgrp
, &mfc
->mf6cc_mcastgrp
.sin6_addr
))
1014 write_lock_bh(&mrt_lock
);
1015 c
->mf6c_parent
= mfc
->mf6cc_parent
;
1016 ip6mr_update_thresholds(c
, ttls
);
1018 c
->mfc_flags
|= MFC_STATIC
;
1019 write_unlock_bh(&mrt_lock
);
1023 if (!ipv6_addr_is_multicast(&mfc
->mf6cc_mcastgrp
.sin6_addr
))
1026 c
= ip6mr_cache_alloc();
1030 c
->mf6c_origin
= mfc
->mf6cc_origin
.sin6_addr
;
1031 c
->mf6c_mcastgrp
= mfc
->mf6cc_mcastgrp
.sin6_addr
;
1032 c
->mf6c_parent
= mfc
->mf6cc_parent
;
1033 ip6mr_update_thresholds(c
, ttls
);
1035 c
->mfc_flags
|= MFC_STATIC
;
1037 write_lock_bh(&mrt_lock
);
1038 c
->next
= mfc6_cache_array
[line
];
1039 mfc6_cache_array
[line
] = c
;
1040 write_unlock_bh(&mrt_lock
);
1043 * Check to see if we resolved a queued list. If so we
1044 * need to send on the frames and tidy up.
1046 spin_lock_bh(&mfc_unres_lock
);
1047 for (cp
= &mfc_unres_queue
; (uc
= *cp
) != NULL
;
1049 if (ipv6_addr_equal(&uc
->mf6c_origin
, &c
->mf6c_origin
) &&
1050 ipv6_addr_equal(&uc
->mf6c_mcastgrp
, &c
->mf6c_mcastgrp
)) {
1052 if (atomic_dec_and_test(&cache_resolve_queue_len
))
1053 del_timer(&ipmr_expire_timer
);
1057 spin_unlock_bh(&mfc_unres_lock
);
1060 ip6mr_cache_resolve(uc
, c
);
1061 kmem_cache_free(mrt_cachep
, uc
);
1067 * Close the multicast socket, and clear the vif tables etc
1070 static void mroute_clean_tables(struct sock
*sk
)
1075 * Shut down all active vif entries
1077 for (i
= 0; i
< maxvif
; i
++) {
1078 if (!(vif6_table
[i
].flags
& VIFF_STATIC
))
1085 for (i
= 0; i
< ARRAY_SIZE(mfc6_cache_array
); i
++) {
1086 struct mfc6_cache
*c
, **cp
;
1088 cp
= &mfc6_cache_array
[i
];
1089 while ((c
= *cp
) != NULL
) {
1090 if (c
->mfc_flags
& MFC_STATIC
) {
1094 write_lock_bh(&mrt_lock
);
1096 write_unlock_bh(&mrt_lock
);
1098 kmem_cache_free(mrt_cachep
, c
);
1102 if (atomic_read(&cache_resolve_queue_len
) != 0) {
1103 struct mfc6_cache
*c
;
1105 spin_lock_bh(&mfc_unres_lock
);
1106 while (mfc_unres_queue
!= NULL
) {
1107 c
= mfc_unres_queue
;
1108 mfc_unres_queue
= c
->next
;
1109 spin_unlock_bh(&mfc_unres_lock
);
1111 ip6mr_destroy_unres(c
);
1113 spin_lock_bh(&mfc_unres_lock
);
1115 spin_unlock_bh(&mfc_unres_lock
);
1119 static int ip6mr_sk_init(struct sock
*sk
)
1124 write_lock_bh(&mrt_lock
);
1125 if (likely(mroute6_socket
== NULL
))
1126 mroute6_socket
= sk
;
1129 write_unlock_bh(&mrt_lock
);
1136 int ip6mr_sk_done(struct sock
*sk
)
1141 if (sk
== mroute6_socket
) {
1142 write_lock_bh(&mrt_lock
);
1143 mroute6_socket
= NULL
;
1144 write_unlock_bh(&mrt_lock
);
1146 mroute_clean_tables(sk
);
1155 * Socket options and virtual interface manipulation. The whole
1156 * virtual interface system is a complete heap, but unfortunately
1157 * that's how BSD mrouted happens to think. Maybe one day with a proper
1158 * MOSPF/PIM router set up we can clean this up.
1161 int ip6_mroute_setsockopt(struct sock
*sk
, int optname
, char __user
*optval
, int optlen
)
1168 if (optname
!= MRT6_INIT
) {
1169 if (sk
!= mroute6_socket
&& !capable(CAP_NET_ADMIN
))
1175 if (sk
->sk_type
!= SOCK_RAW
||
1176 inet_sk(sk
)->num
!= IPPROTO_ICMPV6
)
1178 if (optlen
< sizeof(int))
1181 return ip6mr_sk_init(sk
);
1184 return ip6mr_sk_done(sk
);
1187 if (optlen
< sizeof(vif
))
1189 if (copy_from_user(&vif
, optval
, sizeof(vif
)))
1191 if (vif
.mif6c_mifi
>= MAXVIFS
)
1194 ret
= mif6_add(&vif
, sk
== mroute6_socket
);
1199 if (optlen
< sizeof(mifi_t
))
1201 if (copy_from_user(&mifi
, optval
, sizeof(mifi_t
)))
1204 ret
= mif6_delete(mifi
);
1209 * Manipulate the forwarding caches. These live
1210 * in a sort of kernel/user symbiosis.
1214 if (optlen
< sizeof(mfc
))
1216 if (copy_from_user(&mfc
, optval
, sizeof(mfc
)))
1219 if (optname
== MRT6_DEL_MFC
)
1220 ret
= ip6mr_mfc_delete(&mfc
);
1222 ret
= ip6mr_mfc_add(&mfc
, sk
== mroute6_socket
);
1227 * Control PIM assert (to activate pim will activate assert)
1232 if (get_user(v
, (int __user
*)optval
))
1234 mroute_do_assert
= !!v
;
1238 #ifdef CONFIG_IPV6_PIMSM_V2
1242 if (get_user(v
, (int __user
*)optval
))
1247 if (v
!= mroute_do_pim
) {
1249 mroute_do_assert
= v
;
1251 ret
= inet6_add_protocol(&pim6_protocol
,
1254 ret
= inet6_del_protocol(&pim6_protocol
,
1265 * Spurious command, or MRT_VERSION which you cannot
1269 return -ENOPROTOOPT
;
1274 * Getsock opt support for the multicast routing system.
1277 int ip6_mroute_getsockopt(struct sock
*sk
, int optname
, char __user
*optval
,
1287 #ifdef CONFIG_IPV6_PIMSM_V2
1289 val
= mroute_do_pim
;
1293 val
= mroute_do_assert
;
1296 return -ENOPROTOOPT
;
1299 if (get_user(olr
, optlen
))
1302 olr
= min_t(int, olr
, sizeof(int));
1306 if (put_user(olr
, optlen
))
1308 if (copy_to_user(optval
, &val
, olr
))
1314 * The IP multicast ioctl support routines.
1317 int ip6mr_ioctl(struct sock
*sk
, int cmd
, void __user
*arg
)
1319 struct sioc_sg_req6 sr
;
1320 struct sioc_mif_req6 vr
;
1321 struct mif_device
*vif
;
1322 struct mfc6_cache
*c
;
1325 case SIOCGETMIFCNT_IN6
:
1326 if (copy_from_user(&vr
, arg
, sizeof(vr
)))
1328 if (vr
.mifi
>= maxvif
)
1330 read_lock(&mrt_lock
);
1331 vif
= &vif6_table
[vr
.mifi
];
1332 if (MIF_EXISTS(vr
.mifi
)) {
1333 vr
.icount
= vif
->pkt_in
;
1334 vr
.ocount
= vif
->pkt_out
;
1335 vr
.ibytes
= vif
->bytes_in
;
1336 vr
.obytes
= vif
->bytes_out
;
1337 read_unlock(&mrt_lock
);
1339 if (copy_to_user(arg
, &vr
, sizeof(vr
)))
1343 read_unlock(&mrt_lock
);
1344 return -EADDRNOTAVAIL
;
1345 case SIOCGETSGCNT_IN6
:
1346 if (copy_from_user(&sr
, arg
, sizeof(sr
)))
1349 read_lock(&mrt_lock
);
1350 c
= ip6mr_cache_find(&sr
.src
.sin6_addr
, &sr
.grp
.sin6_addr
);
1352 sr
.pktcnt
= c
->mfc_un
.res
.pkt
;
1353 sr
.bytecnt
= c
->mfc_un
.res
.bytes
;
1354 sr
.wrong_if
= c
->mfc_un
.res
.wrong_if
;
1355 read_unlock(&mrt_lock
);
1357 if (copy_to_user(arg
, &sr
, sizeof(sr
)))
1361 read_unlock(&mrt_lock
);
1362 return -EADDRNOTAVAIL
;
1364 return -ENOIOCTLCMD
;
1369 static inline int ip6mr_forward2_finish(struct sk_buff
*skb
)
1372 return dst_output(skb
);
1376 * Processing handlers for ip6mr_forward
1379 static int ip6mr_forward2(struct sk_buff
*skb
, struct mfc6_cache
*c
, int vifi
)
1381 struct ipv6hdr
*ipv6h
;
1382 struct mif_device
*vif
= &vif6_table
[vifi
];
1383 struct net_device
*dev
;
1384 struct dst_entry
*dst
;
1387 if (vif
->dev
== NULL
)
1390 #ifdef CONFIG_IPV6_PIMSM_V2
1391 if (vif
->flags
& MIFF_REGISTER
) {
1393 vif
->bytes_out
+= skb
->len
;
1394 ((struct net_device_stats
*)netdev_priv(vif
->dev
))->tx_bytes
+= skb
->len
;
1395 ((struct net_device_stats
*)netdev_priv(vif
->dev
))->tx_packets
++;
1396 ip6mr_cache_report(skb
, vifi
, MRT6MSG_WHOLEPKT
);
1402 ipv6h
= ipv6_hdr(skb
);
1404 fl
= (struct flowi
) {
1407 { .daddr
= ipv6h
->daddr
, }
1411 dst
= ip6_route_output(&init_net
, NULL
, &fl
);
1415 dst_release(skb
->dst
);
1419 * RFC1584 teaches, that DVMRP/PIM router must deliver packets locally
1420 * not only before forwarding, but after forwarding on all output
1421 * interfaces. It is clear, if mrouter runs a multicasting
1422 * program, it should receive packets not depending to what interface
1423 * program is joined.
1424 * If we will not make it, the program will have to join on all
1425 * interfaces. On the other hand, multihoming host (or router, but
1426 * not mrouter) cannot join to more than one interface - it will
1427 * result in receiving multiple packets.
1432 vif
->bytes_out
+= skb
->len
;
1434 /* We are about to write */
1435 /* XXX: extension headers? */
1436 if (skb_cow(skb
, sizeof(*ipv6h
) + LL_RESERVED_SPACE(dev
)))
1439 ipv6h
= ipv6_hdr(skb
);
1442 IP6CB(skb
)->flags
|= IP6SKB_FORWARDED
;
1444 return NF_HOOK(PF_INET6
, NF_INET_FORWARD
, skb
, skb
->dev
, dev
,
1445 ip6mr_forward2_finish
);
1452 static int ip6mr_find_vif(struct net_device
*dev
)
1455 for (ct
= maxvif
- 1; ct
>= 0; ct
--) {
1456 if (vif6_table
[ct
].dev
== dev
)
1462 static int ip6_mr_forward(struct sk_buff
*skb
, struct mfc6_cache
*cache
)
1467 vif
= cache
->mf6c_parent
;
1468 cache
->mfc_un
.res
.pkt
++;
1469 cache
->mfc_un
.res
.bytes
+= skb
->len
;
1472 * Wrong interface: drop packet and (maybe) send PIM assert.
1474 if (vif6_table
[vif
].dev
!= skb
->dev
) {
1477 cache
->mfc_un
.res
.wrong_if
++;
1478 true_vifi
= ip6mr_find_vif(skb
->dev
);
1480 if (true_vifi
>= 0 && mroute_do_assert
&&
1481 /* pimsm uses asserts, when switching from RPT to SPT,
1482 so that we cannot check that packet arrived on an oif.
1483 It is bad, but otherwise we would need to move pretty
1484 large chunk of pimd to kernel. Ough... --ANK
1486 (mroute_do_pim
|| cache
->mfc_un
.res
.ttls
[true_vifi
] < 255) &&
1488 cache
->mfc_un
.res
.last_assert
+ MFC_ASSERT_THRESH
)) {
1489 cache
->mfc_un
.res
.last_assert
= jiffies
;
1490 ip6mr_cache_report(skb
, true_vifi
, MRT6MSG_WRONGMIF
);
1495 vif6_table
[vif
].pkt_in
++;
1496 vif6_table
[vif
].bytes_in
+= skb
->len
;
1501 for (ct
= cache
->mfc_un
.res
.maxvif
- 1; ct
>= cache
->mfc_un
.res
.minvif
; ct
--) {
1502 if (ipv6_hdr(skb
)->hop_limit
> cache
->mfc_un
.res
.ttls
[ct
]) {
1504 struct sk_buff
*skb2
= skb_clone(skb
, GFP_ATOMIC
);
1506 ip6mr_forward2(skb2
, cache
, psend
);
1512 ip6mr_forward2(skb
, cache
, psend
);
1523 * Multicast packets for forwarding arrive here
1526 int ip6_mr_input(struct sk_buff
*skb
)
1528 struct mfc6_cache
*cache
;
1530 read_lock(&mrt_lock
);
1531 cache
= ip6mr_cache_find(&ipv6_hdr(skb
)->saddr
, &ipv6_hdr(skb
)->daddr
);
1534 * No usable cache entry
1536 if (cache
== NULL
) {
1539 vif
= ip6mr_find_vif(skb
->dev
);
1541 int err
= ip6mr_cache_unresolved(vif
, skb
);
1542 read_unlock(&mrt_lock
);
1546 read_unlock(&mrt_lock
);
1551 ip6_mr_forward(skb
, cache
);
1553 read_unlock(&mrt_lock
);
1560 ip6mr_fill_mroute(struct sk_buff
*skb
, struct mfc6_cache
*c
, struct rtmsg
*rtm
)
1563 struct rtnexthop
*nhp
;
1564 struct net_device
*dev
= vif6_table
[c
->mf6c_parent
].dev
;
1565 u8
*b
= skb_tail_pointer(skb
);
1566 struct rtattr
*mp_head
;
1569 RTA_PUT(skb
, RTA_IIF
, 4, &dev
->ifindex
);
1571 mp_head
= (struct rtattr
*)skb_put(skb
, RTA_LENGTH(0));
1573 for (ct
= c
->mfc_un
.res
.minvif
; ct
< c
->mfc_un
.res
.maxvif
; ct
++) {
1574 if (c
->mfc_un
.res
.ttls
[ct
] < 255) {
1575 if (skb_tailroom(skb
) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp
)) + 4))
1576 goto rtattr_failure
;
1577 nhp
= (struct rtnexthop
*)skb_put(skb
, RTA_ALIGN(sizeof(*nhp
)));
1578 nhp
->rtnh_flags
= 0;
1579 nhp
->rtnh_hops
= c
->mfc_un
.res
.ttls
[ct
];
1580 nhp
->rtnh_ifindex
= vif6_table
[ct
].dev
->ifindex
;
1581 nhp
->rtnh_len
= sizeof(*nhp
);
1584 mp_head
->rta_type
= RTA_MULTIPATH
;
1585 mp_head
->rta_len
= skb_tail_pointer(skb
) - (u8
*)mp_head
;
1586 rtm
->rtm_type
= RTN_MULTICAST
;
1594 int ip6mr_get_route(struct sk_buff
*skb
, struct rtmsg
*rtm
, int nowait
)
1597 struct mfc6_cache
*cache
;
1598 struct rt6_info
*rt
= (struct rt6_info
*)skb
->dst
;
1600 read_lock(&mrt_lock
);
1601 cache
= ip6mr_cache_find(&rt
->rt6i_src
.addr
, &rt
->rt6i_dst
.addr
);
1604 struct sk_buff
*skb2
;
1605 struct ipv6hdr
*iph
;
1606 struct net_device
*dev
;
1610 read_unlock(&mrt_lock
);
1615 if (dev
== NULL
|| (vif
= ip6mr_find_vif(dev
)) < 0) {
1616 read_unlock(&mrt_lock
);
1620 /* really correct? */
1621 skb2
= alloc_skb(sizeof(struct ipv6hdr
), GFP_ATOMIC
);
1623 read_unlock(&mrt_lock
);
1627 skb_reset_transport_header(skb2
);
1629 skb_put(skb2
, sizeof(struct ipv6hdr
));
1630 skb_reset_network_header(skb2
);
1632 iph
= ipv6_hdr(skb2
);
1635 iph
->flow_lbl
[0] = 0;
1636 iph
->flow_lbl
[1] = 0;
1637 iph
->flow_lbl
[2] = 0;
1638 iph
->payload_len
= 0;
1639 iph
->nexthdr
= IPPROTO_NONE
;
1641 ipv6_addr_copy(&iph
->saddr
, &rt
->rt6i_src
.addr
);
1642 ipv6_addr_copy(&iph
->daddr
, &rt
->rt6i_dst
.addr
);
1644 err
= ip6mr_cache_unresolved(vif
, skb2
);
1645 read_unlock(&mrt_lock
);
1650 if (!nowait
&& (rtm
->rtm_flags
&RTM_F_NOTIFY
))
1651 cache
->mfc_flags
|= MFC_NOTIFY
;
1653 err
= ip6mr_fill_mroute(skb
, cache
, rtm
);
1654 read_unlock(&mrt_lock
);