2 * IP multicast routing support for mrouted 3.6/3.8
4 * (c) 1995 Alan Cox, <alan@cymru.net>
5 * Linux Consultancy and Custom Driver Development
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation; either version
10 * 2 of the License, or (at your option) any later version.
12 * Version: $Id: ipmr.c,v 1.40 1999/03/25 10:04:25 davem Exp $
15 * Michael Chastain : Incorrect size of copying.
16 * Alan Cox : Added the cache manager code
17 * Alan Cox : Fixed the clone/copy bug and device race.
18 * Mike McLagan : Routing by source
19 * Malcolm Beattie : Buffer handling fixes.
20 * Alexey Kuznetsov : Double buffer free and other fixes.
21 * SVR Anand : Fixed several multicast bugs and problems.
22 * Alexey Kuznetsov : Status, optimisations and more.
23 * Brad Parker : Better behaviour on mrouted upcall
25 * Carlos Picoto : PIMv1 Support
29 #include <linux/config.h>
30 #include <asm/system.h>
31 #include <asm/uaccess.h>
32 #include <linux/types.h>
33 #include <linux/sched.h>
34 #include <linux/errno.h>
35 #include <linux/timer.h>
37 #include <linux/kernel.h>
38 #include <linux/fcntl.h>
39 #include <linux/stat.h>
40 #include <linux/socket.h>
42 #include <linux/inet.h>
43 #include <linux/netdevice.h>
44 #include <linux/inetdevice.h>
45 #include <linux/igmp.h>
46 #include <linux/proc_fs.h>
47 #include <linux/mroute.h>
48 #include <linux/init.h>
50 #include <net/protocol.h>
51 #include <linux/skbuff.h>
56 #include <linux/notifier.h>
57 #include <linux/if_arp.h>
58 #include <linux/ip_fw.h>
59 #include <linux/firewall.h>
61 #include <net/checksum.h>
63 #if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2)
64 #define CONFIG_IP_PIMSM 1
68 * Multicast router control variables
71 static struct vif_device vif_table
[MAXVIFS
]; /* Devices */
72 static unsigned long vifc_map
; /* Active device map */
74 int mroute_do_assert
= 0; /* Set in PIM assert */
75 int mroute_do_pim
= 0;
76 static struct mfc_cache
*mfc_cache_array
[MFC_LINES
]; /* Forwarding cache */
77 int cache_resolve_queue_len
= 0; /* Size of unresolved */
79 static int ip_mr_forward(struct sk_buff
*skb
, struct mfc_cache
*cache
, int local
);
80 static int ipmr_cache_report(struct sk_buff
*pkt
, vifi_t vifi
, int assert);
81 static int ipmr_fill_mroute(struct sk_buff
*skb
, struct mfc_cache
*c
, struct rtmsg
*rtm
);
83 extern struct inet_protocol pim_protocol
;
86 struct device
*ipmr_new_tunnel(struct vifctl
*v
)
88 struct device
*dev
= NULL
;
91 dev
= dev_get("tunl0");
97 struct ip_tunnel_parm p
;
98 struct in_device
*in_dev
;
100 memset(&p
, 0, sizeof(p
));
101 p
.iph
.daddr
= v
->vifc_rmt_addr
.s_addr
;
102 p
.iph
.saddr
= v
->vifc_lcl_addr
.s_addr
;
105 p
.iph
.protocol
= IPPROTO_IPIP
;
106 sprintf(p
.name
, "dvmrp%d", v
->vifc_vifi
);
107 ifr
.ifr_ifru
.ifru_data
= (void*)&p
;
109 oldfs
= get_fs(); set_fs(KERNEL_DS
);
110 err
= dev
->do_ioctl(dev
, &ifr
, SIOCADDTUNNEL
);
113 if (err
== 0 && (dev
= dev_get(p
.name
)) != NULL
) {
114 dev
->flags
|= IFF_MULTICAST
;
116 in_dev
= dev
->ip_ptr
;
117 if (in_dev
== NULL
&& (in_dev
= inetdev_init(dev
)) == NULL
)
119 in_dev
->cnf
.rp_filter
= 0;
129 unregister_netdevice(dev
);
134 #ifdef CONFIG_IP_PIMSM
136 static int reg_vif_num
= -1;
137 static struct device
* reg_dev
;
139 static int reg_vif_xmit(struct sk_buff
*skb
, struct device
*dev
)
141 ((struct net_device_stats
*)dev
->priv
)->tx_bytes
+= skb
->len
;
142 ((struct net_device_stats
*)dev
->priv
)->tx_packets
++;
143 ipmr_cache_report(skb
, reg_vif_num
, IGMPMSG_WHOLEPKT
);
148 static struct net_device_stats
*reg_vif_get_stats(struct device
*dev
)
150 return (struct net_device_stats
*)dev
->priv
;
154 struct device
*ipmr_reg_vif(struct vifctl
*v
)
157 struct in_device
*in_dev
;
160 size
= sizeof(*dev
) + IFNAMSIZ
+ sizeof(struct net_device_stats
);
161 dev
= kmalloc(size
, GFP_KERNEL
);
165 memset(dev
, 0, size
);
168 dev
->name
= dev
->priv
+ sizeof(struct net_device_stats
);
170 strcpy(dev
->name
, "pimreg");
172 dev
->type
= ARPHRD_PIMREG
;
173 dev
->mtu
= 1500 - sizeof(struct iphdr
) - 8;
174 dev
->flags
= IFF_NOARP
;
175 dev
->hard_start_xmit
= reg_vif_xmit
;
176 dev
->get_stats
= reg_vif_get_stats
;
180 if (register_netdevice(dev
)) {
187 if ((in_dev
= inetdev_init(dev
)) == NULL
)
190 in_dev
->cnf
.rp_filter
= 0;
200 unregister_netdevice(dev
);
211 static int vif_delete(int vifi
)
213 struct vif_device
*v
;
215 struct in_device
*in_dev
;
217 if (vifi
< 0 || vifi
>= maxvif
|| !(vifc_map
&(1<<vifi
)))
218 return -EADDRNOTAVAIL
;
220 v
= &vif_table
[vifi
];
224 vifc_map
&= ~(1<<vifi
);
226 if ((in_dev
= dev
->ip_ptr
) != NULL
)
227 in_dev
->cnf
.mc_forwarding
= 0;
229 dev_set_allmulti(dev
, -1);
230 ip_rt_multicast_event(in_dev
);
232 if (v
->flags
&(VIFF_TUNNEL
|VIFF_REGISTER
)) {
233 #ifdef CONFIG_IP_PIMSM
234 if (vifi
== reg_vif_num
) {
239 unregister_netdevice(dev
);
240 if (v
->flags
&VIFF_REGISTER
)
244 if (vifi
+1 == maxvif
) {
246 for (tmp
=vifi
-1; tmp
>=0; tmp
--) {
247 if (vifc_map
&(1<<tmp
))
255 static void ipmr_update_threshoulds(struct mfc_cache
*cache
, unsigned char *ttls
)
261 cache
->mfc_minvif
= MAXVIFS
;
262 cache
->mfc_maxvif
= 0;
263 memset(cache
->mfc_ttls
, 255, MAXVIFS
);
265 for (vifi
=0; vifi
<maxvif
; vifi
++) {
266 if (vifc_map
&(1<<vifi
) && ttls
[vifi
] && ttls
[vifi
] < 255) {
267 cache
->mfc_ttls
[vifi
] = ttls
[vifi
];
268 if (cache
->mfc_minvif
> vifi
)
269 cache
->mfc_minvif
= vifi
;
270 if (cache
->mfc_maxvif
<= vifi
)
271 cache
->mfc_maxvif
= vifi
+ 1;
278 * Delete a multicast route cache entry
281 static void ipmr_cache_delete(struct mfc_cache
*cache
)
285 struct mfc_cache
**cp
;
288 * Find the right cache line
291 line
=MFC_HASH(cache
->mfc_mcastgrp
,cache
->mfc_origin
);
292 cp
=&(mfc_cache_array
[line
]);
294 if(cache
->mfc_flags
&MFC_QUEUED
)
295 del_timer(&cache
->mfc_timer
);
312 * Free the buffer. If it is a pending resolution
313 * clean up the other resources.
316 if(cache
->mfc_flags
&MFC_QUEUED
)
318 cache_resolve_queue_len
--;
319 while((skb
=skb_dequeue(&cache
->mfc_unresolved
))) {
320 #ifdef CONFIG_RTNETLINK
321 if (skb
->nh
.iph
->version
== 0) {
322 struct nlmsghdr
*nlh
= (struct nlmsghdr
*)skb_pull(skb
, sizeof(struct iphdr
));
323 nlh
->nlmsg_type
= NLMSG_ERROR
;
324 nlh
->nlmsg_len
= NLMSG_LENGTH(sizeof(struct nlmsgerr
));
325 skb_trim(skb
, nlh
->nlmsg_len
);
326 ((struct nlmsgerr
*)NLMSG_DATA(nlh
))->error
= -ETIMEDOUT
;
327 netlink_unicast(rtnl
, skb
, NETLINK_CB(skb
).dst_pid
, MSG_DONTWAIT
);
333 kfree_s(cache
,sizeof(cache
));
/*
 * Timer callback for an unresolved multicast cache entry: the
 * resolution window has expired without mrouted answering, so drop
 * the entry (which also releases any packets queued on it).
 */
static void ipmr_cache_timer(unsigned long data)
{
	ipmr_cache_delete((struct mfc_cache *)data);
}
347 * Insert a multicast cache entry
350 static void ipmr_cache_insert(struct mfc_cache
*c
)
352 int line
=MFC_HASH(c
->mfc_mcastgrp
,c
->mfc_origin
);
353 c
->next
=mfc_cache_array
[line
];
354 mfc_cache_array
[line
]=c
;
358 * Find a multicast cache entry
361 struct mfc_cache
*ipmr_cache_find(__u32 origin
, __u32 mcastgrp
)
363 int line
=MFC_HASH(mcastgrp
,origin
);
364 struct mfc_cache
*cache
;
366 cache
=mfc_cache_array
[line
];
369 if(cache
->mfc_origin
==origin
&& cache
->mfc_mcastgrp
==mcastgrp
)
377 * Allocate a multicast cache entry
380 static struct mfc_cache
*ipmr_cache_alloc(int priority
)
382 struct mfc_cache
*c
=(struct mfc_cache
*)kmalloc(sizeof(struct mfc_cache
), priority
);
385 memset(c
, 0, sizeof(*c
));
386 skb_queue_head_init(&c
->mfc_unresolved
);
387 init_timer(&c
->mfc_timer
);
388 c
->mfc_timer
.data
=(long)c
;
389 c
->mfc_timer
.function
=ipmr_cache_timer
;
390 c
->mfc_minvif
= MAXVIFS
;
395 * A cache entry has gone into a resolved state from queued
398 static void ipmr_cache_resolve(struct mfc_cache
*cache
)
405 * Kill the queue entry timer.
408 del_timer(&cache
->mfc_timer
);
410 if (cache
->mfc_flags
&MFC_QUEUED
) {
411 cache
->mfc_flags
&=~MFC_QUEUED
;
412 cache_resolve_queue_len
--;
418 * Play the pending entries through our router
420 while((skb
=skb_dequeue(&cache
->mfc_unresolved
))) {
421 #ifdef CONFIG_RTNETLINK
422 if (skb
->nh
.iph
->version
== 0) {
424 struct nlmsghdr
*nlh
= (struct nlmsghdr
*)skb_pull(skb
, sizeof(struct iphdr
));
426 if (ipmr_fill_mroute(skb
, cache
, NLMSG_DATA(nlh
)) > 0) {
427 nlh
->nlmsg_len
= skb
->tail
- (u8
*)nlh
;
429 nlh
->nlmsg_type
= NLMSG_ERROR
;
430 nlh
->nlmsg_len
= NLMSG_LENGTH(sizeof(struct nlmsgerr
));
431 skb_trim(skb
, nlh
->nlmsg_len
);
432 ((struct nlmsgerr
*)NLMSG_DATA(nlh
))->error
= -EMSGSIZE
;
434 err
= netlink_unicast(rtnl
, skb
, NETLINK_CB(skb
).pid
, MSG_DONTWAIT
);
437 ip_mr_forward(skb
, cache
, 0);
442 * Bounce a cache query up to mrouted. We could use netlink for this but mrouted
443 * expects the following bizarre scheme..
446 static int ipmr_cache_report(struct sk_buff
*pkt
, vifi_t vifi
, int assert)
449 int ihl
= pkt
->nh
.iph
->ihl
<<2;
450 struct igmphdr
*igmp
;
454 if (mroute_socket
==NULL
)
457 #ifdef CONFIG_IP_PIMSM
458 if (assert == IGMPMSG_WHOLEPKT
)
459 skb
= skb_realloc_headroom(pkt
, sizeof(struct iphdr
));
462 skb
= alloc_skb(128, GFP_ATOMIC
);
467 #ifdef CONFIG_IP_PIMSM
468 if (assert == IGMPMSG_WHOLEPKT
) {
469 /* Ugly, but we have no choice with this interface.
470 Duplicate old header, fix ihl, length etc.
471 And all this only to mangle msg->im_msgtype and
472 to set msg->im_mbz to "mbz" :-)
474 msg
= (struct igmpmsg
*)skb_push(skb
, sizeof(struct iphdr
));
475 skb
->nh
.raw
= skb
->h
.raw
= (u8
*)msg
;
476 memcpy(msg
, pkt
->nh
.raw
, sizeof(struct iphdr
));
477 msg
->im_msgtype
= IGMPMSG_WHOLEPKT
;
479 msg
->im_vif
= reg_vif_num
;
480 skb
->nh
.iph
->ihl
= sizeof(struct iphdr
) >> 2;
481 skb
->nh
.iph
->tot_len
= htons(ntohs(pkt
->nh
.iph
->tot_len
) + sizeof(struct iphdr
));
490 skb
->nh
.iph
= (struct iphdr
*)skb_put(skb
, ihl
);
491 memcpy(skb
->data
,pkt
->data
,ihl
);
492 skb
->nh
.iph
->protocol
= 0; /* Flag to the kernel this is a route add */
493 msg
= (struct igmpmsg
*)skb
->nh
.iph
;
495 skb
->dst
= dst_clone(pkt
->dst
);
501 igmp
=(struct igmphdr
*)skb_put(skb
,sizeof(struct igmphdr
));
503 msg
->im_msgtype
= assert;
505 skb
->nh
.iph
->tot_len
=htons(skb
->len
); /* Fix the length */
506 skb
->h
.raw
= skb
->nh
.raw
;
512 if ((ret
=sock_queue_rcv_skb(mroute_socket
,skb
))<0) {
514 printk(KERN_WARNING
"mroute: pending queue full, dropping entries.\n");
522 * Queue a packet for resolution
525 static int ipmr_cache_unresolved(struct mfc_cache
*cache
, vifi_t vifi
, struct sk_buff
*skb
)
530 * Create a new entry if allowable
532 if(cache_resolve_queue_len
>=10 || (cache
=ipmr_cache_alloc(GFP_ATOMIC
))==NULL
)
538 * Fill in the new cache entry
540 cache
->mfc_parent
=ALL_VIFS
;
541 cache
->mfc_origin
=skb
->nh
.iph
->saddr
;
542 cache
->mfc_mcastgrp
=skb
->nh
.iph
->daddr
;
543 cache
->mfc_flags
=MFC_QUEUED
;
545 * Link to the unresolved list
547 ipmr_cache_insert(cache
);
548 cache_resolve_queue_len
++;
550 * Fire off the expiry timer
552 cache
->mfc_timer
.expires
=jiffies
+10*HZ
;
553 add_timer(&cache
->mfc_timer
);
555 * Reflect first query at mrouted.
559 /* If the report failed throw the cache entry
562 OK, OK, Brad. Only do not forget to free skb
565 if (ipmr_cache_report(skb
, vifi
, IGMPMSG_NOCACHE
)<0) {
566 ipmr_cache_delete(cache
);
573 * See if we can append the packet
575 if(cache
->mfc_queuelen
>3)
580 cache
->mfc_queuelen
++;
581 skb_queue_tail(&cache
->mfc_unresolved
,skb
);
586 * MFC cache manipulation by user space mroute daemon
589 int ipmr_mfc_modify(int action
, struct mfcctl
*mfc
)
591 struct mfc_cache
*cache
;
593 if(!MULTICAST(mfc
->mfcc_mcastgrp
.s_addr
))
596 * Find the cache line
601 cache
=ipmr_cache_find(mfc
->mfcc_origin
.s_addr
,mfc
->mfcc_mcastgrp
.s_addr
);
606 if(action
==MRT_DEL_MFC
)
610 ipmr_cache_delete(cache
);
621 * Update the cache, see if it frees a pending queue
624 cache
->mfc_flags
|=MFC_RESOLVED
;
625 cache
->mfc_parent
=mfc
->mfcc_parent
;
626 ipmr_update_threshoulds(cache
, mfc
->mfcc_ttls
);
629 * Check to see if we resolved a queued list. If so we
630 * need to send on the frames and tidy up.
633 if(cache
->mfc_flags
&MFC_QUEUED
)
634 ipmr_cache_resolve(cache
); /* Unhook & send the frames */
640 * Unsolicited update - that's ok, add anyway.
644 cache
=ipmr_cache_alloc(GFP_ATOMIC
);
650 cache
->mfc_flags
=MFC_RESOLVED
;
651 cache
->mfc_origin
=mfc
->mfcc_origin
.s_addr
;
652 cache
->mfc_mcastgrp
=mfc
->mfcc_mcastgrp
.s_addr
;
653 cache
->mfc_parent
=mfc
->mfcc_parent
;
654 ipmr_update_threshoulds(cache
, mfc
->mfcc_ttls
);
655 ipmr_cache_insert(cache
);
660 static void mrtsock_destruct(struct sock
*sk
)
662 if (sk
== mroute_socket
) {
663 ipv4_devconf
.mc_forwarding
= 0;
673 * Socket options and virtual interface manipulation. The whole
674 * virtual interface system is a complete heap, but unfortunately
675 * that's how BSD mrouted happens to think. Maybe one day with a proper
676 * MOSPF/PIM router set up we can clean this up.
679 int ip_mroute_setsockopt(struct sock
*sk
,int optname
,char *optval
,int optlen
)
684 if(optname
!=MRT_INIT
)
686 if(sk
!=mroute_socket
)
693 if(sk
->type
!=SOCK_RAW
|| sk
->num
!=IPPROTO_IGMP
)
695 if(optlen
!=sizeof(int))
699 if (get_user(opt
,(int *)optval
))
707 ipv4_devconf
.mc_forwarding
= 1;
708 if (ip_ra_control(sk
, 1, mrtsock_destruct
) == 0)
710 mrtsock_destruct(sk
);
713 return ip_ra_control(sk
, 0, NULL
);
716 if(optlen
!=sizeof(vif
))
718 if (copy_from_user(&vif
,optval
,sizeof(vif
)))
720 if(vif
.vifc_vifi
>= MAXVIFS
)
722 if(optname
==MRT_ADD_VIF
)
724 struct vif_device
*v
=&vif_table
[vif
.vifc_vifi
];
726 struct in_device
*in_dev
;
729 if (vifc_map
&(1<<vif
.vifc_vifi
))
732 switch (vif
.vifc_flags
) {
733 #ifdef CONFIG_IP_PIMSM
737 * Special Purpose VIF in PIM
738 * All the packets will be sent to the daemon
740 if (reg_vif_num
>= 0)
742 reg_vif_num
= vif
.vifc_vifi
;
743 dev
= ipmr_reg_vif(&vif
);
751 dev
= ipmr_new_tunnel(&vif
);
756 dev
=ip_dev_find(vif
.vifc_lcl_addr
.s_addr
);
758 return -EADDRNOTAVAIL
;
762 printk(KERN_DEBUG
"ipmr_add_vif: flags %02x\n", vif
.vifc_flags
);
767 if ((in_dev
= dev
->ip_ptr
) == NULL
)
768 return -EADDRNOTAVAIL
;
769 if (in_dev
->cnf
.mc_forwarding
)
771 in_dev
->cnf
.mc_forwarding
= 1;
772 dev_set_allmulti(dev
, +1);
773 ip_rt_multicast_event(in_dev
);
776 * Fill in the VIF structures
779 v
->rate_limit
=vif
.vifc_rate_limit
;
780 v
->local
=vif
.vifc_lcl_addr
.s_addr
;
781 v
->remote
=vif
.vifc_rmt_addr
.s_addr
;
782 v
->flags
=vif
.vifc_flags
;
783 v
->threshold
=vif
.vifc_threshold
;
789 v
->link
= dev
->ifindex
;
790 if (vif
.vifc_flags
&(VIFF_TUNNEL
|VIFF_REGISTER
))
791 v
->link
= dev
->iflink
;
792 vifc_map
|=(1<<vif
.vifc_vifi
);
793 if (vif
.vifc_vifi
+1 > maxvif
)
794 maxvif
= vif
.vifc_vifi
+1;
800 ret
= vif_delete(vif
.vifc_vifi
);
806 * Manipulate the forwarding caches. These live
807 * in a sort of kernel/user symbiosis.
811 if(optlen
!=sizeof(mfc
))
813 if (copy_from_user(&mfc
,optval
, sizeof(mfc
)))
815 return ipmr_mfc_modify(optname
, &mfc
);
817 * Control PIM assert.
822 if(get_user(v
,(int *)optval
))
824 mroute_do_assert
=(v
)?1:0;
827 #ifdef CONFIG_IP_PIMSM
831 if(get_user(v
,(int *)optval
))
834 if (v
!= mroute_do_pim
) {
836 mroute_do_assert
= v
;
837 #ifdef CONFIG_IP_PIMSM_V2
839 inet_add_protocol(&pim_protocol
);
841 inet_del_protocol(&pim_protocol
);
848 * Spurious command, or MRT_VERSION which you cannot
857 * Getsock opt support for the multicast routing system.
860 int ip_mroute_getsockopt(struct sock
*sk
,int optname
,char *optval
,int *optlen
)
865 if(sk
!=mroute_socket
)
867 if(optname
!=MRT_VERSION
&&
868 #ifdef CONFIG_IP_PIMSM
874 if(get_user(olr
, optlen
))
877 olr
=min(olr
,sizeof(int));
878 if(put_user(olr
,optlen
))
880 if(optname
==MRT_VERSION
)
882 #ifdef CONFIG_IP_PIMSM
883 else if(optname
==MRT_PIM
)
887 val
=mroute_do_assert
;
888 if(copy_to_user(optval
,&val
,olr
))
894 * The IP multicast ioctl support routines.
897 int ipmr_ioctl(struct sock
*sk
, int cmd
, unsigned long arg
)
899 struct sioc_sg_req sr
;
900 struct sioc_vif_req vr
;
901 struct vif_device
*vif
;
907 if (copy_from_user(&vr
,(void *)arg
,sizeof(vr
)))
911 vif
=&vif_table
[vr
.vifi
];
912 if(vifc_map
&(1<<vr
.vifi
))
914 vr
.icount
=vif
->pkt_in
;
915 vr
.ocount
=vif
->pkt_out
;
916 vr
.ibytes
=vif
->bytes_in
;
917 vr
.obytes
=vif
->bytes_out
;
918 if (copy_to_user((void *)arg
,&vr
,sizeof(vr
)))
922 return -EADDRNOTAVAIL
;
924 if (copy_from_user(&sr
,(void *)arg
,sizeof(sr
)))
926 for (c
= mfc_cache_array
[MFC_HASH(sr
.grp
.s_addr
, sr
.src
.s_addr
)];
928 if (sr
.grp
.s_addr
== c
->mfc_mcastgrp
&&
929 sr
.src
.s_addr
== c
->mfc_origin
) {
930 sr
.pktcnt
= c
->mfc_pkt
;
931 sr
.bytecnt
= c
->mfc_bytes
;
932 sr
.wrong_if
= c
->mfc_wrong_if
;
933 if (copy_to_user((void *)arg
,&sr
,sizeof(sr
)))
938 return -EADDRNOTAVAIL
;
945 * Close the multicast socket, and clear the vif tables etc
948 void mroute_close(struct sock
*sk
)
953 * Shut down all active vif entries
956 for(i
=0; i
<maxvif
; i
++)
963 for(i
=0;i
<MFC_LINES
;i
++)
966 while(mfc_cache_array
[i
]!=NULL
)
967 ipmr_cache_delete(mfc_cache_array
[i
]);
972 static int ipmr_device_event(struct notifier_block
*this, unsigned long event
, void *ptr
)
974 struct vif_device
*v
;
976 if (event
!= NETDEV_UNREGISTER
)
979 for(ct
=0;ct
<maxvif
;ct
++) {
980 if (vifc_map
&(1<<ct
) && v
->dev
==ptr
)
988 static struct notifier_block ip_mr_notifier
={
995 * Encapsulate a packet by attaching a valid IPIP header to it.
996 * This avoids tunnel drivers and other mess and gives us the speed so
997 * important for multicast video.
1000 static void ip_encap(struct sk_buff
*skb
, u32 saddr
, u32 daddr
)
1002 struct iphdr
*iph
= (struct iphdr
*)skb_push(skb
,sizeof(struct iphdr
));
1005 iph
->tos
= skb
->nh
.iph
->tos
;
1006 iph
->ttl
= skb
->nh
.iph
->ttl
;
1010 iph
->protocol
= IPPROTO_IPIP
;
1012 iph
->tot_len
= htons(skb
->len
);
1013 iph
->id
= htons(ip_id_count
++);
1016 skb
->h
.ipiph
= skb
->nh
.iph
;
1021 * Processing handlers for ipmr_forward
1024 static void ipmr_queue_xmit(struct sk_buff
*skb
, struct mfc_cache
*c
,
1027 struct iphdr
*iph
= skb
->nh
.iph
;
1028 struct vif_device
*vif
= &vif_table
[vifi
];
1032 struct sk_buff
*skb2
;
1034 #ifdef CONFIG_IP_PIMSM
1035 if (vif
->flags
& VIFF_REGISTER
) {
1037 vif
->bytes_out
+=skb
->len
;
1038 ((struct net_device_stats
*)vif
->dev
->priv
)->tx_bytes
+= skb
->len
;
1039 ((struct net_device_stats
*)vif
->dev
->priv
)->tx_packets
++;
1040 ipmr_cache_report(skb
, vifi
, IGMPMSG_WHOLEPKT
);
1045 if (vif
->flags
&VIFF_TUNNEL
) {
1046 if (ip_route_output(&rt
, vif
->remote
, vif
->local
, RT_TOS(iph
->tos
), vif
->link
))
1048 encap
= sizeof(struct iphdr
);
1050 if (ip_route_output(&rt
, iph
->daddr
, 0, RT_TOS(iph
->tos
), vif
->link
))
1054 dev
= rt
->u
.dst
.dev
;
1056 if (skb
->len
+encap
> rt
->u
.dst
.pmtu
&& (ntohs(iph
->frag_off
) & IP_DF
)) {
1057 /* Do not fragment multicasts. Alas, IPv4 does not
1058 allow to send ICMP, so that packets will disappear
1062 ip_statistics
.IpFragFails
++;
1067 encap
+= dev
->hard_header_len
;
1069 if (skb_headroom(skb
) < encap
|| skb_cloned(skb
) || !last
)
1070 skb2
= skb_realloc_headroom(skb
, (encap
+ 15)&~15);
1071 else if (atomic_read(&skb
->users
) != 1)
1072 skb2
= skb_clone(skb
, GFP_ATOMIC
);
1074 atomic_inc(&skb
->users
);
1084 vif
->bytes_out
+=skb
->len
;
1086 dst_release(skb2
->dst
);
1087 skb2
->dst
= &rt
->u
.dst
;
1089 ip_decrease_ttl(iph
);
1091 #ifdef CONFIG_FIREWALL
1092 if (call_fw_firewall(PF_INET
, vif
->dev
, skb2
->nh
.iph
, NULL
, &skb2
) < FW_ACCEPT
) {
1096 if (call_out_firewall(PF_INET
, vif
->dev
, skb2
->nh
.iph
, NULL
, &skb2
) < FW_ACCEPT
) {
1101 if (vif
->flags
& VIFF_TUNNEL
) {
1102 ip_encap(skb2
, vif
->local
, vif
->remote
);
1103 #ifdef CONFIG_FIREWALL
1104 /* Double output firewalling on tunnels: one is on tunnel
1105 another one is on real device.
1107 if (call_out_firewall(PF_INET
, dev
, skb2
->nh
.iph
, NULL
, &skb2
) < FW_ACCEPT
) {
1112 ((struct ip_tunnel
*)vif
->dev
->priv
)->stat
.tx_packets
++;
1113 ((struct ip_tunnel
*)vif
->dev
->priv
)->stat
.tx_bytes
+=skb2
->len
;
1116 IPCB(skb2
)->flags
|= IPSKB_FORWARDED
;
1120 * RFC1584 teaches, that DVMRP/PIM router must deliver packets locally
1121 * not only before forwarding, but after forwarding on all output
1122 * interfaces. It is clear, if mrouter runs a multicasting
1123 * program, it should receive packets not depending to what interface
1124 * program is joined.
1125 * If we will not make it, the program will have to join on all
1126 * interfaces. On the other hand, multihoming host (or router, but
1127 * not mrouter) cannot join to more than one interface - it will
1128 * result in receiving multiple packets.
1130 if (skb2
->len
<= rt
->u
.dst
.pmtu
)
1131 skb2
->dst
->output(skb2
);
1133 ip_fragment(skb2
, skb2
->dst
->output
);
1136 int ipmr_find_vif(struct device
*dev
)
1139 for (ct
=0; ct
<maxvif
; ct
++) {
1140 if (vifc_map
&(1<<ct
) && vif_table
[ct
].dev
== dev
)
1146 /* "local" means that we should preserve one skb (for local delivery) */
1148 int ip_mr_forward(struct sk_buff
*skb
, struct mfc_cache
*cache
, int local
)
1153 vif
= cache
->mfc_parent
;
1155 cache
->mfc_bytes
+= skb
->len
;
1158 * Wrong interface: drop packet and (maybe) send PIM assert.
1160 if (vif_table
[vif
].dev
!= skb
->dev
) {
1163 if (((struct rtable
*)skb
->dst
)->key
.iif
== 0) {
1164 /* It is our own packet, looped back.
1165 Very complicated situation...
1167 The best workaround until routing daemons will be
1168 fixed is not to redistribute packet, if it was
1169 send through wrong interface. It means, that
1170 multicast applications WILL NOT work for
1171 (S,G), which have default multicast route pointing
1172 to wrong oif. In any case, it is not a good
1173 idea to use multicasting applications on router.
1178 cache
->mfc_wrong_if
++;
1179 true_vifi
= ipmr_find_vif(skb
->dev
);
1181 if (true_vifi
< MAXVIFS
&& mroute_do_assert
&&
1182 /* pimsm uses asserts, when switching from RPT to SPT,
1183 so that we cannot check that packet arrived on an oif.
1184 It is bad, but otherwise we would need to move pretty
1185 large chunk of pimd to kernel. Ough... --ANK
1187 (mroute_do_pim
|| cache
->mfc_ttls
[true_vifi
] < 255) &&
1188 jiffies
- cache
->mfc_last_assert
> MFC_ASSERT_THRESH
) {
1189 cache
->mfc_last_assert
= jiffies
;
1190 ipmr_cache_report(skb
, true_vifi
, IGMPMSG_WRONGVIF
);
1195 vif_table
[vif
].pkt_in
++;
1196 vif_table
[vif
].bytes_in
+=skb
->len
;
1201 for (ct
= cache
->mfc_maxvif
-1; ct
>= cache
->mfc_minvif
; ct
--) {
1202 if (skb
->nh
.iph
->ttl
> cache
->mfc_ttls
[ct
]) {
1204 ipmr_queue_xmit(skb
, cache
, psend
, 0);
1209 ipmr_queue_xmit(skb
, cache
, psend
, !local
);
1219 * Multicast packets for forwarding arrive here
1222 int ip_mr_input(struct sk_buff
*skb
)
1224 struct mfc_cache
*cache
;
1225 int local
= ((struct rtable
*)skb
->dst
)->rt_flags
&RTCF_LOCAL
;
1227 /* Packet is looped back after forward, it should not be
1228 forwarded second time, but still can be delivered locally.
1230 if (IPCB(skb
)->flags
&IPSKB_FORWARDED
)
1234 if (IPCB(skb
)->opt
.router_alert
) {
1235 if (ip_call_ra_chain(skb
))
1237 } else if (skb
->nh
.iph
->protocol
== IPPROTO_IGMP
&& mroute_socket
) {
1238 /* IGMPv1 (and broken IGMPv2 implementations sort of
1239 Cisco IOS <= 11.2(8)) do not put router alert
1240 option to IGMP packets destined to routable
1241 groups. It is very bad, because it means
1242 that we can forward NO IGMP messages.
1244 raw_rcv(mroute_socket
, skb
);
1249 cache
= ipmr_cache_find(skb
->nh
.iph
->saddr
, skb
->nh
.iph
->daddr
);
1252 * No usable cache entry
1255 if (cache
==NULL
|| (cache
->mfc_flags
&MFC_QUEUED
)) {
1259 struct sk_buff
*skb2
= skb_clone(skb
, GFP_ATOMIC
);
1260 ip_local_deliver(skb
);
1266 vif
= ipmr_find_vif(skb
->dev
);
1267 if (vif
!= ALL_VIFS
) {
1268 ipmr_cache_unresolved(cache
, vif
, skb
);
1275 ip_mr_forward(skb
, cache
, local
);
1278 return ip_local_deliver(skb
);
1283 return ip_local_deliver(skb
);
1288 #ifdef CONFIG_IP_PIMSM_V1
1290 * Handle IGMP messages of PIMv1
1293 int pim_rcv_v1(struct sk_buff
* skb
, unsigned short len
)
1295 struct igmphdr
*pim
= (struct igmphdr
*)skb
->h
.raw
;
1296 struct iphdr
*encap
;
1298 if (!mroute_do_pim
||
1299 len
< sizeof(*pim
) + sizeof(*encap
) ||
1300 pim
->group
!= PIM_V1_VERSION
|| pim
->code
!= PIM_V1_REGISTER
||
1306 encap
= (struct iphdr
*)(skb
->h
.raw
+ sizeof(struct igmphdr
));
1309 a. packet is really destinted to a multicast group
1310 b. packet is not a NULL-REGISTER
1311 c. packet is not truncated
1313 if (!MULTICAST(encap
->daddr
) ||
1314 ntohs(encap
->tot_len
) == 0 ||
1315 ntohs(encap
->tot_len
) + sizeof(*pim
) > len
) {
1319 skb
->mac
.raw
= skb
->nh
.raw
;
1320 skb_pull(skb
, (u8
*)encap
- skb
->data
);
1321 skb
->nh
.iph
= (struct iphdr
*)skb
->data
;
1323 memset(&(IPCB(skb
)->opt
), 0, sizeof(struct ip_options
));
1324 skb
->protocol
= __constant_htons(ETH_P_IP
);
1326 skb
->pkt_type
= PACKET_HOST
;
1327 dst_release(skb
->dst
);
1329 ((struct net_device_stats
*)reg_dev
->priv
)->rx_bytes
+= skb
->len
;
1330 ((struct net_device_stats
*)reg_dev
->priv
)->rx_packets
++;
1336 #ifdef CONFIG_IP_PIMSM_V2
1337 int pim_rcv(struct sk_buff
* skb
, unsigned short len
)
1339 struct pimreghdr
*pim
= (struct pimreghdr
*)skb
->h
.raw
;
1340 struct iphdr
*encap
;
1342 if (len
< sizeof(*pim
) + sizeof(*encap
) ||
1343 pim
->type
!= ((PIM_VERSION
<<4)|(PIM_REGISTER
)) ||
1344 (pim
->flags
&PIM_NULL_REGISTER
) ||
1346 ip_compute_csum((void *)pim
, len
)) {
1351 /* check if the inner packet is destined to mcast group */
1352 encap
= (struct iphdr
*)(skb
->h
.raw
+ sizeof(struct pimreghdr
));
1353 if (!MULTICAST(encap
->daddr
) ||
1354 ntohs(encap
->tot_len
) == 0 ||
1355 ntohs(encap
->tot_len
) + sizeof(*pim
) > len
) {
1359 skb
->mac
.raw
= skb
->nh
.raw
;
1360 skb_pull(skb
, (u8
*)encap
- skb
->data
);
1361 skb
->nh
.iph
= (struct iphdr
*)skb
->data
;
1363 memset(&(IPCB(skb
)->opt
), 0, sizeof(struct ip_options
));
1364 skb
->protocol
= __constant_htons(ETH_P_IP
);
1366 skb
->pkt_type
= PACKET_HOST
;
1367 dst_release(skb
->dst
);
1368 ((struct net_device_stats
*)reg_dev
->priv
)->rx_bytes
+= skb
->len
;
1369 ((struct net_device_stats
*)reg_dev
->priv
)->rx_packets
++;
1376 #ifdef CONFIG_RTNETLINK
1379 ipmr_fill_mroute(struct sk_buff
*skb
, struct mfc_cache
*c
, struct rtmsg
*rtm
)
1382 struct rtnexthop
*nhp
;
1383 struct device
*dev
= vif_table
[c
->mfc_parent
].dev
;
1385 struct rtattr
*mp_head
;
1388 RTA_PUT(skb
, RTA_IIF
, 4, &dev
->ifindex
);
1390 mp_head
= (struct rtattr
*)skb_put(skb
, RTA_LENGTH(0));
1392 for (ct
= c
->mfc_minvif
; ct
< c
->mfc_maxvif
; ct
++) {
1393 if (c
->mfc_ttls
[ct
] < 255) {
1394 if (skb_tailroom(skb
) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp
)) + 4))
1395 goto rtattr_failure
;
1396 nhp
= (struct rtnexthop
*)skb_put(skb
, RTA_ALIGN(sizeof(*nhp
)));
1397 nhp
->rtnh_flags
= 0;
1398 nhp
->rtnh_hops
= c
->mfc_ttls
[ct
];
1399 nhp
->rtnh_ifindex
= vif_table
[ct
].dev
->ifindex
;
1400 nhp
->rtnh_len
= sizeof(*nhp
);
1403 mp_head
->rta_type
= RTA_MULTIPATH
;
1404 mp_head
->rta_len
= skb
->tail
- (u8
*)mp_head
;
1405 rtm
->rtm_type
= RTN_MULTICAST
;
1409 skb_trim(skb
, b
- skb
->data
);
1413 int ipmr_get_route(struct sk_buff
*skb
, struct rtmsg
*rtm
, int nowait
)
1415 struct mfc_cache
*cache
;
1416 struct rtable
*rt
= (struct rtable
*)skb
->dst
;
1419 cache
= ipmr_cache_find(rt
->rt_src
, rt
->rt_dst
);
1420 if (cache
==NULL
|| (cache
->mfc_flags
&MFC_QUEUED
)) {
1431 if (dev
== NULL
|| (vif
= ipmr_find_vif(dev
)) == ALL_VIFS
) {
1435 skb
->nh
.raw
= skb_push(skb
, sizeof(struct iphdr
));
1436 skb
->nh
.iph
->ihl
= sizeof(struct iphdr
)>>2;
1437 skb
->nh
.iph
->saddr
= rt
->rt_src
;
1438 skb
->nh
.iph
->daddr
= rt
->rt_dst
;
1439 skb
->nh
.iph
->version
= 0;
1440 err
= ipmr_cache_unresolved(cache
, vif
, skb
);
1444 /* Resolved cache entry is not changed by net bh,
1445 so that we are allowed to enable it.
1449 if (!nowait
&& (rtm
->rtm_flags
&RTM_F_NOTIFY
))
1450 cache
->mfc_flags
|= MFC_NOTIFY
;
1451 return ipmr_fill_mroute(skb
, cache
, rtm
);
1456 * The /proc interfaces to multicast routing /proc/ip_mr_cache /proc/ip_mr_vif
1459 int ipmr_vif_info(char *buffer
, char **start
, off_t offset
, int length
, int dummy
)
1461 struct vif_device
*vif
;
1468 len
+= sprintf(buffer
,
1469 "Interface BytesIn PktsIn BytesOut PktsOut Flags Local Remote\n");
1472 for (ct
=0;ct
<maxvif
;ct
++)
1474 char *name
= "none";
1476 if(!(vifc_map
&(1<<ct
)))
1479 name
= vif
->dev
->name
;
1480 size
= sprintf(buffer
+len
, "%2d %-10s %8ld %7ld %8ld %7ld %05X %08X %08X\n",
1481 ct
, name
, vif
->bytes_in
, vif
->pkt_in
, vif
->bytes_out
, vif
->pkt_out
,
1482 vif
->flags
, vif
->local
, vif
->remote
);
1490 if(pos
>offset
+length
)
1494 *start
=buffer
+(offset
-begin
);
1495 len
-=(offset
-begin
);
1501 int ipmr_mfc_info(char *buffer
, char **start
, off_t offset
, int length
, int dummy
)
1503 struct mfc_cache
*mfc
;
1510 len
+= sprintf(buffer
,
1511 "Group Origin Iif Pkts Bytes Wrong Oifs\n");
1514 for (ct
=0;ct
<MFC_LINES
;ct
++)
1517 mfc
=mfc_cache_array
[ct
];
1523 * Interface forwarding map
1525 size
= sprintf(buffer
+len
, "%08lX %08lX %-3d %8ld %8ld %8ld",
1526 (unsigned long)mfc
->mfc_mcastgrp
,
1527 (unsigned long)mfc
->mfc_origin
,
1528 mfc
->mfc_parent
== ALL_VIFS
? -1 : mfc
->mfc_parent
,
1529 (mfc
->mfc_flags
& MFC_QUEUED
) ? mfc
->mfc_unresolved
.qlen
: mfc
->mfc_pkt
,
1532 for(n
=mfc
->mfc_minvif
;n
<mfc
->mfc_maxvif
;n
++)
1534 if(vifc_map
&(1<<n
) && mfc
->mfc_ttls
[n
] < 255)
1535 size
+= sprintf(buffer
+len
+size
, " %2d:%-3d", n
, mfc
->mfc_ttls
[n
]);
1537 size
+= sprintf(buffer
+len
+size
, "\n");
1545 if(pos
>offset
+length
)
1555 *start
=buffer
+(offset
-begin
);
1556 len
-=(offset
-begin
);
1565 #ifdef CONFIG_PROC_FS
1566 static struct proc_dir_entry proc_net_ipmr_vif
= {
1567 PROC_NET_IPMR_VIF
, 9 ,"ip_mr_vif",
1568 S_IFREG
| S_IRUGO
, 1, 0, 0,
1569 0, &proc_net_inode_operations
,
1572 static struct proc_dir_entry proc_net_ipmr_mfc
= {
1573 PROC_NET_IPMR_MFC
, 11 ,"ip_mr_cache",
1574 S_IFREG
| S_IRUGO
, 1, 0, 0,
1575 0, &proc_net_inode_operations
,
1580 #ifdef CONFIG_IP_PIMSM_V2
1581 struct inet_protocol pim_protocol
=
1583 pim_rcv
, /* PIM handler */
1584 NULL
, /* PIM error control */
1586 IPPROTO_PIM
, /* protocol ID */
1595 * Setup for IP multicast routing
1598 __initfunc(void ip_mr_init(void))
1600 printk(KERN_INFO
"Linux IP multicast router 0.06 plus PIM-SM\n");
1601 register_netdevice_notifier(&ip_mr_notifier
);
1602 #ifdef CONFIG_PROC_FS
1603 proc_net_register(&proc_net_ipmr_vif
);
1604 proc_net_register(&proc_net_ipmr_mfc
);