2 * IP multicast routing support for mrouted 3.6/3.8
4 * (c) 1995 Alan Cox, <alan@cymru.net>
5 * Linux Consultancy and Custom Driver Development
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation; either version
10 * 2 of the License, or (at your option) any later version.
12 * Version: $Id: ipmr.c,v 1.38 1999/01/12 14:34:40 davem Exp $
15 * Michael Chastain : Incorrect size of copying.
16 * Alan Cox : Added the cache manager code
17 * Alan Cox : Fixed the clone/copy bug and device race.
18 * Mike McLagan : Routing by source
19 * Malcolm Beattie : Buffer handling fixes.
20 * Alexey Kuznetsov : Double buffer free and other fixes.
21 * SVR Anand : Fixed several multicast bugs and problems.
22 * Alexey Kuznetsov : Status, optimisations and more.
23 * Brad Parker : Better behaviour on mrouted upcall
25 * Carlos Picoto : PIMv1 Support
29 #include <linux/config.h>
30 #include <asm/system.h>
31 #include <asm/uaccess.h>
32 #include <linux/types.h>
33 #include <linux/sched.h>
34 #include <linux/errno.h>
35 #include <linux/timer.h>
37 #include <linux/kernel.h>
38 #include <linux/fcntl.h>
39 #include <linux/stat.h>
40 #include <linux/socket.h>
42 #include <linux/inet.h>
43 #include <linux/netdevice.h>
44 #include <linux/inetdevice.h>
45 #include <linux/igmp.h>
46 #include <linux/proc_fs.h>
47 #include <linux/mroute.h>
48 #include <linux/init.h>
50 #include <net/protocol.h>
51 #include <linux/skbuff.h>
56 #include <linux/notifier.h>
57 #include <linux/if_arp.h>
58 #include <linux/ip_fw.h>
59 #include <linux/firewall.h>
61 #include <net/checksum.h>
63 #if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2)
64 #define CONFIG_IP_PIMSM 1
68 * Multicast router control variables
71 static struct vif_device vif_table
[MAXVIFS
]; /* Devices */
72 static unsigned long vifc_map
; /* Active device map */
74 int mroute_do_assert
= 0; /* Set in PIM assert */
75 int mroute_do_pim
= 0;
76 static struct mfc_cache
*mfc_cache_array
[MFC_LINES
]; /* Forwarding cache */
77 int cache_resolve_queue_len
= 0; /* Size of unresolved */
79 static int ip_mr_forward(struct sk_buff
*skb
, struct mfc_cache
*cache
, int local
);
80 static int ipmr_cache_report(struct sk_buff
*pkt
, vifi_t vifi
, int assert);
81 static int ipmr_fill_mroute(struct sk_buff
*skb
, struct mfc_cache
*c
, struct rtmsg
*rtm
);
83 extern struct inet_protocol pim_protocol
;
86 struct device
*ipmr_new_tunnel(struct vifctl
*v
)
88 struct device
*dev
= NULL
;
91 dev
= dev_get("tunl0");
97 struct ip_tunnel_parm p
;
98 struct in_device
*in_dev
;
100 memset(&p
, 0, sizeof(p
));
101 p
.iph
.daddr
= v
->vifc_rmt_addr
.s_addr
;
102 p
.iph
.saddr
= v
->vifc_lcl_addr
.s_addr
;
105 p
.iph
.protocol
= IPPROTO_IPIP
;
106 sprintf(p
.name
, "dvmrp%d", v
->vifc_vifi
);
107 ifr
.ifr_ifru
.ifru_data
= (void*)&p
;
109 oldfs
= get_fs(); set_fs(KERNEL_DS
);
110 err
= dev
->do_ioctl(dev
, &ifr
, SIOCADDTUNNEL
);
113 if (err
== 0 && (dev
= dev_get(p
.name
)) != NULL
) {
114 dev
->flags
|= IFF_MULTICAST
;
116 in_dev
= dev
->ip_ptr
;
117 if (in_dev
== NULL
&& (in_dev
= inetdev_init(dev
)) == NULL
)
119 in_dev
->cnf
.rp_filter
= 0;
129 unregister_netdevice(dev
);
134 #ifdef CONFIG_IP_PIMSM
136 static int reg_vif_num
= -1;
137 static struct device
* reg_dev
;
139 static int reg_vif_xmit(struct sk_buff
*skb
, struct device
*dev
)
141 ipmr_cache_report(skb
, reg_vif_num
, IGMPMSG_WHOLEPKT
);
146 static struct net_device_stats
*reg_vif_get_stats(struct device
*dev
)
148 return (struct net_device_stats
*)dev
->priv
;
152 struct device
*ipmr_reg_vif(struct vifctl
*v
)
155 struct in_device
*in_dev
;
158 size
= sizeof(*dev
) + IFNAMSIZ
+ sizeof(struct net_device_stats
);
159 dev
= kmalloc(size
, GFP_KERNEL
);
163 memset(dev
, 0, size
);
166 dev
->name
= dev
->priv
+ sizeof(struct net_device_stats
);
168 strcpy(dev
->name
, "pimreg");
170 dev
->type
= ARPHRD_PIMREG
;
171 dev
->mtu
= 1500 - sizeof(struct iphdr
) - 8;
172 dev
->flags
= IFF_NOARP
;
173 dev
->hard_start_xmit
= reg_vif_xmit
;
174 dev
->get_stats
= reg_vif_get_stats
;
178 if (register_netdevice(dev
)) {
185 if ((in_dev
= inetdev_init(dev
)) == NULL
)
188 in_dev
->cnf
.rp_filter
= 0;
198 unregister_netdevice(dev
);
209 static int vif_delete(int vifi
)
211 struct vif_device
*v
;
213 struct in_device
*in_dev
;
215 if (vifi
< 0 || vifi
>= maxvif
|| !(vifc_map
&(1<<vifi
)))
216 return -EADDRNOTAVAIL
;
218 v
= &vif_table
[vifi
];
222 vifc_map
&= ~(1<<vifi
);
224 if ((in_dev
= dev
->ip_ptr
) != NULL
)
225 in_dev
->cnf
.mc_forwarding
= 0;
227 dev_set_allmulti(dev
, -1);
228 ip_rt_multicast_event(in_dev
);
230 if (v
->flags
&(VIFF_TUNNEL
|VIFF_REGISTER
)) {
231 #ifdef CONFIG_IP_PIMSM
232 if (vifi
== reg_vif_num
) {
237 unregister_netdevice(dev
);
238 if (v
->flags
&VIFF_REGISTER
)
242 if (vifi
+1 == maxvif
) {
244 for (tmp
=vifi
-1; tmp
>=0; tmp
--) {
245 if (vifc_map
&(1<<tmp
))
253 static void ipmr_update_threshoulds(struct mfc_cache
*cache
, unsigned char *ttls
)
259 cache
->mfc_minvif
= MAXVIFS
;
260 cache
->mfc_maxvif
= 0;
261 memset(cache
->mfc_ttls
, 255, MAXVIFS
);
263 for (vifi
=0; vifi
<maxvif
; vifi
++) {
264 if (vifc_map
&(1<<vifi
) && ttls
[vifi
] && ttls
[vifi
] < 255) {
265 cache
->mfc_ttls
[vifi
] = ttls
[vifi
];
266 if (cache
->mfc_minvif
> vifi
)
267 cache
->mfc_minvif
= vifi
;
268 if (cache
->mfc_maxvif
<= vifi
)
269 cache
->mfc_maxvif
= vifi
+ 1;
276 * Delete a multicast route cache entry
279 static void ipmr_cache_delete(struct mfc_cache
*cache
)
283 struct mfc_cache
**cp
;
286 * Find the right cache line
289 line
=MFC_HASH(cache
->mfc_mcastgrp
,cache
->mfc_origin
);
290 cp
=&(mfc_cache_array
[line
]);
292 if(cache
->mfc_flags
&MFC_QUEUED
)
293 del_timer(&cache
->mfc_timer
);
310 * Free the buffer. If it is a pending resolution
311 * clean up the other resources.
314 if(cache
->mfc_flags
&MFC_QUEUED
)
316 cache_resolve_queue_len
--;
317 while((skb
=skb_dequeue(&cache
->mfc_unresolved
))) {
318 #ifdef CONFIG_RTNETLINK
319 if (skb
->nh
.iph
->version
== 0) {
320 struct nlmsghdr
*nlh
= (struct nlmsghdr
*)skb_pull(skb
, sizeof(struct iphdr
));
321 nlh
->nlmsg_type
= NLMSG_ERROR
;
322 nlh
->nlmsg_len
= NLMSG_LENGTH(sizeof(struct nlmsgerr
));
323 skb_trim(skb
, nlh
->nlmsg_len
);
324 ((struct nlmsgerr
*)NLMSG_DATA(nlh
))->error
= -ETIMEDOUT
;
325 netlink_unicast(rtnl
, skb
, NETLINK_CB(skb
).dst_pid
, MSG_DONTWAIT
);
331 kfree_s(cache
,sizeof(cache
));
/*
 * Expiry timer for an unresolved cache entry: the daemon never
 * answered the upcall, so drop the entry (and its queued packets).
 * The timer's data field carries the mfc_cache pointer (set up in
 * ipmr_cache_alloc).
 */
static void ipmr_cache_timer(unsigned long data)
{
	ipmr_cache_delete((struct mfc_cache *)data);
}
345 * Insert a multicast cache entry
348 static void ipmr_cache_insert(struct mfc_cache
*c
)
350 int line
=MFC_HASH(c
->mfc_mcastgrp
,c
->mfc_origin
);
351 c
->next
=mfc_cache_array
[line
];
352 mfc_cache_array
[line
]=c
;
356 * Find a multicast cache entry
359 struct mfc_cache
*ipmr_cache_find(__u32 origin
, __u32 mcastgrp
)
361 int line
=MFC_HASH(mcastgrp
,origin
);
362 struct mfc_cache
*cache
;
364 cache
=mfc_cache_array
[line
];
367 if(cache
->mfc_origin
==origin
&& cache
->mfc_mcastgrp
==mcastgrp
)
375 * Allocate a multicast cache entry
378 static struct mfc_cache
*ipmr_cache_alloc(int priority
)
380 struct mfc_cache
*c
=(struct mfc_cache
*)kmalloc(sizeof(struct mfc_cache
), priority
);
383 memset(c
, 0, sizeof(*c
));
384 skb_queue_head_init(&c
->mfc_unresolved
);
385 init_timer(&c
->mfc_timer
);
386 c
->mfc_timer
.data
=(long)c
;
387 c
->mfc_timer
.function
=ipmr_cache_timer
;
388 c
->mfc_minvif
= MAXVIFS
;
393 * A cache entry has gone into a resolved state from queued
396 static void ipmr_cache_resolve(struct mfc_cache
*cache
)
403 * Kill the queue entry timer.
406 del_timer(&cache
->mfc_timer
);
408 if (cache
->mfc_flags
&MFC_QUEUED
) {
409 cache
->mfc_flags
&=~MFC_QUEUED
;
410 cache_resolve_queue_len
--;
416 * Play the pending entries through our router
418 while((skb
=skb_dequeue(&cache
->mfc_unresolved
))) {
419 #ifdef CONFIG_RTNETLINK
420 if (skb
->nh
.iph
->version
== 0) {
422 struct nlmsghdr
*nlh
= (struct nlmsghdr
*)skb_pull(skb
, sizeof(struct iphdr
));
424 if (ipmr_fill_mroute(skb
, cache
, NLMSG_DATA(nlh
)) > 0) {
425 nlh
->nlmsg_len
= skb
->tail
- (u8
*)nlh
;
427 nlh
->nlmsg_type
= NLMSG_ERROR
;
428 nlh
->nlmsg_len
= NLMSG_LENGTH(sizeof(struct nlmsgerr
));
429 skb_trim(skb
, nlh
->nlmsg_len
);
430 ((struct nlmsgerr
*)NLMSG_DATA(nlh
))->error
= -EMSGSIZE
;
432 err
= netlink_unicast(rtnl
, skb
, NETLINK_CB(skb
).pid
, MSG_DONTWAIT
);
435 ip_mr_forward(skb
, cache
, 0);
440 * Bounce a cache query up to mrouted. We could use netlink for this but mrouted
441 * expects the following bizarre scheme..
444 static int ipmr_cache_report(struct sk_buff
*pkt
, vifi_t vifi
, int assert)
447 int ihl
= pkt
->nh
.iph
->ihl
<<2;
448 struct igmphdr
*igmp
;
452 #ifdef CONFIG_IP_PIMSM
453 if (assert == IGMPMSG_WHOLEPKT
)
454 skb
= skb_realloc_headroom(pkt
, sizeof(struct iphdr
));
457 skb
= alloc_skb(128, GFP_ATOMIC
);
462 #ifdef CONFIG_IP_PIMSM
463 if (assert == IGMPMSG_WHOLEPKT
) {
464 /* Ugly, but we have no choice with this interface.
465 Duplicate old header, fix ihl, length etc.
466 And all this only to mangle msg->im_msgtype and
467 to set msg->im_mbz to "mbz" :-)
469 msg
= (struct igmpmsg
*)skb_push(skb
, sizeof(struct iphdr
));
470 skb
->nh
.raw
= skb
->h
.raw
= (u8
*)msg
;
471 memcpy(msg
, pkt
->nh
.raw
, sizeof(struct iphdr
));
472 msg
->im_msgtype
= IGMPMSG_WHOLEPKT
;
474 msg
->im_vif
= reg_vif_num
;
475 skb
->nh
.iph
->ihl
= sizeof(struct iphdr
) >> 2;
476 skb
->nh
.iph
->tot_len
= htons(ntohs(pkt
->nh
.iph
->tot_len
) + sizeof(struct iphdr
));
485 skb
->nh
.iph
= (struct iphdr
*)skb_put(skb
, ihl
);
486 memcpy(skb
->data
,pkt
->data
,ihl
);
487 skb
->nh
.iph
->protocol
= 0; /* Flag to the kernel this is a route add */
488 msg
= (struct igmpmsg
*)skb
->nh
.iph
;
490 skb
->dst
= dst_clone(pkt
->dst
);
496 igmp
=(struct igmphdr
*)skb_put(skb
,sizeof(struct igmphdr
));
498 msg
->im_msgtype
= assert;
500 skb
->nh
.iph
->tot_len
=htons(skb
->len
); /* Fix the length */
501 skb
->h
.raw
= skb
->nh
.raw
;
507 if ((ret
=sock_queue_rcv_skb(mroute_socket
,skb
))<0) {
509 printk(KERN_WARNING
"mroute: pending queue full, dropping entries.\n");
517 * Queue a packet for resolution
520 static int ipmr_cache_unresolved(struct mfc_cache
*cache
, vifi_t vifi
, struct sk_buff
*skb
)
525 * Create a new entry if allowable
527 if(cache_resolve_queue_len
>=10 || (cache
=ipmr_cache_alloc(GFP_ATOMIC
))==NULL
)
533 * Fill in the new cache entry
535 cache
->mfc_parent
=ALL_VIFS
;
536 cache
->mfc_origin
=skb
->nh
.iph
->saddr
;
537 cache
->mfc_mcastgrp
=skb
->nh
.iph
->daddr
;
538 cache
->mfc_flags
=MFC_QUEUED
;
540 * Link to the unresolved list
542 ipmr_cache_insert(cache
);
543 cache_resolve_queue_len
++;
545 * Fire off the expiry timer
547 cache
->mfc_timer
.expires
=jiffies
+10*HZ
;
548 add_timer(&cache
->mfc_timer
);
550 * Reflect first query at mrouted.
554 /* If the report failed throw the cache entry
557 OK, OK, Brad. Only do not forget to free skb
560 if (ipmr_cache_report(skb
, vifi
, IGMPMSG_NOCACHE
)<0) {
561 ipmr_cache_delete(cache
);
568 * See if we can append the packet
570 if(cache
->mfc_queuelen
>3)
575 cache
->mfc_queuelen
++;
576 skb_queue_tail(&cache
->mfc_unresolved
,skb
);
581 * MFC cache manipulation by user space mroute daemon
584 int ipmr_mfc_modify(int action
, struct mfcctl
*mfc
)
586 struct mfc_cache
*cache
;
588 if(!MULTICAST(mfc
->mfcc_mcastgrp
.s_addr
))
591 * Find the cache line
596 cache
=ipmr_cache_find(mfc
->mfcc_origin
.s_addr
,mfc
->mfcc_mcastgrp
.s_addr
);
601 if(action
==MRT_DEL_MFC
)
605 ipmr_cache_delete(cache
);
616 * Update the cache, see if it frees a pending queue
619 cache
->mfc_flags
|=MFC_RESOLVED
;
620 cache
->mfc_parent
=mfc
->mfcc_parent
;
621 ipmr_update_threshoulds(cache
, mfc
->mfcc_ttls
);
624 * Check to see if we resolved a queued list. If so we
625 * need to send on the frames and tidy up.
628 if(cache
->mfc_flags
&MFC_QUEUED
)
629 ipmr_cache_resolve(cache
); /* Unhook & send the frames */
635 * Unsolicited update - that's ok, add anyway.
639 cache
=ipmr_cache_alloc(GFP_ATOMIC
);
645 cache
->mfc_flags
=MFC_RESOLVED
;
646 cache
->mfc_origin
=mfc
->mfcc_origin
.s_addr
;
647 cache
->mfc_mcastgrp
=mfc
->mfcc_mcastgrp
.s_addr
;
648 cache
->mfc_parent
=mfc
->mfcc_parent
;
649 ipmr_update_threshoulds(cache
, mfc
->mfcc_ttls
);
650 ipmr_cache_insert(cache
);
655 static void mrtsock_destruct(struct sock
*sk
)
657 if (sk
== mroute_socket
) {
658 ipv4_devconf
.mc_forwarding
= 0;
665 * Socket options and virtual interface manipulation. The whole
666 * virtual interface system is a complete heap, but unfortunately
667 * that's how BSD mrouted happens to think. Maybe one day with a proper
668 * MOSPF/PIM router set up we can clean this up.
671 int ip_mroute_setsockopt(struct sock
*sk
,int optname
,char *optval
,int optlen
)
676 if(optname
!=MRT_INIT
)
678 if(sk
!=mroute_socket
)
685 if(sk
->type
!=SOCK_RAW
|| sk
->num
!=IPPROTO_IGMP
)
687 if(optlen
!=sizeof(int))
691 if (get_user(opt
,(int *)optval
))
699 ipv4_devconf
.mc_forwarding
= 1;
700 if (ip_ra_control(sk
, 1, mrtsock_destruct
) == 0)
702 mrtsock_destruct(sk
);
705 return ip_ra_control(sk
, 0, NULL
);
708 if(optlen
!=sizeof(vif
))
710 if (copy_from_user(&vif
,optval
,sizeof(vif
)))
712 if(vif
.vifc_vifi
>= MAXVIFS
)
714 if(optname
==MRT_ADD_VIF
)
716 struct vif_device
*v
=&vif_table
[vif
.vifc_vifi
];
718 struct in_device
*in_dev
;
721 if (vifc_map
&(1<<vif
.vifc_vifi
))
724 switch (vif
.vifc_flags
) {
725 #ifdef CONFIG_IP_PIMSM
729 * Special Purpose VIF in PIM
730 * All the packets will be sent to the daemon
732 if (reg_vif_num
>= 0)
734 reg_vif_num
= vif
.vifc_vifi
;
735 dev
= ipmr_reg_vif(&vif
);
743 dev
= ipmr_new_tunnel(&vif
);
748 dev
=ip_dev_find(vif
.vifc_lcl_addr
.s_addr
);
750 return -EADDRNOTAVAIL
;
754 printk(KERN_DEBUG
"ipmr_add_vif: flags %02x\n", vif
.vifc_flags
);
759 if ((in_dev
= dev
->ip_ptr
) == NULL
)
760 return -EADDRNOTAVAIL
;
761 if (in_dev
->cnf
.mc_forwarding
)
763 in_dev
->cnf
.mc_forwarding
= 1;
764 dev_set_allmulti(dev
, +1);
765 ip_rt_multicast_event(in_dev
);
768 * Fill in the VIF structures
771 v
->rate_limit
=vif
.vifc_rate_limit
;
772 v
->local
=vif
.vifc_lcl_addr
.s_addr
;
773 v
->remote
=vif
.vifc_rmt_addr
.s_addr
;
774 v
->flags
=vif
.vifc_flags
;
775 v
->threshold
=vif
.vifc_threshold
;
781 v
->link
= dev
->ifindex
;
782 if (vif
.vifc_flags
&(VIFF_TUNNEL
|VIFF_REGISTER
))
783 v
->link
= dev
->iflink
;
784 vifc_map
|=(1<<vif
.vifc_vifi
);
785 if (vif
.vifc_vifi
+1 > maxvif
)
786 maxvif
= vif
.vifc_vifi
+1;
792 ret
= vif_delete(vif
.vifc_vifi
);
798 * Manipulate the forwarding caches. These live
799 * in a sort of kernel/user symbiosis.
803 if(optlen
!=sizeof(mfc
))
805 if (copy_from_user(&mfc
,optval
, sizeof(mfc
)))
807 return ipmr_mfc_modify(optname
, &mfc
);
809 * Control PIM assert.
814 if(get_user(v
,(int *)optval
))
816 mroute_do_assert
=(v
)?1:0;
819 #ifdef CONFIG_IP_PIMSM
823 if(get_user(v
,(int *)optval
))
826 if (v
!= mroute_do_pim
) {
828 mroute_do_assert
= v
;
829 #ifdef CONFIG_IP_PIMSM_V2
831 inet_add_protocol(&pim_protocol
);
833 inet_del_protocol(&pim_protocol
);
840 * Spurious command, or MRT_VERSION which you cannot
849 * Getsock opt support for the multicast routing system.
852 int ip_mroute_getsockopt(struct sock
*sk
,int optname
,char *optval
,int *optlen
)
857 if(sk
!=mroute_socket
)
859 if(optname
!=MRT_VERSION
&&
860 #ifdef CONFIG_IP_PIMSM
866 if(get_user(olr
, optlen
))
869 olr
=min(olr
,sizeof(int));
870 if(put_user(olr
,optlen
))
872 if(optname
==MRT_VERSION
)
874 #ifdef CONFIG_IP_PIMSM
875 else if(optname
==MRT_PIM
)
879 val
=mroute_do_assert
;
880 if(copy_to_user(optval
,&val
,olr
))
886 * The IP multicast ioctl support routines.
889 int ipmr_ioctl(struct sock
*sk
, int cmd
, unsigned long arg
)
891 struct sioc_sg_req sr
;
892 struct sioc_vif_req vr
;
893 struct vif_device
*vif
;
899 if (copy_from_user(&vr
,(void *)arg
,sizeof(vr
)))
903 vif
=&vif_table
[vr
.vifi
];
904 if(vifc_map
&(1<<vr
.vifi
))
906 vr
.icount
=vif
->pkt_in
;
907 vr
.ocount
=vif
->pkt_out
;
908 vr
.ibytes
=vif
->bytes_in
;
909 vr
.obytes
=vif
->bytes_out
;
910 if (copy_to_user((void *)arg
,&vr
,sizeof(vr
)))
914 return -EADDRNOTAVAIL
;
916 if (copy_from_user(&sr
,(void *)arg
,sizeof(sr
)))
918 for (c
= mfc_cache_array
[MFC_HASH(sr
.grp
.s_addr
, sr
.src
.s_addr
)];
920 if (sr
.grp
.s_addr
== c
->mfc_mcastgrp
&&
921 sr
.src
.s_addr
== c
->mfc_origin
) {
922 sr
.pktcnt
= c
->mfc_pkt
;
923 sr
.bytecnt
= c
->mfc_bytes
;
924 sr
.wrong_if
= c
->mfc_wrong_if
;
925 if (copy_to_user((void *)arg
,&sr
,sizeof(sr
)))
930 return -EADDRNOTAVAIL
;
937 * Close the multicast socket, and clear the vif tables etc
940 void mroute_close(struct sock
*sk
)
945 * Shut down all active vif entries
948 for(i
=0; i
<maxvif
; i
++)
955 for(i
=0;i
<MFC_LINES
;i
++)
958 while(mfc_cache_array
[i
]!=NULL
)
959 ipmr_cache_delete(mfc_cache_array
[i
]);
964 static int ipmr_device_event(struct notifier_block
*this, unsigned long event
, void *ptr
)
966 struct vif_device
*v
;
968 if (event
!= NETDEV_UNREGISTER
)
971 for(ct
=0;ct
<maxvif
;ct
++) {
972 if (vifc_map
&(1<<ct
) && v
->dev
==ptr
)
980 static struct notifier_block ip_mr_notifier
={
987 * Encapsulate a packet by attaching a valid IPIP header to it.
988 * This avoids tunnel drivers and other mess and gives us the speed so
989 * important for multicast video.
992 static void ip_encap(struct sk_buff
*skb
, u32 saddr
, u32 daddr
)
994 struct iphdr
*iph
= (struct iphdr
*)skb_push(skb
,sizeof(struct iphdr
));
997 iph
->tos
= skb
->nh
.iph
->tos
;
998 iph
->ttl
= skb
->nh
.iph
->ttl
;
1002 iph
->protocol
= IPPROTO_IPIP
;
1004 iph
->tot_len
= htons(skb
->len
);
1005 iph
->id
= htons(ip_id_count
++);
1008 skb
->h
.ipiph
= skb
->nh
.iph
;
1013 * Processing handlers for ipmr_forward
1016 static void ipmr_queue_xmit(struct sk_buff
*skb
, struct mfc_cache
*c
,
1019 struct iphdr
*iph
= skb
->nh
.iph
;
1020 struct vif_device
*vif
= &vif_table
[vifi
];
1024 struct sk_buff
*skb2
;
1026 #ifdef CONFIG_IP_PIMSM
1027 if (vif
->flags
& VIFF_REGISTER
) {
1029 vif
->bytes_out
+=skb
->len
;
1030 ((struct net_device_stats
*)vif
->dev
->priv
)->tx_bytes
+= skb
->len
;
1031 ((struct net_device_stats
*)vif
->dev
->priv
)->tx_packets
++;
1032 ipmr_cache_report(skb
, vifi
, IGMPMSG_WHOLEPKT
);
1037 if (vif
->flags
&VIFF_TUNNEL
) {
1038 if (ip_route_output(&rt
, vif
->remote
, vif
->local
, RT_TOS(iph
->tos
), vif
->link
))
1040 encap
= sizeof(struct iphdr
);
1042 if (ip_route_output(&rt
, iph
->daddr
, 0, RT_TOS(iph
->tos
), vif
->link
))
1046 dev
= rt
->u
.dst
.dev
;
1048 if (skb
->len
+encap
> rt
->u
.dst
.pmtu
/* && (ntohs(iph->frag_off) & IP_DF) */) {
1049 /* Do not fragment multicasts. Alas, IPv4 does not
1050 allow to send ICMP, so that packets will disappear
1054 ip_statistics
.IpFragFails
++;
1059 encap
+= dev
->hard_header_len
;
1061 if (skb_headroom(skb
) < encap
|| skb_cloned(skb
) || !last
)
1062 skb2
= skb_realloc_headroom(skb
, (encap
+ 15)&~15);
1063 else if (atomic_read(&skb
->users
) != 1)
1064 skb2
= skb_clone(skb
, GFP_ATOMIC
);
1066 atomic_inc(&skb
->users
);
1076 vif
->bytes_out
+=skb
->len
;
1078 dst_release(skb2
->dst
);
1079 skb2
->dst
= &rt
->u
.dst
;
1081 ip_decrease_ttl(iph
);
1083 #ifdef CONFIG_FIREWALL
1084 if (call_fw_firewall(PF_INET
, vif
->dev
, skb2
->nh
.iph
, NULL
, &skb2
) < FW_ACCEPT
) {
1088 if (call_out_firewall(PF_INET
, vif
->dev
, skb2
->nh
.iph
, NULL
, &skb2
) < FW_ACCEPT
) {
1093 if (vif
->flags
& VIFF_TUNNEL
) {
1094 ip_encap(skb2
, vif
->local
, vif
->remote
);
1095 #ifdef CONFIG_FIREWALL
1096 /* Double output firewalling on tunnels: one is on tunnel
1097 another one is on real device.
1099 if (call_out_firewall(PF_INET
, dev
, skb2
->nh
.iph
, NULL
, &skb2
) < FW_ACCEPT
) {
1104 ((struct ip_tunnel
*)vif
->dev
->priv
)->stat
.tx_packets
++;
1105 ((struct ip_tunnel
*)vif
->dev
->priv
)->stat
.tx_bytes
+=skb2
->len
;
1108 IPCB(skb2
)->flags
|= IPSKB_FORWARDED
;
1112 * RFC1584 teaches, that DVMRP/PIM router must deliver packets locally
1113 * not only before forwarding, but after forwarding on all output
1114 * interfaces. It is clear, if mrouter runs a multicasting
1115 * program, it should receive packets not depending to what interface
1116 * program is joined.
1117 * If we will not make it, the program will have to join on all
1118 * interfaces. On the other hand, multihoming host (or router, but
1119 * not mrouter) cannot join to more than one interface - it will
1120 * result in receiving multiple packets.
1122 skb2
->dst
->output(skb2
);
1125 int ipmr_find_vif(struct device
*dev
)
1128 for (ct
=0; ct
<maxvif
; ct
++) {
1129 if (vifc_map
&(1<<ct
) && vif_table
[ct
].dev
== dev
)
1135 /* "local" means that we should preserve one skb (for local delivery) */
1137 int ip_mr_forward(struct sk_buff
*skb
, struct mfc_cache
*cache
, int local
)
1142 vif
= cache
->mfc_parent
;
1144 cache
->mfc_bytes
+= skb
->len
;
1147 * Wrong interface: drop packet and (maybe) send PIM assert.
1149 if (vif_table
[vif
].dev
!= skb
->dev
) {
1152 if (((struct rtable
*)skb
->dst
)->key
.iif
== 0) {
1153 /* It is our own packet, looped back.
1154 Very complicated situation...
1156 The best workaround until routing daemons will be
1157 fixed is not to redistribute packet, if it was
1158 send through wrong interface. It means, that
1159 multicast applications WILL NOT work for
1160 (S,G), which have default multicast route pointing
1161 to wrong oif. In any case, it is not a good
1162 idea to use multicasting applications on router.
1167 cache
->mfc_wrong_if
++;
1168 true_vifi
= ipmr_find_vif(skb
->dev
);
1170 if (true_vifi
< MAXVIFS
&& mroute_do_assert
&&
1171 /* pimsm uses asserts, when switching from RPT to SPT,
1172 so that we cannot check that packet arrived on an oif.
1173 It is bad, but otherwise we would need to move pretty
1174 large chunk of pimd to kernel. Ough... --ANK
1176 (mroute_do_pim
|| cache
->mfc_ttls
[true_vifi
] < 255) &&
1177 jiffies
- cache
->mfc_last_assert
> MFC_ASSERT_THRESH
) {
1178 cache
->mfc_last_assert
= jiffies
;
1179 ipmr_cache_report(skb
, true_vifi
, IGMPMSG_WRONGVIF
);
1184 vif_table
[vif
].pkt_in
++;
1185 vif_table
[vif
].bytes_in
+=skb
->len
;
1190 for (ct
= cache
->mfc_maxvif
-1; ct
>= cache
->mfc_minvif
; ct
--) {
1191 if (skb
->nh
.iph
->ttl
> cache
->mfc_ttls
[ct
]) {
1193 ipmr_queue_xmit(skb
, cache
, psend
, 0);
1198 ipmr_queue_xmit(skb
, cache
, psend
, !local
);
1208 * Multicast packets for forwarding arrive here
1211 int ip_mr_input(struct sk_buff
*skb
)
1213 struct mfc_cache
*cache
;
1214 int local
= ((struct rtable
*)skb
->dst
)->rt_flags
&RTCF_LOCAL
;
1216 /* Packet is looped back after forward, it should not be
1217 forwarded second time, but still can be delivered locally.
1219 if (IPCB(skb
)->flags
&IPSKB_FORWARDED
)
1223 if (IPCB(skb
)->opt
.router_alert
) {
1224 if (ip_call_ra_chain(skb
))
1226 } else if (skb
->nh
.iph
->protocol
== IPPROTO_IGMP
&& mroute_socket
) {
1227 /* IGMPv1 (and broken IGMPv2 implementations sort of
1228 Cisco IOS <= 11.2(8)) do not put router alert
1229 option to IGMP packets destined to routable
1230 groups. It is very bad, because it means
1231 that we can forward NO IGMP messages.
1233 raw_rcv(mroute_socket
, skb
);
1238 cache
= ipmr_cache_find(skb
->nh
.iph
->saddr
, skb
->nh
.iph
->daddr
);
1241 * No usable cache entry
1244 if (cache
==NULL
|| (cache
->mfc_flags
&MFC_QUEUED
)) {
1248 struct sk_buff
*skb2
= skb_clone(skb
, GFP_ATOMIC
);
1249 ip_local_deliver(skb
);
1255 vif
= ipmr_find_vif(skb
->dev
);
1256 if (vif
!= ALL_VIFS
) {
1257 ipmr_cache_unresolved(cache
, vif
, skb
);
1264 ip_mr_forward(skb
, cache
, local
);
1267 return ip_local_deliver(skb
);
1272 return ip_local_deliver(skb
);
1277 #ifdef CONFIG_IP_PIMSM_V1
1279 * Handle IGMP messages of PIMv1
1282 int pim_rcv_v1(struct sk_buff
* skb
, unsigned short len
)
1284 struct igmphdr
*pim
= (struct igmphdr
*)skb
->h
.raw
;
1285 struct iphdr
*encap
;
1287 if (!mroute_do_pim
||
1288 len
< sizeof(*pim
) + sizeof(*encap
) ||
1289 pim
->group
!= PIM_V1_VERSION
|| pim
->code
!= PIM_V1_REGISTER
||
1295 encap
= (struct iphdr
*)(skb
->h
.raw
+ sizeof(struct igmphdr
));
1298 a. packet is really destinted to a multicast group
1299 b. packet is not a NULL-REGISTER
1300 c. packet is not truncated
1302 if (!MULTICAST(encap
->daddr
) ||
1303 ntohs(encap
->tot_len
) == 0 ||
1304 ntohs(encap
->tot_len
) + sizeof(*pim
) > len
) {
1308 skb
->mac
.raw
= skb
->nh
.raw
;
1309 skb_pull(skb
, (u8
*)encap
- skb
->data
);
1310 skb
->nh
.iph
= (struct iphdr
*)skb
->data
;
1312 memset(&(IPCB(skb
)->opt
), 0, sizeof(struct ip_options
));
1313 skb
->protocol
= __constant_htons(ETH_P_IP
);
1315 skb
->pkt_type
= PACKET_HOST
;
1316 dst_release(skb
->dst
);
1318 ((struct net_device_stats
*)reg_dev
->priv
)->rx_bytes
+= skb
->len
;
1319 ((struct net_device_stats
*)reg_dev
->priv
)->rx_packets
++;
1325 #ifdef CONFIG_IP_PIMSM_V2
1326 int pim_rcv(struct sk_buff
* skb
, unsigned short len
)
1328 struct pimreghdr
*pim
= (struct pimreghdr
*)skb
->h
.raw
;
1329 struct iphdr
*encap
;
1331 if (len
< sizeof(*pim
) + sizeof(*encap
) ||
1332 pim
->type
!= ((PIM_VERSION
<<4)|(PIM_REGISTER
)) ||
1333 (pim
->flags
&PIM_NULL_REGISTER
) ||
1335 ip_compute_csum((void *)pim
, len
)) {
1340 /* check if the inner packet is destined to mcast group */
1341 encap
= (struct iphdr
*)(skb
->h
.raw
+ sizeof(struct pimreghdr
));
1342 if (!MULTICAST(encap
->daddr
) ||
1343 ntohs(encap
->tot_len
) == 0 ||
1344 ntohs(encap
->tot_len
) + sizeof(*pim
) > len
) {
1348 skb
->mac
.raw
= skb
->nh
.raw
;
1349 skb_pull(skb
, (u8
*)encap
- skb
->data
);
1350 skb
->nh
.iph
= (struct iphdr
*)skb
->data
;
1352 memset(&(IPCB(skb
)->opt
), 0, sizeof(struct ip_options
));
1353 skb
->protocol
= __constant_htons(ETH_P_IP
);
1355 skb
->pkt_type
= PACKET_HOST
;
1356 dst_release(skb
->dst
);
1357 ((struct net_device_stats
*)reg_dev
->priv
)->rx_bytes
+= skb
->len
;
1358 ((struct net_device_stats
*)reg_dev
->priv
)->rx_packets
++;
1365 #ifdef CONFIG_RTNETLINK
1368 ipmr_fill_mroute(struct sk_buff
*skb
, struct mfc_cache
*c
, struct rtmsg
*rtm
)
1371 struct rtnexthop
*nhp
;
1372 struct device
*dev
= vif_table
[c
->mfc_parent
].dev
;
1374 struct rtattr
*mp_head
;
1377 RTA_PUT(skb
, RTA_IIF
, 4, &dev
->ifindex
);
1379 mp_head
= (struct rtattr
*)skb_put(skb
, RTA_LENGTH(0));
1381 for (ct
= c
->mfc_minvif
; ct
< c
->mfc_maxvif
; ct
++) {
1382 if (c
->mfc_ttls
[ct
] < 255) {
1383 if (skb_tailroom(skb
) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp
)) + 4))
1384 goto rtattr_failure
;
1385 nhp
= (struct rtnexthop
*)skb_put(skb
, RTA_ALIGN(sizeof(*nhp
)));
1386 nhp
->rtnh_flags
= 0;
1387 nhp
->rtnh_hops
= c
->mfc_ttls
[ct
];
1388 nhp
->rtnh_ifindex
= vif_table
[ct
].dev
->ifindex
;
1389 nhp
->rtnh_len
= sizeof(*nhp
);
1392 mp_head
->rta_type
= RTA_MULTIPATH
;
1393 mp_head
->rta_len
= skb
->tail
- (u8
*)mp_head
;
1394 rtm
->rtm_type
= RTN_MULTICAST
;
1398 skb_trim(skb
, b
- skb
->data
);
1402 int ipmr_get_route(struct sk_buff
*skb
, struct rtmsg
*rtm
, int nowait
)
1404 struct mfc_cache
*cache
;
1405 struct rtable
*rt
= (struct rtable
*)skb
->dst
;
1408 cache
= ipmr_cache_find(rt
->rt_src
, rt
->rt_dst
);
1409 if (cache
==NULL
|| (cache
->mfc_flags
&MFC_QUEUED
)) {
1420 if (dev
== NULL
|| (vif
= ipmr_find_vif(dev
)) == ALL_VIFS
) {
1424 skb
->nh
.raw
= skb_push(skb
, sizeof(struct iphdr
));
1425 skb
->nh
.iph
->ihl
= sizeof(struct iphdr
)>>2;
1426 skb
->nh
.iph
->saddr
= rt
->rt_src
;
1427 skb
->nh
.iph
->daddr
= rt
->rt_dst
;
1428 skb
->nh
.iph
->version
= 0;
1429 err
= ipmr_cache_unresolved(cache
, vif
, skb
);
1433 /* Resolved cache entry is not changed by net bh,
1434 so that we are allowed to enable it.
1438 if (!nowait
&& (rtm
->rtm_flags
&RTM_F_NOTIFY
))
1439 cache
->mfc_flags
|= MFC_NOTIFY
;
1440 return ipmr_fill_mroute(skb
, cache
, rtm
);
1445 * The /proc interfaces to multicast routing /proc/ip_mr_cache /proc/ip_mr_vif
1448 int ipmr_vif_info(char *buffer
, char **start
, off_t offset
, int length
, int dummy
)
1450 struct vif_device
*vif
;
1457 len
+= sprintf(buffer
,
1458 "Interface BytesIn PktsIn BytesOut PktsOut Flags Local Remote\n");
1461 for (ct
=0;ct
<maxvif
;ct
++)
1463 char *name
= "none";
1465 if(!(vifc_map
&(1<<ct
)))
1468 name
= vif
->dev
->name
;
1469 size
= sprintf(buffer
+len
, "%2d %-10s %8ld %7ld %8ld %7ld %05X %08X %08X\n",
1470 ct
, name
, vif
->bytes_in
, vif
->pkt_in
, vif
->bytes_out
, vif
->pkt_out
,
1471 vif
->flags
, vif
->local
, vif
->remote
);
1479 if(pos
>offset
+length
)
1483 *start
=buffer
+(offset
-begin
);
1484 len
-=(offset
-begin
);
1490 int ipmr_mfc_info(char *buffer
, char **start
, off_t offset
, int length
, int dummy
)
1492 struct mfc_cache
*mfc
;
1499 len
+= sprintf(buffer
,
1500 "Group Origin Iif Pkts Bytes Wrong Oifs\n");
1503 for (ct
=0;ct
<MFC_LINES
;ct
++)
1506 mfc
=mfc_cache_array
[ct
];
1512 * Interface forwarding map
1514 size
= sprintf(buffer
+len
, "%08lX %08lX %-3d %8ld %8ld %8ld",
1515 (unsigned long)mfc
->mfc_mcastgrp
,
1516 (unsigned long)mfc
->mfc_origin
,
1517 mfc
->mfc_parent
== ALL_VIFS
? -1 : mfc
->mfc_parent
,
1518 (mfc
->mfc_flags
& MFC_QUEUED
) ? mfc
->mfc_unresolved
.qlen
: mfc
->mfc_pkt
,
1521 for(n
=mfc
->mfc_minvif
;n
<mfc
->mfc_maxvif
;n
++)
1523 if(vifc_map
&(1<<n
) && mfc
->mfc_ttls
[n
] < 255)
1524 size
+= sprintf(buffer
+len
+size
, " %2d:%-3d", n
, mfc
->mfc_ttls
[n
]);
1526 size
+= sprintf(buffer
+len
+size
, "\n");
1534 if(pos
>offset
+length
)
1544 *start
=buffer
+(offset
-begin
);
1545 len
-=(offset
-begin
);
1554 #ifdef CONFIG_PROC_FS
1555 static struct proc_dir_entry proc_net_ipmr_vif
= {
1556 PROC_NET_IPMR_VIF
, 9 ,"ip_mr_vif",
1557 S_IFREG
| S_IRUGO
, 1, 0, 0,
1558 0, &proc_net_inode_operations
,
1561 static struct proc_dir_entry proc_net_ipmr_mfc
= {
1562 PROC_NET_IPMR_MFC
, 11 ,"ip_mr_cache",
1563 S_IFREG
| S_IRUGO
, 1, 0, 0,
1564 0, &proc_net_inode_operations
,
1569 #ifdef CONFIG_IP_PIMSM_V2
1570 struct inet_protocol pim_protocol
=
1572 pim_rcv
, /* PIM handler */
1573 NULL
, /* PIM error control */
1575 IPPROTO_PIM
, /* protocol ID */
1584 * Setup for IP multicast routing
1587 __initfunc(void ip_mr_init(void))
1589 printk(KERN_INFO
"Linux IP multicast router 0.06 plus PIM-SM\n");
1590 register_netdevice_notifier(&ip_mr_notifier
);
1591 #ifdef CONFIG_PROC_FS
1592 proc_net_register(&proc_net_ipmr_vif
);
1593 proc_net_register(&proc_net_ipmr_mfc
);