2 * Linux NET3: IP/IP protocol decoder modified to support
3 * virtual tunnel interface
6 * Saurabh Mohan (saurabh.mohan@vyatta.com) 05/07/2012
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
16 This version of net/ipv4/ip_vti.c is cloned of net/ipv4/ipip.c
18 For comments look at net/ipv4/ip_gre.c --ANK
22 #include <linux/capability.h>
23 #include <linux/module.h>
24 #include <linux/types.h>
25 #include <linux/kernel.h>
26 #include <linux/uaccess.h>
27 #include <linux/skbuff.h>
28 #include <linux/netdevice.h>
30 #include <linux/tcp.h>
31 #include <linux/udp.h>
32 #include <linux/if_arp.h>
33 #include <linux/mroute.h>
34 #include <linux/init.h>
35 #include <linux/netfilter_ipv4.h>
36 #include <linux/if_ether.h>
41 #include <net/ip_tunnels.h>
42 #include <net/inet_ecn.h>
44 #include <net/net_namespace.h>
45 #include <net/netns/generic.h>
47 static struct rtnl_link_ops vti_link_ops __read_mostly
;
49 static int vti_net_id __read_mostly
;
50 static int vti_tunnel_init(struct net_device
*dev
);
52 static int vti_err(struct sk_buff
*skb
, u32 info
)
55 /* All the routers (except for Linux) return only
56 * 8 bytes of packet payload. It means, that precise relaying of
57 * ICMP in the real Internet is absolutely infeasible.
59 struct net
*net
= dev_net(skb
->dev
);
60 struct ip_tunnel_net
*itn
= net_generic(net
, vti_net_id
);
61 struct iphdr
*iph
= (struct iphdr
*)skb
->data
;
62 const int type
= icmp_hdr(skb
)->type
;
63 const int code
= icmp_hdr(skb
)->code
;
69 case ICMP_PARAMETERPROB
:
72 case ICMP_DEST_UNREACH
:
75 case ICMP_PORT_UNREACH
:
76 /* Impossible event. */
79 /* All others are translated to HOST_UNREACH. */
83 case ICMP_TIME_EXCEEDED
:
84 if (code
!= ICMP_EXC_TTL
)
91 t
= ip_tunnel_lookup(itn
, skb
->dev
->ifindex
, TUNNEL_NO_KEY
,
92 iph
->daddr
, iph
->saddr
, 0);
96 if (type
== ICMP_DEST_UNREACH
&& code
== ICMP_FRAG_NEEDED
) {
97 ipv4_update_pmtu(skb
, dev_net(skb
->dev
), info
,
98 t
->parms
.link
, 0, IPPROTO_IPIP
, 0);
104 if (t
->parms
.iph
.ttl
== 0 && type
== ICMP_TIME_EXCEEDED
)
107 if (time_before(jiffies
, t
->err_time
+ IPTUNNEL_ERR_TIMEO
))
111 t
->err_time
= jiffies
;
116 /* We dont digest the packet therefore let the packet pass */
117 static int vti_rcv(struct sk_buff
*skb
)
119 struct ip_tunnel
*tunnel
;
120 const struct iphdr
*iph
= ip_hdr(skb
);
121 struct net
*net
= dev_net(skb
->dev
);
122 struct ip_tunnel_net
*itn
= net_generic(net
, vti_net_id
);
124 tunnel
= ip_tunnel_lookup(itn
, skb
->dev
->ifindex
, TUNNEL_NO_KEY
,
125 iph
->saddr
, iph
->daddr
, 0);
126 if (tunnel
!= NULL
) {
127 struct pcpu_tstats
*tstats
;
129 if (!xfrm4_policy_check(NULL
, XFRM_POLICY_IN
, skb
))
132 tstats
= this_cpu_ptr(tunnel
->dev
->tstats
);
133 u64_stats_update_begin(&tstats
->syncp
);
134 tstats
->rx_packets
++;
135 tstats
->rx_bytes
+= skb
->len
;
136 u64_stats_update_end(&tstats
->syncp
);
140 skb
->dev
= tunnel
->dev
;
147 /* This function assumes it is being called from dev_queue_xmit()
148 * and that skb is filled properly by that function.
151 static netdev_tx_t
vti_tunnel_xmit(struct sk_buff
*skb
, struct net_device
*dev
)
153 struct ip_tunnel
*tunnel
= netdev_priv(dev
);
154 struct iphdr
*tiph
= &tunnel
->parms
.iph
;
156 struct rtable
*rt
; /* Route to the other host */
157 struct net_device
*tdev
; /* Device to other host */
158 struct iphdr
*old_iph
= ip_hdr(skb
);
159 __be32 dst
= tiph
->daddr
;
163 if (skb
->protocol
!= htons(ETH_P_IP
))
168 memset(&fl4
, 0, sizeof(fl4
));
169 flowi4_init_output(&fl4
, tunnel
->parms
.link
,
170 be32_to_cpu(tunnel
->parms
.i_key
), RT_TOS(tos
),
173 dst
, tiph
->saddr
, 0, 0);
174 rt
= ip_route_output_key(dev_net(dev
), &fl4
);
176 dev
->stats
.tx_carrier_errors
++;
179 /* if there is no transform then this tunnel is not functional.
180 * Or if the xfrm is not mode tunnel.
183 rt
->dst
.xfrm
->props
.mode
!= XFRM_MODE_TUNNEL
) {
184 dev
->stats
.tx_carrier_errors
++;
191 dev
->stats
.collisions
++;
195 if (tunnel
->err_count
> 0) {
196 if (time_before(jiffies
,
197 tunnel
->err_time
+ IPTUNNEL_ERR_TIMEO
)) {
199 dst_link_failure(skb
);
201 tunnel
->err_count
= 0;
204 memset(IPCB(skb
), 0, sizeof(*IPCB(skb
)));
206 skb_dst_set(skb
, &rt
->dst
);
208 skb
->dev
= skb_dst(skb
)->dev
;
210 err
= dst_output(skb
);
211 if (net_xmit_eval(err
) == 0)
213 iptunnel_xmit_stats(err
, &dev
->stats
, dev
->tstats
);
217 dst_link_failure(skb
);
219 dev
->stats
.tx_errors
++;
225 vti_tunnel_ioctl(struct net_device
*dev
, struct ifreq
*ifr
, int cmd
)
228 struct ip_tunnel_parm p
;
230 if (copy_from_user(&p
, ifr
->ifr_ifru
.ifru_data
, sizeof(p
)))
233 if (cmd
== SIOCADDTUNNEL
|| cmd
== SIOCCHGTUNNEL
) {
234 if (p
.iph
.version
!= 4 || p
.iph
.protocol
!= IPPROTO_IPIP
||
239 err
= ip_tunnel_ioctl(dev
, &p
, cmd
);
243 if (cmd
!= SIOCDELTUNNEL
) {
244 p
.i_flags
|= GRE_KEY
| VTI_ISVTI
;
245 p
.o_flags
|= GRE_KEY
;
248 if (copy_to_user(ifr
->ifr_ifru
.ifru_data
, &p
, sizeof(p
)))
253 static const struct net_device_ops vti_netdev_ops
= {
254 .ndo_init
= vti_tunnel_init
,
255 .ndo_uninit
= ip_tunnel_uninit
,
256 .ndo_start_xmit
= vti_tunnel_xmit
,
257 .ndo_do_ioctl
= vti_tunnel_ioctl
,
258 .ndo_change_mtu
= ip_tunnel_change_mtu
,
259 .ndo_get_stats64
= ip_tunnel_get_stats64
,
262 static void vti_tunnel_setup(struct net_device
*dev
)
264 dev
->netdev_ops
= &vti_netdev_ops
;
265 ip_tunnel_setup(dev
, vti_net_id
);
268 static int vti_tunnel_init(struct net_device
*dev
)
270 struct ip_tunnel
*tunnel
= netdev_priv(dev
);
271 struct iphdr
*iph
= &tunnel
->parms
.iph
;
273 memcpy(dev
->dev_addr
, &iph
->saddr
, 4);
274 memcpy(dev
->broadcast
, &iph
->daddr
, 4);
276 dev
->type
= ARPHRD_TUNNEL
;
277 dev
->hard_header_len
= LL_MAX_HEADER
+ sizeof(struct iphdr
);
278 dev
->mtu
= ETH_DATA_LEN
;
279 dev
->flags
= IFF_NOARP
;
282 dev
->features
|= NETIF_F_NETNS_LOCAL
;
283 dev
->features
|= NETIF_F_LLTX
;
284 dev
->priv_flags
&= ~IFF_XMIT_DST_RELEASE
;
286 return ip_tunnel_init(dev
);
289 static void __net_init
vti_fb_tunnel_init(struct net_device
*dev
)
291 struct ip_tunnel
*tunnel
= netdev_priv(dev
);
292 struct iphdr
*iph
= &tunnel
->parms
.iph
;
295 iph
->protocol
= IPPROTO_IPIP
;
299 static struct xfrm_tunnel vti_handler __read_mostly
= {
301 .err_handler
= vti_err
,
305 static int __net_init
vti_init_net(struct net
*net
)
308 struct ip_tunnel_net
*itn
;
310 err
= ip_tunnel_init_net(net
, vti_net_id
, &vti_link_ops
, "ip_vti0");
313 itn
= net_generic(net
, vti_net_id
);
314 vti_fb_tunnel_init(itn
->fb_tunnel_dev
);
318 static void __net_exit
vti_exit_net(struct net
*net
)
320 struct ip_tunnel_net
*itn
= net_generic(net
, vti_net_id
);
321 ip_tunnel_delete_net(itn
, &vti_link_ops
);
324 static struct pernet_operations vti_net_ops
= {
325 .init
= vti_init_net
,
326 .exit
= vti_exit_net
,
328 .size
= sizeof(struct ip_tunnel_net
),
331 static int vti_tunnel_validate(struct nlattr
*tb
[], struct nlattr
*data
[])
336 static void vti_netlink_parms(struct nlattr
*data
[],
337 struct ip_tunnel_parm
*parms
)
339 memset(parms
, 0, sizeof(*parms
));
341 parms
->iph
.protocol
= IPPROTO_IPIP
;
346 if (data
[IFLA_VTI_LINK
])
347 parms
->link
= nla_get_u32(data
[IFLA_VTI_LINK
]);
349 if (data
[IFLA_VTI_IKEY
])
350 parms
->i_key
= nla_get_be32(data
[IFLA_VTI_IKEY
]);
352 if (data
[IFLA_VTI_OKEY
])
353 parms
->o_key
= nla_get_be32(data
[IFLA_VTI_OKEY
]);
355 if (data
[IFLA_VTI_LOCAL
])
356 parms
->iph
.saddr
= nla_get_be32(data
[IFLA_VTI_LOCAL
]);
358 if (data
[IFLA_VTI_REMOTE
])
359 parms
->iph
.daddr
= nla_get_be32(data
[IFLA_VTI_REMOTE
]);
363 static int vti_newlink(struct net
*src_net
, struct net_device
*dev
,
364 struct nlattr
*tb
[], struct nlattr
*data
[])
366 struct ip_tunnel_parm parms
;
368 vti_netlink_parms(data
, &parms
);
369 return ip_tunnel_newlink(dev
, tb
, &parms
);
372 static int vti_changelink(struct net_device
*dev
, struct nlattr
*tb
[],
373 struct nlattr
*data
[])
375 struct ip_tunnel_parm p
;
377 vti_netlink_parms(data
, &p
);
378 return ip_tunnel_changelink(dev
, tb
, &p
);
381 static size_t vti_get_size(const struct net_device
*dev
)
392 /* IFLA_VTI_REMOTE */
397 static int vti_fill_info(struct sk_buff
*skb
, const struct net_device
*dev
)
399 struct ip_tunnel
*t
= netdev_priv(dev
);
400 struct ip_tunnel_parm
*p
= &t
->parms
;
402 nla_put_u32(skb
, IFLA_VTI_LINK
, p
->link
);
403 nla_put_be32(skb
, IFLA_VTI_IKEY
, p
->i_key
);
404 nla_put_be32(skb
, IFLA_VTI_OKEY
, p
->o_key
);
405 nla_put_be32(skb
, IFLA_VTI_LOCAL
, p
->iph
.saddr
);
406 nla_put_be32(skb
, IFLA_VTI_REMOTE
, p
->iph
.daddr
);
411 static const struct nla_policy vti_policy
[IFLA_VTI_MAX
+ 1] = {
412 [IFLA_VTI_LINK
] = { .type
= NLA_U32
},
413 [IFLA_VTI_IKEY
] = { .type
= NLA_U32
},
414 [IFLA_VTI_OKEY
] = { .type
= NLA_U32
},
415 [IFLA_VTI_LOCAL
] = { .len
= FIELD_SIZEOF(struct iphdr
, saddr
) },
416 [IFLA_VTI_REMOTE
] = { .len
= FIELD_SIZEOF(struct iphdr
, daddr
) },
419 static struct rtnl_link_ops vti_link_ops __read_mostly
= {
421 .maxtype
= IFLA_VTI_MAX
,
422 .policy
= vti_policy
,
423 .priv_size
= sizeof(struct ip_tunnel
),
424 .setup
= vti_tunnel_setup
,
425 .validate
= vti_tunnel_validate
,
426 .newlink
= vti_newlink
,
427 .changelink
= vti_changelink
,
428 .get_size
= vti_get_size
,
429 .fill_info
= vti_fill_info
,
432 static int __init
vti_init(void)
436 pr_info("IPv4 over IPSec tunneling driver\n");
438 err
= register_pernet_device(&vti_net_ops
);
441 err
= xfrm4_mode_tunnel_input_register(&vti_handler
);
443 unregister_pernet_device(&vti_net_ops
);
444 pr_info("vti init: can't register tunnel\n");
447 err
= rtnl_link_register(&vti_link_ops
);
449 goto rtnl_link_failed
;
454 xfrm4_mode_tunnel_input_deregister(&vti_handler
);
455 unregister_pernet_device(&vti_net_ops
);
459 static void __exit
vti_fini(void)
461 rtnl_link_unregister(&vti_link_ops
);
462 if (xfrm4_mode_tunnel_input_deregister(&vti_handler
))
463 pr_info("vti close: can't deregister tunnel\n");
465 unregister_pernet_device(&vti_net_ops
);
468 module_init(vti_init
);
469 module_exit(vti_fini
);
470 MODULE_LICENSE("GPL");
471 MODULE_ALIAS_RTNL_LINK("vti");
472 MODULE_ALIAS_NETDEV("ip_vti0");