net/ipv4/ipip.c

   1 /*
   2  *      Linux NET3:     IP/IP protocol decoder.
   3  *
   4  *      Authors:
   5  *              Sam Lantinga (slouken@cs.ucdavis.edu)  02/01/95
   6  *
   7  *      Fixes:
   8  *              Alan Cox        :       Merged and made usable non modular (it's so tiny it's silly as
   9  *                                      a module taking up 2 pages).
  10  *              Alan Cox        :       Fixed bug with 1.3.18 and IPIP not working (now needs to set skb->h.iph)
  11  *                                      to keep ip_forward happy.
  12  *              Alan Cox        :       More fixes for 1.3.21, and firewall fix. Maybe this will work soon 8).
  13  *              Kai Schulte     :       Fixed #defines for IP_FIREWALL->FIREWALL
  14  *              David Woodhouse :       Perform some basic ICMP handling.
  15  *                                      IPIP Routing without decapsulation.
  16  *              Carlos Picoto   :       GRE over IP support
  17  *              Alexey Kuznetsov:       Reworked. Really, now it is truncated version of ipv4/ip_gre.c.
  18  *                                      I do not want to merge them together.
  19  *
  20  *      This program is free software; you can redistribute it and/or
  21  *      modify it under the terms of the GNU General Public License
  22  *      as published by the Free Software Foundation; either version
  23  *      2 of the License, or (at your option) any later version.
  24  *
  25  */
  26
  27 /* tunnel.c: an IP tunnel driver
  28
  29         The purpose of this driver is to provide an IP tunnel through
  30         which you can tunnel network traffic transparently across subnets.
  31
  32         This was written by looking at Nick Holloway's dummy driver
  33         Thanks for the great code!
  34
  35                 -Sam Lantinga   (slouken@cs.ucdavis.edu)  02/01/95
  36
  37         Minor tweaks:
  38                 Cleaned up the code a little and added some pre-1.3.0 tweaks.
  39                 dev->hard_header/hard_header_len changed to use no headers.
  40                 Comments/bracketing tweaked.
  41                 Made the tunnels use dev->name not tunnel: when error reporting.
  42                 Added tx_dropped stat
  43
  44                 -Alan Cox       (alan@lxorguk.ukuu.org.uk) 21 March 95
  45
  46         Reworked:
  47                 Changed to tunnel to destination gateway in addition to the
  48                         tunnel's pointopoint address
  49                 Almost completely rewritten
  50                 Note:  There is currently no firewall or ICMP handling done.
  51
  52                 -Sam Lantinga   (slouken@cs.ucdavis.edu) 02/13/96
  53
  54 */
  55
  56 /* Things I wish I had known when writing the tunnel driver:
  57
  58         When the tunnel_xmit() function is called, the skb contains the
  59         packet to be sent (plus a great deal of extra info), and dev
  60         contains the tunnel device that _we_ are.
  61
  62         When we are passed a packet, we are expected to fill in the
  63         source address with our source IP address.
  64
  65         What is the proper way to allocate, copy and free a buffer?
  66         After you allocate it, it is a "0 length" chunk of memory
  67         starting at zero.  If you want to add headers to the buffer
  68         later, you'll have to call "skb_reserve(skb, amount)" with
  69         the amount of memory you want reserved.  Then, you call
  70         "skb_put(skb, amount)" with the amount of space you want in
  71         the buffer.  skb_put() returns a pointer to the top (#0) of
  72         that buffer.  skb->len is set to the amount of space you have
  73         "allocated" with skb_put().  You can then write up to skb->len
  74         bytes to that buffer.  If you need more, you can call skb_put()
  75         again with the additional amount of space you need.  You can
  76         find out how much more space you can allocate by calling
  77         "skb_tailroom(skb)".
  78         Now, to add header space, call "skb_push(skb, header_len)".
  79         This creates space at the beginning of the buffer and returns
  80         a pointer to this new space.  If later you need to strip a
  81         header from a buffer, call "skb_pull(skb, header_len)".
  82         skb_headroom() will return how much space is left at the top
  83         of the buffer (before the main data).  Remember, this headroom
  84         space must be reserved before the skb_put() function is called.
  85         */
  86
  87 /*
  88    This version of net/ipv4/ipip.c is a clone of net/ipv4/ip_gre.c
  89
  90    For comments look at net/ipv4/ip_gre.c --ANK
  91  */
  92
  93
  94 #include <linux/capability.h>
  95 #include <linux/module.h>
  96 #include <linux/types.h>
  97 #include <linux/kernel.h>
  98 #include <linux/slab.h>
  99 #include <asm/uaccess.h>
 100 #include <linux/skbuff.h>
 101 #include <linux/netdevice.h>
 102 #include <linux/in.h>
 103 #include <linux/tcp.h>
 104 #include <linux/udp.h>
 105 #include <linux/if_arp.h>
 106 #include <linux/mroute.h>
 107 #include <linux/init.h>
 108 #include <linux/netfilter_ipv4.h>
 109 #include <linux/if_ether.h>
 110
 111 #include <net/sock.h>
 112 #include <net/ip.h>
 113 #include <net/icmp.h>
 114 #include <net/ipip.h>
 115 #include <net/inet_ecn.h>
 116 #include <net/xfrm.h>
 117 #include <net/net_namespace.h>
 118 #include <net/netns/generic.h>
 119
 120 #define HASH_SIZE  16
 121 #define HASH(addr) (((__force u32)addr^((__force u32)addr>>4))&0xF)
 122
 123 static int ipip_net_id __read_mostly;
 124 struct ipip_net {
 125         struct ip_tunnel __rcu *tunnels_r_l[HASH_SIZE];
 126         struct ip_tunnel __rcu *tunnels_r[HASH_SIZE];
 127         struct ip_tunnel __rcu *tunnels_l[HASH_SIZE];
 128         struct ip_tunnel __rcu *tunnels_wc[1];
 129         struct ip_tunnel __rcu **tunnels[4];
 130
 131         struct net_device *fb_tunnel_dev;
 132 };
 133
 134 static int ipip_tunnel_init(struct net_device *dev);
 135 static void ipip_tunnel_setup(struct net_device *dev);
 136 static void ipip_dev_free(struct net_device *dev);
 137
 138 /*
 139  * Locking : hash tables are protected by RCU and RTNL
 140  */
 141
 142 #define for_each_ip_tunnel_rcu(start) \
 143         for (t = rcu_dereference(start); t; t = rcu_dereference(t->next))
 144
 145 /* often modified stats are per cpu, other are shared (netdev->stats) */
 146 struct pcpu_tstats {
 147         unsigned long   rx_packets;
 148         unsigned long   rx_bytes;
 149         unsigned long   tx_packets;
 150         unsigned long   tx_bytes;
 151 };
 152
 153 static struct net_device_stats *ipip_get_stats(struct net_device *dev)
 154 {
 155         struct pcpu_tstats sum = { 0 };
 156         int i;
 157
 158         for_each_possible_cpu(i) {
 159                 const struct pcpu_tstats *tstats = per_cpu_ptr(dev->tstats, i);
 160
 161                 sum.rx_packets += tstats->rx_packets;
 162                 sum.rx_bytes   += tstats->rx_bytes;
 163                 sum.tx_packets += tstats->tx_packets;
 164                 sum.tx_bytes   += tstats->tx_bytes;
 165         }
 166         dev->stats.rx_packets = sum.rx_packets;
 167         dev->stats.rx_bytes   = sum.rx_bytes;
 168         dev->stats.tx_packets = sum.tx_packets;
 169         dev->stats.tx_bytes   = sum.tx_bytes;
 170         return &dev->stats;
 171 }
 172
 173 static struct ip_tunnel * ipip_tunnel_lookup(struct net *net,
 174                 __be32 remote, __be32 local)
 175 {
 176         unsigned int h0 = HASH(remote);
 177         unsigned int h1 = HASH(local);
 178         struct ip_tunnel *t;
 179         struct ipip_net *ipn = net_generic(net, ipip_net_id);
 180
 181         for_each_ip_tunnel_rcu(ipn->tunnels_r_l[h0 ^ h1])
 182                 if (local == t->parms.iph.saddr &&
 183                     remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
 184                         return t;
 185
 186         for_each_ip_tunnel_rcu(ipn->tunnels_r[h0])
 187                 if (remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
 188                         return t;
 189
 190         for_each_ip_tunnel_rcu(ipn->tunnels_l[h1])
 191                 if (local == t->parms.iph.saddr && (t->dev->flags&IFF_UP))
 192                         return t;
 193
 194         t = rcu_dereference(ipn->tunnels_wc[0]);
 195         if (t && (t->dev->flags&IFF_UP))
 196                 return t;
 197         return NULL;
 198 }
 199
 200 static struct ip_tunnel __rcu **__ipip_bucket(struct ipip_net *ipn,
 201                 struct ip_tunnel_parm *parms)
 202 {
 203         __be32 remote = parms->iph.daddr;
 204         __be32 local = parms->iph.saddr;
 205         unsigned int h = 0;
 206         int prio = 0;
 207
 208         if (remote) {
 209                 prio |= 2;
 210                 h ^= HASH(remote);
 211         }
 212         if (local) {
 213                 prio |= 1;
 214                 h ^= HASH(local);
 215         }
 216         return &ipn->tunnels[prio][h];
 217 }
 218
 219 static inline struct ip_tunnel __rcu **ipip_bucket(struct ipip_net *ipn,
 220                 struct ip_tunnel *t)
 221 {
 222         return __ipip_bucket(ipn, &t->parms);
 223 }
 224
 225 static void ipip_tunnel_unlink(struct ipip_net *ipn, struct ip_tunnel *t)
 226 {
 227         struct ip_tunnel __rcu **tp;
 228         struct ip_tunnel *iter;
 229
 230         for (tp = ipip_bucket(ipn, t);
 231              (iter = rtnl_dereference(*tp)) != NULL;
 232              tp = &iter->next) {
 233                 if (t == iter) {
 234                         rcu_assign_pointer(*tp, t->next);
 235                         break;
 236                 }
 237         }
 238 }
 239
 240 static void ipip_tunnel_link(struct ipip_net *ipn, struct ip_tunnel *t)
 241 {
 242         struct ip_tunnel __rcu **tp = ipip_bucket(ipn, t);
 243
 244         rcu_assign_pointer(t->next, rtnl_dereference(*tp));
 245         rcu_assign_pointer(*tp, t);
 246 }
 247
 248 static struct ip_tunnel * ipip_tunnel_locate(struct net *net,
 249                 struct ip_tunnel_parm *parms, int create)
 250 {
 251         __be32 remote = parms->iph.daddr;
 252         __be32 local = parms->iph.saddr;
 253         struct ip_tunnel *t, *nt;
 254         struct ip_tunnel __rcu **tp;
 255         struct net_device *dev;
 256         char name[IFNAMSIZ];
 257         struct ipip_net *ipn = net_generic(net, ipip_net_id);
 258
 259         for (tp = __ipip_bucket(ipn, parms);
 260                  (t = rtnl_dereference(*tp)) != NULL;
 261                  tp = &t->next) {
 262                 if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr)
 263                         return t;
 264         }
 265         if (!create)
 266                 return NULL;
 267
 268         if (parms->name[0])
 269                 strlcpy(name, parms->name, IFNAMSIZ);
 270         else
 271                 strcpy(name, "tunl%d");
 272
 273         dev = alloc_netdev(sizeof(*t), name, ipip_tunnel_setup);
 274         if (dev == NULL)
 275                 return NULL;
 276
 277         dev_net_set(dev, net);
 278
 279         nt = netdev_priv(dev);
 280         nt->parms = *parms;
 281
 282         if (ipip_tunnel_init(dev) < 0)
 283                 goto failed_free;
 284
 285         if (register_netdevice(dev) < 0)
 286                 goto failed_free;
 287
 288         dev_hold(dev);
 289         ipip_tunnel_link(ipn, nt);
 290         return nt;
 291
 292 failed_free:
 293         ipip_dev_free(dev);
 294         return NULL;
 295 }
 296
 297 /* called with RTNL */
 298 static void ipip_tunnel_uninit(struct net_device *dev)
 299 {
 300         struct net *net = dev_net(dev);
 301         struct ipip_net *ipn = net_generic(net, ipip_net_id);
 302
 303         if (dev == ipn->fb_tunnel_dev)
 304                 rcu_assign_pointer(ipn->tunnels_wc[0], NULL);
 305         else
 306                 ipip_tunnel_unlink(ipn, netdev_priv(dev));
 307         dev_put(dev);
 308 }
 309
 310 static int ipip_err(struct sk_buff *skb, u32 info)
 311 {
 312
 313 /* All the routers (except for Linux) return only
 314    8 bytes of packet payload. It means, that precise relaying of
 315    ICMP in the real Internet is absolutely infeasible.
 316  */
 317         const struct iphdr *iph = (const struct iphdr *)skb->data;
 318         const int type = icmp_hdr(skb)->type;
 319         const int code = icmp_hdr(skb)->code;
 320         struct ip_tunnel *t;
 321         int err;
 322
 323         switch (type) {
 324         default:
 325         case ICMP_PARAMETERPROB:
 326                 return 0;
 327
 328         case ICMP_DEST_UNREACH:
 329                 switch (code) {
 330                 case ICMP_SR_FAILED:
 331                 case ICMP_PORT_UNREACH:
 332                         /* Impossible event. */
 333                         return 0;
 334                 case ICMP_FRAG_NEEDED:
 335                         /* Soft state for pmtu is maintained by IP core. */
 336                         return 0;
 337                 default:
 338                         /* All others are translated to HOST_UNREACH.
 339                            rfc2003 contains "deep thoughts" about NET_UNREACH,
 340                            I believe they are just ether pollution. --ANK
 341                          */
 342                         break;
 343                 }
 344                 break;
 345         case ICMP_TIME_EXCEEDED:
 346                 if (code != ICMP_EXC_TTL)
 347                         return 0;
 348                 break;
 349         }
 350
 351         err = -ENOENT;
 352
 353         rcu_read_lock();
 354         t = ipip_tunnel_lookup(dev_net(skb->dev), iph->daddr, iph->saddr);
 355         if (t == NULL || t->parms.iph.daddr == 0)
 356                 goto out;
 357
 358         err = 0;
 359         if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
 360                 goto out;
 361
 362         if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO))
 363                 t->err_count++;
 364         else
 365                 t->err_count = 1;
 366         t->err_time = jiffies;
 367 out:
 368         rcu_read_unlock();
 369         return err;
 370 }
 371
 372 static inline void ipip_ecn_decapsulate(const struct iphdr *outer_iph,
 373                                         struct sk_buff *skb)
 374 {
 375         struct iphdr *inner_iph = ip_hdr(skb);
 376
 377         if (INET_ECN_is_ce(outer_iph->tos))
 378                 IP_ECN_set_ce(inner_iph);
 379 }
 380
 381 static int ipip_rcv(struct sk_buff *skb)
 382 {
 383         struct ip_tunnel *tunnel;
 384         const struct iphdr *iph = ip_hdr(skb);
 385
 386         rcu_read_lock();
 387         tunnel = ipip_tunnel_lookup(dev_net(skb->dev), iph->saddr, iph->daddr);
 388         if (tunnel != NULL) {
 389                 struct pcpu_tstats *tstats;
 390
 391                 if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
 392                         rcu_read_unlock();
 393                         kfree_skb(skb);
 394                         return 0;
 395                 }
 396
 397                 secpath_reset(skb);
 398
 399                 skb->mac_header = skb->network_header;
 400                 skb_reset_network_header(skb);
 401                 skb->protocol = htons(ETH_P_IP);
 402                 skb->pkt_type = PACKET_HOST;
 403
 404                 tstats = this_cpu_ptr(tunnel->dev->tstats);
 405                 tstats->rx_packets++;
 406                 tstats->rx_bytes += skb->len;
 407
 408                 __skb_tunnel_rx(skb, tunnel->dev);
 409
 410                 ipip_ecn_decapsulate(iph, skb);
 411
 412                 netif_rx(skb);
 413
 414                 rcu_read_unlock();
 415                 return 0;
 416         }
 417         rcu_read_unlock();
 418
 419         return -1;
 420 }
 421
 422 /*
 423  *      This function assumes it is being called from dev_queue_xmit()
 424  *      and that skb is filled properly by that function.
 425  */
 426
 427 static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
 428 {
 429         struct ip_tunnel *tunnel = netdev_priv(dev);
 430         struct pcpu_tstats *tstats;
 431         const struct iphdr  *tiph = &tunnel->parms.iph;
 432         u8     tos = tunnel->parms.iph.tos;
 433         __be16 df = tiph->frag_off;
 434         struct rtable *rt;                      /* Route to the other host */
 435         struct net_device *tdev;                /* Device to other host */
 436         const struct iphdr  *old_iph = ip_hdr(skb);
 437         struct iphdr  *iph;                     /* Our new IP header */
 438         unsigned int max_headroom;              /* The extra header space needed */
 439         __be32 dst = tiph->daddr;
 440         struct flowi4 fl4;
 441         int    mtu;
 442
 443         if (skb->protocol != htons(ETH_P_IP))
 444                 goto tx_error;
 445
 446         if (tos & 1)
 447                 tos = old_iph->tos;
 448
 449         if (!dst) {
 450                 /* NBMA tunnel */
 451                 if ((rt = skb_rtable(skb)) == NULL) {
 452                         dev->stats.tx_fifo_errors++;
 453                         goto tx_error;
 454                 }
 455                 if ((dst = rt->rt_gateway) == 0)
 456                         goto tx_error_icmp;
 457         }
 458
 459         rt = ip_route_output_ports(dev_net(dev), &fl4, NULL,
 460                                    dst, tiph->saddr,
 461                                    0, 0,
 462                                    IPPROTO_IPIP, RT_TOS(tos),
 463                                    tunnel->parms.link);
 464         if (IS_ERR(rt)) {
 465                 dev->stats.tx_carrier_errors++;
 466                 goto tx_error_icmp;
 467         }
 468         tdev = rt->dst.dev;
 469
 470         if (tdev == dev) {
 471                 ip_rt_put(rt);
 472                 dev->stats.collisions++;
 473                 goto tx_error;
 474         }
 475
 476         df |= old_iph->frag_off & htons(IP_DF);
 477
 478         if (df) {
 479                 mtu = dst_mtu(&rt->dst) - sizeof(struct iphdr);
 480
 481                 if (mtu < 68) {
 482                         dev->stats.collisions++;
 483                         ip_rt_put(rt);
 484                         goto tx_error;
 485                 }
 486
 487                 if (skb_dst(skb))
 488                         skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu);
 489
 490                 if ((old_iph->frag_off & htons(IP_DF)) &&
 491                     mtu < ntohs(old_iph->tot_len)) {
 492                         icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
 493                                   htonl(mtu));
 494                         ip_rt_put(rt);
 495                         goto tx_error;
 496                 }
 497         }
 498
 499         if (tunnel->err_count > 0) {
 500                 if (time_before(jiffies,
 501                                 tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
 502                         tunnel->err_count--;
 503                         dst_link_failure(skb);
 504                 } else
 505                         tunnel->err_count = 0;
 506         }
 507
 508         /*
 509          * Okay, now see if we can stuff it in the buffer as-is.
 510          */
 511         max_headroom = (LL_RESERVED_SPACE(tdev)+sizeof(struct iphdr));
 512
 513         if (skb_headroom(skb) < max_headroom || skb_shared(skb) ||
 514             (skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
 515                 struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
 516                 if (!new_skb) {
 517                         ip_rt_put(rt);
 518                         dev->stats.tx_dropped++;
 519                         dev_kfree_skb(skb);
 520                         return NETDEV_TX_OK;
 521                 }
 522                 if (skb->sk)
 523                         skb_set_owner_w(new_skb, skb->sk);
 524                 dev_kfree_skb(skb);
 525                 skb = new_skb;
 526                 old_iph = ip_hdr(skb);
 527         }
 528
 529         skb->transport_header = skb->network_header;
 530         skb_push(skb, sizeof(struct iphdr));
 531         skb_reset_network_header(skb);
 532         memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
 533         IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
 534                               IPSKB_REROUTED);
 535         skb_dst_drop(skb);
 536         skb_dst_set(skb, &rt->dst);
 537
 538         /*
 539          *      Push down and install the IPIP header.
 540          */
 541
 542         iph                     =       ip_hdr(skb);
 543         iph->version            =       4;
 544         iph->ihl                =       sizeof(struct iphdr)>>2;
 545         iph->frag_off           =       df;
 546         iph->protocol           =       IPPROTO_IPIP;
 547         iph->tos                =       INET_ECN_encapsulate(tos, old_iph->tos);
 548         iph->daddr              =       fl4.daddr;
 549         iph->saddr              =       fl4.saddr;
 550
 551         if ((iph->ttl = tiph->ttl) == 0)
 552                 iph->ttl        =       old_iph->ttl;
 553
 554         nf_reset(skb);
 555         tstats = this_cpu_ptr(dev->tstats);
 556         __IPTUNNEL_XMIT(tstats, &dev->stats);
 557         return NETDEV_TX_OK;
 558
 559 tx_error_icmp:
 560         dst_link_failure(skb);
 561 tx_error:
 562         dev->stats.tx_errors++;
 563         dev_kfree_skb(skb);
 564         return NETDEV_TX_OK;
 565 }
 566
 567 static void ipip_tunnel_bind_dev(struct net_device *dev)
 568 {
 569         struct net_device *tdev = NULL;
 570         struct ip_tunnel *tunnel;
 571         const struct iphdr *iph;
 572
 573         tunnel = netdev_priv(dev);
 574         iph = &tunnel->parms.iph;
 575
 576         if (iph->daddr) {
 577                 struct rtable *rt;
 578                 struct flowi4 fl4;
 579
 580                 rt = ip_route_output_ports(dev_net(dev), &fl4, NULL,
 581                                            iph->daddr, iph->saddr,
 582                                            0, 0,
 583                                            IPPROTO_IPIP,
 584                                            RT_TOS(iph->tos),
 585                                            tunnel->parms.link);
 586                 if (!IS_ERR(rt)) {
 587                         tdev = rt->dst.dev;
 588                         ip_rt_put(rt);
 589                 }
 590                 dev->flags |= IFF_POINTOPOINT;
 591         }
 592
 593         if (!tdev && tunnel->parms.link)
 594                 tdev = __dev_get_by_index(dev_net(dev), tunnel->parms.link);
 595
 596         if (tdev) {
 597                 dev->hard_header_len = tdev->hard_header_len + sizeof(struct iphdr);
 598                 dev->mtu = tdev->mtu - sizeof(struct iphdr);
 599         }
 600         dev->iflink = tunnel->parms.link;
 601 }
 602
 603 static int
 604 ipip_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
 605 {
 606         int err = 0;
 607         struct ip_tunnel_parm p;
 608         struct ip_tunnel *t;
 609         struct net *net = dev_net(dev);
 610         struct ipip_net *ipn = net_generic(net, ipip_net_id);
 611
 612         switch (cmd) {
 613         case SIOCGETTUNNEL:
 614                 t = NULL;
 615                 if (dev == ipn->fb_tunnel_dev) {
 616                         if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
 617                                 err = -EFAULT;
 618                                 break;
 619                         }
 620                         t = ipip_tunnel_locate(net, &p, 0);
 621                 }
 622                 if (t == NULL)
 623                         t = netdev_priv(dev);
 624                 memcpy(&p, &t->parms, sizeof(p));
 625                 if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
 626                         err = -EFAULT;
 627                 break;
 628
 629         case SIOCADDTUNNEL:
 630         case SIOCCHGTUNNEL:
 631                 err = -EPERM;
 632                 if (!capable(CAP_NET_ADMIN))
 633                         goto done;
 634
 635                 err = -EFAULT;
 636                 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
 637                         goto done;
 638
 639                 err = -EINVAL;
 640                 if (p.iph.version != 4 || p.iph.protocol != IPPROTO_IPIP ||
 641                     p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)))
 642                         goto done;
 643                 if (p.iph.ttl)
 644                         p.iph.frag_off |= htons(IP_DF);
 645
 646                 t = ipip_tunnel_locate(net, &p, cmd == SIOCADDTUNNEL);
 647
 648                 if (dev != ipn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
 649                         if (t != NULL) {
 650                                 if (t->dev != dev) {
 651                                         err = -EEXIST;
 652                                         break;
 653                                 }
 654                         } else {
 655                                 if (((dev->flags&IFF_POINTOPOINT) && !p.iph.daddr) ||
 656                                     (!(dev->flags&IFF_POINTOPOINT) && p.iph.daddr)) {
 657                                         err = -EINVAL;
 658                                         break;
 659                                 }
 660                                 t = netdev_priv(dev);
 661                                 ipip_tunnel_unlink(ipn, t);
 662                                 synchronize_net();
 663                                 t->parms.iph.saddr = p.iph.saddr;
 664                                 t->parms.iph.daddr = p.iph.daddr;
 665                                 memcpy(dev->dev_addr, &p.iph.saddr, 4);
 666                                 memcpy(dev->broadcast, &p.iph.daddr, 4);
 667                                 ipip_tunnel_link(ipn, t);
 668                                 netdev_state_change(dev);
 669                         }
 670                 }
 671
 672                 if (t) {
 673                         err = 0;
 674                         if (cmd == SIOCCHGTUNNEL) {
 675                                 t->parms.iph.ttl = p.iph.ttl;
 676                                 t->parms.iph.tos = p.iph.tos;
 677                                 t->parms.iph.frag_off = p.iph.frag_off;
 678                                 if (t->parms.link != p.link) {
 679                                         t->parms.link = p.link;
 680                                         ipip_tunnel_bind_dev(dev);
 681                                         netdev_state_change(dev);
 682                                 }
 683                         }
 684                         if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
 685                                 err = -EFAULT;
 686                 } else
 687                         err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
 688                 break;
 689
 690         case SIOCDELTUNNEL:
 691                 err = -EPERM;
 692                 if (!capable(CAP_NET_ADMIN))
 693                         goto done;
 694
 695                 if (dev == ipn->fb_tunnel_dev) {
 696                         err = -EFAULT;
 697                         if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
 698                                 goto done;
 699                         err = -ENOENT;
 700                         if ((t = ipip_tunnel_locate(net, &p, 0)) == NULL)
 701                                 goto done;
 702                         err = -EPERM;
 703                         if (t->dev == ipn->fb_tunnel_dev)
 704                                 goto done;
 705                         dev = t->dev;
 706                 }
 707                 unregister_netdevice(dev);
 708                 err = 0;
 709                 break;
 710
 711         default:
 712                 err = -EINVAL;
 713         }
 714
 715 done:
 716         return err;
 717 }
 718
 719 static int ipip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
 720 {
 721         if (new_mtu < 68 || new_mtu > 0xFFF8 - sizeof(struct iphdr))
 722                 return -EINVAL;
 723         dev->mtu = new_mtu;
 724         return 0;
 725 }
 726
 727 static const struct net_device_ops ipip_netdev_ops = {
 728         .ndo_uninit     = ipip_tunnel_uninit,
 729         .ndo_start_xmit = ipip_tunnel_xmit,
 730         .ndo_do_ioctl   = ipip_tunnel_ioctl,
 731         .ndo_change_mtu = ipip_tunnel_change_mtu,
 732         .ndo_get_stats  = ipip_get_stats,
 733 };
 734
 735 static void ipip_dev_free(struct net_device *dev)
 736 {
 737         free_percpu(dev->tstats);
 738         free_netdev(dev);
 739 }
 740
 741 static void ipip_tunnel_setup(struct net_device *dev)
 742 {
 743         dev->netdev_ops         = &ipip_netdev_ops;
 744         dev->destructor         = ipip_dev_free;
 745
 746         dev->type               = ARPHRD_TUNNEL;
 747         dev->hard_header_len    = LL_MAX_HEADER + sizeof(struct iphdr);
 748         dev->mtu                = ETH_DATA_LEN - sizeof(struct iphdr);
 749         dev->flags              = IFF_NOARP;
 750         dev->iflink             = 0;
 751         dev->addr_len           = 4;
 752         dev->features           |= NETIF_F_NETNS_LOCAL;
 753         dev->features           |= NETIF_F_LLTX;
 754         dev->priv_flags         &= ~IFF_XMIT_DST_RELEASE;
 755 }
 756
 757 static int ipip_tunnel_init(struct net_device *dev)
 758 {
 759         struct ip_tunnel *tunnel = netdev_priv(dev);
 760
 761         tunnel->dev = dev;
 762         strcpy(tunnel->parms.name, dev->name);
 763
 764         memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
 765         memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);
 766
 767         ipip_tunnel_bind_dev(dev);
 768
 769         dev->tstats = alloc_percpu(struct pcpu_tstats);
 770         if (!dev->tstats)
 771                 return -ENOMEM;
 772
 773         return 0;
 774 }
 775
 776 static int __net_init ipip_fb_tunnel_init(struct net_device *dev)
 777 {
 778         struct ip_tunnel *tunnel = netdev_priv(dev);
 779         struct iphdr *iph = &tunnel->parms.iph;
 780         struct ipip_net *ipn = net_generic(dev_net(dev), ipip_net_id);
 781
 782         tunnel->dev = dev;
 783         strcpy(tunnel->parms.name, dev->name);
 784
 785         iph->version            = 4;
 786         iph->protocol           = IPPROTO_IPIP;
 787         iph->ihl                = 5;
 788
 789         dev->tstats = alloc_percpu(struct pcpu_tstats);
 790         if (!dev->tstats)
 791                 return -ENOMEM;
 792
 793         dev_hold(dev);
 794         rcu_assign_pointer(ipn->tunnels_wc[0], tunnel);
 795         return 0;
 796 }
 797
 798 static struct xfrm_tunnel ipip_handler __read_mostly = {
 799         .handler        =       ipip_rcv,
 800         .err_handler    =       ipip_err,
 801         .priority       =       1,
 802 };
 803
 804 static const char banner[] __initconst =
 805         KERN_INFO "IPv4 over IPv4 tunneling driver\n";
 806
 807 static void ipip_destroy_tunnels(struct ipip_net *ipn, struct list_head *head)
 808 {
 809         int prio;
 810
 811         for (prio = 1; prio < 4; prio++) {
 812                 int h;
 813                 for (h = 0; h < HASH_SIZE; h++) {
 814                         struct ip_tunnel *t;
 815
 816                         t = rtnl_dereference(ipn->tunnels[prio][h]);
 817                         while (t != NULL) {
 818                                 unregister_netdevice_queue(t->dev, head);
 819                                 t = rtnl_dereference(t->next);
 820                         }
 821                 }
 822         }
 823 }
 824
 825 static int __net_init ipip_init_net(struct net *net)
 826 {
 827         struct ipip_net *ipn = net_generic(net, ipip_net_id);
 828         int err;
 829
 830         ipn->tunnels[0] = ipn->tunnels_wc;
 831         ipn->tunnels[1] = ipn->tunnels_l;
 832         ipn->tunnels[2] = ipn->tunnels_r;
 833         ipn->tunnels[3] = ipn->tunnels_r_l;
 834
 835         ipn->fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel),
 836                                            "tunl0",
 837                                            ipip_tunnel_setup);
 838         if (!ipn->fb_tunnel_dev) {
 839                 err = -ENOMEM;
 840                 goto err_alloc_dev;
 841         }
 842         dev_net_set(ipn->fb_tunnel_dev, net);
 843
 844         err = ipip_fb_tunnel_init(ipn->fb_tunnel_dev);
 845         if (err)
 846                 goto err_reg_dev;
 847
 848         if ((err = register_netdev(ipn->fb_tunnel_dev)))
 849                 goto err_reg_dev;
 850
 851         return 0;
 852
 853 err_reg_dev:
 854         ipip_dev_free(ipn->fb_tunnel_dev);
 855 err_alloc_dev:
 856         /* nothing */
 857         return err;
 858 }
 859
 860 static void __net_exit ipip_exit_net(struct net *net)
 861 {
 862         struct ipip_net *ipn = net_generic(net, ipip_net_id);
 863         LIST_HEAD(list);
 864
 865         rtnl_lock();
 866         ipip_destroy_tunnels(ipn, &list);
 867         unregister_netdevice_queue(ipn->fb_tunnel_dev, &list);
 868         unregister_netdevice_many(&list);
 869         rtnl_unlock();
 870 }
 871
 872 static struct pernet_operations ipip_net_ops = {
 873         .init = ipip_init_net,
 874         .exit = ipip_exit_net,
 875         .id   = &ipip_net_id,
 876         .size = sizeof(struct ipip_net),
 877 };
 878
 879 static int __init ipip_init(void)
 880 {
 881         int err;
 882
 883         printk(banner);
 884
 885         err = register_pernet_device(&ipip_net_ops);
 886         if (err < 0)
 887                 return err;
 888         err = xfrm4_tunnel_register(&ipip_handler, AF_INET);
 889         if (err < 0) {
 890                 unregister_pernet_device(&ipip_net_ops);
 891                 printk(KERN_INFO "ipip init: can't register tunnel\n");
 892         }
 893         return err;
 894 }
 895
 896 static void __exit ipip_fini(void)
 897 {
 898         if (xfrm4_tunnel_deregister(&ipip_handler, AF_INET))
 899                 printk(KERN_INFO "ipip close: can't deregister tunnel\n");
 900
 901         unregister_pernet_device(&ipip_net_ops);
 902 }
 903
 904 module_init(ipip_init);
 905 module_exit(ipip_fini);
 906 MODULE_LICENSE("GPL");
 907 MODULE_ALIAS_NETDEV("tunl0");