net/ipv4/ipip.c

   1 /*
   2  *      Linux NET3:     IP/IP protocol decoder.
   3  *
   4  *      Authors:
   5  *              Sam Lantinga (slouken@cs.ucdavis.edu)  02/01/95
   6  *
   7  *      Fixes:
    8  *              Alan Cox        :       Merged and made usable non modular (it's so tiny it's silly as
   9  *                                      a module taking up 2 pages).
  10  *              Alan Cox        :       Fixed bug with 1.3.18 and IPIP not working (now needs to set skb->h.iph)
  11  *                                      to keep ip_forward happy.
  12  *              Alan Cox        :       More fixes for 1.3.21, and firewall fix. Maybe this will work soon 8).
  13  *              Kai Schulte     :       Fixed #defines for IP_FIREWALL->FIREWALL
  14  *              David Woodhouse :       Perform some basic ICMP handling.
  15  *                                      IPIP Routing without decapsulation.
  16  *              Carlos Picoto   :       GRE over IP support
   17  *              Alexey Kuznetsov:       Reworked. Really, now it is a truncated version of ipv4/ip_gre.c.
  18  *                                      I do not want to merge them together.
  19  *
  20  *      This program is free software; you can redistribute it and/or
  21  *      modify it under the terms of the GNU General Public License
  22  *      as published by the Free Software Foundation; either version
  23  *      2 of the License, or (at your option) any later version.
  24  *
  25  */
  26
  27 /* tunnel.c: an IP tunnel driver
  28
  29         The purpose of this driver is to provide an IP tunnel through
  30         which you can tunnel network traffic transparently across subnets.
  31
  32         This was written by looking at Nick Holloway's dummy driver
  33         Thanks for the great code!
  34
  35                 -Sam Lantinga   (slouken@cs.ucdavis.edu)  02/01/95
  36
  37         Minor tweaks:
  38                 Cleaned up the code a little and added some pre-1.3.0 tweaks.
  39                 dev->hard_header/hard_header_len changed to use no headers.
  40                 Comments/bracketing tweaked.
  41                 Made the tunnels use dev->name not tunnel: when error reporting.
  42                 Added tx_dropped stat
  43
  44                 -Alan Cox       (alan@lxorguk.ukuu.org.uk) 21 March 95
  45
  46         Reworked:
  47                 Changed to tunnel to destination gateway in addition to the
  48                         tunnel's pointopoint address
  49                 Almost completely rewritten
  50                 Note:  There is currently no firewall or ICMP handling done.
  51
  52                 -Sam Lantinga   (slouken@cs.ucdavis.edu) 02/13/96
  53
  54 */
  55
  56 /* Things I wish I had known when writing the tunnel driver:
  57
  58         When the tunnel_xmit() function is called, the skb contains the
  59         packet to be sent (plus a great deal of extra info), and dev
  60         contains the tunnel device that _we_ are.
  61
  62         When we are passed a packet, we are expected to fill in the
  63         source address with our source IP address.
  64
  65         What is the proper way to allocate, copy and free a buffer?
  66         After you allocate it, it is a "0 length" chunk of memory
  67         starting at zero.  If you want to add headers to the buffer
  68         later, you'll have to call "skb_reserve(skb, amount)" with
  69         the amount of memory you want reserved.  Then, you call
  70         "skb_put(skb, amount)" with the amount of space you want in
  71         the buffer.  skb_put() returns a pointer to the top (#0) of
  72         that buffer.  skb->len is set to the amount of space you have
  73         "allocated" with skb_put().  You can then write up to skb->len
  74         bytes to that buffer.  If you need more, you can call skb_put()
  75         again with the additional amount of space you need.  You can
  76         find out how much more space you can allocate by calling
  77         "skb_tailroom(skb)".
  78         Now, to add header space, call "skb_push(skb, header_len)".
  79         This creates space at the beginning of the buffer and returns
  80         a pointer to this new space.  If later you need to strip a
  81         header from a buffer, call "skb_pull(skb, header_len)".
  82         skb_headroom() will return how much space is left at the top
  83         of the buffer (before the main data).  Remember, this headroom
  84         space must be reserved before the skb_put() function is called.
  85         */
  86
  87 /*
   88    This version of net/ipv4/ipip.c is cloned from net/ipv4/ip_gre.c
  89
  90    For comments look at net/ipv4/ip_gre.c --ANK
  91  */
  92
  93
  94 #include <linux/capability.h>
  95 #include <linux/module.h>
  96 #include <linux/types.h>
  97 #include <linux/kernel.h>
  98 #include <linux/slab.h>
  99 #include <asm/uaccess.h>
 100 #include <linux/skbuff.h>
 101 #include <linux/netdevice.h>
 102 #include <linux/in.h>
 103 #include <linux/tcp.h>
 104 #include <linux/udp.h>
 105 #include <linux/if_arp.h>
 106 #include <linux/mroute.h>
 107 #include <linux/init.h>
 108 #include <linux/netfilter_ipv4.h>
 109 #include <linux/if_ether.h>
 110
 111 #include <net/sock.h>
 112 #include <net/ip.h>
 113 #include <net/icmp.h>
 114 #include <net/ipip.h>
 115 #include <net/inet_ecn.h>
 116 #include <net/xfrm.h>
 117 #include <net/net_namespace.h>
 118 #include <net/netns/generic.h>
 119
 120 #define HASH_SIZE  16
 121 #define HASH(addr) (((__force u32)addr^((__force u32)addr>>4))&0xF)
 122
 123 static int ipip_net_id __read_mostly;
 124 struct ipip_net {
 125         struct ip_tunnel __rcu *tunnels_r_l[HASH_SIZE];
 126         struct ip_tunnel __rcu *tunnels_r[HASH_SIZE];
 127         struct ip_tunnel __rcu *tunnels_l[HASH_SIZE];
 128         struct ip_tunnel __rcu *tunnels_wc[1];
 129         struct ip_tunnel __rcu **tunnels[4];
 130
 131         struct net_device *fb_tunnel_dev;
 132 };
 133
 134 static int ipip_tunnel_init(struct net_device *dev);
 135 static void ipip_tunnel_setup(struct net_device *dev);
 136 static void ipip_dev_free(struct net_device *dev);
 137
 138 /*
 139  * Locking : hash tables are protected by RCU and RTNL
 140  */
 141
 142 #define for_each_ip_tunnel_rcu(start) \
 143         for (t = rcu_dereference(start); t; t = rcu_dereference(t->next))
 144
 145 /* often modified stats are per cpu, other are shared (netdev->stats) */
 146 struct pcpu_tstats {
 147         unsigned long   rx_packets;
 148         unsigned long   rx_bytes;
 149         unsigned long   tx_packets;
 150         unsigned long   tx_bytes;
 151 };
 152
 153 static struct net_device_stats *ipip_get_stats(struct net_device *dev)
 154 {
 155         struct pcpu_tstats sum = { 0 };
 156         int i;
 157
 158         for_each_possible_cpu(i) {
 159                 const struct pcpu_tstats *tstats = per_cpu_ptr(dev->tstats, i);
 160
 161                 sum.rx_packets += tstats->rx_packets;
 162                 sum.rx_bytes   += tstats->rx_bytes;
 163                 sum.tx_packets += tstats->tx_packets;
 164                 sum.tx_bytes   += tstats->tx_bytes;
 165         }
 166         dev->stats.rx_packets = sum.rx_packets;
 167         dev->stats.rx_bytes   = sum.rx_bytes;
 168         dev->stats.tx_packets = sum.tx_packets;
 169         dev->stats.tx_bytes   = sum.tx_bytes;
 170         return &dev->stats;
 171 }
 172
 173 static struct ip_tunnel * ipip_tunnel_lookup(struct net *net,
 174                 __be32 remote, __be32 local)
 175 {
 176         unsigned int h0 = HASH(remote);
 177         unsigned int h1 = HASH(local);
 178         struct ip_tunnel *t;
 179         struct ipip_net *ipn = net_generic(net, ipip_net_id);
 180
 181         for_each_ip_tunnel_rcu(ipn->tunnels_r_l[h0 ^ h1])
 182                 if (local == t->parms.iph.saddr &&
 183                     remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
 184                         return t;
 185
 186         for_each_ip_tunnel_rcu(ipn->tunnels_r[h0])
 187                 if (remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
 188                         return t;
 189
 190         for_each_ip_tunnel_rcu(ipn->tunnels_l[h1])
 191                 if (local == t->parms.iph.saddr && (t->dev->flags&IFF_UP))
 192                         return t;
 193
 194         t = rcu_dereference(ipn->tunnels_wc[0]);
 195         if (t && (t->dev->flags&IFF_UP))
 196                 return t;
 197         return NULL;
 198 }
 199
 200 static struct ip_tunnel __rcu **__ipip_bucket(struct ipip_net *ipn,
 201                 struct ip_tunnel_parm *parms)
 202 {
 203         __be32 remote = parms->iph.daddr;
 204         __be32 local = parms->iph.saddr;
 205         unsigned int h = 0;
 206         int prio = 0;
 207
 208         if (remote) {
 209                 prio |= 2;
 210                 h ^= HASH(remote);
 211         }
 212         if (local) {
 213                 prio |= 1;
 214                 h ^= HASH(local);
 215         }
 216         return &ipn->tunnels[prio][h];
 217 }
 218
 219 static inline struct ip_tunnel __rcu **ipip_bucket(struct ipip_net *ipn,
 220                 struct ip_tunnel *t)
 221 {
 222         return __ipip_bucket(ipn, &t->parms);
 223 }
 224
 225 static void ipip_tunnel_unlink(struct ipip_net *ipn, struct ip_tunnel *t)
 226 {
 227         struct ip_tunnel __rcu **tp;
 228         struct ip_tunnel *iter;
 229
 230         for (tp = ipip_bucket(ipn, t);
 231              (iter = rtnl_dereference(*tp)) != NULL;
 232              tp = &iter->next) {
 233                 if (t == iter) {
 234                         rcu_assign_pointer(*tp, t->next);
 235                         break;
 236                 }
 237         }
 238 }
 239
 240 static void ipip_tunnel_link(struct ipip_net *ipn, struct ip_tunnel *t)
 241 {
 242         struct ip_tunnel __rcu **tp = ipip_bucket(ipn, t);
 243
 244         rcu_assign_pointer(t->next, rtnl_dereference(*tp));
 245         rcu_assign_pointer(*tp, t);
 246 }
 247
 248 static struct ip_tunnel * ipip_tunnel_locate(struct net *net,
 249                 struct ip_tunnel_parm *parms, int create)
 250 {
 251         __be32 remote = parms->iph.daddr;
 252         __be32 local = parms->iph.saddr;
 253         struct ip_tunnel *t, *nt;
 254         struct ip_tunnel __rcu **tp;
 255         struct net_device *dev;
 256         char name[IFNAMSIZ];
 257         struct ipip_net *ipn = net_generic(net, ipip_net_id);
 258
 259         for (tp = __ipip_bucket(ipn, parms);
 260                  (t = rtnl_dereference(*tp)) != NULL;
 261                  tp = &t->next) {
 262                 if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr)
 263                         return t;
 264         }
 265         if (!create)
 266                 return NULL;
 267
 268         if (parms->name[0])
 269                 strlcpy(name, parms->name, IFNAMSIZ);
 270         else
 271                 strcpy(name, "tunl%d");
 272
 273         dev = alloc_netdev(sizeof(*t), name, ipip_tunnel_setup);
 274         if (dev == NULL)
 275                 return NULL;
 276
 277         dev_net_set(dev, net);
 278
 279         nt = netdev_priv(dev);
 280         nt->parms = *parms;
 281
 282         if (ipip_tunnel_init(dev) < 0)
 283                 goto failed_free;
 284
 285         if (register_netdevice(dev) < 0)
 286                 goto failed_free;
 287
 288         strcpy(nt->parms.name, dev->name);
 289
 290         dev_hold(dev);
 291         ipip_tunnel_link(ipn, nt);
 292         return nt;
 293
 294 failed_free:
 295         ipip_dev_free(dev);
 296         return NULL;
 297 }
 298
 299 /* called with RTNL */
 300 static void ipip_tunnel_uninit(struct net_device *dev)
 301 {
 302         struct net *net = dev_net(dev);
 303         struct ipip_net *ipn = net_generic(net, ipip_net_id);
 304
 305         if (dev == ipn->fb_tunnel_dev)
 306                 rcu_assign_pointer(ipn->tunnels_wc[0], NULL);
 307         else
 308                 ipip_tunnel_unlink(ipn, netdev_priv(dev));
 309         dev_put(dev);
 310 }
 311
 312 static int ipip_err(struct sk_buff *skb, u32 info)
 313 {
 314
 315 /* All the routers (except for Linux) return only
 316    8 bytes of packet payload. It means, that precise relaying of
 317    ICMP in the real Internet is absolutely infeasible.
 318  */
 319         const struct iphdr *iph = (const struct iphdr *)skb->data;
 320         const int type = icmp_hdr(skb)->type;
 321         const int code = icmp_hdr(skb)->code;
 322         struct ip_tunnel *t;
 323         int err;
 324
 325         switch (type) {
 326         default:
 327         case ICMP_PARAMETERPROB:
 328                 return 0;
 329
 330         case ICMP_DEST_UNREACH:
 331                 switch (code) {
 332                 case ICMP_SR_FAILED:
 333                 case ICMP_PORT_UNREACH:
 334                         /* Impossible event. */
 335                         return 0;
 336                 case ICMP_FRAG_NEEDED:
 337                         /* Soft state for pmtu is maintained by IP core. */
 338                         return 0;
 339                 default:
 340                         /* All others are translated to HOST_UNREACH.
 341                            rfc2003 contains "deep thoughts" about NET_UNREACH,
 342                            I believe they are just ether pollution. --ANK
 343                          */
 344                         break;
 345                 }
 346                 break;
 347         case ICMP_TIME_EXCEEDED:
 348                 if (code != ICMP_EXC_TTL)
 349                         return 0;
 350                 break;
 351         }
 352
 353         err = -ENOENT;
 354
 355         rcu_read_lock();
 356         t = ipip_tunnel_lookup(dev_net(skb->dev), iph->daddr, iph->saddr);
 357         if (t == NULL || t->parms.iph.daddr == 0)
 358                 goto out;
 359
 360         err = 0;
 361         if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
 362                 goto out;
 363
 364         if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO))
 365                 t->err_count++;
 366         else
 367                 t->err_count = 1;
 368         t->err_time = jiffies;
 369 out:
 370         rcu_read_unlock();
 371         return err;
 372 }
 373
 374 static inline void ipip_ecn_decapsulate(const struct iphdr *outer_iph,
 375                                         struct sk_buff *skb)
 376 {
 377         struct iphdr *inner_iph = ip_hdr(skb);
 378
 379         if (INET_ECN_is_ce(outer_iph->tos))
 380                 IP_ECN_set_ce(inner_iph);
 381 }
 382
 383 static int ipip_rcv(struct sk_buff *skb)
 384 {
 385         struct ip_tunnel *tunnel;
 386         const struct iphdr *iph = ip_hdr(skb);
 387
 388         rcu_read_lock();
 389         tunnel = ipip_tunnel_lookup(dev_net(skb->dev), iph->saddr, iph->daddr);
 390         if (tunnel != NULL) {
 391                 struct pcpu_tstats *tstats;
 392
 393                 if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
 394                         rcu_read_unlock();
 395                         kfree_skb(skb);
 396                         return 0;
 397                 }
 398
 399                 secpath_reset(skb);
 400
 401                 skb->mac_header = skb->network_header;
 402                 skb_reset_network_header(skb);
 403                 skb->protocol = htons(ETH_P_IP);
 404                 skb->pkt_type = PACKET_HOST;
 405
 406                 tstats = this_cpu_ptr(tunnel->dev->tstats);
 407                 tstats->rx_packets++;
 408                 tstats->rx_bytes += skb->len;
 409
 410                 __skb_tunnel_rx(skb, tunnel->dev);
 411
 412                 ipip_ecn_decapsulate(iph, skb);
 413
 414                 netif_rx(skb);
 415
 416                 rcu_read_unlock();
 417                 return 0;
 418         }
 419         rcu_read_unlock();
 420
 421         return -1;
 422 }
 423
 424 /*
 425  *      This function assumes it is being called from dev_queue_xmit()
 426  *      and that skb is filled properly by that function.
 427  */
 428
 429 static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
 430 {
 431         struct ip_tunnel *tunnel = netdev_priv(dev);
 432         struct pcpu_tstats *tstats;
 433         const struct iphdr  *tiph = &tunnel->parms.iph;
 434         u8     tos = tunnel->parms.iph.tos;
 435         __be16 df = tiph->frag_off;
 436         struct rtable *rt;                      /* Route to the other host */
 437         struct net_device *tdev;                /* Device to other host */
 438         const struct iphdr  *old_iph = ip_hdr(skb);
 439         struct iphdr  *iph;                     /* Our new IP header */
 440         unsigned int max_headroom;              /* The extra header space needed */
 441         __be32 dst = tiph->daddr;
 442         struct flowi4 fl4;
 443         int    mtu;
 444
 445         if (skb->protocol != htons(ETH_P_IP))
 446                 goto tx_error;
 447
 448         if (tos & 1)
 449                 tos = old_iph->tos;
 450
 451         if (!dst) {
 452                 /* NBMA tunnel */
 453                 if ((rt = skb_rtable(skb)) == NULL) {
 454                         dev->stats.tx_fifo_errors++;
 455                         goto tx_error;
 456                 }
 457                 if ((dst = rt->rt_gateway) == 0)
 458                         goto tx_error_icmp;
 459         }
 460
 461         rt = ip_route_output_ports(dev_net(dev), &fl4, NULL,
 462                                    dst, tiph->saddr,
 463                                    0, 0,
 464                                    IPPROTO_IPIP, RT_TOS(tos),
 465                                    tunnel->parms.link);
 466         if (IS_ERR(rt)) {
 467                 dev->stats.tx_carrier_errors++;
 468                 goto tx_error_icmp;
 469         }
 470         tdev = rt->dst.dev;
 471
 472         if (tdev == dev) {
 473                 ip_rt_put(rt);
 474                 dev->stats.collisions++;
 475                 goto tx_error;
 476         }
 477
 478         df |= old_iph->frag_off & htons(IP_DF);
 479
 480         if (df) {
 481                 mtu = dst_mtu(&rt->dst) - sizeof(struct iphdr);
 482
 483                 if (mtu < 68) {
 484                         dev->stats.collisions++;
 485                         ip_rt_put(rt);
 486                         goto tx_error;
 487                 }
 488
 489                 if (skb_dst(skb))
 490                         skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu);
 491
 492                 if ((old_iph->frag_off & htons(IP_DF)) &&
 493                     mtu < ntohs(old_iph->tot_len)) {
 494                         icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
 495                                   htonl(mtu));
 496                         ip_rt_put(rt);
 497                         goto tx_error;
 498                 }
 499         }
 500
 501         if (tunnel->err_count > 0) {
 502                 if (time_before(jiffies,
 503                                 tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
 504                         tunnel->err_count--;
 505                         dst_link_failure(skb);
 506                 } else
 507                         tunnel->err_count = 0;
 508         }
 509
 510         /*
 511          * Okay, now see if we can stuff it in the buffer as-is.
 512          */
 513         max_headroom = (LL_RESERVED_SPACE(tdev)+sizeof(struct iphdr));
 514
 515         if (skb_headroom(skb) < max_headroom || skb_shared(skb) ||
 516             (skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
 517                 struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
 518                 if (!new_skb) {
 519                         ip_rt_put(rt);
 520                         dev->stats.tx_dropped++;
 521                         dev_kfree_skb(skb);
 522                         return NETDEV_TX_OK;
 523                 }
 524                 if (skb->sk)
 525                         skb_set_owner_w(new_skb, skb->sk);
 526                 dev_kfree_skb(skb);
 527                 skb = new_skb;
 528                 old_iph = ip_hdr(skb);
 529         }
 530
 531         skb->transport_header = skb->network_header;
 532         skb_push(skb, sizeof(struct iphdr));
 533         skb_reset_network_header(skb);
 534         memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
 535         IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
 536                               IPSKB_REROUTED);
 537         skb_dst_drop(skb);
 538         skb_dst_set(skb, &rt->dst);
 539
 540         /*
 541          *      Push down and install the IPIP header.
 542          */
 543
 544         iph                     =       ip_hdr(skb);
 545         iph->version            =       4;
 546         iph->ihl                =       sizeof(struct iphdr)>>2;
 547         iph->frag_off           =       df;
 548         iph->protocol           =       IPPROTO_IPIP;
 549         iph->tos                =       INET_ECN_encapsulate(tos, old_iph->tos);
 550         iph->daddr              =       fl4.daddr;
 551         iph->saddr              =       fl4.saddr;
 552
 553         if ((iph->ttl = tiph->ttl) == 0)
 554                 iph->ttl        =       old_iph->ttl;
 555
 556         nf_reset(skb);
 557         tstats = this_cpu_ptr(dev->tstats);
 558         __IPTUNNEL_XMIT(tstats, &dev->stats);
 559         return NETDEV_TX_OK;
 560
 561 tx_error_icmp:
 562         dst_link_failure(skb);
 563 tx_error:
 564         dev->stats.tx_errors++;
 565         dev_kfree_skb(skb);
 566         return NETDEV_TX_OK;
 567 }
 568
 569 static void ipip_tunnel_bind_dev(struct net_device *dev)
 570 {
 571         struct net_device *tdev = NULL;
 572         struct ip_tunnel *tunnel;
 573         const struct iphdr *iph;
 574
 575         tunnel = netdev_priv(dev);
 576         iph = &tunnel->parms.iph;
 577
 578         if (iph->daddr) {
 579                 struct rtable *rt;
 580                 struct flowi4 fl4;
 581
 582                 rt = ip_route_output_ports(dev_net(dev), &fl4, NULL,
 583                                            iph->daddr, iph->saddr,
 584                                            0, 0,
 585                                            IPPROTO_IPIP,
 586                                            RT_TOS(iph->tos),
 587                                            tunnel->parms.link);
 588                 if (!IS_ERR(rt)) {
 589                         tdev = rt->dst.dev;
 590                         ip_rt_put(rt);
 591                 }
 592                 dev->flags |= IFF_POINTOPOINT;
 593         }
 594
 595         if (!tdev && tunnel->parms.link)
 596                 tdev = __dev_get_by_index(dev_net(dev), tunnel->parms.link);
 597
 598         if (tdev) {
 599                 dev->hard_header_len = tdev->hard_header_len + sizeof(struct iphdr);
 600                 dev->mtu = tdev->mtu - sizeof(struct iphdr);
 601         }
 602         dev->iflink = tunnel->parms.link;
 603 }
 604
 605 static int
 606 ipip_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
 607 {
 608         int err = 0;
 609         struct ip_tunnel_parm p;
 610         struct ip_tunnel *t;
 611         struct net *net = dev_net(dev);
 612         struct ipip_net *ipn = net_generic(net, ipip_net_id);
 613
 614         switch (cmd) {
 615         case SIOCGETTUNNEL:
 616                 t = NULL;
 617                 if (dev == ipn->fb_tunnel_dev) {
 618                         if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
 619                                 err = -EFAULT;
 620                                 break;
 621                         }
 622                         t = ipip_tunnel_locate(net, &p, 0);
 623                 }
 624                 if (t == NULL)
 625                         t = netdev_priv(dev);
 626                 memcpy(&p, &t->parms, sizeof(p));
 627                 if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
 628                         err = -EFAULT;
 629                 break;
 630
 631         case SIOCADDTUNNEL:
 632         case SIOCCHGTUNNEL:
 633                 err = -EPERM;
 634                 if (!capable(CAP_NET_ADMIN))
 635                         goto done;
 636
 637                 err = -EFAULT;
 638                 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
 639                         goto done;
 640
 641                 err = -EINVAL;
 642                 if (p.iph.version != 4 || p.iph.protocol != IPPROTO_IPIP ||
 643                     p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)))
 644                         goto done;
 645                 if (p.iph.ttl)
 646                         p.iph.frag_off |= htons(IP_DF);
 647
 648                 t = ipip_tunnel_locate(net, &p, cmd == SIOCADDTUNNEL);
 649
 650                 if (dev != ipn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
 651                         if (t != NULL) {
 652                                 if (t->dev != dev) {
 653                                         err = -EEXIST;
 654                                         break;
 655                                 }
 656                         } else {
 657                                 if (((dev->flags&IFF_POINTOPOINT) && !p.iph.daddr) ||
 658                                     (!(dev->flags&IFF_POINTOPOINT) && p.iph.daddr)) {
 659                                         err = -EINVAL;
 660                                         break;
 661                                 }
 662                                 t = netdev_priv(dev);
 663                                 ipip_tunnel_unlink(ipn, t);
 664                                 synchronize_net();
 665                                 t->parms.iph.saddr = p.iph.saddr;
 666                                 t->parms.iph.daddr = p.iph.daddr;
 667                                 memcpy(dev->dev_addr, &p.iph.saddr, 4);
 668                                 memcpy(dev->broadcast, &p.iph.daddr, 4);
 669                                 ipip_tunnel_link(ipn, t);
 670                                 netdev_state_change(dev);
 671                         }
 672                 }
 673
 674                 if (t) {
 675                         err = 0;
 676                         if (cmd == SIOCCHGTUNNEL) {
 677                                 t->parms.iph.ttl = p.iph.ttl;
 678                                 t->parms.iph.tos = p.iph.tos;
 679                                 t->parms.iph.frag_off = p.iph.frag_off;
 680                                 if (t->parms.link != p.link) {
 681                                         t->parms.link = p.link;
 682                                         ipip_tunnel_bind_dev(dev);
 683                                         netdev_state_change(dev);
 684                                 }
 685                         }
 686                         if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
 687                                 err = -EFAULT;
 688                 } else
 689                         err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
 690                 break;
 691
 692         case SIOCDELTUNNEL:
 693                 err = -EPERM;
 694                 if (!capable(CAP_NET_ADMIN))
 695                         goto done;
 696
 697                 if (dev == ipn->fb_tunnel_dev) {
 698                         err = -EFAULT;
 699                         if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
 700                                 goto done;
 701                         err = -ENOENT;
 702                         if ((t = ipip_tunnel_locate(net, &p, 0)) == NULL)
 703                                 goto done;
 704                         err = -EPERM;
 705                         if (t->dev == ipn->fb_tunnel_dev)
 706                                 goto done;
 707                         dev = t->dev;
 708                 }
 709                 unregister_netdevice(dev);
 710                 err = 0;
 711                 break;
 712
 713         default:
 714                 err = -EINVAL;
 715         }
 716
 717 done:
 718         return err;
 719 }
 720
 721 static int ipip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
 722 {
 723         if (new_mtu < 68 || new_mtu > 0xFFF8 - sizeof(struct iphdr))
 724                 return -EINVAL;
 725         dev->mtu = new_mtu;
 726         return 0;
 727 }
 728
 729 static const struct net_device_ops ipip_netdev_ops = {
 730         .ndo_uninit     = ipip_tunnel_uninit,
 731         .ndo_start_xmit = ipip_tunnel_xmit,
 732         .ndo_do_ioctl   = ipip_tunnel_ioctl,
 733         .ndo_change_mtu = ipip_tunnel_change_mtu,
 734         .ndo_get_stats  = ipip_get_stats,
 735 };
 736
 737 static void ipip_dev_free(struct net_device *dev)
 738 {
 739         free_percpu(dev->tstats);
 740         free_netdev(dev);
 741 }
 742
 743 static void ipip_tunnel_setup(struct net_device *dev)
 744 {
 745         dev->netdev_ops         = &ipip_netdev_ops;
 746         dev->destructor         = ipip_dev_free;
 747
 748         dev->type               = ARPHRD_TUNNEL;
 749         dev->hard_header_len    = LL_MAX_HEADER + sizeof(struct iphdr);
 750         dev->mtu                = ETH_DATA_LEN - sizeof(struct iphdr);
 751         dev->flags              = IFF_NOARP;
 752         dev->iflink             = 0;
 753         dev->addr_len           = 4;
 754         dev->features           |= NETIF_F_NETNS_LOCAL;
 755         dev->features           |= NETIF_F_LLTX;
 756         dev->priv_flags         &= ~IFF_XMIT_DST_RELEASE;
 757 }
 758
 759 static int ipip_tunnel_init(struct net_device *dev)
 760 {
 761         struct ip_tunnel *tunnel = netdev_priv(dev);
 762
 763         tunnel->dev = dev;
 764
 765         memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
 766         memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);
 767
 768         ipip_tunnel_bind_dev(dev);
 769
 770         dev->tstats = alloc_percpu(struct pcpu_tstats);
 771         if (!dev->tstats)
 772                 return -ENOMEM;
 773
 774         return 0;
 775 }
 776
 777 static int __net_init ipip_fb_tunnel_init(struct net_device *dev)
 778 {
 779         struct ip_tunnel *tunnel = netdev_priv(dev);
 780         struct iphdr *iph = &tunnel->parms.iph;
 781         struct ipip_net *ipn = net_generic(dev_net(dev), ipip_net_id);
 782
 783         tunnel->dev = dev;
 784         strcpy(tunnel->parms.name, dev->name);
 785
 786         iph->version            = 4;
 787         iph->protocol           = IPPROTO_IPIP;
 788         iph->ihl                = 5;
 789
 790         dev->tstats = alloc_percpu(struct pcpu_tstats);
 791         if (!dev->tstats)
 792                 return -ENOMEM;
 793
 794         dev_hold(dev);
 795         rcu_assign_pointer(ipn->tunnels_wc[0], tunnel);
 796         return 0;
 797 }
 798
 799 static struct xfrm_tunnel ipip_handler __read_mostly = {
 800         .handler        =       ipip_rcv,
 801         .err_handler    =       ipip_err,
 802         .priority       =       1,
 803 };
 804
 805 static const char banner[] __initconst =
 806         KERN_INFO "IPv4 over IPv4 tunneling driver\n";
 807
 808 static void ipip_destroy_tunnels(struct ipip_net *ipn, struct list_head *head)
 809 {
 810         int prio;
 811
 812         for (prio = 1; prio < 4; prio++) {
 813                 int h;
 814                 for (h = 0; h < HASH_SIZE; h++) {
 815                         struct ip_tunnel *t;
 816
 817                         t = rtnl_dereference(ipn->tunnels[prio][h]);
 818                         while (t != NULL) {
 819                                 unregister_netdevice_queue(t->dev, head);
 820                                 t = rtnl_dereference(t->next);
 821                         }
 822                 }
 823         }
 824 }
 825
 826 static int __net_init ipip_init_net(struct net *net)
 827 {
 828         struct ipip_net *ipn = net_generic(net, ipip_net_id);
 829         struct ip_tunnel *t;
 830         int err;
 831
 832         ipn->tunnels[0] = ipn->tunnels_wc;
 833         ipn->tunnels[1] = ipn->tunnels_l;
 834         ipn->tunnels[2] = ipn->tunnels_r;
 835         ipn->tunnels[3] = ipn->tunnels_r_l;
 836
 837         ipn->fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel),
 838                                            "tunl0",
 839                                            ipip_tunnel_setup);
 840         if (!ipn->fb_tunnel_dev) {
 841                 err = -ENOMEM;
 842                 goto err_alloc_dev;
 843         }
 844         dev_net_set(ipn->fb_tunnel_dev, net);
 845
 846         err = ipip_fb_tunnel_init(ipn->fb_tunnel_dev);
 847         if (err)
 848                 goto err_reg_dev;
 849
 850         if ((err = register_netdev(ipn->fb_tunnel_dev)))
 851                 goto err_reg_dev;
 852
 853         t = netdev_priv(ipn->fb_tunnel_dev);
 854
 855         strcpy(t->parms.name, ipn->fb_tunnel_dev->name);
 856         return 0;
 857
 858 err_reg_dev:
 859         ipip_dev_free(ipn->fb_tunnel_dev);
 860 err_alloc_dev:
 861         /* nothing */
 862         return err;
 863 }
 864
 865 static void __net_exit ipip_exit_net(struct net *net)
 866 {
 867         struct ipip_net *ipn = net_generic(net, ipip_net_id);
 868         LIST_HEAD(list);
 869
 870         rtnl_lock();
 871         ipip_destroy_tunnels(ipn, &list);
 872         unregister_netdevice_queue(ipn->fb_tunnel_dev, &list);
 873         unregister_netdevice_many(&list);
 874         rtnl_unlock();
 875 }
 876
 877 static struct pernet_operations ipip_net_ops = {
 878         .init = ipip_init_net,
 879         .exit = ipip_exit_net,
 880         .id   = &ipip_net_id,
 881         .size = sizeof(struct ipip_net),
 882 };
 883
 884 static int __init ipip_init(void)
 885 {
 886         int err;
 887
 888         printk(banner);
 889
 890         err = register_pernet_device(&ipip_net_ops);
 891         if (err < 0)
 892                 return err;
 893         err = xfrm4_tunnel_register(&ipip_handler, AF_INET);
 894         if (err < 0) {
 895                 unregister_pernet_device(&ipip_net_ops);
 896                 printk(KERN_INFO "ipip init: can't register tunnel\n");
 897         }
 898         return err;
 899 }
 900
 901 static void __exit ipip_fini(void)
 902 {
 903         if (xfrm4_tunnel_deregister(&ipip_handler, AF_INET))
 904                 printk(KERN_INFO "ipip close: can't deregister tunnel\n");
 905
 906         unregister_pernet_device(&ipip_net_ops);
 907 }
 908
 909 module_init(ipip_init);
 910 module_exit(ipip_fini);
 911 MODULE_LICENSE("GPL");
 912 MODULE_ALIAS_NETDEV("tunl0");