net/ipv4/ipip.c

   1 /*
   2  *      Linux NET3:     IP/IP protocol decoder.
   3  *
   4  *      Version: $Id: ipip.c,v 1.50 2001/10/02 02:22:36 davem Exp $
   5  *
   6  *      Authors:
   7  *              Sam Lantinga (slouken@cs.ucdavis.edu)  02/01/95
   8  *
   9  *      Fixes:
  10  *              Alan Cox        :       Merged and made usable non modular (it's so tiny it's silly as
  11  *                                      a module taking up 2 pages).
  12  *              Alan Cox        :       Fixed bug with 1.3.18 and IPIP not working (now needs to set skb->h.iph)
  13  *                                      to keep ip_forward happy.
  14  *              Alan Cox        :       More fixes for 1.3.21, and firewall fix. Maybe this will work soon 8).
  15  *              Kai Schulte     :       Fixed #defines for IP_FIREWALL->FIREWALL
  16  *              David Woodhouse :       Perform some basic ICMP handling.
  17  *                                      IPIP Routing without decapsulation.
  18  *              Carlos Picoto   :       GRE over IP support
  19  *              Alexey Kuznetsov:       Reworked. Really, now it is truncated version of ipv4/ip_gre.c.
  20  *                                      I do not want to merge them together.
  21  *
  22  *      This program is free software; you can redistribute it and/or
  23  *      modify it under the terms of the GNU General Public License
  24  *      as published by the Free Software Foundation; either version
  25  *      2 of the License, or (at your option) any later version.
  26  *
  27  */
  28
  29 /* tunnel.c: an IP tunnel driver
  30
  31         The purpose of this driver is to provide an IP tunnel through
  32         which you can tunnel network traffic transparently across subnets.
  33
  34         This was written by looking at Nick Holloway's dummy driver
  35         Thanks for the great code!
  36
  37                 -Sam Lantinga   (slouken@cs.ucdavis.edu)  02/01/95
  38
  39         Minor tweaks:
  40                 Cleaned up the code a little and added some pre-1.3.0 tweaks.
  41                 dev->hard_header/hard_header_len changed to use no headers.
  42                 Comments/bracketing tweaked.
  43                 Made the tunnels use dev->name not tunnel: when error reporting.
  44                 Added tx_dropped stat
  45
  46                 -Alan Cox       (Alan.Cox@linux.org) 21 March 95
  47
  48         Reworked:
  49                 Changed to tunnel to destination gateway in addition to the
  50                         tunnel's pointopoint address
  51                 Almost completely rewritten
  52                 Note:  There is currently no firewall or ICMP handling done.
  53
  54                 -Sam Lantinga   (slouken@cs.ucdavis.edu) 02/13/96
  55
  56 */
  57
  58 /* Things I wish I had known when writing the tunnel driver:
  59
  60         When the tunnel_xmit() function is called, the skb contains the
  61         packet to be sent (plus a great deal of extra info), and dev
  62         contains the tunnel device that _we_ are.
  63
  64         When we are passed a packet, we are expected to fill in the
  65         source address with our source IP address.
  66
  67         What is the proper way to allocate, copy and free a buffer?
  68         After you allocate it, it is a "0 length" chunk of memory
  69         starting at zero.  If you want to add headers to the buffer
  70         later, you'll have to call "skb_reserve(skb, amount)" with
  71         the amount of memory you want reserved.  Then, you call
  72         "skb_put(skb, amount)" with the amount of space you want in
  73         the buffer.  skb_put() returns a pointer to the top (#0) of
  74         that buffer.  skb->len is set to the amount of space you have
  75         "allocated" with skb_put().  You can then write up to skb->len
  76         bytes to that buffer.  If you need more, you can call skb_put()
  77         again with the additional amount of space you need.  You can
  78         find out how much more space you can allocate by calling
  79         "skb_tailroom(skb)".
  80         Now, to add header space, call "skb_push(skb, header_len)".
  81         This creates space at the beginning of the buffer and returns
  82         a pointer to this new space.  If later you need to strip a
  83         header from a buffer, call "skb_pull(skb, header_len)".
  84         skb_headroom() will return how much space is left at the top
  85         of the buffer (before the main data).  Remember, this headroom
  86         space must be reserved before the skb_put() function is called.
  87         */
  88
  89 /*
  90    This version of net/ipv4/ipip.c is a clone of net/ipv4/ip_gre.c
  91
  92    For comments look at net/ipv4/ip_gre.c --ANK
  93  */
  94
  95
  96 #include <linux/capability.h>
  97 #include <linux/module.h>
  98 #include <linux/types.h>
  99 #include <linux/kernel.h>
 100 #include <asm/uaccess.h>
 101 #include <linux/skbuff.h>
 102 #include <linux/netdevice.h>
 103 #include <linux/in.h>
 104 #include <linux/tcp.h>
 105 #include <linux/udp.h>
 106 #include <linux/if_arp.h>
 107 #include <linux/mroute.h>
 108 #include <linux/init.h>
 109 #include <linux/netfilter_ipv4.h>
 110 #include <linux/if_ether.h>
 111
 112 #include <net/sock.h>
 113 #include <net/ip.h>
 114 #include <net/icmp.h>
 115 #include <net/ipip.h>
 116 #include <net/inet_ecn.h>
 117 #include <net/xfrm.h>
 118 #include <net/net_namespace.h>
 119 #include <net/netns/generic.h>
 120
 121 #define HASH_SIZE  16
 122 #define HASH(addr) (((__force u32)addr^((__force u32)addr>>4))&0xF)
 123
 124 static int ipip_net_id;
 125 struct ipip_net {
 126         struct ip_tunnel *tunnels_r_l[HASH_SIZE];
 127         struct ip_tunnel *tunnels_r[HASH_SIZE];
 128         struct ip_tunnel *tunnels_l[HASH_SIZE];
 129         struct ip_tunnel *tunnels_wc[1];
 130         struct ip_tunnel **tunnels[4];
 131
 132         struct net_device *fb_tunnel_dev;
 133 };
 134
 135 static int ipip_fb_tunnel_init(struct net_device *dev);
 136 static int ipip_tunnel_init(struct net_device *dev);
 137 static void ipip_tunnel_setup(struct net_device *dev);
 138
 139 static DEFINE_RWLOCK(ipip_lock);
 140
 141 static struct ip_tunnel * ipip_tunnel_lookup(struct net *net,
 142                 __be32 remote, __be32 local)
 143 {
 144         unsigned h0 = HASH(remote);
 145         unsigned h1 = HASH(local);
 146         struct ip_tunnel *t;
 147         struct ipip_net *ipn = net_generic(net, ipip_net_id);
 148
 149         for (t = ipn->tunnels_r_l[h0^h1]; t; t = t->next) {
 150                 if (local == t->parms.iph.saddr &&
 151                     remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
 152                         return t;
 153         }
 154         for (t = ipn->tunnels_r[h0]; t; t = t->next) {
 155                 if (remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
 156                         return t;
 157         }
 158         for (t = ipn->tunnels_l[h1]; t; t = t->next) {
 159                 if (local == t->parms.iph.saddr && (t->dev->flags&IFF_UP))
 160                         return t;
 161         }
 162         if ((t = ipn->tunnels_wc[0]) != NULL && (t->dev->flags&IFF_UP))
 163                 return t;
 164         return NULL;
 165 }
 166
 167 static struct ip_tunnel **__ipip_bucket(struct ipip_net *ipn,
 168                 struct ip_tunnel_parm *parms)
 169 {
 170         __be32 remote = parms->iph.daddr;
 171         __be32 local = parms->iph.saddr;
 172         unsigned h = 0;
 173         int prio = 0;
 174
 175         if (remote) {
 176                 prio |= 2;
 177                 h ^= HASH(remote);
 178         }
 179         if (local) {
 180                 prio |= 1;
 181                 h ^= HASH(local);
 182         }
 183         return &ipn->tunnels[prio][h];
 184 }
 185
 186 static inline struct ip_tunnel **ipip_bucket(struct ipip_net *ipn,
 187                 struct ip_tunnel *t)
 188 {
 189         return __ipip_bucket(ipn, &t->parms);
 190 }
 191
 192 static void ipip_tunnel_unlink(struct ipip_net *ipn, struct ip_tunnel *t)
 193 {
 194         struct ip_tunnel **tp;
 195
 196         for (tp = ipip_bucket(ipn, t); *tp; tp = &(*tp)->next) {
 197                 if (t == *tp) {
 198                         write_lock_bh(&ipip_lock);
 199                         *tp = t->next;
 200                         write_unlock_bh(&ipip_lock);
 201                         break;
 202                 }
 203         }
 204 }
 205
 206 static void ipip_tunnel_link(struct ipip_net *ipn, struct ip_tunnel *t)
 207 {
 208         struct ip_tunnel **tp = ipip_bucket(ipn, t);
 209
 210         t->next = *tp;
 211         write_lock_bh(&ipip_lock);
 212         *tp = t;
 213         write_unlock_bh(&ipip_lock);
 214 }
 215
 216 static struct ip_tunnel * ipip_tunnel_locate(struct net *net,
 217                 struct ip_tunnel_parm *parms, int create)
 218 {
 219         __be32 remote = parms->iph.daddr;
 220         __be32 local = parms->iph.saddr;
 221         struct ip_tunnel *t, **tp, *nt;
 222         struct net_device *dev;
 223         char name[IFNAMSIZ];
 224         struct ipip_net *ipn = net_generic(net, ipip_net_id);
 225
 226         for (tp = __ipip_bucket(ipn, parms); (t = *tp) != NULL; tp = &t->next) {
 227                 if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr)
 228                         return t;
 229         }
 230         if (!create)
 231                 return NULL;
 232
 233         if (parms->name[0])
 234                 strlcpy(name, parms->name, IFNAMSIZ);
 235         else
 236                 sprintf(name, "tunl%%d");
 237
 238         dev = alloc_netdev(sizeof(*t), name, ipip_tunnel_setup);
 239         if (dev == NULL)
 240                 return NULL;
 241
 242         dev_net_set(dev, net);
 243
 244         if (strchr(name, '%')) {
 245                 if (dev_alloc_name(dev, name) < 0)
 246                         goto failed_free;
 247         }
 248
 249         nt = netdev_priv(dev);
 250         dev->init = ipip_tunnel_init;
 251         nt->parms = *parms;
 252
 253         if (register_netdevice(dev) < 0)
 254                 goto failed_free;
 255
 256         dev_hold(dev);
 257         ipip_tunnel_link(ipn, nt);
 258         return nt;
 259
 260 failed_free:
 261         free_netdev(dev);
 262         return NULL;
 263 }
 264
 265 static void ipip_tunnel_uninit(struct net_device *dev)
 266 {
 267         struct net *net = dev_net(dev);
 268         struct ipip_net *ipn = net_generic(net, ipip_net_id);
 269
 270         if (dev == ipn->fb_tunnel_dev) {
 271                 write_lock_bh(&ipip_lock);
 272                 ipn->tunnels_wc[0] = NULL;
 273                 write_unlock_bh(&ipip_lock);
 274         } else
 275                 ipip_tunnel_unlink(ipn, netdev_priv(dev));
 276         dev_put(dev);
 277 }
 278
 279 static int ipip_err(struct sk_buff *skb, u32 info)
 280 {
 281 #ifndef I_WISH_WORLD_WERE_PERFECT
 282
 283 /* It is not :-( All the routers (except for Linux) return only
 284    8 bytes of packet payload. It means, that precise relaying of
 285    ICMP in the real Internet is absolutely infeasible.
 286  */
 287         struct iphdr *iph = (struct iphdr*)skb->data;
 288         const int type = icmp_hdr(skb)->type;
 289         const int code = icmp_hdr(skb)->code;
 290         struct ip_tunnel *t;
 291         int err;
 292
 293         switch (type) {
 294         default:
 295         case ICMP_PARAMETERPROB:
 296                 return 0;
 297
 298         case ICMP_DEST_UNREACH:
 299                 switch (code) {
 300                 case ICMP_SR_FAILED:
 301                 case ICMP_PORT_UNREACH:
 302                         /* Impossible event. */
 303                         return 0;
 304                 case ICMP_FRAG_NEEDED:
 305                         /* Soft state for pmtu is maintained by IP core. */
 306                         return 0;
 307                 default:
 308                         /* All others are translated to HOST_UNREACH.
 309                            rfc2003 contains "deep thoughts" about NET_UNREACH,
 310                            I believe they are just ether pollution. --ANK
 311                          */
 312                         break;
 313                 }
 314                 break;
 315         case ICMP_TIME_EXCEEDED:
 316                 if (code != ICMP_EXC_TTL)
 317                         return 0;
 318                 break;
 319         }
 320
 321         err = -ENOENT;
 322
 323         read_lock(&ipip_lock);
 324         t = ipip_tunnel_lookup(dev_net(skb->dev), iph->daddr, iph->saddr);
 325         if (t == NULL || t->parms.iph.daddr == 0)
 326                 goto out;
 327
 328         err = 0;
 329         if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
 330                 goto out;
 331
 332         if (jiffies - t->err_time < IPTUNNEL_ERR_TIMEO)
 333                 t->err_count++;
 334         else
 335                 t->err_count = 1;
 336         t->err_time = jiffies;
 337 out:
 338         read_unlock(&ipip_lock);
 339         return err;
 340 #else
 341         struct iphdr *iph = (struct iphdr*)dp;
 342         int hlen = iph->ihl<<2;
 343         struct iphdr *eiph;
 344         const int type = icmp_hdr(skb)->type;
 345         const int code = icmp_hdr(skb)->code;
 346         int rel_type = 0;
 347         int rel_code = 0;
 348         __be32 rel_info = 0;
 349         __u32 n = 0;
 350         struct sk_buff *skb2;
 351         struct flowi fl;
 352         struct rtable *rt;
 353
 354         if (len < hlen + sizeof(struct iphdr))
 355                 return 0;
 356         eiph = (struct iphdr*)(dp + hlen);
 357
 358         switch (type) {
 359         default:
 360                 return 0;
 361         case ICMP_PARAMETERPROB:
 362                 n = ntohl(icmp_hdr(skb)->un.gateway) >> 24;
 363                 if (n < hlen)
 364                         return 0;
 365
 366                 /* So... This guy found something strange INSIDE encapsulated
 367                    packet. Well, he is fool, but what can we do ?
 368                  */
 369                 rel_type = ICMP_PARAMETERPROB;
 370                 rel_info = htonl((n - hlen) << 24);
 371                 break;
 372
 373         case ICMP_DEST_UNREACH:
 374                 switch (code) {
 375                 case ICMP_SR_FAILED:
 376                 case ICMP_PORT_UNREACH:
 377                         /* Impossible event. */
 378                         return 0;
 379                 case ICMP_FRAG_NEEDED:
 380                         /* And it is the only really necessary thing :-) */
 381                         n = ntohs(icmp_hdr(skb)->un.frag.mtu);
 382                         if (n < hlen+68)
 383                                 return 0;
 384                         n -= hlen;
 385                         /* BSD 4.2 MORE DOES NOT EXIST IN NATURE. */
 386                         if (n > ntohs(eiph->tot_len))
 387                                 return 0;
 388                         rel_info = htonl(n);
 389                         break;
 390                 default:
 391                         /* All others are translated to HOST_UNREACH.
 392                            rfc2003 contains "deep thoughts" about NET_UNREACH,
 393                            I believe, it is just ether pollution. --ANK
 394                          */
 395                         rel_type = ICMP_DEST_UNREACH;
 396                         rel_code = ICMP_HOST_UNREACH;
 397                         break;
 398                 }
 399                 break;
 400         case ICMP_TIME_EXCEEDED:
 401                 if (code != ICMP_EXC_TTL)
 402                         return 0;
 403                 break;
 404         }
 405
 406         /* Prepare fake skb to feed it to icmp_send */
 407         skb2 = skb_clone(skb, GFP_ATOMIC);
 408         if (skb2 == NULL)
 409                 return 0;
 410         dst_release(skb2->dst);
 411         skb2->dst = NULL;
 412         skb_pull(skb2, skb->data - (u8*)eiph);
 413         skb_reset_network_header(skb2);
 414
 415         /* Try to guess incoming interface */
 416         memset(&fl, 0, sizeof(fl));
 417         fl.fl4_daddr = eiph->saddr;
 418         fl.fl4_tos = RT_TOS(eiph->tos);
 419         fl.proto = IPPROTO_IPIP;
 420         if (ip_route_output_key(dev_net(skb->dev), &rt, &key)) {
 421                 kfree_skb(skb2);
 422                 return 0;
 423         }
 424         skb2->dev = rt->u.dst.dev;
 425
 426         /* route "incoming" packet */
 427         if (rt->rt_flags&RTCF_LOCAL) {
 428                 ip_rt_put(rt);
 429                 rt = NULL;
 430                 fl.fl4_daddr = eiph->daddr;
 431                 fl.fl4_src = eiph->saddr;
 432                 fl.fl4_tos = eiph->tos;
 433                 if (ip_route_output_key(dev_net(skb->dev), &rt, &fl) ||
 434                     rt->u.dst.dev->type != ARPHRD_TUNNEL) {
 435                         ip_rt_put(rt);
 436                         kfree_skb(skb2);
 437                         return 0;
 438                 }
 439         } else {
 440                 ip_rt_put(rt);
 441                 if (ip_route_input(skb2, eiph->daddr, eiph->saddr, eiph->tos, skb2->dev) ||
 442                     skb2->dst->dev->type != ARPHRD_TUNNEL) {
 443                         kfree_skb(skb2);
 444                         return 0;
 445                 }
 446         }
 447
 448         /* change mtu on this route */
 449         if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
 450                 if (n > dst_mtu(skb2->dst)) {
 451                         kfree_skb(skb2);
 452                         return 0;
 453                 }
 454                 skb2->dst->ops->update_pmtu(skb2->dst, n);
 455         } else if (type == ICMP_TIME_EXCEEDED) {
 456                 struct ip_tunnel *t = netdev_priv(skb2->dev);
 457                 if (t->parms.iph.ttl) {
 458                         rel_type = ICMP_DEST_UNREACH;
 459                         rel_code = ICMP_HOST_UNREACH;
 460                 }
 461         }
 462
 463         icmp_send(skb2, rel_type, rel_code, rel_info);
 464         kfree_skb(skb2);
 465         return 0;
 466 #endif
 467 }
 468
 469 static inline void ipip_ecn_decapsulate(const struct iphdr *outer_iph,
 470                                         struct sk_buff *skb)
 471 {
 472         struct iphdr *inner_iph = ip_hdr(skb);
 473
 474         if (INET_ECN_is_ce(outer_iph->tos))
 475                 IP_ECN_set_ce(inner_iph);
 476 }
 477
 478 static int ipip_rcv(struct sk_buff *skb)
 479 {
 480         struct ip_tunnel *tunnel;
 481         const struct iphdr *iph = ip_hdr(skb);
 482
 483         read_lock(&ipip_lock);
 484         if ((tunnel = ipip_tunnel_lookup(dev_net(skb->dev),
 485                                         iph->saddr, iph->daddr)) != NULL) {
 486                 if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
 487                         read_unlock(&ipip_lock);
 488                         kfree_skb(skb);
 489                         return 0;
 490                 }
 491
 492                 secpath_reset(skb);
 493
 494                 skb->mac_header = skb->network_header;
 495                 skb_reset_network_header(skb);
 496                 skb->protocol = htons(ETH_P_IP);
 497                 skb->pkt_type = PACKET_HOST;
 498
 499                 tunnel->stat.rx_packets++;
 500                 tunnel->stat.rx_bytes += skb->len;
 501                 skb->dev = tunnel->dev;
 502                 dst_release(skb->dst);
 503                 skb->dst = NULL;
 504                 nf_reset(skb);
 505                 ipip_ecn_decapsulate(iph, skb);
 506                 netif_rx(skb);
 507                 read_unlock(&ipip_lock);
 508                 return 0;
 509         }
 510         read_unlock(&ipip_lock);
 511
 512         return -1;
 513 }
 514
 515 /*
 516  *      This function assumes it is being called from dev_queue_xmit()
 517  *      and that skb is filled properly by that function.
 518  */
 519
 520 static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
 521 {
 522         struct ip_tunnel *tunnel = netdev_priv(dev);
 523         struct net_device_stats *stats = &tunnel->stat;
 524         struct iphdr  *tiph = &tunnel->parms.iph;
 525         u8     tos = tunnel->parms.iph.tos;
 526         __be16 df = tiph->frag_off;
 527         struct rtable *rt;                      /* Route to the other host */
 528         struct net_device *tdev;                        /* Device to other host */
 529         struct iphdr  *old_iph = ip_hdr(skb);
 530         struct iphdr  *iph;                     /* Our new IP header */
 531         unsigned int max_headroom;              /* The extra header space needed */
 532         __be32 dst = tiph->daddr;
 533         int    mtu;
 534
 535         if (tunnel->recursion++) {
 536                 tunnel->stat.collisions++;
 537                 goto tx_error;
 538         }
 539
 540         if (skb->protocol != htons(ETH_P_IP))
 541                 goto tx_error;
 542
 543         if (tos&1)
 544                 tos = old_iph->tos;
 545
 546         if (!dst) {
 547                 /* NBMA tunnel */
 548                 if ((rt = skb->rtable) == NULL) {
 549                         tunnel->stat.tx_fifo_errors++;
 550                         goto tx_error;
 551                 }
 552                 if ((dst = rt->rt_gateway) == 0)
 553                         goto tx_error_icmp;
 554         }
 555
 556         {
 557                 struct flowi fl = { .oif = tunnel->parms.link,
 558                                     .nl_u = { .ip4_u =
 559                                               { .daddr = dst,
 560                                                 .saddr = tiph->saddr,
 561                                                 .tos = RT_TOS(tos) } },
 562                                     .proto = IPPROTO_IPIP };
 563                 if (ip_route_output_key(dev_net(dev), &rt, &fl)) {
 564                         tunnel->stat.tx_carrier_errors++;
 565                         goto tx_error_icmp;
 566                 }
 567         }
 568         tdev = rt->u.dst.dev;
 569
 570         if (tdev == dev) {
 571                 ip_rt_put(rt);
 572                 tunnel->stat.collisions++;
 573                 goto tx_error;
 574         }
 575
 576         if (tiph->frag_off)
 577                 mtu = dst_mtu(&rt->u.dst) - sizeof(struct iphdr);
 578         else
 579                 mtu = skb->dst ? dst_mtu(skb->dst) : dev->mtu;
 580
 581         if (mtu < 68) {
 582                 tunnel->stat.collisions++;
 583                 ip_rt_put(rt);
 584                 goto tx_error;
 585         }
 586         if (skb->dst)
 587                 skb->dst->ops->update_pmtu(skb->dst, mtu);
 588
 589         df |= (old_iph->frag_off&htons(IP_DF));
 590
 591         if ((old_iph->frag_off&htons(IP_DF)) && mtu < ntohs(old_iph->tot_len)) {
 592                 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
 593                 ip_rt_put(rt);
 594                 goto tx_error;
 595         }
 596
 597         if (tunnel->err_count > 0) {
 598                 if (jiffies - tunnel->err_time < IPTUNNEL_ERR_TIMEO) {
 599                         tunnel->err_count--;
 600                         dst_link_failure(skb);
 601                 } else
 602                         tunnel->err_count = 0;
 603         }
 604
 605         /*
 606          * Okay, now see if we can stuff it in the buffer as-is.
 607          */
 608         max_headroom = (LL_RESERVED_SPACE(tdev)+sizeof(struct iphdr));
 609
 610         if (skb_headroom(skb) < max_headroom || skb_shared(skb) ||
 611             (skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
 612                 struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
 613                 if (!new_skb) {
 614                         ip_rt_put(rt);
 615                         stats->tx_dropped++;
 616                         dev_kfree_skb(skb);
 617                         tunnel->recursion--;
 618                         return 0;
 619                 }
 620                 if (skb->sk)
 621                         skb_set_owner_w(new_skb, skb->sk);
 622                 dev_kfree_skb(skb);
 623                 skb = new_skb;
 624                 old_iph = ip_hdr(skb);
 625         }
 626
 627         skb->transport_header = skb->network_header;
 628         skb_push(skb, sizeof(struct iphdr));
 629         skb_reset_network_header(skb);
 630         memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
 631         IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
 632                               IPSKB_REROUTED);
 633         dst_release(skb->dst);
 634         skb->dst = &rt->u.dst;
 635
 636         /*
 637          *      Push down and install the IPIP header.
 638          */
 639
 640         iph                     =       ip_hdr(skb);
 641         iph->version            =       4;
 642         iph->ihl                =       sizeof(struct iphdr)>>2;
 643         iph->frag_off           =       df;
 644         iph->protocol           =       IPPROTO_IPIP;
 645         iph->tos                =       INET_ECN_encapsulate(tos, old_iph->tos);
 646         iph->daddr              =       rt->rt_dst;
 647         iph->saddr              =       rt->rt_src;
 648
 649         if ((iph->ttl = tiph->ttl) == 0)
 650                 iph->ttl        =       old_iph->ttl;
 651
 652         nf_reset(skb);
 653
 654         IPTUNNEL_XMIT();
 655         tunnel->recursion--;
 656         return 0;
 657
 658 tx_error_icmp:
 659         dst_link_failure(skb);
 660 tx_error:
 661         stats->tx_errors++;
 662         dev_kfree_skb(skb);
 663         tunnel->recursion--;
 664         return 0;
 665 }
 666
 667 static void ipip_tunnel_bind_dev(struct net_device *dev)
 668 {
 669         struct net_device *tdev = NULL;
 670         struct ip_tunnel *tunnel;
 671         struct iphdr *iph;
 672
 673         tunnel = netdev_priv(dev);
 674         iph = &tunnel->parms.iph;
 675
 676         if (iph->daddr) {
 677                 struct flowi fl = { .oif = tunnel->parms.link,
 678                                     .nl_u = { .ip4_u =
 679                                               { .daddr = iph->daddr,
 680                                                 .saddr = iph->saddr,
 681                                                 .tos = RT_TOS(iph->tos) } },
 682                                     .proto = IPPROTO_IPIP };
 683                 struct rtable *rt;
 684                 if (!ip_route_output_key(dev_net(dev), &rt, &fl)) {
 685                         tdev = rt->u.dst.dev;
 686                         ip_rt_put(rt);
 687                 }
 688                 dev->flags |= IFF_POINTOPOINT;
 689         }
 690
 691         if (!tdev && tunnel->parms.link)
 692                 tdev = __dev_get_by_index(dev_net(dev), tunnel->parms.link);
 693
 694         if (tdev) {
 695                 dev->hard_header_len = tdev->hard_header_len + sizeof(struct iphdr);
 696                 dev->mtu = tdev->mtu - sizeof(struct iphdr);
 697         }
 698         dev->iflink = tunnel->parms.link;
 699 }
 700
 701 static int
 702 ipip_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
 703 {
 704         int err = 0;
 705         struct ip_tunnel_parm p;
 706         struct ip_tunnel *t;
 707         struct net *net = dev_net(dev);
 708         struct ipip_net *ipn = net_generic(net, ipip_net_id);
 709
 710         switch (cmd) {
 711         case SIOCGETTUNNEL:
 712                 t = NULL;
 713                 if (dev == ipn->fb_tunnel_dev) {
 714                         if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
 715                                 err = -EFAULT;
 716                                 break;
 717                         }
 718                         t = ipip_tunnel_locate(net, &p, 0);
 719                 }
 720                 if (t == NULL)
 721                         t = netdev_priv(dev);
 722                 memcpy(&p, &t->parms, sizeof(p));
 723                 if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
 724                         err = -EFAULT;
 725                 break;
 726
 727         case SIOCADDTUNNEL:
 728         case SIOCCHGTUNNEL:
 729                 err = -EPERM;
 730                 if (!capable(CAP_NET_ADMIN))
 731                         goto done;
 732
 733                 err = -EFAULT;
 734                 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
 735                         goto done;
 736
 737                 err = -EINVAL;
 738                 if (p.iph.version != 4 || p.iph.protocol != IPPROTO_IPIP ||
 739                     p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)))
 740                         goto done;
 741                 if (p.iph.ttl)
 742                         p.iph.frag_off |= htons(IP_DF);
 743
 744                 t = ipip_tunnel_locate(net, &p, cmd == SIOCADDTUNNEL);
 745
 746                 if (dev != ipn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
 747                         if (t != NULL) {
 748                                 if (t->dev != dev) {
 749                                         err = -EEXIST;
 750                                         break;
 751                                 }
 752                         } else {
 753                                 if (((dev->flags&IFF_POINTOPOINT) && !p.iph.daddr) ||
 754                                     (!(dev->flags&IFF_POINTOPOINT) && p.iph.daddr)) {
 755                                         err = -EINVAL;
 756                                         break;
 757                                 }
 758                                 t = netdev_priv(dev);
 759                                 ipip_tunnel_unlink(ipn, t);
 760                                 t->parms.iph.saddr = p.iph.saddr;
 761                                 t->parms.iph.daddr = p.iph.daddr;
 762                                 memcpy(dev->dev_addr, &p.iph.saddr, 4);
 763                                 memcpy(dev->broadcast, &p.iph.daddr, 4);
 764                                 ipip_tunnel_link(ipn, t);
 765                                 netdev_state_change(dev);
 766                         }
 767                 }
 768
 769                 if (t) {
 770                         err = 0;
 771                         if (cmd == SIOCCHGTUNNEL) {
 772                                 t->parms.iph.ttl = p.iph.ttl;
 773                                 t->parms.iph.tos = p.iph.tos;
 774                                 t->parms.iph.frag_off = p.iph.frag_off;
 775                                 if (t->parms.link != p.link) {
 776                                         t->parms.link = p.link;
 777                                         ipip_tunnel_bind_dev(dev);
 778                                         netdev_state_change(dev);
 779                                 }
 780                         }
 781                         if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
 782                                 err = -EFAULT;
 783                 } else
 784                         err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
 785                 break;
 786
 787         case SIOCDELTUNNEL:
 788                 err = -EPERM;
 789                 if (!capable(CAP_NET_ADMIN))
 790                         goto done;
 791
 792                 if (dev == ipn->fb_tunnel_dev) {
 793                         err = -EFAULT;
 794                         if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
 795                                 goto done;
 796                         err = -ENOENT;
 797                         if ((t = ipip_tunnel_locate(net, &p, 0)) == NULL)
 798                                 goto done;
 799                         err = -EPERM;
 800                         if (t->dev == ipn->fb_tunnel_dev)
 801                                 goto done;
 802                         dev = t->dev;
 803                 }
 804                 unregister_netdevice(dev);
 805                 err = 0;
 806                 break;
 807
 808         default:
 809                 err = -EINVAL;
 810         }
 811
 812 done:
 813         return err;
 814 }
 815
 816 static struct net_device_stats *ipip_tunnel_get_stats(struct net_device *dev)
 817 {
 818         return &(((struct ip_tunnel*)netdev_priv(dev))->stat);
 819 }
 820
 821 static int ipip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
 822 {
 823         if (new_mtu < 68 || new_mtu > 0xFFF8 - sizeof(struct iphdr))
 824                 return -EINVAL;
 825         dev->mtu = new_mtu;
 826         return 0;
 827 }
 828
 829 static void ipip_tunnel_setup(struct net_device *dev)
 830 {
 831         dev->uninit             = ipip_tunnel_uninit;
 832         dev->hard_start_xmit    = ipip_tunnel_xmit;
 833         dev->get_stats          = ipip_tunnel_get_stats;
 834         dev->do_ioctl           = ipip_tunnel_ioctl;
 835         dev->change_mtu         = ipip_tunnel_change_mtu;
 836         dev->destructor         = free_netdev;
 837
 838         dev->type               = ARPHRD_TUNNEL;
 839         dev->hard_header_len    = LL_MAX_HEADER + sizeof(struct iphdr);
 840         dev->mtu                = ETH_DATA_LEN - sizeof(struct iphdr);
 841         dev->flags              = IFF_NOARP;
 842         dev->iflink             = 0;
 843         dev->addr_len           = 4;
 844         dev->features           |= NETIF_F_NETNS_LOCAL;
 845 }
 846
 847 static int ipip_tunnel_init(struct net_device *dev)
 848 {
 849         struct ip_tunnel *tunnel;
 850
 851         tunnel = netdev_priv(dev);
 852
 853         tunnel->dev = dev;
 854         strcpy(tunnel->parms.name, dev->name);
 855
 856         memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
 857         memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);
 858
 859         ipip_tunnel_bind_dev(dev);
 860
 861         return 0;
 862 }
 863
 864 static int ipip_fb_tunnel_init(struct net_device *dev)
 865 {
 866         struct ip_tunnel *tunnel = netdev_priv(dev);
 867         struct iphdr *iph = &tunnel->parms.iph;
 868         struct ipip_net *ipn = net_generic(dev_net(dev), ipip_net_id);
 869
 870         tunnel->dev = dev;
 871         strcpy(tunnel->parms.name, dev->name);
 872
 873         iph->version            = 4;
 874         iph->protocol           = IPPROTO_IPIP;
 875         iph->ihl                = 5;
 876
 877         dev_hold(dev);
 878         ipn->tunnels_wc[0]      = tunnel;
 879         return 0;
 880 }
 881
 882 static struct xfrm_tunnel ipip_handler = {
 883         .handler        =       ipip_rcv,
 884         .err_handler    =       ipip_err,
 885         .priority       =       1,
 886 };
 887
 888 static char banner[] __initdata =
 889         KERN_INFO "IPv4 over IPv4 tunneling driver\n";
 890
 891 static void ipip_destroy_tunnels(struct ipip_net *ipn)
 892 {
 893         int prio;
 894
 895         for (prio = 1; prio < 4; prio++) {
 896                 int h;
 897                 for (h = 0; h < HASH_SIZE; h++) {
 898                         struct ip_tunnel *t;
 899                         while ((t = ipn->tunnels[prio][h]) != NULL)
 900                                 unregister_netdevice(t->dev);
 901                 }
 902         }
 903 }
 904
 905 static int ipip_init_net(struct net *net)
 906 {
 907         int err;
 908         struct ipip_net *ipn;
 909
 910         err = -ENOMEM;
 911         ipn = kzalloc(sizeof(struct ipip_net), GFP_KERNEL);
 912         if (ipn == NULL)
 913                 goto err_alloc;
 914
 915         err = net_assign_generic(net, ipip_net_id, ipn);
 916         if (err < 0)
 917                 goto err_assign;
 918
 919         ipn->tunnels[0] = ipn->tunnels_wc;
 920         ipn->tunnels[1] = ipn->tunnels_l;
 921         ipn->tunnels[2] = ipn->tunnels_r;
 922         ipn->tunnels[3] = ipn->tunnels_r_l;
 923
 924         ipn->fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel),
 925                                            "tunl0",
 926                                            ipip_tunnel_setup);
 927         if (!ipn->fb_tunnel_dev) {
 928                 err = -ENOMEM;
 929                 goto err_alloc_dev;
 930         }
 931
 932         ipn->fb_tunnel_dev->init = ipip_fb_tunnel_init;
 933         dev_net_set(ipn->fb_tunnel_dev, net);
 934
 935         if ((err = register_netdev(ipn->fb_tunnel_dev)))
 936                 goto err_reg_dev;
 937
 938         return 0;
 939
 940 err_reg_dev:
 941         free_netdev(ipn->fb_tunnel_dev);
 942 err_alloc_dev:
 943         /* nothing */
 944 err_assign:
 945         kfree(ipn);
 946 err_alloc:
 947         return err;
 948 }
 949
 950 static void ipip_exit_net(struct net *net)
 951 {
 952         struct ipip_net *ipn;
 953
 954         ipn = net_generic(net, ipip_net_id);
 955         rtnl_lock();
 956         ipip_destroy_tunnels(ipn);
 957         unregister_netdevice(ipn->fb_tunnel_dev);
 958         rtnl_unlock();
 959         kfree(ipn);
 960 }
 961
 962 static struct pernet_operations ipip_net_ops = {
 963         .init = ipip_init_net,
 964         .exit = ipip_exit_net,
 965 };
 966
 967 static int __init ipip_init(void)
 968 {
 969         int err;
 970
 971         printk(banner);
 972
 973         if (xfrm4_tunnel_register(&ipip_handler, AF_INET)) {
 974                 printk(KERN_INFO "ipip init: can't register tunnel\n");
 975                 return -EAGAIN;
 976         }
 977
 978         err = register_pernet_gen_device(&ipip_net_id, &ipip_net_ops);
 979         if (err)
 980                 xfrm4_tunnel_deregister(&ipip_handler, AF_INET);
 981
 982         return err;
 983 }
 984
 985 static void __exit ipip_fini(void)
 986 {
 987         if (xfrm4_tunnel_deregister(&ipip_handler, AF_INET))
 988                 printk(KERN_INFO "ipip close: can't deregister tunnel\n");
 989
 990         unregister_pernet_gen_device(ipip_net_id, &ipip_net_ops);
 991 }
 992
 993 module_init(ipip_init);
 994 module_exit(ipip_fini);
 995 MODULE_LICENSE("GPL");