net/ipv4/ipip.c

   1 /*
   2  *      Linux NET3:     IP/IP protocol decoder.
   3  *
   4  *      Version: $Id: ipip.c,v 1.50 2001/10/02 02:22:36 davem Exp $
   5  *
   6  *      Authors:
   7  *              Sam Lantinga (slouken@cs.ucdavis.edu)  02/01/95
   8  *
   9  *      Fixes:
  10  *              Alan Cox        :       Merged and made usable non modular (it's so tiny it's silly as
  11  *                                      a module taking up 2 pages).
  12  *              Alan Cox        :       Fixed bug with 1.3.18 and IPIP not working (now needs to set skb->h.iph)
  13  *                                      to keep ip_forward happy.
  14  *              Alan Cox        :       More fixes for 1.3.21, and firewall fix. Maybe this will work soon 8).
  15  *              Kai Schulte     :       Fixed #defines for IP_FIREWALL->FIREWALL
  16  *              David Woodhouse :       Perform some basic ICMP handling.
  17  *                                      IPIP Routing without decapsulation.
  18  *              Carlos Picoto   :       GRE over IP support
  19  *              Alexey Kuznetsov:       Reworked. Really, now it is a truncated version of ipv4/ip_gre.c.
  20  *                                      I do not want to merge them together.
  21  *
  22  *      This program is free software; you can redistribute it and/or
  23  *      modify it under the terms of the GNU General Public License
  24  *      as published by the Free Software Foundation; either version
  25  *      2 of the License, or (at your option) any later version.
  26  *
  27  */
  28
  29 /* tunnel.c: an IP tunnel driver
  30
  31         The purpose of this driver is to provide an IP tunnel through
  32         which you can tunnel network traffic transparently across subnets.
  33
  34         This was written by looking at Nick Holloway's dummy driver
  35         Thanks for the great code!
  36
  37                 -Sam Lantinga   (slouken@cs.ucdavis.edu)  02/01/95
  38
  39         Minor tweaks:
  40                 Cleaned up the code a little and added some pre-1.3.0 tweaks.
  41                 dev->hard_header/hard_header_len changed to use no headers.
  42                 Comments/bracketing tweaked.
  43                 Made the tunnels use dev->name not tunnel: when error reporting.
  44                 Added tx_dropped stat
  45
  46                 -Alan Cox       (Alan.Cox@linux.org) 21 March 95
  47
  48         Reworked:
  49                 Changed to tunnel to destination gateway in addition to the
  50                         tunnel's pointopoint address
  51                 Almost completely rewritten
  52                 Note:  There is currently no firewall or ICMP handling done.
  53
  54                 -Sam Lantinga   (slouken@cs.ucdavis.edu) 02/13/96
  55
  56 */
  57
  58 /* Things I wish I had known when writing the tunnel driver:
  59
  60         When the tunnel_xmit() function is called, the skb contains the
  61         packet to be sent (plus a great deal of extra info), and dev
  62         contains the tunnel device that _we_ are.
  63
  64         When we are passed a packet, we are expected to fill in the
  65         source address with our source IP address.
  66
  67         What is the proper way to allocate, copy and free a buffer?
  68         After you allocate it, it is a "0 length" chunk of memory
  69         starting at zero.  If you want to add headers to the buffer
  70         later, you'll have to call "skb_reserve(skb, amount)" with
  71         the amount of memory you want reserved.  Then, you call
  72         "skb_put(skb, amount)" with the amount of space you want in
  73         the buffer.  skb_put() returns a pointer to the top (#0) of
  74         that buffer.  skb->len is set to the amount of space you have
  75         "allocated" with skb_put().  You can then write up to skb->len
  76         bytes to that buffer.  If you need more, you can call skb_put()
  77         again with the additional amount of space you need.  You can
  78         find out how much more space you can allocate by calling
  79         "skb_tailroom(skb)".
  80         Now, to add header space, call "skb_push(skb, header_len)".
  81         This creates space at the beginning of the buffer and returns
  82         a pointer to this new space.  If later you need to strip a
  83         header from a buffer, call "skb_pull(skb, header_len)".
  84         skb_headroom() will return how much space is left at the top
  85         of the buffer (before the main data).  Remember, this headroom
  86         space must be reserved before the skb_put() function is called.
  87         */
  88
  89 /*
  90    This version of net/ipv4/ipip.c is cloned from net/ipv4/ip_gre.c
  91
  92    For comments look at net/ipv4/ip_gre.c --ANK
  93  */
  94
  95
  96 #include <linux/capability.h>
  97 #include <linux/module.h>
  98 #include <linux/types.h>
  99 #include <linux/kernel.h>
 100 #include <asm/uaccess.h>
 101 #include <linux/skbuff.h>
 102 #include <linux/netdevice.h>
 103 #include <linux/in.h>
 104 #include <linux/tcp.h>
 105 #include <linux/udp.h>
 106 #include <linux/if_arp.h>
 107 #include <linux/mroute.h>
 108 #include <linux/init.h>
 109 #include <linux/netfilter_ipv4.h>
 110 #include <linux/if_ether.h>
 111
 112 #include <net/sock.h>
 113 #include <net/ip.h>
 114 #include <net/icmp.h>
 115 #include <net/ipip.h>
 116 #include <net/inet_ecn.h>
 117 #include <net/xfrm.h>
 118
 119 #define HASH_SIZE  16
 120 #define HASH(addr) (((__force u32)addr^((__force u32)addr>>4))&0xF)
 121
 122 static int ipip_fb_tunnel_init(struct net_device *dev);
 123 static int ipip_tunnel_init(struct net_device *dev);
 124 static void ipip_tunnel_setup(struct net_device *dev);
 125
 126 static struct net_device *ipip_fb_tunnel_dev;
 127
 128 static struct ip_tunnel *tunnels_r_l[HASH_SIZE];
 129 static struct ip_tunnel *tunnels_r[HASH_SIZE];
 130 static struct ip_tunnel *tunnels_l[HASH_SIZE];
 131 static struct ip_tunnel *tunnels_wc[1];
 132 static struct ip_tunnel **tunnels[4] = { tunnels_wc, tunnels_l, tunnels_r, tunnels_r_l };
 133
 134 static DEFINE_RWLOCK(ipip_lock);
 135
 136 static struct ip_tunnel * ipip_tunnel_lookup(__be32 remote, __be32 local)
 137 {
 138         unsigned h0 = HASH(remote);
 139         unsigned h1 = HASH(local);
 140         struct ip_tunnel *t;
 141
 142         for (t = tunnels_r_l[h0^h1]; t; t = t->next) {
 143                 if (local == t->parms.iph.saddr &&
 144                     remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
 145                         return t;
 146         }
 147         for (t = tunnels_r[h0]; t; t = t->next) {
 148                 if (remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
 149                         return t;
 150         }
 151         for (t = tunnels_l[h1]; t; t = t->next) {
 152                 if (local == t->parms.iph.saddr && (t->dev->flags&IFF_UP))
 153                         return t;
 154         }
 155         if ((t = tunnels_wc[0]) != NULL && (t->dev->flags&IFF_UP))
 156                 return t;
 157         return NULL;
 158 }
 159
 160 static struct ip_tunnel **ipip_bucket(struct ip_tunnel *t)
 161 {
 162         __be32 remote = t->parms.iph.daddr;
 163         __be32 local = t->parms.iph.saddr;
 164         unsigned h = 0;
 165         int prio = 0;
 166
 167         if (remote) {
 168                 prio |= 2;
 169                 h ^= HASH(remote);
 170         }
 171         if (local) {
 172                 prio |= 1;
 173                 h ^= HASH(local);
 174         }
 175         return &tunnels[prio][h];
 176 }
 177
 178
 179 static void ipip_tunnel_unlink(struct ip_tunnel *t)
 180 {
 181         struct ip_tunnel **tp;
 182
 183         for (tp = ipip_bucket(t); *tp; tp = &(*tp)->next) {
 184                 if (t == *tp) {
 185                         write_lock_bh(&ipip_lock);
 186                         *tp = t->next;
 187                         write_unlock_bh(&ipip_lock);
 188                         break;
 189                 }
 190         }
 191 }
 192
 193 static void ipip_tunnel_link(struct ip_tunnel *t)
 194 {
 195         struct ip_tunnel **tp = ipip_bucket(t);
 196
 197         t->next = *tp;
 198         write_lock_bh(&ipip_lock);
 199         *tp = t;
 200         write_unlock_bh(&ipip_lock);
 201 }
 202
 203 static struct ip_tunnel * ipip_tunnel_locate(struct ip_tunnel_parm *parms, int create)
 204 {
 205         __be32 remote = parms->iph.daddr;
 206         __be32 local = parms->iph.saddr;
 207         struct ip_tunnel *t, **tp, *nt;
 208         struct net_device *dev;
 209         unsigned h = 0;
 210         int prio = 0;
 211         char name[IFNAMSIZ];
 212
 213         if (remote) {
 214                 prio |= 2;
 215                 h ^= HASH(remote);
 216         }
 217         if (local) {
 218                 prio |= 1;
 219                 h ^= HASH(local);
 220         }
 221         for (tp = &tunnels[prio][h]; (t = *tp) != NULL; tp = &t->next) {
 222                 if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr)
 223                         return t;
 224         }
 225         if (!create)
 226                 return NULL;
 227
 228         if (parms->name[0])
 229                 strlcpy(name, parms->name, IFNAMSIZ);
 230         else {
 231                 int i;
 232                 for (i=1; i<100; i++) {
 233                         sprintf(name, "tunl%d", i);
 234                         if (__dev_get_by_name(name) == NULL)
 235                                 break;
 236                 }
 237                 if (i==100)
 238                         goto failed;
 239         }
 240
 241         dev = alloc_netdev(sizeof(*t), name, ipip_tunnel_setup);
 242         if (dev == NULL)
 243                 return NULL;
 244
 245         nt = netdev_priv(dev);
 246         SET_MODULE_OWNER(dev);
 247         dev->init = ipip_tunnel_init;
 248         nt->parms = *parms;
 249
 250         if (register_netdevice(dev) < 0) {
 251                 free_netdev(dev);
 252                 goto failed;
 253         }
 254
 255         dev_hold(dev);
 256         ipip_tunnel_link(nt);
 257         return nt;
 258
 259 failed:
 260         return NULL;
 261 }
 262
 263 static void ipip_tunnel_uninit(struct net_device *dev)
 264 {
 265         if (dev == ipip_fb_tunnel_dev) {
 266                 write_lock_bh(&ipip_lock);
 267                 tunnels_wc[0] = NULL;
 268                 write_unlock_bh(&ipip_lock);
 269         } else
 270                 ipip_tunnel_unlink(netdev_priv(dev));
 271         dev_put(dev);
 272 }
 273
 274 static int ipip_err(struct sk_buff *skb, u32 info)
 275 {
 276 #ifndef I_WISH_WORLD_WERE_PERFECT
 277
 278 /* It is not :-( All the routers (except for Linux) return only
 279    8 bytes of packet payload. It means, that precise relaying of
 280    ICMP in the real Internet is absolutely infeasible.
 281  */
 282         struct iphdr *iph = (struct iphdr*)skb->data;
 283         int type = skb->h.icmph->type;
 284         int code = skb->h.icmph->code;
 285         struct ip_tunnel *t;
 286         int err;
 287
 288         switch (type) {
 289         default:
 290         case ICMP_PARAMETERPROB:
 291                 return 0;
 292
 293         case ICMP_DEST_UNREACH:
 294                 switch (code) {
 295                 case ICMP_SR_FAILED:
 296                 case ICMP_PORT_UNREACH:
 297                         /* Impossible event. */
 298                         return 0;
 299                 case ICMP_FRAG_NEEDED:
 300                         /* Soft state for pmtu is maintained by IP core. */
 301                         return 0;
 302                 default:
 303                         /* All others are translated to HOST_UNREACH.
 304                            rfc2003 contains "deep thoughts" about NET_UNREACH,
 305                            I believe they are just ether pollution. --ANK
 306                          */
 307                         break;
 308                 }
 309                 break;
 310         case ICMP_TIME_EXCEEDED:
 311                 if (code != ICMP_EXC_TTL)
 312                         return 0;
 313                 break;
 314         }
 315
 316         err = -ENOENT;
 317
 318         read_lock(&ipip_lock);
 319         t = ipip_tunnel_lookup(iph->daddr, iph->saddr);
 320         if (t == NULL || t->parms.iph.daddr == 0)
 321                 goto out;
 322
 323         err = 0;
 324         if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
 325                 goto out;
 326
 327         if (jiffies - t->err_time < IPTUNNEL_ERR_TIMEO)
 328                 t->err_count++;
 329         else
 330                 t->err_count = 1;
 331         t->err_time = jiffies;
 332 out:
 333         read_unlock(&ipip_lock);
 334         return err;
 335 #else
 336         struct iphdr *iph = (struct iphdr*)dp;
 337         int hlen = iph->ihl<<2;
 338         struct iphdr *eiph;
 339         int type = skb->h.icmph->type;
 340         int code = skb->h.icmph->code;
 341         int rel_type = 0;
 342         int rel_code = 0;
 343         __be32 rel_info = 0;
 344         __u32 n = 0;
 345         struct sk_buff *skb2;
 346         struct flowi fl;
 347         struct rtable *rt;
 348
 349         if (len < hlen + sizeof(struct iphdr))
 350                 return 0;
 351         eiph = (struct iphdr*)(dp + hlen);
 352
 353         switch (type) {
 354         default:
 355                 return 0;
 356         case ICMP_PARAMETERPROB:
 357                 n = ntohl(skb->h.icmph->un.gateway) >> 24;
 358                 if (n < hlen)
 359                         return 0;
 360
 361                 /* So... This guy found something strange INSIDE encapsulated
 362                    packet. Well, he is fool, but what can we do ?
 363                  */
 364                 rel_type = ICMP_PARAMETERPROB;
 365                 rel_info = htonl((n - hlen) << 24);
 366                 break;
 367
 368         case ICMP_DEST_UNREACH:
 369                 switch (code) {
 370                 case ICMP_SR_FAILED:
 371                 case ICMP_PORT_UNREACH:
 372                         /* Impossible event. */
 373                         return 0;
 374                 case ICMP_FRAG_NEEDED:
 375                         /* And it is the only really necessary thing :-) */
 376                         n = ntohs(skb->h.icmph->un.frag.mtu);
 377                         if (n < hlen+68)
 378                                 return 0;
 379                         n -= hlen;
 380                         /* BSD 4.2 MORE DOES NOT EXIST IN NATURE. */
 381                         if (n > ntohs(eiph->tot_len))
 382                                 return 0;
 383                         rel_info = htonl(n);
 384                         break;
 385                 default:
 386                         /* All others are translated to HOST_UNREACH.
 387                            rfc2003 contains "deep thoughts" about NET_UNREACH,
 388                            I believe, it is just ether pollution. --ANK
 389                          */
 390                         rel_type = ICMP_DEST_UNREACH;
 391                         rel_code = ICMP_HOST_UNREACH;
 392                         break;
 393                 }
 394                 break;
 395         case ICMP_TIME_EXCEEDED:
 396                 if (code != ICMP_EXC_TTL)
 397                         return 0;
 398                 break;
 399         }
 400
 401         /* Prepare fake skb to feed it to icmp_send */
 402         skb2 = skb_clone(skb, GFP_ATOMIC);
 403         if (skb2 == NULL)
 404                 return 0;
 405         dst_release(skb2->dst);
 406         skb2->dst = NULL;
 407         skb_pull(skb2, skb->data - (u8*)eiph);
 408         skb2->nh.raw = skb2->data;
 409
 410         /* Try to guess incoming interface */
 411         memset(&fl, 0, sizeof(fl));
 412         fl.fl4_daddr = eiph->saddr;
 413         fl.fl4_tos = RT_TOS(eiph->tos);
 414         fl.proto = IPPROTO_IPIP;
 415         if (ip_route_output_key(&rt, &key)) {
 416                 kfree_skb(skb2);
 417                 return 0;
 418         }
 419         skb2->dev = rt->u.dst.dev;
 420
 421         /* route "incoming" packet */
 422         if (rt->rt_flags&RTCF_LOCAL) {
 423                 ip_rt_put(rt);
 424                 rt = NULL;
 425                 fl.fl4_daddr = eiph->daddr;
 426                 fl.fl4_src = eiph->saddr;
 427                 fl.fl4_tos = eiph->tos;
 428                 if (ip_route_output_key(&rt, &fl) ||
 429                     rt->u.dst.dev->type != ARPHRD_TUNNEL) {
 430                         ip_rt_put(rt);
 431                         kfree_skb(skb2);
 432                         return 0;
 433                 }
 434         } else {
 435                 ip_rt_put(rt);
 436                 if (ip_route_input(skb2, eiph->daddr, eiph->saddr, eiph->tos, skb2->dev) ||
 437                     skb2->dst->dev->type != ARPHRD_TUNNEL) {
 438                         kfree_skb(skb2);
 439                         return 0;
 440                 }
 441         }
 442
 443         /* change mtu on this route */
 444         if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
 445                 if (n > dst_mtu(skb2->dst)) {
 446                         kfree_skb(skb2);
 447                         return 0;
 448                 }
 449                 skb2->dst->ops->update_pmtu(skb2->dst, n);
 450         } else if (type == ICMP_TIME_EXCEEDED) {
 451                 struct ip_tunnel *t = netdev_priv(skb2->dev);
 452                 if (t->parms.iph.ttl) {
 453                         rel_type = ICMP_DEST_UNREACH;
 454                         rel_code = ICMP_HOST_UNREACH;
 455                 }
 456         }
 457
 458         icmp_send(skb2, rel_type, rel_code, rel_info);
 459         kfree_skb(skb2);
 460         return 0;
 461 #endif
 462 }
 463
 464 static inline void ipip_ecn_decapsulate(struct iphdr *outer_iph, struct sk_buff *skb)
 465 {
 466         struct iphdr *inner_iph = skb->nh.iph;
 467
 468         if (INET_ECN_is_ce(outer_iph->tos))
 469                 IP_ECN_set_ce(inner_iph);
 470 }
 471
 472 static int ipip_rcv(struct sk_buff *skb)
 473 {
 474         struct iphdr *iph;
 475         struct ip_tunnel *tunnel;
 476
 477         iph = skb->nh.iph;
 478
 479         read_lock(&ipip_lock);
 480         if ((tunnel = ipip_tunnel_lookup(iph->saddr, iph->daddr)) != NULL) {
 481                 if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
 482                         read_unlock(&ipip_lock);
 483                         kfree_skb(skb);
 484                         return 0;
 485                 }
 486
 487                 secpath_reset(skb);
 488
 489                 skb->mac.raw = skb->nh.raw;
 490                 skb->nh.raw = skb->data;
 491                 skb->protocol = htons(ETH_P_IP);
 492                 skb->pkt_type = PACKET_HOST;
 493
 494                 tunnel->stat.rx_packets++;
 495                 tunnel->stat.rx_bytes += skb->len;
 496                 skb->dev = tunnel->dev;
 497                 dst_release(skb->dst);
 498                 skb->dst = NULL;
 499                 nf_reset(skb);
 500                 ipip_ecn_decapsulate(iph, skb);
 501                 netif_rx(skb);
 502                 read_unlock(&ipip_lock);
 503                 return 0;
 504         }
 505         read_unlock(&ipip_lock);
 506
 507         return -1;
 508 }
 509
 510 /*
 511  *      This function assumes it is being called from dev_queue_xmit()
 512  *      and that skb is filled properly by that function.
 513  */
 514
 515 static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
 516 {
 517         struct ip_tunnel *tunnel = netdev_priv(dev);
 518         struct net_device_stats *stats = &tunnel->stat;
 519         struct iphdr  *tiph = &tunnel->parms.iph;
 520         u8     tos = tunnel->parms.iph.tos;
 521         __be16 df = tiph->frag_off;
 522         struct rtable *rt;                      /* Route to the other host */
 523         struct net_device *tdev;                        /* Device to other host */
 524         struct iphdr  *old_iph = skb->nh.iph;
 525         struct iphdr  *iph;                     /* Our new IP header */
 526         int    max_headroom;                    /* The extra header space needed */
 527         __be32 dst = tiph->daddr;
 528         int    mtu;
 529
 530         if (tunnel->recursion++) {
 531                 tunnel->stat.collisions++;
 532                 goto tx_error;
 533         }
 534
 535         if (skb->protocol != htons(ETH_P_IP))
 536                 goto tx_error;
 537
 538         if (tos&1)
 539                 tos = old_iph->tos;
 540
 541         if (!dst) {
 542                 /* NBMA tunnel */
 543                 if ((rt = (struct rtable*)skb->dst) == NULL) {
 544                         tunnel->stat.tx_fifo_errors++;
 545                         goto tx_error;
 546                 }
 547                 if ((dst = rt->rt_gateway) == 0)
 548                         goto tx_error_icmp;
 549         }
 550
 551         {
 552                 struct flowi fl = { .oif = tunnel->parms.link,
 553                                     .nl_u = { .ip4_u =
 554                                               { .daddr = dst,
 555                                                 .saddr = tiph->saddr,
 556                                                 .tos = RT_TOS(tos) } },
 557                                     .proto = IPPROTO_IPIP };
 558                 if (ip_route_output_key(&rt, &fl)) {
 559                         tunnel->stat.tx_carrier_errors++;
 560                         goto tx_error_icmp;
 561                 }
 562         }
 563         tdev = rt->u.dst.dev;
 564
 565         if (tdev == dev) {
 566                 ip_rt_put(rt);
 567                 tunnel->stat.collisions++;
 568                 goto tx_error;
 569         }
 570
 571         if (tiph->frag_off)
 572                 mtu = dst_mtu(&rt->u.dst) - sizeof(struct iphdr);
 573         else
 574                 mtu = skb->dst ? dst_mtu(skb->dst) : dev->mtu;
 575
 576         if (mtu < 68) {
 577                 tunnel->stat.collisions++;
 578                 ip_rt_put(rt);
 579                 goto tx_error;
 580         }
 581         if (skb->dst)
 582                 skb->dst->ops->update_pmtu(skb->dst, mtu);
 583
 584         df |= (old_iph->frag_off&htons(IP_DF));
 585
 586         if ((old_iph->frag_off&htons(IP_DF)) && mtu < ntohs(old_iph->tot_len)) {
 587                 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
 588                 ip_rt_put(rt);
 589                 goto tx_error;
 590         }
 591
 592         if (tunnel->err_count > 0) {
 593                 if (jiffies - tunnel->err_time < IPTUNNEL_ERR_TIMEO) {
 594                         tunnel->err_count--;
 595                         dst_link_failure(skb);
 596                 } else
 597                         tunnel->err_count = 0;
 598         }
 599
 600         /*
 601          * Okay, now see if we can stuff it in the buffer as-is.
 602          */
 603         max_headroom = (LL_RESERVED_SPACE(tdev)+sizeof(struct iphdr));
 604
 605         if (skb_headroom(skb) < max_headroom || skb_cloned(skb) || skb_shared(skb)) {
 606                 struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
 607                 if (!new_skb) {
 608                         ip_rt_put(rt);
 609                         stats->tx_dropped++;
 610                         dev_kfree_skb(skb);
 611                         tunnel->recursion--;
 612                         return 0;
 613                 }
 614                 if (skb->sk)
 615                         skb_set_owner_w(new_skb, skb->sk);
 616                 dev_kfree_skb(skb);
 617                 skb = new_skb;
 618                 old_iph = skb->nh.iph;
 619         }
 620
 621         skb->h.raw = skb->nh.raw;
 622         skb->nh.raw = skb_push(skb, sizeof(struct iphdr));
 623         memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
 624         IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
 625                               IPSKB_REROUTED);
 626         dst_release(skb->dst);
 627         skb->dst = &rt->u.dst;
 628
 629         /*
 630          *      Push down and install the IPIP header.
 631          */
 632
 633         iph                     =       skb->nh.iph;
 634         iph->version            =       4;
 635         iph->ihl                =       sizeof(struct iphdr)>>2;
 636         iph->frag_off           =       df;
 637         iph->protocol           =       IPPROTO_IPIP;
 638         iph->tos                =       INET_ECN_encapsulate(tos, old_iph->tos);
 639         iph->daddr              =       rt->rt_dst;
 640         iph->saddr              =       rt->rt_src;
 641
 642         if ((iph->ttl = tiph->ttl) == 0)
 643                 iph->ttl        =       old_iph->ttl;
 644
 645         nf_reset(skb);
 646
 647         IPTUNNEL_XMIT();
 648         tunnel->recursion--;
 649         return 0;
 650
 651 tx_error_icmp:
 652         dst_link_failure(skb);
 653 tx_error:
 654         stats->tx_errors++;
 655         dev_kfree_skb(skb);
 656         tunnel->recursion--;
 657         return 0;
 658 }
 659
 660 static int
 661 ipip_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
 662 {
 663         int err = 0;
 664         struct ip_tunnel_parm p;
 665         struct ip_tunnel *t;
 666
 667         switch (cmd) {
 668         case SIOCGETTUNNEL:
 669                 t = NULL;
 670                 if (dev == ipip_fb_tunnel_dev) {
 671                         if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
 672                                 err = -EFAULT;
 673                                 break;
 674                         }
 675                         t = ipip_tunnel_locate(&p, 0);
 676                 }
 677                 if (t == NULL)
 678                         t = netdev_priv(dev);
 679                 memcpy(&p, &t->parms, sizeof(p));
 680                 if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
 681                         err = -EFAULT;
 682                 break;
 683
 684         case SIOCADDTUNNEL:
 685         case SIOCCHGTUNNEL:
 686                 err = -EPERM;
 687                 if (!capable(CAP_NET_ADMIN))
 688                         goto done;
 689
 690                 err = -EFAULT;
 691                 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
 692                         goto done;
 693
 694                 err = -EINVAL;
 695                 if (p.iph.version != 4 || p.iph.protocol != IPPROTO_IPIP ||
 696                     p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)))
 697                         goto done;
 698                 if (p.iph.ttl)
 699                         p.iph.frag_off |= htons(IP_DF);
 700
 701                 t = ipip_tunnel_locate(&p, cmd == SIOCADDTUNNEL);
 702
 703                 if (dev != ipip_fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
 704                         if (t != NULL) {
 705                                 if (t->dev != dev) {
 706                                         err = -EEXIST;
 707                                         break;
 708                                 }
 709                         } else {
 710                                 if (((dev->flags&IFF_POINTOPOINT) && !p.iph.daddr) ||
 711                                     (!(dev->flags&IFF_POINTOPOINT) && p.iph.daddr)) {
 712                                         err = -EINVAL;
 713                                         break;
 714                                 }
 715                                 t = netdev_priv(dev);
 716                                 ipip_tunnel_unlink(t);
 717                                 t->parms.iph.saddr = p.iph.saddr;
 718                                 t->parms.iph.daddr = p.iph.daddr;
 719                                 memcpy(dev->dev_addr, &p.iph.saddr, 4);
 720                                 memcpy(dev->broadcast, &p.iph.daddr, 4);
 721                                 ipip_tunnel_link(t);
 722                                 netdev_state_change(dev);
 723                         }
 724                 }
 725
 726                 if (t) {
 727                         err = 0;
 728                         if (cmd == SIOCCHGTUNNEL) {
 729                                 t->parms.iph.ttl = p.iph.ttl;
 730                                 t->parms.iph.tos = p.iph.tos;
 731                                 t->parms.iph.frag_off = p.iph.frag_off;
 732                         }
 733                         if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
 734                                 err = -EFAULT;
 735                 } else
 736                         err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
 737                 break;
 738
 739         case SIOCDELTUNNEL:
 740                 err = -EPERM;
 741                 if (!capable(CAP_NET_ADMIN))
 742                         goto done;
 743
 744                 if (dev == ipip_fb_tunnel_dev) {
 745                         err = -EFAULT;
 746                         if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
 747                                 goto done;
 748                         err = -ENOENT;
 749                         if ((t = ipip_tunnel_locate(&p, 0)) == NULL)
 750                                 goto done;
 751                         err = -EPERM;
 752                         if (t->dev == ipip_fb_tunnel_dev)
 753                                 goto done;
 754                         dev = t->dev;
 755                 }
 756                 unregister_netdevice(dev);
 757                 err = 0;
 758                 break;
 759
 760         default:
 761                 err = -EINVAL;
 762         }
 763
 764 done:
 765         return err;
 766 }
 767
 768 static struct net_device_stats *ipip_tunnel_get_stats(struct net_device *dev)
 769 {
 770         return &(((struct ip_tunnel*)netdev_priv(dev))->stat);
 771 }
 772
 773 static int ipip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
 774 {
 775         if (new_mtu < 68 || new_mtu > 0xFFF8 - sizeof(struct iphdr))
 776                 return -EINVAL;
 777         dev->mtu = new_mtu;
 778         return 0;
 779 }
 780
 781 static void ipip_tunnel_setup(struct net_device *dev)
 782 {
 783         SET_MODULE_OWNER(dev);
 784         dev->uninit             = ipip_tunnel_uninit;
 785         dev->hard_start_xmit    = ipip_tunnel_xmit;
 786         dev->get_stats          = ipip_tunnel_get_stats;
 787         dev->do_ioctl           = ipip_tunnel_ioctl;
 788         dev->change_mtu         = ipip_tunnel_change_mtu;
 789         dev->destructor         = free_netdev;
 790
 791         dev->type               = ARPHRD_TUNNEL;
 792         dev->hard_header_len    = LL_MAX_HEADER + sizeof(struct iphdr);
 793         dev->mtu                = ETH_DATA_LEN - sizeof(struct iphdr);
 794         dev->flags              = IFF_NOARP;
 795         dev->iflink             = 0;
 796         dev->addr_len           = 4;
 797 }
 798
 799 static int ipip_tunnel_init(struct net_device *dev)
 800 {
 801         struct net_device *tdev = NULL;
 802         struct ip_tunnel *tunnel;
 803         struct iphdr *iph;
 804
 805         tunnel = netdev_priv(dev);
 806         iph = &tunnel->parms.iph;
 807
 808         tunnel->dev = dev;
 809         strcpy(tunnel->parms.name, dev->name);
 810
 811         memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
 812         memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);
 813
 814         if (iph->daddr) {
 815                 struct flowi fl = { .oif = tunnel->parms.link,
 816                                     .nl_u = { .ip4_u =
 817                                               { .daddr = iph->daddr,
 818                                                 .saddr = iph->saddr,
 819                                                 .tos = RT_TOS(iph->tos) } },
 820                                     .proto = IPPROTO_IPIP };
 821                 struct rtable *rt;
 822                 if (!ip_route_output_key(&rt, &fl)) {
 823                         tdev = rt->u.dst.dev;
 824                         ip_rt_put(rt);
 825                 }
 826                 dev->flags |= IFF_POINTOPOINT;
 827         }
 828
 829         if (!tdev && tunnel->parms.link)
 830                 tdev = __dev_get_by_index(tunnel->parms.link);
 831
 832         if (tdev) {
 833                 dev->hard_header_len = tdev->hard_header_len + sizeof(struct iphdr);
 834                 dev->mtu = tdev->mtu - sizeof(struct iphdr);
 835         }
 836         dev->iflink = tunnel->parms.link;
 837
 838         return 0;
 839 }
 840
 841 static int __init ipip_fb_tunnel_init(struct net_device *dev)
 842 {
 843         struct ip_tunnel *tunnel = netdev_priv(dev);
 844         struct iphdr *iph = &tunnel->parms.iph;
 845
 846         tunnel->dev = dev;
 847         strcpy(tunnel->parms.name, dev->name);
 848
 849         iph->version            = 4;
 850         iph->protocol           = IPPROTO_IPIP;
 851         iph->ihl                = 5;
 852
 853         dev_hold(dev);
 854         tunnels_wc[0]           = tunnel;
 855         return 0;
 856 }
 857
 858 static struct xfrm_tunnel ipip_handler = {
 859         .handler        =       ipip_rcv,
 860         .err_handler    =       ipip_err,
 861         .priority       =       1,
 862 };
 863
 864 static char banner[] __initdata =
 865         KERN_INFO "IPv4 over IPv4 tunneling driver\n";
 866
 867 static int __init ipip_init(void)
 868 {
 869         int err;
 870
 871         printk(banner);
 872
 873         if (xfrm4_tunnel_register(&ipip_handler, AF_INET)) {
 874                 printk(KERN_INFO "ipip init: can't register tunnel\n");
 875                 return -EAGAIN;
 876         }
 877
 878         ipip_fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel),
 879                                            "tunl0",
 880                                            ipip_tunnel_setup);
 881         if (!ipip_fb_tunnel_dev) {
 882                 err = -ENOMEM;
 883                 goto err1;
 884         }
 885
 886         ipip_fb_tunnel_dev->init = ipip_fb_tunnel_init;
 887
 888         if ((err = register_netdev(ipip_fb_tunnel_dev)))
 889                 goto err2;
 890  out:
 891         return err;
 892  err2:
 893         free_netdev(ipip_fb_tunnel_dev);
 894  err1:
 895         xfrm4_tunnel_deregister(&ipip_handler, AF_INET);
 896         goto out;
 897 }
 898
 899 static void __exit ipip_destroy_tunnels(void)
 900 {
 901         int prio;
 902
 903         for (prio = 1; prio < 4; prio++) {
 904                 int h;
 905                 for (h = 0; h < HASH_SIZE; h++) {
 906                         struct ip_tunnel *t;
 907                         while ((t = tunnels[prio][h]) != NULL)
 908                                 unregister_netdevice(t->dev);
 909                 }
 910         }
 911 }
 912
 913 static void __exit ipip_fini(void)
 914 {
 915         if (xfrm4_tunnel_deregister(&ipip_handler, AF_INET))
 916                 printk(KERN_INFO "ipip close: can't deregister tunnel\n");
 917
 918         rtnl_lock();
 919         ipip_destroy_tunnels();
 920         unregister_netdevice(ipip_fb_tunnel_dev);
 921         rtnl_unlock();
 922 }
 923
 924 module_init(ipip_init);
 925 module_exit(ipip_fini);
 926 MODULE_LICENSE("GPL");