net/ipv4/ipip.c

   1 /*
   2  *      Linux NET3:     IP/IP protocol decoder.
   3  *
   4  *      Version: $Id: ipip.c,v 1.50 2001/10/02 02:22:36 davem Exp $
   5  *
   6  *      Authors:
   7  *              Sam Lantinga (slouken@cs.ucdavis.edu)  02/01/95
   8  *
   9  *      Fixes:
   10  *              Alan Cox        :       Merged and made usable non modular (it's so tiny it's silly as
  11  *                                      a module taking up 2 pages).
  12  *              Alan Cox        :       Fixed bug with 1.3.18 and IPIP not working (now needs to set skb->h.iph)
  13  *                                      to keep ip_forward happy.
  14  *              Alan Cox        :       More fixes for 1.3.21, and firewall fix. Maybe this will work soon 8).
  15  *              Kai Schulte     :       Fixed #defines for IP_FIREWALL->FIREWALL
  16  *              David Woodhouse :       Perform some basic ICMP handling.
  17  *                                      IPIP Routing without decapsulation.
  18  *              Carlos Picoto   :       GRE over IP support
  19  *              Alexey Kuznetsov:       Reworked. Really, now it is truncated version of ipv4/ip_gre.c.
  20  *                                      I do not want to merge them together.
  21  *
  22  *      This program is free software; you can redistribute it and/or
  23  *      modify it under the terms of the GNU General Public License
  24  *      as published by the Free Software Foundation; either version
  25  *      2 of the License, or (at your option) any later version.
  26  *
  27  */
  28
  29 /* tunnel.c: an IP tunnel driver
  30
  31         The purpose of this driver is to provide an IP tunnel through
  32         which you can tunnel network traffic transparently across subnets.
  33
  34         This was written by looking at Nick Holloway's dummy driver
  35         Thanks for the great code!
  36
  37                 -Sam Lantinga   (slouken@cs.ucdavis.edu)  02/01/95
  38
  39         Minor tweaks:
  40                 Cleaned up the code a little and added some pre-1.3.0 tweaks.
  41                 dev->hard_header/hard_header_len changed to use no headers.
  42                 Comments/bracketing tweaked.
  43                 Made the tunnels use dev->name not tunnel: when error reporting.
  44                 Added tx_dropped stat
  45
  46                 -Alan Cox       (Alan.Cox@linux.org) 21 March 95
  47
  48         Reworked:
  49                 Changed to tunnel to destination gateway in addition to the
  50                         tunnel's pointopoint address
  51                 Almost completely rewritten
  52                 Note:  There is currently no firewall or ICMP handling done.
  53
  54                 -Sam Lantinga   (slouken@cs.ucdavis.edu) 02/13/96
  55
  56 */
  57
  58 /* Things I wish I had known when writing the tunnel driver:
  59
  60         When the tunnel_xmit() function is called, the skb contains the
  61         packet to be sent (plus a great deal of extra info), and dev
  62         contains the tunnel device that _we_ are.
  63
  64         When we are passed a packet, we are expected to fill in the
  65         source address with our source IP address.
  66
  67         What is the proper way to allocate, copy and free a buffer?
  68         After you allocate it, it is a "0 length" chunk of memory
  69         starting at zero.  If you want to add headers to the buffer
  70         later, you'll have to call "skb_reserve(skb, amount)" with
  71         the amount of memory you want reserved.  Then, you call
  72         "skb_put(skb, amount)" with the amount of space you want in
  73         the buffer.  skb_put() returns a pointer to the top (#0) of
  74         that buffer.  skb->len is set to the amount of space you have
  75         "allocated" with skb_put().  You can then write up to skb->len
  76         bytes to that buffer.  If you need more, you can call skb_put()
  77         again with the additional amount of space you need.  You can
  78         find out how much more space you can allocate by calling
  79         "skb_tailroom(skb)".
  80         Now, to add header space, call "skb_push(skb, header_len)".
  81         This creates space at the beginning of the buffer and returns
  82         a pointer to this new space.  If later you need to strip a
  83         header from a buffer, call "skb_pull(skb, header_len)".
  84         skb_headroom() will return how much space is left at the top
  85         of the buffer (before the main data).  Remember, this headroom
  86         space must be reserved before the skb_put() function is called.
  87         */
  88
  89 /*
   90    This version of net/ipv4/ipip.c is a clone of net/ipv4/ip_gre.c
  91
  92    For comments look at net/ipv4/ip_gre.c --ANK
  93  */
  94
  95
  96 #include <linux/capability.h>
  97 #include <linux/module.h>
  98 #include <linux/types.h>
  99 #include <linux/sched.h>
 100 #include <linux/kernel.h>
 101 #include <asm/uaccess.h>
 102 #include <linux/skbuff.h>
 103 #include <linux/netdevice.h>
 104 #include <linux/in.h>
 105 #include <linux/tcp.h>
 106 #include <linux/udp.h>
 107 #include <linux/if_arp.h>
 108 #include <linux/mroute.h>
 109 #include <linux/init.h>
 110 #include <linux/netfilter_ipv4.h>
 111 #include <linux/if_ether.h>
 112
 113 #include <net/sock.h>
 114 #include <net/ip.h>
 115 #include <net/icmp.h>
 116 #include <net/ipip.h>
 117 #include <net/inet_ecn.h>
 118 #include <net/xfrm.h>
 119
 120 #define HASH_SIZE  16
 121 #define HASH(addr) (((__force u32)addr^((__force u32)addr>>4))&0xF)
 122
  123 static int ipip_fb_tunnel_init(struct net_device *dev);
  124 static int ipip_tunnel_init(struct net_device *dev);
  125 static void ipip_tunnel_setup(struct net_device *dev);
      /* The three prototypes above are forward declarations: the functions are
       * defined near the end of the file but are referenced earlier, when
       * tunnel devices are allocated. */
  126
      /* The fallback tunnel device ("tunl0"); allocated and registered in
       * ipip_init(), unregistered in ipip_fini(). */
  127 static struct net_device *ipip_fb_tunnel_dev;
  128
      /*
       * Tunnel tables, one per combination of configured outer addresses:
       *   tunnels_r_l - both remote and local address set
       *   tunnels_r   - only remote address set
       *   tunnels_l   - only local address set
       *   tunnels_wc  - neither set (single slot; ipip_fb_tunnel_init()
       *                 stores the fallback tunnel here)
       * Each slot is the head of a singly linked list chained via ->next.
       *
       * tunnels[] is indexed by the "prio" value computed in ipip_bucket():
       * bit 1 is set when remote is non-zero, bit 0 when local is non-zero.
       */
  129 static struct ip_tunnel *tunnels_r_l[HASH_SIZE];
  130 static struct ip_tunnel *tunnels_r[HASH_SIZE];
  131 static struct ip_tunnel *tunnels_l[HASH_SIZE];
  132 static struct ip_tunnel *tunnels_wc[1];
  133 static struct ip_tunnel **tunnels[4] = { tunnels_wc, tunnels_l, tunnels_r, tunnels_r_l };
  134
      /* Read-locked around lookups in ipip_rcv()/ipip_err(); write-locked
       * around the pointer updates that link or unlink a tunnel. */
  135 static DEFINE_RWLOCK(ipip_lock);
 136
 137 static struct ip_tunnel * ipip_tunnel_lookup(__be32 remote, __be32 local)
 138 {
 139         unsigned h0 = HASH(remote);
 140         unsigned h1 = HASH(local);
 141         struct ip_tunnel *t;
 142
 143         for (t = tunnels_r_l[h0^h1]; t; t = t->next) {
 144                 if (local == t->parms.iph.saddr &&
 145                     remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
 146                         return t;
 147         }
 148         for (t = tunnels_r[h0]; t; t = t->next) {
 149                 if (remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
 150                         return t;
 151         }
 152         for (t = tunnels_l[h1]; t; t = t->next) {
 153                 if (local == t->parms.iph.saddr && (t->dev->flags&IFF_UP))
 154                         return t;
 155         }
 156         if ((t = tunnels_wc[0]) != NULL && (t->dev->flags&IFF_UP))
 157                 return t;
 158         return NULL;
 159 }
 160
 161 static struct ip_tunnel **ipip_bucket(struct ip_tunnel *t)
 162 {
 163         __be32 remote = t->parms.iph.daddr;
 164         __be32 local = t->parms.iph.saddr;
 165         unsigned h = 0;
 166         int prio = 0;
 167
 168         if (remote) {
 169                 prio |= 2;
 170                 h ^= HASH(remote);
 171         }
 172         if (local) {
 173                 prio |= 1;
 174                 h ^= HASH(local);
 175         }
 176         return &tunnels[prio][h];
 177 }
 178
 179
 180 static void ipip_tunnel_unlink(struct ip_tunnel *t)
 181 {
 182         struct ip_tunnel **tp;
 183
 184         for (tp = ipip_bucket(t); *tp; tp = &(*tp)->next) {
 185                 if (t == *tp) {
 186                         write_lock_bh(&ipip_lock);
 187                         *tp = t->next;
 188                         write_unlock_bh(&ipip_lock);
 189                         break;
 190                 }
 191         }
 192 }
 193
 194 static void ipip_tunnel_link(struct ip_tunnel *t)
 195 {
 196         struct ip_tunnel **tp = ipip_bucket(t);
 197
 198         t->next = *tp;
 199         write_lock_bh(&ipip_lock);
 200         *tp = t;
 201         write_unlock_bh(&ipip_lock);
 202 }
 203
 204 static struct ip_tunnel * ipip_tunnel_locate(struct ip_tunnel_parm *parms, int create)
 205 {
 206         __be32 remote = parms->iph.daddr;
 207         __be32 local = parms->iph.saddr;
 208         struct ip_tunnel *t, **tp, *nt;
 209         struct net_device *dev;
 210         unsigned h = 0;
 211         int prio = 0;
 212         char name[IFNAMSIZ];
 213
 214         if (remote) {
 215                 prio |= 2;
 216                 h ^= HASH(remote);
 217         }
 218         if (local) {
 219                 prio |= 1;
 220                 h ^= HASH(local);
 221         }
 222         for (tp = &tunnels[prio][h]; (t = *tp) != NULL; tp = &t->next) {
 223                 if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr)
 224                         return t;
 225         }
 226         if (!create)
 227                 return NULL;
 228
 229         if (parms->name[0])
 230                 strlcpy(name, parms->name, IFNAMSIZ);
 231         else {
 232                 int i;
 233                 for (i=1; i<100; i++) {
 234                         sprintf(name, "tunl%d", i);
 235                         if (__dev_get_by_name(name) == NULL)
 236                                 break;
 237                 }
 238                 if (i==100)
 239                         goto failed;
 240         }
 241
 242         dev = alloc_netdev(sizeof(*t), name, ipip_tunnel_setup);
 243         if (dev == NULL)
 244                 return NULL;
 245
 246         nt = netdev_priv(dev);
 247         SET_MODULE_OWNER(dev);
 248         dev->init = ipip_tunnel_init;
 249         nt->parms = *parms;
 250
 251         if (register_netdevice(dev) < 0) {
 252                 free_netdev(dev);
 253                 goto failed;
 254         }
 255
 256         dev_hold(dev);
 257         ipip_tunnel_link(nt);
 258         return nt;
 259
 260 failed:
 261         return NULL;
 262 }
 263
 264 static void ipip_tunnel_uninit(struct net_device *dev)
 265 {
 266         if (dev == ipip_fb_tunnel_dev) {
 267                 write_lock_bh(&ipip_lock);
 268                 tunnels_wc[0] = NULL;
 269                 write_unlock_bh(&ipip_lock);
 270         } else
 271                 ipip_tunnel_unlink(netdev_priv(dev));
 272         dev_put(dev);
 273 }
 274
  275 static int ipip_err(struct sk_buff *skb, u32 info)
  276 {
      /*
       * ICMP error handler, installed as ipip_handler.err_handler.
       *
       * Only the first preprocessor branch is built unless
       * I_WISH_WORLD_WERE_PERFECT is defined.  That branch filters on the ICMP
       * type/code, looks up the tunnel matching the addresses of the IP header
       * found at skb->data, and records the error in the tunnel's
       * err_count/err_time, which ipip_tunnel_xmit() consults later.
       *
       * Returns 0 when the error is ignored or has been recorded, and -ENOENT
       * when no tunnel matches or the matching tunnel has no configured
       * destination address.  The info argument is not used in this branch.
       */
  277 #ifndef I_WISH_WORLD_WERE_PERFECT
  278
  279 /* It is not :-( All the routers (except for Linux) return only
  280    8 bytes of packet payload. It means, that precise relaying of
  281    ICMP in the real Internet is absolutely infeasible.
  282  */
  283         struct iphdr *iph = (struct iphdr*)skb->data;
  284         int type = skb->h.icmph->type;
  285         int code = skb->h.icmph->code;
  286         struct ip_tunnel *t;
  287         int err;
  288
              /* Decide whether this ICMP type/code is worth recording at all. */
  289         switch (type) {
  290         default:
  291         case ICMP_PARAMETERPROB:
  292                 return 0;
  293
  294         case ICMP_DEST_UNREACH:
  295                 switch (code) {
  296                 case ICMP_SR_FAILED:
  297                 case ICMP_PORT_UNREACH:
  298                         /* Impossible event. */
  299                         return 0;
  300                 case ICMP_FRAG_NEEDED:
  301                         /* Soft state for pmtu is maintained by IP core. */
  302                         return 0;
  303                 default:
  304                         /* All others are translated to HOST_UNREACH.
  305                            rfc2003 contains "deep thoughts" about NET_UNREACH,
  306                            I believe they are just ether pollution. --ANK
  307                          */
  308                         break;
  309                 }
  310                 break;
  311         case ICMP_TIME_EXCEEDED:
  312                 if (code != ICMP_EXC_TTL)
  313                         return 0;
  314                 break;
  315         }
  316
  317         err = -ENOENT;
  318
  319         read_lock(&ipip_lock);
              /* Note the argument order: the header's daddr is passed as the
               * tunnel's remote and its saddr as the local address, i.e. swapped
               * relative to ipip_rcv().  Presumably the header at skb->data is the
               * one we originally transmitted - confirm against the caller. */
  320         t = ipip_tunnel_lookup(iph->daddr, iph->saddr);
  321         if (t == NULL || t->parms.iph.daddr == 0)
  322                 goto out;
  323
  324         err = 0;
              /* With no fixed TTL configured on the tunnel, TIME_EXCEEDED is not
               * counted as a tunnel error. */
  325         if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
  326                 goto out;
  327
              /* Count errors arriving within IPTUNNEL_ERR_TIMEO of the previous
               * one; otherwise restart the count at 1. */
  328         if (jiffies - t->err_time < IPTUNNEL_ERR_TIMEO)
  329                 t->err_count++;
  330         else
  331                 t->err_count = 1;
  332         t->err_time = jiffies;
  333 out:
  334         read_unlock(&ipip_lock);
  335         return err;
  336 #else
              /*
               * NOTE(review): this branch is only compiled when
               * I_WISH_WORLD_WERE_PERFECT is defined.  As written it references
               * `dp`, `len` and `key`, none of which is declared in this function,
               * so it does not look buildable as-is.  It appears intended to relay
               * a translated ICMP error to the sender of the inner packet via
               * icmp_send().
               */
  337         struct iphdr *iph = (struct iphdr*)dp;
  338         int hlen = iph->ihl<<2;
  339         struct iphdr *eiph;
  340         int type = skb->h.icmph->type;
  341         int code = skb->h.icmph->code;
  342         int rel_type = 0;
  343         int rel_code = 0;
  344         __be32 rel_info = 0;
  345         __u32 n = 0;
  346         struct sk_buff *skb2;
  347         struct flowi fl;
  348         struct rtable *rt;
  349
  350         if (len < hlen + sizeof(struct iphdr))
  351                 return 0;
  352         eiph = (struct iphdr*)(dp + hlen);
  353
  354         switch (type) {
  355         default:
  356                 return 0;
  357         case ICMP_PARAMETERPROB:
  358                 n = ntohl(skb->h.icmph->un.gateway) >> 24;
  359                 if (n < hlen)
  360                         return 0;
  361
  362                 /* So... This guy found something strange INSIDE encapsulated
  363                    packet. Well, he is fool, but what can we do ?
  364                  */
  365                 rel_type = ICMP_PARAMETERPROB;
  366                 rel_info = htonl((n - hlen) << 24);
  367                 break;
  368
  369         case ICMP_DEST_UNREACH:
  370                 switch (code) {
  371                 case ICMP_SR_FAILED:
  372                 case ICMP_PORT_UNREACH:
  373                         /* Impossible event. */
  374                         return 0;
  375                 case ICMP_FRAG_NEEDED:
  376                         /* And it is the only really necessary thing :-) */
  377                         n = ntohs(skb->h.icmph->un.frag.mtu);
  378                         if (n < hlen+68)
  379                                 return 0;
  380                         n -= hlen;
  381                         /* BSD 4.2 MORE DOES NOT EXIST IN NATURE. */
  382                         if (n > ntohs(eiph->tot_len))
  383                                 return 0;
  384                         rel_info = htonl(n);
  385                         break;
  386                 default:
  387                         /* All others are translated to HOST_UNREACH.
  388                            rfc2003 contains "deep thoughts" about NET_UNREACH,
  389                            I believe, it is just ether pollution. --ANK
  390                          */
  391                         rel_type = ICMP_DEST_UNREACH;
  392                         rel_code = ICMP_HOST_UNREACH;
  393                         break;
  394                 }
  395                 break;
  396         case ICMP_TIME_EXCEEDED:
  397                 if (code != ICMP_EXC_TTL)
  398                         return 0;
  399                 break;
  400         }
  401
  402         /* Prepare fake skb to feed it to icmp_send */
  403         skb2 = skb_clone(skb, GFP_ATOMIC);
  404         if (skb2 == NULL)
  405                 return 0;
  406         dst_release(skb2->dst);
  407         skb2->dst = NULL;
  408         skb_pull(skb2, skb->data - (u8*)eiph);
  409         skb2->nh.raw = skb2->data;
  410
  411         /* Try to guess incoming interface */
  412         memset(&fl, 0, sizeof(fl));
  413         fl.fl4_daddr = eiph->saddr;
  414         fl.fl4_tos = RT_TOS(eiph->tos);
  415         fl.proto = IPPROTO_IPIP;
              /* NOTE(review): `fl` is filled in just above, yet `&key` is what
               * gets passed here; `key` is not declared in this function. */
  416         if (ip_route_output_key(&rt, &key)) {
  417                 kfree_skb(skb2);
  418                 return 0;
  419         }
  420         skb2->dev = rt->u.dst.dev;
  421
  422         /* route "incoming" packet */
  423         if (rt->rt_flags&RTCF_LOCAL) {
  424                 ip_rt_put(rt);
  425                 rt = NULL;
  426                 fl.fl4_daddr = eiph->daddr;
  427                 fl.fl4_src = eiph->saddr;
  428                 fl.fl4_tos = eiph->tos;
  429                 if (ip_route_output_key(&rt, &fl) ||
  430                     rt->u.dst.dev->type != ARPHRD_TUNNEL) {
  431                         ip_rt_put(rt);
  432                         kfree_skb(skb2);
  433                         return 0;
  434                 }
  435         } else {
  436                 ip_rt_put(rt);
  437                 if (ip_route_input(skb2, eiph->daddr, eiph->saddr, eiph->tos, skb2->dev) ||
  438                     skb2->dst->dev->type != ARPHRD_TUNNEL) {
  439                         kfree_skb(skb2);
  440                         return 0;
  441                 }
  442         }
  443
  444         /* change mtu on this route */
  445         if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
  446                 if (n > dst_mtu(skb2->dst)) {
  447                         kfree_skb(skb2);
  448                         return 0;
  449                 }
  450                 skb2->dst->ops->update_pmtu(skb2->dst, n);
  451         } else if (type == ICMP_TIME_EXCEEDED) {
  452                 struct ip_tunnel *t = netdev_priv(skb2->dev);
  453                 if (t->parms.iph.ttl) {
  454                         rel_type = ICMP_DEST_UNREACH;
  455                         rel_code = ICMP_HOST_UNREACH;
  456                 }
  457         }
  458
  459         icmp_send(skb2, rel_type, rel_code, rel_info);
  460         kfree_skb(skb2);
  461         return 0;
  462 #endif
  463 }
 464
 465 static inline void ipip_ecn_decapsulate(struct iphdr *outer_iph, struct sk_buff *skb)
 466 {
 467         struct iphdr *inner_iph = skb->nh.iph;
 468
 469         if (INET_ECN_is_ce(outer_iph->tos))
 470                 IP_ECN_set_ce(inner_iph);
 471 }
 472
 473 static int ipip_rcv(struct sk_buff *skb)
 474 {
 475         struct iphdr *iph;
 476         struct ip_tunnel *tunnel;
 477
 478         iph = skb->nh.iph;
 479
 480         read_lock(&ipip_lock);
 481         if ((tunnel = ipip_tunnel_lookup(iph->saddr, iph->daddr)) != NULL) {
 482                 if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
 483                         read_unlock(&ipip_lock);
 484                         kfree_skb(skb);
 485                         return 0;
 486                 }
 487
 488                 secpath_reset(skb);
 489
 490                 skb->mac.raw = skb->nh.raw;
 491                 skb->nh.raw = skb->data;
 492                 skb->protocol = htons(ETH_P_IP);
 493                 skb->pkt_type = PACKET_HOST;
 494
 495                 tunnel->stat.rx_packets++;
 496                 tunnel->stat.rx_bytes += skb->len;
 497                 skb->dev = tunnel->dev;
 498                 dst_release(skb->dst);
 499                 skb->dst = NULL;
 500                 nf_reset(skb);
 501                 ipip_ecn_decapsulate(iph, skb);
 502                 netif_rx(skb);
 503                 read_unlock(&ipip_lock);
 504                 return 0;
 505         }
 506         read_unlock(&ipip_lock);
 507
 508         return -1;
 509 }
 510
 511 /*
 512  *      This function assumes it is being called from dev_queue_xmit()
 513  *      and that skb is filled properly by that function.
 514  */
 515
  516 static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
  517 {
      /*
       * dev->hard_start_xmit callback: wrap an IPv4 packet in an outer IPv4
       * header (protocol IPPROTO_IPIP) built from the tunnel parameters and
       * send it towards the tunnel endpoint.
       *
       * Always returns 0.  On every error path the skb is freed here and
       * tx_errors (or tx_dropped) is bumped; on success the skb is handed to
       * IPTUNNEL_XMIT().
       *
       * NOTE(review): IPTUNNEL_XMIT() is an argument-less macro defined outside
       * this file (presumably in <net/ipip.h>), so it presumably relies on the
       * local variable names used here (skb, iph, rt, stats, ...).  Check the
       * macro before renaming any local.
       */
  518         struct ip_tunnel *tunnel = netdev_priv(dev);
  519         struct net_device_stats *stats = &tunnel->stat;
  520         struct iphdr  *tiph = &tunnel->parms.iph;
  521         u8     tos = tunnel->parms.iph.tos;
  522         __be16 df = tiph->frag_off;
  523         struct rtable *rt;                      /* Route to the other host */
  524         struct net_device *tdev;                        /* Device to other host */
  525         struct iphdr  *old_iph = skb->nh.iph;
  526         struct iphdr  *iph;                     /* Our new IP header */
  527         int    max_headroom;                    /* The extra header space needed */
  528         __be32 dst = tiph->daddr;
  529         int    mtu;
  530
              /* Re-entrancy guard: a non-zero counter means this tunnel is already
               * in the middle of a transmit; count it as a collision and drop. */
  531         if (tunnel->recursion++) {
  532                 tunnel->stat.collisions++;
  533                 goto tx_error;
  534         }
  535
              /* Only IPv4 payloads are encapsulated. */
  536         if (skb->protocol != htons(ETH_P_IP))
  537                 goto tx_error;
  538
              /* Low bit of the configured TOS set: take the TOS from the inner
               * packet instead. */
  539         if (tos&1)
  540                 tos = old_iph->tos;
  541
  542         if (!dst) {
  543                 /* NBMA tunnel */
                      /* No fixed remote endpoint: use the gateway of the route
                       * already attached to the skb as the outer destination. */
  544                 if ((rt = (struct rtable*)skb->dst) == NULL) {
  545                         tunnel->stat.tx_fifo_errors++;
  546                         goto tx_error;
  547                 }
  548                 if ((dst = rt->rt_gateway) == 0)
  549                         goto tx_error_icmp;
  550         }
  551
              /* Route the outer packet. */
  552         {
  553                 struct flowi fl = { .oif = tunnel->parms.link,
  554                                     .nl_u = { .ip4_u =
  555                                               { .daddr = dst,
  556                                                 .saddr = tiph->saddr,
  557                                                 .tos = RT_TOS(tos) } },
  558                                     .proto = IPPROTO_IPIP };
  559                 if (ip_route_output_key(&rt, &fl)) {
  560                         tunnel->stat.tx_carrier_errors++;
  561                         goto tx_error_icmp;
  562                 }
  563         }
  564         tdev = rt->u.dst.dev;
  565
              /* The outer route points back at this tunnel device: sending would
               * loop, so drop the packet. */
  566         if (tdev == dev) {
  567                 ip_rt_put(rt);
  568                 tunnel->stat.collisions++;
  569                 goto tx_error;
  570         }
  571
              /* Payload MTU: with a non-zero configured frag_off, derive it from
               * the outer route minus the outer header; otherwise use the MTU of
               * the skb's own route, or of this device when it has none. */
  572         if (tiph->frag_off)
  573                 mtu = dst_mtu(&rt->u.dst) - sizeof(struct iphdr);
  574         else
  575                 mtu = skb->dst ? dst_mtu(skb->dst) : dev->mtu;
  576
  577         if (mtu < 68) {
  578                 tunnel->stat.collisions++;
  579                 ip_rt_put(rt);
  580                 goto tx_error;
  581         }
  582         if (skb->dst)
  583                 skb->dst->ops->update_pmtu(skb->dst, mtu);
  584
              /* Copy the inner packet's DF bit into the outer header. */
  585         df |= (old_iph->frag_off&htons(IP_DF));
  586
              /* Inner packet has DF set and does not fit: report FRAG_NEEDED to
               * the sender and drop. */
  587         if ((old_iph->frag_off&htons(IP_DF)) && mtu < ntohs(old_iph->tot_len)) {
  588                 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
  589                 ip_rt_put(rt);
  590                 goto tx_error;
  591         }
  592
              /* ipip_err() recorded recent ICMP errors for this tunnel: signal a
               * link failure for this packet and consume one recorded error, or
               * clear the count once the errors are older than IPTUNNEL_ERR_TIMEO.
               * The packet is still transmitted below. */
  593         if (tunnel->err_count > 0) {
  594                 if (jiffies - tunnel->err_time < IPTUNNEL_ERR_TIMEO) {
  595                         tunnel->err_count--;
  596                         dst_link_failure(skb);
  597                 } else
  598                         tunnel->err_count = 0;
  599         }
  600
  601         /*
  602          * Okay, now see if we can stuff it in the buffer as-is.
  603          */
  604         max_headroom = (LL_RESERVED_SPACE(tdev)+sizeof(struct iphdr));
  605
              /* Not enough headroom, or the buffer is cloned/shared: work on a
               * private copy with enough room and free the original. */
  606         if (skb_headroom(skb) < max_headroom || skb_cloned(skb) || skb_shared(skb)) {
  607                 struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
  608                 if (!new_skb) {
  609                         ip_rt_put(rt);
  610                         stats->tx_dropped++;
  611                         dev_kfree_skb(skb);
  612                         tunnel->recursion--;
  613                         return 0;
  614                 }
  615                 if (skb->sk)
  616                         skb_set_owner_w(new_skb, skb->sk);
  617                 dev_kfree_skb(skb);
  618                 skb = new_skb;
                      /* Re-read the inner header pointer from the new buffer. */
  619                 old_iph = skb->nh.iph;
  620         }
  621
              /* The old network header becomes the transport header; make room
               * for the outer header in front of it. */
  622         skb->h.raw = skb->nh.raw;
  623         skb->nh.raw = skb_push(skb, sizeof(struct iphdr));
  624         memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
  625         IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
  626                               IPSKB_REROUTED);
              /* Swap the skb's route for the outer route looked up above. */
  627         dst_release(skb->dst);
  628         skb->dst = &rt->u.dst;
  629
  630         /*
  631          *      Push down and install the IPIP header.
  632          */
  633
  634         iph                     =       skb->nh.iph;
  635         iph->version            =       4;
  636         iph->ihl                =       sizeof(struct iphdr)>>2;
  637         iph->frag_off           =       df;
  638         iph->protocol           =       IPPROTO_IPIP;
  639         iph->tos                =       INET_ECN_encapsulate(tos, old_iph->tos);
  640         iph->daddr              =       rt->rt_dst;
  641         iph->saddr              =       rt->rt_src;
  642
              /* A configured TTL of 0 means: copy the TTL of the inner packet. */
  643         if ((iph->ttl = tiph->ttl) == 0)
  644                 iph->ttl        =       old_iph->ttl;
  645
  646         nf_reset(skb);
  647
  648         IPTUNNEL_XMIT();
  649         tunnel->recursion--;
  650         return 0;
  651
  652 tx_error_icmp:
  653         dst_link_failure(skb);
  654 tx_error:
  655         stats->tx_errors++;
  656         dev_kfree_skb(skb);
  657         tunnel->recursion--;
  658         return 0;
  659 }
 660
  661 static int
  662 ipip_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
  663 {
      /*
       * dev->do_ioctl callback: get, add, change and delete tunnels.
       *
       * The user buffer at ifr->ifr_ifru.ifru_data holds a
       * struct ip_tunnel_parm.  Returns 0 on success or a negative errno:
       * -EFAULT (user copy failed), -EPERM (missing CAP_NET_ADMIN, or an
       * attempt to delete the fallback device), -EINVAL (bad parameters or
       * unknown cmd), -EEXIST, -ENOENT, -ENOBUFS.
       *
       * NOTE(review): p is copied from user space verbatim; p.name is not
       * NUL-terminated here, so callees that treat it as a C string must not
       * assume termination.
       */
  664         int err = 0;
  665         struct ip_tunnel_parm p;
  666         struct ip_tunnel *t;
  667
  668         switch (cmd) {
  669         case SIOCGETTUNNEL:
                      /* On the fallback device the caller's parameters select which
                       * tunnel to report; if nothing matches (or on any other
                       * device) the device's own parameters are reported. */
  670                 t = NULL;
  671                 if (dev == ipip_fb_tunnel_dev) {
  672                         if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
  673                                 err = -EFAULT;
  674                                 break;
  675                         }
  676                         t = ipip_tunnel_locate(&p, 0);
  677                 }
  678                 if (t == NULL)
  679                         t = netdev_priv(dev);
  680                 memcpy(&p, &t->parms, sizeof(p));
  681                 if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
  682                         err = -EFAULT;
  683                 break;
  684
  685         case SIOCADDTUNNEL:
  686         case SIOCCHGTUNNEL:
  687                 err = -EPERM;
  688                 if (!capable(CAP_NET_ADMIN))
  689                         goto done;
  690
  691                 err = -EFAULT;
  692                 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
  693                         goto done;
  694
                      /* The template header must be plain IPv4-in-IPv4: version 4,
                       * protocol IPIP, 20-byte header, and no frag_off bits other
                       * than DF. */
  695                 err = -EINVAL;
  696                 if (p.iph.version != 4 || p.iph.protocol != IPPROTO_IPIP ||
  697                     p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)))
  698                         goto done;
                      /* A fixed TTL forces DF on the outer header. */
  699                 if (p.iph.ttl)
  700                         p.iph.frag_off |= htons(IP_DF);
  701
                      /* For ADD this creates the tunnel if it does not exist yet;
                       * for CHG it only looks it up. */
  702                 t = ipip_tunnel_locate(&p, cmd == SIOCADDTUNNEL);
  703
  704                 if (dev != ipip_fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
  705                         if (t != NULL) {
                                      /* The requested address pair already belongs
                                       * to a different device. */
  706                                 if (t->dev != dev) {
  707                                         err = -EEXIST;
  708                                         break;
  709                                 }
  710                         } else {
                                      /* The address pair is free.  The device's
                                       * POINTOPOINT flag must agree with whether a
                                       * remote address is given. */
  711                                 if (((dev->flags&IFF_POINTOPOINT) && !p.iph.daddr) ||
  712                                     (!(dev->flags&IFF_POINTOPOINT) && p.iph.daddr)) {
  713                                         err = -EINVAL;
  714                                         break;
  715                                 }
                                      /* Re-hash this device's tunnel under its new
                                       * addresses: unlink, update, link again. */
  716                                 t = netdev_priv(dev);
  717                                 ipip_tunnel_unlink(t);
  718                                 t->parms.iph.saddr = p.iph.saddr;
  719                                 t->parms.iph.daddr = p.iph.daddr;
  720                                 memcpy(dev->dev_addr, &p.iph.saddr, 4);
  721                                 memcpy(dev->broadcast, &p.iph.daddr, 4);
  722                                 ipip_tunnel_link(t);
  723                                 netdev_state_change(dev);
  724                         }
  725                 }
  726
  727                 if (t) {
  728                         err = 0;
  729                         if (cmd == SIOCCHGTUNNEL) {
  730                                 t->parms.iph.ttl = p.iph.ttl;
  731                                 t->parms.iph.tos = p.iph.tos;
  732                                 t->parms.iph.frag_off = p.iph.frag_off;
  733                         }
                              /* Report the resulting parameters back to the caller. */
  734                         if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
  735                                 err = -EFAULT;
  736                 } else
  737                         err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
  738                 break;
  739
  740         case SIOCDELTUNNEL:
  741                 err = -EPERM;
  742                 if (!capable(CAP_NET_ADMIN))
  743                         goto done;
  744
                      /* Issued on the fallback device, the caller's parameters name
                       * the tunnel to delete (never the fallback itself); issued on
                       * any other device, that device is deleted. */
  745                 if (dev == ipip_fb_tunnel_dev) {
  746                         err = -EFAULT;
  747                         if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
  748                                 goto done;
  749                         err = -ENOENT;
  750                         if ((t = ipip_tunnel_locate(&p, 0)) == NULL)
  751                                 goto done;
  752                         err = -EPERM;
  753                         if (t->dev == ipip_fb_tunnel_dev)
  754                                 goto done;
  755                         dev = t->dev;
  756                 }
  757                 unregister_netdevice(dev);
  758                 err = 0;
  759                 break;
  760
  761         default:
  762                 err = -EINVAL;
  763         }
  764
  765 done:
  766         return err;
  767 }
 768
 769 static struct net_device_stats *ipip_tunnel_get_stats(struct net_device *dev)
 770 {
 771         return &(((struct ip_tunnel*)netdev_priv(dev))->stat);
 772 }
 773
 774 static int ipip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
 775 {
 776         if (new_mtu < 68 || new_mtu > 0xFFF8 - sizeof(struct iphdr))
 777                 return -EINVAL;
 778         dev->mtu = new_mtu;
 779         return 0;
 780 }
 781
 782 static void ipip_tunnel_setup(struct net_device *dev)
 783 {
 784         SET_MODULE_OWNER(dev);
 785         dev->uninit             = ipip_tunnel_uninit;
 786         dev->hard_start_xmit    = ipip_tunnel_xmit;
 787         dev->get_stats          = ipip_tunnel_get_stats;
 788         dev->do_ioctl           = ipip_tunnel_ioctl;
 789         dev->change_mtu         = ipip_tunnel_change_mtu;
 790         dev->destructor         = free_netdev;
 791
 792         dev->type               = ARPHRD_TUNNEL;
 793         dev->hard_header_len    = LL_MAX_HEADER + sizeof(struct iphdr);
 794         dev->mtu                = ETH_DATA_LEN - sizeof(struct iphdr);
 795         dev->flags              = IFF_NOARP;
 796         dev->iflink             = 0;
 797         dev->addr_len           = 4;
 798 }
 799
 800 static int ipip_tunnel_init(struct net_device *dev)
 801 {
 802         struct net_device *tdev = NULL;
 803         struct ip_tunnel *tunnel;
 804         struct iphdr *iph;
 805
 806         tunnel = netdev_priv(dev);
 807         iph = &tunnel->parms.iph;
 808
 809         tunnel->dev = dev;
 810         strcpy(tunnel->parms.name, dev->name);
 811
 812         memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
 813         memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);
 814
 815         if (iph->daddr) {
 816                 struct flowi fl = { .oif = tunnel->parms.link,
 817                                     .nl_u = { .ip4_u =
 818                                               { .daddr = iph->daddr,
 819                                                 .saddr = iph->saddr,
 820                                                 .tos = RT_TOS(iph->tos) } },
 821                                     .proto = IPPROTO_IPIP };
 822                 struct rtable *rt;
 823                 if (!ip_route_output_key(&rt, &fl)) {
 824                         tdev = rt->u.dst.dev;
 825                         ip_rt_put(rt);
 826                 }
 827                 dev->flags |= IFF_POINTOPOINT;
 828         }
 829
 830         if (!tdev && tunnel->parms.link)
 831                 tdev = __dev_get_by_index(tunnel->parms.link);
 832
 833         if (tdev) {
 834                 dev->hard_header_len = tdev->hard_header_len + sizeof(struct iphdr);
 835                 dev->mtu = tdev->mtu - sizeof(struct iphdr);
 836         }
 837         dev->iflink = tunnel->parms.link;
 838
 839         return 0;
 840 }
 841
 842 static int __init ipip_fb_tunnel_init(struct net_device *dev)
 843 {
 844         struct ip_tunnel *tunnel = netdev_priv(dev);
 845         struct iphdr *iph = &tunnel->parms.iph;
 846
 847         tunnel->dev = dev;
 848         strcpy(tunnel->parms.name, dev->name);
 849
 850         iph->version            = 4;
 851         iph->protocol           = IPPROTO_IPIP;
 852         iph->ihl                = 5;
 853
 854         dev_hold(dev);
 855         tunnels_wc[0]           = tunnel;
 856         return 0;
 857 }
 858
  859 static struct xfrm_tunnel ipip_handler = {
      /* Receive and ICMP-error callbacks; this handler is registered with
       * xfrm4_tunnel_register() in ipip_init() and removed again in
       * ipip_fini(). */
  860         .handler        =       ipip_rcv,
  861         .err_handler    =       ipip_err,
  862         .priority       =       1,
  863 };
  864
      /* Banner printed once by ipip_init(); the KERN_INFO log-level prefix is
       * part of the string, which is why it is passed to printk() as the
       * format itself. */
  865 static char banner[] __initdata =
  866         KERN_INFO "IPv4 over IPv4 tunneling driver\n";
 867
 868 static int __init ipip_init(void)
 869 {
 870         int err;
 871
 872         printk(banner);
 873
 874         if (xfrm4_tunnel_register(&ipip_handler)) {
 875                 printk(KERN_INFO "ipip init: can't register tunnel\n");
 876                 return -EAGAIN;
 877         }
 878
 879         ipip_fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel),
 880                                            "tunl0",
 881                                            ipip_tunnel_setup);
 882         if (!ipip_fb_tunnel_dev) {
 883                 err = -ENOMEM;
 884                 goto err1;
 885         }
 886
 887         ipip_fb_tunnel_dev->init = ipip_fb_tunnel_init;
 888
 889         if ((err = register_netdev(ipip_fb_tunnel_dev)))
 890                 goto err2;
 891  out:
 892         return err;
 893  err2:
 894         free_netdev(ipip_fb_tunnel_dev);
 895  err1:
 896         xfrm4_tunnel_deregister(&ipip_handler);
 897         goto out;
 898 }
 899
 900 static void __exit ipip_destroy_tunnels(void)
 901 {
 902         int prio;
 903
 904         for (prio = 1; prio < 4; prio++) {
 905                 int h;
 906                 for (h = 0; h < HASH_SIZE; h++) {
 907                         struct ip_tunnel *t;
 908                         while ((t = tunnels[prio][h]) != NULL)
 909                                 unregister_netdevice(t->dev);
 910                 }
 911         }
 912 }
 913
 914 static void __exit ipip_fini(void)
 915 {
 916         if (xfrm4_tunnel_deregister(&ipip_handler))
 917                 printk(KERN_INFO "ipip close: can't deregister tunnel\n");
 918
 919         rtnl_lock();
 920         ipip_destroy_tunnels();
 921         unregister_netdevice(ipip_fb_tunnel_dev);
 922         rtnl_unlock();
 923 }
 924
  925 module_init(ipip_init);
      /* ipip_init() above is the module's initialisation entry point;
       * ipip_fini() below is its exit entry point. */
  926 module_exit(ipip_fini);
  927 MODULE_LICENSE("GPL");