net/ipv4/ipip.c

   1 /*
   2  *      Linux NET3:     IP/IP protocol decoder.
   3  *
   4  *      Version: $Id: ipip.c,v 1.50 2001/10/02 02:22:36 davem Exp $
   5  *
   6  *      Authors:
   7  *              Sam Lantinga (slouken@cs.ucdavis.edu)  02/01/95
   8  *
   9  *      Fixes:
  10  *              Alan Cox        :       Merged and made usable non modular (its so tiny its silly as
  11  *                                      a module taking up 2 pages).
  12  *              Alan Cox        :       Fixed bug with 1.3.18 and IPIP not working (now needs to set skb->h.iph)
  13  *                                      to keep ip_forward happy.
  14  *              Alan Cox        :       More fixes for 1.3.21, and firewall fix. Maybe this will work soon 8).
  15  *              Kai Schulte     :       Fixed #defines for IP_FIREWALL->FIREWALL
  16  *              David Woodhouse :       Perform some basic ICMP handling.
  17  *                                      IPIP Routing without decapsulation.
  18  *              Carlos Picoto   :       GRE over IP support
  19  *              Alexey Kuznetsov:       Reworked. Really, now it is truncated version of ipv4/ip_gre.c.
  20  *                                      I do not want to merge them together.
  21  *
  22  *      This program is free software; you can redistribute it and/or
  23  *      modify it under the terms of the GNU General Public License
  24  *      as published by the Free Software Foundation; either version
  25  *      2 of the License, or (at your option) any later version.
  26  *
  27  */
  28
  29 /* tunnel.c: an IP tunnel driver
  30
  31         The purpose of this driver is to provide an IP tunnel through
  32         which you can tunnel network traffic transparently across subnets.
  33
  34         This was written by looking at Nick Holloway's dummy driver
  35         Thanks for the great code!
  36
  37                 -Sam Lantinga   (slouken@cs.ucdavis.edu)  02/01/95
  38
  39         Minor tweaks:
  40                 Cleaned up the code a little and added some pre-1.3.0 tweaks.
  41                 dev->hard_header/hard_header_len changed to use no headers.
  42                 Comments/bracketing tweaked.
  43                 Made the tunnels use dev->name not tunnel: when error reporting.
  44                 Added tx_dropped stat
  45
  46                 -Alan Cox       (Alan.Cox@linux.org) 21 March 95
  47
  48         Reworked:
  49                 Changed to tunnel to destination gateway in addition to the
  50                         tunnel's pointopoint address
  51                 Almost completely rewritten
  52                 Note:  There is currently no firewall or ICMP handling done.
  53
  54                 -Sam Lantinga   (slouken@cs.ucdavis.edu) 02/13/96
  55
  56 */
  57
  58 /* Things I wish I had known when writing the tunnel driver:
  59
  60         When the tunnel_xmit() function is called, the skb contains the
  61         packet to be sent (plus a great deal of extra info), and dev
  62         contains the tunnel device that _we_ are.
  63
  64         When we are passed a packet, we are expected to fill in the
  65         source address with our source IP address.
  66
  67         What is the proper way to allocate, copy and free a buffer?
  68         After you allocate it, it is a "0 length" chunk of memory
  69         starting at zero.  If you want to add headers to the buffer
  70         later, you'll have to call "skb_reserve(skb, amount)" with
  71         the amount of memory you want reserved.  Then, you call
  72         "skb_put(skb, amount)" with the amount of space you want in
  73         the buffer.  skb_put() returns a pointer to the top (#0) of
  74         that buffer.  skb->len is set to the amount of space you have
  75         "allocated" with skb_put().  You can then write up to skb->len
  76         bytes to that buffer.  If you need more, you can call skb_put()
  77         again with the additional amount of space you need.  You can
  78         find out how much more space you can allocate by calling
  79         "skb_tailroom(skb)".
  80         Now, to add header space, call "skb_push(skb, header_len)".
  81         This creates space at the beginning of the buffer and returns
  82         a pointer to this new space.  If later you need to strip a
  83         header from a buffer, call "skb_pull(skb, header_len)".
  84         skb_headroom() will return how much space is left at the top
  85         of the buffer (before the main data).  Remember, this headroom
  86         space must be reserved before the skb_put() function is called.
  87         */
  88
  89 /*
   90    This version of net/ipv4/ipip.c is cloned from net/ipv4/ip_gre.c
  91
  92    For comments look at net/ipv4/ip_gre.c --ANK
  93  */
  94
  95
  96 #include <linux/config.h>
  97 #include <linux/module.h>
  98 #include <linux/types.h>
  99 #include <linux/sched.h>
 100 #include <linux/kernel.h>
 101 #include <asm/uaccess.h>
 102 #include <linux/skbuff.h>
 103 #include <linux/netdevice.h>
 104 #include <linux/in.h>
 105 #include <linux/tcp.h>
 106 #include <linux/udp.h>
 107 #include <linux/if_arp.h>
 108 #include <linux/mroute.h>
 109 #include <linux/init.h>
 110 #include <linux/netfilter_ipv4.h>
 111
 112 #include <net/sock.h>
 113 #include <net/ip.h>
 114 #include <net/icmp.h>
 115 #include <net/protocol.h>
 116 #include <net/ipip.h>
 117 #include <net/inet_ecn.h>
 118 #include <net/xfrm.h>
 119
 120 #define HASH_SIZE  16
 121 #define HASH(addr) ((addr^(addr>>4))&0xF)
 122
  123 static int ipip_fb_tunnel_init(struct net_device *dev);
  124 static int ipip_tunnel_init(struct net_device *dev);
  125 static void ipip_tunnel_setup(struct net_device *dev);
  126
     /* Device that owns the tunnels_wc[0] slot; the ioctl and uninit paths
      * special-case it (presumably the "fallback" tunnel device - confirm
      * against the module init code, which is outside this view).
      */
  127 static struct net_device *ipip_fb_tunnel_dev;
  128
     /* Tunnel lists, one table per combination of configured addresses:
      *   tunnels_r_l - remote and local set, bucket HASH(remote)^HASH(local)
      *   tunnels_r   - only remote set,      bucket HASH(remote)
      *   tunnels_l   - only local set,       bucket HASH(local)
      *   tunnels_wc  - neither set, a single slot
      * tunnels[] is indexed by the "prio" value built in ipip_bucket():
      * bit 1 = remote set, bit 0 = local set.
      */
  129 static struct ip_tunnel *tunnels_r_l[HASH_SIZE];
  130 static struct ip_tunnel *tunnels_r[HASH_SIZE];
  131 static struct ip_tunnel *tunnels_l[HASH_SIZE];
  132 static struct ip_tunnel *tunnels_wc[1];
  133 static struct ip_tunnel **tunnels[4] = { tunnels_wc, tunnels_l, tunnels_r, tunnels_r_l };
  134
     /* Taken for reading around ipip_tunnel_lookup() in ipip_rcv()/ipip_err(),
      * and for writing around the pointer stores that link/unlink a tunnel.
      */
  135 static DEFINE_RWLOCK(ipip_lock);
 136
 137 static struct ip_tunnel * ipip_tunnel_lookup(u32 remote, u32 local)
 138 {
 139         unsigned h0 = HASH(remote);
 140         unsigned h1 = HASH(local);
 141         struct ip_tunnel *t;
 142
 143         for (t = tunnels_r_l[h0^h1]; t; t = t->next) {
 144                 if (local == t->parms.iph.saddr &&
 145                     remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
 146                         return t;
 147         }
 148         for (t = tunnels_r[h0]; t; t = t->next) {
 149                 if (remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
 150                         return t;
 151         }
 152         for (t = tunnels_l[h1]; t; t = t->next) {
 153                 if (local == t->parms.iph.saddr && (t->dev->flags&IFF_UP))
 154                         return t;
 155         }
 156         if ((t = tunnels_wc[0]) != NULL && (t->dev->flags&IFF_UP))
 157                 return t;
 158         return NULL;
 159 }
 160
 161 static struct ip_tunnel **ipip_bucket(struct ip_tunnel *t)
 162 {
 163         u32 remote = t->parms.iph.daddr;
 164         u32 local = t->parms.iph.saddr;
 165         unsigned h = 0;
 166         int prio = 0;
 167
 168         if (remote) {
 169                 prio |= 2;
 170                 h ^= HASH(remote);
 171         }
 172         if (local) {
 173                 prio |= 1;
 174                 h ^= HASH(local);
 175         }
 176         return &tunnels[prio][h];
 177 }
 178
 179
 180 static void ipip_tunnel_unlink(struct ip_tunnel *t)
 181 {
 182         struct ip_tunnel **tp;
 183
 184         for (tp = ipip_bucket(t); *tp; tp = &(*tp)->next) {
 185                 if (t == *tp) {
 186                         write_lock_bh(&ipip_lock);
 187                         *tp = t->next;
 188                         write_unlock_bh(&ipip_lock);
 189                         break;
 190                 }
 191         }
 192 }
 193
 194 static void ipip_tunnel_link(struct ip_tunnel *t)
 195 {
 196         struct ip_tunnel **tp = ipip_bucket(t);
 197
 198         t->next = *tp;
 199         write_lock_bh(&ipip_lock);
 200         *tp = t;
 201         write_unlock_bh(&ipip_lock);
 202 }
 203
 204 static struct ip_tunnel * ipip_tunnel_locate(struct ip_tunnel_parm *parms, int create)
 205 {
 206         u32 remote = parms->iph.daddr;
 207         u32 local = parms->iph.saddr;
 208         struct ip_tunnel *t, **tp, *nt;
 209         struct net_device *dev;
 210         unsigned h = 0;
 211         int prio = 0;
 212         char name[IFNAMSIZ];
 213
 214         if (remote) {
 215                 prio |= 2;
 216                 h ^= HASH(remote);
 217         }
 218         if (local) {
 219                 prio |= 1;
 220                 h ^= HASH(local);
 221         }
 222         for (tp = &tunnels[prio][h]; (t = *tp) != NULL; tp = &t->next) {
 223                 if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr)
 224                         return t;
 225         }
 226         if (!create)
 227                 return NULL;
 228
 229         if (parms->name[0])
 230                 strlcpy(name, parms->name, IFNAMSIZ);
 231         else {
 232                 int i;
 233                 for (i=1; i<100; i++) {
 234                         sprintf(name, "tunl%d", i);
 235                         if (__dev_get_by_name(name) == NULL)
 236                                 break;
 237                 }
 238                 if (i==100)
 239                         goto failed;
 240         }
 241
 242         dev = alloc_netdev(sizeof(*t), name, ipip_tunnel_setup);
 243         if (dev == NULL)
 244                 return NULL;
 245
 246         nt = dev->priv;
 247         SET_MODULE_OWNER(dev);
 248         dev->init = ipip_tunnel_init;
 249         nt->parms = *parms;
 250
 251         if (register_netdevice(dev) < 0) {
 252                 free_netdev(dev);
 253                 goto failed;
 254         }
 255
 256         dev_hold(dev);
 257         ipip_tunnel_link(nt);
 258         return nt;
 259
 260 failed:
 261         return NULL;
 262 }
 263
 264 static void ipip_tunnel_uninit(struct net_device *dev)
 265 {
 266         if (dev == ipip_fb_tunnel_dev) {
 267                 write_lock_bh(&ipip_lock);
 268                 tunnels_wc[0] = NULL;
 269                 write_unlock_bh(&ipip_lock);
 270         } else
 271                 ipip_tunnel_unlink((struct ip_tunnel*)dev->priv);
 272         dev_put(dev);
 273 }
 274
  275 static void ipip_err(struct sk_buff *skb, u32 info)
  276 {
     /*
      * ICMP error handler for IPIP tunnels.
      *
      * In the branch that is normally compiled (I_WISH_WORLD_WERE_PERFECT
      * not defined) nothing is relayed to the inner sender: for selected
      * ICMP errors the matching tunnel is looked up and its err_count /
      * err_time fields are updated.  ipip_tunnel_xmit() consumes that
      * state.  @info is not used in this branch.
      *
      * The #else branch tries to translate the error into an ICMP message
      * for the encapsulated packet.
      * NOTE(review): that branch uses dp, len and key, none of which are
      * declared in this function as shown, so it does not look buildable
      * if the macro is ever defined - confirm before enabling.
      */
  277 #ifndef I_WISH_WORLD_WERE_PERFECT
  278
  279 /* It is not :-( All the routers (except for Linux) return only
  280    8 bytes of packet payload. It means, that precise relaying of
  281    ICMP in the real Internet is absolutely infeasible.
  282  */
             /* skb->data is read as the IP header quoted inside the ICMP error. */
  283         struct iphdr *iph = (struct iphdr*)skb->data;
  284         int type = skb->h.icmph->type;
  285         int code = skb->h.icmph->code;
  286         struct ip_tunnel *t;
  287
             /* Filter: only some DEST_UNREACH codes and TTL-exceeded get past here. */
  288         switch (type) {
  289         default:
  290         case ICMP_PARAMETERPROB:
  291                 return;
  292
  293         case ICMP_DEST_UNREACH:
  294                 switch (code) {
  295                 case ICMP_SR_FAILED:
  296                 case ICMP_PORT_UNREACH:
  297                         /* Impossible event. */
  298                         return;
  299                 case ICMP_FRAG_NEEDED:
  300                         /* Soft state for pmtu is maintained by IP core. */
  301                         return;
  302                 default:
  303                         /* All others are translated to HOST_UNREACH.
  304                            rfc2003 contains "deep thoughts" about NET_UNREACH,
  305                            I believe they are just ether pollution. --ANK
  306                          */
  307                         break;
  308                 }
  309                 break;
  310         case ICMP_TIME_EXCEEDED:
  311                 if (code != ICMP_EXC_TTL)
  312                         return;
  313                 break;
  314         }
  315
  316         read_lock(&ipip_lock);
             /* The quoted header is one we sent, so its daddr is the tunnel's
              * remote end and its saddr the local end.
              */
  317         t = ipip_tunnel_lookup(iph->daddr, iph->saddr);
             /* Ignore tunnels with no fixed remote address. */
  318         if (t == NULL || t->parms.iph.daddr == 0)
  319                 goto out;
             /* With ttl == 0 the outer TTL is copied from the inner packet
              * (see ipip_tunnel_xmit), so TTL-exceeded is not counted here.
              */
  320         if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
  321                 goto out;
  322
             /* Count errors arriving within IPTUNNEL_ERR_TIMEO of the previous
              * one; otherwise restart the count at 1.
              */
  323         if (jiffies - t->err_time < IPTUNNEL_ERR_TIMEO)
  324                 t->err_count++;
  325         else
  326                 t->err_count = 1;
  327         t->err_time = jiffies;
  328 out:
  329         read_unlock(&ipip_lock);
  330         return;
  331 #else
  332         struct iphdr *iph = (struct iphdr*)dp;
  333         int hlen = iph->ihl<<2;
  334         struct iphdr *eiph;
  335         int type = skb->h.icmph->type;
  336         int code = skb->h.icmph->code;
             /* Type/code/info of the ICMP message to relay for the inner packet. */
  337         int rel_type = 0;
  338         int rel_code = 0;
  339         int rel_info = 0;
  340         struct sk_buff *skb2;
  341         struct flowi fl;
  342         struct rtable *rt;
  343
             /* Need the outer header plus a full inner IP header. */
  344         if (len < hlen + sizeof(struct iphdr))
  345                 return;
  346         eiph = (struct iphdr*)(dp + hlen);
  347
  348         switch (type) {
  349         default:
  350                 return;
  351         case ICMP_PARAMETERPROB:
  352                 if (skb->h.icmph->un.gateway < hlen)
  353                         return;
  354
  355                 /* So... This guy found something strange INSIDE encapsulated
  356                    packet. Well, he is fool, but what can we do ?
  357                  */
  358                 rel_type = ICMP_PARAMETERPROB;
  359                 rel_info = skb->h.icmph->un.gateway - hlen;
  360                 break;
  361
  362         case ICMP_DEST_UNREACH:
  363                 switch (code) {
  364                 case ICMP_SR_FAILED:
  365                 case ICMP_PORT_UNREACH:
  366                         /* Impossible event. */
  367                         return;
  368                 case ICMP_FRAG_NEEDED:
  369                         /* And it is the only really necessary thing :-) */
  370                         rel_info = ntohs(skb->h.icmph->un.frag.mtu);
  371                         if (rel_info < hlen+68)
  372                                 return;
  373                         rel_info -= hlen;
  374                         /* BSD 4.2 MORE DOES NOT EXIST IN NATURE. */
  375                         if (rel_info > ntohs(eiph->tot_len))
  376                                 return;
  377                         break;
  378                 default:
  379                         /* All others are translated to HOST_UNREACH.
  380                            rfc2003 contains "deep thoughts" about NET_UNREACH,
  381                            I believe, it is just ether pollution. --ANK
  382                          */
  383                         rel_type = ICMP_DEST_UNREACH;
  384                         rel_code = ICMP_HOST_UNREACH;
  385                         break;
  386                 }
  387                 break;
  388         case ICMP_TIME_EXCEEDED:
  389                 if (code != ICMP_EXC_TTL)
  390                         return;
  391                 break;
  392         }
  393
  394         /* Prepare fake skb to feed it to icmp_send */
  395         skb2 = skb_clone(skb, GFP_ATOMIC);
  396         if (skb2 == NULL)
  397                 return;
  398         dst_release(skb2->dst);
  399         skb2->dst = NULL;
  400         skb_pull(skb2, skb->data - (u8*)eiph);
  401         skb2->nh.raw = skb2->data;
  402
  403         /* Try to guess incoming interface */
  404         memset(&fl, 0, sizeof(fl));
  405         fl.fl4_daddr = eiph->saddr;
  406         fl.fl4_tos = RT_TOS(eiph->tos);
  407         fl.proto = IPPROTO_IPIP;
             /* NOTE(review): passes &key, but the flow set up above is fl. */
  408         if (ip_route_output_key(&rt, &key)) {
  409                 kfree_skb(skb2);
  410                 return;
  411         }
  412         skb2->dev = rt->u.dst.dev;
  413
  414         /* route "incoming" packet */
  415         if (rt->rt_flags&RTCF_LOCAL) {
  416                 ip_rt_put(rt);
  417                 rt = NULL;
  418                 fl.fl4_daddr = eiph->daddr;
  419                 fl.fl4_src = eiph->saddr;
  420                 fl.fl4_tos = eiph->tos;
  421                 if (ip_route_output_key(&rt, &fl) ||
  422                     rt->u.dst.dev->type != ARPHRD_TUNNEL) {
  423                         ip_rt_put(rt);
  424                         kfree_skb(skb2);
  425                         return;
  426                 }
  427         } else {
  428                 ip_rt_put(rt);
  429                 if (ip_route_input(skb2, eiph->daddr, eiph->saddr, eiph->tos, skb2->dev) ||
  430                     skb2->dst->dev->type != ARPHRD_TUNNEL) {
  431                         kfree_skb(skb2);
  432                         return;
  433                 }
  434         }
  435
  436         /* change mtu on this route */
  437         if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
  438                 if (rel_info > dst_mtu(skb2->dst)) {
  439                         kfree_skb(skb2);
  440                         return;
  441                 }
  442                 skb2->dst->ops->update_pmtu(skb2->dst, rel_info);
  443                 rel_info = htonl(rel_info);
  444         } else if (type == ICMP_TIME_EXCEEDED) {
  445                 struct ip_tunnel *t = (struct ip_tunnel*)skb2->dev->priv;
                     /* A fixed tunnel TTL means the inner sender cannot fix
                      * this; report host unreachable instead.
                      */
  446                 if (t->parms.iph.ttl) {
  447                         rel_type = ICMP_DEST_UNREACH;
  448                         rel_code = ICMP_HOST_UNREACH;
  449                 }
  450         }
  451
  452         icmp_send(skb2, rel_type, rel_code, rel_info);
  453         kfree_skb(skb2);
  454         return;
  455 #endif
  456 }
 457
 458 static inline void ipip_ecn_decapsulate(struct iphdr *outer_iph, struct sk_buff *skb)
 459 {
 460         struct iphdr *inner_iph = skb->nh.iph;
 461
 462         if (INET_ECN_is_ce(outer_iph->tos))
 463                 IP_ECN_set_ce(inner_iph);
 464 }
 465
 466 static int ipip_rcv(struct sk_buff *skb)
 467 {
 468         struct iphdr *iph;
 469         struct ip_tunnel *tunnel;
 470
 471         if (!pskb_may_pull(skb, sizeof(struct iphdr)))
 472                 goto out;
 473
 474         iph = skb->nh.iph;
 475
 476         read_lock(&ipip_lock);
 477         if ((tunnel = ipip_tunnel_lookup(iph->saddr, iph->daddr)) != NULL) {
 478                 if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
 479                         read_unlock(&ipip_lock);
 480                         kfree_skb(skb);
 481                         return 0;
 482                 }
 483
 484                 secpath_reset(skb);
 485
 486                 skb->mac.raw = skb->nh.raw;
 487                 skb->nh.raw = skb->data;
 488                 memset(&(IPCB(skb)->opt), 0, sizeof(struct ip_options));
 489                 skb->protocol = htons(ETH_P_IP);
 490                 skb->pkt_type = PACKET_HOST;
 491
 492                 tunnel->stat.rx_packets++;
 493                 tunnel->stat.rx_bytes += skb->len;
 494                 skb->dev = tunnel->dev;
 495                 dst_release(skb->dst);
 496                 skb->dst = NULL;
 497                 nf_reset(skb);
 498                 ipip_ecn_decapsulate(iph, skb);
 499                 netif_rx(skb);
 500                 read_unlock(&ipip_lock);
 501                 return 0;
 502         }
 503         read_unlock(&ipip_lock);
 504
 505 out:
 506         return -1;
 507 }
 508
  509 /*
  510  *      This function assumes it is being called from dev_queue_xmit()
  511  *      and that skb is filled properly by that function.
      *
      *      hard_start_xmit hook of a tunnel device: routes the packet to
      *      the tunnel endpoint, prepends an outer IPv4 header with
      *      protocol IPPROTO_IPIP and sends it via IPTUNNEL_XMIT().
      *
      *      Always returns 0.  On the error and allocation-failure paths
      *      the skb is freed here; on success it is handed to
      *      IPTUNNEL_XMIT(), a macro defined outside this file
      *      (presumably it uses the locals skb, iph, rt and stats -
      *      confirm before renaming any of them).
  512  */
  513
  514 static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
  515 {
  516         struct ip_tunnel *tunnel = (struct ip_tunnel*)dev->priv;
  517         struct net_device_stats *stats = &tunnel->stat;
  518         struct iphdr  *tiph = &tunnel->parms.iph;
  519         u8     tos = tunnel->parms.iph.tos;
  520         u16    df = tiph->frag_off;
  521         struct rtable *rt;                      /* Route to the other host */
  522         struct net_device *tdev;                        /* Device to other host */
  523         struct iphdr  *old_iph = skb->nh.iph;
  524         struct iphdr  *iph;                     /* Our new IP header */
  525         int    max_headroom;                    /* The extra header space needed */
  526         u32    dst = tiph->daddr;
  527         int    mtu;
  528
             /* Re-entry guard: a non-zero counter means we are already inside
              * this function for this tunnel.  Every exit path below undoes
              * the increment.
              */
  529         if (tunnel->recursion++) {
  530                 tunnel->stat.collisions++;
  531                 goto tx_error;
  532         }
  533
             /* Only IPv4 payloads are encapsulated. */
  534         if (skb->protocol != htons(ETH_P_IP))
  535                 goto tx_error;
  536
             /* Low bit of the configured TOS set: take the TOS from the inner packet. */
  537         if (tos&1)
  538                 tos = old_iph->tos;
  539
  540         if (!dst) {
  541                 /* NBMA tunnel */
                     /* No fixed remote end: use the gateway of the inner
                      * packet's route as the outer destination.
                      */
  542                 if ((rt = (struct rtable*)skb->dst) == NULL) {
  543                         tunnel->stat.tx_fifo_errors++;
  544                         goto tx_error;
  545                 }
  546                 if ((dst = rt->rt_gateway) == 0)
  547                         goto tx_error_icmp;
  548         }
  549
             /* Route the outer packet; on success rt holds a reference that
              * every later error path releases with ip_rt_put().
              */
  550         {
  551                 struct flowi fl = { .oif = tunnel->parms.link,
  552                                     .nl_u = { .ip4_u =
  553                                               { .daddr = dst,
  554                                                 .saddr = tiph->saddr,
  555                                                 .tos = RT_TOS(tos) } },
  556                                     .proto = IPPROTO_IPIP };
  557                 if (ip_route_output_key(&rt, &fl)) {
  558                         tunnel->stat.tx_carrier_errors++;
  559                         goto tx_error_icmp;
  560                 }
  561         }
  562         tdev = rt->u.dst.dev;
  563
             /* The route to the endpoint leads back into this tunnel. */
  564         if (tdev == dev) {
  565                 ip_rt_put(rt);
  566                 tunnel->stat.collisions++;
  567                 goto tx_error;
  568         }
  569
             /* With a non-zero configured frag_off the usable MTU is the outer
              * route's MTU minus the outer header; otherwise use the inner
              * route's MTU, or the device MTU when there is no inner route.
              */
  570         if (tiph->frag_off)
  571                 mtu = dst_mtu(&rt->u.dst) - sizeof(struct iphdr);
  572         else
  573                 mtu = skb->dst ? dst_mtu(skb->dst) : dev->mtu;
  574
             /* Below 68 bytes the packet is dropped and counted as a collision. */
  575         if (mtu < 68) {
  576                 tunnel->stat.collisions++;
  577                 ip_rt_put(rt);
  578                 goto tx_error;
  579         }
  580         if (skb->dst)
  581                 skb->dst->ops->update_pmtu(skb->dst, mtu);
  582
             /* The outer header inherits DF from the inner packet. */
  583         df |= (old_iph->frag_off&htons(IP_DF));
  584
             /* Inner packet has DF set and does not fit: tell the sender and drop. */
  585         if ((old_iph->frag_off&htons(IP_DF)) && mtu < ntohs(old_iph->tot_len)) {
  586                 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
  587                 ip_rt_put(rt);
  588                 goto tx_error;
  589         }
  590
             /* Consume error state recorded by ipip_err(): while recent, each
              * transmit reports one link failure for the inner packet.  The
              * packet is still sent afterwards.
              */
  591         if (tunnel->err_count > 0) {
  592                 if (jiffies - tunnel->err_time < IPTUNNEL_ERR_TIMEO) {
  593                         tunnel->err_count--;
  594                         dst_link_failure(skb);
  595                 } else
  596                         tunnel->err_count = 0;
  597         }
  598
  599         /*
  600          * Okay, now see if we can stuff it in the buffer as-is.
  601          */
  602         max_headroom = (LL_RESERVED_SPACE(tdev)+sizeof(struct iphdr));
  603
             /* Reallocate when there is too little headroom or the buffer is
              * cloned/shared.
              */
  604         if (skb_headroom(skb) < max_headroom || skb_cloned(skb) || skb_shared(skb)) {
  605                 struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
  606                 if (!new_skb) {
  607                         ip_rt_put(rt);
  608                         stats->tx_dropped++;
  609                         dev_kfree_skb(skb);
  610                         tunnel->recursion--;
  611                         return 0;
  612                 }
                     /* Keep socket write accounting on the replacement buffer. */
  613                 if (skb->sk)
  614                         skb_set_owner_w(new_skb, skb->sk);
  615                 dev_kfree_skb(skb);
  616                 skb = new_skb;
                     /* The old header pointer referred to the freed skb. */
  617                 old_iph = skb->nh.iph;
  618         }
  619
             /* Inner IP header becomes the transport header; make room for the
              * outer header in front of it and attach the outer route.
              */
  620         skb->h.raw = skb->nh.raw;
  621         skb->nh.raw = skb_push(skb, sizeof(struct iphdr));
  622         memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
  623         dst_release(skb->dst);
  624         skb->dst = &rt->u.dst;
  625
  626         /*
  627          *      Push down and install the IPIP header.
  628          */
  629
  630         iph                     =       skb->nh.iph;
  631         iph->version            =       4;
  632         iph->ihl                =       sizeof(struct iphdr)>>2;
  633         iph->frag_off           =       df;
  634         iph->protocol           =       IPPROTO_IPIP;
  635         iph->tos                =       INET_ECN_encapsulate(tos, old_iph->tos);
  636         iph->daddr              =       rt->rt_dst;
  637         iph->saddr              =       rt->rt_src;
  638
             /* Configured TTL of 0 means: copy the TTL of the inner packet. */
  639         if ((iph->ttl = tiph->ttl) == 0)
  640                 iph->ttl        =       old_iph->ttl;
  641
  642         nf_reset(skb);
  643
             /* tot_len, id and check are not set above; presumably
              * IPTUNNEL_XMIT() finishes the header and updates the tx
              * counters - confirm against its definition.
              */
  644         IPTUNNEL_XMIT();
  645         tunnel->recursion--;
  646         return 0;
  647
             /* Error exits: optionally signal link failure for the inner
              * packet, then count the error and free the skb.
              */
  648 tx_error_icmp:
  649         dst_link_failure(skb);
  650 tx_error:
  651         stats->tx_errors++;
  652         dev_kfree_skb(skb);
  653         tunnel->recursion--;
  654         return 0;
  655 }
 656
  657 static int
  658 ipip_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
  659 {
     /*
      * do_ioctl hook: get, add, change or delete a tunnel.
      *
      * The parameters travel as a struct ip_tunnel_parm at the user
      * pointer ifr->ifr_ifru.ifru_data.  When the ioctl is issued on
      * ipip_fb_tunnel_dev the target tunnel is selected by the addresses
      * in that structure; on any other device the device's own tunnel is
      * the target.
      *
      * Returns 0 or a negative errno.  Both "break" and "goto done" end up
      * at the single return below.
      */
  660         int err = 0;
  661         struct ip_tunnel_parm p;
  662         struct ip_tunnel *t;
  663
  664         switch (cmd) {
  665         case SIOCGETTUNNEL:
  666                 t = NULL;
  667                 if (dev == ipip_fb_tunnel_dev) {
  668                         if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
  669                                 err = -EFAULT;
  670                                 break;
  671                         }
  672                         t = ipip_tunnel_locate(&p, 0);
  673                 }
                     /* No match (or not asked on the fb device): report this
                      * device's own parameters.
                      */
  674                 if (t == NULL)
  675                         t = (struct ip_tunnel*)dev->priv;
  676                 memcpy(&p, &t->parms, sizeof(p));
  677                 if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
  678                         err = -EFAULT;
  679                 break;
  680
  681         case SIOCADDTUNNEL:
  682         case SIOCCHGTUNNEL:
                     /* err is preset before each check so a bare "goto done"
                      * reports the right failure.
                      */
  683                 err = -EPERM;
  684                 if (!capable(CAP_NET_ADMIN))
  685                         goto done;
  686
  687                 err = -EFAULT;
  688                 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
  689                         goto done;
  690
                     /* Header template must be plain IPv4/IPIP without options;
                      * DF is the only frag_off bit allowed.
                      */
  691                 err = -EINVAL;
  692                 if (p.iph.version != 4 || p.iph.protocol != IPPROTO_IPIP ||
  693                     p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)))
  694                         goto done;
                     /* A fixed TTL forces DF on the outer header. */
  695                 if (p.iph.ttl)
  696                         p.iph.frag_off |= htons(IP_DF);
  697
                     /* Only SIOCADDTUNNEL may create a new tunnel. */
  698                 t = ipip_tunnel_locate(&p, cmd == SIOCADDTUNNEL);
  699
  700                 if (dev != ipip_fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
  701                         if (t != NULL) {
                                     /* The requested addresses already belong
                                      * to a different tunnel device.
                                      */
  702                                 if (t->dev != dev) {
  703                                         err = -EEXIST;
  704                                         break;
  705                                 }
  706                         } else {
                                     /* A point-to-point device must keep a
                                      * remote address, and a device without
                                      * that flag must not gain one.
                                      */
  707                                 if (((dev->flags&IFF_POINTOPOINT) && !p.iph.daddr) ||
  708                                     (!(dev->flags&IFF_POINTOPOINT) && p.iph.daddr)) {
  709                                         err = -EINVAL;
  710                                         break;
  711                                 }
                                     /* Re-address this device's tunnel: unlink,
                                      * update, then link again so it lands in
                                      * the bucket for the new addresses.
                                      */
  712                                 t = (struct ip_tunnel*)dev->priv;
  713                                 ipip_tunnel_unlink(t);
  714                                 t->parms.iph.saddr = p.iph.saddr;
  715                                 t->parms.iph.daddr = p.iph.daddr;
  716                                 memcpy(dev->dev_addr, &p.iph.saddr, 4);
  717                                 memcpy(dev->broadcast, &p.iph.daddr, 4);
  718                                 ipip_tunnel_link(t);
  719                                 netdev_state_change(dev);
  720                         }
  721                 }
  722
  723                 if (t) {
  724                         err = 0;
  725                         if (cmd == SIOCCHGTUNNEL) {
  726                                 t->parms.iph.ttl = p.iph.ttl;
  727                                 t->parms.iph.tos = p.iph.tos;
  728                                 t->parms.iph.frag_off = p.iph.frag_off;
  729                         }
                             /* Hand the resulting parameters back to the caller. */
  730                         if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
  731                                 err = -EFAULT;
  732                 } else
  733                         err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
  734                 break;
  735
  736         case SIOCDELTUNNEL:
  737                 err = -EPERM;
  738                 if (!capable(CAP_NET_ADMIN))
  739                         goto done;
  740
  741                 if (dev == ipip_fb_tunnel_dev) {
  742                         err = -EFAULT;
  743                         if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
  744                                 goto done;
  745                         err = -ENOENT;
  746                         if ((t = ipip_tunnel_locate(&p, 0)) == NULL)
  747                                 goto done;
                             /* ipip_fb_tunnel_dev itself cannot be deleted this way. */
  748                         err = -EPERM;
  749                         if (t->dev == ipip_fb_tunnel_dev)
  750                                 goto done;
  751                         dev = t->dev;
  752                 }
  753                 err = unregister_netdevice(dev);
  754                 break;
  755
  756         default:
  757                 err = -EINVAL;
  758         }
  759
  760 done:
  761         return err;
  762 }
 763
 764 static struct net_device_stats *ipip_tunnel_get_stats(struct net_device *dev)
 765 {
 766         return &(((struct ip_tunnel*)dev->priv)->stat);
 767 }
 768
 769 static int ipip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
 770 {
 771         if (new_mtu < 68 || new_mtu > 0xFFF8 - sizeof(struct iphdr))
 772                 return -EINVAL;
 773         dev->mtu = new_mtu;
 774         return 0;
 775 }
 776
 777 static void ipip_tunnel_setup(struct net_device *dev)
 778 {
 779         SET_MODULE_OWNER(dev);
 780         dev->uninit             = ipip_tunnel_uninit;
 781         dev->hard_start_xmit    = ipip_tunnel_xmit;
 782         dev->get_stats          = ipip_tunnel_get_stats;
 783         dev->do_ioctl           = ipip_tunnel_ioctl;
 784         dev->change_mtu         = ipip_tunnel_change_mtu;
 785         dev->destructor         = free_netdev;
 786
 787         dev->type               = ARPHRD_TUNNEL;
 788         dev->hard_header_len    = LL_MAX_HEADER + sizeof(struct iphdr);
 789         dev->mtu                = 1500 - sizeof(struct iphdr);
 790         dev->flags              = IFF_NOARP;
 791         dev->iflink             = 0;
 792         dev->addr_len           = 4;
 793 }
 794
 795 static int ipip_tunnel_init(struct net_device *dev)
 796 {
 797         struct net_device *tdev = NULL;
 798         struct ip_tunnel *tunnel;
 799         struct iphdr *iph;
 800
 801         tunnel = (struct ip_tunnel*)dev->priv;
 802         iph = &tunnel->parms.iph;
 803
 804         tunnel->dev = dev;
 805         strcpy(tunnel->parms.name, dev->name);
 806
 807         memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
 808         memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);
 809
 810         if (iph->daddr) {
 811                 struct flowi fl = { .oif = tunnel->parms.link,
 812                                     .nl_u = { .ip4_u =
 813                                               { .daddr = iph->daddr,
 814                                                 .saddr = iph->saddr,
 815                                                 .tos = RT_TOS(iph->tos) } },
 816                                     .proto = IPPROTO_IPIP };
 817                 struct rtable *rt;
 818                 if (!ip_route_output_key(&rt, &fl)) {
 819                         tdev = rt->u.dst.dev;
 820                         ip_rt_put(rt);
 821                 }
 822                 dev->flags |= IFF_POINTOPOINT;
 823         }
 824
 825         if (!tdev && tunnel->parms.link)
 826                 tdev = __dev_get_by_index(tunnel->parms.link);
 827
 828         if (tdev) {
 829                 dev->hard_header_len = tdev->hard_header_len + sizeof(struct iphdr);
 830                 dev->mtu = tdev->mtu - sizeof(struct iphdr);
 831         }
 832         dev->iflink = tunnel->parms.link;
 833
 834         return 0;
 835 }
 836
 837 static int __init ipip_fb_tunnel_init(struct net_device *dev)
 838 {
 839         struct ip_tunnel *tunnel = dev->priv;
 840         struct iphdr *iph = &tunnel->parms.iph;
 841
 842         tunnel->dev = dev;
 843         strcpy(tunnel->parms.name, dev->name);
 844
 845         iph->version            = 4;
 846         iph->protocol           = IPPROTO_IPIP;
 847         iph->ihl                = 5;
 848
 849         dev_hold(dev);
 850         tunnels_wc[0]           = tunnel;
 851         return 0;
 852 }
 853
 854 #ifdef CONFIG_INET_TUNNEL
 855 static struct xfrm_tunnel ipip_handler = {
 856         .handler        =       ipip_rcv,
 857         .err_handler    =       ipip_err,
 858 };
 859
 860 static inline int ipip_register(void)
 861 {
 862         return xfrm4_tunnel_register(&ipip_handler);
 863 }
 864
 865 static inline int ipip_unregister(void)
 866 {
 867         return xfrm4_tunnel_deregister(&ipip_handler);
 868 }
 869 #else
 870 static struct net_protocol ipip_protocol = {
 871         .handler        =       ipip_rcv,
 872         .err_handler    =       ipip_err,
 873         .no_policy      =       1,
 874 };
 875
 876 static inline int ipip_register(void)
 877 {
 878         return inet_add_protocol(&ipip_protocol, IPPROTO_IPIP);
 879 }
 880
 881 static inline int ipip_unregister(void)
 882 {
 883         return inet_del_protocol(&ipip_protocol, IPPROTO_IPIP);
 884 }
 885 #endif
 886
 887 static char banner[] __initdata =
 888         KERN_INFO "IPv4 over IPv4 tunneling driver\n";
 889
 890 static int __init ipip_init(void)
 891 {
 892         int err;
 893
 894         printk(banner);
 895
 896         if (ipip_register() < 0) {
 897                 printk(KERN_INFO "ipip init: can't register tunnel\n");
 898                 return -EAGAIN;
 899         }
 900
 901         ipip_fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel),
 902                                            "tunl0",
 903                                            ipip_tunnel_setup);
 904         if (!ipip_fb_tunnel_dev) {
 905                 err = -ENOMEM;
 906                 goto err1;
 907         }
 908
 909         ipip_fb_tunnel_dev->init = ipip_fb_tunnel_init;
 910
 911         if ((err = register_netdev(ipip_fb_tunnel_dev)))
 912                 goto err2;
 913  out:
 914         return err;
 915  err2:
 916         free_netdev(ipip_fb_tunnel_dev);
 917  err1:
 918         ipip_unregister();
 919         goto out;
 920 }
 921
 922 static void __exit ipip_destroy_tunnels(void)
 923 {
 924         int prio;
 925
 926         for (prio = 1; prio < 4; prio++) {
 927                 int h;
 928                 for (h = 0; h < HASH_SIZE; h++) {
 929                         struct ip_tunnel *t;
 930                         while ((t = tunnels[prio][h]) != NULL)
 931                                 unregister_netdevice(t->dev);
 932                 }
 933         }
 934 }
 935
 936 static void __exit ipip_fini(void)
 937 {
 938         if (ipip_unregister() < 0)
 939                 printk(KERN_INFO "ipip close: can't deregister tunnel\n");
 940
 941         rtnl_lock();
 942         ipip_destroy_tunnels();
 943         unregister_netdevice(ipip_fb_tunnel_dev);
 944         rtnl_unlock();
 945 }
 946
 947 module_init(ipip_init);
 948 module_exit(ipip_fini);
 949 MODULE_LICENSE("GPL");