release/src-rt/linux/linux-2.6/net/ipv4/ipip.c

   1 /*
   2  *      Linux NET3:     IP/IP protocol decoder.
   3  *
   4  *      Version: $Id: ipip.c,v 1.50 2001/10/02 02:22:36 davem Exp $
   5  *
   6  *      Authors:
   7  *              Sam Lantinga (slouken@cs.ucdavis.edu)  02/01/95
   8  *
   9  *      Fixes:
  10  *              Alan Cox        :       Merged and made usable non modular (its so tiny its silly as
  11  *                                      a module taking up 2 pages).
  12  *              Alan Cox        :       Fixed bug with 1.3.18 and IPIP not working (now needs to set skb->h.iph)
  13  *                                      to keep ip_forward happy.
  14  *              Alan Cox        :       More fixes for 1.3.21, and firewall fix. Maybe this will work soon 8).
  15  *              Kai Schulte     :       Fixed #defines for IP_FIREWALL->FIREWALL
  16  *              David Woodhouse :       Perform some basic ICMP handling.
  17  *                                      IPIP Routing without decapsulation.
  18  *              Carlos Picoto   :       GRE over IP support
  19  *              Alexey Kuznetsov:       Reworked. Really, now it is truncated version of ipv4/ip_gre.c.
  20  *                                      I do not want to merge them together.
  21  *
  22  *      This program is free software; you can redistribute it and/or
  23  *      modify it under the terms of the GNU General Public License
  24  *      as published by the Free Software Foundation; either version
  25  *      2 of the License, or (at your option) any later version.
  26  *
  27  */
  28
  29 /* tunnel.c: an IP tunnel driver
  30
  31         The purpose of this driver is to provide an IP tunnel through
  32         which you can tunnel network traffic transparently across subnets.
  33
  34         This was written by looking at Nick Holloway's dummy driver
  35         Thanks for the great code!
  36
  37                 -Sam Lantinga   (slouken@cs.ucdavis.edu)  02/01/95
  38
  39         Minor tweaks:
  40                 Cleaned up the code a little and added some pre-1.3.0 tweaks.
  41                 dev->hard_header/hard_header_len changed to use no headers.
  42                 Comments/bracketing tweaked.
  43                 Made the tunnels use dev->name not tunnel: when error reporting.
  44                 Added tx_dropped stat
  45
  46                 -Alan Cox       (Alan.Cox@linux.org) 21 March 95
  47
  48         Reworked:
  49                 Changed to tunnel to destination gateway in addition to the
  50                         tunnel's pointopoint address
  51                 Almost completely rewritten
  52                 Note:  There is currently no firewall or ICMP handling done.
  53
  54                 -Sam Lantinga   (slouken@cs.ucdavis.edu) 02/13/96
  55
  56 */
  57
  58 /* Things I wish I had known when writing the tunnel driver:
  59
  60         When the tunnel_xmit() function is called, the skb contains the
  61         packet to be sent (plus a great deal of extra info), and dev
  62         contains the tunnel device that _we_ are.
  63
  64         When we are passed a packet, we are expected to fill in the
  65         source address with our source IP address.
  66
  67         What is the proper way to allocate, copy and free a buffer?
  68         After you allocate it, it is a "0 length" chunk of memory
  69         starting at zero.  If you want to add headers to the buffer
  70         later, you'll have to call "skb_reserve(skb, amount)" with
  71         the amount of memory you want reserved.  Then, you call
  72         "skb_put(skb, amount)" with the amount of space you want in
  73         the buffer.  skb_put() returns a pointer to the top (#0) of
  74         that buffer.  skb->len is set to the amount of space you have
  75         "allocated" with skb_put().  You can then write up to skb->len
  76         bytes to that buffer.  If you need more, you can call skb_put()
  77         again with the additional amount of space you need.  You can
  78         find out how much more space you can allocate by calling
  79         "skb_tailroom(skb)".
  80         Now, to add header space, call "skb_push(skb, header_len)".
  81         This creates space at the beginning of the buffer and returns
  82         a pointer to this new space.  If later you need to strip a
  83         header from a buffer, call "skb_pull(skb, header_len)".
  84         skb_headroom() will return how much space is left at the top
  85         of the buffer (before the main data).  Remember, this headroom
  86         space must be reserved before the skb_put() function is called.
  87         */
  88
  89 /*
  90    This version of net/ipv4/ipip.c is cloned of net/ipv4/ip_gre.c
  91
  92    For comments look at net/ipv4/ip_gre.c --ANK
  93  */
  94
  95
  96 #include <linux/capability.h>
  97 #include <linux/module.h>
  98 #include <linux/types.h>
  99 #include <linux/kernel.h>
 100 #include <asm/uaccess.h>
 101 #include <linux/skbuff.h>
 102 #include <linux/netdevice.h>
 103 #include <linux/in.h>
 104 #include <linux/tcp.h>
 105 #include <linux/udp.h>
 106 #include <linux/if_arp.h>
 107 #include <linux/mroute.h>
 108 #include <linux/init.h>
 109 #include <linux/netfilter_ipv4.h>
 110 #include <linux/if_ether.h>
 111
 112 #include <net/sock.h>
 113 #include <net/ip.h>
 114 #include <net/icmp.h>
 115 #include <net/ipip.h>
 116 #include <net/inet_ecn.h>
 117 #include <net/xfrm.h>
 118
 119 #define HASH_SIZE  16
 120 #define HASH(addr) (((__force u32)addr^((__force u32)addr>>4))&0xF)
 121
 122 static int ipip_fb_tunnel_init(struct net_device *dev);
 123 static int ipip_tunnel_init(struct net_device *dev);
 124 static void ipip_tunnel_setup(struct net_device *dev);
 125
 126 static struct net_device *ipip_fb_tunnel_dev;
 127
 128 static struct ip_tunnel *tunnels_r_l[HASH_SIZE];
 129 static struct ip_tunnel *tunnels_r[HASH_SIZE];
 130 static struct ip_tunnel *tunnels_l[HASH_SIZE];
 131 static struct ip_tunnel *tunnels_wc[1];
 132 static struct ip_tunnel **tunnels[4] = { tunnels_wc, tunnels_l, tunnels_r, tunnels_r_l };
 133
 134 static DEFINE_RWLOCK(ipip_lock);
 135
 136 static struct ip_tunnel * ipip_tunnel_lookup(__be32 remote, __be32 local)
 137 {
 138         unsigned h0 = HASH(remote);
 139         unsigned h1 = HASH(local);
 140         struct ip_tunnel *t;
 141
 142         for (t = tunnels_r_l[h0^h1]; t; t = t->next) {
 143                 if (local == t->parms.iph.saddr &&
 144                     remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
 145                         return t;
 146         }
 147         for (t = tunnels_r[h0]; t; t = t->next) {
 148                 if (remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
 149                         return t;
 150         }
 151         for (t = tunnels_l[h1]; t; t = t->next) {
 152                 if (local == t->parms.iph.saddr && (t->dev->flags&IFF_UP))
 153                         return t;
 154         }
 155         if ((t = tunnels_wc[0]) != NULL && (t->dev->flags&IFF_UP))
 156                 return t;
 157         return NULL;
 158 }
 159
 160 static struct ip_tunnel **__ipip_bucket(struct ip_tunnel_parm *parms)
 161 {
 162         __be32 remote = parms->iph.daddr;
 163         __be32 local = parms->iph.saddr;
 164         unsigned h = 0;
 165         int prio = 0;
 166
 167         if (remote) {
 168                 prio |= 2;
 169                 h ^= HASH(remote);
 170         }
 171         if (local) {
 172                 prio |= 1;
 173                 h ^= HASH(local);
 174         }
 175         return &tunnels[prio][h];
 176 }
 177
 178 static inline struct ip_tunnel **ipip_bucket(struct ip_tunnel *t)
 179 {
 180         return __ipip_bucket(&t->parms);
 181 }
 182
 183 static void ipip_tunnel_unlink(struct ip_tunnel *t)
 184 {
 185         struct ip_tunnel **tp;
 186
 187         for (tp = ipip_bucket(t); *tp; tp = &(*tp)->next) {
 188                 if (t == *tp) {
 189                         write_lock_bh(&ipip_lock);
 190                         *tp = t->next;
 191                         write_unlock_bh(&ipip_lock);
 192                         break;
 193                 }
 194         }
 195 }
 196
 197 static void ipip_tunnel_link(struct ip_tunnel *t)
 198 {
 199         struct ip_tunnel **tp = ipip_bucket(t);
 200
 201         t->next = *tp;
 202         write_lock_bh(&ipip_lock);
 203         *tp = t;
 204         write_unlock_bh(&ipip_lock);
 205 }
 206
 207 static struct ip_tunnel * ipip_tunnel_locate(struct ip_tunnel_parm *parms, int create)
 208 {
 209         __be32 remote = parms->iph.daddr;
 210         __be32 local = parms->iph.saddr;
 211         struct ip_tunnel *t, **tp, *nt;
 212         struct net_device *dev;
 213         char name[IFNAMSIZ];
 214
 215         for (tp = __ipip_bucket(parms); (t = *tp) != NULL; tp = &t->next) {
 216                 if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr)
 217                         return t;
 218         }
 219         if (!create)
 220                 return NULL;
 221
 222         if (parms->name[0])
 223                 strlcpy(name, parms->name, IFNAMSIZ);
 224         else {
 225                 int i;
 226                 for (i=1; i<100; i++) {
 227                         sprintf(name, "tunl%d", i);
 228                         if (__dev_get_by_name(name) == NULL)
 229                                 break;
 230                 }
 231                 if (i==100)
 232                         goto failed;
 233         }
 234
 235         dev = alloc_netdev(sizeof(*t), name, ipip_tunnel_setup);
 236         if (dev == NULL)
 237                 return NULL;
 238
 239         nt = netdev_priv(dev);
 240         SET_MODULE_OWNER(dev);
 241         dev->init = ipip_tunnel_init;
 242         nt->parms = *parms;
 243
 244         if (register_netdevice(dev) < 0) {
 245                 free_netdev(dev);
 246                 goto failed;
 247         }
 248
 249         dev_hold(dev);
 250         ipip_tunnel_link(nt);
 251         return nt;
 252
 253 failed:
 254         return NULL;
 255 }
 256
 257 static void ipip_tunnel_uninit(struct net_device *dev)
 258 {
 259         if (dev == ipip_fb_tunnel_dev) {
 260                 write_lock_bh(&ipip_lock);
 261                 tunnels_wc[0] = NULL;
 262                 write_unlock_bh(&ipip_lock);
 263         } else
 264                 ipip_tunnel_unlink(netdev_priv(dev));
 265         dev_put(dev);
 266 }
 267
 268 static int ipip_err(struct sk_buff *skb, u32 info)
 269 {
 270 #ifndef I_WISH_WORLD_WERE_PERFECT
 271
 272 /* It is not :-( All the routers (except for Linux) return only
 273    8 bytes of packet payload. It means, that precise relaying of
 274    ICMP in the real Internet is absolutely infeasible.
 275  */
 276         struct iphdr *iph = (struct iphdr*)skb->data;
 277         const int type = icmp_hdr(skb)->type;
 278         const int code = icmp_hdr(skb)->code;
 279         struct ip_tunnel *t;
 280         int err;
 281
 282         switch (type) {
 283         default:
 284         case ICMP_PARAMETERPROB:
 285                 return 0;
 286
 287         case ICMP_DEST_UNREACH:
 288                 switch (code) {
 289                 case ICMP_SR_FAILED:
 290                 case ICMP_PORT_UNREACH:
 291                         /* Impossible event. */
 292                         return 0;
 293                 case ICMP_FRAG_NEEDED:
 294                         /* Soft state for pmtu is maintained by IP core. */
 295                         return 0;
 296                 default:
 297                         /* All others are translated to HOST_UNREACH.
 298                            rfc2003 contains "deep thoughts" about NET_UNREACH,
 299                            I believe they are just ether pollution. --ANK
 300                          */
 301                         break;
 302                 }
 303                 break;
 304         case ICMP_TIME_EXCEEDED:
 305                 if (code != ICMP_EXC_TTL)
 306                         return 0;
 307                 break;
 308         }
 309
 310         err = -ENOENT;
 311
 312         read_lock(&ipip_lock);
 313         t = ipip_tunnel_lookup(iph->daddr, iph->saddr);
 314         if (t == NULL || t->parms.iph.daddr == 0)
 315                 goto out;
 316
 317         err = 0;
 318         if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
 319                 goto out;
 320
 321         if (jiffies - t->err_time < IPTUNNEL_ERR_TIMEO)
 322                 t->err_count++;
 323         else
 324                 t->err_count = 1;
 325         t->err_time = jiffies;
 326 out:
 327         read_unlock(&ipip_lock);
 328         return err;
 329 #else
 330         struct iphdr *iph = (struct iphdr*)dp;
 331         int hlen = iph->ihl<<2;
 332         struct iphdr *eiph;
 333         const int type = icmp_hdr(skb)->type;
 334         const int code = icmp_hdr(skb)->code;
 335         int rel_type = 0;
 336         int rel_code = 0;
 337         __be32 rel_info = 0;
 338         __u32 n = 0;
 339         struct sk_buff *skb2;
 340         struct flowi fl;
 341         struct rtable *rt;
 342
 343         if (len < hlen + sizeof(struct iphdr))
 344                 return 0;
 345         eiph = (struct iphdr*)(dp + hlen);
 346
 347         switch (type) {
 348         default:
 349                 return 0;
 350         case ICMP_PARAMETERPROB:
 351                 n = ntohl(icmp_hdr(skb)->un.gateway) >> 24;
 352                 if (n < hlen)
 353                         return 0;
 354
 355                 /* So... This guy found something strange INSIDE encapsulated
 356                    packet. Well, he is fool, but what can we do ?
 357                  */
 358                 rel_type = ICMP_PARAMETERPROB;
 359                 rel_info = htonl((n - hlen) << 24);
 360                 break;
 361
 362         case ICMP_DEST_UNREACH:
 363                 switch (code) {
 364                 case ICMP_SR_FAILED:
 365                 case ICMP_PORT_UNREACH:
 366                         /* Impossible event. */
 367                         return 0;
 368                 case ICMP_FRAG_NEEDED:
 369                         /* And it is the only really necessary thing :-) */
 370                         n = ntohs(icmp_hdr(skb)->un.frag.mtu);
 371                         if (n < hlen+68)
 372                                 return 0;
 373                         n -= hlen;
 374                         /* BSD 4.2 MORE DOES NOT EXIST IN NATURE. */
 375                         if (n > ntohs(eiph->tot_len))
 376                                 return 0;
 377                         rel_info = htonl(n);
 378                         break;
 379                 default:
 380                         /* All others are translated to HOST_UNREACH.
 381                            rfc2003 contains "deep thoughts" about NET_UNREACH,
 382                            I believe, it is just ether pollution. --ANK
 383                          */
 384                         rel_type = ICMP_DEST_UNREACH;
 385                         rel_code = ICMP_HOST_UNREACH;
 386                         break;
 387                 }
 388                 break;
 389         case ICMP_TIME_EXCEEDED:
 390                 if (code != ICMP_EXC_TTL)
 391                         return 0;
 392                 break;
 393         }
 394
 395         /* Prepare fake skb to feed it to icmp_send */
 396         skb2 = skb_clone(skb, GFP_ATOMIC);
 397         if (skb2 == NULL)
 398                 return 0;
 399         dst_release(skb2->dst);
 400         skb2->dst = NULL;
 401         skb_pull(skb2, skb->data - (u8*)eiph);
 402         skb_reset_network_header(skb2);
 403
 404         /* Try to guess incoming interface */
 405         memset(&fl, 0, sizeof(fl));
 406         fl.fl4_daddr = eiph->saddr;
 407         fl.fl4_tos = RT_TOS(eiph->tos);
 408         fl.proto = IPPROTO_IPIP;
 409         if (ip_route_output_key(&rt, &key)) {
 410                 kfree_skb(skb2);
 411                 return 0;
 412         }
 413         skb2->dev = rt->u.dst.dev;
 414
 415         /* route "incoming" packet */
 416         if (rt->rt_flags&RTCF_LOCAL) {
 417                 ip_rt_put(rt);
 418                 rt = NULL;
 419                 fl.fl4_daddr = eiph->daddr;
 420                 fl.fl4_src = eiph->saddr;
 421                 fl.fl4_tos = eiph->tos;
 422                 if (ip_route_output_key(&rt, &fl) ||
 423                     rt->u.dst.dev->type != ARPHRD_TUNNEL) {
 424                         ip_rt_put(rt);
 425                         kfree_skb(skb2);
 426                         return 0;
 427                 }
 428         } else {
 429                 ip_rt_put(rt);
 430                 if (ip_route_input(skb2, eiph->daddr, eiph->saddr, eiph->tos, skb2->dev) ||
 431                     skb2->dst->dev->type != ARPHRD_TUNNEL) {
 432                         kfree_skb(skb2);
 433                         return 0;
 434                 }
 435         }
 436
 437         /* change mtu on this route */
 438         if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
 439                 if (n > dst_mtu(skb2->dst)) {
 440                         kfree_skb(skb2);
 441                         return 0;
 442                 }
 443                 skb2->dst->ops->update_pmtu(skb2->dst, n);
 444         } else if (type == ICMP_TIME_EXCEEDED) {
 445                 struct ip_tunnel *t = netdev_priv(skb2->dev);
 446                 if (t->parms.iph.ttl) {
 447                         rel_type = ICMP_DEST_UNREACH;
 448                         rel_code = ICMP_HOST_UNREACH;
 449                 }
 450         }
 451
 452         icmp_send(skb2, rel_type, rel_code, rel_info);
 453         kfree_skb(skb2);
 454         return 0;
 455 #endif
 456 }
 457
 458 static inline void ipip_ecn_decapsulate(const struct iphdr *outer_iph,
 459                                         struct sk_buff *skb)
 460 {
 461         struct iphdr *inner_iph = ip_hdr(skb);
 462
 463         if (INET_ECN_is_ce(outer_iph->tos))
 464                 IP_ECN_set_ce(inner_iph);
 465 }
 466
 467 static int ipip_rcv(struct sk_buff *skb)
 468 {
 469         struct ip_tunnel *tunnel;
 470         const struct iphdr *iph = ip_hdr(skb);
 471
 472         read_lock(&ipip_lock);
 473         if ((tunnel = ipip_tunnel_lookup(iph->saddr, iph->daddr)) != NULL) {
 474                 if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
 475                         read_unlock(&ipip_lock);
 476                         kfree_skb(skb);
 477                         return 0;
 478                 }
 479
 480                 secpath_reset(skb);
 481
 482                 skb->mac_header = skb->network_header;
 483                 skb_reset_network_header(skb);
 484                 skb->protocol = htons(ETH_P_IP);
 485                 skb->pkt_type = PACKET_HOST;
 486
 487                 tunnel->stat.rx_packets++;
 488                 tunnel->stat.rx_bytes += skb->len;
 489                 skb->dev = tunnel->dev;
 490                 dst_release(skb->dst);
 491                 skb->dst = NULL;
 492                 nf_reset(skb);
 493                 ipip_ecn_decapsulate(iph, skb);
 494                 netif_rx(skb);
 495                 read_unlock(&ipip_lock);
 496                 return 0;
 497         }
 498         read_unlock(&ipip_lock);
 499
 500         return -1;
 501 }
 502
 503 /*
 504  *      This function assumes it is being called from dev_queue_xmit()
 505  *      and that skb is filled properly by that function.
 506  */
 507
 508 static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
 509 {
 510         struct ip_tunnel *tunnel = netdev_priv(dev);
 511         struct net_device_stats *stats = &tunnel->stat;
 512         struct iphdr  *tiph = &tunnel->parms.iph;
 513         u8     tos = tunnel->parms.iph.tos;
 514         __be16 df = tiph->frag_off;
 515         struct rtable *rt;                      /* Route to the other host */
 516         struct net_device *tdev;                        /* Device to other host */
 517         struct iphdr  *old_iph = ip_hdr(skb);
 518         struct iphdr  *iph;                     /* Our new IP header */
 519         int    max_headroom;                    /* The extra header space needed */
 520         __be32 dst = tiph->daddr;
 521         int    mtu;
 522
 523         if (tunnel->recursion++) {
 524                 tunnel->stat.collisions++;
 525                 goto tx_error;
 526         }
 527
 528         if (skb->protocol != htons(ETH_P_IP))
 529                 goto tx_error;
 530
 531         if (tos&1)
 532                 tos = old_iph->tos;
 533
 534         if (!dst) {
 535                 /* NBMA tunnel */
 536                 if ((rt = (struct rtable*)skb->dst) == NULL) {
 537                         tunnel->stat.tx_fifo_errors++;
 538                         goto tx_error;
 539                 }
 540                 if ((dst = rt->rt_gateway) == 0)
 541                         goto tx_error_icmp;
 542         }
 543
 544         {
 545                 struct flowi fl = { .oif = tunnel->parms.link,
 546                                     .nl_u = { .ip4_u =
 547                                               { .daddr = dst,
 548                                                 .saddr = tiph->saddr,
 549                                                 .tos = RT_TOS(tos) } },
 550                                     .proto = IPPROTO_IPIP };
 551                 if (ip_route_output_key(&rt, &fl)) {
 552                         tunnel->stat.tx_carrier_errors++;
 553                         goto tx_error_icmp;
 554                 }
 555         }
 556         tdev = rt->u.dst.dev;
 557
 558         if (tdev == dev) {
 559                 ip_rt_put(rt);
 560                 tunnel->stat.collisions++;
 561                 goto tx_error;
 562         }
 563
 564         if (tiph->frag_off)
 565                 mtu = dst_mtu(&rt->u.dst) - sizeof(struct iphdr);
 566         else
 567                 mtu = skb->dst ? dst_mtu(skb->dst) : dev->mtu;
 568
 569         if (mtu < 68) {
 570                 tunnel->stat.collisions++;
 571                 ip_rt_put(rt);
 572                 goto tx_error;
 573         }
 574         if (skb->dst)
 575                 skb->dst->ops->update_pmtu(skb->dst, mtu);
 576
 577         df |= (old_iph->frag_off&htons(IP_DF));
 578
 579         if ((old_iph->frag_off&htons(IP_DF)) && mtu < ntohs(old_iph->tot_len)) {
 580                 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
 581                 ip_rt_put(rt);
 582                 goto tx_error;
 583         }
 584
 585         if (tunnel->err_count > 0) {
 586                 if (jiffies - tunnel->err_time < IPTUNNEL_ERR_TIMEO) {
 587                         tunnel->err_count--;
 588                         dst_link_failure(skb);
 589                 } else
 590                         tunnel->err_count = 0;
 591         }
 592
 593         /*
 594          * Okay, now see if we can stuff it in the buffer as-is.
 595          */
 596         max_headroom = (LL_RESERVED_SPACE(tdev)+sizeof(struct iphdr));
 597
 598         if (skb_headroom(skb) < max_headroom || skb_shared(skb) ||
 599             (skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
 600                 struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
 601                 if (!new_skb) {
 602                         ip_rt_put(rt);
 603                         stats->tx_dropped++;
 604                         dev_kfree_skb(skb);
 605                         tunnel->recursion--;
 606                         return 0;
 607                 }
 608                 if (skb->sk)
 609                         skb_set_owner_w(new_skb, skb->sk);
 610                 dev_kfree_skb(skb);
 611                 skb = new_skb;
 612                 old_iph = ip_hdr(skb);
 613         }
 614
 615         skb->transport_header = skb->network_header;
 616         skb_push(skb, sizeof(struct iphdr));
 617         skb_reset_network_header(skb);
 618         memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
 619         IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
 620                               IPSKB_REROUTED);
 621         dst_release(skb->dst);
 622         skb->dst = &rt->u.dst;
 623
 624         /*
 625          *      Push down and install the IPIP header.
 626          */
 627
 628         iph                     =       ip_hdr(skb);
 629         iph->version            =       4;
 630         iph->ihl                =       sizeof(struct iphdr)>>2;
 631         iph->frag_off           =       df;
 632         iph->protocol           =       IPPROTO_IPIP;
 633         iph->tos                =       INET_ECN_encapsulate(tos, old_iph->tos);
 634         iph->daddr              =       rt->rt_dst;
 635         iph->saddr              =       rt->rt_src;
 636
 637         if ((iph->ttl = tiph->ttl) == 0)
 638                 iph->ttl        =       old_iph->ttl;
 639
 640         nf_reset(skb);
 641
 642         IPTUNNEL_XMIT();
 643         tunnel->recursion--;
 644         return 0;
 645
 646 tx_error_icmp:
 647         dst_link_failure(skb);
 648 tx_error:
 649         stats->tx_errors++;
 650         dev_kfree_skb(skb);
 651         tunnel->recursion--;
 652         return 0;
 653 }
 654
 655 static int
 656 ipip_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
 657 {
 658         int err = 0;
 659         struct ip_tunnel_parm p;
 660         struct ip_tunnel *t;
 661
 662         switch (cmd) {
 663         case SIOCGETTUNNEL:
 664                 t = NULL;
 665                 if (dev == ipip_fb_tunnel_dev) {
 666                         if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
 667                                 err = -EFAULT;
 668                                 break;
 669                         }
 670                         t = ipip_tunnel_locate(&p, 0);
 671                 }
 672                 if (t == NULL)
 673                         t = netdev_priv(dev);
 674                 memcpy(&p, &t->parms, sizeof(p));
 675                 if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
 676                         err = -EFAULT;
 677                 break;
 678
 679         case SIOCADDTUNNEL:
 680         case SIOCCHGTUNNEL:
 681                 err = -EPERM;
 682                 if (!capable(CAP_NET_ADMIN))
 683                         goto done;
 684
 685                 err = -EFAULT;
 686                 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
 687                         goto done;
 688
 689                 err = -EINVAL;
 690                 if (p.iph.version != 4 || p.iph.protocol != IPPROTO_IPIP ||
 691                     p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)))
 692                         goto done;
 693                 if (p.iph.ttl)
 694                         p.iph.frag_off |= htons(IP_DF);
 695
 696                 t = ipip_tunnel_locate(&p, cmd == SIOCADDTUNNEL);
 697
 698                 if (dev != ipip_fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
 699                         if (t != NULL) {
 700                                 if (t->dev != dev) {
 701                                         err = -EEXIST;
 702                                         break;
 703                                 }
 704                         } else {
 705                                 if (((dev->flags&IFF_POINTOPOINT) && !p.iph.daddr) ||
 706                                     (!(dev->flags&IFF_POINTOPOINT) && p.iph.daddr)) {
 707                                         err = -EINVAL;
 708                                         break;
 709                                 }
 710                                 t = netdev_priv(dev);
 711                                 ipip_tunnel_unlink(t);
 712                                 t->parms.iph.saddr = p.iph.saddr;
 713                                 t->parms.iph.daddr = p.iph.daddr;
 714                                 memcpy(dev->dev_addr, &p.iph.saddr, 4);
 715                                 memcpy(dev->broadcast, &p.iph.daddr, 4);
 716                                 ipip_tunnel_link(t);
 717                                 netdev_state_change(dev);
 718                         }
 719                 }
 720
 721                 if (t) {
 722                         err = 0;
 723                         if (cmd == SIOCCHGTUNNEL) {
 724                                 t->parms.iph.ttl = p.iph.ttl;
 725                                 t->parms.iph.tos = p.iph.tos;
 726                                 t->parms.iph.frag_off = p.iph.frag_off;
 727                         }
 728                         if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
 729                                 err = -EFAULT;
 730                 } else
 731                         err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
 732                 break;
 733
 734         case SIOCDELTUNNEL:
 735                 err = -EPERM;
 736                 if (!capable(CAP_NET_ADMIN))
 737                         goto done;
 738
 739                 if (dev == ipip_fb_tunnel_dev) {
 740                         err = -EFAULT;
 741                         if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
 742                                 goto done;
 743                         err = -ENOENT;
 744                         if ((t = ipip_tunnel_locate(&p, 0)) == NULL)
 745                                 goto done;
 746                         err = -EPERM;
 747                         if (t->dev == ipip_fb_tunnel_dev)
 748                                 goto done;
 749                         dev = t->dev;
 750                 }
 751                 unregister_netdevice(dev);
 752                 err = 0;
 753                 break;
 754
 755         default:
 756                 err = -EINVAL;
 757         }
 758
 759 done:
 760         return err;
 761 }
 762
 763 static struct net_device_stats *ipip_tunnel_get_stats(struct net_device *dev)
 764 {
 765         return &(((struct ip_tunnel*)netdev_priv(dev))->stat);
 766 }
 767
 768 static int ipip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
 769 {
 770         if (new_mtu < 68 || new_mtu > 0xFFF8 - sizeof(struct iphdr))
 771                 return -EINVAL;
 772         dev->mtu = new_mtu;
 773         return 0;
 774 }
 775
 776 static void ipip_tunnel_setup(struct net_device *dev)
 777 {
 778         SET_MODULE_OWNER(dev);
 779         dev->uninit             = ipip_tunnel_uninit;
 780         dev->hard_start_xmit    = ipip_tunnel_xmit;
 781         dev->get_stats          = ipip_tunnel_get_stats;
 782         dev->do_ioctl           = ipip_tunnel_ioctl;
 783         dev->change_mtu         = ipip_tunnel_change_mtu;
 784         dev->destructor         = free_netdev;
 785
 786         dev->type               = ARPHRD_TUNNEL;
 787         dev->hard_header_len    = LL_MAX_HEADER + sizeof(struct iphdr);
 788         dev->mtu                = ETH_DATA_LEN - sizeof(struct iphdr);
 789         dev->flags              = IFF_NOARP;
 790         dev->iflink             = 0;
 791         dev->addr_len           = 4;
 792 }
 793
 794 static int ipip_tunnel_init(struct net_device *dev)
 795 {
 796         struct net_device *tdev = NULL;
 797         struct ip_tunnel *tunnel;
 798         struct iphdr *iph;
 799
 800         tunnel = netdev_priv(dev);
 801         iph = &tunnel->parms.iph;
 802
 803         tunnel->dev = dev;
 804         strcpy(tunnel->parms.name, dev->name);
 805
 806         memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
 807         memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);
 808
 809         if (iph->daddr) {
 810                 struct flowi fl = { .oif = tunnel->parms.link,
 811                                     .nl_u = { .ip4_u =
 812                                               { .daddr = iph->daddr,
 813                                                 .saddr = iph->saddr,
 814                                                 .tos = RT_TOS(iph->tos) } },
 815                                     .proto = IPPROTO_IPIP };
 816                 struct rtable *rt;
 817                 if (!ip_route_output_key(&rt, &fl)) {
 818                         tdev = rt->u.dst.dev;
 819                         ip_rt_put(rt);
 820                 }
 821                 dev->flags |= IFF_POINTOPOINT;
 822         }
 823
 824         if (!tdev && tunnel->parms.link)
 825                 tdev = __dev_get_by_index(tunnel->parms.link);
 826
 827         if (tdev) {
 828                 dev->hard_header_len = tdev->hard_header_len + sizeof(struct iphdr);
 829                 dev->mtu = tdev->mtu - sizeof(struct iphdr);
 830         }
 831         dev->iflink = tunnel->parms.link;
 832
 833         return 0;
 834 }
 835
 836 static int __init ipip_fb_tunnel_init(struct net_device *dev)
 837 {
 838         struct ip_tunnel *tunnel = netdev_priv(dev);
 839         struct iphdr *iph = &tunnel->parms.iph;
 840
 841         tunnel->dev = dev;
 842         strcpy(tunnel->parms.name, dev->name);
 843
 844         iph->version            = 4;
 845         iph->protocol           = IPPROTO_IPIP;
 846         iph->ihl                = 5;
 847
 848         dev_hold(dev);
 849         tunnels_wc[0]           = tunnel;
 850         return 0;
 851 }
 852
 853 static struct xfrm_tunnel ipip_handler = {
 854         .handler        =       ipip_rcv,
 855         .err_handler    =       ipip_err,
 856         .priority       =       1,
 857 };
 858
 859 static char banner[] __initdata =
 860         KERN_INFO "IPv4 over IPv4 tunneling driver\n";
 861
 862 static int __init ipip_init(void)
 863 {
 864         int err;
 865
 866         printk(banner);
 867
 868         if (xfrm4_tunnel_register(&ipip_handler, AF_INET)) {
 869                 printk(KERN_INFO "ipip init: can't register tunnel\n");
 870                 return -EAGAIN;
 871         }
 872
 873         ipip_fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel),
 874                                            "tunl0",
 875                                            ipip_tunnel_setup);
 876         if (!ipip_fb_tunnel_dev) {
 877                 err = -ENOMEM;
 878                 goto err1;
 879         }
 880
 881         ipip_fb_tunnel_dev->init = ipip_fb_tunnel_init;
 882
 883         if ((err = register_netdev(ipip_fb_tunnel_dev)))
 884                 goto err2;
 885  out:
 886         return err;
 887  err2:
 888         free_netdev(ipip_fb_tunnel_dev);
 889  err1:
 890         xfrm4_tunnel_deregister(&ipip_handler, AF_INET);
 891         goto out;
 892 }
 893
 894 static void __exit ipip_destroy_tunnels(void)
 895 {
 896         int prio;
 897
 898         for (prio = 1; prio < 4; prio++) {
 899                 int h;
 900                 for (h = 0; h < HASH_SIZE; h++) {
 901                         struct ip_tunnel *t;
 902                         while ((t = tunnels[prio][h]) != NULL)
 903                                 unregister_netdevice(t->dev);
 904                 }
 905         }
 906 }
 907
 908 static void __exit ipip_fini(void)
 909 {
 910         if (xfrm4_tunnel_deregister(&ipip_handler, AF_INET))
 911                 printk(KERN_INFO "ipip close: can't deregister tunnel\n");
 912
 913         rtnl_lock();
 914         ipip_destroy_tunnels();
 915         unregister_netdevice(ipip_fb_tunnel_dev);
 916         rtnl_unlock();
 917 }
 918
 919 module_init(ipip_init);
 920 module_exit(ipip_fini);
 921 MODULE_LICENSE("GPL");