net/ipv4/ipip.c

   1 /*
   2  *      Linux NET3:     IP/IP protocol decoder.
   3  *
   4  *      Version: $Id: ipip.c,v 1.50 2001/10/02 02:22:36 davem Exp $
   5  *
   6  *      Authors:
   7  *              Sam Lantinga (slouken@cs.ucdavis.edu)  02/01/95
   8  *
   9  *      Fixes:
  10  *              Alan Cox        :       Merged and made usable non modular (its so tiny its silly as
  11  *                                      a module taking up 2 pages).
  12  *              Alan Cox        :       Fixed bug with 1.3.18 and IPIP not working (now needs to set skb->h.iph)
  13  *                                      to keep ip_forward happy.
  14  *              Alan Cox        :       More fixes for 1.3.21, and firewall fix. Maybe this will work soon 8).
  15  *              Kai Schulte     :       Fixed #defines for IP_FIREWALL->FIREWALL
  16  *              David Woodhouse :       Perform some basic ICMP handling.
  17  *                                      IPIP Routing without decapsulation.
  18  *              Carlos Picoto   :       GRE over IP support
  19  *              Alexey Kuznetsov:       Reworked. Really, now it is truncated version of ipv4/ip_gre.c.
  20  *                                      I do not want to merge them together.
  21  *
  22  *      This program is free software; you can redistribute it and/or
  23  *      modify it under the terms of the GNU General Public License
  24  *      as published by the Free Software Foundation; either version
  25  *      2 of the License, or (at your option) any later version.
  26  *
  27  */
  28
  29 /* tunnel.c: an IP tunnel driver
  30
  31         The purpose of this driver is to provide an IP tunnel through
  32         which you can tunnel network traffic transparently across subnets.
  33
  34         This was written by looking at Nick Holloway's dummy driver
  35         Thanks for the great code!
  36
  37                 -Sam Lantinga   (slouken@cs.ucdavis.edu)  02/01/95
  38
  39         Minor tweaks:
  40                 Cleaned up the code a little and added some pre-1.3.0 tweaks.
  41                 dev->hard_header/hard_header_len changed to use no headers.
  42                 Comments/bracketing tweaked.
  43                 Made the tunnels use dev->name not tunnel: when error reporting.
  44                 Added tx_dropped stat
  45
  46                 -Alan Cox       (Alan.Cox@linux.org) 21 March 95
  47
  48         Reworked:
  49                 Changed to tunnel to destination gateway in addition to the
  50                         tunnel's pointopoint address
  51                 Almost completely rewritten
  52                 Note:  There is currently no firewall or ICMP handling done.
  53
  54                 -Sam Lantinga   (slouken@cs.ucdavis.edu) 02/13/96
  55
  56 */
  57
  58 /* Things I wish I had known when writing the tunnel driver:
  59
  60         When the tunnel_xmit() function is called, the skb contains the
  61         packet to be sent (plus a great deal of extra info), and dev
  62         contains the tunnel device that _we_ are.
  63
  64         When we are passed a packet, we are expected to fill in the
  65         source address with our source IP address.
  66
  67         What is the proper way to allocate, copy and free a buffer?
  68         After you allocate it, it is a "0 length" chunk of memory
  69         starting at zero.  If you want to add headers to the buffer
  70         later, you'll have to call "skb_reserve(skb, amount)" with
  71         the amount of memory you want reserved.  Then, you call
  72         "skb_put(skb, amount)" with the amount of space you want in
  73         the buffer.  skb_put() returns a pointer to the top (#0) of
  74         that buffer.  skb->len is set to the amount of space you have
  75         "allocated" with skb_put().  You can then write up to skb->len
  76         bytes to that buffer.  If you need more, you can call skb_put()
  77         again with the additional amount of space you need.  You can
  78         find out how much more space you can allocate by calling
  79         "skb_tailroom(skb)".
  80         Now, to add header space, call "skb_push(skb, header_len)".
  81         This creates space at the beginning of the buffer and returns
  82         a pointer to this new space.  If later you need to strip a
  83         header from a buffer, call "skb_pull(skb, header_len)".
  84         skb_headroom() will return how much space is left at the top
  85         of the buffer (before the main data).  Remember, this headroom
  86         space must be reserved before the skb_put() function is called.
  87         */
  88
  89 /*
  90    This version of net/ipv4/ipip.c is a clone of net/ipv4/ip_gre.c
  91
  92    For comments look at net/ipv4/ip_gre.c --ANK
  93  */
  94
  95
  96 #include <linux/capability.h>
  97 #include <linux/module.h>
  98 #include <linux/types.h>
  99 #include <linux/kernel.h>
 100 #include <asm/uaccess.h>
 101 #include <linux/skbuff.h>
 102 #include <linux/netdevice.h>
 103 #include <linux/in.h>
 104 #include <linux/tcp.h>
 105 #include <linux/udp.h>
 106 #include <linux/if_arp.h>
 107 #include <linux/mroute.h>
 108 #include <linux/init.h>
 109 #include <linux/netfilter_ipv4.h>
 110 #include <linux/if_ether.h>
 111
 112 #include <net/sock.h>
 113 #include <net/ip.h>
 114 #include <net/icmp.h>
 115 #include <net/ipip.h>
 116 #include <net/inet_ecn.h>
 117 #include <net/xfrm.h>
 118
 119 #define HASH_SIZE  16
 120 #define HASH(addr) (((__force u32)addr^((__force u32)addr>>4))&0xF)
 121
 122 static int ipip_fb_tunnel_init(struct net_device *dev);
 123 static int ipip_tunnel_init(struct net_device *dev);
 124 static void ipip_tunnel_setup(struct net_device *dev);
 125
 126 static struct net_device *ipip_fb_tunnel_dev;
 127
 128 static struct ip_tunnel *tunnels_r_l[HASH_SIZE];
 129 static struct ip_tunnel *tunnels_r[HASH_SIZE];
 130 static struct ip_tunnel *tunnels_l[HASH_SIZE];
 131 static struct ip_tunnel *tunnels_wc[1];
 132 static struct ip_tunnel **tunnels[4] = { tunnels_wc, tunnels_l, tunnels_r, tunnels_r_l };
 133
 134 static DEFINE_RWLOCK(ipip_lock);
 135
 136 static struct ip_tunnel * ipip_tunnel_lookup(__be32 remote, __be32 local)
 137 {
 138         unsigned h0 = HASH(remote);
 139         unsigned h1 = HASH(local);
 140         struct ip_tunnel *t;
 141
 142         for (t = tunnels_r_l[h0^h1]; t; t = t->next) {
 143                 if (local == t->parms.iph.saddr &&
 144                     remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
 145                         return t;
 146         }
 147         for (t = tunnels_r[h0]; t; t = t->next) {
 148                 if (remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
 149                         return t;
 150         }
 151         for (t = tunnels_l[h1]; t; t = t->next) {
 152                 if (local == t->parms.iph.saddr && (t->dev->flags&IFF_UP))
 153                         return t;
 154         }
 155         if ((t = tunnels_wc[0]) != NULL && (t->dev->flags&IFF_UP))
 156                 return t;
 157         return NULL;
 158 }
 159
 160 static struct ip_tunnel **__ipip_bucket(struct ip_tunnel_parm *parms)
 161 {
 162         __be32 remote = parms->iph.daddr;
 163         __be32 local = parms->iph.saddr;
 164         unsigned h = 0;
 165         int prio = 0;
 166
 167         if (remote) {
 168                 prio |= 2;
 169                 h ^= HASH(remote);
 170         }
 171         if (local) {
 172                 prio |= 1;
 173                 h ^= HASH(local);
 174         }
 175         return &tunnels[prio][h];
 176 }
 177
 178 static inline struct ip_tunnel **ipip_bucket(struct ip_tunnel *t)
 179 {
 180         return __ipip_bucket(&t->parms);
 181 }
 182
 183 static void ipip_tunnel_unlink(struct ip_tunnel *t)
 184 {
 185         struct ip_tunnel **tp;
 186
 187         for (tp = ipip_bucket(t); *tp; tp = &(*tp)->next) {
 188                 if (t == *tp) {
 189                         write_lock_bh(&ipip_lock);
 190                         *tp = t->next;
 191                         write_unlock_bh(&ipip_lock);
 192                         break;
 193                 }
 194         }
 195 }
 196
 197 static void ipip_tunnel_link(struct ip_tunnel *t)
 198 {
 199         struct ip_tunnel **tp = ipip_bucket(t);
 200
 201         t->next = *tp;
 202         write_lock_bh(&ipip_lock);
 203         *tp = t;
 204         write_unlock_bh(&ipip_lock);
 205 }
 206
 207 static struct ip_tunnel * ipip_tunnel_locate(struct ip_tunnel_parm *parms, int create)
 208 {
 209         __be32 remote = parms->iph.daddr;
 210         __be32 local = parms->iph.saddr;
 211         struct ip_tunnel *t, **tp, *nt;
 212         struct net_device *dev;
 213         char name[IFNAMSIZ];
 214
 215         for (tp = __ipip_bucket(parms); (t = *tp) != NULL; tp = &t->next) {
 216                 if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr)
 217                         return t;
 218         }
 219         if (!create)
 220                 return NULL;
 221
 222         if (parms->name[0])
 223                 strlcpy(name, parms->name, IFNAMSIZ);
 224         else {
 225                 int i;
 226                 for (i=1; i<100; i++) {
 227                         sprintf(name, "tunl%d", i);
 228                         if (__dev_get_by_name(&init_net, name) == NULL)
 229                                 break;
 230                 }
 231                 if (i==100)
 232                         goto failed;
 233         }
 234
 235         dev = alloc_netdev(sizeof(*t), name, ipip_tunnel_setup);
 236         if (dev == NULL)
 237                 return NULL;
 238
 239         nt = netdev_priv(dev);
 240         dev->init = ipip_tunnel_init;
 241         nt->parms = *parms;
 242
 243         if (register_netdevice(dev) < 0) {
 244                 free_netdev(dev);
 245                 goto failed;
 246         }
 247
 248         dev_hold(dev);
 249         ipip_tunnel_link(nt);
 250         return nt;
 251
 252 failed:
 253         return NULL;
 254 }
 255
 256 static void ipip_tunnel_uninit(struct net_device *dev)
 257 {
 258         if (dev == ipip_fb_tunnel_dev) {
 259                 write_lock_bh(&ipip_lock);
 260                 tunnels_wc[0] = NULL;
 261                 write_unlock_bh(&ipip_lock);
 262         } else
 263                 ipip_tunnel_unlink(netdev_priv(dev));
 264         dev_put(dev);
 265 }
 266
 267 static int ipip_err(struct sk_buff *skb, u32 info)
 268 {
 269 #ifndef I_WISH_WORLD_WERE_PERFECT
 270
 271 /* It is not :-( All the routers (except for Linux) return only
 272    8 bytes of packet payload. It means, that precise relaying of
 273    ICMP in the real Internet is absolutely infeasible.
 274  */
 275         struct iphdr *iph = (struct iphdr*)skb->data;
 276         const int type = icmp_hdr(skb)->type;
 277         const int code = icmp_hdr(skb)->code;
 278         struct ip_tunnel *t;
 279         int err;
 280
 281         switch (type) {
 282         default:
 283         case ICMP_PARAMETERPROB:
 284                 return 0;
 285
 286         case ICMP_DEST_UNREACH:
 287                 switch (code) {
 288                 case ICMP_SR_FAILED:
 289                 case ICMP_PORT_UNREACH:
 290                         /* Impossible event. */
 291                         return 0;
 292                 case ICMP_FRAG_NEEDED:
 293                         /* Soft state for pmtu is maintained by IP core. */
 294                         return 0;
 295                 default:
 296                         /* All others are translated to HOST_UNREACH.
 297                            rfc2003 contains "deep thoughts" about NET_UNREACH,
 298                            I believe they are just ether pollution. --ANK
 299                          */
 300                         break;
 301                 }
 302                 break;
 303         case ICMP_TIME_EXCEEDED:
 304                 if (code != ICMP_EXC_TTL)
 305                         return 0;
 306                 break;
 307         }
 308
 309         err = -ENOENT;
 310
 311         read_lock(&ipip_lock);
 312         t = ipip_tunnel_lookup(iph->daddr, iph->saddr);
 313         if (t == NULL || t->parms.iph.daddr == 0)
 314                 goto out;
 315
 316         err = 0;
 317         if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
 318                 goto out;
 319
 320         if (jiffies - t->err_time < IPTUNNEL_ERR_TIMEO)
 321                 t->err_count++;
 322         else
 323                 t->err_count = 1;
 324         t->err_time = jiffies;
 325 out:
 326         read_unlock(&ipip_lock);
 327         return err;
 328 #else
 329         struct iphdr *iph = (struct iphdr*)dp;
 330         int hlen = iph->ihl<<2;
 331         struct iphdr *eiph;
 332         const int type = icmp_hdr(skb)->type;
 333         const int code = icmp_hdr(skb)->code;
 334         int rel_type = 0;
 335         int rel_code = 0;
 336         __be32 rel_info = 0;
 337         __u32 n = 0;
 338         struct sk_buff *skb2;
 339         struct flowi fl;
 340         struct rtable *rt;
 341
 342         if (len < hlen + sizeof(struct iphdr))
 343                 return 0;
 344         eiph = (struct iphdr*)(dp + hlen);
 345
 346         switch (type) {
 347         default:
 348                 return 0;
 349         case ICMP_PARAMETERPROB:
 350                 n = ntohl(icmp_hdr(skb)->un.gateway) >> 24;
 351                 if (n < hlen)
 352                         return 0;
 353
 354                 /* So... This guy found something strange INSIDE encapsulated
 355                    packet. Well, he is fool, but what can we do ?
 356                  */
 357                 rel_type = ICMP_PARAMETERPROB;
 358                 rel_info = htonl((n - hlen) << 24);
 359                 break;
 360
 361         case ICMP_DEST_UNREACH:
 362                 switch (code) {
 363                 case ICMP_SR_FAILED:
 364                 case ICMP_PORT_UNREACH:
 365                         /* Impossible event. */
 366                         return 0;
 367                 case ICMP_FRAG_NEEDED:
 368                         /* And it is the only really necessary thing :-) */
 369                         n = ntohs(icmp_hdr(skb)->un.frag.mtu);
 370                         if (n < hlen+68)
 371                                 return 0;
 372                         n -= hlen;
 373                         /* BSD 4.2 MORE DOES NOT EXIST IN NATURE. */
 374                         if (n > ntohs(eiph->tot_len))
 375                                 return 0;
 376                         rel_info = htonl(n);
 377                         break;
 378                 default:
 379                         /* All others are translated to HOST_UNREACH.
 380                            rfc2003 contains "deep thoughts" about NET_UNREACH,
 381                            I believe, it is just ether pollution. --ANK
 382                          */
 383                         rel_type = ICMP_DEST_UNREACH;
 384                         rel_code = ICMP_HOST_UNREACH;
 385                         break;
 386                 }
 387                 break;
 388         case ICMP_TIME_EXCEEDED:
 389                 if (code != ICMP_EXC_TTL)
 390                         return 0;
 391                 break;
 392         }
 393
 394         /* Prepare fake skb to feed it to icmp_send */
 395         skb2 = skb_clone(skb, GFP_ATOMIC);
 396         if (skb2 == NULL)
 397                 return 0;
 398         dst_release(skb2->dst);
 399         skb2->dst = NULL;
 400         skb_pull(skb2, skb->data - (u8*)eiph);
 401         skb_reset_network_header(skb2);
 402
 403         /* Try to guess incoming interface */
 404         memset(&fl, 0, sizeof(fl));
 405         fl.fl4_daddr = eiph->saddr;
 406         fl.fl4_tos = RT_TOS(eiph->tos);
 407         fl.proto = IPPROTO_IPIP;
 408         if (ip_route_output_key(&init_net, &rt, &key)) {
 409                 kfree_skb(skb2);
 410                 return 0;
 411         }
 412         skb2->dev = rt->u.dst.dev;
 413
 414         /* route "incoming" packet */
 415         if (rt->rt_flags&RTCF_LOCAL) {
 416                 ip_rt_put(rt);
 417                 rt = NULL;
 418                 fl.fl4_daddr = eiph->daddr;
 419                 fl.fl4_src = eiph->saddr;
 420                 fl.fl4_tos = eiph->tos;
 421                 if (ip_route_output_key(&init_net, &rt, &fl) ||
 422                     rt->u.dst.dev->type != ARPHRD_TUNNEL) {
 423                         ip_rt_put(rt);
 424                         kfree_skb(skb2);
 425                         return 0;
 426                 }
 427         } else {
 428                 ip_rt_put(rt);
 429                 if (ip_route_input(skb2, eiph->daddr, eiph->saddr, eiph->tos, skb2->dev) ||
 430                     skb2->dst->dev->type != ARPHRD_TUNNEL) {
 431                         kfree_skb(skb2);
 432                         return 0;
 433                 }
 434         }
 435
 436         /* change mtu on this route */
 437         if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
 438                 if (n > dst_mtu(skb2->dst)) {
 439                         kfree_skb(skb2);
 440                         return 0;
 441                 }
 442                 skb2->dst->ops->update_pmtu(skb2->dst, n);
 443         } else if (type == ICMP_TIME_EXCEEDED) {
 444                 struct ip_tunnel *t = netdev_priv(skb2->dev);
 445                 if (t->parms.iph.ttl) {
 446                         rel_type = ICMP_DEST_UNREACH;
 447                         rel_code = ICMP_HOST_UNREACH;
 448                 }
 449         }
 450
 451         icmp_send(skb2, rel_type, rel_code, rel_info);
 452         kfree_skb(skb2);
 453         return 0;
 454 #endif
 455 }
 456
 457 static inline void ipip_ecn_decapsulate(const struct iphdr *outer_iph,
 458                                         struct sk_buff *skb)
 459 {
 460         struct iphdr *inner_iph = ip_hdr(skb);
 461
 462         if (INET_ECN_is_ce(outer_iph->tos))
 463                 IP_ECN_set_ce(inner_iph);
 464 }
 465
 466 static int ipip_rcv(struct sk_buff *skb)
 467 {
 468         struct ip_tunnel *tunnel;
 469         const struct iphdr *iph = ip_hdr(skb);
 470
 471         read_lock(&ipip_lock);
 472         if ((tunnel = ipip_tunnel_lookup(iph->saddr, iph->daddr)) != NULL) {
 473                 if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
 474                         read_unlock(&ipip_lock);
 475                         kfree_skb(skb);
 476                         return 0;
 477                 }
 478
 479                 secpath_reset(skb);
 480
 481                 skb->mac_header = skb->network_header;
 482                 skb_reset_network_header(skb);
 483                 skb->protocol = htons(ETH_P_IP);
 484                 skb->pkt_type = PACKET_HOST;
 485
 486                 tunnel->stat.rx_packets++;
 487                 tunnel->stat.rx_bytes += skb->len;
 488                 skb->dev = tunnel->dev;
 489                 dst_release(skb->dst);
 490                 skb->dst = NULL;
 491                 nf_reset(skb);
 492                 ipip_ecn_decapsulate(iph, skb);
 493                 netif_rx(skb);
 494                 read_unlock(&ipip_lock);
 495                 return 0;
 496         }
 497         read_unlock(&ipip_lock);
 498
 499         return -1;
 500 }
 501
 502 /*
 503  *      This function assumes it is being called from dev_queue_xmit()
 504  *      and that skb is filled properly by that function.
 505  */
 506
 507 static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
 508 {
 509         struct ip_tunnel *tunnel = netdev_priv(dev);
 510         struct net_device_stats *stats = &tunnel->stat;
 511         struct iphdr  *tiph = &tunnel->parms.iph;
 512         u8     tos = tunnel->parms.iph.tos;
 513         __be16 df = tiph->frag_off;
 514         struct rtable *rt;                      /* Route to the other host */
 515         struct net_device *tdev;                        /* Device to other host */
 516         struct iphdr  *old_iph = ip_hdr(skb);
 517         struct iphdr  *iph;                     /* Our new IP header */
 518         unsigned int max_headroom;              /* The extra header space needed */
 519         __be32 dst = tiph->daddr;
 520         int    mtu;
 521
 522         if (tunnel->recursion++) {
 523                 tunnel->stat.collisions++;
 524                 goto tx_error;
 525         }
 526
 527         if (skb->protocol != htons(ETH_P_IP))
 528                 goto tx_error;
 529
 530         if (tos&1)
 531                 tos = old_iph->tos;
 532
 533         if (!dst) {
 534                 /* NBMA tunnel */
 535                 if ((rt = (struct rtable*)skb->dst) == NULL) {
 536                         tunnel->stat.tx_fifo_errors++;
 537                         goto tx_error;
 538                 }
 539                 if ((dst = rt->rt_gateway) == 0)
 540                         goto tx_error_icmp;
 541         }
 542
 543         {
 544                 struct flowi fl = { .oif = tunnel->parms.link,
 545                                     .nl_u = { .ip4_u =
 546                                               { .daddr = dst,
 547                                                 .saddr = tiph->saddr,
 548                                                 .tos = RT_TOS(tos) } },
 549                                     .proto = IPPROTO_IPIP };
 550                 if (ip_route_output_key(&init_net, &rt, &fl)) {
 551                         tunnel->stat.tx_carrier_errors++;
 552                         goto tx_error_icmp;
 553                 }
 554         }
 555         tdev = rt->u.dst.dev;
 556
 557         if (tdev == dev) {
 558                 ip_rt_put(rt);
 559                 tunnel->stat.collisions++;
 560                 goto tx_error;
 561         }
 562
 563         if (tiph->frag_off)
 564                 mtu = dst_mtu(&rt->u.dst) - sizeof(struct iphdr);
 565         else
 566                 mtu = skb->dst ? dst_mtu(skb->dst) : dev->mtu;
 567
 568         if (mtu < 68) {
 569                 tunnel->stat.collisions++;
 570                 ip_rt_put(rt);
 571                 goto tx_error;
 572         }
 573         if (skb->dst)
 574                 skb->dst->ops->update_pmtu(skb->dst, mtu);
 575
 576         df |= (old_iph->frag_off&htons(IP_DF));
 577
 578         if ((old_iph->frag_off&htons(IP_DF)) && mtu < ntohs(old_iph->tot_len)) {
 579                 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
 580                 ip_rt_put(rt);
 581                 goto tx_error;
 582         }
 583
 584         if (tunnel->err_count > 0) {
 585                 if (jiffies - tunnel->err_time < IPTUNNEL_ERR_TIMEO) {
 586                         tunnel->err_count--;
 587                         dst_link_failure(skb);
 588                 } else
 589                         tunnel->err_count = 0;
 590         }
 591
 592         /*
 593          * Okay, now see if we can stuff it in the buffer as-is.
 594          */
 595         max_headroom = (LL_RESERVED_SPACE(tdev)+sizeof(struct iphdr));
 596
 597         if (skb_headroom(skb) < max_headroom || skb_shared(skb) ||
 598             (skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
 599                 struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
 600                 if (!new_skb) {
 601                         ip_rt_put(rt);
 602                         stats->tx_dropped++;
 603                         dev_kfree_skb(skb);
 604                         tunnel->recursion--;
 605                         return 0;
 606                 }
 607                 if (skb->sk)
 608                         skb_set_owner_w(new_skb, skb->sk);
 609                 dev_kfree_skb(skb);
 610                 skb = new_skb;
 611                 old_iph = ip_hdr(skb);
 612         }
 613
 614         skb->transport_header = skb->network_header;
 615         skb_push(skb, sizeof(struct iphdr));
 616         skb_reset_network_header(skb);
 617         memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
 618         IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
 619                               IPSKB_REROUTED);
 620         dst_release(skb->dst);
 621         skb->dst = &rt->u.dst;
 622
 623         /*
 624          *      Push down and install the IPIP header.
 625          */
 626
 627         iph                     =       ip_hdr(skb);
 628         iph->version            =       4;
 629         iph->ihl                =       sizeof(struct iphdr)>>2;
 630         iph->frag_off           =       df;
 631         iph->protocol           =       IPPROTO_IPIP;
 632         iph->tos                =       INET_ECN_encapsulate(tos, old_iph->tos);
 633         iph->daddr              =       rt->rt_dst;
 634         iph->saddr              =       rt->rt_src;
 635
 636         if ((iph->ttl = tiph->ttl) == 0)
 637                 iph->ttl        =       old_iph->ttl;
 638
 639         nf_reset(skb);
 640
 641         IPTUNNEL_XMIT();
 642         tunnel->recursion--;
 643         return 0;
 644
 645 tx_error_icmp:
 646         dst_link_failure(skb);
 647 tx_error:
 648         stats->tx_errors++;
 649         dev_kfree_skb(skb);
 650         tunnel->recursion--;
 651         return 0;
 652 }
 653
 654 static void ipip_tunnel_bind_dev(struct net_device *dev)
 655 {
 656         struct net_device *tdev = NULL;
 657         struct ip_tunnel *tunnel;
 658         struct iphdr *iph;
 659
 660         tunnel = netdev_priv(dev);
 661         iph = &tunnel->parms.iph;
 662
 663         if (iph->daddr) {
 664                 struct flowi fl = { .oif = tunnel->parms.link,
 665                                     .nl_u = { .ip4_u =
 666                                               { .daddr = iph->daddr,
 667                                                 .saddr = iph->saddr,
 668                                                 .tos = RT_TOS(iph->tos) } },
 669                                     .proto = IPPROTO_IPIP };
 670                 struct rtable *rt;
 671                 if (!ip_route_output_key(&init_net, &rt, &fl)) {
 672                         tdev = rt->u.dst.dev;
 673                         ip_rt_put(rt);
 674                 }
 675                 dev->flags |= IFF_POINTOPOINT;
 676         }
 677
 678         if (!tdev && tunnel->parms.link)
 679                 tdev = __dev_get_by_index(&init_net, tunnel->parms.link);
 680
 681         if (tdev) {
 682                 dev->hard_header_len = tdev->hard_header_len + sizeof(struct iphdr);
 683                 dev->mtu = tdev->mtu - sizeof(struct iphdr);
 684         }
 685         dev->iflink = tunnel->parms.link;
 686 }
 687
 688 static int
 689 ipip_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
 690 {
 691         int err = 0;
 692         struct ip_tunnel_parm p;
 693         struct ip_tunnel *t;
 694
 695         switch (cmd) {
 696         case SIOCGETTUNNEL:
 697                 t = NULL;
 698                 if (dev == ipip_fb_tunnel_dev) {
 699                         if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
 700                                 err = -EFAULT;
 701                                 break;
 702                         }
 703                         t = ipip_tunnel_locate(&p, 0);
 704                 }
 705                 if (t == NULL)
 706                         t = netdev_priv(dev);
 707                 memcpy(&p, &t->parms, sizeof(p));
 708                 if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
 709                         err = -EFAULT;
 710                 break;
 711
 712         case SIOCADDTUNNEL:
 713         case SIOCCHGTUNNEL:
 714                 err = -EPERM;
 715                 if (!capable(CAP_NET_ADMIN))
 716                         goto done;
 717
 718                 err = -EFAULT;
 719                 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
 720                         goto done;
 721
 722                 err = -EINVAL;
 723                 if (p.iph.version != 4 || p.iph.protocol != IPPROTO_IPIP ||
 724                     p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)))
 725                         goto done;
 726                 if (p.iph.ttl)
 727                         p.iph.frag_off |= htons(IP_DF);
 728
 729                 t = ipip_tunnel_locate(&p, cmd == SIOCADDTUNNEL);
 730
 731                 if (dev != ipip_fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
 732                         if (t != NULL) {
 733                                 if (t->dev != dev) {
 734                                         err = -EEXIST;
 735                                         break;
 736                                 }
 737                         } else {
 738                                 if (((dev->flags&IFF_POINTOPOINT) && !p.iph.daddr) ||
 739                                     (!(dev->flags&IFF_POINTOPOINT) && p.iph.daddr)) {
 740                                         err = -EINVAL;
 741                                         break;
 742                                 }
 743                                 t = netdev_priv(dev);
 744                                 ipip_tunnel_unlink(t);
 745                                 t->parms.iph.saddr = p.iph.saddr;
 746                                 t->parms.iph.daddr = p.iph.daddr;
 747                                 memcpy(dev->dev_addr, &p.iph.saddr, 4);
 748                                 memcpy(dev->broadcast, &p.iph.daddr, 4);
 749                                 ipip_tunnel_link(t);
 750                                 netdev_state_change(dev);
 751                         }
 752                 }
 753
 754                 if (t) {
 755                         err = 0;
 756                         if (cmd == SIOCCHGTUNNEL) {
 757                                 t->parms.iph.ttl = p.iph.ttl;
 758                                 t->parms.iph.tos = p.iph.tos;
 759                                 t->parms.iph.frag_off = p.iph.frag_off;
 760                                 if (t->parms.link != p.link) {
 761                                         t->parms.link = p.link;
 762                                         ipip_tunnel_bind_dev(dev);
 763                                         netdev_state_change(dev);
 764                                 }
 765                         }
 766                         if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
 767                                 err = -EFAULT;
 768                 } else
 769                         err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
 770                 break;
 771
 772         case SIOCDELTUNNEL:
 773                 err = -EPERM;
 774                 if (!capable(CAP_NET_ADMIN))
 775                         goto done;
 776
 777                 if (dev == ipip_fb_tunnel_dev) {
 778                         err = -EFAULT;
 779                         if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
 780                                 goto done;
 781                         err = -ENOENT;
 782                         if ((t = ipip_tunnel_locate(&p, 0)) == NULL)
 783                                 goto done;
 784                         err = -EPERM;
 785                         if (t->dev == ipip_fb_tunnel_dev)
 786                                 goto done;
 787                         dev = t->dev;
 788                 }
 789                 unregister_netdevice(dev);
 790                 err = 0;
 791                 break;
 792
 793         default:
 794                 err = -EINVAL;
 795         }
 796
 797 done:
 798         return err;
 799 }
 800
 801 static struct net_device_stats *ipip_tunnel_get_stats(struct net_device *dev)
 802 {
 803         return &(((struct ip_tunnel*)netdev_priv(dev))->stat);
 804 }
 805
 806 static int ipip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
 807 {
 808         if (new_mtu < 68 || new_mtu > 0xFFF8 - sizeof(struct iphdr))
 809                 return -EINVAL;
 810         dev->mtu = new_mtu;
 811         return 0;
 812 }
 813
 814 static void ipip_tunnel_setup(struct net_device *dev)
 815 {
 816         dev->uninit             = ipip_tunnel_uninit;
 817         dev->hard_start_xmit    = ipip_tunnel_xmit;
 818         dev->get_stats          = ipip_tunnel_get_stats;
 819         dev->do_ioctl           = ipip_tunnel_ioctl;
 820         dev->change_mtu         = ipip_tunnel_change_mtu;
 821         dev->destructor         = free_netdev;
 822
 823         dev->type               = ARPHRD_TUNNEL;
 824         dev->hard_header_len    = LL_MAX_HEADER + sizeof(struct iphdr);
 825         dev->mtu                = ETH_DATA_LEN - sizeof(struct iphdr);
 826         dev->flags              = IFF_NOARP;
 827         dev->iflink             = 0;
 828         dev->addr_len           = 4;
 829 }
 830
 831 static int ipip_tunnel_init(struct net_device *dev)
 832 {
 833         struct ip_tunnel *tunnel;
 834
 835         tunnel = netdev_priv(dev);
 836
 837         tunnel->dev = dev;
 838         strcpy(tunnel->parms.name, dev->name);
 839
 840         memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
 841         memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);
 842
 843         ipip_tunnel_bind_dev(dev);
 844
 845         return 0;
 846 }
 847
 848 static int __init ipip_fb_tunnel_init(struct net_device *dev)
 849 {
 850         struct ip_tunnel *tunnel = netdev_priv(dev);
 851         struct iphdr *iph = &tunnel->parms.iph;
 852
 853         tunnel->dev = dev;
 854         strcpy(tunnel->parms.name, dev->name);
 855
 856         iph->version            = 4;
 857         iph->protocol           = IPPROTO_IPIP;
 858         iph->ihl                = 5;
 859
 860         dev_hold(dev);
 861         tunnels_wc[0]           = tunnel;
 862         return 0;
 863 }
 864
 865 static struct xfrm_tunnel ipip_handler = {
 866         .handler        =       ipip_rcv,
 867         .err_handler    =       ipip_err,
 868         .priority       =       1,
 869 };
 870
 871 static char banner[] __initdata =
 872         KERN_INFO "IPv4 over IPv4 tunneling driver\n";
 873
 874 static int __init ipip_init(void)
 875 {
 876         int err;
 877
 878         printk(banner);
 879
 880         if (xfrm4_tunnel_register(&ipip_handler, AF_INET)) {
 881                 printk(KERN_INFO "ipip init: can't register tunnel\n");
 882                 return -EAGAIN;
 883         }
 884
 885         ipip_fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel),
 886                                            "tunl0",
 887                                            ipip_tunnel_setup);
 888         if (!ipip_fb_tunnel_dev) {
 889                 err = -ENOMEM;
 890                 goto err1;
 891         }
 892
 893         ipip_fb_tunnel_dev->init = ipip_fb_tunnel_init;
 894
 895         if ((err = register_netdev(ipip_fb_tunnel_dev)))
 896                 goto err2;
 897  out:
 898         return err;
 899  err2:
 900         free_netdev(ipip_fb_tunnel_dev);
 901  err1:
 902         xfrm4_tunnel_deregister(&ipip_handler, AF_INET);
 903         goto out;
 904 }
 905
 906 static void __exit ipip_destroy_tunnels(void)
 907 {
 908         int prio;
 909
 910         for (prio = 1; prio < 4; prio++) {
 911                 int h;
 912                 for (h = 0; h < HASH_SIZE; h++) {
 913                         struct ip_tunnel *t;
 914                         while ((t = tunnels[prio][h]) != NULL)
 915                                 unregister_netdevice(t->dev);
 916                 }
 917         }
 918 }
 919
 920 static void __exit ipip_fini(void)
 921 {
 922         if (xfrm4_tunnel_deregister(&ipip_handler, AF_INET))
 923                 printk(KERN_INFO "ipip close: can't deregister tunnel\n");
 924
 925         rtnl_lock();
 926         ipip_destroy_tunnels();
 927         unregister_netdevice(ipip_fb_tunnel_dev);
 928         rtnl_unlock();
 929 }
 930
/* Module registration: entry point, exit point and license tag. */
module_init(ipip_init);
module_exit(ipip_fini);
MODULE_LICENSE("GPL");