net/ipv4/ipmr.c

   1 /*
   2  *      IP multicast routing support for mrouted 3.6/3.8
   3  *
   4  *              (c) 1995 Alan Cox, <alan@lxorguk.ukuu.org.uk>
   5  *        Linux Consultancy and Custom Driver Development
   6  *
   7  *      This program is free software; you can redistribute it and/or
   8  *      modify it under the terms of the GNU General Public License
   9  *      as published by the Free Software Foundation; either version
  10  *      2 of the License, or (at your option) any later version.
  11  *
  12  *      Fixes:
  13  *      Michael Chastain        :       Incorrect size of copying.
  14  *      Alan Cox                :       Added the cache manager code
  15  *      Alan Cox                :       Fixed the clone/copy bug and device race.
  16  *      Mike McLagan            :       Routing by source
  17  *      Malcolm Beattie         :       Buffer handling fixes.
  18  *      Alexey Kuznetsov        :       Double buffer free and other fixes.
  19  *      SVR Anand               :       Fixed several multicast bugs and problems.
  20  *      Alexey Kuznetsov        :       Status, optimisations and more.
  21  *      Brad Parker             :       Better behaviour on mrouted upcall
  22  *                                      overflow.
  23  *      Carlos Picoto           :       PIMv1 Support
  24  *      Pavlin Ivanov Radoslavov:       PIMv2 Registers must checksum only PIM header
   25  *                                      Relax this requirement to work with older peers.
  26  *
  27  */
  28
  29 #include <asm/system.h>
  30 #include <asm/uaccess.h>
  31 #include <linux/types.h>
  32 #include <linux/capability.h>
  33 #include <linux/errno.h>
  34 #include <linux/timer.h>
  35 #include <linux/mm.h>
  36 #include <linux/kernel.h>
  37 #include <linux/fcntl.h>
  38 #include <linux/stat.h>
  39 #include <linux/socket.h>
  40 #include <linux/in.h>
  41 #include <linux/inet.h>
  42 #include <linux/netdevice.h>
  43 #include <linux/inetdevice.h>
  44 #include <linux/igmp.h>
  45 #include <linux/proc_fs.h>
  46 #include <linux/seq_file.h>
  47 #include <linux/mroute.h>
  48 #include <linux/init.h>
  49 #include <linux/if_ether.h>
  50 #include <net/net_namespace.h>
  51 #include <net/ip.h>
  52 #include <net/protocol.h>
  53 #include <linux/skbuff.h>
  54 #include <net/route.h>
  55 #include <net/sock.h>
  56 #include <net/icmp.h>
  57 #include <net/udp.h>
  58 #include <net/raw.h>
  59 #include <linux/notifier.h>
  60 #include <linux/if_arp.h>
  61 #include <linux/netfilter_ipv4.h>
  62 #include <net/ipip.h>
  63 #include <net/checksum.h>
  64 #include <net/netlink.h>
  65
  66 #if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2)
  67 #define CONFIG_IP_PIMSM 1
  68 #endif
  69
  70 /* Big lock, protecting vif table, mrt cache and mroute socket state.
  71    Note that the changes are semaphored via rtnl_lock.
  72  */
  73
  74 static DEFINE_RWLOCK(mrt_lock);
  75
  76 /*
  77  *      Multicast router control variables
  78  */
  79
  80 #define VIF_EXISTS(_net, _idx) ((_net)->ipv4.vif_table[_idx].dev != NULL)
  81
  82 static struct mfc_cache *mfc_unres_queue;               /* Queue of unresolved entries */
  83
  84 /* Special spinlock for queue of unresolved entries */
  85 static DEFINE_SPINLOCK(mfc_unres_lock);
  86
  87 /* We return to original Alan's scheme. Hash table of resolved
  88    entries is changed only in process context and protected
  89    with weak lock mrt_lock. Queue of unresolved entries is protected
  90    with strong spinlock mfc_unres_lock.
  91
  92    In this case data path is free of exclusive locks at all.
  93  */
  94
  95 static struct kmem_cache *mrt_cachep __read_mostly;
  96
  97 static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local);
  98 static int ipmr_cache_report(struct net *net,
  99                              struct sk_buff *pkt, vifi_t vifi, int assert);
 100 static int ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm);
 101
 102 static struct timer_list ipmr_expire_timer;
 103
 104 /* Service routines creating virtual interfaces: DVMRP tunnels and PIMREG */
 105
 106 static void ipmr_del_tunnel(struct net_device *dev, struct vifctl *v)
 107 {
 108         struct net *net = dev_net(dev);
 109
 110         dev_close(dev);
 111
 112         dev = __dev_get_by_name(net, "tunl0");
 113         if (dev) {
 114                 const struct net_device_ops *ops = dev->netdev_ops;
 115                 struct ifreq ifr;
 116                 struct ip_tunnel_parm p;
 117
 118                 memset(&p, 0, sizeof(p));
 119                 p.iph.daddr = v->vifc_rmt_addr.s_addr;
 120                 p.iph.saddr = v->vifc_lcl_addr.s_addr;
 121                 p.iph.version = 4;
 122                 p.iph.ihl = 5;
 123                 p.iph.protocol = IPPROTO_IPIP;
 124                 sprintf(p.name, "dvmrp%d", v->vifc_vifi);
 125                 ifr.ifr_ifru.ifru_data = (__force void __user *)&p;
 126
 127                 if (ops->ndo_do_ioctl) {
 128                         mm_segment_t oldfs = get_fs();
 129
 130                         set_fs(KERNEL_DS);
 131                         ops->ndo_do_ioctl(dev, &ifr, SIOCDELTUNNEL);
 132                         set_fs(oldfs);
 133                 }
 134         }
 135 }
 136
 137 static
 138 struct net_device *ipmr_new_tunnel(struct net *net, struct vifctl *v)
 139 {
 140         struct net_device  *dev;
 141
 142         dev = __dev_get_by_name(net, "tunl0");
 143
 144         if (dev) {
 145                 const struct net_device_ops *ops = dev->netdev_ops;
 146                 int err;
 147                 struct ifreq ifr;
 148                 struct ip_tunnel_parm p;
 149                 struct in_device  *in_dev;
 150
 151                 memset(&p, 0, sizeof(p));
 152                 p.iph.daddr = v->vifc_rmt_addr.s_addr;
 153                 p.iph.saddr = v->vifc_lcl_addr.s_addr;
 154                 p.iph.version = 4;
 155                 p.iph.ihl = 5;
 156                 p.iph.protocol = IPPROTO_IPIP;
 157                 sprintf(p.name, "dvmrp%d", v->vifc_vifi);
 158                 ifr.ifr_ifru.ifru_data = (__force void __user *)&p;
 159
 160                 if (ops->ndo_do_ioctl) {
 161                         mm_segment_t oldfs = get_fs();
 162
 163                         set_fs(KERNEL_DS);
 164                         err = ops->ndo_do_ioctl(dev, &ifr, SIOCADDTUNNEL);
 165                         set_fs(oldfs);
 166                 } else
 167                         err = -EOPNOTSUPP;
 168
 169                 dev = NULL;
 170
 171                 if (err == 0 &&
 172                     (dev = __dev_get_by_name(net, p.name)) != NULL) {
 173                         dev->flags |= IFF_MULTICAST;
 174
 175                         in_dev = __in_dev_get_rtnl(dev);
 176                         if (in_dev == NULL)
 177                                 goto failure;
 178
 179                         ipv4_devconf_setall(in_dev);
 180                         IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0;
 181
 182                         if (dev_open(dev))
 183                                 goto failure;
 184                         dev_hold(dev);
 185                 }
 186         }
 187         return dev;
 188
 189 failure:
 190         /* allow the register to be completed before unregistering. */
 191         rtnl_unlock();
 192         rtnl_lock();
 193
 194         unregister_netdevice(dev);
 195         return NULL;
 196 }
 197
 198 #ifdef CONFIG_IP_PIMSM
 199
 200 static netdev_tx_t reg_vif_xmit(struct sk_buff *skb, struct net_device *dev)
 201 {
 202         struct net *net = dev_net(dev);
 203
 204         read_lock(&mrt_lock);
 205         dev->stats.tx_bytes += skb->len;
 206         dev->stats.tx_packets++;
 207         ipmr_cache_report(net, skb, net->ipv4.mroute_reg_vif_num,
 208                           IGMPMSG_WHOLEPKT);
 209         read_unlock(&mrt_lock);
 210         kfree_skb(skb);
 211         return NETDEV_TX_OK;
 212 }
 213
 214 static const struct net_device_ops reg_vif_netdev_ops = {
 215         .ndo_start_xmit = reg_vif_xmit,
 216 };
 217
 218 static void reg_vif_setup(struct net_device *dev)
 219 {
 220         dev->type               = ARPHRD_PIMREG;
 221         dev->mtu                = ETH_DATA_LEN - sizeof(struct iphdr) - 8;
 222         dev->flags              = IFF_NOARP;
 223         dev->netdev_ops         = &reg_vif_netdev_ops,
 224         dev->destructor         = free_netdev;
 225         dev->features           |= NETIF_F_NETNS_LOCAL;
 226 }
 227
 228 static struct net_device *ipmr_reg_vif(struct net *net)
 229 {
 230         struct net_device *dev;
 231         struct in_device *in_dev;
 232
 233         dev = alloc_netdev(0, "pimreg", reg_vif_setup);
 234
 235         if (dev == NULL)
 236                 return NULL;
 237
 238         dev_net_set(dev, net);
 239
 240         if (register_netdevice(dev)) {
 241                 free_netdev(dev);
 242                 return NULL;
 243         }
 244         dev->iflink = 0;
 245
 246         rcu_read_lock();
 247         if ((in_dev = __in_dev_get_rcu(dev)) == NULL) {
 248                 rcu_read_unlock();
 249                 goto failure;
 250         }
 251
 252         ipv4_devconf_setall(in_dev);
 253         IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0;
 254         rcu_read_unlock();
 255
 256         if (dev_open(dev))
 257                 goto failure;
 258
 259         dev_hold(dev);
 260
 261         return dev;
 262
 263 failure:
 264         /* allow the register to be completed before unregistering. */
 265         rtnl_unlock();
 266         rtnl_lock();
 267
 268         unregister_netdevice(dev);
 269         return NULL;
 270 }
 271 #endif
 272
 273 /*
 274  *      Delete a VIF entry
 275  *      @notify: Set to 1, if the caller is a notifier_call
 276  */
 277
 278 static int vif_delete(struct net *net, int vifi, int notify)
 279 {
 280         struct vif_device *v;
 281         struct net_device *dev;
 282         struct in_device *in_dev;
 283
 284         if (vifi < 0 || vifi >= net->ipv4.maxvif)
 285                 return -EADDRNOTAVAIL;
 286
 287         v = &net->ipv4.vif_table[vifi];
 288
 289         write_lock_bh(&mrt_lock);
 290         dev = v->dev;
 291         v->dev = NULL;
 292
 293         if (!dev) {
 294                 write_unlock_bh(&mrt_lock);
 295                 return -EADDRNOTAVAIL;
 296         }
 297
 298 #ifdef CONFIG_IP_PIMSM
 299         if (vifi == net->ipv4.mroute_reg_vif_num)
 300                 net->ipv4.mroute_reg_vif_num = -1;
 301 #endif
 302
 303         if (vifi+1 == net->ipv4.maxvif) {
 304                 int tmp;
 305                 for (tmp=vifi-1; tmp>=0; tmp--) {
 306                         if (VIF_EXISTS(net, tmp))
 307                                 break;
 308                 }
 309                 net->ipv4.maxvif = tmp+1;
 310         }
 311
 312         write_unlock_bh(&mrt_lock);
 313
 314         dev_set_allmulti(dev, -1);
 315
 316         if ((in_dev = __in_dev_get_rtnl(dev)) != NULL) {
 317                 IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)--;
 318                 ip_rt_multicast_event(in_dev);
 319         }
 320
 321         if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER) && !notify)
 322                 unregister_netdevice(dev);
 323
 324         dev_put(dev);
 325         return 0;
 326 }
 327
 328 static inline void ipmr_cache_free(struct mfc_cache *c)
 329 {
 330         release_net(mfc_net(c));
 331         kmem_cache_free(mrt_cachep, c);
 332 }
 333
 334 /* Destroy an unresolved cache entry, killing queued skbs
 335    and reporting error to netlink readers.
 336  */
 337
 338 static void ipmr_destroy_unres(struct mfc_cache *c)
 339 {
 340         struct sk_buff *skb;
 341         struct nlmsgerr *e;
 342         struct net *net = mfc_net(c);
 343
 344         atomic_dec(&net->ipv4.cache_resolve_queue_len);
 345
 346         while ((skb = skb_dequeue(&c->mfc_un.unres.unresolved))) {
 347                 if (ip_hdr(skb)->version == 0) {
 348                         struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));
 349                         nlh->nlmsg_type = NLMSG_ERROR;
 350                         nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
 351                         skb_trim(skb, nlh->nlmsg_len);
 352                         e = NLMSG_DATA(nlh);
 353                         e->error = -ETIMEDOUT;
 354                         memset(&e->msg, 0, sizeof(e->msg));
 355
 356                         rtnl_unicast(skb, net, NETLINK_CB(skb).pid);
 357                 } else
 358                         kfree_skb(skb);
 359         }
 360
 361         ipmr_cache_free(c);
 362 }
 363
 364
 365 /* Single timer process for all the unresolved queue. */
 366
 367 static void ipmr_expire_process(unsigned long dummy)
 368 {
 369         unsigned long now;
 370         unsigned long expires;
 371         struct mfc_cache *c, **cp;
 372
 373         if (!spin_trylock(&mfc_unres_lock)) {
 374                 mod_timer(&ipmr_expire_timer, jiffies+HZ/10);
 375                 return;
 376         }
 377
 378         if (mfc_unres_queue == NULL)
 379                 goto out;
 380
 381         now = jiffies;
 382         expires = 10*HZ;
 383         cp = &mfc_unres_queue;
 384
 385         while ((c=*cp) != NULL) {
 386                 if (time_after(c->mfc_un.unres.expires, now)) {
 387                         unsigned long interval = c->mfc_un.unres.expires - now;
 388                         if (interval < expires)
 389                                 expires = interval;
 390                         cp = &c->next;
 391                         continue;
 392                 }
 393
 394                 *cp = c->next;
 395
 396                 ipmr_destroy_unres(c);
 397         }
 398
 399         if (mfc_unres_queue != NULL)
 400                 mod_timer(&ipmr_expire_timer, jiffies + expires);
 401
 402 out:
 403         spin_unlock(&mfc_unres_lock);
 404 }
 405
 406 /* Fill oifs list. It is called under write locked mrt_lock. */
 407
 408 static void ipmr_update_thresholds(struct mfc_cache *cache, unsigned char *ttls)
 409 {
 410         int vifi;
 411         struct net *net = mfc_net(cache);
 412
 413         cache->mfc_un.res.minvif = MAXVIFS;
 414         cache->mfc_un.res.maxvif = 0;
 415         memset(cache->mfc_un.res.ttls, 255, MAXVIFS);
 416
 417         for (vifi = 0; vifi < net->ipv4.maxvif; vifi++) {
 418                 if (VIF_EXISTS(net, vifi) &&
 419                     ttls[vifi] && ttls[vifi] < 255) {
 420                         cache->mfc_un.res.ttls[vifi] = ttls[vifi];
 421                         if (cache->mfc_un.res.minvif > vifi)
 422                                 cache->mfc_un.res.minvif = vifi;
 423                         if (cache->mfc_un.res.maxvif <= vifi)
 424                                 cache->mfc_un.res.maxvif = vifi + 1;
 425                 }
 426         }
 427 }
 428
 429 static int vif_add(struct net *net, struct vifctl *vifc, int mrtsock)
 430 {
 431         int vifi = vifc->vifc_vifi;
 432         struct vif_device *v = &net->ipv4.vif_table[vifi];
 433         struct net_device *dev;
 434         struct in_device *in_dev;
 435         int err;
 436
 437         /* Is vif busy ? */
 438         if (VIF_EXISTS(net, vifi))
 439                 return -EADDRINUSE;
 440
 441         switch (vifc->vifc_flags) {
 442 #ifdef CONFIG_IP_PIMSM
 443         case VIFF_REGISTER:
 444                 /*
 445                  * Special Purpose VIF in PIM
 446                  * All the packets will be sent to the daemon
 447                  */
 448                 if (net->ipv4.mroute_reg_vif_num >= 0)
 449                         return -EADDRINUSE;
 450                 dev = ipmr_reg_vif(net);
 451                 if (!dev)
 452                         return -ENOBUFS;
 453                 err = dev_set_allmulti(dev, 1);
 454                 if (err) {
 455                         unregister_netdevice(dev);
 456                         dev_put(dev);
 457                         return err;
 458                 }
 459                 break;
 460 #endif
 461         case VIFF_TUNNEL:
 462                 dev = ipmr_new_tunnel(net, vifc);
 463                 if (!dev)
 464                         return -ENOBUFS;
 465                 err = dev_set_allmulti(dev, 1);
 466                 if (err) {
 467                         ipmr_del_tunnel(dev, vifc);
 468                         dev_put(dev);
 469                         return err;
 470                 }
 471                 break;
 472         case 0:
 473                 dev = ip_dev_find(net, vifc->vifc_lcl_addr.s_addr);
 474                 if (!dev)
 475                         return -EADDRNOTAVAIL;
 476                 err = dev_set_allmulti(dev, 1);
 477                 if (err) {
 478                         dev_put(dev);
 479                         return err;
 480                 }
 481                 break;
 482         default:
 483                 return -EINVAL;
 484         }
 485
 486         if ((in_dev = __in_dev_get_rtnl(dev)) == NULL)
 487                 return -EADDRNOTAVAIL;
 488         IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)++;
 489         ip_rt_multicast_event(in_dev);
 490
 491         /*
 492          *      Fill in the VIF structures
 493          */
 494         v->rate_limit = vifc->vifc_rate_limit;
 495         v->local = vifc->vifc_lcl_addr.s_addr;
 496         v->remote = vifc->vifc_rmt_addr.s_addr;
 497         v->flags = vifc->vifc_flags;
 498         if (!mrtsock)
 499                 v->flags |= VIFF_STATIC;
 500         v->threshold = vifc->vifc_threshold;
 501         v->bytes_in = 0;
 502         v->bytes_out = 0;
 503         v->pkt_in = 0;
 504         v->pkt_out = 0;
 505         v->link = dev->ifindex;
 506         if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER))
 507                 v->link = dev->iflink;
 508
 509         /* And finish update writing critical data */
 510         write_lock_bh(&mrt_lock);
 511         v->dev = dev;
 512 #ifdef CONFIG_IP_PIMSM
 513         if (v->flags&VIFF_REGISTER)
 514                 net->ipv4.mroute_reg_vif_num = vifi;
 515 #endif
 516         if (vifi+1 > net->ipv4.maxvif)
 517                 net->ipv4.maxvif = vifi+1;
 518         write_unlock_bh(&mrt_lock);
 519         return 0;
 520 }
 521
 522 static struct mfc_cache *ipmr_cache_find(struct net *net,
 523                                          __be32 origin,
 524                                          __be32 mcastgrp)
 525 {
 526         int line = MFC_HASH(mcastgrp, origin);
 527         struct mfc_cache *c;
 528
 529         for (c = net->ipv4.mfc_cache_array[line]; c; c = c->next) {
 530                 if (c->mfc_origin==origin && c->mfc_mcastgrp==mcastgrp)
 531                         break;
 532         }
 533         return c;
 534 }
 535
 536 /*
 537  *      Allocate a multicast cache entry
 538  */
 539 static struct mfc_cache *ipmr_cache_alloc(struct net *net)
 540 {
 541         struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
 542         if (c == NULL)
 543                 return NULL;
 544         c->mfc_un.res.minvif = MAXVIFS;
 545         mfc_net_set(c, net);
 546         return c;
 547 }
 548
 549 static struct mfc_cache *ipmr_cache_alloc_unres(struct net *net)
 550 {
 551         struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
 552         if (c == NULL)
 553                 return NULL;
 554         skb_queue_head_init(&c->mfc_un.unres.unresolved);
 555         c->mfc_un.unres.expires = jiffies + 10*HZ;
 556         mfc_net_set(c, net);
 557         return c;
 558 }
 559
 560 /*
 561  *      A cache entry has gone into a resolved state from queued
 562  */
 563
 564 static void ipmr_cache_resolve(struct mfc_cache *uc, struct mfc_cache *c)
 565 {
 566         struct sk_buff *skb;
 567         struct nlmsgerr *e;
 568
 569         /*
 570          *      Play the pending entries through our router
 571          */
 572
 573         while ((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) {
 574                 if (ip_hdr(skb)->version == 0) {
 575                         struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));
 576
 577                         if (ipmr_fill_mroute(skb, c, NLMSG_DATA(nlh)) > 0) {
 578                                 nlh->nlmsg_len = (skb_tail_pointer(skb) -
 579                                                   (u8 *)nlh);
 580                         } else {
 581                                 nlh->nlmsg_type = NLMSG_ERROR;
 582                                 nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
 583                                 skb_trim(skb, nlh->nlmsg_len);
 584                                 e = NLMSG_DATA(nlh);
 585                                 e->error = -EMSGSIZE;
 586                                 memset(&e->msg, 0, sizeof(e->msg));
 587                         }
 588
 589                         rtnl_unicast(skb, mfc_net(c), NETLINK_CB(skb).pid);
 590                 } else
 591                         ip_mr_forward(skb, c, 0);
 592         }
 593 }
 594
 595 /*
 596  *      Bounce a cache query up to mrouted. We could use netlink for this but mrouted
 597  *      expects the following bizarre scheme.
 598  *
 599  *      Called under mrt_lock.
 600  */
 601
 602 static int ipmr_cache_report(struct net *net,
 603                              struct sk_buff *pkt, vifi_t vifi, int assert)
 604 {
 605         struct sk_buff *skb;
 606         const int ihl = ip_hdrlen(pkt);
 607         struct igmphdr *igmp;
 608         struct igmpmsg *msg;
 609         int ret;
 610
 611 #ifdef CONFIG_IP_PIMSM
 612         if (assert == IGMPMSG_WHOLEPKT)
 613                 skb = skb_realloc_headroom(pkt, sizeof(struct iphdr));
 614         else
 615 #endif
 616                 skb = alloc_skb(128, GFP_ATOMIC);
 617
 618         if (!skb)
 619                 return -ENOBUFS;
 620
 621 #ifdef CONFIG_IP_PIMSM
 622         if (assert == IGMPMSG_WHOLEPKT) {
 623                 /* Ugly, but we have no choice with this interface.
 624                    Duplicate old header, fix ihl, length etc.
 625                    And all this only to mangle msg->im_msgtype and
 626                    to set msg->im_mbz to "mbz" :-)
 627                  */
 628                 skb_push(skb, sizeof(struct iphdr));
 629                 skb_reset_network_header(skb);
 630                 skb_reset_transport_header(skb);
 631                 msg = (struct igmpmsg *)skb_network_header(skb);
 632                 memcpy(msg, skb_network_header(pkt), sizeof(struct iphdr));
 633                 msg->im_msgtype = IGMPMSG_WHOLEPKT;
 634                 msg->im_mbz = 0;
 635                 msg->im_vif = net->ipv4.mroute_reg_vif_num;
 636                 ip_hdr(skb)->ihl = sizeof(struct iphdr) >> 2;
 637                 ip_hdr(skb)->tot_len = htons(ntohs(ip_hdr(pkt)->tot_len) +
 638                                              sizeof(struct iphdr));
 639         } else
 640 #endif
 641         {
 642
 643         /*
 644          *      Copy the IP header
 645          */
 646
 647         skb->network_header = skb->tail;
 648         skb_put(skb, ihl);
 649         skb_copy_to_linear_data(skb, pkt->data, ihl);
 650         ip_hdr(skb)->protocol = 0;                      /* Flag to the kernel this is a route add */
 651         msg = (struct igmpmsg *)skb_network_header(skb);
 652         msg->im_vif = vifi;
 653         skb_dst_set(skb, dst_clone(skb_dst(pkt)));
 654
 655         /*
 656          *      Add our header
 657          */
 658
 659         igmp=(struct igmphdr *)skb_put(skb, sizeof(struct igmphdr));
 660         igmp->type      =
 661         msg->im_msgtype = assert;
 662         igmp->code      =       0;
 663         ip_hdr(skb)->tot_len = htons(skb->len);                 /* Fix the length */
 664         skb->transport_header = skb->network_header;
 665         }
 666
 667         if (net->ipv4.mroute_sk == NULL) {
 668                 kfree_skb(skb);
 669                 return -EINVAL;
 670         }
 671
 672         /*
 673          *      Deliver to mrouted
 674          */
 675         ret = sock_queue_rcv_skb(net->ipv4.mroute_sk, skb);
 676         if (ret < 0) {
 677                 if (net_ratelimit())
 678                         printk(KERN_WARNING "mroute: pending queue full, dropping entries.\n");
 679                 kfree_skb(skb);
 680         }
 681
 682         return ret;
 683 }
 684
 685 /*
 686  *      Queue a packet for resolution. It gets locked cache entry!
 687  */
 688
 689 static int
 690 ipmr_cache_unresolved(struct net *net, vifi_t vifi, struct sk_buff *skb)
 691 {
 692         int err;
 693         struct mfc_cache *c;
 694         const struct iphdr *iph = ip_hdr(skb);
 695
 696         spin_lock_bh(&mfc_unres_lock);
 697         for (c=mfc_unres_queue; c; c=c->next) {
 698                 if (net_eq(mfc_net(c), net) &&
 699                     c->mfc_mcastgrp == iph->daddr &&
 700                     c->mfc_origin == iph->saddr)
 701                         break;
 702         }
 703
 704         if (c == NULL) {
 705                 /*
 706                  *      Create a new entry if allowable
 707                  */
 708
 709                 if (atomic_read(&net->ipv4.cache_resolve_queue_len) >= 10 ||
 710                     (c = ipmr_cache_alloc_unres(net)) == NULL) {
 711                         spin_unlock_bh(&mfc_unres_lock);
 712
 713                         kfree_skb(skb);
 714                         return -ENOBUFS;
 715                 }
 716
 717                 /*
 718                  *      Fill in the new cache entry
 719                  */
 720                 c->mfc_parent   = -1;
 721                 c->mfc_origin   = iph->saddr;
 722                 c->mfc_mcastgrp = iph->daddr;
 723
 724                 /*
 725                  *      Reflect first query at mrouted.
 726                  */
 727                 err = ipmr_cache_report(net, skb, vifi, IGMPMSG_NOCACHE);
 728                 if (err < 0) {
 729                         /* If the report failed throw the cache entry
 730                            out - Brad Parker
 731                          */
 732                         spin_unlock_bh(&mfc_unres_lock);
 733
 734                         ipmr_cache_free(c);
 735                         kfree_skb(skb);
 736                         return err;
 737                 }
 738
 739                 atomic_inc(&net->ipv4.cache_resolve_queue_len);
 740                 c->next = mfc_unres_queue;
 741                 mfc_unres_queue = c;
 742
 743                 mod_timer(&ipmr_expire_timer, c->mfc_un.unres.expires);
 744         }
 745
 746         /*
 747          *      See if we can append the packet
 748          */
 749         if (c->mfc_un.unres.unresolved.qlen>3) {
 750                 kfree_skb(skb);
 751                 err = -ENOBUFS;
 752         } else {
 753                 skb_queue_tail(&c->mfc_un.unres.unresolved, skb);
 754                 err = 0;
 755         }
 756
 757         spin_unlock_bh(&mfc_unres_lock);
 758         return err;
 759 }
 760
 761 /*
 762  *      MFC cache manipulation by user space mroute daemon
 763  */
 764
 765 static int ipmr_mfc_delete(struct net *net, struct mfcctl *mfc)
 766 {
 767         int line;
 768         struct mfc_cache *c, **cp;
 769
 770         line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);
 771
 772         for (cp = &net->ipv4.mfc_cache_array[line];
 773              (c = *cp) != NULL; cp = &c->next) {
 774                 if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
 775                     c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) {
 776                         write_lock_bh(&mrt_lock);
 777                         *cp = c->next;
 778                         write_unlock_bh(&mrt_lock);
 779
 780                         ipmr_cache_free(c);
 781                         return 0;
 782                 }
 783         }
 784         return -ENOENT;
 785 }
 786
 787 static int ipmr_mfc_add(struct net *net, struct mfcctl *mfc, int mrtsock)
 788 {
 789         int line;
 790         struct mfc_cache *uc, *c, **cp;
 791
 792         line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);
 793
 794         for (cp = &net->ipv4.mfc_cache_array[line];
 795              (c = *cp) != NULL; cp = &c->next) {
 796                 if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
 797                     c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr)
 798                         break;
 799         }
 800
 801         if (c != NULL) {
 802                 write_lock_bh(&mrt_lock);
 803                 c->mfc_parent = mfc->mfcc_parent;
 804                 ipmr_update_thresholds(c, mfc->mfcc_ttls);
 805                 if (!mrtsock)
 806                         c->mfc_flags |= MFC_STATIC;
 807                 write_unlock_bh(&mrt_lock);
 808                 return 0;
 809         }
 810
 811         if (!ipv4_is_multicast(mfc->mfcc_mcastgrp.s_addr))
 812                 return -EINVAL;
 813
 814         c = ipmr_cache_alloc(net);
 815         if (c == NULL)
 816                 return -ENOMEM;
 817
 818         c->mfc_origin = mfc->mfcc_origin.s_addr;
 819         c->mfc_mcastgrp = mfc->mfcc_mcastgrp.s_addr;
 820         c->mfc_parent = mfc->mfcc_parent;
 821         ipmr_update_thresholds(c, mfc->mfcc_ttls);
 822         if (!mrtsock)
 823                 c->mfc_flags |= MFC_STATIC;
 824
 825         write_lock_bh(&mrt_lock);
 826         c->next = net->ipv4.mfc_cache_array[line];
 827         net->ipv4.mfc_cache_array[line] = c;
 828         write_unlock_bh(&mrt_lock);
 829
 830         /*
 831          *      Check to see if we resolved a queued list. If so we
 832          *      need to send on the frames and tidy up.
 833          */
 834         spin_lock_bh(&mfc_unres_lock);
 835         for (cp = &mfc_unres_queue; (uc=*cp) != NULL;
 836              cp = &uc->next) {
 837                 if (net_eq(mfc_net(uc), net) &&
 838                     uc->mfc_origin == c->mfc_origin &&
 839                     uc->mfc_mcastgrp == c->mfc_mcastgrp) {
 840                         *cp = uc->next;
 841                         atomic_dec(&net->ipv4.cache_resolve_queue_len);
 842                         break;
 843                 }
 844         }
 845         if (mfc_unres_queue == NULL)
 846                 del_timer(&ipmr_expire_timer);
 847         spin_unlock_bh(&mfc_unres_lock);
 848
 849         if (uc) {
 850                 ipmr_cache_resolve(uc, c);
 851                 ipmr_cache_free(uc);
 852         }
 853         return 0;
 854 }
 855
 856 /*
 857  *      Close the multicast socket, and clear the vif tables etc
 858  */
 859
 860 static void mroute_clean_tables(struct net *net)
 861 {
 862         int i;
 863
 864         /*
 865          *      Shut down all active vif entries
 866          */
 867         for (i = 0; i < net->ipv4.maxvif; i++) {
 868                 if (!(net->ipv4.vif_table[i].flags&VIFF_STATIC))
 869                         vif_delete(net, i, 0);
 870         }
 871
 872         /*
 873          *      Wipe the cache
 874          */
 875         for (i=0; i<MFC_LINES; i++) {
 876                 struct mfc_cache *c, **cp;
 877
 878                 cp = &net->ipv4.mfc_cache_array[i];
 879                 while ((c = *cp) != NULL) {
 880                         if (c->mfc_flags&MFC_STATIC) {
 881                                 cp = &c->next;
 882                                 continue;
 883                         }
 884                         write_lock_bh(&mrt_lock);
 885                         *cp = c->next;
 886                         write_unlock_bh(&mrt_lock);
 887
 888                         ipmr_cache_free(c);
 889                 }
 890         }
 891
 892         if (atomic_read(&net->ipv4.cache_resolve_queue_len) != 0) {
 893                 struct mfc_cache *c, **cp;
 894
 895                 spin_lock_bh(&mfc_unres_lock);
 896                 cp = &mfc_unres_queue;
 897                 while ((c = *cp) != NULL) {
 898                         if (!net_eq(mfc_net(c), net)) {
 899                                 cp = &c->next;
 900                                 continue;
 901                         }
 902                         *cp = c->next;
 903
 904                         ipmr_destroy_unres(c);
 905                 }
 906                 spin_unlock_bh(&mfc_unres_lock);
 907         }
 908 }
 909
 910 static void mrtsock_destruct(struct sock *sk)
 911 {
 912         struct net *net = sock_net(sk);
 913
 914         rtnl_lock();
 915         if (sk == net->ipv4.mroute_sk) {
 916                 IPV4_DEVCONF_ALL(net, MC_FORWARDING)--;
 917
 918                 write_lock_bh(&mrt_lock);
 919                 net->ipv4.mroute_sk = NULL;
 920                 write_unlock_bh(&mrt_lock);
 921
 922                 mroute_clean_tables(net);
 923         }
 924         rtnl_unlock();
 925 }
 926
 927 /*
 928  *      Socket options and virtual interface manipulation. The whole
 929  *      virtual interface system is a complete heap, but unfortunately
 930  *      that's how BSD mrouted happens to think. Maybe one day with a proper
 931  *      MOSPF/PIM router set up we can clean this up.
 932  */
 933
 934 int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsigned int optlen)
 935 {
 936         int ret;
 937         struct vifctl vif;
 938         struct mfcctl mfc;
 939         struct net *net = sock_net(sk);
 940
 941         if (optname != MRT_INIT) {
 942                 if (sk != net->ipv4.mroute_sk && !capable(CAP_NET_ADMIN))
 943                         return -EACCES;
 944         }
 945
 946         switch (optname) {
 947         case MRT_INIT:
 948                 if (sk->sk_type != SOCK_RAW ||
 949                     inet_sk(sk)->num != IPPROTO_IGMP)
 950                         return -EOPNOTSUPP;
 951                 if (optlen != sizeof(int))
 952                         return -ENOPROTOOPT;
 953
 954                 rtnl_lock();
 955                 if (net->ipv4.mroute_sk) {
 956                         rtnl_unlock();
 957                         return -EADDRINUSE;
 958                 }
 959
 960                 ret = ip_ra_control(sk, 1, mrtsock_destruct);
 961                 if (ret == 0) {
 962                         write_lock_bh(&mrt_lock);
 963                         net->ipv4.mroute_sk = sk;
 964                         write_unlock_bh(&mrt_lock);
 965
 966                         IPV4_DEVCONF_ALL(net, MC_FORWARDING)++;
 967                 }
 968                 rtnl_unlock();
 969                 return ret;
 970         case MRT_DONE:
 971                 if (sk != net->ipv4.mroute_sk)
 972                         return -EACCES;
 973                 return ip_ra_control(sk, 0, NULL);
 974         case MRT_ADD_VIF:
 975         case MRT_DEL_VIF:
 976                 if (optlen != sizeof(vif))
 977                         return -EINVAL;
 978                 if (copy_from_user(&vif, optval, sizeof(vif)))
 979                         return -EFAULT;
 980                 if (vif.vifc_vifi >= MAXVIFS)
 981                         return -ENFILE;
 982                 rtnl_lock();
 983                 if (optname == MRT_ADD_VIF) {
 984                         ret = vif_add(net, &vif, sk == net->ipv4.mroute_sk);
 985                 } else {
 986                         ret = vif_delete(net, vif.vifc_vifi, 0);
 987                 }
 988                 rtnl_unlock();
 989                 return ret;
 990
 991                 /*
 992                  *      Manipulate the forwarding caches. These live
 993                  *      in a sort of kernel/user symbiosis.
 994                  */
 995         case MRT_ADD_MFC:
 996         case MRT_DEL_MFC:
 997                 if (optlen != sizeof(mfc))
 998                         return -EINVAL;
 999                 if (copy_from_user(&mfc, optval, sizeof(mfc)))
1000                         return -EFAULT;
1001                 rtnl_lock();
1002                 if (optname == MRT_DEL_MFC)
1003                         ret = ipmr_mfc_delete(net, &mfc);
1004                 else
1005                         ret = ipmr_mfc_add(net, &mfc, sk == net->ipv4.mroute_sk);
1006                 rtnl_unlock();
1007                 return ret;
1008                 /*
1009                  *      Control PIM assert.
1010                  */
1011         case MRT_ASSERT:
1012         {
1013                 int v;
1014                 if (get_user(v,(int __user *)optval))
1015                         return -EFAULT;
1016                 net->ipv4.mroute_do_assert = (v) ? 1 : 0;
1017                 return 0;
1018         }
1019 #ifdef CONFIG_IP_PIMSM
1020         case MRT_PIM:
1021         {
1022                 int v;
1023
1024                 if (get_user(v,(int __user *)optval))
1025                         return -EFAULT;
1026                 v = (v) ? 1 : 0;
1027
1028                 rtnl_lock();
1029                 ret = 0;
1030                 if (v != net->ipv4.mroute_do_pim) {
1031                         net->ipv4.mroute_do_pim = v;
1032                         net->ipv4.mroute_do_assert = v;
1033                 }
1034                 rtnl_unlock();
1035                 return ret;
1036         }
1037 #endif
1038         /*
1039          *      Spurious command, or MRT_VERSION which you cannot
1040          *      set.
1041          */
1042         default:
1043                 return -ENOPROTOOPT;
1044         }
1045 }
1046
1047 /*
1048  *      Getsock opt support for the multicast routing system.
1049  */
1050
1051 int ip_mroute_getsockopt(struct sock *sk, int optname, char __user *optval, int __user *optlen)
1052 {
1053         int olr;
1054         int val;
1055         struct net *net = sock_net(sk);
1056
1057         if (optname != MRT_VERSION &&
1058 #ifdef CONFIG_IP_PIMSM
1059            optname!=MRT_PIM &&
1060 #endif
1061            optname!=MRT_ASSERT)
1062                 return -ENOPROTOOPT;
1063
1064         if (get_user(olr, optlen))
1065                 return -EFAULT;
1066
1067         olr = min_t(unsigned int, olr, sizeof(int));
1068         if (olr < 0)
1069                 return -EINVAL;
1070
1071         if (put_user(olr, optlen))
1072                 return -EFAULT;
1073         if (optname == MRT_VERSION)
1074                 val = 0x0305;
1075 #ifdef CONFIG_IP_PIMSM
1076         else if (optname == MRT_PIM)
1077                 val = net->ipv4.mroute_do_pim;
1078 #endif
1079         else
1080                 val = net->ipv4.mroute_do_assert;
1081         if (copy_to_user(optval, &val, olr))
1082                 return -EFAULT;
1083         return 0;
1084 }
1085
1086 /*
1087  *      The IP multicast ioctl support routines.
1088  */
1089
1090 int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg)
1091 {
1092         struct sioc_sg_req sr;
1093         struct sioc_vif_req vr;
1094         struct vif_device *vif;
1095         struct mfc_cache *c;
1096         struct net *net = sock_net(sk);
1097
1098         switch (cmd) {
1099         case SIOCGETVIFCNT:
1100                 if (copy_from_user(&vr, arg, sizeof(vr)))
1101                         return -EFAULT;
1102                 if (vr.vifi >= net->ipv4.maxvif)
1103                         return -EINVAL;
1104                 read_lock(&mrt_lock);
1105                 vif = &net->ipv4.vif_table[vr.vifi];
1106                 if (VIF_EXISTS(net, vr.vifi)) {
1107                         vr.icount = vif->pkt_in;
1108                         vr.ocount = vif->pkt_out;
1109                         vr.ibytes = vif->bytes_in;
1110                         vr.obytes = vif->bytes_out;
1111                         read_unlock(&mrt_lock);
1112
1113                         if (copy_to_user(arg, &vr, sizeof(vr)))
1114                                 return -EFAULT;
1115                         return 0;
1116                 }
1117                 read_unlock(&mrt_lock);
1118                 return -EADDRNOTAVAIL;
1119         case SIOCGETSGCNT:
1120                 if (copy_from_user(&sr, arg, sizeof(sr)))
1121                         return -EFAULT;
1122
1123                 read_lock(&mrt_lock);
1124                 c = ipmr_cache_find(net, sr.src.s_addr, sr.grp.s_addr);
1125                 if (c) {
1126                         sr.pktcnt = c->mfc_un.res.pkt;
1127                         sr.bytecnt = c->mfc_un.res.bytes;
1128                         sr.wrong_if = c->mfc_un.res.wrong_if;
1129                         read_unlock(&mrt_lock);
1130
1131                         if (copy_to_user(arg, &sr, sizeof(sr)))
1132                                 return -EFAULT;
1133                         return 0;
1134                 }
1135                 read_unlock(&mrt_lock);
1136                 return -EADDRNOTAVAIL;
1137         default:
1138                 return -ENOIOCTLCMD;
1139         }
1140 }
1141
1142
1143 static int ipmr_device_event(struct notifier_block *this, unsigned long event, void *ptr)
1144 {
1145         struct net_device *dev = ptr;
1146         struct net *net = dev_net(dev);
1147         struct vif_device *v;
1148         int ct;
1149
1150         if (!net_eq(dev_net(dev), net))
1151                 return NOTIFY_DONE;
1152
1153         if (event != NETDEV_UNREGISTER)
1154                 return NOTIFY_DONE;
1155         v = &net->ipv4.vif_table[0];
1156         for (ct = 0; ct < net->ipv4.maxvif; ct++, v++) {
1157                 if (v->dev == dev)
1158                         vif_delete(net, ct, 1);
1159         }
1160         return NOTIFY_DONE;
1161 }
1162
1163
1164 static struct notifier_block ip_mr_notifier = {
1165         .notifier_call = ipmr_device_event,
1166 };
1167
1168 /*
1169  *      Encapsulate a packet by attaching a valid IPIP header to it.
1170  *      This avoids tunnel drivers and other mess and gives us the speed so
1171  *      important for multicast video.
1172  */
1173
1174 static void ip_encap(struct sk_buff *skb, __be32 saddr, __be32 daddr)
1175 {
1176         struct iphdr *iph;
1177         struct iphdr *old_iph = ip_hdr(skb);
1178
1179         skb_push(skb, sizeof(struct iphdr));
1180         skb->transport_header = skb->network_header;
1181         skb_reset_network_header(skb);
1182         iph = ip_hdr(skb);
1183
1184         iph->version    =       4;
1185         iph->tos        =       old_iph->tos;
1186         iph->ttl        =       old_iph->ttl;
1187         iph->frag_off   =       0;
1188         iph->daddr      =       daddr;
1189         iph->saddr      =       saddr;
1190         iph->protocol   =       IPPROTO_IPIP;
1191         iph->ihl        =       5;
1192         iph->tot_len    =       htons(skb->len);
1193         ip_select_ident(iph, skb_dst(skb), NULL);
1194         ip_send_check(iph);
1195
1196         memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
1197         nf_reset(skb);
1198 }
1199
1200 static inline int ipmr_forward_finish(struct sk_buff *skb)
1201 {
1202         struct ip_options * opt = &(IPCB(skb)->opt);
1203
1204         IP_INC_STATS_BH(dev_net(skb_dst(skb)->dev), IPSTATS_MIB_OUTFORWDATAGRAMS);
1205
1206         if (unlikely(opt->optlen))
1207                 ip_forward_options(skb);
1208
1209         return dst_output(skb);
1210 }
1211
1212 /*
1213  *      Processing handlers for ipmr_forward
1214  */
1215
1216 static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, int vifi)
1217 {
1218         struct net *net = mfc_net(c);
1219         const struct iphdr *iph = ip_hdr(skb);
1220         struct vif_device *vif = &net->ipv4.vif_table[vifi];
1221         struct net_device *dev;
1222         struct rtable *rt;
1223         int    encap = 0;
1224
1225         if (vif->dev == NULL)
1226                 goto out_free;
1227
1228 #ifdef CONFIG_IP_PIMSM
1229         if (vif->flags & VIFF_REGISTER) {
1230                 vif->pkt_out++;
1231                 vif->bytes_out += skb->len;
1232                 vif->dev->stats.tx_bytes += skb->len;
1233                 vif->dev->stats.tx_packets++;
1234                 ipmr_cache_report(net, skb, vifi, IGMPMSG_WHOLEPKT);
1235                 goto out_free;
1236         }
1237 #endif
1238
1239         if (vif->flags&VIFF_TUNNEL) {
1240                 struct flowi fl = { .oif = vif->link,
1241                                     .nl_u = { .ip4_u =
1242                                               { .daddr = vif->remote,
1243                                                 .saddr = vif->local,
1244                                                 .tos = RT_TOS(iph->tos) } },
1245                                     .proto = IPPROTO_IPIP };
1246                 if (ip_route_output_key(net, &rt, &fl))
1247                         goto out_free;
1248                 encap = sizeof(struct iphdr);
1249         } else {
1250                 struct flowi fl = { .oif = vif->link,
1251                                     .nl_u = { .ip4_u =
1252                                               { .daddr = iph->daddr,
1253                                                 .tos = RT_TOS(iph->tos) } },
1254                                     .proto = IPPROTO_IPIP };
1255                 if (ip_route_output_key(net, &rt, &fl))
1256                         goto out_free;
1257         }
1258
1259         dev = rt->u.dst.dev;
1260
1261         if (skb->len+encap > dst_mtu(&rt->u.dst) && (ntohs(iph->frag_off) & IP_DF)) {
1262                 /* Do not fragment multicasts. Alas, IPv4 does not
1263                    allow to send ICMP, so that packets will disappear
1264                    to blackhole.
1265                  */
1266
1267                 IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_FRAGFAILS);
1268                 ip_rt_put(rt);
1269                 goto out_free;
1270         }
1271
1272         encap += LL_RESERVED_SPACE(dev) + rt->u.dst.header_len;
1273
1274         if (skb_cow(skb, encap)) {
1275                 ip_rt_put(rt);
1276                 goto out_free;
1277         }
1278
1279         vif->pkt_out++;
1280         vif->bytes_out += skb->len;
1281
1282         skb_dst_drop(skb);
1283         skb_dst_set(skb, &rt->u.dst);
1284         ip_decrease_ttl(ip_hdr(skb));
1285
1286         /* FIXME: forward and output firewalls used to be called here.
1287          * What do we do with netfilter? -- RR */
1288         if (vif->flags & VIFF_TUNNEL) {
1289                 ip_encap(skb, vif->local, vif->remote);
1290                 /* FIXME: extra output firewall step used to be here. --RR */
1291                 vif->dev->stats.tx_packets++;
1292                 vif->dev->stats.tx_bytes += skb->len;
1293         }
1294
1295         IPCB(skb)->flags |= IPSKB_FORWARDED;
1296
1297         /*
1298          * RFC1584 teaches, that DVMRP/PIM router must deliver packets locally
1299          * not only before forwarding, but after forwarding on all output
1300          * interfaces. It is clear, if mrouter runs a multicasting
1301          * program, it should receive packets not depending to what interface
1302          * program is joined.
1303          * If we will not make it, the program will have to join on all
1304          * interfaces. On the other hand, multihoming host (or router, but
1305          * not mrouter) cannot join to more than one interface - it will
1306          * result in receiving multiple packets.
1307          */
1308         NF_HOOK(PF_INET, NF_INET_FORWARD, skb, skb->dev, dev,
1309                 ipmr_forward_finish);
1310         return;
1311
1312 out_free:
1313         kfree_skb(skb);
1314         return;
1315 }
1316
1317 static int ipmr_find_vif(struct net_device *dev)
1318 {
1319         struct net *net = dev_net(dev);
1320         int ct;
1321         for (ct = net->ipv4.maxvif-1; ct >= 0; ct--) {
1322                 if (net->ipv4.vif_table[ct].dev == dev)
1323                         break;
1324         }
1325         return ct;
1326 }
1327
1328 /* "local" means that we should preserve one skb (for local delivery) */
1329
1330 static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local)
1331 {
1332         int psend = -1;
1333         int vif, ct;
1334         struct net *net = mfc_net(cache);
1335
1336         vif = cache->mfc_parent;
1337         cache->mfc_un.res.pkt++;
1338         cache->mfc_un.res.bytes += skb->len;
1339
1340         /*
1341          * Wrong interface: drop packet and (maybe) send PIM assert.
1342          */
1343         if (net->ipv4.vif_table[vif].dev != skb->dev) {
1344                 int true_vifi;
1345
1346                 if (skb_rtable(skb)->fl.iif == 0) {
1347                         /* It is our own packet, looped back.
1348                            Very complicated situation...
1349
1350                            The best workaround until routing daemons will be
1351                            fixed is not to redistribute packet, if it was
1352                            send through wrong interface. It means, that
1353                            multicast applications WILL NOT work for
1354                            (S,G), which have default multicast route pointing
1355                            to wrong oif. In any case, it is not a good
1356                            idea to use multicasting applications on router.
1357                          */
1358                         goto dont_forward;
1359                 }
1360
1361                 cache->mfc_un.res.wrong_if++;
1362                 true_vifi = ipmr_find_vif(skb->dev);
1363
1364                 if (true_vifi >= 0 && net->ipv4.mroute_do_assert &&
1365                     /* pimsm uses asserts, when switching from RPT to SPT,
1366                        so that we cannot check that packet arrived on an oif.
1367                        It is bad, but otherwise we would need to move pretty
1368                        large chunk of pimd to kernel. Ough... --ANK
1369                      */
1370                     (net->ipv4.mroute_do_pim ||
1371                      cache->mfc_un.res.ttls[true_vifi] < 255) &&
1372                     time_after(jiffies,
1373                                cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) {
1374                         cache->mfc_un.res.last_assert = jiffies;
1375                         ipmr_cache_report(net, skb, true_vifi, IGMPMSG_WRONGVIF);
1376                 }
1377                 goto dont_forward;
1378         }
1379
1380         net->ipv4.vif_table[vif].pkt_in++;
1381         net->ipv4.vif_table[vif].bytes_in += skb->len;
1382
1383         /*
1384          *      Forward the frame
1385          */
1386         for (ct = cache->mfc_un.res.maxvif-1; ct >= cache->mfc_un.res.minvif; ct--) {
1387                 if (ip_hdr(skb)->ttl > cache->mfc_un.res.ttls[ct]) {
1388                         if (psend != -1) {
1389                                 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
1390                                 if (skb2)
1391                                         ipmr_queue_xmit(skb2, cache, psend);
1392                         }
1393                         psend = ct;
1394                 }
1395         }
1396         if (psend != -1) {
1397                 if (local) {
1398                         struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
1399                         if (skb2)
1400                                 ipmr_queue_xmit(skb2, cache, psend);
1401                 } else {
1402                         ipmr_queue_xmit(skb, cache, psend);
1403                         return 0;
1404                 }
1405         }
1406
1407 dont_forward:
1408         if (!local)
1409                 kfree_skb(skb);
1410         return 0;
1411 }
1412
1413
1414 /*
1415  *      Multicast packets for forwarding arrive here
1416  */
1417
1418 int ip_mr_input(struct sk_buff *skb)
1419 {
1420         struct mfc_cache *cache;
1421         struct net *net = dev_net(skb->dev);
1422         int local = skb_rtable(skb)->rt_flags & RTCF_LOCAL;
1423
1424         /* Packet is looped back after forward, it should not be
1425            forwarded second time, but still can be delivered locally.
1426          */
1427         if (IPCB(skb)->flags&IPSKB_FORWARDED)
1428                 goto dont_forward;
1429
1430         if (!local) {
1431                     if (IPCB(skb)->opt.router_alert) {
1432                             if (ip_call_ra_chain(skb))
1433                                     return 0;
1434                     } else if (ip_hdr(skb)->protocol == IPPROTO_IGMP){
1435                             /* IGMPv1 (and broken IGMPv2 implementations sort of
1436                                Cisco IOS <= 11.2(8)) do not put router alert
1437                                option to IGMP packets destined to routable
1438                                groups. It is very bad, because it means
1439                                that we can forward NO IGMP messages.
1440                              */
1441                             read_lock(&mrt_lock);
1442                             if (net->ipv4.mroute_sk) {
1443                                     nf_reset(skb);
1444                                     raw_rcv(net->ipv4.mroute_sk, skb);
1445                                     read_unlock(&mrt_lock);
1446                                     return 0;
1447                             }
1448                             read_unlock(&mrt_lock);
1449                     }
1450         }
1451
1452         read_lock(&mrt_lock);
1453         cache = ipmr_cache_find(net, ip_hdr(skb)->saddr, ip_hdr(skb)->daddr);
1454
1455         /*
1456          *      No usable cache entry
1457          */
1458         if (cache == NULL) {
1459                 int vif;
1460
1461                 if (local) {
1462                         struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
1463                         ip_local_deliver(skb);
1464                         if (skb2 == NULL) {
1465                                 read_unlock(&mrt_lock);
1466                                 return -ENOBUFS;
1467                         }
1468                         skb = skb2;
1469                 }
1470
1471                 vif = ipmr_find_vif(skb->dev);
1472                 if (vif >= 0) {
1473                         int err = ipmr_cache_unresolved(net, vif, skb);
1474                         read_unlock(&mrt_lock);
1475
1476                         return err;
1477                 }
1478                 read_unlock(&mrt_lock);
1479                 kfree_skb(skb);
1480                 return -ENODEV;
1481         }
1482
1483         ip_mr_forward(skb, cache, local);
1484
1485         read_unlock(&mrt_lock);
1486
1487         if (local)
1488                 return ip_local_deliver(skb);
1489
1490         return 0;
1491
1492 dont_forward:
1493         if (local)
1494                 return ip_local_deliver(skb);
1495         kfree_skb(skb);
1496         return 0;
1497 }
1498
1499 #ifdef CONFIG_IP_PIMSM
1500 static int __pim_rcv(struct sk_buff *skb, unsigned int pimlen)
1501 {
1502         struct net_device *reg_dev = NULL;
1503         struct iphdr *encap;
1504         struct net *net = dev_net(skb->dev);
1505
1506         encap = (struct iphdr *)(skb_transport_header(skb) + pimlen);
1507         /*
1508            Check that:
1509            a. packet is really destinted to a multicast group
1510            b. packet is not a NULL-REGISTER
1511            c. packet is not truncated
1512          */
1513         if (!ipv4_is_multicast(encap->daddr) ||
1514             encap->tot_len == 0 ||
1515             ntohs(encap->tot_len) + pimlen > skb->len)
1516                 return 1;
1517
1518         read_lock(&mrt_lock);
1519         if (net->ipv4.mroute_reg_vif_num >= 0)
1520                 reg_dev = net->ipv4.vif_table[net->ipv4.mroute_reg_vif_num].dev;
1521         if (reg_dev)
1522                 dev_hold(reg_dev);
1523         read_unlock(&mrt_lock);
1524
1525         if (reg_dev == NULL)
1526                 return 1;
1527
1528         skb->mac_header = skb->network_header;
1529         skb_pull(skb, (u8*)encap - skb->data);
1530         skb_reset_network_header(skb);
1531         skb->dev = reg_dev;
1532         skb->protocol = htons(ETH_P_IP);
1533         skb->ip_summed = 0;
1534         skb->pkt_type = PACKET_HOST;
1535         skb_dst_drop(skb);
1536         reg_dev->stats.rx_bytes += skb->len;
1537         reg_dev->stats.rx_packets++;
1538         nf_reset(skb);
1539         netif_rx(skb);
1540         dev_put(reg_dev);
1541
1542         return 0;
1543 }
1544 #endif
1545
1546 #ifdef CONFIG_IP_PIMSM_V1
1547 /*
1548  * Handle IGMP messages of PIMv1
1549  */
1550
1551 int pim_rcv_v1(struct sk_buff * skb)
1552 {
1553         struct igmphdr *pim;
1554         struct net *net = dev_net(skb->dev);
1555
1556         if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr)))
1557                 goto drop;
1558
1559         pim = igmp_hdr(skb);
1560
1561         if (!net->ipv4.mroute_do_pim ||
1562             pim->group != PIM_V1_VERSION || pim->code != PIM_V1_REGISTER)
1563                 goto drop;
1564
1565         if (__pim_rcv(skb, sizeof(*pim))) {
1566 drop:
1567                 kfree_skb(skb);
1568         }
1569         return 0;
1570 }
1571 #endif
1572
1573 #ifdef CONFIG_IP_PIMSM_V2
1574 static int pim_rcv(struct sk_buff * skb)
1575 {
1576         struct pimreghdr *pim;
1577
1578         if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr)))
1579                 goto drop;
1580
1581         pim = (struct pimreghdr *)skb_transport_header(skb);
1582         if (pim->type != ((PIM_VERSION<<4)|(PIM_REGISTER)) ||
1583             (pim->flags&PIM_NULL_REGISTER) ||
1584             (ip_compute_csum((void *)pim, sizeof(*pim)) != 0 &&
1585              csum_fold(skb_checksum(skb, 0, skb->len, 0))))
1586                 goto drop;
1587
1588         if (__pim_rcv(skb, sizeof(*pim))) {
1589 drop:
1590                 kfree_skb(skb);
1591         }
1592         return 0;
1593 }
1594 #endif
1595
1596 static int
1597 ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm)
1598 {
1599         int ct;
1600         struct rtnexthop *nhp;
1601         struct net *net = mfc_net(c);
1602         struct net_device *dev = net->ipv4.vif_table[c->mfc_parent].dev;
1603         u8 *b = skb_tail_pointer(skb);
1604         struct rtattr *mp_head;
1605
1606         if (dev)
1607                 RTA_PUT(skb, RTA_IIF, 4, &dev->ifindex);
1608
1609         mp_head = (struct rtattr *)skb_put(skb, RTA_LENGTH(0));
1610
1611         for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
1612                 if (c->mfc_un.res.ttls[ct] < 255) {
1613                         if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4))
1614                                 goto rtattr_failure;
1615                         nhp = (struct rtnexthop *)skb_put(skb, RTA_ALIGN(sizeof(*nhp)));
1616                         nhp->rtnh_flags = 0;
1617                         nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
1618                         nhp->rtnh_ifindex = net->ipv4.vif_table[ct].dev->ifindex;
1619                         nhp->rtnh_len = sizeof(*nhp);
1620                 }
1621         }
1622         mp_head->rta_type = RTA_MULTIPATH;
1623         mp_head->rta_len = skb_tail_pointer(skb) - (u8 *)mp_head;
1624         rtm->rtm_type = RTN_MULTICAST;
1625         return 1;
1626
1627 rtattr_failure:
1628         nlmsg_trim(skb, b);
1629         return -EMSGSIZE;
1630 }
1631
1632 int ipmr_get_route(struct net *net,
1633                    struct sk_buff *skb, struct rtmsg *rtm, int nowait)
1634 {
1635         int err;
1636         struct mfc_cache *cache;
1637         struct rtable *rt = skb_rtable(skb);
1638
1639         read_lock(&mrt_lock);
1640         cache = ipmr_cache_find(net, rt->rt_src, rt->rt_dst);
1641
1642         if (cache == NULL) {
1643                 struct sk_buff *skb2;
1644                 struct iphdr *iph;
1645                 struct net_device *dev;
1646                 int vif;
1647
1648                 if (nowait) {
1649                         read_unlock(&mrt_lock);
1650                         return -EAGAIN;
1651                 }
1652
1653                 dev = skb->dev;
1654                 if (dev == NULL || (vif = ipmr_find_vif(dev)) < 0) {
1655                         read_unlock(&mrt_lock);
1656                         return -ENODEV;
1657                 }
1658                 skb2 = skb_clone(skb, GFP_ATOMIC);
1659                 if (!skb2) {
1660                         read_unlock(&mrt_lock);
1661                         return -ENOMEM;
1662                 }
1663
1664                 skb_push(skb2, sizeof(struct iphdr));
1665                 skb_reset_network_header(skb2);
1666                 iph = ip_hdr(skb2);
1667                 iph->ihl = sizeof(struct iphdr) >> 2;
1668                 iph->saddr = rt->rt_src;
1669                 iph->daddr = rt->rt_dst;
1670                 iph->version = 0;
1671                 err = ipmr_cache_unresolved(net, vif, skb2);
1672                 read_unlock(&mrt_lock);
1673                 return err;
1674         }
1675
1676         if (!nowait && (rtm->rtm_flags&RTM_F_NOTIFY))
1677                 cache->mfc_flags |= MFC_NOTIFY;
1678         err = ipmr_fill_mroute(skb, cache, rtm);
1679         read_unlock(&mrt_lock);
1680         return err;
1681 }
1682
1683 #ifdef CONFIG_PROC_FS
1684 /*
1685  *      The /proc interfaces to multicast routing /proc/ip_mr_cache /proc/ip_mr_vif
1686  */
1687 struct ipmr_vif_iter {
1688         struct seq_net_private p;
1689         int ct;
1690 };
1691
1692 static struct vif_device *ipmr_vif_seq_idx(struct net *net,
1693                                            struct ipmr_vif_iter *iter,
1694                                            loff_t pos)
1695 {
1696         for (iter->ct = 0; iter->ct < net->ipv4.maxvif; ++iter->ct) {
1697                 if (!VIF_EXISTS(net, iter->ct))
1698                         continue;
1699                 if (pos-- == 0)
1700                         return &net->ipv4.vif_table[iter->ct];
1701         }
1702         return NULL;
1703 }
1704
1705 static void *ipmr_vif_seq_start(struct seq_file *seq, loff_t *pos)
1706         __acquires(mrt_lock)
1707 {
1708         struct net *net = seq_file_net(seq);
1709
1710         read_lock(&mrt_lock);
1711         return *pos ? ipmr_vif_seq_idx(net, seq->private, *pos - 1)
1712                 : SEQ_START_TOKEN;
1713 }
1714
1715 static void *ipmr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1716 {
1717         struct ipmr_vif_iter *iter = seq->private;
1718         struct net *net = seq_file_net(seq);
1719
1720         ++*pos;
1721         if (v == SEQ_START_TOKEN)
1722                 return ipmr_vif_seq_idx(net, iter, 0);
1723
1724         while (++iter->ct < net->ipv4.maxvif) {
1725                 if (!VIF_EXISTS(net, iter->ct))
1726                         continue;
1727                 return &net->ipv4.vif_table[iter->ct];
1728         }
1729         return NULL;
1730 }
1731
1732 static void ipmr_vif_seq_stop(struct seq_file *seq, void *v)
1733         __releases(mrt_lock)
1734 {
1735         read_unlock(&mrt_lock);
1736 }
1737
1738 static int ipmr_vif_seq_show(struct seq_file *seq, void *v)
1739 {
1740         struct net *net = seq_file_net(seq);
1741
1742         if (v == SEQ_START_TOKEN) {
1743                 seq_puts(seq,
1744                          "Interface      BytesIn  PktsIn  BytesOut PktsOut Flags Local    Remote\n");
1745         } else {
1746                 const struct vif_device *vif = v;
1747                 const char *name =  vif->dev ? vif->dev->name : "none";
1748
1749                 seq_printf(seq,
1750                            "%2Zd %-10s %8ld %7ld  %8ld %7ld %05X %08X %08X\n",
1751                            vif - net->ipv4.vif_table,
1752                            name, vif->bytes_in, vif->pkt_in,
1753                            vif->bytes_out, vif->pkt_out,
1754                            vif->flags, vif->local, vif->remote);
1755         }
1756         return 0;
1757 }
1758
1759 static const struct seq_operations ipmr_vif_seq_ops = {
1760         .start = ipmr_vif_seq_start,
1761         .next  = ipmr_vif_seq_next,
1762         .stop  = ipmr_vif_seq_stop,
1763         .show  = ipmr_vif_seq_show,
1764 };
1765
1766 static int ipmr_vif_open(struct inode *inode, struct file *file)
1767 {
1768         return seq_open_net(inode, file, &ipmr_vif_seq_ops,
1769                             sizeof(struct ipmr_vif_iter));
1770 }
1771
1772 static const struct file_operations ipmr_vif_fops = {
1773         .owner   = THIS_MODULE,
1774         .open    = ipmr_vif_open,
1775         .read    = seq_read,
1776         .llseek  = seq_lseek,
1777         .release = seq_release_net,
1778 };
1779
1780 struct ipmr_mfc_iter {
1781         struct seq_net_private p;
1782         struct mfc_cache **cache;
1783         int ct;
1784 };
1785
1786
1787 static struct mfc_cache *ipmr_mfc_seq_idx(struct net *net,
1788                                           struct ipmr_mfc_iter *it, loff_t pos)
1789 {
1790         struct mfc_cache *mfc;
1791
1792         it->cache = net->ipv4.mfc_cache_array;
1793         read_lock(&mrt_lock);
1794         for (it->ct = 0; it->ct < MFC_LINES; it->ct++)
1795                 for (mfc = net->ipv4.mfc_cache_array[it->ct];
1796                      mfc; mfc = mfc->next)
1797                         if (pos-- == 0)
1798                                 return mfc;
1799         read_unlock(&mrt_lock);
1800
1801         it->cache = &mfc_unres_queue;
1802         spin_lock_bh(&mfc_unres_lock);
1803         for (mfc = mfc_unres_queue; mfc; mfc = mfc->next)
1804                 if (net_eq(mfc_net(mfc), net) &&
1805                     pos-- == 0)
1806                         return mfc;
1807         spin_unlock_bh(&mfc_unres_lock);
1808
1809         it->cache = NULL;
1810         return NULL;
1811 }
1812
1813
1814 static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
1815 {
1816         struct ipmr_mfc_iter *it = seq->private;
1817         struct net *net = seq_file_net(seq);
1818
1819         it->cache = NULL;
1820         it->ct = 0;
1821         return *pos ? ipmr_mfc_seq_idx(net, seq->private, *pos - 1)
1822                 : SEQ_START_TOKEN;
1823 }
1824
1825 static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1826 {
1827         struct mfc_cache *mfc = v;
1828         struct ipmr_mfc_iter *it = seq->private;
1829         struct net *net = seq_file_net(seq);
1830
1831         ++*pos;
1832
1833         if (v == SEQ_START_TOKEN)
1834                 return ipmr_mfc_seq_idx(net, seq->private, 0);
1835
1836         if (mfc->next)
1837                 return mfc->next;
1838
1839         if (it->cache == &mfc_unres_queue)
1840                 goto end_of_list;
1841
1842         BUG_ON(it->cache != net->ipv4.mfc_cache_array);
1843
1844         while (++it->ct < MFC_LINES) {
1845                 mfc = net->ipv4.mfc_cache_array[it->ct];
1846                 if (mfc)
1847                         return mfc;
1848         }
1849
1850         /* exhausted cache_array, show unresolved */
1851         read_unlock(&mrt_lock);
1852         it->cache = &mfc_unres_queue;
1853         it->ct = 0;
1854
1855         spin_lock_bh(&mfc_unres_lock);
1856         mfc = mfc_unres_queue;
1857         while (mfc && !net_eq(mfc_net(mfc), net))
1858                 mfc = mfc->next;
1859         if (mfc)
1860                 return mfc;
1861
1862  end_of_list:
1863         spin_unlock_bh(&mfc_unres_lock);
1864         it->cache = NULL;
1865
1866         return NULL;
1867 }
1868
1869 static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
1870 {
1871         struct ipmr_mfc_iter *it = seq->private;
1872         struct net *net = seq_file_net(seq);
1873
1874         if (it->cache == &mfc_unres_queue)
1875                 spin_unlock_bh(&mfc_unres_lock);
1876         else if (it->cache == net->ipv4.mfc_cache_array)
1877                 read_unlock(&mrt_lock);
1878 }
1879
1880 static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
1881 {
1882         int n;
1883         struct net *net = seq_file_net(seq);
1884
1885         if (v == SEQ_START_TOKEN) {
1886                 seq_puts(seq,
1887                  "Group    Origin   Iif     Pkts    Bytes    Wrong Oifs\n");
1888         } else {
1889                 const struct mfc_cache *mfc = v;
1890                 const struct ipmr_mfc_iter *it = seq->private;
1891
1892                 seq_printf(seq, "%08lX %08lX %-3hd",
1893                            (unsigned long) mfc->mfc_mcastgrp,
1894                            (unsigned long) mfc->mfc_origin,
1895                            mfc->mfc_parent);
1896
1897                 if (it->cache != &mfc_unres_queue) {
1898                         seq_printf(seq, " %8lu %8lu %8lu",
1899                                    mfc->mfc_un.res.pkt,
1900                                    mfc->mfc_un.res.bytes,
1901                                    mfc->mfc_un.res.wrong_if);
1902                         for (n = mfc->mfc_un.res.minvif;
1903                              n < mfc->mfc_un.res.maxvif; n++ ) {
1904                                 if (VIF_EXISTS(net, n) &&
1905                                     mfc->mfc_un.res.ttls[n] < 255)
1906                                         seq_printf(seq,
1907                                            " %2d:%-3d",
1908                                            n, mfc->mfc_un.res.ttls[n]);
1909                         }
1910                 } else {
1911                         /* unresolved mfc_caches don't contain
1912                          * pkt, bytes and wrong_if values
1913                          */
1914                         seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul);
1915                 }
1916                 seq_putc(seq, '\n');
1917         }
1918         return 0;
1919 }
1920
1921 static const struct seq_operations ipmr_mfc_seq_ops = {
1922         .start = ipmr_mfc_seq_start,
1923         .next  = ipmr_mfc_seq_next,
1924         .stop  = ipmr_mfc_seq_stop,
1925         .show  = ipmr_mfc_seq_show,
1926 };
1927
1928 static int ipmr_mfc_open(struct inode *inode, struct file *file)
1929 {
1930         return seq_open_net(inode, file, &ipmr_mfc_seq_ops,
1931                             sizeof(struct ipmr_mfc_iter));
1932 }
1933
1934 static const struct file_operations ipmr_mfc_fops = {
1935         .owner   = THIS_MODULE,
1936         .open    = ipmr_mfc_open,
1937         .read    = seq_read,
1938         .llseek  = seq_lseek,
1939         .release = seq_release_net,
1940 };
1941 #endif
1942
#ifdef CONFIG_IP_PIMSM_V2
/*
 *      Protocol descriptor registered for IPPROTO_PIM in ip_mr_init();
 *      incoming packets are handed to pim_rcv().
 */
static const struct net_protocol pim_protocol = {
        .netns_ok       = 1,    /* NOTE(review): presumably marks the handler as netns-aware */
        .handler        = pim_rcv,
};
#endif
1949
1950
1951 /*
1952  *      Setup for IP multicast routing
1953  */
1954 static int __net_init ipmr_net_init(struct net *net)
1955 {
1956         int err = 0;
1957
1958         net->ipv4.vif_table = kcalloc(MAXVIFS, sizeof(struct vif_device),
1959                                       GFP_KERNEL);
1960         if (!net->ipv4.vif_table) {
1961                 err = -ENOMEM;
1962                 goto fail;
1963         }
1964
1965         /* Forwarding cache */
1966         net->ipv4.mfc_cache_array = kcalloc(MFC_LINES,
1967                                             sizeof(struct mfc_cache *),
1968                                             GFP_KERNEL);
1969         if (!net->ipv4.mfc_cache_array) {
1970                 err = -ENOMEM;
1971                 goto fail_mfc_cache;
1972         }
1973
1974 #ifdef CONFIG_IP_PIMSM
1975         net->ipv4.mroute_reg_vif_num = -1;
1976 #endif
1977
1978 #ifdef CONFIG_PROC_FS
1979         err = -ENOMEM;
1980         if (!proc_net_fops_create(net, "ip_mr_vif", 0, &ipmr_vif_fops))
1981                 goto proc_vif_fail;
1982         if (!proc_net_fops_create(net, "ip_mr_cache", 0, &ipmr_mfc_fops))
1983                 goto proc_cache_fail;
1984 #endif
1985         return 0;
1986
1987 #ifdef CONFIG_PROC_FS
1988 proc_cache_fail:
1989         proc_net_remove(net, "ip_mr_vif");
1990 proc_vif_fail:
1991         kfree(net->ipv4.mfc_cache_array);
1992 #endif
1993 fail_mfc_cache:
1994         kfree(net->ipv4.vif_table);
1995 fail:
1996         return err;
1997 }
1998
1999 static void __net_exit ipmr_net_exit(struct net *net)
2000 {
2001 #ifdef CONFIG_PROC_FS
2002         proc_net_remove(net, "ip_mr_cache");
2003         proc_net_remove(net, "ip_mr_vif");
2004 #endif
2005         kfree(net->ipv4.mfc_cache_array);
2006         kfree(net->ipv4.vif_table);
2007 }
2008
2009 static struct pernet_operations ipmr_net_ops = {
2010         .init = ipmr_net_init,
2011         .exit = ipmr_net_exit,
2012 };
2013
2014 int __init ip_mr_init(void)
2015 {
2016         int err;
2017
2018         mrt_cachep = kmem_cache_create("ip_mrt_cache",
2019                                        sizeof(struct mfc_cache),
2020                                        0, SLAB_HWCACHE_ALIGN|SLAB_PANIC,
2021                                        NULL);
2022         if (!mrt_cachep)
2023                 return -ENOMEM;
2024
2025         err = register_pernet_subsys(&ipmr_net_ops);
2026         if (err)
2027                 goto reg_pernet_fail;
2028
2029         setup_timer(&ipmr_expire_timer, ipmr_expire_process, 0);
2030         err = register_netdevice_notifier(&ip_mr_notifier);
2031         if (err)
2032                 goto reg_notif_fail;
2033 #ifdef CONFIG_IP_PIMSM_V2
2034         if (inet_add_protocol(&pim_protocol, IPPROTO_PIM) < 0) {
2035                 printk(KERN_ERR "ip_mr_init: can't add PIM protocol\n");
2036                 err = -EAGAIN;
2037                 goto add_proto_fail;
2038         }
2039 #endif
2040         return 0;
2041
2042 #ifdef CONFIG_IP_PIMSM_V2
2043 add_proto_fail:
2044         unregister_netdevice_notifier(&ip_mr_notifier);
2045 #endif
2046 reg_notif_fail:
2047         del_timer(&ipmr_expire_timer);
2048         unregister_pernet_subsys(&ipmr_net_ops);
2049 reg_pernet_fail:
2050         kmem_cache_destroy(mrt_cachep);
2051         return err;
2052 }