release/src-rt-6.x/linux/linux-2.6/net/core/dev.c

   1 /*
   2  *      NET3    Protocol independent device support routines.
   3  *
   4  *              This program is free software; you can redistribute it and/or
   5  *              modify it under the terms of the GNU General Public License
   6  *              as published by the Free Software Foundation; either version
   7  *              2 of the License, or (at your option) any later version.
   8  *
   9  *      Derived from the non IP parts of dev.c 1.0.19
  10  *              Authors:        Ross Biro
  11  *                              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
  12  *                              Mark Evans, <evansmp@uhura.aston.ac.uk>
  13  *
  14  *      Additional Authors:
  15  *              Florian la Roche <rzsfl@rz.uni-sb.de>
  16  *              Alan Cox <gw4pts@gw4pts.ampr.org>
  17  *              David Hinds <dahinds@users.sourceforge.net>
  18  *              Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
  19  *              Adam Sulmicki <adam@cfar.umd.edu>
  20  *              Pekka Riikonen <priikone@poesidon.pspt.fi>
  21  *
  22  *      Changes:
  23  *              D.J. Barrow     :       Fixed bug where dev->refcnt gets set
  24  *                                      to 2 if register_netdev gets called
  25  *                                      before net_dev_init & also removed a
  26  *                                      few lines of code in the process.
  27  *              Alan Cox        :       device private ioctl copies fields back.
  28  *              Alan Cox        :       Transmit queue code does relevant
  29  *                                      stunts to keep the queue safe.
  30  *              Alan Cox        :       Fixed double lock.
  31  *              Alan Cox        :       Fixed promisc NULL pointer trap
  32  *              ????????        :       Support the full private ioctl range
  33  *              Alan Cox        :       Moved ioctl permission check into
  34  *                                      drivers
  35  *              Tim Kordas      :       SIOCADDMULTI/SIOCDELMULTI
  36  *              Alan Cox        :       100 backlog just doesn't cut it when
  37  *                                      you start doing multicast video 8)
  38  *              Alan Cox        :       Rewrote net_bh and list manager.
  39  *              Alan Cox        :       Fix ETH_P_ALL echoback lengths.
  40  *              Alan Cox        :       Took out transmit every packet pass
  41  *                                      Saved a few bytes in the ioctl handler
  42  *              Alan Cox        :       Network driver sets packet type before
  43  *                                      calling netif_rx. Saves a function
  44  *                                      call a packet.
  45  *              Alan Cox        :       Hashed net_bh()
  46  *              Richard Kooijman:       Timestamp fixes.
  47  *              Alan Cox        :       Wrong field in SIOCGIFDSTADDR
  48  *              Alan Cox        :       Device lock protection.
  49  *              Alan Cox        :       Fixed nasty side effect of device close
  50  *                                      changes.
  51  *              Rudi Cilibrasi  :       Pass the right thing to
  52  *                                      set_mac_address()
  53  *              Dave Miller     :       32bit quantity for the device lock to
  54  *                                      make it work out on a Sparc.
  55  *              Bjorn Ekwall    :       Added KERNELD hack.
  56  *              Alan Cox        :       Cleaned up the backlog initialise.
  57  *              Craig Metz      :       SIOCGIFCONF fix if space for under
  58  *                                      1 device.
  59  *          Thomas Bogendoerfer :       Return ENODEV for dev_open, if there
  60  *                                      is no device open function.
  61  *              Andi Kleen      :       Fix error reporting for SIOCGIFCONF
  62  *          Michael Chastain    :       Fix signed/unsigned for SIOCGIFCONF
  63  *              Cyrus Durgin    :       Cleaned for KMOD
  64  *              Adam Sulmicki   :       Bug Fix : Network Device Unload
  65  *                                      A network device unload needs to purge
  66  *                                      the backlog queue.
  67  *      Paul Rusty Russell      :       SIOCSIFNAME
  68  *              Pekka Riikonen  :       Netdev boot-time settings code
  69  *              Andrew Morton   :       Make unregister_netdevice wait
  70  *                                      indefinitely on dev->refcnt
  71  *              J Hadi Salim    :       - Backlog queue sampling
  72  *                                      - netif_rx() feedback
  73  */
  74
  75 #include <asm/uaccess.h>
  76 #include <asm/system.h>
  77 #include <linux/bitops.h>
  78 #include <linux/capability.h>
  79 #include <linux/cpu.h>
  80 #include <linux/types.h>
  81 #include <linux/kernel.h>
  82 #include <linux/sched.h>
  83 #include <linux/mutex.h>
  84 #include <linux/string.h>
  85 #include <linux/mm.h>
  86 #include <linux/socket.h>
  87 #include <linux/sockios.h>
  88 #include <linux/errno.h>
  89 #include <linux/interrupt.h>
  90 #include <linux/if_ether.h>
  91 #include <linux/netdevice.h>
  92 #include <linux/etherdevice.h>
  93 #include <linux/notifier.h>
  94 #include <linux/skbuff.h>
  95 #include <net/sock.h>
  96 #include <linux/rtnetlink.h>
  97 #if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
  98 #include <linux/imq.h>
  99 #endif
 100 #include <linux/proc_fs.h>
 101 #include <linux/seq_file.h>
 102 #include <linux/stat.h>
 103 #include <linux/if_bridge.h>
 104 #include <net/dst.h>
 105 #include <net/pkt_sched.h>
 106 #include <net/checksum.h>
 107 #include <linux/highmem.h>
 108 #include <linux/init.h>
 109 #include <linux/kmod.h>
 110 #include <linux/module.h>
 111 #include <linux/kallsyms.h>
 112 #include <linux/netpoll.h>
 113 #include <linux/rcupdate.h>
 114 #include <linux/delay.h>
 115 #include <net/wext.h>
 116 #include <net/iw_handler.h>
 117 #include <asm/current.h>
 118 #include <linux/audit.h>
 119 #include <linux/dmaengine.h>
 120 #include <linux/err.h>
 121 #include <linux/ctype.h>
 122 #include <linux/if_arp.h>
 123 #include <typedefs.h>
 124 #include <bcmdefs.h>
 125
 126 /*
 127  *      The list of packet types we will receive (as opposed to discard)
 128  *      and the routines to invoke.
 129  *
 130  *      Why 16. Because with 16 the only overlap we get on a hash of the
 131  *      low nibble of the protocol value is RARP/SNAP/X.25.
 132  *
 133  *      NOTE:  That is no longer true with the addition of VLAN tags.  Not
 134  *             sure which should go first, but I bet it won't make much
 135  *             difference if we are running VLANs.  The good news is that
 136  *             this protocol won't be in the list unless compiled in, so
 137  *             the average user (w/out VLANs) will not be adversely affected.
 138  *             --BLG
 139  *
 140  *              0800    IP
 141  *              8100    802.1Q VLAN
 142  *              0001    802.3
 143  *              0002    AX.25
 144  *              0004    802.2
 145  *              8035    RARP
 146  *              0005    SNAP
 147  *              0805    X.25
 148  *              0806    ARP
 149  *              8137    IPX
 150  *              0009    Localtalk
 151  *              86DD    IPv6
 152  */
 153
/*
 * ptype_lock serializes updates to the packet-type lists below; it is
 * taken with bottom halves disabled by dev_add_pack() and
 * __dev_remove_pack().
 */
static DEFINE_SPINLOCK(ptype_lock);
/* Handlers for one specific protocol, bucketed by ntohs(type) & 15. */
static struct list_head ptype_base[16] __read_mostly;   /* 16 way hashed list */
/* Handlers registered with type ETH_P_ALL. */
static struct list_head ptype_all __read_mostly;        /* Taps */

#ifdef CONFIG_NET_DMA
/* State that only exists when CONFIG_NET_DMA is enabled. */
static struct dma_client *net_dma_client;
static unsigned int net_dma_count;
static spinlock_t net_dma_event_lock;
#endif
 163
 164 /*
 165  * The @dev_base_head list is protected by @dev_base_lock and the rtnl
 166  * semaphore.
 167  *
 168  * Pure readers hold dev_base_lock for reading.
 169  *
 170  * Writers must hold the rtnl semaphore while they loop through the
 171  * dev_base_head list, and hold dev_base_lock for writing when they do the
 172  * actual updates.  This allows pure readers to access the list even
 173  * while a writer is preparing to update it.
 174  *
 175  * To put it another way, dev_base_lock is held for writing only to
 176  * protect against pure readers; the rtnl semaphore provides the
 177  * protection against other writers.
 178  *
 179  * See, for example usages, register_netdevice() and
 180  * unregister_netdevice(), which must be called with the rtnl
 181  * semaphore held.
 182  */
LIST_HEAD(dev_base_head);
DEFINE_RWLOCK(dev_base_lock);

/* Both the device list and its lock are exported for use outside this file. */
EXPORT_SYMBOL(dev_base_head);
EXPORT_SYMBOL(dev_base_lock);

/*
 * Lookup hashes: dev_name_head is indexed through dev_name_hash() and
 * dev_index_head through dev_index_hash(). Each table has
 * 1 << NETDEV_HASHBITS buckets.
 */
#define NETDEV_HASHBITS 8
static struct hlist_head dev_name_head[1<<NETDEV_HASHBITS];
static struct hlist_head dev_index_head[1<<NETDEV_HASHBITS];
 192
 193 static inline struct hlist_head *dev_name_hash(const char *name)
 194 {
 195         unsigned hash = full_name_hash(name, strnlen(name, IFNAMSIZ));
 196         return &dev_name_head[hash & ((1<<NETDEV_HASHBITS)-1)];
 197 }
 198
 199 static inline struct hlist_head *dev_index_hash(int ifindex)
 200 {
 201         return &dev_index_head[ifindex & ((1<<NETDEV_HASHBITS)-1)];
 202 }
 203
/*
 *      Our notifier list. Events such as NETDEV_CHANGENAME and
 *      NETDEV_FEAT_CHANGE are delivered through it with
 *      raw_notifier_call_chain().
 */

static RAW_NOTIFIER_HEAD(netdev_chain);

/*
 *      Device drivers call our routines to queue packets here. We empty the
 *      queue in the local softnet handler.
 *
 *      One softnet_data instance is defined per CPU.
 */
DEFINE_PER_CPU(struct softnet_data, softnet_data) = { NULL };
 215
/*
 * sysfs hooks for network devices. Without CONFIG_SYSFS they collapse to
 * no-op macros; the two value-producing hooks then evaluate to 0.
 */
#ifdef CONFIG_SYSFS
extern int netdev_sysfs_init(void);
extern int netdev_register_sysfs(struct net_device *);
extern void netdev_unregister_sysfs(struct net_device *);
#else
#define netdev_sysfs_init()             (0)
#define netdev_register_sysfs(dev)      (0)
#define netdev_unregister_sysfs(dev)    do { } while(0)
#endif
 225
 226 #ifdef CONFIG_DEBUG_LOCK_ALLOC
/*
 * register_netdevice() inits dev->_xmit_lock and sets lockdep class
 * according to dev->type
 *
 * netdev_lock_type[] and netdev_lock_name[] are parallel tables: entry i of
 * one describes entry i of the other, and netdev_xmit_lock_key[] holds one
 * lock class key per entry. Keep all three in step when adding a type.
 * The final entry (ARPHRD_NONE) is also the fallback chosen by
 * netdev_lock_pos() for device types that are not listed.
 */
static const unsigned short netdev_lock_type[] =
        {ARPHRD_NETROM, ARPHRD_ETHER, ARPHRD_EETHER, ARPHRD_AX25,
         ARPHRD_PRONET, ARPHRD_CHAOS, ARPHRD_IEEE802, ARPHRD_ARCNET,
         ARPHRD_APPLETLK, ARPHRD_DLCI, ARPHRD_ATM, ARPHRD_METRICOM,
         ARPHRD_IEEE1394, ARPHRD_EUI64, ARPHRD_INFINIBAND, ARPHRD_SLIP,
         ARPHRD_CSLIP, ARPHRD_SLIP6, ARPHRD_CSLIP6, ARPHRD_RSRVD,
         ARPHRD_ADAPT, ARPHRD_ROSE, ARPHRD_X25, ARPHRD_HWX25,
         ARPHRD_PPP, ARPHRD_CISCO, ARPHRD_LAPB, ARPHRD_DDCMP,
         ARPHRD_RAWHDLC, ARPHRD_TUNNEL, ARPHRD_TUNNEL6, ARPHRD_FRAD,
         ARPHRD_SKIP, ARPHRD_LOOPBACK, ARPHRD_LOCALTLK, ARPHRD_FDDI,
         ARPHRD_BIF, ARPHRD_SIT, ARPHRD_IPDDP, ARPHRD_IPGRE,
         ARPHRD_PIMREG, ARPHRD_HIPPI, ARPHRD_ASH, ARPHRD_ECONET,
         ARPHRD_IRDA, ARPHRD_FCPP, ARPHRD_FCAL, ARPHRD_FCPL,
         ARPHRD_FCFABRIC, ARPHRD_IEEE802_TR, ARPHRD_IEEE80211,
         ARPHRD_IEEE80211_PRISM, ARPHRD_IEEE80211_RADIOTAP, ARPHRD_VOID,
         ARPHRD_NONE};

/* Lock class names, in the same order as netdev_lock_type[]. */
static const char *netdev_lock_name[] =
        {"_xmit_NETROM", "_xmit_ETHER", "_xmit_EETHER", "_xmit_AX25",
         "_xmit_PRONET", "_xmit_CHAOS", "_xmit_IEEE802", "_xmit_ARCNET",
         "_xmit_APPLETLK", "_xmit_DLCI", "_xmit_ATM", "_xmit_METRICOM",
         "_xmit_IEEE1394", "_xmit_EUI64", "_xmit_INFINIBAND", "_xmit_SLIP",
         "_xmit_CSLIP", "_xmit_SLIP6", "_xmit_CSLIP6", "_xmit_RSRVD",
         "_xmit_ADAPT", "_xmit_ROSE", "_xmit_X25", "_xmit_HWX25",
         "_xmit_PPP", "_xmit_CISCO", "_xmit_LAPB", "_xmit_DDCMP",
         "_xmit_RAWHDLC", "_xmit_TUNNEL", "_xmit_TUNNEL6", "_xmit_FRAD",
         "_xmit_SKIP", "_xmit_LOOPBACK", "_xmit_LOCALTLK", "_xmit_FDDI",
         "_xmit_BIF", "_xmit_SIT", "_xmit_IPDDP", "_xmit_IPGRE",
         "_xmit_PIMREG", "_xmit_HIPPI", "_xmit_ASH", "_xmit_ECONET",
         "_xmit_IRDA", "_xmit_FCPP", "_xmit_FCAL", "_xmit_FCPL",
         "_xmit_FCFABRIC", "_xmit_IEEE802_TR", "_xmit_IEEE80211",
         "_xmit_IEEE80211_PRISM", "_xmit_IEEE80211_RADIOTAP", "_xmit_VOID",
         "_xmit_NONE"};

/* One lock class key per entry of netdev_lock_type[]. */
static struct lock_class_key netdev_xmit_lock_key[ARRAY_SIZE(netdev_lock_type)];
 266
 267 static inline unsigned short netdev_lock_pos(unsigned short dev_type)
 268 {
 269         int i;
 270
 271         for (i = 0; i < ARRAY_SIZE(netdev_lock_type); i++)
 272                 if (netdev_lock_type[i] == dev_type)
 273                         return i;
 274         /* the last key is used by default */
 275         return ARRAY_SIZE(netdev_lock_type) - 1;
 276 }
 277
 278 static inline void netdev_set_lockdep_class(spinlock_t *lock,
 279                                             unsigned short dev_type)
 280 {
 281         int i;
 282
 283         i = netdev_lock_pos(dev_type);
 284         lockdep_set_class_and_name(lock, &netdev_xmit_lock_key[i],
 285                                    netdev_lock_name[i]);
 286 }
 287 #else
/*
 * Variant used when CONFIG_DEBUG_LOCK_ALLOC is not set: there is no
 * lockdep class to assign, so this does nothing.
 */
static inline void netdev_set_lockdep_class(spinlock_t *lock,
                                            unsigned short dev_type)
{
}
 292 #endif
 293
 294 /*******************************************************************************
 295
 296                 Protocol management and registration routines
 297
 298 *******************************************************************************/
 299
/*
 *      Add a protocol ID to the list. Now that the input handler is
 *      smarter we can dispense with all the messy stuff that used to be
 *      here.
 *
 *      BEWARE!!! Protocol handlers that mangle input packets
 *      MUST BE last in their hash buckets, and the check of protocol
 *      handlers MUST start from the promiscuous ptype_all chain in net_bh.
 *      This is true now; do not change it.
 *      Explanation: if a protocol handler that mangles packets were
 *      first on the list, it would not be able to sense that the packet
 *      is cloned and should be copied-on-write, so it would change the
 *      packet and subsequent readers would get a broken packet.
 *                                                      --ANK (980803)
 */
 315
 316 /**
 317  *      dev_add_pack - add packet handler
 318  *      @pt: packet type declaration
 319  *
 320  *      Add a protocol handler to the networking stack. The passed &packet_type
 321  *      is linked into kernel lists and may not be freed until it has been
 322  *      removed from the kernel lists.
 323  *
 324  *      This call does not sleep therefore it can not
 325  *      guarantee all CPU's that are in middle of receiving packets
 326  *      will see the new packet type (until the next received packet).
 327  */
 328
 329 void dev_add_pack(struct packet_type *pt)
 330 {
 331         int hash;
 332
 333         spin_lock_bh(&ptype_lock);
 334         if (pt->type == htons(ETH_P_ALL))
 335                 list_add_rcu(&pt->list, &ptype_all);
 336         else {
 337                 hash = ntohs(pt->type) & 15;
 338                 list_add_rcu(&pt->list, &ptype_base[hash]);
 339         }
 340         spin_unlock_bh(&ptype_lock);
 341 }
 342
 343 /**
 344  *      __dev_remove_pack        - remove packet handler
 345  *      @pt: packet type declaration
 346  *
 347  *      Remove a protocol handler that was previously added to the kernel
 348  *      protocol handlers by dev_add_pack(). The passed &packet_type is removed
 349  *      from the kernel lists and can be freed or reused once this function
 350  *      returns.
 351  *
 352  *      The packet type might still be in use by receivers
 353  *      and must not be freed until after all the CPU's have gone
 354  *      through a quiescent state.
 355  */
 356 void __dev_remove_pack(struct packet_type *pt)
 357 {
 358         struct list_head *head;
 359         struct packet_type *pt1;
 360
 361         spin_lock_bh(&ptype_lock);
 362
 363         if (pt->type == htons(ETH_P_ALL))
 364                 head = &ptype_all;
 365         else
 366                 head = &ptype_base[ntohs(pt->type) & 15];
 367
 368         list_for_each_entry(pt1, head, list) {
 369                 if (pt == pt1) {
 370                         list_del_rcu(&pt->list);
 371                         goto out;
 372                 }
 373         }
 374
 375         printk(KERN_WARNING "dev_remove_pack: %p not found.\n", pt);
 376 out:
 377         spin_unlock_bh(&ptype_lock);
 378 }
/**
 *      dev_remove_pack  - remove packet handler
 *      @pt: packet type declaration
 *
 *      Remove a protocol handler that was previously added to the kernel
 *      protocol handlers by dev_add_pack(). The passed &packet_type is removed
 *      from the kernel lists and can be freed or reused once this function
 *      returns.
 *
 *      This call sleeps to guarantee that no CPU is looking at the packet
 *      type after return.
 */
void dev_remove_pack(struct packet_type *pt)
{
        /* unlink under ptype_lock ... */
        __dev_remove_pack(pt);

        /* ... then wait before returning, so the caller may free @pt */
        synchronize_net();
}
 397
 398 /******************************************************************************
 399
 400                       Device Boot-time Settings Routines
 401
 402 *******************************************************************************/
 403
/*
 * Boot time configuration table. A slot is free while its name starts
 * with '\0' or ' '; netdev_boot_setup_add() fills the first free slot.
 */
static struct netdev_boot_setup dev_boot_setup[NETDEV_BOOT_SETUP_MAX];
 406
 407 /**
 408  *      netdev_boot_setup_add   - add new setup entry
 409  *      @name: name of the device
 410  *      @map: configured settings for the device
 411  *
 412  *      Adds new setup entry to the dev_boot_setup list.  The function
 413  *      returns 0 on error and 1 on success.  This is a generic routine to
 414  *      all netdevices.
 415  */
 416 static int netdev_boot_setup_add(char *name, struct ifmap *map)
 417 {
 418         struct netdev_boot_setup *s;
 419         int i;
 420
 421         s = dev_boot_setup;
 422         for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
 423                 if (s[i].name[0] == '\0' || s[i].name[0] == ' ') {
 424                         memset(s[i].name, 0, sizeof(s[i].name));
 425                         strcpy(s[i].name, name);
 426                         memcpy(&s[i].map, map, sizeof(s[i].map));
 427                         break;
 428                 }
 429         }
 430
 431         return i >= NETDEV_BOOT_SETUP_MAX ? 0 : 1;
 432 }
 433
 434 /**
 435  *      netdev_boot_setup_check - check boot time settings
 436  *      @dev: the netdevice
 437  *
 438  *      Check boot time settings for the device.
 439  *      The found settings are set for the device to be used
 440  *      later in the device probing.
 441  *      Returns 0 if no settings found, 1 if they are.
 442  */
 443 int netdev_boot_setup_check(struct net_device *dev)
 444 {
 445         struct netdev_boot_setup *s = dev_boot_setup;
 446         int i;
 447
 448         for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
 449                 if (s[i].name[0] != '\0' && s[i].name[0] != ' ' &&
 450                     !strncmp(dev->name, s[i].name, strlen(s[i].name))) {
 451                         dev->irq        = s[i].map.irq;
 452                         dev->base_addr  = s[i].map.base_addr;
 453                         dev->mem_start  = s[i].map.mem_start;
 454                         dev->mem_end    = s[i].map.mem_end;
 455                         return 1;
 456                 }
 457         }
 458         return 0;
 459 }
 460
 461
 462 /**
 463  *      netdev_boot_base        - get address from boot time settings
 464  *      @prefix: prefix for network device
 465  *      @unit: id for network device
 466  *
 467  *      Check boot time settings for the base address of device.
 468  *      The found settings are set for the device to be used
 469  *      later in the device probing.
 470  *      Returns 0 if no settings found.
 471  */
 472 unsigned long netdev_boot_base(const char *prefix, int unit)
 473 {
 474         const struct netdev_boot_setup *s = dev_boot_setup;
 475         char name[IFNAMSIZ];
 476         int i;
 477
 478         sprintf(name, "%s%d", prefix, unit);
 479
 480         /*
 481          * If device already registered then return base of 1
 482          * to indicate not to probe for this interface
 483          */
 484         if (__dev_get_by_name(name))
 485                 return 1;
 486
 487         for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++)
 488                 if (!strcmp(name, s[i].name))
 489                         return s[i].map.base_addr;
 490         return 0;
 491 }
 492
 493 /*
 494  * Saves at boot time configured settings for any netdevice.
 495  */
 496 int __init netdev_boot_setup(char *str)
 497 {
 498         int ints[5];
 499         struct ifmap map;
 500
 501         str = get_options(str, ARRAY_SIZE(ints), ints);
 502         if (!str || !*str)
 503                 return 0;
 504
 505         /* Save settings */
 506         memset(&map, 0, sizeof(map));
 507         if (ints[0] > 0)
 508                 map.irq = ints[1];
 509         if (ints[0] > 1)
 510                 map.base_addr = ints[2];
 511         if (ints[0] > 2)
 512                 map.mem_start = ints[3];
 513         if (ints[0] > 3)
 514                 map.mem_end = ints[4];
 515
 516         /* Add new entry to the list */
 517         return netdev_boot_setup_add(str, &map);
 518 }
 519
 520 __setup("netdev=", netdev_boot_setup);
 521
 522 /*******************************************************************************
 523
 524                             Device Interface Subroutines
 525
 526 *******************************************************************************/
 527
 528 /**
 529  *      __dev_get_by_name       - find a device by its name
 530  *      @name: name to find
 531  *
 532  *      Find an interface by name. Must be called under RTNL semaphore
 533  *      or @dev_base_lock. If the name is found a pointer to the device
 534  *      is returned. If the name is not found then %NULL is returned. The
 535  *      reference counters are not incremented so the caller must be
 536  *      careful with locks.
 537  */
 538
 539 struct net_device *__dev_get_by_name(const char *name)
 540 {
 541         struct hlist_node *p;
 542
 543         hlist_for_each(p, dev_name_hash(name)) {
 544                 struct net_device *dev
 545                         = hlist_entry(p, struct net_device, name_hlist);
 546                 if (!strncmp(dev->name, name, IFNAMSIZ))
 547                         return dev;
 548         }
 549         return NULL;
 550 }
 551
 552 /**
 553  *      dev_get_by_name         - find a device by its name
 554  *      @name: name to find
 555  *
 556  *      Find an interface by name. This can be called from any
 557  *      context and does its own locking. The returned handle has
 558  *      the usage count incremented and the caller must use dev_put() to
 559  *      release it when it is no longer needed. %NULL is returned if no
 560  *      matching device is found.
 561  */
 562
 563 struct net_device *dev_get_by_name(const char *name)
 564 {
 565         struct net_device *dev;
 566
 567         read_lock(&dev_base_lock);
 568         dev = __dev_get_by_name(name);
 569         if (dev)
 570                 dev_hold(dev);
 571         read_unlock(&dev_base_lock);
 572         return dev;
 573 }
 574
 575 /**
 576  *      __dev_get_by_index - find a device by its ifindex
 577  *      @ifindex: index of device
 578  *
 579  *      Search for an interface by index. Returns %NULL if the device
 580  *      is not found or a pointer to the device. The device has not
 581  *      had its reference counter increased so the caller must be careful
 582  *      about locking. The caller must hold either the RTNL semaphore
 583  *      or @dev_base_lock.
 584  */
 585
 586 struct net_device *__dev_get_by_index(int ifindex)
 587 {
 588         struct hlist_node *p;
 589
 590         hlist_for_each(p, dev_index_hash(ifindex)) {
 591                 struct net_device *dev
 592                         = hlist_entry(p, struct net_device, index_hlist);
 593                 if (dev->ifindex == ifindex)
 594                         return dev;
 595         }
 596         return NULL;
 597 }
 598
 599
 600 /**
 601  *      dev_get_by_index - find a device by its ifindex
 602  *      @ifindex: index of device
 603  *
 604  *      Search for an interface by index. Returns NULL if the device
 605  *      is not found or a pointer to the device. The device returned has
 606  *      had a reference added and the pointer is safe until the user calls
 607  *      dev_put to indicate they have finished with it.
 608  */
 609
 610 struct net_device *dev_get_by_index(int ifindex)
 611 {
 612         struct net_device *dev;
 613
 614         read_lock(&dev_base_lock);
 615         dev = __dev_get_by_index(ifindex);
 616         if (dev)
 617                 dev_hold(dev);
 618         read_unlock(&dev_base_lock);
 619         return dev;
 620 }
 621
 622 /**
 623  *      dev_getbyhwaddr - find a device by its hardware address
 624  *      @type: media type of device
 625  *      @ha: hardware address
 626  *
 627  *      Search for an interface by MAC address. Returns NULL if the device
 628  *      is not found or a pointer to the device. The caller must hold the
 629  *      rtnl semaphore. The returned device has not had its ref count increased
 630  *      and the caller must therefore be careful about locking
 631  *
 632  *      BUGS:
 633  *      If the API was consistent this would be __dev_get_by_hwaddr
 634  */
 635
 636 struct net_device *dev_getbyhwaddr(unsigned short type, char *ha)
 637 {
 638         struct net_device *dev;
 639
 640         ASSERT_RTNL();
 641
 642         for_each_netdev(dev)
 643                 if (dev->type == type &&
 644                     !memcmp(dev->dev_addr, ha, dev->addr_len))
 645                         return dev;
 646
 647         return NULL;
 648 }
 649
 650 EXPORT_SYMBOL(dev_getbyhwaddr);
 651
 652 struct net_device *__dev_getfirstbyhwtype(unsigned short type)
 653 {
 654         struct net_device *dev;
 655
 656         ASSERT_RTNL();
 657         for_each_netdev(dev)
 658                 if (dev->type == type)
 659                         return dev;
 660
 661         return NULL;
 662 }
 663
 664 EXPORT_SYMBOL(__dev_getfirstbyhwtype);
 665
 666 struct net_device *dev_getfirstbyhwtype(unsigned short type)
 667 {
 668         struct net_device *dev;
 669
 670         rtnl_lock();
 671         dev = __dev_getfirstbyhwtype(type);
 672         if (dev)
 673                 dev_hold(dev);
 674         rtnl_unlock();
 675         return dev;
 676 }
 677
 678 EXPORT_SYMBOL(dev_getfirstbyhwtype);
 679
 680 /**
 681  *      dev_get_by_flags - find any device with given flags
 682  *      @if_flags: IFF_* values
 683  *      @mask: bitmask of bits in if_flags to check
 684  *
 685  *      Search for any interface with the given flags. Returns NULL if a device
 686  *      is not found or a pointer to the device. The device returned has
 687  *      had a reference added and the pointer is safe until the user calls
 688  *      dev_put to indicate they have finished with it.
 689  */
 690
 691 struct net_device * dev_get_by_flags(unsigned short if_flags, unsigned short mask)
 692 {
 693         struct net_device *dev, *ret;
 694
 695         ret = NULL;
 696         read_lock(&dev_base_lock);
 697         for_each_netdev(dev) {
 698                 if (((dev->flags ^ if_flags) & mask) == 0) {
 699                         dev_hold(dev);
 700                         ret = dev;
 701                         break;
 702                 }
 703         }
 704         read_unlock(&dev_base_lock);
 705         return ret;
 706 }
 707
 708 /**
 709  *      dev_valid_name - check if name is okay for network device
 710  *      @name: name string
 711  *
 712  *      Network device names need to be valid file names to
 713  *      to allow sysfs to work.  We also disallow any kind of
 714  *      whitespace.
 715  */
 716 int dev_valid_name(const char *name)
 717 {
 718         if (*name == '\0')
 719                 return 0;
 720         if (strlen(name) >= IFNAMSIZ)
 721                 return 0;
 722         if (!strcmp(name, ".") || !strcmp(name, ".."))
 723                 return 0;
 724
 725         while (*name) {
 726                 if (*name == '/' || isspace(*name))
 727                         return 0;
 728                 name++;
 729         }
 730         return 1;
 731 }
 732
 733 /**
 734  *      dev_alloc_name - allocate a name for a device
 735  *      @dev: device
 736  *      @name: name format string
 737  *
 738  *      Passed a format string - eg "lt%d" it will try and find a suitable
 739  *      id. It scans list of devices to build up a free map, then chooses
 740  *      the first empty slot. The caller must hold the dev_base or rtnl lock
 741  *      while allocating the name and adding the device in order to avoid
 742  *      duplicates.
 743  *      Limited to bits_per_byte * page size devices (ie 32K on most platforms).
 744  *      Returns the number of the unit assigned or a negative errno code.
 745  */
 746
 747 int dev_alloc_name(struct net_device *dev, const char *name)
 748 {
 749         int i = 0;
 750         char buf[IFNAMSIZ];
 751         const char *p;
 752         const int max_netdevices = 8*PAGE_SIZE;
 753         long *inuse;
 754         struct net_device *d;
 755
 756         p = strnchr(name, IFNAMSIZ-1, '%');
 757         if (p) {
 758                 /*
 759                  * Verify the string as this thing may have come from
 760                  * the user.  There must be either one "%d" and no other "%"
 761                  * characters.
 762                  */
 763                 if (p[1] != 'd' || strchr(p + 2, '%'))
 764                         return -EINVAL;
 765
 766                 /* Use one page as a bit array of possible slots */
 767                 inuse = (long *) get_zeroed_page(GFP_ATOMIC);
 768                 if (!inuse)
 769                         return -ENOMEM;
 770
 771                 for_each_netdev(d) {
 772                         if (!sscanf(d->name, name, &i))
 773                                 continue;
 774                         if (i < 0 || i >= max_netdevices)
 775                                 continue;
 776
 777                         /*  avoid cases where sscanf is not exact inverse of printf */
 778                         snprintf(buf, sizeof(buf), name, i);
 779                         if (!strncmp(buf, d->name, IFNAMSIZ))
 780                                 set_bit(i, inuse);
 781                 }
 782
 783                 i = find_first_zero_bit(inuse, max_netdevices);
 784                 free_page((unsigned long) inuse);
 785         }
 786
 787         snprintf(buf, sizeof(buf), name, i);
 788         if (!__dev_get_by_name(buf)) {
 789                 strlcpy(dev->name, buf, IFNAMSIZ);
 790                 return i;
 791         }
 792
 793         /* It is possible to run out of possible slots
 794          * when the name is long and there isn't enough space left
 795          * for the digits, or if all bits are used.
 796          */
 797         return -ENFILE;
 798 }
 799
 800
 801 /**
 802  *      dev_change_name - change name of a device
 803  *      @dev: device
 804  *      @newname: name (or format string) must be at least IFNAMSIZ
 805  *
 806  *      Change name of a device, can pass format strings "eth%d".
 807  *      for wildcarding.
 808  */
 809 int dev_change_name(struct net_device *dev, char *newname)
 810 {
 811         int err = 0;
 812
 813         ASSERT_RTNL();
 814
 815         if (dev->flags & IFF_UP)
 816                 return -EBUSY;
 817
 818         if (!dev_valid_name(newname))
 819                 return -EINVAL;
 820
 821         if (strchr(newname, '%')) {
 822                 err = dev_alloc_name(dev, newname);
 823                 if (err < 0)
 824                         return err;
 825                 strcpy(newname, dev->name);
 826         }
 827         else if (__dev_get_by_name(newname))
 828                 return -EEXIST;
 829         else
 830                 strlcpy(dev->name, newname, IFNAMSIZ);
 831
 832         device_rename(&dev->dev, dev->name);
 833         hlist_del(&dev->name_hlist);
 834         hlist_add_head(&dev->name_hlist, dev_name_hash(dev->name));
 835         raw_notifier_call_chain(&netdev_chain, NETDEV_CHANGENAME, dev);
 836
 837         return err;
 838 }
 839
 840 /**
 841  *      netdev_features_change - device changes features
 842  *      @dev: device to cause notification
 843  *
 844  *      Called to indicate a device has changed features.
 845  */
 846 void netdev_features_change(struct net_device *dev)
 847 {
 848         raw_notifier_call_chain(&netdev_chain, NETDEV_FEAT_CHANGE, dev);
 849 }
 850 EXPORT_SYMBOL(netdev_features_change);
 851
 852 /**
 853  *      netdev_state_change - device changes state
 854  *      @dev: device to cause notification
 855  *
 856  *      Called to indicate a device has changed state. This function calls
 857  *      the notifier chains for netdev_chain and sends a NEWLINK message
 858  *      to the routing socket.
 859  */
 860 void netdev_state_change(struct net_device *dev)
 861 {
 862         if (dev->flags & IFF_UP) {
 863                 raw_notifier_call_chain(&netdev_chain,
 864                                 NETDEV_CHANGE, dev);
 865                 rtmsg_ifinfo(RTM_NEWLINK, dev, 0);
 866         }
 867 }
 868
 869 /**
 870  *      dev_load        - load a network module
 871  *      @name: name of interface
 872  *
 873  *      If a network interface is not present and the process has suitable
 874  *      privileges this function loads the module. If module loading is not
 875  *      available in this kernel then it becomes a nop.
 876  */
 877
 878 void dev_load(const char *name)
 879 {
 880         struct net_device *dev;
 881
 882         read_lock(&dev_base_lock);
 883         dev = __dev_get_by_name(name);
 884         read_unlock(&dev_base_lock);
 885
 886         if (!dev && capable(CAP_SYS_MODULE))
 887                 request_module("%s", name);
 888 }
 889
 890 static int default_rebuild_header(struct sk_buff *skb)
 891 {
 892         printk(KERN_DEBUG "%s: default_rebuild_header called -- BUG!\n",
 893                skb->dev ? skb->dev->name : "NULL!!!");
 894         kfree_skb(skb);
 895         return 1;
 896 }
 897
 898 /**
 899  *      dev_open        - prepare an interface for use.
 900  *      @dev:   device to open
 901  *
 902  *      Takes a device from down to up state. The device's private open
 903  *      function is invoked and then the multicast lists are loaded. Finally
 904  *      the device is moved into the up state and a %NETDEV_UP message is
 905  *      sent to the netdev notifier chain.
 906  *
 907  *      Calling this function on an active interface is a nop. On a failure
 908  *      a negative errno code is returned.
 909  */
 910 int dev_open(struct net_device *dev)
 911 {
 912         int ret = 0;
 913
 914         /*
 915          *      Is it already up?
 916          */
 917
 918         if (dev->flags & IFF_UP)
 919                 return 0;
 920
 921         /*
 922          *      Is it even present?
 923          */
 924         if (!netif_device_present(dev))
 925                 return -ENODEV;
 926
 927         /*
 928          *      Call device private open method
 929          */
 930         set_bit(__LINK_STATE_START, &dev->state);
 931         if (dev->open) {
 932                 ret = dev->open(dev);
 933                 if (ret)
 934                         clear_bit(__LINK_STATE_START, &dev->state);
 935         }
 936
 937         /*
 938          *      If it went open OK then:
 939          */
 940
 941         if (!ret) {
 942                 /*
 943                  *      Set the flags.
 944                  */
 945                 dev->flags |= IFF_UP;
 946
 947                 /*
 948                  *      Initialize multicasting status
 949                  */
 950                 dev_mc_upload(dev);
 951
 952                 /*
 953                  *      Wakeup transmit queue engine
 954                  */
 955                 dev_activate(dev);
 956
 957                 /*
 958                  *      ... and announce new interface.
 959                  */
 960                 raw_notifier_call_chain(&netdev_chain, NETDEV_UP, dev);
 961         }
 962         return ret;
 963 }
 964
 965 /**
 966  *      dev_close - shutdown an interface.
 967  *      @dev: device to shutdown
 968  *
 969  *      This function moves an active device into down state. A
 970  *      %NETDEV_GOING_DOWN is sent to the netdev notifier chain. The device
 971  *      is then deactivated and finally a %NETDEV_DOWN is sent to the notifier
 972  *      chain.
 973  */
 974 int dev_close(struct net_device *dev)
 975 {
 976         if (!(dev->flags & IFF_UP))
 977                 return 0;
 978
 979         /*
 980          *      Tell people we are going down, so that they can
 981          *      prepare to death, when device is still operating.
 982          */
 983         raw_notifier_call_chain(&netdev_chain, NETDEV_GOING_DOWN, dev);
 984
 985         dev_deactivate(dev);
 986
 987         clear_bit(__LINK_STATE_START, &dev->state);
 988
 989         /* Synchronize to scheduled poll. We cannot touch poll list,
 990          * it can be even on different cpu. So just clear netif_running(),
 991          * and wait when poll really will happen. Actually, the best place
 992          * for this is inside dev->stop() after device stopped its irq
 993          * engine, but this requires more changes in devices. */
 994
 995         smp_mb__after_clear_bit(); /* Commit netif_running(). */
 996         while (test_bit(__LINK_STATE_RX_SCHED, &dev->state)) {
 997                 /* No hurry. */
 998                 msleep(1);
 999         }
1000
1001         /*
1002          *      Call the device specific close. This cannot fail.
1003          *      Only if device is UP
1004          *
1005          *      We allow it to be called even after a DETACH hot-plug
1006          *      event.
1007          */
1008         if (dev->stop)
1009                 dev->stop(dev);
1010
1011         /*
1012          *      Device is now down.
1013          */
1014
1015         dev->flags &= ~IFF_UP;
1016
1017         /*
1018          * Tell people we are down
1019          */
1020         raw_notifier_call_chain(&netdev_chain, NETDEV_DOWN, dev);
1021
1022         return 0;
1023 }
1024
1025
1026 /*
1027  *      Device change register/unregister. These are not inline or static
1028  *      as we export them to the world.
1029  */
1030
1031 /**
1032  *      register_netdevice_notifier - register a network notifier block
1033  *      @nb: notifier
1034  *
1035  *      Register a notifier to be called when network device events occur.
1036  *      The notifier passed is linked into the kernel structures and must
1037  *      not be reused until it has been unregistered. A negative errno code
1038  *      is returned on a failure.
1039  *
1040  *      When registered all registration and up events are replayed
1041  *      to the new notifier to allow device to have a race free
1042  *      view of the network device list.
1043  */
1044
1045 int register_netdevice_notifier(struct notifier_block *nb)
1046 {
1047         struct net_device *dev;
1048         int err;
1049
1050         rtnl_lock();
1051         err = raw_notifier_chain_register(&netdev_chain, nb);
1052         if (!err) {
1053                 for_each_netdev(dev) {
1054                         nb->notifier_call(nb, NETDEV_REGISTER, dev);
1055
1056                         if (dev->flags & IFF_UP)
1057                                 nb->notifier_call(nb, NETDEV_UP, dev);
1058                 }
1059         }
1060         rtnl_unlock();
1061         return err;
1062 }
1063
1064 /**
1065  *      unregister_netdevice_notifier - unregister a network notifier block
1066  *      @nb: notifier
1067  *
1068  *      Unregister a notifier previously registered by
1069  *      register_netdevice_notifier(). The notifier is unlinked into the
1070  *      kernel structures and may then be reused. A negative errno code
1071  *      is returned on a failure.
1072  */
1073
1074 int unregister_netdevice_notifier(struct notifier_block *nb)
1075 {
1076         int err;
1077
1078         rtnl_lock();
1079         err = raw_notifier_chain_unregister(&netdev_chain, nb);
1080         rtnl_unlock();
1081         return err;
1082 }
1083
1084 /**
1085  *      call_netdevice_notifiers - call all network notifier blocks
1086  *      @val: value passed unmodified to notifier function
1087  *      @v:   pointer passed unmodified to notifier function
1088  *
1089  *      Call all network notifier blocks.  Parameters and return value
1090  *      are as for raw_notifier_call_chain().
1091  */
1092
1093 int call_netdevice_notifiers(unsigned long val, void *v)
1094 {
1095         return raw_notifier_call_chain(&netdev_chain, val, v);
1096 }
1097
1098 /* When > 0 there are consumers of rx skb time stamps */
1099 static atomic_t netstamp_needed = ATOMIC_INIT(0);
1100
1101 void net_enable_timestamp(void)
1102 {
1103         atomic_inc(&netstamp_needed);
1104 }
1105
1106 void net_disable_timestamp(void)
1107 {
1108         atomic_dec(&netstamp_needed);
1109 }
1110
1111 static inline void net_timestamp(struct sk_buff *skb)
1112 {
1113         if (atomic_read(&netstamp_needed))
1114                 __net_timestamp(skb);
1115         else
1116                 skb->tstamp.tv64 = 0;
1117 }
1118
1119 /*
1120  *      Support routine. Sends outgoing frames to any network
1121  *      taps currently in use.
1122  */
1123
1124 static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
1125 {
1126         struct packet_type *ptype;
1127
1128
1129         rcu_read_lock();
1130         list_for_each_entry_rcu(ptype, &ptype_all, list) {
1131                 /* Never send packets back to the socket
1132                  * they originated from - MvS (miquels@drinkel.ow.org)
1133                  */
1134                 if ((ptype->dev == dev || !ptype->dev) &&
1135                     (ptype->af_packet_priv == NULL ||
1136                      (struct sock *)ptype->af_packet_priv != skb->sk)) {
1137                         struct sk_buff *skb2= skb_clone(skb, GFP_ATOMIC);
1138                         if (!skb2)
1139                                 break;
1140
1141                         net_timestamp(skb2);
1142
1143                         /* skb->nh should be correctly
1144                            set by sender, so that the second statement is
1145                            just protection against buggy protocols.
1146                          */
1147                         skb_reset_mac_header(skb2);
1148
1149                         if (skb_network_header(skb2) < skb2->data ||
1150                             skb2->network_header > skb2->tail) {
1151                                 if (net_ratelimit())
1152                                         printk(KERN_CRIT "protocol %04x is "
1153                                                "buggy, dev %s\n",
1154                                                skb2->protocol, dev->name);
1155                                 skb_reset_network_header(skb2);
1156                         }
1157
1158                         skb2->transport_header = skb2->network_header;
1159                         skb2->pkt_type = PACKET_OUTGOING;
1160                         ptype->func(skb2, skb->dev, ptype, skb->dev);
1161                 }
1162         }
1163         rcu_read_unlock();
1164 }
1165
1166
1167 void __netif_schedule(struct net_device *dev)
1168 {
1169         if (!test_and_set_bit(__LINK_STATE_SCHED, &dev->state)) {
1170                 unsigned long flags;
1171                 struct softnet_data *sd;
1172
1173                 local_irq_save(flags);
1174                 sd = &__get_cpu_var(softnet_data);
1175                 dev->next_sched = sd->output_queue;
1176                 sd->output_queue = dev;
1177                 raise_softirq_irqoff(NET_TX_SOFTIRQ);
1178                 local_irq_restore(flags);
1179         }
1180 }
1181 EXPORT_SYMBOL(__netif_schedule);
1182
1183 void __netif_rx_schedule(struct net_device *dev)
1184 {
1185         unsigned long flags;
1186
1187         local_irq_save(flags);
1188         dev_hold(dev);
1189         list_add_tail(&dev->poll_list, &__get_cpu_var(softnet_data).poll_list);
1190         if (dev->quota < 0)
1191                 dev->quota += dev->weight;
1192         else
1193                 dev->quota = dev->weight;
1194         __raise_softirq_irqoff(NET_RX_SOFTIRQ);
1195         local_irq_restore(flags);
1196 }
1197 EXPORT_SYMBOL(__netif_rx_schedule);
1198
/*
 * Free @skb from any context: dev_kfree_skb_irq() is used in hard irq
 * context or with interrupts disabled, dev_kfree_skb() otherwise.
 */
void dev_kfree_skb_any(struct sk_buff *skb)
{
        if (in_irq() || irqs_disabled()) {
                dev_kfree_skb_irq(skb);
                return;
        }

        dev_kfree_skb(skb);
}
EXPORT_SYMBOL(dev_kfree_skb_any);
1207
1208
1209 /* Hot-plugging. */
1210 void netif_device_detach(struct net_device *dev)
1211 {
1212         if (test_and_clear_bit(__LINK_STATE_PRESENT, &dev->state) &&
1213             netif_running(dev)) {
1214                 netif_stop_queue(dev);
1215         }
1216 }
1217 EXPORT_SYMBOL(netif_device_detach);
1218
1219 void netif_device_attach(struct net_device *dev)
1220 {
1221         if (!test_and_set_bit(__LINK_STATE_PRESENT, &dev->state) &&
1222             netif_running(dev)) {
1223                 netif_wake_queue(dev);
1224                 __netdev_watchdog_up(dev);
1225         }
1226 }
1227 EXPORT_SYMBOL(netif_device_attach);
1228
1229
1230 /*
1231  * Invalidate hardware checksum when packet is to be mangled, and
1232  * complete checksum manually on outgoing path.
1233  */
1234 int skb_checksum_help(struct sk_buff *skb)
1235 {
1236         __wsum csum;
1237         int ret = 0, offset;
1238
1239         if (skb->ip_summed == CHECKSUM_COMPLETE)
1240                 goto out_set_summed;
1241
1242         if (unlikely(skb_shinfo(skb)->gso_size)) {
1243                 /* Let GSO fix up the checksum. */
1244                 goto out_set_summed;
1245         }
1246
1247         offset = skb->csum_start - skb_headroom(skb);
1248         BUG_ON(offset >= skb_headlen(skb));
1249         csum = skb_checksum(skb, offset, skb->len - offset, 0);
1250
1251         offset += skb->csum_offset;
1252         BUG_ON(offset + sizeof(__sum16) > skb_headlen(skb));
1253
1254         if (skb_cloned(skb) &&
1255             !skb_clone_writable(skb, offset + sizeof(__sum16))) {
1256                 ret = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
1257                 if (ret)
1258                         goto out;
1259         }
1260
1261         *(__sum16 *)(skb->data + offset) = csum_fold(csum);
1262 out_set_summed:
1263         skb->ip_summed = CHECKSUM_NONE;
1264 out:
1265         return ret;
1266 }
1267
/**
 *      skb_gso_segment - Perform segmentation on skb.
 *      @skb: buffer to segment
 *      @features: features for the output path (see dev->features)
 *
 *      This function segments the given skb and returns a list of segments.
 *
 *      It may return NULL if the skb requires no segmentation.  This is
 *      only possible when GSO is used for verifying header integrity.
 *
 *      On failure an ERR_PTR() is returned; it is -EPROTONOSUPPORT when no
 *      registered packet type with a gso_segment method matches
 *      skb->protocol.
 */
struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features)
{
        struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT);
        struct packet_type *ptype;
        __be16 type = skb->protocol;
        int err;

        BUG_ON(skb_shinfo(skb)->frag_list);

        /* Record the link-level header length and step over the header. */
        skb_reset_mac_header(skb);
        skb->mac_len = skb->network_header - skb->mac_header;
        __skb_pull(skb, skb->mac_len);

        /*
         * Anything but CHECKSUM_PARTIAL triggers a warning here; in that
         * case the header is unshared first if it is cloned.
         */
        if (WARN_ON(skb->ip_summed != CHECKSUM_PARTIAL)) {
                if (skb_header_cloned(skb) &&
                    (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC)))
                        return ERR_PTR(err);
        }

        rcu_read_lock();
        list_for_each_entry_rcu(ptype, &ptype_base[ntohs(type) & 15], list) {
                if (ptype->type == type && !ptype->dev && ptype->gso_segment) {
                        if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) {
                                /*
                                 * Run the protocol's send check.  Stop on
                                 * error, or with a NULL result (err == 0)
                                 * if the skb is acceptable as it is for
                                 * @features.
                                 */
                                err = ptype->gso_send_check(skb);
                                segs = ERR_PTR(err);
                                if (err || skb_gso_ok(skb, features))
                                        break;
                                /* Move data back to the network header. */
                                __skb_push(skb, (skb->data -
                                                 skb_network_header(skb)));
                        }
                        segs = ptype->gso_segment(skb, features);
                        break;
                }
        }
        rcu_read_unlock();

        /* Move data back to the link-level header. */
        __skb_push(skb, skb->data - skb_mac_header(skb));

        return segs;
}

EXPORT_SYMBOL(skb_gso_segment);
1320
/*
 * Take action when hardware reception checksum errors are detected:
 * log the failure, rate limited, together with a stack dump.
 */
#ifdef CONFIG_BUG
void netdev_rx_csum_fault(struct net_device *dev)
{
        if (!net_ratelimit())
                return;

        printk(KERN_ERR "%s: hw csum failure.\n",
                dev ? dev->name : "<unknown>");
        dump_stack();
}
EXPORT_SYMBOL(netdev_rx_csum_fault);
#endif
1333
/* Actually, we should eliminate this check as soon as we know, that:
 * 1. IOMMU is present and allows to map all the memory.
 * 2. No high memory really exists on this machine.
 *
 * Returns 1 if @skb has a page fragment in high memory while @dev lacks
 * NETIF_F_HIGHDMA, 0 otherwise (always 0 without CONFIG_HIGHMEM).
 */

static inline int illegal_highdma(struct net_device *dev, struct sk_buff *skb)
{
#ifdef CONFIG_HIGHMEM
        if (!(dev->features & NETIF_F_HIGHDMA)) {
                int frag;

                for (frag = 0; frag < skb_shinfo(skb)->nr_frags; frag++) {
                        if (PageHighMem(skb_shinfo(skb)->frags[frag].page))
                                return 1;
                }
        }

#endif
        return 0;
}
1354
/*
 * Private data kept in skb->cb while a software-segmented GSO skb is
 * being transmitted.
 */
struct dev_gso_cb {
        /* The skb's own destructor, saved by dev_gso_segment(). */
        void (*destructor)(struct sk_buff *skb);
};

/* View the control buffer of @skb as a struct dev_gso_cb. */
#define DEV_GSO_CB(skb) ((struct dev_gso_cb *)(skb)->cb)
1360
1361 static void dev_gso_skb_destructor(struct sk_buff *skb)
1362 {
1363         struct dev_gso_cb *cb;
1364
1365         do {
1366                 struct sk_buff *nskb = skb->next;
1367
1368                 skb->next = nskb->next;
1369                 nskb->next = NULL;
1370                 kfree_skb(nskb);
1371         } while (skb->next);
1372
1373         cb = DEV_GSO_CB(skb);
1374         if (cb->destructor)
1375                 cb->destructor(skb);
1376 }
1377
1378 /**
1379  *      dev_gso_segment - Perform emulated hardware segmentation on skb.
1380  *      @skb: buffer to segment
1381  *
1382  *      This function segments the given skb and stores the list of segments
1383  *      in skb->next.
1384  */
1385 static int dev_gso_segment(struct sk_buff *skb)
1386 {
1387         struct net_device *dev = skb->dev;
1388         struct sk_buff *segs;
1389         int features = dev->features & ~(illegal_highdma(dev, skb) ?
1390                                          NETIF_F_SG : 0);
1391
1392         segs = skb_gso_segment(skb, features);
1393
1394         /* Verifying header integrity only. */
1395         if (!segs)
1396                 return 0;
1397
1398         if (unlikely(IS_ERR(segs)))
1399                 return PTR_ERR(segs);
1400
1401         skb->next = segs;
1402         DEV_GSO_CB(skb)->destructor = skb->destructor;
1403         skb->destructor = dev_gso_skb_destructor;
1404
1405         return 0;
1406 }
1407
/*
 * Hand @skb to the hard_start_xmit method of @dev.
 *
 * An skb without a chain on skb->next is first cloned to the network taps
 * and, if the device needs it, segmented in software.  A plain skb is
 * then passed straight to the driver and the driver's return value is
 * returned.
 *
 * For a segmented skb the segments chained on skb->next are sent one by
 * one.  If the driver refuses a segment it is put back at the head of the
 * chain and the driver's code is returned; if the queue gets stopped
 * while segments remain, NETDEV_TX_BUSY is returned.  In both cases the
 * chain stays on @skb, so the function can be called again with it.  Once
 * all segments are gone the original destructor is restored and @skb is
 * freed.
 *
 * If software segmentation fails, @skb is freed and 0 is returned.
 */
int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev)
{
        if (likely(!skb->next)) {
                /*
                 * Feed the taps, if there are any.  With IMQ configured
                 * this is skipped for skbs carrying IMQ_F_ENQUEUE.
                 * NOTE(review): presumably so that a frame is not shown
                 * to the taps twice -- confirm against the IMQ patch.
                 */
                if (!list_empty(&ptype_all))
#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
                    if (!(skb->imq_flags & IMQ_F_ENQUEUE))
#endif
                        dev_queue_xmit_nit(skb, dev);

                if (netif_needs_gso(dev, skb)) {
                        if (unlikely(dev_gso_segment(skb)))
                                goto out_kfree_skb;
                        /* No chain means there was nothing to segment. */
                        if (skb->next)
                                goto gso;
                }

                return dev->hard_start_xmit(skb, dev);
        }

gso:
        do {
                struct sk_buff *nskb = skb->next;
                int rc;

                /* Detach the first segment and give it to the driver. */
                skb->next = nskb->next;
                nskb->next = NULL;
                rc = dev->hard_start_xmit(nskb, dev);
                if (unlikely(rc)) {
                        /* Not accepted: put it back for a later retry. */
                        nskb->next = skb->next;
                        skb->next = nskb;
                        return rc;
                }
                if (unlikely(netif_queue_stopped(dev) && skb->next))
                        return NETDEV_TX_BUSY;
        } while (skb->next);

        /* All segments are sent; undo what dev_gso_segment() set up. */
        skb->destructor = DEV_GSO_CB(skb)->destructor;

out_kfree_skb:
        kfree_skb(skb);
        return 0;
}
1450
/*
 * Take the device transmit lock, unless the device sets NETIF_F_LLTX.
 * The cpu argument is not used by the expansion.
 */
#define HARD_TX_LOCK(dev, cpu) {                        \
        if ((dev->features & NETIF_F_LLTX) == 0) {      \
                netif_tx_lock(dev);                     \
        }                                               \
}

/* Release the lock taken by HARD_TX_LOCK(), under the same condition. */
#define HARD_TX_UNLOCK(dev) {                           \
        if ((dev->features & NETIF_F_LLTX) == 0) {      \
                netif_tx_unlock(dev);                   \
        }                                               \
}
1462
/**
 *      dev_queue_xmit - transmit a buffer
 *      @skb: buffer to transmit
 *
 *      Queue a buffer for transmission to a network device. The caller must
 *      have set the device and priority and built the buffer before calling
 *      this function. The function can be called from an interrupt.
 *
 *      A negative errno code is returned on a failure. A success does not
 *      guarantee the frame will be transmitted as it may be dropped due
 *      to congestion or traffic shaping.
 *
 * -----------------------------------------------------------------------------------
 *      I notice this method can also return errors from the queue disciplines,
 *      including NET_XMIT_DROP, which is a positive value.  So, errors can also
 *      be positive.
 *
 *      Regardless of the return value, the skb is consumed, so it is currently
 *      difficult to retry a send to this method.  (You can bump the ref count
 *      before sending to hold a reference for retry if you are careful.)
 *
 *      When calling this method, interrupts MUST be enabled.  This is because
 *      the BH enable code must have IRQs enabled so that it will not deadlock.
 *          --BLG
 */

int BCMFASTPATH dev_queue_xmit(struct sk_buff *skb)
{
        struct net_device *dev = skb->dev;
        struct Qdisc *q;
        /* Returned if linearizing or checksumming below fails. */
        int rc = -ENOMEM;

        /* GSO will handle the following emulations directly. */
        if (netif_needs_gso(dev, skb))
                goto gso;

        /* Linearize an skb with a frag_list if the device cannot take it. */
        if (skb_shinfo(skb)->frag_list &&
            !(dev->features & NETIF_F_FRAGLIST) &&
            __skb_linearize(skb))
                goto out_kfree_skb;

        /* Fragmented skb is linearized if device does not support SG,
         * or if at least one of fragments is in highmem and device
         * does not support DMA from it.
         */
        if (skb_shinfo(skb)->nr_frags &&
            (!(dev->features & NETIF_F_SG) || illegal_highdma(dev, skb)) &&
            __skb_linearize(skb))
                goto out_kfree_skb;

        /* If packet is not checksummed and device does not support
         * checksumming for this protocol, complete checksumming here.
         */
        if (skb->ip_summed == CHECKSUM_PARTIAL) {
                skb_set_transport_header(skb, skb->csum_start -
                                              skb_headroom(skb));

                if (!(dev->features & NETIF_F_GEN_CSUM) &&
                    (!(dev->features & NETIF_F_IP_CSUM) ||
                     skb->protocol != htons(ETH_P_IP)))
                        if (skb_checksum_help(skb))
                                goto out_kfree_skb;
        }

gso:
        spin_lock_prefetch(&dev->queue_lock);

        /* Disable soft irqs for various locks below. Also
         * stops preemption for RCU.
         */
        rcu_read_lock_bh();

        /* Updates of qdisc are serialized by queue_lock.
         * The struct Qdisc which is pointed to by qdisc is now a
         * rcu structure - it may be accessed without acquiring
         * a lock (but the structure may be stale.) The freeing of the
         * qdisc will be deferred until it's known that there are no
         * more references to it.
         *
         * If the qdisc has an enqueue function, we still need to
         * hold the queue_lock before calling it, since queue_lock
         * also serializes access to the device queue.
         */

        q = rcu_dereference(dev->qdisc);
#ifdef CONFIG_NET_CLS_ACT
        skb->tc_verd = SET_TC_AT(skb->tc_verd,AT_EGRESS);
#endif
        if (q->enqueue) {
                /* Grab device queue */
                spin_lock(&dev->queue_lock);
                /* Re-read the qdisc now that queue_lock is held. */
                q = dev->qdisc;
                if (q->enqueue) {
                        rc = q->enqueue(skb, q);
                        qdisc_run(dev);
                        spin_unlock(&dev->queue_lock);

                        /* NET_XMIT_BYPASS is reported as success. */
                        rc = rc == NET_XMIT_BYPASS ? NET_XMIT_SUCCESS : rc;
                        goto out;
                }
                spin_unlock(&dev->queue_lock);
        }

        /* The device has no queue. Common case for software devices:
           loopback, all the sorts of tunnels...

           Really, it is unlikely that netif_tx_lock protection is necessary
           here.  (f.e. loopback and IP tunnels are clean ignoring statistics
           counters.)
           However, it is possible, that they rely on protection
           made by us here.

           Check this and take the lock. It is not prone to deadlocks.
           Taking it for a noqueue qdisc is even simpler 8)
         */
        if (dev->flags & IFF_UP) {
                int cpu = smp_processor_id(); /* ok because BHs are off */

                if (dev->xmit_lock_owner != cpu) {

                        HARD_TX_LOCK(dev, cpu);

                        if (!netif_queue_stopped(dev)) {
                                rc = 0;
                                if (!dev_hard_start_xmit(skb, dev)) {
                                        HARD_TX_UNLOCK(dev);
                                        goto out;
                                }
                        }
                        HARD_TX_UNLOCK(dev);
                        if (net_ratelimit())
                                printk(KERN_CRIT "Virtual device %s asks to "
                                       "queue packet!\n", dev->name);
                } else {
                        /* Recursion is detected! It is possible,
                         * unfortunately */
                        if (net_ratelimit())
                                printk(KERN_CRIT "Dead loop on virtual device "
                                       "%s, fix it urgently!\n", dev->name);
                }
        }

        /* The frame could not be sent: drop it and report -ENETDOWN. */
        rc = -ENETDOWN;
        rcu_read_unlock_bh();

out_kfree_skb:
        kfree_skb(skb);
        return rc;
out:
        rcu_read_unlock_bh();
        return rc;
}
1615
1616
1617 /*=======================================================================
1618                         Receiver routines
1619   =======================================================================*/
1620
/*
 * Receive path tunables.  netif_rx() drops a packet once the per-cpu
 * input queue is longer than netdev_max_backlog.
 * NOTE(review): netdev_budget and weight_p are not used in this part of
 * the file; presumably they bound the work done by the RX softirq and by
 * the backlog device -- confirm against net_rx_action()/net_dev_init().
 */
int netdev_max_backlog __read_mostly = 1000;
int netdev_budget __read_mostly = 300;
int weight_p __read_mostly = 64;            /* old backlog weight */

/* Per-cpu receive statistics (netif_rx() bumps total and dropped). */
DEFINE_PER_CPU(struct netif_rx_stats, netdev_rx_stat) = { 0, };
1626
1627
1628 /**
1629  *      netif_rx        -       post buffer to the network code
1630  *      @skb: buffer to post
1631  *
1632  *      This function receives a packet from a device driver and queues it for
1633  *      the upper (protocol) levels to process.  It always succeeds. The buffer
1634  *      may be dropped during processing for congestion control or by the
1635  *      protocol layers.
1636  *
1637  *      return values:
1638  *      NET_RX_SUCCESS  (no congestion)
1639  *      NET_RX_CN_LOW   (low congestion)
1640  *      NET_RX_CN_MOD   (moderate congestion)
1641  *      NET_RX_CN_HIGH  (high congestion)
1642  *      NET_RX_DROP     (packet was dropped)
1643  *
1644  */
1645
1646 int netif_rx(struct sk_buff *skb)
1647 {
1648         struct softnet_data *queue;
1649         unsigned long flags;
1650
1651         /* if netpoll wants it, pretend we never saw it */
1652         if (netpoll_rx(skb))
1653                 return NET_RX_DROP;
1654
1655         if (!skb->tstamp.tv64)
1656                 net_timestamp(skb);
1657
1658         /*
1659          * The code is rearranged so that the path is the most
1660          * short when CPU is congested, but is still operating.
1661          */
1662         local_irq_save(flags);
1663         queue = &__get_cpu_var(softnet_data);
1664
1665         __get_cpu_var(netdev_rx_stat).total++;
1666         if (queue->input_pkt_queue.qlen <= netdev_max_backlog) {
1667                 if (queue->input_pkt_queue.qlen) {
1668 enqueue:
1669                         dev_hold(skb->dev);
1670                         __skb_queue_tail(&queue->input_pkt_queue, skb);
1671                         local_irq_restore(flags);
1672                         return NET_RX_SUCCESS;
1673                 }
1674
1675                 netif_rx_schedule(&queue->backlog_dev);
1676                 goto enqueue;
1677         }
1678
1679         __get_cpu_var(netdev_rx_stat).dropped++;
1680         local_irq_restore(flags);
1681
1682         kfree_skb(skb);
1683         return NET_RX_DROP;
1684 }
1685
/*
 * Variant of netif_rx() that runs pending softirqs itself right after
 * queueing the packet, with preemption disabled around both steps.
 * Returns whatever netif_rx() returned.
 */
int netif_rx_ni(struct sk_buff *skb)
{
        int rc;

        preempt_disable();

        rc = netif_rx(skb);
        if (local_softirq_pending())
                do_softirq();

        preempt_enable();

        return rc;
}

EXPORT_SYMBOL(netif_rx_ni);
1700
1701 static inline struct net_device *skb_bond(struct sk_buff *skb)
1702 {
1703         struct net_device *dev = skb->dev;
1704
1705         if (dev->master) {
1706                 if (skb_bond_should_drop(skb)) {
1707                         kfree_skb(skb);
1708                         return NULL;
1709                 }
1710                 skb->dev = dev->master;
1711         }
1712
1713         return dev;
1714 }
1715
1716 static void net_tx_action(struct softirq_action *h)
1717 {
1718         struct softnet_data *sd = &__get_cpu_var(softnet_data);
1719
1720         if (sd->completion_queue) {
1721                 struct sk_buff *clist;
1722
1723                 local_irq_disable();
1724                 clist = sd->completion_queue;
1725                 sd->completion_queue = NULL;
1726                 local_irq_enable();
1727
1728                 while (clist) {
1729                         struct sk_buff *skb = clist;
1730                         clist = clist->next;
1731
1732                         BUG_TRAP(!atomic_read(&skb->users));
1733                         __kfree_skb(skb);
1734                 }
1735         }
1736
1737         if (sd->output_queue) {
1738                 struct net_device *head;
1739
1740                 local_irq_disable();
1741                 head = sd->output_queue;
1742                 sd->output_queue = NULL;
1743                 local_irq_enable();
1744
1745                 while (head) {
1746                         struct net_device *dev = head;
1747                         head = head->next_sched;
1748
1749                         smp_mb__before_clear_bit();
1750                         clear_bit(__LINK_STATE_SCHED, &dev->state);
1751
1752                         if (spin_trylock(&dev->queue_lock)) {
1753                                 qdisc_run(dev);
1754                                 spin_unlock(&dev->queue_lock);
1755                         } else {
1756                                 netif_schedule(dev);
1757                         }
1758                 }
1759         }
1760 }
1761
1762 static inline int deliver_skb(struct sk_buff *skb,
1763                               struct packet_type *pt_prev,
1764                               struct net_device *orig_dev)
1765 {
1766         atomic_inc(&skb->users);
1767         return pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
1768 }
1769
#if defined(CONFIG_BRIDGE) || defined (CONFIG_BRIDGE_MODULE)
/* These hooks are defined here for ATM. */
struct net_bridge;
struct net_bridge_fdb_entry *(*br_fdb_get_hook)(struct net_bridge *br,
                                                unsigned char *addr);
void (*br_fdb_put_hook)(struct net_bridge_fdb_entry *ent) __read_mostly;

/* Bridging hook invoked by handle_bridge(); returns NULL if it consumed the packet. */
struct sk_buff *(*br_handle_frame_hook)(struct net_bridge_port *p,
                                        struct sk_buff *skb) __read_mostly;

/*
 * Give the bridge hook a chance to take the packet.
 *
 * Loopback packets and packets whose device has no bridge port are
 * returned untouched.  Otherwise a pending handler in *pt_prev is
 * delivered first (its result stored in *ret, *pt_prev cleared) and the
 * packet is handed to br_handle_frame_hook(), whose result is returned.
 */
static inline struct sk_buff *handle_bridge(struct sk_buff *skb,
                                            struct packet_type **pt_prev, int *ret,
                                            struct net_device *orig_dev)
{
        struct net_bridge_port *port;

        if (skb->pkt_type == PACKET_LOOPBACK)
                return skb;

        port = rcu_dereference(skb->dev->br_port);
        if (port == NULL)
                return skb;

        if (*pt_prev) {
                *ret = deliver_skb(skb, *pt_prev, orig_dev);
                *pt_prev = NULL;
        }

        return br_handle_frame_hook(port, skb);
}
#else
#define handle_bridge(skb, pt_prev, ret, orig_dev)      (skb)
#endif
1803
1804 #ifdef CONFIG_NET_CLS_ACT
1805 /* TODO: Maybe we should just force sch_ingress to be compiled in
1806  * when CONFIG_NET_CLS_ACT is? otherwise some useless instructions
1807  * a compare and 2 stores extra right now if we dont have it on
1808  * but have CONFIG_NET_CLS_ACT
1809  * NOTE: This doesnt stop any functionality; if you dont have
1810  * the ingress scheduler, you just cant add policies on ingress.
1811  *
1812  */
1813 static int ing_filter(struct sk_buff *skb)
1814 {
1815         struct Qdisc *q;
1816         struct net_device *dev = skb->dev;
1817         int result = TC_ACT_OK;
1818         u32 ttl = G_TC_RTTL(skb->tc_verd);
1819
1820         if (MAX_RED_LOOP < ttl++) {
1821                 printk(KERN_WARNING
1822                        "Redir loop detected Dropping packet (%d->%d)\n",
1823                        skb->iif, dev->ifindex);
1824                 return TC_ACT_SHOT;
1825         }
1826
1827         skb->tc_verd = SET_TC_RTTL(skb->tc_verd, ttl);
1828         skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_INGRESS);
1829
1830         spin_lock(&dev->ingress_lock);
1831         if ((q = dev->qdisc_ingress) != NULL)
1832                 result = q->enqueue(skb, q);
1833         spin_unlock(&dev->ingress_lock);
1834
1835         return result;
1836 }
1837
1838 static inline struct sk_buff *handle_ing(struct sk_buff *skb,
1839                                          struct packet_type **pt_prev,
1840                                          int *ret, struct net_device *orig_dev)
1841 {
1842         if (!skb->dev->qdisc_ingress)
1843                 goto out;
1844
1845         if (*pt_prev) {
1846                 *ret = deliver_skb(skb, *pt_prev, orig_dev);
1847                 *pt_prev = NULL;
1848         } else {
1849                 /* Huh? Why does turning on AF_PACKET affect this? */
1850                 skb->tc_verd = SET_TC_OK2MUNGE(skb->tc_verd);
1851         }
1852
1853         switch (ing_filter(skb)) {
1854         case TC_ACT_SHOT:
1855         case TC_ACT_STOLEN:
1856                 kfree_skb(skb);
1857                 return NULL;
1858         }
1859
1860 out:
1861         skb->tc_verd = 0;
1862         return skb;
1863 }
1864 #endif
1865
1866 int netif_receive_skb(struct sk_buff *skb)
1867 {
1868         struct packet_type *ptype, *pt_prev;
1869         struct net_device *orig_dev;
1870         int ret = NET_RX_DROP;
1871         __be16 type;
1872
1873         /* if we've gotten here through NAPI, check netpoll */
1874         if (skb->dev->poll && netpoll_rx(skb))
1875                 return NET_RX_DROP;
1876
1877         if (!skb->tstamp.tv64)
1878                 net_timestamp(skb);
1879
1880         if (!skb->iif)
1881                 skb->iif = skb->dev->ifindex;
1882
1883         orig_dev = skb_bond(skb);
1884
1885         if (!orig_dev)
1886                 return NET_RX_DROP;
1887
1888         __get_cpu_var(netdev_rx_stat).total++;
1889
1890         skb_reset_network_header(skb);
1891         skb_reset_transport_header(skb);
1892         skb->mac_len = skb->network_header - skb->mac_header;
1893
1894         pt_prev = NULL;
1895
1896         rcu_read_lock();
1897
1898 #ifdef CONFIG_NET_CLS_ACT
1899         if (skb->tc_verd & TC_NCLS) {
1900                 skb->tc_verd = CLR_TC_NCLS(skb->tc_verd);
1901                 goto ncls;
1902         }
1903 #endif
1904
1905         list_for_each_entry_rcu(ptype, &ptype_all, list) {
1906                 if (!ptype->dev || ptype->dev == skb->dev) {
1907                         if (pt_prev)
1908                                 ret = deliver_skb(skb, pt_prev, orig_dev);
1909                         pt_prev = ptype;
1910                 }
1911         }
1912
1913 #ifdef CONFIG_NET_CLS_ACT
1914         skb = handle_ing(skb, &pt_prev, &ret, orig_dev);
1915         if (!skb)
1916                 goto out;
1917 ncls:
1918 #endif
1919
1920         skb = handle_bridge(skb, &pt_prev, &ret, orig_dev);
1921         if (!skb)
1922                 goto out;
1923
1924         type = skb->protocol;
1925         list_for_each_entry_rcu(ptype, &ptype_base[ntohs(type)&15], list) {
1926                 if (ptype->type == type &&
1927                     (!ptype->dev || ptype->dev == skb->dev)) {
1928                         if (pt_prev)
1929                                 ret = deliver_skb(skb, pt_prev, orig_dev);
1930                         pt_prev = ptype;
1931                 }
1932         }
1933
1934         if (pt_prev) {
1935                 ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
1936         } else {
1937                 kfree_skb(skb);
1938                 /* Jamal, now you will not able to escape explaining
1939                  * me how you were going to use this. :-)
1940                  */
1941                 ret = NET_RX_DROP;
1942         }
1943
1944 out:
1945         rcu_read_unlock();
1946         return ret;
1947 }
1948
1949 static int process_backlog(struct net_device *backlog_dev, int *budget)
1950 {
1951         int work = 0;
1952         int quota = min(backlog_dev->quota, *budget);
1953         struct softnet_data *queue = &__get_cpu_var(softnet_data);
1954
1955         backlog_dev->weight = weight_p;
1956         for (;;) {
1957                 struct sk_buff *skb;
1958                 struct net_device *dev;
1959
1960                 local_irq_disable();
1961                 skb = __skb_dequeue(&queue->input_pkt_queue);
1962                 if (!skb)
1963                         goto job_done;
1964                 local_irq_enable();
1965
1966                 dev = skb->dev;
1967
1968                 netif_receive_skb(skb);
1969
1970                 dev_put(dev);
1971
1972                 work++;
1973
1974                 if (work >= quota)
1975                         break;
1976
1977         }
1978
1979         backlog_dev->quota -= work;
1980         *budget -= work;
1981         return -1;
1982
1983 job_done:
1984         backlog_dev->quota -= work;
1985         *budget -= work;
1986
1987         list_del(&backlog_dev->poll_list);
1988         smp_mb__before_clear_bit();
1989         netif_poll_enable(backlog_dev);
1990
1991         local_irq_enable();
1992         return 0;
1993 }
1994
1995 static void net_rx_action(struct softirq_action *h)
1996 {
1997         struct softnet_data *queue = &__get_cpu_var(softnet_data);
1998         unsigned long start_time = jiffies;
1999         int budget = netdev_budget;
2000         void *have;
2001
2002         local_irq_disable();
2003
2004         while (!list_empty(&queue->poll_list)) {
2005                 struct net_device *dev;
2006
2007                 if (budget <= 0 || jiffies - start_time > 1)
2008                         goto softnet_break;
2009
2010                 local_irq_enable();
2011
2012                 dev = list_entry(queue->poll_list.next,
2013                                  struct net_device, poll_list);
2014                 have = netpoll_poll_lock(dev);
2015
2016                 if (dev->quota <= 0 || dev->poll(dev, &budget)) {
2017                         netpoll_poll_unlock(have);
2018                         local_irq_disable();
2019                         list_move_tail(&dev->poll_list, &queue->poll_list);
2020                         if (dev->quota < 0)
2021                                 dev->quota += dev->weight;
2022                         else
2023                                 dev->quota = dev->weight;
2024                 } else {
2025                         netpoll_poll_unlock(have);
2026                         dev_put(dev);
2027                         local_irq_disable();
2028                 }
2029         }
2030 out:
2031         local_irq_enable();
2032 #ifdef CONFIG_NET_DMA
2033         /*
2034          * There may not be any more sk_buffs coming right now, so push
2035          * any pending DMA copies to hardware
2036          */
2037         if (net_dma_client) {
2038                 struct dma_chan *chan;
2039                 rcu_read_lock();
2040                 list_for_each_entry_rcu(chan, &net_dma_client->channels, client_node)
2041                         dma_async_memcpy_issue_pending(chan);
2042                 rcu_read_unlock();
2043         }
2044 #endif
2045         return;
2046
2047 softnet_break:
2048         __get_cpu_var(netdev_rx_stat).time_squeeze++;
2049         __raise_softirq_irqoff(NET_RX_SOFTIRQ);
2050         goto out;
2051 }
2052
2053 static gifconf_func_t * gifconf_list [NPROTO];
2054
2055 /**
2056  *      register_gifconf        -       register a SIOCGIF handler
2057  *      @family: Address family
2058  *      @gifconf: Function handler
2059  *
2060  *      Register protocol dependent address dumping routines. The handler
2061  *      that is passed must not be freed or reused until it has been replaced
2062  *      by another handler.
2063  */
2064 int register_gifconf(unsigned int family, gifconf_func_t * gifconf)
2065 {
2066         if (family >= NPROTO)
2067                 return -EINVAL;
2068         gifconf_list[family] = gifconf;
2069         return 0;
2070 }
2071
2072
2073 /*
2074  *      Map an interface index to its name (SIOCGIFNAME)
2075  */
2076
2077 /*
2078  *      We need this ioctl for efficient implementation of the
2079  *      if_indextoname() function required by the IPv6 API.  Without
2080  *      it, we would have to search all the interfaces to find a
2081  *      match.  --pb
2082  */
2083
2084 static int dev_ifname(struct ifreq __user *arg)
2085 {
2086         struct net_device *dev;
2087         struct ifreq ifr;
2088
2089         /*
2090          *      Fetch the caller's info block.
2091          */
2092
2093         if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
2094                 return -EFAULT;
2095
2096         read_lock(&dev_base_lock);
2097         dev = __dev_get_by_index(ifr.ifr_ifindex);
2098         if (!dev) {
2099                 read_unlock(&dev_base_lock);
2100                 return -ENODEV;
2101         }
2102
2103         strcpy(ifr.ifr_name, dev->name);
2104         read_unlock(&dev_base_lock);
2105
2106         if (copy_to_user(arg, &ifr, sizeof(struct ifreq)))
2107                 return -EFAULT;
2108         return 0;
2109 }
2110
2111 /*
2112  *      Perform a SIOCGIFCONF call. This structure will change
2113  *      size eventually, and there is nothing I can do about it.
2114  *      Thus we will need a 'compatibility mode'.
2115  */
2116
2117 static int dev_ifconf(char __user *arg)
2118 {
2119         struct ifconf ifc;
2120         struct net_device *dev;
2121         char __user *pos;
2122         int len;
2123         int total;
2124         int i;
2125
2126         /*
2127          *      Fetch the caller's info block.
2128          */
2129
2130         if (copy_from_user(&ifc, arg, sizeof(struct ifconf)))
2131                 return -EFAULT;
2132
2133         pos = ifc.ifc_buf;
2134         len = ifc.ifc_len;
2135
2136         /*
2137          *      Loop over the interfaces, and write an info block for each.
2138          */
2139
2140         total = 0;
2141         for_each_netdev(dev) {
2142                 for (i = 0; i < NPROTO; i++) {
2143                         if (gifconf_list[i]) {
2144                                 int done;
2145                                 if (!pos)
2146                                         done = gifconf_list[i](dev, NULL, 0);
2147                                 else
2148                                         done = gifconf_list[i](dev, pos + total,
2149                                                                len - total);
2150                                 if (done < 0)
2151                                         return -EFAULT;
2152                                 total += done;
2153                         }
2154                 }
2155         }
2156
2157         /*
2158          *      All done.  Write the updated control block back to the caller.
2159          */
2160         ifc.ifc_len = total;
2161
2162         /*
2163          *      Both BSD and Solaris return 0 here, so we do too.
2164          */
2165         return copy_to_user(arg, &ifc, sizeof(struct ifconf)) ? -EFAULT : 0;
2166 }
2167
2168 #ifdef CONFIG_PROC_FS
2169 /*
2170  *      This is invoked by the /proc filesystem handler to display a device
2171  *      in detail.
2172  */
2173 void *dev_seq_start(struct seq_file *seq, loff_t *pos)
2174 {
2175         loff_t off;
2176         struct net_device *dev;
2177
2178         read_lock(&dev_base_lock);
2179         if (!*pos)
2180                 return SEQ_START_TOKEN;
2181
2182         off = 1;
2183         for_each_netdev(dev)
2184                 if (off++ == *pos)
2185                         return dev;
2186
2187         return NULL;
2188 }
2189
2190 void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2191 {
2192         ++*pos;
2193         return v == SEQ_START_TOKEN ?
2194                 first_net_device() : next_net_device((struct net_device *)v);
2195 }
2196
2197 void dev_seq_stop(struct seq_file *seq, void *v)
2198 {
2199         read_unlock(&dev_base_lock);
2200 }
2201
2202 static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev)
2203 {
2204         struct net_device_stats *stats = dev->get_stats(dev);
2205
2206         seq_printf(seq, "%6s:%8lu %7lu %4lu %4lu %4lu %5lu %10lu %9lu "
2207                    "%8lu %7lu %4lu %4lu %4lu %5lu %7lu %10lu\n",
2208                    dev->name, stats->rx_bytes, stats->rx_packets,
2209                    stats->rx_errors,
2210                    stats->rx_dropped + stats->rx_missed_errors,
2211                    stats->rx_fifo_errors,
2212                    stats->rx_length_errors + stats->rx_over_errors +
2213                     stats->rx_crc_errors + stats->rx_frame_errors,
2214                    stats->rx_compressed, stats->multicast,
2215                    stats->tx_bytes, stats->tx_packets,
2216                    stats->tx_errors, stats->tx_dropped,
2217                    stats->tx_fifo_errors, stats->collisions,
2218                    stats->tx_carrier_errors +
2219                     stats->tx_aborted_errors +
2220                     stats->tx_window_errors +
2221                     stats->tx_heartbeat_errors,
2222                    stats->tx_compressed);
2223 }
2224
2225 /*
2226  *      Called from the PROCfs module. This now uses the new arbitrary sized
2227  *      /proc/net interface to create /proc/net/dev
2228  */
2229 static int dev_seq_show(struct seq_file *seq, void *v)
2230 {
2231         if (v == SEQ_START_TOKEN)
2232                 seq_puts(seq, "Inter-|   Receive                            "
2233                               "                    |  Transmit\n"
2234                               " face |bytes    packets errs drop fifo frame "
2235                               "compressed multicast|bytes    packets errs "
2236                               "drop fifo colls carrier compressed\n");
2237         else
2238                 dev_seq_printf_stats(seq, v);
2239         return 0;
2240 }
2241
2242 static struct netif_rx_stats *softnet_get_online(loff_t *pos)
2243 {
2244         struct netif_rx_stats *rc = NULL;
2245
2246         while (*pos < NR_CPUS)
2247                 if (cpu_online(*pos)) {
2248                         rc = &per_cpu(netdev_rx_stat, *pos);
2249                         break;
2250                 } else
2251                         ++*pos;
2252         return rc;
2253 }
2254
2255 static void *softnet_seq_start(struct seq_file *seq, loff_t *pos)
2256 {
2257         return softnet_get_online(pos);
2258 }
2259
2260 static void *softnet_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2261 {
2262         ++*pos;
2263         return softnet_get_online(pos);
2264 }
2265
2266 static void softnet_seq_stop(struct seq_file *seq, void *v)
2267 {
2268 }
2269
2270 static int softnet_seq_show(struct seq_file *seq, void *v)
2271 {
2272         struct netif_rx_stats *s = v;
2273
2274         seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x %08x\n",
2275                    s->total, s->dropped, s->time_squeeze, 0,
2276                    0, 0, 0, 0, /* was fastroute */
2277                    s->cpu_collision );
2278         return 0;
2279 }
2280
2281 static const struct seq_operations dev_seq_ops = {
2282         .start = dev_seq_start,
2283         .next  = dev_seq_next,
2284         .stop  = dev_seq_stop,
2285         .show  = dev_seq_show,
2286 };
2287
2288 static int dev_seq_open(struct inode *inode, struct file *file)
2289 {
2290         return seq_open(file, &dev_seq_ops);
2291 }
2292
2293 static const struct file_operations dev_seq_fops = {
2294         .owner   = THIS_MODULE,
2295         .open    = dev_seq_open,
2296         .read    = seq_read,
2297         .llseek  = seq_lseek,
2298         .release = seq_release,
2299 };
2300
2301 static const struct seq_operations softnet_seq_ops = {
2302         .start = softnet_seq_start,
2303         .next  = softnet_seq_next,
2304         .stop  = softnet_seq_stop,
2305         .show  = softnet_seq_show,
2306 };
2307
2308 static int softnet_seq_open(struct inode *inode, struct file *file)
2309 {
2310         return seq_open(file, &softnet_seq_ops);
2311 }
2312
2313 static const struct file_operations softnet_seq_fops = {
2314         .owner   = THIS_MODULE,
2315         .open    = softnet_seq_open,
2316         .read    = seq_read,
2317         .llseek  = seq_lseek,
2318         .release = seq_release,
2319 };
2320
2321 static void *ptype_get_idx(loff_t pos)
2322 {
2323         struct packet_type *pt = NULL;
2324         loff_t i = 0;
2325         int t;
2326
2327         list_for_each_entry_rcu(pt, &ptype_all, list) {
2328                 if (i == pos)
2329                         return pt;
2330                 ++i;
2331         }
2332
2333         for (t = 0; t < 16; t++) {
2334                 list_for_each_entry_rcu(pt, &ptype_base[t], list) {
2335                         if (i == pos)
2336                                 return pt;
2337                         ++i;
2338                 }
2339         }
2340         return NULL;
2341 }
2342
2343 static void *ptype_seq_start(struct seq_file *seq, loff_t *pos)
2344 {
2345         rcu_read_lock();
2346         return *pos ? ptype_get_idx(*pos - 1) : SEQ_START_TOKEN;
2347 }
2348
2349 static void *ptype_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2350 {
2351         struct packet_type *pt;
2352         struct list_head *nxt;
2353         int hash;
2354
2355         ++*pos;
2356         if (v == SEQ_START_TOKEN)
2357                 return ptype_get_idx(0);
2358
2359         pt = v;
2360         nxt = pt->list.next;
2361         if (pt->type == htons(ETH_P_ALL)) {
2362                 if (nxt != &ptype_all)
2363                         goto found;
2364                 hash = 0;
2365                 nxt = ptype_base[0].next;
2366         } else
2367                 hash = ntohs(pt->type) & 15;
2368
2369         while (nxt == &ptype_base[hash]) {
2370                 if (++hash >= 16)
2371                         return NULL;
2372                 nxt = ptype_base[hash].next;
2373         }
2374 found:
2375         return list_entry(nxt, struct packet_type, list);
2376 }
2377
/* seq_file stop: leave the RCU read-side section entered in ptype_seq_start(). */
static void ptype_seq_stop(struct seq_file *seq, void *v)
{
        rcu_read_unlock();
}
2382
/*
 * Print the address @sym in symbolic form.
 *
 * With CONFIG_KALLSYMS and a successful lookup the output is
 * "symbol+0xoffset", prefixed by ":module:" when a module name is
 * reported.  Otherwise the raw pointer is printed in brackets.
 */
static void ptype_seq_decode(struct seq_file *seq, void *sym)
{
#ifdef CONFIG_KALLSYMS
        unsigned long offset = 0, symsize;
        const char *symname;
        char *modname;
        char namebuf[128];

        symname = kallsyms_lookup((unsigned long)sym, &symsize, &offset,
                                  &modname, namebuf);

        if (symname) {
                char *sep = ":";

                /* No module name: print neither the name nor the separators. */
                if (!modname) {
                        sep = "";
                        modname = "";
                }

                seq_printf(seq, "%s%s%s%s+0x%lx", sep, modname, sep,
                           symname, offset);
                return;
        }
#endif

        seq_printf(seq, "[%p]", sym);
}
2407
2408 static int ptype_seq_show(struct seq_file *seq, void *v)
2409 {
2410         struct packet_type *pt = v;
2411
2412         if (v == SEQ_START_TOKEN)
2413                 seq_puts(seq, "Type Device      Function\n");
2414         else {
2415                 if (pt->type == htons(ETH_P_ALL))
2416                         seq_puts(seq, "ALL ");
2417                 else
2418                         seq_printf(seq, "%04x", ntohs(pt->type));
2419
2420                 seq_printf(seq, " %-8s ",
2421                            pt->dev ? pt->dev->name : "");
2422                 ptype_seq_decode(seq,  pt->func);
2423                 seq_putc(seq, '\n');
2424         }
2425
2426         return 0;
2427 }
2428
2429 static const struct seq_operations ptype_seq_ops = {
2430         .start = ptype_seq_start,
2431         .next  = ptype_seq_next,
2432         .stop  = ptype_seq_stop,
2433         .show  = ptype_seq_show,
2434 };
2435
2436 static int ptype_seq_open(struct inode *inode, struct file *file)
2437 {
2438         return seq_open(file, &ptype_seq_ops);
2439 }
2440
2441 static const struct file_operations ptype_seq_fops = {
2442         .owner   = THIS_MODULE,
2443         .open    = ptype_seq_open,
2444         .read    = seq_read,
2445         .llseek  = seq_lseek,
2446         .release = seq_release,
2447 };
2448
2449
2450 static int __init dev_proc_init(void)
2451 {
2452         int rc = -ENOMEM;
2453
2454         if (!proc_net_fops_create("dev", S_IRUGO, &dev_seq_fops))
2455                 goto out;
2456         if (!proc_net_fops_create("softnet_stat", S_IRUGO, &softnet_seq_fops))
2457                 goto out_dev;
2458         if (!proc_net_fops_create("ptype", S_IRUGO, &ptype_seq_fops))
2459                 goto out_dev2;
2460
2461         if (wext_proc_init())
2462                 goto out_softnet;
2463         rc = 0;
2464 out:
2465         return rc;
2466 out_softnet:
2467         proc_net_remove("ptype");
2468 out_dev2:
2469         proc_net_remove("softnet_stat");
2470 out_dev:
2471         proc_net_remove("dev");
2472         goto out;
2473 }
2474 #else
2475 #define dev_proc_init() 0
2476 #endif  /* CONFIG_PROC_FS */
2477
2478
2479 /**
2480  *      netdev_set_master       -       set up master/slave pair
2481  *      @slave: slave device
2482  *      @master: new master device
2483  *
2484  *      Changes the master device of the slave. Pass %NULL to break the
2485  *      bonding. The caller must hold the RTNL semaphore. On a failure
2486  *      a negative errno code is returned. On success the reference counts
2487  *      are adjusted, %RTM_NEWLINK is sent to the routing socket and the
2488  *      function returns zero.
2489  */
2490 int netdev_set_master(struct net_device *slave, struct net_device *master)
2491 {
2492         struct net_device *old = slave->master;
2493
2494         ASSERT_RTNL();
2495
2496         if (master) {
2497                 if (old)
2498                         return -EBUSY;
2499                 dev_hold(master);
2500         }
2501
2502         slave->master = master;
2503
2504         if (old) {
2505                 synchronize_net();
2506                 dev_put(old);
2507         }
2508         if (master)
2509                 slave->flags |= IFF_SLAVE;
2510         else
2511                 slave->flags &= ~IFF_SLAVE;
2512
2513         rtmsg_ifinfo(RTM_NEWLINK, slave, IFF_SLAVE);
2514         return 0;
2515 }
2516
2517 /**
2518  *      dev_set_promiscuity     - update promiscuity count on a device
2519  *      @dev: device
2520  *      @inc: modifier
2521  *
2522  *      Add or remove promiscuity from a device. While the count in the device
2523  *      remains above zero the interface remains promiscuous. Once it hits zero
2524  *      the device reverts back to normal filtering operation. A negative inc
2525  *      value is used to drop promiscuity on the device.
2526  *      Return 0 if successful or a negative errno code on error.
2527  */
2528 int dev_set_promiscuity(struct net_device *dev, int inc)
2529 {
2530         unsigned short old_flags = dev->flags;
2531
2532         dev->flags |= IFF_PROMISC;
2533         dev->promiscuity += inc;
2534         if (dev->promiscuity == 0) {
2535                 /*
2536                  * Avoid overflow.
2537                  * If inc causes overflow, untouch promisc and return error.
2538                  */
2539                 if (inc < 0)
2540                         dev->flags &= ~IFF_PROMISC;
2541                 else {
2542                         dev->promiscuity -= inc;
2543                         printk(KERN_WARNING "%s: promiscuity touches roof, "
2544                                 "set promiscuity failed, promiscuity feature "
2545                                 "of device might be broken.\n", dev->name);
2546                         return -EOVERFLOW;
2547                 }
2548         }
2549         if (dev->flags != old_flags) {
2550                 dev_mc_upload(dev);
2551                 printk(KERN_INFO "device %s %s promiscuous mode\n",
2552                        dev->name, (dev->flags & IFF_PROMISC) ? "entered" :
2553                                                                "left");
2554                 audit_log(current->audit_context, GFP_ATOMIC,
2555                         AUDIT_ANOM_PROMISCUOUS,
2556                         "dev=%s prom=%d old_prom=%d auid=%u",
2557                         dev->name, (dev->flags & IFF_PROMISC),
2558                         (old_flags & IFF_PROMISC),
2559                         audit_get_loginuid(current->audit_context));
2560         }
2561         return 0;
2562 }
2563
2564 /**
2565  *      dev_set_allmulti        - update allmulti count on a device
2566  *      @dev: device
2567  *      @inc: modifier
2568  *
2569  *      Add or remove reception of all multicast frames to a device. While the
2570  *      count in the device remains above zero the interface remains listening
2571  *      to all interfaces. Once it hits zero the device reverts back to normal
2572  *      filtering operation. A negative @inc value is used to drop the counter
2573  *      when releasing a resource needing all multicasts.
2574  *      Return 0 if successful or a negative errno code on error.
2575  */
2576
2577 int dev_set_allmulti(struct net_device *dev, int inc)
2578 {
2579         unsigned short old_flags = dev->flags;
2580
2581         dev->flags |= IFF_ALLMULTI;
2582         dev->allmulti += inc;
2583         if (dev->allmulti == 0) {
2584                 /*
2585                  * Avoid overflow.
2586                  * If inc causes overflow, untouch allmulti and return error.
2587                  */
2588                 if (inc < 0)
2589                         dev->flags &= ~IFF_ALLMULTI;
2590                 else {
2591                         dev->allmulti -= inc;
2592                         printk(KERN_WARNING "%s: allmulti touches roof, "
2593                                 "set allmulti failed, allmulti feature of "
2594                                 "device might be broken.\n", dev->name);
2595                         return -EOVERFLOW;
2596                 }
2597         }
2598         if (dev->flags ^ old_flags)
2599                 dev_mc_upload(dev);
2600         return 0;
2601 }
2602
2603 unsigned dev_get_flags(const struct net_device *dev)
2604 {
2605         unsigned flags;
2606
2607         flags = (dev->flags & ~(IFF_PROMISC |
2608                                 IFF_ALLMULTI |
2609                                 IFF_RUNNING |
2610                                 IFF_LOWER_UP |
2611                                 IFF_DORMANT)) |
2612                 (dev->gflags & (IFF_PROMISC |
2613                                 IFF_ALLMULTI));
2614
2615         if (netif_running(dev)) {
2616                 if (netif_oper_up(dev))
2617                         flags |= IFF_RUNNING;
2618                 if (netif_carrier_ok(dev))
2619                         flags |= IFF_LOWER_UP;
2620                 if (netif_dormant(dev))
2621                         flags |= IFF_DORMANT;
2622         }
2623
2624         return flags;
2625 }
2626
2627 int dev_change_flags(struct net_device *dev, unsigned flags)
2628 {
2629         int ret, changes;
2630         int old_flags = dev->flags;
2631
2632         /*
2633          *      Set the flags on our device.
2634          */
2635
2636         dev->flags = (flags & (IFF_DEBUG | IFF_NOTRAILERS | IFF_NOARP |
2637                                IFF_DYNAMIC | IFF_MULTICAST | IFF_PORTSEL |
2638                                IFF_AUTOMEDIA)) |
2639                      (dev->flags & (IFF_UP | IFF_VOLATILE | IFF_PROMISC |
2640                                     IFF_ALLMULTI));
2641
2642         /*
2643          *      Load in the correct multicast list now the flags have changed.
2644          */
2645
2646         dev_mc_upload(dev);
2647
2648         /*
2649          *      Have we downed the interface. We handle IFF_UP ourselves
2650          *      according to user attempts to set it, rather than blindly
2651          *      setting it.
2652          */
2653
2654         ret = 0;
2655         if ((old_flags ^ flags) & IFF_UP) {     /* Bit is different  ? */
2656                 ret = ((old_flags & IFF_UP) ? dev_close : dev_open)(dev);
2657
2658                 if (!ret)
2659                         dev_mc_upload(dev);
2660         }
2661
2662         if (dev->flags & IFF_UP &&
2663             ((old_flags ^ dev->flags) &~ (IFF_UP | IFF_PROMISC | IFF_ALLMULTI |
2664                                           IFF_VOLATILE)))
2665                 raw_notifier_call_chain(&netdev_chain,
2666                                 NETDEV_CHANGE, dev);
2667
2668         if ((flags ^ dev->gflags) & IFF_PROMISC) {
2669                 int inc = (flags & IFF_PROMISC) ? +1 : -1;
2670                 dev->gflags ^= IFF_PROMISC;
2671                 dev_set_promiscuity(dev, inc);
2672         }
2673
2674         /* NOTE: order of synchronization of IFF_PROMISC and IFF_ALLMULTI
2675            is important. Some (broken) drivers set IFF_PROMISC, when
2676            IFF_ALLMULTI is requested not asking us and not reporting.
2677          */
2678         if ((flags ^ dev->gflags) & IFF_ALLMULTI) {
2679                 int inc = (flags & IFF_ALLMULTI) ? +1 : -1;
2680                 dev->gflags ^= IFF_ALLMULTI;
2681                 dev_set_allmulti(dev, inc);
2682         }
2683
2684         /* Exclude state transition flags, already notified */
2685         changes = (old_flags ^ dev->flags) & ~(IFF_UP | IFF_RUNNING);
2686         if (changes)
2687                 rtmsg_ifinfo(RTM_NEWLINK, dev, changes);
2688
2689         return ret;
2690 }
2691
2692 int dev_set_mtu(struct net_device *dev, int new_mtu)
2693 {
2694         int err;
2695
2696         if (new_mtu == dev->mtu)
2697                 return 0;
2698
2699         /*      MTU must be positive.    */
2700         if (new_mtu < 0)
2701                 return -EINVAL;
2702
2703         if (!netif_device_present(dev))
2704                 return -ENODEV;
2705
2706         err = 0;
2707         if (dev->change_mtu)
2708                 err = dev->change_mtu(dev, new_mtu);
2709         else
2710                 dev->mtu = new_mtu;
2711         if (!err && dev->flags & IFF_UP)
2712                 raw_notifier_call_chain(&netdev_chain,
2713                                 NETDEV_CHANGEMTU, dev);
2714         return err;
2715 }
2716
2717 int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa)
2718 {
2719         int err;
2720
2721         if (!dev->set_mac_address)
2722                 return -EOPNOTSUPP;
2723         if (sa->sa_family != dev->type)
2724                 return -EINVAL;
2725         if (!netif_device_present(dev))
2726                 return -ENODEV;
2727         err = dev->set_mac_address(dev, sa);
2728         if (!err)
2729                 raw_notifier_call_chain(&netdev_chain,
2730                                 NETDEV_CHANGEADDR, dev);
2731         return err;
2732 }
2733
2734 /*
2735  *      Perform the SIOCxIFxxx calls.
2736  */
2737 static int dev_ifsioc(struct ifreq *ifr, unsigned int cmd)
2738 {
2739         int err;
2740         struct net_device *dev = __dev_get_by_name(ifr->ifr_name);
2741
2742         if (!dev)
2743                 return -ENODEV;
2744
2745         switch (cmd) {
2746                 case SIOCGIFFLAGS:      /* Get interface flags */
2747                         ifr->ifr_flags = dev_get_flags(dev);
2748                         return 0;
2749
2750                 case SIOCSIFFLAGS:      /* Set interface flags */
2751                         return dev_change_flags(dev, ifr->ifr_flags);
2752
2753                 case SIOCGIFMETRIC:     /* Get the metric on the interface
2754                                            (currently unused) */
2755                         ifr->ifr_metric = 0;
2756                         return 0;
2757
2758                 case SIOCSIFMETRIC:     /* Set the metric on the interface
2759                                            (currently unused) */
2760                         return -EOPNOTSUPP;
2761
2762                 case SIOCGIFMTU:        /* Get the MTU of a device */
2763                         ifr->ifr_mtu = dev->mtu;
2764                         return 0;
2765
2766                 case SIOCSIFMTU:        /* Set the MTU of a device */
2767                         return dev_set_mtu(dev, ifr->ifr_mtu);
2768
2769                 case SIOCGIFHWADDR:
2770                         if (!dev->addr_len)
2771                                 memset(ifr->ifr_hwaddr.sa_data, 0, sizeof ifr->ifr_hwaddr.sa_data);
2772                         else
2773                                 memcpy(ifr->ifr_hwaddr.sa_data, dev->dev_addr,
2774                                        min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len));
2775                         ifr->ifr_hwaddr.sa_family = dev->type;
2776                         return 0;
2777
2778                 case SIOCSIFHWADDR:
2779                         return dev_set_mac_address(dev, &ifr->ifr_hwaddr);
2780
2781                 case SIOCSIFHWBROADCAST:
2782                         if (ifr->ifr_hwaddr.sa_family != dev->type)
2783                                 return -EINVAL;
2784                         memcpy(dev->broadcast, ifr->ifr_hwaddr.sa_data,
2785                                min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len));
2786                         raw_notifier_call_chain(&netdev_chain,
2787                                             NETDEV_CHANGEADDR, dev);
2788                         return 0;
2789
2790                 case SIOCGIFMAP:
2791                         ifr->ifr_map.mem_start = dev->mem_start;
2792                         ifr->ifr_map.mem_end   = dev->mem_end;
2793                         ifr->ifr_map.base_addr = dev->base_addr;
2794                         ifr->ifr_map.irq       = dev->irq;
2795                         ifr->ifr_map.dma       = dev->dma;
2796                         ifr->ifr_map.port      = dev->if_port;
2797                         return 0;
2798
2799                 case SIOCSIFMAP:
2800                         if (dev->set_config) {
2801                                 if (!netif_device_present(dev))
2802                                         return -ENODEV;
2803                                 return dev->set_config(dev, &ifr->ifr_map);
2804                         }
2805                         return -EOPNOTSUPP;
2806
2807                 case SIOCADDMULTI:
2808                         if (!dev->set_multicast_list ||
2809                             ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
2810                                 return -EINVAL;
2811                         if (!netif_device_present(dev))
2812                                 return -ENODEV;
2813                         return dev_mc_add(dev, ifr->ifr_hwaddr.sa_data,
2814                                           dev->addr_len, 1);
2815
2816                 case SIOCDELMULTI:
2817                         if (!dev->set_multicast_list ||
2818                             ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
2819                                 return -EINVAL;
2820                         if (!netif_device_present(dev))
2821                                 return -ENODEV;
2822                         return dev_mc_delete(dev, ifr->ifr_hwaddr.sa_data,
2823                                              dev->addr_len, 1);
2824
2825                 case SIOCGIFINDEX:
2826                         ifr->ifr_ifindex = dev->ifindex;
2827                         return 0;
2828
2829                 case SIOCGIFTXQLEN:
2830                         ifr->ifr_qlen = dev->tx_queue_len;
2831                         return 0;
2832
2833                 case SIOCSIFTXQLEN:
2834                         if (ifr->ifr_qlen < 0)
2835                                 return -EINVAL;
2836                         dev->tx_queue_len = ifr->ifr_qlen;
2837                         return 0;
2838
2839                 case SIOCSIFNAME:
2840                         ifr->ifr_newname[IFNAMSIZ-1] = '\0';
2841                         return dev_change_name(dev, ifr->ifr_newname);
2842
2843                 /*
2844                  *      Unknown or private ioctl
2845                  */
2846
2847                 default:
2848                         if ((cmd >= SIOCDEVPRIVATE &&
2849                             cmd <= SIOCDEVPRIVATE + 15) ||
2850                             cmd == SIOCBONDENSLAVE ||
2851                             cmd == SIOCBONDRELEASE ||
2852                             cmd == SIOCBONDSETHWADDR ||
2853                             cmd == SIOCBONDSLAVEINFOQUERY ||
2854                             cmd == SIOCBONDINFOQUERY ||
2855                             cmd == SIOCBONDCHANGEACTIVE ||
2856                             cmd == SIOCGMIIPHY ||
2857                             cmd == SIOCGMIIREG ||
2858                             cmd == SIOCSMIIREG ||
2859                             cmd == SIOCBRADDIF ||
2860                             cmd == SIOCBRDELIF ||
2861                             cmd == SIOCWANDEV) {
2862                                 err = -EOPNOTSUPP;
2863                                 if (dev->do_ioctl) {
2864                                         if (netif_device_present(dev))
2865                                                 err = dev->do_ioctl(dev, ifr,
2866                                                                     cmd);
2867                                         else
2868                                                 err = -ENODEV;
2869                                 }
2870                         } else
2871                                 err = -EINVAL;
2872
2873         }
2874         return err;
2875 }
2876
2877 /*
2878  *      This function handles all "interface"-type I/O control requests. The actual
2879  *      'doing' part of this is dev_ifsioc above.
2880  */
2881
2882 /**
2883  *      dev_ioctl       -       network device ioctl
2884  *      @cmd: command to issue
2885  *      @arg: pointer to a struct ifreq in user space
2886  *
2887  *      Issue ioctl functions to devices. This is normally called by the
2888  *      user space syscall interfaces but can sometimes be useful for
2889  *      other purposes. The return value is the return from the syscall if
2890  *      positive or a negative errno code on error.
2891  */
2892
2893 int dev_ioctl(unsigned int cmd, void __user *arg)
2894 {
2895         struct ifreq ifr;
2896         int ret;
2897         char *colon;
2898
2899         /* One special case: SIOCGIFCONF takes ifconf argument
2900            and requires shared lock, because it sleeps writing
2901            to user space.
2902          */
2903
2904         if (cmd == SIOCGIFCONF) {
2905                 rtnl_lock();
2906                 ret = dev_ifconf((char __user *) arg);
2907                 rtnl_unlock();
2908                 return ret;
2909         }
2910         if (cmd == SIOCGIFNAME)
2911                 return dev_ifname((struct ifreq __user *)arg);
2912
2913         if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
2914                 return -EFAULT;
2915
2916         ifr.ifr_name[IFNAMSIZ-1] = 0;
2917
2918         colon = strchr(ifr.ifr_name, ':');
2919         if (colon)
2920                 *colon = 0;
2921
2922         /*
2923          *      See which interface the caller is talking about.
2924          */
2925
2926         switch (cmd) {
2927                 /*
2928                  *      These ioctl calls:
2929                  *      - can be done by all.
2930                  *      - atomic and do not require locking.
2931                  *      - return a value
2932                  */
2933                 case SIOCGIFFLAGS:
2934                 case SIOCGIFMETRIC:
2935                 case SIOCGIFMTU:
2936                 case SIOCGIFHWADDR:
2937                 case SIOCGIFSLAVE:
2938                 case SIOCGIFMAP:
2939                 case SIOCGIFINDEX:
2940                 case SIOCGIFTXQLEN:
2941                         dev_load(ifr.ifr_name);
2942                         read_lock(&dev_base_lock);
2943                         ret = dev_ifsioc(&ifr, cmd);
2944                         read_unlock(&dev_base_lock);
2945                         if (!ret) {
2946                                 if (colon)
2947                                         *colon = ':';
2948                                 if (copy_to_user(arg, &ifr,
2949                                                  sizeof(struct ifreq)))
2950                                         ret = -EFAULT;
2951                         }
2952                         return ret;
2953
2954                 case SIOCETHTOOL:
2955                         dev_load(ifr.ifr_name);
2956                         rtnl_lock();
2957                         ret = dev_ethtool(&ifr);
2958                         rtnl_unlock();
2959                         if (!ret) {
2960                                 if (colon)
2961                                         *colon = ':';
2962                                 if (copy_to_user(arg, &ifr,
2963                                                  sizeof(struct ifreq)))
2964                                         ret = -EFAULT;
2965                         }
2966                         return ret;
2967
2968                 /*
2969                  *      These ioctl calls:
2970                  *      - require superuser power.
2971                  *      - require strict serialization.
2972                  *      - return a value
2973                  */
2974                 case SIOCGMIIPHY:
2975                 case SIOCGMIIREG:
2976                 case SIOCSIFNAME:
2977                         if (!capable(CAP_NET_ADMIN))
2978                                 return -EPERM;
2979                         dev_load(ifr.ifr_name);
2980                         rtnl_lock();
2981                         ret = dev_ifsioc(&ifr, cmd);
2982                         rtnl_unlock();
2983                         if (!ret) {
2984                                 if (colon)
2985                                         *colon = ':';
2986                                 if (copy_to_user(arg, &ifr,
2987                                                  sizeof(struct ifreq)))
2988                                         ret = -EFAULT;
2989                         }
2990                         return ret;
2991
2992                 /*
2993                  *      These ioctl calls:
2994                  *      - require superuser power.
2995                  *      - require strict serialization.
2996                  *      - do not return a value
2997                  */
2998                 case SIOCSIFFLAGS:
2999                 case SIOCSIFMETRIC:
3000                 case SIOCSIFMTU:
3001                 case SIOCSIFMAP:
3002                 case SIOCSIFHWADDR:
3003                 case SIOCSIFSLAVE:
3004                 case SIOCADDMULTI:
3005                 case SIOCDELMULTI:
3006                 case SIOCSIFHWBROADCAST:
3007                 case SIOCSIFTXQLEN:
3008                 case SIOCSMIIREG:
3009                 case SIOCBONDENSLAVE:
3010                 case SIOCBONDRELEASE:
3011                 case SIOCBONDSETHWADDR:
3012                 case SIOCBONDCHANGEACTIVE:
3013                 case SIOCBRADDIF:
3014                 case SIOCBRDELIF:
3015                         if (!capable(CAP_NET_ADMIN))
3016                                 return -EPERM;
3017                         /* fall through */
3018                 case SIOCBONDSLAVEINFOQUERY:
3019                 case SIOCBONDINFOQUERY:
3020                         dev_load(ifr.ifr_name);
3021                         rtnl_lock();
3022                         ret = dev_ifsioc(&ifr, cmd);
3023                         rtnl_unlock();
3024                         return ret;
3025
3026                 case SIOCGIFMEM:
3027                         /* Get the per device memory space. We can add this but
3028                          * currently do not support it */
3029                 case SIOCSIFMEM:
3030                         /* Set the per device memory buffer space.
3031                          * Not applicable in our case */
3032                 case SIOCSIFLINK:
3033                         return -ENOTTY;
3034
3035                 /*
3036                  *      Unknown or private ioctl.
3037                  */
3038                 default:
3039                         if (cmd == SIOCWANDEV ||
3040                             (cmd >= SIOCDEVPRIVATE &&
3041                              cmd <= SIOCDEVPRIVATE + 15)) {
3042                                 dev_load(ifr.ifr_name);
3043                                 rtnl_lock();
3044                                 ret = dev_ifsioc(&ifr, cmd);
3045                                 rtnl_unlock();
3046                                 if (!ret && copy_to_user(arg, &ifr,
3047                                                          sizeof(struct ifreq)))
3048                                         ret = -EFAULT;
3049                                 return ret;
3050                         }
3051                         /* Take care of Wireless Extensions */
3052                         if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST)
3053                                 return wext_handle_ioctl(&ifr, cmd, arg);
3054                         return -ENOTTY;
3055         }
3056 }
3057
3058
/**
 *	dev_new_index	-	allocate an ifindex
 *
 *	Returns a suitable unique value for a new device interface
 *	number.  The caller must hold the rtnl semaphore or the
 *	dev_base_lock to be sure it remains unique.
 *
 *	The returned value is always positive: candidates are taken in
 *	increasing order from a static counter that restarts at 1 once it
 *	would leave the positive range, and a candidate is skipped while
 *	__dev_get_by_index() still finds a device using it.
 */
static int dev_new_index(void)
{
	static int ifindex;

	for (;;) {
		/*
		 * Advance the counter in unsigned arithmetic. Incrementing
		 * a signed int past its maximum is undefined behaviour, so
		 * the compiler would be free to drop the wrap check below;
		 * unsigned addition wraps by definition and the conversion
		 * back to int is reduced modulo 2^N by GCC.
		 */
		ifindex = (int)((unsigned int)ifindex + 1U);
		if (ifindex <= 0)
			ifindex = 1;

		if (!__dev_get_by_index(ifindex))
			return ifindex;
	}
}
3076
/* Starts out as 1; register_netdevice() and unregister_netdevice()
 * BUG() while it is still non-zero.
 */
static int dev_boot_phase = 1;

/* Devices queued for delayed registration/unregistration work. */
static LIST_HEAD(net_todo_list);
3081
3082 static void net_set_todo(struct net_device *dev)
3083 {
3084         list_add_tail(&dev->todo_list, &net_todo_list);
3085 }
3086
3087 /**
3088  *      register_netdevice      - register a network device
3089  *      @dev: device to register
3090  *
3091  *      Take a completed network device structure and add it to the kernel
3092  *      interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
3093  *      chain. 0 is returned on success. A negative errno code is returned
3094  *      on a failure to set up the device, or if the name is a duplicate.
3095  *
3096  *      Callers must hold the rtnl semaphore. You may want
3097  *      register_netdev() instead of this.
3098  *
3099  *      BUGS:
3100  *      The locking appears insufficient to guarantee two parallel registers
3101  *      will not get the same name.
3102  */
3103
3104 int register_netdevice(struct net_device *dev)
3105 {
3106         struct hlist_head *head;
3107         struct hlist_node *p;
3108         int ret;
3109
3110         BUG_ON(dev_boot_phase);
3111         ASSERT_RTNL();
3112
3113         might_sleep();
3114
3115         /* When net_device's are persistent, this will be fatal. */
3116         BUG_ON(dev->reg_state != NETREG_UNINITIALIZED);
3117
3118         spin_lock_init(&dev->queue_lock);
3119         spin_lock_init(&dev->_xmit_lock);
3120         netdev_set_lockdep_class(&dev->_xmit_lock, dev->type);
3121         dev->xmit_lock_owner = -1;
3122         spin_lock_init(&dev->ingress_lock);
3123
3124         dev->iflink = -1;
3125
3126         /* Init, if this function is available */
3127         if (dev->init) {
3128                 ret = dev->init(dev);
3129                 if (ret) {
3130                         if (ret > 0)
3131                                 ret = -EIO;
3132                         goto out;
3133                 }
3134         }
3135
3136         if (!dev_valid_name(dev->name)) {
3137                 ret = -EINVAL;
3138                 goto err_uninit;
3139         }
3140
3141         dev->ifindex = dev_new_index();
3142         if (dev->iflink == -1)
3143                 dev->iflink = dev->ifindex;
3144
3145         /* Check for existence of name */
3146         head = dev_name_hash(dev->name);
3147         hlist_for_each(p, head) {
3148                 struct net_device *d
3149                         = hlist_entry(p, struct net_device, name_hlist);
3150                 if (!strncmp(d->name, dev->name, IFNAMSIZ)) {
3151                         ret = -EEXIST;
3152                         goto err_uninit;
3153                 }
3154         }
3155
3156         /* Fix illegal SG+CSUM combinations. */
3157         if ((dev->features & NETIF_F_SG) &&
3158             !(dev->features & NETIF_F_ALL_CSUM)) {
3159                 printk(KERN_NOTICE "%s: Dropping NETIF_F_SG since no checksum feature.\n",
3160                        dev->name);
3161                 dev->features &= ~NETIF_F_SG;
3162         }
3163
3164         /* TSO requires that SG is present as well. */
3165         if ((dev->features & NETIF_F_TSO) &&
3166             !(dev->features & NETIF_F_SG)) {
3167                 printk(KERN_NOTICE "%s: Dropping NETIF_F_TSO since no SG feature.\n",
3168                        dev->name);
3169                 dev->features &= ~NETIF_F_TSO;
3170         }
3171         if (dev->features & NETIF_F_UFO) {
3172                 if (!(dev->features & NETIF_F_HW_CSUM)) {
3173                         printk(KERN_ERR "%s: Dropping NETIF_F_UFO since no "
3174                                         "NETIF_F_HW_CSUM feature.\n",
3175                                                         dev->name);
3176                         dev->features &= ~NETIF_F_UFO;
3177                 }
3178                 if (!(dev->features & NETIF_F_SG)) {
3179                         printk(KERN_ERR "%s: Dropping NETIF_F_UFO since no "
3180                                         "NETIF_F_SG feature.\n",
3181                                         dev->name);
3182                         dev->features &= ~NETIF_F_UFO;
3183                 }
3184         }
3185
3186         /*
3187          *      nil rebuild_header routine,
3188          *      that should be never called and used as just bug trap.
3189          */
3190
3191         if (!dev->rebuild_header)
3192                 dev->rebuild_header = default_rebuild_header;
3193
3194         ret = netdev_register_sysfs(dev);
3195         if (ret)
3196                 goto err_uninit;
3197         dev->reg_state = NETREG_REGISTERED;
3198
3199         /*
3200          *      Default initial state at registry is that the
3201          *      device is present.
3202          */
3203
3204         set_bit(__LINK_STATE_PRESENT, &dev->state);
3205
3206         dev_init_scheduler(dev);
3207         write_lock_bh(&dev_base_lock);
3208         list_add_tail(&dev->dev_list, &dev_base_head);
3209         hlist_add_head(&dev->name_hlist, head);
3210         hlist_add_head(&dev->index_hlist, dev_index_hash(dev->ifindex));
3211         dev_hold(dev);
3212         write_unlock_bh(&dev_base_lock);
3213
3214         /* Notify protocols, that a new device appeared. */
3215         raw_notifier_call_chain(&netdev_chain, NETDEV_REGISTER, dev);
3216
3217         ret = 0;
3218
3219 out:
3220         return ret;
3221
3222 err_uninit:
3223         if (dev->uninit)
3224                 dev->uninit(dev);
3225         goto out;
3226 }
3227
3228 /**
3229  *      register_netdev - register a network device
3230  *      @dev: device to register
3231  *
3232  *      Take a completed network device structure and add it to the kernel
3233  *      interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
3234  *      chain. 0 is returned on success. A negative errno code is returned
3235  *      on a failure to set up the device, or if the name is a duplicate.
3236  *
3237  *      This is a wrapper around register_netdevice that takes the rtnl semaphore
3238  *      and expands the device name if you passed a format string to
3239  *      alloc_netdev.
3240  */
3241 int register_netdev(struct net_device *dev)
3242 {
3243         int err;
3244
3245         rtnl_lock();
3246
3247         /*
3248          * If the name is a format string the caller wants us to do a
3249          * name allocation.
3250          */
3251         if (strchr(dev->name, '%')) {
3252                 err = dev_alloc_name(dev, dev->name);
3253                 if (err < 0)
3254                         goto out;
3255         }
3256
3257         err = register_netdevice(dev);
3258 out:
3259         rtnl_unlock();
3260         return err;
3261 }
3262 EXPORT_SYMBOL(register_netdev);
3263
3264 /*
3265  * netdev_wait_allrefs - wait until all references are gone.
3266  *
3267  * This is called when unregistering network devices.
3268  *
3269  * Any protocol or device that holds a reference should register
3270  * for netdevice notification, and cleanup and put back the
3271  * reference if they receive an UNREGISTER event.
3272  * We can get stuck here if buggy protocols don't correctly
3273  * call dev_put.
3274  */
3275 static void netdev_wait_allrefs(struct net_device *dev)
3276 {
3277         unsigned long rebroadcast_time, warning_time;
3278
3279         rebroadcast_time = warning_time = jiffies;
3280         while (atomic_read(&dev->refcnt) != 0) {
3281                 if (time_after(jiffies, rebroadcast_time + 1 * HZ)) {
3282                         rtnl_lock();
3283
3284                         /* Rebroadcast unregister notification */
3285                         raw_notifier_call_chain(&netdev_chain,
3286                                             NETDEV_UNREGISTER, dev);
3287
3288                         if (test_bit(__LINK_STATE_LINKWATCH_PENDING,
3289                                      &dev->state)) {
3290                                 /* We must not have linkwatch events
3291                                  * pending on unregister. If this
3292                                  * happens, we simply run the queue
3293                                  * unscheduled, resulting in a noop
3294                                  * for this device.
3295                                  */
3296                                 linkwatch_run_queue();
3297                         }
3298
3299                         __rtnl_unlock();
3300
3301                         rebroadcast_time = jiffies;
3302                 }
3303
3304                 msleep(250);
3305
3306                 if (time_after(jiffies, warning_time + 10 * HZ)) {
3307                         printk(KERN_EMERG "unregister_netdevice: "
3308                                "waiting for %s to become free. Usage "
3309                                "count = %d\n",
3310                                dev->name, atomic_read(&dev->refcnt));
3311                         warning_time = jiffies;
3312                 }
3313         }
3314 }
3315
3316 /* The sequence is:
3317  *
3318  *      rtnl_lock();
3319  *      ...
3320  *      register_netdevice(x1);
3321  *      register_netdevice(x2);
3322  *      ...
3323  *      unregister_netdevice(y1);
3324  *      unregister_netdevice(y2);
3325  *      ...
3326  *      rtnl_unlock();
3327  *      free_netdev(y1);
3328  *      free_netdev(y2);
3329  *
3330  * We are invoked by rtnl_unlock().
3331  * This allows us to deal with problems:
3332  * 1) We can delete sysfs objects which invoke hotplug
3333  *    without deadlocking with linkwatch via keventd.
3334  * 2) Since we run with the RTNL semaphore not held, we can sleep
3335  *    safely in order to wait for the netdev refcnt to drop to zero.
3336  *
3337  * We must not return until all unregister events added during
3338  * the interval the lock was held have been completed.
3339  */
3340 void netdev_run_todo(void)
3341 {
3342         struct list_head list;
3343
3344         /* Snapshot list, allow later requests */
3345         list_replace_init(&net_todo_list, &list);
3346
3347         __rtnl_unlock();
3348
3349         while (!list_empty(&list)) {
3350                 struct net_device *dev
3351                         = list_entry(list.next, struct net_device, todo_list);
3352                 list_del(&dev->todo_list);
3353
3354                 if (unlikely(dev->reg_state != NETREG_UNREGISTERING)) {
3355                         printk(KERN_ERR "network todo '%s' but state %d\n",
3356                                dev->name, dev->reg_state);
3357                         dump_stack();
3358                         continue;
3359                 }
3360
3361                 dev->reg_state = NETREG_UNREGISTERED;
3362
3363                 netdev_wait_allrefs(dev);
3364
3365                 /* paranoia */
3366                 BUG_ON(atomic_read(&dev->refcnt));
3367                 BUG_TRAP(!dev->ip_ptr);
3368                 BUG_TRAP(!dev->ip6_ptr);
3369                 BUG_TRAP(!dev->dn_ptr);
3370
3371                 if (dev->destructor)
3372                         dev->destructor(dev);
3373
3374                 /* Free network device */
3375                 kobject_put(&dev->dev.kobj);
3376         }
3377 }
3378
3379 static struct net_device_stats *internal_stats(struct net_device *dev)
3380 {
3381         return &dev->stats;
3382 }
3383
3384 /**
3385  *      alloc_netdev - allocate network device
3386  *      @sizeof_priv:   size of private data to allocate space for
3387  *      @name:          device name format string
3388  *      @setup:         callback to initialize device
3389  *
3390  *      Allocates a struct net_device with private data area for driver use
3391  *      and performs basic initialization.
3392  */
3393 struct net_device *alloc_netdev(int sizeof_priv, const char *name,
3394                 void (*setup)(struct net_device *))
3395 {
3396         struct net_device *dev;
3397         int alloc_size;
3398         struct net_device *p;
3399
3400         BUG_ON(strlen(name) >= sizeof(dev->name));
3401
3402         alloc_size = sizeof(struct net_device);
3403         if (sizeof_priv) {
3404                 /* ensure 32-byte alignment of private area */
3405                 alloc_size = ALIGN(alloc_size, NETDEV_ALIGN);
3406                 alloc_size += sizeof_priv;
3407         }
3408         /* ensure 32-byte alignment of whole construct */
3409         alloc_size += NETDEV_ALIGN - 1;
3410
3411         p = kzalloc(alloc_size, GFP_KERNEL);
3412         if (!p) {
3413                 printk(KERN_ERR "alloc_netdev: Unable to allocate device.\n");
3414                 return NULL;
3415         }
3416
3417         dev = PTR_ALIGN(p, NETDEV_ALIGN);
3418         dev->padded = (char *)dev - (char *)p;
3419
3420         if (sizeof_priv)
3421                 dev->priv = netdev_priv(dev);
3422
3423         dev->get_stats = internal_stats;
3424         setup(dev);
3425         strcpy(dev->name, name);
3426         return dev;
3427 }
3428 EXPORT_SYMBOL(alloc_netdev);
3429
3430 /**
3431  *      free_netdev - free network device
3432  *      @dev: device
3433  *
3434  *      This function does the last stage of destroying an allocated device
3435  *      interface. The reference to the device object is released.
3436  *      If this is the last reference then it will be freed.
3437  */
3438 void free_netdev(struct net_device *dev)
3439 {
3440 #ifdef CONFIG_SYSFS
3441         /*  Compatibility with error handling in drivers */
3442         if (dev->reg_state == NETREG_UNINITIALIZED) {
3443                 kfree((char *)dev - dev->padded);
3444                 return;
3445         }
3446
3447         BUG_ON(dev->reg_state != NETREG_UNREGISTERED);
3448         dev->reg_state = NETREG_RELEASED;
3449
3450         /* will free via device release */
3451         put_device(&dev->dev);
3452 #else
3453         kfree((char *)dev - dev->padded);
3454 #endif
3455 }
3456
/*
 * Synchronize with packet receive processing.
 *
 * Flags the call as one that may sleep, then returns only once
 * synchronize_rcu() has completed.
 */
void synchronize_net(void)
{
        might_sleep();
        synchronize_rcu();
}
3463
3464 /**
3465  *      unregister_netdevice - remove device from the kernel
3466  *      @dev: device
3467  *
3468  *      This function shuts down a device interface and removes it
3469  *      from the kernel tables. On success 0 is returned, on a failure
3470  *      a negative errno code is returned.
3471  *
3472  *      Callers must hold the rtnl semaphore.  You may want
3473  *      unregister_netdev() instead of this.
3474  */
3475
3476 void unregister_netdevice(struct net_device *dev)
3477 {
3478         BUG_ON(dev_boot_phase);
3479         ASSERT_RTNL();
3480
3481         /* Some devices call without registering for initialization unwind. */
3482         if (dev->reg_state == NETREG_UNINITIALIZED) {
3483                 printk(KERN_DEBUG "unregister_netdevice: device %s/%p never "
3484                                   "was registered\n", dev->name, dev);
3485
3486                 WARN_ON(1);
3487                 return;
3488         }
3489
3490         BUG_ON(dev->reg_state != NETREG_REGISTERED);
3491
3492         /* If device is running, close it first. */
3493         if (dev->flags & IFF_UP)
3494                 dev_close(dev);
3495
3496         /* And unlink it from device chain. */
3497         write_lock_bh(&dev_base_lock);
3498         list_del(&dev->dev_list);
3499         hlist_del(&dev->name_hlist);
3500         hlist_del(&dev->index_hlist);
3501         write_unlock_bh(&dev_base_lock);
3502
3503         dev->reg_state = NETREG_UNREGISTERING;
3504
3505         synchronize_net();
3506
3507         /* Shutdown queueing discipline. */
3508         dev_shutdown(dev);
3509
3510
3511         /* Notify protocols, that we are about to destroy
3512            this device. They should clean all the things.
3513         */
3514         raw_notifier_call_chain(&netdev_chain, NETDEV_UNREGISTER, dev);
3515
3516         /*
3517          *      Flush the multicast chain
3518          */
3519         dev_mc_discard(dev);
3520
3521         if (dev->uninit)
3522                 dev->uninit(dev);
3523
3524         /* Notifier chain MUST detach us from master device. */
3525         BUG_TRAP(!dev->master);
3526
3527         /* Remove entries from sysfs */
3528         netdev_unregister_sysfs(dev);
3529
3530         /* Finish processing unregister after unlock */
3531         net_set_todo(dev);
3532
3533         synchronize_net();
3534
3535         dev_put(dev);
3536 }
3537
/**
 *      unregister_netdev - remove device from the kernel
 *      @dev: device
 *
 *      Shuts down a device interface and removes it from the kernel
 *      tables.  No value is returned.
 *
 *      This is just a wrapper for unregister_netdevice() that takes
 *      the rtnl semaphore around the call.  In general you want to use
 *      this and not unregister_netdevice().
 */
void unregister_netdev(struct net_device *dev)
{
        rtnl_lock();
        unregister_netdevice(dev);
        rtnl_unlock();
}
EXPORT_SYMBOL(unregister_netdev);
3558
3559 static int dev_cpu_callback(struct notifier_block *nfb,
3560                             unsigned long action,
3561                             void *ocpu)
3562 {
3563         struct sk_buff **list_skb;
3564         struct net_device **list_net;
3565         struct sk_buff *skb;
3566         unsigned int cpu, oldcpu = (unsigned long)ocpu;
3567         struct softnet_data *sd, *oldsd;
3568
3569         if (action != CPU_DEAD && action != CPU_DEAD_FROZEN)
3570                 return NOTIFY_OK;
3571
3572         local_irq_disable();
3573         cpu = smp_processor_id();
3574         sd = &per_cpu(softnet_data, cpu);
3575         oldsd = &per_cpu(softnet_data, oldcpu);
3576
3577         /* Find end of our completion_queue. */
3578         list_skb = &sd->completion_queue;
3579         while (*list_skb)
3580                 list_skb = &(*list_skb)->next;
3581         /* Append completion queue from offline CPU. */
3582         *list_skb = oldsd->completion_queue;
3583         oldsd->completion_queue = NULL;
3584
3585         /* Find end of our output_queue. */
3586         list_net = &sd->output_queue;
3587         while (*list_net)
3588                 list_net = &(*list_net)->next_sched;
3589         /* Append output queue from offline CPU. */
3590         *list_net = oldsd->output_queue;
3591         oldsd->output_queue = NULL;
3592
3593         raise_softirq_irqoff(NET_TX_SOFTIRQ);
3594         local_irq_enable();
3595
3596         /* Process offline CPU's input_pkt_queue */
3597         while ((skb = __skb_dequeue(&oldsd->input_pkt_queue)))
3598                 netif_rx(skb);
3599
3600         return NOTIFY_OK;
3601 }
3602
3603 #ifdef CONFIG_NET_DMA
3604 /**
3605  * net_dma_rebalance -
3606  * This is called when the number of channels allocated to the net_dma_client
3607  * changes.  The net_dma_client tries to have one DMA channel per CPU.
3608  */
3609 static void net_dma_rebalance(void)
3610 {
3611         unsigned int cpu, i, n;
3612         struct dma_chan *chan;
3613
3614         if (net_dma_count == 0) {
3615                 for_each_online_cpu(cpu)
3616                         rcu_assign_pointer(per_cpu(softnet_data, cpu).net_dma, NULL);
3617                 return;
3618         }
3619
3620         i = 0;
3621         cpu = first_cpu(cpu_online_map);
3622
3623         rcu_read_lock();
3624         list_for_each_entry(chan, &net_dma_client->channels, client_node) {
3625                 n = ((num_online_cpus() / net_dma_count)
3626                    + (i < (num_online_cpus() % net_dma_count) ? 1 : 0));
3627
3628                 while(n) {
3629                         per_cpu(softnet_data, cpu).net_dma = chan;
3630                         cpu = next_cpu(cpu, cpu_online_map);
3631                         n--;
3632                 }
3633                 i++;
3634         }
3635         rcu_read_unlock();
3636 }
3637
3638 /**
3639  * netdev_dma_event - event callback for the net_dma_client
3640  * @client: should always be net_dma_client
3641  * @chan: DMA channel for the event
3642  * @event: event type
3643  */
3644 static void netdev_dma_event(struct dma_client *client, struct dma_chan *chan,
3645         enum dma_event event)
3646 {
3647         spin_lock(&net_dma_event_lock);
3648         switch (event) {
3649         case DMA_RESOURCE_ADDED:
3650                 net_dma_count++;
3651                 net_dma_rebalance();
3652                 break;
3653         case DMA_RESOURCE_REMOVED:
3654                 net_dma_count--;
3655                 net_dma_rebalance();
3656                 break;
3657         default:
3658                 break;
3659         }
3660         spin_unlock(&net_dma_event_lock);
3661 }
3662
3663 /**
3664  * netdev_dma_regiser - register the networking subsystem as a DMA client
3665  */
3666 static int __init netdev_dma_register(void)
3667 {
3668         spin_lock_init(&net_dma_event_lock);
3669         net_dma_client = dma_async_client_register(netdev_dma_event);
3670         if (net_dma_client == NULL)
3671                 return -ENOMEM;
3672
3673         dma_async_client_chan_request(net_dma_client, num_online_cpus());
3674         return 0;
3675 }
3676
3677 #else
3678 static int __init netdev_dma_register(void) { return -ENODEV; }
3679 #endif /* CONFIG_NET_DMA */
3680
3681 /**
3682  *      netdev_compute_feature - compute conjunction of two feature sets
3683  *      @all: first feature set
3684  *      @one: second feature set
3685  *
3686  *      Computes a new feature set after adding a device with feature set
3687  *      @one to the master device with current feature set @all.  Returns
3688  *      the new feature set.
3689  */
3690 int netdev_compute_features(unsigned long all, unsigned long one)
3691 {
3692         /* if device needs checksumming, downgrade to hw checksumming */
3693         if (all & NETIF_F_NO_CSUM && !(one & NETIF_F_NO_CSUM))
3694                 all ^= NETIF_F_NO_CSUM | NETIF_F_HW_CSUM;
3695
3696         /* if device can't do all checksum, downgrade to ipv4 */
3697         if (all & NETIF_F_HW_CSUM && !(one & NETIF_F_HW_CSUM))
3698                 all ^= NETIF_F_HW_CSUM | NETIF_F_IP_CSUM;
3699
3700         if (one & NETIF_F_GSO)
3701                 one |= NETIF_F_GSO_SOFTWARE;
3702         one |= NETIF_F_GSO;
3703
3704         /* If even one device supports robust GSO, enable it for all. */
3705         if (one & NETIF_F_GSO_ROBUST)
3706                 all |= NETIF_F_GSO_ROBUST;
3707
3708         all &= one | NETIF_F_LLTX;
3709
3710         if (!(all & NETIF_F_ALL_CSUM))
3711                 all &= ~NETIF_F_SG;
3712         if (!(all & NETIF_F_SG))
3713                 all &= ~NETIF_F_GSO_MASK;
3714
3715         return all;
3716 }
3717 EXPORT_SYMBOL(netdev_compute_features);
3718
3719 /*
3720  *      Initialize the DEV module. At boot time this walks the device list and
3721  *      unhooks any devices that fail to initialise (normally hardware not
3722  *      present) and leaves us with a valid list of present and active devices.
3723  *
3724  */
3725
3726 /*
3727  *       This is called single threaded during boot, so no need
3728  *       to take the rtnl semaphore.
3729  */
3730 static int __init net_dev_init(void)
3731 {
3732         int i, rc = -ENOMEM;
3733
3734         BUG_ON(!dev_boot_phase);
3735
3736         if (dev_proc_init())
3737                 goto out;
3738
3739         if (netdev_sysfs_init())
3740                 goto out;
3741
3742         INIT_LIST_HEAD(&ptype_all);
3743         for (i = 0; i < 16; i++)
3744                 INIT_LIST_HEAD(&ptype_base[i]);
3745
3746         for (i = 0; i < ARRAY_SIZE(dev_name_head); i++)
3747                 INIT_HLIST_HEAD(&dev_name_head[i]);
3748
3749         for (i = 0; i < ARRAY_SIZE(dev_index_head); i++)
3750                 INIT_HLIST_HEAD(&dev_index_head[i]);
3751
3752         /*
3753          *      Initialise the packet receive queues.
3754          */
3755
3756         for_each_possible_cpu(i) {
3757                 struct softnet_data *queue;
3758
3759                 queue = &per_cpu(softnet_data, i);
3760                 skb_queue_head_init(&queue->input_pkt_queue);
3761                 queue->completion_queue = NULL;
3762                 INIT_LIST_HEAD(&queue->poll_list);
3763                 set_bit(__LINK_STATE_START, &queue->backlog_dev.state);
3764                 queue->backlog_dev.weight = weight_p;
3765                 queue->backlog_dev.poll = process_backlog;
3766                 atomic_set(&queue->backlog_dev.refcnt, 1);
3767         }
3768
3769         netdev_dma_register();
3770
3771         dev_boot_phase = 0;
3772
3773         open_softirq(NET_TX_SOFTIRQ, net_tx_action, NULL);
3774         open_softirq(NET_RX_SOFTIRQ, net_rx_action, NULL);
3775
3776         hotcpu_notifier(dev_cpu_callback, 0);
3777         dst_init();
3778         dev_mcast_init();
3779         rc = 0;
3780 out:
3781         return rc;
3782 }
3783
3784 subsys_initcall(net_dev_init);
3785
/*
 * Symbols exported from this file.
 */

/* Device lookup, packet handlers and naming. */
EXPORT_SYMBOL(__dev_get_by_index);
EXPORT_SYMBOL(__dev_get_by_name);
EXPORT_SYMBOL(__dev_remove_pack);
EXPORT_SYMBOL(dev_valid_name);
EXPORT_SYMBOL(dev_add_pack);
EXPORT_SYMBOL(dev_alloc_name);
EXPORT_SYMBOL(dev_close);
EXPORT_SYMBOL(dev_get_by_flags);
EXPORT_SYMBOL(dev_get_by_index);
EXPORT_SYMBOL(dev_get_by_name);
EXPORT_SYMBOL(dev_open);
EXPORT_SYMBOL(dev_queue_xmit);
EXPORT_SYMBOL(dev_remove_pack);

/* Device configuration. */
EXPORT_SYMBOL(dev_set_allmulti);
EXPORT_SYMBOL(dev_set_promiscuity);
EXPORT_SYMBOL(dev_change_flags);
EXPORT_SYMBOL(dev_set_mtu);
EXPORT_SYMBOL(dev_set_mac_address);

/* Lifetime, receive path and notifiers. */
EXPORT_SYMBOL(free_netdev);
EXPORT_SYMBOL(netdev_boot_setup_check);
EXPORT_SYMBOL(netdev_set_master);
EXPORT_SYMBOL(netdev_state_change);
EXPORT_SYMBOL(netif_receive_skb);
EXPORT_SYMBOL(netif_rx);
EXPORT_SYMBOL(register_gifconf);
EXPORT_SYMBOL(register_netdevice);
EXPORT_SYMBOL(register_netdevice_notifier);
EXPORT_SYMBOL(skb_checksum_help);
EXPORT_SYMBOL(synchronize_net);
EXPORT_SYMBOL(unregister_netdevice);
EXPORT_SYMBOL(unregister_netdevice_notifier);
EXPORT_SYMBOL(net_enable_timestamp);
EXPORT_SYMBOL(net_disable_timestamp);
EXPORT_SYMBOL(dev_get_flags);

/* Bridge hooks, exported only when bridging is built in or modular. */
#if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)
EXPORT_SYMBOL(br_handle_frame_hook);
EXPORT_SYMBOL(br_fdb_get_hook);
EXPORT_SYMBOL(br_fdb_put_hook);
#endif

/* dev_load is exported only with CONFIG_KMOD. */
#if defined(CONFIG_KMOD)
EXPORT_SYMBOL(dev_load);
#endif

/* Per-CPU softnet state. */
EXPORT_PER_CPU_SYMBOL(softnet_data);