ixgbe: function comment typo
[linux-2.6/mini2440.git] / net / core / dev.c
blob4615e9a443aa6314a23112816485bc4885c5853d
1 /*
2 * NET3 Protocol independent device support routines.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
9 * Derived from the non IP parts of dev.c 1.0.19
10 * Authors: Ross Biro
11 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12 * Mark Evans, <evansmp@uhura.aston.ac.uk>
14 * Additional Authors:
15 * Florian la Roche <rzsfl@rz.uni-sb.de>
16 * Alan Cox <gw4pts@gw4pts.ampr.org>
17 * David Hinds <dahinds@users.sourceforge.net>
18 * Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
19 * Adam Sulmicki <adam@cfar.umd.edu>
20 * Pekka Riikonen <priikone@poesidon.pspt.fi>
22 * Changes:
23 * D.J. Barrow : Fixed bug where dev->refcnt gets set
24 * to 2 if register_netdev gets called
25 * before net_dev_init & also removed a
26 * few lines of code in the process.
27 * Alan Cox : device private ioctl copies fields back.
28 * Alan Cox : Transmit queue code does relevant
29 * stunts to keep the queue safe.
30 * Alan Cox : Fixed double lock.
31 * Alan Cox : Fixed promisc NULL pointer trap
32 * ???????? : Support the full private ioctl range
33 * Alan Cox : Moved ioctl permission check into
34 * drivers
35 * Tim Kordas : SIOCADDMULTI/SIOCDELMULTI
36 * Alan Cox : 100 backlog just doesn't cut it when
37 * you start doing multicast video 8)
38 * Alan Cox : Rewrote net_bh and list manager.
39 * Alan Cox : Fix ETH_P_ALL echoback lengths.
40 * Alan Cox : Took out transmit every packet pass
41 * Saved a few bytes in the ioctl handler
42 * Alan Cox : Network driver sets packet type before
43 * calling netif_rx. Saves a function
44 * call a packet.
45 * Alan Cox : Hashed net_bh()
46 * Richard Kooijman: Timestamp fixes.
47 * Alan Cox : Wrong field in SIOCGIFDSTADDR
48 * Alan Cox : Device lock protection.
49 * Alan Cox : Fixed nasty side effect of device close
50 * changes.
51 * Rudi Cilibrasi : Pass the right thing to
52 * set_mac_address()
53 * Dave Miller : 32bit quantity for the device lock to
54 * make it work out on a Sparc.
55 * Bjorn Ekwall : Added KERNELD hack.
56 * Alan Cox : Cleaned up the backlog initialise.
57 * Craig Metz : SIOCGIFCONF fix if space for under
58 * 1 device.
59 * Thomas Bogendoerfer : Return ENODEV for dev_open, if there
60 * is no device open function.
61 * Andi Kleen : Fix error reporting for SIOCGIFCONF
62 * Michael Chastain : Fix signed/unsigned for SIOCGIFCONF
63 * Cyrus Durgin : Cleaned for KMOD
64 * Adam Sulmicki : Bug Fix : Network Device Unload
65 * A network device unload needs to purge
66 * the backlog queue.
67 * Paul Rusty Russell : SIOCSIFNAME
68 * Pekka Riikonen : Netdev boot-time settings code
69 * Andrew Morton : Make unregister_netdevice wait
70 * indefinitely on dev->refcnt
71 * J Hadi Salim : - Backlog queue sampling
72 * - netif_rx() feedback
75 #include <asm/uaccess.h>
76 #include <asm/system.h>
77 #include <linux/bitops.h>
78 #include <linux/capability.h>
79 #include <linux/cpu.h>
80 #include <linux/types.h>
81 #include <linux/kernel.h>
82 #include <linux/sched.h>
83 #include <linux/mutex.h>
84 #include <linux/string.h>
85 #include <linux/mm.h>
86 #include <linux/socket.h>
87 #include <linux/sockios.h>
88 #include <linux/errno.h>
89 #include <linux/interrupt.h>
90 #include <linux/if_ether.h>
91 #include <linux/netdevice.h>
92 #include <linux/etherdevice.h>
93 #include <linux/ethtool.h>
94 #include <linux/notifier.h>
95 #include <linux/skbuff.h>
96 #include <net/net_namespace.h>
97 #include <net/sock.h>
98 #include <linux/rtnetlink.h>
99 #include <linux/proc_fs.h>
100 #include <linux/seq_file.h>
101 #include <linux/stat.h>
102 #include <linux/if_bridge.h>
103 #include <linux/if_macvlan.h>
104 #include <net/dst.h>
105 #include <net/pkt_sched.h>
106 #include <net/checksum.h>
107 #include <linux/highmem.h>
108 #include <linux/init.h>
109 #include <linux/kmod.h>
110 #include <linux/module.h>
111 #include <linux/netpoll.h>
112 #include <linux/rcupdate.h>
113 #include <linux/delay.h>
114 #include <net/wext.h>
115 #include <net/iw_handler.h>
116 #include <asm/current.h>
117 #include <linux/audit.h>
118 #include <linux/dmaengine.h>
119 #include <linux/err.h>
120 #include <linux/ctype.h>
121 #include <linux/if_arp.h>
122 #include <linux/if_vlan.h>
123 #include <linux/ip.h>
124 #include <net/ip.h>
125 #include <linux/ipv6.h>
126 #include <linux/in.h>
127 #include <linux/jhash.h>
128 #include <linux/random.h>
130 #include "net-sysfs.h"
133 * The list of packet types we will receive (as opposed to discard)
134 * and the routines to invoke.
136 * Why 16. Because with 16 the only overlap we get on a hash of the
137 * low nibble of the protocol value is RARP/SNAP/X.25.
139 * NOTE: That is no longer true with the addition of VLAN tags. Not
140 * sure which should go first, but I bet it won't make much
141 * difference if we are running VLANs. The good news is that
142 * this protocol won't be in the list unless compiled in, so
143 * the average user (w/out VLANs) will not be adversely affected.
144 * --BLG
146 * 0800 IP
147 * 8100 802.1Q VLAN
148 * 0001 802.3
149 * 0002 AX.25
150 * 0004 802.2
151 * 8035 RARP
152 * 0005 SNAP
153 * 0805 X.25
154 * 0806 ARP
155 * 8137 IPX
156 * 0009 Localtalk
157 * 86DD IPv6
160 #define PTYPE_HASH_SIZE (16)
161 #define PTYPE_HASH_MASK (PTYPE_HASH_SIZE - 1)
163 static DEFINE_SPINLOCK(ptype_lock);
164 static struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly;
165 static struct list_head ptype_all __read_mostly; /* Taps */
167 #ifdef CONFIG_NET_DMA
168 struct net_dma {
169 struct dma_client client;
170 spinlock_t lock;
171 cpumask_t channel_mask;
172 struct dma_chan **channels;
175 static enum dma_state_client
176 netdev_dma_event(struct dma_client *client, struct dma_chan *chan,
177 enum dma_state state);
179 static struct net_dma net_dma = {
180 .client = {
181 .event_callback = netdev_dma_event,
184 #endif
187 * The @dev_base_head list is protected by @dev_base_lock and the rtnl
188 * semaphore.
190 * Pure readers hold dev_base_lock for reading.
192 * Writers must hold the rtnl semaphore while they loop through the
193 * dev_base_head list, and hold dev_base_lock for writing when they do the
194 * actual updates. This allows pure readers to access the list even
195 * while a writer is preparing to update it.
197 * To put it another way, dev_base_lock is held for writing only to
198 * protect against pure readers; the rtnl semaphore provides the
199 * protection against other writers.
201 * See, for example usages, register_netdevice() and
202 * unregister_netdevice(), which must be called with the rtnl
203 * semaphore held.
205 DEFINE_RWLOCK(dev_base_lock);
207 EXPORT_SYMBOL(dev_base_lock);
209 #define NETDEV_HASHBITS 8
210 #define NETDEV_HASHENTRIES (1 << NETDEV_HASHBITS)
212 static inline struct hlist_head *dev_name_hash(struct net *net, const char *name)
214 unsigned hash = full_name_hash(name, strnlen(name, IFNAMSIZ));
215 return &net->dev_name_head[hash & ((1 << NETDEV_HASHBITS) - 1)];
218 static inline struct hlist_head *dev_index_hash(struct net *net, int ifindex)
220 return &net->dev_index_head[ifindex & ((1 << NETDEV_HASHBITS) - 1)];
223 /* Device list insertion */
224 static int list_netdevice(struct net_device *dev)
226 struct net *net = dev_net(dev);
228 ASSERT_RTNL();
230 write_lock_bh(&dev_base_lock);
231 list_add_tail(&dev->dev_list, &net->dev_base_head);
232 hlist_add_head(&dev->name_hlist, dev_name_hash(net, dev->name));
233 hlist_add_head(&dev->index_hlist, dev_index_hash(net, dev->ifindex));
234 write_unlock_bh(&dev_base_lock);
235 return 0;
238 /* Device list removal */
239 static void unlist_netdevice(struct net_device *dev)
241 ASSERT_RTNL();
243 /* Unlink dev from the device chain */
244 write_lock_bh(&dev_base_lock);
245 list_del(&dev->dev_list);
246 hlist_del(&dev->name_hlist);
247 hlist_del(&dev->index_hlist);
248 write_unlock_bh(&dev_base_lock);
252 * Our notifier list
255 static RAW_NOTIFIER_HEAD(netdev_chain);
258 * Device drivers call our routines to queue packets here. We empty the
259 * queue in the local softnet handler.
262 DEFINE_PER_CPU(struct softnet_data, softnet_data);
264 #ifdef CONFIG_LOCKDEP
266 * register_netdevice() inits txq->_xmit_lock and sets lockdep class
267 * according to dev->type
269 static const unsigned short netdev_lock_type[] =
270 {ARPHRD_NETROM, ARPHRD_ETHER, ARPHRD_EETHER, ARPHRD_AX25,
271 ARPHRD_PRONET, ARPHRD_CHAOS, ARPHRD_IEEE802, ARPHRD_ARCNET,
272 ARPHRD_APPLETLK, ARPHRD_DLCI, ARPHRD_ATM, ARPHRD_METRICOM,
273 ARPHRD_IEEE1394, ARPHRD_EUI64, ARPHRD_INFINIBAND, ARPHRD_SLIP,
274 ARPHRD_CSLIP, ARPHRD_SLIP6, ARPHRD_CSLIP6, ARPHRD_RSRVD,
275 ARPHRD_ADAPT, ARPHRD_ROSE, ARPHRD_X25, ARPHRD_HWX25,
276 ARPHRD_PPP, ARPHRD_CISCO, ARPHRD_LAPB, ARPHRD_DDCMP,
277 ARPHRD_RAWHDLC, ARPHRD_TUNNEL, ARPHRD_TUNNEL6, ARPHRD_FRAD,
278 ARPHRD_SKIP, ARPHRD_LOOPBACK, ARPHRD_LOCALTLK, ARPHRD_FDDI,
279 ARPHRD_BIF, ARPHRD_SIT, ARPHRD_IPDDP, ARPHRD_IPGRE,
280 ARPHRD_PIMREG, ARPHRD_HIPPI, ARPHRD_ASH, ARPHRD_ECONET,
281 ARPHRD_IRDA, ARPHRD_FCPP, ARPHRD_FCAL, ARPHRD_FCPL,
282 ARPHRD_FCFABRIC, ARPHRD_IEEE802_TR, ARPHRD_IEEE80211,
283 ARPHRD_IEEE80211_PRISM, ARPHRD_IEEE80211_RADIOTAP, ARPHRD_VOID,
284 ARPHRD_NONE};
286 static const char *netdev_lock_name[] =
287 {"_xmit_NETROM", "_xmit_ETHER", "_xmit_EETHER", "_xmit_AX25",
288 "_xmit_PRONET", "_xmit_CHAOS", "_xmit_IEEE802", "_xmit_ARCNET",
289 "_xmit_APPLETLK", "_xmit_DLCI", "_xmit_ATM", "_xmit_METRICOM",
290 "_xmit_IEEE1394", "_xmit_EUI64", "_xmit_INFINIBAND", "_xmit_SLIP",
291 "_xmit_CSLIP", "_xmit_SLIP6", "_xmit_CSLIP6", "_xmit_RSRVD",
292 "_xmit_ADAPT", "_xmit_ROSE", "_xmit_X25", "_xmit_HWX25",
293 "_xmit_PPP", "_xmit_CISCO", "_xmit_LAPB", "_xmit_DDCMP",
294 "_xmit_RAWHDLC", "_xmit_TUNNEL", "_xmit_TUNNEL6", "_xmit_FRAD",
295 "_xmit_SKIP", "_xmit_LOOPBACK", "_xmit_LOCALTLK", "_xmit_FDDI",
296 "_xmit_BIF", "_xmit_SIT", "_xmit_IPDDP", "_xmit_IPGRE",
297 "_xmit_PIMREG", "_xmit_HIPPI", "_xmit_ASH", "_xmit_ECONET",
298 "_xmit_IRDA", "_xmit_FCPP", "_xmit_FCAL", "_xmit_FCPL",
299 "_xmit_FCFABRIC", "_xmit_IEEE802_TR", "_xmit_IEEE80211",
300 "_xmit_IEEE80211_PRISM", "_xmit_IEEE80211_RADIOTAP", "_xmit_VOID",
301 "_xmit_NONE"};
303 static struct lock_class_key netdev_xmit_lock_key[ARRAY_SIZE(netdev_lock_type)];
304 static struct lock_class_key netdev_addr_lock_key[ARRAY_SIZE(netdev_lock_type)];
306 static inline unsigned short netdev_lock_pos(unsigned short dev_type)
308 int i;
310 for (i = 0; i < ARRAY_SIZE(netdev_lock_type); i++)
311 if (netdev_lock_type[i] == dev_type)
312 return i;
313 /* the last key is used by default */
314 return ARRAY_SIZE(netdev_lock_type) - 1;
317 static inline void netdev_set_xmit_lockdep_class(spinlock_t *lock,
318 unsigned short dev_type)
320 int i;
322 i = netdev_lock_pos(dev_type);
323 lockdep_set_class_and_name(lock, &netdev_xmit_lock_key[i],
324 netdev_lock_name[i]);
327 static inline void netdev_set_addr_lockdep_class(struct net_device *dev)
329 int i;
331 i = netdev_lock_pos(dev->type);
332 lockdep_set_class_and_name(&dev->addr_list_lock,
333 &netdev_addr_lock_key[i],
334 netdev_lock_name[i]);
336 #else
337 static inline void netdev_set_xmit_lockdep_class(spinlock_t *lock,
338 unsigned short dev_type)
341 static inline void netdev_set_addr_lockdep_class(struct net_device *dev)
344 #endif
346 /*******************************************************************************
348 Protocol management and registration routines
350 *******************************************************************************/
353 * Add a protocol ID to the list. Now that the input handler is
354 * smarter we can dispense with all the messy stuff that used to be
355 * here.
357 * BEWARE!!! Protocol handlers, mangling input packets,
358 * MUST BE last in hash buckets and checking protocol handlers
359 * MUST start from promiscuous ptype_all chain in net_bh.
360 * It is true now, do not change it.
361 * Explanation follows: if protocol handler, mangling packet, will
362 * be the first on list, it is not able to sense, that packet
363 * is cloned and should be copied-on-write, so that it will
364 * change it and subsequent readers will get broken packet.
365 * --ANK (980803)
369 * dev_add_pack - add packet handler
370 * @pt: packet type declaration
372 * Add a protocol handler to the networking stack. The passed &packet_type
373 * is linked into kernel lists and may not be freed until it has been
374 * removed from the kernel lists.
376 * This call does not sleep therefore it can not
377 * guarantee all CPU's that are in middle of receiving packets
378 * will see the new packet type (until the next received packet).
381 void dev_add_pack(struct packet_type *pt)
383 int hash;
385 spin_lock_bh(&ptype_lock);
386 if (pt->type == htons(ETH_P_ALL))
387 list_add_rcu(&pt->list, &ptype_all);
388 else {
389 hash = ntohs(pt->type) & PTYPE_HASH_MASK;
390 list_add_rcu(&pt->list, &ptype_base[hash]);
392 spin_unlock_bh(&ptype_lock);
396 * __dev_remove_pack - remove packet handler
397 * @pt: packet type declaration
399 * Remove a protocol handler that was previously added to the kernel
400 * protocol handlers by dev_add_pack(). The passed &packet_type is removed
401 * from the kernel lists and can be freed or reused once this function
402 * returns.
404 * The packet type might still be in use by receivers
405 * and must not be freed until after all the CPU's have gone
406 * through a quiescent state.
408 void __dev_remove_pack(struct packet_type *pt)
410 struct list_head *head;
411 struct packet_type *pt1;
413 spin_lock_bh(&ptype_lock);
415 if (pt->type == htons(ETH_P_ALL))
416 head = &ptype_all;
417 else
418 head = &ptype_base[ntohs(pt->type) & PTYPE_HASH_MASK];
420 list_for_each_entry(pt1, head, list) {
421 if (pt == pt1) {
422 list_del_rcu(&pt->list);
423 goto out;
427 printk(KERN_WARNING "dev_remove_pack: %p not found.\n", pt);
428 out:
429 spin_unlock_bh(&ptype_lock);
432 * dev_remove_pack - remove packet handler
433 * @pt: packet type declaration
435 * Remove a protocol handler that was previously added to the kernel
436 * protocol handlers by dev_add_pack(). The passed &packet_type is removed
437 * from the kernel lists and can be freed or reused once this function
438 * returns.
440 * This call sleeps to guarantee that no CPU is looking at the packet
441 * type after return.
443 void dev_remove_pack(struct packet_type *pt)
445 __dev_remove_pack(pt);
447 synchronize_net();
450 /******************************************************************************
452 Device Boot-time Settings Routines
454 *******************************************************************************/
456 /* Boot time configuration table */
457 static struct netdev_boot_setup dev_boot_setup[NETDEV_BOOT_SETUP_MAX];
460 * netdev_boot_setup_add - add new setup entry
461 * @name: name of the device
462 * @map: configured settings for the device
464 * Adds new setup entry to the dev_boot_setup list. The function
465 * returns 0 on error and 1 on success. This is a generic routine to
466 * all netdevices.
468 static int netdev_boot_setup_add(char *name, struct ifmap *map)
470 struct netdev_boot_setup *s;
471 int i;
473 s = dev_boot_setup;
474 for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
475 if (s[i].name[0] == '\0' || s[i].name[0] == ' ') {
476 memset(s[i].name, 0, sizeof(s[i].name));
477 strlcpy(s[i].name, name, IFNAMSIZ);
478 memcpy(&s[i].map, map, sizeof(s[i].map));
479 break;
483 return i >= NETDEV_BOOT_SETUP_MAX ? 0 : 1;
487 * netdev_boot_setup_check - check boot time settings
488 * @dev: the netdevice
490 * Check boot time settings for the device.
491 * The found settings are set for the device to be used
492 * later in the device probing.
493 * Returns 0 if no settings found, 1 if they are.
495 int netdev_boot_setup_check(struct net_device *dev)
497 struct netdev_boot_setup *s = dev_boot_setup;
498 int i;
500 for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
501 if (s[i].name[0] != '\0' && s[i].name[0] != ' ' &&
502 !strcmp(dev->name, s[i].name)) {
503 dev->irq = s[i].map.irq;
504 dev->base_addr = s[i].map.base_addr;
505 dev->mem_start = s[i].map.mem_start;
506 dev->mem_end = s[i].map.mem_end;
507 return 1;
510 return 0;
515 * netdev_boot_base - get address from boot time settings
516 * @prefix: prefix for network device
517 * @unit: id for network device
519 * Check boot time settings for the base address of device.
520 * The found settings are set for the device to be used
521 * later in the device probing.
522 * Returns 0 if no settings found.
524 unsigned long netdev_boot_base(const char *prefix, int unit)
526 const struct netdev_boot_setup *s = dev_boot_setup;
527 char name[IFNAMSIZ];
528 int i;
530 sprintf(name, "%s%d", prefix, unit);
533 * If device already registered then return base of 1
534 * to indicate not to probe for this interface
536 if (__dev_get_by_name(&init_net, name))
537 return 1;
539 for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++)
540 if (!strcmp(name, s[i].name))
541 return s[i].map.base_addr;
542 return 0;
546 * Saves at boot time configured settings for any netdevice.
548 int __init netdev_boot_setup(char *str)
550 int ints[5];
551 struct ifmap map;
553 str = get_options(str, ARRAY_SIZE(ints), ints);
554 if (!str || !*str)
555 return 0;
557 /* Save settings */
558 memset(&map, 0, sizeof(map));
559 if (ints[0] > 0)
560 map.irq = ints[1];
561 if (ints[0] > 1)
562 map.base_addr = ints[2];
563 if (ints[0] > 2)
564 map.mem_start = ints[3];
565 if (ints[0] > 3)
566 map.mem_end = ints[4];
568 /* Add new entry to the list */
569 return netdev_boot_setup_add(str, &map);
572 __setup("netdev=", netdev_boot_setup);
574 /*******************************************************************************
576 Device Interface Subroutines
578 *******************************************************************************/
581 * __dev_get_by_name - find a device by its name
582 * @net: the applicable net namespace
583 * @name: name to find
585 * Find an interface by name. Must be called under RTNL semaphore
586 * or @dev_base_lock. If the name is found a pointer to the device
587 * is returned. If the name is not found then %NULL is returned. The
588 * reference counters are not incremented so the caller must be
589 * careful with locks.
592 struct net_device *__dev_get_by_name(struct net *net, const char *name)
594 struct hlist_node *p;
596 hlist_for_each(p, dev_name_hash(net, name)) {
597 struct net_device *dev
598 = hlist_entry(p, struct net_device, name_hlist);
599 if (!strncmp(dev->name, name, IFNAMSIZ))
600 return dev;
602 return NULL;
606 * dev_get_by_name - find a device by its name
607 * @net: the applicable net namespace
608 * @name: name to find
610 * Find an interface by name. This can be called from any
611 * context and does its own locking. The returned handle has
612 * the usage count incremented and the caller must use dev_put() to
613 * release it when it is no longer needed. %NULL is returned if no
614 * matching device is found.
617 struct net_device *dev_get_by_name(struct net *net, const char *name)
619 struct net_device *dev;
621 read_lock(&dev_base_lock);
622 dev = __dev_get_by_name(net, name);
623 if (dev)
624 dev_hold(dev);
625 read_unlock(&dev_base_lock);
626 return dev;
630 * __dev_get_by_index - find a device by its ifindex
631 * @net: the applicable net namespace
632 * @ifindex: index of device
634 * Search for an interface by index. Returns %NULL if the device
635 * is not found or a pointer to the device. The device has not
636 * had its reference counter increased so the caller must be careful
637 * about locking. The caller must hold either the RTNL semaphore
638 * or @dev_base_lock.
641 struct net_device *__dev_get_by_index(struct net *net, int ifindex)
643 struct hlist_node *p;
645 hlist_for_each(p, dev_index_hash(net, ifindex)) {
646 struct net_device *dev
647 = hlist_entry(p, struct net_device, index_hlist);
648 if (dev->ifindex == ifindex)
649 return dev;
651 return NULL;
656 * dev_get_by_index - find a device by its ifindex
657 * @net: the applicable net namespace
658 * @ifindex: index of device
660 * Search for an interface by index. Returns NULL if the device
661 * is not found or a pointer to the device. The device returned has
662 * had a reference added and the pointer is safe until the user calls
663 * dev_put to indicate they have finished with it.
666 struct net_device *dev_get_by_index(struct net *net, int ifindex)
668 struct net_device *dev;
670 read_lock(&dev_base_lock);
671 dev = __dev_get_by_index(net, ifindex);
672 if (dev)
673 dev_hold(dev);
674 read_unlock(&dev_base_lock);
675 return dev;
679 * dev_getbyhwaddr - find a device by its hardware address
680 * @net: the applicable net namespace
681 * @type: media type of device
682 * @ha: hardware address
684 * Search for an interface by MAC address. Returns NULL if the device
685 * is not found or a pointer to the device. The caller must hold the
686 * rtnl semaphore. The returned device has not had its ref count increased
687 * and the caller must therefore be careful about locking
689 * BUGS:
690 * If the API was consistent this would be __dev_get_by_hwaddr
693 struct net_device *dev_getbyhwaddr(struct net *net, unsigned short type, char *ha)
695 struct net_device *dev;
697 ASSERT_RTNL();
699 for_each_netdev(net, dev)
700 if (dev->type == type &&
701 !memcmp(dev->dev_addr, ha, dev->addr_len))
702 return dev;
704 return NULL;
707 EXPORT_SYMBOL(dev_getbyhwaddr);
709 struct net_device *__dev_getfirstbyhwtype(struct net *net, unsigned short type)
711 struct net_device *dev;
713 ASSERT_RTNL();
714 for_each_netdev(net, dev)
715 if (dev->type == type)
716 return dev;
718 return NULL;
721 EXPORT_SYMBOL(__dev_getfirstbyhwtype);
723 struct net_device *dev_getfirstbyhwtype(struct net *net, unsigned short type)
725 struct net_device *dev;
727 rtnl_lock();
728 dev = __dev_getfirstbyhwtype(net, type);
729 if (dev)
730 dev_hold(dev);
731 rtnl_unlock();
732 return dev;
735 EXPORT_SYMBOL(dev_getfirstbyhwtype);
738 * dev_get_by_flags - find any device with given flags
739 * @net: the applicable net namespace
740 * @if_flags: IFF_* values
741 * @mask: bitmask of bits in if_flags to check
743 * Search for any interface with the given flags. Returns NULL if a device
744 * is not found or a pointer to the device. The device returned has
745 * had a reference added and the pointer is safe until the user calls
746 * dev_put to indicate they have finished with it.
749 struct net_device * dev_get_by_flags(struct net *net, unsigned short if_flags, unsigned short mask)
751 struct net_device *dev, *ret;
753 ret = NULL;
754 read_lock(&dev_base_lock);
755 for_each_netdev(net, dev) {
756 if (((dev->flags ^ if_flags) & mask) == 0) {
757 dev_hold(dev);
758 ret = dev;
759 break;
762 read_unlock(&dev_base_lock);
763 return ret;
767 * dev_valid_name - check if name is okay for network device
768 * @name: name string
770 * Network device names need to be valid file names to
771 * to allow sysfs to work. We also disallow any kind of
772 * whitespace.
774 int dev_valid_name(const char *name)
776 if (*name == '\0')
777 return 0;
778 if (strlen(name) >= IFNAMSIZ)
779 return 0;
780 if (!strcmp(name, ".") || !strcmp(name, ".."))
781 return 0;
783 while (*name) {
784 if (*name == '/' || isspace(*name))
785 return 0;
786 name++;
788 return 1;
792 * __dev_alloc_name - allocate a name for a device
793 * @net: network namespace to allocate the device name in
794 * @name: name format string
795 * @buf: scratch buffer and result name string
797 * Passed a format string - eg "lt%d" it will try and find a suitable
798 * id. It scans list of devices to build up a free map, then chooses
799 * the first empty slot. The caller must hold the dev_base or rtnl lock
800 * while allocating the name and adding the device in order to avoid
801 * duplicates.
802 * Limited to bits_per_byte * page size devices (ie 32K on most platforms).
803 * Returns the number of the unit assigned or a negative errno code.
806 static int __dev_alloc_name(struct net *net, const char *name, char *buf)
808 int i = 0;
809 const char *p;
810 const int max_netdevices = 8*PAGE_SIZE;
811 unsigned long *inuse;
812 struct net_device *d;
814 p = strnchr(name, IFNAMSIZ-1, '%');
815 if (p) {
817 * Verify the string as this thing may have come from
818 * the user. There must be either one "%d" and no other "%"
819 * characters.
821 if (p[1] != 'd' || strchr(p + 2, '%'))
822 return -EINVAL;
824 /* Use one page as a bit array of possible slots */
825 inuse = (unsigned long *) get_zeroed_page(GFP_ATOMIC);
826 if (!inuse)
827 return -ENOMEM;
829 for_each_netdev(net, d) {
830 if (!sscanf(d->name, name, &i))
831 continue;
832 if (i < 0 || i >= max_netdevices)
833 continue;
835 /* avoid cases where sscanf is not exact inverse of printf */
836 snprintf(buf, IFNAMSIZ, name, i);
837 if (!strncmp(buf, d->name, IFNAMSIZ))
838 set_bit(i, inuse);
841 i = find_first_zero_bit(inuse, max_netdevices);
842 free_page((unsigned long) inuse);
845 snprintf(buf, IFNAMSIZ, name, i);
846 if (!__dev_get_by_name(net, buf))
847 return i;
849 /* It is possible to run out of possible slots
850 * when the name is long and there isn't enough space left
851 * for the digits, or if all bits are used.
853 return -ENFILE;
857 * dev_alloc_name - allocate a name for a device
858 * @dev: device
859 * @name: name format string
861 * Passed a format string - eg "lt%d" it will try and find a suitable
862 * id. It scans list of devices to build up a free map, then chooses
863 * the first empty slot. The caller must hold the dev_base or rtnl lock
864 * while allocating the name and adding the device in order to avoid
865 * duplicates.
866 * Limited to bits_per_byte * page size devices (ie 32K on most platforms).
867 * Returns the number of the unit assigned or a negative errno code.
870 int dev_alloc_name(struct net_device *dev, const char *name)
872 char buf[IFNAMSIZ];
873 struct net *net;
874 int ret;
876 BUG_ON(!dev_net(dev));
877 net = dev_net(dev);
878 ret = __dev_alloc_name(net, name, buf);
879 if (ret >= 0)
880 strlcpy(dev->name, buf, IFNAMSIZ);
881 return ret;
886 * dev_change_name - change name of a device
887 * @dev: device
888 * @newname: name (or format string) must be at least IFNAMSIZ
890 * Change name of a device, can pass format strings "eth%d".
891 * for wildcarding.
893 int dev_change_name(struct net_device *dev, const char *newname)
895 char oldname[IFNAMSIZ];
896 int err = 0;
897 int ret;
898 struct net *net;
900 ASSERT_RTNL();
901 BUG_ON(!dev_net(dev));
903 net = dev_net(dev);
904 if (dev->flags & IFF_UP)
905 return -EBUSY;
907 if (!dev_valid_name(newname))
908 return -EINVAL;
910 if (strncmp(newname, dev->name, IFNAMSIZ) == 0)
911 return 0;
913 memcpy(oldname, dev->name, IFNAMSIZ);
915 if (strchr(newname, '%')) {
916 err = dev_alloc_name(dev, newname);
917 if (err < 0)
918 return err;
920 else if (__dev_get_by_name(net, newname))
921 return -EEXIST;
922 else
923 strlcpy(dev->name, newname, IFNAMSIZ);
925 rollback:
926 /* For now only devices in the initial network namespace
927 * are in sysfs.
929 if (net == &init_net) {
930 ret = device_rename(&dev->dev, dev->name);
931 if (ret) {
932 memcpy(dev->name, oldname, IFNAMSIZ);
933 return ret;
937 write_lock_bh(&dev_base_lock);
938 hlist_del(&dev->name_hlist);
939 hlist_add_head(&dev->name_hlist, dev_name_hash(net, dev->name));
940 write_unlock_bh(&dev_base_lock);
942 ret = call_netdevice_notifiers(NETDEV_CHANGENAME, dev);
943 ret = notifier_to_errno(ret);
945 if (ret) {
946 if (err) {
947 printk(KERN_ERR
948 "%s: name change rollback failed: %d.\n",
949 dev->name, ret);
950 } else {
951 err = ret;
952 memcpy(dev->name, oldname, IFNAMSIZ);
953 goto rollback;
957 return err;
961 * dev_set_alias - change ifalias of a device
962 * @dev: device
963 * @alias: name up to IFALIASZ
964 * @len: limit of bytes to copy from info
966 * Set ifalias for a device,
968 int dev_set_alias(struct net_device *dev, const char *alias, size_t len)
970 ASSERT_RTNL();
972 if (len >= IFALIASZ)
973 return -EINVAL;
975 if (!len) {
976 if (dev->ifalias) {
977 kfree(dev->ifalias);
978 dev->ifalias = NULL;
980 return 0;
983 dev->ifalias = krealloc(dev->ifalias, len+1, GFP_KERNEL);
984 if (!dev->ifalias)
985 return -ENOMEM;
987 strlcpy(dev->ifalias, alias, len+1);
988 return len;
993 * netdev_features_change - device changes features
994 * @dev: device to cause notification
996 * Called to indicate a device has changed features.
998 void netdev_features_change(struct net_device *dev)
1000 call_netdevice_notifiers(NETDEV_FEAT_CHANGE, dev);
1002 EXPORT_SYMBOL(netdev_features_change);
1005 * netdev_state_change - device changes state
1006 * @dev: device to cause notification
1008 * Called to indicate a device has changed state. This function calls
1009 * the notifier chains for netdev_chain and sends a NEWLINK message
1010 * to the routing socket.
1012 void netdev_state_change(struct net_device *dev)
1014 if (dev->flags & IFF_UP) {
1015 call_netdevice_notifiers(NETDEV_CHANGE, dev);
1016 rtmsg_ifinfo(RTM_NEWLINK, dev, 0);
1020 void netdev_bonding_change(struct net_device *dev)
1022 call_netdevice_notifiers(NETDEV_BONDING_FAILOVER, dev);
1024 EXPORT_SYMBOL(netdev_bonding_change);
1027 * dev_load - load a network module
1028 * @net: the applicable net namespace
1029 * @name: name of interface
1031 * If a network interface is not present and the process has suitable
1032 * privileges this function loads the module. If module loading is not
1033 * available in this kernel then it becomes a nop.
1036 void dev_load(struct net *net, const char *name)
1038 struct net_device *dev;
1040 read_lock(&dev_base_lock);
1041 dev = __dev_get_by_name(net, name);
1042 read_unlock(&dev_base_lock);
1044 if (!dev && capable(CAP_SYS_MODULE))
1045 request_module("%s", name);
1049 * dev_open - prepare an interface for use.
1050 * @dev: device to open
1052 * Takes a device from down to up state. The device's private open
1053 * function is invoked and then the multicast lists are loaded. Finally
1054 * the device is moved into the up state and a %NETDEV_UP message is
1055 * sent to the netdev notifier chain.
1057 * Calling this function on an active interface is a nop. On a failure
1058 * a negative errno code is returned.
1060 int dev_open(struct net_device *dev)
1062 const struct net_device_ops *ops = dev->netdev_ops;
1063 int ret = 0;
1065 ASSERT_RTNL();
1068 * Is it already up?
1071 if (dev->flags & IFF_UP)
1072 return 0;
1075 * Is it even present?
1077 if (!netif_device_present(dev))
1078 return -ENODEV;
1081 * Call device private open method
1083 set_bit(__LINK_STATE_START, &dev->state);
1085 if (ops->ndo_validate_addr)
1086 ret = ops->ndo_validate_addr(dev);
1088 if (!ret && ops->ndo_open)
1089 ret = ops->ndo_open(dev);
1092 * If it went open OK then:
1095 if (ret)
1096 clear_bit(__LINK_STATE_START, &dev->state);
1097 else {
1099 * Set the flags.
1101 dev->flags |= IFF_UP;
1104 * Initialize multicasting status
1106 dev_set_rx_mode(dev);
1109 * Wakeup transmit queue engine
1111 dev_activate(dev);
1114 * ... and announce new interface.
1116 call_netdevice_notifiers(NETDEV_UP, dev);
1119 return ret;
1123 * dev_close - shutdown an interface.
1124 * @dev: device to shutdown
1126 * This function moves an active device into down state. A
1127 * %NETDEV_GOING_DOWN is sent to the netdev notifier chain. The device
1128 * is then deactivated and finally a %NETDEV_DOWN is sent to the notifier
1129 * chain.
1131 int dev_close(struct net_device *dev)
1133 const struct net_device_ops *ops = dev->netdev_ops;
1134 ASSERT_RTNL();
1136 might_sleep();
1138 if (!(dev->flags & IFF_UP))
1139 return 0;
1142 * Tell people we are going down, so that they can
1143 * prepare to death, when device is still operating.
1145 call_netdevice_notifiers(NETDEV_GOING_DOWN, dev);
1147 clear_bit(__LINK_STATE_START, &dev->state);
1149 /* Synchronize to scheduled poll. We cannot touch poll list,
1150 * it can be even on different cpu. So just clear netif_running().
1152 * dev->stop() will invoke napi_disable() on all of it's
1153 * napi_struct instances on this device.
1155 smp_mb__after_clear_bit(); /* Commit netif_running(). */
1157 dev_deactivate(dev);
1160 * Call the device specific close. This cannot fail.
1161 * Only if device is UP
1163 * We allow it to be called even after a DETACH hot-plug
1164 * event.
1166 if (ops->ndo_stop)
1167 ops->ndo_stop(dev);
1170 * Device is now down.
1173 dev->flags &= ~IFF_UP;
1176 * Tell people we are down
1178 call_netdevice_notifiers(NETDEV_DOWN, dev);
1180 return 0;
1185 * dev_disable_lro - disable Large Receive Offload on a device
1186 * @dev: device
1188 * Disable Large Receive Offload (LRO) on a net device. Must be
1189 * called under RTNL. This is needed if received packets may be
1190 * forwarded to another interface.
1192 void dev_disable_lro(struct net_device *dev)
1194 if (dev->ethtool_ops && dev->ethtool_ops->get_flags &&
1195 dev->ethtool_ops->set_flags) {
1196 u32 flags = dev->ethtool_ops->get_flags(dev);
1197 if (flags & ETH_FLAG_LRO) {
1198 flags &= ~ETH_FLAG_LRO;
1199 dev->ethtool_ops->set_flags(dev, flags);
1202 WARN_ON(dev->features & NETIF_F_LRO);
1204 EXPORT_SYMBOL(dev_disable_lro);
1207 static int dev_boot_phase = 1;
1210 * Device change register/unregister. These are not inline or static
1211 * as we export them to the world.
1215 * register_netdevice_notifier - register a network notifier block
1216 * @nb: notifier
1218 * Register a notifier to be called when network device events occur.
1219 * The notifier passed is linked into the kernel structures and must
1220 * not be reused until it has been unregistered. A negative errno code
1221 * is returned on a failure.
1223 * When registered all registration and up events are replayed
1224 * to the new notifier to allow device to have a race free
1225 * view of the network device list.
1228 int register_netdevice_notifier(struct notifier_block *nb)
1230 struct net_device *dev;
1231 struct net_device *last;
1232 struct net *net;
1233 int err;
1235 rtnl_lock();
1236 err = raw_notifier_chain_register(&netdev_chain, nb);
1237 if (err)
1238 goto unlock;
1239 if (dev_boot_phase)
1240 goto unlock;
1241 for_each_net(net) {
1242 for_each_netdev(net, dev) {
1243 err = nb->notifier_call(nb, NETDEV_REGISTER, dev);
1244 err = notifier_to_errno(err);
1245 if (err)
1246 goto rollback;
1248 if (!(dev->flags & IFF_UP))
1249 continue;
1251 nb->notifier_call(nb, NETDEV_UP, dev);
1255 unlock:
1256 rtnl_unlock();
1257 return err;
1259 rollback:
1260 last = dev;
1261 for_each_net(net) {
1262 for_each_netdev(net, dev) {
1263 if (dev == last)
1264 break;
1266 if (dev->flags & IFF_UP) {
1267 nb->notifier_call(nb, NETDEV_GOING_DOWN, dev);
1268 nb->notifier_call(nb, NETDEV_DOWN, dev);
1270 nb->notifier_call(nb, NETDEV_UNREGISTER, dev);
1274 raw_notifier_chain_unregister(&netdev_chain, nb);
1275 goto unlock;
1279 * unregister_netdevice_notifier - unregister a network notifier block
1280 * @nb: notifier
1282 * Unregister a notifier previously registered by
1283 * register_netdevice_notifier(). The notifier is unlinked into the
1284 * kernel structures and may then be reused. A negative errno code
1285 * is returned on a failure.
1288 int unregister_netdevice_notifier(struct notifier_block *nb)
1290 int err;
1292 rtnl_lock();
1293 err = raw_notifier_chain_unregister(&netdev_chain, nb);
1294 rtnl_unlock();
1295 return err;
1299 * call_netdevice_notifiers - call all network notifier blocks
1300 * @val: value passed unmodified to notifier function
1301 * @dev: net_device pointer passed unmodified to notifier function
1303 * Call all network notifier blocks. Parameters and return value
1304 * are as for raw_notifier_call_chain().
1307 int call_netdevice_notifiers(unsigned long val, struct net_device *dev)
1309 return raw_notifier_call_chain(&netdev_chain, val, dev);
1312 /* When > 0 there are consumers of rx skb time stamps */
1313 static atomic_t netstamp_needed = ATOMIC_INIT(0);
1315 void net_enable_timestamp(void)
1317 atomic_inc(&netstamp_needed);
1320 void net_disable_timestamp(void)
1322 atomic_dec(&netstamp_needed);
1325 static inline void net_timestamp(struct sk_buff *skb)
1327 if (atomic_read(&netstamp_needed))
1328 __net_timestamp(skb);
1329 else
1330 skb->tstamp.tv64 = 0;
1334 * Support routine. Sends outgoing frames to any network
1335 * taps currently in use.
1338 static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
1340 struct packet_type *ptype;
1342 net_timestamp(skb);
1344 rcu_read_lock();
1345 list_for_each_entry_rcu(ptype, &ptype_all, list) {
1346 /* Never send packets back to the socket
1347 * they originated from - MvS (miquels@drinkel.ow.org)
1349 if ((ptype->dev == dev || !ptype->dev) &&
1350 (ptype->af_packet_priv == NULL ||
1351 (struct sock *)ptype->af_packet_priv != skb->sk)) {
1352 struct sk_buff *skb2= skb_clone(skb, GFP_ATOMIC);
1353 if (!skb2)
1354 break;
1356 /* skb->nh should be correctly
1357 set by sender, so that the second statement is
1358 just protection against buggy protocols.
1360 skb_reset_mac_header(skb2);
1362 if (skb_network_header(skb2) < skb2->data ||
1363 skb2->network_header > skb2->tail) {
1364 if (net_ratelimit())
1365 printk(KERN_CRIT "protocol %04x is "
1366 "buggy, dev %s\n",
1367 skb2->protocol, dev->name);
1368 skb_reset_network_header(skb2);
1371 skb2->transport_header = skb2->network_header;
1372 skb2->pkt_type = PACKET_OUTGOING;
1373 ptype->func(skb2, skb->dev, ptype, skb->dev);
1376 rcu_read_unlock();
1380 static inline void __netif_reschedule(struct Qdisc *q)
1382 struct softnet_data *sd;
1383 unsigned long flags;
1385 local_irq_save(flags);
1386 sd = &__get_cpu_var(softnet_data);
1387 q->next_sched = sd->output_queue;
1388 sd->output_queue = q;
1389 raise_softirq_irqoff(NET_TX_SOFTIRQ);
1390 local_irq_restore(flags);
1393 void __netif_schedule(struct Qdisc *q)
1395 if (!test_and_set_bit(__QDISC_STATE_SCHED, &q->state))
1396 __netif_reschedule(q);
1398 EXPORT_SYMBOL(__netif_schedule);
1400 void dev_kfree_skb_irq(struct sk_buff *skb)
1402 if (atomic_dec_and_test(&skb->users)) {
1403 struct softnet_data *sd;
1404 unsigned long flags;
1406 local_irq_save(flags);
1407 sd = &__get_cpu_var(softnet_data);
1408 skb->next = sd->completion_queue;
1409 sd->completion_queue = skb;
1410 raise_softirq_irqoff(NET_TX_SOFTIRQ);
1411 local_irq_restore(flags);
1414 EXPORT_SYMBOL(dev_kfree_skb_irq);
1416 void dev_kfree_skb_any(struct sk_buff *skb)
1418 if (in_irq() || irqs_disabled())
1419 dev_kfree_skb_irq(skb);
1420 else
1421 dev_kfree_skb(skb);
1423 EXPORT_SYMBOL(dev_kfree_skb_any);
1427 * netif_device_detach - mark device as removed
1428 * @dev: network device
1430 * Mark device as removed from system and therefore no longer available.
1432 void netif_device_detach(struct net_device *dev)
1434 if (test_and_clear_bit(__LINK_STATE_PRESENT, &dev->state) &&
1435 netif_running(dev)) {
1436 netif_stop_queue(dev);
1439 EXPORT_SYMBOL(netif_device_detach);
1442 * netif_device_attach - mark device as attached
1443 * @dev: network device
1445 * Mark device as attached from system and restart if needed.
1447 void netif_device_attach(struct net_device *dev)
1449 if (!test_and_set_bit(__LINK_STATE_PRESENT, &dev->state) &&
1450 netif_running(dev)) {
1451 netif_wake_queue(dev);
1452 __netdev_watchdog_up(dev);
1455 EXPORT_SYMBOL(netif_device_attach);
1457 static bool can_checksum_protocol(unsigned long features, __be16 protocol)
1459 return ((features & NETIF_F_GEN_CSUM) ||
1460 ((features & NETIF_F_IP_CSUM) &&
1461 protocol == htons(ETH_P_IP)) ||
1462 ((features & NETIF_F_IPV6_CSUM) &&
1463 protocol == htons(ETH_P_IPV6)));
1466 static bool dev_can_checksum(struct net_device *dev, struct sk_buff *skb)
1468 if (can_checksum_protocol(dev->features, skb->protocol))
1469 return true;
1471 if (skb->protocol == htons(ETH_P_8021Q)) {
1472 struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
1473 if (can_checksum_protocol(dev->features & dev->vlan_features,
1474 veh->h_vlan_encapsulated_proto))
1475 return true;
1478 return false;
1482 * Invalidate hardware checksum when packet is to be mangled, and
1483 * complete checksum manually on outgoing path.
1485 int skb_checksum_help(struct sk_buff *skb)
1487 __wsum csum;
1488 int ret = 0, offset;
1490 if (skb->ip_summed == CHECKSUM_COMPLETE)
1491 goto out_set_summed;
1493 if (unlikely(skb_shinfo(skb)->gso_size)) {
1494 /* Let GSO fix up the checksum. */
1495 goto out_set_summed;
1498 offset = skb->csum_start - skb_headroom(skb);
1499 BUG_ON(offset >= skb_headlen(skb));
1500 csum = skb_checksum(skb, offset, skb->len - offset, 0);
1502 offset += skb->csum_offset;
1503 BUG_ON(offset + sizeof(__sum16) > skb_headlen(skb));
1505 if (skb_cloned(skb) &&
1506 !skb_clone_writable(skb, offset + sizeof(__sum16))) {
1507 ret = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
1508 if (ret)
1509 goto out;
1512 *(__sum16 *)(skb->data + offset) = csum_fold(csum);
1513 out_set_summed:
1514 skb->ip_summed = CHECKSUM_NONE;
1515 out:
1516 return ret;
1520 * skb_gso_segment - Perform segmentation on skb.
1521 * @skb: buffer to segment
1522 * @features: features for the output path (see dev->features)
1524 * This function segments the given skb and returns a list of segments.
1526 * It may return NULL if the skb requires no segmentation. This is
1527 * only possible when GSO is used for verifying header integrity.
1529 struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features)
1531 struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT);
1532 struct packet_type *ptype;
1533 __be16 type = skb->protocol;
1534 int err;
1536 BUG_ON(skb_shinfo(skb)->frag_list);
1538 skb_reset_mac_header(skb);
1539 skb->mac_len = skb->network_header - skb->mac_header;
1540 __skb_pull(skb, skb->mac_len);
1542 if (WARN_ON(skb->ip_summed != CHECKSUM_PARTIAL)) {
1543 if (skb_header_cloned(skb) &&
1544 (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC)))
1545 return ERR_PTR(err);
1548 rcu_read_lock();
1549 list_for_each_entry_rcu(ptype,
1550 &ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) {
1551 if (ptype->type == type && !ptype->dev && ptype->gso_segment) {
1552 if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) {
1553 err = ptype->gso_send_check(skb);
1554 segs = ERR_PTR(err);
1555 if (err || skb_gso_ok(skb, features))
1556 break;
1557 __skb_push(skb, (skb->data -
1558 skb_network_header(skb)));
1560 segs = ptype->gso_segment(skb, features);
1561 break;
1564 rcu_read_unlock();
1566 __skb_push(skb, skb->data - skb_mac_header(skb));
1568 return segs;
1571 EXPORT_SYMBOL(skb_gso_segment);
1573 /* Take action when hardware reception checksum errors are detected. */
1574 #ifdef CONFIG_BUG
1575 void netdev_rx_csum_fault(struct net_device *dev)
1577 if (net_ratelimit()) {
1578 printk(KERN_ERR "%s: hw csum failure.\n",
1579 dev ? dev->name : "<unknown>");
1580 dump_stack();
1583 EXPORT_SYMBOL(netdev_rx_csum_fault);
1584 #endif
1586 /* Actually, we should eliminate this check as soon as we know, that:
1587 * 1. IOMMU is present and allows to map all the memory.
1588 * 2. No high memory really exists on this machine.
1591 static inline int illegal_highdma(struct net_device *dev, struct sk_buff *skb)
1593 #ifdef CONFIG_HIGHMEM
1594 int i;
1596 if (dev->features & NETIF_F_HIGHDMA)
1597 return 0;
1599 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
1600 if (PageHighMem(skb_shinfo(skb)->frags[i].page))
1601 return 1;
1603 #endif
1604 return 0;
1607 struct dev_gso_cb {
1608 void (*destructor)(struct sk_buff *skb);
1611 #define DEV_GSO_CB(skb) ((struct dev_gso_cb *)(skb)->cb)
1613 static void dev_gso_skb_destructor(struct sk_buff *skb)
1615 struct dev_gso_cb *cb;
1617 do {
1618 struct sk_buff *nskb = skb->next;
1620 skb->next = nskb->next;
1621 nskb->next = NULL;
1622 kfree_skb(nskb);
1623 } while (skb->next);
1625 cb = DEV_GSO_CB(skb);
1626 if (cb->destructor)
1627 cb->destructor(skb);
1631 * dev_gso_segment - Perform emulated hardware segmentation on skb.
1632 * @skb: buffer to segment
1634 * This function segments the given skb and stores the list of segments
1635 * in skb->next.
1637 static int dev_gso_segment(struct sk_buff *skb)
1639 struct net_device *dev = skb->dev;
1640 struct sk_buff *segs;
1641 int features = dev->features & ~(illegal_highdma(dev, skb) ?
1642 NETIF_F_SG : 0);
1644 segs = skb_gso_segment(skb, features);
1646 /* Verifying header integrity only. */
1647 if (!segs)
1648 return 0;
1650 if (IS_ERR(segs))
1651 return PTR_ERR(segs);
1653 skb->next = segs;
1654 DEV_GSO_CB(skb)->destructor = skb->destructor;
1655 skb->destructor = dev_gso_skb_destructor;
1657 return 0;
1660 int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
1661 struct netdev_queue *txq)
1663 const struct net_device_ops *ops = dev->netdev_ops;
1665 prefetch(&dev->netdev_ops->ndo_start_xmit);
1666 if (likely(!skb->next)) {
1667 if (!list_empty(&ptype_all))
1668 dev_queue_xmit_nit(skb, dev);
1670 if (netif_needs_gso(dev, skb)) {
1671 if (unlikely(dev_gso_segment(skb)))
1672 goto out_kfree_skb;
1673 if (skb->next)
1674 goto gso;
1677 return ops->ndo_start_xmit(skb, dev);
1680 gso:
1681 do {
1682 struct sk_buff *nskb = skb->next;
1683 int rc;
1685 skb->next = nskb->next;
1686 nskb->next = NULL;
1687 rc = ops->ndo_start_xmit(nskb, dev);
1688 if (unlikely(rc)) {
1689 nskb->next = skb->next;
1690 skb->next = nskb;
1691 return rc;
1693 if (unlikely(netif_tx_queue_stopped(txq) && skb->next))
1694 return NETDEV_TX_BUSY;
1695 } while (skb->next);
1697 skb->destructor = DEV_GSO_CB(skb)->destructor;
1699 out_kfree_skb:
1700 kfree_skb(skb);
1701 return 0;
1704 static u32 simple_tx_hashrnd;
1705 static int simple_tx_hashrnd_initialized = 0;
1707 static u16 simple_tx_hash(struct net_device *dev, struct sk_buff *skb)
1709 u32 addr1, addr2, ports;
1710 u32 hash, ihl;
1711 u8 ip_proto = 0;
1713 if (unlikely(!simple_tx_hashrnd_initialized)) {
1714 get_random_bytes(&simple_tx_hashrnd, 4);
1715 simple_tx_hashrnd_initialized = 1;
1718 switch (skb->protocol) {
1719 case htons(ETH_P_IP):
1720 if (!(ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)))
1721 ip_proto = ip_hdr(skb)->protocol;
1722 addr1 = ip_hdr(skb)->saddr;
1723 addr2 = ip_hdr(skb)->daddr;
1724 ihl = ip_hdr(skb)->ihl;
1725 break;
1726 case htons(ETH_P_IPV6):
1727 ip_proto = ipv6_hdr(skb)->nexthdr;
1728 addr1 = ipv6_hdr(skb)->saddr.s6_addr32[3];
1729 addr2 = ipv6_hdr(skb)->daddr.s6_addr32[3];
1730 ihl = (40 >> 2);
1731 break;
1732 default:
1733 return 0;
1737 switch (ip_proto) {
1738 case IPPROTO_TCP:
1739 case IPPROTO_UDP:
1740 case IPPROTO_DCCP:
1741 case IPPROTO_ESP:
1742 case IPPROTO_AH:
1743 case IPPROTO_SCTP:
1744 case IPPROTO_UDPLITE:
1745 ports = *((u32 *) (skb_network_header(skb) + (ihl * 4)));
1746 break;
1748 default:
1749 ports = 0;
1750 break;
1753 hash = jhash_3words(addr1, addr2, ports, simple_tx_hashrnd);
1755 return (u16) (((u64) hash * dev->real_num_tx_queues) >> 32);
1758 static struct netdev_queue *dev_pick_tx(struct net_device *dev,
1759 struct sk_buff *skb)
1761 const struct net_device_ops *ops = dev->netdev_ops;
1762 u16 queue_index = 0;
1764 if (ops->ndo_select_queue)
1765 queue_index = ops->ndo_select_queue(dev, skb);
1766 else if (dev->real_num_tx_queues > 1)
1767 queue_index = simple_tx_hash(dev, skb);
1769 skb_set_queue_mapping(skb, queue_index);
1770 return netdev_get_tx_queue(dev, queue_index);
1774 * dev_queue_xmit - transmit a buffer
1775 * @skb: buffer to transmit
1777 * Queue a buffer for transmission to a network device. The caller must
1778 * have set the device and priority and built the buffer before calling
1779 * this function. The function can be called from an interrupt.
1781 * A negative errno code is returned on a failure. A success does not
1782 * guarantee the frame will be transmitted as it may be dropped due
1783 * to congestion or traffic shaping.
1785 * -----------------------------------------------------------------------------------
1786 * I notice this method can also return errors from the queue disciplines,
1787 * including NET_XMIT_DROP, which is a positive value. So, errors can also
1788 * be positive.
1790 * Regardless of the return value, the skb is consumed, so it is currently
1791 * difficult to retry a send to this method. (You can bump the ref count
1792 * before sending to hold a reference for retry if you are careful.)
1794 * When calling this method, interrupts MUST be enabled. This is because
1795 * the BH enable code must have IRQs enabled so that it will not deadlock.
1796 * --BLG
1798 int dev_queue_xmit(struct sk_buff *skb)
1800 struct net_device *dev = skb->dev;
1801 struct netdev_queue *txq;
1802 struct Qdisc *q;
1803 int rc = -ENOMEM;
1805 /* GSO will handle the following emulations directly. */
1806 if (netif_needs_gso(dev, skb))
1807 goto gso;
1809 if (skb_shinfo(skb)->frag_list &&
1810 !(dev->features & NETIF_F_FRAGLIST) &&
1811 __skb_linearize(skb))
1812 goto out_kfree_skb;
1814 /* Fragmented skb is linearized if device does not support SG,
1815 * or if at least one of fragments is in highmem and device
1816 * does not support DMA from it.
1818 if (skb_shinfo(skb)->nr_frags &&
1819 (!(dev->features & NETIF_F_SG) || illegal_highdma(dev, skb)) &&
1820 __skb_linearize(skb))
1821 goto out_kfree_skb;
1823 /* If packet is not checksummed and device does not support
1824 * checksumming for this protocol, complete checksumming here.
1826 if (skb->ip_summed == CHECKSUM_PARTIAL) {
1827 skb_set_transport_header(skb, skb->csum_start -
1828 skb_headroom(skb));
1829 if (!dev_can_checksum(dev, skb) && skb_checksum_help(skb))
1830 goto out_kfree_skb;
1833 gso:
1834 /* Disable soft irqs for various locks below. Also
1835 * stops preemption for RCU.
1837 rcu_read_lock_bh();
1839 txq = dev_pick_tx(dev, skb);
1840 q = rcu_dereference(txq->qdisc);
1842 #ifdef CONFIG_NET_CLS_ACT
1843 skb->tc_verd = SET_TC_AT(skb->tc_verd,AT_EGRESS);
1844 #endif
1845 if (q->enqueue) {
1846 spinlock_t *root_lock = qdisc_lock(q);
1848 spin_lock(root_lock);
1850 if (unlikely(test_bit(__QDISC_STATE_DEACTIVATED, &q->state))) {
1851 kfree_skb(skb);
1852 rc = NET_XMIT_DROP;
1853 } else {
1854 rc = qdisc_enqueue_root(skb, q);
1855 qdisc_run(q);
1857 spin_unlock(root_lock);
1859 goto out;
1862 /* The device has no queue. Common case for software devices:
1863 loopback, all the sorts of tunnels...
1865 Really, it is unlikely that netif_tx_lock protection is necessary
1866 here. (f.e. loopback and IP tunnels are clean ignoring statistics
1867 counters.)
1868 However, it is possible, that they rely on protection
1869 made by us here.
1871 Check this and shot the lock. It is not prone from deadlocks.
1872 Either shot noqueue qdisc, it is even simpler 8)
1874 if (dev->flags & IFF_UP) {
1875 int cpu = smp_processor_id(); /* ok because BHs are off */
1877 if (txq->xmit_lock_owner != cpu) {
1879 HARD_TX_LOCK(dev, txq, cpu);
1881 if (!netif_tx_queue_stopped(txq)) {
1882 rc = 0;
1883 if (!dev_hard_start_xmit(skb, dev, txq)) {
1884 HARD_TX_UNLOCK(dev, txq);
1885 goto out;
1888 HARD_TX_UNLOCK(dev, txq);
1889 if (net_ratelimit())
1890 printk(KERN_CRIT "Virtual device %s asks to "
1891 "queue packet!\n", dev->name);
1892 } else {
1893 /* Recursion is detected! It is possible,
1894 * unfortunately */
1895 if (net_ratelimit())
1896 printk(KERN_CRIT "Dead loop on virtual device "
1897 "%s, fix it urgently!\n", dev->name);
1901 rc = -ENETDOWN;
1902 rcu_read_unlock_bh();
1904 out_kfree_skb:
1905 kfree_skb(skb);
1906 return rc;
1907 out:
1908 rcu_read_unlock_bh();
1909 return rc;
1913 /*=======================================================================
1914 Receiver routines
1915 =======================================================================*/
1917 int netdev_max_backlog __read_mostly = 1000;
1918 int netdev_budget __read_mostly = 300;
1919 int weight_p __read_mostly = 64; /* old backlog weight */
1921 DEFINE_PER_CPU(struct netif_rx_stats, netdev_rx_stat) = { 0, };
1925 * netif_rx - post buffer to the network code
1926 * @skb: buffer to post
1928 * This function receives a packet from a device driver and queues it for
1929 * the upper (protocol) levels to process. It always succeeds. The buffer
1930 * may be dropped during processing for congestion control or by the
1931 * protocol layers.
1933 * return values:
1934 * NET_RX_SUCCESS (no congestion)
1935 * NET_RX_DROP (packet was dropped)
1939 int netif_rx(struct sk_buff *skb)
1941 struct softnet_data *queue;
1942 unsigned long flags;
1944 /* if netpoll wants it, pretend we never saw it */
1945 if (netpoll_rx(skb))
1946 return NET_RX_DROP;
1948 if (!skb->tstamp.tv64)
1949 net_timestamp(skb);
1952 * The code is rearranged so that the path is the most
1953 * short when CPU is congested, but is still operating.
1955 local_irq_save(flags);
1956 queue = &__get_cpu_var(softnet_data);
1958 __get_cpu_var(netdev_rx_stat).total++;
1959 if (queue->input_pkt_queue.qlen <= netdev_max_backlog) {
1960 if (queue->input_pkt_queue.qlen) {
1961 enqueue:
1962 __skb_queue_tail(&queue->input_pkt_queue, skb);
1963 local_irq_restore(flags);
1964 return NET_RX_SUCCESS;
1967 napi_schedule(&queue->backlog);
1968 goto enqueue;
1971 __get_cpu_var(netdev_rx_stat).dropped++;
1972 local_irq_restore(flags);
1974 kfree_skb(skb);
1975 return NET_RX_DROP;
1978 int netif_rx_ni(struct sk_buff *skb)
1980 int err;
1982 preempt_disable();
1983 err = netif_rx(skb);
1984 if (local_softirq_pending())
1985 do_softirq();
1986 preempt_enable();
1988 return err;
1991 EXPORT_SYMBOL(netif_rx_ni);
1993 static void net_tx_action(struct softirq_action *h)
1995 struct softnet_data *sd = &__get_cpu_var(softnet_data);
1997 if (sd->completion_queue) {
1998 struct sk_buff *clist;
2000 local_irq_disable();
2001 clist = sd->completion_queue;
2002 sd->completion_queue = NULL;
2003 local_irq_enable();
2005 while (clist) {
2006 struct sk_buff *skb = clist;
2007 clist = clist->next;
2009 WARN_ON(atomic_read(&skb->users));
2010 __kfree_skb(skb);
2014 if (sd->output_queue) {
2015 struct Qdisc *head;
2017 local_irq_disable();
2018 head = sd->output_queue;
2019 sd->output_queue = NULL;
2020 local_irq_enable();
2022 while (head) {
2023 struct Qdisc *q = head;
2024 spinlock_t *root_lock;
2026 head = head->next_sched;
2028 root_lock = qdisc_lock(q);
2029 if (spin_trylock(root_lock)) {
2030 smp_mb__before_clear_bit();
2031 clear_bit(__QDISC_STATE_SCHED,
2032 &q->state);
2033 qdisc_run(q);
2034 spin_unlock(root_lock);
2035 } else {
2036 if (!test_bit(__QDISC_STATE_DEACTIVATED,
2037 &q->state)) {
2038 __netif_reschedule(q);
2039 } else {
2040 smp_mb__before_clear_bit();
2041 clear_bit(__QDISC_STATE_SCHED,
2042 &q->state);
2049 static inline int deliver_skb(struct sk_buff *skb,
2050 struct packet_type *pt_prev,
2051 struct net_device *orig_dev)
2053 atomic_inc(&skb->users);
2054 return pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
2057 #if defined(CONFIG_BRIDGE) || defined (CONFIG_BRIDGE_MODULE)
2058 /* These hooks defined here for ATM */
2059 struct net_bridge;
2060 struct net_bridge_fdb_entry *(*br_fdb_get_hook)(struct net_bridge *br,
2061 unsigned char *addr);
2062 void (*br_fdb_put_hook)(struct net_bridge_fdb_entry *ent) __read_mostly;
2065 * If bridge module is loaded call bridging hook.
2066 * returns NULL if packet was consumed.
2068 struct sk_buff *(*br_handle_frame_hook)(struct net_bridge_port *p,
2069 struct sk_buff *skb) __read_mostly;
2070 static inline struct sk_buff *handle_bridge(struct sk_buff *skb,
2071 struct packet_type **pt_prev, int *ret,
2072 struct net_device *orig_dev)
2074 struct net_bridge_port *port;
2076 if (skb->pkt_type == PACKET_LOOPBACK ||
2077 (port = rcu_dereference(skb->dev->br_port)) == NULL)
2078 return skb;
2080 if (*pt_prev) {
2081 *ret = deliver_skb(skb, *pt_prev, orig_dev);
2082 *pt_prev = NULL;
2085 return br_handle_frame_hook(port, skb);
2087 #else
2088 #define handle_bridge(skb, pt_prev, ret, orig_dev) (skb)
2089 #endif
2091 #if defined(CONFIG_MACVLAN) || defined(CONFIG_MACVLAN_MODULE)
2092 struct sk_buff *(*macvlan_handle_frame_hook)(struct sk_buff *skb) __read_mostly;
2093 EXPORT_SYMBOL_GPL(macvlan_handle_frame_hook);
2095 static inline struct sk_buff *handle_macvlan(struct sk_buff *skb,
2096 struct packet_type **pt_prev,
2097 int *ret,
2098 struct net_device *orig_dev)
2100 if (skb->dev->macvlan_port == NULL)
2101 return skb;
2103 if (*pt_prev) {
2104 *ret = deliver_skb(skb, *pt_prev, orig_dev);
2105 *pt_prev = NULL;
2107 return macvlan_handle_frame_hook(skb);
2109 #else
2110 #define handle_macvlan(skb, pt_prev, ret, orig_dev) (skb)
2111 #endif
2113 #ifdef CONFIG_NET_CLS_ACT
2114 /* TODO: Maybe we should just force sch_ingress to be compiled in
2115 * when CONFIG_NET_CLS_ACT is? otherwise some useless instructions
2116 * a compare and 2 stores extra right now if we dont have it on
2117 * but have CONFIG_NET_CLS_ACT
2118 * NOTE: This doesnt stop any functionality; if you dont have
2119 * the ingress scheduler, you just cant add policies on ingress.
2122 static int ing_filter(struct sk_buff *skb)
2124 struct net_device *dev = skb->dev;
2125 u32 ttl = G_TC_RTTL(skb->tc_verd);
2126 struct netdev_queue *rxq;
2127 int result = TC_ACT_OK;
2128 struct Qdisc *q;
2130 if (MAX_RED_LOOP < ttl++) {
2131 printk(KERN_WARNING
2132 "Redir loop detected Dropping packet (%d->%d)\n",
2133 skb->iif, dev->ifindex);
2134 return TC_ACT_SHOT;
2137 skb->tc_verd = SET_TC_RTTL(skb->tc_verd, ttl);
2138 skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_INGRESS);
2140 rxq = &dev->rx_queue;
2142 q = rxq->qdisc;
2143 if (q != &noop_qdisc) {
2144 spin_lock(qdisc_lock(q));
2145 if (likely(!test_bit(__QDISC_STATE_DEACTIVATED, &q->state)))
2146 result = qdisc_enqueue_root(skb, q);
2147 spin_unlock(qdisc_lock(q));
2150 return result;
2153 static inline struct sk_buff *handle_ing(struct sk_buff *skb,
2154 struct packet_type **pt_prev,
2155 int *ret, struct net_device *orig_dev)
2157 if (skb->dev->rx_queue.qdisc == &noop_qdisc)
2158 goto out;
2160 if (*pt_prev) {
2161 *ret = deliver_skb(skb, *pt_prev, orig_dev);
2162 *pt_prev = NULL;
2163 } else {
2164 /* Huh? Why does turning on AF_PACKET affect this? */
2165 skb->tc_verd = SET_TC_OK2MUNGE(skb->tc_verd);
2168 switch (ing_filter(skb)) {
2169 case TC_ACT_SHOT:
2170 case TC_ACT_STOLEN:
2171 kfree_skb(skb);
2172 return NULL;
2175 out:
2176 skb->tc_verd = 0;
2177 return skb;
2179 #endif
2182 * netif_nit_deliver - deliver received packets to network taps
2183 * @skb: buffer
2185 * This function is used to deliver incoming packets to network
2186 * taps. It should be used when the normal netif_receive_skb path
2187 * is bypassed, for example because of VLAN acceleration.
2189 void netif_nit_deliver(struct sk_buff *skb)
2191 struct packet_type *ptype;
2193 if (list_empty(&ptype_all))
2194 return;
2196 skb_reset_network_header(skb);
2197 skb_reset_transport_header(skb);
2198 skb->mac_len = skb->network_header - skb->mac_header;
2200 rcu_read_lock();
2201 list_for_each_entry_rcu(ptype, &ptype_all, list) {
2202 if (!ptype->dev || ptype->dev == skb->dev)
2203 deliver_skb(skb, ptype, skb->dev);
2205 rcu_read_unlock();
2209 * netif_receive_skb - process receive buffer from network
2210 * @skb: buffer to process
2212 * netif_receive_skb() is the main receive data processing function.
2213 * It always succeeds. The buffer may be dropped during processing
2214 * for congestion control or by the protocol layers.
2216 * This function may only be called from softirq context and interrupts
2217 * should be enabled.
2219 * Return values (usually ignored):
2220 * NET_RX_SUCCESS: no congestion
2221 * NET_RX_DROP: packet was dropped
2223 int netif_receive_skb(struct sk_buff *skb)
2225 struct packet_type *ptype, *pt_prev;
2226 struct net_device *orig_dev;
2227 struct net_device *null_or_orig;
2228 int ret = NET_RX_DROP;
2229 __be16 type;
2231 if (skb->vlan_tci && vlan_hwaccel_do_receive(skb))
2232 return NET_RX_SUCCESS;
2234 /* if we've gotten here through NAPI, check netpoll */
2235 if (netpoll_receive_skb(skb))
2236 return NET_RX_DROP;
2238 if (!skb->tstamp.tv64)
2239 net_timestamp(skb);
2241 if (!skb->iif)
2242 skb->iif = skb->dev->ifindex;
2244 null_or_orig = NULL;
2245 orig_dev = skb->dev;
2246 if (orig_dev->master) {
2247 if (skb_bond_should_drop(skb))
2248 null_or_orig = orig_dev; /* deliver only exact match */
2249 else
2250 skb->dev = orig_dev->master;
2253 __get_cpu_var(netdev_rx_stat).total++;
2255 skb_reset_network_header(skb);
2256 skb_reset_transport_header(skb);
2257 skb->mac_len = skb->network_header - skb->mac_header;
2259 pt_prev = NULL;
2261 rcu_read_lock();
2263 /* Don't receive packets in an exiting network namespace */
2264 if (!net_alive(dev_net(skb->dev))) {
2265 kfree_skb(skb);
2266 goto out;
2269 #ifdef CONFIG_NET_CLS_ACT
2270 if (skb->tc_verd & TC_NCLS) {
2271 skb->tc_verd = CLR_TC_NCLS(skb->tc_verd);
2272 goto ncls;
2274 #endif
2276 list_for_each_entry_rcu(ptype, &ptype_all, list) {
2277 if (ptype->dev == null_or_orig || ptype->dev == skb->dev ||
2278 ptype->dev == orig_dev) {
2279 if (pt_prev)
2280 ret = deliver_skb(skb, pt_prev, orig_dev);
2281 pt_prev = ptype;
2285 #ifdef CONFIG_NET_CLS_ACT
2286 skb = handle_ing(skb, &pt_prev, &ret, orig_dev);
2287 if (!skb)
2288 goto out;
2289 ncls:
2290 #endif
2292 skb = handle_bridge(skb, &pt_prev, &ret, orig_dev);
2293 if (!skb)
2294 goto out;
2295 skb = handle_macvlan(skb, &pt_prev, &ret, orig_dev);
2296 if (!skb)
2297 goto out;
2299 type = skb->protocol;
2300 list_for_each_entry_rcu(ptype,
2301 &ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) {
2302 if (ptype->type == type &&
2303 (ptype->dev == null_or_orig || ptype->dev == skb->dev ||
2304 ptype->dev == orig_dev)) {
2305 if (pt_prev)
2306 ret = deliver_skb(skb, pt_prev, orig_dev);
2307 pt_prev = ptype;
2311 if (pt_prev) {
2312 ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
2313 } else {
2314 kfree_skb(skb);
2315 /* Jamal, now you will not able to escape explaining
2316 * me how you were going to use this. :-)
2318 ret = NET_RX_DROP;
2321 out:
2322 rcu_read_unlock();
2323 return ret;
2326 /* Network device is going away, flush any packets still pending */
2327 static void flush_backlog(void *arg)
2329 struct net_device *dev = arg;
2330 struct softnet_data *queue = &__get_cpu_var(softnet_data);
2331 struct sk_buff *skb, *tmp;
2333 skb_queue_walk_safe(&queue->input_pkt_queue, skb, tmp)
2334 if (skb->dev == dev) {
2335 __skb_unlink(skb, &queue->input_pkt_queue);
2336 kfree_skb(skb);
2340 static int process_backlog(struct napi_struct *napi, int quota)
2342 int work = 0;
2343 struct softnet_data *queue = &__get_cpu_var(softnet_data);
2344 unsigned long start_time = jiffies;
2346 napi->weight = weight_p;
2347 do {
2348 struct sk_buff *skb;
2350 local_irq_disable();
2351 skb = __skb_dequeue(&queue->input_pkt_queue);
2352 if (!skb) {
2353 __napi_complete(napi);
2354 local_irq_enable();
2355 break;
2357 local_irq_enable();
2359 netif_receive_skb(skb);
2360 } while (++work < quota && jiffies == start_time);
2362 return work;
2366 * __napi_schedule - schedule for receive
2367 * @n: entry to schedule
2369 * The entry's receive function will be scheduled to run
2371 void __napi_schedule(struct napi_struct *n)
2373 unsigned long flags;
2375 local_irq_save(flags);
2376 list_add_tail(&n->poll_list, &__get_cpu_var(softnet_data).poll_list);
2377 __raise_softirq_irqoff(NET_RX_SOFTIRQ);
2378 local_irq_restore(flags);
2380 EXPORT_SYMBOL(__napi_schedule);
2383 static void net_rx_action(struct softirq_action *h)
2385 struct list_head *list = &__get_cpu_var(softnet_data).poll_list;
2386 unsigned long time_limit = jiffies + 2;
2387 int budget = netdev_budget;
2388 void *have;
2390 local_irq_disable();
2392 while (!list_empty(list)) {
2393 struct napi_struct *n;
2394 int work, weight;
2396 /* If softirq window is exhuasted then punt.
2397 * Allow this to run for 2 jiffies since which will allow
2398 * an average latency of 1.5/HZ.
2400 if (unlikely(budget <= 0 || time_after(jiffies, time_limit)))
2401 goto softnet_break;
2403 local_irq_enable();
2405 /* Even though interrupts have been re-enabled, this
2406 * access is safe because interrupts can only add new
2407 * entries to the tail of this list, and only ->poll()
2408 * calls can remove this head entry from the list.
2410 n = list_entry(list->next, struct napi_struct, poll_list);
2412 have = netpoll_poll_lock(n);
2414 weight = n->weight;
2416 /* This NAPI_STATE_SCHED test is for avoiding a race
2417 * with netpoll's poll_napi(). Only the entity which
2418 * obtains the lock and sees NAPI_STATE_SCHED set will
2419 * actually make the ->poll() call. Therefore we avoid
2420 * accidently calling ->poll() when NAPI is not scheduled.
2422 work = 0;
2423 if (test_bit(NAPI_STATE_SCHED, &n->state))
2424 work = n->poll(n, weight);
2426 WARN_ON_ONCE(work > weight);
2428 budget -= work;
2430 local_irq_disable();
2432 /* Drivers must not modify the NAPI state if they
2433 * consume the entire weight. In such cases this code
2434 * still "owns" the NAPI instance and therefore can
2435 * move the instance around on the list at-will.
2437 if (unlikely(work == weight)) {
2438 if (unlikely(napi_disable_pending(n)))
2439 __napi_complete(n);
2440 else
2441 list_move_tail(&n->poll_list, list);
2444 netpoll_poll_unlock(have);
2446 out:
2447 local_irq_enable();
2449 #ifdef CONFIG_NET_DMA
2451 * There may not be any more sk_buffs coming right now, so push
2452 * any pending DMA copies to hardware
2454 if (!cpus_empty(net_dma.channel_mask)) {
2455 int chan_idx;
2456 for_each_cpu_mask_nr(chan_idx, net_dma.channel_mask) {
2457 struct dma_chan *chan = net_dma.channels[chan_idx];
2458 if (chan)
2459 dma_async_memcpy_issue_pending(chan);
2462 #endif
2464 return;
2466 softnet_break:
2467 __get_cpu_var(netdev_rx_stat).time_squeeze++;
2468 __raise_softirq_irqoff(NET_RX_SOFTIRQ);
2469 goto out;
2472 static gifconf_func_t * gifconf_list [NPROTO];
2475 * register_gifconf - register a SIOCGIF handler
2476 * @family: Address family
2477 * @gifconf: Function handler
2479 * Register protocol dependent address dumping routines. The handler
2480 * that is passed must not be freed or reused until it has been replaced
2481 * by another handler.
2483 int register_gifconf(unsigned int family, gifconf_func_t * gifconf)
2485 if (family >= NPROTO)
2486 return -EINVAL;
2487 gifconf_list[family] = gifconf;
2488 return 0;
2493 * Map an interface index to its name (SIOCGIFNAME)
2497 * We need this ioctl for efficient implementation of the
2498 * if_indextoname() function required by the IPv6 API. Without
2499 * it, we would have to search all the interfaces to find a
2500 * match. --pb
2503 static int dev_ifname(struct net *net, struct ifreq __user *arg)
2505 struct net_device *dev;
2506 struct ifreq ifr;
2509 * Fetch the caller's info block.
2512 if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
2513 return -EFAULT;
2515 read_lock(&dev_base_lock);
2516 dev = __dev_get_by_index(net, ifr.ifr_ifindex);
2517 if (!dev) {
2518 read_unlock(&dev_base_lock);
2519 return -ENODEV;
2522 strcpy(ifr.ifr_name, dev->name);
2523 read_unlock(&dev_base_lock);
2525 if (copy_to_user(arg, &ifr, sizeof(struct ifreq)))
2526 return -EFAULT;
2527 return 0;
2531 * Perform a SIOCGIFCONF call. This structure will change
2532 * size eventually, and there is nothing I can do about it.
2533 * Thus we will need a 'compatibility mode'.
2536 static int dev_ifconf(struct net *net, char __user *arg)
2538 struct ifconf ifc;
2539 struct net_device *dev;
2540 char __user *pos;
2541 int len;
2542 int total;
2543 int i;
2546 * Fetch the caller's info block.
2549 if (copy_from_user(&ifc, arg, sizeof(struct ifconf)))
2550 return -EFAULT;
2552 pos = ifc.ifc_buf;
2553 len = ifc.ifc_len;
2556 * Loop over the interfaces, and write an info block for each.
2559 total = 0;
2560 for_each_netdev(net, dev) {
2561 for (i = 0; i < NPROTO; i++) {
2562 if (gifconf_list[i]) {
2563 int done;
2564 if (!pos)
2565 done = gifconf_list[i](dev, NULL, 0);
2566 else
2567 done = gifconf_list[i](dev, pos + total,
2568 len - total);
2569 if (done < 0)
2570 return -EFAULT;
2571 total += done;
2577 * All done. Write the updated control block back to the caller.
2579 ifc.ifc_len = total;
2582 * Both BSD and Solaris return 0 here, so we do too.
2584 return copy_to_user(arg, &ifc, sizeof(struct ifconf)) ? -EFAULT : 0;
2587 #ifdef CONFIG_PROC_FS
2589 * This is invoked by the /proc filesystem handler to display a device
2590 * in detail.
2592 void *dev_seq_start(struct seq_file *seq, loff_t *pos)
2593 __acquires(dev_base_lock)
2595 struct net *net = seq_file_net(seq);
2596 loff_t off;
2597 struct net_device *dev;
2599 read_lock(&dev_base_lock);
2600 if (!*pos)
2601 return SEQ_START_TOKEN;
2603 off = 1;
2604 for_each_netdev(net, dev)
2605 if (off++ == *pos)
2606 return dev;
2608 return NULL;
2611 void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2613 struct net *net = seq_file_net(seq);
2614 ++*pos;
2615 return v == SEQ_START_TOKEN ?
2616 first_net_device(net) : next_net_device((struct net_device *)v);
2619 void dev_seq_stop(struct seq_file *seq, void *v)
2620 __releases(dev_base_lock)
2622 read_unlock(&dev_base_lock);
2625 static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev)
2627 const struct net_device_stats *stats = dev_get_stats(dev);
2629 seq_printf(seq, "%6s:%8lu %7lu %4lu %4lu %4lu %5lu %10lu %9lu "
2630 "%8lu %7lu %4lu %4lu %4lu %5lu %7lu %10lu\n",
2631 dev->name, stats->rx_bytes, stats->rx_packets,
2632 stats->rx_errors,
2633 stats->rx_dropped + stats->rx_missed_errors,
2634 stats->rx_fifo_errors,
2635 stats->rx_length_errors + stats->rx_over_errors +
2636 stats->rx_crc_errors + stats->rx_frame_errors,
2637 stats->rx_compressed, stats->multicast,
2638 stats->tx_bytes, stats->tx_packets,
2639 stats->tx_errors, stats->tx_dropped,
2640 stats->tx_fifo_errors, stats->collisions,
2641 stats->tx_carrier_errors +
2642 stats->tx_aborted_errors +
2643 stats->tx_window_errors +
2644 stats->tx_heartbeat_errors,
2645 stats->tx_compressed);
2649 * Called from the PROCfs module. This now uses the new arbitrary sized
2650 * /proc/net interface to create /proc/net/dev
2652 static int dev_seq_show(struct seq_file *seq, void *v)
2654 if (v == SEQ_START_TOKEN)
2655 seq_puts(seq, "Inter-| Receive "
2656 " | Transmit\n"
2657 " face |bytes packets errs drop fifo frame "
2658 "compressed multicast|bytes packets errs "
2659 "drop fifo colls carrier compressed\n");
2660 else
2661 dev_seq_printf_stats(seq, v);
2662 return 0;
2665 static struct netif_rx_stats *softnet_get_online(loff_t *pos)
2667 struct netif_rx_stats *rc = NULL;
2669 while (*pos < nr_cpu_ids)
2670 if (cpu_online(*pos)) {
2671 rc = &per_cpu(netdev_rx_stat, *pos);
2672 break;
2673 } else
2674 ++*pos;
2675 return rc;
2678 static void *softnet_seq_start(struct seq_file *seq, loff_t *pos)
2680 return softnet_get_online(pos);
2683 static void *softnet_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2685 ++*pos;
2686 return softnet_get_online(pos);
2689 static void softnet_seq_stop(struct seq_file *seq, void *v)
2693 static int softnet_seq_show(struct seq_file *seq, void *v)
2695 struct netif_rx_stats *s = v;
2697 seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x %08x\n",
2698 s->total, s->dropped, s->time_squeeze, 0,
2699 0, 0, 0, 0, /* was fastroute */
2700 s->cpu_collision );
2701 return 0;
2704 static const struct seq_operations dev_seq_ops = {
2705 .start = dev_seq_start,
2706 .next = dev_seq_next,
2707 .stop = dev_seq_stop,
2708 .show = dev_seq_show,
2711 static int dev_seq_open(struct inode *inode, struct file *file)
2713 return seq_open_net(inode, file, &dev_seq_ops,
2714 sizeof(struct seq_net_private));
2717 static const struct file_operations dev_seq_fops = {
2718 .owner = THIS_MODULE,
2719 .open = dev_seq_open,
2720 .read = seq_read,
2721 .llseek = seq_lseek,
2722 .release = seq_release_net,
2725 static const struct seq_operations softnet_seq_ops = {
2726 .start = softnet_seq_start,
2727 .next = softnet_seq_next,
2728 .stop = softnet_seq_stop,
2729 .show = softnet_seq_show,
2732 static int softnet_seq_open(struct inode *inode, struct file *file)
2734 return seq_open(file, &softnet_seq_ops);
2737 static const struct file_operations softnet_seq_fops = {
2738 .owner = THIS_MODULE,
2739 .open = softnet_seq_open,
2740 .read = seq_read,
2741 .llseek = seq_lseek,
2742 .release = seq_release,
2745 static void *ptype_get_idx(loff_t pos)
2747 struct packet_type *pt = NULL;
2748 loff_t i = 0;
2749 int t;
2751 list_for_each_entry_rcu(pt, &ptype_all, list) {
2752 if (i == pos)
2753 return pt;
2754 ++i;
2757 for (t = 0; t < PTYPE_HASH_SIZE; t++) {
2758 list_for_each_entry_rcu(pt, &ptype_base[t], list) {
2759 if (i == pos)
2760 return pt;
2761 ++i;
2764 return NULL;
2767 static void *ptype_seq_start(struct seq_file *seq, loff_t *pos)
2768 __acquires(RCU)
2770 rcu_read_lock();
2771 return *pos ? ptype_get_idx(*pos - 1) : SEQ_START_TOKEN;
2774 static void *ptype_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2776 struct packet_type *pt;
2777 struct list_head *nxt;
2778 int hash;
2780 ++*pos;
2781 if (v == SEQ_START_TOKEN)
2782 return ptype_get_idx(0);
2784 pt = v;
2785 nxt = pt->list.next;
2786 if (pt->type == htons(ETH_P_ALL)) {
2787 if (nxt != &ptype_all)
2788 goto found;
2789 hash = 0;
2790 nxt = ptype_base[0].next;
2791 } else
2792 hash = ntohs(pt->type) & PTYPE_HASH_MASK;
2794 while (nxt == &ptype_base[hash]) {
2795 if (++hash >= PTYPE_HASH_SIZE)
2796 return NULL;
2797 nxt = ptype_base[hash].next;
2799 found:
2800 return list_entry(nxt, struct packet_type, list);
2803 static void ptype_seq_stop(struct seq_file *seq, void *v)
2804 __releases(RCU)
2806 rcu_read_unlock();
2809 static int ptype_seq_show(struct seq_file *seq, void *v)
2811 struct packet_type *pt = v;
2813 if (v == SEQ_START_TOKEN)
2814 seq_puts(seq, "Type Device Function\n");
2815 else if (pt->dev == NULL || dev_net(pt->dev) == seq_file_net(seq)) {
2816 if (pt->type == htons(ETH_P_ALL))
2817 seq_puts(seq, "ALL ");
2818 else
2819 seq_printf(seq, "%04x", ntohs(pt->type));
2821 seq_printf(seq, " %-8s %pF\n",
2822 pt->dev ? pt->dev->name : "", pt->func);
2825 return 0;
2828 static const struct seq_operations ptype_seq_ops = {
2829 .start = ptype_seq_start,
2830 .next = ptype_seq_next,
2831 .stop = ptype_seq_stop,
2832 .show = ptype_seq_show,
2835 static int ptype_seq_open(struct inode *inode, struct file *file)
2837 return seq_open_net(inode, file, &ptype_seq_ops,
2838 sizeof(struct seq_net_private));
2841 static const struct file_operations ptype_seq_fops = {
2842 .owner = THIS_MODULE,
2843 .open = ptype_seq_open,
2844 .read = seq_read,
2845 .llseek = seq_lseek,
2846 .release = seq_release_net,
2850 static int __net_init dev_proc_net_init(struct net *net)
2852 int rc = -ENOMEM;
2854 if (!proc_net_fops_create(net, "dev", S_IRUGO, &dev_seq_fops))
2855 goto out;
2856 if (!proc_net_fops_create(net, "softnet_stat", S_IRUGO, &softnet_seq_fops))
2857 goto out_dev;
2858 if (!proc_net_fops_create(net, "ptype", S_IRUGO, &ptype_seq_fops))
2859 goto out_softnet;
2861 if (wext_proc_init(net))
2862 goto out_ptype;
2863 rc = 0;
2864 out:
2865 return rc;
2866 out_ptype:
2867 proc_net_remove(net, "ptype");
2868 out_softnet:
2869 proc_net_remove(net, "softnet_stat");
2870 out_dev:
2871 proc_net_remove(net, "dev");
2872 goto out;
2875 static void __net_exit dev_proc_net_exit(struct net *net)
2877 wext_proc_exit(net);
2879 proc_net_remove(net, "ptype");
2880 proc_net_remove(net, "softnet_stat");
2881 proc_net_remove(net, "dev");
2884 static struct pernet_operations __net_initdata dev_proc_ops = {
2885 .init = dev_proc_net_init,
2886 .exit = dev_proc_net_exit,
2889 static int __init dev_proc_init(void)
2891 return register_pernet_subsys(&dev_proc_ops);
2893 #else
2894 #define dev_proc_init() 0
2895 #endif /* CONFIG_PROC_FS */
2899 * netdev_set_master - set up master/slave pair
2900 * @slave: slave device
2901 * @master: new master device
2903 * Changes the master device of the slave. Pass %NULL to break the
2904 * bonding. The caller must hold the RTNL semaphore. On a failure
2905 * a negative errno code is returned. On success the reference counts
2906 * are adjusted, %RTM_NEWLINK is sent to the routing socket and the
2907 * function returns zero.
2909 int netdev_set_master(struct net_device *slave, struct net_device *master)
2911 struct net_device *old = slave->master;
2913 ASSERT_RTNL();
2915 if (master) {
2916 if (old)
2917 return -EBUSY;
2918 dev_hold(master);
2921 slave->master = master;
2923 synchronize_net();
2925 if (old)
2926 dev_put(old);
2928 if (master)
2929 slave->flags |= IFF_SLAVE;
2930 else
2931 slave->flags &= ~IFF_SLAVE;
2933 rtmsg_ifinfo(RTM_NEWLINK, slave, IFF_SLAVE);
2934 return 0;
2937 static void dev_change_rx_flags(struct net_device *dev, int flags)
2939 const struct net_device_ops *ops = dev->netdev_ops;
2941 if ((dev->flags & IFF_UP) && ops->ndo_change_rx_flags)
2942 ops->ndo_change_rx_flags(dev, flags);
2945 static int __dev_set_promiscuity(struct net_device *dev, int inc)
2947 unsigned short old_flags = dev->flags;
2949 ASSERT_RTNL();
2951 dev->flags |= IFF_PROMISC;
2952 dev->promiscuity += inc;
2953 if (dev->promiscuity == 0) {
2955 * Avoid overflow.
2956 * If inc causes overflow, untouch promisc and return error.
2958 if (inc < 0)
2959 dev->flags &= ~IFF_PROMISC;
2960 else {
2961 dev->promiscuity -= inc;
2962 printk(KERN_WARNING "%s: promiscuity touches roof, "
2963 "set promiscuity failed, promiscuity feature "
2964 "of device might be broken.\n", dev->name);
2965 return -EOVERFLOW;
2968 if (dev->flags != old_flags) {
2969 printk(KERN_INFO "device %s %s promiscuous mode\n",
2970 dev->name, (dev->flags & IFF_PROMISC) ? "entered" :
2971 "left");
2972 if (audit_enabled)
2973 audit_log(current->audit_context, GFP_ATOMIC,
2974 AUDIT_ANOM_PROMISCUOUS,
2975 "dev=%s prom=%d old_prom=%d auid=%u uid=%u gid=%u ses=%u",
2976 dev->name, (dev->flags & IFF_PROMISC),
2977 (old_flags & IFF_PROMISC),
2978 audit_get_loginuid(current),
2979 current->uid, current->gid,
2980 audit_get_sessionid(current));
2982 dev_change_rx_flags(dev, IFF_PROMISC);
2984 return 0;
2988 * dev_set_promiscuity - update promiscuity count on a device
2989 * @dev: device
2990 * @inc: modifier
2992 * Add or remove promiscuity from a device. While the count in the device
2993 * remains above zero the interface remains promiscuous. Once it hits zero
2994 * the device reverts back to normal filtering operation. A negative inc
2995 * value is used to drop promiscuity on the device.
2996 * Return 0 if successful or a negative errno code on error.
2998 int dev_set_promiscuity(struct net_device *dev, int inc)
3000 unsigned short old_flags = dev->flags;
3001 int err;
3003 err = __dev_set_promiscuity(dev, inc);
3004 if (err < 0)
3005 return err;
3006 if (dev->flags != old_flags)
3007 dev_set_rx_mode(dev);
3008 return err;
3012 * dev_set_allmulti - update allmulti count on a device
3013 * @dev: device
3014 * @inc: modifier
3016 * Add or remove reception of all multicast frames to a device. While the
3017 * count in the device remains above zero the interface remains listening
3018 * to all interfaces. Once it hits zero the device reverts back to normal
3019 * filtering operation. A negative @inc value is used to drop the counter
3020 * when releasing a resource needing all multicasts.
3021 * Return 0 if successful or a negative errno code on error.
3024 int dev_set_allmulti(struct net_device *dev, int inc)
3026 unsigned short old_flags = dev->flags;
3028 ASSERT_RTNL();
3030 dev->flags |= IFF_ALLMULTI;
3031 dev->allmulti += inc;
3032 if (dev->allmulti == 0) {
3034 * Avoid overflow.
3035 * If inc causes overflow, untouch allmulti and return error.
3037 if (inc < 0)
3038 dev->flags &= ~IFF_ALLMULTI;
3039 else {
3040 dev->allmulti -= inc;
3041 printk(KERN_WARNING "%s: allmulti touches roof, "
3042 "set allmulti failed, allmulti feature of "
3043 "device might be broken.\n", dev->name);
3044 return -EOVERFLOW;
3047 if (dev->flags ^ old_flags) {
3048 dev_change_rx_flags(dev, IFF_ALLMULTI);
3049 dev_set_rx_mode(dev);
3051 return 0;
3055 * Upload unicast and multicast address lists to device and
3056 * configure RX filtering. When the device doesn't support unicast
3057 * filtering it is put in promiscuous mode while unicast addresses
3058 * are present.
3060 void __dev_set_rx_mode(struct net_device *dev)
3062 const struct net_device_ops *ops = dev->netdev_ops;
3064 /* dev_open will call this function so the list will stay sane. */
3065 if (!(dev->flags&IFF_UP))
3066 return;
3068 if (!netif_device_present(dev))
3069 return;
3071 if (ops->ndo_set_rx_mode)
3072 ops->ndo_set_rx_mode(dev);
3073 else {
3074 /* Unicast addresses changes may only happen under the rtnl,
3075 * therefore calling __dev_set_promiscuity here is safe.
3077 if (dev->uc_count > 0 && !dev->uc_promisc) {
3078 __dev_set_promiscuity(dev, 1);
3079 dev->uc_promisc = 1;
3080 } else if (dev->uc_count == 0 && dev->uc_promisc) {
3081 __dev_set_promiscuity(dev, -1);
3082 dev->uc_promisc = 0;
3085 if (ops->ndo_set_multicast_list)
3086 ops->ndo_set_multicast_list(dev);
3090 void dev_set_rx_mode(struct net_device *dev)
3092 netif_addr_lock_bh(dev);
3093 __dev_set_rx_mode(dev);
3094 netif_addr_unlock_bh(dev);
3097 int __dev_addr_delete(struct dev_addr_list **list, int *count,
3098 void *addr, int alen, int glbl)
3100 struct dev_addr_list *da;
3102 for (; (da = *list) != NULL; list = &da->next) {
3103 if (memcmp(da->da_addr, addr, da->da_addrlen) == 0 &&
3104 alen == da->da_addrlen) {
3105 if (glbl) {
3106 int old_glbl = da->da_gusers;
3107 da->da_gusers = 0;
3108 if (old_glbl == 0)
3109 break;
3111 if (--da->da_users)
3112 return 0;
3114 *list = da->next;
3115 kfree(da);
3116 (*count)--;
3117 return 0;
3120 return -ENOENT;
3123 int __dev_addr_add(struct dev_addr_list **list, int *count,
3124 void *addr, int alen, int glbl)
3126 struct dev_addr_list *da;
3128 for (da = *list; da != NULL; da = da->next) {
3129 if (memcmp(da->da_addr, addr, da->da_addrlen) == 0 &&
3130 da->da_addrlen == alen) {
3131 if (glbl) {
3132 int old_glbl = da->da_gusers;
3133 da->da_gusers = 1;
3134 if (old_glbl)
3135 return 0;
3137 da->da_users++;
3138 return 0;
3142 da = kzalloc(sizeof(*da), GFP_ATOMIC);
3143 if (da == NULL)
3144 return -ENOMEM;
3145 memcpy(da->da_addr, addr, alen);
3146 da->da_addrlen = alen;
3147 da->da_users = 1;
3148 da->da_gusers = glbl ? 1 : 0;
3149 da->next = *list;
3150 *list = da;
3151 (*count)++;
3152 return 0;
3156 * dev_unicast_delete - Release secondary unicast address.
3157 * @dev: device
3158 * @addr: address to delete
3159 * @alen: length of @addr
3161 * Release reference to a secondary unicast address and remove it
3162 * from the device if the reference count drops to zero.
3164 * The caller must hold the rtnl_mutex.
3166 int dev_unicast_delete(struct net_device *dev, void *addr, int alen)
3168 int err;
3170 ASSERT_RTNL();
3172 netif_addr_lock_bh(dev);
3173 err = __dev_addr_delete(&dev->uc_list, &dev->uc_count, addr, alen, 0);
3174 if (!err)
3175 __dev_set_rx_mode(dev);
3176 netif_addr_unlock_bh(dev);
3177 return err;
3179 EXPORT_SYMBOL(dev_unicast_delete);
3182 * dev_unicast_add - add a secondary unicast address
3183 * @dev: device
3184 * @addr: address to add
3185 * @alen: length of @addr
3187 * Add a secondary unicast address to the device or increase
3188 * the reference count if it already exists.
3190 * The caller must hold the rtnl_mutex.
3192 int dev_unicast_add(struct net_device *dev, void *addr, int alen)
3194 int err;
3196 ASSERT_RTNL();
3198 netif_addr_lock_bh(dev);
3199 err = __dev_addr_add(&dev->uc_list, &dev->uc_count, addr, alen, 0);
3200 if (!err)
3201 __dev_set_rx_mode(dev);
3202 netif_addr_unlock_bh(dev);
3203 return err;
3205 EXPORT_SYMBOL(dev_unicast_add);
3207 int __dev_addr_sync(struct dev_addr_list **to, int *to_count,
3208 struct dev_addr_list **from, int *from_count)
3210 struct dev_addr_list *da, *next;
3211 int err = 0;
3213 da = *from;
3214 while (da != NULL) {
3215 next = da->next;
3216 if (!da->da_synced) {
3217 err = __dev_addr_add(to, to_count,
3218 da->da_addr, da->da_addrlen, 0);
3219 if (err < 0)
3220 break;
3221 da->da_synced = 1;
3222 da->da_users++;
3223 } else if (da->da_users == 1) {
3224 __dev_addr_delete(to, to_count,
3225 da->da_addr, da->da_addrlen, 0);
3226 __dev_addr_delete(from, from_count,
3227 da->da_addr, da->da_addrlen, 0);
3229 da = next;
3231 return err;
3234 void __dev_addr_unsync(struct dev_addr_list **to, int *to_count,
3235 struct dev_addr_list **from, int *from_count)
3237 struct dev_addr_list *da, *next;
3239 da = *from;
3240 while (da != NULL) {
3241 next = da->next;
3242 if (da->da_synced) {
3243 __dev_addr_delete(to, to_count,
3244 da->da_addr, da->da_addrlen, 0);
3245 da->da_synced = 0;
3246 __dev_addr_delete(from, from_count,
3247 da->da_addr, da->da_addrlen, 0);
3249 da = next;
3254 * dev_unicast_sync - Synchronize device's unicast list to another device
3255 * @to: destination device
3256 * @from: source device
3258 * Add newly added addresses to the destination device and release
3259 * addresses that have no users left. The source device must be
3260 * locked by netif_tx_lock_bh.
3262 * This function is intended to be called from the dev->set_rx_mode
3263 * function of layered software devices.
3265 int dev_unicast_sync(struct net_device *to, struct net_device *from)
3267 int err = 0;
3269 netif_addr_lock_bh(to);
3270 err = __dev_addr_sync(&to->uc_list, &to->uc_count,
3271 &from->uc_list, &from->uc_count);
3272 if (!err)
3273 __dev_set_rx_mode(to);
3274 netif_addr_unlock_bh(to);
3275 return err;
3277 EXPORT_SYMBOL(dev_unicast_sync);
3280 * dev_unicast_unsync - Remove synchronized addresses from the destination device
3281 * @to: destination device
3282 * @from: source device
3284 * Remove all addresses that were added to the destination device by
3285 * dev_unicast_sync(). This function is intended to be called from the
3286 * dev->stop function of layered software devices.
3288 void dev_unicast_unsync(struct net_device *to, struct net_device *from)
3290 netif_addr_lock_bh(from);
3291 netif_addr_lock(to);
3293 __dev_addr_unsync(&to->uc_list, &to->uc_count,
3294 &from->uc_list, &from->uc_count);
3295 __dev_set_rx_mode(to);
3297 netif_addr_unlock(to);
3298 netif_addr_unlock_bh(from);
3300 EXPORT_SYMBOL(dev_unicast_unsync);
3302 static void __dev_addr_discard(struct dev_addr_list **list)
3304 struct dev_addr_list *tmp;
3306 while (*list != NULL) {
3307 tmp = *list;
3308 *list = tmp->next;
3309 if (tmp->da_users > tmp->da_gusers)
3310 printk("__dev_addr_discard: address leakage! "
3311 "da_users=%d\n", tmp->da_users);
3312 kfree(tmp);
3316 static void dev_addr_discard(struct net_device *dev)
3318 netif_addr_lock_bh(dev);
3320 __dev_addr_discard(&dev->uc_list);
3321 dev->uc_count = 0;
3323 __dev_addr_discard(&dev->mc_list);
3324 dev->mc_count = 0;
3326 netif_addr_unlock_bh(dev);
3330 * dev_get_flags - get flags reported to userspace
3331 * @dev: device
3333 * Get the combination of flag bits exported through APIs to userspace.
3335 unsigned dev_get_flags(const struct net_device *dev)
3337 unsigned flags;
3339 flags = (dev->flags & ~(IFF_PROMISC |
3340 IFF_ALLMULTI |
3341 IFF_RUNNING |
3342 IFF_LOWER_UP |
3343 IFF_DORMANT)) |
3344 (dev->gflags & (IFF_PROMISC |
3345 IFF_ALLMULTI));
3347 if (netif_running(dev)) {
3348 if (netif_oper_up(dev))
3349 flags |= IFF_RUNNING;
3350 if (netif_carrier_ok(dev))
3351 flags |= IFF_LOWER_UP;
3352 if (netif_dormant(dev))
3353 flags |= IFF_DORMANT;
3356 return flags;
3360 * dev_change_flags - change device settings
3361 * @dev: device
3362 * @flags: device state flags
3364 * Change settings on device based state flags. The flags are
3365 * in the userspace exported format.
3367 int dev_change_flags(struct net_device *dev, unsigned flags)
3369 int ret, changes;
3370 int old_flags = dev->flags;
3372 ASSERT_RTNL();
3375 * Set the flags on our device.
3378 dev->flags = (flags & (IFF_DEBUG | IFF_NOTRAILERS | IFF_NOARP |
3379 IFF_DYNAMIC | IFF_MULTICAST | IFF_PORTSEL |
3380 IFF_AUTOMEDIA)) |
3381 (dev->flags & (IFF_UP | IFF_VOLATILE | IFF_PROMISC |
3382 IFF_ALLMULTI));
3385 * Load in the correct multicast list now the flags have changed.
3388 if ((old_flags ^ flags) & IFF_MULTICAST)
3389 dev_change_rx_flags(dev, IFF_MULTICAST);
3391 dev_set_rx_mode(dev);
3394 * Have we downed the interface. We handle IFF_UP ourselves
3395 * according to user attempts to set it, rather than blindly
3396 * setting it.
3399 ret = 0;
3400 if ((old_flags ^ flags) & IFF_UP) { /* Bit is different ? */
3401 ret = ((old_flags & IFF_UP) ? dev_close : dev_open)(dev);
3403 if (!ret)
3404 dev_set_rx_mode(dev);
3407 if (dev->flags & IFF_UP &&
3408 ((old_flags ^ dev->flags) &~ (IFF_UP | IFF_PROMISC | IFF_ALLMULTI |
3409 IFF_VOLATILE)))
3410 call_netdevice_notifiers(NETDEV_CHANGE, dev);
3412 if ((flags ^ dev->gflags) & IFF_PROMISC) {
3413 int inc = (flags & IFF_PROMISC) ? +1 : -1;
3414 dev->gflags ^= IFF_PROMISC;
3415 dev_set_promiscuity(dev, inc);
3418 /* NOTE: order of synchronization of IFF_PROMISC and IFF_ALLMULTI
3419 is important. Some (broken) drivers set IFF_PROMISC, when
3420 IFF_ALLMULTI is requested not asking us and not reporting.
3422 if ((flags ^ dev->gflags) & IFF_ALLMULTI) {
3423 int inc = (flags & IFF_ALLMULTI) ? +1 : -1;
3424 dev->gflags ^= IFF_ALLMULTI;
3425 dev_set_allmulti(dev, inc);
3428 /* Exclude state transition flags, already notified */
3429 changes = (old_flags ^ dev->flags) & ~(IFF_UP | IFF_RUNNING);
3430 if (changes)
3431 rtmsg_ifinfo(RTM_NEWLINK, dev, changes);
3433 return ret;
3437 * dev_set_mtu - Change maximum transfer unit
3438 * @dev: device
3439 * @new_mtu: new transfer unit
3441 * Change the maximum transfer size of the network device.
3443 int dev_set_mtu(struct net_device *dev, int new_mtu)
3445 const struct net_device_ops *ops = dev->netdev_ops;
3446 int err;
3448 if (new_mtu == dev->mtu)
3449 return 0;
3451 /* MTU must be positive. */
3452 if (new_mtu < 0)
3453 return -EINVAL;
3455 if (!netif_device_present(dev))
3456 return -ENODEV;
3458 err = 0;
3459 if (ops->ndo_change_mtu)
3460 err = ops->ndo_change_mtu(dev, new_mtu);
3461 else
3462 dev->mtu = new_mtu;
3464 if (!err && dev->flags & IFF_UP)
3465 call_netdevice_notifiers(NETDEV_CHANGEMTU, dev);
3466 return err;
3470 * dev_set_mac_address - Change Media Access Control Address
3471 * @dev: device
3472 * @sa: new address
3474 * Change the hardware (MAC) address of the device
3476 int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa)
3478 const struct net_device_ops *ops = dev->netdev_ops;
3479 int err;
3481 if (!ops->ndo_set_mac_address)
3482 return -EOPNOTSUPP;
3483 if (sa->sa_family != dev->type)
3484 return -EINVAL;
3485 if (!netif_device_present(dev))
3486 return -ENODEV;
3487 err = ops->ndo_set_mac_address(dev, sa);
3488 if (!err)
3489 call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
3490 return err;
3494 * Perform the SIOCxIFxxx calls, inside read_lock(dev_base_lock)
3496 static int dev_ifsioc_locked(struct net *net, struct ifreq *ifr, unsigned int cmd)
3498 int err;
3499 struct net_device *dev = __dev_get_by_name(net, ifr->ifr_name);
3501 if (!dev)
3502 return -ENODEV;
3504 switch (cmd) {
3505 case SIOCGIFFLAGS: /* Get interface flags */
3506 ifr->ifr_flags = dev_get_flags(dev);
3507 return 0;
3509 case SIOCGIFMETRIC: /* Get the metric on the interface
3510 (currently unused) */
3511 ifr->ifr_metric = 0;
3512 return 0;
3514 case SIOCGIFMTU: /* Get the MTU of a device */
3515 ifr->ifr_mtu = dev->mtu;
3516 return 0;
3518 case SIOCGIFHWADDR:
3519 if (!dev->addr_len)
3520 memset(ifr->ifr_hwaddr.sa_data, 0, sizeof ifr->ifr_hwaddr.sa_data);
3521 else
3522 memcpy(ifr->ifr_hwaddr.sa_data, dev->dev_addr,
3523 min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len));
3524 ifr->ifr_hwaddr.sa_family = dev->type;
3525 return 0;
3527 case SIOCGIFSLAVE:
3528 err = -EINVAL;
3529 break;
3531 case SIOCGIFMAP:
3532 ifr->ifr_map.mem_start = dev->mem_start;
3533 ifr->ifr_map.mem_end = dev->mem_end;
3534 ifr->ifr_map.base_addr = dev->base_addr;
3535 ifr->ifr_map.irq = dev->irq;
3536 ifr->ifr_map.dma = dev->dma;
3537 ifr->ifr_map.port = dev->if_port;
3538 return 0;
3540 case SIOCGIFINDEX:
3541 ifr->ifr_ifindex = dev->ifindex;
3542 return 0;
3544 case SIOCGIFTXQLEN:
3545 ifr->ifr_qlen = dev->tx_queue_len;
3546 return 0;
3548 default:
3549 /* dev_ioctl() should ensure this case
3550 * is never reached
3552 WARN_ON(1);
3553 err = -EINVAL;
3554 break;
3557 return err;
3561 * Perform the SIOCxIFxxx calls, inside rtnl_lock()
3563 static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd)
3565 int err;
3566 struct net_device *dev = __dev_get_by_name(net, ifr->ifr_name);
3567 const struct net_device_ops *ops = dev->netdev_ops;
3569 if (!dev)
3570 return -ENODEV;
3572 switch (cmd) {
3573 case SIOCSIFFLAGS: /* Set interface flags */
3574 return dev_change_flags(dev, ifr->ifr_flags);
3576 case SIOCSIFMETRIC: /* Set the metric on the interface
3577 (currently unused) */
3578 return -EOPNOTSUPP;
3580 case SIOCSIFMTU: /* Set the MTU of a device */
3581 return dev_set_mtu(dev, ifr->ifr_mtu);
3583 case SIOCSIFHWADDR:
3584 return dev_set_mac_address(dev, &ifr->ifr_hwaddr);
3586 case SIOCSIFHWBROADCAST:
3587 if (ifr->ifr_hwaddr.sa_family != dev->type)
3588 return -EINVAL;
3589 memcpy(dev->broadcast, ifr->ifr_hwaddr.sa_data,
3590 min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len));
3591 call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
3592 return 0;
3594 case SIOCSIFMAP:
3595 if (ops->ndo_set_config) {
3596 if (!netif_device_present(dev))
3597 return -ENODEV;
3598 return ops->ndo_set_config(dev, &ifr->ifr_map);
3600 return -EOPNOTSUPP;
3602 case SIOCADDMULTI:
3603 if ((!ops->ndo_set_multicast_list && !ops->ndo_set_rx_mode) ||
3604 ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
3605 return -EINVAL;
3606 if (!netif_device_present(dev))
3607 return -ENODEV;
3608 return dev_mc_add(dev, ifr->ifr_hwaddr.sa_data,
3609 dev->addr_len, 1);
3611 case SIOCDELMULTI:
3612 if ((!ops->ndo_set_multicast_list && !ops->ndo_set_rx_mode) ||
3613 ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
3614 return -EINVAL;
3615 if (!netif_device_present(dev))
3616 return -ENODEV;
3617 return dev_mc_delete(dev, ifr->ifr_hwaddr.sa_data,
3618 dev->addr_len, 1);
3620 case SIOCSIFTXQLEN:
3621 if (ifr->ifr_qlen < 0)
3622 return -EINVAL;
3623 dev->tx_queue_len = ifr->ifr_qlen;
3624 return 0;
3626 case SIOCSIFNAME:
3627 ifr->ifr_newname[IFNAMSIZ-1] = '\0';
3628 return dev_change_name(dev, ifr->ifr_newname);
3631 * Unknown or private ioctl
3634 default:
3635 if ((cmd >= SIOCDEVPRIVATE &&
3636 cmd <= SIOCDEVPRIVATE + 15) ||
3637 cmd == SIOCBONDENSLAVE ||
3638 cmd == SIOCBONDRELEASE ||
3639 cmd == SIOCBONDSETHWADDR ||
3640 cmd == SIOCBONDSLAVEINFOQUERY ||
3641 cmd == SIOCBONDINFOQUERY ||
3642 cmd == SIOCBONDCHANGEACTIVE ||
3643 cmd == SIOCGMIIPHY ||
3644 cmd == SIOCGMIIREG ||
3645 cmd == SIOCSMIIREG ||
3646 cmd == SIOCBRADDIF ||
3647 cmd == SIOCBRDELIF ||
3648 cmd == SIOCWANDEV) {
3649 err = -EOPNOTSUPP;
3650 if (ops->ndo_do_ioctl) {
3651 if (netif_device_present(dev))
3652 err = ops->ndo_do_ioctl(dev, ifr, cmd);
3653 else
3654 err = -ENODEV;
3656 } else
3657 err = -EINVAL;
3660 return err;
3664 * This function handles all "interface"-type I/O control requests. The actual
3665 * 'doing' part of this is dev_ifsioc above.
3669 * dev_ioctl - network device ioctl
3670 * @net: the applicable net namespace
3671 * @cmd: command to issue
3672 * @arg: pointer to a struct ifreq in user space
3674 * Issue ioctl functions to devices. This is normally called by the
3675 * user space syscall interfaces but can sometimes be useful for
3676 * other purposes. The return value is the return from the syscall if
3677 * positive or a negative errno code on error.
3680 int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg)
3682 struct ifreq ifr;
3683 int ret;
3684 char *colon;
3686 /* One special case: SIOCGIFCONF takes ifconf argument
3687 and requires shared lock, because it sleeps writing
3688 to user space.
3691 if (cmd == SIOCGIFCONF) {
3692 rtnl_lock();
3693 ret = dev_ifconf(net, (char __user *) arg);
3694 rtnl_unlock();
3695 return ret;
3697 if (cmd == SIOCGIFNAME)
3698 return dev_ifname(net, (struct ifreq __user *)arg);
3700 if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
3701 return -EFAULT;
3703 ifr.ifr_name[IFNAMSIZ-1] = 0;
3705 colon = strchr(ifr.ifr_name, ':');
3706 if (colon)
3707 *colon = 0;
3710 * See which interface the caller is talking about.
3713 switch (cmd) {
3715 * These ioctl calls:
3716 * - can be done by all.
3717 * - atomic and do not require locking.
3718 * - return a value
3720 case SIOCGIFFLAGS:
3721 case SIOCGIFMETRIC:
3722 case SIOCGIFMTU:
3723 case SIOCGIFHWADDR:
3724 case SIOCGIFSLAVE:
3725 case SIOCGIFMAP:
3726 case SIOCGIFINDEX:
3727 case SIOCGIFTXQLEN:
3728 dev_load(net, ifr.ifr_name);
3729 read_lock(&dev_base_lock);
3730 ret = dev_ifsioc_locked(net, &ifr, cmd);
3731 read_unlock(&dev_base_lock);
3732 if (!ret) {
3733 if (colon)
3734 *colon = ':';
3735 if (copy_to_user(arg, &ifr,
3736 sizeof(struct ifreq)))
3737 ret = -EFAULT;
3739 return ret;
3741 case SIOCETHTOOL:
3742 dev_load(net, ifr.ifr_name);
3743 rtnl_lock();
3744 ret = dev_ethtool(net, &ifr);
3745 rtnl_unlock();
3746 if (!ret) {
3747 if (colon)
3748 *colon = ':';
3749 if (copy_to_user(arg, &ifr,
3750 sizeof(struct ifreq)))
3751 ret = -EFAULT;
3753 return ret;
3756 * These ioctl calls:
3757 * - require superuser power.
3758 * - require strict serialization.
3759 * - return a value
3761 case SIOCGMIIPHY:
3762 case SIOCGMIIREG:
3763 case SIOCSIFNAME:
3764 if (!capable(CAP_NET_ADMIN))
3765 return -EPERM;
3766 dev_load(net, ifr.ifr_name);
3767 rtnl_lock();
3768 ret = dev_ifsioc(net, &ifr, cmd);
3769 rtnl_unlock();
3770 if (!ret) {
3771 if (colon)
3772 *colon = ':';
3773 if (copy_to_user(arg, &ifr,
3774 sizeof(struct ifreq)))
3775 ret = -EFAULT;
3777 return ret;
3780 * These ioctl calls:
3781 * - require superuser power.
3782 * - require strict serialization.
3783 * - do not return a value
3785 case SIOCSIFFLAGS:
3786 case SIOCSIFMETRIC:
3787 case SIOCSIFMTU:
3788 case SIOCSIFMAP:
3789 case SIOCSIFHWADDR:
3790 case SIOCSIFSLAVE:
3791 case SIOCADDMULTI:
3792 case SIOCDELMULTI:
3793 case SIOCSIFHWBROADCAST:
3794 case SIOCSIFTXQLEN:
3795 case SIOCSMIIREG:
3796 case SIOCBONDENSLAVE:
3797 case SIOCBONDRELEASE:
3798 case SIOCBONDSETHWADDR:
3799 case SIOCBONDCHANGEACTIVE:
3800 case SIOCBRADDIF:
3801 case SIOCBRDELIF:
3802 if (!capable(CAP_NET_ADMIN))
3803 return -EPERM;
3804 /* fall through */
3805 case SIOCBONDSLAVEINFOQUERY:
3806 case SIOCBONDINFOQUERY:
3807 dev_load(net, ifr.ifr_name);
3808 rtnl_lock();
3809 ret = dev_ifsioc(net, &ifr, cmd);
3810 rtnl_unlock();
3811 return ret;
3813 case SIOCGIFMEM:
3814 /* Get the per device memory space. We can add this but
3815 * currently do not support it */
3816 case SIOCSIFMEM:
3817 /* Set the per device memory buffer space.
3818 * Not applicable in our case */
3819 case SIOCSIFLINK:
3820 return -EINVAL;
3823 * Unknown or private ioctl.
3825 default:
3826 if (cmd == SIOCWANDEV ||
3827 (cmd >= SIOCDEVPRIVATE &&
3828 cmd <= SIOCDEVPRIVATE + 15)) {
3829 dev_load(net, ifr.ifr_name);
3830 rtnl_lock();
3831 ret = dev_ifsioc(net, &ifr, cmd);
3832 rtnl_unlock();
3833 if (!ret && copy_to_user(arg, &ifr,
3834 sizeof(struct ifreq)))
3835 ret = -EFAULT;
3836 return ret;
3838 /* Take care of Wireless Extensions */
3839 if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST)
3840 return wext_handle_ioctl(net, &ifr, cmd, arg);
3841 return -EINVAL;
3847 * dev_new_index - allocate an ifindex
3848 * @net: the applicable net namespace
3850 * Returns a suitable unique value for a new device interface
3851 * number. The caller must hold the rtnl semaphore or the
3852 * dev_base_lock to be sure it remains unique.
3854 static int dev_new_index(struct net *net)
3856 static int ifindex;
3857 for (;;) {
3858 if (++ifindex <= 0)
3859 ifindex = 1;
3860 if (!__dev_get_by_index(net, ifindex))
3861 return ifindex;
3865 /* Delayed registration/unregisteration */
3866 static LIST_HEAD(net_todo_list);
3868 static void net_set_todo(struct net_device *dev)
3870 list_add_tail(&dev->todo_list, &net_todo_list);
3873 static void rollback_registered(struct net_device *dev)
3875 BUG_ON(dev_boot_phase);
3876 ASSERT_RTNL();
3878 /* Some devices call without registering for initialization unwind. */
3879 if (dev->reg_state == NETREG_UNINITIALIZED) {
3880 printk(KERN_DEBUG "unregister_netdevice: device %s/%p never "
3881 "was registered\n", dev->name, dev);
3883 WARN_ON(1);
3884 return;
3887 BUG_ON(dev->reg_state != NETREG_REGISTERED);
3889 /* If device is running, close it first. */
3890 dev_close(dev);
3892 /* And unlink it from device chain. */
3893 unlist_netdevice(dev);
3895 dev->reg_state = NETREG_UNREGISTERING;
3897 synchronize_net();
3899 /* Shutdown queueing discipline. */
3900 dev_shutdown(dev);
3903 /* Notify protocols, that we are about to destroy
3904 this device. They should clean all the things.
3906 call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
3909 * Flush the unicast and multicast chains
3911 dev_addr_discard(dev);
3913 if (dev->netdev_ops->ndo_uninit)
3914 dev->netdev_ops->ndo_uninit(dev);
3916 /* Notifier chain MUST detach us from master device. */
3917 WARN_ON(dev->master);
3919 /* Remove entries from kobject tree */
3920 netdev_unregister_kobject(dev);
3922 synchronize_net();
3924 dev_put(dev);
3927 static void __netdev_init_queue_locks_one(struct net_device *dev,
3928 struct netdev_queue *dev_queue,
3929 void *_unused)
3931 spin_lock_init(&dev_queue->_xmit_lock);
3932 netdev_set_xmit_lockdep_class(&dev_queue->_xmit_lock, dev->type);
3933 dev_queue->xmit_lock_owner = -1;
3936 static void netdev_init_queue_locks(struct net_device *dev)
3938 netdev_for_each_tx_queue(dev, __netdev_init_queue_locks_one, NULL);
3939 __netdev_init_queue_locks_one(dev, &dev->rx_queue, NULL);
3942 unsigned long netdev_fix_features(unsigned long features, const char *name)
3944 /* Fix illegal SG+CSUM combinations. */
3945 if ((features & NETIF_F_SG) &&
3946 !(features & NETIF_F_ALL_CSUM)) {
3947 if (name)
3948 printk(KERN_NOTICE "%s: Dropping NETIF_F_SG since no "
3949 "checksum feature.\n", name);
3950 features &= ~NETIF_F_SG;
3953 /* TSO requires that SG is present as well. */
3954 if ((features & NETIF_F_TSO) && !(features & NETIF_F_SG)) {
3955 if (name)
3956 printk(KERN_NOTICE "%s: Dropping NETIF_F_TSO since no "
3957 "SG feature.\n", name);
3958 features &= ~NETIF_F_TSO;
3961 if (features & NETIF_F_UFO) {
3962 if (!(features & NETIF_F_GEN_CSUM)) {
3963 if (name)
3964 printk(KERN_ERR "%s: Dropping NETIF_F_UFO "
3965 "since no NETIF_F_HW_CSUM feature.\n",
3966 name);
3967 features &= ~NETIF_F_UFO;
3970 if (!(features & NETIF_F_SG)) {
3971 if (name)
3972 printk(KERN_ERR "%s: Dropping NETIF_F_UFO "
3973 "since no NETIF_F_SG feature.\n", name);
3974 features &= ~NETIF_F_UFO;
3978 return features;
3980 EXPORT_SYMBOL(netdev_fix_features);
3983 * register_netdevice - register a network device
3984 * @dev: device to register
3986 * Take a completed network device structure and add it to the kernel
3987 * interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
3988 * chain. 0 is returned on success. A negative errno code is returned
3989 * on a failure to set up the device, or if the name is a duplicate.
3991 * Callers must hold the rtnl semaphore. You may want
3992 * register_netdev() instead of this.
3994 * BUGS:
3995 * The locking appears insufficient to guarantee two parallel registers
3996 * will not get the same name.
3999 int register_netdevice(struct net_device *dev)
4001 struct hlist_head *head;
4002 struct hlist_node *p;
4003 int ret;
4004 struct net *net = dev_net(dev);
4006 BUG_ON(dev_boot_phase);
4007 ASSERT_RTNL();
4009 might_sleep();
4011 /* When net_device's are persistent, this will be fatal. */
4012 BUG_ON(dev->reg_state != NETREG_UNINITIALIZED);
4013 BUG_ON(!net);
4015 spin_lock_init(&dev->addr_list_lock);
4016 netdev_set_addr_lockdep_class(dev);
4017 netdev_init_queue_locks(dev);
4019 dev->iflink = -1;
4021 #ifdef CONFIG_COMPAT_NET_DEV_OPS
4022 /* Netdevice_ops API compatiability support.
4023 * This is temporary until all network devices are converted.
4025 if (dev->netdev_ops) {
4026 const struct net_device_ops *ops = dev->netdev_ops;
4028 dev->init = ops->ndo_init;
4029 dev->uninit = ops->ndo_uninit;
4030 dev->open = ops->ndo_open;
4031 dev->change_rx_flags = ops->ndo_change_rx_flags;
4032 dev->set_rx_mode = ops->ndo_set_rx_mode;
4033 dev->set_multicast_list = ops->ndo_set_multicast_list;
4034 dev->set_mac_address = ops->ndo_set_mac_address;
4035 dev->validate_addr = ops->ndo_validate_addr;
4036 dev->do_ioctl = ops->ndo_do_ioctl;
4037 dev->set_config = ops->ndo_set_config;
4038 dev->change_mtu = ops->ndo_change_mtu;
4039 dev->tx_timeout = ops->ndo_tx_timeout;
4040 dev->get_stats = ops->ndo_get_stats;
4041 dev->vlan_rx_register = ops->ndo_vlan_rx_register;
4042 dev->vlan_rx_add_vid = ops->ndo_vlan_rx_add_vid;
4043 dev->vlan_rx_kill_vid = ops->ndo_vlan_rx_kill_vid;
4044 #ifdef CONFIG_NET_POLL_CONTROLLER
4045 dev->poll_controller = ops->ndo_poll_controller;
4046 #endif
4047 } else {
4048 char drivername[64];
4049 pr_info("%s (%s): not using net_device_ops yet\n",
4050 dev->name, netdev_drivername(dev, drivername, 64));
4052 /* This works only because net_device_ops and the
4053 compatiablity structure are the same. */
4054 dev->netdev_ops = (void *) &(dev->init);
4056 #endif
4058 /* Init, if this function is available */
4059 if (dev->netdev_ops->ndo_init) {
4060 ret = dev->netdev_ops->ndo_init(dev);
4061 if (ret) {
4062 if (ret > 0)
4063 ret = -EIO;
4064 goto out;
4068 if (!dev_valid_name(dev->name)) {
4069 ret = -EINVAL;
4070 goto err_uninit;
4073 dev->ifindex = dev_new_index(net);
4074 if (dev->iflink == -1)
4075 dev->iflink = dev->ifindex;
4077 /* Check for existence of name */
4078 head = dev_name_hash(net, dev->name);
4079 hlist_for_each(p, head) {
4080 struct net_device *d
4081 = hlist_entry(p, struct net_device, name_hlist);
4082 if (!strncmp(d->name, dev->name, IFNAMSIZ)) {
4083 ret = -EEXIST;
4084 goto err_uninit;
4088 /* Fix illegal checksum combinations */
4089 if ((dev->features & NETIF_F_HW_CSUM) &&
4090 (dev->features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
4091 printk(KERN_NOTICE "%s: mixed HW and IP checksum settings.\n",
4092 dev->name);
4093 dev->features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM);
4096 if ((dev->features & NETIF_F_NO_CSUM) &&
4097 (dev->features & (NETIF_F_HW_CSUM|NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
4098 printk(KERN_NOTICE "%s: mixed no checksumming and other settings.\n",
4099 dev->name);
4100 dev->features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM|NETIF_F_HW_CSUM);
4103 dev->features = netdev_fix_features(dev->features, dev->name);
4105 /* Enable software GSO if SG is supported. */
4106 if (dev->features & NETIF_F_SG)
4107 dev->features |= NETIF_F_GSO;
4109 netdev_initialize_kobject(dev);
4110 ret = netdev_register_kobject(dev);
4111 if (ret)
4112 goto err_uninit;
4113 dev->reg_state = NETREG_REGISTERED;
4116 * Default initial state at registry is that the
4117 * device is present.
4120 set_bit(__LINK_STATE_PRESENT, &dev->state);
4122 dev_init_scheduler(dev);
4123 dev_hold(dev);
4124 list_netdevice(dev);
4126 /* Notify protocols, that a new device appeared. */
4127 ret = call_netdevice_notifiers(NETDEV_REGISTER, dev);
4128 ret = notifier_to_errno(ret);
4129 if (ret) {
4130 rollback_registered(dev);
4131 dev->reg_state = NETREG_UNREGISTERED;
4134 out:
4135 return ret;
4137 err_uninit:
4138 if (dev->netdev_ops->ndo_uninit)
4139 dev->netdev_ops->ndo_uninit(dev);
4140 goto out;
4144 * register_netdev - register a network device
4145 * @dev: device to register
4147 * Take a completed network device structure and add it to the kernel
4148 * interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
4149 * chain. 0 is returned on success. A negative errno code is returned
4150 * on a failure to set up the device, or if the name is a duplicate.
4152 * This is a wrapper around register_netdevice that takes the rtnl semaphore
4153 * and expands the device name if you passed a format string to
4154 * alloc_netdev.
4156 int register_netdev(struct net_device *dev)
4158 int err;
4160 rtnl_lock();
4163 * If the name is a format string the caller wants us to do a
4164 * name allocation.
4166 if (strchr(dev->name, '%')) {
4167 err = dev_alloc_name(dev, dev->name);
4168 if (err < 0)
4169 goto out;
4172 err = register_netdevice(dev);
4173 out:
4174 rtnl_unlock();
4175 return err;
4177 EXPORT_SYMBOL(register_netdev);
4180 * netdev_wait_allrefs - wait until all references are gone.
4182 * This is called when unregistering network devices.
4184 * Any protocol or device that holds a reference should register
4185 * for netdevice notification, and cleanup and put back the
4186 * reference if they receive an UNREGISTER event.
4187 * We can get stuck here if buggy protocols don't correctly
4188 * call dev_put.
4190 static void netdev_wait_allrefs(struct net_device *dev)
4192 unsigned long rebroadcast_time, warning_time;
4194 rebroadcast_time = warning_time = jiffies;
4195 while (atomic_read(&dev->refcnt) != 0) {
4196 if (time_after(jiffies, rebroadcast_time + 1 * HZ)) {
4197 rtnl_lock();
4199 /* Rebroadcast unregister notification */
4200 call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
4202 if (test_bit(__LINK_STATE_LINKWATCH_PENDING,
4203 &dev->state)) {
4204 /* We must not have linkwatch events
4205 * pending on unregister. If this
4206 * happens, we simply run the queue
4207 * unscheduled, resulting in a noop
4208 * for this device.
4210 linkwatch_run_queue();
4213 __rtnl_unlock();
4215 rebroadcast_time = jiffies;
4218 msleep(250);
4220 if (time_after(jiffies, warning_time + 10 * HZ)) {
4221 printk(KERN_EMERG "unregister_netdevice: "
4222 "waiting for %s to become free. Usage "
4223 "count = %d\n",
4224 dev->name, atomic_read(&dev->refcnt));
4225 warning_time = jiffies;
4230 /* The sequence is:
4232 * rtnl_lock();
4233 * ...
4234 * register_netdevice(x1);
4235 * register_netdevice(x2);
4236 * ...
4237 * unregister_netdevice(y1);
4238 * unregister_netdevice(y2);
4239 * ...
4240 * rtnl_unlock();
4241 * free_netdev(y1);
4242 * free_netdev(y2);
4244 * We are invoked by rtnl_unlock().
4245 * This allows us to deal with problems:
4246 * 1) We can delete sysfs objects which invoke hotplug
4247 * without deadlocking with linkwatch via keventd.
4248 * 2) Since we run with the RTNL semaphore not held, we can sleep
4249 * safely in order to wait for the netdev refcnt to drop to zero.
4251 * We must not return until all unregister events added during
4252 * the interval the lock was held have been completed.
4254 void netdev_run_todo(void)
4256 struct list_head list;
4258 /* Snapshot list, allow later requests */
4259 list_replace_init(&net_todo_list, &list);
4261 __rtnl_unlock();
4263 while (!list_empty(&list)) {
4264 struct net_device *dev
4265 = list_entry(list.next, struct net_device, todo_list);
4266 list_del(&dev->todo_list);
4268 if (unlikely(dev->reg_state != NETREG_UNREGISTERING)) {
4269 printk(KERN_ERR "network todo '%s' but state %d\n",
4270 dev->name, dev->reg_state);
4271 dump_stack();
4272 continue;
4275 dev->reg_state = NETREG_UNREGISTERED;
4277 on_each_cpu(flush_backlog, dev, 1);
4279 netdev_wait_allrefs(dev);
4281 /* paranoia */
4282 BUG_ON(atomic_read(&dev->refcnt));
4283 WARN_ON(dev->ip_ptr);
4284 WARN_ON(dev->ip6_ptr);
4285 WARN_ON(dev->dn_ptr);
4287 if (dev->destructor)
4288 dev->destructor(dev);
4290 /* Free network device */
4291 kobject_put(&dev->dev.kobj);
4296 * dev_get_stats - get network device statistics
4297 * @dev: device to get statistics from
4299 * Get network statistics from device. The device driver may provide
4300 * its own method by setting dev->netdev_ops->get_stats; otherwise
4301 * the internal statistics structure is used.
4303 const struct net_device_stats *dev_get_stats(struct net_device *dev)
4305 const struct net_device_ops *ops = dev->netdev_ops;
4307 if (ops->ndo_get_stats)
4308 return ops->ndo_get_stats(dev);
4309 else
4310 return &dev->stats;
4312 EXPORT_SYMBOL(dev_get_stats);
4314 static void netdev_init_one_queue(struct net_device *dev,
4315 struct netdev_queue *queue,
4316 void *_unused)
4318 queue->dev = dev;
4321 static void netdev_init_queues(struct net_device *dev)
4323 netdev_init_one_queue(dev, &dev->rx_queue, NULL);
4324 netdev_for_each_tx_queue(dev, netdev_init_one_queue, NULL);
4325 spin_lock_init(&dev->tx_global_lock);
4329 * alloc_netdev_mq - allocate network device
4330 * @sizeof_priv: size of private data to allocate space for
4331 * @name: device name format string
4332 * @setup: callback to initialize device
4333 * @queue_count: the number of subqueues to allocate
4335 * Allocates a struct net_device with private data area for driver use
4336 * and performs basic initialization. Also allocates subquue structs
4337 * for each queue on the device at the end of the netdevice.
4339 struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
4340 void (*setup)(struct net_device *), unsigned int queue_count)
4342 struct netdev_queue *tx;
4343 struct net_device *dev;
4344 size_t alloc_size;
4345 void *p;
4347 BUG_ON(strlen(name) >= sizeof(dev->name));
4349 alloc_size = sizeof(struct net_device);
4350 if (sizeof_priv) {
4351 /* ensure 32-byte alignment of private area */
4352 alloc_size = (alloc_size + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST;
4353 alloc_size += sizeof_priv;
4355 /* ensure 32-byte alignment of whole construct */
4356 alloc_size += NETDEV_ALIGN_CONST;
4358 p = kzalloc(alloc_size, GFP_KERNEL);
4359 if (!p) {
4360 printk(KERN_ERR "alloc_netdev: Unable to allocate device.\n");
4361 return NULL;
4364 tx = kcalloc(queue_count, sizeof(struct netdev_queue), GFP_KERNEL);
4365 if (!tx) {
4366 printk(KERN_ERR "alloc_netdev: Unable to allocate "
4367 "tx qdiscs.\n");
4368 kfree(p);
4369 return NULL;
4372 dev = (struct net_device *)
4373 (((long)p + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST);
4374 dev->padded = (char *)dev - (char *)p;
4375 dev_net_set(dev, &init_net);
4377 dev->_tx = tx;
4378 dev->num_tx_queues = queue_count;
4379 dev->real_num_tx_queues = queue_count;
4381 if (sizeof_priv) {
4382 dev->priv = ((char *)dev +
4383 ((sizeof(struct net_device) + NETDEV_ALIGN_CONST)
4384 & ~NETDEV_ALIGN_CONST));
4387 dev->gso_max_size = GSO_MAX_SIZE;
4389 netdev_init_queues(dev);
4391 netpoll_netdev_init(dev);
4392 setup(dev);
4393 strcpy(dev->name, name);
4394 return dev;
4396 EXPORT_SYMBOL(alloc_netdev_mq);
4399 * free_netdev - free network device
4400 * @dev: device
4402 * This function does the last stage of destroying an allocated device
4403 * interface. The reference to the device object is released.
4404 * If this is the last reference then it will be freed.
4406 void free_netdev(struct net_device *dev)
4408 release_net(dev_net(dev));
4410 kfree(dev->_tx);
4412 /* Compatibility with error handling in drivers */
4413 if (dev->reg_state == NETREG_UNINITIALIZED) {
4414 kfree((char *)dev - dev->padded);
4415 return;
4418 BUG_ON(dev->reg_state != NETREG_UNREGISTERED);
4419 dev->reg_state = NETREG_RELEASED;
4421 /* will free via device release */
4422 put_device(&dev->dev);
4426 * synchronize_net - Synchronize with packet receive processing
4428 * Wait for packets currently being received to be done.
4429 * Does not block later packets from starting.
4431 void synchronize_net(void)
4433 might_sleep();
4434 synchronize_rcu();
4438 * unregister_netdevice - remove device from the kernel
4439 * @dev: device
4441 * This function shuts down a device interface and removes it
4442 * from the kernel tables.
4444 * Callers must hold the rtnl semaphore. You may want
4445 * unregister_netdev() instead of this.
4448 void unregister_netdevice(struct net_device *dev)
4450 ASSERT_RTNL();
4452 rollback_registered(dev);
4453 /* Finish processing unregister after unlock */
4454 net_set_todo(dev);
4458 * unregister_netdev - remove device from the kernel
4459 * @dev: device
4461 * This function shuts down a device interface and removes it
4462 * from the kernel tables.
4464 * This is just a wrapper for unregister_netdevice that takes
4465 * the rtnl semaphore. In general you want to use this and not
4466 * unregister_netdevice.
4468 void unregister_netdev(struct net_device *dev)
4470 rtnl_lock();
4471 unregister_netdevice(dev);
4472 rtnl_unlock();
4475 EXPORT_SYMBOL(unregister_netdev);
4478 * dev_change_net_namespace - move device to different nethost namespace
4479 * @dev: device
4480 * @net: network namespace
4481 * @pat: If not NULL name pattern to try if the current device name
4482 * is already taken in the destination network namespace.
4484 * This function shuts down a device interface and moves it
4485 * to a new network namespace. On success 0 is returned, on
4486 * a failure a netagive errno code is returned.
4488 * Callers must hold the rtnl semaphore.
4491 int dev_change_net_namespace(struct net_device *dev, struct net *net, const char *pat)
4493 char buf[IFNAMSIZ];
4494 const char *destname;
4495 int err;
4497 ASSERT_RTNL();
4499 /* Don't allow namespace local devices to be moved. */
4500 err = -EINVAL;
4501 if (dev->features & NETIF_F_NETNS_LOCAL)
4502 goto out;
4504 #ifdef CONFIG_SYSFS
4505 /* Don't allow real devices to be moved when sysfs
4506 * is enabled.
4508 err = -EINVAL;
4509 if (dev->dev.parent)
4510 goto out;
4511 #endif
4513 /* Ensure the device has been registrered */
4514 err = -EINVAL;
4515 if (dev->reg_state != NETREG_REGISTERED)
4516 goto out;
4518 /* Get out if there is nothing todo */
4519 err = 0;
4520 if (net_eq(dev_net(dev), net))
4521 goto out;
4523 /* Pick the destination device name, and ensure
4524 * we can use it in the destination network namespace.
4526 err = -EEXIST;
4527 destname = dev->name;
4528 if (__dev_get_by_name(net, destname)) {
4529 /* We get here if we can't use the current device name */
4530 if (!pat)
4531 goto out;
4532 if (!dev_valid_name(pat))
4533 goto out;
4534 if (strchr(pat, '%')) {
4535 if (__dev_alloc_name(net, pat, buf) < 0)
4536 goto out;
4537 destname = buf;
4538 } else
4539 destname = pat;
4540 if (__dev_get_by_name(net, destname))
4541 goto out;
4545 * And now a mini version of register_netdevice unregister_netdevice.
4548 /* If device is running close it first. */
4549 dev_close(dev);
4551 /* And unlink it from device chain */
4552 err = -ENODEV;
4553 unlist_netdevice(dev);
4555 synchronize_net();
4557 /* Shutdown queueing discipline. */
4558 dev_shutdown(dev);
4560 /* Notify protocols, that we are about to destroy
4561 this device. They should clean all the things.
4563 call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
4566 * Flush the unicast and multicast chains
4568 dev_addr_discard(dev);
4570 netdev_unregister_kobject(dev);
4572 /* Actually switch the network namespace */
4573 dev_net_set(dev, net);
4575 /* Assign the new device name */
4576 if (destname != dev->name)
4577 strcpy(dev->name, destname);
4579 /* If there is an ifindex conflict assign a new one */
4580 if (__dev_get_by_index(net, dev->ifindex)) {
4581 int iflink = (dev->iflink == dev->ifindex);
4582 dev->ifindex = dev_new_index(net);
4583 if (iflink)
4584 dev->iflink = dev->ifindex;
4587 /* Fixup kobjects */
4588 err = netdev_register_kobject(dev);
4589 WARN_ON(err);
4591 /* Add the device back in the hashes */
4592 list_netdevice(dev);
4594 /* Notify protocols, that a new device appeared. */
4595 call_netdevice_notifiers(NETDEV_REGISTER, dev);
4597 synchronize_net();
4598 err = 0;
4599 out:
4600 return err;
4603 static int dev_cpu_callback(struct notifier_block *nfb,
4604 unsigned long action,
4605 void *ocpu)
4607 struct sk_buff **list_skb;
4608 struct Qdisc **list_net;
4609 struct sk_buff *skb;
4610 unsigned int cpu, oldcpu = (unsigned long)ocpu;
4611 struct softnet_data *sd, *oldsd;
4613 if (action != CPU_DEAD && action != CPU_DEAD_FROZEN)
4614 return NOTIFY_OK;
4616 local_irq_disable();
4617 cpu = smp_processor_id();
4618 sd = &per_cpu(softnet_data, cpu);
4619 oldsd = &per_cpu(softnet_data, oldcpu);
4621 /* Find end of our completion_queue. */
4622 list_skb = &sd->completion_queue;
4623 while (*list_skb)
4624 list_skb = &(*list_skb)->next;
4625 /* Append completion queue from offline CPU. */
4626 *list_skb = oldsd->completion_queue;
4627 oldsd->completion_queue = NULL;
4629 /* Find end of our output_queue. */
4630 list_net = &sd->output_queue;
4631 while (*list_net)
4632 list_net = &(*list_net)->next_sched;
4633 /* Append output queue from offline CPU. */
4634 *list_net = oldsd->output_queue;
4635 oldsd->output_queue = NULL;
4637 raise_softirq_irqoff(NET_TX_SOFTIRQ);
4638 local_irq_enable();
4640 /* Process offline CPU's input_pkt_queue */
4641 while ((skb = __skb_dequeue(&oldsd->input_pkt_queue)))
4642 netif_rx(skb);
4644 return NOTIFY_OK;
4647 #ifdef CONFIG_NET_DMA
4649 * net_dma_rebalance - try to maintain one DMA channel per CPU
4650 * @net_dma: DMA client and associated data (lock, channels, channel_mask)
4652 * This is called when the number of channels allocated to the net_dma client
4653 * changes. The net_dma client tries to have one DMA channel per CPU.
4656 static void net_dma_rebalance(struct net_dma *net_dma)
4658 unsigned int cpu, i, n, chan_idx;
4659 struct dma_chan *chan;
4661 if (cpus_empty(net_dma->channel_mask)) {
4662 for_each_online_cpu(cpu)
4663 rcu_assign_pointer(per_cpu(softnet_data, cpu).net_dma, NULL);
4664 return;
4667 i = 0;
4668 cpu = first_cpu(cpu_online_map);
4670 for_each_cpu_mask_nr(chan_idx, net_dma->channel_mask) {
4671 chan = net_dma->channels[chan_idx];
4673 n = ((num_online_cpus() / cpus_weight(net_dma->channel_mask))
4674 + (i < (num_online_cpus() %
4675 cpus_weight(net_dma->channel_mask)) ? 1 : 0));
4677 while(n) {
4678 per_cpu(softnet_data, cpu).net_dma = chan;
4679 cpu = next_cpu(cpu, cpu_online_map);
4680 n--;
4682 i++;
4687 * netdev_dma_event - event callback for the net_dma_client
4688 * @client: should always be net_dma_client
4689 * @chan: DMA channel for the event
4690 * @state: DMA state to be handled
4692 static enum dma_state_client
4693 netdev_dma_event(struct dma_client *client, struct dma_chan *chan,
4694 enum dma_state state)
4696 int i, found = 0, pos = -1;
4697 struct net_dma *net_dma =
4698 container_of(client, struct net_dma, client);
4699 enum dma_state_client ack = DMA_DUP; /* default: take no action */
4701 spin_lock(&net_dma->lock);
4702 switch (state) {
4703 case DMA_RESOURCE_AVAILABLE:
4704 for (i = 0; i < nr_cpu_ids; i++)
4705 if (net_dma->channels[i] == chan) {
4706 found = 1;
4707 break;
4708 } else if (net_dma->channels[i] == NULL && pos < 0)
4709 pos = i;
4711 if (!found && pos >= 0) {
4712 ack = DMA_ACK;
4713 net_dma->channels[pos] = chan;
4714 cpu_set(pos, net_dma->channel_mask);
4715 net_dma_rebalance(net_dma);
4717 break;
4718 case DMA_RESOURCE_REMOVED:
4719 for (i = 0; i < nr_cpu_ids; i++)
4720 if (net_dma->channels[i] == chan) {
4721 found = 1;
4722 pos = i;
4723 break;
4726 if (found) {
4727 ack = DMA_ACK;
4728 cpu_clear(pos, net_dma->channel_mask);
4729 net_dma->channels[i] = NULL;
4730 net_dma_rebalance(net_dma);
4732 break;
4733 default:
4734 break;
4736 spin_unlock(&net_dma->lock);
4738 return ack;
4742 * netdev_dma_register - register the networking subsystem as a DMA client
4744 static int __init netdev_dma_register(void)
4746 net_dma.channels = kzalloc(nr_cpu_ids * sizeof(struct net_dma),
4747 GFP_KERNEL);
4748 if (unlikely(!net_dma.channels)) {
4749 printk(KERN_NOTICE
4750 "netdev_dma: no memory for net_dma.channels\n");
4751 return -ENOMEM;
4753 spin_lock_init(&net_dma.lock);
4754 dma_cap_set(DMA_MEMCPY, net_dma.client.cap_mask);
4755 dma_async_client_register(&net_dma.client);
4756 dma_async_client_chan_request(&net_dma.client);
4757 return 0;
4760 #else
4761 static int __init netdev_dma_register(void) { return -ENODEV; }
4762 #endif /* CONFIG_NET_DMA */
4765 * netdev_increment_features - increment feature set by one
4766 * @all: current feature set
4767 * @one: new feature set
4768 * @mask: mask feature set
4770 * Computes a new feature set after adding a device with feature set
4771 * @one to the master device with current feature set @all. Will not
4772 * enable anything that is off in @mask. Returns the new feature set.
4774 unsigned long netdev_increment_features(unsigned long all, unsigned long one,
4775 unsigned long mask)
4777 /* If device needs checksumming, downgrade to it. */
4778 if (all & NETIF_F_NO_CSUM && !(one & NETIF_F_NO_CSUM))
4779 all ^= NETIF_F_NO_CSUM | (one & NETIF_F_ALL_CSUM);
4780 else if (mask & NETIF_F_ALL_CSUM) {
4781 /* If one device supports v4/v6 checksumming, set for all. */
4782 if (one & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM) &&
4783 !(all & NETIF_F_GEN_CSUM)) {
4784 all &= ~NETIF_F_ALL_CSUM;
4785 all |= one & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM);
4788 /* If one device supports hw checksumming, set for all. */
4789 if (one & NETIF_F_GEN_CSUM && !(all & NETIF_F_GEN_CSUM)) {
4790 all &= ~NETIF_F_ALL_CSUM;
4791 all |= NETIF_F_HW_CSUM;
4795 one |= NETIF_F_ALL_CSUM;
4797 one |= all & NETIF_F_ONE_FOR_ALL;
4798 all &= one | NETIF_F_LLTX | NETIF_F_GSO;
4799 all |= one & mask & NETIF_F_ONE_FOR_ALL;
4801 return all;
4803 EXPORT_SYMBOL(netdev_increment_features);
4805 static struct hlist_head *netdev_create_hash(void)
4807 int i;
4808 struct hlist_head *hash;
4810 hash = kmalloc(sizeof(*hash) * NETDEV_HASHENTRIES, GFP_KERNEL);
4811 if (hash != NULL)
4812 for (i = 0; i < NETDEV_HASHENTRIES; i++)
4813 INIT_HLIST_HEAD(&hash[i]);
4815 return hash;
4818 /* Initialize per network namespace state */
4819 static int __net_init netdev_init(struct net *net)
4821 INIT_LIST_HEAD(&net->dev_base_head);
4823 net->dev_name_head = netdev_create_hash();
4824 if (net->dev_name_head == NULL)
4825 goto err_name;
4827 net->dev_index_head = netdev_create_hash();
4828 if (net->dev_index_head == NULL)
4829 goto err_idx;
4831 return 0;
4833 err_idx:
4834 kfree(net->dev_name_head);
4835 err_name:
4836 return -ENOMEM;
4840 * netdev_drivername - network driver for the device
4841 * @dev: network device
4842 * @buffer: buffer for resulting name
4843 * @len: size of buffer
4845 * Determine network driver for device.
4847 char *netdev_drivername(const struct net_device *dev, char *buffer, int len)
4849 const struct device_driver *driver;
4850 const struct device *parent;
4852 if (len <= 0 || !buffer)
4853 return buffer;
4854 buffer[0] = 0;
4856 parent = dev->dev.parent;
4858 if (!parent)
4859 return buffer;
4861 driver = parent->driver;
4862 if (driver && driver->name)
4863 strlcpy(buffer, driver->name, len);
4864 return buffer;
4867 static void __net_exit netdev_exit(struct net *net)
4869 kfree(net->dev_name_head);
4870 kfree(net->dev_index_head);
4873 static struct pernet_operations __net_initdata netdev_net_ops = {
4874 .init = netdev_init,
4875 .exit = netdev_exit,
4878 static void __net_exit default_device_exit(struct net *net)
4880 struct net_device *dev, *next;
4882 * Push all migratable of the network devices back to the
4883 * initial network namespace
4885 rtnl_lock();
4886 for_each_netdev_safe(net, dev, next) {
4887 int err;
4888 char fb_name[IFNAMSIZ];
4890 /* Ignore unmoveable devices (i.e. loopback) */
4891 if (dev->features & NETIF_F_NETNS_LOCAL)
4892 continue;
4894 /* Delete virtual devices */
4895 if (dev->rtnl_link_ops && dev->rtnl_link_ops->dellink) {
4896 dev->rtnl_link_ops->dellink(dev);
4897 continue;
4900 /* Push remaing network devices to init_net */
4901 snprintf(fb_name, IFNAMSIZ, "dev%d", dev->ifindex);
4902 err = dev_change_net_namespace(dev, &init_net, fb_name);
4903 if (err) {
4904 printk(KERN_EMERG "%s: failed to move %s to init_net: %d\n",
4905 __func__, dev->name, err);
4906 BUG();
4909 rtnl_unlock();
4912 static struct pernet_operations __net_initdata default_device_ops = {
4913 .exit = default_device_exit,
4917 * Initialize the DEV module. At boot time this walks the device list and
4918 * unhooks any devices that fail to initialise (normally hardware not
4919 * present) and leaves us with a valid list of present and active devices.
4924 * This is called single threaded during boot, so no need
4925 * to take the rtnl semaphore.
4927 static int __init net_dev_init(void)
4929 int i, rc = -ENOMEM;
4931 BUG_ON(!dev_boot_phase);
4933 if (dev_proc_init())
4934 goto out;
4936 if (netdev_kobject_init())
4937 goto out;
4939 INIT_LIST_HEAD(&ptype_all);
4940 for (i = 0; i < PTYPE_HASH_SIZE; i++)
4941 INIT_LIST_HEAD(&ptype_base[i]);
4943 if (register_pernet_subsys(&netdev_net_ops))
4944 goto out;
4947 * Initialise the packet receive queues.
4950 for_each_possible_cpu(i) {
4951 struct softnet_data *queue;
4953 queue = &per_cpu(softnet_data, i);
4954 skb_queue_head_init(&queue->input_pkt_queue);
4955 queue->completion_queue = NULL;
4956 INIT_LIST_HEAD(&queue->poll_list);
4958 queue->backlog.poll = process_backlog;
4959 queue->backlog.weight = weight_p;
4962 dev_boot_phase = 0;
4964 /* The loopback device is special if any other network devices
4965 * is present in a network namespace the loopback device must
4966 * be present. Since we now dynamically allocate and free the
4967 * loopback device ensure this invariant is maintained by
4968 * keeping the loopback device as the first device on the
4969 * list of network devices. Ensuring the loopback devices
4970 * is the first device that appears and the last network device
4971 * that disappears.
4973 if (register_pernet_device(&loopback_net_ops))
4974 goto out;
4976 if (register_pernet_device(&default_device_ops))
4977 goto out;
4979 netdev_dma_register();
4981 open_softirq(NET_TX_SOFTIRQ, net_tx_action);
4982 open_softirq(NET_RX_SOFTIRQ, net_rx_action);
4984 hotcpu_notifier(dev_cpu_callback, 0);
4985 dst_init();
4986 dev_mcast_init();
4987 rc = 0;
4988 out:
4989 return rc;
4992 subsys_initcall(net_dev_init);
4994 EXPORT_SYMBOL(__dev_get_by_index);
4995 EXPORT_SYMBOL(__dev_get_by_name);
4996 EXPORT_SYMBOL(__dev_remove_pack);
4997 EXPORT_SYMBOL(dev_valid_name);
4998 EXPORT_SYMBOL(dev_add_pack);
4999 EXPORT_SYMBOL(dev_alloc_name);
5000 EXPORT_SYMBOL(dev_close);
5001 EXPORT_SYMBOL(dev_get_by_flags);
5002 EXPORT_SYMBOL(dev_get_by_index);
5003 EXPORT_SYMBOL(dev_get_by_name);
5004 EXPORT_SYMBOL(dev_open);
5005 EXPORT_SYMBOL(dev_queue_xmit);
5006 EXPORT_SYMBOL(dev_remove_pack);
5007 EXPORT_SYMBOL(dev_set_allmulti);
5008 EXPORT_SYMBOL(dev_set_promiscuity);
5009 EXPORT_SYMBOL(dev_change_flags);
5010 EXPORT_SYMBOL(dev_set_mtu);
5011 EXPORT_SYMBOL(dev_set_mac_address);
5012 EXPORT_SYMBOL(free_netdev);
5013 EXPORT_SYMBOL(netdev_boot_setup_check);
5014 EXPORT_SYMBOL(netdev_set_master);
5015 EXPORT_SYMBOL(netdev_state_change);
5016 EXPORT_SYMBOL(netif_receive_skb);
5017 EXPORT_SYMBOL(netif_rx);
5018 EXPORT_SYMBOL(register_gifconf);
5019 EXPORT_SYMBOL(register_netdevice);
5020 EXPORT_SYMBOL(register_netdevice_notifier);
5021 EXPORT_SYMBOL(skb_checksum_help);
5022 EXPORT_SYMBOL(synchronize_net);
5023 EXPORT_SYMBOL(unregister_netdevice);
5024 EXPORT_SYMBOL(unregister_netdevice_notifier);
5025 EXPORT_SYMBOL(net_enable_timestamp);
5026 EXPORT_SYMBOL(net_disable_timestamp);
5027 EXPORT_SYMBOL(dev_get_flags);
5029 #if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)
5030 EXPORT_SYMBOL(br_handle_frame_hook);
5031 EXPORT_SYMBOL(br_fdb_get_hook);
5032 EXPORT_SYMBOL(br_fdb_put_hook);
5033 #endif
5035 EXPORT_SYMBOL(dev_load);
5037 EXPORT_PER_CPU_SYMBOL(softnet_data);