/*
 * net/sched/sch_generic.c	Generic packet scheduler routines.
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *		Jamal Hadi Salim, <hadi@nortelnetworks.com> 990601
 */
#include <asm/uaccess.h>
#include <asm/system.h>
#include <asm/bitops.h>
#include <linux/config.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/string.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/errno.h>
#include <linux/interrupt.h>
#include <linux/netdevice.h>
#include <linux/skbuff.h>
#include <linux/rtnetlink.h>
#include <linux/init.h>
#include <net/pkt_sched.h>
/* Main transmission queue. */
/* Main qdisc structure lock.

   However, modifications to data participating in scheduling
   must additionally be protected with the dev->queue_lock spinlock.

   The idea is the following:
   - enqueue and dequeue are serialized via the top-level device
     spinlock dev->queue_lock.
   - tree walking is protected by read_lock(&qdisc_tree_lock),
     and this lock is used only in process context.
   - updates to the tree are made only under the rtnl semaphore,
     hence this lock may be taken without disabling local bh.

   qdisc_tree_lock must be grabbed BEFORE dev->queue_lock!
 */
rwlock_t qdisc_tree_lock = RW_LOCK_UNLOCKED;
/*
   dev->queue_lock serializes queue accesses for this device
   AND the dev->qdisc pointer itself.

   dev->xmit_lock serializes accesses to the device driver.

   dev->queue_lock and dev->xmit_lock are mutually exclusive:
   if one is grabbed, the other must be free.
 */
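/*
 * Illustrative sketch (an editors' addition, not in the original source):
 * given the ordering rule above, a tree update in process context takes
 * the locks in this order, mirroring dev_init_scheduler() below
 * (new_qdisc is a hypothetical replacement qdisc):
 *
 *	write_lock(&qdisc_tree_lock);
 *	spin_lock_bh(&dev->queue_lock);
 *	dev->qdisc = new_qdisc;
 *	spin_unlock_bh(&dev->queue_lock);
 *	write_unlock(&qdisc_tree_lock);
 */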
/* Kick the device.

   Note that this procedure can be called by a watchdog timer, so
   we do not check the dev->tbusy flag here.

   Returns:  0  - queue is empty.
	    >0  - queue is not empty, but throttled.
	    <0  - queue is not empty. Device is throttled, if dev->tbusy != 0.

   NOTE: Called under dev->queue_lock with locally disabled BH.
 */
int qdisc_restart(struct net_device *dev)
{
	struct Qdisc *q = dev->qdisc;
	struct sk_buff *skb;

	/* Dequeue packet */
	if ((skb = q->dequeue(q)) != NULL) {
		if (spin_trylock(&dev->xmit_lock)) {
			/* Remember that the driver is grabbed by us. */
			dev->xmit_lock_owner = smp_processor_id();

			/* And release queue */
			spin_unlock(&dev->queue_lock);

			if (!netif_queue_stopped(dev)) {
				if (netdev_nit)
					dev_queue_xmit_nit(skb, dev);

				if (dev->hard_start_xmit(skb, dev) == 0) {
					dev->xmit_lock_owner = -1;
					spin_unlock(&dev->xmit_lock);

					spin_lock(&dev->queue_lock);
					return -1;
				}
			}

			/* Release the driver */
			dev->xmit_lock_owner = -1;
			spin_unlock(&dev->xmit_lock);
			spin_lock(&dev->queue_lock);
			q = dev->qdisc;
		} else {
			/* So, someone grabbed the driver. */

			/* It may be a transient configuration error,
			   when hard_start_xmit() recurses. We detect
			   it by checking the xmit owner and drop the
			   packet when a dead loop is detected.
			 */
			if (dev->xmit_lock_owner == smp_processor_id()) {
				kfree_skb(skb);
				if (net_ratelimit())
					printk(KERN_DEBUG "Dead loop on netdevice %s, fix it urgently!\n", dev->name);
				return -1;
			}
			netdev_rx_stat[smp_processor_id()].cpu_collision++;
		}

		/* Device kicked us out :(
		   This is possible in three cases:

		   1. fastroute is enabled
		   2. device cannot determine busy state
		      before start of transmission (f.e. dialout)
		   3. device is buggy (ppp)
		 */

		q->ops->requeue(skb, q);
		netif_schedule(dev);
		return 1;
	}
	return q->q.qlen;
}
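/*
 * Editors' note (not in the original file): the caller is expected to
 * loop while qdisc_restart() returns a negative value; in this kernel
 * that loop lives in qdisc_run() in <net/pkt_sched.h>, roughly:
 *
 *	while (!netif_queue_stopped(dev) && qdisc_restart(dev) < 0)
 *		;
 */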
static void dev_watchdog(unsigned long arg)
{
	struct net_device *dev = (struct net_device *)arg;

	spin_lock(&dev->xmit_lock);
	if (dev->qdisc != &noop_qdisc) {
		if (netif_device_present(dev) &&
		    netif_running(dev) &&
		    netif_carrier_ok(dev)) {
			if (netif_queue_stopped(dev) &&
			    (jiffies - dev->trans_start) > dev->watchdog_timeo) {
				printk(KERN_INFO "NETDEV WATCHDOG: %s: transmit timed out\n", dev->name);
				dev->tx_timeout(dev);
			}
			if (!mod_timer(&dev->watchdog_timer, jiffies + dev->watchdog_timeo))
				dev_hold(dev);
		}
	}
	spin_unlock(&dev->xmit_lock);

	dev_put(dev);
}
static void dev_watchdog_init(struct net_device *dev)
{
	init_timer(&dev->watchdog_timer);
	dev->watchdog_timer.data = (unsigned long)dev;
	dev->watchdog_timer.function = dev_watchdog;
}
void __netdev_watchdog_up(struct net_device *dev)
{
	if (dev->tx_timeout) {
		if (dev->watchdog_timeo <= 0)
			dev->watchdog_timeo = 5*HZ;
		if (!mod_timer(&dev->watchdog_timer, jiffies + dev->watchdog_timeo))
			dev_hold(dev);
	}
}
static void dev_watchdog_up(struct net_device *dev)
{
	spin_lock_bh(&dev->xmit_lock);
	__netdev_watchdog_up(dev);
	spin_unlock_bh(&dev->xmit_lock);
}
static void dev_watchdog_down(struct net_device *dev)
{
	spin_lock_bh(&dev->xmit_lock);
	if (del_timer(&dev->watchdog_timer))
		__dev_put(dev);
	spin_unlock_bh(&dev->xmit_lock);
}
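/*
 * Editors' sketch (not in the original file): a driver opts into this
 * watchdog by supplying a tx_timeout handler and a timeout before
 * registration; mydrv_tx_timeout is a hypothetical handler:
 *
 *	dev->tx_timeout = mydrv_tx_timeout;
 *	dev->watchdog_timeo = 2*HZ;
 *	register_netdev(dev);
 */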
198 /* "NOOP" scheduler: the best scheduler, recommended for all interfaces
199 under all circumstances. It is difficult to invent anything faster or
204 noop_enqueue(struct sk_buff
*skb
, struct Qdisc
* qdisc
)
210 static struct sk_buff
*
211 noop_dequeue(struct Qdisc
* qdisc
)
217 noop_requeue(struct sk_buff
*skb
, struct Qdisc
* qdisc
)
220 printk(KERN_DEBUG
"%s deferred output. It is buggy.\n", skb
->dev
->name
);
struct Qdisc_ops noop_qdisc_ops =
{
	NULL, NULL, "noop", 0,
	noop_enqueue, noop_dequeue, noop_requeue,
};

struct Qdisc noop_qdisc =
{
	noop_enqueue, noop_dequeue, TCQ_F_BUILTIN, &noop_qdisc_ops,
};

struct Qdisc_ops noqueue_qdisc_ops =
{
	NULL, NULL, "noqueue", 0,
	noop_enqueue, noop_dequeue, noop_requeue,
};

struct Qdisc noqueue_qdisc =
{
	NULL, noop_dequeue, TCQ_F_BUILTIN, &noqueue_qdisc_ops,
};
static const u8 prio2band[TC_PRIO_MAX+1] =
{ 1, 2, 2, 2, 1, 2, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1 };
/* 3-band FIFO queue: old style, but should be a bit faster than
   generic prio+fifo combination.
 */
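/*
 * Editors' note (not in the original file): the band is chosen by
 * indexing prio2band with the low four bits of skb->priority, so e.g.
 * TC_PRIO_INTERACTIVE (6) maps to band 0 (served first) and
 * TC_PRIO_BULK (2) maps to band 2 (served last):
 *
 *	band = prio2band[skb->priority & TC_PRIO_MAX];
 */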
static int
pfifo_fast_enqueue(struct sk_buff *skb, struct Qdisc * qdisc)
{
	struct sk_buff_head *list;

	list = ((struct sk_buff_head *)qdisc->data) +
		prio2band[skb->priority & TC_PRIO_MAX];

	if (list->qlen <= skb->dev->tx_queue_len) {
		__skb_queue_tail(list, skb);
		qdisc->q.qlen++;
		return 0;
	}
	qdisc->stats.drops++;
	kfree_skb(skb);
	return NET_XMIT_DROP;
}
static struct sk_buff *
pfifo_fast_dequeue(struct Qdisc * qdisc)
{
	int prio;
	struct sk_buff_head *list = ((struct sk_buff_head *)qdisc->data);
	struct sk_buff *skb;

	for (prio = 0; prio < 3; prio++, list++) {
		skb = __skb_dequeue(list);
		if (skb) {
			qdisc->q.qlen--;
			return skb;
		}
	}
	return NULL;
}
static int
pfifo_fast_requeue(struct sk_buff *skb, struct Qdisc * qdisc)
{
	struct sk_buff_head *list;

	list = ((struct sk_buff_head *)qdisc->data) +
		prio2band[skb->priority & TC_PRIO_MAX];

	__skb_queue_head(list, skb);
	qdisc->q.qlen++;
	return 0;
}
static void
pfifo_fast_reset(struct Qdisc * qdisc)
{
	int prio;
	struct sk_buff_head *list = ((struct sk_buff_head *)qdisc->data);

	for (prio = 0; prio < 3; prio++)
		skb_queue_purge(list + prio);
	qdisc->q.qlen = 0;
}
static int pfifo_fast_init(struct Qdisc *qdisc, struct rtattr *opt)
{
	int i;
	struct sk_buff_head *list;

	list = ((struct sk_buff_head *)qdisc->data);

	for (i = 0; i < 3; i++)
		skb_queue_head_init(list + i);

	return 0;
}
static struct Qdisc_ops pfifo_fast_ops =
{
	NULL, NULL, "pfifo_fast", 3 * sizeof(struct sk_buff_head),
	pfifo_fast_enqueue, pfifo_fast_dequeue, pfifo_fast_requeue, NULL,
	pfifo_fast_init, pfifo_fast_reset,
};
struct Qdisc * qdisc_create_dflt(struct net_device *dev, struct Qdisc_ops *ops)
{
	struct Qdisc *sch;
	int size = sizeof(*sch) + ops->priv_size;

	sch = kmalloc(size, GFP_KERNEL);
	if (!sch)
		return NULL;
	memset(sch, 0, size);

	skb_queue_head_init(&sch->q);
	sch->ops = ops;
	sch->enqueue = ops->enqueue;
	sch->dequeue = ops->dequeue;
	sch->dev = dev;
	sch->stats.lock = &dev->queue_lock;
	atomic_set(&sch->refcnt, 1);
	if (!ops->init || ops->init(sch, NULL) == 0)
		return sch;

	kfree(sch);
	return NULL;
}
/* Under dev->queue_lock and BH! */

void qdisc_reset(struct Qdisc *qdisc)
{
	struct Qdisc_ops *ops = qdisc->ops;

	if (ops->reset)
		ops->reset(qdisc);
}
/* Under dev->queue_lock and BH! */

void qdisc_destroy(struct Qdisc *qdisc)
{
	struct Qdisc_ops *ops = qdisc->ops;
	struct net_device *dev;

	if (!atomic_dec_and_test(&qdisc->refcnt))
		return;

	dev = qdisc->dev;

#ifdef CONFIG_NET_SCHED
	if (dev) {
		struct Qdisc *q, **qp;
		for (qp = &qdisc->dev->qdisc_list; (q = *qp) != NULL; qp = &q->next) {
			if (q == qdisc) {
				*qp = q->next;
				break;
			}
		}
	}
#ifdef CONFIG_NET_ESTIMATOR
	qdisc_kill_estimator(&qdisc->stats);
#endif
#endif
	if (ops->reset)
		ops->reset(qdisc);
	if (ops->destroy)
		ops->destroy(qdisc);
	if (!(qdisc->flags & TCQ_F_BUILTIN))
		kfree(qdisc);
}
void dev_activate(struct net_device *dev)
{
	/* No queueing discipline is attached to the device;
	   create a default one, i.e. pfifo_fast for devices
	   which need queueing, and noqueue_qdisc for
	   virtual interfaces.
	 */

	if (dev->qdisc_sleeping == &noop_qdisc) {
		struct Qdisc *qdisc;
		if (dev->tx_queue_len) {
			qdisc = qdisc_create_dflt(dev, &pfifo_fast_ops);
			if (qdisc == NULL) {
				printk(KERN_INFO "%s: activation failed\n", dev->name);
				return;
			}
		} else {
			qdisc = &noqueue_qdisc;
		}
		write_lock(&qdisc_tree_lock);
		dev->qdisc_sleeping = qdisc;
		write_unlock(&qdisc_tree_lock);
	}

	spin_lock_bh(&dev->queue_lock);
	if ((dev->qdisc = dev->qdisc_sleeping) != &noqueue_qdisc) {
		dev->trans_start = jiffies;
		dev_watchdog_up(dev);
	}
	spin_unlock_bh(&dev->queue_lock);
}
void dev_deactivate(struct net_device *dev)
{
	struct Qdisc *qdisc;

	spin_lock_bh(&dev->queue_lock);
	qdisc = dev->qdisc;
	dev->qdisc = &noop_qdisc;

	qdisc_reset(qdisc);

	spin_unlock_bh(&dev->queue_lock);

	dev_watchdog_down(dev);

	while (test_bit(__LINK_STATE_SCHED, &dev->state)) {
		current->policy |= SCHED_YIELD;
		schedule();
	}

	spin_unlock_wait(&dev->xmit_lock);
}
void dev_init_scheduler(struct net_device *dev)
{
	write_lock(&qdisc_tree_lock);
	spin_lock_bh(&dev->queue_lock);
	dev->qdisc = &noop_qdisc;
	spin_unlock_bh(&dev->queue_lock);
	dev->qdisc_sleeping = &noop_qdisc;
	dev->qdisc_list = NULL;
	write_unlock(&qdisc_tree_lock);

	dev_watchdog_init(dev);
}
void dev_shutdown(struct net_device *dev)
{
	struct Qdisc *qdisc;

	write_lock(&qdisc_tree_lock);
	spin_lock_bh(&dev->queue_lock);
	qdisc = dev->qdisc_sleeping;
	dev->qdisc = &noop_qdisc;
	dev->qdisc_sleeping = &noop_qdisc;
	qdisc_destroy(qdisc);
#if defined(CONFIG_NET_SCH_INGRESS) || defined(CONFIG_NET_SCH_INGRESS_MODULE)
	if ((qdisc = dev->qdisc_ingress) != NULL) {
		dev->qdisc_ingress = NULL;
		qdisc_destroy(qdisc);
	}
#endif
	BUG_TRAP(dev->qdisc_list == NULL);
	BUG_TRAP(!timer_pending(&dev->watchdog_timer));
	dev->qdisc_list = NULL;
	spin_unlock_bh(&dev->queue_lock);
	write_unlock(&qdisc_tree_lock);
}