/*
 * net/sched/sch_generic.c     Generic packet scheduler routines.
 *
 *              This program is free software; you can redistribute it and/or
 *              modify it under the terms of the GNU General Public License
 *              as published by the Free Software Foundation; either version
 *              2 of the License, or (at your option) any later version.
 *
 * Authors:     Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *              Jamal Hadi Salim, <hadi@nortelnetworks.com> 990601
 *              - Ingress support
 */
#include <asm/uaccess.h>
#include <asm/system.h>
#include <asm/bitops.h>
#include <linux/config.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/string.h>
#include <linux/mm.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/in.h>
#include <linux/errno.h>
#include <linux/interrupt.h>
#include <linux/netdevice.h>
#include <linux/skbuff.h>
#include <linux/rtnetlink.h>
#include <linux/init.h>
#include <net/sock.h>
#include <net/pkt_sched.h>
/* Main transmission queue. */

/* Main qdisc structure lock.

   However, modifications to data participating in scheduling
   must additionally be protected with the dev->queue_lock spinlock.

   The idea is the following:
   - enqueue and dequeue are serialized via the top level device
     spinlock dev->queue_lock.
   - tree walking is protected by read_lock(qdisc_tree_lock)
     and this lock is used only in process context.
   - updates to the tree are made only under the rtnl semaphore,
     hence this lock may be taken without disabling local BH.

   qdisc_tree_lock must be grabbed BEFORE dev->queue_lock!
 */
rwlock_t qdisc_tree_lock = RW_LOCK_UNLOCKED;
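/* Illustrative sketch (not part of the original file): the required lock
   nesting described above, exactly as dev_init_scheduler() and dev_shutdown()
   use it further down -- the tree lock first, dev->queue_lock inside it with
   BH disabled.

        write_lock(&qdisc_tree_lock);
        spin_lock_bh(&dev->queue_lock);
        dev->qdisc = &noop_qdisc;               // modify scheduling data
        spin_unlock_bh(&dev->queue_lock);
        write_unlock(&qdisc_tree_lock);
 */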
/*
   dev->queue_lock serializes queue accesses for this device
   AND the dev->qdisc pointer itself.

   dev->xmit_lock serializes accesses to the device driver.

   dev->queue_lock and dev->xmit_lock are mutually exclusive:
   if one is held, the other must be free.
 */
/* Kick the device.
   Note that this procedure can be called by a watchdog timer, so
   we do not check the dev->tbusy flag here.

   Returns:  0  - queue is empty.
            >0  - queue is not empty, but throttled.
            <0  - queue is not empty. Device is throttled, if dev->tbusy != 0.

   NOTE: Called under dev->queue_lock with locally disabled BH.
 */

int qdisc_restart(struct net_device *dev)
{
        struct Qdisc *q = dev->qdisc;
        struct sk_buff *skb;

        /* Dequeue packet */
        if ((skb = q->dequeue(q)) != NULL) {
                if (spin_trylock(&dev->xmit_lock)) {
                        /* Remember that the driver is grabbed by us. */
                        dev->xmit_lock_owner = smp_processor_id();

                        /* And release queue */
                        spin_unlock(&dev->queue_lock);

                        if (!netif_queue_stopped(dev)) {
                                if (netdev_nit)
                                        dev_queue_xmit_nit(skb, dev);

                                if (dev->hard_start_xmit(skb, dev) == 0) {
                                        dev->xmit_lock_owner = -1;
                                        spin_unlock(&dev->xmit_lock);

                                        spin_lock(&dev->queue_lock);
                                        return -1;
                                }
                        }

                        /* Release the driver */
                        dev->xmit_lock_owner = -1;
                        spin_unlock(&dev->xmit_lock);
                        spin_lock(&dev->queue_lock);
                        q = dev->qdisc;
                } else {
                        /* So, someone grabbed the driver. */

                        /* It may be a transient configuration error,
                           when hard_start_xmit() recurses. We detect
                           it by checking the xmit owner and drop the
                           packet when a dead loop is detected.
                         */
                        if (dev->xmit_lock_owner == smp_processor_id()) {
                                kfree_skb(skb);
                                if (net_ratelimit())
                                        printk(KERN_DEBUG "Dead loop on netdevice %s, fix it urgently!\n", dev->name);
                                return -1;
                        }
                        netdev_rx_stat[smp_processor_id()].cpu_collision++;
                }

                /* The device kicked us out :(
                   This is possible in the following cases:

                   0. the driver is locked
                   1. fastroute is enabled
                   2. the device cannot determine its busy state
                      before start of transmission (e.g. dialout)
                   3. the device is buggy (ppp)
                 */

                q->ops->requeue(skb, q);
                netif_schedule(dev);
                return 1;
        }
        return q->q.qlen;
}
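/* Illustrative sketch of the caller side (assumed to match qdisc_run() in
   include/net/pkt_sched.h; not part of this file): the return value above is
   used to keep kicking the device until the queue drains or the driver
   throttles it.

        static inline void qdisc_run(struct net_device *dev)
        {
                while (!netif_queue_stopped(dev) &&
                       qdisc_restart(dev) < 0)
                        ;
        }
 */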
static void dev_watchdog(unsigned long arg)
{
        struct net_device *dev = (struct net_device *)arg;

        spin_lock(&dev->xmit_lock);
        if (dev->qdisc != &noop_qdisc) {
                if (netif_device_present(dev) &&
                    netif_running(dev) &&
                    netif_carrier_ok(dev)) {
                        if (netif_queue_stopped(dev) &&
                            (jiffies - dev->trans_start) > dev->watchdog_timeo) {
                                printk(KERN_INFO "NETDEV WATCHDOG: %s: transmit timed out\n", dev->name);
                                dev->tx_timeout(dev);
                        }
                        if (!mod_timer(&dev->watchdog_timer, jiffies + dev->watchdog_timeo))
                                dev_hold(dev);
                }
        }
        spin_unlock(&dev->xmit_lock);

        dev_put(dev);
}
static void dev_watchdog_init(struct net_device *dev)
{
        init_timer(&dev->watchdog_timer);
        dev->watchdog_timer.data = (unsigned long)dev;
        dev->watchdog_timer.function = dev_watchdog;
}

void __netdev_watchdog_up(struct net_device *dev)
{
        if (dev->tx_timeout) {
                if (dev->watchdog_timeo <= 0)
                        dev->watchdog_timeo = 5*HZ;
                if (!mod_timer(&dev->watchdog_timer, jiffies + dev->watchdog_timeo))
                        dev_hold(dev);
        }
}

static void dev_watchdog_up(struct net_device *dev)
{
        spin_lock_bh(&dev->xmit_lock);
        __netdev_watchdog_up(dev);
        spin_unlock_bh(&dev->xmit_lock);
}

static void dev_watchdog_down(struct net_device *dev)
{
        spin_lock_bh(&dev->xmit_lock);
        if (del_timer(&dev->watchdog_timer))
                __dev_put(dev);
        spin_unlock_bh(&dev->xmit_lock);
}
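/* Illustrative sketch (assumed driver-side code, not part of this file):
   a driver arms this watchdog simply by providing a tx_timeout handler and,
   optionally, a timeout period before registering the device; the mydrv_*
   names below are hypothetical.

        static void mydrv_tx_timeout(struct net_device *dev)
        {
                mydrv_reset_hw(dev);            // hypothetical hardware reset
                netif_wake_queue(dev);          // let the queue run again
        }

        dev->tx_timeout = mydrv_tx_timeout;
        dev->watchdog_timeo = 2*HZ;             // defaults to 5*HZ if <= 0
 */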
/* "NOOP" scheduler: the best scheduler, recommended for all interfaces
   under all circumstances. It is difficult to invent anything faster or
   cheaper.
 */

static int
noop_enqueue(struct sk_buff *skb, struct Qdisc * qdisc)
{
        kfree_skb(skb);
        return NET_XMIT_CN;
}

static struct sk_buff *
noop_dequeue(struct Qdisc * qdisc)
{
        return NULL;
}

static int
noop_requeue(struct sk_buff *skb, struct Qdisc* qdisc)
{
        if (net_ratelimit())
                printk(KERN_DEBUG "%s deferred output. It is buggy.\n", skb->dev->name);
        kfree_skb(skb);
        return NET_XMIT_CN;
}
struct Qdisc_ops noop_qdisc_ops =
{
        NULL,                   /* next */
        NULL,                   /* cl_ops */
        "noop",                 /* id */
        0,                      /* priv_size */

        noop_enqueue,
        noop_dequeue,
        noop_requeue,
};

struct Qdisc noop_qdisc =
{
        noop_enqueue,           /* enqueue */
        noop_dequeue,           /* dequeue */
        TCQ_F_BUILTIN,          /* flags */
        &noop_qdisc_ops,        /* ops */
};

struct Qdisc_ops noqueue_qdisc_ops =
{
        NULL,                   /* next */
        NULL,                   /* cl_ops */
        "noqueue",              /* id */
        0,                      /* priv_size */

        noop_enqueue,
        noop_dequeue,
        noop_requeue,
};

struct Qdisc noqueue_qdisc =
{
        NULL,                   /* enqueue */
        noop_dequeue,           /* dequeue */
        TCQ_F_BUILTIN,          /* flags */
        &noqueue_qdisc_ops,     /* ops */
};
static const u8 prio2band[TC_PRIO_MAX+1] =
{ 1, 2, 2, 2, 1, 2, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1 };
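/* Illustrative sketch (not part of this file): how the table above is used.
   Assuming the values from <linux/pkt_sched.h> (TC_PRIO_BESTEFFORT == 0,
   TC_PRIO_BULK == 2, TC_PRIO_INTERACTIVE == 6), best-effort traffic lands in
   band 1, bulk in band 2 and interactive in band 0; band 0 is dequeued first.

        int band = prio2band[skb->priority & TC_PRIO_MAX];
 */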
/* 3-band FIFO queue: old style, but should be a bit faster than
   generic prio+fifo combination.
 */

static int
pfifo_fast_enqueue(struct sk_buff *skb, struct Qdisc* qdisc)
{
        struct sk_buff_head *list;

        list = ((struct sk_buff_head*)qdisc->data) +
                prio2band[skb->priority&TC_PRIO_MAX];

        if (list->qlen <= skb->dev->tx_queue_len) {
                __skb_queue_tail(list, skb);
                qdisc->q.qlen++;
                return 0;
        }
        qdisc->stats.drops++;
        kfree_skb(skb);
        return NET_XMIT_DROP;
}

static struct sk_buff *
pfifo_fast_dequeue(struct Qdisc* qdisc)
{
        int prio;
        struct sk_buff_head *list = ((struct sk_buff_head*)qdisc->data);
        struct sk_buff *skb;

        for (prio = 0; prio < 3; prio++, list++) {
                skb = __skb_dequeue(list);
                if (skb) {
                        qdisc->q.qlen--;
                        return skb;
                }
        }
        return NULL;
}

static int
pfifo_fast_requeue(struct sk_buff *skb, struct Qdisc* qdisc)
{
        struct sk_buff_head *list;

        list = ((struct sk_buff_head*)qdisc->data) +
                prio2band[skb->priority&TC_PRIO_MAX];

        __skb_queue_head(list, skb);
        qdisc->q.qlen++;
        return 0;
}

static void
pfifo_fast_reset(struct Qdisc* qdisc)
{
        int prio;
        struct sk_buff_head *list = ((struct sk_buff_head*)qdisc->data);

        for (prio=0; prio < 3; prio++)
                skb_queue_purge(list+prio);
        qdisc->q.qlen = 0;
}

static int pfifo_fast_init(struct Qdisc *qdisc, struct rtattr *opt)
{
        int i;
        struct sk_buff_head *list;

        list = ((struct sk_buff_head*)qdisc->data);

        for (i=0; i<3; i++)
                skb_queue_head_init(list+i);

        return 0;
}
static struct Qdisc_ops pfifo_fast_ops =
{
        NULL,                           /* next */
        NULL,                           /* cl_ops */
        "pfifo_fast",                   /* id */
        3 * sizeof(struct sk_buff_head),        /* priv_size */

        pfifo_fast_enqueue,
        pfifo_fast_dequeue,
        pfifo_fast_requeue,
        NULL,                           /* drop */

        pfifo_fast_init,
        pfifo_fast_reset,
};
struct Qdisc * qdisc_create_dflt(struct net_device *dev, struct Qdisc_ops *ops)
{
        struct Qdisc *sch;
        int size = sizeof(*sch) + ops->priv_size;

        sch = kmalloc(size, GFP_KERNEL);
        if (!sch)
                return NULL;
        memset(sch, 0, size);

        skb_queue_head_init(&sch->q);
        sch->ops = ops;
        sch->enqueue = ops->enqueue;
        sch->dequeue = ops->dequeue;
        sch->dev = dev;
        sch->stats.lock = &dev->queue_lock;
        atomic_set(&sch->refcnt, 1);
        if (!ops->init || ops->init(sch, NULL) == 0)
                return sch;

        kfree(sch);
        return NULL;
}
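/* Illustrative sketch (not part of this file): dev_activate() below uses
   qdisc_create_dflt() to attach the default pfifo_fast queue; any other
   caller would pair it with qdisc_destroy(), which drops the reference
   taken here.

        struct Qdisc *q = qdisc_create_dflt(dev, &pfifo_fast_ops);
        if (q == NULL)
                return -ENOMEM;
        ...
        qdisc_destroy(q);       // under dev->queue_lock and BH, see below
 */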
/* Under dev->queue_lock and BH! */

void qdisc_reset(struct Qdisc *qdisc)
{
        struct Qdisc_ops *ops = qdisc->ops;

        if (ops->reset)
                ops->reset(qdisc);
}

/* Under dev->queue_lock and BH! */

void qdisc_destroy(struct Qdisc *qdisc)
{
        struct Qdisc_ops *ops = qdisc->ops;
        struct net_device *dev;

        if (!atomic_dec_and_test(&qdisc->refcnt))
                return;

        dev = qdisc->dev;

#ifdef CONFIG_NET_SCHED
        if (dev) {
                struct Qdisc *q, **qp;
                for (qp = &qdisc->dev->qdisc_list; (q=*qp) != NULL; qp = &q->next) {
                        if (q == qdisc) {
                                *qp = q->next;
                                break;
                        }
                }
        }
#ifdef CONFIG_NET_ESTIMATOR
        qdisc_kill_estimator(&qdisc->stats);
#endif
#endif
        if (ops->reset)
                ops->reset(qdisc);
        if (ops->destroy)
                ops->destroy(qdisc);
        if (!(qdisc->flags&TCQ_F_BUILTIN))
                kfree(qdisc);
}
void dev_activate(struct net_device *dev)
{
        /* If no queueing discipline is attached to the device, create a
           default one: pfifo_fast for devices which need queueing, and
           noqueue_qdisc for virtual interfaces.
         */

        if (dev->qdisc_sleeping == &noop_qdisc) {
                struct Qdisc *qdisc;
                if (dev->tx_queue_len) {
                        qdisc = qdisc_create_dflt(dev, &pfifo_fast_ops);
                        if (qdisc == NULL) {
                                printk(KERN_INFO "%s: activation failed\n", dev->name);
                                return;
                        }
                } else {
                        qdisc = &noqueue_qdisc;
                }
                write_lock(&qdisc_tree_lock);
                dev->qdisc_sleeping = qdisc;
                write_unlock(&qdisc_tree_lock);
        }

        spin_lock_bh(&dev->queue_lock);
        if ((dev->qdisc = dev->qdisc_sleeping) != &noqueue_qdisc) {
                dev->trans_start = jiffies;
                dev_watchdog_up(dev);
        }
        spin_unlock_bh(&dev->queue_lock);
}
void dev_deactivate(struct net_device *dev)
{
        struct Qdisc *qdisc;

        spin_lock_bh(&dev->queue_lock);
        qdisc = dev->qdisc;
        dev->qdisc = &noop_qdisc;

        qdisc_reset(qdisc);

        spin_unlock_bh(&dev->queue_lock);

        dev_watchdog_down(dev);

        while (test_bit(__LINK_STATE_SCHED, &dev->state)) {
                current->policy |= SCHED_YIELD;
                schedule();
        }

        spin_unlock_wait(&dev->xmit_lock);
}
void dev_init_scheduler(struct net_device *dev)
{
        write_lock(&qdisc_tree_lock);
        spin_lock_bh(&dev->queue_lock);
        dev->qdisc = &noop_qdisc;
        spin_unlock_bh(&dev->queue_lock);
        dev->qdisc_sleeping = &noop_qdisc;
        dev->qdisc_list = NULL;
        write_unlock(&qdisc_tree_lock);

        dev_watchdog_init(dev);
}

void dev_shutdown(struct net_device *dev)
{
        struct Qdisc *qdisc;

        write_lock(&qdisc_tree_lock);
        spin_lock_bh(&dev->queue_lock);
        qdisc = dev->qdisc_sleeping;
        dev->qdisc = &noop_qdisc;
        dev->qdisc_sleeping = &noop_qdisc;
        qdisc_destroy(qdisc);
#if defined(CONFIG_NET_SCH_INGRESS) || defined(CONFIG_NET_SCH_INGRESS_MODULE)
        if ((qdisc = dev->qdisc_ingress) != NULL) {
                dev->qdisc_ingress = NULL;
                qdisc_destroy(qdisc);
        }
#endif
        BUG_TRAP(dev->qdisc_list == NULL);
        BUG_TRAP(!timer_pending(&dev->watchdog_timer));
        dev->qdisc_list = NULL;
        spin_unlock_bh(&dev->queue_lock);
        write_unlock(&qdisc_tree_lock);
}