/*
 * net/sched/sch_api.c	Packet scheduler API.
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *
 * Fixes:
 *
 * Rani Assaf <rani@magic.metawire.com> :980802: JIFFIES and CPU clock sources are repaired.
 * Eduardo J. Blanco <ejbs@netlabs.com.uy> :990222: kmod support
 * Jamal Hadi Salim <hadi@nortelnetworks.com>: 990601: ingress support
 */
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/skbuff.h>
#include <linux/init.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/kmod.h>
#include <linux/list.h>
#include <linux/hrtimer.h>
#include <linux/lockdep.h>

#include <net/net_namespace.h>
#include <net/sock.h>
#include <net/netlink.h>
#include <net/pkt_sched.h>

static int qdisc_notify(struct sk_buff *oskb, struct nlmsghdr *n, u32 clid,
			struct Qdisc *old, struct Qdisc *new);
static int tclass_notify(struct sk_buff *oskb, struct nlmsghdr *n,
			 struct Qdisc *q, unsigned long cl, int event);
/*

   Short review.
   -------------

   This file consists of two interrelated parts:

   1. the queueing discipline manager frontend.
   2. the traffic class manager frontend.

   Generally, a queueing discipline ("qdisc") is a black box that can
   enqueue packets and dequeue them (when the device is ready to send
   something) in an order and at times determined by the algorithm
   hidden inside it.

   qdiscs are divided into two categories:
   - "queues", which have no internal structure visible from the outside.
   - "schedulers", which split all packets into "traffic classes"
     using "packet classifiers" (see cls_api.c).

   In turn, classes may have child qdiscs (as a rule, queues)
   attached to them, and so on recursively.

   The goal of the routines in this file is to translate the information
   supplied by the user in the form of handles into a form more
   intelligible to the kernel, to make some sanity checks, to do the part
   of the work that is common to all qdiscs, and to provide rtnetlink
   notifications.

   All the real intelligent work is done inside the qdisc modules.



   Every discipline has two major routines: enqueue and dequeue.
   (A minimal illustrative sketch follows this comment block.)

   ---dequeue

   dequeue usually returns an skb to send. It is allowed to return NULL,
   but that does not mean the queue is empty; it only means that the
   discipline does not want to send anything at this time.
   The queue is really empty only if q->q.qlen == 0.
   For complicated disciplines with multiple queues, q->q is not the
   real packet queue, but q->q.qlen must nevertheless be valid.

   ---enqueue

   enqueue returns 0 if the packet was enqueued successfully.
   If a packet (this one or another one) was dropped, it returns
   a non-zero error code:
   NET_XMIT_DROP	- this packet was dropped.
     Expected action: do not back off, but wait until the queue clears.
   NET_XMIT_CN		- this packet was probably enqueued, but another one was dropped.
     Expected action: back off or ignore.
   NET_XMIT_POLICED	- dropped by the policer.
     Expected action: back off or report an error to real-time apps.

   Auxiliary routines:

   ---peek

   like dequeue, but without removing the packet from the queue

   ---reset

   returns the qdisc to its initial state: purges all buffers and clears
   all timers and counters (except for statistics).

   ---init

   initializes a newly created qdisc.

   ---destroy

   destroys the resources allocated by init and during the lifetime of the qdisc.

   ---change

   changes the qdisc's parameters.
 */
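
/*
 * Illustrative sketch (not part of the original file): the smallest
 * discipline honouring the contract above is a plain FIFO.  enqueue
 * reports NET_XMIT_SUCCESS or NET_XMIT_DROP, dequeue returns NULL only
 * when it has nothing it wants to send, and peek looks without removing.
 * All "example_" names are hypothetical; real instances live in sch_*.c.
 */
static int example_enqueue(struct sk_buff *skb, struct Qdisc *sch)
{
	/* Accept until the device's tx_queue_len is reached... */
	if (likely(skb_queue_len(&sch->q) < qdisc_dev(sch)->tx_queue_len))
		return qdisc_enqueue_tail(skb, sch);	/* NET_XMIT_SUCCESS */
	/* ...then drop this packet and report it. */
	return qdisc_drop(skb, sch);			/* NET_XMIT_DROP */
}

static struct sk_buff *example_dequeue(struct Qdisc *sch)
{
	/* For a plain queue, NULL really does mean empty (q->q.qlen == 0);
	 * a shaper could also return NULL while throttled. */
	return qdisc_dequeue_head(sch);
}

static struct Qdisc_ops example_qdisc_ops __read_mostly = {
	.id		= "example",
	.enqueue	= example_enqueue,
	.dequeue	= example_dequeue,
	.peek		= qdisc_peek_head,	/* auxiliary: look, don't remove */
	.owner		= THIS_MODULE,
};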

/* Protects the list of registered TC modules. It is a pure SMP lock. */
static DEFINE_RWLOCK(qdisc_mod_lock);


/************************************************
 *	Queueing disciplines manipulation.	*
 ************************************************/


/* The list of all installed queueing disciplines. */

static struct Qdisc_ops *qdisc_base;

/* Register/unregister queueing discipline */
int register_qdisc(struct Qdisc_ops *qops)
{
	struct Qdisc_ops *q, **qp;
	int rc = -EEXIST;

	write_lock(&qdisc_mod_lock);
	for (qp = &qdisc_base; (q = *qp) != NULL; qp = &q->next)
		if (!strcmp(qops->id, q->id))
			goto out;

	if (qops->enqueue == NULL)
		qops->enqueue = noop_qdisc_ops.enqueue;
	if (qops->peek == NULL) {
		if (qops->dequeue == NULL) {
			qops->peek = noop_qdisc_ops.peek;
		} else {
			rc = -EINVAL;
			goto out;
		}
	}
	if (qops->dequeue == NULL)
		qops->dequeue = noop_qdisc_ops.dequeue;

	qops->next = NULL;
	*qp = qops;
	rc = 0;
out:
	write_unlock(&qdisc_mod_lock);
	return rc;
}
EXPORT_SYMBOL(register_qdisc);

int unregister_qdisc(struct Qdisc_ops *qops)
{
	struct Qdisc_ops *q, **qp;
	int err = -ENOENT;

	write_lock(&qdisc_mod_lock);
	for (qp = &qdisc_base; (q = *qp) != NULL; qp = &q->next)
		if (q == qops)
			break;
	if (q) {
		*qp = q->next;
		q->next = NULL;
		err = 0;
	}
	write_unlock(&qdisc_mod_lock);
	return err;
}
EXPORT_SYMBOL(unregister_qdisc);
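
/*
 * Illustrative sketch (not part of the original file): a qdisc module
 * typically pairs the two calls above in its module init/exit hooks,
 * here reusing the hypothetical example_qdisc_ops sketched earlier.
 */
static int __init example_module_init(void)
{
	return register_qdisc(&example_qdisc_ops);	/* -EEXIST if the id is taken */
}

static void __exit example_module_exit(void)
{
	unregister_qdisc(&example_qdisc_ops);
}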

/* We know the handle. Find the qdisc among all qdiscs attached to the
   device (the root qdisc, all its children, children of children, etc.)
 */

struct Qdisc *qdisc_match_from_root(struct Qdisc *root, u32 handle)
{
	struct Qdisc *q;

	if (!(root->flags & TCQ_F_BUILTIN) &&
	    root->handle == handle)
		return root;

	list_for_each_entry(q, &root->list, list) {
		if (q->handle == handle)
			return q;
	}
	return NULL;
}

/*
 * This lock is needed until some qdiscs stop calling qdisc_tree_decrease_qlen()
 * without rtnl_lock(); currently hfsc_dequeue(), netem_dequeue(), tbf_dequeue()
 */
static DEFINE_SPINLOCK(qdisc_list_lock);

static void qdisc_list_add(struct Qdisc *q)
{
	if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) {
		spin_lock_bh(&qdisc_list_lock);
		list_add_tail(&q->list, &qdisc_root_sleeping(q)->list);
		spin_unlock_bh(&qdisc_list_lock);
	}
}

void qdisc_list_del(struct Qdisc *q)
{
	if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) {
		spin_lock_bh(&qdisc_list_lock);
		list_del(&q->list);
		spin_unlock_bh(&qdisc_list_lock);
	}
}
EXPORT_SYMBOL(qdisc_list_del);

struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle)
{
	unsigned int i;
	struct Qdisc *q;

	spin_lock_bh(&qdisc_list_lock);

	for (i = 0; i < dev->num_tx_queues; i++) {
		struct netdev_queue *txq = netdev_get_tx_queue(dev, i);
		struct Qdisc *txq_root = txq->qdisc_sleeping;

		q = qdisc_match_from_root(txq_root, handle);
		if (q)
			goto unlock;
	}

	q = qdisc_match_from_root(dev->rx_queue.qdisc_sleeping, handle);

unlock:
	spin_unlock_bh(&qdisc_list_lock);

	return q;
}

static struct Qdisc *qdisc_leaf(struct Qdisc *p, u32 classid)
{
	unsigned long cl;
	struct Qdisc *leaf;
	const struct Qdisc_class_ops *cops = p->ops->cl_ops;

	if (cops == NULL)
		return NULL;
	cl = cops->get(p, classid);

	if (cl == 0)
		return NULL;
	leaf = cops->leaf(p, cl);
	cops->put(p, cl);
	return leaf;
}

/* Find queueing discipline by name */

static struct Qdisc_ops *qdisc_lookup_ops(struct nlattr *kind)
{
	struct Qdisc_ops *q = NULL;

	if (kind) {
		read_lock(&qdisc_mod_lock);
		for (q = qdisc_base; q; q = q->next) {
			if (nla_strcmp(kind, q->id) == 0) {
				if (!try_module_get(q->owner))
					q = NULL;
				break;
			}
		}
		read_unlock(&qdisc_mod_lock);
	}
	return q;
}

static struct qdisc_rate_table *qdisc_rtab_list;

struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r, struct nlattr *tab)
{
	struct qdisc_rate_table *rtab;

	for (rtab = qdisc_rtab_list; rtab; rtab = rtab->next) {
		if (memcmp(&rtab->rate, r, sizeof(struct tc_ratespec)) == 0) {
			rtab->refcnt++;
			return rtab;
		}
	}

	if (tab == NULL || r->rate == 0 || r->cell_log == 0 ||
	    nla_len(tab) != TC_RTAB_SIZE)
		return NULL;

	rtab = kmalloc(sizeof(*rtab), GFP_KERNEL);
	if (rtab) {
		rtab->rate = *r;
		rtab->refcnt = 1;
		memcpy(rtab->data, nla_data(tab), 1024);
		rtab->next = qdisc_rtab_list;
		qdisc_rtab_list = rtab;
	}
	return rtab;
}
EXPORT_SYMBOL(qdisc_get_rtab);

void qdisc_put_rtab(struct qdisc_rate_table *tab)
{
	struct qdisc_rate_table *rtab, **rtabp;

	if (!tab || --tab->refcnt)
		return;

	for (rtabp = &qdisc_rtab_list; (rtab = *rtabp) != NULL; rtabp = &rtab->next) {
		if (rtab == tab) {
			*rtabp = rtab->next;
			kfree(rtab);
			return;
		}
	}
}
EXPORT_SYMBOL(qdisc_put_rtab);
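
/*
 * Illustrative sketch (not part of the original file): a rate-limiting
 * qdisc resolves its rate attributes through qdisc_get_rtab() so that
 * identical tables are shared, and releases them with qdisc_put_rtab()
 * on change and destroy.  All "example_" names are hypothetical.
 */
static int example_set_rate(struct qdisc_rate_table **rtabp,
			    struct tc_ratespec *r, struct nlattr *tab)
{
	struct qdisc_rate_table *rtab = qdisc_get_rtab(r, tab);

	if (rtab == NULL)
		return -EINVAL;
	qdisc_put_rtab(*rtabp);		/* drop the previously held table, if any */
	*rtabp = rtab;
	return 0;
}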

static LIST_HEAD(qdisc_stab_list);
static DEFINE_SPINLOCK(qdisc_stab_lock);

static const struct nla_policy stab_policy[TCA_STAB_MAX + 1] = {
	[TCA_STAB_BASE]	= { .len = sizeof(struct tc_sizespec) },
	[TCA_STAB_DATA] = { .type = NLA_BINARY },
};

static struct qdisc_size_table *qdisc_get_stab(struct nlattr *opt)
{
	struct nlattr *tb[TCA_STAB_MAX + 1];
	struct qdisc_size_table *stab;
	struct tc_sizespec *s;
	unsigned int tsize = 0;
	u16 *tab = NULL;
	int err;

	err = nla_parse_nested(tb, TCA_STAB_MAX, opt, stab_policy);
	if (err < 0)
		return ERR_PTR(err);
	if (!tb[TCA_STAB_BASE])
		return ERR_PTR(-EINVAL);

	s = nla_data(tb[TCA_STAB_BASE]);

	if (s->tsize > 0) {
		if (!tb[TCA_STAB_DATA])
			return ERR_PTR(-EINVAL);
		tab = nla_data(tb[TCA_STAB_DATA]);
		tsize = nla_len(tb[TCA_STAB_DATA]) / sizeof(u16);
	}

	if (!s || tsize != s->tsize || (!tab && tsize > 0))
		return ERR_PTR(-EINVAL);

	spin_lock(&qdisc_stab_lock);

	list_for_each_entry(stab, &qdisc_stab_list, list) {
		if (memcmp(&stab->szopts, s, sizeof(*s)))
			continue;
		if (tsize > 0 && memcmp(stab->data, tab, tsize * sizeof(u16)))
			continue;
		stab->refcnt++;
		spin_unlock(&qdisc_stab_lock);
		return stab;
	}

	spin_unlock(&qdisc_stab_lock);

	stab = kmalloc(sizeof(*stab) + tsize * sizeof(u16), GFP_KERNEL);
	if (!stab)
		return ERR_PTR(-ENOMEM);

	stab->refcnt = 1;
	stab->szopts = *s;
	if (tsize > 0)
		memcpy(stab->data, tab, tsize * sizeof(u16));

	spin_lock(&qdisc_stab_lock);
	list_add_tail(&stab->list, &qdisc_stab_list);
	spin_unlock(&qdisc_stab_lock);

	return stab;
}

void qdisc_put_stab(struct qdisc_size_table *tab)
{
	if (!tab)
		return;

	spin_lock(&qdisc_stab_lock);

	if (--tab->refcnt == 0) {
		list_del(&tab->list);
		kfree(tab);
	}

	spin_unlock(&qdisc_stab_lock);
}
EXPORT_SYMBOL(qdisc_put_stab);

static int qdisc_dump_stab(struct sk_buff *skb, struct qdisc_size_table *stab)
{
	struct nlattr *nest;

	nest = nla_nest_start(skb, TCA_STAB);
	if (nest == NULL)
		goto nla_put_failure;
	NLA_PUT(skb, TCA_STAB_BASE, sizeof(stab->szopts), &stab->szopts);
	nla_nest_end(skb, nest);

	return skb->len;

nla_put_failure:
	return -1;
}

void qdisc_calculate_pkt_len(struct sk_buff *skb, struct qdisc_size_table *stab)
{
	int pkt_len, slot;

	pkt_len = skb->len + stab->szopts.overhead;
	if (unlikely(!stab->szopts.tsize))
		goto out;

	slot = pkt_len + stab->szopts.cell_align;
	if (unlikely(slot < 0))
		slot = 0;

	slot >>= stab->szopts.cell_log;
	if (likely(slot < stab->szopts.tsize))
		pkt_len = stab->data[slot];
	else
		pkt_len = stab->data[stab->szopts.tsize - 1] *
				(slot / stab->szopts.tsize) +
				stab->data[slot % stab->szopts.tsize];

	pkt_len <<= stab->szopts.size_log;
out:
	if (unlikely(pkt_len < 1))
		pkt_len = 1;
	qdisc_skb_cb(skb)->pkt_len = pkt_len;
}
EXPORT_SYMBOL(qdisc_calculate_pkt_len);
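
/*
 * Worked example (not in the original): with overhead = 24, cell_align = 0,
 * cell_log = 6, size_log = 0 and tsize = 512, a 1000-byte skb gives
 * pkt_len = 1000 + 24 = 1024 and slot = 1024 >> 6 = 16, so the reported
 * length becomes stab->data[16]; slots beyond tsize are extrapolated
 * linearly from the last table entry.
 */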

static enum hrtimer_restart qdisc_watchdog(struct hrtimer *timer)
{
	struct qdisc_watchdog *wd = container_of(timer, struct qdisc_watchdog,
						 timer);

	wd->qdisc->flags &= ~TCQ_F_THROTTLED;
	smp_wmb();
	__netif_schedule(qdisc_root(wd->qdisc));

	return HRTIMER_NORESTART;
}

void qdisc_watchdog_init(struct qdisc_watchdog *wd, struct Qdisc *qdisc)
{
	hrtimer_init(&wd->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
	wd->timer.function = qdisc_watchdog;
	wd->qdisc = qdisc;
}
EXPORT_SYMBOL(qdisc_watchdog_init);

void qdisc_watchdog_schedule(struct qdisc_watchdog *wd, psched_time_t expires)
{
	ktime_t time;

	if (test_bit(__QDISC_STATE_DEACTIVATED,
		     &qdisc_root_sleeping(wd->qdisc)->state))
		return;

	wd->qdisc->flags |= TCQ_F_THROTTLED;
	time = ktime_set(0, 0);
	time = ktime_add_ns(time, PSCHED_US2NS(expires));
	hrtimer_start(&wd->timer, time, HRTIMER_MODE_ABS);
}
EXPORT_SYMBOL(qdisc_watchdog_schedule);

void qdisc_watchdog_cancel(struct qdisc_watchdog *wd)
{
	hrtimer_cancel(&wd->timer);
	wd->qdisc->flags &= ~TCQ_F_THROTTLED;
}
EXPORT_SYMBOL(qdisc_watchdog_cancel);
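
/*
 * Illustrative sketch (not part of the original file): a shaping qdisc
 * keeps a qdisc_watchdog in its private data and, when the head packet
 * may not be sent yet, arms the watchdog instead of handing the packet
 * out; the hrtimer above then clears TCQ_F_THROTTLED and reschedules
 * the qdisc.  All "example_" names are hypothetical.
 */
struct example_sched_data {
	struct qdisc_watchdog	watchdog;	/* set up via qdisc_watchdog_init() */
	psched_time_t		next_send;
};

static struct sk_buff *example_shaper_dequeue(struct Qdisc *sch)
{
	struct example_sched_data *q = qdisc_priv(sch);

	if (psched_get_time() < q->next_send) {
		/* Too early: returning NULL here does not mean empty. */
		qdisc_watchdog_schedule(&q->watchdog, q->next_send);
		return NULL;
	}
	return qdisc_dequeue_head(sch);
}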

static struct hlist_head *qdisc_class_hash_alloc(unsigned int n)
{
	unsigned int size = n * sizeof(struct hlist_head), i;
	struct hlist_head *h;

	if (size <= PAGE_SIZE)
		h = kmalloc(size, GFP_KERNEL);
	else
		h = (struct hlist_head *)
			__get_free_pages(GFP_KERNEL, get_order(size));

	if (h != NULL) {
		for (i = 0; i < n; i++)
			INIT_HLIST_HEAD(&h[i]);
	}
	return h;
}

static void qdisc_class_hash_free(struct hlist_head *h, unsigned int n)
{
	unsigned int size = n * sizeof(struct hlist_head);

	if (size <= PAGE_SIZE)
		kfree(h);
	else
		free_pages((unsigned long)h, get_order(size));
}

void qdisc_class_hash_grow(struct Qdisc *sch, struct Qdisc_class_hash *clhash)
{
	struct Qdisc_class_common *cl;
	struct hlist_node *n, *next;
	struct hlist_head *nhash, *ohash;
	unsigned int nsize, nmask, osize;
	unsigned int i, h;

	/* Rehash when load factor exceeds 0.75 */
	if (clhash->hashelems * 4 <= clhash->hashsize * 3)
		return;
	nsize = clhash->hashsize * 2;
	nmask = nsize - 1;
	nhash = qdisc_class_hash_alloc(nsize);
	if (nhash == NULL)
		return;

	ohash = clhash->hash;
	osize = clhash->hashsize;

	sch_tree_lock(sch);
	for (i = 0; i < osize; i++) {
		hlist_for_each_entry_safe(cl, n, next, &ohash[i], hnode) {
			h = qdisc_class_hash(cl->classid, nmask);
			hlist_add_head(&cl->hnode, &nhash[h]);
		}
	}
	clhash->hash     = nhash;
	clhash->hashsize = nsize;
	clhash->hashmask = nmask;
	sch_tree_unlock(sch);

	qdisc_class_hash_free(ohash, osize);
}
EXPORT_SYMBOL(qdisc_class_hash_grow);

int qdisc_class_hash_init(struct Qdisc_class_hash *clhash)
{
	unsigned int size = 4;

	clhash->hash = qdisc_class_hash_alloc(size);
	if (clhash->hash == NULL)
		return -ENOMEM;
	clhash->hashsize  = size;
	clhash->hashmask  = size - 1;
	clhash->hashelems = 0;
	return 0;
}
EXPORT_SYMBOL(qdisc_class_hash_init);

void qdisc_class_hash_destroy(struct Qdisc_class_hash *clhash)
{
	qdisc_class_hash_free(clhash->hash, clhash->hashsize);
}
EXPORT_SYMBOL(qdisc_class_hash_destroy);

void qdisc_class_hash_insert(struct Qdisc_class_hash *clhash,
			     struct Qdisc_class_common *cl)
{
	unsigned int h;

	INIT_HLIST_NODE(&cl->hnode);
	h = qdisc_class_hash(cl->classid, clhash->hashmask);
	hlist_add_head(&cl->hnode, &clhash->hash[h]);
	clhash->hashelems++;
}
EXPORT_SYMBOL(qdisc_class_hash_insert);

void qdisc_class_hash_remove(struct Qdisc_class_hash *clhash,
			     struct Qdisc_class_common *cl)
{
	hlist_del(&cl->hnode);
	clhash->hashelems--;
}
EXPORT_SYMBOL(qdisc_class_hash_remove);
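
/*
 * Illustrative sketch (not part of the original file): a classful qdisc
 * embeds Qdisc_class_common (keyed on classid) in its class structure,
 * inserts it with qdisc_class_hash_insert() and then lets
 * qdisc_class_hash_grow() rehash once the load factor passes 0.75.
 * All "example_" names are hypothetical.
 */
struct example_class {
	struct Qdisc_class_common common;	/* carries the classid key */
	/* ... per-class scheduling state ... */
};

static void example_class_link(struct Qdisc *sch,
			       struct Qdisc_class_hash *clhash,
			       struct example_class *cl, u32 classid)
{
	cl->common.classid = classid;
	qdisc_class_hash_insert(clhash, &cl->common);
	qdisc_class_hash_grow(sch, clhash);	/* cheap no-op below the threshold */
}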

/* Allocate a unique handle from the space managed by the kernel */

static u32 qdisc_alloc_handle(struct net_device *dev)
{
	int i = 0x10000;
	static u32 autohandle = TC_H_MAKE(0x80000000U, 0);

	do {
		autohandle += TC_H_MAKE(0x10000U, 0);
		if (autohandle == TC_H_MAKE(TC_H_ROOT, 0))
			autohandle = TC_H_MAKE(0x80000000U, 0);
	} while (qdisc_lookup(dev, autohandle) && --i > 0);

	return i > 0 ? autohandle : 0;
}
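
/*
 * Illustration (not in the original): handles are 32-bit <major:minor>
 * pairs; TC_H_MAKE(0x80000000U, 0) is "8000:0" in tc notation, and each
 * iteration above bumps the major number by one, so automatic handles
 * come out as 8001:0, 8002:0, ... within the kernel-managed half of the
 * handle space.
 */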

/* Attach toplevel qdisc to device queue. */

static struct Qdisc *dev_graft_qdisc(struct netdev_queue *dev_queue,
				     struct Qdisc *qdisc)
{
	struct Qdisc *oqdisc = dev_queue->qdisc_sleeping;
	spinlock_t *root_lock;

	root_lock = qdisc_lock(oqdisc);
	spin_lock_bh(root_lock);

	/* Prune old scheduler */
	if (oqdisc && atomic_read(&oqdisc->refcnt) <= 1)
		qdisc_reset(oqdisc);

	/* ... and graft new one */
	if (qdisc == NULL)
		qdisc = &noop_qdisc;
	dev_queue->qdisc_sleeping = qdisc;
	rcu_assign_pointer(dev_queue->qdisc, &noop_qdisc);

	spin_unlock_bh(root_lock);

	return oqdisc;
}

void qdisc_tree_decrease_qlen(struct Qdisc *sch, unsigned int n)
{
	const struct Qdisc_class_ops *cops;
	unsigned long cl;
	u32 parentid;

	if (n == 0)
		return;
	while ((parentid = sch->parent)) {
		if (TC_H_MAJ(parentid) == TC_H_MAJ(TC_H_INGRESS))
			return;

		sch = qdisc_lookup(qdisc_dev(sch), TC_H_MAJ(parentid));
		if (sch == NULL) {
			WARN_ON(parentid != TC_H_ROOT);
			return;
		}
		cops = sch->ops->cl_ops;
		if (cops->qlen_notify) {
			cl = cops->get(sch, parentid);
			cops->qlen_notify(sch, cl);
			cops->put(sch, cl);
		}
		sch->q.qlen -= n;
	}
}
EXPORT_SYMBOL(qdisc_tree_decrease_qlen);
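
/*
 * Illustrative sketch (not part of the original file): a qdisc that
 * discards packets from inside its tree (e.g. when a change request
 * shrinks its limit) must report the shrunk qlen to all ancestors.
 * All "example_" names are hypothetical.
 */
static void example_trim(struct Qdisc *sch, unsigned int limit)
{
	unsigned int dropped = 0;

	/* ops->drop removes one packet from this qdisc and fixes its own
	 * qlen; the ancestors are told afterwards in one call. */
	while (sch->q.qlen > limit && sch->ops->drop && sch->ops->drop(sch))
		dropped++;
	qdisc_tree_decrease_qlen(sch, dropped);
}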

static void notify_and_destroy(struct sk_buff *skb, struct nlmsghdr *n, u32 clid,
			       struct Qdisc *old, struct Qdisc *new)
{
	if (new || old)
		qdisc_notify(skb, n, clid, old, new);

	if (old)
		qdisc_destroy(old);
}

/* Graft qdisc "new" to class "classid" of qdisc "parent" or
 * to device "dev".
 *
 * When appropriate send a netlink notification using 'skb'
 * and "n".
 *
 * On success, destroy old qdisc.
 */

static int qdisc_graft(struct net_device *dev, struct Qdisc *parent,
		       struct sk_buff *skb, struct nlmsghdr *n, u32 classid,
		       struct Qdisc *new, struct Qdisc *old)
{
	struct Qdisc *q = old;
	int err = 0;

	if (parent == NULL) {
		unsigned int i, num_q, ingress;

		ingress = 0;
		num_q = dev->num_tx_queues;
		if ((q && q->flags & TCQ_F_INGRESS) ||
		    (new && new->flags & TCQ_F_INGRESS)) {
			num_q = 1;
			ingress = 1;
		}

		if (dev->flags & IFF_UP)
			dev_deactivate(dev);

		for (i = 0; i < num_q; i++) {
			struct netdev_queue *dev_queue = &dev->rx_queue;

			if (!ingress)
				dev_queue = netdev_get_tx_queue(dev, i);

			old = dev_graft_qdisc(dev_queue, new);
			if (new && i > 0)
				atomic_inc(&new->refcnt);

			notify_and_destroy(skb, n, classid, old, new);
		}

		if (dev->flags & IFF_UP)
			dev_activate(dev);
	} else {
		const struct Qdisc_class_ops *cops = parent->ops->cl_ops;

		err = -EINVAL;

		if (cops) {
			unsigned long cl = cops->get(parent, classid);
			if (cl) {
				err = cops->graft(parent, cl, new, &old);
				cops->put(parent, cl);
			}
		}
		if (!err)
			notify_and_destroy(skb, n, classid, old, new);
	}
	return err;
}

/* lockdep annotation is needed for ingress; egress gets it only for name */
static struct lock_class_key qdisc_tx_lock;
static struct lock_class_key qdisc_rx_lock;

/*
   Allocate and initialize new qdisc.

   Parameters are passed via opt.
 */

static struct Qdisc *
qdisc_create(struct net_device *dev, struct netdev_queue *dev_queue,
	     u32 parent, u32 handle, struct nlattr **tca, int *errp)
{
	int err;
	struct nlattr *kind = tca[TCA_KIND];
	struct Qdisc *sch;
	struct Qdisc_ops *ops;
	struct qdisc_size_table *stab;

	ops = qdisc_lookup_ops(kind);
#ifdef CONFIG_MODULES
	if (ops == NULL && kind != NULL) {
		char name[IFNAMSIZ];
		if (nla_strlcpy(name, kind, IFNAMSIZ) < IFNAMSIZ) {
			/* We dropped the RTNL semaphore in order to
			 * perform the module load.  So, even if we
			 * succeeded in loading the module we have to
			 * tell the caller to replay the request.  We
			 * indicate this using -EAGAIN.
			 * We replay the request because the device may
			 * go away in the mean time.
			 */
			rtnl_unlock();
			request_module("sch_%s", name);
			rtnl_lock();
			ops = qdisc_lookup_ops(kind);
			if (ops != NULL) {
				/* We will try again qdisc_lookup_ops,
				 * so don't keep a reference.
				 */
				module_put(ops->owner);
				err = -EAGAIN;
				goto err_out;
			}
		}
	}
#endif

	err = -ENOENT;
	if (ops == NULL)
		goto err_out;

	sch = qdisc_alloc(dev_queue, ops);
	if (IS_ERR(sch)) {
		err = PTR_ERR(sch);
		goto err_out2;
	}

	sch->parent = parent;

	if (handle == TC_H_INGRESS) {
		sch->flags |= TCQ_F_INGRESS;
		handle = TC_H_MAKE(TC_H_INGRESS, 0);
		lockdep_set_class(qdisc_lock(sch), &qdisc_rx_lock);
	} else {
		if (handle == 0) {
			handle = qdisc_alloc_handle(dev);
			err = -ENOMEM;
			if (handle == 0)
				goto err_out3;
		}
		lockdep_set_class(qdisc_lock(sch), &qdisc_tx_lock);
	}

	sch->handle = handle;

	if (!ops->init || (err = ops->init(sch, tca[TCA_OPTIONS])) == 0) {
		if (tca[TCA_STAB]) {
			stab = qdisc_get_stab(tca[TCA_STAB]);
			if (IS_ERR(stab)) {
				err = PTR_ERR(stab);
				goto err_out3;
			}
			sch->stab = stab;
		}
		if (tca[TCA_RATE]) {
			spinlock_t *root_lock;

			if ((sch->parent != TC_H_ROOT) &&
			    !(sch->flags & TCQ_F_INGRESS))
				root_lock = qdisc_root_sleeping_lock(sch);
			else
				root_lock = qdisc_lock(sch);

			err = gen_new_estimator(&sch->bstats, &sch->rate_est,
						root_lock, tca[TCA_RATE]);
			if (err) {
				/*
				 * Any broken qdiscs that would require
				 * a ops->reset() here?  The qdisc was never
				 * in action so it shouldn't be necessary.
				 */
				if (ops->destroy)
					ops->destroy(sch);
				goto err_out3;
			}
		}

		qdisc_list_add(sch);

		return sch;
	}
err_out3:
	qdisc_put_stab(sch->stab);
	dev_put(dev);
	kfree((char *) sch - sch->padded);
err_out2:
	module_put(ops->owner);
err_out:
	*errp = err;
	return NULL;
}

static int qdisc_change(struct Qdisc *sch, struct nlattr **tca)
{
	struct qdisc_size_table *stab = NULL;
	int err = 0;

	if (tca[TCA_OPTIONS]) {
		if (sch->ops->change == NULL)
			return -EINVAL;
		err = sch->ops->change(sch, tca[TCA_OPTIONS]);
		if (err)
			return err;
	}

	if (tca[TCA_STAB]) {
		stab = qdisc_get_stab(tca[TCA_STAB]);
		if (IS_ERR(stab))
			return PTR_ERR(stab);
	}

	qdisc_put_stab(sch->stab);
	sch->stab = stab;

	if (tca[TCA_RATE])
		gen_replace_estimator(&sch->bstats, &sch->rate_est,
				      qdisc_root_sleeping_lock(sch),
				      tca[TCA_RATE]);
	return 0;
}

struct check_loop_arg
{
	struct qdisc_walker	w;
	struct Qdisc		*p;
	int			depth;
};

static int check_loop_fn(struct Qdisc *q, unsigned long cl, struct qdisc_walker *w);

static int check_loop(struct Qdisc *q, struct Qdisc *p, int depth)
{
	struct check_loop_arg arg;

	if (q->ops->cl_ops == NULL)
		return 0;

	arg.w.stop = arg.w.skip = arg.w.count = 0;
	arg.w.fn = check_loop_fn;
	arg.depth = depth;
	arg.p = p;
	q->ops->cl_ops->walk(q, &arg.w);
	return arg.w.stop ? -ELOOP : 0;
}

static int
check_loop_fn(struct Qdisc *q, unsigned long cl, struct qdisc_walker *w)
{
	struct Qdisc *leaf;
	const struct Qdisc_class_ops *cops = q->ops->cl_ops;
	struct check_loop_arg *arg = (struct check_loop_arg *)w;

	leaf = cops->leaf(q, cl);
	if (leaf) {
		if (leaf == arg->p || arg->depth > 7)
			return -ELOOP;
		return check_loop(leaf, arg->p, arg->depth + 1);
	}
	return 0;
}

/*
 * Delete/get qdisc.
 */

static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
{
	struct net *net = sock_net(skb->sk);
	struct tcmsg *tcm = NLMSG_DATA(n);
	struct nlattr *tca[TCA_MAX + 1];
	struct net_device *dev;
	u32 clid = tcm->tcm_parent;
	struct Qdisc *q = NULL;
	struct Qdisc *p = NULL;
	int err;

	if (net != &init_net)
		return -EINVAL;

	if ((dev = __dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL)
		return -ENODEV;

	err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL);
	if (err < 0)
		return err;

	if (clid) {
		if (clid != TC_H_ROOT) {
			if (TC_H_MAJ(clid) != TC_H_MAJ(TC_H_INGRESS)) {
				if ((p = qdisc_lookup(dev, TC_H_MAJ(clid))) == NULL)
					return -ENOENT;
				q = qdisc_leaf(p, clid);
			} else { /* ingress */
				q = dev->rx_queue.qdisc_sleeping;
			}
		} else {
			struct netdev_queue *dev_queue;
			dev_queue = netdev_get_tx_queue(dev, 0);
			q = dev_queue->qdisc_sleeping;
		}
		if (!q)
			return -ENOENT;

		if (tcm->tcm_handle && q->handle != tcm->tcm_handle)
			return -EINVAL;
	} else {
		if ((q = qdisc_lookup(dev, tcm->tcm_handle)) == NULL)
			return -ENOENT;
	}

	if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id))
		return -EINVAL;

	if (n->nlmsg_type == RTM_DELQDISC) {
		if (!clid)
			return -EINVAL;
		if (q->handle == 0)
			return -ENOENT;
		if ((err = qdisc_graft(dev, p, skb, n, clid, NULL, q)) != 0)
			return err;
	} else {
		qdisc_notify(skb, n, clid, NULL, q);
	}
	return 0;
}

/*
   Create/change qdisc.
 */

static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
{
	struct net *net = sock_net(skb->sk);
	struct tcmsg *tcm;
	struct nlattr *tca[TCA_MAX + 1];
	struct net_device *dev;
	u32 clid;
	struct Qdisc *q, *p;
	int err;

	if (net != &init_net)
		return -EINVAL;

replay:
	/* Reinit, just in case something touches this. */
	tcm = NLMSG_DATA(n);
	clid = tcm->tcm_parent;
	q = p = NULL;

	if ((dev = __dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL)
		return -ENODEV;

	err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL);
	if (err < 0)
		return err;

	if (clid) {
		if (clid != TC_H_ROOT) {
			if (clid != TC_H_INGRESS) {
				if ((p = qdisc_lookup(dev, TC_H_MAJ(clid))) == NULL)
					return -ENOENT;
				q = qdisc_leaf(p, clid);
			} else { /* ingress */
				q = dev->rx_queue.qdisc_sleeping;
			}
		} else {
			struct netdev_queue *dev_queue;
			dev_queue = netdev_get_tx_queue(dev, 0);
			q = dev_queue->qdisc_sleeping;
		}

		/* It may be default qdisc, ignore it */
		if (q && q->handle == 0)
			q = NULL;

		if (!q || !tcm->tcm_handle || q->handle != tcm->tcm_handle) {
			if (tcm->tcm_handle) {
				if (q && !(n->nlmsg_flags & NLM_F_REPLACE))
					return -EEXIST;
				if (TC_H_MIN(tcm->tcm_handle))
					return -EINVAL;
				if ((q = qdisc_lookup(dev, tcm->tcm_handle)) == NULL)
					goto create_n_graft;
				if (n->nlmsg_flags & NLM_F_EXCL)
					return -EEXIST;
				if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id))
					return -EINVAL;
				if (q == p ||
				    (p && check_loop(q, p, 0)))
					return -ELOOP;
				atomic_inc(&q->refcnt);
				goto graft;
			} else {
				if (q == NULL)
					goto create_n_graft;

				/* This magic test requires explanation.
				 *
				 *   We know that some child q is already
				 *   attached to this parent and have a choice:
				 *   either to change it or to create/graft a new one.
				 *
				 *   1. We are allowed to create/graft only
				 *   if both the CREATE and REPLACE flags are set.
				 *
				 *   2. If EXCL is set, the requestor wanted to say
				 *   that the qdisc tcm_handle is not expected
				 *   to exist, so we choose create/graft too.
				 *
				 *   3. The last case is when no flags are set.
				 *   Alas, this is sort of a hole in the API; we
				 *   cannot decide what to do unambiguously.
				 *   For now we select create/graft if the
				 *   user gave a KIND that does not match the existing one.
				 */
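
				/* Illustration (not in the original): iproute2 sends
				 *   "tc qdisc add"     with NLM_F_CREATE|NLM_F_EXCL,
				 *   "tc qdisc replace" with NLM_F_CREATE|NLM_F_REPLACE,
				 *   "tc qdisc change"  with none of the three,
				 * so the test below creates/grafts for "replace" when
				 * the requested KIND differs from the existing one,
				 * while plain "add" over an existing child fails with
				 * -EEXIST further down and "change" falls through to
				 * qdisc_change().
				 */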
				if ((n->nlmsg_flags & NLM_F_CREATE) &&
				    (n->nlmsg_flags & NLM_F_REPLACE) &&
				    ((n->nlmsg_flags & NLM_F_EXCL) ||
				     (tca[TCA_KIND] &&
				      nla_strcmp(tca[TCA_KIND], q->ops->id))))
					goto create_n_graft;
			}
		}
	} else {
		if (!tcm->tcm_handle)
			return -EINVAL;
		q = qdisc_lookup(dev, tcm->tcm_handle);
	}

	/* Change qdisc parameters */
	if (q == NULL)
		return -ENOENT;
	if (n->nlmsg_flags & NLM_F_EXCL)
		return -EEXIST;
	if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id))
		return -EINVAL;
	err = qdisc_change(q, tca);
	if (err == 0)
		qdisc_notify(skb, n, clid, NULL, q);
	return err;

create_n_graft:
	if (!(n->nlmsg_flags & NLM_F_CREATE))
		return -ENOENT;
	if (clid == TC_H_INGRESS)
		q = qdisc_create(dev, &dev->rx_queue,
				 tcm->tcm_parent, tcm->tcm_parent,
				 tca, &err);
	else
		q = qdisc_create(dev, netdev_get_tx_queue(dev, 0),
				 tcm->tcm_parent, tcm->tcm_handle,
				 tca, &err);
	if (q == NULL) {
		if (err == -EAGAIN)
			goto replay;
		return err;
	}

graft:
	err = qdisc_graft(dev, p, skb, n, clid, q, NULL);
	if (err) {
		if (q)
			qdisc_destroy(q);
		return err;
	}

	return 0;
}

static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
			 u32 pid, u32 seq, u16 flags, int event)
{
	struct tcmsg *tcm;
	struct nlmsghdr *nlh;
	unsigned char *b = skb_tail_pointer(skb);
	struct gnet_dump d;

	nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*tcm), flags);
	tcm = NLMSG_DATA(nlh);
	tcm->tcm_family = AF_UNSPEC;
	tcm->tcm__pad1 = 0;
	tcm->tcm__pad2 = 0;
	tcm->tcm_ifindex = qdisc_dev(q)->ifindex;
	tcm->tcm_parent = clid;
	tcm->tcm_handle = q->handle;
	tcm->tcm_info = atomic_read(&q->refcnt);
	NLA_PUT_STRING(skb, TCA_KIND, q->ops->id);
	if (q->ops->dump && q->ops->dump(q, skb) < 0)
		goto nla_put_failure;
	q->qstats.qlen = q->q.qlen;

	if (q->stab && qdisc_dump_stab(skb, q->stab) < 0)
		goto nla_put_failure;

	if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS,
					 qdisc_root_sleeping_lock(q), &d) < 0)
		goto nla_put_failure;

	if (q->ops->dump_stats && q->ops->dump_stats(q, &d) < 0)
		goto nla_put_failure;

	if (gnet_stats_copy_basic(&d, &q->bstats) < 0 ||
	    gnet_stats_copy_rate_est(&d, &q->rate_est) < 0 ||
	    gnet_stats_copy_queue(&d, &q->qstats) < 0)
		goto nla_put_failure;

	if (gnet_stats_finish_copy(&d) < 0)
		goto nla_put_failure;

	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
	return skb->len;

nlmsg_failure:
nla_put_failure:
	nlmsg_trim(skb, b);
	return -1;
}

static int qdisc_notify(struct sk_buff *oskb, struct nlmsghdr *n,
			u32 clid, struct Qdisc *old, struct Qdisc *new)
{
	struct sk_buff *skb;
	u32 pid = oskb ? NETLINK_CB(oskb).pid : 0;

	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
	if (!skb)
		return -ENOBUFS;

	if (old && old->handle) {
		if (tc_fill_qdisc(skb, old, clid, pid, n->nlmsg_seq, 0, RTM_DELQDISC) < 0)
			goto err_out;
	}
	if (new) {
		if (tc_fill_qdisc(skb, new, clid, pid, n->nlmsg_seq, old ? NLM_F_REPLACE : 0, RTM_NEWQDISC) < 0)
			goto err_out;
	}

	if (skb->len)
		return rtnetlink_send(skb, &init_net, pid, RTNLGRP_TC, n->nlmsg_flags & NLM_F_ECHO);

err_out:
	kfree_skb(skb);
	return -EINVAL;
}

static bool tc_qdisc_dump_ignore(struct Qdisc *q)
{
	return (q->flags & TCQ_F_BUILTIN) ? true : false;
}

static int tc_dump_qdisc_root(struct Qdisc *root, struct sk_buff *skb,
			      struct netlink_callback *cb,
			      int *q_idx_p, int s_q_idx)
{
	int ret = 0, q_idx = *q_idx_p;
	struct Qdisc *q;

	if (!root)
		return 0;

	q = root;
	if (q_idx < s_q_idx) {
		q_idx++;
	} else {
		if (!tc_qdisc_dump_ignore(q) &&
		    tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).pid,
				  cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWQDISC) <= 0)
			goto done;
		q_idx++;
	}
	list_for_each_entry(q, &root->list, list) {
		if (q_idx < s_q_idx) {
			q_idx++;
			continue;
		}
		if (!tc_qdisc_dump_ignore(q) &&
		    tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).pid,
				  cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWQDISC) <= 0)
			goto done;
		q_idx++;
	}

out:
	*q_idx_p = q_idx;
	return ret;
done:
	ret = -1;
	goto out;
}

static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct net *net = sock_net(skb->sk);
	int idx, q_idx;
	int s_idx, s_q_idx;
	struct net_device *dev;

	if (net != &init_net)
		return 0;

	s_idx = cb->args[0];
	s_q_idx = q_idx = cb->args[1];
	read_lock(&dev_base_lock);
	idx = 0;
	for_each_netdev(&init_net, dev) {
		struct netdev_queue *dev_queue;

		if (idx < s_idx)
			goto cont;
		if (idx > s_idx)
			s_q_idx = 0;
		q_idx = 0;

		dev_queue = netdev_get_tx_queue(dev, 0);
		if (tc_dump_qdisc_root(dev_queue->qdisc_sleeping, skb, cb, &q_idx, s_q_idx) < 0)
			goto done;

		dev_queue = &dev->rx_queue;
		if (tc_dump_qdisc_root(dev_queue->qdisc_sleeping, skb, cb, &q_idx, s_q_idx) < 0)
			goto done;

cont:
		idx++;
	}

done:
	read_unlock(&dev_base_lock);

	cb->args[0] = idx;
	cb->args[1] = q_idx;

	return skb->len;
}



/************************************************
 *	Traffic classes manipulation.		*
 ************************************************/



static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
{
	struct net *net = sock_net(skb->sk);
	struct netdev_queue *dev_queue;
	struct tcmsg *tcm = NLMSG_DATA(n);
	struct nlattr *tca[TCA_MAX + 1];
	struct net_device *dev;
	struct Qdisc *q = NULL;
	const struct Qdisc_class_ops *cops;
	unsigned long cl = 0;
	unsigned long new_cl;
	u32 pid = tcm->tcm_parent;
	u32 clid = tcm->tcm_handle;
	u32 qid = TC_H_MAJ(clid);
	int err;

	if (net != &init_net)
		return -EINVAL;

	if ((dev = __dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL)
		return -ENODEV;

	err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL);
	if (err < 0)
		return err;

	/*
	   parent == TC_H_UNSPEC - unspecified parent.
	   parent == TC_H_ROOT   - class is root, which has no parent.
	   parent == X:0	 - parent is root class.
	   parent == X:Y	 - parent is a node in the hierarchy.
	   parent == 0:Y	 - parent is X:Y, where X:0 is the qdisc.

	   handle == 0:0	 - generate a handle from the kernel pool.
	   handle == 0:Y	 - class is X:Y, where X:0 is the qdisc.
	   handle == X:Y	 - clear.
	   handle == X:0	 - root class.
	 */
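
	/*
	 * Illustration (not in the original): for
	 *   tc class add dev eth0 parent 1: classid 1:10
	 * userspace sends tcm_parent = 0x00010000 (1:0, the root class of
	 * qdisc 1:) and tcm_handle = 0x00010010 (1:10); both majors agree,
	 * so qid resolves to 0x00010000 in Step 1 below.
	 */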

	/* Step 1. Determine qdisc handle X:0 */

	dev_queue = netdev_get_tx_queue(dev, 0);
	if (pid != TC_H_ROOT) {
		u32 qid1 = TC_H_MAJ(pid);

		if (qid && qid1) {
			/* If both majors are known, they must be identical. */
			if (qid != qid1)
				return -EINVAL;
		} else if (qid1) {
			qid = qid1;
		} else if (qid == 0)
			qid = dev_queue->qdisc_sleeping->handle;

		/* Now qid is a genuine qdisc handle consistent with
		   both parent and child.

		   TC_H_MAJ(pid) may still be unspecified, complete it now.
		 */
		if (pid)
			pid = TC_H_MAKE(qid, pid);
	} else {
		if (qid == 0)
			qid = dev_queue->qdisc_sleeping->handle;
	}

	/* OK. Locate qdisc */
	if ((q = qdisc_lookup(dev, qid)) == NULL)
		return -ENOENT;

	/* And check that it supports classes */
	cops = q->ops->cl_ops;
	if (cops == NULL)
		return -EINVAL;

	/* Now try to get class */
	if (clid == 0) {
		if (pid == TC_H_ROOT)
			clid = qid;
	} else
		clid = TC_H_MAKE(qid, clid);

	if (clid)
		cl = cops->get(q, clid);

	if (cl == 0) {
		err = -ENOENT;
		if (n->nlmsg_type != RTM_NEWTCLASS || !(n->nlmsg_flags & NLM_F_CREATE))
			goto out;
	} else {
		switch (n->nlmsg_type) {
		case RTM_NEWTCLASS:
			err = -EEXIST;
			if (n->nlmsg_flags & NLM_F_EXCL)
				goto out;
			break;
		case RTM_DELTCLASS:
			err = cops->delete(q, cl);
			if (err == 0)
				tclass_notify(skb, n, q, cl, RTM_DELTCLASS);
			goto out;
		case RTM_GETTCLASS:
			err = tclass_notify(skb, n, q, cl, RTM_NEWTCLASS);
			goto out;
		default:
			err = -EINVAL;
			goto out;
		}
	}

	new_cl = cl;
	err = cops->change(q, clid, pid, tca, &new_cl);
	if (err == 0)
		tclass_notify(skb, n, q, new_cl, RTM_NEWTCLASS);

out:
	if (cl)
		cops->put(q, cl);

	return err;
}


static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q,
			  unsigned long cl,
			  u32 pid, u32 seq, u16 flags, int event)
{
	struct tcmsg *tcm;
	struct nlmsghdr *nlh;
	unsigned char *b = skb_tail_pointer(skb);
	struct gnet_dump d;
	const struct Qdisc_class_ops *cl_ops = q->ops->cl_ops;

	nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*tcm), flags);
	tcm = NLMSG_DATA(nlh);
	tcm->tcm_family = AF_UNSPEC;
	tcm->tcm_ifindex = qdisc_dev(q)->ifindex;
	tcm->tcm_parent = q->handle;
	tcm->tcm_handle = q->handle;
	tcm->tcm_info = 0;
	NLA_PUT_STRING(skb, TCA_KIND, q->ops->id);
	if (cl_ops->dump && cl_ops->dump(q, cl, skb, tcm) < 0)
		goto nla_put_failure;

	if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS,
					 qdisc_root_sleeping_lock(q), &d) < 0)
		goto nla_put_failure;

	if (cl_ops->dump_stats && cl_ops->dump_stats(q, cl, &d) < 0)
		goto nla_put_failure;

	if (gnet_stats_finish_copy(&d) < 0)
		goto nla_put_failure;

	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
	return skb->len;

nlmsg_failure:
nla_put_failure:
	nlmsg_trim(skb, b);
	return -1;
}

static int tclass_notify(struct sk_buff *oskb, struct nlmsghdr *n,
			 struct Qdisc *q, unsigned long cl, int event)
{
	struct sk_buff *skb;
	u32 pid = oskb ? NETLINK_CB(oskb).pid : 0;

	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
	if (!skb)
		return -ENOBUFS;

	if (tc_fill_tclass(skb, q, cl, pid, n->nlmsg_seq, 0, event) < 0) {
		kfree_skb(skb);
		return -EINVAL;
	}

	return rtnetlink_send(skb, &init_net, pid, RTNLGRP_TC, n->nlmsg_flags & NLM_F_ECHO);
}

struct qdisc_dump_args
{
	struct qdisc_walker	w;
	struct sk_buff		*skb;
	struct netlink_callback	*cb;
};

static int qdisc_class_dump(struct Qdisc *q, unsigned long cl, struct qdisc_walker *arg)
{
	struct qdisc_dump_args *a = (struct qdisc_dump_args *)arg;

	return tc_fill_tclass(a->skb, q, cl, NETLINK_CB(a->cb->skb).pid,
			      a->cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWTCLASS);
}

static int tc_dump_tclass_qdisc(struct Qdisc *q, struct sk_buff *skb,
				struct tcmsg *tcm, struct netlink_callback *cb,
				int *t_p, int s_t)
{
	struct qdisc_dump_args arg;

	if (tc_qdisc_dump_ignore(q) ||
	    *t_p < s_t || !q->ops->cl_ops ||
	    (tcm->tcm_parent &&
	     TC_H_MAJ(tcm->tcm_parent) != q->handle)) {
		(*t_p)++;
		return 0;
	}
	if (*t_p > s_t)
		memset(&cb->args[1], 0, sizeof(cb->args) - sizeof(cb->args[0]));
	arg.w.fn = qdisc_class_dump;
	arg.skb = skb;
	arg.cb = cb;
	arg.w.stop = 0;
	arg.w.skip = cb->args[1];
	arg.w.count = 0;
	q->ops->cl_ops->walk(q, &arg.w);
	cb->args[1] = arg.w.count;
	if (arg.w.stop)
		return -1;
	(*t_p)++;
	return 0;
}

static int tc_dump_tclass_root(struct Qdisc *root, struct sk_buff *skb,
			       struct tcmsg *tcm, struct netlink_callback *cb,
			       int *t_p, int s_t)
{
	struct Qdisc *q;

	if (!root)
		return 0;

	if (tc_dump_tclass_qdisc(root, skb, tcm, cb, t_p, s_t) < 0)
		return -1;

	list_for_each_entry(q, &root->list, list) {
		if (tc_dump_tclass_qdisc(q, skb, tcm, cb, t_p, s_t) < 0)
			return -1;
	}

	return 0;
}

static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct tcmsg *tcm = (struct tcmsg *)NLMSG_DATA(cb->nlh);
	struct net *net = sock_net(skb->sk);
	struct netdev_queue *dev_queue;
	struct net_device *dev;
	int t, s_t;

	if (net != &init_net)
		return 0;

	if (cb->nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*tcm)))
		return 0;
	if ((dev = dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL)
		return 0;

	s_t = cb->args[0];
	t = 0;

	dev_queue = netdev_get_tx_queue(dev, 0);
	if (tc_dump_tclass_root(dev_queue->qdisc_sleeping, skb, tcm, cb, &t, s_t) < 0)
		goto done;

	dev_queue = &dev->rx_queue;
	if (tc_dump_tclass_root(dev_queue->qdisc_sleeping, skb, tcm, cb, &t, s_t) < 0)
		goto done;

done:
	cb->args[0] = t;

	dev_put(dev);
	return skb->len;
}

/* Main classifier routine: scans the classifier chain attached
   to this qdisc, (optionally) tests for protocol, and asks
   specific classifiers.
 */
int tc_classify_compat(struct sk_buff *skb, struct tcf_proto *tp,
		       struct tcf_result *res)
{
	__be16 protocol = skb->protocol;
	int err = 0;

	for (; tp; tp = tp->next) {
		if ((tp->protocol == protocol ||
		     tp->protocol == htons(ETH_P_ALL)) &&
		    (err = tp->classify(skb, tp, res)) >= 0) {
#ifdef CONFIG_NET_CLS_ACT
			if (err != TC_ACT_RECLASSIFY && skb->tc_verd)
				skb->tc_verd = SET_TC_VERD(skb->tc_verd, 0);
#endif
			return err;
		}
	}
	return -1;
}
EXPORT_SYMBOL(tc_classify_compat);

int tc_classify(struct sk_buff *skb, struct tcf_proto *tp,
		struct tcf_result *res)
{
	int err = 0;
	__be16 protocol;
#ifdef CONFIG_NET_CLS_ACT
	struct tcf_proto *otp = tp;
reclassify:
#endif
	protocol = skb->protocol;

	err = tc_classify_compat(skb, tp, res);
#ifdef CONFIG_NET_CLS_ACT
	if (err == TC_ACT_RECLASSIFY) {
		u32 verd = G_TC_VERD(skb->tc_verd);
		tp = otp;

		if (verd++ >= MAX_REC_LOOP) {
			printk("rule prio %u protocol %02x reclassify loop, "
			       "packet dropped\n",
			       tp->prio & 0xffff, ntohs(tp->protocol));
			return TC_ACT_SHOT;
		}
		skb->tc_verd = SET_TC_VERD(skb->tc_verd, verd);
		goto reclassify;
	}
#endif
	return err;
}
EXPORT_SYMBOL(tc_classify);

void tcf_destroy(struct tcf_proto *tp)
{
	tp->ops->destroy(tp);
	module_put(tp->ops->owner);
	kfree(tp);
}

void tcf_destroy_chain(struct tcf_proto **fl)
{
	struct tcf_proto *tp;

	while ((tp = *fl) != NULL) {
		*fl = tp->next;
		tcf_destroy(tp);
	}
}
EXPORT_SYMBOL(tcf_destroy_chain);
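
/*
 * Illustrative sketch (not part of the original file): a classful qdisc's
 * ->destroy() releases every filter attached to it through the helper
 * above.  All "example_" names are hypothetical.
 */
struct example_fltr_data {
	struct tcf_proto *filter_list;
};

static void example_destroy(struct Qdisc *sch)
{
	struct example_fltr_data *q = qdisc_priv(sch);

	tcf_destroy_chain(&q->filter_list);
}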

#ifdef CONFIG_PROC_FS
static int psched_show(struct seq_file *seq, void *v)
{
	struct timespec ts;

	hrtimer_get_res(CLOCK_MONOTONIC, &ts);
	seq_printf(seq, "%08x %08x %08x %08x\n",
		   (u32)NSEC_PER_USEC, (u32)PSCHED_US2NS(1),
		   1000000,
		   (u32)NSEC_PER_SEC / (u32)ktime_to_ns(timespec_to_ktime(ts)));

	return 0;
}

static int psched_open(struct inode *inode, struct file *file)
{
	return single_open(file, psched_show, PDE(inode)->data);
}

static const struct file_operations psched_fops = {
	.owner = THIS_MODULE,
	.open = psched_open,
	.read  = seq_read,
	.llseek = seq_lseek,
	.release = single_release,
};
#endif

static int __init pktsched_init(void)
{
	register_qdisc(&pfifo_qdisc_ops);
	register_qdisc(&bfifo_qdisc_ops);
	proc_net_fops_create(&init_net, "psched", 0, &psched_fops);

	rtnl_register(PF_UNSPEC, RTM_NEWQDISC, tc_modify_qdisc, NULL);
	rtnl_register(PF_UNSPEC, RTM_DELQDISC, tc_get_qdisc, NULL);
	rtnl_register(PF_UNSPEC, RTM_GETQDISC, tc_get_qdisc, tc_dump_qdisc);
	rtnl_register(PF_UNSPEC, RTM_NEWTCLASS, tc_ctl_tclass, NULL);
	rtnl_register(PF_UNSPEC, RTM_DELTCLASS, tc_ctl_tclass, NULL);
	rtnl_register(PF_UNSPEC, RTM_GETTCLASS, tc_ctl_tclass, tc_dump_tclass);

	return 0;
}

subsys_initcall(pktsched_init);