pre-2.3.4..
[davej-history.git] / net / sched / sch_api.c
blobbb1bc418298a4c86b6f75ad61fdd13d161ff7114
1 /*
2 * net/sched/sch_api.c Packet scheduler API.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
9 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
11 * Fixes:
13 * Rani Assaf <rani@magic.metawire.com> :980802: JIFFIES and CPU clock sources are repaired.
14 * Eduardo J. Blanco <ejbs@netlabs.com.uy> :990222: kmod support
17 #include <linux/config.h>
18 #include <linux/types.h>
19 #include <linux/kernel.h>
20 #include <linux/sched.h>
21 #include <linux/string.h>
22 #include <linux/mm.h>
23 #include <linux/socket.h>
24 #include <linux/sockios.h>
25 #include <linux/in.h>
26 #include <linux/errno.h>
27 #include <linux/interrupt.h>
28 #include <linux/netdevice.h>
29 #include <linux/skbuff.h>
30 #include <linux/rtnetlink.h>
31 #include <linux/init.h>
32 #include <linux/proc_fs.h>
33 #include <linux/kmod.h>
35 #include <net/sock.h>
36 #include <net/pkt_sched.h>
38 #include <asm/processor.h>
39 #include <asm/uaccess.h>
40 #include <asm/system.h>
41 #include <asm/bitops.h>
/* Non-fatal debug assertion: logs file/function/line when x is false.
 * NOTE(review): relies on old-gcc treating __FUNCTION__ as a string
 * literal that can be concatenated -- not valid in C99+ compilers. */
#define BUG_TRAP(x) if (!(x)) { printk("Assertion (" #x ") failed at " __FILE__ "(%d):" __FUNCTION__ "\n", __LINE__); }
45 #ifdef CONFIG_RTNETLINK
46 static int qdisc_notify(struct sk_buff *oskb, struct nlmsghdr *n, u32 clid,
47 struct Qdisc *old, struct Qdisc *new);
48 static int tclass_notify(struct sk_buff *oskb, struct nlmsghdr *n,
49 struct Qdisc *q, unsigned long cl, int event);
50 #endif
54 Short review.
55 -------------
57 This file consists of two interrelated parts:
59 1. queueing disciplines manager frontend.
60 2. traffic classes manager frontend.
62 Generally, queueing discipline ("qdisc") is a black box,
63 which is able to enqueue packets and to dequeue them (when
64 device is ready to send something) in order and at times
65 determined by algorithm hidden in it.
67 qdisc's are divided into two categories:
68 - "queues", which have no internal structure visible from outside.
69 - "schedulers", which split all the packets to "traffic classes",
70 using "packet classifiers" (look at cls_api.c)
72 In turn, classes may have child qdiscs (as rule, queues)
73 attached to them etc. etc. etc.
75 The goal of the routines in this file is to translate
76 information supplied by user in the form of handles
77 into a form more intelligible to the kernel, to make some sanity
78 checks and part of work, which is common to all qdiscs
79 and to provide rtnetlink notifications.
81 All real intelligent work is done inside qdisc modules.
85 Every discipline has two major routines: enqueue and dequeue.
87 ---dequeue
89 dequeue usually returns a skb to send. It is allowed to return NULL,
90 but it does not mean that queue is empty, it just means that
91 discipline does not want to send anything this time.
92 Queue is really empty if q->q.qlen == 0.
93 For complicated disciplines with multiple queues q->q is not
94 real packet queue, but however q->q.qlen must be valid.
96 ---enqueue
98 enqueue returns number of enqueued packets i.e. this number is 1,
99 if packet was enqueued successfully and <1 if something (not
100 necessary THIS packet) was dropped.
102 Auxiliary routines:
104 ---requeue
106 requeues once dequeued packet. It is used for non-standard or
107 just buggy devices, which can defer output even if dev->tbusy=0.
109 ---reset
111 returns qdisc to initial state: purge all buffers, clear all
112 timers, counters (except for statistics) etc.
114 ---init
116 initializes newly created qdisc.
118 ---destroy
120 destroys resources allocated by init and during lifetime of qdisc.
122 ---change
124 changes qdisc parameters.
127 /************************************************
128 * Queueing disciplines manipulation. *
129 ************************************************/
132 /* The list of all installed queueing disciplines. */
134 static struct Qdisc_ops *qdisc_base = NULL;
136 /* Register/unregister queueing discipline */
138 int register_qdisc(struct Qdisc_ops *qops)
140 struct Qdisc_ops *q, **qp;
142 for (qp = &qdisc_base; (q=*qp)!=NULL; qp = &q->next)
143 if (strcmp(qops->id, q->id) == 0)
144 return -EEXIST;
146 if (qops->enqueue == NULL)
147 qops->enqueue = noop_qdisc_ops.enqueue;
148 if (qops->requeue == NULL)
149 qops->requeue = noop_qdisc_ops.requeue;
150 if (qops->dequeue == NULL)
151 qops->dequeue = noop_qdisc_ops.dequeue;
153 qops->next = NULL;
154 *qp = qops;
155 return 0;
158 int unregister_qdisc(struct Qdisc_ops *qops)
160 struct Qdisc_ops *q, **qp;
161 for (qp = &qdisc_base; (q=*qp)!=NULL; qp = &q->next)
162 if (q == qops)
163 break;
164 if (!q)
165 return -ENOENT;
166 *qp = q->next;
167 q->next = NULL;
168 return 0;
171 /* We know handle. Find qdisc among all qdisc's attached to device
172 (root qdisc, all its children, children of children etc.)
175 struct Qdisc *qdisc_lookup(struct device *dev, u32 handle)
177 struct Qdisc *q;
179 for (q = dev->qdisc_list; q; q = q->next) {
180 if (q->handle == handle)
181 return q;
183 return NULL;
/* Resolve classid inside qdisc p to the child (leaf) qdisc grafted on
 * that class.  Returns NULL when p is classless or the class does not
 * exist.  The temporary class reference is dropped before returning. */
struct Qdisc *qdisc_leaf(struct Qdisc *p, u32 classid)
{
	unsigned long cl;
	struct Qdisc *leaf;
	struct Qdisc_class_ops *cops = p->ops->cl_ops;

	/* A classless qdisc cannot have leaves. */
	if (cops == NULL)
		return NULL;
	cl = cops->get(p, classid);

	if (cl == 0)
		return NULL;
	leaf = cops->leaf(p, cl);
	cops->put(p, cl);	/* balance the get above */
	return leaf;
}
202 /* Find queueing discipline by name */
204 struct Qdisc_ops *qdisc_lookup_ops(struct rtattr *kind)
206 struct Qdisc_ops *q;
208 if (kind) {
209 for (q = qdisc_base; q; q = q->next) {
210 if (rtattr_strcmp(kind, q->id) == 0)
211 return q;
214 return NULL;
/* Global list of shared rate tables, keyed by their tc_ratespec. */
static struct qdisc_rate_table *qdisc_rtab_list;

/* Return a (possibly shared) rate table for ratespec r.  An existing
 * table with an identical ratespec is reused with its refcount bumped;
 * otherwise a new one is built from the TCA attribute tab, whose
 * payload must be exactly 1024 bytes (256 u32 slots).  Returns NULL
 * on invalid input or allocation failure. */
struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r, struct rtattr *tab)
{
	struct qdisc_rate_table *rtab;

	for (rtab = qdisc_rtab_list; rtab; rtab = rtab->next) {
		if (memcmp(&rtab->rate, r, sizeof(struct tc_ratespec)) == 0) {
			rtab->refcnt++;
			return rtab;
		}
	}

	if (tab == NULL || r->rate == 0 || r->cell_log == 0 || RTA_PAYLOAD(tab) != 1024)
		return NULL;

	rtab = kmalloc(sizeof(*rtab), GFP_KERNEL);
	if (rtab) {
		rtab->rate = *r;
		rtab->refcnt = 1;
		memcpy(rtab->data, RTA_DATA(tab), 1024);
		/* Push onto the head of the shared list. */
		rtab->next = qdisc_rtab_list;
		qdisc_rtab_list = rtab;
	}
	return rtab;
}
/* Drop one reference to a shared rate table; when the last reference
 * goes away, unlink it from the global list and free it. */
void qdisc_put_rtab(struct qdisc_rate_table *tab)
{
	struct qdisc_rate_table *rtab, **rtabp;

	/* Still referenced elsewhere (or NULL): nothing to do. */
	if (!tab || --tab->refcnt)
		return;

	for (rtabp = &qdisc_rtab_list; (rtab=*rtabp) != NULL; rtabp = &rtab->next) {
		if (rtab == tab) {
			*rtabp = rtab->next;
			kfree(rtab);
			return;
		}
	}
}
/* Allocate a unique handle from space managed by kernel */

u32 qdisc_alloc_handle(struct device *dev)
{
	int i = 0x10000;
	static u32 autohandle = TC_H_MAKE(0x80000000U, 0);

	/* Probe at most 0x10000 candidate majors in the kernel-owned
	 * 0x8000xxxx range, skipping TC_H_ROOT and handles already
	 * in use on this device. */
	do {
		autohandle += TC_H_MAKE(0x10000U, 0);
		if (autohandle == TC_H_MAKE(TC_H_ROOT, 0))
			autohandle = TC_H_MAKE(0x80000000U, 0);
	} while (qdisc_lookup(dev, autohandle) && --i > 0);

	/* 0 signals that the handle space is exhausted. */
	return i>0 ? autohandle : 0;
}
/* Attach toplevel qdisc to device dev */

/* Swap the device root qdisc for qdisc (NULL installs the noop qdisc)
 * and return the previous root, which the caller owns but must not
 * assume destroyed.  The device is deactivated around the swap and the
 * pointer switch itself is done with bottom halves blocked. */
static struct Qdisc *
dev_graft_qdisc(struct device *dev, struct Qdisc *qdisc)
{
	struct Qdisc *oqdisc;

	if (dev->flags & IFF_UP)
		dev_deactivate(dev);

	start_bh_atomic();
	oqdisc = dev->qdisc_sleeping;

	/* Prune old scheduler: reset only when nobody else holds it. */
	if (oqdisc && atomic_read(&oqdisc->refcnt) <= 1)
		qdisc_reset(oqdisc);

	/* ... and graft new one.  dev->qdisc stays noop until
	 * dev_activate() swaps the sleeping qdisc in. */
	if (qdisc == NULL)
		qdisc = &noop_qdisc;
	dev->qdisc_sleeping = qdisc;
	dev->qdisc = &noop_qdisc;
	end_bh_atomic();

	if (dev->flags & IFF_UP)
		dev_activate(dev);

	return oqdisc;
}
/* Graft qdisc "new" to class "classid" of qdisc "parent" or
   to device "dev".

   Old qdisc is not destroyed but returned in *old.
 */

int qdisc_graft(struct device *dev, struct Qdisc *parent, u32 classid,
		struct Qdisc *new, struct Qdisc **old)
{
	int err = 0;

	if (parent == NULL) {
		/* No parent: replace the device root qdisc. */
		*old = dev_graft_qdisc(dev, new);
	} else {
		struct Qdisc_class_ops *cops = parent->ops->cl_ops;

		/* Classless parents cannot accept a graft. */
		err = -EINVAL;

		if (cops) {
			unsigned long cl = cops->get(parent, classid);
			if (cl) {
				err = cops->graft(parent, cl, new, old);
				cops->put(parent, cl);
			}
		}
	}
	return err;
}
337 #ifdef CONFIG_RTNETLINK
/*
   Allocate and initialize new qdisc.

   Parameters are passed via opt.
 */

static struct Qdisc *
qdisc_create(struct device *dev, u32 handle, struct rtattr **tca, int *errp)
{
	int err;
	struct rtattr *kind = tca[TCA_KIND-1];
	struct Qdisc *sch = NULL;
	struct Qdisc_ops *ops;
	int size;

	ops = qdisc_lookup_ops(kind);
#ifdef CONFIG_KMOD
	if (ops==NULL && tca[TCA_KIND-1] != NULL) {
		char module_name[4 + IFNAMSIZ + 1];

		if (RTA_PAYLOAD(kind) <= IFNAMSIZ) {
			/* NOTE(review): assumes the user-supplied attribute
			 * payload is NUL-terminated; an unterminated string
			 * could overrun module_name -- verify callers. */
			sprintf(module_name, "sch_%s", (char*)RTA_DATA(kind));
			request_module (module_name);
			ops = qdisc_lookup_ops(kind);
		}
	}
#endif

	err = -EINVAL;
	if (ops == NULL)
		goto err_out;

	/* Qdisc header and scheduler private area in one allocation. */
	size = sizeof(*sch) + ops->priv_size;

	sch = kmalloc(size, GFP_KERNEL);
	err = -ENOBUFS;
	if (!sch)
		goto err_out;

	/* Grrr... Resolve race condition with module unload */

	err = -EINVAL;
	if (ops != qdisc_lookup_ops(kind))
		goto err_out;

	memset(sch, 0, size);

	skb_queue_head_init(&sch->q);
	sch->ops = ops;
	sch->enqueue = ops->enqueue;
	sch->dequeue = ops->dequeue;
	sch->dev = dev;
	atomic_set(&sch->refcnt, 1);
	if (handle == 0) {
		/* Caller did not pick a handle: draw one from the pool. */
		handle = qdisc_alloc_handle(dev);
		err = -ENOMEM;
		if (handle == 0)
			goto err_out;
	}
	sch->handle = handle;

	/* Only a successfully initialized qdisc is linked onto the
	 * device list; any init failure falls through to err_out. */
	if (!ops->init || (err = ops->init(sch, tca[TCA_OPTIONS-1])) == 0) {
		sch->next = dev->qdisc_list;
		dev->qdisc_list = sch;
#ifdef CONFIG_NET_ESTIMATOR
		if (tca[TCA_RATE-1])
			qdisc_new_estimator(&sch->stats, tca[TCA_RATE-1]);
#endif
		return sch;
	}

err_out:
	*errp = err;
	if (sch)
		kfree(sch);
	return NULL;
}
/* Apply new TCA_OPTIONS to an existing qdisc via its change hook and,
 * when a TCA_RATE attribute is present, replace its rate estimator.
 * Returns 0 or a negative errno from the qdisc's change callback. */
static int qdisc_change(struct Qdisc *sch, struct rtattr **tca)
{
	if (tca[TCA_OPTIONS-1]) {
		int err;

		/* Qdiscs without a change hook cannot be reconfigured. */
		if (sch->ops->change == NULL)
			return -EINVAL;
		err = sch->ops->change(sch, tca[TCA_OPTIONS-1]);
		if (err)
			return err;
	}
#ifdef CONFIG_NET_ESTIMATOR
	if (tca[TCA_RATE-1]) {
		qdisc_kill_estimator(&sch->stats);
		qdisc_new_estimator(&sch->stats, tca[TCA_RATE-1]);
	}
#endif
	return 0;
}
/* Walker context for loop detection: p is the prospective parent we
 * must not reach from below, depth limits the recursion. */
struct check_loop_arg
{
	struct qdisc_walker	w;
	struct Qdisc		*p;
	int			depth;
};

static int check_loop_fn(struct Qdisc *q, unsigned long cl, struct qdisc_walker *w);

/* Walk all classes of q looking for p among its descendants; returns
 * -ELOOP when grafting q under p would create a cycle, 0 otherwise. */
static int check_loop(struct Qdisc *q, struct Qdisc *p, int depth)
{
	struct check_loop_arg	arg;

	/* Classless qdiscs have no children, hence no loops. */
	if (q->ops->cl_ops == NULL)
		return 0;

	arg.w.stop = arg.w.skip = arg.w.count = 0;
	arg.w.fn = check_loop_fn;
	arg.depth = depth;
	arg.p = p;
	q->ops->cl_ops->walk(q, &arg.w);
	/* The walker sets w.stop when check_loop_fn found a cycle. */
	return arg.w.stop ? -ELOOP : 0;
}
/* Per-class callback for check_loop(): recurse into each leaf qdisc,
 * failing with -ELOOP if the forbidden parent is reached or the
 * hierarchy is deeper than 7 levels. */
static int
check_loop_fn(struct Qdisc *q, unsigned long cl, struct qdisc_walker *w)
{
	struct Qdisc *leaf;
	struct Qdisc_class_ops *cops = q->ops->cl_ops;
	struct check_loop_arg *arg = (struct check_loop_arg *)w;

	leaf = cops->leaf(q, cl);
	if (leaf) {
		if (leaf == arg->p || arg->depth > 7)
			return -ELOOP;
		return check_loop(leaf, arg->p, arg->depth + 1);
	}
	return 0;
}
/*
 * Delete/get qdisc.
 */

/* RTM_DELQDISC/RTM_GETQDISC handler: locate the qdisc addressed by
 * tcm_parent/tcm_handle, then either unlink and destroy it (DEL) or
 * echo it back via qdisc_notify (GET). */
static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
{
	struct tcmsg *tcm = NLMSG_DATA(n);
	struct rtattr **tca = arg;
	struct device *dev;
	u32 clid = tcm->tcm_parent;
	struct Qdisc *q = NULL;
	struct Qdisc *p = NULL;
	int err;

	if ((dev = dev_get_by_index(tcm->tcm_ifindex)) == NULL)
		return -ENODEV;

	if (clid) {
		if (clid != TC_H_ROOT) {
			/* Parent given: resolve its leaf for this class. */
			if ((p = qdisc_lookup(dev, TC_H_MAJ(clid))) == NULL)
				return -ENOENT;
			q = qdisc_leaf(p, clid);
		} else
			q = dev->qdisc_sleeping;

		if (!q)
			return -ENOENT;

		/* If a handle was also supplied, it must agree. */
		if (tcm->tcm_handle && q->handle != tcm->tcm_handle)
			return -EINVAL;
	} else {
		if ((q = qdisc_lookup(dev, tcm->tcm_handle)) == NULL)
			return -ENOENT;
	}

	/* Supplied kind, if any, must match the found qdisc. */
	if (tca[TCA_KIND-1] && rtattr_strcmp(tca[TCA_KIND-1], q->ops->id))
		return -EINVAL;

	if (n->nlmsg_type == RTM_DELQDISC) {
		if (!clid)
			return -EINVAL;
		/* Builtin (handle 0) qdiscs cannot be deleted. */
		if (q->handle == 0)
			return -ENOENT;
		/* Graft NULL in its place; q now holds the detached qdisc. */
		if ((err = qdisc_graft(dev, p, clid, NULL, &q)) != 0)
			return err;
		if (q) {
			qdisc_notify(skb, n, clid, q, NULL);
			qdisc_destroy(q);
		}
	} else {
		qdisc_notify(skb, n, clid, NULL, q);
	}
	return 0;
}
/*
   Create/change qdisc.
 */

/* RTM_NEWQDISC handler: depending on netlink flags and whether a
 * matching qdisc already exists, either change it in place or create
 * a new one and graft it over the old. */
static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
{
	struct tcmsg *tcm = NLMSG_DATA(n);
	struct rtattr **tca = arg;
	struct device *dev;
	u32 clid = tcm->tcm_parent;
	struct Qdisc *q = NULL;
	struct Qdisc *p = NULL;
	int err;

	if ((dev = dev_get_by_index(tcm->tcm_ifindex)) == NULL)
		return -ENODEV;

	if (clid) {
		if (clid != TC_H_ROOT) {
			if ((p = qdisc_lookup(dev, TC_H_MAJ(clid))) == NULL)
				return -ENOENT;
			q = qdisc_leaf(p, clid);
		} else {
			q = dev->qdisc_sleeping;
		}

		/* It may be default qdisc, ignore it */
		if (q && q->handle == 0)
			q = NULL;

		if (!q || !tcm->tcm_handle || q->handle != tcm->tcm_handle) {
			if (tcm->tcm_handle) {
				if (q && !(n->nlmsg_flags&NLM_F_REPLACE))
					return -EEXIST;
				/* A qdisc handle must have minor 0. */
				if (TC_H_MIN(tcm->tcm_handle))
					return -EINVAL;
				if ((q = qdisc_lookup(dev, tcm->tcm_handle)) == NULL)
					goto create_n_graft;
				if (n->nlmsg_flags&NLM_F_EXCL)
					return -EEXIST;
				if (tca[TCA_KIND-1] && rtattr_strcmp(tca[TCA_KIND-1], q->ops->id))
					return -EINVAL;
				/* Refuse grafts that would cycle the tree. */
				if (q == p ||
				    (p && check_loop(q, p, 0)))
					return -ELOOP;
				atomic_inc(&q->refcnt);
				goto graft;
			} else {
				if (q == NULL)
					goto create_n_graft;

				/* This magic test requires explanation.
				 *
				 * We know, that some child q is already
				 * attached to this parent and have choice:
				 * either to change it or to create/graft new one.
				 *
				 * 1. We are allowed to create/graft only
				 * if CREATE and REPLACE flags are set.
				 *
				 * 2. If EXCL is set, requestor wanted to say,
				 * that qdisc tcm_handle is not expected
				 * to exist, so that we choose create/graft too.
				 *
				 * 3. The last case is when no flags are set.
				 * Alas, it is sort of hole in API, we
				 * cannot decide what to do unambiguously.
				 * For now we select create/graft, if
				 * user gave KIND, which does not match existing.
				 */
				if ((n->nlmsg_flags&NLM_F_CREATE) &&
				    (n->nlmsg_flags&NLM_F_REPLACE) &&
				    ((n->nlmsg_flags&NLM_F_EXCL) ||
				     (tca[TCA_KIND-1] &&
				      rtattr_strcmp(tca[TCA_KIND-1], q->ops->id))))
					goto create_n_graft;
			}
		}
	} else {
		if (!tcm->tcm_handle)
			return -EINVAL;
		q = qdisc_lookup(dev, tcm->tcm_handle);
	}

	/* Change qdisc parameters */
	if (q == NULL)
		return -ENOENT;
	if (n->nlmsg_flags&NLM_F_EXCL)
		return -EEXIST;
	if (tca[TCA_KIND-1] && rtattr_strcmp(tca[TCA_KIND-1], q->ops->id))
		return -EINVAL;
	err = qdisc_change(q, tca);
	if (err == 0)
		qdisc_notify(skb, n, clid, NULL, q);
	return err;

create_n_graft:
	if (!(n->nlmsg_flags&NLM_F_CREATE))
		return -ENOENT;
	q = qdisc_create(dev, tcm->tcm_handle, tca, &err);
	if (q == NULL)
		return err;

graft:
	if (1) {
		struct Qdisc *old_q = NULL;
		err = qdisc_graft(dev, p, clid, q, &old_q);
		if (err) {
			if (q)
				qdisc_destroy(q);
			return err;
		}
		qdisc_notify(skb, n, clid, old_q, q);
		if (old_q)
			qdisc_destroy(old_q);
	}
	return 0;
}
/* Serialize one qdisc into a netlink message on skb.  Returns skb->len
 * on success or -1 after trimming any partially written message
 * (NLMSG_PUT/RTA_PUT jump to the failure labels on overflow). */
static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
			 u32 pid, u32 seq, unsigned flags, int event)
{
	struct tcmsg *tcm;
	struct nlmsghdr  *nlh;
	unsigned char	 *b = skb->tail;	/* rollback point */

	nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(*tcm));
	nlh->nlmsg_flags = flags;
	tcm = NLMSG_DATA(nlh);
	tcm->tcm_family = AF_UNSPEC;
	tcm->tcm_ifindex = q->dev ? q->dev->ifindex : 0;
	tcm->tcm_parent = clid;
	tcm->tcm_handle = q->handle;
	tcm->tcm_info = atomic_read(&q->refcnt);
	RTA_PUT(skb, TCA_KIND, IFNAMSIZ, q->ops->id);
	if (q->ops->dump && q->ops->dump(q, skb) < 0)
		goto rtattr_failure;
	/* Refresh the queue length before exporting statistics. */
	q->stats.qlen = q->q.qlen;
	RTA_PUT(skb, TCA_STATS, sizeof(q->stats), &q->stats);
	nlh->nlmsg_len = skb->tail - b;
	return skb->len;

nlmsg_failure:
rtattr_failure:
	skb_trim(skb, b - skb->data);
	return -1;
}
/* Broadcast a qdisc change to the RTMGRP_TC netlink group: a DEL
 * message for old (when it has a real handle) and/or a NEW message
 * for new.  oskb, when present, identifies the requesting socket. */
static int qdisc_notify(struct sk_buff *oskb, struct nlmsghdr *n,
			u32 clid, struct Qdisc *old, struct Qdisc *new)
{
	struct sk_buff *skb;
	u32 pid = oskb ? NETLINK_CB(oskb).pid : 0;

	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
	if (!skb)
		return -ENOBUFS;

	if (old && old->handle) {
		if (tc_fill_qdisc(skb, old, clid, pid, n->nlmsg_seq, 0, RTM_DELQDISC) < 0)
			goto err_out;
	}
	if (new) {
		if (tc_fill_qdisc(skb, new, clid, pid, n->nlmsg_seq, old ? NLM_F_REPLACE : 0, RTM_NEWQDISC) < 0)
			goto err_out;
	}

	/* Only send when at least one message was composed. */
	if (skb->len)
		return rtnetlink_send(skb, pid, RTMGRP_TC, n->nlmsg_flags&NLM_F_ECHO);

err_out:
	kfree_skb(skb);
	return -EINVAL;
}
/* Netlink dump callback: walk every device and every qdisc on it,
 * emitting one RTM_NEWQDISC message each.  cb->args[0]/[1] persist the
 * device and qdisc indices between invocations so a full dump can be
 * resumed where the previous skb filled up. */
static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb)
{
	int idx, q_idx;
	int s_idx, s_q_idx;
	struct device *dev;
	struct Qdisc *q;

	s_idx = cb->args[0];
	s_q_idx = q_idx = cb->args[1];
	read_lock_bh(&dev_base_lock);
	for (dev=dev_base, idx=0; dev; dev = dev->next, idx++) {
		if (idx < s_idx)
			continue;
		/* Past the resume device: restart qdisc index at 0. */
		if (idx > s_idx)
			s_q_idx = 0;
		for (q = dev->qdisc_list, q_idx = 0; q;
		     q = q->next, q_idx++) {
			if (q_idx < s_q_idx)
				continue;
			if (tc_fill_qdisc(skb, q, 0, NETLINK_CB(cb->skb).pid,
					  cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWQDISC) <= 0)
				goto done;
		}
	}

done:
	read_unlock_bh(&dev_base_lock);

	/* Record resume position for the next dump call. */
	cb->args[0] = idx;
	cb->args[1] = q_idx;

	return skb->len;
}
743 /************************************************
744 * Traffic classes manipulation. *
745 ************************************************/
/* RTM_{NEW,DEL,GET}TCLASS handler: normalize the (parent, handle)
 * addressing described below, locate the owning qdisc, then dispatch
 * to its class operations (get/change/delete) and notify listeners. */
static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
{
	struct tcmsg *tcm = NLMSG_DATA(n);
	struct rtattr **tca = arg;
	struct device *dev;
	struct Qdisc *q = NULL;
	struct Qdisc_class_ops *cops;
	unsigned long cl = 0;
	unsigned long new_cl;
	u32 pid = tcm->tcm_parent;
	u32 clid = tcm->tcm_handle;
	u32 qid = TC_H_MAJ(clid);
	int err;

	if ((dev = dev_get_by_index(tcm->tcm_ifindex)) == NULL)
		return -ENODEV;

	/*
	   parent == TC_H_UNSPEC - unspecified parent.
	   parent == TC_H_ROOT   - class is root, which has no parent.
	   parent == X:0         - parent is root class.
	   parent == X:Y         - parent is a node in hierarchy.
	   parent == 0:Y         - parent is X:Y, where X:0 is qdisc.

	   handle == 0:0         - generate handle from kernel pool.
	   handle == 0:Y         - class is X:Y, where X:0 is qdisc.
	   handle == X:Y         - clear.
	   handle == X:0         - root class.
	 */

	/* Step 1. Determine qdisc handle X:0 */

	if (pid != TC_H_ROOT) {
		u32 qid1 = TC_H_MAJ(pid);

		if (qid && qid1) {
			/* If both majors are known, they must be identical. */
			if (qid != qid1)
				return -EINVAL;
		} else if (qid1) {
			qid = qid1;
		} else if (qid == 0)
			qid = dev->qdisc_sleeping->handle;

		/* Now qid is genuine qdisc handle consistent
		   both with parent and child.

		   TC_H_MAJ(pid) still may be unspecified, complete it now.
		 */
		if (pid)
			pid = TC_H_MAKE(qid, pid);
	} else {
		if (qid == 0)
			qid = dev->qdisc_sleeping->handle;
	}

	/* OK. Locate qdisc */
	if ((q = qdisc_lookup(dev, qid)) == NULL)
		return -ENOENT;

	/* And check that it supports classes */
	cops = q->ops->cl_ops;
	if (cops == NULL)
		return -EINVAL;

	/* Now try to get class */
	if (clid == 0) {
		if (pid == TC_H_ROOT)
			clid = qid;
	} else
		clid = TC_H_MAKE(qid, clid);

	if (clid)
		cl = cops->get(q, clid);

	if (cl == 0) {
		/* Nonexistent class: only NEW with CREATE may proceed. */
		err = -ENOENT;
		if (n->nlmsg_type != RTM_NEWTCLASS || !(n->nlmsg_flags&NLM_F_CREATE))
			goto out;
	} else {
		switch (n->nlmsg_type) {
		case RTM_NEWTCLASS:
			err = -EEXIST;
			if (n->nlmsg_flags&NLM_F_EXCL)
				goto out;
			break;
		case RTM_DELTCLASS:
			err = cops->delete(q, cl);
			if (err == 0)
				tclass_notify(skb, n, q, cl, RTM_DELTCLASS);
			goto out;
		case RTM_GETTCLASS:
			err = tclass_notify(skb, n, q, cl, RTM_NEWTCLASS);
			goto out;
		default:
			err = -EINVAL;
			goto out;
		}
	}

	/* Create a new class or change the existing one. */
	new_cl = cl;
	err = cops->change(q, clid, pid, tca, &new_cl);
	if (err == 0)
		tclass_notify(skb, n, q, new_cl, RTM_NEWTCLASS);

out:
	if (cl)
		cops->put(q, cl);	/* balance cops->get above */

	return err;
}
/* Serialize one traffic class into a netlink message on skb.  The
 * class's own dump callback is expected to fill in the real
 * tcm_parent/tcm_handle values and any class attributes.  Returns
 * skb->len on success, -1 after trimming a partial message. */
static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q,
			  unsigned long cl,
			  u32 pid, u32 seq, unsigned flags, int event)
{
	struct tcmsg *tcm;
	struct nlmsghdr  *nlh;
	unsigned char	 *b = skb->tail;	/* rollback point */

	nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(*tcm));
	nlh->nlmsg_flags = flags;
	tcm = NLMSG_DATA(nlh);
	tcm->tcm_family = AF_UNSPEC;
	tcm->tcm_ifindex = q->dev ? q->dev->ifindex : 0;
	/* Defaults; cl_ops->dump may overwrite these below. */
	tcm->tcm_parent = q->handle;
	tcm->tcm_handle = q->handle;
	tcm->tcm_info = 0;
	RTA_PUT(skb, TCA_KIND, IFNAMSIZ, q->ops->id);
	if (q->ops->cl_ops->dump && q->ops->cl_ops->dump(q, cl, skb, tcm) < 0)
		goto rtattr_failure;
	nlh->nlmsg_len = skb->tail - b;
	return skb->len;

nlmsg_failure:
rtattr_failure:
	skb_trim(skb, b - skb->data);
	return -1;
}
/* Broadcast a class event (RTM_NEWTCLASS/RTM_DELTCLASS) for class cl
 * of qdisc q to the RTMGRP_TC netlink group. */
static int tclass_notify(struct sk_buff *oskb, struct nlmsghdr *n,
			 struct Qdisc *q, unsigned long cl, int event)
{
	struct sk_buff *skb;
	u32 pid = oskb ? NETLINK_CB(oskb).pid : 0;

	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
	if (!skb)
		return -ENOBUFS;

	if (tc_fill_tclass(skb, q, cl, pid, n->nlmsg_seq, 0, event) < 0) {
		kfree_skb(skb);
		return -EINVAL;
	}

	return rtnetlink_send(skb, pid, RTMGRP_TC, n->nlmsg_flags&NLM_F_ECHO);
}
/* Walker context carrying the dump skb and netlink callback state. */
struct qdisc_dump_args
{
	struct qdisc_walker	w;
	struct sk_buff		*skb;
	struct netlink_callback	*cb;
};

/* Per-class walker callback: serialize one class into the dump skb. */
static int qdisc_class_dump(struct Qdisc *q, unsigned long cl, struct qdisc_walker *arg)
{
	struct qdisc_dump_args *a = (struct qdisc_dump_args *)arg;

	return tc_fill_tclass(a->skb, q, cl, NETLINK_CB(a->cb->skb).pid,
			      a->cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWTCLASS);
}
/* Netlink dump callback for traffic classes: walk each classful qdisc
 * on the addressed device and dump its classes.  cb->args[0] holds the
 * qdisc index to resume at; cb->args[1] the class skip count. */
static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb)
{
	int t;
	int s_t;
	struct device *dev;
	struct Qdisc *q;
	struct tcmsg *tcm = (struct tcmsg*)NLMSG_DATA(cb->nlh);
	struct qdisc_dump_args arg;

	if (cb->nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*tcm)))
		return 0;
	if ((dev = dev_get_by_index(tcm->tcm_ifindex)) == NULL)
		return 0;

	s_t = cb->args[0];

	for (q=dev->qdisc_list, t=0; q; q = q->next, t++) {
		if (t < s_t) continue;
		if (!q->ops->cl_ops) continue;
		/* Optional filter: only classes of the named parent. */
		if (tcm->tcm_parent && TC_H_MAJ(tcm->tcm_parent) != q->handle)
			continue;
		/* Past the resume qdisc: clear the per-qdisc skip state. */
		if (t > s_t)
			memset(&cb->args[1], 0, sizeof(cb->args)-sizeof(cb->args[0]));
		arg.w.fn = qdisc_class_dump;
		arg.skb = skb;
		arg.cb = cb;
		arg.w.stop = 0;
		arg.w.skip = cb->args[1];
		arg.w.count = 0;
		q->ops->cl_ops->walk(q, &arg.w);
		cb->args[1] = arg.w.count;
		if (arg.w.stop)
			break;
	}

	cb->args[0] = t;

	return skb->len;
}
962 #endif
964 int psched_us_per_tick = 1;
965 int psched_tick_per_us = 1;
967 #ifdef CONFIG_PROC_FS
/* /proc/net/psched read handler: report the clock calibration values
 * as two hex words ("tick_per_us us_per_tick"), honoring the usual
 * offset/length proc-read contract. */
static int psched_read_proc(char *buffer, char **start, off_t offset,
			     int length, int *eof, void *data)
{
	int len;

	len = sprintf(buffer, "%08x %08x\n",
		      psched_tick_per_us, psched_us_per_tick);

	/* Clamp to the window the reader asked for. */
	len -= offset;

	if (len > length)
		len = length;
	if(len < 0)
		len = 0;

	*start = buffer + offset;
	*eof = 1;	/* single-shot: everything fits in one read */

	return len;
}
988 #endif
990 #if PSCHED_CLOCK_SOURCE == PSCHED_GETTIMEOFDAY
/* Convert a gettimeofday second delta to microseconds, clamped to
 * bound.  When bound is small (<= 1s) or the multiplication would
 * overflow a 32-bit int, the bound itself is returned. */
int psched_tod_diff(int delta_sec, int bound)
{
	int usecs;

	/* Overflow guard: delta_sec * 1000000 must fit in an int. */
	if (delta_sec > (0x7FFFFFFF / 1000000) - 1 || bound <= 1000000)
		return bound;

	usecs = delta_sec * 1000000;
	return (usecs > bound) ? bound : usecs;
}
1002 #endif
1004 psched_time_t psched_time_base;
1006 #if PSCHED_CLOCK_SOURCE == PSCHED_CPU
1007 psched_tdiff_t psched_clock_per_hz;
1008 int psched_clock_scale;
1009 #endif
#ifdef PSCHED_WATCHER
PSCHED_WATCHER psched_time_mark;

static void psched_tick(unsigned long);

/* NOTE(review): initializer order matches the old timer_list layout
 * (next, prev, expires, data, function) -- confirm against the
 * kernel version this file targets. */
static struct timer_list psched_timer =
	{ NULL, NULL, 0, 0L, psched_tick };

/* Periodic watchdog keeping the scheduler clock sane: on the CPU
 * clock source it samples the TSC every second to avoid wraparound;
 * otherwise it refreshes the jiffies-derived time base hourly. */
static void psched_tick(unsigned long dummy)
{
#if PSCHED_CLOCK_SOURCE == PSCHED_CPU
	psched_time_t dummy_stamp;
	PSCHED_GET_TIME(dummy_stamp);
	/* It is OK up to 4GHz cpu */
	psched_timer.expires = jiffies + 1*HZ;
#else
	unsigned long now = jiffies;
	psched_time_base = ((u64)now)<<PSCHED_JSCALE;
	psched_time_mark = now;
	psched_timer.expires = now + 60*60*HZ;
#endif
	add_timer(&psched_timer);
}
#endif
#if PSCHED_CLOCK_SOURCE == PSCHED_CPU
/* Calibrate the CPU timestamp clock against gettimeofday over a
 * ~100ms busy-wait, deriving psched_tick_per_us and the power-of-two
 * scaling factors used by the scheduler.  Returns -1 when no usable
 * CPU clock is available or the measurement is implausible. */
__initfunc(int psched_calibrate_clock(void))
{
	psched_time_t stamp, stamp1;
	struct timeval tv, tv1;
	psched_tdiff_t delay;
	long rdelay;
	unsigned long stop;

#if CPU == 586 || CPU == 686
	/* Calibration is meaningless without a TSC. */
	if (!(boot_cpu_data.x86_capability & X86_FEATURE_TSC))
		return -1;
#endif

#ifdef PSCHED_WATCHER
	psched_tick(0);
#endif
	/* Bracket a ~HZ/10 busy-wait with both clock sources. */
	stop = jiffies + HZ/10;
	PSCHED_GET_TIME(stamp);
	do_gettimeofday(&tv);
	while (time_before(jiffies, stop))
		barrier();
	PSCHED_GET_TIME(stamp1);
	do_gettimeofday(&tv1);

	delay = PSCHED_TDIFF(stamp1, stamp);
	rdelay = tv1.tv_usec - tv.tv_usec;
	rdelay += (tv1.tv_sec - tv.tv_sec)*1000000;
	/* CPU clock slower than wall clock: cannot be a usable TSC.
	 * NOTE(review): rdelay == 0 would pass this check and divide
	 * by zero below -- verify this cannot occur in practice. */
	if (rdelay > delay)
		return -1;
	delay /= rdelay;	/* CPU ticks per microsecond */
	psched_tick_per_us = delay;
	/* Round the scale down to the nearest power of two. */
	while ((delay>>=1) != 0)
		psched_clock_scale++;
	psched_us_per_tick = 1<<psched_clock_scale;
	psched_clock_per_hz = (psched_tick_per_us*(1000000/HZ))>>psched_clock_scale;
	return 0;
}
#endif
/* One-time packet scheduler initialization: calibrate the scheduler
 * clock source, install the tc rtnetlink handlers, register all
 * configured built-in qdiscs and create /proc/net/psched.
 * Returns 0 on success, -1 when clock calibration fails. */
__initfunc(int pktsched_init(void))
{
#ifdef CONFIG_RTNETLINK
	struct rtnetlink_link *link_p;
#endif
#ifdef CONFIG_PROC_FS
	struct proc_dir_entry *ent;
#endif

#if PSCHED_CLOCK_SOURCE == PSCHED_CPU
	if (psched_calibrate_clock() < 0)
		return -1;
#elif PSCHED_CLOCK_SOURCE == PSCHED_JIFFIES
	psched_tick_per_us = HZ<<PSCHED_JSCALE;
	psched_us_per_tick = 1000000;
#ifdef PSCHED_WATCHER
	psched_tick(0);
#endif
#endif

#ifdef CONFIG_RTNETLINK
	link_p = rtnetlink_links[PF_UNSPEC];

	/* Setup rtnetlink links. It is made here to avoid
	   exporting large number of public symbols.
	 */

	if (link_p) {
		link_p[RTM_NEWQDISC-RTM_BASE].doit = tc_modify_qdisc;
		link_p[RTM_DELQDISC-RTM_BASE].doit = tc_get_qdisc;
		link_p[RTM_GETQDISC-RTM_BASE].doit = tc_get_qdisc;
		link_p[RTM_GETQDISC-RTM_BASE].dumpit = tc_dump_qdisc;
		link_p[RTM_NEWTCLASS-RTM_BASE].doit = tc_ctl_tclass;
		link_p[RTM_DELTCLASS-RTM_BASE].doit = tc_ctl_tclass;
		link_p[RTM_GETTCLASS-RTM_BASE].doit = tc_ctl_tclass;
		link_p[RTM_GETTCLASS-RTM_BASE].dumpit = tc_dump_tclass;
	}
#endif

/* FIX: the original expansion pasted "&" onto the identifier with
 * "##" ("&##name##_qdisc_ops"); "&pfifo" is not a valid preprocessing
 * token, so conforming preprocessors reject it.  Only "name" and
 * "_qdisc_ops" are pasted; "&" stays a separate token. */
#define INIT_QDISC(name) { \
	extern struct Qdisc_ops name##_qdisc_ops; \
	register_qdisc(&name##_qdisc_ops); \
	}

	INIT_QDISC(pfifo);
	INIT_QDISC(bfifo);

#ifdef CONFIG_NET_SCH_CBQ
	INIT_QDISC(cbq);
#endif
#ifdef CONFIG_NET_SCH_CSZ
	INIT_QDISC(csz);
#endif
#ifdef CONFIG_NET_SCH_HPFQ
	INIT_QDISC(hpfq);
#endif
#ifdef CONFIG_NET_SCH_HFSC
	INIT_QDISC(hfsc);
#endif
#ifdef CONFIG_NET_SCH_RED
	INIT_QDISC(red);
#endif
#ifdef CONFIG_NET_SCH_GRED
	INIT_QDISC(gred);
#endif
#ifdef CONFIG_NET_SCH_DSMARK
	INIT_QDISC(dsmark);
#endif
#ifdef CONFIG_NET_SCH_SFQ
	INIT_QDISC(sfq);
#endif
#ifdef CONFIG_NET_SCH_TBF
	INIT_QDISC(tbf);
#endif
#ifdef CONFIG_NET_SCH_TEQL
	teql_init();
#endif
#ifdef CONFIG_NET_SCH_PRIO
	INIT_QDISC(prio);
#endif
#ifdef CONFIG_NET_CLS
	tc_filter_init();
#endif

#ifdef CONFIG_PROC_FS
	/* FIX: create_proc_entry() can fail; guard against a NULL
	 * dereference instead of unconditionally assigning read_proc. */
	ent = create_proc_entry("net/psched", 0, 0);
	if (ent)
		ent->read_proc = psched_read_proc;
#endif

	return 0;
}