2 * net/sched/sch_api.c Packet scheduler API.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
9 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
13 * Rani Assaf <rani@magic.metawire.com> :980802: JIFFIES and CPU clock sources are repaired.
14 * Eduardo J. Blanco <ejbs@netlabs.com.uy> :990222: kmod support
15 * Jamal Hadi Salim <hadi@nortelnetworks.com>: 990601: ingress support
18 #include <linux/config.h>
19 #include <linux/types.h>
20 #include <linux/kernel.h>
21 #include <linux/sched.h>
22 #include <linux/string.h>
24 #include <linux/socket.h>
25 #include <linux/sockios.h>
27 #include <linux/errno.h>
28 #include <linux/interrupt.h>
29 #include <linux/netdevice.h>
30 #include <linux/skbuff.h>
31 #include <linux/rtnetlink.h>
32 #include <linux/init.h>
33 #include <linux/proc_fs.h>
34 #include <linux/kmod.h>
37 #include <net/pkt_sched.h>
39 #include <asm/processor.h>
40 #include <asm/uaccess.h>
41 #include <asm/system.h>
42 #include <asm/bitops.h>
44 #ifdef CONFIG_RTNETLINK
45 static int qdisc_notify(struct sk_buff
*oskb
, struct nlmsghdr
*n
, u32 clid
,
46 struct Qdisc
*old
, struct Qdisc
*new);
47 static int tclass_notify(struct sk_buff
*oskb
, struct nlmsghdr
*n
,
48 struct Qdisc
*q
, unsigned long cl
, int event
);
56 This file consists of two interrelated parts:
58 1. queueing disciplines manager frontend.
59 2. traffic classes manager frontend.
61 Generally, queueing discipline ("qdisc") is a black box,
62 which is able to enqueue packets and to dequeue them (when
63 device is ready to send something) in order and at times
64 determined by algorithm hidden in it.
66 qdisc's are divided into two categories:
67 - "queues", which have no internal structure visible from outside.
68 - "schedulers", which split all the packets to "traffic classes",
69 using "packet classifiers" (look at cls_api.c)
71 In turn, classes may have child qdiscs (as rule, queues)
72 attached to them etc. etc. etc.
74 The goal of the routines in this file is to translate
75 information supplied by user in the form of handles
76 to more intelligible for kernel form, to make some sanity
77 checks and part of work, which is common to all qdiscs
78 and to provide rtnetlink notifications.
80 All real intelligent work is done inside qdisc modules.
84 Every discipline has two major routines: enqueue and dequeue.
88 dequeue usually returns a skb to send. It is allowed to return NULL,
89 but it does not mean that queue is empty, it just means that
90 discipline does not want to send anything this time.
91 Queue is really empty if q->q.qlen == 0.
92 For complicated disciplines with multiple queues q->q is not
93 real packet queue, but however q->q.qlen must be valid.
97 enqueue returns 0, if packet was enqueued successfully.
98 If packet (this one or another one) was dropped, it returns
100 NET_XMIT_DROP - this packet dropped
101 Expected action: do not backoff, but wait until queue will clear.
102 NET_XMIT_CN - probably this packet enqueued, but another one dropped.
103 Expected action: backoff or ignore
104 NET_XMIT_POLICED - dropped by police.
105 Expected action: backoff or error to real-time apps.
111 requeues once dequeued packet. It is used for non-standard or
112 just buggy devices, which can defer output even if dev->tbusy=0.
116 returns qdisc to initial state: purge all buffers, clear all
117 timers, counters (except for statistics) etc.
121 initializes newly created qdisc.
125 destroys resources allocated by init and during lifetime of qdisc.
129 changes qdisc parameters.
132 /* Protects list of registered TC modules. It is pure SMP lock. */
133 static rwlock_t qdisc_mod_lock
= RW_LOCK_UNLOCKED
;
136 /************************************************
137 * Queueing disciplines manipulation. *
138 ************************************************/
141 /* The list of all installed queueing disciplines. */
/* Singly linked through Qdisc_ops->next; every traversal visible in this
 * file (register_qdisc, unregister_qdisc, qdisc_lookup_ops) takes
 * qdisc_mod_lock first. */
143 static struct Qdisc_ops
*qdisc_base
= NULL
;
145 /* Register/unregister queueing discipline */
147 int register_qdisc(struct Qdisc_ops
*qops
)
149 struct Qdisc_ops
*q
, **qp
;
151 write_lock(&qdisc_mod_lock
);
152 for (qp
= &qdisc_base
; (q
=*qp
)!=NULL
; qp
= &q
->next
) {
153 if (strcmp(qops
->id
, q
->id
) == 0) {
154 write_unlock(&qdisc_mod_lock
);
159 if (qops
->enqueue
== NULL
)
160 qops
->enqueue
= noop_qdisc_ops
.enqueue
;
161 if (qops
->requeue
== NULL
)
162 qops
->requeue
= noop_qdisc_ops
.requeue
;
163 if (qops
->dequeue
== NULL
)
164 qops
->dequeue
= noop_qdisc_ops
.dequeue
;
168 write_unlock(&qdisc_mod_lock
);
172 int unregister_qdisc(struct Qdisc_ops
*qops
)
174 struct Qdisc_ops
*q
, **qp
;
177 write_lock(&qdisc_mod_lock
);
178 for (qp
= &qdisc_base
; (q
=*qp
)!=NULL
; qp
= &q
->next
)
186 write_unlock(&qdisc_mod_lock
);
190 /* We know handle. Find qdisc among all qdisc's attached to device
191 (root qdisc, all its children, children of children etc.)
/* Linear walk of dev->qdisc_list comparing each qdisc's handle against
 * the requested one.  NOTE(review): the local declaration of q, the
 * match return and the fall-through return are elided in this extract;
 * presumably it returns the matching qdisc or NULL — confirm against
 * the full source. */
194 struct Qdisc
*qdisc_lookup(struct net_device
*dev
, u32 handle
)
198 for (q
= dev
->qdisc_list
; q
; q
= q
->next
) {
199 if (q
->handle
== handle
)
205 struct Qdisc
*qdisc_leaf(struct Qdisc
*p
, u32 classid
)
209 struct Qdisc_class_ops
*cops
= p
->ops
->cl_ops
;
213 cl
= cops
->get(p
, classid
);
217 leaf
= cops
->leaf(p
, cl
);
222 /* Find queueing discipline by name */
224 struct Qdisc_ops
*qdisc_lookup_ops(struct rtattr
*kind
)
226 struct Qdisc_ops
*q
= NULL
;
229 read_lock(&qdisc_mod_lock
);
230 for (q
= qdisc_base
; q
; q
= q
->next
) {
231 if (rtattr_strcmp(kind
, q
->id
) == 0)
234 read_unlock(&qdisc_mod_lock
);
239 static struct qdisc_rate_table
*qdisc_rtab_list
;
241 struct qdisc_rate_table
*qdisc_get_rtab(struct tc_ratespec
*r
, struct rtattr
*tab
)
243 struct qdisc_rate_table
*rtab
;
245 for (rtab
= qdisc_rtab_list
; rtab
; rtab
= rtab
->next
) {
246 if (memcmp(&rtab
->rate
, r
, sizeof(struct tc_ratespec
)) == 0) {
252 if (tab
== NULL
|| r
->rate
== 0 || r
->cell_log
== 0 || RTA_PAYLOAD(tab
) != 1024)
255 rtab
= kmalloc(sizeof(*rtab
), GFP_KERNEL
);
259 memcpy(rtab
->data
, RTA_DATA(tab
), 1024);
260 rtab
->next
= qdisc_rtab_list
;
261 qdisc_rtab_list
= rtab
;
266 void qdisc_put_rtab(struct qdisc_rate_table
*tab
)
268 struct qdisc_rate_table
*rtab
, **rtabp
;
270 if (!tab
|| --tab
->refcnt
)
273 for (rtabp
= &qdisc_rtab_list
; (rtab
=*rtabp
) != NULL
; rtabp
= &rtab
->next
) {
283 /* Allocate a unique handle from space managed by kernel */
285 u32
qdisc_alloc_handle(struct net_device
*dev
)
/* Monotonic allocator: the static cursor survives across calls and all
 * kernel-assigned handles live in the 0x8000xxxx major-number space. */
288 static u32 autohandle
= TC_H_MAKE(0x80000000U
, 0);
/* Advance the major number by one (minor part of the handle stays 0). */
291 autohandle
+= TC_H_MAKE(0x10000U
, 0);
/* Wrap back to the start of the kernel space before colliding with
 * the reserved TC_H_ROOT value. */
292 if (autohandle
== TC_H_MAKE(TC_H_ROOT
, 0))
293 autohandle
= TC_H_MAKE(0x80000000U
, 0);
/* Retry while the candidate is already taken on this device; i bounds
 * the number of probes (its declaration is elided in this extract). */
294 } while (qdisc_lookup(dev
, autohandle
) && --i
> 0);
/* 0 signals exhaustion of the probe budget, i.e. allocation failure. */
296 return i
>0 ? autohandle
: 0;
299 /* Attach toplevel qdisc to device dev */
301 static struct Qdisc
*
302 dev_graft_qdisc(struct net_device
*dev
, struct Qdisc
*qdisc
)
304 struct Qdisc
*oqdisc
;
306 if (dev
->flags
& IFF_UP
)
309 write_lock(&qdisc_tree_lock
);
310 spin_lock_bh(&dev
->queue_lock
);
311 if (qdisc
&& qdisc
->flags
&TCQ_F_INGRES
) {
312 oqdisc
= dev
->qdisc_ingress
;
313 /* Prune old scheduler */
314 if (oqdisc
&& atomic_read(&oqdisc
->refcnt
) <= 1) {
317 dev
->qdisc_ingress
= NULL
;
319 dev
->qdisc_ingress
= qdisc
;
324 oqdisc
= dev
->qdisc_sleeping
;
326 /* Prune old scheduler */
327 if (oqdisc
&& atomic_read(&oqdisc
->refcnt
) <= 1)
330 /* ... and graft new one */
333 dev
->qdisc_sleeping
= qdisc
;
334 dev
->qdisc
= &noop_qdisc
;
337 spin_unlock_bh(&dev
->queue_lock
);
338 write_unlock(&qdisc_tree_lock
);
340 if (dev
->flags
& IFF_UP
)
347 /* Graft qdisc "new" to class "classid" of qdisc "parent" or
350 Old qdisc is not destroyed but returned in *old.
353 int qdisc_graft(struct net_device
*dev
, struct Qdisc
*parent
, u32 classid
,
354 struct Qdisc
*new, struct Qdisc
**old
)
357 struct Qdisc
*q
= *old
;
360 if (parent
== NULL
) {
361 if (q
&& q
->flags
&TCQ_F_INGRES
) {
362 *old
= dev_graft_qdisc(dev
, q
);
364 *old
= dev_graft_qdisc(dev
, new);
367 struct Qdisc_class_ops
*cops
= parent
->ops
->cl_ops
;
372 unsigned long cl
= cops
->get(parent
, classid
);
374 err
= cops
->graft(parent
, cl
, new, old
);
375 cops
->put(parent
, cl
);
382 #ifdef CONFIG_RTNETLINK
385 Allocate and initialize new qdisc.
387 Parameters are passed via opt.
390 static struct Qdisc
*
391 qdisc_create(struct net_device
*dev
, u32 handle
, struct rtattr
**tca
, int *errp
)
394 struct rtattr
*kind
= tca
[TCA_KIND
-1];
395 struct Qdisc
*sch
= NULL
;
396 struct Qdisc_ops
*ops
;
399 ops
= qdisc_lookup_ops(kind
);
401 if (ops
==NULL
&& tca
[TCA_KIND
-1] != NULL
) {
402 char module_name
[4 + IFNAMSIZ
+ 1];
404 if (RTA_PAYLOAD(kind
) <= IFNAMSIZ
) {
405 sprintf(module_name
, "sch_%s", (char*)RTA_DATA(kind
));
406 request_module (module_name
);
407 ops
= qdisc_lookup_ops(kind
);
416 size
= sizeof(*sch
) + ops
->priv_size
;
418 sch
= kmalloc(size
, GFP_KERNEL
);
423 /* Grrr... Resolve race condition with module unload */
426 if (ops
!= qdisc_lookup_ops(kind
))
429 memset(sch
, 0, size
);
431 skb_queue_head_init(&sch
->q
);
433 if (handle
== TC_H_INGRESS
)
434 sch
->flags
|= TCQ_F_INGRES
;
437 sch
->enqueue
= ops
->enqueue
;
438 sch
->dequeue
= ops
->dequeue
;
440 atomic_set(&sch
->refcnt
, 1);
441 sch
->stats
.lock
= &dev
->queue_lock
;
443 handle
= qdisc_alloc_handle(dev
);
449 if (handle
== TC_H_INGRESS
)
450 sch
->handle
=TC_H_MAKE(TC_H_INGRESS
, 0);
452 sch
->handle
= handle
;
454 if (!ops
->init
|| (err
= ops
->init(sch
, tca
[TCA_OPTIONS
-1])) == 0) {
455 write_lock(&qdisc_tree_lock
);
456 sch
->next
= dev
->qdisc_list
;
457 dev
->qdisc_list
= sch
;
458 write_unlock(&qdisc_tree_lock
);
459 #ifdef CONFIG_NET_ESTIMATOR
461 qdisc_new_estimator(&sch
->stats
, tca
[TCA_RATE
-1]);
473 static int qdisc_change(struct Qdisc
*sch
, struct rtattr
**tca
)
475 if (tca
[TCA_OPTIONS
-1]) {
478 if (sch
->ops
->change
== NULL
)
480 err
= sch
->ops
->change(sch
, tca
[TCA_OPTIONS
-1]);
484 #ifdef CONFIG_NET_ESTIMATOR
485 if (tca
[TCA_RATE
-1]) {
486 qdisc_kill_estimator(&sch
->stats
);
487 qdisc_new_estimator(&sch
->stats
, tca
[TCA_RATE
-1]);
493 struct check_loop_arg
495 struct qdisc_walker w
;
500 static int check_loop_fn(struct Qdisc
*q
, unsigned long cl
, struct qdisc_walker
*w
);
502 static int check_loop(struct Qdisc
*q
, struct Qdisc
*p
, int depth
)
504 struct check_loop_arg arg
;
506 if (q
->ops
->cl_ops
== NULL
)
509 arg
.w
.stop
= arg
.w
.skip
= arg
.w
.count
= 0;
510 arg
.w
.fn
= check_loop_fn
;
513 q
->ops
->cl_ops
->walk(q
, &arg
.w
);
514 return arg
.w
.stop
? -ELOOP
: 0;
518 check_loop_fn(struct Qdisc
*q
, unsigned long cl
, struct qdisc_walker
*w
)
521 struct Qdisc_class_ops
*cops
= q
->ops
->cl_ops
;
522 struct check_loop_arg
*arg
= (struct check_loop_arg
*)w
;
524 leaf
= cops
->leaf(q
, cl
);
526 if (leaf
== arg
->p
|| arg
->depth
> 7)
528 return check_loop(leaf
, arg
->p
, arg
->depth
+ 1);
537 static int tc_get_qdisc(struct sk_buff
*skb
, struct nlmsghdr
*n
, void *arg
)
539 struct tcmsg
*tcm
= NLMSG_DATA(n
);
540 struct rtattr
**tca
= arg
;
541 struct net_device
*dev
;
542 u32 clid
= tcm
->tcm_parent
;
543 struct Qdisc
*q
= NULL
;
544 struct Qdisc
*p
= NULL
;
547 if ((dev
= __dev_get_by_index(tcm
->tcm_ifindex
)) == NULL
)
551 if (clid
!= TC_H_ROOT
) {
552 if (TC_H_MAJ(clid
) != TC_H_MAJ(TC_H_INGRESS
)) {
553 if ((p
= qdisc_lookup(dev
, TC_H_MAJ(clid
))) == NULL
)
555 q
= qdisc_leaf(p
, clid
);
556 } else { /* ingress */
557 q
= dev
->qdisc_ingress
;
560 q
= dev
->qdisc_sleeping
;
565 if (tcm
->tcm_handle
&& q
->handle
!= tcm
->tcm_handle
)
568 if ((q
= qdisc_lookup(dev
, tcm
->tcm_handle
)) == NULL
)
572 if (tca
[TCA_KIND
-1] && rtattr_strcmp(tca
[TCA_KIND
-1], q
->ops
->id
))
575 if (n
->nlmsg_type
== RTM_DELQDISC
) {
580 if ((err
= qdisc_graft(dev
, p
, clid
, NULL
, &q
)) != 0)
583 qdisc_notify(skb
, n
, clid
, q
, NULL
);
584 spin_lock_bh(&dev
->queue_lock
);
586 spin_unlock_bh(&dev
->queue_lock
);
589 qdisc_notify(skb
, n
, clid
, NULL
, q
);
598 static int tc_modify_qdisc(struct sk_buff
*skb
, struct nlmsghdr
*n
, void *arg
)
600 struct tcmsg
*tcm
= NLMSG_DATA(n
);
601 struct rtattr
**tca
= arg
;
602 struct net_device
*dev
;
603 u32 clid
= tcm
->tcm_parent
;
604 struct Qdisc
*q
= NULL
;
605 struct Qdisc
*p
= NULL
;
608 if ((dev
= __dev_get_by_index(tcm
->tcm_ifindex
)) == NULL
)
612 if (clid
!= TC_H_ROOT
) {
613 if (clid
!= TC_H_INGRESS
) {
614 if ((p
= qdisc_lookup(dev
, TC_H_MAJ(clid
))) == NULL
)
616 q
= qdisc_leaf(p
, clid
);
617 } else { /*ingress */
618 q
= dev
->qdisc_ingress
;
621 q
= dev
->qdisc_sleeping
;
624 /* It may be default qdisc, ignore it */
625 if (q
&& q
->handle
== 0)
628 if (!q
|| !tcm
->tcm_handle
|| q
->handle
!= tcm
->tcm_handle
) {
629 if (tcm
->tcm_handle
) {
630 if (q
&& !(n
->nlmsg_flags
&NLM_F_REPLACE
))
632 if (TC_H_MIN(tcm
->tcm_handle
))
634 if ((q
= qdisc_lookup(dev
, tcm
->tcm_handle
)) == NULL
)
636 if (n
->nlmsg_flags
&NLM_F_EXCL
)
638 if (tca
[TCA_KIND
-1] && rtattr_strcmp(tca
[TCA_KIND
-1], q
->ops
->id
))
641 (p
&& check_loop(q
, p
, 0)))
643 atomic_inc(&q
->refcnt
);
649 /* This magic test requires explanation.
651 * We know, that some child q is already
652 * attached to this parent and have choice:
653 * either to change it or to create/graft new one.
655 * 1. We are allowed to create/graft only
656 * if CREATE and REPLACE flags are set.
658 * 2. If EXCL is set, requestor wanted to say,
659 * that qdisc tcm_handle is not expected
660 * to exist, so that we choose create/graft too.
662 * 3. The last case is when no flags are set.
663 * Alas, it is sort of hole in API, we
664 * cannot decide what to do unambiguously.
665 * For now we select create/graft, if
666 * user gave KIND, which does not match existing.
668 if ((n
->nlmsg_flags
&NLM_F_CREATE
) &&
669 (n
->nlmsg_flags
&NLM_F_REPLACE
) &&
670 ((n
->nlmsg_flags
&NLM_F_EXCL
) ||
672 rtattr_strcmp(tca
[TCA_KIND
-1], q
->ops
->id
))))
677 if (!tcm
->tcm_handle
)
679 q
= qdisc_lookup(dev
, tcm
->tcm_handle
);
682 /* Change qdisc parameters */
685 if (n
->nlmsg_flags
&NLM_F_EXCL
)
687 if (tca
[TCA_KIND
-1] && rtattr_strcmp(tca
[TCA_KIND
-1], q
->ops
->id
))
689 err
= qdisc_change(q
, tca
);
691 qdisc_notify(skb
, n
, clid
, NULL
, q
);
695 if (!(n
->nlmsg_flags
&NLM_F_CREATE
))
697 if (clid
== TC_H_INGRESS
)
698 q
= qdisc_create(dev
, tcm
->tcm_parent
, tca
, &err
);
700 q
= qdisc_create(dev
, tcm
->tcm_handle
, tca
, &err
);
706 struct Qdisc
*old_q
= NULL
;
707 err
= qdisc_graft(dev
, p
, clid
, q
, &old_q
);
710 spin_lock_bh(&dev
->queue_lock
);
712 spin_unlock_bh(&dev
->queue_lock
);
716 qdisc_notify(skb
, n
, clid
, old_q
, q
);
718 spin_lock_bh(&dev
->queue_lock
);
719 qdisc_destroy(old_q
);
720 spin_unlock_bh(&dev
->queue_lock
);
726 int qdisc_copy_stats(struct sk_buff
*skb
, struct tc_stats
*st
)
728 spin_lock_bh(st
->lock
);
729 RTA_PUT(skb
, TCA_STATS
, (char*)&st
->lock
- (char*)st
, st
);
730 spin_unlock_bh(st
->lock
);
734 spin_unlock_bh(st
->lock
);
739 static int tc_fill_qdisc(struct sk_buff
*skb
, struct Qdisc
*q
, u32 clid
,
740 u32 pid
, u32 seq
, unsigned flags
, int event
)
743 struct nlmsghdr
*nlh
;
744 unsigned char *b
= skb
->tail
;
746 nlh
= NLMSG_PUT(skb
, pid
, seq
, event
, sizeof(*tcm
));
747 nlh
->nlmsg_flags
= flags
;
748 tcm
= NLMSG_DATA(nlh
);
749 tcm
->tcm_family
= AF_UNSPEC
;
750 tcm
->tcm_ifindex
= q
->dev
? q
->dev
->ifindex
: 0;
751 tcm
->tcm_parent
= clid
;
752 tcm
->tcm_handle
= q
->handle
;
753 tcm
->tcm_info
= atomic_read(&q
->refcnt
);
754 RTA_PUT(skb
, TCA_KIND
, IFNAMSIZ
, q
->ops
->id
);
755 if (q
->ops
->dump
&& q
->ops
->dump(q
, skb
) < 0)
757 q
->stats
.qlen
= q
->q
.qlen
;
758 if (qdisc_copy_stats(skb
, &q
->stats
))
760 nlh
->nlmsg_len
= skb
->tail
- b
;
765 skb_trim(skb
, b
- skb
->data
);
769 static int qdisc_notify(struct sk_buff
*oskb
, struct nlmsghdr
*n
,
770 u32 clid
, struct Qdisc
*old
, struct Qdisc
*new)
773 u32 pid
= oskb
? NETLINK_CB(oskb
).pid
: 0;
775 skb
= alloc_skb(NLMSG_GOODSIZE
, GFP_KERNEL
);
779 if (old
&& old
->handle
) {
780 if (tc_fill_qdisc(skb
, old
, clid
, pid
, n
->nlmsg_seq
, 0, RTM_DELQDISC
) < 0)
784 if (tc_fill_qdisc(skb
, new, clid
, pid
, n
->nlmsg_seq
, old
? NLM_F_REPLACE
: 0, RTM_NEWQDISC
) < 0)
789 return rtnetlink_send(skb
, pid
, RTMGRP_TC
, n
->nlmsg_flags
&NLM_F_ECHO
);
796 static int tc_dump_qdisc(struct sk_buff
*skb
, struct netlink_callback
*cb
)
800 struct net_device
*dev
;
804 s_q_idx
= q_idx
= cb
->args
[1];
805 read_lock(&dev_base_lock
);
806 for (dev
=dev_base
, idx
=0; dev
; dev
= dev
->next
, idx
++) {
811 read_lock(&qdisc_tree_lock
);
812 for (q
= dev
->qdisc_list
, q_idx
= 0; q
;
813 q
= q
->next
, q_idx
++) {
816 if (tc_fill_qdisc(skb
, q
, 0, NETLINK_CB(cb
->skb
).pid
,
817 cb
->nlh
->nlmsg_seq
, NLM_F_MULTI
, RTM_NEWQDISC
) <= 0) {
818 read_unlock(&qdisc_tree_lock
);
822 read_unlock(&qdisc_tree_lock
);
826 read_unlock(&dev_base_lock
);
836 /************************************************
837 * Traffic classes manipulation. *
838 ************************************************/
842 static int tc_ctl_tclass(struct sk_buff
*skb
, struct nlmsghdr
*n
, void *arg
)
844 struct tcmsg
*tcm
= NLMSG_DATA(n
);
845 struct rtattr
**tca
= arg
;
846 struct net_device
*dev
;
847 struct Qdisc
*q
= NULL
;
848 struct Qdisc_class_ops
*cops
;
849 unsigned long cl
= 0;
850 unsigned long new_cl
;
851 u32 pid
= tcm
->tcm_parent
;
852 u32 clid
= tcm
->tcm_handle
;
853 u32 qid
= TC_H_MAJ(clid
);
856 if ((dev
= __dev_get_by_index(tcm
->tcm_ifindex
)) == NULL
)
860 parent == TC_H_UNSPEC - unspecified parent.
861 parent == TC_H_ROOT - class is root, which has no parent.
862 parent == X:0 - parent is root class.
863 parent == X:Y - parent is a node in hierarchy.
864 parent == 0:Y - parent is X:Y, where X:0 is qdisc.
866 handle == 0:0 - generate handle from kernel pool.
867 handle == 0:Y - class is X:Y, where X:0 is qdisc.
868 handle == X:Y - clear.
869 handle == X:0 - root class.
872 /* Step 1. Determine qdisc handle X:0 */
874 if (pid
!= TC_H_ROOT
) {
875 u32 qid1
= TC_H_MAJ(pid
);
878 /* If both majors are known, they must be identical. */
884 qid
= dev
->qdisc_sleeping
->handle
;
886 /* Now qid is genuine qdisc handle consistent
887 both with parent and child.
889 TC_H_MAJ(pid) still may be unspecified, complete it now.
892 pid
= TC_H_MAKE(qid
, pid
);
895 qid
= dev
->qdisc_sleeping
->handle
;
898 /* OK. Locate qdisc */
899 if ((q
= qdisc_lookup(dev
, qid
)) == NULL
)
902 /* And check that it supports classes */
903 cops
= q
->ops
->cl_ops
;
907 /* Now try to get class */
909 if (pid
== TC_H_ROOT
)
912 clid
= TC_H_MAKE(qid
, clid
);
915 cl
= cops
->get(q
, clid
);
919 if (n
->nlmsg_type
!= RTM_NEWTCLASS
|| !(n
->nlmsg_flags
&NLM_F_CREATE
))
922 switch (n
->nlmsg_type
) {
925 if (n
->nlmsg_flags
&NLM_F_EXCL
)
929 err
= cops
->delete(q
, cl
);
931 tclass_notify(skb
, n
, q
, cl
, RTM_DELTCLASS
);
934 err
= tclass_notify(skb
, n
, q
, cl
, RTM_NEWTCLASS
);
943 err
= cops
->change(q
, clid
, pid
, tca
, &new_cl
);
945 tclass_notify(skb
, n
, q
, new_cl
, RTM_NEWTCLASS
);
955 static int tc_fill_tclass(struct sk_buff
*skb
, struct Qdisc
*q
,
957 u32 pid
, u32 seq
, unsigned flags
, int event
)
960 struct nlmsghdr
*nlh
;
961 unsigned char *b
= skb
->tail
;
963 nlh
= NLMSG_PUT(skb
, pid
, seq
, event
, sizeof(*tcm
));
964 nlh
->nlmsg_flags
= flags
;
965 tcm
= NLMSG_DATA(nlh
);
966 tcm
->tcm_family
= AF_UNSPEC
;
967 tcm
->tcm_ifindex
= q
->dev
? q
->dev
->ifindex
: 0;
968 tcm
->tcm_parent
= q
->handle
;
969 tcm
->tcm_handle
= q
->handle
;
971 RTA_PUT(skb
, TCA_KIND
, IFNAMSIZ
, q
->ops
->id
);
972 if (q
->ops
->cl_ops
->dump
&& q
->ops
->cl_ops
->dump(q
, cl
, skb
, tcm
) < 0)
974 nlh
->nlmsg_len
= skb
->tail
- b
;
979 skb_trim(skb
, b
- skb
->data
);
983 static int tclass_notify(struct sk_buff
*oskb
, struct nlmsghdr
*n
,
984 struct Qdisc
*q
, unsigned long cl
, int event
)
987 u32 pid
= oskb
? NETLINK_CB(oskb
).pid
: 0;
989 skb
= alloc_skb(NLMSG_GOODSIZE
, GFP_KERNEL
);
993 if (tc_fill_tclass(skb
, q
, cl
, pid
, n
->nlmsg_seq
, 0, event
) < 0) {
998 return rtnetlink_send(skb
, pid
, RTMGRP_TC
, n
->nlmsg_flags
&NLM_F_ECHO
);
1001 struct qdisc_dump_args
1003 struct qdisc_walker w
;
1004 struct sk_buff
*skb
;
1005 struct netlink_callback
*cb
;
1008 static int qdisc_class_dump(struct Qdisc
*q
, unsigned long cl
, struct qdisc_walker
*arg
)
1010 struct qdisc_dump_args
*a
= (struct qdisc_dump_args
*)arg
;
1012 return tc_fill_tclass(a
->skb
, q
, cl
, NETLINK_CB(a
->cb
->skb
).pid
,
1013 a
->cb
->nlh
->nlmsg_seq
, NLM_F_MULTI
, RTM_NEWTCLASS
);
1016 static int tc_dump_tclass(struct sk_buff
*skb
, struct netlink_callback
*cb
)
1020 struct net_device
*dev
;
1022 struct tcmsg
*tcm
= (struct tcmsg
*)NLMSG_DATA(cb
->nlh
);
1023 struct qdisc_dump_args arg
;
1025 if (cb
->nlh
->nlmsg_len
< NLMSG_LENGTH(sizeof(*tcm
)))
1027 if ((dev
= dev_get_by_index(tcm
->tcm_ifindex
)) == NULL
)
1032 read_lock(&qdisc_tree_lock
);
1033 for (q
=dev
->qdisc_list
, t
=0; q
; q
= q
->next
, t
++) {
1034 if (t
< s_t
) continue;
1035 if (!q
->ops
->cl_ops
) continue;
1036 if (tcm
->tcm_parent
&& TC_H_MAJ(tcm
->tcm_parent
) != q
->handle
)
1039 memset(&cb
->args
[1], 0, sizeof(cb
->args
)-sizeof(cb
->args
[0]));
1040 arg
.w
.fn
= qdisc_class_dump
;
1044 arg
.w
.skip
= cb
->args
[1];
1046 q
->ops
->cl_ops
->walk(q
, &arg
.w
);
1047 cb
->args
[1] = arg
.w
.count
;
1051 read_unlock(&qdisc_tree_lock
);
/* Conversion factors between scheduler clock ticks and microseconds.
 * Both default to 1 and are recalibrated at init time: by
 * psched_calibrate_clock() for PSCHED_CPU, or from HZ/PSCHED_JSCALE in
 * pktsched_init() for PSCHED_JIFFIES.  Exported to userspace via
 * /proc/net/psched (see psched_read_proc). */
1060 int psched_us_per_tick
= 1;
1061 int psched_tick_per_us
= 1;
1063 #ifdef CONFIG_PROC_FS
1064 static int psched_read_proc(char *buffer
, char **start
, off_t offset
,
1065 int length
, int *eof
, void *data
)
1069 len
= sprintf(buffer
, "%08x %08x %08x %08x\n",
1070 psched_tick_per_us
, psched_us_per_tick
,
1080 *start
= buffer
+ offset
;
1087 #if PSCHED_CLOCK_SOURCE == PSCHED_GETTIMEOFDAY
1088 int psched_tod_diff(int delta_sec
, int bound
)
1092 if (bound
<= 1000000 || delta_sec
> (0x7FFFFFFF/1000000)-1)
1094 delta
= delta_sec
* 1000000;
1101 psched_time_t psched_time_base
;
1103 #if PSCHED_CLOCK_SOURCE == PSCHED_CPU
1104 psched_tdiff_t psched_clock_per_hz
;
1105 int psched_clock_scale
;
1108 #ifdef PSCHED_WATCHER
1109 PSCHED_WATCHER psched_time_mark
;
1111 static void psched_tick(unsigned long);
1113 static struct timer_list psched_timer
=
1114 { function
: psched_tick
};
1116 static void psched_tick(unsigned long dummy
)
1118 #if PSCHED_CLOCK_SOURCE == PSCHED_CPU
1119 psched_time_t dummy_stamp
;
1120 PSCHED_GET_TIME(dummy_stamp
);
1121 /* It is OK up to 4GHz cpu */
1122 psched_timer
.expires
= jiffies
+ 1*HZ
;
1124 unsigned long now
= jiffies
;
1125 psched_time_base
= ((u64
)now
)<<PSCHED_JSCALE
;
1126 psched_time_mark
= now
;
1127 psched_timer
.expires
= now
+ 60*60*HZ
;
1129 add_timer(&psched_timer
);
1133 #if PSCHED_CLOCK_SOURCE == PSCHED_CPU
1134 int __init
psched_calibrate_clock(void)
1136 psched_time_t stamp
, stamp1
;
1137 struct timeval tv
, tv1
;
1138 psched_tdiff_t delay
;
1142 #ifdef PSCHED_WATCHER
1145 stop
= jiffies
+ HZ
/10;
1146 PSCHED_GET_TIME(stamp
);
1147 do_gettimeofday(&tv
);
1148 while (time_before(jiffies
, stop
))
1150 PSCHED_GET_TIME(stamp1
);
1151 do_gettimeofday(&tv1
);
1153 delay
= PSCHED_TDIFF(stamp1
, stamp
);
1154 rdelay
= tv1
.tv_usec
- tv
.tv_usec
;
1155 rdelay
+= (tv1
.tv_sec
- tv
.tv_sec
)*1000000;
1159 psched_tick_per_us
= delay
;
1160 while ((delay
>>=1) != 0)
1161 psched_clock_scale
++;
1162 psched_us_per_tick
= 1<<psched_clock_scale
;
1163 psched_clock_per_hz
= (psched_tick_per_us
*(1000000/HZ
))>>psched_clock_scale
;
1168 int __init
pktsched_init(void)
1170 #ifdef CONFIG_RTNETLINK
1171 struct rtnetlink_link
*link_p
;
1174 #if PSCHED_CLOCK_SOURCE == PSCHED_CPU
1175 if (psched_calibrate_clock() < 0)
1177 #elif PSCHED_CLOCK_SOURCE == PSCHED_JIFFIES
1178 psched_tick_per_us
= HZ
<<PSCHED_JSCALE
;
1179 psched_us_per_tick
= 1000000;
1180 #ifdef PSCHED_WATCHER
1185 #ifdef CONFIG_RTNETLINK
1186 link_p
= rtnetlink_links
[PF_UNSPEC
];
1188 /* Setup rtnetlink links. It is made here to avoid
1189 exporting large number of public symbols.
1193 link_p
[RTM_NEWQDISC
-RTM_BASE
].doit
= tc_modify_qdisc
;
1194 link_p
[RTM_DELQDISC
-RTM_BASE
].doit
= tc_get_qdisc
;
1195 link_p
[RTM_GETQDISC
-RTM_BASE
].doit
= tc_get_qdisc
;
1196 link_p
[RTM_GETQDISC
-RTM_BASE
].dumpit
= tc_dump_qdisc
;
1197 link_p
[RTM_NEWTCLASS
-RTM_BASE
].doit
= tc_ctl_tclass
;
1198 link_p
[RTM_DELTCLASS
-RTM_BASE
].doit
= tc_ctl_tclass
;
1199 link_p
[RTM_GETTCLASS
-RTM_BASE
].doit
= tc_ctl_tclass
;
1200 link_p
[RTM_GETTCLASS
-RTM_BASE
].dumpit
= tc_dump_tclass
;
/*
 * Register a built-in qdisc whose ops symbol follows the
 * <name>_qdisc_ops naming convention.
 *
 * The paste operator must join only the macro argument with the
 * "_qdisc_ops" suffix: "&##name" is an invalid concatenation ("&"
 * pasted with an identifier is not a single preprocessing token) and
 * is rejected by standard-conforming preprocessors, so the address-of
 * operator stays outside the paste.
 */
#define INIT_QDISC(name) { \
	extern struct Qdisc_ops name##_qdisc_ops; \
	register_qdisc(&name##_qdisc_ops); \
1212 #ifdef CONFIG_NET_SCH_CBQ
1215 #ifdef CONFIG_NET_SCH_CSZ
1218 #ifdef CONFIG_NET_SCH_HPFQ
1221 #ifdef CONFIG_NET_SCH_HFSC
1224 #ifdef CONFIG_NET_SCH_RED
1227 #ifdef CONFIG_NET_SCH_GRED
1230 #ifdef CONFIG_NET_SCH_INGRESS
1231 INIT_QDISC(ingress
);
1233 #ifdef CONFIG_NET_SCH_DSMARK
1236 #ifdef CONFIG_NET_SCH_SFQ
1239 #ifdef CONFIG_NET_SCH_TBF
1242 #ifdef CONFIG_NET_SCH_TEQL
1245 #ifdef CONFIG_NET_SCH_PRIO
1248 #ifdef CONFIG_NET_SCH_ATM
1251 #ifdef CONFIG_NET_CLS
1255 #ifdef CONFIG_PROC_FS
1256 create_proc_read_entry("net/psched", 0, 0, psched_read_proc
, NULL
);