2 * net/sched/sch_api.c Packet scheduler API.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
9 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
13 * Rani Assaf <rani@magic.metawire.com> :980802: JIFFIES and CPU clock sources are repaired.
14 * Eduardo J. Blanco <ejbs@netlabs.com.uy> :990222: kmod support
15 * Jamal Hadi Salim <hadi@nortelnetworks.com>: 990601: ingress support
18 #include <linux/config.h>
19 #include <linux/module.h>
20 #include <linux/types.h>
21 #include <linux/kernel.h>
22 #include <linux/sched.h>
23 #include <linux/string.h>
25 #include <linux/socket.h>
26 #include <linux/sockios.h>
28 #include <linux/errno.h>
29 #include <linux/interrupt.h>
30 #include <linux/netdevice.h>
31 #include <linux/skbuff.h>
32 #include <linux/rtnetlink.h>
33 #include <linux/init.h>
34 #include <linux/proc_fs.h>
35 #include <linux/seq_file.h>
36 #include <linux/kmod.h>
37 #include <linux/list.h>
40 #include <net/pkt_sched.h>
42 #include <asm/processor.h>
43 #include <asm/uaccess.h>
44 #include <asm/system.h>
45 #include <asm/bitops.h>
/* Forward declarations for the rtnetlink notification helpers defined
 * later in this file; they are needed by the qdisc/class control paths
 * (tc_get_qdisc, tc_modify_qdisc, tc_ctl_tclass) declared in between. */
47 static int qdisc_notify(struct sk_buff
*oskb
, struct nlmsghdr
*n
, u32 clid
,
48 struct Qdisc
*old
, struct Qdisc
*new);
49 static int tclass_notify(struct sk_buff
*oskb
, struct nlmsghdr
*n
,
50 struct Qdisc
*q
, unsigned long cl
, int event
);
57 This file consists of two interrelated parts:
59 1. queueing disciplines manager frontend.
60 2. traffic classes manager frontend.
62 Generally, queueing discipline ("qdisc") is a black box,
63 which is able to enqueue packets and to dequeue them (when
64 device is ready to send something) in order and at times
65 determined by algorithm hidden in it.
67 qdisc's are divided into two categories:
68 - "queues", which have no internal structure visible from outside.
69 - "schedulers", which split all the packets to "traffic classes",
70 using "packet classifiers" (look at cls_api.c)
72 In turn, classes may have child qdiscs (as a rule, queues)
73 attached to them etc. etc. etc.
75 The goal of the routines in this file is to translate
76 information supplied by user in the form of handles
77 to more intelligible for kernel form, to make some sanity
78 checks and part of work, which is common to all qdiscs
79 and to provide rtnetlink notifications.
81 All real intelligent work is done inside qdisc modules.
85 Every discipline has two major routines: enqueue and dequeue.
89 dequeue usually returns a skb to send. It is allowed to return NULL,
90 but it does not mean that queue is empty, it just means that
91 discipline does not want to send anything this time.
92 Queue is really empty if q->q.qlen == 0.
93 For complicated disciplines with multiple queues q->q is not
94 real packet queue, but however q->q.qlen must be valid.
98 enqueue returns 0, if packet was enqueued successfully.
99 If packet (this one or another one) was dropped, it returns
101 NET_XMIT_DROP - this packet dropped
102 Expected action: do not backoff, but wait until queue will clear.
103 NET_XMIT_CN - probably this packet enqueued, but another one dropped.
104 Expected action: backoff or ignore
105 NET_XMIT_POLICED - dropped by police.
106 Expected action: backoff or error to real-time apps.
112 requeues once dequeued packet. It is used for non-standard or
113 just buggy devices, which can defer output even if dev->tbusy=0.
117 returns qdisc to initial state: purge all buffers, clear all
118 timers, counters (except for statistics) etc.
122 initializes newly created qdisc.
126 destroys resources allocated by init and during lifetime of qdisc.
130 changes qdisc parameters.
133 /* Protects list of registered TC modules. It is pure SMP lock. */
/* Specifically, this rwlock guards the qdisc_base singly linked list
 * below: writers (register/unregister) take write_lock, readers
 * (qdisc_lookup_ops) take read_lock. */
134 static rwlock_t qdisc_mod_lock
= RW_LOCK_UNLOCKED
;
137 /************************************************
138 * Queueing disciplines manipulation. *
139 ************************************************/
142 /* The list of all installed queueing disciplines. */
/* Singly linked through Qdisc_ops->next; protected by qdisc_mod_lock. */
144 static struct Qdisc_ops
*qdisc_base
;
146 /* Register/unregister queueing discipline */
/* register_qdisc: add @qops to the global qdisc_base list under
 * qdisc_mod_lock, rejecting duplicate ids, and fill in missing
 * enqueue/requeue/dequeue handlers with the noop_qdisc_ops defaults.
 * NOTE(review): the extraction dropped several lines here (duplicate-id
 * error path, list append, return value) — confirm against upstream
 * net/sched/sch_api.c before relying on this text. */
148 int register_qdisc(struct Qdisc_ops
*qops
)
150 struct Qdisc_ops
*q
, **qp
;
153 write_lock(&qdisc_mod_lock
);
/* Walk the registered-ops list; a matching id means a duplicate. */
154 for (qp
= &qdisc_base
; (q
= *qp
) != NULL
; qp
= &q
->next
)
155 if (!strcmp(qops
->id
, q
->id
))
/* Default any missing hooks to the no-op discipline's handlers. */
158 if (qops
->enqueue
== NULL
)
159 qops
->enqueue
= noop_qdisc_ops
.enqueue
;
160 if (qops
->requeue
== NULL
)
161 qops
->requeue
= noop_qdisc_ops
.requeue
;
162 if (qops
->dequeue
== NULL
)
163 qops
->dequeue
= noop_qdisc_ops
.dequeue
;
169 write_unlock(&qdisc_mod_lock
);
/* unregister_qdisc: remove @qops from qdisc_base under qdisc_mod_lock.
 * NOTE(review): the loop body that unlinks the entry (and the error
 * return when it is not found) was dropped by the extraction — confirm
 * against upstream sch_api.c. */
173 int unregister_qdisc(struct Qdisc_ops
*qops
)
175 struct Qdisc_ops
*q
, **qp
;
178 write_lock(&qdisc_mod_lock
);
179 for (qp
= &qdisc_base
; (q
=*qp
)!=NULL
; qp
= &q
->next
)
187 write_unlock(&qdisc_mod_lock
);
191 /* We know handle. Find qdisc among all qdisc's attached to device
192 (root qdisc, all its children, children of children etc.)
 * Walks dev->qdisc_list and returns the entry whose handle matches;
 * NOTE(review): the return statements were dropped by the extraction. */
195 struct Qdisc
*qdisc_lookup(struct net_device
*dev
, u32 handle
)
199 list_for_each_entry(q
, &dev
->qdisc_list
, list
) {
200 if (q
->handle
== handle
)
/* qdisc_leaf: resolve class @classid inside parent qdisc @p and return
 * the leaf qdisc attached to that class, via the parent's class ops
 * (get/leaf). NOTE(review): guard checks, the cops->put() release and
 * the return were dropped by the extraction — confirm upstream. */
206 struct Qdisc
*qdisc_leaf(struct Qdisc
*p
, u32 classid
)
210 struct Qdisc_class_ops
*cops
= p
->ops
->cl_ops
;
214 cl
= cops
->get(p
, classid
);
218 leaf
= cops
->leaf(p
, cl
);
223 /* Find queueing discipline by name */
/* Linear search of qdisc_base under the read lock, matching the
 * rtattr string @kind against each Qdisc_ops id. */
225 struct Qdisc_ops
*qdisc_lookup_ops(struct rtattr
*kind
)
227 struct Qdisc_ops
*q
= NULL
;
230 read_lock(&qdisc_mod_lock
);
231 for (q
= qdisc_base
; q
; q
= q
->next
) {
232 if (rtattr_strcmp(kind
, q
->id
) == 0)
235 read_unlock(&qdisc_mod_lock
);
/* Global cache of rate tables, shared (and refcounted) across qdiscs. */
240 static struct qdisc_rate_table
*qdisc_rtab_list
;
/* qdisc_get_rtab: return a rate table matching @r, reusing a cached
 * entry when the tc_ratespec compares equal, otherwise allocating a
 * new 1024-byte table from @tab and pushing it onto the cache list.
 * NOTE(review): refcount bumps and NULL returns on the validation /
 * allocation failure paths were dropped by the extraction. */
242 struct qdisc_rate_table
*qdisc_get_rtab(struct tc_ratespec
*r
, struct rtattr
*tab
)
244 struct qdisc_rate_table
*rtab
;
246 for (rtab
= qdisc_rtab_list
; rtab
; rtab
= rtab
->next
) {
247 if (memcmp(&rtab
->rate
, r
, sizeof(struct tc_ratespec
)) == 0) {
/* A usable new table needs a non-zero rate/cell_log and exactly
 * 1024 bytes of payload. */
253 if (tab
== NULL
|| r
->rate
== 0 || r
->cell_log
== 0 || RTA_PAYLOAD(tab
) != 1024)
256 rtab
= kmalloc(sizeof(*rtab
), GFP_KERNEL
);
260 memcpy(rtab
->data
, RTA_DATA(tab
), 1024);
261 rtab
->next
= qdisc_rtab_list
;
262 qdisc_rtab_list
= rtab
;
/* qdisc_put_rtab: drop one reference on @tab; when the refcount hits
 * zero, unlink it from qdisc_rtab_list and free it.
 * NOTE(review): the unlink/kfree body of the loop was dropped by the
 * extraction — confirm against upstream sch_api.c. */
267 void qdisc_put_rtab(struct qdisc_rate_table
*tab
)
269 struct qdisc_rate_table
*rtab
, **rtabp
;
/* Nothing to do for a NULL table or while references remain. */
271 if (!tab
|| --tab
->refcnt
)
274 for (rtabp
= &qdisc_rtab_list
; (rtab
=*rtabp
) != NULL
; rtabp
= &rtab
->next
) {
284 /* Allocate an unique handle from space managed by kernel */
/* Rotates a static counter through the 0x8001:0..0xFFFF:0 major-number
 * space, skipping TC_H_ROOT, until a handle not already attached to
 * @dev is found; returns 0 when the bounded search (counter i) is
 * exhausted. NOTE(review): the do{ and the declaration of i were
 * dropped by the extraction. */
286 u32
qdisc_alloc_handle(struct net_device
*dev
)
289 static u32 autohandle
= TC_H_MAKE(0x80000000U
, 0);
292 autohandle
+= TC_H_MAKE(0x10000U
, 0);
293 if (autohandle
== TC_H_MAKE(TC_H_ROOT
, 0))
294 autohandle
= TC_H_MAKE(0x80000000U
, 0);
295 } while (qdisc_lookup(dev
, autohandle
) && --i
> 0);
297 return i
>0 ? autohandle
: 0;
300 /* Attach toplevel qdisc to device dev */
/* Under qdisc_lock_tree: for ingress qdiscs swap dev->qdisc_ingress,
 * otherwise swap dev->qdisc_sleeping (resetting dev->qdisc to the
 * noop qdisc); the displaced qdisc is returned to the caller. An old
 * qdisc with refcnt <= 1 is pruned in place. NOTE(review): the
 * deactivate/activate calls around the IFF_UP checks and several
 * braces were dropped by the extraction. */
302 static struct Qdisc
*
303 dev_graft_qdisc(struct net_device
*dev
, struct Qdisc
*qdisc
)
305 struct Qdisc
*oqdisc
;
/* A running device must be quiesced before regrafting (body dropped). */
307 if (dev
->flags
& IFF_UP
)
310 qdisc_lock_tree(dev
);
311 if (qdisc
&& qdisc
->flags
&TCQ_F_INGRES
) {
312 oqdisc
= dev
->qdisc_ingress
;
313 /* Prune old scheduler */
314 if (oqdisc
&& atomic_read(&oqdisc
->refcnt
) <= 1) {
317 dev
->qdisc_ingress
= NULL
;
319 dev
->qdisc_ingress
= qdisc
;
324 oqdisc
= dev
->qdisc_sleeping
;
326 /* Prune old scheduler */
327 if (oqdisc
&& atomic_read(&oqdisc
->refcnt
) <= 1)
330 /* ... and graft new one */
333 dev
->qdisc_sleeping
= qdisc
;
334 dev
->qdisc
= &noop_qdisc
;
337 qdisc_unlock_tree(dev
);
339 if (dev
->flags
& IFF_UP
)
346 /* Graft qdisc "new" to class "classid" of qdisc "parent" or
349 Old qdisc is not destroyed but returned in *old.
 * With parent == NULL this grafts at the device root via
 * dev_graft_qdisc (ingress qdiscs are regrafted in place); otherwise
 * it delegates to the parent's class ops graft(), stamping the new
 * qdisc's parent classid on success. */
352 int qdisc_graft(struct net_device
*dev
, struct Qdisc
*parent
, u32 classid
,
353 struct Qdisc
*new, struct Qdisc
**old
)
356 struct Qdisc
*q
= *old
;
359 if (parent
== NULL
) {
/* Ingress qdisc at the root: regraft the existing one rather than
 * replacing dev->qdisc_sleeping. */
360 if (q
&& q
->flags
&TCQ_F_INGRES
) {
361 *old
= dev_graft_qdisc(dev
, q
);
363 *old
= dev_graft_qdisc(dev
, new);
366 struct Qdisc_class_ops
*cops
= parent
->ops
->cl_ops
;
371 unsigned long cl
= cops
->get(parent
, classid
);
373 err
= cops
->graft(parent
, cl
, new, old
);
375 new->parent
= classid
;
376 cops
->put(parent
, cl
);
384 Allocate and initialize new qdisc.
386 Parameters are passed via opt.
/* qdisc_create: look up the ops named by TCA_KIND (auto-loading the
 * "sch_<name>" module when absent), allocate a 32-byte-aligned Qdisc
 * plus private area, assign a handle (kernel-allocated when @handle is
 * 0, TC_H_INGRESS for ingress), run ops->init, and link the qdisc into
 * dev->qdisc_list under the tree lock. Errors are reported via *errp.
 * NOTE(review): many error-path lines (allocation failure, init
 * failure cleanup, *errp assignments) were dropped by the extraction —
 * confirm against upstream sch_api.c. */
389 static struct Qdisc
*
390 qdisc_create(struct net_device
*dev
, u32 handle
, struct rtattr
**tca
, int *errp
)
393 struct rtattr
*kind
= tca
[TCA_KIND
-1];
396 struct Qdisc_ops
*ops
;
399 ops
= qdisc_lookup_ops(kind
);
/* Unknown discipline: try to auto-load the module named sch_<kind>
 * and look the ops up again. */
401 if (ops
==NULL
&& tca
[TCA_KIND
-1] != NULL
) {
402 if (RTA_PAYLOAD(kind
) <= IFNAMSIZ
) {
403 request_module("sch_%s", (char*)RTA_DATA(kind
));
404 ops
= qdisc_lookup_ops(kind
);
/* Pin the module implementing these ops for the qdisc's lifetime. */
413 if (!try_module_get(ops
->owner
))
416 /* ensure that the Qdisc and the private data are 32-byte aligned */
417 size
= ((sizeof(*sch
) + QDISC_ALIGN_CONST
) & ~QDISC_ALIGN_CONST
);
418 size
+= ops
->priv_size
+ QDISC_ALIGN_CONST
;
420 p
= kmalloc(size
, GFP_KERNEL
);
/* Round the raw allocation up to the alignment boundary and remember
 * the padding so it can be recovered at free time. */
425 sch
= (struct Qdisc
*)(((unsigned long)p
+ QDISC_ALIGN_CONST
)
426 & ~QDISC_ALIGN_CONST
);
427 sch
->padded
= (char *)sch
- (char *)p
;
429 INIT_LIST_HEAD(&sch
->list
);
430 skb_queue_head_init(&sch
->q
);
432 if (handle
== TC_H_INGRESS
)
433 sch
->flags
|= TCQ_F_INGRES
;
436 sch
->enqueue
= ops
->enqueue
;
437 sch
->dequeue
= ops
->dequeue
;
440 atomic_set(&sch
->refcnt
, 1);
441 sch
->stats_lock
= &dev
->queue_lock
;
/* Caller passed handle 0: allocate one from the kernel pool. */
443 handle
= qdisc_alloc_handle(dev
);
449 if (handle
== TC_H_INGRESS
)
450 sch
->handle
=TC_H_MAKE(TC_H_INGRESS
, 0);
452 sch
->handle
= handle
;
454 /* enqueue is accessed locklessly - make sure it's visible
455 * before we set a netdevice's qdisc pointer to sch */
457 if (!ops
->init
|| (err
= ops
->init(sch
, tca
[TCA_OPTIONS
-1])) == 0) {
458 qdisc_lock_tree(dev
);
459 list_add_tail(&sch
->list
, &dev
->qdisc_list
);
460 qdisc_unlock_tree(dev
);
462 #ifdef CONFIG_NET_ESTIMATOR
464 qdisc_new_estimator(&sch
->stats
, sch
->stats_lock
,
/* Failure path: release the module reference taken above. */
472 module_put(ops
->owner
);
/* qdisc_change: apply new TCA_OPTIONS to an existing qdisc through
 * ops->change (qdiscs without a change hook reject the request), and
 * with CONFIG_NET_ESTIMATOR replace the rate estimator when TCA_RATE
 * is present. NOTE(review): error returns between the visible lines
 * were dropped by the extraction. */
480 static int qdisc_change(struct Qdisc
*sch
, struct rtattr
**tca
)
482 if (tca
[TCA_OPTIONS
-1]) {
485 if (sch
->ops
->change
== NULL
)
487 err
= sch
->ops
->change(sch
, tca
[TCA_OPTIONS
-1]);
491 #ifdef CONFIG_NET_ESTIMATOR
/* Replace (kill then recreate) the estimator bound to these stats. */
492 if (tca
[TCA_RATE
-1]) {
493 qdisc_kill_estimator(&sch
->stats
);
494 qdisc_new_estimator(&sch
->stats
, sch
->stats_lock
,
/* Walker argument used by check_loop/check_loop_fn to detect a qdisc
 * being grafted under itself. NOTE(review): the struct's p/depth
 * members were dropped by the extraction (they are referenced in
 * check_loop_fn below). */
501 struct check_loop_arg
503 struct qdisc_walker w
;
508 static int check_loop_fn(struct Qdisc
*q
, unsigned long cl
, struct qdisc_walker
*w
);
/* check_loop: walk every class of @q looking for @p among the leaf
 * qdiscs; returns -ELOOP when the walk was stopped (loop found),
 * 0 otherwise. Classless qdiscs trivially cannot loop. */
510 static int check_loop(struct Qdisc
*q
, struct Qdisc
*p
, int depth
)
512 struct check_loop_arg arg
;
514 if (q
->ops
->cl_ops
== NULL
)
517 arg
.w
.stop
= arg
.w
.skip
= arg
.w
.count
= 0;
518 arg
.w
.fn
= check_loop_fn
;
521 q
->ops
->cl_ops
->walk(q
, &arg
.w
);
522 return arg
.w
.stop
? -ELOOP
: 0;
/* Per-class callback for check_loop: recurse into each class's leaf
 * qdisc, flagging a loop when the leaf is the qdisc being grafted or
 * when nesting exceeds depth 7. */
526 check_loop_fn(struct Qdisc
*q
, unsigned long cl
, struct qdisc_walker
*w
)
529 struct Qdisc_class_ops
*cops
= q
->ops
->cl_ops
;
530 struct check_loop_arg
*arg
= (struct check_loop_arg
*)w
;
532 leaf
= cops
->leaf(q
, cl
);
534 if (leaf
== arg
->p
|| arg
->depth
> 7)
536 return check_loop(leaf
, arg
->p
, arg
->depth
+ 1);
/* tc_get_qdisc: rtnetlink handler for RTM_DELQDISC and RTM_GETQDISC.
 * Resolves the target qdisc from tcm_parent (root / ingress / a class
 * of a parent qdisc) or tcm_handle, checks the optional TCA_KIND
 * against the qdisc's ops id, then either ungrafts+destroys it
 * (RTM_DELQDISC) or just replies with a notification.
 * NOTE(review): numerous error returns and the qdisc_destroy call
 * under dev->queue_lock were dropped by the extraction. */
545 static int tc_get_qdisc(struct sk_buff
*skb
, struct nlmsghdr
*n
, void *arg
)
547 struct tcmsg
*tcm
= NLMSG_DATA(n
);
548 struct rtattr
**tca
= arg
;
549 struct net_device
*dev
;
550 u32 clid
= tcm
->tcm_parent
;
551 struct Qdisc
*q
= NULL
;
552 struct Qdisc
*p
= NULL
;
555 if ((dev
= __dev_get_by_index(tcm
->tcm_ifindex
)) == NULL
)
/* Resolve the qdisc from the parent id first. */
559 if (clid
!= TC_H_ROOT
) {
560 if (TC_H_MAJ(clid
) != TC_H_MAJ(TC_H_INGRESS
)) {
561 if ((p
= qdisc_lookup(dev
, TC_H_MAJ(clid
))) == NULL
)
563 q
= qdisc_leaf(p
, clid
);
564 } else { /* ingress */
565 q
= dev
->qdisc_ingress
;
568 q
= dev
->qdisc_sleeping
;
/* If a handle was also supplied it must agree with what we found. */
573 if (tcm
->tcm_handle
&& q
->handle
!= tcm
->tcm_handle
)
576 if ((q
= qdisc_lookup(dev
, tcm
->tcm_handle
)) == NULL
)
580 if (tca
[TCA_KIND
-1] && rtattr_strcmp(tca
[TCA_KIND
-1], q
->ops
->id
))
583 if (n
->nlmsg_type
== RTM_DELQDISC
) {
/* Ungraft (NULL replacement) and notify before destruction. */
588 if ((err
= qdisc_graft(dev
, p
, clid
, NULL
, &q
)) != 0)
591 qdisc_notify(skb
, n
, clid
, q
, NULL
);
592 spin_lock_bh(&dev
->queue_lock
);
594 spin_unlock_bh(&dev
->queue_lock
);
/* RTM_GETQDISC: just report the qdisc back to the requester. */
597 qdisc_notify(skb
, n
, clid
, NULL
, q
);
/* tc_modify_qdisc: rtnetlink handler for RTM_NEWQDISC. Depending on
 * the NLM_F_CREATE/REPLACE/EXCL flags and whether a qdisc already
 * exists at tcm_parent/tcm_handle, this either changes the existing
 * qdisc in place (qdisc_change), or creates a new one (qdisc_create)
 * and grafts it, destroying the displaced qdisc under dev->queue_lock.
 * NOTE(review): many error returns, gotos and closing braces were
 * dropped by the extraction — confirm against upstream sch_api.c. */
606 static int tc_modify_qdisc(struct sk_buff
*skb
, struct nlmsghdr
*n
, void *arg
)
608 struct tcmsg
*tcm
= NLMSG_DATA(n
);
609 struct rtattr
**tca
= arg
;
610 struct net_device
*dev
;
611 u32 clid
= tcm
->tcm_parent
;
612 struct Qdisc
*q
= NULL
;
613 struct Qdisc
*p
= NULL
;
616 if ((dev
= __dev_get_by_index(tcm
->tcm_ifindex
)) == NULL
)
/* Locate any existing qdisc at the requested attachment point. */
620 if (clid
!= TC_H_ROOT
) {
621 if (clid
!= TC_H_INGRESS
) {
622 if ((p
= qdisc_lookup(dev
, TC_H_MAJ(clid
))) == NULL
)
624 q
= qdisc_leaf(p
, clid
);
625 } else { /*ingress */
626 q
= dev
->qdisc_ingress
;
629 q
= dev
->qdisc_sleeping
;
632 /* It may be default qdisc, ignore it */
633 if (q
&& q
->handle
== 0)
636 if (!q
|| !tcm
->tcm_handle
|| q
->handle
!= tcm
->tcm_handle
) {
637 if (tcm
->tcm_handle
) {
638 if (q
&& !(n
->nlmsg_flags
&NLM_F_REPLACE
))
640 if (TC_H_MIN(tcm
->tcm_handle
))
642 if ((q
= qdisc_lookup(dev
, tcm
->tcm_handle
)) == NULL
)
644 if (n
->nlmsg_flags
&NLM_F_EXCL
)
646 if (tca
[TCA_KIND
-1] && rtattr_strcmp(tca
[TCA_KIND
-1], q
->ops
->id
))
/* Reject grafting a qdisc underneath itself. */
649 (p
&& check_loop(q
, p
, 0)))
651 atomic_inc(&q
->refcnt
);
657 /* This magic test requires explanation.
659 * We know, that some child q is already
660 * attached to this parent and have choice:
661 * either to change it or to create/graft new one.
663 * 1. We are allowed to create/graft only
664 * if CREATE and REPLACE flags are set.
666 * 2. If EXCL is set, requestor wanted to say,
667 * that qdisc tcm_handle is not expected
668 * to exist, so that we choose create/graft too.
670 * 3. The last case is when no flags are set.
671 * Alas, it is sort of hole in API, we
672 * cannot decide what to do unambiguously.
673 * For now we select create/graft, if
674 * user gave KIND, which does not match existing.
676 if ((n
->nlmsg_flags
&NLM_F_CREATE
) &&
677 (n
->nlmsg_flags
&NLM_F_REPLACE
) &&
678 ((n
->nlmsg_flags
&NLM_F_EXCL
) ||
680 rtattr_strcmp(tca
[TCA_KIND
-1], q
->ops
->id
))))
685 if (!tcm
->tcm_handle
)
687 q
= qdisc_lookup(dev
, tcm
->tcm_handle
);
690 /* Change qdisc parameters */
693 if (n
->nlmsg_flags
&NLM_F_EXCL
)
695 if (tca
[TCA_KIND
-1] && rtattr_strcmp(tca
[TCA_KIND
-1], q
->ops
->id
))
697 err
= qdisc_change(q
, tca
);
699 qdisc_notify(skb
, n
, clid
, NULL
, q
);
/* Create path: only permitted with NLM_F_CREATE. Ingress qdiscs get
 * their handle from tcm_parent, others from tcm_handle. */
703 if (!(n
->nlmsg_flags
&NLM_F_CREATE
))
705 if (clid
== TC_H_INGRESS
)
706 q
= qdisc_create(dev
, tcm
->tcm_parent
, tca
, &err
);
708 q
= qdisc_create(dev
, tcm
->tcm_handle
, tca
, &err
);
714 struct Qdisc
*old_q
= NULL
;
715 err
= qdisc_graft(dev
, p
, clid
, q
, &old_q
);
718 spin_lock_bh(&dev
->queue_lock
);
720 spin_unlock_bh(&dev
->queue_lock
);
724 qdisc_notify(skb
, n
, clid
, old_q
, q
);
/* Destroy the displaced qdisc under the device queue lock. */
726 spin_lock_bh(&dev
->queue_lock
);
727 qdisc_destroy(old_q
);
728 spin_unlock_bh(&dev
->queue_lock
);
/* qdisc_copy_stats: append a TCA_STATS attribute holding @st to @skb;
 * @lock (held by the caller, presumably — TODO confirm) is released on
 * both the success and the rtattr-failure paths. NOTE(review): return
 * statements and the rtattr_failure label were dropped by the
 * extraction. */
734 int qdisc_copy_stats(struct sk_buff
*skb
, struct tc_stats
*st
, spinlock_t
*lock
)
737 RTA_PUT(skb
, TCA_STATS
, sizeof(struct tc_stats
), st
);
738 spin_unlock_bh(lock
);
742 spin_unlock_bh(lock
);
/* tc_fill_qdisc: serialize qdisc @q into a netlink message on @skb —
 * tcmsg header, TCA_KIND, the qdisc's own dump() attributes and its
 * stats — returning the message trimmed away on overflow (the
 * nlmsg_failure/rtattr_failure path ending in skb_trim).
 * NOTE(review): the returns and failure labels between the visible
 * lines were dropped by the extraction. */
747 static int tc_fill_qdisc(struct sk_buff
*skb
, struct Qdisc
*q
, u32 clid
,
748 u32 pid
, u32 seq
, unsigned flags
, int event
)
751 struct nlmsghdr
*nlh
;
752 unsigned char *b
= skb
->tail
;
754 nlh
= NLMSG_PUT(skb
, pid
, seq
, event
, sizeof(*tcm
));
755 nlh
->nlmsg_flags
= flags
;
756 tcm
= NLMSG_DATA(nlh
);
757 tcm
->tcm_family
= AF_UNSPEC
;
758 tcm
->tcm_ifindex
= q
->dev
->ifindex
;
759 tcm
->tcm_parent
= clid
;
760 tcm
->tcm_handle
= q
->handle
;
/* tcm_info carries the refcount in qdisc dumps. */
761 tcm
->tcm_info
= atomic_read(&q
->refcnt
);
762 RTA_PUT(skb
, TCA_KIND
, IFNAMSIZ
, q
->ops
->id
);
763 if (q
->ops
->dump
&& q
->ops
->dump(q
, skb
) < 0)
/* Refresh the queue length before copying stats out. */
765 q
->stats
.qlen
= q
->q
.qlen
;
766 if (qdisc_copy_stats(skb
, &q
->stats
, q
->stats_lock
))
768 nlh
->nlmsg_len
= skb
->tail
- b
;
/* Failure: roll the skb back to where this message started. */
773 skb_trim(skb
, b
- skb
->data
);
/* qdisc_notify: build and multicast (RTMGRP_TC) a notification about a
 * qdisc change — an RTM_DELQDISC record for @old (when it has a real
 * handle) and/or an RTM_NEWQDISC record for @new, echoing back to the
 * requester when NLM_F_ECHO is set. NOTE(review): the allocation-
 * failure and fill-failure branches were dropped by the extraction. */
777 static int qdisc_notify(struct sk_buff
*oskb
, struct nlmsghdr
*n
,
778 u32 clid
, struct Qdisc
*old
, struct Qdisc
*new)
/* Destination pid: the original requester, or 0 for kernel-originated. */
781 u32 pid
= oskb
? NETLINK_CB(oskb
).pid
: 0;
783 skb
= alloc_skb(NLMSG_GOODSIZE
, GFP_KERNEL
);
787 if (old
&& old
->handle
) {
788 if (tc_fill_qdisc(skb
, old
, clid
, pid
, n
->nlmsg_seq
, 0, RTM_DELQDISC
) < 0)
792 if (tc_fill_qdisc(skb
, new, clid
, pid
, n
->nlmsg_seq
, old
? NLM_F_REPLACE
: 0, RTM_NEWQDISC
) < 0)
797 return rtnetlink_send(skb
, pid
, RTMGRP_TC
, n
->nlmsg_flags
&NLM_F_ECHO
);
/* tc_dump_qdisc: netlink dump callback for RTM_GETQDISC. Iterates all
 * devices (dev_base under dev_base_lock) and each device's qdisc_list
 * (under qdisc_tree_lock), emitting one RTM_NEWQDISC record per qdisc
 * and resuming from cb->args[] across invocations.
 * NOTE(review): the skip/continue bookkeeping and the final cb->args
 * stores were dropped by the extraction. */
804 static int tc_dump_qdisc(struct sk_buff
*skb
, struct netlink_callback
*cb
)
808 struct net_device
*dev
;
/* Resume position saved by the previous dump pass. */
812 s_q_idx
= q_idx
= cb
->args
[1];
813 read_lock(&dev_base_lock
);
814 for (dev
=dev_base
, idx
=0; dev
; dev
= dev
->next
, idx
++) {
819 read_lock_bh(&qdisc_tree_lock
);
821 list_for_each_entry(q
, &dev
->qdisc_list
, list
) {
822 if (q_idx
< s_q_idx
) {
826 if (tc_fill_qdisc(skb
, q
, q
->parent
, NETLINK_CB(cb
->skb
).pid
,
827 cb
->nlh
->nlmsg_seq
, NLM_F_MULTI
, RTM_NEWQDISC
) <= 0) {
828 read_unlock_bh(&qdisc_tree_lock
);
833 read_unlock_bh(&qdisc_tree_lock
);
837 read_unlock(&dev_base_lock
);
847 /************************************************
848 * Traffic classes manipulation. *
849 ************************************************/
/* tc_ctl_tclass: rtnetlink handler for RTM_{NEW,DEL,GET}TCLASS.
 * Resolves the owning qdisc from tcm_parent/tcm_handle (see the
 * parent/handle encoding table below), looks the class up through the
 * qdisc's class ops, then dispatches on nlmsg_type: delete(), a
 * notification reply for GET, or change() for NEW, notifying on
 * success. NOTE(review): error returns, the switch's remaining arms
 * and the final put() were dropped by the extraction. */
853 static int tc_ctl_tclass(struct sk_buff
*skb
, struct nlmsghdr
*n
, void *arg
)
855 struct tcmsg
*tcm
= NLMSG_DATA(n
);
856 struct rtattr
**tca
= arg
;
857 struct net_device
*dev
;
858 struct Qdisc
*q
= NULL
;
859 struct Qdisc_class_ops
*cops
;
860 unsigned long cl
= 0;
861 unsigned long new_cl
;
862 u32 pid
= tcm
->tcm_parent
;
863 u32 clid
= tcm
->tcm_handle
;
864 u32 qid
= TC_H_MAJ(clid
);
867 if ((dev
= __dev_get_by_index(tcm
->tcm_ifindex
)) == NULL
)
871 parent == TC_H_UNSPEC - unspecified parent.
872 parent == TC_H_ROOT - class is root, which has no parent.
873 parent == X:0 - parent is root class.
874 parent == X:Y - parent is a node in hierarchy.
875 parent == 0:Y - parent is X:Y, where X:0 is qdisc.
877 handle == 0:0 - generate handle from kernel pool.
878 handle == 0:Y - class is X:Y, where X:0 is qdisc.
879 handle == X:Y - clear.
880 handle == X:0 - root class.
883 /* Step 1. Determine qdisc handle X:0 */
885 if (pid
!= TC_H_ROOT
) {
886 u32 qid1
= TC_H_MAJ(pid
);
889 /* If both majors are known, they must be identical. */
/* Neither message field named the qdisc: default to the device root. */
895 qid
= dev
->qdisc_sleeping
->handle
;
897 /* Now qid is genuine qdisc handle consistent
898 both with parent and child.
900 TC_H_MAJ(pid) still may be unspecified, complete it now.
903 pid
= TC_H_MAKE(qid
, pid
);
906 qid
= dev
->qdisc_sleeping
->handle
;
909 /* OK. Locate qdisc */
910 if ((q
= qdisc_lookup(dev
, qid
)) == NULL
)
913 /* And check that it supports classes */
914 cops
= q
->ops
->cl_ops
;
918 /* Now try to get class */
920 if (pid
== TC_H_ROOT
)
923 clid
= TC_H_MAKE(qid
, clid
);
926 cl
= cops
->get(q
, clid
);
/* Class not found: only NEW with NLM_F_CREATE may proceed. */
930 if (n
->nlmsg_type
!= RTM_NEWTCLASS
|| !(n
->nlmsg_flags
&NLM_F_CREATE
))
933 switch (n
->nlmsg_type
) {
936 if (n
->nlmsg_flags
&NLM_F_EXCL
)
940 err
= cops
->delete(q
, cl
);
942 tclass_notify(skb
, n
, q
, cl
, RTM_DELTCLASS
);
945 err
= tclass_notify(skb
, n
, q
, cl
, RTM_NEWTCLASS
);
/* Create/change the class and announce the result. */
954 err
= cops
->change(q
, clid
, pid
, tca
, &new_cl
);
956 tclass_notify(skb
, n
, q
, new_cl
, RTM_NEWTCLASS
);
/* tc_fill_tclass: serialize one traffic class into a netlink message
 * on @skb — tcmsg header, TCA_KIND, then the class's own attributes
 * via cl_ops->dump — rolling the skb back (skb_trim) on overflow.
 * NOTE(review): the cl parameter, returns and failure labels were
 * dropped by the extraction — confirm against upstream sch_api.c. */
966 static int tc_fill_tclass(struct sk_buff
*skb
, struct Qdisc
*q
,
968 u32 pid
, u32 seq
, unsigned flags
, int event
)
971 struct nlmsghdr
*nlh
;
972 unsigned char *b
= skb
->tail
;
974 nlh
= NLMSG_PUT(skb
, pid
, seq
, event
, sizeof(*tcm
));
975 nlh
->nlmsg_flags
= flags
;
976 tcm
= NLMSG_DATA(nlh
);
977 tcm
->tcm_family
= AF_UNSPEC
;
978 tcm
->tcm_ifindex
= q
->dev
->ifindex
;
/* For a class record, tcm_parent carries the owning qdisc handle. */
979 tcm
->tcm_parent
= q
->handle
;
980 tcm
->tcm_handle
= q
->handle
;
982 RTA_PUT(skb
, TCA_KIND
, IFNAMSIZ
, q
->ops
->id
);
983 if (q
->ops
->cl_ops
->dump
&& q
->ops
->cl_ops
->dump(q
, cl
, skb
, tcm
) < 0)
985 nlh
->nlmsg_len
= skb
->tail
- b
;
/* Failure: discard everything appended since this message began. */
990 skb_trim(skb
, b
- skb
->data
);
/* tclass_notify: build a single class record (@event, e.g.
 * RTM_NEWTCLASS/RTM_DELTCLASS) and multicast it to RTMGRP_TC, echoing
 * to the requester when NLM_F_ECHO is set. NOTE(review): the
 * allocation check and the kfree_skb on fill failure were dropped by
 * the extraction. */
994 static int tclass_notify(struct sk_buff
*oskb
, struct nlmsghdr
*n
,
995 struct Qdisc
*q
, unsigned long cl
, int event
)
998 u32 pid
= oskb
? NETLINK_CB(oskb
).pid
: 0;
1000 skb
= alloc_skb(NLMSG_GOODSIZE
, GFP_KERNEL
);
1004 if (tc_fill_tclass(skb
, q
, cl
, pid
, n
->nlmsg_seq
, 0, event
) < 0) {
1009 return rtnetlink_send(skb
, pid
, RTMGRP_TC
, n
->nlmsg_flags
&NLM_F_ECHO
);
/* Walker state for dumping all classes of a qdisc: the generic walker
 * plus the dump skb and the netlink callback being serviced. */
1012 struct qdisc_dump_args
1014 struct qdisc_walker w
;
1015 struct sk_buff
*skb
;
1016 struct netlink_callback
*cb
;
/* Per-class walker callback: emit one RTM_NEWTCLASS record for class
 * @cl into the dump skb carried in the walker argument. */
1019 static int qdisc_class_dump(struct Qdisc
*q
, unsigned long cl
, struct qdisc_walker
*arg
)
1021 struct qdisc_dump_args
*a
= (struct qdisc_dump_args
*)arg
;
1023 return tc_fill_tclass(a
->skb
, q
, cl
, NETLINK_CB(a
->cb
->skb
).pid
,
1024 a
->cb
->nlh
->nlmsg_seq
, NLM_F_MULTI
, RTM_NEWTCLASS
);
/* tc_dump_tclass: netlink dump callback for RTM_GETTCLASS. Walks the
 * named device's qdisc_list under qdisc_tree_lock and, for each
 * classful qdisc matching tcm_parent, walks its classes through
 * cl_ops->walk with qdisc_class_dump, resuming from cb->args[].
 * NOTE(review): several bookkeeping lines (t/s_t handling, dev_put,
 * remaining arg.w fields) were dropped by the extraction. */
1027 static int tc_dump_tclass(struct sk_buff
*skb
, struct netlink_callback
*cb
)
1031 struct net_device
*dev
;
1033 struct tcmsg
*tcm
= (struct tcmsg
*)NLMSG_DATA(cb
->nlh
);
1034 struct qdisc_dump_args arg
;
1036 if (cb
->nlh
->nlmsg_len
< NLMSG_LENGTH(sizeof(*tcm
)))
1038 if ((dev
= dev_get_by_index(tcm
->tcm_ifindex
)) == NULL
)
1044 read_lock_bh(&qdisc_tree_lock
);
1045 list_for_each_entry(q
, &dev
->qdisc_list
, list
) {
/* Skip qdiscs already dumped, classless qdiscs, and those not
 * selected by tcm_parent. */
1046 if (t
< s_t
|| !q
->ops
->cl_ops
||
1048 TC_H_MAJ(tcm
->tcm_parent
) != q
->handle
)) {
/* Moving to a new qdisc: reset the per-qdisc resume state. */
1053 memset(&cb
->args
[1], 0, sizeof(cb
->args
)-sizeof(cb
->args
[0]));
1054 arg
.w
.fn
= qdisc_class_dump
;
1058 arg
.w
.skip
= cb
->args
[1];
1060 q
->ops
->cl_ops
->walk(q
, &arg
.w
);
1061 cb
->args
[1] = arg
.w
.count
;
1066 read_unlock_bh(&qdisc_tree_lock
);
/* Packet-scheduler clock calibration factors; defaults of 1 mean a
 * 1:1 tick/us mapping until a clock source overrides them (see the
 * calibration and init code below). */
1074 int psched_us_per_tick
= 1;
1075 int psched_tick_per_us
= 1;
1077 #ifdef CONFIG_PROC_FS
/* /proc/net/psched: exposes the scheduler clock parameters as four
 * hex words (first two visible here: tick_per_us, us_per_tick). */
1078 static int psched_show(struct seq_file
*seq
, void *v
)
1080 seq_printf(seq
, "%08x %08x %08x %08x\n",
1081 psched_tick_per_us
, psched_us_per_tick
,
/* Standard single_open wrapper binding psched_show to the proc file. */
1087 static int psched_open(struct inode
*inode
, struct file
*file
)
1089 return single_open(file
, psched_show
, PDE(inode
)->data
);
/* File operations for /proc/net/psched (read side handled by seq_file;
 * the .read member was dropped by the extraction — confirm upstream). */
1092 static struct file_operations psched_fops
= {
1093 .owner
= THIS_MODULE
,
1094 .open
= psched_open
,
1096 .llseek
= seq_lseek
,
1097 .release
= single_release
,
1101 #ifdef CONFIG_NET_SCH_CLK_GETTIMEOFDAY
/* psched_tod_diff: convert a seconds delta to microseconds, clamped so
 * the multiply by 1e6 cannot overflow a 32-bit int; @bound caps the
 * result (full clamping logic partially dropped by the extraction). */
1102 int psched_tod_diff(int delta_sec
, int bound
)
1106 if (bound
<= 1000000 || delta_sec
> (0x7FFFFFFF/1000000)-1)
1108 delta
= delta_sec
* 1000000;
1113 EXPORT_SYMBOL(psched_tod_diff
);
1116 #ifdef CONFIG_NET_SCH_CLK_CPU
/* CPU-cycle clock source state: cycles-per-HZ-tick, binary scale
 * factor, and the base/mark pair used to extend a 32-bit cycle
 * counter; all exported for the PSCHED_* macros in pkt_sched.h. */
1117 psched_tdiff_t psched_clock_per_hz
;
1118 int psched_clock_scale
;
1119 EXPORT_SYMBOL(psched_clock_per_hz
);
1120 EXPORT_SYMBOL(psched_clock_scale
);
1122 psched_time_t psched_time_base
;
1123 cycles_t psched_time_mark
;
1124 EXPORT_SYMBOL(psched_time_mark
);
1125 EXPORT_SYMBOL(psched_time_base
);
1128 * Periodically adjust psched_time_base to avoid overflow
1129 * with 32-bit get_cycles(). Safe up to 4GHz CPU.
1131 static void psched_tick(unsigned long);
1132 static struct timer_list psched_timer
= TIMER_INITIALIZER(psched_tick
, 0, 0);
/* Timer body: only needed when cycles_t is 32-bit — reading the time
 * (PSCHED_GET_TIME) refreshes the base, then rearm one second out. */
1134 static void psched_tick(unsigned long dummy
)
1136 if (sizeof(cycles_t
) == sizeof(u32
)) {
1137 psched_time_t dummy_stamp
;
1138 PSCHED_GET_TIME(dummy_stamp
);
1139 psched_timer
.expires
= jiffies
+ 1*HZ
;
1140 add_timer(&psched_timer
);
/* psched_calibrate_clock: measure the CPU cycle counter against
 * gettimeofday over ~HZ/10 jiffies, then derive psched_tick_per_us,
 * psched_clock_scale (log2), psched_us_per_tick and
 * psched_clock_per_hz from the observed ratio.
 * NOTE(review): the barrier/loop body and the use of rdelay to
 * normalize delay were dropped by the extraction — confirm upstream. */
1144 int __init
psched_calibrate_clock(void)
1146 psched_time_t stamp
, stamp1
;
1147 struct timeval tv
, tv1
;
1148 psched_tdiff_t delay
;
/* Sample the scheduler clock and wall clock across a ~100ms window. */
1153 stop
= jiffies
+ HZ
/10;
1154 PSCHED_GET_TIME(stamp
);
1155 do_gettimeofday(&tv
);
1156 while (time_before(jiffies
, stop
)) {
1160 PSCHED_GET_TIME(stamp1
);
1161 do_gettimeofday(&tv1
);
1163 delay
= PSCHED_TDIFF(stamp1
, stamp
);
1164 rdelay
= tv1
.tv_usec
- tv
.tv_usec
;
1165 rdelay
+= (tv1
.tv_sec
- tv
.tv_sec
)*1000000;
1169 psched_tick_per_us
= delay
;
/* Scale = floor(log2(delay)): shift until the ratio fits. */
1170 while ((delay
>>=1) != 0)
1171 psched_clock_scale
++;
1172 psched_us_per_tick
= 1<<psched_clock_scale
;
1173 psched_clock_per_hz
= (psched_tick_per_us
*(1000000/HZ
))>>psched_clock_scale
;
/* pktsched_init: boot-time setup of the packet scheduler core —
 * calibrate/configure the clock source, wire the RTM_*QDISC and
 * RTM_*TCLASS rtnetlink handlers into the PF_UNSPEC link table,
 * register the built-in fifo qdiscs and create /proc/net/psched. */
1178 static int __init
pktsched_init(void)
1180 struct rtnetlink_link
*link_p
;
1182 #ifdef CONFIG_NET_SCH_CLK_CPU
1183 if (psched_calibrate_clock() < 0)
1185 #elif defined(CONFIG_NET_SCH_CLK_JIFFIES)
1186 psched_tick_per_us
= HZ
<<PSCHED_JSCALE
;
1187 psched_us_per_tick
= 1000000;
1190 link_p
= rtnetlink_links
[PF_UNSPEC
];
1192 /* Setup rtnetlink links. It is made here to avoid
1193 exporting large number of public symbols.
1197 link_p
[RTM_NEWQDISC
-RTM_BASE
].doit
= tc_modify_qdisc
;
1198 link_p
[RTM_DELQDISC
-RTM_BASE
].doit
= tc_get_qdisc
;
1199 link_p
[RTM_GETQDISC
-RTM_BASE
].doit
= tc_get_qdisc
;
1200 link_p
[RTM_GETQDISC
-RTM_BASE
].dumpit
= tc_dump_qdisc
;
1201 link_p
[RTM_NEWTCLASS
-RTM_BASE
].doit
= tc_ctl_tclass
;
1202 link_p
[RTM_DELTCLASS
-RTM_BASE
].doit
= tc_ctl_tclass
;
1203 link_p
[RTM_GETTCLASS
-RTM_BASE
].doit
= tc_ctl_tclass
;
1204 link_p
[RTM_GETTCLASS
-RTM_BASE
].dumpit
= tc_dump_tclass
;
/* Built-in FIFO disciplines are always available. */
1207 register_qdisc(&pfifo_qdisc_ops
);
1208 register_qdisc(&bfifo_qdisc_ops
);
1209 proc_net_fops_create("psched", 0, &psched_fops
);
1214 subsys_initcall(pktsched_init
);
/* Public API exported to qdisc modules (sch_*.ko). */
1216 EXPORT_SYMBOL(qdisc_copy_stats
);
1217 EXPORT_SYMBOL(qdisc_get_rtab
);
1218 EXPORT_SYMBOL(qdisc_put_rtab
);
1219 EXPORT_SYMBOL(register_qdisc
);
1220 EXPORT_SYMBOL(unregister_qdisc
);
);