/*
 * net/sched/sch_cbq.c	Class-Based Queueing discipline.
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 */

#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/skbuff.h>
#include <net/netlink.h>
#include <net/pkt_sched.h>
/*	Class-Based Queueing (CBQ) algorithm.
	=======================================

	Sources: [1] Sally Floyd and Van Jacobson, "Link-sharing and Resource
		     Management Models for Packet Networks",
		     IEEE/ACM Transactions on Networking, Vol.3, No.4, 1995

		 [2] Sally Floyd, "Notes on CBQ and Guaranteed Service", 1995

		 [3] Sally Floyd, "Notes on Class-Based Queueing: Setting
		     Parameters", 1996

		 [4] Sally Floyd and Michael Speer, "Experimental Results
		     for Class-Based Queueing", 1998, not published.

	-----------------------------------------------------------------------

	The algorithm skeleton was taken from the NS simulator cbq.cc.
	If someone wants to check this code against the LBL version,
	they should take into account that ONLY the skeleton was borrowed;
	the implementation is different. Particularly:

	--- The WRR algorithm is different. Our version looks more
	reasonable (I hope) and works when quanta are allowed to be
	less than MTU, which is always the case when real-time classes
	have small rates. Note that the statement of [3] is
	incomplete: delay may actually be estimated even if class
	per-round allotment is less than MTU. Namely, if per-round
	allotment is W*r_i, and r_1+...+r_k = r < 1

	delay_i <= ([MTU/(W*r_i)]*W*r + W*r + k*MTU)/B

	In the worst case we have an IntServ estimate with D = W*r+k*MTU
	and C = MTU*r. The proof (if correct at all) is trivial.
	(A small illustrative sketch of this bound follows this comment block.)

	--- It seems that cbq-2.0 is not very accurate. At least, I cannot
	interpret some places, which look like wrong translations
	from NS. Anyone is advised to find these differences
	and explain to me why I am wrong 8).

	--- Linux has no EOI event, so we cannot estimate true class
	idle time. The workaround is to consider the next dequeue event
	as a sign that the previous packet has finished. This is wrong because of
	internal device queueing, but on a permanently loaded link it is true.
	Moreover, combined with the clock integrator, this scheme looks
	very close to an ideal solution.  */
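
/*
 * Illustrative sketch only (not part of the original qdisc, kept out of the
 * build with #if 0): the worst-case delay bound quoted above,
 *	delay_i <= (ceil(MTU/(W*r_i))*W*r + W*r + k*MTU) / B,
 * written out as a tiny user-space helper.  Parameter names follow the
 * comment; units (e.g. bytes and bytes/sec) are left to the caller.
 */
#if 0	/* example only */
#include <math.h>

static double cbq_wrr_delay_bound(double mtu, double W, double r_i,
				  double r, unsigned int k, double B)
{
	double rounds = ceil(mtu / (W * r_i));	/* WRR rounds needed to send one MTU */

	return (rounds * W * r + W * r + k * mtu) / B;
}
#endif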
struct cbq_sched_data;


struct cbq_class
{
	struct Qdisc_class_common common;
	struct cbq_class	*next_alive;	/* next class with backlog in this priority band */

/* Parameters */
	unsigned char		priority;	/* class priority */
	unsigned char		priority2;	/* priority to be used after overlimit */
	unsigned char		ewma_log;	/* time constant for idle time calculation */
	unsigned char		ovl_strategy;
#ifdef CONFIG_NET_CLS_ACT
	unsigned char		police;
#endif

	u32			defmap;

	/* Link-sharing scheduler parameters */
	long			maxidle;	/* Class parameters: see below. */
	long			offtime;
	long			minidle;
	u32			avpkt;
	struct qdisc_rate_table	*R_tab;

	/* Overlimit strategy parameters */
	void			(*overlimit)(struct cbq_class *cl);
	psched_tdiff_t		penalty;

	/* General scheduler (WRR) parameters */
	long			allot;
	long			quantum;	/* Allotment per WRR round */
	long			weight;		/* Relative allotment: see below */

	struct Qdisc		*qdisc;		/* Ptr to CBQ discipline */
	struct cbq_class	*split;		/* Ptr to split node */
	struct cbq_class	*share;		/* Ptr to LS parent in the class tree */
	struct cbq_class	*tparent;	/* Ptr to tree parent in the class tree */
	struct cbq_class	*borrow;	/* NULL if class is bandwidth limited;
						   parent otherwise */
	struct cbq_class	*sibling;	/* Sibling chain */
	struct cbq_class	*children;	/* Pointer to children chain */

	struct Qdisc		*q;		/* Elementary queueing discipline */

/* Variables */
	unsigned char		cpriority;	/* Effective priority */
	unsigned char		delayed;
	unsigned char		level;		/* level of the class in hierarchy:
						   0 for leaf classes, and maximal
						   level of children + 1 for nodes.
						 */

	psched_time_t		last;		/* Last end of service */
	psched_time_t		undertime;
	long			avgidle;
	long			deficit;	/* Saved deficit for WRR */
	psched_time_t		penalized;
	struct gnet_stats_basic	bstats;
	struct gnet_stats_queue	qstats;
	struct gnet_stats_rate_est rate_est;
	struct tc_cbq_xstats	xstats;

	struct tcf_proto	*filter_list;

	int			refcnt;
	int			filters;

	struct cbq_class	*defaults[TC_PRIO_MAX+1];
};

struct cbq_sched_data
{
	struct Qdisc_class_hash	clhash;			/* Hash table of all classes */
	int			nclasses[TC_CBQ_MAXPRIO+1];
	unsigned		quanta[TC_CBQ_MAXPRIO+1];

	struct cbq_class	link;

	unsigned		activemask;
	struct cbq_class	*active[TC_CBQ_MAXPRIO+1];	/* List of all classes
								   with backlog */

#ifdef CONFIG_NET_CLS_ACT
	struct cbq_class	*rx_class;
#endif
	struct cbq_class	*tx_class;
	struct cbq_class	*tx_borrowed;
	int			tx_len;
	psched_time_t		now;			/* Cached timestamp */
	psched_time_t		now_rt;			/* Cached real time */
	unsigned		pmask;

	struct hrtimer		delay_timer;
	struct qdisc_watchdog	watchdog;		/* Watchdog timer,
							   started when CBQ has
							   backlog, but cannot
							   transmit just now */
	psched_tdiff_t		wd_expires;
	int			toplevel;
	u32			hgenerator;
};


#define L2T(cl,len)	qdisc_l2t((cl)->R_tab,len)
static __inline__ struct cbq_class *
cbq_class_lookup(struct cbq_sched_data *q, u32 classid)
{
	struct Qdisc_class_common *clc;

	clc = qdisc_class_find(&q->clhash, classid);
	if (clc == NULL)
		return NULL;
	return container_of(clc, struct cbq_class, common);
}
#ifdef CONFIG_NET_CLS_ACT

static struct cbq_class *
cbq_reclassify(struct sk_buff *skb, struct cbq_class *this)
{
	struct cbq_class *cl, *new;

	for (cl = this->tparent; cl; cl = cl->tparent)
		if ((new = cl->defaults[TC_PRIO_BESTEFFORT]) != NULL &&
		    new != this)
			return new;

	return NULL;
}

#endif
/* Classify packet. The procedure is pretty complicated, but
   it allows us to combine link sharing and priority scheduling
   transparently.

   Namely, you can put link-sharing rules (f.e. route based) at the root of CBQ,
   so that it resolves to split nodes. Then packets are classified
   by logical priority, or a more specific classifier may be attached
   to the split node.
 */

static struct cbq_class *
cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *head = &q->link;
	struct cbq_class **defmap;
	struct cbq_class *cl = NULL;
	u32 prio = skb->priority;
	struct tcf_result res;

	/*
	 *  Step 1. If skb->priority points to one of our classes, use it.
	 */
	if (TC_H_MAJ(prio^sch->handle) == 0 &&
	    (cl = cbq_class_lookup(q, prio)) != NULL)
		return cl;

	*qerr = NET_XMIT_BYPASS;
	for (;;) {
		int result = 0;
		defmap = head->defaults;

		/*
		 * Step 2+n. Apply classifier.
		 */
		if (!head->filter_list ||
		    (result = tc_classify_compat(skb, head->filter_list, &res)) < 0)
			goto fallback;

		if ((cl = (void*)res.class) == NULL) {
			if (TC_H_MAJ(res.classid))
				cl = cbq_class_lookup(q, res.classid);
			else if ((cl = defmap[res.classid&TC_PRIO_MAX]) == NULL)
				cl = defmap[TC_PRIO_BESTEFFORT];

			if (cl == NULL || cl->level >= head->level)
				goto fallback;
		}

#ifdef CONFIG_NET_CLS_ACT
		switch (result) {
		case TC_ACT_QUEUED:
		case TC_ACT_STOLEN:
			*qerr = NET_XMIT_SUCCESS;
		case TC_ACT_SHOT:
			return NULL;
		case TC_ACT_RECLASSIFY:
			return cbq_reclassify(skb, cl);
		}
#endif
		if (cl->level == 0)
			return cl;

		/*
		 * Step 3+n. If classifier selected a link sharing class,
		 *	     apply agency specific classifier.
		 *	     Repeat this procedure until we hit a leaf node.
		 */
		head = cl;
	}

fallback:
	cl = NULL;

	/*
	 * Step 4. No success...
	 */
	if (TC_H_MAJ(prio) == 0 &&
	    !(cl = head->defaults[prio&TC_PRIO_MAX]) &&
	    !(cl = head->defaults[TC_PRIO_BESTEFFORT]))
		return head;

	return cl;
}
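
/*
 * Illustrative sketch only (not part of the original source, kept out of the
 * build with #if 0): the default-map fallback used in Step 2+n above, pulled
 * out as a standalone helper so the split-node resolution is easier to see.
 */
#if 0	/* example only */
static struct cbq_class *
cbq_defmap_resolve(struct cbq_class **defaults, u32 logical_prio)
{
	/* Try the class mapped to this logical priority first ... */
	struct cbq_class *cl = defaults[logical_prio & TC_PRIO_MAX];

	/* ... and fall back to the best-effort slot if nothing is mapped. */
	if (cl == NULL)
		cl = defaults[TC_PRIO_BESTEFFORT];
	return cl;
}
#endif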
/*
   A packet has just been enqueued on the empty class.
   cbq_activate_class adds it to the tail of the active class list
   of its priority band.
 */

static __inline__ void cbq_activate_class(struct cbq_class *cl)
{
	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
	int prio = cl->cpriority;
	struct cbq_class *cl_tail;

	cl_tail = q->active[prio];
	q->active[prio] = cl;

	if (cl_tail != NULL) {
		cl->next_alive = cl_tail->next_alive;
		cl_tail->next_alive = cl;
	} else {
		cl->next_alive = cl;
		q->activemask |= (1<<prio);
	}
}
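
/*
 * Illustrative sketch only (#if 0, not built): the active-class ring used
 * above.  q->active[prio] always points at the TAIL of a circular, singly
 * linked list threaded through ->next_alive, so tail->next_alive is the head
 * and appending a class is O(1).  Names mirror the structures above.
 */
#if 0	/* example only */
static void ring_append(struct cbq_class **tailp, struct cbq_class *cl)
{
	struct cbq_class *tail = *tailp;

	if (tail == NULL) {
		cl->next_alive = cl;			/* single element points at itself */
	} else {
		cl->next_alive = tail->next_alive;	/* new element points at old head */
		tail->next_alive = cl;
	}
	*tailp = cl;					/* new element becomes the tail */
}
/* usage would be: ring_append(&q->active[prio], cl); */
#endif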
/*
   Unlink class from active chain.
   Note that this same procedure is done directly in cbq_dequeue*
   during the round-robin procedure.
 */

static void cbq_deactivate_class(struct cbq_class *this)
{
	struct cbq_sched_data *q = qdisc_priv(this->qdisc);
	int prio = this->cpriority;
	struct cbq_class *cl;
	struct cbq_class *cl_prev = q->active[prio];

	do {
		cl = cl_prev->next_alive;
		if (cl == this) {
			cl_prev->next_alive = cl->next_alive;
			cl->next_alive = NULL;

			if (cl == q->active[prio]) {
				q->active[prio] = cl_prev;
				if (cl == q->active[prio]) {
					q->active[prio] = NULL;
					q->activemask &= ~(1<<prio);
					return;
				}
			}
			return;
		}
	} while ((cl_prev = cl) != q->active[prio]);
}
static void
cbq_mark_toplevel(struct cbq_sched_data *q, struct cbq_class *cl)
{
	int toplevel = q->toplevel;

	if (toplevel > cl->level && !(cl->q->flags&TCQ_F_THROTTLED)) {
		psched_time_t now;
		psched_tdiff_t incr;

		now = psched_get_time();
		incr = now - q->now_rt;
		now = q->now + incr;

		do {
			if (cl->undertime < now) {
				q->toplevel = cl->level;
				return;
			}
		} while ((cl = cl->borrow) != NULL && toplevel > cl->level);
	}
}
static int
cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	int uninitialized_var(ret);
	struct cbq_class *cl = cbq_classify(skb, sch, &ret);

#ifdef CONFIG_NET_CLS_ACT
	q->rx_class = cl;
#endif
	if (cl == NULL) {
		if (ret == NET_XMIT_BYPASS)
			sch->qstats.drops++;
		kfree_skb(skb);
		return ret;
	}

#ifdef CONFIG_NET_CLS_ACT
	cl->q->__parent = sch;
#endif
	ret = qdisc_enqueue(skb, cl->q);
	if (ret == NET_XMIT_SUCCESS) {
		sch->q.qlen++;
		sch->bstats.packets++;
		sch->bstats.bytes += qdisc_pkt_len(skb);
		cbq_mark_toplevel(q, cl);
		if (!cl->next_alive)
			cbq_activate_class(cl);
		return ret;
	}

	sch->qstats.drops++;
	cbq_mark_toplevel(q, cl);
	cl->qstats.drops++;
	return ret;
}
static int
cbq_requeue(struct sk_buff *skb, struct Qdisc *sch)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *cl;
	int ret;

	if ((cl = q->tx_class) == NULL) {
		kfree_skb(skb);
		sch->qstats.drops++;
		return NET_XMIT_CN;
	}
	q->tx_class = NULL;

	cbq_mark_toplevel(q, cl);

#ifdef CONFIG_NET_CLS_ACT
	q->rx_class = cl;
	cl->q->__parent = sch;
#endif
	if ((ret = cl->q->ops->requeue(skb, cl->q)) == 0) {
		sch->q.qlen++;
		sch->qstats.requeues++;
		if (!cl->next_alive)
			cbq_activate_class(cl);
		return 0;
	}
	sch->qstats.drops++;
	cl->qstats.drops++;
	return ret;
}
/* Overlimit actions */

/* TC_CBQ_OVL_CLASSIC: (default) penalize leaf class by adding offtime */

static void cbq_ovl_classic(struct cbq_class *cl)
{
	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
	psched_tdiff_t delay = cl->undertime - q->now;

	if (!cl->delayed) {
		delay += cl->offtime;

		/*
		   Class goes to sleep, so that it will have no
		   chance to work avgidle. Let's forgive it 8)

		   BTW cbq-2.0 has a bug in this
		   place, apparently they forgot to shift it by cl->ewma_log.
		 */
		if (cl->avgidle < 0)
			delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
		if (cl->avgidle < cl->minidle)
			cl->avgidle = cl->minidle;
		if (delay <= 0)
			delay = 1;
		cl->undertime = q->now + delay;

		cl->xstats.overactions++;
		cl->delayed = 1;
	}
	if (q->wd_expires == 0 || q->wd_expires > delay)
		q->wd_expires = delay;

	/* Dirty work! We must schedule wakeups based on
	   the real available rate, rather than the leaf rate,
	   which may be tiny (even zero).
	 */
	if (q->toplevel == TC_CBQ_MAXLEVEL) {
		struct cbq_class *b;
		psched_tdiff_t base_delay = q->wd_expires;

		for (b = cl->borrow; b; b = b->borrow) {
			delay = b->undertime - q->now;
			if (delay < base_delay) {
				if (delay <= 0)
					delay = 1;
				base_delay = delay;
			}
		}

		q->wd_expires = base_delay;
	}
}
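
/*
 * Illustrative sketch only (#if 0, not built): the "classic" overlimit delay
 * computed above, with the avgidle forgiveness written out step by step.
 * avgidle is the scaled value kept in cbq_class (true_avgidle / W, with
 * W = 2^-ewma_log), exactly as described in cbq_update() below.
 */
#if 0	/* example only */
static psched_tdiff_t classic_overlimit_delay(const struct cbq_class *cl,
					      psched_time_t now)
{
	psched_tdiff_t delay = cl->undertime - now;	/* time until underlimit */

	delay += cl->offtime;				/* configured extra sleep */
	if (cl->avgidle < 0)
		/* forgive the accumulated negative idle time; note the
		 * >> ewma_log shift that cbq-2.0 is said to have missed */
		delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
	return delay > 0 ? delay : 1;
}
#endif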
/* TC_CBQ_OVL_RCLASSIC: penalize by offtime all classes in the hierarchy
   when they go overlimit
 */

static void cbq_ovl_rclassic(struct cbq_class *cl)
{
	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
	struct cbq_class *this = cl;

	do {
		if (cl->level > q->toplevel) {
			cl = NULL;
			break;
		}
	} while ((cl = cl->borrow) != NULL);

	if (cl == NULL)
		cl = this;
	cbq_ovl_classic(cl);
}
/* TC_CBQ_OVL_DELAY: delay until it will go to underlimit */

static void cbq_ovl_delay(struct cbq_class *cl)
{
	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
	psched_tdiff_t delay = cl->undertime - q->now;

	if (!cl->delayed) {
		psched_time_t sched = q->now;
		ktime_t expires;

		delay += cl->offtime;
		if (cl->avgidle < 0)
			delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
		if (cl->avgidle < cl->minidle)
			cl->avgidle = cl->minidle;
		cl->undertime = q->now + delay;

		if (delay > 0) {
			sched += delay + cl->penalty;
			cl->penalized = sched;
			cl->cpriority = TC_CBQ_MAXPRIO;
			q->pmask |= (1<<TC_CBQ_MAXPRIO);

			expires = ktime_set(0, 0);
			expires = ktime_add_ns(expires, PSCHED_US2NS(sched));
			if (hrtimer_try_to_cancel(&q->delay_timer) &&
			    ktime_to_ns(ktime_sub(q->delay_timer.expires,
						  expires)) > 0)
				q->delay_timer.expires = expires;
			hrtimer_restart(&q->delay_timer);
			cl->delayed = 1;
			cl->xstats.overactions++;
			return;
		}
		delay = 1;
	}
	if (q->wd_expires == 0 || q->wd_expires > delay)
		q->wd_expires = delay;
}
/* TC_CBQ_OVL_LOWPRIO: penalize class by lowering its priority band */

static void cbq_ovl_lowprio(struct cbq_class *cl)
{
	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);

	cl->penalized = q->now + cl->penalty;

	if (cl->cpriority != cl->priority2) {
		cl->cpriority = cl->priority2;
		q->pmask |= (1<<cl->cpriority);
		cl->xstats.overactions++;
	}
	cbq_ovl_classic(cl);
}
/* TC_CBQ_OVL_DROP: penalize class by dropping */

static void cbq_ovl_drop(struct cbq_class *cl)
{
	if (cl->q->ops->drop)
		if (cl->q->ops->drop(cl->q))
			cl->qdisc->q.qlen--;
	cl->xstats.overactions++;
	cbq_ovl_classic(cl);
}
static psched_tdiff_t cbq_undelay_prio(struct cbq_sched_data *q, int prio,
				       psched_time_t now)
{
	struct cbq_class *cl;
	struct cbq_class *cl_prev = q->active[prio];
	psched_time_t sched = now;

	if (cl_prev == NULL)
		return 0;

	do {
		cl = cl_prev->next_alive;
		if (now - cl->penalized > 0) {
			cl_prev->next_alive = cl->next_alive;
			cl->next_alive = NULL;
			cl->cpriority = cl->priority;
			cl->delayed = 0;
			cbq_activate_class(cl);

			if (cl == q->active[prio]) {
				q->active[prio] = cl_prev;
				if (cl == q->active[prio]) {
					q->active[prio] = NULL;
					return 0;
				}
			}

			cl = cl_prev->next_alive;
		} else if (sched - cl->penalized > 0)
			sched = cl->penalized;
	} while ((cl_prev = cl) != q->active[prio]);

	return sched - now;
}
static enum hrtimer_restart cbq_undelay(struct hrtimer *timer)
{
	struct cbq_sched_data *q = container_of(timer, struct cbq_sched_data,
						delay_timer);
	struct Qdisc *sch = q->watchdog.qdisc;
	psched_time_t now;
	psched_tdiff_t delay = 0;
	unsigned pmask;

	now = psched_get_time();

	pmask = q->pmask;
	q->pmask = 0;

	while (pmask) {
		int prio = ffz(~pmask);
		psched_tdiff_t tmp;

		pmask &= ~(1<<prio);

		tmp = cbq_undelay_prio(q, prio, now);
		if (tmp > 0) {
			q->pmask |= 1<<prio;
			if (tmp < delay || delay == 0)
				delay = tmp;
		}
	}

	if (delay) {
		ktime_t time;

		time = ktime_set(0, 0);
		time = ktime_add_ns(time, PSCHED_US2NS(now + delay));
		hrtimer_start(&q->delay_timer, time, HRTIMER_MODE_ABS);
	}

	sch->flags &= ~TCQ_F_THROTTLED;
	__netif_schedule(sch);
	return HRTIMER_NORESTART;
}
#ifdef CONFIG_NET_CLS_ACT
static int cbq_reshape_fail(struct sk_buff *skb, struct Qdisc *child)
{
	struct Qdisc *sch = child->__parent;
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *cl = q->rx_class;

	q->rx_class = NULL;

	if (cl && (cl = cbq_reclassify(skb, cl)) != NULL) {

		cbq_mark_toplevel(q, cl);

		q->rx_class = cl;
		cl->q->__parent = sch;

		if (qdisc_enqueue(skb, cl->q) == 0) {
			sch->q.qlen++;
			sch->bstats.packets++;
			sch->bstats.bytes += qdisc_pkt_len(skb);
			if (!cl->next_alive)
				cbq_activate_class(cl);
			return 0;
		}
		sch->qstats.drops++;
		return 0;
	}

	sch->qstats.drops++;
	return -1;
}
#endif
/*
   This is a mission-critical procedure.

   We "regenerate" the toplevel cutoff if the transmitting class
   has backlog and is not regulated. This is not part of the
   original CBQ description, but looks more reasonable.
   Probably, it is wrong. This question needs further investigation.
 */

static __inline__ void
cbq_update_toplevel(struct cbq_sched_data *q, struct cbq_class *cl,
		    struct cbq_class *borrowed)
{
	if (cl && q->toplevel >= borrowed->level) {
		if (cl->q->q.qlen > 1) {
			do {
				if (borrowed->undertime == PSCHED_PASTPERFECT) {
					q->toplevel = borrowed->level;
					return;
				}
			} while ((borrowed = borrowed->borrow) != NULL);
		}
#if 0
	/* It is not necessary now. Uncommenting it
	   will save CPU cycles, but decrease fairness.
	 */
		q->toplevel = TC_CBQ_MAXLEVEL;
#endif
	}
}
static void
cbq_update(struct cbq_sched_data *q)
{
	struct cbq_class *this = q->tx_class;
	struct cbq_class *cl = this;
	int len = q->tx_len;

	q->tx_class = NULL;

	for ( ; cl; cl = cl->share) {
		long avgidle = cl->avgidle;
		long idle;

		cl->bstats.packets++;
		cl->bstats.bytes += len;

		/*
		   (now - last) is the total time between packet right edges.
		   (last_pktlen/rate) is the "virtual" busy time, so that

			 idle = (now - last) - last_pktlen/rate
		 */

		idle = q->now - cl->last;
		if ((unsigned long)idle > 128*1024*1024) {
			avgidle = cl->maxidle;
		} else {
			idle -= L2T(cl, len);

		/* true_avgidle := (1-W)*true_avgidle + W*idle,
		   where W=2^{-ewma_log}. But cl->avgidle is scaled:
		   cl->avgidle == true_avgidle/W,
		   so the update needs no extra shift.
		 */
			avgidle += idle - (avgidle>>cl->ewma_log);
		}

		if (avgidle <= 0) {
			/* Overlimit or at-limit */

			if (avgidle < cl->minidle)
				avgidle = cl->minidle;

			cl->avgidle = avgidle;

			/* Calculate the expected time when this class
			   will be allowed to send.
			   It will occur when:
			   (1-W)*true_avgidle + W*delay = 0, i.e.
			   idle = (1/W - 1)*(-true_avgidle)
			   or
			   idle = (1 - W)*(-cl->avgidle);
			 */
			idle = (-avgidle) - ((-avgidle) >> cl->ewma_log);

			/*
			   To maintain the rate allocated to the class,
			   we add to undertime the virtual clock time
			   necessary to complete the transmitted packet.
			   (len/phys_bandwidth has already passed
			   by the moment of cbq_update.)
			 */

			idle -= L2T(&q->link, len);
			idle += L2T(cl, len);

			cl->undertime = q->now + idle;
		} else {
			/* Underlimit */

			cl->undertime = PSCHED_PASTPERFECT;
			if (avgidle > cl->maxidle)
				cl->avgidle = cl->maxidle;
			else
				cl->avgidle = avgidle;
		}
		cl->last = q->now;
	}

	cbq_update_toplevel(q, this, q->tx_borrowed);
}
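
/*
 * Illustrative sketch only (#if 0, not built): the scaled EWMA used above.
 * The true average is true_avgidle := (1-W)*true_avgidle + W*idle with
 * W = 2^-ewma_log, but the class stores avgidle == true_avgidle/W, so one
 * update step needs only a shift and no division.
 */
#if 0	/* example only */
static long ewma_scaled_update(long avgidle, long idle, unsigned char ewma_log)
{
	/* scaled form of (1-W)*avg + W*idle:  avg += idle - W*avg */
	return avgidle + idle - (avgidle >> ewma_log);
}

static long ewma_comeback_idle(long avgidle, unsigned char ewma_log)
{
	/* idle time needed to bring a negative average back to zero:
	 * (1/W - 1)*(-true_avgidle)  ==  (-avgidle) - ((-avgidle) >> ewma_log) */
	return (-avgidle) - ((-avgidle) >> ewma_log);
}
#endif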
static __inline__ struct cbq_class *
cbq_under_limit(struct cbq_class *cl)
{
	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
	struct cbq_class *this_cl = cl;

	if (cl->tparent == NULL)
		return cl;

	if (cl->undertime == PSCHED_PASTPERFECT || q->now >= cl->undertime) {
		cl->delayed = 0;
		return cl;
	}

	do {
		/* This is a very suspicious place. Now the overlimit
		   action is generated for unbounded classes
		   only if the link is completely congested.
		   Though this agrees with the ancestor-only paradigm,
		   it looks very stupid. Particularly,
		   it means that this chunk of code will either
		   never be called or result in strong amplification
		   of burstiness. Dangerous and silly, but
		   no other solution exists.
		 */
		if ((cl = cl->borrow) == NULL) {
			this_cl->qstats.overlimits++;
			this_cl->overlimit(this_cl);
			return NULL;
		}
		if (cl->level > q->toplevel)
			return NULL;
	} while (cl->undertime != PSCHED_PASTPERFECT && q->now < cl->undertime);

	cl->delayed = 0;
	return cl;
}
static __inline__ struct sk_buff *
cbq_dequeue_prio(struct Qdisc *sch, int prio)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *cl_tail, *cl_prev, *cl;
	struct sk_buff *skb;
	int deficit;

	cl_tail = cl_prev = q->active[prio];
	cl = cl_prev->next_alive;

	do {
		deficit = 0;

		/* Start round */
		do {
			struct cbq_class *borrow = cl;

			if (cl->q->q.qlen &&
			    (borrow = cbq_under_limit(cl)) == NULL)
				goto skip_class;

			if (cl->deficit <= 0) {
				/* Class exhausted its allotment for
				   this round. Switch to the next one.
				 */
				deficit = 1;
				cl->deficit += cl->quantum;
				goto next_class;
			}

			skb = cl->q->dequeue(cl->q);

			/* Class did not give us any skb :-(
			   This can occur even if cl->q->q.qlen != 0,
			   f.e. if cl->q == "tbf"
			 */
			if (skb == NULL)
				goto skip_class;

			cl->deficit -= qdisc_pkt_len(skb);
			q->tx_class = cl;
			q->tx_borrowed = borrow;
			if (borrow != cl) {
#ifndef CBQ_XSTATS_BORROWS_BYTES
				borrow->xstats.borrows++;
				cl->xstats.borrows++;
#else
				borrow->xstats.borrows += qdisc_pkt_len(skb);
				cl->xstats.borrows += qdisc_pkt_len(skb);
#endif
			}
			q->tx_len = qdisc_pkt_len(skb);

			if (cl->deficit <= 0) {
				q->active[prio] = cl;
				cl = cl->next_alive;
				cl->deficit += cl->quantum;
			}
			return skb;

skip_class:
			if (cl->q->q.qlen == 0 || prio != cl->cpriority) {
				/* Class is empty or penalized.
				   Unlink it from the active chain.
				 */
				cl_prev->next_alive = cl->next_alive;
				cl->next_alive = NULL;

				/* Did cl_tail point to it? */
				if (cl == cl_tail) {
					/* Repair it! */
					cl_tail = cl_prev;

					/* Was it the last class in this band? */
					if (cl == cl_tail) {
						/* Kill the band! */
						q->active[prio] = NULL;
						q->activemask &= ~(1<<prio);
						if (cl->q->q.qlen)
							cbq_activate_class(cl);
						return NULL;
					}

					q->active[prio] = cl_tail;
				}
				if (cl->q->q.qlen)
					cbq_activate_class(cl);

				cl = cl_prev;
			}

next_class:
			cl_prev = cl;
			cl = cl->next_alive;
		} while (cl_prev != cl_tail);
	} while (deficit);

	q->active[prio] = cl_prev;

	return NULL;
}
static __inline__ struct sk_buff *
cbq_dequeue_1(struct Qdisc *sch)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct sk_buff *skb;
	unsigned activemask;

	activemask = q->activemask&0xFF;
	while (activemask) {
		int prio = ffz(~activemask);
		activemask &= ~(1<<prio);
		skb = cbq_dequeue_prio(sch, prio);
		if (skb)
			return skb;
	}
	return NULL;
}
static struct sk_buff *
cbq_dequeue(struct Qdisc *sch)
{
	struct sk_buff *skb;
	struct cbq_sched_data *q = qdisc_priv(sch);
	psched_time_t now;
	psched_tdiff_t incr;

	now = psched_get_time();
	incr = now - q->now_rt;

	if (q->tx_class) {
		psched_tdiff_t incr2;
		/* Time integrator. We calculate EOS time
		   by adding the expected packet transmission time.
		   If real time is greater, we warp the artificial clock,
		   so that:

		   cbq_time = max(real_time, work);
		 */
		incr2 = L2T(&q->link, q->tx_len);
		q->now += incr2;
		cbq_update(q);
		if ((incr -= incr2) < 0)
			incr = 0;
	}
	q->now += incr;
	q->now_rt = now;

	for (;;) {
		q->wd_expires = 0;

		skb = cbq_dequeue_1(sch);
		if (skb) {
			sch->q.qlen--;
			sch->flags &= ~TCQ_F_THROTTLED;
			return skb;
		}

		/* All the classes are overlimit.

		   This is possible if:
		   1. The scheduler is empty.
		   2. The toplevel cutoff inhibited borrowing.
		   3. The root class is overlimit.

		   Reset the 2nd and 3rd conditions and retry.

		   Note that NS and cbq-2.0 are buggy: peeking
		   an arbitrary class is appropriate for ancestor-only
		   sharing, but not for the toplevel algorithm.

		   Our version is better, but slower, because it requires
		   two passes; this is unavoidable with top-level sharing.
		*/

		if (q->toplevel == TC_CBQ_MAXLEVEL &&
		    q->link.undertime == PSCHED_PASTPERFECT)
			break;

		q->toplevel = TC_CBQ_MAXLEVEL;
		q->link.undertime = PSCHED_PASTPERFECT;
	}

	/* No packets in the scheduler, or nobody wants to give them to us :-(
	   Sigh... start the watchdog timer in the last case. */

	if (sch->q.qlen) {
		sch->qstats.overlimits++;
		if (q->wd_expires)
			qdisc_watchdog_schedule(&q->watchdog,
						now + q->wd_expires);
	}
	return NULL;
}
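
/*
 * Illustrative sketch only (#if 0, not built): the clock integrator described
 * in cbq_dequeue() above.  The artificial CBQ clock advances by the expected
 * transmission time of the last packet, but is warped forward whenever real
 * time has run ahead of it, i.e. cbq_time = max(real_time, work).
 */
#if 0	/* example only */
static psched_time_t integrate_clock(psched_time_t cbq_now,
				     psched_time_t prev_rt,
				     psched_time_t now_rt,
				     psched_tdiff_t tx_time)
{
	psched_tdiff_t incr = now_rt - prev_rt;	/* real time elapsed */

	cbq_now += tx_time;			/* expected end-of-service */
	if ((incr -= tx_time) < 0)		/* real time still behind work */
		incr = 0;
	return cbq_now + incr;			/* == max(real clock, work clock) */
}
#endif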
/* CBQ class maintenance routines */

static void cbq_adjust_levels(struct cbq_class *this)
{
	if (this == NULL)
		return;

	do {
		int level = 0;
		struct cbq_class *cl;

		if ((cl = this->children) != NULL) {
			do {
				if (cl->level > level)
					level = cl->level;
			} while ((cl = cl->sibling) != this->children);
		}
		this->level = level+1;
	} while ((this = this->tparent) != NULL);
}
static void cbq_normalize_quanta(struct cbq_sched_data *q, int prio)
{
	int h;
	struct cbq_class *cl;
	struct hlist_node *n;

	if (q->quanta[prio] == 0)
		return;

	for (h = 0; h < q->clhash.hashsize; h++) {
		hlist_for_each_entry(cl, n, &q->clhash.hash[h], common.hnode) {
			/* BUGGGG... Beware! This expression suffers from
			   arithmetic overflows!
			 */
			if (cl->priority == prio) {
				cl->quantum = (cl->weight*cl->allot*q->nclasses[prio])/
					q->quanta[prio];
			}
			if (cl->quantum <= 0 || cl->quantum > 32*qdisc_dev(cl->qdisc)->mtu) {
				printk(KERN_WARNING "CBQ: class %08x has bad quantum==%ld, repaired.\n", cl->common.classid, cl->quantum);
				cl->quantum = qdisc_dev(cl->qdisc)->mtu/2 + 1;
			}
		}
	}
}
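
/*
 * Illustrative sketch only (#if 0, not built): the same weighted-quantum
 * formula as above, but computed in 64 bits so the intermediate product
 * weight*allot*nclasses cannot overflow a long.  This is just an example of
 * how the overflow flagged in the comment could be avoided, not a change to
 * the scheduler.
 */
#if 0	/* example only */
static long normalized_quantum(long weight, long allot,
			       int nclasses, unsigned quanta_sum)
{
	u64 prod = (u64)weight * (u64)allot * (u64)nclasses;

	do_div(prod, quanta_sum);	/* kernel 64-by-32 division helper */
	return (long)prod;
}
#endif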
static void cbq_sync_defmap(struct cbq_class *cl)
{
	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
	struct cbq_class *split = cl->split;
	unsigned h;
	int i;

	if (split == NULL)
		return;

	for (i = 0; i <= TC_PRIO_MAX; i++) {
		if (split->defaults[i] == cl && !(cl->defmap&(1<<i)))
			split->defaults[i] = NULL;
	}

	for (i = 0; i <= TC_PRIO_MAX; i++) {
		int level = split->level;

		if (split->defaults[i])
			continue;

		for (h = 0; h < q->clhash.hashsize; h++) {
			struct hlist_node *n;
			struct cbq_class *c;

			hlist_for_each_entry(c, n, &q->clhash.hash[h],
					     common.hnode) {
				if (c->split == split && c->level < level &&
				    c->defmap&(1<<i)) {
					split->defaults[i] = c;
					level = c->level;
				}
			}
		}
	}
}
static void cbq_change_defmap(struct cbq_class *cl, u32 splitid, u32 def, u32 mask)
{
	struct cbq_class *split = NULL;

	if (splitid == 0) {
		if ((split = cl->split) == NULL)
			return;
		splitid = split->common.classid;
	}

	if (split == NULL || split->common.classid != splitid) {
		for (split = cl->tparent; split; split = split->tparent)
			if (split->common.classid == splitid)
				break;
	}

	if (split == NULL)
		return;

	if (cl->split != split) {
		cl->defmap = 0;
		cbq_sync_defmap(cl);
		cl->split = split;
		cl->defmap = def&mask;
	} else
		cl->defmap = (cl->defmap&~mask)|(def&mask);

	cbq_sync_defmap(cl);
}
static void cbq_unlink_class(struct cbq_class *this)
{
	struct cbq_class *cl, **clp;
	struct cbq_sched_data *q = qdisc_priv(this->qdisc);

	qdisc_class_hash_remove(&q->clhash, &this->common);

	if (this->tparent) {
		clp = &this->sibling;
		cl = *clp;
		do {
			if (cl == this) {
				*clp = cl->sibling;
				break;
			}
			clp = &cl->sibling;
		} while ((cl = *clp) != this->sibling);

		if (this->tparent->children == this) {
			this->tparent->children = this->sibling;
			if (this->sibling == this)
				this->tparent->children = NULL;
		}
	} else {
		BUG_TRAP(this->sibling == this);
	}
}
static void cbq_link_class(struct cbq_class *this)
{
	struct cbq_sched_data *q = qdisc_priv(this->qdisc);
	struct cbq_class *parent = this->tparent;

	this->sibling = this;
	qdisc_class_hash_insert(&q->clhash, &this->common);

	if (parent == NULL)
		return;

	if (parent->children == NULL) {
		parent->children = this;
	} else {
		this->sibling = parent->children->sibling;
		parent->children->sibling = this;
	}
}
static unsigned int cbq_drop(struct Qdisc *sch)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *cl, *cl_head;
	int prio;
	unsigned int len;

	for (prio = TC_CBQ_MAXPRIO; prio >= 0; prio--) {
		if ((cl_head = q->active[prio]) == NULL)
			continue;

		cl = cl_head;
		do {
			if (cl->q->ops->drop && (len = cl->q->ops->drop(cl->q))) {
				sch->q.qlen--;
				if (!cl->q->q.qlen)
					cbq_deactivate_class(cl);
				return len;
			}
		} while ((cl = cl->next_alive) != cl_head);
	}
	return 0;
}
static void
cbq_reset(struct Qdisc *sch)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *cl;
	struct hlist_node *n;
	int prio;
	unsigned h;

	q->tx_borrowed = NULL;
	qdisc_watchdog_cancel(&q->watchdog);
	hrtimer_cancel(&q->delay_timer);
	q->toplevel = TC_CBQ_MAXLEVEL;
	q->now = psched_get_time();

	for (prio = 0; prio <= TC_CBQ_MAXPRIO; prio++)
		q->active[prio] = NULL;

	for (h = 0; h < q->clhash.hashsize; h++) {
		hlist_for_each_entry(cl, n, &q->clhash.hash[h], common.hnode) {
			cl->next_alive = NULL;
			cl->undertime = PSCHED_PASTPERFECT;
			cl->avgidle = cl->maxidle;
			cl->deficit = cl->quantum;
			cl->cpriority = cl->priority;
		}
	}
}
static int cbq_set_lss(struct cbq_class *cl, struct tc_cbq_lssopt *lss)
{
	if (lss->change&TCF_CBQ_LSS_FLAGS) {
		cl->share = (lss->flags&TCF_CBQ_LSS_ISOLATED) ? NULL : cl->tparent;
		cl->borrow = (lss->flags&TCF_CBQ_LSS_BOUNDED) ? NULL : cl->tparent;
	}
	if (lss->change&TCF_CBQ_LSS_EWMA)
		cl->ewma_log = lss->ewma_log;
	if (lss->change&TCF_CBQ_LSS_AVPKT)
		cl->avpkt = lss->avpkt;
	if (lss->change&TCF_CBQ_LSS_MINIDLE)
		cl->minidle = -(long)lss->minidle;
	if (lss->change&TCF_CBQ_LSS_MAXIDLE) {
		cl->maxidle = lss->maxidle;
		cl->avgidle = lss->maxidle;
	}
	if (lss->change&TCF_CBQ_LSS_OFFTIME)
		cl->offtime = lss->offtime;
	return 0;
}
static void cbq_rmprio(struct cbq_sched_data *q, struct cbq_class *cl)
{
	q->nclasses[cl->priority]--;
	q->quanta[cl->priority] -= cl->weight;
	cbq_normalize_quanta(q, cl->priority);
}

static void cbq_addprio(struct cbq_sched_data *q, struct cbq_class *cl)
{
	q->nclasses[cl->priority]++;
	q->quanta[cl->priority] += cl->weight;
	cbq_normalize_quanta(q, cl->priority);
}
static int cbq_set_wrr(struct cbq_class *cl, struct tc_cbq_wrropt *wrr)
{
	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);

	if (wrr->allot)
		cl->allot = wrr->allot;
	if (wrr->weight)
		cl->weight = wrr->weight;
	if (wrr->priority) {
		cl->priority = wrr->priority-1;
		cl->cpriority = cl->priority;
		if (cl->priority >= cl->priority2)
			cl->priority2 = TC_CBQ_MAXPRIO-1;
	}

	cbq_addprio(q, cl);
	return 0;
}
static int cbq_set_overlimit(struct cbq_class *cl, struct tc_cbq_ovl *ovl)
{
	switch (ovl->strategy) {
	case TC_CBQ_OVL_CLASSIC:
		cl->overlimit = cbq_ovl_classic;
		break;
	case TC_CBQ_OVL_DELAY:
		cl->overlimit = cbq_ovl_delay;
		break;
	case TC_CBQ_OVL_LOWPRIO:
		if (ovl->priority2-1 >= TC_CBQ_MAXPRIO ||
		    ovl->priority2-1 <= cl->priority)
			return -EINVAL;
		cl->priority2 = ovl->priority2-1;
		cl->overlimit = cbq_ovl_lowprio;
		break;
	case TC_CBQ_OVL_DROP:
		cl->overlimit = cbq_ovl_drop;
		break;
	case TC_CBQ_OVL_RCLASSIC:
		cl->overlimit = cbq_ovl_rclassic;
		break;
	default:
		return -EINVAL;
	}
	cl->penalty = ovl->penalty;
	return 0;
}
#ifdef CONFIG_NET_CLS_ACT
static int cbq_set_police(struct cbq_class *cl, struct tc_cbq_police *p)
{
	cl->police = p->police;

	if (cl->q->handle) {
		if (p->police == TC_POLICE_RECLASSIFY)
			cl->q->reshape_fail = cbq_reshape_fail;
		else
			cl->q->reshape_fail = NULL;
	}
	return 0;
}
#endif
static int cbq_set_fopt(struct cbq_class *cl, struct tc_cbq_fopt *fopt)
{
	cbq_change_defmap(cl, fopt->split, fopt->defmap, fopt->defchange);
	return 0;
}
static const struct nla_policy cbq_policy[TCA_CBQ_MAX + 1] = {
	[TCA_CBQ_LSSOPT]	= { .len = sizeof(struct tc_cbq_lssopt) },
	[TCA_CBQ_WRROPT]	= { .len = sizeof(struct tc_cbq_wrropt) },
	[TCA_CBQ_FOPT]		= { .len = sizeof(struct tc_cbq_fopt) },
	[TCA_CBQ_OVL_STRATEGY]	= { .len = sizeof(struct tc_cbq_ovl) },
	[TCA_CBQ_RATE]		= { .len = sizeof(struct tc_ratespec) },
	[TCA_CBQ_RTAB]		= { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
	[TCA_CBQ_POLICE]	= { .len = sizeof(struct tc_cbq_police) },
};
static int cbq_init(struct Qdisc *sch, struct nlattr *opt)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct nlattr *tb[TCA_CBQ_MAX + 1];
	struct tc_ratespec *r;
	int err;

	err = nla_parse_nested(tb, TCA_CBQ_MAX, opt, cbq_policy);
	if (err < 0)
		return err;

	if (tb[TCA_CBQ_RTAB] == NULL || tb[TCA_CBQ_RATE] == NULL)
		return -EINVAL;

	r = nla_data(tb[TCA_CBQ_RATE]);

	if ((q->link.R_tab = qdisc_get_rtab(r, tb[TCA_CBQ_RTAB])) == NULL)
		return -EINVAL;

	err = qdisc_class_hash_init(&q->clhash);
	if (err < 0)
		goto put_rtab;

	q->link.sibling = &q->link;
	q->link.common.classid = sch->handle;
	q->link.qdisc = sch;
	if (!(q->link.q = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue,
					    &pfifo_qdisc_ops,
					    sch->handle)))
		q->link.q = &noop_qdisc;

	q->link.priority = TC_CBQ_MAXPRIO-1;
	q->link.priority2 = TC_CBQ_MAXPRIO-1;
	q->link.cpriority = TC_CBQ_MAXPRIO-1;
	q->link.ovl_strategy = TC_CBQ_OVL_CLASSIC;
	q->link.overlimit = cbq_ovl_classic;
	q->link.allot = psched_mtu(qdisc_dev(sch));
	q->link.quantum = q->link.allot;
	q->link.weight = q->link.R_tab->rate.rate;

	q->link.ewma_log = TC_CBQ_DEF_EWMA;
	q->link.avpkt = q->link.allot/2;
	q->link.minidle = -0x7FFFFFFF;

	qdisc_watchdog_init(&q->watchdog, sch);
	hrtimer_init(&q->delay_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
	q->delay_timer.function = cbq_undelay;
	q->toplevel = TC_CBQ_MAXLEVEL;
	q->now = psched_get_time();

	cbq_link_class(&q->link);

	if (tb[TCA_CBQ_LSSOPT])
		cbq_set_lss(&q->link, nla_data(tb[TCA_CBQ_LSSOPT]));

	cbq_addprio(q, &q->link);
	return 0;

put_rtab:
	qdisc_put_rtab(q->link.R_tab);
	return err;
}
static __inline__ int cbq_dump_rate(struct sk_buff *skb, struct cbq_class *cl)
{
	unsigned char *b = skb_tail_pointer(skb);

	NLA_PUT(skb, TCA_CBQ_RATE, sizeof(cl->R_tab->rate), &cl->R_tab->rate);
	return skb->len;

nla_put_failure:
	nlmsg_trim(skb, b);
	return -1;
}
static __inline__ int cbq_dump_lss(struct sk_buff *skb, struct cbq_class *cl)
{
	unsigned char *b = skb_tail_pointer(skb);
	struct tc_cbq_lssopt opt;

	opt.flags = 0;
	if (cl->borrow == NULL)
		opt.flags |= TCF_CBQ_LSS_BOUNDED;
	if (cl->share == NULL)
		opt.flags |= TCF_CBQ_LSS_ISOLATED;
	opt.ewma_log = cl->ewma_log;
	opt.level = cl->level;
	opt.avpkt = cl->avpkt;
	opt.maxidle = cl->maxidle;
	opt.minidle = (u32)(-cl->minidle);
	opt.offtime = cl->offtime;
	NLA_PUT(skb, TCA_CBQ_LSSOPT, sizeof(opt), &opt);
	return skb->len;

nla_put_failure:
	nlmsg_trim(skb, b);
	return -1;
}
static __inline__ int cbq_dump_wrr(struct sk_buff *skb, struct cbq_class *cl)
{
	unsigned char *b = skb_tail_pointer(skb);
	struct tc_cbq_wrropt opt;

	opt.allot = cl->allot;
	opt.priority = cl->priority+1;
	opt.cpriority = cl->cpriority+1;
	opt.weight = cl->weight;
	NLA_PUT(skb, TCA_CBQ_WRROPT, sizeof(opt), &opt);
	return skb->len;

nla_put_failure:
	nlmsg_trim(skb, b);
	return -1;
}
static __inline__ int cbq_dump_ovl(struct sk_buff *skb, struct cbq_class *cl)
{
	unsigned char *b = skb_tail_pointer(skb);
	struct tc_cbq_ovl opt;

	opt.strategy = cl->ovl_strategy;
	opt.priority2 = cl->priority2+1;
	opt.penalty = cl->penalty;
	NLA_PUT(skb, TCA_CBQ_OVL_STRATEGY, sizeof(opt), &opt);
	return skb->len;

nla_put_failure:
	nlmsg_trim(skb, b);
	return -1;
}
static __inline__ int cbq_dump_fopt(struct sk_buff *skb, struct cbq_class *cl)
{
	unsigned char *b = skb_tail_pointer(skb);
	struct tc_cbq_fopt opt;

	if (cl->split || cl->defmap) {
		opt.split = cl->split ? cl->split->common.classid : 0;
		opt.defmap = cl->defmap;
		NLA_PUT(skb, TCA_CBQ_FOPT, sizeof(opt), &opt);
	}
	return skb->len;

nla_put_failure:
	nlmsg_trim(skb, b);
	return -1;
}
#ifdef CONFIG_NET_CLS_ACT
static __inline__ int cbq_dump_police(struct sk_buff *skb, struct cbq_class *cl)
{
	unsigned char *b = skb_tail_pointer(skb);
	struct tc_cbq_police opt;

	if (cl->police) {
		opt.police = cl->police;
		NLA_PUT(skb, TCA_CBQ_POLICE, sizeof(opt), &opt);
	}
	return skb->len;

nla_put_failure:
	nlmsg_trim(skb, b);
	return -1;
}
#endif
static int cbq_dump_attr(struct sk_buff *skb, struct cbq_class *cl)
{
	if (cbq_dump_lss(skb, cl) < 0 ||
	    cbq_dump_rate(skb, cl) < 0 ||
	    cbq_dump_wrr(skb, cl) < 0 ||
	    cbq_dump_ovl(skb, cl) < 0 ||
#ifdef CONFIG_NET_CLS_ACT
	    cbq_dump_police(skb, cl) < 0 ||
#endif
	    cbq_dump_fopt(skb, cl) < 0)
		return -1;
	return 0;
}
static int cbq_dump(struct Qdisc *sch, struct sk_buff *skb)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct nlattr *nest;

	nest = nla_nest_start(skb, TCA_OPTIONS);
	if (nest == NULL)
		goto nla_put_failure;
	if (cbq_dump_attr(skb, &q->link) < 0)
		goto nla_put_failure;
	nla_nest_end(skb, nest);
	return skb->len;

nla_put_failure:
	nla_nest_cancel(skb, nest);
	return -1;
}
static int
cbq_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
{
	struct cbq_sched_data *q = qdisc_priv(sch);

	q->link.xstats.avgidle = q->link.avgidle;
	return gnet_stats_copy_app(d, &q->link.xstats, sizeof(q->link.xstats));
}
static int
cbq_dump_class(struct Qdisc *sch, unsigned long arg,
	       struct sk_buff *skb, struct tcmsg *tcm)
{
	struct cbq_class *cl = (struct cbq_class *)arg;
	struct nlattr *nest;

	if (cl->tparent)
		tcm->tcm_parent = cl->tparent->common.classid;
	else
		tcm->tcm_parent = TC_H_ROOT;
	tcm->tcm_handle = cl->common.classid;
	tcm->tcm_info = cl->q->handle;

	nest = nla_nest_start(skb, TCA_OPTIONS);
	if (nest == NULL)
		goto nla_put_failure;
	if (cbq_dump_attr(skb, cl) < 0)
		goto nla_put_failure;
	nla_nest_end(skb, nest);
	return skb->len;

nla_put_failure:
	nla_nest_cancel(skb, nest);
	return -1;
}
static int
cbq_dump_class_stats(struct Qdisc *sch, unsigned long arg,
		     struct gnet_dump *d)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *cl = (struct cbq_class *)arg;

	cl->qstats.qlen = cl->q->q.qlen;
	cl->xstats.avgidle = cl->avgidle;
	cl->xstats.undertime = 0;

	if (cl->undertime != PSCHED_PASTPERFECT)
		cl->xstats.undertime = cl->undertime - q->now;

	if (gnet_stats_copy_basic(d, &cl->bstats) < 0 ||
	    gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
	    gnet_stats_copy_queue(d, &cl->qstats) < 0)
		return -1;

	return gnet_stats_copy_app(d, &cl->xstats, sizeof(cl->xstats));
}
static int cbq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
		     struct Qdisc **old)
{
	struct cbq_class *cl = (struct cbq_class *)arg;

	if (cl) {
		if (new == NULL) {
			new = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue,
						&pfifo_qdisc_ops,
						cl->common.classid);
			if (new == NULL)
				return -ENOBUFS;
		} else {
#ifdef CONFIG_NET_CLS_ACT
			if (cl->police == TC_POLICE_RECLASSIFY)
				new->reshape_fail = cbq_reshape_fail;
#endif
		}
		sch_tree_lock(sch);
		*old = xchg(&cl->q, new);
		qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
		qdisc_reset(*old);
		sch_tree_unlock(sch);

		return 0;
	}
	return -ENOENT;
}
static struct Qdisc *
cbq_leaf(struct Qdisc *sch, unsigned long arg)
{
	struct cbq_class *cl = (struct cbq_class *)arg;

	return cl ? cl->q : NULL;
}

static void cbq_qlen_notify(struct Qdisc *sch, unsigned long arg)
{
	struct cbq_class *cl = (struct cbq_class *)arg;

	if (cl->q->q.qlen == 0)
		cbq_deactivate_class(cl);
}
static unsigned long cbq_get(struct Qdisc *sch, u32 classid)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *cl = cbq_class_lookup(q, classid);

	if (cl) {
		cl->refcnt++;
		return (unsigned long)cl;
	}
	return 0;
}
static void cbq_destroy_class(struct Qdisc *sch, struct cbq_class *cl)
{
	struct cbq_sched_data *q = qdisc_priv(sch);

	BUG_TRAP(!cl->filters);

	tcf_destroy_chain(&cl->filter_list);
	qdisc_destroy(cl->q);
	qdisc_put_rtab(cl->R_tab);
	gen_kill_estimator(&cl->bstats, &cl->rate_est);
	if (cl != &q->link)
		kfree(cl);
}
static void
cbq_destroy(struct Qdisc *sch)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct hlist_node *n, *next;
	struct cbq_class *cl;
	unsigned h;

#ifdef CONFIG_NET_CLS_ACT
	q->rx_class = NULL;
#endif
	/*
	 * Filters must be destroyed first because we don't destroy the
	 * classes from root to leaves, which means that filters can still
	 * be bound to classes which have been destroyed already. --TGR '04
	 */
	for (h = 0; h < q->clhash.hashsize; h++) {
		hlist_for_each_entry(cl, n, &q->clhash.hash[h], common.hnode)
			tcf_destroy_chain(&cl->filter_list);
	}
	for (h = 0; h < q->clhash.hashsize; h++) {
		hlist_for_each_entry_safe(cl, n, next, &q->clhash.hash[h],
					  common.hnode)
			cbq_destroy_class(sch, cl);
	}
	qdisc_class_hash_destroy(&q->clhash);
}
static void cbq_put(struct Qdisc *sch, unsigned long arg)
{
	struct cbq_class *cl = (struct cbq_class *)arg;

	if (--cl->refcnt == 0) {
#ifdef CONFIG_NET_CLS_ACT
		spinlock_t *root_lock = qdisc_root_lock(sch);
		struct cbq_sched_data *q = qdisc_priv(sch);

		spin_lock_bh(root_lock);
		if (q->rx_class == cl)
			q->rx_class = NULL;
		spin_unlock_bh(root_lock);
#endif

		cbq_destroy_class(sch, cl);
	}
}
static int
cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **tca,
		 unsigned long *arg)
{
	int err;
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *cl = (struct cbq_class *)*arg;
	struct nlattr *opt = tca[TCA_OPTIONS];
	struct nlattr *tb[TCA_CBQ_MAX + 1];
	struct cbq_class *parent;
	struct qdisc_rate_table *rtab = NULL;

	if (opt == NULL)
		return -EINVAL;

	err = nla_parse_nested(tb, TCA_CBQ_MAX, opt, cbq_policy);
	if (err < 0)
		return err;

	if (cl) {
		/* Check parent */
		if (parentid) {
			if (cl->tparent &&
			    cl->tparent->common.classid != parentid)
				return -EINVAL;
			if (!cl->tparent && parentid != TC_H_ROOT)
				return -EINVAL;
		}

		if (tb[TCA_CBQ_RATE]) {
			rtab = qdisc_get_rtab(nla_data(tb[TCA_CBQ_RATE]), tb[TCA_CBQ_RTAB]);
			if (rtab == NULL)
				return -EINVAL;
		}

		/* Change class parameters */
		sch_tree_lock(sch);

		if (cl->next_alive != NULL)
			cbq_deactivate_class(cl);

		if (rtab) {
			rtab = xchg(&cl->R_tab, rtab);
			qdisc_put_rtab(rtab);
		}

		if (tb[TCA_CBQ_LSSOPT])
			cbq_set_lss(cl, nla_data(tb[TCA_CBQ_LSSOPT]));

		if (tb[TCA_CBQ_WRROPT]) {
			cbq_rmprio(q, cl);
			cbq_set_wrr(cl, nla_data(tb[TCA_CBQ_WRROPT]));
		}

		if (tb[TCA_CBQ_OVL_STRATEGY])
			cbq_set_overlimit(cl, nla_data(tb[TCA_CBQ_OVL_STRATEGY]));

#ifdef CONFIG_NET_CLS_ACT
		if (tb[TCA_CBQ_POLICE])
			cbq_set_police(cl, nla_data(tb[TCA_CBQ_POLICE]));
#endif

		if (tb[TCA_CBQ_FOPT])
			cbq_set_fopt(cl, nla_data(tb[TCA_CBQ_FOPT]));

		if (cl->q->q.qlen)
			cbq_activate_class(cl);

		sch_tree_unlock(sch);

		if (tca[TCA_RATE])
			gen_replace_estimator(&cl->bstats, &cl->rate_est,
					      qdisc_root_lock(sch),
					      tca[TCA_RATE]);
		return 0;
	}

	if (parentid == TC_H_ROOT)
		return -EINVAL;

	if (tb[TCA_CBQ_WRROPT] == NULL || tb[TCA_CBQ_RATE] == NULL ||
	    tb[TCA_CBQ_LSSOPT] == NULL)
		return -EINVAL;

	rtab = qdisc_get_rtab(nla_data(tb[TCA_CBQ_RATE]), tb[TCA_CBQ_RTAB]);
	if (rtab == NULL)
		return -EINVAL;

	if (classid) {
		err = -EINVAL;
		if (TC_H_MAJ(classid^sch->handle) || cbq_class_lookup(q, classid))
			goto failure;
	} else {
		int i;
		classid = TC_H_MAKE(sch->handle, 0x8000);

		for (i = 0; i < 0x8000; i++) {
			if (++q->hgenerator >= 0x8000)
				q->hgenerator = 1;
			if (cbq_class_lookup(q, classid|q->hgenerator) == NULL)
				break;
		}
		err = -ENOSR;
		if (i >= 0x8000)
			goto failure;
		classid = classid|q->hgenerator;
	}

	parent = &q->link;
	if (parentid) {
		parent = cbq_class_lookup(q, parentid);
		err = -EINVAL;
		if (parent == NULL)
			goto failure;
	}

	err = -ENOBUFS;
	cl = kzalloc(sizeof(*cl), GFP_KERNEL);
	if (cl == NULL)
		goto failure;
	cl->R_tab = rtab;
	rtab = NULL;
	cl->refcnt = 1;
	if (!(cl->q = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue,
					&pfifo_qdisc_ops, classid)))
		cl->q = &noop_qdisc;
	cl->common.classid = classid;
	cl->tparent = parent;
	cl->qdisc = sch;
	cl->allot = parent->allot;
	cl->quantum = cl->allot;
	cl->weight = cl->R_tab->rate.rate;

	sch_tree_lock(sch);
	cbq_link_class(cl);
	cl->borrow = cl->tparent;
	if (cl->tparent != &q->link)
		cl->share = cl->tparent;
	cbq_adjust_levels(parent);
	cl->minidle = -0x7FFFFFFF;
	cbq_set_lss(cl, nla_data(tb[TCA_CBQ_LSSOPT]));
	cbq_set_wrr(cl, nla_data(tb[TCA_CBQ_WRROPT]));
	if (cl->ewma_log == 0)
		cl->ewma_log = q->link.ewma_log;
	if (cl->maxidle == 0)
		cl->maxidle = q->link.maxidle;
	if (cl->avpkt == 0)
		cl->avpkt = q->link.avpkt;
	cl->overlimit = cbq_ovl_classic;
	if (tb[TCA_CBQ_OVL_STRATEGY])
		cbq_set_overlimit(cl, nla_data(tb[TCA_CBQ_OVL_STRATEGY]));
#ifdef CONFIG_NET_CLS_ACT
	if (tb[TCA_CBQ_POLICE])
		cbq_set_police(cl, nla_data(tb[TCA_CBQ_POLICE]));
#endif
	if (tb[TCA_CBQ_FOPT])
		cbq_set_fopt(cl, nla_data(tb[TCA_CBQ_FOPT]));
	sch_tree_unlock(sch);

	qdisc_class_hash_grow(sch, &q->clhash);

	if (tca[TCA_RATE])
		gen_new_estimator(&cl->bstats, &cl->rate_est,
				  qdisc_root_lock(sch), tca[TCA_RATE]);

	*arg = (unsigned long)cl;
	return 0;

failure:
	qdisc_put_rtab(rtab);
	return err;
}
static int cbq_delete(struct Qdisc *sch, unsigned long arg)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *cl = (struct cbq_class *)arg;
	unsigned int qlen;

	if (cl->filters || cl->children || cl == &q->link)
		return -EBUSY;

	sch_tree_lock(sch);

	qlen = cl->q->q.qlen;
	qdisc_reset(cl->q);
	qdisc_tree_decrease_qlen(cl->q, qlen);

	if (cl->next_alive)
		cbq_deactivate_class(cl);

	if (q->tx_borrowed == cl)
		q->tx_borrowed = q->tx_class;
	if (q->tx_class == cl) {
		q->tx_class = NULL;
		q->tx_borrowed = NULL;
	}
#ifdef CONFIG_NET_CLS_ACT
	if (q->rx_class == cl)
		q->rx_class = NULL;
#endif

	cbq_unlink_class(cl);
	cbq_adjust_levels(cl->tparent);
	cl->defmap = 0;
	cbq_sync_defmap(cl);

	sch_tree_unlock(sch);

	if (--cl->refcnt == 0)
		cbq_destroy_class(sch, cl);

	return 0;
}
static struct tcf_proto **cbq_find_tcf(struct Qdisc *sch, unsigned long arg)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *cl = (struct cbq_class *)arg;

	if (cl == NULL)
		cl = &q->link;

	return &cl->filter_list;
}
static unsigned long cbq_bind_filter(struct Qdisc *sch, unsigned long parent,
				     u32 classid)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *p = (struct cbq_class *)parent;
	struct cbq_class *cl = cbq_class_lookup(q, classid);

	if (cl) {
		if (p && p->level <= cl->level)
			return 0;
		cl->filters++;
		return (unsigned long)cl;
	}
	return 0;
}

static void cbq_unbind_filter(struct Qdisc *sch, unsigned long arg)
{
	struct cbq_class *cl = (struct cbq_class *)arg;

	cl->filters--;
}
static void cbq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *cl;
	struct hlist_node *n;
	unsigned h;

	if (arg->stop)
		return;

	for (h = 0; h < q->clhash.hashsize; h++) {
		hlist_for_each_entry(cl, n, &q->clhash.hash[h], common.hnode) {
			if (arg->count < arg->skip) {
				arg->count++;
				continue;
			}
			if (arg->fn(sch, (unsigned long)cl, arg) < 0) {
				arg->stop = 1;
				return;
			}
			arg->count++;
		}
	}
}
static const struct Qdisc_class_ops cbq_class_ops = {
	.graft		=	cbq_graft,
	.leaf		=	cbq_leaf,
	.qlen_notify	=	cbq_qlen_notify,
	.get		=	cbq_get,
	.put		=	cbq_put,
	.change		=	cbq_change_class,
	.delete		=	cbq_delete,
	.walk		=	cbq_walk,
	.tcf_chain	=	cbq_find_tcf,
	.bind_tcf	=	cbq_bind_filter,
	.unbind_tcf	=	cbq_unbind_filter,
	.dump		=	cbq_dump_class,
	.dump_stats	=	cbq_dump_class_stats,
};

static struct Qdisc_ops cbq_qdisc_ops __read_mostly = {
	.id		=	"cbq",
	.cl_ops		=	&cbq_class_ops,
	.priv_size	=	sizeof(struct cbq_sched_data),
	.enqueue	=	cbq_enqueue,
	.dequeue	=	cbq_dequeue,
	.requeue	=	cbq_requeue,
	.drop		=	cbq_drop,
	.init		=	cbq_init,
	.reset		=	cbq_reset,
	.destroy	=	cbq_destroy,
	.dump		=	cbq_dump,
	.dump_stats	=	cbq_dump_stats,
	.owner		=	THIS_MODULE,
};

static int __init cbq_module_init(void)
{
	return register_qdisc(&cbq_qdisc_ops);
}
static void __exit cbq_module_exit(void)
{
	unregister_qdisc(&cbq_qdisc_ops);
}
module_init(cbq_module_init)
module_exit(cbq_module_exit)
MODULE_LICENSE("GPL");