/*
 * net/sched/sch_cbq.c	Class-Based Queueing discipline.
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 */
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/skbuff.h>
#include <net/netlink.h>
#include <net/pkt_sched.h>
/*	Class-Based Queueing (CBQ) algorithm.
	=======================================

	Sources: [1] Sally Floyd and Van Jacobson, "Link-sharing and Resource
		 Management Models for Packet Networks",
		 IEEE/ACM Transactions on Networking, Vol.3, No.4, 1995

		 [2] Sally Floyd, "Notes on CBQ and Guaranteed Service", 1995

		 [3] Sally Floyd, "Notes on Class-Based Queueing: Setting
		 Parameters", 1996

		 [4] Sally Floyd and Michael Speer, "Experimental Results
		 for Class-Based Queueing", 1998, not published.

	-----------------------------------------------------------------------

	The algorithm skeleton was taken from the NS simulator cbq.cc.
	If someone wants to check this code against the LBL version,
	they should take into account that ONLY the skeleton was borrowed;
	the implementation is different. Particularly:

	--- The WRR algorithm is different. Our version looks more
	reasonable (I hope) and works when quanta are allowed to be
	less than the MTU, which is always the case when real time classes
	have small rates. Note that the statement in [3] is incomplete:
	delay can actually be estimated even if the per-round class
	allotment is less than the MTU. Namely, if the per-round
	allotment is W*r_i, and r_1+...+r_k = r < 1, then

	delay_i <= ([MTU/(W*r_i)]*W*r + W*r + k*MTU)/B

	In the worst case we have an IntServ estimate with D = W*r+k*MTU
	and C = MTU*r. The proof (if correct at all) is trivial.
	(A worked numeric example of this bound follows this comment.)

	--- It seems that cbq-2.0 is not very accurate. At least, I cannot
	interpret some places, which look like wrong translations
	from NS. Anyone is advised to find these differences
	and explain to me why I am wrong 8).

	--- Linux has no EOI event, so we cannot estimate true class
	idle time. The workaround is to treat the next dequeue event
	as a sign that the previous packet has finished. This is wrong because
	of internal device queueing, but on a permanently loaded link it holds.
	Moreover, combined with the clock integrator, this scheme looks
	very close to an ideal solution.  */
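/* Worked example of the delay bound above (illustrative numbers, not from
   the original text): take B = 125000 bytes/s (1 Mbit/s), MTU = 1500 bytes,
   k = 2 classes with rate shares r_1 = 0.1 and r_2 = 0.3 (so r = 0.4),
   and a per-round budget W = 3000 bytes, giving allotments W*r_1 = 300 and
   W*r_2 = 1200 bytes, both below the MTU.  For class 1:

	[MTU/(W*r_1)]  = ceil(1500/300) = 5
	delay_1       <= (5*1200 + 1200 + 2*1500)/125000
	               = 10200/125000  ~= 82 ms

   and the IntServ-style worst-case parameters are D = W*r + k*MTU =
   4200 bytes and C = MTU*r = 600 bytes.  */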
struct cbq_sched_data;


struct cbq_class
{
	struct Qdisc_class_common common;
	struct cbq_class *next_alive;	/* next class with backlog in this priority band */

/* Parameters */
	unsigned char	priority;	/* class priority */
	unsigned char	priority2;	/* priority to be used after overlimit */
	unsigned char	ewma_log;	/* time constant for idle time calculation */
	unsigned char	ovl_strategy;
#ifdef CONFIG_NET_CLS_ACT
	unsigned char	police;
#endif

	u32		defmap;

	/* Link-sharing scheduler parameters */
	long		maxidle;	/* Class parameters: see below. */
	long		offtime;
	long		minidle;
	u32		avpkt;
	struct qdisc_rate_table *R_tab;

	/* Overlimit strategy parameters */
	void		(*overlimit)(struct cbq_class *cl);
	psched_tdiff_t	penalty;

	/* General scheduler (WRR) parameters */
	long		allot;
	long		quantum;	/* Allotment per WRR round */
	long		weight;		/* Relative allotment: see below */

	struct Qdisc	*qdisc;		/* Ptr to CBQ discipline */
	struct cbq_class *split;	/* Ptr to split node */
	struct cbq_class *share;	/* Ptr to LS parent in the class tree */
	struct cbq_class *tparent;	/* Ptr to tree parent in the class tree */
	struct cbq_class *borrow;	/* NULL if class is bandwidth limited;
					   parent otherwise */
	struct cbq_class *sibling;	/* Sibling chain */
	struct cbq_class *children;	/* Pointer to children chain */

	struct Qdisc	*q;		/* Elementary queueing discipline */


/* Variables */
	unsigned char	cpriority;	/* Effective priority */
	unsigned char	delayed;
	unsigned char	level;		/* level of the class in hierarchy:
					   0 for leaf classes, and maximal
					   level of children + 1 for nodes. */

	psched_time_t	last;		/* Last end of service */
	psched_time_t	undertime;
	long		avgidle;
	long		deficit;	/* Saved deficit for WRR */
	psched_time_t	penalized;
	struct gnet_stats_basic_packed bstats;
	struct gnet_stats_queue qstats;
	struct gnet_stats_rate_est rate_est;
	struct tc_cbq_xstats xstats;

	struct tcf_proto *filter_list;

	int		refcnt;
	int		filters;

	struct cbq_class *defaults[TC_PRIO_MAX+1];
};

struct cbq_sched_data
{
	struct Qdisc_class_hash clhash;	/* Hash table of all classes */
	int		nclasses[TC_CBQ_MAXPRIO+1];
	unsigned	quanta[TC_CBQ_MAXPRIO+1];

	struct cbq_class link;

	unsigned	activemask;
	struct cbq_class *active[TC_CBQ_MAXPRIO+1];	/* List of all classes
							   with backlog */

#ifdef CONFIG_NET_CLS_ACT
	struct cbq_class *rx_class;
#endif
	struct cbq_class *tx_class;
	struct cbq_class *tx_borrowed;
	int		tx_len;
	psched_time_t	now;		/* Cached timestamp */
	psched_time_t	now_rt;		/* Cached real time */
	unsigned	pmask;

	struct tasklet_hrtimer delay_timer;
	struct qdisc_watchdog watchdog;	/* Watchdog timer,
					   started when CBQ has
					   backlog, but cannot
					   transmit just now */
	psched_tdiff_t	wd_expires;
	int		toplevel;
	u32		hgenerator;
};
#define L2T(cl, len)	qdisc_l2t((cl)->R_tab, len)
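/* L2T ("length to time") looks up, in the class's rate table, the time
   needed to transmit len bytes at the class's configured rate, expressed
   in scheduler clock units.  Illustrative example (numbers are not from
   the original file): for a class configured at 1 Mbit/s (125000 bytes/s),
   L2T(cl, 1500) corresponds to 1500/125000 s = 12 ms.  */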
static __inline__ struct cbq_class *
cbq_class_lookup(struct cbq_sched_data *q, u32 classid)
{
	struct Qdisc_class_common *clc;

	clc = qdisc_class_find(&q->clhash, classid);
	if (clc == NULL)
		return NULL;
	return container_of(clc, struct cbq_class, common);
}
#ifdef CONFIG_NET_CLS_ACT

static struct cbq_class *
cbq_reclassify(struct sk_buff *skb, struct cbq_class *this)
{
	struct cbq_class *cl, *new;

	for (cl = this->tparent; cl; cl = cl->tparent)
		if ((new = cl->defaults[TC_PRIO_BESTEFFORT]) != NULL && new != this)
			return new;

	return NULL;
}

#endif
/* Classify packet. The procedure is pretty complicated, but
 * it allows us to combine link sharing and priority scheduling
 * transparently.
 *
 * Namely, you can put link sharing rules (f.e. route based) at the root
 * of CBQ, so that it resolves to split nodes. Then packets are classified
 * by logical priority, or a more specific classifier may be attached
 * to the split node.
 */
static struct cbq_class *
cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *head = &q->link;
	struct cbq_class **defmap;
	struct cbq_class *cl = NULL;
	u32 prio = skb->priority;
	struct tcf_result res;

	/*
	 *  Step 1. If skb->priority points to one of our classes, use it.
	 */
	if (TC_H_MAJ(prio^sch->handle) == 0 &&
	    (cl = cbq_class_lookup(q, prio)) != NULL)
		return cl;

	*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
	for (;;) {
		int result = 0;
		defmap = head->defaults;

		/*
		 * Step 2+n. Apply classifier.
		 */
		if (!head->filter_list ||
		    (result = tc_classify_compat(skb, head->filter_list, &res)) < 0)
			goto fallback;

		if ((cl = (void*)res.class) == NULL) {
			if (TC_H_MAJ(res.classid))
				cl = cbq_class_lookup(q, res.classid);
			else if ((cl = defmap[res.classid&TC_PRIO_MAX]) == NULL)
				cl = defmap[TC_PRIO_BESTEFFORT];

			if (cl == NULL || cl->level >= head->level)
				goto fallback;
		}

#ifdef CONFIG_NET_CLS_ACT
		switch (result) {
		case TC_ACT_QUEUED:
		case TC_ACT_STOLEN:
			*qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
		case TC_ACT_SHOT:
			return NULL;
		case TC_ACT_RECLASSIFY:
			return cbq_reclassify(skb, cl);
		}
#endif
		if (cl->level == 0)
			return cl;

		/*
		 * Step 3+n. If classifier selected a link sharing class,
		 *	     apply agency specific classifier.
		 *	     Repeat this procedure until we hit a leaf node.
		 */
		head = cl;
	}

fallback:
	cl = head;

	/*
	 * Step 4. No success...
	 */
	if (TC_H_MAJ(prio) == 0 &&
	    !(cl = head->defaults[prio&TC_PRIO_MAX]) &&
	    !(cl = head->defaults[TC_PRIO_BESTEFFORT]))
		return head;

	return cl;
}
/*
 * A packet has just been enqueued to an empty class.
 * cbq_activate_class adds it to the tail of the active class list
 * of its priority band.
 */

static __inline__ void cbq_activate_class(struct cbq_class *cl)
{
	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
	int prio = cl->cpriority;
	struct cbq_class *cl_tail;

	cl_tail = q->active[prio];
	q->active[prio] = cl;

	if (cl_tail != NULL) {
		cl->next_alive = cl_tail->next_alive;
		cl_tail->next_alive = cl;
	} else {
		cl->next_alive = cl;
		q->activemask |= (1<<prio);
	}
}
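/* Illustrative note (not part of the original file): q->active[prio] points
   at the TAIL of a circular singly linked list chained through ->next_alive,
   so the round-robin head is q->active[prio]->next_alive.  The code above
   splices the newly backlogged class in right after the old tail and then
   treats it as the new tail, which is why insertion needs only the two
   pointer assignments.  */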
/*
 * Unlink class from the active chain.
 * Note that this same procedure is done directly in cbq_dequeue*
 * during the round-robin procedure.
 */

static void cbq_deactivate_class(struct cbq_class *this)
{
	struct cbq_sched_data *q = qdisc_priv(this->qdisc);
	int prio = this->cpriority;
	struct cbq_class *cl;
	struct cbq_class *cl_prev = q->active[prio];

	do {
		cl = cl_prev->next_alive;
		if (cl == this) {
			cl_prev->next_alive = cl->next_alive;
			cl->next_alive = NULL;

			if (cl == q->active[prio]) {
				q->active[prio] = cl_prev;
				if (cl == q->active[prio]) {
					q->active[prio] = NULL;
					q->activemask &= ~(1<<prio);
					return;
				}
			}
			return;
		}
	} while ((cl_prev = cl) != q->active[prio]);
}
static void
cbq_mark_toplevel(struct cbq_sched_data *q, struct cbq_class *cl)
{
	int toplevel = q->toplevel;

	if (toplevel > cl->level && !(cl->q->flags&TCQ_F_THROTTLED)) {
		psched_time_t now;
		psched_tdiff_t incr;

		now = psched_get_time();
		incr = now - q->now_rt;
		now = q->now + incr;

		do {
			if (cl->undertime < now) {
				q->toplevel = cl->level;
				return;
			}
		} while ((cl = cl->borrow) != NULL && toplevel > cl->level);
	}
}
static int
cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	int uninitialized_var(ret);
	struct cbq_class *cl = cbq_classify(skb, sch, &ret);

#ifdef CONFIG_NET_CLS_ACT
	q->rx_class = cl;
#endif
	if (cl == NULL) {
		if (ret & __NET_XMIT_BYPASS)
			sch->qstats.drops++;
		kfree_skb(skb);
		return ret;
	}

#ifdef CONFIG_NET_CLS_ACT
	cl->q->__parent = sch;
#endif
	ret = qdisc_enqueue(skb, cl->q);
	if (ret == NET_XMIT_SUCCESS) {
		sch->q.qlen++;
		sch->bstats.packets++;
		sch->bstats.bytes += qdisc_pkt_len(skb);
		cbq_mark_toplevel(q, cl);
		if (!cl->next_alive)
			cbq_activate_class(cl);
		return ret;
	}

	if (net_xmit_drop_count(ret)) {
		sch->qstats.drops++;
		cbq_mark_toplevel(q, cl);
		cl->qstats.drops++;
	}
	return ret;
}
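/* Illustrative note on the return-value convention used above:
   cbq_classify() reports filter verdicts through *qerr using the internal
   __NET_XMIT_BYPASS / __NET_XMIT_STOLEN bits, while qdisc_enqueue() returns
   NET_XMIT_SUCCESS, NET_XMIT_CN or NET_XMIT_DROP; net_xmit_drop_count() is
   what decides whether the drop counters are bumped for a given code.  */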
/* Overlimit actions */

/* TC_CBQ_OVL_CLASSIC: (default) penalize leaf class by adding offtime */

static void cbq_ovl_classic(struct cbq_class *cl)
{
	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
	psched_tdiff_t delay = cl->undertime - q->now;

	if (!cl->delayed) {
		delay += cl->offtime;

		/*
		 * Class goes to sleep, so that it will have no
		 * chance to work off avgidle. Let's forgive it 8)
		 *
		 * BTW cbq-2.0 has a bug in this place: apparently
		 * they forgot to shift it by cl->ewma_log.
		 */
		if (cl->avgidle < 0)
			delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
		if (cl->avgidle < cl->minidle)
			cl->avgidle = cl->minidle;
		if (delay <= 0)
			delay = 1;
		cl->undertime = q->now + delay;

		cl->xstats.overactions++;
		cl->delayed = 1;
	}
	if (q->wd_expires == 0 || q->wd_expires > delay)
		q->wd_expires = delay;

	/* Dirty work! We must schedule wakeups based on
	 * the real available rate, rather than the leaf rate,
	 * which may be tiny (even zero).
	 */
	if (q->toplevel == TC_CBQ_MAXLEVEL) {
		struct cbq_class *b;
		psched_tdiff_t base_delay = q->wd_expires;

		for (b = cl->borrow; b; b = b->borrow) {
			delay = b->undertime - q->now;
			if (delay < base_delay) {
				if (delay <= 0)
					delay = 1;
				base_delay = delay;
			}
		}

		q->wd_expires = base_delay;
	}
}
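/* Worked example for the penalty above (illustrative numbers): with
   offtime = 8000 ticks, ewma_log = 5 and avgidle = -640 at the moment the
   class goes overlimit,

	delay  = (undertime - now) + 8000
	delay -= (-avgidle) - ((-avgidle) >> 5) = 640 - 20 = 620

   i.e. the "forgiven" part of the negative avgidle shortens the sleep by
   620 ticks, and undertime is then set to now + delay.  */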
/* TC_CBQ_OVL_RCLASSIC: penalize classes in the hierarchy by offtime
 * when they go overlimit
 */

static void cbq_ovl_rclassic(struct cbq_class *cl)
{
	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
	struct cbq_class *this = cl;

	do {
		if (cl->level > q->toplevel) {
			cl = NULL;
			break;
		}
	} while ((cl = cl->borrow) != NULL);

	if (cl == NULL)
		cl = this;
	cbq_ovl_classic(cl);
}
/* TC_CBQ_OVL_DELAY: delay until the class goes underlimit */

static void cbq_ovl_delay(struct cbq_class *cl)
{
	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
	psched_tdiff_t delay = cl->undertime - q->now;

	if (test_bit(__QDISC_STATE_DEACTIVATED,
		     &qdisc_root_sleeping(cl->qdisc)->state))
		return;

	if (!cl->delayed) {
		psched_time_t sched = q->now;
		ktime_t expires;

		delay += cl->offtime;
		if (cl->avgidle < 0)
			delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
		if (cl->avgidle < cl->minidle)
			cl->avgidle = cl->minidle;
		cl->undertime = q->now + delay;

		if (delay > 0) {
			struct hrtimer *ht;

			sched += delay + cl->penalty;
			cl->penalized = sched;
			cl->cpriority = TC_CBQ_MAXPRIO;
			q->pmask |= (1<<TC_CBQ_MAXPRIO);

			expires = ktime_set(0, 0);
			expires = ktime_add_ns(expires, PSCHED_TICKS2NS(sched));
			ht = &q->delay_timer.timer;
			if (hrtimer_try_to_cancel(ht) &&
			    ktime_to_ns(ktime_sub(hrtimer_get_expires(ht),
						  expires)) > 0)
				hrtimer_set_expires(ht, expires);
			hrtimer_restart(ht);
			cl->delayed = 1;
			cl->xstats.overactions++;
			return;
		}
		delay = 1;
	}
	if (q->wd_expires == 0 || q->wd_expires > delay)
		q->wd_expires = delay;
}
/* TC_CBQ_OVL_LOWPRIO: penalize class by lowering its priority band */

static void cbq_ovl_lowprio(struct cbq_class *cl)
{
	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);

	cl->penalized = q->now + cl->penalty;

	if (cl->cpriority != cl->priority2) {
		cl->cpriority = cl->priority2;
		q->pmask |= (1<<cl->cpriority);
		cl->xstats.overactions++;
	}
	cbq_ovl_classic(cl);
}
/* TC_CBQ_OVL_DROP: penalize class by dropping */

static void cbq_ovl_drop(struct cbq_class *cl)
{
	if (cl->q->ops->drop)
		if (cl->q->ops->drop(cl->q))
			cl->qdisc->q.qlen--;
	cl->xstats.overactions++;
	cbq_ovl_classic(cl);
}
static psched_tdiff_t cbq_undelay_prio(struct cbq_sched_data *q, int prio,
				       psched_time_t now)
{
	struct cbq_class *cl;
	struct cbq_class *cl_prev = q->active[prio];
	psched_time_t sched = now;

	if (cl_prev == NULL)
		return 0;

	do {
		cl = cl_prev->next_alive;
		if (now - cl->penalized > 0) {
			cl_prev->next_alive = cl->next_alive;
			cl->next_alive = NULL;
			cl->cpriority = cl->priority;
			cl->delayed = 0;
			cbq_activate_class(cl);

			if (cl == q->active[prio]) {
				q->active[prio] = cl_prev;
				if (cl == q->active[prio]) {
					q->active[prio] = NULL;
					return 0;
				}
			}

			cl = cl_prev->next_alive;
		} else if (sched - cl->penalized > 0)
			sched = cl->penalized;
	} while ((cl_prev = cl) != q->active[prio]);

	return sched - now;
}
static enum hrtimer_restart cbq_undelay(struct hrtimer *timer)
{
	struct cbq_sched_data *q = container_of(timer, struct cbq_sched_data,
						delay_timer.timer);
	struct Qdisc *sch = q->watchdog.qdisc;
	psched_time_t now;
	psched_tdiff_t delay = 0;
	unsigned pmask;

	now = psched_get_time();

	pmask = q->pmask;
	q->pmask = 0;

	while (pmask) {
		int prio = ffz(~pmask);
		psched_tdiff_t tmp;

		pmask &= ~(1<<prio);

		tmp = cbq_undelay_prio(q, prio, now);
		if (tmp > 0) {
			q->pmask |= 1<<prio;
			if (tmp < delay || delay == 0)
				delay = tmp;
		}
	}

	if (delay) {
		ktime_t time;

		time = ktime_set(0, 0);
		time = ktime_add_ns(time, PSCHED_TICKS2NS(now + delay));
		tasklet_hrtimer_start(&q->delay_timer, time, HRTIMER_MODE_ABS);
	}

	sch->flags &= ~TCQ_F_THROTTLED;
	__netif_schedule(qdisc_root(sch));
	return HRTIMER_NORESTART;
}
#ifdef CONFIG_NET_CLS_ACT
static int cbq_reshape_fail(struct sk_buff *skb, struct Qdisc *child)
{
	int ret;
	struct Qdisc *sch = child->__parent;
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *cl = q->rx_class;

	q->rx_class = NULL;

	if (cl && (cl = cbq_reclassify(skb, cl)) != NULL) {
		cbq_mark_toplevel(q, cl);

		q->rx_class = cl;
		cl->q->__parent = sch;

		ret = qdisc_enqueue(skb, cl->q);
		if (ret == NET_XMIT_SUCCESS) {
			sch->q.qlen++;
			sch->bstats.packets++;
			sch->bstats.bytes += qdisc_pkt_len(skb);
			if (!cl->next_alive)
				cbq_activate_class(cl);
			return 0;
		}
		if (net_xmit_drop_count(ret))
			sch->qstats.drops++;
		return 0;
	}

	sch->qstats.drops++;
	return -1;
}
#endif
/*
 * This is a mission-critical procedure.
 *
 * We "regenerate" the toplevel cutoff if the transmitting class
 * has backlog and is not regulated. It is not part of the
 * original CBQ description, but it looks more reasonable.
 * Probably, it is wrong. This question needs further investigation.
 */

static __inline__ void
cbq_update_toplevel(struct cbq_sched_data *q, struct cbq_class *cl,
		    struct cbq_class *borrowed)
{
	if (cl && q->toplevel >= borrowed->level) {
		if (cl->q->q.qlen > 1) {
			do {
				if (borrowed->undertime == PSCHED_PASTPERFECT) {
					q->toplevel = borrowed->level;
					return;
				}
			} while ((borrowed = borrowed->borrow) != NULL);
		}
#if 0
	/* It is not necessary now. Uncommenting it
	   will save CPU cycles, but decrease fairness.
	 */
		q->toplevel = TC_CBQ_MAXLEVEL;
#endif
	}
}
static void
cbq_update(struct cbq_sched_data *q)
{
	struct cbq_class *this = q->tx_class;
	struct cbq_class *cl = this;
	int len = q->tx_len;

	q->tx_class = NULL;

	for ( ; cl; cl = cl->share) {
		long avgidle = cl->avgidle;
		long idle;

		cl->bstats.packets++;
		cl->bstats.bytes += len;

		/*
		 * (now - last) is the total time between packet right edges.
		 * (last_pktlen/rate) is the "virtual" busy time, so that
		 *
		 *	idle = (now - last) - last_pktlen/rate
		 */
		idle = q->now - cl->last;
		if ((unsigned long)idle > 128*1024*1024) {
			avgidle = cl->maxidle;
		} else {
			idle -= L2T(cl, len);

			/* true_avgidle := (1-W)*true_avgidle + W*idle,
			 * where W = 2^{-ewma_log}. But cl->avgidle is scaled:
			 * cl->avgidle == true_avgidle/W,
			 * hence:
			 * (a worked numeric example follows this function)
			 */
			avgidle += idle - (avgidle>>cl->ewma_log);
		}

		if (avgidle <= 0) {
			/* Overlimit or at-limit */

			if (avgidle < cl->minidle)
				avgidle = cl->minidle;

			cl->avgidle = avgidle;

			/* Calculate the expected time when this class
			 * will be allowed to send.
			 * It will occur when:
			 * (1-W)*true_avgidle + W*delay = 0, i.e.
			 * idle = (1/W - 1)*(-true_avgidle)
			 * or
			 * idle = (1 - W)*(-cl->avgidle);
			 */
			idle = (-avgidle) - ((-avgidle) >> cl->ewma_log);

			/*
			 * That is not all.
			 * To maintain the rate allocated to the class,
			 * we add to undertime the virtual clock time
			 * necessary to complete the transmitted packet.
			 * (len/phys_bandwidth has already been passed
			 * by the moment of cbq_update)
			 */
			idle -= L2T(&q->link, len);
			idle += L2T(cl, len);

			cl->undertime = q->now + idle;
		} else {
			/* Underlimit */

			cl->undertime = PSCHED_PASTPERFECT;
			if (avgidle > cl->maxidle)
				cl->avgidle = cl->maxidle;
			else
				cl->avgidle = avgidle;
		}
		cl->last = q->now;
	}

	cbq_update_toplevel(q, this, q->tx_borrowed);
}
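/* Worked example of the scaled EWMA above (illustrative numbers): with
   ewma_log = 5 (W = 1/32), scaled avgidle = 0 and a measured idle of 64
   ticks, the update gives

	avgidle += 64 - (0 >> 5) = 64      (true_avgidle = 64/32 = 2)

   which matches the unscaled form (1-W)*0 + W*64 = 2.  A second packet
   with idle = 64 yields 64 + 64 - (64 >> 5) = 126, i.e. true_avgidle =
   126/32 = 3.9375 = (31/32)*2 + (1/32)*64.  */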
static __inline__ struct cbq_class *
cbq_under_limit(struct cbq_class *cl)
{
	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
	struct cbq_class *this_cl = cl;

	if (cl->tparent == NULL)
		return cl;

	if (cl->undertime == PSCHED_PASTPERFECT || q->now >= cl->undertime) {
		cl->delayed = 0;
		return cl;
	}

	do {
		/* This is a very suspicious place. Now the overlimit
		 * action is generated for unbounded classes
		 * only if the link is completely congested.
		 * Though it is in agreement with the ancestor-only paradigm,
		 * it looks very stupid. Particularly,
		 * it means that this chunk of code will either
		 * never be called or result in strong amplification
		 * of burstiness. Dangerous, silly, and, however,
		 * no other solution exists.
		 */
		if ((cl = cl->borrow) == NULL) {
			this_cl->qstats.overlimits++;
			this_cl->overlimit(this_cl);
			return NULL;
		}
		if (cl->level > q->toplevel)
			return NULL;
	} while (cl->undertime != PSCHED_PASTPERFECT && q->now < cl->undertime);

	cl->delayed = 0;
	return cl;
}
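/* Illustrative note on the toplevel cutoff used above: q->toplevel records
   the level of a recently underlimit class on the active transmission path
   (see cbq_mark_toplevel), and cbq_under_limit() refuses to borrow from
   ancestors above that level (cl->level > q->toplevel).  This is the
   "top-level" variant of link sharing described in [1], as opposed to the
   ancestor-only variant.  */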
static __inline__ struct sk_buff *
cbq_dequeue_prio(struct Qdisc *sch, int prio)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *cl_tail, *cl_prev, *cl;
	struct sk_buff *skb;
	int deficit;

	cl_tail = cl_prev = q->active[prio];
	cl = cl_prev->next_alive;

	do {
		deficit = 0;

		/* Start round */
		do {
			struct cbq_class *borrow = cl;

			if (cl->q->q.qlen &&
			    (borrow = cbq_under_limit(cl)) == NULL)
				goto skip_class;

			if (cl->deficit <= 0) {
				/* Class exhausted its allotment per
				 * this round. Switch to the next one.
				 */
				deficit = 1;
				cl->deficit += cl->quantum;
				goto next_class;
			}

			skb = cl->q->dequeue(cl->q);

			/* Class did not give us any skb :-(
			 * It could occur even if cl->q->q.qlen != 0,
			 * f.e. if cl->q == "tbf"
			 */
			if (skb == NULL)
				goto skip_class;

			cl->deficit -= qdisc_pkt_len(skb);
			q->tx_class = cl;
			q->tx_borrowed = borrow;
			if (borrow != cl) {
#ifndef CBQ_XSTATS_BORROWS_BYTES
				borrow->xstats.borrows++;
				cl->xstats.borrows++;
#else
				borrow->xstats.borrows += qdisc_pkt_len(skb);
				cl->xstats.borrows += qdisc_pkt_len(skb);
#endif
			}
			q->tx_len = qdisc_pkt_len(skb);

			if (cl->deficit <= 0) {
				q->active[prio] = cl;
				cl = cl->next_alive;
				cl->deficit += cl->quantum;
			}
			return skb;

skip_class:
			if (cl->q->q.qlen == 0 || prio != cl->cpriority) {
				/* Class is empty or penalized.
				 * Unlink it from the active chain.
				 */
				cl_prev->next_alive = cl->next_alive;
				cl->next_alive = NULL;

				/* Did cl_tail point to it? */
				if (cl == cl_tail) {
					/* Repair it! */
					cl_tail = cl_prev;

					/* Was it the last class in this band? */
					if (cl == cl_tail) {
						/* Kill the band! */
						q->active[prio] = NULL;
						q->activemask &= ~(1<<prio);
						if (cl->q->q.qlen)
							cbq_activate_class(cl);
						return NULL;
					}

					q->active[prio] = cl_tail;
				}
				if (cl->q->q.qlen)
					cbq_activate_class(cl);

				cl = cl_prev;
			}

next_class:
			cl_prev = cl;
			cl = cl->next_alive;
		} while (cl_prev != cl_tail);
	} while (deficit);

	q->active[prio] = cl_prev;

	return NULL;
}
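/* Worked example of the deficit round-robin above (illustrative numbers):
   with quantum = 1000 bytes and 1500-byte packets, a class alternates
   between sending and being skipped, but over time it receives three quanta
   for every two packets it sends (3000 bytes added, 3000 bytes sent), so
   per-round allotments smaller than the MTU still converge to the
   configured share.  */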
static __inline__ struct sk_buff *
cbq_dequeue_1(struct Qdisc *sch)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct sk_buff *skb;
	unsigned activemask;

	activemask = q->activemask&0xFF;
	while (activemask) {
		int prio = ffz(~activemask);
		activemask &= ~(1<<prio);
		skb = cbq_dequeue_prio(sch, prio);
		if (skb)
			return skb;
	}
	return NULL;
}
static struct sk_buff *
cbq_dequeue(struct Qdisc *sch)
{
	struct sk_buff *skb;
	struct cbq_sched_data *q = qdisc_priv(sch);
	psched_time_t now;
	psched_tdiff_t incr;

	now = psched_get_time();
	incr = now - q->now_rt;

	if (q->tx_class) {
		psched_tdiff_t incr2;
		/* Time integrator. We calculate the EOS time
		 * by adding the expected packet transmission time.
		 * If real time is greater, we warp the artificial clock,
		 * so that:
		 *
		 * cbq_time = max(real_time, work);
		 *
		 * (a worked example follows this function)
		 */
		incr2 = L2T(&q->link, q->tx_len);
		q->now += incr2;
		cbq_update(q);
		if ((incr -= incr2) < 0)
			incr = 0;
	}
	q->now += incr;
	q->now_rt = now;

	for (;;) {
		q->wd_expires = 0;

		skb = cbq_dequeue_1(sch);
		if (skb) {
			sch->q.qlen--;
			sch->flags &= ~TCQ_F_THROTTLED;
			return skb;
		}

		/* All the classes are overlimit.
		 *
		 * It is possible, if:
		 *
		 * 1. Scheduler is empty.
		 * 2. Toplevel cutoff inhibited borrowing.
		 * 3. Root class is overlimit.
		 *
		 * Reset the 2nd and 3rd conditions and retry.
		 *
		 * Note that NS and cbq-2.0 are buggy here: peeking
		 * an arbitrary class is appropriate for ancestor-only
		 * sharing, but not for the top-level algorithm.
		 *
		 * Our version is better but slower, because it requires
		 * two passes; this is unavoidable with top-level sharing.
		 */

		if (q->toplevel == TC_CBQ_MAXLEVEL &&
		    q->link.undertime == PSCHED_PASTPERFECT)
			break;

		q->toplevel = TC_CBQ_MAXLEVEL;
		q->link.undertime = PSCHED_PASTPERFECT;
	}

	/* No packets in scheduler or nobody wants to give them to us :-(
	 * Sigh... start the watchdog timer in the last case. */

	if (sch->q.qlen) {
		sch->qstats.overlimits++;
		if (q->wd_expires)
			qdisc_watchdog_schedule(&q->watchdog,
						now + q->wd_expires);
	}
	return NULL;
}
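/* Worked example of the time integrator above (illustrative numbers):
   suppose the link rate makes incr2 = L2T(&q->link, tx_len) = 120 us and
   the next dequeue is observed incr = 90 us after the previous one.  The
   artificial clock advances by the full 120 us (q->now += incr2) and the
   remaining (90 - 120) < 0 is clamped to 0, so q->now runs ahead of real
   time while the link is busy; if instead incr were 500 us, q->now would
   gain the extra 380 us and stay in sync, i.e. cbq_time = max(real_time,
   work).  */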
/* CBQ class maintenance routines */

static void cbq_adjust_levels(struct cbq_class *this)
{
	if (this == NULL)
		return;

	do {
		int level = 0;
		struct cbq_class *cl;

		if ((cl = this->children) != NULL) {
			do {
				if (cl->level > level)
					level = cl->level;
			} while ((cl = cl->sibling) != this->children);
		}
		this->level = level+1;
	} while ((this = this->tparent) != NULL);
}
static void cbq_normalize_quanta(struct cbq_sched_data *q, int prio)
{
	struct cbq_class *cl;
	struct hlist_node *n;
	unsigned int h;

	if (q->quanta[prio] == 0)
		return;

	for (h = 0; h < q->clhash.hashsize; h++) {
		hlist_for_each_entry(cl, n, &q->clhash.hash[h], common.hnode) {
			/* BUGGGG... Beware! This expression suffers from
			 * arithmetic overflows!
			 * (an overflow-safe variant is sketched after
			 * this function)
			 */
			if (cl->priority == prio) {
				cl->quantum = (cl->weight*cl->allot*q->nclasses[prio])/
					q->quanta[prio];
			}
			if (cl->quantum <= 0 || cl->quantum > 32*qdisc_dev(cl->qdisc)->mtu) {
				printk(KERN_WARNING "CBQ: class %08x has bad quantum==%ld, repaired.\n",
				       cl->common.classid, cl->quantum);
				cl->quantum = qdisc_dev(cl->qdisc)->mtu/2 + 1;
			}
		}
	}
}
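/* Illustrative sketch only (not part of the original file): the overflow
 * noted above could be avoided by widening the product to 64 bits before
 * the division, e.g. with div_u64() from <linux/math64.h>:
 */
#if 0
static long cbq_safe_quantum(struct cbq_sched_data *q, struct cbq_class *cl,
			     int prio)
{
	/* 64-bit product of weight * allot * number of classes in the band */
	u64 quantum = (u64)cl->weight * cl->allot * q->nclasses[prio];

	/* divide by the total weight of the band without overflowing */
	return (long)div_u64(quantum, q->quanta[prio]);
}
#endif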
static void cbq_sync_defmap(struct cbq_class *cl)
{
	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
	struct cbq_class *split = cl->split;
	unsigned int h;
	int i;

	if (split == NULL)
		return;

	for (i = 0; i <= TC_PRIO_MAX; i++) {
		if (split->defaults[i] == cl && !(cl->defmap&(1<<i)))
			split->defaults[i] = NULL;
	}

	for (i = 0; i <= TC_PRIO_MAX; i++) {
		int level = split->level;

		if (split->defaults[i])
			continue;

		for (h = 0; h < q->clhash.hashsize; h++) {
			struct hlist_node *n;
			struct cbq_class *c;

			hlist_for_each_entry(c, n, &q->clhash.hash[h],
					     common.hnode) {
				if (c->split == split && c->level < level &&
				    c->defmap&(1<<i)) {
					split->defaults[i] = c;
					level = c->level;
				}
			}
		}
	}
}
static void cbq_change_defmap(struct cbq_class *cl, u32 splitid, u32 def, u32 mask)
{
	struct cbq_class *split = NULL;

	if (splitid == 0) {
		if ((split = cl->split) == NULL)
			return;
		splitid = split->common.classid;
	}

	if (split == NULL || split->common.classid != splitid) {
		for (split = cl->tparent; split; split = split->tparent)
			if (split->common.classid == splitid)
				break;
	}

	if (split == NULL)
		return;

	if (cl->split != split) {
		cl->defmap = 0;
		cbq_sync_defmap(cl);
		cl->split = split;
		cl->defmap = def&mask;
	} else
		cl->defmap = (cl->defmap&~mask)|(def&mask);

	cbq_sync_defmap(cl);
}
static void cbq_unlink_class(struct cbq_class *this)
{
	struct cbq_class *cl, **clp;
	struct cbq_sched_data *q = qdisc_priv(this->qdisc);

	qdisc_class_hash_remove(&q->clhash, &this->common);

	if (this->tparent) {
		clp = &this->sibling;
		cl = *clp;
		do {
			if (cl == this) {
				*clp = cl->sibling;
				break;
			}
			clp = &cl->sibling;
		} while ((cl = *clp) != this->sibling);

		if (this->tparent->children == this) {
			this->tparent->children = this->sibling;
			if (this->sibling == this)
				this->tparent->children = NULL;
		}
	} else {
		WARN_ON(this->sibling != this);
	}
}
static void cbq_link_class(struct cbq_class *this)
{
	struct cbq_sched_data *q = qdisc_priv(this->qdisc);
	struct cbq_class *parent = this->tparent;

	this->sibling = this;
	qdisc_class_hash_insert(&q->clhash, &this->common);

	if (parent == NULL)
		return;

	if (parent->children == NULL) {
		parent->children = this;
	} else {
		this->sibling = parent->children->sibling;
		parent->children->sibling = this;
	}
}
static unsigned int cbq_drop(struct Qdisc *sch)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *cl, *cl_head;
	int prio;
	unsigned int len;

	for (prio = TC_CBQ_MAXPRIO; prio >= 0; prio--) {
		if ((cl_head = q->active[prio]) == NULL)
			continue;

		cl = cl_head;
		do {
			if (cl->q->ops->drop && (len = cl->q->ops->drop(cl->q))) {
				sch->q.qlen--;
				if (!cl->q->q.qlen)
					cbq_deactivate_class(cl);
				return len;
			}
		} while ((cl = cl->next_alive) != cl_head);
	}
	return 0;
}
static void
cbq_reset(struct Qdisc *sch)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *cl;
	struct hlist_node *n;
	int prio;
	unsigned int h;

	q->activemask = 0;
	q->pmask = 0;
	q->tx_class = NULL;
	q->tx_borrowed = NULL;
	qdisc_watchdog_cancel(&q->watchdog);
	tasklet_hrtimer_cancel(&q->delay_timer);
	q->toplevel = TC_CBQ_MAXLEVEL;
	q->now = psched_get_time();
	q->now_rt = q->now;

	for (prio = 0; prio <= TC_CBQ_MAXPRIO; prio++)
		q->active[prio] = NULL;

	for (h = 0; h < q->clhash.hashsize; h++) {
		hlist_for_each_entry(cl, n, &q->clhash.hash[h], common.hnode) {
			qdisc_reset(cl->q);

			cl->next_alive = NULL;
			cl->undertime = PSCHED_PASTPERFECT;
			cl->avgidle = cl->maxidle;
			cl->deficit = cl->quantum;
			cl->cpriority = cl->priority;
		}
	}
	sch->q.qlen = 0;
}
static int cbq_set_lss(struct cbq_class *cl, struct tc_cbq_lssopt *lss)
{
	if (lss->change&TCF_CBQ_LSS_FLAGS) {
		cl->share = (lss->flags&TCF_CBQ_LSS_ISOLATED) ? NULL : cl->tparent;
		cl->borrow = (lss->flags&TCF_CBQ_LSS_BOUNDED) ? NULL : cl->tparent;
	}
	if (lss->change&TCF_CBQ_LSS_EWMA)
		cl->ewma_log = lss->ewma_log;
	if (lss->change&TCF_CBQ_LSS_AVPKT)
		cl->avpkt = lss->avpkt;
	if (lss->change&TCF_CBQ_LSS_MINIDLE)
		cl->minidle = -(long)lss->minidle;
	if (lss->change&TCF_CBQ_LSS_MAXIDLE) {
		cl->maxidle = lss->maxidle;
		cl->avgidle = lss->maxidle;
	}
	if (lss->change&TCF_CBQ_LSS_OFFTIME)
		cl->offtime = lss->offtime;
	return 0;
}
static void cbq_rmprio(struct cbq_sched_data *q, struct cbq_class *cl)
{
	q->nclasses[cl->priority]--;
	q->quanta[cl->priority] -= cl->weight;
	cbq_normalize_quanta(q, cl->priority);
}

static void cbq_addprio(struct cbq_sched_data *q, struct cbq_class *cl)
{
	q->nclasses[cl->priority]++;
	q->quanta[cl->priority] += cl->weight;
	cbq_normalize_quanta(q, cl->priority);
}
static int cbq_set_wrr(struct cbq_class *cl, struct tc_cbq_wrropt *wrr)
{
	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);

	if (wrr->allot)
		cl->allot = wrr->allot;
	if (wrr->weight)
		cl->weight = wrr->weight;
	if (wrr->priority) {
		cl->priority = wrr->priority-1;
		cl->cpriority = cl->priority;
		if (cl->priority >= cl->priority2)
			cl->priority2 = TC_CBQ_MAXPRIO-1;
	}

	cbq_addprio(q, cl);
	return 0;
}
static int cbq_set_overlimit(struct cbq_class *cl, struct tc_cbq_ovl *ovl)
{
	switch (ovl->strategy) {
	case TC_CBQ_OVL_CLASSIC:
		cl->overlimit = cbq_ovl_classic;
		break;
	case TC_CBQ_OVL_DELAY:
		cl->overlimit = cbq_ovl_delay;
		break;
	case TC_CBQ_OVL_LOWPRIO:
		if (ovl->priority2-1 >= TC_CBQ_MAXPRIO ||
		    ovl->priority2-1 <= cl->priority)
			return -EINVAL;
		cl->priority2 = ovl->priority2-1;
		cl->overlimit = cbq_ovl_lowprio;
		break;
	case TC_CBQ_OVL_DROP:
		cl->overlimit = cbq_ovl_drop;
		break;
	case TC_CBQ_OVL_RCLASSIC:
		cl->overlimit = cbq_ovl_rclassic;
		break;
	default:
		return -EINVAL;
	}
	cl->penalty = ovl->penalty;
	return 0;
}
#ifdef CONFIG_NET_CLS_ACT
static int cbq_set_police(struct cbq_class *cl, struct tc_cbq_police *p)
{
	cl->police = p->police;

	if (cl->q->handle) {
		if (p->police == TC_POLICE_RECLASSIFY)
			cl->q->reshape_fail = cbq_reshape_fail;
		else
			cl->q->reshape_fail = NULL;
	}
	return 0;
}
#endif
static int cbq_set_fopt(struct cbq_class *cl, struct tc_cbq_fopt *fopt)
{
	cbq_change_defmap(cl, fopt->split, fopt->defmap, fopt->defchange);
	return 0;
}
static const struct nla_policy cbq_policy[TCA_CBQ_MAX + 1] = {
	[TCA_CBQ_LSSOPT]	= { .len = sizeof(struct tc_cbq_lssopt) },
	[TCA_CBQ_WRROPT]	= { .len = sizeof(struct tc_cbq_wrropt) },
	[TCA_CBQ_FOPT]		= { .len = sizeof(struct tc_cbq_fopt) },
	[TCA_CBQ_OVL_STRATEGY]	= { .len = sizeof(struct tc_cbq_ovl) },
	[TCA_CBQ_RATE]		= { .len = sizeof(struct tc_ratespec) },
	[TCA_CBQ_RTAB]		= { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
	[TCA_CBQ_POLICE]	= { .len = sizeof(struct tc_cbq_police) },
};
static int cbq_init(struct Qdisc *sch, struct nlattr *opt)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct nlattr *tb[TCA_CBQ_MAX + 1];
	struct tc_ratespec *r;
	int err;

	err = nla_parse_nested(tb, TCA_CBQ_MAX, opt, cbq_policy);
	if (err < 0)
		return err;

	if (tb[TCA_CBQ_RTAB] == NULL || tb[TCA_CBQ_RATE] == NULL)
		return -EINVAL;

	r = nla_data(tb[TCA_CBQ_RATE]);

	if ((q->link.R_tab = qdisc_get_rtab(r, tb[TCA_CBQ_RTAB])) == NULL)
		return -EINVAL;

	err = qdisc_class_hash_init(&q->clhash);
	if (err < 0)
		goto put_rtab;

	q->link.refcnt = 1;
	q->link.sibling = &q->link;
	q->link.common.classid = sch->handle;
	q->link.qdisc = sch;
	if (!(q->link.q = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue,
					    &pfifo_qdisc_ops,
					    sch->handle)))
		q->link.q = &noop_qdisc;

	q->link.priority = TC_CBQ_MAXPRIO-1;
	q->link.priority2 = TC_CBQ_MAXPRIO-1;
	q->link.cpriority = TC_CBQ_MAXPRIO-1;
	q->link.ovl_strategy = TC_CBQ_OVL_CLASSIC;
	q->link.overlimit = cbq_ovl_classic;
	q->link.allot = psched_mtu(qdisc_dev(sch));
	q->link.quantum = q->link.allot;
	q->link.weight = q->link.R_tab->rate.rate;

	q->link.ewma_log = TC_CBQ_DEF_EWMA;
	q->link.avpkt = q->link.allot/2;
	q->link.minidle = -0x7FFFFFFF;

	qdisc_watchdog_init(&q->watchdog, sch);
	tasklet_hrtimer_init(&q->delay_timer, cbq_undelay,
			     CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
	q->delay_timer.function = cbq_undelay;
	q->toplevel = TC_CBQ_MAXLEVEL;
	q->now = psched_get_time();
	q->now_rt = q->now;

	cbq_link_class(&q->link);

	if (tb[TCA_CBQ_LSSOPT])
		cbq_set_lss(&q->link, nla_data(tb[TCA_CBQ_LSSOPT]));

	cbq_addprio(q, &q->link);
	return 0;

put_rtab:
	qdisc_put_rtab(q->link.R_tab);
	return err;
}
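/* Illustrative configuration example (not part of the original file; the
 * parameter values are arbitrary).  A root CBQ with one bounded child class
 * would typically be set up from userspace roughly like this:
 *
 *	tc qdisc add dev eth0 root handle 1: cbq \
 *		bandwidth 100Mbit avpkt 1000
 *	tc class add dev eth0 parent 1: classid 1:1 cbq \
 *		bandwidth 100Mbit rate 10Mbit allot 1514 \
 *		prio 5 avpkt 1000 bounded
 *
 * The TCA_CBQ_RATE/RTAB, TCA_CBQ_LSSOPT and TCA_CBQ_WRROPT attributes
 * parsed above correspond to the rate, lss (avpkt/bounded/isolated) and
 * weight/priority/allot parameters of the tc command.  */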
static __inline__ int cbq_dump_rate(struct sk_buff *skb, struct cbq_class *cl)
{
	unsigned char *b = skb_tail_pointer(skb);

	NLA_PUT(skb, TCA_CBQ_RATE, sizeof(cl->R_tab->rate), &cl->R_tab->rate);
	return skb->len;

nla_put_failure:
	nlmsg_trim(skb, b);
	return -1;
}
static __inline__ int cbq_dump_lss(struct sk_buff *skb, struct cbq_class *cl)
{
	unsigned char *b = skb_tail_pointer(skb);
	struct tc_cbq_lssopt opt;

	opt.flags = 0;
	if (cl->borrow == NULL)
		opt.flags |= TCF_CBQ_LSS_BOUNDED;
	if (cl->share == NULL)
		opt.flags |= TCF_CBQ_LSS_ISOLATED;
	opt.ewma_log = cl->ewma_log;
	opt.level = cl->level;
	opt.avpkt = cl->avpkt;
	opt.maxidle = cl->maxidle;
	opt.minidle = (u32)(-cl->minidle);
	opt.offtime = cl->offtime;
	opt.change = ~0;
	NLA_PUT(skb, TCA_CBQ_LSSOPT, sizeof(opt), &opt);
	return skb->len;

nla_put_failure:
	nlmsg_trim(skb, b);
	return -1;
}
static __inline__ int cbq_dump_wrr(struct sk_buff *skb, struct cbq_class *cl)
{
	unsigned char *b = skb_tail_pointer(skb);
	struct tc_cbq_wrropt opt;

	opt.flags = 0;
	opt.allot = cl->allot;
	opt.priority = cl->priority+1;
	opt.cpriority = cl->cpriority+1;
	opt.weight = cl->weight;
	NLA_PUT(skb, TCA_CBQ_WRROPT, sizeof(opt), &opt);
	return skb->len;

nla_put_failure:
	nlmsg_trim(skb, b);
	return -1;
}
static __inline__ int cbq_dump_ovl(struct sk_buff *skb, struct cbq_class *cl)
{
	unsigned char *b = skb_tail_pointer(skb);
	struct tc_cbq_ovl opt;

	opt.strategy = cl->ovl_strategy;
	opt.priority2 = cl->priority2+1;
	opt.penalty = cl->penalty;
	NLA_PUT(skb, TCA_CBQ_OVL_STRATEGY, sizeof(opt), &opt);
	return skb->len;

nla_put_failure:
	nlmsg_trim(skb, b);
	return -1;
}
static __inline__ int cbq_dump_fopt(struct sk_buff *skb, struct cbq_class *cl)
{
	unsigned char *b = skb_tail_pointer(skb);
	struct tc_cbq_fopt opt;

	if (cl->split || cl->defmap) {
		opt.split = cl->split ? cl->split->common.classid : 0;
		opt.defmap = cl->defmap;
		opt.defchange = ~0;
		NLA_PUT(skb, TCA_CBQ_FOPT, sizeof(opt), &opt);
	}
	return skb->len;

nla_put_failure:
	nlmsg_trim(skb, b);
	return -1;
}
#ifdef CONFIG_NET_CLS_ACT
static __inline__ int cbq_dump_police(struct sk_buff *skb, struct cbq_class *cl)
{
	unsigned char *b = skb_tail_pointer(skb);
	struct tc_cbq_police opt;

	if (cl->police) {
		opt.police = cl->police;
		opt.__res1 = 0;
		opt.__res2 = 0;
		NLA_PUT(skb, TCA_CBQ_POLICE, sizeof(opt), &opt);
	}
	return skb->len;

nla_put_failure:
	nlmsg_trim(skb, b);
	return -1;
}
#endif
static int cbq_dump_attr(struct sk_buff *skb, struct cbq_class *cl)
{
	if (cbq_dump_lss(skb, cl) < 0 ||
	    cbq_dump_rate(skb, cl) < 0 ||
	    cbq_dump_wrr(skb, cl) < 0 ||
	    cbq_dump_ovl(skb, cl) < 0 ||
#ifdef CONFIG_NET_CLS_ACT
	    cbq_dump_police(skb, cl) < 0 ||
#endif
	    cbq_dump_fopt(skb, cl) < 0)
		return -1;
	return 0;
}
static int cbq_dump(struct Qdisc *sch, struct sk_buff *skb)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct nlattr *nest;

	nest = nla_nest_start(skb, TCA_OPTIONS);
	if (nest == NULL)
		goto nla_put_failure;
	if (cbq_dump_attr(skb, &q->link) < 0)
		goto nla_put_failure;
	nla_nest_end(skb, nest);
	return skb->len;

nla_put_failure:
	nla_nest_cancel(skb, nest);
	return -1;
}
static int
cbq_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
{
	struct cbq_sched_data *q = qdisc_priv(sch);

	q->link.xstats.avgidle = q->link.avgidle;
	return gnet_stats_copy_app(d, &q->link.xstats, sizeof(q->link.xstats));
}
static int
cbq_dump_class(struct Qdisc *sch, unsigned long arg,
	       struct sk_buff *skb, struct tcmsg *tcm)
{
	struct cbq_class *cl = (struct cbq_class *)arg;
	struct nlattr *nest;

	if (cl->tparent)
		tcm->tcm_parent = cl->tparent->common.classid;
	else
		tcm->tcm_parent = TC_H_ROOT;
	tcm->tcm_handle = cl->common.classid;
	tcm->tcm_info = cl->q->handle;

	nest = nla_nest_start(skb, TCA_OPTIONS);
	if (nest == NULL)
		goto nla_put_failure;
	if (cbq_dump_attr(skb, cl) < 0)
		goto nla_put_failure;
	nla_nest_end(skb, nest);
	return skb->len;

nla_put_failure:
	nla_nest_cancel(skb, nest);
	return -1;
}
static int
cbq_dump_class_stats(struct Qdisc *sch, unsigned long arg,
		     struct gnet_dump *d)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *cl = (struct cbq_class *)arg;

	cl->qstats.qlen = cl->q->q.qlen;
	cl->xstats.avgidle = cl->avgidle;
	cl->xstats.undertime = 0;

	if (cl->undertime != PSCHED_PASTPERFECT)
		cl->xstats.undertime = cl->undertime - q->now;

	if (gnet_stats_copy_basic(d, &cl->bstats) < 0 ||
	    gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
	    gnet_stats_copy_queue(d, &cl->qstats) < 0)
		return -1;

	return gnet_stats_copy_app(d, &cl->xstats, sizeof(cl->xstats));
}
static int cbq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
		     struct Qdisc **old)
{
	struct cbq_class *cl = (struct cbq_class *)arg;

	if (new == NULL) {
		new = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue,
					&pfifo_qdisc_ops,
					cl->common.classid);
		if (new == NULL)
			return -ENOBUFS;
	} else {
#ifdef CONFIG_NET_CLS_ACT
		if (cl->police == TC_POLICE_RECLASSIFY)
			new->reshape_fail = cbq_reshape_fail;
#endif
	}
	sch_tree_lock(sch);
	*old = cl->q;
	cl->q = new;
	qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
	qdisc_reset(*old);
	sch_tree_unlock(sch);

	return 0;
}
*
1653 cbq_leaf(struct Qdisc
*sch
, unsigned long arg
)
1655 struct cbq_class
*cl
= (struct cbq_class
*)arg
;
1657 return cl
? cl
->q
: NULL
;
static void cbq_qlen_notify(struct Qdisc *sch, unsigned long arg)
{
	struct cbq_class *cl = (struct cbq_class *)arg;

	if (cl->q->q.qlen == 0)
		cbq_deactivate_class(cl);
}
static unsigned long cbq_get(struct Qdisc *sch, u32 classid)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *cl = cbq_class_lookup(q, classid);

	if (cl) {
		cl->refcnt++;
		return (unsigned long)cl;
	}
	return 0;
}
static void cbq_destroy_class(struct Qdisc *sch, struct cbq_class *cl)
{
	struct cbq_sched_data *q = qdisc_priv(sch);

	WARN_ON(cl->filters);

	tcf_destroy_chain(&cl->filter_list);
	qdisc_destroy(cl->q);
	qdisc_put_rtab(cl->R_tab);
	gen_kill_estimator(&cl->bstats, &cl->rate_est);
	if (cl != &q->link)
		kfree(cl);
}
static void
cbq_destroy(struct Qdisc *sch)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct hlist_node *n, *next;
	struct cbq_class *cl;
	unsigned int h;

#ifdef CONFIG_NET_CLS_ACT
	q->rx_class = NULL;
#endif
	/*
	 * Filters must be destroyed first because we don't destroy the
	 * classes from root to leafs which means that filters can still
	 * be bound to classes which have been destroyed already. --TGR '04
	 */
	for (h = 0; h < q->clhash.hashsize; h++) {
		hlist_for_each_entry(cl, n, &q->clhash.hash[h], common.hnode)
			tcf_destroy_chain(&cl->filter_list);
	}
	for (h = 0; h < q->clhash.hashsize; h++) {
		hlist_for_each_entry_safe(cl, n, next, &q->clhash.hash[h],
					  common.hnode)
			cbq_destroy_class(sch, cl);
	}
	qdisc_class_hash_destroy(&q->clhash);
}
static void cbq_put(struct Qdisc *sch, unsigned long arg)
{
	struct cbq_class *cl = (struct cbq_class *)arg;

	if (--cl->refcnt == 0) {
#ifdef CONFIG_NET_CLS_ACT
		spinlock_t *root_lock = qdisc_root_sleeping_lock(sch);
		struct cbq_sched_data *q = qdisc_priv(sch);

		spin_lock_bh(root_lock);
		if (q->rx_class == cl)
			q->rx_class = NULL;
		spin_unlock_bh(root_lock);
#endif

		cbq_destroy_class(sch, cl);
	}
}
static int
cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **tca,
		 unsigned long *arg)
{
	int err;
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *cl = (struct cbq_class *)*arg;
	struct nlattr *opt = tca[TCA_OPTIONS];
	struct nlattr *tb[TCA_CBQ_MAX + 1];
	struct cbq_class *parent;
	struct qdisc_rate_table *rtab = NULL;

	if (opt == NULL)
		return -EINVAL;

	err = nla_parse_nested(tb, TCA_CBQ_MAX, opt, cbq_policy);
	if (err < 0)
		return err;

	if (cl) {
		/* Check parent */
		if (parentid) {
			if (cl->tparent &&
			    cl->tparent->common.classid != parentid)
				return -EINVAL;
			if (!cl->tparent && parentid != TC_H_ROOT)
				return -EINVAL;
		}

		if (tb[TCA_CBQ_RATE]) {
			rtab = qdisc_get_rtab(nla_data(tb[TCA_CBQ_RATE]),
					      tb[TCA_CBQ_RTAB]);
			if (rtab == NULL)
				return -EINVAL;
		}

		if (tca[TCA_RATE]) {
			err = gen_replace_estimator(&cl->bstats, &cl->rate_est,
						    qdisc_root_sleeping_lock(sch),
						    tca[TCA_RATE]);
			if (err) {
				if (rtab)
					qdisc_put_rtab(rtab);
				return err;
			}
		}

		/* Change class parameters */
		sch_tree_lock(sch);

		if (cl->next_alive != NULL)
			cbq_deactivate_class(cl);

		if (rtab) {
			qdisc_put_rtab(cl->R_tab);
			cl->R_tab = rtab;
		}

		if (tb[TCA_CBQ_LSSOPT])
			cbq_set_lss(cl, nla_data(tb[TCA_CBQ_LSSOPT]));

		if (tb[TCA_CBQ_WRROPT]) {
			cbq_rmprio(q, cl);
			cbq_set_wrr(cl, nla_data(tb[TCA_CBQ_WRROPT]));
		}

		if (tb[TCA_CBQ_OVL_STRATEGY])
			cbq_set_overlimit(cl, nla_data(tb[TCA_CBQ_OVL_STRATEGY]));

#ifdef CONFIG_NET_CLS_ACT
		if (tb[TCA_CBQ_POLICE])
			cbq_set_police(cl, nla_data(tb[TCA_CBQ_POLICE]));
#endif

		if (tb[TCA_CBQ_FOPT])
			cbq_set_fopt(cl, nla_data(tb[TCA_CBQ_FOPT]));

		if (cl->q->q.qlen)
			cbq_activate_class(cl);

		sch_tree_unlock(sch);

		return 0;
	}

	if (parentid == TC_H_ROOT)
		return -EINVAL;

	if (tb[TCA_CBQ_WRROPT] == NULL || tb[TCA_CBQ_RATE] == NULL ||
	    tb[TCA_CBQ_LSSOPT] == NULL)
		return -EINVAL;

	rtab = qdisc_get_rtab(nla_data(tb[TCA_CBQ_RATE]), tb[TCA_CBQ_RTAB]);
	if (rtab == NULL)
		return -EINVAL;

	if (classid) {
		err = -EINVAL;
		if (TC_H_MAJ(classid^sch->handle) || cbq_class_lookup(q, classid))
			goto failure;
	} else {
		int i;
		classid = TC_H_MAKE(sch->handle, 0x8000);

		for (i = 0; i < 0x8000; i++) {
			if (++q->hgenerator >= 0x8000)
				q->hgenerator = 1;
			if (cbq_class_lookup(q, classid|q->hgenerator) == NULL)
				break;
		}
		err = -ENOSR;
		if (i >= 0x8000)
			goto failure;
		classid = classid|q->hgenerator;
	}

	parent = &q->link;
	if (parentid) {
		parent = cbq_class_lookup(q, parentid);
		err = -EINVAL;
		if (parent == NULL)
			goto failure;
	}

	err = -ENOBUFS;
	cl = kzalloc(sizeof(*cl), GFP_KERNEL);
	if (cl == NULL)
		goto failure;

	if (tca[TCA_RATE]) {
		err = gen_new_estimator(&cl->bstats, &cl->rate_est,
					qdisc_root_sleeping_lock(sch),
					tca[TCA_RATE]);
		if (err) {
			kfree(cl);
			goto failure;
		}
	}

	cl->R_tab = rtab;
	rtab = NULL;
	cl->refcnt = 1;
	if (!(cl->q = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue,
					&pfifo_qdisc_ops, classid)))
		cl->q = &noop_qdisc;
	cl->common.classid = classid;
	cl->tparent = parent;
	cl->qdisc = sch;
	cl->allot = parent->allot;
	cl->quantum = cl->allot;
	cl->weight = cl->R_tab->rate.rate;

	sch_tree_lock(sch);
	cbq_link_class(cl);
	cl->borrow = cl->tparent;
	if (cl->tparent != &q->link)
		cl->share = cl->tparent;
	cbq_adjust_levels(parent);
	cl->minidle = -0x7FFFFFFF;
	cbq_set_lss(cl, nla_data(tb[TCA_CBQ_LSSOPT]));
	cbq_set_wrr(cl, nla_data(tb[TCA_CBQ_WRROPT]));
	if (cl->ewma_log == 0)
		cl->ewma_log = q->link.ewma_log;
	if (cl->maxidle == 0)
		cl->maxidle = q->link.maxidle;
	if (cl->avpkt == 0)
		cl->avpkt = q->link.avpkt;
	cl->overlimit = cbq_ovl_classic;
	if (tb[TCA_CBQ_OVL_STRATEGY])
		cbq_set_overlimit(cl, nla_data(tb[TCA_CBQ_OVL_STRATEGY]));
#ifdef CONFIG_NET_CLS_ACT
	if (tb[TCA_CBQ_POLICE])
		cbq_set_police(cl, nla_data(tb[TCA_CBQ_POLICE]));
#endif
	if (tb[TCA_CBQ_FOPT])
		cbq_set_fopt(cl, nla_data(tb[TCA_CBQ_FOPT]));
	sch_tree_unlock(sch);

	qdisc_class_hash_grow(sch, &q->clhash);

	*arg = (unsigned long)cl;
	return 0;

failure:
	qdisc_put_rtab(rtab);
	return err;
}
static int cbq_delete(struct Qdisc *sch, unsigned long arg)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *cl = (struct cbq_class *)arg;
	unsigned int qlen;

	if (cl->filters || cl->children || cl == &q->link)
		return -EBUSY;

	sch_tree_lock(sch);

	qlen = cl->q->q.qlen;
	qdisc_reset(cl->q);
	qdisc_tree_decrease_qlen(cl->q, qlen);

	if (cl->next_alive)
		cbq_deactivate_class(cl);

	if (q->tx_borrowed == cl)
		q->tx_borrowed = q->tx_class;
	if (q->tx_class == cl) {
		q->tx_class = NULL;
		q->tx_borrowed = NULL;
	}
#ifdef CONFIG_NET_CLS_ACT
	if (q->rx_class == cl)
		q->rx_class = NULL;
#endif

	cbq_unlink_class(cl);
	cbq_adjust_levels(cl->tparent);
	cl->defmap = 0;
	cbq_sync_defmap(cl);

	cbq_rmprio(q, cl);
	sch_tree_unlock(sch);

	BUG_ON(--cl->refcnt == 0);
	/*
	 * This shouldn't happen: we "hold" one cops->get() when called
	 * from tc_ctl_tclass; the destroy method is done from cops->put().
	 */

	return 0;
}
static struct tcf_proto **cbq_find_tcf(struct Qdisc *sch, unsigned long arg)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *cl = (struct cbq_class *)arg;

	if (cl == NULL)
		cl = &q->link;

	return &cl->filter_list;
}
static unsigned long cbq_bind_filter(struct Qdisc *sch, unsigned long parent,
				     u32 classid)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *p = (struct cbq_class *)parent;
	struct cbq_class *cl = cbq_class_lookup(q, classid);

	if (cl) {
		if (p && p->level <= cl->level)
			return 0;
		cl->filters++;
		return (unsigned long)cl;
	}
	return 0;
}
static void cbq_unbind_filter(struct Qdisc *sch, unsigned long arg)
{
	struct cbq_class *cl = (struct cbq_class *)arg;

	cl->filters--;
}
static void cbq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *cl;
	struct hlist_node *n;
	unsigned int h;

	if (arg->stop)
		return;

	for (h = 0; h < q->clhash.hashsize; h++) {
		hlist_for_each_entry(cl, n, &q->clhash.hash[h], common.hnode) {
			if (arg->count < arg->skip) {
				arg->count++;
				continue;
			}
			if (arg->fn(sch, (unsigned long)cl, arg) < 0) {
				arg->stop = 1;
				return;
			}
			arg->count++;
		}
	}
}
static const struct Qdisc_class_ops cbq_class_ops = {
	.graft		=	cbq_graft,
	.leaf		=	cbq_leaf,
	.qlen_notify	=	cbq_qlen_notify,
	.get		=	cbq_get,
	.put		=	cbq_put,
	.change		=	cbq_change_class,
	.delete		=	cbq_delete,
	.walk		=	cbq_walk,
	.tcf_chain	=	cbq_find_tcf,
	.bind_tcf	=	cbq_bind_filter,
	.unbind_tcf	=	cbq_unbind_filter,
	.dump		=	cbq_dump_class,
	.dump_stats	=	cbq_dump_class_stats,
};
static struct Qdisc_ops cbq_qdisc_ops __read_mostly = {
	.next		=	NULL,
	.cl_ops		=	&cbq_class_ops,
	.id		=	"cbq",
	.priv_size	=	sizeof(struct cbq_sched_data),
	.enqueue	=	cbq_enqueue,
	.dequeue	=	cbq_dequeue,
	.peek		=	qdisc_peek_dequeued,
	.drop		=	cbq_drop,
	.init		=	cbq_init,
	.reset		=	cbq_reset,
	.destroy	=	cbq_destroy,
	.dump		=	cbq_dump,
	.dump_stats	=	cbq_dump_stats,
	.owner		=	THIS_MODULE,
};
static int __init cbq_module_init(void)
{
	return register_qdisc(&cbq_qdisc_ops);
}
static void __exit cbq_module_exit(void)
{
	unregister_qdisc(&cbq_qdisc_ops);
}
module_init(cbq_module_init)
module_exit(cbq_module_exit)
MODULE_LICENSE("GPL");