/*
 * net/sched/sch_cbq.c	Class-Based Queueing discipline.
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 */
13 #include <linux/module.h>
14 #include <linux/types.h>
15 #include <linux/kernel.h>
16 #include <linux/string.h>
17 #include <linux/errno.h>
18 #include <linux/skbuff.h>
19 #include <net/netlink.h>
20 #include <net/pkt_sched.h>
23 /* Class-Based Queueing (CBQ) algorithm.
24 =======================================
26 Sources: [1] Sally Floyd and Van Jacobson, "Link-sharing and Resource
27 Management Models for Packet Networks",
28 IEEE/ACM Transactions on Networking, Vol.3, No.4, 1995
30 [2] Sally Floyd, "Notes on CBQ and Guaranteed Service", 1995
32 [3] Sally Floyd, "Notes on Class-Based Queueing: Setting
35 [4] Sally Floyd and Michael Speer, "Experimental Results
36 for Class-Based Queueing", 1998, not published.
38 -----------------------------------------------------------------------
40 Algorithm skeleton was taken from NS simulator cbq.cc.
41 If someone wants to check this code against the LBL version,
42 he should take into account that ONLY the skeleton was borrowed,
43 the implementation is different. Particularly:
45 --- The WRR algorithm is different. Our version looks more
46 reasonable (I hope) and works when quanta are allowed to be
47 less than MTU, which is always the case when real time classes
48 have small rates. Note, that the statement of [3] is
49 incomplete, delay may actually be estimated even if class
50 per-round allotment is less than MTU. Namely, if per-round
51 allotment is W*r_i, and r_1+...+r_k = r < 1
53 delay_i <= ([MTU/(W*r_i)]*W*r + W*r + k*MTU)/B
55 In the worst case we have IntServ estimate with D = W*r+k*MTU
56 and C = MTU*r. The proof (if correct at all) is trivial.
59 --- It seems that cbq-2.0 is not very accurate. At least, I cannot
60 interpret some places, which look like wrong translations
61 from NS. Anyone is advised to find these differences
62 and explain to me, why I am wrong 8).
64 --- Linux has no EOI event, so that we cannot estimate true class
65 idle time. Workaround is to consider the next dequeue event
66 as sign that previous packet is finished. This is wrong because of
67 internal device queueing, but on a permanently loaded link it is true.
68 Moreover, combined with clock integrator, this scheme looks
69 very close to an ideal solution. */
71 struct cbq_sched_data
;
76 struct cbq_class
*next
; /* hash table link */
77 struct cbq_class
*next_alive
; /* next class with backlog in this priority band */
81 unsigned char priority
; /* class priority */
82 unsigned char priority2
; /* priority to be used after overlimit */
83 unsigned char ewma_log
; /* time constant for idle time calculation */
84 unsigned char ovl_strategy
;
85 #ifdef CONFIG_NET_CLS_ACT
91 /* Link-sharing scheduler parameters */
92 long maxidle
; /* Class parameters: see below. */
96 struct qdisc_rate_table
*R_tab
;
98 /* Overlimit strategy parameters */
99 void (*overlimit
)(struct cbq_class
*cl
);
100 psched_tdiff_t penalty
;
102 /* General scheduler (WRR) parameters */
104 long quantum
; /* Allotment per WRR round */
105 long weight
; /* Relative allotment: see below */
107 struct Qdisc
*qdisc
; /* Ptr to CBQ discipline */
108 struct cbq_class
*split
; /* Ptr to split node */
109 struct cbq_class
*share
; /* Ptr to LS parent in the class tree */
110 struct cbq_class
*tparent
; /* Ptr to tree parent in the class tree */
111 struct cbq_class
*borrow
; /* NULL if class is bandwidth limited;
113 struct cbq_class
*sibling
; /* Sibling chain */
114 struct cbq_class
*children
; /* Pointer to children chain */
116 struct Qdisc
*q
; /* Elementary queueing discipline */
120 unsigned char cpriority
; /* Effective priority */
121 unsigned char delayed
;
122 unsigned char level
; /* level of the class in hierarchy:
123 0 for leaf classes, and maximal
124 level of children + 1 for nodes.
127 psched_time_t last
; /* Last end of service */
128 psched_time_t undertime
;
130 long deficit
; /* Saved deficit for WRR */
131 psched_time_t penalized
;
132 struct gnet_stats_basic bstats
;
133 struct gnet_stats_queue qstats
;
134 struct gnet_stats_rate_est rate_est
;
135 struct tc_cbq_xstats xstats
;
137 struct tcf_proto
*filter_list
;
142 struct cbq_class
*defaults
[TC_PRIO_MAX
+1];
145 struct cbq_sched_data
147 struct cbq_class
*classes
[16]; /* Hash table of all classes */
148 int nclasses
[TC_CBQ_MAXPRIO
+1];
149 unsigned quanta
[TC_CBQ_MAXPRIO
+1];
151 struct cbq_class link
;
154 struct cbq_class
*active
[TC_CBQ_MAXPRIO
+1]; /* List of all classes
157 #ifdef CONFIG_NET_CLS_ACT
158 struct cbq_class
*rx_class
;
160 struct cbq_class
*tx_class
;
161 struct cbq_class
*tx_borrowed
;
163 psched_time_t now
; /* Cached timestamp */
164 psched_time_t now_rt
; /* Cached real time */
167 struct hrtimer delay_timer
;
168 struct qdisc_watchdog watchdog
; /* Watchdog timer,
172 psched_tdiff_t wd_expires
;
178 #define L2T(cl,len) qdisc_l2t((cl)->R_tab,len)
181 static __inline__
unsigned cbq_hash(u32 h
)
188 static __inline__
struct cbq_class
*
189 cbq_class_lookup(struct cbq_sched_data
*q
, u32 classid
)
191 struct cbq_class
*cl
;
193 for (cl
= q
->classes
[cbq_hash(classid
)]; cl
; cl
= cl
->next
)
194 if (cl
->classid
== classid
)
199 #ifdef CONFIG_NET_CLS_ACT
201 static struct cbq_class
*
202 cbq_reclassify(struct sk_buff
*skb
, struct cbq_class
*this)
204 struct cbq_class
*cl
, *new;
206 for (cl
= this->tparent
; cl
; cl
= cl
->tparent
)
207 if ((new = cl
->defaults
[TC_PRIO_BESTEFFORT
]) != NULL
&& new != this)
215 /* Classify packet. The procedure is pretty complicated, but
216 it allows us to combine link sharing and priority scheduling
219 Namely, you can put link sharing rules (f.e. route based) at root of CBQ,
220 so that it resolves to split nodes. Then packets are classified
221 by logical priority, or a more specific classifier may be attached
225 static struct cbq_class
*
226 cbq_classify(struct sk_buff
*skb
, struct Qdisc
*sch
, int *qerr
)
228 struct cbq_sched_data
*q
= qdisc_priv(sch
);
229 struct cbq_class
*head
= &q
->link
;
230 struct cbq_class
**defmap
;
231 struct cbq_class
*cl
= NULL
;
232 u32 prio
= skb
->priority
;
233 struct tcf_result res
;
236 * Step 1. If skb->priority points to one of our classes, use it.
238 if (TC_H_MAJ(prio
^sch
->handle
) == 0 &&
239 (cl
= cbq_class_lookup(q
, prio
)) != NULL
)
242 *qerr
= NET_XMIT_BYPASS
;
245 defmap
= head
->defaults
;
248 * Step 2+n. Apply classifier.
250 if (!head
->filter_list
||
251 (result
= tc_classify_compat(skb
, head
->filter_list
, &res
)) < 0)
254 if ((cl
= (void*)res
.class) == NULL
) {
255 if (TC_H_MAJ(res
.classid
))
256 cl
= cbq_class_lookup(q
, res
.classid
);
257 else if ((cl
= defmap
[res
.classid
&TC_PRIO_MAX
]) == NULL
)
258 cl
= defmap
[TC_PRIO_BESTEFFORT
];
260 if (cl
== NULL
|| cl
->level
>= head
->level
)
264 #ifdef CONFIG_NET_CLS_ACT
268 *qerr
= NET_XMIT_SUCCESS
;
271 case TC_ACT_RECLASSIFY
:
272 return cbq_reclassify(skb
, cl
);
279 * Step 3+n. If classifier selected a link sharing class,
280 * apply agency specific classifier.
281 * Repeat this procdure until we hit a leaf node.
290 * Step 4. No success...
292 if (TC_H_MAJ(prio
) == 0 &&
293 !(cl
= head
->defaults
[prio
&TC_PRIO_MAX
]) &&
294 !(cl
= head
->defaults
[TC_PRIO_BESTEFFORT
]))
301 A packet has just been enqueued on the empty class.
302 cbq_activate_class adds it to the tail of active class list
303 of its priority band.
306 static __inline__
void cbq_activate_class(struct cbq_class
*cl
)
308 struct cbq_sched_data
*q
= qdisc_priv(cl
->qdisc
);
309 int prio
= cl
->cpriority
;
310 struct cbq_class
*cl_tail
;
312 cl_tail
= q
->active
[prio
];
313 q
->active
[prio
] = cl
;
315 if (cl_tail
!= NULL
) {
316 cl
->next_alive
= cl_tail
->next_alive
;
317 cl_tail
->next_alive
= cl
;
320 q
->activemask
|= (1<<prio
);
325 Unlink class from active chain.
326 Note that this same procedure is done directly in cbq_dequeue*
327 during round-robin procedure.
330 static void cbq_deactivate_class(struct cbq_class
*this)
332 struct cbq_sched_data
*q
= qdisc_priv(this->qdisc
);
333 int prio
= this->cpriority
;
334 struct cbq_class
*cl
;
335 struct cbq_class
*cl_prev
= q
->active
[prio
];
338 cl
= cl_prev
->next_alive
;
340 cl_prev
->next_alive
= cl
->next_alive
;
341 cl
->next_alive
= NULL
;
343 if (cl
== q
->active
[prio
]) {
344 q
->active
[prio
] = cl_prev
;
345 if (cl
== q
->active
[prio
]) {
346 q
->active
[prio
] = NULL
;
347 q
->activemask
&= ~(1<<prio
);
353 } while ((cl_prev
= cl
) != q
->active
[prio
]);
357 cbq_mark_toplevel(struct cbq_sched_data
*q
, struct cbq_class
*cl
)
359 int toplevel
= q
->toplevel
;
361 if (toplevel
> cl
->level
&& !(cl
->q
->flags
&TCQ_F_THROTTLED
)) {
365 now
= psched_get_time();
366 incr
= now
- q
->now_rt
;
370 if (cl
->undertime
< now
) {
371 q
->toplevel
= cl
->level
;
374 } while ((cl
=cl
->borrow
) != NULL
&& toplevel
> cl
->level
);
379 cbq_enqueue(struct sk_buff
*skb
, struct Qdisc
*sch
)
381 struct cbq_sched_data
*q
= qdisc_priv(sch
);
383 int uninitialized_var(ret
);
384 struct cbq_class
*cl
= cbq_classify(skb
, sch
, &ret
);
386 #ifdef CONFIG_NET_CLS_ACT
390 if (ret
== NET_XMIT_BYPASS
)
396 #ifdef CONFIG_NET_CLS_ACT
397 cl
->q
->__parent
= sch
;
399 if ((ret
= cl
->q
->enqueue(skb
, cl
->q
)) == NET_XMIT_SUCCESS
) {
401 sch
->bstats
.packets
++;
402 sch
->bstats
.bytes
+=len
;
403 cbq_mark_toplevel(q
, cl
);
405 cbq_activate_class(cl
);
410 cbq_mark_toplevel(q
, cl
);
416 cbq_requeue(struct sk_buff
*skb
, struct Qdisc
*sch
)
418 struct cbq_sched_data
*q
= qdisc_priv(sch
);
419 struct cbq_class
*cl
;
422 if ((cl
= q
->tx_class
) == NULL
) {
429 cbq_mark_toplevel(q
, cl
);
431 #ifdef CONFIG_NET_CLS_ACT
433 cl
->q
->__parent
= sch
;
435 if ((ret
= cl
->q
->ops
->requeue(skb
, cl
->q
)) == 0) {
437 sch
->qstats
.requeues
++;
439 cbq_activate_class(cl
);
447 /* Overlimit actions */
449 /* TC_CBQ_OVL_CLASSIC: (default) penalize leaf class by adding offtime */
451 static void cbq_ovl_classic(struct cbq_class
*cl
)
453 struct cbq_sched_data
*q
= qdisc_priv(cl
->qdisc
);
454 psched_tdiff_t delay
= cl
->undertime
- q
->now
;
457 delay
+= cl
->offtime
;
460 Class goes to sleep, so that it will have no
461 chance to work avgidle. Let's forgive it 8)
463 BTW cbq-2.0 has a crap in this
464 place, apparently they forgot to shift it by cl->ewma_log.
467 delay
-= (-cl
->avgidle
) - ((-cl
->avgidle
) >> cl
->ewma_log
);
468 if (cl
->avgidle
< cl
->minidle
)
469 cl
->avgidle
= cl
->minidle
;
472 cl
->undertime
= q
->now
+ delay
;
474 cl
->xstats
.overactions
++;
477 if (q
->wd_expires
== 0 || q
->wd_expires
> delay
)
478 q
->wd_expires
= delay
;
480 /* Dirty work! We must schedule wakeups based on
481 real available rate, rather than leaf rate,
482 which may be tiny (even zero).
484 if (q
->toplevel
== TC_CBQ_MAXLEVEL
) {
486 psched_tdiff_t base_delay
= q
->wd_expires
;
488 for (b
= cl
->borrow
; b
; b
= b
->borrow
) {
489 delay
= b
->undertime
- q
->now
;
490 if (delay
< base_delay
) {
497 q
->wd_expires
= base_delay
;
501 /* TC_CBQ_OVL_RCLASSIC: penalize by offtime classes in hierarchy, when
505 static void cbq_ovl_rclassic(struct cbq_class
*cl
)
507 struct cbq_sched_data
*q
= qdisc_priv(cl
->qdisc
);
508 struct cbq_class
*this = cl
;
511 if (cl
->level
> q
->toplevel
) {
515 } while ((cl
= cl
->borrow
) != NULL
);
522 /* TC_CBQ_OVL_DELAY: delay until it will go to underlimit */
524 static void cbq_ovl_delay(struct cbq_class
*cl
)
526 struct cbq_sched_data
*q
= qdisc_priv(cl
->qdisc
);
527 psched_tdiff_t delay
= cl
->undertime
- q
->now
;
530 psched_time_t sched
= q
->now
;
533 delay
+= cl
->offtime
;
535 delay
-= (-cl
->avgidle
) - ((-cl
->avgidle
) >> cl
->ewma_log
);
536 if (cl
->avgidle
< cl
->minidle
)
537 cl
->avgidle
= cl
->minidle
;
538 cl
->undertime
= q
->now
+ delay
;
541 sched
+= delay
+ cl
->penalty
;
542 cl
->penalized
= sched
;
543 cl
->cpriority
= TC_CBQ_MAXPRIO
;
544 q
->pmask
|= (1<<TC_CBQ_MAXPRIO
);
546 expires
= ktime_set(0, 0);
547 expires
= ktime_add_ns(expires
, PSCHED_US2NS(sched
));
548 if (hrtimer_try_to_cancel(&q
->delay_timer
) &&
549 ktime_to_ns(ktime_sub(q
->delay_timer
.expires
,
551 q
->delay_timer
.expires
= expires
;
552 hrtimer_restart(&q
->delay_timer
);
554 cl
->xstats
.overactions
++;
559 if (q
->wd_expires
== 0 || q
->wd_expires
> delay
)
560 q
->wd_expires
= delay
;
563 /* TC_CBQ_OVL_LOWPRIO: penalize class by lowering its priority band */
565 static void cbq_ovl_lowprio(struct cbq_class
*cl
)
567 struct cbq_sched_data
*q
= qdisc_priv(cl
->qdisc
);
569 cl
->penalized
= q
->now
+ cl
->penalty
;
571 if (cl
->cpriority
!= cl
->priority2
) {
572 cl
->cpriority
= cl
->priority2
;
573 q
->pmask
|= (1<<cl
->cpriority
);
574 cl
->xstats
.overactions
++;
579 /* TC_CBQ_OVL_DROP: penalize class by dropping */
581 static void cbq_ovl_drop(struct cbq_class
*cl
)
583 if (cl
->q
->ops
->drop
)
584 if (cl
->q
->ops
->drop(cl
->q
))
586 cl
->xstats
.overactions
++;
590 static psched_tdiff_t
cbq_undelay_prio(struct cbq_sched_data
*q
, int prio
,
593 struct cbq_class
*cl
;
594 struct cbq_class
*cl_prev
= q
->active
[prio
];
595 psched_time_t sched
= now
;
601 cl
= cl_prev
->next_alive
;
602 if (now
- cl
->penalized
> 0) {
603 cl_prev
->next_alive
= cl
->next_alive
;
604 cl
->next_alive
= NULL
;
605 cl
->cpriority
= cl
->priority
;
607 cbq_activate_class(cl
);
609 if (cl
== q
->active
[prio
]) {
610 q
->active
[prio
] = cl_prev
;
611 if (cl
== q
->active
[prio
]) {
612 q
->active
[prio
] = NULL
;
617 cl
= cl_prev
->next_alive
;
618 } else if (sched
- cl
->penalized
> 0)
619 sched
= cl
->penalized
;
620 } while ((cl_prev
= cl
) != q
->active
[prio
]);
625 static enum hrtimer_restart
cbq_undelay(struct hrtimer
*timer
)
627 struct cbq_sched_data
*q
= container_of(timer
, struct cbq_sched_data
,
629 struct Qdisc
*sch
= q
->watchdog
.qdisc
;
631 psched_tdiff_t delay
= 0;
634 now
= psched_get_time();
640 int prio
= ffz(~pmask
);
645 tmp
= cbq_undelay_prio(q
, prio
, now
);
648 if (tmp
< delay
|| delay
== 0)
656 time
= ktime_set(0, 0);
657 time
= ktime_add_ns(time
, PSCHED_US2NS(now
+ delay
));
658 hrtimer_start(&q
->delay_timer
, time
, HRTIMER_MODE_ABS
);
661 sch
->flags
&= ~TCQ_F_THROTTLED
;
662 netif_schedule(sch
->dev
);
663 return HRTIMER_NORESTART
;
666 #ifdef CONFIG_NET_CLS_ACT
667 static int cbq_reshape_fail(struct sk_buff
*skb
, struct Qdisc
*child
)
670 struct Qdisc
*sch
= child
->__parent
;
671 struct cbq_sched_data
*q
= qdisc_priv(sch
);
672 struct cbq_class
*cl
= q
->rx_class
;
676 if (cl
&& (cl
= cbq_reclassify(skb
, cl
)) != NULL
) {
678 cbq_mark_toplevel(q
, cl
);
681 cl
->q
->__parent
= sch
;
683 if (cl
->q
->enqueue(skb
, cl
->q
) == 0) {
685 sch
->bstats
.packets
++;
686 sch
->bstats
.bytes
+=len
;
688 cbq_activate_class(cl
);
701 It is mission critical procedure.
703 We "regenerate" toplevel cutoff, if transmitting class
704 has backlog and it is not regulated. It is not part of
705 original CBQ description, but looks more reasonable.
706 Probably, it is wrong. This question needs further investigation.
709 static __inline__
void
710 cbq_update_toplevel(struct cbq_sched_data
*q
, struct cbq_class
*cl
,
711 struct cbq_class
*borrowed
)
713 if (cl
&& q
->toplevel
>= borrowed
->level
) {
714 if (cl
->q
->q
.qlen
> 1) {
716 if (borrowed
->undertime
== PSCHED_PASTPERFECT
) {
717 q
->toplevel
= borrowed
->level
;
720 } while ((borrowed
=borrowed
->borrow
) != NULL
);
723 /* It is not necessary now. Uncommenting it
724 will save CPU cycles, but decrease fairness.
726 q
->toplevel
= TC_CBQ_MAXLEVEL
;
732 cbq_update(struct cbq_sched_data
*q
)
734 struct cbq_class
*this = q
->tx_class
;
735 struct cbq_class
*cl
= this;
740 for ( ; cl
; cl
= cl
->share
) {
741 long avgidle
= cl
->avgidle
;
744 cl
->bstats
.packets
++;
745 cl
->bstats
.bytes
+= len
;
748 (now - last) is total time between packet right edges.
749 (last_pktlen/rate) is "virtual" busy time, so that
751 idle = (now - last) - last_pktlen/rate
754 idle
= q
->now
- cl
->last
;
755 if ((unsigned long)idle
> 128*1024*1024) {
756 avgidle
= cl
->maxidle
;
758 idle
-= L2T(cl
, len
);
760 /* true_avgidle := (1-W)*true_avgidle + W*idle,
761 where W=2^{-ewma_log}. But cl->avgidle is scaled:
762 cl->avgidle == true_avgidle/W,
765 avgidle
+= idle
- (avgidle
>>cl
->ewma_log
);
769 /* Overlimit or at-limit */
771 if (avgidle
< cl
->minidle
)
772 avgidle
= cl
->minidle
;
774 cl
->avgidle
= avgidle
;
776 /* Calculate expected time, when this class
777 will be allowed to send.
779 (1-W)*true_avgidle + W*delay = 0, i.e.
780 idle = (1/W - 1)*(-true_avgidle)
782 idle = (1 - W)*(-cl->avgidle);
784 idle
= (-avgidle
) - ((-avgidle
) >> cl
->ewma_log
);
788 To maintain the rate allocated to the class,
789 we add to undertime virtual clock,
790 necessary to complete transmitted packet.
791 (len/phys_bandwidth has been already passed
792 to the moment of cbq_update)
795 idle
-= L2T(&q
->link
, len
);
796 idle
+= L2T(cl
, len
);
798 cl
->undertime
= q
->now
+ idle
;
802 cl
->undertime
= PSCHED_PASTPERFECT
;
803 if (avgidle
> cl
->maxidle
)
804 cl
->avgidle
= cl
->maxidle
;
806 cl
->avgidle
= avgidle
;
811 cbq_update_toplevel(q
, this, q
->tx_borrowed
);
814 static __inline__
struct cbq_class
*
815 cbq_under_limit(struct cbq_class
*cl
)
817 struct cbq_sched_data
*q
= qdisc_priv(cl
->qdisc
);
818 struct cbq_class
*this_cl
= cl
;
820 if (cl
->tparent
== NULL
)
823 if (cl
->undertime
== PSCHED_PASTPERFECT
|| q
->now
>= cl
->undertime
) {
829 /* It is very suspicious place. Now overlimit
830 action is generated for not bounded classes
831 only if link is completely congested.
832 Though it is in agree with ancestor-only paradigm,
833 it looks very stupid. Particularly,
834 it means that this chunk of code will either
835 never be called or result in strong amplification
836 of burstiness. Dangerous, silly, and, however,
837 no another solution exists.
839 if ((cl
= cl
->borrow
) == NULL
) {
840 this_cl
->qstats
.overlimits
++;
841 this_cl
->overlimit(this_cl
);
844 if (cl
->level
> q
->toplevel
)
846 } while (cl
->undertime
!= PSCHED_PASTPERFECT
&& q
->now
< cl
->undertime
);
852 static __inline__
struct sk_buff
*
853 cbq_dequeue_prio(struct Qdisc
*sch
, int prio
)
855 struct cbq_sched_data
*q
= qdisc_priv(sch
);
856 struct cbq_class
*cl_tail
, *cl_prev
, *cl
;
860 cl_tail
= cl_prev
= q
->active
[prio
];
861 cl
= cl_prev
->next_alive
;
868 struct cbq_class
*borrow
= cl
;
871 (borrow
= cbq_under_limit(cl
)) == NULL
)
874 if (cl
->deficit
<= 0) {
875 /* Class exhausted its allotment per
876 this round. Switch to the next one.
879 cl
->deficit
+= cl
->quantum
;
883 skb
= cl
->q
->dequeue(cl
->q
);
885 /* Class did not give us any skb :-(
886 It could occur even if cl->q->q.qlen != 0
887 f.e. if cl->q == "tbf"
892 cl
->deficit
-= skb
->len
;
894 q
->tx_borrowed
= borrow
;
896 #ifndef CBQ_XSTATS_BORROWS_BYTES
897 borrow
->xstats
.borrows
++;
898 cl
->xstats
.borrows
++;
900 borrow
->xstats
.borrows
+= skb
->len
;
901 cl
->xstats
.borrows
+= skb
->len
;
904 q
->tx_len
= skb
->len
;
906 if (cl
->deficit
<= 0) {
907 q
->active
[prio
] = cl
;
909 cl
->deficit
+= cl
->quantum
;
914 if (cl
->q
->q
.qlen
== 0 || prio
!= cl
->cpriority
) {
915 /* Class is empty or penalized.
916 Unlink it from active chain.
918 cl_prev
->next_alive
= cl
->next_alive
;
919 cl
->next_alive
= NULL
;
921 /* Did cl_tail point to it? */
926 /* Was it the last class in this band? */
929 q
->active
[prio
] = NULL
;
930 q
->activemask
&= ~(1<<prio
);
932 cbq_activate_class(cl
);
936 q
->active
[prio
] = cl_tail
;
939 cbq_activate_class(cl
);
947 } while (cl_prev
!= cl_tail
);
950 q
->active
[prio
] = cl_prev
;
955 static __inline__
struct sk_buff
*
956 cbq_dequeue_1(struct Qdisc
*sch
)
958 struct cbq_sched_data
*q
= qdisc_priv(sch
);
962 activemask
= q
->activemask
&0xFF;
964 int prio
= ffz(~activemask
);
965 activemask
&= ~(1<<prio
);
966 skb
= cbq_dequeue_prio(sch
, prio
);
973 static struct sk_buff
*
974 cbq_dequeue(struct Qdisc
*sch
)
977 struct cbq_sched_data
*q
= qdisc_priv(sch
);
981 now
= psched_get_time();
982 incr
= now
- q
->now_rt
;
985 psched_tdiff_t incr2
;
986 /* Time integrator. We calculate EOS time
987 by adding expected packet transmission time.
988 If real time is greater, we warp artificial clock,
991 cbq_time = max(real_time, work);
993 incr2
= L2T(&q
->link
, q
->tx_len
);
996 if ((incr
-= incr2
) < 0)
1005 skb
= cbq_dequeue_1(sch
);
1008 sch
->flags
&= ~TCQ_F_THROTTLED
;
1012 /* All the classes are overlimit.
1016 1. Scheduler is empty.
1017 2. Toplevel cutoff inhibited borrowing.
1018 3. Root class is overlimit.
1020 Reset 2d and 3d conditions and retry.
1022 Note, that NS and cbq-2.0 are buggy, peeking
1023 an arbitrary class is appropriate for ancestor-only
1024 sharing, but not for toplevel algorithm.
1026 Our version is better, but slower, because it requires
1027 two passes, but it is unavoidable with top-level sharing.
1030 if (q
->toplevel
== TC_CBQ_MAXLEVEL
&&
1031 q
->link
.undertime
== PSCHED_PASTPERFECT
)
1034 q
->toplevel
= TC_CBQ_MAXLEVEL
;
1035 q
->link
.undertime
= PSCHED_PASTPERFECT
;
1038 /* No packets in scheduler or nobody wants to give them to us :-(
1039 Sigh... start watchdog timer in the last case. */
1042 sch
->qstats
.overlimits
++;
1044 qdisc_watchdog_schedule(&q
->watchdog
,
1045 now
+ q
->wd_expires
);
1050 /* CBQ class maintanance routines */
1052 static void cbq_adjust_levels(struct cbq_class
*this)
1059 struct cbq_class
*cl
;
1061 if ((cl
= this->children
) != NULL
) {
1063 if (cl
->level
> level
)
1065 } while ((cl
= cl
->sibling
) != this->children
);
1067 this->level
= level
+1;
1068 } while ((this = this->tparent
) != NULL
);
1071 static void cbq_normalize_quanta(struct cbq_sched_data
*q
, int prio
)
1073 struct cbq_class
*cl
;
1076 if (q
->quanta
[prio
] == 0)
1079 for (h
=0; h
<16; h
++) {
1080 for (cl
= q
->classes
[h
]; cl
; cl
= cl
->next
) {
1081 /* BUGGGG... Beware! This expression suffer of
1082 arithmetic overflows!
1084 if (cl
->priority
== prio
) {
1085 cl
->quantum
= (cl
->weight
*cl
->allot
*q
->nclasses
[prio
])/
1088 if (cl
->quantum
<= 0 || cl
->quantum
>32*cl
->qdisc
->dev
->mtu
) {
1089 printk(KERN_WARNING
"CBQ: class %08x has bad quantum==%ld, repaired.\n", cl
->classid
, cl
->quantum
);
1090 cl
->quantum
= cl
->qdisc
->dev
->mtu
/2 + 1;
1096 static void cbq_sync_defmap(struct cbq_class
*cl
)
1098 struct cbq_sched_data
*q
= qdisc_priv(cl
->qdisc
);
1099 struct cbq_class
*split
= cl
->split
;
1106 for (i
=0; i
<=TC_PRIO_MAX
; i
++) {
1107 if (split
->defaults
[i
] == cl
&& !(cl
->defmap
&(1<<i
)))
1108 split
->defaults
[i
] = NULL
;
1111 for (i
=0; i
<=TC_PRIO_MAX
; i
++) {
1112 int level
= split
->level
;
1114 if (split
->defaults
[i
])
1117 for (h
=0; h
<16; h
++) {
1118 struct cbq_class
*c
;
1120 for (c
= q
->classes
[h
]; c
; c
= c
->next
) {
1121 if (c
->split
== split
&& c
->level
< level
&&
1123 split
->defaults
[i
] = c
;
1131 static void cbq_change_defmap(struct cbq_class
*cl
, u32 splitid
, u32 def
, u32 mask
)
1133 struct cbq_class
*split
= NULL
;
1136 if ((split
= cl
->split
) == NULL
)
1138 splitid
= split
->classid
;
1141 if (split
== NULL
|| split
->classid
!= splitid
) {
1142 for (split
= cl
->tparent
; split
; split
= split
->tparent
)
1143 if (split
->classid
== splitid
)
1150 if (cl
->split
!= split
) {
1152 cbq_sync_defmap(cl
);
1154 cl
->defmap
= def
&mask
;
1156 cl
->defmap
= (cl
->defmap
&~mask
)|(def
&mask
);
1158 cbq_sync_defmap(cl
);
1161 static void cbq_unlink_class(struct cbq_class
*this)
1163 struct cbq_class
*cl
, **clp
;
1164 struct cbq_sched_data
*q
= qdisc_priv(this->qdisc
);
1166 for (clp
= &q
->classes
[cbq_hash(this->classid
)]; (cl
= *clp
) != NULL
; clp
= &cl
->next
) {
1174 if (this->tparent
) {
1183 } while ((cl
= *clp
) != this->sibling
);
1185 if (this->tparent
->children
== this) {
1186 this->tparent
->children
= this->sibling
;
1187 if (this->sibling
== this)
1188 this->tparent
->children
= NULL
;
1191 BUG_TRAP(this->sibling
== this);
1195 static void cbq_link_class(struct cbq_class
*this)
1197 struct cbq_sched_data
*q
= qdisc_priv(this->qdisc
);
1198 unsigned h
= cbq_hash(this->classid
);
1199 struct cbq_class
*parent
= this->tparent
;
1201 this->sibling
= this;
1202 this->next
= q
->classes
[h
];
1203 q
->classes
[h
] = this;
1208 if (parent
->children
== NULL
) {
1209 parent
->children
= this;
1211 this->sibling
= parent
->children
->sibling
;
1212 parent
->children
->sibling
= this;
1216 static unsigned int cbq_drop(struct Qdisc
* sch
)
1218 struct cbq_sched_data
*q
= qdisc_priv(sch
);
1219 struct cbq_class
*cl
, *cl_head
;
1223 for (prio
= TC_CBQ_MAXPRIO
; prio
>= 0; prio
--) {
1224 if ((cl_head
= q
->active
[prio
]) == NULL
)
1229 if (cl
->q
->ops
->drop
&& (len
= cl
->q
->ops
->drop(cl
->q
))) {
1232 cbq_deactivate_class(cl
);
1235 } while ((cl
= cl
->next_alive
) != cl_head
);
1241 cbq_reset(struct Qdisc
* sch
)
1243 struct cbq_sched_data
*q
= qdisc_priv(sch
);
1244 struct cbq_class
*cl
;
1251 q
->tx_borrowed
= NULL
;
1252 qdisc_watchdog_cancel(&q
->watchdog
);
1253 hrtimer_cancel(&q
->delay_timer
);
1254 q
->toplevel
= TC_CBQ_MAXLEVEL
;
1255 q
->now
= psched_get_time();
1258 for (prio
= 0; prio
<= TC_CBQ_MAXPRIO
; prio
++)
1259 q
->active
[prio
] = NULL
;
1261 for (h
= 0; h
< 16; h
++) {
1262 for (cl
= q
->classes
[h
]; cl
; cl
= cl
->next
) {
1265 cl
->next_alive
= NULL
;
1266 cl
->undertime
= PSCHED_PASTPERFECT
;
1267 cl
->avgidle
= cl
->maxidle
;
1268 cl
->deficit
= cl
->quantum
;
1269 cl
->cpriority
= cl
->priority
;
1276 static int cbq_set_lss(struct cbq_class
*cl
, struct tc_cbq_lssopt
*lss
)
1278 if (lss
->change
&TCF_CBQ_LSS_FLAGS
) {
1279 cl
->share
= (lss
->flags
&TCF_CBQ_LSS_ISOLATED
) ? NULL
: cl
->tparent
;
1280 cl
->borrow
= (lss
->flags
&TCF_CBQ_LSS_BOUNDED
) ? NULL
: cl
->tparent
;
1282 if (lss
->change
&TCF_CBQ_LSS_EWMA
)
1283 cl
->ewma_log
= lss
->ewma_log
;
1284 if (lss
->change
&TCF_CBQ_LSS_AVPKT
)
1285 cl
->avpkt
= lss
->avpkt
;
1286 if (lss
->change
&TCF_CBQ_LSS_MINIDLE
)
1287 cl
->minidle
= -(long)lss
->minidle
;
1288 if (lss
->change
&TCF_CBQ_LSS_MAXIDLE
) {
1289 cl
->maxidle
= lss
->maxidle
;
1290 cl
->avgidle
= lss
->maxidle
;
1292 if (lss
->change
&TCF_CBQ_LSS_OFFTIME
)
1293 cl
->offtime
= lss
->offtime
;
1297 static void cbq_rmprio(struct cbq_sched_data
*q
, struct cbq_class
*cl
)
1299 q
->nclasses
[cl
->priority
]--;
1300 q
->quanta
[cl
->priority
] -= cl
->weight
;
1301 cbq_normalize_quanta(q
, cl
->priority
);
1304 static void cbq_addprio(struct cbq_sched_data
*q
, struct cbq_class
*cl
)
1306 q
->nclasses
[cl
->priority
]++;
1307 q
->quanta
[cl
->priority
] += cl
->weight
;
1308 cbq_normalize_quanta(q
, cl
->priority
);
1311 static int cbq_set_wrr(struct cbq_class
*cl
, struct tc_cbq_wrropt
*wrr
)
1313 struct cbq_sched_data
*q
= qdisc_priv(cl
->qdisc
);
1316 cl
->allot
= wrr
->allot
;
1318 cl
->weight
= wrr
->weight
;
1319 if (wrr
->priority
) {
1320 cl
->priority
= wrr
->priority
-1;
1321 cl
->cpriority
= cl
->priority
;
1322 if (cl
->priority
>= cl
->priority2
)
1323 cl
->priority2
= TC_CBQ_MAXPRIO
-1;
1330 static int cbq_set_overlimit(struct cbq_class
*cl
, struct tc_cbq_ovl
*ovl
)
1332 switch (ovl
->strategy
) {
1333 case TC_CBQ_OVL_CLASSIC
:
1334 cl
->overlimit
= cbq_ovl_classic
;
1336 case TC_CBQ_OVL_DELAY
:
1337 cl
->overlimit
= cbq_ovl_delay
;
1339 case TC_CBQ_OVL_LOWPRIO
:
1340 if (ovl
->priority2
-1 >= TC_CBQ_MAXPRIO
||
1341 ovl
->priority2
-1 <= cl
->priority
)
1343 cl
->priority2
= ovl
->priority2
-1;
1344 cl
->overlimit
= cbq_ovl_lowprio
;
1346 case TC_CBQ_OVL_DROP
:
1347 cl
->overlimit
= cbq_ovl_drop
;
1349 case TC_CBQ_OVL_RCLASSIC
:
1350 cl
->overlimit
= cbq_ovl_rclassic
;
1355 cl
->penalty
= ovl
->penalty
;
1359 #ifdef CONFIG_NET_CLS_ACT
1360 static int cbq_set_police(struct cbq_class
*cl
, struct tc_cbq_police
*p
)
1362 cl
->police
= p
->police
;
1364 if (cl
->q
->handle
) {
1365 if (p
->police
== TC_POLICE_RECLASSIFY
)
1366 cl
->q
->reshape_fail
= cbq_reshape_fail
;
1368 cl
->q
->reshape_fail
= NULL
;
1374 static int cbq_set_fopt(struct cbq_class
*cl
, struct tc_cbq_fopt
*fopt
)
1376 cbq_change_defmap(cl
, fopt
->split
, fopt
->defmap
, fopt
->defchange
);
1380 static const struct nla_policy cbq_policy
[TCA_CBQ_MAX
+ 1] = {
1381 [TCA_CBQ_LSSOPT
] = { .len
= sizeof(struct tc_cbq_lssopt
) },
1382 [TCA_CBQ_WRROPT
] = { .len
= sizeof(struct tc_cbq_wrropt
) },
1383 [TCA_CBQ_FOPT
] = { .len
= sizeof(struct tc_cbq_fopt
) },
1384 [TCA_CBQ_OVL_STRATEGY
] = { .len
= sizeof(struct tc_cbq_ovl
) },
1385 [TCA_CBQ_RATE
] = { .len
= sizeof(struct tc_ratespec
) },
1386 [TCA_CBQ_RTAB
] = { .type
= NLA_BINARY
, .len
= TC_RTAB_SIZE
},
1387 [TCA_CBQ_POLICE
] = { .len
= sizeof(struct tc_cbq_police
) },
1390 static int cbq_init(struct Qdisc
*sch
, struct nlattr
*opt
)
1392 struct cbq_sched_data
*q
= qdisc_priv(sch
);
1393 struct nlattr
*tb
[TCA_CBQ_MAX
+ 1];
1394 struct tc_ratespec
*r
;
1397 err
= nla_parse_nested(tb
, TCA_CBQ_MAX
, opt
, cbq_policy
);
1401 if (tb
[TCA_CBQ_RTAB
] == NULL
|| tb
[TCA_CBQ_RATE
] == NULL
)
1404 r
= nla_data(tb
[TCA_CBQ_RATE
]);
1406 if ((q
->link
.R_tab
= qdisc_get_rtab(r
, tb
[TCA_CBQ_RTAB
])) == NULL
)
1410 q
->link
.sibling
= &q
->link
;
1411 q
->link
.classid
= sch
->handle
;
1412 q
->link
.qdisc
= sch
;
1413 if (!(q
->link
.q
= qdisc_create_dflt(sch
->dev
, &pfifo_qdisc_ops
,
1415 q
->link
.q
= &noop_qdisc
;
1417 q
->link
.priority
= TC_CBQ_MAXPRIO
-1;
1418 q
->link
.priority2
= TC_CBQ_MAXPRIO
-1;
1419 q
->link
.cpriority
= TC_CBQ_MAXPRIO
-1;
1420 q
->link
.ovl_strategy
= TC_CBQ_OVL_CLASSIC
;
1421 q
->link
.overlimit
= cbq_ovl_classic
;
1422 q
->link
.allot
= psched_mtu(sch
->dev
);
1423 q
->link
.quantum
= q
->link
.allot
;
1424 q
->link
.weight
= q
->link
.R_tab
->rate
.rate
;
1426 q
->link
.ewma_log
= TC_CBQ_DEF_EWMA
;
1427 q
->link
.avpkt
= q
->link
.allot
/2;
1428 q
->link
.minidle
= -0x7FFFFFFF;
1430 qdisc_watchdog_init(&q
->watchdog
, sch
);
1431 hrtimer_init(&q
->delay_timer
, CLOCK_MONOTONIC
, HRTIMER_MODE_ABS
);
1432 q
->delay_timer
.function
= cbq_undelay
;
1433 q
->toplevel
= TC_CBQ_MAXLEVEL
;
1434 q
->now
= psched_get_time();
1437 cbq_link_class(&q
->link
);
1439 if (tb
[TCA_CBQ_LSSOPT
])
1440 cbq_set_lss(&q
->link
, nla_data(tb
[TCA_CBQ_LSSOPT
]));
1442 cbq_addprio(q
, &q
->link
);
1446 static __inline__
int cbq_dump_rate(struct sk_buff
*skb
, struct cbq_class
*cl
)
1448 unsigned char *b
= skb_tail_pointer(skb
);
1450 NLA_PUT(skb
, TCA_CBQ_RATE
, sizeof(cl
->R_tab
->rate
), &cl
->R_tab
->rate
);
1458 static __inline__
int cbq_dump_lss(struct sk_buff
*skb
, struct cbq_class
*cl
)
1460 unsigned char *b
= skb_tail_pointer(skb
);
1461 struct tc_cbq_lssopt opt
;
1464 if (cl
->borrow
== NULL
)
1465 opt
.flags
|= TCF_CBQ_LSS_BOUNDED
;
1466 if (cl
->share
== NULL
)
1467 opt
.flags
|= TCF_CBQ_LSS_ISOLATED
;
1468 opt
.ewma_log
= cl
->ewma_log
;
1469 opt
.level
= cl
->level
;
1470 opt
.avpkt
= cl
->avpkt
;
1471 opt
.maxidle
= cl
->maxidle
;
1472 opt
.minidle
= (u32
)(-cl
->minidle
);
1473 opt
.offtime
= cl
->offtime
;
1475 NLA_PUT(skb
, TCA_CBQ_LSSOPT
, sizeof(opt
), &opt
);
1483 static __inline__
int cbq_dump_wrr(struct sk_buff
*skb
, struct cbq_class
*cl
)
1485 unsigned char *b
= skb_tail_pointer(skb
);
1486 struct tc_cbq_wrropt opt
;
1489 opt
.allot
= cl
->allot
;
1490 opt
.priority
= cl
->priority
+1;
1491 opt
.cpriority
= cl
->cpriority
+1;
1492 opt
.weight
= cl
->weight
;
1493 NLA_PUT(skb
, TCA_CBQ_WRROPT
, sizeof(opt
), &opt
);
1501 static __inline__
int cbq_dump_ovl(struct sk_buff
*skb
, struct cbq_class
*cl
)
1503 unsigned char *b
= skb_tail_pointer(skb
);
1504 struct tc_cbq_ovl opt
;
1506 opt
.strategy
= cl
->ovl_strategy
;
1507 opt
.priority2
= cl
->priority2
+1;
1509 opt
.penalty
= cl
->penalty
;
1510 NLA_PUT(skb
, TCA_CBQ_OVL_STRATEGY
, sizeof(opt
), &opt
);
1518 static __inline__
int cbq_dump_fopt(struct sk_buff
*skb
, struct cbq_class
*cl
)
1520 unsigned char *b
= skb_tail_pointer(skb
);
1521 struct tc_cbq_fopt opt
;
1523 if (cl
->split
|| cl
->defmap
) {
1524 opt
.split
= cl
->split
? cl
->split
->classid
: 0;
1525 opt
.defmap
= cl
->defmap
;
1527 NLA_PUT(skb
, TCA_CBQ_FOPT
, sizeof(opt
), &opt
);
#ifdef CONFIG_NET_CLS_ACT
/* Dump the policing mode (TCA_CBQ_POLICE) for @cl; skipped entirely
 * when no policing is configured. Only built with classifier actions.
 * Returns skb length on success, -1 on overflow (buffer trimmed to @b). */
static __inline__ int cbq_dump_police(struct sk_buff *skb, struct cbq_class *cl)
{
	unsigned char *b = skb_tail_pointer(skb);
	struct tc_cbq_police opt;

	if (cl->police) {
		opt.police = cl->police;
		opt.__res1 = 0;
		opt.__res2 = 0;
		NLA_PUT(skb, TCA_CBQ_POLICE, sizeof(opt), &opt);
	}
	return skb->len;

nla_put_failure:
	nlmsg_trim(skb, b);
	return -1;
}
#endif
/* Dump every per-class attribute group for @cl in one shot.
 * Short-circuits on the first sub-dump that overflows the skb.
 * Returns 0 on success, -1 on failure. */
static int cbq_dump_attr(struct sk_buff *skb, struct cbq_class *cl)
{
	if (cbq_dump_lss(skb, cl) < 0 ||
	    cbq_dump_rate(skb, cl) < 0 ||
	    cbq_dump_wrr(skb, cl) < 0 ||
	    cbq_dump_ovl(skb, cl) < 0 ||
#ifdef CONFIG_NET_CLS_ACT
	    cbq_dump_police(skb, cl) < 0 ||
#endif
	    cbq_dump_fopt(skb, cl) < 0)
		return -1;
	return 0;
}
1570 static int cbq_dump(struct Qdisc
*sch
, struct sk_buff
*skb
)
1572 struct cbq_sched_data
*q
= qdisc_priv(sch
);
1573 struct nlattr
*nest
;
1575 nest
= nla_nest_start(skb
, TCA_OPTIONS
);
1577 goto nla_put_failure
;
1578 if (cbq_dump_attr(skb
, &q
->link
) < 0)
1579 goto nla_put_failure
;
1580 nla_nest_end(skb
, nest
);
1584 nla_nest_cancel(skb
, nest
);
1589 cbq_dump_stats(struct Qdisc
*sch
, struct gnet_dump
*d
)
1591 struct cbq_sched_data
*q
= qdisc_priv(sch
);
1593 q
->link
.xstats
.avgidle
= q
->link
.avgidle
;
1594 return gnet_stats_copy_app(d
, &q
->link
.xstats
, sizeof(q
->link
.xstats
));
1598 cbq_dump_class(struct Qdisc
*sch
, unsigned long arg
,
1599 struct sk_buff
*skb
, struct tcmsg
*tcm
)
1601 struct cbq_class
*cl
= (struct cbq_class
*)arg
;
1602 struct nlattr
*nest
;
1605 tcm
->tcm_parent
= cl
->tparent
->classid
;
1607 tcm
->tcm_parent
= TC_H_ROOT
;
1608 tcm
->tcm_handle
= cl
->classid
;
1609 tcm
->tcm_info
= cl
->q
->handle
;
1611 nest
= nla_nest_start(skb
, TCA_OPTIONS
);
1613 goto nla_put_failure
;
1614 if (cbq_dump_attr(skb
, cl
) < 0)
1615 goto nla_put_failure
;
1616 nla_nest_end(skb
, nest
);
1620 nla_nest_cancel(skb
, nest
);
1625 cbq_dump_class_stats(struct Qdisc
*sch
, unsigned long arg
,
1626 struct gnet_dump
*d
)
1628 struct cbq_sched_data
*q
= qdisc_priv(sch
);
1629 struct cbq_class
*cl
= (struct cbq_class
*)arg
;
1631 cl
->qstats
.qlen
= cl
->q
->q
.qlen
;
1632 cl
->xstats
.avgidle
= cl
->avgidle
;
1633 cl
->xstats
.undertime
= 0;
1635 if (cl
->undertime
!= PSCHED_PASTPERFECT
)
1636 cl
->xstats
.undertime
= cl
->undertime
- q
->now
;
1638 if (gnet_stats_copy_basic(d
, &cl
->bstats
) < 0 ||
1639 gnet_stats_copy_rate_est(d
, &cl
->rate_est
) < 0 ||
1640 gnet_stats_copy_queue(d
, &cl
->qstats
) < 0)
1643 return gnet_stats_copy_app(d
, &cl
->xstats
, sizeof(cl
->xstats
));
1646 static int cbq_graft(struct Qdisc
*sch
, unsigned long arg
, struct Qdisc
*new,
1649 struct cbq_class
*cl
= (struct cbq_class
*)arg
;
1653 if ((new = qdisc_create_dflt(sch
->dev
, &pfifo_qdisc_ops
,
1654 cl
->classid
)) == NULL
)
1657 #ifdef CONFIG_NET_CLS_ACT
1658 if (cl
->police
== TC_POLICE_RECLASSIFY
)
1659 new->reshape_fail
= cbq_reshape_fail
;
1663 *old
= xchg(&cl
->q
, new);
1664 qdisc_tree_decrease_qlen(*old
, (*old
)->q
.qlen
);
1666 sch_tree_unlock(sch
);
1673 static struct Qdisc
*
1674 cbq_leaf(struct Qdisc
*sch
, unsigned long arg
)
1676 struct cbq_class
*cl
= (struct cbq_class
*)arg
;
1678 return cl
? cl
->q
: NULL
;
1681 static void cbq_qlen_notify(struct Qdisc
*sch
, unsigned long arg
)
1683 struct cbq_class
*cl
= (struct cbq_class
*)arg
;
1685 if (cl
->q
->q
.qlen
== 0)
1686 cbq_deactivate_class(cl
);
1689 static unsigned long cbq_get(struct Qdisc
*sch
, u32 classid
)
1691 struct cbq_sched_data
*q
= qdisc_priv(sch
);
1692 struct cbq_class
*cl
= cbq_class_lookup(q
, classid
);
1696 return (unsigned long)cl
;
1701 static void cbq_destroy_class(struct Qdisc
*sch
, struct cbq_class
*cl
)
1703 struct cbq_sched_data
*q
= qdisc_priv(sch
);
1705 BUG_TRAP(!cl
->filters
);
1707 tcf_destroy_chain(cl
->filter_list
);
1708 qdisc_destroy(cl
->q
);
1709 qdisc_put_rtab(cl
->R_tab
);
1710 gen_kill_estimator(&cl
->bstats
, &cl
->rate_est
);
1716 cbq_destroy(struct Qdisc
* sch
)
1718 struct cbq_sched_data
*q
= qdisc_priv(sch
);
1719 struct cbq_class
*cl
;
1722 #ifdef CONFIG_NET_CLS_ACT
1726 * Filters must be destroyed first because we don't destroy the
1727 * classes from root to leafs which means that filters can still
1728 * be bound to classes which have been destroyed already. --TGR '04
1730 for (h
= 0; h
< 16; h
++) {
1731 for (cl
= q
->classes
[h
]; cl
; cl
= cl
->next
) {
1732 tcf_destroy_chain(cl
->filter_list
);
1733 cl
->filter_list
= NULL
;
1736 for (h
= 0; h
< 16; h
++) {
1737 struct cbq_class
*next
;
1739 for (cl
= q
->classes
[h
]; cl
; cl
= next
) {
1741 cbq_destroy_class(sch
, cl
);
1746 static void cbq_put(struct Qdisc
*sch
, unsigned long arg
)
1748 struct cbq_class
*cl
= (struct cbq_class
*)arg
;
1750 if (--cl
->refcnt
== 0) {
1751 #ifdef CONFIG_NET_CLS_ACT
1752 struct cbq_sched_data
*q
= qdisc_priv(sch
);
1754 spin_lock_bh(&sch
->dev
->queue_lock
);
1755 if (q
->rx_class
== cl
)
1757 spin_unlock_bh(&sch
->dev
->queue_lock
);
1760 cbq_destroy_class(sch
, cl
);
1765 cbq_change_class(struct Qdisc
*sch
, u32 classid
, u32 parentid
, struct nlattr
**tca
,
1769 struct cbq_sched_data
*q
= qdisc_priv(sch
);
1770 struct cbq_class
*cl
= (struct cbq_class
*)*arg
;
1771 struct nlattr
*opt
= tca
[TCA_OPTIONS
];
1772 struct nlattr
*tb
[TCA_CBQ_MAX
+ 1];
1773 struct cbq_class
*parent
;
1774 struct qdisc_rate_table
*rtab
= NULL
;
1779 err
= nla_parse_nested(tb
, TCA_CBQ_MAX
, opt
, cbq_policy
);
1786 if (cl
->tparent
&& cl
->tparent
->classid
!= parentid
)
1788 if (!cl
->tparent
&& parentid
!= TC_H_ROOT
)
1792 if (tb
[TCA_CBQ_RATE
]) {
1793 rtab
= qdisc_get_rtab(nla_data(tb
[TCA_CBQ_RATE
]), tb
[TCA_CBQ_RTAB
]);
1798 /* Change class parameters */
1801 if (cl
->next_alive
!= NULL
)
1802 cbq_deactivate_class(cl
);
1805 rtab
= xchg(&cl
->R_tab
, rtab
);
1806 qdisc_put_rtab(rtab
);
1809 if (tb
[TCA_CBQ_LSSOPT
])
1810 cbq_set_lss(cl
, nla_data(tb
[TCA_CBQ_LSSOPT
]));
1812 if (tb
[TCA_CBQ_WRROPT
]) {
1814 cbq_set_wrr(cl
, nla_data(tb
[TCA_CBQ_WRROPT
]));
1817 if (tb
[TCA_CBQ_OVL_STRATEGY
])
1818 cbq_set_overlimit(cl
, nla_data(tb
[TCA_CBQ_OVL_STRATEGY
]));
1820 #ifdef CONFIG_NET_CLS_ACT
1821 if (tb
[TCA_CBQ_POLICE
])
1822 cbq_set_police(cl
, nla_data(tb
[TCA_CBQ_POLICE
]));
1825 if (tb
[TCA_CBQ_FOPT
])
1826 cbq_set_fopt(cl
, nla_data(tb
[TCA_CBQ_FOPT
]));
1829 cbq_activate_class(cl
);
1831 sch_tree_unlock(sch
);
1834 gen_replace_estimator(&cl
->bstats
, &cl
->rate_est
,
1835 &sch
->dev
->queue_lock
,
1840 if (parentid
== TC_H_ROOT
)
1843 if (tb
[TCA_CBQ_WRROPT
] == NULL
|| tb
[TCA_CBQ_RATE
] == NULL
||
1844 tb
[TCA_CBQ_LSSOPT
] == NULL
)
1847 rtab
= qdisc_get_rtab(nla_data(tb
[TCA_CBQ_RATE
]), tb
[TCA_CBQ_RTAB
]);
1853 if (TC_H_MAJ(classid
^sch
->handle
) || cbq_class_lookup(q
, classid
))
1857 classid
= TC_H_MAKE(sch
->handle
,0x8000);
1859 for (i
=0; i
<0x8000; i
++) {
1860 if (++q
->hgenerator
>= 0x8000)
1862 if (cbq_class_lookup(q
, classid
|q
->hgenerator
) == NULL
)
1868 classid
= classid
|q
->hgenerator
;
1873 parent
= cbq_class_lookup(q
, parentid
);
1880 cl
= kzalloc(sizeof(*cl
), GFP_KERNEL
);
1886 if (!(cl
->q
= qdisc_create_dflt(sch
->dev
, &pfifo_qdisc_ops
, classid
)))
1887 cl
->q
= &noop_qdisc
;
1888 cl
->classid
= classid
;
1889 cl
->tparent
= parent
;
1891 cl
->allot
= parent
->allot
;
1892 cl
->quantum
= cl
->allot
;
1893 cl
->weight
= cl
->R_tab
->rate
.rate
;
1897 cl
->borrow
= cl
->tparent
;
1898 if (cl
->tparent
!= &q
->link
)
1899 cl
->share
= cl
->tparent
;
1900 cbq_adjust_levels(parent
);
1901 cl
->minidle
= -0x7FFFFFFF;
1902 cbq_set_lss(cl
, nla_data(tb
[TCA_CBQ_LSSOPT
]));
1903 cbq_set_wrr(cl
, nla_data(tb
[TCA_CBQ_WRROPT
]));
1904 if (cl
->ewma_log
==0)
1905 cl
->ewma_log
= q
->link
.ewma_log
;
1907 cl
->maxidle
= q
->link
.maxidle
;
1909 cl
->avpkt
= q
->link
.avpkt
;
1910 cl
->overlimit
= cbq_ovl_classic
;
1911 if (tb
[TCA_CBQ_OVL_STRATEGY
])
1912 cbq_set_overlimit(cl
, nla_data(tb
[TCA_CBQ_OVL_STRATEGY
]));
1913 #ifdef CONFIG_NET_CLS_ACT
1914 if (tb
[TCA_CBQ_POLICE
])
1915 cbq_set_police(cl
, nla_data(tb
[TCA_CBQ_POLICE
]));
1917 if (tb
[TCA_CBQ_FOPT
])
1918 cbq_set_fopt(cl
, nla_data(tb
[TCA_CBQ_FOPT
]));
1919 sch_tree_unlock(sch
);
1922 gen_new_estimator(&cl
->bstats
, &cl
->rate_est
,
1923 &sch
->dev
->queue_lock
, tca
[TCA_RATE
]);
1925 *arg
= (unsigned long)cl
;
1929 qdisc_put_rtab(rtab
);
1933 static int cbq_delete(struct Qdisc
*sch
, unsigned long arg
)
1935 struct cbq_sched_data
*q
= qdisc_priv(sch
);
1936 struct cbq_class
*cl
= (struct cbq_class
*)arg
;
1939 if (cl
->filters
|| cl
->children
|| cl
== &q
->link
)
1944 qlen
= cl
->q
->q
.qlen
;
1946 qdisc_tree_decrease_qlen(cl
->q
, qlen
);
1949 cbq_deactivate_class(cl
);
1951 if (q
->tx_borrowed
== cl
)
1952 q
->tx_borrowed
= q
->tx_class
;
1953 if (q
->tx_class
== cl
) {
1955 q
->tx_borrowed
= NULL
;
1957 #ifdef CONFIG_NET_CLS_ACT
1958 if (q
->rx_class
== cl
)
1962 cbq_unlink_class(cl
);
1963 cbq_adjust_levels(cl
->tparent
);
1965 cbq_sync_defmap(cl
);
1968 sch_tree_unlock(sch
);
1970 if (--cl
->refcnt
== 0)
1971 cbq_destroy_class(sch
, cl
);
1976 static struct tcf_proto
**cbq_find_tcf(struct Qdisc
*sch
, unsigned long arg
)
1978 struct cbq_sched_data
*q
= qdisc_priv(sch
);
1979 struct cbq_class
*cl
= (struct cbq_class
*)arg
;
1984 return &cl
->filter_list
;
1987 static unsigned long cbq_bind_filter(struct Qdisc
*sch
, unsigned long parent
,
1990 struct cbq_sched_data
*q
= qdisc_priv(sch
);
1991 struct cbq_class
*p
= (struct cbq_class
*)parent
;
1992 struct cbq_class
*cl
= cbq_class_lookup(q
, classid
);
1995 if (p
&& p
->level
<= cl
->level
)
1998 return (unsigned long)cl
;
2003 static void cbq_unbind_filter(struct Qdisc
*sch
, unsigned long arg
)
2005 struct cbq_class
*cl
= (struct cbq_class
*)arg
;
2010 static void cbq_walk(struct Qdisc
*sch
, struct qdisc_walker
*arg
)
2012 struct cbq_sched_data
*q
= qdisc_priv(sch
);
2018 for (h
= 0; h
< 16; h
++) {
2019 struct cbq_class
*cl
;
2021 for (cl
= q
->classes
[h
]; cl
; cl
= cl
->next
) {
2022 if (arg
->count
< arg
->skip
) {
2026 if (arg
->fn(sch
, (unsigned long)cl
, arg
) < 0) {
2035 static const struct Qdisc_class_ops cbq_class_ops
= {
2038 .qlen_notify
= cbq_qlen_notify
,
2041 .change
= cbq_change_class
,
2042 .delete = cbq_delete
,
2044 .tcf_chain
= cbq_find_tcf
,
2045 .bind_tcf
= cbq_bind_filter
,
2046 .unbind_tcf
= cbq_unbind_filter
,
2047 .dump
= cbq_dump_class
,
2048 .dump_stats
= cbq_dump_class_stats
,
2051 static struct Qdisc_ops cbq_qdisc_ops __read_mostly
= {
2053 .cl_ops
= &cbq_class_ops
,
2055 .priv_size
= sizeof(struct cbq_sched_data
),
2056 .enqueue
= cbq_enqueue
,
2057 .dequeue
= cbq_dequeue
,
2058 .requeue
= cbq_requeue
,
2062 .destroy
= cbq_destroy
,
2065 .dump_stats
= cbq_dump_stats
,
2066 .owner
= THIS_MODULE
,
2069 static int __init
cbq_module_init(void)
2071 return register_qdisc(&cbq_qdisc_ops
);
2073 static void __exit
cbq_module_exit(void)
2075 unregister_qdisc(&cbq_qdisc_ops
);
2077 module_init(cbq_module_init
)
2078 module_exit(cbq_module_exit
)
2079 MODULE_LICENSE("GPL");