/*
 * net/sched/sch_cbq.c	Class-Based Queueing discipline.
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 */

#include <linux/module.h>
#include <linux/slab.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/skbuff.h>
#include <net/netlink.h>
#include <net/pkt_sched.h>
struct cbq_sched_data;


struct cbq_class
{
	struct Qdisc_class_common common;
	struct cbq_class	*next_alive;	/* next class with backlog in this priority band */

/* Parameters */
	unsigned char		priority;	/* class priority */
	unsigned char		priority2;	/* priority to be used after overlimit */
	unsigned char		ewma_log;	/* time constant for idle time calculation */
	unsigned char		ovl_strategy;
#ifdef CONFIG_NET_CLS_ACT
	unsigned char		police;
#endif

	u32			defmap;

	/* Link-sharing scheduler parameters */
	long			maxidle;	/* Class parameters: see below. */
	long			offtime;
	long			minidle;
	u32			avpkt;
	struct qdisc_rate_table	*R_tab;

	/* Overlimit strategy parameters */
	void			(*overlimit)(struct cbq_class *cl);
	psched_tdiff_t		penalty;

	/* General scheduler (WRR) parameters */
	long			allot;
	long			quantum;	/* Allotment per WRR round */
	long			weight;		/* Relative allotment: see below */

	struct Qdisc		*qdisc;		/* Ptr to CBQ discipline */
	struct cbq_class	*split;		/* Ptr to split node */
	struct cbq_class	*share;		/* Ptr to LS parent in the class tree */
	struct cbq_class	*tparent;	/* Ptr to tree parent in the class tree */
	struct cbq_class	*borrow;	/* NULL if class is bandwidth limited;
						   parent otherwise */
	struct cbq_class	*sibling;	/* Sibling chain */
	struct cbq_class	*children;	/* Pointer to children chain */

	struct Qdisc		*q;		/* Elementary queueing discipline */


/* Variables */
	unsigned char		cpriority;	/* Effective priority */
	unsigned char		delayed;
	unsigned char		level;		/* level of the class in hierarchy:
						   0 for leaf classes, and maximal
						   level of children + 1 for nodes.
						 */

	psched_time_t		last;		/* Last end of service */
	psched_time_t		undertime;
	long			avgidle;
	long			deficit;	/* Saved deficit for WRR */
	psched_time_t		penalized;
	struct gnet_stats_basic_packed bstats;
	struct gnet_stats_queue qstats;
	struct gnet_stats_rate_est rate_est;
	struct tc_cbq_xstats	xstats;

	struct tcf_proto	*filter_list;

	int			refcnt;
	int			filters;

	struct cbq_class	*defaults[TC_PRIO_MAX+1];
};

struct cbq_sched_data
{
	struct Qdisc_class_hash	clhash;			/* Hash table of all classes */
	int			nclasses[TC_CBQ_MAXPRIO+1];
	unsigned		quanta[TC_CBQ_MAXPRIO+1];

	struct cbq_class	link;

	unsigned		activemask;
	struct cbq_class	*active[TC_CBQ_MAXPRIO+1];	/* List of all classes
								   with backlog */

#ifdef CONFIG_NET_CLS_ACT
	struct cbq_class	*rx_class;
#endif
	struct cbq_class	*tx_class;
	struct cbq_class	*tx_borrowed;
	int			tx_len;
	psched_time_t		now;			/* Cached timestamp */
	psched_time_t		now_rt;			/* Cached real time */
	unsigned		pmask;

	struct hrtimer		delay_timer;
	struct qdisc_watchdog	watchdog;	/* Watchdog timer,
						   started when CBQ has
						   backlog, but cannot
						   transmit just now */
	psched_tdiff_t		wd_expires;
	int			toplevel;
	u32			hgenerator;
};
#define L2T(cl,len)	qdisc_l2t((cl)->R_tab,len)
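
/* Note: L2T ("length to time") converts a packet length into its
 * transmission time at this class' configured rate, via the rate table
 * downloaded from userspace. All avgidle/undertime arithmetic below is
 * carried out in these scheduler clock ticks.
 */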
static __inline__ struct cbq_class *
cbq_class_lookup(struct cbq_sched_data *q, u32 classid)
{
	struct Qdisc_class_common *clc;

	clc = qdisc_class_find(&q->clhash, classid);
	if (clc == NULL)
		return NULL;
	return container_of(clc, struct cbq_class, common);
}
#ifdef CONFIG_NET_CLS_ACT

static struct cbq_class *
cbq_reclassify(struct sk_buff *skb, struct cbq_class *this)
{
	struct cbq_class *cl, *new;

	for (cl = this->tparent; cl; cl = cl->tparent)
		if ((new = cl->defaults[TC_PRIO_BESTEFFORT]) != NULL && new != this)
			return new;

	return NULL;
}

#endif
/* Classify packet. The procedure is pretty complicated, but
   it allows us to combine link sharing and priority scheduling
   transparently.

   Namely, you can put link sharing rules (f.e. route based) at root of CBQ,
   so that it resolves to split nodes. Then packets are classified
   by logical priority, or a more specific classifier may be attached
   to the split node.
 */
static struct cbq_class *
cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *head = &q->link;
	struct cbq_class **defmap;
	struct cbq_class *cl = NULL;
	u32 prio = skb->priority;
	struct tcf_result res;

	/*
	 *  Step 1. If skb->priority points to one of our classes, use it.
	 */
	if (TC_H_MAJ(prio^sch->handle) == 0 &&
	    (cl = cbq_class_lookup(q, prio)) != NULL)
		return cl;

	*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
	for (;;) {
		int result = 0;
		defmap = head->defaults;

		/*
		 * Step 2+n. Apply classifier.
		 */
		if (!head->filter_list ||
		    (result = tc_classify_compat(skb, head->filter_list, &res)) < 0)
			goto fallback;

		if ((cl = (void*)res.class) == NULL) {
			if (TC_H_MAJ(res.classid))
				cl = cbq_class_lookup(q, res.classid);
			else if ((cl = defmap[res.classid&TC_PRIO_MAX]) == NULL)
				cl = defmap[TC_PRIO_BESTEFFORT];

			if (cl == NULL || cl->level >= head->level)
				goto fallback;
		}

#ifdef CONFIG_NET_CLS_ACT
		switch (result) {
		case TC_ACT_QUEUED:
		case TC_ACT_STOLEN:
			*qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
		case TC_ACT_SHOT:
			return NULL;
		case TC_ACT_RECLASSIFY:
			return cbq_reclassify(skb, cl);
		}
#endif
		if (cl->level == 0)
			return cl;

		/*
		 * Step 3+n. If classifier selected a link sharing class,
		 *	 apply agency specific classifier.
		 *	 Repeat this procedure until we hit a leaf node.
		 */
		head = cl;
	}

fallback:
	cl = head;

	/*
	 * Step 4. No success...
	 */
	if (TC_H_MAJ(prio) == 0 &&
	    !(cl = head->defaults[prio&TC_PRIO_MAX]) &&
	    !(cl = head->defaults[TC_PRIO_BESTEFFORT]))
		return head;

	return cl;
}
/*
   A packet has just been enqueued on the empty class.
   cbq_activate_class adds it to the tail of active class list
   of its priority band.
 */

static __inline__ void cbq_activate_class(struct cbq_class *cl)
{
	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
	int prio = cl->cpriority;
	struct cbq_class *cl_tail;

	cl_tail = q->active[prio];
	q->active[prio] = cl;

	if (cl_tail != NULL) {
		cl->next_alive = cl_tail->next_alive;
		cl_tail->next_alive = cl;
	} else {
		cl->next_alive = cl;
		q->activemask |= (1<<prio);
	}
}
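
/* Note on the list discipline above: each active[] entry is a circular
 * singly linked list threaded through next_alive. q->active[prio] always
 * points at the most recently appended class (the tail), so
 * q->active[prio]->next_alive is the head that the WRR round starts from.
 */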
/*
   Unlink class from active chain.
   Note that this same procedure is done directly in cbq_dequeue*
   during round-robin procedure.
 */

static void cbq_deactivate_class(struct cbq_class *this)
{
	struct cbq_sched_data *q = qdisc_priv(this->qdisc);
	int prio = this->cpriority;
	struct cbq_class *cl;
	struct cbq_class *cl_prev = q->active[prio];

	do {
		cl = cl_prev->next_alive;
		if (cl == this) {
			cl_prev->next_alive = cl->next_alive;
			cl->next_alive = NULL;

			if (cl == q->active[prio]) {
				q->active[prio] = cl_prev;
				if (cl == q->active[prio]) {
					q->active[prio] = NULL;
					q->activemask &= ~(1<<prio);
					return;
				}
			}
			return;
		}
	} while ((cl_prev = cl) != q->active[prio]);
}
static void
cbq_mark_toplevel(struct cbq_sched_data *q, struct cbq_class *cl)
{
	int toplevel = q->toplevel;

	if (toplevel > cl->level && !(cl->q->flags&TCQ_F_THROTTLED)) {
		psched_time_t now;
		psched_tdiff_t incr;

		now = psched_get_time();
		incr = now - q->now_rt;
		now = q->now + incr;

		do {
			if (cl->undertime < now) {
				q->toplevel = cl->level;
				return;
			}
		} while ((cl=cl->borrow) != NULL && toplevel > cl->level);
	}
}
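
/* Note: q->toplevel implements the "top-level" link-sharing guideline
 * from Floyd & Jacobson: borrowing is permitted only from ancestors at
 * levels not above the cutoff. The marking here happens on enqueue;
 * cbq_under_limit() and cbq_dequeue() consult and reset the cutoff.
 */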
static int
cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	int uninitialized_var(ret);
	struct cbq_class *cl = cbq_classify(skb, sch, &ret);

#ifdef CONFIG_NET_CLS_ACT
	q->rx_class = cl;
#endif
	if (cl == NULL) {
		if (ret & __NET_XMIT_BYPASS)
			sch->qstats.drops++;
		kfree_skb(skb);
		return ret;
	}

#ifdef CONFIG_NET_CLS_ACT
	cl->q->__parent = sch;
#endif
	ret = qdisc_enqueue(skb, cl->q);
	if (ret == NET_XMIT_SUCCESS) {
		sch->q.qlen++;
		sch->bstats.packets++;
		sch->bstats.bytes += qdisc_pkt_len(skb);
		cbq_mark_toplevel(q, cl);
		if (!cl->next_alive)
			cbq_activate_class(cl);
		return ret;
	}

	if (net_xmit_drop_count(ret)) {
		sch->qstats.drops++;
		cbq_mark_toplevel(q, cl);
		cl->qstats.drops++;
	}
	return ret;
}
/* Overlimit actions */

/* TC_CBQ_OVL_CLASSIC: (default) penalize leaf class by adding offtime */

static void cbq_ovl_classic(struct cbq_class *cl)
{
	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
	psched_tdiff_t delay = cl->undertime - q->now;

	if (!cl->delayed) {
		delay += cl->offtime;

		/*
		   Class goes to sleep, so that it will have no
		   chance to work avgidle. Let's forgive it 8)

		   BTW cbq-2.0 has a bug here: apparently they
		   forgot to shift it by cl->ewma_log.
		 */
		if (cl->avgidle < 0)
			delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
		if (cl->avgidle < cl->minidle)
			cl->avgidle = cl->minidle;
		if (delay <= 0)
			delay = 1;
		cl->undertime = q->now + delay;

		cl->xstats.overactions++;
		cl->delayed = 1;
	}
	if (q->wd_expires == 0 || q->wd_expires > delay)
		q->wd_expires = delay;

	/* Dirty work! We must schedule wakeups based on
	   real available rate, rather than leaf rate,
	   which may be tiny (even zero).
	 */
	if (q->toplevel == TC_CBQ_MAXLEVEL) {
		struct cbq_class *b;
		psched_tdiff_t base_delay = q->wd_expires;

		for (b = cl->borrow; b; b = b->borrow) {
			delay = b->undertime - q->now;
			if (delay < base_delay) {
				if (delay <= 0)
					delay = 1;
				base_delay = delay;
			}
		}

		q->wd_expires = base_delay;
	}
}
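
/* Informal example of the "forgiveness" above: avgidle is stored scaled
 * by 1/W (W = 2^-ewma_log). With ewma_log = 5 and avgidle = -640, the
 * subtraction removes 640 - (640 >> 5) = 620 ticks from the sleep, i.e.
 * the extra time the negative average would otherwise still have to
 * decay before the class went underlimit again. Figures are only
 * illustrative.
 */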
/* TC_CBQ_OVL_RCLASSIC: penalize by offtime classes in hierarchy, when
   they go overlimit
 */

static void cbq_ovl_rclassic(struct cbq_class *cl)
{
	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
	struct cbq_class *this = cl;

	do {
		if (cl->level > q->toplevel) {
			cl = NULL;
			break;
		}
	} while ((cl = cl->borrow) != NULL);

	if (cl == NULL)
		cl = this;
	cbq_ovl_classic(cl);
}
/* TC_CBQ_OVL_DELAY: delay until it will go to underlimit */

static void cbq_ovl_delay(struct cbq_class *cl)
{
	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
	psched_tdiff_t delay = cl->undertime - q->now;

	if (test_bit(__QDISC_STATE_DEACTIVATED,
		     &qdisc_root_sleeping(cl->qdisc)->state))
		return;

	if (!cl->delayed) {
		psched_time_t sched = q->now;
		ktime_t expires;

		delay += cl->offtime;
		if (cl->avgidle < 0)
			delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
		if (cl->avgidle < cl->minidle)
			cl->avgidle = cl->minidle;
		cl->undertime = q->now + delay;

		if (delay > 0) {
			sched += delay + cl->penalty;
			cl->penalized = sched;
			cl->cpriority = TC_CBQ_MAXPRIO;
			q->pmask |= (1<<TC_CBQ_MAXPRIO);

			expires = ktime_set(0, 0);
			expires = ktime_add_ns(expires, PSCHED_TICKS2NS(sched));
			if (hrtimer_try_to_cancel(&q->delay_timer) &&
			    ktime_to_ns(ktime_sub(
					hrtimer_get_expires(&q->delay_timer),
					expires)) > 0)
				hrtimer_set_expires(&q->delay_timer, expires);
			hrtimer_restart(&q->delay_timer);
			cl->delayed = 1;
			cl->xstats.overactions++;
			return;
		}
		delay = 1;
	}
	if (q->wd_expires == 0 || q->wd_expires > delay)
		q->wd_expires = delay;
}
/* TC_CBQ_OVL_LOWPRIO: penalize class by lowering its priority band */

static void cbq_ovl_lowprio(struct cbq_class *cl)
{
	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);

	cl->penalized = q->now + cl->penalty;

	if (cl->cpriority != cl->priority2) {
		cl->cpriority = cl->priority2;
		q->pmask |= (1<<cl->cpriority);
		cl->xstats.overactions++;
	}
	cbq_ovl_classic(cl);
}

/* TC_CBQ_OVL_DROP: penalize class by dropping */

static void cbq_ovl_drop(struct cbq_class *cl)
{
	if (cl->q->ops->drop)
		if (cl->q->ops->drop(cl->q))
			cl->qdisc->q.qlen--;
	cl->xstats.overactions++;
	cbq_ovl_classic(cl);
}
static psched_tdiff_t cbq_undelay_prio(struct cbq_sched_data *q, int prio,
				       psched_time_t now)
{
	struct cbq_class *cl;
	struct cbq_class *cl_prev = q->active[prio];
	psched_time_t sched = now;

	if (cl_prev == NULL)
		return 0;

	do {
		cl = cl_prev->next_alive;
		if (now - cl->penalized > 0) {
			cl_prev->next_alive = cl->next_alive;
			cl->next_alive = NULL;
			cl->cpriority = cl->priority;
			cl->delayed = 0;
			cbq_activate_class(cl);

			if (cl == q->active[prio]) {
				q->active[prio] = cl_prev;
				if (cl == q->active[prio]) {
					q->active[prio] = NULL;
					return 0;
				}
			}

			cl = cl_prev->next_alive;
		} else if (sched - cl->penalized > 0)
			sched = cl->penalized;
	} while ((cl_prev = cl) != q->active[prio]);

	return sched - now;
}
static enum hrtimer_restart cbq_undelay(struct hrtimer *timer)
{
	struct cbq_sched_data *q = container_of(timer, struct cbq_sched_data,
						delay_timer);
	struct Qdisc *sch = q->watchdog.qdisc;
	psched_time_t now;
	psched_tdiff_t delay = 0;
	unsigned pmask;

	now = psched_get_time();

	pmask = q->pmask;
	q->pmask = 0;

	while (pmask) {
		int prio = ffz(~pmask);
		psched_tdiff_t tmp;

		pmask &= ~(1<<prio);

		tmp = cbq_undelay_prio(q, prio, now);
		if (tmp > 0) {
			q->pmask |= 1<<prio;
			if (tmp < delay || delay == 0)
				delay = tmp;
		}
	}

	if (delay) {
		ktime_t time;

		time = ktime_set(0, 0);
		time = ktime_add_ns(time, PSCHED_TICKS2NS(now + delay));
		hrtimer_start(&q->delay_timer, time, HRTIMER_MODE_ABS);
	}

	sch->flags &= ~TCQ_F_THROTTLED;
	__netif_schedule(qdisc_root(sch));
	return HRTIMER_NORESTART;
}
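
/* Note: OVL_DELAY and OVL_LOWPRIO park penalized classes in band
 * TC_CBQ_MAXPRIO (resp. priority2) and record the wakeup moment in
 * cl->penalized; pmask tracks which bands hold penalized classes, so
 * the hrtimer handler above only rescans those bands and restores each
 * class to its configured priority once its penalty expires.
 */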
#ifdef CONFIG_NET_CLS_ACT
static int cbq_reshape_fail(struct sk_buff *skb, struct Qdisc *child)
{
	struct Qdisc *sch = child->__parent;
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *cl = q->rx_class;

	q->rx_class = NULL;

	if (cl && (cl = cbq_reclassify(skb, cl)) != NULL) {
		int ret;

		cbq_mark_toplevel(q, cl);

		q->rx_class = cl;
		cl->q->__parent = sch;

		ret = qdisc_enqueue(skb, cl->q);
		if (ret == NET_XMIT_SUCCESS) {
			sch->q.qlen++;
			sch->bstats.packets++;
			sch->bstats.bytes += qdisc_pkt_len(skb);
			if (!cl->next_alive)
				cbq_activate_class(cl);
			return 0;
		}
		if (net_xmit_drop_count(ret))
			sch->qstats.drops++;
		return 0;
	}

	sch->qstats.drops++;
	return -1;
}
#endif
/*
   This is a mission-critical procedure.

   We "regenerate" toplevel cutoff, if transmitting class
   has backlog and it is not regulated. It is not part of
   original CBQ description, but looks more reasonable.
   Probably, it is wrong. This question needs further investigation.
 */

static __inline__ void
cbq_update_toplevel(struct cbq_sched_data *q, struct cbq_class *cl,
		    struct cbq_class *borrowed)
{
	if (cl && q->toplevel >= borrowed->level) {
		if (cl->q->q.qlen > 1) {
			do {
				if (borrowed->undertime == PSCHED_PASTPERFECT) {
					q->toplevel = borrowed->level;
					return;
				}
			} while ((borrowed=borrowed->borrow) != NULL);
		}
	}
}
static void
cbq_update(struct cbq_sched_data *q)
{
	struct cbq_class *this = q->tx_class;
	struct cbq_class *cl = this;
	int len = q->tx_len;

	q->tx_class = NULL;

	for ( ; cl; cl = cl->share) {
		long avgidle = cl->avgidle;
		long idle;

		cl->bstats.packets++;
		cl->bstats.bytes += len;

		/*
		   (now - last) is total time between packet right edges.
		   (last_pktlen/rate) is "virtual" busy time, so that

			 idle = (now - last) - last_pktlen/rate
		 */

		idle = q->now - cl->last;
		if ((unsigned long)idle > 128*1024*1024) {
			avgidle = cl->maxidle;
		} else {
			idle -= L2T(cl, len);

		/* true_avgidle := (1-W)*true_avgidle + W*idle,
		   where W=2^{-ewma_log}. But cl->avgidle is scaled:
		   cl->avgidle == true_avgidle/W,
		   hence:
		 */
			avgidle += idle - (avgidle>>cl->ewma_log);
		}

		if (avgidle <= 0) {
			/* Overlimit or at-limit */

			if (avgidle < cl->minidle)
				avgidle = cl->minidle;

			cl->avgidle = avgidle;

			/* Calculate expected time, when this class
			   will be allowed to send.
			   It will occur, when:
			   (1-W)*true_avgidle + W*delay = 0, i.e.
			   idle = (1/W - 1)*(-true_avgidle)
			   or
			   idle = (1 - W)*(-cl->avgidle);
			 */
			idle = (-avgidle) - ((-avgidle) >> cl->ewma_log);

			/*
			   That is not all.
			   To maintain the rate allocated to the class,
			   we add to undertime virtual clock,
			   necessary to complete transmitted packet.
			   (len/phys_bandwidth has been already passed
			   to the moment of cbq_update)
			 */

			idle -= L2T(&q->link, len);
			idle += L2T(cl, len);

			cl->undertime = q->now + idle;
		} else {
			/* Underlimit */

			cl->undertime = PSCHED_PASTPERFECT;
			if (avgidle > cl->maxidle)
				cl->avgidle = cl->maxidle;
			else
				cl->avgidle = avgidle;
		}
		cl->last = q->now;
	}

	cbq_update_toplevel(q, this, q->tx_borrowed);
}
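
/* Worked numbers for the EWMA above (illustrative only): with
 * ewma_log = 5 (W = 1/32), avgidle = 320 (true average 10 ticks) and a
 * measured idle of -32 ticks, the update gives
 * avgidle = 320 + (-32) - (320 >> 5) = 278, which is the scaled form of
 * (1-W)*10 + W*(-32) = 8.6875 ticks, since 278/32 = 8.6875.
 */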
static __inline__ struct cbq_class *
cbq_under_limit(struct cbq_class *cl)
{
	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
	struct cbq_class *this_cl = cl;

	if (cl->tparent == NULL)
		return cl;

	if (cl->undertime == PSCHED_PASTPERFECT || q->now >= cl->undertime) {
		cl->delayed = 0;
		return cl;
	}

	do {
		/* This is a very suspicious place. Currently the overlimit
		   action is generated for unbounded classes
		   only if the link is completely congested.
		   Though it agrees with the ancestor-only paradigm,
		   it looks very stupid. In particular,
		   it means that this chunk of code will either
		   never be called or will result in a strong amplification
		   of burstiness. Dangerous, silly, and, however,
		   no other solution exists.
		 */
		if ((cl = cl->borrow) == NULL) {
			this_cl->qstats.overlimits++;
			this_cl->overlimit(this_cl);
			return NULL;
		}
		if (cl->level > q->toplevel)
			return NULL;
	} while (cl->undertime != PSCHED_PASTPERFECT && q->now < cl->undertime);

	cl->delayed = 0;
	return cl;
}
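
/* Note: the contract here is that cbq_under_limit() returns the first
 * class on the borrow chain that may send right now (possibly the class
 * itself), or NULL when the overlimit action was invoked or the toplevel
 * cutoff forbids borrowing; cbq_dequeue_prio() then skips the class for
 * this round.
 */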
static __inline__ struct sk_buff *
cbq_dequeue_prio(struct Qdisc *sch, int prio)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *cl_tail, *cl_prev, *cl;
	struct sk_buff *skb;
	int deficit;

	cl_tail = cl_prev = q->active[prio];
	cl = cl_prev->next_alive;

	do {
		deficit = 0;

		/* Start round */
		do {
			struct cbq_class *borrow = cl;

			if (cl->q->q.qlen &&
			    (borrow = cbq_under_limit(cl)) == NULL)
				goto skip_class;

			if (cl->deficit <= 0) {
				/* Class exhausted its allotment per
				   this round. Switch to the next one.
				 */
				deficit = 1;
				cl->deficit += cl->quantum;
				goto next_class;
			}

			skb = cl->q->dequeue(cl->q);

			/* Class did not give us any skb :-(
			   It could occur even if cl->q->q.qlen != 0
			   f.e. if cl->q == "tbf"
			 */
			if (skb == NULL)
				goto skip_class;

			cl->deficit -= qdisc_pkt_len(skb);
			q->tx_class = cl;
			q->tx_borrowed = borrow;
			if (borrow != cl) {
#ifndef CBQ_XSTATS_BORROWS_BYTES
				borrow->xstats.borrows++;
				cl->xstats.borrows++;
#else
				borrow->xstats.borrows += qdisc_pkt_len(skb);
				cl->xstats.borrows += qdisc_pkt_len(skb);
#endif
			}
			q->tx_len = qdisc_pkt_len(skb);

			if (cl->deficit <= 0) {
				q->active[prio] = cl;
				cl = cl->next_alive;
				cl->deficit += cl->quantum;
			}
			return skb;

skip_class:
			if (cl->q->q.qlen == 0 || prio != cl->cpriority) {
				/* Class is empty or penalized.
				   Unlink it from active chain.
				 */
				cl_prev->next_alive = cl->next_alive;
				cl->next_alive = NULL;

				/* Did cl_tail point to it? */
				if (cl == cl_tail) {
					/* Repair it! */
					cl_tail = cl_prev;

					/* Was it the last class in this band? */
					if (cl == cl_tail) {
						/* Kill the band! */
						q->active[prio] = NULL;
						q->activemask &= ~(1<<prio);
						if (cl->q->q.qlen)
							cbq_activate_class(cl);
						return NULL;
					}

					q->active[prio] = cl_tail;
				}
				if (cl->q->q.qlen)
					cbq_activate_class(cl);

				cl = cl_prev;
			}

next_class:
			cl_prev = cl;
			cl = cl->next_alive;
		} while (cl_prev != cl_tail);
	} while (deficit);

	q->active[prio] = cl_prev;

	return NULL;
}
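
/* Note on the WRR variant above: this is deficit round robin per band.
 * A class may send while its deficit is positive; on exhaustion it is
 * recharged by cl->quantum and the round moves on, so quanta smaller
 * than the MTU still yield correct long-run weights across rounds.
 */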
static __inline__ struct sk_buff *
cbq_dequeue_1(struct Qdisc *sch)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct sk_buff *skb;
	unsigned activemask;

	activemask = q->activemask&0xFF;
	while (activemask) {
		int prio = ffz(~activemask);
		activemask &= ~(1<<prio);
		skb = cbq_dequeue_prio(sch, prio);
		if (skb)
			return skb;
	}
	return NULL;
}
static struct sk_buff *
cbq_dequeue(struct Qdisc *sch)
{
	struct sk_buff *skb;
	struct cbq_sched_data *q = qdisc_priv(sch);
	psched_time_t now;
	psched_tdiff_t incr;

	now = psched_get_time();
	incr = now - q->now_rt;

	if (q->tx_class) {
		psched_tdiff_t incr2;
		/* Time integrator. We calculate EOS time
		   by adding expected packet transmission time.
		   If real time is greater, we warp artificial clock,
		   so that:

		   cbq_time = max(real_time, work);
		 */
		incr2 = L2T(&q->link, q->tx_len);
		q->now += incr2;
		cbq_update(q);
		if ((incr -= incr2) < 0)
			incr = 0;
	}
	q->now += incr;
	q->now_rt = now;

	for (;;) {
		q->wd_expires = 0;

		skb = cbq_dequeue_1(sch);
		if (skb) {
			sch->q.qlen--;
			sch->flags &= ~TCQ_F_THROTTLED;
			return skb;
		}

		/* All the classes are overlimit.

		   It is possible, if:
		   1. Scheduler is empty.
		   2. Toplevel cutoff inhibited borrowing.
		   3. Root class is overlimit.

		   Reset conditions 2 and 3 and retry.

		   Note that NS and cbq-2.0 are buggy: peeking
		   an arbitrary class is appropriate for ancestor-only
		   sharing, but not for the toplevel algorithm.

		   Our version is better but slower, because it requires
		   two passes; this is unavoidable with top-level sharing.
		*/

		if (q->toplevel == TC_CBQ_MAXLEVEL &&
		    q->link.undertime == PSCHED_PASTPERFECT)
			break;

		q->toplevel = TC_CBQ_MAXLEVEL;
		q->link.undertime = PSCHED_PASTPERFECT;
	}

	/* No packets in scheduler or nobody wants to give them to us :-(
	   Sigh... start watchdog timer in the last case. */

	if (sch->q.qlen) {
		sch->qstats.overlimits++;
		if (q->wd_expires)
			qdisc_watchdog_schedule(&q->watchdog,
						now + q->wd_expires);
	}
	return NULL;
}
/* CBQ class maintenance routines */

static void cbq_adjust_levels(struct cbq_class *this)
{
	if (this == NULL)
		return;

	do {
		int level = 0;
		struct cbq_class *cl;

		if ((cl = this->children) != NULL) {
			do {
				if (cl->level > level)
					level = cl->level;
			} while ((cl = cl->sibling) != this->children);
		}
		this->level = level+1;
	} while ((this = this->tparent) != NULL);
}
static void cbq_normalize_quanta(struct cbq_sched_data *q, int prio)
{
	struct cbq_class *cl;
	struct hlist_node *n;
	unsigned int h;

	if (q->quanta[prio] == 0)
		return;

	for (h = 0; h < q->clhash.hashsize; h++) {
		hlist_for_each_entry(cl, n, &q->clhash.hash[h], common.hnode) {
			/* BUGGGG... Beware! This expression suffers from
			   arithmetic overflows!
			 */
			if (cl->priority == prio) {
				cl->quantum = (cl->weight*cl->allot*q->nclasses[prio])/
					q->quanta[prio];
			}
			if (cl->quantum <= 0 || cl->quantum>32*qdisc_dev(cl->qdisc)->mtu) {
				printk(KERN_WARNING "CBQ: class %08x has bad quantum==%ld, repaired.\n",
				       cl->common.classid, cl->quantum);
				cl->quantum = qdisc_dev(cl->qdisc)->mtu/2 + 1;
			}
		}
	}
}
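
/* Illustrative numbers for the normalization above: two classes in one
 * band with allot = 1514 and weights 1 and 3 give quanta[prio] = 4 and
 * nclasses[prio] = 2, so their quanta become 1*1514*2/4 = 757 and
 * 3*1514*2/4 = 2271 bytes per WRR round, preserving the 1:3 ratio while
 * keeping the average quantum near the allot.
 */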
static void cbq_sync_defmap(struct cbq_class *cl)
{
	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
	struct cbq_class *split = cl->split;
	unsigned int h;
	int i;

	if (split == NULL)
		return;

	for (i=0; i<=TC_PRIO_MAX; i++) {
		if (split->defaults[i] == cl && !(cl->defmap&(1<<i)))
			split->defaults[i] = NULL;
	}

	for (i=0; i<=TC_PRIO_MAX; i++) {
		int level = split->level;

		if (split->defaults[i])
			continue;

		for (h = 0; h < q->clhash.hashsize; h++) {
			struct hlist_node *n;
			struct cbq_class *c;

			hlist_for_each_entry(c, n, &q->clhash.hash[h],
					     common.hnode) {
				if (c->split == split && c->level < level &&
				    c->defmap&(1<<i)) {
					split->defaults[i] = c;
					level = c->level;
				}
			}
		}
	}
}
static void cbq_change_defmap(struct cbq_class *cl, u32 splitid, u32 def, u32 mask)
{
	struct cbq_class *split = NULL;

	if (splitid == 0) {
		if ((split = cl->split) == NULL)
			return;
		splitid = split->common.classid;
	}

	if (split == NULL || split->common.classid != splitid) {
		for (split = cl->tparent; split; split = split->tparent)
			if (split->common.classid == splitid)
				break;
	}

	if (split == NULL)
		return;

	if (cl->split != split) {
		cl->defmap = 0;
		cbq_sync_defmap(cl);
		cl->split = split;
		cl->defmap = def&mask;
	} else
		cl->defmap = (cl->defmap&~mask)|(def&mask);

	cbq_sync_defmap(cl);
}
static void cbq_unlink_class(struct cbq_class *this)
{
	struct cbq_class *cl, **clp;
	struct cbq_sched_data *q = qdisc_priv(this->qdisc);

	qdisc_class_hash_remove(&q->clhash, &this->common);

	if (this->tparent) {
		clp=&this->sibling;
		cl = *clp;
		do {
			if (cl == this) {
				*clp = cl->sibling;
				break;
			}
			clp = &cl->sibling;
		} while ((cl = *clp) != this->sibling);

		if (this->tparent->children == this) {
			this->tparent->children = this->sibling;
			if (this->sibling == this)
				this->tparent->children = NULL;
		}
	} else {
		WARN_ON(this->sibling != this);
	}
}
static void cbq_link_class(struct cbq_class *this)
{
	struct cbq_sched_data *q = qdisc_priv(this->qdisc);
	struct cbq_class *parent = this->tparent;

	this->sibling = this;
	qdisc_class_hash_insert(&q->clhash, &this->common);

	if (parent == NULL)
		return;

	if (parent->children == NULL) {
		parent->children = this;
	} else {
		this->sibling = parent->children->sibling;
		parent->children->sibling = this;
	}
}
static unsigned int cbq_drop(struct Qdisc* sch)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *cl, *cl_head;
	int prio;
	unsigned int len;

	for (prio = TC_CBQ_MAXPRIO; prio >= 0; prio--) {
		if ((cl_head = q->active[prio]) == NULL)
			continue;

		cl = cl_head;
		do {
			if (cl->q->ops->drop && (len = cl->q->ops->drop(cl->q))) {
				sch->q.qlen--;
				if (!cl->q->q.qlen)
					cbq_deactivate_class(cl);
				return len;
			}
		} while ((cl = cl->next_alive) != cl_head);
	}
	return 0;
}
static void
cbq_reset(struct Qdisc* sch)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *cl;
	struct hlist_node *n;
	int prio;
	unsigned int h;

	q->activemask = 0;
	q->pmask = 0;
	q->tx_class = NULL;
	q->tx_borrowed = NULL;
	qdisc_watchdog_cancel(&q->watchdog);
	hrtimer_cancel(&q->delay_timer);
	q->toplevel = TC_CBQ_MAXLEVEL;
	q->now = psched_get_time();
	q->now_rt = q->now;

	for (prio = 0; prio <= TC_CBQ_MAXPRIO; prio++)
		q->active[prio] = NULL;

	for (h = 0; h < q->clhash.hashsize; h++) {
		hlist_for_each_entry(cl, n, &q->clhash.hash[h], common.hnode) {
			qdisc_reset(cl->q);

			cl->next_alive = NULL;
			cl->undertime = PSCHED_PASTPERFECT;
			cl->avgidle = cl->maxidle;
			cl->deficit = cl->quantum;
			cl->cpriority = cl->priority;
		}
	}
	sch->q.qlen = 0;
}
static int cbq_set_lss(struct cbq_class *cl, struct tc_cbq_lssopt *lss)
{
	if (lss->change&TCF_CBQ_LSS_FLAGS) {
		cl->share = (lss->flags&TCF_CBQ_LSS_ISOLATED) ? NULL : cl->tparent;
		cl->borrow = (lss->flags&TCF_CBQ_LSS_BOUNDED) ? NULL : cl->tparent;
	}
	if (lss->change&TCF_CBQ_LSS_EWMA)
		cl->ewma_log = lss->ewma_log;
	if (lss->change&TCF_CBQ_LSS_AVPKT)
		cl->avpkt = lss->avpkt;
	if (lss->change&TCF_CBQ_LSS_MINIDLE)
		cl->minidle = -(long)lss->minidle;
	if (lss->change&TCF_CBQ_LSS_MAXIDLE) {
		cl->maxidle = lss->maxidle;
		cl->avgidle = lss->maxidle;
	}
	if (lss->change&TCF_CBQ_LSS_OFFTIME)
		cl->offtime = lss->offtime;
	return 0;
}
static void cbq_rmprio(struct cbq_sched_data *q, struct cbq_class *cl)
{
	q->nclasses[cl->priority]--;
	q->quanta[cl->priority] -= cl->weight;
	cbq_normalize_quanta(q, cl->priority);
}

static void cbq_addprio(struct cbq_sched_data *q, struct cbq_class *cl)
{
	q->nclasses[cl->priority]++;
	q->quanta[cl->priority] += cl->weight;
	cbq_normalize_quanta(q, cl->priority);
}
static int cbq_set_wrr(struct cbq_class *cl, struct tc_cbq_wrropt *wrr)
{
	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);

	if (wrr->allot)
		cl->allot = wrr->allot;
	if (wrr->weight)
		cl->weight = wrr->weight;
	if (wrr->priority) {
		cl->priority = wrr->priority-1;
		cl->cpriority = cl->priority;
		if (cl->priority >= cl->priority2)
			cl->priority2 = TC_CBQ_MAXPRIO-1;
	}

	cbq_addprio(q, cl);
	return 0;
}
static int cbq_set_overlimit(struct cbq_class *cl, struct tc_cbq_ovl *ovl)
{
	switch (ovl->strategy) {
	case TC_CBQ_OVL_CLASSIC:
		cl->overlimit = cbq_ovl_classic;
		break;
	case TC_CBQ_OVL_DELAY:
		cl->overlimit = cbq_ovl_delay;
		break;
	case TC_CBQ_OVL_LOWPRIO:
		if (ovl->priority2-1 >= TC_CBQ_MAXPRIO ||
		    ovl->priority2-1 <= cl->priority)
			return -EINVAL;
		cl->priority2 = ovl->priority2-1;
		cl->overlimit = cbq_ovl_lowprio;
		break;
	case TC_CBQ_OVL_DROP:
		cl->overlimit = cbq_ovl_drop;
		break;
	case TC_CBQ_OVL_RCLASSIC:
		cl->overlimit = cbq_ovl_rclassic;
		break;
	default:
		return -EINVAL;
	}
	cl->penalty = ovl->penalty;
	return 0;
}
#ifdef CONFIG_NET_CLS_ACT
static int cbq_set_police(struct cbq_class *cl, struct tc_cbq_police *p)
{
	cl->police = p->police;

	if (cl->q->handle) {
		if (p->police == TC_POLICE_RECLASSIFY)
			cl->q->reshape_fail = cbq_reshape_fail;
		else
			cl->q->reshape_fail = NULL;
	}
	return 0;
}
#endif
static int cbq_set_fopt(struct cbq_class *cl, struct tc_cbq_fopt *fopt)
{
	cbq_change_defmap(cl, fopt->split, fopt->defmap, fopt->defchange);
	return 0;
}
static const struct nla_policy cbq_policy[TCA_CBQ_MAX + 1] = {
	[TCA_CBQ_LSSOPT]	= { .len = sizeof(struct tc_cbq_lssopt) },
	[TCA_CBQ_WRROPT]	= { .len = sizeof(struct tc_cbq_wrropt) },
	[TCA_CBQ_FOPT]		= { .len = sizeof(struct tc_cbq_fopt) },
	[TCA_CBQ_OVL_STRATEGY]	= { .len = sizeof(struct tc_cbq_ovl) },
	[TCA_CBQ_RATE]		= { .len = sizeof(struct tc_ratespec) },
	[TCA_CBQ_RTAB]		= { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
	[TCA_CBQ_POLICE]	= { .len = sizeof(struct tc_cbq_police) },
};
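
/* Note: this netlink policy only enforces minimum attribute sizes;
 * semantic validation (priority ranges, rate table sanity) is done in
 * cbq_set_wrr(), cbq_set_overlimit() and qdisc_get_rtab() below.
 */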
static int cbq_init(struct Qdisc *sch, struct nlattr *opt)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct nlattr *tb[TCA_CBQ_MAX + 1];
	struct tc_ratespec *r;
	int err;

	err = nla_parse_nested(tb, TCA_CBQ_MAX, opt, cbq_policy);
	if (err < 0)
		return err;

	if (tb[TCA_CBQ_RTAB] == NULL || tb[TCA_CBQ_RATE] == NULL)
		return -EINVAL;

	r = nla_data(tb[TCA_CBQ_RATE]);

	if ((q->link.R_tab = qdisc_get_rtab(r, tb[TCA_CBQ_RTAB])) == NULL)
		return -EINVAL;

	err = qdisc_class_hash_init(&q->clhash);
	if (err < 0)
		goto put_rtab;

	q->link.refcnt = 1;
	q->link.sibling = &q->link;
	q->link.common.classid = sch->handle;
	q->link.qdisc = sch;
	if (!(q->link.q = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue,
					    &pfifo_qdisc_ops,
					    sch->handle)))
		q->link.q = &noop_qdisc;

	q->link.priority = TC_CBQ_MAXPRIO-1;
	q->link.priority2 = TC_CBQ_MAXPRIO-1;
	q->link.cpriority = TC_CBQ_MAXPRIO-1;
	q->link.ovl_strategy = TC_CBQ_OVL_CLASSIC;
	q->link.overlimit = cbq_ovl_classic;
	q->link.allot = psched_mtu(qdisc_dev(sch));
	q->link.quantum = q->link.allot;
	q->link.weight = q->link.R_tab->rate.rate;

	q->link.ewma_log = TC_CBQ_DEF_EWMA;
	q->link.avpkt = q->link.allot/2;
	q->link.minidle = -0x7FFFFFFF;

	qdisc_watchdog_init(&q->watchdog, sch);
	hrtimer_init(&q->delay_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
	q->delay_timer.function = cbq_undelay;
	q->toplevel = TC_CBQ_MAXLEVEL;
	q->now = psched_get_time();
	q->now_rt = q->now;

	cbq_link_class(&q->link);

	if (tb[TCA_CBQ_LSSOPT])
		cbq_set_lss(&q->link, nla_data(tb[TCA_CBQ_LSSOPT]));

	cbq_addprio(q, &q->link);
	return 0;

put_rtab:
	qdisc_put_rtab(q->link.R_tab);
	return err;
}
static __inline__ int cbq_dump_rate(struct sk_buff *skb, struct cbq_class *cl)
{
	unsigned char *b = skb_tail_pointer(skb);

	NLA_PUT(skb, TCA_CBQ_RATE, sizeof(cl->R_tab->rate), &cl->R_tab->rate);
	return skb->len;

nla_put_failure:
	nlmsg_trim(skb, b);
	return -1;
}
static __inline__ int cbq_dump_lss(struct sk_buff *skb, struct cbq_class *cl)
{
	unsigned char *b = skb_tail_pointer(skb);
	struct tc_cbq_lssopt opt;

	opt.flags = 0;
	if (cl->borrow == NULL)
		opt.flags |= TCF_CBQ_LSS_BOUNDED;
	if (cl->share == NULL)
		opt.flags |= TCF_CBQ_LSS_ISOLATED;
	opt.ewma_log = cl->ewma_log;
	opt.level = cl->level;
	opt.avpkt = cl->avpkt;
	opt.maxidle = cl->maxidle;
	opt.minidle = (u32)(-cl->minidle);
	opt.offtime = cl->offtime;
	opt.change = ~0;
	NLA_PUT(skb, TCA_CBQ_LSSOPT, sizeof(opt), &opt);
	return skb->len;

nla_put_failure:
	nlmsg_trim(skb, b);
	return -1;
}
static __inline__ int cbq_dump_wrr(struct sk_buff *skb, struct cbq_class *cl)
{
	unsigned char *b = skb_tail_pointer(skb);
	struct tc_cbq_wrropt opt;

	opt.flags = 0;
	opt.allot = cl->allot;
	opt.priority = cl->priority+1;
	opt.cpriority = cl->cpriority+1;
	opt.weight = cl->weight;
	NLA_PUT(skb, TCA_CBQ_WRROPT, sizeof(opt), &opt);
	return skb->len;

nla_put_failure:
	nlmsg_trim(skb, b);
	return -1;
}
static __inline__ int cbq_dump_ovl(struct sk_buff *skb, struct cbq_class *cl)
{
	unsigned char *b = skb_tail_pointer(skb);
	struct tc_cbq_ovl opt;

	opt.strategy = cl->ovl_strategy;
	opt.priority2 = cl->priority2+1;
	opt.pad = 0;
	opt.penalty = cl->penalty;
	NLA_PUT(skb, TCA_CBQ_OVL_STRATEGY, sizeof(opt), &opt);
	return skb->len;

nla_put_failure:
	nlmsg_trim(skb, b);
	return -1;
}
static __inline__ int cbq_dump_fopt(struct sk_buff *skb, struct cbq_class *cl)
{
	unsigned char *b = skb_tail_pointer(skb);
	struct tc_cbq_fopt opt;

	if (cl->split || cl->defmap) {
		opt.split = cl->split ? cl->split->common.classid : 0;
		opt.defmap = cl->defmap;
		opt.defchange = ~0;
		NLA_PUT(skb, TCA_CBQ_FOPT, sizeof(opt), &opt);
	}
	return skb->len;

nla_put_failure:
	nlmsg_trim(skb, b);
	return -1;
}
#ifdef CONFIG_NET_CLS_ACT
static __inline__ int cbq_dump_police(struct sk_buff *skb, struct cbq_class *cl)
{
	unsigned char *b = skb_tail_pointer(skb);
	struct tc_cbq_police opt;

	if (cl->police) {
		opt.police = cl->police;
		opt.__res1 = 0;
		opt.__res2 = 0;
		NLA_PUT(skb, TCA_CBQ_POLICE, sizeof(opt), &opt);
	}
	return skb->len;

nla_put_failure:
	nlmsg_trim(skb, b);
	return -1;
}
#endif
static int cbq_dump_attr(struct sk_buff *skb, struct cbq_class *cl)
{
	if (cbq_dump_lss(skb, cl) < 0 ||
	    cbq_dump_rate(skb, cl) < 0 ||
	    cbq_dump_wrr(skb, cl) < 0 ||
	    cbq_dump_ovl(skb, cl) < 0 ||
#ifdef CONFIG_NET_CLS_ACT
	    cbq_dump_police(skb, cl) < 0 ||
#endif
	    cbq_dump_fopt(skb, cl) < 0)
		return -1;
	return 0;
}
static int cbq_dump(struct Qdisc *sch, struct sk_buff *skb)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct nlattr *nest;

	nest = nla_nest_start(skb, TCA_OPTIONS);
	if (nest == NULL)
		goto nla_put_failure;
	if (cbq_dump_attr(skb, &q->link) < 0)
		goto nla_put_failure;
	nla_nest_end(skb, nest);
	return skb->len;

nla_put_failure:
	nla_nest_cancel(skb, nest);
	return -1;
}
static int
cbq_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
{
	struct cbq_sched_data *q = qdisc_priv(sch);

	q->link.xstats.avgidle = q->link.avgidle;
	return gnet_stats_copy_app(d, &q->link.xstats, sizeof(q->link.xstats));
}
static int
cbq_dump_class(struct Qdisc *sch, unsigned long arg,
	       struct sk_buff *skb, struct tcmsg *tcm)
{
	struct cbq_class *cl = (struct cbq_class*)arg;
	struct nlattr *nest;

	if (cl->tparent)
		tcm->tcm_parent = cl->tparent->common.classid;
	else
		tcm->tcm_parent = TC_H_ROOT;
	tcm->tcm_handle = cl->common.classid;
	tcm->tcm_info = cl->q->handle;

	nest = nla_nest_start(skb, TCA_OPTIONS);
	if (nest == NULL)
		goto nla_put_failure;
	if (cbq_dump_attr(skb, cl) < 0)
		goto nla_put_failure;
	nla_nest_end(skb, nest);
	return skb->len;

nla_put_failure:
	nla_nest_cancel(skb, nest);
	return -1;
}
static int
cbq_dump_class_stats(struct Qdisc *sch, unsigned long arg,
		     struct gnet_dump *d)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *cl = (struct cbq_class*)arg;

	cl->qstats.qlen = cl->q->q.qlen;
	cl->xstats.avgidle = cl->avgidle;
	cl->xstats.undertime = 0;

	if (cl->undertime != PSCHED_PASTPERFECT)
		cl->xstats.undertime = cl->undertime - q->now;

	if (gnet_stats_copy_basic(d, &cl->bstats) < 0 ||
	    gnet_stats_copy_rate_est(d, &cl->bstats, &cl->rate_est) < 0 ||
	    gnet_stats_copy_queue(d, &cl->qstats) < 0)
		return -1;

	return gnet_stats_copy_app(d, &cl->xstats, sizeof(cl->xstats));
}
static int cbq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
		     struct Qdisc **old)
{
	struct cbq_class *cl = (struct cbq_class*)arg;

	if (new == NULL) {
		new = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue,
					&pfifo_qdisc_ops, cl->common.classid);
		if (new == NULL)
			return -ENOBUFS;
	} else {
#ifdef CONFIG_NET_CLS_ACT
		if (cl->police == TC_POLICE_RECLASSIFY)
			new->reshape_fail = cbq_reshape_fail;
#endif
	}
	sch_tree_lock(sch);
	*old = cl->q;
	cl->q = new;
	qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
	qdisc_reset(*old);
	sch_tree_unlock(sch);

	return 0;
}
static struct Qdisc *
cbq_leaf(struct Qdisc *sch, unsigned long arg)
{
	struct cbq_class *cl = (struct cbq_class*)arg;

	return cl->q;
}

static void cbq_qlen_notify(struct Qdisc *sch, unsigned long arg)
{
	struct cbq_class *cl = (struct cbq_class *)arg;

	if (cl->q->q.qlen == 0)
		cbq_deactivate_class(cl);
}
static unsigned long cbq_get(struct Qdisc *sch, u32 classid)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *cl = cbq_class_lookup(q, classid);

	if (cl) {
		cl->refcnt++;
		return (unsigned long)cl;
	}
	return 0;
}
static void cbq_destroy_class(struct Qdisc *sch, struct cbq_class *cl)
{
	struct cbq_sched_data *q = qdisc_priv(sch);

	WARN_ON(cl->filters);

	tcf_destroy_chain(&cl->filter_list);
	qdisc_destroy(cl->q);
	qdisc_put_rtab(cl->R_tab);
	gen_kill_estimator(&cl->bstats, &cl->rate_est);
	if (cl != &q->link)
		kfree(cl);
}
static void
cbq_destroy(struct Qdisc* sch)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct hlist_node *n, *next;
	struct cbq_class *cl;
	unsigned int h;

#ifdef CONFIG_NET_CLS_ACT
	q->rx_class = NULL;
#endif
	/*
	 * Filters must be destroyed first because we don't destroy the
	 * classes from root to leafs which means that filters can still
	 * be bound to classes which have been destroyed already. --TGR '04
	 */
	for (h = 0; h < q->clhash.hashsize; h++) {
		hlist_for_each_entry(cl, n, &q->clhash.hash[h], common.hnode)
			tcf_destroy_chain(&cl->filter_list);
	}
	for (h = 0; h < q->clhash.hashsize; h++) {
		hlist_for_each_entry_safe(cl, n, next, &q->clhash.hash[h],
					  common.hnode)
			cbq_destroy_class(sch, cl);
	}
	qdisc_class_hash_destroy(&q->clhash);
}
static void cbq_put(struct Qdisc *sch, unsigned long arg)
{
	struct cbq_class *cl = (struct cbq_class*)arg;

	if (--cl->refcnt == 0) {
#ifdef CONFIG_NET_CLS_ACT
		spinlock_t *root_lock = qdisc_root_sleeping_lock(sch);
		struct cbq_sched_data *q = qdisc_priv(sch);

		spin_lock_bh(root_lock);
		if (q->rx_class == cl)
			q->rx_class = NULL;
		spin_unlock_bh(root_lock);
#endif

		cbq_destroy_class(sch, cl);
	}
}
static int
cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **tca,
		 unsigned long *arg)
{
	int err;
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *cl = (struct cbq_class*)*arg;
	struct nlattr *opt = tca[TCA_OPTIONS];
	struct nlattr *tb[TCA_CBQ_MAX + 1];
	struct cbq_class *parent;
	struct qdisc_rate_table *rtab = NULL;

	if (opt == NULL)
		return -EINVAL;

	err = nla_parse_nested(tb, TCA_CBQ_MAX, opt, cbq_policy);
	if (err < 0)
		return err;

	if (cl) {
		/* Check parent */
		if (parentid) {
			if (cl->tparent &&
			    cl->tparent->common.classid != parentid)
				return -EINVAL;
			if (!cl->tparent && parentid != TC_H_ROOT)
				return -EINVAL;
		}

		if (tb[TCA_CBQ_RATE]) {
			rtab = qdisc_get_rtab(nla_data(tb[TCA_CBQ_RATE]),
					      tb[TCA_CBQ_RTAB]);
			if (rtab == NULL)
				return -EINVAL;
		}

		if (tca[TCA_RATE]) {
			err = gen_replace_estimator(&cl->bstats, &cl->rate_est,
						    qdisc_root_sleeping_lock(sch),
						    tca[TCA_RATE]);
			if (err) {
				if (rtab)
					qdisc_put_rtab(rtab);
				return err;
			}
		}

		/* Change class parameters */
		sch_tree_lock(sch);

		if (cl->next_alive != NULL)
			cbq_deactivate_class(cl);

		if (rtab) {
			qdisc_put_rtab(cl->R_tab);
			cl->R_tab = rtab;
		}

		if (tb[TCA_CBQ_LSSOPT])
			cbq_set_lss(cl, nla_data(tb[TCA_CBQ_LSSOPT]));

		if (tb[TCA_CBQ_WRROPT]) {
			cbq_rmprio(q, cl);
			cbq_set_wrr(cl, nla_data(tb[TCA_CBQ_WRROPT]));
		}

		if (tb[TCA_CBQ_OVL_STRATEGY])
			cbq_set_overlimit(cl, nla_data(tb[TCA_CBQ_OVL_STRATEGY]));

#ifdef CONFIG_NET_CLS_ACT
		if (tb[TCA_CBQ_POLICE])
			cbq_set_police(cl, nla_data(tb[TCA_CBQ_POLICE]));
#endif

		if (tb[TCA_CBQ_FOPT])
			cbq_set_fopt(cl, nla_data(tb[TCA_CBQ_FOPT]));

		if (cl->q->q.qlen)
			cbq_activate_class(cl);

		sch_tree_unlock(sch);

		return 0;
	}

	if (parentid == TC_H_ROOT)
		return -EINVAL;

	if (tb[TCA_CBQ_WRROPT] == NULL || tb[TCA_CBQ_RATE] == NULL ||
	    tb[TCA_CBQ_LSSOPT] == NULL)
		return -EINVAL;

	rtab = qdisc_get_rtab(nla_data(tb[TCA_CBQ_RATE]), tb[TCA_CBQ_RTAB]);
	if (rtab == NULL)
		return -EINVAL;

	if (classid) {
		err = -EINVAL;
		if (TC_H_MAJ(classid^sch->handle) || cbq_class_lookup(q, classid))
			goto failure;
	} else {
		int i;
		classid = TC_H_MAKE(sch->handle,0x8000);

		for (i=0; i<0x8000; i++) {
			if (++q->hgenerator >= 0x8000)
				q->hgenerator = 1;
			if (cbq_class_lookup(q, classid|q->hgenerator) == NULL)
				break;
		}
		err = -ENOSR;
		if (i >= 0x8000)
			goto failure;
		classid = classid|q->hgenerator;
	}

	parent = &q->link;
	if (parentid) {
		parent = cbq_class_lookup(q, parentid);
		err = -EINVAL;
		if (parent == NULL)
			goto failure;
	}

	err = -ENOBUFS;
	cl = kzalloc(sizeof(*cl), GFP_KERNEL);
	if (cl == NULL)
		goto failure;

	if (tca[TCA_RATE]) {
		err = gen_new_estimator(&cl->bstats, &cl->rate_est,
					qdisc_root_sleeping_lock(sch),
					tca[TCA_RATE]);
		if (err) {
			kfree(cl);
			goto failure;
		}
	}

	cl->R_tab = rtab;
	rtab = NULL;
	cl->refcnt = 1;
	if (!(cl->q = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue,
					&pfifo_qdisc_ops, classid)))
		cl->q = &noop_qdisc;
	cl->common.classid = classid;
	cl->tparent = parent;
	cl->qdisc = sch;
	cl->allot = parent->allot;
	cl->quantum = cl->allot;
	cl->weight = cl->R_tab->rate.rate;

	sch_tree_lock(sch);
	cbq_link_class(cl);
	cl->borrow = cl->tparent;
	if (cl->tparent != &q->link)
		cl->share = cl->tparent;
	cbq_adjust_levels(parent);
	cl->minidle = -0x7FFFFFFF;
	cbq_set_lss(cl, nla_data(tb[TCA_CBQ_LSSOPT]));
	cbq_set_wrr(cl, nla_data(tb[TCA_CBQ_WRROPT]));
	if (cl->ewma_log==0)
		cl->ewma_log = q->link.ewma_log;
	if (cl->maxidle==0)
		cl->maxidle = q->link.maxidle;
	if (cl->avpkt==0)
		cl->avpkt = q->link.avpkt;
	cl->overlimit = cbq_ovl_classic;
	if (tb[TCA_CBQ_OVL_STRATEGY])
		cbq_set_overlimit(cl, nla_data(tb[TCA_CBQ_OVL_STRATEGY]));
#ifdef CONFIG_NET_CLS_ACT
	if (tb[TCA_CBQ_POLICE])
		cbq_set_police(cl, nla_data(tb[TCA_CBQ_POLICE]));
#endif
	if (tb[TCA_CBQ_FOPT])
		cbq_set_fopt(cl, nla_data(tb[TCA_CBQ_FOPT]));
	sch_tree_unlock(sch);

	qdisc_class_hash_grow(sch, &q->clhash);

	*arg = (unsigned long)cl;
	return 0;

failure:
	qdisc_put_rtab(rtab);
	return err;
}
static int cbq_delete(struct Qdisc *sch, unsigned long arg)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *cl = (struct cbq_class*)arg;
	unsigned int qlen;

	if (cl->filters || cl->children || cl == &q->link)
		return -EBUSY;

	sch_tree_lock(sch);

	qlen = cl->q->q.qlen;
	qdisc_reset(cl->q);
	qdisc_tree_decrease_qlen(cl->q, qlen);

	if (cl->next_alive)
		cbq_deactivate_class(cl);

	if (q->tx_borrowed == cl)
		q->tx_borrowed = q->tx_class;
	if (q->tx_class == cl) {
		q->tx_class = NULL;
		q->tx_borrowed = NULL;
	}
#ifdef CONFIG_NET_CLS_ACT
	if (q->rx_class == cl)
		q->rx_class = NULL;
#endif

	cbq_unlink_class(cl);
	cbq_adjust_levels(cl->tparent);
	cl->defmap = 0;
	cbq_sync_defmap(cl);

	cbq_rmprio(q, cl);
	sch_tree_unlock(sch);

	BUG_ON(--cl->refcnt == 0);
	/*
	 * This shouldn't happen: we "hold" one cops->get() when called
	 * from tc_ctl_tclass; the destroy method is done from cops->put().
	 */

	return 0;
}
static struct tcf_proto **cbq_find_tcf(struct Qdisc *sch, unsigned long arg)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *cl = (struct cbq_class *)arg;

	if (cl == NULL)
		cl = &q->link;

	return &cl->filter_list;
}

static unsigned long cbq_bind_filter(struct Qdisc *sch, unsigned long parent,
				     u32 classid)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *p = (struct cbq_class*)parent;
	struct cbq_class *cl = cbq_class_lookup(q, classid);

	if (cl) {
		if (p && p->level <= cl->level)
			return 0;
		cl->filters++;
		return (unsigned long)cl;
	}
	return 0;
}

static void cbq_unbind_filter(struct Qdisc *sch, unsigned long arg)
{
	struct cbq_class *cl = (struct cbq_class*)arg;

	cl->filters--;
}
static void cbq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *cl;
	struct hlist_node *n;
	unsigned int h;

	if (arg->stop)
		return;

	for (h = 0; h < q->clhash.hashsize; h++) {
		hlist_for_each_entry(cl, n, &q->clhash.hash[h], common.hnode) {
			if (arg->count < arg->skip) {
				arg->count++;
				continue;
			}
			if (arg->fn(sch, (unsigned long)cl, arg) < 0) {
				arg->stop = 1;
				return;
			}
			arg->count++;
		}
	}
}
static const struct Qdisc_class_ops cbq_class_ops = {
	.graft		=	cbq_graft,
	.leaf		=	cbq_leaf,
	.qlen_notify	=	cbq_qlen_notify,
	.get		=	cbq_get,
	.put		=	cbq_put,
	.change		=	cbq_change_class,
	.delete		=	cbq_delete,
	.walk		=	cbq_walk,
	.tcf_chain	=	cbq_find_tcf,
	.bind_tcf	=	cbq_bind_filter,
	.unbind_tcf	=	cbq_unbind_filter,
	.dump		=	cbq_dump_class,
	.dump_stats	=	cbq_dump_class_stats,
};

static struct Qdisc_ops cbq_qdisc_ops __read_mostly = {
	.next		=	NULL,
	.cl_ops		=	&cbq_class_ops,
	.id		=	"cbq",
	.priv_size	=	sizeof(struct cbq_sched_data),
	.enqueue	=	cbq_enqueue,
	.dequeue	=	cbq_dequeue,
	.peek		=	qdisc_peek_dequeued,
	.drop		=	cbq_drop,
	.init		=	cbq_init,
	.reset		=	cbq_reset,
	.destroy	=	cbq_destroy,
	.change		=	NULL,
	.dump		=	cbq_dump,
	.dump_stats	=	cbq_dump_stats,
	.owner		=	THIS_MODULE,
};
static int __init cbq_module_init(void)
{
	return register_qdisc(&cbq_qdisc_ops);
}
static void __exit cbq_module_exit(void)
{
	unregister_qdisc(&cbq_qdisc_ops);
}
module_init(cbq_module_init)
module_exit(cbq_module_exit)
MODULE_LICENSE("GPL");