/*
 * net/sched/sch_cbq.c	Class-Based Queueing discipline.
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 *		Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 */
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/skbuff.h>
#include <net/netlink.h>
#include <net/pkt_sched.h>
/*	Class-Based Queueing (CBQ) algorithm.
	=======================================

	Sources: [1] Sally Floyd and Van Jacobson, "Link-sharing and Resource
		     Management Models for Packet Networks",
		     IEEE/ACM Transactions on Networking, Vol.3, No.4, 1995

		 [2] Sally Floyd, "Notes on CBQ and Guaranteed Service", 1995

		 [3] Sally Floyd, "Notes on Class-Based Queueing: Setting
		     Parameters", 1996

		 [4] Sally Floyd and Michael Speer, "Experimental Results
		     for Class-Based Queueing", 1998, not published.

	-----------------------------------------------------------------------

	Algorithm skeleton was taken from NS simulator cbq.cc.
	If someone wants to check this code against the LBL version,
	he should take into account that ONLY the skeleton was borrowed,
	the implementation is different. Particularly:

	--- The WRR algorithm is different. Our version looks more
	reasonable (I hope) and works when quanta are allowed to be
	less than MTU, which is always the case when real-time classes
	have small rates. Note that the statement of [3] is
	incomplete: the delay may actually be estimated even if the class
	per-round allotment is less than MTU. Namely, if the per-round
	allotment is W*r_i, and r_1+...+r_k = r < 1, then

	delay_i <= ([MTU/(W*r_i)]*W*r + W*r + k*MTU)/B

	In the worst case we have an IntServ estimate with D = W*r+k*MTU
	and C = MTU*r. The proof (if correct at all) is trivial.

	--- It seems that cbq-2.0 is not very accurate. At least, I cannot
	interpret some places, which look like wrong translations
	from NS. Anyone is advised to find these differences
	and explain to me why I am wrong 8).

	--- Linux has no EOI event, so we cannot estimate true class
	idle time. The workaround is to consider the next dequeue event
	as a sign that the previous packet is finished. This is wrong because of
	internal device queueing, but on a permanently loaded link it is true.
	Moreover, combined with the clock integrator, this scheme looks
	very close to an ideal solution.  */
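
/*
 * Illustrative sketch only (compiled out, not part of the scheduler): a
 * userspace-style evaluation of the delay bound quoted above,
 * delay_i <= ([MTU/(W*r_i)]*W*r + W*r + k*MTU)/B, reading [x] as rounding
 * up to a whole number of WRR rounds. All names and numbers below are
 * hypothetical and exist only for this sketch.
 */
#if 0
#include <math.h>

static double cbq_delay_bound(double mtu, double W, double r_i, double r,
			      int k, double B)
{
	/* rounds a class may have to wait before its allotment covers one MTU */
	double rounds = ceil(mtu / (W * r_i));

	return (rounds * W * r + W * r + k * mtu) / B;
}
/* e.g. cbq_delay_bound(1500.0, 3000.0, 0.01, 0.5, 4, 125000.0) */
#endif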
struct cbq_sched_data;


struct cbq_class {
	struct Qdisc_class_common common;
	struct cbq_class	*next_alive;	/* next class with backlog in this priority band */

/* Parameters */
	unsigned char		priority;	/* class priority */
	unsigned char		priority2;	/* priority to be used after overlimit */
	unsigned char		ewma_log;	/* time constant for idle time calculation */
	unsigned char		ovl_strategy;
#ifdef CONFIG_NET_CLS_ACT
	unsigned char		police;
#endif

	u32			defmap;

	/* Link-sharing scheduler parameters */
	long			maxidle;	/* Class parameters: see below. */
	long			offtime;
	long			minidle;
	u32			avpkt;
	struct qdisc_rate_table	*R_tab;

	/* Overlimit strategy parameters */
	void			(*overlimit)(struct cbq_class *cl);
	psched_tdiff_t		penalty;

	/* General scheduler (WRR) parameters */
	long			allot;
	long			quantum;	/* Allotment per WRR round */
	long			weight;		/* Relative allotment: see below */

	struct Qdisc		*qdisc;		/* Ptr to CBQ discipline */
	struct cbq_class	*split;		/* Ptr to split node */
	struct cbq_class	*share;		/* Ptr to LS parent in the class tree */
	struct cbq_class	*tparent;	/* Ptr to tree parent in the class tree */
	struct cbq_class	*borrow;	/* NULL if class is bandwidth limited;
						   parent otherwise */
	struct cbq_class	*sibling;	/* Sibling chain */
	struct cbq_class	*children;	/* Pointer to children chain */

	struct Qdisc		*q;		/* Elementary queueing discipline */


/* Variables */
	unsigned char		cpriority;	/* Effective priority */
	unsigned char		delayed;
	unsigned char		level;		/* level of the class in hierarchy:
						   0 for leaf classes, and maximal
						   level of children + 1 for nodes.
						 */

	psched_time_t		last;		/* Last end of service */
	psched_time_t		undertime;
	long			avgidle;
	long			deficit;	/* Saved deficit for WRR */
	psched_time_t		penalized;
	struct gnet_stats_basic_packed bstats;
	struct gnet_stats_queue	qstats;
	struct gnet_stats_rate_est rate_est;
	struct tc_cbq_xstats	xstats;

	struct tcf_proto	*filter_list;

	int			refcnt;
	int			filters;

	struct cbq_class	*defaults[TC_PRIO_MAX + 1];
};
struct cbq_sched_data {
	struct Qdisc_class_hash	clhash;			/* Hash table of all classes */
	int			nclasses[TC_CBQ_MAXPRIO + 1];
	unsigned		quanta[TC_CBQ_MAXPRIO + 1];

	struct cbq_class	link;

	unsigned		activemask;
	struct cbq_class	*active[TC_CBQ_MAXPRIO + 1];	/* List of all classes
								   with backlog */

#ifdef CONFIG_NET_CLS_ACT
	struct cbq_class	*rx_class;
#endif
	struct cbq_class	*tx_class;
	struct cbq_class	*tx_borrowed;
	int			tx_len;
	psched_time_t		now;			/* Cached timestamp */
	psched_time_t		now_rt;			/* Cached real time */
	unsigned		pmask;

	struct hrtimer		delay_timer;
	struct qdisc_watchdog	watchdog;	/* Watchdog timer,
						   started when CBQ has
						   backlog, but cannot
						   transmit just now */
	psched_tdiff_t		wd_expires;
	int			toplevel;
	u32			hgenerator;
};
#define L2T(cl, len)	qdisc_l2t((cl)->R_tab, len)

static __inline__ struct cbq_class *
cbq_class_lookup(struct cbq_sched_data *q, u32 classid)
{
	struct Qdisc_class_common *clc;

	clc = qdisc_class_find(&q->clhash, classid);
	if (clc == NULL)
		return NULL;
	return container_of(clc, struct cbq_class, common);
}
#ifdef CONFIG_NET_CLS_ACT

static struct cbq_class *
cbq_reclassify(struct sk_buff *skb, struct cbq_class *this)
{
	struct cbq_class *cl, *new;

	for (cl = this->tparent; cl; cl = cl->tparent)
		if ((new = cl->defaults[TC_PRIO_BESTEFFORT]) != NULL && new != this)
			return new;

	return NULL;
}

#endif
/* Classify packet. The procedure is pretty complicated, but
   it allows us to combine link sharing and priority scheduling
   transparently.

   Namely, you can put link sharing rules (f.e. route based) at the root of CBQ,
   so that it resolves to split nodes. Then packets are classified
   by logical priority, or a more specific classifier may be attached
   to the split node.
 */
static struct cbq_class *
cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *head = &q->link;
	struct cbq_class **defmap;
	struct cbq_class *cl = NULL;
	u32 prio = skb->priority;
	struct tcf_result res;

	/*
	 *  Step 1. If skb->priority points to one of our classes, use it.
	 */
	if (TC_H_MAJ(prio ^ sch->handle) == 0 &&
	    (cl = cbq_class_lookup(q, prio)) != NULL)
		return cl;

	*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
	for (;;) {
		int result = 0;
		defmap = head->defaults;

		/*
		 * Step 2+n. Apply classifier.
		 */
		if (!head->filter_list ||
		    (result = tc_classify_compat(skb, head->filter_list, &res)) < 0)
			goto fallback;

		if ((cl = (void *)res.class) == NULL) {
			if (TC_H_MAJ(res.classid))
				cl = cbq_class_lookup(q, res.classid);
			else if ((cl = defmap[res.classid & TC_PRIO_MAX]) == NULL)
				cl = defmap[TC_PRIO_BESTEFFORT];

			if (cl == NULL || cl->level >= head->level)
				goto fallback;
		}

#ifdef CONFIG_NET_CLS_ACT
		switch (result) {
		case TC_ACT_QUEUED:
		case TC_ACT_STOLEN:
			*qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
		case TC_ACT_SHOT:
			return NULL;
		case TC_ACT_RECLASSIFY:
			return cbq_reclassify(skb, cl);
		}
#endif
		if (cl->level == 0)
			return cl;

		/*
		 * Step 3+n. If classifier selected a link sharing class,
		 *	   apply agency specific classifier.
		 *	   Repeat this procedure until we hit a leaf node.
		 */
		head = cl;
	}

fallback:
	cl = head;

	/*
	 * Step 4. No success...
	 */
	if (TC_H_MAJ(prio) == 0 &&
	    !(cl = head->defaults[prio & TC_PRIO_MAX]) &&
	    !(cl = head->defaults[TC_PRIO_BESTEFFORT]))
		return head;

	return cl;
}
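
/*
 * Illustrative sketch only (compiled out): when no classifier gives a
 * verdict, the fallback above is (1) a class named directly by
 * skb->priority, then (2) the split node's default for the priority band,
 * then (3) the best-effort default. A standalone model of steps (2)-(3);
 * the SKETCH_* constants are stand-ins local to this sketch.
 */
#if 0
#define SKETCH_PRIO_MAX		15	/* stands in for TC_PRIO_MAX */
#define SKETCH_PRIO_BESTEFFORT	0	/* stands in for TC_PRIO_BESTEFFORT */

static void *pick_default(void *defaults[SKETCH_PRIO_MAX + 1], unsigned int prio)
{
	void *cl = defaults[prio & SKETCH_PRIO_MAX];

	return cl ? cl : defaults[SKETCH_PRIO_BESTEFFORT];
}
#endif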
/*
   A packet has just been enqueued on a previously empty class.
   cbq_activate_class adds it to the tail of the active class list
   of its priority band.
 */
static __inline__ void cbq_activate_class(struct cbq_class *cl)
{
	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
	int prio = cl->cpriority;
	struct cbq_class *cl_tail;

	cl_tail = q->active[prio];
	q->active[prio] = cl;

	if (cl_tail != NULL) {
		cl->next_alive = cl_tail->next_alive;
		cl_tail->next_alive = cl;
	} else {
		cl->next_alive = cl;
		q->activemask |= (1<<prio);
	}
}
/*
   Unlink class from active chain.
   Note that this same procedure is done directly in cbq_dequeue*
   during the round-robin procedure.
 */
static void cbq_deactivate_class(struct cbq_class *this)
{
	struct cbq_sched_data *q = qdisc_priv(this->qdisc);
	int prio = this->cpriority;
	struct cbq_class *cl;
	struct cbq_class *cl_prev = q->active[prio];

	do {
		cl = cl_prev->next_alive;
		if (cl == this) {
			cl_prev->next_alive = cl->next_alive;
			cl->next_alive = NULL;

			if (cl == q->active[prio]) {
				/* Was it the last class in this band? */
				q->active[prio] = cl_prev;
				if (cl == q->active[prio]) {
					q->active[prio] = NULL;
					q->activemask &= ~(1<<prio);
					return;
				}
			}
			return;
		}
	} while ((cl_prev = cl) != q->active[prio]);
}
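
/*
 * Illustrative sketch only (compiled out): each per-priority "active" list
 * above is a circular singly linked list threaded through ->next_alive,
 * with q->active[prio] pointing at the most recently activated class (the
 * tail), so tail->next_alive is the head that the round-robin starts from.
 * A minimal standalone model of the same tail insertion:
 */
#if 0
struct ring_node { struct ring_node *next; };

static void ring_insert_tail(struct ring_node **tail, struct ring_node *n)
{
	if (*tail) {
		n->next = (*tail)->next;	/* new tail points at old head */
		(*tail)->next = n;
	} else {
		n->next = n;			/* single-element ring */
	}
	*tail = n;
}
#endif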
static void
cbq_mark_toplevel(struct cbq_sched_data *q, struct cbq_class *cl)
{
	int toplevel = q->toplevel;

	if (toplevel > cl->level && !(cl->q->flags & TCQ_F_THROTTLED)) {
		psched_time_t now;
		psched_tdiff_t incr;

		now = psched_get_time();
		incr = now - q->now_rt;
		now = q->now + incr;

		do {
			if (cl->undertime < now) {
				q->toplevel = cl->level;
				return;
			}
		} while ((cl = cl->borrow) != NULL && toplevel > cl->level);
	}
}
static int
cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	int uninitialized_var(ret);
	struct cbq_class *cl = cbq_classify(skb, sch, &ret);

#ifdef CONFIG_NET_CLS_ACT
	q->rx_class = cl;
#endif
	if (cl == NULL) {
		if (ret & __NET_XMIT_BYPASS)
			sch->qstats.drops++;
		kfree_skb(skb);
		return ret;
	}

#ifdef CONFIG_NET_CLS_ACT
	cl->q->__parent = sch;
#endif
	ret = qdisc_enqueue(skb, cl->q);
	if (ret == NET_XMIT_SUCCESS) {
		sch->q.qlen++;
		cbq_mark_toplevel(q, cl);
		if (!cl->next_alive)
			cbq_activate_class(cl);
		return ret;
	}

	if (net_xmit_drop_count(ret)) {
		sch->qstats.drops++;
		cbq_mark_toplevel(q, cl);
		cl->qstats.drops++;
	}
	return ret;
}
/* Overlimit actions */

/* TC_CBQ_OVL_CLASSIC: (default) penalize leaf class by adding offtime */
static void cbq_ovl_classic(struct cbq_class *cl)
{
	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
	psched_tdiff_t delay = cl->undertime - q->now;

	if (!cl->delayed) {
		delay += cl->offtime;

		/*
		   Class goes to sleep, so that it will have no
		   chance to work avgidle. Let's forgive it 8)

		   BTW cbq-2.0 has a bug in this
		   place: apparently they forgot to shift it by cl->ewma_log.
		 */
		if (cl->avgidle < 0)
			delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
		if (cl->avgidle < cl->minidle)
			cl->avgidle = cl->minidle;
		if (delay <= 0)
			delay = 1;
		cl->undertime = q->now + delay;

		cl->xstats.overactions++;
		cl->delayed = 1;
	}
	if (q->wd_expires == 0 || q->wd_expires > delay)
		q->wd_expires = delay;

	/* Dirty work! We must schedule wakeups based on
	   real available rate, rather than leaf rate,
	   which may be tiny (even zero).
	 */
	if (q->toplevel == TC_CBQ_MAXLEVEL) {
		struct cbq_class *b;
		psched_tdiff_t base_delay = q->wd_expires;

		for (b = cl->borrow; b; b = b->borrow) {
			delay = b->undertime - q->now;
			if (delay < base_delay) {
				if (delay <= 0)
					delay = 1;
				base_delay = delay;
			}
		}

		q->wd_expires = base_delay;
	}
}
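
/*
 * Illustrative sketch only (compiled out): the "classic" overlimit action
 * above decides how long the class must stay silent. Starting from
 * (undertime - now), it adds offtime and then "forgives" the avgidle-derived
 * share of undertime, (-avgidle) - ((-avgidle) >> ewma_log), i.e.
 * (1 - W) * (-avgidle) in the scaled units where cl->avgidle == true_avgidle/W.
 * A standalone model of that arithmetic:
 */
#if 0
static long ovl_classic_delay(long undertime_minus_now, long offtime,
			      long avgidle, unsigned char ewma_log)
{
	long delay = undertime_minus_now + offtime;

	if (avgidle < 0)	/* forgive the avgidle-derived part */
		delay -= (-avgidle) - ((-avgidle) >> ewma_log);
	return delay > 0 ? delay : 1;
}
#endif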
/* TC_CBQ_OVL_RCLASSIC: penalize by offtime classes in hierarchy, when
   they go overlimit
 */

static void cbq_ovl_rclassic(struct cbq_class *cl)
{
	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
	struct cbq_class *this = cl;

	do {
		if (cl->level > q->toplevel) {
			cl = NULL;
			break;
		}
	} while ((cl = cl->borrow) != NULL);

	if (cl == NULL)
		cl = this;
	cbq_ovl_classic(cl);
}
/* TC_CBQ_OVL_DELAY: delay until it will go to underlimit */

static void cbq_ovl_delay(struct cbq_class *cl)
{
	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
	psched_tdiff_t delay = cl->undertime - q->now;

	if (test_bit(__QDISC_STATE_DEACTIVATED,
		     &qdisc_root_sleeping(cl->qdisc)->state))
		return;

	if (!cl->delayed) {
		psched_time_t sched = q->now;
		ktime_t expires;

		delay += cl->offtime;
		if (cl->avgidle < 0)
			delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
		if (cl->avgidle < cl->minidle)
			cl->avgidle = cl->minidle;
		cl->undertime = q->now + delay;

		if (delay > 0) {
			sched += delay + cl->penalty;
			cl->penalized = sched;
			cl->cpriority = TC_CBQ_MAXPRIO;
			q->pmask |= (1<<TC_CBQ_MAXPRIO);

			expires = ktime_set(0, 0);
			expires = ktime_add_ns(expires, PSCHED_TICKS2NS(sched));
			if (hrtimer_try_to_cancel(&q->delay_timer) &&
			    ktime_to_ns(ktime_sub(
					hrtimer_get_expires(&q->delay_timer),
					expires)) > 0)
				hrtimer_set_expires(&q->delay_timer, expires);
			hrtimer_restart(&q->delay_timer);
			cl->delayed = 1;
			cl->xstats.overactions++;
			return;
		}
		delay = 1;
	}
	if (q->wd_expires == 0 || q->wd_expires > delay)
		q->wd_expires = delay;
}
/* TC_CBQ_OVL_LOWPRIO: penalize class by lowering its priority band */

static void cbq_ovl_lowprio(struct cbq_class *cl)
{
	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);

	cl->penalized = q->now + cl->penalty;

	if (cl->cpriority != cl->priority2) {
		cl->cpriority = cl->priority2;
		q->pmask |= (1<<cl->cpriority);
		cl->xstats.overactions++;
	}
	cbq_ovl_classic(cl);
}
/* TC_CBQ_OVL_DROP: penalize class by dropping */

static void cbq_ovl_drop(struct cbq_class *cl)
{
	if (cl->q->ops->drop)
		if (cl->q->ops->drop(cl->q))
			cl->qdisc->q.qlen--;
	cl->xstats.overactions++;
	cbq_ovl_classic(cl);
}
static psched_tdiff_t cbq_undelay_prio(struct cbq_sched_data *q, int prio,
				       psched_time_t now)
{
	struct cbq_class *cl;
	struct cbq_class *cl_prev = q->active[prio];
	psched_time_t sched = now;

	if (cl_prev == NULL)
		return 0;

	do {
		cl = cl_prev->next_alive;
		if (now - cl->penalized > 0) {
			cl_prev->next_alive = cl->next_alive;
			cl->next_alive = NULL;
			cl->cpriority = cl->priority;
			cl->delayed = 0;
			cbq_activate_class(cl);

			if (cl == q->active[prio]) {
				q->active[prio] = cl_prev;
				if (cl == q->active[prio]) {
					q->active[prio] = NULL;
					return 0;
				}
			}

			cl = cl_prev->next_alive;
		} else if (sched - cl->penalized > 0)
			sched = cl->penalized;
	} while ((cl_prev = cl) != q->active[prio]);

	return sched - now;
}
static enum hrtimer_restart cbq_undelay(struct hrtimer *timer)
{
	struct cbq_sched_data *q = container_of(timer, struct cbq_sched_data,
						delay_timer);
	struct Qdisc *sch = q->watchdog.qdisc;
	psched_time_t now;
	psched_tdiff_t delay = 0;
	unsigned pmask;

	now = psched_get_time();

	pmask = q->pmask;
	q->pmask = 0;

	while (pmask) {
		int prio = ffz(~pmask);
		psched_tdiff_t tmp;

		pmask &= ~(1<<prio);

		tmp = cbq_undelay_prio(q, prio, now);
		if (tmp > 0) {
			q->pmask |= 1<<prio;
			if (tmp < delay || delay == 0)
				delay = tmp;
		}
	}

	if (delay) {
		ktime_t time;

		time = ktime_set(0, 0);
		time = ktime_add_ns(time, PSCHED_TICKS2NS(now + delay));
		hrtimer_start(&q->delay_timer, time, HRTIMER_MODE_ABS);
	}

	sch->flags &= ~TCQ_F_THROTTLED;
	__netif_schedule(qdisc_root(sch));
	return HRTIMER_NORESTART;
}
#ifdef CONFIG_NET_CLS_ACT
static int cbq_reshape_fail(struct sk_buff *skb, struct Qdisc *child)
{
	struct Qdisc *sch = child->__parent;
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *cl = q->rx_class;

	q->rx_class = NULL;

	if (cl && (cl = cbq_reclassify(skb, cl)) != NULL) {
		int ret;

		cbq_mark_toplevel(q, cl);

		q->rx_class = cl;
		cl->q->__parent = sch;

		ret = qdisc_enqueue(skb, cl->q);
		if (ret == NET_XMIT_SUCCESS) {
			sch->q.qlen++;
			if (!cl->next_alive)
				cbq_activate_class(cl);
			return 0;
		}
		if (net_xmit_drop_count(ret))
			sch->qstats.drops++;
		return 0;
	}

	sch->qstats.drops++;
	return -1;
}
#endif
/*
   This is a mission-critical procedure.

   We "regenerate" the toplevel cutoff if the transmitting class
   has backlog and is not regulated. This is not part of the
   original CBQ description, but looks more reasonable.
   Probably, it is wrong. This question needs further investigation.
 */
static __inline__ void
cbq_update_toplevel(struct cbq_sched_data *q, struct cbq_class *cl,
		    struct cbq_class *borrowed)
{
	if (cl && q->toplevel >= borrowed->level) {
		if (cl->q->q.qlen > 1) {
			do {
				if (borrowed->undertime == PSCHED_PASTPERFECT) {
					q->toplevel = borrowed->level;
					return;
				}
			} while ((borrowed = borrowed->borrow) != NULL);
		}
#if 0
	/* It is not necessary now. Uncommenting it
	   will save CPU cycles, but decrease fairness.
	 */
		q->toplevel = TC_CBQ_MAXLEVEL;
#endif
	}
}
static void
cbq_update(struct cbq_sched_data *q)
{
	struct cbq_class *this = q->tx_class;
	struct cbq_class *cl = this;
	int len = q->tx_len;

	q->tx_class = NULL;

	for ( ; cl; cl = cl->share) {
		long avgidle = cl->avgidle;
		long idle;

		cl->bstats.packets++;
		cl->bstats.bytes += len;

		/*
		   (now - last) is the total time between packet right edges.
		   (last_pktlen/rate) is "virtual" busy time, so that

			 idle = (now - last) - last_pktlen/rate
		 */

		idle = q->now - cl->last;
		if ((unsigned long)idle > 128*1024*1024) {
			avgidle = cl->maxidle;
		} else {
			idle -= L2T(cl, len);

		/* true_avgidle := (1-W)*true_avgidle + W*idle,
		   where W=2^{-ewma_log}. But cl->avgidle is scaled:
		   cl->avgidle == true_avgidle/W,
		   hence:
		 */
			avgidle += idle - (avgidle >> cl->ewma_log);
		}

		if (avgidle <= 0) {
			/* Overlimit or at-limit */

			if (avgidle < cl->minidle)
				avgidle = cl->minidle;

			cl->avgidle = avgidle;

			/* Calculate expected time, when this class
			   will be allowed to send.
			   It will occur, when:
			   (1-W)*true_avgidle + W*delay = 0, i.e.
			   idle = (1/W - 1)*(-true_avgidle)
			   or
			   idle = (1 - W)*(-cl->avgidle);
			 */
			idle = (-avgidle) - ((-avgidle) >> cl->ewma_log);

			/*
			   That is not all.
			   To maintain the rate allocated to the class,
			   we add to undertime the virtual clock time
			   necessary to complete the transmitted packet.
			   (len/phys_bandwidth has already been passed
			   by the moment of cbq_update)
			 */

			idle -= L2T(&q->link, len);
			idle += L2T(cl, len);

			cl->undertime = q->now + idle;
		} else {
			/* Underlimit */

			cl->undertime = PSCHED_PASTPERFECT;
			if (avgidle > cl->maxidle)
				cl->avgidle = cl->maxidle;
			else
				cl->avgidle = avgidle;
		}
		cl->last = q->now;
	}

	cbq_update_toplevel(q, this, q->tx_borrowed);
}
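
/*
 * Illustrative sketch only (compiled out): the scaled EWMA used above.
 * With W = 2^-ewma_log and cl->avgidle == true_avgidle / W, the update
 * true_avgidle := (1-W)*true_avgidle + W*idle becomes
 * avgidle += idle - (avgidle >> ewma_log). E.g. with ewma_log = 2 (W = 1/4),
 * avgidle = 100 and idle = 20: 100 + 20 - (100 >> 2) = 95, which matches the
 * unscaled (0.75*25 + 0.25*20) / 0.25 = 95.
 */
#if 0
static long ewma_scaled_update(long avgidle, long idle, unsigned char ewma_log)
{
	return avgidle + idle - (avgidle >> ewma_log);
}
#endif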
static __inline__ struct cbq_class *
cbq_under_limit(struct cbq_class *cl)
{
	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
	struct cbq_class *this_cl = cl;

	if (cl->tparent == NULL)
		return cl;

	if (cl->undertime == PSCHED_PASTPERFECT || q->now >= cl->undertime) {
		cl->delayed = 0;
		return cl;
	}

	do {
		/* This is a very suspicious place. Currently the overlimit
		   action is generated for unbounded classes
		   only if the link is completely congested.
		   Though it agrees with the ancestor-only paradigm,
		   it looks very stupid. In particular,
		   it means that this chunk of code will either
		   never be called or result in strong amplification
		   of burstiness. Dangerous, silly, and, however,
		   no other solution exists.
		 */
		if ((cl = cl->borrow) == NULL) {
			this_cl->qstats.overlimits++;
			this_cl->overlimit(this_cl);
			return NULL;
		}
		if (cl->level > q->toplevel)
			return NULL;
	} while (cl->undertime != PSCHED_PASTPERFECT && q->now < cl->undertime);

	cl->delayed = 0;
	return cl;
}
static __inline__ struct sk_buff *
cbq_dequeue_prio(struct Qdisc *sch, int prio)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *cl_tail, *cl_prev, *cl;
	struct sk_buff *skb;
	int deficit;

	cl_tail = cl_prev = q->active[prio];
	cl = cl_prev->next_alive;

	do {
		deficit = 0;

		/* Start round */
		do {
			struct cbq_class *borrow = cl;

			if (cl->q->q.qlen &&
			    (borrow = cbq_under_limit(cl)) == NULL)
				goto skip_class;

			if (cl->deficit <= 0) {
				/* Class exhausted its allotment per
				   this round. Switch to the next one.
				 */
				deficit = 1;
				cl->deficit += cl->quantum;
				goto next_class;
			}

			skb = cl->q->dequeue(cl->q);

			/* Class did not give us any skb :-(
			   It could occur even if cl->q->q.qlen != 0
			   f.e. if cl->q == "tbf"
			 */
			if (skb == NULL)
				goto skip_class;

			cl->deficit -= qdisc_pkt_len(skb);
			q->tx_class = cl;
			q->tx_borrowed = borrow;
			if (borrow != cl) {
#ifndef CBQ_XSTATS_BORROWS_BYTES
				borrow->xstats.borrows++;
				cl->xstats.borrows++;
#else
				borrow->xstats.borrows += qdisc_pkt_len(skb);
				cl->xstats.borrows += qdisc_pkt_len(skb);
#endif
			}
			q->tx_len = qdisc_pkt_len(skb);

			if (cl->deficit <= 0) {
				q->active[prio] = cl;
				cl = cl->next_alive;
				cl->deficit += cl->quantum;
			}
			return skb;

skip_class:
			if (cl->q->q.qlen == 0 || prio != cl->cpriority) {
				/* Class is empty or penalized.
				   Unlink it from active chain.
				 */
				cl_prev->next_alive = cl->next_alive;
				cl->next_alive = NULL;

				/* Did cl_tail point to it? */
				if (cl == cl_tail) {
					/* Repair it! */
					cl_tail = cl_prev;

					/* Was it the last class in this band? */
					if (cl == cl_tail) {
						/* Kill the band! */
						q->active[prio] = NULL;
						q->activemask &= ~(1<<prio);
						if (cl->q->q.qlen)
							cbq_activate_class(cl);
						return NULL;
					}

					q->active[prio] = cl_tail;
				}
				if (cl->q->q.qlen)
					cbq_activate_class(cl);

				cl = cl_prev;
			}

next_class:
			cl_prev = cl;
			cl = cl->next_alive;
		} while (cl_prev != cl_tail);
	} while (deficit);

	q->active[prio] = cl_prev;

	return NULL;
}
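
/*
 * Illustrative sketch only (compiled out): the per-band scheduling above is
 * a deficit round robin. Each backlogged class carries a byte deficit; a
 * class may send while its deficit is positive, and once it is exhausted the
 * deficit is recharged by one quantum and the scan moves to the next class.
 * A standalone model of that accounting for a single class:
 */
#if 0
static int drr_try_send(long *deficit, long quantum, long pkt_len)
{
	if (*deficit <= 0) {
		*deficit += quantum;	/* recharge, caller skips to next class */
		return 0;
	}
	*deficit -= pkt_len;		/* charge the packet just sent */
	return 1;
}
#endif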
static __inline__ struct sk_buff *
cbq_dequeue_1(struct Qdisc *sch)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct sk_buff *skb;
	unsigned activemask;

	activemask = q->activemask & 0xFF;
	while (activemask) {
		int prio = ffz(~activemask);
		activemask &= ~(1<<prio);
		skb = cbq_dequeue_prio(sch, prio);
		if (skb)
			return skb;
	}
	return NULL;
}
static struct sk_buff *
cbq_dequeue(struct Qdisc *sch)
{
	struct sk_buff *skb;
	struct cbq_sched_data *q = qdisc_priv(sch);
	psched_time_t now;
	psched_tdiff_t incr;

	now = psched_get_time();
	incr = now - q->now_rt;

	if (q->tx_class) {
		psched_tdiff_t incr2;
		/* Time integrator. We calculate EOS time
		   by adding the expected packet transmission time.
		   If real time is greater, we warp the artificial clock,
		   so cbq_time = max(real_time, work);
		 */
		incr2 = L2T(&q->link, q->tx_len);
		q->now += incr2;
		cbq_update(q);
		if ((incr -= incr2) < 0)
			incr = 0;
	}
	q->now += incr;
	q->now_rt = now;

	for (;;) {
		q->wd_expires = 0;

		skb = cbq_dequeue_1(sch);
		if (skb) {
			qdisc_bstats_update(sch, skb);
			sch->q.qlen--;
			sch->flags &= ~TCQ_F_THROTTLED;
			return skb;
		}

		/* All the classes are overlimit.

		   It is possible, if:

		   1. Scheduler is empty.
		   2. Toplevel cutoff inhibited borrowing.
		   3. Root class is overlimit.

		   Reset the 2nd and 3rd conditions and retry.

		   Note that NS and cbq-2.0 are buggy here: peeking
		   an arbitrary class is appropriate for ancestor-only
		   sharing, but not for the toplevel algorithm.

		   Our version is better but slower, because it requires
		   two passes; this is unavoidable with top-level sharing.
		*/

		if (q->toplevel == TC_CBQ_MAXLEVEL &&
		    q->link.undertime == PSCHED_PASTPERFECT)
			break;

		q->toplevel = TC_CBQ_MAXLEVEL;
		q->link.undertime = PSCHED_PASTPERFECT;
	}

	/* No packets in scheduler or nobody wants to give them to us :-(
	   Sigh... start the watchdog timer in the last case. */

	if (sch->q.qlen) {
		sch->qstats.overlimits++;
		if (q->wd_expires)
			qdisc_watchdog_schedule(&q->watchdog,
						now + q->wd_expires);
	}
	return NULL;
}
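
/*
 * Illustrative sketch only (compiled out): the clock integrator above keeps
 * an artificial clock q->now that advances by the expected transmission time
 * of the last packet, and is warped forward whenever real time has advanced
 * further, i.e. cbq_time = max(real_time, work). A standalone model:
 */
#if 0
static long integrate_clock(long cbq_now, long real_incr, long tx_time)
{
	cbq_now += tx_time;			/* expected end of service */
	if (real_incr > tx_time)		/* real clock ran ahead: warp */
		cbq_now += real_incr - tx_time;
	return cbq_now;
}
#endif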
/* CBQ class maintenance routines */

static void cbq_adjust_levels(struct cbq_class *this)
{
	if (this == NULL)
		return;

	do {
		int level = 0;
		struct cbq_class *cl;

		if ((cl = this->children) != NULL) {
			do {
				if (cl->level > level)
					level = cl->level;
			} while ((cl = cl->sibling) != this->children);
		}
		this->level = level + 1;
	} while ((this = this->tparent) != NULL);
}
static void cbq_normalize_quanta(struct cbq_sched_data *q, int prio)
{
	struct cbq_class *cl;
	struct hlist_node *n;
	unsigned int h;

	if (q->quanta[prio] == 0)
		return;

	for (h = 0; h < q->clhash.hashsize; h++) {
		hlist_for_each_entry(cl, n, &q->clhash.hash[h], common.hnode) {
			/* BUGGGG... Beware! This expression suffers from
			   arithmetic overflows!
			 */
			if (cl->priority == prio) {
				cl->quantum = (cl->weight*cl->allot*q->nclasses[prio])/
					q->quanta[prio];
			}
			if (cl->quantum <= 0 || cl->quantum > 32*qdisc_dev(cl->qdisc)->mtu) {
				printk(KERN_WARNING "CBQ: class %08x has bad quantum==%ld, repaired.\n", cl->common.classid, cl->quantum);
				cl->quantum = qdisc_dev(cl->qdisc)->mtu/2 + 1;
			}
		}
	}
}
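
/*
 * Illustrative sketch only (compiled out): cbq_normalize_quanta() above sets
 * quantum = weight * allot * nclasses[prio] / quanta[prio], where
 * quanta[prio] is the sum of the weights in the band, so the average quantum
 * over the band stays close to the average allot. E.g. two classes with
 * allot 1500 and weights 3 and 1: quanta = 4, nclasses = 2, giving quanta of
 * 3*1500*2/4 = 2250 and 1*1500*2/4 = 750 bytes per round.
 */
#if 0
static long cbq_quantum_sketch(long weight, long allot, int nclasses,
			       unsigned long sum_weights)
{
	return (weight * allot * nclasses) / sum_weights;
}
#endif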
static void cbq_sync_defmap(struct cbq_class *cl)
{
	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
	struct cbq_class *split = cl->split;
	unsigned int h;
	int i;

	if (split == NULL)
		return;

	for (i = 0; i <= TC_PRIO_MAX; i++) {
		if (split->defaults[i] == cl && !(cl->defmap & (1<<i)))
			split->defaults[i] = NULL;
	}

	for (i = 0; i <= TC_PRIO_MAX; i++) {
		int level = split->level;

		if (split->defaults[i])
			continue;

		for (h = 0; h < q->clhash.hashsize; h++) {
			struct hlist_node *n;
			struct cbq_class *c;

			hlist_for_each_entry(c, n, &q->clhash.hash[h],
					     common.hnode) {
				if (c->split == split && c->level < level &&
				    c->defmap & (1<<i)) {
					split->defaults[i] = c;
					level = c->level;
				}
			}
		}
	}
}
static void cbq_change_defmap(struct cbq_class *cl, u32 splitid, u32 def, u32 mask)
{
	struct cbq_class *split = NULL;

	if (splitid == 0) {
		if ((split = cl->split) == NULL)
			return;
		splitid = split->common.classid;
	}

	if (split == NULL || split->common.classid != splitid) {
		for (split = cl->tparent; split; split = split->tparent)
			if (split->common.classid == splitid)
				break;
	}

	if (split == NULL)
		return;

	if (cl->split != split) {
		cl->defmap = 0;
		cbq_sync_defmap(cl);
		cl->split = split;
		cl->defmap = def & mask;
	} else
		cl->defmap = (cl->defmap & ~mask) | (def & mask);

	cbq_sync_defmap(cl);
}
static void cbq_unlink_class(struct cbq_class *this)
{
	struct cbq_class *cl, **clp;
	struct cbq_sched_data *q = qdisc_priv(this->qdisc);

	qdisc_class_hash_remove(&q->clhash, &this->common);

	if (this->tparent) {
		clp = &this->sibling;
		cl = *clp;
		do {
			if (cl == this) {
				*clp = cl->sibling;
				break;
			}
			clp = &cl->sibling;
		} while ((cl = *clp) != this->sibling);

		if (this->tparent->children == this) {
			this->tparent->children = this->sibling;
			if (this->sibling == this)
				this->tparent->children = NULL;
		}
	} else {
		WARN_ON(this->sibling != this);
	}
}
static void cbq_link_class(struct cbq_class *this)
{
	struct cbq_sched_data *q = qdisc_priv(this->qdisc);
	struct cbq_class *parent = this->tparent;

	this->sibling = this;
	qdisc_class_hash_insert(&q->clhash, &this->common);

	if (parent == NULL)
		return;

	if (parent->children == NULL) {
		parent->children = this;
	} else {
		this->sibling = parent->children->sibling;
		parent->children->sibling = this;
	}
}
static unsigned int cbq_drop(struct Qdisc *sch)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *cl, *cl_head;
	int prio;
	unsigned int len;

	for (prio = TC_CBQ_MAXPRIO; prio >= 0; prio--) {
		if ((cl_head = q->active[prio]) == NULL)
			continue;

		cl = cl_head;
		do {
			if (cl->q->ops->drop && (len = cl->q->ops->drop(cl->q))) {
				sch->q.qlen--;
				if (!cl->q->q.qlen)
					cbq_deactivate_class(cl);
				return len;
			}
		} while ((cl = cl->next_alive) != cl_head);
	}
	return 0;
}
static void
cbq_reset(struct Qdisc *sch)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *cl;
	struct hlist_node *n;
	int prio;
	unsigned int h;

	q->activemask = 0;
	q->pmask = 0;
	q->tx_class = NULL;
	q->tx_borrowed = NULL;
	qdisc_watchdog_cancel(&q->watchdog);
	hrtimer_cancel(&q->delay_timer);
	q->toplevel = TC_CBQ_MAXLEVEL;
	q->now = psched_get_time();
	q->now_rt = q->now;

	for (prio = 0; prio <= TC_CBQ_MAXPRIO; prio++)
		q->active[prio] = NULL;

	for (h = 0; h < q->clhash.hashsize; h++) {
		hlist_for_each_entry(cl, n, &q->clhash.hash[h], common.hnode) {
			qdisc_reset(cl->q);

			cl->next_alive = NULL;
			cl->undertime = PSCHED_PASTPERFECT;
			cl->avgidle = cl->maxidle;
			cl->deficit = cl->quantum;
			cl->cpriority = cl->priority;
		}
	}
	sch->q.qlen = 0;
}
*cl
, struct tc_cbq_lssopt
*lss
)
1240 if (lss
->change
&TCF_CBQ_LSS_FLAGS
) {
1241 cl
->share
= (lss
->flags
&TCF_CBQ_LSS_ISOLATED
) ? NULL
: cl
->tparent
;
1242 cl
->borrow
= (lss
->flags
&TCF_CBQ_LSS_BOUNDED
) ? NULL
: cl
->tparent
;
1244 if (lss
->change
&TCF_CBQ_LSS_EWMA
)
1245 cl
->ewma_log
= lss
->ewma_log
;
1246 if (lss
->change
&TCF_CBQ_LSS_AVPKT
)
1247 cl
->avpkt
= lss
->avpkt
;
1248 if (lss
->change
&TCF_CBQ_LSS_MINIDLE
)
1249 cl
->minidle
= -(long)lss
->minidle
;
1250 if (lss
->change
&TCF_CBQ_LSS_MAXIDLE
) {
1251 cl
->maxidle
= lss
->maxidle
;
1252 cl
->avgidle
= lss
->maxidle
;
1254 if (lss
->change
&TCF_CBQ_LSS_OFFTIME
)
1255 cl
->offtime
= lss
->offtime
;
static void cbq_rmprio(struct cbq_sched_data *q, struct cbq_class *cl)
{
	q->nclasses[cl->priority]--;
	q->quanta[cl->priority] -= cl->weight;
	cbq_normalize_quanta(q, cl->priority);
}

static void cbq_addprio(struct cbq_sched_data *q, struct cbq_class *cl)
{
	q->nclasses[cl->priority]++;
	q->quanta[cl->priority] += cl->weight;
	cbq_normalize_quanta(q, cl->priority);
}
static int cbq_set_wrr(struct cbq_class *cl, struct tc_cbq_wrropt *wrr)
{
	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);

	if (wrr->allot)
		cl->allot = wrr->allot;
	if (wrr->weight)
		cl->weight = wrr->weight;
	if (wrr->priority) {
		cl->priority = wrr->priority - 1;
		cl->cpriority = cl->priority;
		if (cl->priority >= cl->priority2)
			cl->priority2 = TC_CBQ_MAXPRIO - 1;
	}

	cbq_addprio(q, cl);
	return 0;
}
static int cbq_set_overlimit(struct cbq_class *cl, struct tc_cbq_ovl *ovl)
{
	switch (ovl->strategy) {
	case TC_CBQ_OVL_CLASSIC:
		cl->overlimit = cbq_ovl_classic;
		break;
	case TC_CBQ_OVL_DELAY:
		cl->overlimit = cbq_ovl_delay;
		break;
	case TC_CBQ_OVL_LOWPRIO:
		if (ovl->priority2 - 1 >= TC_CBQ_MAXPRIO ||
		    ovl->priority2 - 1 <= cl->priority)
			return -EINVAL;
		cl->priority2 = ovl->priority2 - 1;
		cl->overlimit = cbq_ovl_lowprio;
		break;
	case TC_CBQ_OVL_DROP:
		cl->overlimit = cbq_ovl_drop;
		break;
	case TC_CBQ_OVL_RCLASSIC:
		cl->overlimit = cbq_ovl_rclassic;
		break;
	default:
		return -EINVAL;
	}
	cl->penalty = ovl->penalty;
	return 0;
}
#ifdef CONFIG_NET_CLS_ACT
static int cbq_set_police(struct cbq_class *cl, struct tc_cbq_police *p)
{
	cl->police = p->police;

	if (cl->q->handle) {
		if (p->police == TC_POLICE_RECLASSIFY)
			cl->q->reshape_fail = cbq_reshape_fail;
		else
			cl->q->reshape_fail = NULL;
	}
	return 0;
}
#endif
static int cbq_set_fopt(struct cbq_class *cl, struct tc_cbq_fopt *fopt)
{
	cbq_change_defmap(cl, fopt->split, fopt->defmap, fopt->defchange);
	return 0;
}
static const struct nla_policy cbq_policy[TCA_CBQ_MAX + 1] = {
	[TCA_CBQ_LSSOPT]	= { .len = sizeof(struct tc_cbq_lssopt) },
	[TCA_CBQ_WRROPT]	= { .len = sizeof(struct tc_cbq_wrropt) },
	[TCA_CBQ_FOPT]		= { .len = sizeof(struct tc_cbq_fopt) },
	[TCA_CBQ_OVL_STRATEGY]	= { .len = sizeof(struct tc_cbq_ovl) },
	[TCA_CBQ_RATE]		= { .len = sizeof(struct tc_ratespec) },
	[TCA_CBQ_RTAB]		= { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
	[TCA_CBQ_POLICE]	= { .len = sizeof(struct tc_cbq_police) },
};
static int cbq_init(struct Qdisc *sch, struct nlattr *opt)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct nlattr *tb[TCA_CBQ_MAX + 1];
	struct tc_ratespec *r;
	int err;

	err = nla_parse_nested(tb, TCA_CBQ_MAX, opt, cbq_policy);
	if (err < 0)
		return err;

	if (tb[TCA_CBQ_RTAB] == NULL || tb[TCA_CBQ_RATE] == NULL)
		return -EINVAL;

	r = nla_data(tb[TCA_CBQ_RATE]);

	if ((q->link.R_tab = qdisc_get_rtab(r, tb[TCA_CBQ_RTAB])) == NULL)
		return -EINVAL;

	err = qdisc_class_hash_init(&q->clhash);
	if (err < 0)
		goto put_rtab;

	q->link.refcnt = 1;
	q->link.sibling = &q->link;
	q->link.common.classid = sch->handle;
	q->link.qdisc = sch;
	q->link.q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops,
				      sch->handle);
	if (!q->link.q)
		q->link.q = &noop_qdisc;

	q->link.priority = TC_CBQ_MAXPRIO - 1;
	q->link.priority2 = TC_CBQ_MAXPRIO - 1;
	q->link.cpriority = TC_CBQ_MAXPRIO - 1;
	q->link.ovl_strategy = TC_CBQ_OVL_CLASSIC;
	q->link.overlimit = cbq_ovl_classic;
	q->link.allot = psched_mtu(qdisc_dev(sch));
	q->link.quantum = q->link.allot;
	q->link.weight = q->link.R_tab->rate.rate;

	q->link.ewma_log = TC_CBQ_DEF_EWMA;
	q->link.avpkt = q->link.allot/2;
	q->link.minidle = -0x7FFFFFFF;

	qdisc_watchdog_init(&q->watchdog, sch);
	hrtimer_init(&q->delay_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
	q->delay_timer.function = cbq_undelay;
	q->toplevel = TC_CBQ_MAXLEVEL;
	q->now = psched_get_time();
	q->now_rt = q->now;

	cbq_link_class(&q->link);

	if (tb[TCA_CBQ_LSSOPT])
		cbq_set_lss(&q->link, nla_data(tb[TCA_CBQ_LSSOPT]));

	cbq_addprio(q, &q->link);
	return 0;

put_rtab:
	qdisc_put_rtab(q->link.R_tab);
	return err;
}
static __inline__ int cbq_dump_rate(struct sk_buff *skb, struct cbq_class *cl)
{
	unsigned char *b = skb_tail_pointer(skb);

	NLA_PUT(skb, TCA_CBQ_RATE, sizeof(cl->R_tab->rate), &cl->R_tab->rate);
	return skb->len;

nla_put_failure:
	nlmsg_trim(skb, b);
	return -1;
}
static __inline__ int cbq_dump_lss(struct sk_buff *skb, struct cbq_class *cl)
{
	unsigned char *b = skb_tail_pointer(skb);
	struct tc_cbq_lssopt opt;

	opt.flags = 0;
	if (cl->borrow == NULL)
		opt.flags |= TCF_CBQ_LSS_BOUNDED;
	if (cl->share == NULL)
		opt.flags |= TCF_CBQ_LSS_ISOLATED;
	opt.ewma_log = cl->ewma_log;
	opt.level = cl->level;
	opt.avpkt = cl->avpkt;
	opt.maxidle = cl->maxidle;
	opt.minidle = (u32)(-cl->minidle);
	opt.offtime = cl->offtime;
	opt.change = ~0;
	NLA_PUT(skb, TCA_CBQ_LSSOPT, sizeof(opt), &opt);
	return skb->len;

nla_put_failure:
	nlmsg_trim(skb, b);
	return -1;
}
static __inline__ int cbq_dump_wrr(struct sk_buff *skb, struct cbq_class *cl)
{
	unsigned char *b = skb_tail_pointer(skb);
	struct tc_cbq_wrropt opt;

	opt.flags = 0;
	opt.allot = cl->allot;
	opt.priority = cl->priority + 1;
	opt.cpriority = cl->cpriority + 1;
	opt.weight = cl->weight;
	NLA_PUT(skb, TCA_CBQ_WRROPT, sizeof(opt), &opt);
	return skb->len;

nla_put_failure:
	nlmsg_trim(skb, b);
	return -1;
}
static __inline__ int cbq_dump_ovl(struct sk_buff *skb, struct cbq_class *cl)
{
	unsigned char *b = skb_tail_pointer(skb);
	struct tc_cbq_ovl opt;

	opt.strategy = cl->ovl_strategy;
	opt.priority2 = cl->priority2 + 1;
	opt.pad = 0;
	opt.penalty = cl->penalty;
	NLA_PUT(skb, TCA_CBQ_OVL_STRATEGY, sizeof(opt), &opt);
	return skb->len;

nla_put_failure:
	nlmsg_trim(skb, b);
	return -1;
}
static __inline__ int cbq_dump_fopt(struct sk_buff *skb, struct cbq_class *cl)
{
	unsigned char *b = skb_tail_pointer(skb);
	struct tc_cbq_fopt opt;

	if (cl->split || cl->defmap) {
		opt.split = cl->split ? cl->split->common.classid : 0;
		opt.defmap = cl->defmap;
		opt.defchange = ~0;
		NLA_PUT(skb, TCA_CBQ_FOPT, sizeof(opt), &opt);
	}
	return skb->len;

nla_put_failure:
	nlmsg_trim(skb, b);
	return -1;
}
#ifdef CONFIG_NET_CLS_ACT
static __inline__ int cbq_dump_police(struct sk_buff *skb, struct cbq_class *cl)
{
	unsigned char *b = skb_tail_pointer(skb);
	struct tc_cbq_police opt;

	if (cl->police) {
		opt.police = cl->police;
		opt.__res1 = 0;
		opt.__res2 = 0;
		NLA_PUT(skb, TCA_CBQ_POLICE, sizeof(opt), &opt);
	}
	return skb->len;

nla_put_failure:
	nlmsg_trim(skb, b);
	return -1;
}
#endif
static int cbq_dump_attr(struct sk_buff *skb, struct cbq_class *cl)
{
	if (cbq_dump_lss(skb, cl) < 0 ||
	    cbq_dump_rate(skb, cl) < 0 ||
	    cbq_dump_wrr(skb, cl) < 0 ||
	    cbq_dump_ovl(skb, cl) < 0 ||
#ifdef CONFIG_NET_CLS_ACT
	    cbq_dump_police(skb, cl) < 0 ||
#endif
	    cbq_dump_fopt(skb, cl) < 0)
		return -1;
	return 0;
}
static int cbq_dump(struct Qdisc *sch, struct sk_buff *skb)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct nlattr *nest;

	nest = nla_nest_start(skb, TCA_OPTIONS);
	if (nest == NULL)
		goto nla_put_failure;
	if (cbq_dump_attr(skb, &q->link) < 0)
		goto nla_put_failure;
	nla_nest_end(skb, nest);
	return skb->len;

nla_put_failure:
	nla_nest_cancel(skb, nest);
	return -1;
}
static int
cbq_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
{
	struct cbq_sched_data *q = qdisc_priv(sch);

	q->link.xstats.avgidle = q->link.avgidle;
	return gnet_stats_copy_app(d, &q->link.xstats, sizeof(q->link.xstats));
}
static int
cbq_dump_class(struct Qdisc *sch, unsigned long arg,
	       struct sk_buff *skb, struct tcmsg *tcm)
{
	struct cbq_class *cl = (struct cbq_class *)arg;
	struct nlattr *nest;

	if (cl->tparent)
		tcm->tcm_parent = cl->tparent->common.classid;
	else
		tcm->tcm_parent = TC_H_ROOT;
	tcm->tcm_handle = cl->common.classid;
	tcm->tcm_info = cl->q->handle;

	nest = nla_nest_start(skb, TCA_OPTIONS);
	if (nest == NULL)
		goto nla_put_failure;
	if (cbq_dump_attr(skb, cl) < 0)
		goto nla_put_failure;
	nla_nest_end(skb, nest);
	return skb->len;

nla_put_failure:
	nla_nest_cancel(skb, nest);
	return -1;
}
static int
cbq_dump_class_stats(struct Qdisc *sch, unsigned long arg,
		     struct gnet_dump *d)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *cl = (struct cbq_class *)arg;

	cl->qstats.qlen = cl->q->q.qlen;
	cl->xstats.avgidle = cl->avgidle;
	cl->xstats.undertime = 0;

	if (cl->undertime != PSCHED_PASTPERFECT)
		cl->xstats.undertime = cl->undertime - q->now;

	if (gnet_stats_copy_basic(d, &cl->bstats) < 0 ||
	    gnet_stats_copy_rate_est(d, &cl->bstats, &cl->rate_est) < 0 ||
	    gnet_stats_copy_queue(d, &cl->qstats) < 0)
		return -1;

	return gnet_stats_copy_app(d, &cl->xstats, sizeof(cl->xstats));
}
static int cbq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
		     struct Qdisc **old)
{
	struct cbq_class *cl = (struct cbq_class *)arg;

	if (new == NULL) {
		new = qdisc_create_dflt(sch->dev_queue,
					&pfifo_qdisc_ops, cl->common.classid);
		if (new == NULL)
			return -ENOBUFS;
	} else {
#ifdef CONFIG_NET_CLS_ACT
		if (cl->police == TC_POLICE_RECLASSIFY)
			new->reshape_fail = cbq_reshape_fail;
#endif
	}
	sch_tree_lock(sch);
	*old = cl->q;
	cl->q = new;
	qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
	qdisc_reset(*old);
	sch_tree_unlock(sch);

	return 0;
}
static struct Qdisc *
cbq_leaf(struct Qdisc *sch, unsigned long arg)
{
	struct cbq_class *cl = (struct cbq_class *)arg;

	return cl->q;
}
static void cbq_qlen_notify(struct Qdisc *sch, unsigned long arg)
{
	struct cbq_class *cl = (struct cbq_class *)arg;

	if (cl->q->q.qlen == 0)
		cbq_deactivate_class(cl);
}
static unsigned long cbq_get(struct Qdisc *sch, u32 classid)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *cl = cbq_class_lookup(q, classid);

	if (cl) {
		cl->refcnt++;
		return (unsigned long)cl;
	}
	return 0;
}
static void cbq_destroy_class(struct Qdisc *sch, struct cbq_class *cl)
{
	struct cbq_sched_data *q = qdisc_priv(sch);

	WARN_ON(cl->filters);

	tcf_destroy_chain(&cl->filter_list);
	qdisc_destroy(cl->q);
	qdisc_put_rtab(cl->R_tab);
	gen_kill_estimator(&cl->bstats, &cl->rate_est);
	if (cl != &q->link)
		kfree(cl);
}
static void
cbq_destroy(struct Qdisc *sch)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct hlist_node *n, *next;
	struct cbq_class *cl;
	unsigned int h;

#ifdef CONFIG_NET_CLS_ACT
	q->rx_class = NULL;
#endif
	/*
	 * Filters must be destroyed first because we don't destroy the
	 * classes from root to leafs which means that filters can still
	 * be bound to classes which have been destroyed already. --TGR '04
	 */
	for (h = 0; h < q->clhash.hashsize; h++) {
		hlist_for_each_entry(cl, n, &q->clhash.hash[h], common.hnode)
			tcf_destroy_chain(&cl->filter_list);
	}
	for (h = 0; h < q->clhash.hashsize; h++) {
		hlist_for_each_entry_safe(cl, n, next, &q->clhash.hash[h],
					  common.hnode)
			cbq_destroy_class(sch, cl);
	}
	qdisc_class_hash_destroy(&q->clhash);
}
static void cbq_put(struct Qdisc *sch, unsigned long arg)
{
	struct cbq_class *cl = (struct cbq_class *)arg;

	if (--cl->refcnt == 0) {
#ifdef CONFIG_NET_CLS_ACT
		spinlock_t *root_lock = qdisc_root_sleeping_lock(sch);
		struct cbq_sched_data *q = qdisc_priv(sch);

		spin_lock_bh(root_lock);
		if (q->rx_class == cl)
			q->rx_class = NULL;
		spin_unlock_bh(root_lock);
#endif

		cbq_destroy_class(sch, cl);
	}
}
static int
cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **tca,
		 unsigned long *arg)
{
	int err;
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *cl = (struct cbq_class *)*arg;
	struct nlattr *opt = tca[TCA_OPTIONS];
	struct nlattr *tb[TCA_CBQ_MAX + 1];
	struct cbq_class *parent;
	struct qdisc_rate_table *rtab = NULL;

	if (opt == NULL)
		return -EINVAL;

	err = nla_parse_nested(tb, TCA_CBQ_MAX, opt, cbq_policy);
	if (err < 0)
		return err;

	if (cl) {
		/* Check parent */
		if (parentid) {
			if (cl->tparent &&
			    cl->tparent->common.classid != parentid)
				return -EINVAL;
			if (!cl->tparent && parentid != TC_H_ROOT)
				return -EINVAL;
		}

		if (tb[TCA_CBQ_RATE]) {
			rtab = qdisc_get_rtab(nla_data(tb[TCA_CBQ_RATE]),
					      tb[TCA_CBQ_RTAB]);
			if (rtab == NULL)
				return -EINVAL;
		}

		if (tca[TCA_RATE]) {
			err = gen_replace_estimator(&cl->bstats, &cl->rate_est,
						    qdisc_root_sleeping_lock(sch),
						    tca[TCA_RATE]);
			if (err) {
				if (rtab)
					qdisc_put_rtab(rtab);
				return err;
			}
		}

		/* Change class parameters */
		sch_tree_lock(sch);

		if (cl->next_alive != NULL)
			cbq_deactivate_class(cl);

		if (rtab) {
			qdisc_put_rtab(cl->R_tab);
			cl->R_tab = rtab;
		}

		if (tb[TCA_CBQ_LSSOPT])
			cbq_set_lss(cl, nla_data(tb[TCA_CBQ_LSSOPT]));

		if (tb[TCA_CBQ_WRROPT]) {
			cbq_rmprio(q, cl);
			cbq_set_wrr(cl, nla_data(tb[TCA_CBQ_WRROPT]));
		}

		if (tb[TCA_CBQ_OVL_STRATEGY])
			cbq_set_overlimit(cl, nla_data(tb[TCA_CBQ_OVL_STRATEGY]));

#ifdef CONFIG_NET_CLS_ACT
		if (tb[TCA_CBQ_POLICE])
			cbq_set_police(cl, nla_data(tb[TCA_CBQ_POLICE]));
#endif

		if (tb[TCA_CBQ_FOPT])
			cbq_set_fopt(cl, nla_data(tb[TCA_CBQ_FOPT]));

		if (cl->q->q.qlen)
			cbq_activate_class(cl);

		sch_tree_unlock(sch);

		return 0;
	}

	if (parentid == TC_H_ROOT)
		return -EINVAL;

	if (tb[TCA_CBQ_WRROPT] == NULL || tb[TCA_CBQ_RATE] == NULL ||
	    tb[TCA_CBQ_LSSOPT] == NULL)
		return -EINVAL;

	rtab = qdisc_get_rtab(nla_data(tb[TCA_CBQ_RATE]), tb[TCA_CBQ_RTAB]);
	if (rtab == NULL)
		return -EINVAL;

	if (classid) {
		err = -EINVAL;
		if (TC_H_MAJ(classid ^ sch->handle) || cbq_class_lookup(q, classid))
			goto failure;
	} else {
		int i;
		classid = TC_H_MAKE(sch->handle, 0x8000);

		for (i = 0; i < 0x8000; i++) {
			if (++q->hgenerator >= 0x8000)
				q->hgenerator = 1;
			if (cbq_class_lookup(q, classid|q->hgenerator) == NULL)
				break;
		}
		err = -ENOSR;
		if (i >= 0x8000)
			goto failure;
		classid = classid|q->hgenerator;
	}

	parent = &q->link;
	if (parentid) {
		parent = cbq_class_lookup(q, parentid);
		err = -EINVAL;
		if (parent == NULL)
			goto failure;
	}

	err = -ENOBUFS;
	cl = kzalloc(sizeof(*cl), GFP_KERNEL);
	if (cl == NULL)
		goto failure;

	if (tca[TCA_RATE]) {
		err = gen_new_estimator(&cl->bstats, &cl->rate_est,
					qdisc_root_sleeping_lock(sch),
					tca[TCA_RATE]);
		if (err) {
			kfree(cl);
			goto failure;
		}
	}

	cl->R_tab = rtab;
	rtab = NULL;
	cl->refcnt = 1;
	cl->q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, classid);
	if (!cl->q)
		cl->q = &noop_qdisc;
	cl->common.classid = classid;
	cl->tparent = parent;
	cl->qdisc = sch;
	cl->allot = parent->allot;
	cl->quantum = cl->allot;
	cl->weight = cl->R_tab->rate.rate;

	sch_tree_lock(sch);
	cbq_link_class(cl);
	cl->borrow = cl->tparent;
	if (cl->tparent != &q->link)
		cl->share = cl->tparent;
	cbq_adjust_levels(parent);
	cl->minidle = -0x7FFFFFFF;
	cbq_set_lss(cl, nla_data(tb[TCA_CBQ_LSSOPT]));
	cbq_set_wrr(cl, nla_data(tb[TCA_CBQ_WRROPT]));
	if (cl->ewma_log == 0)
		cl->ewma_log = q->link.ewma_log;
	if (cl->maxidle == 0)
		cl->maxidle = q->link.maxidle;
	if (cl->avpkt == 0)
		cl->avpkt = q->link.avpkt;
	cl->overlimit = cbq_ovl_classic;
	if (tb[TCA_CBQ_OVL_STRATEGY])
		cbq_set_overlimit(cl, nla_data(tb[TCA_CBQ_OVL_STRATEGY]));
#ifdef CONFIG_NET_CLS_ACT
	if (tb[TCA_CBQ_POLICE])
		cbq_set_police(cl, nla_data(tb[TCA_CBQ_POLICE]));
#endif
	if (tb[TCA_CBQ_FOPT])
		cbq_set_fopt(cl, nla_data(tb[TCA_CBQ_FOPT]));
	sch_tree_unlock(sch);

	qdisc_class_hash_grow(sch, &q->clhash);

	*arg = (unsigned long)cl;
	return 0;

failure:
	qdisc_put_rtab(rtab);
	return err;
}
*sch
, unsigned long arg
)
1922 struct cbq_sched_data
*q
= qdisc_priv(sch
);
1923 struct cbq_class
*cl
= (struct cbq_class
*)arg
;
1926 if (cl
->filters
|| cl
->children
|| cl
== &q
->link
)
1931 qlen
= cl
->q
->q
.qlen
;
1933 qdisc_tree_decrease_qlen(cl
->q
, qlen
);
1936 cbq_deactivate_class(cl
);
1938 if (q
->tx_borrowed
== cl
)
1939 q
->tx_borrowed
= q
->tx_class
;
1940 if (q
->tx_class
== cl
) {
1942 q
->tx_borrowed
= NULL
;
1944 #ifdef CONFIG_NET_CLS_ACT
1945 if (q
->rx_class
== cl
)
1949 cbq_unlink_class(cl
);
1950 cbq_adjust_levels(cl
->tparent
);
1952 cbq_sync_defmap(cl
);
1955 sch_tree_unlock(sch
);
1957 BUG_ON(--cl
->refcnt
== 0);
1959 * This shouldn't happen: we "hold" one cops->get() when called
1960 * from tc_ctl_tclass; the destroy method is done from cops->put().
1966 static struct tcf_proto
**cbq_find_tcf(struct Qdisc
*sch
, unsigned long arg
)
1968 struct cbq_sched_data
*q
= qdisc_priv(sch
);
1969 struct cbq_class
*cl
= (struct cbq_class
*)arg
;
1974 return &cl
->filter_list
;
static unsigned long cbq_bind_filter(struct Qdisc *sch, unsigned long parent,
				     u32 classid)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *p = (struct cbq_class *)parent;
	struct cbq_class *cl = cbq_class_lookup(q, classid);

	if (cl) {
		if (p && p->level <= cl->level)
			return 0;
		cl->filters++;
		return (unsigned long)cl;
	}
	return 0;
}
static void cbq_unbind_filter(struct Qdisc *sch, unsigned long arg)
{
	struct cbq_class *cl = (struct cbq_class *)arg;

	cl->filters--;
}
static void cbq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *cl;
	struct hlist_node *n;
	unsigned int h;

	if (arg->stop)
		return;

	for (h = 0; h < q->clhash.hashsize; h++) {
		hlist_for_each_entry(cl, n, &q->clhash.hash[h], common.hnode) {
			if (arg->count < arg->skip) {
				arg->count++;
				continue;
			}
			if (arg->fn(sch, (unsigned long)cl, arg) < 0) {
				arg->stop = 1;
				return;
			}
			arg->count++;
		}
	}
}
static const struct Qdisc_class_ops cbq_class_ops = {
	.graft		=	cbq_graft,
	.leaf		=	cbq_leaf,
	.qlen_notify	=	cbq_qlen_notify,
	.get		=	cbq_get,
	.put		=	cbq_put,
	.change		=	cbq_change_class,
	.delete		=	cbq_delete,
	.walk		=	cbq_walk,
	.tcf_chain	=	cbq_find_tcf,
	.bind_tcf	=	cbq_bind_filter,
	.unbind_tcf	=	cbq_unbind_filter,
	.dump		=	cbq_dump_class,
	.dump_stats	=	cbq_dump_class_stats,
};
static struct Qdisc_ops cbq_qdisc_ops __read_mostly = {
	.next		=	NULL,
	.cl_ops		=	&cbq_class_ops,
	.id		=	"cbq",
	.priv_size	=	sizeof(struct cbq_sched_data),
	.enqueue	=	cbq_enqueue,
	.dequeue	=	cbq_dequeue,
	.peek		=	qdisc_peek_dequeued,
	.drop		=	cbq_drop,
	.init		=	cbq_init,
	.reset		=	cbq_reset,
	.destroy	=	cbq_destroy,
	.change		=	NULL,
	.dump		=	cbq_dump,
	.dump_stats	=	cbq_dump_stats,
	.owner		=	THIS_MODULE,
};
static int __init cbq_module_init(void)
{
	return register_qdisc(&cbq_qdisc_ops);
}

static void __exit cbq_module_exit(void)
{
	unregister_qdisc(&cbq_qdisc_ops);
}

module_init(cbq_module_init)
module_exit(cbq_module_exit)
MODULE_LICENSE("GPL");