/*
 * net/sched/sch_cbq.c	Class-Based Queueing discipline.
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 */

#include <linux/module.h>
#include <asm/uaccess.h>
#include <asm/system.h>
#include <linux/bitops.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/string.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/errno.h>
#include <linux/interrupt.h>
#include <linux/if_ether.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/notifier.h>
#include <net/route.h>
#include <linux/skbuff.h>
#include <net/pkt_sched.h>
/* Class-Based Queueing (CBQ) algorithm.
   =======================================

   Sources: [1] Sally Floyd and Van Jacobson, "Link-sharing and Resource
		Management Models for Packet Networks",
		IEEE/ACM Transactions on Networking, Vol.3, No.4, 1995

	    [2] Sally Floyd, "Notes on CBQ and Guaranteed Service", 1995

	    [3] Sally Floyd, "Notes on Class-Based Queueing: Setting
		Parameters", 1996

	    [4] Sally Floyd and Michael Speer, "Experimental Results
		for Class-Based Queueing", 1998, not published.

   -----------------------------------------------------------------------

   The algorithm skeleton was taken from the NS simulator cbq.cc.
   Anyone checking this code against the LBL version should take into
   account that ONLY the skeleton was borrowed; the implementation is
   different. Particularly:

   --- The WRR algorithm is different. Our version looks more
       reasonable (I hope) and works when quanta are allowed to be
       less than MTU, which is always the case when real time classes
       have small rates. Note that the statement of [3] is
       incomplete: delay may actually be estimated even if the class
       per-round allotment is less than MTU. Namely, if the per-round
       allotment is W*r_i, and r_1+...+r_k = r < 1

	delay_i <= ([MTU/(W*r_i)]*W*r + W*r + k*MTU)/B

       In the worst case we have an IntServ estimate with D = W*r+k*MTU
       and C = MTU*r. The proof (if correct at all) is trivial.

   --- It seems that cbq-2.0 is not very accurate. At least, I cannot
       interpret some places, which look like wrong translations
       from NS. Anyone is advised to find these differences
       and explain to me why I am wrong 8).

   --- Linux has no EOI event, so that we cannot estimate true class
       idle time. The workaround is to consider the next dequeue event
       as a sign that the previous packet is finished. This is wrong
       because of internal device queueing, but on a permanently loaded
       link it is true. Moreover, combined with the clock integrator,
       this scheme looks very close to an ideal solution.  */
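/*
 * Illustrative sketch only (not used anywhere in this file): evaluating the
 * delay bound quoted above for one class. All parameter names and units are
 * hypothetical -- the per-round allotment W*r_i and the aggregate round size
 * W*r are passed in bytes, B in bytes per second, and the result is returned
 * in microseconds.
 */
static inline unsigned long long
cbq_example_delay_bound_us(unsigned long mtu, unsigned long long wr_i,
			   unsigned long long wr, unsigned int k,
			   unsigned long long b)
{
	/* [MTU/(W*r_i)], rounded up to whole rounds */
	unsigned long long rounds = wr_i ? (mtu + wr_i - 1) / wr_i : 0;
	/* ([MTU/(W*r_i)]*W*r + W*r + k*MTU) / B, i.e. (rounds+1)*W*r + k*MTU over B */
	unsigned long long bytes = (rounds + 1) * wr + (unsigned long long)k * mtu;

	return bytes * 1000000ULL / b;
}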
struct cbq_sched_data;

struct cbq_class
{
	struct cbq_class	*next;			/* hash table link */
	struct cbq_class	*next_alive;		/* next class with backlog in this priority band */

	unsigned char		priority;		/* class priority */
	unsigned char		priority2;		/* priority to be used after overlimit */
	unsigned char		ewma_log;		/* time constant for idle time calculation */
	unsigned char		ovl_strategy;
#ifdef CONFIG_NET_CLS_POLICE
	unsigned char		police;
#endif

	/* Link-sharing scheduler parameters */
	long			maxidle;		/* Class parameters: see below. */
	struct qdisc_rate_table	*R_tab;

	/* Overlimit strategy parameters */
	void			(*overlimit)(struct cbq_class *cl);

	/* General scheduler (WRR) parameters */
	long			quantum;		/* Allotment per WRR round */
	long			weight;			/* Relative allotment: see below */

	struct Qdisc		*qdisc;			/* Ptr to CBQ discipline */
	struct cbq_class	*split;			/* Ptr to split node */
	struct cbq_class	*share;			/* Ptr to LS parent in the class tree */
	struct cbq_class	*tparent;		/* Ptr to tree parent in the class tree */
	struct cbq_class	*borrow;		/* NULL if class is bandwidth limited;
							   parent otherwise */
	struct cbq_class	*sibling;		/* Sibling chain */
	struct cbq_class	*children;		/* Pointer to children chain */

	struct Qdisc		*q;			/* Elementary queueing discipline */

	unsigned char		cpriority;		/* Effective priority */
	unsigned char		delayed;
	unsigned char		level;			/* level of the class in hierarchy:
							   0 for leaf classes, and maximal
							   level of children + 1 for nodes. */

	psched_time_t		last;			/* Last end of service */
	psched_time_t		undertime;
	long			deficit;		/* Saved deficit for WRR */
	unsigned long		penalized;
	struct gnet_stats_basic	bstats;
	struct gnet_stats_queue	qstats;
	struct gnet_stats_rate_est rate_est;
	spinlock_t		*stats_lock;
	struct tc_cbq_xstats	xstats;

	struct tcf_proto	*filter_list;

	struct cbq_class	*defaults[TC_PRIO_MAX+1];
};

struct cbq_sched_data
{
	struct cbq_class	*classes[16];		/* Hash table of all classes */
	int			nclasses[TC_CBQ_MAXPRIO+1];
	unsigned		quanta[TC_CBQ_MAXPRIO+1];

	struct cbq_class	link;

	struct cbq_class	*active[TC_CBQ_MAXPRIO+1];	/* List of all classes
								   with backlog */

#ifdef CONFIG_NET_CLS_POLICE
	struct cbq_class	*rx_class;
#endif
	struct cbq_class	*tx_class;
	struct cbq_class	*tx_borrowed;

	psched_time_t		now;			/* Cached timestamp */
	psched_time_t		now_rt;			/* Cached real time */

	struct timer_list	delay_timer;
	struct timer_list	wd_timer;		/* Watchdog timer, started when
							   CBQ has backlog but cannot
							   transmit just now */
};
#define L2T(cl,len)	((cl)->R_tab->data[(len)>>(cl)->R_tab->rate.cell_log])
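/*
 * Illustrative sketch only: L2T() ("length to time") maps a packet length to
 * its transmission time by indexing the precomputed rate table in cells of
 * 2^cell_log bytes. The equivalent direct computation (hypothetical helper,
 * assuming the rate in bytes per second and a result in microseconds) is:
 */
static inline unsigned long cbq_example_len_to_usec(unsigned int len,
						    unsigned int bytes_per_sec)
{
	return (unsigned long)(((unsigned long long)len * 1000000) / bytes_per_sec);
}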
static __inline__ unsigned cbq_hash(u32 h)

static __inline__ struct cbq_class *
cbq_class_lookup(struct cbq_sched_data *q, u32 classid)
{
	struct cbq_class *cl;

	for (cl = q->classes[cbq_hash(classid)]; cl; cl = cl->next)
		if (cl->classid == classid)
#ifdef CONFIG_NET_CLS_POLICE

static struct cbq_class *
cbq_reclassify(struct sk_buff *skb, struct cbq_class *this)
{
	struct cbq_class *cl, *new;

	for (cl = this->tparent; cl; cl = cl->tparent)
		if ((new = cl->defaults[TC_PRIO_BESTEFFORT]) != NULL && new != this)
/* Classify packet. The procedure is pretty complicated, but
   it allows us to combine link sharing and priority scheduling
   transparently.

   Namely, you can put link sharing rules (e.g. route-based) at the root of
   CBQ, so that it resolves to split nodes. Then packets are classified
   by logical priority, or a more specific classifier may be attached
   to the split node.
 */
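/*
 * Illustrative sketch of the defmap fallback used by cbq_classify() below
 * (hypothetical helper; the real logic is inlined and additionally walks
 * split nodes): a filter result that names no class falls back to the
 * default class of the packet's logical priority, then to the best-effort
 * default.
 */
static inline struct cbq_class *
cbq_example_defmap_pick(struct cbq_class **defmap, u32 prio)
{
	struct cbq_class *cl = defmap[prio & TC_PRIO_MAX];

	return cl ? cl : defmap[TC_PRIO_BESTEFFORT];
}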
static struct cbq_class *
cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *head = &q->link;
	struct cbq_class **defmap;
	struct cbq_class *cl = NULL;
	u32 prio = skb->priority;
	struct tcf_result res;
	/* Step 1. If skb->priority points to one of our classes, use it. */
	if (TC_H_MAJ(prio^sch->handle) == 0 &&
	    (cl = cbq_class_lookup(q, prio)) != NULL)

	*qerr = NET_XMIT_BYPASS;

		defmap = head->defaults;

		/* Step 2+n. Apply classifier. */
		if (!head->filter_list || (result = tc_classify(skb, head->filter_list, &res)) < 0)

		if ((cl = (void*)res.class) == NULL) {
			if (TC_H_MAJ(res.classid))
				cl = cbq_class_lookup(q, res.classid);
			else if ((cl = defmap[res.classid&TC_PRIO_MAX]) == NULL)
				cl = defmap[TC_PRIO_BESTEFFORT];

			if (cl == NULL || cl->level >= head->level)

#ifdef CONFIG_NET_CLS_ACT
			*qerr = NET_XMIT_SUCCESS;
#elif defined(CONFIG_NET_CLS_POLICE)
		case TC_POLICE_RECLASSIFY:
			return cbq_reclassify(skb, cl);
		/* Step 3+n. If classifier selected a link sharing class,
		   apply agency specific classifier.
		   Repeat this procedure until we hit a leaf node.
		 */

	/* Step 4. No success... */
	if (TC_H_MAJ(prio) == 0 &&
	    !(cl = head->defaults[prio&TC_PRIO_MAX]) &&
	    !(cl = head->defaults[TC_PRIO_BESTEFFORT]))
/*
   A packet has just been enqueued on the empty class.
   cbq_activate_class adds it to the tail of active class list
   of its priority band.
 */

static __inline__ void cbq_activate_class(struct cbq_class *cl)
{
	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
	int prio = cl->cpriority;
	struct cbq_class *cl_tail;

	cl_tail = q->active[prio];
	q->active[prio] = cl;

	if (cl_tail != NULL) {
		cl->next_alive = cl_tail->next_alive;
		cl_tail->next_alive = cl;

		q->activemask |= (1<<prio);
/*
   Unlink class from active chain.
   Note that this same procedure is done directly in cbq_dequeue*
   during round-robin procedure.
 */

static void cbq_deactivate_class(struct cbq_class *this)
{
	struct cbq_sched_data *q = qdisc_priv(this->qdisc);
	int prio = this->cpriority;
	struct cbq_class *cl;
	struct cbq_class *cl_prev = q->active[prio];

		cl = cl_prev->next_alive;

			cl_prev->next_alive = cl->next_alive;
			cl->next_alive = NULL;

			if (cl == q->active[prio]) {
				q->active[prio] = cl_prev;
				if (cl == q->active[prio]) {
					q->active[prio] = NULL;
					q->activemask &= ~(1<<prio);

			cl = cl_prev->next_alive;

	} while ((cl_prev = cl) != q->active[prio]);
static void
cbq_mark_toplevel(struct cbq_sched_data *q, struct cbq_class *cl)
{
	int toplevel = q->toplevel;

	if (toplevel > cl->level && !(cl->q->flags&TCQ_F_THROTTLED)) {

		PSCHED_GET_TIME(now);
		incr = PSCHED_TDIFF(now, q->now_rt);
		PSCHED_TADD2(q->now, incr, now);

			if (PSCHED_TLESS(cl->undertime, now)) {
				q->toplevel = cl->level;

		} while ((cl = cl->borrow) != NULL && toplevel > cl->level);
static int
cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *cl = cbq_classify(skb, sch, &ret);

#ifdef CONFIG_NET_CLS_POLICE
		if (ret == NET_XMIT_BYPASS)

#ifdef CONFIG_NET_CLS_POLICE
	cl->q->__parent = sch;

	if ((ret = cl->q->enqueue(skb, cl->q)) == NET_XMIT_SUCCESS) {
		sch->bstats.packets++;
		sch->bstats.bytes += len;
		cbq_mark_toplevel(q, cl);
			cbq_activate_class(cl);

	cbq_mark_toplevel(q, cl);
static int
cbq_requeue(struct sk_buff *skb, struct Qdisc *sch)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *cl;

	if ((cl = q->tx_class) == NULL) {

	cbq_mark_toplevel(q, cl);

#ifdef CONFIG_NET_CLS_POLICE
	cl->q->__parent = sch;

	if ((ret = cl->q->ops->requeue(skb, cl->q)) == 0) {
		sch->qstats.requeues++;
			cbq_activate_class(cl);
/* Overlimit actions */

/* TC_CBQ_OVL_CLASSIC: (default) penalize leaf class by adding offtime */

static void cbq_ovl_classic(struct cbq_class *cl)
{
	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
	psched_tdiff_t delay = PSCHED_TDIFF(cl->undertime, q->now);

		delay += cl->offtime;

		/* Class goes to sleep, so that it will have no
		   chance to work avgidle. Let's forgive it 8)

		   BTW cbq-2.0 has a bug in this place: apparently
		   they forgot to shift it by cl->ewma_log. */
			delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
		if (cl->avgidle < cl->minidle)
			cl->avgidle = cl->minidle;
		PSCHED_TADD2(q->now, delay, cl->undertime);

		cl->xstats.overactions++;

	if (q->wd_expires == 0 || q->wd_expires > delay)
		q->wd_expires = delay;

	/* Dirty work! We must schedule wakeups based on
	   real available rate, rather than leaf rate,
	   which may be tiny (even zero).
	 */
	if (q->toplevel == TC_CBQ_MAXLEVEL) {
		psched_tdiff_t base_delay = q->wd_expires;

		for (b = cl->borrow; b; b = b->borrow) {
			delay = PSCHED_TDIFF(b->undertime, q->now);
			if (delay < base_delay) {

		q->wd_expires = base_delay;
/* TC_CBQ_OVL_RCLASSIC: penalize by offtime the classes in the hierarchy
   when they go overlimit */

static void cbq_ovl_rclassic(struct cbq_class *cl)
{
	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
	struct cbq_class *this = cl;

		if (cl->level > q->toplevel) {

	} while ((cl = cl->borrow) != NULL);
/* TC_CBQ_OVL_DELAY: delay until it will go to underlimit */

static void cbq_ovl_delay(struct cbq_class *cl)
{
	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
	psched_tdiff_t delay = PSCHED_TDIFF(cl->undertime, q->now);

		unsigned long sched = jiffies;

		delay += cl->offtime;
			delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
		if (cl->avgidle < cl->minidle)
			cl->avgidle = cl->minidle;
		PSCHED_TADD2(q->now, delay, cl->undertime);

			sched += PSCHED_US2JIFFIE(delay) + cl->penalty;
			cl->penalized = sched;
			cl->cpriority = TC_CBQ_MAXPRIO;
			q->pmask |= (1<<TC_CBQ_MAXPRIO);
			if (del_timer(&q->delay_timer) &&
			    (long)(q->delay_timer.expires - sched) > 0)
				q->delay_timer.expires = sched;
			add_timer(&q->delay_timer);
			cl->xstats.overactions++;

	if (q->wd_expires == 0 || q->wd_expires > delay)
		q->wd_expires = delay;
/* TC_CBQ_OVL_LOWPRIO: penalize class by lowering its priority band */

static void cbq_ovl_lowprio(struct cbq_class *cl)
{
	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);

	cl->penalized = jiffies + cl->penalty;

	if (cl->cpriority != cl->priority2) {
		cl->cpriority = cl->priority2;
		q->pmask |= (1<<cl->cpriority);
		cl->xstats.overactions++;

/* TC_CBQ_OVL_DROP: penalize class by dropping */

static void cbq_ovl_drop(struct cbq_class *cl)
{
	if (cl->q->ops->drop)
		if (cl->q->ops->drop(cl->q))
	cl->xstats.overactions++;
static void cbq_watchdog(unsigned long arg)
{
	struct Qdisc *sch = (struct Qdisc*)arg;

	sch->flags &= ~TCQ_F_THROTTLED;
	netif_schedule(sch->dev);
}

static unsigned long cbq_undelay_prio(struct cbq_sched_data *q, int prio)
{
	struct cbq_class *cl;
	struct cbq_class *cl_prev = q->active[prio];
	unsigned long now = jiffies;
	unsigned long sched = now;

		cl = cl_prev->next_alive;
		if ((long)(now - cl->penalized) > 0) {
			cl_prev->next_alive = cl->next_alive;
			cl->next_alive = NULL;
			cl->cpriority = cl->priority;
				cbq_activate_class(cl);

			if (cl == q->active[prio]) {
				q->active[prio] = cl_prev;
				if (cl == q->active[prio]) {
					q->active[prio] = NULL;

			cl = cl_prev->next_alive;
		} else if ((long)(sched - cl->penalized) > 0)
			sched = cl->penalized;
	} while ((cl_prev = cl) != q->active[prio]);

	return (long)(sched - now);
}

static void cbq_undelay(unsigned long arg)
{
	struct Qdisc *sch = (struct Qdisc*)arg;
	struct cbq_sched_data *q = qdisc_priv(sch);

		int prio = ffz(~pmask);

		tmp = cbq_undelay_prio(q, prio);
			if (tmp < delay || delay == 0)

		q->delay_timer.expires = jiffies + delay;
		add_timer(&q->delay_timer);

	sch->flags &= ~TCQ_F_THROTTLED;
	netif_schedule(sch->dev);
#ifdef CONFIG_NET_CLS_POLICE

static int cbq_reshape_fail(struct sk_buff *skb, struct Qdisc *child)
{
	struct Qdisc *sch = child->__parent;
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *cl = q->rx_class;

	if (cl && (cl = cbq_reclassify(skb, cl)) != NULL) {

		cbq_mark_toplevel(q, cl);

		cl->q->__parent = sch;

		if (cl->q->enqueue(skb, cl->q) == 0) {
			sch->bstats.packets++;
			sch->bstats.bytes += len;
				cbq_activate_class(cl);
/*
   This is a mission-critical procedure.

   We "regenerate" the toplevel cutoff if the transmitting class
   has backlog and is not regulated. It is not part of the
   original CBQ description, but looks more reasonable.
   Probably it is wrong. This question needs further investigation.
 */

static __inline__ void
cbq_update_toplevel(struct cbq_sched_data *q, struct cbq_class *cl,
		    struct cbq_class *borrowed)
{
	if (cl && q->toplevel >= borrowed->level) {
		if (cl->q->q.qlen > 1) {
				if (PSCHED_IS_PASTPERFECT(borrowed->undertime)) {
					q->toplevel = borrowed->level;
			} while ((borrowed = borrowed->borrow) != NULL);

#if 0
	/* It is not necessary now. Uncommenting it
	   will save CPU cycles, but decrease fairness.
	 */
		q->toplevel = TC_CBQ_MAXLEVEL;
#endif
static void
cbq_update(struct cbq_sched_data *q)
{
	struct cbq_class *this = q->tx_class;
	struct cbq_class *cl = this;

	for ( ; cl; cl = cl->share) {
		long avgidle = cl->avgidle;

		cl->bstats.packets++;
		cl->bstats.bytes += len;

		/* (now - last) is total time between packet right edges.
		   (last_pktlen/rate) is "virtual" busy time, so that

			idle = (now - last) - last_pktlen/rate
		 */

		idle = PSCHED_TDIFF(q->now, cl->last);
		if ((unsigned long)idle > 128*1024*1024) {
			avgidle = cl->maxidle;
			idle -= L2T(cl, len);

			/* true_avgidle := (1-W)*true_avgidle + W*idle,
			   where W=2^{-ewma_log}. But cl->avgidle is scaled:
			   cl->avgidle == true_avgidle/W, hence:
			 */
			avgidle += idle - (avgidle >> cl->ewma_log);

			/* Overlimit or at-limit */

			if (avgidle < cl->minidle)
				avgidle = cl->minidle;

			cl->avgidle = avgidle;

			/* Calculate the expected time when this class
			   will be allowed to send, i.e. when
			   (1-W)*true_avgidle + W*delay = 0, that is
			   idle = (1/W - 1)*(-true_avgidle)
			   or, in the scaled representation,
			   idle = (1 - W)*(-cl->avgidle);
			 */
			idle = (-avgidle) - ((-avgidle) >> cl->ewma_log);

			/* To maintain the rate allocated to the class,
			   we add to undertime the virtual clock time
			   necessary to complete the transmitted packet.
			   (len/phys_bandwidth has already passed by
			   the moment of cbq_update.)
			 */

			idle -= L2T(&q->link, len);
			idle += L2T(cl, len);

			PSCHED_AUDIT_TDIFF(idle);

			PSCHED_TADD2(q->now, idle, cl->undertime);

			PSCHED_SET_PASTPERFECT(cl->undertime);
			if (avgidle > cl->maxidle)
				cl->avgidle = cl->maxidle;
				cl->avgidle = avgidle;

	cbq_update_toplevel(q, this, q->tx_borrowed);
static __inline__ struct cbq_class *
cbq_under_limit(struct cbq_class *cl)
{
	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
	struct cbq_class *this_cl = cl;

	if (cl->tparent == NULL)

	if (PSCHED_IS_PASTPERFECT(cl->undertime) ||
	    !PSCHED_TLESS(q->now, cl->undertime)) {

		/* This is a very suspicious place. Currently the overlimit
		   action is generated for non-bounded classes
		   only if the link is completely congested.
		   Though it agrees with the ancestor-only paradigm,
		   it looks very stupid. In particular,
		   it means that this chunk of code will either
		   never be called or result in strong amplification
		   of burstiness. Dangerous, silly, and yet
		   no other solution exists.
		 */
		if ((cl = cl->borrow) == NULL) {
			this_cl->qstats.overlimits++;
			this_cl->overlimit(this_cl);

		if (cl->level > q->toplevel)

	} while (!PSCHED_IS_PASTPERFECT(cl->undertime) &&
		 PSCHED_TLESS(q->now, cl->undertime));
static __inline__ struct sk_buff *
cbq_dequeue_prio(struct Qdisc *sch, int prio)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *cl_tail, *cl_prev, *cl;

	cl_tail = cl_prev = q->active[prio];
	cl = cl_prev->next_alive;

			struct cbq_class *borrow = cl;

			    (borrow = cbq_under_limit(cl)) == NULL)

			if (cl->deficit <= 0) {
				/* Class exhausted its allotment per
				   this round. Switch to the next one.
				 */
				cl->deficit += cl->quantum;

			skb = cl->q->dequeue(cl->q);

			/* Class did not give us any skb :-(
			   It could occur even if cl->q->q.qlen != 0,
			   e.g. if cl->q == "tbf"
			 */

			cl->deficit -= skb->len;
			q->tx_borrowed = borrow;
#ifndef CBQ_XSTATS_BORROWS_BYTES
				borrow->xstats.borrows++;
				cl->xstats.borrows++;
				borrow->xstats.borrows += skb->len;
				cl->xstats.borrows += skb->len;
			q->tx_len = skb->len;

			if (cl->deficit <= 0) {
				q->active[prio] = cl;
				cl->deficit += cl->quantum;

			if (cl->q->q.qlen == 0 || prio != cl->cpriority) {
				/* Class is empty or penalized.
				   Unlink it from active chain.
				 */
				cl_prev->next_alive = cl->next_alive;
				cl->next_alive = NULL;

				/* Did cl_tail point to it? */

					/* Was it the last class in this band? */
						q->active[prio] = NULL;
						q->activemask &= ~(1<<prio);
							cbq_activate_class(cl);

					q->active[prio] = cl_tail;

					cbq_activate_class(cl);

	} while (cl_prev != cl_tail);

	q->active[prio] = cl_prev;
static __inline__ struct sk_buff *
cbq_dequeue_1(struct Qdisc *sch)
{
	struct cbq_sched_data *q = qdisc_priv(sch);

	activemask = q->activemask & 0xFF;
		int prio = ffz(~activemask);
		activemask &= ~(1<<prio);
		skb = cbq_dequeue_prio(sch, prio);
static struct sk_buff *
cbq_dequeue(struct Qdisc *sch)
{
	struct sk_buff *skb;
	struct cbq_sched_data *q = qdisc_priv(sch);
	psched_tdiff_t incr;

	PSCHED_GET_TIME(now);
	incr = PSCHED_TDIFF(now, q->now_rt);

		psched_tdiff_t incr2;
		/* Time integrator. We calculate EOS time
		   by adding the expected packet transmission time.
		   If real time is greater, we warp the artificial clock,
		   so that

			cbq_time = max(real_time, work);
		 */
		incr2 = L2T(&q->link, q->tx_len);
		PSCHED_TADD(q->now, incr2);
		if ((incr -= incr2) < 0)
	PSCHED_TADD(q->now, incr);

		skb = cbq_dequeue_1(sch);
			sch->flags &= ~TCQ_F_THROTTLED;

		/* All the classes are overlimit.

		   It is possible if:

		   1. Scheduler is empty.
		   2. Toplevel cutoff inhibited borrowing.
		   3. Root class is overlimit.

		   Reset the 2nd and 3rd conditions and retry.

		   Note that NS and cbq-2.0 are buggy: peeking
		   an arbitrary class is appropriate for ancestor-only
		   sharing, but not for the toplevel algorithm.

		   Our version is better but slower, because it requires
		   two passes; that is unavoidable with top-level sharing.
		 */

		if (q->toplevel == TC_CBQ_MAXLEVEL &&
		    PSCHED_IS_PASTPERFECT(q->link.undertime))

		q->toplevel = TC_CBQ_MAXLEVEL;
		PSCHED_SET_PASTPERFECT(q->link.undertime);

	/* No packets in scheduler or nobody wants to give them to us :-(
	   Sigh... start watchdog timer in the last case. */

		sch->qstats.overlimits++;
		if (q->wd_expires) {
			long delay = PSCHED_US2JIFFIE(q->wd_expires);
			mod_timer(&q->wd_timer, jiffies + delay);
			sch->flags |= TCQ_F_THROTTLED;
/* CBQ class maintenance routines */

static void cbq_adjust_levels(struct cbq_class *this)
{
		struct cbq_class *cl;

		if ((cl = this->children) != NULL) {
				if (cl->level > level)
			} while ((cl = cl->sibling) != this->children);
		this->level = level + 1;
	} while ((this = this->tparent) != NULL);
static void cbq_normalize_quanta(struct cbq_sched_data *q, int prio)
{
	struct cbq_class *cl;

	if (q->quanta[prio] == 0)

	for (h = 0; h < 16; h++) {
		for (cl = q->classes[h]; cl; cl = cl->next) {
			/* BUGGGG... Beware! This expression suffers from
			   arithmetic overflow!
			 */
			if (cl->priority == prio) {
				cl->quantum = (cl->weight*cl->allot*q->nclasses[prio])/
					q->quanta[prio];
			if (cl->quantum <= 0 || cl->quantum > 32*cl->qdisc->dev->mtu) {
				printk(KERN_WARNING "CBQ: class %08x has bad quantum==%ld, repaired.\n", cl->classid, cl->quantum);
				cl->quantum = cl->qdisc->dev->mtu/2 + 1;
static void cbq_sync_defmap(struct cbq_class *cl)
{
	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
	struct cbq_class *split = cl->split;

	for (i = 0; i <= TC_PRIO_MAX; i++) {
		if (split->defaults[i] == cl && !(cl->defmap & (1<<i)))
			split->defaults[i] = NULL;

	for (i = 0; i <= TC_PRIO_MAX; i++) {
		int level = split->level;

		if (split->defaults[i])

		for (h = 0; h < 16; h++) {
			struct cbq_class *c;

			for (c = q->classes[h]; c; c = c->next) {
				if (c->split == split && c->level < level &&
				    c->defmap & (1<<i)) {
					split->defaults[i] = c;
static void cbq_change_defmap(struct cbq_class *cl, u32 splitid, u32 def, u32 mask)
{
	struct cbq_class *split = NULL;

		if ((split = cl->split) == NULL)
		splitid = split->classid;

	if (split == NULL || split->classid != splitid) {
		for (split = cl->tparent; split; split = split->tparent)
			if (split->classid == splitid)

	if (cl->split != split) {
		cbq_sync_defmap(cl);
		cl->defmap = def & mask;
		cl->defmap = (cl->defmap & ~mask) | (def & mask);

	cbq_sync_defmap(cl);
static void cbq_unlink_class(struct cbq_class *this)
{
	struct cbq_class *cl, **clp;
	struct cbq_sched_data *q = qdisc_priv(this->qdisc);

	for (clp = &q->classes[cbq_hash(this->classid)]; (cl = *clp) != NULL; clp = &cl->next) {

	if (this->tparent) {
		} while ((cl = *clp) != this->sibling);

		if (this->tparent->children == this) {
			this->tparent->children = this->sibling;
			if (this->sibling == this)
				this->tparent->children = NULL;

		BUG_TRAP(this->sibling == this);
}

static void cbq_link_class(struct cbq_class *this)
{
	struct cbq_sched_data *q = qdisc_priv(this->qdisc);
	unsigned h = cbq_hash(this->classid);
	struct cbq_class *parent = this->tparent;

	this->sibling = this;
	this->next = q->classes[h];
	q->classes[h] = this;

	if (parent->children == NULL) {
		parent->children = this;
		this->sibling = parent->children->sibling;
		parent->children->sibling = this;
static unsigned int cbq_drop(struct Qdisc *sch)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *cl, *cl_head;

	for (prio = TC_CBQ_MAXPRIO; prio >= 0; prio--) {
		if ((cl_head = q->active[prio]) == NULL)

			if (cl->q->ops->drop && (len = cl->q->ops->drop(cl->q))) {

		} while ((cl = cl->next_alive) != cl_head);
static void
cbq_reset(struct Qdisc *sch)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *cl;

	q->tx_borrowed = NULL;
	del_timer(&q->wd_timer);
	del_timer(&q->delay_timer);
	q->toplevel = TC_CBQ_MAXLEVEL;
	PSCHED_GET_TIME(q->now);

	for (prio = 0; prio <= TC_CBQ_MAXPRIO; prio++)
		q->active[prio] = NULL;

	for (h = 0; h < 16; h++) {
		for (cl = q->classes[h]; cl; cl = cl->next) {
			cl->next_alive = NULL;
			PSCHED_SET_PASTPERFECT(cl->undertime);
			cl->avgidle = cl->maxidle;
			cl->deficit = cl->quantum;
			cl->cpriority = cl->priority;
static int cbq_set_lss(struct cbq_class *cl, struct tc_cbq_lssopt *lss)
{
	if (lss->change & TCF_CBQ_LSS_FLAGS) {
		cl->share = (lss->flags & TCF_CBQ_LSS_ISOLATED) ? NULL : cl->tparent;
		cl->borrow = (lss->flags & TCF_CBQ_LSS_BOUNDED) ? NULL : cl->tparent;
	if (lss->change & TCF_CBQ_LSS_EWMA)
		cl->ewma_log = lss->ewma_log;
	if (lss->change & TCF_CBQ_LSS_AVPKT)
		cl->avpkt = lss->avpkt;
	if (lss->change & TCF_CBQ_LSS_MINIDLE)
		cl->minidle = -(long)lss->minidle;
	if (lss->change & TCF_CBQ_LSS_MAXIDLE) {
		cl->maxidle = lss->maxidle;
		cl->avgidle = lss->maxidle;
	if (lss->change & TCF_CBQ_LSS_OFFTIME)
		cl->offtime = lss->offtime;
static void cbq_rmprio(struct cbq_sched_data *q, struct cbq_class *cl)
{
	q->nclasses[cl->priority]--;
	q->quanta[cl->priority] -= cl->weight;
	cbq_normalize_quanta(q, cl->priority);
}

static void cbq_addprio(struct cbq_sched_data *q, struct cbq_class *cl)
{
	q->nclasses[cl->priority]++;
	q->quanta[cl->priority] += cl->weight;
	cbq_normalize_quanta(q, cl->priority);
}

static int cbq_set_wrr(struct cbq_class *cl, struct tc_cbq_wrropt *wrr)
{
	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);

		cl->allot = wrr->allot;
		cl->weight = wrr->weight;
	if (wrr->priority) {
		cl->priority = wrr->priority - 1;
		cl->cpriority = cl->priority;
		if (cl->priority >= cl->priority2)
			cl->priority2 = TC_CBQ_MAXPRIO - 1;
static int cbq_set_overlimit(struct cbq_class *cl, struct tc_cbq_ovl *ovl)
{
	switch (ovl->strategy) {
	case TC_CBQ_OVL_CLASSIC:
		cl->overlimit = cbq_ovl_classic;
	case TC_CBQ_OVL_DELAY:
		cl->overlimit = cbq_ovl_delay;
	case TC_CBQ_OVL_LOWPRIO:
		if (ovl->priority2 - 1 >= TC_CBQ_MAXPRIO ||
		    ovl->priority2 - 1 <= cl->priority)
		cl->priority2 = ovl->priority2 - 1;
		cl->overlimit = cbq_ovl_lowprio;
	case TC_CBQ_OVL_DROP:
		cl->overlimit = cbq_ovl_drop;
	case TC_CBQ_OVL_RCLASSIC:
		cl->overlimit = cbq_ovl_rclassic;

	cl->penalty = (ovl->penalty * HZ) / 1000;
#ifdef CONFIG_NET_CLS_POLICE
static int cbq_set_police(struct cbq_class *cl, struct tc_cbq_police *p)
{
	cl->police = p->police;

	if (cl->q->handle) {
		if (p->police == TC_POLICE_RECLASSIFY)
			cl->q->reshape_fail = cbq_reshape_fail;
			cl->q->reshape_fail = NULL;

static int cbq_set_fopt(struct cbq_class *cl, struct tc_cbq_fopt *fopt)
{
	cbq_change_defmap(cl, fopt->split, fopt->defmap, fopt->defchange);
static int cbq_init(struct Qdisc *sch, struct rtattr *opt)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct rtattr *tb[TCA_CBQ_MAX];
	struct tc_ratespec *r;

	if (rtattr_parse_nested(tb, TCA_CBQ_MAX, opt) < 0 ||
	    tb[TCA_CBQ_RTAB-1] == NULL || tb[TCA_CBQ_RATE-1] == NULL ||
	    RTA_PAYLOAD(tb[TCA_CBQ_RATE-1]) < sizeof(struct tc_ratespec))

	if (tb[TCA_CBQ_LSSOPT-1] &&
	    RTA_PAYLOAD(tb[TCA_CBQ_LSSOPT-1]) < sizeof(struct tc_cbq_lssopt))

	r = RTA_DATA(tb[TCA_CBQ_RATE-1]);

	if ((q->link.R_tab = qdisc_get_rtab(r, tb[TCA_CBQ_RTAB-1])) == NULL)

	q->link.sibling = &q->link;
	q->link.classid = sch->handle;
	q->link.qdisc = sch;
	if (!(q->link.q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops,
					    sch->handle)))
		q->link.q = &noop_qdisc;

	q->link.priority = TC_CBQ_MAXPRIO-1;
	q->link.priority2 = TC_CBQ_MAXPRIO-1;
	q->link.cpriority = TC_CBQ_MAXPRIO-1;
	q->link.ovl_strategy = TC_CBQ_OVL_CLASSIC;
	q->link.overlimit = cbq_ovl_classic;
	q->link.allot = psched_mtu(sch->dev);
	q->link.quantum = q->link.allot;
	q->link.weight = q->link.R_tab->rate.rate;

	q->link.ewma_log = TC_CBQ_DEF_EWMA;
	q->link.avpkt = q->link.allot/2;
	q->link.minidle = -0x7FFFFFFF;
	q->link.stats_lock = &sch->dev->queue_lock;

	init_timer(&q->wd_timer);
	q->wd_timer.data = (unsigned long)sch;
	q->wd_timer.function = cbq_watchdog;
	init_timer(&q->delay_timer);
	q->delay_timer.data = (unsigned long)sch;
	q->delay_timer.function = cbq_undelay;
	q->toplevel = TC_CBQ_MAXLEVEL;
	PSCHED_GET_TIME(q->now);

	cbq_link_class(&q->link);

	if (tb[TCA_CBQ_LSSOPT-1])
		cbq_set_lss(&q->link, RTA_DATA(tb[TCA_CBQ_LSSOPT-1]));

	cbq_addprio(q, &q->link);
static __inline__ int cbq_dump_rate(struct sk_buff *skb, struct cbq_class *cl)
{
	unsigned char *b = skb->tail;

	RTA_PUT(skb, TCA_CBQ_RATE, sizeof(cl->R_tab->rate), &cl->R_tab->rate);

	skb_trim(skb, b - skb->data);

static __inline__ int cbq_dump_lss(struct sk_buff *skb, struct cbq_class *cl)
{
	unsigned char *b = skb->tail;
	struct tc_cbq_lssopt opt;

	if (cl->borrow == NULL)
		opt.flags |= TCF_CBQ_LSS_BOUNDED;
	if (cl->share == NULL)
		opt.flags |= TCF_CBQ_LSS_ISOLATED;
	opt.ewma_log = cl->ewma_log;
	opt.level = cl->level;
	opt.avpkt = cl->avpkt;
	opt.maxidle = cl->maxidle;
	opt.minidle = (u32)(-cl->minidle);
	opt.offtime = cl->offtime;

	RTA_PUT(skb, TCA_CBQ_LSSOPT, sizeof(opt), &opt);

	skb_trim(skb, b - skb->data);
static __inline__ int cbq_dump_wrr(struct sk_buff *skb, struct cbq_class *cl)
{
	unsigned char *b = skb->tail;
	struct tc_cbq_wrropt opt;

	opt.allot = cl->allot;
	opt.priority = cl->priority + 1;
	opt.cpriority = cl->cpriority + 1;
	opt.weight = cl->weight;
	RTA_PUT(skb, TCA_CBQ_WRROPT, sizeof(opt), &opt);

	skb_trim(skb, b - skb->data);

static __inline__ int cbq_dump_ovl(struct sk_buff *skb, struct cbq_class *cl)
{
	unsigned char *b = skb->tail;
	struct tc_cbq_ovl opt;

	opt.strategy = cl->ovl_strategy;
	opt.priority2 = cl->priority2 + 1;
	opt.penalty = (cl->penalty * 1000) / HZ;
	RTA_PUT(skb, TCA_CBQ_OVL_STRATEGY, sizeof(opt), &opt);

	skb_trim(skb, b - skb->data);

static __inline__ int cbq_dump_fopt(struct sk_buff *skb, struct cbq_class *cl)
{
	unsigned char *b = skb->tail;
	struct tc_cbq_fopt opt;

	if (cl->split || cl->defmap) {
		opt.split = cl->split ? cl->split->classid : 0;
		opt.defmap = cl->defmap;
		RTA_PUT(skb, TCA_CBQ_FOPT, sizeof(opt), &opt);

	skb_trim(skb, b - skb->data);

#ifdef CONFIG_NET_CLS_POLICE
static __inline__ int cbq_dump_police(struct sk_buff *skb, struct cbq_class *cl)
{
	unsigned char *b = skb->tail;
	struct tc_cbq_police opt;

	opt.police = cl->police;

	RTA_PUT(skb, TCA_CBQ_POLICE, sizeof(opt), &opt);

	skb_trim(skb, b - skb->data);
static int cbq_dump_attr(struct sk_buff *skb, struct cbq_class *cl)
{
	if (cbq_dump_lss(skb, cl) < 0 ||
	    cbq_dump_rate(skb, cl) < 0 ||
	    cbq_dump_wrr(skb, cl) < 0 ||
	    cbq_dump_ovl(skb, cl) < 0 ||
#ifdef CONFIG_NET_CLS_POLICE
	    cbq_dump_police(skb, cl) < 0 ||
	    cbq_dump_fopt(skb, cl) < 0)

static int cbq_dump(struct Qdisc *sch, struct sk_buff *skb)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	unsigned char *b = skb->tail;

	rta = (struct rtattr*)b;
	RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
	if (cbq_dump_attr(skb, &q->link) < 0)
		goto rtattr_failure;
	rta->rta_len = skb->tail - b;

	skb_trim(skb, b - skb->data);

static int
cbq_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
{
	struct cbq_sched_data *q = qdisc_priv(sch);

	q->link.xstats.avgidle = q->link.avgidle;
	return gnet_stats_copy_app(d, &q->link.xstats, sizeof(q->link.xstats));
static int
cbq_dump_class(struct Qdisc *sch, unsigned long arg,
	       struct sk_buff *skb, struct tcmsg *tcm)
{
	struct cbq_class *cl = (struct cbq_class*)arg;
	unsigned char *b = skb->tail;

		tcm->tcm_parent = cl->tparent->classid;
		tcm->tcm_parent = TC_H_ROOT;
	tcm->tcm_handle = cl->classid;
	tcm->tcm_info = cl->q->handle;

	rta = (struct rtattr*)b;
	RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
	if (cbq_dump_attr(skb, cl) < 0)
		goto rtattr_failure;
	rta->rta_len = skb->tail - b;

	skb_trim(skb, b - skb->data);

static int
cbq_dump_class_stats(struct Qdisc *sch, unsigned long arg,
		     struct gnet_dump *d)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *cl = (struct cbq_class*)arg;

	cl->qstats.qlen = cl->q->q.qlen;
	cl->xstats.avgidle = cl->avgidle;
	cl->xstats.undertime = 0;

	if (!PSCHED_IS_PASTPERFECT(cl->undertime))
		cl->xstats.undertime = PSCHED_TDIFF(cl->undertime, q->now);

	if (gnet_stats_copy_basic(d, &cl->bstats) < 0 ||
#ifdef CONFIG_NET_ESTIMATOR
	    gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
	    gnet_stats_copy_queue(d, &cl->qstats) < 0)

	return gnet_stats_copy_app(d, &cl->xstats, sizeof(cl->xstats));
static int cbq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
{
	struct cbq_class *cl = (struct cbq_class*)arg;

			if ((new = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops,
						     cl->classid)) == NULL)

#ifdef CONFIG_NET_CLS_POLICE
			if (cl->police == TC_POLICE_RECLASSIFY)
				new->reshape_fail = cbq_reshape_fail;

		qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);

		sch_tree_unlock(sch);

static struct Qdisc *
cbq_leaf(struct Qdisc *sch, unsigned long arg)
{
	struct cbq_class *cl = (struct cbq_class*)arg;

	return cl ? cl->q : NULL;
}

static unsigned long cbq_get(struct Qdisc *sch, u32 classid)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *cl = cbq_class_lookup(q, classid);

		return (unsigned long)cl;
static void cbq_destroy_filters(struct cbq_class *cl)
{
	struct tcf_proto *tp;

	while ((tp = cl->filter_list) != NULL) {
		cl->filter_list = tp->next;

static void cbq_destroy_class(struct Qdisc *sch, struct cbq_class *cl)
{
	struct cbq_sched_data *q = qdisc_priv(sch);

	BUG_TRAP(!cl->filters);

	cbq_destroy_filters(cl);
	qdisc_destroy(cl->q);
	qdisc_put_rtab(cl->R_tab);
#ifdef CONFIG_NET_ESTIMATOR
	gen_kill_estimator(&cl->bstats, &cl->rate_est);

static void
cbq_destroy(struct Qdisc *sch)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *cl;

#ifdef CONFIG_NET_CLS_POLICE
	/*
	 * Filters must be destroyed first because we don't destroy the
	 * classes from root to leafs which means that filters can still
	 * be bound to classes which have been destroyed already. --TGR '04
	 */
	for (h = 0; h < 16; h++)
		for (cl = q->classes[h]; cl; cl = cl->next)
			cbq_destroy_filters(cl);

	for (h = 0; h < 16; h++) {
		struct cbq_class *next;

		for (cl = q->classes[h]; cl; cl = next) {
			cbq_destroy_class(sch, cl);
static void cbq_put(struct Qdisc *sch, unsigned long arg)
{
	struct cbq_class *cl = (struct cbq_class*)arg;

	if (--cl->refcnt == 0) {
#ifdef CONFIG_NET_CLS_POLICE
		struct cbq_sched_data *q = qdisc_priv(sch);

		spin_lock_bh(&sch->dev->queue_lock);
		if (q->rx_class == cl)
		spin_unlock_bh(&sch->dev->queue_lock);

		cbq_destroy_class(sch, cl);
static int
cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct rtattr **tca,
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *cl = (struct cbq_class*)*arg;
	struct rtattr *opt = tca[TCA_OPTIONS-1];
	struct rtattr *tb[TCA_CBQ_MAX];
	struct cbq_class *parent;
	struct qdisc_rate_table *rtab = NULL;

	if (opt == NULL || rtattr_parse_nested(tb, TCA_CBQ_MAX, opt))

	if (tb[TCA_CBQ_OVL_STRATEGY-1] &&
	    RTA_PAYLOAD(tb[TCA_CBQ_OVL_STRATEGY-1]) < sizeof(struct tc_cbq_ovl))

	if (tb[TCA_CBQ_FOPT-1] &&
	    RTA_PAYLOAD(tb[TCA_CBQ_FOPT-1]) < sizeof(struct tc_cbq_fopt))

	if (tb[TCA_CBQ_RATE-1] &&
	    RTA_PAYLOAD(tb[TCA_CBQ_RATE-1]) < sizeof(struct tc_ratespec))

	if (tb[TCA_CBQ_LSSOPT-1] &&
	    RTA_PAYLOAD(tb[TCA_CBQ_LSSOPT-1]) < sizeof(struct tc_cbq_lssopt))

	if (tb[TCA_CBQ_WRROPT-1] &&
	    RTA_PAYLOAD(tb[TCA_CBQ_WRROPT-1]) < sizeof(struct tc_cbq_wrropt))

#ifdef CONFIG_NET_CLS_POLICE
	if (tb[TCA_CBQ_POLICE-1] &&
	    RTA_PAYLOAD(tb[TCA_CBQ_POLICE-1]) < sizeof(struct tc_cbq_police))

			if (cl->tparent && cl->tparent->classid != parentid)
			if (!cl->tparent && parentid != TC_H_ROOT)

		if (tb[TCA_CBQ_RATE-1]) {
			rtab = qdisc_get_rtab(RTA_DATA(tb[TCA_CBQ_RATE-1]), tb[TCA_CBQ_RTAB-1]);

		/* Change class parameters */

		if (cl->next_alive != NULL)
			cbq_deactivate_class(cl);

			rtab = xchg(&cl->R_tab, rtab);
			qdisc_put_rtab(rtab);

		if (tb[TCA_CBQ_LSSOPT-1])
			cbq_set_lss(cl, RTA_DATA(tb[TCA_CBQ_LSSOPT-1]));

		if (tb[TCA_CBQ_WRROPT-1]) {
			cbq_set_wrr(cl, RTA_DATA(tb[TCA_CBQ_WRROPT-1]));

		if (tb[TCA_CBQ_OVL_STRATEGY-1])
			cbq_set_overlimit(cl, RTA_DATA(tb[TCA_CBQ_OVL_STRATEGY-1]));

#ifdef CONFIG_NET_CLS_POLICE
		if (tb[TCA_CBQ_POLICE-1])
			cbq_set_police(cl, RTA_DATA(tb[TCA_CBQ_POLICE-1]));

		if (tb[TCA_CBQ_FOPT-1])
			cbq_set_fopt(cl, RTA_DATA(tb[TCA_CBQ_FOPT-1]));

			cbq_activate_class(cl);

		sch_tree_unlock(sch);

#ifdef CONFIG_NET_ESTIMATOR
		if (tca[TCA_RATE-1])
			gen_replace_estimator(&cl->bstats, &cl->rate_est,
					      cl->stats_lock, tca[TCA_RATE-1]);

	if (parentid == TC_H_ROOT)

	if (tb[TCA_CBQ_WRROPT-1] == NULL || tb[TCA_CBQ_RATE-1] == NULL ||
	    tb[TCA_CBQ_LSSOPT-1] == NULL)

	rtab = qdisc_get_rtab(RTA_DATA(tb[TCA_CBQ_RATE-1]), tb[TCA_CBQ_RTAB-1]);

		if (TC_H_MAJ(classid^sch->handle) || cbq_class_lookup(q, classid))

		classid = TC_H_MAKE(sch->handle, 0x8000);

		for (i = 0; i < 0x8000; i++) {
			if (++q->hgenerator >= 0x8000)
			if (cbq_class_lookup(q, classid|q->hgenerator) == NULL)

		classid = classid|q->hgenerator;

	parent = cbq_class_lookup(q, parentid);

	cl = kzalloc(sizeof(*cl), GFP_KERNEL);

	if (!(cl->q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops, classid)))
		cl->q = &noop_qdisc;
	cl->classid = classid;
	cl->tparent = parent;

	cl->allot = parent->allot;
	cl->quantum = cl->allot;
	cl->weight = cl->R_tab->rate.rate;
	cl->stats_lock = &sch->dev->queue_lock;

	cl->borrow = cl->tparent;
	if (cl->tparent != &q->link)
		cl->share = cl->tparent;
	cbq_adjust_levels(parent);
	cl->minidle = -0x7FFFFFFF;
	cbq_set_lss(cl, RTA_DATA(tb[TCA_CBQ_LSSOPT-1]));
	cbq_set_wrr(cl, RTA_DATA(tb[TCA_CBQ_WRROPT-1]));
	if (cl->ewma_log == 0)
		cl->ewma_log = q->link.ewma_log;
		cl->maxidle = q->link.maxidle;
		cl->avpkt = q->link.avpkt;
	cl->overlimit = cbq_ovl_classic;
	if (tb[TCA_CBQ_OVL_STRATEGY-1])
		cbq_set_overlimit(cl, RTA_DATA(tb[TCA_CBQ_OVL_STRATEGY-1]));
#ifdef CONFIG_NET_CLS_POLICE
	if (tb[TCA_CBQ_POLICE-1])
		cbq_set_police(cl, RTA_DATA(tb[TCA_CBQ_POLICE-1]));

	if (tb[TCA_CBQ_FOPT-1])
		cbq_set_fopt(cl, RTA_DATA(tb[TCA_CBQ_FOPT-1]));
	sch_tree_unlock(sch);

#ifdef CONFIG_NET_ESTIMATOR
	if (tca[TCA_RATE-1])
		gen_new_estimator(&cl->bstats, &cl->rate_est,
				  cl->stats_lock, tca[TCA_RATE-1]);

	*arg = (unsigned long)cl;

	qdisc_put_rtab(rtab);
*sch
, unsigned long arg
)
1989 struct cbq_sched_data
*q
= qdisc_priv(sch
);
1990 struct cbq_class
*cl
= (struct cbq_class
*)arg
;
1992 if (cl
->filters
|| cl
->children
|| cl
== &q
->link
)
1998 cbq_deactivate_class(cl
);
2000 if (q
->tx_borrowed
== cl
)
2001 q
->tx_borrowed
= q
->tx_class
;
2002 if (q
->tx_class
== cl
) {
2004 q
->tx_borrowed
= NULL
;
2006 #ifdef CONFIG_NET_CLS_POLICE
2007 if (q
->rx_class
== cl
)
2011 cbq_unlink_class(cl
);
2012 cbq_adjust_levels(cl
->tparent
);
2014 cbq_sync_defmap(cl
);
2017 sch_tree_unlock(sch
);
2019 if (--cl
->refcnt
== 0)
2020 cbq_destroy_class(sch
, cl
);
static struct tcf_proto **cbq_find_tcf(struct Qdisc *sch, unsigned long arg)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *cl = (struct cbq_class*)arg;

	return &cl->filter_list;
}

static unsigned long cbq_bind_filter(struct Qdisc *sch, unsigned long parent,
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *p = (struct cbq_class*)parent;
	struct cbq_class *cl = cbq_class_lookup(q, classid);

		if (p && p->level <= cl->level)
		return (unsigned long)cl;

static void cbq_unbind_filter(struct Qdisc *sch, unsigned long arg)
{
	struct cbq_class *cl = (struct cbq_class*)arg;
static void cbq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
{
	struct cbq_sched_data *q = qdisc_priv(sch);

	for (h = 0; h < 16; h++) {
		struct cbq_class *cl;

		for (cl = q->classes[h]; cl; cl = cl->next) {
			if (arg->count < arg->skip) {
			if (arg->fn(sch, (unsigned long)cl, arg) < 0) {
static struct Qdisc_class_ops cbq_class_ops = {
	.change		= cbq_change_class,
	.delete		= cbq_delete,
	.tcf_chain	= cbq_find_tcf,
	.bind_tcf	= cbq_bind_filter,
	.unbind_tcf	= cbq_unbind_filter,
	.dump		= cbq_dump_class,
	.dump_stats	= cbq_dump_class_stats,

static struct Qdisc_ops cbq_qdisc_ops = {
	.cl_ops		= &cbq_class_ops,
	.priv_size	= sizeof(struct cbq_sched_data),
	.enqueue	= cbq_enqueue,
	.dequeue	= cbq_dequeue,
	.requeue	= cbq_requeue,
	.destroy	= cbq_destroy,
	.dump_stats	= cbq_dump_stats,
	.owner		= THIS_MODULE,

static int __init cbq_module_init(void)
{
	return register_qdisc(&cbq_qdisc_ops);
}
static void __exit cbq_module_exit(void)
{
	unregister_qdisc(&cbq_qdisc_ops);
}
module_init(cbq_module_init)
module_exit(cbq_module_exit)
MODULE_LICENSE("GPL");