/*
 * net/sched/sch_cbq.c	Class-Based Queueing discipline.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 */

#include <linux/module.h>
#include <asm/uaccess.h>
#include <asm/system.h>
#include <linux/bitops.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/string.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/errno.h>
#include <linux/interrupt.h>
#include <linux/if_ether.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/notifier.h>
#include <net/route.h>
#include <linux/skbuff.h>
#include <net/pkt_sched.h>

/*	Class-Based Queueing (CBQ) algorithm.
	=======================================

	Sources: [1] Sally Floyd and Van Jacobson, "Link-sharing and Resource
		     Management Models for Packet Networks",
		     IEEE/ACM Transactions on Networking, Vol.3, No.4, 1995

		 [2] Sally Floyd, "Notes on CBQ and Guaranteed Service", 1995

		 [3] Sally Floyd, "Notes on Class-Based Queueing: Setting
		     Parameters", 1996

		 [4] Sally Floyd and Michael Speer, "Experimental Results
		     for Class-Based Queueing", 1998, not published.

	-----------------------------------------------------------------------

	The algorithm skeleton was taken from the NS simulator cbq.cc.
	If someone wants to check this code against the LBL version,
	he should take into account that ONLY the skeleton was borrowed;
	the implementation is different. Particularly:

	--- The WRR algorithm is different. Our version looks more
	reasonable (I hope) and works when quanta are allowed to be
	less than MTU, which is always the case when real time classes
	have small rates. Note that the statement of [3] is
	incomplete: delay may actually be estimated even if the class
	per-round allotment is less than MTU. Namely, if the per-round
	allotment is W*r_i, and r_1+...+r_k = r < 1, then

	delay_i <= ([MTU/(W*r_i)]*W*r + W*r + k*MTU)/B

	In the worst case we have an IntServ estimate with D = W*r + k*MTU
	and C = MTU*r. The proof (if correct at all) is trivial.

	--- It seems that cbq-2.0 is not very accurate. At least, I cannot
	interpret some places, which look like wrong translations
	from NS. Anyone is advised to find these differences
	and explain to me why I am wrong 8).

	--- Linux has no EOI event, so we cannot estimate true class
	idle time. The workaround is to consider the next dequeue event
	as a sign that the previous packet is finished. This is wrong because of
	internal device queueing, but on a permanently loaded link it is true.
	Moreover, combined with the clock integrator, this scheme looks
	very close to an ideal solution.  */

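/* Illustrative sketch (not part of the original code): the worst-case delay
 * bound quoted above, with hypothetical parameters a_i = W*r_i (per-round
 * allotment of class i, bytes), A = W*r (summed allotment, bytes), k classes,
 * and link bandwidth B in bytes/sec.  [x] is rounding up, done here with
 * integer arithmetic.
 */
#if 0
static unsigned long cbq_wrr_delay_bound(unsigned long a_i, unsigned long A,
					 unsigned long k, unsigned long mtu,
					 unsigned long B)
{
	unsigned long rounds = (mtu + a_i - 1) / a_i;	/* [MTU/(W*r_i)] */

	return (rounds * A + A + k * mtu) / B;		/* seconds, truncated */
}
#endif
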
struct cbq_sched_data;


struct cbq_class
{
	struct cbq_class	*next;		/* hash table link */
	struct cbq_class	*next_alive;	/* next class with backlog in this priority band */

/* Parameters */
	u32			classid;
	unsigned char		priority;	/* class priority */
	unsigned char		priority2;	/* priority to be used after overlimit */
	unsigned char		ewma_log;	/* time constant for idle time calculation */
	unsigned char		ovl_strategy;
#ifdef CONFIG_NET_CLS_POLICE
	unsigned char		police;
#endif

	u32			defmap;

	/* Link-sharing scheduler parameters */
	long			maxidle;	/* Class parameters: see below. */
	long			minidle;
	u32			avpkt;
	long			offtime;
	struct qdisc_rate_table	*R_tab;

	/* Overlimit strategy parameters */
	void			(*overlimit)(struct cbq_class *cl);
	unsigned long		penalty;

	/* General scheduler (WRR) parameters */
	long			allot;
	long			quantum;	/* Allotment per WRR round */
	long			weight;		/* Relative allotment: see below */

	struct Qdisc		*qdisc;		/* Ptr to CBQ discipline */
	struct cbq_class	*split;		/* Ptr to split node */
	struct cbq_class	*share;		/* Ptr to LS parent in the class tree */
	struct cbq_class	*tparent;	/* Ptr to tree parent in the class tree */
	struct cbq_class	*borrow;	/* NULL if class is bandwidth limited;
						   parent otherwise */
	struct cbq_class	*sibling;	/* Sibling chain */
	struct cbq_class	*children;	/* Pointer to children chain */

	struct Qdisc		*q;		/* Elementary queueing discipline */


/* Variables */
	unsigned char		cpriority;	/* Effective priority */
	unsigned char		delayed;
	unsigned char		level;		/* level of the class in hierarchy:
						   0 for leaf classes, and maximal
						   level of children + 1 for nodes.
						 */

	psched_time_t		last;		/* Last end of service */
	psched_time_t		undertime;
	long			avgidle;
	long			deficit;	/* Saved deficit for WRR */
	unsigned long		penalized;
	struct gnet_stats_basic bstats;
	struct gnet_stats_queue qstats;
	struct gnet_stats_rate_est rate_est;
	spinlock_t		*stats_lock;
	struct tc_cbq_xstats	xstats;

	struct tcf_proto	*filter_list;

	int			refcnt;
	int			filters;

	struct cbq_class	*defaults[TC_PRIO_MAX+1];
};

struct cbq_sched_data
{
	struct cbq_class	*classes[16];		/* Hash table of all classes */
	int			nclasses[TC_CBQ_MAXPRIO+1];
	unsigned		quanta[TC_CBQ_MAXPRIO+1];

	struct cbq_class	link;

	unsigned		activemask;
	struct cbq_class	*active[TC_CBQ_MAXPRIO+1];	/* List of all classes
								   with backlog */

#ifdef CONFIG_NET_CLS_POLICE
	struct cbq_class	*rx_class;
#endif
	struct cbq_class	*tx_class;
	struct cbq_class	*tx_borrowed;
	int			tx_len;
	psched_time_t		now;		/* Cached timestamp */
	psched_time_t		now_rt;		/* Cached real time */
	unsigned		pmask;

	struct timer_list	delay_timer;
	struct timer_list	wd_timer;	/* Watchdog timer,
						   started when CBQ has
						   backlog, but cannot
						   transmit just now */
	long			wd_expires;
	int			toplevel;
	u32			hgenerator;
};


#define L2T(cl,len)	((cl)->R_tab->data[(len)>>(cl)->R_tab->rate.cell_log])

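/* Illustrative sketch (not part of the original code): L2T ("length to
 * time") converts a packet length into its transmission time at the class
 * rate by indexing the precomputed rate table.  A hypothetical user-space
 * style sketch of how such a table could be filled, with entry i covering
 * roughly i << cell_log bytes at `rate` bytes per second, expressed in
 * abstract scheduler ticks:
 */
#if 0
static void example_fill_rtab(u32 *data, int cell_log, u64 rate, u64 ticks_per_sec)
{
	int i;

	for (i = 0; i < 256; i++)
		data[i] = (((u64)i << cell_log) * ticks_per_sec) / rate;
}
#endif
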
static __inline__ unsigned cbq_hash(u32 h)
{
	h ^= h>>8;
	h ^= h>>4;
	return h&0xF;
}

static __inline__ struct cbq_class *
cbq_class_lookup(struct cbq_sched_data *q, u32 classid)
{
	struct cbq_class *cl;

	for (cl = q->classes[cbq_hash(classid)]; cl; cl = cl->next)
		if (cl->classid == classid)
			return cl;

	return NULL;
}

#ifdef CONFIG_NET_CLS_POLICE

static struct cbq_class *
cbq_reclassify(struct sk_buff *skb, struct cbq_class *this)
{
	struct cbq_class *cl, *new;

	for (cl = this->tparent; cl; cl = cl->tparent)
		if ((new = cl->defaults[TC_PRIO_BESTEFFORT]) != NULL && new != this)
			return new;

	return NULL;
}

#endif

/* Classify packet. The procedure is pretty complicated, but
   it allows us to combine link sharing and priority scheduling
   transparently.

   Namely, you can put link sharing rules (f.e. route based) at the root of CBQ,
   so that it resolves to split nodes. Then packets are classified
   by logical priority, or a more specific classifier may be attached
   to the split node.
 */

static struct cbq_class *
cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *head = &q->link;
	struct cbq_class **defmap;
	struct cbq_class *cl = NULL;
	u32 prio = skb->priority;
	struct tcf_result res;

	/*
	 *  Step 1. If skb->priority points to one of our classes, use it.
	 */
	if (TC_H_MAJ(prio^sch->handle) == 0 &&
	    (cl = cbq_class_lookup(q, prio)) != NULL)
		return cl;

	*qerr = NET_XMIT_BYPASS;
	for (;;) {
		int result = 0;

		defmap = head->defaults;

		/*
		 * Step 2+n. Apply classifier.
		 */
		if (!head->filter_list || (result = tc_classify(skb, head->filter_list, &res)) < 0)
			goto fallback;

		if ((cl = (void*)res.class) == NULL) {
			if (TC_H_MAJ(res.classid))
				cl = cbq_class_lookup(q, res.classid);
			else if ((cl = defmap[res.classid&TC_PRIO_MAX]) == NULL)
				cl = defmap[TC_PRIO_BESTEFFORT];

			if (cl == NULL || cl->level >= head->level)
				goto fallback;
		}

#ifdef CONFIG_NET_CLS_ACT
		switch (result) {
		case TC_ACT_QUEUED:
		case TC_ACT_STOLEN:
			*qerr = NET_XMIT_SUCCESS;
		case TC_ACT_SHOT:
			return NULL;
		}
#elif defined(CONFIG_NET_CLS_POLICE)
		switch (result) {
		case TC_POLICE_RECLASSIFY:
			return cbq_reclassify(skb, cl);
		case TC_POLICE_SHOT:
			return NULL;
		default:
			break;
		}
#endif
		if (cl->level == 0)
			return cl;

		/*
		 * Step 3+n. If the classifier selected a link sharing class,
		 *	     apply the agency specific classifier.
		 *	     Repeat this procedure until we hit a leaf node.
		 */
		head = cl;
	}

fallback:
	cl = head;

	/*
	 * Step 4. No success...
	 */
	if (TC_H_MAJ(prio) == 0 &&
	    !(cl = head->defaults[prio&TC_PRIO_MAX]) &&
	    !(cl = head->defaults[TC_PRIO_BESTEFFORT]))
		return head;

	return cl;
}

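/* Illustrative sketch (hypothetical helper, not part of the original code):
 * once a split node is reached, steps 2+n above reduce to indexing the
 * split's default table by the low bits of the classification result and
 * falling back to the best-effort slot:
 */
#if 0
static struct cbq_class *example_defmap_pick(struct cbq_class **defaults, u32 band)
{
	struct cbq_class *cl = defaults[band & TC_PRIO_MAX];

	return cl ? cl : defaults[TC_PRIO_BESTEFFORT];
}
#endif
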
/*
   A packet has just been enqueued on the empty class.
   cbq_activate_class adds it to the tail of the active class list
   of its priority band.
 */

static __inline__ void cbq_activate_class(struct cbq_class *cl)
{
	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
	int prio = cl->cpriority;
	struct cbq_class *cl_tail;

	cl_tail = q->active[prio];
	q->active[prio] = cl;

	if (cl_tail != NULL) {
		cl->next_alive = cl_tail->next_alive;
		cl_tail->next_alive = cl;
	} else {
		cl->next_alive = cl;
		q->activemask |= (1<<prio);
	}
}

/*
   Unlink class from active chain.
   Note that this same procedure is done directly in cbq_dequeue*
   during the round-robin procedure.
 */

static void cbq_deactivate_class(struct cbq_class *this)
{
	struct cbq_sched_data *q = qdisc_priv(this->qdisc);
	int prio = this->cpriority;
	struct cbq_class *cl;
	struct cbq_class *cl_prev = q->active[prio];

	do {
		cl = cl_prev->next_alive;
		if (cl == this) {
			cl_prev->next_alive = cl->next_alive;
			cl->next_alive = NULL;

			if (cl == q->active[prio]) {
				q->active[prio] = cl_prev;
				if (cl == q->active[prio]) {
					q->active[prio] = NULL;
					q->activemask &= ~(1<<prio);
					return;
				}
			}
			return;
		}
	} while ((cl_prev = cl) != q->active[prio]);
}

static void
cbq_mark_toplevel(struct cbq_sched_data *q, struct cbq_class *cl)
{
	int toplevel = q->toplevel;

	if (toplevel > cl->level && !(cl->q->flags&TCQ_F_THROTTLED)) {
		psched_time_t now;
		psched_tdiff_t incr;

		PSCHED_GET_TIME(now);
		incr = PSCHED_TDIFF(now, q->now_rt);
		PSCHED_TADD2(q->now, incr, now);

		do {
			if (PSCHED_TLESS(cl->undertime, now)) {
				q->toplevel = cl->level;
				return;
			}
		} while ((cl=cl->borrow) != NULL && toplevel > cl->level);
	}
}

static int
cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	int len = skb->len;
	int ret;
	struct cbq_class *cl = cbq_classify(skb, sch, &ret);

#ifdef CONFIG_NET_CLS_POLICE
	q->rx_class = cl;
#endif
	if (cl == NULL) {
		if (ret == NET_XMIT_BYPASS)
			sch->qstats.drops++;
		kfree_skb(skb);
		return ret;
	}

#ifdef CONFIG_NET_CLS_POLICE
	cl->q->__parent = sch;
#endif
	if ((ret = cl->q->enqueue(skb, cl->q)) == NET_XMIT_SUCCESS) {
		sch->q.qlen++;
		sch->bstats.packets++;
		sch->bstats.bytes += len;
		cbq_mark_toplevel(q, cl);
		if (!cl->next_alive)
			cbq_activate_class(cl);
		return ret;
	}

	sch->qstats.drops++;
	cbq_mark_toplevel(q, cl);
	cl->qstats.drops++;
	return ret;
}

static int
cbq_requeue(struct sk_buff *skb, struct Qdisc *sch)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *cl;
	int ret;

	if ((cl = q->tx_class) == NULL) {
		kfree_skb(skb);
		sch->qstats.drops++;
		return NET_XMIT_CN;
	}
	q->tx_class = NULL;

	cbq_mark_toplevel(q, cl);

#ifdef CONFIG_NET_CLS_POLICE
	q->rx_class = cl;
	cl->q->__parent = sch;
#endif
	if ((ret = cl->q->ops->requeue(skb, cl->q)) == 0) {
		sch->q.qlen++;
		sch->qstats.requeues++;
		if (!cl->next_alive)
			cbq_activate_class(cl);
		return 0;
	}
	sch->qstats.drops++;
	cl->qstats.drops++;
	return ret;
}

/* Overlimit actions */

/* TC_CBQ_OVL_CLASSIC: (default) penalize leaf class by adding offtime */

static void cbq_ovl_classic(struct cbq_class *cl)
{
	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
	psched_tdiff_t delay = PSCHED_TDIFF(cl->undertime, q->now);

	if (!cl->delayed) {
		delay += cl->offtime;

		/*
		   Class goes to sleep, so that it will have no
		   chance to work off avgidle. Let's forgive it 8)

		   BTW cbq-2.0 has a bug in this place; apparently
		   they forgot to shift it by cl->ewma_log.
		 */
		if (cl->avgidle < 0)
			delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
		if (cl->avgidle < cl->minidle)
			cl->avgidle = cl->minidle;
		if (delay <= 0)
			delay = 1;
		PSCHED_TADD2(q->now, delay, cl->undertime);

		cl->xstats.overactions++;
		cl->delayed = 1;
	}
	if (q->wd_expires == 0 || q->wd_expires > delay)
		q->wd_expires = delay;

	/* Dirty work! We must schedule wakeups based on the
	   real available rate, rather than the leaf rate,
	   which may be tiny (even zero).
	 */
	if (q->toplevel == TC_CBQ_MAXLEVEL) {
		struct cbq_class *b;
		psched_tdiff_t base_delay = q->wd_expires;

		for (b = cl->borrow; b; b = b->borrow) {
			delay = PSCHED_TDIFF(b->undertime, q->now);
			if (delay < base_delay) {
				if (delay <= 0)
					delay = 1;
				base_delay = delay;
			}
		}

		q->wd_expires = base_delay;
	}
}

/* TC_CBQ_OVL_RCLASSIC: penalize by offtime classes in hierarchy, when
   they go overlimit
 */

static void cbq_ovl_rclassic(struct cbq_class *cl)
{
	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
	struct cbq_class *this = cl;

	do {
		if (cl->level > q->toplevel) {
			cl = NULL;
			break;
		}
	} while ((cl = cl->borrow) != NULL);

	if (cl == NULL)
		cl = this;
	cbq_ovl_classic(cl);
}

/* TC_CBQ_OVL_DELAY: delay until it will go to underlimit */

static void cbq_ovl_delay(struct cbq_class *cl)
{
	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
	psched_tdiff_t delay = PSCHED_TDIFF(cl->undertime, q->now);

	if (!cl->delayed) {
		unsigned long sched = jiffies;

		delay += cl->offtime;
		if (cl->avgidle < 0)
			delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
		if (cl->avgidle < cl->minidle)
			cl->avgidle = cl->minidle;
		PSCHED_TADD2(q->now, delay, cl->undertime);

		if (delay > 0) {
			sched += PSCHED_US2JIFFIE(delay) + cl->penalty;
			cl->penalized = sched;
			cl->cpriority = TC_CBQ_MAXPRIO;
			q->pmask |= (1<<TC_CBQ_MAXPRIO);
			if (del_timer(&q->delay_timer) &&
			    (long)(q->delay_timer.expires - sched) > 0)
				q->delay_timer.expires = sched;
			add_timer(&q->delay_timer);
			cl->delayed = 1;
			cl->xstats.overactions++;
			return;
		}
		delay = 1;
	}
	if (q->wd_expires == 0 || q->wd_expires > delay)
		q->wd_expires = delay;
}

/* TC_CBQ_OVL_LOWPRIO: penalize class by lowering its priority band */

static void cbq_ovl_lowprio(struct cbq_class *cl)
{
	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);

	cl->penalized = jiffies + cl->penalty;

	if (cl->cpriority != cl->priority2) {
		cl->cpriority = cl->priority2;
		q->pmask |= (1<<cl->cpriority);
		cl->xstats.overactions++;
	}
	cbq_ovl_classic(cl);
}

/* TC_CBQ_OVL_DROP: penalize class by dropping */

static void cbq_ovl_drop(struct cbq_class *cl)
{
	if (cl->q->ops->drop)
		if (cl->q->ops->drop(cl->q))
			cl->qdisc->q.qlen--;
	cl->xstats.overactions++;
	cbq_ovl_classic(cl);
}

static void cbq_watchdog(unsigned long arg)
{
	struct Qdisc *sch = (struct Qdisc*)arg;

	sch->flags &= ~TCQ_F_THROTTLED;
	netif_schedule(sch->dev);
}

static unsigned long cbq_undelay_prio(struct cbq_sched_data *q, int prio)
{
	struct cbq_class *cl;
	struct cbq_class *cl_prev = q->active[prio];
	unsigned long now = jiffies;
	unsigned long sched = now;

	do {
		cl = cl_prev->next_alive;
		if ((long)(now - cl->penalized) > 0) {
			cl_prev->next_alive = cl->next_alive;
			cl->next_alive = NULL;
			cl->cpriority = cl->priority;
			cl->delayed = 0;
			cbq_activate_class(cl);

			if (cl == q->active[prio]) {
				q->active[prio] = cl_prev;
				if (cl == q->active[prio]) {
					q->active[prio] = NULL;
					return 0;
				}
			}

			cl = cl_prev->next_alive;
		} else if ((long)(sched - cl->penalized) > 0)
			sched = cl->penalized;
	} while ((cl_prev = cl) != q->active[prio]);

	return (long)(sched - now);
}

static void cbq_undelay(unsigned long arg)
{
	struct Qdisc *sch = (struct Qdisc*)arg;
	struct cbq_sched_data *q = qdisc_priv(sch);
	long delay = 0;
	unsigned pmask;

	pmask = q->pmask;
	q->pmask = 0;

	while (pmask) {
		int prio = ffz(~pmask);
		long tmp;

		pmask &= ~(1<<prio);

		tmp = cbq_undelay_prio(q, prio);
		if (tmp > 0) {
			q->pmask |= 1<<prio;
			if (tmp < delay || delay == 0)
				delay = tmp;
		}
	}

	if (delay) {
		q->delay_timer.expires = jiffies + delay;
		add_timer(&q->delay_timer);
	}

	sch->flags &= ~TCQ_F_THROTTLED;
	netif_schedule(sch->dev);
}


#ifdef CONFIG_NET_CLS_POLICE

static int cbq_reshape_fail(struct sk_buff *skb, struct Qdisc *child)
{
	int len = skb->len;
	struct Qdisc *sch = child->__parent;
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *cl = q->rx_class;

	q->rx_class = NULL;

	if (cl && (cl = cbq_reclassify(skb, cl)) != NULL) {

		cbq_mark_toplevel(q, cl);

		q->rx_class = cl;
		cl->q->__parent = sch;

		if (cl->q->enqueue(skb, cl->q) == 0) {
			sch->q.qlen++;
			sch->bstats.packets++;
			sch->bstats.bytes += len;
			if (!cl->next_alive)
				cbq_activate_class(cl);
			return 0;
		}
		sch->qstats.drops++;
		return 0;
	}

	sch->qstats.drops++;
	return -1;
}
#endif

/*
   This is a mission-critical procedure.

   We "regenerate" the toplevel cutoff if the transmitting class
   has backlog and is not regulated. It is not part of the
   original CBQ description, but looks more reasonable.
   Probably, it is wrong. This question needs further investigation.
 */

static __inline__ void
cbq_update_toplevel(struct cbq_sched_data *q, struct cbq_class *cl,
		    struct cbq_class *borrowed)
{
	if (cl && q->toplevel >= borrowed->level) {
		if (cl->q->q.qlen > 1) {
			do {
				if (PSCHED_IS_PASTPERFECT(borrowed->undertime)) {
					q->toplevel = borrowed->level;
					return;
				}
			} while ((borrowed=borrowed->borrow) != NULL);
		}
#if 0
	/* It is not necessary now. Uncommenting it
	   will save CPU cycles, but decrease fairness.
	 */
		q->toplevel = TC_CBQ_MAXLEVEL;
#endif
	}
}

static void
cbq_update(struct cbq_sched_data *q)
{
	struct cbq_class *this = q->tx_class;
	struct cbq_class *cl = this;
	int len = q->tx_len;

	q->tx_class = NULL;

	for ( ; cl; cl = cl->share) {
		long avgidle = cl->avgidle;
		long idle;

		cl->bstats.packets++;
		cl->bstats.bytes += len;

		/*
		   (now - last) is the total time between packet right edges.
		   (last_pktlen/rate) is "virtual" busy time, so that

			 idle = (now - last) - last_pktlen/rate
		 */

		idle = PSCHED_TDIFF(q->now, cl->last);
		if ((unsigned long)idle > 128*1024*1024) {
			avgidle = cl->maxidle;
		} else {
			idle -= L2T(cl, len);

		/* true_avgidle := (1-W)*true_avgidle + W*idle,
		   where W=2^{-ewma_log}. But cl->avgidle is scaled:
		   cl->avgidle == true_avgidle/W,
		   hence:
		 */
			avgidle += idle - (avgidle>>cl->ewma_log);
		}

		if (avgidle <= 0) {
			/* Overlimit or at-limit */

			if (avgidle < cl->minidle)
				avgidle = cl->minidle;

			cl->avgidle = avgidle;

			/* Calculate the expected time when this class
			   will be allowed to send.
			   It will occur when:
			   (1-W)*true_avgidle + W*delay = 0, i.e.
			   idle = (1/W - 1)*(-true_avgidle)
			   or
			   idle = (1 - W)*(-cl->avgidle);
			 */
			idle = (-avgidle) - ((-avgidle) >> cl->ewma_log);

			/*
			   That is not all.
			   To maintain the rate allocated to the class,
			   we add to undertime the virtual clock time
			   necessary to complete the transmitted packet.
			   (len/phys_bandwidth has already been passed
			   by the moment of cbq_update)
			 */

			idle -= L2T(&q->link, len);
			idle += L2T(cl, len);

			PSCHED_AUDIT_TDIFF(idle);

			PSCHED_TADD2(q->now, idle, cl->undertime);
		} else {
			/* Underlimit */

			PSCHED_SET_PASTPERFECT(cl->undertime);
			if (avgidle > cl->maxidle)
				cl->avgidle = cl->maxidle;
			else
				cl->avgidle = avgidle;
		}
		cl->last = q->now;
	}

	cbq_update_toplevel(q, this, q->tx_borrowed);
}

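#if 0
/* Illustrative sketch (not part of the scheduler): the same EWMA written
 * with explicit floating point.  cbq_update() keeps the scaled value
 * avgidle == true_avgidle/W with W = 2^-ewma_log, so its integer update
 * "avgidle += idle - (avgidle >> ewma_log)" is this line divided by W.
 */
static double true_avgidle_step(double true_avgidle, double idle, int ewma_log)
{
	double W = 1.0 / (double)(1 << ewma_log);

	return (1.0 - W) * true_avgidle + W * idle;
}
#endif
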
static __inline__ struct cbq_class *
cbq_under_limit(struct cbq_class *cl)
{
	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
	struct cbq_class *this_cl = cl;

	if (cl->tparent == NULL)
		return cl;

	if (PSCHED_IS_PASTPERFECT(cl->undertime) ||
	    !PSCHED_TLESS(q->now, cl->undertime)) {
		cl->delayed = 0;
		return cl;
	}

	do {
		/* This is a very suspicious place. Now the overlimit
		   action is generated for unbounded classes
		   only if the link is completely congested.
		   Though it agrees with the ancestor-only paradigm,
		   it looks very stupid. In particular,
		   it means that this chunk of code will either
		   never be called or will result in strong amplification
		   of burstiness. Dangerous, silly, and, however,
		   no other solution exists.
		 */
		if ((cl = cl->borrow) == NULL) {
			this_cl->qstats.overlimits++;
			this_cl->overlimit(this_cl);
			return NULL;
		}
		if (cl->level > q->toplevel)
			return NULL;
	} while (!PSCHED_IS_PASTPERFECT(cl->undertime) &&
		 PSCHED_TLESS(q->now, cl->undertime));

	cl->delayed = 0;
	return cl;
}

static __inline__ struct sk_buff *
cbq_dequeue_prio(struct Qdisc *sch, int prio)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *cl_tail, *cl_prev, *cl;
	struct sk_buff *skb;
	int deficit;

	cl_tail = cl_prev = q->active[prio];
	cl = cl_prev->next_alive;

	do {
		deficit = 0;

		/* Start round */
		do {
			struct cbq_class *borrow = cl;

			if (cl->q->q.qlen &&
			    (borrow = cbq_under_limit(cl)) == NULL)
				goto skip_class;

			if (cl->deficit <= 0) {
				/* Class exhausted its allotment per
				   this round. Switch to the next one.
				 */
				deficit = 1;
				cl->deficit += cl->quantum;
				goto next_class;
			}

			skb = cl->q->dequeue(cl->q);

			/* Class did not give us any skb :-(
			   It could occur even if cl->q->q.qlen != 0,
			   f.e. if cl->q == "tbf"
			 */
			if (skb == NULL)
				goto skip_class;

			cl->deficit -= skb->len;
			q->tx_class = cl;
			q->tx_borrowed = borrow;
			if (borrow != cl) {
#ifndef CBQ_XSTATS_BORROWS_BYTES
				borrow->xstats.borrows++;
				cl->xstats.borrows++;
#else
				borrow->xstats.borrows += skb->len;
				cl->xstats.borrows += skb->len;
#endif
			}
			q->tx_len = skb->len;

			if (cl->deficit <= 0) {
				q->active[prio] = cl;
				cl = cl->next_alive;
				cl->deficit += cl->quantum;
			}
			return skb;

skip_class:
			if (cl->q->q.qlen == 0 || prio != cl->cpriority) {
				/* Class is empty or penalized.
				   Unlink it from active chain.
				 */
				cl_prev->next_alive = cl->next_alive;
				cl->next_alive = NULL;

				/* Did cl_tail point to it? */
				if (cl == cl_tail) {
					/* Repair it! */
					cl_tail = cl_prev;

					/* Was it the last class in this band? */
					if (cl == cl_tail) {
						/* Kill the band! */
						q->active[prio] = NULL;
						q->activemask &= ~(1<<prio);
						if (cl->q->q.qlen)
							cbq_activate_class(cl);
						return NULL;
					}

					q->active[prio] = cl_tail;
				}
				if (cl->q->q.qlen)
					cbq_activate_class(cl);

				cl = cl_prev;
			}

next_class:
			cl_prev = cl;
			cl = cl->next_alive;
		} while (cl_prev != cl_tail);
	} while (deficit);

	q->active[prio] = cl_prev;

	return NULL;
}

static __inline__ struct sk_buff *
cbq_dequeue_1(struct Qdisc *sch)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct sk_buff *skb;
	unsigned activemask;

	activemask = q->activemask&0xFF;
	while (activemask) {
		int prio = ffz(~activemask);
		activemask &= ~(1<<prio);
		skb = cbq_dequeue_prio(sch, prio);
		if (skb)
			return skb;
	}
	return NULL;
}

static struct sk_buff *
cbq_dequeue(struct Qdisc *sch)
{
	struct sk_buff *skb;
	struct cbq_sched_data *q = qdisc_priv(sch);
	psched_time_t now;
	psched_tdiff_t incr;

	PSCHED_GET_TIME(now);
	incr = PSCHED_TDIFF(now, q->now_rt);

	if (q->tx_class) {
		psched_tdiff_t incr2;
		/* Time integrator. We calculate the EOS time
		   by adding the expected packet transmission time.
		   If real time is greater, we warp the artificial clock,
		   so that:

		   cbq_time = max(real_time, work);
		 */
		incr2 = L2T(&q->link, q->tx_len);
		PSCHED_TADD(q->now, incr2);
		cbq_update(q);
		if ((incr -= incr2) < 0)
			incr = 0;
	}
	PSCHED_TADD(q->now, incr);
	q->now_rt = now;

	for (;;) {
		q->wd_expires = 0;

		skb = cbq_dequeue_1(sch);
		if (skb) {
			sch->q.qlen--;
			sch->flags &= ~TCQ_F_THROTTLED;
			return skb;
		}

		/* All the classes are overlimit.

		   This is possible if:

		   1. The scheduler is empty.
		   2. The toplevel cutoff inhibited borrowing.
		   3. The root class is overlimit.

		   Reset the 2nd and 3rd conditions and retry.

		   Note that NS and cbq-2.0 are buggy: peeking at
		   an arbitrary class is appropriate for ancestor-only
		   sharing, but not for the toplevel algorithm.

		   Our version is better, but slower, because it requires
		   two passes; this is unavoidable with top-level sharing.
		*/

		if (q->toplevel == TC_CBQ_MAXLEVEL &&
		    PSCHED_IS_PASTPERFECT(q->link.undertime))
			break;

		q->toplevel = TC_CBQ_MAXLEVEL;
		PSCHED_SET_PASTPERFECT(q->link.undertime);
	}

	/* No packets in the scheduler or nobody wants to give them to us :-(
	   Sigh... start the watchdog timer in the last case. */

	if (sch->q.qlen) {
		sch->qstats.overlimits++;
		if (q->wd_expires) {
			long delay = PSCHED_US2JIFFIE(q->wd_expires);
			if (delay <= 0)
				delay = 1;
			mod_timer(&q->wd_timer, jiffies + delay);
			sch->flags |= TCQ_F_THROTTLED;
		}
	}
	return NULL;
}

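/* Illustrative sketch (not part of the original code): the clock integrator
 * above keeps the scheduler clock at max(real time, accumulated work), which
 * is what lets idle time be estimated without an end-of-transmission event.
 */
#if 0
static unsigned long integrate_clock(unsigned long cbq_now,
				     unsigned long real_now,
				     unsigned long last_pkt_time)
{
	/* advance by the expected transmission time of the last packet ... */
	cbq_now += last_pkt_time;
	/* ... and never let the artificial clock fall behind real time */
	if (cbq_now < real_now)
		cbq_now = real_now;
	return cbq_now;
}
#endif
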
/* CBQ class maintenance routines */

static void cbq_adjust_levels(struct cbq_class *this)
{
	if (this == NULL)
		return;

	do {
		int level = 0;
		struct cbq_class *cl;

		if ((cl = this->children) != NULL) {
			do {
				if (cl->level > level)
					level = cl->level;
			} while ((cl = cl->sibling) != this->children);
		}
		this->level = level+1;
	} while ((this = this->tparent) != NULL);
}

static void cbq_normalize_quanta(struct cbq_sched_data *q, int prio)
{
	struct cbq_class *cl;
	unsigned h;

	if (q->quanta[prio] == 0)
		return;

	for (h=0; h<16; h++) {
		for (cl = q->classes[h]; cl; cl = cl->next) {
			/* BUGGGG... Beware! This expression suffers from
			   arithmetic overflows!
			 */
			if (cl->priority == prio) {
				cl->quantum = (cl->weight*cl->allot*q->nclasses[prio])/
					q->quanta[prio];
			}
			if (cl->quantum <= 0 || cl->quantum > 32*cl->qdisc->dev->mtu) {
				printk(KERN_WARNING "CBQ: class %08x has bad quantum==%ld, repaired.\n", cl->classid, cl->quantum);
				cl->quantum = cl->qdisc->dev->mtu/2 + 1;
			}
		}
	}
}

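/* Worked example (hypothetical numbers, not part of the original code):
 * with three classes of weights 1, 2 and 3 in one priority band and
 * allot == 1514, the normalization above gives per-round quanta in the
 * ratio of the weights:
 *
 *	q->nclasses[prio] = 3, q->quanta[prio] = 1 + 2 + 3 = 6
 *	quantum(w=1) = (1 * 1514 * 3) / 6 =  757
 *	quantum(w=2) = (2 * 1514 * 3) / 6 = 1514
 *	quantum(w=3) = (3 * 1514 * 3) / 6 = 2271
 *
 * i.e. each WRR round hands out bytes 1:2:3 while the average quantum
 * stays at the configured allot.
 */
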
static void cbq_sync_defmap(struct cbq_class *cl)
{
	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
	struct cbq_class *split = cl->split;
	unsigned h;
	int i;

	if (split == NULL)
		return;

	for (i=0; i<=TC_PRIO_MAX; i++) {
		if (split->defaults[i] == cl && !(cl->defmap&(1<<i)))
			split->defaults[i] = NULL;
	}

	for (i=0; i<=TC_PRIO_MAX; i++) {
		int level = split->level;

		if (split->defaults[i])
			continue;

		for (h=0; h<16; h++) {
			struct cbq_class *c;

			for (c = q->classes[h]; c; c = c->next) {
				if (c->split == split && c->level < level &&
				    c->defmap&(1<<i)) {
					split->defaults[i] = c;
					level = c->level;
				}
			}
		}
	}
}

static void cbq_change_defmap(struct cbq_class *cl, u32 splitid, u32 def, u32 mask)
{
	struct cbq_class *split = NULL;

	if (splitid == 0) {
		if ((split = cl->split) == NULL)
			return;
		splitid = split->classid;
	}

	if (split == NULL || split->classid != splitid) {
		for (split = cl->tparent; split; split = split->tparent)
			if (split->classid == splitid)
				break;
	}

	if (split == NULL)
		return;

	if (cl->split != split) {
		cl->defmap = 0;
		cbq_sync_defmap(cl);
		cl->split = split;
		cl->defmap = def&mask;
	} else
		cl->defmap = (cl->defmap&~mask)|(def&mask);

	cbq_sync_defmap(cl);
}

static void cbq_unlink_class(struct cbq_class *this)
{
	struct cbq_class *cl, **clp;
	struct cbq_sched_data *q = qdisc_priv(this->qdisc);

	for (clp = &q->classes[cbq_hash(this->classid)]; (cl = *clp) != NULL; clp = &cl->next) {
		if (cl == this) {
			*clp = cl->next;
			cl->next = NULL;
			break;
		}
	}

	if (this->tparent) {
		clp = &this->sibling;
		cl = *clp;
		do {
			if (cl == this) {
				*clp = cl->sibling;
				break;
			}
			clp = &cl->sibling;
		} while ((cl = *clp) != this->sibling);

		if (this->tparent->children == this) {
			this->tparent->children = this->sibling;
			if (this->sibling == this)
				this->tparent->children = NULL;
		}
	} else {
		BUG_TRAP(this->sibling == this);
	}
}

static void cbq_link_class(struct cbq_class *this)
{
	struct cbq_sched_data *q = qdisc_priv(this->qdisc);
	unsigned h = cbq_hash(this->classid);
	struct cbq_class *parent = this->tparent;

	this->sibling = this;
	this->next = q->classes[h];
	q->classes[h] = this;

	if (parent == NULL)
		return;

	if (parent->children == NULL) {
		parent->children = this;
	} else {
		this->sibling = parent->children->sibling;
		parent->children->sibling = this;
	}
}

static unsigned int cbq_drop(struct Qdisc* sch)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *cl, *cl_head;
	int prio;
	unsigned int len;

	for (prio = TC_CBQ_MAXPRIO; prio >= 0; prio--) {
		if ((cl_head = q->active[prio]) == NULL)
			continue;

		cl = cl_head;
		do {
			if (cl->q->ops->drop && (len = cl->q->ops->drop(cl->q))) {
				sch->q.qlen--;
				if (!cl->q->q.qlen)
					cbq_deactivate_class(cl);
				return len;
			}
		} while ((cl = cl->next_alive) != cl_head);
	}
	return 0;
}

static void
cbq_reset(struct Qdisc* sch)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *cl;
	int prio;
	unsigned h;

	q->activemask = 0;
	q->pmask = 0;
	q->tx_class = NULL;
	q->tx_borrowed = NULL;
	del_timer(&q->wd_timer);
	del_timer(&q->delay_timer);
	q->toplevel = TC_CBQ_MAXLEVEL;
	PSCHED_GET_TIME(q->now);
	q->now_rt = q->now;

	for (prio = 0; prio <= TC_CBQ_MAXPRIO; prio++)
		q->active[prio] = NULL;

	for (h = 0; h < 16; h++) {
		for (cl = q->classes[h]; cl; cl = cl->next) {
			qdisc_reset(cl->q);

			cl->next_alive = NULL;
			PSCHED_SET_PASTPERFECT(cl->undertime);
			cl->avgidle = cl->maxidle;
			cl->deficit = cl->quantum;
			cl->cpriority = cl->priority;
		}
	}
	sch->q.qlen = 0;
}

static int cbq_set_lss(struct cbq_class *cl, struct tc_cbq_lssopt *lss)
{
	if (lss->change&TCF_CBQ_LSS_FLAGS) {
		cl->share = (lss->flags&TCF_CBQ_LSS_ISOLATED) ? NULL : cl->tparent;
		cl->borrow = (lss->flags&TCF_CBQ_LSS_BOUNDED) ? NULL : cl->tparent;
	}
	if (lss->change&TCF_CBQ_LSS_EWMA)
		cl->ewma_log = lss->ewma_log;
	if (lss->change&TCF_CBQ_LSS_AVPKT)
		cl->avpkt = lss->avpkt;
	if (lss->change&TCF_CBQ_LSS_MINIDLE)
		cl->minidle = -(long)lss->minidle;
	if (lss->change&TCF_CBQ_LSS_MAXIDLE) {
		cl->maxidle = lss->maxidle;
		cl->avgidle = lss->maxidle;
	}
	if (lss->change&TCF_CBQ_LSS_OFFTIME)
		cl->offtime = lss->offtime;
	return 0;
}

static void cbq_rmprio(struct cbq_sched_data *q, struct cbq_class *cl)
{
	q->nclasses[cl->priority]--;
	q->quanta[cl->priority] -= cl->weight;
	cbq_normalize_quanta(q, cl->priority);
}

static void cbq_addprio(struct cbq_sched_data *q, struct cbq_class *cl)
{
	q->nclasses[cl->priority]++;
	q->quanta[cl->priority] += cl->weight;
	cbq_normalize_quanta(q, cl->priority);
}

static int cbq_set_wrr(struct cbq_class *cl, struct tc_cbq_wrropt *wrr)
{
	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);

	if (wrr->allot)
		cl->allot = wrr->allot;
	if (wrr->weight)
		cl->weight = wrr->weight;
	if (wrr->priority) {
		cl->priority = wrr->priority-1;
		cl->cpriority = cl->priority;
		if (cl->priority >= cl->priority2)
			cl->priority2 = TC_CBQ_MAXPRIO-1;
	}

	cbq_addprio(q, cl);
	return 0;
}

static int cbq_set_overlimit(struct cbq_class *cl, struct tc_cbq_ovl *ovl)
{
	switch (ovl->strategy) {
	case TC_CBQ_OVL_CLASSIC:
		cl->overlimit = cbq_ovl_classic;
		break;
	case TC_CBQ_OVL_DELAY:
		cl->overlimit = cbq_ovl_delay;
		break;
	case TC_CBQ_OVL_LOWPRIO:
		if (ovl->priority2-1 >= TC_CBQ_MAXPRIO ||
		    ovl->priority2-1 <= cl->priority)
			return -EINVAL;
		cl->priority2 = ovl->priority2-1;
		cl->overlimit = cbq_ovl_lowprio;
		break;
	case TC_CBQ_OVL_DROP:
		cl->overlimit = cbq_ovl_drop;
		break;
	case TC_CBQ_OVL_RCLASSIC:
		cl->overlimit = cbq_ovl_rclassic;
		break;
	default:
		return -EINVAL;
	}
	cl->penalty = (ovl->penalty*HZ)/1000;
	return 0;
}

#ifdef CONFIG_NET_CLS_POLICE
static int cbq_set_police(struct cbq_class *cl, struct tc_cbq_police *p)
{
	cl->police = p->police;

	if (cl->q->handle) {
		if (p->police == TC_POLICE_RECLASSIFY)
			cl->q->reshape_fail = cbq_reshape_fail;
		else
			cl->q->reshape_fail = NULL;
	}
	return 0;
}
#endif

static int cbq_set_fopt(struct cbq_class *cl, struct tc_cbq_fopt *fopt)
{
	cbq_change_defmap(cl, fopt->split, fopt->defmap, fopt->defchange);
	return 0;
}

static int cbq_init(struct Qdisc *sch, struct rtattr *opt)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct rtattr *tb[TCA_CBQ_MAX];
	struct tc_ratespec *r;

	if (rtattr_parse_nested(tb, TCA_CBQ_MAX, opt) < 0 ||
	    tb[TCA_CBQ_RTAB-1] == NULL || tb[TCA_CBQ_RATE-1] == NULL ||
	    RTA_PAYLOAD(tb[TCA_CBQ_RATE-1]) < sizeof(struct tc_ratespec))
		return -EINVAL;

	if (tb[TCA_CBQ_LSSOPT-1] &&
	    RTA_PAYLOAD(tb[TCA_CBQ_LSSOPT-1]) < sizeof(struct tc_cbq_lssopt))
		return -EINVAL;

	r = RTA_DATA(tb[TCA_CBQ_RATE-1]);

	if ((q->link.R_tab = qdisc_get_rtab(r, tb[TCA_CBQ_RTAB-1])) == NULL)
		return -EINVAL;

	q->link.refcnt = 1;
	q->link.sibling = &q->link;
	q->link.classid = sch->handle;
	q->link.qdisc = sch;
	if (!(q->link.q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops,
					    sch->handle)))
		q->link.q = &noop_qdisc;

	q->link.priority = TC_CBQ_MAXPRIO-1;
	q->link.priority2 = TC_CBQ_MAXPRIO-1;
	q->link.cpriority = TC_CBQ_MAXPRIO-1;
	q->link.ovl_strategy = TC_CBQ_OVL_CLASSIC;
	q->link.overlimit = cbq_ovl_classic;
	q->link.allot = psched_mtu(sch->dev);
	q->link.quantum = q->link.allot;
	q->link.weight = q->link.R_tab->rate.rate;

	q->link.ewma_log = TC_CBQ_DEF_EWMA;
	q->link.avpkt = q->link.allot/2;
	q->link.minidle = -0x7FFFFFFF;
	q->link.stats_lock = &sch->dev->queue_lock;

	init_timer(&q->wd_timer);
	q->wd_timer.data = (unsigned long)sch;
	q->wd_timer.function = cbq_watchdog;
	init_timer(&q->delay_timer);
	q->delay_timer.data = (unsigned long)sch;
	q->delay_timer.function = cbq_undelay;
	q->toplevel = TC_CBQ_MAXLEVEL;
	PSCHED_GET_TIME(q->now);
	q->now_rt = q->now;

	cbq_link_class(&q->link);

	if (tb[TCA_CBQ_LSSOPT-1])
		cbq_set_lss(&q->link, RTA_DATA(tb[TCA_CBQ_LSSOPT-1]));

	cbq_addprio(q, &q->link);
	return 0;
}

static __inline__ int cbq_dump_rate(struct sk_buff *skb, struct cbq_class *cl)
{
	unsigned char *b = skb->tail;

	RTA_PUT(skb, TCA_CBQ_RATE, sizeof(cl->R_tab->rate), &cl->R_tab->rate);
	return skb->len;

rtattr_failure:
	skb_trim(skb, b - skb->data);
	return -1;
}

static __inline__ int cbq_dump_lss(struct sk_buff *skb, struct cbq_class *cl)
{
	unsigned char *b = skb->tail;
	struct tc_cbq_lssopt opt;

	opt.flags = 0;
	if (cl->borrow == NULL)
		opt.flags |= TCF_CBQ_LSS_BOUNDED;
	if (cl->share == NULL)
		opt.flags |= TCF_CBQ_LSS_ISOLATED;
	opt.ewma_log = cl->ewma_log;
	opt.level = cl->level;
	opt.avpkt = cl->avpkt;
	opt.maxidle = cl->maxidle;
	opt.minidle = (u32)(-cl->minidle);
	opt.offtime = cl->offtime;
	RTA_PUT(skb, TCA_CBQ_LSSOPT, sizeof(opt), &opt);
	return skb->len;

rtattr_failure:
	skb_trim(skb, b - skb->data);
	return -1;
}

static __inline__ int cbq_dump_wrr(struct sk_buff *skb, struct cbq_class *cl)
{
	unsigned char *b = skb->tail;
	struct tc_cbq_wrropt opt;

	opt.flags = 0;
	opt.allot = cl->allot;
	opt.priority = cl->priority+1;
	opt.cpriority = cl->cpriority+1;
	opt.weight = cl->weight;
	RTA_PUT(skb, TCA_CBQ_WRROPT, sizeof(opt), &opt);
	return skb->len;

rtattr_failure:
	skb_trim(skb, b - skb->data);
	return -1;
}

static __inline__ int cbq_dump_ovl(struct sk_buff *skb, struct cbq_class *cl)
{
	unsigned char *b = skb->tail;
	struct tc_cbq_ovl opt;

	opt.strategy = cl->ovl_strategy;
	opt.priority2 = cl->priority2+1;
	opt.penalty = (cl->penalty*1000)/HZ;
	RTA_PUT(skb, TCA_CBQ_OVL_STRATEGY, sizeof(opt), &opt);
	return skb->len;

rtattr_failure:
	skb_trim(skb, b - skb->data);
	return -1;
}

static __inline__ int cbq_dump_fopt(struct sk_buff *skb, struct cbq_class *cl)
{
	unsigned char *b = skb->tail;
	struct tc_cbq_fopt opt;

	if (cl->split || cl->defmap) {
		opt.split = cl->split ? cl->split->classid : 0;
		opt.defmap = cl->defmap;
		opt.defchange = ~0;
		RTA_PUT(skb, TCA_CBQ_FOPT, sizeof(opt), &opt);
	}
	return skb->len;

rtattr_failure:
	skb_trim(skb, b - skb->data);
	return -1;
}

#ifdef CONFIG_NET_CLS_POLICE
static __inline__ int cbq_dump_police(struct sk_buff *skb, struct cbq_class *cl)
{
	unsigned char *b = skb->tail;
	struct tc_cbq_police opt;

	if (cl->police) {
		opt.police = cl->police;
		RTA_PUT(skb, TCA_CBQ_POLICE, sizeof(opt), &opt);
	}
	return skb->len;

rtattr_failure:
	skb_trim(skb, b - skb->data);
	return -1;
}
#endif

static int cbq_dump_attr(struct sk_buff *skb, struct cbq_class *cl)
{
	if (cbq_dump_lss(skb, cl) < 0 ||
	    cbq_dump_rate(skb, cl) < 0 ||
	    cbq_dump_wrr(skb, cl) < 0 ||
	    cbq_dump_ovl(skb, cl) < 0 ||
#ifdef CONFIG_NET_CLS_POLICE
	    cbq_dump_police(skb, cl) < 0 ||
#endif
	    cbq_dump_fopt(skb, cl) < 0)
		return -1;
	return 0;
}

static int cbq_dump(struct Qdisc *sch, struct sk_buff *skb)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	unsigned char *b = skb->tail;
	struct rtattr *rta;

	rta = (struct rtattr*)b;
	RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
	if (cbq_dump_attr(skb, &q->link) < 0)
		goto rtattr_failure;
	rta->rta_len = skb->tail - b;
	return skb->len;

rtattr_failure:
	skb_trim(skb, b - skb->data);
	return -1;
}

static int
cbq_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
{
	struct cbq_sched_data *q = qdisc_priv(sch);

	q->link.xstats.avgidle = q->link.avgidle;
	return gnet_stats_copy_app(d, &q->link.xstats, sizeof(q->link.xstats));
}

static int
cbq_dump_class(struct Qdisc *sch, unsigned long arg,
	       struct sk_buff *skb, struct tcmsg *tcm)
{
	struct cbq_class *cl = (struct cbq_class*)arg;
	unsigned char *b = skb->tail;
	struct rtattr *rta;

	if (cl->tparent)
		tcm->tcm_parent = cl->tparent->classid;
	else
		tcm->tcm_parent = TC_H_ROOT;
	tcm->tcm_handle = cl->classid;
	tcm->tcm_info = cl->q->handle;

	rta = (struct rtattr*)b;
	RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
	if (cbq_dump_attr(skb, cl) < 0)
		goto rtattr_failure;
	rta->rta_len = skb->tail - b;
	return skb->len;

rtattr_failure:
	skb_trim(skb, b - skb->data);
	return -1;
}

static int
cbq_dump_class_stats(struct Qdisc *sch, unsigned long arg,
		     struct gnet_dump *d)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *cl = (struct cbq_class*)arg;

	cl->qstats.qlen = cl->q->q.qlen;
	cl->xstats.avgidle = cl->avgidle;
	cl->xstats.undertime = 0;

	if (!PSCHED_IS_PASTPERFECT(cl->undertime))
		cl->xstats.undertime = PSCHED_TDIFF(cl->undertime, q->now);

	if (gnet_stats_copy_basic(d, &cl->bstats) < 0 ||
#ifdef CONFIG_NET_ESTIMATOR
	    gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
#endif
	    gnet_stats_copy_queue(d, &cl->qstats) < 0)
		return -1;

	return gnet_stats_copy_app(d, &cl->xstats, sizeof(cl->xstats));
}

static int cbq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
		     struct Qdisc **old)
{
	struct cbq_class *cl = (struct cbq_class*)arg;

	if (cl) {
		if (new == NULL) {
			if ((new = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops,
						     cl->classid)) == NULL)
				return -ENOBUFS;
		} else {
#ifdef CONFIG_NET_CLS_POLICE
			if (cl->police == TC_POLICE_RECLASSIFY)
				new->reshape_fail = cbq_reshape_fail;
#endif
		}
		sch_tree_lock(sch);
		*old = xchg(&cl->q, new);
		qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
		qdisc_reset(*old);
		sch_tree_unlock(sch);

		return 0;
	}
	return -ENOENT;
}

static struct Qdisc *
cbq_leaf(struct Qdisc *sch, unsigned long arg)
{
	struct cbq_class *cl = (struct cbq_class*)arg;

	return cl ? cl->q : NULL;
}

static void cbq_qlen_notify(struct Qdisc *sch, unsigned long arg)
{
	struct cbq_class *cl = (struct cbq_class *)arg;

	if (cl->q->q.qlen == 0)
		cbq_deactivate_class(cl);
}

static unsigned long cbq_get(struct Qdisc *sch, u32 classid)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *cl = cbq_class_lookup(q, classid);

	if (cl) {
		cl->refcnt++;
		return (unsigned long)cl;
	}
	return 0;
}

static void cbq_destroy_filters(struct cbq_class *cl)
{
	struct tcf_proto *tp;

	while ((tp = cl->filter_list) != NULL) {
		cl->filter_list = tp->next;
		tcf_destroy(tp);
	}
}

static void cbq_destroy_class(struct Qdisc *sch, struct cbq_class *cl)
{
	struct cbq_sched_data *q = qdisc_priv(sch);

	BUG_TRAP(!cl->filters);

	cbq_destroy_filters(cl);
	qdisc_destroy(cl->q);
	qdisc_put_rtab(cl->R_tab);
#ifdef CONFIG_NET_ESTIMATOR
	gen_kill_estimator(&cl->bstats, &cl->rate_est);
#endif
	if (cl != &q->link)
		kfree(cl);
}

static void
cbq_destroy(struct Qdisc* sch)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *cl;
	unsigned h;

#ifdef CONFIG_NET_CLS_POLICE
	q->rx_class = NULL;
#endif
	/*
	 * Filters must be destroyed first because we don't destroy the
	 * classes from root to leaves, which means that filters can still
	 * be bound to classes which have been destroyed already. --TGR '04
	 */
	for (h = 0; h < 16; h++)
		for (cl = q->classes[h]; cl; cl = cl->next)
			cbq_destroy_filters(cl);

	for (h = 0; h < 16; h++) {
		struct cbq_class *next;

		for (cl = q->classes[h]; cl; cl = next) {
			next = cl->next;
			cbq_destroy_class(sch, cl);
		}
	}
}

static void cbq_put(struct Qdisc *sch, unsigned long arg)
{
	struct cbq_class *cl = (struct cbq_class*)arg;

	if (--cl->refcnt == 0) {
#ifdef CONFIG_NET_CLS_POLICE
		struct cbq_sched_data *q = qdisc_priv(sch);

		spin_lock_bh(&sch->dev->queue_lock);
		if (q->rx_class == cl)
			q->rx_class = NULL;
		spin_unlock_bh(&sch->dev->queue_lock);
#endif
		cbq_destroy_class(sch, cl);
	}
}

static int
cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct rtattr **tca,
		 unsigned long *arg)
{
	int err;
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *cl = (struct cbq_class*)*arg;
	struct rtattr *opt = tca[TCA_OPTIONS-1];
	struct rtattr *tb[TCA_CBQ_MAX];
	struct cbq_class *parent;
	struct qdisc_rate_table *rtab = NULL;

	if (opt==NULL || rtattr_parse_nested(tb, TCA_CBQ_MAX, opt))
		return -EINVAL;

	if (tb[TCA_CBQ_OVL_STRATEGY-1] &&
	    RTA_PAYLOAD(tb[TCA_CBQ_OVL_STRATEGY-1]) < sizeof(struct tc_cbq_ovl))
		return -EINVAL;

	if (tb[TCA_CBQ_FOPT-1] &&
	    RTA_PAYLOAD(tb[TCA_CBQ_FOPT-1]) < sizeof(struct tc_cbq_fopt))
		return -EINVAL;

	if (tb[TCA_CBQ_RATE-1] &&
	    RTA_PAYLOAD(tb[TCA_CBQ_RATE-1]) < sizeof(struct tc_ratespec))
		return -EINVAL;

	if (tb[TCA_CBQ_LSSOPT-1] &&
	    RTA_PAYLOAD(tb[TCA_CBQ_LSSOPT-1]) < sizeof(struct tc_cbq_lssopt))
		return -EINVAL;

	if (tb[TCA_CBQ_WRROPT-1] &&
	    RTA_PAYLOAD(tb[TCA_CBQ_WRROPT-1]) < sizeof(struct tc_cbq_wrropt))
		return -EINVAL;

#ifdef CONFIG_NET_CLS_POLICE
	if (tb[TCA_CBQ_POLICE-1] &&
	    RTA_PAYLOAD(tb[TCA_CBQ_POLICE-1]) < sizeof(struct tc_cbq_police))
		return -EINVAL;
#endif

	if (cl) {
		/* Check parent */
		if (parentid) {
			if (cl->tparent && cl->tparent->classid != parentid)
				return -EINVAL;
			if (!cl->tparent && parentid != TC_H_ROOT)
				return -EINVAL;
		}

		if (tb[TCA_CBQ_RATE-1]) {
			rtab = qdisc_get_rtab(RTA_DATA(tb[TCA_CBQ_RATE-1]), tb[TCA_CBQ_RTAB-1]);
			if (rtab == NULL)
				return -EINVAL;
		}

		/* Change class parameters */
		sch_tree_lock(sch);

		if (cl->next_alive != NULL)
			cbq_deactivate_class(cl);

		if (rtab) {
			rtab = xchg(&cl->R_tab, rtab);
			qdisc_put_rtab(rtab);
		}

		if (tb[TCA_CBQ_LSSOPT-1])
			cbq_set_lss(cl, RTA_DATA(tb[TCA_CBQ_LSSOPT-1]));

		if (tb[TCA_CBQ_WRROPT-1]) {
			cbq_rmprio(q, cl);
			cbq_set_wrr(cl, RTA_DATA(tb[TCA_CBQ_WRROPT-1]));
		}

		if (tb[TCA_CBQ_OVL_STRATEGY-1])
			cbq_set_overlimit(cl, RTA_DATA(tb[TCA_CBQ_OVL_STRATEGY-1]));

#ifdef CONFIG_NET_CLS_POLICE
		if (tb[TCA_CBQ_POLICE-1])
			cbq_set_police(cl, RTA_DATA(tb[TCA_CBQ_POLICE-1]));
#endif

		if (tb[TCA_CBQ_FOPT-1])
			cbq_set_fopt(cl, RTA_DATA(tb[TCA_CBQ_FOPT-1]));

		if (cl->q->q.qlen)
			cbq_activate_class(cl);

		sch_tree_unlock(sch);

#ifdef CONFIG_NET_ESTIMATOR
		if (tca[TCA_RATE-1])
			gen_replace_estimator(&cl->bstats, &cl->rate_est,
					      cl->stats_lock, tca[TCA_RATE-1]);
#endif
		return 0;
	}

	if (parentid == TC_H_ROOT)
		return -EINVAL;

	if (tb[TCA_CBQ_WRROPT-1] == NULL || tb[TCA_CBQ_RATE-1] == NULL ||
	    tb[TCA_CBQ_LSSOPT-1] == NULL)
		return -EINVAL;

	rtab = qdisc_get_rtab(RTA_DATA(tb[TCA_CBQ_RATE-1]), tb[TCA_CBQ_RTAB-1]);
	if (rtab == NULL)
		return -EINVAL;

	if (classid) {
		err = -EINVAL;
		if (TC_H_MAJ(classid^sch->handle) || cbq_class_lookup(q, classid))
			goto failure;
	} else {
		int i;
		classid = TC_H_MAKE(sch->handle,0x8000);

		for (i=0; i<0x8000; i++) {
			if (++q->hgenerator >= 0x8000)
				q->hgenerator = 1;
			if (cbq_class_lookup(q, classid|q->hgenerator) == NULL)
				break;
		}
		err = -ENOSR;
		if (i >= 0x8000)
			goto failure;
		classid = classid|q->hgenerator;
	}

	parent = &q->link;
	if (parentid) {
		parent = cbq_class_lookup(q, parentid);
		err = -EINVAL;
		if (parent == NULL)
			goto failure;
	}

	err = -ENOBUFS;
	cl = kzalloc(sizeof(*cl), GFP_KERNEL);
	if (cl == NULL)
		goto failure;
	cl->R_tab = rtab;
	rtab = NULL;
	cl->refcnt = 1;
	if (!(cl->q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops, classid)))
		cl->q = &noop_qdisc;
	cl->classid = classid;
	cl->tparent = parent;
	cl->qdisc = sch;
	cl->allot = parent->allot;
	cl->quantum = cl->allot;
	cl->weight = cl->R_tab->rate.rate;
	cl->stats_lock = &sch->dev->queue_lock;

	sch_tree_lock(sch);
	cbq_link_class(cl);
	cl->borrow = cl->tparent;
	if (cl->tparent != &q->link)
		cl->share = cl->tparent;
	cbq_adjust_levels(parent);
	cl->minidle = -0x7FFFFFFF;
	cbq_set_lss(cl, RTA_DATA(tb[TCA_CBQ_LSSOPT-1]));
	cbq_set_wrr(cl, RTA_DATA(tb[TCA_CBQ_WRROPT-1]));
	if (cl->ewma_log==0)
		cl->ewma_log = q->link.ewma_log;
	if (cl->maxidle==0)
		cl->maxidle = q->link.maxidle;
	if (cl->avpkt==0)
		cl->avpkt = q->link.avpkt;
	cl->overlimit = cbq_ovl_classic;
	if (tb[TCA_CBQ_OVL_STRATEGY-1])
		cbq_set_overlimit(cl, RTA_DATA(tb[TCA_CBQ_OVL_STRATEGY-1]));
#ifdef CONFIG_NET_CLS_POLICE
	if (tb[TCA_CBQ_POLICE-1])
		cbq_set_police(cl, RTA_DATA(tb[TCA_CBQ_POLICE-1]));
#endif
	if (tb[TCA_CBQ_FOPT-1])
		cbq_set_fopt(cl, RTA_DATA(tb[TCA_CBQ_FOPT-1]));
	sch_tree_unlock(sch);

#ifdef CONFIG_NET_ESTIMATOR
	if (tca[TCA_RATE-1])
		gen_new_estimator(&cl->bstats, &cl->rate_est,
				  cl->stats_lock, tca[TCA_RATE-1]);
#endif

	*arg = (unsigned long)cl;
	return 0;

failure:
	qdisc_put_rtab(rtab);
	return err;
}

static int cbq_delete(struct Qdisc *sch, unsigned long arg)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *cl = (struct cbq_class*)arg;
	unsigned int qlen;

	if (cl->filters || cl->children || cl == &q->link)
		return -EBUSY;

	sch_tree_lock(sch);

	qlen = cl->q->q.qlen;
	qdisc_reset(cl->q);
	qdisc_tree_decrease_qlen(cl->q, qlen);

	if (cl->next_alive)
		cbq_deactivate_class(cl);

	if (q->tx_borrowed == cl)
		q->tx_borrowed = q->tx_class;
	if (q->tx_class == cl) {
		q->tx_class = NULL;
		q->tx_borrowed = NULL;
	}
#ifdef CONFIG_NET_CLS_POLICE
	if (q->rx_class == cl)
		q->rx_class = NULL;
#endif

	cbq_unlink_class(cl);
	cbq_adjust_levels(cl->tparent);
	cl->defmap = 0;
	cbq_sync_defmap(cl);

	cbq_rmprio(q, cl);
	sch_tree_unlock(sch);

	if (--cl->refcnt == 0)
		cbq_destroy_class(sch, cl);

	return 0;
}

static struct tcf_proto **cbq_find_tcf(struct Qdisc *sch, unsigned long arg)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *cl = (struct cbq_class *)arg;

	if (cl == NULL)
		cl = &q->link;

	return &cl->filter_list;
}

static unsigned long cbq_bind_filter(struct Qdisc *sch, unsigned long parent,
				     u32 classid)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *p = (struct cbq_class*)parent;
	struct cbq_class *cl = cbq_class_lookup(q, classid);

	if (cl) {
		if (p && p->level <= cl->level)
			return 0;
		cl->filters++;
		return (unsigned long)cl;
	}
	return 0;
}

static void cbq_unbind_filter(struct Qdisc *sch, unsigned long arg)
{
	struct cbq_class *cl = (struct cbq_class*)arg;

	cl->filters--;
}

static void cbq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	unsigned h;

	if (arg->stop)
		return;

	for (h = 0; h < 16; h++) {
		struct cbq_class *cl;

		for (cl = q->classes[h]; cl; cl = cl->next) {
			if (arg->count < arg->skip) {
				arg->count++;
				continue;
			}
			if (arg->fn(sch, (unsigned long)cl, arg) < 0) {
				arg->stop = 1;
				return;
			}
			arg->count++;
		}
	}
}

static struct Qdisc_class_ops cbq_class_ops = {
	.graft		=	cbq_graft,
	.leaf		=	cbq_leaf,
	.qlen_notify	=	cbq_qlen_notify,
	.get		=	cbq_get,
	.put		=	cbq_put,
	.change		=	cbq_change_class,
	.delete		=	cbq_delete,
	.walk		=	cbq_walk,
	.tcf_chain	=	cbq_find_tcf,
	.bind_tcf	=	cbq_bind_filter,
	.unbind_tcf	=	cbq_unbind_filter,
	.dump		=	cbq_dump_class,
	.dump_stats	=	cbq_dump_class_stats,
};

static struct Qdisc_ops cbq_qdisc_ops = {
	.next		=	NULL,
	.cl_ops		=	&cbq_class_ops,
	.id		=	"cbq",
	.priv_size	=	sizeof(struct cbq_sched_data),
	.enqueue	=	cbq_enqueue,
	.dequeue	=	cbq_dequeue,
	.requeue	=	cbq_requeue,
	.drop		=	cbq_drop,
	.init		=	cbq_init,
	.reset		=	cbq_reset,
	.destroy	=	cbq_destroy,
	.dump		=	cbq_dump,
	.dump_stats	=	cbq_dump_stats,
	.owner		=	THIS_MODULE,
};

static int __init cbq_module_init(void)
{
	return register_qdisc(&cbq_qdisc_ops);
}

static void __exit cbq_module_exit(void)
{
	unregister_qdisc(&cbq_qdisc_ops);
}

module_init(cbq_module_init)
module_exit(cbq_module_exit)
MODULE_LICENSE("GPL");