/*
 * net/sched/sch_cbq.c	Class-Based Queueing discipline.
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 */
#include <linux/config.h>
#include <linux/module.h>
#include <asm/uaccess.h>
#include <asm/system.h>
#include <asm/bitops.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/string.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/errno.h>
#include <linux/interrupt.h>
#include <linux/if_ether.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/notifier.h>
#include <net/route.h>
#include <linux/skbuff.h>
#include <net/pkt_sched.h>
/*	Class-Based Queueing (CBQ) algorithm.
	=======================================

	Sources: [1] Sally Floyd and Van Jacobson, "Link-sharing and Resource
		     Management Models for Packet Networks",
		     IEEE/ACM Transactions on Networking, Vol.3, No.4, 1995

		 [2] Sally Floyd, "Notes on CBQ and Guaranteed Service", 1995

		 [3] Sally Floyd, "Notes on Class-Based Queueing: Setting
		     Parameters", 1996

		 [4] Sally Floyd and Michael Speer, "Experimental Results
		     for Class-Based Queueing", 1998, not published.

	-----------------------------------------------------------------------

	Algorithm skeleton was taken from NS simulator cbq.cc.
	If someone wants to check this code against the LBL version,
	he should take into account that ONLY the skeleton was borrowed;
	the implementation is different. Particularly:

	--- The WRR algorithm is different. Our version looks more
	reasonable (I hope) and works when quanta are allowed to be
	less than MTU, which is always the case when real-time classes
	have small rates. Note that the statement of [3] is
	incomplete: delay may actually be estimated even if class
	per-round allotment is less than MTU. Namely, if per-round
	allotment is W*r_i, and r_1+...+r_k = r < 1,

	delay_i <= ([MTU/(W*r_i)]*W*r + W*r + k*MTU)/B

	In the worst case we have an IntServ estimate with D = W*r+k*MTU
	and C = MTU*r. The proof (if correct at all) is trivial.

	--- It seems that cbq-2.0 is not very accurate. At least, I cannot
	interpret some places, which look like wrong translations
	from NS. Anyone is advised to find these differences
	and explain to me why I am wrong 8).

	--- Linux has no EOI event, so that we cannot estimate true class
	idle time. The workaround is to consider the next dequeue event
	as a sign that the previous packet is finished. This is wrong because of
	internal device queueing, but on a permanently loaded link it is true.
	Moreover, combined with the clock integrator, this scheme looks
	very close to an ideal solution.  */
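
/* A worked instance of the bound above (illustrative numbers, not from the
   original text): take MTU = 1500 bytes, k = 2 real-time classes with
   r_1 = r_2 = 0.05 (so r = 0.1), and W chosen so that the per-round
   allotment W*r_i = 500 bytes, i.e. W = 10000 bytes. Then
   [MTU/(W*r_i)] = 3, W*r = 1000, and

	delay_i <= (3*1000 + 1000 + 2*1500)/B = 7000/B,

   about 5.6 ms on a 10 Mbit/s link (B = 1.25e6 bytes/sec), even though
   each per-round allotment (500 bytes) is well below the MTU. */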
struct cbq_sched_data;


struct cbq_class
{
	struct cbq_class	*next;		/* hash table link */
	struct cbq_class	*next_alive;	/* next class with backlog in this priority band */

/* Parameters */
	u32			classid;
	unsigned char		priority;	/* class priority */
	unsigned char		priority2;	/* priority to be used after overlimit */
	unsigned char		ewma_log;	/* time constant for idle time calculation */
	unsigned char		ovl_strategy;
#ifdef CONFIG_NET_CLS_POLICE
	unsigned char		police;
#endif

	u32			defmap;

	/* Link-sharing scheduler parameters */
	long			maxidle;	/* Class parameters: see below. */
	long			offtime;
	long			minidle;
	u32			avpkt;
	struct qdisc_rate_table	*R_tab;

	/* Overlimit strategy parameters */
	void			(*overlimit)(struct cbq_class *cl);
	long			penalty;

	/* General scheduler (WRR) parameters */
	long			allot;
	long			quantum;	/* Allotment per WRR round */
	long			weight;		/* Relative allotment: see below */

	struct Qdisc		*qdisc;		/* Ptr to CBQ discipline */
	struct cbq_class	*split;		/* Ptr to split node */
	struct cbq_class	*share;		/* Ptr to LS parent in the class tree */
	struct cbq_class	*tparent;	/* Ptr to tree parent in the class tree */
	struct cbq_class	*borrow;	/* NULL if class is bandwidth limited;
						   parent otherwise */
	struct cbq_class	*sibling;	/* Sibling chain */
	struct cbq_class	*children;	/* Pointer to children chain */

	struct Qdisc		*q;		/* Elementary queueing discipline */


/* Variables */
	unsigned char		cpriority;	/* Effective priority */
	unsigned char		delayed;
	unsigned char		level;		/* level of the class in hierarchy:
						   0 for leaf classes, and maximal
						   level of children + 1 for nodes.
						 */

	psched_time_t		last;		/* Last end of service */
	psched_time_t		undertime;
	long			avgidle;
	long			deficit;	/* Saved deficit for WRR */
	unsigned long		penalized;
	struct tc_stats		stats;
	struct tc_cbq_xstats	xstats;

	struct tcf_proto	*filter_list;

	int			refcnt;
	int			filters;

	struct cbq_class	*defaults[TC_PRIO_MAX+1];
};
struct cbq_sched_data
{
	struct cbq_class	*classes[16];		/* Hash table of all classes */
	int			nclasses[TC_CBQ_MAXPRIO+1];
	unsigned		quanta[TC_CBQ_MAXPRIO+1];

	struct cbq_class	link;

	unsigned		activemask;
	struct cbq_class	*active[TC_CBQ_MAXPRIO+1];	/* List of all classes
								   with backlog */

#ifdef CONFIG_NET_CLS_POLICE
	struct cbq_class	*rx_class;
#endif
	struct cbq_class	*tx_class;
	struct cbq_class	*tx_borrowed;
	int			tx_len;
	psched_time_t		now;			/* Cached timestamp */
	psched_time_t		now_rt;			/* Cached real time */
	unsigned		pmask;

	struct timer_list	delay_timer;
	struct timer_list	wd_timer;		/* Watchdog timer,
							   started when CBQ has
							   backlog, but cannot
							   transmit just now */
	long			wd_expires;
	int			toplevel;
	u32			hgenerator;
};
#define L2T(cl,len)	qdisc_l2t((cl)->R_tab,len)


static __inline__ unsigned cbq_hash(u32 h)
{
	h ^= h>>8;
	h ^= h>>4;
	return h&0xF;
}

static __inline__ struct cbq_class *
cbq_class_lookup(struct cbq_sched_data *q, u32 classid)
{
	struct cbq_class *cl;

	for (cl = q->classes[cbq_hash(classid)]; cl; cl = cl->next)
		if (cl->classid == classid)
			return cl;
	return NULL;
}
#ifdef CONFIG_NET_CLS_POLICE

static struct cbq_class *
cbq_reclassify(struct sk_buff *skb, struct cbq_class *this)
{
	struct cbq_class *cl, *new;

	for (cl = this->tparent; cl; cl = cl->tparent)
		if ((new = cl->defaults[TC_PRIO_BESTEFFORT]) != NULL && new != this)
			return new;

	return NULL;
}

#endif
/* Classify packet. The procedure is pretty complicated, but
   it allows us to combine link sharing and priority scheduling
   transparently.

   Namely, you can put link sharing rules (f.e. route based) at root of CBQ,
   so that it resolves to split nodes. Then packets are classified
   by logical priority, or a more specific classifier may be attached
   to the split node.
 */

static struct cbq_class *
cbq_classify(struct sk_buff *skb, struct Qdisc *sch)
{
	struct cbq_sched_data *q = (struct cbq_sched_data *)sch->data;
	struct cbq_class *head = &q->link;
	struct cbq_class **defmap;
	struct cbq_class *cl = NULL;
	u32 prio = skb->priority;
	struct tcf_result res;

	/*
	 *  Step 1. If skb->priority points to one of our classes, use it.
	 */
	if (TC_H_MAJ(prio^sch->handle) == 0 &&
	    (cl = cbq_class_lookup(q, prio)) != NULL)
		return cl;

	for (;;) {
		int result = 0;

		defmap = head->defaults;

		/*
		 * Step 2+n. Apply classifier.
		 */
		if (!head->filter_list || (result = tc_classify(skb, head->filter_list, &res)) < 0)
			goto fallback;

		if ((cl = (void*)res.class) == NULL) {
			if (TC_H_MAJ(res.classid))
				cl = cbq_class_lookup(q, res.classid);
			else if ((cl = defmap[res.classid&TC_PRIO_MAX]) == NULL)
				cl = defmap[TC_PRIO_BESTEFFORT];

			if (cl == NULL || cl->level >= head->level)
				goto fallback;
		}

#ifdef CONFIG_NET_CLS_POLICE
		switch (result) {
		case TC_POLICE_RECLASSIFY:
			return cbq_reclassify(skb, cl);
		case TC_POLICE_SHOT:
			return NULL;
		default:
			break;
		}
#endif
		if (cl->level == 0)
			return cl;

		/*
		 * Step 3+n. If classifier selected a link sharing class,
		 *	     apply agency specific classifier.
		 *	     Repeat this procedure until we hit a leaf node.
		 */
		head = cl;
	}

fallback:
	cl = head;

	/*
	 * Step 4. No success...
	 */
	if (TC_H_MAJ(prio) == 0 &&
	    !(cl = head->defaults[prio&TC_PRIO_MAX]) &&
	    !(cl = head->defaults[TC_PRIO_BESTEFFORT]))
		return head;

	return cl;
}
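
/* Example of the procedure above (hypothetical handles, for illustration
   only): suppose sch->handle is 1:0 and a packet arrives with
   skb->priority == 1:10; step 1 finds class 1:10 directly. If instead
   skb->priority is unset, the attached filter list is consulted; a filter
   resolving to a split (non-leaf) class restarts the loop at that node,
   while a filter returning only a logical priority p selects
   defmap[p&TC_PRIO_MAX], falling back to defmap[TC_PRIO_BESTEFFORT] and
   finally to the head class itself. */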
/*
   A packet has just been enqueued on the empty class.
   cbq_activate_class adds it to the tail of active class list
   of its priority band.
 */

static __inline__ void cbq_activate_class(struct cbq_class *cl)
{
	struct cbq_sched_data *q = (struct cbq_sched_data *)cl->qdisc->data;
	int prio = cl->cpriority;
	struct cbq_class *cl_tail;

	cl_tail = q->active[prio];
	q->active[prio] = cl;

	if (cl_tail != NULL) {
		cl->next_alive = cl_tail->next_alive;
		cl_tail->next_alive = cl;
	} else {
		cl->next_alive = cl;
		q->activemask |= (1<<prio);
	}
}
/*
   Unlink class from active chain.
   Note that this same procedure is done directly in cbq_dequeue*
   during round-robin procedure.
 */

static void cbq_deactivate_class(struct cbq_class *this)
{
	struct cbq_sched_data *q = (struct cbq_sched_data *)this->qdisc->data;
	int prio = this->cpriority;
	struct cbq_class *cl;
	struct cbq_class *cl_prev = q->active[prio];

	do {
		cl = cl_prev->next_alive;
		if (cl == this) {
			cl_prev->next_alive = cl->next_alive;
			cl->next_alive = NULL;

			if (cl == q->active[prio]) {
				q->active[prio] = cl_prev;
				if (cl == q->active[prio]) {
					q->active[prio] = NULL;
					q->activemask &= ~(1<<prio);
					return;
				}
			}

			cl = cl_prev->next_alive;
			return;
		}
	} while ((cl_prev = cl) != q->active[prio]);
}
static void
cbq_mark_toplevel(struct cbq_sched_data *q, struct cbq_class *cl)
{
	int toplevel = q->toplevel;

	if (toplevel > cl->level && !(cl->q->flags&TCQ_F_THROTTLED)) {
		psched_time_t now;
		psched_tdiff_t incr;

		PSCHED_GET_TIME(now);
		incr = PSCHED_TDIFF(now, q->now_rt);
		PSCHED_TADD2(q->now, incr, now);

		do {
			if (PSCHED_TLESS(cl->undertime, now)) {
				q->toplevel = cl->level;
				return;
			}
		} while ((cl=cl->borrow) != NULL && toplevel > cl->level);
	}
}
static int
cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
{
	struct cbq_sched_data *q = (struct cbq_sched_data *)sch->data;
	struct cbq_class *cl = cbq_classify(skb, sch);
	int len = skb->len;
	int ret = NET_XMIT_POLICED;

#ifdef CONFIG_NET_CLS_POLICE
	q->rx_class = cl;
#endif
	if (cl) {
#ifdef CONFIG_NET_CLS_POLICE
		cl->q->__parent = sch;
#endif
		if ((ret = cl->q->enqueue(skb, cl->q)) == 0) {
			sch->q.qlen++;
			sch->stats.packets++;
			sch->stats.bytes+=len;
			cbq_mark_toplevel(q, cl);
			if (!cl->next_alive)
				cbq_activate_class(cl);
			return 0;
		}
	}

	sch->stats.drops++;
	if (cl == NULL)
		kfree_skb(skb);
	else {
		cbq_mark_toplevel(q, cl);
		cl->stats.drops++;
	}
	return ret;
}
static int
cbq_requeue(struct sk_buff *skb, struct Qdisc *sch)
{
	struct cbq_sched_data *q = (struct cbq_sched_data *)sch->data;
	struct cbq_class *cl;
	int ret;

	if ((cl = q->tx_class) == NULL) {
		kfree_skb(skb);
		sch->stats.drops++;
		return NET_XMIT_CN;
	}
	q->tx_class = NULL;

	cbq_mark_toplevel(q, cl);

#ifdef CONFIG_NET_CLS_POLICE
	q->rx_class = cl;
	cl->q->__parent = sch;
#endif
	if ((ret = cl->q->ops->requeue(skb, cl->q)) == 0) {
		sch->q.qlen++;
		if (!cl->next_alive)
			cbq_activate_class(cl);
		return 0;
	}
	sch->stats.drops++;
	cl->stats.drops++;
	return ret;
}
/* Overlimit actions */

/* TC_CBQ_OVL_CLASSIC: (default) penalize leaf class by adding offtime */

static void cbq_ovl_classic(struct cbq_class *cl)
{
	struct cbq_sched_data *q = (struct cbq_sched_data *)cl->qdisc->data;
	psched_tdiff_t delay = PSCHED_TDIFF(cl->undertime, q->now);

	if (!cl->delayed) {
		delay += cl->offtime;

		/*
		   Class goes to sleep, so that it will have no
		   chance to work avgidle. Let's forgive it 8)

		   BTW cbq-2.0 has a bug here; apparently they forgot
		   to shift it by cl->ewma_log.
		 */
		if (cl->avgidle < 0)
			delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
		if (cl->avgidle < cl->minidle)
			cl->avgidle = cl->minidle;
		if (delay <= 0)
			delay = 1;
		PSCHED_TADD2(q->now, delay, cl->undertime);

		cl->xstats.overactions++;
		cl->delayed = 1;
	}
	if (q->wd_expires == 0 || q->wd_expires > delay)
		q->wd_expires = delay;

	/* Dirty work! We must schedule wakeups based on
	   real available rate, rather than leaf rate,
	   which may be tiny (even zero).
	 */
	if (q->toplevel == TC_CBQ_MAXLEVEL) {
		struct cbq_class *b;
		psched_tdiff_t base_delay = q->wd_expires;

		for (b = cl->borrow; b; b = b->borrow) {
			delay = PSCHED_TDIFF(b->undertime, q->now);
			if (delay < base_delay) {
				if (delay <= 0)
					delay = 1;
				base_delay = delay;
			}
		}

		q->wd_expires = base_delay;
	}
}
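
/* Numeric illustration of the shift above (assumed values, not from the
   original source): with cl->avgidle = -1024 and cl->ewma_log = 5
   (W = 1/32), the forgiven portion of the sleep is

	(-avgidle) - ((-avgidle) >> ewma_log) = 1024 - 32 = 992

   time units, i.e. (1-W)*(-true_avgidle)/W in scaled units; without the
   >> ewma_log term the correction would be 1024 instead of 992. */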
/* TC_CBQ_OVL_RCLASSIC: penalize by offtime classes in hierarchy, when
   they go overlimit
 */

static void cbq_ovl_rclassic(struct cbq_class *cl)
{
	struct cbq_sched_data *q = (struct cbq_sched_data *)cl->qdisc->data;
	struct cbq_class *this = cl;

	do {
		if (cl->level > q->toplevel) {
			cl = NULL;
			break;
		}
	} while ((cl = cl->borrow) != NULL);

	if (cl == NULL)
		cl = this;
	cbq_ovl_classic(cl);
}
/* TC_CBQ_OVL_DELAY: delay until it will go to underlimit */

static void cbq_ovl_delay(struct cbq_class *cl)
{
	struct cbq_sched_data *q = (struct cbq_sched_data *)cl->qdisc->data;
	psched_tdiff_t delay = PSCHED_TDIFF(cl->undertime, q->now);

	if (!cl->delayed) {
		unsigned long sched = jiffies;

		delay += cl->offtime;
		if (cl->avgidle < 0)
			delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
		if (cl->avgidle < cl->minidle)
			cl->avgidle = cl->minidle;
		PSCHED_TADD2(q->now, delay, cl->undertime);

		if (delay > 0) {
			sched += PSCHED_US2JIFFIE(delay) + cl->penalty;
			cl->penalized = sched;
			cl->cpriority = TC_CBQ_MAXPRIO;
			q->pmask |= (1<<TC_CBQ_MAXPRIO);
			if (del_timer(&q->delay_timer) &&
			    (long)(q->delay_timer.expires - sched) > 0)
				q->delay_timer.expires = sched;
			add_timer(&q->delay_timer);
			cl->delayed = 1;
			cl->xstats.overactions++;
			return;
		}
		delay = 1;
	}
	if (q->wd_expires == 0 || q->wd_expires > delay)
		q->wd_expires = delay;
}
/* TC_CBQ_OVL_LOWPRIO: penalize class by lowering its priority band */

static void cbq_ovl_lowprio(struct cbq_class *cl)
{
	struct cbq_sched_data *q = (struct cbq_sched_data *)cl->qdisc->data;

	cl->penalized = jiffies + cl->penalty;

	if (cl->cpriority != cl->priority2) {
		cl->cpriority = cl->priority2;
		q->pmask |= (1<<cl->cpriority);
		cl->xstats.overactions++;
	}
	cbq_ovl_classic(cl);
}
/* TC_CBQ_OVL_DROP: penalize class by dropping */

static void cbq_ovl_drop(struct cbq_class *cl)
{
	if (cl->q->ops->drop)
		if (cl->q->ops->drop(cl->q))
			cl->qdisc->q.qlen--;
	cl->xstats.overactions++;
	cbq_ovl_classic(cl);
}
static void cbq_watchdog(unsigned long arg)
{
	struct Qdisc *sch = (struct Qdisc *)arg;

	sch->flags &= ~TCQ_F_THROTTLED;
	netif_schedule(sch->dev);
}
static unsigned long cbq_undelay_prio(struct cbq_sched_data *q, int prio)
{
	struct cbq_class *cl;
	struct cbq_class *cl_prev = q->active[prio];
	unsigned long now = jiffies;
	unsigned long sched = now;

	do {
		cl = cl_prev->next_alive;
		if ((long)(now - cl->penalized) > 0) {
			cl_prev->next_alive = cl->next_alive;
			cl->next_alive = NULL;
			cl->cpriority = cl->priority;
			cl->delayed = 0;
			cbq_activate_class(cl);

			if (cl == q->active[prio]) {
				q->active[prio] = cl_prev;
				if (cl == q->active[prio]) {
					q->active[prio] = NULL;
					return 0;
				}
			}

			cl = cl_prev->next_alive;
		} else if ((long)(sched - cl->penalized) > 0)
			sched = cl->penalized;
	} while ((cl_prev = cl) != q->active[prio]);

	return (long)(sched - now);
}
static void cbq_undelay(unsigned long arg)
{
	struct Qdisc *sch = (struct Qdisc *)arg;
	struct cbq_sched_data *q = (struct cbq_sched_data *)sch->data;
	long delay = 0;
	unsigned pmask;

	pmask = q->pmask;
	q->pmask = 0;

	while (pmask) {
		int prio = ffz(~pmask);
		long tmp;

		pmask &= ~(1<<prio);

		tmp = cbq_undelay_prio(q, prio);
		if (tmp > 0) {
			q->pmask |= 1<<prio;
			if (tmp < delay || delay == 0)
				delay = tmp;
		}
	}

	if (delay) {
		q->delay_timer.expires = jiffies + delay;
		add_timer(&q->delay_timer);
	}

	sch->flags &= ~TCQ_F_THROTTLED;
	netif_schedule(sch->dev);
}
#ifdef CONFIG_NET_CLS_POLICE

static int cbq_reshape_fail(struct sk_buff *skb, struct Qdisc *child)
{
	int len = skb->len;
	struct Qdisc *sch = child->__parent;
	struct cbq_sched_data *q = (struct cbq_sched_data *)sch->data;
	struct cbq_class *cl = q->rx_class;

	q->rx_class = NULL;

	if (cl && (cl = cbq_reclassify(skb, cl)) != NULL) {

		cbq_mark_toplevel(q, cl);

		q->rx_class = cl;
		cl->q->__parent = sch;

		if (cl->q->enqueue(skb, cl->q) == 0) {
			sch->q.qlen++;
			sch->stats.packets++;
			sch->stats.bytes+=len;
			if (!cl->next_alive)
				cbq_activate_class(cl);
			return 0;
		}
		sch->stats.drops++;
		return 0;
	}

	sch->stats.drops++;
	return -1;
}

#endif
/*
   This is a mission-critical procedure.

   We "regenerate" the toplevel cutoff if the transmitting class
   has backlog and is not regulated. It is not part of the
   original CBQ description, but looks more reasonable.
   Probably, it is wrong. This question needs further investigation.
*/

static __inline__ void
cbq_update_toplevel(struct cbq_sched_data *q, struct cbq_class *cl,
		    struct cbq_class *borrowed)
{
	if (cl && q->toplevel >= borrowed->level) {
		if (cl->q->q.qlen > 1) {
			do {
				if (PSCHED_IS_PASTPERFECT(borrowed->undertime)) {
					q->toplevel = borrowed->level;
					return;
				}
			} while ((borrowed=borrowed->borrow) != NULL);
		}
#if 0
	/* It is not necessary now. Uncommenting it
	   will save CPU cycles, but decrease fairness.
	 */
		q->toplevel = TC_CBQ_MAXLEVEL;
#endif
	}
}
static void
cbq_update(struct cbq_sched_data *q)
{
	struct cbq_class *this = q->tx_class;
	struct cbq_class *cl = this;
	int len = q->tx_len;

	q->tx_class = NULL;

	for ( ; cl; cl = cl->share) {
		long avgidle = cl->avgidle;
		long idle;

		cl->stats.packets++;
		cl->stats.bytes += len;

		/*
		   (now - last) is total time between packet right edges.
		   (last_pktlen/rate) is "virtual" busy time, so that

			 idle = (now - last) - last_pktlen/rate
		 */

		idle = PSCHED_TDIFF(q->now, cl->last);
		if ((unsigned long)idle > 128*1024*1024) {
			avgidle = cl->maxidle;
		} else {
			idle -= L2T(cl, len);

		/* true_avgidle := (1-W)*true_avgidle + W*idle,
		   where W=2^{-ewma_log}. But cl->avgidle is scaled:
		   cl->avgidle == true_avgidle/W,
		   hence:
		 */
			avgidle += idle - (avgidle>>cl->ewma_log);
		}

		if (avgidle <= 0) {
			/* Overlimit or at-limit */

			if (avgidle < cl->minidle)
				avgidle = cl->minidle;

			cl->avgidle = avgidle;

			/* Calculate the expected time when this class
			   will be allowed to send.
			   It will occur when:
			   (1-W)*true_avgidle + W*delay = 0, i.e.
			   idle = (1/W - 1)*(-true_avgidle)
			   or
			   idle = (1 - W)*(-cl->avgidle);
			 */
			idle = (-avgidle) - ((-avgidle) >> cl->ewma_log);

			/*
			   That is not all.
			   To maintain the rate allocated to the class,
			   we add to undertime the virtual clock time
			   necessary to complete the transmitted packet.
			   (len/phys_bandwidth has already elapsed by
			   the moment cbq_update is called.)
			 */

			idle -= L2T(&q->link, len);
			idle += L2T(cl, len);

			PSCHED_AUDIT_TDIFF(idle);

			PSCHED_TADD2(q->now, idle, cl->undertime);
		} else {
			/* Underlimit */

			PSCHED_SET_PASTPERFECT(cl->undertime);
			if (avgidle > cl->maxidle)
				cl->avgidle = cl->maxidle;
			else
				cl->avgidle = avgidle;
		}
		cl->last = q->now;
	}

	cbq_update_toplevel(q, this, q->tx_borrowed);
}
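
#if 0
/* Not part of the original file: a minimal user-space model of the scaled
 * EWMA update performed in cbq_update() above. cl->avgidle stores
 * true_avgidle/W with W = 2^-ewma_log, so the multiply by (1-W) becomes
 * a subtract-with-shift and no floating point is needed:
 * avgidle' = avgidle + idle - avgidle*W.
 */
static long ewma_scaled_update(long avgidle, long idle, int ewma_log)
{
	/* e.g. with ewma_log=5 (W=1/32), avgidle=64 and idle=0 decay to 62 */
	return avgidle + idle - (avgidle >> ewma_log);
}
#endif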
static __inline__ struct cbq_class *
cbq_under_limit(struct cbq_class *cl)
{
	struct cbq_sched_data *q = (struct cbq_sched_data *)cl->qdisc->data;
	struct cbq_class *this_cl = cl;

	if (cl->tparent == NULL)
		return cl;

	if (PSCHED_IS_PASTPERFECT(cl->undertime) ||
	    !PSCHED_TLESS(q->now, cl->undertime)) {
		cl->delayed = 0;
		return cl;
	}

	do {
		/* It is a very suspicious place. Now the overlimit
		   action is generated for not-bounded classes
		   only if the link is completely congested.
		   Though it agrees with the ancestor-only paradigm,
		   it looks very stupid. In particular, it means that
		   this chunk of code will either never be called
		   or will result in strong amplification of burstiness.
		   Dangerous, silly, and, nevertheless, no other
		   solution exists.
		 */
		if ((cl = cl->borrow) == NULL) {
			this_cl->stats.overlimits++;
			this_cl->overlimit(this_cl);
			return NULL;
		}
		if (cl->level > q->toplevel)
			return NULL;
	} while (!PSCHED_IS_PASTPERFECT(cl->undertime) &&
		 PSCHED_TLESS(q->now, cl->undertime));

	cl->delayed = 0;
	return cl;
}
static __inline__ struct sk_buff *
cbq_dequeue_prio(struct Qdisc *sch, int prio)
{
	struct cbq_sched_data *q = (struct cbq_sched_data *)sch->data;
	struct cbq_class *cl_tail, *cl_prev, *cl;
	struct sk_buff *skb;
	int deficit;

	cl_tail = cl_prev = q->active[prio];
	cl = cl_prev->next_alive;

	do {
		deficit = 0;

		/* Start round */
		do {
			struct cbq_class *borrow = cl;

			if (cl->q->q.qlen &&
			    (borrow = cbq_under_limit(cl)) == NULL)
				goto skip_class;

			if (cl->deficit <= 0) {
				/* Class exhausted its allotment per
				   this round. Switch to the next one.
				 */
				deficit = 1;
				cl->deficit += cl->quantum;
				goto next_class;
			}

			skb = cl->q->dequeue(cl->q);

			/* Class did not give us any skb :-(
			   It could occur even if cl->q->q.qlen != 0
			   f.e. if cl->q == "tbf"
			 */
			if (skb == NULL)
				goto skip_class;

			cl->deficit -= skb->len;
			q->tx_class = cl;
			q->tx_borrowed = borrow;
			if (borrow != cl) {
#ifndef CBQ_XSTATS_BORROWS_BYTES
				borrow->xstats.borrows++;
				cl->xstats.borrows++;
#else
				borrow->xstats.borrows += skb->len;
				cl->xstats.borrows += skb->len;
#endif
			}
			q->tx_len = skb->len;

			if (cl->deficit <= 0) {
				q->active[prio] = cl;
				cl = cl->next_alive;
				cl->deficit += cl->quantum;
			}
			return skb;

skip_class:
			if (cl->q->q.qlen == 0 || prio != cl->cpriority) {
				/* Class is empty or penalized.
				   Unlink it from active chain.
				 */
				cl_prev->next_alive = cl->next_alive;
				cl->next_alive = NULL;

				/* Did cl_tail point to it? */
				if (cl == cl_tail) {
					/* Repair it! */
					cl_tail = cl_prev;

					/* Was it the last class in this band? */
					if (cl == cl_tail) {
						/* Kill the band! */
						q->active[prio] = NULL;
						q->activemask &= ~(1<<prio);
						if (cl->q->q.qlen)
							cbq_activate_class(cl);
						return NULL;
					}

					q->active[prio] = cl_tail;
				}
				if (cl->q->q.qlen)
					cbq_activate_class(cl);

				cl = cl_prev;
			}

next_class:
			cl_prev = cl;
			cl = cl->next_alive;
		} while (cl_prev != cl_tail);
	} while (deficit);

	q->active[prio] = cl_prev;

	return NULL;
}
static __inline__ struct sk_buff *
cbq_dequeue_1(struct Qdisc *sch)
{
	struct cbq_sched_data *q = (struct cbq_sched_data *)sch->data;
	struct sk_buff *skb;
	unsigned activemask;

	activemask = q->activemask&0xFF;
	while (activemask) {
		int prio = ffz(~activemask);
		activemask &= ~(1<<prio);
		skb = cbq_dequeue_prio(sch, prio);
		if (skb)
			return skb;
	}
	return NULL;
}
static struct sk_buff *
cbq_dequeue(struct Qdisc *sch)
{
	struct sk_buff *skb;
	struct cbq_sched_data *q = (struct cbq_sched_data *)sch->data;
	psched_time_t now;
	psched_tdiff_t incr;

	PSCHED_GET_TIME(now);
	incr = PSCHED_TDIFF(now, q->now_rt);

	if (q->tx_class) {
		psched_tdiff_t incr2;
		/* Time integrator. We calculate EOS time
		   by adding expected packet transmission time.
		   If real time is greater, we warp the artificial clock,
		   so that:

		   cbq_time = max(real_time, work);
		 */
		incr2 = L2T(&q->link, q->tx_len);
		PSCHED_TADD(q->now, incr2);
		cbq_update(q);
		if ((incr -= incr2) < 0)
			incr = 0;
	}
	PSCHED_TADD(q->now, incr);
	q->now_rt = now;

	for (;;) {
		q->wd_expires = 0;

		skb = cbq_dequeue_1(sch);
		if (skb) {
			sch->q.qlen--;
			sch->flags &= ~TCQ_F_THROTTLED;
			return skb;
		}

		/* All the classes are overlimit.

		   It is possible, if:
		   1. Scheduler is empty.
		   2. Toplevel cutoff inhibited borrowing.
		   3. Root class is overlimit.

		   Reset the 2nd and 3rd conditions and retry.

		   Note that NS and cbq-2.0 are buggy here: peeking
		   an arbitrary class is appropriate for ancestor-only
		   link sharing, but not for the toplevel algorithm.

		   Our version is better, but slower, because it requires
		   two passes; that is unavoidable with top-level sharing.
		*/

		if (q->toplevel == TC_CBQ_MAXLEVEL &&
		    PSCHED_IS_PASTPERFECT(q->link.undertime))
			break;

		q->toplevel = TC_CBQ_MAXLEVEL;
		PSCHED_SET_PASTPERFECT(q->link.undertime);
	}

	/* No packets in scheduler or nobody wants to give them to us :-(
	   Sigh... start watchdog timer in the last case. */

	if (sch->q.qlen) {
		sch->stats.overlimits++;
		if (q->wd_expires) {
			long delay = PSCHED_US2JIFFIE(q->wd_expires);
			if (delay <= 0)
				delay = 1;
			mod_timer(&q->wd_timer, jiffies + delay);
			sch->flags |= TCQ_F_THROTTLED;
		}
	}
	return NULL;
}
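
/* Illustration of the integrator above (assumed numbers, not from the
   original source): if the previous packet was 1000 bytes on a link of
   1 Mbyte/sec, incr2 = L2T(&q->link, 1000) is about 1000 us of virtual
   transmission time added to q->now. If the real elapsed time incr is
   larger, the remainder (incr - incr2) is added as well, warping the
   artificial clock forward so that cbq_time = max(real_time, work). */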
/* CBQ class maintenance routines */

static void cbq_adjust_levels(struct cbq_class *this)
{
	if (this == NULL)
		return;

	do {
		int level = 0;
		struct cbq_class *cl;

		if ((cl = this->children) != NULL) {
			do {
				if (cl->level > level)
					level = cl->level;
			} while ((cl = cl->sibling) != this->children);
		}
		this->level = level+1;
	} while ((this = this->tparent) != NULL);
}
static void cbq_normalize_quanta(struct cbq_sched_data *q, int prio)
{
	struct cbq_class *cl;
	unsigned h;

	if (q->quanta[prio] == 0)
		return;

	for (h=0; h<16; h++) {
		for (cl = q->classes[h]; cl; cl = cl->next) {
			/* BUGGGG... Beware! This expression can suffer
			   from arithmetic overflow!
			 */
			if (cl->priority == prio) {
				cl->quantum = (cl->weight*cl->allot*q->nclasses[prio])/
					q->quanta[prio];
			}
			if (cl->quantum <= 0 || cl->quantum>32*cl->qdisc->dev->mtu) {
				printk(KERN_WARNING "CBQ: class %08x has bad quantum==%ld, repaired.\n", cl->classid, cl->quantum);
				cl->quantum = cl->qdisc->dev->mtu/2 + 1;
			}
		}
	}
}
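
/* Worked example of the expression above (illustrative values): two
   classes at the same priority with weights 2 and 1 and allot 1500 give
   quanta[prio] = 3 and nclasses[prio] = 2, hence

	quantum_A = (2*1500*2)/3 = 2000,  quantum_B = (1*1500*2)/3 = 1000,

   so A gets twice B's share per WRR round; the triple product here is
   also where the overflow warned about above can occur. */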
static void cbq_sync_defmap(struct cbq_class *cl)
{
	struct cbq_sched_data *q = (struct cbq_sched_data *)cl->qdisc->data;
	struct cbq_class *split = cl->split;
	unsigned h;
	int i;

	if (split == NULL)
		return;

	for (i=0; i<=TC_PRIO_MAX; i++) {
		if (split->defaults[i] == cl && !(cl->defmap&(1<<i)))
			split->defaults[i] = NULL;
	}

	for (i=0; i<=TC_PRIO_MAX; i++) {
		int level = split->level;

		if (split->defaults[i])
			continue;

		for (h=0; h<16; h++) {
			struct cbq_class *c;

			for (c = q->classes[h]; c; c = c->next) {
				if (c->split == split && c->level < level &&
				    c->defmap&(1<<i)) {
					split->defaults[i] = c;
					level = c->level;
				}
			}
		}
	}
}
static void cbq_change_defmap(struct cbq_class *cl, u32 splitid, u32 def, u32 mask)
{
	struct cbq_class *split = NULL;

	if (splitid == 0) {
		if ((split = cl->split) == NULL)
			return;
		splitid = split->classid;
	}

	if (split == NULL || split->classid != splitid) {
		for (split = cl->tparent; split; split = split->tparent)
			if (split->classid == splitid)
				break;
	}

	if (split == NULL)
		return;

	if (cl->split != split) {
		cl->defmap = 0;
		cbq_sync_defmap(cl);
		cl->split = split;
		cl->defmap = def&mask;
	} else
		cl->defmap = (cl->defmap&~mask)|(def&mask);

	cbq_sync_defmap(cl);
}
static void cbq_unlink_class(struct cbq_class *this)
{
	struct cbq_class *cl, **clp;
	struct cbq_sched_data *q = (struct cbq_sched_data *)this->qdisc->data;

	for (clp = &q->classes[cbq_hash(this->classid)]; (cl = *clp) != NULL; clp = &cl->next) {
		if (cl == this) {
			*clp = cl->next;
			cl->next = NULL;
			break;
		}
	}

	if (this->tparent) {
		clp = &this->sibling;
		cl = *clp;
		do {
			if (cl == this) {
				*clp = cl->sibling;
				break;
			}
			clp = &cl->sibling;
		} while ((cl = *clp) != this->sibling);

		if (this->tparent->children == this) {
			this->tparent->children = this->sibling;
			if (this->sibling == this)
				this->tparent->children = NULL;
		}
	} else {
		BUG_TRAP(this->sibling == this);
	}
}
static void cbq_link_class(struct cbq_class *this)
{
	struct cbq_sched_data *q = (struct cbq_sched_data *)this->qdisc->data;
	unsigned h = cbq_hash(this->classid);
	struct cbq_class *parent = this->tparent;

	this->sibling = this;
	this->next = q->classes[h];
	q->classes[h] = this;

	if (parent == NULL)
		return;

	if (parent->children == NULL) {
		parent->children = this;
	} else {
		this->sibling = parent->children->sibling;
		parent->children->sibling = this;
	}
}
static unsigned int cbq_drop(struct Qdisc* sch)
{
	struct cbq_sched_data *q = (struct cbq_sched_data *)sch->data;
	struct cbq_class *cl, *cl_head;
	int prio;
	unsigned int len;

	for (prio = TC_CBQ_MAXPRIO; prio >= 0; prio--) {
		if ((cl_head = q->active[prio]) == NULL)
			continue;

		cl = cl_head;
		do {
			if (cl->q->ops->drop && (len = cl->q->ops->drop(cl->q))) {
				sch->q.qlen--;
				return len;
			}
		} while ((cl = cl->next_alive) != cl_head);
	}
	return 0;
}
static void
cbq_reset(struct Qdisc* sch)
{
	struct cbq_sched_data *q = (struct cbq_sched_data *)sch->data;
	struct cbq_class *cl;
	int prio;
	unsigned h;

	q->activemask = 0;
	q->pmask = 0;
	q->tx_class = NULL;
	q->tx_borrowed = NULL;
	del_timer(&q->wd_timer);
	del_timer(&q->delay_timer);
	q->toplevel = TC_CBQ_MAXLEVEL;
	PSCHED_GET_TIME(q->now);
	q->now_rt = q->now;

	for (prio = 0; prio <= TC_CBQ_MAXPRIO; prio++)
		q->active[prio] = NULL;

	for (h = 0; h < 16; h++) {
		for (cl = q->classes[h]; cl; cl = cl->next) {
			qdisc_reset(cl->q);

			cl->next_alive = NULL;
			PSCHED_SET_PASTPERFECT(cl->undertime);
			cl->avgidle = cl->maxidle;
			cl->deficit = cl->quantum;
			cl->cpriority = cl->priority;
		}
	}
	sch->q.qlen = 0;
}
static int cbq_set_lss(struct cbq_class *cl, struct tc_cbq_lssopt *lss)
{
	if (lss->change&TCF_CBQ_LSS_FLAGS) {
		cl->share = (lss->flags&TCF_CBQ_LSS_ISOLATED) ? NULL : cl->tparent;
		cl->borrow = (lss->flags&TCF_CBQ_LSS_BOUNDED) ? NULL : cl->tparent;
	}
	if (lss->change&TCF_CBQ_LSS_EWMA)
		cl->ewma_log = lss->ewma_log;
	if (lss->change&TCF_CBQ_LSS_AVPKT)
		cl->avpkt = lss->avpkt;
	if (lss->change&TCF_CBQ_LSS_MINIDLE)
		cl->minidle = -(long)lss->minidle;
	if (lss->change&TCF_CBQ_LSS_MAXIDLE) {
		cl->maxidle = lss->maxidle;
		cl->avgidle = lss->maxidle;
	}
	if (lss->change&TCF_CBQ_LSS_OFFTIME)
		cl->offtime = lss->offtime;
	return 0;
}
static void cbq_rmprio(struct cbq_sched_data *q, struct cbq_class *cl)
{
	q->nclasses[cl->priority]--;
	q->quanta[cl->priority] -= cl->weight;
	cbq_normalize_quanta(q, cl->priority);
}

static void cbq_addprio(struct cbq_sched_data *q, struct cbq_class *cl)
{
	q->nclasses[cl->priority]++;
	q->quanta[cl->priority] += cl->weight;
	cbq_normalize_quanta(q, cl->priority);
}
static int cbq_set_wrr(struct cbq_class *cl, struct tc_cbq_wrropt *wrr)
{
	struct cbq_sched_data *q = (struct cbq_sched_data *)cl->qdisc->data;

	if (wrr->allot)
		cl->allot = wrr->allot;
	if (wrr->weight)
		cl->weight = wrr->weight;
	if (wrr->priority) {
		cl->priority = wrr->priority-1;
		cl->cpriority = cl->priority;
		if (cl->priority >= cl->priority2)
			cl->priority2 = TC_CBQ_MAXPRIO-1;
	}

	cbq_addprio(q, cl);
	return 0;
}
static int cbq_set_overlimit(struct cbq_class *cl, struct tc_cbq_ovl *ovl)
{
	switch (ovl->strategy) {
	case TC_CBQ_OVL_CLASSIC:
		cl->overlimit = cbq_ovl_classic;
		break;
	case TC_CBQ_OVL_DELAY:
		cl->overlimit = cbq_ovl_delay;
		break;
	case TC_CBQ_OVL_LOWPRIO:
		if (ovl->priority2-1 >= TC_CBQ_MAXPRIO ||
		    ovl->priority2-1 <= cl->priority)
			return -EINVAL;
		cl->priority2 = ovl->priority2-1;
		cl->overlimit = cbq_ovl_lowprio;
		break;
	case TC_CBQ_OVL_DROP:
		cl->overlimit = cbq_ovl_drop;
		break;
	case TC_CBQ_OVL_RCLASSIC:
		cl->overlimit = cbq_ovl_rclassic;
		break;
	default:
		return -EINVAL;
	}
	cl->penalty = (ovl->penalty*HZ)/1000;
	return 0;
}
#ifdef CONFIG_NET_CLS_POLICE
static int cbq_set_police(struct cbq_class *cl, struct tc_cbq_police *p)
{
	cl->police = p->police;

	if (cl->q->handle) {
		if (p->police == TC_POLICE_RECLASSIFY)
			cl->q->reshape_fail = cbq_reshape_fail;
		else
			cl->q->reshape_fail = NULL;
	}
	return 0;
}
#endif
static int cbq_set_fopt(struct cbq_class *cl, struct tc_cbq_fopt *fopt)
{
	cbq_change_defmap(cl, fopt->split, fopt->defmap, fopt->defchange);
	return 0;
}
static int cbq_init(struct Qdisc *sch, struct rtattr *opt)
{
	struct cbq_sched_data *q = (struct cbq_sched_data *)sch->data;
	struct rtattr *tb[TCA_CBQ_MAX];
	struct tc_ratespec *r;

	if (rtattr_parse(tb, TCA_CBQ_MAX, RTA_DATA(opt), RTA_PAYLOAD(opt)) < 0 ||
	    tb[TCA_CBQ_RTAB-1] == NULL || tb[TCA_CBQ_RATE-1] == NULL ||
	    RTA_PAYLOAD(tb[TCA_CBQ_RATE-1]) < sizeof(struct tc_ratespec))
		return -EINVAL;

	if (tb[TCA_CBQ_LSSOPT-1] &&
	    RTA_PAYLOAD(tb[TCA_CBQ_LSSOPT-1]) < sizeof(struct tc_cbq_lssopt))
		return -EINVAL;

	r = RTA_DATA(tb[TCA_CBQ_RATE-1]);

	if ((q->link.R_tab = qdisc_get_rtab(r, tb[TCA_CBQ_RTAB-1])) == NULL) {
		return -EINVAL;
	}

	q->link.refcnt = 1;
	q->link.sibling = &q->link;
	q->link.classid = sch->handle;
	q->link.qdisc = sch;
	if (!(q->link.q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops)))
		q->link.q = &noop_qdisc;

	q->link.priority = TC_CBQ_MAXPRIO-1;
	q->link.priority2 = TC_CBQ_MAXPRIO-1;
	q->link.cpriority = TC_CBQ_MAXPRIO-1;
	q->link.ovl_strategy = TC_CBQ_OVL_CLASSIC;
	q->link.overlimit = cbq_ovl_classic;
	q->link.allot = psched_mtu(sch->dev);
	q->link.quantum = q->link.allot;
	q->link.weight = q->link.R_tab->rate.rate;

	q->link.ewma_log = TC_CBQ_DEF_EWMA;
	q->link.avpkt = q->link.allot/2;
	q->link.minidle = -0x7FFFFFFF;
	q->link.stats.lock = &sch->dev->queue_lock;

	init_timer(&q->wd_timer);
	q->wd_timer.data = (unsigned long)sch;
	q->wd_timer.function = cbq_watchdog;
	init_timer(&q->delay_timer);
	q->delay_timer.data = (unsigned long)sch;
	q->delay_timer.function = cbq_undelay;
	q->toplevel = TC_CBQ_MAXLEVEL;
	PSCHED_GET_TIME(q->now);
	q->now_rt = q->now;

	cbq_link_class(&q->link);

	if (tb[TCA_CBQ_LSSOPT-1])
		cbq_set_lss(&q->link, RTA_DATA(tb[TCA_CBQ_LSSOPT-1]));

	cbq_addprio(q, &q->link);
	return 0;
}
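
/* Typical userspace setup for this qdisc (a sketch assuming the iproute2
   "tc" syntax of this kernel generation; the parameters map onto cbq_init
   and cbq_change_class below):

	tc qdisc add dev eth0 root handle 1: cbq \
		bandwidth 10Mbit allot 1514 cell 8 avpkt 1000 mpu 64
	tc class add dev eth0 parent 1:0 classid 1:1 cbq \
		bandwidth 10Mbit rate 1Mbit allot 1514 weight 100Kbit \
		prio 5 maxburst 20 avpkt 1000 bounded

   "rate"/"bandwidth" feed TCA_CBQ_RATE/RTAB, "weight"/"allot"/"prio" the
   WRR options, and "bounded"/"isolated" the LSS flags. */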
static __inline__ int cbq_dump_rate(struct sk_buff *skb, struct cbq_class *cl)
{
	unsigned char *b = skb->tail;

	RTA_PUT(skb, TCA_CBQ_RATE, sizeof(cl->R_tab->rate), &cl->R_tab->rate);
	return skb->len;

rtattr_failure:
	skb_trim(skb, b - skb->data);
	return -1;
}
static __inline__ int cbq_dump_lss(struct sk_buff *skb, struct cbq_class *cl)
{
	unsigned char *b = skb->tail;
	struct tc_cbq_lssopt opt;

	opt.flags = 0;
	if (cl->borrow == NULL)
		opt.flags |= TCF_CBQ_LSS_BOUNDED;
	if (cl->share == NULL)
		opt.flags |= TCF_CBQ_LSS_ISOLATED;
	opt.ewma_log = cl->ewma_log;
	opt.level = cl->level;
	opt.avpkt = cl->avpkt;
	opt.maxidle = cl->maxidle;
	opt.minidle = (u32)(-cl->minidle);
	opt.offtime = cl->offtime;
	opt.change = ~0;
	RTA_PUT(skb, TCA_CBQ_LSSOPT, sizeof(opt), &opt);
	return skb->len;

rtattr_failure:
	skb_trim(skb, b - skb->data);
	return -1;
}
static __inline__ int cbq_dump_wrr(struct sk_buff *skb, struct cbq_class *cl)
{
	unsigned char *b = skb->tail;
	struct tc_cbq_wrropt opt;

	opt.flags = 0;
	opt.allot = cl->allot;
	opt.priority = cl->priority+1;
	opt.cpriority = cl->cpriority+1;
	opt.weight = cl->weight;
	RTA_PUT(skb, TCA_CBQ_WRROPT, sizeof(opt), &opt);
	return skb->len;

rtattr_failure:
	skb_trim(skb, b - skb->data);
	return -1;
}
static __inline__ int cbq_dump_ovl(struct sk_buff *skb, struct cbq_class *cl)
{
	unsigned char *b = skb->tail;
	struct tc_cbq_ovl opt;

	opt.strategy = cl->ovl_strategy;
	opt.priority2 = cl->priority2+1;
	opt.pad = 0;
	opt.penalty = (cl->penalty*1000)/HZ;
	RTA_PUT(skb, TCA_CBQ_OVL_STRATEGY, sizeof(opt), &opt);
	return skb->len;

rtattr_failure:
	skb_trim(skb, b - skb->data);
	return -1;
}
static __inline__ int cbq_dump_fopt(struct sk_buff *skb, struct cbq_class *cl)
{
	unsigned char *b = skb->tail;
	struct tc_cbq_fopt opt;

	if (cl->split || cl->defmap) {
		opt.split = cl->split ? cl->split->classid : 0;
		opt.defmap = cl->defmap;
		opt.defchange = ~0;
		RTA_PUT(skb, TCA_CBQ_FOPT, sizeof(opt), &opt);
	}
	return skb->len;

rtattr_failure:
	skb_trim(skb, b - skb->data);
	return -1;
}
#ifdef CONFIG_NET_CLS_POLICE
static __inline__ int cbq_dump_police(struct sk_buff *skb, struct cbq_class *cl)
{
	unsigned char *b = skb->tail;
	struct tc_cbq_police opt;

	if (cl->police) {
		opt.police = cl->police;
		RTA_PUT(skb, TCA_CBQ_POLICE, sizeof(opt), &opt);
	}
	return skb->len;

rtattr_failure:
	skb_trim(skb, b - skb->data);
	return -1;
}
#endif
static int cbq_dump_attr(struct sk_buff *skb, struct cbq_class *cl)
{
	if (cbq_dump_lss(skb, cl) < 0 ||
	    cbq_dump_rate(skb, cl) < 0 ||
	    cbq_dump_wrr(skb, cl) < 0 ||
	    cbq_dump_ovl(skb, cl) < 0 ||
#ifdef CONFIG_NET_CLS_POLICE
	    cbq_dump_police(skb, cl) < 0 ||
#endif
	    cbq_dump_fopt(skb, cl) < 0)
		return -1;
	return 0;
}
int cbq_copy_xstats(struct sk_buff *skb, struct tc_cbq_xstats *st)
{
	RTA_PUT(skb, TCA_XSTATS, sizeof(*st), st);
	return 0;

rtattr_failure:
	return -1;
}
static int cbq_dump(struct Qdisc *sch, struct sk_buff *skb)
{
	struct cbq_sched_data *q = (struct cbq_sched_data *)sch->data;
	unsigned char *b = skb->tail;
	struct rtattr *rta;

	rta = (struct rtattr *)b;
	RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
	if (cbq_dump_attr(skb, &q->link) < 0)
		goto rtattr_failure;
	rta->rta_len = skb->tail - b;
	spin_lock_bh(&sch->dev->queue_lock);
	q->link.xstats.avgidle = q->link.avgidle;
	if (cbq_copy_xstats(skb, &q->link.xstats)) {
		spin_unlock_bh(&sch->dev->queue_lock);
		goto rtattr_failure;
	}
	spin_unlock_bh(&sch->dev->queue_lock);
	return skb->len;

rtattr_failure:
	skb_trim(skb, b - skb->data);
	return -1;
}
static int
cbq_dump_class(struct Qdisc *sch, unsigned long arg,
	       struct sk_buff *skb, struct tcmsg *tcm)
{
	struct cbq_sched_data *q = (struct cbq_sched_data *)sch->data;
	struct cbq_class *cl = (struct cbq_class *)arg;
	unsigned char *b = skb->tail;
	struct rtattr *rta;

	if (cl->tparent)
		tcm->tcm_parent = cl->tparent->classid;
	else
		tcm->tcm_parent = TC_H_ROOT;
	tcm->tcm_handle = cl->classid;
	tcm->tcm_info = cl->q->handle;

	rta = (struct rtattr *)b;
	RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
	if (cbq_dump_attr(skb, cl) < 0)
		goto rtattr_failure;
	rta->rta_len = skb->tail - b;
	cl->stats.qlen = cl->q->q.qlen;
	if (qdisc_copy_stats(skb, &cl->stats))
		goto rtattr_failure;
	spin_lock_bh(&sch->dev->queue_lock);
	cl->xstats.avgidle = cl->avgidle;
	cl->xstats.undertime = 0;
	if (!PSCHED_IS_PASTPERFECT(cl->undertime))
		cl->xstats.undertime = PSCHED_TDIFF(cl->undertime, q->now);
	if (cbq_copy_xstats(skb, &cl->xstats)) {
		spin_unlock_bh(&sch->dev->queue_lock);
		goto rtattr_failure;
	}
	spin_unlock_bh(&sch->dev->queue_lock);

	return skb->len;

rtattr_failure:
	skb_trim(skb, b - skb->data);
	return -1;
}
static int cbq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
		     struct Qdisc **old)
{
	struct cbq_class *cl = (struct cbq_class *)arg;

	if (cl) {
		if (new == NULL) {
			if ((new = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops)) == NULL)
				return -ENOBUFS;
		} else {
#ifdef CONFIG_NET_CLS_POLICE
			if (cl->police == TC_POLICE_RECLASSIFY)
				new->reshape_fail = cbq_reshape_fail;
#endif
		}
		sch_tree_lock(sch);
		*old = xchg(&cl->q, new);
		sch->q.qlen -= (*old)->q.qlen;
		qdisc_reset(*old);
		sch_tree_unlock(sch);

		return 0;
	}
	return -ENOENT;
}
static struct Qdisc *
cbq_leaf(struct Qdisc *sch, unsigned long arg)
{
	struct cbq_class *cl = (struct cbq_class *)arg;

	return cl ? cl->q : NULL;
}
static unsigned long cbq_get(struct Qdisc *sch, u32 classid)
{
	struct cbq_sched_data *q = (struct cbq_sched_data *)sch->data;
	struct cbq_class *cl = cbq_class_lookup(q, classid);

	if (cl) {
		cl->refcnt++;
		return (unsigned long)cl;
	}
	return 0;
}
static void cbq_destroy_filters(struct cbq_class *cl)
{
	struct tcf_proto *tp;

	while ((tp = cl->filter_list) != NULL) {
		cl->filter_list = tp->next;
		tp->ops->destroy(tp);
	}
}
static void cbq_destroy_class(struct Qdisc *sch, struct cbq_class *cl)
{
	struct cbq_sched_data *q = (struct cbq_sched_data *)sch->data;

	BUG_TRAP(!cl->filters);

	cbq_destroy_filters(cl);
	qdisc_destroy(cl->q);
	qdisc_put_rtab(cl->R_tab);
#ifdef CONFIG_NET_ESTIMATOR
	qdisc_kill_estimator(&cl->stats);
#endif
	if (cl != &q->link)
		kfree(cl);
}
static void
cbq_destroy(struct Qdisc* sch)
{
	struct cbq_sched_data *q = (struct cbq_sched_data *)sch->data;
	struct cbq_class *cl;
	unsigned h;

#ifdef CONFIG_NET_CLS_POLICE
	q->rx_class = NULL;
#endif
	/*
	 * Filters must be destroyed first because we don't destroy the
	 * classes from root to leaves, which means that filters can still
	 * be bound to classes which have been destroyed already. --TGR '04
	 */
	for (h = 0; h < 16; h++)
		for (cl = q->classes[h]; cl; cl = cl->next)
			cbq_destroy_filters(cl);

	for (h = 0; h < 16; h++) {
		struct cbq_class *next;

		for (cl = q->classes[h]; cl; cl = next) {
			next = cl->next;
			cbq_destroy_class(sch, cl);
		}
	}
}
static void cbq_put(struct Qdisc *sch, unsigned long arg)
{
	struct cbq_class *cl = (struct cbq_class *)arg;

	if (--cl->refcnt == 0) {
#ifdef CONFIG_NET_CLS_POLICE
		struct cbq_sched_data *q = (struct cbq_sched_data *)sch->data;

		spin_lock_bh(&sch->dev->queue_lock);
		if (q->rx_class == cl)
			q->rx_class = NULL;
		spin_unlock_bh(&sch->dev->queue_lock);
#endif
		cbq_destroy_class(sch, cl);
	}
}
static int
cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct rtattr **tca,
		 unsigned long *arg)
{
	int err;
	struct cbq_sched_data *q = (struct cbq_sched_data *)sch->data;
	struct cbq_class *cl = (struct cbq_class *)*arg;
	struct rtattr *opt = tca[TCA_OPTIONS-1];
	struct rtattr *tb[TCA_CBQ_MAX];
	struct cbq_class *parent;
	struct qdisc_rate_table *rtab = NULL;

	if (opt==NULL ||
	    rtattr_parse(tb, TCA_CBQ_MAX, RTA_DATA(opt), RTA_PAYLOAD(opt)))
		return -EINVAL;

	if (tb[TCA_CBQ_OVL_STRATEGY-1] &&
	    RTA_PAYLOAD(tb[TCA_CBQ_OVL_STRATEGY-1]) < sizeof(struct tc_cbq_ovl))
		return -EINVAL;

	if (tb[TCA_CBQ_FOPT-1] &&
	    RTA_PAYLOAD(tb[TCA_CBQ_FOPT-1]) < sizeof(struct tc_cbq_fopt))
		return -EINVAL;

	if (tb[TCA_CBQ_RATE-1] &&
	    RTA_PAYLOAD(tb[TCA_CBQ_RATE-1]) < sizeof(struct tc_ratespec))
		return -EINVAL;

	if (tb[TCA_CBQ_LSSOPT-1] &&
	    RTA_PAYLOAD(tb[TCA_CBQ_LSSOPT-1]) < sizeof(struct tc_cbq_lssopt))
		return -EINVAL;

	if (tb[TCA_CBQ_WRROPT-1] &&
	    RTA_PAYLOAD(tb[TCA_CBQ_WRROPT-1]) < sizeof(struct tc_cbq_wrropt))
		return -EINVAL;

#ifdef CONFIG_NET_CLS_POLICE
	if (tb[TCA_CBQ_POLICE-1] &&
	    RTA_PAYLOAD(tb[TCA_CBQ_POLICE-1]) < sizeof(struct tc_cbq_police))
		return -EINVAL;
#endif

	if (cl) {
		/* Check parent */
		if (parentid) {
			if (cl->tparent && cl->tparent->classid != parentid)
				return -EINVAL;
			if (!cl->tparent && parentid != TC_H_ROOT)
				return -EINVAL;
		}

		if (tb[TCA_CBQ_RATE-1]) {
			rtab = qdisc_get_rtab(RTA_DATA(tb[TCA_CBQ_RATE-1]), tb[TCA_CBQ_RTAB-1]);
			if (rtab == NULL)
				return -EINVAL;
		}

		/* Change class parameters */
		sch_tree_lock(sch);

		if (cl->next_alive != NULL)
			cbq_deactivate_class(cl);

		if (rtab) {
			rtab = xchg(&cl->R_tab, rtab);
			qdisc_put_rtab(rtab);
		}

		if (tb[TCA_CBQ_LSSOPT-1])
			cbq_set_lss(cl, RTA_DATA(tb[TCA_CBQ_LSSOPT-1]));

		if (tb[TCA_CBQ_WRROPT-1]) {
			cbq_rmprio(q, cl);
			cbq_set_wrr(cl, RTA_DATA(tb[TCA_CBQ_WRROPT-1]));
		}

		if (tb[TCA_CBQ_OVL_STRATEGY-1])
			cbq_set_overlimit(cl, RTA_DATA(tb[TCA_CBQ_OVL_STRATEGY-1]));

#ifdef CONFIG_NET_CLS_POLICE
		if (tb[TCA_CBQ_POLICE-1])
			cbq_set_police(cl, RTA_DATA(tb[TCA_CBQ_POLICE-1]));
#endif

		if (tb[TCA_CBQ_FOPT-1])
			cbq_set_fopt(cl, RTA_DATA(tb[TCA_CBQ_FOPT-1]));

		if (cl->q->q.qlen)
			cbq_activate_class(cl);

		sch_tree_unlock(sch);

#ifdef CONFIG_NET_ESTIMATOR
		if (tca[TCA_RATE-1]) {
			qdisc_kill_estimator(&cl->stats);
			qdisc_new_estimator(&cl->stats, tca[TCA_RATE-1]);
		}
#endif
		return 0;
	}

	if (parentid == TC_H_ROOT)
		return -EINVAL;

	if (tb[TCA_CBQ_WRROPT-1] == NULL || tb[TCA_CBQ_RATE-1] == NULL ||
	    tb[TCA_CBQ_LSSOPT-1] == NULL)
		return -EINVAL;

	rtab = qdisc_get_rtab(RTA_DATA(tb[TCA_CBQ_RATE-1]), tb[TCA_CBQ_RTAB-1]);
	if (rtab == NULL)
		return -EINVAL;

	if (classid) {
		err = -EINVAL;
		if (TC_H_MAJ(classid^sch->handle) || cbq_class_lookup(q, classid))
			goto failure;
	} else {
		int i;
		classid = TC_H_MAKE(sch->handle,0x8000);

		for (i=0; i<0x8000; i++) {
			if (++q->hgenerator >= 0x8000)
				q->hgenerator = 1;
			if (cbq_class_lookup(q, classid|q->hgenerator) == NULL)
				break;
		}
		err = -ENOSR;
		if (i >= 0x8000)
			goto failure;
		classid = classid|q->hgenerator;
	}

	parent = &q->link;
	if (parentid) {
		parent = cbq_class_lookup(q, parentid);
		err = -EINVAL;
		if (parent == NULL)
			goto failure;
	}

	err = -ENOBUFS;
	cl = kmalloc(sizeof(*cl), GFP_KERNEL);
	if (cl == NULL)
		goto failure;
	memset(cl, 0, sizeof(*cl));
	cl->R_tab = rtab;
	rtab = NULL;
	cl->refcnt = 1;
	if (!(cl->q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops)))
		cl->q = &noop_qdisc;
	cl->classid = classid;
	cl->tparent = parent;
	cl->qdisc = sch;
	cl->allot = parent->allot;
	cl->quantum = cl->allot;
	cl->weight = cl->R_tab->rate.rate;
	cl->stats.lock = &sch->dev->queue_lock;

	sch_tree_lock(sch);
	cbq_link_class(cl);
	cl->borrow = cl->tparent;
	if (cl->tparent != &q->link)
		cl->share = cl->tparent;
	cbq_adjust_levels(parent);
	cl->minidle = -0x7FFFFFFF;
	cbq_set_lss(cl, RTA_DATA(tb[TCA_CBQ_LSSOPT-1]));
	cbq_set_wrr(cl, RTA_DATA(tb[TCA_CBQ_WRROPT-1]));
	if (cl->ewma_log==0)
		cl->ewma_log = q->link.ewma_log;
	if (cl->maxidle==0)
		cl->maxidle = q->link.maxidle;
	if (cl->avpkt==0)
		cl->avpkt = q->link.avpkt;
	cl->overlimit = cbq_ovl_classic;
	if (tb[TCA_CBQ_OVL_STRATEGY-1])
		cbq_set_overlimit(cl, RTA_DATA(tb[TCA_CBQ_OVL_STRATEGY-1]));
#ifdef CONFIG_NET_CLS_POLICE
	if (tb[TCA_CBQ_POLICE-1])
		cbq_set_police(cl, RTA_DATA(tb[TCA_CBQ_POLICE-1]));
#endif
	if (tb[TCA_CBQ_FOPT-1])
		cbq_set_fopt(cl, RTA_DATA(tb[TCA_CBQ_FOPT-1]));
	sch_tree_unlock(sch);

#ifdef CONFIG_NET_ESTIMATOR
	if (tca[TCA_RATE-1])
		qdisc_new_estimator(&cl->stats, tca[TCA_RATE-1]);
#endif

	*arg = (unsigned long)cl;
	return 0;

failure:
	qdisc_put_rtab(rtab);
	return err;
}
static int cbq_delete(struct Qdisc *sch, unsigned long arg)
{
	struct cbq_sched_data *q = (struct cbq_sched_data *)sch->data;
	struct cbq_class *cl = (struct cbq_class *)arg;

	if (cl->filters || cl->children || cl == &q->link)
		return -EBUSY;

	sch_tree_lock(sch);

	if (cl->next_alive)
		cbq_deactivate_class(cl);

	if (q->tx_borrowed == cl)
		q->tx_borrowed = q->tx_class;
	if (q->tx_class == cl) {
		q->tx_class = NULL;
		q->tx_borrowed = NULL;
	}
#ifdef CONFIG_NET_CLS_POLICE
	if (q->rx_class == cl)
		q->rx_class = NULL;
#endif

	cbq_unlink_class(cl);
	cbq_adjust_levels(cl->tparent);
	cl->defmap = 0;
	cbq_sync_defmap(cl);

	cbq_rmprio(q, cl);
	sch_tree_unlock(sch);

	if (--cl->refcnt == 0)
		cbq_destroy_class(sch, cl);

	return 0;
}
static struct tcf_proto **cbq_find_tcf(struct Qdisc *sch, unsigned long arg)
{
	struct cbq_sched_data *q = (struct cbq_sched_data *)sch->data;
	struct cbq_class *cl = (struct cbq_class *)arg;

	if (cl == NULL)
		cl = &q->link;

	return &cl->filter_list;
}
static unsigned long cbq_bind_filter(struct Qdisc *sch, unsigned long parent,
				     u32 classid)
{
	struct cbq_sched_data *q = (struct cbq_sched_data *)sch->data;
	struct cbq_class *p = (struct cbq_class *)parent;
	struct cbq_class *cl = cbq_class_lookup(q, classid);

	if (cl) {
		if (p && p->level <= cl->level)
			return 0;
		cl->filters++;
		return (unsigned long)cl;
	}
	return 0;
}
static void cbq_unbind_filter(struct Qdisc *sch, unsigned long arg)
{
	struct cbq_class *cl = (struct cbq_class *)arg;

	cl->filters--;
}
static void cbq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
{
	struct cbq_sched_data *q = (struct cbq_sched_data *)sch->data;
	unsigned h;

	if (arg->stop)
		return;

	for (h = 0; h < 16; h++) {
		struct cbq_class *cl;

		for (cl = q->classes[h]; cl; cl = cl->next) {
			if (arg->count < arg->skip) {
				arg->count++;
				continue;
			}
			if (arg->fn(sch, (unsigned long)cl, arg) < 0) {
				arg->stop = 1;
				return;
			}
			arg->count++;
		}
	}
}
static struct Qdisc_class_ops cbq_class_ops =
{
	cbq_graft,
	cbq_leaf,
	cbq_get,
	cbq_put,
	cbq_change_class,
	cbq_delete,
	cbq_walk,

	cbq_find_tcf,
	cbq_bind_filter,
	cbq_unbind_filter,

	cbq_dump_class,
};

struct Qdisc_ops cbq_qdisc_ops =
{
	NULL,
	&cbq_class_ops,
	"cbq",
	sizeof(struct cbq_sched_data),

	cbq_enqueue,
	cbq_dequeue,
	cbq_requeue,
	cbq_drop,

	cbq_init,
	cbq_reset,
	cbq_destroy,
	NULL /* cbq_change */,

	cbq_dump,
};
#ifdef MODULE
int init_module(void)
{
	return register_qdisc(&cbq_qdisc_ops);
}

void cleanup_module(void)
{
	unregister_qdisc(&cbq_qdisc_ops);
}
#endif
MODULE_LICENSE("GPL");