/*
 * net/sched/sch_cbq.c	Class-Based Queueing discipline.
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *
 */
#include <linux/config.h>
#include <linux/module.h>
#include <asm/uaccess.h>
#include <asm/system.h>
#include <asm/bitops.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/string.h>
#include <linux/mm.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/in.h>
#include <linux/errno.h>
#include <linux/interrupt.h>
#include <linux/if_ether.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/notifier.h>
#include <net/ip.h>
#include <net/route.h>
#include <linux/skbuff.h>
#include <net/sock.h>
#include <net/pkt_sched.h>
/*	Class-Based Queueing (CBQ) algorithm.
	=======================================

	Sources: [1] Sally Floyd and Van Jacobson, "Link-sharing and Resource
		     Management Models for Packet Networks",
		     IEEE/ACM Transactions on Networking, Vol.3, No.4, 1995

		 [2] Sally Floyd, "Notes on CBQ and Guaranteed Service", 1995

		 [3] Sally Floyd, "Notes on Class-Based Queueing: Setting
		     Parameters", 1996

		 [4] Sally Floyd and Michael Speer, "Experimental Results
		     for Class-Based Queueing", 1998, not published.

	-----------------------------------------------------------------------

	The algorithm skeleton was taken from the NS simulator's cbq.cc.
	If someone wants to check this code against the LBL version,
	they should take into account that ONLY the skeleton was borrowed;
	the implementation is different. Particularly:

	--- The WRR algorithm is different. Our version looks more
	reasonable (I hope) and works when quanta are allowed to be
	less than MTU, which is always the case when real-time classes
	have small rates. Note that the statement of [3] is
	incomplete: delay may actually be estimated even if the class
	per-round allotment is less than MTU. Namely, if the per-round
	allotment is W*r_i, and r_1+...+r_k = r < 1, then

	delay_i <= ([MTU/(W*r_i)]*W*r + W*r + k*MTU)/B

	In the worst case we have an IntServ estimate with D = W*r+k*MTU
	and C = MTU*r. The proof (if correct at all) is trivial.

	--- It seems that cbq-2.0 is not very accurate. At least, I cannot
	interpret some places, which look like wrong translations
	from NS. Anyone is advised to find these differences
	and explain to me why I am wrong 8).

	--- Linux has no EOI event, so we cannot estimate true class
	idle time. The workaround is to consider the next dequeue event
	as a sign that the previous packet is finished. This is wrong because of
	internal device queueing, but on a permanently loaded link it is true.
	Moreover, combined with the clock integrator, this scheme looks
	very close to an ideal solution.  */
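
/* A worked instance of the delay bound quoted above (illustrative only
   and excluded from the build): every name in this sketch is
   hypothetical. B is the link rate in bytes/sec, W the per-round
   allotment scale, r_i the share of class i, r = r_1+...+r_k < 1 the
   total share of the k classes, and [x] denotes rounding up. */
#if 0
#include <math.h>

static double cbq_wrr_delay_bound(double mtu, double W, double r_i,
				  double r, int k, double B)
{
	/* delay_i <= ([MTU/(W*r_i)]*W*r + W*r + k*MTU)/B */
	double rounds = ceil(mtu/(W*r_i));

	return (rounds*W*r + W*r + k*mtu)/B;
}
#endif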
struct cbq_sched_data;


struct cbq_class
{
	struct cbq_class	*next;		/* hash table link */
	struct cbq_class	*next_alive;	/* next class with backlog in this priority band */

/* Parameters */
	u32			classid;
	unsigned char		priority;	/* class priority */
	unsigned char		priority2;	/* priority to be used after overlimit */
	unsigned char		ewma_log;	/* time constant for idle time calculation */
	unsigned char		ovl_strategy;
#ifdef CONFIG_NET_CLS_POLICE
	unsigned char		police;
#endif

	u32			defmap;

	/* Link-sharing scheduler parameters */
	long			maxidle;	/* Class parameters: see below. */
	long			offtime;
	long			minidle;
	u32			avpkt;
	struct qdisc_rate_table	*R_tab;

	/* Overlimit strategy parameters */
	void			(*overlimit)(struct cbq_class *cl);
	long			penalty;

	/* General scheduler (WRR) parameters */
	long			allot;
	long			quantum;	/* Allotment per WRR round */
	long			weight;		/* Relative allotment: see below */

	struct Qdisc		*qdisc;		/* Ptr to CBQ discipline */
	struct cbq_class	*split;		/* Ptr to split node */
	struct cbq_class	*share;		/* Ptr to LS parent in the class tree */
	struct cbq_class	*tparent;	/* Ptr to tree parent in the class tree */
	struct cbq_class	*borrow;	/* NULL if class is bandwidth limited;
						   parent otherwise */
	struct cbq_class	*sibling;	/* Sibling chain */
	struct cbq_class	*children;	/* Pointer to children chain */

	struct Qdisc		*q;		/* Elementary queueing discipline */


/* Variables */
	unsigned char		cpriority;	/* Effective priority */
	unsigned char		delayed;
	unsigned char		level;		/* level of the class in hierarchy:
						   0 for leaf classes, and maximal
						   level of children + 1 for nodes.
						 */

	psched_time_t		last;		/* Last end of service */
	psched_time_t		undertime;
	long			avgidle;
	long			deficit;	/* Saved deficit for WRR */
	unsigned long		penalized;
	struct tc_stats		stats;
	struct tc_cbq_xstats	xstats;

	struct tcf_proto	*filter_list;

	int			refcnt;
	int			filters;

	struct cbq_class	*defaults[TC_PRIO_MAX+1];
};
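
/* A minimal sketch of how the pointers above tie the tree together
   (hypothetical two-level hierarchy, not taken from the source):

	link (root, level 2)
	  agencyA: tparent = &link, borrow = &link (NULL if bounded)
	    leafL:  tparent = agencyA, share = agencyA, borrow = agencyA,
		    level = 0; sibling/children close the circular chains.

   split and defaults[] matter only for interior "split" nodes that
   classifiers resolve to; see cbq_classify() below. */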
struct cbq_sched_data
{
	struct cbq_class	*classes[16];		/* Hash table of all classes */
	int			nclasses[TC_CBQ_MAXPRIO+1];
	unsigned		quanta[TC_CBQ_MAXPRIO+1];

	struct cbq_class	link;

	unsigned		activemask;
	struct cbq_class	*active[TC_CBQ_MAXPRIO+1];	/* List of all classes
								   with backlog */

#ifdef CONFIG_NET_CLS_POLICE
	struct cbq_class	*rx_class;
#endif
	struct cbq_class	*tx_class;
	struct cbq_class	*tx_borrowed;
	int			tx_len;
	psched_time_t		now;			/* Cached timestamp */
	psched_time_t		now_rt;			/* Cached real time */
	unsigned		pmask;			/* Bitmask of penalized
							   priority bands */

	struct timer_list	delay_timer;
	struct timer_list	wd_timer;		/* Watchdog timer,
							   started when CBQ has
							   backlog, but cannot
							   transmit just now */
	long			wd_expires;
	int			toplevel;
	u32			hgenerator;
};
#define L2T(cl,len)	((cl)->R_tab->data[(len)>>(cl)->R_tab->rate.cell_log])
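
/* L2T ("length to time") converts a packet length into its expected
   transmission time at the class rate: the rate table holds
   precomputed per-cell times and cell_log sets the cell size. With
   made-up numbers, cell_log == 3 makes a 1000-byte packet index cell
   1000>>3 == 125 of R_tab->data[]. */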
static __inline__ unsigned cbq_hash(u32 h)
{
	h ^= h>>8;
	h ^= h>>4;
	return h&0xF;
}

static __inline__ struct cbq_class *
cbq_class_lookup(struct cbq_sched_data *q, u32 classid)
{
	struct cbq_class *cl;

	for (cl = q->classes[cbq_hash(classid)]; cl; cl = cl->next)
		if (cl->classid == classid)
			return cl;
	return NULL;
}

#ifdef CONFIG_NET_CLS_POLICE

static struct cbq_class *
cbq_reclassify(struct sk_buff *skb, struct cbq_class *this)
{
	struct cbq_class *cl, *new;

	for (cl = this->tparent; cl; cl = cl->tparent)
		if ((new = cl->defaults[TC_PRIO_BESTEFFORT]) != NULL && new != this)
			return new;

	return NULL;
}

#endif

/* Classify packet. The procedure is pretty complicated, but
   it allows us to combine link sharing and priority scheduling
   transparently.

   Namely, you can put link sharing rules (f.e. route based) at the root
   of CBQ, so that it resolves to split nodes. Then packets are classified
   by logical priority, or a more specific classifier may be attached
   to the split node.
 */
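
/* Hypothetical walk through the steps implemented below: a route-based
   filter at the root maps a flow to split node S; since S is not a
   leaf, classification restarts at S, where S's own filter_list or
   S->defaults[skb->priority & TC_PRIO_MAX] picks the next class, with
   defaults[TC_PRIO_BESTEFFORT] as the final fallback. */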
static struct cbq_class *
cbq_classify(struct sk_buff *skb, struct Qdisc *sch)
{
	struct cbq_sched_data *q = (struct cbq_sched_data *)sch->data;
	struct cbq_class *head = &q->link;
	struct cbq_class **defmap;
	struct cbq_class *cl = NULL;
	u32 prio = skb->priority;
	struct tcf_result res;

	/*
	 *  Step 1. If skb->priority points to one of our classes, use it.
	 */
	if (TC_H_MAJ(prio^sch->handle) == 0 &&
	    (cl = cbq_class_lookup(q, prio)) != NULL)
		return cl;

	for (;;) {
		int result = 0;

		defmap = head->defaults;

		/*
		 * Step 2+n. Apply classifier.
		 */
		if (!head->filter_list || (result = tc_classify(skb, head->filter_list, &res)) < 0)
			goto fallback;

		if ((cl = (void*)res.class) == NULL) {
			if (TC_H_MAJ(res.classid))
				cl = cbq_class_lookup(q, res.classid);
			else if ((cl = defmap[res.classid&TC_PRIO_MAX]) == NULL)
				cl = defmap[TC_PRIO_BESTEFFORT];

			if (cl == NULL || cl->level >= head->level)
				goto fallback;
		}

#ifdef CONFIG_NET_CLS_POLICE
		switch (result) {
		case TC_POLICE_RECLASSIFY:
			return cbq_reclassify(skb, cl);
		case TC_POLICE_SHOT:
			return NULL;
		default:
			break;
		}
#endif
		if (cl->level == 0)
			return cl;

		/*
		 * Step 3+n. If the classifier selected a link sharing class,
		 *	     apply the agency-specific classifier.
		 *	     Repeat this procedure until we hit a leaf node.
		 */
		head = cl;
	}

fallback:
	cl = head;

	/*
	 * Step 4. No success...
	 */
	if (TC_H_MAJ(prio) == 0 &&
	    !(cl = head->defaults[prio&TC_PRIO_MAX]) &&
	    !(cl = head->defaults[TC_PRIO_BESTEFFORT]))
		return head;

	return cl;
}
/*
   A packet has just been enqueued on the empty class.
   cbq_activate_class adds it to the tail of the active class list
   of its priority band.
 */
static __inline__ void cbq_activate_class(struct cbq_class *cl)
{
	struct cbq_sched_data *q = (struct cbq_sched_data *)cl->qdisc->data;
	int prio = cl->cpriority;
	struct cbq_class *cl_tail;

	cl_tail = q->active[prio];
	q->active[prio] = cl;

	if (cl_tail != NULL) {
		cl->next_alive = cl_tail->next_alive;
		cl_tail->next_alive = cl;
	} else {
		cl->next_alive = cl;
		q->activemask |= (1<<prio);
	}
}
/*
   Unlink class from active chain.
   Note that this same procedure is done directly in cbq_dequeue*
   during the round-robin procedure.
 */
static void cbq_deactivate_class(struct cbq_class *this)
{
	struct cbq_sched_data *q = (struct cbq_sched_data *)this->qdisc->data;
	int prio = this->cpriority;
	struct cbq_class *cl;
	struct cbq_class *cl_prev = q->active[prio];

	do {
		cl = cl_prev->next_alive;
		if (cl == this) {
			cl_prev->next_alive = cl->next_alive;
			cl->next_alive = NULL;

			if (cl == q->active[prio]) {
				q->active[prio] = cl_prev;
				if (cl == q->active[prio]) {
					q->active[prio] = NULL;
					q->activemask &= ~(1<<prio);
				}
			}

			cl = cl_prev->next_alive;
			return;
		}
	} while ((cl_prev = cl) != q->active[prio]);
}
static void
cbq_mark_toplevel(struct cbq_sched_data *q, struct cbq_class *cl)
{
	int toplevel = q->toplevel;

	if (toplevel > cl->level && !(cl->q->flags&TCQ_F_THROTTLED)) {
		psched_time_t now;
		psched_tdiff_t incr;

		PSCHED_GET_TIME(now);
		incr = PSCHED_TDIFF(now, q->now_rt);
		PSCHED_TADD2(q->now, incr, now);

		do {
			if (PSCHED_TLESS(cl->undertime, now)) {
				q->toplevel = cl->level;
				return;
			}
		} while ((cl=cl->borrow) != NULL && toplevel > cl->level);
	}
}
static int
cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
{
	struct cbq_sched_data *q = (struct cbq_sched_data *)sch->data;
	struct cbq_class *cl = cbq_classify(skb, sch);
	int len = skb->len;
	int ret = NET_XMIT_POLICED;

#ifdef CONFIG_NET_CLS_POLICE
	q->rx_class = cl;
#endif
	if (cl) {
#ifdef CONFIG_NET_CLS_POLICE
		cl->q->__parent = sch;
#endif
		if ((ret = cl->q->enqueue(skb, cl->q)) == 0) {
			sch->q.qlen++;
			sch->stats.packets++;
			sch->stats.bytes+=len;
			cbq_mark_toplevel(q, cl);
			if (!cl->next_alive)
				cbq_activate_class(cl);
			return 0;
		}
	}

	sch->stats.drops++;
	if (cl == NULL)
		kfree_skb(skb);
	else {
		cbq_mark_toplevel(q, cl);
		cl->stats.drops++;
	}
	return ret;
}
static int
cbq_requeue(struct sk_buff *skb, struct Qdisc *sch)
{
	struct cbq_sched_data *q = (struct cbq_sched_data *)sch->data;
	struct cbq_class *cl;
	int ret;

	if ((cl = q->tx_class) == NULL) {
		kfree_skb(skb);
		sch->stats.drops++;
		return NET_XMIT_CN;
	}
	q->tx_class = NULL;

	cbq_mark_toplevel(q, cl);

#ifdef CONFIG_NET_CLS_POLICE
	q->rx_class = cl;
	cl->q->__parent = sch;
#endif
	if ((ret = cl->q->ops->requeue(skb, cl->q)) == 0) {
		sch->q.qlen++;
		if (!cl->next_alive)
			cbq_activate_class(cl);
		return 0;
	}
	sch->stats.drops++;
	cl->stats.drops++;
	return ret;
}
/* Overlimit actions */

/* TC_CBQ_OVL_CLASSIC: (default) penalize leaf class by adding offtime */

static void cbq_ovl_classic(struct cbq_class *cl)
{
	struct cbq_sched_data *q = (struct cbq_sched_data *)cl->qdisc->data;
	psched_tdiff_t delay = PSCHED_TDIFF(cl->undertime, q->now);

	if (!cl->delayed) {
		delay += cl->offtime;

		/*
		   The class goes to sleep, so that it will have no
		   chance to work off avgidle. Let's forgive it 8)

		   BTW cbq-2.0 has a bug here: apparently they
		   forgot to shift it by cl->ewma_log.
		 */
		if (cl->avgidle < 0)
			delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
		if (cl->avgidle < cl->minidle)
			cl->avgidle = cl->minidle;
		if (delay <= 0)
			delay = 1;
		PSCHED_TADD2(q->now, delay, cl->undertime);

		cl->xstats.overactions++;
		cl->delayed = 1;
	}
	if (q->wd_expires == 0 || q->wd_expires > delay)
		q->wd_expires = delay;

	/* Dirty work! We must schedule wakeups based on
	   real available rate, rather than leaf rate,
	   which may be tiny (even zero).
	 */
	if (q->toplevel == TC_CBQ_MAXLEVEL) {
		struct cbq_class *b;
		psched_tdiff_t base_delay = q->wd_expires;

		for (b = cl->borrow; b; b = b->borrow) {
			delay = PSCHED_TDIFF(b->undertime, q->now);
			if (delay < base_delay) {
				if (delay <= 0)
					delay = 1;
				base_delay = delay;
			}
		}

		q->wd_expires = base_delay;
	}
}
/* TC_CBQ_OVL_RCLASSIC: penalize by offtime those classes in the hierarchy
   that go overlimit
 */

static void cbq_ovl_rclassic(struct cbq_class *cl)
{
	struct cbq_sched_data *q = (struct cbq_sched_data *)cl->qdisc->data;
	struct cbq_class *this = cl;

	do {
		if (cl->level > q->toplevel) {
			cl = NULL;
			break;
		}
	} while ((cl = cl->borrow) != NULL);

	if (cl == NULL)
		cl = this;
	cbq_ovl_classic(cl);
}
/* TC_CBQ_OVL_DELAY: delay until the class will go underlimit */

static void cbq_ovl_delay(struct cbq_class *cl)
{
	struct cbq_sched_data *q = (struct cbq_sched_data *)cl->qdisc->data;
	psched_tdiff_t delay = PSCHED_TDIFF(cl->undertime, q->now);

	if (!cl->delayed) {
		unsigned long sched = jiffies;

		delay += cl->offtime;
		if (cl->avgidle < 0)
			delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
		if (cl->avgidle < cl->minidle)
			cl->avgidle = cl->minidle;
		PSCHED_TADD2(q->now, delay, cl->undertime);

		if (delay > 0) {
			sched += PSCHED_US2JIFFIE(delay) + cl->penalty;
			cl->penalized = sched;
			cl->cpriority = TC_CBQ_MAXPRIO;
			q->pmask |= (1<<TC_CBQ_MAXPRIO);
			if (del_timer(&q->delay_timer) &&
			    (long)(q->delay_timer.expires - sched) > 0)
				q->delay_timer.expires = sched;
			add_timer(&q->delay_timer);
			cl->delayed = 1;
			cl->xstats.overactions++;
			return;
		}
		delay = 1;
	}
	if (q->wd_expires == 0 || q->wd_expires > delay)
		q->wd_expires = delay;
}
/* TC_CBQ_OVL_LOWPRIO: penalize class by lowering its priority band */

static void cbq_ovl_lowprio(struct cbq_class *cl)
{
	struct cbq_sched_data *q = (struct cbq_sched_data *)cl->qdisc->data;

	cl->penalized = jiffies + cl->penalty;

	if (cl->cpriority != cl->priority2) {
		cl->cpriority = cl->priority2;
		q->pmask |= (1<<cl->cpriority);
		cl->xstats.overactions++;
	}
	cbq_ovl_classic(cl);
}
/* TC_CBQ_OVL_DROP: penalize class by dropping */

static void cbq_ovl_drop(struct cbq_class *cl)
{
	if (cl->q->ops->drop)
		if (cl->q->ops->drop(cl->q))
			cl->qdisc->q.qlen--;
	cl->xstats.overactions++;
	cbq_ovl_classic(cl);
}
static void cbq_watchdog(unsigned long arg)
{
	struct Qdisc *sch = (struct Qdisc *)arg;

	sch->flags &= ~TCQ_F_THROTTLED;
	netif_schedule(sch->dev);
}
static unsigned long cbq_undelay_prio(struct cbq_sched_data *q, int prio)
{
	struct cbq_class *cl;
	struct cbq_class *cl_prev = q->active[prio];
	unsigned long now = jiffies;
	unsigned long sched = now;

	if (cl_prev == NULL)
		return 0;

	do {
		cl = cl_prev->next_alive;
		if ((long)(now - cl->penalized) > 0) {
			cl_prev->next_alive = cl->next_alive;
			cl->next_alive = NULL;
			cl->cpriority = cl->priority;
			cl->delayed = 0;
			cbq_activate_class(cl);

			if (cl == q->active[prio]) {
				q->active[prio] = cl_prev;
				if (cl == q->active[prio]) {
					q->active[prio] = NULL;
					return 0;
				}
			}

			cl = cl_prev->next_alive;
		} else if ((long)(sched - cl->penalized) > 0)
			sched = cl->penalized;
	} while ((cl_prev = cl) != q->active[prio]);

	return (long)(sched - now);
}
static void cbq_undelay(unsigned long arg)
{
	struct Qdisc *sch = (struct Qdisc *)arg;
	struct cbq_sched_data *q = (struct cbq_sched_data *)sch->data;
	long delay = 0;
	unsigned pmask;

	pmask = q->pmask;
	q->pmask = 0;

	while (pmask) {
		int prio = ffz(~pmask);
		long tmp;

		pmask &= ~(1<<prio);

		tmp = cbq_undelay_prio(q, prio);
		if (tmp > 0) {
			q->pmask |= 1<<prio;
			if (tmp < delay || delay == 0)
				delay = tmp;
		}
	}

	if (delay) {
		q->delay_timer.expires = jiffies + delay;
		add_timer(&q->delay_timer);
	}

	sch->flags &= ~TCQ_F_THROTTLED;
	netif_schedule(sch->dev);
}
#ifdef CONFIG_NET_CLS_POLICE

static int cbq_reshape_fail(struct sk_buff *skb, struct Qdisc *child)
{
	int len = skb->len;
	struct Qdisc *sch = child->__parent;
	struct cbq_sched_data *q = (struct cbq_sched_data *)sch->data;
	struct cbq_class *cl = q->rx_class;

	q->rx_class = NULL;

	if (cl && (cl = cbq_reclassify(skb, cl)) != NULL) {

		cbq_mark_toplevel(q, cl);

		q->rx_class = cl;
		cl->q->__parent = sch;

		if (cl->q->enqueue(skb, cl->q) == 0) {
			sch->q.qlen++;
			sch->stats.packets++;
			sch->stats.bytes+=len;
			if (!cl->next_alive)
				cbq_activate_class(cl);
			return 0;
		}
		sch->stats.drops++;
		return 0;
	}

	sch->stats.drops++;
	return -1;
}

#endif
/*
   This is a mission-critical procedure.

   We "regenerate" the toplevel cutoff if the transmitting class
   has backlog and is not regulated. It is not part of the
   original CBQ description, but looks more reasonable.
   Probably, it is wrong. This question needs further investigation.
*/

static __inline__ void
cbq_update_toplevel(struct cbq_sched_data *q, struct cbq_class *cl,
		    struct cbq_class *borrowed)
{
	if (cl && q->toplevel >= borrowed->level) {
		if (cl->q->q.qlen > 1) {
			do {
				if (PSCHED_IS_PASTPERFECT(borrowed->undertime)) {
					q->toplevel = borrowed->level;
					return;
				}
			} while ((borrowed=borrowed->borrow) != NULL);
		}
#if 0
	/* It is not necessary now. Uncommenting it
	   will save CPU cycles, but decrease fairness.
	 */
		q->toplevel = TC_CBQ_MAXLEVEL;
#endif
	}
}
static void
cbq_update(struct cbq_sched_data *q)
{
	struct cbq_class *this = q->tx_class;
	struct cbq_class *cl = this;
	int len = q->tx_len;

	q->tx_class = NULL;

	for ( ; cl; cl = cl->share) {
		long avgidle = cl->avgidle;
		long idle;

		cl->stats.packets++;
		cl->stats.bytes += len;

		/*
		   (now - last) is the total time between packet right edges.
		   (last_pktlen/rate) is the "virtual" busy time, so that

		         idle = (now - last) - last_pktlen/rate
		 */

		idle = PSCHED_TDIFF(q->now, cl->last);
		if ((unsigned long)idle > 128*1024*1024) {
			avgidle = cl->maxidle;
		} else {
			idle -= L2T(cl, len);

		/* true_avgidle := (1-W)*true_avgidle + W*idle,
		   where W=2^{-ewma_log}. But cl->avgidle is scaled:
		   cl->avgidle == true_avgidle/W,
		   hence:
		 */
			avgidle += idle - (avgidle>>cl->ewma_log);
		}
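
		/* Worked example with illustrative numbers (not from the
		   source): with ewma_log = 4, i.e. W = 1/16, and a scaled
		   avgidle of 1600, a fully busy interval (idle == 0) gives
		   avgidle += 0 - (1600>>4) == 1500; roughly sixteen such
		   packets decay the scaled average by a factor of e, which
		   is exactly the EWMA written without divisions. */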
		if (avgidle <= 0) {
			/* Overlimit or at-limit */

			if (avgidle < cl->minidle)
				avgidle = cl->minidle;

			cl->avgidle = avgidle;

			/* Calculate the expected time when this class
			   will be allowed to send.
			   It will occur when:
			   (1-W)*true_avgidle + W*delay = 0, i.e.
			   idle = (1/W - 1)*(-true_avgidle)
			   or
			   idle = (1 - W)*(-cl->avgidle);
			 */
			idle = (-avgidle) - ((-avgidle) >> cl->ewma_log);

			/*
			   That is not all.
			   To maintain the rate allocated to the class,
			   we add to undertime the virtual clock time
			   necessary to complete the transmitted packet.
			   (len/phys_bandwidth has already been passed
			   by the moment of cbq_update)
			 */

			idle -= L2T(&q->link, len);
			idle += L2T(cl, len);

			PSCHED_AUDIT_TDIFF(idle);

			PSCHED_TADD2(q->now, idle, cl->undertime);
		} else {
			/* Underlimit */

			PSCHED_SET_PASTPERFECT(cl->undertime);
			if (avgidle > cl->maxidle)
				cl->avgidle = cl->maxidle;
			else
				cl->avgidle = avgidle;
		}
		cl->last = q->now;
	}

	cbq_update_toplevel(q, this, q->tx_borrowed);
}
static __inline__ struct cbq_class *
cbq_under_limit(struct cbq_class *cl)
{
	struct cbq_sched_data *q = (struct cbq_sched_data *)cl->qdisc->data;
	struct cbq_class *this_cl = cl;

	if (cl->tparent == NULL)
		return cl;

	if (PSCHED_IS_PASTPERFECT(cl->undertime) ||
	    !PSCHED_TLESS(q->now, cl->undertime)) {
		cl->delayed = 0;
		return cl;
	}

	do {
		/* This is a very suspicious place. Now the overlimit
		   action is generated for non-bounded classes
		   only if the link is completely congested.
		   Though it agrees with the ancestor-only paradigm,
		   it looks very stupid. In particular, it means
		   that this chunk of code will either never be
		   called or will result in strong amplification
		   of burstiness. Dangerous, silly, and, alas,
		   no other solution exists.
		 */
		if ((cl = cl->borrow) == NULL) {
			this_cl->stats.overlimits++;
			this_cl->overlimit(this_cl);
			return NULL;
		}
		if (cl->level > q->toplevel)
			return NULL;
	} while (!PSCHED_IS_PASTPERFECT(cl->undertime) &&
		 PSCHED_TLESS(q->now, cl->undertime));

	cl->delayed = 0;
	return cl;
}
static __inline__ struct sk_buff *
cbq_dequeue_prio(struct Qdisc *sch, int prio)
{
	struct cbq_sched_data *q = (struct cbq_sched_data *)sch->data;
	struct cbq_class *cl_tail, *cl_prev, *cl;
	struct sk_buff *skb;
	int deficit;

	cl_tail = cl_prev = q->active[prio];
	cl = cl_prev->next_alive;

	do {
		deficit = 0;

		/* Start round */
		do {
			struct cbq_class *borrow = cl;

			if (cl->q->q.qlen &&
			    (borrow = cbq_under_limit(cl)) == NULL)
				goto skip_class;

			if (cl->deficit <= 0) {
				/* Class exhausted its allotment per
				   this round. Switch to the next one.
				 */
				deficit = 1;
				cl->deficit += cl->quantum;
				goto next_class;
			}

			skb = cl->q->dequeue(cl->q);

			/* Class did not give us any skb :-(
			   It could occur even if cl->q->q.qlen != 0,
			   f.e. if cl->q == "tbf"
			 */
			if (skb == NULL)
				goto skip_class;

			cl->deficit -= skb->len;
			q->tx_class = cl;
			q->tx_borrowed = borrow;
			if (borrow != cl) {
#ifndef CBQ_XSTATS_BORROWS_BYTES
				borrow->xstats.borrows++;
				cl->xstats.borrows++;
#else
				borrow->xstats.borrows += skb->len;
				cl->xstats.borrows += skb->len;
#endif
			}
			q->tx_len = skb->len;

			if (cl->deficit <= 0) {
				q->active[prio] = cl;
				cl = cl->next_alive;
				cl->deficit += cl->quantum;
			}
			return skb;

skip_class:
			if (cl->q->q.qlen == 0 || prio != cl->cpriority) {
				/* Class is empty or penalized.
				   Unlink it from active chain.
				 */
				cl_prev->next_alive = cl->next_alive;
				cl->next_alive = NULL;

				/* Did cl_tail point to it? */
				if (cl == cl_tail) {
					/* Repair it! */
					cl_tail = cl_prev;

					/* Was it the last class in this band? */
					if (cl == cl_tail) {
						/* Kill the band! */
						q->active[prio] = NULL;
						q->activemask &= ~(1<<prio);
						if (cl->q->q.qlen)
							cbq_activate_class(cl);
						return NULL;
					}

					q->active[prio] = cl_tail;
				}
				if (cl->q->q.qlen)
					cbq_activate_class(cl);

				cl = cl_prev;
			}

next_class:
			cl_prev = cl;
			cl = cl->next_alive;
		} while (cl_prev != cl_tail);
	} while (deficit);

	q->active[prio] = cl_prev;

	return NULL;
}
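
/* Worked example of the deficit round-robin above (illustrative
   numbers, not from the source): with quantum = 1500 and deficit
   starting at 1500, dequeuing 600-byte packets drives the deficit to
   900, 300 and -300; the class then yields, and +quantum restores it
   to 1200 for the next round, so the long-run share stays proportional
   to quantum even though a single round may overshoot by almost one
   packet. */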
static __inline__ struct sk_buff *
cbq_dequeue_1(struct Qdisc *sch)
{
	struct cbq_sched_data *q = (struct cbq_sched_data *)sch->data;
	struct sk_buff *skb;
	unsigned activemask;

	activemask = q->activemask&0xFF;
	while (activemask) {
		int prio = ffz(~activemask);
		activemask &= ~(1<<prio);
		skb = cbq_dequeue_prio(sch, prio);
		if (skb)
			return skb;
	}
	return NULL;
}
static struct sk_buff *
cbq_dequeue(struct Qdisc *sch)
{
	struct sk_buff *skb;
	struct cbq_sched_data *q = (struct cbq_sched_data *)sch->data;
	psched_time_t now;
	psched_tdiff_t incr;

	PSCHED_GET_TIME(now);
	incr = PSCHED_TDIFF(now, q->now_rt);

	if (q->tx_class) {
		psched_tdiff_t incr2;
		/* Time integrator. We calculate EOS time
		   by adding the expected packet transmission time.
		   If real time is greater, we warp the artificial
		   clock, so that:

		   cbq_time = max(real_time, work);
		 */
		incr2 = L2T(&q->link, q->tx_len);
		PSCHED_TADD(q->now, incr2);
		cbq_update(q);
		if ((incr -= incr2) < 0)
			incr = 0;
	}
	PSCHED_TADD(q->now, incr);
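
	/* Illustrative trace of the integrator (hypothetical numbers):
	   if L2T(&q->link, q->tx_len) is 120us but the wall clock moved
	   only 80us since the last dequeue, q->now runs 40us ahead as a
	   pure work clock; had the wall clock moved 200us, the remaining
	   incr (80us) is added back, so q->now tracks
	   max(real_time, work). */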
	q->now_rt = now;

	for (;;) {
		q->wd_expires = 0;

		skb = cbq_dequeue_1(sch);
		if (skb) {
			sch->q.qlen--;
			sch->flags &= ~TCQ_F_THROTTLED;
			return skb;
		}

		/* All the classes are overlimit.

		   It is possible if:
		   1. The scheduler is empty.
		   2. The toplevel cutoff inhibited borrowing.
		   3. The root class is overlimit.

		   Reset the 2nd and 3rd conditions and retry.

		   Note that NS and cbq-2.0 are buggy: peeking
		   an arbitrary class is appropriate for ancestor-only
		   sharing, but not for the toplevel algorithm.

		   Our version is better but slower, because it requires
		   two passes; that is unavoidable with top-level sharing.
		*/

		if (q->toplevel == TC_CBQ_MAXLEVEL &&
		    PSCHED_IS_PASTPERFECT(q->link.undertime))
			break;

		q->toplevel = TC_CBQ_MAXLEVEL;
		PSCHED_SET_PASTPERFECT(q->link.undertime);
	}

	/* No packets in scheduler or nobody wants to give them to us :-(
	   Sigh... start watchdog timer in the last case. */

	if (sch->q.qlen) {
		sch->stats.overlimits++;
		if (q->wd_expires && !netif_queue_stopped(sch->dev)) {
			long delay = PSCHED_US2JIFFIE(q->wd_expires);
			del_timer(&q->wd_timer);
			if (delay <= 0)
				delay = 1;
			q->wd_timer.expires = jiffies + delay;
			add_timer(&q->wd_timer);
			sch->flags |= TCQ_F_THROTTLED;
		}
	}
	return NULL;
}
/* CBQ class maintenance routines */

static void cbq_adjust_levels(struct cbq_class *this)
{
	if (this == NULL)
		return;

	do {
		int level = 0;
		struct cbq_class *cl;

		if ((cl = this->children) != NULL) {
			do {
				if (cl->level > level)
					level = cl->level;
			} while ((cl = cl->sibling) != this->children);
		}
		this->level = level+1;
	} while ((this = this->tparent) != NULL);
}
static void cbq_normalize_quanta(struct cbq_sched_data *q, int prio)
{
	struct cbq_class *cl;
	int h;

	if (q->quanta[prio] == 0)
		return;

	for (h=0; h<16; h++) {
		for (cl = q->classes[h]; cl; cl = cl->next) {
			/* BUGGGG... Beware! This expression suffers from
			   arithmetic overflows!
			 */
			if (cl->priority == prio) {
				cl->quantum = (cl->weight*cl->allot*q->nclasses[prio])/
					q->quanta[prio];
			}
			if (cl->quantum <= 0 || cl->quantum > 32*cl->qdisc->dev->mtu) {
				printk(KERN_WARNING "CBQ: class %08x has bad quantum==%ld, repaired.\n", cl->classid, cl->quantum);
				cl->quantum = cl->qdisc->dev->mtu/2 + 1;
			}
		}
	}
}
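
/* Worked example of the normalization above (illustrative numbers,
   not from the source): three classes at this priority with weights
   10, 20 and 30 give q->quanta[prio] = 60 and q->nclasses[prio] = 3;
   with allot = 1500, the weight-10 class gets
   quantum = (10*1500*3)/60 = 750 bytes per round and the weight-30
   class gets 2250, i.e. allot scaled by weight relative to the
   average weight. The triple product is what can overflow, as the
   BUGGGG warning says. */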
static void cbq_sync_defmap(struct cbq_class *cl)
{
	struct cbq_sched_data *q = (struct cbq_sched_data *)cl->qdisc->data;
	struct cbq_class *split = cl->split;
	unsigned h;
	int i;

	if (split == NULL)
		return;

	for (i=0; i<=TC_PRIO_MAX; i++) {
		if (split->defaults[i] == cl && !(cl->defmap&(1<<i)))
			split->defaults[i] = NULL;
	}

	for (i=0; i<=TC_PRIO_MAX; i++) {
		int level = split->level;

		if (split->defaults[i])
			continue;

		for (h=0; h<16; h++) {
			struct cbq_class *c;

			for (c = q->classes[h]; c; c = c->next) {
				if (c->split == split && c->level < level &&
				    c->defmap&(1<<i)) {
					split->defaults[i] = c;
					level = c->level;
				}
			}
		}
	}
}
static void cbq_change_defmap(struct cbq_class *cl, u32 splitid, u32 def, u32 mask)
{
	struct cbq_class *split = NULL;

	if (splitid == 0) {
		if ((split = cl->split) == NULL)
			return;
		splitid = split->classid;
	}

	if (split == NULL || split->classid != splitid) {
		for (split = cl->tparent; split; split = split->tparent)
			if (split->classid == splitid)
				break;
	}

	if (split == NULL)
		return;

	if (cl->split != split) {
		cl->defmap = 0;
		cbq_sync_defmap(cl);
		cl->split = split;
		cl->defmap = def&mask;
	} else
		cl->defmap = (cl->defmap&~mask)|(def&mask);

	cbq_sync_defmap(cl);
}
static void cbq_unlink_class(struct cbq_class *this)
{
	struct cbq_class *cl, **clp;
	struct cbq_sched_data *q = (struct cbq_sched_data *)this->qdisc->data;

	for (clp = &q->classes[cbq_hash(this->classid)]; (cl = *clp) != NULL; clp = &cl->next) {
		if (cl == this) {
			*clp = cl->next;
			cl->next = NULL;
			break;
		}
	}

	if (this->tparent) {
		clp=&this->sibling;
		cl = *clp;
		do {
			if (cl == this) {
				*clp = cl->sibling;
				break;
			}
			clp = &cl->sibling;
		} while ((cl = *clp) != this->sibling);

		if (this->tparent->children == this) {
			this->tparent->children = this->sibling;
			if (this->sibling == this)
				this->tparent->children = NULL;
		}
	} else {
		BUG_TRAP(this->sibling == this);
	}
}
static void cbq_link_class(struct cbq_class *this)
{
	struct cbq_sched_data *q = (struct cbq_sched_data *)this->qdisc->data;
	unsigned h = cbq_hash(this->classid);
	struct cbq_class *parent = this->tparent;

	this->sibling = this;
	this->next = q->classes[h];
	q->classes[h] = this;

	if (parent == NULL)
		return;

	if (parent->children == NULL) {
		parent->children = this;
	} else {
		this->sibling = parent->children->sibling;
		parent->children->sibling = this;
	}
}
static int cbq_drop(struct Qdisc* sch)
{
	struct cbq_sched_data *q = (struct cbq_sched_data *)sch->data;
	struct cbq_class *cl, *cl_head;
	int prio;

	for (prio = TC_CBQ_MAXPRIO; prio >= 0; prio--) {
		if ((cl_head = q->active[prio]) == NULL)
			continue;

		cl = cl_head;
		do {
			if (cl->q->ops->drop && cl->q->ops->drop(cl->q))
				return 1;
		} while ((cl = cl->next_alive) != cl_head);
	}
	return 0;
}
static void
cbq_reset(struct Qdisc* sch)
{
	struct cbq_sched_data *q = (struct cbq_sched_data *)sch->data;
	struct cbq_class *cl;
	int prio;
	unsigned h;

	q->activemask = 0;
	q->pmask = 0;
	q->tx_class = NULL;
	q->tx_borrowed = NULL;
	del_timer(&q->wd_timer);
	del_timer(&q->delay_timer);
	q->toplevel = TC_CBQ_MAXLEVEL;
	PSCHED_GET_TIME(q->now);
	q->now_rt = q->now;

	for (prio = 0; prio <= TC_CBQ_MAXPRIO; prio++)
		q->active[prio] = NULL;

	for (h = 0; h < 16; h++) {
		for (cl = q->classes[h]; cl; cl = cl->next) {
			qdisc_reset(cl->q);

			cl->next_alive = NULL;
			PSCHED_SET_PASTPERFECT(cl->undertime);
			cl->avgidle = cl->maxidle;
			cl->deficit = cl->quantum;
			cl->cpriority = cl->priority;
		}
	}
	sch->q.qlen = 0;
}
static int cbq_set_lss(struct cbq_class *cl, struct tc_cbq_lssopt *lss)
{
	if (lss->change&TCF_CBQ_LSS_FLAGS) {
		cl->share = (lss->flags&TCF_CBQ_LSS_ISOLATED) ? NULL : cl->tparent;
		cl->borrow = (lss->flags&TCF_CBQ_LSS_BOUNDED) ? NULL : cl->tparent;
	}
	if (lss->change&TCF_CBQ_LSS_EWMA)
		cl->ewma_log = lss->ewma_log;
	if (lss->change&TCF_CBQ_LSS_AVPKT)
		cl->avpkt = lss->avpkt;
	if (lss->change&TCF_CBQ_LSS_MINIDLE)
		cl->minidle = -(long)lss->minidle;
	if (lss->change&TCF_CBQ_LSS_MAXIDLE) {
		cl->maxidle = lss->maxidle;
		cl->avgidle = lss->maxidle;
	}
	if (lss->change&TCF_CBQ_LSS_OFFTIME)
		cl->offtime = lss->offtime;
	return 0;
}
static void cbq_rmprio(struct cbq_sched_data *q, struct cbq_class *cl)
{
	q->nclasses[cl->priority]--;
	q->quanta[cl->priority] -= cl->weight;
	cbq_normalize_quanta(q, cl->priority);
}

static void cbq_addprio(struct cbq_sched_data *q, struct cbq_class *cl)
{
	q->nclasses[cl->priority]++;
	q->quanta[cl->priority] += cl->weight;
	cbq_normalize_quanta(q, cl->priority);
}
static int cbq_set_wrr(struct cbq_class *cl, struct tc_cbq_wrropt *wrr)
{
	struct cbq_sched_data *q = (struct cbq_sched_data *)cl->qdisc->data;

	if (wrr->allot)
		cl->allot = wrr->allot;
	if (wrr->weight)
		cl->weight = wrr->weight;
	if (wrr->priority) {
		cl->priority = wrr->priority-1;
		cl->cpriority = cl->priority;
		if (cl->priority >= cl->priority2)
			cl->priority2 = TC_CBQ_MAXPRIO-1;
	}

	cbq_addprio(q, cl);
	return 0;
}
static int cbq_set_overlimit(struct cbq_class *cl, struct tc_cbq_ovl *ovl)
{
	switch (ovl->strategy) {
	case TC_CBQ_OVL_CLASSIC:
		cl->overlimit = cbq_ovl_classic;
		break;
	case TC_CBQ_OVL_DELAY:
		cl->overlimit = cbq_ovl_delay;
		break;
	case TC_CBQ_OVL_LOWPRIO:
		if (ovl->priority2-1 >= TC_CBQ_MAXPRIO ||
		    ovl->priority2-1 <= cl->priority)
			return -EINVAL;
		cl->priority2 = ovl->priority2-1;
		cl->overlimit = cbq_ovl_lowprio;
		break;
	case TC_CBQ_OVL_DROP:
		cl->overlimit = cbq_ovl_drop;
		break;
	case TC_CBQ_OVL_RCLASSIC:
		cl->overlimit = cbq_ovl_rclassic;
		break;
	default:
		return -EINVAL;
	}
	cl->penalty = (ovl->penalty*HZ)/1000;
	return 0;
}
#ifdef CONFIG_NET_CLS_POLICE
static int cbq_set_police(struct cbq_class *cl, struct tc_cbq_police *p)
{
	cl->police = p->police;

	if (cl->q->handle) {
		if (p->police == TC_POLICE_RECLASSIFY)
			cl->q->reshape_fail = cbq_reshape_fail;
		else
			cl->q->reshape_fail = NULL;
	}
	return 0;
}
#endif
static int cbq_set_fopt(struct cbq_class *cl, struct tc_cbq_fopt *fopt)
{
	cbq_change_defmap(cl, fopt->split, fopt->defmap, fopt->defchange);
	return 0;
}
static int cbq_init(struct Qdisc *sch, struct rtattr *opt)
{
	struct cbq_sched_data *q = (struct cbq_sched_data *)sch->data;
	struct rtattr *tb[TCA_CBQ_MAX];
	struct tc_ratespec *r;

	if (rtattr_parse(tb, TCA_CBQ_MAX, RTA_DATA(opt), RTA_PAYLOAD(opt)) < 0 ||
	    tb[TCA_CBQ_RTAB-1] == NULL || tb[TCA_CBQ_RATE-1] == NULL ||
	    RTA_PAYLOAD(tb[TCA_CBQ_RATE-1]) < sizeof(struct tc_ratespec))
		return -EINVAL;

	if (tb[TCA_CBQ_LSSOPT-1] &&
	    RTA_PAYLOAD(tb[TCA_CBQ_LSSOPT-1]) < sizeof(struct tc_cbq_lssopt))
		return -EINVAL;

	r = RTA_DATA(tb[TCA_CBQ_RATE-1]);

	MOD_INC_USE_COUNT;
	if ((q->link.R_tab = qdisc_get_rtab(r, tb[TCA_CBQ_RTAB-1])) == NULL) {
		MOD_DEC_USE_COUNT;
		return -EINVAL;
	}

	q->link.refcnt = 1;
	q->link.sibling = &q->link;
	q->link.classid = sch->handle;
	q->link.qdisc = sch;
	if (!(q->link.q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops)))
		q->link.q = &noop_qdisc;

	q->link.priority = TC_CBQ_MAXPRIO-1;
	q->link.priority2 = TC_CBQ_MAXPRIO-1;
	q->link.cpriority = TC_CBQ_MAXPRIO-1;
	q->link.ovl_strategy = TC_CBQ_OVL_CLASSIC;
	q->link.overlimit = cbq_ovl_classic;
	q->link.allot = psched_mtu(sch->dev);
	q->link.quantum = q->link.allot;
	q->link.weight = q->link.R_tab->rate.rate;

	q->link.ewma_log = TC_CBQ_DEF_EWMA;
	q->link.avpkt = q->link.allot/2;
	q->link.minidle = -0x7FFFFFFF;
	q->link.stats.lock = &sch->dev->queue_lock;

	init_timer(&q->wd_timer);
	q->wd_timer.data = (unsigned long)sch;
	q->wd_timer.function = cbq_watchdog;
	init_timer(&q->delay_timer);
	q->delay_timer.data = (unsigned long)sch;
	q->delay_timer.function = cbq_undelay;
	q->toplevel = TC_CBQ_MAXLEVEL;
	PSCHED_GET_TIME(q->now);
	q->now_rt = q->now;

	cbq_link_class(&q->link);

	if (tb[TCA_CBQ_LSSOPT-1])
		cbq_set_lss(&q->link, RTA_DATA(tb[TCA_CBQ_LSSOPT-1]));

	cbq_addprio(q, &q->link);
	return 0;
}
#ifdef CONFIG_RTNETLINK

static __inline__ int cbq_dump_rate(struct sk_buff *skb, struct cbq_class *cl)
{
	unsigned char *b = skb->tail;

	RTA_PUT(skb, TCA_CBQ_RATE, sizeof(cl->R_tab->rate), &cl->R_tab->rate);
	return skb->len;

rtattr_failure:
	skb_trim(skb, b - skb->data);
	return -1;
}
static __inline__ int cbq_dump_lss(struct sk_buff *skb, struct cbq_class *cl)
{
	unsigned char *b = skb->tail;
	struct tc_cbq_lssopt opt;

	opt.flags = 0;
	if (cl->borrow == NULL)
		opt.flags |= TCF_CBQ_LSS_BOUNDED;
	if (cl->share == NULL)
		opt.flags |= TCF_CBQ_LSS_ISOLATED;
	opt.ewma_log = cl->ewma_log;
	opt.level = cl->level;
	opt.avpkt = cl->avpkt;
	opt.maxidle = cl->maxidle;
	opt.minidle = (u32)(-cl->minidle);
	opt.offtime = cl->offtime;
	opt.change = ~0;
	RTA_PUT(skb, TCA_CBQ_LSSOPT, sizeof(opt), &opt);
	return skb->len;

rtattr_failure:
	skb_trim(skb, b - skb->data);
	return -1;
}
static __inline__ int cbq_dump_wrr(struct sk_buff *skb, struct cbq_class *cl)
{
	unsigned char *b = skb->tail;
	struct tc_cbq_wrropt opt;

	opt.flags = 0;
	opt.allot = cl->allot;
	opt.priority = cl->priority+1;
	opt.cpriority = cl->cpriority+1;
	opt.weight = cl->weight;
	RTA_PUT(skb, TCA_CBQ_WRROPT, sizeof(opt), &opt);
	return skb->len;

rtattr_failure:
	skb_trim(skb, b - skb->data);
	return -1;
}
static __inline__ int cbq_dump_ovl(struct sk_buff *skb, struct cbq_class *cl)
{
	unsigned char *b = skb->tail;
	struct tc_cbq_ovl opt;

	opt.strategy = cl->ovl_strategy;
	opt.priority2 = cl->priority2+1;
	opt.penalty = (cl->penalty*1000)/HZ;
	RTA_PUT(skb, TCA_CBQ_OVL_STRATEGY, sizeof(opt), &opt);
	return skb->len;

rtattr_failure:
	skb_trim(skb, b - skb->data);
	return -1;
}
static __inline__ int cbq_dump_fopt(struct sk_buff *skb, struct cbq_class *cl)
{
	unsigned char *b = skb->tail;
	struct tc_cbq_fopt opt;

	if (cl->split || cl->defmap) {
		opt.split = cl->split ? cl->split->classid : 0;
		opt.defmap = cl->defmap;
		opt.defchange = ~0;
		RTA_PUT(skb, TCA_CBQ_FOPT, sizeof(opt), &opt);
	}
	return skb->len;

rtattr_failure:
	skb_trim(skb, b - skb->data);
	return -1;
}
#ifdef CONFIG_NET_CLS_POLICE
static __inline__ int cbq_dump_police(struct sk_buff *skb, struct cbq_class *cl)
{
	unsigned char *b = skb->tail;
	struct tc_cbq_police opt;

	if (cl->police) {
		opt.police = cl->police;
		RTA_PUT(skb, TCA_CBQ_POLICE, sizeof(opt), &opt);
	}
	return skb->len;

rtattr_failure:
	skb_trim(skb, b - skb->data);
	return -1;
}
#endif
static int cbq_dump_attr(struct sk_buff *skb, struct cbq_class *cl)
{
	if (cbq_dump_lss(skb, cl) < 0 ||
	    cbq_dump_rate(skb, cl) < 0 ||
	    cbq_dump_wrr(skb, cl) < 0 ||
	    cbq_dump_ovl(skb, cl) < 0 ||
#ifdef CONFIG_NET_CLS_POLICE
	    cbq_dump_police(skb, cl) < 0 ||
#endif
	    cbq_dump_fopt(skb, cl) < 0)
		return -1;
	return 0;
}

int cbq_copy_xstats(struct sk_buff *skb, struct tc_cbq_xstats *st)
{
	RTA_PUT(skb, TCA_XSTATS, sizeof(*st), st);
	return 0;

rtattr_failure:
	return -1;
}
static int cbq_dump(struct Qdisc *sch, struct sk_buff *skb)
{
	struct cbq_sched_data *q = (struct cbq_sched_data *)sch->data;
	unsigned char *b = skb->tail;
	struct rtattr *rta;

	rta = (struct rtattr *)b;
	RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
	if (cbq_dump_attr(skb, &q->link) < 0)
		goto rtattr_failure;
	rta->rta_len = skb->tail - b;
	spin_lock_bh(&sch->dev->queue_lock);
	q->link.xstats.avgidle = q->link.avgidle;
	if (cbq_copy_xstats(skb, &q->link.xstats)) {
		spin_unlock_bh(&sch->dev->queue_lock);
		goto rtattr_failure;
	}
	spin_unlock_bh(&sch->dev->queue_lock);
	return skb->len;

rtattr_failure:
	skb_trim(skb, b - skb->data);
	return -1;
}
static int
cbq_dump_class(struct Qdisc *sch, unsigned long arg,
	       struct sk_buff *skb, struct tcmsg *tcm)
{
	struct cbq_sched_data *q = (struct cbq_sched_data *)sch->data;
	struct cbq_class *cl = (struct cbq_class *)arg;
	unsigned char *b = skb->tail;
	struct rtattr *rta;

	if (cl->tparent)
		tcm->tcm_parent = cl->tparent->classid;
	else
		tcm->tcm_parent = TC_H_ROOT;
	tcm->tcm_handle = cl->classid;
	tcm->tcm_info = cl->q->handle;

	rta = (struct rtattr *)b;
	RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
	if (cbq_dump_attr(skb, cl) < 0)
		goto rtattr_failure;
	rta->rta_len = skb->tail - b;
	cl->stats.qlen = cl->q->q.qlen;
	if (qdisc_copy_stats(skb, &cl->stats))
		goto rtattr_failure;
	spin_lock_bh(&sch->dev->queue_lock);
	cl->xstats.avgidle = cl->avgidle;
	cl->xstats.undertime = 0;
	if (!PSCHED_IS_PASTPERFECT(cl->undertime))
		cl->xstats.undertime = PSCHED_TDIFF(cl->undertime, q->now);
	q->link.xstats.avgidle = q->link.avgidle;
	if (cbq_copy_xstats(skb, &cl->xstats)) {
		spin_unlock_bh(&sch->dev->queue_lock);
		goto rtattr_failure;
	}
	spin_unlock_bh(&sch->dev->queue_lock);

	return skb->len;

rtattr_failure:
	skb_trim(skb, b - skb->data);
	return -1;
}

#endif
static int cbq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
		     struct Qdisc **old)
{
	struct cbq_class *cl = (struct cbq_class *)arg;

	if (cl) {
		if (new == NULL) {
			if ((new = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops)) == NULL)
				return -ENOBUFS;
		} else {
#ifdef CONFIG_NET_CLS_POLICE
			if (cl->police == TC_POLICE_RECLASSIFY)
				new->reshape_fail = cbq_reshape_fail;
#endif
		}
		sch_tree_lock(sch);
		*old = cl->q;
		cl->q = new;
		qdisc_reset(*old);
		sch_tree_unlock(sch);

		return 0;
	}
	return -ENOENT;
}
static struct Qdisc *
cbq_leaf(struct Qdisc *sch, unsigned long arg)
{
	struct cbq_class *cl = (struct cbq_class *)arg;

	return cl ? cl->q : NULL;
}
static unsigned long cbq_get(struct Qdisc *sch, u32 classid)
{
	struct cbq_sched_data *q = (struct cbq_sched_data *)sch->data;
	struct cbq_class *cl = cbq_class_lookup(q, classid);

	if (cl) {
		cl->refcnt++;
		return (unsigned long)cl;
	}
	return 0;
}
static void cbq_destroy_filters(struct cbq_class *cl)
{
	struct tcf_proto *tp;

	while ((tp = cl->filter_list) != NULL) {
		cl->filter_list = tp->next;
		tp->ops->destroy(tp);
	}
}

static void cbq_destroy_class(struct cbq_class *cl)
{
	cbq_destroy_filters(cl);
	qdisc_destroy(cl->q);
	qdisc_put_rtab(cl->R_tab);
#ifdef CONFIG_NET_ESTIMATOR
	qdisc_kill_estimator(&cl->stats);
#endif
	kfree(cl);
}
static void
cbq_destroy(struct Qdisc* sch)
{
	struct cbq_sched_data *q = (struct cbq_sched_data *)sch->data;
	struct cbq_class *cl;
	unsigned h;

#ifdef CONFIG_NET_CLS_POLICE
	q->rx_class = NULL;
#endif
	for (h = 0; h < 16; h++) {
		for (cl = q->classes[h]; cl; cl = cl->next)
			cbq_destroy_filters(cl);
	}

	for (h = 0; h < 16; h++) {
		struct cbq_class *next;

		/* cbq_destroy_class() frees cl, so fetch ->next first. */
		for (cl = q->classes[h]; cl; cl = next) {
			next = cl->next;
			if (cl != &q->link)
				cbq_destroy_class(cl);
		}
	}

	qdisc_put_rtab(q->link.R_tab);
	MOD_DEC_USE_COUNT;
}
static void cbq_put(struct Qdisc *sch, unsigned long arg)
{
	struct cbq_class *cl = (struct cbq_class *)arg;

	if (--cl->refcnt == 0) {
#ifdef CONFIG_NET_CLS_POLICE
		struct cbq_sched_data *q = (struct cbq_sched_data *)sch->data;

		spin_lock_bh(&sch->dev->queue_lock);
		if (q->rx_class == cl)
			q->rx_class = NULL;
		spin_unlock_bh(&sch->dev->queue_lock);
#endif
		cbq_destroy_class(cl);
	}
}
static int
cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct rtattr **tca,
		 unsigned long *arg)
{
	int err;
	struct cbq_sched_data *q = (struct cbq_sched_data *)sch->data;
	struct cbq_class *cl = (struct cbq_class *)*arg;
	struct rtattr *opt = tca[TCA_OPTIONS-1];
	struct rtattr *tb[TCA_CBQ_MAX];
	struct cbq_class *parent;
	struct qdisc_rate_table *rtab = NULL;

	if (opt == NULL ||
	    rtattr_parse(tb, TCA_CBQ_MAX, RTA_DATA(opt), RTA_PAYLOAD(opt)))
		return -EINVAL;

	if (tb[TCA_CBQ_OVL_STRATEGY-1] &&
	    RTA_PAYLOAD(tb[TCA_CBQ_OVL_STRATEGY-1]) < sizeof(struct tc_cbq_ovl))
		return -EINVAL;

	if (tb[TCA_CBQ_FOPT-1] &&
	    RTA_PAYLOAD(tb[TCA_CBQ_FOPT-1]) < sizeof(struct tc_cbq_fopt))
		return -EINVAL;

	if (tb[TCA_CBQ_RATE-1] &&
	    RTA_PAYLOAD(tb[TCA_CBQ_RATE-1]) < sizeof(struct tc_ratespec))
		return -EINVAL;

	if (tb[TCA_CBQ_LSSOPT-1] &&
	    RTA_PAYLOAD(tb[TCA_CBQ_LSSOPT-1]) < sizeof(struct tc_cbq_lssopt))
		return -EINVAL;

	if (tb[TCA_CBQ_WRROPT-1] &&
	    RTA_PAYLOAD(tb[TCA_CBQ_WRROPT-1]) < sizeof(struct tc_cbq_wrropt))
		return -EINVAL;

#ifdef CONFIG_NET_CLS_POLICE
	if (tb[TCA_CBQ_POLICE-1] &&
	    RTA_PAYLOAD(tb[TCA_CBQ_POLICE-1]) < sizeof(struct tc_cbq_police))
		return -EINVAL;
#endif

	if (cl) {
		/* Check parent */
		if (parentid) {
			if (cl->tparent && cl->tparent->classid != parentid)
				return -EINVAL;
			if (!cl->tparent && parentid != TC_H_ROOT)
				return -EINVAL;
		}

		if (tb[TCA_CBQ_RATE-1]) {
			rtab = qdisc_get_rtab(RTA_DATA(tb[TCA_CBQ_RATE-1]), tb[TCA_CBQ_RTAB-1]);
			if (rtab == NULL)
				return -EINVAL;
		}

		/* Change class parameters */
		sch_tree_lock(sch);

		if (cl->next_alive != NULL)
			cbq_deactivate_class(cl);

		if (rtab) {
			rtab = xchg(&cl->R_tab, rtab);
			qdisc_put_rtab(rtab);
		}

		if (tb[TCA_CBQ_LSSOPT-1])
			cbq_set_lss(cl, RTA_DATA(tb[TCA_CBQ_LSSOPT-1]));

		if (tb[TCA_CBQ_WRROPT-1]) {
			cbq_rmprio(q, cl);
			cbq_set_wrr(cl, RTA_DATA(tb[TCA_CBQ_WRROPT-1]));
		}

		if (tb[TCA_CBQ_OVL_STRATEGY-1])
			cbq_set_overlimit(cl, RTA_DATA(tb[TCA_CBQ_OVL_STRATEGY-1]));

#ifdef CONFIG_NET_CLS_POLICE
		if (tb[TCA_CBQ_POLICE-1])
			cbq_set_police(cl, RTA_DATA(tb[TCA_CBQ_POLICE-1]));
#endif

		if (tb[TCA_CBQ_FOPT-1])
			cbq_set_fopt(cl, RTA_DATA(tb[TCA_CBQ_FOPT-1]));

		if (cl->q->q.qlen)
			cbq_activate_class(cl);

		sch_tree_unlock(sch);

#ifdef CONFIG_NET_ESTIMATOR
		if (tca[TCA_RATE-1]) {
			qdisc_kill_estimator(&cl->stats);
			qdisc_new_estimator(&cl->stats, tca[TCA_RATE-1]);
		}
#endif
		return 0;
	}

	if (parentid == TC_H_ROOT)
		return -EINVAL;

	if (tb[TCA_CBQ_WRROPT-1] == NULL || tb[TCA_CBQ_RATE-1] == NULL ||
	    tb[TCA_CBQ_LSSOPT-1] == NULL)
		return -EINVAL;

	rtab = qdisc_get_rtab(RTA_DATA(tb[TCA_CBQ_RATE-1]), tb[TCA_CBQ_RTAB-1]);
	if (rtab == NULL)
		return -EINVAL;

	if (classid) {
		err = -EINVAL;
		if (TC_H_MAJ(classid^sch->handle) || cbq_class_lookup(q, classid))
			goto failure;
	} else {
		int i;
		classid = TC_H_MAKE(sch->handle,0x8000);

		for (i=0; i<0x8000; i++) {
			if (++q->hgenerator >= 0x8000)
				q->hgenerator = 1;
			if (cbq_class_lookup(q, classid|q->hgenerator) == NULL)
				break;
		}
		err = -ENOSR;
		if (i >= 0x8000)
			goto failure;
		classid = classid|q->hgenerator;
	}

	parent = &q->link;
	if (parentid) {
		parent = cbq_class_lookup(q, parentid);
		err = -EINVAL;
		if (parent == NULL)
			goto failure;
	}

	err = -ENOBUFS;
	cl = kmalloc(sizeof(*cl), GFP_KERNEL);
	if (cl == NULL)
		goto failure;
	memset(cl, 0, sizeof(*cl));
	cl->R_tab = rtab;
	rtab = NULL;
	cl->refcnt = 1;
	if (!(cl->q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops)))
		cl->q = &noop_qdisc;
	cl->classid = classid;
	cl->tparent = parent;
	cl->qdisc = sch;
	cl->allot = parent->allot;
	cl->quantum = cl->allot;
	cl->weight = cl->R_tab->rate.rate;
	cl->stats.lock = &sch->dev->queue_lock;

	sch_tree_lock(sch);
	cbq_link_class(cl);
	cl->borrow = cl->tparent;
	if (cl->tparent != &q->link)
		cl->share = cl->tparent;
	cbq_adjust_levels(parent);
	cl->minidle = -0x7FFFFFFF;
	cbq_set_lss(cl, RTA_DATA(tb[TCA_CBQ_LSSOPT-1]));
	cbq_set_wrr(cl, RTA_DATA(tb[TCA_CBQ_WRROPT-1]));
	if (cl->ewma_log==0)
		cl->ewma_log = q->link.ewma_log;
	if (cl->maxidle==0)
		cl->maxidle = q->link.maxidle;
	if (cl->avpkt==0)
		cl->avpkt = q->link.avpkt;
	cl->overlimit = cbq_ovl_classic;
	if (tb[TCA_CBQ_OVL_STRATEGY-1])
		cbq_set_overlimit(cl, RTA_DATA(tb[TCA_CBQ_OVL_STRATEGY-1]));
#ifdef CONFIG_NET_CLS_POLICE
	if (tb[TCA_CBQ_POLICE-1])
		cbq_set_police(cl, RTA_DATA(tb[TCA_CBQ_POLICE-1]));
#endif
	if (tb[TCA_CBQ_FOPT-1])
		cbq_set_fopt(cl, RTA_DATA(tb[TCA_CBQ_FOPT-1]));
	sch_tree_unlock(sch);

#ifdef CONFIG_NET_ESTIMATOR
	if (tca[TCA_RATE-1])
		qdisc_new_estimator(&cl->stats, tca[TCA_RATE-1]);
#endif

	*arg = (unsigned long)cl;
	return 0;

failure:
	qdisc_put_rtab(rtab);
	return err;
}
static int cbq_delete(struct Qdisc *sch, unsigned long arg)
{
	struct cbq_sched_data *q = (struct cbq_sched_data *)sch->data;
	struct cbq_class *cl = (struct cbq_class *)arg;

	if (cl->filters || cl->children || cl == &q->link)
		return -EBUSY;

	sch_tree_lock(sch);

	if (cl->next_alive)
		cbq_deactivate_class(cl);

	if (q->tx_borrowed == cl)
		q->tx_borrowed = q->tx_class;
	if (q->tx_class == cl) {
		q->tx_class = NULL;
		q->tx_borrowed = NULL;
	}
#ifdef CONFIG_NET_CLS_POLICE
	if (q->rx_class == cl)
		q->rx_class = NULL;
#endif

	cbq_unlink_class(cl);
	cbq_adjust_levels(cl->tparent);
	cl->defmap = 0;
	cbq_sync_defmap(cl);

	cbq_rmprio(q, cl);
	sch_tree_unlock(sch);

	if (--cl->refcnt == 0)
		cbq_destroy_class(cl);

	return 0;
}
static struct tcf_proto **cbq_find_tcf(struct Qdisc *sch, unsigned long arg)
{
	struct cbq_sched_data *q = (struct cbq_sched_data *)sch->data;
	struct cbq_class *cl = (struct cbq_class *)arg;

	if (cl == NULL)
		cl = &q->link;

	return &cl->filter_list;
}
static unsigned long cbq_bind_filter(struct Qdisc *sch, unsigned long parent,
				     u32 classid)
{
	struct cbq_sched_data *q = (struct cbq_sched_data *)sch->data;
	struct cbq_class *p = (struct cbq_class *)parent;
	struct cbq_class *cl = cbq_class_lookup(q, classid);

	if (cl) {
		if (p && p->level <= cl->level)
			return 0;
		cl->filters++;
		return (unsigned long)cl;
	}
	return 0;
}

static void cbq_unbind_filter(struct Qdisc *sch, unsigned long arg)
{
	struct cbq_class *cl = (struct cbq_class *)arg;

	cl->filters--;
}
static void cbq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
{
	struct cbq_sched_data *q = (struct cbq_sched_data *)sch->data;
	unsigned h;

	if (arg->stop)
		return;

	for (h = 0; h < 16; h++) {
		struct cbq_class *cl;

		for (cl = q->classes[h]; cl; cl = cl->next) {
			if (arg->count < arg->skip) {
				arg->count++;
				continue;
			}
			if (arg->fn(sch, (unsigned long)cl, arg) < 0) {
				arg->stop = 1;
				return;
			}
			arg->count++;
		}
	}
}
static struct Qdisc_class_ops cbq_class_ops =
{
	cbq_graft,
	cbq_leaf,
	cbq_get,
	cbq_put,
	cbq_change_class,
	cbq_delete,
	cbq_walk,

	cbq_find_tcf,
	cbq_bind_filter,
	cbq_unbind_filter,

#ifdef CONFIG_RTNETLINK
	cbq_dump_class,
#endif
};

struct Qdisc_ops cbq_qdisc_ops =
{
	NULL,
	&cbq_class_ops,
	"cbq",
	sizeof(struct cbq_sched_data),

	cbq_enqueue,
	cbq_dequeue,
	cbq_requeue,
	cbq_drop,

	cbq_init,
	cbq_reset,
	cbq_destroy,
	NULL /* cbq_change */,

#ifdef CONFIG_RTNETLINK
	cbq_dump,
#endif
};

#ifdef MODULE
int init_module(void)
{
	return register_qdisc(&cbq_qdisc_ops);
}

void cleanup_module(void)
{
	unregister_qdisc(&cbq_qdisc_ops);
}
#endif