/*
 * net/sched/sch_cbq.c  Class-Based Queueing discipline.
 *
 *      This program is free software; you can redistribute it and/or
 *      modify it under the terms of the GNU General Public License
 *      as published by the Free Software Foundation; either version
 *      2 of the License, or (at your option) any later version.
 *
 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 */

#include <linux/config.h>
#include <linux/module.h>
#include <asm/uaccess.h>
#include <asm/system.h>
#include <linux/bitops.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/string.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/errno.h>
#include <linux/interrupt.h>
#include <linux/if_ether.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/notifier.h>
#include <net/route.h>
#include <linux/skbuff.h>
#include <net/pkt_sched.h>
/*  Class-Based Queueing (CBQ) algorithm.
    =======================================

    Sources: [1] Sally Floyd and Van Jacobson, "Link-sharing and Resource
             Management Models for Packet Networks",
             IEEE/ACM Transactions on Networking, Vol.3, No.4, 1995

             [2] Sally Floyd, "Notes on CBQ and Guaranteed Service", 1995

             [3] Sally Floyd, "Notes on Class-Based Queueing: Setting
             Parameters", 1996

             [4] Sally Floyd and Michael Speer, "Experimental Results
             for Class-Based Queueing", 1998, not published.

    -----------------------------------------------------------------------

    The algorithm skeleton was taken from the NS simulator cbq.cc.
    If someone wants to check this code against the LBL version,
    he should take into account that ONLY the skeleton was borrowed;
    the implementation is different. Particularly:

    --- The WRR algorithm is different. Our version looks more
    reasonable (I hope) and works when quanta are allowed to be
    less than MTU, which is always the case when real-time classes
    have small rates. Note that the statement of [3] is
    incomplete: delay may actually be estimated even if the class's
    per-round allotment is less than MTU. Namely, if the per-round
    allotment is W*r_i, and r_1+...+r_k = r < 1, then

    delay_i <= ([MTU/(W*r_i)]*W*r + W*r + k*MTU)/B

    In the worst case we have an IntServ estimate with D = W*r+k*MTU
    and C = MTU*r. The proof (if correct at all) is trivial.

    --- It seems that cbq-2.0 is not very accurate. At least, I cannot
    interpret some places, which look like wrong translations
    from NS. Anyone is advised to find these differences
    and explain to me why I am wrong 8).

    --- Linux has no EOI event, so that we cannot estimate true class
    idle time. The workaround is to consider the next dequeue event
    as a sign that the previous packet is finished. This is wrong because of
    internal device queueing, but on a permanently loaded link it is true.
    Moreover, combined with the clock integrator, this scheme looks
    very close to an ideal solution.  */
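/*
 * Worked example for the bound above (hypothetical numbers, not from the
 * original source): take MTU = 1500 bytes, k = 2 classes with
 * r_1 = r_2 = 0.25 (so r = 0.5) and a per-round budget W = 3000 bytes, i.e.
 * class 1's per-round allotment W*r_1 = 750 bytes is below the MTU.  Then
 *   ceil(MTU/(W*r_1)) = 2,  W*r = 1500,  k*MTU = 3000,
 *   delay_1 <= (2*1500 + 1500 + 3000)/B = 7500/B,
 * which is 6 ms on a 10 Mbit/s link (B = 1.25e6 bytes/s).
 */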
struct cbq_sched_data;

struct cbq_class
{
    struct cbq_class    *next;          /* hash table link */
    struct cbq_class    *next_alive;    /* next class with backlog in this priority band */

    u32                 classid;
    unsigned char       priority;       /* class priority */
    unsigned char       priority2;      /* priority to be used after overlimit */
    unsigned char       ewma_log;       /* time constant for idle time calculation */
    unsigned char       ovl_strategy;
#ifdef CONFIG_NET_CLS_POLICE
    unsigned char       police;
#endif

    u32                 defmap;

    /* Link-sharing scheduler parameters */
    long                maxidle;        /* Class parameters: see below. */
    long                offtime;
    long                minidle;
    u32                 avpkt;
    struct qdisc_rate_table *R_tab;

    /* Overlimit strategy parameters */
    void                (*overlimit)(struct cbq_class *cl);
    long                penalty;

    /* General scheduler (WRR) parameters */
    long                allot;
    long                quantum;        /* Allotment per WRR round */
    long                weight;         /* Relative allotment: see below */

    struct Qdisc        *qdisc;         /* Ptr to CBQ discipline */
    struct cbq_class    *split;         /* Ptr to split node */
    struct cbq_class    *share;         /* Ptr to LS parent in the class tree */
    struct cbq_class    *tparent;       /* Ptr to tree parent in the class tree */
    struct cbq_class    *borrow;        /* NULL if class is bandwidth limited;
                                           parent otherwise */
    struct cbq_class    *sibling;       /* Sibling chain */
    struct cbq_class    *children;      /* Pointer to children chain */

    struct Qdisc        *q;             /* Elementary queueing discipline */

    unsigned char       cpriority;      /* Effective priority */
    unsigned char       delayed;
    unsigned char       level;          /* level of the class in hierarchy:
                                           0 for leaf classes, and maximal
                                           level of children + 1 for nodes. */

    psched_time_t       last;           /* Last end of service */
    psched_time_t       undertime;
    long                avgidle;
    long                deficit;        /* Saved deficit for WRR */
    unsigned long       penalized;
    struct gnet_stats_basic    bstats;
    struct gnet_stats_queue    qstats;
    struct gnet_stats_rate_est rate_est;
    spinlock_t          *stats_lock;
    struct tc_cbq_xstats xstats;

    struct tcf_proto    *filter_list;

    int                 refcnt;
    int                 filters;

    struct cbq_class    *defaults[TC_PRIO_MAX+1];
};
struct cbq_sched_data
{
    struct cbq_class    *classes[16];           /* Hash table of all classes */
    int                 nclasses[TC_CBQ_MAXPRIO+1];
    unsigned            quanta[TC_CBQ_MAXPRIO+1];

    struct cbq_class    link;

    unsigned            activemask;
    struct cbq_class    *active[TC_CBQ_MAXPRIO+1];  /* List of all classes
                                                       with backlog */

#ifdef CONFIG_NET_CLS_POLICE
    struct cbq_class    *rx_class;
#endif
    struct cbq_class    *tx_class;
    struct cbq_class    *tx_borrowed;
    int                 tx_len;
    psched_time_t       now;            /* Cached timestamp */
    psched_time_t       now_rt;         /* Cached real time */
    unsigned            pmask;

    struct timer_list   delay_timer;
    struct timer_list   wd_timer;       /* Watchdog timer,
                                           started when CBQ has
                                           backlog, but cannot
                                           transmit just now */
    long                wd_expires;
    int                 toplevel;
    u32                 hgenerator;
};
#define L2T(cl,len)    ((cl)->R_tab->data[(len)>>(cl)->R_tab->rate.cell_log])
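/* Note for readers (not in the original source): L2T ("length to time") looks
 * up the transmission time of a packet on this class's rate table.  The table
 * holds 256 precomputed slot times and rate.cell_log says how many bytes one
 * slot covers.  For example, assuming cell_log = 3 (8-byte cells), a
 * hypothetical 1000-byte packet indexes data[1000 >> 3] = data[125], the
 * precomputed time needed to send ~1000 bytes at the class's configured rate.
 */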
static __inline__ unsigned cbq_hash(u32 h)
{
    h ^= h>>8;
    h ^= h>>4;
    return h&0xF;
}

static __inline__ struct cbq_class *
cbq_class_lookup(struct cbq_sched_data *q, u32 classid)
{
    struct cbq_class *cl;

    for (cl = q->classes[cbq_hash(classid)]; cl; cl = cl->next)
        if (cl->classid == classid)
            return cl;
    return NULL;
}

#ifdef CONFIG_NET_CLS_POLICE

static struct cbq_class *
cbq_reclassify(struct sk_buff *skb, struct cbq_class *this)
{
    struct cbq_class *cl, *new;

    for (cl = this->tparent; cl; cl = cl->tparent)
        if ((new = cl->defaults[TC_PRIO_BESTEFFORT]) != NULL && new != this)
            return new;

    return NULL;
}

#endif
/* Classify packet. The procedure is pretty complicated, but
   it allows us to combine link sharing and priority scheduling
   transparently.

   Namely, you can put link-sharing rules (f.e. route based) at the root of
   CBQ, so that it resolves to split nodes. Then packets are classified
   by logical priority, or a more specific classifier may be attached
   to the split node.
 */

static struct cbq_class *
cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
{
    struct cbq_sched_data *q = qdisc_priv(sch);
    struct cbq_class *head = &q->link;
    struct cbq_class **defmap;
    struct cbq_class *cl = NULL;
    u32 prio = skb->priority;
    struct tcf_result res;

    /*
     *  Step 1. If skb->priority points to one of our classes, use it.
     */
    if (TC_H_MAJ(prio^sch->handle) == 0 &&
        (cl = cbq_class_lookup(q, prio)) != NULL)
        return cl;

    *qerr = NET_XMIT_BYPASS;
    for (;;) {
        int result = 0;
        defmap = head->defaults;

        /*
         * Step 2+n. Apply classifier.
         */
        if (!head->filter_list || (result = tc_classify(skb, head->filter_list, &res)) < 0)
            goto fallback;

        if ((cl = (void*)res.class) == NULL) {
            if (TC_H_MAJ(res.classid))
                cl = cbq_class_lookup(q, res.classid);
            else if ((cl = defmap[res.classid&TC_PRIO_MAX]) == NULL)
                cl = defmap[TC_PRIO_BESTEFFORT];

            if (cl == NULL || cl->level >= head->level)
                goto fallback;
        }

#ifdef CONFIG_NET_CLS_ACT
        switch (result) {
        case TC_ACT_QUEUED:
        case TC_ACT_STOLEN:
            *qerr = NET_XMIT_SUCCESS;
        case TC_ACT_SHOT:
            return NULL;
        }
#elif defined(CONFIG_NET_CLS_POLICE)
        switch (result) {
        case TC_POLICE_RECLASSIFY:
            return cbq_reclassify(skb, cl);
        case TC_POLICE_SHOT:
            return NULL;
        default:
            break;
        }
#endif
        if (cl->level == 0)
            return cl;

        /*
         * Step 3+n. If classifier selected a link sharing class,
         *      apply agency specific classifier.
         *      Repeat this procedure until we hit a leaf node.
         */
        head = cl;
    }

fallback:
    cl = head;

    /*
     * Step 4. No success...
     */
    if (TC_H_MAJ(prio) == 0 &&
        !(cl = head->defaults[prio&TC_PRIO_MAX]) &&
        !(cl = head->defaults[TC_PRIO_BESTEFFORT]))
        return head;

    return cl;
}
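/*
 * Walk-through (hypothetical handles, not from the original source): with a
 * qdisc handle of 1:, a packet whose skb->priority equals the classid 1:10
 * is resolved in Step 1 by the hash lookup.  If skb->priority carries no
 * valid classid and no filter matches, the Step 4 fallback above consults the
 * split node's default map: first defaults[prio & TC_PRIO_MAX], then
 * defaults[TC_PRIO_BESTEFFORT], and only if both are empty does the packet
 * stay with the head class itself.
 */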
/*
   A packet has just been enqueued on the empty class.
   cbq_activate_class adds it to the tail of the active class list
   of its priority band.
 */

static __inline__ void cbq_activate_class(struct cbq_class *cl)
{
    struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
    int prio = cl->cpriority;
    struct cbq_class *cl_tail;

    cl_tail = q->active[prio];
    q->active[prio] = cl;

    if (cl_tail != NULL) {
        cl->next_alive = cl_tail->next_alive;
        cl_tail->next_alive = cl;
    } else {
        cl->next_alive = cl;
        q->activemask |= (1<<prio);
    }
}
/*
   Unlink class from active chain.
   Note that this same procedure is done directly in cbq_dequeue*
   during the round-robin procedure.
 */

static void cbq_deactivate_class(struct cbq_class *this)
{
    struct cbq_sched_data *q = qdisc_priv(this->qdisc);
    int prio = this->cpriority;
    struct cbq_class *cl;
    struct cbq_class *cl_prev = q->active[prio];

    do {
        cl = cl_prev->next_alive;
        if (cl == this) {
            cl_prev->next_alive = cl->next_alive;
            cl->next_alive = NULL;

            if (cl == q->active[prio]) {
                q->active[prio] = cl_prev;
                if (cl == q->active[prio]) {
                    q->active[prio] = NULL;
                    q->activemask &= ~(1<<prio);
                    return;
                }
            }

            cl = cl_prev->next_alive;
            return;
        }
    } while ((cl_prev = cl) != q->active[prio]);
}
static void
cbq_mark_toplevel(struct cbq_sched_data *q, struct cbq_class *cl)
{
    int toplevel = q->toplevel;

    if (toplevel > cl->level && !(cl->q->flags&TCQ_F_THROTTLED)) {
        psched_time_t now;
        psched_tdiff_t incr;

        PSCHED_GET_TIME(now);
        incr = PSCHED_TDIFF(now, q->now_rt);
        PSCHED_TADD2(q->now, incr, now);

        do {
            if (PSCHED_TLESS(cl->undertime, now)) {
                q->toplevel = cl->level;
                return;
            }
        } while ((cl = cl->borrow) != NULL && toplevel > cl->level);
    }
}
static int
cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
{
    struct cbq_sched_data *q = qdisc_priv(sch);
    int len = skb->len;
    int ret;
    struct cbq_class *cl = cbq_classify(skb, sch, &ret);

#ifdef CONFIG_NET_CLS_POLICE
    q->rx_class = cl;
#endif
    if (cl == NULL) {
        if (ret == NET_XMIT_BYPASS)
            sch->qstats.drops++;
        kfree_skb(skb);
        return ret;
    }

#ifdef CONFIG_NET_CLS_POLICE
    cl->q->__parent = sch;
#endif
    if ((ret = cl->q->enqueue(skb, cl->q)) == NET_XMIT_SUCCESS) {
        sch->q.qlen++;
        sch->bstats.packets++;
        sch->bstats.bytes += len;
        cbq_mark_toplevel(q, cl);
        if (!cl->next_alive)
            cbq_activate_class(cl);
        return ret;
    }

    sch->qstats.drops++;
    cbq_mark_toplevel(q, cl);
    cl->qstats.drops++;
    return ret;
}
static int
cbq_requeue(struct sk_buff *skb, struct Qdisc *sch)
{
    struct cbq_sched_data *q = qdisc_priv(sch);
    struct cbq_class *cl;
    int ret;

    if ((cl = q->tx_class) == NULL) {
        kfree_skb(skb);
        sch->qstats.drops++;
        return NET_XMIT_CN;
    }
    q->tx_class = NULL;

    cbq_mark_toplevel(q, cl);

#ifdef CONFIG_NET_CLS_POLICE
    q->rx_class = cl;
    cl->q->__parent = sch;
#endif
    if ((ret = cl->q->ops->requeue(skb, cl->q)) == 0) {
        sch->q.qlen++;
        sch->qstats.requeues++;
        if (!cl->next_alive)
            cbq_activate_class(cl);
        return 0;
    }
    sch->qstats.drops++;
    cl->qstats.drops++;
    return ret;
}
/* Overlimit actions */

/* TC_CBQ_OVL_CLASSIC: (default) penalize leaf class by adding offtime */

static void cbq_ovl_classic(struct cbq_class *cl)
{
    struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
    psched_tdiff_t delay = PSCHED_TDIFF(cl->undertime, q->now);

    if (!cl->delayed) {
        delay += cl->offtime;

        /*
           The class goes to sleep, so that it will have no
           chance to work avgidle. Let's forgive it 8)

           BTW cbq-2.0 has a bug in this place: apparently they
           forgot to shift it by cl->ewma_log.
         */
        if (cl->avgidle < 0)
            delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
        if (cl->avgidle < cl->minidle)
            cl->avgidle = cl->minidle;
        if (delay <= 0)
            delay = 1;
        PSCHED_TADD2(q->now, delay, cl->undertime);

        cl->xstats.overactions++;
        cl->delayed = 1;
    }
    if (q->wd_expires == 0 || q->wd_expires > delay)
        q->wd_expires = delay;

    /* Dirty work! We must schedule wakeups based on
       real available rate, rather than leaf rate,
       which may be tiny (even zero).
     */
    if (q->toplevel == TC_CBQ_MAXLEVEL) {
        struct cbq_class *b;
        psched_tdiff_t base_delay = q->wd_expires;

        for (b = cl->borrow; b; b = b->borrow) {
            delay = PSCHED_TDIFF(b->undertime, q->now);
            if (delay < base_delay) {
                if (delay <= 0)
                    delay = 1;
                base_delay = delay;
            }
        }

        q->wd_expires = base_delay;
    }
}
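/*
 * Numerical aside (hypothetical values, not from the original source): the
 * expression delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log)
 * subtracts (1-W)*(-avgidle) with W = 2^-ewma_log, i.e. it "forgives" the
 * negative avgidle the class has already accumulated.  Assuming ewma_log = 5
 * and avgidle = -8000, the forgiven amount is 8000 - 8000/32 = 7750 time
 * units, so the class sleeps only for offtime plus the remaining undertime.
 */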
/* TC_CBQ_OVL_RCLASSIC: penalize by offtime classes in hierarchy, when
   they go overlimit
 */

static void cbq_ovl_rclassic(struct cbq_class *cl)
{
    struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
    struct cbq_class *this = cl;

    do {
        if (cl->level > q->toplevel) {
            cl = NULL;
            break;
        }
    } while ((cl = cl->borrow) != NULL);

    if (cl == NULL)
        cl = this;
    cbq_ovl_classic(cl);
}

/* TC_CBQ_OVL_DELAY: delay until it will go to underlimit */

static void cbq_ovl_delay(struct cbq_class *cl)
{
    struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
    psched_tdiff_t delay = PSCHED_TDIFF(cl->undertime, q->now);

    if (!cl->delayed) {
        unsigned long sched = jiffies;

        delay += cl->offtime;
        if (cl->avgidle < 0)
            delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
        if (cl->avgidle < cl->minidle)
            cl->avgidle = cl->minidle;
        PSCHED_TADD2(q->now, delay, cl->undertime);

        if (delay > 0) {
            sched += PSCHED_US2JIFFIE(delay) + cl->penalty;
            cl->penalized = sched;
            cl->cpriority = TC_CBQ_MAXPRIO;
            q->pmask |= (1<<TC_CBQ_MAXPRIO);
            if (del_timer(&q->delay_timer) &&
                (long)(q->delay_timer.expires - sched) > 0)
                q->delay_timer.expires = sched;
            add_timer(&q->delay_timer);
            cl->delayed = 1;
            cl->xstats.overactions++;
            return;
        }
        delay = 1;
    }
    if (q->wd_expires == 0 || q->wd_expires > delay)
        q->wd_expires = delay;
}

/* TC_CBQ_OVL_LOWPRIO: penalize class by lowering its priority band */

static void cbq_ovl_lowprio(struct cbq_class *cl)
{
    struct cbq_sched_data *q = qdisc_priv(cl->qdisc);

    cl->penalized = jiffies + cl->penalty;

    if (cl->cpriority != cl->priority2) {
        cl->cpriority = cl->priority2;
        q->pmask |= (1<<cl->cpriority);
        cl->xstats.overactions++;
    }
    cbq_ovl_classic(cl);
}

/* TC_CBQ_OVL_DROP: penalize class by dropping */

static void cbq_ovl_drop(struct cbq_class *cl)
{
    if (cl->q->ops->drop)
        if (cl->q->ops->drop(cl->q))
            cl->qdisc->q.qlen--;
    cl->xstats.overactions++;
    cbq_ovl_classic(cl);
}
static void cbq_watchdog(unsigned long arg)
{
    struct Qdisc *sch = (struct Qdisc*)arg;

    sch->flags &= ~TCQ_F_THROTTLED;
    netif_schedule(sch->dev);
}
static unsigned long cbq_undelay_prio(struct cbq_sched_data *q, int prio)
{
    struct cbq_class *cl;
    struct cbq_class *cl_prev = q->active[prio];
    unsigned long now = jiffies;
    unsigned long sched = now;

    do {
        cl = cl_prev->next_alive;
        if ((long)(now - cl->penalized) > 0) {
            cl_prev->next_alive = cl->next_alive;
            cl->next_alive = NULL;
            cl->cpriority = cl->priority;
            cl->delayed = 0;
            cbq_activate_class(cl);

            if (cl == q->active[prio]) {
                q->active[prio] = cl_prev;
                if (cl == q->active[prio]) {
                    q->active[prio] = NULL;
                    return 0;
                }
            }

            cl = cl_prev->next_alive;
        } else if ((long)(sched - cl->penalized) > 0)
            sched = cl->penalized;
    } while ((cl_prev = cl) != q->active[prio]);

    return (long)(sched - now);
}
static void cbq_undelay(unsigned long arg)
{
    struct Qdisc *sch = (struct Qdisc*)arg;
    struct cbq_sched_data *q = qdisc_priv(sch);
    long delay = 0;
    unsigned pmask;

    pmask = q->pmask;
    q->pmask = 0;

    while (pmask) {
        int prio = ffz(~pmask);
        long tmp;

        pmask &= ~(1<<prio);

        tmp = cbq_undelay_prio(q, prio);
        if (tmp > 0) {
            q->pmask |= 1<<prio;
            if (tmp < delay || delay == 0)
                delay = tmp;
        }
    }

    if (delay) {
        q->delay_timer.expires = jiffies + delay;
        add_timer(&q->delay_timer);
    }

    sch->flags &= ~TCQ_F_THROTTLED;
    netif_schedule(sch->dev);
}
#ifdef CONFIG_NET_CLS_POLICE

static int cbq_reshape_fail(struct sk_buff *skb, struct Qdisc *child)
{
    int len = skb->len;
    struct Qdisc *sch = child->__parent;
    struct cbq_sched_data *q = qdisc_priv(sch);
    struct cbq_class *cl = q->rx_class;

    q->rx_class = NULL;

    if (cl && (cl = cbq_reclassify(skb, cl)) != NULL) {

        cbq_mark_toplevel(q, cl);

        q->rx_class = cl;
        cl->q->__parent = sch;

        if (cl->q->enqueue(skb, cl->q) == 0) {
            sch->q.qlen++;
            sch->bstats.packets++;
            sch->bstats.bytes += len;
            if (!cl->next_alive)
                cbq_activate_class(cl);
            return 0;
        }
        sch->qstats.drops++;
        return 0;
    }

    sch->qstats.drops++;
    return -1;
}

#endif
/*
   It is a mission-critical procedure.

   We "regenerate" the toplevel cutoff if the transmitting class
   has backlog and is not regulated. It is not part of the
   original CBQ description, but looks more reasonable.
   Probably, it is wrong. This question needs further investigation.
 */

static __inline__ void
cbq_update_toplevel(struct cbq_sched_data *q, struct cbq_class *cl,
                    struct cbq_class *borrowed)
{
    if (cl && q->toplevel >= borrowed->level) {
        if (cl->q->q.qlen > 1) {
            do {
                if (PSCHED_IS_PASTPERFECT(borrowed->undertime)) {
                    q->toplevel = borrowed->level;
                    return;
                }
            } while ((borrowed = borrowed->borrow) != NULL);
        }
#if 0
        /* It is not necessary now. Uncommenting it
           will save CPU cycles, but decrease fairness.
         */
        q->toplevel = TC_CBQ_MAXLEVEL;
#endif
    }
}
static void
cbq_update(struct cbq_sched_data *q)
{
    struct cbq_class *this = q->tx_class;
    struct cbq_class *cl = this;
    int len = q->tx_len;

    for ( ; cl; cl = cl->share) {
        long avgidle = cl->avgidle;
        long idle;

        cl->bstats.packets++;
        cl->bstats.bytes += len;

        /*
           (now - last) is the total time between packet right edges.
           (last_pktlen/rate) is "virtual" busy time, so that

               idle = (now - last) - last_pktlen/rate
         */

        idle = PSCHED_TDIFF(q->now, cl->last);
        if ((unsigned long)idle > 128*1024*1024) {
            avgidle = cl->maxidle;
        } else {
            idle -= L2T(cl, len);

            /* true_avgidle := (1-W)*true_avgidle + W*idle,
               where W=2^{-ewma_log}. But cl->avgidle is scaled:
               cl->avgidle == true_avgidle/W,
               hence:
             */
            avgidle += idle - (avgidle>>cl->ewma_log);
        }

        if (avgidle <= 0) {
            /* Overlimit or at-limit */

            if (avgidle < cl->minidle)
                avgidle = cl->minidle;

            cl->avgidle = avgidle;

            /* Calculate the expected time when this class
               will be allowed to send.
               It will occur when:
               (1-W)*true_avgidle + W*delay = 0, i.e.
               idle = (1/W - 1)*(-true_avgidle)
               or
               idle = (1 - W)*(-cl->avgidle);
             */
            idle = (-avgidle) - ((-avgidle) >> cl->ewma_log);

            /*
               That is not all.
               To maintain the rate allocated to the class,
               we add to undertime the virtual clock time
               necessary to complete the transmitted packet.
               (len/phys_bandwidth has already been passed
               by the moment of cbq_update)
             */

            idle -= L2T(&q->link, len);
            idle += L2T(cl, len);

            PSCHED_AUDIT_TDIFF(idle);

            PSCHED_TADD2(q->now, idle, cl->undertime);
        } else {
            /* Underlimit */

            PSCHED_SET_PASTPERFECT(cl->undertime);
            if (avgidle > cl->maxidle)
                cl->avgidle = cl->maxidle;
            else
                cl->avgidle = avgidle;
        }
        cl->last = q->now;
    }

    cbq_update_toplevel(q, this, q->tx_borrowed);
}
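/*
 * Numerical illustration (hypothetical values, not from the original source):
 * with ewma_log = 5 the weight is W = 2^-5 = 1/32, and the scaled update
 * avgidle += idle - (avgidle >> 5) is the fixed-point form of
 * true_avgidle = (31/32)*true_avgidle + (1/32)*idle.  Starting from
 * avgidle = 3200 with a measured idle of -800 (the class sent faster than its
 * allocated rate), the new value is 3200 - 800 - 100 = 2300; a run of such
 * packets drives avgidle negative, at which point the class is treated as
 * overlimit and its undertime is pushed (1-W)*(-avgidle) plus the rate-table
 * correction into the future.
 */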
static __inline__ struct cbq_class *
cbq_under_limit(struct cbq_class *cl)
{
    struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
    struct cbq_class *this_cl = cl;

    if (cl->tparent == NULL)
        return cl;

    if (PSCHED_IS_PASTPERFECT(cl->undertime) ||
        !PSCHED_TLESS(q->now, cl->undertime)) {
        cl->delayed = 0;
        return cl;
    }

    do {
        /* It is a very suspicious place. Now the overlimit
           action is generated for non-bounded classes
           only if the link is completely congested.
           Though it agrees with the ancestor-only paradigm,
           it looks very stupid. In particular, it means that
           this chunk of code will either never be called
           or result in strong amplification of burstiness.
           Dangerous, silly, and, however, no other solution exists.
         */
        if ((cl = cl->borrow) == NULL) {
            this_cl->qstats.overlimits++;
            this_cl->overlimit(this_cl);
            return NULL;
        }
        if (cl->level > q->toplevel)
            return NULL;
    } while (!PSCHED_IS_PASTPERFECT(cl->undertime) &&
             PSCHED_TLESS(q->now, cl->undertime));

    cl->delayed = 0;
    return cl;
}
static __inline__ struct sk_buff *
cbq_dequeue_prio(struct Qdisc *sch, int prio)
{
    struct cbq_sched_data *q = qdisc_priv(sch);
    struct cbq_class *cl_tail, *cl_prev, *cl;
    struct sk_buff *skb;
    int deficit;

    cl_tail = cl_prev = q->active[prio];
    cl = cl_prev->next_alive;

    do {
        deficit = 0;

        /* Start round */
        do {
            struct cbq_class *borrow = cl;

            if (cl->q->q.qlen &&
                (borrow = cbq_under_limit(cl)) == NULL)
                goto skip_class;

            if (cl->deficit <= 0) {
                /* Class exhausted its allotment per
                   this round. Switch to the next one.
                 */
                deficit = 1;
                cl->deficit += cl->quantum;
                goto next_class;
            }

            skb = cl->q->dequeue(cl->q);

            /* Class did not give us any skb :-(
               It could occur even if cl->q->q.qlen != 0,
               f.e. if cl->q == "tbf"
             */
            if (skb == NULL)
                goto skip_class;

            cl->deficit -= skb->len;
            q->tx_class = cl;
            q->tx_borrowed = borrow;
            if (borrow != cl) {
#ifndef CBQ_XSTATS_BORROWS_BYTES
                borrow->xstats.borrows++;
                cl->xstats.borrows++;
#else
                borrow->xstats.borrows += skb->len;
                cl->xstats.borrows += skb->len;
#endif
            }
            q->tx_len = skb->len;

            if (cl->deficit <= 0) {
                q->active[prio] = cl;
                cl = cl->next_alive;
                cl->deficit += cl->quantum;
            }
            return skb;

skip_class:
            if (cl->q->q.qlen == 0 || prio != cl->cpriority) {
                /* Class is empty or penalized.
                   Unlink it from active chain.
                 */
                cl_prev->next_alive = cl->next_alive;
                cl->next_alive = NULL;

                /* Did cl_tail point to it? */
                if (cl == cl_tail) {
                    /* Repair it! */
                    cl_tail = cl_prev;

                    /* Was it the last class in this band? */
                    if (cl == cl_tail) {
                        /* Kill the band! */
                        q->active[prio] = NULL;
                        q->activemask &= ~(1<<prio);
                        if (cl->q->q.qlen)
                            cbq_activate_class(cl);
                        return NULL;
                    }

                    q->active[prio] = cl_tail;
                }
                if (cl->q->q.qlen)
                    cbq_activate_class(cl);

                cl = cl_prev;
            }

next_class:
            cl_prev = cl;
            cl = cl->next_alive;
        } while (cl_prev != cl_tail);
    } while (deficit);

    q->active[prio] = cl_prev;

    return NULL;
}
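/*
 * Round-robin sketch (hypothetical numbers, not from the original source):
 * each active class in a band keeps a byte deficit.  Suppose classes A and B
 * share one priority band with quantum 1500 each and A currently has a
 * deficit of 200.  A may still send one packet, because only a non-positive
 * deficit forces the switch above; if that packet is 1000 bytes the deficit
 * becomes -800, A is rotated towards the tail and topped up by its quantum
 * before being served again, so over many rounds each class's share of the
 * band converges to quantum_i / sum(quantum_j).
 */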
static __inline__ struct sk_buff *
cbq_dequeue_1(struct Qdisc *sch)
{
    struct cbq_sched_data *q = qdisc_priv(sch);
    struct sk_buff *skb;
    unsigned activemask;

    activemask = q->activemask&0xFF;
    while (activemask) {
        int prio = ffz(~activemask);
        activemask &= ~(1<<prio);
        skb = cbq_dequeue_prio(sch, prio);
        if (skb)
            return skb;
    }
    return NULL;
}
static struct sk_buff *
cbq_dequeue(struct Qdisc *sch)
{
    struct sk_buff *skb;
    struct cbq_sched_data *q = qdisc_priv(sch);
    psched_time_t now;
    psched_tdiff_t incr;

    PSCHED_GET_TIME(now);
    incr = PSCHED_TDIFF(now, q->now_rt);

    if (q->tx_class) {
        psched_tdiff_t incr2;
        /* Time integrator. We calculate EOS time
           by adding the expected packet transmission time.
           If real time is greater, we warp the artificial clock,
           so that

           cbq_time = max(real_time, work);
         */
        incr2 = L2T(&q->link, q->tx_len);
        PSCHED_TADD(q->now, incr2);
        cbq_update(q);
        if ((incr -= incr2) < 0)
            incr = 0;
    }
    PSCHED_TADD(q->now, incr);
    q->now_rt = now;

    for (;;) {
        q->wd_expires = 0;

        skb = cbq_dequeue_1(sch);
        if (skb) {
            sch->q.qlen--;
            sch->flags &= ~TCQ_F_THROTTLED;
            return skb;
        }

        /* All the classes are overlimit.

           It is possible, if:

           1. Scheduler is empty.
           2. Toplevel cutoff inhibited borrowing.
           3. Root class is overlimit.

           Reset the 2nd and 3rd conditions and retry.

           Note that NS and cbq-2.0 are buggy here: peeking
           an arbitrary class is appropriate for ancestor-only
           sharing, but not for the toplevel algorithm.

           Our version is better but slower, because it requires
           two passes; this is unavoidable with top-level sharing.
        */

        if (q->toplevel == TC_CBQ_MAXLEVEL &&
            PSCHED_IS_PASTPERFECT(q->link.undertime))
            break;

        q->toplevel = TC_CBQ_MAXLEVEL;
        PSCHED_SET_PASTPERFECT(q->link.undertime);
    }

    /* No packets in scheduler or nobody wants to give them to us :-(
       Sigh... start watchdog timer in the last case. */

    if (sch->q.qlen) {
        sch->qstats.overlimits++;
        if (q->wd_expires) {
            long delay = PSCHED_US2JIFFIE(q->wd_expires);
            if (delay <= 0)
                delay = 1;
            mod_timer(&q->wd_timer, jiffies + delay);
            sch->flags |= TCQ_F_THROTTLED;
        }
    }
    return NULL;
}
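/*
 * Clock-integrator note (hypothetical numbers, not from the original source):
 * q->now is an artificial clock advanced by L2T(&q->link, q->tx_len), the
 * expected transmission time of the last released packet, while q->now_rt
 * tracks real time.  Any real-time increment beyond that expectation is added
 * as well, so the effective advance is max(real_time, work): if the previous
 * packet was expected to take 120 us but 200 us of real time elapsed, q->now
 * advances by 200 us; if only 80 us elapsed, it still advances by 120 us.
 */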
/* CBQ class maintenance routines */

static void cbq_adjust_levels(struct cbq_class *this)
{
    if (this == NULL)
        return;

    do {
        int level = 0;
        struct cbq_class *cl;

        if ((cl = this->children) != NULL) {
            do {
                if (cl->level > level)
                    level = cl->level;
            } while ((cl = cl->sibling) != this->children);
        }
        this->level = level + 1;
    } while ((this = this->tparent) != NULL);
}
static void cbq_normalize_quanta(struct cbq_sched_data *q, int prio)
{
    struct cbq_class *cl;
    unsigned h;

    if (q->quanta[prio] == 0)
        return;

    for (h = 0; h < 16; h++) {
        for (cl = q->classes[h]; cl; cl = cl->next) {
            /* BUGGGG... Beware! This expression suffers from
               arithmetic overflows!
             */
            if (cl->priority == prio) {
                cl->quantum = (cl->weight*cl->allot*q->nclasses[prio])/
                    q->quanta[prio];
            }
            if (cl->quantum <= 0 || cl->quantum > 32*cl->qdisc->dev->mtu) {
                printk(KERN_WARNING "CBQ: class %08x has bad quantum==%ld, repaired.\n", cl->classid, cl->quantum);
                cl->quantum = cl->qdisc->dev->mtu/2 + 1;
            }
        }
    }
}
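/*
 * Example (hypothetical numbers, not from the original source): the formula
 * above is quantum_i = weight_i * allot_i * nclasses[prio] / q->quanta[prio],
 * where q->quanta[prio] is the sum of the weights in the band (see
 * cbq_addprio below).  With two classes sharing a band, allot 1514 and
 * weights 3 and 1, the sum is 4 and nclasses is 2, so the classes get
 * 1514*3*2/4 = 2271 and 1514*1*2/4 = 757 bytes per round.  The products can
 * overflow a 32-bit long for large weights, which is what the warning
 * comment above refers to.
 */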
static void cbq_sync_defmap(struct cbq_class *cl)
{
    struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
    struct cbq_class *split = cl->split;
    unsigned h;
    int i;

    if (split == NULL)
        return;

    for (i = 0; i <= TC_PRIO_MAX; i++) {
        if (split->defaults[i] == cl && !(cl->defmap&(1<<i)))
            split->defaults[i] = NULL;
    }

    for (i = 0; i <= TC_PRIO_MAX; i++) {
        int level = split->level;

        if (split->defaults[i])
            continue;

        for (h = 0; h < 16; h++) {
            struct cbq_class *c;

            for (c = q->classes[h]; c; c = c->next) {
                if (c->split == split && c->level < level &&
                    c->defmap&(1<<i)) {
                    split->defaults[i] = c;
                    level = c->level;
                }
            }
        }
    }
}

static void cbq_change_defmap(struct cbq_class *cl, u32 splitid, u32 def, u32 mask)
{
    struct cbq_class *split = NULL;

    if (splitid == 0) {
        if ((split = cl->split) == NULL)
            return;
        splitid = split->classid;
    }

    if (split == NULL || split->classid != splitid) {
        for (split = cl->tparent; split; split = split->tparent)
            if (split->classid == splitid)
                break;
    }

    if (split == NULL)
        return;

    if (cl->split != split) {
        cl->defmap = 0;
        cbq_sync_defmap(cl);
        cl->split = split;
        cl->defmap = def&mask;
    } else
        cl->defmap = (cl->defmap&~mask)|(def&mask);

    cbq_sync_defmap(cl);
}
static void cbq_unlink_class(struct cbq_class *this)
{
    struct cbq_class *cl, **clp;
    struct cbq_sched_data *q = qdisc_priv(this->qdisc);

    for (clp = &q->classes[cbq_hash(this->classid)]; (cl = *clp) != NULL; clp = &cl->next) {
        if (cl == this) {
            *clp = cl->next;
            cl->next = NULL;
            break;
        }
    }

    if (this->tparent) {
        clp = &this->sibling;
        cl = *clp;
        do {
            if (cl == this) {
                *clp = cl->sibling;
                break;
            }
            clp = &cl->sibling;
        } while ((cl = *clp) != this->sibling);

        if (this->tparent->children == this) {
            this->tparent->children = this->sibling;
            if (this->sibling == this)
                this->tparent->children = NULL;
        }
    } else {
        BUG_TRAP(this->sibling == this);
    }
}
static void cbq_link_class(struct cbq_class *this)
{
    struct cbq_sched_data *q = qdisc_priv(this->qdisc);
    unsigned h = cbq_hash(this->classid);
    struct cbq_class *parent = this->tparent;

    this->sibling = this;
    this->next = q->classes[h];
    q->classes[h] = this;

    if (parent == NULL)
        return;

    if (parent->children == NULL) {
        parent->children = this;
    } else {
        this->sibling = parent->children->sibling;
        parent->children->sibling = this;
    }
}

static unsigned int cbq_drop(struct Qdisc *sch)
{
    struct cbq_sched_data *q = qdisc_priv(sch);
    struct cbq_class *cl, *cl_head;
    int prio;
    unsigned int len;

    for (prio = TC_CBQ_MAXPRIO; prio >= 0; prio--) {
        if ((cl_head = q->active[prio]) == NULL)
            continue;

        cl = cl_head;
        do {
            if (cl->q->ops->drop && (len = cl->q->ops->drop(cl->q))) {
                sch->q.qlen--;
                return len;
            }
        } while ((cl = cl->next_alive) != cl_head);
    }
    return 0;
}
static void
cbq_reset(struct Qdisc *sch)
{
    struct cbq_sched_data *q = qdisc_priv(sch);
    struct cbq_class *cl;
    int prio;
    unsigned h;

    q->activemask = 0;
    q->pmask = 0;
    q->tx_class = NULL;
    q->tx_borrowed = NULL;
    del_timer(&q->wd_timer);
    del_timer(&q->delay_timer);
    q->toplevel = TC_CBQ_MAXLEVEL;
    PSCHED_GET_TIME(q->now);
    q->now_rt = q->now;

    for (prio = 0; prio <= TC_CBQ_MAXPRIO; prio++)
        q->active[prio] = NULL;

    for (h = 0; h < 16; h++) {
        for (cl = q->classes[h]; cl; cl = cl->next) {
            qdisc_reset(cl->q);

            cl->next_alive = NULL;
            PSCHED_SET_PASTPERFECT(cl->undertime);
            cl->avgidle = cl->maxidle;
            cl->deficit = cl->quantum;
            cl->cpriority = cl->priority;
        }
    }
    sch->q.qlen = 0;
}
static int cbq_set_lss(struct cbq_class *cl, struct tc_cbq_lssopt *lss)
{
    if (lss->change&TCF_CBQ_LSS_FLAGS) {
        cl->share = (lss->flags&TCF_CBQ_LSS_ISOLATED) ? NULL : cl->tparent;
        cl->borrow = (lss->flags&TCF_CBQ_LSS_BOUNDED) ? NULL : cl->tparent;
    }
    if (lss->change&TCF_CBQ_LSS_EWMA)
        cl->ewma_log = lss->ewma_log;
    if (lss->change&TCF_CBQ_LSS_AVPKT)
        cl->avpkt = lss->avpkt;
    if (lss->change&TCF_CBQ_LSS_MINIDLE)
        cl->minidle = -(long)lss->minidle;
    if (lss->change&TCF_CBQ_LSS_MAXIDLE) {
        cl->maxidle = lss->maxidle;
        cl->avgidle = lss->maxidle;
    }
    if (lss->change&TCF_CBQ_LSS_OFFTIME)
        cl->offtime = lss->offtime;
    return 0;
}
static void cbq_rmprio(struct cbq_sched_data *q, struct cbq_class *cl)
{
    q->nclasses[cl->priority]--;
    q->quanta[cl->priority] -= cl->weight;
    cbq_normalize_quanta(q, cl->priority);
}

static void cbq_addprio(struct cbq_sched_data *q, struct cbq_class *cl)
{
    q->nclasses[cl->priority]++;
    q->quanta[cl->priority] += cl->weight;
    cbq_normalize_quanta(q, cl->priority);
}
static int cbq_set_wrr(struct cbq_class *cl, struct tc_cbq_wrropt *wrr)
{
    struct cbq_sched_data *q = qdisc_priv(cl->qdisc);

    if (wrr->allot)
        cl->allot = wrr->allot;
    if (wrr->weight)
        cl->weight = wrr->weight;
    if (wrr->priority) {
        cl->priority = wrr->priority - 1;
        cl->cpriority = cl->priority;
        if (cl->priority >= cl->priority2)
            cl->priority2 = TC_CBQ_MAXPRIO - 1;
    }

    cbq_addprio(q, cl);
    return 0;
}

static int cbq_set_overlimit(struct cbq_class *cl, struct tc_cbq_ovl *ovl)
{
    switch (ovl->strategy) {
    case TC_CBQ_OVL_CLASSIC:
        cl->overlimit = cbq_ovl_classic;
        break;
    case TC_CBQ_OVL_DELAY:
        cl->overlimit = cbq_ovl_delay;
        break;
    case TC_CBQ_OVL_LOWPRIO:
        if (ovl->priority2 - 1 >= TC_CBQ_MAXPRIO ||
            ovl->priority2 - 1 <= cl->priority)
            return -EINVAL;
        cl->priority2 = ovl->priority2 - 1;
        cl->overlimit = cbq_ovl_lowprio;
        break;
    case TC_CBQ_OVL_DROP:
        cl->overlimit = cbq_ovl_drop;
        break;
    case TC_CBQ_OVL_RCLASSIC:
        cl->overlimit = cbq_ovl_rclassic;
        break;
    default:
        return -EINVAL;
    }
    cl->penalty = (ovl->penalty*HZ)/1000;
    return 0;
}

#ifdef CONFIG_NET_CLS_POLICE
static int cbq_set_police(struct cbq_class *cl, struct tc_cbq_police *p)
{
    cl->police = p->police;

    if (cl->q->handle) {
        if (p->police == TC_POLICE_RECLASSIFY)
            cl->q->reshape_fail = cbq_reshape_fail;
        else
            cl->q->reshape_fail = NULL;
    }
    return 0;
}
#endif
static int cbq_set_fopt(struct cbq_class *cl, struct tc_cbq_fopt *fopt)
{
    cbq_change_defmap(cl, fopt->split, fopt->defmap, fopt->defchange);
    return 0;
}
static int cbq_init(struct Qdisc *sch, struct rtattr *opt)
{
    struct cbq_sched_data *q = qdisc_priv(sch);
    struct rtattr *tb[TCA_CBQ_MAX];
    struct tc_ratespec *r;

    if (rtattr_parse_nested(tb, TCA_CBQ_MAX, opt) < 0 ||
        tb[TCA_CBQ_RTAB-1] == NULL || tb[TCA_CBQ_RATE-1] == NULL ||
        RTA_PAYLOAD(tb[TCA_CBQ_RATE-1]) < sizeof(struct tc_ratespec))
        return -EINVAL;

    if (tb[TCA_CBQ_LSSOPT-1] &&
        RTA_PAYLOAD(tb[TCA_CBQ_LSSOPT-1]) < sizeof(struct tc_cbq_lssopt))
        return -EINVAL;

    r = RTA_DATA(tb[TCA_CBQ_RATE-1]);

    if ((q->link.R_tab = qdisc_get_rtab(r, tb[TCA_CBQ_RTAB-1])) == NULL)
        return -EINVAL;

    q->link.refcnt = 1;
    q->link.sibling = &q->link;
    q->link.classid = sch->handle;
    q->link.qdisc = sch;
    if (!(q->link.q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops)))
        q->link.q = &noop_qdisc;

    q->link.priority = TC_CBQ_MAXPRIO-1;
    q->link.priority2 = TC_CBQ_MAXPRIO-1;
    q->link.cpriority = TC_CBQ_MAXPRIO-1;
    q->link.ovl_strategy = TC_CBQ_OVL_CLASSIC;
    q->link.overlimit = cbq_ovl_classic;
    q->link.allot = psched_mtu(sch->dev);
    q->link.quantum = q->link.allot;
    q->link.weight = q->link.R_tab->rate.rate;

    q->link.ewma_log = TC_CBQ_DEF_EWMA;
    q->link.avpkt = q->link.allot/2;
    q->link.minidle = -0x7FFFFFFF;
    q->link.stats_lock = &sch->dev->queue_lock;

    init_timer(&q->wd_timer);
    q->wd_timer.data = (unsigned long)sch;
    q->wd_timer.function = cbq_watchdog;
    init_timer(&q->delay_timer);
    q->delay_timer.data = (unsigned long)sch;
    q->delay_timer.function = cbq_undelay;
    q->toplevel = TC_CBQ_MAXLEVEL;
    PSCHED_GET_TIME(q->now);
    q->now_rt = q->now;

    cbq_link_class(&q->link);

    if (tb[TCA_CBQ_LSSOPT-1])
        cbq_set_lss(&q->link, RTA_DATA(tb[TCA_CBQ_LSSOPT-1]));

    cbq_addprio(q, &q->link);
    return 0;
}
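/*
 * Typical configuration for orientation (illustrative; the exact tc syntax
 * depends on the iproute2 version in use):
 *
 *   tc qdisc add dev eth0 root handle 1: cbq bandwidth 100Mbit avpkt 1000
 *   tc class add dev eth0 parent 1: classid 1:1 cbq bandwidth 100Mbit \
 *       rate 10Mbit allot 1514 prio 5 avpkt 1000 bounded
 *
 * cbq_init() above only sets up the root ("link") class; child classes such
 * as the hypothetical 1:1 are created later via cbq_change_class().
 */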
static __inline__ int cbq_dump_rate(struct sk_buff *skb, struct cbq_class *cl)
{
    unsigned char *b = skb->tail;

    RTA_PUT(skb, TCA_CBQ_RATE, sizeof(cl->R_tab->rate), &cl->R_tab->rate);
    return skb->len;

rtattr_failure:
    skb_trim(skb, b - skb->data);
    return -1;
}

static __inline__ int cbq_dump_lss(struct sk_buff *skb, struct cbq_class *cl)
{
    unsigned char *b = skb->tail;
    struct tc_cbq_lssopt opt;

    opt.flags = 0;
    if (cl->borrow == NULL)
        opt.flags |= TCF_CBQ_LSS_BOUNDED;
    if (cl->share == NULL)
        opt.flags |= TCF_CBQ_LSS_ISOLATED;
    opt.ewma_log = cl->ewma_log;
    opt.level = cl->level;
    opt.avpkt = cl->avpkt;
    opt.maxidle = cl->maxidle;
    opt.minidle = (u32)(-cl->minidle);
    opt.offtime = cl->offtime;
    opt.change = ~0;
    RTA_PUT(skb, TCA_CBQ_LSSOPT, sizeof(opt), &opt);
    return skb->len;

rtattr_failure:
    skb_trim(skb, b - skb->data);
    return -1;
}

static __inline__ int cbq_dump_wrr(struct sk_buff *skb, struct cbq_class *cl)
{
    unsigned char *b = skb->tail;
    struct tc_cbq_wrropt opt;

    opt.allot = cl->allot;
    opt.priority = cl->priority + 1;
    opt.cpriority = cl->cpriority + 1;
    opt.weight = cl->weight;
    RTA_PUT(skb, TCA_CBQ_WRROPT, sizeof(opt), &opt);
    return skb->len;

rtattr_failure:
    skb_trim(skb, b - skb->data);
    return -1;
}

static __inline__ int cbq_dump_ovl(struct sk_buff *skb, struct cbq_class *cl)
{
    unsigned char *b = skb->tail;
    struct tc_cbq_ovl opt;

    opt.strategy = cl->ovl_strategy;
    opt.priority2 = cl->priority2 + 1;
    opt.penalty = (cl->penalty*1000)/HZ;
    RTA_PUT(skb, TCA_CBQ_OVL_STRATEGY, sizeof(opt), &opt);
    return skb->len;

rtattr_failure:
    skb_trim(skb, b - skb->data);
    return -1;
}

static __inline__ int cbq_dump_fopt(struct sk_buff *skb, struct cbq_class *cl)
{
    unsigned char *b = skb->tail;
    struct tc_cbq_fopt opt;

    if (cl->split || cl->defmap) {
        opt.split = cl->split ? cl->split->classid : 0;
        opt.defmap = cl->defmap;
        opt.defchange = ~0;
        RTA_PUT(skb, TCA_CBQ_FOPT, sizeof(opt), &opt);
    }
    return skb->len;

rtattr_failure:
    skb_trim(skb, b - skb->data);
    return -1;
}

#ifdef CONFIG_NET_CLS_POLICE
static __inline__ int cbq_dump_police(struct sk_buff *skb, struct cbq_class *cl)
{
    unsigned char *b = skb->tail;
    struct tc_cbq_police opt;

    if (cl->police) {
        opt.police = cl->police;
        RTA_PUT(skb, TCA_CBQ_POLICE, sizeof(opt), &opt);
    }
    return skb->len;

rtattr_failure:
    skb_trim(skb, b - skb->data);
    return -1;
}
#endif
static int cbq_dump_attr(struct sk_buff *skb, struct cbq_class *cl)
{
    if (cbq_dump_lss(skb, cl) < 0 ||
        cbq_dump_rate(skb, cl) < 0 ||
        cbq_dump_wrr(skb, cl) < 0 ||
        cbq_dump_ovl(skb, cl) < 0 ||
#ifdef CONFIG_NET_CLS_POLICE
        cbq_dump_police(skb, cl) < 0 ||
#endif
        cbq_dump_fopt(skb, cl) < 0)
        return -1;
    return 0;
}

static int cbq_dump(struct Qdisc *sch, struct sk_buff *skb)
{
    struct cbq_sched_data *q = qdisc_priv(sch);
    unsigned char *b = skb->tail;
    struct rtattr *rta;

    rta = (struct rtattr*)b;
    RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
    if (cbq_dump_attr(skb, &q->link) < 0)
        goto rtattr_failure;
    rta->rta_len = skb->tail - b;
    return skb->len;

rtattr_failure:
    skb_trim(skb, b - skb->data);
    return -1;
}

static int
cbq_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
{
    struct cbq_sched_data *q = qdisc_priv(sch);

    q->link.xstats.avgidle = q->link.avgidle;
    return gnet_stats_copy_app(d, &q->link.xstats, sizeof(q->link.xstats));
}

static int
cbq_dump_class(struct Qdisc *sch, unsigned long arg,
               struct sk_buff *skb, struct tcmsg *tcm)
{
    struct cbq_class *cl = (struct cbq_class*)arg;
    unsigned char *b = skb->tail;
    struct rtattr *rta;

    if (cl->tparent)
        tcm->tcm_parent = cl->tparent->classid;
    else
        tcm->tcm_parent = TC_H_ROOT;
    tcm->tcm_handle = cl->classid;
    tcm->tcm_info = cl->q->handle;

    rta = (struct rtattr*)b;
    RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
    if (cbq_dump_attr(skb, cl) < 0)
        goto rtattr_failure;
    rta->rta_len = skb->tail - b;
    return skb->len;

rtattr_failure:
    skb_trim(skb, b - skb->data);
    return -1;
}

static int
cbq_dump_class_stats(struct Qdisc *sch, unsigned long arg,
                     struct gnet_dump *d)
{
    struct cbq_sched_data *q = qdisc_priv(sch);
    struct cbq_class *cl = (struct cbq_class*)arg;

    cl->qstats.qlen = cl->q->q.qlen;
    cl->xstats.avgidle = cl->avgidle;
    cl->xstats.undertime = 0;

    if (!PSCHED_IS_PASTPERFECT(cl->undertime))
        cl->xstats.undertime = PSCHED_TDIFF(cl->undertime, q->now);

    if (gnet_stats_copy_basic(d, &cl->bstats) < 0 ||
#ifdef CONFIG_NET_ESTIMATOR
        gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
#endif
        gnet_stats_copy_queue(d, &cl->qstats) < 0)
        return -1;

    return gnet_stats_copy_app(d, &cl->xstats, sizeof(cl->xstats));
}
static int cbq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
                     struct Qdisc **old)
{
    struct cbq_class *cl = (struct cbq_class*)arg;

    if (cl) {
        if (new == NULL) {
            if ((new = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops)) == NULL)
                return -ENOBUFS;
        } else {
#ifdef CONFIG_NET_CLS_POLICE
            if (cl->police == TC_POLICE_RECLASSIFY)
                new->reshape_fail = cbq_reshape_fail;
#endif
        }
        sch_tree_lock(sch);
        *old = cl->q;
        cl->q = new;
        sch->q.qlen -= (*old)->q.qlen;
        qdisc_reset(*old);
        sch_tree_unlock(sch);

        return 0;
    }
    return -ENOENT;
}

static struct Qdisc *
cbq_leaf(struct Qdisc *sch, unsigned long arg)
{
    struct cbq_class *cl = (struct cbq_class*)arg;

    return cl ? cl->q : NULL;
}

static unsigned long cbq_get(struct Qdisc *sch, u32 classid)
{
    struct cbq_sched_data *q = qdisc_priv(sch);
    struct cbq_class *cl = cbq_class_lookup(q, classid);

    if (cl) {
        cl->refcnt++;
        return (unsigned long)cl;
    }
    return 0;
}
static void cbq_destroy_filters(struct cbq_class *cl)
{
    struct tcf_proto *tp;

    while ((tp = cl->filter_list) != NULL) {
        cl->filter_list = tp->next;
        tcf_destroy(tp);
    }
}

static void cbq_destroy_class(struct Qdisc *sch, struct cbq_class *cl)
{
    struct cbq_sched_data *q = qdisc_priv(sch);

    BUG_TRAP(!cl->filters);

    cbq_destroy_filters(cl);
    qdisc_destroy(cl->q);
    qdisc_put_rtab(cl->R_tab);
#ifdef CONFIG_NET_ESTIMATOR
    gen_kill_estimator(&cl->bstats, &cl->rate_est);
#endif
    if (cl != &q->link)
        kfree(cl);
}

static void
cbq_destroy(struct Qdisc *sch)
{
    struct cbq_sched_data *q = qdisc_priv(sch);
    struct cbq_class *cl;
    unsigned h;

#ifdef CONFIG_NET_CLS_POLICE
    q->rx_class = NULL;
#endif
    /*
     * Filters must be destroyed first because we don't destroy the
     * classes from root to leafs which means that filters can still
     * be bound to classes which have been destroyed already. --TGR '04
     */
    for (h = 0; h < 16; h++)
        for (cl = q->classes[h]; cl; cl = cl->next)
            cbq_destroy_filters(cl);

    for (h = 0; h < 16; h++) {
        struct cbq_class *next;

        for (cl = q->classes[h]; cl; cl = next) {
            next = cl->next;
            cbq_destroy_class(sch, cl);
        }
    }
}

static void cbq_put(struct Qdisc *sch, unsigned long arg)
{
    struct cbq_class *cl = (struct cbq_class*)arg;

    if (--cl->refcnt == 0) {
#ifdef CONFIG_NET_CLS_POLICE
        struct cbq_sched_data *q = qdisc_priv(sch);

        spin_lock_bh(&sch->dev->queue_lock);
        if (q->rx_class == cl)
            q->rx_class = NULL;
        spin_unlock_bh(&sch->dev->queue_lock);
#endif

        cbq_destroy_class(sch, cl);
    }
}
static int
cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct rtattr **tca,
                 unsigned long *arg)
{
    int err;
    struct cbq_sched_data *q = qdisc_priv(sch);
    struct cbq_class *cl = (struct cbq_class*)*arg;
    struct rtattr *opt = tca[TCA_OPTIONS-1];
    struct rtattr *tb[TCA_CBQ_MAX];
    struct cbq_class *parent;
    struct qdisc_rate_table *rtab = NULL;

    if (opt == NULL || rtattr_parse_nested(tb, TCA_CBQ_MAX, opt))
        return -EINVAL;

    if (tb[TCA_CBQ_OVL_STRATEGY-1] &&
        RTA_PAYLOAD(tb[TCA_CBQ_OVL_STRATEGY-1]) < sizeof(struct tc_cbq_ovl))
        return -EINVAL;

    if (tb[TCA_CBQ_FOPT-1] &&
        RTA_PAYLOAD(tb[TCA_CBQ_FOPT-1]) < sizeof(struct tc_cbq_fopt))
        return -EINVAL;

    if (tb[TCA_CBQ_RATE-1] &&
        RTA_PAYLOAD(tb[TCA_CBQ_RATE-1]) < sizeof(struct tc_ratespec))
        return -EINVAL;

    if (tb[TCA_CBQ_LSSOPT-1] &&
        RTA_PAYLOAD(tb[TCA_CBQ_LSSOPT-1]) < sizeof(struct tc_cbq_lssopt))
        return -EINVAL;

    if (tb[TCA_CBQ_WRROPT-1] &&
        RTA_PAYLOAD(tb[TCA_CBQ_WRROPT-1]) < sizeof(struct tc_cbq_wrropt))
        return -EINVAL;

#ifdef CONFIG_NET_CLS_POLICE
    if (tb[TCA_CBQ_POLICE-1] &&
        RTA_PAYLOAD(tb[TCA_CBQ_POLICE-1]) < sizeof(struct tc_cbq_police))
        return -EINVAL;
#endif

    if (cl) {
        /* Check parent */
        if (parentid) {
            if (cl->tparent && cl->tparent->classid != parentid)
                return -EINVAL;
            if (!cl->tparent && parentid != TC_H_ROOT)
                return -EINVAL;
        }

        if (tb[TCA_CBQ_RATE-1]) {
            rtab = qdisc_get_rtab(RTA_DATA(tb[TCA_CBQ_RATE-1]), tb[TCA_CBQ_RTAB-1]);
            if (rtab == NULL)
                return -EINVAL;
        }

        /* Change class parameters */
        sch_tree_lock(sch);

        if (cl->next_alive != NULL)
            cbq_deactivate_class(cl);

        if (rtab) {
            rtab = xchg(&cl->R_tab, rtab);
            qdisc_put_rtab(rtab);
        }

        if (tb[TCA_CBQ_LSSOPT-1])
            cbq_set_lss(cl, RTA_DATA(tb[TCA_CBQ_LSSOPT-1]));

        if (tb[TCA_CBQ_WRROPT-1]) {
            cbq_rmprio(q, cl);
            cbq_set_wrr(cl, RTA_DATA(tb[TCA_CBQ_WRROPT-1]));
        }

        if (tb[TCA_CBQ_OVL_STRATEGY-1])
            cbq_set_overlimit(cl, RTA_DATA(tb[TCA_CBQ_OVL_STRATEGY-1]));

#ifdef CONFIG_NET_CLS_POLICE
        if (tb[TCA_CBQ_POLICE-1])
            cbq_set_police(cl, RTA_DATA(tb[TCA_CBQ_POLICE-1]));
#endif

        if (tb[TCA_CBQ_FOPT-1])
            cbq_set_fopt(cl, RTA_DATA(tb[TCA_CBQ_FOPT-1]));

        if (cl->q->q.qlen)
            cbq_activate_class(cl);

        sch_tree_unlock(sch);

#ifdef CONFIG_NET_ESTIMATOR
        if (tca[TCA_RATE-1])
            gen_replace_estimator(&cl->bstats, &cl->rate_est,
                                  cl->stats_lock, tca[TCA_RATE-1]);
#endif
        return 0;
    }

    if (parentid == TC_H_ROOT)
        return -EINVAL;

    if (tb[TCA_CBQ_WRROPT-1] == NULL || tb[TCA_CBQ_RATE-1] == NULL ||
        tb[TCA_CBQ_LSSOPT-1] == NULL)
        return -EINVAL;

    rtab = qdisc_get_rtab(RTA_DATA(tb[TCA_CBQ_RATE-1]), tb[TCA_CBQ_RTAB-1]);
    if (rtab == NULL)
        return -EINVAL;

    if (classid) {
        err = -EINVAL;
        if (TC_H_MAJ(classid^sch->handle) || cbq_class_lookup(q, classid))
            goto failure;
    } else {
        int i;
        classid = TC_H_MAKE(sch->handle, 0x8000);

        for (i = 0; i < 0x8000; i++) {
            if (++q->hgenerator >= 0x8000)
                q->hgenerator = 1;
            if (cbq_class_lookup(q, classid|q->hgenerator) == NULL)
                break;
        }
        err = -ENOSR;
        if (i >= 0x8000)
            goto failure;
        classid = classid|q->hgenerator;
    }

    parent = &q->link;
    if (parentid) {
        parent = cbq_class_lookup(q, parentid);
        err = -EINVAL;
        if (parent == NULL)
            goto failure;
    }

    err = -ENOBUFS;
    cl = kmalloc(sizeof(*cl), GFP_KERNEL);
    if (cl == NULL)
        goto failure;
    memset(cl, 0, sizeof(*cl));
    cl->R_tab = rtab;
    rtab = NULL;
    cl->refcnt = 1;
    if (!(cl->q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops)))
        cl->q = &noop_qdisc;
    cl->classid = classid;
    cl->tparent = parent;
    cl->qdisc = sch;
    cl->allot = parent->allot;
    cl->quantum = cl->allot;
    cl->weight = cl->R_tab->rate.rate;
    cl->stats_lock = &sch->dev->queue_lock;

    sch_tree_lock(sch);
    cbq_link_class(cl);
    cl->borrow = cl->tparent;
    if (cl->tparent != &q->link)
        cl->share = cl->tparent;
    cbq_adjust_levels(parent);
    cl->minidle = -0x7FFFFFFF;
    cbq_set_lss(cl, RTA_DATA(tb[TCA_CBQ_LSSOPT-1]));
    cbq_set_wrr(cl, RTA_DATA(tb[TCA_CBQ_WRROPT-1]));
    if (cl->ewma_log == 0)
        cl->ewma_log = q->link.ewma_log;
    if (cl->maxidle == 0)
        cl->maxidle = q->link.maxidle;
    if (cl->avpkt == 0)
        cl->avpkt = q->link.avpkt;
    cl->overlimit = cbq_ovl_classic;
    if (tb[TCA_CBQ_OVL_STRATEGY-1])
        cbq_set_overlimit(cl, RTA_DATA(tb[TCA_CBQ_OVL_STRATEGY-1]));
#ifdef CONFIG_NET_CLS_POLICE
    if (tb[TCA_CBQ_POLICE-1])
        cbq_set_police(cl, RTA_DATA(tb[TCA_CBQ_POLICE-1]));
#endif
    if (tb[TCA_CBQ_FOPT-1])
        cbq_set_fopt(cl, RTA_DATA(tb[TCA_CBQ_FOPT-1]));
    sch_tree_unlock(sch);

#ifdef CONFIG_NET_ESTIMATOR
    if (tca[TCA_RATE-1])
        gen_new_estimator(&cl->bstats, &cl->rate_est,
                          cl->stats_lock, tca[TCA_RATE-1]);
#endif

    *arg = (unsigned long)cl;
    return 0;

failure:
    qdisc_put_rtab(rtab);
    return err;
}
static int cbq_delete(struct Qdisc *sch, unsigned long arg)
{
    struct cbq_sched_data *q = qdisc_priv(sch);
    struct cbq_class *cl = (struct cbq_class*)arg;

    if (cl->filters || cl->children || cl == &q->link)
        return -EBUSY;

    sch_tree_lock(sch);

    if (cl->next_alive)
        cbq_deactivate_class(cl);

    if (q->tx_borrowed == cl)
        q->tx_borrowed = q->tx_class;
    if (q->tx_class == cl) {
        q->tx_class = NULL;
        q->tx_borrowed = NULL;
    }
#ifdef CONFIG_NET_CLS_POLICE
    if (q->rx_class == cl)
        q->rx_class = NULL;
#endif

    cbq_unlink_class(cl);
    cbq_adjust_levels(cl->tparent);
    cl->defmap = 0;
    cbq_sync_defmap(cl);

    cbq_rmprio(q, cl);
    sch_tree_unlock(sch);

    if (--cl->refcnt == 0)
        cbq_destroy_class(sch, cl);

    return 0;
}
**cbq_find_tcf(struct Qdisc
*sch
, unsigned long arg
)
2027 struct cbq_sched_data
*q
= qdisc_priv(sch
);
2028 struct cbq_class
*cl
= (struct cbq_class
*)arg
;
2033 return &cl
->filter_list
;
2036 static unsigned long cbq_bind_filter(struct Qdisc
*sch
, unsigned long parent
,
2039 struct cbq_sched_data
*q
= qdisc_priv(sch
);
2040 struct cbq_class
*p
= (struct cbq_class
*)parent
;
2041 struct cbq_class
*cl
= cbq_class_lookup(q
, classid
);
2044 if (p
&& p
->level
<= cl
->level
)
2047 return (unsigned long)cl
;
2052 static void cbq_unbind_filter(struct Qdisc
*sch
, unsigned long arg
)
2054 struct cbq_class
*cl
= (struct cbq_class
*)arg
;
2059 static void cbq_walk(struct Qdisc
*sch
, struct qdisc_walker
*arg
)
2061 struct cbq_sched_data
*q
= qdisc_priv(sch
);
2067 for (h
= 0; h
< 16; h
++) {
2068 struct cbq_class
*cl
;
2070 for (cl
= q
->classes
[h
]; cl
; cl
= cl
->next
) {
2071 if (arg
->count
< arg
->skip
) {
2075 if (arg
->fn(sch
, (unsigned long)cl
, arg
) < 0) {
static struct Qdisc_class_ops cbq_class_ops = {
    .graft      =   cbq_graft,
    .leaf       =   cbq_leaf,
    .get        =   cbq_get,
    .put        =   cbq_put,
    .change     =   cbq_change_class,
    .delete     =   cbq_delete,
    .walk       =   cbq_walk,
    .tcf_chain  =   cbq_find_tcf,
    .bind_tcf   =   cbq_bind_filter,
    .unbind_tcf =   cbq_unbind_filter,
    .dump       =   cbq_dump_class,
    .dump_stats =   cbq_dump_class_stats,
};

static struct Qdisc_ops cbq_qdisc_ops = {
    .next       =   NULL,
    .cl_ops     =   &cbq_class_ops,
    .id         =   "cbq",
    .priv_size  =   sizeof(struct cbq_sched_data),
    .enqueue    =   cbq_enqueue,
    .dequeue    =   cbq_dequeue,
    .requeue    =   cbq_requeue,
    .drop       =   cbq_drop,
    .init       =   cbq_init,
    .reset      =   cbq_reset,
    .destroy    =   cbq_destroy,
    .change     =   NULL,
    .dump       =   cbq_dump,
    .dump_stats =   cbq_dump_stats,
    .owner      =   THIS_MODULE,
};

static int __init cbq_module_init(void)
{
    return register_qdisc(&cbq_qdisc_ops);
}
static void __exit cbq_module_exit(void)
{
    unregister_qdisc(&cbq_qdisc_ops);
}
module_init(cbq_module_init)
module_exit(cbq_module_exit)
MODULE_LICENSE("GPL");