/*
 * net/sched/sch_cbq.c	Class-Based Queueing discipline.
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *
 */

#include <linux/module.h>
#include <linux/slab.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/skbuff.h>
#include <net/netlink.h>
#include <net/pkt_sched.h>

struct cbq_sched_data;

struct cbq_class
{
	struct Qdisc_class_common common;
	struct cbq_class	*next_alive;	/* next class with backlog in this priority band */

/* Parameters */
	unsigned char		priority;	/* class priority */
	unsigned char		priority2;	/* priority to be used after overlimit */
	unsigned char		ewma_log;	/* time constant for idle time calculation */
	unsigned char		ovl_strategy;
#ifdef CONFIG_NET_CLS_ACT
	unsigned char		police;
#endif

	u32			defmap;

	/* Link-sharing scheduler parameters */
	long			maxidle;	/* Class parameters: see below. */
	long			offtime;
	long			minidle;
	u32			avpkt;
	struct qdisc_rate_table	*R_tab;

	/* Overlimit strategy parameters */
	void			(*overlimit)(struct cbq_class *cl);
	psched_tdiff_t		penalty;

	/* General scheduler (WRR) parameters */
	long			allot;
	long			quantum;	/* Allotment per WRR round */
	long			weight;		/* Relative allotment: see below */

	struct Qdisc		*qdisc;		/* Ptr to CBQ discipline */
	struct cbq_class	*split;		/* Ptr to split node */
	struct cbq_class	*share;		/* Ptr to LS parent in the class tree */
	struct cbq_class	*tparent;	/* Ptr to tree parent in the class tree */
	struct cbq_class	*borrow;	/* NULL if class is bandwidth limited;
						   parent otherwise */
	struct cbq_class	*sibling;	/* Sibling chain */
	struct cbq_class	*children;	/* Pointer to children chain */

	struct Qdisc		*q;		/* Elementary queueing discipline */


/* Variables */
	unsigned char		cpriority;	/* Effective priority */
	unsigned char		delayed;
	unsigned char		level;		/* level of the class in hierarchy:
						   0 for leaf classes, and maximal
						   level of children + 1 for nodes.
						 */

	psched_time_t		last;		/* Last end of service */
	psched_time_t		undertime;
	long			avgidle;
	long			deficit;	/* Saved deficit for WRR */
	psched_time_t		penalized;
	struct gnet_stats_basic_packed bstats;
	struct gnet_stats_queue qstats;
	struct gnet_stats_rate_est rate_est;
	struct tc_cbq_xstats	xstats;

	struct tcf_proto	*filter_list;

	int			refcnt;
	int			filters;

	struct cbq_class	*defaults[TC_PRIO_MAX+1];
};

struct cbq_sched_data
{
	struct Qdisc_class_hash	clhash;			/* Hash table of all classes */
	int			nclasses[TC_CBQ_MAXPRIO+1];
	unsigned		quanta[TC_CBQ_MAXPRIO+1];

	struct cbq_class	link;

	unsigned		activemask;
	struct cbq_class	*active[TC_CBQ_MAXPRIO+1];	/* List of all classes
								   with backlog */

#ifdef CONFIG_NET_CLS_ACT
	struct cbq_class	*rx_class;
#endif
	struct cbq_class	*tx_class;
	struct cbq_class	*tx_borrowed;
	int			tx_len;
	psched_time_t		now;		/* Cached timestamp */
	psched_time_t		now_rt;		/* Cached real time */
	unsigned		pmask;

	struct hrtimer		delay_timer;
	struct qdisc_watchdog	watchdog;	/* Watchdog timer,
						   started when CBQ has
						   backlog, but cannot
						   transmit just now */
	psched_tdiff_t		wd_expires;
	int			toplevel;
	u32			hgenerator;
};


#define L2T(cl,len)	qdisc_l2t((cl)->R_tab,len)
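
/*
 * Editor's note, illustrative only (numbers below are an assumption, not
 * taken from this file): L2T ("length to time") maps a packet length to its
 * transmission time at the class rate, in scheduler ticks, via the rate
 * table supplied from userspace. For example, at a configured rate of
 * 1 Mbit/s a 1500-byte packet occupies roughly 1500 * 8 / 1e6 s = 12 ms of
 * link time, so L2T(cl, 1500) would return approximately 12 ms expressed in
 * psched ticks.
 */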
static __inline__ struct cbq_class *
cbq_class_lookup(struct cbq_sched_data *q, u32 classid)
{
	struct Qdisc_class_common *clc;

	clc = qdisc_class_find(&q->clhash, classid);
	if (clc == NULL)
		return NULL;
	return container_of(clc, struct cbq_class, common);
}

#ifdef CONFIG_NET_CLS_ACT

static struct cbq_class *
cbq_reclassify(struct sk_buff *skb, struct cbq_class *this)
{
	struct cbq_class *cl, *new;

	for (cl = this->tparent; cl; cl = cl->tparent)
		if ((new = cl->defaults[TC_PRIO_BESTEFFORT]) != NULL && new != this)
			return new;

	return NULL;
}

#endif

/* Classify packet. The procedure is pretty complicated, but
   it allows us to combine link sharing and priority scheduling
   transparently.

   Namely, you can put link sharing rules (f.e. route based) at root of CBQ,
   so that it resolves to split nodes. Then packets are classified
   by logical priority, or a more specific classifier may be attached
   to the split node.
 */

static struct cbq_class *
cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *head = &q->link;
	struct cbq_class **defmap;
	struct cbq_class *cl = NULL;
	u32 prio = skb->priority;
	struct tcf_result res;

	/*
	 *  Step 1. If skb->priority points to one of our classes, use it.
	 */
	if (TC_H_MAJ(prio^sch->handle) == 0 &&
	    (cl = cbq_class_lookup(q, prio)) != NULL)
		return cl;

	*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
	for (;;) {
		int result = 0;
		defmap = head->defaults;

		/*
		 * Step 2+n. Apply classifier.
		 */
		if (!head->filter_list ||
		    (result = tc_classify_compat(skb, head->filter_list, &res)) < 0)
			goto fallback;

		if ((cl = (void*)res.class) == NULL) {
			if (TC_H_MAJ(res.classid))
				cl = cbq_class_lookup(q, res.classid);
			else if ((cl = defmap[res.classid&TC_PRIO_MAX]) == NULL)
				cl = defmap[TC_PRIO_BESTEFFORT];

			if (cl == NULL || cl->level >= head->level)
				goto fallback;
		}

#ifdef CONFIG_NET_CLS_ACT
		switch (result) {
		case TC_ACT_QUEUED:
		case TC_ACT_STOLEN:
			*qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
		case TC_ACT_SHOT:
			return NULL;
		case TC_ACT_RECLASSIFY:
			return cbq_reclassify(skb, cl);
		}
#endif
		if (cl->level == 0)
			return cl;

		/*
		 * Step 3+n. If classifier selected a link sharing class,
		 *	     apply agency specific classifier.
		 *	     Repeat this procedure until we hit a leaf node.
		 */
		head = cl;
	}

fallback:
	cl = head;

	/*
	 * Step 4. No success...
	 */
	if (TC_H_MAJ(prio) == 0 &&
	    !(cl = head->defaults[prio&TC_PRIO_MAX]) &&
	    !(cl = head->defaults[TC_PRIO_BESTEFFORT]))
		return head;

	return cl;
}
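
/*
 * Editor's note, an assumed walk-through of the classification loop above
 * (not part of the original source): suppose skb->priority does not name a
 * CBQ class, the root filter maps the packet to a non-leaf "split" class,
 * and that split class has its own filter list. The loop then repeats
 * Step 2 with head pointing at the split class, and either a leaf (level 0)
 * class is returned or the split class's defaults[] entry for the packet's
 * TC_PRIO_* band is used as a fallback.
 */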
/*
   A packet has just been enqueued on the empty class.
   cbq_activate_class adds it to the tail of active class list
   of its priority band.
 */

static __inline__ void cbq_activate_class(struct cbq_class *cl)
{
	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
	int prio = cl->cpriority;
	struct cbq_class *cl_tail;

	cl_tail = q->active[prio];
	q->active[prio] = cl;

	if (cl_tail != NULL) {
		cl->next_alive = cl_tail->next_alive;
		cl_tail->next_alive = cl;
	} else {
		cl->next_alive = cl;
		q->activemask |= (1<<prio);
	}
}

/*
   Unlink class from active chain.
   Note that this same procedure is done directly in cbq_dequeue*
   during round-robin procedure.
 */

static void cbq_deactivate_class(struct cbq_class *this)
{
	struct cbq_sched_data *q = qdisc_priv(this->qdisc);
	int prio = this->cpriority;
	struct cbq_class *cl;
	struct cbq_class *cl_prev = q->active[prio];

	do {
		cl = cl_prev->next_alive;
		if (cl == this) {
			cl_prev->next_alive = cl->next_alive;
			cl->next_alive = NULL;

			if (cl == q->active[prio]) {
				q->active[prio] = cl_prev;
				if (cl == q->active[prio]) {
					q->active[prio] = NULL;
					q->activemask &= ~(1<<prio);
					return;
				}
			}
			return;
		}
	} while ((cl_prev = cl) != q->active[prio]);
}

static void
cbq_mark_toplevel(struct cbq_sched_data *q, struct cbq_class *cl)
{
	int toplevel = q->toplevel;

	if (toplevel > cl->level && !(cl->q->flags&TCQ_F_THROTTLED)) {
		psched_time_t now;
		psched_tdiff_t incr;

		now = psched_get_time();
		incr = now - q->now_rt;
		now = q->now + incr;

		do {
			if (cl->undertime < now) {
				q->toplevel = cl->level;
				return;
			}
		} while ((cl=cl->borrow) != NULL && toplevel > cl->level);
	}
}

static int
cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	int uninitialized_var(ret);
	struct cbq_class *cl = cbq_classify(skb, sch, &ret);

#ifdef CONFIG_NET_CLS_ACT
	q->rx_class = cl;
#endif
	if (cl == NULL) {
		if (ret & __NET_XMIT_BYPASS)
			sch->qstats.drops++;
		kfree_skb(skb);
		return ret;
	}

#ifdef CONFIG_NET_CLS_ACT
	cl->q->__parent = sch;
#endif
	ret = qdisc_enqueue(skb, cl->q);
	if (ret == NET_XMIT_SUCCESS) {
		sch->q.qlen++;
		sch->bstats.packets++;
		sch->bstats.bytes += qdisc_pkt_len(skb);
		cbq_mark_toplevel(q, cl);
		if (!cl->next_alive)
			cbq_activate_class(cl);
		return ret;
	}

	if (net_xmit_drop_count(ret)) {
		sch->qstats.drops++;
		cbq_mark_toplevel(q, cl);
		cl->qstats.drops++;
	}
	return ret;
}
/* Overlimit actions */

/* TC_CBQ_OVL_CLASSIC: (default) penalize leaf class by adding offtime */

static void cbq_ovl_classic(struct cbq_class *cl)
{
	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
	psched_tdiff_t delay = cl->undertime - q->now;

	if (!cl->delayed) {
		delay += cl->offtime;

		/*
		   Class goes to sleep, so that it will have no
		   chance to work avgidle. Let's forgive it 8)

		   BTW cbq-2.0 has a crap in this
		   place, apparently they forgot to shift it by cl->ewma_log.
		 */
		if (cl->avgidle < 0)
			delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
		if (cl->avgidle < cl->minidle)
			cl->avgidle = cl->minidle;
		if (delay <= 0)
			delay = 1;
		cl->undertime = q->now + delay;

		cl->xstats.overactions++;
		cl->delayed = 1;
	}
	if (q->wd_expires == 0 || q->wd_expires > delay)
		q->wd_expires = delay;

	/* Dirty work! We must schedule wakeups based on
	   real available rate, rather than leaf rate,
	   which may be tiny (even zero).
	 */
	if (q->toplevel == TC_CBQ_MAXLEVEL) {
		struct cbq_class *b;
		psched_tdiff_t base_delay = q->wd_expires;

		for (b = cl->borrow; b; b = b->borrow) {
			delay = b->undertime - q->now;
			if (delay < base_delay) {
				if (delay <= 0)
					delay = 1;
				base_delay = delay;
			}
		}

		q->wd_expires = base_delay;
	}
}

/* TC_CBQ_OVL_RCLASSIC: penalize by offtime classes in hierarchy, when
   they go overlimit
 */

static void cbq_ovl_rclassic(struct cbq_class *cl)
{
	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
	struct cbq_class *this = cl;

	do {
		if (cl->level > q->toplevel) {
			cl = NULL;
			break;
		}
	} while ((cl = cl->borrow) != NULL);

	if (cl == NULL)
		cl = this;
	cbq_ovl_classic(cl);
}

/* TC_CBQ_OVL_DELAY: delay until it will go to underlimit */

static void cbq_ovl_delay(struct cbq_class *cl)
{
	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
	psched_tdiff_t delay = cl->undertime - q->now;

	if (test_bit(__QDISC_STATE_DEACTIVATED,
		     &qdisc_root_sleeping(cl->qdisc)->state))
		return;

	if (!cl->delayed) {
		psched_time_t sched = q->now;
		ktime_t expires;

		delay += cl->offtime;
		if (cl->avgidle < 0)
			delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
		if (cl->avgidle < cl->minidle)
			cl->avgidle = cl->minidle;
		cl->undertime = q->now + delay;

		if (delay > 0) {
			sched += delay + cl->penalty;
			cl->penalized = sched;
			cl->cpriority = TC_CBQ_MAXPRIO;
			q->pmask |= (1<<TC_CBQ_MAXPRIO);

			expires = ktime_set(0, 0);
			expires = ktime_add_ns(expires, PSCHED_TICKS2NS(sched));
			if (hrtimer_try_to_cancel(&q->delay_timer) &&
			    ktime_to_ns(ktime_sub(
					hrtimer_get_expires(&q->delay_timer),
					expires)) > 0)
				hrtimer_set_expires(&q->delay_timer, expires);
			hrtimer_restart(&q->delay_timer);
			cl->delayed = 1;
			cl->xstats.overactions++;
			return;
		}
		delay = 1;
	}
	if (q->wd_expires == 0 || q->wd_expires > delay)
		q->wd_expires = delay;
}

/* TC_CBQ_OVL_LOWPRIO: penalize class by lowering its priority band */

static void cbq_ovl_lowprio(struct cbq_class *cl)
{
	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);

	cl->penalized = q->now + cl->penalty;

	if (cl->cpriority != cl->priority2) {
		cl->cpriority = cl->priority2;
		q->pmask |= (1<<cl->cpriority);
		cl->xstats.overactions++;
	}
	cbq_ovl_classic(cl);
}

/* TC_CBQ_OVL_DROP: penalize class by dropping */

static void cbq_ovl_drop(struct cbq_class *cl)
{
	if (cl->q->ops->drop)
		if (cl->q->ops->drop(cl->q))
			cl->qdisc->q.qlen--;
	cl->xstats.overactions++;
	cbq_ovl_classic(cl);
}
static psched_tdiff_t cbq_undelay_prio(struct cbq_sched_data *q, int prio,
				       psched_time_t now)
{
	struct cbq_class *cl;
	struct cbq_class *cl_prev = q->active[prio];
	psched_time_t sched = now;

	if (cl_prev == NULL)
		return 0;

	do {
		cl = cl_prev->next_alive;
		if (now - cl->penalized > 0) {
			cl_prev->next_alive = cl->next_alive;
			cl->next_alive = NULL;
			cl->cpriority = cl->priority;
			cl->delayed = 0;
			cbq_activate_class(cl);

			if (cl == q->active[prio]) {
				q->active[prio] = cl_prev;
				if (cl == q->active[prio]) {
					q->active[prio] = NULL;
					return 0;
				}
			}

			cl = cl_prev->next_alive;
		} else if (sched - cl->penalized > 0)
			sched = cl->penalized;
	} while ((cl_prev = cl) != q->active[prio]);

	return sched - now;
}

static enum hrtimer_restart cbq_undelay(struct hrtimer *timer)
{
	struct cbq_sched_data *q = container_of(timer, struct cbq_sched_data,
						delay_timer);
	struct Qdisc *sch = q->watchdog.qdisc;
	psched_time_t now;
	psched_tdiff_t delay = 0;
	unsigned pmask;

	now = psched_get_time();

	pmask = q->pmask;
	q->pmask = 0;

	while (pmask) {
		int prio = ffz(~pmask);
		psched_tdiff_t tmp;

		pmask &= ~(1<<prio);

		tmp = cbq_undelay_prio(q, prio, now);
		if (tmp > 0) {
			q->pmask |= 1<<prio;
			if (tmp < delay || delay == 0)
				delay = tmp;
		}
	}

	if (delay) {
		ktime_t time;

		time = ktime_set(0, 0);
		time = ktime_add_ns(time, PSCHED_TICKS2NS(now + delay));
		hrtimer_start(&q->delay_timer, time, HRTIMER_MODE_ABS);
	}

	sch->flags &= ~TCQ_F_THROTTLED;
	__netif_schedule(qdisc_root(sch));
	return HRTIMER_NORESTART;
}

#ifdef CONFIG_NET_CLS_ACT
static int cbq_reshape_fail(struct sk_buff *skb, struct Qdisc *child)
{
	struct Qdisc *sch = child->__parent;
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *cl = q->rx_class;

	q->rx_class = NULL;

	if (cl && (cl = cbq_reclassify(skb, cl)) != NULL) {
		int ret;

		cbq_mark_toplevel(q, cl);

		q->rx_class = cl;
		cl->q->__parent = sch;

		ret = qdisc_enqueue(skb, cl->q);
		if (ret == NET_XMIT_SUCCESS) {
			sch->q.qlen++;
			sch->bstats.packets++;
			sch->bstats.bytes += qdisc_pkt_len(skb);
			if (!cl->next_alive)
				cbq_activate_class(cl);
			return 0;
		}
		if (net_xmit_drop_count(ret))
			sch->qstats.drops++;
		return 0;
	}

	sch->qstats.drops++;
	return -1;
}
#endif
/*
   This is a mission-critical procedure.

   We "regenerate" the toplevel cutoff if the transmitting class
   has backlog and it is not regulated. It is not part of the
   original CBQ description, but looks more reasonable.
   Probably, it is wrong. This question needs further investigation.
 */

static __inline__ void
cbq_update_toplevel(struct cbq_sched_data *q, struct cbq_class *cl,
		    struct cbq_class *borrowed)
{
	if (cl && q->toplevel >= borrowed->level) {
		if (cl->q->q.qlen > 1) {
			do {
				if (borrowed->undertime == PSCHED_PASTPERFECT) {
					q->toplevel = borrowed->level;
					return;
				}
			} while ((borrowed=borrowed->borrow) != NULL);
		}
	}
}

static void
cbq_update(struct cbq_sched_data *q)
{
	struct cbq_class *this = q->tx_class;
	struct cbq_class *cl = this;
	int len = q->tx_len;

	q->tx_class = NULL;

	for ( ; cl; cl = cl->share) {
		long avgidle = cl->avgidle;
		long idle;

		cl->bstats.packets++;
		cl->bstats.bytes += len;

		/*
		   (now - last) is total time between packet right edges.
		   (last_pktlen/rate) is "virtual" busy time, so that

			 idle = (now - last) - last_pktlen/rate
		 */

		idle = q->now - cl->last;
		if ((unsigned long)idle > 128*1024*1024) {
			avgidle = cl->maxidle;
		} else {
			idle -= L2T(cl, len);

			/* true_avgidle := (1-W)*true_avgidle + W*idle,
			   where W=2^{-ewma_log}. But cl->avgidle is scaled:
			   cl->avgidle == true_avgidle/W,
			   hence:
			 */
			avgidle += idle - (avgidle>>cl->ewma_log);
		}

		if (avgidle <= 0) {
			/* Overlimit or at-limit */

			if (avgidle < cl->minidle)
				avgidle = cl->minidle;

			cl->avgidle = avgidle;

			/* Calculate expected time, when this class
			   will be allowed to send.
			   It will occur, when:
			   (1-W)*true_avgidle + W*delay = 0, i.e.
			   idle = (1/W - 1)*(-true_avgidle)
			   or
			   idle = (1 - W)*(-cl->avgidle);
			 */
			idle = (-avgidle) - ((-avgidle) >> cl->ewma_log);

			/*
			   That is not all.
			   To maintain the rate allocated to the class,
			   we add to undertime virtual clock,
			   necessary to complete transmitted packet.
			   (len/phys_bandwidth has been already passed
			   to the moment of cbq_update)
			 */

			idle -= L2T(&q->link, len);
			idle += L2T(cl, len);

			cl->undertime = q->now + idle;
		} else {
			/* Underlimit */

			cl->undertime = PSCHED_PASTPERFECT;
			if (avgidle > cl->maxidle)
				cl->avgidle = cl->maxidle;
			else
				cl->avgidle = avgidle;
		}
		cl->last = q->now;
	}

	cbq_update_toplevel(q, this, q->tx_borrowed);
}
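
/*
 * Editor's note, illustrative arithmetic for the scaled EWMA above (assumed
 * numbers, not taken from the original source): with ewma_log = 3 the weight
 * is W = 2^-3 = 1/8, and cl->avgidle stores true_avgidle/W. If the stored
 * avgidle is 800 ticks (true average 100) and the new sample is idle = 40
 * ticks, the update
 *	avgidle += idle - (avgidle >> ewma_log)
 * gives 800 + 40 - 100 = 740, i.e. a true average idle of 740/8 = 92.5 ticks,
 * exactly (1-W)*100 + W*40 applied to the unscaled value.
 */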
static __inline__ struct cbq_class *
cbq_under_limit(struct cbq_class *cl)
{
	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
	struct cbq_class *this_cl = cl;

	if (cl->tparent == NULL)
		return cl;

	if (cl->undertime == PSCHED_PASTPERFECT || q->now >= cl->undertime) {
		cl->delayed = 0;
		return cl;
	}

	do {
		/* This is a very suspicious place. Currently the overlimit
		   action is generated for unbounded classes
		   only if the link is completely congested.
		   Though it agrees with the ancestor-only paradigm,
		   it looks very stupid. In particular,
		   it means that this chunk of code will either
		   never be called or result in strong amplification
		   of burstiness. Dangerous, silly, and, alas,
		   no other solution exists.
		 */
		if ((cl = cl->borrow) == NULL) {
			this_cl->qstats.overlimits++;
			this_cl->overlimit(this_cl);
			return NULL;
		}
		if (cl->level > q->toplevel)
			return NULL;
	} while (cl->undertime != PSCHED_PASTPERFECT && q->now < cl->undertime);

	cl->delayed = 0;
	return cl;
}

static __inline__ struct sk_buff *
cbq_dequeue_prio(struct Qdisc *sch, int prio)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *cl_tail, *cl_prev, *cl;
	struct sk_buff *skb;
	int deficit;

	cl_tail = cl_prev = q->active[prio];
	cl = cl_prev->next_alive;

	do {
		deficit = 0;

		/* Start round */
		do {
			struct cbq_class *borrow = cl;

			if (cl->q->q.qlen &&
			    (borrow = cbq_under_limit(cl)) == NULL)
				goto skip_class;

			if (cl->deficit <= 0) {
				/* Class exhausted its allotment per
				   this round. Switch to the next one.
				 */
				deficit = 1;
				cl->deficit += cl->quantum;
				goto next_class;
			}

			skb = cl->q->dequeue(cl->q);

			/* Class did not give us any skb :-(
			   It could occur even if cl->q->q.qlen != 0
			   f.e. if cl->q == "tbf"
			 */
			if (skb == NULL)
				goto skip_class;

			cl->deficit -= qdisc_pkt_len(skb);
			q->tx_class = cl;
			q->tx_borrowed = borrow;
			if (borrow != cl) {
#ifndef CBQ_XSTATS_BORROWS_BYTES
				borrow->xstats.borrows++;
				cl->xstats.borrows++;
#else
				borrow->xstats.borrows += qdisc_pkt_len(skb);
				cl->xstats.borrows += qdisc_pkt_len(skb);
#endif
			}
			q->tx_len = qdisc_pkt_len(skb);

			if (cl->deficit <= 0) {
				q->active[prio] = cl;
				cl = cl->next_alive;
				cl->deficit += cl->quantum;
			}
			return skb;

skip_class:
			if (cl->q->q.qlen == 0 || prio != cl->cpriority) {
				/* Class is empty or penalized.
				   Unlink it from active chain.
				 */
				cl_prev->next_alive = cl->next_alive;
				cl->next_alive = NULL;

				/* Did cl_tail point to it? */
				if (cl == cl_tail) {
					/* Repair it! */
					cl_tail = cl_prev;

					/* Was it the last class in this band? */
					if (cl == cl_tail) {
						/* Kill the band! */
						q->active[prio] = NULL;
						q->activemask &= ~(1<<prio);
						if (cl->q->q.qlen)
							cbq_activate_class(cl);
						return NULL;
					}

					q->active[prio] = cl_tail;
				}
				if (cl->q->q.qlen)
					cbq_activate_class(cl);

				cl = cl_prev;
			}

next_class:
			cl_prev = cl;
			cl = cl->next_alive;
		} while (cl_prev != cl_tail);
	} while (deficit);

	q->active[prio] = cl_prev;

	return NULL;
}
static __inline__ struct sk_buff *
cbq_dequeue_1(struct Qdisc *sch)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct sk_buff *skb;
	unsigned activemask;

	activemask = q->activemask&0xFF;
	while (activemask) {
		int prio = ffz(~activemask);
		activemask &= ~(1<<prio);
		skb = cbq_dequeue_prio(sch, prio);
		if (skb)
			return skb;
	}
	return NULL;
}

static struct sk_buff *
cbq_dequeue(struct Qdisc *sch)
{
	struct sk_buff *skb;
	struct cbq_sched_data *q = qdisc_priv(sch);
	psched_time_t now;
	psched_tdiff_t incr;

	now = psched_get_time();
	incr = now - q->now_rt;

	if (q->tx_class) {
		psched_tdiff_t incr2;
		/* Time integrator. We calculate EOS time
		   by adding expected packet transmission time.
		   If real time is greater, we warp artificial clock,
		   so that:

		   cbq_time = max(real_time, work);
		 */
		incr2 = L2T(&q->link, q->tx_len);
		q->now += incr2;
		cbq_update(q);
		if ((incr -= incr2) < 0)
			incr = 0;
	}
	q->now += incr;
	q->now_rt = now;

	for (;;) {
		q->wd_expires = 0;

		skb = cbq_dequeue_1(sch);
		if (skb) {
			sch->q.qlen--;
			sch->flags &= ~TCQ_F_THROTTLED;
			return skb;
		}

		/* All the classes are overlimit.

		   This is possible if:
		   1. The scheduler is empty.
		   2. The toplevel cutoff inhibited borrowing.
		   3. The root class is overlimit.

		   Reset the 2nd and 3rd conditions and retry.

		   Note that NS and cbq-2.0 are buggy: peeking at
		   an arbitrary class is appropriate for ancestor-only
		   sharing, but not for the toplevel algorithm.

		   Our version is better, but slower, because it requires
		   two passes; that is unavoidable with top-level sharing.
		 */

		if (q->toplevel == TC_CBQ_MAXLEVEL &&
		    q->link.undertime == PSCHED_PASTPERFECT)
			break;

		q->toplevel = TC_CBQ_MAXLEVEL;
		q->link.undertime = PSCHED_PASTPERFECT;
	}

	/* No packets in scheduler or nobody wants to give them to us :-(
	   Sigh... start watchdog timer in the last case. */

	if (sch->q.qlen) {
		sch->qstats.overlimits++;
		if (q->wd_expires)
			qdisc_watchdog_schedule(&q->watchdog,
						now + q->wd_expires);
	}
	return NULL;
}
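
/*
 * Editor's note, illustrative behaviour of the time integrator above
 * (assumed numbers, not from the original source): if the previously
 * transmitted packet should have taken incr2 = 120 us of link time but
 * 200 us of real time elapsed, q->now first advances by the 120 us "work"
 * component and then by the remaining 80 us of real time; if only 50 us of
 * real time had elapsed, q->now would still advance by the full 120 us.
 * Either way the artificial clock satisfies cbq_time = max(real_time, work).
 */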
/* CBQ class maintenance routines */

static void cbq_adjust_levels(struct cbq_class *this)
{
	if (this == NULL)
		return;

	do {
		int level = 0;
		struct cbq_class *cl;

		if ((cl = this->children) != NULL) {
			do {
				if (cl->level > level)
					level = cl->level;
			} while ((cl = cl->sibling) != this->children);
		}
		this->level = level+1;
	} while ((this = this->tparent) != NULL);
}

static void cbq_normalize_quanta(struct cbq_sched_data *q, int prio)
{
	struct cbq_class *cl;
	struct hlist_node *n;
	unsigned int h;

	if (q->quanta[prio] == 0)
		return;

	for (h = 0; h < q->clhash.hashsize; h++) {
		hlist_for_each_entry(cl, n, &q->clhash.hash[h], common.hnode) {
			/* BUGGGG... Beware! This expression suffers from
			   arithmetic overflows!
			 */
			if (cl->priority == prio) {
				cl->quantum = (cl->weight*cl->allot*q->nclasses[prio])/
					q->quanta[prio];
			}
			if (cl->quantum <= 0 || cl->quantum>32*qdisc_dev(cl->qdisc)->mtu) {
				printk(KERN_WARNING "CBQ: class %08x has bad quantum==%ld, repaired.\n", cl->common.classid, cl->quantum);
				cl->quantum = qdisc_dev(cl->qdisc)->mtu/2 + 1;
			}
		}
	}
}
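
/*
 * Editor's note, illustrative arithmetic for the normalization above
 * (assumed numbers, not from the original source): with two classes in a
 * band (nclasses = 2), weights 100 and 300 (so quanta[prio] = 400) and
 * allot = 1500, the formula
 *	quantum = weight * allot * nclasses / quanta[prio]
 * yields 100*1500*2/400 = 750 and 300*1500*2/400 = 2250, i.e. quanta
 * proportional to the configured weights around the band's average allot.
 */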
static void cbq_sync_defmap(struct cbq_class *cl)
{
	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
	struct cbq_class *split = cl->split;
	unsigned h;
	int i;

	if (split == NULL)
		return;

	for (i=0; i<=TC_PRIO_MAX; i++) {
		if (split->defaults[i] == cl && !(cl->defmap&(1<<i)))
			split->defaults[i] = NULL;
	}

	for (i=0; i<=TC_PRIO_MAX; i++) {
		int level = split->level;

		if (split->defaults[i])
			continue;

		for (h = 0; h < q->clhash.hashsize; h++) {
			struct hlist_node *n;
			struct cbq_class *c;

			hlist_for_each_entry(c, n, &q->clhash.hash[h],
					     common.hnode) {
				if (c->split == split && c->level < level &&
				    c->defmap&(1<<i)) {
					split->defaults[i] = c;
					level = c->level;
				}
			}
		}
	}
}

static void cbq_change_defmap(struct cbq_class *cl, u32 splitid, u32 def, u32 mask)
{
	struct cbq_class *split = NULL;

	if (splitid == 0) {
		if ((split = cl->split) == NULL)
			return;
		splitid = split->common.classid;
	}

	if (split == NULL || split->common.classid != splitid) {
		for (split = cl->tparent; split; split = split->tparent)
			if (split->common.classid == splitid)
				break;
	}

	if (split == NULL)
		return;

	if (cl->split != split) {
		cl->defmap = 0;
		cbq_sync_defmap(cl);
		cl->split = split;
		cl->defmap = def&mask;
	} else
		cl->defmap = (cl->defmap&~mask)|(def&mask);

	cbq_sync_defmap(cl);
}

static void cbq_unlink_class(struct cbq_class *this)
{
	struct cbq_class *cl, **clp;
	struct cbq_sched_data *q = qdisc_priv(this->qdisc);

	qdisc_class_hash_remove(&q->clhash, &this->common);

	if (this->tparent) {
		clp=&this->sibling;
		cl = *clp;
		do {
			if (cl == this) {
				*clp = cl->sibling;
				break;
			}
			clp = &cl->sibling;
		} while ((cl = *clp) != this->sibling);

		if (this->tparent->children == this) {
			this->tparent->children = this->sibling;
			if (this->sibling == this)
				this->tparent->children = NULL;
		}
	} else {
		WARN_ON(this->sibling != this);
	}
}

static void cbq_link_class(struct cbq_class *this)
{
	struct cbq_sched_data *q = qdisc_priv(this->qdisc);
	struct cbq_class *parent = this->tparent;

	this->sibling = this;
	qdisc_class_hash_insert(&q->clhash, &this->common);

	if (parent == NULL)
		return;

	if (parent->children == NULL) {
		parent->children = this;
	} else {
		this->sibling = parent->children->sibling;
		parent->children->sibling = this;
	}
}

static unsigned int cbq_drop(struct Qdisc* sch)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *cl, *cl_head;
	int prio;
	unsigned int len;

	for (prio = TC_CBQ_MAXPRIO; prio >= 0; prio--) {
		if ((cl_head = q->active[prio]) == NULL)
			continue;

		cl = cl_head;
		do {
			if (cl->q->ops->drop && (len = cl->q->ops->drop(cl->q))) {
				sch->q.qlen--;
				if (!cl->q->q.qlen)
					cbq_deactivate_class(cl);
				return len;
			}
		} while ((cl = cl->next_alive) != cl_head);
	}
	return 0;
}
static void
cbq_reset(struct Qdisc* sch)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *cl;
	struct hlist_node *n;
	int prio;
	unsigned h;

	q->activemask = 0;
	q->pmask = 0;
	q->tx_class = NULL;
	q->tx_borrowed = NULL;
	qdisc_watchdog_cancel(&q->watchdog);
	hrtimer_cancel(&q->delay_timer);
	q->toplevel = TC_CBQ_MAXLEVEL;
	q->now = psched_get_time();
	q->now_rt = q->now;

	for (prio = 0; prio <= TC_CBQ_MAXPRIO; prio++)
		q->active[prio] = NULL;

	for (h = 0; h < q->clhash.hashsize; h++) {
		hlist_for_each_entry(cl, n, &q->clhash.hash[h], common.hnode) {
			qdisc_reset(cl->q);

			cl->next_alive = NULL;
			cl->undertime = PSCHED_PASTPERFECT;
			cl->avgidle = cl->maxidle;
			cl->deficit = cl->quantum;
			cl->cpriority = cl->priority;
		}
	}
	sch->q.qlen = 0;
}


static int cbq_set_lss(struct cbq_class *cl, struct tc_cbq_lssopt *lss)
{
	if (lss->change&TCF_CBQ_LSS_FLAGS) {
		cl->share = (lss->flags&TCF_CBQ_LSS_ISOLATED) ? NULL : cl->tparent;
		cl->borrow = (lss->flags&TCF_CBQ_LSS_BOUNDED) ? NULL : cl->tparent;
	}
	if (lss->change&TCF_CBQ_LSS_EWMA)
		cl->ewma_log = lss->ewma_log;
	if (lss->change&TCF_CBQ_LSS_AVPKT)
		cl->avpkt = lss->avpkt;
	if (lss->change&TCF_CBQ_LSS_MINIDLE)
		cl->minidle = -(long)lss->minidle;
	if (lss->change&TCF_CBQ_LSS_MAXIDLE) {
		cl->maxidle = lss->maxidle;
		cl->avgidle = lss->maxidle;
	}
	if (lss->change&TCF_CBQ_LSS_OFFTIME)
		cl->offtime = lss->offtime;
	return 0;
}

static void cbq_rmprio(struct cbq_sched_data *q, struct cbq_class *cl)
{
	q->nclasses[cl->priority]--;
	q->quanta[cl->priority] -= cl->weight;
	cbq_normalize_quanta(q, cl->priority);
}

static void cbq_addprio(struct cbq_sched_data *q, struct cbq_class *cl)
{
	q->nclasses[cl->priority]++;
	q->quanta[cl->priority] += cl->weight;
	cbq_normalize_quanta(q, cl->priority);
}

static int cbq_set_wrr(struct cbq_class *cl, struct tc_cbq_wrropt *wrr)
{
	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);

	if (wrr->allot)
		cl->allot = wrr->allot;
	if (wrr->weight)
		cl->weight = wrr->weight;
	if (wrr->priority) {
		cl->priority = wrr->priority-1;
		cl->cpriority = cl->priority;
		if (cl->priority >= cl->priority2)
			cl->priority2 = TC_CBQ_MAXPRIO-1;
	}

	cbq_addprio(q, cl);
	return 0;
}
static int cbq_set_overlimit(struct cbq_class *cl, struct tc_cbq_ovl *ovl)
{
	switch (ovl->strategy) {
	case TC_CBQ_OVL_CLASSIC:
		cl->overlimit = cbq_ovl_classic;
		break;
	case TC_CBQ_OVL_DELAY:
		cl->overlimit = cbq_ovl_delay;
		break;
	case TC_CBQ_OVL_LOWPRIO:
		if (ovl->priority2-1 >= TC_CBQ_MAXPRIO ||
		    ovl->priority2-1 <= cl->priority)
			return -EINVAL;
		cl->priority2 = ovl->priority2-1;
		cl->overlimit = cbq_ovl_lowprio;
		break;
	case TC_CBQ_OVL_DROP:
		cl->overlimit = cbq_ovl_drop;
		break;
	case TC_CBQ_OVL_RCLASSIC:
		cl->overlimit = cbq_ovl_rclassic;
		break;
	default:
		return -EINVAL;
	}
	cl->penalty = ovl->penalty;
	return 0;
}

#ifdef CONFIG_NET_CLS_ACT
static int cbq_set_police(struct cbq_class *cl, struct tc_cbq_police *p)
{
	cl->police = p->police;

	if (cl->q->handle) {
		if (p->police == TC_POLICE_RECLASSIFY)
			cl->q->reshape_fail = cbq_reshape_fail;
		else
			cl->q->reshape_fail = NULL;
	}
	return 0;
}
#endif

static int cbq_set_fopt(struct cbq_class *cl, struct tc_cbq_fopt *fopt)
{
	cbq_change_defmap(cl, fopt->split, fopt->defmap, fopt->defchange);
	return 0;
}

static const struct nla_policy cbq_policy[TCA_CBQ_MAX + 1] = {
	[TCA_CBQ_LSSOPT]	= { .len = sizeof(struct tc_cbq_lssopt) },
	[TCA_CBQ_WRROPT]	= { .len = sizeof(struct tc_cbq_wrropt) },
	[TCA_CBQ_FOPT]		= { .len = sizeof(struct tc_cbq_fopt) },
	[TCA_CBQ_OVL_STRATEGY]	= { .len = sizeof(struct tc_cbq_ovl) },
	[TCA_CBQ_RATE]		= { .len = sizeof(struct tc_ratespec) },
	[TCA_CBQ_RTAB]		= { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
	[TCA_CBQ_POLICE]	= { .len = sizeof(struct tc_cbq_police) },
};
static int cbq_init(struct Qdisc *sch, struct nlattr *opt)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct nlattr *tb[TCA_CBQ_MAX + 1];
	struct tc_ratespec *r;
	int err;

	err = nla_parse_nested(tb, TCA_CBQ_MAX, opt, cbq_policy);
	if (err < 0)
		return err;

	if (tb[TCA_CBQ_RTAB] == NULL || tb[TCA_CBQ_RATE] == NULL)
		return -EINVAL;

	r = nla_data(tb[TCA_CBQ_RATE]);

	if ((q->link.R_tab = qdisc_get_rtab(r, tb[TCA_CBQ_RTAB])) == NULL)
		return -EINVAL;

	err = qdisc_class_hash_init(&q->clhash);
	if (err < 0)
		goto put_rtab;

	q->link.refcnt = 1;
	q->link.sibling = &q->link;
	q->link.common.classid = sch->handle;
	q->link.qdisc = sch;
	if (!(q->link.q = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue,
					    &pfifo_qdisc_ops,
					    sch->handle)))
		q->link.q = &noop_qdisc;

	q->link.priority = TC_CBQ_MAXPRIO-1;
	q->link.priority2 = TC_CBQ_MAXPRIO-1;
	q->link.cpriority = TC_CBQ_MAXPRIO-1;
	q->link.ovl_strategy = TC_CBQ_OVL_CLASSIC;
	q->link.overlimit = cbq_ovl_classic;
	q->link.allot = psched_mtu(qdisc_dev(sch));
	q->link.quantum = q->link.allot;
	q->link.weight = q->link.R_tab->rate.rate;

	q->link.ewma_log = TC_CBQ_DEF_EWMA;
	q->link.avpkt = q->link.allot/2;
	q->link.minidle = -0x7FFFFFFF;

	qdisc_watchdog_init(&q->watchdog, sch);
	hrtimer_init(&q->delay_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
	q->delay_timer.function = cbq_undelay;
	q->toplevel = TC_CBQ_MAXLEVEL;
	q->now = psched_get_time();
	q->now_rt = q->now;

	cbq_link_class(&q->link);

	if (tb[TCA_CBQ_LSSOPT])
		cbq_set_lss(&q->link, nla_data(tb[TCA_CBQ_LSSOPT]));

	cbq_addprio(q, &q->link);
	return 0;

put_rtab:
	qdisc_put_rtab(q->link.R_tab);
	return err;
}
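
/*
 * Editor's note, an assumed userspace counterpart (not part of this file):
 * cbq_init() requires the TCA_CBQ_RATE and TCA_CBQ_RTAB attributes, which
 * tc(8) derives from the "bandwidth" parameter, e.g. roughly
 *	tc qdisc add dev eth0 root handle 1: cbq bandwidth 10Mbit avpkt 1000
 * Everything else (LSS options, per-class WRR parameters) is optional here
 * and is normally supplied per class via cbq_change_class() below.
 */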
static __inline__ int cbq_dump_rate(struct sk_buff *skb, struct cbq_class *cl)
{
	unsigned char *b = skb_tail_pointer(skb);

	NLA_PUT(skb, TCA_CBQ_RATE, sizeof(cl->R_tab->rate), &cl->R_tab->rate);
	return skb->len;

nla_put_failure:
	nlmsg_trim(skb, b);
	return -1;
}

static __inline__ int cbq_dump_lss(struct sk_buff *skb, struct cbq_class *cl)
{
	unsigned char *b = skb_tail_pointer(skb);
	struct tc_cbq_lssopt opt;

	opt.flags = 0;
	if (cl->borrow == NULL)
		opt.flags |= TCF_CBQ_LSS_BOUNDED;
	if (cl->share == NULL)
		opt.flags |= TCF_CBQ_LSS_ISOLATED;
	opt.ewma_log = cl->ewma_log;
	opt.level = cl->level;
	opt.avpkt = cl->avpkt;
	opt.maxidle = cl->maxidle;
	opt.minidle = (u32)(-cl->minidle);
	opt.offtime = cl->offtime;
	opt.change = ~0;
	NLA_PUT(skb, TCA_CBQ_LSSOPT, sizeof(opt), &opt);
	return skb->len;

nla_put_failure:
	nlmsg_trim(skb, b);
	return -1;
}

static __inline__ int cbq_dump_wrr(struct sk_buff *skb, struct cbq_class *cl)
{
	unsigned char *b = skb_tail_pointer(skb);
	struct tc_cbq_wrropt opt;

	opt.flags = 0;
	opt.allot = cl->allot;
	opt.priority = cl->priority+1;
	opt.cpriority = cl->cpriority+1;
	opt.weight = cl->weight;
	NLA_PUT(skb, TCA_CBQ_WRROPT, sizeof(opt), &opt);
	return skb->len;

nla_put_failure:
	nlmsg_trim(skb, b);
	return -1;
}

static __inline__ int cbq_dump_ovl(struct sk_buff *skb, struct cbq_class *cl)
{
	unsigned char *b = skb_tail_pointer(skb);
	struct tc_cbq_ovl opt;

	opt.strategy = cl->ovl_strategy;
	opt.priority2 = cl->priority2+1;
	opt.pad = 0;
	opt.penalty = cl->penalty;
	NLA_PUT(skb, TCA_CBQ_OVL_STRATEGY, sizeof(opt), &opt);
	return skb->len;

nla_put_failure:
	nlmsg_trim(skb, b);
	return -1;
}

static __inline__ int cbq_dump_fopt(struct sk_buff *skb, struct cbq_class *cl)
{
	unsigned char *b = skb_tail_pointer(skb);
	struct tc_cbq_fopt opt;

	if (cl->split || cl->defmap) {
		opt.split = cl->split ? cl->split->common.classid : 0;
		opt.defmap = cl->defmap;
		opt.defchange = ~0;
		NLA_PUT(skb, TCA_CBQ_FOPT, sizeof(opt), &opt);
	}
	return skb->len;

nla_put_failure:
	nlmsg_trim(skb, b);
	return -1;
}

#ifdef CONFIG_NET_CLS_ACT
static __inline__ int cbq_dump_police(struct sk_buff *skb, struct cbq_class *cl)
{
	unsigned char *b = skb_tail_pointer(skb);
	struct tc_cbq_police opt;

	if (cl->police) {
		opt.police = cl->police;
		opt.__res1 = 0;
		opt.__res2 = 0;
		NLA_PUT(skb, TCA_CBQ_POLICE, sizeof(opt), &opt);
	}
	return skb->len;

nla_put_failure:
	nlmsg_trim(skb, b);
	return -1;
}
#endif

static int cbq_dump_attr(struct sk_buff *skb, struct cbq_class *cl)
{
	if (cbq_dump_lss(skb, cl) < 0 ||
	    cbq_dump_rate(skb, cl) < 0 ||
	    cbq_dump_wrr(skb, cl) < 0 ||
	    cbq_dump_ovl(skb, cl) < 0 ||
#ifdef CONFIG_NET_CLS_ACT
	    cbq_dump_police(skb, cl) < 0 ||
#endif
	    cbq_dump_fopt(skb, cl) < 0)
		return -1;
	return 0;
}
static int cbq_dump(struct Qdisc *sch, struct sk_buff *skb)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct nlattr *nest;

	nest = nla_nest_start(skb, TCA_OPTIONS);
	if (nest == NULL)
		goto nla_put_failure;
	if (cbq_dump_attr(skb, &q->link) < 0)
		goto nla_put_failure;
	nla_nest_end(skb, nest);
	return skb->len;

nla_put_failure:
	nla_nest_cancel(skb, nest);
	return -1;
}

static int
cbq_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
{
	struct cbq_sched_data *q = qdisc_priv(sch);

	q->link.xstats.avgidle = q->link.avgidle;
	return gnet_stats_copy_app(d, &q->link.xstats, sizeof(q->link.xstats));
}

static int
cbq_dump_class(struct Qdisc *sch, unsigned long arg,
	       struct sk_buff *skb, struct tcmsg *tcm)
{
	struct cbq_class *cl = (struct cbq_class*)arg;
	struct nlattr *nest;

	if (cl->tparent)
		tcm->tcm_parent = cl->tparent->common.classid;
	else
		tcm->tcm_parent = TC_H_ROOT;
	tcm->tcm_handle = cl->common.classid;
	tcm->tcm_info = cl->q->handle;

	nest = nla_nest_start(skb, TCA_OPTIONS);
	if (nest == NULL)
		goto nla_put_failure;
	if (cbq_dump_attr(skb, cl) < 0)
		goto nla_put_failure;
	nla_nest_end(skb, nest);
	return skb->len;

nla_put_failure:
	nla_nest_cancel(skb, nest);
	return -1;
}

static int
cbq_dump_class_stats(struct Qdisc *sch, unsigned long arg,
		     struct gnet_dump *d)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *cl = (struct cbq_class*)arg;

	cl->qstats.qlen = cl->q->q.qlen;
	cl->xstats.avgidle = cl->avgidle;
	cl->xstats.undertime = 0;

	if (cl->undertime != PSCHED_PASTPERFECT)
		cl->xstats.undertime = cl->undertime - q->now;

	if (gnet_stats_copy_basic(d, &cl->bstats) < 0 ||
	    gnet_stats_copy_rate_est(d, &cl->bstats, &cl->rate_est) < 0 ||
	    gnet_stats_copy_queue(d, &cl->qstats) < 0)
		return -1;

	return gnet_stats_copy_app(d, &cl->xstats, sizeof(cl->xstats));
}

static int cbq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
		     struct Qdisc **old)
{
	struct cbq_class *cl = (struct cbq_class*)arg;

	if (new == NULL) {
		new = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue,
					&pfifo_qdisc_ops, cl->common.classid);
		if (new == NULL)
			return -ENOBUFS;
	} else {
#ifdef CONFIG_NET_CLS_ACT
		if (cl->police == TC_POLICE_RECLASSIFY)
			new->reshape_fail = cbq_reshape_fail;
#endif
	}
	sch_tree_lock(sch);
	*old = cl->q;
	cl->q = new;
	qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
	qdisc_reset(*old);
	sch_tree_unlock(sch);

	return 0;
}

static struct Qdisc *
cbq_leaf(struct Qdisc *sch, unsigned long arg)
{
	struct cbq_class *cl = (struct cbq_class*)arg;

	return cl->q;
}
static void cbq_qlen_notify(struct Qdisc *sch, unsigned long arg)
{
	struct cbq_class *cl = (struct cbq_class *)arg;

	if (cl->q->q.qlen == 0)
		cbq_deactivate_class(cl);
}

static unsigned long cbq_get(struct Qdisc *sch, u32 classid)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *cl = cbq_class_lookup(q, classid);

	if (cl) {
		cl->refcnt++;
		return (unsigned long)cl;
	}
	return 0;
}

static void cbq_destroy_class(struct Qdisc *sch, struct cbq_class *cl)
{
	struct cbq_sched_data *q = qdisc_priv(sch);

	WARN_ON(cl->filters);

	tcf_destroy_chain(&cl->filter_list);
	qdisc_destroy(cl->q);
	qdisc_put_rtab(cl->R_tab);
	gen_kill_estimator(&cl->bstats, &cl->rate_est);
	if (cl != &q->link)
		kfree(cl);
}

static void
cbq_destroy(struct Qdisc* sch)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct hlist_node *n, *next;
	struct cbq_class *cl;
	unsigned h;

#ifdef CONFIG_NET_CLS_ACT
	q->rx_class = NULL;
#endif
	/*
	 * Filters must be destroyed first because we don't destroy the
	 * classes from root to leafs which means that filters can still
	 * be bound to classes which have been destroyed already. --TGR '04
	 */
	for (h = 0; h < q->clhash.hashsize; h++) {
		hlist_for_each_entry(cl, n, &q->clhash.hash[h], common.hnode)
			tcf_destroy_chain(&cl->filter_list);
	}
	for (h = 0; h < q->clhash.hashsize; h++) {
		hlist_for_each_entry_safe(cl, n, next, &q->clhash.hash[h],
					  common.hnode)
			cbq_destroy_class(sch, cl);
	}
	qdisc_class_hash_destroy(&q->clhash);
}

static void cbq_put(struct Qdisc *sch, unsigned long arg)
{
	struct cbq_class *cl = (struct cbq_class*)arg;

	if (--cl->refcnt == 0) {
#ifdef CONFIG_NET_CLS_ACT
		spinlock_t *root_lock = qdisc_root_sleeping_lock(sch);
		struct cbq_sched_data *q = qdisc_priv(sch);

		spin_lock_bh(root_lock);
		if (q->rx_class == cl)
			q->rx_class = NULL;
		spin_unlock_bh(root_lock);
#endif

		cbq_destroy_class(sch, cl);
	}
}
static int
cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **tca,
		 unsigned long *arg)
{
	int err;
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *cl = (struct cbq_class*)*arg;
	struct nlattr *opt = tca[TCA_OPTIONS];
	struct nlattr *tb[TCA_CBQ_MAX + 1];
	struct cbq_class *parent;
	struct qdisc_rate_table *rtab = NULL;

	if (opt == NULL)
		return -EINVAL;

	err = nla_parse_nested(tb, TCA_CBQ_MAX, opt, cbq_policy);
	if (err < 0)
		return err;

	if (cl) {
		/* Check parent */
		if (parentid) {
			if (cl->tparent &&
			    cl->tparent->common.classid != parentid)
				return -EINVAL;
			if (!cl->tparent && parentid != TC_H_ROOT)
				return -EINVAL;
		}

		if (tb[TCA_CBQ_RATE]) {
			rtab = qdisc_get_rtab(nla_data(tb[TCA_CBQ_RATE]),
					      tb[TCA_CBQ_RTAB]);
			if (rtab == NULL)
				return -EINVAL;
		}

		if (tca[TCA_RATE]) {
			err = gen_replace_estimator(&cl->bstats, &cl->rate_est,
						    qdisc_root_sleeping_lock(sch),
						    tca[TCA_RATE]);
			if (err) {
				if (rtab)
					qdisc_put_rtab(rtab);
				return err;
			}
		}

		/* Change class parameters */
		sch_tree_lock(sch);

		if (cl->next_alive != NULL)
			cbq_deactivate_class(cl);

		if (rtab) {
			qdisc_put_rtab(cl->R_tab);
			cl->R_tab = rtab;
		}

		if (tb[TCA_CBQ_LSSOPT])
			cbq_set_lss(cl, nla_data(tb[TCA_CBQ_LSSOPT]));

		if (tb[TCA_CBQ_WRROPT]) {
			cbq_rmprio(q, cl);
			cbq_set_wrr(cl, nla_data(tb[TCA_CBQ_WRROPT]));
		}

		if (tb[TCA_CBQ_OVL_STRATEGY])
			cbq_set_overlimit(cl, nla_data(tb[TCA_CBQ_OVL_STRATEGY]));

#ifdef CONFIG_NET_CLS_ACT
		if (tb[TCA_CBQ_POLICE])
			cbq_set_police(cl, nla_data(tb[TCA_CBQ_POLICE]));
#endif

		if (tb[TCA_CBQ_FOPT])
			cbq_set_fopt(cl, nla_data(tb[TCA_CBQ_FOPT]));

		if (cl->q->q.qlen)
			cbq_activate_class(cl);

		sch_tree_unlock(sch);

		return 0;
	}

	if (parentid == TC_H_ROOT)
		return -EINVAL;

	if (tb[TCA_CBQ_WRROPT] == NULL || tb[TCA_CBQ_RATE] == NULL ||
	    tb[TCA_CBQ_LSSOPT] == NULL)
		return -EINVAL;

	rtab = qdisc_get_rtab(nla_data(tb[TCA_CBQ_RATE]), tb[TCA_CBQ_RTAB]);
	if (rtab == NULL)
		return -EINVAL;

	if (classid) {
		err = -EINVAL;
		if (TC_H_MAJ(classid^sch->handle) || cbq_class_lookup(q, classid))
			goto failure;
	} else {
		int i;
		classid = TC_H_MAKE(sch->handle,0x8000);

		for (i=0; i<0x8000; i++) {
			if (++q->hgenerator >= 0x8000)
				q->hgenerator = 1;
			if (cbq_class_lookup(q, classid|q->hgenerator) == NULL)
				break;
		}
		err = -ENOSR;
		if (i >= 0x8000)
			goto failure;
		classid = classid|q->hgenerator;
	}

	parent = &q->link;
	if (parentid) {
		parent = cbq_class_lookup(q, parentid);
		err = -EINVAL;
		if (parent == NULL)
			goto failure;
	}

	err = -ENOBUFS;
	cl = kzalloc(sizeof(*cl), GFP_KERNEL);
	if (cl == NULL)
		goto failure;

	if (tca[TCA_RATE]) {
		err = gen_new_estimator(&cl->bstats, &cl->rate_est,
					qdisc_root_sleeping_lock(sch),
					tca[TCA_RATE]);
		if (err) {
			kfree(cl);
			goto failure;
		}
	}

	cl->R_tab = rtab;
	rtab = NULL;
	cl->refcnt = 1;
	if (!(cl->q = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue,
					&pfifo_qdisc_ops, classid)))
		cl->q = &noop_qdisc;
	cl->common.classid = classid;
	cl->tparent = parent;
	cl->qdisc = sch;
	cl->allot = parent->allot;
	cl->quantum = cl->allot;
	cl->weight = cl->R_tab->rate.rate;

	sch_tree_lock(sch);
	cbq_link_class(cl);
	cl->borrow = cl->tparent;
	if (cl->tparent != &q->link)
		cl->share = cl->tparent;
	cbq_adjust_levels(parent);
	cl->minidle = -0x7FFFFFFF;
	cbq_set_lss(cl, nla_data(tb[TCA_CBQ_LSSOPT]));
	cbq_set_wrr(cl, nla_data(tb[TCA_CBQ_WRROPT]));
	if (cl->ewma_log==0)
		cl->ewma_log = q->link.ewma_log;
	if (cl->maxidle==0)
		cl->maxidle = q->link.maxidle;
	if (cl->avpkt==0)
		cl->avpkt = q->link.avpkt;
	cl->overlimit = cbq_ovl_classic;
	if (tb[TCA_CBQ_OVL_STRATEGY])
		cbq_set_overlimit(cl, nla_data(tb[TCA_CBQ_OVL_STRATEGY]));
#ifdef CONFIG_NET_CLS_ACT
	if (tb[TCA_CBQ_POLICE])
		cbq_set_police(cl, nla_data(tb[TCA_CBQ_POLICE]));
#endif
	if (tb[TCA_CBQ_FOPT])
		cbq_set_fopt(cl, nla_data(tb[TCA_CBQ_FOPT]));
	sch_tree_unlock(sch);

	qdisc_class_hash_grow(sch, &q->clhash);

	*arg = (unsigned long)cl;
	return 0;

failure:
	qdisc_put_rtab(rtab);
	return err;
}
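
/*
 * Editor's note, an assumed userspace counterpart for the "create new class"
 * branch above (not part of this file): something along the lines of
 *	tc class add dev eth0 parent 1: classid 1:10 cbq bandwidth 10Mbit \
 *		rate 2Mbit allot 1500 prio 5 avpkt 1000 bounded
 * supplies the TCA_CBQ_RATE, TCA_CBQ_WRROPT and TCA_CBQ_LSSOPT attributes
 * that this branch requires before allocating the class.
 */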
static int cbq_delete(struct Qdisc *sch, unsigned long arg)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *cl = (struct cbq_class*)arg;
	unsigned int qlen;

	if (cl->filters || cl->children || cl == &q->link)
		return -EBUSY;

	sch_tree_lock(sch);

	qlen = cl->q->q.qlen;
	qdisc_reset(cl->q);
	qdisc_tree_decrease_qlen(cl->q, qlen);

	if (cl->next_alive)
		cbq_deactivate_class(cl);

	if (q->tx_borrowed == cl)
		q->tx_borrowed = q->tx_class;
	if (q->tx_class == cl) {
		q->tx_class = NULL;
		q->tx_borrowed = NULL;
	}
#ifdef CONFIG_NET_CLS_ACT
	if (q->rx_class == cl)
		q->rx_class = NULL;
#endif

	cbq_unlink_class(cl);
	cbq_adjust_levels(cl->tparent);
	cl->defmap = 0;
	cbq_sync_defmap(cl);

	cbq_rmprio(q, cl);
	sch_tree_unlock(sch);

	BUG_ON(--cl->refcnt == 0);
	/*
	 * This shouldn't happen: we "hold" one cops->get() when called
	 * from tc_ctl_tclass; the destroy method is done from cops->put().
	 */

	return 0;
}

static struct tcf_proto **cbq_find_tcf(struct Qdisc *sch, unsigned long arg)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *cl = (struct cbq_class *)arg;

	if (cl == NULL)
		cl = &q->link;

	return &cl->filter_list;
}
static unsigned long cbq_bind_filter(struct Qdisc *sch, unsigned long parent,
				     u32 classid)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *p = (struct cbq_class*)parent;
	struct cbq_class *cl = cbq_class_lookup(q, classid);

	if (cl) {
		if (p && p->level <= cl->level)
			return 0;
		cl->filters++;
		return (unsigned long)cl;
	}
	return 0;
}

static void cbq_unbind_filter(struct Qdisc *sch, unsigned long arg)
{
	struct cbq_class *cl = (struct cbq_class*)arg;

	cl->filters--;
}

static void cbq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *cl;
	struct hlist_node *n;
	unsigned h;

	if (arg->stop)
		return;

	for (h = 0; h < q->clhash.hashsize; h++) {
		hlist_for_each_entry(cl, n, &q->clhash.hash[h], common.hnode) {
			if (arg->count < arg->skip) {
				arg->count++;
				continue;
			}
			if (arg->fn(sch, (unsigned long)cl, arg) < 0) {
				arg->stop = 1;
				return;
			}
			arg->count++;
		}
	}
}
static const struct Qdisc_class_ops cbq_class_ops = {
	.graft		=	cbq_graft,
	.leaf		=	cbq_leaf,
	.qlen_notify	=	cbq_qlen_notify,
	.get		=	cbq_get,
	.put		=	cbq_put,
	.change		=	cbq_change_class,
	.delete		=	cbq_delete,
	.walk		=	cbq_walk,
	.tcf_chain	=	cbq_find_tcf,
	.bind_tcf	=	cbq_bind_filter,
	.unbind_tcf	=	cbq_unbind_filter,
	.dump		=	cbq_dump_class,
	.dump_stats	=	cbq_dump_class_stats,
};

static struct Qdisc_ops cbq_qdisc_ops __read_mostly = {
	.next		=	NULL,
	.cl_ops		=	&cbq_class_ops,
	.id		=	"cbq",
	.priv_size	=	sizeof(struct cbq_sched_data),
	.enqueue	=	cbq_enqueue,
	.dequeue	=	cbq_dequeue,
	.peek		=	qdisc_peek_dequeued,
	.drop		=	cbq_drop,
	.init		=	cbq_init,
	.reset		=	cbq_reset,
	.destroy	=	cbq_destroy,
	.change		=	NULL,
	.dump		=	cbq_dump,
	.dump_stats	=	cbq_dump_stats,
	.owner		=	THIS_MODULE,
};

static int __init cbq_module_init(void)
{
	return register_qdisc(&cbq_qdisc_ops);
}
static void __exit cbq_module_exit(void)
{
	unregister_qdisc(&cbq_qdisc_ops);
}
module_init(cbq_module_init)
module_exit(cbq_module_exit)
MODULE_LICENSE("GPL");