1 /* vim: ts=8 sw=8
2 * net/sched/sch_htb.c Hierarchical token bucket, feed tree version
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
9 * Authors: Martin Devera, <devik@cdi.cz>
11 * Credits (in time order) for older HTB versions:
12 * Stef Coene <stef.coene@docum.org>
13 * HTB support at LARTC mailing list
14 * Ondrej Kraus, <krauso@barr.cz>
15 * found missing INIT_QDISC(htb)
16 * Vladimir Smelhaus, Aamer Akhter, Bert Hubert
17 * helped a lot to locate nasty class stall bug
18 * Andi Kleen, Jamal Hadi, Bert Hubert
19 * code review and helpful comments on shaping
20 * Tomasz Wrona, <tw@eter.tym.pl>
21 * created test case so that I was able to fix nasty bug
22 * and many others. thanks.
24 * $Id: sch_htb.c,v 1.20 2003/06/18 19:55:49 devik Exp devik $
26 #include <linux/config.h>
27 #include <linux/module.h>
28 #include <asm/uaccess.h>
29 #include <asm/system.h>
30 #include <asm/bitops.h>
31 #include <linux/types.h>
32 #include <linux/kernel.h>
33 #include <linux/version.h>
34 #include <linux/sched.h>
35 #include <linux/string.h>
36 #include <linux/mm.h>
37 #include <linux/socket.h>
38 #include <linux/sockios.h>
39 #include <linux/in.h>
40 #include <linux/errno.h>
41 #include <linux/interrupt.h>
42 #include <linux/if_ether.h>
43 #include <linux/inet.h>
44 #include <linux/netdevice.h>
45 #include <linux/etherdevice.h>
46 #include <linux/notifier.h>
47 #include <net/ip.h>
48 #include <net/route.h>
49 #include <linux/skbuff.h>
50 #include <linux/list.h>
51 #include <linux/compiler.h>
52 #include <net/sock.h>
53 #include <net/pkt_sched.h>
54 #include <linux/rbtree.h>
56 /* HTB algorithm.
57 Author: devik@cdi.cz
58 ========================================================================
59 HTB is like TBF with multiple classes. It is also similar to CBQ because
60 it allows assigning a priority to each class in the hierarchy.
61 In fact it is another implementation of Floyd's formal sharing.
63 Levels:
64 Each class is assigned a level. A leaf ALWAYS has level 0 and root
65 classes have level TC_HTB_MAXDEPTH-1. Interior nodes have a level
66 one less than their parent.
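/* Illustrative example (class handles assumed, not from the original comment):
 * in a tree root 1:1 -> inner 1:10 -> leaf 1:100, the root class 1:1 sits at
 * level TC_HTB_MAXDEPTH-1, the inner class 1:10 one level below it, and the
 * leaf 1:100 at level 0 regardless of its depth. */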
69 #define HTB_HSIZE 16 /* classid hash size */
70 #define HTB_EWMAC 2 /* rate average over HTB_EWMAC*HTB_HSIZE sec */
71 #define HTB_DEBUG 1 /* compile debugging support (activated by tc tool) */
72 #define HTB_RATECM 1 /* whether to use rate computer */
73 #define HTB_HYSTERESIS 1 /* whether to use mode hysteresis for speedup */
74 #define HTB_QLOCK(S) spin_lock_bh(&(S)->dev->queue_lock)
75 #define HTB_QUNLOCK(S) spin_unlock_bh(&(S)->dev->queue_lock)
76 #define HTB_VER 0x3000c /* major must be matched with number supplied by TC as version */
78 #if HTB_VER >> 16 != TC_HTB_PROTOVER
79 #error "Mismatched sch_htb.c and pkt_sch.h"
80 #endif
82 /* debugging support; S is subsystem, these are defined:
83 0 - netlink messages
84 1 - enqueue
85 2 - drop & requeue
86 3 - dequeue main
87 4 - dequeue one prio DRR part
88 5 - dequeue class accounting
89 6 - class overlimit status computation
90 7 - hint tree
91 8 - event queue
92 10 - rate estimator
93 11 - classifier
94 12 - fast dequeue cache
96 L is level; 0 = none, 1 = basic info, 2 = detailed, 3 = full
97 q->debug is a uint32 containing 16 2-bit fields, one per subsystem, starting
98 from the LSB
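/* Worked example (illustrative): to get full (level 3) logging from
 * subsystem 8 (event queue), set q->debug to 3 << (2*8) == 0x30000;
 * HTB_DBG(8,L,...) then fires for every L <= 3 while all other subsystems
 * stay silent. The value is supplied by tc in the qdisc's debug attribute
 * and copied to q->debug in htb_init(). */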
100 #ifdef HTB_DEBUG
101 #define HTB_DBG(S,L,FMT,ARG...) if (((q->debug>>(2*S))&3) >= L) \
102 printk(KERN_DEBUG FMT,##ARG)
103 #define HTB_CHCL(cl) BUG_TRAP((cl)->magic == HTB_CMAGIC)
104 #define HTB_PASSQ q,
105 #define HTB_ARGQ struct htb_sched *q,
106 #define static
107 #undef __inline__
108 #define __inline__
109 #undef inline
110 #define inline
111 #define HTB_CMAGIC 0xFEFAFEF1
112 #define htb_safe_rb_erase(N,R) do { BUG_TRAP((N)->rb_color != -1); \
113 if ((N)->rb_color == -1) break; \
114 rb_erase(N,R); \
115 (N)->rb_color = -1; } while (0)
116 #else
117 #define HTB_DBG(S,L,FMT,ARG...)
118 #define HTB_PASSQ
119 #define HTB_ARGQ
120 #define HTB_CHCL(cl)
121 #define htb_safe_rb_erase(N,R) rb_erase(N,R)
122 #endif
125 /* used internally to keep the status of a single class */
126 enum htb_cmode {
127 HTB_CANT_SEND, /* class can't send and can't borrow */
128 HTB_MAY_BORROW, /* class can't send but may borrow */
129 HTB_CAN_SEND /* class can send */
132 /* interior & leaf nodes; props specific to leaves are marked L: */
133 struct htb_class
135 #ifdef HTB_DEBUG
136 unsigned magic;
137 #endif
138 /* general class parameters */
139 u32 classid;
140 struct tc_stats stats; /* generic stats */
141 struct tc_htb_xstats xstats;/* our special stats */
142 int refcnt; /* usage count of this class */
144 #ifdef HTB_RATECM
145 /* rate measurement counters */
146 unsigned long rate_bytes,sum_bytes;
147 unsigned long rate_packets,sum_packets;
148 #endif
150 /* topology */
151 int level; /* our level (see above) */
152 struct htb_class *parent; /* parent class */
153 struct list_head hlist; /* classid hash list item */
154 struct list_head sibling; /* sibling list item */
155 struct list_head children; /* children list */
157 union {
158 struct htb_class_leaf {
159 struct Qdisc *q;
160 int prio;
161 int aprio;
162 int quantum;
163 int deficit[TC_HTB_MAXDEPTH];
164 struct list_head drop_list;
165 } leaf;
166 struct htb_class_inner {
167 struct rb_root feed[TC_HTB_NUMPRIO]; /* feed trees */
168 struct rb_node *ptr[TC_HTB_NUMPRIO]; /* current class ptr */
169 } inner;
170 } un;
171 struct rb_node node[TC_HTB_NUMPRIO]; /* node for self or feed tree */
172 struct rb_node pq_node; /* node for event queue */
173 unsigned long pq_key; /* the same type as jiffies global */
175 int prio_activity; /* for which prios are we active */
176 enum htb_cmode cmode; /* current mode of the class */
178 /* class attached filters */
179 struct tcf_proto *filter_list;
180 int filter_cnt;
182 int warned; /* only one warning about non work conserving .. */
184 /* token bucket parameters */
185 struct qdisc_rate_table *rate; /* rate table of the class itself */
186 struct qdisc_rate_table *ceil; /* ceiling rate (limits borrows too) */
187 long buffer,cbuffer; /* token bucket depth/rate */
188 long mbuffer; /* max wait time */
189 long tokens,ctokens; /* current number of tokens */
190 psched_time_t t_c; /* checkpoint time */
193 /* TODO: maybe compute rate when size is too large .. or drop ? */
194 static __inline__ long L2T(struct htb_class *cl,struct qdisc_rate_table *rate,
195 int size)
197 int slot = size >> rate->rate.cell_log;
198 if (slot > 255) {
199 cl->xstats.giants++;
200 slot = 255;
202 return rate->data[slot];
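/* Worked example (cell_log value assumed for illustration): the rate table
 * maps packet length to the time needed to send it at the given rate. With
 * rate->rate.cell_log == 3 a 1500 byte packet indexes slot 1500 >> 3 == 187
 * and rate->data[187] is the transmit time in PSCHED units; anything larger
 * than 255 cells is clamped to slot 255 and counted in cl->xstats.giants. */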
205 struct htb_sched
207 struct list_head root; /* root classes list */
208 struct list_head hash[HTB_HSIZE]; /* hashed by classid */
209 struct list_head drops[TC_HTB_NUMPRIO]; /* active leaves (for drops) */
211 /* self list - roots of self generating tree */
212 struct rb_root row[TC_HTB_MAXDEPTH][TC_HTB_NUMPRIO];
213 int row_mask[TC_HTB_MAXDEPTH];
214 struct rb_node *ptr[TC_HTB_MAXDEPTH][TC_HTB_NUMPRIO];
216 /* self wait list - roots of wait PQs per row */
217 struct rb_root wait_pq[TC_HTB_MAXDEPTH];
219 /* time of nearest event per level (row) */
220 unsigned long near_ev_cache[TC_HTB_MAXDEPTH];
222 /* cached value of jiffies in dequeue */
223 unsigned long jiffies;
225 /* whether we hit non-work conserving class during this dequeue; we use */
226 int nwc_hit; /* this to disable mindelay complaint in dequeue */
228 int defcls; /* class where unclassified flows go to */
229 u32 debug; /* subsystem debug levels */
231 /* filters for qdisc itself */
232 struct tcf_proto *filter_list;
233 int filter_cnt;
235 int rate2quantum; /* quant = rate / rate2quantum */
236 psched_time_t now; /* cached dequeue time */
237 struct timer_list timer; /* send delay timer */
238 #ifdef HTB_RATECM
239 struct timer_list rttim; /* rate computer timer */
240 int recmp_bucket; /* which hash bucket to recompute next */
241 #endif
243 /* non shaped skbs; let them go directly thru */
244 struct sk_buff_head direct_queue;
245 int direct_qlen; /* max qlen of above */
247 long direct_pkts;
250 /* compute hash of size HTB_HSIZE for given handle */
251 static __inline__ int htb_hash(u32 h)
253 #if HTB_HSIZE != 16
254 #error "Declare new hash for your HTB_HSIZE"
255 #endif
256 h ^= h>>8; /* stolen from cbq_hash */
257 h ^= h>>4;
258 return h & 0xf;
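/* Worked example (classid assumed): for leaf 1:10 the handle is 0x00010010;
 * 0x00010010 ^ 0x00000100 = 0x00010110, then 0x00010110 ^ 0x00001011 =
 * 0x00011101, and & 0xf gives bucket 1. */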
261 /* find class in global hash table using given handle */
262 static __inline__ struct htb_class *htb_find(u32 handle, struct Qdisc *sch)
264 struct htb_sched *q = (struct htb_sched *)sch->data;
265 struct list_head *p;
266 if (TC_H_MAJ(handle) != sch->handle)
267 return NULL;
269 list_for_each (p,q->hash+htb_hash(handle)) {
270 struct htb_class *cl = list_entry(p,struct htb_class,hlist);
271 if (cl->classid == handle)
272 return cl;
274 return NULL;
278 * htb_classify - classify a packet into class
280 * It returns NULL if the packet should be dropped or -1 if the packet
281 * should be passed directly thru. In all other cases a leaf class is returned.
282 * We allow direct class selection by classid in skb->priority. Then we examine
283 * filters in the qdisc and in inner nodes (if a higher filter points to an inner
284 * node). If we end up with classid MAJOR:0 we enqueue the skb into the special
285 * internal fifo (direct). These packets then go directly thru. If we still
286 * have no valid leaf we try to use the MAJOR:default leaf. If that is still
287 * unsuccessful we finish and return the direct queue.
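/* Illustrative example (handle and defcls values assumed): with qdisc handle
 * 1: (0x00010000) and defcls 0x20, an skb whose skb->priority is 0x00010000
 * goes straight to the direct queue, skb->priority 0x00010010 selects leaf
 * 1:10 without consulting any filter, and a packet matched by no filter falls
 * back to leaf 1:20; if 1:20 does not exist or is an inner class, the packet
 * goes thru the direct queue as well. */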
289 #define HTB_DIRECT (struct htb_class*)-1
290 static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch)
292 struct htb_sched *q = (struct htb_sched *)sch->data;
293 struct htb_class *cl;
294 struct tcf_result res;
295 struct tcf_proto *tcf;
296 int result;
298 /* allow selecting the class by setting skb->priority to a valid classid;
299 note that nfmark can be used too, by attaching an fw filter with no
300 rules in it */
301 if (skb->priority == sch->handle)
302 return HTB_DIRECT; /* X:0 (direct flow) selected */
303 if ((cl = htb_find(skb->priority,sch)) != NULL)
304 return cl;
306 tcf = q->filter_list;
307 while (tcf && (result = tc_classify(skb, tcf, &res)) >= 0) {
308 #ifdef CONFIG_NET_CLS_POLICE
309 if (result == TC_POLICE_SHOT)
310 return NULL;
311 #endif
312 if ((cl = (void*)res.class) == NULL) {
313 if (res.classid == sch->handle)
314 return HTB_DIRECT; /* X:0 (direct flow) */
315 if ((cl = htb_find(res.classid,sch)) == NULL)
316 break; /* filter selected invalid classid */
318 if (!cl->level)
319 return cl; /* we hit leaf; return it */
321 /* we have got inner class; apply inner filter chain */
322 tcf = cl->filter_list;
324 /* classification failed; try to use default class */
325 cl = htb_find(TC_H_MAKE(TC_H_MAJ(sch->handle),q->defcls),sch);
326 if (!cl || cl->level)
327 return HTB_DIRECT; /* bad default .. this is safe bet */
328 return cl;
331 #ifdef HTB_DEBUG
332 static void htb_next_rb_node(struct rb_node **n);
333 #define HTB_DUMTREE(root,memb) if(root) { \
334 struct rb_node *n = (root)->rb_node; \
335 while (n->rb_left) n = n->rb_left; \
336 while (n) { \
337 struct htb_class *cl = rb_entry(n, struct htb_class, memb); \
338 printk(" %x",cl->classid); htb_next_rb_node (&n); \
341 static void htb_debug_dump (struct htb_sched *q)
343 int i,p;
344 printk(KERN_DEBUG "htb*g j=%lu lj=%lu\n",jiffies,q->jiffies);
345 /* rows */
346 for (i=TC_HTB_MAXDEPTH-1;i>=0;i--) {
347 printk(KERN_DEBUG "htb*r%d m=%x",i,q->row_mask[i]);
348 for (p=0;p<TC_HTB_NUMPRIO;p++) {
349 if (!q->row[i][p].rb_node) continue;
350 printk(" p%d:",p);
351 HTB_DUMTREE(q->row[i]+p,node[p]);
353 printk("\n");
355 /* classes */
356 for (i = 0; i < HTB_HSIZE; i++) {
357 struct list_head *l;
358 list_for_each (l,q->hash+i) {
359 struct htb_class *cl = list_entry(l,struct htb_class,hlist);
360 long diff = PSCHED_TDIFF_SAFE(q->now, cl->t_c, (u32)cl->mbuffer, 0);
361 printk(KERN_DEBUG "htb*c%x m=%d t=%ld c=%ld pq=%lu df=%ld ql=%d "
362 "pa=%x f:",
363 cl->classid,cl->cmode,cl->tokens,cl->ctokens,
364 cl->pq_node.rb_color==-1?0:cl->pq_key,diff,
365 cl->level?0:cl->un.leaf.q->q.qlen,cl->prio_activity);
366 if (cl->level)
367 for (p=0;p<TC_HTB_NUMPRIO;p++) {
368 if (!cl->un.inner.feed[p].rb_node) continue;
369 printk(" p%d a=%x:",p,cl->un.inner.ptr[p]?rb_entry(cl->un.inner.ptr[p], struct htb_class,node[p])->classid:0);
370 HTB_DUMTREE(cl->un.inner.feed+p,node[p]);
372 printk("\n");
376 #endif
378 * htb_add_to_id_tree - adds class to the round robin list
380 * Routine adds the class to the list (actually tree) sorted by classid.
381 * Make sure that the class is not already on such a list for the given prio.
383 static void htb_add_to_id_tree (HTB_ARGQ struct rb_root *root,
384 struct htb_class *cl,int prio)
386 struct rb_node **p = &root->rb_node, *parent = NULL;
387 HTB_DBG(7,3,"htb_add_id_tree cl=%X prio=%d\n",cl->classid,prio);
388 #ifdef HTB_DEBUG
389 if (cl->node[prio].rb_color != -1) { BUG_TRAP(0); return; }
390 HTB_CHCL(cl);
391 if (*p) {
392 struct htb_class *x = rb_entry(*p,struct htb_class,node[prio]);
393 HTB_CHCL(x);
395 #endif
396 while (*p) {
397 struct htb_class *c; parent = *p;
398 c = rb_entry(parent, struct htb_class, node[prio]);
399 HTB_CHCL(c);
400 if (cl->classid > c->classid)
401 p = &parent->rb_right;
402 else
403 p = &parent->rb_left;
405 rb_link_node(&cl->node[prio], parent, p);
406 rb_insert_color(&cl->node[prio], root);
410 * htb_add_to_wait_tree - adds class to the event queue with delay
412 * The class is added to the priority event queue to indicate that the class
413 * will change its mode in "delay" microseconds; cl->pq_key holds the absolute
414 * jiffies value of that event. Make sure that the class is not already in the queue.
416 static void htb_add_to_wait_tree (struct htb_sched *q,
417 struct htb_class *cl,long delay,int debug_hint)
419 struct rb_node **p = &q->wait_pq[cl->level].rb_node, *parent = NULL;
420 HTB_DBG(7,3,"htb_add_wt cl=%X key=%lu\n",cl->classid,cl->pq_key);
421 #ifdef HTB_DEBUG
422 if (cl->pq_node.rb_color != -1) { BUG_TRAP(0); return; }
423 HTB_CHCL(cl);
424 if ((delay <= 0 || delay > cl->mbuffer) && net_ratelimit())
425 printk(KERN_ERR "HTB: suspicious delay in wait_tree d=%ld cl=%X h=%d\n",delay,cl->classid,debug_hint);
426 #endif
427 cl->pq_key = q->jiffies + PSCHED_US2JIFFIE(delay);
428 if (cl->pq_key == q->jiffies)
429 cl->pq_key++;
431 /* update the nearest event cache */
432 if (q->near_ev_cache[cl->level] - cl->pq_key < 0x80000000)
433 q->near_ev_cache[cl->level] = cl->pq_key;
435 while (*p) {
436 struct htb_class *c; parent = *p;
437 c = rb_entry(parent, struct htb_class, pq_node);
438 if (cl->pq_key - c->pq_key < 0x80000000)
439 p = &parent->rb_right;
440 else
441 p = &parent->rb_left;
443 rb_link_node(&cl->pq_node, parent, p);
444 rb_insert_color(&cl->pq_node, &q->wait_pq[cl->level]);
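/* Note on the comparisons above (annotation): on 32-bit jiffies the test
 * "a - b < 0x80000000" is a wraparound-safe way of asking "a >= b" (the same
 * idea as time_after_eq()). It is used both to keep near_ev_cache[] holding
 * the earliest pending event and to order pq_node entries by pq_key inside
 * the wait tree. */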
448 * htb_next_rb_node - finds next node in binary tree
450 * When we are past the last key, *n is set to NULL.
451 * Average complexity is 2 steps per call.
453 static void htb_next_rb_node(struct rb_node **n)
455 *n = rb_next(*n);
459 * htb_add_class_to_row - add class to its row
461 * The class is added to row at priorities marked in mask.
462 * It does nothing if mask == 0.
464 static inline void htb_add_class_to_row(struct htb_sched *q,
465 struct htb_class *cl,int mask)
467 HTB_DBG(7,2,"htb_addrow cl=%X mask=%X rmask=%X\n",
468 cl->classid,mask,q->row_mask[cl->level]);
469 HTB_CHCL(cl);
470 q->row_mask[cl->level] |= mask;
471 while (mask) {
472 int prio = ffz(~mask);
473 mask &= ~(1 << prio);
474 htb_add_to_id_tree(HTB_PASSQ q->row[cl->level]+prio,cl,prio);
479 * htb_remove_class_from_row - removes class from its row
481 * The class is removed from row at priorities marked in mask.
482 * It does nothing if mask == 0.
484 static __inline__ void htb_remove_class_from_row(struct htb_sched *q,
485 struct htb_class *cl,int mask)
487 int m = 0;
488 HTB_CHCL(cl);
489 while (mask) {
490 int prio = ffz(~mask);
491 mask &= ~(1 << prio);
492 if (q->ptr[cl->level][prio] == cl->node+prio)
493 htb_next_rb_node(q->ptr[cl->level]+prio);
494 htb_safe_rb_erase(cl->node + prio,q->row[cl->level]+prio);
495 if (!q->row[cl->level][prio].rb_node)
496 m |= 1 << prio;
498 HTB_DBG(7,2,"htb_delrow cl=%X mask=%X rmask=%X maskdel=%X\n",
499 cl->classid,mask,q->row_mask[cl->level],m);
500 q->row_mask[cl->level] &= ~m;
504 * htb_activate_prios - creates an active class's feed chain
506 * The class is connected to ancestors and/or the appropriate rows
507 * for the priorities it participates in. cl->cmode must be the new
508 * (activated) mode. It does nothing if cl->prio_activity == 0.
510 static void htb_activate_prios(struct htb_sched *q,struct htb_class *cl)
512 struct htb_class *p = cl->parent;
513 long m,mask = cl->prio_activity;
514 HTB_DBG(7,2,"htb_act_prios cl=%X mask=%lX cmode=%d\n",cl->classid,mask,cl->cmode);
515 HTB_CHCL(cl);
517 while (cl->cmode == HTB_MAY_BORROW && p && mask) {
518 HTB_CHCL(p);
519 m = mask; while (m) {
520 int prio = ffz(~m);
521 m &= ~(1 << prio);
523 if (p->un.inner.feed[prio].rb_node)
524 /* parent already has its feed in use, so
525 reset the bit in mask as the parent is already ok */
526 mask &= ~(1 << prio);
528 htb_add_to_id_tree(HTB_PASSQ p->un.inner.feed+prio,cl,prio);
530 HTB_DBG(7,3,"htb_act_pr_aft p=%X pact=%X mask=%lX pmode=%d\n",
531 p->classid,p->prio_activity,mask,p->cmode);
532 p->prio_activity |= mask;
533 cl = p; p = cl->parent;
534 HTB_CHCL(cl);
536 if (cl->cmode == HTB_CAN_SEND && mask)
537 htb_add_class_to_row(q,cl,mask);
541 * htb_deactivate_prios - remove class from feed chain
543 * cl->cmode must represent the old mode (before deactivation). It does
544 * nothing if cl->prio_activity == 0. The class is removed from all feed
545 * chains and rows.
547 static void htb_deactivate_prios(struct htb_sched *q, struct htb_class *cl)
549 struct htb_class *p = cl->parent;
550 long m,mask = cl->prio_activity;
551 HTB_DBG(7,2,"htb_deact_prios cl=%X mask=%lX cmode=%d\n",cl->classid,mask,cl->cmode);
552 HTB_CHCL(cl);
554 while (cl->cmode == HTB_MAY_BORROW && p && mask) {
555 m = mask; mask = 0;
556 while (m) {
557 int prio = ffz(~m);
558 m &= ~(1 << prio);
560 if (p->un.inner.ptr[prio] == cl->node+prio)
561 htb_next_rb_node(p->un.inner.ptr + prio);
563 htb_safe_rb_erase(cl->node + prio,p->un.inner.feed + prio);
565 if (!p->un.inner.feed[prio].rb_node)
566 mask |= 1 << prio;
568 HTB_DBG(7,3,"htb_deact_pr_aft p=%X pact=%X mask=%lX pmode=%d\n",
569 p->classid,p->prio_activity,mask,p->cmode);
570 p->prio_activity &= ~mask;
571 cl = p; p = cl->parent;
572 HTB_CHCL(cl);
574 if (cl->cmode == HTB_CAN_SEND && mask)
575 htb_remove_class_from_row(q,cl,mask);
579 * htb_class_mode - computes and returns current class mode
581 * It computes cl's mode at time cl->t_c+diff and returns it. If mode
582 * is not HTB_CAN_SEND then cl->pq_key is updated to time difference
583 * from now to time when cl will change its state.
584 * Also it is worth to note that class mode doesn't change simply
585 * at cl->{c,}tokens == 0 but there can rather be hysteresis of
586 * 0 .. -cl->{c,}buffer range. It is meant to limit number of
587 * mode transitions per time unit. The speed gain is about 1/6.
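/* Annotation on the hysteresis below (HTB_HYSTERESIS == 1): a class that is
 * already in HTB_CAN_SEND keeps that mode until cl->tokens + diff drops below
 * -cl->buffer, and a class not yet in HTB_CANT_SEND only enters it once
 * cl->ctokens + diff drops below -cl->cbuffer; in all other cases the
 * threshold is 0. This slack of one full buffer is what reduces the number
 * of mode transitions mentioned above. */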
589 static __inline__ enum htb_cmode
590 htb_class_mode(struct htb_class *cl,long *diff)
592 long toks;
594 if ((toks = (cl->ctokens + *diff)) < (
595 #if HTB_HYSTERESIS
596 cl->cmode != HTB_CANT_SEND ? -cl->cbuffer :
597 #endif
598 0)) {
599 *diff = -toks;
600 return HTB_CANT_SEND;
602 if ((toks = (cl->tokens + *diff)) >= (
603 #if HTB_HYSTERESIS
604 cl->cmode == HTB_CAN_SEND ? -cl->buffer :
605 #endif
607 return HTB_CAN_SEND;
609 *diff = -toks;
610 return HTB_MAY_BORROW;
614 * htb_change_class_mode - changes class's mode
616 * This should be the only way to change a class's mode under normal
617 * circumstances. The routine will update the feed lists' linkage, change the mode
618 * and add the class to the wait event queue if appropriate. The new mode should
619 * be different from the old one and cl->pq_key has to be valid if changing
620 * to a mode other than HTB_CAN_SEND (see htb_add_to_wait_tree).
622 static void
623 htb_change_class_mode(struct htb_sched *q, struct htb_class *cl, long *diff)
625 enum htb_cmode new_mode = htb_class_mode(cl,diff);
627 HTB_CHCL(cl);
628 HTB_DBG(7,1,"htb_chging_clmode %d->%d cl=%X\n",cl->cmode,new_mode,cl->classid);
630 if (new_mode == cl->cmode)
631 return;
633 if (cl->prio_activity) { /* not necessary: speed optimization */
634 if (cl->cmode != HTB_CANT_SEND)
635 htb_deactivate_prios(q,cl);
636 cl->cmode = new_mode;
637 if (new_mode != HTB_CANT_SEND)
638 htb_activate_prios(q,cl);
639 } else
640 cl->cmode = new_mode;
644 * htb_activate - inserts leaf cl into appropriate active feeds
646 * Routine learns the (new) priority of the leaf and activates the feed chain
647 * for the prio. It can safely be called on an already active leaf.
648 * It also adds the leaf to the drop list.
650 static __inline__ void htb_activate(struct htb_sched *q,struct htb_class *cl)
652 BUG_TRAP(!cl->level && cl->un.leaf.q && cl->un.leaf.q->q.qlen);
653 HTB_CHCL(cl);
654 if (!cl->prio_activity) {
655 cl->prio_activity = 1 << (cl->un.leaf.aprio = cl->un.leaf.prio);
656 htb_activate_prios(q,cl);
657 list_add_tail(&cl->un.leaf.drop_list,q->drops+cl->un.leaf.aprio);
662 * htb_deactivate - remove leaf cl from active feeds
664 * Make sure that the leaf is active. In other words it can't be called
665 * with a non-active leaf. It also removes the class from the drop list.
667 static __inline__ void
668 htb_deactivate(struct htb_sched *q,struct htb_class *cl)
670 BUG_TRAP(cl->prio_activity);
671 HTB_CHCL(cl);
672 htb_deactivate_prios(q,cl);
673 cl->prio_activity = 0;
674 list_del_init(&cl->un.leaf.drop_list);
677 static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch)
679 struct htb_sched *q = (struct htb_sched *)sch->data;
680 struct htb_class *cl = htb_classify(skb,sch);
682 if (cl == HTB_DIRECT || !cl) {
683 /* enqueue to helper queue */
684 if (q->direct_queue.qlen < q->direct_qlen && cl) {
685 __skb_queue_tail(&q->direct_queue, skb);
686 q->direct_pkts++;
687 } else {
688 kfree_skb (skb);
689 sch->stats.drops++;
690 return NET_XMIT_DROP;
692 } else if (cl->un.leaf.q->enqueue(skb, cl->un.leaf.q) != NET_XMIT_SUCCESS) {
693 sch->stats.drops++;
694 cl->stats.drops++;
695 return NET_XMIT_DROP;
696 } else {
697 cl->stats.packets++; cl->stats.bytes += skb->len;
698 htb_activate (q,cl);
701 sch->q.qlen++;
702 sch->stats.packets++; sch->stats.bytes += skb->len;
703 HTB_DBG(1,1,"htb_enq_ok cl=%X skb=%p\n",cl?cl->classid:0,skb);
704 return NET_XMIT_SUCCESS;
707 /* TODO: requeuing packet charges it to policers again !! */
708 static int htb_requeue(struct sk_buff *skb, struct Qdisc *sch)
710 struct htb_sched *q = (struct htb_sched *)sch->data;
711 struct htb_class *cl = htb_classify(skb,sch);
713 if (cl == HTB_DIRECT || !cl) {
714 /* enqueue to helper queue */
715 if (q->direct_queue.qlen < q->direct_qlen && cl) {
716 __skb_queue_tail(&q->direct_queue, skb);
717 q->direct_pkts++;
718 } else {
719 kfree_skb (skb);
720 sch->stats.drops++;
721 return NET_XMIT_DROP;
723 } else if (cl->un.leaf.q->ops->requeue(skb, cl->un.leaf.q) != NET_XMIT_SUCCESS) {
724 sch->stats.drops++;
725 cl->stats.drops++;
726 return NET_XMIT_DROP;
727 } else
728 htb_activate (q,cl);
730 sch->q.qlen++;
731 HTB_DBG(1,1,"htb_req_ok cl=%X skb=%p\n",cl?cl->classid:0,skb);
732 return NET_XMIT_SUCCESS;
735 static void htb_timer(unsigned long arg)
737 struct Qdisc *sch = (struct Qdisc*)arg;
738 sch->flags &= ~TCQ_F_THROTTLED;
739 wmb();
740 netif_schedule(sch->dev);
743 #ifdef HTB_RATECM
744 #define RT_GEN(D,R) R+=D-(R/HTB_EWMAC);D=0
745 static void htb_rate_timer(unsigned long arg)
747 struct Qdisc *sch = (struct Qdisc*)arg;
748 struct htb_sched *q = (struct htb_sched *)sch->data;
749 struct list_head *p;
751 /* lock queue so that we can muck with it */
752 HTB_QLOCK(sch);
753 HTB_DBG(10,1,"htb_rttmr j=%ld\n",jiffies);
755 q->rttim.expires = jiffies + HZ;
756 add_timer(&q->rttim);
758 /* scan and recompute one bucket at a time */
759 if (++q->recmp_bucket >= HTB_HSIZE)
760 q->recmp_bucket = 0;
761 list_for_each (p,q->hash+q->recmp_bucket) {
762 struct htb_class *cl = list_entry(p,struct htb_class,hlist);
763 HTB_DBG(10,2,"htb_rttmr_cl cl=%X sbyte=%lu spkt=%lu\n",
764 cl->classid,cl->sum_bytes,cl->sum_packets);
765 RT_GEN (cl->sum_bytes,cl->rate_bytes);
766 RT_GEN (cl->sum_packets,cl->rate_packets);
768 HTB_QUNLOCK(sch);
770 #endif
773 * htb_charge_class - charges amount "bytes" to leaf and ancestors
775 * Routine assumes that a packet "bytes" long was dequeued from leaf cl
776 * borrowing from "level". It accounts bytes to the ceil leaky bucket for
777 * the leaf and all ancestors and to the rate bucket for ancestors at levels
778 * "level" and higher. It also handles a possible change of mode resulting
779 * from the update. Note that the mode can also increase here (MAY_BORROW to
780 * CAN_SEND) because we can use a more precise clock than the event queue here.
781 * In such a case we remove the class from the event queue first.
783 static void htb_charge_class(struct htb_sched *q,struct htb_class *cl,
784 int level,int bytes)
786 long toks,diff;
787 enum htb_cmode old_mode;
788 HTB_DBG(5,1,"htb_chrg_cl cl=%X lev=%d len=%d\n",cl->classid,level,bytes);
790 #define HTB_ACCNT(T,B,R) toks = diff + cl->T; \
791 if (toks > cl->B) toks = cl->B; \
792 toks -= L2T(cl, cl->R, bytes); \
793 if (toks <= -cl->mbuffer) toks = 1-cl->mbuffer; \
794 cl->T = toks
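/* For readability, a direct expansion of the macro above for the rate case,
 * HTB_ACCNT(tokens,buffer,rate):
 *   toks = diff + cl->tokens;                          refill by elapsed time
 *   if (toks > cl->buffer) toks = cl->buffer;          never above bucket depth
 *   toks -= L2T(cl, cl->rate, bytes);                  charge packet transmit time
 *   if (toks <= -cl->mbuffer) toks = 1 - cl->mbuffer;  floor the debt
 *   cl->tokens = toks;
 */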
796 while (cl) {
797 HTB_CHCL(cl);
798 diff = PSCHED_TDIFF_SAFE(q->now, cl->t_c, (u32)cl->mbuffer, 0);
799 #ifdef HTB_DEBUG
800 if (diff > cl->mbuffer || diff < 0 || PSCHED_TLESS(q->now, cl->t_c)) {
801 if (net_ratelimit())
802 printk(KERN_ERR "HTB: bad diff in charge, cl=%X diff=%lX now=%Lu then=%Lu j=%lu\n",
803 cl->classid, diff,
804 (unsigned long long) q->now,
805 (unsigned long long) cl->t_c,
806 q->jiffies);
807 diff = 1000;
809 #endif
810 if (cl->level >= level) {
811 if (cl->level == level) cl->xstats.lends++;
812 HTB_ACCNT (tokens,buffer,rate);
813 } else {
814 cl->xstats.borrows++;
815 cl->tokens += diff; /* we moved t_c; update tokens */
817 HTB_ACCNT (ctokens,cbuffer,ceil);
818 cl->t_c = q->now;
819 HTB_DBG(5,2,"htb_chrg_clp cl=%X diff=%ld tok=%ld ctok=%ld\n",cl->classid,diff,cl->tokens,cl->ctokens);
821 old_mode = cl->cmode; diff = 0;
822 htb_change_class_mode(q,cl,&diff);
823 if (old_mode != cl->cmode) {
824 if (old_mode != HTB_CAN_SEND)
825 htb_safe_rb_erase(&cl->pq_node,q->wait_pq+cl->level);
826 if (cl->cmode != HTB_CAN_SEND)
827 htb_add_to_wait_tree (q,cl,diff,1);
830 #ifdef HTB_RATECM
831 /* update rate counters */
832 cl->sum_bytes += bytes; cl->sum_packets++;
833 #endif
835 /* update byte stats except for leaves which are already updated */
836 if (cl->level) {
837 cl->stats.bytes += bytes;
838 cl->stats.packets++;
840 cl = cl->parent;
845 * htb_do_events - make mode changes to classes at the level
847 * Scans the event queue for pending events and applies them. Returns the number
848 * of jiffies to the next pending event (0 if there is no event in the pq).
849 * Note: applied are events whose cl->pq_key <= jiffies.
851 static long htb_do_events(struct htb_sched *q,int level)
853 int i;
854 HTB_DBG(8,1,"htb_do_events l=%d root=%p rmask=%X\n",
855 level,q->wait_pq[level].rb_node,q->row_mask[level]);
856 for (i = 0; i < 500; i++) {
857 struct htb_class *cl;
858 long diff;
859 struct rb_node *p = q->wait_pq[level].rb_node;
860 if (!p) return 0;
861 while (p->rb_left) p = p->rb_left;
863 cl = rb_entry(p, struct htb_class, pq_node);
864 if (cl->pq_key - (q->jiffies+1) < 0x80000000) {
865 HTB_DBG(8,3,"htb_do_ev_ret delay=%ld\n",cl->pq_key - q->jiffies);
866 return cl->pq_key - q->jiffies;
868 htb_safe_rb_erase(p,q->wait_pq+level);
869 diff = PSCHED_TDIFF_SAFE(q->now, cl->t_c, (u32)cl->mbuffer, 0);
870 #ifdef HTB_DEBUG
871 if (diff > cl->mbuffer || diff < 0 || PSCHED_TLESS(q->now, cl->t_c)) {
872 if (net_ratelimit())
873 printk(KERN_ERR "HTB: bad diff in events, cl=%X diff=%lX now=%Lu then=%Lu j=%lu\n",
874 cl->classid, diff,
875 (unsigned long long) q->now,
876 (unsigned long long) cl->t_c,
877 q->jiffies);
878 diff = 1000;
880 #endif
881 htb_change_class_mode(q,cl,&diff);
882 if (cl->cmode != HTB_CAN_SEND)
883 htb_add_to_wait_tree (q,cl,diff,2);
885 if (net_ratelimit())
886 printk(KERN_WARNING "htb: too many events !\n");
887 return HZ/10;
891 * htb_lookup_leaf - returns next leaf class in DRR order
893 * Find the leaf that the current feed pointer points to.
895 static struct htb_class *
896 htb_lookup_leaf(struct rb_root *tree,int prio,struct rb_node **pptr)
898 int i;
899 struct {
900 struct rb_node *root;
901 struct rb_node **pptr;
902 } stk[TC_HTB_MAXDEPTH],*sp = stk;
904 sp->root = tree->rb_node;
905 sp->pptr = pptr;
907 for (i = 0; i < 65535; i++) {
908 if (!*sp->pptr) { /* we are at right end; rewind & go up */
909 *sp->pptr = sp->root;
910 while ((*sp->pptr)->rb_left)
911 *sp->pptr = (*sp->pptr)->rb_left;
912 if (sp > stk) {
913 sp--;
914 BUG_TRAP(*sp->pptr); if(!*sp->pptr) return NULL;
915 htb_next_rb_node (sp->pptr);
917 } else {
918 struct htb_class *cl;
919 cl = rb_entry(*sp->pptr,struct htb_class,node[prio]);
920 HTB_CHCL(cl);
921 if (!cl->level)
922 return cl;
923 (++sp)->root = cl->un.inner.feed[prio].rb_node;
924 sp->pptr = cl->un.inner.ptr+prio;
927 BUG_TRAP(0);
928 return NULL;
931 /* dequeues a packet at the given priority and level; call only if
932 you are sure that there is an active class at prio/level */
933 static struct sk_buff *
934 htb_dequeue_tree(struct htb_sched *q,int prio,int level)
936 struct sk_buff *skb = NULL;
937 //struct htb_sched *q = (struct htb_sched *)sch->data;
938 struct htb_class *cl,*start;
939 /* look initial class up in the row */
940 start = cl = htb_lookup_leaf (q->row[level]+prio,prio,q->ptr[level]+prio);
942 do {
943 BUG_TRAP(cl && cl->un.leaf.q->q.qlen); if (!cl) return NULL;
944 HTB_DBG(4,1,"htb_deq_tr prio=%d lev=%d cl=%X defic=%d\n",
945 prio,level,cl->classid,cl->un.leaf.deficit[level]);
947 if (likely((skb = cl->un.leaf.q->dequeue(cl->un.leaf.q)) != NULL))
948 break;
949 if (!cl->warned) {
950 printk(KERN_WARNING "htb: class %X isn't work conserving ?!\n",cl->classid);
951 cl->warned = 1;
953 q->nwc_hit++;
954 htb_next_rb_node((level?cl->parent->un.inner.ptr:q->ptr[0])+prio);
955 cl = htb_lookup_leaf (q->row[level]+prio,prio,q->ptr[level]+prio);
956 } while (cl != start);
958 if (likely(skb != NULL)) {
959 if ((cl->un.leaf.deficit[level] -= skb->len) < 0) {
960 HTB_DBG(4,2,"htb_next_cl oldptr=%p quant_add=%d\n",
961 level?cl->parent->un.inner.ptr[prio]:q->ptr[0][prio],cl->un.leaf.quantum);
962 cl->un.leaf.deficit[level] += cl->un.leaf.quantum;
963 htb_next_rb_node((level?cl->parent->un.inner.ptr:q->ptr[0])+prio);
965 /* this used to be after charge_class but this constellation
966 gives us slightly better performance */
967 if (!cl->un.leaf.q->q.qlen)
968 htb_deactivate (q,cl);
969 htb_charge_class (q,cl,level,skb->len);
971 return skb;
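/* Annotation on the DRR accounting above: every dequeued skb is charged
 * against cl->un.leaf.deficit[level]; once the deficit goes negative the
 * class gets another quantum and the round-robin pointer for this prio
 * advances, so each class may send roughly "quantum" bytes per DRR round
 * before yielding to the next active class at the same priority. */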
974 static void htb_delay_by(struct Qdisc *sch,long delay)
976 struct htb_sched *q = (struct htb_sched *)sch->data;
977 if (netif_queue_stopped(sch->dev)) return;
978 if (delay <= 0) delay = 1;
979 if (unlikely(delay > 5*HZ)) {
980 if (net_ratelimit())
981 printk(KERN_INFO "HTB delay %ld > 5sec\n", delay);
982 delay = 5*HZ;
984 /* why not use jiffies here? because expires can be in the past */
985 mod_timer(&q->timer, q->jiffies + delay);
986 sch->flags |= TCQ_F_THROTTLED;
987 sch->stats.overlimits++;
988 HTB_DBG(3,1,"htb_deq t_delay=%ld\n",delay);
991 static struct sk_buff *htb_dequeue(struct Qdisc *sch)
993 struct sk_buff *skb = NULL;
994 struct htb_sched *q = (struct htb_sched *)sch->data;
995 int level;
996 long min_delay;
997 #ifdef HTB_DEBUG
998 int evs_used = 0;
999 #endif
1001 q->jiffies = jiffies;
1002 HTB_DBG(3,1,"htb_deq dircnt=%d qlen=%d\n",skb_queue_len(&q->direct_queue),
1003 sch->q.qlen);
1005 /* try to dequeue direct packets as high prio (!) to minimize cpu work */
1006 if ((skb = __skb_dequeue(&q->direct_queue)) != NULL) {
1007 sch->flags &= ~TCQ_F_THROTTLED;
1008 sch->q.qlen--;
1009 return skb;
1012 if (!sch->q.qlen) goto fin;
1013 PSCHED_GET_TIME(q->now);
1015 min_delay = LONG_MAX;
1016 q->nwc_hit = 0;
1017 for (level = 0; level < TC_HTB_MAXDEPTH; level++) {
1018 /* common case optimization - skip event handler quickly */
1019 int m;
1020 long delay;
1021 if (q->jiffies - q->near_ev_cache[level] < 0x80000000 || 0) {
1022 delay = htb_do_events(q,level);
1023 q->near_ev_cache[level] = q->jiffies + (delay ? delay : HZ);
1024 #ifdef HTB_DEBUG
1025 evs_used++;
1026 #endif
1027 } else
1028 delay = q->near_ev_cache[level] - q->jiffies;
1030 if (delay && min_delay > delay)
1031 min_delay = delay;
1032 m = ~q->row_mask[level];
1033 while (m != (int)(-1)) {
1034 int prio = ffz (m);
1035 m |= 1 << prio;
1036 skb = htb_dequeue_tree(q,prio,level);
1037 if (likely(skb != NULL)) {
1038 sch->q.qlen--;
1039 sch->flags &= ~TCQ_F_THROTTLED;
1040 goto fin;
1044 #ifdef HTB_DEBUG
1045 if (!q->nwc_hit && min_delay >= 10*HZ && net_ratelimit()) {
1046 if (min_delay == LONG_MAX) {
1047 printk(KERN_ERR "HTB: dequeue bug (%d,%lu,%lu), report it please !\n",
1048 evs_used,q->jiffies,jiffies);
1049 htb_debug_dump(q);
1050 } else
1051 printk(KERN_WARNING "HTB: mindelay=%ld, some class has "
1052 "too small rate\n",min_delay);
1054 #endif
1055 htb_delay_by (sch,min_delay > 5*HZ ? 5*HZ : min_delay);
1056 fin:
1057 HTB_DBG(3,1,"htb_deq_end %s j=%lu skb=%p\n",sch->dev->name,q->jiffies,skb);
1058 return skb;
1061 /* try to drop from each class (by prio) until one succeeds */
1062 static unsigned int htb_drop(struct Qdisc* sch)
1064 struct htb_sched *q = (struct htb_sched *)sch->data;
1065 int prio;
1067 for (prio = TC_HTB_NUMPRIO - 1; prio >= 0; prio--) {
1068 struct list_head *p;
1069 list_for_each (p,q->drops+prio) {
1070 struct htb_class *cl = list_entry(p, struct htb_class,
1071 un.leaf.drop_list);
1072 unsigned int len;
1073 if (cl->un.leaf.q->ops->drop &&
1074 (len = cl->un.leaf.q->ops->drop(cl->un.leaf.q))) {
1075 sch->q.qlen--;
1076 if (!cl->un.leaf.q->q.qlen)
1077 htb_deactivate (q,cl);
1078 return len;
1082 return 0;
1085 /* reset all classes */
1086 /* always called under BH & queue lock */
1087 static void htb_reset(struct Qdisc* sch)
1089 struct htb_sched *q = (struct htb_sched *)sch->data;
1090 int i;
1091 HTB_DBG(0,1,"htb_reset sch=%p, handle=%X\n",sch,sch->handle);
1093 for (i = 0; i < HTB_HSIZE; i++) {
1094 struct list_head *p;
1095 list_for_each (p,q->hash+i) {
1096 struct htb_class *cl = list_entry(p,struct htb_class,hlist);
1097 if (cl->level)
1098 memset(&cl->un.inner,0,sizeof(cl->un.inner));
1099 else {
1100 if (cl->un.leaf.q)
1101 qdisc_reset(cl->un.leaf.q);
1102 INIT_LIST_HEAD(&cl->un.leaf.drop_list);
1104 cl->prio_activity = 0;
1105 cl->cmode = HTB_CAN_SEND;
1106 #ifdef HTB_DEBUG
1107 cl->pq_node.rb_color = -1;
1108 memset(cl->node,255,sizeof(cl->node));
1109 #endif
1113 sch->flags &= ~TCQ_F_THROTTLED;
1114 del_timer(&q->timer);
1115 __skb_queue_purge(&q->direct_queue);
1116 sch->q.qlen = 0;
1117 memset(q->row,0,sizeof(q->row));
1118 memset(q->row_mask,0,sizeof(q->row_mask));
1119 memset(q->wait_pq,0,sizeof(q->wait_pq));
1120 memset(q->ptr,0,sizeof(q->ptr));
1121 for (i = 0; i < TC_HTB_NUMPRIO; i++)
1122 INIT_LIST_HEAD(q->drops+i);
1125 static int htb_init(struct Qdisc *sch, struct rtattr *opt)
1127 struct htb_sched *q = (struct htb_sched*)sch->data;
1128 struct rtattr *tb[TCA_HTB_INIT];
1129 struct tc_htb_glob *gopt;
1130 int i;
1131 #ifdef HTB_DEBUG
1132 printk(KERN_INFO "HTB init, kernel part version %d.%d\n",
1133 HTB_VER >> 16,HTB_VER & 0xffff);
1134 #endif
1135 if (!opt || rtattr_parse(tb, TCA_HTB_INIT, RTA_DATA(opt), RTA_PAYLOAD(opt)) ||
1136 tb[TCA_HTB_INIT-1] == NULL ||
1137 RTA_PAYLOAD(tb[TCA_HTB_INIT-1]) < sizeof(*gopt)) {
1138 printk(KERN_ERR "HTB: hey probably you have bad tc tool ?\n");
1139 return -EINVAL;
1141 gopt = RTA_DATA(tb[TCA_HTB_INIT-1]);
1142 if (gopt->version != HTB_VER >> 16) {
1143 printk(KERN_ERR "HTB: need tc/htb version %d (minor is %d), you have %d\n",
1144 HTB_VER >> 16,HTB_VER & 0xffff,gopt->version);
1145 return -EINVAL;
1147 memset(q,0,sizeof(*q));
1148 q->debug = gopt->debug;
1149 HTB_DBG(0,1,"htb_init sch=%p handle=%X r2q=%d\n",sch,sch->handle,gopt->rate2quantum);
1151 INIT_LIST_HEAD(&q->root);
1152 for (i = 0; i < HTB_HSIZE; i++)
1153 INIT_LIST_HEAD(q->hash+i);
1154 for (i = 0; i < TC_HTB_NUMPRIO; i++)
1155 INIT_LIST_HEAD(q->drops+i);
1157 init_timer(&q->timer);
1158 skb_queue_head_init(&q->direct_queue);
1160 q->direct_qlen = sch->dev->tx_queue_len;
1161 if (q->direct_qlen < 2) /* some devices have zero tx_queue_len */
1162 q->direct_qlen = 2;
1163 q->timer.function = htb_timer;
1164 q->timer.data = (unsigned long)sch;
1166 #ifdef HTB_RATECM
1167 init_timer(&q->rttim);
1168 q->rttim.function = htb_rate_timer;
1169 q->rttim.data = (unsigned long)sch;
1170 q->rttim.expires = jiffies + HZ;
1171 add_timer(&q->rttim);
1172 #endif
1173 if ((q->rate2quantum = gopt->rate2quantum) < 1)
1174 q->rate2quantum = 1;
1175 q->defcls = gopt->defcls;
1177 return 0;
1180 static int htb_dump(struct Qdisc *sch, struct sk_buff *skb)
1182 struct htb_sched *q = (struct htb_sched*)sch->data;
1183 unsigned char *b = skb->tail;
1184 struct rtattr *rta;
1185 struct tc_htb_glob gopt;
1186 HTB_DBG(0,1,"htb_dump sch=%p, handle=%X\n",sch,sch->handle);
1187 /* stats */
1188 HTB_QLOCK(sch);
1189 gopt.direct_pkts = q->direct_pkts;
1191 #ifdef HTB_DEBUG
1192 htb_debug_dump(q);
1193 #endif
1194 gopt.version = HTB_VER;
1195 gopt.rate2quantum = q->rate2quantum;
1196 gopt.defcls = q->defcls;
1197 gopt.debug = q->debug;
1198 rta = (struct rtattr*)b;
1199 RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
1200 RTA_PUT(skb, TCA_HTB_INIT, sizeof(gopt), &gopt);
1201 rta->rta_len = skb->tail - b;
1202 sch->stats.qlen = sch->q.qlen;
1203 RTA_PUT(skb, TCA_STATS, sizeof(sch->stats), &sch->stats);
1204 HTB_QUNLOCK(sch);
1205 return skb->len;
1206 rtattr_failure:
1207 HTB_QUNLOCK(sch);
1208 skb_trim(skb, skb->tail - skb->data);
1209 return -1;
1212 static int htb_dump_class(struct Qdisc *sch, unsigned long arg,
1213 struct sk_buff *skb, struct tcmsg *tcm)
1215 #ifdef HTB_DEBUG
1216 struct htb_sched *q = (struct htb_sched*)sch->data;
1217 #endif
1218 struct htb_class *cl = (struct htb_class*)arg;
1219 unsigned char *b = skb->tail;
1220 struct rtattr *rta;
1221 struct tc_htb_opt opt;
1223 HTB_DBG(0,1,"htb_dump_class handle=%X clid=%X\n",sch->handle,cl->classid);
1225 HTB_QLOCK(sch);
1226 tcm->tcm_parent = cl->parent ? cl->parent->classid : TC_H_ROOT;
1227 tcm->tcm_handle = cl->classid;
1228 if (!cl->level && cl->un.leaf.q) {
1229 tcm->tcm_info = cl->un.leaf.q->handle;
1230 cl->stats.qlen = cl->un.leaf.q->q.qlen;
1233 rta = (struct rtattr*)b;
1234 RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
1236 memset (&opt,0,sizeof(opt));
1238 opt.rate = cl->rate->rate; opt.buffer = cl->buffer;
1239 opt.ceil = cl->ceil->rate; opt.cbuffer = cl->cbuffer;
1240 opt.quantum = cl->un.leaf.quantum; opt.prio = cl->un.leaf.prio;
1241 opt.level = cl->level;
1242 RTA_PUT(skb, TCA_HTB_PARMS, sizeof(opt), &opt);
1243 rta->rta_len = skb->tail - b;
1245 #ifdef HTB_RATECM
1246 cl->stats.bps = cl->rate_bytes/(HTB_EWMAC*HTB_HSIZE);
1247 cl->stats.pps = cl->rate_packets/(HTB_EWMAC*HTB_HSIZE);
1248 #endif
1250 cl->xstats.tokens = cl->tokens;
1251 cl->xstats.ctokens = cl->ctokens;
1252 RTA_PUT(skb, TCA_STATS, sizeof(cl->stats), &cl->stats);
1253 RTA_PUT(skb, TCA_XSTATS, sizeof(cl->xstats), &cl->xstats);
1254 HTB_QUNLOCK(sch);
1255 return skb->len;
1256 rtattr_failure:
1257 HTB_QUNLOCK(sch);
1258 skb_trim(skb, b - skb->data);
1259 return -1;
1262 static int htb_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
1263 struct Qdisc **old)
1265 struct htb_class *cl = (struct htb_class*)arg;
1267 if (cl && !cl->level) {
1268 if (new == NULL && (new = qdisc_create_dflt(sch->dev,
1269 &pfifo_qdisc_ops)) == NULL)
1270 return -ENOBUFS;
1271 sch_tree_lock(sch);
1272 if ((*old = xchg(&cl->un.leaf.q, new)) != NULL) {
1273 /* TODO: is it correct? Why doesn't CBQ do it? */
1274 sch->q.qlen -= (*old)->q.qlen;
1275 qdisc_reset(*old);
1277 sch_tree_unlock(sch);
1278 return 0;
1280 return -ENOENT;
1283 static struct Qdisc * htb_leaf(struct Qdisc *sch, unsigned long arg)
1285 struct htb_class *cl = (struct htb_class*)arg;
1286 return (cl && !cl->level) ? cl->un.leaf.q : NULL;
1289 static unsigned long htb_get(struct Qdisc *sch, u32 classid)
1291 #ifdef HTB_DEBUG
1292 struct htb_sched *q = (struct htb_sched *)sch->data;
1293 #endif
1294 struct htb_class *cl = htb_find(classid,sch);
1295 HTB_DBG(0,1,"htb_get clid=%X q=%p cl=%p ref=%d\n",classid,q,cl,cl?cl->refcnt:0);
1296 if (cl)
1297 cl->refcnt++;
1298 return (unsigned long)cl;
1301 static void htb_destroy_filters(struct tcf_proto **fl)
1303 struct tcf_proto *tp;
1305 while ((tp = *fl) != NULL) {
1306 *fl = tp->next;
1307 tp->ops->destroy(tp);
1311 static void htb_destroy_class(struct Qdisc* sch,struct htb_class *cl)
1313 struct htb_sched *q = (struct htb_sched *)sch->data;
1314 HTB_DBG(0,1,"htb_destrycls clid=%X ref=%d\n", cl?cl->classid:0,cl?cl->refcnt:0);
1315 if (!cl->level) {
1316 BUG_TRAP(cl->un.leaf.q);
1317 sch->q.qlen -= cl->un.leaf.q->q.qlen;
1318 qdisc_destroy(cl->un.leaf.q);
1320 qdisc_put_rtab(cl->rate);
1321 qdisc_put_rtab(cl->ceil);
1323 #ifdef CONFIG_NET_ESTIMATOR
1324 qdisc_kill_estimator(&cl->stats);
1325 #endif
1326 htb_destroy_filters (&cl->filter_list);
1328 while (!list_empty(&cl->children))
1329 htb_destroy_class (sch,list_entry(cl->children.next,
1330 struct htb_class,sibling));
1332 /* note: this delete may happen twice (see htb_delete) */
1333 list_del(&cl->hlist);
1334 list_del(&cl->sibling);
1336 if (cl->prio_activity)
1337 htb_deactivate (q,cl);
1339 if (cl->cmode != HTB_CAN_SEND)
1340 htb_safe_rb_erase(&cl->pq_node,q->wait_pq+cl->level);
1342 kfree(cl);
1345 /* always called under BH & queue lock */
1346 static void htb_destroy(struct Qdisc* sch)
1348 struct htb_sched *q = (struct htb_sched *)sch->data;
1349 HTB_DBG(0,1,"htb_destroy q=%p\n",q);
1351 del_timer_sync (&q->timer);
1352 #ifdef HTB_RATECM
1353 del_timer_sync (&q->rttim);
1354 #endif
1355 while (!list_empty(&q->root))
1356 htb_destroy_class (sch,list_entry(q->root.next,
1357 struct htb_class,sibling));
1359 htb_destroy_filters(&q->filter_list);
1360 __skb_queue_purge(&q->direct_queue);
1363 static int htb_delete(struct Qdisc *sch, unsigned long arg)
1365 struct htb_sched *q = (struct htb_sched *)sch->data;
1366 struct htb_class *cl = (struct htb_class*)arg;
1367 HTB_DBG(0,1,"htb_delete q=%p cl=%X ref=%d\n",q,cl?cl->classid:0,cl?cl->refcnt:0);
1369 // TODO: why don't we allow deleting a subtree? references? does the
1370 // tc subsystem guarantee us that in htb_destroy it holds no class
1371 // refs so that we can remove children safely there?
1372 if (!list_empty(&cl->children) || cl->filter_cnt)
1373 return -EBUSY;
1375 sch_tree_lock(sch);
1377 /* delete from hash and active; remainder in destroy_class */
1378 list_del_init(&cl->hlist);
1379 if (cl->prio_activity)
1380 htb_deactivate (q,cl);
1382 if (--cl->refcnt == 0)
1383 htb_destroy_class(sch,cl);
1385 sch_tree_unlock(sch);
1386 return 0;
1389 static void htb_put(struct Qdisc *sch, unsigned long arg)
1391 #ifdef HTB_DEBUG
1392 struct htb_sched *q = (struct htb_sched *)sch->data;
1393 #endif
1394 struct htb_class *cl = (struct htb_class*)arg;
1395 HTB_DBG(0,1,"htb_put q=%p cl=%X ref=%d\n",q,cl?cl->classid:0,cl?cl->refcnt:0);
1397 if (--cl->refcnt == 0)
1398 htb_destroy_class(sch,cl);
1401 static int htb_change_class(struct Qdisc *sch, u32 classid,
1402 u32 parentid, struct rtattr **tca, unsigned long *arg)
1404 int err = -EINVAL;
1405 struct htb_sched *q = (struct htb_sched *)sch->data;
1406 struct htb_class *cl = (struct htb_class*)*arg,*parent;
1407 struct rtattr *opt = tca[TCA_OPTIONS-1];
1408 struct qdisc_rate_table *rtab = NULL, *ctab = NULL;
1409 struct rtattr *tb[TCA_HTB_RTAB];
1410 struct tc_htb_opt *hopt;
1412 /* extract all subattrs from opt attr */
1413 if (!opt || rtattr_parse(tb, TCA_HTB_RTAB, RTA_DATA(opt), RTA_PAYLOAD(opt)) ||
1414 tb[TCA_HTB_PARMS-1] == NULL ||
1415 RTA_PAYLOAD(tb[TCA_HTB_PARMS-1]) < sizeof(*hopt))
1416 goto failure;
1418 parent = parentid == TC_H_ROOT ? NULL : htb_find (parentid,sch);
1420 hopt = RTA_DATA(tb[TCA_HTB_PARMS-1]);
1421 HTB_DBG(0,1,"htb_chg cl=%p(%X), clid=%X, parid=%X, opt/prio=%d, rate=%u, buff=%d, quant=%d\n", cl,cl?cl->classid:0,classid,parentid,(int)hopt->prio,hopt->rate.rate,hopt->buffer,hopt->quantum);
1422 rtab = qdisc_get_rtab(&hopt->rate, tb[TCA_HTB_RTAB-1]);
1423 ctab = qdisc_get_rtab(&hopt->ceil, tb[TCA_HTB_CTAB-1]);
1424 if (!rtab || !ctab) goto failure;
1426 if (!cl) { /* new class */
1427 struct Qdisc *new_q;
1428 /* check for valid classid */
1429 if (!classid || TC_H_MAJ(classid^sch->handle) || htb_find(classid,sch))
1430 goto failure;
1432 /* check maximal depth */
1433 if (parent && parent->parent && parent->parent->level < 2) {
1434 printk(KERN_ERR "htb: tree is too deep\n");
1435 goto failure;
1437 err = -ENOBUFS;
1438 if ((cl = kmalloc(sizeof(*cl), GFP_KERNEL)) == NULL)
1439 goto failure;
1441 memset(cl, 0, sizeof(*cl));
1442 cl->refcnt = 1;
1443 INIT_LIST_HEAD(&cl->sibling);
1444 INIT_LIST_HEAD(&cl->hlist);
1445 INIT_LIST_HEAD(&cl->children);
1446 INIT_LIST_HEAD(&cl->un.leaf.drop_list);
1447 #ifdef HTB_DEBUG
1448 cl->magic = HTB_CMAGIC;
1449 #endif
1451 /* create leaf qdisc early because it uses kmalloc(GFP_KERNEL)
1452 which can't be used inside of sch_tree_lock
1453 -- thanks to Karlis Peisenieks */
1454 new_q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops);
1455 sch_tree_lock(sch);
1456 if (parent && !parent->level) {
1457 /* turn parent into inner node */
1458 sch->q.qlen -= parent->un.leaf.q->q.qlen;
1459 qdisc_destroy (parent->un.leaf.q);
1460 if (parent->prio_activity)
1461 htb_deactivate (q,parent);
1463 /* remove from evt list because of level change */
1464 if (parent->cmode != HTB_CAN_SEND) {
1465 htb_safe_rb_erase(&parent->pq_node,q->wait_pq /*+0*/);
1466 parent->cmode = HTB_CAN_SEND;
1468 parent->level = (parent->parent ? parent->parent->level
1469 : TC_HTB_MAXDEPTH) - 1;
1470 memset (&parent->un.inner,0,sizeof(parent->un.inner));
1472 /* leaf (we) needs elementary qdisc */
1473 cl->un.leaf.q = new_q ? new_q : &noop_qdisc;
1475 cl->classid = classid; cl->parent = parent;
1477 /* set class to be in HTB_CAN_SEND state */
1478 cl->tokens = hopt->buffer;
1479 cl->ctokens = hopt->cbuffer;
1480 cl->mbuffer = 60000000; /* 1min */
1481 PSCHED_GET_TIME(cl->t_c);
1482 cl->cmode = HTB_CAN_SEND;
1484 /* attach to the hash list and parent's family */
1485 list_add_tail(&cl->hlist, q->hash+htb_hash(classid));
1486 list_add_tail(&cl->sibling, parent ? &parent->children : &q->root);
1487 #ifdef HTB_DEBUG
1489 int i;
1490 for (i = 0; i < TC_HTB_NUMPRIO; i++) cl->node[i].rb_color = -1;
1491 cl->pq_node.rb_color = -1;
1493 #endif
1494 } else sch_tree_lock(sch);
1496 /* there used to be a nasty bug here: we have to check that the node
1497 is really a leaf before changing cl->un.leaf ! */
1498 if (!cl->level) {
1499 cl->un.leaf.quantum = rtab->rate.rate / q->rate2quantum;
1500 if (!hopt->quantum && cl->un.leaf.quantum < 1000) {
1501 printk(KERN_WARNING "HTB: quantum of class %X is small. Consider r2q change.\n", cl->classid);
1502 cl->un.leaf.quantum = 1000;
1504 if (!hopt->quantum && cl->un.leaf.quantum > 200000) {
1505 printk(KERN_WARNING "HTB: quantum of class %X is big. Consider r2q change.\n", cl->classid);
1506 cl->un.leaf.quantum = 200000;
1508 if (hopt->quantum)
1509 cl->un.leaf.quantum = hopt->quantum;
1510 if ((cl->un.leaf.prio = hopt->prio) >= TC_HTB_NUMPRIO)
1511 cl->un.leaf.prio = TC_HTB_NUMPRIO - 1;
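/* Worked example (r2q and rate values assumed): with rate2quantum == 10,
 * a class rate of 12500000 bytes/s (100 Mbit/s) yields quantum == 1250000,
 * which exceeds 200000, so it is clamped and the "quantum ... is big"
 * warning above is printed; an explicit hopt->quantum overrides the
 * r2q-derived value and suppresses both warnings. */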
1514 cl->buffer = hopt->buffer;
1515 cl->cbuffer = hopt->cbuffer;
1516 if (cl->rate) qdisc_put_rtab(cl->rate); cl->rate = rtab;
1517 if (cl->ceil) qdisc_put_rtab(cl->ceil); cl->ceil = ctab;
1518 sch_tree_unlock(sch);
1520 *arg = (unsigned long)cl;
1521 return 0;
1523 failure:
1524 if (rtab) qdisc_put_rtab(rtab);
1525 if (ctab) qdisc_put_rtab(ctab);
1526 return err;
1529 static struct tcf_proto **htb_find_tcf(struct Qdisc *sch, unsigned long arg)
1531 struct htb_sched *q = (struct htb_sched *)sch->data;
1532 struct htb_class *cl = (struct htb_class *)arg;
1533 struct tcf_proto **fl = cl ? &cl->filter_list : &q->filter_list;
1534 HTB_DBG(0,2,"htb_tcf q=%p clid=%X fref=%d fl=%p\n",q,cl?cl->classid:0,cl?cl->filter_cnt:q->filter_cnt,*fl);
1535 return fl;
1538 static unsigned long htb_bind_filter(struct Qdisc *sch, unsigned long parent,
1539 u32 classid)
1541 struct htb_sched *q = (struct htb_sched *)sch->data;
1542 struct htb_class *cl = htb_find (classid,sch);
1543 HTB_DBG(0,2,"htb_bind q=%p clid=%X cl=%p fref=%d\n",q,classid,cl,cl?cl->filter_cnt:q->filter_cnt);
1544 /*if (cl && !cl->level) return 0;
1545 The line above used to be there to prevent attaching filters to
1546 leaves. But at least tc_index filter uses this just to get class
1547 for other reasons so that we have to allow for it.
1548 ----
1549 19.6.2002 As Werner explained it is ok - bind filter is just
1550 another way to "lock" the class - unlike "get" this lock can
1551 be broken by class during destroy IIUC.
1553 if (cl)
1554 cl->filter_cnt++;
1555 else
1556 q->filter_cnt++;
1557 return (unsigned long)cl;
1560 static void htb_unbind_filter(struct Qdisc *sch, unsigned long arg)
1562 struct htb_sched *q = (struct htb_sched *)sch->data;
1563 struct htb_class *cl = (struct htb_class *)arg;
1564 HTB_DBG(0,2,"htb_unbind q=%p cl=%p fref=%d\n",q,cl,cl?cl->filter_cnt:q->filter_cnt);
1565 if (cl)
1566 cl->filter_cnt--;
1567 else
1568 q->filter_cnt--;
1571 static void htb_walk(struct Qdisc *sch, struct qdisc_walker *arg)
1573 struct htb_sched *q = (struct htb_sched *)sch->data;
1574 int i;
1576 if (arg->stop)
1577 return;
1579 for (i = 0; i < HTB_HSIZE; i++) {
1580 struct list_head *p;
1581 list_for_each (p,q->hash+i) {
1582 struct htb_class *cl = list_entry(p,struct htb_class,hlist);
1583 if (arg->count < arg->skip) {
1584 arg->count++;
1585 continue;
1587 if (arg->fn(sch, (unsigned long)cl, arg) < 0) {
1588 arg->stop = 1;
1589 return;
1591 arg->count++;
1596 static struct Qdisc_class_ops htb_class_ops = {
1597 .graft = htb_graft,
1598 .leaf = htb_leaf,
1599 .get = htb_get,
1600 .put = htb_put,
1601 .change = htb_change_class,
1602 .delete = htb_delete,
1603 .walk = htb_walk,
1604 .tcf_chain = htb_find_tcf,
1605 .bind_tcf = htb_bind_filter,
1606 .unbind_tcf = htb_unbind_filter,
1607 .dump = htb_dump_class,
1610 struct Qdisc_ops htb_qdisc_ops = {
1611 .next = NULL,
1612 .cl_ops = &htb_class_ops,
1613 .id = "htb",
1614 .priv_size = sizeof(struct htb_sched),
1615 .enqueue = htb_enqueue,
1616 .dequeue = htb_dequeue,
1617 .requeue = htb_requeue,
1618 .drop = htb_drop,
1619 .init = htb_init,
1620 .reset = htb_reset,
1621 .destroy = htb_destroy,
1622 .change = NULL /* htb_change */,
1623 .dump = htb_dump,
1624 .owner = THIS_MODULE,
1627 #ifdef MODULE
1628 int init_module(void)
1630 return register_qdisc(&htb_qdisc_ops);
1633 void cleanup_module(void)
1635 unregister_qdisc(&htb_qdisc_ops);
1637 MODULE_LICENSE("GPL");
1638 #endif