/* vim: ts=8 sw=8
 * net/sched/sch_htb.c  Hierarchical token bucket, feed tree version
 *
 *      This program is free software; you can redistribute it and/or
 *      modify it under the terms of the GNU General Public License
 *      as published by the Free Software Foundation; either version
 *      2 of the License, or (at your option) any later version.
 *
 * Authors:     Martin Devera, <devik@cdi.cz>
 *
 * Credits (in time order) for older HTB versions:
 *      Stef Coene <stef.coene@docum.org>
 *              HTB support at LARTC mailing list
 *      Ondrej Kraus, <krauso@barr.cz>
 *              found missing INIT_QDISC(htb)
 *      Vladimir Smelhaus, Aamer Akhter, Bert Hubert
 *              helped a lot to locate nasty class stall bug
 *      Andi Kleen, Jamal Hadi, Bert Hubert
 *              code review and helpful comments on shaping
 *      Tomasz Wrona, <tw@eter.tym.pl>
 *              created test case so that I was able to fix nasty bug
 *      Wilfried Weissmann
 *              spotted bug in dequeue code and helped with fix
 *      Jiri Fojtasek
 *              fixed requeue routine
 *      and many others. thanks.
 *
 * $Id: sch_htb.c,v 1.25 2003/12/07 11:08:25 devik Exp devik $
 */
#include <linux/config.h>
#include <linux/module.h>
#include <asm/uaccess.h>
#include <asm/system.h>
#include <asm/bitops.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/version.h>
#include <linux/sched.h>
#include <linux/string.h>
#include <linux/mm.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/in.h>
#include <linux/errno.h>
#include <linux/interrupt.h>
#include <linux/if_ether.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/notifier.h>
#include <net/ip.h>
#include <net/route.h>
#include <linux/skbuff.h>
#include <linux/list.h>
#include <linux/compiler.h>
#include <net/sock.h>
#include <net/pkt_sched.h>
#include <linux/rbtree.h>

/* HTB algorithm.
    Author: devik@cdi.cz
    ========================================================================
    HTB is like TBF with multiple classes. It is also similar to CBQ because
    it allows assigning a priority to each class in the hierarchy.
    In fact it is another implementation of Floyd's formal sharing.

    Levels:
    Each class is assigned a level. A leaf ALWAYS has level 0 and root
    classes have level TC_HTB_MAXDEPTH-1. Interior nodes have a level
    one less than their parent.
*/
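
/* Worked example (illustrative; assumes TC_HTB_MAXDEPTH == 8 as in the
   linux/pkt_sched.h of this era):
        1:1   root  -> level 7 (TC_HTB_MAXDEPTH-1)
        1:10  inner -> level 6 (one less than its parent)
        1:100 leaf  -> level 0 (leaves are ALWAYS level 0)
 */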

#define HTB_HSIZE 16            /* classid hash size */
#define HTB_EWMAC 2             /* rate average over HTB_EWMAC*HTB_HSIZE sec */
//#define HTB_DEBUG 1           /* compile debugging support (activated by tc tool) */
#define HTB_RATECM 1            /* whether to use rate computer */
//#define HTB_HYSTERESIS 0      /* whether to use mode hysteresis for speedup */
#define HTB_QLOCK(S) spin_lock_bh(&(S)->dev->queue_lock)
#define HTB_QUNLOCK(S) spin_unlock_bh(&(S)->dev->queue_lock)
#define HTB_VER 0x30011         /* major must be matched with number supplied by TC as version */

#if HTB_VER >> 16 != TC_HTB_PROTOVER
#error "Mismatched sch_htb.c and pkt_sch.h"
#endif
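
/* For the record (illustrative): HTB_VER 0x30011 encodes major version 3
   (0x30011 >> 16) and minor 0x11. The #if above compares only the major
   part against TC_HTB_PROTOVER from linux/pkt_sched.h, so the kernel part
   and the tc userspace must agree on protocol major version 3 here. */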

/* debugging support; S is subsystem, these are defined:
   0 - netlink messages
   1 - enqueue
   2 - drop & requeue
   3 - dequeue main
   4 - dequeue one prio DRR part
   5 - dequeue class accounting
   6 - class overlimit status computation
   7 - hint tree
   8 - event queue
   10 - rate estimator
   11 - classifier
   12 - fast dequeue cache

   L is level; 0 = none, 1 = basic info, 2 = detailed, 3 = full
   q->debug uint32 contains 16 2-bit fields, one per subsystem, starting
   from the LSB
 */
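
/* Example (illustrative): to get detailed (level 2) logging from
   subsystem 3 (dequeue main) only, set q->debug = 2 << (2*3) = 0x80;
   HTB_DBG_COND(3,L) below then holds for L <= 2 and for no other
   subsystem. */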

#ifdef HTB_DEBUG
#define HTB_DBG_COND(S,L) (((q->debug>>(2*S))&3) >= L)
#define HTB_DBG(S,L,FMT,ARG...) if (HTB_DBG_COND(S,L)) \
        printk(KERN_DEBUG FMT,##ARG)
#define HTB_CHCL(cl) BUG_TRAP((cl)->magic == HTB_CMAGIC)
#define HTB_PASSQ q,
#define HTB_ARGQ struct htb_sched *q,
#define static
#undef __inline__
#define __inline__
#undef inline
#define inline
#define HTB_CMAGIC 0xFEFAFEF1
#define htb_safe_rb_erase(N,R) do { BUG_TRAP((N)->rb_color != -1); \
        if ((N)->rb_color == -1) break; \
        rb_erase(N,R); \
        (N)->rb_color = -1; } while (0)
#else
#define HTB_DBG_COND(S,L) (0)
#define HTB_DBG(S,L,FMT,ARG...)
#define HTB_PASSQ
#define HTB_ARGQ
#define HTB_CHCL(cl)
#define htb_safe_rb_erase(N,R) rb_erase(N,R)
#endif

/* used internally to keep the status of a single class */
enum htb_cmode {
    HTB_CANT_SEND,      /* class can't send and can't borrow */
    HTB_MAY_BORROW,     /* class can't send but may borrow */
    HTB_CAN_SEND        /* class can send */
};

/* interior & leaf nodes; props specific to leaves are marked L: */
struct htb_class
{
#ifdef HTB_DEBUG
    unsigned magic;
#endif
    /* general class parameters */
    u32 classid;
    struct tc_stats stats;      /* generic stats */
    struct tc_htb_xstats xstats;/* our special stats */
    int refcnt;                 /* usage count of this class */

#ifdef HTB_RATECM
    /* rate measurement counters */
    unsigned long rate_bytes,sum_bytes;
    unsigned long rate_packets,sum_packets;
#endif

    /* topology */
    int level;                  /* our level (see above) */
    struct htb_class *parent;   /* parent class */
    struct list_head hlist;     /* classid hash list item */
    struct list_head sibling;   /* sibling list item */
    struct list_head children;  /* children list */

    union {
        struct htb_class_leaf {
            struct Qdisc *q;
            int prio;
            int aprio;
            int quantum;
            int deficit[TC_HTB_MAXDEPTH];
            struct list_head drop_list;
        } leaf;
        struct htb_class_inner {
            rb_root_t feed[TC_HTB_NUMPRIO]; /* feed trees */
            rb_node_t *ptr[TC_HTB_NUMPRIO]; /* current class ptr */
            /* When class changes from state 1->2 and disconnects from
               parent's feed then we lost ptr value and start from the
               first child again. Here we store classid of the
               last valid ptr (used when ptr is NULL). */
            u32 last_ptr_id[TC_HTB_NUMPRIO];
        } inner;
    } un;
    rb_node_t node[TC_HTB_NUMPRIO]; /* node for self or feed tree */
    rb_node_t pq_node;              /* node for event queue */
    unsigned long pq_key;           /* the same type as jiffies global */

    int prio_activity;          /* for which prios are we active */
    enum htb_cmode cmode;       /* current mode of the class */

    /* class attached filters */
    struct tcf_proto *filter_list;
    int filter_cnt;

    int warned;         /* only one warning about non work conserving .. */

    /* token bucket parameters */
    struct qdisc_rate_table *rate;  /* rate table of the class itself */
    struct qdisc_rate_table *ceil;  /* ceiling rate (limits borrows too) */
    long buffer,cbuffer;            /* token bucket depth/rate */
    long mbuffer;                   /* max wait time */
    long tokens,ctokens;            /* current number of tokens */
    psched_time_t t_c;              /* checkpoint time */
};

/* TODO: maybe compute rate when size is too large .. or drop ? */
static __inline__ long L2T(struct htb_class *cl,struct qdisc_rate_table *rate,
        int size)
{
    long result = qdisc_l2t(rate, size);
    if (result > rate->data[255])
        cl->xstats.giants++;
    return result;
}
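
/* Illustrative note (assumes the standard qdisc_l2t() lookup, i.e.
   rate->data[size >> cell_log]): L2T ("length to time") converts a packet
   length into the time needed to transmit it at the table's rate; sizes
   that would index past slot 255 are counted in xstats.giants. */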

struct htb_sched
{
    struct list_head root;              /* root classes list */
    struct list_head hash[HTB_HSIZE];   /* hashed by classid */
    struct list_head drops[TC_HTB_NUMPRIO];/* active leaves (for drops) */

    /* self list - roots of self generating tree */
    rb_root_t row[TC_HTB_MAXDEPTH][TC_HTB_NUMPRIO];
    int row_mask[TC_HTB_MAXDEPTH];
    rb_node_t *ptr[TC_HTB_MAXDEPTH][TC_HTB_NUMPRIO];
    u32 last_ptr_id[TC_HTB_MAXDEPTH][TC_HTB_NUMPRIO];

    /* self wait list - roots of wait PQs per row */
    rb_root_t wait_pq[TC_HTB_MAXDEPTH];

    /* time of nearest event per level (row) */
    unsigned long near_ev_cache[TC_HTB_MAXDEPTH];

    /* cached value of jiffies in dequeue */
    unsigned long jiffies;

    /* whether we hit non-work conserving class during this dequeue; we use */
    int nwc_hit;        /* this to disable mindelay complaint in dequeue */

    int defcls;         /* class where unclassified flows go to */
    u32 debug;          /* subsystem debug levels */

    /* filters for qdisc itself */
    struct tcf_proto *filter_list;
    int filter_cnt;

    int rate2quantum;   /* quant = rate / rate2quantum */
    psched_time_t now;  /* cached dequeue time */
    struct timer_list timer;    /* send delay timer */
#ifdef HTB_RATECM
    struct timer_list rttim;    /* rate computer timer */
    int recmp_bucket;           /* which hash bucket to recompute next */
#endif

    /* non shaped skbs; let them go directly thru */
    struct sk_buff_head direct_queue;
    int direct_qlen;    /* max qlen of above */

    long direct_pkts;
};

/* compute hash of size HTB_HSIZE for given handle */
static __inline__ int htb_hash(u32 h)
{
#if HTB_HSIZE != 16
#error "Declare new hash for your HTB_HSIZE"
#endif
    h ^= h>>8;  /* stolen from cbq_hash */
    h ^= h>>4;
    return h & 0xf;
}
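
/* Worked example (illustrative): for classid 1:10, h = 0x00010010;
   h ^= h>>8 gives 0x00010110, h ^= h>>4 gives 0x00011101, and the low
   nibble selects bucket 1. */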

/* find class in global hash table using given handle */
static __inline__ struct htb_class *htb_find(u32 handle, struct Qdisc *sch)
{
    struct htb_sched *q = (struct htb_sched *)sch->data;
    struct list_head *p;
    if (TC_H_MAJ(handle) != sch->handle)
        return NULL;

    list_for_each (p,q->hash+htb_hash(handle)) {
        struct htb_class *cl = list_entry(p,struct htb_class,hlist);
        if (cl->classid == handle)
            return cl;
    }
    return NULL;
}

/**
 * htb_classify - classify a packet into class
 *
 * It returns NULL if the packet should be dropped or -1 if the packet
 * should be passed directly thru. In all other cases leaf class is returned.
 * We allow direct class selection by classid in skb->priority. Then we
 * examine filters in qdisc and in inner nodes (if higher filter points to
 * the inner node). If we end up with classid MAJOR:0 we enqueue the skb
 * into the special internal fifo (direct). These packets then go directly
 * thru. If we still have no valid leaf we try to use the MAJOR:default
 * leaf. If that is still unsuccessful we finish and return the direct queue.
 */
#define HTB_DIRECT (struct htb_class*)-1
static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch)
{
    struct htb_sched *q = (struct htb_sched *)sch->data;
    struct htb_class *cl;
    struct tcf_result res;
    struct tcf_proto *tcf;
    int result;

    /* allow to select class by setting skb->priority to valid classid;
       note that nfmark can be used too by attaching filter fw with no
       rules in it */
    if (skb->priority == sch->handle)
        return HTB_DIRECT;  /* X:0 (direct flow) selected */
    if ((cl = htb_find(skb->priority,sch)) != NULL && cl->level == 0)
        return cl;

    tcf = q->filter_list;
    while (tcf && (result = tc_classify(skb, tcf, &res)) >= 0) {
#ifdef CONFIG_NET_CLS_POLICE
        if (result == TC_POLICE_SHOT)
            return NULL;
#endif
        if ((cl = (void*)res.class) == NULL) {
            if (res.classid == sch->handle)
                return HTB_DIRECT;  /* X:0 (direct flow) */
            if ((cl = htb_find(res.classid,sch)) == NULL)
                break;  /* filter selected invalid classid */
        }
        if (!cl->level)
            return cl;  /* we hit leaf; return it */

        /* we have got inner class; apply inner filter chain */
        tcf = cl->filter_list;
    }
    /* classification failed; try to use default class */
    cl = htb_find(TC_H_MAKE(TC_H_MAJ(sch->handle),q->defcls),sch);
    if (!cl || cl->level)
        return HTB_DIRECT;  /* bad default .. this is safe bet */
    return cl;
}
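
/* For instance (illustrative), with this qdisc as 1: on eth0,
       tc filter add dev eth0 parent 1:0 protocol ip prio 1 \
          u32 match ip dport 80 0xffff flowid 1:10
   makes htb_classify() return leaf class 1:10 for TCP port 80 traffic,
   while flowid 1:0 would select the direct queue instead. */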

#ifdef HTB_DEBUG
static void htb_next_rb_node(rb_node_t **n);
#define HTB_DUMTREE(root,memb) if(root) { \
        rb_node_t *n = (root)->rb_node; \
        while (n->rb_left) n = n->rb_left; \
        while (n) { \
            struct htb_class *cl = rb_entry(n, struct htb_class, memb); \
            printk(" %x",cl->classid); htb_next_rb_node (&n); \
        } }

static void htb_debug_dump (struct htb_sched *q)
{
    int i,p;
    printk(KERN_DEBUG "htb*g j=%lu lj=%lu\n",jiffies,q->jiffies);
    /* rows */
    for (i=TC_HTB_MAXDEPTH-1;i>=0;i--) {
        printk(KERN_DEBUG "htb*r%d m=%x",i,q->row_mask[i]);
        for (p=0;p<TC_HTB_NUMPRIO;p++) {
            if (!q->row[i][p].rb_node) continue;
            printk(" p%d:",p);
            HTB_DUMTREE(q->row[i]+p,node[p]);
        }
        printk("\n");
    }
    /* classes */
    for (i = 0; i < HTB_HSIZE; i++) {
        struct list_head *l;
        list_for_each (l,q->hash+i) {
            struct htb_class *cl = list_entry(l,struct htb_class,hlist);
            long long diff = PSCHED_TDIFF_SAFE(q->now, cl->t_c, (u32)cl->mbuffer, 0);
            printk(KERN_DEBUG "htb*c%x m=%d t=%ld c=%ld pq=%lu df=%ld ql=%lld "
                    "pa=%x f:",
                    cl->classid,cl->cmode,cl->tokens,cl->ctokens,
                    cl->pq_node.rb_color==-1?0:cl->pq_key,diff,
                    cl->level?0:cl->un.leaf.q->q.qlen,cl->prio_activity);
            if (cl->level)
            for (p=0;p<TC_HTB_NUMPRIO;p++) {
                if (!cl->un.inner.feed[p].rb_node) continue;
                printk(" p%d a=%x:",p,cl->un.inner.ptr[p]?rb_entry(cl->un.inner.ptr[p], struct htb_class,node[p])->classid:0);
                HTB_DUMTREE(cl->un.inner.feed+p,node[p]);
            }
            printk("\n");
        }
    }
}
#endif

/**
 * htb_add_to_id_tree - adds class to the round robin list
 *
 * Routine adds class to the list (actually tree) sorted by classid.
 * Make sure that class is not already on such list for given prio.
 */
static void htb_add_to_id_tree (HTB_ARGQ rb_root_t *root,
        struct htb_class *cl,int prio)
{
    rb_node_t **p = &root->rb_node, *parent = NULL;
    HTB_DBG(7,3,"htb_add_id_tree cl=%X prio=%d\n",cl->classid,prio);
#ifdef HTB_DEBUG
    if (cl->node[prio].rb_color != -1) { BUG_TRAP(0); return; }
    HTB_CHCL(cl);
    if (*p) {
        struct htb_class *x = rb_entry(*p,struct htb_class,node[prio]);
        HTB_CHCL(x);
    }
#endif
    while (*p) {
        struct htb_class *c; parent = *p;
        c = rb_entry(parent, struct htb_class, node[prio]);
        HTB_CHCL(c);
        if (cl->classid > c->classid)
            p = &parent->rb_right;
        else
            p = &parent->rb_left;
    }
    rb_link_node(&cl->node[prio], parent, p);
    rb_insert_color(&cl->node[prio], root);
}

/**
 * htb_add_to_wait_tree - adds class to the event queue with delay
 *
 * The class is added to priority event queue to indicate that class will
 * change its mode in cl->pq_key microseconds. Make sure that class is not
 * already in the queue.
 */
static void htb_add_to_wait_tree (struct htb_sched *q,
        struct htb_class *cl,long delay,int debug_hint)
{
    rb_node_t **p = &q->wait_pq[cl->level].rb_node, *parent = NULL;
    HTB_DBG(7,3,"htb_add_wt cl=%X key=%lu\n",cl->classid,cl->pq_key);
#ifdef HTB_DEBUG
    if (cl->pq_node.rb_color != -1) { BUG_TRAP(0); return; }
    HTB_CHCL(cl);
    if ((delay <= 0 || delay > cl->mbuffer) && net_ratelimit())
        printk(KERN_ERR "HTB: suspicious delay in wait_tree d=%ld cl=%X h=%d\n",delay,cl->classid,debug_hint);
#endif
    cl->pq_key = q->jiffies + PSCHED_US2JIFFIE(delay);
    if (cl->pq_key == q->jiffies)
        cl->pq_key++;

    /* update the nearest event cache */
    if (time_after(q->near_ev_cache[cl->level], cl->pq_key))
        q->near_ev_cache[cl->level] = cl->pq_key;

    while (*p) {
        struct htb_class *c; parent = *p;
        c = rb_entry(parent, struct htb_class, pq_node);
        if (time_after_eq(cl->pq_key, c->pq_key))
            p = &parent->rb_right;
        else
            p = &parent->rb_left;
    }
    rb_link_node(&cl->pq_node, parent, p);
    rb_insert_color(&cl->pq_node, &q->wait_pq[cl->level]);
}

/**
 * htb_next_rb_node - finds next node in binary tree
 *
 * When we are past last key we return NULL.
 * Average complexity is 2 steps per call.
 */
static void htb_next_rb_node(rb_node_t **n)
{
    rb_node_t *p;
    if ((*n)->rb_right) {
        /* child at right: use it or its leftmost descendant */
        *n = (*n)->rb_right;
        while ((*n)->rb_left)
            *n = (*n)->rb_left;
        return;
    }
    while ((p = (*n)->rb_parent) != NULL) {
        /* if we've arrived from left child then we have next node */
        if (p->rb_left == *n) break;
        *n = p;
    }
    *n = p;
}

/**
 * htb_add_class_to_row - add class to its row
 *
 * The class is added to row at priorities marked in mask.
 * It does nothing if mask == 0.
 */
static inline void htb_add_class_to_row(struct htb_sched *q,
        struct htb_class *cl,int mask)
{
    HTB_DBG(7,2,"htb_addrow cl=%X mask=%X rmask=%X\n",
            cl->classid,mask,q->row_mask[cl->level]);
    HTB_CHCL(cl);
    q->row_mask[cl->level] |= mask;
    while (mask) {
        int prio = ffz(~mask);
        mask &= ~(1 << prio);
        htb_add_to_id_tree(HTB_PASSQ q->row[cl->level]+prio,cl,prio);
    }
}
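
/* Worked example (illustrative): ffz(~mask) yields the index of the
   lowest set bit, so for mask == 0x05 the loop inserts the class into
   the prio 0 tree, then the prio 2 tree, and stops. */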

/**
 * htb_remove_class_from_row - removes class from its row
 *
 * The class is removed from row at priorities marked in mask.
 * It does nothing if mask == 0.
 */
static __inline__ void htb_remove_class_from_row(struct htb_sched *q,
        struct htb_class *cl,int mask)
{
    int m = 0;
    HTB_CHCL(cl);
    while (mask) {
        int prio = ffz(~mask);
        mask &= ~(1 << prio);
        if (q->ptr[cl->level][prio] == cl->node+prio)
            htb_next_rb_node(q->ptr[cl->level]+prio);
        htb_safe_rb_erase(cl->node + prio,q->row[cl->level]+prio);
        if (!q->row[cl->level][prio].rb_node)
            m |= 1 << prio;
    }
    HTB_DBG(7,2,"htb_delrow cl=%X mask=%X rmask=%X maskdel=%X\n",
            cl->classid,mask,q->row_mask[cl->level],m);
    q->row_mask[cl->level] &= ~m;
}

/**
 * htb_activate_prios - creates active class's feed chain
 *
 * The class is connected to ancestors and/or appropriate rows
 * for priorities it is participating in. cl->cmode must be the new
 * (activated) mode. It does nothing if cl->prio_activity == 0.
 */
static void htb_activate_prios(struct htb_sched *q,struct htb_class *cl)
{
    struct htb_class *p = cl->parent;
    long m,mask = cl->prio_activity;
    HTB_DBG(7,2,"htb_act_prios cl=%X mask=%lX cmode=%d\n",cl->classid,mask,cl->cmode);
    HTB_CHCL(cl);

    while (cl->cmode == HTB_MAY_BORROW && p && mask) {
        HTB_CHCL(p);
        m = mask; while (m) {
            int prio = ffz(~m);
            m &= ~(1 << prio);

            if (p->un.inner.feed[prio].rb_node)
                /* parent already has its feed in use, so clear the
                   bit in mask; parent is already ok */
                mask &= ~(1 << prio);

            htb_add_to_id_tree(HTB_PASSQ p->un.inner.feed+prio,cl,prio);
        }
        HTB_DBG(7,3,"htb_act_pr_aft p=%X pact=%X mask=%lX pmode=%d\n",
                p->classid,p->prio_activity,mask,p->cmode);
        p->prio_activity |= mask;
        cl = p; p = cl->parent;
        HTB_CHCL(cl);
    }
    if (cl->cmode == HTB_CAN_SEND && mask)
        htb_add_class_to_row(q,cl,mask);
}

/**
 * htb_deactivate_prios - remove class from feed chain
 *
 * cl->cmode must represent old mode (before deactivation). It does
 * nothing if cl->prio_activity == 0. Class is removed from all feed
 * chains and rows.
 */
static void htb_deactivate_prios(struct htb_sched *q, struct htb_class *cl)
{
    struct htb_class *p = cl->parent;
    long m,mask = cl->prio_activity;
    HTB_DBG(7,2,"htb_deact_prios cl=%X mask=%lX cmode=%d\n",cl->classid,mask,cl->cmode);
    HTB_CHCL(cl);

    while (cl->cmode == HTB_MAY_BORROW && p && mask) {
        m = mask; mask = 0;
        while (m) {
            int prio = ffz(~m);
            m &= ~(1 << prio);

            if (p->un.inner.ptr[prio] == cl->node+prio) {
                /* we are removing the child the parent's feed pointer
                   points to - forget the pointer but remember the
                   classid */
                p->un.inner.last_ptr_id[prio] = cl->classid;
                p->un.inner.ptr[prio] = NULL;
            }

            htb_safe_rb_erase(cl->node + prio,p->un.inner.feed + prio);

            if (!p->un.inner.feed[prio].rb_node)
                mask |= 1 << prio;
        }
        HTB_DBG(7,3,"htb_deact_pr_aft p=%X pact=%X mask=%lX pmode=%d\n",
                p->classid,p->prio_activity,mask,p->cmode);
        p->prio_activity &= ~mask;
        cl = p; p = cl->parent;
        HTB_CHCL(cl);
    }
    if (cl->cmode == HTB_CAN_SEND && mask)
        htb_remove_class_from_row(q,cl,mask);
}

/**
 * htb_class_mode - computes and returns current class mode
 *
 * It computes cl's mode at time cl->t_c+diff and returns it. If mode
 * is not HTB_CAN_SEND then cl->pq_key is updated to time difference
 * from now to time when cl will change its state.
 * It is also worth noting that class mode doesn't change simply
 * at cl->{c,}tokens == 0; rather there can be hysteresis in the
 * 0 .. -cl->{c,}buffer range. It is meant to limit the number of
 * mode transitions per time unit. The speed gain is about 1/6.
 */
static __inline__ enum htb_cmode
htb_class_mode(struct htb_class *cl,long long *diff)
{
    long toks;

    if ((toks = (cl->ctokens + *diff)) < (
#if HTB_HYSTERESIS
            cl->cmode != HTB_CANT_SEND ? -cl->cbuffer :
#endif
            0)) {
        *diff = -toks;
        return HTB_CANT_SEND;
    }
    if ((toks = (cl->tokens + *diff)) >= (
#if HTB_HYSTERESIS
            cl->cmode == HTB_CAN_SEND ? -cl->buffer :
#endif
            0))
        return HTB_CAN_SEND;

    *diff = -toks;
    return HTB_MAY_BORROW;
}
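
/* Worked example (illustrative, hysteresis disabled): with
   tokens = -5000, ctokens = 2000 and *diff = 1000, the ceil bucket is
   fine (2000+1000 >= 0) but the rate bucket is not (-5000+1000 < 0),
   so the class is HTB_MAY_BORROW and *diff becomes 4000, the time
   until the rate bucket reaches zero. */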

/**
 * htb_change_class_mode - changes class's mode
 *
 * This should be the only way to change class's mode under normal
 * circumstances. Routine will update feed lists linkage, change mode
 * and add class to the wait event queue if appropriate. New mode should
 * be different from old one and cl->pq_key has to be valid if changing
 * to mode other than HTB_CAN_SEND (see htb_add_to_wait_tree).
 */
static void
htb_change_class_mode(struct htb_sched *q, struct htb_class *cl, long long *diff)
{
    enum htb_cmode new_mode = htb_class_mode(cl,diff);

    HTB_CHCL(cl);
    HTB_DBG(7,1,"htb_chging_clmode %d->%d cl=%X\n",cl->cmode,new_mode,cl->classid);

    if (new_mode == cl->cmode)
        return;

    if (cl->prio_activity) {    /* not necessary: speed optimization */
        if (cl->cmode != HTB_CANT_SEND)
            htb_deactivate_prios(q,cl);
        cl->cmode = new_mode;
        if (new_mode != HTB_CANT_SEND)
            htb_activate_prios(q,cl);
    } else
        cl->cmode = new_mode;
}

/**
 * htb_activate - inserts leaf cl into appropriate active feeds
 *
 * Routine learns (new) priority of leaf and activates feed chain
 * for the prio. It can be called on already active leaf safely.
 * It also adds leaf into droplist.
 */
static __inline__ void htb_activate(struct htb_sched *q,struct htb_class *cl)
{
    BUG_TRAP(!cl->level && cl->un.leaf.q && cl->un.leaf.q->q.qlen);
    HTB_CHCL(cl);
    if (!cl->prio_activity) {
        cl->prio_activity = 1 << (cl->un.leaf.aprio = cl->un.leaf.prio);
        htb_activate_prios(q,cl);
        list_add_tail(&cl->un.leaf.drop_list,q->drops+cl->un.leaf.aprio);
    }
}

/**
 * htb_deactivate - remove leaf cl from active feeds
 *
 * Make sure that leaf is active. In other words it can't be called
 * with non-active leaf. It also removes class from the drop list.
 */
static __inline__ void
htb_deactivate(struct htb_sched *q,struct htb_class *cl)
{
    BUG_TRAP(cl->prio_activity);
    HTB_CHCL(cl);
    htb_deactivate_prios(q,cl);
    cl->prio_activity = 0;
    list_del_init(&cl->un.leaf.drop_list);
}

static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch)
{
    struct htb_sched *q = (struct htb_sched *)sch->data;
    struct htb_class *cl = htb_classify(skb,sch);

    if (cl == HTB_DIRECT || !cl) {
        /* enqueue to helper queue */
        if (q->direct_queue.qlen < q->direct_qlen && cl) {
            __skb_queue_tail(&q->direct_queue, skb);
            q->direct_pkts++;
        } else {
            kfree_skb (skb);
            sch->stats.drops++;
            return NET_XMIT_DROP;
        }
    } else if (cl->un.leaf.q->enqueue(skb, cl->un.leaf.q) != NET_XMIT_SUCCESS) {
        sch->stats.drops++;
        cl->stats.drops++;
        return NET_XMIT_DROP;
    } else {
        cl->stats.packets++; cl->stats.bytes += skb->len;
        htb_activate (q,cl);
    }

    sch->q.qlen++;
    sch->stats.packets++; sch->stats.bytes += skb->len;
    HTB_DBG(1,1,"htb_enq_ok cl=%X skb=%p\n",(cl && cl != HTB_DIRECT)?cl->classid:0,skb);
    return NET_XMIT_SUCCESS;
}

/* TODO: requeuing packet charges it to policers again !! */
static int htb_requeue(struct sk_buff *skb, struct Qdisc *sch)
{
    struct htb_sched *q = (struct htb_sched *)sch->data;
    struct htb_class *cl = htb_classify(skb,sch);
    struct sk_buff *tskb;

    if (cl == HTB_DIRECT || !cl) {
        /* enqueue to helper queue */
        if (q->direct_queue.qlen < q->direct_qlen && cl) {
            __skb_queue_head(&q->direct_queue, skb);
        } else {
            __skb_queue_head(&q->direct_queue, skb);
            tskb = __skb_dequeue_tail(&q->direct_queue);
            kfree_skb (tskb);
            sch->stats.drops++;
            return NET_XMIT_CN;
        }
    } else if (cl->un.leaf.q->ops->requeue(skb, cl->un.leaf.q) != NET_XMIT_SUCCESS) {
        sch->stats.drops++;
        cl->stats.drops++;
        return NET_XMIT_DROP;
    } else
        htb_activate (q,cl);

    sch->q.qlen++;
    HTB_DBG(1,1,"htb_req_ok cl=%X skb=%p\n",(cl && cl != HTB_DIRECT)?cl->classid:0,skb);
    return NET_XMIT_SUCCESS;
}

static void htb_timer(unsigned long arg)
{
    struct Qdisc *sch = (struct Qdisc*)arg;
    sch->flags &= ~TCQ_F_THROTTLED;
    wmb();
    netif_schedule(sch->dev);
}

#ifdef HTB_RATECM
#define RT_GEN(D,R) R+=D-(R/HTB_EWMAC);D=0
static void htb_rate_timer(unsigned long arg)
{
    struct Qdisc *sch = (struct Qdisc*)arg;
    struct htb_sched *q = (struct htb_sched *)sch->data;
    struct list_head *p;

    /* lock queue so that we can muck with it */
    HTB_QLOCK(sch);
    HTB_DBG(10,1,"htb_rttmr j=%ld\n",jiffies);

    q->rttim.expires = jiffies + HZ;
    add_timer(&q->rttim);

    /* scan and recompute one bucket at time */
    if (++q->recmp_bucket >= HTB_HSIZE)
        q->recmp_bucket = 0;
    list_for_each (p,q->hash+q->recmp_bucket) {
        struct htb_class *cl = list_entry(p,struct htb_class,hlist);
        HTB_DBG(10,2,"htb_rttmr_cl cl=%X sbyte=%lu spkt=%lu\n",
                cl->classid,cl->sum_bytes,cl->sum_packets);
        RT_GEN (cl->sum_bytes,cl->rate_bytes);
        RT_GEN (cl->sum_packets,cl->rate_packets);
    }
    HTB_QUNLOCK(sch);
}
#endif
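
/* Illustrative note: RT_GEN maintains an exponentially weighted moving
   average. Each hash bucket is revisited every HTB_HSIZE seconds, so with
   HTB_EWMAC == 2 a class's rate_bytes settles around HTB_EWMAC times the
   bytes seen per HTB_HSIZE-second period; htb_dump_class() later divides
   by HTB_EWMAC*HTB_HSIZE to report bytes per second. */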

/**
 * htb_charge_class - charges amount "bytes" to leaf and ancestors
 *
 * Routine assumes that packet "bytes" long was dequeued from leaf cl
 * borrowing from "level". It accounts bytes to ceil leaky bucket for
 * leaf and all ancestors and to rate bucket for ancestors at levels
 * "level" and higher. It also handles possible change of mode resulting
 * from the update. Note that mode can also increase here (MAY_BORROW to
 * CAN_SEND) because we can use a more precise clock than the event queue
 * here. In such case we remove class from event queue first.
 */
static void htb_charge_class(struct htb_sched *q,struct htb_class *cl,
        int level,int bytes)
{
    long long diff;
    long toks;
    enum htb_cmode old_mode;
    HTB_DBG(5,1,"htb_chrg_cl cl=%X lev=%d len=%d\n",cl->classid,level,bytes);

#define HTB_ACCNT(T,B,R) toks = diff + cl->T; \
        if (toks > cl->B) toks = cl->B; \
        toks -= L2T(cl, cl->R, bytes); \
        if (toks <= -cl->mbuffer) toks = 1-cl->mbuffer; \
        cl->T = toks

    while (cl) {
        HTB_CHCL(cl);
        diff = PSCHED_TDIFF_SAFE(q->now, cl->t_c, (u32)cl->mbuffer, 0);
#ifdef HTB_DEBUG
        if (diff > cl->mbuffer || diff < 0 || PSCHED_TLESS(q->now, cl->t_c)) {
            if (net_ratelimit())
                printk(KERN_ERR "HTB: bad diff in charge, cl=%X diff=%Ld now=%Lu then=%Lu j=%lu\n",
                        cl->classid, diff,
                        (unsigned long long) q->now,
                        (unsigned long long) cl->t_c,
                        q->jiffies);
            diff = 1000;
        }
#endif
        if (cl->level >= level) {
            if (cl->level == level) cl->xstats.lends++;
            HTB_ACCNT (tokens,buffer,rate);
        } else {
            cl->xstats.borrows++;
            cl->tokens += diff; /* we moved t_c; update tokens */
        }
        HTB_ACCNT (ctokens,cbuffer,ceil);
        cl->t_c = q->now;
        HTB_DBG(5,2,"htb_chrg_clp cl=%X diff=%Ld tok=%ld ctok=%ld\n",cl->classid,diff,cl->tokens,cl->ctokens);

        old_mode = cl->cmode; diff = 0;
        htb_change_class_mode(q,cl,&diff);
        if (old_mode != cl->cmode) {
            if (old_mode != HTB_CAN_SEND)
                htb_safe_rb_erase(&cl->pq_node,q->wait_pq+cl->level);
            if (cl->cmode != HTB_CAN_SEND)
                htb_add_to_wait_tree (q,cl,diff,1);
        }

#ifdef HTB_RATECM
        /* update rate counters */
        cl->sum_bytes += bytes; cl->sum_packets++;
#endif

        /* update byte stats except for leaves which are already updated */
        if (cl->level) {
            cl->stats.bytes += bytes;
            cl->stats.packets++;
        }
        cl = cl->parent;
    }
}
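
/* Worked example for HTB_ACCNT (illustrative): with tokens = -1000,
   buffer = 10000, an elapsed diff of 5000 ticks and L2T() returning 2000
   for this packet, toks = 5000 + (-1000) = 4000, capped at 10000 (no-op
   here), minus 2000 leaves tokens = 2000; the bucket refills with elapsed
   time and drains by one transmit time per packet. */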

/**
 * htb_do_events - make mode changes to classes at the level
 *
 * Scans event queue for pending events and applies them. Returns jiffies to
 * next pending event (0 for no event in pq).
 * Note: Applied are events that have cl->pq_key <= q->jiffies.
 */
static long htb_do_events(struct htb_sched *q,int level)
{
    int i;
    HTB_DBG(8,1,"htb_do_events l=%d root=%p rmask=%X\n",
            level,q->wait_pq[level].rb_node,q->row_mask[level]);
    for (i = 0; i < 500; i++) {
        struct htb_class *cl;
        long long diff;
        rb_node_t *p = q->wait_pq[level].rb_node;
        if (!p) return 0;
        while (p->rb_left) p = p->rb_left;

        cl = rb_entry(p, struct htb_class, pq_node);
        if (time_after(cl->pq_key, q->jiffies)) {
            HTB_DBG(8,3,"htb_do_ev_ret delay=%ld\n",cl->pq_key - q->jiffies);
            return cl->pq_key - q->jiffies;
        }
        htb_safe_rb_erase(p,q->wait_pq+level);
        diff = PSCHED_TDIFF_SAFE(q->now, cl->t_c, (u32)cl->mbuffer, 0);
#ifdef HTB_DEBUG
        if (diff > cl->mbuffer || diff < 0 || PSCHED_TLESS(q->now, cl->t_c)) {
            if (net_ratelimit())
                printk(KERN_ERR "HTB: bad diff in events, cl=%X diff=%Ld now=%Lu then=%Lu j=%lu\n",
                        cl->classid, diff,
                        (unsigned long long) q->now,
                        (unsigned long long) cl->t_c,
                        q->jiffies);
            diff = 1000;
        }
#endif
        htb_change_class_mode(q,cl,&diff);
        if (cl->cmode != HTB_CAN_SEND)
            htb_add_to_wait_tree (q,cl,diff,2);
    }
    if (net_ratelimit())
        printk(KERN_WARNING "htb: too many events !\n");
    return HZ/10;
}

/* Returns class->node+prio from id-tree where class's id is >= id. NULL
   if no such one exists. */
static rb_node_t *
htb_id_find_next_upper(int prio,rb_node_t *n,u32 id)
{
    rb_node_t *r = NULL;
    while (n) {
        struct htb_class *cl = rb_entry(n,struct htb_class,node[prio]);
        if (id == cl->classid) return n;

        if (id > cl->classid) {
            n = n->rb_right;
        } else {
            r = n;
            n = n->rb_left;
        }
    }
    return r;
}

/**
 * htb_lookup_leaf - returns next leaf class in DRR order
 *
 * Find the leaf where the current feed pointer points to.
 */
static struct htb_class *
htb_lookup_leaf(HTB_ARGQ rb_root_t *tree,int prio,rb_node_t **pptr,u32 *pid)
{
    int i;
    struct {
        rb_node_t *root;
        rb_node_t **pptr;
        u32 *pid;
    } stk[TC_HTB_MAXDEPTH],*sp = stk;

    BUG_TRAP(tree->rb_node);
    sp->root = tree->rb_node;
    sp->pptr = pptr;
    sp->pid = pid;

    for (i = 0; i < 65535; i++) {
        HTB_DBG(4,2,"htb_lleaf ptr=%p pid=%X\n",*sp->pptr,*sp->pid);

        if (!*sp->pptr && *sp->pid) {
            /* ptr was invalidated but id is valid - try to recover
               the original or next ptr */
            *sp->pptr = htb_id_find_next_upper(prio,sp->root,*sp->pid);
        }
        *sp->pid = 0;   /* ptr is valid now, so remove this hint as it
                           can become out of date quickly */
        if (!*sp->pptr) { /* we are at right end; rewind & go up */
            *sp->pptr = sp->root;
            while ((*sp->pptr)->rb_left)
                *sp->pptr = (*sp->pptr)->rb_left;
            if (sp > stk) {
                sp--;
                BUG_TRAP(*sp->pptr); if(!*sp->pptr) return NULL;
                htb_next_rb_node (sp->pptr);
            }
        } else {
            struct htb_class *cl;
            cl = rb_entry(*sp->pptr,struct htb_class,node[prio]);
            HTB_CHCL(cl);
            if (!cl->level)
                return cl;
            (++sp)->root = cl->un.inner.feed[prio].rb_node;
            sp->pptr = cl->un.inner.ptr+prio;
            sp->pid = cl->un.inner.last_ptr_id+prio;
        }
    }
    BUG_TRAP(0);
    return NULL;
}

/* dequeues packet at given priority and level; call only if
   you are sure that there is active class at prio/level */
static struct sk_buff *
htb_dequeue_tree(struct htb_sched *q,int prio,int level)
{
    struct sk_buff *skb = NULL;
    struct htb_class *cl,*start;
    /* look initial class up in the row */
    start = cl = htb_lookup_leaf (HTB_PASSQ q->row[level]+prio,prio,
            q->ptr[level]+prio,q->last_ptr_id[level]+prio);

    do {
next:
        BUG_TRAP(cl);
        if (!cl) return NULL;
        HTB_DBG(4,1,"htb_deq_tr prio=%d lev=%d cl=%X defic=%d\n",
                prio,level,cl->classid,cl->un.leaf.deficit[level]);

        /* class can be empty - it is unlikely but can be true if leaf
           qdisc drops packets in enqueue routine or if someone used
           graft operation on the leaf since last dequeue;
           simply deactivate and skip such class */
        if (unlikely(cl->un.leaf.q->q.qlen == 0)) {
            struct htb_class *next;
            htb_deactivate(q,cl);

            /* row/level might become empty */
            if ((q->row_mask[level] & (1 << prio)) == 0)
                return NULL;

            next = htb_lookup_leaf (HTB_PASSQ q->row[level]+prio,
                    prio,q->ptr[level]+prio,q->last_ptr_id[level]+prio);
            if (cl == start) /* fix start if we just deleted it */
                start = next;
            cl = next;
            goto next;
        }

        if (likely((skb = cl->un.leaf.q->dequeue(cl->un.leaf.q)) != NULL))
            break;
        if (!cl->warned) {
            printk(KERN_WARNING "htb: class %X isn't work conserving ?!\n",cl->classid);
            cl->warned = 1;
        }
        q->nwc_hit++;
        htb_next_rb_node((level?cl->parent->un.inner.ptr:q->ptr[0])+prio);
        cl = htb_lookup_leaf (HTB_PASSQ q->row[level]+prio,prio,q->ptr[level]+prio,
                q->last_ptr_id[level]+prio);
    } while (cl != start);

    if (likely(skb != NULL)) {
        if ((cl->un.leaf.deficit[level] -= skb->len) < 0) {
            HTB_DBG(4,2,"htb_next_cl oldptr=%p quant_add=%d\n",
                    level?cl->parent->un.inner.ptr[prio]:q->ptr[0][prio],cl->un.leaf.quantum);
            cl->un.leaf.deficit[level] += cl->un.leaf.quantum;
            htb_next_rb_node((level?cl->parent->un.inner.ptr:q->ptr[0])+prio);
        }
        /* this used to be after charge_class but this constellation
           gives us slightly better performance */
        if (!cl->un.leaf.q->q.qlen)
            htb_deactivate (q,cl);
        htb_charge_class (q,cl,level,skb->len);
    }
    return skb;
}
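
/* Worked DRR example (illustrative): with quantum = 10000, a leaf at
   deficit 0 that sends a 1500 byte packet goes to -1500, gets one quantum
   added (deficit 8500) and the round robin pointer advances; on its next
   turn it can send about 8500 bytes before the pointer moves on again,
   which approximates byte-fair sharing among leaves of the same prio. */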

static void htb_delay_by(struct Qdisc *sch,long delay)
{
    struct htb_sched *q = (struct htb_sched *)sch->data;
    if (delay <= 0) delay = 1;
    if (unlikely(delay > 5*HZ)) {
        if (net_ratelimit())
            printk(KERN_INFO "HTB delay %ld > 5sec\n", delay);
        delay = 5*HZ;
    }
    /* why not use jiffies here? because expires can be in the past */
    mod_timer(&q->timer, q->jiffies + delay);
    sch->flags |= TCQ_F_THROTTLED;
    sch->stats.overlimits++;
    HTB_DBG(3,1,"htb_deq t_delay=%ld\n",delay);
}

static struct sk_buff *htb_dequeue(struct Qdisc *sch)
{
    struct sk_buff *skb = NULL;
    struct htb_sched *q = (struct htb_sched *)sch->data;
    int level;
    long min_delay;
#ifdef HTB_DEBUG
    int evs_used = 0;
#endif

    q->jiffies = jiffies;
    HTB_DBG(3,1,"htb_deq dircnt=%d qlen=%d\n",skb_queue_len(&q->direct_queue),
            sch->q.qlen);

    /* try to dequeue direct packets as high prio (!) to minimize cpu work */
    if ((skb = __skb_dequeue(&q->direct_queue)) != NULL) {
        sch->flags &= ~TCQ_F_THROTTLED;
        sch->q.qlen--;
        return skb;
    }

    if (!sch->q.qlen) goto fin;
    PSCHED_GET_TIME(q->now);

    min_delay = LONG_MAX;
    q->nwc_hit = 0;
    for (level = 0; level < TC_HTB_MAXDEPTH; level++) {
        /* common case optimization - skip event handler quickly */
        int m;
        long delay;
        if (time_after_eq(q->jiffies, q->near_ev_cache[level])) {
            delay = htb_do_events(q,level);
            q->near_ev_cache[level] = q->jiffies + (delay ? delay : HZ);
#ifdef HTB_DEBUG
            evs_used++;
#endif
        } else
            delay = q->near_ev_cache[level] - q->jiffies;

        if (delay && min_delay > delay)
            min_delay = delay;
        m = ~q->row_mask[level];
        while (m != (int)(-1)) {
            int prio = ffz (m);
            m |= 1 << prio;
            skb = htb_dequeue_tree(q,prio,level);
            if (likely(skb != NULL)) {
                sch->q.qlen--;
                sch->flags &= ~TCQ_F_THROTTLED;
                goto fin;
            }
        }
    }
#ifdef HTB_DEBUG
    if (!q->nwc_hit && min_delay >= 10*HZ && net_ratelimit()) {
        if (min_delay == LONG_MAX) {
            printk(KERN_ERR "HTB: dequeue bug (%d,%lu,%lu), report it please !\n",
                    evs_used,q->jiffies,jiffies);
            htb_debug_dump(q);
        } else
            printk(KERN_WARNING "HTB: mindelay=%ld, some class has "
                    "too small rate\n",min_delay);
    }
#endif
    htb_delay_by (sch,min_delay > 5*HZ ? 5*HZ : min_delay);
fin:
    HTB_DBG(3,1,"htb_deq_end %s j=%lu skb=%p\n",sch->dev->name,q->jiffies,skb);
    return skb;
}
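
/* Illustrative note on scan order: levels are walked from 0 (classes
   sending within their own rate) upward, so a class that need not borrow
   always beats one borrowing from an ancestor; within a level the
   row_mask loop visits numerically lower prio values first, giving them
   strict priority. */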

/* try to drop from each class (by prio) until one succeeds */
static unsigned int htb_drop(struct Qdisc* sch)
{
    struct htb_sched *q = (struct htb_sched *)sch->data;
    int prio;

    for (prio = TC_HTB_NUMPRIO - 1; prio >= 0; prio--) {
        struct list_head *p;
        list_for_each (p,q->drops+prio) {
            struct htb_class *cl = list_entry(p, struct htb_class,
                    un.leaf.drop_list);
            unsigned int len;
            if (cl->un.leaf.q->ops->drop &&
                    (len = cl->un.leaf.q->ops->drop(cl->un.leaf.q))) {
                sch->q.qlen--;
                if (!cl->un.leaf.q->q.qlen)
                    htb_deactivate (q,cl);
                return len;
            }
        }
    }
    return 0;
}

/* reset all classes */
/* always called under BH & queue lock */
static void htb_reset(struct Qdisc* sch)
{
    struct htb_sched *q = (struct htb_sched *)sch->data;
    int i;
    HTB_DBG(0,1,"htb_reset sch=%p, handle=%X\n",sch,sch->handle);

    for (i = 0; i < HTB_HSIZE; i++) {
        struct list_head *p;
        list_for_each (p,q->hash+i) {
            struct htb_class *cl = list_entry(p,struct htb_class,hlist);
            if (cl->level)
                memset(&cl->un.inner,0,sizeof(cl->un.inner));
            else {
                if (cl->un.leaf.q)
                    qdisc_reset(cl->un.leaf.q);
                INIT_LIST_HEAD(&cl->un.leaf.drop_list);
            }
            cl->prio_activity = 0;
            cl->cmode = HTB_CAN_SEND;
#ifdef HTB_DEBUG
            cl->pq_node.rb_color = -1;
            memset(cl->node,255,sizeof(cl->node));
#endif
        }
    }
    sch->flags &= ~TCQ_F_THROTTLED;
    del_timer(&q->timer);
    __skb_queue_purge(&q->direct_queue);
    sch->q.qlen = 0;
    memset(q->row,0,sizeof(q->row));
    memset(q->row_mask,0,sizeof(q->row_mask));
    memset(q->wait_pq,0,sizeof(q->wait_pq));
    memset(q->ptr,0,sizeof(q->ptr));
    for (i = 0; i < TC_HTB_NUMPRIO; i++)
        INIT_LIST_HEAD(q->drops+i);
}

static int htb_init(struct Qdisc *sch, struct rtattr *opt)
{
    struct htb_sched *q = (struct htb_sched*)sch->data;
    struct rtattr *tb[TCA_HTB_INIT];
    struct tc_htb_glob *gopt;
    int i;
#ifdef HTB_DEBUG
    printk(KERN_INFO "HTB init, kernel part version %d.%d\n",
            HTB_VER >> 16,HTB_VER & 0xffff);
#endif
    if (!opt || rtattr_parse(tb, TCA_HTB_INIT, RTA_DATA(opt), RTA_PAYLOAD(opt)) ||
            tb[TCA_HTB_INIT-1] == NULL ||
            RTA_PAYLOAD(tb[TCA_HTB_INIT-1]) < sizeof(*gopt)) {
        printk(KERN_ERR "HTB: hey probably you have bad tc tool ?\n");
        return -EINVAL;
    }
    gopt = RTA_DATA(tb[TCA_HTB_INIT-1]);
    if (gopt->version != HTB_VER >> 16) {
        printk(KERN_ERR "HTB: need tc/htb version %d (minor is %d), you have %d\n",
                HTB_VER >> 16,HTB_VER & 0xffff,gopt->version);
        return -EINVAL;
    }
    q->debug = gopt->debug;
    HTB_DBG(0,1,"htb_init sch=%p handle=%X r2q=%d\n",sch,sch->handle,gopt->rate2quantum);

    INIT_LIST_HEAD(&q->root);
    for (i = 0; i < HTB_HSIZE; i++)
        INIT_LIST_HEAD(q->hash+i);
    for (i = 0; i < TC_HTB_NUMPRIO; i++)
        INIT_LIST_HEAD(q->drops+i);

    init_timer(&q->timer);
    skb_queue_head_init(&q->direct_queue);

    q->direct_qlen = sch->dev->tx_queue_len;
    if (q->direct_qlen < 2) /* some devices have zero tx_queue_len */
        q->direct_qlen = 2;
    q->timer.function = htb_timer;
    q->timer.data = (unsigned long)sch;

#ifdef HTB_RATECM
    init_timer(&q->rttim);
    q->rttim.function = htb_rate_timer;
    q->rttim.data = (unsigned long)sch;
    q->rttim.expires = jiffies + HZ;
    add_timer(&q->rttim);
#endif
    if ((q->rate2quantum = gopt->rate2quantum) < 1)
        q->rate2quantum = 1;
    q->defcls = gopt->defcls;

    MOD_INC_USE_COUNT;
    return 0;
}
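
/* Illustrative setup (assumes the matching tc userspace):
       tc qdisc add dev eth0 root handle 1: htb default 20
   arrives here as a TCA_HTB_INIT attribute; gopt->defcls then holds 0x20
   (tc parses "default" as hex), and packets that no filter classifies are
   sent to class 1:20 by htb_classify(). */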

static int htb_dump(struct Qdisc *sch, struct sk_buff *skb)
{
    struct htb_sched *q = (struct htb_sched*)sch->data;
    unsigned char *b = skb->tail;
    struct rtattr *rta;
    struct tc_htb_glob gopt;
    HTB_DBG(0,1,"htb_dump sch=%p, handle=%X\n",sch,sch->handle);
    /* stats */
    HTB_QLOCK(sch);
    gopt.direct_pkts = q->direct_pkts;

#ifdef HTB_DEBUG
    if (HTB_DBG_COND(0,2))
        htb_debug_dump(q);
#endif
    gopt.version = HTB_VER;
    gopt.rate2quantum = q->rate2quantum;
    gopt.defcls = q->defcls;
    gopt.debug = q->debug;
    rta = (struct rtattr*)b;
    RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
    RTA_PUT(skb, TCA_HTB_INIT, sizeof(gopt), &gopt);
    rta->rta_len = skb->tail - b;
    HTB_QUNLOCK(sch);
    return skb->len;
rtattr_failure:
    HTB_QUNLOCK(sch);
    skb_trim(skb, skb->tail - skb->data);
    return -1;
}

static int htb_dump_class(struct Qdisc *sch, unsigned long arg,
        struct sk_buff *skb, struct tcmsg *tcm)
{
#ifdef HTB_DEBUG
    struct htb_sched *q = (struct htb_sched*)sch->data;
#endif
    struct htb_class *cl = (struct htb_class*)arg;
    unsigned char *b = skb->tail;
    struct rtattr *rta;
    struct tc_htb_opt opt;

    HTB_DBG(0,1,"htb_dump_class handle=%X clid=%X\n",sch->handle,cl->classid);

    HTB_QLOCK(sch);
    tcm->tcm_parent = cl->parent ? cl->parent->classid : TC_H_ROOT;
    tcm->tcm_handle = cl->classid;
    if (!cl->level && cl->un.leaf.q) {
        tcm->tcm_info = cl->un.leaf.q->handle;
        cl->stats.qlen = cl->un.leaf.q->q.qlen;
    }

    rta = (struct rtattr*)b;
    RTA_PUT(skb, TCA_OPTIONS, 0, NULL);

    memset (&opt,0,sizeof(opt));

    opt.rate = cl->rate->rate; opt.buffer = cl->buffer;
    opt.ceil = cl->ceil->rate; opt.cbuffer = cl->cbuffer;
    opt.quantum = cl->un.leaf.quantum; opt.prio = cl->un.leaf.prio;
    opt.level = cl->level;
    RTA_PUT(skb, TCA_HTB_PARMS, sizeof(opt), &opt);
    rta->rta_len = skb->tail - b;

#ifdef HTB_RATECM
    cl->stats.bps = cl->rate_bytes/(HTB_EWMAC*HTB_HSIZE);
    cl->stats.pps = cl->rate_packets/(HTB_EWMAC*HTB_HSIZE);
#endif

    cl->xstats.tokens = cl->tokens;
    cl->xstats.ctokens = cl->ctokens;
    RTA_PUT(skb, TCA_STATS, sizeof(cl->stats), &cl->stats);
    RTA_PUT(skb, TCA_XSTATS, sizeof(cl->xstats), &cl->xstats);
    HTB_QUNLOCK(sch);
    return skb->len;
rtattr_failure:
    HTB_QUNLOCK(sch);
    skb_trim(skb, b - skb->data);
    return -1;
}

static int htb_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
        struct Qdisc **old)
{
    struct htb_class *cl = (struct htb_class*)arg;

    if (cl && !cl->level) {
        if (new == NULL && (new = qdisc_create_dflt(sch->dev,
                &pfifo_qdisc_ops)) == NULL)
            return -ENOBUFS;
        sch_tree_lock(sch);
        if ((*old = xchg(&cl->un.leaf.q, new)) != NULL) {
            if (cl->prio_activity)
                htb_deactivate ((struct htb_sched*)sch->data,cl);

            /* TODO: is it correct ? Why CBQ doesn't do it ? */
            sch->q.qlen -= (*old)->q.qlen;
            qdisc_reset(*old);
        }
        sch_tree_unlock(sch);
        return 0;
    }
    return -ENOENT;
}

static struct Qdisc * htb_leaf(struct Qdisc *sch, unsigned long arg)
{
    struct htb_class *cl = (struct htb_class*)arg;
    return (cl && !cl->level) ? cl->un.leaf.q : NULL;
}

static unsigned long htb_get(struct Qdisc *sch, u32 classid)
{
#ifdef HTB_DEBUG
    struct htb_sched *q = (struct htb_sched *)sch->data;
#endif
    struct htb_class *cl = htb_find(classid,sch);
    HTB_DBG(0,1,"htb_get clid=%X q=%p cl=%p ref=%d\n",classid,q,cl,cl?cl->refcnt:0);
    if (cl)
        cl->refcnt++;
    return (unsigned long)cl;
}

static void htb_destroy_filters(struct tcf_proto **fl)
{
    struct tcf_proto *tp;

    while ((tp = *fl) != NULL) {
        *fl = tp->next;
        tcf_destroy(tp);
    }
}

static void htb_destroy_class(struct Qdisc* sch,struct htb_class *cl)
{
    struct htb_sched *q = (struct htb_sched *)sch->data;
    HTB_DBG(0,1,"htb_destrycls clid=%X ref=%d\n", cl?cl->classid:0,cl?cl->refcnt:0);
    if (!cl->level) {
        BUG_TRAP(cl->un.leaf.q);
        sch->q.qlen -= cl->un.leaf.q->q.qlen;
        qdisc_destroy(cl->un.leaf.q);
    }
    qdisc_put_rtab(cl->rate);
    qdisc_put_rtab(cl->ceil);

#ifdef CONFIG_NET_ESTIMATOR
    qdisc_kill_estimator(&cl->stats);
#endif
    htb_destroy_filters (&cl->filter_list);

    while (!list_empty(&cl->children))
        htb_destroy_class (sch,list_entry(cl->children.next,
                struct htb_class,sibling));

    /* note: this delete may happen twice (see htb_delete) */
    list_del(&cl->hlist);
    list_del(&cl->sibling);

    if (cl->prio_activity)
        htb_deactivate (q,cl);

    if (cl->cmode != HTB_CAN_SEND)
        htb_safe_rb_erase(&cl->pq_node,q->wait_pq+cl->level);

    kfree(cl);
}

/* always called under BH & queue lock */
static void htb_destroy(struct Qdisc* sch)
{
    struct htb_sched *q = (struct htb_sched *)sch->data;
    HTB_DBG(0,1,"htb_destroy q=%p\n",q);

    del_timer_sync (&q->timer);
#ifdef HTB_RATECM
    del_timer_sync (&q->rttim);
#endif
    /* This line used to be after htb_destroy_class call below
       and surprisingly it worked in 2.4. But it must precede it
       because filters need their target class alive to be able to call
       unbind_filter on it (without an Oops). */
    htb_destroy_filters(&q->filter_list);

    while (!list_empty(&q->root))
        htb_destroy_class (sch,list_entry(q->root.next,
                struct htb_class,sibling));

    __skb_queue_purge(&q->direct_queue);
    MOD_DEC_USE_COUNT;
}

static int htb_delete(struct Qdisc *sch, unsigned long arg)
{
    struct htb_sched *q = (struct htb_sched *)sch->data;
    struct htb_class *cl = (struct htb_class*)arg;
    HTB_DBG(0,1,"htb_delete q=%p cl=%X ref=%d\n",q,cl?cl->classid:0,cl?cl->refcnt:0);

    // TODO: why not allow deleting a subtree? references? does the
    // tc subsystem guarantee us that in htb_destroy it holds no class
    // refs so that we can remove children safely there?
    if (!list_empty(&cl->children) || cl->filter_cnt)
        return -EBUSY;

    sch_tree_lock(sch);

    /* delete from hash and active; remainder in destroy_class */
    list_del_init(&cl->hlist);
    if (cl->prio_activity)
        htb_deactivate (q,cl);

    if (--cl->refcnt == 0)
        htb_destroy_class(sch,cl);

    sch_tree_unlock(sch);
    return 0;
}

static void htb_put(struct Qdisc *sch, unsigned long arg)
{
#ifdef HTB_DEBUG
    struct htb_sched *q = (struct htb_sched *)sch->data;
#endif
    struct htb_class *cl = (struct htb_class*)arg;
    HTB_DBG(0,1,"htb_put q=%p cl=%X ref=%d\n",q,cl?cl->classid:0,cl?cl->refcnt:0);

    if (--cl->refcnt == 0)
        htb_destroy_class(sch,cl);
}

static int htb_change_class(struct Qdisc *sch, u32 classid,
        u32 parentid, struct rtattr **tca, unsigned long *arg)
{
    int err = -EINVAL;
    struct htb_sched *q = (struct htb_sched *)sch->data;
    struct htb_class *cl = (struct htb_class*)*arg,*parent;
    struct rtattr *opt = tca[TCA_OPTIONS-1];
    struct qdisc_rate_table *rtab = NULL, *ctab = NULL;
    struct rtattr *tb[TCA_HTB_RTAB];
    struct tc_htb_opt *hopt;

    /* extract all subattrs from opt attr */
    if (!opt || rtattr_parse(tb, TCA_HTB_RTAB, RTA_DATA(opt), RTA_PAYLOAD(opt)) ||
            tb[TCA_HTB_PARMS-1] == NULL ||
            RTA_PAYLOAD(tb[TCA_HTB_PARMS-1]) < sizeof(*hopt))
        goto failure;

    parent = parentid == TC_H_ROOT ? NULL : htb_find (parentid,sch);

    hopt = RTA_DATA(tb[TCA_HTB_PARMS-1]);
    HTB_DBG(0,1,"htb_chg cl=%p(%X), clid=%X, parid=%X, opt/prio=%d, rate=%u, buff=%d, quant=%d\n", cl,cl?cl->classid:0,classid,parentid,(int)hopt->prio,hopt->rate.rate,hopt->buffer,hopt->quantum);
    rtab = qdisc_get_rtab(&hopt->rate, tb[TCA_HTB_RTAB-1]);
    ctab = qdisc_get_rtab(&hopt->ceil, tb[TCA_HTB_CTAB-1]);
    if (!rtab || !ctab) goto failure;

    if (!cl) { /* new class */
        struct Qdisc *new_q;
        /* check for valid classid */
        if (!classid || TC_H_MAJ(classid^sch->handle) || htb_find(classid,sch))
            goto failure;

        /* check maximal depth */
        if (parent && parent->parent && parent->parent->level < 2) {
            printk(KERN_ERR "htb: tree is too deep\n");
            goto failure;
        }
        err = -ENOBUFS;
        if ((cl = kmalloc(sizeof(*cl), GFP_KERNEL)) == NULL)
            goto failure;

        memset(cl, 0, sizeof(*cl));
        cl->refcnt = 1;
        INIT_LIST_HEAD(&cl->sibling);
        INIT_LIST_HEAD(&cl->hlist);
        INIT_LIST_HEAD(&cl->children);
        INIT_LIST_HEAD(&cl->un.leaf.drop_list);
#ifdef HTB_DEBUG
        cl->magic = HTB_CMAGIC;
#endif

        /* create leaf qdisc early because it uses kmalloc(GFP_KERNEL)
           so that can't be used inside of sch_tree_lock
           -- thanks to Karlis Peisenieks */
        new_q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops);
        sch_tree_lock(sch);
        if (parent && !parent->level) {
            /* turn parent into inner node */
            sch->q.qlen -= parent->un.leaf.q->q.qlen;
            qdisc_destroy (parent->un.leaf.q);
            if (parent->prio_activity)
                htb_deactivate (q,parent);

            /* remove from evt list because of level change */
            if (parent->cmode != HTB_CAN_SEND) {
                htb_safe_rb_erase(&parent->pq_node,q->wait_pq /*+0*/);
                parent->cmode = HTB_CAN_SEND;
            }
            parent->level = (parent->parent ? parent->parent->level
                    : TC_HTB_MAXDEPTH) - 1;
            memset (&parent->un.inner,0,sizeof(parent->un.inner));
        }
        /* leaf (we) needs elementary qdisc */
        cl->un.leaf.q = new_q ? new_q : &noop_qdisc;

        cl->classid = classid; cl->parent = parent;

        /* set class to be in HTB_CAN_SEND state */
        cl->tokens = hopt->buffer;
        cl->ctokens = hopt->cbuffer;
        cl->mbuffer = 60000000; /* 1min */
        PSCHED_GET_TIME(cl->t_c);
        cl->cmode = HTB_CAN_SEND;

        /* attach to the hash list and parent's family */
        list_add_tail(&cl->hlist, q->hash+htb_hash(classid));
        list_add_tail(&cl->sibling, parent ? &parent->children : &q->root);
#ifdef HTB_DEBUG
        {
            int i;
            for (i = 0; i < TC_HTB_NUMPRIO; i++) cl->node[i].rb_color = -1;
            cl->pq_node.rb_color = -1;
        }
#endif
    } else sch_tree_lock(sch);

    /* it used to be a nasty bug here, we have to check that node
       is really leaf before changing cl->un.leaf ! */
    if (!cl->level) {
        cl->un.leaf.quantum = rtab->rate.rate / q->rate2quantum;
        if (!hopt->quantum && cl->un.leaf.quantum < 1000) {
            printk(KERN_WARNING "HTB: quantum of class %X is small. Consider r2q change.\n", cl->classid);
            cl->un.leaf.quantum = 1000;
        }
        if (!hopt->quantum && cl->un.leaf.quantum > 200000) {
            printk(KERN_WARNING "HTB: quantum of class %X is big. Consider r2q change.\n", cl->classid);
            cl->un.leaf.quantum = 200000;
        }
        if (hopt->quantum)
            cl->un.leaf.quantum = hopt->quantum;
        if ((cl->un.leaf.prio = hopt->prio) >= TC_HTB_NUMPRIO)
            cl->un.leaf.prio = TC_HTB_NUMPRIO - 1;
    }

    cl->buffer = hopt->buffer;
    cl->cbuffer = hopt->cbuffer;
    if (cl->rate) qdisc_put_rtab(cl->rate); cl->rate = rtab;
    if (cl->ceil) qdisc_put_rtab(cl->ceil); cl->ceil = ctab;
    sch_tree_unlock(sch);

    *arg = (unsigned long)cl;
    return 0;

failure:
    if (rtab) qdisc_put_rtab(rtab);
    if (ctab) qdisc_put_rtab(ctab);
    return err;
}
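
/* Worked quantum example (illustrative): with the default r2q of 10 and
   a class rate of 100000 bytes/sec (800kbit), quantum becomes
   100000/10 = 10000 bytes per DRR turn; rates below 10 kB/s or above
   2 MB/s trip the "Consider r2q change" warnings above unless an
   explicit quantum is given. */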

static struct tcf_proto **htb_find_tcf(struct Qdisc *sch, unsigned long arg)
{
    struct htb_sched *q = (struct htb_sched *)sch->data;
    struct htb_class *cl = (struct htb_class *)arg;
    struct tcf_proto **fl = cl ? &cl->filter_list : &q->filter_list;
    HTB_DBG(0,2,"htb_tcf q=%p clid=%X fref=%d fl=%p\n",q,cl?cl->classid:0,cl?cl->filter_cnt:q->filter_cnt,*fl);
    return fl;
}

static unsigned long htb_bind_filter(struct Qdisc *sch, unsigned long parent,
        u32 classid)
{
    struct htb_sched *q = (struct htb_sched *)sch->data;
    struct htb_class *cl = htb_find (classid,sch);
    HTB_DBG(0,2,"htb_bind q=%p clid=%X cl=%p fref=%d\n",q,classid,cl,cl?cl->filter_cnt:q->filter_cnt);
    /*if (cl && !cl->level) return 0;
      The line above used to be there to prevent attaching filters to
      leaves. But at least tc_index filter uses this just to get class
      for other reasons so that we have to allow for it.
      ----
      19.6.2002 As Werner explained it is ok - bind filter is just
      another way to "lock" the class - unlike "get" this lock can
      be broken by class during destroy IIUC.
     */
    if (cl)
        cl->filter_cnt++;
    else
        q->filter_cnt++;
    return (unsigned long)cl;
}

static void htb_unbind_filter(struct Qdisc *sch, unsigned long arg)
{
    struct htb_sched *q = (struct htb_sched *)sch->data;
    struct htb_class *cl = (struct htb_class *)arg;
    HTB_DBG(0,2,"htb_unbind q=%p cl=%p fref=%d\n",q,cl,cl?cl->filter_cnt:q->filter_cnt);
    if (cl)
        cl->filter_cnt--;
    else
        q->filter_cnt--;
}

static void htb_walk(struct Qdisc *sch, struct qdisc_walker *arg)
{
    struct htb_sched *q = (struct htb_sched *)sch->data;
    int i;

    if (arg->stop)
        return;

    for (i = 0; i < HTB_HSIZE; i++) {
        struct list_head *p;
        list_for_each (p,q->hash+i) {
            struct htb_class *cl = list_entry(p,struct htb_class,hlist);
            if (arg->count < arg->skip) {
                arg->count++;
                continue;
            }
            if (arg->fn(sch, (unsigned long)cl, arg) < 0) {
                arg->stop = 1;
                return;
            }
            arg->count++;
        }
    }
}

static struct Qdisc_class_ops htb_class_ops =
{
    htb_graft,
    htb_leaf,
    htb_get,
    htb_put,
    htb_change_class,
    htb_delete,
    htb_walk,

    htb_find_tcf,
    htb_bind_filter,
    htb_unbind_filter,

    htb_dump_class,
};

struct Qdisc_ops htb_qdisc_ops =
{
    NULL,
    &htb_class_ops,
    "htb",
    sizeof(struct htb_sched),

    htb_enqueue,
    htb_dequeue,
    htb_requeue,
    htb_drop,

    htb_init,
    htb_reset,
    htb_destroy,
    NULL /* htb_change */,

    htb_dump,
};

#ifdef MODULE
int init_module(void)
{
    return register_qdisc(&htb_qdisc_ops);
}

void cleanup_module(void)
{
    unregister_qdisc(&htb_qdisc_ops);
}
MODULE_LICENSE("GPL");
#endif