[PKT_SCHED]: GRED: Introduce tc_index_to_dp()
[linux-2.6/zen-sources.git] / net / sched / sch_gred.c
blob38dab959feed2e8704337d78cbe6d1184efadc16
1 /*
2 * net/sched/sch_gred.c Generic Random Early Detection queue.
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License
7 * as published by the Free Software Foundation; either version
8 * 2 of the License, or (at your option) any later version.
10 * Authors: J Hadi Salim (hadi@cyberus.ca) 1998-2002
12 * 991129: - Bug fix with grio mode
13 * - a better sing. AvgQ mode with Grio(WRED)
14 * - A finer grained VQ dequeue based on suggestion
15 * from Ren Liu
16 * - More error checks
20 * For all the glorious comments look at Alexey's sch_red.c
23 #include <linux/config.h>
24 #include <linux/module.h>
25 #include <asm/uaccess.h>
26 #include <asm/system.h>
27 #include <linux/bitops.h>
28 #include <linux/types.h>
29 #include <linux/kernel.h>
30 #include <linux/sched.h>
31 #include <linux/string.h>
32 #include <linux/mm.h>
33 #include <linux/socket.h>
34 #include <linux/sockios.h>
35 #include <linux/in.h>
36 #include <linux/errno.h>
37 #include <linux/interrupt.h>
38 #include <linux/if_ether.h>
39 #include <linux/inet.h>
40 #include <linux/netdevice.h>
41 #include <linux/etherdevice.h>
42 #include <linux/notifier.h>
43 #include <net/ip.h>
44 #include <net/route.h>
45 #include <linux/skbuff.h>
46 #include <net/sock.h>
47 #include <net/pkt_sched.h>
48 #include <net/red.h>
/*
 * Debug output switches.
 *
 * DPRINTK is used on the control (configuration) path and is compiled in,
 * D2PRINTK is used on the data (per packet) path and is compiled out.
 */
#define GRED_DEBUG_CONTROL	1
#define GRED_DEBUG_DATA		0

#if GRED_DEBUG_CONTROL
#define DPRINTK(format,args...) printk(KERN_DEBUG format,##args)
#else
#define DPRINTK(format,args...)
#endif

#if GRED_DEBUG_DATA
#define D2PRINTK(format,args...) printk(KERN_DEBUG format,##args)
#else
#define D2PRINTK(format,args...)
#endif

/* Priority assigned to a VQ when none is configured. */
#define GRED_DEF_PRIO (MAX_DPs / 2)
/* Mask extracting the virtual queue index from skb->tc_index. */
#define GRED_VQ_MASK (MAX_DPs - 1)

struct gred_sched_data;
struct gred_sched;
68 struct gred_sched_data
70 u32 limit; /* HARD maximal queue length */
71 u32 DP; /* the drop pramaters */
72 u32 bytesin; /* bytes seen on virtualQ so far*/
73 u32 packetsin; /* packets seen on virtualQ so far*/
74 u32 backlog; /* bytes on the virtualQ */
75 u8 prio; /* the prio of this vq */
77 struct red_parms parms;
78 struct red_stats stats;
/* Bit numbers used in gred_sched.flags. */
enum {
	GRED_WRED_MODE	= 1,
	GRED_RIO_MODE	= 2,
};
86 struct gred_sched
88 struct gred_sched_data *tab[MAX_DPs];
89 unsigned long flags;
90 u32 DPs;
91 u32 def;
92 u8 initd;
95 static inline int gred_wred_mode(struct gred_sched *table)
97 return test_bit(GRED_WRED_MODE, &table->flags);
100 static inline void gred_enable_wred_mode(struct gred_sched *table)
102 __set_bit(GRED_WRED_MODE, &table->flags);
105 static inline void gred_disable_wred_mode(struct gred_sched *table)
107 __clear_bit(GRED_WRED_MODE, &table->flags);
110 static inline int gred_rio_mode(struct gred_sched *table)
112 return test_bit(GRED_RIO_MODE, &table->flags);
115 static inline void gred_enable_rio_mode(struct gred_sched *table)
117 __set_bit(GRED_RIO_MODE, &table->flags);
120 static inline void gred_disable_rio_mode(struct gred_sched *table)
122 __clear_bit(GRED_RIO_MODE, &table->flags);
125 static inline int gred_wred_mode_check(struct Qdisc *sch)
127 struct gred_sched *table = qdisc_priv(sch);
128 int i;
130 /* Really ugly O(n^2) but shouldn't be necessary too frequent. */
131 for (i = 0; i < table->DPs; i++) {
132 struct gred_sched_data *q = table->tab[i];
133 int n;
135 if (q == NULL)
136 continue;
138 for (n = 0; n < table->DPs; n++)
139 if (table->tab[n] && table->tab[n] != q &&
140 table->tab[n]->prio == q->prio)
141 return 1;
144 return 0;
147 static inline unsigned int gred_backlog(struct gred_sched *table,
148 struct gred_sched_data *q,
149 struct Qdisc *sch)
151 if (gred_wred_mode(table))
152 return sch->qstats.backlog;
153 else
154 return q->backlog;
157 static inline u16 tc_index_to_dp(struct sk_buff *skb)
159 return skb->tc_index & GRED_VQ_MASK;
162 static int
163 gred_enqueue(struct sk_buff *skb, struct Qdisc* sch)
165 struct gred_sched_data *q=NULL;
166 struct gred_sched *t= qdisc_priv(sch);
167 unsigned long qavg = 0;
168 int i=0;
169 u16 dp;
171 if (!t->initd && skb_queue_len(&sch->q) < (sch->dev->tx_queue_len ? : 1)) {
172 D2PRINTK("NO GRED Queues setup yet! Enqueued anyway\n");
173 goto do_enqueue;
176 dp = tc_index_to_dp(skb);
178 if (dp >= t->DPs || (q = t->tab[dp]) == NULL) {
179 printk("GRED: setting to default (%d)\n ",t->def);
180 if (!(q=t->tab[t->def])) {
181 DPRINTK("GRED: setting to default FAILED! dropping!! "
182 "(%d)\n ", t->def);
183 goto drop;
185 /* fix tc_index? --could be controvesial but needed for
186 requeueing */
187 skb->tc_index=(skb->tc_index & ~GRED_VQ_MASK) | t->def;
190 D2PRINTK("gred_enqueue virtualQ 0x%x classid %x backlog %d "
191 "general backlog %d\n",skb->tc_index&0xf,sch->handle,q->backlog,
192 sch->qstats.backlog);
193 /* sum up all the qaves of prios <= to ours to get the new qave*/
194 if (!gred_wred_mode(t) && gred_rio_mode(t)) {
195 for (i=0;i<t->DPs;i++) {
196 if ((!t->tab[i]) || (i==q->DP))
197 continue;
199 if (t->tab[i]->prio < q->prio &&
200 !red_is_idling(&t->tab[i]->parms))
201 qavg +=t->tab[i]->parms.qavg;
206 q->packetsin++;
207 q->bytesin+=skb->len;
209 if (gred_wred_mode(t)) {
210 qavg = 0;
211 q->parms.qavg = t->tab[t->def]->parms.qavg;
212 q->parms.qidlestart = t->tab[t->def]->parms.qidlestart;
215 q->parms.qavg = red_calc_qavg(&q->parms, gred_backlog(t, q, sch));
217 if (red_is_idling(&q->parms))
218 red_end_of_idle_period(&q->parms);
220 if (gred_wred_mode(t))
221 t->tab[t->def]->parms.qavg = q->parms.qavg;
223 switch (red_action(&q->parms, q->parms.qavg + qavg)) {
224 case RED_DONT_MARK:
225 break;
227 case RED_PROB_MARK:
228 sch->qstats.overlimits++;
229 q->stats.prob_drop++;
230 goto congestion_drop;
232 case RED_HARD_MARK:
233 sch->qstats.overlimits++;
234 q->stats.forced_drop++;
235 goto congestion_drop;
238 if (q->backlog + skb->len <= q->limit) {
239 q->backlog += skb->len;
240 do_enqueue:
241 return qdisc_enqueue_tail(skb, sch);
244 q->stats.pdrop++;
245 drop:
246 return qdisc_drop(skb, sch);
248 congestion_drop:
249 qdisc_drop(skb, sch);
250 return NET_XMIT_CN;
253 static int
254 gred_requeue(struct sk_buff *skb, struct Qdisc* sch)
256 struct gred_sched *t = qdisc_priv(sch);
257 struct gred_sched_data *q = t->tab[tc_index_to_dp(skb)];
258 /* error checking here -- probably unnecessary */
260 if (red_is_idling(&q->parms))
261 red_end_of_idle_period(&q->parms);
263 q->backlog += skb->len;
264 return qdisc_requeue(skb, sch);
267 static struct sk_buff *
268 gred_dequeue(struct Qdisc* sch)
270 struct sk_buff *skb;
271 struct gred_sched_data *q;
272 struct gred_sched *t= qdisc_priv(sch);
274 skb = qdisc_dequeue_head(sch);
276 if (skb) {
277 q = t->tab[tc_index_to_dp(skb)];
278 if (q) {
279 q->backlog -= skb->len;
280 if (!q->backlog && !gred_wred_mode(t))
281 red_start_of_idle_period(&q->parms);
282 } else {
283 D2PRINTK("gred_dequeue: skb has bad tcindex %x\n",
284 tc_index_to_dp(skb));
286 return skb;
289 if (gred_wred_mode(t)) {
290 q= t->tab[t->def];
291 if (!q)
292 D2PRINTK("no default VQ set: Results will be "
293 "screwed up\n");
294 else
295 red_start_of_idle_period(&q->parms);
298 return NULL;
301 static unsigned int gred_drop(struct Qdisc* sch)
303 struct sk_buff *skb;
305 struct gred_sched_data *q;
306 struct gred_sched *t= qdisc_priv(sch);
308 skb = qdisc_dequeue_tail(sch);
309 if (skb) {
310 unsigned int len = skb->len;
311 q = t->tab[tc_index_to_dp(skb)];
312 if (q) {
313 q->backlog -= len;
314 q->stats.other++;
315 if (!q->backlog && !gred_wred_mode(t))
316 red_start_of_idle_period(&q->parms);
317 } else {
318 D2PRINTK("gred_dequeue: skb has bad tcindex %x\n",
319 tc_index_to_dp(skb));
322 qdisc_drop(skb, sch);
323 return len;
326 q=t->tab[t->def];
327 if (!q) {
328 D2PRINTK("no default VQ set: Results might be screwed up\n");
329 return 0;
332 red_start_of_idle_period(&q->parms);
333 return 0;
337 static void gred_reset(struct Qdisc* sch)
339 int i;
340 struct gred_sched_data *q;
341 struct gred_sched *t= qdisc_priv(sch);
343 qdisc_reset_queue(sch);
345 for (i=0;i<t->DPs;i++) {
346 q= t->tab[i];
347 if (!q)
348 continue;
349 red_restart(&q->parms);
350 q->backlog = 0;
/*
 * Release the memory of a single VQ. The caller is responsible for
 * clearing the corresponding table entry.
 */
static inline void gred_destroy_vq(struct gred_sched_data *q)
{
	kfree(q);
}
359 static inline int gred_change_table_def(struct Qdisc *sch, struct rtattr *dps)
361 struct gred_sched *table = qdisc_priv(sch);
362 struct tc_gred_sopt *sopt;
363 int i;
365 if (dps == NULL || RTA_PAYLOAD(dps) < sizeof(*sopt))
366 return -EINVAL;
368 sopt = RTA_DATA(dps);
370 if (sopt->DPs > MAX_DPs || sopt->DPs == 0 || sopt->def_DP >= sopt->DPs)
371 return -EINVAL;
373 sch_tree_lock(sch);
374 table->DPs = sopt->DPs;
375 table->def = sopt->def_DP;
378 * Every entry point to GRED is synchronized with the above code
379 * and the DP is checked against DPs, i.e. shadowed VQs can no
380 * longer be found so we can unlock right here.
382 sch_tree_unlock(sch);
384 if (sopt->grio) {
385 gred_enable_rio_mode(table);
386 gred_disable_wred_mode(table);
387 if (gred_wred_mode_check(sch))
388 gred_enable_wred_mode(table);
389 } else {
390 gred_disable_rio_mode(table);
391 gred_disable_wred_mode(table);
394 for (i = table->DPs; i < MAX_DPs; i++) {
395 if (table->tab[i]) {
396 printk(KERN_WARNING "GRED: Warning: Destroying "
397 "shadowed VQ 0x%x\n", i);
398 gred_destroy_vq(table->tab[i]);
399 table->tab[i] = NULL;
403 table->initd = 0;
405 return 0;
408 static inline int gred_change_vq(struct Qdisc *sch, int dp,
409 struct tc_gred_qopt *ctl, int prio, u8 *stab)
411 struct gred_sched *table = qdisc_priv(sch);
412 struct gred_sched_data *q;
414 if (table->tab[dp] == NULL) {
415 table->tab[dp] = kmalloc(sizeof(*q), GFP_KERNEL);
416 if (table->tab[dp] == NULL)
417 return -ENOMEM;
418 memset(table->tab[dp], 0, sizeof(*q));
421 q = table->tab[dp];
422 q->DP = dp;
423 q->prio = prio;
424 q->limit = ctl->limit;
426 if (q->backlog == 0)
427 red_end_of_idle_period(&q->parms);
429 red_set_parms(&q->parms,
430 ctl->qth_min, ctl->qth_max, ctl->Wlog, ctl->Plog,
431 ctl->Scell_log, stab);
433 return 0;
436 static int gred_change(struct Qdisc *sch, struct rtattr *opt)
438 struct gred_sched *table = qdisc_priv(sch);
439 struct tc_gred_qopt *ctl;
440 struct rtattr *tb[TCA_GRED_MAX];
441 int err = -EINVAL, prio = GRED_DEF_PRIO;
442 u8 *stab;
444 if (opt == NULL || rtattr_parse_nested(tb, TCA_GRED_MAX, opt))
445 return -EINVAL;
447 if (tb[TCA_GRED_PARMS-1] == NULL && tb[TCA_GRED_STAB-1] == NULL)
448 return gred_change_table_def(sch, opt);
450 if (tb[TCA_GRED_PARMS-1] == NULL ||
451 RTA_PAYLOAD(tb[TCA_GRED_PARMS-1]) < sizeof(*ctl) ||
452 tb[TCA_GRED_STAB-1] == NULL ||
453 RTA_PAYLOAD(tb[TCA_GRED_STAB-1]) < 256)
454 return -EINVAL;
456 ctl = RTA_DATA(tb[TCA_GRED_PARMS-1]);
457 stab = RTA_DATA(tb[TCA_GRED_STAB-1]);
459 if (ctl->DP >= table->DPs)
460 goto errout;
462 if (gred_rio_mode(table)) {
463 if (ctl->prio == 0) {
464 int def_prio = GRED_DEF_PRIO;
466 if (table->tab[table->def])
467 def_prio = table->tab[table->def]->prio;
469 printk(KERN_DEBUG "GRED: DP %u does not have a prio "
470 "setting default to %d\n", ctl->DP, def_prio);
472 prio = def_prio;
473 } else
474 prio = ctl->prio;
477 sch_tree_lock(sch);
479 err = gred_change_vq(sch, ctl->DP, ctl, prio, stab);
480 if (err < 0)
481 goto errout_locked;
483 if (table->tab[table->def] == NULL) {
484 if (gred_rio_mode(table))
485 prio = table->tab[ctl->DP]->prio;
487 err = gred_change_vq(sch, table->def, ctl, prio, stab);
488 if (err < 0)
489 goto errout_locked;
492 table->initd = 1;
494 if (gred_rio_mode(table)) {
495 gred_disable_wred_mode(table);
496 if (gred_wred_mode_check(sch))
497 gred_enable_wred_mode(table);
500 err = 0;
502 errout_locked:
503 sch_tree_unlock(sch);
504 errout:
505 return err;
508 static int gred_init(struct Qdisc *sch, struct rtattr *opt)
510 struct rtattr *tb[TCA_GRED_MAX];
512 if (opt == NULL || rtattr_parse_nested(tb, TCA_GRED_MAX, opt))
513 return -EINVAL;
515 if (tb[TCA_GRED_PARMS-1] || tb[TCA_GRED_STAB-1])
516 return -EINVAL;
518 return gred_change_table_def(sch, tb[TCA_GRED_DPS-1]);
521 static int gred_dump(struct Qdisc *sch, struct sk_buff *skb)
523 struct gred_sched *table = qdisc_priv(sch);
524 struct rtattr *parms, *opts = NULL;
525 int i;
526 struct tc_gred_sopt sopt = {
527 .DPs = table->DPs,
528 .def_DP = table->def,
529 .grio = gred_rio_mode(table),
532 opts = RTA_NEST(skb, TCA_OPTIONS);
533 RTA_PUT(skb, TCA_GRED_DPS, sizeof(sopt), &sopt);
534 parms = RTA_NEST(skb, TCA_GRED_PARMS);
536 for (i = 0; i < MAX_DPs; i++) {
537 struct gred_sched_data *q = table->tab[i];
538 struct tc_gred_qopt opt;
540 memset(&opt, 0, sizeof(opt));
542 if (!q) {
543 /* hack -- fix at some point with proper message
544 This is how we indicate to tc that there is no VQ
545 at this DP */
547 opt.DP = MAX_DPs + i;
548 goto append_opt;
551 opt.limit = q->limit;
552 opt.DP = q->DP;
553 opt.backlog = q->backlog;
554 opt.prio = q->prio;
555 opt.qth_min = q->parms.qth_min >> q->parms.Wlog;
556 opt.qth_max = q->parms.qth_max >> q->parms.Wlog;
557 opt.Wlog = q->parms.Wlog;
558 opt.Plog = q->parms.Plog;
559 opt.Scell_log = q->parms.Scell_log;
560 opt.other = q->stats.other;
561 opt.early = q->stats.prob_drop;
562 opt.forced = q->stats.forced_drop;
563 opt.pdrop = q->stats.pdrop;
564 opt.packets = q->packetsin;
565 opt.bytesin = q->bytesin;
567 if (gred_wred_mode(table)) {
568 q->parms.qidlestart =
569 table->tab[table->def]->parms.qidlestart;
570 q->parms.qavg = table->tab[table->def]->parms.qavg;
573 opt.qave = red_calc_qavg(&q->parms, q->parms.qavg);
575 append_opt:
576 RTA_APPEND(skb, sizeof(opt), &opt);
579 RTA_NEST_END(skb, parms);
581 return RTA_NEST_END(skb, opts);
583 rtattr_failure:
584 return RTA_NEST_CANCEL(skb, opts);
587 static void gred_destroy(struct Qdisc *sch)
589 struct gred_sched *table = qdisc_priv(sch);
590 int i;
592 for (i = 0;i < table->DPs; i++) {
593 if (table->tab[i])
594 gred_destroy_vq(table->tab[i]);
598 static struct Qdisc_ops gred_qdisc_ops = {
599 .next = NULL,
600 .cl_ops = NULL,
601 .id = "gred",
602 .priv_size = sizeof(struct gred_sched),
603 .enqueue = gred_enqueue,
604 .dequeue = gred_dequeue,
605 .requeue = gred_requeue,
606 .drop = gred_drop,
607 .init = gred_init,
608 .reset = gred_reset,
609 .destroy = gred_destroy,
610 .change = gred_change,
611 .dump = gred_dump,
612 .owner = THIS_MODULE,
615 static int __init gred_module_init(void)
617 return register_qdisc(&gred_qdisc_ops);
619 static void __exit gred_module_exit(void)
621 unregister_qdisc(&gred_qdisc_ops);
623 module_init(gred_module_init)
624 module_exit(gred_module_exit)
625 MODULE_LICENSE("GPL");