/*
 * net/sched/sch_cbq.c	Class-Based Queueing discipline.
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *
 */

#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/skbuff.h>
#include <net/netlink.h>
#include <net/pkt_sched.h>
/*	Class-Based Queueing (CBQ) algorithm.
	=======================================

	Sources: [1] Sally Floyd and Van Jacobson, "Link-sharing and Resource
		 Management Models for Packet Networks",
		 IEEE/ACM Transactions on Networking, Vol.3, No.4, 1995

		 [2] Sally Floyd, "Notes on CBQ and Guaranteed Service", 1995

		 [3] Sally Floyd, "Notes on Class-Based Queueing: Setting
		 Parameters", 1996

		 [4] Sally Floyd and Michael Speer, "Experimental Results
		 for Class-Based Queueing", 1998, not published.

	-----------------------------------------------------------------------

	Algorithm skeleton was taken from NS simulator cbq.cc.
	If someone wants to check this code against the LBL version,
	they should take into account that ONLY the skeleton was borrowed;
	the implementation is different. Particularly:

	--- The WRR algorithm is different. Our version looks more
	reasonable (I hope) and works when quanta are allowed to be
	less than MTU, which is always the case when real-time classes
	have small rates. Note that the statement of [3] is
	incomplete: delay may actually be estimated even if class
	per-round allotment is less than MTU. Namely, if per-round
	allotment is W*r_i, and r_1+...+r_k = r < 1,

	delay_i <= ([MTU/(W*r_i)]*W*r + W*r + k*MTU)/B

	In the worst case we have an IntServ estimate with D = W*r+k*MTU
	and C = MTU*r. The proof (if correct at all) is trivial.

	--- It seems that cbq-2.0 is not very accurate. At least, I cannot
	interpret some places, which look like wrong translations
	from NS. Anyone is advised to find these differences
	and explain to me why I am wrong 8).

	--- Linux has no EOI event, so that we cannot estimate true class
	idle time. The workaround is to consider the next dequeue event
	as a sign that the previous packet is finished. This is wrong because of
	internal device queueing, but on a permanently loaded link it is true.
	Moreover, combined with the clock integrator, this scheme looks
	very close to an ideal solution.  */
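
/* Illustrative check of the delay bound above (an added example with
   made-up numbers, not from the original sources): take B = 10 Mbit/s,
   MTU = 1500 B, k = 2 classes with r_1 = r_2 = 0.25 (so r = 0.5) and
   W = 2000 B, giving per-round allotment W*r_i = 500 B < MTU. Then
   [MTU/(W*r_i)] = 3 and W*r = 1000 B, so

	delay_1 <= (3*1000 + 1000 + 2*1500) bytes / B
		 = 7000*8 bits / 10^7 bit/s = 5.6 ms,

   i.e. the bound stays finite even though the per-round allotment is
   below the MTU. */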
struct cbq_sched_data;


struct cbq_class
{
	struct Qdisc_class_common common;
	struct cbq_class	*next_alive;	/* next class with backlog in this priority band */

/* Parameters */
	unsigned char		priority;	/* class priority */
	unsigned char		priority2;	/* priority to be used after overlimit */
	unsigned char		ewma_log;	/* time constant for idle time calculation */
	unsigned char		ovl_strategy;
#ifdef CONFIG_NET_CLS_ACT
	unsigned char		police;
#endif

	u32			defmap;

	/* Link-sharing scheduler parameters */
	long			maxidle;	/* Class parameters: see below. */
	long			offtime;
	long			minidle;
	u32			avpkt;
	struct qdisc_rate_table	*R_tab;

	/* Overlimit strategy parameters */
	void			(*overlimit)(struct cbq_class *cl);
	psched_tdiff_t		penalty;

	/* General scheduler (WRR) parameters */
	long			allot;
	long			quantum;	/* Allotment per WRR round */
	long			weight;		/* Relative allotment: see below */

	struct Qdisc		*qdisc;		/* Ptr to CBQ discipline */
	struct cbq_class	*split;		/* Ptr to split node */
	struct cbq_class	*share;		/* Ptr to LS parent in the class tree */
	struct cbq_class	*tparent;	/* Ptr to tree parent in the class tree */
	struct cbq_class	*borrow;	/* NULL if class is bandwidth limited;
						   parent otherwise */
	struct cbq_class	*sibling;	/* Sibling chain */
	struct cbq_class	*children;	/* Pointer to children chain */

	struct Qdisc		*q;		/* Elementary queueing discipline */


/* Variables */
	unsigned char		cpriority;	/* Effective priority */
	unsigned char		delayed;
	unsigned char		level;		/* level of the class in hierarchy:
						   0 for leaf classes, and maximal
						   level of children + 1 for nodes.
						 */

	psched_time_t		last;		/* Last end of service */
	psched_time_t		undertime;
	long			avgidle;
	long			deficit;	/* Saved deficit for WRR */
	psched_time_t		penalized;
	struct gnet_stats_basic bstats;
	struct gnet_stats_queue qstats;
	struct gnet_stats_rate_est rate_est;
	struct tc_cbq_xstats	xstats;

	struct tcf_proto	*filter_list;

	int			refcnt;
	int			filters;

	struct cbq_class	*defaults[TC_PRIO_MAX+1];
};

struct cbq_sched_data
{
	struct Qdisc_class_hash	clhash;			/* Hash table of all classes */
	int			nclasses[TC_CBQ_MAXPRIO+1];
	unsigned		quanta[TC_CBQ_MAXPRIO+1];

	struct cbq_class	link;

	unsigned		activemask;
	struct cbq_class	*active[TC_CBQ_MAXPRIO+1];	/* List of all classes
								   with backlog */

#ifdef CONFIG_NET_CLS_ACT
	struct cbq_class	*rx_class;
#endif
	struct cbq_class	*tx_class;
	struct cbq_class	*tx_borrowed;
	int			tx_len;
	psched_time_t		now;		/* Cached timestamp */
	psched_time_t		now_rt;		/* Cached real time */
	unsigned		pmask;

	struct hrtimer		delay_timer;
	struct qdisc_watchdog	watchdog;	/* Watchdog timer,
						   started when CBQ has
						   backlog, but cannot
						   transmit just now */
	psched_tdiff_t		wd_expires;
	int			toplevel;
	u32			hgenerator;
};


#define L2T(cl,len)	qdisc_l2t((cl)->R_tab,len)
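
/* The L2T ("length to time") macro above maps a packet length to its
   transmission time at the class's configured rate, by lookup in the
   rate table attached to the class (see qdisc_l2t()). */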
static __inline__ struct cbq_class *
cbq_class_lookup(struct cbq_sched_data *q, u32 classid)
{
	struct Qdisc_class_common *clc;

	clc = qdisc_class_find(&q->clhash, classid);
	if (clc == NULL)
		return NULL;
	return container_of(clc, struct cbq_class, common);
}

#ifdef CONFIG_NET_CLS_ACT

static struct cbq_class *
cbq_reclassify(struct sk_buff *skb, struct cbq_class *this)
{
	struct cbq_class *cl, *new;

	for (cl = this->tparent; cl; cl = cl->tparent)
		if ((new = cl->defaults[TC_PRIO_BESTEFFORT]) != NULL && new != this)
			return new;

	return NULL;
}

#endif
/* Classify packet. The procedure is pretty complicated, but
   it allows us to combine link sharing and priority scheduling
   transparently.

   Namely, you can put link-sharing rules (f.e. route based) at the root
   of CBQ, so that it resolves to split nodes. Then packets are classified
   by logical priority, or a more specific classifier may be attached
   to the split node.
 */
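
/* Illustrative configuration (a sketch based on the LARTC documentation,
   not part of this file; exact option spelling may differ between tc
   versions):

	tc qdisc add dev eth0 root handle 1: cbq bandwidth 100Mbit avpkt 1000
	tc class add dev eth0 parent 1: classid 1:1 cbq bandwidth 100Mbit \
		rate 6Mbit allot 1514 prio 5 avpkt 1000 bounded
	tc class change dev eth0 classid 1:1 cbq defmap c0 split 1:0

   With such a setup, packets whose logical priority matches a set bit in
   the defmap fall into class 1:1 at split node 1:0 whenever no attached
   filter gives a verdict; this is the defaults[] path in cbq_classify()
   below. */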
static struct cbq_class *
cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *head = &q->link;
	struct cbq_class **defmap;
	struct cbq_class *cl = NULL;
	u32 prio = skb->priority;
	struct tcf_result res;

	/*
	 *  Step 1. If skb->priority points to one of our classes, use it.
	 */
	if (TC_H_MAJ(prio^sch->handle) == 0 &&
	    (cl = cbq_class_lookup(q, prio)) != NULL)
		return cl;

	*qerr = NET_XMIT_BYPASS;
	for (;;) {
		int result = 0;
		defmap = head->defaults;

		/*
		 * Step 2+n. Apply classifier.
		 */
		if (!head->filter_list ||
		    (result = tc_classify_compat(skb, head->filter_list, &res)) < 0)
			goto fallback;

		if ((cl = (void*)res.class) == NULL) {
			if (TC_H_MAJ(res.classid))
				cl = cbq_class_lookup(q, res.classid);
			else if ((cl = defmap[res.classid&TC_PRIO_MAX]) == NULL)
				cl = defmap[TC_PRIO_BESTEFFORT];

			if (cl == NULL || cl->level >= head->level)
				goto fallback;
		}

#ifdef CONFIG_NET_CLS_ACT
		switch (result) {
		case TC_ACT_QUEUED:
		case TC_ACT_STOLEN:
			*qerr = NET_XMIT_SUCCESS;
			/* fall through */
		case TC_ACT_SHOT:
			return NULL;
		case TC_ACT_RECLASSIFY:
			return cbq_reclassify(skb, cl);
		}
#endif
		if (cl->level == 0)
			return cl;

		/*
		 * Step 3+n. If classifier selected a link-sharing class,
		 *	     apply agency-specific classifier.
		 *	     Repeat this procedure until we hit a leaf node.
		 */
		head = cl;
	}

fallback:
	cl = head;

	/*
	 * Step 4. No success...
	 */
	if (TC_H_MAJ(prio) == 0 &&
	    !(cl = head->defaults[prio&TC_PRIO_MAX]) &&
	    !(cl = head->defaults[TC_PRIO_BESTEFFORT]))
		return head;

	return cl;
}
/*
   A packet has just been enqueued on an empty class.
   cbq_activate_class adds it to the tail of the active class list
   of its priority band.
 */

static __inline__ void cbq_activate_class(struct cbq_class *cl)
{
	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
	int prio = cl->cpriority;
	struct cbq_class *cl_tail;

	cl_tail = q->active[prio];
	q->active[prio] = cl;

	if (cl_tail != NULL) {
		cl->next_alive = cl_tail->next_alive;
		cl_tail->next_alive = cl;
	} else {
		cl->next_alive = cl;
		q->activemask |= (1<<prio);
	}
}

/*
   Unlink class from the active chain.
   Note that this same procedure is done directly in cbq_dequeue*
   during the round-robin procedure.
 */

static void cbq_deactivate_class(struct cbq_class *this)
{
	struct cbq_sched_data *q = qdisc_priv(this->qdisc);
	int prio = this->cpriority;
	struct cbq_class *cl;
	struct cbq_class *cl_prev = q->active[prio];

	do {
		cl = cl_prev->next_alive;
		if (cl == this) {
			cl_prev->next_alive = cl->next_alive;
			cl->next_alive = NULL;

			if (cl == q->active[prio]) {
				q->active[prio] = cl_prev;
				if (cl == q->active[prio]) {
					q->active[prio] = NULL;
					q->activemask &= ~(1<<prio);
					return;
				}
			}
			return;
		}
	} while ((cl_prev = cl) != q->active[prio]);
}
static void
cbq_mark_toplevel(struct cbq_sched_data *q, struct cbq_class *cl)
{
	int toplevel = q->toplevel;

	if (toplevel > cl->level && !(cl->q->flags&TCQ_F_THROTTLED)) {
		psched_time_t now;
		psched_tdiff_t incr;

		now = psched_get_time();
		incr = now - q->now_rt;
		now = q->now + incr;

		do {
			if (cl->undertime < now) {
				q->toplevel = cl->level;
				return;
			}
		} while ((cl=cl->borrow) != NULL && toplevel > cl->level);
	}
}

static int
cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	int uninitialized_var(ret);
	struct cbq_class *cl = cbq_classify(skb, sch, &ret);

#ifdef CONFIG_NET_CLS_ACT
	q->rx_class = cl;
#endif
	if (cl == NULL) {
		if (ret == NET_XMIT_BYPASS)
			sch->qstats.drops++;
		kfree_skb(skb);
		return ret;
	}

#ifdef CONFIG_NET_CLS_ACT
	cl->q->__parent = sch;
#endif
	ret = qdisc_enqueue(skb, cl->q);
	if (ret == NET_XMIT_SUCCESS) {
		sch->q.qlen++;
		sch->bstats.packets++;
		sch->bstats.bytes += qdisc_pkt_len(skb);
		cbq_mark_toplevel(q, cl);
		if (!cl->next_alive)
			cbq_activate_class(cl);
		return ret;
	}

	sch->qstats.drops++;
	cbq_mark_toplevel(q, cl);
	cl->qstats.drops++;
	return ret;
}

static int
cbq_requeue(struct sk_buff *skb, struct Qdisc *sch)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *cl;
	int ret;

	if ((cl = q->tx_class) == NULL) {
		kfree_skb(skb);
		sch->qstats.drops++;
		return NET_XMIT_CN;
	}
	q->tx_class = NULL;

	cbq_mark_toplevel(q, cl);

#ifdef CONFIG_NET_CLS_ACT
	q->rx_class = cl;
	cl->q->__parent = sch;
#endif
	if ((ret = cl->q->ops->requeue(skb, cl->q)) == 0) {
		sch->q.qlen++;
		sch->qstats.requeues++;
		if (!cl->next_alive)
			cbq_activate_class(cl);
		return 0;
	}
	sch->qstats.drops++;
	cl->qstats.drops++;
	return ret;
}
/* Overlimit actions */

/* TC_CBQ_OVL_CLASSIC: (default) penalize leaf class by adding offtime */

static void cbq_ovl_classic(struct cbq_class *cl)
{
	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
	psched_tdiff_t delay = cl->undertime - q->now;

	if (!cl->delayed) {
		delay += cl->offtime;

		/*
		   Class goes to sleep, so that it will have no
		   chance to work avgidle. Let's forgive it 8)

		   BTW cbq-2.0 has a bug in this
		   place, apparently they forgot to shift it by cl->ewma_log.
		 */
		if (cl->avgidle < 0)
			delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
		if (cl->avgidle < cl->minidle)
			cl->avgidle = cl->minidle;
		if (delay <= 0)
			delay = 1;
		cl->undertime = q->now + delay;

		cl->xstats.overactions++;
		cl->delayed = 1;
	}
	if (q->wd_expires == 0 || q->wd_expires > delay)
		q->wd_expires = delay;

	/* Dirty work! We must schedule wakeups based on
	   real available rate, rather than leaf rate,
	   which may be tiny (even zero).
	 */
	if (q->toplevel == TC_CBQ_MAXLEVEL) {
		struct cbq_class *b;
		psched_tdiff_t base_delay = q->wd_expires;

		for (b = cl->borrow; b; b = b->borrow) {
			delay = b->undertime - q->now;
			if (delay < base_delay) {
				if (delay <= 0)
					delay = 1;
				base_delay = delay;
			}
		}

		q->wd_expires = base_delay;
	}
}
/* TC_CBQ_OVL_RCLASSIC: penalize, by adding offtime, classes in the
   hierarchy when they go overlimit
 */

static void cbq_ovl_rclassic(struct cbq_class *cl)
{
	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
	struct cbq_class *this = cl;

	do {
		if (cl->level > q->toplevel) {
			cl = NULL;
			break;
		}
	} while ((cl = cl->borrow) != NULL);

	if (cl == NULL)
		cl = this;

	cbq_ovl_classic(cl);
}

/* TC_CBQ_OVL_DELAY: delay until the class goes underlimit */

static void cbq_ovl_delay(struct cbq_class *cl)
{
	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
	psched_tdiff_t delay = cl->undertime - q->now;

	if (!cl->delayed) {
		psched_time_t sched = q->now;
		ktime_t expires;

		delay += cl->offtime;
		if (cl->avgidle < 0)
			delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
		if (cl->avgidle < cl->minidle)
			cl->avgidle = cl->minidle;
		cl->undertime = q->now + delay;

		if (delay > 0) {
			sched += delay + cl->penalty;
			cl->penalized = sched;
			cl->cpriority = TC_CBQ_MAXPRIO;
			q->pmask |= (1<<TC_CBQ_MAXPRIO);

			expires = ktime_set(0, 0);
			expires = ktime_add_ns(expires, PSCHED_US2NS(sched));
			if (hrtimer_try_to_cancel(&q->delay_timer) &&
			    ktime_to_ns(ktime_sub(q->delay_timer.expires,
						  expires)) > 0)
				q->delay_timer.expires = expires;
			hrtimer_restart(&q->delay_timer);
			cl->delayed = 1;
			cl->xstats.overactions++;
			return;
		}
		delay = 1;
	}
	if (q->wd_expires == 0 || q->wd_expires > delay)
		q->wd_expires = delay;
}

/* TC_CBQ_OVL_LOWPRIO: penalize class by lowering its priority band */

static void cbq_ovl_lowprio(struct cbq_class *cl)
{
	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);

	cl->penalized = q->now + cl->penalty;

	if (cl->cpriority != cl->priority2) {
		cl->cpriority = cl->priority2;
		q->pmask |= (1<<cl->cpriority);
		cl->xstats.overactions++;
	}
	cbq_ovl_classic(cl);
}

/* TC_CBQ_OVL_DROP: penalize class by dropping */

static void cbq_ovl_drop(struct cbq_class *cl)
{
	if (cl->q->ops->drop)
		if (cl->q->ops->drop(cl->q))
			cl->qdisc->q.qlen--;
	cl->xstats.overactions++;
	cbq_ovl_classic(cl);
}

static psched_tdiff_t cbq_undelay_prio(struct cbq_sched_data *q, int prio,
				       psched_time_t now)
{
	struct cbq_class *cl;
	struct cbq_class *cl_prev = q->active[prio];
	psched_time_t sched = now;

	if (cl_prev == NULL)
		return 0;

	do {
		cl = cl_prev->next_alive;
		if (now - cl->penalized > 0) {
			cl_prev->next_alive = cl->next_alive;
			cl->next_alive = NULL;
			cl->cpriority = cl->priority;
			cl->delayed = 0;
			cbq_activate_class(cl);

			if (cl == q->active[prio]) {
				q->active[prio] = cl_prev;
				if (cl == q->active[prio]) {
					q->active[prio] = NULL;
					return 0;
				}
			}

			cl = cl_prev->next_alive;
		} else if (sched - cl->penalized > 0)
			sched = cl->penalized;
	} while ((cl_prev = cl) != q->active[prio]);

	return sched - now;
}

static enum hrtimer_restart cbq_undelay(struct hrtimer *timer)
{
	struct cbq_sched_data *q = container_of(timer, struct cbq_sched_data,
						delay_timer);
	struct Qdisc *sch = q->watchdog.qdisc;
	psched_time_t now;
	psched_tdiff_t delay = 0;
	unsigned pmask;

	now = psched_get_time();

	pmask = q->pmask;
	q->pmask = 0;

	while (pmask) {
		int prio = ffz(~pmask);
		psched_tdiff_t tmp;

		pmask &= ~(1<<prio);

		tmp = cbq_undelay_prio(q, prio, now);
		if (tmp > 0) {
			q->pmask |= 1<<prio;
			if (tmp < delay || delay == 0)
				delay = tmp;
		}
	}

	if (delay) {
		ktime_t time;

		time = ktime_set(0, 0);
		time = ktime_add_ns(time, PSCHED_US2NS(now + delay));
		hrtimer_start(&q->delay_timer, time, HRTIMER_MODE_ABS);
	}

	sch->flags &= ~TCQ_F_THROTTLED;
	__netif_schedule(sch);
	return HRTIMER_NORESTART;
}
#ifdef CONFIG_NET_CLS_ACT
static int cbq_reshape_fail(struct sk_buff *skb, struct Qdisc *child)
{
	struct Qdisc *sch = child->__parent;
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *cl = q->rx_class;

	q->rx_class = NULL;

	if (cl && (cl = cbq_reclassify(skb, cl)) != NULL) {

		cbq_mark_toplevel(q, cl);

		q->rx_class = cl;
		cl->q->__parent = sch;

		if (qdisc_enqueue(skb, cl->q) == 0) {
			sch->q.qlen++;
			sch->bstats.packets++;
			sch->bstats.bytes += qdisc_pkt_len(skb);
			if (!cl->next_alive)
				cbq_activate_class(cl);
			return 0;
		}
		sch->qstats.drops++;
		return 0;
	}

	sch->qstats.drops++;
	return -1;
}
#endif
/*
   This is a mission-critical procedure.

   We "regenerate" the toplevel cutoff if the transmitting class
   has backlog and is not regulated. It is not part of the
   original CBQ description, but looks more reasonable.
   Probably, it is wrong. This question needs further investigation.
 */

static __inline__ void
cbq_update_toplevel(struct cbq_sched_data *q, struct cbq_class *cl,
		    struct cbq_class *borrowed)
{
	if (cl && q->toplevel >= borrowed->level) {
		if (cl->q->q.qlen > 1) {
			do {
				if (borrowed->undertime == PSCHED_PASTPERFECT) {
					q->toplevel = borrowed->level;
					return;
				}
			} while ((borrowed=borrowed->borrow) != NULL);
		}
#if 0
	/* It is not necessary now. Uncommenting it
	   will save CPU cycles, but decrease fairness.
	 */
		q->toplevel = TC_CBQ_MAXLEVEL;
#endif
	}
}
static void
cbq_update(struct cbq_sched_data *q)
{
	struct cbq_class *this = q->tx_class;
	struct cbq_class *cl = this;
	int len = q->tx_len;

	q->tx_class = NULL;

	for ( ; cl; cl = cl->share) {
		long avgidle = cl->avgidle;
		long idle;

		cl->bstats.packets++;
		cl->bstats.bytes += len;

		/*
		   (now - last) is total time between packet right edges.
		   (last_pktlen/rate) is "virtual" busy time, so that

			 idle = (now - last) - last_pktlen/rate
		 */

		idle = q->now - cl->last;
		if ((unsigned long)idle > 128*1024*1024) {
			avgidle = cl->maxidle;
		} else {
			idle -= L2T(cl, len);

		/* true_avgidle := (1-W)*true_avgidle + W*idle,
		   where W=2^{-ewma_log}. But cl->avgidle is scaled:
		   cl->avgidle == true_avgidle/W,
		   hence:
		 */
			avgidle += idle - (avgidle>>cl->ewma_log);
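
			/* Numeric illustration: with ewma_log = 5
			   (W = 1/32), the line above moves the scaled
			   average by (idle - avgidle/32), so the true
			   average moves 1/32 of the way towards the new
			   idle sample:
			   true_avgidle := (31/32)*true_avgidle + (1/32)*idle. */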
		}

		if (avgidle <= 0) {
			/* Overlimit or at-limit */

			if (avgidle < cl->minidle)
				avgidle = cl->minidle;

			cl->avgidle = avgidle;

			/* Calculate expected time, when this class
			   will be allowed to send.
			   It will occur, when:
			   (1-W)*true_avgidle + W*delay = 0, i.e.
			   idle = (1/W - 1)*(-true_avgidle)
			   or
			   idle = (1 - W)*(-cl->avgidle);
			 */
			idle = (-avgidle) - ((-avgidle) >> cl->ewma_log);

			/*
			   That is not all.
			   To maintain the rate allocated to the class,
			   we add to undertime the virtual clock time
			   necessary to complete the transmitted packet.
			   (len/phys_bandwidth has already passed
			   by the moment of cbq_update)
			 */

			idle -= L2T(&q->link, len);
			idle += L2T(cl, len);

			cl->undertime = q->now + idle;
		} else {
			/* Underlimit */

			cl->undertime = PSCHED_PASTPERFECT;
			if (avgidle > cl->maxidle)
				cl->avgidle = cl->maxidle;
			else
				cl->avgidle = avgidle;
		}
		cl->last = q->now;
	}

	cbq_update_toplevel(q, this, q->tx_borrowed);
}
static __inline__ struct cbq_class *
cbq_under_limit(struct cbq_class *cl)
{
	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
	struct cbq_class *this_cl = cl;

	if (cl->tparent == NULL)
		return cl;

	if (cl->undertime == PSCHED_PASTPERFECT || q->now >= cl->undertime) {
		cl->delayed = 0;
		return cl;
	}

	do {
		/* This is a very suspicious place. Now the overlimit
		   action is generated for not-bounded classes
		   only if the link is completely congested.
		   Though it agrees with the ancestor-only paradigm,
		   it looks very stupid. Particularly,
		   it means that this chunk of code will either
		   never be called or will result in strong amplification
		   of burstiness. Dangerous, silly, and, however,
		   no other solution exists.
		 */
		if ((cl = cl->borrow) == NULL) {
			this_cl->qstats.overlimits++;
			this_cl->overlimit(this_cl);
			return NULL;
		}
		if (cl->level > q->toplevel)
			return NULL;
	} while (cl->undertime != PSCHED_PASTPERFECT && q->now < cl->undertime);

	cl->delayed = 0;
	return cl;
}
static __inline__ struct sk_buff *
cbq_dequeue_prio(struct Qdisc *sch, int prio)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *cl_tail, *cl_prev, *cl;
	struct sk_buff *skb;
	int deficit;

	cl_tail = cl_prev = q->active[prio];
	cl = cl_prev->next_alive;

	do {
		deficit = 0;

		/* Start round */
		do {
			struct cbq_class *borrow = cl;

			if (cl->q->q.qlen &&
			    (borrow = cbq_under_limit(cl)) == NULL)
				goto skip_class;

			if (cl->deficit <= 0) {
				/* Class exhausted its allotment per
				   this round. Switch to the next one.
				 */
				deficit = 1;
				cl->deficit += cl->quantum;
				goto next_class;
			}

			skb = cl->q->dequeue(cl->q);

			/* Class did not give us any skb :-(
			   It could occur even if cl->q->q.qlen != 0,
			   f.e. if cl->q == "tbf"
			 */
			if (skb == NULL)
				goto skip_class;

			cl->deficit -= qdisc_pkt_len(skb);
			q->tx_class = cl;
			q->tx_borrowed = borrow;
			if (borrow != cl) {
#ifndef CBQ_XSTATS_BORROWS_BYTES
				borrow->xstats.borrows++;
				cl->xstats.borrows++;
#else
				borrow->xstats.borrows += qdisc_pkt_len(skb);
				cl->xstats.borrows += qdisc_pkt_len(skb);
#endif
			}
			q->tx_len = qdisc_pkt_len(skb);

			if (cl->deficit <= 0) {
				q->active[prio] = cl;
				cl = cl->next_alive;
				cl->deficit += cl->quantum;
			}
			return skb;

skip_class:
			if (cl->q->q.qlen == 0 || prio != cl->cpriority) {
				/* Class is empty or penalized.
				   Unlink it from active chain.
				 */
				cl_prev->next_alive = cl->next_alive;
				cl->next_alive = NULL;

				/* Did cl_tail point to it? */
				if (cl == cl_tail) {
					/* Repair it! */
					cl_tail = cl_prev;

					/* Was it the last class in this band? */
					if (cl == cl_tail) {
						/* Kill the band! */
						q->active[prio] = NULL;
						q->activemask &= ~(1<<prio);
						if (cl->q->q.qlen)
							cbq_activate_class(cl);
						return NULL;
					}

					q->active[prio] = cl_tail;
				}
				if (cl->q->q.qlen)
					cbq_activate_class(cl);

				cl = cl_prev;
			}

next_class:
			cl_prev = cl;
			cl = cl->next_alive;
		} while (cl_prev != cl_tail);
	} while (deficit);

	q->active[prio] = cl_prev;

	return NULL;
}

static __inline__ struct sk_buff *
cbq_dequeue_1(struct Qdisc *sch)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct sk_buff *skb;
	unsigned activemask;

	activemask = q->activemask&0xFF;
	while (activemask) {
		int prio = ffz(~activemask);
		activemask &= ~(1<<prio);
		skb = cbq_dequeue_prio(sch, prio);
		if (skb)
			return skb;
	}
	return NULL;
}
static struct sk_buff *
cbq_dequeue(struct Qdisc *sch)
{
	struct sk_buff *skb;
	struct cbq_sched_data *q = qdisc_priv(sch);
	psched_time_t now;
	psched_tdiff_t incr;

	now = psched_get_time();
	incr = now - q->now_rt;

	if (q->tx_class) {
		psched_tdiff_t incr2;
		/* Time integrator. We calculate EOS time
		   by adding expected packet transmission time.
		   If real time is greater, we warp artificial clock,
		   so that:

		   cbq_time = max(real_time, work);
		 */
		incr2 = L2T(&q->link, q->tx_len);
		q->now += incr2;
		cbq_update(q);
		if ((incr -= incr2) < 0)
			incr = 0;
	}
	q->now += incr;
	q->now_rt = now;
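
	/* At this point q->now has advanced by at least the virtual
	   transmission time of the last packet; if the real clock ran
	   further ahead, the residual incr warped the virtual clock
	   forward to real time, implementing the
	   cbq_time = max(real_time, work) rule from the comment above. */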
	for (;;) {
		q->wd_expires = 0;

		skb = cbq_dequeue_1(sch);
		if (skb) {
			sch->q.qlen--;
			sch->flags &= ~TCQ_F_THROTTLED;
			return skb;
		}

		/* All the classes are overlimit.

		   It is possible, if:

		   1. Scheduler is empty.
		   2. Toplevel cutoff inhibited borrowing.
		   3. Root class is overlimit.

		   Reset conditions 2 and 3 and retry.

		   Note that NS and cbq-2.0 are buggy: peeking
		   an arbitrary class is appropriate for ancestor-only
		   sharing, but not for the toplevel algorithm.

		   Our version is better but slower, because it requires
		   two passes; this is unavoidable with top-level sharing.
		 */

		if (q->toplevel == TC_CBQ_MAXLEVEL &&
		    q->link.undertime == PSCHED_PASTPERFECT)
			break;

		q->toplevel = TC_CBQ_MAXLEVEL;
		q->link.undertime = PSCHED_PASTPERFECT;
	}

	/* No packets in scheduler or nobody wants to give them to us :-(
	   Sigh... start watchdog timer in the last case. */

	if (sch->q.qlen) {
		sch->qstats.overlimits++;
		if (q->wd_expires)
			qdisc_watchdog_schedule(&q->watchdog,
						now + q->wd_expires);
	}
	return NULL;
}
/* CBQ class maintenance routines */

static void cbq_adjust_levels(struct cbq_class *this)
{
	if (this == NULL)
		return;

	do {
		int level = 0;
		struct cbq_class *cl;

		if ((cl = this->children) != NULL) {
			do {
				if (cl->level > level)
					level = cl->level;
			} while ((cl = cl->sibling) != this->children);
		}
		this->level = level+1;
	} while ((this = this->tparent) != NULL);
}
static void cbq_normalize_quanta(struct cbq_sched_data *q, int prio)
{
	struct cbq_class *cl;
	struct hlist_node *n;
	unsigned int h;

	if (q->quanta[prio] == 0)
		return;

	for (h = 0; h < q->clhash.hashsize; h++) {
		hlist_for_each_entry(cl, n, &q->clhash.hash[h], common.hnode) {
			/* BUGGGG... Beware! This expression suffers from
			   arithmetic overflows!
			 */
			if (cl->priority == prio) {
				cl->quantum = (cl->weight*cl->allot*q->nclasses[prio])/
					q->quanta[prio];
			}
			if (cl->quantum <= 0 || cl->quantum>32*qdisc_dev(cl->qdisc)->mtu) {
				printk(KERN_WARNING "CBQ: class %08x has bad quantum==%ld, repaired.\n", cl->common.classid, cl->quantum);
				cl->quantum = qdisc_dev(cl->qdisc)->mtu/2 + 1;
			}
		}
	}
}
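
/* Sanity check of the normalization above: q->quanta[prio] is the sum of
   the weights of all classes in the band, so with n classes of equal
   weight w each class gets quantum = (w*allot*n)/(n*w) == allot, i.e. one
   full allotment per WRR round; unequal weights scale each class's
   quantum proportionally. */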
static void cbq_sync_defmap(struct cbq_class *cl)
{
	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
	struct cbq_class *split = cl->split;
	unsigned h;
	int i;

	if (split == NULL)
		return;

	for (i=0; i<=TC_PRIO_MAX; i++) {
		if (split->defaults[i] == cl && !(cl->defmap&(1<<i)))
			split->defaults[i] = NULL;
	}

	for (i=0; i<=TC_PRIO_MAX; i++) {
		int level = split->level;

		if (split->defaults[i])
			continue;

		for (h = 0; h < q->clhash.hashsize; h++) {
			struct hlist_node *n;
			struct cbq_class *c;

			hlist_for_each_entry(c, n, &q->clhash.hash[h],
					     common.hnode) {
				if (c->split == split && c->level < level &&
				    c->defmap&(1<<i)) {
					split->defaults[i] = c;
					level = c->level;
				}
			}
		}
	}
}

static void cbq_change_defmap(struct cbq_class *cl, u32 splitid, u32 def, u32 mask)
{
	struct cbq_class *split = NULL;

	if (splitid == 0) {
		if ((split = cl->split) == NULL)
			return;
		splitid = split->common.classid;
	}

	if (split == NULL || split->common.classid != splitid) {
		for (split = cl->tparent; split; split = split->tparent)
			if (split->common.classid == splitid)
				break;
	}

	if (split == NULL)
		return;

	if (cl->split != split) {
		cl->defmap = 0;
		cbq_sync_defmap(cl);
		cl->split = split;
		cl->defmap = def&mask;
	} else
		cl->defmap = (cl->defmap&~mask)|(def&mask);

	cbq_sync_defmap(cl);
}

static void cbq_unlink_class(struct cbq_class *this)
{
	struct cbq_class *cl, **clp;
	struct cbq_sched_data *q = qdisc_priv(this->qdisc);

	qdisc_class_hash_remove(&q->clhash, &this->common);

	if (this->tparent) {
		clp=&this->sibling;
		cl = *clp;
		do {
			if (cl == this) {
				*clp = cl->sibling;
				break;
			}
			clp = &cl->sibling;
		} while ((cl = *clp) != this->sibling);

		if (this->tparent->children == this) {
			this->tparent->children = this->sibling;
			if (this->sibling == this)
				this->tparent->children = NULL;
		}
	} else {
		BUG_TRAP(this->sibling == this);
	}
}

static void cbq_link_class(struct cbq_class *this)
{
	struct cbq_sched_data *q = qdisc_priv(this->qdisc);
	struct cbq_class *parent = this->tparent;

	this->sibling = this;
	qdisc_class_hash_insert(&q->clhash, &this->common);

	if (parent == NULL)
		return;

	if (parent->children == NULL) {
		parent->children = this;
	} else {
		this->sibling = parent->children->sibling;
		parent->children->sibling = this;
	}
}
static unsigned int cbq_drop(struct Qdisc* sch)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *cl, *cl_head;
	int prio;
	unsigned int len;

	for (prio = TC_CBQ_MAXPRIO; prio >= 0; prio--) {
		if ((cl_head = q->active[prio]) == NULL)
			continue;

		cl = cl_head;
		do {
			if (cl->q->ops->drop && (len = cl->q->ops->drop(cl->q))) {
				sch->q.qlen--;
				if (!cl->q->q.qlen)
					cbq_deactivate_class(cl);
				return len;
			}
		} while ((cl = cl->next_alive) != cl_head);
	}
	return 0;
}

static void
cbq_reset(struct Qdisc* sch)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *cl;
	struct hlist_node *n;
	int prio;
	unsigned h;

	q->activemask = 0;
	q->pmask = 0;
	q->tx_class = NULL;
	q->tx_borrowed = NULL;
	qdisc_watchdog_cancel(&q->watchdog);
	hrtimer_cancel(&q->delay_timer);
	q->toplevel = TC_CBQ_MAXLEVEL;
	q->now = psched_get_time();
	q->now_rt = q->now;

	for (prio = 0; prio <= TC_CBQ_MAXPRIO; prio++)
		q->active[prio] = NULL;

	for (h = 0; h < q->clhash.hashsize; h++) {
		hlist_for_each_entry(cl, n, &q->clhash.hash[h], common.hnode) {
			qdisc_reset(cl->q);

			cl->next_alive = NULL;
			cl->undertime = PSCHED_PASTPERFECT;
			cl->avgidle = cl->maxidle;
			cl->deficit = cl->quantum;
			cl->cpriority = cl->priority;
		}
	}
	sch->q.qlen = 0;
}


static int cbq_set_lss(struct cbq_class *cl, struct tc_cbq_lssopt *lss)
{
	if (lss->change&TCF_CBQ_LSS_FLAGS) {
		cl->share = (lss->flags&TCF_CBQ_LSS_ISOLATED) ? NULL : cl->tparent;
		cl->borrow = (lss->flags&TCF_CBQ_LSS_BOUNDED) ? NULL : cl->tparent;
	}
	if (lss->change&TCF_CBQ_LSS_EWMA)
		cl->ewma_log = lss->ewma_log;
	if (lss->change&TCF_CBQ_LSS_AVPKT)
		cl->avpkt = lss->avpkt;
	if (lss->change&TCF_CBQ_LSS_MINIDLE)
		cl->minidle = -(long)lss->minidle;
	if (lss->change&TCF_CBQ_LSS_MAXIDLE) {
		cl->maxidle = lss->maxidle;
		cl->avgidle = lss->maxidle;
	}
	if (lss->change&TCF_CBQ_LSS_OFFTIME)
		cl->offtime = lss->offtime;
	return 0;
}

static void cbq_rmprio(struct cbq_sched_data *q, struct cbq_class *cl)
{
	q->nclasses[cl->priority]--;
	q->quanta[cl->priority] -= cl->weight;
	cbq_normalize_quanta(q, cl->priority);
}

static void cbq_addprio(struct cbq_sched_data *q, struct cbq_class *cl)
{
	q->nclasses[cl->priority]++;
	q->quanta[cl->priority] += cl->weight;
	cbq_normalize_quanta(q, cl->priority);
}

static int cbq_set_wrr(struct cbq_class *cl, struct tc_cbq_wrropt *wrr)
{
	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);

	if (wrr->allot)
		cl->allot = wrr->allot;
	if (wrr->weight)
		cl->weight = wrr->weight;
	if (wrr->priority) {
		cl->priority = wrr->priority-1;
		cl->cpriority = cl->priority;
		if (cl->priority >= cl->priority2)
			cl->priority2 = TC_CBQ_MAXPRIO-1;
	}

	cbq_addprio(q, cl);
	return 0;
}

static int cbq_set_overlimit(struct cbq_class *cl, struct tc_cbq_ovl *ovl)
{
	switch (ovl->strategy) {
	case TC_CBQ_OVL_CLASSIC:
		cl->overlimit = cbq_ovl_classic;
		break;
	case TC_CBQ_OVL_DELAY:
		cl->overlimit = cbq_ovl_delay;
		break;
	case TC_CBQ_OVL_LOWPRIO:
		if (ovl->priority2-1 >= TC_CBQ_MAXPRIO ||
		    ovl->priority2-1 <= cl->priority)
			return -EINVAL;
		cl->priority2 = ovl->priority2-1;
		cl->overlimit = cbq_ovl_lowprio;
		break;
	case TC_CBQ_OVL_DROP:
		cl->overlimit = cbq_ovl_drop;
		break;
	case TC_CBQ_OVL_RCLASSIC:
		cl->overlimit = cbq_ovl_rclassic;
		break;
	default:
		return -EINVAL;
	}
	cl->penalty = ovl->penalty;
	return 0;
}
#ifdef CONFIG_NET_CLS_ACT
static int cbq_set_police(struct cbq_class *cl, struct tc_cbq_police *p)
{
	cl->police = p->police;

	if (cl->q->handle) {
		if (p->police == TC_POLICE_RECLASSIFY)
			cl->q->reshape_fail = cbq_reshape_fail;
		else
			cl->q->reshape_fail = NULL;
	}
	return 0;
}
#endif

static int cbq_set_fopt(struct cbq_class *cl, struct tc_cbq_fopt *fopt)
{
	cbq_change_defmap(cl, fopt->split, fopt->defmap, fopt->defchange);
	return 0;
}

static const struct nla_policy cbq_policy[TCA_CBQ_MAX + 1] = {
	[TCA_CBQ_LSSOPT]	= { .len = sizeof(struct tc_cbq_lssopt) },
	[TCA_CBQ_WRROPT]	= { .len = sizeof(struct tc_cbq_wrropt) },
	[TCA_CBQ_FOPT]		= { .len = sizeof(struct tc_cbq_fopt) },
	[TCA_CBQ_OVL_STRATEGY]	= { .len = sizeof(struct tc_cbq_ovl) },
	[TCA_CBQ_RATE]		= { .len = sizeof(struct tc_ratespec) },
	[TCA_CBQ_RTAB]		= { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
	[TCA_CBQ_POLICE]	= { .len = sizeof(struct tc_cbq_police) },
};

static int cbq_init(struct Qdisc *sch, struct nlattr *opt)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct nlattr *tb[TCA_CBQ_MAX + 1];
	struct tc_ratespec *r;
	int err;

	err = nla_parse_nested(tb, TCA_CBQ_MAX, opt, cbq_policy);
	if (err < 0)
		return err;

	if (tb[TCA_CBQ_RTAB] == NULL || tb[TCA_CBQ_RATE] == NULL)
		return -EINVAL;

	r = nla_data(tb[TCA_CBQ_RATE]);

	if ((q->link.R_tab = qdisc_get_rtab(r, tb[TCA_CBQ_RTAB])) == NULL)
		return -EINVAL;

	err = qdisc_class_hash_init(&q->clhash);
	if (err < 0)
		goto put_rtab;

	q->link.refcnt = 1;
	q->link.sibling = &q->link;
	q->link.common.classid = sch->handle;
	q->link.qdisc = sch;
	if (!(q->link.q = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue,
					    &pfifo_qdisc_ops,
					    sch->handle)))
		q->link.q = &noop_qdisc;

	q->link.priority = TC_CBQ_MAXPRIO-1;
	q->link.priority2 = TC_CBQ_MAXPRIO-1;
	q->link.cpriority = TC_CBQ_MAXPRIO-1;
	q->link.ovl_strategy = TC_CBQ_OVL_CLASSIC;
	q->link.overlimit = cbq_ovl_classic;
	q->link.allot = psched_mtu(qdisc_dev(sch));
	q->link.quantum = q->link.allot;
	q->link.weight = q->link.R_tab->rate.rate;

	q->link.ewma_log = TC_CBQ_DEF_EWMA;
	q->link.avpkt = q->link.allot/2;
	q->link.minidle = -0x7FFFFFFF;

	qdisc_watchdog_init(&q->watchdog, sch);
	hrtimer_init(&q->delay_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
	q->delay_timer.function = cbq_undelay;
	q->toplevel = TC_CBQ_MAXLEVEL;
	q->now = psched_get_time();
	q->now_rt = q->now;

	cbq_link_class(&q->link);

	if (tb[TCA_CBQ_LSSOPT])
		cbq_set_lss(&q->link, nla_data(tb[TCA_CBQ_LSSOPT]));

	cbq_addprio(q, &q->link);
	return 0;

put_rtab:
	qdisc_put_rtab(q->link.R_tab);
	return err;
}
static __inline__ int cbq_dump_rate(struct sk_buff *skb, struct cbq_class *cl)
{
	unsigned char *b = skb_tail_pointer(skb);

	NLA_PUT(skb, TCA_CBQ_RATE, sizeof(cl->R_tab->rate), &cl->R_tab->rate);
	return skb->len;

nla_put_failure:
	nlmsg_trim(skb, b);
	return -1;
}

static __inline__ int cbq_dump_lss(struct sk_buff *skb, struct cbq_class *cl)
{
	unsigned char *b = skb_tail_pointer(skb);
	struct tc_cbq_lssopt opt;

	opt.flags = 0;
	if (cl->borrow == NULL)
		opt.flags |= TCF_CBQ_LSS_BOUNDED;
	if (cl->share == NULL)
		opt.flags |= TCF_CBQ_LSS_ISOLATED;
	opt.ewma_log = cl->ewma_log;
	opt.level = cl->level;
	opt.avpkt = cl->avpkt;
	opt.maxidle = cl->maxidle;
	opt.minidle = (u32)(-cl->minidle);
	opt.offtime = cl->offtime;
	opt.change = ~0;
	NLA_PUT(skb, TCA_CBQ_LSSOPT, sizeof(opt), &opt);
	return skb->len;

nla_put_failure:
	nlmsg_trim(skb, b);
	return -1;
}

static __inline__ int cbq_dump_wrr(struct sk_buff *skb, struct cbq_class *cl)
{
	unsigned char *b = skb_tail_pointer(skb);
	struct tc_cbq_wrropt opt;

	opt.flags = 0;
	opt.allot = cl->allot;
	opt.priority = cl->priority+1;
	opt.cpriority = cl->cpriority+1;
	opt.weight = cl->weight;
	NLA_PUT(skb, TCA_CBQ_WRROPT, sizeof(opt), &opt);
	return skb->len;

nla_put_failure:
	nlmsg_trim(skb, b);
	return -1;
}

static __inline__ int cbq_dump_ovl(struct sk_buff *skb, struct cbq_class *cl)
{
	unsigned char *b = skb_tail_pointer(skb);
	struct tc_cbq_ovl opt;

	opt.strategy = cl->ovl_strategy;
	opt.priority2 = cl->priority2+1;
	opt.pad = 0;
	opt.penalty = cl->penalty;
	NLA_PUT(skb, TCA_CBQ_OVL_STRATEGY, sizeof(opt), &opt);
	return skb->len;

nla_put_failure:
	nlmsg_trim(skb, b);
	return -1;
}

static __inline__ int cbq_dump_fopt(struct sk_buff *skb, struct cbq_class *cl)
{
	unsigned char *b = skb_tail_pointer(skb);
	struct tc_cbq_fopt opt;

	if (cl->split || cl->defmap) {
		opt.split = cl->split ? cl->split->common.classid : 0;
		opt.defmap = cl->defmap;
		opt.defchange = ~0;
		NLA_PUT(skb, TCA_CBQ_FOPT, sizeof(opt), &opt);
	}
	return skb->len;

nla_put_failure:
	nlmsg_trim(skb, b);
	return -1;
}

#ifdef CONFIG_NET_CLS_ACT
static __inline__ int cbq_dump_police(struct sk_buff *skb, struct cbq_class *cl)
{
	unsigned char *b = skb_tail_pointer(skb);
	struct tc_cbq_police opt;

	if (cl->police) {
		opt.police = cl->police;
		opt.__res1 = 0;
		opt.__res2 = 0;
		NLA_PUT(skb, TCA_CBQ_POLICE, sizeof(opt), &opt);
	}
	return skb->len;

nla_put_failure:
	nlmsg_trim(skb, b);
	return -1;
}
#endif

static int cbq_dump_attr(struct sk_buff *skb, struct cbq_class *cl)
{
	if (cbq_dump_lss(skb, cl) < 0 ||
	    cbq_dump_rate(skb, cl) < 0 ||
	    cbq_dump_wrr(skb, cl) < 0 ||
	    cbq_dump_ovl(skb, cl) < 0 ||
#ifdef CONFIG_NET_CLS_ACT
	    cbq_dump_police(skb, cl) < 0 ||
#endif
	    cbq_dump_fopt(skb, cl) < 0)
		return -1;
	return 0;
}

static int cbq_dump(struct Qdisc *sch, struct sk_buff *skb)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct nlattr *nest;

	nest = nla_nest_start(skb, TCA_OPTIONS);
	if (nest == NULL)
		goto nla_put_failure;
	if (cbq_dump_attr(skb, &q->link) < 0)
		goto nla_put_failure;
	nla_nest_end(skb, nest);
	return skb->len;

nla_put_failure:
	nla_nest_cancel(skb, nest);
	return -1;
}
static int
cbq_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
{
	struct cbq_sched_data *q = qdisc_priv(sch);

	q->link.xstats.avgidle = q->link.avgidle;
	return gnet_stats_copy_app(d, &q->link.xstats, sizeof(q->link.xstats));
}

static int
cbq_dump_class(struct Qdisc *sch, unsigned long arg,
	       struct sk_buff *skb, struct tcmsg *tcm)
{
	struct cbq_class *cl = (struct cbq_class*)arg;
	struct nlattr *nest;

	if (cl->tparent)
		tcm->tcm_parent = cl->tparent->common.classid;
	else
		tcm->tcm_parent = TC_H_ROOT;
	tcm->tcm_handle = cl->common.classid;
	tcm->tcm_info = cl->q->handle;

	nest = nla_nest_start(skb, TCA_OPTIONS);
	if (nest == NULL)
		goto nla_put_failure;
	if (cbq_dump_attr(skb, cl) < 0)
		goto nla_put_failure;
	nla_nest_end(skb, nest);
	return skb->len;

nla_put_failure:
	nla_nest_cancel(skb, nest);
	return -1;
}

static int
cbq_dump_class_stats(struct Qdisc *sch, unsigned long arg,
		     struct gnet_dump *d)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *cl = (struct cbq_class*)arg;

	cl->qstats.qlen = cl->q->q.qlen;
	cl->xstats.avgidle = cl->avgidle;
	cl->xstats.undertime = 0;

	if (cl->undertime != PSCHED_PASTPERFECT)
		cl->xstats.undertime = cl->undertime - q->now;

	if (gnet_stats_copy_basic(d, &cl->bstats) < 0 ||
	    gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
	    gnet_stats_copy_queue(d, &cl->qstats) < 0)
		return -1;

	return gnet_stats_copy_app(d, &cl->xstats, sizeof(cl->xstats));
}

static int cbq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
		     struct Qdisc **old)
{
	struct cbq_class *cl = (struct cbq_class*)arg;

	if (cl) {
		if (new == NULL) {
			new = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue,
						&pfifo_qdisc_ops,
						cl->common.classid);
			if (new == NULL)
				return -ENOBUFS;
		} else {
#ifdef CONFIG_NET_CLS_ACT
			if (cl->police == TC_POLICE_RECLASSIFY)
				new->reshape_fail = cbq_reshape_fail;
#endif
		}
		sch_tree_lock(sch);
		*old = xchg(&cl->q, new);
		qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
		qdisc_reset(*old);
		sch_tree_unlock(sch);

		return 0;
	}
	return -ENOENT;
}

static struct Qdisc *
cbq_leaf(struct Qdisc *sch, unsigned long arg)
{
	struct cbq_class *cl = (struct cbq_class*)arg;

	return cl ? cl->q : NULL;
}

static void cbq_qlen_notify(struct Qdisc *sch, unsigned long arg)
{
	struct cbq_class *cl = (struct cbq_class *)arg;

	if (cl->q->q.qlen == 0)
		cbq_deactivate_class(cl);
}

static unsigned long cbq_get(struct Qdisc *sch, u32 classid)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *cl = cbq_class_lookup(q, classid);

	if (cl) {
		cl->refcnt++;
		return (unsigned long)cl;
	}
	return 0;
}

static void cbq_destroy_class(struct Qdisc *sch, struct cbq_class *cl)
{
	struct cbq_sched_data *q = qdisc_priv(sch);

	BUG_TRAP(!cl->filters);

	tcf_destroy_chain(&cl->filter_list);
	qdisc_destroy(cl->q);
	qdisc_put_rtab(cl->R_tab);
	gen_kill_estimator(&cl->bstats, &cl->rate_est);
	if (cl != &q->link)
		kfree(cl);
}

static void
cbq_destroy(struct Qdisc* sch)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct hlist_node *n, *next;
	struct cbq_class *cl;
	unsigned h;

#ifdef CONFIG_NET_CLS_ACT
	q->rx_class = NULL;
#endif
	/*
	 * Filters must be destroyed first because we don't destroy the
	 * classes from root to leafs which means that filters can still
	 * be bound to classes which have been destroyed already. --TGR '04
	 */
	for (h = 0; h < q->clhash.hashsize; h++) {
		hlist_for_each_entry(cl, n, &q->clhash.hash[h], common.hnode)
			tcf_destroy_chain(&cl->filter_list);
	}
	for (h = 0; h < q->clhash.hashsize; h++) {
		hlist_for_each_entry_safe(cl, n, next, &q->clhash.hash[h],
					  common.hnode)
			cbq_destroy_class(sch, cl);
	}
	qdisc_class_hash_destroy(&q->clhash);
}

static void cbq_put(struct Qdisc *sch, unsigned long arg)
{
	struct cbq_class *cl = (struct cbq_class*)arg;

	if (--cl->refcnt == 0) {
#ifdef CONFIG_NET_CLS_ACT
		spinlock_t *root_lock = qdisc_root_lock(sch);
		struct cbq_sched_data *q = qdisc_priv(sch);

		spin_lock_bh(root_lock);
		if (q->rx_class == cl)
			q->rx_class = NULL;
		spin_unlock_bh(root_lock);
#endif

		cbq_destroy_class(sch, cl);
	}
}
static int
cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **tca,
		 unsigned long *arg)
{
	int err;
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *cl = (struct cbq_class*)*arg;
	struct nlattr *opt = tca[TCA_OPTIONS];
	struct nlattr *tb[TCA_CBQ_MAX + 1];
	struct cbq_class *parent;
	struct qdisc_rate_table *rtab = NULL;

	if (opt == NULL)
		return -EINVAL;

	err = nla_parse_nested(tb, TCA_CBQ_MAX, opt, cbq_policy);
	if (err < 0)
		return err;

	if (cl) {
		/* Check parent */
		if (parentid) {
			if (cl->tparent &&
			    cl->tparent->common.classid != parentid)
				return -EINVAL;
			if (!cl->tparent && parentid != TC_H_ROOT)
				return -EINVAL;
		}

		if (tb[TCA_CBQ_RATE]) {
			rtab = qdisc_get_rtab(nla_data(tb[TCA_CBQ_RATE]), tb[TCA_CBQ_RTAB]);
			if (rtab == NULL)
				return -EINVAL;
		}

		/* Change class parameters */
		sch_tree_lock(sch);

		if (cl->next_alive != NULL)
			cbq_deactivate_class(cl);

		if (rtab) {
			rtab = xchg(&cl->R_tab, rtab);
			qdisc_put_rtab(rtab);
		}

		if (tb[TCA_CBQ_LSSOPT])
			cbq_set_lss(cl, nla_data(tb[TCA_CBQ_LSSOPT]));

		if (tb[TCA_CBQ_WRROPT]) {
			cbq_rmprio(q, cl);
			cbq_set_wrr(cl, nla_data(tb[TCA_CBQ_WRROPT]));
		}

		if (tb[TCA_CBQ_OVL_STRATEGY])
			cbq_set_overlimit(cl, nla_data(tb[TCA_CBQ_OVL_STRATEGY]));

#ifdef CONFIG_NET_CLS_ACT
		if (tb[TCA_CBQ_POLICE])
			cbq_set_police(cl, nla_data(tb[TCA_CBQ_POLICE]));
#endif

		if (tb[TCA_CBQ_FOPT])
			cbq_set_fopt(cl, nla_data(tb[TCA_CBQ_FOPT]));

		if (cl->q->q.qlen)
			cbq_activate_class(cl);

		sch_tree_unlock(sch);

		if (tca[TCA_RATE])
			gen_replace_estimator(&cl->bstats, &cl->rate_est,
					      qdisc_root_lock(sch),
					      tca[TCA_RATE]);
		return 0;
	}

	if (parentid == TC_H_ROOT)
		return -EINVAL;

	if (tb[TCA_CBQ_WRROPT] == NULL || tb[TCA_CBQ_RATE] == NULL ||
	    tb[TCA_CBQ_LSSOPT] == NULL)
		return -EINVAL;

	rtab = qdisc_get_rtab(nla_data(tb[TCA_CBQ_RATE]), tb[TCA_CBQ_RTAB]);
	if (rtab == NULL)
		return -EINVAL;

	if (classid) {
		err = -EINVAL;
		if (TC_H_MAJ(classid^sch->handle) || cbq_class_lookup(q, classid))
			goto failure;
	} else {
		int i;
		classid = TC_H_MAKE(sch->handle,0x8000);

		for (i=0; i<0x8000; i++) {
			if (++q->hgenerator >= 0x8000)
				q->hgenerator = 1;
			if (cbq_class_lookup(q, classid|q->hgenerator) == NULL)
				break;
		}
		err = -ENOSR;
		if (i >= 0x8000)
			goto failure;
		classid = classid|q->hgenerator;
	}

	parent = &q->link;
	if (parentid) {
		parent = cbq_class_lookup(q, parentid);
		err = -EINVAL;
		if (parent == NULL)
			goto failure;
	}

	err = -ENOBUFS;
	cl = kzalloc(sizeof(*cl), GFP_KERNEL);
	if (cl == NULL)
		goto failure;
	cl->R_tab = rtab;
	rtab = NULL;
	cl->refcnt = 1;
	if (!(cl->q = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue,
					&pfifo_qdisc_ops, classid)))
		cl->q = &noop_qdisc;
	cl->common.classid = classid;
	cl->tparent = parent;
	cl->qdisc = sch;
	cl->allot = parent->allot;
	cl->quantum = cl->allot;
	cl->weight = cl->R_tab->rate.rate;

	sch_tree_lock(sch);
	cbq_link_class(cl);
	cl->borrow = cl->tparent;
	if (cl->tparent != &q->link)
		cl->share = cl->tparent;
	cbq_adjust_levels(parent);
	cl->minidle = -0x7FFFFFFF;
	cbq_set_lss(cl, nla_data(tb[TCA_CBQ_LSSOPT]));
	cbq_set_wrr(cl, nla_data(tb[TCA_CBQ_WRROPT]));
	if (cl->ewma_log==0)
		cl->ewma_log = q->link.ewma_log;
	if (cl->maxidle==0)
		cl->maxidle = q->link.maxidle;
	if (cl->avpkt==0)
		cl->avpkt = q->link.avpkt;
	cl->overlimit = cbq_ovl_classic;
	if (tb[TCA_CBQ_OVL_STRATEGY])
		cbq_set_overlimit(cl, nla_data(tb[TCA_CBQ_OVL_STRATEGY]));
#ifdef CONFIG_NET_CLS_ACT
	if (tb[TCA_CBQ_POLICE])
		cbq_set_police(cl, nla_data(tb[TCA_CBQ_POLICE]));
#endif
	if (tb[TCA_CBQ_FOPT])
		cbq_set_fopt(cl, nla_data(tb[TCA_CBQ_FOPT]));
	sch_tree_unlock(sch);

	qdisc_class_hash_grow(sch, &q->clhash);

	if (tca[TCA_RATE])
		gen_new_estimator(&cl->bstats, &cl->rate_est,
				  qdisc_root_lock(sch), tca[TCA_RATE]);

	*arg = (unsigned long)cl;
	return 0;

failure:
	qdisc_put_rtab(rtab);
	return err;
}
static int cbq_delete(struct Qdisc *sch, unsigned long arg)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *cl = (struct cbq_class*)arg;
	unsigned int qlen;

	if (cl->filters || cl->children || cl == &q->link)
		return -EBUSY;

	sch_tree_lock(sch);

	qlen = cl->q->q.qlen;
	qdisc_reset(cl->q);
	qdisc_tree_decrease_qlen(cl->q, qlen);

	if (cl->next_alive)
		cbq_deactivate_class(cl);

	if (q->tx_borrowed == cl)
		q->tx_borrowed = q->tx_class;
	if (q->tx_class == cl) {
		q->tx_class = NULL;
		q->tx_borrowed = NULL;
	}
#ifdef CONFIG_NET_CLS_ACT
	if (q->rx_class == cl)
		q->rx_class = NULL;
#endif

	cbq_unlink_class(cl);
	cbq_adjust_levels(cl->tparent);
	cl->defmap = 0;
	cbq_sync_defmap(cl);

	cbq_rmprio(q, cl);
	sch_tree_unlock(sch);

	if (--cl->refcnt == 0)
		cbq_destroy_class(sch, cl);

	return 0;
}

static struct tcf_proto **cbq_find_tcf(struct Qdisc *sch, unsigned long arg)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *cl = (struct cbq_class *)arg;

	if (cl == NULL)
		cl = &q->link;

	return &cl->filter_list;
}

static unsigned long cbq_bind_filter(struct Qdisc *sch, unsigned long parent,
				     u32 classid)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *p = (struct cbq_class*)parent;
	struct cbq_class *cl = cbq_class_lookup(q, classid);

	if (cl) {
		if (p && p->level <= cl->level)
			return 0;
		cl->filters++;
		return (unsigned long)cl;
	}
	return 0;
}

static void cbq_unbind_filter(struct Qdisc *sch, unsigned long arg)
{
	struct cbq_class *cl = (struct cbq_class*)arg;

	cl->filters--;
}

static void cbq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *cl;
	struct hlist_node *n;
	unsigned h;

	if (arg->stop)
		return;

	for (h = 0; h < q->clhash.hashsize; h++) {
		hlist_for_each_entry(cl, n, &q->clhash.hash[h], common.hnode) {
			if (arg->count < arg->skip) {
				arg->count++;
				continue;
			}
			if (arg->fn(sch, (unsigned long)cl, arg) < 0) {
				arg->stop = 1;
				return;
			}
			arg->count++;
		}
	}
}

static const struct Qdisc_class_ops cbq_class_ops = {
	.graft		=	cbq_graft,
	.leaf		=	cbq_leaf,
	.qlen_notify	=	cbq_qlen_notify,
	.get		=	cbq_get,
	.put		=	cbq_put,
	.change		=	cbq_change_class,
	.delete		=	cbq_delete,
	.walk		=	cbq_walk,
	.tcf_chain	=	cbq_find_tcf,
	.bind_tcf	=	cbq_bind_filter,
	.unbind_tcf	=	cbq_unbind_filter,
	.dump		=	cbq_dump_class,
	.dump_stats	=	cbq_dump_class_stats,
};

static struct Qdisc_ops cbq_qdisc_ops __read_mostly = {
	.next		=	NULL,
	.cl_ops		=	&cbq_class_ops,
	.id		=	"cbq",
	.priv_size	=	sizeof(struct cbq_sched_data),
	.enqueue	=	cbq_enqueue,
	.dequeue	=	cbq_dequeue,
	.requeue	=	cbq_requeue,
	.drop		=	cbq_drop,
	.init		=	cbq_init,
	.reset		=	cbq_reset,
	.destroy	=	cbq_destroy,
	.change		=	NULL,
	.dump		=	cbq_dump,
	.dump_stats	=	cbq_dump_stats,
	.owner		=	THIS_MODULE,
};

static int __init cbq_module_init(void)
{
	return register_qdisc(&cbq_qdisc_ops);
}
static void __exit cbq_module_exit(void)
{
	unregister_qdisc(&cbq_qdisc_ops);
}
module_init(cbq_module_init)
module_exit(cbq_module_exit)
MODULE_LICENSE("GPL");