net/sched/sch_cbq.c

   1 /*
   2  * net/sched/sch_cbq.c  Class-Based Queueing discipline.
   3  *
   4  *              This program is free software; you can redistribute it and/or
   5  *              modify it under the terms of the GNU General Public License
   6  *              as published by the Free Software Foundation; either version
   7  *              2 of the License, or (at your option) any later version.
   8  *
   9  * Authors:     Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
  10  *
  11  */
  12
  13 #include <linux/module.h>
  14 #include <linux/types.h>
  15 #include <linux/kernel.h>
  16 #include <linux/string.h>
  17 #include <linux/errno.h>
  18 #include <linux/skbuff.h>
  19 #include <net/netlink.h>
  20 #include <net/pkt_sched.h>
  21
  22
  23 /*      Class-Based Queueing (CBQ) algorithm.
  24         =======================================
  25
  26         Sources: [1] Sally Floyd and Van Jacobson, "Link-sharing and Resource
  27                  Management Models for Packet Networks",
  28                  IEEE/ACM Transactions on Networking, Vol.3, No.4, 1995
  29
  30                  [2] Sally Floyd, "Notes on CBQ and Guaranteed Service", 1995
  31
  32                  [3] Sally Floyd, "Notes on Class-Based Queueing: Setting
  33                  Parameters", 1996
  34
  35                  [4] Sally Floyd and Michael Speer, "Experimental Results
  36                  for Class-Based Queueing", 1998, not published.
  37
  38         -----------------------------------------------------------------------
  39
  40         Algorithm skeleton was taken from NS simulator cbq.cc.
  41         If someone wants to check this code against the LBL version,
  42         he should take into account that ONLY the skeleton was borrowed,
  43         the implementation is different. Particularly:
  44
  45         --- The WRR algorithm is different. Our version looks more
  46         reasonable (I hope) and works when quanta are allowed to be
  47         less than MTU, which is always the case when real time classes
  48         have small rates. Note, that the statement of [3] is
  49         incomplete, delay may actually be estimated even if class
  50         per-round allotment is less than MTU. Namely, if per-round
  51         allotment is W*r_i, and r_1+...+r_k = r < 1
  52
  53         delay_i <= ([MTU/(W*r_i)]*W*r + W*r + k*MTU)/B
  54
  55         In the worst case we have IntServ estimate with D = W*r+k*MTU
  56         and C = MTU*r. The proof (if correct at all) is trivial.
  57
  58
  59         --- It seems that cbq-2.0 is not very accurate. At least, I cannot
  60         interpret some places, which look like wrong translations
  61         from NS. Anyone is advised to find these differences
  62         and explain to me, why I am wrong 8).
  63
  64         --- Linux has no EOI event, so that we cannot estimate true class
  65         idle time. Workaround is to consider the next dequeue event
  66         as sign that previous packet is finished. This is wrong because of
  67         internal device queueing, but on a permanently loaded link it is true.
  68         Moreover, combined with clock integrator, this scheme looks
  69         very close to an ideal solution.  */
  70
  71 struct cbq_sched_data;
  72
  73
  74 struct cbq_class
  75 {
  76         struct Qdisc_class_common common;
  77         struct cbq_class        *next_alive;    /* next class with backlog in this priority band */
  78
  79 /* Parameters */
  80         unsigned char           priority;       /* class priority */
  81         unsigned char           priority2;      /* priority to be used after overlimit */
  82         unsigned char           ewma_log;       /* time constant for idle time calculation */
  83         unsigned char           ovl_strategy;
  84 #ifdef CONFIG_NET_CLS_ACT
  85         unsigned char           police;
  86 #endif
  87
  88         u32                     defmap;
  89
  90         /* Link-sharing scheduler parameters */
  91         long                    maxidle;        /* Class parameters: see below. */
  92         long                    offtime;
  93         long                    minidle;
  94         u32                     avpkt;
  95         struct qdisc_rate_table *R_tab;
  96
  97         /* Overlimit strategy parameters */
  98         void                    (*overlimit)(struct cbq_class *cl);
  99         psched_tdiff_t          penalty;
 100
 101         /* General scheduler (WRR) parameters */
 102         long                    allot;
 103         long                    quantum;        /* Allotment per WRR round */
 104         long                    weight;         /* Relative allotment: see below */
 105
 106         struct Qdisc            *qdisc;         /* Ptr to CBQ discipline */
 107         struct cbq_class        *split;         /* Ptr to split node */
 108         struct cbq_class        *share;         /* Ptr to LS parent in the class tree */
 109         struct cbq_class        *tparent;       /* Ptr to tree parent in the class tree */
 110         struct cbq_class        *borrow;        /* NULL if class is bandwidth limited;
 111                                                    parent otherwise */
 112         struct cbq_class        *sibling;       /* Sibling chain */
 113         struct cbq_class        *children;      /* Pointer to children chain */
 114
 115         struct Qdisc            *q;             /* Elementary queueing discipline */
 116
 117
 118 /* Variables */
 119         unsigned char           cpriority;      /* Effective priority */
 120         unsigned char           delayed;
 121         unsigned char           level;          /* level of the class in hierarchy:
 122                                                    0 for leaf classes, and maximal
 123                                                    level of children + 1 for nodes.
 124                                                  */
 125
 126         psched_time_t           last;           /* Last end of service */
 127         psched_time_t           undertime;
 128         long                    avgidle;
 129         long                    deficit;        /* Saved deficit for WRR */
 130         psched_time_t           penalized;
 131         struct gnet_stats_basic_packed bstats;
 132         struct gnet_stats_queue qstats;
 133         struct gnet_stats_rate_est rate_est;
 134         struct tc_cbq_xstats    xstats;
 135
 136         struct tcf_proto        *filter_list;
 137
 138         int                     refcnt;
 139         int                     filters;
 140
 141         struct cbq_class        *defaults[TC_PRIO_MAX+1];
 142 };
 143
 144 struct cbq_sched_data
 145 {
 146         struct Qdisc_class_hash clhash;                 /* Hash table of all classes */
 147         int                     nclasses[TC_CBQ_MAXPRIO+1];
 148         unsigned                quanta[TC_CBQ_MAXPRIO+1];
 149
 150         struct cbq_class        link;
 151
 152         unsigned                activemask;
 153         struct cbq_class        *active[TC_CBQ_MAXPRIO+1];      /* List of all classes
 154                                                                    with backlog */
 155
 156 #ifdef CONFIG_NET_CLS_ACT
 157         struct cbq_class        *rx_class;
 158 #endif
 159         struct cbq_class        *tx_class;
 160         struct cbq_class        *tx_borrowed;
 161         int                     tx_len;
 162         psched_time_t           now;            /* Cached timestamp */
 163         psched_time_t           now_rt;         /* Cached real time */
 164         unsigned                pmask;
 165
 166         struct hrtimer          delay_timer;
 167         struct qdisc_watchdog   watchdog;       /* Watchdog timer,
 168                                                    started when CBQ has
 169                                                    backlog, but cannot
 170                                                    transmit just now */
 171         psched_tdiff_t          wd_expires;
 172         int                     toplevel;
 173         u32                     hgenerator;
 174 };
 175
 176
 177 #define L2T(cl,len)     qdisc_l2t((cl)->R_tab,len)
 178
 179 static __inline__ struct cbq_class *
 180 cbq_class_lookup(struct cbq_sched_data *q, u32 classid)
 181 {
 182         struct Qdisc_class_common *clc;
 183
 184         clc = qdisc_class_find(&q->clhash, classid);
 185         if (clc == NULL)
 186                 return NULL;
 187         return container_of(clc, struct cbq_class, common);
 188 }
 189
 190 #ifdef CONFIG_NET_CLS_ACT
 191
 192 static struct cbq_class *
 193 cbq_reclassify(struct sk_buff *skb, struct cbq_class *this)
 194 {
 195         struct cbq_class *cl, *new;
 196
 197         for (cl = this->tparent; cl; cl = cl->tparent)
 198                 if ((new = cl->defaults[TC_PRIO_BESTEFFORT]) != NULL && new != this)
 199                         return new;
 200
 201         return NULL;
 202 }
 203
 204 #endif
 205
 206 /* Classify packet. The procedure is pretty complicated, but
 207    it allows us to combine link sharing and priority scheduling
 208    transparently.
 209
 210    Namely, you can put link sharing rules (f.e. route based) at root of CBQ,
 211    so that it resolves to split nodes. Then packets are classified
 212    by logical priority, or a more specific classifier may be attached
 213    to the split node.
 214  */
 215
 216 static struct cbq_class *
 217 cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
 218 {
 219         struct cbq_sched_data *q = qdisc_priv(sch);
 220         struct cbq_class *head = &q->link;
 221         struct cbq_class **defmap;
 222         struct cbq_class *cl = NULL;
 223         u32 prio = skb->priority;
 224         struct tcf_result res;
 225
 226         /*
 227          *  Step 1. If skb->priority points to one of our classes, use it.
 228          */
 229         if (TC_H_MAJ(prio^sch->handle) == 0 &&
 230             (cl = cbq_class_lookup(q, prio)) != NULL)
 231                 return cl;
 232
 233         *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
 234         for (;;) {
 235                 int result = 0;
 236                 defmap = head->defaults;
 237
 238                 /*
 239                  * Step 2+n. Apply classifier.
 240                  */
 241                 if (!head->filter_list ||
 242                     (result = tc_classify_compat(skb, head->filter_list, &res)) < 0)
 243                         goto fallback;
 244
 245                 if ((cl = (void*)res.class) == NULL) {
 246                         if (TC_H_MAJ(res.classid))
 247                                 cl = cbq_class_lookup(q, res.classid);
 248                         else if ((cl = defmap[res.classid&TC_PRIO_MAX]) == NULL)
 249                                 cl = defmap[TC_PRIO_BESTEFFORT];
 250
 251                         if (cl == NULL || cl->level >= head->level)
 252                                 goto fallback;
 253                 }
 254
 255 #ifdef CONFIG_NET_CLS_ACT
 256                 switch (result) {
 257                 case TC_ACT_QUEUED:
 258                 case TC_ACT_STOLEN:
 259                         *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
 260                 case TC_ACT_SHOT:
 261                         return NULL;
 262                 case TC_ACT_RECLASSIFY:
 263                         return cbq_reclassify(skb, cl);
 264                 }
 265 #endif
 266                 if (cl->level == 0)
 267                         return cl;
 268
 269                 /*
 270                  * Step 3+n. If classifier selected a link sharing class,
 271                  *         apply agency specific classifier.
 272                  *         Repeat this procdure until we hit a leaf node.
 273                  */
 274                 head = cl;
 275         }
 276
 277 fallback:
 278         cl = head;
 279
 280         /*
 281          * Step 4. No success...
 282          */
 283         if (TC_H_MAJ(prio) == 0 &&
 284             !(cl = head->defaults[prio&TC_PRIO_MAX]) &&
 285             !(cl = head->defaults[TC_PRIO_BESTEFFORT]))
 286                 return head;
 287
 288         return cl;
 289 }
 290
 291 /*
 292    A packet has just been enqueued on the empty class.
 293    cbq_activate_class adds it to the tail of active class list
 294    of its priority band.
 295  */
 296
 297 static __inline__ void cbq_activate_class(struct cbq_class *cl)
 298 {
 299         struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
 300         int prio = cl->cpriority;
 301         struct cbq_class *cl_tail;
 302
 303         cl_tail = q->active[prio];
 304         q->active[prio] = cl;
 305
 306         if (cl_tail != NULL) {
 307                 cl->next_alive = cl_tail->next_alive;
 308                 cl_tail->next_alive = cl;
 309         } else {
 310                 cl->next_alive = cl;
 311                 q->activemask |= (1<<prio);
 312         }
 313 }
 314
 315 /*
 316    Unlink class from active chain.
 317    Note that this same procedure is done directly in cbq_dequeue*
 318    during round-robin procedure.
 319  */
 320
 321 static void cbq_deactivate_class(struct cbq_class *this)
 322 {
 323         struct cbq_sched_data *q = qdisc_priv(this->qdisc);
 324         int prio = this->cpriority;
 325         struct cbq_class *cl;
 326         struct cbq_class *cl_prev = q->active[prio];
 327
 328         do {
 329                 cl = cl_prev->next_alive;
 330                 if (cl == this) {
 331                         cl_prev->next_alive = cl->next_alive;
 332                         cl->next_alive = NULL;
 333
 334                         if (cl == q->active[prio]) {
 335                                 q->active[prio] = cl_prev;
 336                                 if (cl == q->active[prio]) {
 337                                         q->active[prio] = NULL;
 338                                         q->activemask &= ~(1<<prio);
 339                                         return;
 340                                 }
 341                         }
 342                         return;
 343                 }
 344         } while ((cl_prev = cl) != q->active[prio]);
 345 }
 346
 347 static void
 348 cbq_mark_toplevel(struct cbq_sched_data *q, struct cbq_class *cl)
 349 {
 350         int toplevel = q->toplevel;
 351
 352         if (toplevel > cl->level && !(cl->q->flags&TCQ_F_THROTTLED)) {
 353                 psched_time_t now;
 354                 psched_tdiff_t incr;
 355
 356                 now = psched_get_time();
 357                 incr = now - q->now_rt;
 358                 now = q->now + incr;
 359
 360                 do {
 361                         if (cl->undertime < now) {
 362                                 q->toplevel = cl->level;
 363                                 return;
 364                         }
 365                 } while ((cl=cl->borrow) != NULL && toplevel > cl->level);
 366         }
 367 }
 368
 369 static int
 370 cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 371 {
 372         struct cbq_sched_data *q = qdisc_priv(sch);
 373         int uninitialized_var(ret);
 374         struct cbq_class *cl = cbq_classify(skb, sch, &ret);
 375
 376 #ifdef CONFIG_NET_CLS_ACT
 377         q->rx_class = cl;
 378 #endif
 379         if (cl == NULL) {
 380                 if (ret & __NET_XMIT_BYPASS)
 381                         sch->qstats.drops++;
 382                 kfree_skb(skb);
 383                 return ret;
 384         }
 385
 386 #ifdef CONFIG_NET_CLS_ACT
 387         cl->q->__parent = sch;
 388 #endif
 389         ret = qdisc_enqueue(skb, cl->q);
 390         if (ret == NET_XMIT_SUCCESS) {
 391                 sch->q.qlen++;
 392                 sch->bstats.packets++;
 393                 sch->bstats.bytes += qdisc_pkt_len(skb);
 394                 cbq_mark_toplevel(q, cl);
 395                 if (!cl->next_alive)
 396                         cbq_activate_class(cl);
 397                 return ret;
 398         }
 399
 400         if (net_xmit_drop_count(ret)) {
 401                 sch->qstats.drops++;
 402                 cbq_mark_toplevel(q, cl);
 403                 cl->qstats.drops++;
 404         }
 405         return ret;
 406 }
 407
 408 /* Overlimit actions */
 409
 410 /* TC_CBQ_OVL_CLASSIC: (default) penalize leaf class by adding offtime */
 411
 412 static void cbq_ovl_classic(struct cbq_class *cl)
 413 {
 414         struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
 415         psched_tdiff_t delay = cl->undertime - q->now;
 416
 417         if (!cl->delayed) {
 418                 delay += cl->offtime;
 419
 420                 /*
 421                    Class goes to sleep, so that it will have no
 422                    chance to work avgidle. Let's forgive it 8)
 423
 424                    BTW cbq-2.0 has a crap in this
 425                    place, apparently they forgot to shift it by cl->ewma_log.
 426                  */
 427                 if (cl->avgidle < 0)
 428                         delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
 429                 if (cl->avgidle < cl->minidle)
 430                         cl->avgidle = cl->minidle;
 431                 if (delay <= 0)
 432                         delay = 1;
 433                 cl->undertime = q->now + delay;
 434
 435                 cl->xstats.overactions++;
 436                 cl->delayed = 1;
 437         }
 438         if (q->wd_expires == 0 || q->wd_expires > delay)
 439                 q->wd_expires = delay;
 440
 441         /* Dirty work! We must schedule wakeups based on
 442            real available rate, rather than leaf rate,
 443            which may be tiny (even zero).
 444          */
 445         if (q->toplevel == TC_CBQ_MAXLEVEL) {
 446                 struct cbq_class *b;
 447                 psched_tdiff_t base_delay = q->wd_expires;
 448
 449                 for (b = cl->borrow; b; b = b->borrow) {
 450                         delay = b->undertime - q->now;
 451                         if (delay < base_delay) {
 452                                 if (delay <= 0)
 453                                         delay = 1;
 454                                 base_delay = delay;
 455                         }
 456                 }
 457
 458                 q->wd_expires = base_delay;
 459         }
 460 }
 461
 462 /* TC_CBQ_OVL_RCLASSIC: penalize by offtime classes in hierarchy, when
 463    they go overlimit
 464  */
 465
 466 static void cbq_ovl_rclassic(struct cbq_class *cl)
 467 {
 468         struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
 469         struct cbq_class *this = cl;
 470
 471         do {
 472                 if (cl->level > q->toplevel) {
 473                         cl = NULL;
 474                         break;
 475                 }
 476         } while ((cl = cl->borrow) != NULL);
 477
 478         if (cl == NULL)
 479                 cl = this;
 480         cbq_ovl_classic(cl);
 481 }
 482
 483 /* TC_CBQ_OVL_DELAY: delay until it will go to underlimit */
 484
 485 static void cbq_ovl_delay(struct cbq_class *cl)
 486 {
 487         struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
 488         psched_tdiff_t delay = cl->undertime - q->now;
 489
 490         if (test_bit(__QDISC_STATE_DEACTIVATED,
 491                      &qdisc_root_sleeping(cl->qdisc)->state))
 492                 return;
 493
 494         if (!cl->delayed) {
 495                 psched_time_t sched = q->now;
 496                 ktime_t expires;
 497
 498                 delay += cl->offtime;
 499                 if (cl->avgidle < 0)
 500                         delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
 501                 if (cl->avgidle < cl->minidle)
 502                         cl->avgidle = cl->minidle;
 503                 cl->undertime = q->now + delay;
 504
 505                 if (delay > 0) {
 506                         sched += delay + cl->penalty;
 507                         cl->penalized = sched;
 508                         cl->cpriority = TC_CBQ_MAXPRIO;
 509                         q->pmask |= (1<<TC_CBQ_MAXPRIO);
 510
 511                         expires = ktime_set(0, 0);
 512                         expires = ktime_add_ns(expires, PSCHED_TICKS2NS(sched));
 513                         if (hrtimer_try_to_cancel(&q->delay_timer) &&
 514                             ktime_to_ns(ktime_sub(
 515                                         hrtimer_get_expires(&q->delay_timer),
 516                                         expires)) > 0)
 517                                 hrtimer_set_expires(&q->delay_timer, expires);
 518                         hrtimer_restart(&q->delay_timer);
 519                         cl->delayed = 1;
 520                         cl->xstats.overactions++;
 521                         return;
 522                 }
 523                 delay = 1;
 524         }
 525         if (q->wd_expires == 0 || q->wd_expires > delay)
 526                 q->wd_expires = delay;
 527 }
 528
 529 /* TC_CBQ_OVL_LOWPRIO: penalize class by lowering its priority band */
 530
 531 static void cbq_ovl_lowprio(struct cbq_class *cl)
 532 {
 533         struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
 534
 535         cl->penalized = q->now + cl->penalty;
 536
 537         if (cl->cpriority != cl->priority2) {
 538                 cl->cpriority = cl->priority2;
 539                 q->pmask |= (1<<cl->cpriority);
 540                 cl->xstats.overactions++;
 541         }
 542         cbq_ovl_classic(cl);
 543 }
 544
 545 /* TC_CBQ_OVL_DROP: penalize class by dropping */
 546
 547 static void cbq_ovl_drop(struct cbq_class *cl)
 548 {
 549         if (cl->q->ops->drop)
 550                 if (cl->q->ops->drop(cl->q))
 551                         cl->qdisc->q.qlen--;
 552         cl->xstats.overactions++;
 553         cbq_ovl_classic(cl);
 554 }
 555
 556 static psched_tdiff_t cbq_undelay_prio(struct cbq_sched_data *q, int prio,
 557                                        psched_time_t now)
 558 {
 559         struct cbq_class *cl;
 560         struct cbq_class *cl_prev = q->active[prio];
 561         psched_time_t sched = now;
 562
 563         if (cl_prev == NULL)
 564                 return 0;
 565
 566         do {
 567                 cl = cl_prev->next_alive;
 568                 if (now - cl->penalized > 0) {
 569                         cl_prev->next_alive = cl->next_alive;
 570                         cl->next_alive = NULL;
 571                         cl->cpriority = cl->priority;
 572                         cl->delayed = 0;
 573                         cbq_activate_class(cl);
 574
 575                         if (cl == q->active[prio]) {
 576                                 q->active[prio] = cl_prev;
 577                                 if (cl == q->active[prio]) {
 578                                         q->active[prio] = NULL;
 579                                         return 0;
 580                                 }
 581                         }
 582
 583                         cl = cl_prev->next_alive;
 584                 } else if (sched - cl->penalized > 0)
 585                         sched = cl->penalized;
 586         } while ((cl_prev = cl) != q->active[prio]);
 587
 588         return sched - now;
 589 }
 590
 591 static enum hrtimer_restart cbq_undelay(struct hrtimer *timer)
 592 {
 593         struct cbq_sched_data *q = container_of(timer, struct cbq_sched_data,
 594                                                 delay_timer);
 595         struct Qdisc *sch = q->watchdog.qdisc;
 596         psched_time_t now;
 597         psched_tdiff_t delay = 0;
 598         unsigned pmask;
 599
 600         now = psched_get_time();
 601
 602         pmask = q->pmask;
 603         q->pmask = 0;
 604
 605         while (pmask) {
 606                 int prio = ffz(~pmask);
 607                 psched_tdiff_t tmp;
 608
 609                 pmask &= ~(1<<prio);
 610
 611                 tmp = cbq_undelay_prio(q, prio, now);
 612                 if (tmp > 0) {
 613                         q->pmask |= 1<<prio;
 614                         if (tmp < delay || delay == 0)
 615                                 delay = tmp;
 616                 }
 617         }
 618
 619         if (delay) {
 620                 ktime_t time;
 621
 622                 time = ktime_set(0, 0);
 623                 time = ktime_add_ns(time, PSCHED_TICKS2NS(now + delay));
 624                 hrtimer_start(&q->delay_timer, time, HRTIMER_MODE_ABS);
 625         }
 626
 627         sch->flags &= ~TCQ_F_THROTTLED;
 628         __netif_schedule(qdisc_root(sch));
 629         return HRTIMER_NORESTART;
 630 }
 631
 632 #ifdef CONFIG_NET_CLS_ACT
 633 static int cbq_reshape_fail(struct sk_buff *skb, struct Qdisc *child)
 634 {
 635         struct Qdisc *sch = child->__parent;
 636         struct cbq_sched_data *q = qdisc_priv(sch);
 637         struct cbq_class *cl = q->rx_class;
 638
 639         q->rx_class = NULL;
 640
 641         if (cl && (cl = cbq_reclassify(skb, cl)) != NULL) {
 642                 int ret;
 643
 644                 cbq_mark_toplevel(q, cl);
 645
 646                 q->rx_class = cl;
 647                 cl->q->__parent = sch;
 648
 649                 ret = qdisc_enqueue(skb, cl->q);
 650                 if (ret == NET_XMIT_SUCCESS) {
 651                         sch->q.qlen++;
 652                         sch->bstats.packets++;
 653                         sch->bstats.bytes += qdisc_pkt_len(skb);
 654                         if (!cl->next_alive)
 655                                 cbq_activate_class(cl);
 656                         return 0;
 657                 }
 658                 if (net_xmit_drop_count(ret))
 659                         sch->qstats.drops++;
 660                 return 0;
 661         }
 662
 663         sch->qstats.drops++;
 664         return -1;
 665 }
 666 #endif
 667
 668 /*
 669    It is mission critical procedure.
 670
 671    We "regenerate" toplevel cutoff, if transmitting class
 672    has backlog and it is not regulated. It is not part of
 673    original CBQ description, but looks more reasonable.
 674    Probably, it is wrong. This question needs further investigation.
 675 */
 676
 677 static __inline__ void
 678 cbq_update_toplevel(struct cbq_sched_data *q, struct cbq_class *cl,
 679                     struct cbq_class *borrowed)
 680 {
 681         if (cl && q->toplevel >= borrowed->level) {
 682                 if (cl->q->q.qlen > 1) {
 683                         do {
 684                                 if (borrowed->undertime == PSCHED_PASTPERFECT) {
 685                                         q->toplevel = borrowed->level;
 686                                         return;
 687                                 }
 688                         } while ((borrowed=borrowed->borrow) != NULL);
 689                 }
 690 #if 0
 691         /* It is not necessary now. Uncommenting it
 692            will save CPU cycles, but decrease fairness.
 693          */
 694                 q->toplevel = TC_CBQ_MAXLEVEL;
 695 #endif
 696         }
 697 }
 698
 699 static void
 700 cbq_update(struct cbq_sched_data *q)
 701 {
 702         struct cbq_class *this = q->tx_class;
 703         struct cbq_class *cl = this;
 704         int len = q->tx_len;
 705
 706         q->tx_class = NULL;
 707
 708         for ( ; cl; cl = cl->share) {
 709                 long avgidle = cl->avgidle;
 710                 long idle;
 711
 712                 cl->bstats.packets++;
 713                 cl->bstats.bytes += len;
 714
 715                 /*
 716                    (now - last) is total time between packet right edges.
 717                    (last_pktlen/rate) is "virtual" busy time, so that
 718
 719                          idle = (now - last) - last_pktlen/rate
 720                  */
 721
 722                 idle = q->now - cl->last;
 723                 if ((unsigned long)idle > 128*1024*1024) {
 724                         avgidle = cl->maxidle;
 725                 } else {
 726                         idle -= L2T(cl, len);
 727
 728                 /* true_avgidle := (1-W)*true_avgidle + W*idle,
 729                    where W=2^{-ewma_log}. But cl->avgidle is scaled:
 730                    cl->avgidle == true_avgidle/W,
 731                    hence:
 732                  */
 733                         avgidle += idle - (avgidle>>cl->ewma_log);
 734                 }
 735
 736                 if (avgidle <= 0) {
 737                         /* Overlimit or at-limit */
 738
 739                         if (avgidle < cl->minidle)
 740                                 avgidle = cl->minidle;
 741
 742                         cl->avgidle = avgidle;
 743
 744                         /* Calculate expected time, when this class
 745                            will be allowed to send.
 746                            It will occur, when:
 747                            (1-W)*true_avgidle + W*delay = 0, i.e.
 748                            idle = (1/W - 1)*(-true_avgidle)
 749                            or
 750                            idle = (1 - W)*(-cl->avgidle);
 751                          */
 752                         idle = (-avgidle) - ((-avgidle) >> cl->ewma_log);
 753
 754                         /*
 755                            That is not all.
 756                            To maintain the rate allocated to the class,
 757                            we add to undertime virtual clock,
 758                            necessary to complete transmitted packet.
 759                            (len/phys_bandwidth has been already passed
 760                            to the moment of cbq_update)
 761                          */
 762
 763                         idle -= L2T(&q->link, len);
 764                         idle += L2T(cl, len);
 765
 766                         cl->undertime = q->now + idle;
 767                 } else {
 768                         /* Underlimit */
 769
 770                         cl->undertime = PSCHED_PASTPERFECT;
 771                         if (avgidle > cl->maxidle)
 772                                 cl->avgidle = cl->maxidle;
 773                         else
 774                                 cl->avgidle = avgidle;
 775                 }
 776                 cl->last = q->now;
 777         }
 778
 779         cbq_update_toplevel(q, this, q->tx_borrowed);
 780 }
 781
 782 static __inline__ struct cbq_class *
 783 cbq_under_limit(struct cbq_class *cl)
 784 {
 785         struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
 786         struct cbq_class *this_cl = cl;
 787
 788         if (cl->tparent == NULL)
 789                 return cl;
 790
 791         if (cl->undertime == PSCHED_PASTPERFECT || q->now >= cl->undertime) {
 792                 cl->delayed = 0;
 793                 return cl;
 794         }
 795
 796         do {
 797                 /* It is very suspicious place. Now overlimit
 798                    action is generated for not bounded classes
 799                    only if link is completely congested.
 800                    Though it is in agree with ancestor-only paradigm,
 801                    it looks very stupid. Particularly,
 802                    it means that this chunk of code will either
 803                    never be called or result in strong amplification
 804                    of burstiness. Dangerous, silly, and, however,
 805                    no another solution exists.
 806                  */
 807                 if ((cl = cl->borrow) == NULL) {
 808                         this_cl->qstats.overlimits++;
 809                         this_cl->overlimit(this_cl);
 810                         return NULL;
 811                 }
 812                 if (cl->level > q->toplevel)
 813                         return NULL;
 814         } while (cl->undertime != PSCHED_PASTPERFECT && q->now < cl->undertime);
 815
 816         cl->delayed = 0;
 817         return cl;
 818 }
 819
 820 static __inline__ struct sk_buff *
 821 cbq_dequeue_prio(struct Qdisc *sch, int prio)
 822 {
 823         struct cbq_sched_data *q = qdisc_priv(sch);
 824         struct cbq_class *cl_tail, *cl_prev, *cl;
 825         struct sk_buff *skb;
 826         int deficit;
 827
 828         cl_tail = cl_prev = q->active[prio];
 829         cl = cl_prev->next_alive;
 830
 831         do {
 832                 deficit = 0;
 833
 834                 /* Start round */
 835                 do {
 836                         struct cbq_class *borrow = cl;
 837
 838                         if (cl->q->q.qlen &&
 839                             (borrow = cbq_under_limit(cl)) == NULL)
 840                                 goto skip_class;
 841
 842                         if (cl->deficit <= 0) {
 843                                 /* Class exhausted its allotment per
 844                                    this round. Switch to the next one.
 845                                  */
 846                                 deficit = 1;
 847                                 cl->deficit += cl->quantum;
 848                                 goto next_class;
 849                         }
 850
 851                         skb = cl->q->dequeue(cl->q);
 852
 853                         /* Class did not give us any skb :-(
 854                            It could occur even if cl->q->q.qlen != 0
 855                            f.e. if cl->q == "tbf"
 856                          */
 857                         if (skb == NULL)
 858                                 goto skip_class;
 859
 860                         cl->deficit -= qdisc_pkt_len(skb);
 861                         q->tx_class = cl;
 862                         q->tx_borrowed = borrow;
 863                         if (borrow != cl) {
 864 #ifndef CBQ_XSTATS_BORROWS_BYTES
 865                                 borrow->xstats.borrows++;
 866                                 cl->xstats.borrows++;
 867 #else
 868                                 borrow->xstats.borrows += qdisc_pkt_len(skb);
 869                                 cl->xstats.borrows += qdisc_pkt_len(skb);
 870 #endif
 871                         }
 872                         q->tx_len = qdisc_pkt_len(skb);
 873
 874                         if (cl->deficit <= 0) {
 875                                 q->active[prio] = cl;
 876                                 cl = cl->next_alive;
 877                                 cl->deficit += cl->quantum;
 878                         }
 879                         return skb;
 880
 881 skip_class:
 882                         if (cl->q->q.qlen == 0 || prio != cl->cpriority) {
 883                                 /* Class is empty or penalized.
 884                                    Unlink it from active chain.
 885                                  */
 886                                 cl_prev->next_alive = cl->next_alive;
 887                                 cl->next_alive = NULL;
 888
 889                                 /* Did cl_tail point to it? */
 890                                 if (cl == cl_tail) {
 891                                         /* Repair it! */
 892                                         cl_tail = cl_prev;
 893
 894                                         /* Was it the last class in this band? */
 895                                         if (cl == cl_tail) {
 896                                                 /* Kill the band! */
 897                                                 q->active[prio] = NULL;
 898                                                 q->activemask &= ~(1<<prio);
 899                                                 if (cl->q->q.qlen)
 900                                                         cbq_activate_class(cl);
 901                                                 return NULL;
 902                                         }
 903
 904                                         q->active[prio] = cl_tail;
 905                                 }
 906                                 if (cl->q->q.qlen)
 907                                         cbq_activate_class(cl);
 908
 909                                 cl = cl_prev;
 910                         }
 911
 912 next_class:
 913                         cl_prev = cl;
 914                         cl = cl->next_alive;
 915                 } while (cl_prev != cl_tail);
 916         } while (deficit);
 917
 918         q->active[prio] = cl_prev;
 919
 920         return NULL;
 921 }
 922
 923 static __inline__ struct sk_buff *
 924 cbq_dequeue_1(struct Qdisc *sch)
 925 {
 926         struct cbq_sched_data *q = qdisc_priv(sch);
 927         struct sk_buff *skb;
 928         unsigned activemask;
 929
 930         activemask = q->activemask&0xFF;
 931         while (activemask) {
 932                 int prio = ffz(~activemask);
 933                 activemask &= ~(1<<prio);
 934                 skb = cbq_dequeue_prio(sch, prio);
 935                 if (skb)
 936                         return skb;
 937         }
 938         return NULL;
 939 }
 940
 941 static struct sk_buff *
 942 cbq_dequeue(struct Qdisc *sch)
 943 {
 944         struct sk_buff *skb;
 945         struct cbq_sched_data *q = qdisc_priv(sch);
 946         psched_time_t now;
 947         psched_tdiff_t incr;
 948
 949         now = psched_get_time();
 950         incr = now - q->now_rt;
 951
 952         if (q->tx_class) {
 953                 psched_tdiff_t incr2;
 954                 /* Time integrator. We calculate EOS time
 955                    by adding expected packet transmission time.
 956                    If real time is greater, we warp artificial clock,
 957                    so that:
 958
 959                    cbq_time = max(real_time, work);
 960                  */
 961                 incr2 = L2T(&q->link, q->tx_len);
 962                 q->now += incr2;
 963                 cbq_update(q);
 964                 if ((incr -= incr2) < 0)
 965                         incr = 0;
 966         }
 967         q->now += incr;
 968         q->now_rt = now;
 969
 970         for (;;) {
 971                 q->wd_expires = 0;
 972
 973                 skb = cbq_dequeue_1(sch);
 974                 if (skb) {
 975                         sch->q.qlen--;
 976                         sch->flags &= ~TCQ_F_THROTTLED;
 977                         return skb;
 978                 }
 979
 980                 /* All the classes are overlimit.
 981
 982                    It is possible, if:
 983
 984                    1. Scheduler is empty.
 985                    2. Toplevel cutoff inhibited borrowing.
 986                    3. Root class is overlimit.
 987
 988                    Reset 2d and 3d conditions and retry.
 989
 990                    Note, that NS and cbq-2.0 are buggy, peeking
 991                    an arbitrary class is appropriate for ancestor-only
 992                    sharing, but not for toplevel algorithm.
 993
 994                    Our version is better, but slower, because it requires
 995                    two passes, but it is unavoidable with top-level sharing.
 996                 */
 997
 998                 if (q->toplevel == TC_CBQ_MAXLEVEL &&
 999                     q->link.undertime == PSCHED_PASTPERFECT)
1000                         break;
1001
1002                 q->toplevel = TC_CBQ_MAXLEVEL;
1003                 q->link.undertime = PSCHED_PASTPERFECT;
1004         }
1005
1006         /* No packets in scheduler or nobody wants to give them to us :-(
1007            Sigh... start watchdog timer in the last case. */
1008
1009         if (sch->q.qlen) {
1010                 sch->qstats.overlimits++;
1011                 if (q->wd_expires)
1012                         qdisc_watchdog_schedule(&q->watchdog,
1013                                                 now + q->wd_expires);
1014         }
1015         return NULL;
1016 }
1017
1018 /* CBQ class maintanance routines */
1019
1020 static void cbq_adjust_levels(struct cbq_class *this)
1021 {
1022         if (this == NULL)
1023                 return;
1024
1025         do {
1026                 int level = 0;
1027                 struct cbq_class *cl;
1028
1029                 if ((cl = this->children) != NULL) {
1030                         do {
1031                                 if (cl->level > level)
1032                                         level = cl->level;
1033                         } while ((cl = cl->sibling) != this->children);
1034                 }
1035                 this->level = level+1;
1036         } while ((this = this->tparent) != NULL);
1037 }
1038
1039 static void cbq_normalize_quanta(struct cbq_sched_data *q, int prio)
1040 {
1041         struct cbq_class *cl;
1042         struct hlist_node *n;
1043         unsigned int h;
1044
1045         if (q->quanta[prio] == 0)
1046                 return;
1047
1048         for (h = 0; h < q->clhash.hashsize; h++) {
1049                 hlist_for_each_entry(cl, n, &q->clhash.hash[h], common.hnode) {
1050                         /* BUGGGG... Beware! This expression suffer of
1051                            arithmetic overflows!
1052                          */
1053                         if (cl->priority == prio) {
1054                                 cl->quantum = (cl->weight*cl->allot*q->nclasses[prio])/
1055                                         q->quanta[prio];
1056                         }
1057                         if (cl->quantum <= 0 || cl->quantum>32*qdisc_dev(cl->qdisc)->mtu) {
1058                                 printk(KERN_WARNING "CBQ: class %08x has bad quantum==%ld, repaired.\n", cl->common.classid, cl->quantum);
1059                                 cl->quantum = qdisc_dev(cl->qdisc)->mtu/2 + 1;
1060                         }
1061                 }
1062         }
1063 }
1064
1065 static void cbq_sync_defmap(struct cbq_class *cl)
1066 {
1067         struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
1068         struct cbq_class *split = cl->split;
1069         unsigned h;
1070         int i;
1071
1072         if (split == NULL)
1073                 return;
1074
1075         for (i=0; i<=TC_PRIO_MAX; i++) {
1076                 if (split->defaults[i] == cl && !(cl->defmap&(1<<i)))
1077                         split->defaults[i] = NULL;
1078         }
1079
1080         for (i=0; i<=TC_PRIO_MAX; i++) {
1081                 int level = split->level;
1082
1083                 if (split->defaults[i])
1084                         continue;
1085
1086                 for (h = 0; h < q->clhash.hashsize; h++) {
1087                         struct hlist_node *n;
1088                         struct cbq_class *c;
1089
1090                         hlist_for_each_entry(c, n, &q->clhash.hash[h],
1091                                              common.hnode) {
1092                                 if (c->split == split && c->level < level &&
1093                                     c->defmap&(1<<i)) {
1094                                         split->defaults[i] = c;
1095                                         level = c->level;
1096                                 }
1097                         }
1098                 }
1099         }
1100 }
1101
1102 static void cbq_change_defmap(struct cbq_class *cl, u32 splitid, u32 def, u32 mask)
1103 {
1104         struct cbq_class *split = NULL;
1105
1106         if (splitid == 0) {
1107                 if ((split = cl->split) == NULL)
1108                         return;
1109                 splitid = split->common.classid;
1110         }
1111
1112         if (split == NULL || split->common.classid != splitid) {
1113                 for (split = cl->tparent; split; split = split->tparent)
1114                         if (split->common.classid == splitid)
1115                                 break;
1116         }
1117
1118         if (split == NULL)
1119                 return;
1120
1121         if (cl->split != split) {
1122                 cl->defmap = 0;
1123                 cbq_sync_defmap(cl);
1124                 cl->split = split;
1125                 cl->defmap = def&mask;
1126         } else
1127                 cl->defmap = (cl->defmap&~mask)|(def&mask);
1128
1129         cbq_sync_defmap(cl);
1130 }
1131
1132 static void cbq_unlink_class(struct cbq_class *this)
1133 {
1134         struct cbq_class *cl, **clp;
1135         struct cbq_sched_data *q = qdisc_priv(this->qdisc);
1136
1137         qdisc_class_hash_remove(&q->clhash, &this->common);
1138
1139         if (this->tparent) {
1140                 clp=&this->sibling;
1141                 cl = *clp;
1142                 do {
1143                         if (cl == this) {
1144                                 *clp = cl->sibling;
1145                                 break;
1146                         }
1147                         clp = &cl->sibling;
1148                 } while ((cl = *clp) != this->sibling);
1149
1150                 if (this->tparent->children == this) {
1151                         this->tparent->children = this->sibling;
1152                         if (this->sibling == this)
1153                                 this->tparent->children = NULL;
1154                 }
1155         } else {
1156                 WARN_ON(this->sibling != this);
1157         }
1158 }
1159
1160 static void cbq_link_class(struct cbq_class *this)
1161 {
1162         struct cbq_sched_data *q = qdisc_priv(this->qdisc);
1163         struct cbq_class *parent = this->tparent;
1164
1165         this->sibling = this;
1166         qdisc_class_hash_insert(&q->clhash, &this->common);
1167
1168         if (parent == NULL)
1169                 return;
1170
1171         if (parent->children == NULL) {
1172                 parent->children = this;
1173         } else {
1174                 this->sibling = parent->children->sibling;
1175                 parent->children->sibling = this;
1176         }
1177 }
1178
1179 static unsigned int cbq_drop(struct Qdisc* sch)
1180 {
1181         struct cbq_sched_data *q = qdisc_priv(sch);
1182         struct cbq_class *cl, *cl_head;
1183         int prio;
1184         unsigned int len;
1185
1186         for (prio = TC_CBQ_MAXPRIO; prio >= 0; prio--) {
1187                 if ((cl_head = q->active[prio]) == NULL)
1188                         continue;
1189
1190                 cl = cl_head;
1191                 do {
1192                         if (cl->q->ops->drop && (len = cl->q->ops->drop(cl->q))) {
1193                                 sch->q.qlen--;
1194                                 if (!cl->q->q.qlen)
1195                                         cbq_deactivate_class(cl);
1196                                 return len;
1197                         }
1198                 } while ((cl = cl->next_alive) != cl_head);
1199         }
1200         return 0;
1201 }
1202
1203 static void
1204 cbq_reset(struct Qdisc* sch)
1205 {
1206         struct cbq_sched_data *q = qdisc_priv(sch);
1207         struct cbq_class *cl;
1208         struct hlist_node *n;
1209         int prio;
1210         unsigned h;
1211
1212         q->activemask = 0;
1213         q->pmask = 0;
1214         q->tx_class = NULL;
1215         q->tx_borrowed = NULL;
1216         qdisc_watchdog_cancel(&q->watchdog);
1217         hrtimer_cancel(&q->delay_timer);
1218         q->toplevel = TC_CBQ_MAXLEVEL;
1219         q->now = psched_get_time();
1220         q->now_rt = q->now;
1221
1222         for (prio = 0; prio <= TC_CBQ_MAXPRIO; prio++)
1223                 q->active[prio] = NULL;
1224
1225         for (h = 0; h < q->clhash.hashsize; h++) {
1226                 hlist_for_each_entry(cl, n, &q->clhash.hash[h], common.hnode) {
1227                         qdisc_reset(cl->q);
1228
1229                         cl->next_alive = NULL;
1230                         cl->undertime = PSCHED_PASTPERFECT;
1231                         cl->avgidle = cl->maxidle;
1232                         cl->deficit = cl->quantum;
1233                         cl->cpriority = cl->priority;
1234                 }
1235         }
1236         sch->q.qlen = 0;
1237 }
1238
1239
1240 static int cbq_set_lss(struct cbq_class *cl, struct tc_cbq_lssopt *lss)
1241 {
1242         if (lss->change&TCF_CBQ_LSS_FLAGS) {
1243                 cl->share = (lss->flags&TCF_CBQ_LSS_ISOLATED) ? NULL : cl->tparent;
1244                 cl->borrow = (lss->flags&TCF_CBQ_LSS_BOUNDED) ? NULL : cl->tparent;
1245         }
1246         if (lss->change&TCF_CBQ_LSS_EWMA)
1247                 cl->ewma_log = lss->ewma_log;
1248         if (lss->change&TCF_CBQ_LSS_AVPKT)
1249                 cl->avpkt = lss->avpkt;
1250         if (lss->change&TCF_CBQ_LSS_MINIDLE)
1251                 cl->minidle = -(long)lss->minidle;
1252         if (lss->change&TCF_CBQ_LSS_MAXIDLE) {
1253                 cl->maxidle = lss->maxidle;
1254                 cl->avgidle = lss->maxidle;
1255         }
1256         if (lss->change&TCF_CBQ_LSS_OFFTIME)
1257                 cl->offtime = lss->offtime;
1258         return 0;
1259 }
1260
1261 static void cbq_rmprio(struct cbq_sched_data *q, struct cbq_class *cl)
1262 {
1263         q->nclasses[cl->priority]--;
1264         q->quanta[cl->priority] -= cl->weight;
1265         cbq_normalize_quanta(q, cl->priority);
1266 }
1267
1268 static void cbq_addprio(struct cbq_sched_data *q, struct cbq_class *cl)
1269 {
1270         q->nclasses[cl->priority]++;
1271         q->quanta[cl->priority] += cl->weight;
1272         cbq_normalize_quanta(q, cl->priority);
1273 }
1274
1275 static int cbq_set_wrr(struct cbq_class *cl, struct tc_cbq_wrropt *wrr)
1276 {
1277         struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
1278
1279         if (wrr->allot)
1280                 cl->allot = wrr->allot;
1281         if (wrr->weight)
1282                 cl->weight = wrr->weight;
1283         if (wrr->priority) {
1284                 cl->priority = wrr->priority-1;
1285                 cl->cpriority = cl->priority;
1286                 if (cl->priority >= cl->priority2)
1287                         cl->priority2 = TC_CBQ_MAXPRIO-1;
1288         }
1289
1290         cbq_addprio(q, cl);
1291         return 0;
1292 }
1293
1294 static int cbq_set_overlimit(struct cbq_class *cl, struct tc_cbq_ovl *ovl)
1295 {
1296         switch (ovl->strategy) {
1297         case TC_CBQ_OVL_CLASSIC:
1298                 cl->overlimit = cbq_ovl_classic;
1299                 break;
1300         case TC_CBQ_OVL_DELAY:
1301                 cl->overlimit = cbq_ovl_delay;
1302                 break;
1303         case TC_CBQ_OVL_LOWPRIO:
1304                 if (ovl->priority2-1 >= TC_CBQ_MAXPRIO ||
1305                     ovl->priority2-1 <= cl->priority)
1306                         return -EINVAL;
1307                 cl->priority2 = ovl->priority2-1;
1308                 cl->overlimit = cbq_ovl_lowprio;
1309                 break;
1310         case TC_CBQ_OVL_DROP:
1311                 cl->overlimit = cbq_ovl_drop;
1312                 break;
1313         case TC_CBQ_OVL_RCLASSIC:
1314                 cl->overlimit = cbq_ovl_rclassic;
1315                 break;
1316         default:
1317                 return -EINVAL;
1318         }
1319         cl->penalty = ovl->penalty;
1320         return 0;
1321 }
1322
1323 #ifdef CONFIG_NET_CLS_ACT
1324 static int cbq_set_police(struct cbq_class *cl, struct tc_cbq_police *p)
1325 {
1326         cl->police = p->police;
1327
1328         if (cl->q->handle) {
1329                 if (p->police == TC_POLICE_RECLASSIFY)
1330                         cl->q->reshape_fail = cbq_reshape_fail;
1331                 else
1332                         cl->q->reshape_fail = NULL;
1333         }
1334         return 0;
1335 }
1336 #endif
1337
1338 static int cbq_set_fopt(struct cbq_class *cl, struct tc_cbq_fopt *fopt)
1339 {
1340         cbq_change_defmap(cl, fopt->split, fopt->defmap, fopt->defchange);
1341         return 0;
1342 }
1343
1344 static const struct nla_policy cbq_policy[TCA_CBQ_MAX + 1] = {
1345         [TCA_CBQ_LSSOPT]        = { .len = sizeof(struct tc_cbq_lssopt) },
1346         [TCA_CBQ_WRROPT]        = { .len = sizeof(struct tc_cbq_wrropt) },
1347         [TCA_CBQ_FOPT]          = { .len = sizeof(struct tc_cbq_fopt) },
1348         [TCA_CBQ_OVL_STRATEGY]  = { .len = sizeof(struct tc_cbq_ovl) },
1349         [TCA_CBQ_RATE]          = { .len = sizeof(struct tc_ratespec) },
1350         [TCA_CBQ_RTAB]          = { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
1351         [TCA_CBQ_POLICE]        = { .len = sizeof(struct tc_cbq_police) },
1352 };
1353
1354 static int cbq_init(struct Qdisc *sch, struct nlattr *opt)
1355 {
1356         struct cbq_sched_data *q = qdisc_priv(sch);
1357         struct nlattr *tb[TCA_CBQ_MAX + 1];
1358         struct tc_ratespec *r;
1359         int err;
1360
1361         err = nla_parse_nested(tb, TCA_CBQ_MAX, opt, cbq_policy);
1362         if (err < 0)
1363                 return err;
1364
1365         if (tb[TCA_CBQ_RTAB] == NULL || tb[TCA_CBQ_RATE] == NULL)
1366                 return -EINVAL;
1367
1368         r = nla_data(tb[TCA_CBQ_RATE]);
1369
1370         if ((q->link.R_tab = qdisc_get_rtab(r, tb[TCA_CBQ_RTAB])) == NULL)
1371                 return -EINVAL;
1372
1373         err = qdisc_class_hash_init(&q->clhash);
1374         if (err < 0)
1375                 goto put_rtab;
1376
1377         q->link.refcnt = 1;
1378         q->link.sibling = &q->link;
1379         q->link.common.classid = sch->handle;
1380         q->link.qdisc = sch;
1381         if (!(q->link.q = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue,
1382                                             &pfifo_qdisc_ops,
1383                                             sch->handle)))
1384                 q->link.q = &noop_qdisc;
1385
1386         q->link.priority = TC_CBQ_MAXPRIO-1;
1387         q->link.priority2 = TC_CBQ_MAXPRIO-1;
1388         q->link.cpriority = TC_CBQ_MAXPRIO-1;
1389         q->link.ovl_strategy = TC_CBQ_OVL_CLASSIC;
1390         q->link.overlimit = cbq_ovl_classic;
1391         q->link.allot = psched_mtu(qdisc_dev(sch));
1392         q->link.quantum = q->link.allot;
1393         q->link.weight = q->link.R_tab->rate.rate;
1394
1395         q->link.ewma_log = TC_CBQ_DEF_EWMA;
1396         q->link.avpkt = q->link.allot/2;
1397         q->link.minidle = -0x7FFFFFFF;
1398
1399         qdisc_watchdog_init(&q->watchdog, sch);
1400         hrtimer_init(&q->delay_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
1401         q->delay_timer.function = cbq_undelay;
1402         q->toplevel = TC_CBQ_MAXLEVEL;
1403         q->now = psched_get_time();
1404         q->now_rt = q->now;
1405
1406         cbq_link_class(&q->link);
1407
1408         if (tb[TCA_CBQ_LSSOPT])
1409                 cbq_set_lss(&q->link, nla_data(tb[TCA_CBQ_LSSOPT]));
1410
1411         cbq_addprio(q, &q->link);
1412         return 0;
1413
1414 put_rtab:
1415         qdisc_put_rtab(q->link.R_tab);
1416         return err;
1417 }
1418
1419 static __inline__ int cbq_dump_rate(struct sk_buff *skb, struct cbq_class *cl)
1420 {
1421         unsigned char *b = skb_tail_pointer(skb);
1422
1423         NLA_PUT(skb, TCA_CBQ_RATE, sizeof(cl->R_tab->rate), &cl->R_tab->rate);
1424         return skb->len;
1425
1426 nla_put_failure:
1427         nlmsg_trim(skb, b);
1428         return -1;
1429 }
1430
1431 static __inline__ int cbq_dump_lss(struct sk_buff *skb, struct cbq_class *cl)
1432 {
1433         unsigned char *b = skb_tail_pointer(skb);
1434         struct tc_cbq_lssopt opt;
1435
1436         opt.flags = 0;
1437         if (cl->borrow == NULL)
1438                 opt.flags |= TCF_CBQ_LSS_BOUNDED;
1439         if (cl->share == NULL)
1440                 opt.flags |= TCF_CBQ_LSS_ISOLATED;
1441         opt.ewma_log = cl->ewma_log;
1442         opt.level = cl->level;
1443         opt.avpkt = cl->avpkt;
1444         opt.maxidle = cl->maxidle;
1445         opt.minidle = (u32)(-cl->minidle);
1446         opt.offtime = cl->offtime;
1447         opt.change = ~0;
1448         NLA_PUT(skb, TCA_CBQ_LSSOPT, sizeof(opt), &opt);
1449         return skb->len;
1450
1451 nla_put_failure:
1452         nlmsg_trim(skb, b);
1453         return -1;
1454 }
1455
1456 static __inline__ int cbq_dump_wrr(struct sk_buff *skb, struct cbq_class *cl)
1457 {
1458         unsigned char *b = skb_tail_pointer(skb);
1459         struct tc_cbq_wrropt opt;
1460
1461         opt.flags = 0;
1462         opt.allot = cl->allot;
1463         opt.priority = cl->priority+1;
1464         opt.cpriority = cl->cpriority+1;
1465         opt.weight = cl->weight;
1466         NLA_PUT(skb, TCA_CBQ_WRROPT, sizeof(opt), &opt);
1467         return skb->len;
1468
1469 nla_put_failure:
1470         nlmsg_trim(skb, b);
1471         return -1;
1472 }
1473
1474 static __inline__ int cbq_dump_ovl(struct sk_buff *skb, struct cbq_class *cl)
1475 {
1476         unsigned char *b = skb_tail_pointer(skb);
1477         struct tc_cbq_ovl opt;
1478
1479         opt.strategy = cl->ovl_strategy;
1480         opt.priority2 = cl->priority2+1;
1481         opt.pad = 0;
1482         opt.penalty = cl->penalty;
1483         NLA_PUT(skb, TCA_CBQ_OVL_STRATEGY, sizeof(opt), &opt);
1484         return skb->len;
1485
1486 nla_put_failure:
1487         nlmsg_trim(skb, b);
1488         return -1;
1489 }
1490
1491 static __inline__ int cbq_dump_fopt(struct sk_buff *skb, struct cbq_class *cl)
1492 {
1493         unsigned char *b = skb_tail_pointer(skb);
1494         struct tc_cbq_fopt opt;
1495
1496         if (cl->split || cl->defmap) {
1497                 opt.split = cl->split ? cl->split->common.classid : 0;
1498                 opt.defmap = cl->defmap;
1499                 opt.defchange = ~0;
1500                 NLA_PUT(skb, TCA_CBQ_FOPT, sizeof(opt), &opt);
1501         }
1502         return skb->len;
1503
1504 nla_put_failure:
1505         nlmsg_trim(skb, b);
1506         return -1;
1507 }
1508
1509 #ifdef CONFIG_NET_CLS_ACT
1510 static __inline__ int cbq_dump_police(struct sk_buff *skb, struct cbq_class *cl)
1511 {
1512         unsigned char *b = skb_tail_pointer(skb);
1513         struct tc_cbq_police opt;
1514
1515         if (cl->police) {
1516                 opt.police = cl->police;
1517                 opt.__res1 = 0;
1518                 opt.__res2 = 0;
1519                 NLA_PUT(skb, TCA_CBQ_POLICE, sizeof(opt), &opt);
1520         }
1521         return skb->len;
1522
1523 nla_put_failure:
1524         nlmsg_trim(skb, b);
1525         return -1;
1526 }
1527 #endif
1528
1529 static int cbq_dump_attr(struct sk_buff *skb, struct cbq_class *cl)
1530 {
1531         if (cbq_dump_lss(skb, cl) < 0 ||
1532             cbq_dump_rate(skb, cl) < 0 ||
1533             cbq_dump_wrr(skb, cl) < 0 ||
1534             cbq_dump_ovl(skb, cl) < 0 ||
1535 #ifdef CONFIG_NET_CLS_ACT
1536             cbq_dump_police(skb, cl) < 0 ||
1537 #endif
1538             cbq_dump_fopt(skb, cl) < 0)
1539                 return -1;
1540         return 0;
1541 }
1542
1543 static int cbq_dump(struct Qdisc *sch, struct sk_buff *skb)
1544 {
1545         struct cbq_sched_data *q = qdisc_priv(sch);
1546         struct nlattr *nest;
1547
1548         nest = nla_nest_start(skb, TCA_OPTIONS);
1549         if (nest == NULL)
1550                 goto nla_put_failure;
1551         if (cbq_dump_attr(skb, &q->link) < 0)
1552                 goto nla_put_failure;
1553         nla_nest_end(skb, nest);
1554         return skb->len;
1555
1556 nla_put_failure:
1557         nla_nest_cancel(skb, nest);
1558         return -1;
1559 }
1560
1561 static int
1562 cbq_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
1563 {
1564         struct cbq_sched_data *q = qdisc_priv(sch);
1565
1566         q->link.xstats.avgidle = q->link.avgidle;
1567         return gnet_stats_copy_app(d, &q->link.xstats, sizeof(q->link.xstats));
1568 }
1569
1570 static int
1571 cbq_dump_class(struct Qdisc *sch, unsigned long arg,
1572                struct sk_buff *skb, struct tcmsg *tcm)
1573 {
1574         struct cbq_class *cl = (struct cbq_class*)arg;
1575         struct nlattr *nest;
1576
1577         if (cl->tparent)
1578                 tcm->tcm_parent = cl->tparent->common.classid;
1579         else
1580                 tcm->tcm_parent = TC_H_ROOT;
1581         tcm->tcm_handle = cl->common.classid;
1582         tcm->tcm_info = cl->q->handle;
1583
1584         nest = nla_nest_start(skb, TCA_OPTIONS);
1585         if (nest == NULL)
1586                 goto nla_put_failure;
1587         if (cbq_dump_attr(skb, cl) < 0)
1588                 goto nla_put_failure;
1589         nla_nest_end(skb, nest);
1590         return skb->len;
1591
1592 nla_put_failure:
1593         nla_nest_cancel(skb, nest);
1594         return -1;
1595 }
1596
1597 static int
1598 cbq_dump_class_stats(struct Qdisc *sch, unsigned long arg,
1599         struct gnet_dump *d)
1600 {
1601         struct cbq_sched_data *q = qdisc_priv(sch);
1602         struct cbq_class *cl = (struct cbq_class*)arg;
1603
1604         cl->qstats.qlen = cl->q->q.qlen;
1605         cl->xstats.avgidle = cl->avgidle;
1606         cl->xstats.undertime = 0;
1607
1608         if (cl->undertime != PSCHED_PASTPERFECT)
1609                 cl->xstats.undertime = cl->undertime - q->now;
1610
1611         if (gnet_stats_copy_basic(d, &cl->bstats) < 0 ||
1612             gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
1613             gnet_stats_copy_queue(d, &cl->qstats) < 0)
1614                 return -1;
1615
1616         return gnet_stats_copy_app(d, &cl->xstats, sizeof(cl->xstats));
1617 }
1618
1619 static int cbq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
1620                      struct Qdisc **old)
1621 {
1622         struct cbq_class *cl = (struct cbq_class*)arg;
1623
1624         if (cl) {
1625                 if (new == NULL) {
1626                         new = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue,
1627                                                 &pfifo_qdisc_ops,
1628                                                 cl->common.classid);
1629                         if (new == NULL)
1630                                 return -ENOBUFS;
1631                 } else {
1632 #ifdef CONFIG_NET_CLS_ACT
1633                         if (cl->police == TC_POLICE_RECLASSIFY)
1634                                 new->reshape_fail = cbq_reshape_fail;
1635 #endif
1636                 }
1637                 sch_tree_lock(sch);
1638                 *old = cl->q;
1639                 cl->q = new;
1640                 qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
1641                 qdisc_reset(*old);
1642                 sch_tree_unlock(sch);
1643
1644                 return 0;
1645         }
1646         return -ENOENT;
1647 }
1648
1649 static struct Qdisc *
1650 cbq_leaf(struct Qdisc *sch, unsigned long arg)
1651 {
1652         struct cbq_class *cl = (struct cbq_class*)arg;
1653
1654         return cl ? cl->q : NULL;
1655 }
1656
1657 static void cbq_qlen_notify(struct Qdisc *sch, unsigned long arg)
1658 {
1659         struct cbq_class *cl = (struct cbq_class *)arg;
1660
1661         if (cl->q->q.qlen == 0)
1662                 cbq_deactivate_class(cl);
1663 }
1664
1665 static unsigned long cbq_get(struct Qdisc *sch, u32 classid)
1666 {
1667         struct cbq_sched_data *q = qdisc_priv(sch);
1668         struct cbq_class *cl = cbq_class_lookup(q, classid);
1669
1670         if (cl) {
1671                 cl->refcnt++;
1672                 return (unsigned long)cl;
1673         }
1674         return 0;
1675 }
1676
1677 static void cbq_destroy_class(struct Qdisc *sch, struct cbq_class *cl)
1678 {
1679         struct cbq_sched_data *q = qdisc_priv(sch);
1680
1681         WARN_ON(cl->filters);
1682
1683         tcf_destroy_chain(&cl->filter_list);
1684         qdisc_destroy(cl->q);
1685         qdisc_put_rtab(cl->R_tab);
1686         gen_kill_estimator(&cl->bstats, &cl->rate_est);
1687         if (cl != &q->link)
1688                 kfree(cl);
1689 }
1690
1691 static void
1692 cbq_destroy(struct Qdisc* sch)
1693 {
1694         struct cbq_sched_data *q = qdisc_priv(sch);
1695         struct hlist_node *n, *next;
1696         struct cbq_class *cl;
1697         unsigned h;
1698
1699 #ifdef CONFIG_NET_CLS_ACT
1700         q->rx_class = NULL;
1701 #endif
1702         /*
1703          * Filters must be destroyed first because we don't destroy the
1704          * classes from root to leafs which means that filters can still
1705          * be bound to classes which have been destroyed already. --TGR '04
1706          */
1707         for (h = 0; h < q->clhash.hashsize; h++) {
1708                 hlist_for_each_entry(cl, n, &q->clhash.hash[h], common.hnode)
1709                         tcf_destroy_chain(&cl->filter_list);
1710         }
1711         for (h = 0; h < q->clhash.hashsize; h++) {
1712                 hlist_for_each_entry_safe(cl, n, next, &q->clhash.hash[h],
1713                                           common.hnode)
1714                         cbq_destroy_class(sch, cl);
1715         }
1716         qdisc_class_hash_destroy(&q->clhash);
1717 }
1718
1719 static void cbq_put(struct Qdisc *sch, unsigned long arg)
1720 {
1721         struct cbq_class *cl = (struct cbq_class*)arg;
1722
1723         if (--cl->refcnt == 0) {
1724 #ifdef CONFIG_NET_CLS_ACT
1725                 spinlock_t *root_lock = qdisc_root_sleeping_lock(sch);
1726                 struct cbq_sched_data *q = qdisc_priv(sch);
1727
1728                 spin_lock_bh(root_lock);
1729                 if (q->rx_class == cl)
1730                         q->rx_class = NULL;
1731                 spin_unlock_bh(root_lock);
1732 #endif
1733
1734                 cbq_destroy_class(sch, cl);
1735         }
1736 }
1737
1738 static int
1739 cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **tca,
1740                  unsigned long *arg)
1741 {
1742         int err;
1743         struct cbq_sched_data *q = qdisc_priv(sch);
1744         struct cbq_class *cl = (struct cbq_class*)*arg;
1745         struct nlattr *opt = tca[TCA_OPTIONS];
1746         struct nlattr *tb[TCA_CBQ_MAX + 1];
1747         struct cbq_class *parent;
1748         struct qdisc_rate_table *rtab = NULL;
1749
1750         if (opt == NULL)
1751                 return -EINVAL;
1752
1753         err = nla_parse_nested(tb, TCA_CBQ_MAX, opt, cbq_policy);
1754         if (err < 0)
1755                 return err;
1756
1757         if (cl) {
1758                 /* Check parent */
1759                 if (parentid) {
1760                         if (cl->tparent &&
1761                             cl->tparent->common.classid != parentid)
1762                                 return -EINVAL;
1763                         if (!cl->tparent && parentid != TC_H_ROOT)
1764                                 return -EINVAL;
1765                 }
1766
1767                 if (tb[TCA_CBQ_RATE]) {
1768                         rtab = qdisc_get_rtab(nla_data(tb[TCA_CBQ_RATE]),
1769                                               tb[TCA_CBQ_RTAB]);
1770                         if (rtab == NULL)
1771                                 return -EINVAL;
1772                 }
1773
1774                 if (tca[TCA_RATE]) {
1775                         err = gen_replace_estimator(&cl->bstats, &cl->rate_est,
1776                                                     qdisc_root_sleeping_lock(sch),
1777                                                     tca[TCA_RATE]);
1778                         if (err) {
1779                                 if (rtab)
1780                                         qdisc_put_rtab(rtab);
1781                                 return err;
1782                         }
1783                 }
1784
1785                 /* Change class parameters */
1786                 sch_tree_lock(sch);
1787
1788                 if (cl->next_alive != NULL)
1789                         cbq_deactivate_class(cl);
1790
1791                 if (rtab) {
1792                         qdisc_put_rtab(cl->R_tab);
1793                         cl->R_tab = rtab;
1794                 }
1795
1796                 if (tb[TCA_CBQ_LSSOPT])
1797                         cbq_set_lss(cl, nla_data(tb[TCA_CBQ_LSSOPT]));
1798
1799                 if (tb[TCA_CBQ_WRROPT]) {
1800                         cbq_rmprio(q, cl);
1801                         cbq_set_wrr(cl, nla_data(tb[TCA_CBQ_WRROPT]));
1802                 }
1803
1804                 if (tb[TCA_CBQ_OVL_STRATEGY])
1805                         cbq_set_overlimit(cl, nla_data(tb[TCA_CBQ_OVL_STRATEGY]));
1806
1807 #ifdef CONFIG_NET_CLS_ACT
1808                 if (tb[TCA_CBQ_POLICE])
1809                         cbq_set_police(cl, nla_data(tb[TCA_CBQ_POLICE]));
1810 #endif
1811
1812                 if (tb[TCA_CBQ_FOPT])
1813                         cbq_set_fopt(cl, nla_data(tb[TCA_CBQ_FOPT]));
1814
1815                 if (cl->q->q.qlen)
1816                         cbq_activate_class(cl);
1817
1818                 sch_tree_unlock(sch);
1819
1820                 return 0;
1821         }
1822
1823         if (parentid == TC_H_ROOT)
1824                 return -EINVAL;
1825
1826         if (tb[TCA_CBQ_WRROPT] == NULL || tb[TCA_CBQ_RATE] == NULL ||
1827             tb[TCA_CBQ_LSSOPT] == NULL)
1828                 return -EINVAL;
1829
1830         rtab = qdisc_get_rtab(nla_data(tb[TCA_CBQ_RATE]), tb[TCA_CBQ_RTAB]);
1831         if (rtab == NULL)
1832                 return -EINVAL;
1833
1834         if (classid) {
1835                 err = -EINVAL;
1836                 if (TC_H_MAJ(classid^sch->handle) || cbq_class_lookup(q, classid))
1837                         goto failure;
1838         } else {
1839                 int i;
1840                 classid = TC_H_MAKE(sch->handle,0x8000);
1841
1842                 for (i=0; i<0x8000; i++) {
1843                         if (++q->hgenerator >= 0x8000)
1844                                 q->hgenerator = 1;
1845                         if (cbq_class_lookup(q, classid|q->hgenerator) == NULL)
1846                                 break;
1847                 }
1848                 err = -ENOSR;
1849                 if (i >= 0x8000)
1850                         goto failure;
1851                 classid = classid|q->hgenerator;
1852         }
1853
1854         parent = &q->link;
1855         if (parentid) {
1856                 parent = cbq_class_lookup(q, parentid);
1857                 err = -EINVAL;
1858                 if (parent == NULL)
1859                         goto failure;
1860         }
1861
1862         err = -ENOBUFS;
1863         cl = kzalloc(sizeof(*cl), GFP_KERNEL);
1864         if (cl == NULL)
1865                 goto failure;
1866
1867         if (tca[TCA_RATE]) {
1868                 err = gen_new_estimator(&cl->bstats, &cl->rate_est,
1869                                         qdisc_root_sleeping_lock(sch),
1870                                         tca[TCA_RATE]);
1871                 if (err) {
1872                         kfree(cl);
1873                         goto failure;
1874                 }
1875         }
1876
1877         cl->R_tab = rtab;
1878         rtab = NULL;
1879         cl->refcnt = 1;
1880         if (!(cl->q = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue,
1881                                         &pfifo_qdisc_ops, classid)))
1882                 cl->q = &noop_qdisc;
1883         cl->common.classid = classid;
1884         cl->tparent = parent;
1885         cl->qdisc = sch;
1886         cl->allot = parent->allot;
1887         cl->quantum = cl->allot;
1888         cl->weight = cl->R_tab->rate.rate;
1889
1890         sch_tree_lock(sch);
1891         cbq_link_class(cl);
1892         cl->borrow = cl->tparent;
1893         if (cl->tparent != &q->link)
1894                 cl->share = cl->tparent;
1895         cbq_adjust_levels(parent);
1896         cl->minidle = -0x7FFFFFFF;
1897         cbq_set_lss(cl, nla_data(tb[TCA_CBQ_LSSOPT]));
1898         cbq_set_wrr(cl, nla_data(tb[TCA_CBQ_WRROPT]));
1899         if (cl->ewma_log==0)
1900                 cl->ewma_log = q->link.ewma_log;
1901         if (cl->maxidle==0)
1902                 cl->maxidle = q->link.maxidle;
1903         if (cl->avpkt==0)
1904                 cl->avpkt = q->link.avpkt;
1905         cl->overlimit = cbq_ovl_classic;
1906         if (tb[TCA_CBQ_OVL_STRATEGY])
1907                 cbq_set_overlimit(cl, nla_data(tb[TCA_CBQ_OVL_STRATEGY]));
1908 #ifdef CONFIG_NET_CLS_ACT
1909         if (tb[TCA_CBQ_POLICE])
1910                 cbq_set_police(cl, nla_data(tb[TCA_CBQ_POLICE]));
1911 #endif
1912         if (tb[TCA_CBQ_FOPT])
1913                 cbq_set_fopt(cl, nla_data(tb[TCA_CBQ_FOPT]));
1914         sch_tree_unlock(sch);
1915
1916         qdisc_class_hash_grow(sch, &q->clhash);
1917
1918         *arg = (unsigned long)cl;
1919         return 0;
1920
1921 failure:
1922         qdisc_put_rtab(rtab);
1923         return err;
1924 }
1925
1926 static int cbq_delete(struct Qdisc *sch, unsigned long arg)
1927 {
1928         struct cbq_sched_data *q = qdisc_priv(sch);
1929         struct cbq_class *cl = (struct cbq_class*)arg;
1930         unsigned int qlen;
1931
1932         if (cl->filters || cl->children || cl == &q->link)
1933                 return -EBUSY;
1934
1935         sch_tree_lock(sch);
1936
1937         qlen = cl->q->q.qlen;
1938         qdisc_reset(cl->q);
1939         qdisc_tree_decrease_qlen(cl->q, qlen);
1940
1941         if (cl->next_alive)
1942                 cbq_deactivate_class(cl);
1943
1944         if (q->tx_borrowed == cl)
1945                 q->tx_borrowed = q->tx_class;
1946         if (q->tx_class == cl) {
1947                 q->tx_class = NULL;
1948                 q->tx_borrowed = NULL;
1949         }
1950 #ifdef CONFIG_NET_CLS_ACT
1951         if (q->rx_class == cl)
1952                 q->rx_class = NULL;
1953 #endif
1954
1955         cbq_unlink_class(cl);
1956         cbq_adjust_levels(cl->tparent);
1957         cl->defmap = 0;
1958         cbq_sync_defmap(cl);
1959
1960         cbq_rmprio(q, cl);
1961         sch_tree_unlock(sch);
1962
1963         BUG_ON(--cl->refcnt == 0);
1964         /*
1965          * This shouldn't happen: we "hold" one cops->get() when called
1966          * from tc_ctl_tclass; the destroy method is done from cops->put().
1967          */
1968
1969         return 0;
1970 }
1971
1972 static struct tcf_proto **cbq_find_tcf(struct Qdisc *sch, unsigned long arg)
1973 {
1974         struct cbq_sched_data *q = qdisc_priv(sch);
1975         struct cbq_class *cl = (struct cbq_class *)arg;
1976
1977         if (cl == NULL)
1978                 cl = &q->link;
1979
1980         return &cl->filter_list;
1981 }
1982
1983 static unsigned long cbq_bind_filter(struct Qdisc *sch, unsigned long parent,
1984                                      u32 classid)
1985 {
1986         struct cbq_sched_data *q = qdisc_priv(sch);
1987         struct cbq_class *p = (struct cbq_class*)parent;
1988         struct cbq_class *cl = cbq_class_lookup(q, classid);
1989
1990         if (cl) {
1991                 if (p && p->level <= cl->level)
1992                         return 0;
1993                 cl->filters++;
1994                 return (unsigned long)cl;
1995         }
1996         return 0;
1997 }
1998
1999 static void cbq_unbind_filter(struct Qdisc *sch, unsigned long arg)
2000 {
2001         struct cbq_class *cl = (struct cbq_class*)arg;
2002
2003         cl->filters--;
2004 }
2005
2006 static void cbq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
2007 {
2008         struct cbq_sched_data *q = qdisc_priv(sch);
2009         struct cbq_class *cl;
2010         struct hlist_node *n;
2011         unsigned h;
2012
2013         if (arg->stop)
2014                 return;
2015
2016         for (h = 0; h < q->clhash.hashsize; h++) {
2017                 hlist_for_each_entry(cl, n, &q->clhash.hash[h], common.hnode) {
2018                         if (arg->count < arg->skip) {
2019                                 arg->count++;
2020                                 continue;
2021                         }
2022                         if (arg->fn(sch, (unsigned long)cl, arg) < 0) {
2023                                 arg->stop = 1;
2024                                 return;
2025                         }
2026                         arg->count++;
2027                 }
2028         }
2029 }
2030
2031 static const struct Qdisc_class_ops cbq_class_ops = {
2032         .graft          =       cbq_graft,
2033         .leaf           =       cbq_leaf,
2034         .qlen_notify    =       cbq_qlen_notify,
2035         .get            =       cbq_get,
2036         .put            =       cbq_put,
2037         .change         =       cbq_change_class,
2038         .delete         =       cbq_delete,
2039         .walk           =       cbq_walk,
2040         .tcf_chain      =       cbq_find_tcf,
2041         .bind_tcf       =       cbq_bind_filter,
2042         .unbind_tcf     =       cbq_unbind_filter,
2043         .dump           =       cbq_dump_class,
2044         .dump_stats     =       cbq_dump_class_stats,
2045 };
2046
2047 static struct Qdisc_ops cbq_qdisc_ops __read_mostly = {
2048         .next           =       NULL,
2049         .cl_ops         =       &cbq_class_ops,
2050         .id             =       "cbq",
2051         .priv_size      =       sizeof(struct cbq_sched_data),
2052         .enqueue        =       cbq_enqueue,
2053         .dequeue        =       cbq_dequeue,
2054         .peek           =       qdisc_peek_dequeued,
2055         .drop           =       cbq_drop,
2056         .init           =       cbq_init,
2057         .reset          =       cbq_reset,
2058         .destroy        =       cbq_destroy,
2059         .change         =       NULL,
2060         .dump           =       cbq_dump,
2061         .dump_stats     =       cbq_dump_stats,
2062         .owner          =       THIS_MODULE,
2063 };
2064
2065 static int __init cbq_module_init(void)
2066 {
2067         return register_qdisc(&cbq_qdisc_ops);
2068 }
2069 static void __exit cbq_module_exit(void)
2070 {
2071         unregister_qdisc(&cbq_qdisc_ops);
2072 }
2073 module_init(cbq_module_init)
2074 module_exit(cbq_module_exit)
2075 MODULE_LICENSE("GPL");