1 /*
2 * net/sched/sch_cbq.c Class-Based Queueing discipline.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
9 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
13 #include <linux/module.h>
14 #include <linux/slab.h>
15 #include <linux/types.h>
16 #include <linux/kernel.h>
17 #include <linux/string.h>
18 #include <linux/errno.h>
19 #include <linux/skbuff.h>
20 #include <net/netlink.h>
21 #include <net/pkt_sched.h>
24 /* Class-Based Queueing (CBQ) algorithm.
25 =======================================
27 Sources: [1] Sally Floyd and Van Jacobson, "Link-sharing and Resource
28 Management Models for Packet Networks",
29 IEEE/ACM Transactions on Networking, Vol.3, No.4, 1995
31 [2] Sally Floyd, "Notes on CBQ and Guaranteed Service", 1995
33 [3] Sally Floyd, "Notes on Class-Based Queueing: Setting
34 Parameters", 1996
36 [4] Sally Floyd and Michael Speer, "Experimental Results
37 for Class-Based Queueing", 1998, not published.
39 -----------------------------------------------------------------------
41 Algorithm skeleton was taken from NS simulator cbq.cc.
42 If someone wants to check this code against the LBL version,
43 he should take into account that ONLY the skeleton was borrowed,
44 the implementation is different. In particular:
46 --- The WRR algorithm is different. Our version looks more
47 reasonable (I hope) and works when quanta are allowed to be
48 less than MTU, which is always the case when real-time classes
49 have small rates. Note that the statement of [3] is
50 incomplete, delay may actually be estimated even if class
51 per-round allotment is less than MTU. Namely, if per-round
52 allotment is W*r_i, and r_1+...+r_k = r < 1
54 delay_i <= ([MTU/(W*r_i)]*W*r + W*r + k*MTU)/B
56 In the worst case we have IntServ estimate with D = W*r+k*MTU
57 and C = MTU*r. The proof (if correct at all) is trivial.
60 --- It seems that cbq-2.0 is not very accurate. At least, I cannot
61 interpret some places, which look like wrong translations
62 from NS. Anyone is advised to find these differences
63 and explain to me why I am wrong 8).
65 --- Linux has no EOI event, so that we cannot estimate true class
66 idle time. The workaround is to consider the next dequeue event
67 as a sign that the previous packet is finished. This is wrong because of
68 internal device queueing, but on a permanently loaded link it is true.
69 Moreover, combined with clock integrator, this scheme looks
70 very close to an ideal solution. */
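/* Worked example of the delay bound above (illustrative numbers, not from
   the original sources): take B = 1.25 Mbyte/s (10 Mbit/s), k = 2 classes
   with r_1 = r_2 = 0.05 (so r = 0.1), MTU = 1500 bytes, and W chosen so the
   per-round allotment W*r_i = 500 bytes (W = 10000). Then

	delay_1 <= (ceil(1500/500)*1000 + 1000 + 2*1500)/1250000
		 = 7000/1250000 s ~= 5.6 ms

   even though the per-round allotment (500 bytes) is below the MTU. */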
72 struct cbq_sched_data;
75 struct cbq_class {
76 struct Qdisc_class_common common;
77 struct cbq_class *next_alive; /* next class with backlog in this priority band */
79 /* Parameters */
80 unsigned char priority; /* class priority */
81 unsigned char priority2; /* priority to be used after overlimit */
82 unsigned char ewma_log; /* time constant for idle time calculation */
83 unsigned char ovl_strategy;
84 #ifdef CONFIG_NET_CLS_ACT
85 unsigned char police;
86 #endif
88 u32 defmap;
90 /* Link-sharing scheduler parameters */
91 long maxidle; /* Class parameters: see below. */
92 long offtime;
93 long minidle;
94 u32 avpkt;
95 struct qdisc_rate_table *R_tab;
97 /* Overlimit strategy parameters */
98 void (*overlimit)(struct cbq_class *cl);
99 psched_tdiff_t penalty;
101 /* General scheduler (WRR) parameters */
102 long allot;
103 long quantum; /* Allotment per WRR round */
104 long weight; /* Relative allotment: see below */
106 struct Qdisc *qdisc; /* Ptr to CBQ discipline */
107 struct cbq_class *split; /* Ptr to split node */
108 struct cbq_class *share; /* Ptr to LS parent in the class tree */
109 struct cbq_class *tparent; /* Ptr to tree parent in the class tree */
110 struct cbq_class *borrow; /* NULL if class is bandwidth limited;
111 parent otherwise */
112 struct cbq_class *sibling; /* Sibling chain */
113 struct cbq_class *children; /* Pointer to children chain */
115 struct Qdisc *q; /* Elementary queueing discipline */
118 /* Variables */
119 unsigned char cpriority; /* Effective priority */
120 unsigned char delayed;
121 unsigned char level; /* level of the class in hierarchy:
122 0 for leaf classes, and maximal
123 level of children + 1 for nodes.
126 psched_time_t last; /* Last end of service */
127 psched_time_t undertime;
128 long avgidle;
129 long deficit; /* Saved deficit for WRR */
130 psched_time_t penalized;
131 struct gnet_stats_basic_packed bstats;
132 struct gnet_stats_queue qstats;
133 struct gnet_stats_rate_est64 rate_est;
134 struct tc_cbq_xstats xstats;
136 struct tcf_proto *filter_list;
138 int refcnt;
139 int filters;
141 struct cbq_class *defaults[TC_PRIO_MAX + 1];
144 struct cbq_sched_data {
145 struct Qdisc_class_hash clhash; /* Hash table of all classes */
146 int nclasses[TC_CBQ_MAXPRIO + 1];
147 unsigned int quanta[TC_CBQ_MAXPRIO + 1];
149 struct cbq_class link;
151 unsigned int activemask;
152 struct cbq_class *active[TC_CBQ_MAXPRIO + 1]; /* List of all classes
153 with backlog */
155 #ifdef CONFIG_NET_CLS_ACT
156 struct cbq_class *rx_class;
157 #endif
158 struct cbq_class *tx_class;
159 struct cbq_class *tx_borrowed;
160 int tx_len;
161 psched_time_t now; /* Cached timestamp */
162 unsigned int pmask;
164 struct hrtimer delay_timer;
165 struct qdisc_watchdog watchdog; /* Watchdog timer,
166 started when CBQ has
167 backlog, but cannot
168 transmit just now */
169 psched_tdiff_t wd_expires;
170 int toplevel;
171 u32 hgenerator;
175 #define L2T(cl, len) qdisc_l2t((cl)->R_tab, len)
177 static inline struct cbq_class *
178 cbq_class_lookup(struct cbq_sched_data *q, u32 classid)
180 struct Qdisc_class_common *clc;
182 clc = qdisc_class_find(&q->clhash, classid);
183 if (clc == NULL)
184 return NULL;
185 return container_of(clc, struct cbq_class, common);
188 #ifdef CONFIG_NET_CLS_ACT
190 static struct cbq_class *
191 cbq_reclassify(struct sk_buff *skb, struct cbq_class *this)
193 struct cbq_class *cl;
195 for (cl = this->tparent; cl; cl = cl->tparent) {
196 struct cbq_class *new = cl->defaults[TC_PRIO_BESTEFFORT];
198 if (new != NULL && new != this)
199 return new;
201 return NULL;
204 #endif
206 /* Classify packet. The procedure is pretty complicated, but
207 * it allows us to combine link sharing and priority scheduling
208 * transparently.
210 * Namely, you can put link sharing rules (e.g. route based) at the root of CBQ,
211 * so that it resolves to split nodes. Then packets are classified
212 * by logical priority, or a more specific classifier may be attached
213 * to the split node.
216 static struct cbq_class *
217 cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
219 struct cbq_sched_data *q = qdisc_priv(sch);
220 struct cbq_class *head = &q->link;
221 struct cbq_class **defmap;
222 struct cbq_class *cl = NULL;
223 u32 prio = skb->priority;
224 struct tcf_result res;
227 * Step 1. If skb->priority points to one of our classes, use it.
229 if (TC_H_MAJ(prio ^ sch->handle) == 0 &&
230 (cl = cbq_class_lookup(q, prio)) != NULL)
231 return cl;
233 *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
234 for (;;) {
235 int result = 0;
236 defmap = head->defaults;
239 * Step 2+n. Apply classifier.
241 if (!head->filter_list ||
242 (result = tc_classify_compat(skb, head->filter_list, &res)) < 0)
243 goto fallback;
245 cl = (void *)res.class;
246 if (!cl) {
247 if (TC_H_MAJ(res.classid))
248 cl = cbq_class_lookup(q, res.classid);
249 else if ((cl = defmap[res.classid & TC_PRIO_MAX]) == NULL)
250 cl = defmap[TC_PRIO_BESTEFFORT];
252 if (cl == NULL)
253 goto fallback;
255 if (cl->level >= head->level)
256 goto fallback;
257 #ifdef CONFIG_NET_CLS_ACT
258 switch (result) {
259 case TC_ACT_QUEUED:
260 case TC_ACT_STOLEN:
261 *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
262 case TC_ACT_SHOT:
263 return NULL;
264 case TC_ACT_RECLASSIFY:
265 return cbq_reclassify(skb, cl);
267 #endif
268 if (cl->level == 0)
269 return cl;
272 * Step 3+n. If classifier selected a link sharing class,
273 * apply its agency-specific classifier.
274 * Repeat this procedure until we hit a leaf node.
276 head = cl;
279 fallback:
280 cl = head;
283 * Step 4. No success...
285 if (TC_H_MAJ(prio) == 0 &&
286 !(cl = head->defaults[prio & TC_PRIO_MAX]) &&
287 !(cl = head->defaults[TC_PRIO_BESTEFFORT]))
288 return head;
290 return cl;
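/* Illustrative tc(8) configuration that exercises the split/defmap fallback
 * above (device name, rates and class ids are examples only):
 *
 *	tc qdisc add dev eth0 root handle 1: cbq bandwidth 10Mbit avpkt 1000
 *	tc class add dev eth0 parent 1: classid 1:1 cbq bandwidth 10Mbit \
 *		rate 1Mbit allot 1514 prio 3 avpkt 1000 \
 *		split 1:0 defmap c0
 *
 * Packets that no filter matches are then mapped through the defaults[] of
 * the split node according to skb->priority, as implemented in cbq_classify().
 */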
294 * A packet has just been enqueued into an empty class.
295 * cbq_activate_class adds the class to the tail of the active class
296 * list of its priority band.
299 static inline void cbq_activate_class(struct cbq_class *cl)
301 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
302 int prio = cl->cpriority;
303 struct cbq_class *cl_tail;
305 cl_tail = q->active[prio];
306 q->active[prio] = cl;
308 if (cl_tail != NULL) {
309 cl->next_alive = cl_tail->next_alive;
310 cl_tail->next_alive = cl;
311 } else {
312 cl->next_alive = cl;
313 q->activemask |= (1<<prio);
318 * Unlink the class from the active chain.
319 * Note that the same unlinking is also done directly in cbq_dequeue*
320 * during the round-robin procedure.
323 static void cbq_deactivate_class(struct cbq_class *this)
325 struct cbq_sched_data *q = qdisc_priv(this->qdisc);
326 int prio = this->cpriority;
327 struct cbq_class *cl;
328 struct cbq_class *cl_prev = q->active[prio];
330 do {
331 cl = cl_prev->next_alive;
332 if (cl == this) {
333 cl_prev->next_alive = cl->next_alive;
334 cl->next_alive = NULL;
336 if (cl == q->active[prio]) {
337 q->active[prio] = cl_prev;
338 if (cl == q->active[prio]) {
339 q->active[prio] = NULL;
340 q->activemask &= ~(1<<prio);
341 return;
344 return;
346 } while ((cl_prev = cl) != q->active[prio]);
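/* Top-level link-sharing cutoff (cf. [1]): an ancestor may lend bandwidth
 * only if its level does not exceed q->toplevel (see cbq_under_limit()).
 * When a class receives a packet, walk its borrow chain; if some node there
 * is already underlimit (its undertime has passed), lower the cutoff to that
 * node's level so that dequeue is allowed to borrow from it.
 */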
349 static void
350 cbq_mark_toplevel(struct cbq_sched_data *q, struct cbq_class *cl)
352 int toplevel = q->toplevel;
354 if (toplevel > cl->level && !(qdisc_is_throttled(cl->q))) {
355 psched_time_t now = psched_get_time();
357 do {
358 if (cl->undertime < now) {
359 q->toplevel = cl->level;
360 return;
362 } while ((cl = cl->borrow) != NULL && toplevel > cl->level);
366 static int
367 cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
369 struct cbq_sched_data *q = qdisc_priv(sch);
370 int uninitialized_var(ret);
371 struct cbq_class *cl = cbq_classify(skb, sch, &ret);
373 #ifdef CONFIG_NET_CLS_ACT
374 q->rx_class = cl;
375 #endif
376 if (cl == NULL) {
377 if (ret & __NET_XMIT_BYPASS)
378 sch->qstats.drops++;
379 kfree_skb(skb);
380 return ret;
383 #ifdef CONFIG_NET_CLS_ACT
384 cl->q->__parent = sch;
385 #endif
386 ret = qdisc_enqueue(skb, cl->q);
387 if (ret == NET_XMIT_SUCCESS) {
388 sch->q.qlen++;
389 cbq_mark_toplevel(q, cl);
390 if (!cl->next_alive)
391 cbq_activate_class(cl);
392 return ret;
395 if (net_xmit_drop_count(ret)) {
396 sch->qstats.drops++;
397 cbq_mark_toplevel(q, cl);
398 cl->qstats.drops++;
400 return ret;
403 /* Overlimit actions */
405 /* TC_CBQ_OVL_CLASSIC: (default) penalize leaf class by adding offtime */
407 static void cbq_ovl_classic(struct cbq_class *cl)
409 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
410 psched_tdiff_t delay = cl->undertime - q->now;
412 if (!cl->delayed) {
413 delay += cl->offtime;
416 * Class goes to sleep, so that it will have no
417 * chance to work off its avgidle. Let's forgive it 8)
419 * BTW cbq-2.0 has a bug in this
420 * place: apparently they forgot to shift it by cl->ewma_log.
422 if (cl->avgidle < 0)
423 delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
424 if (cl->avgidle < cl->minidle)
425 cl->avgidle = cl->minidle;
426 if (delay <= 0)
427 delay = 1;
428 cl->undertime = q->now + delay;
430 cl->xstats.overactions++;
431 cl->delayed = 1;
433 if (q->wd_expires == 0 || q->wd_expires > delay)
434 q->wd_expires = delay;
436 /* Dirty work! We must schedule wakeups based on
437 * real available rate, rather than leaf rate,
438 * which may be tiny (even zero).
440 if (q->toplevel == TC_CBQ_MAXLEVEL) {
441 struct cbq_class *b;
442 psched_tdiff_t base_delay = q->wd_expires;
444 for (b = cl->borrow; b; b = b->borrow) {
445 delay = b->undertime - q->now;
446 if (delay < base_delay) {
447 if (delay <= 0)
448 delay = 1;
449 base_delay = delay;
453 q->wd_expires = base_delay;
457 /* TC_CBQ_OVL_RCLASSIC: penalize classes in the hierarchy by offtime when
458 * they go overlimit
461 static void cbq_ovl_rclassic(struct cbq_class *cl)
463 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
464 struct cbq_class *this = cl;
466 do {
467 if (cl->level > q->toplevel) {
468 cl = NULL;
469 break;
471 } while ((cl = cl->borrow) != NULL);
473 if (cl == NULL)
474 cl = this;
475 cbq_ovl_classic(cl);
478 /* TC_CBQ_OVL_DELAY: delay the class until it goes underlimit */
480 static void cbq_ovl_delay(struct cbq_class *cl)
482 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
483 psched_tdiff_t delay = cl->undertime - q->now;
485 if (test_bit(__QDISC_STATE_DEACTIVATED,
486 &qdisc_root_sleeping(cl->qdisc)->state))
487 return;
489 if (!cl->delayed) {
490 psched_time_t sched = q->now;
491 ktime_t expires;
493 delay += cl->offtime;
494 if (cl->avgidle < 0)
495 delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
496 if (cl->avgidle < cl->minidle)
497 cl->avgidle = cl->minidle;
498 cl->undertime = q->now + delay;
500 if (delay > 0) {
501 sched += delay + cl->penalty;
502 cl->penalized = sched;
503 cl->cpriority = TC_CBQ_MAXPRIO;
504 q->pmask |= (1<<TC_CBQ_MAXPRIO);
506 expires = ns_to_ktime(PSCHED_TICKS2NS(sched));
507 if (hrtimer_try_to_cancel(&q->delay_timer) &&
508 ktime_to_ns(ktime_sub(
509 hrtimer_get_expires(&q->delay_timer),
510 expires)) > 0)
511 hrtimer_set_expires(&q->delay_timer, expires);
512 hrtimer_restart(&q->delay_timer);
513 cl->delayed = 1;
514 cl->xstats.overactions++;
515 return;
517 delay = 1;
519 if (q->wd_expires == 0 || q->wd_expires > delay)
520 q->wd_expires = delay;
523 /* TC_CBQ_OVL_LOWPRIO: penalize class by lowering its priority band */
525 static void cbq_ovl_lowprio(struct cbq_class *cl)
527 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
529 cl->penalized = q->now + cl->penalty;
531 if (cl->cpriority != cl->priority2) {
532 cl->cpriority = cl->priority2;
533 q->pmask |= (1<<cl->cpriority);
534 cl->xstats.overactions++;
536 cbq_ovl_classic(cl);
539 /* TC_CBQ_OVL_DROP: penalize class by dropping */
541 static void cbq_ovl_drop(struct cbq_class *cl)
543 if (cl->q->ops->drop)
544 if (cl->q->ops->drop(cl->q))
545 cl->qdisc->q.qlen--;
546 cl->xstats.overactions++;
547 cbq_ovl_classic(cl);
550 static psched_tdiff_t cbq_undelay_prio(struct cbq_sched_data *q, int prio,
551 psched_time_t now)
553 struct cbq_class *cl;
554 struct cbq_class *cl_prev = q->active[prio];
555 psched_time_t sched = now;
557 if (cl_prev == NULL)
558 return 0;
560 do {
561 cl = cl_prev->next_alive;
562 if (now - cl->penalized > 0) {
563 cl_prev->next_alive = cl->next_alive;
564 cl->next_alive = NULL;
565 cl->cpriority = cl->priority;
566 cl->delayed = 0;
567 cbq_activate_class(cl);
569 if (cl == q->active[prio]) {
570 q->active[prio] = cl_prev;
571 if (cl == q->active[prio]) {
572 q->active[prio] = NULL;
573 return 0;
577 cl = cl_prev->next_alive;
578 } else if (sched - cl->penalized > 0)
579 sched = cl->penalized;
580 } while ((cl_prev = cl) != q->active[prio]);
582 return sched - now;
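/* Delay timer handler: scan the priority bands recorded in q->pmask,
 * reactivate classes whose penalty period has expired (cbq_undelay_prio()
 * above), re-arm the timer for the earliest still-pending penalty, and kick
 * the root qdisc so that dequeue runs again.
 */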
585 static enum hrtimer_restart cbq_undelay(struct hrtimer *timer)
587 struct cbq_sched_data *q = container_of(timer, struct cbq_sched_data,
588 delay_timer);
589 struct Qdisc *sch = q->watchdog.qdisc;
590 psched_time_t now;
591 psched_tdiff_t delay = 0;
592 unsigned int pmask;
594 now = psched_get_time();
596 pmask = q->pmask;
597 q->pmask = 0;
599 while (pmask) {
600 int prio = ffz(~pmask);
601 psched_tdiff_t tmp;
603 pmask &= ~(1<<prio);
605 tmp = cbq_undelay_prio(q, prio, now);
606 if (tmp > 0) {
607 q->pmask |= 1<<prio;
608 if (tmp < delay || delay == 0)
609 delay = tmp;
613 if (delay) {
614 ktime_t time;
616 time = ktime_set(0, 0);
617 time = ktime_add_ns(time, PSCHED_TICKS2NS(now + delay));
618 hrtimer_start(&q->delay_timer, time, HRTIMER_MODE_ABS);
621 qdisc_unthrottled(sch);
622 __netif_schedule(qdisc_root(sch));
623 return HRTIMER_NORESTART;
626 #ifdef CONFIG_NET_CLS_ACT
627 static int cbq_reshape_fail(struct sk_buff *skb, struct Qdisc *child)
629 struct Qdisc *sch = child->__parent;
630 struct cbq_sched_data *q = qdisc_priv(sch);
631 struct cbq_class *cl = q->rx_class;
633 q->rx_class = NULL;
635 if (cl && (cl = cbq_reclassify(skb, cl)) != NULL) {
636 int ret;
638 cbq_mark_toplevel(q, cl);
640 q->rx_class = cl;
641 cl->q->__parent = sch;
643 ret = qdisc_enqueue(skb, cl->q);
644 if (ret == NET_XMIT_SUCCESS) {
645 sch->q.qlen++;
646 if (!cl->next_alive)
647 cbq_activate_class(cl);
648 return 0;
650 if (net_xmit_drop_count(ret))
651 sch->qstats.drops++;
652 return 0;
655 sch->qstats.drops++;
656 return -1;
658 #endif
661 * This is a mission-critical procedure.
663 * We "regenerate" the toplevel cutoff if the transmitting class
664 * has backlog and is not regulated. This is not part of the
665 * original CBQ description, but looks more reasonable.
666 * It may well be wrong; the question needs further investigation.
669 static inline void
670 cbq_update_toplevel(struct cbq_sched_data *q, struct cbq_class *cl,
671 struct cbq_class *borrowed)
673 if (cl && q->toplevel >= borrowed->level) {
674 if (cl->q->q.qlen > 1) {
675 do {
676 if (borrowed->undertime == PSCHED_PASTPERFECT) {
677 q->toplevel = borrowed->level;
678 return;
680 } while ((borrowed = borrowed->borrow) != NULL);
682 #if 0
683 /* It is not necessary now. Uncommenting it
684 will save CPU cycles, but decrease fairness.
686 q->toplevel = TC_CBQ_MAXLEVEL;
687 #endif
691 static void
692 cbq_update(struct cbq_sched_data *q)
694 struct cbq_class *this = q->tx_class;
695 struct cbq_class *cl = this;
696 int len = q->tx_len;
697 psched_time_t now;
699 q->tx_class = NULL;
700 /* Time integrator. We calculate EOS time
701 * by adding expected packet transmission time.
703 now = q->now + L2T(&q->link, len);
705 for ( ; cl; cl = cl->share) {
706 long avgidle = cl->avgidle;
707 long idle;
709 cl->bstats.packets++;
710 cl->bstats.bytes += len;
713 * (now - last) is total time between packet right edges.
714 * (last_pktlen/rate) is "virtual" busy time, so that
716 * idle = (now - last) - last_pktlen/rate
719 idle = now - cl->last;
720 if ((unsigned long)idle > 128*1024*1024) {
721 avgidle = cl->maxidle;
722 } else {
723 idle -= L2T(cl, len);
725 /* true_avgidle := (1-W)*true_avgidle + W*idle,
726 * where W=2^{-ewma_log}. But cl->avgidle is scaled:
727 * cl->avgidle == true_avgidle/W,
728 * hence:
730 avgidle += idle - (avgidle>>cl->ewma_log);
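/* The scaled update above follows from the definition: with
 * A = true_avgidle/W,
 *	A' = ((1-W)*true_avgidle + W*idle)/W = (1-W)*A + idle
 *	   = A + idle - W*A = A + idle - (A >> ewma_log),
 * since W = 2^{-ewma_log}.
 */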
733 if (avgidle <= 0) {
734 /* Overlimit or at-limit */
736 if (avgidle < cl->minidle)
737 avgidle = cl->minidle;
739 cl->avgidle = avgidle;
741 /* Calculate the expected time when this class
742 * will be allowed to send.
743 * It will occur when:
744 * (1-W)*true_avgidle + W*delay = 0, i.e.
745 * idle = (1/W - 1)*(-true_avgidle)
746 * or
747 * idle = (1 - W)*(-cl->avgidle);
749 idle = (-avgidle) - ((-avgidle) >> cl->ewma_log);
752 * That is not all.
753 * To maintain the rate allocated to the class,
754 * we add to undertime the virtual clock time
755 * necessary to complete the transmitted packet.
756 * (len/phys_bandwidth has already elapsed
757 * by the moment of cbq_update.)
760 idle -= L2T(&q->link, len);
761 idle += L2T(cl, len);
763 cl->undertime = now + idle;
764 } else {
765 /* Underlimit */
767 cl->undertime = PSCHED_PASTPERFECT;
768 if (avgidle > cl->maxidle)
769 cl->avgidle = cl->maxidle;
770 else
771 cl->avgidle = avgidle;
773 if ((s64)(now - cl->last) > 0)
774 cl->last = now;
777 cbq_update_toplevel(q, this, q->tx_borrowed);
780 static inline struct cbq_class *
781 cbq_under_limit(struct cbq_class *cl)
783 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
784 struct cbq_class *this_cl = cl;
786 if (cl->tparent == NULL)
787 return cl;
789 if (cl->undertime == PSCHED_PASTPERFECT || q->now >= cl->undertime) {
790 cl->delayed = 0;
791 return cl;
794 do {
795 /* This is a very suspicious place. Currently the overlimit
796 * action is generated for non-bounded classes
797 * only if the link is completely congested.
798 * Though this agrees with the ancestor-only paradigm,
799 * it looks very stupid. In particular,
800 * it means that this chunk of code will either
801 * never be called or will result in strong amplification
802 * of burstiness. Dangerous, silly, and yet
803 * no other solution exists.
805 cl = cl->borrow;
806 if (!cl) {
807 this_cl->qstats.overlimits++;
808 this_cl->overlimit(this_cl);
809 return NULL;
811 if (cl->level > q->toplevel)
812 return NULL;
813 } while (cl->undertime != PSCHED_PASTPERFECT && q->now < cl->undertime);
815 cl->delayed = 0;
816 return cl;
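/* One WRR pass over the active list of a priority band: each backlogged
 * class may send while its deficit is positive; the deficit is decreased by
 * the packet length on every dequeue and replenished by cl->quantum when it
 * runs out. Classes that are overlimit (cbq_under_limit() above) or whose
 * queue turns out to be empty are skipped or unlinked on the fly.
 */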
819 static inline struct sk_buff *
820 cbq_dequeue_prio(struct Qdisc *sch, int prio)
822 struct cbq_sched_data *q = qdisc_priv(sch);
823 struct cbq_class *cl_tail, *cl_prev, *cl;
824 struct sk_buff *skb;
825 int deficit;
827 cl_tail = cl_prev = q->active[prio];
828 cl = cl_prev->next_alive;
830 do {
831 deficit = 0;
833 /* Start round */
834 do {
835 struct cbq_class *borrow = cl;
837 if (cl->q->q.qlen &&
838 (borrow = cbq_under_limit(cl)) == NULL)
839 goto skip_class;
841 if (cl->deficit <= 0) {
842 /* Class exhausted its allotment for
843 * this round. Switch to the next one.
845 deficit = 1;
846 cl->deficit += cl->quantum;
847 goto next_class;
850 skb = cl->q->dequeue(cl->q);
852 /* Class did not give us any skb :-(
853 * It can occur even if cl->q->q.qlen != 0,
854 * e.g. if cl->q == "tbf"
856 if (skb == NULL)
857 goto skip_class;
859 cl->deficit -= qdisc_pkt_len(skb);
860 q->tx_class = cl;
861 q->tx_borrowed = borrow;
862 if (borrow != cl) {
863 #ifndef CBQ_XSTATS_BORROWS_BYTES
864 borrow->xstats.borrows++;
865 cl->xstats.borrows++;
866 #else
867 borrow->xstats.borrows += qdisc_pkt_len(skb);
868 cl->xstats.borrows += qdisc_pkt_len(skb);
869 #endif
871 q->tx_len = qdisc_pkt_len(skb);
873 if (cl->deficit <= 0) {
874 q->active[prio] = cl;
875 cl = cl->next_alive;
876 cl->deficit += cl->quantum;
878 return skb;
880 skip_class:
881 if (cl->q->q.qlen == 0 || prio != cl->cpriority) {
882 /* Class is empty or penalized.
883 * Unlink it from active chain.
885 cl_prev->next_alive = cl->next_alive;
886 cl->next_alive = NULL;
888 /* Did cl_tail point to it? */
889 if (cl == cl_tail) {
890 /* Repair it! */
891 cl_tail = cl_prev;
893 /* Was it the last class in this band? */
894 if (cl == cl_tail) {
895 /* Kill the band! */
896 q->active[prio] = NULL;
897 q->activemask &= ~(1<<prio);
898 if (cl->q->q.qlen)
899 cbq_activate_class(cl);
900 return NULL;
903 q->active[prio] = cl_tail;
905 if (cl->q->q.qlen)
906 cbq_activate_class(cl);
908 cl = cl_prev;
911 next_class:
912 cl_prev = cl;
913 cl = cl->next_alive;
914 } while (cl_prev != cl_tail);
915 } while (deficit);
917 q->active[prio] = cl_prev;
919 return NULL;
922 static inline struct sk_buff *
923 cbq_dequeue_1(struct Qdisc *sch)
925 struct cbq_sched_data *q = qdisc_priv(sch);
926 struct sk_buff *skb;
927 unsigned int activemask;
929 activemask = q->activemask & 0xFF;
930 while (activemask) {
931 int prio = ffz(~activemask);
932 activemask &= ~(1<<prio);
933 skb = cbq_dequeue_prio(sch, prio);
934 if (skb)
935 return skb;
937 return NULL;
940 static struct sk_buff *
941 cbq_dequeue(struct Qdisc *sch)
943 struct sk_buff *skb;
944 struct cbq_sched_data *q = qdisc_priv(sch);
945 psched_time_t now;
947 now = psched_get_time();
949 if (q->tx_class)
950 cbq_update(q);
952 q->now = now;
954 for (;;) {
955 q->wd_expires = 0;
957 skb = cbq_dequeue_1(sch);
958 if (skb) {
959 qdisc_bstats_update(sch, skb);
960 sch->q.qlen--;
961 qdisc_unthrottled(sch);
962 return skb;
965 /* All the classes are overlimit.
967 * This is possible if:
969 * 1. The scheduler is empty.
970 * 2. The toplevel cutoff inhibited borrowing.
971 * 3. The root class is overlimit.
973 * Reset conditions 2 and 3 and retry.
975 * Note that NS and cbq-2.0 are buggy: peeking
976 * an arbitrary class is appropriate for ancestor-only
977 * sharing, but not for the toplevel algorithm.
979 * Our version is better but slower, because it requires
980 * two passes; this is unavoidable with top-level sharing.
983 if (q->toplevel == TC_CBQ_MAXLEVEL &&
984 q->link.undertime == PSCHED_PASTPERFECT)
985 break;
987 q->toplevel = TC_CBQ_MAXLEVEL;
988 q->link.undertime = PSCHED_PASTPERFECT;
991 /* No packets in the scheduler, or nobody wants to give them to us :-(
992 * Sigh... start the watchdog timer in the latter case.
995 if (sch->q.qlen) {
996 sch->qstats.overlimits++;
997 if (q->wd_expires)
998 qdisc_watchdog_schedule(&q->watchdog,
999 now + q->wd_expires);
1001 return NULL;
1004 /* CBQ class maintenance routines */
1006 static void cbq_adjust_levels(struct cbq_class *this)
1008 if (this == NULL)
1009 return;
1011 do {
1012 int level = 0;
1013 struct cbq_class *cl;
1015 cl = this->children;
1016 if (cl) {
1017 do {
1018 if (cl->level > level)
1019 level = cl->level;
1020 } while ((cl = cl->sibling) != this->children);
1022 this->level = level + 1;
1023 } while ((this = this->tparent) != NULL);
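/* Recompute per-class WRR quanta for one priority band. q->quanta[prio] is
 * the sum of the weights in that band, so each class ends up with
 *	quantum = allot * weight * nclasses / sum_of_weights
 * i.e. its allot scaled by how its weight compares to the band average.
 */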
1026 static void cbq_normalize_quanta(struct cbq_sched_data *q, int prio)
1028 struct cbq_class *cl;
1029 unsigned int h;
1031 if (q->quanta[prio] == 0)
1032 return;
1034 for (h = 0; h < q->clhash.hashsize; h++) {
1035 hlist_for_each_entry(cl, &q->clhash.hash[h], common.hnode) {
1036 /* BUGGGG... Beware! This expression suffers from
1037 * arithmetic overflows!
1039 if (cl->priority == prio) {
1040 cl->quantum = (cl->weight*cl->allot*q->nclasses[prio])/
1041 q->quanta[prio];
1043 if (cl->quantum <= 0 ||
1044 cl->quantum > 32*qdisc_dev(cl->qdisc)->mtu) {
1045 pr_warn("CBQ: class %08x has bad quantum==%ld, repaired.\n",
1046 cl->common.classid, cl->quantum);
1047 cl->quantum = qdisc_dev(cl->qdisc)->mtu/2 + 1;
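/* A 64-bit intermediate would avoid the overflow noted above; a minimal
 * sketch of such a computation (not what this code does):
 *
 *	cl->quantum = div_u64((u64)cl->weight * cl->allot *
 *			      q->nclasses[prio], q->quanta[prio]);
 */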
1053 static void cbq_sync_defmap(struct cbq_class *cl)
1055 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
1056 struct cbq_class *split = cl->split;
1057 unsigned int h;
1058 int i;
1060 if (split == NULL)
1061 return;
1063 for (i = 0; i <= TC_PRIO_MAX; i++) {
1064 if (split->defaults[i] == cl && !(cl->defmap & (1<<i)))
1065 split->defaults[i] = NULL;
1068 for (i = 0; i <= TC_PRIO_MAX; i++) {
1069 int level = split->level;
1071 if (split->defaults[i])
1072 continue;
1074 for (h = 0; h < q->clhash.hashsize; h++) {
1075 struct cbq_class *c;
1077 hlist_for_each_entry(c, &q->clhash.hash[h],
1078 common.hnode) {
1079 if (c->split == split && c->level < level &&
1080 c->defmap & (1<<i)) {
1081 split->defaults[i] = c;
1082 level = c->level;
1089 static void cbq_change_defmap(struct cbq_class *cl, u32 splitid, u32 def, u32 mask)
1091 struct cbq_class *split = NULL;
1093 if (splitid == 0) {
1094 split = cl->split;
1095 if (!split)
1096 return;
1097 splitid = split->common.classid;
1100 if (split == NULL || split->common.classid != splitid) {
1101 for (split = cl->tparent; split; split = split->tparent)
1102 if (split->common.classid == splitid)
1103 break;
1106 if (split == NULL)
1107 return;
1109 if (cl->split != split) {
1110 cl->defmap = 0;
1111 cbq_sync_defmap(cl);
1112 cl->split = split;
1113 cl->defmap = def & mask;
1114 } else
1115 cl->defmap = (cl->defmap & ~mask) | (def & mask);
1117 cbq_sync_defmap(cl);
1120 static void cbq_unlink_class(struct cbq_class *this)
1122 struct cbq_class *cl, **clp;
1123 struct cbq_sched_data *q = qdisc_priv(this->qdisc);
1125 qdisc_class_hash_remove(&q->clhash, &this->common);
1127 if (this->tparent) {
1128 clp = &this->sibling;
1129 cl = *clp;
1130 do {
1131 if (cl == this) {
1132 *clp = cl->sibling;
1133 break;
1135 clp = &cl->sibling;
1136 } while ((cl = *clp) != this->sibling);
1138 if (this->tparent->children == this) {
1139 this->tparent->children = this->sibling;
1140 if (this->sibling == this)
1141 this->tparent->children = NULL;
1143 } else {
1144 WARN_ON(this->sibling != this);
1148 static void cbq_link_class(struct cbq_class *this)
1150 struct cbq_sched_data *q = qdisc_priv(this->qdisc);
1151 struct cbq_class *parent = this->tparent;
1153 this->sibling = this;
1154 qdisc_class_hash_insert(&q->clhash, &this->common);
1156 if (parent == NULL)
1157 return;
1159 if (parent->children == NULL) {
1160 parent->children = this;
1161 } else {
1162 this->sibling = parent->children->sibling;
1163 parent->children->sibling = this;
1167 static unsigned int cbq_drop(struct Qdisc *sch)
1169 struct cbq_sched_data *q = qdisc_priv(sch);
1170 struct cbq_class *cl, *cl_head;
1171 int prio;
1172 unsigned int len;
1174 for (prio = TC_CBQ_MAXPRIO; prio >= 0; prio--) {
1175 cl_head = q->active[prio];
1176 if (!cl_head)
1177 continue;
1179 cl = cl_head;
1180 do {
1181 if (cl->q->ops->drop && (len = cl->q->ops->drop(cl->q))) {
1182 sch->q.qlen--;
1183 if (!cl->q->q.qlen)
1184 cbq_deactivate_class(cl);
1185 return len;
1187 } while ((cl = cl->next_alive) != cl_head);
1189 return 0;
1192 static void
1193 cbq_reset(struct Qdisc *sch)
1195 struct cbq_sched_data *q = qdisc_priv(sch);
1196 struct cbq_class *cl;
1197 int prio;
1198 unsigned int h;
1200 q->activemask = 0;
1201 q->pmask = 0;
1202 q->tx_class = NULL;
1203 q->tx_borrowed = NULL;
1204 qdisc_watchdog_cancel(&q->watchdog);
1205 hrtimer_cancel(&q->delay_timer);
1206 q->toplevel = TC_CBQ_MAXLEVEL;
1207 q->now = psched_get_time();
1209 for (prio = 0; prio <= TC_CBQ_MAXPRIO; prio++)
1210 q->active[prio] = NULL;
1212 for (h = 0; h < q->clhash.hashsize; h++) {
1213 hlist_for_each_entry(cl, &q->clhash.hash[h], common.hnode) {
1214 qdisc_reset(cl->q);
1216 cl->next_alive = NULL;
1217 cl->undertime = PSCHED_PASTPERFECT;
1218 cl->avgidle = cl->maxidle;
1219 cl->deficit = cl->quantum;
1220 cl->cpriority = cl->priority;
1223 sch->q.qlen = 0;
1227 static int cbq_set_lss(struct cbq_class *cl, struct tc_cbq_lssopt *lss)
1229 if (lss->change & TCF_CBQ_LSS_FLAGS) {
1230 cl->share = (lss->flags & TCF_CBQ_LSS_ISOLATED) ? NULL : cl->tparent;
1231 cl->borrow = (lss->flags & TCF_CBQ_LSS_BOUNDED) ? NULL : cl->tparent;
1233 if (lss->change & TCF_CBQ_LSS_EWMA)
1234 cl->ewma_log = lss->ewma_log;
1235 if (lss->change & TCF_CBQ_LSS_AVPKT)
1236 cl->avpkt = lss->avpkt;
1237 if (lss->change & TCF_CBQ_LSS_MINIDLE)
1238 cl->minidle = -(long)lss->minidle;
1239 if (lss->change & TCF_CBQ_LSS_MAXIDLE) {
1240 cl->maxidle = lss->maxidle;
1241 cl->avgidle = lss->maxidle;
1243 if (lss->change & TCF_CBQ_LSS_OFFTIME)
1244 cl->offtime = lss->offtime;
1245 return 0;
1248 static void cbq_rmprio(struct cbq_sched_data *q, struct cbq_class *cl)
1250 q->nclasses[cl->priority]--;
1251 q->quanta[cl->priority] -= cl->weight;
1252 cbq_normalize_quanta(q, cl->priority);
1255 static void cbq_addprio(struct cbq_sched_data *q, struct cbq_class *cl)
1257 q->nclasses[cl->priority]++;
1258 q->quanta[cl->priority] += cl->weight;
1259 cbq_normalize_quanta(q, cl->priority);
1262 static int cbq_set_wrr(struct cbq_class *cl, struct tc_cbq_wrropt *wrr)
1264 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
1266 if (wrr->allot)
1267 cl->allot = wrr->allot;
1268 if (wrr->weight)
1269 cl->weight = wrr->weight;
1270 if (wrr->priority) {
1271 cl->priority = wrr->priority - 1;
1272 cl->cpriority = cl->priority;
1273 if (cl->priority >= cl->priority2)
1274 cl->priority2 = TC_CBQ_MAXPRIO - 1;
1277 cbq_addprio(q, cl);
1278 return 0;
1281 static int cbq_set_overlimit(struct cbq_class *cl, struct tc_cbq_ovl *ovl)
1283 switch (ovl->strategy) {
1284 case TC_CBQ_OVL_CLASSIC:
1285 cl->overlimit = cbq_ovl_classic;
1286 break;
1287 case TC_CBQ_OVL_DELAY:
1288 cl->overlimit = cbq_ovl_delay;
1289 break;
1290 case TC_CBQ_OVL_LOWPRIO:
1291 if (ovl->priority2 - 1 >= TC_CBQ_MAXPRIO ||
1292 ovl->priority2 - 1 <= cl->priority)
1293 return -EINVAL;
1294 cl->priority2 = ovl->priority2 - 1;
1295 cl->overlimit = cbq_ovl_lowprio;
1296 break;
1297 case TC_CBQ_OVL_DROP:
1298 cl->overlimit = cbq_ovl_drop;
1299 break;
1300 case TC_CBQ_OVL_RCLASSIC:
1301 cl->overlimit = cbq_ovl_rclassic;
1302 break;
1303 default:
1304 return -EINVAL;
1306 cl->penalty = ovl->penalty;
1307 return 0;
1310 #ifdef CONFIG_NET_CLS_ACT
1311 static int cbq_set_police(struct cbq_class *cl, struct tc_cbq_police *p)
1313 cl->police = p->police;
1315 if (cl->q->handle) {
1316 if (p->police == TC_POLICE_RECLASSIFY)
1317 cl->q->reshape_fail = cbq_reshape_fail;
1318 else
1319 cl->q->reshape_fail = NULL;
1321 return 0;
1323 #endif
1325 static int cbq_set_fopt(struct cbq_class *cl, struct tc_cbq_fopt *fopt)
1327 cbq_change_defmap(cl, fopt->split, fopt->defmap, fopt->defchange);
1328 return 0;
1331 static const struct nla_policy cbq_policy[TCA_CBQ_MAX + 1] = {
1332 [TCA_CBQ_LSSOPT] = { .len = sizeof(struct tc_cbq_lssopt) },
1333 [TCA_CBQ_WRROPT] = { .len = sizeof(struct tc_cbq_wrropt) },
1334 [TCA_CBQ_FOPT] = { .len = sizeof(struct tc_cbq_fopt) },
1335 [TCA_CBQ_OVL_STRATEGY] = { .len = sizeof(struct tc_cbq_ovl) },
1336 [TCA_CBQ_RATE] = { .len = sizeof(struct tc_ratespec) },
1337 [TCA_CBQ_RTAB] = { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
1338 [TCA_CBQ_POLICE] = { .len = sizeof(struct tc_cbq_police) },
1341 static int cbq_init(struct Qdisc *sch, struct nlattr *opt)
1343 struct cbq_sched_data *q = qdisc_priv(sch);
1344 struct nlattr *tb[TCA_CBQ_MAX + 1];
1345 struct tc_ratespec *r;
1346 int err;
1348 err = nla_parse_nested(tb, TCA_CBQ_MAX, opt, cbq_policy);
1349 if (err < 0)
1350 return err;
1352 if (tb[TCA_CBQ_RTAB] == NULL || tb[TCA_CBQ_RATE] == NULL)
1353 return -EINVAL;
1355 r = nla_data(tb[TCA_CBQ_RATE]);
1357 if ((q->link.R_tab = qdisc_get_rtab(r, tb[TCA_CBQ_RTAB])) == NULL)
1358 return -EINVAL;
1360 err = qdisc_class_hash_init(&q->clhash);
1361 if (err < 0)
1362 goto put_rtab;
1364 q->link.refcnt = 1;
1365 q->link.sibling = &q->link;
1366 q->link.common.classid = sch->handle;
1367 q->link.qdisc = sch;
1368 q->link.q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops,
1369 sch->handle);
1370 if (!q->link.q)
1371 q->link.q = &noop_qdisc;
1373 q->link.priority = TC_CBQ_MAXPRIO - 1;
1374 q->link.priority2 = TC_CBQ_MAXPRIO - 1;
1375 q->link.cpriority = TC_CBQ_MAXPRIO - 1;
1376 q->link.ovl_strategy = TC_CBQ_OVL_CLASSIC;
1377 q->link.overlimit = cbq_ovl_classic;
1378 q->link.allot = psched_mtu(qdisc_dev(sch));
1379 q->link.quantum = q->link.allot;
1380 q->link.weight = q->link.R_tab->rate.rate;
1382 q->link.ewma_log = TC_CBQ_DEF_EWMA;
1383 q->link.avpkt = q->link.allot/2;
1384 q->link.minidle = -0x7FFFFFFF;
1386 qdisc_watchdog_init(&q->watchdog, sch);
1387 hrtimer_init(&q->delay_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
1388 q->delay_timer.function = cbq_undelay;
1389 q->toplevel = TC_CBQ_MAXLEVEL;
1390 q->now = psched_get_time();
1392 cbq_link_class(&q->link);
1394 if (tb[TCA_CBQ_LSSOPT])
1395 cbq_set_lss(&q->link, nla_data(tb[TCA_CBQ_LSSOPT]));
1397 cbq_addprio(q, &q->link);
1398 return 0;
1400 put_rtab:
1401 qdisc_put_rtab(q->link.R_tab);
1402 return err;
1405 static int cbq_dump_rate(struct sk_buff *skb, struct cbq_class *cl)
1407 unsigned char *b = skb_tail_pointer(skb);
1409 if (nla_put(skb, TCA_CBQ_RATE, sizeof(cl->R_tab->rate), &cl->R_tab->rate))
1410 goto nla_put_failure;
1411 return skb->len;
1413 nla_put_failure:
1414 nlmsg_trim(skb, b);
1415 return -1;
1418 static int cbq_dump_lss(struct sk_buff *skb, struct cbq_class *cl)
1420 unsigned char *b = skb_tail_pointer(skb);
1421 struct tc_cbq_lssopt opt;
1423 opt.flags = 0;
1424 if (cl->borrow == NULL)
1425 opt.flags |= TCF_CBQ_LSS_BOUNDED;
1426 if (cl->share == NULL)
1427 opt.flags |= TCF_CBQ_LSS_ISOLATED;
1428 opt.ewma_log = cl->ewma_log;
1429 opt.level = cl->level;
1430 opt.avpkt = cl->avpkt;
1431 opt.maxidle = cl->maxidle;
1432 opt.minidle = (u32)(-cl->minidle);
1433 opt.offtime = cl->offtime;
1434 opt.change = ~0;
1435 if (nla_put(skb, TCA_CBQ_LSSOPT, sizeof(opt), &opt))
1436 goto nla_put_failure;
1437 return skb->len;
1439 nla_put_failure:
1440 nlmsg_trim(skb, b);
1441 return -1;
1444 static int cbq_dump_wrr(struct sk_buff *skb, struct cbq_class *cl)
1446 unsigned char *b = skb_tail_pointer(skb);
1447 struct tc_cbq_wrropt opt;
1449 memset(&opt, 0, sizeof(opt));
1450 opt.flags = 0;
1451 opt.allot = cl->allot;
1452 opt.priority = cl->priority + 1;
1453 opt.cpriority = cl->cpriority + 1;
1454 opt.weight = cl->weight;
1455 if (nla_put(skb, TCA_CBQ_WRROPT, sizeof(opt), &opt))
1456 goto nla_put_failure;
1457 return skb->len;
1459 nla_put_failure:
1460 nlmsg_trim(skb, b);
1461 return -1;
1464 static int cbq_dump_ovl(struct sk_buff *skb, struct cbq_class *cl)
1466 unsigned char *b = skb_tail_pointer(skb);
1467 struct tc_cbq_ovl opt;
1469 opt.strategy = cl->ovl_strategy;
1470 opt.priority2 = cl->priority2 + 1;
1471 opt.pad = 0;
1472 opt.penalty = cl->penalty;
1473 if (nla_put(skb, TCA_CBQ_OVL_STRATEGY, sizeof(opt), &opt))
1474 goto nla_put_failure;
1475 return skb->len;
1477 nla_put_failure:
1478 nlmsg_trim(skb, b);
1479 return -1;
1482 static int cbq_dump_fopt(struct sk_buff *skb, struct cbq_class *cl)
1484 unsigned char *b = skb_tail_pointer(skb);
1485 struct tc_cbq_fopt opt;
1487 if (cl->split || cl->defmap) {
1488 opt.split = cl->split ? cl->split->common.classid : 0;
1489 opt.defmap = cl->defmap;
1490 opt.defchange = ~0;
1491 if (nla_put(skb, TCA_CBQ_FOPT, sizeof(opt), &opt))
1492 goto nla_put_failure;
1494 return skb->len;
1496 nla_put_failure:
1497 nlmsg_trim(skb, b);
1498 return -1;
1501 #ifdef CONFIG_NET_CLS_ACT
1502 static int cbq_dump_police(struct sk_buff *skb, struct cbq_class *cl)
1504 unsigned char *b = skb_tail_pointer(skb);
1505 struct tc_cbq_police opt;
1507 if (cl->police) {
1508 opt.police = cl->police;
1509 opt.__res1 = 0;
1510 opt.__res2 = 0;
1511 if (nla_put(skb, TCA_CBQ_POLICE, sizeof(opt), &opt))
1512 goto nla_put_failure;
1514 return skb->len;
1516 nla_put_failure:
1517 nlmsg_trim(skb, b);
1518 return -1;
1520 #endif
1522 static int cbq_dump_attr(struct sk_buff *skb, struct cbq_class *cl)
1524 if (cbq_dump_lss(skb, cl) < 0 ||
1525 cbq_dump_rate(skb, cl) < 0 ||
1526 cbq_dump_wrr(skb, cl) < 0 ||
1527 cbq_dump_ovl(skb, cl) < 0 ||
1528 #ifdef CONFIG_NET_CLS_ACT
1529 cbq_dump_police(skb, cl) < 0 ||
1530 #endif
1531 cbq_dump_fopt(skb, cl) < 0)
1532 return -1;
1533 return 0;
1536 static int cbq_dump(struct Qdisc *sch, struct sk_buff *skb)
1538 struct cbq_sched_data *q = qdisc_priv(sch);
1539 struct nlattr *nest;
1541 nest = nla_nest_start(skb, TCA_OPTIONS);
1542 if (nest == NULL)
1543 goto nla_put_failure;
1544 if (cbq_dump_attr(skb, &q->link) < 0)
1545 goto nla_put_failure;
1546 return nla_nest_end(skb, nest);
1548 nla_put_failure:
1549 nla_nest_cancel(skb, nest);
1550 return -1;
1553 static int
1554 cbq_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
1556 struct cbq_sched_data *q = qdisc_priv(sch);
1558 q->link.xstats.avgidle = q->link.avgidle;
1559 return gnet_stats_copy_app(d, &q->link.xstats, sizeof(q->link.xstats));
1562 static int
1563 cbq_dump_class(struct Qdisc *sch, unsigned long arg,
1564 struct sk_buff *skb, struct tcmsg *tcm)
1566 struct cbq_class *cl = (struct cbq_class *)arg;
1567 struct nlattr *nest;
1569 if (cl->tparent)
1570 tcm->tcm_parent = cl->tparent->common.classid;
1571 else
1572 tcm->tcm_parent = TC_H_ROOT;
1573 tcm->tcm_handle = cl->common.classid;
1574 tcm->tcm_info = cl->q->handle;
1576 nest = nla_nest_start(skb, TCA_OPTIONS);
1577 if (nest == NULL)
1578 goto nla_put_failure;
1579 if (cbq_dump_attr(skb, cl) < 0)
1580 goto nla_put_failure;
1581 return nla_nest_end(skb, nest);
1583 nla_put_failure:
1584 nla_nest_cancel(skb, nest);
1585 return -1;
1588 static int
1589 cbq_dump_class_stats(struct Qdisc *sch, unsigned long arg,
1590 struct gnet_dump *d)
1592 struct cbq_sched_data *q = qdisc_priv(sch);
1593 struct cbq_class *cl = (struct cbq_class *)arg;
1595 cl->qstats.qlen = cl->q->q.qlen;
1596 cl->xstats.avgidle = cl->avgidle;
1597 cl->xstats.undertime = 0;
1599 if (cl->undertime != PSCHED_PASTPERFECT)
1600 cl->xstats.undertime = cl->undertime - q->now;
1602 if (gnet_stats_copy_basic(d, &cl->bstats) < 0 ||
1603 gnet_stats_copy_rate_est(d, &cl->bstats, &cl->rate_est) < 0 ||
1604 gnet_stats_copy_queue(d, &cl->qstats) < 0)
1605 return -1;
1607 return gnet_stats_copy_app(d, &cl->xstats, sizeof(cl->xstats));
1610 static int cbq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
1611 struct Qdisc **old)
1613 struct cbq_class *cl = (struct cbq_class *)arg;
1615 if (new == NULL) {
1616 new = qdisc_create_dflt(sch->dev_queue,
1617 &pfifo_qdisc_ops, cl->common.classid);
1618 if (new == NULL)
1619 return -ENOBUFS;
1620 } else {
1621 #ifdef CONFIG_NET_CLS_ACT
1622 if (cl->police == TC_POLICE_RECLASSIFY)
1623 new->reshape_fail = cbq_reshape_fail;
1624 #endif
1626 sch_tree_lock(sch);
1627 *old = cl->q;
1628 cl->q = new;
1629 qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
1630 qdisc_reset(*old);
1631 sch_tree_unlock(sch);
1633 return 0;
1636 static struct Qdisc *cbq_leaf(struct Qdisc *sch, unsigned long arg)
1638 struct cbq_class *cl = (struct cbq_class *)arg;
1640 return cl->q;
1643 static void cbq_qlen_notify(struct Qdisc *sch, unsigned long arg)
1645 struct cbq_class *cl = (struct cbq_class *)arg;
1647 if (cl->q->q.qlen == 0)
1648 cbq_deactivate_class(cl);
1651 static unsigned long cbq_get(struct Qdisc *sch, u32 classid)
1653 struct cbq_sched_data *q = qdisc_priv(sch);
1654 struct cbq_class *cl = cbq_class_lookup(q, classid);
1656 if (cl) {
1657 cl->refcnt++;
1658 return (unsigned long)cl;
1660 return 0;
1663 static void cbq_destroy_class(struct Qdisc *sch, struct cbq_class *cl)
1665 struct cbq_sched_data *q = qdisc_priv(sch);
1667 WARN_ON(cl->filters);
1669 tcf_destroy_chain(&cl->filter_list);
1670 qdisc_destroy(cl->q);
1671 qdisc_put_rtab(cl->R_tab);
1672 gen_kill_estimator(&cl->bstats, &cl->rate_est);
1673 if (cl != &q->link)
1674 kfree(cl);
1677 static void cbq_destroy(struct Qdisc *sch)
1679 struct cbq_sched_data *q = qdisc_priv(sch);
1680 struct hlist_node *next;
1681 struct cbq_class *cl;
1682 unsigned int h;
1684 #ifdef CONFIG_NET_CLS_ACT
1685 q->rx_class = NULL;
1686 #endif
1688 * Filters must be destroyed first because we don't destroy the
1689 * classes from root to leaves, which means that filters can still
1690 * be bound to classes which have already been destroyed. --TGR '04
1692 for (h = 0; h < q->clhash.hashsize; h++) {
1693 hlist_for_each_entry(cl, &q->clhash.hash[h], common.hnode)
1694 tcf_destroy_chain(&cl->filter_list);
1696 for (h = 0; h < q->clhash.hashsize; h++) {
1697 hlist_for_each_entry_safe(cl, next, &q->clhash.hash[h],
1698 common.hnode)
1699 cbq_destroy_class(sch, cl);
1701 qdisc_class_hash_destroy(&q->clhash);
1704 static void cbq_put(struct Qdisc *sch, unsigned long arg)
1706 struct cbq_class *cl = (struct cbq_class *)arg;
1708 if (--cl->refcnt == 0) {
1709 #ifdef CONFIG_NET_CLS_ACT
1710 spinlock_t *root_lock = qdisc_root_sleeping_lock(sch);
1711 struct cbq_sched_data *q = qdisc_priv(sch);
1713 spin_lock_bh(root_lock);
1714 if (q->rx_class == cl)
1715 q->rx_class = NULL;
1716 spin_unlock_bh(root_lock);
1717 #endif
1719 cbq_destroy_class(sch, cl);
1723 static int
1724 cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **tca,
1725 unsigned long *arg)
1727 int err;
1728 struct cbq_sched_data *q = qdisc_priv(sch);
1729 struct cbq_class *cl = (struct cbq_class *)*arg;
1730 struct nlattr *opt = tca[TCA_OPTIONS];
1731 struct nlattr *tb[TCA_CBQ_MAX + 1];
1732 struct cbq_class *parent;
1733 struct qdisc_rate_table *rtab = NULL;
1735 if (opt == NULL)
1736 return -EINVAL;
1738 err = nla_parse_nested(tb, TCA_CBQ_MAX, opt, cbq_policy);
1739 if (err < 0)
1740 return err;
1742 if (cl) {
1743 /* Check parent */
1744 if (parentid) {
1745 if (cl->tparent &&
1746 cl->tparent->common.classid != parentid)
1747 return -EINVAL;
1748 if (!cl->tparent && parentid != TC_H_ROOT)
1749 return -EINVAL;
1752 if (tb[TCA_CBQ_RATE]) {
1753 rtab = qdisc_get_rtab(nla_data(tb[TCA_CBQ_RATE]),
1754 tb[TCA_CBQ_RTAB]);
1755 if (rtab == NULL)
1756 return -EINVAL;
1759 if (tca[TCA_RATE]) {
1760 err = gen_replace_estimator(&cl->bstats, &cl->rate_est,
1761 qdisc_root_sleeping_lock(sch),
1762 tca[TCA_RATE]);
1763 if (err) {
1764 qdisc_put_rtab(rtab);
1765 return err;
1769 /* Change class parameters */
1770 sch_tree_lock(sch);
1772 if (cl->next_alive != NULL)
1773 cbq_deactivate_class(cl);
1775 if (rtab) {
1776 qdisc_put_rtab(cl->R_tab);
1777 cl->R_tab = rtab;
1780 if (tb[TCA_CBQ_LSSOPT])
1781 cbq_set_lss(cl, nla_data(tb[TCA_CBQ_LSSOPT]));
1783 if (tb[TCA_CBQ_WRROPT]) {
1784 cbq_rmprio(q, cl);
1785 cbq_set_wrr(cl, nla_data(tb[TCA_CBQ_WRROPT]));
1788 if (tb[TCA_CBQ_OVL_STRATEGY])
1789 cbq_set_overlimit(cl, nla_data(tb[TCA_CBQ_OVL_STRATEGY]));
1791 #ifdef CONFIG_NET_CLS_ACT
1792 if (tb[TCA_CBQ_POLICE])
1793 cbq_set_police(cl, nla_data(tb[TCA_CBQ_POLICE]));
1794 #endif
1796 if (tb[TCA_CBQ_FOPT])
1797 cbq_set_fopt(cl, nla_data(tb[TCA_CBQ_FOPT]));
1799 if (cl->q->q.qlen)
1800 cbq_activate_class(cl);
1802 sch_tree_unlock(sch);
1804 return 0;
1807 if (parentid == TC_H_ROOT)
1808 return -EINVAL;
1810 if (tb[TCA_CBQ_WRROPT] == NULL || tb[TCA_CBQ_RATE] == NULL ||
1811 tb[TCA_CBQ_LSSOPT] == NULL)
1812 return -EINVAL;
1814 rtab = qdisc_get_rtab(nla_data(tb[TCA_CBQ_RATE]), tb[TCA_CBQ_RTAB]);
1815 if (rtab == NULL)
1816 return -EINVAL;
1818 if (classid) {
1819 err = -EINVAL;
1820 if (TC_H_MAJ(classid ^ sch->handle) ||
1821 cbq_class_lookup(q, classid))
1822 goto failure;
1823 } else {
1824 int i;
1825 classid = TC_H_MAKE(sch->handle, 0x8000);
1827 for (i = 0; i < 0x8000; i++) {
1828 if (++q->hgenerator >= 0x8000)
1829 q->hgenerator = 1;
1830 if (cbq_class_lookup(q, classid|q->hgenerator) == NULL)
1831 break;
1833 err = -ENOSR;
1834 if (i >= 0x8000)
1835 goto failure;
1836 classid = classid|q->hgenerator;
1839 parent = &q->link;
1840 if (parentid) {
1841 parent = cbq_class_lookup(q, parentid);
1842 err = -EINVAL;
1843 if (parent == NULL)
1844 goto failure;
1847 err = -ENOBUFS;
1848 cl = kzalloc(sizeof(*cl), GFP_KERNEL);
1849 if (cl == NULL)
1850 goto failure;
1852 if (tca[TCA_RATE]) {
1853 err = gen_new_estimator(&cl->bstats, &cl->rate_est,
1854 qdisc_root_sleeping_lock(sch),
1855 tca[TCA_RATE]);
1856 if (err) {
1857 kfree(cl);
1858 goto failure;
1862 cl->R_tab = rtab;
1863 rtab = NULL;
1864 cl->refcnt = 1;
1865 cl->q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, classid);
1866 if (!cl->q)
1867 cl->q = &noop_qdisc;
1868 cl->common.classid = classid;
1869 cl->tparent = parent;
1870 cl->qdisc = sch;
1871 cl->allot = parent->allot;
1872 cl->quantum = cl->allot;
1873 cl->weight = cl->R_tab->rate.rate;
1875 sch_tree_lock(sch);
1876 cbq_link_class(cl);
1877 cl->borrow = cl->tparent;
1878 if (cl->tparent != &q->link)
1879 cl->share = cl->tparent;
1880 cbq_adjust_levels(parent);
1881 cl->minidle = -0x7FFFFFFF;
1882 cbq_set_lss(cl, nla_data(tb[TCA_CBQ_LSSOPT]));
1883 cbq_set_wrr(cl, nla_data(tb[TCA_CBQ_WRROPT]));
1884 if (cl->ewma_log == 0)
1885 cl->ewma_log = q->link.ewma_log;
1886 if (cl->maxidle == 0)
1887 cl->maxidle = q->link.maxidle;
1888 if (cl->avpkt == 0)
1889 cl->avpkt = q->link.avpkt;
1890 cl->overlimit = cbq_ovl_classic;
1891 if (tb[TCA_CBQ_OVL_STRATEGY])
1892 cbq_set_overlimit(cl, nla_data(tb[TCA_CBQ_OVL_STRATEGY]));
1893 #ifdef CONFIG_NET_CLS_ACT
1894 if (tb[TCA_CBQ_POLICE])
1895 cbq_set_police(cl, nla_data(tb[TCA_CBQ_POLICE]));
1896 #endif
1897 if (tb[TCA_CBQ_FOPT])
1898 cbq_set_fopt(cl, nla_data(tb[TCA_CBQ_FOPT]));
1899 sch_tree_unlock(sch);
1901 qdisc_class_hash_grow(sch, &q->clhash);
1903 *arg = (unsigned long)cl;
1904 return 0;
1906 failure:
1907 qdisc_put_rtab(rtab);
1908 return err;
1911 static int cbq_delete(struct Qdisc *sch, unsigned long arg)
1913 struct cbq_sched_data *q = qdisc_priv(sch);
1914 struct cbq_class *cl = (struct cbq_class *)arg;
1915 unsigned int qlen;
1917 if (cl->filters || cl->children || cl == &q->link)
1918 return -EBUSY;
1920 sch_tree_lock(sch);
1922 qlen = cl->q->q.qlen;
1923 qdisc_reset(cl->q);
1924 qdisc_tree_decrease_qlen(cl->q, qlen);
1926 if (cl->next_alive)
1927 cbq_deactivate_class(cl);
1929 if (q->tx_borrowed == cl)
1930 q->tx_borrowed = q->tx_class;
1931 if (q->tx_class == cl) {
1932 q->tx_class = NULL;
1933 q->tx_borrowed = NULL;
1935 #ifdef CONFIG_NET_CLS_ACT
1936 if (q->rx_class == cl)
1937 q->rx_class = NULL;
1938 #endif
1940 cbq_unlink_class(cl);
1941 cbq_adjust_levels(cl->tparent);
1942 cl->defmap = 0;
1943 cbq_sync_defmap(cl);
1945 cbq_rmprio(q, cl);
1946 sch_tree_unlock(sch);
1948 BUG_ON(--cl->refcnt == 0);
1950 * This shouldn't happen: we "hold" one cops->get() when called
1951 * from tc_ctl_tclass; the destroy method is done from cops->put().
1954 return 0;
1957 static struct tcf_proto **cbq_find_tcf(struct Qdisc *sch, unsigned long arg)
1959 struct cbq_sched_data *q = qdisc_priv(sch);
1960 struct cbq_class *cl = (struct cbq_class *)arg;
1962 if (cl == NULL)
1963 cl = &q->link;
1965 return &cl->filter_list;
1968 static unsigned long cbq_bind_filter(struct Qdisc *sch, unsigned long parent,
1969 u32 classid)
1971 struct cbq_sched_data *q = qdisc_priv(sch);
1972 struct cbq_class *p = (struct cbq_class *)parent;
1973 struct cbq_class *cl = cbq_class_lookup(q, classid);
1975 if (cl) {
1976 if (p && p->level <= cl->level)
1977 return 0;
1978 cl->filters++;
1979 return (unsigned long)cl;
1981 return 0;
1984 static void cbq_unbind_filter(struct Qdisc *sch, unsigned long arg)
1986 struct cbq_class *cl = (struct cbq_class *)arg;
1988 cl->filters--;
1991 static void cbq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
1993 struct cbq_sched_data *q = qdisc_priv(sch);
1994 struct cbq_class *cl;
1995 unsigned int h;
1997 if (arg->stop)
1998 return;
2000 for (h = 0; h < q->clhash.hashsize; h++) {
2001 hlist_for_each_entry(cl, &q->clhash.hash[h], common.hnode) {
2002 if (arg->count < arg->skip) {
2003 arg->count++;
2004 continue;
2006 if (arg->fn(sch, (unsigned long)cl, arg) < 0) {
2007 arg->stop = 1;
2008 return;
2010 arg->count++;
2015 static const struct Qdisc_class_ops cbq_class_ops = {
2016 .graft = cbq_graft,
2017 .leaf = cbq_leaf,
2018 .qlen_notify = cbq_qlen_notify,
2019 .get = cbq_get,
2020 .put = cbq_put,
2021 .change = cbq_change_class,
2022 .delete = cbq_delete,
2023 .walk = cbq_walk,
2024 .tcf_chain = cbq_find_tcf,
2025 .bind_tcf = cbq_bind_filter,
2026 .unbind_tcf = cbq_unbind_filter,
2027 .dump = cbq_dump_class,
2028 .dump_stats = cbq_dump_class_stats,
2031 static struct Qdisc_ops cbq_qdisc_ops __read_mostly = {
2032 .next = NULL,
2033 .cl_ops = &cbq_class_ops,
2034 .id = "cbq",
2035 .priv_size = sizeof(struct cbq_sched_data),
2036 .enqueue = cbq_enqueue,
2037 .dequeue = cbq_dequeue,
2038 .peek = qdisc_peek_dequeued,
2039 .drop = cbq_drop,
2040 .init = cbq_init,
2041 .reset = cbq_reset,
2042 .destroy = cbq_destroy,
2043 .change = NULL,
2044 .dump = cbq_dump,
2045 .dump_stats = cbq_dump_stats,
2046 .owner = THIS_MODULE,
2049 static int __init cbq_module_init(void)
2051 return register_qdisc(&cbq_qdisc_ops);
2053 static void __exit cbq_module_exit(void)
2055 unregister_qdisc(&cbq_qdisc_ops);
2057 module_init(cbq_module_init)
2058 module_exit(cbq_module_exit)
2059 MODULE_LICENSE("GPL");