net/sched/sch_cbq.c

   1 /*
   2  * net/sched/sch_cbq.c  Class-Based Queueing discipline.
   3  *
   4  *              This program is free software; you can redistribute it and/or
   5  *              modify it under the terms of the GNU General Public License
   6  *              as published by the Free Software Foundation; either version
   7  *              2 of the License, or (at your option) any later version.
   8  *
   9  * Authors:     Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
  10  *
  11  */
  12
  13 #include <linux/module.h>
  14 #include <linux/slab.h>
  15 #include <linux/types.h>
  16 #include <linux/kernel.h>
  17 #include <linux/string.h>
  18 #include <linux/errno.h>
  19 #include <linux/skbuff.h>
  20 #include <net/netlink.h>
  21 #include <net/pkt_sched.h>
  22
  23
  24 /*      Class-Based Queueing (CBQ) algorithm.
  25         =======================================
  26
  27         Sources: [1] Sally Floyd and Van Jacobson, "Link-sharing and Resource
  28                  Management Models for Packet Networks",
  29                  IEEE/ACM Transactions on Networking, Vol.3, No.4, 1995
  30
  31                  [2] Sally Floyd, "Notes on CBQ and Guaranteed Service", 1995
  32
  33                  [3] Sally Floyd, "Notes on Class-Based Queueing: Setting
  34                  Parameters", 1996
  35
  36                  [4] Sally Floyd and Michael Speer, "Experimental Results
  37                  for Class-Based Queueing", 1998, not published.
  38
  39         -----------------------------------------------------------------------
  40
  41         Algorithm skeleton was taken from NS simulator cbq.cc.
  42         If someone wants to check this code against the LBL version,
  43         he should take into account that ONLY the skeleton was borrowed,
  44         the implementation is different. Particularly:
  45
  46         --- The WRR algorithm is different. Our version looks more
  47         reasonable (I hope) and works when quanta are allowed to be
  48         less than MTU, which is always the case when real time classes
  49         have small rates. Note, that the statement of [3] is
  50         incomplete, delay may actually be estimated even if class
  51         per-round allotment is less than MTU. Namely, if per-round
  52         allotment is W*r_i, and r_1+...+r_k = r < 1
  53
  54         delay_i <= ([MTU/(W*r_i)]*W*r + W*r + k*MTU)/B
  55
  56         In the worst case we have IntServ estimate with D = W*r+k*MTU
  57         and C = MTU*r. The proof (if correct at all) is trivial.
  58
  59
  60         --- It seems that cbq-2.0 is not very accurate. At least, I cannot
  61         interpret some places, which look like wrong translations
  62         from NS. Anyone is advised to find these differences
  63         and explain to me, why I am wrong 8).
  64
  65         --- Linux has no EOI event, so that we cannot estimate true class
  66         idle time. Workaround is to consider the next dequeue event
  67         as sign that previous packet is finished. This is wrong because of
  68         internal device queueing, but on a permanently loaded link it is true.
  69         Moreover, combined with clock integrator, this scheme looks
  70         very close to an ideal solution.  */
  71
  72 struct cbq_sched_data;
  73
  74
  75 struct cbq_class {
  76         struct Qdisc_class_common common;
  77         struct cbq_class        *next_alive;    /* next class with backlog in this priority band */
  78
  79 /* Parameters */
  80         unsigned char           priority;       /* class priority */
  81         unsigned char           priority2;      /* priority to be used after overlimit */
  82         unsigned char           ewma_log;       /* time constant for idle time calculation */
  83         unsigned char           ovl_strategy;
  84 #ifdef CONFIG_NET_CLS_ACT
  85         unsigned char           police;
  86 #endif
  87
  88         u32                     defmap;
  89
  90         /* Link-sharing scheduler parameters */
  91         long                    maxidle;        /* Class parameters: see below. */
  92         long                    offtime;
  93         long                    minidle;
  94         u32                     avpkt;
  95         struct qdisc_rate_table *R_tab;
  96
  97         /* Overlimit strategy parameters */
  98         void                    (*overlimit)(struct cbq_class *cl);
  99         psched_tdiff_t          penalty;
 100
 101         /* General scheduler (WRR) parameters */
 102         long                    allot;
 103         long                    quantum;        /* Allotment per WRR round */
 104         long                    weight;         /* Relative allotment: see below */
 105
 106         struct Qdisc            *qdisc;         /* Ptr to CBQ discipline */
 107         struct cbq_class        *split;         /* Ptr to split node */
 108         struct cbq_class        *share;         /* Ptr to LS parent in the class tree */
 109         struct cbq_class        *tparent;       /* Ptr to tree parent in the class tree */
 110         struct cbq_class        *borrow;        /* NULL if class is bandwidth limited;
 111                                                    parent otherwise */
 112         struct cbq_class        *sibling;       /* Sibling chain */
 113         struct cbq_class        *children;      /* Pointer to children chain */
 114
 115         struct Qdisc            *q;             /* Elementary queueing discipline */
 116
 117
 118 /* Variables */
 119         unsigned char           cpriority;      /* Effective priority */
 120         unsigned char           delayed;
 121         unsigned char           level;          /* level of the class in hierarchy:
 122                                                    0 for leaf classes, and maximal
 123                                                    level of children + 1 for nodes.
 124                                                  */
 125
 126         psched_time_t           last;           /* Last end of service */
 127         psched_time_t           undertime;
 128         long                    avgidle;
 129         long                    deficit;        /* Saved deficit for WRR */
 130         psched_time_t           penalized;
 131         struct gnet_stats_basic_packed bstats;
 132         struct gnet_stats_queue qstats;
 133         struct gnet_stats_rate_est rate_est;
 134         struct tc_cbq_xstats    xstats;
 135
 136         struct tcf_proto        *filter_list;
 137
 138         int                     refcnt;
 139         int                     filters;
 140
 141         struct cbq_class        *defaults[TC_PRIO_MAX + 1];
 142 };
 143
 144 struct cbq_sched_data {
 145         struct Qdisc_class_hash clhash;                 /* Hash table of all classes */
 146         int                     nclasses[TC_CBQ_MAXPRIO + 1];
 147         unsigned int            quanta[TC_CBQ_MAXPRIO + 1];
 148
 149         struct cbq_class        link;
 150
 151         unsigned int            activemask;
 152         struct cbq_class        *active[TC_CBQ_MAXPRIO + 1];    /* List of all classes
 153                                                                    with backlog */
 154
 155 #ifdef CONFIG_NET_CLS_ACT
 156         struct cbq_class        *rx_class;
 157 #endif
 158         struct cbq_class        *tx_class;
 159         struct cbq_class        *tx_borrowed;
 160         int                     tx_len;
 161         psched_time_t           now;            /* Cached timestamp */
 162         psched_time_t           now_rt;         /* Cached real time */
 163         unsigned int            pmask;
 164
 165         struct hrtimer          delay_timer;
 166         struct qdisc_watchdog   watchdog;       /* Watchdog timer,
 167                                                    started when CBQ has
 168                                                    backlog, but cannot
 169                                                    transmit just now */
 170         psched_tdiff_t          wd_expires;
 171         int                     toplevel;
 172         u32                     hgenerator;
 173 };
 174
 175
 176 #define L2T(cl, len)    qdisc_l2t((cl)->R_tab, len)
 177
 178 static inline struct cbq_class *
 179 cbq_class_lookup(struct cbq_sched_data *q, u32 classid)
 180 {
 181         struct Qdisc_class_common *clc;
 182
 183         clc = qdisc_class_find(&q->clhash, classid);
 184         if (clc == NULL)
 185                 return NULL;
 186         return container_of(clc, struct cbq_class, common);
 187 }
 188
 189 #ifdef CONFIG_NET_CLS_ACT
 190
 191 static struct cbq_class *
 192 cbq_reclassify(struct sk_buff *skb, struct cbq_class *this)
 193 {
 194         struct cbq_class *cl;
 195
 196         for (cl = this->tparent; cl; cl = cl->tparent) {
 197                 struct cbq_class *new = cl->defaults[TC_PRIO_BESTEFFORT];
 198
 199                 if (new != NULL && new != this)
 200                         return new;
 201         }
 202         return NULL;
 203 }
 204
 205 #endif
 206
 207 /* Classify packet. The procedure is pretty complicated, but
 208  * it allows us to combine link sharing and priority scheduling
 209  * transparently.
 210  *
 211  * Namely, you can put link sharing rules (f.e. route based) at root of CBQ,
 212  * so that it resolves to split nodes. Then packets are classified
 213  * by logical priority, or a more specific classifier may be attached
 214  * to the split node.
 215  */
 216
 217 static struct cbq_class *
 218 cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
 219 {
 220         struct cbq_sched_data *q = qdisc_priv(sch);
 221         struct cbq_class *head = &q->link;
 222         struct cbq_class **defmap;
 223         struct cbq_class *cl = NULL;
 224         u32 prio = skb->priority;
 225         struct tcf_result res;
 226
 227         /*
 228          *  Step 1. If skb->priority points to one of our classes, use it.
 229          */
 230         if (TC_H_MAJ(prio ^ sch->handle) == 0 &&
 231             (cl = cbq_class_lookup(q, prio)) != NULL)
 232                 return cl;
 233
 234         *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
 235         for (;;) {
 236                 int result = 0;
 237                 defmap = head->defaults;
 238
 239                 /*
 240                  * Step 2+n. Apply classifier.
 241                  */
 242                 if (!head->filter_list ||
 243                     (result = tc_classify_compat(skb, head->filter_list, &res)) < 0)
 244                         goto fallback;
 245
 246                 cl = (void *)res.class;
 247                 if (!cl) {
 248                         if (TC_H_MAJ(res.classid))
 249                                 cl = cbq_class_lookup(q, res.classid);
 250                         else if ((cl = defmap[res.classid & TC_PRIO_MAX]) == NULL)
 251                                 cl = defmap[TC_PRIO_BESTEFFORT];
 252
 253                         if (cl == NULL || cl->level >= head->level)
 254                                 goto fallback;
 255                 }
 256
 257 #ifdef CONFIG_NET_CLS_ACT
 258                 switch (result) {
 259                 case TC_ACT_QUEUED:
 260                 case TC_ACT_STOLEN:
 261                         *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
 262                 case TC_ACT_SHOT:
 263                         return NULL;
 264                 case TC_ACT_RECLASSIFY:
 265                         return cbq_reclassify(skb, cl);
 266                 }
 267 #endif
 268                 if (cl->level == 0)
 269                         return cl;
 270
 271                 /*
 272                  * Step 3+n. If classifier selected a link sharing class,
 273                  *         apply agency specific classifier.
 274                  *         Repeat this procdure until we hit a leaf node.
 275                  */
 276                 head = cl;
 277         }
 278
 279 fallback:
 280         cl = head;
 281
 282         /*
 283          * Step 4. No success...
 284          */
 285         if (TC_H_MAJ(prio) == 0 &&
 286             !(cl = head->defaults[prio & TC_PRIO_MAX]) &&
 287             !(cl = head->defaults[TC_PRIO_BESTEFFORT]))
 288                 return head;
 289
 290         return cl;
 291 }
 292
 293 /*
 294  * A packet has just been enqueued on the empty class.
 295  * cbq_activate_class adds it to the tail of active class list
 296  * of its priority band.
 297  */
 298
 299 static inline void cbq_activate_class(struct cbq_class *cl)
 300 {
 301         struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
 302         int prio = cl->cpriority;
 303         struct cbq_class *cl_tail;
 304
 305         cl_tail = q->active[prio];
 306         q->active[prio] = cl;
 307
 308         if (cl_tail != NULL) {
 309                 cl->next_alive = cl_tail->next_alive;
 310                 cl_tail->next_alive = cl;
 311         } else {
 312                 cl->next_alive = cl;
 313                 q->activemask |= (1<<prio);
 314         }
 315 }
 316
 317 /*
 318  * Unlink class from active chain.
 319  * Note that this same procedure is done directly in cbq_dequeue*
 320  * during round-robin procedure.
 321  */
 322
 323 static void cbq_deactivate_class(struct cbq_class *this)
 324 {
 325         struct cbq_sched_data *q = qdisc_priv(this->qdisc);
 326         int prio = this->cpriority;
 327         struct cbq_class *cl;
 328         struct cbq_class *cl_prev = q->active[prio];
 329
 330         do {
 331                 cl = cl_prev->next_alive;
 332                 if (cl == this) {
 333                         cl_prev->next_alive = cl->next_alive;
 334                         cl->next_alive = NULL;
 335
 336                         if (cl == q->active[prio]) {
 337                                 q->active[prio] = cl_prev;
 338                                 if (cl == q->active[prio]) {
 339                                         q->active[prio] = NULL;
 340                                         q->activemask &= ~(1<<prio);
 341                                         return;
 342                                 }
 343                         }
 344                         return;
 345                 }
 346         } while ((cl_prev = cl) != q->active[prio]);
 347 }
 348
 349 static void
 350 cbq_mark_toplevel(struct cbq_sched_data *q, struct cbq_class *cl)
 351 {
 352         int toplevel = q->toplevel;
 353
 354         if (toplevel > cl->level && !(qdisc_is_throttled(cl->q))) {
 355                 psched_time_t now;
 356                 psched_tdiff_t incr;
 357
 358                 now = psched_get_time();
 359                 incr = now - q->now_rt;
 360                 now = q->now + incr;
 361
 362                 do {
 363                         if (cl->undertime < now) {
 364                                 q->toplevel = cl->level;
 365                                 return;
 366                         }
 367                 } while ((cl = cl->borrow) != NULL && toplevel > cl->level);
 368         }
 369 }
 370
 371 static int
 372 cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 373 {
 374         struct cbq_sched_data *q = qdisc_priv(sch);
 375         int uninitialized_var(ret);
 376         struct cbq_class *cl = cbq_classify(skb, sch, &ret);
 377
 378 #ifdef CONFIG_NET_CLS_ACT
 379         q->rx_class = cl;
 380 #endif
 381         if (cl == NULL) {
 382                 if (ret & __NET_XMIT_BYPASS)
 383                         sch->qstats.drops++;
 384                 kfree_skb(skb);
 385                 return ret;
 386         }
 387
 388 #ifdef CONFIG_NET_CLS_ACT
 389         cl->q->__parent = sch;
 390 #endif
 391         ret = qdisc_enqueue(skb, cl->q);
 392         if (ret == NET_XMIT_SUCCESS) {
 393                 sch->q.qlen++;
 394                 cbq_mark_toplevel(q, cl);
 395                 if (!cl->next_alive)
 396                         cbq_activate_class(cl);
 397                 return ret;
 398         }
 399
 400         if (net_xmit_drop_count(ret)) {
 401                 sch->qstats.drops++;
 402                 cbq_mark_toplevel(q, cl);
 403                 cl->qstats.drops++;
 404         }
 405         return ret;
 406 }
 407
 408 /* Overlimit actions */
 409
 410 /* TC_CBQ_OVL_CLASSIC: (default) penalize leaf class by adding offtime */
 411
 412 static void cbq_ovl_classic(struct cbq_class *cl)
 413 {
 414         struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
 415         psched_tdiff_t delay = cl->undertime - q->now;
 416
 417         if (!cl->delayed) {
 418                 delay += cl->offtime;
 419
 420                 /*
 421                  * Class goes to sleep, so that it will have no
 422                  * chance to work avgidle. Let's forgive it 8)
 423                  *
 424                  * BTW cbq-2.0 has a crap in this
 425                  * place, apparently they forgot to shift it by cl->ewma_log.
 426                  */
 427                 if (cl->avgidle < 0)
 428                         delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
 429                 if (cl->avgidle < cl->minidle)
 430                         cl->avgidle = cl->minidle;
 431                 if (delay <= 0)
 432                         delay = 1;
 433                 cl->undertime = q->now + delay;
 434
 435                 cl->xstats.overactions++;
 436                 cl->delayed = 1;
 437         }
 438         if (q->wd_expires == 0 || q->wd_expires > delay)
 439                 q->wd_expires = delay;
 440
 441         /* Dirty work! We must schedule wakeups based on
 442          * real available rate, rather than leaf rate,
 443          * which may be tiny (even zero).
 444          */
 445         if (q->toplevel == TC_CBQ_MAXLEVEL) {
 446                 struct cbq_class *b;
 447                 psched_tdiff_t base_delay = q->wd_expires;
 448
 449                 for (b = cl->borrow; b; b = b->borrow) {
 450                         delay = b->undertime - q->now;
 451                         if (delay < base_delay) {
 452                                 if (delay <= 0)
 453                                         delay = 1;
 454                                 base_delay = delay;
 455                         }
 456                 }
 457
 458                 q->wd_expires = base_delay;
 459         }
 460 }
 461
 462 /* TC_CBQ_OVL_RCLASSIC: penalize by offtime classes in hierarchy, when
 463  * they go overlimit
 464  */
 465
 466 static void cbq_ovl_rclassic(struct cbq_class *cl)
 467 {
 468         struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
 469         struct cbq_class *this = cl;
 470
 471         do {
 472                 if (cl->level > q->toplevel) {
 473                         cl = NULL;
 474                         break;
 475                 }
 476         } while ((cl = cl->borrow) != NULL);
 477
 478         if (cl == NULL)
 479                 cl = this;
 480         cbq_ovl_classic(cl);
 481 }
 482
 483 /* TC_CBQ_OVL_DELAY: delay until it will go to underlimit */
 484
 485 static void cbq_ovl_delay(struct cbq_class *cl)
 486 {
 487         struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
 488         psched_tdiff_t delay = cl->undertime - q->now;
 489
 490         if (test_bit(__QDISC_STATE_DEACTIVATED,
 491                      &qdisc_root_sleeping(cl->qdisc)->state))
 492                 return;
 493
 494         if (!cl->delayed) {
 495                 psched_time_t sched = q->now;
 496                 ktime_t expires;
 497
 498                 delay += cl->offtime;
 499                 if (cl->avgidle < 0)
 500                         delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
 501                 if (cl->avgidle < cl->minidle)
 502                         cl->avgidle = cl->minidle;
 503                 cl->undertime = q->now + delay;
 504
 505                 if (delay > 0) {
 506                         sched += delay + cl->penalty;
 507                         cl->penalized = sched;
 508                         cl->cpriority = TC_CBQ_MAXPRIO;
 509                         q->pmask |= (1<<TC_CBQ_MAXPRIO);
 510
 511                         expires = ktime_set(0, 0);
 512                         expires = ktime_add_ns(expires, PSCHED_TICKS2NS(sched));
 513                         if (hrtimer_try_to_cancel(&q->delay_timer) &&
 514                             ktime_to_ns(ktime_sub(
 515                                         hrtimer_get_expires(&q->delay_timer),
 516                                         expires)) > 0)
 517                                 hrtimer_set_expires(&q->delay_timer, expires);
 518                         hrtimer_restart(&q->delay_timer);
 519                         cl->delayed = 1;
 520                         cl->xstats.overactions++;
 521                         return;
 522                 }
 523                 delay = 1;
 524         }
 525         if (q->wd_expires == 0 || q->wd_expires > delay)
 526                 q->wd_expires = delay;
 527 }
 528
 529 /* TC_CBQ_OVL_LOWPRIO: penalize class by lowering its priority band */
 530
 531 static void cbq_ovl_lowprio(struct cbq_class *cl)
 532 {
 533         struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
 534
 535         cl->penalized = q->now + cl->penalty;
 536
 537         if (cl->cpriority != cl->priority2) {
 538                 cl->cpriority = cl->priority2;
 539                 q->pmask |= (1<<cl->cpriority);
 540                 cl->xstats.overactions++;
 541         }
 542         cbq_ovl_classic(cl);
 543 }
 544
 545 /* TC_CBQ_OVL_DROP: penalize class by dropping */
 546
 547 static void cbq_ovl_drop(struct cbq_class *cl)
 548 {
 549         if (cl->q->ops->drop)
 550                 if (cl->q->ops->drop(cl->q))
 551                         cl->qdisc->q.qlen--;
 552         cl->xstats.overactions++;
 553         cbq_ovl_classic(cl);
 554 }
 555
 556 static psched_tdiff_t cbq_undelay_prio(struct cbq_sched_data *q, int prio,
 557                                        psched_time_t now)
 558 {
 559         struct cbq_class *cl;
 560         struct cbq_class *cl_prev = q->active[prio];
 561         psched_time_t sched = now;
 562
 563         if (cl_prev == NULL)
 564                 return 0;
 565
 566         do {
 567                 cl = cl_prev->next_alive;
 568                 if (now - cl->penalized > 0) {
 569                         cl_prev->next_alive = cl->next_alive;
 570                         cl->next_alive = NULL;
 571                         cl->cpriority = cl->priority;
 572                         cl->delayed = 0;
 573                         cbq_activate_class(cl);
 574
 575                         if (cl == q->active[prio]) {
 576                                 q->active[prio] = cl_prev;
 577                                 if (cl == q->active[prio]) {
 578                                         q->active[prio] = NULL;
 579                                         return 0;
 580                                 }
 581                         }
 582
 583                         cl = cl_prev->next_alive;
 584                 } else if (sched - cl->penalized > 0)
 585                         sched = cl->penalized;
 586         } while ((cl_prev = cl) != q->active[prio]);
 587
 588         return sched - now;
 589 }
 590
 591 static enum hrtimer_restart cbq_undelay(struct hrtimer *timer)
 592 {
 593         struct cbq_sched_data *q = container_of(timer, struct cbq_sched_data,
 594                                                 delay_timer);
 595         struct Qdisc *sch = q->watchdog.qdisc;
 596         psched_time_t now;
 597         psched_tdiff_t delay = 0;
 598         unsigned int pmask;
 599
 600         now = psched_get_time();
 601
 602         pmask = q->pmask;
 603         q->pmask = 0;
 604
 605         while (pmask) {
 606                 int prio = ffz(~pmask);
 607                 psched_tdiff_t tmp;
 608
 609                 pmask &= ~(1<<prio);
 610
 611                 tmp = cbq_undelay_prio(q, prio, now);
 612                 if (tmp > 0) {
 613                         q->pmask |= 1<<prio;
 614                         if (tmp < delay || delay == 0)
 615                                 delay = tmp;
 616                 }
 617         }
 618
 619         if (delay) {
 620                 ktime_t time;
 621
 622                 time = ktime_set(0, 0);
 623                 time = ktime_add_ns(time, PSCHED_TICKS2NS(now + delay));
 624                 hrtimer_start(&q->delay_timer, time, HRTIMER_MODE_ABS);
 625         }
 626
 627         qdisc_unthrottled(sch);
 628         __netif_schedule(qdisc_root(sch));
 629         return HRTIMER_NORESTART;
 630 }
 631
 632 #ifdef CONFIG_NET_CLS_ACT
 633 static int cbq_reshape_fail(struct sk_buff *skb, struct Qdisc *child)
 634 {
 635         struct Qdisc *sch = child->__parent;
 636         struct cbq_sched_data *q = qdisc_priv(sch);
 637         struct cbq_class *cl = q->rx_class;
 638
 639         q->rx_class = NULL;
 640
 641         if (cl && (cl = cbq_reclassify(skb, cl)) != NULL) {
 642                 int ret;
 643
 644                 cbq_mark_toplevel(q, cl);
 645
 646                 q->rx_class = cl;
 647                 cl->q->__parent = sch;
 648
 649                 ret = qdisc_enqueue(skb, cl->q);
 650                 if (ret == NET_XMIT_SUCCESS) {
 651                         sch->q.qlen++;
 652                         if (!cl->next_alive)
 653                                 cbq_activate_class(cl);
 654                         return 0;
 655                 }
 656                 if (net_xmit_drop_count(ret))
 657                         sch->qstats.drops++;
 658                 return 0;
 659         }
 660
 661         sch->qstats.drops++;
 662         return -1;
 663 }
 664 #endif
 665
 666 /*
 667  * It is mission critical procedure.
 668  *
 669  * We "regenerate" toplevel cutoff, if transmitting class
 670  * has backlog and it is not regulated. It is not part of
 671  * original CBQ description, but looks more reasonable.
 672  * Probably, it is wrong. This question needs further investigation.
 673  */
 674
 675 static inline void
 676 cbq_update_toplevel(struct cbq_sched_data *q, struct cbq_class *cl,
 677                     struct cbq_class *borrowed)
 678 {
 679         if (cl && q->toplevel >= borrowed->level) {
 680                 if (cl->q->q.qlen > 1) {
 681                         do {
 682                                 if (borrowed->undertime == PSCHED_PASTPERFECT) {
 683                                         q->toplevel = borrowed->level;
 684                                         return;
 685                                 }
 686                         } while ((borrowed = borrowed->borrow) != NULL);
 687                 }
 688 #if 0
 689         /* It is not necessary now. Uncommenting it
 690            will save CPU cycles, but decrease fairness.
 691          */
 692                 q->toplevel = TC_CBQ_MAXLEVEL;
 693 #endif
 694         }
 695 }
 696
 697 static void
 698 cbq_update(struct cbq_sched_data *q)
 699 {
 700         struct cbq_class *this = q->tx_class;
 701         struct cbq_class *cl = this;
 702         int len = q->tx_len;
 703
 704         q->tx_class = NULL;
 705
 706         for ( ; cl; cl = cl->share) {
 707                 long avgidle = cl->avgidle;
 708                 long idle;
 709
 710                 cl->bstats.packets++;
 711                 cl->bstats.bytes += len;
 712
 713                 /*
 714                  * (now - last) is total time between packet right edges.
 715                  * (last_pktlen/rate) is "virtual" busy time, so that
 716                  *
 717                  *      idle = (now - last) - last_pktlen/rate
 718                  */
 719
 720                 idle = q->now - cl->last;
 721                 if ((unsigned long)idle > 128*1024*1024) {
 722                         avgidle = cl->maxidle;
 723                 } else {
 724                         idle -= L2T(cl, len);
 725
 726                 /* true_avgidle := (1-W)*true_avgidle + W*idle,
 727                  * where W=2^{-ewma_log}. But cl->avgidle is scaled:
 728                  * cl->avgidle == true_avgidle/W,
 729                  * hence:
 730                  */
 731                         avgidle += idle - (avgidle>>cl->ewma_log);
 732                 }
 733
 734                 if (avgidle <= 0) {
 735                         /* Overlimit or at-limit */
 736
 737                         if (avgidle < cl->minidle)
 738                                 avgidle = cl->minidle;
 739
 740                         cl->avgidle = avgidle;
 741
 742                         /* Calculate expected time, when this class
 743                          * will be allowed to send.
 744                          * It will occur, when:
 745                          * (1-W)*true_avgidle + W*delay = 0, i.e.
 746                          * idle = (1/W - 1)*(-true_avgidle)
 747                          * or
 748                          * idle = (1 - W)*(-cl->avgidle);
 749                          */
 750                         idle = (-avgidle) - ((-avgidle) >> cl->ewma_log);
 751
 752                         /*
 753                          * That is not all.
 754                          * To maintain the rate allocated to the class,
 755                          * we add to undertime virtual clock,
 756                          * necessary to complete transmitted packet.
 757                          * (len/phys_bandwidth has been already passed
 758                          * to the moment of cbq_update)
 759                          */
 760
 761                         idle -= L2T(&q->link, len);
 762                         idle += L2T(cl, len);
 763
 764                         cl->undertime = q->now + idle;
 765                 } else {
 766                         /* Underlimit */
 767
 768                         cl->undertime = PSCHED_PASTPERFECT;
 769                         if (avgidle > cl->maxidle)
 770                                 cl->avgidle = cl->maxidle;
 771                         else
 772                                 cl->avgidle = avgidle;
 773                 }
 774                 cl->last = q->now;
 775         }
 776
 777         cbq_update_toplevel(q, this, q->tx_borrowed);
 778 }
 779
 780 static inline struct cbq_class *
 781 cbq_under_limit(struct cbq_class *cl)
 782 {
 783         struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
 784         struct cbq_class *this_cl = cl;
 785
 786         if (cl->tparent == NULL)
 787                 return cl;
 788
 789         if (cl->undertime == PSCHED_PASTPERFECT || q->now >= cl->undertime) {
 790                 cl->delayed = 0;
 791                 return cl;
 792         }
 793
 794         do {
 795                 /* It is very suspicious place. Now overlimit
 796                  * action is generated for not bounded classes
 797                  * only if link is completely congested.
 798                  * Though it is in agree with ancestor-only paradigm,
 799                  * it looks very stupid. Particularly,
 800                  * it means that this chunk of code will either
 801                  * never be called or result in strong amplification
 802                  * of burstiness. Dangerous, silly, and, however,
 803                  * no another solution exists.
 804                  */
 805                 cl = cl->borrow;
 806                 if (!cl) {
 807                         this_cl->qstats.overlimits++;
 808                         this_cl->overlimit(this_cl);
 809                         return NULL;
 810                 }
 811                 if (cl->level > q->toplevel)
 812                         return NULL;
 813         } while (cl->undertime != PSCHED_PASTPERFECT && q->now < cl->undertime);
 814
 815         cl->delayed = 0;
 816         return cl;
 817 }
 818
 819 static inline struct sk_buff *
 820 cbq_dequeue_prio(struct Qdisc *sch, int prio)
 821 {
 822         struct cbq_sched_data *q = qdisc_priv(sch);
 823         struct cbq_class *cl_tail, *cl_prev, *cl;
 824         struct sk_buff *skb;
 825         int deficit;
 826
 827         cl_tail = cl_prev = q->active[prio];
 828         cl = cl_prev->next_alive;
 829
 830         do {
 831                 deficit = 0;
 832
 833                 /* Start round */
 834                 do {
 835                         struct cbq_class *borrow = cl;
 836
 837                         if (cl->q->q.qlen &&
 838                             (borrow = cbq_under_limit(cl)) == NULL)
 839                                 goto skip_class;
 840
 841                         if (cl->deficit <= 0) {
 842                                 /* Class exhausted its allotment per
 843                                  * this round. Switch to the next one.
 844                                  */
 845                                 deficit = 1;
 846                                 cl->deficit += cl->quantum;
 847                                 goto next_class;
 848                         }
 849
 850                         skb = cl->q->dequeue(cl->q);
 851
 852                         /* Class did not give us any skb :-(
 853                          * It could occur even if cl->q->q.qlen != 0
 854                          * f.e. if cl->q == "tbf"
 855                          */
 856                         if (skb == NULL)
 857                                 goto skip_class;
 858
 859                         cl->deficit -= qdisc_pkt_len(skb);
 860                         q->tx_class = cl;
 861                         q->tx_borrowed = borrow;
 862                         if (borrow != cl) {
 863 #ifndef CBQ_XSTATS_BORROWS_BYTES
 864                                 borrow->xstats.borrows++;
 865                                 cl->xstats.borrows++;
 866 #else
 867                                 borrow->xstats.borrows += qdisc_pkt_len(skb);
 868                                 cl->xstats.borrows += qdisc_pkt_len(skb);
 869 #endif
 870                         }
 871                         q->tx_len = qdisc_pkt_len(skb);
 872
 873                         if (cl->deficit <= 0) {
 874                                 q->active[prio] = cl;
 875                                 cl = cl->next_alive;
 876                                 cl->deficit += cl->quantum;
 877                         }
 878                         return skb;
 879
 880 skip_class:
 881                         if (cl->q->q.qlen == 0 || prio != cl->cpriority) {
 882                                 /* Class is empty or penalized.
 883                                  * Unlink it from active chain.
 884                                  */
 885                                 cl_prev->next_alive = cl->next_alive;
 886                                 cl->next_alive = NULL;
 887
 888                                 /* Did cl_tail point to it? */
 889                                 if (cl == cl_tail) {
 890                                         /* Repair it! */
 891                                         cl_tail = cl_prev;
 892
 893                                         /* Was it the last class in this band? */
 894                                         if (cl == cl_tail) {
 895                                                 /* Kill the band! */
 896                                                 q->active[prio] = NULL;
 897                                                 q->activemask &= ~(1<<prio);
 898                                                 if (cl->q->q.qlen)
 899                                                         cbq_activate_class(cl);
 900                                                 return NULL;
 901                                         }
 902
 903                                         q->active[prio] = cl_tail;
 904                                 }
 905                                 if (cl->q->q.qlen)
 906                                         cbq_activate_class(cl);
 907
 908                                 cl = cl_prev;
 909                         }
 910
 911 next_class:
 912                         cl_prev = cl;
 913                         cl = cl->next_alive;
 914                 } while (cl_prev != cl_tail);
 915         } while (deficit);
 916
 917         q->active[prio] = cl_prev;
 918
 919         return NULL;
 920 }
 921
 922 static inline struct sk_buff *
 923 cbq_dequeue_1(struct Qdisc *sch)
 924 {
 925         struct cbq_sched_data *q = qdisc_priv(sch);
 926         struct sk_buff *skb;
 927         unsigned int activemask;
 928
 929         activemask = q->activemask & 0xFF;
 930         while (activemask) {
 931                 int prio = ffz(~activemask);
 932                 activemask &= ~(1<<prio);
 933                 skb = cbq_dequeue_prio(sch, prio);
 934                 if (skb)
 935                         return skb;
 936         }
 937         return NULL;
 938 }
 939
 940 static struct sk_buff *
 941 cbq_dequeue(struct Qdisc *sch)
 942 {
 943         struct sk_buff *skb;
 944         struct cbq_sched_data *q = qdisc_priv(sch);
 945         psched_time_t now;
 946         psched_tdiff_t incr;
 947
 948         now = psched_get_time();
 949         incr = now - q->now_rt;
 950
 951         if (q->tx_class) {
 952                 psched_tdiff_t incr2;
 953                 /* Time integrator. We calculate EOS time
 954                  * by adding expected packet transmission time.
 955                  * If real time is greater, we warp artificial clock,
 956                  * so that:
 957                  *
 958                  * cbq_time = max(real_time, work);
 959                  */
 960                 incr2 = L2T(&q->link, q->tx_len);
 961                 q->now += incr2;
 962                 cbq_update(q);
 963                 if ((incr -= incr2) < 0)
 964                         incr = 0;
 965         }
 966         q->now += incr;
 967         q->now_rt = now;
 968
 969         for (;;) {
 970                 q->wd_expires = 0;
 971
 972                 skb = cbq_dequeue_1(sch);
 973                 if (skb) {
 974                         qdisc_bstats_update(sch, skb);
 975                         sch->q.qlen--;
 976                         qdisc_unthrottled(sch);
 977                         return skb;
 978                 }
 979
 980                 /* All the classes are overlimit.
 981                  *
 982                  * It is possible, if:
 983                  *
 984                  * 1. Scheduler is empty.
 985                  * 2. Toplevel cutoff inhibited borrowing.
 986                  * 3. Root class is overlimit.
 987                  *
 988                  * Reset 2d and 3d conditions and retry.
 989                  *
 990                  * Note, that NS and cbq-2.0 are buggy, peeking
 991                  * an arbitrary class is appropriate for ancestor-only
 992                  * sharing, but not for toplevel algorithm.
 993                  *
 994                  * Our version is better, but slower, because it requires
 995                  * two passes, but it is unavoidable with top-level sharing.
 996                  */
 997
 998                 if (q->toplevel == TC_CBQ_MAXLEVEL &&
 999                     q->link.undertime == PSCHED_PASTPERFECT)
1000                         break;
1001
1002                 q->toplevel = TC_CBQ_MAXLEVEL;
1003                 q->link.undertime = PSCHED_PASTPERFECT;
1004         }
1005
1006         /* No packets in scheduler or nobody wants to give them to us :-(
1007          * Sigh... start watchdog timer in the last case.
1008          */
1009
1010         if (sch->q.qlen) {
1011                 sch->qstats.overlimits++;
1012                 if (q->wd_expires)
1013                         qdisc_watchdog_schedule(&q->watchdog,
1014                                                 now + q->wd_expires);
1015         }
1016         return NULL;
1017 }
1018
1019 /* CBQ class maintanance routines */
1020
1021 static void cbq_adjust_levels(struct cbq_class *this)
1022 {
1023         if (this == NULL)
1024                 return;
1025
1026         do {
1027                 int level = 0;
1028                 struct cbq_class *cl;
1029
1030                 cl = this->children;
1031                 if (cl) {
1032                         do {
1033                                 if (cl->level > level)
1034                                         level = cl->level;
1035                         } while ((cl = cl->sibling) != this->children);
1036                 }
1037                 this->level = level + 1;
1038         } while ((this = this->tparent) != NULL);
1039 }
1040
1041 static void cbq_normalize_quanta(struct cbq_sched_data *q, int prio)
1042 {
1043         struct cbq_class *cl;
1044         struct hlist_node *n;
1045         unsigned int h;
1046
1047         if (q->quanta[prio] == 0)
1048                 return;
1049
1050         for (h = 0; h < q->clhash.hashsize; h++) {
1051                 hlist_for_each_entry(cl, n, &q->clhash.hash[h], common.hnode) {
1052                         /* BUGGGG... Beware! This expression suffer of
1053                          * arithmetic overflows!
1054                          */
1055                         if (cl->priority == prio) {
1056                                 cl->quantum = (cl->weight*cl->allot*q->nclasses[prio])/
1057                                         q->quanta[prio];
1058                         }
1059                         if (cl->quantum <= 0 || cl->quantum>32*qdisc_dev(cl->qdisc)->mtu) {
1060                                 pr_warning("CBQ: class %08x has bad quantum==%ld, repaired.\n",
1061                                            cl->common.classid, cl->quantum);
1062                                 cl->quantum = qdisc_dev(cl->qdisc)->mtu/2 + 1;
1063                         }
1064                 }
1065         }
1066 }
1067
1068 static void cbq_sync_defmap(struct cbq_class *cl)
1069 {
1070         struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
1071         struct cbq_class *split = cl->split;
1072         unsigned int h;
1073         int i;
1074
1075         if (split == NULL)
1076                 return;
1077
1078         for (i = 0; i <= TC_PRIO_MAX; i++) {
1079                 if (split->defaults[i] == cl && !(cl->defmap & (1<<i)))
1080                         split->defaults[i] = NULL;
1081         }
1082
1083         for (i = 0; i <= TC_PRIO_MAX; i++) {
1084                 int level = split->level;
1085
1086                 if (split->defaults[i])
1087                         continue;
1088
1089                 for (h = 0; h < q->clhash.hashsize; h++) {
1090                         struct hlist_node *n;
1091                         struct cbq_class *c;
1092
1093                         hlist_for_each_entry(c, n, &q->clhash.hash[h],
1094                                              common.hnode) {
1095                                 if (c->split == split && c->level < level &&
1096                                     c->defmap & (1<<i)) {
1097                                         split->defaults[i] = c;
1098                                         level = c->level;
1099                                 }
1100                         }
1101                 }
1102         }
1103 }
1104
1105 static void cbq_change_defmap(struct cbq_class *cl, u32 splitid, u32 def, u32 mask)
1106 {
1107         struct cbq_class *split = NULL;
1108
1109         if (splitid == 0) {
1110                 split = cl->split;
1111                 if (!split)
1112                         return;
1113                 splitid = split->common.classid;
1114         }
1115
1116         if (split == NULL || split->common.classid != splitid) {
1117                 for (split = cl->tparent; split; split = split->tparent)
1118                         if (split->common.classid == splitid)
1119                                 break;
1120         }
1121
1122         if (split == NULL)
1123                 return;
1124
1125         if (cl->split != split) {
1126                 cl->defmap = 0;
1127                 cbq_sync_defmap(cl);
1128                 cl->split = split;
1129                 cl->defmap = def & mask;
1130         } else
1131                 cl->defmap = (cl->defmap & ~mask) | (def & mask);
1132
1133         cbq_sync_defmap(cl);
1134 }
1135
1136 static void cbq_unlink_class(struct cbq_class *this)
1137 {
1138         struct cbq_class *cl, **clp;
1139         struct cbq_sched_data *q = qdisc_priv(this->qdisc);
1140
1141         qdisc_class_hash_remove(&q->clhash, &this->common);
1142
1143         if (this->tparent) {
1144                 clp = &this->sibling;
1145                 cl = *clp;
1146                 do {
1147                         if (cl == this) {
1148                                 *clp = cl->sibling;
1149                                 break;
1150                         }
1151                         clp = &cl->sibling;
1152                 } while ((cl = *clp) != this->sibling);
1153
1154                 if (this->tparent->children == this) {
1155                         this->tparent->children = this->sibling;
1156                         if (this->sibling == this)
1157                                 this->tparent->children = NULL;
1158                 }
1159         } else {
1160                 WARN_ON(this->sibling != this);
1161         }
1162 }
1163
1164 static void cbq_link_class(struct cbq_class *this)
1165 {
1166         struct cbq_sched_data *q = qdisc_priv(this->qdisc);
1167         struct cbq_class *parent = this->tparent;
1168
1169         this->sibling = this;
1170         qdisc_class_hash_insert(&q->clhash, &this->common);
1171
1172         if (parent == NULL)
1173                 return;
1174
1175         if (parent->children == NULL) {
1176                 parent->children = this;
1177         } else {
1178                 this->sibling = parent->children->sibling;
1179                 parent->children->sibling = this;
1180         }
1181 }
1182
1183 static unsigned int cbq_drop(struct Qdisc *sch)
1184 {
1185         struct cbq_sched_data *q = qdisc_priv(sch);
1186         struct cbq_class *cl, *cl_head;
1187         int prio;
1188         unsigned int len;
1189
1190         for (prio = TC_CBQ_MAXPRIO; prio >= 0; prio--) {
1191                 cl_head = q->active[prio];
1192                 if (!cl_head)
1193                         continue;
1194
1195                 cl = cl_head;
1196                 do {
1197                         if (cl->q->ops->drop && (len = cl->q->ops->drop(cl->q))) {
1198                                 sch->q.qlen--;
1199                                 if (!cl->q->q.qlen)
1200                                         cbq_deactivate_class(cl);
1201                                 return len;
1202                         }
1203                 } while ((cl = cl->next_alive) != cl_head);
1204         }
1205         return 0;
1206 }
1207
1208 static void
1209 cbq_reset(struct Qdisc *sch)
1210 {
1211         struct cbq_sched_data *q = qdisc_priv(sch);
1212         struct cbq_class *cl;
1213         struct hlist_node *n;
1214         int prio;
1215         unsigned int h;
1216
1217         q->activemask = 0;
1218         q->pmask = 0;
1219         q->tx_class = NULL;
1220         q->tx_borrowed = NULL;
1221         qdisc_watchdog_cancel(&q->watchdog);
1222         hrtimer_cancel(&q->delay_timer);
1223         q->toplevel = TC_CBQ_MAXLEVEL;
1224         q->now = psched_get_time();
1225         q->now_rt = q->now;
1226
1227         for (prio = 0; prio <= TC_CBQ_MAXPRIO; prio++)
1228                 q->active[prio] = NULL;
1229
1230         for (h = 0; h < q->clhash.hashsize; h++) {
1231                 hlist_for_each_entry(cl, n, &q->clhash.hash[h], common.hnode) {
1232                         qdisc_reset(cl->q);
1233
1234                         cl->next_alive = NULL;
1235                         cl->undertime = PSCHED_PASTPERFECT;
1236                         cl->avgidle = cl->maxidle;
1237                         cl->deficit = cl->quantum;
1238                         cl->cpriority = cl->priority;
1239                 }
1240         }
1241         sch->q.qlen = 0;
1242 }
1243
1244
1245 static int cbq_set_lss(struct cbq_class *cl, struct tc_cbq_lssopt *lss)
1246 {
1247         if (lss->change & TCF_CBQ_LSS_FLAGS) {
1248                 cl->share = (lss->flags & TCF_CBQ_LSS_ISOLATED) ? NULL : cl->tparent;
1249                 cl->borrow = (lss->flags & TCF_CBQ_LSS_BOUNDED) ? NULL : cl->tparent;
1250         }
1251         if (lss->change & TCF_CBQ_LSS_EWMA)
1252                 cl->ewma_log = lss->ewma_log;
1253         if (lss->change & TCF_CBQ_LSS_AVPKT)
1254                 cl->avpkt = lss->avpkt;
1255         if (lss->change & TCF_CBQ_LSS_MINIDLE)
1256                 cl->minidle = -(long)lss->minidle;
1257         if (lss->change & TCF_CBQ_LSS_MAXIDLE) {
1258                 cl->maxidle = lss->maxidle;
1259                 cl->avgidle = lss->maxidle;
1260         }
1261         if (lss->change & TCF_CBQ_LSS_OFFTIME)
1262                 cl->offtime = lss->offtime;
1263         return 0;
1264 }
1265
1266 static void cbq_rmprio(struct cbq_sched_data *q, struct cbq_class *cl)
1267 {
1268         q->nclasses[cl->priority]--;
1269         q->quanta[cl->priority] -= cl->weight;
1270         cbq_normalize_quanta(q, cl->priority);
1271 }
1272
1273 static void cbq_addprio(struct cbq_sched_data *q, struct cbq_class *cl)
1274 {
1275         q->nclasses[cl->priority]++;
1276         q->quanta[cl->priority] += cl->weight;
1277         cbq_normalize_quanta(q, cl->priority);
1278 }
1279
1280 static int cbq_set_wrr(struct cbq_class *cl, struct tc_cbq_wrropt *wrr)
1281 {
1282         struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
1283
1284         if (wrr->allot)
1285                 cl->allot = wrr->allot;
1286         if (wrr->weight)
1287                 cl->weight = wrr->weight;
1288         if (wrr->priority) {
1289                 cl->priority = wrr->priority - 1;
1290                 cl->cpriority = cl->priority;
1291                 if (cl->priority >= cl->priority2)
1292                         cl->priority2 = TC_CBQ_MAXPRIO - 1;
1293         }
1294
1295         cbq_addprio(q, cl);
1296         return 0;
1297 }
1298
1299 static int cbq_set_overlimit(struct cbq_class *cl, struct tc_cbq_ovl *ovl)
1300 {
1301         switch (ovl->strategy) {
1302         case TC_CBQ_OVL_CLASSIC:
1303                 cl->overlimit = cbq_ovl_classic;
1304                 break;
1305         case TC_CBQ_OVL_DELAY:
1306                 cl->overlimit = cbq_ovl_delay;
1307                 break;
1308         case TC_CBQ_OVL_LOWPRIO:
1309                 if (ovl->priority2 - 1 >= TC_CBQ_MAXPRIO ||
1310                     ovl->priority2 - 1 <= cl->priority)
1311                         return -EINVAL;
1312                 cl->priority2 = ovl->priority2 - 1;
1313                 cl->overlimit = cbq_ovl_lowprio;
1314                 break;
1315         case TC_CBQ_OVL_DROP:
1316                 cl->overlimit = cbq_ovl_drop;
1317                 break;
1318         case TC_CBQ_OVL_RCLASSIC:
1319                 cl->overlimit = cbq_ovl_rclassic;
1320                 break;
1321         default:
1322                 return -EINVAL;
1323         }
1324         cl->penalty = ovl->penalty;
1325         return 0;
1326 }
1327
1328 #ifdef CONFIG_NET_CLS_ACT
1329 static int cbq_set_police(struct cbq_class *cl, struct tc_cbq_police *p)
1330 {
1331         cl->police = p->police;
1332
1333         if (cl->q->handle) {
1334                 if (p->police == TC_POLICE_RECLASSIFY)
1335                         cl->q->reshape_fail = cbq_reshape_fail;
1336                 else
1337                         cl->q->reshape_fail = NULL;
1338         }
1339         return 0;
1340 }
1341 #endif
1342
1343 static int cbq_set_fopt(struct cbq_class *cl, struct tc_cbq_fopt *fopt)
1344 {
1345         cbq_change_defmap(cl, fopt->split, fopt->defmap, fopt->defchange);
1346         return 0;
1347 }
1348
1349 static const struct nla_policy cbq_policy[TCA_CBQ_MAX + 1] = {
1350         [TCA_CBQ_LSSOPT]        = { .len = sizeof(struct tc_cbq_lssopt) },
1351         [TCA_CBQ_WRROPT]        = { .len = sizeof(struct tc_cbq_wrropt) },
1352         [TCA_CBQ_FOPT]          = { .len = sizeof(struct tc_cbq_fopt) },
1353         [TCA_CBQ_OVL_STRATEGY]  = { .len = sizeof(struct tc_cbq_ovl) },
1354         [TCA_CBQ_RATE]          = { .len = sizeof(struct tc_ratespec) },
1355         [TCA_CBQ_RTAB]          = { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
1356         [TCA_CBQ_POLICE]        = { .len = sizeof(struct tc_cbq_police) },
1357 };
1358
1359 static int cbq_init(struct Qdisc *sch, struct nlattr *opt)
1360 {
1361         struct cbq_sched_data *q = qdisc_priv(sch);
1362         struct nlattr *tb[TCA_CBQ_MAX + 1];
1363         struct tc_ratespec *r;
1364         int err;
1365
1366         err = nla_parse_nested(tb, TCA_CBQ_MAX, opt, cbq_policy);
1367         if (err < 0)
1368                 return err;
1369
1370         if (tb[TCA_CBQ_RTAB] == NULL || tb[TCA_CBQ_RATE] == NULL)
1371                 return -EINVAL;
1372
1373         r = nla_data(tb[TCA_CBQ_RATE]);
1374
1375         if ((q->link.R_tab = qdisc_get_rtab(r, tb[TCA_CBQ_RTAB])) == NULL)
1376                 return -EINVAL;
1377
1378         err = qdisc_class_hash_init(&q->clhash);
1379         if (err < 0)
1380                 goto put_rtab;
1381
1382         q->link.refcnt = 1;
1383         q->link.sibling = &q->link;
1384         q->link.common.classid = sch->handle;
1385         q->link.qdisc = sch;
1386         q->link.q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops,
1387                                       sch->handle);
1388         if (!q->link.q)
1389                 q->link.q = &noop_qdisc;
1390
1391         q->link.priority = TC_CBQ_MAXPRIO - 1;
1392         q->link.priority2 = TC_CBQ_MAXPRIO - 1;
1393         q->link.cpriority = TC_CBQ_MAXPRIO - 1;
1394         q->link.ovl_strategy = TC_CBQ_OVL_CLASSIC;
1395         q->link.overlimit = cbq_ovl_classic;
1396         q->link.allot = psched_mtu(qdisc_dev(sch));
1397         q->link.quantum = q->link.allot;
1398         q->link.weight = q->link.R_tab->rate.rate;
1399
1400         q->link.ewma_log = TC_CBQ_DEF_EWMA;
1401         q->link.avpkt = q->link.allot/2;
1402         q->link.minidle = -0x7FFFFFFF;
1403
1404         qdisc_watchdog_init(&q->watchdog, sch);
1405         hrtimer_init(&q->delay_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
1406         q->delay_timer.function = cbq_undelay;
1407         q->toplevel = TC_CBQ_MAXLEVEL;
1408         q->now = psched_get_time();
1409         q->now_rt = q->now;
1410
1411         cbq_link_class(&q->link);
1412
1413         if (tb[TCA_CBQ_LSSOPT])
1414                 cbq_set_lss(&q->link, nla_data(tb[TCA_CBQ_LSSOPT]));
1415
1416         cbq_addprio(q, &q->link);
1417         return 0;
1418
1419 put_rtab:
1420         qdisc_put_rtab(q->link.R_tab);
1421         return err;
1422 }
1423
1424 static int cbq_dump_rate(struct sk_buff *skb, struct cbq_class *cl)
1425 {
1426         unsigned char *b = skb_tail_pointer(skb);
1427
1428         NLA_PUT(skb, TCA_CBQ_RATE, sizeof(cl->R_tab->rate), &cl->R_tab->rate);
1429         return skb->len;
1430
1431 nla_put_failure:
1432         nlmsg_trim(skb, b);
1433         return -1;
1434 }
1435
1436 static int cbq_dump_lss(struct sk_buff *skb, struct cbq_class *cl)
1437 {
1438         unsigned char *b = skb_tail_pointer(skb);
1439         struct tc_cbq_lssopt opt;
1440
1441         opt.flags = 0;
1442         if (cl->borrow == NULL)
1443                 opt.flags |= TCF_CBQ_LSS_BOUNDED;
1444         if (cl->share == NULL)
1445                 opt.flags |= TCF_CBQ_LSS_ISOLATED;
1446         opt.ewma_log = cl->ewma_log;
1447         opt.level = cl->level;
1448         opt.avpkt = cl->avpkt;
1449         opt.maxidle = cl->maxidle;
1450         opt.minidle = (u32)(-cl->minidle);
1451         opt.offtime = cl->offtime;
1452         opt.change = ~0;
1453         NLA_PUT(skb, TCA_CBQ_LSSOPT, sizeof(opt), &opt);
1454         return skb->len;
1455
1456 nla_put_failure:
1457         nlmsg_trim(skb, b);
1458         return -1;
1459 }
1460
1461 static int cbq_dump_wrr(struct sk_buff *skb, struct cbq_class *cl)
1462 {
1463         unsigned char *b = skb_tail_pointer(skb);
1464         struct tc_cbq_wrropt opt;
1465
1466         opt.flags = 0;
1467         opt.allot = cl->allot;
1468         opt.priority = cl->priority + 1;
1469         opt.cpriority = cl->cpriority + 1;
1470         opt.weight = cl->weight;
1471         NLA_PUT(skb, TCA_CBQ_WRROPT, sizeof(opt), &opt);
1472         return skb->len;
1473
1474 nla_put_failure:
1475         nlmsg_trim(skb, b);
1476         return -1;
1477 }
1478
1479 static int cbq_dump_ovl(struct sk_buff *skb, struct cbq_class *cl)
1480 {
1481         unsigned char *b = skb_tail_pointer(skb);
1482         struct tc_cbq_ovl opt;
1483
1484         opt.strategy = cl->ovl_strategy;
1485         opt.priority2 = cl->priority2 + 1;
1486         opt.pad = 0;
1487         opt.penalty = cl->penalty;
1488         NLA_PUT(skb, TCA_CBQ_OVL_STRATEGY, sizeof(opt), &opt);
1489         return skb->len;
1490
1491 nla_put_failure:
1492         nlmsg_trim(skb, b);
1493         return -1;
1494 }
1495
1496 static int cbq_dump_fopt(struct sk_buff *skb, struct cbq_class *cl)
1497 {
1498         unsigned char *b = skb_tail_pointer(skb);
1499         struct tc_cbq_fopt opt;
1500
1501         if (cl->split || cl->defmap) {
1502                 opt.split = cl->split ? cl->split->common.classid : 0;
1503                 opt.defmap = cl->defmap;
1504                 opt.defchange = ~0;
1505                 NLA_PUT(skb, TCA_CBQ_FOPT, sizeof(opt), &opt);
1506         }
1507         return skb->len;
1508
1509 nla_put_failure:
1510         nlmsg_trim(skb, b);
1511         return -1;
1512 }
1513
1514 #ifdef CONFIG_NET_CLS_ACT
1515 static int cbq_dump_police(struct sk_buff *skb, struct cbq_class *cl)
1516 {
1517         unsigned char *b = skb_tail_pointer(skb);
1518         struct tc_cbq_police opt;
1519
1520         if (cl->police) {
1521                 opt.police = cl->police;
1522                 opt.__res1 = 0;
1523                 opt.__res2 = 0;
1524                 NLA_PUT(skb, TCA_CBQ_POLICE, sizeof(opt), &opt);
1525         }
1526         return skb->len;
1527
1528 nla_put_failure:
1529         nlmsg_trim(skb, b);
1530         return -1;
1531 }
1532 #endif
1533
1534 static int cbq_dump_attr(struct sk_buff *skb, struct cbq_class *cl)
1535 {
1536         if (cbq_dump_lss(skb, cl) < 0 ||
1537             cbq_dump_rate(skb, cl) < 0 ||
1538             cbq_dump_wrr(skb, cl) < 0 ||
1539             cbq_dump_ovl(skb, cl) < 0 ||
1540 #ifdef CONFIG_NET_CLS_ACT
1541             cbq_dump_police(skb, cl) < 0 ||
1542 #endif
1543             cbq_dump_fopt(skb, cl) < 0)
1544                 return -1;
1545         return 0;
1546 }
1547
1548 static int cbq_dump(struct Qdisc *sch, struct sk_buff *skb)
1549 {
1550         struct cbq_sched_data *q = qdisc_priv(sch);
1551         struct nlattr *nest;
1552
1553         nest = nla_nest_start(skb, TCA_OPTIONS);
1554         if (nest == NULL)
1555                 goto nla_put_failure;
1556         if (cbq_dump_attr(skb, &q->link) < 0)
1557                 goto nla_put_failure;
1558         nla_nest_end(skb, nest);
1559         return skb->len;
1560
1561 nla_put_failure:
1562         nla_nest_cancel(skb, nest);
1563         return -1;
1564 }
1565
1566 static int
1567 cbq_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
1568 {
1569         struct cbq_sched_data *q = qdisc_priv(sch);
1570
1571         q->link.xstats.avgidle = q->link.avgidle;
1572         return gnet_stats_copy_app(d, &q->link.xstats, sizeof(q->link.xstats));
1573 }
1574
1575 static int
1576 cbq_dump_class(struct Qdisc *sch, unsigned long arg,
1577                struct sk_buff *skb, struct tcmsg *tcm)
1578 {
1579         struct cbq_class *cl = (struct cbq_class *)arg;
1580         struct nlattr *nest;
1581
1582         if (cl->tparent)
1583                 tcm->tcm_parent = cl->tparent->common.classid;
1584         else
1585                 tcm->tcm_parent = TC_H_ROOT;
1586         tcm->tcm_handle = cl->common.classid;
1587         tcm->tcm_info = cl->q->handle;
1588
1589         nest = nla_nest_start(skb, TCA_OPTIONS);
1590         if (nest == NULL)
1591                 goto nla_put_failure;
1592         if (cbq_dump_attr(skb, cl) < 0)
1593                 goto nla_put_failure;
1594         nla_nest_end(skb, nest);
1595         return skb->len;
1596
1597 nla_put_failure:
1598         nla_nest_cancel(skb, nest);
1599         return -1;
1600 }
1601
1602 static int
1603 cbq_dump_class_stats(struct Qdisc *sch, unsigned long arg,
1604         struct gnet_dump *d)
1605 {
1606         struct cbq_sched_data *q = qdisc_priv(sch);
1607         struct cbq_class *cl = (struct cbq_class *)arg;
1608
1609         cl->qstats.qlen = cl->q->q.qlen;
1610         cl->xstats.avgidle = cl->avgidle;
1611         cl->xstats.undertime = 0;
1612
1613         if (cl->undertime != PSCHED_PASTPERFECT)
1614                 cl->xstats.undertime = cl->undertime - q->now;
1615
1616         if (gnet_stats_copy_basic(d, &cl->bstats) < 0 ||
1617             gnet_stats_copy_rate_est(d, &cl->bstats, &cl->rate_est) < 0 ||
1618             gnet_stats_copy_queue(d, &cl->qstats) < 0)
1619                 return -1;
1620
1621         return gnet_stats_copy_app(d, &cl->xstats, sizeof(cl->xstats));
1622 }
1623
1624 static int cbq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
1625                      struct Qdisc **old)
1626 {
1627         struct cbq_class *cl = (struct cbq_class *)arg;
1628
1629         if (new == NULL) {
1630                 new = qdisc_create_dflt(sch->dev_queue,
1631                                         &pfifo_qdisc_ops, cl->common.classid);
1632                 if (new == NULL)
1633                         return -ENOBUFS;
1634         } else {
1635 #ifdef CONFIG_NET_CLS_ACT
1636                 if (cl->police == TC_POLICE_RECLASSIFY)
1637                         new->reshape_fail = cbq_reshape_fail;
1638 #endif
1639         }
1640         sch_tree_lock(sch);
1641         *old = cl->q;
1642         cl->q = new;
1643         qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
1644         qdisc_reset(*old);
1645         sch_tree_unlock(sch);
1646
1647         return 0;
1648 }
1649
1650 static struct Qdisc *cbq_leaf(struct Qdisc *sch, unsigned long arg)
1651 {
1652         struct cbq_class *cl = (struct cbq_class *)arg;
1653
1654         return cl->q;
1655 }
1656
1657 static void cbq_qlen_notify(struct Qdisc *sch, unsigned long arg)
1658 {
1659         struct cbq_class *cl = (struct cbq_class *)arg;
1660
1661         if (cl->q->q.qlen == 0)
1662                 cbq_deactivate_class(cl);
1663 }
1664
1665 static unsigned long cbq_get(struct Qdisc *sch, u32 classid)
1666 {
1667         struct cbq_sched_data *q = qdisc_priv(sch);
1668         struct cbq_class *cl = cbq_class_lookup(q, classid);
1669
1670         if (cl) {
1671                 cl->refcnt++;
1672                 return (unsigned long)cl;
1673         }
1674         return 0;
1675 }
1676
1677 static void cbq_destroy_class(struct Qdisc *sch, struct cbq_class *cl)
1678 {
1679         struct cbq_sched_data *q = qdisc_priv(sch);
1680
1681         WARN_ON(cl->filters);
1682
1683         tcf_destroy_chain(&cl->filter_list);
1684         qdisc_destroy(cl->q);
1685         qdisc_put_rtab(cl->R_tab);
1686         gen_kill_estimator(&cl->bstats, &cl->rate_est);
1687         if (cl != &q->link)
1688                 kfree(cl);
1689 }
1690
1691 static void cbq_destroy(struct Qdisc *sch)
1692 {
1693         struct cbq_sched_data *q = qdisc_priv(sch);
1694         struct hlist_node *n, *next;
1695         struct cbq_class *cl;
1696         unsigned int h;
1697
1698 #ifdef CONFIG_NET_CLS_ACT
1699         q->rx_class = NULL;
1700 #endif
1701         /*
1702          * Filters must be destroyed first because we don't destroy the
1703          * classes from root to leafs which means that filters can still
1704          * be bound to classes which have been destroyed already. --TGR '04
1705          */
1706         for (h = 0; h < q->clhash.hashsize; h++) {
1707                 hlist_for_each_entry(cl, n, &q->clhash.hash[h], common.hnode)
1708                         tcf_destroy_chain(&cl->filter_list);
1709         }
1710         for (h = 0; h < q->clhash.hashsize; h++) {
1711                 hlist_for_each_entry_safe(cl, n, next, &q->clhash.hash[h],
1712                                           common.hnode)
1713                         cbq_destroy_class(sch, cl);
1714         }
1715         qdisc_class_hash_destroy(&q->clhash);
1716 }
1717
1718 static void cbq_put(struct Qdisc *sch, unsigned long arg)
1719 {
1720         struct cbq_class *cl = (struct cbq_class *)arg;
1721
1722         if (--cl->refcnt == 0) {
1723 #ifdef CONFIG_NET_CLS_ACT
1724                 spinlock_t *root_lock = qdisc_root_sleeping_lock(sch);
1725                 struct cbq_sched_data *q = qdisc_priv(sch);
1726
1727                 spin_lock_bh(root_lock);
1728                 if (q->rx_class == cl)
1729                         q->rx_class = NULL;
1730                 spin_unlock_bh(root_lock);
1731 #endif
1732
1733                 cbq_destroy_class(sch, cl);
1734         }
1735 }
1736
1737 static int
1738 cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **tca,
1739                  unsigned long *arg)
1740 {
1741         int err;
1742         struct cbq_sched_data *q = qdisc_priv(sch);
1743         struct cbq_class *cl = (struct cbq_class *)*arg;
1744         struct nlattr *opt = tca[TCA_OPTIONS];
1745         struct nlattr *tb[TCA_CBQ_MAX + 1];
1746         struct cbq_class *parent;
1747         struct qdisc_rate_table *rtab = NULL;
1748
1749         if (opt == NULL)
1750                 return -EINVAL;
1751
1752         err = nla_parse_nested(tb, TCA_CBQ_MAX, opt, cbq_policy);
1753         if (err < 0)
1754                 return err;
1755
1756         if (cl) {
1757                 /* Check parent */
1758                 if (parentid) {
1759                         if (cl->tparent &&
1760                             cl->tparent->common.classid != parentid)
1761                                 return -EINVAL;
1762                         if (!cl->tparent && parentid != TC_H_ROOT)
1763                                 return -EINVAL;
1764                 }
1765
1766                 if (tb[TCA_CBQ_RATE]) {
1767                         rtab = qdisc_get_rtab(nla_data(tb[TCA_CBQ_RATE]),
1768                                               tb[TCA_CBQ_RTAB]);
1769                         if (rtab == NULL)
1770                                 return -EINVAL;
1771                 }
1772
1773                 if (tca[TCA_RATE]) {
1774                         err = gen_replace_estimator(&cl->bstats, &cl->rate_est,
1775                                                     qdisc_root_sleeping_lock(sch),
1776                                                     tca[TCA_RATE]);
1777                         if (err) {
1778                                 if (rtab)
1779                                         qdisc_put_rtab(rtab);
1780                                 return err;
1781                         }
1782                 }
1783
1784                 /* Change class parameters */
1785                 sch_tree_lock(sch);
1786
1787                 if (cl->next_alive != NULL)
1788                         cbq_deactivate_class(cl);
1789
1790                 if (rtab) {
1791                         qdisc_put_rtab(cl->R_tab);
1792                         cl->R_tab = rtab;
1793                 }
1794
1795                 if (tb[TCA_CBQ_LSSOPT])
1796                         cbq_set_lss(cl, nla_data(tb[TCA_CBQ_LSSOPT]));
1797
1798                 if (tb[TCA_CBQ_WRROPT]) {
1799                         cbq_rmprio(q, cl);
1800                         cbq_set_wrr(cl, nla_data(tb[TCA_CBQ_WRROPT]));
1801                 }
1802
1803                 if (tb[TCA_CBQ_OVL_STRATEGY])
1804                         cbq_set_overlimit(cl, nla_data(tb[TCA_CBQ_OVL_STRATEGY]));
1805
1806 #ifdef CONFIG_NET_CLS_ACT
1807                 if (tb[TCA_CBQ_POLICE])
1808                         cbq_set_police(cl, nla_data(tb[TCA_CBQ_POLICE]));
1809 #endif
1810
1811                 if (tb[TCA_CBQ_FOPT])
1812                         cbq_set_fopt(cl, nla_data(tb[TCA_CBQ_FOPT]));
1813
1814                 if (cl->q->q.qlen)
1815                         cbq_activate_class(cl);
1816
1817                 sch_tree_unlock(sch);
1818
1819                 return 0;
1820         }
1821
1822         if (parentid == TC_H_ROOT)
1823                 return -EINVAL;
1824
1825         if (tb[TCA_CBQ_WRROPT] == NULL || tb[TCA_CBQ_RATE] == NULL ||
1826             tb[TCA_CBQ_LSSOPT] == NULL)
1827                 return -EINVAL;
1828
1829         rtab = qdisc_get_rtab(nla_data(tb[TCA_CBQ_RATE]), tb[TCA_CBQ_RTAB]);
1830         if (rtab == NULL)
1831                 return -EINVAL;
1832
1833         if (classid) {
1834                 err = -EINVAL;
1835                 if (TC_H_MAJ(classid ^ sch->handle) ||
1836                     cbq_class_lookup(q, classid))
1837                         goto failure;
1838         } else {
1839                 int i;
1840                 classid = TC_H_MAKE(sch->handle, 0x8000);
1841
1842                 for (i = 0; i < 0x8000; i++) {
1843                         if (++q->hgenerator >= 0x8000)
1844                                 q->hgenerator = 1;
1845                         if (cbq_class_lookup(q, classid|q->hgenerator) == NULL)
1846                                 break;
1847                 }
1848                 err = -ENOSR;
1849                 if (i >= 0x8000)
1850                         goto failure;
1851                 classid = classid|q->hgenerator;
1852         }
1853
1854         parent = &q->link;
1855         if (parentid) {
1856                 parent = cbq_class_lookup(q, parentid);
1857                 err = -EINVAL;
1858                 if (parent == NULL)
1859                         goto failure;
1860         }
1861
1862         err = -ENOBUFS;
1863         cl = kzalloc(sizeof(*cl), GFP_KERNEL);
1864         if (cl == NULL)
1865                 goto failure;
1866
1867         if (tca[TCA_RATE]) {
1868                 err = gen_new_estimator(&cl->bstats, &cl->rate_est,
1869                                         qdisc_root_sleeping_lock(sch),
1870                                         tca[TCA_RATE]);
1871                 if (err) {
1872                         kfree(cl);
1873                         goto failure;
1874                 }
1875         }
1876
1877         cl->R_tab = rtab;
1878         rtab = NULL;
1879         cl->refcnt = 1;
1880         cl->q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, classid);
1881         if (!cl->q)
1882                 cl->q = &noop_qdisc;
1883         cl->common.classid = classid;
1884         cl->tparent = parent;
1885         cl->qdisc = sch;
1886         cl->allot = parent->allot;
1887         cl->quantum = cl->allot;
1888         cl->weight = cl->R_tab->rate.rate;
1889
1890         sch_tree_lock(sch);
1891         cbq_link_class(cl);
1892         cl->borrow = cl->tparent;
1893         if (cl->tparent != &q->link)
1894                 cl->share = cl->tparent;
1895         cbq_adjust_levels(parent);
1896         cl->minidle = -0x7FFFFFFF;
1897         cbq_set_lss(cl, nla_data(tb[TCA_CBQ_LSSOPT]));
1898         cbq_set_wrr(cl, nla_data(tb[TCA_CBQ_WRROPT]));
1899         if (cl->ewma_log == 0)
1900                 cl->ewma_log = q->link.ewma_log;
1901         if (cl->maxidle == 0)
1902                 cl->maxidle = q->link.maxidle;
1903         if (cl->avpkt == 0)
1904                 cl->avpkt = q->link.avpkt;
1905         cl->overlimit = cbq_ovl_classic;
1906         if (tb[TCA_CBQ_OVL_STRATEGY])
1907                 cbq_set_overlimit(cl, nla_data(tb[TCA_CBQ_OVL_STRATEGY]));
1908 #ifdef CONFIG_NET_CLS_ACT
1909         if (tb[TCA_CBQ_POLICE])
1910                 cbq_set_police(cl, nla_data(tb[TCA_CBQ_POLICE]));
1911 #endif
1912         if (tb[TCA_CBQ_FOPT])
1913                 cbq_set_fopt(cl, nla_data(tb[TCA_CBQ_FOPT]));
1914         sch_tree_unlock(sch);
1915
1916         qdisc_class_hash_grow(sch, &q->clhash);
1917
1918         *arg = (unsigned long)cl;
1919         return 0;
1920
1921 failure:
1922         qdisc_put_rtab(rtab);
1923         return err;
1924 }
1925
1926 static int cbq_delete(struct Qdisc *sch, unsigned long arg)
1927 {
1928         struct cbq_sched_data *q = qdisc_priv(sch);
1929         struct cbq_class *cl = (struct cbq_class *)arg;
1930         unsigned int qlen;
1931
1932         if (cl->filters || cl->children || cl == &q->link)
1933                 return -EBUSY;
1934
1935         sch_tree_lock(sch);
1936
1937         qlen = cl->q->q.qlen;
1938         qdisc_reset(cl->q);
1939         qdisc_tree_decrease_qlen(cl->q, qlen);
1940
1941         if (cl->next_alive)
1942                 cbq_deactivate_class(cl);
1943
1944         if (q->tx_borrowed == cl)
1945                 q->tx_borrowed = q->tx_class;
1946         if (q->tx_class == cl) {
1947                 q->tx_class = NULL;
1948                 q->tx_borrowed = NULL;
1949         }
1950 #ifdef CONFIG_NET_CLS_ACT
1951         if (q->rx_class == cl)
1952                 q->rx_class = NULL;
1953 #endif
1954
1955         cbq_unlink_class(cl);
1956         cbq_adjust_levels(cl->tparent);
1957         cl->defmap = 0;
1958         cbq_sync_defmap(cl);
1959
1960         cbq_rmprio(q, cl);
1961         sch_tree_unlock(sch);
1962
1963         BUG_ON(--cl->refcnt == 0);
1964         /*
1965          * This shouldn't happen: we "hold" one cops->get() when called
1966          * from tc_ctl_tclass; the destroy method is done from cops->put().
1967          */
1968
1969         return 0;
1970 }
1971
1972 static struct tcf_proto **cbq_find_tcf(struct Qdisc *sch, unsigned long arg)
1973 {
1974         struct cbq_sched_data *q = qdisc_priv(sch);
1975         struct cbq_class *cl = (struct cbq_class *)arg;
1976
1977         if (cl == NULL)
1978                 cl = &q->link;
1979
1980         return &cl->filter_list;
1981 }
1982
1983 static unsigned long cbq_bind_filter(struct Qdisc *sch, unsigned long parent,
1984                                      u32 classid)
1985 {
1986         struct cbq_sched_data *q = qdisc_priv(sch);
1987         struct cbq_class *p = (struct cbq_class *)parent;
1988         struct cbq_class *cl = cbq_class_lookup(q, classid);
1989
1990         if (cl) {
1991                 if (p && p->level <= cl->level)
1992                         return 0;
1993                 cl->filters++;
1994                 return (unsigned long)cl;
1995         }
1996         return 0;
1997 }
1998
1999 static void cbq_unbind_filter(struct Qdisc *sch, unsigned long arg)
2000 {
2001         struct cbq_class *cl = (struct cbq_class *)arg;
2002
2003         cl->filters--;
2004 }
2005
2006 static void cbq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
2007 {
2008         struct cbq_sched_data *q = qdisc_priv(sch);
2009         struct cbq_class *cl;
2010         struct hlist_node *n;
2011         unsigned int h;
2012
2013         if (arg->stop)
2014                 return;
2015
2016         for (h = 0; h < q->clhash.hashsize; h++) {
2017                 hlist_for_each_entry(cl, n, &q->clhash.hash[h], common.hnode) {
2018                         if (arg->count < arg->skip) {
2019                                 arg->count++;
2020                                 continue;
2021                         }
2022                         if (arg->fn(sch, (unsigned long)cl, arg) < 0) {
2023                                 arg->stop = 1;
2024                                 return;
2025                         }
2026                         arg->count++;
2027                 }
2028         }
2029 }
2030
2031 static const struct Qdisc_class_ops cbq_class_ops = {
2032         .graft          =       cbq_graft,
2033         .leaf           =       cbq_leaf,
2034         .qlen_notify    =       cbq_qlen_notify,
2035         .get            =       cbq_get,
2036         .put            =       cbq_put,
2037         .change         =       cbq_change_class,
2038         .delete         =       cbq_delete,
2039         .walk           =       cbq_walk,
2040         .tcf_chain      =       cbq_find_tcf,
2041         .bind_tcf       =       cbq_bind_filter,
2042         .unbind_tcf     =       cbq_unbind_filter,
2043         .dump           =       cbq_dump_class,
2044         .dump_stats     =       cbq_dump_class_stats,
2045 };
2046
2047 static struct Qdisc_ops cbq_qdisc_ops __read_mostly = {
2048         .next           =       NULL,
2049         .cl_ops         =       &cbq_class_ops,
2050         .id             =       "cbq",
2051         .priv_size      =       sizeof(struct cbq_sched_data),
2052         .enqueue        =       cbq_enqueue,
2053         .dequeue        =       cbq_dequeue,
2054         .peek           =       qdisc_peek_dequeued,
2055         .drop           =       cbq_drop,
2056         .init           =       cbq_init,
2057         .reset          =       cbq_reset,
2058         .destroy        =       cbq_destroy,
2059         .change         =       NULL,
2060         .dump           =       cbq_dump,
2061         .dump_stats     =       cbq_dump_stats,
2062         .owner          =       THIS_MODULE,
2063 };
2064
2065 static int __init cbq_module_init(void)
2066 {
2067         return register_qdisc(&cbq_qdisc_ops);
2068 }
2069 static void __exit cbq_module_exit(void)
2070 {
2071         unregister_qdisc(&cbq_qdisc_ops);
2072 }
2073 module_init(cbq_module_init)
2074 module_exit(cbq_module_exit)
2075 MODULE_LICENSE("GPL");