/*
 * net/sched/sch_cbq.c	Class-Based Queueing discipline.
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *
 */

#include <linux/module.h>
#include <linux/slab.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/skbuff.h>
#include <net/netlink.h>
#include <net/pkt_sched.h>


/*	Class-Based Queueing (CBQ) algorithm.
	=======================================

	Sources: [1] Sally Floyd and Van Jacobson, "Link-sharing and Resource
		 Management Models for Packet Networks",
		 IEEE/ACM Transactions on Networking, Vol.3, No.4, 1995

		 [2] Sally Floyd, "Notes on CBQ and Guaranteed Service", 1995

		 [3] Sally Floyd, "Notes on Class-Based Queueing: Setting
		 Parameters", 1996

		 [4] Sally Floyd and Michael Speer, "Experimental Results
		 for Class-Based Queueing", 1998, not published.

	-----------------------------------------------------------------------

	Algorithm skeleton was taken from NS simulator cbq.cc.
	If someone wants to check this code against the LBL version,
	he should take into account that ONLY the skeleton was borrowed,
	the implementation is different. Particularly:

	--- The WRR algorithm is different. Our version looks more
	reasonable (I hope) and works when quanta are allowed to be
	less than MTU, which is always the case when real time classes
	have small rates. Note that the statement of [3] is
	incomplete: delay may actually be estimated even if class
	per-round allotment is less than MTU. Namely, if per-round
	allotment is W*r_i, and r_1+...+r_k = r < 1, then

	delay_i <= ([MTU/(W*r_i)]*W*r + W*r + k*MTU)/B

	In the worst case we have an IntServ estimate with D = W*r+k*MTU
	and C = MTU*r. The proof (if correct at all) is trivial.
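
	A worked instance (editor's illustration with made-up round
	numbers, reading W as the per-round total in bytes; this is
	not from [3]): let MTU = 1500 bytes, link bandwidth
	B = 1.25e6 bytes/sec (10 Mbit/s), k = 4 classes with total
	share r = 0.5, and W = 20000 bytes, so a class with
	r_i = 0.05 gets a per-round allotment W*r_i = 1000 bytes < MTU.
	Then [MTU/(W*r_i)] = 2 rounds per packet, and

	delay_i <= (2*W*r + W*r + k*MTU)/B
		 = (20000 + 10000 + 6000)/1.25e6 ~= 28.8 msec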

	--- It seems that cbq-2.0 is not very accurate. At least, I cannot
	interpret some places, which look like wrong translations
	from NS. Anyone is advised to find these differences
	and explain to me why I am wrong 8).

	--- Linux has no EOI event, so that we cannot estimate true class
	idle time. The workaround is to consider the next dequeue event
	as a sign that the previous packet is finished. This is wrong
	because of internal device queueing, but on a permanently loaded
	link it is true. Moreover, combined with the clock integrator,
	this scheme looks very close to an ideal solution.  */

struct cbq_sched_data;

struct cbq_class {
	struct Qdisc_class_common common;
	struct cbq_class	*next_alive;	/* next class with backlog in this priority band */

/* Parameters */
	unsigned char		priority;	/* class priority */
	unsigned char		priority2;	/* priority to be used after overlimit */
	unsigned char		ewma_log;	/* time constant for idle time calculation */
	unsigned char		ovl_strategy;
#ifdef CONFIG_NET_CLS_ACT
	unsigned char		police;
#endif

	u32			defmap;

	/* Link-sharing scheduler parameters */
	long			maxidle;	/* Class parameters: see below. */
	long			offtime;
	long			minidle;
	u32			avpkt;
	struct qdisc_rate_table	*R_tab;

	/* Overlimit strategy parameters */
	void			(*overlimit)(struct cbq_class *cl);
	psched_tdiff_t		penalty;

	/* General scheduler (WRR) parameters */
	long			allot;
	long			quantum;	/* Allotment per WRR round */
	long			weight;		/* Relative allotment: see below */

	struct Qdisc		*qdisc;		/* Ptr to CBQ discipline */
	struct cbq_class	*split;		/* Ptr to split node */
	struct cbq_class	*share;		/* Ptr to LS parent in the class tree */
	struct cbq_class	*tparent;	/* Ptr to tree parent in the class tree */
	struct cbq_class	*borrow;	/* NULL if class is bandwidth limited;
						   parent otherwise */
	struct cbq_class	*sibling;	/* Sibling chain */
	struct cbq_class	*children;	/* Pointer to children chain */

	struct Qdisc		*q;		/* Elementary queueing discipline */


/* Variables */
	unsigned char		cpriority;	/* Effective priority */
	unsigned char		delayed;
	unsigned char		level;		/* level of the class in hierarchy:
						   0 for leaf classes, and maximal
						   level of children + 1 for nodes.
						 */

	psched_time_t		last;		/* Last end of service */
	psched_time_t		undertime;
	long			avgidle;
	long			deficit;	/* Saved deficit for WRR */
	psched_time_t		penalized;
	struct gnet_stats_basic_packed bstats;
	struct gnet_stats_queue qstats;
	struct gnet_stats_rate_est rate_est;
	struct tc_cbq_xstats	xstats;

	struct tcf_proto	*filter_list;

	int			refcnt;
	int			filters;

	struct cbq_class	*defaults[TC_PRIO_MAX + 1];
};

struct cbq_sched_data {
	struct Qdisc_class_hash	clhash;			/* Hash table of all classes */
	int			nclasses[TC_CBQ_MAXPRIO + 1];
	unsigned int		quanta[TC_CBQ_MAXPRIO + 1];

	struct cbq_class	link;

	unsigned int		activemask;
	struct cbq_class	*active[TC_CBQ_MAXPRIO + 1];	/* List of all classes
								   with backlog */

#ifdef CONFIG_NET_CLS_ACT
	struct cbq_class	*rx_class;
#endif
	struct cbq_class	*tx_class;
	struct cbq_class	*tx_borrowed;
	int			tx_len;
	psched_time_t		now;		/* Cached timestamp */
	psched_time_t		now_rt;		/* Cached real time */
	unsigned int		pmask;

	struct hrtimer		delay_timer;
	struct qdisc_watchdog	watchdog;	/* Watchdog timer,
						   started when CBQ has
						   backlog, but cannot
						   transmit just now */
	psched_tdiff_t		wd_expires;
	int			toplevel;
	u32			hgenerator;
};


#define L2T(cl, len)	qdisc_l2t((cl)->R_tab, len)

static inline struct cbq_class *
cbq_class_lookup(struct cbq_sched_data *q, u32 classid)
{
	struct Qdisc_class_common *clc;

	clc = qdisc_class_find(&q->clhash, classid);
	if (clc == NULL)
		return NULL;
	return container_of(clc, struct cbq_class, common);
}

#ifdef CONFIG_NET_CLS_ACT

static struct cbq_class *
cbq_reclassify(struct sk_buff *skb, struct cbq_class *this)
{
	struct cbq_class *cl;

	for (cl = this->tparent; cl; cl = cl->tparent) {
		struct cbq_class *new = cl->defaults[TC_PRIO_BESTEFFORT];

		if (new != NULL && new != this)
			return new;
	}
	return NULL;
}

#endif

/* Classify packet. The procedure is pretty complicated, but
 * it allows us to combine link sharing and priority scheduling
 * transparently.
 *
 * Namely, you can put link sharing rules (f.e. route based) at root of CBQ,
 * so that it resolves to split nodes. Then packets are classified
 * by logical priority, or a more specific classifier may be attached
 * to the split node.
 */
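
/* An illustrative walk (editor's sketch; the handles are invented):
 * suppose the root filters map a packet to split node 1:0, whose
 * defaults[] sends logical priority TC_PRIO_BESTEFFORT to class 1:10.
 * Step 1 fails (skb->priority is not one of our classids), Step 2
 * resolves to the split node, Step 3 restarts the loop from it, and
 * classification ends as soon as a leaf (level 0) class is hit.
 */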

static struct cbq_class *
cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *head = &q->link;
	struct cbq_class **defmap;
	struct cbq_class *cl = NULL;
	u32 prio = skb->priority;
	struct tcf_result res;

	/*
	 *  Step 1. If skb->priority points to one of our classes, use it.
	 */
	if (TC_H_MAJ(prio ^ sch->handle) == 0 &&
	    (cl = cbq_class_lookup(q, prio)) != NULL)
		return cl;

	*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
	for (;;) {
		int result = 0;
		defmap = head->defaults;

		/*
		 * Step 2+n. Apply classifier.
		 */
		if (!head->filter_list ||
		    (result = tc_classify_compat(skb, head->filter_list, &res)) < 0)
			goto fallback;

		cl = (void *)res.class;
		if (!cl) {
			if (TC_H_MAJ(res.classid))
				cl = cbq_class_lookup(q, res.classid);
			else if ((cl = defmap[res.classid & TC_PRIO_MAX]) == NULL)
				cl = defmap[TC_PRIO_BESTEFFORT];

			if (cl == NULL || cl->level >= head->level)
				goto fallback;
		}

#ifdef CONFIG_NET_CLS_ACT
		switch (result) {
		case TC_ACT_QUEUED:
		case TC_ACT_STOLEN:
			*qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
		case TC_ACT_SHOT:
			return NULL;
		case TC_ACT_RECLASSIFY:
			return cbq_reclassify(skb, cl);
		}
#endif
		if (cl->level == 0)
			return cl;

		/*
		 * Step 3+n. If classifier selected a link sharing class,
		 *	   apply agency specific classifier.
		 *	   Repeat this procedure until we hit a leaf node.
		 */
		head = cl;
	}

fallback:
	cl = head;

	/*
	 * Step 4. No success...
	 */
	if (TC_H_MAJ(prio) == 0 &&
	    !(cl = head->defaults[prio & TC_PRIO_MAX]) &&
	    !(cl = head->defaults[TC_PRIO_BESTEFFORT]))
		return head;

	return cl;
}

/*
 * A packet has just been enqueued on the empty class.
 * cbq_activate_class adds it to the tail of active class list
 * of its priority band.
 */

static inline void cbq_activate_class(struct cbq_class *cl)
{
	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
	int prio = cl->cpriority;
	struct cbq_class *cl_tail;

	cl_tail = q->active[prio];
	q->active[prio] = cl;

	if (cl_tail != NULL) {
		cl->next_alive = cl_tail->next_alive;
		cl_tail->next_alive = cl;
	} else {
		cl->next_alive = cl;
		q->activemask |= (1<<prio);
	}
}

/*
 * Unlink class from active chain.
 * Note that this same procedure is done directly in cbq_dequeue*
 * during round-robin procedure.
 */

static void cbq_deactivate_class(struct cbq_class *this)
{
	struct cbq_sched_data *q = qdisc_priv(this->qdisc);
	int prio = this->cpriority;
	struct cbq_class *cl;
	struct cbq_class *cl_prev = q->active[prio];

	do {
		cl = cl_prev->next_alive;
		if (cl == this) {
			cl_prev->next_alive = cl->next_alive;
			cl->next_alive = NULL;

			if (cl == q->active[prio]) {
				q->active[prio] = cl_prev;
				if (cl == q->active[prio]) {
					q->active[prio] = NULL;
					q->activemask &= ~(1<<prio);
					return;
				}
			}
			return;
		}
	} while ((cl_prev = cl) != q->active[prio]);
}

static void
cbq_mark_toplevel(struct cbq_sched_data *q, struct cbq_class *cl)
{
	int toplevel = q->toplevel;

	if (toplevel > cl->level && !(qdisc_is_throttled(cl->q))) {
		psched_time_t now;
		psched_tdiff_t incr;

		now = psched_get_time();
		incr = now - q->now_rt;
		now = q->now + incr;

		do {
			if (cl->undertime < now) {
				q->toplevel = cl->level;
				return;
			}
		} while ((cl = cl->borrow) != NULL && toplevel > cl->level);
	}
}
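
/* Editor's note (an interpretation of the code above, not from the
 * papers): q->toplevel is the highest level from which borrowing is
 * currently allowed; TC_CBQ_MAXLEVEL means "no cutoff". When a
 * packet is enqueued to a class with an unthrottled queue, the
 * cutoff drops to the level of its first underlimit ancestor, so
 * other classes may no longer borrow from ancestors above that
 * level until the cutoff is regenerated in cbq_update_toplevel().
 */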

static int
cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	int uninitialized_var(ret);
	struct cbq_class *cl = cbq_classify(skb, sch, &ret);

#ifdef CONFIG_NET_CLS_ACT
	q->rx_class = cl;
#endif
	if (cl == NULL) {
		if (ret & __NET_XMIT_BYPASS)
			sch->qstats.drops++;
		kfree_skb(skb);
		return ret;
	}

#ifdef CONFIG_NET_CLS_ACT
	cl->q->__parent = sch;
#endif
	ret = qdisc_enqueue(skb, cl->q);
	if (ret == NET_XMIT_SUCCESS) {
		sch->q.qlen++;
		qdisc_bstats_update(sch, skb);
		cbq_mark_toplevel(q, cl);
		if (!cl->next_alive)
			cbq_activate_class(cl);
		return ret;
	}

	if (net_xmit_drop_count(ret)) {
		sch->qstats.drops++;
		cbq_mark_toplevel(q, cl);
		cl->qstats.drops++;
	}
	return ret;
}

/* Overlimit actions */

/* TC_CBQ_OVL_CLASSIC: (default) penalize leaf class by adding offtime */

static void cbq_ovl_classic(struct cbq_class *cl)
{
	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
	psched_tdiff_t delay = cl->undertime - q->now;

	if (!cl->delayed) {
		delay += cl->offtime;

		/*
		 * Class goes to sleep, so that it will have no
		 * chance to work off avgidle. Let's forgive it 8)
		 *
		 * BTW cbq-2.0 has a bug in this place: apparently
		 * they forgot to shift it by cl->ewma_log.
		 */
		if (cl->avgidle < 0)
			delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
		if (cl->avgidle < cl->minidle)
			cl->avgidle = cl->minidle;
		if (delay <= 0)
			delay = 1;
		cl->undertime = q->now + delay;

		cl->xstats.overactions++;
		cl->delayed = 1;
	}
	if (q->wd_expires == 0 || q->wd_expires > delay)
		q->wd_expires = delay;

	/* Dirty work! We must schedule wakeups based on
	 * real available rate, rather than leaf rate,
	 * which may be tiny (even zero).
	 */
	if (q->toplevel == TC_CBQ_MAXLEVEL) {
		struct cbq_class *b;
		psched_tdiff_t base_delay = q->wd_expires;

		for (b = cl->borrow; b; b = b->borrow) {
			delay = b->undertime - q->now;
			if (delay < base_delay) {
				if (delay <= 0)
					delay = 1;
				base_delay = delay;
			}
		}

		q->wd_expires = base_delay;
	}
}

/* TC_CBQ_OVL_RCLASSIC: penalize classes in the hierarchy by offtime
 * when they go overlimit
 */

static void cbq_ovl_rclassic(struct cbq_class *cl)
{
	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
	struct cbq_class *this = cl;

	do {
		if (cl->level > q->toplevel) {
			cl = NULL;
			break;
		}
	} while ((cl = cl->borrow) != NULL);

	if (cl == NULL)
		cl = this;
	cbq_ovl_classic(cl);
}

/* TC_CBQ_OVL_DELAY: delay the class until it goes underlimit */

static void cbq_ovl_delay(struct cbq_class *cl)
{
	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
	psched_tdiff_t delay = cl->undertime - q->now;

	if (test_bit(__QDISC_STATE_DEACTIVATED,
		     &qdisc_root_sleeping(cl->qdisc)->state))
		return;

	if (!cl->delayed) {
		psched_time_t sched = q->now;
		ktime_t expires;

		delay += cl->offtime;
		if (cl->avgidle < 0)
			delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
		if (cl->avgidle < cl->minidle)
			cl->avgidle = cl->minidle;
		cl->undertime = q->now + delay;

		if (delay > 0) {
			sched += delay + cl->penalty;
			cl->penalized = sched;
			cl->cpriority = TC_CBQ_MAXPRIO;
			q->pmask |= (1<<TC_CBQ_MAXPRIO);

			expires = ktime_set(0, 0);
			expires = ktime_add_ns(expires, PSCHED_TICKS2NS(sched));
			if (hrtimer_try_to_cancel(&q->delay_timer) &&
			    ktime_to_ns(ktime_sub(
					hrtimer_get_expires(&q->delay_timer),
					expires)) > 0)
				hrtimer_set_expires(&q->delay_timer, expires);
			hrtimer_restart(&q->delay_timer);
			cl->delayed = 1;
			cl->xstats.overactions++;
			return;
		}
		delay = 1;
	}
	if (q->wd_expires == 0 || q->wd_expires > delay)
		q->wd_expires = delay;
}

/* TC_CBQ_OVL_LOWPRIO: penalize class by lowering its priority band */

static void cbq_ovl_lowprio(struct cbq_class *cl)
{
	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);

	cl->penalized = q->now + cl->penalty;

	if (cl->cpriority != cl->priority2) {
		cl->cpriority = cl->priority2;
		q->pmask |= (1<<cl->cpriority);
		cl->xstats.overactions++;
	}
	cbq_ovl_classic(cl);
}

/* TC_CBQ_OVL_DROP: penalize class by dropping */

static void cbq_ovl_drop(struct cbq_class *cl)
{
	if (cl->q->ops->drop)
		if (cl->q->ops->drop(cl->q))
			cl->qdisc->q.qlen--;
	cl->xstats.overactions++;
	cbq_ovl_classic(cl);
}

static psched_tdiff_t cbq_undelay_prio(struct cbq_sched_data *q, int prio,
				       psched_time_t now)
{
	struct cbq_class *cl;
	struct cbq_class *cl_prev = q->active[prio];
	psched_time_t sched = now;

	if (cl_prev == NULL)
		return 0;

	do {
		cl = cl_prev->next_alive;
		if (now - cl->penalized > 0) {
			cl_prev->next_alive = cl->next_alive;
			cl->next_alive = NULL;
			cl->cpriority = cl->priority;
			cl->delayed = 0;
			cbq_activate_class(cl);

			if (cl == q->active[prio]) {
				q->active[prio] = cl_prev;
				if (cl == q->active[prio]) {
					q->active[prio] = NULL;
					return 0;
				}
			}

			cl = cl_prev->next_alive;
		} else if (sched - cl->penalized > 0)
			sched = cl->penalized;
	} while ((cl_prev = cl) != q->active[prio]);

	return sched - now;
}

static enum hrtimer_restart cbq_undelay(struct hrtimer *timer)
{
	struct cbq_sched_data *q = container_of(timer, struct cbq_sched_data,
						delay_timer);
	struct Qdisc *sch = q->watchdog.qdisc;
	psched_time_t now;
	psched_tdiff_t delay = 0;
	unsigned int pmask;

	now = psched_get_time();

	pmask = q->pmask;
	q->pmask = 0;

	while (pmask) {
		int prio = ffz(~pmask);
		psched_tdiff_t tmp;

		pmask &= ~(1<<prio);

		tmp = cbq_undelay_prio(q, prio, now);
		if (tmp > 0) {
			q->pmask |= 1<<prio;
			if (tmp < delay || delay == 0)
				delay = tmp;
		}
	}

	if (delay) {
		ktime_t time;

		time = ktime_set(0, 0);
		time = ktime_add_ns(time, PSCHED_TICKS2NS(now + delay));
		hrtimer_start(&q->delay_timer, time, HRTIMER_MODE_ABS);
	}

	qdisc_unthrottled(sch);
	__netif_schedule(qdisc_root(sch));
	return HRTIMER_NORESTART;
}

#ifdef CONFIG_NET_CLS_ACT
static int cbq_reshape_fail(struct sk_buff *skb, struct Qdisc *child)
{
	struct Qdisc *sch = child->__parent;
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *cl = q->rx_class;

	q->rx_class = NULL;

	if (cl && (cl = cbq_reclassify(skb, cl)) != NULL) {
		int ret;

		cbq_mark_toplevel(q, cl);

		q->rx_class = cl;
		cl->q->__parent = sch;

		ret = qdisc_enqueue(skb, cl->q);
		if (ret == NET_XMIT_SUCCESS) {
			sch->q.qlen++;
			qdisc_bstats_update(sch, skb);
			if (!cl->next_alive)
				cbq_activate_class(cl);
			return 0;
		}
		if (net_xmit_drop_count(ret))
			sch->qstats.drops++;
		return 0;
	}

	sch->qstats.drops++;
	return -1;
}
#endif

/*
 * This is a mission-critical procedure.
 *
 * We "regenerate" the toplevel cutoff if the transmitting class
 * has backlog and is not regulated. It is not part of the
 * original CBQ description, but looks more reasonable.
 * Probably, it is wrong. This question needs further investigation.
 */

static inline void
cbq_update_toplevel(struct cbq_sched_data *q, struct cbq_class *cl,
		    struct cbq_class *borrowed)
{
	if (cl && q->toplevel >= borrowed->level) {
		if (cl->q->q.qlen > 1) {
			do {
				if (borrowed->undertime == PSCHED_PASTPERFECT) {
					q->toplevel = borrowed->level;
					return;
				}
			} while ((borrowed = borrowed->borrow) != NULL);
		}
#if 0
	/* It is not necessary now. Uncommenting it
	   will save CPU cycles, but decrease fairness.
	 */
		q->toplevel = TC_CBQ_MAXLEVEL;
#endif
	}
}

static void
cbq_update(struct cbq_sched_data *q)
{
	struct cbq_class *this = q->tx_class;
	struct cbq_class *cl = this;
	int len = q->tx_len;

	q->tx_class = NULL;

	for ( ; cl; cl = cl->share) {
		long avgidle = cl->avgidle;
		long idle;

		cl->bstats.packets++;
		cl->bstats.bytes += len;

		/*
		 * (now - last) is total time between packet right edges.
		 * (last_pktlen/rate) is "virtual" busy time, so that
		 *
		 * idle = (now - last) - last_pktlen/rate
		 */

		idle = q->now - cl->last;
		if ((unsigned long)idle > 128*1024*1024) {
			avgidle = cl->maxidle;
		} else {
			idle -= L2T(cl, len);

		/* true_avgidle := (1-W)*true_avgidle + W*idle,
		 * where W=2^{-ewma_log}. But cl->avgidle is scaled:
		 * cl->avgidle == true_avgidle/W,
		 * hence:
		 */
			avgidle += idle - (avgidle>>cl->ewma_log);
		}

		if (avgidle <= 0) {
			/* Overlimit or at-limit */

			if (avgidle < cl->minidle)
				avgidle = cl->minidle;

			cl->avgidle = avgidle;

			/* Calculate expected time, when this class
			 * will be allowed to send.
			 * It will occur, when:
			 * (1-W)*true_avgidle + W*delay = 0, i.e.
			 * idle = (1/W - 1)*(-true_avgidle)
			 * or
			 * idle = (1 - W)*(-cl->avgidle);
			 */
			idle = (-avgidle) - ((-avgidle) >> cl->ewma_log);

			/*
			 * That is not all.
			 * To maintain the rate allocated to the class,
			 * we add to undertime virtual clock,
			 * necessary to complete transmitted packet.
			 * (len/phys_bandwidth has been already passed
			 * to the moment of cbq_update)
			 */

			idle -= L2T(&q->link, len);
			idle += L2T(cl, len);

			cl->undertime = q->now + idle;
		} else {
			/* Underlimit */

			cl->undertime = PSCHED_PASTPERFECT;
			if (avgidle > cl->maxidle)
				cl->avgidle = cl->maxidle;
			else
				cl->avgidle = avgidle;
		}
		cl->last = q->now;
	}

	cbq_update_toplevel(q, this, q->tx_borrowed);
}
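
/* Worked numbers for the scaled EWMA above (editor's illustration):
 * with cl->ewma_log = 5 the true gain is W = 2^-5 = 1/32, and
 * cl->avgidle stores true_avgidle/W, i.e. 32*true_avgidle. At the
 * fixed point true_avgidle = idle = 100 we have avgidle = 3200, and
 *
 *	avgidle += idle - (avgidle >> 5);   i.e. 3200 + 100 - 100 = 3200
 *
 * which matches the unscaled update (31/32)*100 + (1/32)*100 = 100.
 * Likewise idle = (-avgidle) - ((-avgidle) >> ewma_log) evaluates
 * (1 - W)*(-cl->avgidle) with shifts instead of a multiply.
 */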

static inline struct cbq_class *
cbq_under_limit(struct cbq_class *cl)
{
	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
	struct cbq_class *this_cl = cl;

	if (cl->tparent == NULL)
		return cl;

	if (cl->undertime == PSCHED_PASTPERFECT || q->now >= cl->undertime) {
		cl->delayed = 0;
		return cl;
	}

	do {
		/* It is a very suspicious place. Now the overlimit
		 * action is generated for non-bounded classes only
		 * if the link is completely congested. Though it
		 * agrees with the ancestor-only paradigm, it looks
		 * very stupid. Particularly, it means that this
		 * chunk of code will either never be called or
		 * result in strong amplification of burstiness.
		 * Dangerous, silly, and, however, no other solution
		 * exists.
		 */
		cl = cl->borrow;
		if (!cl) {
			this_cl->qstats.overlimits++;
			this_cl->overlimit(this_cl);
			return NULL;
		}
		if (cl->level > q->toplevel)
			return NULL;
	} while (cl->undertime != PSCHED_PASTPERFECT && q->now < cl->undertime);

	cl->delayed = 0;
	return cl;
}

static inline struct sk_buff *
cbq_dequeue_prio(struct Qdisc *sch, int prio)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *cl_tail, *cl_prev, *cl;
	struct sk_buff *skb;
	int deficit;

	cl_tail = cl_prev = q->active[prio];
	cl = cl_prev->next_alive;

	do {
		deficit = 0;

		/* Start round */
		do {
			struct cbq_class *borrow = cl;

			if (cl->q->q.qlen &&
			    (borrow = cbq_under_limit(cl)) == NULL)
				goto skip_class;

			if (cl->deficit <= 0) {
				/* Class exhausted its allotment per
				 * this round. Switch to the next one.
				 */
				deficit = 1;
				cl->deficit += cl->quantum;
				goto next_class;
			}

			skb = cl->q->dequeue(cl->q);

			/* Class did not give us any skb :-(
			 * It could occur even if cl->q->q.qlen != 0
			 * f.e. if cl->q == "tbf"
			 */
			if (skb == NULL)
				goto skip_class;

			cl->deficit -= qdisc_pkt_len(skb);
			q->tx_class = cl;
			q->tx_borrowed = borrow;
			if (borrow != cl) {
#ifndef CBQ_XSTATS_BORROWS_BYTES
				borrow->xstats.borrows++;
				cl->xstats.borrows++;
#else
				borrow->xstats.borrows += qdisc_pkt_len(skb);
				cl->xstats.borrows += qdisc_pkt_len(skb);
#endif
			}
			q->tx_len = qdisc_pkt_len(skb);

			if (cl->deficit <= 0) {
				q->active[prio] = cl;
				cl = cl->next_alive;
				cl->deficit += cl->quantum;
			}
			return skb;

skip_class:
			if (cl->q->q.qlen == 0 || prio != cl->cpriority) {
				/* Class is empty or penalized.
				 * Unlink it from active chain.
				 */
				cl_prev->next_alive = cl->next_alive;
				cl->next_alive = NULL;

				/* Did cl_tail point to it? */
				if (cl == cl_tail) {
					/* Repair it! */
					cl_tail = cl_prev;

					/* Was it the last class in this band? */
					if (cl == cl_tail) {
						/* Kill the band! */
						q->active[prio] = NULL;
						q->activemask &= ~(1<<prio);
						if (cl->q->q.qlen)
							cbq_activate_class(cl);
						return NULL;
					}

					q->active[prio] = cl_tail;
				}
				if (cl->q->q.qlen)
					cbq_activate_class(cl);

				cl = cl_prev;
			}

next_class:
			cl_prev = cl;
			cl = cl->next_alive;
		} while (cl_prev != cl_tail);
	} while (deficit);

	q->active[prio] = cl_prev;

	return NULL;
}
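
/* Deficit accounting example (editor's illustration): a class with
 * quantum = 1000 and deficit = 200 may still send; if its packet is
 * 1500 bytes, deficit becomes -1300 and its turn ends. Each later
 * round adds quantum (-1300 -> -300 -> 700), so it transmits again
 * two rounds later. This is what lets quanta be smaller than the
 * MTU, as promised in the header comment.
 */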

static inline struct sk_buff *
cbq_dequeue_1(struct Qdisc *sch)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct sk_buff *skb;
	unsigned int activemask;

	activemask = q->activemask & 0xFF;
	while (activemask) {
		int prio = ffz(~activemask);
		activemask &= ~(1<<prio);
		skb = cbq_dequeue_prio(sch, prio);
		if (skb)
			return skb;
	}
	return NULL;
}

static struct sk_buff *
cbq_dequeue(struct Qdisc *sch)
{
	struct sk_buff *skb;
	struct cbq_sched_data *q = qdisc_priv(sch);
	psched_time_t now;
	psched_tdiff_t incr;

	now = psched_get_time();
	incr = now - q->now_rt;

	if (q->tx_class) {
		psched_tdiff_t incr2;
		/* Time integrator. We calculate EOS time
		 * by adding expected packet transmission time.
		 * If real time is greater, we warp artificial clock,
		 * so that:
		 *
		 * cbq_time = max(real_time, work);
		 */
		incr2 = L2T(&q->link, q->tx_len);
		q->now += incr2;
		cbq_update(q);
		if ((incr -= incr2) < 0)
			incr = 0;
	}
	q->now += incr;
	q->now_rt = now;
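
	/* Integrator example (editor's illustration): if the previous
	 * 1500-byte packet needs incr2 = 1.2 ms of link time but
	 * incr = 2.0 ms of real time elapsed, q->now advances by
	 * 1.2 + 0.8 = 2.0 ms and tracks real time. Had only 0.5 ms
	 * elapsed, incr would clamp to 0 and q->now would run ahead,
	 * giving cbq_time = max(real_time, work).
	 */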

	for (;;) {
		q->wd_expires = 0;

		skb = cbq_dequeue_1(sch);
		if (skb) {
			sch->q.qlen--;
			qdisc_unthrottled(sch);
			return skb;
		}

		/* All the classes are overlimit.
		 *
		 * It is possible, if:
		 *
		 * 1. Scheduler is empty.
		 * 2. Toplevel cutoff inhibited borrowing.
		 * 3. Root class is overlimit.
		 *
		 * Reset the 2nd and 3rd conditions and retry.
		 *
		 * Note that NS and cbq-2.0 are buggy: peeking
		 * an arbitrary class is appropriate for ancestor-only
		 * sharing, but not for the toplevel algorithm.
		 *
		 * Our version is better, but slower, because it requires
		 * two passes; that is unavoidable with top-level sharing.
		 */

		if (q->toplevel == TC_CBQ_MAXLEVEL &&
		    q->link.undertime == PSCHED_PASTPERFECT)
			break;

		q->toplevel = TC_CBQ_MAXLEVEL;
		q->link.undertime = PSCHED_PASTPERFECT;
	}

	/* No packets in scheduler or nobody wants to give them to us :-(
	 * Sigh... start watchdog timer in the last case.
	 */

	if (sch->q.qlen) {
		sch->qstats.overlimits++;
		if (q->wd_expires)
			qdisc_watchdog_schedule(&q->watchdog,
						now + q->wd_expires);
	}
	return NULL;
}

/* CBQ class maintenance routines */

static void cbq_adjust_levels(struct cbq_class *this)
{
	if (this == NULL)
		return;

	do {
		int level = 0;
		struct cbq_class *cl;

		cl = this->children;
		if (cl) {
			do {
				if (cl->level > level)
					level = cl->level;
			} while ((cl = cl->sibling) != this->children);
		}
		this->level = level + 1;
	} while ((this = this->tparent) != NULL);
}

static void cbq_normalize_quanta(struct cbq_sched_data *q, int prio)
{
	struct cbq_class *cl;
	struct hlist_node *n;
	unsigned int h;

	if (q->quanta[prio] == 0)
		return;

	for (h = 0; h < q->clhash.hashsize; h++) {
		hlist_for_each_entry(cl, n, &q->clhash.hash[h], common.hnode) {
			/* BUGGGG... Beware! This expression suffers from
			 * arithmetic overflows!
			 */
			if (cl->priority == prio) {
				cl->quantum = (cl->weight*cl->allot*q->nclasses[prio])/
					q->quanta[prio];
			}
			if (cl->quantum <= 0 || cl->quantum > 32*qdisc_dev(cl->qdisc)->mtu) {
				pr_warning("CBQ: class %08x has bad quantum==%ld, repaired.\n",
					   cl->common.classid, cl->quantum);
				cl->quantum = qdisc_dev(cl->qdisc)->mtu/2 + 1;
			}
		}
	}
}
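
/* Editor's note on the overflow warning above (illustrative numbers):
 * cl->weight defaults to the class rate in bytes/sec, so with a
 * 32-bit long a 1 Gbit/s class (weight ~= 1.25e8) and allot = 1514
 * already give weight*allot ~= 1.9e11 >> 2^31; the product wraps,
 * and only the sanity check that follows repairs the quantum.
 */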

static void cbq_sync_defmap(struct cbq_class *cl)
{
	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
	struct cbq_class *split = cl->split;
	unsigned int h;
	int i;

	if (split == NULL)
		return;

	for (i = 0; i <= TC_PRIO_MAX; i++) {
		if (split->defaults[i] == cl && !(cl->defmap & (1<<i)))
			split->defaults[i] = NULL;
	}

	for (i = 0; i <= TC_PRIO_MAX; i++) {
		int level = split->level;

		if (split->defaults[i])
			continue;

		for (h = 0; h < q->clhash.hashsize; h++) {
			struct hlist_node *n;
			struct cbq_class *c;

			hlist_for_each_entry(c, n, &q->clhash.hash[h],
					     common.hnode) {
				if (c->split == split && c->level < level &&
				    c->defmap & (1<<i)) {
					split->defaults[i] = c;
					level = c->level;
				}
			}
		}
	}
}

static void cbq_change_defmap(struct cbq_class *cl, u32 splitid, u32 def, u32 mask)
{
	struct cbq_class *split = NULL;

	if (splitid == 0) {
		split = cl->split;
		if (!split)
			return;
		splitid = split->common.classid;
	}

	if (split == NULL || split->common.classid != splitid) {
		for (split = cl->tparent; split; split = split->tparent)
			if (split->common.classid == splitid)
				break;
	}

	if (split == NULL)
		return;

	if (cl->split != split) {
		cl->defmap = 0;
		cbq_sync_defmap(cl);
		cl->split = split;
		cl->defmap = def & mask;
	} else
		cl->defmap = (cl->defmap & ~mask) | (def & mask);

	cbq_sync_defmap(cl);
}
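
/* defmap example (editor's sketch; the values are invented):
 * userspace passes a struct tc_cbq_fopt such as
 *
 *	.split     = 0x00010000,	// split node 1:0
 *	.defmap    = 0xc0,		// set bits 6 and 7
 *	.defchange = 0xff,		// but only touch bits 0-7
 *
 * via TCA_CBQ_FOPT (see cbq_set_fopt below). Afterwards packets of
 * logical priority 6 or 7 default to this class at split node 1:0,
 * and cbq_sync_defmap() re-resolves any slots the class gave up.
 */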

static void cbq_unlink_class(struct cbq_class *this)
{
	struct cbq_class *cl, **clp;
	struct cbq_sched_data *q = qdisc_priv(this->qdisc);

	qdisc_class_hash_remove(&q->clhash, &this->common);

	if (this->tparent) {
		clp = &this->sibling;
		cl = *clp;
		do {
			if (cl == this) {
				*clp = cl->sibling;
				break;
			}
			clp = &cl->sibling;
		} while ((cl = *clp) != this->sibling);

		if (this->tparent->children == this) {
			this->tparent->children = this->sibling;
			if (this->sibling == this)
				this->tparent->children = NULL;
		}
	} else {
		WARN_ON(this->sibling != this);
	}
}

static void cbq_link_class(struct cbq_class *this)
{
	struct cbq_sched_data *q = qdisc_priv(this->qdisc);
	struct cbq_class *parent = this->tparent;

	this->sibling = this;
	qdisc_class_hash_insert(&q->clhash, &this->common);

	if (parent == NULL)
		return;

	if (parent->children == NULL) {
		parent->children = this;
	} else {
		this->sibling = parent->children->sibling;
		parent->children->sibling = this;
	}
}

static unsigned int cbq_drop(struct Qdisc *sch)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *cl, *cl_head;
	int prio;
	unsigned int len;

	for (prio = TC_CBQ_MAXPRIO; prio >= 0; prio--) {
		cl_head = q->active[prio];
		if (!cl_head)
			continue;

		cl = cl_head;
		do {
			if (cl->q->ops->drop && (len = cl->q->ops->drop(cl->q))) {
				sch->q.qlen--;
				if (!cl->q->q.qlen)
					cbq_deactivate_class(cl);
				return len;
			}
		} while ((cl = cl->next_alive) != cl_head);
	}
	return 0;
}

static void
cbq_reset(struct Qdisc *sch)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *cl;
	struct hlist_node *n;
	int prio;
	unsigned int h;

	q->activemask = 0;
	q->pmask = 0;
	q->tx_class = NULL;
	q->tx_borrowed = NULL;
	qdisc_watchdog_cancel(&q->watchdog);
	hrtimer_cancel(&q->delay_timer);
	q->toplevel = TC_CBQ_MAXLEVEL;
	q->now = psched_get_time();
	q->now_rt = q->now;

	for (prio = 0; prio <= TC_CBQ_MAXPRIO; prio++)
		q->active[prio] = NULL;

	for (h = 0; h < q->clhash.hashsize; h++) {
		hlist_for_each_entry(cl, n, &q->clhash.hash[h], common.hnode) {
			qdisc_reset(cl->q);

			cl->next_alive = NULL;
			cl->undertime = PSCHED_PASTPERFECT;
			cl->avgidle = cl->maxidle;
			cl->deficit = cl->quantum;
			cl->cpriority = cl->priority;
		}
	}
	sch->q.qlen = 0;
}

static int cbq_set_lss(struct cbq_class *cl, struct tc_cbq_lssopt *lss)
{
	if (lss->change & TCF_CBQ_LSS_FLAGS) {
		cl->share = (lss->flags & TCF_CBQ_LSS_ISOLATED) ? NULL : cl->tparent;
		cl->borrow = (lss->flags & TCF_CBQ_LSS_BOUNDED) ? NULL : cl->tparent;
	}
	if (lss->change & TCF_CBQ_LSS_EWMA)
		cl->ewma_log = lss->ewma_log;
	if (lss->change & TCF_CBQ_LSS_AVPKT)
		cl->avpkt = lss->avpkt;
	if (lss->change & TCF_CBQ_LSS_MINIDLE)
		cl->minidle = -(long)lss->minidle;
	if (lss->change & TCF_CBQ_LSS_MAXIDLE) {
		cl->maxidle = lss->maxidle;
		cl->avgidle = lss->maxidle;
	}
	if (lss->change & TCF_CBQ_LSS_OFFTIME)
		cl->offtime = lss->offtime;
	return 0;
}
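
/* Editor's reading of the two LSS flags handled above:
 * TCF_CBQ_LSS_ISOLATED clears cl->share, so cbq_update() stops
 * charging this class's usage up the sharing chain (it neither
 * lends to nor shares with its siblings); TCF_CBQ_LSS_BOUNDED
 * clears cl->borrow, so cbq_under_limit() never lets it borrow
 * spare bandwidth from its parent.
 */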

static void cbq_rmprio(struct cbq_sched_data *q, struct cbq_class *cl)
{
	q->nclasses[cl->priority]--;
	q->quanta[cl->priority] -= cl->weight;
	cbq_normalize_quanta(q, cl->priority);
}

static void cbq_addprio(struct cbq_sched_data *q, struct cbq_class *cl)
{
	q->nclasses[cl->priority]++;
	q->quanta[cl->priority] += cl->weight;
	cbq_normalize_quanta(q, cl->priority);
}

static int cbq_set_wrr(struct cbq_class *cl, struct tc_cbq_wrropt *wrr)
{
	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);

	if (wrr->allot)
		cl->allot = wrr->allot;
	if (wrr->weight)
		cl->weight = wrr->weight;
	if (wrr->priority) {
		cl->priority = wrr->priority - 1;
		cl->cpriority = cl->priority;
		if (cl->priority >= cl->priority2)
			cl->priority2 = TC_CBQ_MAXPRIO - 1;
	}

	cbq_addprio(q, cl);
	return 0;
}
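
/* Example of the attribute this function consumes (editor's sketch
 * with invented values; userspace packs it into TCA_CBQ_WRROPT):
 *
 *	struct tc_cbq_wrropt wrr = {
 *		.allot    = 1514,	// per-round byte allotment
 *		.priority = 5,		// 1-based on the wire, 0 = leave as is
 *		.weight   = 375000,	// typically the rate in bytes/sec
 *	};
 *
 * Zero-valued fields leave the corresponding class parameter
 * untouched, as the if () chain above shows, and cbq_addprio()
 * then renormalizes the quanta of the priority band.
 */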

static int cbq_set_overlimit(struct cbq_class *cl, struct tc_cbq_ovl *ovl)
{
	switch (ovl->strategy) {
	case TC_CBQ_OVL_CLASSIC:
		cl->overlimit = cbq_ovl_classic;
		break;
	case TC_CBQ_OVL_DELAY:
		cl->overlimit = cbq_ovl_delay;
		break;
	case TC_CBQ_OVL_LOWPRIO:
		if (ovl->priority2 - 1 >= TC_CBQ_MAXPRIO ||
		    ovl->priority2 - 1 <= cl->priority)
			return -EINVAL;
		cl->priority2 = ovl->priority2 - 1;
		cl->overlimit = cbq_ovl_lowprio;
		break;
	case TC_CBQ_OVL_DROP:
		cl->overlimit = cbq_ovl_drop;
		break;
	case TC_CBQ_OVL_RCLASSIC:
		cl->overlimit = cbq_ovl_rclassic;
		break;
	default:
		return -EINVAL;
	}
	cl->penalty = ovl->penalty;
	return 0;
}

#ifdef CONFIG_NET_CLS_ACT
static int cbq_set_police(struct cbq_class *cl, struct tc_cbq_police *p)
{
	cl->police = p->police;

	if (cl->q->handle) {
		if (p->police == TC_POLICE_RECLASSIFY)
			cl->q->reshape_fail = cbq_reshape_fail;
		else
			cl->q->reshape_fail = NULL;
	}
	return 0;
}
#endif

static int cbq_set_fopt(struct cbq_class *cl, struct tc_cbq_fopt *fopt)
{
	cbq_change_defmap(cl, fopt->split, fopt->defmap, fopt->defchange);
	return 0;
}

static const struct nla_policy cbq_policy[TCA_CBQ_MAX + 1] = {
	[TCA_CBQ_LSSOPT]	= { .len = sizeof(struct tc_cbq_lssopt) },
	[TCA_CBQ_WRROPT]	= { .len = sizeof(struct tc_cbq_wrropt) },
	[TCA_CBQ_FOPT]		= { .len = sizeof(struct tc_cbq_fopt) },
	[TCA_CBQ_OVL_STRATEGY]	= { .len = sizeof(struct tc_cbq_ovl) },
	[TCA_CBQ_RATE]		= { .len = sizeof(struct tc_ratespec) },
	[TCA_CBQ_RTAB]		= { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
	[TCA_CBQ_POLICE]	= { .len = sizeof(struct tc_cbq_police) },
};

static int cbq_init(struct Qdisc *sch, struct nlattr *opt)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct nlattr *tb[TCA_CBQ_MAX + 1];
	struct tc_ratespec *r;
	int err;

	err = nla_parse_nested(tb, TCA_CBQ_MAX, opt, cbq_policy);
	if (err < 0)
		return err;

	if (tb[TCA_CBQ_RTAB] == NULL || tb[TCA_CBQ_RATE] == NULL)
		return -EINVAL;

	r = nla_data(tb[TCA_CBQ_RATE]);

	if ((q->link.R_tab = qdisc_get_rtab(r, tb[TCA_CBQ_RTAB])) == NULL)
		return -EINVAL;

	err = qdisc_class_hash_init(&q->clhash);
	if (err < 0)
		goto put_rtab;

	q->link.refcnt = 1;
	q->link.sibling = &q->link;
	q->link.common.classid = sch->handle;
	q->link.qdisc = sch;
	q->link.q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops,
				      sch->handle);
	if (!q->link.q)
		q->link.q = &noop_qdisc;

	q->link.priority = TC_CBQ_MAXPRIO - 1;
	q->link.priority2 = TC_CBQ_MAXPRIO - 1;
	q->link.cpriority = TC_CBQ_MAXPRIO - 1;
	q->link.ovl_strategy = TC_CBQ_OVL_CLASSIC;
	q->link.overlimit = cbq_ovl_classic;
	q->link.allot = psched_mtu(qdisc_dev(sch));
	q->link.quantum = q->link.allot;
	q->link.weight = q->link.R_tab->rate.rate;

	q->link.ewma_log = TC_CBQ_DEF_EWMA;
	q->link.avpkt = q->link.allot/2;
	q->link.minidle = -0x7FFFFFFF;

	qdisc_watchdog_init(&q->watchdog, sch);
	hrtimer_init(&q->delay_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
	q->delay_timer.function = cbq_undelay;
	q->toplevel = TC_CBQ_MAXLEVEL;
	q->now = psched_get_time();
	q->now_rt = q->now;

	cbq_link_class(&q->link);

	if (tb[TCA_CBQ_LSSOPT])
		cbq_set_lss(&q->link, nla_data(tb[TCA_CBQ_LSSOPT]));

	cbq_addprio(q, &q->link);
	return 0;

put_rtab:
	qdisc_put_rtab(q->link.R_tab);
	return err;
}

static int cbq_dump_rate(struct sk_buff *skb, struct cbq_class *cl)
{
	unsigned char *b = skb_tail_pointer(skb);

	NLA_PUT(skb, TCA_CBQ_RATE, sizeof(cl->R_tab->rate), &cl->R_tab->rate);
	return skb->len;

nla_put_failure:
	nlmsg_trim(skb, b);
	return -1;
}

static int cbq_dump_lss(struct sk_buff *skb, struct cbq_class *cl)
{
	unsigned char *b = skb_tail_pointer(skb);
	struct tc_cbq_lssopt opt;

	opt.flags = 0;
	if (cl->borrow == NULL)
		opt.flags |= TCF_CBQ_LSS_BOUNDED;
	if (cl->share == NULL)
		opt.flags |= TCF_CBQ_LSS_ISOLATED;
	opt.ewma_log = cl->ewma_log;
	opt.level = cl->level;
	opt.avpkt = cl->avpkt;
	opt.maxidle = cl->maxidle;
	opt.minidle = (u32)(-cl->minidle);
	opt.offtime = cl->offtime;
	opt.change = ~0;
	NLA_PUT(skb, TCA_CBQ_LSSOPT, sizeof(opt), &opt);
	return skb->len;

nla_put_failure:
	nlmsg_trim(skb, b);
	return -1;
}

static int cbq_dump_wrr(struct sk_buff *skb, struct cbq_class *cl)
{
	unsigned char *b = skb_tail_pointer(skb);
	struct tc_cbq_wrropt opt;

	opt.flags = 0;
	opt.allot = cl->allot;
	opt.priority = cl->priority + 1;
	opt.cpriority = cl->cpriority + 1;
	opt.weight = cl->weight;
	NLA_PUT(skb, TCA_CBQ_WRROPT, sizeof(opt), &opt);
	return skb->len;

nla_put_failure:
	nlmsg_trim(skb, b);
	return -1;
}

static int cbq_dump_ovl(struct sk_buff *skb, struct cbq_class *cl)
{
	unsigned char *b = skb_tail_pointer(skb);
	struct tc_cbq_ovl opt;

	opt.strategy = cl->ovl_strategy;
	opt.priority2 = cl->priority2 + 1;
	opt.pad = 0;
	opt.penalty = cl->penalty;
	NLA_PUT(skb, TCA_CBQ_OVL_STRATEGY, sizeof(opt), &opt);
	return skb->len;

nla_put_failure:
	nlmsg_trim(skb, b);
	return -1;
}

static int cbq_dump_fopt(struct sk_buff *skb, struct cbq_class *cl)
{
	unsigned char *b = skb_tail_pointer(skb);
	struct tc_cbq_fopt opt;

	if (cl->split || cl->defmap) {
		opt.split = cl->split ? cl->split->common.classid : 0;
		opt.defmap = cl->defmap;
		opt.defchange = ~0;
		NLA_PUT(skb, TCA_CBQ_FOPT, sizeof(opt), &opt);
	}
	return skb->len;

nla_put_failure:
	nlmsg_trim(skb, b);
	return -1;
}

#ifdef CONFIG_NET_CLS_ACT
static int cbq_dump_police(struct sk_buff *skb, struct cbq_class *cl)
{
	unsigned char *b = skb_tail_pointer(skb);
	struct tc_cbq_police opt;

	if (cl->police) {
		opt.police = cl->police;
		opt.__res1 = 0;
		opt.__res2 = 0;
		NLA_PUT(skb, TCA_CBQ_POLICE, sizeof(opt), &opt);
	}
	return skb->len;

nla_put_failure:
	nlmsg_trim(skb, b);
	return -1;
}
#endif

static int cbq_dump_attr(struct sk_buff *skb, struct cbq_class *cl)
{
	if (cbq_dump_lss(skb, cl) < 0 ||
	    cbq_dump_rate(skb, cl) < 0 ||
	    cbq_dump_wrr(skb, cl) < 0 ||
	    cbq_dump_ovl(skb, cl) < 0 ||
#ifdef CONFIG_NET_CLS_ACT
	    cbq_dump_police(skb, cl) < 0 ||
#endif
	    cbq_dump_fopt(skb, cl) < 0)
		return -1;
	return 0;
}

static int cbq_dump(struct Qdisc *sch, struct sk_buff *skb)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct nlattr *nest;

	nest = nla_nest_start(skb, TCA_OPTIONS);
	if (nest == NULL)
		goto nla_put_failure;
	if (cbq_dump_attr(skb, &q->link) < 0)
		goto nla_put_failure;
	nla_nest_end(skb, nest);
	return skb->len;

nla_put_failure:
	nla_nest_cancel(skb, nest);
	return -1;
}

static int
cbq_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
{
	struct cbq_sched_data *q = qdisc_priv(sch);

	q->link.xstats.avgidle = q->link.avgidle;
	return gnet_stats_copy_app(d, &q->link.xstats, sizeof(q->link.xstats));
}

static int
cbq_dump_class(struct Qdisc *sch, unsigned long arg,
	       struct sk_buff *skb, struct tcmsg *tcm)
{
	struct cbq_class *cl = (struct cbq_class *)arg;
	struct nlattr *nest;

	if (cl->tparent)
		tcm->tcm_parent = cl->tparent->common.classid;
	else
		tcm->tcm_parent = TC_H_ROOT;
	tcm->tcm_handle = cl->common.classid;
	tcm->tcm_info = cl->q->handle;

	nest = nla_nest_start(skb, TCA_OPTIONS);
	if (nest == NULL)
		goto nla_put_failure;
	if (cbq_dump_attr(skb, cl) < 0)
		goto nla_put_failure;
	nla_nest_end(skb, nest);
	return skb->len;

nla_put_failure:
	nla_nest_cancel(skb, nest);
	return -1;
}

static int
cbq_dump_class_stats(struct Qdisc *sch, unsigned long arg,
		     struct gnet_dump *d)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *cl = (struct cbq_class *)arg;

	cl->qstats.qlen = cl->q->q.qlen;
	cl->xstats.avgidle = cl->avgidle;
	cl->xstats.undertime = 0;

	if (cl->undertime != PSCHED_PASTPERFECT)
		cl->xstats.undertime = cl->undertime - q->now;

	if (gnet_stats_copy_basic(d, &cl->bstats) < 0 ||
	    gnet_stats_copy_rate_est(d, &cl->bstats, &cl->rate_est) < 0 ||
	    gnet_stats_copy_queue(d, &cl->qstats) < 0)
		return -1;

	return gnet_stats_copy_app(d, &cl->xstats, sizeof(cl->xstats));
}

static int cbq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
		     struct Qdisc **old)
{
	struct cbq_class *cl = (struct cbq_class *)arg;

	if (new == NULL) {
		new = qdisc_create_dflt(sch->dev_queue,
					&pfifo_qdisc_ops, cl->common.classid);
		if (new == NULL)
			return -ENOBUFS;
	} else {
#ifdef CONFIG_NET_CLS_ACT
		if (cl->police == TC_POLICE_RECLASSIFY)
			new->reshape_fail = cbq_reshape_fail;
#endif
	}
	sch_tree_lock(sch);
	*old = cl->q;
	cl->q = new;
	qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
	qdisc_reset(*old);
	sch_tree_unlock(sch);

	return 0;
}

static struct Qdisc *cbq_leaf(struct Qdisc *sch, unsigned long arg)
{
	struct cbq_class *cl = (struct cbq_class *)arg;

	return cl->q;
}

static void cbq_qlen_notify(struct Qdisc *sch, unsigned long arg)
{
	struct cbq_class *cl = (struct cbq_class *)arg;

	if (cl->q->q.qlen == 0)
		cbq_deactivate_class(cl);
}

static unsigned long cbq_get(struct Qdisc *sch, u32 classid)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *cl = cbq_class_lookup(q, classid);

	if (cl) {
		cl->refcnt++;
		return (unsigned long)cl;
	}
	return 0;
}

static void cbq_destroy_class(struct Qdisc *sch, struct cbq_class *cl)
{
	struct cbq_sched_data *q = qdisc_priv(sch);

	WARN_ON(cl->filters);

	tcf_destroy_chain(&cl->filter_list);
	qdisc_destroy(cl->q);
	qdisc_put_rtab(cl->R_tab);
	gen_kill_estimator(&cl->bstats, &cl->rate_est);
	if (cl != &q->link)
		kfree(cl);
}

static void cbq_destroy(struct Qdisc *sch)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct hlist_node *n, *next;
	struct cbq_class *cl;
	unsigned int h;

#ifdef CONFIG_NET_CLS_ACT
	q->rx_class = NULL;
#endif
	/*
	 * Filters must be destroyed first because we don't destroy the
	 * classes from root to leafs which means that filters can still
	 * be bound to classes which have been destroyed already. --TGR '04
	 */
	for (h = 0; h < q->clhash.hashsize; h++) {
		hlist_for_each_entry(cl, n, &q->clhash.hash[h], common.hnode)
			tcf_destroy_chain(&cl->filter_list);
	}
	for (h = 0; h < q->clhash.hashsize; h++) {
		hlist_for_each_entry_safe(cl, n, next, &q->clhash.hash[h],
					  common.hnode)
			cbq_destroy_class(sch, cl);
	}
	qdisc_class_hash_destroy(&q->clhash);
}

static void cbq_put(struct Qdisc *sch, unsigned long arg)
{
	struct cbq_class *cl = (struct cbq_class *)arg;

	if (--cl->refcnt == 0) {
#ifdef CONFIG_NET_CLS_ACT
		spinlock_t *root_lock = qdisc_root_sleeping_lock(sch);
		struct cbq_sched_data *q = qdisc_priv(sch);

		spin_lock_bh(root_lock);
		if (q->rx_class == cl)
			q->rx_class = NULL;
		spin_unlock_bh(root_lock);
#endif

		cbq_destroy_class(sch, cl);
	}
}

static int
cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **tca,
		 unsigned long *arg)
{
	int err;
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *cl = (struct cbq_class *)*arg;
	struct nlattr *opt = tca[TCA_OPTIONS];
	struct nlattr *tb[TCA_CBQ_MAX + 1];
	struct cbq_class *parent;
	struct qdisc_rate_table *rtab = NULL;

	if (opt == NULL)
		return -EINVAL;

	err = nla_parse_nested(tb, TCA_CBQ_MAX, opt, cbq_policy);
	if (err < 0)
		return err;

	if (cl) {
		/* Check parent */
		if (parentid) {
			if (cl->tparent &&
			    cl->tparent->common.classid != parentid)
				return -EINVAL;
			if (!cl->tparent && parentid != TC_H_ROOT)
				return -EINVAL;
		}

		if (tb[TCA_CBQ_RATE]) {
			rtab = qdisc_get_rtab(nla_data(tb[TCA_CBQ_RATE]),
					      tb[TCA_CBQ_RTAB]);
			if (rtab == NULL)
				return -EINVAL;
		}

		if (tca[TCA_RATE]) {
			err = gen_replace_estimator(&cl->bstats, &cl->rate_est,
						    qdisc_root_sleeping_lock(sch),
						    tca[TCA_RATE]);
			if (err) {
				if (rtab)
					qdisc_put_rtab(rtab);
				return err;
			}
		}

		/* Change class parameters */
		sch_tree_lock(sch);

		if (cl->next_alive != NULL)
			cbq_deactivate_class(cl);

		if (rtab) {
			qdisc_put_rtab(cl->R_tab);
			cl->R_tab = rtab;
		}

		if (tb[TCA_CBQ_LSSOPT])
			cbq_set_lss(cl, nla_data(tb[TCA_CBQ_LSSOPT]));

		if (tb[TCA_CBQ_WRROPT]) {
			cbq_rmprio(q, cl);
			cbq_set_wrr(cl, nla_data(tb[TCA_CBQ_WRROPT]));
		}

		if (tb[TCA_CBQ_OVL_STRATEGY])
			cbq_set_overlimit(cl, nla_data(tb[TCA_CBQ_OVL_STRATEGY]));

#ifdef CONFIG_NET_CLS_ACT
		if (tb[TCA_CBQ_POLICE])
			cbq_set_police(cl, nla_data(tb[TCA_CBQ_POLICE]));
#endif

		if (tb[TCA_CBQ_FOPT])
			cbq_set_fopt(cl, nla_data(tb[TCA_CBQ_FOPT]));

		if (cl->q->q.qlen)
			cbq_activate_class(cl);

		sch_tree_unlock(sch);

		return 0;
	}

	if (parentid == TC_H_ROOT)
		return -EINVAL;

	if (tb[TCA_CBQ_WRROPT] == NULL || tb[TCA_CBQ_RATE] == NULL ||
	    tb[TCA_CBQ_LSSOPT] == NULL)
		return -EINVAL;

	rtab = qdisc_get_rtab(nla_data(tb[TCA_CBQ_RATE]), tb[TCA_CBQ_RTAB]);
	if (rtab == NULL)
		return -EINVAL;

	if (classid) {
		err = -EINVAL;
		if (TC_H_MAJ(classid ^ sch->handle) ||
		    cbq_class_lookup(q, classid))
			goto failure;
	} else {
		int i;
		classid = TC_H_MAKE(sch->handle, 0x8000);

		for (i = 0; i < 0x8000; i++) {
			if (++q->hgenerator >= 0x8000)
				q->hgenerator = 1;
			if (cbq_class_lookup(q, classid|q->hgenerator) == NULL)
				break;
		}
		err = -ENOSR;
		if (i >= 0x8000)
			goto failure;
		classid = classid|q->hgenerator;
	}

	parent = &q->link;
	if (parentid) {
		parent = cbq_class_lookup(q, parentid);
		err = -EINVAL;
		if (parent == NULL)
			goto failure;
	}

	err = -ENOBUFS;
	cl = kzalloc(sizeof(*cl), GFP_KERNEL);
	if (cl == NULL)
		goto failure;

	if (tca[TCA_RATE]) {
		err = gen_new_estimator(&cl->bstats, &cl->rate_est,
					qdisc_root_sleeping_lock(sch),
					tca[TCA_RATE]);
		if (err) {
			kfree(cl);
			goto failure;
		}
	}

	cl->R_tab = rtab;
	rtab = NULL;
	cl->refcnt = 1;
	cl->q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, classid);
	if (!cl->q)
		cl->q = &noop_qdisc;
	cl->common.classid = classid;
	cl->tparent = parent;
	cl->qdisc = sch;
	cl->allot = parent->allot;
	cl->quantum = cl->allot;
	cl->weight = cl->R_tab->rate.rate;

	sch_tree_lock(sch);
	cbq_link_class(cl);
	cl->borrow = cl->tparent;
	if (cl->tparent != &q->link)
		cl->share = cl->tparent;
	cbq_adjust_levels(parent);
	cl->minidle = -0x7FFFFFFF;
	cbq_set_lss(cl, nla_data(tb[TCA_CBQ_LSSOPT]));
	cbq_set_wrr(cl, nla_data(tb[TCA_CBQ_WRROPT]));
	if (cl->ewma_log == 0)
		cl->ewma_log = q->link.ewma_log;
	if (cl->maxidle == 0)
		cl->maxidle = q->link.maxidle;
	if (cl->avpkt == 0)
		cl->avpkt = q->link.avpkt;
	cl->overlimit = cbq_ovl_classic;
	if (tb[TCA_CBQ_OVL_STRATEGY])
		cbq_set_overlimit(cl, nla_data(tb[TCA_CBQ_OVL_STRATEGY]));
#ifdef CONFIG_NET_CLS_ACT
	if (tb[TCA_CBQ_POLICE])
		cbq_set_police(cl, nla_data(tb[TCA_CBQ_POLICE]));
#endif
	if (tb[TCA_CBQ_FOPT])
		cbq_set_fopt(cl, nla_data(tb[TCA_CBQ_FOPT]));
	sch_tree_unlock(sch);

	qdisc_class_hash_grow(sch, &q->clhash);

	*arg = (unsigned long)cl;
	return 0;

failure:
	qdisc_put_rtab(rtab);
	return err;
}

static int cbq_delete(struct Qdisc *sch, unsigned long arg)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *cl = (struct cbq_class *)arg;
	unsigned int qlen;

	if (cl->filters || cl->children || cl == &q->link)
		return -EBUSY;

	sch_tree_lock(sch);

	qlen = cl->q->q.qlen;
	qdisc_reset(cl->q);
	qdisc_tree_decrease_qlen(cl->q, qlen);

	if (cl->next_alive)
		cbq_deactivate_class(cl);

	if (q->tx_borrowed == cl)
		q->tx_borrowed = q->tx_class;
	if (q->tx_class == cl) {
		q->tx_class = NULL;
		q->tx_borrowed = NULL;
	}
#ifdef CONFIG_NET_CLS_ACT
	if (q->rx_class == cl)
		q->rx_class = NULL;
#endif

	cbq_unlink_class(cl);
	cbq_adjust_levels(cl->tparent);
	cl->defmap = 0;
	cbq_sync_defmap(cl);

	cbq_rmprio(q, cl);
	sch_tree_unlock(sch);

	BUG_ON(--cl->refcnt == 0);
	/*
	 * This shouldn't happen: we "hold" one cops->get() when called
	 * from tc_ctl_tclass; the destroy method is done from cops->put().
	 */

	return 0;
}

static struct tcf_proto **cbq_find_tcf(struct Qdisc *sch, unsigned long arg)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *cl = (struct cbq_class *)arg;

	if (cl == NULL)
		cl = &q->link;

	return &cl->filter_list;
}

static unsigned long cbq_bind_filter(struct Qdisc *sch, unsigned long parent,
				     u32 classid)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *p = (struct cbq_class *)parent;
	struct cbq_class *cl = cbq_class_lookup(q, classid);

	if (cl) {
		if (p && p->level <= cl->level)
			return 0;
		cl->filters++;
		return (unsigned long)cl;
	}
	return 0;
}

static void cbq_unbind_filter(struct Qdisc *sch, unsigned long arg)
{
	struct cbq_class *cl = (struct cbq_class *)arg;

	cl->filters--;
}

static void cbq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *cl;
	struct hlist_node *n;
	unsigned int h;

	if (arg->stop)
		return;

	for (h = 0; h < q->clhash.hashsize; h++) {
		hlist_for_each_entry(cl, n, &q->clhash.hash[h], common.hnode) {
			if (arg->count < arg->skip) {
				arg->count++;
				continue;
			}
			if (arg->fn(sch, (unsigned long)cl, arg) < 0) {
				arg->stop = 1;
				return;
			}
			arg->count++;
		}
	}
}

static const struct Qdisc_class_ops cbq_class_ops = {
	.graft		=	cbq_graft,
	.leaf		=	cbq_leaf,
	.qlen_notify	=	cbq_qlen_notify,
	.get		=	cbq_get,
	.put		=	cbq_put,
	.change		=	cbq_change_class,
	.delete		=	cbq_delete,
	.walk		=	cbq_walk,
	.tcf_chain	=	cbq_find_tcf,
	.bind_tcf	=	cbq_bind_filter,
	.unbind_tcf	=	cbq_unbind_filter,
	.dump		=	cbq_dump_class,
	.dump_stats	=	cbq_dump_class_stats,
};

static struct Qdisc_ops cbq_qdisc_ops __read_mostly = {
	.next		=	NULL,
	.cl_ops		=	&cbq_class_ops,
	.id		=	"cbq",
	.priv_size	=	sizeof(struct cbq_sched_data),
	.enqueue	=	cbq_enqueue,
	.dequeue	=	cbq_dequeue,
	.peek		=	qdisc_peek_dequeued,
	.drop		=	cbq_drop,
	.init		=	cbq_init,
	.reset		=	cbq_reset,
	.destroy	=	cbq_destroy,
	.change		=	NULL,
	.dump		=	cbq_dump,
	.dump_stats	=	cbq_dump_stats,
	.owner		=	THIS_MODULE,
};

static int __init cbq_module_init(void)
{
	return register_qdisc(&cbq_qdisc_ops);
}
static void __exit cbq_module_exit(void)
{
	unregister_qdisc(&cbq_qdisc_ops);
}
module_init(cbq_module_init)
module_exit(cbq_module_exit)
MODULE_LICENSE("GPL");