1 /*
2 * net/sched/sch_cbq.c Class-Based Queueing discipline.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
9 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
13 #include <linux/module.h>
14 #include <linux/types.h>
15 #include <linux/kernel.h>
16 #include <linux/string.h>
17 #include <linux/errno.h>
18 #include <linux/skbuff.h>
19 #include <net/netlink.h>
20 #include <net/pkt_sched.h>
23 /* Class-Based Queueing (CBQ) algorithm.
24 =======================================
26 Sources: [1] Sally Floyd and Van Jacobson, "Link-sharing and Resource
27 Management Models for Packet Networks",
28 IEEE/ACM Transactions on Networking, Vol.3, No.4, 1995
30 [2] Sally Floyd, "Notes on CBQ and Guaranteed Service", 1995
32 [3] Sally Floyd, "Notes on Class-Based Queueing: Setting
33 Parameters", 1996
35 [4] Sally Floyd and Michael Speer, "Experimental Results
36 for Class-Based Queueing", 1998, not published.
38 -----------------------------------------------------------------------
40     The algorithm skeleton was taken from the NS simulator's cbq.cc.
41     If someone wants to check this code against the LBL version,
42     they should take into account that ONLY the skeleton was borrowed;
43     the implementation is different. In particular:
45 --- The WRR algorithm is different. Our version looks more
46 reasonable (I hope) and works when quanta are allowed to be
47 less than MTU, which is always the case when real time classes
48     have small rates. Note that the statement of [3] is
49     incomplete: delay may actually be estimated even if the class
50     per-round allotment is less than the MTU. Namely, if the per-round
51     allotment is W*r_i, and r_1+...+r_k = r < 1
53 delay_i <= ([MTU/(W*r_i)]*W*r + W*r + k*MTU)/B
55 In the worst case we have IntServ estimate with D = W*r+k*MTU
56 and C = MTU*r. The proof (if correct at all) is trivial.
59     --- It seems that cbq-2.0 is not very accurate. At least, I cannot
60     interpret some places, which look like incorrect translations
61     from NS. Anyone is welcome to find these differences
62     and explain to me why I am wrong 8).
64     --- Linux has no EOI event, so we cannot estimate the true class
65     idle time. The workaround is to treat the next dequeue event
66     as a sign that the previous packet has finished. This is wrong because of
67     internal device queueing, but on a permanently loaded link it holds.
68     Moreover, combined with the clock integrator, this scheme looks
69     very close to an ideal solution. */
71 struct cbq_sched_data;
74 struct cbq_class
76 struct Qdisc_class_common common;
77 struct cbq_class *next_alive; /* next class with backlog in this priority band */
79 /* Parameters */
80 unsigned char priority; /* class priority */
81 unsigned char priority2; /* priority to be used after overlimit */
82 unsigned char ewma_log; /* time constant for idle time calculation */
83 unsigned char ovl_strategy;
84 #ifdef CONFIG_NET_CLS_ACT
85 unsigned char police;
86 #endif
88 u32 defmap;
90 /* Link-sharing scheduler parameters */
91 long maxidle; /* Class parameters: see below. */
92 long offtime;
93 long minidle;
94 u32 avpkt;
95 struct qdisc_rate_table *R_tab;
97 /* Overlimit strategy parameters */
98 void (*overlimit)(struct cbq_class *cl);
99 psched_tdiff_t penalty;
101 /* General scheduler (WRR) parameters */
102 long allot;
103 long quantum; /* Allotment per WRR round */
104 long weight; /* Relative allotment: see below */
106 struct Qdisc *qdisc; /* Ptr to CBQ discipline */
107 struct cbq_class *split; /* Ptr to split node */
108 struct cbq_class *share; /* Ptr to LS parent in the class tree */
109 struct cbq_class *tparent; /* Ptr to tree parent in the class tree */
110 struct cbq_class *borrow; /* NULL if class is bandwidth limited;
111 parent otherwise */
112 struct cbq_class *sibling; /* Sibling chain */
113 struct cbq_class *children; /* Pointer to children chain */
115 struct Qdisc *q; /* Elementary queueing discipline */
118 /* Variables */
119 unsigned char cpriority; /* Effective priority */
120 unsigned char delayed;
121 unsigned char level; /* level of the class in hierarchy:
122 0 for leaf classes, and maximal
123 level of children + 1 for nodes.
126 psched_time_t last; /* Last end of service */
127 psched_time_t undertime;
128 long avgidle;
129 long deficit; /* Saved deficit for WRR */
130 psched_time_t penalized;
131 struct gnet_stats_basic bstats;
132 struct gnet_stats_queue qstats;
133 struct gnet_stats_rate_est rate_est;
134 struct tc_cbq_xstats xstats;
136 struct tcf_proto *filter_list;
138 int refcnt;
139 int filters;
141 struct cbq_class *defaults[TC_PRIO_MAX+1];
144 struct cbq_sched_data
146 struct Qdisc_class_hash clhash; /* Hash table of all classes */
147 int nclasses[TC_CBQ_MAXPRIO+1];
148 unsigned quanta[TC_CBQ_MAXPRIO+1];
150 struct cbq_class link;
152 unsigned activemask;
153 struct cbq_class *active[TC_CBQ_MAXPRIO+1]; /* List of all classes
154 with backlog */
156 #ifdef CONFIG_NET_CLS_ACT
157 struct cbq_class *rx_class;
158 #endif
159 struct cbq_class *tx_class;
160 struct cbq_class *tx_borrowed;
161 int tx_len;
162 psched_time_t now; /* Cached timestamp */
163 psched_time_t now_rt; /* Cached real time */
164 unsigned pmask;
166 struct hrtimer delay_timer;
167 struct qdisc_watchdog watchdog; /* Watchdog timer,
168 started when CBQ has
169 backlog, but cannot
170 transmit just now */
171 psched_tdiff_t wd_expires;
172 int toplevel;
173 u32 hgenerator;
177 #define L2T(cl,len) qdisc_l2t((cl)->R_tab,len)
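/*
 * Illustrative sketch, not part of this file: L2T() maps a packet length to
 * its transmission time via a precomputed rate table (the real helper is
 * qdisc_l2t() from the packet-scheduler headers).  The struct and function
 * names below (example_rtab, example_l2t) are made up for the example; only
 * the table-indexing idea is taken from the code above.
 */
struct example_rtab {
	unsigned char	cell_log;	/* log2 of bytes covered by one table cell */
	unsigned int	data[256];	/* transmission time (psched ticks) per cell */
};

static inline unsigned int example_l2t(const struct example_rtab *rtab,
				       unsigned int pktlen)
{
	unsigned int slot = pktlen >> rtab->cell_log;

	if (slot > 255)
		slot = 255;		/* clamp oversized packets to the last cell */
	return rtab->data[slot];
}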
179 static __inline__ struct cbq_class *
180 cbq_class_lookup(struct cbq_sched_data *q, u32 classid)
182 struct Qdisc_class_common *clc;
184 clc = qdisc_class_find(&q->clhash, classid);
185 if (clc == NULL)
186 return NULL;
187 return container_of(clc, struct cbq_class, common);
190 #ifdef CONFIG_NET_CLS_ACT
192 static struct cbq_class *
193 cbq_reclassify(struct sk_buff *skb, struct cbq_class *this)
195 struct cbq_class *cl, *new;
197 for (cl = this->tparent; cl; cl = cl->tparent)
198 if ((new = cl->defaults[TC_PRIO_BESTEFFORT]) != NULL && new != this)
199 return new;
201 return NULL;
204 #endif
206 /* Classify packet. The procedure is pretty complicated, but
207    it allows us to combine link sharing and priority scheduling
208    transparently.
210    Namely, you can put link-sharing rules (e.g. route-based) at the root of CBQ,
211    so that classification resolves to split nodes. Then packets are classified
212    by logical priority, or a more specific classifier may be attached
213    to the split node.
216 static struct cbq_class *
217 cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
219 struct cbq_sched_data *q = qdisc_priv(sch);
220 struct cbq_class *head = &q->link;
221 struct cbq_class **defmap;
222 struct cbq_class *cl = NULL;
223 u32 prio = skb->priority;
224 struct tcf_result res;
227 * Step 1. If skb->priority points to one of our classes, use it.
229 if (TC_H_MAJ(prio^sch->handle) == 0 &&
230 (cl = cbq_class_lookup(q, prio)) != NULL)
231 return cl;
233 *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
234 for (;;) {
235 int result = 0;
236 defmap = head->defaults;
239 * Step 2+n. Apply classifier.
241 if (!head->filter_list ||
242 (result = tc_classify_compat(skb, head->filter_list, &res)) < 0)
243 goto fallback;
245 if ((cl = (void*)res.class) == NULL) {
246 if (TC_H_MAJ(res.classid))
247 cl = cbq_class_lookup(q, res.classid);
248 else if ((cl = defmap[res.classid&TC_PRIO_MAX]) == NULL)
249 cl = defmap[TC_PRIO_BESTEFFORT];
251 if (cl == NULL || cl->level >= head->level)
252 goto fallback;
255 #ifdef CONFIG_NET_CLS_ACT
256 switch (result) {
257 case TC_ACT_QUEUED:
258 case TC_ACT_STOLEN:
259 *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
260 case TC_ACT_SHOT:
261 return NULL;
262 case TC_ACT_RECLASSIFY:
263 return cbq_reclassify(skb, cl);
265 #endif
266 if (cl->level == 0)
267 return cl;
270          * Step 3+n. If the classifier selected a link-sharing class,
271          *      apply the agency-specific classifier.
272          *      Repeat this procedure until we hit a leaf node.
274 head = cl;
277 fallback:
278 cl = head;
281 * Step 4. No success...
283 if (TC_H_MAJ(prio) == 0 &&
284 !(cl = head->defaults[prio&TC_PRIO_MAX]) &&
285 !(cl = head->defaults[TC_PRIO_BESTEFFORT]))
286 return head;
288 return cl;
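/*
 * Illustrative sketch, not part of this file: step 1 above accepts
 * skb->priority as a class id only when its major number equals the qdisc
 * handle; TC_H_MAJ() keeps the upper 16 bits, so identical majors XOR to
 * zero.  The helper name below is hypothetical.
 */
static inline int example_priority_matches(unsigned int prio, unsigned int handle)
{
	/* e.g. handle 0x00010000 (1:0) matches prio 0x00010003 (1:3),
	 * but not prio 0x00020003 (2:3) */
	return ((prio ^ handle) & 0xFFFF0000U) == 0;
}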
292    A packet has just been enqueued on an empty class.
293    cbq_activate_class adds the class to the tail of the active class list
294    of its priority band.
297 static __inline__ void cbq_activate_class(struct cbq_class *cl)
299 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
300 int prio = cl->cpriority;
301 struct cbq_class *cl_tail;
303 cl_tail = q->active[prio];
304 q->active[prio] = cl;
306 if (cl_tail != NULL) {
307 cl->next_alive = cl_tail->next_alive;
308 cl_tail->next_alive = cl;
309 } else {
310 cl->next_alive = cl;
311 q->activemask |= (1<<prio);
316 Unlink class from active chain.
317 Note that this same procedure is done directly in cbq_dequeue*
318 during round-robin procedure.
321 static void cbq_deactivate_class(struct cbq_class *this)
323 struct cbq_sched_data *q = qdisc_priv(this->qdisc);
324 int prio = this->cpriority;
325 struct cbq_class *cl;
326 struct cbq_class *cl_prev = q->active[prio];
328 do {
329 cl = cl_prev->next_alive;
330 if (cl == this) {
331 cl_prev->next_alive = cl->next_alive;
332 cl->next_alive = NULL;
334 if (cl == q->active[prio]) {
335 q->active[prio] = cl_prev;
336 if (cl == q->active[prio]) {
337 q->active[prio] = NULL;
338 q->activemask &= ~(1<<prio);
339 return;
342 return;
344 } while ((cl_prev = cl) != q->active[prio]);
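/*
 * Illustrative sketch, not part of this file: the per-priority "alive" ring
 * manipulated by cbq_activate_class()/cbq_deactivate_class() is a circular
 * singly linked list in which q->active[prio] points at the TAIL, so the
 * head is tail->next_alive and appending touches only two pointers.  The
 * types and names below are hypothetical stand-ins.
 */
struct example_ring_node {
	struct example_ring_node *next_alive;
};

static void example_ring_append(struct example_ring_node **tail,
				struct example_ring_node *node)
{
	if (*tail) {
		node->next_alive = (*tail)->next_alive;	/* new node points at the head */
		(*tail)->next_alive = node;		/* old tail points at the new node */
	} else {
		node->next_alive = node;		/* single-element ring */
	}
	*tail = node;					/* the new node becomes the tail */
}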
347 static void
348 cbq_mark_toplevel(struct cbq_sched_data *q, struct cbq_class *cl)
350 int toplevel = q->toplevel;
352 if (toplevel > cl->level && !(cl->q->flags&TCQ_F_THROTTLED)) {
353 psched_time_t now;
354 psched_tdiff_t incr;
356 now = psched_get_time();
357 incr = now - q->now_rt;
358 now = q->now + incr;
360 do {
361 if (cl->undertime < now) {
362 q->toplevel = cl->level;
363 return;
365 } while ((cl=cl->borrow) != NULL && toplevel > cl->level);
369 static int
370 cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
372 struct cbq_sched_data *q = qdisc_priv(sch);
373 int uninitialized_var(ret);
374 struct cbq_class *cl = cbq_classify(skb, sch, &ret);
376 #ifdef CONFIG_NET_CLS_ACT
377 q->rx_class = cl;
378 #endif
379 if (cl == NULL) {
380 if (ret & __NET_XMIT_BYPASS)
381 sch->qstats.drops++;
382 kfree_skb(skb);
383 return ret;
386 #ifdef CONFIG_NET_CLS_ACT
387 cl->q->__parent = sch;
388 #endif
389 ret = qdisc_enqueue(skb, cl->q);
390 if (ret == NET_XMIT_SUCCESS) {
391 sch->q.qlen++;
392 sch->bstats.packets++;
393 sch->bstats.bytes += qdisc_pkt_len(skb);
394 cbq_mark_toplevel(q, cl);
395 if (!cl->next_alive)
396 cbq_activate_class(cl);
397 return ret;
400 if (net_xmit_drop_count(ret)) {
401 sch->qstats.drops++;
402 cbq_mark_toplevel(q, cl);
403 cl->qstats.drops++;
405 return ret;
408 static int
409 cbq_requeue(struct sk_buff *skb, struct Qdisc *sch)
411 struct cbq_sched_data *q = qdisc_priv(sch);
412 struct cbq_class *cl;
413 int ret;
415 if ((cl = q->tx_class) == NULL) {
416 kfree_skb(skb);
417 sch->qstats.drops++;
418 return NET_XMIT_CN;
420 q->tx_class = NULL;
422 cbq_mark_toplevel(q, cl);
424 #ifdef CONFIG_NET_CLS_ACT
425 q->rx_class = cl;
426 cl->q->__parent = sch;
427 #endif
428 if ((ret = cl->q->ops->requeue(skb, cl->q)) == 0) {
429 sch->q.qlen++;
430 sch->qstats.requeues++;
431 if (!cl->next_alive)
432 cbq_activate_class(cl);
433 return 0;
435 if (net_xmit_drop_count(ret)) {
436 sch->qstats.drops++;
437 cl->qstats.drops++;
439 return ret;
442 /* Overlimit actions */
444 /* TC_CBQ_OVL_CLASSIC: (default) penalize leaf class by adding offtime */
446 static void cbq_ovl_classic(struct cbq_class *cl)
448 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
449 psched_tdiff_t delay = cl->undertime - q->now;
451 if (!cl->delayed) {
452 delay += cl->offtime;
455                    Class goes to sleep, so it will have no
456                    chance to work off its avgidle. Let's forgive it 8)
458                    BTW cbq-2.0 has a bug in this
459                    place: apparently they forgot to shift it by cl->ewma_log.
461 if (cl->avgidle < 0)
462 delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
463 if (cl->avgidle < cl->minidle)
464 cl->avgidle = cl->minidle;
465 if (delay <= 0)
466 delay = 1;
467 cl->undertime = q->now + delay;
469 cl->xstats.overactions++;
470 cl->delayed = 1;
472 if (q->wd_expires == 0 || q->wd_expires > delay)
473 q->wd_expires = delay;
475 /* Dirty work! We must schedule wakeups based on
476 real available rate, rather than leaf rate,
477 which may be tiny (even zero).
479 if (q->toplevel == TC_CBQ_MAXLEVEL) {
480 struct cbq_class *b;
481 psched_tdiff_t base_delay = q->wd_expires;
483 for (b = cl->borrow; b; b = b->borrow) {
484 delay = b->undertime - q->now;
485 if (delay < base_delay) {
486 if (delay <= 0)
487 delay = 1;
488 base_delay = delay;
492 q->wd_expires = base_delay;
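/*
 * Illustrative sketch, not part of this file: the forgiveness term used in
 * cbq_ovl_classic() above.  With W = 2^-ewma_log, subtracting
 * (-avgidle) - ((-avgidle) >> ewma_log) removes (1 - W) times the negative
 * avgidle from the sleep delay.  The helper name is hypothetical.
 */
static inline long example_offtime_compensation(long avgidle, unsigned char ewma_log)
{
	long neg = -avgidle;		/* caller ensures avgidle < 0 */

	return neg - (neg >> ewma_log);	/* equals (1 - W) * (-avgidle) */
}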
496 /* TC_CBQ_OVL_RCLASSIC: penalize classes in the hierarchy by offtime when
497    they go overlimit
500 static void cbq_ovl_rclassic(struct cbq_class *cl)
502 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
503 struct cbq_class *this = cl;
505 do {
506 if (cl->level > q->toplevel) {
507 cl = NULL;
508 break;
510 } while ((cl = cl->borrow) != NULL);
512 if (cl == NULL)
513 cl = this;
514 cbq_ovl_classic(cl);
517 /* TC_CBQ_OVL_DELAY: delay until the class goes underlimit */
519 static void cbq_ovl_delay(struct cbq_class *cl)
521 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
522 psched_tdiff_t delay = cl->undertime - q->now;
524 if (test_bit(__QDISC_STATE_DEACTIVATED,
525 &qdisc_root_sleeping(cl->qdisc)->state))
526 return;
528 if (!cl->delayed) {
529 psched_time_t sched = q->now;
530 ktime_t expires;
532 delay += cl->offtime;
533 if (cl->avgidle < 0)
534 delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
535 if (cl->avgidle < cl->minidle)
536 cl->avgidle = cl->minidle;
537 cl->undertime = q->now + delay;
539 if (delay > 0) {
540 sched += delay + cl->penalty;
541 cl->penalized = sched;
542 cl->cpriority = TC_CBQ_MAXPRIO;
543 q->pmask |= (1<<TC_CBQ_MAXPRIO);
545 expires = ktime_set(0, 0);
546 expires = ktime_add_ns(expires, PSCHED_US2NS(sched));
547 if (hrtimer_try_to_cancel(&q->delay_timer) &&
548 ktime_to_ns(ktime_sub(
549 hrtimer_get_expires(&q->delay_timer),
550 expires)) > 0)
551 hrtimer_set_expires(&q->delay_timer, expires);
552 hrtimer_restart(&q->delay_timer);
553 cl->delayed = 1;
554 cl->xstats.overactions++;
555 return;
557 delay = 1;
559 if (q->wd_expires == 0 || q->wd_expires > delay)
560 q->wd_expires = delay;
563 /* TC_CBQ_OVL_LOWPRIO: penalize class by lowering its priority band */
565 static void cbq_ovl_lowprio(struct cbq_class *cl)
567 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
569 cl->penalized = q->now + cl->penalty;
571 if (cl->cpriority != cl->priority2) {
572 cl->cpriority = cl->priority2;
573 q->pmask |= (1<<cl->cpriority);
574 cl->xstats.overactions++;
576 cbq_ovl_classic(cl);
579 /* TC_CBQ_OVL_DROP: penalize class by dropping */
581 static void cbq_ovl_drop(struct cbq_class *cl)
583 if (cl->q->ops->drop)
584 if (cl->q->ops->drop(cl->q))
585 cl->qdisc->q.qlen--;
586 cl->xstats.overactions++;
587 cbq_ovl_classic(cl);
590 static psched_tdiff_t cbq_undelay_prio(struct cbq_sched_data *q, int prio,
591 psched_time_t now)
593 struct cbq_class *cl;
594 struct cbq_class *cl_prev = q->active[prio];
595 psched_time_t sched = now;
597 if (cl_prev == NULL)
598 return 0;
600 do {
601 cl = cl_prev->next_alive;
602 if (now - cl->penalized > 0) {
603 cl_prev->next_alive = cl->next_alive;
604 cl->next_alive = NULL;
605 cl->cpriority = cl->priority;
606 cl->delayed = 0;
607 cbq_activate_class(cl);
609 if (cl == q->active[prio]) {
610 q->active[prio] = cl_prev;
611 if (cl == q->active[prio]) {
612 q->active[prio] = NULL;
613 return 0;
617 cl = cl_prev->next_alive;
618 } else if (sched - cl->penalized > 0)
619 sched = cl->penalized;
620 } while ((cl_prev = cl) != q->active[prio]);
622 return sched - now;
625 static enum hrtimer_restart cbq_undelay(struct hrtimer *timer)
627 struct cbq_sched_data *q = container_of(timer, struct cbq_sched_data,
628 delay_timer);
629 struct Qdisc *sch = q->watchdog.qdisc;
630 psched_time_t now;
631 psched_tdiff_t delay = 0;
632 unsigned pmask;
634 now = psched_get_time();
636 pmask = q->pmask;
637 q->pmask = 0;
639 while (pmask) {
640 int prio = ffz(~pmask);
641 psched_tdiff_t tmp;
643 pmask &= ~(1<<prio);
645 tmp = cbq_undelay_prio(q, prio, now);
646 if (tmp > 0) {
647 q->pmask |= 1<<prio;
648 if (tmp < delay || delay == 0)
649 delay = tmp;
653 if (delay) {
654 ktime_t time;
656 time = ktime_set(0, 0);
657 time = ktime_add_ns(time, PSCHED_US2NS(now + delay));
658 hrtimer_start(&q->delay_timer, time, HRTIMER_MODE_ABS);
661 sch->flags &= ~TCQ_F_THROTTLED;
662 __netif_schedule(qdisc_root(sch));
663 return HRTIMER_NORESTART;
666 #ifdef CONFIG_NET_CLS_ACT
667 static int cbq_reshape_fail(struct sk_buff *skb, struct Qdisc *child)
669 struct Qdisc *sch = child->__parent;
670 struct cbq_sched_data *q = qdisc_priv(sch);
671 struct cbq_class *cl = q->rx_class;
673 q->rx_class = NULL;
675 if (cl && (cl = cbq_reclassify(skb, cl)) != NULL) {
676 int ret;
678 cbq_mark_toplevel(q, cl);
680 q->rx_class = cl;
681 cl->q->__parent = sch;
683 ret = qdisc_enqueue(skb, cl->q);
684 if (ret == NET_XMIT_SUCCESS) {
685 sch->q.qlen++;
686 sch->bstats.packets++;
687 sch->bstats.bytes += qdisc_pkt_len(skb);
688 if (!cl->next_alive)
689 cbq_activate_class(cl);
690 return 0;
692 if (net_xmit_drop_count(ret))
693 sch->qstats.drops++;
694 return 0;
697 sch->qstats.drops++;
698 return -1;
700 #endif
703    This is a mission-critical procedure.
705    We "regenerate" the toplevel cutoff if the transmitting class
706    has backlog and is not regulated. This is not part of the
707    original CBQ description, but it looks more reasonable.
708    It may well be wrong; this question needs further investigation.
711 static __inline__ void
712 cbq_update_toplevel(struct cbq_sched_data *q, struct cbq_class *cl,
713 struct cbq_class *borrowed)
715 if (cl && q->toplevel >= borrowed->level) {
716 if (cl->q->q.qlen > 1) {
717 do {
718 if (borrowed->undertime == PSCHED_PASTPERFECT) {
719 q->toplevel = borrowed->level;
720 return;
722 } while ((borrowed=borrowed->borrow) != NULL);
724 #if 0
725 /* It is not necessary now. Uncommenting it
726 will save CPU cycles, but decrease fairness.
728 q->toplevel = TC_CBQ_MAXLEVEL;
729 #endif
733 static void
734 cbq_update(struct cbq_sched_data *q)
736 struct cbq_class *this = q->tx_class;
737 struct cbq_class *cl = this;
738 int len = q->tx_len;
740 q->tx_class = NULL;
742 for ( ; cl; cl = cl->share) {
743 long avgidle = cl->avgidle;
744 long idle;
746 cl->bstats.packets++;
747 cl->bstats.bytes += len;
750 (now - last) is total time between packet right edges.
751 (last_pktlen/rate) is "virtual" busy time, so that
753 idle = (now - last) - last_pktlen/rate
756 idle = q->now - cl->last;
757 if ((unsigned long)idle > 128*1024*1024) {
758 avgidle = cl->maxidle;
759 } else {
760 idle -= L2T(cl, len);
762 /* true_avgidle := (1-W)*true_avgidle + W*idle,
763 where W=2^{-ewma_log}. But cl->avgidle is scaled:
764 cl->avgidle == true_avgidle/W,
765 hence:
767 avgidle += idle - (avgidle>>cl->ewma_log);
770 if (avgidle <= 0) {
771 /* Overlimit or at-limit */
773 if (avgidle < cl->minidle)
774 avgidle = cl->minidle;
776 cl->avgidle = avgidle;
778 /* Calculate expected time, when this class
779 will be allowed to send.
780 It will occur, when:
781 (1-W)*true_avgidle + W*delay = 0, i.e.
782 idle = (1/W - 1)*(-true_avgidle)
784 idle = (1 - W)*(-cl->avgidle);
786 idle = (-avgidle) - ((-avgidle) >> cl->ewma_log);
789                           That is not all.
790                           To maintain the rate allocated to the class,
791                           we add to undertime the virtual clock time
792                           needed to complete the transmitted packet.
793                           (len/phys_bandwidth has already elapsed
794                           by the time cbq_update is called.)
797 idle -= L2T(&q->link, len);
798 idle += L2T(cl, len);
800 cl->undertime = q->now + idle;
801 } else {
802 /* Underlimit */
804 cl->undertime = PSCHED_PASTPERFECT;
805 if (avgidle > cl->maxidle)
806 cl->avgidle = cl->maxidle;
807 else
808 cl->avgidle = avgidle;
810 cl->last = q->now;
813 cbq_update_toplevel(q, this, q->tx_borrowed);
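/*
 * Illustrative sketch, not part of this file: the scaled EWMA used in
 * cbq_update().  With W = 2^-ewma_log the update
 *     true_avgidle = (1 - W) * true_avgidle + W * idle
 * divided through by W (since avgidle == true_avgidle / W) becomes
 *     avgidle += idle - (avgidle >> ewma_log).
 * The helper name is hypothetical.
 */
static inline long example_ewma_update(long avgidle, long idle,
				       unsigned char ewma_log)
{
	return avgidle + idle - (avgidle >> ewma_log);
}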
816 static __inline__ struct cbq_class *
817 cbq_under_limit(struct cbq_class *cl)
819 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
820 struct cbq_class *this_cl = cl;
822 if (cl->tparent == NULL)
823 return cl;
825 if (cl->undertime == PSCHED_PASTPERFECT || q->now >= cl->undertime) {
826 cl->delayed = 0;
827 return cl;
830 do {
831            /* This is a very suspicious place. Currently the overlimit
832               action is generated for non-bounded classes
833               only if the link is completely congested.
834               Though this agrees with the ancestor-only paradigm,
835               it looks very questionable. In particular,
836               it means that this chunk of code will either
837               never be called or result in strong amplification
838               of burstiness. Dangerous and silly, but
839               no other solution exists.
841 if ((cl = cl->borrow) == NULL) {
842 this_cl->qstats.overlimits++;
843 this_cl->overlimit(this_cl);
844 return NULL;
846 if (cl->level > q->toplevel)
847 return NULL;
848 } while (cl->undertime != PSCHED_PASTPERFECT && q->now < cl->undertime);
850 cl->delayed = 0;
851 return cl;
854 static __inline__ struct sk_buff *
855 cbq_dequeue_prio(struct Qdisc *sch, int prio)
857 struct cbq_sched_data *q = qdisc_priv(sch);
858 struct cbq_class *cl_tail, *cl_prev, *cl;
859 struct sk_buff *skb;
860 int deficit;
862 cl_tail = cl_prev = q->active[prio];
863 cl = cl_prev->next_alive;
865 do {
866 deficit = 0;
868 /* Start round */
869 do {
870 struct cbq_class *borrow = cl;
872 if (cl->q->q.qlen &&
873 (borrow = cbq_under_limit(cl)) == NULL)
874 goto skip_class;
876 if (cl->deficit <= 0) {
877 /* Class exhausted its allotment per
878 this round. Switch to the next one.
880 deficit = 1;
881 cl->deficit += cl->quantum;
882 goto next_class;
885 skb = cl->q->dequeue(cl->q);
887                    /* The class did not give us any skb :-(
888                       This can occur even if cl->q->q.qlen != 0,
889                       e.g. if cl->q == "tbf"
891 if (skb == NULL)
892 goto skip_class;
894 cl->deficit -= qdisc_pkt_len(skb);
895 q->tx_class = cl;
896 q->tx_borrowed = borrow;
897 if (borrow != cl) {
898 #ifndef CBQ_XSTATS_BORROWS_BYTES
899 borrow->xstats.borrows++;
900 cl->xstats.borrows++;
901 #else
902 borrow->xstats.borrows += qdisc_pkt_len(skb);
903 cl->xstats.borrows += qdisc_pkt_len(skb);
904 #endif
906 q->tx_len = qdisc_pkt_len(skb);
908 if (cl->deficit <= 0) {
909 q->active[prio] = cl;
910 cl = cl->next_alive;
911 cl->deficit += cl->quantum;
913 return skb;
915 skip_class:
916 if (cl->q->q.qlen == 0 || prio != cl->cpriority) {
917 /* Class is empty or penalized.
918 Unlink it from active chain.
920 cl_prev->next_alive = cl->next_alive;
921 cl->next_alive = NULL;
923 /* Did cl_tail point to it? */
924 if (cl == cl_tail) {
925 /* Repair it! */
926 cl_tail = cl_prev;
928 /* Was it the last class in this band? */
929 if (cl == cl_tail) {
930 /* Kill the band! */
931 q->active[prio] = NULL;
932 q->activemask &= ~(1<<prio);
933 if (cl->q->q.qlen)
934 cbq_activate_class(cl);
935 return NULL;
938 q->active[prio] = cl_tail;
940 if (cl->q->q.qlen)
941 cbq_activate_class(cl);
943 cl = cl_prev;
946 next_class:
947 cl_prev = cl;
948 cl = cl->next_alive;
949 } while (cl_prev != cl_tail);
950 } while (deficit);
952 q->active[prio] = cl_prev;
954 return NULL;
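/*
 * Illustrative sketch, not part of this file: the weighted round-robin rule
 * applied in cbq_dequeue_prio() above.  A class may transmit while its
 * deficit is positive; an exhausted class has its deficit topped up by its
 * quantum and waits for the next round.  Types and names are hypothetical.
 */
struct example_wrr_class {
	long deficit;
	long quantum;
};

/* Returns 1 if the class may send a packet of the given length now (and
 * charges its deficit), 0 if it must wait for the next round. */
static int example_wrr_try_send(struct example_wrr_class *cl, unsigned int pktlen)
{
	if (cl->deficit <= 0) {
		cl->deficit += cl->quantum;	/* allotment for the next round */
		return 0;
	}
	cl->deficit -= pktlen;			/* charge this transmission */
	return 1;
}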
957 static __inline__ struct sk_buff *
958 cbq_dequeue_1(struct Qdisc *sch)
960 struct cbq_sched_data *q = qdisc_priv(sch);
961 struct sk_buff *skb;
962 unsigned activemask;
964 activemask = q->activemask&0xFF;
965 while (activemask) {
966 int prio = ffz(~activemask);
967 activemask &= ~(1<<prio);
968 skb = cbq_dequeue_prio(sch, prio);
969 if (skb)
970 return skb;
972 return NULL;
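/*
 * Illustrative sketch, not part of this file: ffz(~mask) in cbq_dequeue_1()
 * (and cbq_undelay()) yields the index of the lowest set bit of 'mask',
 * i.e. the most urgent active priority band.  A standalone equivalent using
 * the GCC builtin; the helper name is hypothetical.
 */
static inline int example_lowest_set_bit(unsigned int mask)
{
	return mask ? __builtin_ctz(mask) : -1;	/* -1 when no band is active */
}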
975 static struct sk_buff *
976 cbq_dequeue(struct Qdisc *sch)
978 struct sk_buff *skb;
979 struct cbq_sched_data *q = qdisc_priv(sch);
980 psched_time_t now;
981 psched_tdiff_t incr;
983 now = psched_get_time();
984 incr = now - q->now_rt;
986 if (q->tx_class) {
987 psched_tdiff_t incr2;
988            /* Time integrator. We calculate EOS time
989               by adding the expected packet transmission time.
990               If real time is greater, we warp the artificial clock,
991               so that:
993 cbq_time = max(real_time, work);
995 incr2 = L2T(&q->link, q->tx_len);
996 q->now += incr2;
997 cbq_update(q);
998 if ((incr -= incr2) < 0)
999 incr = 0;
1001 q->now += incr;
1002 q->now_rt = now;
1004 for (;;) {
1005 q->wd_expires = 0;
1007 skb = cbq_dequeue_1(sch);
1008 if (skb) {
1009 sch->q.qlen--;
1010 sch->flags &= ~TCQ_F_THROTTLED;
1011 return skb;
1014            /* All the classes are overlimit.
1016               This is possible if:
1018               1. The scheduler is empty.
1019               2. The toplevel cutoff inhibited borrowing.
1020               3. The root class is overlimit.
1022               Reset the 2nd and 3rd conditions and retry.
1024               Note that NS and cbq-2.0 are buggy here: picking
1025               an arbitrary class is appropriate for ancestor-only
1026               sharing, but not for the toplevel algorithm.
1028               Our version is better but slower, because it requires
1029               two passes; that is unavoidable with top-level sharing.
1032 if (q->toplevel == TC_CBQ_MAXLEVEL &&
1033 q->link.undertime == PSCHED_PASTPERFECT)
1034 break;
1036 q->toplevel = TC_CBQ_MAXLEVEL;
1037 q->link.undertime = PSCHED_PASTPERFECT;
1040 /* No packets in scheduler or nobody wants to give them to us :-(
1041 Sigh... start watchdog timer in the last case. */
1043 if (sch->q.qlen) {
1044 sch->qstats.overlimits++;
1045 if (q->wd_expires)
1046 qdisc_watchdog_schedule(&q->watchdog,
1047 now + q->wd_expires);
1049 return NULL;
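/*
 * Illustrative sketch, not part of this file: the time integrator in
 * cbq_dequeue() advances the artificial clock by whichever is larger, the
 * real time elapsed or the expected transmission time of the last packet,
 * i.e. cbq_time = max(real_time, work).  Types and names are hypothetical.
 */
static void example_integrate_clock(long long *cbq_now, long long *last_rt,
				    long long real_now, long long tx_time)
{
	long long incr = real_now - *last_rt;	/* real time elapsed */

	if (incr < tx_time)
		incr = tx_time;			/* never fall behind the work done */
	*cbq_now += incr;
	*last_rt = real_now;
}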
1052 /* CBQ class maintenance routines */
1054 static void cbq_adjust_levels(struct cbq_class *this)
1056 if (this == NULL)
1057 return;
1059 do {
1060 int level = 0;
1061 struct cbq_class *cl;
1063 if ((cl = this->children) != NULL) {
1064 do {
1065 if (cl->level > level)
1066 level = cl->level;
1067 } while ((cl = cl->sibling) != this->children);
1069 this->level = level+1;
1070 } while ((this = this->tparent) != NULL);
1073 static void cbq_normalize_quanta(struct cbq_sched_data *q, int prio)
1075 struct cbq_class *cl;
1076 struct hlist_node *n;
1077 unsigned int h;
1079 if (q->quanta[prio] == 0)
1080 return;
1082 for (h = 0; h < q->clhash.hashsize; h++) {
1083 hlist_for_each_entry(cl, n, &q->clhash.hash[h], common.hnode) {
1084                    /* BUGGGG... Beware! This expression suffers from
1085 arithmetic overflows!
1087 if (cl->priority == prio) {
1088 cl->quantum = (cl->weight*cl->allot*q->nclasses[prio])/
1089 q->quanta[prio];
1091 if (cl->quantum <= 0 || cl->quantum>32*qdisc_dev(cl->qdisc)->mtu) {
1092 printk(KERN_WARNING "CBQ: class %08x has bad quantum==%ld, repaired.\n", cl->common.classid, cl->quantum);
1093 cl->quantum = qdisc_dev(cl->qdisc)->mtu/2 + 1;
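/*
 * Illustrative sketch, not part of this file: cbq_normalize_quanta() computes
 *     quantum = weight * allot * nclasses / sum_of_weights
 * in 32-bit arithmetic, which the comment above warns may overflow.  A
 * hypothetical overflow-safer variant widens the intermediate product:
 */
static long example_normalized_quantum(long weight, long allot, int nclasses,
				       unsigned int sum_of_weights)
{
	unsigned long long num = (unsigned long long)weight * allot * nclasses;

	return sum_of_weights ? (long)(num / sum_of_weights) : 0;
}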
1099 static void cbq_sync_defmap(struct cbq_class *cl)
1101 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
1102 struct cbq_class *split = cl->split;
1103 unsigned h;
1104 int i;
1106 if (split == NULL)
1107 return;
1109 for (i=0; i<=TC_PRIO_MAX; i++) {
1110 if (split->defaults[i] == cl && !(cl->defmap&(1<<i)))
1111 split->defaults[i] = NULL;
1114 for (i=0; i<=TC_PRIO_MAX; i++) {
1115 int level = split->level;
1117 if (split->defaults[i])
1118 continue;
1120 for (h = 0; h < q->clhash.hashsize; h++) {
1121 struct hlist_node *n;
1122 struct cbq_class *c;
1124 hlist_for_each_entry(c, n, &q->clhash.hash[h],
1125 common.hnode) {
1126 if (c->split == split && c->level < level &&
1127 c->defmap&(1<<i)) {
1128 split->defaults[i] = c;
1129 level = c->level;
1136 static void cbq_change_defmap(struct cbq_class *cl, u32 splitid, u32 def, u32 mask)
1138 struct cbq_class *split = NULL;
1140 if (splitid == 0) {
1141 if ((split = cl->split) == NULL)
1142 return;
1143 splitid = split->common.classid;
1146 if (split == NULL || split->common.classid != splitid) {
1147 for (split = cl->tparent; split; split = split->tparent)
1148 if (split->common.classid == splitid)
1149 break;
1152 if (split == NULL)
1153 return;
1155 if (cl->split != split) {
1156 cl->defmap = 0;
1157 cbq_sync_defmap(cl);
1158 cl->split = split;
1159 cl->defmap = def&mask;
1160 } else
1161 cl->defmap = (cl->defmap&~mask)|(def&mask);
1163 cbq_sync_defmap(cl);
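/*
 * Illustrative sketch, not part of this file: the defmap update in
 * cbq_change_defmap() keeps the bits outside 'mask' and replaces the bits
 * inside it with the corresponding bits of 'def'.  The helper name is
 * hypothetical.
 */
static inline unsigned int example_masked_update(unsigned int old,
						 unsigned int def,
						 unsigned int mask)
{
	return (old & ~mask) | (def & mask);
}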
1166 static void cbq_unlink_class(struct cbq_class *this)
1168 struct cbq_class *cl, **clp;
1169 struct cbq_sched_data *q = qdisc_priv(this->qdisc);
1171 qdisc_class_hash_remove(&q->clhash, &this->common);
1173 if (this->tparent) {
1174 clp=&this->sibling;
1175 cl = *clp;
1176 do {
1177 if (cl == this) {
1178 *clp = cl->sibling;
1179 break;
1181 clp = &cl->sibling;
1182 } while ((cl = *clp) != this->sibling);
1184 if (this->tparent->children == this) {
1185 this->tparent->children = this->sibling;
1186 if (this->sibling == this)
1187 this->tparent->children = NULL;
1189 } else {
1190 WARN_ON(this->sibling != this);
1194 static void cbq_link_class(struct cbq_class *this)
1196 struct cbq_sched_data *q = qdisc_priv(this->qdisc);
1197 struct cbq_class *parent = this->tparent;
1199 this->sibling = this;
1200 qdisc_class_hash_insert(&q->clhash, &this->common);
1202 if (parent == NULL)
1203 return;
1205 if (parent->children == NULL) {
1206 parent->children = this;
1207 } else {
1208 this->sibling = parent->children->sibling;
1209 parent->children->sibling = this;
1213 static unsigned int cbq_drop(struct Qdisc* sch)
1215 struct cbq_sched_data *q = qdisc_priv(sch);
1216 struct cbq_class *cl, *cl_head;
1217 int prio;
1218 unsigned int len;
1220 for (prio = TC_CBQ_MAXPRIO; prio >= 0; prio--) {
1221 if ((cl_head = q->active[prio]) == NULL)
1222 continue;
1224 cl = cl_head;
1225 do {
1226 if (cl->q->ops->drop && (len = cl->q->ops->drop(cl->q))) {
1227 sch->q.qlen--;
1228 if (!cl->q->q.qlen)
1229 cbq_deactivate_class(cl);
1230 return len;
1232 } while ((cl = cl->next_alive) != cl_head);
1234 return 0;
1237 static void
1238 cbq_reset(struct Qdisc* sch)
1240 struct cbq_sched_data *q = qdisc_priv(sch);
1241 struct cbq_class *cl;
1242 struct hlist_node *n;
1243 int prio;
1244 unsigned h;
1246 q->activemask = 0;
1247 q->pmask = 0;
1248 q->tx_class = NULL;
1249 q->tx_borrowed = NULL;
1250 qdisc_watchdog_cancel(&q->watchdog);
1251 hrtimer_cancel(&q->delay_timer);
1252 q->toplevel = TC_CBQ_MAXLEVEL;
1253 q->now = psched_get_time();
1254 q->now_rt = q->now;
1256 for (prio = 0; prio <= TC_CBQ_MAXPRIO; prio++)
1257 q->active[prio] = NULL;
1259 for (h = 0; h < q->clhash.hashsize; h++) {
1260 hlist_for_each_entry(cl, n, &q->clhash.hash[h], common.hnode) {
1261 qdisc_reset(cl->q);
1263 cl->next_alive = NULL;
1264 cl->undertime = PSCHED_PASTPERFECT;
1265 cl->avgidle = cl->maxidle;
1266 cl->deficit = cl->quantum;
1267 cl->cpriority = cl->priority;
1270 sch->q.qlen = 0;
1274 static int cbq_set_lss(struct cbq_class *cl, struct tc_cbq_lssopt *lss)
1276 if (lss->change&TCF_CBQ_LSS_FLAGS) {
1277 cl->share = (lss->flags&TCF_CBQ_LSS_ISOLATED) ? NULL : cl->tparent;
1278 cl->borrow = (lss->flags&TCF_CBQ_LSS_BOUNDED) ? NULL : cl->tparent;
1280 if (lss->change&TCF_CBQ_LSS_EWMA)
1281 cl->ewma_log = lss->ewma_log;
1282 if (lss->change&TCF_CBQ_LSS_AVPKT)
1283 cl->avpkt = lss->avpkt;
1284 if (lss->change&TCF_CBQ_LSS_MINIDLE)
1285 cl->minidle = -(long)lss->minidle;
1286 if (lss->change&TCF_CBQ_LSS_MAXIDLE) {
1287 cl->maxidle = lss->maxidle;
1288 cl->avgidle = lss->maxidle;
1290 if (lss->change&TCF_CBQ_LSS_OFFTIME)
1291 cl->offtime = lss->offtime;
1292 return 0;
1295 static void cbq_rmprio(struct cbq_sched_data *q, struct cbq_class *cl)
1297 q->nclasses[cl->priority]--;
1298 q->quanta[cl->priority] -= cl->weight;
1299 cbq_normalize_quanta(q, cl->priority);
1302 static void cbq_addprio(struct cbq_sched_data *q, struct cbq_class *cl)
1304 q->nclasses[cl->priority]++;
1305 q->quanta[cl->priority] += cl->weight;
1306 cbq_normalize_quanta(q, cl->priority);
1309 static int cbq_set_wrr(struct cbq_class *cl, struct tc_cbq_wrropt *wrr)
1311 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
1313 if (wrr->allot)
1314 cl->allot = wrr->allot;
1315 if (wrr->weight)
1316 cl->weight = wrr->weight;
1317 if (wrr->priority) {
1318 cl->priority = wrr->priority-1;
1319 cl->cpriority = cl->priority;
1320 if (cl->priority >= cl->priority2)
1321 cl->priority2 = TC_CBQ_MAXPRIO-1;
1324 cbq_addprio(q, cl);
1325 return 0;
1328 static int cbq_set_overlimit(struct cbq_class *cl, struct tc_cbq_ovl *ovl)
1330 switch (ovl->strategy) {
1331 case TC_CBQ_OVL_CLASSIC:
1332 cl->overlimit = cbq_ovl_classic;
1333 break;
1334 case TC_CBQ_OVL_DELAY:
1335 cl->overlimit = cbq_ovl_delay;
1336 break;
1337 case TC_CBQ_OVL_LOWPRIO:
1338 if (ovl->priority2-1 >= TC_CBQ_MAXPRIO ||
1339 ovl->priority2-1 <= cl->priority)
1340 return -EINVAL;
1341 cl->priority2 = ovl->priority2-1;
1342 cl->overlimit = cbq_ovl_lowprio;
1343 break;
1344 case TC_CBQ_OVL_DROP:
1345 cl->overlimit = cbq_ovl_drop;
1346 break;
1347 case TC_CBQ_OVL_RCLASSIC:
1348 cl->overlimit = cbq_ovl_rclassic;
1349 break;
1350 default:
1351 return -EINVAL;
1353 cl->penalty = ovl->penalty;
1354 return 0;
1357 #ifdef CONFIG_NET_CLS_ACT
1358 static int cbq_set_police(struct cbq_class *cl, struct tc_cbq_police *p)
1360 cl->police = p->police;
1362 if (cl->q->handle) {
1363 if (p->police == TC_POLICE_RECLASSIFY)
1364 cl->q->reshape_fail = cbq_reshape_fail;
1365 else
1366 cl->q->reshape_fail = NULL;
1368 return 0;
1370 #endif
1372 static int cbq_set_fopt(struct cbq_class *cl, struct tc_cbq_fopt *fopt)
1374 cbq_change_defmap(cl, fopt->split, fopt->defmap, fopt->defchange);
1375 return 0;
1378 static const struct nla_policy cbq_policy[TCA_CBQ_MAX + 1] = {
1379 [TCA_CBQ_LSSOPT] = { .len = sizeof(struct tc_cbq_lssopt) },
1380 [TCA_CBQ_WRROPT] = { .len = sizeof(struct tc_cbq_wrropt) },
1381 [TCA_CBQ_FOPT] = { .len = sizeof(struct tc_cbq_fopt) },
1382 [TCA_CBQ_OVL_STRATEGY] = { .len = sizeof(struct tc_cbq_ovl) },
1383 [TCA_CBQ_RATE] = { .len = sizeof(struct tc_ratespec) },
1384 [TCA_CBQ_RTAB] = { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
1385 [TCA_CBQ_POLICE] = { .len = sizeof(struct tc_cbq_police) },
1388 static int cbq_init(struct Qdisc *sch, struct nlattr *opt)
1390 struct cbq_sched_data *q = qdisc_priv(sch);
1391 struct nlattr *tb[TCA_CBQ_MAX + 1];
1392 struct tc_ratespec *r;
1393 int err;
1395 err = nla_parse_nested(tb, TCA_CBQ_MAX, opt, cbq_policy);
1396 if (err < 0)
1397 return err;
1399 if (tb[TCA_CBQ_RTAB] == NULL || tb[TCA_CBQ_RATE] == NULL)
1400 return -EINVAL;
1402 r = nla_data(tb[TCA_CBQ_RATE]);
1404 if ((q->link.R_tab = qdisc_get_rtab(r, tb[TCA_CBQ_RTAB])) == NULL)
1405 return -EINVAL;
1407 err = qdisc_class_hash_init(&q->clhash);
1408 if (err < 0)
1409 goto put_rtab;
1411 q->link.refcnt = 1;
1412 q->link.sibling = &q->link;
1413 q->link.common.classid = sch->handle;
1414 q->link.qdisc = sch;
1415 if (!(q->link.q = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue,
1416 &pfifo_qdisc_ops,
1417 sch->handle)))
1418 q->link.q = &noop_qdisc;
1420 q->link.priority = TC_CBQ_MAXPRIO-1;
1421 q->link.priority2 = TC_CBQ_MAXPRIO-1;
1422 q->link.cpriority = TC_CBQ_MAXPRIO-1;
1423 q->link.ovl_strategy = TC_CBQ_OVL_CLASSIC;
1424 q->link.overlimit = cbq_ovl_classic;
1425 q->link.allot = psched_mtu(qdisc_dev(sch));
1426 q->link.quantum = q->link.allot;
1427 q->link.weight = q->link.R_tab->rate.rate;
1429 q->link.ewma_log = TC_CBQ_DEF_EWMA;
1430 q->link.avpkt = q->link.allot/2;
1431 q->link.minidle = -0x7FFFFFFF;
1433 qdisc_watchdog_init(&q->watchdog, sch);
1434 hrtimer_init(&q->delay_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
1435 q->delay_timer.function = cbq_undelay;
1436 q->toplevel = TC_CBQ_MAXLEVEL;
1437 q->now = psched_get_time();
1438 q->now_rt = q->now;
1440 cbq_link_class(&q->link);
1442 if (tb[TCA_CBQ_LSSOPT])
1443 cbq_set_lss(&q->link, nla_data(tb[TCA_CBQ_LSSOPT]));
1445 cbq_addprio(q, &q->link);
1446 return 0;
1448 put_rtab:
1449 qdisc_put_rtab(q->link.R_tab);
1450 return err;
1453 static __inline__ int cbq_dump_rate(struct sk_buff *skb, struct cbq_class *cl)
1455 unsigned char *b = skb_tail_pointer(skb);
1457 NLA_PUT(skb, TCA_CBQ_RATE, sizeof(cl->R_tab->rate), &cl->R_tab->rate);
1458 return skb->len;
1460 nla_put_failure:
1461 nlmsg_trim(skb, b);
1462 return -1;
1465 static __inline__ int cbq_dump_lss(struct sk_buff *skb, struct cbq_class *cl)
1467 unsigned char *b = skb_tail_pointer(skb);
1468 struct tc_cbq_lssopt opt;
1470 opt.flags = 0;
1471 if (cl->borrow == NULL)
1472 opt.flags |= TCF_CBQ_LSS_BOUNDED;
1473 if (cl->share == NULL)
1474 opt.flags |= TCF_CBQ_LSS_ISOLATED;
1475 opt.ewma_log = cl->ewma_log;
1476 opt.level = cl->level;
1477 opt.avpkt = cl->avpkt;
1478 opt.maxidle = cl->maxidle;
1479 opt.minidle = (u32)(-cl->minidle);
1480 opt.offtime = cl->offtime;
1481 opt.change = ~0;
1482 NLA_PUT(skb, TCA_CBQ_LSSOPT, sizeof(opt), &opt);
1483 return skb->len;
1485 nla_put_failure:
1486 nlmsg_trim(skb, b);
1487 return -1;
1490 static __inline__ int cbq_dump_wrr(struct sk_buff *skb, struct cbq_class *cl)
1492 unsigned char *b = skb_tail_pointer(skb);
1493 struct tc_cbq_wrropt opt;
1495 opt.flags = 0;
1496 opt.allot = cl->allot;
1497 opt.priority = cl->priority+1;
1498 opt.cpriority = cl->cpriority+1;
1499 opt.weight = cl->weight;
1500 NLA_PUT(skb, TCA_CBQ_WRROPT, sizeof(opt), &opt);
1501 return skb->len;
1503 nla_put_failure:
1504 nlmsg_trim(skb, b);
1505 return -1;
1508 static __inline__ int cbq_dump_ovl(struct sk_buff *skb, struct cbq_class *cl)
1510 unsigned char *b = skb_tail_pointer(skb);
1511 struct tc_cbq_ovl opt;
1513 opt.strategy = cl->ovl_strategy;
1514 opt.priority2 = cl->priority2+1;
1515 opt.pad = 0;
1516 opt.penalty = cl->penalty;
1517 NLA_PUT(skb, TCA_CBQ_OVL_STRATEGY, sizeof(opt), &opt);
1518 return skb->len;
1520 nla_put_failure:
1521 nlmsg_trim(skb, b);
1522 return -1;
1525 static __inline__ int cbq_dump_fopt(struct sk_buff *skb, struct cbq_class *cl)
1527 unsigned char *b = skb_tail_pointer(skb);
1528 struct tc_cbq_fopt opt;
1530 if (cl->split || cl->defmap) {
1531 opt.split = cl->split ? cl->split->common.classid : 0;
1532 opt.defmap = cl->defmap;
1533 opt.defchange = ~0;
1534 NLA_PUT(skb, TCA_CBQ_FOPT, sizeof(opt), &opt);
1536 return skb->len;
1538 nla_put_failure:
1539 nlmsg_trim(skb, b);
1540 return -1;
1543 #ifdef CONFIG_NET_CLS_ACT
1544 static __inline__ int cbq_dump_police(struct sk_buff *skb, struct cbq_class *cl)
1546 unsigned char *b = skb_tail_pointer(skb);
1547 struct tc_cbq_police opt;
1549 if (cl->police) {
1550 opt.police = cl->police;
1551 opt.__res1 = 0;
1552 opt.__res2 = 0;
1553 NLA_PUT(skb, TCA_CBQ_POLICE, sizeof(opt), &opt);
1555 return skb->len;
1557 nla_put_failure:
1558 nlmsg_trim(skb, b);
1559 return -1;
1561 #endif
1563 static int cbq_dump_attr(struct sk_buff *skb, struct cbq_class *cl)
1565 if (cbq_dump_lss(skb, cl) < 0 ||
1566 cbq_dump_rate(skb, cl) < 0 ||
1567 cbq_dump_wrr(skb, cl) < 0 ||
1568 cbq_dump_ovl(skb, cl) < 0 ||
1569 #ifdef CONFIG_NET_CLS_ACT
1570 cbq_dump_police(skb, cl) < 0 ||
1571 #endif
1572 cbq_dump_fopt(skb, cl) < 0)
1573 return -1;
1574 return 0;
1577 static int cbq_dump(struct Qdisc *sch, struct sk_buff *skb)
1579 struct cbq_sched_data *q = qdisc_priv(sch);
1580 struct nlattr *nest;
1582 nest = nla_nest_start(skb, TCA_OPTIONS);
1583 if (nest == NULL)
1584 goto nla_put_failure;
1585 if (cbq_dump_attr(skb, &q->link) < 0)
1586 goto nla_put_failure;
1587 nla_nest_end(skb, nest);
1588 return skb->len;
1590 nla_put_failure:
1591 nla_nest_cancel(skb, nest);
1592 return -1;
1595 static int
1596 cbq_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
1598 struct cbq_sched_data *q = qdisc_priv(sch);
1600 q->link.xstats.avgidle = q->link.avgidle;
1601 return gnet_stats_copy_app(d, &q->link.xstats, sizeof(q->link.xstats));
1604 static int
1605 cbq_dump_class(struct Qdisc *sch, unsigned long arg,
1606 struct sk_buff *skb, struct tcmsg *tcm)
1608 struct cbq_class *cl = (struct cbq_class*)arg;
1609 struct nlattr *nest;
1611 if (cl->tparent)
1612 tcm->tcm_parent = cl->tparent->common.classid;
1613 else
1614 tcm->tcm_parent = TC_H_ROOT;
1615 tcm->tcm_handle = cl->common.classid;
1616 tcm->tcm_info = cl->q->handle;
1618 nest = nla_nest_start(skb, TCA_OPTIONS);
1619 if (nest == NULL)
1620 goto nla_put_failure;
1621 if (cbq_dump_attr(skb, cl) < 0)
1622 goto nla_put_failure;
1623 nla_nest_end(skb, nest);
1624 return skb->len;
1626 nla_put_failure:
1627 nla_nest_cancel(skb, nest);
1628 return -1;
1631 static int
1632 cbq_dump_class_stats(struct Qdisc *sch, unsigned long arg,
1633 struct gnet_dump *d)
1635 struct cbq_sched_data *q = qdisc_priv(sch);
1636 struct cbq_class *cl = (struct cbq_class*)arg;
1638 cl->qstats.qlen = cl->q->q.qlen;
1639 cl->xstats.avgidle = cl->avgidle;
1640 cl->xstats.undertime = 0;
1642 if (cl->undertime != PSCHED_PASTPERFECT)
1643 cl->xstats.undertime = cl->undertime - q->now;
1645 if (gnet_stats_copy_basic(d, &cl->bstats) < 0 ||
1646 gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
1647 gnet_stats_copy_queue(d, &cl->qstats) < 0)
1648 return -1;
1650 return gnet_stats_copy_app(d, &cl->xstats, sizeof(cl->xstats));
1653 static int cbq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
1654 struct Qdisc **old)
1656 struct cbq_class *cl = (struct cbq_class*)arg;
1658 if (cl) {
1659 if (new == NULL) {
1660 new = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue,
1661 &pfifo_qdisc_ops,
1662 cl->common.classid);
1663 if (new == NULL)
1664 return -ENOBUFS;
1665 } else {
1666 #ifdef CONFIG_NET_CLS_ACT
1667 if (cl->police == TC_POLICE_RECLASSIFY)
1668 new->reshape_fail = cbq_reshape_fail;
1669 #endif
1671 sch_tree_lock(sch);
1672 *old = xchg(&cl->q, new);
1673 qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
1674 qdisc_reset(*old);
1675 sch_tree_unlock(sch);
1677 return 0;
1679 return -ENOENT;
1682 static struct Qdisc *
1683 cbq_leaf(struct Qdisc *sch, unsigned long arg)
1685 struct cbq_class *cl = (struct cbq_class*)arg;
1687 return cl ? cl->q : NULL;
1690 static void cbq_qlen_notify(struct Qdisc *sch, unsigned long arg)
1692 struct cbq_class *cl = (struct cbq_class *)arg;
1694 if (cl->q->q.qlen == 0)
1695 cbq_deactivate_class(cl);
1698 static unsigned long cbq_get(struct Qdisc *sch, u32 classid)
1700 struct cbq_sched_data *q = qdisc_priv(sch);
1701 struct cbq_class *cl = cbq_class_lookup(q, classid);
1703 if (cl) {
1704 cl->refcnt++;
1705 return (unsigned long)cl;
1707 return 0;
1710 static void cbq_destroy_class(struct Qdisc *sch, struct cbq_class *cl)
1712 struct cbq_sched_data *q = qdisc_priv(sch);
1714 WARN_ON(cl->filters);
1716 tcf_destroy_chain(&cl->filter_list);
1717 qdisc_destroy(cl->q);
1718 qdisc_put_rtab(cl->R_tab);
1719 gen_kill_estimator(&cl->bstats, &cl->rate_est);
1720 if (cl != &q->link)
1721 kfree(cl);
1724 static void
1725 cbq_destroy(struct Qdisc* sch)
1727 struct cbq_sched_data *q = qdisc_priv(sch);
1728 struct hlist_node *n, *next;
1729 struct cbq_class *cl;
1730 unsigned h;
1732 #ifdef CONFIG_NET_CLS_ACT
1733 q->rx_class = NULL;
1734 #endif
1736 * Filters must be destroyed first because we don't destroy the
1737  * classes from root to leaves, which means that filters can still
1738 * be bound to classes which have been destroyed already. --TGR '04
1740 for (h = 0; h < q->clhash.hashsize; h++) {
1741 hlist_for_each_entry(cl, n, &q->clhash.hash[h], common.hnode)
1742 tcf_destroy_chain(&cl->filter_list);
1744 for (h = 0; h < q->clhash.hashsize; h++) {
1745 hlist_for_each_entry_safe(cl, n, next, &q->clhash.hash[h],
1746 common.hnode)
1747 cbq_destroy_class(sch, cl);
1749 qdisc_class_hash_destroy(&q->clhash);
1752 static void cbq_put(struct Qdisc *sch, unsigned long arg)
1754 struct cbq_class *cl = (struct cbq_class*)arg;
1756 if (--cl->refcnt == 0) {
1757 #ifdef CONFIG_NET_CLS_ACT
1758 spinlock_t *root_lock = qdisc_root_sleeping_lock(sch);
1759 struct cbq_sched_data *q = qdisc_priv(sch);
1761 spin_lock_bh(root_lock);
1762 if (q->rx_class == cl)
1763 q->rx_class = NULL;
1764 spin_unlock_bh(root_lock);
1765 #endif
1767 cbq_destroy_class(sch, cl);
1771 static int
1772 cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **tca,
1773 unsigned long *arg)
1775 int err;
1776 struct cbq_sched_data *q = qdisc_priv(sch);
1777 struct cbq_class *cl = (struct cbq_class*)*arg;
1778 struct nlattr *opt = tca[TCA_OPTIONS];
1779 struct nlattr *tb[TCA_CBQ_MAX + 1];
1780 struct cbq_class *parent;
1781 struct qdisc_rate_table *rtab = NULL;
1783 if (opt == NULL)
1784 return -EINVAL;
1786 err = nla_parse_nested(tb, TCA_CBQ_MAX, opt, cbq_policy);
1787 if (err < 0)
1788 return err;
1790 if (cl) {
1791 /* Check parent */
1792 if (parentid) {
1793 if (cl->tparent &&
1794 cl->tparent->common.classid != parentid)
1795 return -EINVAL;
1796 if (!cl->tparent && parentid != TC_H_ROOT)
1797 return -EINVAL;
1800 if (tb[TCA_CBQ_RATE]) {
1801 rtab = qdisc_get_rtab(nla_data(tb[TCA_CBQ_RATE]), tb[TCA_CBQ_RTAB]);
1802 if (rtab == NULL)
1803 return -EINVAL;
1806 /* Change class parameters */
1807 sch_tree_lock(sch);
1809 if (cl->next_alive != NULL)
1810 cbq_deactivate_class(cl);
1812 if (rtab) {
1813 rtab = xchg(&cl->R_tab, rtab);
1814 qdisc_put_rtab(rtab);
1817 if (tb[TCA_CBQ_LSSOPT])
1818 cbq_set_lss(cl, nla_data(tb[TCA_CBQ_LSSOPT]));
1820 if (tb[TCA_CBQ_WRROPT]) {
1821 cbq_rmprio(q, cl);
1822 cbq_set_wrr(cl, nla_data(tb[TCA_CBQ_WRROPT]));
1825 if (tb[TCA_CBQ_OVL_STRATEGY])
1826 cbq_set_overlimit(cl, nla_data(tb[TCA_CBQ_OVL_STRATEGY]));
1828 #ifdef CONFIG_NET_CLS_ACT
1829 if (tb[TCA_CBQ_POLICE])
1830 cbq_set_police(cl, nla_data(tb[TCA_CBQ_POLICE]));
1831 #endif
1833 if (tb[TCA_CBQ_FOPT])
1834 cbq_set_fopt(cl, nla_data(tb[TCA_CBQ_FOPT]));
1836 if (cl->q->q.qlen)
1837 cbq_activate_class(cl);
1839 sch_tree_unlock(sch);
1841 if (tca[TCA_RATE])
1842 gen_replace_estimator(&cl->bstats, &cl->rate_est,
1843 qdisc_root_sleeping_lock(sch),
1844 tca[TCA_RATE]);
1845 return 0;
1848 if (parentid == TC_H_ROOT)
1849 return -EINVAL;
1851 if (tb[TCA_CBQ_WRROPT] == NULL || tb[TCA_CBQ_RATE] == NULL ||
1852 tb[TCA_CBQ_LSSOPT] == NULL)
1853 return -EINVAL;
1855 rtab = qdisc_get_rtab(nla_data(tb[TCA_CBQ_RATE]), tb[TCA_CBQ_RTAB]);
1856 if (rtab == NULL)
1857 return -EINVAL;
1859 if (classid) {
1860 err = -EINVAL;
1861 if (TC_H_MAJ(classid^sch->handle) || cbq_class_lookup(q, classid))
1862 goto failure;
1863 } else {
1864 int i;
1865 classid = TC_H_MAKE(sch->handle,0x8000);
1867 for (i=0; i<0x8000; i++) {
1868 if (++q->hgenerator >= 0x8000)
1869 q->hgenerator = 1;
1870 if (cbq_class_lookup(q, classid|q->hgenerator) == NULL)
1871 break;
1873 err = -ENOSR;
1874 if (i >= 0x8000)
1875 goto failure;
1876 classid = classid|q->hgenerator;
1879 parent = &q->link;
1880 if (parentid) {
1881 parent = cbq_class_lookup(q, parentid);
1882 err = -EINVAL;
1883 if (parent == NULL)
1884 goto failure;
1887 err = -ENOBUFS;
1888 cl = kzalloc(sizeof(*cl), GFP_KERNEL);
1889 if (cl == NULL)
1890 goto failure;
1891 cl->R_tab = rtab;
1892 rtab = NULL;
1893 cl->refcnt = 1;
1894 if (!(cl->q = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue,
1895 &pfifo_qdisc_ops, classid)))
1896 cl->q = &noop_qdisc;
1897 cl->common.classid = classid;
1898 cl->tparent = parent;
1899 cl->qdisc = sch;
1900 cl->allot = parent->allot;
1901 cl->quantum = cl->allot;
1902 cl->weight = cl->R_tab->rate.rate;
1904 sch_tree_lock(sch);
1905 cbq_link_class(cl);
1906 cl->borrow = cl->tparent;
1907 if (cl->tparent != &q->link)
1908 cl->share = cl->tparent;
1909 cbq_adjust_levels(parent);
1910 cl->minidle = -0x7FFFFFFF;
1911 cbq_set_lss(cl, nla_data(tb[TCA_CBQ_LSSOPT]));
1912 cbq_set_wrr(cl, nla_data(tb[TCA_CBQ_WRROPT]));
1913 if (cl->ewma_log==0)
1914 cl->ewma_log = q->link.ewma_log;
1915 if (cl->maxidle==0)
1916 cl->maxidle = q->link.maxidle;
1917 if (cl->avpkt==0)
1918 cl->avpkt = q->link.avpkt;
1919 cl->overlimit = cbq_ovl_classic;
1920 if (tb[TCA_CBQ_OVL_STRATEGY])
1921 cbq_set_overlimit(cl, nla_data(tb[TCA_CBQ_OVL_STRATEGY]));
1922 #ifdef CONFIG_NET_CLS_ACT
1923 if (tb[TCA_CBQ_POLICE])
1924 cbq_set_police(cl, nla_data(tb[TCA_CBQ_POLICE]));
1925 #endif
1926 if (tb[TCA_CBQ_FOPT])
1927 cbq_set_fopt(cl, nla_data(tb[TCA_CBQ_FOPT]));
1928 sch_tree_unlock(sch);
1930 qdisc_class_hash_grow(sch, &q->clhash);
1932 if (tca[TCA_RATE])
1933 gen_new_estimator(&cl->bstats, &cl->rate_est,
1934 qdisc_root_sleeping_lock(sch), tca[TCA_RATE]);
1936 *arg = (unsigned long)cl;
1937 return 0;
1939 failure:
1940 qdisc_put_rtab(rtab);
1941 return err;
1944 static int cbq_delete(struct Qdisc *sch, unsigned long arg)
1946 struct cbq_sched_data *q = qdisc_priv(sch);
1947 struct cbq_class *cl = (struct cbq_class*)arg;
1948 unsigned int qlen;
1950 if (cl->filters || cl->children || cl == &q->link)
1951 return -EBUSY;
1953 sch_tree_lock(sch);
1955 qlen = cl->q->q.qlen;
1956 qdisc_reset(cl->q);
1957 qdisc_tree_decrease_qlen(cl->q, qlen);
1959 if (cl->next_alive)
1960 cbq_deactivate_class(cl);
1962 if (q->tx_borrowed == cl)
1963 q->tx_borrowed = q->tx_class;
1964 if (q->tx_class == cl) {
1965 q->tx_class = NULL;
1966 q->tx_borrowed = NULL;
1968 #ifdef CONFIG_NET_CLS_ACT
1969 if (q->rx_class == cl)
1970 q->rx_class = NULL;
1971 #endif
1973 cbq_unlink_class(cl);
1974 cbq_adjust_levels(cl->tparent);
1975 cl->defmap = 0;
1976 cbq_sync_defmap(cl);
1978 cbq_rmprio(q, cl);
1979 sch_tree_unlock(sch);
1981 if (--cl->refcnt == 0)
1982 cbq_destroy_class(sch, cl);
1984 return 0;
1987 static struct tcf_proto **cbq_find_tcf(struct Qdisc *sch, unsigned long arg)
1989 struct cbq_sched_data *q = qdisc_priv(sch);
1990 struct cbq_class *cl = (struct cbq_class *)arg;
1992 if (cl == NULL)
1993 cl = &q->link;
1995 return &cl->filter_list;
1998 static unsigned long cbq_bind_filter(struct Qdisc *sch, unsigned long parent,
1999 u32 classid)
2001 struct cbq_sched_data *q = qdisc_priv(sch);
2002 struct cbq_class *p = (struct cbq_class*)parent;
2003 struct cbq_class *cl = cbq_class_lookup(q, classid);
2005 if (cl) {
2006 if (p && p->level <= cl->level)
2007 return 0;
2008 cl->filters++;
2009 return (unsigned long)cl;
2011 return 0;
2014 static void cbq_unbind_filter(struct Qdisc *sch, unsigned long arg)
2016 struct cbq_class *cl = (struct cbq_class*)arg;
2018 cl->filters--;
2021 static void cbq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
2023 struct cbq_sched_data *q = qdisc_priv(sch);
2024 struct cbq_class *cl;
2025 struct hlist_node *n;
2026 unsigned h;
2028 if (arg->stop)
2029 return;
2031 for (h = 0; h < q->clhash.hashsize; h++) {
2032 hlist_for_each_entry(cl, n, &q->clhash.hash[h], common.hnode) {
2033 if (arg->count < arg->skip) {
2034 arg->count++;
2035 continue;
2037 if (arg->fn(sch, (unsigned long)cl, arg) < 0) {
2038 arg->stop = 1;
2039 return;
2041 arg->count++;
2046 static const struct Qdisc_class_ops cbq_class_ops = {
2047 .graft = cbq_graft,
2048 .leaf = cbq_leaf,
2049 .qlen_notify = cbq_qlen_notify,
2050 .get = cbq_get,
2051 .put = cbq_put,
2052 .change = cbq_change_class,
2053 .delete = cbq_delete,
2054 .walk = cbq_walk,
2055 .tcf_chain = cbq_find_tcf,
2056 .bind_tcf = cbq_bind_filter,
2057 .unbind_tcf = cbq_unbind_filter,
2058 .dump = cbq_dump_class,
2059 .dump_stats = cbq_dump_class_stats,
2062 static struct Qdisc_ops cbq_qdisc_ops __read_mostly = {
2063 .next = NULL,
2064 .cl_ops = &cbq_class_ops,
2065 .id = "cbq",
2066 .priv_size = sizeof(struct cbq_sched_data),
2067 .enqueue = cbq_enqueue,
2068 .dequeue = cbq_dequeue,
2069 .requeue = cbq_requeue,
2070 .drop = cbq_drop,
2071 .init = cbq_init,
2072 .reset = cbq_reset,
2073 .destroy = cbq_destroy,
2074 .change = NULL,
2075 .dump = cbq_dump,
2076 .dump_stats = cbq_dump_stats,
2077 .owner = THIS_MODULE,
2080 static int __init cbq_module_init(void)
2082 return register_qdisc(&cbq_qdisc_ops);
2084 static void __exit cbq_module_exit(void)
2086 unregister_qdisc(&cbq_qdisc_ops);
2088 module_init(cbq_module_init)
2089 module_exit(cbq_module_exit)
2090 MODULE_LICENSE("GPL");