1 /*
2 * net/sched/sch_cbq.c Class-Based Queueing discipline.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
9 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
13 #include <linux/module.h>
14 #include <linux/types.h>
15 #include <linux/kernel.h>
16 #include <linux/string.h>
17 #include <linux/errno.h>
18 #include <linux/skbuff.h>
19 #include <net/netlink.h>
20 #include <net/pkt_sched.h>
23 /* Class-Based Queueing (CBQ) algorithm.
24 =======================================
26 Sources: [1] Sally Floyd and Van Jacobson, "Link-sharing and Resource
27 Management Models for Packet Networks",
28 IEEE/ACM Transactions on Networking, Vol.3, No.4, 1995
30 [2] Sally Floyd, "Notes on CBQ and Guaranteed Service", 1995
32 [3] Sally Floyd, "Notes on Class-Based Queueing: Setting
33 Parameters", 1996
35 [4] Sally Floyd and Michael Speer, "Experimental Results
36 for Class-Based Queueing", 1998, not published.
38 -----------------------------------------------------------------------
40 Algorithm skeleton was taken from NS simulator cbq.cc.
41 If someone wants to check this code against the LBL version,
42 he should take into account that ONLY the skeleton was borrowed,
43 the implementation is different. Particularly:
45 --- The WRR algorithm is different. Our version looks more
46 reasonable (I hope) and works when quanta are allowed to be
47 less than MTU, which is always the case when real-time classes
48 have small rates. Note that the statement of [3] is
49 incomplete; delay may actually be estimated even if class
50 per-round allotment is less than MTU. Namely, if per-round
51 allotment is W*r_i, and r_1+...+r_k = r < 1
53 delay_i <= ([MTU/(W*r_i)]*W*r + W*r + k*MTU)/B
55 In the worst case we have IntServ estimate with D = W*r+k*MTU
56 and C = MTU*r. The proof (if correct at all) is trivial.
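	(Editorial illustration with hypothetical numbers, not from the original
	text: take MTU = 1500 bytes, per-round allotment W*r_i = 500 bytes,
	W*r = 2000 bytes summed over k = 4 real-time classes, and
	B = 1.25 Mbyte/s; then
	delay_i <= (ceil(1500/500)*2000 + 2000 + 4*1500)/B = 14000 bytes / B ~= 11 ms.)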
59 --- It seems that cbq-2.0 is not very accurate. At least, I cannot
60 interpret some places, which look like wrong translations
61 from NS. Anyone is advised to find these differences
62 and explain to me why I am wrong 8).
64 --- Linux has no EOI event, so we cannot estimate true class
65 idle time. The workaround is to consider the next dequeue event
66 as a sign that the previous packet has finished. This is wrong because of
67 internal device queueing, but on a permanently loaded link it is true.
68 Moreover, combined with the clock integrator, this scheme looks
69 very close to an ideal solution. */
71 struct cbq_sched_data;
74 struct cbq_class
76 struct Qdisc_class_common common;
77 struct cbq_class *next_alive; /* next class with backlog in this priority band */
79 /* Parameters */
80 unsigned char priority; /* class priority */
81 unsigned char priority2; /* priority to be used after overlimit */
82 unsigned char ewma_log; /* time constant for idle time calculation */
83 unsigned char ovl_strategy;
84 #ifdef CONFIG_NET_CLS_ACT
85 unsigned char police;
86 #endif
88 u32 defmap;
90 /* Link-sharing scheduler parameters */
91 long maxidle; /* Class parameters: see below. */
92 long offtime;
93 long minidle;
94 u32 avpkt;
95 struct qdisc_rate_table *R_tab;
97 /* Overlimit strategy parameters */
98 void (*overlimit)(struct cbq_class *cl);
99 psched_tdiff_t penalty;
101 /* General scheduler (WRR) parameters */
102 long allot;
103 long quantum; /* Allotment per WRR round */
104 long weight; /* Relative allotment: see below */
106 struct Qdisc *qdisc; /* Ptr to CBQ discipline */
107 struct cbq_class *split; /* Ptr to split node */
108 struct cbq_class *share; /* Ptr to LS parent in the class tree */
109 struct cbq_class *tparent; /* Ptr to tree parent in the class tree */
110 struct cbq_class *borrow; /* NULL if class is bandwidth limited;
111 parent otherwise */
112 struct cbq_class *sibling; /* Sibling chain */
113 struct cbq_class *children; /* Pointer to children chain */
115 struct Qdisc *q; /* Elementary queueing discipline */
118 /* Variables */
119 unsigned char cpriority; /* Effective priority */
120 unsigned char delayed;
121 unsigned char level; /* level of the class in hierarchy:
122 0 for leaf classes, and maximal
123 level of children + 1 for nodes.
126 psched_time_t last; /* Last end of service */
127 psched_time_t undertime;
128 long avgidle;
129 long deficit; /* Saved deficit for WRR */
130 psched_time_t penalized;
131 struct gnet_stats_basic bstats;
132 struct gnet_stats_queue qstats;
133 struct gnet_stats_rate_est rate_est;
134 struct tc_cbq_xstats xstats;
136 struct tcf_proto *filter_list;
138 int refcnt;
139 int filters;
141 struct cbq_class *defaults[TC_PRIO_MAX+1];
144 struct cbq_sched_data
146 struct Qdisc_class_hash clhash; /* Hash table of all classes */
147 int nclasses[TC_CBQ_MAXPRIO+1];
148 unsigned quanta[TC_CBQ_MAXPRIO+1];
150 struct cbq_class link;
152 unsigned activemask;
153 struct cbq_class *active[TC_CBQ_MAXPRIO+1]; /* List of all classes
154 with backlog */
156 #ifdef CONFIG_NET_CLS_ACT
157 struct cbq_class *rx_class;
158 #endif
159 struct cbq_class *tx_class;
160 struct cbq_class *tx_borrowed;
161 int tx_len;
162 psched_time_t now; /* Cached timestamp */
163 psched_time_t now_rt; /* Cached real time */
164 unsigned pmask;
166 struct hrtimer delay_timer;
167 struct qdisc_watchdog watchdog; /* Watchdog timer,
168 started when CBQ has
169 backlog, but cannot
170 transmit just now */
171 psched_tdiff_t wd_expires;
172 int toplevel;
173 u32 hgenerator;
177 #define L2T(cl,len) qdisc_l2t((cl)->R_tab,len)
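/* Editorial note: L2T ("length to time") looks up, in the class rate table
 * R_tab, the time (in psched ticks) needed to transmit a packet of the given
 * length at the class's configured rate. */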
179 static __inline__ struct cbq_class *
180 cbq_class_lookup(struct cbq_sched_data *q, u32 classid)
182 struct Qdisc_class_common *clc;
184 clc = qdisc_class_find(&q->clhash, classid);
185 if (clc == NULL)
186 return NULL;
187 return container_of(clc, struct cbq_class, common);
190 #ifdef CONFIG_NET_CLS_ACT
192 static struct cbq_class *
193 cbq_reclassify(struct sk_buff *skb, struct cbq_class *this)
195 struct cbq_class *cl, *new;
197 for (cl = this->tparent; cl; cl = cl->tparent)
198 if ((new = cl->defaults[TC_PRIO_BESTEFFORT]) != NULL && new != this)
199 return new;
201 return NULL;
204 #endif
206 /* Classify packet. The procedure is pretty complicated, but
207 it allows us to combine link sharing and priority scheduling
208 transparently.
210 Namely, you can put link-sharing rules (e.g. route-based) at the root of CBQ,
211 so that it resolves to split nodes. Then packets are classified
212 by logical priority, or a more specific classifier may be attached
213 to the split node.
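   (Editorial example with hypothetical handles: if this qdisc is 1: and a
   packet arrives with skb->priority equal to classid 1:10, step 1 below
   returns class 1:10 directly.  Otherwise the filters of the current head
   class are consulted; if they give no class, the head's defaults[] entry
   for the packet's TC_PRIO_* band is used, falling back to
   TC_PRIO_BESTEFFORT.)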
216 static struct cbq_class *
217 cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
219 struct cbq_sched_data *q = qdisc_priv(sch);
220 struct cbq_class *head = &q->link;
221 struct cbq_class **defmap;
222 struct cbq_class *cl = NULL;
223 u32 prio = skb->priority;
224 struct tcf_result res;
227 * Step 1. If skb->priority points to one of our classes, use it.
229 if (TC_H_MAJ(prio^sch->handle) == 0 &&
230 (cl = cbq_class_lookup(q, prio)) != NULL)
231 return cl;
233 *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
234 for (;;) {
235 int result = 0;
236 defmap = head->defaults;
239 * Step 2+n. Apply classifier.
241 if (!head->filter_list ||
242 (result = tc_classify_compat(skb, head->filter_list, &res)) < 0)
243 goto fallback;
245 if ((cl = (void*)res.class) == NULL) {
246 if (TC_H_MAJ(res.classid))
247 cl = cbq_class_lookup(q, res.classid);
248 else if ((cl = defmap[res.classid&TC_PRIO_MAX]) == NULL)
249 cl = defmap[TC_PRIO_BESTEFFORT];
251 if (cl == NULL || cl->level >= head->level)
252 goto fallback;
255 #ifdef CONFIG_NET_CLS_ACT
256 switch (result) {
257 case TC_ACT_QUEUED:
258 case TC_ACT_STOLEN:
259 *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
260 case TC_ACT_SHOT:
261 return NULL;
262 case TC_ACT_RECLASSIFY:
263 return cbq_reclassify(skb, cl);
265 #endif
266 if (cl->level == 0)
267 return cl;
270 * Step 3+n. If classifier selected a link sharing class,
271 * apply agency-specific classifier.
272 * Repeat this procedure until we hit a leaf node.
274 head = cl;
277 fallback:
278 cl = head;
281 * Step 4. No success...
283 if (TC_H_MAJ(prio) == 0 &&
284 !(cl = head->defaults[prio&TC_PRIO_MAX]) &&
285 !(cl = head->defaults[TC_PRIO_BESTEFFORT]))
286 return head;
288 return cl;
292 A packet has just been enqueued on the empty class.
293 cbq_activate_class adds it to the tail of active class list
294 of its priority band.
297 static __inline__ void cbq_activate_class(struct cbq_class *cl)
299 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
300 int prio = cl->cpriority;
301 struct cbq_class *cl_tail;
303 cl_tail = q->active[prio];
304 q->active[prio] = cl;
306 if (cl_tail != NULL) {
307 cl->next_alive = cl_tail->next_alive;
308 cl_tail->next_alive = cl;
309 } else {
310 cl->next_alive = cl;
311 q->activemask |= (1<<prio);
316 Unlink class from active chain.
317 Note that this same procedure is done directly in cbq_dequeue*
318 during round-robin procedure.
321 static void cbq_deactivate_class(struct cbq_class *this)
323 struct cbq_sched_data *q = qdisc_priv(this->qdisc);
324 int prio = this->cpriority;
325 struct cbq_class *cl;
326 struct cbq_class *cl_prev = q->active[prio];
328 do {
329 cl = cl_prev->next_alive;
330 if (cl == this) {
331 cl_prev->next_alive = cl->next_alive;
332 cl->next_alive = NULL;
334 if (cl == q->active[prio]) {
335 q->active[prio] = cl_prev;
336 if (cl == q->active[prio]) {
337 q->active[prio] = NULL;
338 q->activemask &= ~(1<<prio);
339 return;
342 return;
344 } while ((cl_prev = cl) != q->active[prio]);
347 static void
348 cbq_mark_toplevel(struct cbq_sched_data *q, struct cbq_class *cl)
350 int toplevel = q->toplevel;
352 if (toplevel > cl->level && !(cl->q->flags&TCQ_F_THROTTLED)) {
353 psched_time_t now;
354 psched_tdiff_t incr;
356 now = psched_get_time();
357 incr = now - q->now_rt;
358 now = q->now + incr;
360 do {
361 if (cl->undertime < now) {
362 q->toplevel = cl->level;
363 return;
365 } while ((cl=cl->borrow) != NULL && toplevel > cl->level);
369 static int
370 cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
372 struct cbq_sched_data *q = qdisc_priv(sch);
373 int uninitialized_var(ret);
374 struct cbq_class *cl = cbq_classify(skb, sch, &ret);
376 #ifdef CONFIG_NET_CLS_ACT
377 q->rx_class = cl;
378 #endif
379 if (cl == NULL) {
380 if (ret & __NET_XMIT_BYPASS)
381 sch->qstats.drops++;
382 kfree_skb(skb);
383 return ret;
386 #ifdef CONFIG_NET_CLS_ACT
387 cl->q->__parent = sch;
388 #endif
389 ret = qdisc_enqueue(skb, cl->q);
390 if (ret == NET_XMIT_SUCCESS) {
391 sch->q.qlen++;
392 sch->bstats.packets++;
393 sch->bstats.bytes += qdisc_pkt_len(skb);
394 cbq_mark_toplevel(q, cl);
395 if (!cl->next_alive)
396 cbq_activate_class(cl);
397 return ret;
400 if (net_xmit_drop_count(ret)) {
401 sch->qstats.drops++;
402 cbq_mark_toplevel(q, cl);
403 cl->qstats.drops++;
405 return ret;
408 static int
409 cbq_requeue(struct sk_buff *skb, struct Qdisc *sch)
411 struct cbq_sched_data *q = qdisc_priv(sch);
412 struct cbq_class *cl;
413 int ret;
415 if ((cl = q->tx_class) == NULL) {
416 kfree_skb(skb);
417 sch->qstats.drops++;
418 return NET_XMIT_CN;
420 q->tx_class = NULL;
422 cbq_mark_toplevel(q, cl);
424 #ifdef CONFIG_NET_CLS_ACT
425 q->rx_class = cl;
426 cl->q->__parent = sch;
427 #endif
428 if ((ret = cl->q->ops->requeue(skb, cl->q)) == 0) {
429 sch->q.qlen++;
430 sch->qstats.requeues++;
431 if (!cl->next_alive)
432 cbq_activate_class(cl);
433 return 0;
435 if (net_xmit_drop_count(ret)) {
436 sch->qstats.drops++;
437 cl->qstats.drops++;
439 return ret;
442 /* Overlimit actions */
444 /* TC_CBQ_OVL_CLASSIC: (default) penalize leaf class by adding offtime */
446 static void cbq_ovl_classic(struct cbq_class *cl)
448 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
449 psched_tdiff_t delay = cl->undertime - q->now;
451 if (!cl->delayed) {
452 delay += cl->offtime;
455 Class goes to sleep, so that it will have no
456 chance to work off its avgidle. Let's forgive it 8)
458 BTW cbq-2.0 has a bug in this
459 place, apparently they forgot to shift it by cl->ewma_log.
461 if (cl->avgidle < 0)
462 delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
463 if (cl->avgidle < cl->minidle)
464 cl->avgidle = cl->minidle;
465 if (delay <= 0)
466 delay = 1;
467 cl->undertime = q->now + delay;
469 cl->xstats.overactions++;
470 cl->delayed = 1;
472 if (q->wd_expires == 0 || q->wd_expires > delay)
473 q->wd_expires = delay;
475 /* Dirty work! We must schedule wakeups based on
476 real available rate, rather than leaf rate,
477 which may be tiny (even zero).
479 if (q->toplevel == TC_CBQ_MAXLEVEL) {
480 struct cbq_class *b;
481 psched_tdiff_t base_delay = q->wd_expires;
483 for (b = cl->borrow; b; b = b->borrow) {
484 delay = b->undertime - q->now;
485 if (delay < base_delay) {
486 if (delay <= 0)
487 delay = 1;
488 base_delay = delay;
492 q->wd_expires = base_delay;
496 /* TC_CBQ_OVL_RCLASSIC: penalize classes in the hierarchy by offtime
497 when they go overlimit
500 static void cbq_ovl_rclassic(struct cbq_class *cl)
502 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
503 struct cbq_class *this = cl;
505 do {
506 if (cl->level > q->toplevel) {
507 cl = NULL;
508 break;
510 } while ((cl = cl->borrow) != NULL);
512 if (cl == NULL)
513 cl = this;
514 cbq_ovl_classic(cl);
517 /* TC_CBQ_OVL_DELAY: delay until the class goes underlimit */
519 static void cbq_ovl_delay(struct cbq_class *cl)
521 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
522 psched_tdiff_t delay = cl->undertime - q->now;
524 if (!cl->delayed) {
525 psched_time_t sched = q->now;
526 ktime_t expires;
528 delay += cl->offtime;
529 if (cl->avgidle < 0)
530 delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
531 if (cl->avgidle < cl->minidle)
532 cl->avgidle = cl->minidle;
533 cl->undertime = q->now + delay;
535 if (delay > 0) {
536 sched += delay + cl->penalty;
537 cl->penalized = sched;
538 cl->cpriority = TC_CBQ_MAXPRIO;
539 q->pmask |= (1<<TC_CBQ_MAXPRIO);
541 expires = ktime_set(0, 0);
542 expires = ktime_add_ns(expires, PSCHED_US2NS(sched));
543 if (hrtimer_try_to_cancel(&q->delay_timer) &&
544 ktime_to_ns(ktime_sub(q->delay_timer.expires,
545 expires)) > 0)
546 q->delay_timer.expires = expires;
547 hrtimer_restart(&q->delay_timer);
548 cl->delayed = 1;
549 cl->xstats.overactions++;
550 return;
552 delay = 1;
554 if (q->wd_expires == 0 || q->wd_expires > delay)
555 q->wd_expires = delay;
558 /* TC_CBQ_OVL_LOWPRIO: penalize class by lowering its priority band */
560 static void cbq_ovl_lowprio(struct cbq_class *cl)
562 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
564 cl->penalized = q->now + cl->penalty;
566 if (cl->cpriority != cl->priority2) {
567 cl->cpriority = cl->priority2;
568 q->pmask |= (1<<cl->cpriority);
569 cl->xstats.overactions++;
571 cbq_ovl_classic(cl);
574 /* TC_CBQ_OVL_DROP: penalize class by dropping */
576 static void cbq_ovl_drop(struct cbq_class *cl)
578 if (cl->q->ops->drop)
579 if (cl->q->ops->drop(cl->q))
580 cl->qdisc->q.qlen--;
581 cl->xstats.overactions++;
582 cbq_ovl_classic(cl);
585 static psched_tdiff_t cbq_undelay_prio(struct cbq_sched_data *q, int prio,
586 psched_time_t now)
588 struct cbq_class *cl;
589 struct cbq_class *cl_prev = q->active[prio];
590 psched_time_t sched = now;
592 if (cl_prev == NULL)
593 return 0;
595 do {
596 cl = cl_prev->next_alive;
597 if (now - cl->penalized > 0) {
598 cl_prev->next_alive = cl->next_alive;
599 cl->next_alive = NULL;
600 cl->cpriority = cl->priority;
601 cl->delayed = 0;
602 cbq_activate_class(cl);
604 if (cl == q->active[prio]) {
605 q->active[prio] = cl_prev;
606 if (cl == q->active[prio]) {
607 q->active[prio] = NULL;
608 return 0;
612 cl = cl_prev->next_alive;
613 } else if (sched - cl->penalized > 0)
614 sched = cl->penalized;
615 } while ((cl_prev = cl) != q->active[prio]);
617 return sched - now;
620 static enum hrtimer_restart cbq_undelay(struct hrtimer *timer)
622 struct cbq_sched_data *q = container_of(timer, struct cbq_sched_data,
623 delay_timer);
624 struct Qdisc *sch = q->watchdog.qdisc;
625 psched_time_t now;
626 psched_tdiff_t delay = 0;
627 unsigned pmask;
629 now = psched_get_time();
631 pmask = q->pmask;
632 q->pmask = 0;
634 while (pmask) {
635 int prio = ffz(~pmask);
636 psched_tdiff_t tmp;
638 pmask &= ~(1<<prio);
640 tmp = cbq_undelay_prio(q, prio, now);
641 if (tmp > 0) {
642 q->pmask |= 1<<prio;
643 if (tmp < delay || delay == 0)
644 delay = tmp;
648 if (delay) {
649 ktime_t time;
651 time = ktime_set(0, 0);
652 time = ktime_add_ns(time, PSCHED_US2NS(now + delay));
653 hrtimer_start(&q->delay_timer, time, HRTIMER_MODE_ABS);
656 sch->flags &= ~TCQ_F_THROTTLED;
657 __netif_schedule(qdisc_root(sch));
658 return HRTIMER_NORESTART;
661 #ifdef CONFIG_NET_CLS_ACT
662 static int cbq_reshape_fail(struct sk_buff *skb, struct Qdisc *child)
664 struct Qdisc *sch = child->__parent;
665 struct cbq_sched_data *q = qdisc_priv(sch);
666 struct cbq_class *cl = q->rx_class;
668 q->rx_class = NULL;
670 if (cl && (cl = cbq_reclassify(skb, cl)) != NULL) {
671 int ret;
673 cbq_mark_toplevel(q, cl);
675 q->rx_class = cl;
676 cl->q->__parent = sch;
678 ret = qdisc_enqueue(skb, cl->q);
679 if (ret == NET_XMIT_SUCCESS) {
680 sch->q.qlen++;
681 sch->bstats.packets++;
682 sch->bstats.bytes += qdisc_pkt_len(skb);
683 if (!cl->next_alive)
684 cbq_activate_class(cl);
685 return 0;
687 if (net_xmit_drop_count(ret))
688 sch->qstats.drops++;
689 return 0;
692 sch->qstats.drops++;
693 return -1;
695 #endif
698 It is a mission-critical procedure.
700 We "regenerate" the toplevel cutoff if the transmitting class
701 has backlog and is not regulated. It is not part of the
702 original CBQ description, but looks more reasonable.
703 Probably, it is wrong. This question needs further investigation.
706 static __inline__ void
707 cbq_update_toplevel(struct cbq_sched_data *q, struct cbq_class *cl,
708 struct cbq_class *borrowed)
710 if (cl && q->toplevel >= borrowed->level) {
711 if (cl->q->q.qlen > 1) {
712 do {
713 if (borrowed->undertime == PSCHED_PASTPERFECT) {
714 q->toplevel = borrowed->level;
715 return;
717 } while ((borrowed=borrowed->borrow) != NULL);
719 #if 0
720 /* It is not necessary now. Uncommenting it
721 will save CPU cycles, but decrease fairness.
723 q->toplevel = TC_CBQ_MAXLEVEL;
724 #endif
728 static void
729 cbq_update(struct cbq_sched_data *q)
731 struct cbq_class *this = q->tx_class;
732 struct cbq_class *cl = this;
733 int len = q->tx_len;
735 q->tx_class = NULL;
737 for ( ; cl; cl = cl->share) {
738 long avgidle = cl->avgidle;
739 long idle;
741 cl->bstats.packets++;
742 cl->bstats.bytes += len;
745 (now - last) is total time between packet right edges.
746 (last_pktlen/rate) is "virtual" busy time, so that
748 idle = (now - last) - last_pktlen/rate
751 idle = q->now - cl->last;
752 if ((unsigned long)idle > 128*1024*1024) {
753 avgidle = cl->maxidle;
754 } else {
755 idle -= L2T(cl, len);
757 /* true_avgidle := (1-W)*true_avgidle + W*idle,
758 where W=2^{-ewma_log}. But cl->avgidle is scaled:
759 cl->avgidle == true_avgidle/W,
760 hence:
762 avgidle += idle - (avgidle>>cl->ewma_log);
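/* Editorial derivation: dividing true_avgidle' = (1-W)*true_avgidle + W*idle
   by W gives avgidle' = (1-W)*avgidle + idle = avgidle + idle - W*avgidle,
   and with W = 2^(-ewma_log) the term W*avgidle is avgidle >> ewma_log,
   which is exactly the update above. */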
765 if (avgidle <= 0) {
766 /* Overlimit or at-limit */
768 if (avgidle < cl->minidle)
769 avgidle = cl->minidle;
771 cl->avgidle = avgidle;
773 /* Calculate the expected time when this class
774 will be allowed to send.
775 It will occur when:
776 (1-W)*true_avgidle + W*delay = 0, i.e.
777 idle = (1/W - 1)*(-true_avgidle), or equivalently
779 idle = (1 - W)*(-cl->avgidle);
781 idle = (-avgidle) - ((-avgidle) >> cl->ewma_log);
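/* Editorial note: since true_avgidle = W*avgidle, solving
   (1-W)*true_avgidle + W*delay = 0 for the delay gives
   (1/W - 1)*(-true_avgidle) = (1 - W)*(-avgidle),
   and with W = 2^(-ewma_log) this is what the line above computes:
   (-avgidle) minus (-avgidle) >> ewma_log. */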
784 That is not all.
785 To maintain the rate allocated to the class,
786 we add to undertime the virtual clock time
787 necessary to complete the transmitted packet.
788 (len/phys_bandwidth has already passed
789 by the moment of cbq_update.)
792 idle -= L2T(&q->link, len);
793 idle += L2T(cl, len);
795 cl->undertime = q->now + idle;
796 } else {
797 /* Underlimit */
799 cl->undertime = PSCHED_PASTPERFECT;
800 if (avgidle > cl->maxidle)
801 cl->avgidle = cl->maxidle;
802 else
803 cl->avgidle = avgidle;
805 cl->last = q->now;
808 cbq_update_toplevel(q, this, q->tx_borrowed);
811 static __inline__ struct cbq_class *
812 cbq_under_limit(struct cbq_class *cl)
814 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
815 struct cbq_class *this_cl = cl;
817 if (cl->tparent == NULL)
818 return cl;
820 if (cl->undertime == PSCHED_PASTPERFECT || q->now >= cl->undertime) {
821 cl->delayed = 0;
822 return cl;
825 do {
826 /* This is a very suspicious place. Currently the overlimit
827 action is generated for non-bounded classes
828 only if the link is completely congested.
829 Though it agrees with the ancestor-only paradigm,
830 it looks very stupid. In particular,
831 it means that this chunk of code will either
832 never be called or result in strong amplification
833 of burstiness. Dangerous, silly, and yet
834 no other solution exists.
836 if ((cl = cl->borrow) == NULL) {
837 this_cl->qstats.overlimits++;
838 this_cl->overlimit(this_cl);
839 return NULL;
841 if (cl->level > q->toplevel)
842 return NULL;
843 } while (cl->undertime != PSCHED_PASTPERFECT && q->now < cl->undertime);
845 cl->delayed = 0;
846 return cl;
849 static __inline__ struct sk_buff *
850 cbq_dequeue_prio(struct Qdisc *sch, int prio)
852 struct cbq_sched_data *q = qdisc_priv(sch);
853 struct cbq_class *cl_tail, *cl_prev, *cl;
854 struct sk_buff *skb;
855 int deficit;
857 cl_tail = cl_prev = q->active[prio];
858 cl = cl_prev->next_alive;
860 do {
861 deficit = 0;
863 /* Start round */
864 do {
865 struct cbq_class *borrow = cl;
867 if (cl->q->q.qlen &&
868 (borrow = cbq_under_limit(cl)) == NULL)
869 goto skip_class;
871 if (cl->deficit <= 0) {
872 /* Class exhausted its allotment for
873 this round. Switch to the next one.
875 deficit = 1;
876 cl->deficit += cl->quantum;
877 goto next_class;
880 skb = cl->q->dequeue(cl->q);
882 /* Class did not give us any skb :-(
883 It could occur even if cl->q->q.qlen != 0
884 e.g. if cl->q == "tbf"
886 if (skb == NULL)
887 goto skip_class;
889 cl->deficit -= qdisc_pkt_len(skb);
890 q->tx_class = cl;
891 q->tx_borrowed = borrow;
892 if (borrow != cl) {
893 #ifndef CBQ_XSTATS_BORROWS_BYTES
894 borrow->xstats.borrows++;
895 cl->xstats.borrows++;
896 #else
897 borrow->xstats.borrows += qdisc_pkt_len(skb);
898 cl->xstats.borrows += qdisc_pkt_len(skb);
899 #endif
901 q->tx_len = qdisc_pkt_len(skb);
903 if (cl->deficit <= 0) {
904 q->active[prio] = cl;
905 cl = cl->next_alive;
906 cl->deficit += cl->quantum;
908 return skb;
910 skip_class:
911 if (cl->q->q.qlen == 0 || prio != cl->cpriority) {
912 /* Class is empty or penalized.
913 Unlink it from active chain.
915 cl_prev->next_alive = cl->next_alive;
916 cl->next_alive = NULL;
918 /* Did cl_tail point to it? */
919 if (cl == cl_tail) {
920 /* Repair it! */
921 cl_tail = cl_prev;
923 /* Was it the last class in this band? */
924 if (cl == cl_tail) {
925 /* Kill the band! */
926 q->active[prio] = NULL;
927 q->activemask &= ~(1<<prio);
928 if (cl->q->q.qlen)
929 cbq_activate_class(cl);
930 return NULL;
933 q->active[prio] = cl_tail;
935 if (cl->q->q.qlen)
936 cbq_activate_class(cl);
938 cl = cl_prev;
941 next_class:
942 cl_prev = cl;
943 cl = cl->next_alive;
944 } while (cl_prev != cl_tail);
945 } while (deficit);
947 q->active[prio] = cl_prev;
949 return NULL;
952 static __inline__ struct sk_buff *
953 cbq_dequeue_1(struct Qdisc *sch)
955 struct cbq_sched_data *q = qdisc_priv(sch);
956 struct sk_buff *skb;
957 unsigned activemask;
959 activemask = q->activemask&0xFF;
960 while (activemask) {
961 int prio = ffz(~activemask);
962 activemask &= ~(1<<prio);
963 skb = cbq_dequeue_prio(sch, prio);
964 if (skb)
965 return skb;
967 return NULL;
970 static struct sk_buff *
971 cbq_dequeue(struct Qdisc *sch)
973 struct sk_buff *skb;
974 struct cbq_sched_data *q = qdisc_priv(sch);
975 psched_time_t now;
976 psched_tdiff_t incr;
978 now = psched_get_time();
979 incr = now - q->now_rt;
981 if (q->tx_class) {
982 psched_tdiff_t incr2;
983 /* Time integrator. We calculate EOS time
984 by adding expected packet transmission time.
985 If real time is greater, we warp artificial clock,
986 so that:
988 cbq_time = max(real_time, work);
990 incr2 = L2T(&q->link, q->tx_len);
991 q->now += incr2;
992 cbq_update(q);
993 if ((incr -= incr2) < 0)
994 incr = 0;
996 q->now += incr;
997 q->now_rt = now;
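/* Editorial note: the two additions above advance q->now by
   max(real elapsed time, expected transmission time of the last packet),
   i.e. the "cbq_time = max(real_time, work)" rule stated in the comment. */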
999 for (;;) {
1000 q->wd_expires = 0;
1002 skb = cbq_dequeue_1(sch);
1003 if (skb) {
1004 sch->q.qlen--;
1005 sch->flags &= ~TCQ_F_THROTTLED;
1006 return skb;
1009 /* All the classes are overlimit.
1011 It is possible if:
1013 1. The scheduler is empty.
1014 2. The toplevel cutoff inhibited borrowing.
1015 3. The root class is overlimit.
1017 Reset the 2nd and 3rd conditions and retry.
1019 Note that NS and cbq-2.0 are buggy: picking
1020 an arbitrary class is appropriate for ancestor-only
1021 sharing, but not for the toplevel algorithm.
1023 Our version is better but slower, because it requires
1024 two passes; this is unavoidable with top-level sharing.
1027 if (q->toplevel == TC_CBQ_MAXLEVEL &&
1028 q->link.undertime == PSCHED_PASTPERFECT)
1029 break;
1031 q->toplevel = TC_CBQ_MAXLEVEL;
1032 q->link.undertime = PSCHED_PASTPERFECT;
1035 /* No packets in scheduler or nobody wants to give them to us :-(
1036 Sigh... start the watchdog timer in the latter case. */
1038 if (sch->q.qlen) {
1039 sch->qstats.overlimits++;
1040 if (q->wd_expires)
1041 qdisc_watchdog_schedule(&q->watchdog,
1042 now + q->wd_expires);
1044 return NULL;
1047 /* CBQ class maintenance routines */
1049 static void cbq_adjust_levels(struct cbq_class *this)
1051 if (this == NULL)
1052 return;
1054 do {
1055 int level = 0;
1056 struct cbq_class *cl;
1058 if ((cl = this->children) != NULL) {
1059 do {
1060 if (cl->level > level)
1061 level = cl->level;
1062 } while ((cl = cl->sibling) != this->children);
1064 this->level = level+1;
1065 } while ((this = this->tparent) != NULL);
1068 static void cbq_normalize_quanta(struct cbq_sched_data *q, int prio)
1070 struct cbq_class *cl;
1071 struct hlist_node *n;
1072 unsigned int h;
1074 if (q->quanta[prio] == 0)
1075 return;
1077 for (h = 0; h < q->clhash.hashsize; h++) {
1078 hlist_for_each_entry(cl, n, &q->clhash.hash[h], common.hnode) {
1079 /* BUGGGG... Beware! This expression suffers from
1080 arithmetic overflows!
1082 if (cl->priority == prio) {
1083 cl->quantum = (cl->weight*cl->allot*q->nclasses[prio])/
1084 q->quanta[prio];
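/* Editorial illustration of the overflow risk, with hypothetical numbers:
   with a 32-bit long, weight = 1250000 (a 10 Mbit/s rate in bytes/sec),
   allot = 1514 and nclasses = 3 multiply to about 5.7e9, well past 2^31-1,
   so the computed quantum is garbage and the sanity check below has to
   repair it. */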
1086 if (cl->quantum <= 0 || cl->quantum>32*qdisc_dev(cl->qdisc)->mtu) {
1087 printk(KERN_WARNING "CBQ: class %08x has bad quantum==%ld, repaired.\n", cl->common.classid, cl->quantum);
1088 cl->quantum = qdisc_dev(cl->qdisc)->mtu/2 + 1;
1094 static void cbq_sync_defmap(struct cbq_class *cl)
1096 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
1097 struct cbq_class *split = cl->split;
1098 unsigned h;
1099 int i;
1101 if (split == NULL)
1102 return;
1104 for (i=0; i<=TC_PRIO_MAX; i++) {
1105 if (split->defaults[i] == cl && !(cl->defmap&(1<<i)))
1106 split->defaults[i] = NULL;
1109 for (i=0; i<=TC_PRIO_MAX; i++) {
1110 int level = split->level;
1112 if (split->defaults[i])
1113 continue;
1115 for (h = 0; h < q->clhash.hashsize; h++) {
1116 struct hlist_node *n;
1117 struct cbq_class *c;
1119 hlist_for_each_entry(c, n, &q->clhash.hash[h],
1120 common.hnode) {
1121 if (c->split == split && c->level < level &&
1122 c->defmap&(1<<i)) {
1123 split->defaults[i] = c;
1124 level = c->level;
1131 static void cbq_change_defmap(struct cbq_class *cl, u32 splitid, u32 def, u32 mask)
1133 struct cbq_class *split = NULL;
1135 if (splitid == 0) {
1136 if ((split = cl->split) == NULL)
1137 return;
1138 splitid = split->common.classid;
1141 if (split == NULL || split->common.classid != splitid) {
1142 for (split = cl->tparent; split; split = split->tparent)
1143 if (split->common.classid == splitid)
1144 break;
1147 if (split == NULL)
1148 return;
1150 if (cl->split != split) {
1151 cl->defmap = 0;
1152 cbq_sync_defmap(cl);
1153 cl->split = split;
1154 cl->defmap = def&mask;
1155 } else
1156 cl->defmap = (cl->defmap&~mask)|(def&mask);
1158 cbq_sync_defmap(cl);
1161 static void cbq_unlink_class(struct cbq_class *this)
1163 struct cbq_class *cl, **clp;
1164 struct cbq_sched_data *q = qdisc_priv(this->qdisc);
1166 qdisc_class_hash_remove(&q->clhash, &this->common);
1168 if (this->tparent) {
1169 clp=&this->sibling;
1170 cl = *clp;
1171 do {
1172 if (cl == this) {
1173 *clp = cl->sibling;
1174 break;
1176 clp = &cl->sibling;
1177 } while ((cl = *clp) != this->sibling);
1179 if (this->tparent->children == this) {
1180 this->tparent->children = this->sibling;
1181 if (this->sibling == this)
1182 this->tparent->children = NULL;
1184 } else {
1185 WARN_ON(this->sibling != this);
1189 static void cbq_link_class(struct cbq_class *this)
1191 struct cbq_sched_data *q = qdisc_priv(this->qdisc);
1192 struct cbq_class *parent = this->tparent;
1194 this->sibling = this;
1195 qdisc_class_hash_insert(&q->clhash, &this->common);
1197 if (parent == NULL)
1198 return;
1200 if (parent->children == NULL) {
1201 parent->children = this;
1202 } else {
1203 this->sibling = parent->children->sibling;
1204 parent->children->sibling = this;
1208 static unsigned int cbq_drop(struct Qdisc* sch)
1210 struct cbq_sched_data *q = qdisc_priv(sch);
1211 struct cbq_class *cl, *cl_head;
1212 int prio;
1213 unsigned int len;
1215 for (prio = TC_CBQ_MAXPRIO; prio >= 0; prio--) {
1216 if ((cl_head = q->active[prio]) == NULL)
1217 continue;
1219 cl = cl_head;
1220 do {
1221 if (cl->q->ops->drop && (len = cl->q->ops->drop(cl->q))) {
1222 sch->q.qlen--;
1223 if (!cl->q->q.qlen)
1224 cbq_deactivate_class(cl);
1225 return len;
1227 } while ((cl = cl->next_alive) != cl_head);
1229 return 0;
1232 static void
1233 cbq_reset(struct Qdisc* sch)
1235 struct cbq_sched_data *q = qdisc_priv(sch);
1236 struct cbq_class *cl;
1237 struct hlist_node *n;
1238 int prio;
1239 unsigned h;
1241 q->activemask = 0;
1242 q->pmask = 0;
1243 q->tx_class = NULL;
1244 q->tx_borrowed = NULL;
1245 qdisc_watchdog_cancel(&q->watchdog);
1246 hrtimer_cancel(&q->delay_timer);
1247 q->toplevel = TC_CBQ_MAXLEVEL;
1248 q->now = psched_get_time();
1249 q->now_rt = q->now;
1251 for (prio = 0; prio <= TC_CBQ_MAXPRIO; prio++)
1252 q->active[prio] = NULL;
1254 for (h = 0; h < q->clhash.hashsize; h++) {
1255 hlist_for_each_entry(cl, n, &q->clhash.hash[h], common.hnode) {
1256 qdisc_reset(cl->q);
1258 cl->next_alive = NULL;
1259 cl->undertime = PSCHED_PASTPERFECT;
1260 cl->avgidle = cl->maxidle;
1261 cl->deficit = cl->quantum;
1262 cl->cpriority = cl->priority;
1265 sch->q.qlen = 0;
1269 static int cbq_set_lss(struct cbq_class *cl, struct tc_cbq_lssopt *lss)
1271 if (lss->change&TCF_CBQ_LSS_FLAGS) {
1272 cl->share = (lss->flags&TCF_CBQ_LSS_ISOLATED) ? NULL : cl->tparent;
1273 cl->borrow = (lss->flags&TCF_CBQ_LSS_BOUNDED) ? NULL : cl->tparent;
1275 if (lss->change&TCF_CBQ_LSS_EWMA)
1276 cl->ewma_log = lss->ewma_log;
1277 if (lss->change&TCF_CBQ_LSS_AVPKT)
1278 cl->avpkt = lss->avpkt;
1279 if (lss->change&TCF_CBQ_LSS_MINIDLE)
1280 cl->minidle = -(long)lss->minidle;
1281 if (lss->change&TCF_CBQ_LSS_MAXIDLE) {
1282 cl->maxidle = lss->maxidle;
1283 cl->avgidle = lss->maxidle;
1285 if (lss->change&TCF_CBQ_LSS_OFFTIME)
1286 cl->offtime = lss->offtime;
1287 return 0;
1290 static void cbq_rmprio(struct cbq_sched_data *q, struct cbq_class *cl)
1292 q->nclasses[cl->priority]--;
1293 q->quanta[cl->priority] -= cl->weight;
1294 cbq_normalize_quanta(q, cl->priority);
1297 static void cbq_addprio(struct cbq_sched_data *q, struct cbq_class *cl)
1299 q->nclasses[cl->priority]++;
1300 q->quanta[cl->priority] += cl->weight;
1301 cbq_normalize_quanta(q, cl->priority);
1304 static int cbq_set_wrr(struct cbq_class *cl, struct tc_cbq_wrropt *wrr)
1306 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
1308 if (wrr->allot)
1309 cl->allot = wrr->allot;
1310 if (wrr->weight)
1311 cl->weight = wrr->weight;
1312 if (wrr->priority) {
1313 cl->priority = wrr->priority-1;
1314 cl->cpriority = cl->priority;
1315 if (cl->priority >= cl->priority2)
1316 cl->priority2 = TC_CBQ_MAXPRIO-1;
1319 cbq_addprio(q, cl);
1320 return 0;
1323 static int cbq_set_overlimit(struct cbq_class *cl, struct tc_cbq_ovl *ovl)
1325 switch (ovl->strategy) {
1326 case TC_CBQ_OVL_CLASSIC:
1327 cl->overlimit = cbq_ovl_classic;
1328 break;
1329 case TC_CBQ_OVL_DELAY:
1330 cl->overlimit = cbq_ovl_delay;
1331 break;
1332 case TC_CBQ_OVL_LOWPRIO:
1333 if (ovl->priority2-1 >= TC_CBQ_MAXPRIO ||
1334 ovl->priority2-1 <= cl->priority)
1335 return -EINVAL;
1336 cl->priority2 = ovl->priority2-1;
1337 cl->overlimit = cbq_ovl_lowprio;
1338 break;
1339 case TC_CBQ_OVL_DROP:
1340 cl->overlimit = cbq_ovl_drop;
1341 break;
1342 case TC_CBQ_OVL_RCLASSIC:
1343 cl->overlimit = cbq_ovl_rclassic;
1344 break;
1345 default:
1346 return -EINVAL;
1348 cl->penalty = ovl->penalty;
1349 return 0;
1352 #ifdef CONFIG_NET_CLS_ACT
1353 static int cbq_set_police(struct cbq_class *cl, struct tc_cbq_police *p)
1355 cl->police = p->police;
1357 if (cl->q->handle) {
1358 if (p->police == TC_POLICE_RECLASSIFY)
1359 cl->q->reshape_fail = cbq_reshape_fail;
1360 else
1361 cl->q->reshape_fail = NULL;
1363 return 0;
1365 #endif
1367 static int cbq_set_fopt(struct cbq_class *cl, struct tc_cbq_fopt *fopt)
1369 cbq_change_defmap(cl, fopt->split, fopt->defmap, fopt->defchange);
1370 return 0;
1373 static const struct nla_policy cbq_policy[TCA_CBQ_MAX + 1] = {
1374 [TCA_CBQ_LSSOPT] = { .len = sizeof(struct tc_cbq_lssopt) },
1375 [TCA_CBQ_WRROPT] = { .len = sizeof(struct tc_cbq_wrropt) },
1376 [TCA_CBQ_FOPT] = { .len = sizeof(struct tc_cbq_fopt) },
1377 [TCA_CBQ_OVL_STRATEGY] = { .len = sizeof(struct tc_cbq_ovl) },
1378 [TCA_CBQ_RATE] = { .len = sizeof(struct tc_ratespec) },
1379 [TCA_CBQ_RTAB] = { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
1380 [TCA_CBQ_POLICE] = { .len = sizeof(struct tc_cbq_police) },
1383 static int cbq_init(struct Qdisc *sch, struct nlattr *opt)
1385 struct cbq_sched_data *q = qdisc_priv(sch);
1386 struct nlattr *tb[TCA_CBQ_MAX + 1];
1387 struct tc_ratespec *r;
1388 int err;
1390 err = nla_parse_nested(tb, TCA_CBQ_MAX, opt, cbq_policy);
1391 if (err < 0)
1392 return err;
1394 if (tb[TCA_CBQ_RTAB] == NULL || tb[TCA_CBQ_RATE] == NULL)
1395 return -EINVAL;
1397 r = nla_data(tb[TCA_CBQ_RATE]);
1399 if ((q->link.R_tab = qdisc_get_rtab(r, tb[TCA_CBQ_RTAB])) == NULL)
1400 return -EINVAL;
1402 err = qdisc_class_hash_init(&q->clhash);
1403 if (err < 0)
1404 goto put_rtab;
1406 q->link.refcnt = 1;
1407 q->link.sibling = &q->link;
1408 q->link.common.classid = sch->handle;
1409 q->link.qdisc = sch;
1410 if (!(q->link.q = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue,
1411 &pfifo_qdisc_ops,
1412 sch->handle)))
1413 q->link.q = &noop_qdisc;
1415 q->link.priority = TC_CBQ_MAXPRIO-1;
1416 q->link.priority2 = TC_CBQ_MAXPRIO-1;
1417 q->link.cpriority = TC_CBQ_MAXPRIO-1;
1418 q->link.ovl_strategy = TC_CBQ_OVL_CLASSIC;
1419 q->link.overlimit = cbq_ovl_classic;
1420 q->link.allot = psched_mtu(qdisc_dev(sch));
1421 q->link.quantum = q->link.allot;
1422 q->link.weight = q->link.R_tab->rate.rate;
1424 q->link.ewma_log = TC_CBQ_DEF_EWMA;
1425 q->link.avpkt = q->link.allot/2;
1426 q->link.minidle = -0x7FFFFFFF;
1428 qdisc_watchdog_init(&q->watchdog, sch);
1429 hrtimer_init(&q->delay_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
1430 q->delay_timer.function = cbq_undelay;
1431 q->toplevel = TC_CBQ_MAXLEVEL;
1432 q->now = psched_get_time();
1433 q->now_rt = q->now;
1435 cbq_link_class(&q->link);
1437 if (tb[TCA_CBQ_LSSOPT])
1438 cbq_set_lss(&q->link, nla_data(tb[TCA_CBQ_LSSOPT]));
1440 cbq_addprio(q, &q->link);
1441 return 0;
1443 put_rtab:
1444 qdisc_put_rtab(q->link.R_tab);
1445 return err;
1448 static __inline__ int cbq_dump_rate(struct sk_buff *skb, struct cbq_class *cl)
1450 unsigned char *b = skb_tail_pointer(skb);
1452 NLA_PUT(skb, TCA_CBQ_RATE, sizeof(cl->R_tab->rate), &cl->R_tab->rate);
1453 return skb->len;
1455 nla_put_failure:
1456 nlmsg_trim(skb, b);
1457 return -1;
1460 static __inline__ int cbq_dump_lss(struct sk_buff *skb, struct cbq_class *cl)
1462 unsigned char *b = skb_tail_pointer(skb);
1463 struct tc_cbq_lssopt opt;
1465 opt.flags = 0;
1466 if (cl->borrow == NULL)
1467 opt.flags |= TCF_CBQ_LSS_BOUNDED;
1468 if (cl->share == NULL)
1469 opt.flags |= TCF_CBQ_LSS_ISOLATED;
1470 opt.ewma_log = cl->ewma_log;
1471 opt.level = cl->level;
1472 opt.avpkt = cl->avpkt;
1473 opt.maxidle = cl->maxidle;
1474 opt.minidle = (u32)(-cl->minidle);
1475 opt.offtime = cl->offtime;
1476 opt.change = ~0;
1477 NLA_PUT(skb, TCA_CBQ_LSSOPT, sizeof(opt), &opt);
1478 return skb->len;
1480 nla_put_failure:
1481 nlmsg_trim(skb, b);
1482 return -1;
1485 static __inline__ int cbq_dump_wrr(struct sk_buff *skb, struct cbq_class *cl)
1487 unsigned char *b = skb_tail_pointer(skb);
1488 struct tc_cbq_wrropt opt;
1490 opt.flags = 0;
1491 opt.allot = cl->allot;
1492 opt.priority = cl->priority+1;
1493 opt.cpriority = cl->cpriority+1;
1494 opt.weight = cl->weight;
1495 NLA_PUT(skb, TCA_CBQ_WRROPT, sizeof(opt), &opt);
1496 return skb->len;
1498 nla_put_failure:
1499 nlmsg_trim(skb, b);
1500 return -1;
1503 static __inline__ int cbq_dump_ovl(struct sk_buff *skb, struct cbq_class *cl)
1505 unsigned char *b = skb_tail_pointer(skb);
1506 struct tc_cbq_ovl opt;
1508 opt.strategy = cl->ovl_strategy;
1509 opt.priority2 = cl->priority2+1;
1510 opt.pad = 0;
1511 opt.penalty = cl->penalty;
1512 NLA_PUT(skb, TCA_CBQ_OVL_STRATEGY, sizeof(opt), &opt);
1513 return skb->len;
1515 nla_put_failure:
1516 nlmsg_trim(skb, b);
1517 return -1;
1520 static __inline__ int cbq_dump_fopt(struct sk_buff *skb, struct cbq_class *cl)
1522 unsigned char *b = skb_tail_pointer(skb);
1523 struct tc_cbq_fopt opt;
1525 if (cl->split || cl->defmap) {
1526 opt.split = cl->split ? cl->split->common.classid : 0;
1527 opt.defmap = cl->defmap;
1528 opt.defchange = ~0;
1529 NLA_PUT(skb, TCA_CBQ_FOPT, sizeof(opt), &opt);
1531 return skb->len;
1533 nla_put_failure:
1534 nlmsg_trim(skb, b);
1535 return -1;
1538 #ifdef CONFIG_NET_CLS_ACT
1539 static __inline__ int cbq_dump_police(struct sk_buff *skb, struct cbq_class *cl)
1541 unsigned char *b = skb_tail_pointer(skb);
1542 struct tc_cbq_police opt;
1544 if (cl->police) {
1545 opt.police = cl->police;
1546 opt.__res1 = 0;
1547 opt.__res2 = 0;
1548 NLA_PUT(skb, TCA_CBQ_POLICE, sizeof(opt), &opt);
1550 return skb->len;
1552 nla_put_failure:
1553 nlmsg_trim(skb, b);
1554 return -1;
1556 #endif
1558 static int cbq_dump_attr(struct sk_buff *skb, struct cbq_class *cl)
1560 if (cbq_dump_lss(skb, cl) < 0 ||
1561 cbq_dump_rate(skb, cl) < 0 ||
1562 cbq_dump_wrr(skb, cl) < 0 ||
1563 cbq_dump_ovl(skb, cl) < 0 ||
1564 #ifdef CONFIG_NET_CLS_ACT
1565 cbq_dump_police(skb, cl) < 0 ||
1566 #endif
1567 cbq_dump_fopt(skb, cl) < 0)
1568 return -1;
1569 return 0;
1572 static int cbq_dump(struct Qdisc *sch, struct sk_buff *skb)
1574 struct cbq_sched_data *q = qdisc_priv(sch);
1575 struct nlattr *nest;
1577 nest = nla_nest_start(skb, TCA_OPTIONS);
1578 if (nest == NULL)
1579 goto nla_put_failure;
1580 if (cbq_dump_attr(skb, &q->link) < 0)
1581 goto nla_put_failure;
1582 nla_nest_end(skb, nest);
1583 return skb->len;
1585 nla_put_failure:
1586 nla_nest_cancel(skb, nest);
1587 return -1;
1590 static int
1591 cbq_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
1593 struct cbq_sched_data *q = qdisc_priv(sch);
1595 q->link.xstats.avgidle = q->link.avgidle;
1596 return gnet_stats_copy_app(d, &q->link.xstats, sizeof(q->link.xstats));
1599 static int
1600 cbq_dump_class(struct Qdisc *sch, unsigned long arg,
1601 struct sk_buff *skb, struct tcmsg *tcm)
1603 struct cbq_class *cl = (struct cbq_class*)arg;
1604 struct nlattr *nest;
1606 if (cl->tparent)
1607 tcm->tcm_parent = cl->tparent->common.classid;
1608 else
1609 tcm->tcm_parent = TC_H_ROOT;
1610 tcm->tcm_handle = cl->common.classid;
1611 tcm->tcm_info = cl->q->handle;
1613 nest = nla_nest_start(skb, TCA_OPTIONS);
1614 if (nest == NULL)
1615 goto nla_put_failure;
1616 if (cbq_dump_attr(skb, cl) < 0)
1617 goto nla_put_failure;
1618 nla_nest_end(skb, nest);
1619 return skb->len;
1621 nla_put_failure:
1622 nla_nest_cancel(skb, nest);
1623 return -1;
1626 static int
1627 cbq_dump_class_stats(struct Qdisc *sch, unsigned long arg,
1628 struct gnet_dump *d)
1630 struct cbq_sched_data *q = qdisc_priv(sch);
1631 struct cbq_class *cl = (struct cbq_class*)arg;
1633 cl->qstats.qlen = cl->q->q.qlen;
1634 cl->xstats.avgidle = cl->avgidle;
1635 cl->xstats.undertime = 0;
1637 if (cl->undertime != PSCHED_PASTPERFECT)
1638 cl->xstats.undertime = cl->undertime - q->now;
1640 if (gnet_stats_copy_basic(d, &cl->bstats) < 0 ||
1641 gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
1642 gnet_stats_copy_queue(d, &cl->qstats) < 0)
1643 return -1;
1645 return gnet_stats_copy_app(d, &cl->xstats, sizeof(cl->xstats));
1648 static int cbq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
1649 struct Qdisc **old)
1651 struct cbq_class *cl = (struct cbq_class*)arg;
1653 if (cl) {
1654 if (new == NULL) {
1655 new = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue,
1656 &pfifo_qdisc_ops,
1657 cl->common.classid);
1658 if (new == NULL)
1659 return -ENOBUFS;
1660 } else {
1661 #ifdef CONFIG_NET_CLS_ACT
1662 if (cl->police == TC_POLICE_RECLASSIFY)
1663 new->reshape_fail = cbq_reshape_fail;
1664 #endif
1666 sch_tree_lock(sch);
1667 *old = xchg(&cl->q, new);
1668 qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
1669 qdisc_reset(*old);
1670 sch_tree_unlock(sch);
1672 return 0;
1674 return -ENOENT;
1677 static struct Qdisc *
1678 cbq_leaf(struct Qdisc *sch, unsigned long arg)
1680 struct cbq_class *cl = (struct cbq_class*)arg;
1682 return cl ? cl->q : NULL;
1685 static void cbq_qlen_notify(struct Qdisc *sch, unsigned long arg)
1687 struct cbq_class *cl = (struct cbq_class *)arg;
1689 if (cl->q->q.qlen == 0)
1690 cbq_deactivate_class(cl);
1693 static unsigned long cbq_get(struct Qdisc *sch, u32 classid)
1695 struct cbq_sched_data *q = qdisc_priv(sch);
1696 struct cbq_class *cl = cbq_class_lookup(q, classid);
1698 if (cl) {
1699 cl->refcnt++;
1700 return (unsigned long)cl;
1702 return 0;
1705 static void cbq_destroy_class(struct Qdisc *sch, struct cbq_class *cl)
1707 struct cbq_sched_data *q = qdisc_priv(sch);
1709 WARN_ON(cl->filters);
1711 tcf_destroy_chain(&cl->filter_list);
1712 qdisc_destroy(cl->q);
1713 qdisc_put_rtab(cl->R_tab);
1714 gen_kill_estimator(&cl->bstats, &cl->rate_est);
1715 if (cl != &q->link)
1716 kfree(cl);
1719 static void
1720 cbq_destroy(struct Qdisc* sch)
1722 struct cbq_sched_data *q = qdisc_priv(sch);
1723 struct hlist_node *n, *next;
1724 struct cbq_class *cl;
1725 unsigned h;
1727 #ifdef CONFIG_NET_CLS_ACT
1728 q->rx_class = NULL;
1729 #endif
1731 * Filters must be destroyed first because we don't destroy the
1732 * classes from root to leaves, which means that filters can still
1733 * be bound to classes which have been destroyed already. --TGR '04
1735 for (h = 0; h < q->clhash.hashsize; h++) {
1736 hlist_for_each_entry(cl, n, &q->clhash.hash[h], common.hnode)
1737 tcf_destroy_chain(&cl->filter_list);
1739 for (h = 0; h < q->clhash.hashsize; h++) {
1740 hlist_for_each_entry_safe(cl, n, next, &q->clhash.hash[h],
1741 common.hnode)
1742 cbq_destroy_class(sch, cl);
1744 qdisc_class_hash_destroy(&q->clhash);
1747 static void cbq_put(struct Qdisc *sch, unsigned long arg)
1749 struct cbq_class *cl = (struct cbq_class*)arg;
1751 if (--cl->refcnt == 0) {
1752 #ifdef CONFIG_NET_CLS_ACT
1753 spinlock_t *root_lock = qdisc_root_lock(sch);
1754 struct cbq_sched_data *q = qdisc_priv(sch);
1756 spin_lock_bh(root_lock);
1757 if (q->rx_class == cl)
1758 q->rx_class = NULL;
1759 spin_unlock_bh(root_lock);
1760 #endif
1762 cbq_destroy_class(sch, cl);
1766 static int
1767 cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **tca,
1768 unsigned long *arg)
1770 int err;
1771 struct cbq_sched_data *q = qdisc_priv(sch);
1772 struct cbq_class *cl = (struct cbq_class*)*arg;
1773 struct nlattr *opt = tca[TCA_OPTIONS];
1774 struct nlattr *tb[TCA_CBQ_MAX + 1];
1775 struct cbq_class *parent;
1776 struct qdisc_rate_table *rtab = NULL;
1778 if (opt == NULL)
1779 return -EINVAL;
1781 err = nla_parse_nested(tb, TCA_CBQ_MAX, opt, cbq_policy);
1782 if (err < 0)
1783 return err;
1785 if (cl) {
1786 /* Check parent */
1787 if (parentid) {
1788 if (cl->tparent &&
1789 cl->tparent->common.classid != parentid)
1790 return -EINVAL;
1791 if (!cl->tparent && parentid != TC_H_ROOT)
1792 return -EINVAL;
1795 if (tb[TCA_CBQ_RATE]) {
1796 rtab = qdisc_get_rtab(nla_data(tb[TCA_CBQ_RATE]), tb[TCA_CBQ_RTAB]);
1797 if (rtab == NULL)
1798 return -EINVAL;
1801 /* Change class parameters */
1802 sch_tree_lock(sch);
1804 if (cl->next_alive != NULL)
1805 cbq_deactivate_class(cl);
1807 if (rtab) {
1808 rtab = xchg(&cl->R_tab, rtab);
1809 qdisc_put_rtab(rtab);
1812 if (tb[TCA_CBQ_LSSOPT])
1813 cbq_set_lss(cl, nla_data(tb[TCA_CBQ_LSSOPT]));
1815 if (tb[TCA_CBQ_WRROPT]) {
1816 cbq_rmprio(q, cl);
1817 cbq_set_wrr(cl, nla_data(tb[TCA_CBQ_WRROPT]));
1820 if (tb[TCA_CBQ_OVL_STRATEGY])
1821 cbq_set_overlimit(cl, nla_data(tb[TCA_CBQ_OVL_STRATEGY]));
1823 #ifdef CONFIG_NET_CLS_ACT
1824 if (tb[TCA_CBQ_POLICE])
1825 cbq_set_police(cl, nla_data(tb[TCA_CBQ_POLICE]));
1826 #endif
1828 if (tb[TCA_CBQ_FOPT])
1829 cbq_set_fopt(cl, nla_data(tb[TCA_CBQ_FOPT]));
1831 if (cl->q->q.qlen)
1832 cbq_activate_class(cl);
1834 sch_tree_unlock(sch);
1836 if (tca[TCA_RATE])
1837 gen_replace_estimator(&cl->bstats, &cl->rate_est,
1838 qdisc_root_lock(sch),
1839 tca[TCA_RATE]);
1840 return 0;
1843 if (parentid == TC_H_ROOT)
1844 return -EINVAL;
1846 if (tb[TCA_CBQ_WRROPT] == NULL || tb[TCA_CBQ_RATE] == NULL ||
1847 tb[TCA_CBQ_LSSOPT] == NULL)
1848 return -EINVAL;
1850 rtab = qdisc_get_rtab(nla_data(tb[TCA_CBQ_RATE]), tb[TCA_CBQ_RTAB]);
1851 if (rtab == NULL)
1852 return -EINVAL;
1854 if (classid) {
1855 err = -EINVAL;
1856 if (TC_H_MAJ(classid^sch->handle) || cbq_class_lookup(q, classid))
1857 goto failure;
1858 } else {
1859 int i;
1860 classid = TC_H_MAKE(sch->handle,0x8000);
1862 for (i=0; i<0x8000; i++) {
1863 if (++q->hgenerator >= 0x8000)
1864 q->hgenerator = 1;
1865 if (cbq_class_lookup(q, classid|q->hgenerator) == NULL)
1866 break;
1868 err = -ENOSR;
1869 if (i >= 0x8000)
1870 goto failure;
1871 classid = classid|q->hgenerator;
1874 parent = &q->link;
1875 if (parentid) {
1876 parent = cbq_class_lookup(q, parentid);
1877 err = -EINVAL;
1878 if (parent == NULL)
1879 goto failure;
1882 err = -ENOBUFS;
1883 cl = kzalloc(sizeof(*cl), GFP_KERNEL);
1884 if (cl == NULL)
1885 goto failure;
1886 cl->R_tab = rtab;
1887 rtab = NULL;
1888 cl->refcnt = 1;
1889 if (!(cl->q = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue,
1890 &pfifo_qdisc_ops, classid)))
1891 cl->q = &noop_qdisc;
1892 cl->common.classid = classid;
1893 cl->tparent = parent;
1894 cl->qdisc = sch;
1895 cl->allot = parent->allot;
1896 cl->quantum = cl->allot;
1897 cl->weight = cl->R_tab->rate.rate;
1899 sch_tree_lock(sch);
1900 cbq_link_class(cl);
1901 cl->borrow = cl->tparent;
1902 if (cl->tparent != &q->link)
1903 cl->share = cl->tparent;
1904 cbq_adjust_levels(parent);
1905 cl->minidle = -0x7FFFFFFF;
1906 cbq_set_lss(cl, nla_data(tb[TCA_CBQ_LSSOPT]));
1907 cbq_set_wrr(cl, nla_data(tb[TCA_CBQ_WRROPT]));
1908 if (cl->ewma_log==0)
1909 cl->ewma_log = q->link.ewma_log;
1910 if (cl->maxidle==0)
1911 cl->maxidle = q->link.maxidle;
1912 if (cl->avpkt==0)
1913 cl->avpkt = q->link.avpkt;
1914 cl->overlimit = cbq_ovl_classic;
1915 if (tb[TCA_CBQ_OVL_STRATEGY])
1916 cbq_set_overlimit(cl, nla_data(tb[TCA_CBQ_OVL_STRATEGY]));
1917 #ifdef CONFIG_NET_CLS_ACT
1918 if (tb[TCA_CBQ_POLICE])
1919 cbq_set_police(cl, nla_data(tb[TCA_CBQ_POLICE]));
1920 #endif
1921 if (tb[TCA_CBQ_FOPT])
1922 cbq_set_fopt(cl, nla_data(tb[TCA_CBQ_FOPT]));
1923 sch_tree_unlock(sch);
1925 qdisc_class_hash_grow(sch, &q->clhash);
1927 if (tca[TCA_RATE])
1928 gen_new_estimator(&cl->bstats, &cl->rate_est,
1929 qdisc_root_lock(sch), tca[TCA_RATE]);
1931 *arg = (unsigned long)cl;
1932 return 0;
1934 failure:
1935 qdisc_put_rtab(rtab);
1936 return err;
1939 static int cbq_delete(struct Qdisc *sch, unsigned long arg)
1941 struct cbq_sched_data *q = qdisc_priv(sch);
1942 struct cbq_class *cl = (struct cbq_class*)arg;
1943 unsigned int qlen;
1945 if (cl->filters || cl->children || cl == &q->link)
1946 return -EBUSY;
1948 sch_tree_lock(sch);
1950 qlen = cl->q->q.qlen;
1951 qdisc_reset(cl->q);
1952 qdisc_tree_decrease_qlen(cl->q, qlen);
1954 if (cl->next_alive)
1955 cbq_deactivate_class(cl);
1957 if (q->tx_borrowed == cl)
1958 q->tx_borrowed = q->tx_class;
1959 if (q->tx_class == cl) {
1960 q->tx_class = NULL;
1961 q->tx_borrowed = NULL;
1963 #ifdef CONFIG_NET_CLS_ACT
1964 if (q->rx_class == cl)
1965 q->rx_class = NULL;
1966 #endif
1968 cbq_unlink_class(cl);
1969 cbq_adjust_levels(cl->tparent);
1970 cl->defmap = 0;
1971 cbq_sync_defmap(cl);
1973 cbq_rmprio(q, cl);
1974 sch_tree_unlock(sch);
1976 if (--cl->refcnt == 0)
1977 cbq_destroy_class(sch, cl);
1979 return 0;
1982 static struct tcf_proto **cbq_find_tcf(struct Qdisc *sch, unsigned long arg)
1984 struct cbq_sched_data *q = qdisc_priv(sch);
1985 struct cbq_class *cl = (struct cbq_class *)arg;
1987 if (cl == NULL)
1988 cl = &q->link;
1990 return &cl->filter_list;
1993 static unsigned long cbq_bind_filter(struct Qdisc *sch, unsigned long parent,
1994 u32 classid)
1996 struct cbq_sched_data *q = qdisc_priv(sch);
1997 struct cbq_class *p = (struct cbq_class*)parent;
1998 struct cbq_class *cl = cbq_class_lookup(q, classid);
2000 if (cl) {
2001 if (p && p->level <= cl->level)
2002 return 0;
2003 cl->filters++;
2004 return (unsigned long)cl;
2006 return 0;
2009 static void cbq_unbind_filter(struct Qdisc *sch, unsigned long arg)
2011 struct cbq_class *cl = (struct cbq_class*)arg;
2013 cl->filters--;
2016 static void cbq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
2018 struct cbq_sched_data *q = qdisc_priv(sch);
2019 struct cbq_class *cl;
2020 struct hlist_node *n;
2021 unsigned h;
2023 if (arg->stop)
2024 return;
2026 for (h = 0; h < q->clhash.hashsize; h++) {
2027 hlist_for_each_entry(cl, n, &q->clhash.hash[h], common.hnode) {
2028 if (arg->count < arg->skip) {
2029 arg->count++;
2030 continue;
2032 if (arg->fn(sch, (unsigned long)cl, arg) < 0) {
2033 arg->stop = 1;
2034 return;
2036 arg->count++;
2041 static const struct Qdisc_class_ops cbq_class_ops = {
2042 .graft = cbq_graft,
2043 .leaf = cbq_leaf,
2044 .qlen_notify = cbq_qlen_notify,
2045 .get = cbq_get,
2046 .put = cbq_put,
2047 .change = cbq_change_class,
2048 .delete = cbq_delete,
2049 .walk = cbq_walk,
2050 .tcf_chain = cbq_find_tcf,
2051 .bind_tcf = cbq_bind_filter,
2052 .unbind_tcf = cbq_unbind_filter,
2053 .dump = cbq_dump_class,
2054 .dump_stats = cbq_dump_class_stats,
2057 static struct Qdisc_ops cbq_qdisc_ops __read_mostly = {
2058 .next = NULL,
2059 .cl_ops = &cbq_class_ops,
2060 .id = "cbq",
2061 .priv_size = sizeof(struct cbq_sched_data),
2062 .enqueue = cbq_enqueue,
2063 .dequeue = cbq_dequeue,
2064 .requeue = cbq_requeue,
2065 .drop = cbq_drop,
2066 .init = cbq_init,
2067 .reset = cbq_reset,
2068 .destroy = cbq_destroy,
2069 .change = NULL,
2070 .dump = cbq_dump,
2071 .dump_stats = cbq_dump_stats,
2072 .owner = THIS_MODULE,
2075 static int __init cbq_module_init(void)
2077 return register_qdisc(&cbq_qdisc_ops);
2079 static void __exit cbq_module_exit(void)
2081 unregister_qdisc(&cbq_qdisc_ops);
2083 module_init(cbq_module_init)
2084 module_exit(cbq_module_exit)
2085 MODULE_LICENSE("GPL");