net/sched/sch_cbq.c

   1 /*
   2  * net/sched/sch_cbq.c  Class-Based Queueing discipline.
   3  *
   4  *              This program is free software; you can redistribute it and/or
   5  *              modify it under the terms of the GNU General Public License
   6  *              as published by the Free Software Foundation; either version
   7  *              2 of the License, or (at your option) any later version.
   8  *
   9  * Authors:     Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
  10  *
  11  */
  12
  13 #include <linux/module.h>
  14 #include <linux/types.h>
  15 #include <linux/kernel.h>
  16 #include <linux/string.h>
  17 #include <linux/errno.h>
  18 #include <linux/skbuff.h>
  19 #include <net/netlink.h>
  20 #include <net/pkt_sched.h>
  21
  22
  23 /*      Class-Based Queueing (CBQ) algorithm.
  24         =======================================
  25
  26         Sources: [1] Sally Floyd and Van Jacobson, "Link-sharing and Resource
  27                  Management Models for Packet Networks",
  28                  IEEE/ACM Transactions on Networking, Vol.3, No.4, 1995
  29
  30                  [2] Sally Floyd, "Notes on CBQ and Guaranteed Service", 1995
  31
  32                  [3] Sally Floyd, "Notes on Class-Based Queueing: Setting
  33                  Parameters", 1996
  34
  35                  [4] Sally Floyd and Michael Speer, "Experimental Results
  36                  for Class-Based Queueing", 1998, not published.
  37
  38         -----------------------------------------------------------------------
  39
  40         Algorithm skeleton was taken from NS simulator cbq.cc.
  41         If someone wants to check this code against the LBL version,
  42         he should take into account that ONLY the skeleton was borrowed,
  43         the implementation is different. Particularly:
  44
  45         --- The WRR algorithm is different. Our version looks more
  46         reasonable (I hope) and works when quanta are allowed to be
  47         less than MTU, which is always the case when real time classes
  48         have small rates. Note, that the statement of [3] is
  49         incomplete, delay may actually be estimated even if class
  50         per-round allotment is less than MTU. Namely, if per-round
  51         allotment is W*r_i, and r_1+...+r_k = r < 1
  52
  53         delay_i <= ([MTU/(W*r_i)]*W*r + W*r + k*MTU)/B
  54
  55         In the worst case we have IntServ estimate with D = W*r+k*MTU
  56         and C = MTU*r. The proof (if correct at all) is trivial.
  57
  58
  59         --- It seems that cbq-2.0 is not very accurate. At least, I cannot
  60         interpret some places, which look like wrong translations
  61         from NS. Anyone is advised to find these differences
  62         and explain to me, why I am wrong 8).
  63
  64         --- Linux has no EOI event, so that we cannot estimate true class
  65         idle time. Workaround is to consider the next dequeue event
  66         as sign that previous packet is finished. This is wrong because of
  67         internal device queueing, but on a permanently loaded link it is true.
  68         Moreover, combined with clock integrator, this scheme looks
  69         very close to an ideal solution.  */
  70
  71 struct cbq_sched_data;
  72
  73
  74 struct cbq_class
  75 {
  76         struct cbq_class        *next;          /* hash table link */
  77         struct cbq_class        *next_alive;    /* next class with backlog in this priority band */
  78
  79 /* Parameters */
  80         u32                     classid;
  81         unsigned char           priority;       /* class priority */
  82         unsigned char           priority2;      /* priority to be used after overlimit */
  83         unsigned char           ewma_log;       /* time constant for idle time calculation */
  84         unsigned char           ovl_strategy;
  85 #ifdef CONFIG_NET_CLS_ACT
  86         unsigned char           police;
  87 #endif
  88
  89         u32                     defmap;
  90
  91         /* Link-sharing scheduler parameters */
  92         long                    maxidle;        /* Class parameters: see below. */
  93         long                    offtime;
  94         long                    minidle;
  95         u32                     avpkt;
  96         struct qdisc_rate_table *R_tab;
  97
  98         /* Overlimit strategy parameters */
  99         void                    (*overlimit)(struct cbq_class *cl);
 100         psched_tdiff_t          penalty;
 101
 102         /* General scheduler (WRR) parameters */
 103         long                    allot;
 104         long                    quantum;        /* Allotment per WRR round */
 105         long                    weight;         /* Relative allotment: see below */
 106
 107         struct Qdisc            *qdisc;         /* Ptr to CBQ discipline */
 108         struct cbq_class        *split;         /* Ptr to split node */
 109         struct cbq_class        *share;         /* Ptr to LS parent in the class tree */
 110         struct cbq_class        *tparent;       /* Ptr to tree parent in the class tree */
 111         struct cbq_class        *borrow;        /* NULL if class is bandwidth limited;
 112                                                    parent otherwise */
 113         struct cbq_class        *sibling;       /* Sibling chain */
 114         struct cbq_class        *children;      /* Pointer to children chain */
 115
 116         struct Qdisc            *q;             /* Elementary queueing discipline */
 117
 118
 119 /* Variables */
 120         unsigned char           cpriority;      /* Effective priority */
 121         unsigned char           delayed;
 122         unsigned char           level;          /* level of the class in hierarchy:
 123                                                    0 for leaf classes, and maximal
 124                                                    level of children + 1 for nodes.
 125                                                  */
 126
 127         psched_time_t           last;           /* Last end of service */
 128         psched_time_t           undertime;
 129         long                    avgidle;
 130         long                    deficit;        /* Saved deficit for WRR */
 131         psched_time_t           penalized;
 132         struct gnet_stats_basic bstats;
 133         struct gnet_stats_queue qstats;
 134         struct gnet_stats_rate_est rate_est;
 135         struct tc_cbq_xstats    xstats;
 136
 137         struct tcf_proto        *filter_list;
 138
 139         int                     refcnt;
 140         int                     filters;
 141
 142         struct cbq_class        *defaults[TC_PRIO_MAX+1];
 143 };
 144
 145 struct cbq_sched_data
 146 {
 147         struct cbq_class        *classes[16];           /* Hash table of all classes */
 148         int                     nclasses[TC_CBQ_MAXPRIO+1];
 149         unsigned                quanta[TC_CBQ_MAXPRIO+1];
 150
 151         struct cbq_class        link;
 152
 153         unsigned                activemask;
 154         struct cbq_class        *active[TC_CBQ_MAXPRIO+1];      /* List of all classes
 155                                                                    with backlog */
 156
 157 #ifdef CONFIG_NET_CLS_ACT
 158         struct cbq_class        *rx_class;
 159 #endif
 160         struct cbq_class        *tx_class;
 161         struct cbq_class        *tx_borrowed;
 162         int                     tx_len;
 163         psched_time_t           now;            /* Cached timestamp */
 164         psched_time_t           now_rt;         /* Cached real time */
 165         unsigned                pmask;
 166
 167         struct hrtimer          delay_timer;
 168         struct qdisc_watchdog   watchdog;       /* Watchdog timer,
 169                                                    started when CBQ has
 170                                                    backlog, but cannot
 171                                                    transmit just now */
 172         psched_tdiff_t          wd_expires;
 173         int                     toplevel;
 174         u32                     hgenerator;
 175 };
 176
 177
 178 #define L2T(cl,len)     qdisc_l2t((cl)->R_tab,len)
 179
 180
 181 static __inline__ unsigned cbq_hash(u32 h)
 182 {
 183         h ^= h>>8;
 184         h ^= h>>4;
 185         return h&0xF;
 186 }
 187
 188 static __inline__ struct cbq_class *
 189 cbq_class_lookup(struct cbq_sched_data *q, u32 classid)
 190 {
 191         struct cbq_class *cl;
 192
 193         for (cl = q->classes[cbq_hash(classid)]; cl; cl = cl->next)
 194                 if (cl->classid == classid)
 195                         return cl;
 196         return NULL;
 197 }
 198
 199 #ifdef CONFIG_NET_CLS_ACT
 200
 201 static struct cbq_class *
 202 cbq_reclassify(struct sk_buff *skb, struct cbq_class *this)
 203 {
 204         struct cbq_class *cl, *new;
 205
 206         for (cl = this->tparent; cl; cl = cl->tparent)
 207                 if ((new = cl->defaults[TC_PRIO_BESTEFFORT]) != NULL && new != this)
 208                         return new;
 209
 210         return NULL;
 211 }
 212
 213 #endif
 214
 215 /* Classify packet. The procedure is pretty complicated, but
 216    it allows us to combine link sharing and priority scheduling
 217    transparently.
 218
 219    Namely, you can put link sharing rules (f.e. route based) at root of CBQ,
 220    so that it resolves to split nodes. Then packets are classified
 221    by logical priority, or a more specific classifier may be attached
 222    to the split node.
 223  */
 224
 225 static struct cbq_class *
 226 cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
 227 {
 228         struct cbq_sched_data *q = qdisc_priv(sch);
 229         struct cbq_class *head = &q->link;
 230         struct cbq_class **defmap;
 231         struct cbq_class *cl = NULL;
 232         u32 prio = skb->priority;
 233         struct tcf_result res;
 234
 235         /*
 236          *  Step 1. If skb->priority points to one of our classes, use it.
 237          */
 238         if (TC_H_MAJ(prio^sch->handle) == 0 &&
 239             (cl = cbq_class_lookup(q, prio)) != NULL)
 240                 return cl;
 241
 242         *qerr = NET_XMIT_BYPASS;
 243         for (;;) {
 244                 int result = 0;
 245                 defmap = head->defaults;
 246
 247                 /*
 248                  * Step 2+n. Apply classifier.
 249                  */
 250                 if (!head->filter_list ||
 251                     (result = tc_classify_compat(skb, head->filter_list, &res)) < 0)
 252                         goto fallback;
 253
 254                 if ((cl = (void*)res.class) == NULL) {
 255                         if (TC_H_MAJ(res.classid))
 256                                 cl = cbq_class_lookup(q, res.classid);
 257                         else if ((cl = defmap[res.classid&TC_PRIO_MAX]) == NULL)
 258                                 cl = defmap[TC_PRIO_BESTEFFORT];
 259
 260                         if (cl == NULL || cl->level >= head->level)
 261                                 goto fallback;
 262                 }
 263
 264 #ifdef CONFIG_NET_CLS_ACT
 265                 switch (result) {
 266                 case TC_ACT_QUEUED:
 267                 case TC_ACT_STOLEN:
 268                         *qerr = NET_XMIT_SUCCESS;
 269                 case TC_ACT_SHOT:
 270                         return NULL;
 271                 case TC_ACT_RECLASSIFY:
 272                         return cbq_reclassify(skb, cl);
 273                 }
 274 #endif
 275                 if (cl->level == 0)
 276                         return cl;
 277
 278                 /*
 279                  * Step 3+n. If classifier selected a link sharing class,
 280                  *         apply agency specific classifier.
 281                  *         Repeat this procdure until we hit a leaf node.
 282                  */
 283                 head = cl;
 284         }
 285
 286 fallback:
 287         cl = head;
 288
 289         /*
 290          * Step 4. No success...
 291          */
 292         if (TC_H_MAJ(prio) == 0 &&
 293             !(cl = head->defaults[prio&TC_PRIO_MAX]) &&
 294             !(cl = head->defaults[TC_PRIO_BESTEFFORT]))
 295                 return head;
 296
 297         return cl;
 298 }
 299
 300 /*
 301    A packet has just been enqueued on the empty class.
 302    cbq_activate_class adds it to the tail of active class list
 303    of its priority band.
 304  */
 305
 306 static __inline__ void cbq_activate_class(struct cbq_class *cl)
 307 {
 308         struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
 309         int prio = cl->cpriority;
 310         struct cbq_class *cl_tail;
 311
 312         cl_tail = q->active[prio];
 313         q->active[prio] = cl;
 314
 315         if (cl_tail != NULL) {
 316                 cl->next_alive = cl_tail->next_alive;
 317                 cl_tail->next_alive = cl;
 318         } else {
 319                 cl->next_alive = cl;
 320                 q->activemask |= (1<<prio);
 321         }
 322 }
 323
 324 /*
 325    Unlink class from active chain.
 326    Note that this same procedure is done directly in cbq_dequeue*
 327    during round-robin procedure.
 328  */
 329
 330 static void cbq_deactivate_class(struct cbq_class *this)
 331 {
 332         struct cbq_sched_data *q = qdisc_priv(this->qdisc);
 333         int prio = this->cpriority;
 334         struct cbq_class *cl;
 335         struct cbq_class *cl_prev = q->active[prio];
 336
 337         do {
 338                 cl = cl_prev->next_alive;
 339                 if (cl == this) {
 340                         cl_prev->next_alive = cl->next_alive;
 341                         cl->next_alive = NULL;
 342
 343                         if (cl == q->active[prio]) {
 344                                 q->active[prio] = cl_prev;
 345                                 if (cl == q->active[prio]) {
 346                                         q->active[prio] = NULL;
 347                                         q->activemask &= ~(1<<prio);
 348                                         return;
 349                                 }
 350                         }
 351                         return;
 352                 }
 353         } while ((cl_prev = cl) != q->active[prio]);
 354 }
 355
 356 static void
 357 cbq_mark_toplevel(struct cbq_sched_data *q, struct cbq_class *cl)
 358 {
 359         int toplevel = q->toplevel;
 360
 361         if (toplevel > cl->level && !(cl->q->flags&TCQ_F_THROTTLED)) {
 362                 psched_time_t now;
 363                 psched_tdiff_t incr;
 364
 365                 now = psched_get_time();
 366                 incr = now - q->now_rt;
 367                 now = q->now + incr;
 368
 369                 do {
 370                         if (cl->undertime < now) {
 371                                 q->toplevel = cl->level;
 372                                 return;
 373                         }
 374                 } while ((cl=cl->borrow) != NULL && toplevel > cl->level);
 375         }
 376 }
 377
 378 static int
 379 cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 380 {
 381         struct cbq_sched_data *q = qdisc_priv(sch);
 382         int len = skb->len;
 383         int uninitialized_var(ret);
 384         struct cbq_class *cl = cbq_classify(skb, sch, &ret);
 385
 386 #ifdef CONFIG_NET_CLS_ACT
 387         q->rx_class = cl;
 388 #endif
 389         if (cl == NULL) {
 390                 if (ret == NET_XMIT_BYPASS)
 391                         sch->qstats.drops++;
 392                 kfree_skb(skb);
 393                 return ret;
 394         }
 395
 396 #ifdef CONFIG_NET_CLS_ACT
 397         cl->q->__parent = sch;
 398 #endif
 399         if ((ret = cl->q->enqueue(skb, cl->q)) == NET_XMIT_SUCCESS) {
 400                 sch->q.qlen++;
 401                 sch->bstats.packets++;
 402                 sch->bstats.bytes+=len;
 403                 cbq_mark_toplevel(q, cl);
 404                 if (!cl->next_alive)
 405                         cbq_activate_class(cl);
 406                 return ret;
 407         }
 408
 409         sch->qstats.drops++;
 410         cbq_mark_toplevel(q, cl);
 411         cl->qstats.drops++;
 412         return ret;
 413 }
 414
 415 static int
 416 cbq_requeue(struct sk_buff *skb, struct Qdisc *sch)
 417 {
 418         struct cbq_sched_data *q = qdisc_priv(sch);
 419         struct cbq_class *cl;
 420         int ret;
 421
 422         if ((cl = q->tx_class) == NULL) {
 423                 kfree_skb(skb);
 424                 sch->qstats.drops++;
 425                 return NET_XMIT_CN;
 426         }
 427         q->tx_class = NULL;
 428
 429         cbq_mark_toplevel(q, cl);
 430
 431 #ifdef CONFIG_NET_CLS_ACT
 432         q->rx_class = cl;
 433         cl->q->__parent = sch;
 434 #endif
 435         if ((ret = cl->q->ops->requeue(skb, cl->q)) == 0) {
 436                 sch->q.qlen++;
 437                 sch->qstats.requeues++;
 438                 if (!cl->next_alive)
 439                         cbq_activate_class(cl);
 440                 return 0;
 441         }
 442         sch->qstats.drops++;
 443         cl->qstats.drops++;
 444         return ret;
 445 }
 446
 447 /* Overlimit actions */
 448
 449 /* TC_CBQ_OVL_CLASSIC: (default) penalize leaf class by adding offtime */
 450
 451 static void cbq_ovl_classic(struct cbq_class *cl)
 452 {
 453         struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
 454         psched_tdiff_t delay = cl->undertime - q->now;
 455
 456         if (!cl->delayed) {
 457                 delay += cl->offtime;
 458
 459                 /*
 460                    Class goes to sleep, so that it will have no
 461                    chance to work avgidle. Let's forgive it 8)
 462
 463                    BTW cbq-2.0 has a crap in this
 464                    place, apparently they forgot to shift it by cl->ewma_log.
 465                  */
 466                 if (cl->avgidle < 0)
 467                         delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
 468                 if (cl->avgidle < cl->minidle)
 469                         cl->avgidle = cl->minidle;
 470                 if (delay <= 0)
 471                         delay = 1;
 472                 cl->undertime = q->now + delay;
 473
 474                 cl->xstats.overactions++;
 475                 cl->delayed = 1;
 476         }
 477         if (q->wd_expires == 0 || q->wd_expires > delay)
 478                 q->wd_expires = delay;
 479
 480         /* Dirty work! We must schedule wakeups based on
 481            real available rate, rather than leaf rate,
 482            which may be tiny (even zero).
 483          */
 484         if (q->toplevel == TC_CBQ_MAXLEVEL) {
 485                 struct cbq_class *b;
 486                 psched_tdiff_t base_delay = q->wd_expires;
 487
 488                 for (b = cl->borrow; b; b = b->borrow) {
 489                         delay = b->undertime - q->now;
 490                         if (delay < base_delay) {
 491                                 if (delay <= 0)
 492                                         delay = 1;
 493                                 base_delay = delay;
 494                         }
 495                 }
 496
 497                 q->wd_expires = base_delay;
 498         }
 499 }
 500
 501 /* TC_CBQ_OVL_RCLASSIC: penalize by offtime classes in hierarchy, when
 502    they go overlimit
 503  */
 504
 505 static void cbq_ovl_rclassic(struct cbq_class *cl)
 506 {
 507         struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
 508         struct cbq_class *this = cl;
 509
 510         do {
 511                 if (cl->level > q->toplevel) {
 512                         cl = NULL;
 513                         break;
 514                 }
 515         } while ((cl = cl->borrow) != NULL);
 516
 517         if (cl == NULL)
 518                 cl = this;
 519         cbq_ovl_classic(cl);
 520 }
 521
 522 /* TC_CBQ_OVL_DELAY: delay until it will go to underlimit */
 523
 524 static void cbq_ovl_delay(struct cbq_class *cl)
 525 {
 526         struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
 527         psched_tdiff_t delay = cl->undertime - q->now;
 528
 529         if (!cl->delayed) {
 530                 psched_time_t sched = q->now;
 531                 ktime_t expires;
 532
 533                 delay += cl->offtime;
 534                 if (cl->avgidle < 0)
 535                         delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
 536                 if (cl->avgidle < cl->minidle)
 537                         cl->avgidle = cl->minidle;
 538                 cl->undertime = q->now + delay;
 539
 540                 if (delay > 0) {
 541                         sched += delay + cl->penalty;
 542                         cl->penalized = sched;
 543                         cl->cpriority = TC_CBQ_MAXPRIO;
 544                         q->pmask |= (1<<TC_CBQ_MAXPRIO);
 545
 546                         expires = ktime_set(0, 0);
 547                         expires = ktime_add_ns(expires, PSCHED_US2NS(sched));
 548                         if (hrtimer_try_to_cancel(&q->delay_timer) &&
 549                             ktime_to_ns(ktime_sub(q->delay_timer.expires,
 550                                                   expires)) > 0)
 551                                 q->delay_timer.expires = expires;
 552                         hrtimer_restart(&q->delay_timer);
 553                         cl->delayed = 1;
 554                         cl->xstats.overactions++;
 555                         return;
 556                 }
 557                 delay = 1;
 558         }
 559         if (q->wd_expires == 0 || q->wd_expires > delay)
 560                 q->wd_expires = delay;
 561 }
 562
 563 /* TC_CBQ_OVL_LOWPRIO: penalize class by lowering its priority band */
 564
 565 static void cbq_ovl_lowprio(struct cbq_class *cl)
 566 {
 567         struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
 568
 569         cl->penalized = q->now + cl->penalty;
 570
 571         if (cl->cpriority != cl->priority2) {
 572                 cl->cpriority = cl->priority2;
 573                 q->pmask |= (1<<cl->cpriority);
 574                 cl->xstats.overactions++;
 575         }
 576         cbq_ovl_classic(cl);
 577 }
 578
 579 /* TC_CBQ_OVL_DROP: penalize class by dropping */
 580
 581 static void cbq_ovl_drop(struct cbq_class *cl)
 582 {
 583         if (cl->q->ops->drop)
 584                 if (cl->q->ops->drop(cl->q))
 585                         cl->qdisc->q.qlen--;
 586         cl->xstats.overactions++;
 587         cbq_ovl_classic(cl);
 588 }
 589
 590 static psched_tdiff_t cbq_undelay_prio(struct cbq_sched_data *q, int prio,
 591                                        psched_time_t now)
 592 {
 593         struct cbq_class *cl;
 594         struct cbq_class *cl_prev = q->active[prio];
 595         psched_time_t sched = now;
 596
 597         if (cl_prev == NULL)
 598                 return 0;
 599
 600         do {
 601                 cl = cl_prev->next_alive;
 602                 if (now - cl->penalized > 0) {
 603                         cl_prev->next_alive = cl->next_alive;
 604                         cl->next_alive = NULL;
 605                         cl->cpriority = cl->priority;
 606                         cl->delayed = 0;
 607                         cbq_activate_class(cl);
 608
 609                         if (cl == q->active[prio]) {
 610                                 q->active[prio] = cl_prev;
 611                                 if (cl == q->active[prio]) {
 612                                         q->active[prio] = NULL;
 613                                         return 0;
 614                                 }
 615                         }
 616
 617                         cl = cl_prev->next_alive;
 618                 } else if (sched - cl->penalized > 0)
 619                         sched = cl->penalized;
 620         } while ((cl_prev = cl) != q->active[prio]);
 621
 622         return sched - now;
 623 }
 624
 625 static enum hrtimer_restart cbq_undelay(struct hrtimer *timer)
 626 {
 627         struct cbq_sched_data *q = container_of(timer, struct cbq_sched_data,
 628                                                 delay_timer);
 629         struct Qdisc *sch = q->watchdog.qdisc;
 630         psched_time_t now;
 631         psched_tdiff_t delay = 0;
 632         unsigned pmask;
 633
 634         now = psched_get_time();
 635
 636         pmask = q->pmask;
 637         q->pmask = 0;
 638
 639         while (pmask) {
 640                 int prio = ffz(~pmask);
 641                 psched_tdiff_t tmp;
 642
 643                 pmask &= ~(1<<prio);
 644
 645                 tmp = cbq_undelay_prio(q, prio, now);
 646                 if (tmp > 0) {
 647                         q->pmask |= 1<<prio;
 648                         if (tmp < delay || delay == 0)
 649                                 delay = tmp;
 650                 }
 651         }
 652
 653         if (delay) {
 654                 ktime_t time;
 655
 656                 time = ktime_set(0, 0);
 657                 time = ktime_add_ns(time, PSCHED_US2NS(now + delay));
 658                 hrtimer_start(&q->delay_timer, time, HRTIMER_MODE_ABS);
 659         }
 660
 661         sch->flags &= ~TCQ_F_THROTTLED;
 662         netif_schedule(sch->dev);
 663         return HRTIMER_NORESTART;
 664 }
 665
 666 #ifdef CONFIG_NET_CLS_ACT
 667 static int cbq_reshape_fail(struct sk_buff *skb, struct Qdisc *child)
 668 {
 669         int len = skb->len;
 670         struct Qdisc *sch = child->__parent;
 671         struct cbq_sched_data *q = qdisc_priv(sch);
 672         struct cbq_class *cl = q->rx_class;
 673
 674         q->rx_class = NULL;
 675
 676         if (cl && (cl = cbq_reclassify(skb, cl)) != NULL) {
 677
 678                 cbq_mark_toplevel(q, cl);
 679
 680                 q->rx_class = cl;
 681                 cl->q->__parent = sch;
 682
 683                 if (cl->q->enqueue(skb, cl->q) == 0) {
 684                         sch->q.qlen++;
 685                         sch->bstats.packets++;
 686                         sch->bstats.bytes+=len;
 687                         if (!cl->next_alive)
 688                                 cbq_activate_class(cl);
 689                         return 0;
 690                 }
 691                 sch->qstats.drops++;
 692                 return 0;
 693         }
 694
 695         sch->qstats.drops++;
 696         return -1;
 697 }
 698 #endif
 699
 700 /*
 701    It is mission critical procedure.
 702
 703    We "regenerate" toplevel cutoff, if transmitting class
 704    has backlog and it is not regulated. It is not part of
 705    original CBQ description, but looks more reasonable.
 706    Probably, it is wrong. This question needs further investigation.
 707 */
 708
 709 static __inline__ void
 710 cbq_update_toplevel(struct cbq_sched_data *q, struct cbq_class *cl,
 711                     struct cbq_class *borrowed)
 712 {
 713         if (cl && q->toplevel >= borrowed->level) {
 714                 if (cl->q->q.qlen > 1) {
 715                         do {
 716                                 if (borrowed->undertime == PSCHED_PASTPERFECT) {
 717                                         q->toplevel = borrowed->level;
 718                                         return;
 719                                 }
 720                         } while ((borrowed=borrowed->borrow) != NULL);
 721                 }
 722 #if 0
 723         /* It is not necessary now. Uncommenting it
 724            will save CPU cycles, but decrease fairness.
 725          */
 726                 q->toplevel = TC_CBQ_MAXLEVEL;
 727 #endif
 728         }
 729 }
 730
 731 static void
 732 cbq_update(struct cbq_sched_data *q)
 733 {
 734         struct cbq_class *this = q->tx_class;
 735         struct cbq_class *cl = this;
 736         int len = q->tx_len;
 737
 738         q->tx_class = NULL;
 739
 740         for ( ; cl; cl = cl->share) {
 741                 long avgidle = cl->avgidle;
 742                 long idle;
 743
 744                 cl->bstats.packets++;
 745                 cl->bstats.bytes += len;
 746
 747                 /*
 748                    (now - last) is total time between packet right edges.
 749                    (last_pktlen/rate) is "virtual" busy time, so that
 750
 751                          idle = (now - last) - last_pktlen/rate
 752                  */
 753
 754                 idle = q->now - cl->last;
 755                 if ((unsigned long)idle > 128*1024*1024) {
 756                         avgidle = cl->maxidle;
 757                 } else {
 758                         idle -= L2T(cl, len);
 759
 760                 /* true_avgidle := (1-W)*true_avgidle + W*idle,
 761                    where W=2^{-ewma_log}. But cl->avgidle is scaled:
 762                    cl->avgidle == true_avgidle/W,
 763                    hence:
 764                  */
 765                         avgidle += idle - (avgidle>>cl->ewma_log);
 766                 }
 767
 768                 if (avgidle <= 0) {
 769                         /* Overlimit or at-limit */
 770
 771                         if (avgidle < cl->minidle)
 772                                 avgidle = cl->minidle;
 773
 774                         cl->avgidle = avgidle;
 775
 776                         /* Calculate expected time, when this class
 777                            will be allowed to send.
 778                            It will occur, when:
 779                            (1-W)*true_avgidle + W*delay = 0, i.e.
 780                            idle = (1/W - 1)*(-true_avgidle)
 781                            or
 782                            idle = (1 - W)*(-cl->avgidle);
 783                          */
 784                         idle = (-avgidle) - ((-avgidle) >> cl->ewma_log);
 785
 786                         /*
 787                            That is not all.
 788                            To maintain the rate allocated to the class,
 789                            we add to undertime virtual clock,
 790                            necessary to complete transmitted packet.
 791                            (len/phys_bandwidth has been already passed
 792                            to the moment of cbq_update)
 793                          */
 794
 795                         idle -= L2T(&q->link, len);
 796                         idle += L2T(cl, len);
 797
 798                         cl->undertime = q->now + idle;
 799                 } else {
 800                         /* Underlimit */
 801
 802                         cl->undertime = PSCHED_PASTPERFECT;
 803                         if (avgidle > cl->maxidle)
 804                                 cl->avgidle = cl->maxidle;
 805                         else
 806                                 cl->avgidle = avgidle;
 807                 }
 808                 cl->last = q->now;
 809         }
 810
 811         cbq_update_toplevel(q, this, q->tx_borrowed);
 812 }
 813
 814 static __inline__ struct cbq_class *
 815 cbq_under_limit(struct cbq_class *cl)
 816 {
 817         struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
 818         struct cbq_class *this_cl = cl;
 819
 820         if (cl->tparent == NULL)
 821                 return cl;
 822
 823         if (cl->undertime == PSCHED_PASTPERFECT || q->now >= cl->undertime) {
 824                 cl->delayed = 0;
 825                 return cl;
 826         }
 827
 828         do {
 829                 /* It is very suspicious place. Now overlimit
 830                    action is generated for not bounded classes
 831                    only if link is completely congested.
 832                    Though it is in agree with ancestor-only paradigm,
 833                    it looks very stupid. Particularly,
 834                    it means that this chunk of code will either
 835                    never be called or result in strong amplification
 836                    of burstiness. Dangerous, silly, and, however,
 837                    no another solution exists.
 838                  */
 839                 if ((cl = cl->borrow) == NULL) {
 840                         this_cl->qstats.overlimits++;
 841                         this_cl->overlimit(this_cl);
 842                         return NULL;
 843                 }
 844                 if (cl->level > q->toplevel)
 845                         return NULL;
 846         } while (cl->undertime != PSCHED_PASTPERFECT && q->now < cl->undertime);
 847
 848         cl->delayed = 0;
 849         return cl;
 850 }
 851
 852 static __inline__ struct sk_buff *
 853 cbq_dequeue_prio(struct Qdisc *sch, int prio)
 854 {
 855         struct cbq_sched_data *q = qdisc_priv(sch);
 856         struct cbq_class *cl_tail, *cl_prev, *cl;
 857         struct sk_buff *skb;
 858         int deficit;
 859
 860         cl_tail = cl_prev = q->active[prio];
 861         cl = cl_prev->next_alive;
 862
 863         do {
 864                 deficit = 0;
 865
 866                 /* Start round */
 867                 do {
 868                         struct cbq_class *borrow = cl;
 869
 870                         if (cl->q->q.qlen &&
 871                             (borrow = cbq_under_limit(cl)) == NULL)
 872                                 goto skip_class;
 873
 874                         if (cl->deficit <= 0) {
 875                                 /* Class exhausted its allotment per
 876                                    this round. Switch to the next one.
 877                                  */
 878                                 deficit = 1;
 879                                 cl->deficit += cl->quantum;
 880                                 goto next_class;
 881                         }
 882
 883                         skb = cl->q->dequeue(cl->q);
 884
 885                         /* Class did not give us any skb :-(
 886                            It could occur even if cl->q->q.qlen != 0
 887                            f.e. if cl->q == "tbf"
 888                          */
 889                         if (skb == NULL)
 890                                 goto skip_class;
 891
 892                         cl->deficit -= skb->len;
 893                         q->tx_class = cl;
 894                         q->tx_borrowed = borrow;
 895                         if (borrow != cl) {
 896 #ifndef CBQ_XSTATS_BORROWS_BYTES
 897                                 borrow->xstats.borrows++;
 898                                 cl->xstats.borrows++;
 899 #else
 900                                 borrow->xstats.borrows += skb->len;
 901                                 cl->xstats.borrows += skb->len;
 902 #endif
 903                         }
 904                         q->tx_len = skb->len;
 905
 906                         if (cl->deficit <= 0) {
 907                                 q->active[prio] = cl;
 908                                 cl = cl->next_alive;
 909                                 cl->deficit += cl->quantum;
 910                         }
 911                         return skb;
 912
 913 skip_class:
 914                         if (cl->q->q.qlen == 0 || prio != cl->cpriority) {
 915                                 /* Class is empty or penalized.
 916                                    Unlink it from active chain.
 917                                  */
 918                                 cl_prev->next_alive = cl->next_alive;
 919                                 cl->next_alive = NULL;
 920
 921                                 /* Did cl_tail point to it? */
 922                                 if (cl == cl_tail) {
 923                                         /* Repair it! */
 924                                         cl_tail = cl_prev;
 925
 926                                         /* Was it the last class in this band? */
 927                                         if (cl == cl_tail) {
 928                                                 /* Kill the band! */
 929                                                 q->active[prio] = NULL;
 930                                                 q->activemask &= ~(1<<prio);
 931                                                 if (cl->q->q.qlen)
 932                                                         cbq_activate_class(cl);
 933                                                 return NULL;
 934                                         }
 935
 936                                         q->active[prio] = cl_tail;
 937                                 }
 938                                 if (cl->q->q.qlen)
 939                                         cbq_activate_class(cl);
 940
 941                                 cl = cl_prev;
 942                         }
 943
 944 next_class:
 945                         cl_prev = cl;
 946                         cl = cl->next_alive;
 947                 } while (cl_prev != cl_tail);
 948         } while (deficit);
 949
 950         q->active[prio] = cl_prev;
 951
 952         return NULL;
 953 }
 954
 955 static __inline__ struct sk_buff *
 956 cbq_dequeue_1(struct Qdisc *sch)
 957 {
 958         struct cbq_sched_data *q = qdisc_priv(sch);
 959         struct sk_buff *skb;
 960         unsigned activemask;
 961
 962         activemask = q->activemask&0xFF;
 963         while (activemask) {
 964                 int prio = ffz(~activemask);
 965                 activemask &= ~(1<<prio);
 966                 skb = cbq_dequeue_prio(sch, prio);
 967                 if (skb)
 968                         return skb;
 969         }
 970         return NULL;
 971 }
 972
 973 static struct sk_buff *
 974 cbq_dequeue(struct Qdisc *sch)
 975 {
 976         struct sk_buff *skb;
 977         struct cbq_sched_data *q = qdisc_priv(sch);
 978         psched_time_t now;
 979         psched_tdiff_t incr;
 980
 981         now = psched_get_time();
 982         incr = now - q->now_rt;
 983
 984         if (q->tx_class) {
 985                 psched_tdiff_t incr2;
 986                 /* Time integrator. We calculate EOS time
 987                    by adding expected packet transmission time.
 988                    If real time is greater, we warp artificial clock,
 989                    so that:
 990
 991                    cbq_time = max(real_time, work);
 992                  */
 993                 incr2 = L2T(&q->link, q->tx_len);
 994                 q->now += incr2;
 995                 cbq_update(q);
 996                 if ((incr -= incr2) < 0)
 997                         incr = 0;
 998         }
 999         q->now += incr;
1000         q->now_rt = now;
1001
1002         for (;;) {
1003                 q->wd_expires = 0;
1004
1005                 skb = cbq_dequeue_1(sch);
1006                 if (skb) {
1007                         sch->q.qlen--;
1008                         sch->flags &= ~TCQ_F_THROTTLED;
1009                         return skb;
1010                 }
1011
1012                 /* All the classes are overlimit.
1013
1014                    It is possible, if:
1015
1016                    1. Scheduler is empty.
1017                    2. Toplevel cutoff inhibited borrowing.
1018                    3. Root class is overlimit.
1019
1020                    Reset 2d and 3d conditions and retry.
1021
1022                    Note, that NS and cbq-2.0 are buggy, peeking
1023                    an arbitrary class is appropriate for ancestor-only
1024                    sharing, but not for toplevel algorithm.
1025
1026                    Our version is better, but slower, because it requires
1027                    two passes, but it is unavoidable with top-level sharing.
1028                 */
1029
1030                 if (q->toplevel == TC_CBQ_MAXLEVEL &&
1031                     q->link.undertime == PSCHED_PASTPERFECT)
1032                         break;
1033
1034                 q->toplevel = TC_CBQ_MAXLEVEL;
1035                 q->link.undertime = PSCHED_PASTPERFECT;
1036         }
1037
1038         /* No packets in scheduler or nobody wants to give them to us :-(
1039            Sigh... start watchdog timer in the last case. */
1040
1041         if (sch->q.qlen) {
1042                 sch->qstats.overlimits++;
1043                 if (q->wd_expires)
1044                         qdisc_watchdog_schedule(&q->watchdog,
1045                                                 now + q->wd_expires);
1046         }
1047         return NULL;
1048 }
1049
1050 /* CBQ class maintanance routines */
1051
1052 static void cbq_adjust_levels(struct cbq_class *this)
1053 {
1054         if (this == NULL)
1055                 return;
1056
1057         do {
1058                 int level = 0;
1059                 struct cbq_class *cl;
1060
1061                 if ((cl = this->children) != NULL) {
1062                         do {
1063                                 if (cl->level > level)
1064                                         level = cl->level;
1065                         } while ((cl = cl->sibling) != this->children);
1066                 }
1067                 this->level = level+1;
1068         } while ((this = this->tparent) != NULL);
1069 }
1070
1071 static void cbq_normalize_quanta(struct cbq_sched_data *q, int prio)
1072 {
1073         struct cbq_class *cl;
1074         unsigned h;
1075
1076         if (q->quanta[prio] == 0)
1077                 return;
1078
1079         for (h=0; h<16; h++) {
1080                 for (cl = q->classes[h]; cl; cl = cl->next) {
1081                         /* BUGGGG... Beware! This expression suffer of
1082                            arithmetic overflows!
1083                          */
1084                         if (cl->priority == prio) {
1085                                 cl->quantum = (cl->weight*cl->allot*q->nclasses[prio])/
1086                                         q->quanta[prio];
1087                         }
1088                         if (cl->quantum <= 0 || cl->quantum>32*cl->qdisc->dev->mtu) {
1089                                 printk(KERN_WARNING "CBQ: class %08x has bad quantum==%ld, repaired.\n", cl->classid, cl->quantum);
1090                                 cl->quantum = cl->qdisc->dev->mtu/2 + 1;
1091                         }
1092                 }
1093         }
1094 }
1095
1096 static void cbq_sync_defmap(struct cbq_class *cl)
1097 {
1098         struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
1099         struct cbq_class *split = cl->split;
1100         unsigned h;
1101         int i;
1102
1103         if (split == NULL)
1104                 return;
1105
1106         for (i=0; i<=TC_PRIO_MAX; i++) {
1107                 if (split->defaults[i] == cl && !(cl->defmap&(1<<i)))
1108                         split->defaults[i] = NULL;
1109         }
1110
1111         for (i=0; i<=TC_PRIO_MAX; i++) {
1112                 int level = split->level;
1113
1114                 if (split->defaults[i])
1115                         continue;
1116
1117                 for (h=0; h<16; h++) {
1118                         struct cbq_class *c;
1119
1120                         for (c = q->classes[h]; c; c = c->next) {
1121                                 if (c->split == split && c->level < level &&
1122                                     c->defmap&(1<<i)) {
1123                                         split->defaults[i] = c;
1124                                         level = c->level;
1125                                 }
1126                         }
1127                 }
1128         }
1129 }
1130
1131 static void cbq_change_defmap(struct cbq_class *cl, u32 splitid, u32 def, u32 mask)
1132 {
1133         struct cbq_class *split = NULL;
1134
1135         if (splitid == 0) {
1136                 if ((split = cl->split) == NULL)
1137                         return;
1138                 splitid = split->classid;
1139         }
1140
1141         if (split == NULL || split->classid != splitid) {
1142                 for (split = cl->tparent; split; split = split->tparent)
1143                         if (split->classid == splitid)
1144                                 break;
1145         }
1146
1147         if (split == NULL)
1148                 return;
1149
1150         if (cl->split != split) {
1151                 cl->defmap = 0;
1152                 cbq_sync_defmap(cl);
1153                 cl->split = split;
1154                 cl->defmap = def&mask;
1155         } else
1156                 cl->defmap = (cl->defmap&~mask)|(def&mask);
1157
1158         cbq_sync_defmap(cl);
1159 }
1160
1161 static void cbq_unlink_class(struct cbq_class *this)
1162 {
1163         struct cbq_class *cl, **clp;
1164         struct cbq_sched_data *q = qdisc_priv(this->qdisc);
1165
1166         for (clp = &q->classes[cbq_hash(this->classid)]; (cl = *clp) != NULL; clp = &cl->next) {
1167                 if (cl == this) {
1168                         *clp = cl->next;
1169                         cl->next = NULL;
1170                         break;
1171                 }
1172         }
1173
1174         if (this->tparent) {
1175                 clp=&this->sibling;
1176                 cl = *clp;
1177                 do {
1178                         if (cl == this) {
1179                                 *clp = cl->sibling;
1180                                 break;
1181                         }
1182                         clp = &cl->sibling;
1183                 } while ((cl = *clp) != this->sibling);
1184
1185                 if (this->tparent->children == this) {
1186                         this->tparent->children = this->sibling;
1187                         if (this->sibling == this)
1188                                 this->tparent->children = NULL;
1189                 }
1190         } else {
1191                 BUG_TRAP(this->sibling == this);
1192         }
1193 }
1194
1195 static void cbq_link_class(struct cbq_class *this)
1196 {
1197         struct cbq_sched_data *q = qdisc_priv(this->qdisc);
1198         unsigned h = cbq_hash(this->classid);
1199         struct cbq_class *parent = this->tparent;
1200
1201         this->sibling = this;
1202         this->next = q->classes[h];
1203         q->classes[h] = this;
1204
1205         if (parent == NULL)
1206                 return;
1207
1208         if (parent->children == NULL) {
1209                 parent->children = this;
1210         } else {
1211                 this->sibling = parent->children->sibling;
1212                 parent->children->sibling = this;
1213         }
1214 }
1215
1216 static unsigned int cbq_drop(struct Qdisc* sch)
1217 {
1218         struct cbq_sched_data *q = qdisc_priv(sch);
1219         struct cbq_class *cl, *cl_head;
1220         int prio;
1221         unsigned int len;
1222
1223         for (prio = TC_CBQ_MAXPRIO; prio >= 0; prio--) {
1224                 if ((cl_head = q->active[prio]) == NULL)
1225                         continue;
1226
1227                 cl = cl_head;
1228                 do {
1229                         if (cl->q->ops->drop && (len = cl->q->ops->drop(cl->q))) {
1230                                 sch->q.qlen--;
1231                                 if (!cl->q->q.qlen)
1232                                         cbq_deactivate_class(cl);
1233                                 return len;
1234                         }
1235                 } while ((cl = cl->next_alive) != cl_head);
1236         }
1237         return 0;
1238 }
1239
1240 static void
1241 cbq_reset(struct Qdisc* sch)
1242 {
1243         struct cbq_sched_data *q = qdisc_priv(sch);
1244         struct cbq_class *cl;
1245         int prio;
1246         unsigned h;
1247
1248         q->activemask = 0;
1249         q->pmask = 0;
1250         q->tx_class = NULL;
1251         q->tx_borrowed = NULL;
1252         qdisc_watchdog_cancel(&q->watchdog);
1253         hrtimer_cancel(&q->delay_timer);
1254         q->toplevel = TC_CBQ_MAXLEVEL;
1255         q->now = psched_get_time();
1256         q->now_rt = q->now;
1257
1258         for (prio = 0; prio <= TC_CBQ_MAXPRIO; prio++)
1259                 q->active[prio] = NULL;
1260
1261         for (h = 0; h < 16; h++) {
1262                 for (cl = q->classes[h]; cl; cl = cl->next) {
1263                         qdisc_reset(cl->q);
1264
1265                         cl->next_alive = NULL;
1266                         cl->undertime = PSCHED_PASTPERFECT;
1267                         cl->avgidle = cl->maxidle;
1268                         cl->deficit = cl->quantum;
1269                         cl->cpriority = cl->priority;
1270                 }
1271         }
1272         sch->q.qlen = 0;
1273 }
1274
1275
1276 static int cbq_set_lss(struct cbq_class *cl, struct tc_cbq_lssopt *lss)
1277 {
1278         if (lss->change&TCF_CBQ_LSS_FLAGS) {
1279                 cl->share = (lss->flags&TCF_CBQ_LSS_ISOLATED) ? NULL : cl->tparent;
1280                 cl->borrow = (lss->flags&TCF_CBQ_LSS_BOUNDED) ? NULL : cl->tparent;
1281         }
1282         if (lss->change&TCF_CBQ_LSS_EWMA)
1283                 cl->ewma_log = lss->ewma_log;
1284         if (lss->change&TCF_CBQ_LSS_AVPKT)
1285                 cl->avpkt = lss->avpkt;
1286         if (lss->change&TCF_CBQ_LSS_MINIDLE)
1287                 cl->minidle = -(long)lss->minidle;
1288         if (lss->change&TCF_CBQ_LSS_MAXIDLE) {
1289                 cl->maxidle = lss->maxidle;
1290                 cl->avgidle = lss->maxidle;
1291         }
1292         if (lss->change&TCF_CBQ_LSS_OFFTIME)
1293                 cl->offtime = lss->offtime;
1294         return 0;
1295 }
1296
1297 static void cbq_rmprio(struct cbq_sched_data *q, struct cbq_class *cl)
1298 {
1299         q->nclasses[cl->priority]--;
1300         q->quanta[cl->priority] -= cl->weight;
1301         cbq_normalize_quanta(q, cl->priority);
1302 }
1303
1304 static void cbq_addprio(struct cbq_sched_data *q, struct cbq_class *cl)
1305 {
1306         q->nclasses[cl->priority]++;
1307         q->quanta[cl->priority] += cl->weight;
1308         cbq_normalize_quanta(q, cl->priority);
1309 }
1310
1311 static int cbq_set_wrr(struct cbq_class *cl, struct tc_cbq_wrropt *wrr)
1312 {
1313         struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
1314
1315         if (wrr->allot)
1316                 cl->allot = wrr->allot;
1317         if (wrr->weight)
1318                 cl->weight = wrr->weight;
1319         if (wrr->priority) {
1320                 cl->priority = wrr->priority-1;
1321                 cl->cpriority = cl->priority;
1322                 if (cl->priority >= cl->priority2)
1323                         cl->priority2 = TC_CBQ_MAXPRIO-1;
1324         }
1325
1326         cbq_addprio(q, cl);
1327         return 0;
1328 }
1329
1330 static int cbq_set_overlimit(struct cbq_class *cl, struct tc_cbq_ovl *ovl)
1331 {
1332         switch (ovl->strategy) {
1333         case TC_CBQ_OVL_CLASSIC:
1334                 cl->overlimit = cbq_ovl_classic;
1335                 break;
1336         case TC_CBQ_OVL_DELAY:
1337                 cl->overlimit = cbq_ovl_delay;
1338                 break;
1339         case TC_CBQ_OVL_LOWPRIO:
1340                 if (ovl->priority2-1 >= TC_CBQ_MAXPRIO ||
1341                     ovl->priority2-1 <= cl->priority)
1342                         return -EINVAL;
1343                 cl->priority2 = ovl->priority2-1;
1344                 cl->overlimit = cbq_ovl_lowprio;
1345                 break;
1346         case TC_CBQ_OVL_DROP:
1347                 cl->overlimit = cbq_ovl_drop;
1348                 break;
1349         case TC_CBQ_OVL_RCLASSIC:
1350                 cl->overlimit = cbq_ovl_rclassic;
1351                 break;
1352         default:
1353                 return -EINVAL;
1354         }
1355         cl->penalty = ovl->penalty;
1356         return 0;
1357 }
1358
1359 #ifdef CONFIG_NET_CLS_ACT
1360 static int cbq_set_police(struct cbq_class *cl, struct tc_cbq_police *p)
1361 {
1362         cl->police = p->police;
1363
1364         if (cl->q->handle) {
1365                 if (p->police == TC_POLICE_RECLASSIFY)
1366                         cl->q->reshape_fail = cbq_reshape_fail;
1367                 else
1368                         cl->q->reshape_fail = NULL;
1369         }
1370         return 0;
1371 }
1372 #endif
1373
1374 static int cbq_set_fopt(struct cbq_class *cl, struct tc_cbq_fopt *fopt)
1375 {
1376         cbq_change_defmap(cl, fopt->split, fopt->defmap, fopt->defchange);
1377         return 0;
1378 }
1379
1380 static const struct nla_policy cbq_policy[TCA_CBQ_MAX + 1] = {
1381         [TCA_CBQ_LSSOPT]        = { .len = sizeof(struct tc_cbq_lssopt) },
1382         [TCA_CBQ_WRROPT]        = { .len = sizeof(struct tc_cbq_wrropt) },
1383         [TCA_CBQ_FOPT]          = { .len = sizeof(struct tc_cbq_fopt) },
1384         [TCA_CBQ_OVL_STRATEGY]  = { .len = sizeof(struct tc_cbq_ovl) },
1385         [TCA_CBQ_RATE]          = { .len = sizeof(struct tc_ratespec) },
1386         [TCA_CBQ_RTAB]          = { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
1387         [TCA_CBQ_POLICE]        = { .len = sizeof(struct tc_cbq_police) },
1388 };
1389
1390 static int cbq_init(struct Qdisc *sch, struct nlattr *opt)
1391 {
1392         struct cbq_sched_data *q = qdisc_priv(sch);
1393         struct nlattr *tb[TCA_CBQ_MAX + 1];
1394         struct tc_ratespec *r;
1395         int err;
1396
1397         err = nla_parse_nested(tb, TCA_CBQ_MAX, opt, cbq_policy);
1398         if (err < 0)
1399                 return err;
1400
1401         if (tb[TCA_CBQ_RTAB] == NULL || tb[TCA_CBQ_RATE] == NULL)
1402                 return -EINVAL;
1403
1404         r = nla_data(tb[TCA_CBQ_RATE]);
1405
1406         if ((q->link.R_tab = qdisc_get_rtab(r, tb[TCA_CBQ_RTAB])) == NULL)
1407                 return -EINVAL;
1408
1409         q->link.refcnt = 1;
1410         q->link.sibling = &q->link;
1411         q->link.classid = sch->handle;
1412         q->link.qdisc = sch;
1413         if (!(q->link.q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops,
1414                                             sch->handle)))
1415                 q->link.q = &noop_qdisc;
1416
1417         q->link.priority = TC_CBQ_MAXPRIO-1;
1418         q->link.priority2 = TC_CBQ_MAXPRIO-1;
1419         q->link.cpriority = TC_CBQ_MAXPRIO-1;
1420         q->link.ovl_strategy = TC_CBQ_OVL_CLASSIC;
1421         q->link.overlimit = cbq_ovl_classic;
1422         q->link.allot = psched_mtu(sch->dev);
1423         q->link.quantum = q->link.allot;
1424         q->link.weight = q->link.R_tab->rate.rate;
1425
1426         q->link.ewma_log = TC_CBQ_DEF_EWMA;
1427         q->link.avpkt = q->link.allot/2;
1428         q->link.minidle = -0x7FFFFFFF;
1429
1430         qdisc_watchdog_init(&q->watchdog, sch);
1431         hrtimer_init(&q->delay_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
1432         q->delay_timer.function = cbq_undelay;
1433         q->toplevel = TC_CBQ_MAXLEVEL;
1434         q->now = psched_get_time();
1435         q->now_rt = q->now;
1436
1437         cbq_link_class(&q->link);
1438
1439         if (tb[TCA_CBQ_LSSOPT])
1440                 cbq_set_lss(&q->link, nla_data(tb[TCA_CBQ_LSSOPT]));
1441
1442         cbq_addprio(q, &q->link);
1443         return 0;
1444 }
1445
1446 static __inline__ int cbq_dump_rate(struct sk_buff *skb, struct cbq_class *cl)
1447 {
1448         unsigned char *b = skb_tail_pointer(skb);
1449
1450         NLA_PUT(skb, TCA_CBQ_RATE, sizeof(cl->R_tab->rate), &cl->R_tab->rate);
1451         return skb->len;
1452
1453 nla_put_failure:
1454         nlmsg_trim(skb, b);
1455         return -1;
1456 }
1457
1458 static __inline__ int cbq_dump_lss(struct sk_buff *skb, struct cbq_class *cl)
1459 {
1460         unsigned char *b = skb_tail_pointer(skb);
1461         struct tc_cbq_lssopt opt;
1462
1463         opt.flags = 0;
1464         if (cl->borrow == NULL)
1465                 opt.flags |= TCF_CBQ_LSS_BOUNDED;
1466         if (cl->share == NULL)
1467                 opt.flags |= TCF_CBQ_LSS_ISOLATED;
1468         opt.ewma_log = cl->ewma_log;
1469         opt.level = cl->level;
1470         opt.avpkt = cl->avpkt;
1471         opt.maxidle = cl->maxidle;
1472         opt.minidle = (u32)(-cl->minidle);
1473         opt.offtime = cl->offtime;
1474         opt.change = ~0;
1475         NLA_PUT(skb, TCA_CBQ_LSSOPT, sizeof(opt), &opt);
1476         return skb->len;
1477
1478 nla_put_failure:
1479         nlmsg_trim(skb, b);
1480         return -1;
1481 }
1482
1483 static __inline__ int cbq_dump_wrr(struct sk_buff *skb, struct cbq_class *cl)
1484 {
1485         unsigned char *b = skb_tail_pointer(skb);
1486         struct tc_cbq_wrropt opt;
1487
1488         opt.flags = 0;
1489         opt.allot = cl->allot;
1490         opt.priority = cl->priority+1;
1491         opt.cpriority = cl->cpriority+1;
1492         opt.weight = cl->weight;
1493         NLA_PUT(skb, TCA_CBQ_WRROPT, sizeof(opt), &opt);
1494         return skb->len;
1495
1496 nla_put_failure:
1497         nlmsg_trim(skb, b);
1498         return -1;
1499 }
1500
1501 static __inline__ int cbq_dump_ovl(struct sk_buff *skb, struct cbq_class *cl)
1502 {
1503         unsigned char *b = skb_tail_pointer(skb);
1504         struct tc_cbq_ovl opt;
1505
1506         opt.strategy = cl->ovl_strategy;
1507         opt.priority2 = cl->priority2+1;
1508         opt.pad = 0;
1509         opt.penalty = cl->penalty;
1510         NLA_PUT(skb, TCA_CBQ_OVL_STRATEGY, sizeof(opt), &opt);
1511         return skb->len;
1512
1513 nla_put_failure:
1514         nlmsg_trim(skb, b);
1515         return -1;
1516 }
1517
1518 static __inline__ int cbq_dump_fopt(struct sk_buff *skb, struct cbq_class *cl)
1519 {
1520         unsigned char *b = skb_tail_pointer(skb);
1521         struct tc_cbq_fopt opt;
1522
1523         if (cl->split || cl->defmap) {
1524                 opt.split = cl->split ? cl->split->classid : 0;
1525                 opt.defmap = cl->defmap;
1526                 opt.defchange = ~0;
1527                 NLA_PUT(skb, TCA_CBQ_FOPT, sizeof(opt), &opt);
1528         }
1529         return skb->len;
1530
1531 nla_put_failure:
1532         nlmsg_trim(skb, b);
1533         return -1;
1534 }
1535
1536 #ifdef CONFIG_NET_CLS_ACT
1537 static __inline__ int cbq_dump_police(struct sk_buff *skb, struct cbq_class *cl)
1538 {
1539         unsigned char *b = skb_tail_pointer(skb);
1540         struct tc_cbq_police opt;
1541
1542         if (cl->police) {
1543                 opt.police = cl->police;
1544                 opt.__res1 = 0;
1545                 opt.__res2 = 0;
1546                 NLA_PUT(skb, TCA_CBQ_POLICE, sizeof(opt), &opt);
1547         }
1548         return skb->len;
1549
1550 nla_put_failure:
1551         nlmsg_trim(skb, b);
1552         return -1;
1553 }
1554 #endif
1555
1556 static int cbq_dump_attr(struct sk_buff *skb, struct cbq_class *cl)
1557 {
1558         if (cbq_dump_lss(skb, cl) < 0 ||
1559             cbq_dump_rate(skb, cl) < 0 ||
1560             cbq_dump_wrr(skb, cl) < 0 ||
1561             cbq_dump_ovl(skb, cl) < 0 ||
1562 #ifdef CONFIG_NET_CLS_ACT
1563             cbq_dump_police(skb, cl) < 0 ||
1564 #endif
1565             cbq_dump_fopt(skb, cl) < 0)
1566                 return -1;
1567         return 0;
1568 }
1569
1570 static int cbq_dump(struct Qdisc *sch, struct sk_buff *skb)
1571 {
1572         struct cbq_sched_data *q = qdisc_priv(sch);
1573         struct nlattr *nest;
1574
1575         nest = nla_nest_start(skb, TCA_OPTIONS);
1576         if (nest == NULL)
1577                 goto nla_put_failure;
1578         if (cbq_dump_attr(skb, &q->link) < 0)
1579                 goto nla_put_failure;
1580         nla_nest_end(skb, nest);
1581         return skb->len;
1582
1583 nla_put_failure:
1584         nla_nest_cancel(skb, nest);
1585         return -1;
1586 }
1587
1588 static int
1589 cbq_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
1590 {
1591         struct cbq_sched_data *q = qdisc_priv(sch);
1592
1593         q->link.xstats.avgidle = q->link.avgidle;
1594         return gnet_stats_copy_app(d, &q->link.xstats, sizeof(q->link.xstats));
1595 }
1596
1597 static int
1598 cbq_dump_class(struct Qdisc *sch, unsigned long arg,
1599                struct sk_buff *skb, struct tcmsg *tcm)
1600 {
1601         struct cbq_class *cl = (struct cbq_class*)arg;
1602         struct nlattr *nest;
1603
1604         if (cl->tparent)
1605                 tcm->tcm_parent = cl->tparent->classid;
1606         else
1607                 tcm->tcm_parent = TC_H_ROOT;
1608         tcm->tcm_handle = cl->classid;
1609         tcm->tcm_info = cl->q->handle;
1610
1611         nest = nla_nest_start(skb, TCA_OPTIONS);
1612         if (nest == NULL)
1613                 goto nla_put_failure;
1614         if (cbq_dump_attr(skb, cl) < 0)
1615                 goto nla_put_failure;
1616         nla_nest_end(skb, nest);
1617         return skb->len;
1618
1619 nla_put_failure:
1620         nla_nest_cancel(skb, nest);
1621         return -1;
1622 }
1623
1624 static int
1625 cbq_dump_class_stats(struct Qdisc *sch, unsigned long arg,
1626         struct gnet_dump *d)
1627 {
1628         struct cbq_sched_data *q = qdisc_priv(sch);
1629         struct cbq_class *cl = (struct cbq_class*)arg;
1630
1631         cl->qstats.qlen = cl->q->q.qlen;
1632         cl->xstats.avgidle = cl->avgidle;
1633         cl->xstats.undertime = 0;
1634
1635         if (cl->undertime != PSCHED_PASTPERFECT)
1636                 cl->xstats.undertime = cl->undertime - q->now;
1637
1638         if (gnet_stats_copy_basic(d, &cl->bstats) < 0 ||
1639             gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
1640             gnet_stats_copy_queue(d, &cl->qstats) < 0)
1641                 return -1;
1642
1643         return gnet_stats_copy_app(d, &cl->xstats, sizeof(cl->xstats));
1644 }
1645
1646 static int cbq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
1647                      struct Qdisc **old)
1648 {
1649         struct cbq_class *cl = (struct cbq_class*)arg;
1650
1651         if (cl) {
1652                 if (new == NULL) {
1653                         if ((new = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops,
1654                                                      cl->classid)) == NULL)
1655                                 return -ENOBUFS;
1656                 } else {
1657 #ifdef CONFIG_NET_CLS_ACT
1658                         if (cl->police == TC_POLICE_RECLASSIFY)
1659                                 new->reshape_fail = cbq_reshape_fail;
1660 #endif
1661                 }
1662                 sch_tree_lock(sch);
1663                 *old = xchg(&cl->q, new);
1664                 qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
1665                 qdisc_reset(*old);
1666                 sch_tree_unlock(sch);
1667
1668                 return 0;
1669         }
1670         return -ENOENT;
1671 }
1672
1673 static struct Qdisc *
1674 cbq_leaf(struct Qdisc *sch, unsigned long arg)
1675 {
1676         struct cbq_class *cl = (struct cbq_class*)arg;
1677
1678         return cl ? cl->q : NULL;
1679 }
1680
1681 static void cbq_qlen_notify(struct Qdisc *sch, unsigned long arg)
1682 {
1683         struct cbq_class *cl = (struct cbq_class *)arg;
1684
1685         if (cl->q->q.qlen == 0)
1686                 cbq_deactivate_class(cl);
1687 }
1688
1689 static unsigned long cbq_get(struct Qdisc *sch, u32 classid)
1690 {
1691         struct cbq_sched_data *q = qdisc_priv(sch);
1692         struct cbq_class *cl = cbq_class_lookup(q, classid);
1693
1694         if (cl) {
1695                 cl->refcnt++;
1696                 return (unsigned long)cl;
1697         }
1698         return 0;
1699 }
1700
1701 static void cbq_destroy_class(struct Qdisc *sch, struct cbq_class *cl)
1702 {
1703         struct cbq_sched_data *q = qdisc_priv(sch);
1704
1705         BUG_TRAP(!cl->filters);
1706
1707         tcf_destroy_chain(cl->filter_list);
1708         qdisc_destroy(cl->q);
1709         qdisc_put_rtab(cl->R_tab);
1710         gen_kill_estimator(&cl->bstats, &cl->rate_est);
1711         if (cl != &q->link)
1712                 kfree(cl);
1713 }
1714
1715 static void
1716 cbq_destroy(struct Qdisc* sch)
1717 {
1718         struct cbq_sched_data *q = qdisc_priv(sch);
1719         struct cbq_class *cl;
1720         unsigned h;
1721
1722 #ifdef CONFIG_NET_CLS_ACT
1723         q->rx_class = NULL;
1724 #endif
1725         /*
1726          * Filters must be destroyed first because we don't destroy the
1727          * classes from root to leafs which means that filters can still
1728          * be bound to classes which have been destroyed already. --TGR '04
1729          */
1730         for (h = 0; h < 16; h++) {
1731                 for (cl = q->classes[h]; cl; cl = cl->next) {
1732                         tcf_destroy_chain(cl->filter_list);
1733                         cl->filter_list = NULL;
1734                 }
1735         }
1736         for (h = 0; h < 16; h++) {
1737                 struct cbq_class *next;
1738
1739                 for (cl = q->classes[h]; cl; cl = next) {
1740                         next = cl->next;
1741                         cbq_destroy_class(sch, cl);
1742                 }
1743         }
1744 }
1745
1746 static void cbq_put(struct Qdisc *sch, unsigned long arg)
1747 {
1748         struct cbq_class *cl = (struct cbq_class*)arg;
1749
1750         if (--cl->refcnt == 0) {
1751 #ifdef CONFIG_NET_CLS_ACT
1752                 struct cbq_sched_data *q = qdisc_priv(sch);
1753
1754                 spin_lock_bh(&sch->dev->queue_lock);
1755                 if (q->rx_class == cl)
1756                         q->rx_class = NULL;
1757                 spin_unlock_bh(&sch->dev->queue_lock);
1758 #endif
1759
1760                 cbq_destroy_class(sch, cl);
1761         }
1762 }
1763
1764 static int
1765 cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **tca,
1766                  unsigned long *arg)
1767 {
1768         int err;
1769         struct cbq_sched_data *q = qdisc_priv(sch);
1770         struct cbq_class *cl = (struct cbq_class*)*arg;
1771         struct nlattr *opt = tca[TCA_OPTIONS];
1772         struct nlattr *tb[TCA_CBQ_MAX + 1];
1773         struct cbq_class *parent;
1774         struct qdisc_rate_table *rtab = NULL;
1775
1776         if (opt == NULL)
1777                 return -EINVAL;
1778
1779         err = nla_parse_nested(tb, TCA_CBQ_MAX, opt, cbq_policy);
1780         if (err < 0)
1781                 return err;
1782
1783         if (cl) {
1784                 /* Check parent */
1785                 if (parentid) {
1786                         if (cl->tparent && cl->tparent->classid != parentid)
1787                                 return -EINVAL;
1788                         if (!cl->tparent && parentid != TC_H_ROOT)
1789                                 return -EINVAL;
1790                 }
1791
1792                 if (tb[TCA_CBQ_RATE]) {
1793                         rtab = qdisc_get_rtab(nla_data(tb[TCA_CBQ_RATE]), tb[TCA_CBQ_RTAB]);
1794                         if (rtab == NULL)
1795                                 return -EINVAL;
1796                 }
1797
1798                 /* Change class parameters */
1799                 sch_tree_lock(sch);
1800
1801                 if (cl->next_alive != NULL)
1802                         cbq_deactivate_class(cl);
1803
1804                 if (rtab) {
1805                         rtab = xchg(&cl->R_tab, rtab);
1806                         qdisc_put_rtab(rtab);
1807                 }
1808
1809                 if (tb[TCA_CBQ_LSSOPT])
1810                         cbq_set_lss(cl, nla_data(tb[TCA_CBQ_LSSOPT]));
1811
1812                 if (tb[TCA_CBQ_WRROPT]) {
1813                         cbq_rmprio(q, cl);
1814                         cbq_set_wrr(cl, nla_data(tb[TCA_CBQ_WRROPT]));
1815                 }
1816
1817                 if (tb[TCA_CBQ_OVL_STRATEGY])
1818                         cbq_set_overlimit(cl, nla_data(tb[TCA_CBQ_OVL_STRATEGY]));
1819
1820 #ifdef CONFIG_NET_CLS_ACT
1821                 if (tb[TCA_CBQ_POLICE])
1822                         cbq_set_police(cl, nla_data(tb[TCA_CBQ_POLICE]));
1823 #endif
1824
1825                 if (tb[TCA_CBQ_FOPT])
1826                         cbq_set_fopt(cl, nla_data(tb[TCA_CBQ_FOPT]));
1827
1828                 if (cl->q->q.qlen)
1829                         cbq_activate_class(cl);
1830
1831                 sch_tree_unlock(sch);
1832
1833                 if (tca[TCA_RATE])
1834                         gen_replace_estimator(&cl->bstats, &cl->rate_est,
1835                                               &sch->dev->queue_lock,
1836                                               tca[TCA_RATE]);
1837                 return 0;
1838         }
1839
1840         if (parentid == TC_H_ROOT)
1841                 return -EINVAL;
1842
1843         if (tb[TCA_CBQ_WRROPT] == NULL || tb[TCA_CBQ_RATE] == NULL ||
1844             tb[TCA_CBQ_LSSOPT] == NULL)
1845                 return -EINVAL;
1846
1847         rtab = qdisc_get_rtab(nla_data(tb[TCA_CBQ_RATE]), tb[TCA_CBQ_RTAB]);
1848         if (rtab == NULL)
1849                 return -EINVAL;
1850
1851         if (classid) {
1852                 err = -EINVAL;
1853                 if (TC_H_MAJ(classid^sch->handle) || cbq_class_lookup(q, classid))
1854                         goto failure;
1855         } else {
1856                 int i;
1857                 classid = TC_H_MAKE(sch->handle,0x8000);
1858
1859                 for (i=0; i<0x8000; i++) {
1860                         if (++q->hgenerator >= 0x8000)
1861                                 q->hgenerator = 1;
1862                         if (cbq_class_lookup(q, classid|q->hgenerator) == NULL)
1863                                 break;
1864                 }
1865                 err = -ENOSR;
1866                 if (i >= 0x8000)
1867                         goto failure;
1868                 classid = classid|q->hgenerator;
1869         }
1870
1871         parent = &q->link;
1872         if (parentid) {
1873                 parent = cbq_class_lookup(q, parentid);
1874                 err = -EINVAL;
1875                 if (parent == NULL)
1876                         goto failure;
1877         }
1878
1879         err = -ENOBUFS;
1880         cl = kzalloc(sizeof(*cl), GFP_KERNEL);
1881         if (cl == NULL)
1882                 goto failure;
1883         cl->R_tab = rtab;
1884         rtab = NULL;
1885         cl->refcnt = 1;
1886         if (!(cl->q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops, classid)))
1887                 cl->q = &noop_qdisc;
1888         cl->classid = classid;
1889         cl->tparent = parent;
1890         cl->qdisc = sch;
1891         cl->allot = parent->allot;
1892         cl->quantum = cl->allot;
1893         cl->weight = cl->R_tab->rate.rate;
1894
1895         sch_tree_lock(sch);
1896         cbq_link_class(cl);
1897         cl->borrow = cl->tparent;
1898         if (cl->tparent != &q->link)
1899                 cl->share = cl->tparent;
1900         cbq_adjust_levels(parent);
1901         cl->minidle = -0x7FFFFFFF;
1902         cbq_set_lss(cl, nla_data(tb[TCA_CBQ_LSSOPT]));
1903         cbq_set_wrr(cl, nla_data(tb[TCA_CBQ_WRROPT]));
1904         if (cl->ewma_log==0)
1905                 cl->ewma_log = q->link.ewma_log;
1906         if (cl->maxidle==0)
1907                 cl->maxidle = q->link.maxidle;
1908         if (cl->avpkt==0)
1909                 cl->avpkt = q->link.avpkt;
1910         cl->overlimit = cbq_ovl_classic;
1911         if (tb[TCA_CBQ_OVL_STRATEGY])
1912                 cbq_set_overlimit(cl, nla_data(tb[TCA_CBQ_OVL_STRATEGY]));
1913 #ifdef CONFIG_NET_CLS_ACT
1914         if (tb[TCA_CBQ_POLICE])
1915                 cbq_set_police(cl, nla_data(tb[TCA_CBQ_POLICE]));
1916 #endif
1917         if (tb[TCA_CBQ_FOPT])
1918                 cbq_set_fopt(cl, nla_data(tb[TCA_CBQ_FOPT]));
1919         sch_tree_unlock(sch);
1920
1921         if (tca[TCA_RATE])
1922                 gen_new_estimator(&cl->bstats, &cl->rate_est,
1923                                   &sch->dev->queue_lock, tca[TCA_RATE]);
1924
1925         *arg = (unsigned long)cl;
1926         return 0;
1927
1928 failure:
1929         qdisc_put_rtab(rtab);
1930         return err;
1931 }
1932
1933 static int cbq_delete(struct Qdisc *sch, unsigned long arg)
1934 {
1935         struct cbq_sched_data *q = qdisc_priv(sch);
1936         struct cbq_class *cl = (struct cbq_class*)arg;
1937         unsigned int qlen;
1938
1939         if (cl->filters || cl->children || cl == &q->link)
1940                 return -EBUSY;
1941
1942         sch_tree_lock(sch);
1943
1944         qlen = cl->q->q.qlen;
1945         qdisc_reset(cl->q);
1946         qdisc_tree_decrease_qlen(cl->q, qlen);
1947
1948         if (cl->next_alive)
1949                 cbq_deactivate_class(cl);
1950
1951         if (q->tx_borrowed == cl)
1952                 q->tx_borrowed = q->tx_class;
1953         if (q->tx_class == cl) {
1954                 q->tx_class = NULL;
1955                 q->tx_borrowed = NULL;
1956         }
1957 #ifdef CONFIG_NET_CLS_ACT
1958         if (q->rx_class == cl)
1959                 q->rx_class = NULL;
1960 #endif
1961
1962         cbq_unlink_class(cl);
1963         cbq_adjust_levels(cl->tparent);
1964         cl->defmap = 0;
1965         cbq_sync_defmap(cl);
1966
1967         cbq_rmprio(q, cl);
1968         sch_tree_unlock(sch);
1969
1970         if (--cl->refcnt == 0)
1971                 cbq_destroy_class(sch, cl);
1972
1973         return 0;
1974 }
1975
1976 static struct tcf_proto **cbq_find_tcf(struct Qdisc *sch, unsigned long arg)
1977 {
1978         struct cbq_sched_data *q = qdisc_priv(sch);
1979         struct cbq_class *cl = (struct cbq_class *)arg;
1980
1981         if (cl == NULL)
1982                 cl = &q->link;
1983
1984         return &cl->filter_list;
1985 }
1986
1987 static unsigned long cbq_bind_filter(struct Qdisc *sch, unsigned long parent,
1988                                      u32 classid)
1989 {
1990         struct cbq_sched_data *q = qdisc_priv(sch);
1991         struct cbq_class *p = (struct cbq_class*)parent;
1992         struct cbq_class *cl = cbq_class_lookup(q, classid);
1993
1994         if (cl) {
1995                 if (p && p->level <= cl->level)
1996                         return 0;
1997                 cl->filters++;
1998                 return (unsigned long)cl;
1999         }
2000         return 0;
2001 }
2002
2003 static void cbq_unbind_filter(struct Qdisc *sch, unsigned long arg)
2004 {
2005         struct cbq_class *cl = (struct cbq_class*)arg;
2006
2007         cl->filters--;
2008 }
2009
2010 static void cbq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
2011 {
2012         struct cbq_sched_data *q = qdisc_priv(sch);
2013         unsigned h;
2014
2015         if (arg->stop)
2016                 return;
2017
2018         for (h = 0; h < 16; h++) {
2019                 struct cbq_class *cl;
2020
2021                 for (cl = q->classes[h]; cl; cl = cl->next) {
2022                         if (arg->count < arg->skip) {
2023                                 arg->count++;
2024                                 continue;
2025                         }
2026                         if (arg->fn(sch, (unsigned long)cl, arg) < 0) {
2027                                 arg->stop = 1;
2028                                 return;
2029                         }
2030                         arg->count++;
2031                 }
2032         }
2033 }
2034
2035 static const struct Qdisc_class_ops cbq_class_ops = {
2036         .graft          =       cbq_graft,
2037         .leaf           =       cbq_leaf,
2038         .qlen_notify    =       cbq_qlen_notify,
2039         .get            =       cbq_get,
2040         .put            =       cbq_put,
2041         .change         =       cbq_change_class,
2042         .delete         =       cbq_delete,
2043         .walk           =       cbq_walk,
2044         .tcf_chain      =       cbq_find_tcf,
2045         .bind_tcf       =       cbq_bind_filter,
2046         .unbind_tcf     =       cbq_unbind_filter,
2047         .dump           =       cbq_dump_class,
2048         .dump_stats     =       cbq_dump_class_stats,
2049 };
2050
2051 static struct Qdisc_ops cbq_qdisc_ops __read_mostly = {
2052         .next           =       NULL,
2053         .cl_ops         =       &cbq_class_ops,
2054         .id             =       "cbq",
2055         .priv_size      =       sizeof(struct cbq_sched_data),
2056         .enqueue        =       cbq_enqueue,
2057         .dequeue        =       cbq_dequeue,
2058         .requeue        =       cbq_requeue,
2059         .drop           =       cbq_drop,
2060         .init           =       cbq_init,
2061         .reset          =       cbq_reset,
2062         .destroy        =       cbq_destroy,
2063         .change         =       NULL,
2064         .dump           =       cbq_dump,
2065         .dump_stats     =       cbq_dump_stats,
2066         .owner          =       THIS_MODULE,
2067 };
2068
2069 static int __init cbq_module_init(void)
2070 {
2071         return register_qdisc(&cbq_qdisc_ops);
2072 }
2073 static void __exit cbq_module_exit(void)
2074 {
2075         unregister_qdisc(&cbq_qdisc_ops);
2076 }
2077 module_init(cbq_module_init)
2078 module_exit(cbq_module_exit)
2079 MODULE_LICENSE("GPL");